Skip to content
Snippets Groups Projects
Commit f99e684b authored by elabaron
Browse files

modify RNAseq script and adapt queue for lake

parent b0f45d92
No related branches found
No related tags found
No related merge requests found
...@@ -9,49 +9,46 @@ profiles { ...@@ -9,49 +9,46 @@ profiles {
cpus = 1 cpus = 1
memory = "20GB" memory = "20GB"
time = "12h" time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
withName: rRNA_removal { withName: rRNA_removal {
beforeScript = "source $baseDir/.conda_psmn.sh" beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1" conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1"
executor = "sge" executor = "sge"
clusterOptions = "-cwd -V" clusterOptions = "-cwd -V"
cpus = 16 queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
memory = "30GB" penv = 'openmp32'
time = "24h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
} }
withName: hisat2_human { withName: hisat2_human {
beforeScript = "source $baseDir/.conda_psmn.sh" beforeScript = "source $baseDir/.conda_psmn.sh"
module = "$baseDir/.conda_envs/hisat2_2.1.0" conda = "$baseDir/.conda_envs/hisat2_2.1.0"
executor = "sge" executor = "sge"
clusterOptions = "-cwd -V" clusterOptions = "-cwd -V"
memory = "20GB" memory = "20GB"
cpus = 16 cpus = 16
time = "12h" time = "12h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
penv = 'openmp16' penv = 'openmp32'
} }
withName: sort_bam { withName: sort_bam {
beforeScript = "source $baseDir/.conda_psmn.sh" beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/samtools_1.7" conda = "$baseDir/.conda_envs/hisat2_2.1.0"
executor = "sge" executor = "sge"
clusterOptions = "-cwd -V" clusterOptions = "-cwd -V"
cpus = 1 cpus = 1
memory = "20GB" memory = "20GB"
time = "12h" time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
withName: index_bam { withName: index_bam {
beforeScript = "source $baseDir/.conda_psmn.sh" beforeScript = "source $baseDir/.conda_psmn.sh"
conda = "$baseDir/.conda_envs/samtools_1.7" conda = "$baseDir/.conda_envs/hisat2_2.1.0"
executor = "sge" executor = "sge"
clusterOptions = "-cwd -V" clusterOptions = "-cwd -V"
cpus = 1 cpus = 1
memory = "20GB" memory = "20GB"
time = "12h" time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
withName: dedup { withName: dedup {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
...@@ -61,7 +58,7 @@ profiles { ...@@ -61,7 +58,7 @@ profiles {
cpus = 1 cpus = 1
memory = "20GB" memory = "20GB"
time = "12h" time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
withName: counting { withName: counting {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
...@@ -71,7 +68,7 @@ profiles { ...@@ -71,7 +68,7 @@ profiles {
cpus = 1 cpus = 1
memory = "20GB" memory = "20GB"
time = "12h" time = "12h"
queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
} }
} }
......
...@@ -27,6 +27,7 @@ process trimming { ...@@ -27,6 +27,7 @@ process trimming {
script: script:
""" """
cutadapt -a AGATCGGAAGAGC -A AGATCGGAAGAGC \ cutadapt -a AGATCGGAAGAGC -A AGATCGGAAGAGC \
--minimum-length 50 \
-o ${file_id}_cut_R1.fastq.gz -p ${file_id}_tmp_R2.fastq.gz \ -o ${file_id}_cut_R1.fastq.gz -p ${file_id}_tmp_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${file_id}_report.txt ${reads[0]} ${reads[1]} > ${file_id}_report.txt
...@@ -50,7 +51,7 @@ Channel ...@@ -50,7 +51,7 @@ Channel
process rRNA_removal { process rRNA_removal {
tag "$file_id" tag "$file_id"
cpus 8 cpus 8
publishDir "results/RNAseq/U937/02_rRNA_depletion/", mode: 'copy' publishDir "results/RNAseq/02_rRNA_depletion/", mode: 'copy'
input: input:
set file_id, file(reads) from fastq_files_cut set file_id, file(reads) from fastq_files_cut
...@@ -107,26 +108,31 @@ hisat2 -x genome_tran -p ${task.cpus} \ ...@@ -107,26 +108,31 @@ hisat2 -x genome_tran -p ${task.cpus} \
""" """
} }
reads_aligned_hg38.into{for_mapping;for_htseq}
/* sorting */ /* sorting */
process index_bam { process index_bam {
tag "$file_id" tag "$file_id"
publishDir "${params.output}/03_hisat2_hg38/", mode: 'copy' publishDir "results/RNAseq/03_hisat2_hg38/", mode: 'copy'
input: input:
set file_id, file(bam) from reads_aligned_hg38 set file_id, file(bam) from for_mapping
file report from hisat_report
output: output:
set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
file "*.txt" into report_hisat
script: script:
""" """
samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
samtools index ${file_id}_sorted.bam samtools index ${file_id}_sorted.bam
cat ${report} > ${file_id}_hisat_hg38.txt
""" """
} }
sorted_bam_files.into{for_dedup;for_htseq}
/* deduplicating reads /* deduplicating reads
...@@ -172,6 +178,21 @@ cat ${dedup} > ${file_id}_dedup_report.txt ...@@ -172,6 +178,21 @@ cat ${dedup} > ${file_id}_dedup_report.txt
/* HTseq */ /* HTseq */
// sort_bam: re-sort each BAM by read name before counting.
//
// Input : (file_id, bam) tuples read from the `for_htseq` channel.
// Output: (file_id, "*_sorted.bam") tuples emitted into `sorted_bam_files_2`.
//
// The script runs `samtools sort` with `-n` (sort by read name), passes
// `task.cpus` as the thread count (`-@`), and writes BAM output to
// `${file_id}_sorted.bam`.
//
// NOTE(review): presumably this exists so htseq-count can rely on name-ordered
// input — confirm against the counting step's options.
// NOTE(review): if the staged input BAM is itself already named
// `${file_id}_sorted.bam`, input and output paths would coincide — verify which
// upstream channel actually feeds `for_htseq`.
process sort_bam {
tag "$file_id"
input:
set file_id, file(bam) from for_htseq
output:
set file_id, "*_sorted.bam" into sorted_bam_files_2
script:
"""
samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_sorted.bam ${bam}
"""
}
params.gtf = "$baseDir/data/annotation/*.gtf" params.gtf = "$baseDir/data/annotation/*.gtf"
log.info "gtf files : ${params.gtf}" log.info "gtf files : ${params.gtf}"
...@@ -182,10 +203,10 @@ Channel ...@@ -182,10 +203,10 @@ Channel
process counting { process counting {
tag "$file_id" tag "$file_id"
publishDir "${params.output}/04_HTseq/", mode: 'copy' publishDir "results/RNAseq/04_HTseq/", mode: 'copy'
input: input:
set file_id, file(bam) from for_htseq set file_id, file(bam) from sorted_bam_files_2
file gtf from gtf_file.toList() file gtf from gtf_file.toList()
output: output:
...@@ -199,7 +220,6 @@ htseq-count ${bam[0]} ${gtf} \ ...@@ -199,7 +220,6 @@ htseq-count ${bam[0]} ${gtf} \
-s yes \ -s yes \
-t CDS \ -t CDS \
-i gene_id \ -i gene_id \
-r pos \
-f bam \ -f bam \
> ${file_id}_CDS.count > ${file_id}_CDS.count
...@@ -209,7 +229,6 @@ htseq-count ${bam[0]} ${gtf} \ ...@@ -209,7 +229,6 @@ htseq-count ${bam[0]} ${gtf} \
-s yes \ -s yes \
-t exon \ -t exon \
-i gene_id \ -i gene_id \
-r pos \
-f bam \ -f bam \
> ${file_id}_exon.count > ${file_id}_exon.count
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment