diff --git a/src/RNAseq.config b/src/RNAseq.config index ed86a1f44faed4a8c48d273632b571b09ed8472c..30c302096a5db023ce8b18d45e65fd795e559d0f 100644 --- a/src/RNAseq.config +++ b/src/RNAseq.config @@ -51,16 +51,6 @@ profiles { time = "12h" queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } - withName: dedup { - beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" - module = "umi_tools_1.0.0" - executor = "sge" - clusterOptions = "-cwd -V" - cpus = 1 - memory = "20GB" - time = "12h" - queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' - } withName: counting { beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" module = "htseq/0.11.2" diff --git a/src/RNAseq.nf b/src/RNAseq.nf index 084b9ff2e84260fddfe215d13913faacb4f2c191..3b6e39574f9c2c80914226443b629ea2b762df57 100644 --- a/src/RNAseq.nf +++ b/src/RNAseq.nf @@ -146,49 +146,6 @@ cat ${report} > ${file_id}_hisat_hg38.txt """ } - -/* deduplicating reads - -params.dedup_options = "--paired" - -process dedup { - tag "$file_id" - - input: - set file_id, file(bam) from for_dedup - - output: - set file_id, "*dedup.bam" into dedup_bam - file "*.txt" into dedup_report - - script: -""" -umi_tools dedup -I ${bam[0]} \ - ${params.dedup_options} \ - -S ${file_id}_dedup.bam > report.txt -""" -} - -process sort_bam { - tag "$file_id" - publishDir "${params.output}/03_hisat2_hg38_dedup/", mode: 'copy' - - input: - set file_id, file(bam) from dedup_bam - file dedup from dedup_report - - output: - set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files_2 - file "*.txt" into report_dedup - - script: -""" -samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} -samtools index ${file_id}_sorted.bam -cat ${dedup} > ${file_id}_dedup_report.txt -""" -} */ - /* HTseq */ process sort_bam { diff --git a/src/RibosomeProfiling.config b/src/RibosomeProfiling.config index 6837f9cc1217343f7067a3a7a0eff8019c7391c0..b4abd97ecbdddc78a7222e821846157f9e295cec 100644 --- a/src/RibosomeProfiling.config +++ b/src/RibosomeProfiling.config @@ -3,13 +3,13 @@ profiles { process{ withName: trimming { beforeScript = "source $baseDir/.conda_psmn.sh" - conda = "$baseDir/.conda_envs/cutadapt_2.1" + conda = "$baseDir/.conda_envs/cutadapt_2.4" executor = "sge" clusterOptions = "-cwd -V" cpus = 1 memory = "20GB" time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: rRNA_removal { beforeScript = "source $baseDir/.conda_psmn.sh" @@ -19,7 +19,7 @@ profiles { cpus = 16 memory = "30GB" time = "24h" - queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' penv = 'openmp16' } withName: hisat2_human { @@ -30,19 +30,9 @@ profiles { memory = "20GB" cpus = 16 time = "12h" - queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' penv = 'openmp16' } - withName: sort_bam { - beforeScript = "source $baseDir/.conda_psmn.sh" - conda = "$baseDir/.conda_envs/samtools_1.7" - executor = "sge" - clusterOptions = "-cwd -V" - cpus = 1 - memory = "20GB" - time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' - } withName: index_bam { beforeScript = "source $baseDir/.conda_psmn.sh" conda = "$baseDir/.conda_envs/samtools_1.7" @@ -51,17 +41,7 @@ profiles { cpus = 1 memory = "20GB" time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' - } - withName: dedup { - beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" - module = "umi_tools/1.0.0" - executor = "sge" - clusterOptions = "-cwd -V" - cpus = 1 - memory = "20GB" - time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: counting { beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" @@ -71,7 +51,7 @@ profiles { cpus = 1 memory = "20GB" time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } } } @@ -91,18 +71,10 @@ profiles { cpus = 4 container = "lbmc/hisat2:2.1.0" } - withName: sort_bam { - container = "lbmc/samtools:1.7" - cpus = 1 - } withName: index_bam { container = "lbmc/samtools:1.7" cpus = 1 } - withName: dedup { - container = "lbmc/umi_tools:1.0.0" - cpus = 1 - } withName: counting { container = "lbmc/htseq:0.11.2" cpus = 1 diff --git a/src/RibosomeProfiling.nf b/src/RibosomeProfiling.nf index 9586d01e71ab885c4a733c8d439eb3c388352491..faeb37f2402504642be76281ab7a57462073c3a0 100644 --- a/src/RibosomeProfiling.nf +++ b/src/RibosomeProfiling.nf @@ -110,72 +110,52 @@ hisat2 -x ${index_id} -p ${task.cpus} \ --end-to-end --rna-strandness 'F' \ 2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam +if grep -q "Error " ${file_id}_hisat2_hg38.txt; then + exit 1 +fi """ } -/* sorting */ - -process index_bam { +process save_hisat { tag "$file_id" publishDir "${params.output}/03_hisat2/", mode: 'copy' input: - set file_id, file(bam) from reads_aligned_hg38 + set file_id, file(fastq) from reads_non_aligned_hg38 output: - set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files + file "*" into saved_hisat script: """ -samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} -samtools index ${file_id}_sorted.bam +cat ${fastq} > ${file_id}_nonhuman.fastq.gz """ } -sorted_bam_files.into{for_dedup;for_htseq} - -/* deduplicating reads - -params.dedup_options = "" - -process dedup { - tag "$file_id" - - input: - set file_id, file(bam) from for_dedup - - output: - set file_id, "*dedup.bam" into dedup_bam - file "*.txt" into dedup_report +/* sorting */ - script: -""" -umi_tools dedup -I ${bam[0]} \ - ${params.dedup_options} \ - -S ${file_id}_dedup.bam > report.txt -""" -}*/ -/* -process sort_bam { +process index_bam { tag "$file_id" - publishDir "${params.output}/03_hisat2_dedup/", mode: 'copy' + publishDir "${params.output}/03_hisat2/", mode: 'copy' input: - set file_id, file(bam) from dedup_bam - file dedup from dedup_report + set file_id, file(bam) from reads_aligned_hg38 + file report from hisat_report output: - set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files_2 - file "*.txt" into report_dedup + set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files + file "*.txt" into report_hisat2 script: """ samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} samtools index ${file_id}_sorted.bam -cat ${dedup} > ${file_id}_dedup_report.txt + +cat ${report} > ${file_id}_hg38_hisat2.txt """ } -*/ + + /* HTseq */ params.gtf = "$baseDir/data/annotation/*.gtf" @@ -189,9 +169,11 @@ Channel process counting { tag "$file_id" publishDir "${params.output}/04_HTseq/", mode: 'copy' + errorStrategy 'retry' + maxRetries 2 input: - set file_id, file(bam) from for_htseq + set file_id, file(bam) from sorted_bam_files file gtf from gtf_file.toList() output: diff --git a/src/dual_mapping.config b/src/dual_mapping.config index fdf3c362c7b45fb23ffe5bf44ddcdf15bbb0a964..2c186ba6681f939276042d6d2a2e2040742293ec 100644 --- a/src/dual_mapping.config +++ b/src/dual_mapping.config @@ -7,21 +7,19 @@ profiles { executor = "sge" clusterOptions = "-cwd -V" memory = "20GB" - cpus = 16 + cpus = 16 time = "12h" - queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' - penv = 'openmp16' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + penv = 'openmp16' } withName: index_fasta_bowtie { - beforeScript = "source $baseDir/.conda_psmn.sh" - conda = "$baseDir/.conda_envs/bowtie_1.2.2" + beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" + module = "GCC/6.4.0/Bowtie/1.1.2:samtools/1.7" executor = "sge" clusterOptions = "-cwd -V" memory = "20GB" - cpus = 16 time = "12h" - queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' - penv = 'openmp16' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: hisat2 { beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" @@ -31,7 +29,7 @@ profiles { memory = "20GB" cpus = 16 time = "12h" - queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' penv = 'openmp16' } withName: sort_bam { @@ -42,7 +40,7 @@ profiles { cpus = 1 memory = "20GB" time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: merge_bam { beforeScript = "source $baseDir/.conda_psmn.sh" @@ -52,7 +50,7 @@ profiles { cpus = 16 memory = "30GB" time = "24h" - queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' penv = 'openmp16' } } diff --git a/src/dual_mapping.nf b/src/dual_mapping.nf index c8509b9a7b698ebd6a35c0507cf042a887ab4ccc..1fec02e2069276a9af71984598bcf0ca03438274 100644 --- a/src/dual_mapping.nf +++ b/src/dual_mapping.nf @@ -4,6 +4,8 @@ params.fasta = "data/genome/NC001802.1.fa" log.info "fasta files : ${params.fasta}" +params.output = "results" +log.info "output folder : ${params.output}" Channel .fromPath( params.fasta ) @@ -15,7 +17,6 @@ fasta_file.into{fasta_hisat ; fasta_bowtie} /* HISAT */ process index_fasta_hisat { tag "$fasta.baseName" - publishDir "data/indexes/${fasta.baseName}_hisat/", mode: 'copy' input: file fasta from fasta_hisat @@ -33,7 +34,6 @@ hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName} process index_fasta_bowtie { tag "$fasta.baseName" - publishDir "results/${fasta.baseName}_bowtie/", mode: 'copy' input: file fasta from fasta_bowtie @@ -44,7 +44,7 @@ process index_fasta_bowtie { script: """ -bowtie-build --threads ${task.cpus} -f ${fasta} ${fasta.baseName} \ +bowtie-build -f ${fasta} ${fasta.baseName} \ &> ${fasta.baseName}_bowtie_report.txt if grep -q "Error" ${fasta.baseName}_bowtie_report.txt; then @@ -123,8 +123,8 @@ for (index_file in index) { } } """ -bowtie --best --fr -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ --q ${reads} 2> \ +zcat ${reads} | bowtie --best --fr -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ +-q - 2> \ ${file_id}_bowtie_report_tmp.txt | \ samtools view -F 4 -F 16 -Sb - > ${file_id}_bowtie.bam @@ -145,7 +145,7 @@ bam_bowtie.join(bam_hisat) //merged_bam.println() process merge_bam{ - publishDir "results/05_${index_id}_mergedBAM/", mode: 'copy' + publishDir "${params.output}/05_${index_id}_mergedBAM/", mode: 'copy' input: set file_id, index_id, file(bam_bowtie), file(bam_hisat) from merged_bam diff --git a/src/dual_mapping_paired.config b/src/dual_mapping_paired.config new file mode 100644 index 0000000000000000000000000000000000000000..fd8de75af39c1840acd8e6b3afbdf3df5c0747c0 --- /dev/null +++ b/src/dual_mapping_paired.config @@ -0,0 +1,85 @@ +profiles { + sge { + process{ + withName: index_fasta_hisat { + beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" + module = "hisat2/2.1.0:samtools/1.7" + executor = "sge" + clusterOptions = "-cwd -V" + memory = "20GB" + cpus = 16 + time = "12h" + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + penv = 'openmp16' + } + withName: index_fasta_bowtie { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1" + executor = "sge" + clusterOptions = "-cwd -V" + memory = "20GB" + time = "12h" + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + } + withName: hisat2 { + beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" + module = "hisat2/2.1.0:samtools/1.7" + executor = "sge" + clusterOptions = "-cwd -V" + memory = "20GB" + cpus = 16 + time = "12h" + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + penv = 'openmp16' + } + withName: bowtie{ + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1" + executor = "sge" + clusterOptions = "-cwd -V" + cpus = 8 + memory = "20GB" + time = "12h" + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + penv = 'openmp8' + } + withName: merge_bam { + beforeScript = "source $baseDir/.conda_psmn.sh" + conda = "$baseDir/.conda_envs/samtools_1.7" + executor = "sge" + clusterOptions = "-cwd -V" + cpus = 16 + memory = "30GB" + time = "24h" + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + penv = 'openmp16' + } + } + } + docker { + docker.temp = 'auto' + docker.enabled = true + process { + withName: index_fasta_hisat { + cpus = 4 + container = "lbmc/hisat2:2.1.0" + } + withName: index_fasta_bowtie { + cpus = 4 + container = "lbmc/bowtie:1.2.2" + } + withName: hisat2 { + cpus = 4 + container = "lbmc/hisat2:2.1.0" + } + withName: bowtie { + cpus = 4 + container = "lbmc/bowtie2:2.3.4.1" + } + withName: merge_bam { + container = "lbmc/samtools:1.7" + cpus = 4 + } + } + } +} diff --git a/src/dual_mapping_paired.nf b/src/dual_mapping_paired.nf index f5a5fe5fc5b08e2078f6164559a15a4d95739608..16490f7e88d7a683dbadf5e9e821d0268b7fa5a3 100644 --- a/src/dual_mapping_paired.nf +++ b/src/dual_mapping_paired.nf @@ -29,23 +29,23 @@ hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName} """ } -/* BOWTIE 1 */ +/* BOWTIE 2 */ process index_fasta_bowtie { tag "$fasta.baseName" - publishDir "results/${fasta.baseName}_bowtie/", mode: 'copy' + publishDir "results/${fasta.baseName}_bowtie2/", mode: 'copy' input: file fasta from fasta_bowtie output: - file "*ebwt" into index_files_bowtie + file "*bt2" into index_files_bowtie file "*_report.txt" into indexing_report script: """ -bowtie-build --threads ${task.cpus} -f ${fasta} ${fasta.baseName} \ -&> ${fasta.baseName}_bowtie_report.txt +bowtie2-build --threads ${task.cpus} -f ${fasta} ${fasta.baseName} \ +&> ${fasta.baseName}_bowtie2_report.txt if grep -q "Error" ${fasta.baseName}_bowtie_report.txt; then exit 1 @@ -97,7 +97,6 @@ process hisat2 { hisat2 -x ${index_id} -p ${task.cpus} \ -1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} --un-conc-gz ${file_id}_notaligned_hisat_R%.fastq.gz \ --rna-strandness 'F' --dta --no-temp-splicesite \ ---novel-splicesite-outfile ${file_id}splicesite.txt 2> ${file_id}_hisat2_NY5.txt | samtools view -F 4 -F 16 -Sb - > ${file_id}.bam """ } @@ -118,14 +117,14 @@ process bowtie { script: index_id = index[0] for (index_file in index) { - if (index_file =~ /.*\.1\.ebwt/ && !(index_file =~ /.*\.rev\.1\.ebwt/)) { - index_id = ( index_file =~ /(.*)\.1\.ebwt/)[0][1] + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] } } """ -bowtie --best --fr -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ +bowtie2 --best --fr -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ -1 ${reads[0]} -2 ${reads[1]} 2> \ -${file_id}_bowtie_report_tmp.txt | \ +${file_id}_bowtie2_report_tmp.txt | \ samtools view -F 4 -F 16 -Sb - > ${file_id}_bowtie.bam if grep -q "Error" ${file_id}_bowtie_report_tmp.txt; then