From 225e2ed821bd0e6406aa253abb8938e6ea9a0f6b Mon Sep 17 00:00:00 2001
From: Emmanuel Labaronne <emmanuel.labaronne@ens-lyon.fr>
Date: Tue, 4 May 2021 14:01:11 +0200
Subject: [PATCH] add dsl2 scripts of bowtie fastp hisat and stringtie

---
 src/nf_modules/bowtie2/rmi2_pipeline.nf   | 208 ++++++++++++++++++++++
 src/nf_modules/fastp/rmi2_pipeline.nf     |  68 +++++++
 src/nf_modules/hisat2/rmi2_pipeline.nf    |  53 ++++++
 src/nf_modules/stringtie/rmi2_pipeline.nf |  97 +++++++++++
 4 files changed, 426 insertions(+)
 create mode 100644 src/nf_modules/bowtie2/rmi2_pipeline.nf
 create mode 100644 src/nf_modules/fastp/rmi2_pipeline.nf
 create mode 100644 src/nf_modules/hisat2/rmi2_pipeline.nf
 create mode 100644 src/nf_modules/stringtie/rmi2_pipeline.nf

diff --git a/src/nf_modules/bowtie2/rmi2_pipeline.nf b/src/nf_modules/bowtie2/rmi2_pipeline.nf
new file mode 100644
index 00000000..8cafca61
--- /dev/null
+++ b/src/nf_modules/bowtie2/rmi2_pipeline.nf
@@ -0,0 +1,208 @@
+version = "2.3.4.1"
+container_url = "lbmc/bowtie2:${version}"
+
+// Build a bowtie2 index from a FASTA file. The bowtie2-build log is kept as
+// the report, and the task fails if that log contains "Error".
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$fasta.baseName"
+
+  input:
+    path fasta
+
+  output:
+    path "*.index*", emit: index
+    path "*_report.txt", emit: report
+
+  script:
+"""
+bowtie2-build --threads ${task.cpus} \
+  ${fasta} \
+  ${fasta.baseName}.index &> \
+  ${fasta.baseName}_bowtie2_index_report.txt
+
+if grep -q "Error" ${fasta.baseName}_bowtie2_index_report.txt; then
+  exit 1
+fi
+"""
+}
+
+// Align paired-end reads with bowtie2 (--very-sensitive) and convert the
+// alignments to BAM. The last 19 lines of the bowtie2 stderr log are kept as
+// the report; the task fails if that log contains "Error".
+process mapping_fastq_pairedend {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$pair_id"
+
+  input:
+    path index
+    tuple val(pair_id), path(reads)
+
+  output:
+    tuple val(pair_id), path("*.bam"), emit: bam
+    path "*_report.txt", emit: report
+
+  script:
+  // Derive the index prefix from the forward "*.1.bt2" file, skipping the
+  // "*.rev.1.bt2" one; defaults to the first index file when none matches.
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+      index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+"""
+bowtie2 --very-sensitive \
+  -p ${task.cpus} \
+  -x ${index_id} \
+  -1 ${reads[0]} \
+  -2 ${reads[1]} 2> \
+  ${pair_id}_bowtie2_mapping_report_tmp.txt | \
+  samtools view -Sb - > ${pair_id}.bam
+
+if grep -q "Error" ${pair_id}_bowtie2_mapping_report_tmp.txt; then
+  exit 1
+fi
+tail -n 19 ${pair_id}_bowtie2_mapping_report_tmp.txt > \
+  ${pair_id}_bowtie2_mapping_report.txt
+"""
+}
+
+
+// Align single-end reads with bowtie2 (--sensitive, --no-unal), writing the
+// sorted and indexed alignments to <file_id>.filter.bam and the reads that did
+// not align to <file_id>_filter.fastq.gz. The log tail is kept as the report.
+process mapping_fastq_singleend {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    path index
+    tuple val(file_id), path(reads)
+
+  output:
+    tuple val(file_id), path("*.bam"), emit: bam
+    tuple val(file_id), path("*.fastq.gz"), emit : fastq
+    path "*_report.txt", emit: report
+
+  script:
+  // Derive the index prefix from the forward "*.1.bt2" file, skipping the
+  // "*.rev.1.bt2" one; defaults to the first index file when none matches.
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+      index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+  """
+  bowtie2 --sensitive -p ${task.cpus} \
+    -x ${index_id} \
+    -U ${reads[0]} --no-unal \
+    --un-gz ${file_id}_filter.fastq.gz 2> \
+    ${file_id}_bowtie2_mapping_report_tmp.txt | samtools view -bS - \
+    | samtools sort -@ ${task.cpus} -o ${file_id}.filter.bam \
+    && samtools index ${file_id}.filter.bam \
+    && samtools idxstats ${file_id}.filter.bam > \
+    ${file_id}.filter.stats
+
+  if grep -q "Error " ${file_id}_bowtie2_mapping_report_tmp.txt; then
+    exit 1
+  fi
+
+  tail -n 19 ${file_id}_bowtie2_mapping_report_tmp.txt > \
+    ${file_id}_bowtie2_mapping_report.txt
+  """
+}
+
+// Align paired-end reads with bowtie2 (--very-sensitive), convert the
+// alignments to BAM and write the pairs that did not align concordantly to
+// <pair_id>_filter_R%.fastq.gz, which feeds the declared `fastq` output.
+process filtering_pairedend {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$pair_id"
+
+  input:
+    path index
+    tuple val(pair_id), path(reads)
+
+  output:
+    tuple val(pair_id), path("*.bam"), emit: bam
+    tuple val(pair_id), path("*.fastq.gz"), emit : fastq
+    path "*_report.txt", emit: report
+
+  script:
+  // Derive the index prefix from the forward "*.1.bt2" file, skipping the
+  // "*.rev.1.bt2" one; defaults to the first index file when none matches.
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+      index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+"""
+bowtie2 --very-sensitive \
+  -p ${task.cpus} \
+  -x ${index_id} \
+  -1 ${reads[0]} \
+  -2 ${reads[1]} \
+  --un-conc-gz ${pair_id}_filter_R%.fastq.gz 2> \
+  ${pair_id}_bowtie2_mapping_report_tmp.txt | \
+  samtools view -Sb - > ${pair_id}.bam
+
+if grep -q "Error" ${pair_id}_bowtie2_mapping_report_tmp.txt; then
+  exit 1
+fi
+tail -n 19 ${pair_id}_bowtie2_mapping_report_tmp.txt > \
+  ${pair_id}_bowtie2_mapping_report.txt
+"""
+}
+
+
+// Align single-end reads with bowtie2 (--very-sensitive, --no-unal), writing
+// sorted and indexed alignments to <file_id>.filter.bam and the reads that did
+// not align to <file_id>_filter.fastq.gz. The log tail is kept as the report.
+process filtering_singleend {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    path index
+    tuple val(file_id), path(reads)
+
+  output:
+    tuple val(file_id), path("*.bam"), emit: bam
+    tuple val(file_id), path("*.fastq.gz"), emit : fastq
+    path "*_report.txt", emit: report
+
+  script:
+  // Derive the index prefix from the forward "*.1.bt2" file, skipping the
+  // "*.rev.1.bt2" one; defaults to the first index file when none matches.
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+      index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+  """
+  bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
+    -U ${reads[0]} --no-unal \
+    --un-gz ${file_id}_filter.fastq.gz 2> \
+    ${file_id}_filter.txt | samtools view -bS - \
+    | samtools sort -@ ${task.cpus} -o ${file_id}.filter.bam \
+    && samtools index ${file_id}.filter.bam \
+    && samtools idxstats ${file_id}.filter.bam > \
+    ${file_id}.filter.stats
+
+  if grep -q "Error " ${file_id}_filter.txt; then
+    exit 1
+  fi
+
+  tail -n 19 ${file_id}_filter.txt > \
+    ${file_id}_bowtie2_mapping_report.txt
+  """
+}
diff --git a/src/nf_modules/fastp/rmi2_pipeline.nf b/src/nf_modules/fastp/rmi2_pipeline.nf
new file mode 100644
index 00000000..5d7a569e
--- /dev/null
+++ b/src/nf_modules/fastp/rmi2_pipeline.nf
@@ -0,0 +1,68 @@
+version = "0.20.1"
+container_url = "lbmc/fastp:${version}"
+
+// Trim paired-end reads with fastp (adapter detection for PE, poly-X trimming,
+// no length filtering); HTML/JSON reports are published to <output>/00_fastp.
+process fastp_pairedend {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$pair_id"
+  publishDir "${output}/00_fastp", mode: 'copy', pattern: "*.{html,json}"
+
+  input:
+    tuple val(pair_id), path(reads)
+    val output
+
+  output:
+    tuple val(pair_id), path("*.fastq.gz"), emit: FASTQ
+    tuple val(pair_id), path("*.html"), emit: HTML
+    tuple val(pair_id), path("*.json"), emit: LOG
+
+  script:
+"""
+fastp --thread ${task.cpus} \
+--qualified_quality_phred 20 \
+--disable_length_filtering \
+--detect_adapter_for_pe \
+--trim_poly_x \
+--in1 ${reads[0]} \
+--in2 ${reads[1]} \
+--out1 ${pair_id}_R1_trim.fastq.gz \
+--out2 ${pair_id}_R2_trim.fastq.gz \
+--html ${pair_id}.html \
+--json ${pair_id}_fastp.json \
+--report_title ${pair_id}
+"""
+}
+
+// Trim single-end reads with fastp, discarding reads shorter than 15 bases
+// (fastp's documented default, made explicit because --length_required takes
+// an integer argument).
+// NOTE(review): publishDir reads `output`, but unlike fastp_pairedend this
+// process declares no `val output` input -- confirm where it should come from.
+process fastp_singleend {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$pair_id"
+  publishDir "${output}/00_fastp", mode: 'copy', pattern: "*.{html,json}"
+
+  input:
+    tuple val(pair_id), path(reads)
+
+  output:
+    tuple val(pair_id), path("*.fastq.gz"), emit: FASTQ
+    tuple val(pair_id), path("*.html"), emit: HTML
+    tuple val(pair_id), path("*.json"), emit: LOG
+
+  script:
+"""
+fastp --thread ${task.cpus} \
+--qualified_quality_phred 20 \
+--length_required 15 \
+--in1 ${reads} \
+--out1 ${pair_id}_trim.fastq.gz \
+--html ${pair_id}.html \
+--json ${pair_id}_fastp.json \
+--report_title ${pair_id}
+"""
+}
diff --git a/src/nf_modules/hisat2/rmi2_pipeline.nf b/src/nf_modules/hisat2/rmi2_pipeline.nf
new file mode 100644
index 00000000..be0e96a8
--- /dev/null
+++ b/src/nf_modules/hisat2/rmi2_pipeline.nf
@@ -0,0 +1,53 @@
+version = "2.1.0"
+container_url = "lbmc/hisat2:${version}"
+
+// Align paired-end reads with hisat2, keep mapped reads (-F 4) as a sorted and
+// indexed BAM, and write pairs that did not align concordantly to
+// <pair_id>_notaligned_R%.fastq.gz. Fails if the hisat2 log contains "ERR".
+process paired_end {
+  container = "${container_url}"
+  tag "$pair_id"
+  label "big_mem_multi_cpus"
+  publishDir "${params.output}/hisat2", mode: 'copy'
+
+  input:
+    tuple val(pair_id), file(fastq)
+    file(index)
+    val(output)
+
+  output:
+    tuple val(pair_id), file("*.bam"), emit: BAM
+    file("*_report.txt"), emit: LOGS
+    tuple val(pair_id), file("*.fastq.gz"), emit : FASTQ
+
+  script:
+  // Derive the index prefix from the forward "*.1.ht2" file, skipping the
+  // "*.rev.1.ht2" one; defaults to the first index file when none matches.
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+      index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
+    }
+  }
+"""
+hisat2 -x ${index_id} \
+  -p ${task.cpus} \
+  -1 ${fastq[0]} \
+  -2 ${fastq[1]} \
+  --un-conc-gz ${pair_id}_notaligned_R%.fastq.gz \
+  --rna-strandness 'FR' \
+  --dta \
+  --no-softclip\
+  --trim3 1\
+  --trim5 1\
+  2> ${pair_id}_report.txt \
+| samtools view -bS -F 4 - \
+| samtools sort -@ ${task.cpus} -o ${pair_id}.bam \
+&& samtools index ${pair_id}.bam
+
+if grep -q "ERR" ${pair_id}_report.txt; then
+  exit 1
+fi
+
+"""
+}
diff --git a/src/nf_modules/stringtie/rmi2_pipeline.nf b/src/nf_modules/stringtie/rmi2_pipeline.nf
new file mode 100644
index 00000000..3e11fc8e
--- /dev/null
+++ b/src/nf_modules/stringtie/rmi2_pipeline.nf
@@ -0,0 +1,97 @@
+version = "2.1.5--h978d192_1"
+container_url = "quay.io/biocontainers/stringtie:${version}"
+
+
+///////////////////////////////////////////////////////////////////////////////
+// TRANSCRIPTOME BUILDING
+
+// Assemble transcripts from a BAM file with stringtie (-f 0.01, -j 0.5) and
+// publish the resulting GTF to <output>/<file_id>.
+process assembly_from_longreads {
+  container = "${container_url}"
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  publishDir "${output}/${file_id}", mode: 'copy'
+
+  input:
+    tuple val(file_id), file(bam)
+    val output
+
+  output:
+    path("*.gtf"), emit: GTF
+
+  script:
+  """
+stringtie -o ${file_id}.gtf \
+  -f 0.01 \
+  -p ${task.cpus}\
+  -j 0.5 \
+  ${bam}
+  """
+}
+
+// Assemble transcripts from a BAM file with stringtie, using the given GTF as
+// reference annotation (-G).
+process assembly_from_RNAseq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+//  publishDir "results/stringtie/${file_id}", mode: 'copy'
+
+  input:
+    tuple val(file_id), file(bam)
+    file(gtf)
+
+  output:
+    path("*.gtf"), emit: GTF
+
+  script:
+  """
+stringtie -p ${task.cpus}\
+  -G ${gtf} \
+  -o ${file_id}.gtf \
+  ${bam}
+  """
+}
+
+// Merge the given transcriptome GTF files with stringtie --merge, using the
+// given GTF as reference annotation (-G), into transcriptome_merged.gtf.
+process merge_transcriptomes {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "merging transcriptome"
+
+  input:
+    file(transcriptome)
+    file(gtf)
+
+  output:
+    path("transcriptome_merged.gtf"), emit: MERGED_GTF
+
+  script:
+"""
+stringtie --merge -G ${gtf} -o transcriptome_merged.gtf ${transcriptome}
+"""
+}
+
+// Estimate abundances with stringtie restricted to the reference transcripts
+// (-e), also writing Ballgown tables (-B); all outputs are published.
+process abundance {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_multi_cpus"
+  publishDir "${output}/stringtie/${file_id}", mode: 'copy'
+
+  input:
+    tuple val(file_id), file(bam)
+    file(gtf)
+    val(output)
+
+  output:
+    tuple val(file_id), path("*"), emit: ABUNDANCE
+
+  script:
+"""
+stringtie -p ${task.cpus} -e -B -G ${gtf} -o ${file_id}.gtf ${bam}
+"""
+}
-- 
GitLab