From 225e2ed821bd0e6406aa253abb8938e6ea9a0f6b Mon Sep 17 00:00:00 2001
From: Emmanuel Labaronne <emmanuel.labaronne@ens-lyon.fr>
Date: Tue, 4 May 2021 14:01:11 +0200
Subject: [PATCH] add dsl2 scripts of bowtie fastp hisat and stringtie

---
 src/nf_modules/bowtie2/rmi2_pipeline.nf   | 185 ++++++++++++++++++++++
 src/nf_modules/fastp/rmi2_pipeline.nf     |  61 +++++++
 src/nf_modules/hisat2/rmi2_pipeline.nf    |  47 ++++++
 src/nf_modules/stringtie/rmi2_pipeline.nf |  89 +++++++++++
 4 files changed, 382 insertions(+)
 create mode 100644 src/nf_modules/bowtie2/rmi2_pipeline.nf
 create mode 100644 src/nf_modules/fastp/rmi2_pipeline.nf
 create mode 100644 src/nf_modules/hisat2/rmi2_pipeline.nf
 create mode 100644 src/nf_modules/stringtie/rmi2_pipeline.nf

diff --git a/src/nf_modules/bowtie2/rmi2_pipeline.nf b/src/nf_modules/bowtie2/rmi2_pipeline.nf
new file mode 100644
index 00000000..8cafca61
--- /dev/null
+++ b/src/nf_modules/bowtie2/rmi2_pipeline.nf
@@ -0,0 +1,185 @@
+version = "2.3.4.1"  // interpolated below as the tag of the container image
+container_url = "lbmc/bowtie2:${version}"  // image reference the processes in this file assign to their container directive
+
+process index_fasta {  // Runs bowtie2-build on one FASTA file and keeps the build log
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$fasta.baseName"
+
+  input:
+    path fasta  // FASTA file to index
+
+  output:
+    path "*.index*", emit: index  // files written under the <fasta baseName>.index prefix
+    path "*_report.txt", emit: report  // bowtie2-build stdout and stderr (redirected with &>)
+
+  script:  // the task exits 1 when the captured log contains "Error"
+"""
+bowtie2-build --threads ${task.cpus} \
+  ${fasta} \
+  ${fasta.baseName}.index &> \
+  ${fasta.baseName}_bowtie2_index_report.txt
+
+if grep -q "Error" ${fasta.baseName}_bowtie2_index_report.txt; then
+  exit 1
+fi
+"""
+}
+
+process mapping_fastq_pairedend {  // Aligns a read pair with bowtie2 --very-sensitive and converts the output to BAM
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$pair_id"
+
+  input:
+  path index  // bowtie2 index files; the -x prefix is derived from them in the script section
+  tuple val(pair_id), path(reads)  // sample id and its read files (reads[0] -> -1, reads[1] -> -2)
+
+  output:
+  tuple val(pair_id), path("*.bam"), emit: bam  // unsorted BAM produced by samtools view
+  path "*_report.txt", emit: report  // last 19 lines of the bowtie2 stderr log
+
+  script:
+  index_id = index[0]  // fallback used when no forward *.1.bt2 file is found
+  for (index_file in index) {  // look for the forward *.1.bt2 file, skipping *.rev.1.bt2
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]  // keep everything before ".1.bt2" as the index prefix
+    }
+  }
+"""
+bowtie2 --very-sensitive \
+  -p ${task.cpus} \
+  -x ${index_id} \
+  -1 ${reads[0]} \
+  -2 ${reads[1]} 2> \
+  ${pair_id}_bowtie2_mapping_report_tmp.txt | \
+  samtools view -Sb - > ${pair_id}.bam
+
+if grep -q "Error" ${pair_id}_bowtie2_mapping_report_tmp.txt; then
+  exit 1
+fi
+tail -n 19 ${pair_id}_bowtie2_mapping_report_tmp.txt > \
+  ${pair_id}_bowtie2_mapping_report.txt
+"""
+}
+
+
+process mapping_fastq_singleend {  // Aligns single-end reads with bowtie2 --sensitive; writes a sorted, indexed BAM
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+  path index  // bowtie2 index files; the -x prefix is derived from them in the script section
+  tuple val(file_id), path(reads)  // sample id and its read file (reads[0] is passed to -U)
+
+  output:
+  tuple val(file_id), path("*.bam"), emit: bam  // sorted alignments; --no-unal keeps unaligned reads out
+  tuple val(file_id), path("*.fastq.gz"), emit : fastq  // reads written by --un-gz
+  path "*_report.txt", emit: report  // last 19 lines of the bowtie2 stderr log
+
+  script:
+  index_id = index[0]  // fallback used when no forward *.1.bt2 file is found
+  for (index_file in index) {  // look for the forward *.1.bt2 file, skipping *.rev.1.bt2
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+  """
+  bowtie2 --sensitive -p ${task.cpus} \
+  -x ${index_id} \
+  -U ${reads[0]} --no-unal \
+  --un-gz ${file_id}_filter.fastq.gz 2> \
+  ${file_id}_bowtie2_mapping_report_tmp.txt | samtools view -bS - \
+  | samtools sort -@ ${task.cpus} -o ${file_id}.filter.bam \
+              && samtools index ${file_id}.filter.bam \
+              && samtools idxstats ${file_id}.filter.bam  > \
+                 ${file_id}.filter.stats
+  # scan the file that actually received the bowtie2 stderr stream
+  if grep -q "Error" ${file_id}_bowtie2_mapping_report_tmp.txt; then
+    exit 1
+  fi
+
+  tail -n 19 ${file_id}_bowtie2_mapping_report_tmp.txt > \
+   ${file_id}_bowtie2_mapping_report.txt
+   """
+}
+
+process filtering_pairedend {  // Aligns a read pair with bowtie2 and also writes the pairs that did not align concordantly
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$pair_id"
+
+  input:
+  path index  // bowtie2 index files; the -x prefix is derived from them in the script section
+  tuple val(pair_id), path(reads)  // sample id and its read files (reads[0] -> -1, reads[1] -> -2)
+
+  output:
+  tuple val(pair_id), path("*.bam"), emit: bam  // unsorted BAM produced by samtools view
+  tuple val(pair_id), path("*.fastq.gz"), emit : fastq  // files written by --un-conc-gz (% becomes 1 and 2)
+  path "*_report.txt", emit: report  // last 19 lines of the bowtie2 stderr log
+
+  script:
+  index_id = index[0]  // fallback used when no forward *.1.bt2 file is found
+  for (index_file in index) {  // look for the forward *.1.bt2 file, skipping *.rev.1.bt2
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+"""
+bowtie2 --very-sensitive \
+  -p ${task.cpus} -x ${index_id} \
+  -1 ${reads[0]} \
+  -2 ${reads[1]} \
+  --un-conc-gz ${pair_id}_filter_R%.fastq.gz 2> \
+  ${pair_id}_bowtie2_mapping_report_tmp.txt | \
+  samtools view -Sb - > ${pair_id}.bam
+
+if grep -q "Error" ${pair_id}_bowtie2_mapping_report_tmp.txt; then
+  exit 1
+fi
+tail -n 19 ${pair_id}_bowtie2_mapping_report_tmp.txt > \
+  ${pair_id}_bowtie2_mapping_report.txt
+"""
+}
+
+
+process filtering_singleend {  // Aligns single-end reads with bowtie2 --very-sensitive and writes the reads that did not align
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+  path index  // bowtie2 index files; the -x prefix is derived from them in the script section
+  tuple val(file_id), path(reads)  // sample id and its read file (reads[0] is passed to -U)
+
+  output:
+  tuple val(file_id), path("*.bam"), emit: bam  // sorted alignments; --no-unal keeps unaligned reads out
+  tuple val(file_id), path("*.fastq.gz"), emit : fastq  // reads written by --un-gz
+  path "*_report.txt", emit: report  // last 19 lines of the bowtie2 stderr log
+
+  script:
+  index_id = index[0]  // fallback used when no forward *.1.bt2 file is found
+  for (index_file in index) {  // look for the forward *.1.bt2 file, skipping *.rev.1.bt2
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
+  """
+  bowtie2 --very-sensitive -p ${task.cpus} -x ${index_id} \
+   -U ${reads[0]} --no-unal \
+   --un-gz ${file_id}_filter.fastq.gz 2> \
+   ${file_id}_filter.txt | samtools view -bS - \
+   | samtools sort -@ ${task.cpus} -o ${file_id}.filter.bam \
+               && samtools index ${file_id}.filter.bam \
+               && samtools idxstats ${file_id}.filter.bam  > \
+                  ${file_id}.filter.stats
+
+   if grep -q "Error" ${file_id}_filter.txt; then
+     exit 1
+   fi
+   # the bowtie2 stderr log of this process is the _filter.txt file
+  tail -n 19 ${file_id}_filter.txt > \
+   ${file_id}_bowtie2_mapping_report.txt
+   """
+}
diff --git a/src/nf_modules/fastp/rmi2_pipeline.nf b/src/nf_modules/fastp/rmi2_pipeline.nf
new file mode 100644
index 00000000..5d7a569e
--- /dev/null
+++ b/src/nf_modules/fastp/rmi2_pipeline.nf
@@ -0,0 +1,61 @@
+version = "0.20.1"  // interpolated below as the tag of the container image
+container_url = "lbmc/fastp:${version}"  // image reference the processes in this file assign to their container directive
+
+process fastp_pairedend {  // Runs fastp on a read pair and writes trimmed reads plus HTML and JSON reports
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$pair_id"
+  publishDir "${output}/00_fastp", mode: 'copy', pattern: "*.{html,json}"  // only the reports are copied out
+
+  input:
+  tuple val(pair_id), path(reads)  // sample id and its read files (reads[0] -> --in1, reads[1] -> --in2)
+  val output  // base directory used by the publishDir directive
+
+  output:
+    tuple val(pair_id), path("*.fastq.gz"), emit: FASTQ  // <pair_id>_R1_trim / _R2_trim files
+    tuple val(pair_id), path("*.html"), emit: HTML  // <pair_id>.html
+    tuple val(pair_id), path("*.json"), emit: LOG  // <pair_id>_fastp.json
+
+  script:
+"""
+fastp --thread ${task.cpus} \
+--qualified_quality_phred 20 \
+--disable_length_filtering \
+--detect_adapter_for_pe \
+--trim_poly_x \
+--in1 ${reads[0]} \
+--in2 ${reads[1]} \
+--out1 ${pair_id}_R1_trim.fastq.gz \
+--out2 ${pair_id}_R2_trim.fastq.gz \
+--html ${pair_id}.html \
+--json ${pair_id}_fastp.json \
+--report_title ${pair_id}
+"""
+}
+
+process fastp_singleend {  // Runs fastp on single-end reads and writes trimmed reads plus HTML and JSON reports
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$pair_id"
+  publishDir "${params.output}/00_fastp", mode: 'copy', pattern: "*.{html,json}"  // this process has no `output` input, so read the pipeline parameter
+
+  input:
+  tuple val(pair_id), path(reads)  // sample id and its read file (passed to --in1)
+
+  output:
+    tuple val(pair_id), path("*.fastq.gz"), emit: FASTQ  // <pair_id>_trim.fastq.gz
+    tuple val(pair_id), path("*.html"), emit: HTML  // <pair_id>.html
+    tuple val(pair_id), path("*.json"), emit: LOG  // <pair_id>_fastp.json
+
+  script:  // --length_required takes an integer; 15 is the fastp default
+"""
+fastp --thread ${task.cpus} \
+--qualified_quality_phred 20 \
+--length_required 15 \
+--in1 ${reads} \
+--out1 ${pair_id}_trim.fastq.gz \
+--html ${pair_id}.html \
+--json ${pair_id}_fastp.json \
+--report_title ${pair_id}
+"""
+}
diff --git a/src/nf_modules/hisat2/rmi2_pipeline.nf b/src/nf_modules/hisat2/rmi2_pipeline.nf
new file mode 100644
index 00000000..be0e96a8
--- /dev/null
+++ b/src/nf_modules/hisat2/rmi2_pipeline.nf
@@ -0,0 +1,47 @@
+version = "2.1.0"  // interpolated below as the tag of the container image
+container = "lbmc/hisat2:${version}"  // NOTE(review): the other modules in this patch name this variable container_url
+
+process paired_end {  // Aligns a read pair with hisat2; writes a sorted, indexed BAM and the pairs that did not align
+  container = "lbmc/hisat2:${version}"  // the file-level `container` variable alone does not set the process directive
+  tag "$pair_id"
+  label "big_mem_multi_cpus"
+  publishDir "${params.output}/hisat2", mode: 'copy'
+
+  input:
+  tuple val(pair_id), file(fastq)  // sample id and its read files (fastq[0] -> -1, fastq[1] -> -2)
+  file(index)  // hisat2 index files; the -x prefix is derived from them in the script section
+  val(output)  // not referenced in this process; publishDir reads params.output
+
+  output:
+  tuple val(pair_id), file("*.bam"), emit: BAM  // sorted alignments, unmapped reads removed by -F 4
+  file("*_report.txt"), emit: LOGS  // hisat2 stderr log
+  tuple val(pair_id), file("*.fastq.gz"), emit : FASTQ  // files written by --un-conc-gz (% becomes 1 and 2)
+
+  script:
+  index_id = index[0]  // fallback used when no forward *.1.ht2 file is found
+  for (index_file in index) {  // look for the forward *.1.ht2 file, skipping *.rev.1.ht2
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
+    }
+  }
+"""
+hisat2 -x ${index_id} \
+       -p ${task.cpus} \
+       -1 ${fastq[0]} \
+       -2 ${fastq[1]} \
+       --un-conc-gz ${pair_id}_notaligned_R%.fastq.gz \
+       --rna-strandness 'FR' \
+       --dta \
+       --no-softclip\
+       --trim3 1\
+       --trim5 1\
+       2> ${pair_id}_report.txt \
+| samtools view -bS -F 4 - \
+| samtools sort -@ ${task.cpus} -o ${pair_id}.bam \
+&& samtools index ${pair_id}.bam
+
+if grep -q "ERR" ${pair_id}_report.txt; then
+  exit 1
+fi
+"""
+}
diff --git a/src/nf_modules/stringtie/rmi2_pipeline.nf b/src/nf_modules/stringtie/rmi2_pipeline.nf
new file mode 100644
index 00000000..3e11fc8e
--- /dev/null
+++ b/src/nf_modules/stringtie/rmi2_pipeline.nf
@@ -0,0 +1,89 @@
+version = "2.1.5--h978d192_1"  // interpolated below as the tag of the container image
+container_url = "quay.io/biocontainers/stringtie:${version}"  // image reference the processes in this file assign to their container directive
+
+
+///////////////////////////////////////////////////////////////////////////////
+// TRANSCRIPTOME BUILDING
+
+process assembly_from_longreads {  // Runs stringtie on one BAM file and writes <file_id>.gtf
+  container = "${container_url}"
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  publishDir "${output}/${file_id}", mode: 'copy'  // results are copied into a per-sample directory
+
+ input:
+ tuple val(file_id), file(bam)  // sample id and the alignment file given to stringtie
+ val output  // base directory used by the publishDir directive
+
+ output:
+ path("*.gtf"), emit: GTF  // the file named by -o
+
+ script:  // NOTE(review): no long-read specific option is passed here; confirm that is intended
+ """
+stringtie -o ${file_id}.gtf \
+          -f 0.01 \
+          -p ${task.cpus}\
+          -j 0.5 \
+          ${bam}
+ """
+}
+
+process assembly_from_RNAseq {  // Runs stringtie on one BAM file with an annotation passed through -G
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+//  publishDir "results/stringtie/${file_id}", mode: 'copy'
+
+ input:
+ tuple val(file_id), file(bam)  // sample id and the alignment file given to stringtie
+ file(gtf)  // annotation file passed to -G
+
+ output:
+ path("*.gtf"), emit: GTF  // the file named by -o (<file_id>.gtf)
+
+ script:
+ """
+stringtie -p ${task.cpus}\
+          -G ${gtf} \
+          -o ${file_id}.gtf \
+          ${bam}
+ """
+}
+
+process merge_transcriptomes {  // Runs stringtie --merge and writes transcriptome_merged.gtf
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "merging transcriptome"
+
+  input:
+    file(transcriptome)  // file(s) placed at the end of the stringtie --merge command line
+    file(gtf)  // annotation file passed to -G
+
+  output:
+    path("transcriptome_merged.gtf"), emit: MERGED_GTF  // the file named by -o
+
+  script:
+"""
+stringtie --merge -G ${gtf} -o transcriptome_merged.gtf ${transcriptome}
+"""
+}
+
+process abundance {  // Runs stringtie with -e -B on one BAM file against the annotation passed through -G
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_multi_cpus"
+  publishDir "${output}/stringtie/${file_id}", mode: 'copy'  // results are copied into a per-sample directory
+
+  input:
+    tuple val(file_id), file(bam)  // sample id and the alignment file given to stringtie
+    file(gtf)  // annotation file passed to -G
+    val(output)  // base directory used by the publishDir directive
+
+  output:
+    tuple val(file_id), path("*"), emit: ABUNDANCE  // everything matched by the * glob, not only <file_id>.gtf
+
+  script:
+"""
+stringtie -p ${task.cpus} -e -B -G ${gtf} -o ${file_id}.gtf ${bam}
+"""
+}
-- 
GitLab