From 0a015caa8e984e8d616cf65d03ceb81cd931cd8a Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent@modolo.fr>
Date: Tue, 31 Mar 2020 11:31:04 +0200
Subject: [PATCH] RNASeq: add experimental biologists practical solution

---
 src/RNASeq.config | 126 ++++++++++++++++++++++++++++++++++++++++++++--
 src/RNASeq.nf     |  95 ++++++++++++++++++++++++++++++++--
 2 files changed, 213 insertions(+), 8 deletions(-)

diff --git a/src/RNASeq.config b/src/RNASeq.config
index ed42799d..69f61457 100644
--- a/src/RNASeq.config
+++ b/src/RNASeq.config
@@ -4,9 +4,25 @@ profiles {
     docker.enabled = true
     process {
       withName: adaptor_removal {
-        container = "lbmc/cutadapt:2.4"
+        container = "lbmc/cutadapt:2.1"
         cpus = 1
       }
+      withName: trimming {
+        container = "lbmc/urqt:d62c1f8" // image in which the trimming script (UrQt) runs
+        cpus = 4 // read by the script as task.cpus for UrQt --m
+      }
+      withName: fasta_from_bed {
+        container = "lbmc/bedtools:2.25.0" // image in which the bedtools getfasta script runs
+        cpus = 1 // the fasta_from_bed script never reads task.cpus
+      }
+      withName: index_fasta {
+        container = "lbmc/kallisto:0.44.0" // same image string as mapping_fastq
+        cpus = 1 // the index_fasta script passes no thread option, so extra CPUs would only sit reserved
+      }
+      withName: mapping_fastq {
+        container = "lbmc/kallisto:0.44.0" // same image string as index_fasta
+        cpus = 4 // read by the script as task.cpus for kallisto quant -t
+      }
     }
   }
   singularity {
@@ -14,16 +30,53 @@ profiles {
     singularity.cacheDir = "./bin/"
     process {
       withName: adaptor_removal {
-        container = "lbmc/cutadapt:2.4"
+        container = "lbmc/cutadapt:2.1"
+        cpus = 1
+      }
+      withName: trimming {
+        container = "lbmc/urqt:d62c1f8" // image in which the trimming script (UrQt) runs
+        cpus = 4 // read by the script as task.cpus for UrQt --m
+      }
+      withName: fasta_from_bed {
+        container = "lbmc/bedtools:2.25.0"
         cpus = 1
       }
+      withName: index_fasta {
+        container = "lbmc/kallisto:0.44.0" // same image string as mapping_fastq
+        cpus = 1 // the index_fasta script passes no thread option, so extra CPUs would only sit reserved
+      }
+      withName: mapping_fastq {
+        container = "lbmc/kallisto:0.44.0" // same image string as index_fasta
+        cpus = 4 // read by the script as task.cpus for kallisto quant -t
+      }
     }
   }
   psmn{
     process{
       withName: adaptor_removal {
         beforeScript = "source $baseDir/.conda_psmn.sh"
-        conda = "$baseDir/.conda_envs/cutadapt_2.4"
+        conda = "$baseDir/.conda_envs/cutadapt_2.1"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 1
+        memory = "20GB"
+        time = "12h"
+        queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
+      }
+      withName: trimming {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/urqt_d62c1f8"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16 // read by the script as task.cpus for UrQt --m
+        memory = "5GB"
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16' // NOTE(review): presumably the 16-slot parallel environment matching cpus - confirm
+      }
+      withName: fasta_from_bed {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/bedtools_2.25.0"
         executor = "sge"
         clusterOptions = "-cwd -V"
         cpus = 1
@@ -31,6 +84,28 @@ profiles {
         time = "12h"
         queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
       }
+      withName: index_fasta {
+        beforeScript = "source $baseDir/.conda_psmn.sh" // sourced before the task script starts
+        conda = "$baseDir/.conda_envs/kallisto_0.44.0" // same environment path as mapping_fastq
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16 // NOTE(review): the index_fasta script never reads task.cpus - confirm 16 slots are needed
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16' // NOTE(review): presumably the 16-slot parallel environment matching cpus - confirm
+      }
+      withName: mapping_fastq {
+        beforeScript = "source $baseDir/.conda_psmn.sh" // sourced before the task script starts
+        conda = "$baseDir/.conda_envs/kallisto_0.44.0" // same environment path as index_fasta
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16 // read by the script as task.cpus for kallisto quant -t
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16' // NOTE(review): presumably the 16-slot parallel environment matching cpus - confirm
+      }
     }
   }
   ccin2p3 {
@@ -39,7 +114,28 @@ profiles {
     singularity.runOptions = "--bind /pbs,/sps,/scratch"
     process{
       withName: adaptor_removal {
-        container = "lbmc/cutadapt:2.4"
+        container = "lbmc/cutadapt:2.1"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: trimming {
+        container = "lbmc/urqt:d62c1f8"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        // One-line form, as for adaptor_removal: a backslash-newline inside the quotes appends the next line's indentation.
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1 // read by the script as task.cpus for UrQt --m
+        queue = 'huge'
+      }
+      withName: fasta_from_bed {
+        container = "lbmc/bedtools:2.25.0"
         scratch = true
         stageInMode = "copy"
         stageOutMode = "rsync"
@@ -48,6 +144,28 @@ profiles {
         cpus = 1
         queue = 'huge'
       }
+      withName: index_fasta {
+        container = "lbmc/kallisto:0.44.0"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        // One-line form, as for adaptor_removal: a backslash-newline inside the quotes appends the next line's indentation.
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1 // the index_fasta script never reads task.cpus
+        queue = 'huge'
+      }
+      withName: mapping_fastq {
+        container = "lbmc/kallisto:0.44.0"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        // One-line form, as for adaptor_removal: a backslash-newline inside the quotes appends the next line's indentation.
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1 // read by the script as task.cpus for kallisto quant -t
+        queue = 'huge'
+      }
     }
   }
 }
diff --git a/src/RNASeq.nf b/src/RNASeq.nf
index c59eaaaa..73940d65 100644
--- a/src/RNASeq.nf
+++ b/src/RNASeq.nf
@@ -1,5 +1,15 @@
 log.info "fastq files : ${params.fastq}"
+log.info "fasta file : ${params.fasta}"
+log.info "bed file : ${params.bed}"
 
+Channel // emits the file(s) matching params.fasta as fasta_files
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } // raise an error when nothing matches
+  .set { fasta_files }
+Channel // emits the file(s) matching params.bed as bed_files
+  .fromPath( params.bed )
+  .ifEmpty { error "Cannot find any bed files matching: ${params.bed}" } // raise an error when nothing matches
+  .set { bed_files }
 Channel
   .fromFilePairs( params.fastq )
   .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
@@ -16,9 +26,86 @@ process adaptor_removal {
   set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut
 
   script:
-    """
-    cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \
-    -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \
-    ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
   """
+
+  cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \
+  -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \
+  ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
+  """
+}
+
+process trimming { // runs UrQt on every read pair emitted by adaptor_removal
+  tag "$pair_id" // label tasks by sample id instead of the staged file list
+  publishDir "results/fastq/trimming/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_cut // reads[0] / reads[1] are the two mates
+
+  output:
+  set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_trim // the trimming report is not declared as an output
+
+  script: // NOTE(review): --t 20 is presumably UrQt's quality threshold - confirm; --m receives task.cpus
+"""
+UrQt --t 20 --m ${task.cpus} --gz \
+--in ${reads[0]} --inpair ${reads[1]} \
+--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
+> ${pair_id}_trimming_report.txt
+"""
+}
+
+process fasta_from_bed { // runs bedtools getfasta on the fasta file for the intervals of the bed file
+  tag "${bed.baseName}" // tasks are labelled with the bed file's base name
+  publishDir "results/fasta/", mode: 'copy'
+
+  input:
+  file fasta from fasta_files // file(s) matching params.fasta
+  file bed from bed_files // file(s) matching params.bed
+
+  output:
+  file "*_extracted.fasta" into fasta_files_extracted // consumed by index_fasta
+
+  script: // the output name is derived from the bed file's base name
+"""
+bedtools getfasta -name \
+-fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta
+"""
+}
+
+process index_fasta { // builds a kallisto index (k = 31) from the extracted fasta
+  tag "${fasta.baseName}"
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+  file fasta from fasta_files_extracted
+
+  output:
+  file "*.index*" into index_files // consumed by mapping_fastq
+  file "*_kallisto_report.txt" into index_files_report // stderr of kallisto index
+
+  script:
+"""
+kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
+2> ${fasta.baseName}_kallisto_report.txt
+"""
+}
+
+process mapping_fastq { // runs kallisto quant on every trimmed read pair
+  tag "$pair_id" // label tasks by sample id instead of the staged file list
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_trim
+  file index from index_files.collect() // NOTE(review): -i takes one index; more than one collected file would break the command
+
+  output:
+  file "*" into counts_files
+
+  script: // the per-sample directory is created first because the report is redirected into it
+"""
+mkdir ${pair_id}
+
+kallisto quant -i ${index} -t ${task.cpus} \
+--bias --bootstrap-samples 100 -o ${pair_id} \
+${reads[0]} ${reads[1]} &> ${pair_id}/kallisto_report.txt
+"""
 }
-- 
GitLab