Merge branch 'dev'

139175cb · Laurent Modolo · a4809608 · 7aaaa9ed · 139175cb · a4809608
Verified Commit 139175cb authored Aug 22, 2018 by Laurent Modolo
--- a/src/nf_modules/Kallisto/tests/tests.sh
+++ b/src/nf_modules/Kallisto/tests/tests.sh
-nextflow src/nf_modules/Kallisto/tests/index.nf \
-  -c src/nf_modules/Kallisto/kallisto.config \
+nextflow src/nf_modules/Kallisto/indexing.nf \
+  -c src/nf_modules/Kallisto/indexing.config \
  -profile docker \
  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta"

-nextflow src/nf_modules/Kallisto/tests/mapping_single.nf \
-  -c src/nf_modules/Kallisto/kallisto.config \
+nextflow src/nf_modules/Kallisto/mapping_single.nf \
+  -c src/nf_modules/Kallisto/mapping_single.config \
  -profile docker \
  --index "results/mapping/index/tiny_v2.index" \
  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"

-nextflow src/nf_modules/Kallisto/tests/mapping_paired.nf \
-  -c src/nf_modules/Kallisto/kallisto.config \
+nextflow src/nf_modules/Kallisto/mapping_paired.nf \
+  -c src/nf_modules/Kallisto/mapping_paired.config \
  -profile docker \
  --index "results/mapping/index/tiny_v2.index" \
  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"

--- a/src/nf_modules/MultiQC/multiqc.nf
+++ b/src/nf_modules/MultiQC/multiqc.nf
-/*
-* multiqc :
-* Imputs : report files
-* Output :  multiqc report
-*/
-
-/*                      MultiQC                                     */
-
-process multiqc {
-  tag "$report.baseName"
-  publishDir "results/fastq/multiqc/", mode: 'copy'
-  cpus = 1
-
-  input:
-    file report from fastqc_report.collect()
-
-  output:
-    file "*multiqc_*" into multiqc_report
-
-  script:
-"""
-multiqc -f .
-"""
-}
-
--- a/src/nf_modules/MultiQC/multiqc.config
+++ b/src/nf_modules/MultiQC/multiqc.config
--- a/src/nf_modules/MultiQC/tests/multiqc_paired.nf
+++ b/src/nf_modules/MultiQC/tests/multiqc_paired.nf
--- a/src/nf_modules/MultiQC/multiqc_single.config
+++ b/src/nf_modules/MultiQC/multiqc_single.config
+// Execution profiles for the single-end FastQC + MultiQC pipeline.
+// Each profile configures the two processes `fastqc_fastq` and `multiqc`.
+profiles {
+  // docker profile: enable Docker and pin one image per process.
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $fastqc_fastq {
+        container = "fastqc:0.11.5"
+      }
+      $multiqc {
+        container = "multiqc:1.0"
+      }
+    }
+  }
+  // sge profile: run both processes through the "sge" executor, loading the
+  // required tool with environment modules before each task script.
+  sge {
+    process{
+      $fastqc_fastq {
+        beforeScript = "module purge; module load FastQC/0.11.5"
+        executor = "sge"
+        cpus = 1
+        memory = "5GB"
+        time = "6h"
+        // NOTE(review): queueSize and pollInterval are documented under
+        // Nextflow's `executor` config scope - confirm they take effect here.
+        queueSize = 1000
+        pollInterval = '60sec'
+        queue = 'monointeldeb128'
+      }
+      $multiqc {
+        // Load MultiQC (this previously loaded a "FastQC/1.0" module, which is
+        // the wrong tool for this process). The version mirrors the
+        // multiqc:1.0 container used by the docker profile.
+        // NOTE(review): confirm the exact module name with `module avail`.
+        beforeScript = "module purge; module load MultiQC/1.0"
+        executor = "sge"
+        cpus = 1
+        memory = "5GB"
+        time = "6h"
+        // NOTE(review): see the executor-scope remark on fastqc_fastq above.
+        queueSize = 1000
+        pollInterval = '60sec'
+        queue = 'monointeldeb128'
+      }
+    }
+  }
+}
--- a/src/nf_modules/MultiQC/tests/multiqc_single.nf
+++ b/src/nf_modules/MultiQC/tests/multiqc_single.nf
--- a/src/nf_modules/MultiQC/tests/tests.sh
+++ b/src/nf_modules/MultiQC/tests/tests.sh
-nextflow src/nf_modules/MultiQC/tests/multiqc_paired.nf \
-  -c src/nf_modules/MultiQC/multiqc.config \
+nextflow src/nf_modules/MultiQC/multiqc_paired.nf \
+  -c src/nf_modules/MultiQC/multiqc_paired.config \
  -profile docker \
  --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq"

-nextflow src/nf_modules/MultiQC/tests/multiqc_single.nf \
-  -c src/nf_modules/MultiQC/multiqc.config \
+nextflow src/nf_modules/MultiQC/multiqc_single.nf \
+  -c src/nf_modules/MultiQC/multiqc_single.config \
  -profile docker \
  --fastq "data/tiny_dataset/fastq/tiny_S.fastq"
--- a/src/nf_modules/RSEM/indexing.config
+++ b/src/nf_modules/RSEM/indexing.config
+// Execution profiles for the `index_fasta` process.
+profiles {
+  // docker profile: enable Docker and run index_fasta in the rsem:1.3.0 image.
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $index_fasta {
+        container = "rsem:1.3.0"
+      }
+    }
+  }
+  // sge profile: only sets a beforeScript that resets the module environment
+  // and loads RSEM 1.3.0 and SAMtools 1.7 ahead of the task script.
+  sge {
+    process{
+      $index_fasta {
+        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
+      }
+    }
+  }
+}
--- a/src/nf_modules/RSEM/tests/index.nf
+++ b/src/nf_modules/RSEM/tests/index.nf
--- a/src/nf_modules/RSEM/rsem.config
+++ b/src/nf_modules/RSEM/rsem.config
@@ -3,9 +3,6 @@ profiles {
    docker.temp = 'auto'
    docker.enabled = true
    process {
-      $index_fasta {
-        container = "rsem:1.3.0"
-      }
      $mapping_fastq {
        container = "rsem:1.3.0"
      }
@@ -13,9 +10,6 @@ profiles {
  }
  sge {
    process{
-      $index_fasta {
-        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
-      }
      $mapping_fastq {
        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
      }

--- a/src/nf_modules/RSEM/tests/quantification_paired.nf
+++ b/src/nf_modules/RSEM/tests/quantification_paired.nf
@@ -20,20 +20,29 @@ process mapping_fastq {

  input:
  set pair_id, file(reads) from fastq_files
-  file index from index_files.collect()
+  file index from index_files.toList()

  output:
  file "*" into counts_files

  script:
-index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
 """
 rsem-calculate-expression --bowtie2 \
 --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
 --bowtie2-sensitivity-level "very_sensitive" \
 -output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
-> ${pair_id}_rsem_bowtie2_report.txt
+--paired-end ${reads[0]} ${reads[1]} ${index_id} ${pair_id} \
+2> ${pair_id}_rsem_bowtie2_report.txt
+
+if grep -q "Error" ${pair_id}_rsem_bowtie2_report.txt; then
+  exit 1
+fi
 """
 }


--- a/src/nf_modules/RSEM/quantification_single.config
+++ b/src/nf_modules/RSEM/quantification_single.config
+// Execution profiles for the `mapping_fastq` process.
+profiles {
+  // docker profile: enable Docker and run mapping_fastq in the rsem:1.3.0 image.
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $mapping_fastq {
+        container = "rsem:1.3.0"
+      }
+    }
+  }
+  // sge profile: only sets a beforeScript that resets the module environment
+  // and loads RSEM 1.3.0 and SAMtools 1.7 ahead of the task script.
+  sge {
+    process{
+      $mapping_fastq {
+        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
+      }
+    }
+  }
+}
--- a/src/nf_modules/RSEM/tests/quantification_single.nf
+++ b/src/nf_modules/RSEM/tests/quantification_single.nf
 params.fastq = "$baseDir/data/fastq/*.fastq"
 params.index = "$baseDir/data/index/*.index*"
-params.mean = 125
+params.mean = 200
 params.sd = 100

 log.info "fastq files : ${params.fastq}"
@@ -25,21 +25,31 @@ process mapping_fastq {

  input:
  set file_id, file(reads) from fastq_files
-  file index from index_files.collect()
+  file index from index_files.toList()

  output:
  file "*" into count_files

  script:
-index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
 """
+ls -l
 rsem-calculate-expression --bowtie2 \
 --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
 --bowtie2-sensitivity-level "very_sensitive" \
 --fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
 --output-genome-bam -p ${task.cpus} \
-${reads} ${index_name} ${file_id} \
-> ${reads.baseName}_rsem_bowtie2_report.txt
+${reads} ${index_id} ${file_id} \
+2> ${file_id}_rsem_bowtie2_report.txt
+
+if grep -q "Error" ${file_id}_rsem_bowtie2_report.txt; then
+  exit 1
+fi
 """
 }

--- a/src/nf_modules/RSEM/rsem.nf
+++ b/src/nf_modules/RSEM/rsem.nf
-/*
-* RSEM :
-* Imputs : fastq files
-* Imputs : fasta files
-* Output : bam files
-*/
-
-/*                      fasta indexing                                     */
-params.fasta = "$baseDir/data/bam/*.fasta"
-params.annotation = "$baseDir/data/bam/*.gff3"
-
-log.info "fasta files : ${params.fasta}"
-
-Channel
-  .fromPath( params.fasta )
-  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
-  .set { fasta_file }
-Channel
-  .fromPath( params.annotation )
-  .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
-  .set { annotation_file }
-
-process index_fasta {
-  tag "$fasta.baseName"
-  cpus 4
-  publishDir "results/mapping/index/", mode: 'copy'
-
-  input:
-    file fasta from fasta_file
-    file annotation from annotation_file
-
-  output:
-    file "*.index*" into index_files
-
-  script:
-  def cmd_annotation = "--gff3 ${annotation}"
-  if(annotation ==~ /.*\.gtf$/){
-    cmd_annotation = "--gtf ${annotation}"
-  }
-"""
-rsem-prepare-reference -p ${task.cpus} --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
-${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
-${fasta.baseName}_rsem_bowtie2_report.txt
-"""
-}
-
-
-/*
-* for paired-end data
-*/
-params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
-params.index = "$baseDir/data/index/*.index.*"
-
-log.info "fastq files : ${params.fastq}"
-log.info "index files : ${params.index}"
-
-Channel
-  .fromFilePairs( params.fastq )
-  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
-  .set { fastq_files }
-Channel
-  .fromPath( params.index )
-  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
-  .set { index_files }
-
-process mapping_fastq {
-  tag "$pair_id"
-  cpus 4
-  publishDir "results/mapping/quantification/", mode: 'copy'
-
-  input:
-  set pair_id, file(reads) from fastq_files
-  file index from index_files.collect()
-
-  output:
-  file "*" into counts_files
-
-  script:
-index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
-"""
-rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
-output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
-> ${pair_id}_rsem_bowtie2_report.txt
-"""
-}
-
-
-
-/*
-* for single-end data
-*/
-
-params.fastq = "$baseDir/data/fastq/*.fastq"
-params.index = "$baseDir/data/index/*.index*"
-params.mean = 125
-params.sd = 100
-
-log.info "fastq files : ${params.fastq}"
-log.info "index files : ${params.index}"
-log.info "mean read size: ${params.mean}"
-log.info "sd read size: ${params.sd}"
-
-Channel
-  .fromPath( params.fastq )
-  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
-  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
-  .set { fastq_files }
-Channel
-  .fromPath( params.index )
-  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
-  .set { index_files }
-
-process mapping_fastq {
-  tag "$file_id"
-  cpus 4
-  publishDir "results/mapping/quantification/", mode: 'copy'
-
-  input:
-  set file_id, file(reads) from fastq_files
-  file index from index_files.collect()
-
-  output:
-  file "*" into count_files
-
-  script:
-index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
-"""
-rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
--output-genome-bam -p ${task.cpus} \
-${reads} ${index_name} ${file_id} \
-> ${reads.baseName}_rsem_bowtie2_report.txt
-"""
-}
--- a/src/nf_modules/RSEM/tests/tests.sh
+++ b/src/nf_modules/RSEM/tests/tests.sh
-nextflow src/nf_modules/RSEM/tests/index.nf \
-  -c src/nf_modules/RSEM/rsem.config \
+nextflow src/nf_modules/RSEM/indexing.nf \
+  -c src/nf_modules/RSEM/indexing.config \
  -profile docker \
  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
  --annotation "data/tiny_dataset/annot/tiny.gff"

-nextflow src/nf_modules/RSEM/tests/quantification_single.nf \
-  -c src/nf_modules/RSEM/rsem.config \
+nextflow src/nf_modules/RSEM/quantification_single.nf \
+  -c src/nf_modules/RSEM/quantification_single.config \
  -profile docker \
  --index "results/mapping/index/tiny_v2.index*" \
  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"

-nextflow src/nf_modules/RSEM/tests/quantification_paired.nf \
-  -c src/nf_modules/RSEM/rsem.config \
+nextflow src/nf_modules/RSEM/quantification_paired.nf \
+  -c src/nf_modules/RSEM/quantification_paired.config \
  -profile docker \
  --index "results/mapping/index/tiny_v2.index*" \
  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"

--- a/src/nf_modules/SAMtools/samtools.config
+++ b/src/nf_modules/SAMtools/samtools.config
@@ -3,15 +3,6 @@ profiles {
    docker.temp = 'auto'
    docker.enabled = true
    process {
-      $sort_bam {
-        container = "samtools:1.7"
-      }
-      $index_bam {
-        container = "samtools:1.7"
-      }
-      $split_bam {
-        container = "samtools:1.7"
-      }
      $filter_bam {
        container = "samtools:1.7"
      }
@@ -19,15 +10,6 @@ profiles {
  }
  sge {
    process{
-      $trimming {
-        beforeScript = "module purge; module load SAMtools/1.7"
-      }
-      $index_bam {
-        beforeScript = "module purge; module load SAMtools/1.7"
-      }
-      $split_bam {
-        beforeScript = "module purge; module load SAMtools/1.7"
-      }
      $filter_bam {
        beforeScript = "module purge; module load SAMtools/1.7"
      }

--- a/src/nf_modules/SAMtools/tests/filter_bams.nf
+++ b/src/nf_modules/SAMtools/tests/filter_bams.nf
@@ -7,6 +7,7 @@ log.info "bed file : ${params.bed}"
 Channel
  .fromPath( params.bam )
  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
  .set { bam_files }
 Channel
  .fromPath( params.bed )
@@ -14,18 +15,18 @@ Channel
  .set { bed_files }

 process filter_bam {
-  tag "$bam.baseName"
+  tag "$file_id"
  cpus 4

  input:
-    file bam from bam_files
+    set file_id, file(bam) from bam_files
    file bed from bed_files

  output:
-    file "*_filtered.bam*" into filtered_bam_files
+    set file_id, "*_filtered.bam*" into filtered_bam_files
  script:
 """
-samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} > ${bam.baseName}_filtered.bam
+samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} > ${file_id}_filtered.bam
 """
 }


--- a/src/nf_modules/SAMtools/index_bams.config
+++ b/src/nf_modules/SAMtools/index_bams.config
+// Execution profiles for the `index_bam` process.
+profiles {
+  // docker profile: enable Docker and run index_bam in the samtools:1.7 image.
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $index_bam {
+        container = "samtools:1.7"
+      }
+    }
+  }
+  // sge profile: only sets a beforeScript that resets the module environment
+  // and loads SAMtools 1.7 ahead of the task script.
+  sge {
+    process{
+      $index_bam {
+        beforeScript = "module purge; module load SAMtools/1.7"
+      }
+    }
+  }
+}
--- a/src/nf_modules/SAMtools/tests/index_bams.nf
+++ b/src/nf_modules/SAMtools/tests/index_bams.nf
@@ -5,14 +5,18 @@ log.info "bams files : ${params.bam}"
 Channel
  .fromPath( params.bam )
  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
  .set { bam_files }

 process index_bam {
-  tag "$bam.baseName"
+  tag "$file_id"
+
  input:
-    file bam from bam_files
+    set file_id, file(bam) from bam_files
+
  output:
-    file "*bam*" into indexed_bam_file
+    set file_id, "*.bam*" into indexed_bam_file
+
  script:
 """
 samtools index ${bam}

--- a/src/nf_modules/SAMtools/samtools.nf
+++ b/src/nf_modules/SAMtools/samtools.nf
-/*
-* SAMtools :
-* Imputs : bam files
-* Output : bam files
-*/
-
-/*                      bams sorting                                     */
-params.bam = "$baseDir/data/bam/*.bam"
-
-log.info "bams files : ${params.bam}"
-
-Channel
-  .fromPath( params.bam )
-  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
-  .set { bam_files }
-
-process sort_bam {
-  tag "$bam.baseName"
-  cpus 4
-
-  input:
-    file bam from bam_files
-
-  output:
-    file "*_sorted.bam" into sorted_bam_files
-
-  script:
-"""
-samtools sort -@ ${task.cpus} -O BAM -o ${bam.baseName}_sorted.bam ${bam}
-"""
-}
-
-/*                      bams indexing                                     */
-
-params.bam = "$baseDir/data/bam/*.bam"
-
-log.info "bams files : ${params.bam}"
-
-Channel
-  .fromPath( params.bam )
-  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
-  .set { bam_files }
-
-process index_bam {
-  tag "$bam.baseName"
-  input:
-    file bam from bam_files
-  output:
-    file "*bam*" into indexed_bam_file
-  script:
-"""
-samtools index ${bam}
-"""
-}
-
-
-/*                      bams spliting                                     */
-params.bam = "$baseDir/data/bam/*.bam"
-
-log.info "bams files : ${params.bam}"
-
-Channel
-  .fromPath( params.bam )
-  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
-  .set { bam_files }
-
-process split_bam {
-  tag "$bam.baseName"
-  cpus 2
-
-  input:
-    file bam from bam_files
-
-  output:
-    file "*_forward.bam*" into forward_bam_files
-    file "*_reverse.bam*" into reverse_bam_files
-  script:
-"""
-samtools view -hb -F 0x10 ${bam} > ${bam}_forward.bam &
-samtools view -hb -f 0x10 ${bam} > ${bam}_reverse.bam
-"""
-}
-
-
-/*                      bams filtering                                     */
-params.bam = "$baseDir/data/bam/*.bam"
-params.bed = "$baseDir/data/bam/*.bed"
-
-log.info "bams files : ${params.bam}"
-log.info "bed file : ${params.bed}"
-
-Channel
-  .fromPath( params.bam )
-  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
-  .set { bam_files }
-Channel
-  .fromPath( params.bed )
-  .ifEmpty { error "Cannot find any bed file matching: ${params.bed}" }
-  .set { bed_files }
-
-process filter_bam {
-  tag "$bam.baseName"
-  cpus 4
-
-  input:
-    file bam from bam_files
-    file bed from bed_files
-
-  output:
-    file "*_filtered.bam*" into filtered_bam_files
-  script:
-"""
-samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} > ${bam.baseName}_filtered.bam
-"""
-}
-
-