RSEM: update nf structure

a4a0d6f0 · Laurent Modolo · 927521d7 · a4a0d6f0 · a4a0d6f0 · a4a0d6f0
Verified Commit a4a0d6f0 authored 6 years ago by Laurent Modolo
--- a/src/nf_modules/RSEM/indexing.config
+++ b/src/nf_modules/RSEM/indexing.config
+profiles {  // Nextflow profiles for the RSEM indexing step; one is chosen on the command line with -profile
+  docker {  // container-based execution
+    docker.temp = 'auto'
+    docker.enabled = true  // turn Docker on when this profile is active
+    process {
+      $index_fasta {  // settings scoped to the process named index_fasta
+        container = "rsem:1.3.0"  // image tag the process runs in
+      }
+    }
+  }
+  sge {  // NOTE(review): presumably meant for an SGE cluster using environment modules; no executor is set in this block, confirm
+    process{
+      $index_fasta {  // settings scoped to the process named index_fasta
+        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"  // reset loaded modules, then load the RSEM and SAMtools versions named here
+      }
+    }
+  }
+}
--- a/src/nf_modules/RSEM/tests/index.nf
+++ b/src/nf_modules/RSEM/tests/index.nf
--- a/src/nf_modules/RSEM/rsem.config
+++ b/src/nf_modules/RSEM/rsem.config
@@ -3,9 +3,6 @@ profiles {
    docker.temp = 'auto'
    docker.enabled = true
    process {
-      $index_fasta {
-        container = "rsem:1.3.0"
-      }
      $mapping_fastq {
        container = "rsem:1.3.0"
      }
@@ -13,9 +10,6 @@ profiles {
  }
  sge {
    process{
-      $index_fasta {
-        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
-      }
      $mapping_fastq {
        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
      }

--- a/src/nf_modules/RSEM/tests/quantification_paired.nf
+++ b/src/nf_modules/RSEM/tests/quantification_paired.nf
@@ -20,20 +20,29 @@ process mapping_fastq {
  input:
  set pair_id, file(reads) from fastq_files
-  file index from index_files.collect()
+  file index from index_files.toList()
  output:
  file "*" into counts_files
  script:
-index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
 """
 rsem-calculate-expression --bowtie2 \
 --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
 --bowtie2-sensitivity-level "very_sensitive" \
 -output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
+--paired-end ${reads[0]} ${reads[1]} ${index_id} ${pair_id} \
-> ${pair_id}_rsem_bowtie2_report.txt
+2> ${pair_id}_rsem_bowtie2_report.txt
+if grep -q "Error" ${pair_id}_rsem_bowtie2_report.txt; then
+  exit 1
+fi
 """
 }

--- a/src/nf_modules/RSEM/quantification_single.config
+++ b/src/nf_modules/RSEM/quantification_single.config
+profiles {  // Nextflow profiles for the single-end RSEM quantification step; one is chosen on the command line with -profile
+  docker {  // container-based execution
+    docker.temp = 'auto'
+    docker.enabled = true  // turn Docker on when this profile is active
+    process {
+      $mapping_fastq {  // settings scoped to the process named mapping_fastq
+        container = "rsem:1.3.0"  // image tag the process runs in
+      }
+    }
+  }
+  sge {  // NOTE(review): presumably meant for an SGE cluster using environment modules; no executor is set in this block, confirm
+    process{
+      $mapping_fastq {  // settings scoped to the process named mapping_fastq
+        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"  // reset loaded modules, then load the RSEM and SAMtools versions named here
+      }
+    }
+  }
+}
--- a/src/nf_modules/RSEM/tests/quantification_single.nf
+++ b/src/nf_modules/RSEM/tests/quantification_single.nf
 params.fastq = "$baseDir/data/fastq/*.fastq"
 params.index = "$baseDir/data/index/*.index*"
-params.mean = 125
+params.mean = 200
 params.sd = 100
 log.info "fastq files : ${params.fastq}"
@@ -25,21 +25,31 @@ process mapping_fastq {
  input:
  set file_id, file(reads) from fastq_files
-  file index from index_files.collect()
+  file index from index_files.toList()
  output:
  file "*" into count_files
  script:
-index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    }
+  }
 """
+ls -l
 rsem-calculate-expression --bowtie2 \
 --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
 --bowtie2-sensitivity-level "very_sensitive" \
 --fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
 --output-genome-bam -p ${task.cpus} \
-${reads} ${index_name} ${file_id} \
+${reads} ${index_id} ${file_id} \
-> ${reads.baseName}_rsem_bowtie2_report.txt
+2> ${file_id}_rsem_bowtie2_report.txt
+if grep -q "Error" ${file_id}_rsem_bowtie2_report.txt; then
+  exit 1
+fi
 """
 }
--- a/src/nf_modules/RSEM/rsem.nf
+++ b/src/nf_modules/RSEM/rsem.nf
-/*
-* RSEM :
-* Imputs : fastq files
-* Imputs : fasta files
-* Output : bam files
-*/
-/*                      fasta indexing                                     */
-params.fasta = "$baseDir/data/bam/*.fasta"
-params.annotation = "$baseDir/data/bam/*.gff3"
-log.info "fasta files : ${params.fasta}"
-Channel
-  .fromPath( params.fasta )
-  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
-  .set { fasta_file }
-Channel
-  .fromPath( params.annotation )
-  .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
-  .set { annotation_file }
-process index_fasta {
-  tag "$fasta.baseName"
-  cpus 4
-  publishDir "results/mapping/index/", mode: 'copy'
-  input:
-    file fasta from fasta_file
-    file annotation from annotation_file
-  output:
-    file "*.index*" into index_files
-  script:
-  def cmd_annotation = "--gff3 ${annotation}"
-  if(annotation ==~ /.*\.gtf$/){
-    cmd_annotation = "--gtf ${annotation}"
-  }
-"""
-rsem-prepare-reference -p ${task.cpus} --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
-${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
-${fasta.baseName}_rsem_bowtie2_report.txt
-"""
-}
-/*
-* for paired-end data
-*/
-params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
-params.index = "$baseDir/data/index/*.index.*"
-log.info "fastq files : ${params.fastq}"
-log.info "index files : ${params.index}"
-Channel
-  .fromFilePairs( params.fastq )
-  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
-  .set { fastq_files }
-Channel
-  .fromPath( params.index )
-  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
-  .set { index_files }
-process mapping_fastq {
-  tag "$pair_id"
-  cpus 4
-  publishDir "results/mapping/quantification/", mode: 'copy'
-  input:
-  set pair_id, file(reads) from fastq_files
-  file index from index_files.collect()
-  output:
-  file "*" into counts_files
-  script:
-index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
-"""
-rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
-output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
-> ${pair_id}_rsem_bowtie2_report.txt
-"""
-}
-/*
-* for single-end data
-*/
-params.fastq = "$baseDir/data/fastq/*.fastq"
-params.index = "$baseDir/data/index/*.index*"
-params.mean = 125
-params.sd = 100
-log.info "fastq files : ${params.fastq}"
-log.info "index files : ${params.index}"
-log.info "mean read size: ${params.mean}"
-log.info "sd read size: ${params.sd}"
-Channel
-  .fromPath( params.fastq )
-  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
-  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
-  .set { fastq_files }
-Channel
-  .fromPath( params.index )
-  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
-  .set { index_files }
-process mapping_fastq {
-  tag "$file_id"
-  cpus 4
-  publishDir "results/mapping/quantification/", mode: 'copy'
-  input:
-  set file_id, file(reads) from fastq_files
-  file index from index_files.collect()
-  output:
-  file "*" into count_files
-  script:
-index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
-"""
-rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
--output-genome-bam -p ${task.cpus} \
-${reads} ${index_name} ${file_id} \
-> ${reads.baseName}_rsem_bowtie2_report.txt
-"""
-}
--- a/src/nf_modules/RSEM/tests/tests.sh
+++ b/src/nf_modules/RSEM/tests/tests.sh
-nextflow src/nf_modules/RSEM/tests/index.nf \
+nextflow src/nf_modules/RSEM/indexing.nf \
-  -c src/nf_modules/RSEM/rsem.config \
+  -c src/nf_modules/RSEM/indexing.config \
  -profile docker \
  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
  --annotation "data/tiny_dataset/annot/tiny.gff"  # command 1: run indexing.nf with indexing.config (docker profile) on the tiny fasta and gff annotation
-nextflow src/nf_modules/RSEM/tests/quantification_single.nf \
+nextflow src/nf_modules/RSEM/quantification_single.nf \
-  -c src/nf_modules/RSEM/rsem.config \
+  -c src/nf_modules/RSEM/quantification_single.config \
  -profile docker \
  --index "results/mapping/index/tiny_v2.index*" \
  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"  # command 2: single-end quantification; the --index files are presumably produced by command 1 (confirm)
-nextflow src/nf_modules/RSEM/tests/quantification_paired.nf \
+nextflow src/nf_modules/RSEM/quantification_paired.nf \
-  -c src/nf_modules/RSEM/rsem.config \
+  -c src/nf_modules/RSEM/quantification_paired.config \
  -profile docker \
  --index "results/mapping/index/tiny_v2.index*" \
  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"  # command 3: paired-end quantification on the _R1/_R2 files. NOTE(review): quantification_paired.config is not created in the visible part of this change; confirm it exists