RSEM: fix dockerfile and add nf and test files

4d8776b9 · Laurent Modolo · f7f61d07 · 4d8776b9 · 4d8776b9 · 4d8776b9
Verified Commit 4d8776b9 authored 6 years ago by Laurent Modolo
--- a/src/nf_modules/RSEM/1.3.0/Dockerfile
+++ b/src/nf_modules/RSEM/1.3.0/Dockerfile
@@ -3,11 +3,13 @@ MAINTAINER Laurent Modolo

 ENV RSEM_VERSION=1.3.0
 ENV BOWTIE2_VERSION=2.3.4.1
+ENV SAMTOOLS_VERSION=1.7
 ENV PACKAGES git=1:2.17.0* \
   build-essential=12.4* \
   ca-certificates=20180409 \
   zlib1g-dev=1:1.2.11* \
-   bowtie2=${BOWTIE2_VERSION}*
+   bowtie2=${BOWTIE2_VERSION}* \
+   samtools=${SAMTOOLS_VERSION}*

 RUN apt-get update && \
    apt-get install -y --no-install-recommends ${PACKAGES} && \

--- a/src/nf_modules/RSEM/rsem.config
+++ b/src/nf_modules/RSEM/rsem.config
+// Execution profiles for the RSEM module; select one with `-profile <name>`.
+profiles {
+  // docker: enable Docker and run both processes in the rsem:1.3.0 image.
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $index_fasta {
+        container = "rsem:1.3.0"
+      }
+      $mapping_fastq {
+        container = "rsem:1.3.0"
+      }
+    }
+  }
+  // sge: before each task script, purge the loaded environment modules and
+  // load RSEM 1.3.0 and SAMtools 1.7.
+  // NOTE(review): no executor is set in this profile and no bowtie2 module is
+  // loaded explicitly, although the process scripts call `which bowtie2` —
+  // confirm both are provided elsewhere (e.g. by the RSEM module).
+  sge {
+    process{
+      $index_fasta {
+        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
+      }
+      $mapping_fastq {
+        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
+      }
+    }
+  }
+}
--- a/src/nf_modules/RSEM/rsem.nf
+++ b/src/nf_modules/RSEM/rsem.nf
+/*
+* RSEM :
+* Inputs : fastq files
+* Inputs : fasta files
+* Output : bam files
+*/
+
+/*                      fasta indexing                                     */
+// Default input globs; both can be overridden with --fasta / --annotation.
+params.fasta = "$baseDir/data/bam/*.fasta"
+params.annotation = "$baseDir/data/bam/*.gff3"
+
+log.info "fasta files : ${params.fasta}"
+
+// Reference sequences: one item per matching file, abort if the glob is empty.
+fasta_file = Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+
+// Annotation files, guarded the same way.
+annotation_file = Channel
+  .fromPath( params.annotation )
+  .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
+
+// Build an RSEM reference with bowtie2 indices from a fasta file and its
+// annotation. Files matching "*.index*" are emitted on `index_files` and
+// copied to results/mapping/index/.
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+    file annotation from annotation_file
+
+  output:
+    file "*.index*" into index_files
+
+  script:
+  // Annotation names ending in ".gtf" are passed with --gtf, anything else
+  // with --gff3.
+  def is_gtf = annotation ==~ /.*\.gtf$/
+  def cmd_annotation = is_gtf ? "--gtf ${annotation}" : "--gff3 ${annotation}"
+  // --bowtie2-path receives the output of `which bowtie2` with the trailing
+  // "bowtie2" removed by sed; stdout is redirected to a per-fasta report file.
+"""
+rsem-prepare-reference -p ${task.cpus} --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
+${fasta.baseName}_rsem_bowtie2_report.txt
+"""
+}
+
+
+/*
+* for paired-end data
+*/
+// Default input globs for paired-end quantification (override with
+// --fastq / --index).
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+// Read pairs built by fromFilePairs from the fastq glob; abort if none match.
+fastq_files = Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+
+// Index files, one item per matching path; abort if none match.
+index_files = Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+
+// Quantify paired-end reads with rsem-calculate-expression (bowtie2 aligner).
+// Every file produced in the task directory is emitted on `counts_files` and
+// copied to results/mapping/quantification/.
+process mapping_fastq {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files
+  // toList() gathers all index files into one item staged with each task.
+  file index from index_files.toList()
+
+  output:
+  file "*" into counts_files
+
+  script:
+  // Reference prefix = base name of the first index file, cut before the last
+  // ".<digit>". Declared with `def` so it stays local to the task instead of
+  // becoming a script-global variable shared by concurrent tasks.
+  // NOTE(review): this assumes index[0] is a numbered file such as
+  // "<name>.1.bt2"; the match fails if another index file comes first.
+  def index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+  // --output-genome-bam uses the documented double-dash spelling, matching
+  // the single-end process.
+"""
+rsem-calculate-expression --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+--bowtie2-sensitivity-level "very_sensitive" \
+--output-genome-bam -p ${task.cpus} \
+--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
+> ${pair_id}_rsem_bowtie2_report.txt
+"""
+}
+
+
+
+/*
+* for single-end data
+*/
+
+// Default inputs for single-end quantification (override with --fastq,
+// --index, --mean, --sd). mean/sd are passed to RSEM as
+// --fragment-length-mean / --fragment-length-sd.
+params.fastq = "$baseDir/data/fastq/*.fastq"
+params.index = "$baseDir/data/index/*.index*"
+params.mean = 300
+params.sd = 100
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+log.info "mean read size: ${params.mean}"
+log.info "sd read size: ${params.sd}"
+
+// One item per fastq file; abort if the glob matches nothing.
+fastq_files = Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+
+// Index files, guarded the same way.
+index_files = Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+
+// Quantify single-end reads with rsem-calculate-expression (bowtie2 aligner).
+// Every file produced in the task directory is emitted on `count_files` and
+// copied to results/mapping/quantification/.
+process mapping_fastq {
+  tag "$reads.baseName"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  file reads from fastq_files
+  // toList() gathers all index files into one item staged with each task.
+  file index from index_files.toList()
+
+  output:
+  file "*" into count_files
+
+  script:
+  // Reference prefix = base name of the first index file, cut before the last
+  // ".<digit>". Declared with `def` so it stays local to the task.
+  // NOTE(review): this assumes index[0] is a numbered file such as
+  // "<name>.1.bt2"; the match fails if another index file comes first.
+  def index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+  // The sample name and report file are derived from the fastq base name; the
+  // previous `${tagname}` was never defined and failed at runtime.
+"""
+rsem-calculate-expression --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+--bowtie2-sensitivity-level "very_sensitive" \
+--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
+--output-genome-bam -p ${task.cpus} \
+${reads} ${index_name} ${reads.baseName} \
+> ${reads.baseName}_rsem_bowtie2_report.txt
+"""
+}
+
--- a/src/nf_modules/RSEM/tests/index.nf
+++ b/src/nf_modules/RSEM/tests/index.nf
+// Default input globs; both can be overridden with --fasta / --annotation.
+params.fasta = "$baseDir/data/bam/*.fasta"
+params.annotation = "$baseDir/data/bam/*.gff3"
+
+log.info "fasta files : ${params.fasta}"
+
+// Reference sequences: one item per matching file, abort if the glob is empty.
+fasta_file = Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+
+// Annotation files, guarded the same way.
+annotation_file = Channel
+  .fromPath( params.annotation )
+  .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
+
+// Build an RSEM reference with bowtie2 indices from a fasta file and its
+// annotation. Files matching "*.index*" are emitted on `index_files` and
+// copied to results/mapping/index/.
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+    file annotation from annotation_file
+
+  output:
+    file "*.index*" into index_files
+
+  script:
+  // Annotation names ending in ".gtf" are passed with --gtf, anything else
+  // with --gff3.
+  def is_gtf = annotation ==~ /.*\.gtf$/
+  def cmd_annotation = is_gtf ? "--gtf ${annotation}" : "--gff3 ${annotation}"
+  // --bowtie2-path receives the output of `which bowtie2` with the trailing
+  // "bowtie2" removed by sed; stdout is redirected to a per-fasta report file.
+"""
+rsem-prepare-reference -p ${task.cpus} --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
+${fasta.baseName}_rsem_bowtie2_report.txt
+"""
+}
+
+
--- a/src/nf_modules/RSEM/tests/quantification_paired.nf
+++ b/src/nf_modules/RSEM/tests/quantification_paired.nf
+// Default input globs for paired-end quantification (override with
+// --fastq / --index).
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+// Read pairs built by fromFilePairs from the fastq glob; abort if none match.
+fastq_files = Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+
+// Index files, one item per matching path; abort if none match.
+index_files = Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+
+// Quantify paired-end reads with rsem-calculate-expression (bowtie2 aligner).
+// Every file produced in the task directory is emitted on `counts_files` and
+// copied to results/mapping/quantification/.
+process mapping_fastq {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files
+  // toList() gathers all index files into one item staged with each task.
+  file index from index_files.toList()
+
+  output:
+  file "*" into counts_files
+
+  script:
+  // Reference prefix = base name of the first index file, cut before the last
+  // ".<digit>". Declared with `def` so it stays local to the task instead of
+  // becoming a script-global variable shared by concurrent tasks.
+  // NOTE(review): this assumes index[0] is a numbered file such as
+  // "<name>.1.bt2"; the match fails if another index file comes first.
+  def index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+  // --output-genome-bam uses the documented double-dash spelling, matching
+  // the single-end test.
+"""
+rsem-calculate-expression --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+--bowtie2-sensitivity-level "very_sensitive" \
+--output-genome-bam -p ${task.cpus} \
+--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
+> ${pair_id}_rsem_bowtie2_report.txt
+"""
+}
+
+
--- a/src/nf_modules/RSEM/tests/quantification_single.nf
+++ b/src/nf_modules/RSEM/tests/quantification_single.nf
+// Default inputs for single-end quantification (override with --fastq,
+// --index, --mean, --sd). mean/sd are passed to RSEM as
+// --fragment-length-mean / --fragment-length-sd.
+params.fastq = "$baseDir/data/fastq/*.fastq"
+params.index = "$baseDir/data/index/*.index*"
+params.mean = 300
+params.sd = 100
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+log.info "mean read size: ${params.mean}"
+log.info "sd read size: ${params.sd}"
+
+// One item per fastq file; abort if the glob matches nothing.
+fastq_files = Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+
+// Index files, guarded the same way.
+index_files = Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+
+// Quantify single-end reads with rsem-calculate-expression (bowtie2 aligner).
+// Every file produced in the task directory is emitted on `count_files` and
+// copied to results/mapping/quantification/.
+process mapping_fastq {
+  tag "$reads.baseName"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  file reads from fastq_files
+  // toList() gathers all index files into one item staged with each task.
+  file index from index_files.toList()
+
+  output:
+  file "*" into count_files
+
+  script:
+  // Reference prefix = base name of the first index file, cut before the last
+  // ".<digit>". Declared with `def` (as in index_fasta) so it stays local to
+  // the task instead of becoming a script-global variable.
+  // NOTE(review): this assumes index[0] is a numbered file such as
+  // "<name>.1.bt2"; the match fails if another index file comes first.
+  def index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+"""
+rsem-calculate-expression --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+--bowtie2-sensitivity-level "very_sensitive" \
+--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
+--output-genome-bam -p ${task.cpus} \
+${reads} ${index_name} ${reads.baseName} \
+> ${reads.baseName}_rsem_bowtie2_report.txt
+"""
+}
+
--- a/src/nf_modules/RSEM/tests/tests.sh
+++ b/src/nf_modules/RSEM/tests/tests.sh
+nextflow src/nf_modules/RSEM/tests/index.nf \
+  -c src/nf_modules/RSEM/rsem.config \
+  -profile docker \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
+  --annotation "data/tiny_dataset/annot/tiny.gff"
+
+nextflow src/nf_modules/RSEM/tests/quantification_single.nf \
+  -c src/nf_modules/RSEM/rsem.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"
+
+nextflow src/nf_modules/RSEM/tests/quantification_paired.nf \
+  -c src/nf_modules/RSEM/rsem.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"
+