From 4d8776b97b7737e0fcf58c8ed09f2d5d7c393882 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Tue, 5 Jun 2018 13:28:06 +0200
Subject: [PATCH] RSEM: fix dockerfile and add nf and test files

---
Review notes (text between the three-dash line and the first "diff --git"
is not part of the commit message):
 - rsem.nf, single-end mapping_fastq: the script interpolated ${tagname},
   which is defined nowhere in the file; it now uses ${reads.baseName},
   as tests/quantification_single.nf already does.
 - rsem.nf and tests/quantification_paired.nf: "-output-genome-bam" is now
   spelled "--output-genome-bam", matching the single-end command.
 - rsem.nf header comment: "Imputs" -> "Inputs".
 - Line counts of every hunk are unchanged. The blob ids on the "index"
   lines still describe the original revision of each file.
 - NOTE(review): leading whitespace of the Dockerfile context lines was
   reconstructed; if the hunk does not apply cleanly, retry with
   --ignore-whitespace.

 src/nf_modules/RSEM/1.3.0/Dockerfile          |   4 +-
 src/nf_modules/RSEM/rsem.config               |  24 +++
 src/nf_modules/RSEM/rsem.nf                   | 139 ++++++++++++++++++
 src/nf_modules/RSEM/tests/index.nf            |  40 +++++
 .../RSEM/tests/quantification_paired.nf       |  40 +++++
 .../RSEM/tests/quantification_single.nf       |  44 ++++++
 src/nf_modules/RSEM/tests/tests.sh            |  18 +++
 7 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 src/nf_modules/RSEM/rsem.config
 create mode 100644 src/nf_modules/RSEM/rsem.nf
 create mode 100644 src/nf_modules/RSEM/tests/index.nf
 create mode 100644 src/nf_modules/RSEM/tests/quantification_paired.nf
 create mode 100644 src/nf_modules/RSEM/tests/quantification_single.nf
 create mode 100644 src/nf_modules/RSEM/tests/tests.sh

diff --git a/src/nf_modules/RSEM/1.3.0/Dockerfile b/src/nf_modules/RSEM/1.3.0/Dockerfile
index 14d4757..1ccdaa7 100644
--- a/src/nf_modules/RSEM/1.3.0/Dockerfile
+++ b/src/nf_modules/RSEM/1.3.0/Dockerfile
@@ -3,11 +3,13 @@ MAINTAINER Laurent Modolo
 
 ENV RSEM_VERSION=1.3.0
 ENV BOWTIE2_VERSION=2.3.4.1
+ENV SAMTOOLS_VERSION=1.7
 ENV PACKAGES git=1:2.17.0* \
    build-essential=12.4* \
    ca-certificates=20180409 \
    zlib1g-dev=1:1.2.11* \
-   bowtie2=${BOWTIE2_VERSION}*
+   bowtie2=${BOWTIE2_VERSION}* \
+   samtools=${SAMTOOLS_VERSION}*
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends ${PACKAGES} && \
diff --git a/src/nf_modules/RSEM/rsem.config b/src/nf_modules/RSEM/rsem.config
new file mode 100644
index 0000000..3209b6b
--- /dev/null
+++ b/src/nf_modules/RSEM/rsem.config
@@ -0,0 +1,24 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $index_fasta {
+        container = "rsem:1.3.0"
+      }
+      $mapping_fastq {
+        container = "rsem:1.3.0"
+      }
+    }
+  }
+  sge {
+    process{
+      $index_fasta {
+        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
+      }
+      $mapping_fastq {
+        beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/RSEM/rsem.nf b/src/nf_modules/RSEM/rsem.nf
new file mode 100644
index 0000000..54e6ad0
--- /dev/null
+++ b/src/nf_modules/RSEM/rsem.nf
@@ -0,0 +1,139 @@
+/*
+* RSEM :
+* Inputs : fastq files
+* Inputs : fasta files
+* Output : bam files
+*/
+
+/* fasta indexing */
+params.fasta = "$baseDir/data/bam/*.fasta"
+params.annotation = "$baseDir/data/bam/*.gff3"
+
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+Channel
+  .fromPath( params.annotation )
+  .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
+  .set { annotation_file }
+
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+    file annotation from annotation_file
+
+  output:
+    file "*.index*" into index_files
+
+  script:
+  def cmd_annotation = "--gff3 ${annotation}"
+  if(annotation ==~ /.*\.gtf$/){
+    cmd_annotation = "--gtf ${annotation}"
+  }
+"""
+rsem-prepare-reference -p ${task.cpus} --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
+${fasta.baseName}_rsem_bowtie2_report.txt
+"""
+}
+
+
+/*
+* for paired-end data
+*/
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+    set pair_id, file(reads) from fastq_files
+    file index from index_files.toList()
+
+  output:
+    file "*" into counts_files
+
+  script:
+index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+"""
+rsem-calculate-expression --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+--bowtie2-sensitivity-level "very_sensitive" \
+--output-genome-bam -p ${task.cpus} \
+--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
+> ${pair_id}_rsem_bowtie2_report.txt
+"""
+}
+
+
+
+/*
+* for single-end data
+*/
+
+params.fastq = "$baseDir/data/fastq/*.fastq"
+params.index = "$baseDir/data/index/*.index*"
+params.mean = 300
+params.sd = 100
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+log.info "mean read size: ${params.mean}"
+log.info "sd read size: ${params.sd}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$reads.baseName"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+    file reads from fastq_files
+    file index from index_files.toList()
+
+  output:
+    file "*" into count_files
+
+  script:
+index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+"""
+rsem-calculate-expression --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+--bowtie2-sensitivity-level "very_sensitive" \
+--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
+--output-genome-bam -p ${task.cpus} \
+${reads} ${index_name} ${reads.baseName} > ${reads.baseName}_rsem_bowtie2_report.txt
+"""
+}
+
diff --git a/src/nf_modules/RSEM/tests/index.nf b/src/nf_modules/RSEM/tests/index.nf
new file mode 100644
index 0000000..0f5473a
--- /dev/null
+++ b/src/nf_modules/RSEM/tests/index.nf
@@ -0,0 +1,40 @@
+params.fasta = "$baseDir/data/bam/*.fasta"
+params.annotation = "$baseDir/data/bam/*.gff3"
+
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+Channel
+  .fromPath( params.annotation )
+  .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
+  .set { annotation_file }
+
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+    file annotation from annotation_file
+
+  output:
+    file "*.index*" into index_files
+
+  script:
+  def cmd_annotation = "--gff3 ${annotation}"
+  if(annotation ==~ /.*\.gtf$/){
+    cmd_annotation = "--gtf ${annotation}"
+  }
+"""
+rsem-prepare-reference -p ${task.cpus} --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
+${fasta.baseName}_rsem_bowtie2_report.txt
+"""
+}
+
+
diff --git a/src/nf_modules/RSEM/tests/quantification_paired.nf b/src/nf_modules/RSEM/tests/quantification_paired.nf
new file mode 100644
index 0000000..75ae42f
--- /dev/null
+++ b/src/nf_modules/RSEM/tests/quantification_paired.nf
@@ -0,0 +1,40 @@
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+    set pair_id, file(reads) from fastq_files
+    file index from index_files.toList()
+
+  output:
+    file "*" into counts_files
+
+  script:
+index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+"""
+rsem-calculate-expression --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+--bowtie2-sensitivity-level "very_sensitive" \
+--output-genome-bam -p ${task.cpus} \
+--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
+> ${pair_id}_rsem_bowtie2_report.txt
+"""
+}
+
+
diff --git a/src/nf_modules/RSEM/tests/quantification_single.nf b/src/nf_modules/RSEM/tests/quantification_single.nf
new file mode 100644
index 0000000..0fd26f0
--- /dev/null
+++ b/src/nf_modules/RSEM/tests/quantification_single.nf
@@ -0,0 +1,44 @@
+params.fastq = "$baseDir/data/fastq/*.fastq"
+params.index = "$baseDir/data/index/*.index*"
+params.mean = 300
+params.sd = 100
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+log.info "mean read size: ${params.mean}"
+log.info "sd read size: ${params.sd}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$reads.baseName"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+    file reads from fastq_files
+    file index from index_files.toList()
+
+  output:
+    file "*" into count_files
+
+  script:
+index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
+"""
+rsem-calculate-expression --bowtie2 \
+--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
+--bowtie2-sensitivity-level "very_sensitive" \
+--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
+--output-genome-bam -p ${task.cpus} \
+${reads} ${index_name} ${reads.baseName} \
+> ${reads.baseName}_rsem_bowtie2_report.txt
+"""
+}
+
diff --git a/src/nf_modules/RSEM/tests/tests.sh b/src/nf_modules/RSEM/tests/tests.sh
new file mode 100644
index 0000000..f7fcd06
--- /dev/null
+++ b/src/nf_modules/RSEM/tests/tests.sh
@@ -0,0 +1,18 @@
+nextflow src/nf_modules/RSEM/tests/index.nf \
+  -c src/nf_modules/RSEM/rsem.config \
+  -profile docker \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
+  --annotation "data/tiny_dataset/annot/tiny.gff"
+
+nextflow src/nf_modules/RSEM/tests/quantification_single.nf \
+  -c src/nf_modules/RSEM/rsem.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"
+
+nextflow src/nf_modules/RSEM/tests/quantification_paired.nf \
+  -c src/nf_modules/RSEM/rsem.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"
+
-- 
GitLab