From a4a0d6f08c763d98483240adc3a7ac0f10364bc6 Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Wed, 22 Aug 2018 16:42:56 +0200 Subject: [PATCH] RSEM: update nf structure --- src/nf_modules/RSEM/indexing.config | 18 +++ .../RSEM/{tests/index.nf => indexing.nf} | 0 ...em.config => quantification_paired.config} | 6 - .../RSEM/{tests => }/quantification_paired.nf | 17 ++- .../RSEM/quantification_single.config | 18 +++ .../RSEM/{tests => }/quantification_single.nf | 20 ++- src/nf_modules/RSEM/rsem.nf | 140 ------------------ src/nf_modules/RSEM/{tests => }/tests.sh | 12 +- 8 files changed, 70 insertions(+), 161 deletions(-) create mode 100644 src/nf_modules/RSEM/indexing.config rename src/nf_modules/RSEM/{tests/index.nf => indexing.nf} (100%) rename src/nf_modules/RSEM/{rsem.config => quantification_paired.config} (63%) rename src/nf_modules/RSEM/{tests => }/quantification_paired.nf (68%) create mode 100644 src/nf_modules/RSEM/quantification_single.config rename src/nf_modules/RSEM/{tests => }/quantification_single.nf (72%) delete mode 100644 src/nf_modules/RSEM/rsem.nf rename src/nf_modules/RSEM/{tests => }/tests.sh (54%) diff --git a/src/nf_modules/RSEM/indexing.config b/src/nf_modules/RSEM/indexing.config new file mode 100644 index 0000000..ddf93b6 --- /dev/null +++ b/src/nf_modules/RSEM/indexing.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $index_fasta { + container = "rsem:1.3.0" + } + } + } + sge { + process{ + $index_fasta { + beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7" + } + } + } +} diff --git a/src/nf_modules/RSEM/tests/index.nf b/src/nf_modules/RSEM/indexing.nf similarity index 100% rename from src/nf_modules/RSEM/tests/index.nf rename to src/nf_modules/RSEM/indexing.nf diff --git a/src/nf_modules/RSEM/rsem.config b/src/nf_modules/RSEM/quantification_paired.config similarity index 63% rename from src/nf_modules/RSEM/rsem.config rename to src/nf_modules/RSEM/quantification_paired.config index 3209b6b..344ab1e 100644 --- a/src/nf_modules/RSEM/rsem.config +++ b/src/nf_modules/RSEM/quantification_paired.config @@ -3,9 +3,6 @@ profiles { docker.temp = 'auto' docker.enabled = true process { - $index_fasta { - container = "rsem:1.3.0" - } $mapping_fastq { container = "rsem:1.3.0" } @@ -13,9 +10,6 @@ profiles { } sge { process{ - $index_fasta { - beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7" - } $mapping_fastq { beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7" } diff --git a/src/nf_modules/RSEM/tests/quantification_paired.nf b/src/nf_modules/RSEM/quantification_paired.nf similarity index 68% rename from src/nf_modules/RSEM/tests/quantification_paired.nf rename to src/nf_modules/RSEM/quantification_paired.nf index 0109500..a22950f 100644 --- a/src/nf_modules/RSEM/tests/quantification_paired.nf +++ b/src/nf_modules/RSEM/quantification_paired.nf @@ -20,20 +20,29 @@ process mapping_fastq { input: set pair_id, file(reads) from fastq_files - file index from index_files.collect() + file index from index_files.toList() output: file "*" into counts_files script: -index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } """ rsem-calculate-expression --bowtie2 \ --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ --bowtie2-sensitivity-level "very_sensitive" \ -output-genome-bam -p ${task.cpus} \ ---paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \ -> ${pair_id}_rsem_bowtie2_report.txt +--paired-end ${reads[0]} ${reads[1]} ${index_id} ${pair_id} \ +2> ${pair_id}_rsem_bowtie2_report.txt + +if grep -q "Error" ${pair_id}_rsem_bowtie2_report.txt; then + exit 1 +fi """ } diff --git a/src/nf_modules/RSEM/quantification_single.config b/src/nf_modules/RSEM/quantification_single.config new file mode 100644 index 0000000..344ab1e --- /dev/null +++ b/src/nf_modules/RSEM/quantification_single.config @@ -0,0 +1,18 @@ +profiles { + docker { + docker.temp = 'auto' + docker.enabled = true + process { + $mapping_fastq { + container = "rsem:1.3.0" + } + } + } + sge { + process{ + $mapping_fastq { + beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7" + } + } + } +} diff --git a/src/nf_modules/RSEM/tests/quantification_single.nf b/src/nf_modules/RSEM/quantification_single.nf similarity index 72% rename from src/nf_modules/RSEM/tests/quantification_single.nf rename to src/nf_modules/RSEM/quantification_single.nf index 47e0047..d52b7f4 100644 --- a/src/nf_modules/RSEM/tests/quantification_single.nf +++ b/src/nf_modules/RSEM/quantification_single.nf @@ -1,6 +1,6 @@ params.fastq = "$baseDir/data/fastq/*.fastq" params.index = "$baseDir/data/index/*.index*" -params.mean = 125 +params.mean = 200 params.sd = 100 log.info "fastq files : ${params.fastq}" @@ -25,21 +25,31 @@ process mapping_fastq { input: set file_id, file(reads) from fastq_files - file index from index_files.collect() + file index from index_files.toList() output: file "*" into count_files script: -index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } """ +ls -l rsem-calculate-expression --bowtie2 \ --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ --bowtie2-sensitivity-level "very_sensitive" \ --fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \ --output-genome-bam -p ${task.cpus} \ -${reads} ${index_name} ${file_id} \ -> ${reads.baseName}_rsem_bowtie2_report.txt +${reads} ${index_id} ${file_id} \ +2> ${file_id}_rsem_bowtie2_report.txt + +if grep -q "Error" ${file_id}_rsem_bowtie2_report.txt; then + exit 1 +fi """ } diff --git a/src/nf_modules/RSEM/rsem.nf b/src/nf_modules/RSEM/rsem.nf deleted file mode 100644 index aa2fc95..0000000 --- a/src/nf_modules/RSEM/rsem.nf +++ /dev/null @@ -1,140 +0,0 @@ -/* -* RSEM : -* Imputs : fastq files -* Imputs : fasta files -* Output : bam files -*/ - -/* fasta indexing */ -params.fasta = "$baseDir/data/bam/*.fasta" -params.annotation = "$baseDir/data/bam/*.gff3" - -log.info "fasta files : ${params.fasta}" - -Channel - .fromPath( params.fasta ) - .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } - .set { fasta_file } -Channel - .fromPath( params.annotation ) - .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" } - .set { annotation_file } - -process index_fasta { - tag "$fasta.baseName" - cpus 4 - publishDir "results/mapping/index/", mode: 'copy' - - input: - file fasta from fasta_file - file annotation from annotation_file - - output: - file "*.index*" into index_files - - script: - def cmd_annotation = "--gff3 ${annotation}" - if(annotation ==~ /.*\.gtf$/){ - cmd_annotation = "--gtf ${annotation}" - } -""" -rsem-prepare-reference -p ${task.cpus} --bowtie2 \ ---bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ -${cmd_annotation} ${fasta} ${fasta.baseName}.index > \ -${fasta.baseName}_rsem_bowtie2_report.txt -""" -} - - -/* -* for paired-end data -*/ -params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" -params.index = "$baseDir/data/index/*.index.*" - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" - -Channel - .fromFilePairs( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$pair_id" - cpus 4 - publishDir "results/mapping/quantification/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - file index from index_files.collect() - - output: - file "*" into counts_files - - script: -index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] -""" -rsem-calculate-expression --bowtie2 \ ---bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ ---bowtie2-sensitivity-level "very_sensitive" \ --output-genome-bam -p ${task.cpus} \ ---paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \ -> ${pair_id}_rsem_bowtie2_report.txt -""" -} - - - -/* -* for single-end data -*/ - -params.fastq = "$baseDir/data/fastq/*.fastq" -params.index = "$baseDir/data/index/*.index*" -params.mean = 125 -params.sd = 100 - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" -log.info "mean read size: ${params.mean}" -log.info "sd read size: ${params.sd}" - -Channel - .fromPath( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$file_id" - cpus 4 - publishDir "results/mapping/quantification/", mode: 'copy' - - input: - set file_id, file(reads) from fastq_files - file index from index_files.collect() - - output: - file "*" into count_files - - script: -index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] -""" -rsem-calculate-expression --bowtie2 \ ---bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ ---bowtie2-sensitivity-level "very_sensitive" \ ---fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \ ---output-genome-bam -p ${task.cpus} \ -${reads} ${index_name} ${file_id} \ -> ${reads.baseName}_rsem_bowtie2_report.txt -""" -} diff --git a/src/nf_modules/RSEM/tests/tests.sh b/src/nf_modules/RSEM/tests.sh similarity index 54% rename from src/nf_modules/RSEM/tests/tests.sh rename to src/nf_modules/RSEM/tests.sh index f7fcd06..c57c8c4 100755 --- a/src/nf_modules/RSEM/tests/tests.sh +++ b/src/nf_modules/RSEM/tests.sh @@ -1,17 +1,17 @@ -nextflow src/nf_modules/RSEM/tests/index.nf \ - -c src/nf_modules/RSEM/rsem.config \ +nextflow src/nf_modules/RSEM/indexing.nf \ + -c src/nf_modules/RSEM/indexing.config \ -profile docker \ --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \ --annotation "data/tiny_dataset/annot/tiny.gff" -nextflow src/nf_modules/RSEM/tests/quantification_single.nf \ - -c src/nf_modules/RSEM/rsem.config \ +nextflow src/nf_modules/RSEM/quantification_single.nf \ + -c src/nf_modules/RSEM/quantification_single.config \ -profile docker \ --index "results/mapping/index/tiny_v2.index*" \ --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" -nextflow src/nf_modules/RSEM/tests/quantification_paired.nf \ - -c src/nf_modules/RSEM/rsem.config \ +nextflow src/nf_modules/RSEM/quantification_paired.nf \ + -c src/nf_modules/RSEM/quantification_paired.config \ -profile docker \ --index "results/mapping/index/tiny_v2.index*" \ --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" -- GitLab