Skip to content
Snippets Groups Projects
Verified Commit a4a0d6f0 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

RSEM: update nf structure

parent 927521d7
No related branches found
No related tags found
No related merge requests found
// Execution profiles for the index_fasta process (select one with -profile).
profiles {
// docker profile: run tasks in Docker containers.
docker {
// NOTE(review): per the Nextflow docker scope docs, 'auto' mounts a fresh
// host temporary directory into each container — confirm for the version in use.
docker.temp = 'auto'
docker.enabled = true
process {
// Container image used by the index_fasta process.
$index_fasta {
container = "rsem:1.3.0"
}
}
}
// sge profile: only prepares the environment through modules; no executor is set here.
sge {
process{
$index_fasta {
// Runs before the task script: clears loaded modules, then loads RSEM 1.3.0 and SAMtools 1.7.
beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
}
}
}
}
...@@ -3,9 +3,6 @@ profiles { ...@@ -3,9 +3,6 @@ profiles {
docker.temp = 'auto' docker.temp = 'auto'
docker.enabled = true docker.enabled = true
process { process {
$index_fasta {
container = "rsem:1.3.0"
}
$mapping_fastq { $mapping_fastq {
container = "rsem:1.3.0" container = "rsem:1.3.0"
} }
...@@ -13,9 +10,6 @@ profiles { ...@@ -13,9 +10,6 @@ profiles {
} }
sge { sge {
process{ process{
$index_fasta {
beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
}
$mapping_fastq { $mapping_fastq {
beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7" beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
} }
......
...@@ -20,20 +20,29 @@ process mapping_fastq { ...@@ -20,20 +20,29 @@ process mapping_fastq {
input: input:
set pair_id, file(reads) from fastq_files set pair_id, file(reads) from fastq_files
file index from index_files.collect() file index from index_files.toList()
output: output:
file "*" into counts_files file "*" into counts_files
script: script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
}
}
""" """
rsem-calculate-expression --bowtie2 \ rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \ --bowtie2-sensitivity-level "very_sensitive" \
-output-genome-bam -p ${task.cpus} \ -output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \ --paired-end ${reads[0]} ${reads[1]} ${index_id} ${pair_id} \
> ${pair_id}_rsem_bowtie2_report.txt 2> ${pair_id}_rsem_bowtie2_report.txt
if grep -q "Error" ${pair_id}_rsem_bowtie2_report.txt; then
exit 1
fi
""" """
} }
......
// Execution profiles for the mapping_fastq process (select one with -profile).
profiles {
// docker profile: run tasks in Docker containers.
docker {
// NOTE(review): per the Nextflow docker scope docs, 'auto' mounts a fresh
// host temporary directory into each container — confirm for the version in use.
docker.temp = 'auto'
docker.enabled = true
process {
// Container image used by the mapping_fastq process.
$mapping_fastq {
container = "rsem:1.3.0"
}
}
}
// sge profile: only prepares the environment through modules; no executor is set here.
sge {
process{
$mapping_fastq {
// Runs before the task script: clears loaded modules, then loads RSEM 1.3.0 and SAMtools 1.7.
beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
}
}
}
}
params.fastq = "$baseDir/data/fastq/*.fastq" params.fastq = "$baseDir/data/fastq/*.fastq"
params.index = "$baseDir/data/index/*.index*" params.index = "$baseDir/data/index/*.index*"
params.mean = 125 params.mean = 200
params.sd = 100 params.sd = 100
log.info "fastq files : ${params.fastq}" log.info "fastq files : ${params.fastq}"
...@@ -25,21 +25,31 @@ process mapping_fastq { ...@@ -25,21 +25,31 @@ process mapping_fastq {
input: input:
set file_id, file(reads) from fastq_files set file_id, file(reads) from fastq_files
file index from index_files.collect() file index from index_files.toList()
output: output:
file "*" into count_files file "*" into count_files
script: script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1] index_id = index[0]
for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
}
}
""" """
ls -l
rsem-calculate-expression --bowtie2 \ rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \ --bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \ --bowtie2-sensitivity-level "very_sensitive" \
--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \ --fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
--output-genome-bam -p ${task.cpus} \ --output-genome-bam -p ${task.cpus} \
${reads} ${index_name} ${file_id} \ ${reads} ${index_id} ${file_id} \
> ${reads.baseName}_rsem_bowtie2_report.txt 2> ${file_id}_rsem_bowtie2_report.txt
if grep -q "Error" ${file_id}_rsem_bowtie2_report.txt; then
exit 1
fi
""" """
} }
/*
* RSEM :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/*
 * Reference indexing: default input globs and the channels built from them.
 * Both globs can be overridden on the command line (--fasta, --annotation).
 */
params.fasta = "$baseDir/data/bam/*.fasta"
params.annotation = "$baseDir/data/bam/*.gff3"

log.info "fasta files : ${params.fasta}"

// Reference sequence file(s); the run aborts if the glob matches nothing.
fasta_file = Channel
  .fromPath( params.fasta )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }

// Annotation file(s); the run aborts if the glob matches nothing.
annotation_file = Channel
  .fromPath( params.annotation )
  .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
/*
 * index_fasta: runs rsem-prepare-reference (bowtie2 mode) on one fasta file
 * and its annotation. Every produced file matching "*.index*" is emitted on
 * index_files and copied to results/mapping/index/.
 */
process index_fasta {
  tag "$fasta.baseName"
  publishDir "results/mapping/index/", mode: 'copy'
  cpus 4

  input:
  file fasta from fasta_file
  file annotation from annotation_file

  output:
  file "*.index*" into index_files

  script:
  // The annotation is passed as GFF3 unless its name ends with ".gtf".
  def is_gtf = annotation ==~ /.*\.gtf$/
  def cmd_annotation = is_gtf ? "--gtf ${annotation}" : "--gff3 ${annotation}"
"""
rsem-prepare-reference -p ${task.cpus} --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
${fasta.baseName}_rsem_bowtie2_report.txt
"""
}
/*
 * Paired-end quantification: default input globs and the channels built
 * from them. Both globs can be overridden with --fastq and --index.
 */
params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
params.index = "$baseDir/data/index/*.index.*"

log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"

// Read pairs grouped by Channel.fromFilePairs; abort if nothing matches.
fastq_files = Channel
  .fromFilePairs( params.fastq )
  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }

// Individual index files; abort if nothing matches.
index_files = Channel
  .fromPath( params.index )
  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
/*
 * mapping_fastq (paired-end): runs rsem-calculate-expression with bowtie2 on
 * one read pair against the collected index files.
 *
 * Inputs : (pair_id, reads) tuples from fastq_files; all index files at once.
 * Outputs: every file in the task directory, emitted on counts_files and
 *          copied to results/mapping/quantification/.
 */
process mapping_fastq {
  tag "$pair_id"
  cpus 4
  publishDir "results/mapping/quantification/", mode: 'copy'

  input:
  set pair_id, file(reads) from fastq_files
  file index from index_files.collect()

  output:
  file "*" into counts_files

  script:
  // RSEM takes the reference prefix, not a file. Derive it from the forward
  // bowtie2 file "<prefix>.1.bt2" (or ".bt2l") wherever it sits in the list.
  // Relying on index[0] breaks when the first staged file is e.g. "<prefix>.grp"
  // (no match) or "<prefix>.rev.1.bt2" (wrong prefix).
  def index_name = null
  for (index_file in index) {
    def file_name = index_file.name
    if (file_name ==~ /.*\.1\.bt2l?/ && !(file_name ==~ /.*\.rev\.1\.bt2l?/)) {
      index_name = (file_name =~ /(.*)\.1\.bt2l?$/)[0][1]
    }
  }
  if (index_name == null) {
    error "Cannot find a bowtie2 index file (*.1.bt2) among the index files"
  }
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
--output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
> ${pair_id}_rsem_bowtie2_report.txt
"""
}
/*
 * Single-end quantification: default input globs, fragment-length settings
 * and the channels built from them. All params can be overridden on the
 * command line.
 */
params.fastq = "$baseDir/data/fastq/*.fastq"
params.index = "$baseDir/data/index/*.index*"
params.mean = 125
params.sd = 100

log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"
log.info "mean read size: ${params.mean}"
log.info "sd read size: ${params.sd}"

// Each fastq becomes [id, file], where id is the base name up to its first dot.
fastq_files = Channel
  .fromPath( params.fastq )
  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
  .map { fastq -> [(fastq.baseName =~ /([^\.]*)/)[0][1], fastq] }

// Individual index files; abort if nothing matches.
index_files = Channel
  .fromPath( params.index )
  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
/*
 * mapping_fastq (single-end): runs rsem-calculate-expression with bowtie2 on
 * one fastq file against the collected index files, using params.mean and
 * params.sd as the fragment length mean and standard deviation.
 *
 * Inputs : (file_id, reads) tuples from fastq_files; all index files at once.
 * Outputs: every file in the task directory, emitted on count_files and
 *          copied to results/mapping/quantification/.
 */
process mapping_fastq {
  tag "$file_id"
  cpus 4
  publishDir "results/mapping/quantification/", mode: 'copy'

  input:
  set file_id, file(reads) from fastq_files
  file index from index_files.collect()

  output:
  file "*" into count_files

  script:
  // RSEM takes the reference prefix, not a file. Derive it from the forward
  // bowtie2 file "<prefix>.1.bt2" (or ".bt2l") wherever it sits in the list.
  // Relying on index[0] breaks when the first staged file is e.g. "<prefix>.grp"
  // (no match) or "<prefix>.rev.1.bt2" (wrong prefix).
  def index_name = null
  for (index_file in index) {
    def file_name = index_file.name
    if (file_name ==~ /.*\.1\.bt2l?/ && !(file_name ==~ /.*\.rev\.1\.bt2l?/)) {
      index_name = (file_name =~ /(.*)\.1\.bt2l?$/)[0][1]
    }
  }
  if (index_name == null) {
    error "Cannot find a bowtie2 index file (*.1.bt2) among the index files"
  }
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
--output-genome-bam -p ${task.cpus} \
${reads} ${index_name} ${file_id} \
> ${reads.baseName}_rsem_bowtie2_report.txt
"""
}
nextflow src/nf_modules/RSEM/tests/index.nf \ nextflow src/nf_modules/RSEM/indexing.nf \
-c src/nf_modules/RSEM/rsem.config \ -c src/nf_modules/RSEM/indexing.config \
-profile docker \ -profile docker \
--fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \ --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
--annotation "data/tiny_dataset/annot/tiny.gff" --annotation "data/tiny_dataset/annot/tiny.gff"
nextflow src/nf_modules/RSEM/tests/quantification_single.nf \ nextflow src/nf_modules/RSEM/quantification_single.nf \
-c src/nf_modules/RSEM/rsem.config \ -c src/nf_modules/RSEM/quantification_single.config \
-profile docker \ -profile docker \
--index "results/mapping/index/tiny_v2.index*" \ --index "results/mapping/index/tiny_v2.index*" \
--fastq "data/tiny_dataset/fastq/tiny*_S.fastq" --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"
nextflow src/nf_modules/RSEM/tests/quantification_paired.nf \ nextflow src/nf_modules/RSEM/quantification_paired.nf \
-c src/nf_modules/RSEM/rsem.config \ -c src/nf_modules/RSEM/quantification_paired.config \
-profile docker \ -profile docker \
--index "results/mapping/index/tiny_v2.index*" \ --index "results/mapping/index/tiny_v2.index*" \
--fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment