Skip to content
Snippets Groups Projects
Verified Commit 4d8776b9 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

RSEM: fix dockerfile and add nf and test files

parent f7f61d07
No related branches found
No related tags found
No related merge requests found
......@@ -3,11 +3,13 @@ MAINTAINER Laurent Modolo
ENV RSEM_VERSION=1.3.0
ENV BOWTIE2_VERSION=2.3.4.1
ENV SAMTOOLS_VERSION=1.7
ENV PACKAGES git=1:2.17.0* \
build-essential=12.4* \
ca-certificates=20180409 \
zlib1g-dev=1:1.2.11* \
bowtie2=${BOWTIE2_VERSION}*
bowtie2=${BOWTIE2_VERSION}* \
samtools=${SAMTOOLS_VERSION}*
RUN apt-get update && \
apt-get install -y --no-install-recommends ${PACKAGES} && \
......
profiles {
docker {
docker.temp = 'auto'
docker.enabled = true
process {
$index_fasta {
container = "rsem:1.3.0"
}
$mapping_fastq {
container = "rsem:1.3.0"
}
}
}
sge {
process{
$index_fasta {
beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
}
$mapping_fastq {
beforeScript = "module purge; module load RSEM/1.3.0; module load SAMtools/1.7"
}
}
}
}
/*
* RSEM :
* Imputs : fastq files
* Imputs : fasta files
* Output : bam files
*/
/* fasta indexing */
params.fasta = "$baseDir/data/bam/*.fasta"
params.annotation = "$baseDir/data/bam/*.gff3"
log.info "fasta files : ${params.fasta}"
Channel
.fromPath( params.fasta )
.ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
.set { fasta_file }
Channel
.fromPath( params.annotation )
.ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
.set { annotation_file }
process index_fasta {
tag "$fasta.baseName"
cpus 4
publishDir "results/mapping/index/", mode: 'copy'
input:
file fasta from fasta_file
file annotation from annotation_file
output:
file "*.index*" into index_files
script:
def cmd_annotation = "--gff3 ${annotation}"
if(annotation ==~ /.*\.gtf$/){
cmd_annotation = "--gtf ${annotation}"
}
"""
rsem-prepare-reference -p ${task.cpus} --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
${fasta.baseName}_rsem_bowtie2_report.txt
"""
}
/*
* for paired-end data
*/
params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
params.index = "$baseDir/data/index/*.index.*"
log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"
Channel
.fromFilePairs( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
Channel
.fromPath( params.index )
.ifEmpty { error "Cannot find any index files matching: ${params.index}" }
.set { index_files }
process mapping_fastq {
tag "$pair_id"
cpus 4
publishDir "results/mapping/quantification/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files
file index from index_files.toList()
output:
file "*" into counts_files
script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
-output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
> ${pair_id}_rsem_bowtie2_report.txt
"""
}
/*
* for single-end data
*/
params.fastq = "$baseDir/data/fastq/*.fastq"
params.index = "$baseDir/data/index/*.index*"
params.mean = 300
params.sd = 100
log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"
log.info "mean read size: ${params.mean}"
log.info "sd read size: ${params.sd}"
Channel
.fromPath( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
Channel
.fromPath( params.index )
.ifEmpty { error "Cannot find any index files matching: ${params.index}" }
.set { index_files }
process mapping_fastq {
tag "$reads.baseName"
cpus 4
publishDir "results/mapping/quantification/", mode: 'copy'
input:
file reads from fastq_files
file index from index_files.toList()
output:
file "*" into count_files
script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
--output-genome-bam -p ${task.cpus} \
${reads} ${index_name} ${tagname} > ${tagname}_rsem_bowtie2_report.txt
"""
}
params.fasta = "$baseDir/data/bam/*.fasta"
params.annotation = "$baseDir/data/bam/*.gff3"
log.info "fasta files : ${params.fasta}"
Channel
.fromPath( params.fasta )
.ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
.set { fasta_file }
Channel
.fromPath( params.annotation )
.ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" }
.set { annotation_file }
process index_fasta {
tag "$fasta.baseName"
cpus 4
publishDir "results/mapping/index/", mode: 'copy'
input:
file fasta from fasta_file
file annotation from annotation_file
output:
file "*.index*" into index_files
script:
def cmd_annotation = "--gff3 ${annotation}"
if(annotation ==~ /.*\.gtf$/){
cmd_annotation = "--gtf ${annotation}"
}
"""
rsem-prepare-reference -p ${task.cpus} --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
${cmd_annotation} ${fasta} ${fasta.baseName}.index > \
${fasta.baseName}_rsem_bowtie2_report.txt
"""
}
params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
params.index = "$baseDir/data/index/*.index.*"
log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"
Channel
.fromFilePairs( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
Channel
.fromPath( params.index )
.ifEmpty { error "Cannot find any index files matching: ${params.index}" }
.set { index_files }
process mapping_fastq {
tag "$pair_id"
cpus 4
publishDir "results/mapping/quantification/", mode: 'copy'
input:
set pair_id, file(reads) from fastq_files
file index from index_files.toList()
output:
file "*" into counts_files
script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
-output-genome-bam -p ${task.cpus} \
--paired-end ${reads[0]} ${reads[1]} ${index_name} ${pair_id} \
> ${pair_id}_rsem_bowtie2_report.txt
"""
}
params.fastq = "$baseDir/data/fastq/*.fastq"
params.index = "$baseDir/data/index/*.index*"
params.mean = 300
params.sd = 100
log.info "fastq files : ${params.fastq}"
log.info "index files : ${params.index}"
log.info "mean read size: ${params.mean}"
log.info "sd read size: ${params.sd}"
Channel
.fromPath( params.fastq )
.ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
.set { fastq_files }
Channel
.fromPath( params.index )
.ifEmpty { error "Cannot find any index files matching: ${params.index}" }
.set { index_files }
process mapping_fastq {
tag "$reads.baseName"
cpus 4
publishDir "results/mapping/quantification/", mode: 'copy'
input:
file reads from fastq_files
file index from index_files.toList()
output:
file "*" into count_files
script:
index_name = (index[0].baseName =~ /(.*)\.\d/)[0][1]
"""
rsem-calculate-expression --bowtie2 \
--bowtie2-path \$(which bowtie2 | sed 's/bowtie2\$//g') \
--bowtie2-sensitivity-level "very_sensitive" \
--fragment-length-mean ${params.mean} --fragment-length-sd ${params.sd} \
--output-genome-bam -p ${task.cpus} \
${reads} ${index_name} ${reads.baseName} \
> ${reads.baseName}_rsem_bowtie2_report.txt
"""
}
nextflow src/nf_modules/RSEM/tests/index.nf \
-c src/nf_modules/RSEM/rsem.config \
-profile docker \
--fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
--annotation "data/tiny_dataset/annot/tiny.gff"
nextflow src/nf_modules/RSEM/tests/quantification_single.nf \
-c src/nf_modules/RSEM/rsem.config \
-profile docker \
--index "results/mapping/index/tiny_v2.index*" \
--fastq "data/tiny_dataset/fastq/tiny*_S.fastq"
nextflow src/nf_modules/RSEM/tests/quantification_paired.nf \
-c src/nf_modules/RSEM/rsem.config \
-profile docker \
--index "results/mapping/index/tiny_v2.index*" \
--fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment