From 0a015caa8e984e8d616cf65d03ceb81cd931cd8a Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent@modolo.fr>
Date: Tue, 31 Mar 2020 11:31:04 +0200
Subject: [PATCH] RNASeq: add experimental biologists practical solution

---
Reviewer notes (this area is ignored by `git am`):

  What the patch does
  * src/RNASeq.nf: logs params.fasta / params.bed, opens one channel for each,
    and adds four processes after adaptor_removal:
      - trimming        UrQt (--t 20) on the cutadapt output, emits
                        *_trim_R{1,2}.fastq.gz into fastq_files_trim
      - fasta_from_bed  bedtools getfasta -name, emits *_extracted.fasta
      - index_fasta     kallisto index -k 31 --make-unique on the extracted
                        fasta, emits the index and a report
      - mapping_fastq   kallisto quant (--bias, 100 bootstrap samples) on the
                        trimmed pairs against the collected index
  * src/RNASeq.config: for the docker, singularity, psmn and ccin2p3 profiles,
    adds per-process settings for the four new processes and switches
    adaptor_removal from cutadapt 2.4 to cutadapt 2.1.

  Changes made during review
  * ccin2p3: clusterOptions for trimming, index_fasta and mapping_fastq is now
    written on a single line, like the adaptor_removal entry added by this
    same patch. The previous form split the string with a backslash-newline
    inside the quotes, which (as far as Groovy string continuation goes) left
    the next line's indentation at the end of the option string.
    Hunk headers and the diffstat below were updated for the three lines
    that disappeared.

  Caveats
  * Whitespace was reconstructed with two-space nesting assumed; if context
    does not match the target tree, use `git apply --ignore-whitespace`.
  * The blob ids on the "index" lines were not regenerated.

 src/RNASeq.config | 123 +++++++++++++++++++++++++++++++++++++++++++--
 src/RNASeq.nf     |  95 ++++++++++++++++++++++++++++++++--
 2 files changed, 210 insertions(+), 8 deletions(-)

diff --git a/src/RNASeq.config b/src/RNASeq.config
index ed42799d..69f61457 100644
--- a/src/RNASeq.config
+++ b/src/RNASeq.config
@@ -4,9 +4,25 @@ profiles {
     docker.enabled = true
     process {
       withName: adaptor_removal {
-        container = "lbmc/cutadapt:2.4"
+        container = "lbmc/cutadapt:2.1"
         cpus = 1
       }
+      withName: trimming {
+        cpus = 4
+        container = "lbmc/urqt:d62c1f8"
+      }
+      withName: fasta_from_bed {
+        container = "lbmc/bedtools:2.25.0"
+        cpus = 1
+      }
+      withName: index_fasta {
+        container = "lbmc/kallisto:0.44.0"
+        cpus = 4
+      }
+      withName: mapping_fastq {
+        container = "lbmc/kallisto:0.44.0"
+        cpus = 4
+      }
     }
   }
   singularity {
@@ -14,16 +30,53 @@ profiles {
     singularity.cacheDir = "./bin/"
     process {
       withName: adaptor_removal {
-        container = "lbmc/cutadapt:2.4"
+        container = "lbmc/cutadapt:2.1"
+        cpus = 1
+      }
+      withName: trimming {
+        cpus = 4
+        container = "lbmc/urqt:d62c1f8"
+      }
+      withName: fasta_from_bed {
+        container = "lbmc/bedtools:2.25.0"
         cpus = 1
       }
+      withName: index_fasta {
+        container = "lbmc/kallisto:0.44.0"
+        cpus = 4
+      }
+      withName: mapping_fastq {
+        container = "lbmc/kallisto:0.44.0"
+        cpus = 4
+      }
     }
   }
   psmn{
     process{
       withName: adaptor_removal {
         beforeScript = "source $baseDir/.conda_psmn.sh"
-        conda = "$baseDir/.conda_envs/cutadapt_2.4"
+        conda = "$baseDir/.conda_envs/cutadapt_2.1"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 1
+        memory = "20GB"
+        time = "12h"
+        queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
+      }
+      withName: trimming {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/urqt_d62c1f8"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "5GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+      withName: fasta_from_bed {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/bedtools_2.25.0"
         executor = "sge"
         clusterOptions = "-cwd -V"
         cpus = 1
@@ -31,6 +84,28 @@ profiles {
         time = "12h"
         queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
       }
+      withName: index_fasta {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/kallisto_0.44.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+      withName: mapping_fastq {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/kallisto_0.44.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
     }
   }
   ccin2p3 {
@@ -39,7 +114,27 @@ profiles {
     singularity.runOptions = "--bind /pbs,/sps,/scratch"
     process{
       withName: adaptor_removal {
-        container = "lbmc/cutadapt:2.4"
+        container = "lbmc/cutadapt:2.1"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: trimming {
+        container = "lbmc/urqt:d62c1f8"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: fasta_from_bed {
+        container = "lbmc/bedtools:2.25.0"
         scratch = true
         stageInMode = "copy"
         stageOutMode = "rsync"
@@ -48,6 +143,26 @@ profiles {
         cpus = 1
         queue = 'huge'
       }
+      withName: index_fasta {
+        container = "lbmc/kallisto:0.44.0"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: mapping_fastq {
+        container = "lbmc/kallisto:0.44.0"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1
+        queue = 'huge'
+      }
     }
   }
 }
diff --git a/src/RNASeq.nf b/src/RNASeq.nf
index c59eaaaa..73940d65 100644
--- a/src/RNASeq.nf
+++ b/src/RNASeq.nf
@@ -1,5 +1,15 @@
 log.info "fastq files : ${params.fastq}"
+log.info "fasta file : ${params.fasta}"
+log.info "bed file : ${params.bed}"
 
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_files }
+Channel
+  .fromPath( params.bed )
+  .ifEmpty { error "Cannot find any bed files matching: ${params.bed}" }
+  .set { bed_files }
 Channel
   .fromFilePairs( params.fastq )
   .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
@@ -16,9 +26,86 @@ process adaptor_removal {
   set pair_id, "*_cut_R{1,2}.fastq.gz" into fastq_files_cut
 
   script:
-  """
-  cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \
-  -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \
-  ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
   """
+
+  cutadapt -a AGATCGGAAGAG -g CTCTTCCGATCT -A AGATCGGAAGAG -G CTCTTCCGATCT \
+  -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \
+  ${reads[0]} ${reads[1]} > ${pair_id}_report.txt
+  """
+}
+
+process trimming {
+  tag "${reads}"
+  publishDir "results/fastq/trimming/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_cut
+
+  output:
+  set pair_id, "*_trim_R{1,2}.fastq.gz" into fastq_files_trim
+
+  script:
+"""
+UrQt --t 20 --m ${task.cpus} --gz \
+--in ${reads[0]} --inpair ${reads[1]} \
+--out ${pair_id}_trim_R1.fastq.gz --outpair ${pair_id}_trim_R2.fastq.gz \
+> ${pair_id}_trimming_report.txt
+"""
+}
+
+process fasta_from_bed {
+  tag "${bed.baseName}"
+  publishDir "results/fasta/", mode: 'copy'
+
+  input:
+  file fasta from fasta_files
+  file bed from bed_files
+
+  output:
+  file "*_extracted.fasta" into fasta_files_extracted
+
+  script:
+"""
+bedtools getfasta -name \
+-fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta
+"""
+}
+
+process index_fasta {
+  tag "$fasta.baseName"
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+  file fasta from fasta_files_extracted
+
+  output:
+  file "*.index*" into index_files
+  file "*_kallisto_report.txt" into index_files_report
+
+  script:
+"""
+kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
+2> ${fasta.baseName}_kallisto_report.txt
+"""
+}
+
+process mapping_fastq {
+  tag "$reads"
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files_trim
+  file index from index_files.collect()
+
+  output:
+  file "*" into counts_files
+
+  script:
+"""
+mkdir ${pair_id}
+
+kallisto quant -i ${index} -t ${task.cpus} \
+--bias --bootstrap-samples 100 -o ${pair_id} \
+${reads[0]} ${reads[1]} &> ${pair_id}/kallisto_report.txt
+"""
 }
-- 
GitLab