diff --git a/src/RNAseq.nf b/src/RNAseq.nf index 73aaf7b2482d05ee04b9428606378b2575e7c46a..084b9ff2e84260fddfe215d13913faacb4f2c191 100644 --- a/src/RNAseq.nf +++ b/src/RNAseq.nf @@ -2,23 +2,24 @@ * RNAseq Analysis pipeline */ -params.input = "data/demultiplexed/*{_R1,_R2}.fastq.gz" +params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz" +params.output = "results" Channel - .fromFilePairs(params.input) - .ifEmpty { error "Cannot find any file matching: ${params.input}" } - .set {input_channel} + .fromFilePairs(params.fastq_raw) + .ifEmpty { error "Cannot find any file matching: ${params.fastq_raw}" } + .set {fastq_raw_channel} /* Trimming by quality */ process trimming { tag "$file_id" cpus 4 - publishDir "results/RNAseq/01_cutadapt/", mode: 'copy' + publishDir "${params.output}/01_cutadapt/", mode: 'copy' echo true input: - set file_id, file(reads) from input_channel + set file_id, file(reads) from fastq_raw_channel output: set file_id, "*cut_{R1,R2}.fastq.gz" into fastq_files_cut @@ -51,7 +52,7 @@ Channel process rRNA_removal { tag "$file_id" cpus 8 - publishDir "results/RNAseq/02_rRNA_depletion/", mode: 'copy' + publishDir "${params.output}/02_rRNA_depletion/", mode: 'copy' input: set file_id, file(reads) from fastq_files_cut @@ -62,8 +63,14 @@ process rRNA_removal { file "*.txt" into bowtie_report script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } """ -bowtie2 --sensitive -p ${task.cpus} -x human_rRNA_tRNA \ +bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \ -1 ${reads[0]} -2 ${reads[1]} --un-conc-gz ${file_id}_R%.fastq.gz 2> \ ${file_id}_bowtie2_report.txt > /dev/null @@ -82,7 +89,7 @@ log.info "index : ${params.index_hg38}" Channel .fromPath ( params.index_hg38 ) - .ifEmpty { error "Cannot find any hg38 index files matching: ${params.index_hg38}" } + .ifEmpty { error "Cannot find any index files matching: ${params.index_hg38}" } .set { index_file_hg38 } process hisat2_human { @@ -98,10 +105,16 @@ process hisat2_human { file "*.txt" into hisat_report script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) { + index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1] + } + } """ -hisat2 -x genome_tran -p ${task.cpus} \ +hisat2 -x ${index_id} -p ${task.cpus} \ -1 ${fastq_filtred[0]} -2 ${fastq_filtred[1]} \ ---un-conc-gz ${file_id}_notaligned_hg38_R%.fastq.gz \ +--un-conc-gz ${file_id}_notaligned_R%.fastq.gz \ --rna-strandness 'F' \ 2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam @@ -115,10 +128,10 @@ reads_aligned_hg38.into{for_mapping;for_htseq} process index_bam { tag "$file_id" - publishDir "results/RNAseq/03_hisat2_hg38/", mode: 'copy' + publishDir "${params.output}/03_hisat2/", mode: 'copy' input: - set file_id, file(bam) from for_mapping + set file_id, file(bam) from for_mapping file report from hisat_report output: @@ -191,7 +204,7 @@ process sort_bam { """ samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_sorted.bam ${bam} """ -} +} params.gtf = "$baseDir/data/annotation/*.gtf" log.info "gtf files : ${params.gtf}" @@ -203,7 +216,7 @@ Channel process counting { tag "$file_id" - publishDir "results/RNAseq/04_HTseq/", mode: 'copy' + publishDir "${params.output}/04_HTseq/", mode: 'copy' input: set file_id, file(bam) from sorted_bam_files_2 diff --git a/src/RibosomeProfiling.nf b/src/RibosomeProfiling.nf index 15e02098a462d9a5c7126218b3364106ee53da4d..9586d01e71ab885c4a733c8d439eb3c388352491 100644 --- a/src/RibosomeProfiling.nf +++ b/src/RibosomeProfiling.nf @@ -55,8 +55,14 @@ process rRNA_removal { file "*.txt" into bowtie_report script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } """ -zcat ${reads} | bowtie2 --sensitive -p ${task.cpus} -x human_rRNA_tRNA \ +zcat ${reads} | bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \ -U - --un-gz ${file_id}_mRNA.fastq.gz 2> \ ${file_id}_bowtie2_report.txt > /dev/null @@ -75,7 +81,7 @@ log.info "index : ${params.index_hg38}" Channel .fromPath ( params.index_hg38 ) - .ifEmpty { error "Cannot find any hg38 index files matching: ${params.index_hg38}" } + .ifEmpty { error "Cannot find any index files matching: ${params.index_hg38}" } .set { index_file_hg38 } process hisat2_human { @@ -92,9 +98,15 @@ process hisat2_human { file "*.txt" into hisat_report script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) { + index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1] + } + } """ -hisat2 -x genome_tran -p ${task.cpus} \ --U ${fastq_filtred} --un-gz ${file_id}_notaligned_hg38.fastq.gz \ +hisat2 -x ${index_id} -p ${task.cpus} \ +-U ${fastq_filtred} --un-gz ${file_id}_notaligned.fastq.gz \ --end-to-end --rna-strandness 'F' \ 2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam @@ -105,7 +117,7 @@ hisat2 -x genome_tran -p ${task.cpus} \ process index_bam { tag "$file_id" - publishDir "${params.output}/03_hisat2_hg38/", mode: 'copy' + publishDir "${params.output}/03_hisat2/", mode: 'copy' input: set file_id, file(bam) from reads_aligned_hg38 @@ -122,7 +134,7 @@ samtools index ${file_id}_sorted.bam sorted_bam_files.into{for_dedup;for_htseq} -/* deduplicating reads */ +/* deduplicating reads params.dedup_options = "" @@ -142,11 +154,11 @@ umi_tools dedup -I ${bam[0]} \ ${params.dedup_options} \ -S ${file_id}_dedup.bam > report.txt """ -} - +}*/ +/* process sort_bam { tag "$file_id" - publishDir "${params.output}/03_hisat2_hg38_dedup/", mode: 'copy' + publishDir "${params.output}/03_hisat2_dedup/", mode: 'copy' input: set file_id, file(bam) from dedup_bam @@ -163,7 +175,7 @@ samtools index ${file_id}_sorted.bam cat ${dedup} > ${file_id}_dedup_report.txt """ } - +*/ /* HTseq */ params.gtf = "$baseDir/data/annotation/*.gtf"