diff --git a/src/RNAseq.config b/src/RNAseq.config index a4be61a6426e65a8088c22edee6393caddc133a4..26d4465074f66813ab90d82b6004554ef93cc3b9 100644 --- a/src/RNAseq.config +++ b/src/RNAseq.config @@ -92,6 +92,10 @@ profiles { cpus = 4 container = "lbmc/hisat2:2.1.0" } + withName: fastqc_genome { + container = "lbmc/fastqc:0.11.5" + cpus = 4 + } withName: dedup_genome { container = "lbmc/umi_tools:1.0.0" cpus = 1 @@ -104,10 +108,22 @@ profiles { container = "lbmc/htseq:0.11.2" cpus = 1 } + withName: rnaseq_qc { + container = "gcr.io/broad-cga-aarong-gtex/rnaseqc:latest" + cpus = 1 + } withName: hisat2_postGenomic { cpus = 4 container = "lbmc/hisat2:2.1.0" } + withName: dedup_postgenome { + container = "lbmc/umi_tools:1.0.0" + cpus = 1 + } + withName: fastqc_postgenome { + container = "lbmc/fastqc:0.11.5" + cpus = 4 + } withName: multiqc { container = "ewels/multiqc:1.9" cpus = 1 diff --git a/src/RNAseq.nf b/src/RNAseq.nf index 872421454fa35154df0ea1c3030eeeebeb8f1d05..7788603969e8f357c1b9eb7761604e81ae33cf82 100644 --- a/src/RNAseq.nf +++ b/src/RNAseq.nf @@ -10,6 +10,7 @@ params.filter = "data/filter/human_rRNA_tRNA/*.bt2" params.index_genome = "data/genome/*.ht2" params.do_dedup = true params.gtf = "data/annotation/*.gtf" +params.gtf_collapse = "data/annotation/*.gtf" params.index_postgenome = "data/post_genome/*.ht2" params.do_postgenome = true @@ -18,6 +19,7 @@ log.info "outut directory : ${params.output}" log.info "filter index files : ${params.filter}" log.info "genome index : ${params.index_genome}" log.info "gtf file : ${params.gtf}" +log.info "collapsed gtf file for rnaseqc: ${params.gtf_collapse}" log.info "post-genome index : ${params.index_postgenome}" log.info "" log.info "do fastqc ? : ${params.do_fastqc}" @@ -47,8 +49,13 @@ Channel .set { GTF_FILE } Channel - .fromPath ( params.index_genome ) - .ifEmpty { error "Cannot find any index files matching: ${params.index_genome}" } + .fromPath( params.gtf_collapse ) + .ifEmpty { error "Cannot find any gtf file matching: ${params.gtf_collapse}" } + .set { GTF_COLLAPSE } + +Channel + .fromPath ( params.index_postgenome ) + .ifEmpty { error "Cannot find any index files matching: ${params.index_postgenome}" } .set { POSTGENOME_INDEX } /* Fastqc of raw input */ @@ -170,7 +177,9 @@ fi """ } FILTER_FASTQ.into{FILTER_FASTQ_FASTQC; - FILTER_FASTQ_HISAT} + FILTER_FASTQ_HISAT; + FILTER_FASTQ_POSTGENOME + } /* Fastqc of filtred reads */ @@ -203,7 +212,7 @@ process hisat2_human { file index from GENOME_INDEX.toList() output: - set file_id, "*.fastq.gz" into HISAT_UNALIGNED + set file_id, "*_notaligned_R{1,2}.fastq.gz" into HISAT_UNALIGNED set file_id, "*.{bam,bam.bai}" into HISAT_ALIGNED file "*.txt" into HISAT_LOG @@ -283,13 +292,17 @@ if (! params.do_dedup) { HISAT_ALIGNED_DEDUP.into{DEDUP_GENOME} } +DEDUP_GENOME.into{DEDUP_GENOME_HTSEQ; + DEDUP_GENOME_RNASEQC + } + /* HTseq */ process sort_bam { tag "$file_id" input: - set file_id, file(bam) from DEDUP_GENOME + set file_id, file(bam) from DEDUP_GENOME_HTSEQ output: set file_id, "*_htseq.bam" into SORTED_NAME_GENOME @@ -334,6 +347,31 @@ htseq-count ${bam[0]} ${gtf} \ """ } +/* HTseq */ + +process rnaseq_qc { + tag "$file_id" + publishDir "${params.output}/06_RNAseqQC/", mode: 'copy' + + input: + set file_id, file(bam) from DEDUP_GENOME_RNASEQC + file (gtf) from GTF_COLLAPSE.collect() + + output: + file "*" into RNASEQC_OUTPUT + + script: +""" +rnaseqc ${gtf} ${bam[0]} -s ${file_id} ./ \ +--stranded 'FR' +""" +} + +//////////////////////////////////////////////////////////////////////// +//////////////////////////// POST GENOME /////////////////////////////// +//////////////////////////////////////////////////////////////////////// + + /* HISAT2 POST_GENOMIC */ process hisat2_postGenomic { @@ -341,12 +379,12 @@ process hisat2_postGenomic { publishDir "${params.output}/05_post_genome_hisat2/", mode: 'copy' input: - set file_id, file(fastq_unaligned) from HISAT_UNALIGNED - file index2 from POSTGENOME_INDEX.toList() + set file_id, file(fastq_unaligned) from FILTER_FASTQ_POSTGENOME + file index2 from POSTGENOME_INDEX.collect() output: set file_id, "*.{bam,bam.bai}" into POSTGENOME_ALIGNED - file "*.txt" into POSTGENOME_LOG + file "*_postgenome.txt" into POSTGENOME_LOG when: params.do_postgenome @@ -364,12 +402,12 @@ hisat2 -x ${index2_id} \ -1 ${fastq_unaligned[0]} \ -2 ${fastq_unaligned[1]} \ --rna-strandness 'F' \ - 2> ${file_id}.txt \ + 2> ${file_id}_postgenome.txt \ | samtools view -bS -F 4 - \ | samtools sort -@ ${task.cpus} -o ${file_id}.bam \ && samtools index ${file_id}.bam -if grep -q "ERR" ${file_id}.txt; then +if grep -q "ERR" ${file_id}_postgenome.txt; then exit 1 fi """ @@ -377,6 +415,48 @@ fi POSTGENOME_ALIGNED.into{POSTGENOME_ALIGNED_FASTQC; POSTGENOME_ALIGNED_DEDUP} + +/* Deduplication of reads */ + +process dedup_postgenome { + tag "$file_id" + publishDir "${params.output}/05_post_genome_hisat2/dedup/", mode: 'copy' + + input: + set file_id, file(bam) from POSTGENOME_ALIGNED_DEDUP + + output: + set file_id, "*.{bam, bai}" into DEDUP_POSTGENOME + file "*.log" into DEDUP_POSTGENOME_LOG + + when: + params.do_dedup + + """ + umi_tools dedup -I ${bam}[0] \ + -S ${file_id}.dedup.bam \ + 2> dedup.log + """ +} + +process fastqc_postgenome { + tag "$file_id" + publishDir "${params.output}/00_fastqc/postgenome/", mode: 'copy' + + input: + set file_id, file(reads) from POSTGENOME_ALIGNED_FASTQC + + output: + set file_id, "*.{html,zip}" into OUTPUT_FASTQC_POSTGENOME + + when: + params.do_fastqc + + """ + fastqc ${file_id}* -t ${task.cpus} + """ +} + /* Channel .fromFilePairs(params.script_cov) @@ -421,6 +501,8 @@ process multiqc { file ('fastqc/*') from OUTPUT_FASTQC_GENOME.collect().ifEmpty([]) file ('*') from HTSEQ_COUNT.collect().ifEmpty([]) file ('*') from POSTGENOME_LOG.collect().ifEmpty([]) + file ('fastqc/*') from OUTPUT_FASTQC_POSTGENOME.collect().ifEmpty([]) + file ('*') from RNASEQC_OUTPUT.collect().ifEmpty([]) output: file "multiqc_report.html" into multiqc_report