From c3401f6e4a2311bc70d78164ab49f613422a4e3d Mon Sep 17 00:00:00 2001 From: Emmanuel Labaronne <emmanuel.labaronne@ens-lyon.fr> Date: Thu, 24 Sep 2020 18:08:27 +0200 Subject: [PATCH] src/RNAseq.nf : add dedup option --- src/RNAseq.config | 4 ++++ src/RNAseq.nf | 60 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/RNAseq.config b/src/RNAseq.config index 83560a81..c8f39d48 100644 --- a/src/RNAseq.config +++ b/src/RNAseq.config @@ -92,6 +92,10 @@ profiles { cpus = 4 container = "lbmc/hisat2:2.1.0" } + withName: dedup_genome { + container = "lbmc/umi_tools:1.0.0" + cpus = 1 + } withName: sort_bam { container = "lbmc/samtools:1.7" cpus = 1 diff --git a/src/RNAseq.nf b/src/RNAseq.nf index 96ef0945..8022c44a 100644 --- a/src/RNAseq.nf +++ b/src/RNAseq.nf @@ -8,12 +8,16 @@ params.do_fastqc = true params.script_cov = "src/norm_coverage.sh" params.filter = "data/filter/human_rRNA_tRNA/*.bt2" params.index_genome = "data/genome/*.ht2" +params.do_dedup = true log.info "input raw : ${params.fastq_raw}" log.info "outut directory : ${params.output}" -log.info "do fastqc ? : ${params.do_fastqc}" log.info "filter index files : ${params.filter}" log.info "genome index : ${params.index_genome}" +log.info "" +log.info "do fastqc ? : ${params.do_fastqc}" +log.info "do deduplication ? : ${params.do_dedup}" +log.info "" Channel .fromFilePairs(params.fastq_raw) @@ -75,7 +79,6 @@ process trimming { ${reads[0]} ${reads[1]} > ${file_id}_first_report.txt cutadapt -j ${task.cpus} \ - -a "A{100}" \ -u -10 \ -o ${file_id}_cut_R1.fastq.gz \ ${file_id}_tmp_R1.fastq.gz \ @@ -183,7 +186,7 @@ process hisat2_human { output: file "*.fastq.gz" into HISAT_UNALIGNED - set file_id, "*_sorted.{bam,bam.bai}" into HISAT_ALIGNED + set file_id, "*.{bam,bam.bai}" into HISAT_ALIGNED file "*.txt" into HISAT_LOG script: @@ -211,7 +214,56 @@ fi """ } -//sorted_bam_files.into{sorted_bam_htseq; sorted_bam_coverage} +HISAT_ALIGNED.into{HISAT_ALIGNED_FASTQC; + HISAT_ALIGNED_DEDUP} + + +/* Fastqc of filtred reads */ + +process fastqc_genome { + tag "$file_id" + publishDir "${params.output}/00_fastqc/genome/", mode: 'copy' + + input: + set file_id, file(reads) from HISAT_ALIGNED_FASTQC + + output: + set file_id, "*.{html,zip}" into OUTPUT_FASTQC_GENOME + + when: + params.do_fastqc + + """ + fastqc ${file_id}* -t ${task.cpus} + """ +} + +/* Deduplication of reads */ + +process dedup_genome { + tag "$file_id" + publishDir "${params.output}/03_hisat2/dedup/", mode: 'copy' + + input: + set file_id, file(bam) from HISAT_ALIGNED_DEDUP + + output: + set file_id, "*.bam" into DEDUP_GENOME + file "*.log" into DEDUP_LOG + + when: + params.do_dedup + + """ + umi_tools dedup -I ${bam}[0] \ + -S ${file_id}.dedup.bam \ + 2> dedup.log + """ +} + +if (! params.do_dedup) { + HISAT_ALIGNED_DEDUP.into{DEDUP_GENOME} +} /* HTseq -- GitLab