From aa52ea7efda8887f5112cf168f44fbe35afe4114 Mon Sep 17 00:00:00 2001 From: elabaron <emmanuel.labaronne@ens-lyon.fr> Date: Tue, 10 Nov 2020 16:45:02 +0100 Subject: [PATCH] src/RibosomeProfiling.nf : fix coverage normalisation --- src/RNAseq.config | 22 +++++- src/RibosomeProfiling.nf | 141 +++++++++++++++++++++++++++------------ 2 files changed, 118 insertions(+), 45 deletions(-) diff --git a/src/RNAseq.config b/src/RNAseq.config index 1ec50488..5a4cf78f 100644 --- a/src/RNAseq.config +++ b/src/RNAseq.config @@ -61,6 +61,15 @@ profiles { time = "12h" queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } + withName: calc_scalingFactor{ + container = "lbmc/hisat2:2.1.0" + executor = "sge" + clusterOptions = "-cwd -V" + memory = "20GB" + cpus = 1 + time = "12h" + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + } withName: coverage { container = "lbmc/deeptools:3.0.2" executor = "sge" @@ -71,7 +80,16 @@ profiles { time = "12h" queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } - withName: coverage_postgenome { + withName: calc_scalingFactor_with_postgenome{ + container = "lbmc/hisat2:2.1.0" + executor = "sge" + clusterOptions = "-cwd -V" + memory = "20GB" + cpus = 1 + time = "12h" + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + } + withName: coverage_with_postgenome { container = "lbmc/deeptools:3.0.2" executor = "sge" clusterOptions = "-cwd -V" @@ -101,7 +119,7 @@ profiles { time = "12h" queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } - withName: hisat2_postGenomic { + withName: hisat2_postgenome { container = "lbmc/hisat2:2.1.0" executor = "sge" clusterOptions = "-cwd -V" diff --git a/src/RibosomeProfiling.nf b/src/RibosomeProfiling.nf index 0f002d48..bb19b1c5 100644 --- a/src/RibosomeProfiling.nf +++ b/src/RibosomeProfiling.nf @@ -440,7 +440,7 @@ rnaseqc ${gtf} ${bam[0]} -s ${file_id} ./ /* HISAT2 POST_GENOMIC */ ///////////////////////// -process hisat2_postGenomic { +process hisat2_postgenome { tag "$file_id" publishDir "${params.output}/05_post_genome_hisat2/", mode: 'copy' @@ -550,50 +550,105 @@ process fastqc_postgenome { /* Coverage */ ////////////// -process coverage_postgenome { - tag "$file_id" - publishDir "${params.output}/07_coverage/", mode: 'copy' - - input: - set file_id, file(bam) from HISAT_ALIGNED_COV - set file_id_2, file(post_genome) from POSTGENOME_ALIGNED_COV - - output: - file "*.bigwig" into COVERAGE_OUTPUT - - when: - params.do_postgenome - - shell: - ''' - genome=$(samtools view !{bam[0]} | awk '{print $1}' | sort | uniq | wc -l) - postgenome=$(samtools view !{post_genome[0]} | awk '{print $1}' | sort | uniq | wc -l) - total=$(($genome + $postgenome)) - factor=$(awk -v c=$total 'BEGIN {print 1000000/c}') - bamCoverage -p !{task.cpus} --binSize 1 -b !{bam[0]} -o !{file_id}_genome.bigwig - bamCoverage -p !{task.cpus} --binSize 1 -b !{post_genome[0]} -o !{file_id}_postgenome.bigwig - ''' -} +if (params.do_postgenome){ + HISAT_ALIGNED_COV.join(POSTGENOME_ALIGNED_COV) + .into{COVERAGE_MERGED_VALUES;COVERAGE_MERGED_BAM} + + process calc_scalingFactor_with_postgenome{ + tag "$file_id" + + input: + set file_id, file(genome), file(post_genome) from COVERAGE_MERGED_VALUES + + output: + set file_id, env(genome), env(postgenome), env(factor) into COVERAGE_MERGED_FACTOR + + shell: + ''' + genome=$(samtools view !{genome[0]} | awk '{print $1}' | sort | uniq | wc -l) + postgenome=$(samtools view !{post_genome[0]} | awk '{print $1}' | sort | uniq | wc -l) + total=$(($genome + $postgenome)) + factor=$(awk -v c=$total 'BEGIN {print 1000000/c}') + ''' + } + + COVERAGE_MERGED_FACTOR.join(COVERAGE_MERGED_BAM) + .set{COVERAGE_MERGED_BIGWIG} + + process coverage_with_postgenome{ + tag "$file_id" + publishDir "${params.output}/07_coverage/", mode: 'copy' + + input: + set file_id, val(genome), val(postgenome), val (factor), file(genome_bam), file(post_genome_bam) from COVERAGE_MERGED_BIGWIG + + output: + file "*.bigwig" into COVERAGE_OUTPUT + file "*.txt" into COVERAGE_LOG + + shell: + ''' + total=$((!{genome} + !{postgenome})) + echo "genome aligment : !{genome} counts" >> !{file_id}.txt + echo "postgenome aligment : !{postgenome} counts" >> !{file_id}.txt + echo "total counts : $total" >> !{file_id}.txt + echo "scaling factor : !{factor}" >> !{file_id}.txt + if [ !{genome} -gt 0 ] + then + bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{genome_bam[0]} -o !{file_id}_genome.bigwig + fi + if [ !{postgenome} -gt 0 ] + then + bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{post_genome_bam[0]} -o !{file_id}_postgenome.bigwig + fi + ''' + } +} else { + HISAT_ALIGNED_COV.into{COVERAGE_MERGED_VALUES;COVERAGE_MERGED_BAM} + + process calc_scalingFactor{ + tag "$file_id" + + input: + set file_id, file(genome) from COVERAGE_MERGED_VALUES + + output: + set file_id, env(genome), env(factor) into COVERAGE_MERGED_FACTOR + + shell: + ''' + genome=$(samtools view !{genome[0]} | awk '{print $1}' | sort | uniq | wc -l) + factor=$(awk -v c=$genome 'BEGIN {print 1000000/c}') + ''' + } -process coverage { - tag "$file_id" - publishDir "${params.output}/07_coverage/", mode: 'copy' - - input: - set file_id, file(bam) from HISAT_ALIGNED_COV_2 - - output: - file "*.bigwig" into COVERAGE_OUTPUT_2 - - when: - params.do_postgenome==false - shell: - ''' - genome=$(samtools view !{bam[0]} | awk '{print $1}' | sort | uniq | wc -l) - factor=$(awk -v c=$genome 'BEGIN {print 1000000/c}') - bamCoverage -p !{task.cpus} --binSize 1 -b !{bam[0]} -o !{file_id}.bigwig - ''' + COVERAGE_MERGED_FACTOR.join(COVERAGE_MERGED_BAM) + .set{COVERAGE_MERGED_BIGWIG} + + + process coverage { + tag "$file_id" + publishDir "${params.output}/07_coverage/", mode: 'copy' + + input: + set file_id, val(genome), val (factor), file(genome_bam) from COVERAGE_MERGED_BIGWIG + + output: + file "*.bigwig" into COVERAGE_OUTPUT + file "*.txt" into COVERAGE_LOG + + shell: + ''' + echo "genome aligment : !{genome} counts" >> !{file_id}.txt + echo "scaling factor : !{factor}" >> !{file_id}.txt + if [ !{genome} -gt 0 ] + then + bamCoverage -p !{task.cpus} --binSize 1 --scaleFactor !{factor} -b !{genome_bam[0]} -o !{file_id}_genome.bigwig + fi + ''' + } } + ///////////// /* MultiQC */ ///////////// -- GitLab