diff --git a/src/RNAseq.nf b/src/RNAseq.nf index 8515e966fdab38145cb9508d3a72f5830f464ebc..791d4b0a675c1576156a847349115a0e6301cf4b 100644 --- a/src/RNAseq.nf +++ b/src/RNAseq.nf @@ -129,6 +129,7 @@ samtools index ${file_id}_sorted.bam """ } +sorted_bam_files.into{sorted_bam_htseq, sorted_bam_coverage} /* HTseq */ @@ -136,7 +137,7 @@ process sort_bam { tag "$file_id" input: - set file_id, file(bam) from sorted_bam_files + set file_id, file(bam) from sorted_bam_htseq output: set file_id, "*_htseq.bam" into sorted_bam_files_2 @@ -188,3 +189,22 @@ htseq-count ${bam[0]} ${gtf} \ """ } + +process coverage { + tag "$file_id" + publishDir "${params.output}/05_coverage/", mode: 'copy' + + input: + set file_id, file(bam) from sorted_bam_coverage + + output: + file "*.bw" into coverage_files + + script: +""" +bash src/norm_coverage.sh -b ${bam} \ + -o {file_id}.bw \ + --binSize 1 \ + -p ${cpus} 8 +""" +} diff --git a/src/norm_coverage.sh b/src/norm_coverage.sh new file mode 100644 index 0000000000000000000000000000000000000000..8dcf4c835e1639bbec5a84f10aadd1a2190f4e3c --- /dev/null +++ b/src/norm_coverage.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +set -e + +usage() { echo "Usage: $0 -b <bamfile.bam> -o <outputName> --binSize <int> -p <CPUs>" 1>&2; exit 1; } + +cpus=4 +binSize=1 + +while getopts "b:o:binSize:p:" arg; do + case $arg in + -h) + echo "usage" + ;; + -b) + bam=$OPTARG + ;; + -o) + output=$OPTARG + ;; + --binSize) + binSize=$OPTARG + ;; + -p) + cpus=$OPTARG + ;; + esac +done + +hg38=$(samtools view ${bam} | awk '{print $1}' | sort | uniq | wc -l) +factor=$(echo "1000000/($hg38)" | bc -l) +echo "hg38 counts : $hg38" +echo "scaling factor : $factor\n" +bamCoverage -p ${cpus} --scaleFactor ${factor} --binSize ${binSize} -b ${bam} -o ${output}