From 6fffaaeea2857f1f7c4e6e4c8f55a8841e93e160 Mon Sep 17 00:00:00 2001
From: Emmanuel Labaronne <emmanuel.labaronne@ens-lyon.fr>
Date: Thu, 2 Jul 2020 10:48:12 +0200
Subject: [PATCH] add src/norm_coverage and add coverage calcul to RNAseq
 worflow

---
 src/RNAseq.nf        | 22 +++++++++++++++++++++-
 src/norm_coverage.sh | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 src/norm_coverage.sh

diff --git a/src/RNAseq.nf b/src/RNAseq.nf
index 8515e966..791d4b0a 100644
--- a/src/RNAseq.nf
+++ b/src/RNAseq.nf
@@ -129,6 +129,7 @@ samtools index ${file_id}_sorted.bam
 """
 }
 
+sorted_bam_files.into{sorted_bam_htseq, sorted_bam_coverage}
 
 /*                   HTseq                            */
 
@@ -136,7 +137,7 @@ process sort_bam {
   tag "$file_id"
 
   input:
-    set file_id, file(bam) from sorted_bam_files
+    set file_id, file(bam) from sorted_bam_htseq
 
   output:
     set file_id, "*_htseq.bam" into sorted_bam_files_2
@@ -188,3 +189,22 @@ htseq-count ${bam[0]} ${gtf} \
 
 """
 }
+
+process coverage {
+  tag "$file_id"
+  publishDir "${params.output}/05_coverage/", mode: 'copy'
+
+  input:
+  set file_id, file(bam) from sorted_bam_coverage
+
+  output:
+  file "*.bw" into coverage_files
+
+  script:
+"""
+bash src/norm_coverage.sh -b ${bam} \
+                          -o {file_id}.bw \
+                          --binSize 1 \
+                          -p ${cpus} 8
+"""
+}
diff --git a/src/norm_coverage.sh b/src/norm_coverage.sh
new file mode 100644
index 00000000..8dcf4c83
--- /dev/null
+++ b/src/norm_coverage.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+set -e
+
+usage() { echo "Usage: $0 -b <bamfile.bam> -o <outputName> --binSize <int> -p <CPUs>" 1>&2; exit 1; }
+
+cpus=4
+binSize=1
+
+while getopts "b:o:binSize:p:" arg; do
+  case $arg in
+    -h)
+      echo "usage"
+      ;;
+    -b)
+      bam=$OPTARG
+      ;;
+    -o)
+      output=$OPTARG
+      ;;
+    --binSize)
+      binSize=$OPTARG
+      ;;
+    -p)
+      cpus=$OPTARG
+      ;;
+  esac
+done
+
+hg38=$(samtools view ${bam} | awk '{print $1}' | sort | uniq | wc -l)
+factor=$(echo "1000000/($hg38)" | bc -l)
+echo "hg38 counts : $hg38"
+echo "scaling factor : $factor\n"
+bamCoverage -p ${cpus} --scaleFactor ${factor} --binSize ${binSize} -b ${bam} -o ${output}
-- 
GitLab