From f70a43f950cc4d9c7a5755c79e3c217477ad801c Mon Sep 17 00:00:00 2001
From: Emmanuel Labaronne <emmanuel.labaronne@ens-lyon.fr>
Date: Fri, 14 Feb 2020 11:05:15 +0100
Subject: [PATCH] add nextflow scripts for umi_tools dedup

---
 src/nf_modules/umi_tools/dedup.config | 54 +++++++++++++++++++++++++++
 src/nf_modules/umi_tools/dedup.nf     | 48 ++++++++++++++++++++++++
 src/nf_modules/umi_tools/tests.sh     | 13 +++++++
 3 files changed, 115 insertions(+)
 create mode 100644 src/nf_modules/umi_tools/dedup.config
 create mode 100644 src/nf_modules/umi_tools/dedup.nf
 create mode 100644 src/nf_modules/umi_tools/tests.sh

diff --git a/src/nf_modules/umi_tools/dedup.config b/src/nf_modules/umi_tools/dedup.config
new file mode 100644
index 00000000..08679f07
--- /dev/null
+++ b/src/nf_modules/umi_tools/dedup.config
@@ -0,0 +1,54 @@
+profiles { // one profile per execution backend: docker, singularity, psmn (SGE)
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: sort_bam {
+        container = "lbmc/samtools:1.7"
+        cpus = 1
+      }
+      withName: dedup { // selector has to equal the process name used in dedup.nf
+        container = "lbmc/umi_tools:1.0.0"
+        cpus = 1
+      }
+    }
+  }
+  singularity {
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/"
+    process {
+      withName: sort_bam {
+        container = "lbmc/samtools:1.7"
+        cpus = 1
+      }
+      withName: dedup { // selector has to equal the process name used in dedup.nf
+        container = "lbmc/umi_tools:1.0.0"
+        cpus = 1
+      }
+    }
+  }
+  psmn{
+    process{
+      withName: sort_bam {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/samtools_1.7"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 1
+        memory = "20GB"
+        time = "12h"
+        queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
+      }
+      withName: dedup { // selector has to equal the process name used in dedup.nf
+        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
+        module = "umi_tools/1.0.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 1
+        memory = "20GB"
+        time = "12h"
+        queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/umi_tools/dedup.nf b/src/nf_modules/umi_tools/dedup.nf
new file mode 100644
index 00000000..0b7455f2
--- /dev/null
+++ b/src/nf_modules/umi_tools/dedup.nf
@@ -0,0 +1,48 @@
+params.bam = "$baseDir/data/bam/*.bam" // glob selecting the input BAM files
+params.dedup_options = "" // additional options for umi_tools dedup (none by default)
+params.options = params.dedup_options // legacy name read further down; a command-line --options still takes precedence
+log.info "bam files : ${params.bam}"
+log.info "aditionnal option for umi_tools dedup : ${params.options}"
+
+Channel
+  .fromPath( params.bam )
+  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} // emits [file_id, file]; file_id = baseName up to its first '.'
+  .set { bam_files }
+
+process sort_bam {
+  tag "${file_id}"
+  cpus 4 // script-level default; a cpus value set for this process in the config takes precedence
+
+  input:
+    set file_id, file(unsorted_bam) from bam_files
+
+  output:
+    set file_id, "*_sorted.bam{,.bai}" into sorted_bam_files
+
+  script:
+"""
+# sort the alignments, then index the sorted file (emitted together as one item)
+samtools sort -@ ${task.cpus} -o ${file_id}_sorted.bam ${unsorted_bam}
+samtools index ${file_id}_sorted.bam
+"""
+}
+
+process dedup {
+  tag "$file_id"
+  publishDir "results/dedup/", mode: 'copy'
+
+  input:
+  set file_id, file(bam) from sorted_bam_files // bam: sorted BAM and its .bai; bam[0] is expected to be the BAM
+
+  output:
+  file "*dedup.bam" into dedup_bam
+  file "*.txt" into dedup_report // report is named after file_id so published copies do not overwrite each other
+
+  script: // extra options: params.options when set, otherwise params.dedup_options (avoids a literal "null" argument)
+"""
+umi_tools dedup -I ${bam[0]} \
+                ${params.options ?: params.dedup_options} \
+                -S ${file_id}_dedup.bam > ${file_id}_report.txt
+"""
+}
diff --git a/src/nf_modules/umi_tools/tests.sh b/src/nf_modules/umi_tools/tests.sh
new file mode 100644
index 00000000..1a9cb806
--- /dev/null
+++ b/src/nf_modules/umi_tools/tests.sh
@@ -0,0 +1,13 @@
+run_dedup() { # run the dedup pipeline on the tiny test BAM with the profile given as first argument
+  ./nextflow src/nf_modules/umi_tools/dedup.nf \
+    -c src/nf_modules/umi_tools/dedup.config \
+    -profile "$1" \
+    --bam "data/tiny_dataset/map/tiny_v2.bam" \
+    -resume
+}
+
+run_dedup docker
+# the singularity run only happens when a singularity executable is found
+if [ -x "$(command -v singularity)" ]; then
+  run_dedup singularity
+fi
-- 
GitLab