From f70a43f950cc4d9c7a5755c79e3c217477ad801c Mon Sep 17 00:00:00 2001
From: Emmanuel Labaronne <emmanuel.labaronne@ens-lyon.fr>
Date: Fri, 14 Feb 2020 11:05:15 +0100
Subject: [PATCH] add nextflow scripts for umi_tools dedup

---
Reviewer notes (this section is not part of the commit message):
 * dedup.config: the umi_tools selectors now target the `dedup` process,
   which is the name declared in dedup.nf (they used to target `counting`).
 * dedup.nf: params.options is now defined (defaulting to
   params.dedup_options), the log and error messages are corrected, and the
   report file is named per sample.
 * The blob ids on the `index` lines below predate these edits.

 src/nf_modules/umi_tools/dedup.config | 56 +++++++++++++++++++++++++++
 src/nf_modules/umi_tools/dedup.nf     | 57 ++++++++++++++++++++++++++++
 src/nf_modules/umi_tools/tests.sh     | 13 +++++++
 3 files changed, 126 insertions(+)
 create mode 100644 src/nf_modules/umi_tools/dedup.config
 create mode 100644 src/nf_modules/umi_tools/dedup.nf
 create mode 100644 src/nf_modules/umi_tools/tests.sh

diff --git a/src/nf_modules/umi_tools/dedup.config b/src/nf_modules/umi_tools/dedup.config
new file mode 100644
index 00000000..08679f07
--- /dev/null
+++ b/src/nf_modules/umi_tools/dedup.config
@@ -0,0 +1,56 @@
+// Execution profiles for dedup.nf. Each withName selector must match a
+// process name declared in dedup.nf (sort_bam, dedup) to take effect.
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: sort_bam {
+        container = "lbmc/samtools:1.7"
+        cpus = 1
+      }
+      withName: dedup {
+        container = "lbmc/umi_tools:1.0.0"
+        cpus = 1
+      }
+    }
+  }
+  singularity {
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/"
+    process {
+      withName: sort_bam {
+        container = "lbmc/samtools:1.7"
+        cpus = 1
+      }
+      withName: dedup {
+        container = "lbmc/umi_tools:1.0.0"
+        cpus = 1
+      }
+    }
+  }
+  psmn{
+    process{
+      withName: sort_bam {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/samtools_1.7"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 1
+        memory = "20GB"
+        time = "12h"
+        queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
+      }
+      withName: dedup {
+        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
+        module = "umi_tools/1.0.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 1
+        memory = "20GB"
+        time = "12h"
+        queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/umi_tools/dedup.nf b/src/nf_modules/umi_tools/dedup.nf
new file mode 100644
index 00000000..0b7455f2
--- /dev/null
+++ b/src/nf_modules/umi_tools/dedup.nf
@@ -0,0 +1,57 @@
+// Sort and index BAM files with samtools, then deduplicate them with
+// umi_tools dedup.
+params.bam = "$baseDir/data/bam/*.bam"
+// Extra options for umi_tools dedup. --dedup_options is the declared name;
+// --options is kept as an alias since that is the name the script reads.
+params.dedup_options = ""
+params.options = params.dedup_options
+
+log.info "bam files : ${params.bam}"
+log.info "additional options for umi_tools dedup : ${params.options}"
+
+// Emit [file_id, bam] pairs; file_id is the file name up to the first dot.
+Channel
+  .fromPath( params.bam )
+  .ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { bam_files }
+
+// Sort each BAM with samtools and build its index.
+process sort_bam {
+  tag "$file_id"
+  cpus 4
+
+  input:
+    set file_id, file(bam) from bam_files
+
+  output:
+    set file_id, "*_sorted.bam{,.bai}" into sorted_bam_files
+
+  script:
+"""
+# sort bam
+samtools sort -@ ${task.cpus} -o ${file_id}_sorted.bam ${bam}
+samtools index ${file_id}_sorted.bam
+"""
+}
+
+// Run umi_tools dedup on the sorted BAM (bam[0] is expected to be the BAM,
+// ahead of its .bai index) and write a per-sample report.
+process dedup {
+  tag "$file_id"
+  publishDir "results/dedup/", mode: 'copy'
+
+  input:
+    set file_id, file(bam) from sorted_bam_files
+
+  output:
+    file "*dedup.bam" into dedup_bam
+    file "*.txt" into dedup_report
+
+  script:
+"""
+umi_tools dedup -I ${bam[0]} \
+  ${params.options} \
+  -S ${file_id}_dedup.bam > ${file_id}_report.txt
+"""
+}
diff --git a/src/nf_modules/umi_tools/tests.sh b/src/nf_modules/umi_tools/tests.sh
new file mode 100644
index 00000000..1a9cb806
--- /dev/null
+++ b/src/nf_modules/umi_tools/tests.sh
@@ -0,0 +1,13 @@
+./nextflow src/nf_modules/umi_tools/dedup.nf \
+  -c src/nf_modules/umi_tools/dedup.config \
+  -profile docker \
+  --bam "data/tiny_dataset/map/tiny_v2.bam" \
+  -resume
+
+if [ -x "$(command -v singularity)" ]; then
+./nextflow src/nf_modules/umi_tools/dedup.nf \
+  -c src/nf_modules/umi_tools/dedup.config \
+  -profile singularity \
+  --bam "data/tiny_dataset/map/tiny_v2.bam" \
+  -resume
+fi
-- 
GitLab