From 4a115a8a85033b020dbefdd44bbbd7721a62bc71 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Mon, 15 Feb 2021 17:23:52 +0100
Subject: [PATCH] add gatk3 to DSL2

---
 src/nf_modules/gatk3/main.nf    | 113 ++++++++++++++++++++++++++++++++
 src/nf_modules/picard/main.nf   |  21 +++++-
 src/nf_modules/samtools/main.nf |  19 ++++++
 3 files changed, 151 insertions(+), 2 deletions(-)
 create mode 100644 src/nf_modules/gatk3/main.nf

diff --git a/src/nf_modules/gatk3/main.nf b/src/nf_modules/gatk3/main.nf
new file mode 100644
index 00000000..67e59b90
--- /dev/null
+++ b/src/nf_modules/gatk3/main.nf
@@ -0,0 +1,113 @@
+version = "3.8.0"  // image tag interpolated into container_url on the next line
+container_url = "lbmc/gatk:${version}"  // container image referenced by the `container` directive of each process below
+
+process variant_calling {  // runs GATK3 HaplotypeCaller on one BAM and emits the VCF it writes
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(bam), path(bai)  // bai is staged with the BAM but never named in the command
+    tuple val(ref_id), path(fasta), path(fai), path(dict)  // fai/dict are staged with the FASTA; ref_id is unused here
+  output:
+    tuple val(file_id), path("*.vcf"), emit: vcf  // explicit path() qualifier, consistent with the other processes
+
+  script:  // -nct receives the CPU count allocated to the task; output is named after file_id
+"""
+gatk3 -T HaplotypeCaller \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -I ${bam} \
+  -o ${file_id}.vcf
+"""
+}
+
+process filter_snp {  // runs GATK3 SelectVariants with -selectType SNP on one variant file
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(variants)  // variant file handed to -V
+    tuple val(ref_id), path(fasta), path(fai), path(dict)  // only fasta is named in the command; ref_id is unused here
+  output:
+    tuple val(file_id), path("*_snp.vcf"), emit: vcf  // matches ${file_id}_snp.vcf written by -o
+  script:  // -nct receives the CPU count allocated to the task
+"""
+gatk3 -T SelectVariants \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${variants} \
+  -selectType SNP \
+  -o ${file_id}_snp.vcf
+"""
+}
+
+process filter_indels {  // runs GATK3 SelectVariants with -selectType INDEL on one variant file
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(variants)  // variant file handed to -V
+    tuple val(ref_id), path(fasta), path(fai), path(dict)  // only fasta is named in the command; ref_id is unused here
+  output:
+    tuple val(file_id), path("*_indel.vcf"), emit: vcf  // matches ${file_id}_indel.vcf written by -o
+  script:  // -nct receives the CPU count allocated to the task
+"""
+gatk3 -T SelectVariants \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${variants} \
+  -selectType INDEL \
+  -o ${file_id}_indel.vcf
+"""
+}
+
+high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"  // passed as --filterExpression by high_confidence_snp
+
+process high_confidence_snp {  // runs GATK3 VariantFiltration with high_confidence_snp_filter on one variant file
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(variants)  // variant file handed to -V
+    tuple val(ref_id), path(fasta), path(fai), path(dict)  // only fasta is named in the command; ref_id is unused here
+  output:
+    tuple val(file_id), path("*_filtered_snp.vcf"), emit: vcf  // only the file written by -o; a staged *_snp.vcf input cannot match
+  script:  // the expression is wrapped in double quotes so the shell does not interpret ||, < and >
+"""
+gatk3 -T VariantFiltration \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${variants} \
+  --filterExpression "${high_confidence_snp_filter}" \
+  --filterName "basic_snp_filter" \
+  -o ${file_id}_filtered_snp.vcf
+"""
+}
+
+high_confidence_indel_filter = "QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"  // passed as --filterExpression by high_confidence_indel
+
+process high_confidence_indel {  // runs GATK3 VariantFiltration with high_confidence_indel_filter on one variant file
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(variants)  // variant file handed to -V
+    tuple val(ref_id), path(fasta), path(fai), path(dict)  // only fasta is named in the command; ref_id is unused here
+  output:
+    tuple val(file_id), path("*_filtered_indel.vcf"), emit: vcf  // only the file written by -o; a staged *_indel.vcf input cannot match
+  script:  // the expression is wrapped in double quotes so the shell does not interpret ||, < and >
+"""
+gatk3 -T VariantFiltration \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${variants} \
+  --filterExpression "${high_confidence_indel_filter}" \
+  --filterName "basic_indel_filter" \
+  -o ${file_id}_filtered_indel.vcf
+"""
+}
diff --git a/src/nf_modules/picard/main.nf b/src/nf_modules/picard/main.nf
index 51725afa..8a9b0d8a 100644
--- a/src/nf_modules/picard/main.nf
+++ b/src/nf_modules/picard/main.nf
@@ -31,9 +31,9 @@ process index_fasta {
   tag "$file_id"
 
   input:
-    tuple val(file_id), file(fasta)
+    tuple val(file_id), path(fasta)
   output:
-    tuple val(file_id), file("*.dict"), emit: index 
+    tuple val(file_id), path("*.dict"), emit: index 
 
   script:
 """
@@ -42,3 +42,20 @@ REFERENCE=${fasta} \
 OUTPUT=${fasta.simpleName}.dict
 """
 }
+
+process index_bam {  // runs Picard BuildBamIndex on one BAM
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(bam)  // BAM handed to INPUT=
+  output:
+    tuple val(file_id), path("*"), emit: index  // NOTE(review): catch-all glob; presumably only the index Picard writes is collected - confirm
+
+  script:  // no OUTPUT= is given, so the index name is whatever Picard picks by default
+"""
+PicardCommandLine BuildBamIndex \
+INPUT=${bam}
+"""
+}
diff --git a/src/nf_modules/samtools/main.nf b/src/nf_modules/samtools/main.nf
index e3413fff..20db864f 100644
--- a/src/nf_modules/samtools/main.nf
+++ b/src/nf_modules/samtools/main.nf
@@ -17,6 +17,25 @@ samtools faidx ${fasta}
 """
 }
 
+filter_bam_quality_threshold = 30  // value handed to `samtools view -q` by filter_bam_quality
+
+process filter_bam_quality {  // rewrites one BAM through `samtools view -q` using filter_bam_quality_threshold
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(bam)  // BAM to filter
+
+  output:
+    tuple val(file_id), path("*_filtered.bam"), emit: bam  // matches ${bam.simpleName}_filtered.bam written by the redirect
+  script:  // all options precede the input file so parsing does not depend on getopt argument permutation
+"""
+samtools view -@ ${task.cpus} -hb -q ${filter_bam_quality_threshold} ${bam} > \
+  ${bam.simpleName}_filtered.bam
+"""
+}
+
 
 process filter_bam {
   container = "${container_url}"
-- 
GitLab