From 4a115a8a85033b020dbefdd44bbbd7721a62bc71 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Mon, 15 Feb 2021 17:23:52 +0100
Subject: [PATCH] add gatk3 to DSL2

---
Review notes (ignored by `git am`; not part of the commit message):

 * What the patch does: adds a new gatk3 module with five processes
   (variant_calling, filter_snp, filter_indels, high_confidence_snp,
   high_confidence_indel), adds index_bam to the picard module and switches
   its index_fasta from file() to path(), and adds filter_bam_quality (with a
   module-level threshold of 30) to the samtools module.
 * gatk3/variant_calling: the VCF element of the output tuple is declared as
   path("*.vcf") instead of a bare "*.vcf" string, so that every element of
   the tuple carries a qualifier like the other processes in this patch.
 * gatk3/high_confidence_snp and high_confidence_indel: the output globs now
   name the *_filtered_snp.vcf / *_filtered_indel.vcf files that the
   VariantFiltration commands write via -o.
 * picard/index_bam: output narrowed from path("*") to path("*.bai").
 * NOTE(review): -nct ${task.cpus} is passed to SelectVariants and
   VariantFiltration; confirm that both GATK3 walkers accept -nct when
   task.cpus is greater than 1.
 * All edits are same-line replacements, so hunk sizes and the diffstat are
   unchanged; the blob ids on the "index" lines are those of the original
   submission.

 src/nf_modules/gatk3/main.nf    | 113 ++++++++++++++++++++++++++++++++
 src/nf_modules/picard/main.nf   |  21 +++++-
 src/nf_modules/samtools/main.nf |  19 ++++++
 3 files changed, 151 insertions(+), 2 deletions(-)
 create mode 100644 src/nf_modules/gatk3/main.nf

diff --git a/src/nf_modules/gatk3/main.nf b/src/nf_modules/gatk3/main.nf
new file mode 100644
index 00000000..67e59b90
--- /dev/null
+++ b/src/nf_modules/gatk3/main.nf
@@ -0,0 +1,113 @@
+version = "3.8.0"
+container_url = "lbmc/gatk:${version}"
+
+process variant_calling {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(bam), path(bai)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*.vcf"), emit: vcf
+
+  script:
+"""
+gatk3 -T HaplotypeCaller \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -I ${bam} \
+  -o ${file_id}.vcf
+"""
+}
+
+process filter_snp {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(variants)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_snp.vcf"), emit: vcf
+  script:
+"""
+gatk3 -T SelectVariants \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${variants} \
+  -selectType SNP \
+  -o ${file_id}_snp.vcf
+"""
+}
+
+process filter_indels {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(variants)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_indel.vcf"), emit: vcf
+  script:
+"""
+gatk3 -T SelectVariants \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${variants} \
+  -selectType INDEL \
+  -o ${file_id}_indel.vcf
+"""
+}
+
+high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
+
+process high_confidence_snp {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(variants)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_filtered_snp.vcf"), emit: vcf
+  script:
+"""
+gatk3 -T VariantFiltration \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${variants} \
+  --filterExpression "${high_confidence_snp_filter}" \
+  --filterName "basic_snp_filter" \
+  -o ${file_id}_filtered_snp.vcf
+"""
+}
+
+high_confidence_indel_filter = "QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
+
+process high_confidence_indel {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(variants)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_filtered_indel.vcf"), emit: vcf
+  script:
+"""
+gatk3 -T VariantFiltration \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${variants} \
+  --filterExpression "${high_confidence_indel_filter}" \
+  --filterName "basic_indel_filter" \
+  -o ${file_id}_filtered_indel.vcf
+"""
+}
diff --git a/src/nf_modules/picard/main.nf b/src/nf_modules/picard/main.nf
index 51725afa..8a9b0d8a 100644
--- a/src/nf_modules/picard/main.nf
+++ b/src/nf_modules/picard/main.nf
@@ -31,9 +31,9 @@ process index_fasta {
   tag "$file_id"
 
   input:
-    tuple val(file_id), file(fasta)
+    tuple val(file_id), path(fasta)
   output:
-    tuple val(file_id), file("*.dict"), emit: index
+    tuple val(file_id), path("*.dict"), emit: index
 
   script:
 """
@@ -42,3 +42,20 @@ REFERENCE=${fasta} \
 OUTPUT=${fasta.simpleName}.dict
 """
 }
+
+process index_bam {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(bam)
+  output:
+    tuple val(file_id), path("*.bai"), emit: index
+
+  script:
+"""
+PicardCommandLine BuildBamIndex \
+INPUT=${bam}
+"""
+}
diff --git a/src/nf_modules/samtools/main.nf b/src/nf_modules/samtools/main.nf
index e3413fff..20db864f 100644
--- a/src/nf_modules/samtools/main.nf
+++ b/src/nf_modules/samtools/main.nf
@@ -17,6 +17,25 @@ samtools faidx ${fasta}
 """
 }
 
+filter_bam_quality_threshold = 30
+
+process filter_bam_quality {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*_filtered.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} -hb ${bam} -q ${filter_bam_quality_threshold} > \
+  ${bam.simpleName}_filtered.bam
+"""
+}
+
 process filter_bam {
   container = "${container_url}"
 
-- 
GitLab