From b85258ad475e02f830991de84a591884c6beaa57 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent@modolo.fr>
Date: Tue, 9 Oct 2018 15:57:22 +0200
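Subject: [PATCH] SNP_calling.nf: cleanup gatk code

Remove the commented-out GATK processes (HaplotypeCaller,
GetPileupSummaries, CalculateContamination,
CollectSequencingArtifactMetrics, filter_SNP) from SNP_calling.nf
and drop their container entries from SNP_calling.config.

The BAM, BAM-index and fasta channels are no longer forked into the
haplo*/artifact*/pileup*/filter* channels that were set up for those
processes; only the samtools_SNP_* channels remain.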
---
 src/SNP_calling.config |  15 ----
 src/SNP_calling.nf     | 169 +----------------------------------------
 2 files changed, 4 insertions(+), 180 deletions(-)

diff --git a/src/SNP_calling.config b/src/SNP_calling.config
index 34660e06..cf829bad 100644
--- a/src/SNP_calling.config
+++ b/src/SNP_calling.config
@@ -45,21 +45,6 @@ profiles {
       withName: vcf_to_csv_norm {
         container = "gatk:4.0.8.1"
       }
-      withName: HaplotypeCaller {
-        container = "gatk:4.0.8.1"
-      }
-      withName: GetPileupSummaries {
-        container = "gatk:4.0.8.1"
-      }
-      withName: CalculateContamination {
-        container = "gatk:4.0.8.1"
-      }
-      withName: CollectSequencingArtifactMetrics {
-        container = "gatk:4.0.8.1"
-      }
-      withName: filter_SNP {
-        container = "gatk:4.0.8.1"
-      }
     }
   }
   sge {
diff --git a/src/SNP_calling.nf b/src/SNP_calling.nf
index 2c9cdfcf..096afb9f 100644
--- a/src/SNP_calling.nf
+++ b/src/SNP_calling.nf
@@ -296,46 +296,29 @@ final_indexed_bam_files_norm = index_bam_files_norm
 final_indexed_bam_files_tumor = index_bam_files_tumor
    .filter{ "tumor_sample" == it[0] }
 
-final_bam_files_norm.into{
-  haplotypecaller_bam_files_norm;
+final_bam_files_norm.set{
   samtools_SNP_bam_files_norm
 }
-final_bam_files_tumor.into{
-  haplotypecaller_bam_files_tumor;
+final_bam_files_tumor.set{
   samtools_SNP_bam_files_tumor;
-  artifact_bam_files_tumor;
-  pileup_bam_files_tumor
 }
-final_indexed_bam_files_norm.into{
-  haplo_index_bam_files_norm;
+final_indexed_bam_files_norm.set{
   samtools_SNP_index_bam_files_norm
 }
-final_indexed_bam_files_tumor.into{
-  haplo_index_bam_files_tumor;
+final_indexed_bam_files_tumor.set{
   samtools_SNP_index_bam_files_tumor;
-  artifact_index_bam_files_tumor;
-  pileup_index_bam_files_tumor
 }
 final_fasta_file.into{
-  haplo_fasta_file;
   samtools_SNP_fasta_file_tumor;
   samtools_SNP_fasta_file_norm;
-  artifact_fasta_file;
-  filter_fasta_file
 }
 indexed2_fasta_file.into{
-  haplo_indexed2_fasta_file;
   samtools_SNP_indexed2_fasta_file_tumor;
   samtools_SNP_indexed2_fasta_file_norm;
-  artifact_indexed2_fasta_file;
-  filter_indexed2_fasta_file
 }
 indexed3_fasta_file.into{
-  haplo_indexed3_fasta_file;
   samtools_SNP_indexed3_fasta_file_tumor;
   samtools_SNP_indexed3_fasta_file_norm;
-  artifact_indexed3_fasta_file;
-  filter_indexed3_fasta_file
 }
 
 process samtools_SNP_tumor {
@@ -432,147 +415,3 @@ gatk VariantsToTable -V ${file_id_norm}_filtered.vcf \
 """
 }
 
-
-/*
-process HaplotypeCaller {
-  tag "$file_id_norm"
-  cpus 10
-  publishDir "results/SNP/vcf/", mode: 'copy'
-
-  input:
-    set file_id_norm, file(bam_norm) from haplotypecaller_bam_files_norm
-    set file_ididx_norm, file(bamidx_norm) from haplo_index_bam_files_norm
-    set file_id_tumor, file(bam_tumor) from haplotypecaller_bam_files_tumor
-    set file_ididx_tumor, file(bamidx_tumor) from haplo_index_bam_files_tumor
-    set genome_id, file(fasta) from haplo_fasta_file
-    set genome2_idx, file(fasta2idx) from haplo_indexed2_fasta_file
-    set genome3_idx, file(fasta3idx) from haplo_indexed3_fasta_file
-
-  output:
-    set file_id_norm, "*.vcf" into vcf_files
-    set file_id_norm, "*.vcf.idx" into index_vcf_files
-    set file_id_norm, "*.bam" into realigned_bams_files
-    file "*_mutect2_report.txt" into mutect2_report
-
-  script:
-"""
-gatk --java-options "-Xmx32G" Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
--I ${bam_tumor} -tumor ${file_id_tumor} \
--I ${bam_norm} -normal ${file_id_norm} \
--O ${file_id_norm}_raw_calls.g.vcf \
--bamout ${file_id_norm}_realigned.bam 2> ${file_id_norm}_mutect2_report.txt
-"""
-}
-
-vcf_files.into{
-  pileup_vcf_files;
-  filter_vcf_files
-}
-index_vcf_files.into{
-  pileup_index_vcf_files;
-  filter_index_vcf_files
-}
-
-process GetPileupSummaries {
-  tag "$file_id_norm"
-  cpus 1
-  publishDir "results/SNP/vcf/", mode: 'copy'
-
-  input:
-    set file_id_norm, file(vcf) from pileup_vcf_files
-    set fileidx_id_norm, file(vcfidx) from pileup_index_vcf_files
-    set file_id_tumor, file(bam_tumor) from pileup_bam_files_tumor
-
-  output:
-    set file_id_tumor, "*.table" into pileup_files
-    file "*_pileup_report.txt" into pileup_report
-
-  script:
-"""
-gatk --java-options "-Xmx32G" GetPileupSummaries \
--I ${bam_tumor} \
--V ${vcf} \
--O ${file_id_tumor}_pileup.table \
-2> ${file_id_tumor}_pileup_report.txt
-"""
-}
-
-process CalculateContamination {
-  tag "$file_id_tumor"
-  cpus 1
-  publishDir "results/SNP/vcf/", mode: 'copy'
-
-  input:
-    set file_id_tumor, file(pileup_table) from pileup_files
-
-  output:
-    set file_id_tumor, "*.table" into contamination_files
-    file "*_contamination_report.txt" into contamination_report
-
-  script:
-"""
-gatk --java-options "-Xmx32G" CalculateContamination \
--I ${pileup_table} \
--O $file_id_tumor}_contamination.table \
-2> ${file_id_tumor}_contamination_report.txt
-"""
-}
-*/
-/*
-process CollectSequencingArtifactMetrics {
-  tag "$file_id_tumor"
-  cpus 1
-  publishDir "results/SNP/vcf/", mode: 'copy'
-
-  input:
-    set file_id_tumor, file(bam_tumor) from artifact_bam_files_tumor
-    set genome_id, file(fasta) from artifact_fasta_file
-    set genome2_idx, file(fasta2idx) from artifact_indexed2_fasta_file
-    set genome3_idx, file(fasta3idx) from artifact_indexed3_fasta_file
-
-  output:
-    set file_id_tumor, "*_artifact.*" into artifact_files
-    file "*_artifact_report.txt" into artifact_report
-
-  script:
-"""
-gatk CollectSequencingArtifactMetrics \
--I ${bam_tumor} \
--O ${file_id_tumor}_artifact \
--R ${fasta} \
-2> ${file_id_tumor}_artifact_report.txt
-"""
-}
-
-process filter_SNP {
-  tag "$file_id_norm"
-  cpus 1
-  publishDir "results/SNP/vcf/", mode: 'copy'
-
-  input:
-    set file_id_norm, file(vcf) from filter_vcf_files
-    set fileidx_id_norm, file(vcfid) from filter_index_vcf_files
-    set genome_id, file(fasta) from filter_fasta_file
-    set genome2_idx, file(fasta2idx) from filter_indexed2_fasta_file
-    set genome3_idx, file(fasta3idx) from filter_indexed3_fasta_file
-
-  output:
-    set file_id_norm, "*.vcf" into vcf_files_filtered
-    file "*_filter_report.txt" into filter_report
-
-  script:
-"""
-gatk FilterMutectCalls \
--V ${vcf} \
--O ${file_id_norm}_filtered.vcf \
-2> ${file_id_norm}_filter_report.txt
-gatk SelectVariants \
--R ${fasta} \
---variant ${file_id_norm}_filtered.vcf \
---exclude-filtered \
--O ${file_id_norm}_filtered_pass.vcf \
-2>> ${file_id_norm}_filter_report.txt
-"""
-}
-*/
-
-- 
GitLab