From 60a814b3785b8efb7a16db81f9358f8562e5ff89 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent@modolo.fr>
Date: Tue, 25 Sep 2018 18:26:39 +0200
Subject: [PATCH] SNP_calling.nf: add merging step into tumor_sample and normal_sample

---
Review notes (text between "---" and the diffstat is not part of the commit):

 * src/1_JU28_59vs17_SNP_calling.sh: removed the stray "]" in the --normal
   argument. SNP_calling.nf hands this string to Eval.me(), and
   ["MR_550_clean"], "MR_350_clean"] is not a valid Groovy list literal.
 * src/SNP_calling.nf: the value returned by collect_sorted_bam_file.join(...)
   was discarded, so merge_bam only read the "normal_sample" group. join also
   pairs items on their first element, and "normal_sample" never equals
   "tumor_sample". The two groups are now combined with mix() and the result
   is assigned to collect_sorted_bam_file, the channel merge_bam reads.
 * src/SNP_calling.nf: short comments were added to the new channel code; the
   hunk lengths/offsets and the diffstat below account for those 5 lines.
 * src/SNP_calling.nf (commented-out filter_SNP draft): "--FILE_EXTENSION"
   started with an en dash instead of an ASCII hyphen.
 * NOTE(review): --normal / --tumor are evaluated as Groovy code by Eval.me();
   only pass trusted values on the command line.
 * NOTE(review): merge_bam is also run for a group holding a single BAM (one
   tumor sample in the launcher script); confirm that sambamba merge accepts
   a single input file.
 * NOTE(review): this patch was restored from a whitespace-collapsed copy, so
   the indentation of context lines is a best guess; use
   "git apply --ignore-whitespace" if a hunk is rejected.

 src/1_JU28_59vs17_SNP_calling.sh |   2 +-
 src/SNP_calling.config           |   3 +
 src/SNP_calling.nf               | 116 ++++++++++++++++++++++++++++++-
 3 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/src/1_JU28_59vs17_SNP_calling.sh b/src/1_JU28_59vs17_SNP_calling.sh
index ed4e9f0..edc938e 100644
--- a/src/1_JU28_59vs17_SNP_calling.sh
+++ b/src/1_JU28_59vs17_SNP_calling.sh
@@ -3,4 +3,4 @@
 
 ./nextflow src/SNP_calling.nf -c src/SNP_calling.config -profile docker --fasta "data/fasta/DBG2OLC-output2.fasta" --fastq "data/fastq/*_{1,2}.fastq.gz" --sam "results/mapping/sam/*.sam" -resume -w ~/data/work/
 
-./nextflow src/SNP_calling.nf -c src/SNP_calling.config -profile docker --fasta "data/fasta/DBG2OLC-output2.fasta" --fastq "data/fastq/*_{1,2}.fastq.gz" --sam "results/mapping/sam/*.sam" -resume -w ~/data/work/ --tumor "NG-10944_JU2859_bis_lib169352_5217_1" --normal "MR_550_clean"
+./nextflow src/SNP_calling.nf -c src/SNP_calling.config -profile docker --fasta "data/fasta/DBG2OLC-output2.fasta" --fastq "data/fastq/*_{1,2}.fastq.gz" --sam "results/mapping/sam/*.sam" -resume -w ~/data/work/ --tumor "[\"NG-10944_JU2859_bis_lib169352_5217_1\"]" --normal "[\"MR_550_clean\", \"MR_350_clean\"]"
diff --git a/src/SNP_calling.config b/src/SNP_calling.config
index 579b09f..f46e222 100644
--- a/src/SNP_calling.config
+++ b/src/SNP_calling.config
@@ -21,6 +21,9 @@ profiles {
       withName: sam_to_bam {
         container = "sambamba:0.6.7"
       }
+      withName: merge_bam {
+        container = "sambamba:0.6.7"
+      }
       withName: sort_bam {
         container = "sambamba:0.6.7"
       }
diff --git a/src/SNP_calling.nf b/src/SNP_calling.nf
index 19b5db1..1cc03f0 100644
--- a/src/SNP_calling.nf
+++ b/src/SNP_calling.nf
@@ -3,6 +3,10 @@ params.fasta = "$baseDir/data/*.fasta"
 params.sam = ""
 log.info "fastq files : ${params.fastq}"
 log.info "fasta files : ${params.fasta}"
+def normal_sample = Eval.me(params.normal)
+def tumor_sample = Eval.me(params.tumor)
+log.info "normal : ${normal_sample}"
+log.info "tumor : ${tumor_sample}"
 
 Channel
   .fromPath( params.fasta )
@@ -152,13 +156,51 @@ sambamba sort -t ${task.cpus} --tmpdir=./tmp -o ${file_id}_sorted.bam ${bam}
 """
 }
 
+// Duplicate the sorted BAM channel: one copy is filtered for the normal
+// samples, the other one for the tumor samples.
+sorted_bam_files.into {
+  sorted_bam_files_norm;
+  sorted_bam_files_tumor
+}
+// Gather the BAM files of every normal sample under the "normal_sample" key.
+collect_sorted_bam_file_norm = sorted_bam_files_norm
+  .filter{ normal_sample.contains(it[0]) }
+  .map { it -> it[1]}
+  .collect()
+  .map { it -> ["normal_sample", it]}
+// Same for the tumor samples, then mix both groups into the channel read by
+// merge_bam (join would pair items by key, and the two keys differ).
+collect_sorted_bam_file = collect_sorted_bam_file_norm.mix(
+  sorted_bam_files_tumor
+    .filter{ tumor_sample.contains(it[0]) }
+    .map { it -> it[1]}
+    .collect()
+    .map { it -> ["tumor_sample", it]}
+)
+
+process merge_bam {
+  tag "$file_id"
+  cpus 4
+
+  input:
+    set file_id, file(bam) from collect_sorted_bam_file
+
+  output:
+    set file_id, "*.bam" into merged_bam_files
+
+  script:
+"""
+sambamba merge -t ${task.cpus} ${file_id}.bam ${bam}
+"""
+}
+
 process name_bam {
   tag "$file_id"
   cpus 4
   publishDir "results/mapping/bam/", mode: 'copy'
 
   input:
-    set file_id, file(bam) from sorted_bam_files
+    set file_id, file(bam) from merged_bam_files
 
   output:
     set file_id, "*_named.bam" into named_bam_files
@@ -191,7 +233,7 @@ process index_bam {
 
   script:
 """
-sambamba index -t ${task.cpus} --tmpdir=./tmp ${bam}
+sambamba index -t ${task.cpus} ${bam}
 """
 }
 
@@ -258,5 +300,75 @@ gatk Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
 """
 }
 
+/*
+process filter_SNP {
+  tag "$file_id"
+  cpus 4
+  publishDir "results/SNP/vcf/", mode: 'copy'
+
+  input:
+
+  output:
+    set file_id, "*.vcf" into vcf_files_filtered
+
+  script:
+"""
+gatk --java-options "-Xmx2g" Mutect2 \
+-R hg38/Homo_sapiens_assembly38.fasta \
+-I tumor.bam \
+-I normal.bam \
+-tumor HCC1143_tumor \
+-normal HCC1143_normal \
+-pon resources/chr17_pon.vcf.gz \
+--germline-resource resources/chr17_af-only-gnomad_grch38.vcf.gz \
+--af-of-alleles-not-in-resource 0.0000025 \
+--disable-read-filter MateOnSameContigOrNoMappedMateReadFilter \
+-L chr17plus.interval_list \
+-O 1_somatic_m2.vcf.gz \
+-bamout 2_tumor_normal_m2.bam
+
+gatk Mutect2 \
+-R ~/Documents/ref/hg38/Homo_sapiens_assembly38.fasta \
+-I HG00190.bam \
+-tumor HG00190 \
+--disable-read-filter MateOnSameContigOrNoMappedMateReadFilter \
+-L chr17plus.interval_list \
+-O 3_HG00190.vcf.gz
+
+gatk CreateSomaticPanelOfNormals \
+-vcfs 3_HG00190.vcf.gz \
+-vcfs 4_NA19771.vcf.gz \
+-vcfs 5_HG02759.vcf.gz \
+-O 6_threesamplepon.vcf.gz
+
+gatk GetPileupSummaries \
+-I tumor.bam \
+-V resources/chr17_small_exac_common_3_grch38.vcf.gz \
+-O 7_tumor_getpileupsummaries.table
+
+gatk CalculateContamination \
+-I 7_tumor_getpileupsummaries.table \
+-O 8_tumor_calculatecontamination.table
+
+gatk FilterMutectCalls \
+-V somatic_m2.vcf.gz \
+--contamination-table tumor_calculatecontamination.table \
+-O 9_somatic_oncefiltered.vcf.gz
+
+gatk CollectSequencingArtifactMetrics \
+-I tumor.bam \
+-O 10_tumor_artifact \
+--FILE_EXTENSION ".txt" \
+-R ~/Documents/ref/hg38/Homo_sapiens_assembly38.fasta
+
+gatk FilterByOrientationBias \
+-A G/T \
+-A C/T \
+-V 9_somatic_oncefiltered.vcf.gz \
+-P tumor_artifact.pre_adapter_detail_metrics.txt \
+-O 11_somatic_twicefiltered.vcf.gz
+"""
+}
+*/
 
 
-- 
GitLab