src/SNP_calling.nf: add Mutect2 to call SNP

224bbd69 · Laurent Modolo · a2f71be3 · 224bbd69
Unverified Commit 224bbd69 authored Sep 24, 2018 by Laurent Modolo
--- a/src/SNP_calling.nf
+++ b/src/SNP_calling.nf
 params.fastq = "$baseDir/data/*.fastq"
 params.fasta = "$baseDir/data/*.fasta"
+params.sam = ""
 log.info "fastq files : ${params.fastq}"
 log.info "fasta files : ${params.fasta}"

+// Build the reference channel: one [id, file] pair per file matching
+// params.fasta, where id is the leading run of non-dot characters of the
+// file's baseName. The pipeline aborts with an error if nothing matches.
+// The channel is then split into four identically-fed named channels.
 Channel
  .fromPath( params.fasta )
-  .ifEmpty { error "Cannot find any bam files matching: ${params.fasta}" }
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
-  .set { fasta_file }
+  .into { fasta_file;
+     indel_fasta_file;
+     recalibration_fasta_file;
+     haplotypecaller_fasta_file
+  }
 Channel
  .fromFilePairs( params.fastq )
  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
  .set { fastq_files }

+if (params.sam == "") {
  process adaptor_removal {
    tag "$pair_id"
    publishDir "results/fastq/adaptor_removal/", mode: 'copy'
@@ -91,4 +97,130 @@ ${index_id} ${reads[0]} ${reads[1]} \
  -o ${pair_id}.sam &> ${pair_id}_bwa_report.txt
  """
  }
+} else {
+  Channel
+    .fromPath( params.sam )
+    .ifEmpty { error "Cannot find any sam files matching: ${params.sam}" }
+    .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+    .set { sam_files }
+}
+
+// Runs samblaster (with --addMateTags) on each SAM file.
+// Input : [file_id, sam] pairs read from the `sam_files` channel.
+// Output: [file_id, files matching "*_dedup.sam*"] sent to `dedup_sam_files`.
+// Note  : 4 cpus are requested, but the command line below does not pass
+//         ${task.cpus} to samblaster.
+process dedup_sam {
+  tag "$file_id"
+  cpus 4
+
+  input:
+    set file_id, file(sam) from sam_files
+
+  output:
+    set file_id, "*_dedup.sam*" into dedup_sam_files
+  script:
+"""
+samblaster --addMateTags -i ${sam} -o ${file_id}_dedup.sam
+"""
+}
+
+// Converts each deduplicated SAM to BAM with `sambamba view` and pipes the
+// result straight into `sambamba sort`, both using ${task.cpus} threads.
+// Input : [file_id, sam] pairs read from `dedup_sam_files`.
+// Output: [file_id, "*_sorted.bam"] sent to `sorted_bam_files`; results are
+//         also copied to results/mapping/bam/1_dedup/.
+process sort_bam {
+  tag "$file_id"
+  cpus 4
+  publishDir "results/mapping/bam/1_dedup/", mode: 'copy'
+
+  input:
+    set file_id, file(sam) from dedup_sam_files
+
+  output:
+    set file_id, "*_sorted.bam" into sorted_bam_files
+
+  // `-l 0` on the view step is presumably there to skip compression of the
+  // intermediate stream that is immediately re-read by sort — confirm.
+  script:
+"""
+sambamba view -t ${task.cpus} -S -f bam -l 0 ${sam} | \
+sambamba sort -t ${task.cpus} -o ${file_id}_sorted.bam /dev/stdin
+"""
+}
+
+// Duplicate the sorted BAM channel: one copy feeds index_bam, the other
+// feeds the variant-calling process.
+sorted_bam_files.into { index_sorted_bam_files; haplotypecaller_sorted_bam_files }
+
+// Runs `sambamba index` on each sorted BAM with ${task.cpus} threads.
+// Input : [file_id, bam] pairs read from `index_sorted_bam_files`.
+// Output: [file_id, files matching "*.bam*"] sent to `indexed_bam_files`;
+//         results are also copied to results/mapping/bam/2_realigned/.
+process index_bam {
+  tag "$file_id"
+  cpus 4
+  publishDir "results/mapping/bam/2_realigned/", mode: 'copy'
+
+  input:
+    set file_id, file(bam) from index_sorted_bam_files
+
+  output:
+    set file_id, "*.bam*" into indexed_bam_files
+
+  script:
+"""
+sambamba index -t ${task.cpus} ${bam}
+"""
+}
+
+// Split the reference channel three ways: the fasta itself for the variant
+// caller, plus one copy for each of the two indexing processes.
+haplotypecaller_fasta_file.into {
+  haplo_fasta_file;
+  index2_fasta_file;
+  index3_fasta_file
+}
+
+// Runs `gatk CreateSequenceDictionary` on each reference fasta.
+// Input : [genome_id, fasta] pairs read from `index2_fasta_file`.
+// Output: [genome_id, "*.dict"] sent to `indexed2_fasta_file`; results are
+//         also copied to results/mapping/bam/2_realigned/.
+// The tool's stdout/stderr are redirected to gatk_output.txt in the task dir.
+process index2_fasta {
+  // Tag with genome_id: it is the only identifier this process declares
+  // (there is no `file_id` input here).
+  tag "$genome_id"
+  publishDir "results/mapping/bam/2_realigned/", mode: 'copy'
+
+  input:
+    set genome_id, file(fasta) from index2_fasta_file
+
+  output:
+    set genome_id, "*.dict" into indexed2_fasta_file
+
+  script:
+"""
+gatk CreateSequenceDictionary -R ${fasta} &> gatk_output.txt
+"""
+}
+
+// Runs `samtools faidx` on each reference fasta.
+// Input : [genome_id, fasta] pairs read from `index3_fasta_file`.
+// Output: [genome_id, "*.fai"] sent to `indexed3_fasta_file`; results are
+//         also copied to results/mapping/bam/2_realigned/.
+process index3_fasta {
+  // Tag with genome_id: it is the only identifier this process declares
+  // (there is no `file_id` input here).
+  tag "$genome_id"
+  publishDir "results/mapping/bam/2_realigned/", mode: 'copy'
+
+  input:
+    set genome_id, file(fasta) from index3_fasta_file
+
+  output:
+    set genome_id, "*.fai" into indexed3_fasta_file
+
+  script:
+"""
+samtools faidx ${fasta}
+"""
+}
+
+// Calls variants with `gatk Mutect2` (despite the process name) using
+// ${task.cpus} pair-HMM threads.
+// Inputs : sorted BAM, its index files, the reference fasta, and the two
+//          reference index outputs (.dict, .fai). The index inputs are not
+//          named on the command line; they are only staged into the task dir.
+// Outputs: [file_id, "*.vcf"] into `vcf_files` and [file_id, "*.bam"] into
+//          `realigned_bams_files`; results are also copied to results/SNP/vcf/.
+// NOTE(review): params.tumor and params.normal are not declared in the
+//   visible part of this file — confirm they are supplied on the command line
+//   or in a config file.
+// NOTE(review): every input channel goes through .collect(); confirm the
+//   `set` destructuring still yields the intended id/file pair when more
+//   than one sample or reference is present.
+process HaplotypeCaller {
+  tag "$file_id"
+  cpus 4
+  publishDir "results/SNP/vcf/", mode: 'copy'
+
+  input:
+    set file_id, file(bam) from haplotypecaller_sorted_bam_files.collect()
+    set file_ididx, file(bamidx) from indexed_bam_files.collect()
+    set genome_id, file(fasta) from haplo_fasta_file.collect()
+    set genome2_idx, file(fasta2idx) from indexed2_fasta_file.collect()
+    set genome3_idx, file(fasta3idx) from indexed3_fasta_file.collect()
+
+  output:
+    set file_id, "*.vcf" into vcf_files
+    set file_id, "*.bam" into realigned_bams_files
+
+  script:
+"""
+gatk Mutect2 --native-pair-hmm-threads ${task.cpus} -R ${fasta} \
+-I ${bam} -tumor ${params.tumor} -normal ${params.normal} \
+-O ${file_id}_raw_calls.g.vcf \
+-bamout ${file_id}_realigned.bam
+"""
+}
+