version = "3.8.0"
container_url = "lbmc/gatk:${version}"

params.variant_calling = ""
params.variant_calling_out = ""
process variant_calling {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.variant_calling_out != "") {
    publishDir "results/${params.variant_calling_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bai)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*.vcf"), emit: vcf

  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T HaplotypeCaller \
  -nct ${task.cpus} \
  ${params.variant_calling} \
  -R ${fasta} \
  -I ${bam} \
  -o ${file_prefix}.vcf
"""
}

params.filter_snp = ""
params.filter_snp_out = ""
process filter_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.filter_snp_out != "") {
    publishDir "results/${params.filter_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf
  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T SelectVariants \
  -nct ${task.cpus} \
  ${params.filter_snp} \
  -R ${fasta} \
  -V ${vcf} \
  -selectType SNP \
  -o ${file_prefix}_snp.vcf
"""
}

params.filter_indels = ""
params.filter_indels_out = ""
process filter_indels {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.filter_indels_out != "") {
    publishDir "results/${params.filter_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf
  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T SelectVariants \
  -nct ${task.cpus} \
  ${params.filter_indels} \
  -R ${fasta} \
  -V ${vcf} \
  -selectType INDEL \
  -o ${file_prefix}_indel.vcf
"""
}

high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
params.high_confidence_snp = "--filterExpression \"${high_confidence_snp_filter}\" --filterName \"basic_snp_filter\""
params.high_confidence_snp_out = ""
process high_confidence_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.high_confidence_snp_out != "") {
    publishDir "results/${params.high_confidence_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*_snp.vcf"), emit: vcf
  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T VariantFiltration \
  -nct ${task.cpus} \
  -R ${fasta} \
  -V ${vcf} \
  ${params.high_confidence_snp} \
  -o ${file_prefix}_filtered_snp.vcf
"""
}

high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
params.high_confidence_indels = "--filterExpression \"${high_confidence_indel_filter}\" --filterName \"basic_indel_filter\""
params.high_confidence_indels_out = ""
process high_confidence_indels {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.high_confidence_indels_out != "") {
    publishDir "results/${params.high_confidence_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*_indel.vcf"), emit: vcf
  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T VariantFiltration \
  -nct ${task.cpus} \
  -R ${fasta} \
  -V ${vcf} \
  ${params.high_confidence_indels} \
  -o ${file_prefix}_filtered_indel.vcf
"""
}

params.recalibrate_snp_table = ""
params.recalibrate_snp_table_out = ""
process recalibrate_snp_table {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.recalibrate_snp_table_out != "") {
    publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("recal_data_table"), emit: recal_table
  script:
"""
gatk3 -T BaseRecalibrator \
  -nct ${task.cpus} \
  ${recalibrate_snp_table} \
  -R ${fasta} \
  -I ${bam} \
  -knownSites ${snp_file} \
  -knownSites ${indel_file} \
  -o recal_data_table
"""
}

params.recalibrate_snp = ""
params.recalibrate_snp_out = ""
process recalibrate_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.recalibrate_snp_out != "") {
    publishDir "results/${params.recalibrate_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
    tuple val(table_id), path(recal_data_table)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*.bam"), emit: bam
  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T PrintReads \
  --use_jdk_deflater \
  --use_jdk_inflater \
  ${recalibrate_snp} \
  -nct ${task.cpus} \
  -R ${fasta} \
  -I ${bam} \
  -BQSR recal_data_table \
  -o ${file_prefix}_recal.bam
"""
}

params.haplotype_caller = ""
params.haplotype_caller_out = ""
process haplotype_caller {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.haplotype_caller_out != "") {
    publishDir "results/${params.haplotype_caller_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*.gvcf"), emit: gvcf
  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T HaplotypeCaller \
  -nct ${task.cpus} \
  ${params.haplotype_caller} \
  -R ${fasta} \
  -I ${bam} \
  -ERC GVCF \
  -variant_index_type LINEAR -variant_index_parameter 128000 \
  -o ${file_prefix}.gvcf
"""
}

params.gvcf_genotyping = ""
params.gvcf_genotyping_out = ""
process gvcf_genotyping {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.gvcf_genotyping_out != "") {
    publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gvcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*.vcf"), emit: vcf
  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T GenotypeGVCFs \
  -nct ${task.cpus} \
  ${params.gvcf_genotyping} \
  -R ${fasta} \
  -V ${gvcf} \
  -o ${file_prefix}_joint.vcf
"""
}

params.select_variants_snp = ""
params.select_variants_snp_out = ""
process select_variants_snp {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.select_variants_snp_out != "") {
    publishDir "results/${params.select_variants_snp_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf
  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T SelectVariants \
  -nct ${task.cpus} \
  ${params.select_variants_snp} \
  -R ${fasta} \
  -V ${vcf} \
  -selectType SNP \
  -o ${file_prefix}_joint_snp.vcf
"""
}

params.select_variants_indels = ""
params.select_variants_indels_out = ""
process select_variants_indels {
  container = "${container_url}"
  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.select_variants_indels_out != "") {
    publishDir "results/${params.select_variants_indels_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf
  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T SelectVariants \
  -nct ${task.cpus} \
  ${params.select_variants_indels} \
  -R ${fasta} \
  -V ${vcf} \
  -selectType INDEL \
  -o ${file_prefix}_joint_indel.vcf
"""
}

params.personalized_genome = ""
params.personalized_genome_out = ""
process personalized_genome {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.personalized_genome_out != "") {
    publishDir "results/${params.personalized_genome_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(vcf)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("*_genome.fasta"), emit: fasta

  script:
  if (file_id instanceof List){
    file_prefix = file_id[0]
  } else {
    file_prefix = file_id
  }
"""
gatk3 -T FastaAlternateReferenceMaker\
  ${params.personalized_genome} \
  -R ${reference} \
  -V ${vcf} \
  -o ${file_prefix}_genome.fasta
"""
}