Compare revisions

ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0
--- a/src/nf_modules/fastq_screen/main.nf
+++ b/src/nf_modules/fastq_screen/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.11.3--pl5.22.0_0"
+container_url = "quay.io/biocontainers/fastq-screen:${version}"
+
+// Extra command-line options inserted verbatim into the fastq_screen call.
+params.fastq_screen = ""
+// Sub-directory of results/ to publish into; an empty string disables publishing.
+params.fastq_screen_out = ""
+
+// Run fastq_screen on the staged FASTQ file(s) of one sample.
+//   input : tuple (file_id, fastq)
+//   output: tuple (file_id, every file created in the work directory), emitted as `output`
+process fastq_screen {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  // Publishing is controlled by this module's own *_out parameter.
+  if (params.fastq_screen_out != "") {
+    publishDir "results/${params.fastq_screen_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fastq)
+
+  output:
+    tuple val(file_id), path("*"), emit: output
+
+  script:
+  // The first command is run unchanged before screening; the second screens
+  // the staged input file(s) rather than fixed placeholder names.
+"""
+fastq_screen --get_genomes
+fastq_screen --threads ${task.cpus} ${params.fastq_screen} ${fastq}
+"""
+}
--- a/src/nf_modules/fastqc/main.nf
+++ b/src/nf_modules/fastqc/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.11.5"
+container_url = "lbmc/fastqc:${version}"
+
+// Additional FastQC options, inserted verbatim into the command line.
+params.fastqc_fastq = ""
+// Sub-folder of results/ used for publishing; an empty string turns publishing off.
+params.fastqc_fastq_out = ""
+
+// FastQC report for one sample.
+//   input : tuple (file_id, reads)
+//   output: tuple (file_id, *.zip and *.html files), emitted as `report`
+process fastqc_fastq {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.fastqc_fastq_out != "") {
+    publishDir "results/${params.fastqc_fastq_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(reads)
+
+  output:
+  tuple val(file_id), path("*.{zip,html}"), emit: report
+
+  script:
+  // Anything whose size() is not exactly 2 is processed as a single file.
+  if (reads.size() != 2)
+  """
+    fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${params.fastqc_fastq} ${reads[0]}
+  """
+  else
+  """
+  fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \
+    ${params.fastqc_fastq} \
+    ${reads[0]} ${reads[1]}
+  """
+}
\ No newline at end of file
--- a/src/nf_modules/flexi_splitter/main.nf
+++ b/src/nf_modules/flexi_splitter/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "1.0.2"
+container_url = "lbmc/flexi_splitter:${version}"
+
+// Extra options inserted verbatim into the flexi_splitter command line.
+params.split = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.split_out = ""
+
+
+// Split reads with flexi_splitter, regroup the resulting files and emit them
+// two by two, keyed by the first file's simpleName with any `R1`/`R2` tag
+// (optionally preceded by `_`) removed.
+workflow split {
+  take:
+    reads
+    config
+  main:
+    split_fastq(reads, config)
+    group_fastq(split_fastq.out.fastq_folder)
+    group_fastq.out.fastq
+      .map{ grouped -> grouped[1] }   // keep only the file element of each tuple
+      .flatten()
+      .collate(2)                     // consecutive files are grouped in twos
+      .map{ pair -> [pair[0].simpleName - ~/_{0,1}R[12]/, pair] }
+      .set{ splited_fastq }
+
+  emit:
+    fastq = splited_fastq
+}
+
+process split_fastq {
+  // Run flexi_splitter on the staged reads with the given YAML configuration,
+  // writing into the `split` directory that is emitted as `fastq_folder`.
+  // You can get an example of config file here:
+  // src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.split_out != "") {
+    publishDir "results/${params.split_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(reads)
+  tuple val(config_id), path(config)
+
+  output:
+  tuple val(file_id), path("split"), emit: fastq_folder
+
+  script:
+  // Exactly two staged files are passed as a pair (-n 2); any other case
+  // passes only the first file (-n 1).
+  if (reads.size() == 2)
+  """
+  flexi_splitter ${params.split} -n 2 -f ${reads[0]},${reads[1]} -o split -c ${config}
+  """
+  else
+  """
+  flexi_splitter ${params.split} -n 1 -f ${reads[0]} -o split -c ${config}
+  """
+}
+
+// Flatten the per-sample sub-directories of a `split` folder into `results/`.
+//   input : tuple (file_id, reads_folder)
+//   output: tuple (file_id, every entry of results/), emitted as `fastq`
+// NOTE(review): the script reads the literal directory `split/`, not
+// ${reads_folder}; it only works when the staged folder is named `split`.
+process group_fastq {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.split_out != "") {
+    publishDir "results/${params.split_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(reads_folder)
+
+  output:
+  tuple val(file_id), path("results/*"), emit: fastq
+
+  script:
+  // Pipeline: list regular files under split/, drop paths containing
+  // "unassigned", rewrite `split/<dir>/<file>` into the pair
+  // `<path> <dir>_<file>`, then move each path to results/<dir>_<file>.
+"""
+mkdir -p results/
+find split/ -type "f" | \
+  grep -v "unassigned" | \
+  sed -E "s|(split/(.*)/(.*))|\\1 \\2_\\3|g" |
+  awk '{system("mv "\$1" results/"\$2)}'
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml
+++ b/src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml
+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+# Single barcode group named PLATE with eight 4-base sequences.
+# NOTE(review): `reads`, `start` and `stop` presumably give the read index and
+# the position range of the barcode; confirm against the flexi_splitter docs.
+PLATE:
+  coords:
+    reads: 0
+    start: 1
+    stop: 4
+    header: False
+  samples:
+    - name : Plate1
+      seq: GACT
+    - name : Plate2
+      seq: CATG
+    - name : Plate3
+      seq: CCAA
+    - name : Plate4
+      seq: CTGT
+    - name : Plate5
+      seq: GTAG
+    - name : Plate6
+      seq: TGAT
+    - name : Plate7
+      seq: ATCA
+    - name : Plate8
+      seq: TAGA
+
+# One condition per plate; each condition is named after, and refers to, the
+# PLATE sample with the same name.
+conditions:
+    - Plate1 :
+      Plate1
+    - Plate2 :
+      Plate2
+    - Plate3 :
+      Plate3
+    - Plate4 :
+      Plate4
+    - Plate5 :
+      Plate5
+    - Plate6 :
+      Plate6
+    - Plate7 :
+      Plate7
+    - Plate8 :
+      Plate8
--- a/src/nf_modules/flexi_splitter/toy_file_paired.yaml
+++ b/src/nf_modules/flexi_splitter/toy_file_paired.yaml
+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+# Toy configuration with three barcode groups (PCR, RT, UMI) and three
+# conditions built from RT/PCR sample names.
+# NOTE(review): the meaning of `reads`, `start`, `stop` and `header` is not
+# visible here; confirm against the flexi_splitter docs.
+
+# PCR group: six 6-base sequences, some starting with N.
+PCR:
+  coords:
+    reads: 3
+    start: 1
+    stop: 6
+    header: False
+  samples:
+    - name : PCR1
+      seq: NCAGTG
+    - name : PCR2
+      seq : CGATGT
+    - name : PCR3
+      seq: TTAGGC
+    - name : PCR4
+      seq : TGACCA
+    - name: PCR5
+      seq: NGAACG
+    - name: PCR6
+      seq: NCAACA
+# RT group: four 7-base sequences.
+RT:
+  coords:
+    reads: 1
+    start: 6
+    stop: 13
+    header: False
+  samples:
+    - name : RT1
+      seq: TAGTGCC
+    - name : RT2
+      seq: GCTACCC
+    - name: RT3
+      seq: ATCGACC
+    - name: RT4
+      seq: CGACTCC
+# UMI group: coordinates only, no sample list.
+UMI:
+  coords:
+    reads: 1
+    start: 1
+    stop: 6
+    header: False
+# Each condition lists one RT sample and one PCR sample.
+conditions:
+  wt:
+    - RT1
+    - PCR1
+  ko:
+    - RT2
+    - PCR2
+  sample_paired:
+    - RT2
+    - PCR6
--- a/src/nf_modules/g2gtools/main.nf
+++ b/src/nf_modules/g2gtools/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.2.8"
+container_url = "lbmc/g2gtools:${version}"
+
+// Extra options inserted verbatim into `g2gtools vcf2vci`.
+params.vci_build = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.vci_build_out = ""
+
+// Build a VCI file from one or more VCF files and a reference FASTA.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta)
+//   output: `vci`    -> tuple (file_id, *.vci.gz, *.vci.gz.tbi)
+//           `report` -> tuple (file_id, *_report.txt) holding the tool's stderr
+process vci_build {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.vci_build_out != "") {
+    publishDir "results/${params.vci_build_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta)
+  output:
+    tuple val(file_id), path("*.vci.gz"), path("*.vci.gz.tbi"), emit: vci
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+
+  // One ` -i <file>` argument per VCF.
+  input_vcf = ""
+  for (vcf_file in vcf) {
+    input_vcf = input_vcf + " -i ${vcf_file}"
+  }
+"""
+g2gtools vcf2vci \
+  ${params.vci_build} \
+  -p ${task.cpus} \
+  -f ${fasta} \
+  ${input_vcf} \
+  -s ${file_prefix} \
+  -o ${file_prefix}.vci 2> ${file_prefix}_g2gtools_vcf2vci_report.txt
+"""
+}
+
+// Extra options inserted verbatim into `g2gtools patch`.
+params.incorporate_snp = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.incorporate_snp_out = ""
+
+// Run `g2gtools patch` on a FASTA with a VCI file.
+//   input : tuple (file_id, vci, tbi), tuple (ref_id, fasta)
+//   output: `fasta`  -> tuple (file_id, <prefix>_snp.fa, vci, tbi)
+//           `report` -> tuple (file_id, *_report.txt) holding the tool's stderr
+// NOTE(review): the report file is named `..._path_report.txt`; this looks
+// like a typo for "patch", kept as is because the name is runtime output.
+process incorporate_snp {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.incorporate_snp_out != "") {
+    publishDir "results/${params.incorporate_snp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vci), path(tbi)
+    tuple val(ref_id), path(fasta)
+  output:
+    tuple val(file_id), path("${file_prefix}_snp.fa"), path("${vci}"), path("${tbi}"), emit: fasta
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+g2gtools patch \
+  ${params.incorporate_snp} \
+  -p ${task.cpus} \
+  -i ${fasta} \
+  -c ${vci} \
+  -o ${file_prefix}_snp.fa 2> ${file_prefix}_g2gtools_path_report.txt
+"""
+}
+
+// Extra options inserted verbatim into `g2gtools transform`.
+params.incorporate_indel = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.incorporate_indel_out = ""
+
+// Run `g2gtools transform` on a FASTA with a VCI file.
+//   input : tuple (file_id, fasta, vci, tbi)
+//   output: `fasta`  -> tuple (file_id, <prefix>_snp_indel.fa, vci, tbi)
+//           `report` -> tuple (file_id, *_report.txt) holding the tool's stderr
+process incorporate_indel {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.incorporate_indel_out != "") {
+    publishDir "results/${params.incorporate_indel_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta), path(vci), path(tbi)
+  output:
+    tuple val(file_id), path("${file_prefix}_snp_indel.fa"), path("${vci}"), path("${tbi}"), emit: fasta
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+g2gtools transform \
+  ${params.incorporate_indel} \
+  -p ${task.cpus} \
+  -i ${fasta} \
+  -c ${vci} \
+  -o ${file_prefix}_snp_indel.fa 2> ${file_prefix}_g2gtools_transform_report.txt
+"""
+}
+
+// Extra options inserted verbatim into `g2gtools convert`.
+params.convert_gtf = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.convert_gtf_out = ""
+
+// Run `g2gtools convert` on a GTF annotation with a VCI file.
+//   input : tuple (file_id, vci, tbi), tuple (annot_id, gtf)
+//   output: `gtf`    -> tuple (file_id, <prefix>.gtf)
+//           `report` -> tuple (file_id, *_report.txt) holding the tool's stderr
+process convert_gtf {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.convert_gtf_out != "") {
+    publishDir "results/${params.convert_gtf_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vci), path(tbi)
+    tuple val(annot_id), path(gtf)
+  output:
+    tuple val(file_id), path("${file_prefix}.gtf"), emit: gtf
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+g2gtools convert \
+  ${params.convert_gtf} \
+  -i ${gtf} \
+  -c ${vci} \
+  -o ${file_prefix}.gtf 2> ${file_prefix}_g2gtools_convert_report.txt
+"""
+}
+
+// Extra options inserted verbatim into `g2gtools convert`.
+params.convert_bed = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.convert_bed_out = ""
+
+// Run `g2gtools convert` on a BED file with a VCI file.
+//   input : tuple (file_id, vci, tbi), tuple (annot_id, bed)
+//   output: `bed`    -> tuple (file_id, <prefix>.bed)
+//           `report` -> tuple (file_id, *_report.txt) holding the tool's stderr
+process convert_bed {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.convert_bed_out != "") {
+    publishDir "results/${params.convert_bed_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vci), path(tbi)
+    tuple val(annot_id), path(bed)
+  output:
+    tuple val(file_id), path("${file_prefix}.bed"), emit: bed
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // File names are built from file_prefix (first element of a list-valued id),
+  // so a list id never ends up rendered as "[a, b]" inside a file name.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+"""
+g2gtools convert \
+  ${params.convert_bed} \
+  -i ${bed} \
+  -c ${vci} \
+  -o ${file_prefix}.bed 2> ${file_prefix}_g2gtools_convert_report.txt
+"""
+}
+
+// Extra options inserted verbatim into `g2gtools convert`.
+params.convert_bam = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.convert_bam_out = ""
+
+// Run `g2gtools convert` on a BAM file with a VCI file.
+//   input : tuple (file_id, vci, tbi), tuple (bam_id, bam)
+//   output: `bam`    -> tuple (file_id, <prefix>_<bam baseName>.bam)
+//           `report` -> tuple (file_id, *_report.txt) holding the tool's stderr
+process convert_bam {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${bam_id} ${file_id}"
+  if (params.convert_bam_out != "") {
+    publishDir "results/${params.convert_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vci), path(tbi)
+    tuple val(bam_id), path(bam)
+  output:
+    // Must name exactly the file written by the script below: it is derived
+    // from the staged `bam` path, not from the `bam_id` value.
+    tuple val(file_id), path("${file_prefix}_${bam.baseName}.bam"), emit: bam
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // A list-valued id contributes only its first element to file names.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+"""
+g2gtools convert \
+  ${params.convert_bam} \
+  -i ${bam} \
+  -c ${vci} \
+  -o ${file_prefix}_${bam.baseName}.bam 2> ${file_prefix}_g2gtools_convert_report.txt
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/gatk3/main.nf
+++ b/src/nf_modules/gatk3/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "3.8.0"
+container_url = "lbmc/gatk:${version}"
+
+// Extra options inserted verbatim into the HaplotypeCaller command line.
+params.variant_calling = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.variant_calling_out = ""
+
+// GATK3 HaplotypeCaller on one BAM against a reference.
+//   input : tuple (file_id, bam, bai), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *.vcf)
+process variant_calling {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.variant_calling_out != "") {
+    publishDir "results/${params.variant_calling_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam), path(bai)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*.vcf"), emit: vcf
+
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+gatk3 -T HaplotypeCaller \
+  -nct ${task.cpus} \
+  ${params.variant_calling} \
+  -R ${fasta} \
+  -I ${bam} \
+  -o ${file_prefix}.vcf
+"""
+}
+
+// Extra options inserted verbatim into the SelectVariants command line.
+params.filter_snp = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.filter_snp_out = ""
+
+// GATK3 SelectVariants with `-selectType SNP`.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *_snp.vcf)
+process filter_snp {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.filter_snp_out != "") {
+    publishDir "results/${params.filter_snp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_snp.vcf"), emit: vcf
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+gatk3 -T SelectVariants \
+  -nct ${task.cpus} \
+  ${params.filter_snp} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -selectType SNP \
+  -o ${file_prefix}_snp.vcf
+"""
+}
+
+// Extra options inserted verbatim into the SelectVariants command line.
+params.filter_indels = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.filter_indels_out = ""
+
+// GATK3 SelectVariants with `-selectType INDEL`.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *_indel.vcf)
+process filter_indels {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.filter_indels_out != "") {
+    publishDir "results/${params.filter_indels_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_indel.vcf"), emit: vcf
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+gatk3 -T SelectVariants \
+  -nct ${task.cpus} \
+  ${params.filter_indels} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -selectType INDEL \
+  -o ${file_prefix}_indel.vcf
+"""
+}
+
+// Filter expression applied to SNPs; it is embedded in the option string below.
+params.high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
+// Full option string passed to VariantFiltration (expression plus filter name).
+params.high_confidence_snp = "--filterExpression \"${params.high_confidence_snp_filter}\" --filterName \"basic_snp_filter\""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.high_confidence_snp_out = ""
+
+// GATK3 VariantFiltration using the SNP filter options.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *_snp.vcf); the script writes
+//           <prefix>_filtered_snp.vcf, which matches that glob
+process high_confidence_snp {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.high_confidence_snp_out != "") {
+    publishDir "results/${params.high_confidence_snp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_snp.vcf"), emit: vcf
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+gatk3 -T VariantFiltration \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${vcf} \
+  ${params.high_confidence_snp} \
+  -o ${file_prefix}_filtered_snp.vcf
+"""
+}
+
+// Filter expression applied to indels; it is embedded in the option string below.
+params.high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
+// Full option string passed to VariantFiltration (expression plus filter name).
+params.high_confidence_indels = "--filterExpression \"${params.high_confidence_indel_filter}\" --filterName \"basic_indel_filter\""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.high_confidence_indels_out = ""
+
+// GATK3 VariantFiltration using the indel filter options.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *_indel.vcf); the script writes
+//           <prefix>_filtered_indel.vcf, which matches that glob
+process high_confidence_indels {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.high_confidence_indels_out != "") {
+    publishDir "results/${params.high_confidence_indels_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_indel.vcf"), emit: vcf
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+gatk3 -T VariantFiltration \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -V ${vcf} \
+  ${params.high_confidence_indels} \
+  -o ${file_prefix}_filtered_indel.vcf
+"""
+}
+
+// Extra options inserted verbatim into the BaseRecalibrator command line.
+params.recalibrate_snp_table = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.recalibrate_snp_table_out = ""
+
+// GATK3 BaseRecalibrator using SNP and indel files as known sites.
+//   input : tuple (file_id, snp_file, indel_file, bam, bam_idx),
+//           tuple (ref_id, fasta, fai, dict)
+//   output: `recal_table` -> tuple (file_id, recal_data_table)
+process recalibrate_snp_table {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.recalibrate_snp_table_out != "") {
+    publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("recal_data_table"), emit: recal_table
+  script:
+  // User options come from the params map, like in the other processes here.
+"""
+gatk3 -T BaseRecalibrator \
+  -nct ${task.cpus} \
+  ${params.recalibrate_snp_table} \
+  -R ${fasta} \
+  -I ${bam} \
+  -knownSites ${snp_file} \
+  -knownSites ${indel_file} \
+  -o recal_data_table
+"""
+}
+
+// Extra options inserted verbatim into the PrintReads command line.
+params.recalibrate_snp = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.recalibrate_snp_out = ""
+
+// GATK3 PrintReads applying a recalibration table to a BAM.
+//   input : tuple (file_id, snp_file, indel_file, bam, bam_idx),
+//           tuple (table_id, recal_data_table),
+//           tuple (ref_id, fasta, fai, dict)
+//   output: `bam` -> tuple (file_id, *.bam); the script writes <prefix>_recal.bam
+process recalibrate_snp {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.recalibrate_snp_out != "") {
+    publishDir "results/${params.recalibrate_snp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
+    tuple val(table_id), path(recal_data_table)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*.bam"), emit: bam
+  script:
+  // A list-valued id contributes only its first element to file names.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  // -BQSR takes the staged table input, whatever its file name is.
+"""
+gatk3 -T PrintReads \
+  --use_jdk_deflater \
+  --use_jdk_inflater \
+  ${params.recalibrate_snp} \
+  -nct ${task.cpus} \
+  -R ${fasta} \
+  -I ${bam} \
+  -BQSR ${recal_data_table} \
+  -o ${file_prefix}_recal.bam
+"""
+}
+
+// Extra options inserted verbatim into the HaplotypeCaller command line.
+params.haplotype_caller = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.haplotype_caller_out = ""
+
+// GATK3 HaplotypeCaller run with `-ERC GVCF`.
+//   input : tuple (file_id, bam), tuple (ref_id, fasta, fai, dict)
+//   output: `gvcf` -> tuple (file_id, *.gvcf)
+process haplotype_caller {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.haplotype_caller_out != "") {
+    publishDir "results/${params.haplotype_caller_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*.gvcf"), emit: gvcf
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+gatk3 -T HaplotypeCaller \
+  -nct ${task.cpus} \
+  ${params.haplotype_caller} \
+  -R ${fasta} \
+  -I ${bam} \
+  -ERC GVCF \
+  -variant_index_type LINEAR -variant_index_parameter 128000 \
+  -o ${file_prefix}.gvcf
+"""
+}
+
+// Extra options inserted verbatim into the GenotypeGVCFs command line.
+params.gvcf_genotyping = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.gvcf_genotyping_out = ""
+
+// GATK3 GenotypeGVCFs on a gVCF.
+//   input : tuple (file_id, gvcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *.vcf); the script writes <prefix>_joint.vcf
+process gvcf_genotyping {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.gvcf_genotyping_out != "") {
+    publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gvcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*.vcf"), emit: vcf
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+gatk3 -T GenotypeGVCFs \
+  -nct ${task.cpus} \
+  ${params.gvcf_genotyping} \
+  -R ${fasta} \
+  -V ${gvcf} \
+  -o ${file_prefix}_joint.vcf
+"""
+}
+
+// Extra options inserted verbatim into the SelectVariants command line.
+params.select_variants_snp = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.select_variants_snp_out = ""
+
+// GATK3 SelectVariants with `-selectType SNP`, writing <prefix>_joint_snp.vcf.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *_joint_snp.vcf)
+process select_variants_snp {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.select_variants_snp_out != "") {
+    publishDir "results/${params.select_variants_snp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+gatk3 -T SelectVariants \
+  -nct ${task.cpus} \
+  ${params.select_variants_snp} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -selectType SNP \
+  -o ${file_prefix}_joint_snp.vcf
+"""
+}
+
+// Extra options inserted verbatim into the SelectVariants command line.
+params.select_variants_indels = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.select_variants_indels_out = ""
+
+// GATK3 SelectVariants with `-selectType INDEL`, writing <prefix>_joint_indel.vcf.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *_joint_indel.vcf)
+process select_variants_indels {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.select_variants_indels_out != "") {
+    publishDir "results/${params.select_variants_indels_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf
+  script:
+  // A list-valued id contributes only its first element to file names.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+"""
+gatk3 -T SelectVariants \
+  -nct ${task.cpus} \
+  ${params.select_variants_indels} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -selectType INDEL \
+  -o ${file_prefix}_joint_indel.vcf
+"""
+}
+
+// Extra options inserted verbatim into the FastaAlternateReferenceMaker command line.
+params.personalized_genome = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.personalized_genome_out = ""
+
+// GATK3 FastaAlternateReferenceMaker on a reference FASTA and a VCF.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `fasta` -> tuple (file_id, *_genome.fasta)
+process personalized_genome {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.personalized_genome_out != "") {
+    publishDir "results/${params.personalized_genome_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_genome.fasta"), emit: fasta
+
+  script:
+  // A list-valued id contributes only its first element to file names.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  // -R takes the staged `fasta` input; this process declares no other reference.
+"""
+gatk3 -T FastaAlternateReferenceMaker \
+  ${params.personalized_genome} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -o ${file_prefix}_genome.fasta
+"""
+}
+
--- a/src/nf_modules/gatk4/main.nf
+++ b/src/nf_modules/gatk4/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "4.2.0.0"
+container_url = "broadinstitute/gatk:${version}"
+
+// Derive the file-name prefix from a sample identifier.
+//   List -> its first element
+//   Map  -> `file_id.id` when the map has a 'library' key, otherwise `file_id[0]`
+//   else -> the identifier itself
+// Only local values are used: nothing is written to the script binding, so
+// tasks evaluated concurrently cannot overwrite each other's prefix.
+def get_file_prefix(file_id) {
+  if (file_id instanceof List) {
+    return file_id[0]
+  }
+  if (file_id instanceof Map) {
+    return file_id.containsKey('library') ? file_id.id : file_id[0]
+  }
+  return file_id
+}
+
+include {
+  index_fasta as samtools_index_fasta;
+  index_bam;
+} from './../samtools/main.nf'
+include {
+  index_fasta as picard_index_fasta;
+  index_bam as picard_index_bam;
+  mark_duplicate;
+} from './../picard/main.nf'
+
+// Sub-directory of results/ used by publishDir; empty disables publishing.
+params.variant_calling_out = ""
+
+// Germline cohort calling: mark duplicates, index the BAM and the FASTA with
+// both samtools and picard, call variants per sample, then genotype all
+// samples together.
+//   take: bam, fasta
+//   emit: vcf
+workflow germline_cohort_data_variant_calling {
+  take:
+    bam
+    fasta
+  main:
+    // reference preparation: FASTA joined with its picard and samtools indexes
+    picard_index_fasta(fasta)
+    samtools_index_fasta(fasta)
+    fasta
+      .join(picard_index_fasta.out.index)
+      .join(samtools_index_fasta.out.index)
+      .set{ indexed_fasta }
+
+    // alignment preparation: duplicate-marked BAM joined with both indexes
+    mark_duplicate(bam)
+    index_bam(mark_duplicate.out.bam)
+    picard_index_bam(mark_duplicate.out.bam)
+    index_bam.out.bam_idx
+      .join(picard_index_bam.out.index)
+      .set{ indexed_bam }
+
+    // variant calling
+    call_variants_per_sample(indexed_bam, indexed_fasta.collect())
+    call_variants_all_sample(call_variants_per_sample.out.gvcf, indexed_fasta)
+  emit:
+    vcf = call_variants_all_sample.out.vcf
+}
+
+/*******************************************************************/
+// Base quality score recalibration: index the known-sites VCF, compute the
+// recalibration table, then apply it to the BAM.
+//   take: bam_idx, fasta_idx, vcf
+//   emit: bam
+workflow base_quality_recalibrator {
+  take:
+    bam_idx
+    fasta_idx
+    vcf
+
+  main:
+    index_vcf(vcf)
+    compute_base_recalibration(bam_idx, fasta_idx, index_vcf.out.vcf_idx)
+    apply_base_recalibration(bam_idx, fasta_idx, compute_base_recalibration.out.table)
+
+  emit:
+    bam = apply_base_recalibration.out.bam
+}
+
+// GATK4 IndexFeatureFile on a VCF.
+//   input : tuple (file_id, vcf)
+//   output: `vcf_idx` -> tuple (file_id, vcf, files matched by "*")
+process index_vcf {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  input:
+    tuple val(file_id), path(vcf)
+  output:
+    tuple val(file_id), path("${vcf}"), path("*"), emit: vcf_idx
+
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+"""
+gatk --java-options "-Xmx${heap_gb}G" IndexFeatureFile \
+  -I ${vcf}
+"""
+}
+
+// GATK4 BaseRecalibrator with one `--known-sites` argument per VCF.
+//   input : tuple (file_id, bam, bam_idx, bam_idx_bis),
+//           tuple (ref_id, fasta, fai, dict),
+//           tuple (vcf_id, vcf, vcf_idx)
+//   output: `table` -> tuple (file_id, <bam simpleName>.table)
+process compute_base_recalibration {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  input:
+    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+    tuple val(vcf_id), path(vcf), path(vcf_idx)
+  output:
+    tuple val(file_id), path("${bam.simpleName}.table"), emit: table
+
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+  // A list of VCFs yields one flag per file; a single VCF yields one flag.
+  def vcf_cmd = (vcf instanceof List) ?
+    vcf.collect{ known -> "--known-sites ${known} " }.join("") :
+    "--known-sites ${vcf} "
+"""
+ gatk --java-options "-Xmx${heap_gb}G" BaseRecalibrator \
+   -I ${bam} \
+   -R ${fasta} \
+   ${vcf_cmd} \
+   -O ${bam.simpleName}.table
+"""
+}
+
+// GATK4 ApplyBQSR: apply a recalibration table to a BAM.
+//   input : tuple (file_id, bam, bam_idx, bam_idx_bis),
+//           tuple (ref_id, fasta, fai, dict),
+//           tuple (table_id, table)
+//   output: `bam` -> tuple (file_id, <bam simpleName>_recalibrate.bam)
+process apply_base_recalibration {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  input:
+    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+    tuple val(table_id), path(table)
+  output:
+    tuple val(file_id), path("${bam.simpleName}_recalibrate.bam"), emit: bam
+
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+"""
+ gatk --java-options "-Xmx${heap_gb}G" ApplyBQSR \
+   -R ${fasta} \
+   -I ${bam} \
+   --bqsr-recal-file ${table} \
+   -O ${bam.simpleName}_recalibrate.bam
+"""
+}
+
+/*******************************************************************/
+// Sub-directory of results/ used for publishing gVCFs; empty disables publishing.
+params.variant_calling_gvcf_out = ""
+
+// GATK4 HaplotypeCaller run with `-ERC GVCF` on one BAM.
+//   input : tuple (file_id, bam, bam_idx, bam_idx_bis),
+//           tuple (ref_id, fasta, fai, dict)
+//   output: `gvcf` -> tuple (file_id, <bam simpleName>.gvcf.gz)
+process call_variants_per_sample {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.variant_calling_gvcf_out != "") {
+    publishDir "results/${params.variant_calling_gvcf_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("${bam.simpleName}.gvcf.gz"), emit: gvcf
+
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+"""
+ gatk --java-options "-Xmx${heap_gb}G" HaplotypeCaller  \
+   -R ${fasta} \
+   -I ${bam} \
+   -O ${bam.simpleName}.gvcf.gz \
+   -ERC GVCF
+"""
+}
+
+/*******************************************************************/
+
+// Joint calling: index and validate each gVCF, combine the gVCFs grouped by
+// key, then genotype the combined gVCF.
+//   take: gvcf, fasta_idx
+//   emit: vcf
+workflow call_variants_all_sample {
+  take:
+    gvcf
+    fasta_idx
+
+  main:
+    index_gvcf(gvcf)
+    validate_gvcf(index_gvcf.out.gvcf_idx, fasta_idx.collect())
+    // groupTuple gathers the validated gVCFs that share the same key
+    consolidate_gvcf(validate_gvcf.out.gvcf.groupTuple(), fasta_idx.collect())
+    genomic_db_call(consolidate_gvcf.out.gvcf_idx, fasta_idx.collect())
+  emit:
+    vcf = genomic_db_call.out.vcf
+}
+
+// GATK4 IndexFeatureFile on a gVCF; stderr is kept as a report file.
+//   input : tuple (file_id, gvcf)
+//   output: `gvcf_idx` -> tuple (file_id, gvcf, <gvcf>.tbi)
+//           `report`   -> tuple (file_id, <gvcf simpleName>_IndexFeatureFile_report.txt)
+process index_gvcf {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  input:
+    tuple val(file_id), path(gvcf)
+  output:
+    tuple val(file_id), path("${gvcf}"), path("${gvcf}.tbi"), emit: gvcf_idx
+    tuple val(file_id), path("${gvcf.simpleName}_IndexFeatureFile_report.txt"), emit: report
+
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+"""
+gatk --java-options "-Xmx${heap_gb}G" IndexFeatureFile \
+      -I ${gvcf} 2> ${gvcf.simpleName}_IndexFeatureFile_report.txt
+"""
+}
+
+// GATK4 ValidateVariants (-gvcf) on a gVCF; the input files are re-emitted.
+//   input : tuple (file_id, gvcf, gvcf_idx), tuple (ref_id, fasta, fai, dict)
+//   output: `gvcf` -> tuple (file_id, gvcf, gvcf_idx)
+process validate_gvcf {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  input:
+    tuple val(file_id), path(gvcf), path(gvcf_idx)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("${gvcf}"), path("${gvcf_idx}"), emit: gvcf
+
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+"""
+gatk --java-options "-Xmx${heap_gb}G" ValidateVariants \
+   -V ${gvcf} \
+   -R ${fasta} -gvcf
+"""
+}
+
+// GATK4 CombineGVCFs over one or several gVCFs, followed by IndexFeatureFile
+// on the combined file. Each tool's stderr goes to its own report file.
+//   input : tuple (file_id, gvcf, gvcf_idx), tuple (ref_id, fasta, fai, dict)
+//   output: `gvcf_idx` -> tuple (file_id, <prefix>.gvcf, <prefix>.gvcf.idx)
+//           `report`   -> tuple (file_id, <prefix>_CombineGVCFs_report.txt)
+process consolidate_gvcf {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  input:
+    tuple val(file_id), path(gvcf), path(gvcf_idx)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("${file_prefix}.gvcf"), path("${file_prefix}.gvcf.idx"), emit: gvcf_idx
+    tuple val(file_id), path("${file_prefix}_CombineGVCFs_report.txt"), emit: report
+
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+  // A list of gVCFs yields one -V flag per file; a single gVCF yields one flag.
+  def gvcf_cmd = (gvcf instanceof List) ?
+    gvcf.collect{ one_gvcf -> "-V ${one_gvcf} " }.join("") :
+    "-V ${gvcf} "
+"""
+mkdir tmp
+gatk --java-options "-Xmx${heap_gb}G" CombineGVCFs \
+    ${gvcf_cmd} \
+    -R ${fasta} \
+    -O ${file_prefix}.gvcf 2> ${file_prefix}_CombineGVCFs_report.txt
+gatk --java-options "-Xmx${heap_gb}G" IndexFeatureFile \
+      -I ${file_prefix}.gvcf 2> ${file_prefix}_IndexFeatureFile_report.txt
+"""
+}
+
+// GATK4 GenotypeGVCFs on a combined gVCF, using ./tmp as temporary directory.
+//   input : tuple (file_id, gvcf, gvcf_idx), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, <gvcf simpleName>.vcf.gz)
+process genomic_db_call {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.variant_calling_out != "") {
+    publishDir "results/${params.variant_calling_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(gvcf), path(gvcf_idx)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("${gvcf.simpleName}.vcf.gz"), emit: vcf
+
+  script:
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  xmx_memory = "${task.memory}" - ~/\s*GB/
+  file_prefix = get_file_prefix(file_id)
+  // The command and the output name both use `gvcf` as a single file, so no
+  // per-file argument list is built here.
+"""
+mkdir tmp
+gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \
+   -R ${fasta} \
+   -V ${gvcf} \
+   -O ${gvcf.simpleName}.vcf.gz \
+   --tmp-dir ./tmp
+"""
+}
+
+/*******************************************************************/
+// Extra options inserted verbatim into the HaplotypeCaller command line.
+params.variant_calling = ""
+
+// GATK4 HaplotypeCaller on one BAM against a reference.
+//   input : tuple (file_id, bam, bai), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *.vcf); the script writes <bam simpleName>.vcf
+process variant_calling {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.variant_calling_out != "") {
+    publishDir "results/${params.variant_calling_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam), path(bai)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*.vcf"), emit: vcf
+
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+"""
+gatk --java-options "-Xmx${heap_gb}G" HaplotypeCaller \
+  ${params.variant_calling} \
+  -R ${fasta} \
+  -I ${bam} \
+  -O ${bam.simpleName}.vcf
+"""
+}
+
+// Extra options inserted verbatim into the SelectVariants command line.
+params.filter_snp = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.filter_snp_out = ""
+
+// GATK4 SelectVariants with `-select-type SNP`.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *_snp.vcf); the script writes <vcf simpleName>_snp.vcf
+process filter_snp {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.filter_snp_out != "") {
+    publishDir "results/${params.filter_snp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_snp.vcf"), emit: vcf
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+"""
+gatk --java-options "-Xmx${heap_gb}G" SelectVariants \
+  ${params.filter_snp} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -select-type SNP \
+  -O ${vcf.simpleName}_snp.vcf
+"""
+}
+
+// Extra options inserted verbatim into the SelectVariants command line.
+params.filter_indels = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.filter_indels_out = ""
+
+// GATK4 SelectVariants with `-select-type INDEL`.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *_indel.vcf); the script writes <vcf simpleName>_indel.vcf
+process filter_indels {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.filter_indels_out != "") {
+    publishDir "results/${params.filter_indels_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_indel.vcf"), emit: vcf
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+"""
+gatk --java-options "-Xmx${heap_gb}G" SelectVariants \
+  ${params.filter_indels} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -select-type INDEL \
+  -O ${vcf.simpleName}_indel.vcf
+"""
+}
+
+// Filter expression applied to SNPs; it is embedded in the option string below.
+params.high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)"
+// Full option string passed to VariantFiltration (expression plus filter name).
+params.high_confidence_snp = "--filter-expression \"${params.high_confidence_snp_filter}\" --filter-name \"basic_snp_filter\""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.high_confidence_snp_out = ""
+
+// GATK4 VariantFiltration using the SNP filter options.
+//   input : tuple (file_id, vcf), tuple (ref_id, fasta, fai, dict)
+//   output: `vcf` -> tuple (file_id, *_snp.vcf); the script writes
+//           <vcf simpleName>_filtered_snp.vcf, which matches that glob
+process high_confidence_snp {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.high_confidence_snp_out != "") {
+    publishDir "results/${params.high_confidence_snp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_snp.vcf"), emit: vcf
+  script:
+  file_prefix = get_file_prefix(file_id)
+  // task.memory as text with the first "GB" (and preceding whitespace) removed
+  heap_gb = "${task.memory}" - ~/\s*GB/
+"""
+gatk --java-options "-Xmx${heap_gb}G" VariantFiltration \
+  -R ${fasta} \
+  -V ${vcf} \
+  ${params.high_confidence_snp} \
+  -O ${vcf.simpleName}_filtered_snp.vcf
+"""
+}
+
+// Filter expression handed to VariantFiltration for indels.
+params.high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0"
+// Options inserted into the `gatk VariantFiltration` call; by default the
+// expression above, registered under the filter name "basic_indel_filter".
+params.high_confidence_indels = "--filter-expression \"${params.high_confidence_indel_filter}\" --filter-name \"basic_indel_filter\""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.high_confidence_indels_out = ""
+
+// Run GATK VariantFiltration on an indel VCF.
+//   input : (file_id, vcf) and (ref_id, fasta, fai, dict)
+//   output: vcf -> (file_id, files matching *_indel.vcf); the script writes
+//           <vcf simpleName>_filtered_indel.vcf
+process high_confidence_indels {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_mono_cpus"
+  if (params.high_confidence_indels_out != "") {
+    publishDir "results/${params.high_confidence_indels_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+
+  output:
+    tuple val(file_id), path("*_indel.vcf"), emit: vcf
+
+  script:
+  // Strip the "GB" unit from the rendered task memory; "G" is re-added in -Xmx.
+  xmx_memory = task.memory.toString() - ~/\s*GB/
+  file_prefix = get_file_prefix(file_id)
+  def filtered_vcf = "${vcf.simpleName}_filtered_indel.vcf"
+"""
+gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \
+  -R ${fasta} \
+  -V ${vcf} \
+  ${params.high_confidence_indels} \
+  -O ${filtered_vcf}
+"""
+}
+
+// Extra command-line options inserted into the `gatk BaseRecalibrator` call.
+params.recalibrate_snp_table = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.recalibrate_snp_table_out = ""
+
+// Index the SNP and indel VCFs, then build a base-recalibration table from
+// the BAM using both VCFs as known sites.
+//   input : (file_id, snp_file, indel_file, bam, bam_idx, bam_idx_bis)
+//           and (ref_id, fasta, fai, dict)
+//   output: recal_table -> (file_id, recal_data_table)
+process recalibrate_snp_table {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_mono_cpus"
+  if (params.recalibrate_snp_table_out != "") {
+    publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(bam_idx_bis)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+
+  output:
+    tuple val(file_id), path("recal_data_table"), emit: recal_table
+
+  script:
+  // Strip the "GB" unit from the rendered task memory; "G" is re-added below.
+  xmx_memory = task.memory.toString() - ~/\s*GB/
+  file_prefix = get_file_prefix(file_id)
+  // The same JVM heap setting is shared by the three gatk invocations.
+  def java_opts = "-Xmx${xmx_memory}G"
+"""
+gatk --java-options "${java_opts}" IndexFeatureFile \
+  -I ${snp_file}
+gatk --java-options "${java_opts}" IndexFeatureFile \
+  -I ${indel_file}
+gatk --java-options "${java_opts}" BaseRecalibrator \
+  ${params.recalibrate_snp_table} \
+  -R ${fasta} \
+  -I ${bam} \
+  -known-sites ${snp_file} \
+  -known-sites ${indel_file} \
+  -O recal_data_table
+"""
+}
+
+// Extra command-line options inserted into the `gatk ApplyBQSR` call.
+params.recalibrate_snp = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.recalibrate_snp_out = ""
+
+// Apply a base-quality recalibration table to a BAM with GATK ApplyBQSR.
+//   input : (file_id, snp_file, indel_file, bam, bam_idx, recal_table)
+//           and (ref_id, fasta, fai, dict); snp_file and indel_file are
+//           staged but not referenced by the command.
+//   output: bam -> (file_id, files matching *.bam); the script writes
+//           <bam simpleName>_recal.bam
+process recalibrate_snp {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.recalibrate_snp_out != "") {
+    publishDir "results/${params.recalibrate_snp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(recal_table)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*.bam"), emit: bam
+  script:
+  // Strip the "GB" unit from the rendered task memory; "G" is re-added in -Xmx.
+  xmx_memory = "${task.memory}" - ~/\s*GB/
+  file_prefix = get_file_prefix(file_id)
+"""
+gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \
+  ${params.recalibrate_snp} \
+  -R ${fasta} \
+  -I ${bam} \
+  --bqsr-recal-file ${recal_table} \
+  -O ${bam.simpleName}_recal.bam
+"""
+}
+
+// Extra command-line options inserted into the `gatk HaplotypeCaller` call.
+params.haplotype_caller = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.haplotype_caller_out = ""
+
+// Call variants on a BAM with GATK HaplotypeCaller in GVCF mode (-ERC GVCF).
+//   input : (file_id, bam) and (ref_id, fasta, fai, dict)
+//   output: gvcf -> (file_id, files matching *.gvcf)
+process haplotype_caller {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_mono_cpus"
+  if (params.haplotype_caller_out != "") {
+    publishDir "results/${params.haplotype_caller_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+
+  output:
+    tuple val(file_id), path("*.gvcf"), emit: gvcf
+
+  script:
+  // Strip the "GB" unit from the rendered task memory; "G" is re-added in -Xmx.
+  xmx_memory = task.memory.toString() - ~/\s*GB/
+  file_prefix = get_file_prefix(file_id)
+  def gvcf_name = "${bam.simpleName}.gvcf"
+"""
+gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \
+  ${params.haplotype_caller} \
+  -R ${fasta} \
+  -I ${bam} \
+  -ERC GVCF \
+  -O ${gvcf_name}
+"""
+}
+
+// Extra command-line options inserted into the `gatk GenotypeGVCFs` call.
+params.gvcf_genotyping = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.gvcf_genotyping_out = ""
+
+// Genotype a GVCF with GATK GenotypeGVCFs.
+//   input : (file_id, gvcf) and (ref_id, fasta, fai, dict)
+//   output: vcf -> (file_id, files matching *.vcf.gz); the script writes
+//           <gvcf simpleName>_joint.vcf.gz
+process gvcf_genotyping {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_mono_cpus"
+  if (params.gvcf_genotyping_out != "") {
+    publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gvcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+
+  output:
+    tuple val(file_id), path("*.vcf.gz"), emit: vcf
+
+  script:
+  // Strip the "GB" unit from the rendered task memory; "G" is re-added in -Xmx.
+  xmx_memory = task.memory.toString() - ~/\s*GB/
+  file_prefix = get_file_prefix(file_id)
+  def joint_vcf = "${gvcf.simpleName}_joint.vcf.gz"
+"""
+gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \
+  ${params.gvcf_genotyping} \
+  -R ${fasta} \
+  -V ${gvcf} \
+  -O ${joint_vcf}
+"""
+}
+
+// Extra command-line options inserted into the `gatk SelectVariants` call.
+params.select_variants_snp = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.select_variants_snp_out = ""
+
+// Keep only the SNP records of a VCF (GATK SelectVariants, -select-type SNP).
+//   input : (file_id, vcf) and (ref_id, fasta, fai, dict)
+//   output: vcf -> (file_id, files matching *_joint_snp.vcf)
+process select_variants_snp {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.select_variants_snp_out != "") {
+    publishDir "results/${params.select_variants_snp_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf
+  script:
+  // Strip the "GB" unit from the rendered task memory; a single "G" suffix is
+  // re-added in -Xmx (a doubled "GG" is not a valid JVM heap size).
+  xmx_memory = "${task.memory}" - ~/\s*GB/
+  file_prefix = get_file_prefix(file_id)
+"""
+gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
+  ${params.select_variants_snp} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -select-type SNP \
+  -O ${vcf.simpleName}_joint_snp.vcf
+"""
+}
+
+// Extra command-line options inserted into the `gatk SelectVariants` call.
+params.select_variants_indels = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.select_variants_indels_out = ""
+
+// Keep only the INDEL records of a VCF (GATK SelectVariants, -select-type INDEL).
+//   input : (file_id, vcf) and (ref_id, fasta, fai, dict)
+//   output: vcf -> (file_id, files matching *_joint_indel.vcf); the file name
+//           is built from get_file_prefix(file_id), not from the VCF name.
+process select_variants_indels {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_mono_cpus"
+  if (params.select_variants_indels_out != "") {
+    publishDir "results/${params.select_variants_indels_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+
+  output:
+    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf
+
+  script:
+  // Strip the "GB" unit from the rendered task memory; "G" is re-added in -Xmx.
+  xmx_memory = task.memory.toString() - ~/\s*GB/
+  file_prefix = get_file_prefix(file_id)
+  def indel_vcf = "${file_prefix}_joint_indel.vcf"
+"""
+gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
+  ${params.select_variants_indels} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -select-type INDEL \
+  -O ${indel_vcf}
+"""
+}
+
+// Extra command-line options inserted into the
+// `gatk FastaAlternateReferenceMaker` call.
+params.personalized_genome = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.personalized_genome_out = ""
+
+// Build an alternate reference FASTA from a reference and a VCF with GATK
+// FastaAlternateReferenceMaker.
+//   input : (file_id, vcf) and (ref_id, fasta, fai, dict)
+//   output: fasta -> (file_id, files matching *_genome.fasta)
+process personalized_genome {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.personalized_genome_out != "") {
+    publishDir "results/${params.personalized_genome_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(fasta), path(fai), path(dict)
+  output:
+    tuple val(file_id), path("*_genome.fasta"), emit: fasta
+
+  script:
+  // Strip the "GB" unit from the rendered task memory; "G" is re-added in -Xmx.
+  xmx_memory = "${task.memory}" - ~/\s*GB/
+  file_prefix = get_file_prefix(file_id)
+  // The reference is the staged `fasta` input; no `reference` variable is
+  // declared in this process.
+"""
+gatk --java-options "-Xmx${xmx_memory}G" FastaAlternateReferenceMaker \
+  ${params.personalized_genome} \
+  -R ${fasta} \
+  -V ${vcf} \
+  -O ${vcf.simpleName}_genome.fasta
+"""
+}
+
+
+
--- a/src/nf_modules/gffread/main.nf
+++ b/src/nf_modules/gffread/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.12.2"
+container_url = "lbmc/gffread:${version}"
+
+// Declared for this module; not referenced by the gffread command below.
+params.gffread = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.gffread_out = ""
+
+// Extract sequences from a genome FASTA according to a GTF with
+// `gffread -M -x`, then post-process the result with awk/grep into
+// <file_prefix>.fasta.
+//   input : (file_id, gtf) and (fasta_id, fasta)
+//   output: fasta -> (fasta_id, <file_prefix>.fasta)
+process gffread {
+  container = "${container_url}"
+  tag "${file_prefix}"
+  label "big_mem_mono_cpus"
+  if (params.gffread_out != "") {
+    publishDir "results/${params.gffread_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(gtf)
+  tuple val(fasta_id), path(fasta)
+
+  output:
+    tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta
+
+  script:
+  // When file_id is a list, its first element names the output.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  // gffread writes dup_<file_prefix>.fasta; the awk program rebuilds
+  // header/sequence pairs while tracking sequences already seen (array `a`),
+  // and grep drops empty lines.
+  """
+  gffread ${gtf} -g ${fasta} -M -x dup_${file_prefix}.fasta
+  awk 'BEGIN {i = 1;} { if (\$1 ~ /^>/) { tmp = h[i]; h[i] = \$1; } else if (!a[\$1]) { s[i] = \$1; a[\$1] = "1"; i++; } else { h[i] = tmp; } } END { for (j = 1; j < i; j++) { print h[j]; print s[j]; } }' < dup_${file_prefix}.fasta | grep -v -e "^\$" > ${file_prefix}.fasta
+  """
+}
+
+// Declared for this module; not referenced by the commands below.
+params.spliced_cds = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.spliced_cds_out = ""
+
+// Decompress a gzipped FASTA and GTF, then run `gffread -M -x` on the
+// decompressed copies to write <file_prefix>.fasta.
+//   input : (file_id, gtf) and (fasta_id, fasta)
+//   output: fasta -> (fasta_id, <file_prefix>.fasta)
+process spliced_cds {
+  container = "${container_url}"
+  tag "${file_prefix}"
+  label "big_mem_mono_cpus"
+  if (params.spliced_cds_out != "") {
+    publishDir "results/${params.spliced_cds_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(gtf)
+  tuple val(fasta_id), path(fasta)
+
+  output:
+    tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta
+
+  script:
+  // When file_id is a list, its first element names the output.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  // Names of the decompressed working copies.
+  def plain_fasta = "${fasta.simpleName}_un.fasta"
+  def plain_gtf = "${gtf.simpleName}_un.gtf"
+  """
+gzip -dck ${fasta} > ${plain_fasta}
+gzip -dck ${gtf} > ${plain_gtf}
+gffread ${plain_gtf} -g ${plain_fasta} -M \
+  -x ${file_prefix}.fasta
+  """
+}
\ No newline at end of file
--- a/src/nf_modules/guppy-cpu/main.nf
+++ b/src/nf_modules/guppy-cpu/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "5.0.11"
+container_url = "lbmc/guppy-cpu:${version}"
+
+// Sub-directory of results/ where outputs are copied ("" disables
+// publishing); the same value is passed to guppy_basecaller as -s.
+params.basecalling_out = ""
+// Values passed to guppy_basecaller as --flowcell and --kit.
+params.flowcell = "FLO-MIN106"
+params.kit = "SQK-PCS109"
+// Values passed as --cpu_threads_per_caller and --num_callers.
+params.cpu_threads_per_caller = 4
+params.num_callers = 1
+
+// Warn when the flowcell or kit is empty. These checks previously sat inside
+// the process body and relied on undefined `errorFlowcell` / `errorKit`
+// names, so they failed instead of warning.
+if (params.flowcell == "") {
+  log.warn "WARNING ! No Flowcell type given..."
+}
+if (params.kit == "") {
+  log.warn "WARNING ! No kit type given..."
+}
+
+// Basecall fast5 input with the CPU build of guppy_basecaller.
+//   input : (file_id, fast5)
+//   output: fastq -> (file_id, files matching *.fastq*)
+process basecall_fast5 {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.basecalling_out != "") {
+    publishDir "results/${params.basecalling_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fast5)
+
+  output:
+    tuple val(file_id), path("*.fastq*"), emit: fastq
+
+  script:
+  // `fast5` is the staged input; it is interpolated directly (path() is an
+  // input/output qualifier, not a function usable inside the script).
+  // NOTE(review): with the default empty params.basecalling_out, -s receives
+  // no value -- confirm the intended save path.
+"""
+guppy_basecaller --compress_fastq \
+    -i ${fast5} \
+    -s ${params.basecalling_out} \
+    --cpu_threads_per_caller ${params.cpu_threads_per_caller} \
+    --num_callers ${params.num_callers} \
+    --flowcell ${params.flowcell} \
+    --kit ${params.kit}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/guppy-gpu/main.nf
+++ b/src/nf_modules/guppy-gpu/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "5.0.11"
+container_url = "lbmc/guppy-gpu:${version}"
+
+// Sub-directory of results/ where outputs are copied ("" disables
+// publishing); the same value is passed to guppy_basecaller as -s.
+params.basecalling_out = ""
+// Values passed to guppy_basecaller as --flowcell and --kit.
+params.flowcell = ""
+params.kit = ""
+// Value passed as --gpu_runners_per_device.
+params.gpu_runners_per_device = 16
+
+// Warn when the flowcell or kit is empty (the defaults above). These checks
+// previously sat inside the process body and relied on undefined
+// `errorFlowcell` / `errorKit` names, so they failed instead of warning.
+if (params.flowcell == "") {
+  log.warn "WARNING ! No Flowcell type given..."
+}
+if (params.kit == "") {
+  log.warn "WARNING ! No kit type given..."
+}
+
+// Basecall fast5 input with the GPU build of guppy_basecaller
+// (-x "cuda:all", --min_qscore 7.0).
+//   input : (file_id, fast5)
+//   output: fastq -> (file_id, files matching *.fastq*)
+process basecall_fast5 {
+  container = "${container_url}"
+  // Need to create a profile using GPUs
+  label ""
+  tag "$file_id"
+  if (params.basecalling_out != "") {
+    publishDir "results/${params.basecalling_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fast5)
+
+  output:
+    tuple val(file_id), path("*.fastq*"), emit: fastq
+
+  script:
+  // `fast5` is the staged input; it is interpolated directly (path() is an
+  // input/output qualifier, not a function usable inside the script).
+  // NOTE(review): with the default empty params.basecalling_out, -s receives
+  // no value -- confirm the intended save path.
+"""
+guppy_basecaller --compress_fastq -x "cuda:all" --min_qscore 7.0 \
+    -i ${fast5} \
+    -s ${params.basecalling_out} \
+    --gpu_runners_per_device ${params.gpu_runners_per_device} \
+    --flowcell ${params.flowcell} \
+    --kit ${params.kit}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/hisat2/main.nf
+++ b/src/nf_modules/hisat2/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "2.2.1"
+container_url = "lbmc/hisat2:${version}"
+
+// Extra command-line options inserted into the `hisat2-build` call.
+params.index_fasta = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.index_fasta_out = ""
+
+// Build a HISAT2 index from a gzipped FASTA.
+//   input : (file_id, fasta)
+//   output: index  -> (file_id, files matching *.ht2*)
+//           report -> (file_id, files matching *_report.txt)
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+
+  output:
+    tuple val(file_id), path("*.ht2*"), emit: index
+    tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // Decompress to stdout rather than in place: the staged input is normally
+  // a symbolic link, which gzip refuses to replace, and the input file is
+  // left untouched this way.
+"""
+gunzip -c ${fasta} > ${fasta.baseName}
+hisat2-build -p ${task.cpus} ${params.index_fasta} \
+  ${fasta.baseName} \
+  ${fasta.simpleName} &> \
+  ${fasta.simpleName}_hisat2_index_report.txt
+
+if grep -q "Error" ${fasta.simpleName}_hisat2_index_report.txt; then
+  exit 1
+fi
+"""
+}
+
+// Extra command-line options inserted into the `hisat2` call.
+params.mapping_fastq = ""
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.mapping_fastq_out = ""
+
+// Map reads with hisat2 and pipe the alignments through samtools view/sort
+// into <file_prefix>.bam; hisat2's stderr goes to a report file, and the task
+// fails when that report contains "Error".
+//   input : (index_id, index) and (file_id, reads)
+//   output: bam    -> (file_id, files matching *.bam)
+//           report -> files matching *_report.txt
+process mapping_fastq {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_multi_cpus"
+  if (params.mapping_fastq_out != "") {
+    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(index_id), path(index)
+  tuple val(file_id), path(reads)
+
+  output:
+  tuple val(file_id), path("*.bam"), emit: bam
+  path "*_report.txt", emit: report
+
+  script:
+  // Index basename: whatever precedes ".1.ht2" in an index file name,
+  // falling back to the first index element when no file matches.
+  index_id = index[0]
+  for (index_file in index) {
+    def ht2_match = (index_file =~ /(.*)\.1\.ht2.*/)
+    if (ht2_match) {
+      index_id = ht2_match[0][1]
+    }
+  }
+
+  // Output prefix: first element of a list id, first value of a map id,
+  // otherwise the id itself.
+  if (file_id instanceof List) {
+    file_prefix = file_id[0]
+  } else if (file_id instanceof Map) {
+    file_prefix = file_id.values()[0]
+  } else {
+    file_prefix = file_id
+  }
+
+  // Two read files are passed as -1/-2; anything else is passed with -U.
+  if (reads.size() == 2)
+  """
+  hisat2 ${params.mapping_fastq} \
+    -p ${task.cpus} \
+    -x ${index_id} \
+    -1 ${reads[0]} \
+    -2 ${reads[1]} 2> \
+    ${file_prefix}_ht2_mapping_report.txt \
+    | samtools view -@ ${task.cpus} -bS - \
+    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam
+
+  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
+    exit 1
+  fi
+  """
+  else
+  """
+  hisat2 ${params.mapping_fastq} \
+    -p ${task.cpus} \
+    -x ${index_id} \
+    -U ${reads} 2> \
+    ${file_prefix}_ht2_mapping_report.txt \
+    | samtools view -@ ${task.cpus} -bS - \
+    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam
+  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
+    exit 1
+  fi
+  """
+}
--- a/src/nf_modules/htseq/main.nf
+++ b/src/nf_modules/htseq/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "1.99.2"
+container_url = "lbmc/htseq:${version}"
+
+// Sub-directory of results/ where htseq_count outputs are copied;
+// "" disables publishing.
+params.htseq_out = ""
+
+// Convert a GFF3 annotation to GTF with `gffread -T`.
+//   input : (genome_id, gff3_file)
+//   output: gtf -> <genome_id>.gtf
+process gff3_2_gtf {
+  container = "dceoy/cufflinks"
+  label "small_mem_mono_cpus"
+
+  input:
+    tuple val(genome_id), path(gff3_file)
+
+  output:
+    path "${genome_id}.gtf", emit: gtf
+
+  script:
+  def gtf_name = "${genome_id}.gtf"
+"""
+gffread ${gff3_file} -T -o ${gtf_name}
+"""
+}
+
+
+// Count reads with htseq-count (position-sorted input, -a 10, stranded "yes",
+// feature type exon, id attribute gene_id) and write <file_id>.tsv.
+//   input : (file_id, bam, bai) and a GTF file
+//   output: counts -> <file_id>.tsv
+process htseq_count {
+  container = "${container_url}"
+  tag "file_id: ${file_id}"
+  label "big_mem_mono_cpus"
+  if (params.htseq_out != "") {
+    publishDir "results/${params.htseq_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam), path(bai)
+    path(gtf)
+
+  output:
+    path "${file_id}.tsv", emit: counts
+
+  script:
+"""
+htseq-count -n ${task.cpus} -r pos -a 10 -s yes -t exon -i gene_id ${bam} ${gtf} > ${file_id}.tsv
+"""
+}
+
+// Convert a GFF3 annotation to GTF, then count reads against it.
+//   take: bam_tuple -- channel passed as first input of htseq_count
+//                      (declared there as (file_id, bam, bai))
+//         gff_file  -- channel passed to gff3_2_gtf
+//                      (declared there as (genome_id, gff3_file))
+//   emit: counts    -- the `counts` output of htseq_count
+workflow htseq_count_with_gff {
+  take:
+    bam_tuple
+    gff_file
+  main:
+    gff3_2_gtf(gff_file)
+    htseq_count(bam_tuple,gff3_2_gtf.out.gtf)
+  emit:
+    counts = htseq_count.out.counts
+}
--- a/src/nf_modules/kallisto/main.nf
+++ b/src/nf_modules/kallisto/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.44.0"
+container_url = "lbmc/kallisto:${version}"
+
+// Options inserted into the `kallisto index` call.
+params.index_fasta = "-k 31 --make-unique"
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.index_fasta_out = ""
+
+// Build a kallisto index from a FASTA; stderr is written to a report file.
+//   input : (file_id, fasta)
+//   output: index  -> (file_id, files matching *.index*)
+//           report -> (file_id, files matching *_report.txt)
+process index_fasta {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_multi_cpus"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+
+  output:
+    tuple val(file_id), path("*.index*"), emit: index
+    tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // Both the index and the report are named after the FASTA base name.
+  def fasta_base = fasta.baseName
+"""
+kallisto index ${params.index_fasta} -i ${fasta_base}.index ${fasta} \
+2> ${fasta_base}_kallisto_index_report.txt
+"""
+}
+
+// Options inserted into the `kallisto quant` call.
+params.mapping_fastq = "--bias --bootstrap-samples 100"
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.mapping_fastq_out = ""
+
+// Quantify reads with `kallisto quant` into a directory named <file_prefix>;
+// stdout and stderr are written to <file_prefix>_kallisto_mapping_report.txt.
+//   input : (index_id, index) and (file_id, reads)
+//   output: counts -> (file_id, <file_prefix> directory)
+//           report -> (file_id, files matching *_report.txt)
+process mapping_fastq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  // The sample identifier declared in the inputs is file_id; no pair_id
+  // variable exists in this process.
+  tag "$file_id"
+  if (params.mapping_fastq_out != "") {
+    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(index_id), path(index)
+  tuple val(file_id), path(reads)
+
+  output:
+  tuple val(file_id), path("${file_prefix}"), emit: counts
+  tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // When file_id is a list, its first element names the output directory.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+
+  // Exactly two read files: both are passed to kallisto. Otherwise only the
+  // first read file is used, with --single.
+  // NOTE(review): kallisto's single-end mode presumably also needs fragment
+  // length options supplied through params.mapping_fastq -- confirm.
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kallisto quant -i ${index} -t ${task.cpus} \
+  ${params.mapping_fastq} -o ${file_prefix} \
+  ${reads[0]} ${reads[1]} &> ${file_prefix}_kallisto_mapping_report.txt
+  """
+  else
+  """
+  mkdir ${file_prefix}
+  kallisto quant -i ${index} -t ${task.cpus} --single \
+  ${params.mapping_fastq} -o ${file_prefix} \
+  ${reads[0]} &> ${file_prefix}_kallisto_mapping_report.txt
+  """
+}
--- a/src/nf_modules/kb/main.nf
+++ b/src/nf_modules/kb/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.26.0"
+container_url = "lbmc/kb:${version}"
+
+// Extra options inserted into the `kb ref` call of index_default.
+params.index_fasta = ""
+// Sub-directory of results/ where indexing outputs are copied;
+// "" disables publishing.
+params.index_fasta_out = ""
+
+// Build a kb index: derive the transcript-to-gene table from the GTF (tr2g),
+// then run index_default on the FASTA, the GTF and that table.
+//   take: fasta, gtf -- channels passed to the processes below
+//   emit: index, t2g, report -- the matching outputs of index_default
+workflow index_fasta {
+  take:
+    fasta
+    gtf
+
+  main:
+    tr2g(gtf)
+    index_default(fasta, gtf, tr2g.out.t2g)
+
+  emit:
+    index = index_default.out.index
+    t2g = index_default.out.t2g
+    report = index_default.out.report
+}
+
+// Create a transcript-to-gene table from a GTF, for use when no such table
+// is provided.
+//   input : (file_id, gtf)
+//   output: t2g -> (file_id, t2g.txt)
+process tr2g {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_mono_cpus"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gtf)
+
+  output:
+    tuple val(file_id), path("t2g.txt"), emit: t2g
+
+  script:
+  // t2g.py presumably writes t2g_dup.txt (the file read by sort below);
+  // `sort -k1 -u` then keeps one line per sort key.
+  """
+  t2g.py --gtf ${gtf}
+  sort -k1 -u t2g_dup.txt > t2g.txt
+  """
+}
+
+// Create a gene-to-transcript table from a GTF: build the
+// transcript-to-gene table, then swap its first two columns.
+//   input : (file_id, gtf)
+//   output: g2t -> (file_id, g2t.txt)
+process g2tr {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_mono_cpus"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gtf)
+
+  output:
+    tuple val(file_id), path("g2t.txt"), emit: g2t
+
+  script:
+  // t2g.py presumably writes t2g_dup.txt (the file read by sort below);
+  // awk prints column 2 then column 1, tab-separated.
+  """
+  t2g.py --gtf ${gtf}
+  sort -k1 -u t2g_dup.txt > t2g.txt
+  awk 'BEGIN{OFS="\\t"}{print \$2, \$1}' t2g.txt > g2t.txt
+  """
+}
+
+// Build a kb index with `kb ref`; stdout is written to a report file.
+//   input : (file_id, fasta), (gtf_id, gtf), (t2g_id, transcript_to_gene)
+//   output: index  -> (file_id, files matching *.idx)
+//           t2g    -> (t2g_id, the transcript_to_gene file)
+//           report -> (file_id, files matching *_report.txt)
+process index_default {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_mono_cpus"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+    tuple val(gtf_id), path(gtf)
+    tuple val(t2g_id), path(transcript_to_gene)
+
+  output:
+    tuple val(file_id), path("*.idx"), emit: index
+    tuple val(t2g_id), path("${transcript_to_gene}"), emit: t2g
+    tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // Index and report are both named after the FASTA simple name.
+  def genome_name = fasta.simpleName
+"""
+kb ref \
+  -i ${genome_name}.idx \
+  -g ${transcript_to_gene} \
+  ${params.index_fasta} \
+  -f1 cdna.fa ${fasta} ${gtf} > ${genome_name}_kb_index_report.txt
+"""
+}
+
+
+include { split } from "./../flexi_splitter/main.nf"
+
+// Protocol selector read by the count and count_velocity workflows:
+// "marsseq" selects the MARS-Seq branch, any other value the default branch.
+params.kb_protocol = "10x_v3"
+// Extra options inserted into the `kb count` calls.
+params.count = ""
+// Sub-directory of results/ where counting outputs are copied;
+// "" disables publishing.
+params.count_out = ""
+
+// Quantify reads with kb count.
+//   take: index, fastq, transcript_to_gene, whitelist, config
+//         (config is only used by the "marsseq" branch, as input of split)
+//   emit: counts, report -- outputs of the process selected by
+//         params.kb_protocol
+workflow count {
+  take:
+    index
+    fastq
+    transcript_to_gene
+    whitelist
+    config
+
+  main:
+  // When no whitelist is supplied, emit a placeholder whose first element is
+  // the "NO WHITELIST" marker tested by the kb processes.
+  whitelist
+    .ifEmpty(["NO WHITELIST", 0])
+    .set{ whitelist_optional }
+  switch(params.kb_protocol) {
+    // MARS-Seq: reads first go through split (flexi_splitter module).
+    case "marsseq":
+      split(fastq, config.collect())
+      kb_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
+      kb_marseq.out.counts.set{res_counts}
+      kb_marseq.out.report.set{res_report}
+    break;
+    // Any other protocol value is handled by kb_default.
+    default:
+      kb_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect())
+      kb_default.out.counts.set{res_counts}
+      kb_default.out.report.set{res_report}
+    break;
+  }
+
+  emit:
+    counts = res_counts
+    report = res_report
+}
+
+// Run `kb count` with the 10XV3 technology string (-x 10XV3) and --h5ad
+// into a directory named <file_prefix>, then copy the transcript-to-gene
+// table and fix_t2g.txt into that directory.
+//   input : (index_id, index), (file_id, reads),
+//           (t2g_id, transcript_to_gene), (whitelist_id, whitelist)
+//   output: counts -> (file_id, <file_prefix> directory)
+//           report -> (file_id, files matching *_report.txt)
+process kb_default {
+  container = "${container_url}"
+  tag "${file_prefix}"
+  label "big_mem_multi_cpus"
+  if (params.count_out != "") {
+    publishDir "results/${params.count_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(index_id), path(index)
+  tuple val(file_id), path(reads)
+  tuple val(t2g_id), path(transcript_to_gene)
+  tuple val(whitelist_id), path(whitelist)
+
+  output:
+  tuple val(file_id), path("${file_prefix}"), emit: counts
+  tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // Rendered task memory with the first "GB" removed, passed to kb as -m.
+  def kb_memory = task.memory.toString() - ~/GB/
+  // When file_id is a list, its first element names the output directory.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  // -w is only passed when a real whitelist was supplied.
+  def whitelist_param = (whitelist_id != "NO WHITELIST") ? "-w ${whitelist}" : ""
+
+  // Only the case of exactly two read files has a script; there is no
+  // else branch.
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    ${whitelist_param} \
+    -x 10XV3 \
+    --h5ad \
+    ${params.count} \
+    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  """
+}
+
+// Run `kb count` for MARS-Seq data with the custom technology string
+// "1,0,6:1,6,14:0,0,0" and --h5ad.
+//   input : (index_id, index), (file_id, reads),
+//           (t2g_id, transcript_to_gene), (whitelist_id, whitelist)
+//   output: counts -> (file_id, <file_prefix> directory)
+//           report -> (file_id, files matching *_report.txt)
+process kb_marseq {
+  // With the MARS-Seq protocol, we have:
+  //   on read 1: 4 nt of plate barcode
+  //   on read 2: 6 nt of cell barcode, then 8 nt of UMI
+  // This process expects the plate barcode to have been removed from read 1.
+  container = "${container_url}"
+  tag "${file_prefix}"
+  label "big_mem_multi_cpus"
+  if (params.count_out != "") {
+    publishDir "results/${params.count_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(index_id), path(index)
+  tuple val(file_id), path(reads)
+  tuple val(t2g_id), path(transcript_to_gene)
+  tuple val(whitelist_id), path(whitelist)
+
+  output:
+  tuple val(file_id), path("${file_prefix}"), emit: counts
+  tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // Rendered task memory with the first "GB" removed, passed to kb as -m.
+  def kb_memory = task.memory.toString() - ~/GB/
+  // When file_id is a list, its first element names the output directory.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  // -w is only passed when a real whitelist was supplied.
+  def whitelist_param = (whitelist_id != "NO WHITELIST") ? "-w ${whitelist}" : ""
+
+  // Exactly two read files are passed individually; any other number is
+  // passed as the whole `reads` list.
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    ${whitelist_param} \
+    ${params.count} \
+    --h5ad \
+    -x 1,0,6:1,6,14:0,0,0 \
+    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  """
+  else
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    ${whitelist_param} \
+    ${params.count} \
+    -x 1,0,6:1,6,14:0,0,0 \
+    --h5ad \
+    ${reads} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  """
+}
+
+// ************************** velocity workflow **************************
+
+// Build a kb index for the velocity workflow: derive the transcript-to-gene
+// table from the GTF (tr2g), then run index_fasta_velocity_default.
+//   take: fasta, gtf -- channels passed to the processes below
+//   emit: index, t2g, report -- the matching outputs of
+//         index_fasta_velocity_default
+workflow index_fasta_velocity {
+  take:
+    fasta
+    gtf
+
+  main:
+    tr2g(gtf)
+    index_fasta_velocity_default(fasta, gtf, tr2g.out.t2g)
+
+  emit:
+    index = index_fasta_velocity_default.out.index
+    t2g = index_fasta_velocity_default.out.t2g
+    report = index_fasta_velocity_default.out.report
+}
+
+// Build a kb index with `kb ref --workflow lamanno`, producing cDNA and
+// intron FASTA files and their transcript lists; stdout goes to a report.
+//   input : (file_id, fasta), (gtf_id, gtf), (t2g_id, transcript_to_gene)
+//   output: index  -> (file_id, files matching *.idx)
+//           t2g    -> (t2g_id, transcript_to_gene, cdna_t2c.txt,
+//                      intron_t2c.txt)
+//           report -> (file_id, files matching *_report.txt)
+process index_fasta_velocity_default {
+  container = "${container_url}"
+  tag "${file_id}"
+  label "big_mem_multi_cpus"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+    tuple val(gtf_id), path(gtf)
+    tuple val(t2g_id), path(transcript_to_gene)
+
+  output:
+    tuple val(file_id), path("*.idx"), emit: index
+    tuple val(t2g_id), path("${transcript_to_gene}"), path("cdna_t2c.txt"), path("intron_t2c.txt"), emit: t2g
+    tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // Index and report are both named after the FASTA simple name.
+  def genome_name = fasta.simpleName
+"""
+kb ref \
+  -i ${genome_name}.idx \
+  -g ${transcript_to_gene} \
+  ${params.index_fasta} \
+  -f1 cdna.fa -f2 intron.fa -c1 cdna_t2c.txt -c2 intron_t2c.txt --workflow lamanno \
+  ${fasta} ${gtf} > ${genome_name}_kb_index_report.txt
+"""
+}
+
+// Options parameter declared for the velocity counting workflow.
+params.count_velocity = ""
+// Sub-directory of results/ where velocity counting outputs are copied;
+// "" disables publishing.
+params.count_velocity_out = ""
+
+// Quantify reads with kb count in the velocity (lamanno) workflow.
+//   take: index, fastq, transcript_to_gene, whitelist, config
+//         (config is only used by the "marsseq" branch, as input of split)
+//   emit: counts, report -- outputs of the process selected by
+//         params.kb_protocol
+workflow count_velocity {
+  take:
+    index
+    fastq
+    transcript_to_gene
+    whitelist
+    config
+
+  main:
+  // When no whitelist is supplied, emit a placeholder whose first element is
+  // the "NO WHITELIST" marker tested by the velocity processes.
+  whitelist
+    .ifEmpty(["NO WHITELIST", 0])
+    .set{ whitelist_optional }
+  switch(params.kb_protocol) {
+    // MARS-Seq: reads first go through split (flexi_splitter module).
+    case "marsseq":
+      split(fastq, config.collect())
+      velocity_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
+      velocity_marseq.out.counts.set{res_counts}
+      velocity_marseq.out.report.set{res_report}
+    break;
+    // Any other protocol value is handled by velocity_default.
+    default:
+      velocity_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect())
+      velocity_default.out.counts.set{res_counts}
+      velocity_default.out.report.set{res_report}
+    break;
+  }
+
+  emit:
+    counts = res_counts
+    report = res_report
+}
+// Run `kb count --workflow lamanno` with the 10XV3 technology string and
+// --h5ad into a directory named <file_prefix>, then copy the t2g tables
+// into that directory.
+//   input : (index_id, index), (file_id, reads),
+//           (t2g_id, transcript_to_gene, cdna_t2g, intron_t2g),
+//           (whitelist_id, whitelist)
+//   output: counts -> (file_id, <file_prefix> directory)
+//           report -> (file_id, files matching *_report.txt)
+// Extra kb options come from params.count_velocity, the options parameter
+// declared for the velocity workflow (params.count was used here before,
+// which left params.count_velocity without effect).
+process velocity_default {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_prefix"
+  if (params.count_velocity_out != "") {
+    publishDir "results/${params.count_velocity_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(index_id), path(index)
+  tuple val(file_id), path(reads)
+  tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g)
+  tuple val(whitelist_id), path(whitelist)
+
+  output:
+  tuple val(file_id), path("${file_prefix}"), emit: counts
+  tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // Rendered task memory with the first "GB" removed, passed to kb as -m.
+  def kb_memory = "${task.memory}" - ~/GB/
+  // When file_id is a list, its first element names the output directory.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  // -w is only passed when a real whitelist was supplied.
+  def whitelist_param = ""
+  if (whitelist_id != "NO WHITELIST"){
+    whitelist_param = "-w ${whitelist}"
+  }
+
+  // Only the case of exactly two read files has a script; there is no
+  // else branch.
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    -c1 ${cdna_t2g} \
+    -c2 ${intron_t2g} \
+    --workflow lamanno \
+    ${whitelist_param} \
+    -x 10XV3 \
+    --h5ad \
+    ${params.count_velocity} \
+    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  cp ${cdna_t2g} ${file_prefix}/
+  cp ${intron_t2g} ${file_prefix}/
+  """
+}
+
+// Run `kb count --workflow lamanno` for MARS-Seq data with the custom
+// technology string "1,0,6:1,6,14:0,0,0" and --h5ad.
+//   input : (index_id, index), (file_id, reads),
+//           (t2g_id, transcript_to_gene, cdna_t2g, intron_t2g),
+//           (whitelist_id, whitelist)
+//   output: counts -> (file_id, <file_prefix> directory)
+//           report -> (file_id, files matching *_report.txt)
+// Extra kb options come from params.count_velocity, the options parameter
+// declared for the velocity workflow (params.count was used here before,
+// which left params.count_velocity without effect). Both branches now pass
+// --h5ad; the second branch previously omitted it.
+process velocity_marseq {
+  // With the MARS-Seq protocol, we have:
+  //   on read 1: 4 nt of plate barcode
+  //   on read 2: 6 nt of cell barcode, then 8 nt of UMI
+  // This process expects the plate barcode to have been removed from read 1.
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_prefix"
+  if (params.count_velocity_out != "") {
+    publishDir "results/${params.count_velocity_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(index_id), path(index)
+  tuple val(file_id), path(reads)
+  tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g)
+  tuple val(whitelist_id), path(whitelist)
+
+  output:
+  tuple val(file_id), path("${file_prefix}"), emit: counts
+  tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // Rendered task memory with the first "GB" removed, passed to kb as -m.
+  def kb_memory = "${task.memory}" - ~/GB/
+  // When file_id is a list, its first element names the output directory.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  // -w is only passed when a real whitelist was supplied.
+  def whitelist_param = ""
+  if (whitelist_id != "NO WHITELIST"){
+    whitelist_param = "-w ${whitelist}"
+  }
+
+  // Exactly two read files are passed individually; any other number is
+  // passed as the whole `reads` list.
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    -c1 ${cdna_t2g} \
+    -c2 ${intron_t2g} \
+    --workflow lamanno \
+    --h5ad \
+    ${whitelist_param} \
+    ${params.count_velocity} \
+    -x 1,0,6:1,6,14:0,0,0 \
+    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  cp ${cdna_t2g} ${file_prefix}/
+  cp ${intron_t2g} ${file_prefix}/
+  """
+  else
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    -c1 ${cdna_t2g} \
+    -c2 ${intron_t2g} \
+    --workflow lamanno \
+    --h5ad \
+    ${whitelist_param} \
+    ${params.count_velocity} \
+    -x 1,0,6:1,6,14:0,0,0 \
+    ${reads} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  cp ${cdna_t2g} ${file_prefix}/
+  cp ${intron_t2g} ${file_prefix}/
+  """
+}
+
--- a/src/nf_modules/macs2/main.nf
+++ b/src/nf_modules/macs2/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "2.1.2"
+container_url = "lbmc/macs2:${version}"
+
+// Values used to build the default macs2 options below.
+params.macs_gsize=3e9
+params.macs_mfold="5 50"
+// Options inserted into the `macs2 callpeak` call.
+params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.peak_calling_out = ""
+
+// Call peaks with `macs2 callpeak` on an IP BAM against a control BAM.
+// stderr is written to <bam_ip simpleName>_macs2_report.txt and the task
+// fails when that report contains "ERROR".
+//   input : (file_id, bam_ip, bam_control)
+//   output: peak    -> (file_id, files matching *.narrowPeak)
+//           summits -> (file_id, files matching *.bed)
+//           report  -> (file_id, *_peaks.xls, *_report.txt)
+process peak_calling {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.peak_calling_out != "") {
+    publishDir "results/${params.peak_calling_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam_ip), path(bam_control)
+
+  output:
+    tuple val(file_id), path("*.narrowPeak"), emit: peak
+    tuple val(file_id), path("*.bed"), emit: summits
+    tuple val(file_id), path("*_peaks.xls"), path("*_report.txt"), emit: report
+
+  script:
+/* remove --nomodel option for real dataset */
+"""
+macs2 callpeak \
+  ${params.peak_calling} \
+  --treatment ${bam_ip} \
+  --call-summits \
+  --control ${bam_control} \
+  --keep-dup all \
+  --qvalue 0.99 \
+  --name ${bam_ip.simpleName} 2> \
+  ${bam_ip.simpleName}_macs2_report.txt
+
+if grep -q "ERROR" ${bam_ip.simpleName}_macs2_report.txt; then
+  echo "MACS2 error"
+  exit 1
+fi
+"""
+}
+
+// Options inserted into the `macs2 callpeak` call of peak_calling_bg.
+params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
+// Sub-directory of results/ where outputs are copied; "" disables publishing.
+params.peak_calling_bg_out = ""
+
+// Call peaks with `macs2 callpeak` from two 4-column text inputs
+// (presumably bedGraph -- confirm). Each input is first rewritten by awk as
+// columns 1-3, ".", "+", column 4 into <simpleName>.bed. stderr is written
+// to <bg_ip simpleName>_macs2_report.txt and the task fails when that report
+// contains "ERROR".
+//   input : (file_id, bg_ip, bg_control)
+//   output: peak    -> (file_id, files matching *.narrowPeak)
+//           summits -> (file_id, files matching *.bed)
+//           report  -> (file_id, files matching *_report.txt)
+process peak_calling_bg {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.peak_calling_bg_out != "") {
+    publishDir "results/${params.peak_calling_bg_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bg_ip), path(bg_control)
+
+  output:
+    tuple val(file_id), path("*.narrowPeak"), emit: peak
+    tuple val(file_id), path("*.bed"), emit: summits
+    tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+/* remove --nomodel option for real dataset */
+"""
+awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
+  ${bg_ip.simpleName}.bed
+awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
+  ${bg_control.simpleName}.bed
+macs2 callpeak \
+  ${params.peak_calling_bg} \
+  --treatment ${bg_ip.simpleName}.bed \
+  --qvalue 0.99 \
+  --call-summits \
+  --control ${bg_control.simpleName}.bed \
+  --keep-dup all \
+  --name ${bg_ip.simpleName} 2> \
+  ${bg_ip.simpleName}_macs2_report.txt
+
+if grep -q "ERROR" ${bg_ip.simpleName}_macs2_report.txt; then
+  echo "MACS2 error"
+  exit 1
+fi
+"""
+}
+
--- a/src/nf_modules/macs3/main.nf
+++ b/src/nf_modules/macs3/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Tag of the MACS3 container image; container_url is used by the processes of this module.
+version = "3.0.0a6"
+container_url = "lbmc/macs3:${version}"
+
+// Genome size and mfold range interpolated into the default option strings below.
+params.macs_gsize=3e9
+params.macs_mfold="5 50"
+// Extra options passed to `macs3 callpeak` in the peak_calling process.
+params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
+// Sub-directory of results/ used by publishDir; publishing is skipped when empty.
+params.peak_calling_out = ""
+// Call peaks with MACS3 on an IP BAM against a control BAM.
+//
+// input:  tuple (file_id, IP bam, control bam)
+// output: peak   - every file matching "*"
+//         report - files matching "*_report.txt"
+process peak_calling {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.peak_calling_out != "") {
+    publishDir "results/${params.peak_calling_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam_ip), path(bam_control)
+
+  output:
+    path "*", emit: peak
+    path "*_report.txt", emit: report
+
+  script:
+/* remove --nomodel option for real dataset */
+// NOTE(review): neither the command below nor the default params.peak_calling
+// contains --nomodel, so the note above may be stale.
+// NOTE(review): with the default params.peak_calling, --gsize is passed twice
+// (once through params.peak_calling, once explicitly at the end).
+// MACS3's stderr is redirected to <name>_macs3_report.txt; the task fails
+// when that report contains the word "ERROR".
+"""
+macs3 callpeak \
+  --treatment ${bam_ip} \
+  --call-summits \
+  --control ${bam_control} \
+  --keep-dup all \
+  ${params.peak_calling} \
+  --name ${bam_ip.simpleName} \
+  --gsize ${params.macs_gsize} 2> \
+  ${bam_ip.simpleName}_macs3_report.txt
+
+if grep -q "ERROR" ${bam_ip.simpleName}_macs3_report.txt; then
+  echo "MACS3 error"
+  exit 1
+fi
+"""
+}
+
+// Extra options inserted right after `macs3 callpeak` in peak_calling_bg.
+params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
+// Sub-directory of results/ used by publishDir; publishing is skipped when empty.
+params.peak_calling_bg_out = ""
+// Call peaks with MACS3 from two 4-column interval files (IP and control),
+// which are first rewritten as 6-column BED files with awk.
+//
+// input:  tuple (file_id, IP file, control file)
+// output: peak   - every file matching "*"
+//         report - files matching "*_report.txt"
+process peak_calling_bg {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.peak_calling_bg_out != "") {
+    publishDir "results/${params.peak_calling_bg_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bg_ip), path(bg_control)
+
+  output:
+    path "*", emit: peak
+    path "*_report.txt", emit: report
+
+  script:
+// --mfold is supplied through params.peak_calling_bg. A former stand-alone
+// "--mfold params.macs_mfold[0] params.macs_mfold[1]" line was neither
+// interpolated nor continued with a backslash, which cut the macs3 command
+// in two; it has been removed.
+// MACS3's stderr is redirected to <name>_macs3_report.txt; the task fails
+// when that report contains the word "ERROR".
+"""
+awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
+  ${bg_ip.simpleName}.bed
+awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
+  ${bg_control.simpleName}.bed
+macs3 callpeak \
+  ${params.peak_calling_bg} \
+  --treatment ${bg_ip.simpleName}.bed \
+  --call-summits \
+  --control ${bg_control.simpleName}.bed \
+  --keep-dup all \
+  --name ${bg_ip.simpleName} \
+  --gsize ${params.macs_gsize} 2> \
+  ${bg_ip.simpleName}_macs3_report.txt
+
+if grep -q "ERROR" ${bg_ip.simpleName}_macs3_report.txt; then
+  echo "MACS3 error"
+  exit 1
+fi
+"""
+}
+
--- a/src/nf_modules/minimap2/main.nf
+++ b/src/nf_modules/minimap2/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Tag of the minimap2 container image; container_url is used by the processes of this module.
+version = "2.17"
+container_url = "lbmc/minimap2:${version}"
+
+// Extra options inserted right after `minimap2` in the index_fasta process.
+params.index_fasta = ""
+// Sub-directory of results/ used by publishDir; publishing is skipped when empty.
+params.index_fasta_out = ""
+// Build a minimap2 index (.mmi) for a fasta file.
+//
+// input:  tuple (file_id, fasta)
+// output: index - (file_id, fasta, *.mmi*)
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+
+  output:
+    tuple val(file_id), path("${fasta}"), path("*.mmi*"), emit: index
+
+  script:
+  // Strip the " GB" unit from task.memory so that the number can be given to -I.
+  // Declared with `def` so the variable is local to each task instead of
+  // being a script-global shared between concurrently running tasks.
+  def memory = "${task.memory}" - ~/\s*GB/
+"""
+minimap2 ${params.index_fasta} -t ${task.cpus} -I ${memory}G -d ${fasta.baseName}.mmi ${fasta}
+"""
+}
+
+// Extra options inserted right after `minimap2` in the mapping_fastq process.
+params.mapping_fastq = "-ax sr"
+// Sub-directory of results/ used by publishDir; publishing is skipped when empty.
+params.mapping_fastq_out = ""
+// Map reads with minimap2 and convert the alignments to BAM with samtools.
+//
+// input:  tuple (fasta_id, fasta, index) and tuple (file_id, reads)
+// output: bam - (file_id, *.bam)
+process mapping_fastq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.mapping_fastq_out != "") {
+    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(fasta_id), path(fasta), path(index)
+  tuple val(file_id), path(reads)
+
+  output:
+  tuple val(file_id), path("*.bam"), emit: bam
+
+  script:
+  // file_id may be a list; its first element is then used as the output prefix.
+  // `def` keeps the variables local to each task (no sharing between tasks).
+  def file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  // Strip the " GB" unit from task.memory, then divide by (cpus + 1).
+  def memory = "${task.memory}" - ~/\s*GB/
+  memory = memory.toInteger() / (task.cpus + 1.0)
+  // NOTE(review): -K receives this bare number without a K/M/G suffix -
+  // confirm the intended unit against the minimap2 manual.
+  // The BAM is named after file_prefix (the script previously referenced an
+  // undefined `pair_id` variable).
+  if (reads.size() == 2)
+  """
+  minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads[0]} ${reads[1]} |
+    samtools view -Sb - > ${file_prefix}.bam
+  """
+  else
+  """
+  minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads} |
+    samtools view -Sb - > ${file_prefix}.bam
+  """
+}
--- a/src/nf_modules/multiqc/main.nf
+++ b/src/nf_modules/multiqc/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// MultiQC generates a single HTML report that combines the reports of many
+// different bioinformatics tools.
+// 
+// EXAMPLE:
+
+/*
+include { multiqc } 
+  from './nf_modules/multiqc/main'
+  addParams(
+    multiqc_out: "QC/"
+  )
+
+multiqc(
+  report_a
+  .mix(
+    report_b,
+    report_c,
+    report_d
+  )
+)
+*/
+
+// Tag of the MultiQC container image; container_url is used by multiqc_default.
+version = "1.11"
+container_url = "lbmc/multiqc:${version}"
+
+// Extra options inserted right after `multiqc` in the multiqc_default process.
+params.multiqc = ""
+// Sub-directory of results/ used by publishDir; publishing is skipped when empty.
+params.multiqc_out = "QC/"
+// Gather report files from a channel and run multiqc_default once on all of them.
+//
+// take: report - channel whose items are either files or lists
+// emit: report - the `report` output of multiqc_default
+workflow multiqc {
+  take:
+    report
+  main:
+    // For list items keep the second element when there is more than one,
+    // otherwise the first; non-list items are passed through unchanged.
+    report_cleaned = report
+      .map { item ->
+        item instanceof List ? (item.size() > 1 ? item[1] : item[0]) : item
+      }
+      .unique()
+      .flatten()
+    multiqc_default(report_cleaned.collect())
+
+  emit:
+  report = multiqc_default.out.report
+}
+
+// Run MultiQC on the current task directory, where the input reports are staged.
+//
+// input:  report - report file(s)
+// output: report - everything matching "*multiqc_*"
+process multiqc_default {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  if (params.multiqc_out != "") {
+    publishDir "results/${params.multiqc_out}", mode: 'copy'
+  }
+
+  input:
+    path report 
+
+  output:
+    path "*multiqc_*", emit: report
+
+  script:
+// -f is passed together with the user options from params.multiqc; "." makes
+// MultiQC scan the current directory.
+"""
+multiqc ${params.multiqc} -f .
+"""
+}
--- a/src/nf_modules/picard/main.nf
+++ b/src/nf_modules/picard/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Tag of the Picard container image; container_url is used by the processes of this module.
+version = "2.18.11"
+container_url = "lbmc/picard:${version}"
+
+// Options inserted right after `PicardCommandLine MarkDuplicates` in mark_duplicate.
+params.mark_duplicate = "VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=true"
+// Sub-directory of results/ used by publishDir; publishing is skipped when empty.
+params.mark_duplicate_out = ""
+// Run Picard MarkDuplicates on a BAM file.
+//
+// input:  tuple (file_id, bam)
+// output: bam    - (file_id, *.bam)
+//         report - files matching "*_report.dupinfo.txt"
+process mark_duplicate {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.mark_duplicate_out != "") {
+    publishDir "results/${params.mark_duplicate_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+  output:
+    tuple val(file_id) , path("*.bam"), emit: bam
+    path "*_report.dupinfo.txt", emit: report
+
+
+  script:
+// Writes <bam basename>_dedup.bam and the metrics file
+// <bam basename>_picard_dedup_report.dupinfo.txt; stdout and stderr both go
+// to picard_<bam basename>.log.
+"""
+PicardCommandLine MarkDuplicates \
+  ${params.mark_duplicate} \
+  INPUT=${bam} \
+  OUTPUT=${bam.baseName}_dedup.bam \
+  METRICS_FILE=${bam.baseName}_picard_dedup_report.dupinfo.txt &> \
+  picard_${bam.baseName}.log
+"""
+}
+
+// Extra options inserted right after `PicardCommandLine NormalizeFasta`.
+params.normalize_fasta = ""
+// Sub-directory of results/ used by publishDir; publishing is skipped when empty.
+params.normalize_fasta_out = ""
+// Run Picard NormalizeFasta on a fasta file and gzip the result.
+//
+// input:  tuple (file_id, fasta)
+// output: fasta - (file_id, results/*.fasta.gz)
+process normalize_fasta {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.normalize_fasta_out != "") {
+    publishDir "results/${params.normalize_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+  output:
+    tuple val(file_id), path("results/*.fasta.gz"), emit: fasta 
+
+  script:
+// params.normalize_fasta is forwarded to Picard, like the option parameters of
+// the other processes in this module; its default is empty, so the command
+// is unchanged unless the caller sets it.
+// The normalized file is written to results/ and then compressed in place.
+"""
+mkdir -p results
+PicardCommandLine NormalizeFasta \
+      ${params.normalize_fasta} \
+      I=${fasta} \
+      O=results/${fasta.simpleName}.fasta
+gzip results/${fasta.simpleName}.fasta
+"""
+}
+
+// Extra options inserted right after `PicardCommandLine CreateSequenceDictionary`.
+params.index_fasta = ""
+// Sub-directory of results/ used by publishDir; publishing is skipped when empty.
+params.index_fasta_out = ""
+// Run Picard CreateSequenceDictionary on a fasta file.
+//
+// input:  tuple (file_id, fasta)
+// output: index - (file_id, *.dict)
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+  output:
+    tuple val(file_id), path("*.dict"), emit: index
+
+  script:
+// The dictionary is written to <fasta basename>.dict in the task directory.
+"""
+PicardCommandLine CreateSequenceDictionary \
+  ${params.index_fasta} \
+  REFERENCE=${fasta} \
+  OUTPUT=${fasta.baseName}.dict
+"""
+}
+
+// Extra options inserted right after `PicardCommandLine BuildBamIndex`.
+params.index_bam = ""
+// Sub-directory of results/ used by publishDir; publishing is skipped when empty.
+params.index_bam_out = ""
+// Run Picard BuildBamIndex on a BAM file.
+//
+// input:  tuple (file_id, bam)
+// output: index - (file_id, every file matching "*")
+process index_bam {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_bam_out != "") {
+    publishDir "results/${params.index_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+  output:
+    tuple val(file_id), path("*"), emit: index
+
+  script:
+// No OUTPUT argument is given, so the index location is left to Picard's default.
+"""
+PicardCommandLine BuildBamIndex \
+  ${params.index_bam} \
+  INPUT=${bam}
+"""
+}
No results found