Compare revisions

ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0
--- a/src/nf_modules/gffread/main.nf
+++ b/src/nf_modules/gffread/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// gffread release; also used as the container image tag.
+version = "0.12.2"
+container_url = "lbmc/gffread:${version}"
+// Declared for extra options; not referenced by the gffread process below.
+params.gffread = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.gffread_out = ""
+// Run `gffread -x` on a GTF/genome pair, then filter the result with awk so
+// that only the first header/sequence-line pair of each distinct sequence
+// line is kept; empty lines are removed by the final grep.
+process gffread {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_prefix}"
+  if (params.gffread_out != "") {
+    publishDir "results/${params.gffread_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(gtf)
+    tuple val(fasta_id), path(fasta)
+  output:
+    tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta
+  script:
+  // A list-valued id contributes its first element as the file prefix.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  """
+  gffread ${gtf} -g ${fasta} -M -x dup_${file_prefix}.fasta
+  awk 'BEGIN {i = 1;} { if (\$1 ~ /^>/) { tmp = h[i]; h[i] = \$1; } else if (!a[\$1]) { s[i] = \$1; a[\$1] = "1"; i++; } else { h[i] = tmp; } } END { for (j = 1; j < i; j++) { print h[j]; print s[j]; } }' < dup_${file_prefix}.fasta | grep -v -e "^\$" > ${file_prefix}.fasta
+  """
+}
+// Declared for extra options; not referenced by the spliced_cds process below.
+params.spliced_cds = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.spliced_cds_out = ""
+// Decompress the gzipped genome and annotation to new files, then run
+// `gffread -x` on the uncompressed copies.
+process spliced_cds {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_prefix}"
+  if (params.spliced_cds_out != "") {
+    publishDir "results/${params.spliced_cds_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(gtf)
+    tuple val(fasta_id), path(fasta)
+  output:
+    tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta
+  script:
+  // A list-valued id contributes its first element as the file prefix.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  """
+gzip -dck ${fasta} > ${fasta.simpleName}_un.fasta
+gzip -dck ${gtf} > ${gtf.simpleName}_un.gtf
+gffread ${gtf.simpleName}_un.gtf -g ${fasta.simpleName}_un.fasta -M \
+  -x ${file_prefix}.fasta
+  """
+}
\ No newline at end of file
--- a/src/nf_modules/guppy-cpu/main.nf
+++ b/src/nf_modules/guppy-cpu/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// guppy release; also used as the container image tag.
+version = "5.0.11"
+container_url = "lbmc/guppy-cpu:${version}"
+// Sub-directory of results/ used for publishing and as guppy's save path.
+params.basecalling_out = ""
+params.flowcell = "FLO-MIN106"
+params.kit = "SQK-PCS109"
+params.cpu_threads_per_caller = 4
+params.num_callers = 1
+// Basecall a fast5 input with guppy_basecaller on CPU and emit the
+// compressed fastq files found in the task directory.
+process basecall_fast5 {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.basecalling_out != "") {
+    publishDir "results/${params.basecalling_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(fast5)
+  output:
+    tuple val(file_id), path("*.fastq*"), emit: fastq
+  script:
+  // The warnings are emitted from the script block through the logger: the
+  // previous checks used undefined names (errorFlowcell / errorKit) in the
+  // process body, which aborts the run instead of warning.
+  if (params.flowcell == "") {
+    log.warn "WARNING ! No Flowcell type given..."
+  }
+  if (params.kit == "") {
+    log.warn "WARNING ! No kit type given..."
+  }
+  // Fall back to the task directory so that -s always receives an argument.
+  // NOTE(review): with a non-empty save path guppy writes below that folder,
+  // while the output glob only matches the task directory root - confirm.
+  def save_path = params.basecalling_out != "" ? params.basecalling_out : "."
+"""
+guppy_basecaller --compress_fastq \
+    -i ${fast5} \
+    -s ${save_path} \
+    --cpu_threads_per_caller ${params.cpu_threads_per_caller} \
+    --num_callers ${params.num_callers} \
+    --flowcell ${params.flowcell} \
+    --kit ${params.kit}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/guppy-gpu/main.nf
+++ b/src/nf_modules/guppy-gpu/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// guppy release; also used as the container image tag.
+version = "5.0.11"
+container_url = "lbmc/guppy-gpu:${version}"
+// Sub-directory of results/ used for publishing and as guppy's save path.
+params.basecalling_out = ""
+// No default flowcell/kit here: callers are expected to provide both.
+params.flowcell = ""
+params.kit = ""
+params.gpu_runners_per_device = 16
+// Basecall a fast5 input with guppy_basecaller on all CUDA devices and emit
+// the compressed fastq files found in the task directory.
+process basecall_fast5 {
+  container = "${container_url}"
+  // Need to create a profile using GPUs
+  label ""
+  tag "$file_id"
+  if (params.basecalling_out != "") {
+    publishDir "results/${params.basecalling_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(fast5)
+  output:
+    tuple val(file_id), path("*.fastq*"), emit: fastq
+  script:
+  // The warnings are emitted from the script block through the logger: the
+  // previous checks used undefined names (errorFlowcell / errorKit) in the
+  // process body, which aborts the run instead of warning.
+  if (params.flowcell == "") {
+    log.warn "WARNING ! No Flowcell type given..."
+  }
+  if (params.kit == "") {
+    log.warn "WARNING ! No kit type given..."
+  }
+  // Fall back to the task directory so that -s always receives an argument.
+  // NOTE(review): with a non-empty save path guppy writes below that folder,
+  // while the output glob only matches the task directory root - confirm.
+  def save_path = params.basecalling_out != "" ? params.basecalling_out : "."
+"""
+guppy_basecaller --compress_fastq -x "cuda:all" --min_qscore 7.0 \
+    -i ${fast5} \
+    -s ${save_path} \
+    --gpu_runners_per_device ${params.gpu_runners_per_device} \
+    --flowcell ${params.flowcell} \
+    --kit ${params.kit}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/hisat2/main.nf
+++ b/src/nf_modules/hisat2/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// hisat2 release; also used as the container image tag.
+version = "2.2.1"
+container_url = "lbmc/hisat2:${version}"
+// Declared for extra options; not referenced by the index_fasta process below.
+params.index_fasta = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.index_fasta_out = ""
+// Build a hisat2 index from a gzipped fasta and emit the *.ht2* files
+// together with the build log.
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(fasta)
+  output:
+    tuple val(file_id), path("*.ht2*"), emit: index
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // Decompress to stdout instead of in place: inputs are usually staged as
+  // symbolic links, which gzip refuses to replace, and streaming also leaves
+  // the staged input untouched (same approach as gffread's spliced_cds).
+"""
+gzip -dc ${fasta} > ${fasta.baseName}
+hisat2-build -p ${task.cpus} \
+  ${fasta.baseName} \
+  ${fasta.simpleName} &> \
+  ${fasta.simpleName}_hisat2_index_report.txt
+if grep -q "Error" ${fasta.simpleName}_hisat2_index_report.txt; then
+  exit 1
+fi
+"""
+}
+// Extra command line options inserted right after `hisat2`.
+params.mapping_fastq = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.mapping_fastq_out = ""
+// Map single or paired reads with hisat2 and write a sorted BAM; the hisat2
+// stderr is kept as the report and scanned for "Error".
+process mapping_fastq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${file_id}"
+  if (params.mapping_fastq_out != "") {
+    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
+  }
+  input:
+    tuple val(index_id), path(index)
+    tuple val(file_id), path(reads)
+  output:
+    tuple val(file_id), path("*.bam"), emit: bam
+    path "*_report.txt", emit: report
+  script:
+  // Start from the first staged index entry, then prefer the prefix of a
+  // file whose name contains ".1.ht2" when one is found.
+  index_id = index[0]
+  for (index_file in index) {
+    def prefix_match = (index_file =~ /(.*)\.1\.ht2.*/)
+    if (prefix_match.find()) {
+      index_id = prefix_match.group(1)
+    }
+  }
+  // The file prefix is the first element of a list id, the first value of a
+  // map id, or the id itself.
+  if (file_id instanceof List) {
+    file_prefix = file_id[0]
+  } else if (file_id instanceof Map) {
+    file_prefix = file_id.values()[0]
+  } else {
+    file_prefix = file_id
+  }
+  if (reads.size() == 2)
+  """
+  hisat2 ${params.mapping_fastq} \
+    -p ${task.cpus} \
+    -x ${index_id} \
+    -1 ${reads[0]} \
+    -2 ${reads[1]} 2> \
+    ${file_prefix}_ht2_mapping_report.txt \
+    | samtools view -@ ${task.cpus} -bS - \
+    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam
+  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
+    exit 1
+  fi
+  """
+  else
+  """
+  hisat2 ${params.mapping_fastq} \
+    -p ${task.cpus} \
+    -x ${index_id} \
+    -U ${reads} 2> \
+    ${file_prefix}_ht2_mapping_report.txt \
+    | samtools view -@ ${task.cpus} -bS - \
+    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam
+  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
+    exit 1
+  fi
+  """
+}
--- a/src/nf_modules/htseq/main.nf
+++ b/src/nf_modules/htseq/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// htseq release; used as the container image tag for htseq_count.
+version = "1.99.2"
+container_url = "lbmc/htseq:${version}"
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.htseq_out = ""
+// Convert an annotation file to GTF with `gffread -T`; the output is named
+// after genome_id.
+process gff3_2_gtf {
+  // This process uses its own image rather than container_url.
+  container = "dceoy/cufflinks"
+  label "small_mem_mono_cpus"
+    input:
+        tuple val(genome_id), path(gff3_file)
+    output:
+        path "${genome_id}.gtf", emit: gtf
+    script:
+"""
+gffread ${gff3_file} -T -o ${genome_id}.gtf
+"""
+}
+// Count reads per gene_id over exon features with htseq-count and write the
+// table to "<file_id>.tsv". The options -r pos, -a 10 and -s yes are fixed.
+process htseq_count {
+    container = "${container_url}"
+    label "big_mem_mono_cpus"
+    tag "file_id: $file_id"
+    if (params.htseq_out != "") {
+        publishDir "results/${params.htseq_out}", mode: 'copy'
+    }
+    input:
+      // bai is staged next to the bam but is not named on the command line.
+      tuple val(file_id), path(bam), path(bai)
+      path (gtf)
+    output:
+      path "${file_id}.tsv", emit: counts
+  script:
+"""
+htseq-count -n ${task.cpus} -r pos -a 10 -s yes -t exon -i gene_id $bam $gtf > ${file_id}.tsv
+"""
+}
+// Convert the annotation with gff3_2_gtf, then count reads with htseq_count
+// using the converted GTF.
+workflow htseq_count_with_gff {
+  take:
+    // Passed unchanged to htseq_count, which expects (file_id, bam, bai).
+    bam_tuple
+    // Passed unchanged to gff3_2_gtf, which expects (genome_id, gff3_file).
+    gff_file
+  main:
+    gff3_2_gtf(gff_file)
+    htseq_count(bam_tuple,gff3_2_gtf.out.gtf)
+  emit:
+    counts = htseq_count.out.counts
+}
--- a/src/nf_modules/kallisto/main.nf
+++ b/src/nf_modules/kallisto/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// kallisto release; also used as the container image tag.
+version = "0.44.0"
+container_url = "lbmc/kallisto:${version}"
+// Options inserted right after `kallisto index`.
+params.index_fasta = "-k 31 --make-unique"
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.index_fasta_out = ""
+// Build a kallisto index named "<fasta.baseName>.index" and keep the stderr
+// of the command as the report.
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(fasta)
+  output:
+    tuple val(file_id), path("*.index*"), emit: index
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+"""
+kallisto index ${params.index_fasta} -i ${fasta.baseName}.index ${fasta} \
+2> ${fasta.baseName}_kallisto_index_report.txt
+"""
+}
+// Options inserted into `kallisto quant`.
+params.mapping_fastq = "--bias --bootstrap-samples 100"
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.mapping_fastq_out = ""
+// Quantify single or paired reads with `kallisto quant`; the output folder
+// is named after the file prefix and stdout/stderr are kept as the report.
+process mapping_fastq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  // Tag with the declared input id: the former "$pair_id" is not defined
+  // anywhere in this process.
+  tag "$file_id"
+  if (params.mapping_fastq_out != "") {
+    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
+  }
+  input:
+  tuple val(index_id), path(index)
+  tuple val(file_id), path(reads)
+  output:
+  tuple val(file_id), path("${file_prefix}"), emit: counts
+  tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // A list-valued id contributes its first element as the file prefix.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  // NOTE(review): the single-end branch passes --single without fragment
+  // length options; they presumably have to come from params.mapping_fastq.
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kallisto quant -i ${index} -t ${task.cpus} \
+  ${params.mapping_fastq} -o ${file_prefix} \
+  ${reads[0]} ${reads[1]} &> ${file_prefix}_kallisto_mapping_report.txt
+  """
+  else
+  """
+  mkdir ${file_prefix}
+  kallisto quant -i ${index} -t ${task.cpus} --single \
+  ${params.mapping_fastq} -o ${file_prefix} \
+  ${reads[0]} &> ${file_prefix}_kallisto_mapping_report.txt
+  """
+}
--- a/src/nf_modules/kb/main.nf
+++ b/src/nf_modules/kb/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// kb release; also used as the container image tag.
+version = "0.26.0"
+container_url = "lbmc/kb:${version}"
+// Extra options inserted into `kb ref`.
+params.index_fasta = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.index_fasta_out = ""
+// Build the transcript-to-gene table from the GTF with tr2g, then build the
+// index with index_default.
+workflow index_fasta {
+  take:
+    fasta
+    gtf
+  main:
+    tr2g(gtf)
+    index_default(fasta, gtf, tr2g.out.t2g)
+  emit:
+    index = index_default.out.index
+    t2g = index_default.out.t2g
+    report = index_default.out.report
+}
+// Run t2g.py on the GTF, then sort t2g_dup.txt with `sort -k1 -u` into
+// t2g.txt.
+process tr2g {
+  // create transcript to gene table from gtf if no transcript to gene file is provided
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(gtf)
+  output:
+    tuple val(file_id), path("t2g.txt"), emit: t2g
+  script:
+  // NOTE(review): t2g_dup.txt is presumably written by t2g.py - confirm.
+  """
+  t2g.py --gtf ${gtf}
+  sort -k1 -u t2g_dup.txt > t2g.txt
+  """
+}
+// Same steps as tr2g, followed by an awk command that writes the first two
+// columns of t2g.txt in swapped order, tab-separated, to g2t.txt.
+process g2tr {
+  // create gene to transcript table from gtf if no transcript to gene file is provided
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(gtf)
+  output:
+    tuple val(file_id), path("g2t.txt"), emit: g2t
+  script:
+  // NOTE(review): t2g_dup.txt is presumably written by t2g.py - confirm.
+  """
+  t2g.py --gtf ${gtf}
+  sort -k1 -u t2g_dup.txt > t2g.txt
+  awk 'BEGIN{OFS="\\t"}{print \$2, \$1}' t2g.txt > g2t.txt
+  """
+}
+// Run `kb ref` to create "<fasta.simpleName>.idx"; stdout of the command is
+// kept as the report.
+process index_default {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(fasta)
+    tuple val(gtf_id), path(gtf)
+    tuple val(t2g_id), path(transcript_to_gene)
+  output:
+    tuple val(file_id), path("*.idx"), emit: index
+    // The t2g channel re-emits the file name that was handed to -g.
+    tuple val(t2g_id), path("${transcript_to_gene}"), emit: t2g
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+"""
+kb ref \
+  -i ${fasta.simpleName}.idx \
+  -g ${transcript_to_gene} \
+  ${params.index_fasta} \
+  -f1 cdna.fa ${fasta} ${gtf} > ${fasta.simpleName}_kb_index_report.txt
+"""
+}
+include { split } from "./../flexi_splitter/main.nf"
+// Protocol selector: "marsseq" selects the MARS-Seq branch, any other value
+// selects the default branch.
+params.kb_protocol = "10x_v3"
+// Extra options inserted into `kb count`.
+params.count = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.count_out = ""
+// Run `kb count` on the reads with the process matching params.kb_protocol
+// and expose its counts and report channels.
+workflow count {
+  take:
+    index
+    fastq
+    transcript_to_gene
+    whitelist
+    config
+  main:
+  // An empty whitelist channel is replaced by a placeholder tuple that the
+  // counting processes recognise through its "NO WHITELIST" id.
+  whitelist
+    .ifEmpty(["NO WHITELIST", 0])
+    .set{ whitelist_optional }
+  switch(params.kb_protocol) {
+    case "marsseq":
+      // The reads go through the imported split step before counting.
+      split(fastq, config.collect())
+      kb_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
+      kb_marseq.out.counts.set{res_counts}
+      kb_marseq.out.report.set{res_report}
+    break;
+    default:
+      kb_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect())
+      kb_default.out.counts.set{res_counts}
+      kb_default.out.report.set{res_report}
+    break;
+  }
+  emit:
+    counts = res_counts
+    report = res_report
+}
+// Run `kb count -x 10XV3` on a read pair, then copy the fixed and original
+// transcript-to-gene tables into the output folder.
+// NOTE(review): only the two-file case has a script; any other number of
+// read files leaves this process without a command.
+process kb_default {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${file_prefix}"
+  if (params.count_out != "") {
+    publishDir "results/${params.count_out}", mode: 'copy'
+  }
+  input:
+    tuple val(index_id), path(index)
+    tuple val(file_id), path(reads)
+    tuple val(t2g_id), path(transcript_to_gene)
+    tuple val(whitelist_id), path(whitelist)
+  output:
+    tuple val(file_id), path("${file_prefix}"), emit: counts
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // The string form of the task memory, with its first "GB" removed, is
+  // what -m receives.
+  def kb_memory = "${task.memory}" - ~/GB/
+  // A list-valued id contributes its first element as the file prefix.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  // -w is only passed when a real whitelist was provided.
+  def whitelist_param = (whitelist_id != "NO WHITELIST") ? "-w ${whitelist}" : ""
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    ${whitelist_param} \
+    -x 10XV3 \
+    --h5ad \
+    ${params.count} \
+    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  """
+}
+// Run `kb count` with the custom technology string 1,0,6:1,6,14:0,0,0 on
+// one or two read files, then copy the fixed and original
+// transcript-to-gene tables into the output folder.
+process kb_marseq {
+  // MARS-Seq read layout:
+  //   read 1: 4 nt plate barcode
+  //   read 2: 6 nt cell barcode followed by an 8 nt UMI
+  // This process expects the plate barcode to be already removed from read 1.
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${file_prefix}"
+  if (params.count_out != "") {
+    publishDir "results/${params.count_out}", mode: 'copy'
+  }
+  input:
+    tuple val(index_id), path(index)
+    tuple val(file_id), path(reads)
+    tuple val(t2g_id), path(transcript_to_gene)
+    tuple val(whitelist_id), path(whitelist)
+  output:
+    tuple val(file_id), path("${file_prefix}"), emit: counts
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // The string form of the task memory, with its first "GB" removed, is
+  // what -m receives.
+  def kb_memory = "${task.memory}" - ~/GB/
+  // A list-valued id contributes its first element as the file prefix.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  // -w is only passed when a real whitelist was provided.
+  def whitelist_param = (whitelist_id != "NO WHITELIST") ? "-w ${whitelist}" : ""
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    ${whitelist_param} \
+    ${params.count} \
+    --h5ad \
+    -x 1,0,6:1,6,14:0,0,0 \
+    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  """
+  else
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    ${whitelist_param} \
+    ${params.count} \
+    -x 1,0,6:1,6,14:0,0,0 \
+    --h5ad \
+    ${reads} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  """
+}
+// ************************** velocity workflow **************************
+// Build the transcript-to-gene table from the GTF with tr2g, then build the
+// velocity index with index_fasta_velocity_default.
+workflow index_fasta_velocity {
+  take:
+    fasta
+    gtf
+  main:
+    tr2g(gtf)
+    index_fasta_velocity_default(fasta, gtf, tr2g.out.t2g)
+  emit:
+    index = index_fasta_velocity_default.out.index
+    // This tuple also carries the cdna_t2c.txt and intron_t2c.txt files.
+    t2g = index_fasta_velocity_default.out.t2g
+    report = index_fasta_velocity_default.out.report
+}
+// Run `kb ref --workflow lamanno` to create "<fasta.simpleName>.idx" along
+// with cdna_t2c.txt and intron_t2c.txt; stdout is kept as the report.
+process index_fasta_velocity_default {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(fasta)
+    tuple val(gtf_id), path(gtf)
+    tuple val(t2g_id), path(transcript_to_gene)
+  output:
+    tuple val(file_id), path("*.idx"), emit: index
+    // The t2g tuple bundles the file handed to -g with both t2c files.
+    tuple val(t2g_id), path("${transcript_to_gene}"), path("cdna_t2c.txt"), path("intron_t2c.txt"), emit: t2g
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+"""
+kb ref \
+  -i ${fasta.simpleName}.idx \
+  -g ${transcript_to_gene} \
+  ${params.index_fasta} \
+  -f1 cdna.fa -f2 intron.fa -c1 cdna_t2c.txt -c2 intron_t2c.txt --workflow lamanno \
+  ${fasta} ${gtf} > ${fasta.simpleName}_kb_index_report.txt
+"""
+}
+// NOTE(review): declared here, but the velocity processes in this file
+// interpolate params.count instead - confirm which one is intended.
+params.count_velocity = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.count_velocity_out = ""
+// Run the velocity `kb count` process matching params.kb_protocol and
+// expose its counts and report channels.
+workflow count_velocity {
+  take:
+    index
+    fastq
+    transcript_to_gene
+    whitelist
+    config
+  main:
+  // An empty whitelist channel is replaced by a placeholder tuple that the
+  // counting processes recognise through its "NO WHITELIST" id.
+  whitelist
+    .ifEmpty(["NO WHITELIST", 0])
+    .set{ whitelist_optional }
+  switch(params.kb_protocol) {
+    case "marsseq":
+      // The reads go through the imported split step before counting.
+      split(fastq, config.collect())
+      velocity_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
+      velocity_marseq.out.counts.set{res_counts}
+      velocity_marseq.out.report.set{res_report}
+    break;
+    default:
+      velocity_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect())
+      velocity_default.out.counts.set{res_counts}
+      velocity_default.out.report.set{res_report}
+    break;
+  }
+  emit:
+    counts = res_counts
+    report = res_report
+}
+// Run `kb count --workflow lamanno -x 10XV3` on a read pair, then copy the
+// transcript-to-gene and t2c tables into the output folder.
+// NOTE(review): only the two-file case has a script, and the extra options
+// come from params.count rather than params.count_velocity - confirm.
+process velocity_default {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${file_prefix}"
+  if (params.count_velocity_out != "") {
+    publishDir "results/${params.count_velocity_out}", mode: 'copy'
+  }
+  input:
+    tuple val(index_id), path(index)
+    tuple val(file_id), path(reads)
+    tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g)
+    tuple val(whitelist_id), path(whitelist)
+  output:
+    tuple val(file_id), path("${file_prefix}"), emit: counts
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // The string form of the task memory, with its first "GB" removed, is
+  // what -m receives.
+  def kb_memory = "${task.memory}" - ~/GB/
+  // A list-valued id contributes its first element as the file prefix.
+  file_prefix = (file_id instanceof List) ? file_id[0] : file_id
+  // -w is only passed when a real whitelist was provided.
+  def whitelist_param = (whitelist_id != "NO WHITELIST") ? "-w ${whitelist}" : ""
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    -c1 ${cdna_t2g} \
+    -c2 ${intron_t2g} \
+    --workflow lamanno \
+    ${whitelist_param} \
+    -x 10XV3 \
+    --h5ad \
+    ${params.count} \
+    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  cp ${cdna_t2g} ${file_prefix}/
+  cp ${intron_t2g} ${file_prefix}/
+  """
+}
+// Run `kb count --workflow lamanno` with the custom technology string
+// 1,0,6:1,6,14:0,0,0 on one or two read files, then copy the
+// transcript-to-gene and t2c tables into the output folder.
+// NOTE(review): the extra options come from params.count rather than
+// params.count_velocity - confirm which one is intended.
+process velocity_marseq {
+  // With the MARS-Seq protocol, we have:
+  // on the read 1: 4 nt of bc plate
+  // on the read 2: 6 nt of bc cell, and 8 nt of UMI
+  // this process expect that the bc plate is removed from the read 1
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_prefix"
+  if (params.count_velocity_out != "") {
+    publishDir "results/${params.count_velocity_out}", mode: 'copy'
+  }
+  input:
+  tuple val(index_id), path(index)
+  tuple val(file_id), path(reads)
+  tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g)
+  tuple val(whitelist_id), path(whitelist)
+  output:
+  tuple val(file_id), path("${file_prefix}"), emit: counts
+  tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+  // The string form of the task memory, with its first "GB" removed, is
+  // what -m receives.
+  def kb_memory = "${task.memory}" - ~/GB/
+  // A list-valued id contributes its first element as the file prefix.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  // -w is only passed when a real whitelist was provided.
+  def whitelist_param = ""
+  if (whitelist_id != "NO WHITELIST"){
+    whitelist_param = "-w ${whitelist}"
+  }
+  // Both branches request --h5ad so that single-end and paired runs produce
+  // the same set of files, as kb_marseq already does.
+  if (reads.size() == 2)
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    -c1 ${cdna_t2g} \
+    -c2 ${intron_t2g} \
+    --workflow lamanno \
+    --h5ad \
+    ${whitelist_param} \
+    ${params.count} \
+    -x 1,0,6:1,6,14:0,0,0 \
+    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  cp ${cdna_t2g} ${file_prefix}/
+  cp ${intron_t2g} ${file_prefix}/
+  """
+  else
+  """
+  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
+    -m ${kb_memory} \
+    -i ${index} \
+    -g ${transcript_to_gene} \
+    -o ${file_prefix} \
+    -c1 ${cdna_t2g} \
+    -c2 ${intron_t2g} \
+    --workflow lamanno \
+    --h5ad \
+    ${whitelist_param} \
+    ${params.count} \
+    -x 1,0,6:1,6,14:0,0,0 \
+    ${reads} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  cp ${cdna_t2g} ${file_prefix}/
+  cp ${intron_t2g} ${file_prefix}/
+  """
+}
--- a/src/nf_modules/macs2/main.nf
+++ b/src/nf_modules/macs2/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// macs2 release; also used as the container image tag.
+version = "2.1.2"
+container_url = "lbmc/macs2:${version}"
+// Genome size and mfold range used to build the default callpeak options.
+params.macs_gsize=3e9
+params.macs_mfold="5 50"
+// Options inserted right after `macs2 callpeak`.
+params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.peak_calling_out = ""
+// Call peaks with `macs2 callpeak` on an IP/control BAM pair. The stderr of
+// the command is kept as the report and the task fails when it contains
+// "ERROR".
+process peak_calling {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.peak_calling_out != "") {
+    publishDir "results/${params.peak_calling_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(bam_ip), path(bam_control)
+  output:
+    tuple val(file_id), path("*.narrowPeak"), emit: peak
+    tuple val(file_id), path("*.bed"), emit: summits
+    tuple val(file_id), path("*_peaks.xls"), path("*_report.txt"), emit: report
+  script:
+/* --keep-dup all and --qvalue 0.99 are fixed below; tune other options through params.peak_calling */
+"""
+macs2 callpeak \
+  ${params.peak_calling} \
+  --treatment ${bam_ip} \
+  --call-summits \
+  --control ${bam_control} \
+  --keep-dup all \
+  --qvalue 0.99 \
+  --name ${bam_ip.simpleName} 2> \
+  ${bam_ip.simpleName}_macs2_report.txt
+if grep -q "ERROR" ${bam_ip.simpleName}_macs2_report.txt; then
+  echo "MACS2 error"
+  exit 1
+fi
+"""
+}
+// Options inserted right after `macs2 callpeak` for bedgraph-derived input.
+params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.peak_calling_bg_out = ""
+// Rewrite the two four-column inputs as six-column BED files with awk (the
+// fourth input column becomes the sixth, with "." and "+" inserted), then
+// call peaks with `macs2 callpeak` on them. The stderr of the command is
+// kept as the report and the task fails when it contains "ERROR".
+process peak_calling_bg {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.peak_calling_bg_out != "") {
+    publishDir "results/${params.peak_calling_bg_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(bg_ip), path(bg_control)
+  output:
+    tuple val(file_id), path("*.narrowPeak"), emit: peak
+    // NOTE(review): this glob also matches the two converted input .bed
+    // files created by the script - confirm that this is intended.
+    tuple val(file_id), path("*.bed"), emit: summits
+    tuple val(file_id), path("*_report.txt"), emit: report
+  script:
+/* --keep-dup all and --qvalue 0.99 are fixed below; tune other options through params.peak_calling_bg */
+"""
+awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
+  ${bg_ip.simpleName}.bed
+awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
+  ${bg_control.simpleName}.bed
+macs2 callpeak \
+  ${params.peak_calling_bg} \
+  --treatment ${bg_ip.simpleName}.bed \
+  --qvalue 0.99 \
+  --call-summits \
+  --control ${bg_control.simpleName}.bed \
+  --keep-dup all \
+  --name ${bg_ip.simpleName} 2> \
+  ${bg_ip.simpleName}_macs2_report.txt
+if grep -q "ERROR" ${bg_ip.simpleName}_macs2_report.txt; then
+  echo "MACS2 error"
+  exit 1
+fi
+"""
+}
--- a/src/nf_modules/macs3/main.nf
+++ b/src/nf_modules/macs3/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// macs3 release; also used as the container image tag.
+version = "3.0.0a6"
+container_url = "lbmc/macs3:${version}"
+// Genome size and mfold range used to build the default callpeak options.
+params.macs_gsize=3e9
+params.macs_mfold="5 50"
+// Options inserted into `macs3 callpeak`.
+params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.peak_calling_out = ""
+// Call peaks with `macs3 callpeak` on an IP/control BAM pair. The stderr of
+// the command is kept as the report and the task fails when it contains
+// "ERROR".
+process peak_calling {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.peak_calling_out != "") {
+    publishDir "results/${params.peak_calling_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(bam_ip), path(bam_control)
+  output:
+    // Every file of the task directory is emitted on the peak channel.
+    path "*", emit: peak
+    path "*_report.txt", emit: report
+  script:
+/* remove --nomodel option for real dataset */
+// NOTE(review): the comment above mentions --nomodel, which does not appear
+// in the command; --gsize is passed twice with the default
+// params.peak_calling.
+"""
+macs3 callpeak \
+  --treatment ${bam_ip} \
+  --call-summits \
+  --control ${bam_control} \
+  --keep-dup all \
+  ${params.peak_calling} \
+  --name ${bam_ip.simpleName} \
+  --gsize ${params.macs_gsize} 2> \
+  ${bam_ip.simpleName}_macs3_report.txt
+if grep -q "ERROR" ${bam_ip.simpleName}_macs3_report.txt; then
+  echo "MACS3 error"
+  exit 1
+fi
+"""
+}
+// Options inserted right after `macs3 callpeak` for bedgraph-derived input.
+params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.peak_calling_bg_out = ""
+// Rewrite the two four-column inputs as six-column BED files with awk (the
+// fourth input column becomes the sixth, with "." and "+" inserted), then
+// call peaks with `macs3 callpeak` on them. The stderr of the command is
+// kept as the report and the task fails when it contains "ERROR".
+process peak_calling_bg {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.peak_calling_bg_out != "") {
+    publishDir "results/${params.peak_calling_bg_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(bg_ip), path(bg_control)
+  output:
+    // Every file of the task directory is emitted on the peak channel.
+    path "*", emit: peak
+    path "*_report.txt", emit: report
+  script:
+  // The --mfold line interpolates params.macs_mfold (a "low high" string)
+  // and ends with a continuation. It used to be a literal
+  // "params.macs_mfold[0]" with no trailing backslash, which ended the
+  // macs3 command early and ran "--name ..." as a separate command.
+"""
+awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
+  ${bg_ip.simpleName}.bed
+awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
+  ${bg_control.simpleName}.bed
+macs3 callpeak \
+  ${params.peak_calling_bg} \
+  --treatment ${bg_ip.simpleName}.bed \
+  --call-summits \
+  --control ${bg_control.simpleName}.bed \
+  --keep-dup all \
+  --mfold ${params.macs_mfold} \
+  --name ${bg_ip.simpleName} \
+  --gsize ${params.macs_gsize} 2> \
+  ${bg_ip.simpleName}_macs3_report.txt
+if grep -q "ERROR" ${bg_ip.simpleName}_macs3_report.txt; then
+  echo "MACS3 error"
+  exit 1
+fi
+"""
+}
--- a/src/nf_modules/minimap2/main.nf
+++ b/src/nf_modules/minimap2/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// minimap2 release; also used as the container image tag.
+version = "2.17"
+container_url = "lbmc/minimap2:${version}"
+// Extra options inserted right after `minimap2` when indexing.
+params.index_fasta = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.index_fasta_out = ""
+// Build a minimap2 index "<fasta.baseName>.mmi" and emit it together with
+// the fasta it was built from.
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(fasta)
+  output:
+    tuple val(file_id), path("${fasta}"), path("*.mmi*"), emit: index
+  script:
+  // String form of the task memory without its "GB" suffix (and the
+  // whitespace before it); "G" is appended again when passed to -I.
+  memory = "${task.memory}" - ~/\s*GB/
+"""
+minimap2 ${params.index_fasta} -t ${task.cpus} -I ${memory}G -d ${fasta.baseName}.mmi ${fasta}
+"""
+}
+// Options inserted right after `minimap2` when mapping.
+params.mapping_fastq = "-ax sr"
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.mapping_fastq_out = ""
+// Map single or paired reads against the fasta with minimap2 and convert
+// the result to "<file_prefix>.bam" with samtools.
+process mapping_fastq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.mapping_fastq_out != "") {
+    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
+  }
+  input:
+  tuple val(fasta_id), path(fasta), path(index)
+  tuple val(file_id), path(reads)
+  output:
+  tuple val(file_id), path("*.bam"), emit: bam
+  script:
+  // A list-valued id contributes its first element as the file prefix.
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  // Task memory in GB divided by (cpus + 1), passed to -K as computed.
+  // NOTE(review): the value is passed without a unit suffix and may be
+  // fractional - confirm that this is what -K should receive.
+  memory = "${task.memory}" - ~/\s*GB/
+  memory = memory.toInteger() / (task.cpus + 1.0)
+  // The BAM is named after file_prefix; the former "${pair_id}" is not
+  // defined anywhere in this process.
+  if (reads.size() == 2)
+  """
+  minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads[0]} ${reads[1]} |
+    samtools view -Sb - > ${file_prefix}.bam
+  """
+  else
+  """
+  minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads} |
+    samtools view -Sb - > ${file_prefix}.bam
+  """
+}
--- a/src/nf_modules/multiqc/main.nf
+++ b/src/nf_modules/multiqc/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// multiqc generate nice html report combining lots of differents bioinformatics
+// tools report.
+// 
+// EXAMPLE:
+/*
+include { multiqc } 
+  from './nf_modules/multiqc/main'
+  addParams(
+    multiqc_out: "QC/"
+  )
+multiqc(
+  report_a
+  .mix(
+    report_b,
+    report_c,
+    report_d
+  )
+)
+*/
+// multiqc release; also used as the container image tag.
+version = "1.11"
+container_url = "lbmc/multiqc:${version}"
+// Extra options inserted right after `multiqc`.
+params.multiqc = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.multiqc_out = "QC/"
+// Reduce every incoming report item to its file part, de-duplicate and
+// flatten the stream, then run multiqc once on the collected files.
+workflow multiqc {
+  take:
+    report
+  main:
+    report
+      .map { item ->
+        // Lists yield their second element when they have one, otherwise
+        // their first; any other item passes through unchanged.
+        (item instanceof List) ? (item.size() > 1 ? item[1] : item[0]) : item
+      }
+      .unique()
+      .flatten()
+      .set { report_cleaned }
+    multiqc_default(report_cleaned.collect())
+  emit:
+    report = multiqc_default.out.report
+}
+// Run multiqc on the task directory, where the report files are staged,
+// and emit everything matching "*multiqc_*".
+process multiqc_default {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  if (params.multiqc_out != "") {
+    publishDir "results/${params.multiqc_out}", mode: 'copy'
+  }
+  input:
+    path report 
+  output:
+    path "*multiqc_*", emit: report
+  script:
+"""
+multiqc ${params.multiqc} -f .
+"""
+}
--- a/src/nf_modules/picard/main.nf
+++ b/src/nf_modules/picard/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+// picard release; also used as the container image tag.
+version = "2.18.11"
+container_url = "lbmc/picard:${version}"
+// Options inserted right after `PicardCommandLine MarkDuplicates`.
+params.mark_duplicate = "VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=true"
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.mark_duplicate_out = ""
+// Run Picard MarkDuplicates on a BAM, writing "<bam.baseName>_dedup.bam"
+// and a metrics file that is emitted as the report; stdout/stderr go to
+// "picard_<bam.baseName>.log".
+process mark_duplicate {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.mark_duplicate_out != "") {
+    publishDir "results/${params.mark_duplicate_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(bam)
+  output:
+    tuple val(file_id) , path("*.bam"), emit: bam
+    path "*_report.dupinfo.txt", emit: report
+  script:
+"""
+PicardCommandLine MarkDuplicates \
+  ${params.mark_duplicate} \
+  INPUT=${bam} \
+  OUTPUT=${bam.baseName}_dedup.bam \
+  METRICS_FILE=${bam.baseName}_picard_dedup_report.dupinfo.txt &> \
+  picard_${bam.baseName}.log
+"""
+}
+// Declared for extra options; not referenced by the process below.
+params.normalize_fasta = ""
+// Sub-directory of results/ used for publishing; empty disables publishing.
+params.normalize_fasta_out = ""
+// Run Picard NormalizeFasta, writing "results/<fasta.simpleName>.fasta"
+// inside the task directory, then gzip that file.
+process normalize_fasta {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.normalize_fasta_out != "") {
+    publishDir "results/${params.normalize_fasta_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(fasta)
+  output:
+    tuple val(file_id), path("results/*.fasta.gz"), emit: fasta 
+  script:
+"""
+mkdir -p results
+PicardCommandLine NormalizeFasta \
+      I=${fasta} \
+      O=results/${fasta.simpleName}.fasta
+gzip results/${fasta.simpleName}.fasta
+"""
+}
+// Extra arguments forwarded verbatim to Picard CreateSequenceDictionary.
+params.index_fasta = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.index_fasta_out = ""
+// Builds a sequence dictionary for one FASTA.
+//   input : (file_id, fasta)
+//   emits : index -> (file_id, <fasta baseName>.dict)
+process index_fasta {
+  tag "$file_id"
+  label "big_mem_mono_cpus"
+  container = "${container_url}"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(fasta)
+  output:
+  tuple val(file_id), path("*.dict"), emit: index
+  script:
+"""
+PicardCommandLine CreateSequenceDictionary \
+  ${params.index_fasta} \
+  REFERENCE=${fasta} \
+  OUTPUT=${fasta.baseName}.dict
+"""
+}
+// Extra arguments forwarded verbatim to Picard BuildBamIndex.
+params.index_bam = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.index_bam_out = ""
+// Runs Picard BuildBamIndex on one BAM.
+//   input : (file_id, bam)
+//   emits : index -> (file_id, every file matched by "*" in the task dir)
+process index_bam {
+  tag "$file_id"
+  label "big_mem_mono_cpus"
+  container = "${container_url}"
+  if (params.index_bam_out != "") {
+    publishDir "results/${params.index_bam_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(bam)
+  output:
+  tuple val(file_id), path("*"), emit: index
+  script:
+"""
+PicardCommandLine BuildBamIndex \
+  ${params.index_bam} \
+  INPUT=${bam}
+"""
+}
--- a/src/nf_modules/pigz/2.4/docker_init.sh
+++ b/src/nf_modules/pigz/2.4/docker_init.sh
-#!/bin/sh
-docker build src/nf_modules/pigz/2.4 -t 'pigz:2.4'
--- a/src/nf_modules/porechop/main.nf
+++ b/src/nf_modules/porechop/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+version = "0.2.4"
+container_url = "lbmc/porechop:${version}"
+// Sub-folder of results/ to publish into; empty string disables publishing.
+// Declared here so the publishDir guard below never reads an undefined param.
+params.porechop_out = ""
+// Runs porechop on one FASTQ.
+//   input : (file_id, fastq)
+//   emits : porechoped_fastq -> (file_id, <file_id>_porechoped.fastq)
+process porechop {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.porechop_out != "") {
+    publishDir "results/${params.porechop_out}", mode: 'copy'
+  }
+  input:
+    // Was misspelled `fatsq`, leaving `${fastq}` in the script unbound.
+    tuple val(file_id), path(fastq)
+  output:
+    tuple val(file_id), path("*_porechoped.fastq"), emit: porechoped_fastq
+  script:
+  // Thread count follows the CPUs granted to the task instead of a fixed 4.
+"""
+porechop -i ${fastq} -o ${file_id}_porechoped.fastq --threads ${task.cpus}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/rasusa/main.nf
+++ b/src/nf_modules/rasusa/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+version = "0.6.0"
+container_url = "lbmc/rasusa:${version}"
+include { index_fasta } from "./../samtools/main.nf"
+// Module parameters. sample_fastq_coverage / sample_fastq_size select the
+// sub-sampling mode; sample_fastq_out is the results/ sub-folder to publish to.
+params.sample_fastq = ""
+params.sample_fastq_coverage = ""
+params.sample_fastq_size = ""
+params.sample_fastq_out = ""
+// Optionally sub-samples reads.
+//   take : fastq - read channel, fasta - reference channel
+//   emit : fastq - sub-sampled reads, or the untouched input when neither a
+//                  coverage nor a size has been requested
+workflow sample_fastq {
+  take:
+  fastq
+  fasta
+  main:
+  if (params.sample_fastq_coverage != "" || params.sample_fastq_size != "") {
+    // The fasta index is handed to the sub-sampling process as second input.
+    index_fasta(fasta)
+    sub_sample_fastq(fastq, index_fasta.out.index)
+    final_fastq = sub_sample_fastq.out.fastq
+  } else {
+    // Nothing requested: pass the reads through unchanged.
+    final_fastq = fastq
+  }
+  emit:
+  fastq = final_fastq
+}
+// Sub-samples one FASTQ (or a pair of FASTQ files) with rasusa.
+//   input : (file_id, fastq) and (index_id, idx); idx is passed to `-g`
+//   emits : fastq -> (file_id, sub_*.fastq.gz)
+process sub_sample_fastq {
+  container = "${container_url}"
+  label "small_mem_mono_cpus"
+  tag "$file_id"
+  // Guard on this process's own output parameter (it previously tested
+  // params.index_fasta_out, which belongs to a different process).
+  if (params.sample_fastq_out != "") {
+    publishDir "results/${params.sample_fastq_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(fastq)
+    tuple val(index_id), path(idx)
+  output:
+    tuple val(file_id), path("sub_*.fastq.gz"), emit: fastq
+  script:
+  // Derive a scalar prefix from file_id (list, map or plain value).
+  // Note: file_prefix is not referenced by the commands below.
+  switch(file_id) {
+    case {it instanceof List}:
+      file_prefix = file_id[0]
+    break
+    case {it instanceof Map}:
+      file_prefix = file_id.values()[0]
+    break
+    default:
+      file_prefix = file_id
+    break
+  }
+  // Coverage mode by default; an explicit size takes precedence over it.
+  sample_option = "-c " + params.sample_fastq_coverage
+  if (params.sample_fastq_size != ""){
+    sample_option = "-b " + params.sample_fastq_size
+  }
+  // Two staged files are handled as a read pair.
+  if (fastq.size() == 2)
+"""
+rasusa \
+  -i ${fastq[0]} ${fastq[1]} \
+  -g ${idx} \
+  ${sample_option} \
+  -o sub_${fastq[0].simpleName}.fastq.gz sub_${fastq[1].simpleName}.fastq.gz
+"""
+  else
+"""
+rasusa \
+  -i ${fastq} \
+  -g ${idx} \
+  ${sample_option} \
+  -o sub_${fastq.simpleName}.fastq.gz
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/rasusa/test.nf
+++ b/src/nf_modules/rasusa/test.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+nextflow.enable.dsl=2
+/*
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq"
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb"
+*/
+// Test inputs; all of them can be overridden on the command line.
+params.fastq = "data/fastq/*R{1,2}*"
+params.fasta = "data/fasta/*.fasta"
+params.coverage = ""
+params.size = ""
+// Forward the test-level options to the module under its own parameter names.
+include { sample_fastq } from "./main.nf" addParams(sample_fastq_coverage: params.coverage, sample_fastq_size: params.size, sample_fastq_out: "sample/")
+// Reads grouped by fromFilePairs; size: -1 accepts groups of any size.
+fastq_files = channel.fromFilePairs( params.fastq, size: -1)
+// Each reference is keyed by its simple name.
+fasta_files = channel
+  .fromPath( params.fasta )
+  .map { fasta_file -> [fasta_file.simpleName, fasta_file] }
+workflow {
+  sample_fastq(fastq_files, fasta_files.collect())
+}
\ No newline at end of file
--- a/src/nf_modules/rasusa/test.sh
+++ b/src/nf_modules/rasusa/test.sh
+#! /bin/sh
+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq"
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb"
\ No newline at end of file
--- a/src/nf_modules/salmon/main.nf
+++ b/src/nf_modules/salmon/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+version = "1.8.0"
+container_url = "lbmc/salmon:${version}"
+// Runs `salmon quant` in alignment mode (`-a`) on one BAM.
+//   input : (file_id, bam)
+//   emits : quant -> (file_id, *.sf)
+// NOTE(review): `-t XXXXXXXXXX` looks like an unfilled placeholder for the
+//   transcript FASTA; the process has no input that could supply it — confirm.
+// NOTE(review): params.salmon_out is not given a default in this module, yet
+//   it is used both for publishDir and as the `-o` directory — verify it is
+//   always set by the caller.
+// NOTE(review): salmon presumably writes its .sf file inside the `-o`
+//   directory, in which case the "*.sf" glob would not match — verify.
+// NOTE(review): `-p 4` is hard-coded while other multi-CPU processes in this
+//   diff use ${task.cpus}.
+process quantify {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  // Publishing is opt-in: only done when an output sub-folder is configured.
+  if (params.salmon_out != "") {
+    publishDir "results/${params.salmon_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(bam)
+  output:
+    tuple val(file_id), path("*.sf"), emit: quant
+  script:
+"""
+salmon quant -l A --noErrorModel -t XXXXXXXXXX -a ${bam} -p 4 -o ${params.salmon_out}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/sambamba/main.nf
+++ b/src/nf_modules/sambamba/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+version = "0.6.7"
+container_url = "lbmc/sambamba:${version}"
+// Extra arguments forwarded verbatim to `sambamba index`.
+params.index_bam = ""
+// Indexes one BAM with sambamba, using the CPUs granted to the task.
+//   input : (file_id, bam)
+//   emits : bam -> (file_id, files matching "*.bam*")
+process index_bam {
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  container = "${container_url}"
+  input:
+  tuple val(file_id), path(bam)
+  output:
+  tuple val(file_id), path("*.bam*"), emit: bam
+  script:
+"""
+sambamba index ${params.index_bam} -t ${task.cpus} ${bam}
+"""
+}
+// Extra arguments forwarded verbatim to `sambamba sort`.
+params.sort_bam = ""
+// Sorts one BAM with sambamba into <bam baseName>_sorted.bam.
+//   input : (file_id, bam)
+//   emits : bam -> (file_id, files matching "*.bam*")
+process sort_bam {
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  container = "${container_url}"
+  input:
+  tuple val(file_id), path(bam)
+  output:
+  tuple val(file_id), path("*.bam*"), emit: bam
+  script:
+"""
+sambamba sort -t ${task.cpus} ${params.sort_bam} -o ${bam.baseName}_sorted.bam ${bam}
+"""
+}
+// Extra arguments forwarded verbatim to both `sambamba view` calls.
+params.split_bam = ""
+// Splits one BAM by strand with sambamba filter expressions.
+//   input : (file_id, bam)
+//   emits : bam_forward -> (file_id, *_forward.bam*)
+//           bam_reverse -> (file_id, *_reverse.bam*)
+process split_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  input:
+    tuple val(file_id), path(bam)
+  output:
+    tuple val(file_id), path("*_forward.bam*"), emit: bam_forward
+    tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse
+  script:
+  // `sambamba view` emits SAM unless an output format is requested, so the
+  // *.bam files used to contain SAM text. `-f bam` is placed before
+  // params.split_bam so a caller-supplied format option still takes effect.
+"""
+sambamba view -t ${task.cpus} -f bam ${params.split_bam} -h -F "strand == '+'" ${bam} > \
+  ${bam.baseName}_forward.bam
+sambamba view -t ${task.cpus} -f bam ${params.split_bam} -h -F "strand == '-'" ${bam} > \
+  ${bam.baseName}_reverse.bam
+"""
+}
--- a/src/nf_modules/samtools/main.nf
+++ b/src/nf_modules/samtools/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+version = "1.11"
+container_url = "lbmc/samtools:${version}"
+// Extra arguments forwarded verbatim to `samtools faidx`.
+params.index_fasta = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.index_fasta_out = ""
+// Builds a .fai index for one FASTA.
+// When the input passes `gzip -t` it is first decompressed to
+// <fasta simpleName>.fasta and that copy is indexed; otherwise the input is
+// indexed as-is.
+//   input : (file_id, fasta)
+//   emits : index -> (file_id, *.fai)
+process index_fasta {
+  tag "$file_id"
+  label "big_mem_mono_cpus"
+  container = "${container_url}"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(fasta)
+  output:
+  tuple val(file_id), path("*.fai"), emit: index
+  script:
+"""
+if gzip -t ${fasta}; then
+  zcat ${fasta} > ${fasta.simpleName}.fasta
+  samtools faidx ${params.index_fasta}  ${fasta.simpleName}.fasta
+else
+  samtools faidx ${params.index_fasta} ${fasta}
+fi
+"""
+}
+// Quality threshold used to build the default `-q` option below.
+params.filter_bam_quality_threshold = 30
+// Arguments appended to `samtools view`; defaults to "-q <threshold>".
+params.filter_bam_quality = "-q ${params.filter_bam_quality_threshold}"
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.filter_bam_quality_out = ""
+// Filters one BAM with `samtools view` and the configured quality option.
+//   input : (file_id, bam)
+//   emits : bam -> (file_id, <bam simpleName>_filtered.bam)
+process filter_bam_quality {
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  container = "${container_url}"
+  if (params.filter_bam_quality_out != "") {
+    publishDir "results/${params.filter_bam_quality_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(bam)
+  output:
+  tuple val(file_id), path("*_filtered.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} -hb ${bam} ${params.filter_bam_quality} > \
+  ${bam.simpleName}_filtered.bam
+"""
+}
+// Extra arguments appended to `samtools view`.
+params.filter_bam = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.filter_bam_out = ""
+// Restricts one BAM to the regions of a BED file (`samtools view -L`).
+//   input : (file_id, bam) and (bed_id, bed)
+//   emits : bam -> (file_id, <bam simpleName>_filtered.bam)
+process filter_bam {
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  container = "${container_url}"
+  if (params.filter_bam_out != "") {
+    publishDir "results/${params.filter_bam_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(bam)
+  tuple val(bed_id), path(bed)
+  output:
+  tuple val(file_id), path("*_filtered.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} ${params.filter_bam} > \
+  ${bam.simpleName}_filtered.bam
+"""
+}
+// Extra arguments forwarded verbatim to `samtools view`.
+params.rm_from_bam = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.rm_from_bam_out = ""
+// Removes the reads overlapping a BED file from one BAM: reads selected by
+// `-L` go to /dev/null, the remaining ones are written through `-U`.
+//   input : (file_id, bam) and (bed_id, bed)
+//   emits : bam -> (file_id, <bam simpleName>_filtered.bam)
+process rm_from_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.rm_from_bam_out != "") {
+    publishDir "results/${params.rm_from_bam_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(bam)
+    tuple val(bed_id), path(bed)
+  output:
+    tuple val(file_id), path("*_filtered.bam"), emit: bam
+  script:
+  // Uses this process's own option parameter; it previously forwarded
+  // params.filter_bam, which belongs to the filter_bam process.
+"""
+samtools view -@ ${task.cpus} ${params.rm_from_bam} -hb -L ${bed} -U ${bam.simpleName}_filtered.bam ${bam} >  /dev/null
+"""
+}
+// Arguments forwarded to `samtools view`; the default `-F 4` excludes reads
+// carrying the unmapped flag.
+params.filter_bam_mapped = "-F 4"
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.filter_bam_mapped_out = ""
+// Keeps the mapped reads of one BAM.
+//   input : (file_id, bam)
+//   emits : bam -> (file_id, <bam simpleName>_mapped.bam)
+process filter_bam_mapped {
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  container = "${container_url}"
+  if (params.filter_bam_mapped_out != "") {
+    publishDir "results/${params.filter_bam_mapped_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(bam)
+  output:
+  tuple val(file_id), path("*_mapped.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} ${params.filter_bam_mapped} -hb ${bam} > \
+  ${bam.simpleName}_mapped.bam
+"""
+}
+// Arguments forwarded to `samtools view`; the default `-f 4` keeps only reads
+// carrying the unmapped flag.
+params.filter_bam_unmapped = "-f 4"
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.filter_bam_unmapped_out = ""
+// Keeps the unmapped reads of one BAM.
+//   input : (file_id, bam)
+//   emits : bam -> (file_id, <bam simpleName>_unmapped.bam)
+process filter_bam_unmapped {
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  container = "${container_url}"
+  if (params.filter_bam_unmapped_out != "") {
+    publishDir "results/${params.filter_bam_unmapped_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(bam)
+  output:
+  tuple val(file_id), path("*_unmapped.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} ${params.filter_bam_unmapped} -hb ${bam} > ${bam.simpleName}_unmapped.bam
+"""
+}
+// Extra arguments forwarded verbatim to `samtools index`.
+params.index_bam = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.index_bam_out = ""
+// Indexes one BAM and emits the BAM together with its index.
+//   input : (file_id, bam)
+//   emits : bam_idx -> (file_id, bam, *.bam.bai)
+process index_bam {
+  tag "$file_id"
+  label "big_mem_mono_cpus"
+  container = "${container_url}"
+  if (params.index_bam_out != "") {
+    publishDir "results/${params.index_bam_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(bam)
+  output:
+  tuple val(file_id), path("${bam}"), path("*.bam.bai"), emit: bam_idx
+  script:
+"""
+samtools index ${params.index_bam} ${bam}
+"""
+}
+// Extra arguments forwarded verbatim to `samtools sort`.
+params.sort_bam = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.sort_bam_out = ""
+// Sorts one BAM into <bam simpleName>_sorted.bam (BAM output).
+//   input : (file_id, bam)
+//   emits : bam -> (file_id, files matching "*.bam*")
+process sort_bam {
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  container = "${container_url}"
+  if (params.sort_bam_out != "") {
+    publishDir "results/${params.sort_bam_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(bam)
+  output:
+  tuple val(file_id), path("*.bam*"), emit: bam
+  script:
+"""
+samtools sort -@ ${task.cpus} ${params.sort_bam} -O BAM -o ${bam.simpleName}_sorted.bam ${bam}
+"""
+}
+// Extra arguments forwarded verbatim to both `samtools view` calls.
+params.split_bam = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.split_bam_out = ""
+// Splits one BAM by strand using SAM flag 0x10 (read reverse strand):
+// `-F 0x10` keeps forward reads, `-f 0x10` keeps reverse reads.
+//   input : (file_id, bam)
+//   emits : bam_forward -> (file_id, *_forward.bam*)
+//           bam_reverse -> (file_id, *_reverse.bam*)
+process split_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.split_bam_out != "") {
+    publishDir "results/${params.split_bam_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(bam)
+  output:
+    tuple val(file_id), path("*_forward.bam*"), emit: bam_forward
+    tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse
+  script:
+  // The forward extraction runs in the background on half of the CPUs while
+  // the reverse one runs in the foreground. The script must wait for the
+  // background job: without it the task could end while the forward BAM is
+  // still being written, and a failure of that job would go unnoticed.
+"""
+samtools view -@ ${Math.round(task.cpus/2)} ${params.split_bam} \
+  -hb -F 0x10 ${bam} > ${bam.simpleName}_forward.bam &
+forward_pid=\$!
+samtools view -@ ${Math.round(task.cpus/2)} ${params.split_bam} \
+  -hb -f 0x10 ${bam} > ${bam.simpleName}_reverse.bam
+wait \${forward_pid}
+"""
+}
+// Extra arguments forwarded verbatim to `samtools merge`.
+params.merge_bam = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.merge_bam_out = ""
+// Merges two BAM files into <first simpleName>_<second simpleName>.bam.
+//   input : (first_file_id, first_bam) and (second_file_id, second_bam)
+//   emits : bam -> (first_file_id, files matching "*.bam*")
+// The tag and the emitted id use first_file_id: the process previously
+// referred to `file_id`, which is not declared by any input.
+process merge_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$first_file_id"
+  if (params.merge_bam_out != "") {
+    publishDir "results/${params.merge_bam_out}", mode: 'copy'
+  }
+  input:
+    tuple val(first_file_id), path(first_bam)
+    tuple val(second_file_id), path(second_bam)
+  output:
+    tuple val(first_file_id), path("*.bam*"), emit: bam
+  script:
+  // `samtools merge` expects the output file BEFORE the inputs; the previous
+  // order named first_bam as the output. The undefined `second_file` is
+  // replaced by the declared `second_bam`.
+"""
+samtools merge -@ ${task.cpus} ${params.merge_bam} \
+  ${first_bam.simpleName}_${second_bam.simpleName}.bam \
+  ${first_bam} ${second_bam}
+"""
+}
+// Extra arguments forwarded verbatim to `samtools merge`.
+params.merge_multi_bam = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.merge_multi_bam_out = ""
+// Merges all staged BAM files of one sample; the output is named after the
+// first file of the list (<first simpleName>_merged.bam).
+//   input : (file_id, bams)
+//   emits : bam -> (file_id, *_merged.bam*)
+process merge_multi_bam {
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  container = "${container_url}"
+  if (params.merge_multi_bam_out != "") {
+    publishDir "results/${params.merge_multi_bam_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(bams)
+  output:
+  tuple val(file_id), path("*_merged.bam*"), emit: bam
+  script:
+"""
+samtools merge -@ ${task.cpus} \
+  ${params.merge_multi_bam} \
+  ${bams[0].simpleName}_merged.bam \
+  ${bams}
+"""
+}
+// Extra arguments forwarded verbatim to `samtools flagstat`.
+params.stats_bam = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.stats_bam_out = ""
+// Computes flagstat statistics (TSV format) for one BAM. The same content is
+// written twice: as <bam simpleName>.flagstat.txt and as a .tsv copy.
+//   input : (file_id, bam)
+//   emits : tsv    -> (file_id, *.tsv)
+//           report -> *.flagstat.txt
+process stats_bam {
+  tag "$file_id"
+  label "big_mem_multi_cpus"
+  container = "${container_url}"
+  if (params.stats_bam_out != "") {
+    publishDir "results/${params.stats_bam_out}", mode: 'copy'
+  }
+  input:
+  tuple val(file_id), path(bam)
+  output:
+  tuple val(file_id), path("*.tsv"), emit: tsv
+  path "*.flagstat.txt", emit: report
+  script:
+"""
+samtools flagstat -@ ${task.cpus} ${params.stats_bam} -O tsv ${bam} > ${bam.simpleName}.flagstat.txt
+cp ${bam.simpleName}.flagstat.txt ${bam.simpleName}.tsv
+"""
+}
+// Declared for symmetry with the other processes; not read by the script.
+params.flagstat_2_multiqc = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.flagstat_2_multiqc_out = ""
+// Renames a TSV file to <tsv simpleName>.flagstat.txt.
+//   input : (file_id, tsv)
+//   emits : report -> (file_id, *.txt)
+// No container is declared for this process; it only calls `mv`.
+process flagstat_2_multiqc {
+  tag "$file_id"
+  if (params.flagstat_2_multiqc_out != "") {
+    publishDir "results/${params.flagstat_2_multiqc_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(tsv)
+  output:
+    tuple val(file_id), path("*.txt"), emit: report
+  // Explicit section label, as in every other process of this module, rather
+  // than relying on the implicit trailing script string.
+  script:
+"""
+mv ${tsv} ${tsv.simpleName}.flagstat.txt
+"""
+}
+// Declared for symmetry with the other processes; not read by the script.
+params.idxstat_2_multiqc = ""
+// Sub-folder of results/ to publish into; empty string disables publishing.
+params.idxstat_2_multiqc_out = ""
+// Renames a TSV file to <tsv simpleName>.idxstats.txt.
+//   input : (file_id, tsv)
+//   emits : report -> (file_id, *.txt)
+// No container is declared for this process; it only calls `mv`.
+process idxstat_2_multiqc {
+  tag "$file_id"
+  if (params.idxstat_2_multiqc_out != "") {
+    publishDir "results/${params.idxstat_2_multiqc_out}", mode: 'copy'
+  }
+  input:
+    tuple val(file_id), path(tsv)
+  output:
+    tuple val(file_id), path("*.txt"), emit: report
+  // Explicit section label, as in every other process of this module, rather
+  // than relying on the implicit trailing script string.
+  script:
+"""
+mv ${tsv} ${tsv.simpleName}.idxstats.txt
+"""
+}
\ No newline at end of file
No results found