// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.44.0"
container_url = "lbmc/kallisto:${version}"
params.index_fasta = "-k 31 --make-unique"
params.index_fasta_out = ""
process index_fasta {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
if (params.index_fasta_out != "") {
publishDir "results/${params.index_fasta_out}", mode: 'copy'
}
input:
tuple val(file_id), path(fasta)
output:
tuple val(file_id), path("*.index*"), emit: index
tuple val(file_id), path("*_report.txt"), emit: report
script:
"""
kallisto index ${params.index_fasta} -i ${fasta.baseName}.index ${fasta} \
2> ${fasta.baseName}_kallisto_index_report.txt
"""
}
params.mapping_fastq = "--bias --bootstrap-samples 100"
params.mapping_fastq_out = ""
process mapping_fastq {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$pair_id"
if (params.mapping_fastq_out != "") {
publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
}
input:
tuple val(index_id), path(index)
tuple val(file_id), path(reads)
output:
tuple val(file_id), path("${file_prefix}"), emit: counts
tuple val(file_id), path("*_report.txt"), emit: report
script:
if (file_id instanceof List){
file_prefix = file_id[0]
} else {
file_prefix = file_id
}
if (reads.size() == 2)
"""
mkdir ${file_prefix}
kallisto quant -i ${index} -t ${task.cpus} \
${params.mapping_fastq} -o ${file_prefix} \
${reads[0]} ${reads[1]} &> ${file_prefix}_kallisto_mapping_report.txt
"""
else
"""
mkdir ${file_prefix}
kallisto quant -i ${index} -t ${task.cpus} --single \
${params.mapping_fastq} -o ${file_prefix} \
${reads[0]} &> ${file_prefix}_kallisto_mapping_report.txt
"""
}
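// EXAMPLE (a minimal usage sketch, not part of the module: the include path,
// the fasta/fastq globs and the output folder below are placeholder assumptions):
/*
include { index_fasta; mapping_fastq } from "./nf_modules/kallisto/main.nf" addParams(mapping_fastq_out: "kallisto/")

workflow {
  channel
    .fromPath("data/fasta/*.fasta")
    .map{ it -> [it.simpleName, it] }
    .set{ fasta_files }
  channel
    .fromFilePairs("data/fastq/*_R{1,2}.fastq", size: -1)
    .set{ fastq_files }
  index_fasta(fasta_files)
  mapping_fastq(index_fasta.out.index.collect(), fastq_files)
}
*/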
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.26.0"
container_url = "lbmc/kb:${version}"
params.index_fasta = ""
params.index_fasta_out = ""
workflow index_fasta {
take:
fasta
gtf
main:
tr2g(gtf)
index_default(fasta, gtf, tr2g.out.t2g)
emit:
index = index_default.out.index
t2g = index_default.out.t2g
report = index_default.out.report
}
process tr2g {
// create a transcript-to-gene table from the gtf when no transcript-to-gene file is provided
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
if (params.index_fasta_out != "") {
publishDir "results/${params.index_fasta_out}", mode: 'copy'
}
input:
tuple val(file_id), path(gtf)
output:
tuple val(file_id), path("t2g.txt"), emit: t2g
script:
"""
t2g.py --gtf ${gtf}
sort -k1 -u t2g_dup.txt > t2g.txt
"""
}
process g2tr {
// create a gene-to-transcript table from the gtf when no transcript-to-gene file is provided
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
if (params.index_fasta_out != "") {
publishDir "results/${params.index_fasta_out}", mode: 'copy'
}
input:
tuple val(file_id), path(gtf)
output:
tuple val(file_id), path("g2t.txt"), emit: g2t
script:
"""
t2g.py --gtf ${gtf}
sort -k1 -u t2g_dup.txt > t2g.txt
awk 'BEGIN{OFS="\\t"}{print \$2, \$1}' t2g.txt > g2t.txt
"""
}
process index_default {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
if (params.index_fasta_out != "") {
publishDir "results/${params.index_fasta_out}", mode: 'copy'
}
input:
tuple val(file_id), path(fasta)
tuple val(gtf_id), path(gtf)
tuple val(t2g_id), path(transcript_to_gene)
output:
tuple val(file_id), path("*.idx"), emit: index
tuple val(t2g_id), path("${transcript_to_gene}"), emit: t2g
tuple val(file_id), path("*_report.txt"), emit: report
script:
"""
kb ref \
-i ${fasta.simpleName}.idx \
-g ${transcript_to_gene} \
${params.index_fasta} \
-f1 cdna.fa ${fasta} ${gtf} > ${fasta.simpleName}_kb_index_report.txt
"""
}
include { split } from "./../flexi_splitter/main.nf"
params.kb_protocol = "10x_v3"
params.count = ""
params.count_out = ""
workflow count {
take:
index
fastq
transcript_to_gene
whitelist
config
main:
whitelist
.ifEmpty(["NO WHITELIST", 0])
.set{ whitelist_optional }
switch(params.kb_protocol) {
case "marsseq":
split(fastq, config.collect())
kb_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
kb_marseq.out.counts.set{res_counts}
kb_marseq.out.report.set{res_report}
break;
default:
kb_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect())
kb_default.out.counts.set{res_counts}
kb_default.out.report.set{res_report}
break;
}
emit:
counts = res_counts
report = res_report
}
process kb_default {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_prefix"
if (params.count_out != "") {
publishDir "results/${params.count_out}", mode: 'copy'
}
input:
tuple val(index_id), path(index)
tuple val(file_id), path(reads)
tuple val(t2g_id), path(transcript_to_gene)
tuple val(whitelist_id), path(whitelist)
output:
tuple val(file_id), path("${file_prefix}"), emit: counts
tuple val(file_id), path("*_report.txt"), emit: report
script:
def kb_memory = "${task.memory}" - ~/GB/
if (file_id instanceof List){
file_prefix = file_id[0]
} else {
file_prefix = file_id
}
def whitelist_param = ""
if (whitelist_id != "NO WHITELIST"){
whitelist_param = "-w ${whitelist}"
}
if (reads.size() == 2)
"""
mkdir ${file_prefix}
kb count -t ${task.cpus} \
-m ${kb_memory} \
-i ${index} \
-g ${transcript_to_gene} \
-o ${file_prefix} \
${whitelist_param} \
-x 10XV3 \
--h5ad \
${params.count} \
${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
fix_t2g.py --t2g ${transcript_to_gene}
cp fix_t2g.txt ${file_prefix}/
cp ${transcript_to_gene} ${file_prefix}/
"""
}
process kb_marseq {
// With the MARS-Seq protocol, we have:
// on read 1: 4 nt of plate barcode
// on read 2: 6 nt of cell barcode followed by 8 nt of UMI
// this process expects the plate barcode to already be trimmed from read 1,
// hence the technology string "-x 1,0,6:1,6,14:0,0,0" (cell barcode at
// positions 0-6 and UMI at positions 6-14 of the second fastq, cDNA from the first)
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_prefix"
if (params.count_out != "") {
publishDir "results/${params.count_out}", mode: 'copy'
}
input:
tuple val(index_id), path(index)
tuple val(file_id), path(reads)
tuple val(t2g_id), path(transcript_to_gene)
tuple val(whitelist_id), path(whitelist)
output:
tuple val(file_id), path("${file_prefix}"), emit: counts
tuple val(file_id), path("*_report.txt"), emit: report
script:
def kb_memory = "${task.memory}" - ~/GB/
if (file_id instanceof List){
file_prefix = file_id[0]
} else {
file_prefix = file_id
}
def whitelist_param = ""
if (whitelist_id != "NO WHITELIST"){
whitelist_param = "-w ${whitelist}"
}
if (reads.size() == 2)
"""
mkdir ${file_prefix}
kb count -t ${task.cpus} \
-m ${kb_memory} \
-i ${index} \
-g ${transcript_to_gene} \
-o ${file_prefix} \
${whitelist_param} \
${params.count} \
--h5ad \
-x 1,0,6:1,6,14:0,0,0 \
${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
fix_t2g.py --t2g ${transcript_to_gene}
cp fix_t2g.txt ${file_prefix}/
cp ${transcript_to_gene} ${file_prefix}/
"""
else
"""
mkdir ${file_prefix}
kb count -t ${task.cpus} \
-m ${kb_memory} \
-i ${index} \
-g ${transcript_to_gene} \
-o ${file_prefix} \
${whitelist_param} \
${params.count} \
-x 1,0,6:1,6,14:0,0,0 \
--h5ad \
${reads} > ${file_prefix}_kb_mapping_report.txt
fix_t2g.py --t2g ${transcript_to_gene}
cp fix_t2g.txt ${file_prefix}/
cp ${transcript_to_gene} ${file_prefix}/
"""
}
// ************************** velocity workflow **************************
workflow index_fasta_velocity {
take:
fasta
gtf
main:
tr2g(gtf)
index_fasta_velocity_default(fasta, gtf, tr2g.out.t2g)
emit:
index = index_fasta_velocity_default.out.index
t2g = index_fasta_velocity_default.out.t2g
report = index_fasta_velocity_default.out.report
}
process index_fasta_velocity_default {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
if (params.index_fasta_out != "") {
publishDir "results/${params.index_fasta_out}", mode: 'copy'
}
input:
tuple val(file_id), path(fasta)
tuple val(gtf_id), path(gtf)
tuple val(t2g_id), path(transcript_to_gene)
output:
tuple val(file_id), path("*.idx"), emit: index
tuple val(t2g_id), path("${transcript_to_gene}"), path("cdna_t2c.txt"), path("intron_t2c.txt"), emit: t2g
tuple val(file_id), path("*_report.txt"), emit: report
script:
"""
kb ref \
-i ${fasta.simpleName}.idx \
-g ${transcript_to_gene} \
${params.index_fasta} \
-f1 cdna.fa -f2 intron.fa -c1 cdna_t2c.txt -c2 intron_t2c.txt --workflow lamanno \
${fasta} ${gtf} > ${fasta.simpleName}_kb_index_report.txt
"""
}
params.count_velocity = ""
params.count_velocity_out = ""
workflow count_velocity {
take:
index
fastq
transcript_to_gene
whitelist
config
main:
whitelist
.ifEmpty(["NO WHITELIST", 0])
.set{ whitelist_optional }
switch(params.kb_protocol) {
case "marsseq":
split(fastq, config.collect())
velocity_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
velocity_marseq.out.counts.set{res_counts}
velocity_marseq.out.report.set{res_report}
break;
default:
velocity_default(index.collect(), fastq, transcript_to_gene.collect(), whitelist_optional.collect())
velocity_default.out.counts.set{res_counts}
velocity_default.out.report.set{res_report}
break;
}
emit:
counts = res_counts
report = res_report
}
process velocity_default {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_prefix"
if (params.count_velocity_out != "") {
publishDir "results/${params.count_velocity_out}", mode: 'copy'
}
input:
tuple val(index_id), path(index)
tuple val(file_id), path(reads)
tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g)
tuple val(whitelist_id), path(whitelist)
output:
tuple val(file_id), path("${file_prefix}"), emit: counts
tuple val(file_id), path("*_report.txt"), emit: report
script:
def kb_memory = "${task.memory}" - ~/GB/
if (file_id instanceof List){
file_prefix = file_id[0]
} else {
file_prefix = file_id
}
def whitelist_param = ""
if (whitelist_id != "NO WHITELIST"){
whitelist_param = "-w ${whitelist}"
}
if (reads.size() == 2)
"""
mkdir ${file_prefix}
kb count -t ${task.cpus} \
-m ${kb_memory} \
-i ${index} \
-g ${transcript_to_gene} \
-o ${file_prefix} \
-c1 ${cdna_t2g} \
-c2 ${intron_t2g} \
--workflow lamanno \
${whitelist_param} \
-x 10XV3 \
--h5ad \
${params.count} \
${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
fix_t2g.py --t2g ${transcript_to_gene}
cp fix_t2g.txt ${file_prefix}/
cp ${transcript_to_gene} ${file_prefix}/
cp ${cdna_t2g} ${file_prefix}/
cp ${intron_t2g} ${file_prefix}/
"""
}
process velocity_marseq {
// With the MARS-Seq protocol, we have:
// on read 1: 4 nt of plate barcode
// on read 2: 6 nt of cell barcode followed by 8 nt of UMI
// this process expects the plate barcode to already be trimmed from read 1,
// hence the technology string "-x 1,0,6:1,6,14:0,0,0" (cell barcode at
// positions 0-6 and UMI at positions 6-14 of the second fastq, cDNA from the first)
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_prefix"
if (params.count_velocity_out != "") {
publishDir "results/${params.count_velocity_out}", mode: 'copy'
}
input:
tuple val(index_id), path(index)
tuple val(file_id), path(reads)
tuple val(t2g_id), path(transcript_to_gene), path(cdna_t2g), path(intron_t2g)
tuple val(whitelist_id), path(whitelist)
output:
tuple val(file_id), path("${file_prefix}"), emit: counts
tuple val(file_id), path("*_report.txt"), emit: report
script:
def kb_memory = "${task.memory}" - ~/GB/
if (file_id instanceof List){
file_prefix = file_id[0]
} else {
file_prefix = file_id
}
def whitelist_param = ""
if (whitelist_id != "NO WHITELIST"){
whitelist_param = "-w ${whitelist}"
}
if (reads.size() == 2)
"""
mkdir ${file_prefix}
kb count -t ${task.cpus} \
-m ${kb_memory} \
-i ${index} \
-g ${transcript_to_gene} \
-o ${file_prefix} \
-c1 ${cdna_t2g} \
-c2 ${intron_t2g} \
--workflow lamanno \
--h5ad \
${whitelist_param} \
${params.count} \
-x 1,0,6:1,6,14:0,0,0 \
${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
fix_t2g.py --t2g ${transcript_to_gene}
cp fix_t2g.txt ${file_prefix}/
cp ${transcript_to_gene} ${file_prefix}/
cp ${cdna_t2g} ${file_prefix}/
cp ${intron_t2g} ${file_prefix}/
"""
else
"""
mkdir ${file_prefix}
kb count -t ${task.cpus} \
-m ${kb_memory} \
-i ${index} \
-g ${transcript_to_gene} \
-o ${file_prefix} \
-c1 ${cdna_t2g} \
-c2 ${intron_t2g} \
--workflow lamanno \
${whitelist_param} \
${params.count} \
-x 1,0,6:1,6,14:0,0,0 \
${reads} > ${file_prefix}_kb_mapping_report.txt
fix_t2g.py --t2g ${transcript_to_gene}
cp fix_t2g.txt ${file_prefix}/
cp ${transcript_to_gene} ${file_prefix}/
cp ${cdna_t2g} ${file_prefix}/
cp ${intron_t2g} ${file_prefix}/
"""
}
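// EXAMPLE (a minimal usage sketch, not part of the module: the file globs, the
// output folder and the empty whitelist/config channels are placeholder assumptions):
/*
include { index_fasta; count } from "./nf_modules/kb/main.nf" addParams(kb_protocol: "10x_v3", count_out: "kb/")

workflow {
  channel
    .fromPath("data/fasta/*.fasta")
    .map{ it -> [it.simpleName, it] }
    .set{ fasta_files }
  channel
    .fromPath("data/annotation/*.gtf")
    .map{ it -> [it.simpleName, it] }
    .set{ gtf_files }
  channel
    .fromFilePairs("data/fastq/*_R{1,2}.fastq.gz", size: -1)
    .set{ fastq_files }
  index_fasta(fasta_files, gtf_files)
  // whitelist and config can be left empty for the default 10x protocol
  count(index_fasta.out.index, fastq_files, index_fasta.out.t2g, channel.empty(), channel.empty())
}
*/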
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "2.1.2"
container_url = "lbmc/macs2:${version}"
params.macs_gsize=3e9
params.macs_mfold="5 50"
params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
params.peak_calling_out = ""
process peak_calling {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "${file_id}"
if (params.peak_calling_out != "") {
publishDir "results/${params.peak_calling_out}", mode: 'copy'
}
input:
tuple val(file_id), path(bam_ip), path(bam_control)
output:
tuple val(file_id), path("*.narrowPeak"), emit: peak
tuple val(file_id), path("*.bed"), emit: summits
tuple val(file_id), path("*_peaks.xls"), path("*_report.txt"), emit: report
script:
/* remove --nomodel option for real dataset */
"""
macs2 callpeak \
${params.peak_calling} \
--treatment ${bam_ip} \
--call-summits \
--control ${bam_control} \
--keep-dup all \
--qvalue 0.99 \
--name ${bam_ip.simpleName} 2> \
${bam_ip.simpleName}_macs2_report.txt
if grep -q "ERROR" ${bam_ip.simpleName}_macs2_report.txt; then
echo "MACS3 error"
exit 1
fi
"""
}
params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
params.peak_calling_bg_out = ""
process peak_calling_bg {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "${file_id}"
if (params.peak_calling_bg_out != "") {
publishDir "results/${params.peak_calling_bg_out}", mode: 'copy'
}
input:
tuple val(file_id), path(bg_ip), path(bg_control)
output:
tuple val(file_id), path("*.narrowPeak"), emit: peak
tuple val(file_id), path("*.bed"), emit: summits
tuple val(file_id), path("*_report.txt"), emit: report
script:
/* remove --nomodel option for real dataset */
"""
awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
${bg_ip.simpleName}.bed
awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
${bg_control.simpleName}.bed
macs2 callpeak \
${params.peak_calling_bg} \
--treatment ${bg_ip.simpleName}.bed \
--qvalue 0.99 \
--call-summits \
--control ${bg_control.simpleName}.bed \
--keep-dup all \
--name ${bg_ip.simpleName} 2> \
${bg_ip.simpleName}_macs2_report.txt
if grep -q "ERROR" ${bg_ip.simpleName}_macs2_report.txt; then
echo "MACS3 error"
exit 1
fi
"""
}
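// EXAMPLE (a minimal usage sketch, not part of the module: the bam file names,
// the genome size and the output folder are placeholder assumptions):
/*
include { peak_calling } from "./nf_modules/macs2/main.nf" addParams(macs_gsize: 1.2e7, peak_calling_out: "macs2/")

workflow {
  // each item pairs an IP bam with its control (input) bam
  channel
    .of(["sample_1", file("data/bam/sample_1_IP.bam"), file("data/bam/sample_1_control.bam")])
    .set{ ip_vs_control }
  peak_calling(ip_vs_control)
}
*/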
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "3.0.0a6"
container_url = "lbmc/macs3:${version}"
params.macs_gsize=3e9
params.macs_mfold="5 50"
params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
params.peak_calling_out = ""
process peak_calling {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "${file_id}"
if (params.peak_calling_out != "") {
publishDir "results/${params.peak_calling_out}", mode: 'copy'
}
input:
tuple val(file_id), path(bam_ip), path(bam_control)
output:
path "*", emit: peak
path "*_report.txt", emit: report
script:
/* remove --nomodel option for real dataset */
"""
macs3 callpeak \
--treatment ${bam_ip} \
--call-summits \
--control ${bam_control} \
--keep-dup all \
${params.peak_calling} \
--name ${bam_ip.simpleName} \
--gsize ${params.macs_gsize} 2> \
${bam_ip.simpleName}_macs3_report.txt
if grep -q "ERROR" ${bam_ip.simpleName}_macs3_report.txt; then
echo "MACS3 error"
exit 1
fi
"""
}
params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}"
params.peak_calling_bg_out = ""
process peak_calling_bg {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "${file_id}"
if (params.peak_calling_bg_out != "") {
publishDir "results/${params.peak_calling_bg_out}", mode: 'copy'
}
input:
tuple val(file_id), path(bg_ip), path(bg_control)
output:
path "*", emit: peak
path "*_report.txt", emit: report
script:
/* remove --nomodel option for real dataset */
"""
awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \
${bg_ip.simpleName}.bed
awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
${bg_control.simpleName}.bed
macs3 callpeak \
${params.peak_calling_bg} \
--treatment ${bg_ip.simpleName}.bed \
--call-summits \
--control ${bg_control.simpleName}.bed \
--keep-dup all \
--name ${bg_ip.simpleName} \
--gsize ${params.macs_gsize} 2> \
${bg_ip.simpleName}_macs3_report.txt
if grep -q "ERROR" ${bg_ip.simpleName}_macs3_report.txt; then
echo "MACS3 error"
exit 1
fi
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "2.17"
container_url = "lbmc/minimap2:${version}"
params.index_fasta = ""
params.index_fasta_out = ""
process index_fasta {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
if (params.index_fasta_out != "") {
publishDir "results/${params.index_fasta_out}", mode: 'copy'
}
input:
tuple val(file_id), path(fasta)
output:
tuple val(file_id), path("${fasta}"), path("*.mmi*"), emit: index
script:
memory = "${task.memory}" - ~/\s*GB/
"""
minimap2 ${params.index_fasta} -t ${task.cpus} -I ${memory}G -d ${fasta.baseName}.mmi ${fasta}
"""
}
params.mapping_fastq = "-ax sr"
params.mapping_fastq_out = ""
process mapping_fastq {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
if (params.mapping_fastq_out != "") {
publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
}
input:
tuple val(fasta_id), path(fasta), path(index)
tuple val(file_id), path(reads)
output:
tuple val(file_id), path("*.bam"), emit: bam
script:
if (file_id instanceof List){
file_prefix = file_id[0]
} else {
file_prefix = file_id
}
memory = "${task.memory}" - ~/\s*GB/
memory = memory.toInteger() / (task.cpus + 1.0)
if (reads.size() == 2)
"""
minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory}G ${fasta} ${reads[0]} ${reads[1]} |
samtools view -Sb - > ${file_prefix}.bam
"""
else
"""
minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory}G ${fasta} ${reads} |
samtools view -Sb - > ${file_prefix}.bam
"""
}
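// EXAMPLE (a minimal usage sketch, not part of the module: the include path and
// the file globs are placeholder assumptions; params.mapping_fastq defaults to "-ax sr"):
/*
include { index_fasta; mapping_fastq } from "./nf_modules/minimap2/main.nf" addParams(mapping_fastq_out: "minimap2/")

workflow {
  channel
    .fromPath("data/fasta/*.fasta")
    .map{ it -> [it.simpleName, it] }
    .set{ fasta_files }
  channel
    .fromFilePairs("data/fastq/*_R{1,2}.fastq", size: -1)
    .set{ fastq_files }
  index_fasta(fasta_files)
  mapping_fastq(index_fasta.out.index.collect(), fastq_files)
}
*/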
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
// multiqc generates a nice html report combining the reports of many different
// bioinformatics tools.
//
// EXAMPLE:
/*
include { multiqc }
from './nf_modules/multiqc/main'
addParams(
multiqc_out: "QC/"
)
multiqc(
report_a
.mix(
report_b,
report_c,
report_d
)
)
*/
version = "1.11"
container_url = "lbmc/multiqc:${version}"
params.multiqc = ""
params.multiqc_out = "QC/"
workflow multiqc {
take:
report
main:
report
.map{it ->
if (it instanceof List){
if(it.size() > 1) {
it[1]
} else {
it[0]
}
} else {
it
}
}
.unique()
.flatten()
.set { report_cleaned }
multiqc_default(report_cleaned.collect())
emit:
report = multiqc_default.out.report
}
process multiqc_default {
container = "${container_url}"
label "big_mem_mono_cpus"
if (params.multiqc_out != "") {
publishDir "results/${params.multiqc_out}", mode: 'copy'
}
input:
path report
output:
path "*multiqc_*", emit: report
script:
"""
multiqc ${params.multiqc} -f .
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "2.18.11"
container_url = "lbmc/picard:${version}"
params.mark_duplicate = "VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=true"
params.mark_duplicate_out = ""
process mark_duplicate {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
if (params.mark_duplicate_out != "") {
publishDir "results/${params.mark_duplicate_out}", mode: 'copy'
}
input:
tuple val(file_id), path(bam)
output:
tuple val(file_id) , path("*.bam"), emit: bam
path "*_report.dupinfo.txt", emit: report
script:
"""
PicardCommandLine MarkDuplicates \
${params.mark_duplicate} \
INPUT=${bam} \
OUTPUT=${bam.baseName}_dedup.bam \
METRICS_FILE=${bam.baseName}_picard_dedup_report.dupinfo.txt &> \
picard_${bam.baseName}.log
"""
}
params.normalize_fasta = ""
params.normalize_fasta_out = ""
process normalize_fasta {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
if (params.normalize_fasta_out != "") {
publishDir "results/${params.normalize_fasta_out}", mode: 'copy'
}
input:
tuple val(file_id), path(fasta)
output:
tuple val(file_id), path("results/*.fasta.gz"), emit: fasta
script:
"""
mkdir -p results
PicardCommandLine NormalizeFasta \
I=${fasta} \
O=results/${fasta.simpleName}.fasta
gzip results/${fasta.simpleName}.fasta
"""
}
params.index_fasta = ""
params.index_fasta_out = ""
process index_fasta {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
if (params.index_fasta_out != "") {
publishDir "results/${params.index_fasta_out}", mode: 'copy'
}
input:
tuple val(file_id), path(fasta)
output:
tuple val(file_id), path("*.dict"), emit: index
script:
"""
PicardCommandLine CreateSequenceDictionary \
${params.index_fasta} \
REFERENCE=${fasta} \
OUTPUT=${fasta.baseName}.dict
"""
}
params.index_bam = ""
params.index_bam_out = ""
process index_bam {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
if (params.index_bam_out != "") {
publishDir "results/${params.index_bam_out}", mode: 'copy'
}
input:
tuple val(file_id), path(bam)
output:
tuple val(file_id), path("*"), emit: index
script:
"""
PicardCommandLine BuildBamIndex \
${params.index_bam} \
INPUT=${bam}
"""
}
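// EXAMPLE (a minimal usage sketch, not part of the module: the bam glob and the
// output folder are placeholder assumptions):
/*
include { mark_duplicate; index_bam } from "./nf_modules/picard/main.nf" addParams(mark_duplicate_out: "picard/")

workflow {
  channel
    .fromPath("data/bam/*.bam")
    .map{ it -> [it.baseName, it] }
    .set{ bam_files }
  mark_duplicate(bam_files)
  index_bam(mark_duplicate.out.bam)
}
*/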
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.2.4"
container_url = "lbmc/porechop:${version}"
params.porechop_out = ""
process porechop {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
if (params.porechop_out != "") {
publishDir "results/${params.porechop_out}", mode: 'copy'
}
input:
tuple val(file_id), path(fastq)
output:
tuple val(file_id), path("*_porechoped.fastq"), emit: porechoped_fastq
script:
"""
porechop -i ${fastq} -o ${file_id}_porechoped.fastq --threads ${task.cpus}
"""
}
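// EXAMPLE (a minimal usage sketch, not part of the module: the fastq glob and
// the output folder are placeholder assumptions):
/*
include { porechop } from "./nf_modules/porechop/main.nf" addParams(porechop_out: "porechop/")

workflow {
  channel
    .fromPath("data/fastq/*.fastq")
    .map{ it -> [it.simpleName, it] }
    .set{ fastq_files }
  porechop(fastq_files)
}
*/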
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.6.0"
container_url = "lbmc/rasusa:${version}"
include { index_fasta } from "./../samtools/main.nf"
params.sample_fastq = ""
params.sample_fastq_coverage = ""
params.sample_fastq_size = ""
params.sample_fastq_out = ""
workflow sample_fastq {
take:
fastq
fasta
main:
if (params.sample_fastq_coverage == "" && params.sample_fastq_size == ""){
fastq
.set{ final_fastq }
} else {
index_fasta(fasta)
sub_sample_fastq(fastq, index_fasta.out.index)
sub_sample_fastq.out.fastq
.set{ final_fastq }
}
emit:
fastq = final_fastq
}
process sub_sample_fastq {
container = "${container_url}"
label "small_mem_mono_cpus"
tag "$file_id"
if (params.sample_fastq_out != "") {
publishDir "results/${params.sample_fastq_out}", mode: 'copy'
}
input:
tuple val(file_id), path(fastq)
tuple val(index_id), path(idx)
output:
tuple val(file_id), path("sub_*.fastq.gz"), emit: fastq
script:
switch(file_id) {
case {it instanceof List}:
file_prefix = file_id[0]
break
case {it instanceof Map}:
file_prefix = file_id.values()[0]
break
default:
file_prefix = file_id
break
}
sample_option = "-c " + params.sample_fastq_coverage
if (params.sample_fastq_size != ""){
sample_option = "-b " + params.sample_fastq_size
}
if (fastq.size() == 2)
"""
rasusa \
-i ${fastq[0]} ${fastq[1]} \
-g ${idx} \
${sample_option} \
-o sub_${fastq[0].simpleName}.fastq.gz sub_${fastq[1].simpleName}.fastq.gz
"""
else
"""
rasusa \
-i ${fastq} \
-g ${idx} \
${sample_option} \
-o sub_${fastq.simpleName}.fastq.gz
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
nextflow.enable.dsl=2
/*
./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq"
./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0
./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb"
*/
params.fastq = "data/fastq/*R{1,2}*"
params.fasta = "data/fasta/*.fasta"
params.coverage = ""
params.size = ""
include { sample_fastq } from "./main.nf" addParams(sample_fastq_coverage: params.coverage, sample_fastq_size: params.size, sample_fastq_out: "sample/")
channel
.fromFilePairs( params.fastq, size: -1)
.set { fastq_files }
channel
.fromPath( params.fasta )
.map { it -> [it.simpleName, it]}
.set { fasta_files }
workflow {
sample_fastq(fastq_files, fasta_files.collect())
}
#! /bin/sh
# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
#
# SPDX-License-Identifier: AGPL-3.0-or-later
./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq"
./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0
./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb"
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "1.8.0"
container_url = "lbmc/salmon:${version}"
params.salmon_out = ""
process quantify {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
if (params.salmon_out != "") {
publishDir "results/${params.salmon_out}", mode: 'copy'
}
input:
tuple val(file_id), path(bam)
output:
tuple val(file_id), path("*.sf"), emit: quant
script:
"""
salmon quant -l A --noErrorModel -t XXXXXXXXXX -a ${bam} -p ${task.cpus} -o .
"""
}
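// EXAMPLE (a minimal usage sketch, not part of the module: the bam glob and the
// output folder are placeholder assumptions, and the transcriptome fasta
// placeholder in the quantify command still has to be filled in):
/*
include { quantify } from "./nf_modules/salmon/main.nf" addParams(salmon_out: "salmon/")

workflow {
  channel
    .fromPath("data/bam/*.bam")
    .map{ it -> [it.baseName, it] }
    .set{ bam_files }
  quantify(bam_files)
}
*/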
profiles {
docker {
docker.temp = 'auto'
docker.enabled = true
process {
withName: index_bam {
container = "sambamba:0.6.7"
}
}
}
psmn {
process{
withName: index_bam {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "sambamba/0.6.7"
executor = "sge"
clusterOptions = "-m e -cwd -V"
memory = "30GB"
time = "24h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
}
}
}
params.bam = "$baseDir/data/bam/*.bam"
log.info "bams files : ${params.bam}"
Channel
.fromPath( params.bam )
.ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
.set { bam_files }
process index_bam {
tag "$file_id"
cpus 4
input:
set file_id, file(bam) from bam_files
output:
set file_id, "*.bam*" into indexed_bam_file
script:
"""
sambamba index -t ${task.cpus} ${bam}
"""
}
// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
version = "0.6.7"
container_url = "lbmc/sambamba:${version}"
params.index_bam = ""
process index_bam {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
input:
tuple val(file_id), path(bam)
output:
tuple val(file_id), path("*.bam*"), emit: bam
script:
"""
sambamba index ${params.index_bam} -t ${task.cpus} ${bam}
"""
}
params.sort_bam = ""
process sort_bam {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
input:
tuple val(file_id), path(bam)
output:
tuple val(file_id), path("*.bam*"), emit: bam
script:
"""
sambamba sort -t ${task.cpus} ${params.sort_bam} -o ${bam.baseName}_sorted.bam ${bam}
"""
}
params.split_bam = ""
process split_bam {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "$file_id"
input:
tuple val(file_id), path(bam)
output:
tuple val(file_id), path("*_forward.bam*"), emit: bam_forward
tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse
script:
"""
sambamba view -t ${task.cpus} ${params.split_bam} -h -F "strand == '+'" ${bam} > \
${bam.baseName}_forward.bam
sambamba view -t ${task.cpus} ${params.split_bam} -h -F "strand == '-'" ${bam} > \
${bam.baseName}_reverse.bam
"""
}
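// EXAMPLE (a minimal usage sketch, not part of the module: the bam glob is a
// placeholder assumption):
/*
include { sort_bam; index_bam; split_bam } from "./nf_modules/sambamba/main.nf"

workflow {
  channel
    .fromPath("data/bam/*.bam")
    .map{ it -> [it.baseName, it] }
    .set{ bam_files }
  sort_bam(bam_files)
  index_bam(sort_bam.out.bam)
  split_bam(sort_bam.out.bam)
}
*/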
profiles {
docker {
docker.temp = 'auto'
docker.enabled = true
process {
withName: sort_bam {
container = "sambamba:0.6.7"
}
}
}
psmn {
process{
withName: sort_bam {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "sambamba/0.6.7"
executor = "sge"
clusterOptions = "-m e -cwd -V"
memory = "30GB"
time = "24h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
}
}
}
params.bam = "$baseDir/data/bam/*.bam"
log.info "bams files : ${params.bam}"
Channel
.fromPath( params.bam )
.ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
.set { bam_files }
process sort_bam {
tag "$file_id"
cpus 4
input:
set file_id, file(bam) from bam_files
output:
set file_id, "*_sorted.bam" into sorted_bam_files
script:
"""
sambamba sort -t ${task.cpus} -o ${file_id}_sorted.bam ${bam}
"""
}
profiles {
docker {
docker.temp = 'auto'
docker.enabled = true
process {
withName: split_bam {
container = "sambamba:0.6.7"
}
}
}
psmn {
process{
withName: split_bam {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
module = "sambamba/0.6.7"
executor = "sge"
clusterOptions = "-m e -cwd -V"
memory = "30GB"
time = "24h"
queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
penv = 'openmp16'
}
}
}
}
params.bam = "$baseDir/data/bam/*.bam"
log.info "bams files : ${params.bam}"
Channel
.fromPath( params.bam )
.ifEmpty { error "Cannot find any bam files matching: ${params.bam}" }
.map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
.set { bam_files }
process split_bam {
tag "$file_id"
cpus 4
input:
set file_id, file(bam) from bam_files
output:
set file_id, "*_forward.bam*" into forward_bam_files
set file_id, "*_reverse.bam*" into reverse_bam_files
script:
"""
sambamba view -t ${task.cpus} -h -F "strand == '+'" ${bam} > ${file_id}_forward.bam
sambamba view -t ${task.cpus} -h -F "strand == '-'" ${bam} > ${file_id}_reverse.bam
"""
}
./nextflow src/nf_modules/sambamba/sort_bams.nf \
-c src/nf_modules/sambamba/sort_bams.config \
-profile docker \
--bam "data/tiny_dataset/map/tiny_v2.bam"
./nextflow src/nf_modules/sambamba/index_bams.nf \
-c src/nf_modules/sambamba/index_bams.config \
-profile docker \
--bam "data/tiny_dataset/map/tiny_v2.sort.bam"
./nextflow src/nf_modules/sambamba/split_bams.nf \
-c src/nf_modules/sambamba/split_bams.config \
-profile docker \
--bam "data/tiny_dataset/map/tiny_v2.bam"