Compare revisions

ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0
--- a/src/nf_modules/emase-zero/main.nf
+++ b/src/nf_modules/emase-zero/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "0.3.1"
 container_url = "lbmc/emase-zero:${version}"

-include { tr2g } from "./../kb/main.nf"
+include { g2tr } from "./../kb/main.nf"
 include { bam2ec } from "./../alntools/main.nf"
 include { fasta_to_transcripts_lengths } from "./../bioawk/main.nf"

@@ -15,10 +19,10 @@ workflow count {
    gtf

  main:
-    tr2g(gtf)
+    g2tr(gtf)
    fasta_to_transcripts_lengths(fasta)
-    bam2ec(bam_idx, fasta_to_transcripts_lengths.out.tsv)
-    emase(bam2ec.out.bin, bam2ec.out.tsv, tr2g.out.t2g)
+    bam2ec(bam_idx, fasta_to_transcripts_lengths.out.tsv.collect())
+    emase(bam2ec.out.bin, fasta.collect(), bam2ec.out.tsv, g2tr.out.g2t.collect())

  emit:
    count = emase.out.count
@@ -34,18 +38,21 @@ process emase {

  input:
    tuple val(file_id), path(bin)
+    tuple val(fasta_id), path(fasta)
    tuple val(transcript_length_id), path(transcript_length)
-    tuple val(transcript_to_gene_id), path(transcript_to_gene)
+    tuple val(gene_to_transcript_id), path(gene_to_transcript)

  output:
-    tuple val(file_id), path("${bin.simpleName}.quantified"), emit: count
+    tuple val(file_id), path("${bin.simpleName}.quantified*"), emit: count
+    path "*_report.txt", emit: report

  script:
 """
+grep ">" ${fasta} | sed 's/>//' > tr_list.txt
 emase-zero ${params.count} \
-  -b ${bin} \
  -o ${bin.simpleName}.quantified \
  -l ${transcript_length} \
-  -g ${transcript_to_gene}
+  -g ${gene_to_transcript} \
+  ${bin} &> ${file_id}_emase-zero_report.txt
 """
 }
\ No newline at end of file
--- a/src/nf_modules/emase/main.nf
+++ b/src/nf_modules/emase/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "0.10.16"
 container_url = "lbmc/emase:${version}"

-params.personalised_transcriptome = ""
-
-process personalised_transcriptome {
+params.diploid_genome = "-x"
+params.diploid_genome_out = "-x"
+process diploid_genome {
  container = "${container_url}"
  label "big_mem_mono_cpus"
-  tag "$file_id"
+  tag "${genome_a}-${genome_b}"
+  if (params.diploid_genome_out != "") {
+    publishDir "results/${params.diploid_genome_out}", mode: 'copy'
+  }

  input:
-    tuple val(file_id), path(fasta)
-    tuple val(gtf_id), path(gtf)
+    tuple val(genome_a), path(fasta_a), val(genome_b), path(fasta_b)

  output:
-    tuple val(file_id), path("${fasta.simpleName}.*"), emit: index
-    tuple val(file_id), path("*_bwa_report.txt"), emit: report
+    tuple val("${genome_a}_${genome_b}"), path(".fa"), emit: fasta

  script:
 """
-prepare-emase ${personalised_transcriptome} -G ${REF_FASTA} -g ${REF_GTF} -o ${REF_DIR} -m --no-bowtie-index
-// ${REF_DIR}/emase.transcriptome.fa
-// ${REF_DIR}/emase.transcriptome.info
-// ${REF_DIR}/emase.gene2transcripts.tsv
-prepare-emase -G ${SAMPLE_DIR}/L.fa,${SAMPLE_DIR}/R.fa -s L,R -o ${SAMPLE_DIR}
+prepare-emase -G ${fasta_a},${fasta_b} -s ${genome_a},${genome_b} ${params.diploid_genome} 
 """
 }
\ No newline at end of file
--- a/src/nf_modules/fastp/main.nf
+++ b/src/nf_modules/fastp/main.nf
-version = "0.20.1"
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.23.2"
 container_url = "lbmc/fastp:${version}"

 params.fastp_protocol = ""
@@ -151,3 +155,4 @@ process fastp_accel_1splus {
      --report_title ${file_prefix}
  """
 }
+
--- a/src/nf_modules/fastq_screen/main.nf
+++ b/src/nf_modules/fastq_screen/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.11.3--pl5.22.0_0"
+container_url = "quay.io/biocontainers/fastq-screen:${version}"
+
+params.fastq_screen = ""
+params.fastq_screen_out = ""
+// Screen a FASTQ library with FastQ Screen.
+//
+// input:  tuple (file_id, fastq) - the staged FASTQ file(s) to screen
+// output: tuple (file_id, every file found in the task directory), emitted
+//         as `output`
+//
+// Extra command-line options can be injected through `params.fastq_screen`.
+// NOTE(review): `--get_genomes` is run for every task; confirm that the
+// configuration it downloads is the one picked up by the second command.
+process fastq_screen {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  // publish only when an output folder was configured for this process
+  if (params.fastq_screen_out != "") {
+    publishDir "results/${params.fastq_screen_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fastq)
+
+  output:
+    tuple val(file_id), path("*"), emit: output
+
+  script:
+"""
+fastq_screen --get_genomes
+fastq_screen ${params.fastq_screen} --threads ${task.cpus} ${fastq}
+"""
+}
--- a/src/nf_modules/fastqc/main.nf
+++ b/src/nf_modules/fastqc/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "0.11.5"
 container_url = "lbmc/fastqc:${version}"


--- a/src/nf_modules/flexi_splitter/main.nf
+++ b/src/nf_modules/flexi_splitter/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "1.0.2"
 container_url = "lbmc/flexi_splitter:${version}"


--- a/src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml
+++ b/src/nf_modules/flexi_splitter/marseq_flexi_splitter.yaml
+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
 PLATE:
  coords:
    reads: 0

--- a/src/nf_modules/flexi_splitter/toy_file_paired.yaml
+++ b/src/nf_modules/flexi_splitter/toy_file_paired.yaml
+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
 PCR:
  coords:
    reads: 3

--- a/src/nf_modules/g2gtools/main.nf
+++ b/src/nf_modules/g2gtools/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "0.2.8"
 container_url = "lbmc/g2gtools:${version}"


--- a/src/nf_modules/gatk3/main.nf
+++ b/src/nf_modules/gatk3/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "3.8.0"
 container_url = "lbmc/gatk:${version}"


--- a/src/nf_modules/gatk4/main.nf
+++ b/src/nf_modules/gatk4/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "4.2.0.0"
 container_url = "broadinstitute/gatk:${version}"

+// Derive the prefix used to name a task's files from its channel identifier.
+//
+// file_id may be:
+//   - a List: its first element is returned
+//   - a Map : `file_id.id` when the map has a `library` key, otherwise the
+//             first value of the map (null for an empty map)
+//   - anything else: returned unchanged
+//
+// Nothing is assigned outside the function: undeclared variables would land
+// in the script binding, which is shared by every process calling this helper.
+def get_file_prefix(file_id) {
+  if (file_id instanceof List) {
+    return file_id[0]
+  }
+  if (file_id instanceof Map) {
+    if (file_id.containsKey('library')) {
+      return file_id.id
+    }
+    // `file_id[0]` on a Map is a lookup of the key 0, not "the first entry"
+    return file_id.isEmpty() ? null : file_id.values().iterator().next()
+  }
+  return file_id
+}
+
 include {
  index_fasta as samtools_index_fasta;
+  index_bam;
 } from './../samtools/main.nf'
 include {
  index_fasta as picard_index_fasta;
@@ -18,8 +39,9 @@ workflow germline_cohort_data_variant_calling {
  main:
    // data preparation
    mark_duplicate(bam)
+    index_bam(mark_duplicate.out.bam)
    picard_index_bam(mark_duplicate.out.bam)
-    mark_duplicate.out.bam
+    index_bam.out.bam_idx
      .join(picard_index_bam.out.index)
      .set{ bam_idx }
    picard_index_fasta(fasta)
@@ -76,11 +98,7 @@ process index_vcf {

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
  -I ${vcf}
@@ -92,7 +110,7 @@ process compute_base_recalibration {
  label "big_mem_mono_cpus"
  tag "$file_id"
  input:
-    tuple val(file_id), path(bam), path(bam_idx)
+    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
    tuple val(vcf_id), path(vcf), path(vcf_idx)
  output:
@@ -100,11 +118,7 @@ process compute_base_recalibration {

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
  def vcf_cmd = ""
  if (vcf instanceof List){
    for (vcf_file in vcf){
@@ -127,7 +141,7 @@ process apply_base_recalibration {
  label "big_mem_mono_cpus"
  tag "$file_id"
  input:
-    tuple val(file_id), path(bam), path(bam_idx)
+    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
    tuple val(table_id), path(table)
  output:
@@ -135,11 +149,7 @@ process apply_base_recalibration {

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \
   -R ${fasta} \
@@ -150,24 +160,24 @@ process apply_base_recalibration {
 }

 /*******************************************************************/
-
+params.variant_calling_gvcf_out = ""
 process call_variants_per_sample {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
+  if (params.variant_calling_gvcf_out != "") {
+    publishDir "results/${params.variant_calling_gvcf_out}", mode: 'copy'
+  }
+
  input:
-    tuple val(file_id), path(bam), path(bam_idx)
+    tuple val(file_id), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("${bam.simpleName}.gvcf.gz"), emit: gvcf

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller  \
   -R ${fasta} \
@@ -192,9 +202,6 @@ workflow call_variants_all_sample {
    )
    consolidate_gvcf(
      validate_gvcf.out.gvcf
-      .map {
-        it -> ["library", it[1], it[2]]
-      }
      .groupTuple(),
      fasta_idx.collect()
    )
@@ -218,11 +225,7 @@ process index_gvcf {

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
      -I ${gvcf} 2> ${gvcf.simpleName}_IndexFeatureFile_report.txt
@@ -241,11 +244,7 @@ process validate_gvcf {

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" ValidateVariants \
   -V ${gvcf} \
@@ -266,11 +265,7 @@ process consolidate_gvcf {

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
  def gvcf_cmd = ""
  if (gvcf instanceof List){
    for (gvcf_file in gvcf){
@@ -305,11 +300,7 @@ process genomic_db_call {

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
  def gvcf_cmd = ""
  if (gvcf instanceof List){
    for (gvcf_file in gvcf){
@@ -346,11 +337,7 @@ process variant_calling {

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \
  ${params.variant_calling} \
@@ -377,11 +364,7 @@ process filter_snp {
    tuple val(file_id), path("*_snp.vcf"), emit: vcf
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
  ${params.filter_snp} \
@@ -409,11 +392,7 @@ process filter_indels {
    tuple val(file_id), path("*_indel.vcf"), emit: vcf
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
  ${params.filter_indels} \
@@ -442,11 +421,7 @@ process high_confidence_snp {
    tuple val(file_id), path("*_snp.vcf"), emit: vcf
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \
  -R ${fasta} \
@@ -474,11 +449,7 @@ process high_confidence_indels {
    tuple val(file_id), path("*_indel.vcf"), emit: vcf
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \
  -R ${fasta} \
@@ -499,17 +470,13 @@ process recalibrate_snp_table {
  }

  input:
-    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx)
+    tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(bam_idx_bis)
    tuple val(ref_id), path(fasta), path(fai), path(dict)
  output:
    tuple val(file_id), path("recal_data_table"), emit: recal_table
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \
  -I ${snp_file}
@@ -542,11 +509,7 @@ process recalibrate_snp {
    tuple val(file_id), path("*.bam"), emit: bam
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \
  ${params.recalibrate_snp} \
@@ -574,11 +537,7 @@ process haplotype_caller {
    tuple val(file_id), path("*.gvcf"), emit: gvcf
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \
  ${params.haplotype_caller} \
@@ -606,11 +565,7 @@ process gvcf_genotyping {
    tuple val(file_id), path("*.vcf.gz"), emit: vcf
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \
  ${params.gvcf_genotyping} \
@@ -637,11 +592,7 @@ process select_variants_snp {
    tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}GG" SelectVariants \
  ${params.select_variants_snp} \
@@ -669,11 +620,7 @@ process select_variants_indels {
    tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf
  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \
  ${params.select_variants_indels} \
@@ -702,11 +649,7 @@ process personalized_genome {

  script:
  xmx_memory = "${task.memory}" - ~/\s*GB/
-  if (file_id instanceof List){
-    file_prefix = file_id[0]
-  } else {
-    file_prefix = file_id
-  }
+  file_prefix = get_file_prefix(file_id)
 """
 gatk --java-options "-Xmx${xmx_memory}G" FastaAlternateReferenceMaker\
  ${params.personalized_genome} \

--- a/src/nf_modules/gffread/main.nf
+++ b/src/nf_modules/gffread/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "0.12.2"
 container_url = "lbmc/gffread:${version}"

@@ -26,6 +30,37 @@ process gffread {
  }
  """
  gffread ${gtf} -g ${fasta} -M -x dup_${file_prefix}.fasta
-  awk 'BEGIN {i = 1;} { if (\$1 ~ /^>/) { tmp = h[i]; h[i] = \$1; } else if (!a[\$1]) { s[i] = \$1; a[\$1] = "1"; i++; } else { h[i] = tmp; } } END { for (j = 1; j < i; j++) { print h[j]; print s[j]; } }' < dup_${file_prefix}.fasta > ${file_prefix}.fasta
+  awk 'BEGIN {i = 1;} { if (\$1 ~ /^>/) { tmp = h[i]; h[i] = \$1; } else if (!a[\$1]) { s[i] = \$1; a[\$1] = "1"; i++; } else { h[i] = tmp; } } END { for (j = 1; j < i; j++) { print h[j]; print s[j]; } }' < dup_${file_prefix}.fasta | grep -v -e "^\$" > ${file_prefix}.fasta
+  """
+}
+
+params.spliced_cds = ""
+params.spliced_cds_out = ""
+// Run gffread with `-x` on an annotation and a genome to write
+// `${file_prefix}.fasta`.
+//
+// input:  tuple (file_id, gtf) and tuple (fasta_id, fasta); both files may be
+//         gzipped or plain text
+// output: tuple (fasta_id, "${file_prefix}.fasta"), emitted as `fasta`
+//
+// Extra gffread options can be injected through `params.spliced_cds`.
+process spliced_cds {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_prefix"
+  if (params.spliced_cds_out != "") {
+    publishDir "results/${params.spliced_cds_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gtf)
+    tuple val(fasta_id), path(fasta)
+
+  output:
+    tuple val(fasta_id), path("${file_prefix}.fasta"), emit: fasta
+
+  script:
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  // -f together with -c lets gzip pass non-gzipped input through unchanged
+  """
+gzip -dcf ${fasta} > ${fasta.simpleName}_un.fasta
+gzip -dcf ${gtf} > ${gtf.simpleName}_un.gtf
+gffread ${gtf.simpleName}_un.gtf -g ${fasta.simpleName}_un.fasta -M \
+  ${params.spliced_cds} \
+  -x ${file_prefix}.fasta
  """
 }
\ No newline at end of file
--- a/src/nf_modules/guppy-cpu/main.nf
+++ b/src/nf_modules/guppy-cpu/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "5.0.11"
+container_url = "lbmc/guppy-cpu:${version}"
+
+params.basecalling_out = ""
+params.flowcell = "FLO-MIN106"
+params.kit = "SQK-PCS109"
+params.cpu_threads_per_caller = 4
+params.num_callers = 1
+// Basecall fast5 data on CPU with guppy_basecaller.
+//
+// input:  tuple (file_id, fast5) - staged path handed to `-i`
+// output: tuple (file_id, files matching "*.fastq*"), emitted as `fastq`
+//
+// The flowcell, the kit and the threading options come from `params`.
+// NOTE(review): the output glob only matches files at the top of the task
+// directory; confirm where this guppy version writes its FASTQ files
+// relative to the `-s` folder.
+process basecall_fast5 {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.basecalling_out != "") {
+    publishDir "results/${params.basecalling_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fast5)
+
+  output:
+    tuple val(file_id), path("*.fastq*"), emit: fastq
+
+  script:
+  // warn through the logger: no channel or helper is available at this point
+  if (params.flowcell == "") {
+    log.warn "WARNING ! No Flowcell type given..."
+  }
+  if (params.kit == "") {
+    log.warn "WARNING ! No kit type given..."
+  }
+  // `-s` requires a value: with an empty parameter it would swallow the next
+  // option, so fall back to the task directory
+  def save_path = params.basecalling_out != "" ? params.basecalling_out : "."
+"""
+guppy_basecaller --compress_fastq \
+    -i ${fast5} \
+    -s ${save_path} \
+    --cpu_threads_per_caller ${params.cpu_threads_per_caller} \
+    --num_callers ${params.num_callers} \
+    --flowcell ${params.flowcell} \
+    --kit ${params.kit}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/guppy-gpu/main.nf
+++ b/src/nf_modules/guppy-gpu/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "5.0.11"
+container_url = "lbmc/guppy-gpu:${version}"
+
+params.basecalling_out = ""
+params.flowcell = ""
+params.kit = ""
+params.gpu_runners_per_device = 16
+// Basecall fast5 data on GPU with guppy_basecaller (`-x "cuda:all"`).
+//
+// input:  tuple (file_id, fast5) - staged path handed to `-i`
+// output: tuple (file_id, files matching "*.fastq*"), emitted as `fastq`
+//
+// The flowcell and the kit come from `params` and are empty by default, so a
+// warning is logged when they were not provided.
+// NOTE(review): the output glob only matches files at the top of the task
+// directory; confirm where this guppy version writes its FASTQ files
+// relative to the `-s` folder.
+process basecall_fast5 {
+  container = "${container_url}"
+  // Need to create a profile using GPUs
+  label ""
+  tag "$file_id"
+  if (params.basecalling_out != "") {
+    publishDir "results/${params.basecalling_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fast5)
+
+  output:
+    tuple val(file_id), path("*.fastq*"), emit: fastq
+
+  script:
+  // warn through the logger: no channel or helper is available at this point
+  if (params.flowcell == "") {
+    log.warn "WARNING ! No Flowcell type given..."
+  }
+  if (params.kit == "") {
+    log.warn "WARNING ! No kit type given..."
+  }
+  // `-s` requires a value: with an empty parameter it would swallow the next
+  // option, so fall back to the task directory
+  def save_path = params.basecalling_out != "" ? params.basecalling_out : "."
+"""
+guppy_basecaller --compress_fastq -x "cuda:all" --min_qscore 7.0 \
+    -i ${fast5} \
+    -s ${save_path} \
+    --gpu_runners_per_device ${params.gpu_runners_per_device} \
+    --flowcell ${params.flowcell} \
+    --kit ${params.kit}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/hisat2/main.nf
+++ b/src/nf_modules/hisat2/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "2.2.1"
+container_url = "lbmc/hisat2:${version}"
+
+params.index_fasta = ""
+params.index_fasta_out = ""
+// Build a HISAT2 index from a reference FASTA.
+//
+// input:  tuple (file_id, fasta) - gzipped or plain FASTA
+// output: tuple (file_id, "*.ht2*" index files), emitted as `index`
+//         tuple (file_id, "*_report.txt" build log), emitted as `report`
+//
+// Extra hisat2-build options can be injected through `params.index_fasta`.
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+
+  output:
+    tuple val(file_id), path("*.ht2*"), emit: index
+    tuple val(file_id), path("*_report.txt"), emit: report
+
+  script:
+  // Decompress to a fixed scratch name instead of gunzip-ing the staged file
+  // in place: -f with -c passes plain FASTA through unchanged, and the name
+  // matches none of the output globs.
+"""
+gzip -dcf ${fasta} > hisat2_index_input.fasta
+hisat2-build ${params.index_fasta} -p ${task.cpus} \
+  hisat2_index_input.fasta \
+  ${fasta.simpleName} &> \
+  ${fasta.simpleName}_hisat2_index_report.txt
+
+if grep -q "Error" ${fasta.simpleName}_hisat2_index_report.txt; then
+  exit 1
+fi
+"""
+}
+
+params.mapping_fastq = ""
+params.mapping_fastq_out = ""
+// Align reads against a HISAT2 index and write a sorted BAM.
+//
+// input:  tuple (index_id, index) - the index files
+//         tuple (file_id, reads)  - two files take the -1/-2 branch,
+//                                   anything else the -U branch
+// output: tuple (file_id, "*.bam"), emitted as `bam`
+//         "*_report.txt" (stderr of hisat2), emitted as `report`
+process mapping_fastq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.mapping_fastq_out != "") {
+    publishDir "results/${params.mapping_fastq_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(index_id), path(index)
+  tuple val(file_id), path(reads)
+
+  output:
+  tuple val(file_id), path("*.bam"), emit: bam
+  path "*_report.txt", emit: report
+
+  script:
+  // index base name: whatever precedes ".1.ht2" in the index file names,
+  // falling back to the first index file when none matches
+  index_id = index[0]
+  for (candidate in index) {
+    def hit = (candidate =~ /(.*)\.1\.ht2.*/)
+    if (hit) {
+      index_id = hit[0][1]
+    }
+  }
+  // prefix of the files written by the task, derived from the id type
+  if (file_id instanceof List) {
+    file_prefix = file_id[0]
+  } else if (file_id instanceof Map) {
+    file_prefix = file_id.values()[0]
+  } else {
+    file_prefix = file_id
+  }
+
+  if (reads.size() == 2)
+  """
+  hisat2 ${params.mapping_fastq} \
+    -p ${task.cpus} \
+    -x ${index_id} \
+    -1 ${reads[0]} \
+    -2 ${reads[1]} 2> \
+    ${file_prefix}_ht2_mapping_report.txt \
+    | samtools view -@ ${task.cpus} -bS - \
+    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam
+
+  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
+    exit 1
+  fi
+  """
+  else
+  """
+  hisat2 ${params.mapping_fastq} \
+    -p ${task.cpus} \
+    -x ${index_id} \
+    -U ${reads} 2> \
+    ${file_prefix}_ht2_mapping_report.txt \
+    | samtools view -@ ${task.cpus} -bS - \
+    | samtools sort -@ ${task.cpus} -o ${file_prefix}.bam
+  if grep -q "Error" ${file_prefix}_ht2_mapping_report.txt; then
+    exit 1
+  fi
+  """
+}
--- a/src/nf_modules/htseq/main.nf
+++ b/src/nf_modules/htseq/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "1.99.2"
+container_url = "lbmc/htseq:${version}"
+
+params.htseq_out = ""
+
+
+
+// Convert a GFF3 annotation to GTF with gffread.
+//
+// input:  tuple (genome_id, gff3_file)
+// output: "${genome_id}.gtf", emitted as `gtf` (a bare path, not a tuple)
+//
+// NOTE(review): the container image is referenced without a version tag,
+// unlike the other processes of this module.
+process gff3_2_gtf {
+  container = "dceoy/cufflinks"
+  label "small_mem_mono_cpus"
+
+    input:
+        tuple val(genome_id), path(gff3_file)
+    output:
+        path "${genome_id}.gtf", emit: gtf
+    script:
+"""
+gffread ${gff3_file} -T -o ${genome_id}.gtf
+"""
+}
+
+
+// Count reads per gene with htseq-count and write the table to
+// "${file_id}.tsv".
+//
+// input:  tuple (file_id, bam, bai) and a GTF annotation
+// output: "${file_id}.tsv", emitted as `counts` (a bare path, not a tuple)
+//
+// NOTE(review): the output name interpolates file_id directly, so this
+// presumably expects file_id to be a plain string - confirm against callers.
+process htseq_count {
+    container = "${container_url}"
+    label "big_mem_mono_cpus"
+    tag "file_id: $file_id"
+    if (params.htseq_out != "") {
+        publishDir "results/${params.htseq_out}", mode: 'copy'
+    }
+    input:
+      // bai is staged next to the BAM but is not named on the command line
+      tuple val(file_id), path(bam), path(bai)
+      path (gtf)
+
+    output:
+      path "${file_id}.tsv", emit: counts
+
+  script:
+  // counting options are hard-coded in the command below rather than read
+  // from params
+"""
+htseq-count -n ${task.cpus} -r pos -a 10 -s yes -t exon -i gene_id $bam $gtf > ${file_id}.tsv
+"""
+}
+
+// Convert a GFF3 annotation to GTF, then count reads for every BAM.
+//
+// take: bam_tuple - channel of (file_id, bam, bai)
+//       gff_file  - channel of (genome_id, gff3_file)
+// emit: counts    - the count tables produced by htseq_count
+workflow htseq_count_with_gff {
+  take:
+    bam_tuple
+    gff_file
+  main:
+    gff3_2_gtf(gff_file)
+    // collect() turns the converted GTF into a value channel: a queue channel
+    // would be consumed by the first BAM and leave every other BAM uncounted
+    htseq_count(bam_tuple, gff3_2_gtf.out.gtf.collect())
+  emit:
+    counts = htseq_count.out.counts
+}
--- a/src/nf_modules/kallisto/main.nf
+++ b/src/nf_modules/kallisto/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "0.44.0"
 container_url = "lbmc/kallisto:${version}"


--- a/src/nf_modules/kb/main.nf
+++ b/src/nf_modules/kb/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "0.26.0"
 container_url = "lbmc/kb:${version}"

@@ -37,6 +41,30 @@ process tr2g {
  script:
  """
  t2g.py --gtf ${gtf}
+  sort -k1 -u t2g_dup.txt > t2g.txt
+  """
+}
+
+process g2tr {
+  // Create a gene to transcript table (g2t.txt) from a GTF file.
+  //
+  // input:  tuple (file_id, gtf)
+  // output: tuple (file_id, "g2t.txt"), emitted as `g2t`
+  //
+  // NOTE(review): publication is keyed on params.index_fasta_out, not on a
+  // parameter dedicated to this process - confirm this is intended.
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gtf)
+
+  output:
+    tuple val(file_id), path("g2t.txt"), emit: g2t
+
+  script:
+  // t2g_dup.txt is presumably written by t2g.py (verify); it is deduplicated
+  // into t2g.txt, then awk swaps the first two columns to build g2t.txt
+  """
+  t2g.py --gtf ${gtf}
+  sort -k1 -u t2g_dup.txt > t2g.txt
+  awk 'BEGIN{OFS="\\t"}{print \$2, \$1}' t2g.txt > g2t.txt
  """
 }

@@ -88,7 +116,7 @@ workflow count {
    .set{ whitelist_optional }
  switch(params.kb_protocol) {
    case "marsseq":
-      split(fastq, config)
+      split(fastq, config.collect())
      kb_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
      kb_marseq.out.counts.set{res_counts}
      kb_marseq.out.report.set{res_report}
@@ -145,8 +173,13 @@ process kb_default {
    -o ${file_prefix} \
    ${whitelist_param} \
    -x 10XV3 \
+    --h5ad \
    ${params.count} \
    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
  """
 }

@@ -194,8 +227,12 @@ process kb_marseq {
    -o ${file_prefix} \
    ${whitelist_param} \
    ${params.count} \
+    --h5ad \
    -x 1,0,6:1,6,14:0,0,0 \
    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
  """
  else
  """
@@ -208,7 +245,11 @@ process kb_marseq {
    ${whitelist_param} \
    ${params.count} \
    -x 1,0,6:1,6,14:0,0,0 \
+    --h5ad \
    ${reads} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
  """
 }

@@ -231,7 +272,7 @@ workflow index_fasta_velocity {

 process index_fasta_velocity_default {
  container = "${container_url}"
-  label "big_mem_mono_cpus"
+  label "big_mem_multi_cpus"
  tag "$file_id"
  if (params.index_fasta_out != "") {
    publishDir "results/${params.index_fasta_out}", mode: 'copy'
@@ -274,7 +315,7 @@ workflow count_velocity {
    .set{ whitelist_optional }
  switch(params.kb_protocol) {
    case "marsseq":
-      split(fastq, config)
+      split(fastq, config.collect())
      velocity_marseq(index.collect(), split.out.fastq, transcript_to_gene.collect(), whitelist_optional.collect())
      velocity_marseq.out.counts.set{res_counts}
      velocity_marseq.out.report.set{res_report}
@@ -324,17 +365,24 @@ process velocity_default {
  if (reads.size() == 2)
  """
  mkdir ${file_prefix}
+  kb count  -t ${task.cpus} \
    -m ${kb_memory} \
    -i ${index} \
    -g ${transcript_to_gene} \
    -o ${file_prefix} \
    -c1 ${cdna_t2g} \
    -c2 ${intron_t2g} \
-    --lamanno \
+    --workflow lamanno \
    ${whitelist_param} \
    -x 10XV3 \
+    --h5ad \
    ${params.count} \
    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  cp ${cdna_t2g} ${file_prefix}/
+  cp ${intron_t2g} ${file_prefix}/
  """
 }

@@ -382,11 +430,17 @@ process velocity_marseq {
    -o ${file_prefix} \
    -c1 ${cdna_t2g} \
    -c2 ${intron_t2g} \
-    --lamanno \
+    --workflow lamanno \
+     --h5ad \
    ${whitelist_param} \
    ${params.count} \
    -x 1,0,6:1,6,14:0,0,0 \
    ${reads[0]} ${reads[1]} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  cp ${cdna_t2g} ${file_prefix}/
+  cp ${intron_t2g} ${file_prefix}/
  """
  else
  """
@@ -398,10 +452,16 @@ process velocity_marseq {
    -o ${file_prefix} \
    -c1 ${cdna_t2g} \
    -c2 ${intron_t2g} \
-    --lamanno \
+    --workflow lamanno \
    ${whitelist_param} \
    ${params.count} \
    -x 1,0,6:1,6,14:0,0,0 \
    ${reads} > ${file_prefix}_kb_mapping_report.txt
+  fix_t2g.py --t2g ${transcript_to_gene}
+  cp fix_t2g.txt ${file_prefix}/
+  cp ${transcript_to_gene} ${file_prefix}/
+  cp ${cdna_t2g} ${file_prefix}/
+  cp ${intron_t2g} ${file_prefix}/
  """
 }
+
--- a/src/nf_modules/macs2/main.nf
+++ b/src/nf_modules/macs2/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "2.1.2"
 container_url = "lbmc/macs2:${version}"

@@ -30,6 +34,7 @@ macs2 callpeak \
  --call-summits \
  --control ${bam_control} \
  --keep-dup all \
+  --qvalue 0.99 \
  --name ${bam_ip.simpleName} 2> \
  ${bam_ip.simpleName}_macs2_report.txt

@@ -68,6 +73,7 @@ awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \
 macs2 callpeak \
  ${params.peak_calling_bg} \
  --treatment ${bg_ip.simpleName}.bed \
+  --qvalue 0.99 \
  --call-summits \
  --control ${bg_control.simpleName}.bed \
  --keep-dup all \

--- a/src/nf_modules/macs3/main.nf
+++ b/src/nf_modules/macs3/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
 version = "3.0.0a6"
 container_url = "lbmc/macs3:${version}"
No results found