Compare revisions

ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0 · ceca3ce0
--- a/src/.docker_modules/urqt/d62c1f8/Dockerfile
+++ b/src/.docker_modules/urqt/d62c1f8/Dockerfile
+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+FROM ubuntu:18.04
+# MAINTAINER is deprecated; the maintainer is recorded as an image label instead.
+LABEL maintainer="Laurent Modolo"
+
+# Git revision of UrQt that is checked out and compiled below.
+ENV URQT_VERSION=d62c1f8
+# Packages installed with apt-get below (version-pinned where a pin is given).
+ENV PACKAGES git=1:2.17* \
+   build-essential=12.4* \
+   ca-certificates=20180409 \
+   procps \
+   zlib1g-dev=1:1.2.11*
+
+# Install the packages, then drop the apt cache and package lists in the same
+# layer so they are not stored in the image.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ${PACKAGES} && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Clone UrQt, build the pinned revision, install the binary in /usr/bin and
+# remove the source tree.
+RUN git clone https://github.com/l-modolo/UrQt.git && \
+  cd UrQt && \
+  git checkout ${URQT_VERSION} && \
+  make && \
+  cd .. && \
+  mv UrQt/UrQt /usr/bin/ && \
+  rm -Rf UrQt
--- a/src/.docker_modules/urqt/d62c1f8/docker_init.sh
+++ b/src/.docker_modules/urqt/d62c1f8/docker_init.sh
+#!/bin/sh
+
+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+docker pull lbmc/urqt:d62c1f8
+# docker build src/.docker_modules/urqt/d62c1f8 -t 'lbmc/urqt:d62c1f8'
+# docker push lbmc/urqt:d62c1f8
+docker buildx build --platform linux/amd64,linux/arm64 -t "lbmc/urqt:d62c1f8" --push src/.docker_modules/urqt/d62c1f8
--- a/src/.singularity_in2p3
+++ b/src/.singularity_in2p3
+/sps/lbmc/common/singularity/
\ No newline at end of file
--- a/src/.singularity_psmn
+++ b/src/.singularity_psmn
+/Xnfs/abc/singularity/
\ No newline at end of file
--- a/src/example_chipseq.nf
+++ b/src/example_chipseq.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+nextflow.enable.dsl=2
+
+include {
+  fastp
+} from './nf_modules/fastp/main'
+
+// Builds the `input_csv` channel from a ";"-separated csv file with a header row.
+// When params.csv_path is non-empty the local csv is read (columns IP and WCE);
+// otherwise params.csv_sra is read (columns IP_w, WCE_w, IP_m and WCE_m).
+// Emits: input_csv, a channel of maps with the keys
+// index, group, ip, type, id and file.
+// NOTE(review): `ref_order` and `read_order` are not defined in the visible part
+// of this file — confirm where they come from.
+workflow csv_parsing {
+  if (params.csv_path.size() > 0) {
+    log.info "loading local csv files"
+    Channel
+      .fromPath(params.csv_path, checkIfExists: true)
+      // Abort with an explicit message when no csv file is found.
+      .ifEmpty { error """
+    =============================================================
+      WARNING! No csv input file precised.
+      Use '--csv_path <file.csv>'
+      Or '--help' for more informations
+    =============================================================
+    """
+      }
+      .splitCsv(header: true, sep: ";", strip: true)
+      // One csv row becomes two records (IP and WCE) sharing the same md5 key.
+      .flatMap{
+        it -> [
+          [(it.IP + it.WCE).md5(), "IP", "w", file(it.IP)],
+          [(it.IP + it.WCE).md5(), "WCE", "w", file(it.WCE)]
+        ]
+      }
+      // NOTE(review): the records built above have four elements (indices 0-3),
+      // so it[4] is null here — confirm the intended record layout.
+      .map{ it ->
+        if (it[1] instanceof List){
+          it
+        } else {
+          [it[0], [it[1]], it[2], it[3], [it[4]]]
+        }
+      }
+      .map{
+        it ->
+        if (it[1].size() == 2){ // if data are paired_end
+          [
+            "index": it[0],
+            "group": ref_order(it),
+            "ip": it[2],
+            "type": it[3],
+            "id": read_order(it)[0],
+            "file": read_order(it)
+          ]
+        } else {
+          [
+            "index": it[0],
+            "group": it[1].simpleName,
+            "ip": it[2],
+            "type": it[3],
+            "id": it[4].simpleName,
+            "file": [it[4].simpleName, it[4]]
+          ]
+        }
+      }
+      .set{input_csv}
+  } else {
+    log.info "loading remotes SRA csv files"
+    Channel
+      .fromPath(params.csv_sra, checkIfExists: true)
+      // Abort with an explicit message when no csv file is found.
+      .ifEmpty { error """
+    =============================================================
+      WARNING! No csv input file precised.
+      Use '--csv_path <file.csv>' or
+      Use '--csv_SRA <file.csv>'
+      Or '--help' for more informations
+    =============================================================
+    """
+      }
+      .splitCsv(header: true, sep: ";", strip: true)
+      // One csv row becomes four records: IP and WCE for the "w" and "m" types.
+      .flatMap{
+        it -> [
+          [[it.IP_w + it.WCE_w + it.IP_m + it.WCE_m], it.IP_w, "IP", "w", it.IP_w],
+          [[it.IP_w + it.WCE_w + it.IP_m + it.WCE_m], it.IP_w, "WCE", "w", it.WCE_w],
+          [[it.IP_w + it.WCE_w + it.IP_m + it.WCE_m], it.IP_w, "IP", "m", it.IP_m],
+          [[it.IP_w + it.WCE_w + it.IP_m + it.WCE_m], it.IP_w, "WCE", "m", it.WCE_m]
+        ]
+      }
+      // NOTE(review): it[0] is a one-element list holding the concatenated column
+      // values, yet it is indexed below as if it held four files — confirm.
+      .map{
+        it ->
+        if (it[1].size() == 2){ // if data are paired_end
+          [
+            "index": (
+              it[0][0][0].simpleName +
+              it[0][0][1].simpleName +
+              it[0][0][2].simpleName +
+              it[0][0][3].simpleName
+            ).md5(),
+            "group": it[1][0].simpleName,
+            "ip": it[2],
+            "type": it[3],
+            "id": it[4][0].simpleName[0..-4],
+            "file": [it[4][0].simpleName[0..-4], it[4]]
+          ]
+        } else {
+          [
+            "index": (
+              it[0][0].simpleName +
+              it[0][1].simpleName +
+              it[0][2].simpleName +
+              it[0][3].simpleName
+            ).md5(),
+            "group": it[1].simpleName,
+            "ip": it[2],
+            "type": it[3],
+            "id": it[4].simpleName,
+            "file": [it[4].simpleName, it[4]]
+          ]
+        }
+      }
+      .set{input_csv}
+  }
+  emit:
+  input_csv
+}
+
+
+workflow {
+  // Entry workflow: currently empty, so running this script executes no process.
+}
\ No newline at end of file
--- a/src/example_marseq.nf
+++ b/src/example_marseq.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+nextflow.enable.dsl=2
+
+/*
+Testing pipeline for marseq scRNASeq analysis
+*/
+
+include { adaptor_removal} from "./nf_modules/cutadapt/main.nf"
+include {
+  index_fasta;
+  count;
+  index_fasta_velocity;
+  count_velocity
+} from "./nf_modules/kb/main.nf" addParams(
+  kb_protocol: "marsseq",
+  count_out: "quantification/",
+  count_velocity_out: "quantification_velocity/"
+)
+
+// Default parameter values.
+params.fasta = "http://ftp.ensembl.org/pub/release-98/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz"
+params.fastq = "data/CF42_45/*/*R{1,2}.fastq.gz"
+params.gtf = "http://ftp.ensembl.org/pub/release-98/gtf/gallus_gallus/Gallus_gallus.GRCg6a.98.gtf.gz"
+params.transcript_to_gene = ""
+params.whitelist = "data/expected_whitelist.txt"
+params.config = "data/marseq_flexi_splitter.yaml"
+params.workflow_type = "classic"
+
+// Report the input parameters in the run log.
+log.info "fastq files (--fastq): ${params.fastq}"
+log.info "fasta file (--fasta): ${params.fasta}"
+log.info "gtf file (--gtf): ${params.gtf}"
+log.info "transcript_to_gene file (--transcript_to_gene): ${params.transcript_to_gene}"
+log.info "whitelist file (--whitelist): ${params.whitelist}"
+log.info "config file (--config): ${params.config}"
+
+// Fastq files grouped by the fromFilePairs pattern (any group size accepted).
+channel
+  .fromFilePairs(params.fastq, size: -1)
+  .set { fastq_files }
+
+// Every channel below emits [simpleName, file] tuples.
+channel
+  .fromPath(params.fasta)
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .map { fasta -> [fasta.simpleName, fasta] }
+  .set { fasta_files }
+
+channel
+  .fromPath(params.gtf)
+  .ifEmpty { error "Cannot find any gtf files matching: ${params.gtf}" }
+  .map { gtf -> [gtf.simpleName, gtf] }
+  .set { gtf_files }
+
+// The whitelist is optional: an empty --whitelist gives an empty channel.
+if (params.whitelist != "") {
+  channel
+    .fromPath(params.whitelist)
+    .map { whitelist -> [whitelist.simpleName, whitelist] }
+    .set { whitelist_files }
+} else {
+  channel.empty()
+    .set { whitelist_files }
+}
+
+channel
+  .fromPath(params.config)
+  .ifEmpty { error "Cannot find any config files matching: ${params.config}" }
+  .map { config -> [config.simpleName, config] }
+  .set { config_files }
+
+// Entry workflow: trims adaptors, then indexes and quantifies with either the
+// classic processes or their velocity variants, depending on params.workflow_type.
+workflow {
+  adaptor_removal(fastq_files)
+  if (params.workflow_type != "classic") {
+    // Any value other than "classic" selects the velocity variant.
+    index_fasta_velocity(fasta_files, gtf_files)
+    count_velocity(
+      index_fasta_velocity.out.index,
+      adaptor_removal.out.fastq,
+      index_fasta_velocity.out.t2g,
+      whitelist_files,
+      config_files
+    )
+  } else {
+    index_fasta(fasta_files, gtf_files)
+    count(
+      index_fasta.out.index,
+      adaptor_removal.out.fastq,
+      index_fasta.out.t2g,
+      whitelist_files,
+      config_files
+    )
+  }
+}
--- a/src/example_variant_calling.nf
+++ b/src/example_variant_calling.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+nextflow.enable.dsl=2
+
+/*
+Testing pipeline for variant calling (bwa mapping, samtools sorting, GATK4 germline cohort calling)
+*/
+
+include {
+  mapping;
+} from "./nf_modules/bwa/main.nf"
+
+include {
+  sort_bam;
+} from "./nf_modules/samtools/main.nf"
+
+include {
+  germline_cohort_data_variant_calling;
+} from "./nf_modules/gatk4/main.nf" addParams(
+  variant_calling_out: "vcf/",
+)
+
+// Input patterns; both default to an empty string.
+params.fastq = ""
+params.fasta = ""
+
+// Fastq files grouped by the fromFilePairs pattern (any group size accepted).
+channel
+  .fromFilePairs(params.fastq, size: -1)
+  .set { fastq_files }
+
+// Reference files as [simpleName, file] tuples.
+channel
+  .fromPath(params.fasta)
+  .map { fasta -> [fasta.simpleName, fasta] }
+  .set { fasta_files }
+
+// Entry workflow: map the reads, sort the alignments, then call variants.
+workflow {
+  mapping(
+    fasta_files,
+    fastq_files
+  )
+  sort_bam(
+    mapping.out.bam
+  )
+  germline_cohort_data_variant_calling(
+    sort_bam.out.bam,
+    fasta_files
+  )
+}
--- a/src/fasta_sampler.nf
+++ b/src/fasta_sampler.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Channel of the FASTA files found in the tiny dataset directory.
+Channel
+  .fromPath("data/tiny_dataset/fasta/*.fasta")
+  .set { fasta_file }
+
+// Writes the output of `head` on each FASTA file to <baseName>_sample.fasta
+// and copies it to results/sampling/.
+process sample_fasta {
+  publishDir "results/sampling/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+
+  output:
+    file "*_sample.fasta" into fasta_sample
+
+  script:
+"""
+head ${fasta} > ${fasta.baseName}_sample.fasta
+"""
+}
--- a/src/in2p3.pbs
+++ b/src/in2p3.pbs
+#!/bin/sh
+
+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+#SBATCH --job-name=nextflow
+#SBATCH --ntasks=1
+#SBATCH --output=results/nextflow_%j.log
+#SBATCH --licenses=sps
+
+####################################
+
+# change to your path
+SCRATCH=/sps/lbmc/lmodolo/nextflow
+# Location of the example dataset under the path above.
+DATASET="${SCRATCH}/data/tiny_dataset"
+
+# Run the RNASeq solution pipeline with the ccin2p3 profile on the example data.
+"${SCRATCH}/nextflow" run "${SCRATCH}/src/solution_RNASeq.nf" \
+  -profile ccin2p3 \
+  --fastq "${DATASET}/fastq/*_R{1,2}.fastq" \
+  --fasta "${DATASET}/fasta/tiny_v2.fasta" \
+  --bed "${DATASET}/annot/tiny.bed" \
+  -ansi-log \
+  -w "${SCRATCH}/work/"
+
+wait
--- a/src/install_nextflow.sh
+++ b/src/install_nextflow.sh
 #!/bin/sh

+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
 # Print the installed Java version.
 java -version
 # Download the installer script from get.nextflow.io and run it with bash.
 curl -s https://get.nextflow.io | bash
--- a/src/nextflow.config
+++ b/src/nextflow.config
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Pipeline metadata.
+manifest {
+    homePage = 'https://gitbio.ens-lyon.fr/LBMC/nextflow'
+    description = 'pipeline to '
+    mainScript = 'main.nf'
+    version = '0.0.0'
+    // The Nextflow version requirement belongs to the manifest scope; as a
+    // top-level setting it is not a recognised configuration option.
+    nextflowVersion = '>=20'
+}
+
+// Write an HTML execution report next to the pipeline results.
+report {
+  enabled = true
+  file = "$baseDir/../results/report.html"
+}
+
+// Execution profiles, selected with `-profile <name>`. Each profile enables one
+// container engine and sets the resources attached to the four process labels
+// (big/small memory x mono/multi cpus).
+profiles {
+  // Local execution with Docker.
+  docker {
+    docker.temp = "auto"
+    docker.enabled = true
+    process {
+      errorStrategy = 'finish'
+      memory = '16GB'
+      withLabel: big_mem_mono_cpus {
+        cpus = 1
+      }
+      withLabel: big_mem_multi_cpus {
+        cpus = 4
+      }
+      withLabel: small_mem_mono_cpus {
+        cpus = 1
+        memory = '2GB'
+      }
+      withLabel: small_mem_multi_cpus {
+        cpus = 4
+        memory = '2GB'
+      }
+    }
+  }
+  // Local execution with Podman. This profile previously enabled Charliecloud,
+  // so selecting it never ran Podman.
+  podman {
+    podman.enabled = true
+    process {
+      errorStrategy = 'finish'
+      memory = '16GB'
+      withLabel: big_mem_mono_cpus {
+        cpus = 1
+      }
+      withLabel: big_mem_multi_cpus {
+        cpus = 4
+      }
+      withLabel: small_mem_mono_cpus {
+        cpus = 1
+        memory = '2GB'
+      }
+      withLabel: small_mem_multi_cpus {
+        cpus = 4
+        memory = '2GB'
+      }
+    }
+  }
+  // Local execution with Singularity; images are cached in ./bin/.
+  singularity {
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/"
+    process {
+      errorStrategy = 'finish'
+      memory = '16GB'
+      withLabel: big_mem_mono_cpus {
+        cpus = 1
+      }
+      withLabel: big_mem_multi_cpus {
+        cpus = 4
+      }
+      withLabel: small_mem_mono_cpus {
+        cpus = 1
+        memory = '2GB'
+      }
+      withLabel: small_mem_multi_cpus {
+        cpus = 4
+        memory = '2GB'
+      }
+    }
+  }
+  // PSMN profile: Charliecloud containers submitted through SLURM ("Lake" queue).
+  psmn {
+    charliecloud.enabled = true
+    charliecloud.cacheDir = "/Xnfs/abc/charliecloud"
+    charliecloud.runOptions = "--bind /scratch:/scratch --bind /Xnfs:/Xnfs --bind /home:/home"
+    charliecloud.readOnlyInputs = true
+    process{
+      // Sleep 200 ms * 2^attempt before each retry, up to maxRetries retries.
+      errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' }
+      maxRetries = 3
+      executor = "slurm"
+      queue = "Lake"
+      withLabel: big_mem_mono_cpus {
+        cpus = 1
+        memory = "128GB"
+        time = "24h"
+      }
+      withLabel: big_mem_multi_cpus {
+        cpus = 32
+        memory = "192GB"
+        time = "24h"
+      }
+      withLabel: small_mem_mono_cpus {
+        cpus = 1
+        memory = "16GB"
+        time = "24h"
+      }
+      withLabel: small_mem_multi_cpus {
+        cpus = 32
+        memory = "16GB"
+        time = "24h"
+      }
+    }
+  }
+  // CC-IN2P3 profile: Singularity containers submitted through SLURM ("htc" queue).
+  // NOTE(review): maxRetries is set but no 'retry' errorStrategy is visible in
+  // this profile — confirm retries are actually expected here.
+  // NOTE(review): all four labels, including the *_multi_cpus ones, request
+  // cpus = 1 and memory = "8GB" — confirm this is intended.
+  ccin2p3 {
+    singularity.enabled = true
+    singularity.cacheDir = "$baseDir/../bin/"
+    singularity.runOptions = "--bind /pbs,/sps,/scratch,/tmp"
+    process{
+      maxRetries = 3
+      withLabel: big_mem_mono_cpus {
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "slurm"
+        clusterOptions = "--licenses=sps"
+        cpus = 1
+        memory = "8GB"
+        queue = "htc"
+      }
+      withLabel: big_mem_multi_cpus {
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "slurm"
+        clusterOptions = "--licenses=sps"
+        cpus = 1
+        memory = "8GB"
+        queue = "htc"
+      }
+      withLabel: small_mem_mono_cpus {
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "slurm"
+        clusterOptions = "--licenses=sps"
+        cpus = 1
+        memory = "8GB"
+        queue = "htc"
+      }
+      withLabel: small_mem_multi_cpus {
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "slurm"
+        clusterOptions = "--licenses=sps"
+        cpus = 1
+        memory = "8GB"
+        queue = "htc"
+      }
+    }
+  }
+}
--- a/src/nf_modules/BEDtools/2.25.0/docker_init.sh
+++ b/src/nf_modules/BEDtools/2.25.0/docker_init.sh
-#!/bin/sh
-docker build src/nf_modules/BEDtools/2.25.0 -t 'bedtools:2.25.0'
--- a/src/nf_modules/Bowtie2/2.3.4.1/docker_init.sh
+++ b/src/nf_modules/Bowtie2/2.3.4.1/docker_init.sh
-#!/bin/sh
-docker build src/nf_modules/Bowtie2/2.3.4.1 -t 'bowtie2:2.3.4.1'
--- a/src/nf_modules/FastQC/0.11.5/docker_init.sh
+++ b/src/nf_modules/FastQC/0.11.5/docker_init.sh
-#!/bin/sh
-docker build src/nf_modules/FastQC/0.11.5 -t 'fastqc:0.11.5'
--- a/src/nf_modules/SAMtools/1.7/docker_init.sh
+++ b/src/nf_modules/SAMtools/1.7/docker_init.sh
-#!/bin/sh
-docker build src/nf_modules/SAMtools/1.7 -t 'samtools:1.7'
--- a/src/nf_modules/agat/main.nf
+++ b/src/nf_modules/agat/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Container image shared by every process of this module.
+version = "0.8.0"
+container_url = "lbmc/agat:${version}"
+
+// params.gff_to_bed: extra options passed to agat_convert_sp_gff2bed.pl.
+// params.gff_to_bed_out: sub-directory of results/ to publish to ("" = no publishing).
+params.gff_to_bed = ""
+params.gff_to_bed_out = ""
+
+// Decompresses a gff file with zcat and converts it to bed.
+//   input:  [file_id, gff]
+//   output: bed, [file_id, *.bed]
+process gff_to_bed {
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  container = "${container_url}"
+  if (params.gff_to_bed_out != "") {
+    publishDir "results/${params.gff_to_bed_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(gff)
+
+  output:
+  tuple val(file_id), path("*.bed"), emit: bed
+
+  script:
+"""
+zcat ${gff} > ${gff.baseName}.gff
+agat_convert_sp_gff2bed.pl ${params.gff_to_bed} --gff ${gff.baseName}.gff -o ${gff.simpleName}.bed
+"""
+}
+
+// params.gff_to_gtf: extra options passed to agat_convert_sp_gff2gtf.pl.
+// params.gff_to_gtf_out: sub-directory of results/ to publish to ("" = no publishing).
+params.gff_to_gtf = ""
+params.gff_to_gtf_out = ""
+
+// Decompresses a gff file with zcat and converts it to gtf.
+//   input:  [file_id, gff]
+//   output: gtf, [file_id, *.gtf]
+process gff_to_gtf {
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  container = "${container_url}"
+  if (params.gff_to_gtf_out != "") {
+    publishDir "results/${params.gff_to_gtf_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(gff)
+
+  output:
+  tuple val(file_id), path("*.gtf"), emit: gtf
+
+  script:
+"""
+zcat ${gff} > ${gff.baseName}.gff
+agat_convert_sp_gff2gtf.pl ${params.gff_to_gtf} --gff ${gff.baseName}.gff -o ${gff.simpleName}.gtf
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/alntools/main.nf
+++ b/src/nf_modules/alntools/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Container image shared by every process of this module.
+version = "dd96682"
+container_url = "lbmc/alntools:${version}"
+
+// params.bam2ec: extra options passed to `alntools bam2ec`.
+// params.bam2ec_out: sub-directory of results/ to publish to ("" = no publishing).
+params.bam2ec = ""
+params.bam2ec_out = ""
+
+// Runs `alntools bam2ec` on a bam file and a transcripts lengths file.
+//   input:  [file_id, bam, bam_idx] and [transcripts_lengths_id, transcripts_lengths]
+//   output: bin    -> [file_id, <bam simpleName>.bin]
+//           tsv    -> the transcripts lengths file, passed through unchanged
+//           report -> [file_id, <bam simpleName>_bam2ec_report.txt], holding the
+//                     stdout and stderr of the command
+process bam2ec {
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  container = "${container_url}"
+  if (params.bam2ec_out != "") {
+    publishDir "results/${params.bam2ec_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(bam), path(bam_idx)
+  tuple val(transcripts_lengths_id), path(transcripts_lengths)
+
+  output:
+  tuple val(file_id), path("${bam.simpleName}.bin"), emit: bin
+  tuple val(transcripts_lengths_id), path("${transcripts_lengths}"), emit: tsv
+  tuple val(file_id), path("${bam.simpleName}_bam2ec_report.txt"), emit: report
+
+  script:
+"""
+mkdir tmp
+alntools bam2ec \
+  -c 1 ${params.bam2ec} \
+  -d ./tmp \
+  -t ${transcripts_lengths} \
+  -v \
+  ${bam} ${bam.simpleName}.bin &> \
+  ${bam.simpleName}_bam2ec_report.txt
+"""
+}
+
+// params.gtf_to_transcripts_lengths: not used by the script below.
+// params.gtf_to_transcripts_lengths_out: sub-directory of results/ to publish to
+// ("" = no publishing).
+params.gtf_to_transcripts_lengths = ""
+params.gtf_to_transcripts_lengths_out = ""
+
+// Sums the exon lengths of each transcript of a gtf file with awk.
+//   input:  [file_id, gtf]
+//   output: tsv, [file_id, <gtf simpleName>_transcripts_lengths.tsv], one
+//           "<transcript id><TAB><length>" line per transcript
+process gtf_to_transcripts_lengths {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  // Publishing is driven by the *_out parameter, as in the other processes; the
+  // guard previously tested the options parameter, so *_out was never honoured.
+  if (params.gtf_to_transcripts_lengths_out != "") {
+    publishDir "results/${params.gtf_to_transcripts_lengths_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gtf)
+
+  output:
+    tuple val(file_id), path("${gtf.simpleName}_transcripts_lengths.tsv"), emit: tsv
+
+  script:
+  // The awk program splits fields on tab and ";", reads the transcript id from
+  // field 11 of exon lines and accumulates end - start + 1 per transcript.
+"""
+awk -F"[\\t;]" '
+\$3=="exon" {
+        ID=gensub(/transcript_id \\"(.*)\\"/, "\\\\1", "g", \$11); 
+        LEN[ID]+=\$5-\$4+1;
+    } 
+END{
+    for(i in LEN)
+        {print i"\\t"LEN[i]}
+    }
+' ${gtf} > ${gtf.simpleName}_transcripts_lengths.tsv
+"""
+}
--- a/src/nf_modules/beagle/main.nf
+++ b/src/nf_modules/beagle/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Container image used by this module. The name and tag are separated by a
+// single ":"; the previous "::" made the image reference invalid.
+version = "5.1_24Aug19.3e8--hdfd78af_1"
+container_url = "quay.io/biocontainers/beagle:${version}"
+
+// params.phasing: declared for this process but not used by the script below.
+params.phasing = ""
+
+// Runs beagle on a vcf file against a reference vcf.
+//   input:  [file_id, vcf] and [ref_id, ref_vcf]
+//   output: bam, [file_id, files matching *.bam*]
+// NOTE(review): the output glob expects *.bam* files although the command is
+// given vcf inputs and no output option — confirm what beagle writes here.
+process phasing {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(vcf)
+    tuple val(ref_id), path(ref_vcf)
+
+  output:
+    tuple val(file_id), path("*.bam*"), emit: bam
+
+  script:
+"""
+beagle nthread=${task.cpus} \
+  gtgl=${vcf} \
+  ref=${ref_vcf}
+"""
+}
--- a/src/nf_modules/bedtools/main.nf
+++ b/src/nf_modules/bedtools/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Container image shared by every process of this module.
+version = "2.25.0"
+container_url = "lbmc/bedtools:${version}"
+
+// params.fasta_from_bed: options passed to `bedtools getfasta` (default "-name").
+// params.fasta_from_bed_out: sub-directory of results/ to publish to ("" = no publishing).
+params.fasta_from_bed = "-name"
+params.fasta_from_bed_out = ""
+
+// Extracts the sequences of the bed intervals from a fasta file.
+//   input:  [fasta_id, fasta] and [file_id, bed]
+//   output: fasta, [file_id, <bed baseName>_extracted.fasta]
+process fasta_from_bed {
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  container = "${container_url}"
+  if (params.fasta_from_bed_out != "") {
+    publishDir "results/${params.fasta_from_bed_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(fasta_id), path(fasta)
+    tuple val(file_id), path(bed)
+
+  output:
+    tuple val(file_id), path("*_extracted.fasta"), emit: fasta
+
+  script:
+"""
+bedtools getfasta ${params.fasta_from_bed} \
+-fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta
+"""
+}
+
+// params.merge_bed: extra options passed to `bedtools merge`.
+// params.merge_bed_out: sub-directory of results/ to publish to ("" = no publishing).
+params.merge_bed = ""
+params.merge_bed_out = ""
+
+// Merges the intervals of a bed file with `bedtools merge`.
+//   input:  [file_id, bed]
+//   output: bed, [file_id, <bed simpleName>_merged.bed]
+process merge_bed {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.merge_bed_out != "") {
+    publishDir "results/${params.merge_bed_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(bed)
+
+  output:
+  // The glob must match the file written by the script (*_merged.bed); the
+  // previous "*_merged.fasta" pattern never matched any output.
+  tuple val(file_id), path("*_merged.bed"), emit: bed
+
+  script:
+"""
+bedtools merge ${params.merge_bed} -i ${bed} > ${bed[0].simpleName}_merged.bed
+"""
+}
+
+// params.bam_to_fastq_singleend: extra options passed to `bedtools bamtofastq`.
+// params.bam_to_fastq_singleend_out: sub-directory of results/ to publish to
+// ("" = no publishing).
+params.bam_to_fastq_singleend = ""
+params.bam_to_fastq_singleend_out = ""
+
+// Converts a bam file into a single fastq file.
+//   input:  [bam_id, bam]
+//   output: fastq, [bam_id, <bam baseName>.fastq]
+process bam_to_fastq_singleend {
+  label "big_mem_mono_cpus"
+  tag "$bam_id"
+  container = "${container_url}"
+  if (params.bam_to_fastq_singleend_out != "") {
+    publishDir "results/${params.bam_to_fastq_singleend_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(bam_id), path(bam)
+
+  output:
+    tuple val(bam_id), path("*.fastq"), emit: fastq
+
+  script:
+"""
+bedtools bamtofastq \
+  ${params.bam_to_fastq_singleend} \
+  -i ${bam} -fq ${bam.baseName}.fastq
+"""
+}
+
+// params.bam_to_fastq_pairedend: extra options passed to `bedtools bamtofastq`.
+// params.bam_to_fastq_pairedend_out: sub-directory of results/ to publish to
+// ("" = no publishing).
+params.bam_to_fastq_pairedend = ""
+params.bam_to_fastq_pairedend_out = ""
+
+// Converts a bam file into a pair of fastq files (_R1 and _R2).
+//   input:  [bam_id, bam]
+//   output: fastq, [bam_id, <bam baseName>_R1.fastq and <bam baseName>_R2.fastq]
+process bam_to_fastq_pairedend {
+  label "big_mem_mono_cpus"
+  tag "$bam_id"
+  container = "${container_url}"
+  if (params.bam_to_fastq_pairedend_out != "") {
+    publishDir "results/${params.bam_to_fastq_pairedend_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(bam_id), path(bam)
+
+  output:
+    tuple val(bam_id), path("*.fastq"), emit: fastq
+
+  script:
+"""
+bedtools bamtofastq \
+  ${params.bam_to_fastq_pairedend} \
+  -i ${bam} -fq ${bam.baseName}_R1.fastq -fq2 ${bam.baseName}_R2.fastq
+"""
+}
+
+// params.bam_to_bedgraph: extra options passed to `bedtools genomecov`.
+// params.bam_to_bedgraph_out: sub-directory of results/ to publish to ("" = no publishing).
+params.bam_to_bedgraph = ""
+params.bam_to_bedgraph_out = ""
+
+// Writes the `bedtools genomecov -bg` output for a bam file to <bam simpleName>.bed.
+//   input:  [bam_id, bam]
+//   output: bed, [bam_id, *.bed]
+process bam_to_bedgraph {
+  label "big_mem_mono_cpus"
+  tag "$bam_id"
+  container = "${container_url}"
+  if (params.bam_to_bedgraph_out != "") {
+    publishDir "results/${params.bam_to_bedgraph_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(bam_id), path(bam)
+
+  output:
+    tuple val(bam_id), path("*.bed"), emit: bed
+
+  script:
+"""
+bedtools genomecov \
+  ${params.bam_to_bedgraph} \
+  -ibam ${bam} \
+  -bg > ${bam.simpleName}.bed
+"""
+}
--- a/src/nf_modules/bioawk/main.nf
+++ b/src/nf_modules/bioawk/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// Container image used by this module.
+version = "1.0"
+container_url = "lbmc/bioawk:${version}"
+
+// params.fasta_to_transcripts_lengths: not used by the script below.
+// params.fasta_to_transcripts_lengths_out: sub-directory of results/ to publish to
+// ("" = no publishing).
+params.fasta_to_transcripts_lengths = ""
+params.fasta_to_transcripts_lengths_out = ""
+
+// Prints "<sequence name> <sequence length>" for every record of a fasta file.
+//   input:  [file_id, fasta]
+//   output: tsv, [file_id, <fasta simpleName>_transcripts_lengths.tsv]
+process fasta_to_transcripts_lengths {
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  container = "${container_url}"
+  if (params.fasta_to_transcripts_lengths_out != "") {
+    publishDir "results/${params.fasta_to_transcripts_lengths_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(fasta)
+
+  output:
+  tuple val(file_id), path("${fasta.simpleName}_transcripts_lengths.tsv"), emit: tsv
+
+  script:
+"""
+bioawk -c fastx '{print(\$name" "length(\$seq))}' ${fasta} > ${fasta.simpleName}_transcripts_lengths.tsv
+"""
+}
\ No newline at end of file
No results found