Compare revisions

ceca3ce0 · ceca3ce0 · f7f61d07 · ceca3ce0 · ceca3ce0 · ceca3ce0
--- a/src/nf_modules/multiqc/main.nf
+++ b/src/nf_modules/multiqc/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// MultiQC generates a single HTML report that combines the reports of many
+// different bioinformatics tools.
+// 
+// EXAMPLE:
+
+/*
+include { multiqc } 
+  from './nf_modules/multiqc/main'
+  addParams(
+    multiqc_out: "QC/"
+  )
+
+multiqc(
+  report_a
+  .mix(
+    report_b,
+    report_c,
+    report_d
+  )
+)
+*/
+
+version = "1.11"
+container_url = "lbmc/multiqc:${version}"
+
+// params.multiqc: extra options inserted into the multiqc command line.
+// params.multiqc_out: sub-folder of results/ where the report is published.
+params.multiqc = ""
+params.multiqc_out = "QC/"
+// Workflow multiqc: clean the `report` channel and run multiqc_default once.
+// Each List item is replaced by its second element (or its only element when
+// the list holds a single entry); other items pass through unchanged. The
+// result is de-duplicated, flattened, and collected into one emission.
+workflow multiqc {
+  take:
+    report
+  main:
+    report
+    .map{it ->
+      if (it instanceof List){
+        if(it.size() > 1) {
+          it[1]
+        } else {
+          it[0]
+        }
+      } else {
+        it
+      }
+    }
+    .unique()
+    .flatten()
+    .set { report_cleaned }
+    multiqc_default(report_cleaned.collect())
+
+  emit:
+  report = multiqc_default.out.report
+}
+
+// Process multiqc_default: run `multiqc ${params.multiqc} -f .` in the task
+// directory where the input reports are staged, and emit every path matching
+// "*multiqc_*". Outputs are copied to results/<params.multiqc_out> unless that
+// parameter is the empty string.
+process multiqc_default {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  if (params.multiqc_out != "") {
+    publishDir "results/${params.multiqc_out}", mode: 'copy'
+  }
+
+  input:
+    path report 
+
+  output:
+    path "*multiqc_*", emit: report
+
+  script:
+"""
+multiqc ${params.multiqc} -f .
+"""
+}
--- a/src/nf_modules/picard/main.nf
+++ b/src/nf_modules/picard/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "2.18.11"
+container_url = "lbmc/picard:${version}"
+
+// params.mark_duplicate: options passed to Picard MarkDuplicates.
+// params.mark_duplicate_out: publish sub-folder of results/ (empty = no publish).
+params.mark_duplicate = "VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=true"
+params.mark_duplicate_out = ""
+// Process mark_duplicate: run Picard MarkDuplicates on a (file_id, bam) tuple.
+// Writes <bam.baseName>_dedup.bam and a *_report.dupinfo.txt metrics file;
+// stdout and stderr of the tool are redirected to picard_<bam.baseName>.log.
+process mark_duplicate {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.mark_duplicate_out != "") {
+    publishDir "results/${params.mark_duplicate_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+  output:
+    tuple val(file_id) , path("*.bam"), emit: bam
+    path "*_report.dupinfo.txt", emit: report
+
+
+  script:
+"""
+PicardCommandLine MarkDuplicates \
+  ${params.mark_duplicate} \
+  INPUT=${bam} \
+  OUTPUT=${bam.baseName}_dedup.bam \
+  METRICS_FILE=${bam.baseName}_picard_dedup_report.dupinfo.txt &> \
+  picard_${bam.baseName}.log
+"""
+}
+
+// params.normalize_fasta: extra options passed to Picard NormalizeFasta.
+// params.normalize_fasta_out: publish sub-folder of results/ (empty = no publish).
+params.normalize_fasta = ""
+params.normalize_fasta_out = ""
+// Process normalize_fasta: run Picard NormalizeFasta on a (file_id, fasta)
+// tuple, write results/<fasta.simpleName>.fasta and gzip it. Emits
+// (file_id, results/*.fasta.gz) on `fasta`.
+// params.normalize_fasta is now forwarded to the tool; it was declared but
+// never used, so user-supplied options were silently ignored.
+process normalize_fasta {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.normalize_fasta_out != "") {
+    publishDir "results/${params.normalize_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+  output:
+    tuple val(file_id), path("results/*.fasta.gz"), emit: fasta 
+
+  script:
+"""
+mkdir -p results
+PicardCommandLine NormalizeFasta \
+      ${params.normalize_fasta} \
+      I=${fasta} \
+      O=results/${fasta.simpleName}.fasta
+gzip results/${fasta.simpleName}.fasta
+"""
+}
+
+// params.index_fasta: extra options passed to Picard CreateSequenceDictionary.
+// params.index_fasta_out: publish sub-folder of results/ (empty = no publish).
+params.index_fasta = ""
+params.index_fasta_out = ""
+// Process index_fasta: run Picard CreateSequenceDictionary on a
+// (file_id, fasta) tuple, writing <fasta.baseName>.dict.
+// Emits (file_id, *.dict) on `index`.
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+  output:
+    tuple val(file_id), path("*.dict"), emit: index
+
+  script:
+"""
+PicardCommandLine CreateSequenceDictionary \
+  ${params.index_fasta} \
+  REFERENCE=${fasta} \
+  OUTPUT=${fasta.baseName}.dict
+"""
+}
+
+// params.index_bam: extra options passed to Picard BuildBamIndex.
+// params.index_bam_out: publish sub-folder of results/ (empty = no publish).
+params.index_bam = ""
+params.index_bam_out = ""
+// Process index_bam: run Picard BuildBamIndex on a (file_id, bam) tuple.
+// Emits (file_id, "*") on `index`, i.e. whatever the glob "*" matches in the
+// task directory.
+// NOTE(review): no OUTPUT is given, so the index file name is chosen by
+// Picard - confirm what the "*" glob captures.
+process index_bam {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_bam_out != "") {
+    publishDir "results/${params.index_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+  output:
+    tuple val(file_id), path("*"), emit: index
+
+  script:
+"""
+PicardCommandLine BuildBamIndex \
+  ${params.index_bam} \
+  INPUT=${bam}
+"""
+}
--- a/src/nf_modules/pigz/2.4/docker_init.sh
+++ b/src/nf_modules/pigz/2.4/docker_init.sh
-#!/bin/sh
-docker build src/nf_modules/pigz/2.4 -t 'pigz:2.4'
--- a/src/nf_modules/porechop/main.nf
+++ b/src/nf_modules/porechop/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.2.4"
+container_url = "lbmc/porechop:${version}"
+
+// params.porechop_out: publish sub-folder of results/ (empty = no publish).
+// A default is declared so the publishDir test below never sees an unset value.
+params.porechop_out = ""
+// Process porechop: run porechop on a (file_id, fastq) tuple and emit
+// (file_id, <file_id>_porechoped.fastq) on `porechoped_fastq`.
+// The input used to be declared as `fatsq` while the script read `${fastq}`,
+// so the script referenced an undefined variable. The thread count now follows
+// the CPUs allocated to the task instead of a hard-coded 4.
+process porechop {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.porechop_out != "") {
+    publishDir "results/${params.porechop_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fastq)
+
+  output:
+    tuple val(file_id), path("*_porechoped.fastq"), emit: porechoped_fastq
+  script:
+"""
+porechop -i ${fastq} -o ${file_id}_porechoped.fastq --threads ${task.cpus}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/rasusa/main.nf
+++ b/src/nf_modules/rasusa/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.6.0"
+container_url = "lbmc/rasusa:${version}"
+
+include { index_fasta } from "./../samtools/main.nf"
+
+// params.sample_fastq_coverage / params.sample_fastq_size: sub-sampling target;
+// when both are empty the workflow passes the reads through untouched.
+// params.sample_fastq_out: publish sub-folder of results/ for sub-sampled reads.
+// NOTE(review): params.sample_fastq is declared but not used in the code
+// visible here.
+params.sample_fastq = ""
+params.sample_fastq_coverage = ""
+params.sample_fastq_size = ""
+params.sample_fastq_out = ""
+// Workflow sample_fastq: takes a `fastq` channel and a `fasta` channel. If a
+// coverage or size is requested, the fasta is indexed with index_fasta and the
+// reads are sub-sampled by sub_sample_fastq; otherwise `fastq` is emitted as is.
+workflow sample_fastq {
+  take:
+  fastq
+  fasta
+
+  main:
+  if (params.sample_fastq_coverage == "" && params.sample_fastq_size == ""){
+    fastq
+      .set{ final_fastq }
+  } else {
+    index_fasta(fasta)
+    sub_sample_fastq(fastq, index_fasta.out.index)
+    sub_sample_fastq.out.fastq
+      .set{ final_fastq }
+  }
+
+  emit:
+  fastq = final_fastq
+
+}
+
+// Process sub_sample_fastq: sub-sample reads with rasusa.
+// Inputs: (file_id, fastq) reads and (index_id, idx), the index passed to
+// rasusa through -g. Emits (file_id, sub_*.fastq.gz) on `fastq`.
+// The sampling option is "-b <params.sample_fastq_size>" when a size is set,
+// otherwise "-c <params.sample_fastq_coverage>".
+// Publishing is now guarded by params.sample_fastq_out, the folder actually
+// used by publishDir; the guard previously tested params.index_fasta_out.
+// An unused file_prefix computation was removed.
+process sub_sample_fastq {
+  container = "${container_url}"
+  label "small_mem_mono_cpus"
+  tag "$file_id"
+  if (params.sample_fastq_out != "") {
+    publishDir "results/${params.sample_fastq_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fastq)
+    tuple val(index_id), path(idx)
+
+  output:
+    tuple val(file_id), path("sub_*.fastq.gz"), emit: fastq
+
+  script:
+
+  sample_option = "-c " + params.sample_fastq_coverage
+  if (params.sample_fastq_size != ""){
+    sample_option = "-b " + params.sample_fastq_size
+  }
+
+  if (fastq.size() == 2)
+"""
+rasusa \
+  -i ${fastq[0]} ${fastq[1]} \
+  -g ${idx} \
+  ${sample_option} \
+  -o sub_${fastq[0].simpleName}.fastq.gz sub_${fastq[1].simpleName}.fastq.gz
+"""
+  else
+"""
+rasusa \
+  -i ${fastq} \
+  -g ${idx} \
+  ${sample_option} \
+  -o sub_${fastq.simpleName}.fastq.gz
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/rasusa/test.nf
+++ b/src/nf_modules/rasusa/test.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+nextflow.enable.dsl=2
+
+/*
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq"
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb"
+*/
+
+// Test-script defaults: read glob, reference glob, and the optional
+// sub-sampling targets forwarded to the sample_fastq workflow.
+params.fastq = "data/fastq/*R{1,2}*"
+params.fasta = "data/fasta/*.fasta"
+params.coverage = ""
+params.size = ""
+
+// Map the test parameters onto the module parameters and publish to sample/.
+include { sample_fastq } from "./main.nf" addParams(sample_fastq_coverage: params.coverage, sample_fastq_size: params.size, sample_fastq_out: "sample/")
+
+// Reads grouped by fromFilePairs; size: -1 is passed so groups are not
+// restricted to exactly two files.
+channel
+  .fromFilePairs( params.fastq, size: -1)
+  .set { fastq_files }
+
+// Reference files as (simpleName, path) tuples.
+channel
+  .fromPath( params.fasta )
+  .map { it -> [it.simpleName, it]}
+  .set { fasta_files }
+
+// collect() gathers the fasta tuples into a single emission.
+workflow {
+  sample_fastq(fastq_files, fasta_files.collect())
+}
\ No newline at end of file
--- a/src/nf_modules/rasusa/test.sh
+++ b/src/nf_modules/rasusa/test.sh
+#! /bin/sh
+
+# SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq"
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" --coverage 1.0
+./nextflow src/nf_modules/rasusa/test.nf -c src/nextflow.config -profile docker --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fastq "data/tiny_dataset/fastq/tiny_R1.fastq" --size "1Mb"
\ No newline at end of file
--- a/src/nf_modules/salmon/main.nf
+++ b/src/nf_modules/salmon/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "1.8.0"
+container_url = "lbmc/salmon:${version}"
+
+// Process quantify: run `salmon quant` in alignment mode (-a) on a
+// (file_id, bam) tuple and emit (file_id, *.sf) on `quant`.
+// NOTE(review): `-t XXXXXXXXXX` looks like a placeholder for the transcript
+// file - confirm before use.
+// NOTE(review): params.salmon_out has no default in this file and is used both
+// as the publish folder and as the salmon -o directory; the "*.sf" glob only
+// matches files at the top of the task directory - confirm.
+process quantify {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.salmon_out != "") {
+    publishDir "results/${params.salmon_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*.sf"), emit: quant
+  script:
+"""
+salmon quant -l A --noErrorModel -t XXXXXXXXXX -a ${bam} -p 4 -o ${params.salmon_out}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/sambamba/main.nf
+++ b/src/nf_modules/sambamba/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "0.6.7"
+container_url = "lbmc/sambamba:${version}"
+
+// params.index_bam: extra options passed to `sambamba index`.
+params.index_bam = ""
+// Process index_bam: run `sambamba index` on a (file_id, bam) tuple using
+// task.cpus threads. Emits (file_id, *.bam*) on `bam`.
+process index_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*.bam*"), emit: bam
+
+  script:
+"""
+sambamba index ${params.index_bam} -t ${task.cpus} ${bam}
+"""
+}
+
+// params.sort_bam: extra options passed to `sambamba sort`.
+params.sort_bam = ""
+// Process sort_bam: run `sambamba sort` on a (file_id, bam) tuple using
+// task.cpus threads, writing <bam.baseName>_sorted.bam.
+// Emits (file_id, *.bam*) on `bam`.
+process sort_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*.bam*"), emit: bam
+
+  script:
+"""
+sambamba sort -t ${task.cpus} ${params.sort_bam} -o ${bam.baseName}_sorted.bam ${bam}
+"""
+}
+
+// params.split_bam: extra options passed to both `sambamba view` calls.
+params.split_bam = ""
+// Process split_bam: split a (file_id, bam) tuple by strand with two
+// `sambamba view` filters. Emits (file_id, *_forward.bam*) on `bam_forward`
+// and (file_id, *_reverse.bam*) on `bam_reverse`.
+// `-f bam` is passed explicitly: sambamba view writes SAM text by default, so
+// the files named .bam previously contained SAM.
+process split_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*_forward.bam*"), emit: bam_forward
+    tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse
+  script:
+"""
+sambamba view -t ${task.cpus} ${params.split_bam} -f bam -h -F "strand == '+'" ${bam} > \
+  ${bam.baseName}_forward.bam
+sambamba view -t ${task.cpus} ${params.split_bam} -f bam -h -F "strand == '-'" ${bam} > \
+  ${bam.baseName}_reverse.bam
+"""
+}
--- a/src/nf_modules/samtools/main.nf
+++ b/src/nf_modules/samtools/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "1.11"
+container_url = "lbmc/samtools:${version}"
+
+// params.index_fasta: extra options passed to `samtools faidx`.
+// params.index_fasta_out: publish sub-folder of results/ (empty = no publish).
+params.index_fasta = ""
+params.index_fasta_out = ""
+// Process index_fasta: index a (file_id, fasta) tuple with `samtools faidx`.
+// When `gzip -t` succeeds on the input, it is first decompressed to
+// <fasta.simpleName>.fasta and that copy is indexed; otherwise the input is
+// indexed directly. Emits (file_id, *.fai) on `index`.
+process index_fasta {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_fasta_out != "") {
+    publishDir "results/${params.index_fasta_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+  output:
+    tuple val(file_id), path("*.fai"), emit: index
+
+  script:
+"""
+if gzip -t ${fasta}; then
+  zcat ${fasta} > ${fasta.simpleName}.fasta
+  samtools faidx ${params.index_fasta}  ${fasta.simpleName}.fasta
+else
+  samtools faidx ${params.index_fasta} ${fasta}
+fi
+
+"""
+}
+
+// params.filter_bam_quality_threshold: value used to build the default -q option.
+// params.filter_bam_quality: options appended to `samtools view`.
+// params.filter_bam_quality_out: publish sub-folder of results/ (empty = no publish).
+params.filter_bam_quality_threshold = 30
+params.filter_bam_quality = "-q ${params.filter_bam_quality_threshold}"
+params.filter_bam_quality_out = ""
+// Process filter_bam_quality: run `samtools view -hb` with
+// params.filter_bam_quality on a (file_id, bam) tuple, writing
+// <bam.simpleName>_filtered.bam. Emits (file_id, *_filtered.bam) on `bam`.
+process filter_bam_quality {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.filter_bam_quality_out != "") {
+    publishDir "results/${params.filter_bam_quality_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*_filtered.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} -hb ${bam} ${params.filter_bam_quality} > \
+  ${bam.simpleName}_filtered.bam
+"""
+}
+
+// params.filter_bam: extra options appended to `samtools view`.
+// params.filter_bam_out: publish sub-folder of results/ (empty = no publish).
+params.filter_bam = ""
+params.filter_bam_out = ""
+// Process filter_bam: run `samtools view -hb -L <bed>` on a (file_id, bam)
+// tuple with a (bed_id, bed) tuple, writing <bam.simpleName>_filtered.bam.
+// Emits (file_id, *_filtered.bam) on `bam`.
+process filter_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.filter_bam_out != "") {
+    publishDir "results/${params.filter_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+    tuple val(bed_id), path(bed)
+
+  output:
+    tuple val(file_id), path("*_filtered.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} -hb ${bam} -L ${bed} ${params.filter_bam} > \
+  ${bam.simpleName}_filtered.bam
+"""
+}
+
+// params.rm_from_bam: extra options passed to `samtools view`.
+// params.rm_from_bam_out: publish sub-folder of results/ (empty = no publish).
+params.rm_from_bam = ""
+params.rm_from_bam_out = ""
+// Process rm_from_bam: run `samtools view -L <bed> -U <out>` on a
+// (file_id, bam) tuple with a (bed_id, bed) tuple. The -U file,
+// <bam.simpleName>_filtered.bam, is kept as the result while the standard
+// output of samtools is discarded.
+// Emits (file_id, *_filtered.bam) on `bam`.
+// The command now reads params.rm_from_bam; it previously read
+// params.filter_bam, the option string of a different process.
+process rm_from_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.rm_from_bam_out != "") {
+    publishDir "results/${params.rm_from_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+    tuple val(bed_id), path(bed)
+
+  output:
+    tuple val(file_id), path("*_filtered.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} ${params.rm_from_bam} -hb -L ${bed} -U ${bam.simpleName}_filtered.bam ${bam} >  /dev/null
+"""
+}
+
+// params.filter_bam_mapped: options passed to `samtools view` (default "-F 4").
+// params.filter_bam_mapped_out: publish sub-folder of results/ (empty = no publish).
+params.filter_bam_mapped = "-F 4"
+params.filter_bam_mapped_out = ""
+// Process filter_bam_mapped: run `samtools view -hb` with
+// params.filter_bam_mapped on a (file_id, bam) tuple, writing
+// <bam.simpleName>_mapped.bam. Emits (file_id, *_mapped.bam) on `bam`.
+process filter_bam_mapped {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.filter_bam_mapped_out != "") {
+    publishDir "results/${params.filter_bam_mapped_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*_mapped.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} ${params.filter_bam_mapped} -hb ${bam} > \
+  ${bam.simpleName}_mapped.bam
+"""
+}
+
+// params.filter_bam_unmapped: options passed to `samtools view` (default "-f 4").
+// params.filter_bam_unmapped_out: publish sub-folder of results/ (empty = no publish).
+params.filter_bam_unmapped = "-f 4"
+params.filter_bam_unmapped_out = ""
+// Process filter_bam_unmapped: run `samtools view -hb` with
+// params.filter_bam_unmapped on a (file_id, bam) tuple, writing
+// <bam.simpleName>_unmapped.bam. Emits (file_id, *_unmapped.bam) on `bam`.
+process filter_bam_unmapped {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.filter_bam_unmapped_out != "") {
+    publishDir "results/${params.filter_bam_unmapped_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*_unmapped.bam"), emit: bam
+  script:
+"""
+samtools view -@ ${task.cpus} ${params.filter_bam_unmapped} -hb ${bam} > ${bam.simpleName}_unmapped.bam
+"""
+}
+
+// params.index_bam: extra options passed to `samtools index`.
+// params.index_bam_out: publish sub-folder of results/ (empty = no publish).
+params.index_bam = ""
+params.index_bam_out = ""
+// Process index_bam: run `samtools index` on a (file_id, bam) tuple.
+// Emits (file_id, bam, *.bam.bai) on `bam_idx`, i.e. the input BAM is
+// re-emitted next to its index.
+process index_bam {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.index_bam_out != "") {
+    publishDir "results/${params.index_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("${bam}"), path("*.bam.bai"), emit: bam_idx
+
+  script:
+"""
+samtools index ${params.index_bam} ${bam}
+"""
+}
+
+// params.sort_bam: extra options passed to `samtools sort`.
+// params.sort_bam_out: publish sub-folder of results/ (empty = no publish).
+params.sort_bam = ""
+params.sort_bam_out = ""
+// Process sort_bam: run `samtools sort -O BAM` on a (file_id, bam) tuple,
+// writing <bam.simpleName>_sorted.bam. Emits (file_id, *.bam*) on `bam`.
+process sort_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.sort_bam_out != "") {
+    publishDir "results/${params.sort_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*.bam*"), emit: bam
+
+  script:
+"""
+samtools sort -@ ${task.cpus} ${params.sort_bam} -O BAM -o ${bam.simpleName}_sorted.bam ${bam}
+"""
+}
+
+// params.split_bam: extra options passed to both `samtools view` calls.
+// params.split_bam_out: publish sub-folder of results/ (empty = no publish).
+params.split_bam = ""
+params.split_bam_out = ""
+// Process split_bam: split a (file_id, bam) tuple on SAM flag 0x10 with two
+// `samtools view` calls (-F 0x10 for *_forward.bam, -f 0x10 for
+// *_reverse.bam), each using half of task.cpus. Emits (file_id,
+// *_forward.bam*) on `bam_forward` and (file_id, *_reverse.bam*) on
+// `bam_reverse`.
+// The forward job runs in the background; the script now waits for it, so the
+// task cannot finish with a partially written forward BAM and a failure of
+// the background job fails the task.
+process split_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.split_bam_out != "") {
+    publishDir "results/${params.split_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*_forward.bam*"), emit: bam_forward
+    tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse
+  script:
+"""
+samtools view -@ ${Math.round(task.cpus/2)} ${params.split_bam} \
+  -hb -F 0x10 ${bam} > ${bam.simpleName}_forward.bam &
+forward_pid=\$!
+samtools view -@ ${Math.round(task.cpus/2)} ${params.split_bam} \
+  -hb -f 0x10 ${bam} > ${bam.simpleName}_reverse.bam
+wait \$forward_pid
+"""
+}
+
+// params.merge_bam: extra options passed to `samtools merge`.
+// params.merge_bam_out: publish sub-folder of results/ (empty = no publish).
+params.merge_bam = ""
+params.merge_bam_out = ""
+// Process merge_bam: merge two BAM files, given as (first_file_id, first_bam)
+// and (second_file_id, second_bam), into
+// <first_bam.simpleName>_<second_bam.simpleName>.bam.
+// Emits (first_file_id, *.bam*) on `bam`.
+// Fixes: the tag and the output referenced an undefined `file_id` (the id of
+// the first input is used instead), the script referenced an undefined
+// `second_file`, and `samtools merge` was given the inputs before the output
+// file whereas it expects the output file first.
+process merge_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$first_file_id"
+  if (params.merge_bam_out != "") {
+    publishDir "results/${params.merge_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(first_file_id), path(first_bam)
+    tuple val(second_file_id), path(second_bam)
+
+  output:
+    tuple val(first_file_id), path("*.bam*"), emit: bam
+  script:
+"""
+samtools merge -@ ${task.cpus} ${params.merge_bam} \
+  ${first_bam.simpleName}_${second_bam.simpleName}.bam \
+  ${first_bam} ${second_bam}
+"""
+}
+
+// params.merge_multi_bam: extra options passed to `samtools merge`.
+// params.merge_multi_bam_out: publish sub-folder of results/ (empty = no publish).
+params.merge_multi_bam = ""
+params.merge_multi_bam_out = ""
+// Process merge_multi_bam: merge all BAM files of a (file_id, bams) tuple into
+// <bams[0].simpleName>_merged.bam. Emits (file_id, *_merged.bam*) on `bam`.
+// NOTE(review): `bams[0]` assumes `bams` holds several files - confirm the
+// behaviour when a single file is provided.
+process merge_multi_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.merge_multi_bam_out != "") {
+    publishDir "results/${params.merge_multi_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bams)
+
+  output:
+    tuple val(file_id), path("*_merged.bam*"), emit: bam
+  script:
+"""
+samtools merge -@ ${task.cpus} \
+  ${params.merge_multi_bam} \
+  ${bams[0].simpleName}_merged.bam \
+  ${bams}
+"""
+}
+
+// params.stats_bam: extra options passed to `samtools flagstat`.
+// params.stats_bam_out: publish sub-folder of results/ (empty = no publish).
+params.stats_bam = ""
+params.stats_bam_out = ""
+// Process stats_bam: run `samtools flagstat -O tsv` on a (file_id, bam) tuple.
+// The result is written to <bam.simpleName>.flagstat.txt and copied to
+// <bam.simpleName>.tsv. Emits (file_id, *.tsv) on `tsv` and the
+// *.flagstat.txt file on `report`.
+process stats_bam {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.stats_bam_out != "") {
+    publishDir "results/${params.stats_bam_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*.tsv"), emit: tsv
+    path "*.flagstat.txt", emit: report 
+  script:
+"""
+samtools flagstat -@ ${task.cpus} ${params.stats_bam} -O tsv ${bam} > ${bam.simpleName}.flagstat.txt
+cp ${bam.simpleName}.flagstat.txt ${bam.simpleName}.tsv
+"""
+}
+
+// params.flagstat_2_multiqc_out: publish sub-folder of results/ (empty = no publish).
+// NOTE(review): params.flagstat_2_multiqc is declared but not used below.
+params.flagstat_2_multiqc = ""
+params.flagstat_2_multiqc_out = ""
+// Process flagstat_2_multiqc: rename the file of a (file_id, tsv) tuple to
+// <tsv.simpleName>.flagstat.txt. Emits (file_id, *.txt) on `report`.
+// No container directive is set for this process.
+process flagstat_2_multiqc {
+  tag "$file_id"
+  if (params.flagstat_2_multiqc_out != "") {
+    publishDir "results/${params.flagstat_2_multiqc_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(tsv)
+
+  output:
+    tuple val(file_id), path("*.txt"), emit: report
+"""
+mv ${tsv} ${tsv.simpleName}.flagstat.txt
+"""
+}
+
+// params.idxstat_2_multiqc_out: publish sub-folder of results/ (empty = no publish).
+// NOTE(review): params.idxstat_2_multiqc is declared but not used below.
+params.idxstat_2_multiqc = ""
+params.idxstat_2_multiqc_out = ""
+// Process idxstat_2_multiqc: rename the file of a (file_id, tsv) tuple to
+// <tsv.simpleName>.idxstats.txt. Emits (file_id, *.txt) on `report`.
+// No container directive is set for this process.
+process idxstat_2_multiqc {
+  tag "$file_id"
+  if (params.idxstat_2_multiqc_out != "") {
+    publishDir "results/${params.idxstat_2_multiqc_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(tsv)
+
+  output:
+    tuple val(file_id), path("*.txt"), emit: report
+"""
+mv ${tsv} ${tsv.simpleName}.idxstats.txt
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/sanity/main.nf
+++ b/src/nf_modules/sanity/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+container_url="mlepetit/sanity:latest"
+
+// params.sanity_out: publish sub-folder of results/; when empty the outputs
+// are published to results/normalize_matrix/ instead.
+// params.sanity: extra options passed to Sanity.
+params.sanity_out = ""
+params.sanity = ""
+
+// Process normalization_sanity: run Sanity on a (id_mtx, raw_filtered_mtx)
+// tuple using task.cpus threads. Emits (id_mtx,
+// log_transcription_quotients.txt) on `normalize_filtered_mtx` and
+// (id_mtx, ltq_error_bars.txt) on `ltq_error`.
+// The opening brace now sits on the `process` line, as in the other modules
+// (Groovy does not attach a block that starts on the following line), and the
+// mixed tab/space indentation was normalised. The script text is unchanged.
+process normalization_sanity {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  if (params.sanity_out != "") {
+    publishDir "results/${params.sanity_out}", mode: 'copy'
+  } else {
+    publishDir "results/normalize_matrix/", mode: 'copy'
+  }
+
+  input:
+    tuple val(id_mtx), path(raw_filtered_mtx)
+
+  output:
+    tuple val(id_mtx), path("log_transcription_quotients.txt"), emit: normalize_filtered_mtx
+    tuple val(id_mtx), path("ltq_error_bars.txt"), emit: ltq_error
+
+  script:
+        """
+        Sanity -f ${raw_filtered_mtx} -n ${task.cpus} ${params.sanity} 
+        """
+}
--- a/src/nf_modules/sratoolkit/list-srr.txt
+++ b/src/nf_modules/sratoolkit/list-srr.txt
+ERR572281
+ERR572146
+ERR572201
+ERR638114
+ERR638115
+ERR638116
--- a/src/nf_modules/sratoolkit/list-srr.txt.license
+++ b/src/nf_modules/sratoolkit/list-srr.txt.license
+SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+
+SPDX-License-Identifier: CC-BY-SA-4.0
--- a/src/nf_modules/sratoolkit/main.nf
+++ b/src/nf_modules/sratoolkit/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "3.0.0"
+container_url = "lbmc/sratoolkit:${version}"
+
+// params.fastq_dump: extra options passed to fastq-dump.
+// params.fastq_dump_out: publish sub-folder of results/ (empty = no publish).
+params.fastq_dump = ""
+params.fastq_dump_out = ""
+// Process fastq_dump: download the reads of accession `sra` with
+// `fastq-dump --split-files --gzip`, then rename <sra>_1 / <sra>_2 to
+// <sra>_R1 / <sra>_R2 when those files exist.
+// Emits (sra, *.fastq.gz) on `fastq`.
+// Because --gzip is always passed the files end in .fastq.gz; the renames and
+// the output glob previously looked for uncompressed .fastq files, which this
+// command never writes.
+process fastq_dump {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$sra"
+  if (params.fastq_dump_out != "") {
+    publishDir "results/${params.fastq_dump_out}", mode: 'copy'
+  }
+
+  input:
+    val sra
+
+  output:
+    tuple val(sra), path("*.fastq.gz"), emit: fastq
+
+  script:
+"""
+fastq-dump ${params.fastq_dump} --split-files --gzip ${sra}
+if [ -f ${sra}_1.fastq.gz ]
+then
+  mv ${sra}_1.fastq.gz ${sra}_R1.fastq.gz
+fi
+if [ -f ${sra}_2.fastq.gz ]
+then
+  mv ${sra}_2.fastq.gz ${sra}_R2.fastq.gz
+fi
+"""
+}
--- a/src/nf_modules/star/main.nf
+++ b/src/nf_modules/star/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+// STAR is an ultrafast universal RNA-seq aligner
+// 
+// EXAMPLE:
+
+/*
+include {
+    index_with_gff as star_index_with_gff;
+    mapping_fastq as star_mapping_fastq
+ } from './nf_modules/star/main.nf'
+   addParams(
+    star_mapping_fastq_out: "star/"
+   )
+
+star_index_with_gff(
+  genome_file,
+  gff_file
+  )
+
+star_mapping_fastq(
+    star_index_with_gff.out.index,
+    reads
+)
+
+*/
+
+
+version = "2.7.3a"
+container_url = "lbmc/star:${version}"
+
+params.star_mapping_fastq_out = ""
+
+
+// Process gff3_2_gtf: convert the file of a (genome_id, gff3_file) tuple with
+// `gffread -T`, writing <genome_id>.gtf, which is emitted on `gtf`.
+// Uses the "dceoy/cufflinks" container rather than the STAR one.
+process gff3_2_gtf {
+  container = "dceoy/cufflinks"
+  label "small_mem_mono_cpus"
+
+    input:
+        tuple val(genome_id), path(gff3_file)
+    output:
+        path "${genome_id}.gtf", emit: gtf
+    script:
+"""
+gffread ${gff3_file} -T -o ${genome_id}.gtf
+"""
+}
+
+
+// Process index_with_gtf: build a STAR genome index (genomeGenerate mode) in
+// the task directory from a (genome_id, genome_fasta) tuple and a GTF file.
+// Emits (genome_id, "*") on `index`, i.e. everything the "*" glob matches.
+// --genomeSAindexNbases is fixed to 13; the trailing shell comment records the
+// formula it is derived from.
+process index_with_gtf {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+
+  input:
+    tuple val(genome_id), path(genome_fasta)
+    path gtf_file
+
+  output:
+    tuple val(genome_id), path ("*"), emit: index
+
+  script:
+"""
+STAR --runThreadN ${task.cpus} --runMode genomeGenerate \
+--genomeDir ./ \
+--genomeFastaFiles ${genome_fasta}  \
+--sjdbGTFfile ${gtf_file} \
+--genomeSAindexNbases 13 # min(14, log2(GenomeLength)/2 - 1)
+"""
+}
+
+// Workflow index_with_gff: convert `gff_file` to GTF with gff3_2_gtf, then
+// build the STAR index with index_with_gtf.
+// The index is emitted as `index`, the name used in the usage example of this
+// module. The former emit name `report` is kept as an alias of the same
+// channel so existing callers keep working.
+workflow index_with_gff {
+  take:
+    genome_fasta
+    gff_file
+  main:
+    gff3_2_gtf(gff_file)
+    index_with_gtf(genome_fasta,gff3_2_gtf.out.gtf)
+  emit:
+    index = index_with_gtf.out.index
+    report = index_with_gtf.out.index
+}
+
+
+// Process index_without_gff: build a STAR genome index (genomeGenerate mode)
+// in the task directory from a (genome_id, genome_fasta) tuple, without an
+// annotation file. Emits (genome_id, "*") on `index`.
+process index_without_gff {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+
+  input:
+    tuple val(genome_id), path(genome_fasta)
+
+  output:
+    tuple val(genome_id), path ("*"), emit: index
+
+  script:
+"""
+STAR --runThreadN ${task.cpus} --runMode genomeGenerate \
+--genomeDir ./ \
+--genomeFastaFiles ${genome_fasta}  \
+--genomeSAindexNbases 13 # min(14, log2(GenomeLength)/2 - 1)
+"""
+}
+
+
+// Process mapping_fastq: align reads with STAR against a prebuilt index.
+// Inputs: (index_id, index) index files and (reads_id, reads) one or two read
+// files. The index files are moved into an index/ folder first, then STAR is
+// run with coordinate-sorted BAM output, and the BAM is renamed to
+// <file_prefix>.bam.
+// Emits *.Log.final.out on `report` and (reads_id, *.bam) on `bam`.
+// Output files are now named after file_prefix (the first element of reads_id
+// when it is a List). file_prefix was computed but the script used reads_id
+// directly, which renders a List id as "[a, b]" in the file names.
+process mapping_fastq {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  if (params.star_mapping_fastq_out != "") {
+    publishDir "results/${params.star_mapping_fastq_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(index_id), path(index)
+    tuple val(reads_id), path(reads) 
+
+  output:
+    path "*.Log.final.out", emit: report
+    tuple val(reads_id), path("*.bam"), emit: bam
+
+  script:
+if (reads_id instanceof List){
+    file_prefix = reads_id[0]
+  } else {
+    file_prefix = reads_id
+  }
+
+if (reads.size() == 2)
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesCommand zcat \
+--readFilesIn ${reads[0]} ${reads[1]} \
+--outFileNamePrefix ${file_prefix}. \
+--alignIntronMax 10000 \
+--outSAMtype BAM SortedByCoordinate \
+--outSAMstrandField intronMotif
+
+mv ${file_prefix}.Aligned.sortedByCoord.out.bam ${file_prefix}.bam
+"""
+else
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesCommand zcat \
+--readFilesIn ${reads} \
+--outFileNamePrefix ${file_prefix}. \
+--alignIntronMax 10000 \
+--outSAMtype BAM SortedByCoordinate \
+--outSAMstrandField intronMotif
+
+mv ${file_prefix}.Aligned.sortedByCoord.out.bam ${file_prefix}.bam
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/stringtie2/main.nf
+++ b/src/nf_modules/stringtie2/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "2.1.1"
+container_url = "lbmc/stringtie2:${version}"
+
+// Process jcount: takes a (file_id, bam) tuple and emits (file_id, *.sf) on
+// `quant`.
+// NOTE(review): the script calls `salmon quant` and reads params.salmon_out
+// although this module uses the stringtie2 container; it looks like a copy of
+// the salmon module, including the `-t XXXXXXXXXX` placeholder - confirm
+// before use.
+process jcount {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.salmon_out != "") {
+    publishDir "results/${params.salmon_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(bam)
+
+  output:
+    tuple val(file_id), path("*.sf"), emit: quant
+  script:
+"""
+salmon quant -l A --noErrorModel -t XXXXXXXXXX -a ${bam} -p 4 -o ${params.salmon_out}
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/trinity/main.nf
+++ b/src/nf_modules/trinity/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "2.13.2"
+container_url = "lbmc/trinity:${version}"
+
+// params.min_glue / params.min_contig_length: values passed to Trinity.
+// params.assembly_out: publish sub-folder of results/ (empty = no publish).
+// NOTE(review): params.sample is declared but not used in the code visible here.
+params.sample = 3
+params.min_glue = 1
+params.min_contig_length = 200
+params.assembly_out = ""
+
+
+// Workflow assembly: run complete_assembly on the `fastq` channel, then
+// super_transcript on the assembled fasta. Emits the assembly on `fasta` and
+// the super_transcript output on `super_transcript`.
+workflow assembly {
+  take:
+    fastq
+  main:
+    complete_assembly(fastq)
+    super_transcript(complete_assembly.out.fasta)
+
+  emit:
+    fasta = complete_assembly.out.fasta
+    super_transcript = super_transcript.out.fasta
+}
+
+// Process complete_assembly: run Trinity on a (file_id, fastq) tuple, in
+// paired mode (--left/--right) when `fastq` has two entries and in --single
+// mode otherwise. Results go to trinity_output_<file_prefix>, where
+// file_prefix is the first element of file_id for a List, its first value for
+// a Map, and file_id itself otherwise.
+// Emits the output folder (`folder`), the .Trinity.fasta file (`fasta`), the
+// gene_trans_map file (`gene_map`) and salmon_outdir/quant.sf (`quant`).
+// --max_memory is derived from task.memory by stripping the "GB" suffix.
+// NOTE(review): this assumes task.memory is expressed in GB - confirm.
+process complete_assembly {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "$file_id"
+  if (params.assembly_out != "") {
+    publishDir "results/${params.assembly_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fastq)
+
+  output:
+    tuple val(file_id), path("trinity_output_${file_prefix}/"), emit: folder
+    tuple val(file_id), path("trinity_output_${file_prefix}.Trinity.fasta"), emit: fasta
+    tuple val(file_id), path("trinity_output_${file_prefix}.Trinity.fasta.gene_trans_map"), emit: gene_map 
+    tuple val(file_id), path("trinity_output_${file_prefix}/salmon_outdir/quant.sf"), emit: quant
+
+  script:
+
+  switch(file_id) {
+    case {it instanceof List}:
+      file_prefix = file_id[0]
+    break;
+    case {it instanceof Map}:
+      file_prefix = file_id.values()[0]
+    break;
+    default:
+      file_prefix = file_id
+    break;
+  };
+  def memory = "${task.memory}" - ~/\s*GB/
+
+  if (fastq.size() == 2)
+"""
+  mkdir trinity_output_${file_prefix}
+  Trinity \
+    --seqType fq \
+    --max_memory ${memory}G \
+    --left ${fastq[0]} \
+    --right ${fastq[1]} \
+    --CPU ${task.cpus} \
+    --min_glue ${params.min_glue} \
+    --min_contig_length ${params.min_contig_length} \
+    --output trinity_output_${file_prefix}
+"""
+  else
+"""
+  mkdir trinity_output_${file_prefix}
+  Trinity \
+    --seqType fq \
+    --max_memory ${memory}G \
+    --single ${fastq} \
+    --CPU ${task.cpus} \
+    --min_glue ${params.min_glue} \
+    --min_contig_length ${params.min_contig_length} \
+    --output trinity_output_${file_prefix}
+"""
+}
+
+
+// Process super_transcript: run Trinity_gene_splice_modeler.py on the
+// assembled fasta of a (file_id, fasta) tuple.
+// Emits (file_id, trinity_genes.fasta, trinity_genes.gtf) on `fasta`.
+// The file_prefix and memory values that used to be computed here were never
+// used by the command or the outputs, so they were removed.
+process super_transcript {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.assembly_out != "") {
+    publishDir "results/${params.assembly_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+
+  output:
+    tuple val(file_id), path("trinity_genes.fasta"), path("trinity_genes.gtf"), emit: fasta
+
+  script:
+"""
+Trinity_gene_splice_modeler.py \
+  --trinity_fasta ${fasta}
+"""
+}
--- a/src/nf_modules/ucsc/main.nf
+++ b/src/nf_modules/ucsc/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "407"
+container_url = "lbmc/ucsc:${version}"
+
+include {
+  index_fasta
+} from './../samtools/main'
+
+// params.bedgraph_to_bigwig: extra options passed to bedGraphToBigWig.
+// params.bedgraph_to_bigwig_out: publish sub-folder of results/ (empty = no publish).
+params.bedgraph_to_bigwig = ""
+params.bedgraph_to_bigwig_out = ""
+// Process bedgraph_to_bigwig: convert the bedGraph of a (file_id, bg) tuple to
+// bigWig, using a (bed_id, bed) tuple to build the chromosome sizes
+// (columns 1 and 3 of the bed file).
+// Emits (file_id, *.bw) on `bw`; the file is <bg.simpleName>_norm.bw.
+// Fixes:
+//  - the sorted bedGraph was redirected with `>` into a file literally named
+//    "bedGraphToBigWig", so the converter never ran; it is now sorted into an
+//    intermediate file that is handed to bedGraphToBigWig;
+//  - LC_COLLATE is exported so that `sort` actually sees it;
+//  - the second input no longer rebinds `file_id`, so the emitted id is the
+//    one of the bedGraph.
+process bedgraph_to_bigwig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bedgraph_to_bigwig_out != "") {
+    publishDir "results/${params.bedgraph_to_bigwig_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(bg)
+  tuple val(bed_id), path(bed)
+
+  output:
+  tuple val(file_id), path("*.bw"), emit: bw
+
+  script:
+"""
+export LC_COLLATE=C
+# transform bed file of start-stop chromosome size to stop chromosome size
+awk -v OFS="\\t" '{print \$1, \$3}' ${bed} > chromsize.txt
+
+sort -T ./ -k1,1 -k2,2n ${bg} > sorted.bedGraph
+bedGraphToBigWig ${params.bedgraph_to_bigwig} \
+    sorted.bedGraph \
+    chromsize.txt \
+    ${bg.simpleName}_norm.bw
+"""
+}
+
+// NOTE(review): params.wig_to_bedgraph and params.wig_to_bedgraph_out are
+// declared but not read by the workflow below.
+params.wig_to_bedgraph = ""
+params.wig_to_bedgraph_out = ""
+// Workflow wig_to_bedgraph: convert `wig` to bigWig with wig_to_bigwig (which
+// needs `fasta`), then to bedGraph with bigwig_to_bedgraph.
+// Emits the bedGraph channel on `bg`.
+workflow wig_to_bedgraph {
+  take:
+    fasta
+    wig
+  main:
+    wig_to_bigwig(
+      fasta,
+      wig
+    )
+    bigwig_to_bedgraph(
+      wig_to_bigwig.out.bw
+    )
+  emit:
+  bg = bigwig_to_bedgraph.out.bg
+}
+
+// Workflow wig2_to_bedgraph2: two-file variant of wig_to_bedgraph. Converts
+// `wig` to bigWig with wig2_to_bigwig2 (which needs `fasta`), then to bedGraph
+// with bigwig2_to_bedgraph2. Emits the bedGraph channel on `bg`.
+workflow wig2_to_bedgraph2 {
+  take:
+    fasta
+    wig
+  main:
+    wig2_to_bigwig2(
+      fasta,
+      wig
+    )
+    bigwig2_to_bedgraph2(
+      wig2_to_bigwig2.out.bw
+    )
+  emit:
+  bg = bigwig2_to_bedgraph2.out.bg
+}
+
+// params.bigwig_to_bedgraph_out: publish sub-folder of results/ (empty = no publish).
+// NOTE(review): params.bigwig_to_bedgraph is declared but not used below.
+params.bigwig_to_bedgraph = ""
+params.bigwig_to_bedgraph_out = ""
+// Process bigwig_to_bedgraph: run bigWigToBedGraph on a (file_id, bw) tuple,
+// writing <bw.simpleName>.bg. Emits (file_id, *.bg) on `bg`.
+process bigwig_to_bedgraph {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig_to_bedgraph_out != "") {
+    publishDir "results/${params.bigwig_to_bedgraph_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(bw)
+
+  output:
+  tuple val(file_id), path("*.bg"), emit: bg
+
+  script:
+"""
+bigWigToBedGraph ${bw} ${bw.simpleName}.bg
+"""
+}
+
+// params.bigwig2_to_bedgraph2_out: publish sub-folder of results/ (empty = no publish).
+// NOTE(review): params.bigwig2_to_bedgraph2 is declared but not used below.
+params.bigwig2_to_bedgraph2 = ""
+params.bigwig2_to_bedgraph2_out = ""
+// Process bigwig2_to_bedgraph2: run bigWigToBedGraph on both files of a
+// (file_id, bw_a, bw_b) tuple. Emits (file_id, <bw_a.simpleName>.bg,
+// <bw_b.simpleName>.bg) on `bg`.
+// Publishing is now driven by this process's own
+// params.bigwig2_to_bedgraph2_out; it previously read
+// params.bigwig_to_bedgraph_out, the setting of the single-file process.
+process bigwig2_to_bedgraph2 {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig2_to_bedgraph2_out != "") {
+    publishDir "results/${params.bigwig2_to_bedgraph2_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(bw_a), path(bw_b)
+
+  output:
+  tuple val(file_id), path("${bw_a.simpleName}.bg"), path("${bw_b.simpleName}.bg"), emit: bg
+
+  script:
+"""
+bigWigToBedGraph ${bw_a} ${bw_a.simpleName}.bg
+bigWigToBedGraph ${bw_b} ${bw_b.simpleName}.bg
+"""
+}
+
+// params.bigwig_to_wig_out: publish sub-folder of results/ (empty = no publish).
+// NOTE(review): params.bigwig_to_wig is declared but not used below.
+params.bigwig_to_wig = ""
+params.bigwig_to_wig_out = ""
+// Process bigwig_to_wig: convert the bigWig of a (file_id, bw) tuple to
+// bedGraph with bigWigToBedGraph, then to wig with bedgraph_to_wig.pl
+// (--step 10). Emits (file_id, *.wig) on `wig`.
+process bigwig_to_wig {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig_to_wig_out != "") {
+    publishDir "results/${params.bigwig_to_wig_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(bw)
+
+  output:
+  tuple val(file_id), path("*.wig"), emit: wig
+
+  script:
+"""
+bigWigToBedGraph ${bw} ${bw.simpleName}.bg
+bedgraph_to_wig.pl --bedgraph ${bw.simpleName}.bg --wig ${bw.simpleName}.wig --step 10
+"""
+}
+
+// params.bigwig2_to_wig2_out: publish sub-folder of results/ (empty = no publish).
+// NOTE(review): params.bigwig2_to_wig2 is declared but not used below.
+params.bigwig2_to_wig2 = ""
+params.bigwig2_to_wig2_out = ""
+// Process bigwig2_to_wig2: convert both bigWig files of a
+// (file_id, bw_a, bw_b) tuple to bedGraph, then to wig with
+// bedgraph_to_wig.pl (--step 10). Emits (file_id, <bw_a.simpleName>.wig,
+// <bw_b.simpleName>.wig) on `wig`.
+// Publishing is now driven by this process's own params.bigwig2_to_wig2_out;
+// it previously read params.bigwig_to_wig_out.
+process bigwig2_to_wig2 {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.bigwig2_to_wig2_out != "") {
+    publishDir "results/${params.bigwig2_to_wig2_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(bw_a), path(bw_b)
+
+  output:
+  tuple val(file_id), path("${bw_a.simpleName}.wig"), path("${bw_b.simpleName}.wig"), emit: wig
+
+  script:
+"""
+bigWigToBedGraph ${bw_a} ${bw_a.simpleName}.bg
+bedgraph_to_wig.pl --bedgraph ${bw_a.simpleName}.bg --wig ${bw_a.simpleName}.wig --step 10
+bigWigToBedGraph ${bw_b} ${bw_b.simpleName}.bg
+bedgraph_to_wig.pl --bedgraph ${bw_b.simpleName}.bg --wig ${bw_b.simpleName}.wig --step 10
+"""
+}
+
+// Parameters of the wig -> bigWig conversion.
+// NOTE(review): neither value is read by the workflow below.
+params.wig_to_bigwig = ""
+params.wig_to_bigwig_out = ""
+
+// Workflow wig_to_bigwig: index `fasta` with index_fasta, then convert `wig`
+// with wig_to_bigwig_sub using that index. Emits the bigWig channel on `bw`.
+workflow wig_to_bigwig {
+  take:
+    fasta
+    wig
+  main:
+    index_fasta(fasta)
+    wig_to_bigwig_sub(
+      wig,
+      index_fasta.out.index
+    )
+  emit:
+  bw = wig_to_bigwig_sub.out.bw
+}
+
+// Process wig_to_bigwig_sub: convert the wig of a (file_id, w) tuple to bigWig
+// with `wigToBigWig -clip`. Chromosome sizes are taken from columns 1 and 2 of
+// the fasta index in the (idx_id, fasta_idx) tuple.
+// Emits (file_id, <w.simpleName>.bw) on `bw`.
+// Publishing is now driven by params.wig_to_bigwig_out, the setting declared
+// for this conversion; it previously read params.bigwig_to_wig_out, the
+// setting of the opposite conversion.
+process wig_to_bigwig_sub {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.wig_to_bigwig_out != "") {
+    publishDir "results/${params.wig_to_bigwig_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(w)
+  tuple val(idx_id), path(fasta_idx)
+
+  output:
+  tuple val(file_id), path("${w.simpleName}.bw"), emit: bw
+
+  script:
+"""
+cut -f 1,2 ${fasta_idx} > ${fasta_idx.simpleName}.sizes
+wigToBigWig -clip ${w} ${fasta_idx.simpleName}.sizes ${w.simpleName}.bw
+"""
+}
+
+// Parameters of the two-file wig -> bigWig conversion.
+// NOTE(review): neither value is read by the workflow below.
+params.wig2_to_bigwig2 = ""
+params.wig2_to_bigwig2_out = ""
+
+// Workflow wig2_to_bigwig2: index `fasta` with index_fasta, then convert
+// `wigs` with wig2_to_bigwig2_sub using that index. Emits the bigWig channel
+// on `bw`.
+workflow wig2_to_bigwig2 {
+  take:
+    fasta
+    wigs
+  main:
+    index_fasta(fasta)
+    wig2_to_bigwig2_sub(
+      wigs,
+      index_fasta.out.index
+    )
+  emit:
+  bw = wig2_to_bigwig2_sub.out.bw
+}
+
+// Process wig2_to_bigwig2_sub: convert both wig files of a
+// (file_id, w_a, w_b) tuple to bigWig with `wigToBigWig -clip`. Chromosome
+// sizes are taken from columns 1 and 2 of the fasta index in the
+// (idx_id, fasta_idx) tuple.
+// Emits (file_id, <w_a.simpleName>.bw, <w_b.simpleName>.bw) on `bw`.
+// Publishing is now driven by params.wig2_to_bigwig2_out, the setting declared
+// for this conversion; it previously read params.bigwig_to_wig_out.
+process wig2_to_bigwig2_sub {
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "${file_id}"
+  if (params.wig2_to_bigwig2_out != "") {
+    publishDir "results/${params.wig2_to_bigwig2_out}", mode: 'copy'
+  }
+
+  input:
+  tuple val(file_id), path(w_a), path(w_b)
+  tuple val(idx_id), path(fasta_idx)
+
+  output:
+  tuple val(file_id), path("${w_a.simpleName}.bw"), path("${w_b.simpleName}.bw"), emit: bw
+
+  script:
+"""
+cut -f 1,2 ${fasta_idx} > ${fasta_idx.simpleName}.sizes
+wigToBigWig -clip ${w_a} ${fasta_idx.simpleName}.sizes ${w_a.simpleName}.bw
+wigToBigWig -clip ${w_b} ${fasta_idx.simpleName}.sizes ${w_b.simpleName}.bw
+"""
+}
\ No newline at end of file
--- a/src/nf_modules/urqt/main.nf
+++ b/src/nf_modules/urqt/main.nf
+// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
+//
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+version = "d62c1f8"
+container_url = "lbmc/urqt:${version}"
+
+trim_quality = "20"
+
+// params.trimming: options passed to UrQt (default "--t 20").
+params.trimming = "--t 20"
+// Process trimming: quality-trim the reads of a (file_id, reads) tuple with
+// UrQt, in paired mode (--in/--inpair) when `reads` has two entries and in
+// single mode otherwise. Output names start with file_prefix, the first
+// element of file_id when it is a List and file_id itself otherwise.
+// Emits (file_id, trimmed fastq.gz files) on `fastq` and the
+// *_trimming_report.txt file on `report`.
+// Fixes: the output tuple and the paired-end report name referenced an
+// undefined `pair_id`; the output glob only matched *_trim_R{1,2}.fastq.gz and
+// so missed the single-end file <file_prefix>_trim.fastq.gz.
+process trimming {
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  tag "${file_id}"
+
+  input:
+  tuple val(file_id), path(reads)
+
+  output:
+  tuple val(file_id), path("*_trim*.fastq.gz"), emit: fastq
+  path "*_report.txt", emit: report
+
+  script:
+  if (file_id instanceof List){
+    file_prefix = file_id[0]
+  } else {
+    file_prefix = file_id
+  }
+  if (reads.size() == 2)
+"""
+UrQt ${params.trimming} --m ${task.cpus} --gz \
+  --in ${reads[0]} --inpair ${reads[1]} \
+  --out ${file_prefix}_trim_R1.fastq.gz --outpair ${file_prefix}_trim_R2.fastq.gz \
+  > ${file_prefix}_trimming_report.txt
+"""
+  else
+"""
+UrQt ${params.trimming} --m ${task.cpus} --gz \
+  --in ${reads[0]} \
+  --out ${file_prefix}_trim.fastq.gz \
+  > ${file_prefix}_trimming_report.txt
+"""
+}
\ No newline at end of file
--- a/src/nf_test.config
+++ b/src/nf_test.config
-docker {
-  temp = 'auto'
-  enabled = true
-}
-
-process {
-  $adaptor_removal {
-    container = "cutadapt:1.14"
-  }
-}
-
-process {
-  $trimming {
-    container = "cutadapt:1.14"
-  }
-}
No results found