/*
 * SUBSAMPLE_READ
 *
 * Counts the reads of every sample, then subsamples each sample with seqtk.
 *
 * take:
 *   fastq   - channel: [ val(meta), path(fastq) ]
 * emit:
 *   fastq   - channel: [ val(meta), path(subsampled *.fastq.gz) ]
 *   version - channel: versions.yml files from both processes
 *
 * NOTE(review): the original never defined the channel handed to SAMPLE_READS,
 * so the intended sample size could not be read from the code. This version
 * downsamples every sample to the smallest per-file read count seen across
 * all samples -- confirm this is the wanted policy.
 */
workflow SUBSAMPLE_READ {
    take:
    fastq

    main:
    COUNT_READS(fastq)

    // Turn the captured stdout into a number and reorder the tuple to
    // [ meta, reads, per-file read count ]. COUNT_READS counts all staged
    // files together, so the total is divided by the number of files
    // (e.g. 2 for paired-end data) to get the count for a single file.
    counted = COUNT_READS.out.fastq.map { meta, n_reads, reads ->
        def n_files = reads instanceof Collection ? reads.size() : 1
        [ meta, reads, n_reads.trim().toLong().intdiv(n_files) ]
    }

    // Smallest per-file read count over all samples (single-value channel).
    smallest = counted.map { meta, reads, n -> n }.min()

    // Attach the common target size to every sample: [ meta, reads, sample_size ].
    // A target of 0 (empty input file) makes SAMPLE_READS abort through its
    // own sample_size check.
    fastq_to_sample = counted
        .combine(smallest)
        .map { meta, reads, n, target -> [ meta, reads, target ] }

    SAMPLE_READS(fastq_to_sample)

    emit:
    fastq   = SAMPLE_READS.out.reads
    version = SAMPLE_READS.out.versions.mix(COUNT_READS.out.versions)
}

/*
 * COUNT_READS
 *
 * Prints the number of FASTQ records (lines / 4) found in the staged gzipped
 * file(s) and passes the input files through unchanged.
 *
 * input:  [ val(meta), path(fastq) ]
 * output: fastq    - [ val(meta), stdout (read count as text), path(fastq files) ]
 *         versions - versions.yml
 */
process COUNT_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'

    container "lbmc/alpine:3.17"

    input:
    tuple val(meta), path(fastq)

    output:
    // The only *.fastq.gz files in the work dir are the staged inputs, which
    // glob outputs skip by default; includeInputs lets them be re-emitted.
    tuple val(meta), stdout, path("*.fastq.gz", includeInputs: true), emit: fastq
    path "versions.yml"                                             , emit: versions

    script:
    // NOTE(review): the version entry below is labelled "split" although the
    // script only runs zcat and wc -- confirm the intended tool name/version.
    """
    echo \$(( \$(zcat ${fastq} | wc -l) / 4 ))

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        split: v1.35.0
    END_VERSIONS
    """
}

/*
 * SAMPLE_READS
 *
 * Runs `seqtk sample` on every staged read file, keeping `sample_size` reads
 * per file, and gzips each result as <prefix>_<input file name>.
 *
 * input:  [ val(meta), path(reads), val(sample_size) ]
 * output: reads    - [ val(meta), path(*.fastq.gz) ]
 *         versions - versions.yml
 */
process SAMPLE_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'

    container "quay.io/biocontainers/seqtk:1.3--h5bf99c6_3"

    input:
    tuple val(meta), path(reads), val(sample_size)

    output:
    tuple val(meta), path("*.fastq.gz"), emit: reads
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Add a fixed seed unless the caller already supplied one, so that every
    // file of a sample is sampled with the same seed.
    if (!(args ==~ /.*-s[0-9]+.*/)) {
        args += " -s100"
    }
    if ( !sample_size ) {
        error "SEQTK/SAMPLE must have a sample_size value included"
    }
    """
    printf "%s\\n" $reads | while read f;
    do
        seqtk \\
            sample \\
            $args \\
            \$f \\
            $sample_size \\
            | gzip --no-name > ${prefix}_\$(basename \$f)
    done

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
    END_VERSIONS
    """
}