/*
 * SUBSAMPLE_READ
 *
 * Truncates every gzipped FASTQ of a given `meta.specie` to the same number
 * of lines: the smallest line count observed among the files of that specie.
 *
 * take:
 *   fastq   - channel of [ meta, reads ]; `meta` must expose `id` and `specie`.
 * emit:
 *   fastq   - channel of [ meta, sample_<reads> ] produced by SAMPLE_READS.
 *   version - mixed `versions.yml` files of the three processes.
 */
workflow SUBSAMPLE_READ {
    take:
    fastq

    main:
    // One line-count file per input sample.
    COUNT_READS(fastq)

    // Gather the count files per specie and keep the smallest count.
    SORT_READS(
        COUNT_READS.out.count
            .map { it -> [ it[0].specie, it[1] ] }
            .groupTuple()
    )

    // cross() pairs each per-specie minimum with every sample of that specie:
    //   [ [specie, min_count_file], [specie, [meta, reads]] ]
    // which is reshaped into [ meta, reads, min_count_file ] and then joined
    // (on meta) with the sample's own count file.
    SAMPLE_READS(
        SORT_READS.out.count
            .cross(
                fastq.map { item -> [ item[0].specie, item ] }
            )
            .map { it -> [ it[1][1][0], it[1][1][1], it[0][1] ] }
            .join(COUNT_READS.out.count)
    )

    emit:
    fastq   = SAMPLE_READS.out.reads
    version = COUNT_READS.out.versions
        .mix(SAMPLE_READS.out.versions)
        .mix(SORT_READS.out.versions)
}

/*
 * COUNT_READS
 *
 * Writes the number of lines of the decompressed FASTQ to `<meta.id>.csv`.
 * Note that this is a line count, not a record count.
 */
process COUNT_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'

    container "lbmc/alpine:3.17"

    input:
    tuple val(meta), path(fastq)

    output:
    tuple val(meta), path("${meta.id}.csv"), emit: count
    path "versions.yml"                    , emit: versions

    script:
    // NOTE(review): the version entry below reports `split`, which this script
    // never calls - looks like a template leftover; confirm the intended tool.
    """
    zcat ${fastq} | wc -l > ${meta.id}.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        split: v1.35.0
    END_VERSIONS
    """
}

/*
 * SORT_READS
 *
 * Receives all count files of one specie and writes the numerically smallest
 * value to `count.csv`.
 */
process SORT_READS {
    tag "$specie"
    label 'small_mem_mono_cpus'

    container "lbmc/alpine:3.17"

    input:
    tuple val(specie), path(count)

    output:
    tuple val(specie), path("count.csv"), emit: count
    path "versions.yml"                 , emit: versions

    script:
    // NOTE(review): the version entry below reports `split`, which this script
    // never calls - looks like a template leftover; confirm the intended tool.
    """
    cat ${count} | sort -u -k1n | head -n 1 > count.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        split: v1.35.0
    END_VERSIONS
    """
}

/*
 * SAMPLE_READS
 *
 * Produces `sample_<reads>` containing the first SAMPLE_SIZE lines of `reads`.
 *
 * input:
 *   meta        - sample metadata map (uses `meta.id` for the tag)
 *   reads       - gzipped FASTQ
 *   sample_size - file holding the target line count (per-specie minimum)
 *   read_number - file holding the line count of `reads`
 *
 * When the file already has exactly SAMPLE_SIZE lines it is symlinked instead
 * of being recompressed. The script exits with status 1 if the resulting file
 * does not contain SAMPLE_SIZE lines.
 */
process SAMPLE_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'
    debug true

    container "lbmc/alpine:3.17"

    input:
    tuple val(meta), path(reads), path(sample_size), path(read_number)

    output:
    tuple val(meta), path("sample_${reads}"), emit: reads
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    if ( !sample_size ) {
        error "SEQTK/SAMPLE must have a sample_size value included"
    }
    // Shell notes:
    //  * values are read with command substitution `\$( ... )`; the previous
    //    `\$(( ... ))` form is arithmetic expansion and cannot run a pipeline.
    //  * `[` is a command, so it needs spaces around its arguments.
    //  * all whitespace is stripped from the counts because some `wc`
    //    implementations pad their output.
    // NOTE(review): the version entry reports `seqtk`, which this script never
    // calls - confirm whether seqtk exists in the container / is intended.
    """
    SAMPLE_SIZE=\$(cat ${sample_size} | tr -d '[:space:]')
    CURRENT_READ_NUMBER=\$(cat ${read_number} | tr -d '[:space:]')

    if [ "\$SAMPLE_SIZE" -eq "\$CURRENT_READ_NUMBER" ]; then
        ln -s ${reads} sample_${reads}
        READ_NUMBER="\$SAMPLE_SIZE"
    else
        zcat ${reads} | head -n "\$SAMPLE_SIZE" | gzip -c > sample_${reads}
        READ_NUMBER=\$(zcat sample_${reads} | wc -l | tr -d '[:space:]')
    fi

    if [ "\$SAMPLE_SIZE" -ne "\$READ_NUMBER" ]; then
        exit 1
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
    END_VERSIONS
    """
}