// sample_reads.nf
/*
 * SUBSAMPLE_READ: count the records of every FASTQ input, then subsample
 * each input with SAMPLE_READS.
 *
 * take:
 *   fastq   - channel of [ meta, fastq file(s) ], as expected by COUNT_READS
 * emit:
 *   fastq   - SAMPLE_READS.out.reads ([ meta, subsampled fastq.gz file(s) ])
 *   version - versions.yml files of both processes, mixed into one channel
 *
 * NOTE(review): the original draft passed an undefined `fastq_to_sample`
 * channel to SAMPLE_READS. This version assumes the goal is to bring every
 * sample down to the smallest per-file read count observed across all
 * inputs -- confirm this is the intended sampling policy.
 */
workflow SUBSAMPLE_READ {
    take:
      fastq
    main:
      COUNT_READS(fastq)

      // COUNT_READS emits [ meta, stdout, files ] where stdout is the total
      // number of lines over all files of the sample. A FASTQ record spans
      // 4 lines; for several files per sample (e.g. paired-end mates) the
      // total is divided evenly between them -- assumes the files of one
      // sample hold the same number of records, TODO confirm.
      counted = COUNT_READS.out.fastq
        .map { meta, n_lines, reads ->
            def n_files = reads instanceof List ? reads.size() : 1
            def n_reads = n_lines.trim().toLong().intdiv(4 * n_files)
            [meta, reads, n_reads]
        }

      // Smallest per-file read count over all samples (single-value channel).
      min_reads = counted
        .map { meta, reads, n_reads -> n_reads }
        .min()

      // Attach the common target size: [ meta, reads, sample_size ].
      fastq_to_sample = counted
        .combine(min_reads)
        .map { meta, reads, n_reads, target -> [meta, reads, target] }

      SAMPLE_READS(fastq_to_sample)
    emit:
      fastq = SAMPLE_READS.out.reads
      version = SAMPLE_READS.out.versions.mix(COUNT_READS.out.versions)
}

/*
 * COUNT_READS: print the number of lines contained in the gzipped FASTQ
 * file(s) of one sample.
 *
 * input:
 *   [ meta, fastq ]  - meta.id is used as the task tag; fastq is one or more
 *                      gzip-compressed files (all are concatenated by zcat)
 * output:
 *   fastq    - [ meta, stdout, files ]; stdout is the raw `wc -l` text
 *              (a line count followed by a newline, NOT a record count:
 *              a FASTQ record spans 4 lines)
 *   versions - versions.yml
 */
process COUNT_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'

    container "lbmc/alpine:3.17"

    input:
    tuple val(meta), path(fastq)

    output:
    // The only *.fastq.gz files in the work directory are the staged inputs.
    // Nextflow leaves input files out of output globs unless includeInputs
    // is set, so without it the task fails with a missing-output error.
    tuple val(meta), stdout, path("*.fastq.gz", includeInputs: true), emit: fastq
    path "versions.yml"           , emit: versions

    script:
    // NOTE(review): the versions entry below reports "split", which does not
    // match the tools actually run here (zcat, wc) -- looks copied from
    // another module; confirm what should be reported.
    """
    zcat ${fastq} | wc -l

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        split: v1.35.0
    END_VERSIONS
    """
}

/*
 * SAMPLE_READS: run `seqtk sample` on every input file of a sample and write
 * the result, gzip-compressed, as <prefix>_<original file name>.
 *
 * input:
 *   [ meta, reads, sample_size ] - sample_size is handed to seqtk as-is
 * output:
 *   reads    - [ meta, *.fastq.gz files produced by the task ]
 *   versions - versions.yml with the seqtk version
 *
 * task.ext.args   extra seqtk options; " -s100" is appended when no -s<digits>
 *                 seed option is present
 * task.ext.prefix output file prefix (defaults to meta.id)
 */
process SAMPLE_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'

    container "quay.io/biocontainers/seqtk:1.3--h5bf99c6_3"

    input:
    tuple val(meta), path(reads), val(sample_size)

    output:
    tuple val(meta), path("*.fastq.gz"), emit: reads
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Guard first: a missing or empty sample size is an error.
    if ( !sample_size ) {
        error "SEQTK/SAMPLE must have a sample_size value included"
    }
    def prefix    = task.ext.prefix ?: "${meta.id}"
    def user_args = task.ext.args ?: ''
    // Every file of the task shares one seed: the user-supplied one if
    // present, otherwise the default -s100.
    def has_seed  = user_args ==~ /.*-s[0-9]+.*/
    def args      = has_seed ? user_args : user_args + " -s100"
    """
    printf "%s\\n" $reads | while read f;
    do
        seqtk \\
            sample \\
            $args \\
            \$f \\
            $sample_size \\
            | gzip --no-name > ${prefix}_\$(basename \$f)
    done

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
    END_VERSIONS
    """
}