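// Nextflow DSL2 subworkflow and processes that truncate every FASTQ file of a
// species to the smallest line count observed within that species.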
/*
 * SUBSAMPLE_READ
 *
 * Truncates every FASTQ input so that all samples sharing the same
 * `meta.specie` end up with the same number of lines (the smallest line
 * count observed in that species group).
 *
 * take:
 *   fastq   - channel of [ meta, fastq ]; `meta` must provide a `specie` field
 * emit:
 *   fastq   - channel of [ meta, subsampled fastq ]
 *   version - mixed versions.yml files of the three processes
 */
workflow SUBSAMPLE_READ {
    take:
    fastq

    main:
    // One line-count file per sample.
    COUNT_READS(fastq)

    // Gather the count files of each species and keep the smallest count.
    SORT_READS(
        COUNT_READS.out.count
            .map { meta, count -> [meta.specie, count] }
            .groupTuple()
    )

    SAMPLE_READS(
        SORT_READS.out.count
            // Pair each species minimum with every sample of that species:
            // emits [ [specie, min_count], [specie, [meta, fastq]] ].
            .cross(
                fastq.map { item -> [item[0].specie, item] }
            )
            // Flatten to [ meta, fastq, min_count ].
            .map { smallest, sample ->
                [
                    sample[1][0],
                    sample[1][1],
                    smallest[1]
                ]
            }
            // Append the sample's own count file: [ meta, fastq, min_count, own_count ].
            .join(COUNT_READS.out.count)
    )

    emit:
    fastq   = SAMPLE_READS.out.reads
    version = COUNT_READS.out.versions
        .mix(SAMPLE_READS.out.versions)
        .mix(SORT_READS.out.versions)
}
/*
 * COUNT_READS
 *
 * Writes the number of lines of the decompressed FASTQ input to
 * `<meta.id>.csv`. Note that this is a line count (`wc -l`), not a record
 * count.
 *
 * input:  [ meta, fastq ]  - gzip-compressed file(s) readable by zcat
 * output: count    - [ meta, <meta.id>.csv ]
 *         versions - versions.yml
 */
process COUNT_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'
    container "lbmc/alpine:3.17"

    input:
    tuple val(meta), path(fastq)

    output:
    tuple val(meta), path("${meta.id}.csv"), emit: count
    path "versions.yml"                    , emit: versions

    script:
    // The tool entry is nested under the process key in versions.yml.
    """
    zcat ${fastq} | wc -l > ${meta.id}.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        split: v1.35.0
    END_VERSIONS
    """
}
/*
 * SORT_READS
 *
 * Concatenates the count files of one species, sorts them numerically and
 * keeps the first (smallest) value in `count.csv`.
 *
 * input:  [ specie, count ]  - one or more single-number count files
 * output: count    - [ specie, count.csv ]
 *         versions - versions.yml
 */
process SORT_READS {
    tag "$specie"
    label 'small_mem_mono_cpus'
    container "lbmc/alpine:3.17"

    input:
    tuple val(specie), path(count)

    output:
    tuple val(specie), path("count.csv"), emit: count
    path "versions.yml"                 , emit: versions

    script:
    // The tool entry is nested under the process key in versions.yml.
    """
    cat ${count} | sort -u -k1n | head -n 1 > count.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        split: v1.35.0
    END_VERSIONS
    """
}
/*
 * SAMPLE_READS
 *
 * Produces `sample_<reads>` containing the first SAMPLE_SIZE lines of the
 * input. When the input already has exactly SAMPLE_SIZE lines it is
 * symlinked instead of being re-compressed. The task fails if the resulting
 * line count differs from SAMPLE_SIZE.
 *
 * input:  [ meta, reads, sample_size, read_number ]
 *           reads       - gzip-compressed file readable by zcat
 *           sample_size - file holding the target line count
 *           read_number - file holding the line count of `reads`
 * output: reads    - [ meta, sample_<reads> ]
 *         versions - versions.yml
 */
process SAMPLE_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'
    debug true
    container "lbmc/alpine:3.17"

    input:
    tuple val(meta), path(reads), path(sample_size), path(read_number)

    output:
    tuple val(meta), path("sample_${reads}"), emit: reads
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    if ( !sample_size ) {
        error "SEQTK/SAMPLE must have a sample_size value included"
    }
    // NOTE(review): the version entry below probes `seqtk`, which this script
    // never calls — confirm whether it should report the tool actually used.
    """
    # \$( ... ) is command substitution; \$(( ... )) would be arithmetic expansion.
    SAMPLE_SIZE=\$(cat ${sample_size} | tr -d '\\n')
    CURRENT_READ_NUMBER=\$(cat ${read_number} | tr -d '\\n')

    if [ "\$SAMPLE_SIZE" -eq "\$CURRENT_READ_NUMBER" ]; then
        # Already at the target size: reuse the input as is.
        ln -s ${reads} sample_${reads}
        READ_NUMBER=\$SAMPLE_SIZE
    else
        zcat ${reads} | head -n "\$SAMPLE_SIZE" | gzip -c > sample_${reads}
        READ_NUMBER=\$(zcat sample_${reads} | wc -l)
    fi

    if [ "\$SAMPLE_SIZE" -ne "\$READ_NUMBER" ]; then
        echo "sample_${reads}: expected \$SAMPLE_SIZE lines, got \$READ_NUMBER" >&2
        exit 1
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
    END_VERSIONS
    """
}