Skip to content
Snippets Groups Projects
Verified Commit 92ef5bb7 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

add sample_reads.nf: subworkflow to make number of reads equal between Male and Female

parent 0d3f5da9
No related branches found
No related tags found
No related merge requests found
......@@ -3,13 +3,45 @@ workflow SUBSAMPLE_READ {
fastq
main:
COUNT_READS(fastq)
COUNT_READS.out.fastq
.groupTuple(by: [2, 3], sort: true)
.view()
SAMPLE_READS(fastq_to_sample)
SORT_READS(
COUNT_READS.out.count
.map{
it ->
[
it[0].specie,
it[1]
]
}
.groupTuple()
)
SAMPLE_READS(SORT_READS.out.count
.cross(
fastq
.map{
item ->
[
item[0].specie,
item
]
}
)
.map{
it ->
[
it[1][1][0],
it[1][1][1],
it[0][1]
]
}
.join(
COUNT_READS.out.count
)
)
emit:
fastq: SAMPLE_READS.out.reads
version: SAMPLE_READS.out.version.mix(COUNT_READS.version)
fastq = SAMPLE_READS.out.reads
version = COUNT_READS.out.versions
.mix(SAMPLE_READS.out.versions)
.mix(SORT_READS.out.versions)
}
process COUNT_READS {
......@@ -22,13 +54,37 @@ process COUNT_READS {
tuple val(meta), path(fastq)
output:
tuple val(meta), stdout, path("*.fastq.gz"), emit: fastq
tuple val(meta), path("${meta.id}.csv"), emit: count
path "versions.yml" , emit: versions
script:
def args = task.ext.args ?: ''
"""
zcat ${fastq} | wc -l
zcat ${fastq} | wc -l > ${meta.id}.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
split: v1.35.0
END_VERSIONS
"""
}
process SORT_READS {
tag "$specie"
label 'small_mem_mono_cpus'
container "lbmc/alpine:3.17"
input:
tuple val(specie), path(count)
output:
tuple val(specie), path("count.csv"), emit: count
path "versions.yml" , emit: versions
script:
def args = task.ext.args ?: ''
"""
cat ${count} | sort -u -k1n | head -n 1 > count.csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
......@@ -40,14 +96,15 @@ process COUNT_READS {
process SAMPLE_READS {
tag "$meta.id"
label 'small_mem_mono_cpus'
debug true
container "quay.io/biocontainers/seqtk:1.3--h5bf99c6_3"
container "lbmc/alpine:3.17"
input:
tuple val(meta), path(reads), val(sample_size)
tuple val(meta), path(reads), path(sample_size), path(read_number)
output:
tuple val(meta), path("*.fastq.gz"), emit: reads
tuple val(meta), path("sample_${reads}"), emit: reads
path "versions.yml" , emit: versions
when:
......@@ -63,15 +120,11 @@ process SAMPLE_READS {
error "SEQTK/SAMPLE must have a sample_size value included"
}
"""
printf "%s\\n" $reads | while read f;
do
seqtk \\
sample \\
$args \\
\$f \\
$sample_size \\
| gzip --no-name > ${prefix}_\$(basename \$f)
done
if [\$(cat $sample_size | tr -d '\\n') -eq \$(cat $read_number | tr -d '\\n')]; then
ln -s ${reads} sample_${reads}
else
zcat ${reads} | head -n \$(cat $sample_size | tr -d '\\n') | gzip -c > sample_${reads}
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment