Skip to content
Snippets Groups Projects
Verified Commit 0d3f5da9 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

main.nf: add sample_reads modules

parent a636cd0d
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,7 @@ Testing pipeline for marseq scRNASeq analysis
params.kmer_size = 12
params.bootstrap = 10
include { SUBSAMPLE_READ } from "./modules/sample_reads"
include { SPLIT } from "./modules/split"
include { FASTKMERS } from "./modules/fastkmers"
include { MERGEKMER } from "./modules/mergekmer"
......@@ -27,7 +28,8 @@ Channel.fromPath( file(params.csv) )
Channel.fromPath(params.csv).set{params_csv}
workflow {
SPLIT(fastq.r1.mix(fastq.r2))
SUBSAMPLE_READ(fastq.r1.mix(fastq.r2))
SPLIT(SUBSAMPLE_READ.out.fastq)
FASTKMERS(SPLIT.out.fastq.transpose())
MERGEKMER(FASTKMERS.out.csv.groupTuple())
COLLATEKMER(MERGEKMER.out.csv.map{it -> [it[0].specie, it[1]] }.groupTuple())
......
/*
 * SUBSAMPLE_READ: count the records of every FASTQ input with COUNT_READS,
 * then downsample each input with SAMPLE_READS.
 *
 * take:
 *   fastq   - channel of [ val(meta), path(fastq) ]
 * emit:
 *   fastq   - channel of [ val(meta), path(subsampled fastq.gz) ]
 *   version - versions.yml files of SAMPLE_READS and COUNT_READS
 *
 * NOTE(review): the original body never defined `fastq_to_sample` and grouped
 * on tuple indexes [2, 3] that COUNT_READS does not emit. The wiring below
 * downsamples every input to the smallest read count seen across all inputs;
 * confirm that this is the intended sampling policy.
 */
workflow SUBSAMPLE_READ {
    take:
    fastq

    main:
    COUNT_READS(fastq)

    // COUNT_READS emits the raw `wc -l` output as a string: trim the trailing
    // newline and convert lines to records.
    // NOTE(review): assumes 4 lines per FASTQ record (no wrapped sequences).
    COUNT_READS.out.fastq
        .map { meta, n_lines, reads -> n_lines.trim().toLong().intdiv(4) }
        .min()
        .set { sample_size }

    // Re-order to the [ meta, reads, sample_size ] shape SAMPLE_READS expects.
    COUNT_READS.out.fastq
        .combine(sample_size)
        .map { meta, n_lines, reads, size -> [ meta, reads, size ] }
        .set { fastq_to_sample }

    SAMPLE_READS(fastq_to_sample)

    emit:
    fastq   = SAMPLE_READS.out.reads
    // Both processes name their version output `versions`.
    version = SAMPLE_READS.out.versions.mix(COUNT_READS.out.versions)
}
/*
 * COUNT_READS: print the number of lines of the decompressed FASTQ input.
 *
 * input:
 *   [ val(meta), path(fastq) ] - gzip-compressed FASTQ file(s)
 * output:
 *   fastq    - [ val(meta), stdout, path(fastq) ]; stdout is the raw
 *              `wc -l` output (a line count, not a record count, with a
 *              trailing newline)
 *   versions - versions.yml
 */
process COUNT_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'
    container "lbmc/alpine:3.17"

    input:
    tuple val(meta), path(fastq)

    output:
    // The script creates no new *.fastq.gz file: the only match is the staged
    // input, which Nextflow leaves out of output globs unless includeInputs
    // is set. Without it the task fails with a missing-output error.
    tuple val(meta), stdout, path("*.fastq.gz", includeInputs: true), emit: fastq
    path "versions.yml"                                              , emit: versions

    script:
    // NOTE(review): the version entry below reports `split`, which this script
    // never calls (it runs zcat and wc) - looks copied from another module;
    // confirm which tool/version should be recorded.
    """
    zcat ${fastq} | wc -l
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        split: v1.35.0
    END_VERSIONS
    """
}
/*
 * SAMPLE_READS: run `seqtk sample` on each input file and gzip the result.
 *
 * input:
 *   [ val(meta), path(reads), val(sample_size) ]
 * output:
 *   reads    - [ val(meta), path("*.fastq.gz") ]; each file is written as
 *              <prefix>_<input basename>
 *   versions - versions.yml with the seqtk version
 *
 * task.ext.args is passed to seqtk; " -s100" is appended when it holds no
 * -s<digits> option. task.ext.prefix overrides the default prefix (meta.id).
 */
process SAMPLE_READS {
    tag "$meta.id"
    label 'small_mem_mono_cpus'
    container "quay.io/biocontainers/seqtk:1.3--h5bf99c6_3"

    input:
    tuple val(meta), path(reads), val(sample_size)

    output:
    tuple val(meta), path("*.fastq.gz"), emit: reads
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Refuse to run without a sample size.
    if ( !sample_size ) {
        error "SEQTK/SAMPLE must have a sample_size value included"
    }
    def user_args  = task.ext.args ?: ''
    // Append the default seed option only when the caller supplied none.
    def has_seed   = user_args ==~ /.*-s[0-9]+.*/
    def seqtk_args = has_seed ? user_args : user_args + " -s100"
    def out_prefix = task.ext.prefix ?: "${meta.id}"
    """
    printf "%s\\n" ${reads} | while read f;
    do
        seqtk \\
            sample \\
            ${seqtk_args} \\
            \$f \\
            ${sample_size} \\
            | gzip --no-name > ${out_prefix}_\$(basename \$f)
    done
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
    END_VERSIONS
    """
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment