diff --git a/src/main.nf b/src/main.nf
index a8d444b0af842b20a3b02cfdb9ed521127e36e9a..72cba21fde16bbf3a1a625efba3e25891f269d49 100644
--- a/src/main.nf
+++ b/src/main.nf
@@ -7,6 +7,7 @@ Testing pipeline for marseq scRNASeq analysis
 params.kmer_size = 12
 params.bootstrap = 10
 
+include { SUBSAMPLE_READ } from "./modules/sample_reads"
 include { SPLIT } from "./modules/split"
 include { FASTKMERS } from "./modules/fastkmers"
 include { MERGEKMER } from "./modules/mergekmer"
@@ -27,7 +28,8 @@ Channel.fromPath( file(params.csv) )
 Channel.fromPath(params.csv).set{params_csv}
 
 workflow {
-  SPLIT(fastq.r1.mix(fastq.r2))
+  SUBSAMPLE_READ(fastq.r1.mix(fastq.r2))
+  SPLIT(SUBSAMPLE_READ.out.fastq)
   FASTKMERS(SPLIT.out.fastq.transpose())
   MERGEKMER(FASTKMERS.out.csv.groupTuple())
   COLLATEKMER(MERGEKMER.out.csv.map{it -> [it[0].specie, it[1]] }.groupTuple())
diff --git a/src/modules/sample_reads.nf b/src/modules/sample_reads.nf
new file mode 100644
index 0000000000000000000000000000000000000000..859e9c295a8e3243e760dc6a28e2109d4d4fa0c2
--- /dev/null
+++ b/src/modules/sample_reads.nf
@@ -0,0 +1,81 @@
+// Counts each FASTQ, then subsamples all to the smallest count. NOTE(review): min-count target is assumed — confirm.
+workflow SUBSAMPLE_READ {
+    take:
+      fastq  // channel of [meta, fastq] tuples, as consumed by COUNT_READS
+    main:
+      COUNT_READS(fastq)
+      sample_size = COUNT_READS.out.fastq.map { meta, n, reads -> n.trim().toLong() }.min()  // stdout is text
+      fastq_to_sample = COUNT_READS.out.fastq.map { meta, n, reads -> [meta, reads] }.combine(sample_size)
+      SAMPLE_READS(fastq_to_sample)  // receives [meta, reads, sample_size]
+    emit:
+      fastq = SAMPLE_READS.out.reads
+      version = SAMPLE_READS.out.versions.mix(COUNT_READS.out.versions)
+}
+
+// Emits the number of FASTQ records in the gzipped input on stdout and passes the file through.
+process COUNT_READS {
+    tag "$meta.id"
+    label 'small_mem_mono_cpus'
+    container "lbmc/alpine:3.17"
+
+    input:
+    tuple val(meta), path(fastq)
+
+    output:
+    // includeInputs: the glob only matches the staged input file, which is excluded from outputs by default.
+    tuple val(meta), stdout, path("*.fastq.gz", includeInputs: true), emit: fastq
+    path "versions.yml"           , emit: versions
+
+    script:
+    // A FASTQ record spans four lines, so the line count is divided by 4 to get reads.
+    """
+    echo \$(( \$(zcat ${fastq} | wc -l) / 4 ))
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        split: v1.35.0
+    END_VERSIONS
+    """
+}
+
+// Runs `seqtk sample` on each file in `reads` with `sample_size` and gzips the result as <prefix>_<basename>.
+process SAMPLE_READS {
+    tag "$meta.id"
+    label 'small_mem_mono_cpus'
+
+    container "quay.io/biocontainers/seqtk:1.3--h5bf99c6_3"
+
+    input:
+    tuple val(meta), path(reads), val(sample_size)
+
+    output:
+    tuple val(meta), path("*.fastq.gz"), emit: reads
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Abort before building the command when no sample size was supplied.
+    if ( !sample_size ) error "SEQTK/SAMPLE must have a sample_size value included"
+    def prefix   = task.ext.prefix ?: "${meta.id}"
+    def userArgs = task.ext.args ?: ''
+    // Append a fixed seed (-s100) unless the configured args already carry -s<N>.
+    def hasSeed  = userArgs ==~ /.*-s[0-9]+.*/
+    def args     = hasSeed ? userArgs : userArgs + " -s100"
+    """
+    printf "%s\\n" $reads | while read f;
+    do
+        seqtk \\
+            sample \\
+            $args \\
+            \$f \\
+            $sample_size \\
+            | gzip --no-name > ${prefix}_\$(basename \$f)
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+}