From 4c5c8034ab1689679b2cba2b2e1c34e298ea779d Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Mon, 18 Sep 2023 09:32:02 +0200
Subject: [PATCH] add line number check for subsample and split

---
 .gitignore                  |  7 +++++++
 src/modules/sample_reads.nf | 12 ++++++++++--
 src/modules/split.nf        |  3 +++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index d88c0bc..b03679d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,10 @@ src/mergekmer
 .Rhistory
 .Rprofile
 .kmer-diff.Rproj
+.Rbuildignore
+doc/2023_09_18_presentation.html
+doc/2023_09_18_presentation_files/
+doc/presentation_files/
+fail.txt
+kmer-diff.Rproj
+src/plot_counts.Rmd
diff --git a/src/modules/sample_reads.nf b/src/modules/sample_reads.nf
index 6bce0b3..e10d4ec 100644
--- a/src/modules/sample_reads.nf
+++ b/src/modules/sample_reads.nf
@@ -120,10 +120,18 @@ process SAMPLE_READS {
         error "SEQTK/SAMPLE must have a sample_size value included"
     }
     """
-    if [\$(cat $sample_size | tr -d '\\n') -eq \$(cat $read_number | tr -d '\\n')]; then
+    SAMPLE_SIZE=\$(cat $sample_size | tr -d '\\n')  # number of lines to keep (passed to head -n)
+    CURRENT_READ_NUMBER=\$(cat $read_number | tr -d '\\n')  # count stored in the read_number file
+
+    if [ "\$SAMPLE_SIZE" -eq "\$CURRENT_READ_NUMBER" ]; then  # already the right size: just link
         ln -s ${reads} sample_${reads}
+        READ_NUMBER=\$SAMPLE_SIZE
     else
-        zcat ${reads} | head -n \$(cat $sample_size | tr -d '\\n') | gzip -c > sample_${reads}
+        zcat ${reads} | head -n "\$SAMPLE_SIZE" | gzip -c > sample_${reads}
+        READ_NUMBER=\$(zcat sample_${reads} | wc -l)  # recount what was actually written
+    fi
+    if [ "\$SAMPLE_SIZE" -ne "\$READ_NUMBER" ]; then  # fail the task on a short sample
+        echo "sample_${reads}: expected \$SAMPLE_SIZE lines, got \$READ_NUMBER" >&2; exit 1
     fi
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/src/modules/split.nf b/src/modules/split.nf
index a6cf5fe..9a8b72f 100644
--- a/src/modules/split.nf
+++ b/src/modules/split.nf
@@ -16,6 +16,9 @@ process SPLIT {
     """
     zcat ${fastq} | split -l 4000000 - ${fastq.simpleName}_
 
+    for chunk in ${fastq.simpleName}_*; do wc -l < "\$chunk"; done | \\
+        awk 'partial { exit 1 } { partial = (\$1 != 4000000) }'  # only the last chunk may be short
+
     ls -l ${fastq.simpleName}_* | \\
         awk '{system("gzip -c "\$9" > "\$9".fastq.gz && rm "\$9)}'
 
-- 
GitLab