From 29498d032124373cd4ebfef83ff496e579b3b2b2 Mon Sep 17 00:00:00 2001
From: xgrand <xavier.grand@ens-lyon.fr>
Date: Wed, 16 Aug 2023 10:24:05 +0200
Subject: [PATCH] Add 390 bp minimum-length filtering of reads

---
 src/nf_modules/seqkit/main.nf | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/nf_modules/seqkit/main.nf b/src/nf_modules/seqkit/main.nf
index 87792e5..761afb3 100755
--- a/src/nf_modules/seqkit/main.nf
+++ b/src/nf_modules/seqkit/main.nf
@@ -40,10 +40,11 @@ process seqkit_grep {
     val(gsp)
 
   output:
-    tuple val(barcode), path("${barcode}/${barcode}_filtered_5RACE_GSP.fastq"), emit: filtered_fastq
+    tuple val(barcode), path("${barcode}/${barcode}_390bp_filtered_5RACE_GSP.fastq"), emit: filtered_fastq
     path("${barcode}/*.csv")
     path("${barcode}/*.txt")
     path("${barcode}/${barcode}_filtered_5RACE.fastq")
+    path("${barcode}/${barcode}_filtered_5RACE_GSP.fastq")
 
   script:
     lgadapt = Math.round(adapt.size().div(10))
@@ -57,9 +58,11 @@ process seqkit_grep {
     echo ${gsp} > gsp.txt
     seqkit grep -i -f adapt.txt -m ${lgadapt} ../${fastq} -o ${barcode}_filtered_5RACE.fastq -j ${task.cpus}
     seqkit grep -i -f gsp.txt -m ${lggsp} ${barcode}_filtered_5RACE.fastq -o ${barcode}_filtered_5RACE_GSP.fastq -j ${task.cpus}
+    seqkit seq --min-len 390 --remove-gaps ${barcode}_filtered_5RACE_GSP.fastq -j ${task.cpus} > ${barcode}_390bp_filtered_5RACE_GSP.fastq
     seqkit stats ../${fastq} -T -j ${task.cpus} > ${barcode}_seq_stats.csv
     seqkit stats ${barcode}_filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
     seqkit stats ${barcode}_filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
+    seqkit stats ${barcode}_390bp_filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
     """
 }
 
-- 
GitLab