From fa7de5ceddc970326e62ebf15aa30b7ff637bf02 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Fri, 10 Mar 2023 15:46:28 +0100
Subject: [PATCH] switch fastq file to fasta file for kmdiff

---
 data/sample_mbelari.csv         | 18 +++++++++---------
 data/sample_mlongespiculosa.csv |  4 ++--
 data/sample_mspiculigera.csv    |  4 ++--
 src/kmdiff.sh                   | 12 +++++++++++-
 4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/data/sample_mbelari.csv b/data/sample_mbelari.csv
index 76e8c0c..eedcc91 100644
--- a/data/sample_mbelari.csv
+++ b/data/sample_mbelari.csv
@@ -1,9 +1,9 @@
-female1: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R2_001.fastq.gz
-female2: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R2_001.fastq.gz
-female3: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R2_001.fastq.gz
-female4: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R2_001.fastq.gz
-female5: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R2_001.fastq.gz
-female6: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R2_001.fastq.gz
-female7: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R1.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R2.fastq.gz
-male1: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R1_001.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R2_001.fastq.gz
-male2: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R1.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R2.fastq.gz
+female1: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R2_001.fasta.gz
+female2: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R2_001.fasta.gz
+female3: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R2_001.fasta.gz
+female4: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R2_001.fasta.gz
+female5: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R2_001.fasta.gz
+female6: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R2_001.fasta.gz
+female7: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R2.fasta.gz
+male1: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R1_001.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R2_001.fasta.gz
+male2: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R1.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R2.fasta.gz
diff --git a/data/sample_mlongespiculosa.csv b/data/sample_mlongespiculosa.csv
index 9c05514..7496b78 100644
--- a/data/sample_mlongespiculosa.csv
+++ b/data/sample_mlongespiculosa.csv
@@ -1,2 +1,2 @@
-female1: /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R1.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R2.fastq.gz
-male1: //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fastq.gz
+female1: /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R2.fasta.gz
+male1: //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R2.fasta.gz
diff --git a/data/sample_mspiculigera.csv b/data/sample_mspiculigera.csv
index 534ec50..394e621 100644
--- a/data/sample_mspiculigera.csv
+++ b/data/sample_mspiculigera.csv
@@ -1,2 +1,2 @@
-female1: /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R2_001.fastq.gz
-male1: //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R1_001.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R2_001.fastq.gz
+female1: /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R2_001.fasta.gz
+male1: //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R1_001.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R2_001.fasta.gz
diff --git a/src/kmdiff.sh b/src/kmdiff.sh
index 303773f..c05c132 100644
--- a/src/kmdiff.sh
+++ b/src/kmdiff.sh
@@ -4,13 +4,23 @@ docker push lbmc/kmdiff:1.0.1
 
 # build charliecloud image
 ch-image pull -s /Xnfs/abc/charliecloud/ lbmc/kmdiff:1.0.1
+ch-image pull -s /Xnfs/abc/charliecloud quay.io/biocontainers/seqtk:1.3--ha92aebf_0
 update_ch_image.sh
 
-alias kmdiff="ch-run -b /scratch /Xnfs/abc/charliecloud/img/lbmc%kmdiff+1.0.1 -- kmdiff"
+alias kmdiff="ch-run -b /scratch -c $PWD /Xnfs/abc/charliecloud/img/lbmc%kmdiff+1.0.1 -- kmdiff"
+alias seqtk="ch-run -b /scratch -c $PWD /Xnfs/abc/charliecloud/img/quay.io%biocontainers%seqtk+1.3--ha92aebf_0 -- seqtk" # tag must match the pulled seqtk:1.3--ha92aebf_0 image
 
 WORK=$(pwd)
 mkdir results/
 
+# convert fastq into fasta; awk's system() runs sh, which cannot see the seqtk alias, so ch-run is spelled out
+fd '\.fastq\.gz$' data \
+  | sed 's|\.fastq\.gz$||' \
+  | awk '{system("ch-run -b /scratch -c \"$PWD\" /Xnfs/abc/charliecloud/img/quay.io%biocontainers%seqtk+1.3--ha92aebf_0 -- seqtk seq -a \"" $0 ".fastq.gz\" > \"" $0 ".fasta\"")}'
+
+# compress the new fasta files; the anchored pattern skips already-gzipped *.fasta.gz on reruns
+fd '\.fasta$' data --exec gzip -v9 {}
+
 # kmdiff count
 kmdiff count -f ${WORK}/data/sample_mbelari.csv -d ${WORK}/results/mbelari_counts/ -t 32
 kmdiff count -f ${WORK}/data/sample_mlongespiculosa.csv -d ${WORK}/results/mlongespiculosa_counts/ -t 32
-- 
GitLab