From fa7de5ceddc970326e62ebf15aa30b7ff637bf02 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Fri, 10 Mar 2023 15:46:28 +0100
Subject: [PATCH] switch fastq file to fasta file for kmdiff

---
Review notes (this section is ignored by git am):
- The seqtk alias now uses the same image tag (1.3--ha92aebf_0) as the
  ch-image pull and the awk conversion step; it previously pointed at 2.3.
- The fd/sed patterns are anchored to the real suffixes so that unrelated
  names and already-compressed .fasta.gz files are not picked up.
- data/sample_mlongespiculosa.csv lists MRDR4_R1 for both mates of male1,
  before and after this patch. TODO confirm whether the second entry
  should be MRDR4_R2.

 data/sample_mbelari.csv         | 18 +++++++++---------
 data/sample_mlongespiculosa.csv |  4 ++--
 data/sample_mspiculigera.csv    |  4 ++--
 src/kmdiff.sh                   | 12 +++++++++++-
 4 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/data/sample_mbelari.csv b/data/sample_mbelari.csv
index 76e8c0c..eedcc91 100644
--- a/data/sample_mbelari.csv
+++ b/data/sample_mbelari.csv
@@ -1,9 +1,9 @@
-female1: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R2_001.fastq.gz
-female2: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R2_001.fastq.gz
-female3: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R2_001.fastq.gz
-female4: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R2_001.fastq.gz
-female5: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R2_001.fastq.gz
-female6: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R2_001.fastq.gz
-female7: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R1.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R2.fastq.gz
-male1: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R1_001.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R2_001.fastq.gz
-male2: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R1.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R2.fastq.gz
+female1: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R2_001.fasta.gz
+female2: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R2_001.fasta.gz
+female3: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R2_001.fasta.gz
+female4: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R2_001.fasta.gz
+female5: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R2_001.fasta.gz
+female6: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R2_001.fasta.gz
+female7: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R2.fasta.gz
+male1: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R1_001.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R2_001.fasta.gz
+male2: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R1.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R2.fasta.gz
diff --git a/data/sample_mlongespiculosa.csv b/data/sample_mlongespiculosa.csv
index 9c05514..7496b78 100644
--- a/data/sample_mlongespiculosa.csv
+++ b/data/sample_mlongespiculosa.csv
@@ -1,2 +1,2 @@
-female1: /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R1.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R2.fastq.gz
-male1: //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fastq.gz
+female1: /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R2.fasta.gz
+male1: //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fasta.gz
diff --git a/data/sample_mspiculigera.csv b/data/sample_mspiculigera.csv
index 534ec50..394e621 100644
--- a/data/sample_mspiculigera.csv
+++ b/data/sample_mspiculigera.csv
@@ -1,2 +1,2 @@
-female1: /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R2_001.fastq.gz
-male1: //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R1_001.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R2_001.fastq.gz
+female1: /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R2_001.fasta.gz
+male1: //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R1_001.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R2_001.fasta.gz
diff --git a/src/kmdiff.sh b/src/kmdiff.sh
index 303773f..c05c132 100644
--- a/src/kmdiff.sh
+++ b/src/kmdiff.sh
@@ -4,13 +4,23 @@ docker push lbmc/kmdiff:1.0.1
 
 # build charliecloud image
 ch-image pull -s /Xnfs/abc/charliecloud/ lbmc/kmdiff:1.0.1
+ch-image pull -s /Xnfs/abc/charliecloud quay.io/biocontainers/seqtk:1.3--ha92aebf_0
 update_ch_image.sh
 
-alias kmdiff="ch-run -b /scratch /Xnfs/abc/charliecloud/img/lbmc%kmdiff+1.0.1 -- kmdiff"
+alias kmdiff="ch-run -b /scratch -c $PWD /Xnfs/abc/charliecloud/img/lbmc%kmdiff+1.0.1 -- kmdiff"
+alias seqtk="ch-run -b /scratch -c $PWD /Xnfs/abc/charliecloud/img/quay.io%biocontainers%seqtk+1.3--ha92aebf_0 -- seqtk"
 
 WORK=$(pwd)
 mkdir results/
 
+# convert fastq into fasta
+fd '\.fastq\.gz$' data \
+  | sed 's|\.fastq\.gz$||' \
+  | awk '{system("ch-run -b /scratch -c $PWD /Xnfs/abc/charliecloud/img/quay.io%biocontainers%seqtk+1.3--ha92aebf_0 -- seqtk seq -a "$0".fastq.gz > "$0".fasta")}'
+
+fd '\.fasta$' data --exec gzip -v9 {}
+
+
 # kmdiff count
 kmdiff count -f ${WORK}/data/sample_mbelari.csv -d ${WORK}/results/mbelari_counts/ -t 32
 kmdiff count -f ${WORK}/data/sample_mlongespiculosa.csv -d ${WORK}/results/mlongespiculosa_counts/ -t 32
-- 
GitLab