diff --git a/data/sample_mbelari.csv b/data/sample_mbelari.csv
index 76e8c0cdf110d2af7a476f34dc1e9805e9dc5135..eedcc91711084e3a4efe6d89873dd3b9e08831a9 100644
--- a/data/sample_mbelari.csv
+++ b/data/sample_mbelari.csv
@@ -1,9 +1,9 @@
-female1: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R2_001.fastq.gz
-female2: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R2_001.fastq.gz
-female3: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R2_001.fastq.gz
-female4: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R2_001.fastq.gz
-female5: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R2_001.fastq.gz
-female6: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R2_001.fastq.gz
-female7: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R1.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R2.fastq.gz
-male1: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R1_001.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R2_001.fastq.gz
-male2: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R1.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R2.fastq.gz
+female1: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/1_S1_L001_R2_001.fasta.gz
+female2: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/2_S2_L001_R2_001.fasta.gz
+female3: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/3_S3_L001_R2_001.fasta.gz
+female4: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/4_S4_L001_R2_001.fasta.gz
+female5: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/5_S5_L001_R2_001.fasta.gz
+female6: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/6_S6_L001_R2_001.fasta.gz
+female7: /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mbelari_females/MRDR5_R2.fasta.gz
+male1: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R1_001.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/JU2817_males_S11_L002_R2_001.fasta.gz
+male2: //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R1.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mbelari_males/MRDR6_R2.fasta.gz
diff --git a/data/sample_mlongespiculosa.csv b/data/sample_mlongespiculosa.csv
index 9c05514ee12f2730eb31dc61df0631f5f2fc3a71..7496b78e5322ebd7e899b02975f3c65618fe5695 100644
--- a/data/sample_mlongespiculosa.csv
+++ b/data/sample_mlongespiculosa.csv
@@ -1,2 +1,2 @@
-female1: /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R1.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R2.fastq.gz
-male1: //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fastq.gz
+female1: /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R1.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_females/MRDR3_R2.fasta.gz
+male1: //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R1.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mlongespiculosa_males/MRDR4_R2.fasta.gz
diff --git a/data/sample_mspiculigera.csv b/data/sample_mspiculigera.csv
index 534ec50636a7c973d2a339dbc03f4131907ab655..394e621d9fcb523df692510bffa11d4a8bd28c98 100644
--- a/data/sample_mspiculigera.csv
+++ b/data/sample_mspiculigera.csv
@@ -1,2 +1,2 @@
-female1: /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R1_001.fastq.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R2_001.fastq.gz
-male1: //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R1_001.fastq.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R2_001.fastq.gz
+female1: /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R1_001.fasta.gz ; /scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_females/AF72-females_BIS_S8_L002_R2_001.fasta.gz
+male1: //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R1_001.fasta.gz ; //scratch/Bio/lmodolo/kmer_diff/data/Mspiculigera_males/AF72-males_S7_L002_R2_001.fasta.gz
diff --git a/src/kmdiff.sh b/src/kmdiff.sh
index 303773f7e850a34191266bf25c32bd1e7066a69f..c05c1323d525c0d4b7f61b3f2805d7c38137ec70 100644
--- a/src/kmdiff.sh
+++ b/src/kmdiff.sh
@@ -4,13 +4,27 @@ docker push lbmc/kmdiff:1.0.1
 
 # build charliecloud image
 ch-image pull -s /Xnfs/abc/charliecloud/ lbmc/kmdiff:1.0.1
+ch-image pull -s /Xnfs/abc/charliecloud quay.io/biocontainers/seqtk:1.3--ha92aebf_0
 update_ch_image.sh
 
-alias kmdiff="ch-run -b /scratch /Xnfs/abc/charliecloud/img/lbmc%kmdiff+1.0.1 -- kmdiff"
+alias kmdiff="ch-run -b /scratch -c $PWD /Xnfs/abc/charliecloud/img/lbmc%kmdiff+1.0.1 -- kmdiff"
+# the seqtk image tag must match the one pulled above (1.3--ha92aebf_0)
+alias seqtk="ch-run -b /scratch -c $PWD /Xnfs/abc/charliecloud/img/quay.io%biocontainers%seqtk+1.3--ha92aebf_0 -- seqtk"
 
 WORK=$(pwd)
 mkdir results/
 
+# convert fastq into fasta
+# note: awk's system() hands the command to sh, where the seqtk alias is not
+# defined, so the full ch-run command is spelled out; paths are double-quoted
+fd '\.fastq\.gz$' data \
+  | sed 's|\.fastq\.gz$||' \
+  | awk '{system("ch-run -b /scratch -c $PWD /Xnfs/abc/charliecloud/img/quay.io%biocontainers%seqtk+1.3--ha92aebf_0 -- seqtk seq -a \"" $0 ".fastq.gz\" > \"" $0 ".fasta\"")}'
+
+# compress only plain .fasta files, so already gzipped outputs are skipped on re-runs
+fd '\.fasta$' data --exec gzip -v9 {}
+
+
 # kmdiff count
 kmdiff count -f ${WORK}/data/sample_mbelari.csv -d ${WORK}/results/mbelari_counts/ -t 32
 kmdiff count -f ${WORK}/data/sample_mlongespiculosa.csv -d ${WORK}/results/mlongespiculosa_counts/ -t 32