#!/bin/bash
#$ -S /bin/bash
## name of the job to follow them
#$ -N dblegio
## name of the queue to be used
#$ -q E5-2670deb*,E5-2667v2*,E5-2667v4*
#$ -cwd
#$ -V
## where to put the log files (output and error) automatically generated by the cluster (different from the .log generated by DGINN)
## the dirs must exist before job is launched
#$ -o /home/mcariou/2021_legio/log/
#$ -e /home/mcariou/2021_legio/log/
#
# script/2_make_db.sh -- build the "phyloref" nucleotide BLAST database.
#
# Reads the genome table, concatenates the longest_orfs.cds file of every
# listed genome into one FASTA file, then runs makeblastdb on that file.
#
# Provenance: recovered from a GitLab-generated patch
#   commit  24f5cba3bcd52ad183123b9f024411780766cc9e
#   author  mcariou <115-mcariou@users.noreply.gitbio.ens-lyon.fr>
#   date    Tue, 12 Oct 2021 18:01:42 +0200
#   subject [PATCH] create sub database
#   file    script/2_make_db.sh (new file, mode 100755)

### Set up the environment.
# Done before strict mode is enabled: `module` is provided by the cluster
# environment and may not be written to run under `set -u`.
if command -v module >/dev/null 2>&1; then
  module purge
fi

set -euo pipefail
# An unmatched glob must expand to nothing, so that a genome without any
# matching directory can be detected by counting the matches.
shopt -s nullglob

##################################################################################
#./2_make_db.sh /home/mcariou/2021_legio/doc/tabAss.txt /home/mcariou/2020_Attaiech/prot_db/Transdecoder/ /home/mcariou/2021_legio/blastdb/phyloref
# NOTE(review): positional arguments are NOT read by this script; every path
# is hard-coded below. The table path in the usage line above differs from
# TAB -- confirm which one is intended.

# The project root deliberately does not reuse the name HOME: reassigning HOME
# would change the home directory seen by the rest of this shell and by every
# child process.
readonly PROJECT_DIR="/home/mcariou/2021_legio"
readonly OUT="${PROJECT_DIR}/blastdb/phyloref"
readonly CAT="${OUT}/cat_phyloref_cds.fasta"
readonly TRANS_DIR="/home/mcariou/2020_Attaiech/prot_db/Transdecoder"
readonly TAB="${PROJECT_DIR}/phylolegio/doc/tabAss.txt"

# Path of the concatenation in progress; empty when there is none.
partial_cat=""

#######################################
# Print an error message on stderr and abort the script.
# Arguments: $* - message
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# EXIT handler: remove an unfinished concatenation so that a later run can
# never mistake it for a complete one.
# Globals: partial_cat (read)
#######################################
cleanup() {
  if [[ -n "${partial_cat}" ]]; then
    rm -f -- "${partial_cat}"
  fi
}
trap cleanup EXIT

#######################################
# Read the genome table and concatenate the CDS files.
# Does nothing when CAT already exists and is non-empty. Otherwise the first
# column of every line of TAB after the header line is taken as a genome
# prefix, and the file TRANS_DIR/<prefix>*/longest_orfs.cds is appended to
# the result.
# Globals: CAT, TAB, TRANS_DIR (read); partial_cat (written)
# Outputs: progress messages on stdout, warnings on stderr
# Returns: 0 on success; exits non-zero if TAB is unreadable or if no
#          sequence at all was collected
#######################################
concat_cds() {
  local genome pattern
  local -a matches

  if [[ -s "${CAT}" ]]; then
    echo "cat already exists"
    return 0
  fi

  [[ -r "${TAB}" ]] || die "cannot read genome table: ${TAB}"

  # Build the file under a temporary name and rename it only once complete:
  # an interrupted job must not leave a truncated CAT that the next run would
  # silently reuse. A plain redirection (rather than mktemp) keeps the
  # permissions the file would get from a normal `>>` creation.
  partial_cat="${CAT}.partial.$$"
  : > "${partial_cat}"

  while IFS= read -r genome; do
    pattern="${TRANS_DIR}/${genome}*/longest_orfs.cds"
    matches=( "${TRANS_DIR}/${genome}"*/longest_orfs.cds )
    case "${#matches[@]}" in
      0)
        echo "doesn't: ${pattern}"
        ;;
      1)
        if [[ -s "${matches[0]}" ]]; then
          cat -- "${matches[0]}" >> "${partial_cat}"
        else
          echo "doesn't: ${matches[0]}"
        fi
        ;;
      *)
        # Several directories share this prefix: skip rather than guess.
        echo "doesn't: ${matches[*]}"
        printf 'warning: prefix "%s" matches %d directories, skipped\n' \
          "${genome}" "${#matches[@]}" >&2
        ;;
    esac
  done < <(awk 'NR > 1 && NF { print $1 }' "${TAB}")

  [[ -s "${partial_cat}" ]] \
    || die "no non-empty longest_orfs.cds found for the genomes in ${TAB}"

  mv -- "${partial_cat}" "${CAT}"
  partial_cat=""
}

#######################################
# Entry point: create the output directory, build the concatenated FASTA if
# needed, then (re)build the BLAST database from it.
#######################################
main() {
  mkdir -p -- "${OUT}"

  ### Read tab genomes and cat cds files.
  concat_cds

  # Re-split concatenate
  #sed '1 s/^/\n/; 2,$ s/>/~\n>/' $CAT | split -t '~' -l 100000 -d --filter="tail -n+2 | grep -v '^~$' > \$OUT\$FILE"

  ### Make Blast db
  command -v makeblastdb >/dev/null 2>&1 \
    || die "makeblastdb not found in PATH"
  makeblastdb -dbtype nucl -in "${CAT}" -hash_index \
    -out "${OUT}/phyloref" -parse_seqids
}

main "$@"

# end