#!/bin/bash
#$ -S /bin/bash
## name of the job, used to follow it in the queue
#$ -N dblegio
## name of the queue(s) to be used
#$ -q E5-2670deb*,E5-2667v2*,E5-2667v4*
#$ -cwd
#$ -V
## where to put the log files (output and error) automatically generated by
## the cluster (different from the .log generated by DGINN);
## the directories must exist before the job is launched
#$ -o /home/mcariou/2021_legio/log/
#$ -e /home/mcariou/2021_legio/log/

##############################################################################
# script/2_make_db.sh
#
# Purpose:
#   1. Read the genome table (TAB), skip its header line and take the first
#      whitespace-separated column of every remaining line as a genome name.
#   2. For each genome, look for <TRANS_DIR>/<genome>*/longest_orfs.cds and
#      append it to one concatenated FASTA file (CAT). This step is skipped
#      when CAT already exists and is non-empty.
#   3. Build a nucleotide BLAST database named "phyloref" from CAT in OUT.
#
# Usage:
#   Submit or run without arguments. All paths are hard-coded below; any
#   positional arguments are ignored. An earlier example invocation, kept
#   for reference only:
#   ./2_make_db.sh /home/mcariou/2021_legio/doc/tabAss.txt /home/mcariou/2020_Attaiech/prot_db/Transdecoder/ /home/mcariou/2021_legio/blastdb/phyloref
##############################################################################

### Set up the environment.
# Deliberately executed before strict mode is enabled: a missing or failing
# `module` command must not abort the job.
module purge

set -euo pipefail

# Project root. Intentionally NOT called HOME: HOME is a standard environment
# variable and reassigning it would change the home directory seen by every
# child process started from this script.
readonly PROJECT_DIR="/home/mcariou/2021_legio"
# Output directory for the concatenated FASTA and the BLAST database.
readonly OUT="${PROJECT_DIR}/blastdb/phyloref"
# Concatenation of all per-genome longest_orfs.cds files.
readonly CAT="${OUT}/cat_phyloref_cds.fasta"
# Directory holding one sub-directory per genome, each with longest_orfs.cds.
readonly TRANS_DIR="/home/mcariou/2020_Attaiech/prot_db/Transdecoder"
# Genome table: one header line, genome name in the first column.
readonly TAB="${PROJECT_DIR}/phylolegio/doc/tabAss.txt"

# Path of the in-progress concatenation; empty when there is nothing to clean.
TMP_CAT=""

#######################################
# Print a message on stderr and abort the job.
# Arguments: $* - message
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Remove a partially written concatenation, if any. Runs on every exit path.
# Globals: TMP_CAT (read)
#######################################
cleanup() {
  if [[ -n "${TMP_CAT}" ]]; then
    rm -f -- "${TMP_CAT}"
  fi
}
trap cleanup EXIT

#######################################
# Concatenate the CDS file of every genome listed in TAB into CAT.
# The data is first written to a temporary file next to CAT and only moved
# into place once complete, so an interrupted run can never leave behind a
# truncated CAT that a later run would accept as "already exists".
# Globals:   TAB, TRANS_DIR, CAT (read); TMP_CAT (written)
# Outputs:   one "doesn't: ..." line on stdout per genome that is skipped
# Returns:   aborts the job if TAB is unreadable or no CDS file was found
#######################################
build_cat() {
  local genome
  local -a matches

  [[ -r "${TAB}" ]] || die "cannot read genome table: ${TAB}"

  TMP_CAT="${CAT}.tmp.$$"

  # awk drops the header line (NR > 1) and blank lines (NF == 0) and emits
  # the first column only.
  while IFS= read -r genome; do
    # The genome name is a prefix of the directory name. Without nullglob an
    # unmatched pattern stays as the literal pattern, which then fails the
    # -s test below and is reported like any other missing file.
    matches=( "${TRANS_DIR}/${genome}"*/longest_orfs.cds )

    # Exactly one non-empty file is required. Several matches mean the
    # prefix is ambiguous; such genomes are reported and skipped rather
    # than guessed at.
    if [[ ${#matches[@]} -eq 1 && -s "${matches[0]}" ]]; then
      cat -- "${matches[0]}" >> "${TMP_CAT}"
    else
      echo "doesn't: ${matches[*]}"
    fi
  done < <(awk 'NR > 1 && NF { print $1 }' "${TAB}")

  [[ -s "${TMP_CAT}" ]] || die "no CDS file found for any genome listed in ${TAB}"

  mv -- "${TMP_CAT}" "${CAT}"
  TMP_CAT=""
}

#######################################
# Entry point: prepare the output directory, build CAT if needed, then build
# the BLAST database.
# Globals: OUT, CAT (read)
#######################################
main() {
  mkdir -p -- "${OUT}"

  ### Read the genome table and concatenate the CDS files.
  if [[ -s "${CAT}" ]]; then
    echo "cat already exists"
  else
    build_cat
  fi

  # Re-split the concatenated file (disabled, kept for reference):
  #sed '1 s/^/\n/; 2,$ s/>/~\n>/' $CAT | split -t '~' -l 100000 -d --filter="tail -n+2 | grep -v '^~$' > \$OUT\$FILE"

  ### Make the BLAST database.
  makeblastdb -dbtype nucl -in "${CAT}" -hash_index -out "${OUT}/phyloref" -parse_seqids
}

main "$@"

# end