Skip to content
Snippets Groups Projects
Commit 24f5cba3 authored by mcariou's avatar mcariou
Browse files

create sub database

parent 4d71656b
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
## Cluster job script: concatenates the longest_orfs.cds files of the genomes
## listed in a table, then builds a nucleotide BLAST database from the result.
## Lines starting with "#$" are directives read by the job scheduler
## (Grid Engine style prefix); they are not ordinary comments, keep them intact.
## shell used to interpret the job script
#$ -S /bin/bash
## job name, used to follow the job in the queue
#$ -N dblegio
## name of the queue to be used
#$ -q E5-2670deb*,E5-2667v2*,E5-2667v4*
## run the job from the directory it was submitted from
#$ -cwd
## export the environment variables of the submitting shell to the job
#$ -V
## where to put the log files (output and error) automatically generated by the cluster (different from the .log generated by DGINN)
## the dirs must exist before job is launched
#$ -o /home/mcariou/2021_legio/log/
#$ -e /home/mcariou/2021_legio/log/
### set up the environment: unload every currently loaded module
### NOTE(review): no "module load" follows in this script, so makeblastdb
### presumably has to be reachable through PATH already -- confirm.
module purge
##################################################################################
#./2_make_db.sh /home/mcariou/2021_legio/doc/tabAss.txt /home/mcariou/2020_Attaiech/prot_db/Transdecoder/ /home/mcariou/2021_legio/blastdb/phyloref
## Path configuration. All paths are absolute and hard-coded.
## The project root lives in its own variable: assigning it to HOME would
## override the real home directory for "~" expansion and for every child
## process started by this script.
PROJECT_DIR="/home/mcariou/2021_legio/"
## directory that receives the concatenated FASTA and the BLAST database
OUT="${PROJECT_DIR}blastdb/phyloref/"
## concatenated CDS FASTA used as makeblastdb input
CAT="${OUT}/cat_phyloref_cds.fasta"
## directory searched for <genome>*/longest_orfs.cds files
Trans="/home/mcariou/2020_Attaiech/prot_db/Transdecoder/"
## genome table: first line is a header, first column is the genome identifier
TAB="${PROJECT_DIR}/phylolegio/doc/tabAss.txt"
mkdir -p -- "$OUT"
##################################################################################################################
### Read the genome table and concatenate the per-genome CDS files.
#######################################
# Concatenate the longest_orfs.cds file of every genome listed in a table.
# The result is built in a temporary "<output>.partial.<pid>" file and renamed
# at the end, so an interrupted run never leaves a truncated output that a
# later run would report as "cat already exists".
# Arguments:
#   $1 - genome table; the first line is a header and is skipped, the first
#        whitespace-separated field of every other non-blank line is a genome
#        identifier
#   $2 - directory searched for <identifier>*/longest_orfs.cds
#   $3 - path of the concatenated FASTA to create
# Outputs:
#   stdout: "cat already exists" when $3 is already non-empty;
#           "doesn't: <path>" for each genome without a non-empty CDS file
#   stderr: a message for an unreadable table, an unwritable output, or an
#           identifier that matches several CDS files (that genome is skipped)
# Returns:
#   0 on success or when there is nothing to do, 1 on a read/write failure
#######################################
concat_cds() {
  local table=$1 trans_dir=$2 out_fasta=$3
  local genome cds partial
  local -a matches

  if [[ -s "$out_fasta" ]]; then
    echo "cat already exists"
    return 0
  fi
  if [[ ! -r "$table" ]]; then
    echo "cannot read genome table: $table" >&2
    return 1
  fi

  partial="${out_fasta}.partial.$$"
  if ! true > "$partial"; then
    echo "cannot write to: $partial" >&2
    return 1
  fi

  while read -r genome; do
    # Only the "*" is left unquoted, so the directory and the identifier are
    # taken literally even when they contain whitespace.
    matches=("${trans_dir}/${genome}"*/longest_orfs.cds)
    if (( ${#matches[@]} > 1 )); then
      # Appending every match could duplicate sequences when one identifier is
      # a prefix of another, so the genome is skipped with an explicit reason.
      echo "ambiguous: ${#matches[@]} files match ${trans_dir}/${genome}*/longest_orfs.cds, skipped" >&2
      continue
    fi
    # Without a match the glob stays literal; the fallback covers nullglob.
    cds=${matches[0]:-${trans_dir}/${genome}*/longest_orfs.cds}
    if [[ -s "$cds" ]]; then
      if ! cat -- "$cds" >> "$partial"; then
        echo "failed to append: $cds" >&2
        rm -f -- "$partial"
        return 1
      fi
    else
      echo "doesn't: $cds"
    fi
  done < <(awk 'NR > 1 && NF { print $1 }' "$table")

  if [[ -s "$partial" ]]; then
    if ! mv -f -- "$partial" "$out_fasta"; then
      rm -f -- "$partial"
      return 1
    fi
  else
    # Nothing was collected: do not leave an empty output behind.
    rm -f -- "$partial"
  fi
  return 0
}

concat_cds "$TAB" "$Trans" "$CAT"
# Re-split the concatenated file (currently disabled)
#sed '1 s/^/\n/; 2,$ s/>/~\n>/' $CAT | split -t '~' -l 100000 -d --filter="tail -n+2 | grep -v '^~$' > \$OUT\$FILE"
### Make Blast db
# Index the concatenated FASTA $CAT as a nucleotide database written to
# $OUT/phyloref (options: -hash_index, -parse_seqids).
# Stop with a clear message when there is no input instead of letting
# makeblastdb fail on a missing or empty file.
if [[ ! -s "$CAT" ]]; then
  echo "no concatenated CDS file to index: $CAT" >&2
  exit 1
fi
makeblastdb -dbtype nucl -in "$CAT" -hash_index -out "$OUT/phyloref" -parse_seqids
# fin
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment