Commit 24f5cba3 authored by mcariou's avatar mcariou
Browse files

create sub database

parent 4d71656b
#!/bin/bash
#$ -S /bin/bash
## name of the job, used to follow it in the queue
#$ -N dblegio
## name of the queue to be used
#$ -q E5-2670deb*,E5-2667v2*,E5-2667v4*
#$ -cwd
#$ -V
## where to put the log files (output and error) automatically generated by the cluster (different from the .log generated by DGINN)
## the dirs must exist before the job is launched
#$ -o /home/mcariou/2021_legio/log/
#$ -e /home/mcariou/2021_legio/log/
#
# Purpose: concatenate the longest_orfs.cds file of every genome listed in the
# assembly table (TAB) into one FASTA file (CAT), then build a nucleotide
# BLAST database from that file with makeblastdb.
#
# The concatenation step is skipped when CAT already exists and is non-empty.
# All paths are hard-coded below; the script reads no positional arguments.
#
# NOTE: `set -e` / `set -u` are deliberately not enabled, because `module` is
# provided by the cluster environment and is not under this script's control.
# Critical steps are checked explicitly instead.

### set up the environment
module purge

##################################################################################
# NOTE(review): the line below looks like a leftover usage example; this
# script does not read any command-line argument.
#./2_make_db.sh /home/mcariou/2021_legio/doc/tabAss.txt /home/mcariou/2020_Attaiech/prot_db/Transdecoder/ /home/mcariou/2021_legio/blastdb/phyloref

# Project root. Not called HOME on purpose: assigning HOME would change `~`
# expansion and the environment inherited by every child process.
PROJECT_DIR="/home/mcariou/2021_legio/"
# Output directory of the BLAST database.
OUT="${PROJECT_DIR}blastdb/phyloref/"
# Concatenated CDS FASTA file used as makeblastdb input.
CAT="${OUT}/cat_phyloref_cds.fasta"
# Directory holding one sub-directory per genome, each with a longest_orfs.cds.
Trans="/home/mcariou/2020_Attaiech/prot_db/Transdecoder/"
# Table of genomes: first line is a header, first column is the genome name.
TAB="${PROJECT_DIR}/phylolegio/doc/tabAss.txt"

# Print a message on stderr and abort the job with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

mkdir -p "$OUT" || die "cannot create output directory: $OUT"

##################################################################################
### Read the genome table and concatenate the CDS files.
if [[ -s "$CAT" ]]; then
  echo "cat already exists"
else
  [[ -r "$TAB" ]] || die "cannot read genome table: $TAB"

  # Build the file under a temporary name and rename it only once complete,
  # so an interrupted job never leaves a partial CAT that a later run would
  # mistake for a finished one.
  tmp_cat="${CAT}.partial"
  : > "$tmp_cat" || die "cannot write to: $tmp_cat"

  # With nullglob, a pattern that matches nothing expands to an empty list.
  shopt -s nullglob
  # `|| [[ -n $genome ]]` keeps the last line when it has no trailing newline.
  while read -r genome _ || [[ -n "$genome" ]]; do
    [[ -n "$genome" ]] || continue   # skip blank lines

    # The genome name is a prefix: several directories may match, so expand
    # into an array and handle every match instead of testing a joined string.
    matches=("${Trans}/${genome}"*/longest_orfs.cds)
    found=0
    for cds in "${matches[@]}"; do
      if [[ -s "$cds" ]]; then
        cat -- "$cds" >> "$tmp_cat" || die "failed to append: $cds"
        found=1
      fi
    done

    if (( found == 0 )); then
      echo "doesn't: ${Trans}/${genome}*/longest_orfs.cds"
    fi
  done < <(sed '1d' "$TAB")   # '1d' drops the header line
  shopt -u nullglob

  [[ -s "$tmp_cat" ]] || die "no CDS sequence found for any genome listed in $TAB"
  mv -- "$tmp_cat" "$CAT" || die "cannot move $tmp_cat to $CAT"
fi

# Re-split concatenate
#sed '1 s/^/\n/; 2,$ s/>/~\n>/' $CAT | split -t '~' -l 100000 -d --filter="tail -n+2 | grep -v '^~$' > \$OUT\$FILE"

### Make Blast db
[[ -s "$CAT" ]] || die "missing or empty makeblastdb input: $CAT"
makeblastdb -dbtype nucl -in "$CAT" -hash_index -out "$OUT/phyloref" -parse_seqids \
  || die "makeblastdb failed"
# end
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment