2_make_db.sh 1.81 KB
Newer Older
mcariou's avatar
mcariou committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/bin/bash
#$ -S /bin/bash
## job name (used to follow the job in the queue)
#$ -N dblegio
## name of the queue to be used
#$ -q E5-2670deb*,E5-2667v2*,E5-2667v4*
#$ -cwd
#$ -V
## where to put the log files (output and error) automatically generated by the cluster (different from the .log generated by DGINN)
## these directories must exist before the job is launched
#$ -o /home/mcariou/2021_legio/log/
#$ -e /home/mcariou/2021_legio/log/
 
### set up the environment
module purge


##################################################################################
# Purpose: concatenate the per-genome longest_orfs.cds files, translate the
# result with transeq and build a protein BLAST database with makeblastdb.
#
# Example invocation recorded by the author:
#./2_make_db.sh /home/mcariou/2021_legio/doc/tabAss.txt /home/mcariou/2020_Attaiech/prot_db/Transdecoder/ /home/mcariou/2021_legio/blastdb/phyloref
# NOTE(review): the visible script never reads positional parameters; all paths
# come from the hard-coded variable assignments that follow, so the three
# arguments above appear to be ignored -- confirm before relying on them.


# Project root. Held in its own variable rather than reassigning HOME: HOME is
# normally an exported variable, so overriding it would also change what `~`
# and child commands (transeq, makeblastdb) resolve as the home directory.
PROJECT_DIR="/home/mcariou/2021_legio/"
# Output directory for the concatenated FASTA files and the BLAST database.
OUT="${PROJECT_DIR}blastdb/phyloref_V2/"
# Concatenated coding sequences (input of transeq).
CAT="${OUT}/cat_phyloref_cds.fasta"
# Translated sequences (output of transeq, input of makeblastdb).
CATPROT="${OUT}/cat_phyloref_pep.fasta"
# Directory searched for <genome>*/longest_orfs.cds files.
Trans="/home/mcariou/2020_Attaiech/prot_db/Transdecoder/"
# ';'-separated genome table: the first line is skipped and the first column
# is used as the genome identifier.
TAB="${PROJECT_DIR}/phylolegio/doc/tabAss_V2.txt"
mcariou's avatar
mcariou committed
28
29
30
31
32
33
34
35
36
           
# Create the output directory up front and stop if that is impossible: the
# concatenated FASTA files and the BLAST database are all written under it.
mkdir -p -- "${OUT:?OUT must be set}" || exit 1

##################################################################################################################
### Read the genome table and concatenate the per-genome CDS files.

# Build $CAT once. When it already exists and is non-empty nothing is done;
# otherwise <Trans>/<genome>*/longest_orfs.cds is appended for every genome id
# found in the first ';'-separated column of $TAB (header line excluded).
if [[ -s "$CAT" ]]; then
    echo "cat already exists"
else
    if [[ ! -r "$TAB" ]]; then
        echo "cannot read genome table: $TAB" >&2
        exit 1
    fi

    # Write to a scratch file and rename it at the end, so an interrupted run
    # cannot leave a truncated $CAT that the -s test above would later accept.
    cat_tmp="${CAT}.partial"
    : > "$cat_tmp" || exit 1

    while IFS= read -r genome; do
        pattern="${Trans}/${genome}*/longest_orfs.cds"
        # Only the '*' after the genome id is unquoted, so it is the sole glob.
        # With no match the array holds the literal pattern, which then fails
        # the -s test below.
        matches=( "${Trans}/${genome}"*/longest_orfs.cds )
        if (( ${#matches[@]} > 1 )); then
            # Several directories share this prefix: skip rather than guess
            # (appending all of them could duplicate another genome's records).
            echo "ambiguous (${#matches[@]} matches): $pattern"
        elif [[ -s "${matches[0]}" ]]; then
            cat -- "${matches[0]}" >> "$cat_tmp" || exit 1
        else
            echo "doesn't: $pattern"
        fi
    # Skip the header line, trim blanks around the id and drop empty ids (an
    # empty id would turn the pattern into "<Trans>/*/longest_orfs.cds").
    done < <(awk -F ';' 'NR > 1 { gsub(/^[[:space:]]+|[[:space:]]+$/, "", $1); if ($1 != "") print $1 }' "$TAB")

    if [[ -s "$cat_tmp" ]]; then
        mv -- "$cat_tmp" "$CAT" || exit 1
    else
        # Nothing was collected: leave no empty file behind.
        rm -f -- "$cat_tmp"
    fi
fi


# Re-split concatenate
#sed '1 s/^/\n/; 2,$ s/>/~\n>/' $CAT | split -t '~' -l 100000 -d --filter="tail -n+2 | grep -v '^~$' > \$OUT\$FILE"


### Make Blast db

mcariou's avatar
mcariou committed
56
57
58
59
#makeblastdb -dbtype nucl -in $CAT -hash_index -out $OUT/phyloref_nuc -parse_seqids
###
# The database is built from proteins, so the concatenated CDS file has to be
# translated first. Stop on failure so that makeblastdb never runs on a missing
# or incomplete $CATPROT.
transeq -sequence "$CAT" -outseq "$CATPROT" || exit 1

makeblastdb -dbtype prot -in "$CATPROT" -hash_index -out "$OUT/phyloref_prot" -parse_seqids || exit 1
mcariou's avatar
mcariou committed
62
63
64


# end