Commit 65035b53 authored by mcariou's avatar mcariou
Browse files

add legio strain in 4th script

parent 15a080aa
......@@ -10,7 +10,7 @@ if (length(args)!=8) {
#args<-c("/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.psiblast", "0.0001", "0.3", "0.5", "1", "/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.fasta")
#args<-c("/home/mcariou/2021_legio/out_blastn/lpg2300.psiblast", "0.0001", "0.1", "0.1", "1", "/home/mcariou/2021_legio/fasta/lpg2300/lpg2300.fasta", "~/2021_legio/blastdb/phyloref/phyloref_nuc", "/home/mcariou/2021_legio/Taxo/accessionTaxa.sql")
#args<-c("/home/adminmarie/Documents/CIRI_BIBS_projects/2021_04_Doublet/pipeline/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.psiblast", "0.0001", "0.3", "0.5", "1", "/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.fasta")
#args<-c("/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.psiblast", "0.0001", "0.3", "0.5", "1", "/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.fasta", "~/2021_legio/blastdb/phyloref/phyloref_nuc", "/home/mcariou/2021_legio/Taxo/accessionTaxa.sql")
print("reading arguments and data...")
......@@ -75,22 +75,13 @@ print(paste0("Species names retrieved; dim blastn et taxa: ", dim(blastn), ", ",
blastn<-cbind(blastn, Taxa)
###############
#########################
# Here is time to keep legionella pneumophila strains
##############
##########################
blastn_lp<-blastn[blastn$species=="Legionella pneumophila",]
##########################
#### Keep one sequence per subject id
......@@ -111,13 +102,55 @@ blastn<-blastn[blastn$sseqid %in% listid,]
print(paste("Keep best hit for each species, nrow = ", nrow(blastn)))
listid_short<-sapply(listid, function(x) substr(x, 1, nchar(x)-2))
#### Retrieve sequences via blastcmd
system(paste0("rm ", fasta))
for (seq in listid_short){
cmd<-paste0("blastdbcmd -db ", blastdb, " -entry ", seq, " >> ", fasta)
cmd<-paste0("blastdbcmd -db ", blastdb, " -entry ", seq, " >> ", fasta, "_L")
system(cmd)
}
listid_short_L<-listid_short
#################################################################################################
### Legionella pneumophila
blastn_lp$evalue<-unlist(blastn_lp$evalue)
blastn_lp$length<-unlist(blastn_lp$length)
blastn_lp$sseqid<-unlist(blastn_lp$sseqid)
listid<-blastn_lp$sseqid
print(paste("Keep best hit for each species, nrow = ", nrow(blastn_lp)))
listid_short<-sapply(listid, function(x) substr(x, 1, nchar(x)-2))
#### Retrieve sequences via blastcmd
for (seq in listid_short){
cmd<-paste0("blastdbcmd -db ", blastdb, " -entry ", seq, " >> ", fasta, "_lp")
system(cmd)
}
listid_short_lp<-listid_short
#### And cat
cmd<-paste0("cat ", fasta, "_L ", fasta, "_lp > ", fasta)
system(cmd)
###########################################################################
##################################################################################################
#### Change names in the fasta
library(ape)
......@@ -125,6 +158,18 @@ library(ape)
aln<-read.dna(fasta, format="fasta")
alnaln<-clustal(aln)
names(listid_short_lp)<-paste0("Legionella pneumophila.", names(listid_short_lp))
listid_short<-c(listid_short_L, listid_short_lp)
#names(alnaln)<-names(aln)
names(listid_short)<-gsub(names(listid_short), pattern=" ", replacement="_")
......@@ -133,7 +178,7 @@ tmp<-sapply(rownames(alnaln), function(x) strsplit(x, " ")[[1]][1])
newnames<-sapply(tmp, function(x){
names(listid_short)[listid_short==x]
names(listid_short)[listid_short==x][1]
})
rownames(alnaln)<-newnames
......
......@@ -16,7 +16,7 @@ module load trimal
#/home/mcariou/2021_legio/phylolegio/script/4_parse_PSIblast.sh ~/2021_legio/out_blastn/78Lp_uniprot.psiblast ~/2021_legio/phylolegio/doc/tabAss.txt ~/2021_legio/fasta/78Lp ~/2021_legio/genes/78Lp_uniprot.fasta 0.0001 0.5 0.5 1
/home/mcariou/2021_legio/phylolegio/script/4_parse_PSIblast.sh ~/2021_legio/out_blastn/78Lp_uniprot.psiblast ~/2021_legio/phylolegio/doc/tabAss.txt ~/2021_legio/fasta/78Lp ~/2021_legio/genes/78Lp_uniprot.fasta 0.0001 0.5 0.5 1
/home/mcariou/2021_legio/phylolegio/script/5_cat_aln_phy.sh ~/2021_legio/fasta/78Lp/
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment