Skip to content
Snippets Groups Projects
Commit 65035b53 authored by mcariou's avatar mcariou
Browse files

add legio strain in 4th script

parent 15a080aa
No related branches found
No related tags found
No related merge requests found
......@@ -10,7 +10,7 @@ if (length(args)!=8) {
#args<-c("/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.psiblast", "0.0001", "0.3", "0.5", "1", "/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.fasta")
#args<-c("/home/mcariou/2021_legio/out_blastn/lpg2300.psiblast", "0.0001", "0.1", "0.1", "1", "/home/mcariou/2021_legio/fasta/lpg2300/lpg2300.fasta", "~/2021_legio/blastdb/phyloref/phyloref_nuc", "/home/mcariou/2021_legio/Taxo/accessionTaxa.sql")
#args<-c("/home/adminmarie/Documents/CIRI_BIBS_projects/2021_04_Doublet/pipeline/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.psiblast", "0.0001", "0.3", "0.5", "1", "/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.fasta")
#args<-c("/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.psiblast", "0.0001", "0.3", "0.5", "1", "/home/mcariou/2021_legio/fasta/78Lp/Q5ZXP3_lpg0688/Q5ZXP3_lpg0688.fasta", "~/2021_legio/blastdb/phyloref/phyloref_nuc", "/home/mcariou/2021_legio/Taxo/accessionTaxa.sql")
print("reading arguments and data...")
......@@ -75,22 +75,13 @@ print(paste0("Species names retrieved; dim blastn et taxa: ", dim(blastn), ", ",
blastn<-cbind(blastn, Taxa)
###############
#########################
# Here is time to keep legionella pneumophila strains
##############
##########################
blastn_lp<-blastn[blastn$species=="Legionella pneumophila",]
##########################
#### Keep one sequence per subject id
......@@ -111,13 +102,55 @@ blastn<-blastn[blastn$sseqid %in% listid,]
print(paste("Keep best hit for each species, nrow = ", nrow(blastn)))
listid_short<-sapply(listid, function(x) substr(x, 1, nchar(x)-2))
#### Retrieve sequences via blastcmd
system(paste0("rm ", fasta))
for (seq in listid_short){
cmd<-paste0("blastdbcmd -db ", blastdb, " -entry ", seq, " >> ", fasta)
cmd<-paste0("blastdbcmd -db ", blastdb, " -entry ", seq, " >> ", fasta, "_L")
system(cmd)
}
listid_short_L<-listid_short
#################################################################################################
### Legionella pneumophila
blastn_lp$evalue<-unlist(blastn_lp$evalue)
blastn_lp$length<-unlist(blastn_lp$length)
blastn_lp$sseqid<-unlist(blastn_lp$sseqid)
listid<-blastn_lp$sseqid
print(paste("Keep best hit for each species, nrow = ", nrow(blastn_lp)))
listid_short<-sapply(listid, function(x) substr(x, 1, nchar(x)-2))
#### Retrieve sequences via blastcmd
for (seq in listid_short){
cmd<-paste0("blastdbcmd -db ", blastdb, " -entry ", seq, " >> ", fasta, "_lp")
system(cmd)
}
listid_short_lp<-listid_short
#### And cat
cmd<-paste0("cat ", fasta, "_L ", fasta, "_lp > ", fasta)
system(cmd)
###########################################################################
##################################################################################################
#### Change names in the fasta
library(ape)
......@@ -125,6 +158,18 @@ library(ape)
aln<-read.dna(fasta, format="fasta")
alnaln<-clustal(aln)
names(listid_short_lp)<-paste0("Legionella pneumophila.", names(listid_short_lp))
listid_short<-c(listid_short_L, listid_short_lp)
#names(alnaln)<-names(aln)
names(listid_short)<-gsub(names(listid_short), pattern=" ", replacement="_")
......@@ -133,7 +178,7 @@ tmp<-sapply(rownames(alnaln), function(x) strsplit(x, " ")[[1]][1])
newnames<-sapply(tmp, function(x){
names(listid_short)[listid_short==x]
names(listid_short)[listid_short==x][1]
})
rownames(alnaln)<-newnames
......
......@@ -16,7 +16,7 @@ module load trimal
#/home/mcariou/2021_legio/phylolegio/script/4_parse_PSIblast.sh ~/2021_legio/out_blastn/78Lp_uniprot.psiblast ~/2021_legio/phylolegio/doc/tabAss.txt ~/2021_legio/fasta/78Lp ~/2021_legio/genes/78Lp_uniprot.fasta 0.0001 0.5 0.5 1
/home/mcariou/2021_legio/phylolegio/script/4_parse_PSIblast.sh ~/2021_legio/out_blastn/78Lp_uniprot.psiblast ~/2021_legio/phylolegio/doc/tabAss.txt ~/2021_legio/fasta/78Lp ~/2021_legio/genes/78Lp_uniprot.fasta 0.0001 0.5 0.5 1
/home/mcariou/2021_legio/phylolegio/script/5_cat_aln_phy.sh ~/2021_legio/fasta/78Lp/
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment