diff --git a/script/4_parse_PSIblast.R b/script/4_parse_PSIblast.R index 89846fd7160fe6a11d302920fffdc81dcd310d30..5b5e8130df30937d183c5a8d41c699ed7c10e53b 100644 --- a/script/4_parse_PSIblast.R +++ b/script/4_parse_PSIblast.R @@ -105,7 +105,7 @@ listid_short<-sapply(listid, function(x) substr(x, 1, nchar(x)-2)) #### Retrieve sequences via blastcmd -system(paste0("rm ", fasta)) +#system(paste0("rm ", fasta)) for (seq in listid_short){ cmd<-paste0("blastdbcmd -db ", blastdb, " -entry ", seq, " >> ", fasta, "_L") system(cmd) diff --git a/script/5_cat_aln_phy.R b/script/5_cat_aln_phy.R index f94da67ca934b403f208909c4295b9ea36bf91d1..7d2201426972c6a57eb63199955ca32da3b4fabc 100644 --- a/script/5_cat_aln_phy.R +++ b/script/5_cat_aln_phy.R @@ -34,8 +34,18 @@ file<-file[grep(x=file, pattern="aln")] aln<-read.dna(paste0(args[1], "/",subrepcurrent, "/", file), format="fasta", as.character=TRUE) tmp<-aln rownames(tmp)<-sapply(rownames(aln), function(x) strsplit(x, split=".", fixed=TRUE)[[1]][1]) - - for (seq in rownames(tmp)){ + + for (i in 1:length(rownames(tmp))){ + seq<-rownames(tmp)[i] + seq_2<-names(rownames(tmp)[i]) + + if(seq=="Legionella_pneumophila"){ + seq_2<-strsplit(as.character(seq_2), ".p", fixed=TRUE)[[1]][1] + if((seq_2 %in% names(cat))==FALSE){ + cat<-c(cat, list(seq="")) + names(cat)<-c(names(cat)[-length(cat)], seq_2) + } + } if((seq %in% names(cat))==FALSE){ #print(seq) cat<-c(cat, list(seq="")) @@ -53,8 +63,10 @@ file<-list.files(paste0(args[1], "/", subrepi)) file<-file[grep(x=file, pattern="aln")] if (length(file)>0){ aln<-read.dna(paste0(args[1], "/",subrepi, "/", file), format="fasta", as.character=TRUE, as.matrix=FALSE) + old_aln<-sapply(as.character(names(aln)), function(x) strsplit(x, ".p", fixed=TRUE)[[1]][1]) names(aln)<-sapply(names(aln), function(x) strsplit(x, split=".", fixed=TRUE)[[1]][1]) - + names(aln)[names(aln)=="Legionella_pneumophila"]<-old_aln[names(aln)=="Legionella_pneumophila"] + len<-max(unlist(lapply(aln, length))) for (seq in names(cat)){ diff --git a/script/psmn/runscript_78Lp_step34.sh b/script/psmn/runscript_78Lp_step34.sh index edf7943694c00057c2f1e6251c592ff73c74a6cc..a9adfd3e5dbb2b6645ed202b7761c63667399827 100755 --- a/script/psmn/runscript_78Lp_step34.sh +++ b/script/psmn/runscript_78Lp_step34.sh @@ -16,7 +16,7 @@ module load trimal -/home/mcariou/2021_legio/phylolegio/script/4_parse_PSIblast.sh ~/2021_legio/out_blastn/78Lp_uniprot.psiblast ~/2021_legio/phylolegio/doc/tabAss.txt ~/2021_legio/fasta/78Lp ~/2021_legio/genes/78Lp_uniprot.fasta 0.0001 0.5 0.5 1 +#/home/mcariou/2021_legio/phylolegio/script/4_parse_PSIblast.sh ~/2021_legio/out_blastn/78Lp_uniprot.psiblast ~/2021_legio/phylolegio/doc/tabAss.txt ~/2021_legio/fasta/78Lp ~/2021_legio/genes/78Lp_uniprot.fasta 0.0001 0.5 0.5 1 /home/mcariou/2021_legio/phylolegio/script/5_cat_aln_phy.sh ~/2021_legio/fasta/78Lp/