From c6dc77df8c467a3cf3cddfb1e70b50180f583ba7 Mon Sep 17 00:00:00 2001
From: mcariou <115-mcariou@users.noreply.gitbio.ens-lyon.fr>
Date: Fri, 14 Jan 2022 09:45:52 +0100
Subject: [PATCH] add 5 legio: name Legionella_pneumophila sequences by their ".p" prefix when concatenating alignments

---
 script/4_parse_PSIblast.R            |  2 +-
 script/5_cat_aln_phy.R               | 18 +++++++++++++++---
 script/psmn/runscript_78Lp_step34.sh |  2 +-
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/script/4_parse_PSIblast.R b/script/4_parse_PSIblast.R
index 89846fd..5b5e813 100644
--- a/script/4_parse_PSIblast.R
+++ b/script/4_parse_PSIblast.R
@@ -105,7 +105,7 @@ listid_short<-sapply(listid, function(x) substr(x, 1, nchar(x)-2))
 
 
 #### Retrieve sequences via blastcmd
-system(paste0("rm ", fasta))
+#system(paste0("rm ", fasta))
 for (seq in listid_short){ 
 cmd<-paste0("blastdbcmd -db ", blastdb, " -entry ",  seq, " >> ", fasta, "_L")
 system(cmd)
diff --git a/script/5_cat_aln_phy.R b/script/5_cat_aln_phy.R
index f94da67..7d22014 100644
--- a/script/5_cat_aln_phy.R
+++ b/script/5_cat_aln_phy.R
@@ -34,8 +34,18 @@ file<-file[grep(x=file, pattern="aln")]
         aln<-read.dna(paste0(args[1], "/",subrepcurrent, "/", file), format="fasta", as.character=TRUE)
         tmp<-aln
         rownames(tmp)<-sapply(rownames(aln), function(x) strsplit(x, split=".", fixed=TRUE)[[1]][1])
-
-        for (seq in rownames(tmp)){
+ 
+        for (i in 1:length(rownames(tmp))){
+            seq<-rownames(tmp)[i]
+            seq_2<-names(rownames(tmp)[i])
+
+            if(seq=="Legionella_pneumophila"){
+                seq_2<-strsplit(as.character(seq_2), ".p", fixed=TRUE)[[1]][1]
+                if((seq_2 %in% names(cat))==FALSE){
+                    cat<-c(cat, list(seq=""))
+                    names(cat)<-c(names(cat)[-length(cat)], seq_2)
+                }
+            }
             if((seq %in% names(cat))==FALSE){
                 #print(seq)
                 cat<-c(cat, list(seq=""))
@@ -53,8 +63,10 @@ file<-list.files(paste0(args[1], "/", subrepi))
 file<-file[grep(x=file, pattern="aln")]
     if (length(file)>0){
         aln<-read.dna(paste0(args[1], "/",subrepi, "/", file), format="fasta", as.character=TRUE, as.matrix=FALSE)
+        old_aln<-sapply(as.character(names(aln)), function(x) strsplit(x, ".p", fixed=TRUE)[[1]][1])
         names(aln)<-sapply(names(aln), function(x) strsplit(x, split=".", fixed=TRUE)[[1]][1])
-        
+        names(aln)[names(aln)=="Legionella_pneumophila"]<-old_aln[names(aln)=="Legionella_pneumophila"]
+
         len<-max(unlist(lapply(aln, length)))
         
         for (seq in names(cat)){
diff --git a/script/psmn/runscript_78Lp_step34.sh b/script/psmn/runscript_78Lp_step34.sh
index edf7943..a9adfd3 100755
--- a/script/psmn/runscript_78Lp_step34.sh
+++ b/script/psmn/runscript_78Lp_step34.sh
@@ -16,7 +16,7 @@ module load trimal
 
 
 
-/home/mcariou/2021_legio/phylolegio/script/4_parse_PSIblast.sh  ~/2021_legio/out_blastn/78Lp_uniprot.psiblast  ~/2021_legio/phylolegio/doc/tabAss.txt ~/2021_legio/fasta/78Lp ~/2021_legio/genes/78Lp_uniprot.fasta 0.0001 0.5 0.5 1
+#/home/mcariou/2021_legio/phylolegio/script/4_parse_PSIblast.sh  ~/2021_legio/out_blastn/78Lp_uniprot.psiblast  ~/2021_legio/phylolegio/doc/tabAss.txt ~/2021_legio/fasta/78Lp ~/2021_legio/genes/78Lp_uniprot.fasta 0.0001 0.5 0.5 1
 
 
 /home/mcariou/2021_legio/phylolegio/script/5_cat_aln_phy.sh ~/2021_legio/fasta/78Lp/
-- 
GitLab