# Review notes. These lines sit ahead of the first "diff --git" header and are
# not part of any hunk; the patch text below them is unchanged.
#
# The patch touches five paths under script/:
#   4_parse_PSIblast.R  - adds `aln<-clustal(aln)` right after the read.dna() call.
#   4_parse_PSIblast.sh - removes a commented-out Rscript invocation; the live
#                         Rscript call next to it is kept as context.
#   5_cat_aln_phy.R     - new file. Stops unless exactly one argument is given,
#                         keeps the entries of list.files(args[1]) matching "Q5",
#                         and in the first of them reads the file(s) matching
#                         "aln" with read.dna(format="fasta", as.character=TRUE).
#                         Labels are cut at the first "." and each element is
#                         appended to the list `cat`, printing its name.
#   5_cat_aln_phy.sh    - new file. Sets FASTA=$1/concat.fasta and
#                         PHYLIP=$1/concat.phylip, derives the R script path from
#                         $0 with sed, and runs Rscript (stdout to $1/cat.log)
#                         unless $FASTA is already a non-empty file. If $FASTA
#                         exists it then runs trimal (-phylip) and phyml.
#   test.log            - new file entry with no content hunk.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed into
#   spaces, so the hunk layout cannot be reconstructed reliably from this text;
#   regenerate the patch from the repository before applying it.
# NOTE(review): in 5_cat_aln_phy.R the only write.dna() call is commented out, so
#   the visible code never writes concat.fasta, while 5_cat_aln_phy.sh depends on
#   that file existing. Looks like work in progress - confirm.
# NOTE(review): 5_cat_aln_phy.R only reads subrep[1]; the other "Q5" entries are
#   listed but not used in the visible code.
# NOTE(review): `cat<-list()` reuses the name of base R's cat() for a variable.
# NOTE(review): `names(tmp)<-` and `labels(aln)` presumably expect read.dna() to
#   return a list; if it returns a character matrix here the renaming and the
#   `tmp[seq]` lookup may not select whole sequences - verify against ape.
# NOTE(review): `sed 's/.sh/.R/g'` is unanchored and its dot is unescaped, so any
#   character followed by "sh" anywhere in the path is rewritten; `${0%.sh}.R`
#   would only strip the suffix.
# NOTE(review): 5_cat_aln_phy.sh echoes $ALN, but ALN is only mentioned in the
#   commented-out prank line and is never assigned in the visible script.
# NOTE(review): the usage text names ./5_aln_phy.sh, the file is 5_cat_aln_phy.sh.
diff --git a/script/4_parse_PSIblast.R b/script/4_parse_PSIblast.R index ff2a5ad806e9f03eb4fa9fcf7378258fd6eafde2..b00f90fda1f5d3d1b9147fe97e4c6ae7a3324226 100644 --- a/script/4_parse_PSIblast.R +++ b/script/4_parse_PSIblast.R @@ -101,6 +101,7 @@ system(cmd) library(ape) aln<-read.dna(fasta, format="fasta") +aln<-clustal(aln) names(listid_short)<-gsub(names(listid_short), pattern=" ", replacement="_") diff --git a/script/4_parse_PSIblast.sh b/script/4_parse_PSIblast.sh index 32b2b2b63e7c56ed7b28201dd79f5ebdf71f0081..cc81fa12fc2a96bc0548a5782d423692dfeb9455 100755 --- a/script/4_parse_PSIblast.sh +++ b/script/4_parse_PSIblast.sh @@ -68,7 +68,6 @@ nrow=`cat $subblast | wc -l` echo "sequences already retrieved" else echo "retrieve fasta" - #Rscript --vanilla $rscript $subblast $EVAL $PERCID $PERCOVERLAP $NPERTAX $FASTA> $FASTA_REP/$Gene/paste_blast_test.log Rscript --vanilla $rscript $subblast $EVAL $PERCID $PERCOVERLAP $NPERTAX $FASTA $BLASTDBNUC $TAXO > $FASTA_REP/$Gene/paste_blast_test.log fi ; diff --git a/script/5_cat_aln_phy.R b/script/5_cat_aln_phy.R new file mode 100644 index 0000000000000000000000000000000000000000..0de18aa3abec8270a1981f202a0b6423ef4afd35 --- /dev/null +++ b/script/5_cat_aln_phy.R @@ -0,0 +1,47 @@ +#!/usr/bin/env Rscript +args = commandArgs(trailingOnly=TRUE) + +# test if there is at least one argument: if not, return an error +if (length(args)!=1) { + stop("1 arguments must be supplied.\n", call.=FALSE) +} + + +#args<-c("/home/mcariou/2021_legio/fasta/78Lp", "~/2020_Attaiech/prot_db/Transdecoder", "~/2021_legio/phylolegio/doc/tabAss.txt") +#args<-c("/home/mcariou/2021_legio/fasta/78Lp") + + +subrep<-list.files(args[1]) +subrep<-subrep[grep(x=subrep, pattern="Q5")] + + +#### Change names in the fasta +library(ape) + + +cat<-list() + + +#### fuction + +file<-list.files(paste0(args[1], "/", subrep[1])) +file<-file[grep(x=file, pattern="aln")] +aln<-read.dna(paste0(args[1], "/",subrep[1], "/", file), format="fasta", as.character=TRUE) + 
+tmp<-aln +names(tmp)<-sapply(labels(aln), function(x) strsplit(x, split=".", fixed=TRUE)[[1]][1]) + +for (seq in names(tmp)){ + print(seq) + cat<-c(cat, list(seq=tmp[seq])) + names(cat)<-c(names(cat)[-length(cat)], seq) +} + + + +#write.dna(aln, file=fasta_aln, format="fasta", nbcol = 6, colsep = "", colw = 10) + + + + + diff --git a/script/5_cat_aln_phy.sh b/script/5_cat_aln_phy.sh new file mode 100755 index 0000000000000000000000000000000000000000..3f47830d757c2fa052ec812656c021efcfed4cd1 --- /dev/null +++ b/script/5_cat_aln_phy.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +echo "USAGE: ./5_aln_phy.sh \$1=fasta_path" +echo "--------------------------------------------------" + + +################################################################################################################## +### En local +#./5_aln_phy.sh ~/Documents/CIRI_BIBS_projects/2021_04_Doublet/pipeline/fasta/ lp0952_ortho + +### PSMN +#./5_cat_aln_phy.sh ~/2021_legio/fasta/78Lp/ +################################################################################################################## + +# variable +DATA=$1 +FASTA=$1/concat.fasta +PHYLIP=$1/concat.phylip + +echo $0 +rscript=`ls $0 | sed 's/.sh/.R/g'` +echo $rscript + + + +if [[ -s $FASTA ]] ; then + echo "sequences already concatenated" +else + echo "concatenate fasta" + Rscript --vanilla $rscript $DATA > $DATA/cat.log +fi ; + + +if [ -e $FASTA ] ; then + echo $FASTA" exists" + + #prank -d=$FASTA +F -o=$ALN + echo $ALN + ## Convert to phylipX + trimal -in $FASTA -out $PHYLIP -phylip + + ## phylogeny + phyml -i $PHYLIP -d nt -m HKY85 -a e -c 4 -s NNI -b -1 + +else + echo $FASTA "do not exists. incorrect input" +fi ; + + + + + +# fin diff --git a/script/test.log b/script/test.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391