From 33f7ac877c98c058a9d16e7fce970b6f6730fbff Mon Sep 17 00:00:00 2001 From: mcariou <115-mcariou@users.noreply.gitbio.ens-lyon.fr> Date: Wed, 24 Nov 2021 18:26:56 +0100 Subject: [PATCH] script 5 in progress, cat --- script/4_parse_PSIblast.R | 1 + script/4_parse_PSIblast.sh | 1 - script/5_cat_aln_phy.R | 47 +++++++++++++++++++++++++++++++++ script/5_cat_aln_phy.sh | 53 ++++++++++++++++++++++++++++++++++++++ script/test.log | 0 5 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 script/5_cat_aln_phy.R create mode 100755 script/5_cat_aln_phy.sh create mode 100644 script/test.log diff --git a/script/4_parse_PSIblast.R b/script/4_parse_PSIblast.R index ff2a5ad..b00f90f 100644 --- a/script/4_parse_PSIblast.R +++ b/script/4_parse_PSIblast.R @@ -101,6 +101,7 @@ system(cmd) library(ape) aln<-read.dna(fasta, format="fasta") +aln<-clustal(aln) names(listid_short)<-gsub(names(listid_short), pattern=" ", replacement="_") diff --git a/script/4_parse_PSIblast.sh b/script/4_parse_PSIblast.sh index 32b2b2b..cc81fa1 100755 --- a/script/4_parse_PSIblast.sh +++ b/script/4_parse_PSIblast.sh @@ -68,7 +68,6 @@ nrow=`cat $subblast | wc -l` echo "sequences already retrieved" else echo "retrieve fasta" - #Rscript --vanilla $rscript $subblast $EVAL $PERCID $PERCOVERLAP $NPERTAX $FASTA> $FASTA_REP/$Gene/paste_blast_test.log Rscript --vanilla $rscript $subblast $EVAL $PERCID $PERCOVERLAP $NPERTAX $FASTA $BLASTDBNUC $TAXO > $FASTA_REP/$Gene/paste_blast_test.log fi ; diff --git a/script/5_cat_aln_phy.R b/script/5_cat_aln_phy.R new file mode 100644 index 0000000..0de18aa --- /dev/null +++ b/script/5_cat_aln_phy.R @@ -0,0 +1,47 @@ +#!/usr/bin/env Rscript +args = commandArgs(trailingOnly=TRUE) + +# test if there is at least one argument: if not, return an error +if (length(args)!=1) { + stop("1 arguments must be supplied.\n", call.=FALSE) +} + + +#args<-c("/home/mcariou/2021_legio/fasta/78Lp", "~/2020_Attaiech/prot_db/Transdecoder", "~/2021_legio/phylolegio/doc/tabAss.txt") +#args<-c("/home/mcariou/2021_legio/fasta/78Lp") + + +subrep<-list.files(args[1]) +subrep<-subrep[grep(x=subrep, pattern="Q5")] + + +#### Change names in the fasta +library(ape) + + +cat<-list() + + +#### fuction + +file<-list.files(paste0(args[1], "/", subrep[1])) +file<-file[grep(x=file, pattern="aln")] +aln<-read.dna(paste0(args[1], "/",subrep[1], "/", file), format="fasta", as.character=TRUE) + +tmp<-aln +names(tmp)<-sapply(labels(aln), function(x) strsplit(x, split=".", fixed=TRUE)[[1]][1]) + +for (seq in names(tmp)){ + print(seq) + cat<-c(cat, list(seq=tmp[seq])) + names(cat)<-c(names(cat)[-length(cat)], seq) +} + + + +#write.dna(aln, file=fasta_aln, format="fasta", nbcol = 6, colsep = "", colw = 10) + + + + + diff --git a/script/5_cat_aln_phy.sh b/script/5_cat_aln_phy.sh new file mode 100755 index 0000000..3f47830 --- /dev/null +++ b/script/5_cat_aln_phy.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +echo "USAGE: ./5_aln_phy.sh \$1=fasta_path" +echo "--------------------------------------------------" + + +################################################################################################################## +### En local +#./5_aln_phy.sh ~/Documents/CIRI_BIBS_projects/2021_04_Doublet/pipeline/fasta/ lp0952_ortho + +### PSMN +#./5_cat_aln_phy.sh ~/2021_legio/fasta/78Lp/ +################################################################################################################## + +# variable +DATA=$1 +FASTA=$1/concat.fasta +PHYLIP=$1/concat.phylip + +echo $0 +rscript=`ls $0 | sed 's/.sh/.R/g'` +echo $rscript + + + +if [[ -s $FASTA ]] ; then + echo "sequences already concatenated" +else + echo "concatenate fasta" + Rscript --vanilla $rscript $DATA > $DATA/cat.log +fi ; + + +if [ -e $FASTA ] ; then + echo $FASTA" exists" + + #prank -d=$FASTA +F -o=$ALN + echo $ALN + ## Convert to phylipX + trimal -in $FASTA -out $PHYLIP -phylip + + ## phylogeny + phyml -i $PHYLIP -d nt -m HKY85 -a e -c 4 -s NNI -b -1 + +else + echo $FASTA "do not exists. incorrect input" +fi ; + + + + + +# fin diff --git a/script/test.log b/script/test.log new file mode 100644 index 0000000..e69de29 -- GitLab