diff --git a/covid_comp.Rnw b/covid_comp.Rnw index 0c613e8394600674ed6453f9710f44209b8cef79..47afac6595ca584a31224849f8173bf6905529df 100644 --- a/covid_comp.Rnw +++ b/covid_comp.Rnw @@ -22,9 +22,9 @@ \section{Files manipulations} -I will compare Janet results to DGINN results, on the SAME alignment. +I will compare Jeanette results to DGINN results, on the SAME alignment. -\subsection{Read Janet table} +\subsection{Read Jeanette table} <<>>= tab<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv", @@ -64,7 +64,8 @@ length(unique(tab$Gene.name)) sum(unique(dginn$Gene) %in% unique(tab$PreyGene)) sum(unique(dginn$Gene) %in% unique(tab$Gene.name)) -# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne. Je soupçonne que c'est celle là : +# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne. +# Je soupçonne que c'est celle là : tab[158,1:10] # Verif: @@ -73,7 +74,7 @@ tab[,1:10][(tab$Gene.name %in% unique(dginn$Gene))==F,] # Remplacement manuel par as.character(unique(dginn$Gene)[(unique(dginn$Gene) %in% tab$Gene.name)==F]) -# dans le tableau de Janet +# dans le tableau de Jeanette val_remp=as.character(unique(dginn$Gene)[(unique(dginn$Gene) %in% tab$Gene.name)==F]) @@ -94,7 +95,8 @@ tmp<-dginn[dginn$Method==method, c("Gene", "Omega", "PosSel", "PValue", "NbSites", "PSS")] names(tmp)<-c("Gene.name", paste0("Omega_", method), - paste0("PosSel_", method), paste0("PValue_", method), paste0("NbSites_", method), paste0("PSS_", method)) + paste0("PosSel_", method), paste0("PValue_", method), + paste0("NbSites_", method), paste0("PSS_", method)) tab<-merge(tab, tmp, by="Gene.name") @@ -121,8 +123,9 @@ tab<-merge(tab, tmp, by="Gene.name") \subsubsection{Write new table} <<>>= -write.table(tab, "COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt", - row.names=F, quote=F, sep="\t") +write.table(tab, + "COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt", + row.names=F, quote=F, sep="\t") @ @@ -135,7 +138,7 @@ Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" d <<omegaM7M8>>= plot(tab$whole.gene.dN.dS.model.0, tab$Omega_PamlM7M8, - xlab="Omega tabJanet", ylab="Omega DGINN") + xlab="Omega tabJeanette", ylab="Omega DGINN") @ Quels sont les 2 gènes qui s'écartent de la bissectrice? <<>>= @@ -152,7 +155,7 @@ Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne "PValue" + lign <<pvalM7M8>>= plot(tab$pVal.M8vsM7, tab$PValue_PamlM7M8, pch=20, - xlab="p-value tabJanet", ylab="p-value DGINN", main="M7vM8 Paml") + xlab="p-value tabJeanette", ylab="p-value DGINN", main="M7vM8 Paml") points(tab$pVal.M8vsM7[tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05], tab$PValue_PamlM7M8[tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05], col="red", pch=20) @@ -160,17 +163,20 @@ points(tab$pVal.M8vsM7[tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05], tab$PValue_PamlM7M8[tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05], col="green", pch=20) -legend("topleft", c("<0.05 in PamlM7M8 but >0.05 in Janet M8vsM7","<0.05 in Janet M8vsM7 but >0.05 in PamlM7M8"), - pch=20, col=c("red", "green")) +legend("topleft", c("<0.05 in PamlM7M8 but >0.05 in Jeanette M8vsM7", + "<0.05 in Jeanette M8vsM7 but >0.05 in PamlM7M8"), + pch=20, col=c("red", "green")) @ Quels sont les gènes en couleur: <<>>= -na.omit(tab[(tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05),c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8")]) +na.omit(tab[(tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05), +c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8")]) -na.omit(tab[(tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05),c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8")]) +na.omit(tab[(tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05), +c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8")]) @ Focus sur le gène CIT pour lequel la différence est vraiment assez importante: @@ -203,7 +209,7 @@ oui<-apply(ouitab, 1, function(x) sum(x=="Y")) stripchart(x=list(oui, non), method="jitter", jitter=0.2, vertical=T, pch=20, cex=0.5, - group.names=c("Yes Janet", "No Janet"), + group.names=c("Yes Jeanette", "No Jeanette"), ylab="Nb YES from dginn") @@ -225,7 +231,7 @@ head(sel) <<nsites, eval=FALSE, echo=FALSE>>= plot(sel$Number.of.codons.with.BEB....0.9, sel$NbSites_PamlM7M8) -# toujours plus de codon dans la version de janet +# toujours plus de codon dans la version de jeanette listdginn<-sapply(sel$PSS_PamlM7M8, function(x){ tmp<-strsplit(as.character(x), split=",")[[1]] @@ -241,7 +247,7 @@ listjanet<-sapply(sel$Codons.under.positive.selection..BEB..0.9...alignment.posi tmp<-strsplit(as.character(x), split=",")[[1]] tmp2<-sapply(tmp, function(x) strsplit(as.character(x), split="_")[[1]][1]) tmp2<-unlist(tmp2) - names(tmp2)<-rep("janet", length(tmp2)) + names(tmp2)<-rep("jeanette", length(tmp2)) return(unlist(tmp2)) }) @@ -253,5 +259,121 @@ listjoined<-mapply(c, listdginn, listjanet, SIMPLIFY=FALSE) +\section{Bats DGINN vs Bats Cooper} + + + +\subsection{Read DGINN table, Bats} + +Lecture du tableau. + +<<>>= +dginnbats<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/2020_bats_completeResults.csv", + fill=T, h=T) +dim(dginnbats) +names(dginnbats) +length(unique(dginnbats$Gene)) +length(unique(tab$cooper.batsGene)) +table(unique(tab$cooper.batsGene) %in% unique(dginnbats$Gene)) +@ + +Quels sont les gènes du tableau Cooper qui ne sont pas dans la sortie de DGINN + +<<>>= +unique(tab$cooper.batsGene)[unique(tab$cooper.batsGene) %in% unique(dginnbats$Gene)==F] +@ + +Fusionner les tableaux + + + + + + + + + + + +<<last>>= + +add_col<-function(dginnfile=dginnbats, method="PamlM1M2", prefixe="bats_"){ + +tmp<-dginn[dginnfile$Method==method, + c("Gene", "Omega", "PosSel", "PValue", "NbSites", "PSS")] + +names(tmp)<-c("cooper.batsGene", paste0(prefixe, "Omega_", method), + paste0(prefixe, "PosSel_", method), paste0(prefixe, "PValue_", method), + paste0(prefixe, "NbSites_", method), paste0(prefixe, "PSS_", method)) + +tab<-merge(tab, tmp, by="cooper.batsGene", all.x=T) + +return(tab) +} + +tab<-add_col(dginnfile=dginnbats, method="PamlM1M2", prefixe="bats_") + +tab<-add_col(dginnfile=dginnbats, method="PamlM7M8", prefixe="bats_") + +tab<-add_col(dginnfile=dginnbats, method="BppM1M2", prefixe="bats_") + +tab<-add_col(dginnfile=dginnbats, method="BppM7M8", prefixe="bats_") + + +head(tab[, c("cooper.batsGene", "cooper.batsAverage_dNdS","cooper.batsM7.M8_p_value", "bats_Omega_PamlM7M8","bats_PValue_PamlM7M8")]) + +# Manip pour la colonne BUSTED + +tmp<-dginn[dginn$Method=="BUSTED",c("Gene", "Omega", "PosSel", "PValue")] +names(tmp)<-c("cooper.batsGene", "bats_Omega_BUSTED", "bats_PosSel_BUSTED", "bats_PValue_BUSTED") +tab<-merge(tab, tmp, by="cooper.batsGene") + +tmp<-dginn[dginn$Method=="MEME",c("Gene", "NbSites", "PSS")] +names(tmp)<-c("cooper.batsGene", "bats_NbSites_MEME", "bats_PSS_MEME") +tab<-merge(tab, tmp, by="cooper.batsGene") + +dim(tab) +@ + +\subsection{Comparaisons et figures} + +Faire un sous tableau. +<<fig>>= +tabbats<-na.omit(tab[, c("cooper.batsGene", "cooper.batsAverage_dNdS","cooper.batsM7.M8_p_value", "bats_Omega_PamlM7M8","bats_PValue_PamlM7M8")]) +dim(tabbats) +@ + +Comparaison des Omega + +<<omegaM7M8bats>>= + +plot(tab$cooper.batsAverage_dNdS, tab$bats_Omega_PamlM7M8, + xlab="Omega Cooper", ylab="Omega DGINN") +@ + +pvalues pour M7M8 + +<<pvalM7M8bats>>= + +plot(tab$cooper.batsM7.M8_p_value, tab$bats_PValue_PamlM7M8, pch=20, + xlab="p-value tabJeanette", ylab="p-value DGINN", main="M7vM8 Paml") + +points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value>0.05 & tab$bats_PValue_PamlM7M8<0.05], + tab$bats_PValue_PamlM7M8[tab$cooper.batsM7.M8_p_value>0.05 & tab$bats_PValue_PamlM7M8<0.05], + col="red", pch=20) + points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value<0.05 & tab$bats_PValue_PamlM7M8>0.05], + tab$bats_PValue_PamlM7M8[tab$cooper.batsM7.M8_p_value<0.05 & tab$bats_PValue_PamlM7M8>0.05], + col="green", pch=20) + + +legend("topleft", c("<0.05 in PamlM7M8 but >0.05 in Jeanette M8vsM7", + "<0.05 in Jeanette M8vsM7 but >0.05 in PamlM7M8"), + pch=20, col=c("red", "green")) + +@ + +Apparemment, ces comparaisons n'ont pas trop de sens. + + \end{document} diff --git a/covid_comp.pdf b/covid_comp.pdf index 33817d8779eedd4488718b32300f06418a9e09eb..66726a7a5915ff26aa1127ea677b64553cc8a18f 100644 Binary files a/covid_comp.pdf and b/covid_comp.pdf differ diff --git a/covid_comp.tex b/covid_comp.tex index 5ec30f319a1e24498d9fb06a12abb8ef18bc5583..28d0f7ee75669c44e06bb28e2de3fb82987037c1 100644 --- a/covid_comp.tex +++ b/covid_comp.tex @@ -72,9 +72,9 @@ \section{Files manipulations} -I will compare Janet results to DGINN results, on the SAME alignment. +I will compare Jeanette results to DGINN results, on the SAME alignment. -\subsection{Read Janet table} +\subsection{Read Jeanette table} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} @@ -263,7 +263,8 @@ I will compare Janet results to DGINN results, on the SAME alignment. ## [1] 331 \end{verbatim} \begin{alltt} -\hlcom{# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne. Je soupçonne que c'est celle là :} +\hlcom{# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne. } +\hlcom{# Je soupçonne que c'est celle là :} \hlstd{tab[}\hlnum{158}\hlstd{,}\hlnum{1}\hlopt{:}\hlnum{10}\hlstd{]} \end{alltt} \begin{verbatim} @@ -296,7 +297,7 @@ I will compare Janet results to DGINN results, on the SAME alignment. ## [1] "MARC1" \end{verbatim} \begin{alltt} -\hlcom{# dans le tableau de Janet} +\hlcom{# dans le tableau de Jeanette} \hlstd{val_remp}\hlkwb{=}\hlkwd{as.character}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)[(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F])} @@ -323,7 +324,8 @@ I will compare Janet results to DGINN results, on the SAME alignment. \hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]} \hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"Omega_"}\hlstd{, method),} - \hlkwd{paste0}\hlstd{(}\hlstr{"PosSel_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PValue_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"NbSites_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PSS_"}\hlstd{, method))} + \hlkwd{paste0}\hlstd{(}\hlstr{"PosSel_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PValue_"}\hlstd{, method),} + \hlkwd{paste0}\hlstd{(}\hlstr{"NbSites_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PSS_"}\hlstd{, method))} \hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} @@ -353,8 +355,9 @@ I will compare Janet results to DGINN results, on the SAME alignment. \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} -\hlkwd{write.table}\hlstd{(tab,} \hlstr{"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt"}\hlstd{,} - \hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)} +\hlkwd{write.table}\hlstd{(tab,} + \hlstr{"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt"}\hlstd{,} + \hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)} \end{alltt} \end{kframe} \end{knitrout} @@ -370,7 +373,7 @@ Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" d \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0, tab}\hlopt{$}\hlstd{Omega_PamlM7M8,} - \hlkwc{xlab}\hlstd{=}\hlstr{"Omega tabJanet"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN"}\hlstd{)} + \hlkwc{xlab}\hlstd{=}\hlstr{"Omega tabJeanette"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN"}\hlstd{)} \end{alltt} \end{kframe} \includegraphics[width=\maxwidth]{figure/omegaM7M8-1} @@ -403,7 +406,7 @@ Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne "PValue" + lign \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7, tab}\hlopt{$}\hlstd{PValue_PamlM7M8,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} - \hlkwc{xlab}\hlstd{=}\hlstr{"p-value tabJanet"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"p-value DGINN"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"M7vM8 Paml"}\hlstd{)} + \hlkwc{xlab}\hlstd{=}\hlstr{"p-value tabJeanette"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"p-value DGINN"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"M7vM8 Paml"}\hlstd{)} \hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{<}\hlnum{0.05}\hlstd{],} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{<}\hlnum{0.05}\hlstd{],} \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} @@ -411,8 +414,9 @@ Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne "PValue" + lign \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{>}\hlnum{0.05}\hlstd{],} \hlkwc{col}\hlstd{=}\hlstr{"green"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} -\hlkwd{legend}\hlstd{(}\hlstr{"topleft"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"<0.05 in PamlM7M8 but >0.05 in Janet M8vsM7"}\hlstd{,}\hlstr{"<0.05 in Janet M8vsM7 but >0.05 in PamlM7M8"}\hlstd{),} - \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"red"}\hlstd{,} \hlstr{"green"}\hlstd{))} +\hlkwd{legend}\hlstd{(}\hlstr{"topleft"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"<0.05 in PamlM7M8 but >0.05 in Jeanette M8vsM7"}\hlstd{,} + \hlstr{"<0.05 in Jeanette M8vsM7 but >0.05 in PamlM7M8"}\hlstd{),} + \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"red"}\hlstd{,} \hlstr{"green"}\hlstd{))} \end{alltt} \end{kframe} \includegraphics[width=\maxwidth]{figure/pvalM7M8-1} @@ -424,7 +428,8 @@ Quels sont les gènes en couleur: \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} -\hlkwd{na.omit}\hlstd{(tab[(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{<}\hlnum{0.05}\hlstd{),}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"pVal.M8vsM7"}\hlstd{,} \hlstr{"PValue_PamlM7M8"}\hlstd{,} \hlstr{"whole.gene.dN.dS.model.0"}\hlstd{,} \hlstr{"Omega_PamlM7M8"}\hlstd{)])} +\hlkwd{na.omit}\hlstd{(tab[(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{<}\hlnum{0.05}\hlstd{),} +\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"pVal.M8vsM7"}\hlstd{,} \hlstr{"PValue_PamlM7M8"}\hlstd{,} \hlstr{"whole.gene.dN.dS.model.0"}\hlstd{,} \hlstr{"Omega_PamlM7M8"}\hlstd{)])} \end{alltt} \begin{verbatim} ## Gene.name pVal.M8vsM7 PValue_PamlM7M8 whole.gene.dN.dS.model.0 @@ -443,7 +448,8 @@ Quels sont les gènes en couleur: ## 322 0.04780560 \end{verbatim} \begin{alltt} -\hlkwd{na.omit}\hlstd{(tab[(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{>}\hlnum{0.05}\hlstd{),}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"pVal.M8vsM7"}\hlstd{,} \hlstr{"PValue_PamlM7M8"}\hlstd{,} \hlstr{"whole.gene.dN.dS.model.0"}\hlstd{,} \hlstr{"Omega_PamlM7M8"}\hlstd{)])} +\hlkwd{na.omit}\hlstd{(tab[(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{>}\hlnum{0.05}\hlstd{),} +\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"pVal.M8vsM7"}\hlstd{,} \hlstr{"PValue_PamlM7M8"}\hlstd{,} \hlstr{"whole.gene.dN.dS.model.0"}\hlstd{,} \hlstr{"Omega_PamlM7M8"}\hlstd{)])} \end{alltt} \begin{verbatim} ## Gene.name pVal.M8vsM7 PValue_PamlM7M8 whole.gene.dN.dS.model.0 @@ -529,7 +535,7 @@ Est-ce que les gènes avec une faible p-value sont détecté par 1,2,3,4 ou 5 m \hlkwd{stripchart}\hlstd{(}\hlkwc{x}\hlstd{=}\hlkwd{list}\hlstd{(oui, non),} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{jitter}\hlstd{=}\hlnum{0.2}\hlstd{,} \hlkwc{vertical}\hlstd{=T,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.5}\hlstd{,} - \hlkwc{group.names}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Yes Janet"}\hlstd{,} \hlstr{"No Janet"}\hlstd{),} + \hlkwc{group.names}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Yes Jeanette"}\hlstd{,} \hlstr{"No Jeanette"}\hlstd{),} \hlkwc{ylab}\hlstd{=}\hlstr{"Nb YES from dginn"}\hlstd{)} \end{alltt} \end{kframe} @@ -549,5 +555,198 @@ Est-ce que les gènes avec une faible p-value sont détecté par 1,2,3,4 ou 5 m +\section{Bats DGINN vs Bats Cooper} + + + +\subsection{Read DGINN table, Bats} + +Lecture du tableau. + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{dginnbats}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/2020_bats_completeResults.csv"}\hlstd{,} + \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} +\hlkwd{dim}\hlstd{(dginnbats)} +\end{alltt} +\begin{verbatim} +## [1] 2904 9 +\end{verbatim} +\begin{alltt} +\hlkwd{names}\hlstd{(dginnbats)} +\end{alltt} +\begin{verbatim} +## [1] "FullName" "Gene" "GeneSize" "Omega" "Method" "PosSel" "PValue" +## [8] "NbSites" "PSS" +\end{verbatim} +\begin{alltt} +\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene))} +\end{alltt} +\begin{verbatim} +## [1] 410 +\end{verbatim} +\begin{alltt} +\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsGene))} +\end{alltt} +\begin{verbatim} +## [1] 218 +\end{verbatim} +\begin{alltt} +\hlkwd{table}\hlstd{(}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsGene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene))} +\end{alltt} +\begin{verbatim} +## +## FALSE TRUE +## 4 214 +\end{verbatim} +\end{kframe} +\end{knitrout} + +Quels sont les gènes du tableau Cooper qui ne sont pas dans la sortie de DGINN + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsGene)[}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsGene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlstd{F]} +\end{alltt} +\begin{verbatim} +## [1] BCS1L C1orf50 CYB5B +## 218 Levels: AAR2 AASS AATF ACADM ACSL3 ADAMTS1 AGPS AKAP8L ALG11 ALG5 ... ZYG11B +\end{verbatim} +\end{kframe} +\end{knitrout} + +Fusionner les tableaux + + + + + + + + + + + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{add_col}\hlkwb{<-}\hlkwa{function}\hlstd{(}\hlkwc{dginnfile}\hlstd{=dginnbats,} \hlkwc{method}\hlstd{=}\hlstr{"PamlM1M2"}\hlstd{,} \hlkwc{prefixe}\hlstd{=}\hlstr{"bats_"}\hlstd{)\{} + +\hlstd{tmp}\hlkwb{<-}\hlstd{dginn[dginnfile}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstd{method,} + \hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]} + +\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"cooper.batsGene"}\hlstd{,} \hlkwd{paste0}\hlstd{(prefixe,} \hlstr{"Omega_"}\hlstd{, method),} + \hlkwd{paste0}\hlstd{(prefixe,} \hlstr{"PosSel_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(prefixe,} \hlstr{"PValue_"}\hlstd{, method),} + \hlkwd{paste0}\hlstd{(prefixe,} \hlstr{"NbSites_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(prefixe,} \hlstr{"PSS_"}\hlstd{, method))} + +\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"cooper.batsGene"}\hlstd{,} \hlkwc{all.x}\hlstd{=T)} + +\hlkwd{return}\hlstd{(tab)} +\hlstd{\}} + +\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlkwc{dginnfile}\hlstd{=dginnbats,} \hlkwc{method}\hlstd{=}\hlstr{"PamlM1M2"}\hlstd{,} \hlkwc{prefixe}\hlstd{=}\hlstr{"bats_"}\hlstd{)} + +\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlkwc{dginnfile}\hlstd{=dginnbats,} \hlkwc{method}\hlstd{=}\hlstr{"PamlM7M8"}\hlstd{,} \hlkwc{prefixe}\hlstd{=}\hlstr{"bats_"}\hlstd{)} + +\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlkwc{dginnfile}\hlstd{=dginnbats,} \hlkwc{method}\hlstd{=}\hlstr{"BppM1M2"}\hlstd{,} \hlkwc{prefixe}\hlstd{=}\hlstr{"bats_"}\hlstd{)} + +\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlkwc{dginnfile}\hlstd{=dginnbats,} \hlkwc{method}\hlstd{=}\hlstr{"BppM7M8"}\hlstd{,} \hlkwc{prefixe}\hlstd{=}\hlstr{"bats_"}\hlstd{)} + + +\hlkwd{head}\hlstd{(tab[,} \hlkwd{c}\hlstd{(}\hlstr{"cooper.batsGene"}\hlstd{,} \hlstr{"cooper.batsAverage_dNdS"}\hlstd{,}\hlstr{"cooper.batsM7.M8_p_value"}\hlstd{,} \hlstr{"bats_Omega_PamlM7M8"}\hlstd{,}\hlstr{"bats_PValue_PamlM7M8"}\hlstd{)])} +\end{alltt} +\begin{verbatim} +## cooper.batsGene cooper.batsAverage_dNdS cooper.batsM7.M8_p_value +## 1 NA NA +## 2 NA NA +## 3 NA NA +## 4 NA NA +## 5 NA NA +## 6 NA NA +## bats_Omega_PamlM7M8 bats_PValue_PamlM7M8 +## 1 NA NA +## 2 NA NA +## 3 NA NA +## 4 NA NA +## 5 NA NA +## 6 NA NA +\end{verbatim} +\begin{alltt} +\hlcom{# Manip pour la colonne BUSTED} + +\hlstd{tmp}\hlkwb{<-}\hlstd{dginn[dginn}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"BUSTED"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{)]} +\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"cooper.batsGene"}\hlstd{,} \hlstr{"bats_Omega_BUSTED"}\hlstd{,} \hlstr{"bats_PosSel_BUSTED"}\hlstd{,} \hlstr{"bats_PValue_BUSTED"}\hlstd{)} +\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"cooper.batsGene"}\hlstd{)} + +\hlstd{tmp}\hlkwb{<-}\hlstd{dginn[dginn}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"MEME"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]} +\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"cooper.batsGene"}\hlstd{,} \hlstr{"bats_NbSites_MEME"}\hlstd{,} \hlstr{"bats_PSS_MEME"}\hlstd{)} +\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"cooper.batsGene"}\hlstd{)} + +\hlkwd{dim}\hlstd{(tab)} +\end{alltt} +\begin{verbatim} +## [1] 211 134 +\end{verbatim} +\end{kframe} +\end{knitrout} + +\subsection{Comparaisons et figures} + +Faire un sous tableau. +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{tabbats}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,} \hlkwd{c}\hlstd{(}\hlstr{"cooper.batsGene"}\hlstd{,} \hlstr{"cooper.batsAverage_dNdS"}\hlstd{,}\hlstr{"cooper.batsM7.M8_p_value"}\hlstd{,} \hlstr{"bats_Omega_PamlM7M8"}\hlstd{,}\hlstr{"bats_PValue_PamlM7M8"}\hlstd{)])} +\hlkwd{dim}\hlstd{(tabbats)} +\end{alltt} +\begin{verbatim} +## [1] 189 5 +\end{verbatim} +\end{kframe} +\end{knitrout} + +Comparaison des Omega + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS, tab}\hlopt{$}\hlstd{bats_Omega_PamlM7M8,} + \hlkwc{xlab}\hlstd{=}\hlstr{"Omega Cooper"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN"}\hlstd{)} +\end{alltt} +\end{kframe} +\includegraphics[width=\maxwidth]{figure/omegaM7M8bats-1} + +\end{knitrout} + +pvalues pour M7M8 + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value, tab}\hlopt{$}\hlstd{bats_PValue_PamlM7M8,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} + \hlkwc{xlab}\hlstd{=}\hlstr{"p-value tabJeanette"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"p-value DGINN"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"M7vM8 Paml"}\hlstd{)} + +\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{bats_PValue_PamlM7M8}\hlopt{<}\hlnum{0.05}\hlstd{],} + \hlstd{tab}\hlopt{$}\hlstd{bats_PValue_PamlM7M8[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{bats_PValue_PamlM7M8}\hlopt{<}\hlnum{0.05}\hlstd{],} + \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} + \hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{bats_PValue_PamlM7M8}\hlopt{>}\hlnum{0.05}\hlstd{],} + \hlstd{tab}\hlopt{$}\hlstd{bats_PValue_PamlM7M8[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{bats_PValue_PamlM7M8}\hlopt{>}\hlnum{0.05}\hlstd{],} + \hlkwc{col}\hlstd{=}\hlstr{"green"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} + + +\hlkwd{legend}\hlstd{(}\hlstr{"topleft"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"<0.05 in PamlM7M8 but >0.05 in Jeanette M8vsM7"}\hlstd{,} + \hlstr{"<0.05 in Jeanette M8vsM7 but >0.05 in PamlM7M8"}\hlstd{),} + \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"red"}\hlstd{,} \hlstr{"green"}\hlstd{))} +\end{alltt} +\end{kframe} +\includegraphics[width=\maxwidth]{figure/pvalM7M8bats-1} + +\end{knitrout} + +Apparemment, ces comparaisons n'ont pas trop de sens. + + \end{document} diff --git a/figure/omegaM7M8-1.pdf b/figure/omegaM7M8-1.pdf index dbd209b37f7d9c76afa4ddedcb506272b24c863c..f5785e4065db51e1f7e69d49437af09d8c34ece4 100644 Binary files a/figure/omegaM7M8-1.pdf and b/figure/omegaM7M8-1.pdf differ diff --git a/figure/omegaM7M8bats-1.pdf b/figure/omegaM7M8bats-1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a54b084383713fdbe178138963bfff1847c296a9 Binary files /dev/null and b/figure/omegaM7M8bats-1.pdf differ diff --git a/figure/pvalM7M8-1.pdf b/figure/pvalM7M8-1.pdf index bde88aabb82cf9a5cd695fa1120de729d0aa258d..840c9f61dca97d19006b51094d57eecb81169c6b 100644 Binary files a/figure/pvalM7M8-1.pdf and b/figure/pvalM7M8-1.pdf differ diff --git a/figure/pvalM7M8bats-1.pdf b/figure/pvalM7M8bats-1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fc788bcefdb449d8534a38cd2db3b504e58f58ea Binary files /dev/null and b/figure/pvalM7M8bats-1.pdf differ diff --git a/figure/stripchart-1.pdf b/figure/stripchart-1.pdf index 5a043d226a9fcefe06ab4ec2fed3abeb1eeb38c4..0fc54306b0b532b0c67b84bb77bb7af8e5907efd 100644 Binary files a/figure/stripchart-1.pdf and b/figure/stripchart-1.pdf differ