diff --git a/covid_comp.Rnw b/covid_comp.Rnw deleted file mode 100644 index 45b79b52eb0e969117740c3c76551538876761d7..0000000000000000000000000000000000000000 --- a/covid_comp.Rnw +++ /dev/null @@ -1,606 +0,0 @@ -\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format -%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. -%\geometry{letterpaper} % ... or a4paper or a5paper or ... -%\geometry{landscape} % Activate for for rotated page geometry -%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent -%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode - % TeX will automatically convert eps --> pdf in pdflatex -%\usepackage{amssymb} - -\usepackage[utf8]{inputenc} -%\usepackage[cyr]{aeguill} -%\usepackage[francais]{babel} -%\usepackage{hyperref} - - -\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis} -\author{Marie Cariou} -\date{Mai 2020} % Activate to display a given date or no date - -\begin{document} -\maketitle - -\tableofcontents - -\newpage - -\section{Files manipulations} - -I will compare Janet Young's results to DGINN results, on the SAME alignment. - -\subsection{Read Janet Young's table} - -<<>>= -tab<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv", - fill=T, h=T, dec=",") -dim(tab) - -names(tab) - -@ - -\subsection{Read DGINN table} - -<<>>= -dginn<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/summary.res", - fill=T, h=T) - -dim(dginn) - -names(dginn) -@ - -\subsection{Joining table} - -\subsubsection{Based on which column?} - -<<>>= -head(tab)[,1:5] -# gene avec un nom bizar dans certaines colomne -tab[158,1:10] - -# -length(unique(dginn$Gene)) -length(unique(tab$PreyGene)) -length(unique(tab$Gene.name)) - -#quelle paire de colonne contient le plus de noms identiques -sum(unique(dginn$Gene) %in% unique(tab$PreyGene)) -sum(unique(dginn$Gene) %in% unique(tab$Gene.name)) - -# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne. -# Je soupçonne que c'est celle là : -tab[158,1:10] - -# Verif: -tab[,1:10][(tab$Gene.name %in% unique(dginn$Gene))==F,] -# yep - -# Remplacement manuel par -as.character(unique(dginn$Gene)[(unique(dginn$Gene) %in% tab$Gene.name)==F]) -# dans le tableau de Janet - -val_remp=as.character(unique(dginn$Gene)[(unique(dginn$Gene) %in% tab$Gene.name)==F]) - -tab$Gene.name<-as.character(tab$Gene.name) - -tab$Gene.name[158]<-val_remp - -sum(unique(dginn$Gene) %in% unique(tab$Gene.name)) -@ - -\subsubsection{New columns} - -<<>>= - -add_col<-function(method="PamlM1M2"){ - -tmp<-dginn[dginn$Method==method, - c("Gene", "Omega", "PosSel", "PValue", "NbSites", "PSS")] - -names(tmp)<-c("Gene.name", paste0("Omega_", method), - paste0("PosSel_", method), paste0("PValue_", method), - paste0("NbSites_", method), paste0("PSS_", method)) - -tab<-merge(tab, tmp, by="Gene.name") - -return(tab) -} - -tab<-add_col("PamlM1M2") -tab<-add_col("PamlM7M8") -tab<-add_col("BppM1M2") -tab<-add_col("BppM7M8") - - -# Manip pour la colonne BUSTED - -tmp<-dginn[dginn$Method=="BUSTED",c("Gene", "Omega", "PosSel", "PValue")] -names(tmp)<-c("Gene.name", "Omega_BUSTED", "PosSel_BUSTED", "PValue_BUSTED") -tab<-merge(tab, tmp, by="Gene.name") - -tmp<-dginn[dginn$Method=="MEME",c("Gene", "NbSites", "PSS")] -names(tmp)<-c("Gene.name", "NbSites_MEME", "PSS_MEME") -tab<-merge(tab, tmp, by="Gene.name") - -@ - -\subsection{Write new table} -<<>>= -write.table(tab, - "COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt", - row.names=F, quote=F, sep="\t") -@ - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Comparisons Primates} - -\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS Janet Young's results} - -\subsubsection{Omega} - -Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn. -<<omegaM7M8>>= - -plot(tab$whole.gene.dN.dS.model.0, tab$Omega_PamlM7M8, - xlab="Omega Young-primate", ylab="Omega DGINN-Young-primate") -@ -Quels sont les 2 gènes qui s'écartent de la bissectrice? -<<>>= -tab[tab$whole.gene.dN.dS.model.0<0.2 & tab$Omega_PamlM7M8>0.4,c("Gene.name")] -tab[tab$whole.gene.dN.dS.model.0<0.6 & tab$Omega_PamlM7M8>0.7,c("Gene.name")] - -@ - - -\subsubsection{pvalues pour M7M8} - -Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne "PValue" + ligne "PamlM7M8", pour la sortie de dginn. - -<<pvalM7M8>>= - -plot(tab$pVal.M8vsM7, tab$PValue_PamlM7M8, pch=20, - xlab="p-value Young-primate", ylab="p-value DGINN-Young-primate", main="M7vM8 Paml") -points(tab$pVal.M8vsM7[tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05], - tab$PValue_PamlM7M8[tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05], - col="red", pch=20) -points(tab$pVal.M8vsM7[tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05], - tab$PValue_PamlM7M8[tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05], - col="green", pch=20) - -legend("topleft", c("<0.05 in DGINN-Young-primate PamlM7M8 but >0.05 in Young M8vsM7", - "<0.05 in Young M8vsM7 but >0.05 in DGINN-Young-primate PamlM7M8"), - pch=20, col=c("red", "green")) - -@ - -Quels sont les gènes en couleur: - -<<>>= -na.omit(tab[(tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05), -c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8")]) - -na.omit(tab[(tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05), -c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8")]) -@ - -Focus sur le gène CIT pour lequel la différence est vraiment assez importante: - -<<cit>>= -dginn[dginn$Gene=="CIT",] - -tab[tab$Gene.name=="CIT",1:20] -@ - - - -\subsubsection{Concordance des méthodes} - -Est-ce que les gènes avec une faible p-value sont détecté par 1,2,3,4 ou 5 méthodes en général? - -<<stripchart>>= -nontab<-tab[tab$pVal.M8vsM7>=0.05,c("Gene.name","PosSel_PamlM1M2", "PosSel_PamlM7M8","PosSel_BppM1M2", -"PosSel_BppM7M8", "PosSel_BUSTED")] - - -non<-apply(nontab, 1, function(x) sum(x=="Y")) - - -ouitab<-tab[tab$pVal.M8vsM7<0.05,c("Gene.name","PosSel_PamlM1M2", "PosSel_PamlM7M8","PosSel_BppM1M2", -"PosSel_BppM7M8", "PosSel_BUSTED")] - -oui<-apply(ouitab, 1, function(x) sum(x=="Y")) - -stripchart(x=list(oui, non), method="jitter", jitter=0.2, - vertical=T, pch=20, cex=0.5, - group.names=c("Yes Young", "No Young"), - ylab="Nb YES from dginn") - - -@ - -\subsection{Résultats Cooper-primate VS Young-primate} - -\subsubsection{How many genes in the Cooper-primate columns?} - -<<>>= -# Temporary table with necessary columns - -tmp<-tab[,c("Gene.name", "whole.gene.dN.dS.model.0", "pVal.M8vsM7", -"cooper.primates.Gene", "cooper.primates.Average_dNdS", -"cooper.primates.M7.M8_p_value")] -dim(tmp) - -# Lines with values in the cooper Gene names column -dim(tmp[tmp$cooper.primates.Gene!="",]) - -# Line with values (no NA) in the Cooper dNdS column -sum(is.na(tmp$cooper.primates.Average_dNdS)==F) - -# Line with values (no NA) in the Cooper pvalue column -sum(is.na(tmp$cooper.primates.M7.M8_p_value)==F) -@ - -\subsubsection{Omega} - -Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "cooper.primates.Average\_dNdS" - -<<omegaM7M8coop>>= -plot(tab$whole.gene.dN.dS.model.0, tab$cooper.primates.Average_dNdS, - xlab="Omega Young-primate", ylab="Omega Cooper-primate") -@ - -\subsubsection{pvalues pour M7M8} - -Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne cooper.primates.M7.M8\_p\_value (p-value de l'analyse de Cooper). - -<<pvalM7M8coop>>= - -plot(tab$pVal.M8vsM7, tab$cooper.primates.M7.M8_p_value, pch=20, - xlab="p-value Young", ylab="p-value Cooper-primate", main="M7vM8 Paml-primate") - -points(tab$pVal.M8vsM7[tab$pVal.M8vsM7>0.05 & tab$cooper.primates.M7.M8_p_value<0.05], - tab$cooper.primates.M7.M8_p_value[tab$pVal.M8vsM7>0.05 & tab$cooper.primates.M7.M8_p_value<0.05], - col="red", pch=20) -points(tab$pVal.M8vsM7[tab$pVal.M8vsM7<0.05 & tab$cooper.primates.M7.M8_p_value>0.05], - tab$cooper.primates.M7.M8_p_value[tab$pVal.M8vsM7<0.05 & tab$cooper.primates.M7.M8_p_value>0.05], - col="green", pch=20) - -legend("topleft", c("<0.05 in Cooper PamlM7M8 but >0.05 in Young M8vsM7", - "<0.05 in Young M8vsM7 but >0.05 in Cooper PamlM7M8"), - pch=20, col=c("red", "green")) - -@ - -\subsection{Résultats DGINN sur alignement de Janet-Young (DGINN-Young-primate) VS Cooper-primates} - -\subsubsection{Omega} - -Comparaison des Omega: colonne colonne "cooper.primates.Average\_dNdS" VS omega de DGINN. - -<<omegaM7M8comp3>>= -plot(tab$Omega_PamlM7M8, tab$cooper.primates.Average_dNdS, - xlab="Omega DGINN-Young-primate", ylab="Omega Cooper-primate") -@ - -\subsubsection{pvalues pour M7M8} - -Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne "PValue" + ligne "PamlM7M8", pour la sortie de dginn. - -<<pvalM7M8comp3>>= - -plot(tab$PValue_PamlM7M8, tab$cooper.primates.M7.M8_p_value, pch=20, - xlab="p-value DGINN-Young-primate", ylab="p-value Cooper-primate", main="M7vM8 Paml") - -points(tab$PValue_PamlM7M8[tab$PValue_PamlM7M8>0.05 & tab$cooper.primates.M7.M8_p_value<0.05], - tab$cooper.primates.M7.M8_p_value[tab$PValue_PamlM7M8>0.05 & tab$cooper.primates.M7.M8_p_value<0.05], - col="red", pch=20) -points(tab$PValue_PamlM7M8[tab$PValue_PamlM7M8<0.05 & tab$cooper.primates.M7.M8_p_value>0.05], - tab$cooper.primates.M7.M8_p_value[tab$PValue_PamlM7M8<0.05 & tab$cooper.primates.M7.M8_p_value>0.05], - col="green", pch=20) - -legend("topleft", c("<0.05 in Cooper-primate PamlM7M8 but >0.05 in DGINN-Young-primate M8vsM7", - "<0.05 in DGINN-Young-primate M8vsM7 but >0.05 in Cooper-primate PamlM7M8"), - pch=20, col=c("red", "green")) - -@ - -\subsection{Overlap} - -I will draw a venn diagramm for the positive genes in the 3 analyses. - -\subsubsection{Library and subtable} - -<<sub>>= -library(VennDiagram) - -# keeps only genes analysed in all 3 experiments -tmp<-na.omit(tab[,c("Gene.name", "pVal.M8vsM7", "cooper.primates.M7.M8_p_value", - "PosSel_PamlM7M8", "PValue_PamlM7M8")]) -dim(tmp) -@ - -Il reste 186 gènes - -<<vennprimate>>= -area1dginn<-sum(tmp$PosSel_PamlM7M8=="Y") -area2jean<-sum(tmp$pVal.M8vsM7<0.05) -area3coop<-sum(tmp$cooper.primates.M7.M8_p_val<0.05, na.rm=T) - - -n12<-sum(tmp$PosSel_PamlM7M8=="Y" & tmp$pVal.M8vsM7<0.05) -n23<-sum(tmp$pVal.M8vsM7<0.05 & tmp$cooper.primates.M7.M8_p_val<0.05, na.rm=T) -n13<-sum(tmp$PosSel_PamlM7M8=="Y" & tmp$cooper.primates.M7.M8_p_val<0.05, na.rm=T) - -n123<-sum(tmp$PosSel_PamlM7M8=="Y" & tmp$pVal.M8vsM7<0.05 & -tmp$cooper.primates.M7.M8_p_val<0.05, na.rm=T) - -draw.triple.venn(area1dginn, area2jean, area3coop, -n12, n23, n13, n123, -category=c("DGINN-Young-primate", "Young-primate", "Cooper-primate")) -@ - -\subsection{Mondrian} - -<<mondrianprimates>>= - -library(Mondrian) - -monddata<-as.data.frame(tmp$Gene.name) -monddata$primates_dginn_young<-ifelse(tmp$PosSel_PamlM7M8=="Y", 1,0) -monddata$primates_young<-ifelse(tmp$pVal.M8vsM7<0.05, 1, 0) -monddata$primates_cooper<-ifelse(tmp$cooper.primates.M7.M8_p_val<0.05, 1, 0) - -mondrian(monddata[,2:4]) -@ - - -%\subsection{Comparaison des codons?} - -%Subtable with lines with both methods showing positive selection. - -<<eval=FALSE, echo=FALSE>>= -%<<selec>>= -#cas ou selection + dans les 2 cas -sel<-na.omit(tab[(tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8<0.05),c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8", "Number.of.codons.with.BEB....0.9", "Codons.under.positive.selection..BEB..0.9...alignment.position.", "NbSites_PamlM7M8","PSS_PamlM7M8")]) - -dim(sel) -head(sel) -@ - -<<nsites, eval=FALSE, echo=FALSE>>= -%<<nsites>>= -plot(sel$Number.of.codons.with.BEB....0.9, sel$NbSites_PamlM7M8) -# toujours plus de codon dans la version de janet Young - -listdginn<-sapply(sel$PSS_PamlM7M8, function(x){ - tmp<-strsplit(as.character(x), split=",")[[1]] - names(tmp)<-rep("dginn", length(tmp)) - return(tmp) -}) -names(listdginn)<-sel$Gene.name - - - -listjanet<-sapply(sel$Codons.under.positive.selection..BEB..0.9...alignment.position., function(x){ - - tmp<-strsplit(as.character(x), split=",")[[1]] - tmp2<-sapply(tmp, function(x) strsplit(as.character(x), split="_")[[1]][1]) - tmp2<-unlist(tmp2) - names(tmp2)<-rep("young", length(tmp2)) - return(unlist(tmp2)) -}) - -names(listjanet)<-sel$Gene.name - -listjoined<-mapply(c, listdginn, listjanet, SIMPLIFY=FALSE) - -@ - - - - - - - - - - - - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\section{Bats Comparisons} - -\subsection{Add DGINN results for Bats} - -Lecture du tableau. - -<<readtab4test, eval=F>>= -# Tableau tel qu'il est à cet étape du script, pour travailler sur l'ajout du nouveau tableau bats sans recommencer au début à chaque fois. -tab<-read.delim("COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt", - header=TRUE, sep="\t") -@ - -<<>>= -#dginnbatsold<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/2020_bats_completeResults.csv", -# fill=T, h=T) - - -dginnbats<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/DGINN_202005281339summary_cleaned.tab", - fill=T, h=T) - - -dim(dginnbats) -names(dginnbats) -length(unique(dginnbats$Gene)) -length(unique(tab$cooper.batsGene)) -table(unique(tab$cooper.batsGene) %in% unique(dginnbats$Gene)) -@ - -Which genes in the Cooper table are not in the gene output? - -<<>>= -unique(tab$cooper.batsGene)[unique(tab$cooper.batsGene) %in% unique(dginnbats$Gene)==F] -@ - -Merge tables: - -<<bats>>= - -names(dginnbats)<-c("File", "bats_Name", "cooper.batsGene", paste0("bats_", names(dginnbats)[-(1:3)])) - -tab<-merge(tab,dginnbats, by="cooper.batsGene", all.x=T) -@ - - -\subsection{Cooper-bats results vs DGINN-bats results} - -\subsubsection{Omega} - -<<omegaM7M8bats>>= - -plot(tab$cooper.batsAverage_dNdS, tab$bats_omegaM0codeml, - xlab="Omega Cooper-bats", ylab="Omega DGINN-bats") -@ - -\subsubsection{pvalues pour M7M8} - -<<pvalM7M8bats>>= -tab$bats_codemlM7M8.p.value<-as.numeric(as.character(tab$bats_codemlM7M8.p.value)) - -plot(tab$cooper.batsM7.M8_p_value, tab$bats_codemlM7M8.p.value, pch=20, - xlab="p-value Cooper-bats", ylab="p-value DGINN-bats", main="M7vM8 Paml") - -points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value>0.05 & tab$bats_codemlM7M8.p.value<0.05], - tab$bats_codemlM7M8.p.value[tab$cooper.batsM7.M8_p_value>0.05 & tab$bats_codemlM7M8.p.value<0.05], - col="red", pch=20) -points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value<0.05 & tab$bats_codemlM7M8.p.value>0.05], - tab$bats_codemlM7M8.p.value[tab$cooper.batsM7.M8_p_value<0.05 & tab$bats_codemlM7M8.p.value>0.05], - col="green", pch=20) - - -legend("topleft", c("<0.05 in DGINN-bats but >0.05 in Cooper-bats", - "<0.05 in Cooper-bats but >0.05 in DGINN-bats"), - pch=20, col=c("red", "green")) - -@ - - - - -\subsection{Comparaison Cooper-Hawkins} - - -\subsubsection{pvalues pour M7M8} - -<<pvalM7M8comp2>>= - -plot(tab$cooper.batsM7.M8_p_value, tab$hawkins_Positive.Selection..M8vM8a.p.value, - pch=20, xlab="p-value Cooper-bats", ylab="p-value hawkins-bats", main="M7vM8 Paml") - -points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value>0.05 & - tab$hawkins_Positive.Selection..M8vM8a.p.value<0.05], - tab$hawkins_Positive.Selection..M8vM8a.p.value[tab$cooper.batsM7.M8_p_value>0.05 & - tab$hawkins_Positive.Selection..M8vM8a.p.value<0.05], - col="red", pch=20) -points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value<0.05 & - tab$hawkins_Positive.Selection..M8vM8a.p.value>0.05], - tab$hawkins_Positive.Selection..M8vM8a.p.value[tab$cooper.batsM7.M8_p_value<0.05 & - tab$hawkins_Positive.Selection..M8vM8a.p.value>0.05], - col="green", pch=20) - - -legend("topleft", c("<0.05 in Hawkins but >0.05 in Cooper", - "<0.05 in Cooper but >0.05 in Hawkins"), - pch=20, col=c("red", "green")) - -@ - - -\subsection{Comparaison dginn-Hawkins} - -<<pvalM7M8compautre>>= - -plot(tab$hawkins_Positive.Selection..M8vM8a.p.value, tab$bats_codemlM7M8.p.value, - pch=20, xlab="p-value hawkins-bats", ylab="p-value DGINN-bats", main="M7vM8 Paml") - -points(tab$hawkins_Positive.Selection..M8vM8a.p.value[tab$hawkins_Positive.Selection..M8vM8a.p.value>0.05 & - tab$bats_codemlM7M8.p.value<0.05], - tab$bats_codemlM7M8.p.value[tab$hawkins_Positive.Selection..M8vM8a.p.value>0.05 & - tab$bats_codemlM7M8.p.value<0.05], col="red", pch=20) -points(tab$hawkins_Positive.Selection..M8vM8a.p.value[tab$hawkins_Positive.Selection..M8vM8a.p.value<0.05 & - tab$bats_codemlM7M8.p.value>0.05], - tab$bats_codemlM7M8.p.value[tab$hawkins_Positive.Selection..M8vM8a.p.value<0.05 & - tab$bats_codemlM7M8.p.value>0.05], col="green", pch=20) - - -legend("topleft", c("<0.05 in DGINN-bats but >0.05 in Hawkins", - "<0.05 in Hawkinsbut >0.05 in DGINN-bats"), - pch=20, col=c("red", "green")) - -@ - - - - - -\subsection{Diagramme de Venn} - -I will draw a venn diagramm for the positive genes in the 3 analyses. - -\subsubsection{subtab} - -<<subbats>>= -tmp<-na.omit(tab[,c("Gene.name", "bats_codemlM7M8.p.value", "hawkins_Positive.Selection..M8vM8a.p.value", "cooper.batsM7.M8_p_value")]) -dim(tmp) -@ - -154 genes (present in the 3 experiments) - -\subsubsection{figure} -<<vennbats>>= -area1dginn<-sum(tmp$bats_codemlM7M8.p.value<0.05, na.rm=T) -area2hawk<-sum(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, na.rm=T) -area3coop<-sum(tmp$cooper.batsM7.M8_p_value<0.05, na.rm=T) - - -n12<-sum(tmp$bats_codemlM7M8.p.value<0.05 & tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, na.rm=T) - -n23<-sum(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05 & tmp$cooper.batsM7.M8_p_value<0.05, na.rm=T) - -n13<-sum(tmp$bats_codemlM7M8.p.value<0.05 & tmp$cooper.batsM7.M8_p_value<0.05, na.rm=T) - - -n123<-sum(tmp$bats_codemlM7M8.p.value<0.05 & tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05 & tmp$cooper.batsM7.M8_p_value<0.05, na.rm=T) - -draw.triple.venn(area1dginn, area2hawk, area3coop, -n12, n23, n13, n123, -category=c("DGINN-Young-bats", "Hawkins-bats", "Cooper-bats")) - - -@ - -\subsection{Mondrian} - -<<mondrianbats>>= -library(Mondrian) - -monddata<-as.data.frame(tmp$Gene.name) -monddata$bats_dginn<-ifelse(tmp$bats_codemlM7M8.p.value<0.05, 1,0) -monddata$bats_hawkins<-ifelse(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0) -monddata$bats_cooper<-ifelse(tmp$cooper.batsM7.M8_p_value<0.05, 1, 0) - -mondrian(monddata[,2:4]) -@ - -\section{To do} - -Comparaison G4 pas G4 - -\end{document} - diff --git a/covid_comp.pdf b/covid_comp.pdf deleted file mode 100644 index e14d50ff9a0afb4c455f56c31d9313b987382db0..0000000000000000000000000000000000000000 Binary files a/covid_comp.pdf and /dev/null differ diff --git a/covid_comp.tex b/covid_comp.tex deleted file mode 100644 index 1965f848b2bbb5177a80f8b362350f670b4f7e69..0000000000000000000000000000000000000000 --- a/covid_comp.tex +++ /dev/null @@ -1,1053 +0,0 @@ -\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color} -% maxwidth is the original width if it is less than linewidth -% otherwise use linewidth (to make sure the graphics do not exceed the margin) -\makeatletter -\def\maxwidth{ % - \ifdim\Gin@nat@width>\linewidth - \linewidth - \else - \Gin@nat@width - \fi -} -\makeatother - -\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345} -\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}% -\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}% -\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}% -\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% -\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}% -\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}% -\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}% -\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}% -\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}% -\let\hlipl\hlkwb - -\usepackage{framed} -\makeatletter -\newenvironment{kframe}{% - \def\at@end@of@kframe{}% - \ifinner\ifhmode% - \def\at@end@of@kframe{\end{minipage}}% - \begin{minipage}{\columnwidth}% - \fi\fi% - \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep - \colorbox{shadecolor}{##1}\hskip-\fboxsep - % There is no \\@totalrightmargin, so: - \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}% - \MakeFramed {\advance\hsize-\width - \@totalleftmargin\z@ \linewidth\hsize - \@setminipage}}% - {\par\unskip\endMakeFramed% - \at@end@of@kframe} -\makeatother - -\definecolor{shadecolor}{rgb}{.97, .97, .97} -\definecolor{messagecolor}{rgb}{0, 0, 0} -\definecolor{warningcolor}{rgb}{1, 0, 1} -\definecolor{errorcolor}{rgb}{1, 0, 0} -\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX - -\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format -%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. -%\geometry{letterpaper} % ... or a4paper or a5paper or ... -%\geometry{landscape} % Activate for for rotated page geometry -%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent -%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode - % TeX will automatically convert eps --> pdf in pdflatex -%\usepackage{amssymb} - -\usepackage[utf8]{inputenc} -%\usepackage[cyr]{aeguill} -%\usepackage[francais]{babel} -%\usepackage{hyperref} - - -\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis} -\author{Marie Cariou} -\date{Mai 2020} % Activate to display a given date or no date -\IfFileExists{upquote.sty}{\usepackage{upquote}}{} -\begin{document} -\maketitle - -\tableofcontents - -\newpage - -\section{Files manipulations} - -I will compare Janet Young's results to DGINN results, on the SAME alignment. - -\subsection{Read Janet Young's table} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"}\hlstd{,} - \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{dec}\hlstd{=}\hlstr{","}\hlstd{)} -\hlkwd{dim}\hlstd{(tab)} -\end{alltt} -\begin{verbatim} -## [1] 332 84 -\end{verbatim} -\begin{alltt} -\hlkwd{names}\hlstd{(tab)} -\end{alltt} -\begin{verbatim} -## [1] "PreyGene" -## [2] "PreyGene_JYname" -## [3] "BaitShort" -## [4] "Gene.name" -## [5] "list" -## [6] "description" -## [7] "other.names" -## [8] "top40_posSeln" -## [9] "Num.primate.seqs" -## [10] "Alignment.length..nucleotides." -## [11] "Alignment.length..codons." -## [12] "whole.gene.dN.dS.model.0" -## [13] "total.tree.length" -## [14] "total.dN.tree.length" -## [15] "total.dS.tree.length" -## [16] "p.value.M8vsM8a..raw." -## [17] "p.value.M8vsM8a..BH.corrected." -## [18] "pVal.M8vsM7" -## [19] "pVal.M8vsM7.adj" -## [20] "pVal.M2vsM1" -## [21] "pVal.M2vsM1.adj" -## [22] "X..codons.under.positive.selection" -## [23] "dN.dS.of.positively.selected.codons" -## [24] "Number.of.codons.with.BEB....0.9" -## [25] "Codons.under.positive.selection..BEB..0.9...alignment.position." -## [26] "cooper.batsGene" -## [27] "cooper.batsGene_Ensembl_ID" -## [28] "cooper.batsIsoform_Ensembl_ID" -## [29] "cooper.batsSpecies" -## [30] "cooper.batsReference_length.aa." -## [31] "cooper.batsPercent_analyzed" -## [32] "cooper.batsAverage_dNdS" -## [33] "cooper.batsMaximum_dS" -## [34] "cooper.batsAverage_M7_tree" -## [35] "cooper.batsAverage_M8_tree" -## [36] "cooper.batsM7_log_likelihood" -## [37] "cooper.batsM8_log_likelihood" -## [38] "cooper.batsM7.M8_p_value" -## [39] "cooper.batsM8a_log_likelihood" -## [40] "cooper.batsM8.M8a_pvalue" -## [41] "cooper.batsBEB_hits.pp.0.95." -## [42] "cooper.batsBEB_sites" -## [43] "cooper.primates.Gene" -## [44] "cooper.primates.Gene_Ensembl_ID" -## [45] "cooper.primates.Isoform_Ensembl_ID" -## [46] "cooper.primates.Species" -## [47] "cooper.primates.Reference_length.aa." -## [48] "cooper.primates.Percent_analyzed" -## [49] "cooper.primates.Average_dNdS" -## [50] "cooper.primates.Maximum_dS" -## [51] "cooper.primates.Average_M7_tree" -## [52] "cooper.primates.Average_M8_tree" -## [53] "cooper.primates.M7_log_likelihood" -## [54] "cooper.primates.M8_log_likelihood" -## [55] "cooper.primates.M7.M8_p_value" -## [56] "cooper.primates.M8a_log_likelihood" -## [57] "cooper.primates.M8.M8a_pvalue" -## [58] "cooper.primates.BEB_hits.pp.0.95." -## [59] "cooper.primates.BEB_sites" -## [60] "hawkins_Gene" -## [61] "hawkins_Positive.Selection..M8vM8a.p.value" -## [62] "hawkins_Positive.Selection..M8vM8a.FDR.corrected.p.value" -## [63] "hawkins_Gene.Name.Alias" -## [64] "hawkins_Connection.to.immunity.or.pathogens" -## [65] "hawkins_Connection.to.reproduction" -## [66] "hawkins_Connection.to.collagen" -## [67] "hawkins_Connection.to.peroxisome" -## [68] "hawkins_Gene.Description.for.Human.Ortholog..from.Genbank.GENE.database." -## [69] "CpGmask.numNT" -## [70] "CpGmask.numAA" -## [71] "CpGmask.overall.dN.dS" -## [72] "CpGmask.total.tree.length" -## [73] "CpGmask.total.dN.tree.length" -## [74] "CpGmask.total.dS.tree.length" -## [75] "CpGmask.pVal.M8vsM8a" -## [76] "CpGmask.pVal.M8vsM8a.adj" -## [77] "CpGmask.pVal.M8vsM7" -## [78] "CpGmask.pVal.M8vsM7.adj" -## [79] "CpGmask.pVal.M2vsM1" -## [80] "CpGmask.pVal.M2vsM1.adj" -## [81] "CpGmask.percent.sites.under.positive.selection" -## [82] "CpGmask.dN.dS.of.selected.sites" -## [83] "CpGmask.num.sites.with.BEB...0.9" -## [84] "CpGmask.which.sites.have.BEB...0.9" -\end{verbatim} -\end{kframe} -\end{knitrout} - -\subsection{Read DGINN table} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{dginn}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/summary.res"}\hlstd{,} - \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} - -\hlkwd{dim}\hlstd{(dginn)} -\end{alltt} -\begin{verbatim} -## [1] 1992 7 -\end{verbatim} -\begin{alltt} -\hlkwd{names}\hlstd{(dginn)} -\end{alltt} -\begin{verbatim} -## [1] "Gene" "Omega" "Method" "PosSel" "PValue" "NbSites" "PSS" -\end{verbatim} -\end{kframe} -\end{knitrout} - -\subsection{Joining table} - -\subsubsection{Based on which column?} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{head}\hlstd{(tab)[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} -\end{alltt} -\begin{verbatim} -## PreyGene PreyGene_JYname BaitShort Gene.name list -## 1 PCNT PCNT nsp13 PCNT list26_COV_list4dataset2nonOrf -## 2 PVR PVR orf8 PVR list23_COV_list1orf -## 3 POLA1 POLA1 nsp1 POLA1 list24_COV_list2nonOrf -## 4 FASTKD5 FASTKD5 M FASTKD5 list26_COV_list4dataset2nonOrf -## 5 PRIM2 PRIM2 nsp1 PRIM2 list24_COV_list2nonOrf -## 6 ITGB1 ITGB1 orf8 ITGB1 list25_COV_list3dataset2orf -\end{verbatim} -\begin{alltt} -\hlcom{# gene avec un nom bizar dans certaines colomne} -\hlstd{tab[}\hlnum{158}\hlstd{,}\hlnum{1}\hlopt{:}\hlnum{10}\hlstd{]} -\end{alltt} -\begin{verbatim} -## PreyGene PreyGene_JYname BaitShort Gene.name list -## 158 MTARC1 01/03/2020 nsp7 01/03/2020 list24_COV_list2nonOrf -## description other.names top40_posSeln -## 158 mitochondrial amidoxime reducing component 1 MOSC1 no -## Num.primate.seqs Alignment.length..nucleotides. -## 158 24 1023 -\end{verbatim} -\begin{alltt} -\hlcom{#} -\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene))} -\end{alltt} -\begin{verbatim} -## [1] 332 -\end{verbatim} -\begin{alltt} -\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{PreyGene))} -\end{alltt} -\begin{verbatim} -## [1] 332 -\end{verbatim} -\begin{alltt} -\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name))} -\end{alltt} -\begin{verbatim} -## [1] 332 -\end{verbatim} -\begin{alltt} -\hlcom{#quelle paire de colonne contient le plus de noms identiques} -\hlkwd{sum}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{PreyGene))} -\end{alltt} -\begin{verbatim} -## [1] 314 -\end{verbatim} -\begin{alltt} -\hlkwd{sum}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name))} -\end{alltt} -\begin{verbatim} -## [1] 331 -\end{verbatim} -\begin{alltt} -\hlcom{# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne. } -\hlcom{# Je soupçonne que c'est celle là :} -\hlstd{tab[}\hlnum{158}\hlstd{,}\hlnum{1}\hlopt{:}\hlnum{10}\hlstd{]} -\end{alltt} -\begin{verbatim} -## PreyGene PreyGene_JYname BaitShort Gene.name list -## 158 MTARC1 01/03/2020 nsp7 01/03/2020 list24_COV_list2nonOrf -## description other.names top40_posSeln -## 158 mitochondrial amidoxime reducing component 1 MOSC1 no -## Num.primate.seqs Alignment.length..nucleotides. -## 158 24 1023 -\end{verbatim} -\begin{alltt} -\hlcom{# Verif:} -\hlstd{tab[,}\hlnum{1}\hlopt{:}\hlnum{10}\hlstd{][(tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene))}\hlopt{==}\hlstd{F,]} -\end{alltt} -\begin{verbatim} -## PreyGene PreyGene_JYname BaitShort Gene.name list -## 158 MTARC1 01/03/2020 nsp7 01/03/2020 list24_COV_list2nonOrf -## description other.names top40_posSeln -## 158 mitochondrial amidoxime reducing component 1 MOSC1 no -## Num.primate.seqs Alignment.length..nucleotides. -## 158 24 1023 -\end{verbatim} -\begin{alltt} -\hlcom{# yep} - -\hlcom{# Remplacement manuel par} -\hlkwd{as.character}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)[(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F])} -\end{alltt} -\begin{verbatim} -## [1] "MARC1" -\end{verbatim} -\begin{alltt} -\hlcom{# dans le tableau de Janet} - -\hlstd{val_remp}\hlkwb{=}\hlkwd{as.character}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)[(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F])} - -\hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)} - -\hlstd{tab}\hlopt{$}\hlstd{Gene.name[}\hlnum{158}\hlstd{]}\hlkwb{<-}\hlstd{val_remp} - -\hlkwd{sum}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name))} -\end{alltt} -\begin{verbatim} -## [1] 332 -\end{verbatim} -\end{kframe} -\end{knitrout} - -\subsubsection{New columns} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{add_col}\hlkwb{<-}\hlkwa{function}\hlstd{(}\hlkwc{method}\hlstd{=}\hlstr{"PamlM1M2"}\hlstd{)\{} - -\hlstd{tmp}\hlkwb{<-}\hlstd{dginn[dginn}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstd{method,} - \hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]} - -\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"Omega_"}\hlstd{, method),} - \hlkwd{paste0}\hlstd{(}\hlstr{"PosSel_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PValue_"}\hlstd{, method),} - \hlkwd{paste0}\hlstd{(}\hlstr{"NbSites_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PSS_"}\hlstd{, method))} - -\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} - -\hlkwd{return}\hlstd{(tab)} -\hlstd{\}} - -\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM1M2"}\hlstd{)} -\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM7M8"}\hlstd{)} -\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM1M2"}\hlstd{)} -\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM7M8"}\hlstd{)} - - -\hlcom{# Manip pour la colonne BUSTED} - -\hlstd{tmp}\hlkwb{<-}\hlstd{dginn[dginn}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"BUSTED"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{)]} -\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"Omega_BUSTED"}\hlstd{,} \hlstr{"PosSel_BUSTED"}\hlstd{,} \hlstr{"PValue_BUSTED"}\hlstd{)} -\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} - -\hlstd{tmp}\hlkwb{<-}\hlstd{dginn[dginn}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"MEME"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]} -\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"NbSites_MEME"}\hlstd{,} \hlstr{"PSS_MEME"}\hlstd{)} -\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} -\end{alltt} -\end{kframe} -\end{knitrout} - -\subsection{Write new table} -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{write.table}\hlstd{(tab,} - \hlstr{"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt"}\hlstd{,} - \hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)} -\end{alltt} -\end{kframe} -\end{knitrout} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Comparisons Primates} - -\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS Janet Young's results} - -\subsubsection{Omega} - -Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn. -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0, tab}\hlopt{$}\hlstd{Omega_PamlM7M8,} - \hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-Young-primate"}\hlstd{)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/omegaM7M8-1} - -\end{knitrout} -Quels sont les 2 gènes qui s'écartent de la bissectrice? -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.2} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{>}\hlnum{0.4}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{)]} -\end{alltt} -\begin{verbatim} -## [1] "MRPS2" -\end{verbatim} -\begin{alltt} -\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.6} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{>}\hlnum{0.7}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{)]} -\end{alltt} -\begin{verbatim} -## [1] "PVR" -\end{verbatim} -\end{kframe} -\end{knitrout} - - -\subsubsection{pvalues pour M7M8} - -Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne "PValue" + ligne "PamlM7M8", pour la sortie de dginn. - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7, tab}\hlopt{$}\hlstd{PValue_PamlM7M8,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} - \hlkwc{xlab}\hlstd{=}\hlstr{"p-value Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"p-value DGINN-Young-primate"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"M7vM8 Paml"}\hlstd{)} -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"green"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} - -\hlkwd{legend}\hlstd{(}\hlstr{"topleft"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"<0.05 in DGINN-Young-primate PamlM7M8 but >0.05 in Young M8vsM7"}\hlstd{,} - \hlstr{"<0.05 in Young M8vsM7 but >0.05 in DGINN-Young-primate PamlM7M8"}\hlstd{),} - \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"red"}\hlstd{,} \hlstr{"green"}\hlstd{))} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/pvalM7M8-1} - -\end{knitrout} - -Quels sont les gènes en couleur: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{na.omit}\hlstd{(tab[(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{<}\hlnum{0.05}\hlstd{),} -\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"pVal.M8vsM7"}\hlstd{,} \hlstr{"PValue_PamlM7M8"}\hlstd{,} \hlstr{"whole.gene.dN.dS.model.0"}\hlstd{,} \hlstr{"Omega_PamlM7M8"}\hlstd{)])} -\end{alltt} -\begin{verbatim} -## Gene.name pVal.M8vsM7 PValue_PamlM7M8 whole.gene.dN.dS.model.0 -## 51 CIT 0.103170 1.854024e-02 0.03889 -## 101 FBN2 0.174750 2.253070e-08 0.06871 -## 158 MARK1 0.062265 1.890420e-02 0.08147 -## 196 NUP88 0.052061 4.950260e-02 0.19123 -## 316 UBXN8 0.053229 3.945009e-02 0.50084 -## 322 VPS11 0.108710 3.061568e-02 0.04236 -## Omega_PamlM7M8 -## 51 0.04325399 -## 101 0.07233605 -## 158 0.08802245 -## 196 0.20601208 -## 316 0.50718198 -## 322 0.04780560 -\end{verbatim} -\begin{alltt} -\hlkwd{na.omit}\hlstd{(tab[(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{>}\hlnum{0.05}\hlstd{),} -\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"pVal.M8vsM7"}\hlstd{,} \hlstr{"PValue_PamlM7M8"}\hlstd{,} \hlstr{"whole.gene.dN.dS.model.0"}\hlstd{,} \hlstr{"Omega_PamlM7M8"}\hlstd{)])} -\end{alltt} -\begin{verbatim} -## Gene.name pVal.M8vsM7 PValue_PamlM7M8 whole.gene.dN.dS.model.0 -## 68 DCTPP1 0.0431830 0.10613016 0.29992 -## 181 NDUFB9 0.0024264 0.05119297 0.29487 -## 188 NLRX1 0.0463220 0.13614538 0.17885 -## 197 NUP98 0.0345210 0.98219934 0.17017 -## 284 STOM 0.0345710 0.19872467 0.16126 -## Omega_PamlM7M8 -## 68 0.3538646 -## 181 0.3104234 -## 188 0.2159544 -## 197 0.1772109 -## 284 0.1477986 -\end{verbatim} -\end{kframe} -\end{knitrout} - -Focus sur le gène CIT pour lequel la différence est vraiment assez importante: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{dginn[dginn}\hlopt{$}\hlstd{Gene}\hlopt{==}\hlstr{"CIT"}\hlstd{,]} -\end{alltt} -\begin{verbatim} -## Gene Omega Method PosSel PValue NbSites -## 1201 CIT 0.04325399 BUSTED N 1.000000e+00 NA -## 1202 CIT 0.04325399 BppM1M2 N 9.999983e-01 NA -## 1203 CIT 0.04325399 BppM7M8 Y 2.254251e-05 11 -## 1204 CIT 0.04325399 PamlM1M2 N 1.000000e+00 NA -## 1205 CIT 0.04325399 PamlM7M8 Y 1.854024e-02 0 -## 1206 CIT 0.04325399 MEME NA 1 -## PSS -## 1201 -## 1202 -## 1203 258, 8, 1835, 304, 369, 338, 434, 625, 151, 410, 255 -## 1204 -## 1205 -## 1206 410 -\end{verbatim} -\begin{alltt} -\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"CIT"}\hlstd{,}\hlnum{1}\hlopt{:}\hlnum{20}\hlstd{]} -\end{alltt} -\begin{verbatim} -## Gene.name PreyGene PreyGene_JYname BaitShort list -## 51 CIT CIT CIT nsp13 list26_COV_list4dataset2nonOrf -## description other.names -## 51 citron rho-interacting serine/threonine kinase CITK|CRIK|MCPH17|STK21 -## top40_posSeln Num.primate.seqs Alignment.length..nucleotides. -## 51 no 24 6210 -## Alignment.length..codons. whole.gene.dN.dS.model.0 total.tree.length -## 51 2070 0.03889 0.33654 -## total.dN.tree.length total.dS.tree.length p.value.M8vsM8a..raw. -## 51 0.014 0.3603 0.99887 -## p.value.M8vsM8a..BH.corrected. pVal.M8vsM7 pVal.M8vsM7.adj pVal.M2vsM1 -## 51 1 0.10317 0.3747212 1 -\end{verbatim} -\end{kframe} -\end{knitrout} - - - -\subsubsection{Concordance des méthodes} - -Est-ce que les gènes avec une faible p-value sont détecté par 1,2,3,4 ou 5 méthodes en général? - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{nontab}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>=}\hlnum{0.05}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,}\hlstr{"PosSel_PamlM1M2"}\hlstd{,} \hlstr{"PosSel_PamlM7M8"}\hlstd{,}\hlstr{"PosSel_BppM1M2"}\hlstd{,} -\hlstr{"PosSel_BppM7M8"}\hlstd{,} \hlstr{"PosSel_BUSTED"}\hlstd{)]} - - -\hlstd{non}\hlkwb{<-}\hlkwd{apply}\hlstd{(nontab,} \hlnum{1}\hlstd{,} \hlkwa{function}\hlstd{(}\hlkwc{x}\hlstd{)} \hlkwd{sum}\hlstd{(x}\hlopt{==}\hlstr{"Y"}\hlstd{))} - - -\hlstd{ouitab}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,}\hlstr{"PosSel_PamlM1M2"}\hlstd{,} \hlstr{"PosSel_PamlM7M8"}\hlstd{,}\hlstr{"PosSel_BppM1M2"}\hlstd{,} -\hlstr{"PosSel_BppM7M8"}\hlstd{,} \hlstr{"PosSel_BUSTED"}\hlstd{)]} - -\hlstd{oui}\hlkwb{<-}\hlkwd{apply}\hlstd{(ouitab,} \hlnum{1}\hlstd{,} \hlkwa{function}\hlstd{(}\hlkwc{x}\hlstd{)} \hlkwd{sum}\hlstd{(x}\hlopt{==}\hlstr{"Y"}\hlstd{))} - -\hlkwd{stripchart}\hlstd{(}\hlkwc{x}\hlstd{=}\hlkwd{list}\hlstd{(oui, non),} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{jitter}\hlstd{=}\hlnum{0.2}\hlstd{,} - \hlkwc{vertical}\hlstd{=T,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.5}\hlstd{,} - \hlkwc{group.names}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Yes Young"}\hlstd{,} \hlstr{"No Young"}\hlstd{),} - \hlkwc{ylab}\hlstd{=}\hlstr{"Nb YES from dginn"}\hlstd{)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/stripchart-1} - -\end{knitrout} - -\subsection{Résultats Cooper-primate VS Young-primate} - -\subsubsection{How many genes in the Cooper-primate columns?} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlcom{# Temporary table with necessary columns} - -\hlstd{tmp}\hlkwb{<-}\hlstd{tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"whole.gene.dN.dS.model.0"}\hlstd{,} \hlstr{"pVal.M8vsM7"}\hlstd{,} -\hlstr{"cooper.primates.Gene"}\hlstd{,} \hlstr{"cooper.primates.Average_dNdS"}\hlstd{,} -\hlstr{"cooper.primates.M7.M8_p_value"}\hlstd{)]} -\hlkwd{dim}\hlstd{(tmp)} -\end{alltt} -\begin{verbatim} -## [1] 332 6 -\end{verbatim} -\begin{alltt} -\hlcom{# Lines with values in the cooper Gene names column} -\hlkwd{dim}\hlstd{(tmp[tmp}\hlopt{$}\hlstd{cooper.primates.Gene}\hlopt{!=}\hlstr{""}\hlstd{,])} -\end{alltt} -\begin{verbatim} -## [1] 207 6 -\end{verbatim} -\begin{alltt} -\hlcom{# Line with values (no NA) in the Cooper dNdS column} -\hlkwd{sum}\hlstd{(}\hlkwd{is.na}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.primates.Average_dNdS)}\hlopt{==}\hlstd{F)} -\end{alltt} -\begin{verbatim} -## [1] 201 -\end{verbatim} -\begin{alltt} -\hlcom{# Line with values (no NA) in the Cooper pvalue column} -\hlkwd{sum}\hlstd{(}\hlkwd{is.na}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value)}\hlopt{==}\hlstd{F)} -\end{alltt} -\begin{verbatim} -## [1] 207 -\end{verbatim} -\end{kframe} -\end{knitrout} - -\subsubsection{Omega} - -Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "cooper.primates.Average\_dNdS" - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0, tab}\hlopt{$}\hlstd{cooper.primates.Average_dNdS,} - \hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega Cooper-primate"}\hlstd{)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/omegaM7M8coop-1} - -\end{knitrout} - -\subsubsection{pvalues pour M7M8} - -Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne cooper.primates.M7.M8\_p\_value (p-value de l'analyse de Cooper). - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7, tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} - \hlkwc{xlab}\hlstd{=}\hlstr{"p-value Young"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"p-value Cooper-primate"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"M7vM8 Paml-primate"}\hlstd{)} - -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value[tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"green"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} - -\hlkwd{legend}\hlstd{(}\hlstr{"topleft"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"<0.05 in Cooper PamlM7M8 but >0.05 in Young M8vsM7"}\hlstd{,} - \hlstr{"<0.05 in Young M8vsM7 but >0.05 in Cooper PamlM7M8"}\hlstd{),} - \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"red"}\hlstd{,} \hlstr{"green"}\hlstd{))} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/pvalM7M8coop-1} - -\end{knitrout} - -\subsection{Résultats DGINN sur alignement de Janet-Young (DGINN-Young-primate) VS Cooper-primates} - -\subsubsection{Omega} - -Comparaison des Omega: colonne colonne "cooper.primates.Average\_dNdS" VS omega de DGINN. - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{Omega_PamlM7M8, tab}\hlopt{$}\hlstd{cooper.primates.Average_dNdS,} - \hlkwc{xlab}\hlstd{=}\hlstr{"Omega DGINN-Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega Cooper-primate"}\hlstd{)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/omegaM7M8comp3-1} - -\end{knitrout} - -\subsubsection{pvalues pour M7M8} - -Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne "PValue" + ligne "PamlM7M8", pour la sortie de dginn. - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{PValue_PamlM7M8, tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} - \hlkwc{xlab}\hlstd{=}\hlstr{"p-value DGINN-Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"p-value Cooper-primate"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"M7vM8 Paml"}\hlstd{)} - -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{PValue_PamlM7M8[tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value[tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{PValue_PamlM7M8[tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value[tab}\hlopt{$}\hlstd{PValue_PamlM7M8}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"green"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} - -\hlkwd{legend}\hlstd{(}\hlstr{"topleft"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"<0.05 in Cooper-primate PamlM7M8 but >0.05 in DGINN-Young-primate M8vsM7"}\hlstd{,} - \hlstr{"<0.05 in DGINN-Young-primate M8vsM7 but >0.05 in Cooper-primate PamlM7M8"}\hlstd{),} - \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"red"}\hlstd{,} \hlstr{"green"}\hlstd{))} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/pvalM7M8comp3-1} - -\end{knitrout} - -\subsection{Overlap} - -I will draw a venn diagramm for the positive genes in the 3 analyses. - -\subsubsection{Library and subtable} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{library}\hlstd{(VennDiagram)} - -\hlcom{# keeps only genes analysed in all 3 experiments} -\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"pVal.M8vsM7"}\hlstd{,} \hlstr{"cooper.primates.M7.M8_p_value"}\hlstd{,} - \hlstr{"PosSel_PamlM7M8"}\hlstd{,} \hlstr{"PValue_PamlM7M8"}\hlstd{)])} -\hlkwd{dim}\hlstd{(tmp)} -\end{alltt} -\begin{verbatim} -## [1] 186 5 -\end{verbatim} -\end{kframe} -\end{knitrout} - -Il reste 186 gènes - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{area1dginn}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{PosSel_PamlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{)} -\hlstd{area2jean}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{)} -\hlstd{area3coop}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_val}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} - - -\hlstd{n12}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{PosSel_PamlM7M8}\hlopt{==}\hlstr{"Y"} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{)} -\hlstd{n23}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_val}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} -\hlstd{n13}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{PosSel_PamlM7M8}\hlopt{==}\hlstr{"Y"} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_val}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} - -\hlstd{n123}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{PosSel_PamlM7M8}\hlopt{==}\hlstr{"Y"} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05} \hlopt{&} -\hlstd{tmp}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_val}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} - -\hlkwd{draw.triple.venn}\hlstd{(area1dginn, area2jean, area3coop,} -\hlstd{n12, n23, n13, n123,} -\hlkwc{category}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN-Young-primate"}\hlstd{,} \hlstr{"Young-primate"}\hlstd{,} \hlstr{"Cooper-primate"}\hlstd{))} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/vennprimate-1} -\begin{kframe}\begin{verbatim} -## (polygon[GRID.polygon.836], polygon[GRID.polygon.837], polygon[GRID.polygon.838], polygon[GRID.polygon.839], polygon[GRID.polygon.840], polygon[GRID.polygon.841], text[GRID.text.842], text[GRID.text.843], text[GRID.text.844], text[GRID.text.845], text[GRID.text.846], text[GRID.text.847], text[GRID.text.848], text[GRID.text.849], text[GRID.text.850], text[GRID.text.851]) -\end{verbatim} -\end{kframe} -\end{knitrout} - -\subsection{Mondrian} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{library}\hlstd{(Mondrian)} - -\hlstd{monddata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tmp}\hlopt{$}\hlstd{Gene.name)} -\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{PosSel_PamlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)} -\hlstd{monddata}\hlopt{$}\hlstd{primates_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)} -\hlstd{monddata}\hlopt{$}\hlstd{primates_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_val}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)} - -\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{])} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/mondrianprimates-1} - -\end{knitrout} - - -%\subsection{Comparaison des codons?} - -%Subtable with lines with both methods showing positive selection. - - - - - - - - - - - - - - - - - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\section{Bats Comparisons} - -\subsection{Add DGINN results for Bats} - -Lecture du tableau. - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlcom{# Tableau tel qu'il est à cet étape du script, pour travailler sur l'ajout du nouveau tableau bats sans recommencer au début à chaque fois.} -\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlstr{"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt"}\hlstd{,} - \hlkwc{header}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)} -\end{alltt} -\end{kframe} -\end{knitrout} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlcom{#dginnbatsold<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/2020_bats_completeResults.csv", } -\hlcom{# fill=T, h=T)} - - -\hlstd{dginnbats}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/DGINN_202005281339summary_cleaned.tab"}\hlstd{,} - \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} - - -\hlkwd{dim}\hlstd{(dginnbats)} -\end{alltt} -\begin{verbatim} -## [1] 349 27 -\end{verbatim} -\begin{alltt} -\hlkwd{names}\hlstd{(dginnbats)} -\end{alltt} -\begin{verbatim} -## [1] "File" "Name" "Gene" -## [4] "GeneSize" "NbSpecies" "omegaM0Bpp" -## [7] "omegaM0codeml" "BUSTED" "BUSTED.p.value" -## [10] "MEME.NbSites" "MEME.PSS" "BppM1M2" -## [13] "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS" -## [16] "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites" -## [19] "BppM7M8.PSS" "codemlM1M2" "codemlM1M2.p.value" -## [22] "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8" -## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS" -\end{verbatim} -\begin{alltt} -\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene))} -\end{alltt} -\begin{verbatim} -## [1] 349 -\end{verbatim} -\begin{alltt} -\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsGene))} -\end{alltt} -\begin{verbatim} -## [1] 218 -\end{verbatim} -\begin{alltt} -\hlkwd{table}\hlstd{(}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsGene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene))} -\end{alltt} -\begin{verbatim} -## -## FALSE TRUE -## 3 215 -\end{verbatim} -\end{kframe} -\end{knitrout} - -Which genes in the Cooper table are not in the gene output? - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsGene)[}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsGene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlstd{F]} -\end{alltt} -\begin{verbatim} -## [1] BCS1L C1orf50 -## 218 Levels: AAR2 AASS AATF ACADM ACSL3 ADAMTS1 AGPS AKAP8L ALG11 ALG5 ... ZYG11B -\end{verbatim} -\end{kframe} -\end{knitrout} - -Merge tables: - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{names}\hlstd{(dginnbats)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"bats_Name"}\hlstd{,} \hlstr{"cooper.batsGene"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"bats_"}\hlstd{,} \hlkwd{names}\hlstd{(dginnbats)[}\hlopt{-}\hlstd{(}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{)]))} - -\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab,dginnbats,} \hlkwc{by}\hlstd{=}\hlstr{"cooper.batsGene"}\hlstd{,} \hlkwc{all.x}\hlstd{=T)} -\end{alltt} -\end{kframe} -\end{knitrout} - - -\subsection{Cooper-bats results vs DGINN-bats results} - -\subsubsection{Omega} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS, tab}\hlopt{$}\hlstd{bats_omegaM0codeml,} - \hlkwc{xlab}\hlstd{=}\hlstr{"Omega Cooper-bats"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-bats"}\hlstd{)} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/omegaM7M8bats-1} - -\end{knitrout} - -\subsubsection{pvalues pour M7M8} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value))} -\end{alltt} - - -{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value, tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} - \hlkwc{xlab}\hlstd{=}\hlstr{"p-value Cooper-bats"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"p-value DGINN-bats"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"M7vM8 Paml"}\hlstd{)} - -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{>}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"green"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} - - -\hlkwd{legend}\hlstd{(}\hlstr{"topleft"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"<0.05 in DGINN-bats but >0.05 in Cooper-bats"}\hlstd{,} - \hlstr{"<0.05 in Cooper-bats but >0.05 in DGINN-bats"}\hlstd{),} - \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"red"}\hlstd{,} \hlstr{"green"}\hlstd{))} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/pvalM7M8bats-1} - -\end{knitrout} - - - - -\subsection{Comparaison Cooper-Hawkins} - - -\subsubsection{pvalues pour M7M8} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value, tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value,} - \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"p-value Cooper-bats"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"p-value hawkins-bats"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"M7vM8 Paml"}\hlstd{)} - -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{>}\hlnum{0.05} \hlopt{&} - \hlstd{tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{>}\hlnum{0.05} \hlopt{&} - \hlstd{tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05} \hlopt{&} - \hlstd{tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value[tab}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05} \hlopt{&} - \hlstd{tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlkwc{col}\hlstd{=}\hlstr{"green"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} - - -\hlkwd{legend}\hlstd{(}\hlstr{"topleft"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"<0.05 in Hawkins but >0.05 in Cooper"}\hlstd{,} - \hlstr{"<0.05 in Cooper but >0.05 in Hawkins"}\hlstd{),} - \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"red"}\hlstd{,} \hlstr{"green"}\hlstd{))} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/pvalM7M8comp2-1} - -\end{knitrout} - - -\subsection{Comparaison dginn-Hawkins} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value, tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value,} - \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"p-value hawkins-bats"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"p-value DGINN-bats"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"M7vM8 Paml"}\hlstd{)} - -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value[tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{>}\hlnum{0.05} \hlopt{&} - \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{<}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value[tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{>}\hlnum{0.05} \hlopt{&} - \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{<}\hlnum{0.05}\hlstd{],} \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} -\hlkwd{points}\hlstd{(tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value[tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05} \hlopt{&} - \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{>}\hlnum{0.05}\hlstd{],} - \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value[tab}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05} \hlopt{&} - \hlstd{tab}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{>}\hlnum{0.05}\hlstd{],} \hlkwc{col}\hlstd{=}\hlstr{"green"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{)} - - -\hlkwd{legend}\hlstd{(}\hlstr{"topleft"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"<0.05 in DGINN-bats but >0.05 in Hawkins"}\hlstd{,} - \hlstr{"<0.05 in Hawkinsbut >0.05 in DGINN-bats"}\hlstd{),} - \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"red"}\hlstd{,} \hlstr{"green"}\hlstd{))} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/pvalM7M8compautre-1} - -\end{knitrout} - - - - - -\subsection{Diagramme de Venn} - -I will draw a venn diagramm for the positive genes in the 3 analyses. - -\subsubsection{subtab} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_codemlM7M8.p.value"}\hlstd{,} \hlstr{"hawkins_Positive.Selection..M8vM8a.p.value"}\hlstd{,} \hlstr{"cooper.batsM7.M8_p_value"}\hlstd{)])} -\hlkwd{dim}\hlstd{(tmp)} -\end{alltt} -\begin{verbatim} -## [1] 168 4 -\end{verbatim} -\end{kframe} -\end{knitrout} - -154 genes (present in the 3 experiments) - -\subsubsection{figure} -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlstd{area1dginn}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} -\hlstd{area2hawk}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} -\hlstd{area3coop}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} - - -\hlstd{n12}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} - -\hlstd{n23}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} - -\hlstd{n13}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} - - -\hlstd{n123}\hlkwb{<-}\hlkwd{sum}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlkwc{na.rm}\hlstd{=T)} - -\hlkwd{draw.triple.venn}\hlstd{(area1dginn, area2hawk, area3coop,} -\hlstd{n12, n23, n13, n123,} -\hlkwc{category}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN-Young-bats"}\hlstd{,} \hlstr{"Hawkins-bats"}\hlstd{,} \hlstr{"Cooper-bats"}\hlstd{))} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/vennbats-1} -\begin{kframe}\begin{verbatim} -## (polygon[GRID.polygon.852], polygon[GRID.polygon.853], polygon[GRID.polygon.854], polygon[GRID.polygon.855], polygon[GRID.polygon.856], polygon[GRID.polygon.857], text[GRID.text.858], text[GRID.text.859], text[GRID.text.860], text[GRID.text.861], text[GRID.text.862], text[GRID.text.863], text[GRID.text.864], text[GRID.text.865], text[GRID.text.866], text[GRID.text.867]) -\end{verbatim} -\end{kframe} -\end{knitrout} - -\subsection{Mondrian} - -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -\hlkwd{library}\hlstd{(Mondrian)} - -\hlstd{monddata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tmp}\hlopt{$}\hlstd{Gene.name)} -\hlstd{monddata}\hlopt{$}\hlstd{bats_dginn}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)} -\hlstd{monddata}\hlopt{$}\hlstd{bats_hawkins}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)} -\hlstd{monddata}\hlopt{$}\hlstd{bats_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)} - -\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{])} -\end{alltt} -\end{kframe} -\includegraphics[width=\maxwidth]{figure/mondrianbats-1} - -\end{knitrout} - -\section{To do} - -Comparaison G4 pas G4 - -\end{document} -