diff --git a/covid_comp_bats.Rnw b/covid_comp_bats.Rnw index 08b9a62f0420ef963e9a20d0bb3498e08f7d6d14..279c190894cf75ffb6be9da2fd1f69cb28da075e 100644 --- a/covid_comp_bats.Rnw +++ b/covid_comp_bats.Rnw @@ -34,6 +34,8 @@ workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/" tab<-read.delim(paste0(workdir, "covid_comp/covid_comp_complete.txt"), h=T, sep="\t") dim(tab) +tab$Gene.name<-as.character(tab$Gene.name) +tab$Gene.name[tab$PreyGene=="MTARC1"]<-"MTARC1" @ \section{Comparison Bats} @@ -45,6 +47,7 @@ dim(tab) plot(tab$cooper.batsAverage_dNdS, as.numeric(as.character(tab$bats_omegaM0codeml)), xlab="Omega Cooper-bats", ylab="Omega DGINN-bats") abline(0,1) +abline(lm(as.numeric(as.character(tab$bats_omegaM0codeml))~tab$cooper.batsAverage_dNdS), col="red") @ \subsection{Cooper-bats VS Hawkins-bats and DGINN-bats VS Hawkins-bats} @@ -56,13 +59,13 @@ abline(0,1) \subsection{Data} <<subbats>>= -tmp<-na.omit(tab[,c("Gene.name", "bats_codemlM7M8.p.value", "hawkins_Positive.Selection..M8vM8a.p.value", "cooper.batsM7.M8_p_value", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8")]) +tmp<-na.omit(tab[,c("Gene.name", "bats_codemlM7M8_p.value", "hawkins_Positive.Selection..M8vM8a.p.value", "cooper.batsM7.M8_p_value", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8")]) -tmp$bats_codemlM7M8.p.value<-as.numeric(as.character(tmp$bats_codemlM7M8.p.value)) +tmp$bats_codemlM7M8_p.value<-as.numeric(as.character(tmp$bats_codemlM7M8_p.value)) dim(tmp) @ -174 genes (present in the 3 experiments) +170 genes (present in the 3 experiments) \subsection{Mondrian} @@ -105,9 +108,37 @@ main.bar.color = "#648FFF", sets.bar.color = "#FE6100") @ +<<>>= +source("covid_comp_shiny.R") + + +df<-read.delim(paste0(workdir, +"/data/DGINN_202005281649summary_cleaned.csv"), + fill=T, h=T, sep=",") +names(df) +dftmp<-tab[,c("bats_File", "bats_Name", "Gene.name", + "bats_GeneSize", "bats_NbSpecies", 
"bats_omegaM0Bpp", + "bats_omegaM0codeml", "bats_BUSTED", "bats_BUSTED_p.value", + "bats_MEME_NbSites", "bats_MEME_PSS", "bats_BppM1M2", + "bats_BppM1M2_p.value", "bats_BppM1M2_NbSites", "bats_BppM1M2_PSS", + "bats_BppM7M8", "bats_BppM7M8_p.value", "bats_BppM7M8_NbSites", + "bats_BppM7M8_PSS", "bats_codemlM1M2", "bats_codemlM1M2_p.value", + "bats_codemlM1M2_NbSites","bats_codemlM1M2_PSS", "bats_codemlM7M8", + "bats_codemlM7M8_p.value", "bats_codemlM7M8_NbSites" , "bats_codemlM7M8_PSS")] + +names(dftmp)<-names(df) +makeFig1(dftmp) + +@ \end{document} + + + + + + diff --git a/covid_comp_bats.pdf b/covid_comp_bats.pdf index 6a94749512a2dd3398e2d43fe5a4e2ae8ac14fd0..ccdf459f9ee0c1633b0d1494840891dec816e06b 100644 Binary files a/covid_comp_bats.pdf and b/covid_comp_bats.pdf differ diff --git a/covid_comp_bats.tex b/covid_comp_bats.tex index 1334ecaf9228485f664fcd2e54e7bc4385fa13c9..492243c8a5276216ead8510bb5045919d59f19a1 100644 --- a/covid_comp_bats.tex +++ b/covid_comp_bats.tex @@ -88,8 +88,12 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw. \hlkwd{dim}\hlstd{(tab)} \end{alltt} \begin{verbatim} -## [1] 333 161 +## [1] 332 139 \end{verbatim} +\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)} +\hlstd{tab}\hlopt{$}\hlstd{Gene.name[tab}\hlopt{$}\hlstd{PreyGene}\hlopt{==}\hlstr{"MTARC1"}\hlstd{]}\hlkwb{<-}\hlstr{"MTARC1"} +\end{alltt} \end{kframe} \end{knitrout} @@ -107,8 +111,11 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw. 
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduits lors de la conversion automatique}}\begin{alltt} \hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)} +\hlkwd{abline}\hlstd{(}\hlkwd{lm}\hlstd{(}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml))}\hlopt{~}\hlstd{tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS),} \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{)} \end{alltt} -\end{kframe} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning in eval(predvars, data, env): NAs introduits lors de la conversion automatique}}\end{kframe} \includegraphics[width=\maxwidth]{figure/omegaM7M8bats-1} \end{knitrout} @@ -124,9 +131,9 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw. \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} -\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_codemlM7M8.p.value"}\hlstd{,} \hlstr{"hawkins_Positive.Selection..M8vM8a.p.value"}\hlstd{,} \hlstr{"cooper.batsM7.M8_p_value"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{)])} +\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_codemlM7M8_p.value"}\hlstd{,} \hlstr{"hawkins_Positive.Selection..M8vM8a.p.value"}\hlstd{,} \hlstr{"cooper.batsM7.M8_p_value"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{)])} -\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8.p.value))} 
+\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value))} \end{alltt} @@ -134,12 +141,12 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw. \hlkwd{dim}\hlstd{(tmp)} \end{alltt} \begin{verbatim} -## [1] 174 9 +## [1] 170 9 \end{verbatim} \end{kframe} \end{knitrout} -174 genes (present in the 3 experiments) +170 genes (present in the 3 experiments) \subsection{Mondrian} @@ -199,9 +206,55 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw. \end{knitrout} +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{source}\hlstd{(}\hlstr{"covid_comp_shiny.R"}\hlstd{)} + + +\hlstd{df}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} +\hlstr{"/data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),} + \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)} + +\hlkwd{names}\hlstd{(df)} +\end{alltt} +\begin{verbatim} +## [1] "File" "Name" "Gene" +## [4] "GeneSize" "NbSpecies" "omegaM0Bpp" +## [7] "omegaM0codeml" "BUSTED" "BUSTED.p.value" +## [10] "MEME.NbSites" "MEME.PSS" "BppM1M2" +## [13] "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS" +## [16] "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites" +## [19] "BppM7M8.PSS" "codemlM1M2" "codemlM1M2.p.value" +## [22] "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8" +## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS" +\end{verbatim} +\begin{alltt} +\hlstd{dftmp}\hlkwb{<-}\hlstd{tab[,}\hlkwd{c}\hlstd{(}\hlstr{"bats_File"}\hlstd{,} \hlstr{"bats_Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} + \hlstr{"bats_GeneSize"}\hlstd{,} \hlstr{"bats_NbSpecies"}\hlstd{,} \hlstr{"bats_omegaM0Bpp"}\hlstd{,} + \hlstr{"bats_omegaM0codeml"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BUSTED_p.value"}\hlstd{,} + \hlstr{"bats_MEME_NbSites"}\hlstd{,} 
\hlstr{"bats_MEME_PSS"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,} + \hlstr{"bats_BppM1M2_p.value"}\hlstd{,} \hlstr{"bats_BppM1M2_NbSites"}\hlstd{,} \hlstr{"bats_BppM1M2_PSS"}\hlstd{,} + \hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_BppM7M8_p.value"}\hlstd{,} \hlstr{"bats_BppM7M8_NbSites"}\hlstd{,} + \hlstr{"bats_BppM7M8_PSS"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM1M2_p.value"}\hlstd{,} + \hlstr{"bats_codemlM1M2_NbSites"}\hlstd{,}\hlstr{"bats_codemlM1M2_PSS"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{,} + \hlstr{"bats_codemlM7M8_p.value"}\hlstd{,} \hlstr{"bats_codemlM7M8_NbSites"} \hlstd{,} \hlstr{"bats_codemlM7M8_PSS"}\hlstd{)]} + +\hlkwd{names}\hlstd{(dftmp)}\hlkwb{<-}\hlkwd{names}\hlstd{(df)} +\hlkwd{makeFig1}\hlstd{(dftmp)} +\end{alltt} +\end{kframe} +\includegraphics[width=\maxwidth]{figure/unnamed-chunk-2-1} +\end{knitrout} \end{document} + + + + + + diff --git a/covid_comp_primate.Rnw b/covid_comp_primate.Rnw new file mode 100644 index 0000000000000000000000000000000000000000..d502adf3daecec1e63da7a98eacd83ceb4495e7f --- /dev/null +++ b/covid_comp_primate.Rnw @@ -0,0 +1,265 @@ +\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format +%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. +%\geometry{letterpaper} % ... or a4paper or a5paper or ... 
+%\geometry{landscape} % Activate for for rotated page geometry +%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent +%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode + % TeX will automatically convert eps --> pdf in pdflatex +%\usepackage{amssymb} + +\usepackage[utf8]{inputenc} +%\usepackage[cyr]{aeguill} +%\usepackage[francais]{babel} +%\usepackage{hyperref} + + +\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis} +\author{Marie Cariou} +\date{October 2020} % Activate to display a given date or no date + +\begin{document} +\maketitle + +\tableofcontents + +\newpage + + +\section{Data} + +Analysis were formatted by the script covid\_comp\_script0\_table.Rnw. + +<<>>= +workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/" + +tab<-read.delim(paste0(workdir, + "covid_comp/covid_comp_complete.txt"), h=T, sep="\t") +dim(tab) +tab$Gene.name<-as.character(tab$Gene.name) +tab$Gene.name[tab$PreyGene=="MTARC1"]<-"MTARC1" + +@ + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Comparisons Primates} + +\subsection{Janet Young's results (Young-primate) VS DGINN-full's results} + +Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn. 
+<<omegaM7M8_1>>=
+omega_dginn<-as.numeric(as.character(tab$dginn.primate_omegaM0Bpp)) # column is not stored as numeric: convert once
+plot(tab$whole.gene.dN.dS.model.0, omega_dginn,
+     xlab="Omega Young-primate", ylab="DGINN-full's")
+abline(0,1)
+abline(lm(omega_dginn~tab$whole.gene.dN.dS.model.0), col="red")
+
+# label outliers: compare the numeric omega (">" on a factor only yields NA); which() drops NA rows
+outlier<-tab[which(tab$whole.gene.dN.dS.model.0<0.4 & omega_dginn>0.5),]
+text(x=outlier$whole.gene.dN.dS.model.0,
+     y=as.numeric(as.character(outlier$dginn.primate_omegaM0Bpp)),
+     labels=outlier$Gene.name)
+
+@
+
+\subsection{Janet Young's results (Young-primate) VS Cooper's result}
+
+Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "cooper.primates.Average\_dNdS".
+<<omegaM7M8_2>>=
+
+plot(tab$whole.gene.dN.dS.model.0, as.numeric(as.character(tab$cooper.primates.Average_dNdS)),
+     xlab="Omega Young-primate", ylab="Omega Cooper-primate")
+abline(0,1)
+abline(lm(as.numeric(as.character(tab$cooper.primates.Average_dNdS))~tab$whole.gene.dN.dS.model.0), col="red")
+
+# label outliers; which() drops rows where either omega is NA instead of returning all-NA rows
+outlier<-tab[which(tab$whole.gene.dN.dS.model.0<0.4 & tab$cooper.primates.Average_dNdS>0.5),]
+text(x=outlier$whole.gene.dN.dS.model.0,
+     y=outlier$cooper.primates.Average_dNdS,
+     labels=outlier$Gene.name)
+
+@
+
+\subsection{Cooper's results (Cooper-primate) VS DGINN-full's results}
+
+Comparaison des Omega: colonne "cooper.primates.Average\_dNdS" VS colonne "omega" dans la sortie de dginn.
+<<omegaM7M8_3>>=
+omega_dginn<-as.numeric(as.character(tab$dginn.primate_omegaM0Bpp)) # column is not stored as numeric: convert once
+plot(tab$cooper.primates.Average_dNdS, omega_dginn,
+     xlab="Omega Cooper-primate", ylab="DGINN-full's")
+abline(0,1)
+abline(lm(omega_dginn~tab$cooper.primates.Average_dNdS), col="red")
+# label outliers: compare the numeric omega (">" on a factor only yields NA); which() drops NA rows
+outlier<-tab[which(tab$cooper.primates.Average_dNdS<0.4 & omega_dginn>0.5),]
+text(x=outlier$cooper.primates.Average_dNdS,
+     y=as.numeric(as.character(outlier$dginn.primate_omegaM0Bpp)),
+     labels=outlier$Gene.name)
+
+@
+
+
+
+
+
+
+\section{Overlap}
+
+
+\subsection{Mondrian}
+
+<<mondrianprimates>>=
+
+library(Mondrian)
+
+#######
+
+monddata<-as.data.frame(tab$Gene.name)
+dim(monddata)
+
+
+dginnfulltmp<-rowSums(cbind(tab$dginn.primate_BUSTED=="Y", tab$dginn.primate_BppM1M2=="Y",
+tab$dginn.primate_BppM7M8=="Y", tab$dginn.primate_codemlM1M2=="Y", tab$dginn.primate_codemlM7M8=="Y"))
+
+monddata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
+monddata$primate_cooper<-ifelse(tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
+monddata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)
+
+mondrian(na.omit(monddata[,2:4]), labels=c("Young", "Cooper", "DGINN-full >=3" ))
+
+#####
+monddata$primates_dginn_full<-ifelse(dginnfulltmp>=4, 1,0)
+
+mondrian(na.omit(monddata[,2:4]), labels=c("Young", "Cooper", "DGINN-full >=4"))
+@
+
+
+\subsection{UpSetR}
+
+Just another representation of the same result.
+
+<<subsetprimates>>=
+library(UpSetR)
+upsetdata<-as.data.frame(tab$Gene.name)
+
+upsetdata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
+upsetdata$primate_cooper<-ifelse(tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
+upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)
+
+
+upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F",
+      main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
+
+###
+upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=4, 1,0)
+
+upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F",
+      main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
+
+@
+
+\section{Gene List}
+
+Genes under positive selection according to at least 4 methods (first list) or at least 3 methods (second list).
+
+<<>>=
+dginnfulltmp<-rowSums(cbind(tab$dginn.primate_BUSTED=="Y",
+                            tab$dginn.primate_BppM1M2=="Y",
+                            tab$dginn.primate_BppM7M8=="Y",
+                            tab$dginn.primate_codemlM1M2=="Y",
+                            tab$dginn.primate_codemlM7M8=="Y"))
+# which() keeps only rows where the count is known (not NA) and reaches the threshold
+tab$Gene.name[which(dginnfulltmp>=4)]
+
+tab$Gene.name[which(dginnfulltmp>=3)]
+
+tmp<-tab[which(dginnfulltmp>=4),
+c("Gene.name","dginn.primate_BUSTED", "dginn.primate_BppM1M2",
+  "dginn.primate_BppM7M8","dginn.primate_codemlM1M2","dginn.primate_codemlM7M8")]
+
+write.table(tmp, "geneList_DGINN_full_primate_pos4.txt", row.names=FALSE, quote=FALSE)
+@
+
+
+\section{Shiny like}
+
+<<shiny, fig.height=11>>=
+makeFig1 <- function(df){
+  # Stacked horizontal barplot of the methods calling each gene "Y"; df needs the 5 method columns plus Gene.
+  # prepare data for colors etc
+  colMethods <- c("deepskyblue4", "darkorange" , "deepskyblue3" , "mediumseagreen" , "yellow3" , "black")
+  nameMethods <- c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8", "MEME")
+  metColor <- data.frame(Name = nameMethods , Col = colMethods , stringsAsFactors = FALSE)
+
+  # subset for this specific figure
+  #df <- df[df$nbY >= 1, ] # to drop genes found by 0 methods (big datasets)
+  xt <- df[, c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8")]
+  xt$Gene <- df$Gene
+  nbrMeth <- 5
+  # recode the calls: "Y" becomes 1, any other value 0 (NA stays NA)
+  xt[,1:5] <- ifelse(xt[,1:5] == "Y", 1, 0)
+  # sort so genes with the most Y are drawn last (top of the barplot); keep genes with >2 Y, which() drops NA rows
+  xt<-xt[order(rowSums(xt[,1:5])),]
+  xt<-xt[which(rowSums(xt[,1:5])>2),]
+
+  row.names(xt)<-xt$Gene
+  xt<-xt[,1:5]
+
+  colFig1 <- metColor[which(metColor$Name %in% colnames(xt)) , ]
+
+  ##### PART 1 : NUMBER OF METHODS
+  old_par <- par(xpd = NA , mar=c(2,7,4,0) , oma = c(0,0,0,0) , mgp = c(3,0.3,0))
+  on.exit(par(old_par), add = TRUE) # restore the caller's graphical parameters
+  barplot(
+    t(xt),
+    border = NA ,
+    axes = FALSE ,
+    col = adjustcolor(colFig1$Col, alpha.f = 1),
+    horiz = TRUE ,
+    las = 2 ,
+    main = "Methods detecting positive selection" ,
+    cex.main = 0.85,
+    cex.names = min(50/nrow(xt), 1.5)
+  )
+
+  axis(3, line = 0, at = c(0:nbrMeth), labels = c("0", rep("", nbrMeth -1), nbrMeth), tck = 0.02)
+
+  legend("bottomleft",
+         horiz = TRUE,
+         border = colFig1$Col,
+         legend = colFig1$Name,
+         fill = colFig1$Col,
+         cex = 0.8,
+         bty = "n",
+         xpd = NA
+  )
+}
+@
+
+<<>>=
+source("covid_comp_shiny.R") # NOTE(review): may redefine makeFig1 from the chunk above -- confirm which one is intended
+
+
+df<-read.delim(paste0(workdir,
+"/data/DGINN_202005281649summary_cleaned.csv"),
+               fill=TRUE, header=TRUE, sep=",")
+
+names(df)
+dftmp<-tab[,c("File", "Name", "Gene.name",
+              "GeneSize", "dginn.primate_NbSpecies", "dginn.primate_omegaM0Bpp",
+              "dginn.primate_omegaM0codeml", "dginn.primate_BUSTED", "dginn.primate_BUSTED.p.value",
+              "dginn.primate_MEME.NbSites", "dginn.primate_MEME.PSS", "dginn.primate_BppM1M2",
+              "dginn.primate_BppM1M2.p.value", "dginn.primate_BppM1M2.NbSites", "dginn.primate_BppM1M2.PSS",
+              "dginn.primate_BppM7M8", "dginn.primate_BppM7M8.p.value", "dginn.primate_BppM7M8.NbSites",
+              "dginn.primate_BppM7M8.PSS", "dginn.primate_codemlM1M2", "dginn.primate_codemlM1M2.p.value",
+              "dginn.primate_codemlM1M2.NbSites","dginn.primate_codemlM1M2.PSS", "dginn.primate_codemlM7M8",
+              "dginn.primate_codemlM7M8.p.value", "dginn.primate_codemlM7M8.NbSites" , "dginn.primate_codemlM7M8.PSS")]
+
+names(dftmp)<-names(df)
+makeFig1(dftmp)
+
+@
+
+\end{document}
+
diff --git a/covid_comp_primate.pdf b/covid_comp_primate.pdf
new file mode 100644
index 
0000000000000000000000000000000000000000..0840330fbd5d2c6fb016cec3aadd243fb8922118 Binary files /dev/null and b/covid_comp_primate.pdf differ diff --git a/covid_comp_primate.tex b/covid_comp_primate.tex new file mode 100644 index 0000000000000000000000000000000000000000..145593b25b82c3ee2d452c4438f9253dc1cc5cd8 --- /dev/null +++ b/covid_comp_primate.tex @@ -0,0 +1,434 @@ +\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color} +% maxwidth is the original width if it is less than linewidth +% otherwise use linewidth (to make sure the graphics do not exceed the margin) +\makeatletter +\def\maxwidth{ % + \ifdim\Gin@nat@width>\linewidth + \linewidth + \else + \Gin@nat@width + \fi +} +\makeatother + +\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345} +\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}% +\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}% +\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}% +\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% +\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}% +\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}% +\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}% +\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}% +\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}% +\let\hlipl\hlkwb + +\usepackage{framed} +\makeatletter +\newenvironment{kframe}{% + \def\at@end@of@kframe{}% + \ifinner\ifhmode% + \def\at@end@of@kframe{\end{minipage}}% + \begin{minipage}{\columnwidth}% + \fi\fi% + \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep + \colorbox{shadecolor}{##1}\hskip-\fboxsep + % There is no \\@totalrightmargin, so: + \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}% + \MakeFramed {\advance\hsize-\width + \@totalleftmargin\z@ \linewidth\hsize + \@setminipage}}% + {\par\unskip\endMakeFramed% + \at@end@of@kframe} +\makeatother + 
+\definecolor{shadecolor}{rgb}{.97, .97, .97} +\definecolor{messagecolor}{rgb}{0, 0, 0} +\definecolor{warningcolor}{rgb}{1, 0, 1} +\definecolor{errorcolor}{rgb}{1, 0, 0} +\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX + +\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format +%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. +%\geometry{letterpaper} % ... or a4paper or a5paper or ... +%\geometry{landscape} % Activate for for rotated page geometry +%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent +%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode + % TeX will automatically convert eps --> pdf in pdflatex +%\usepackage{amssymb} + +\usepackage[utf8]{inputenc} +%\usepackage[cyr]{aeguill} +%\usepackage[francais]{babel} +%\usepackage{hyperref} + + +\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis} +\author{Marie Cariou} +\date{October 2020} % Activate to display a given date or no date +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +\begin{document} +\maketitle + +\tableofcontents + +\newpage + + +\section{Data} + +Analysis were formatted by the script covid\_comp\_script0\_table.Rnw. 
+ +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"} + +\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} + \hlstr{"covid_comp/covid_comp_complete.txt"}\hlstd{),} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)} +\hlkwd{dim}\hlstd{(tab)} +\end{alltt} +\begin{verbatim} +## [1] 332 139 +\end{verbatim} +\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)} +\hlstd{tab}\hlopt{$}\hlstd{Gene.name[tab}\hlopt{$}\hlstd{PreyGene}\hlopt{==}\hlstr{"MTARC1"}\hlstd{]}\hlkwb{<-}\hlstr{"MTARC1"} +\end{alltt} +\end{kframe} +\end{knitrout} + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Comparisons Primates} + +\subsection{Janet Young's results (Young-primate) VS DGINN-full's results} + +Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn. 
+\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,} \hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp)),} + \hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"DGINN-full's"}\hlstd{)} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)} +\hlkwd{abline}\hlstd{(}\hlkwd{lm}\hlstd{(}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp))}\hlopt{~}\hlstd{tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0),} \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{)} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning in eval(predvars, data, env): NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp}\hlopt{>}\hlnum{0.5}\hlstd{,]} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning in Ops.factor(tab\$dginn.primate\_omegaM0Bpp, 0.5): '>' not meaningful for factors}}\begin{alltt} +\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,} +\hlkwc{y}\hlstd{=outlier}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp,} +\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)} +\end{alltt} +\end{kframe} +\includegraphics[width=\maxwidth]{figure/omegaM7M8_1-1} + +\end{knitrout} + +\subsection{Janet Young's results (Young-primate) VS Cooper's result} + +Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "cooper.primates.Average\_dNdS". 
+\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,} \hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{cooper.primates.Average_dNdS)),} + \hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega Cooper-primate"}\hlstd{)} +\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)} +\hlkwd{abline}\hlstd{(}\hlkwd{lm}\hlstd{(}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{cooper.primates.Average_dNdS))}\hlopt{~}\hlstd{tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0),} \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{)} + + +\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{cooper.primates.Average_dNdS}\hlopt{>}\hlnum{0.5}\hlstd{,]} +\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,} +\hlkwc{y}\hlstd{=outlier}\hlopt{$}\hlstd{cooper.primates.Average_dNdS,} +\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)} +\end{alltt} +\end{kframe} +\includegraphics[width=\maxwidth]{figure/omegaM7M8_2-1} + +\end{knitrout} + +\subsection{Cooper's results (Cooper-primate) VS DGINN-full's results} + +Comparaison des Omega: colonne "cooper.primates.Average\_dNdS" VS colonne "omega" dans la sortie de dginn. 
+\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.primates.Average_dNd,} \hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp)),} + \hlkwc{xlab}\hlstd{=}\hlstr{"Omega Cooper-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"DGINN-full's"}\hlstd{)} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)} +\hlkwd{abline}\hlstd{(}\hlkwd{lm}\hlstd{(}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp))}\hlopt{~}\hlstd{tab}\hlopt{$}\hlstd{cooper.primates.Average_dNd),} \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{)} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning in eval(predvars, data, env): NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{cooper.primates.Average_dNd}\hlopt{<}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp}\hlopt{>}\hlnum{0.5}\hlstd{,]} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning in Ops.factor(tab\$dginn.primate\_omegaM0Bpp, 0.5): '>' not meaningful for factors}}\begin{alltt} +\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{cooper.primates.Average_dNd,} +\hlkwc{y}\hlstd{=outlier}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp,} +\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)} +\end{alltt} +\end{kframe} +\includegraphics[width=\maxwidth]{figure/omegaM7M8_3-1} + +\end{knitrout} + + + + + + +\section{Overlap} + + +\subsection{Mondrian} + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{library}\hlstd{(Mondrian)} + +\hlcom{#######} + 
+\hlstd{monddata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)} +\hlkwd{dim}\hlstd{(monddata)} +\end{alltt} +\begin{verbatim} +## [1] 332 1 +\end{verbatim} +\begin{alltt} +\hlstd{dginnfulltmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstd{dginn.primate_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{dginn.primate_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,} +\hlstd{tab}\hlopt{$}\hlstd{dginn.primate_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{dginn.primate_codemlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{dginn.primate_codemlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{))} + +\hlstd{monddata}\hlopt{$}\hlstd{primates_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)} +\hlstd{monddata}\hlopt{$}\hlstd{primate_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)} +\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)} + +\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"Cooper"}\hlstd{,} \hlstr{"DGINN-full >=3"} \hlstd{))} +\end{alltt} +\end{kframe} +\includegraphics[width=\maxwidth]{figure/mondrianprimates-1} +\begin{kframe}\begin{alltt} +\hlcom{#####} +\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)} + +\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"Cooper"}\hlstd{,} \hlstr{"DGINN-full >=4"}\hlstd{))} +\end{alltt} +\end{kframe} 
+\includegraphics[width=\maxwidth]{figure/mondrianprimates-2} + +\end{knitrout} + + +\subsection{subsetR} + +Just another representation of the same result. + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{library}\hlstd{(UpSetR)} +\hlstd{upsetdata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)} + +\hlstd{upsetdata}\hlopt{$}\hlstd{primates_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)} +\hlstd{upsetdata}\hlopt{$}\hlstd{primate_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{cooper.primates.M7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)} +\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)} + + +\hlkwd{upset}\hlstd{(}\hlkwd{na.omit}\hlstd{(upsetdata),} \hlkwc{nsets} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{matrix.color} \hlstd{=} \hlstr{"#DC267F"}\hlstd{,} +\hlkwc{main.bar.color} \hlstd{=} \hlstr{"#648FFF"}\hlstd{,} \hlkwc{sets.bar.color} \hlstd{=} \hlstr{"#FE6100"}\hlstd{)} +\end{alltt} +\end{kframe} +\includegraphics[width=\maxwidth]{figure/subsetprimates-1} +\begin{kframe}\begin{alltt} +\hlcom{###} +\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)} + +\hlkwd{upset}\hlstd{(}\hlkwd{na.omit}\hlstd{(upsetdata),} \hlkwc{nsets} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{matrix.color} \hlstd{=} \hlstr{"#DC267F"}\hlstd{,} +\hlkwc{main.bar.color} \hlstd{=} \hlstr{"#648FFF"}\hlstd{,} \hlkwc{sets.bar.color} \hlstd{=} \hlstr{"#FE6100"}\hlstd{)} +\end{alltt} +\end{kframe} +\includegraphics[width=\maxwidth]{figure/subsetprimates-2} + +\end{knitrout} + +\section{Gene List} + +Genes under positive selection for at least 4 methods. 
+ +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{dginnfulltmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstd{dginn.primate_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{,} + \hlstd{tab}\hlopt{$}\hlstd{dginn.primate_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,} +\hlstd{tab}\hlopt{$}\hlstd{dginn.primate_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,} +\hlstd{tab}\hlopt{$}\hlstd{dginn.primate_codemlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,} +\hlstd{tab}\hlopt{$}\hlstd{dginn.primate_codemlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{))} + +\hlstd{tab}\hlopt{$}\hlstd{Gene.name[dginnfulltmp}\hlopt{>=}\hlnum{4} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F]} +\end{alltt} +\begin{verbatim} +## [1] "ACADM" "BCS1L" "BRD4" "CDK5RAP2" "CEP135" +## [6] "CEP68" "CLIP4" "DNMT1" "DPH5" "EMC1" +## [11] "FYCO1" "GCC2" "GGH" "GHITM" "GIGYF2" +## [16] "GLA" "GOLGA7" "HECTD1" "IDE" "ITGB1" +## [21] "LARP1" "LARP4B" "LMAN2" "MARK1" "MIPOL1" +## [26] "MPHOSPH10" "MYCBP2" "NDUFAF2" "NDUFB9" "PCNT" +## [31] "POLA1" "PRIM2" "PRKAR2A" "PVR" "REEP6" +## [36] "RIPK1" "SAAL1" "SEPSECS" "SIRT5" "SLC25A21" +## [41] "SLC27A2" "TMEM39B" "TOR1AIP1" "TUBGCP2" "UBAP2" +## [46] "UGGT2" "VPS39" "ZNF318" +\end{verbatim} +\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{Gene.name[dginnfulltmp}\hlopt{>=}\hlnum{3} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F]} +\end{alltt} +\begin{verbatim} +## [1] "ACADM" "ADAM9" "AP2A2" "ATE1" "BCS1L" +## [6] "BRD4" "BZW2" "CDK5RAP2" "CEP135" "CEP68" +## [11] "CLIP4" "CNTRL" "DNMT1" "DPH5" "EDEM3" +## [16] "EIF4E2" "EMC1" "EXOSC2" "FYCO1" "GCC2" +## [21] "GGH" "GHITM" "GIGYF2" "GLA" "GOLGA7" +## [26] "GOLGB1" "GORASP1" "HDAC2" "HECTD1" "HS6ST2" +## [31] "IDE" "ITGB1" "LARP1" "LARP4B" "LARP7" +## [36] "LMAN2" "MARK1" "MDN1" "MIPOL1" "MOV10" +## [41] "MPHOSPH10" "MRPS5" "MYCBP2" "NAT14" "NDUFAF2" +## [46] "NDUFB9" "NGLY1" "NPC2" "PCNT" "PITRM1" +## [51] "PLAT" "PLOD2" "PMPCB" "POLA1" 
"POR" +## [56] "PRIM2" "PRKAR2A" "PTBP2" "PVR" "RAB14" +## [61] "RAB1A" "RAB2A" "RAP1GDS1" "RBX1" "REEP6" +## [66] "RIPK1" "RPL36" "SAAL1" "SCCPDH" "SEPSECS" +## [71] "SIRT5" "SLC25A21" "SLC27A2" "STOM" "TIMM8B" +## [76] "TMEM39B" "TOR1AIP1" "TRIM59" "TRMT1" "TUBGCP2" +## [81] "UBAP2" "UGGT2" "USP54" "VPS39" "ZNF318" +\end{verbatim} +\begin{alltt} +\hlstd{tmp}\hlkwb{<-}\hlstd{tab[dginnfulltmp}\hlopt{>=}\hlnum{4} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F,} +\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,}\hlstr{"dginn.primate_BUSTED"}\hlstd{,} \hlstr{"dginn.primate_BppM1M2"}\hlstd{,} + \hlstr{"dginn.primate_BppM7M8"}\hlstd{,}\hlstr{"dginn.primate_codemlM1M2"}\hlstd{,}\hlstr{"dginn.primate_codemlM7M8"}\hlstd{)]} + +\hlkwd{write.table}\hlstd{(tmp,} \hlstr{"geneList_DGINN_full_primate_pos4.txt"}\hlstd{,} \hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F)} +\end{alltt} +\end{kframe} +\end{knitrout} + + +\section{Shiny like} + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{makeFig1} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{df}\hlstd{)\{} + + \hlcom{# prepare data for colors etc} + \hlstd{colMethods} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlstr{"deepskyblue4"}\hlstd{,} \hlstr{"darkorange"} \hlstd{,} \hlstr{"deepskyblue3"} \hlstd{,} \hlstr{"mediumseagreen"} \hlstd{,} \hlstr{"yellow3"} \hlstd{,} \hlstr{"black"}\hlstd{)} + \hlstd{nameMethods} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlstr{"BUSTED"}\hlstd{,} \hlstr{"BppM1M2"}\hlstd{,} \hlstr{"BppM7M8"}\hlstd{,} \hlstr{"codemlM1M2"}\hlstd{,} \hlstr{"codemlM7M8"}\hlstd{,} \hlstr{"MEME"}\hlstd{)} + \hlstd{metColor} \hlkwb{<-} \hlkwd{data.frame}\hlstd{(}\hlkwc{Name} \hlstd{= nameMethods ,} \hlkwc{Col} \hlstd{= colMethods ,} \hlkwc{stringsAsFactors} \hlstd{=} \hlnum{FALSE}\hlstd{)} + + \hlcom{# subset for this specific figure} + \hlcom{#df <- df[df$nbY >= 1, ] # to drop genes found by 0 methods (big datasets)} + \hlstd{xt} \hlkwb{<-} \hlstd{df[,} 
\hlkwd{c}\hlstd{(}\hlstr{"BUSTED"}\hlstd{,} \hlstr{"BppM1M2"}\hlstd{,} \hlstr{"BppM7M8"}\hlstd{,} \hlstr{"codemlM1M2"}\hlstd{,} \hlstr{"codemlM7M8"}\hlstd{)]} + \hlstd{xt}\hlopt{$}\hlstd{Gene} \hlkwb{<-} \hlstd{df}\hlopt{$}\hlstd{Gene} + \hlstd{nbrMeth} \hlkwb{<-} \hlnum{5} + \hlcom{# reverse order of dataframe so that genes with the most Y are at the bottom (to be on top of the barplot)} + \hlstd{xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} \hlkwb{<-} \hlkwd{ifelse}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} \hlopt{==} \hlstr{"Y"}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)} + \hlcom{# sort and Filter the 0 lines} + \hlstd{xt}\hlkwb{<-}\hlstd{xt[}\hlkwd{order}\hlstd{(}\hlkwd{rowSums}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])),]} + \hlstd{xt}\hlkwb{<-}\hlstd{xt[}\hlkwd{rowSums}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])}\hlopt{>}\hlnum{2}\hlstd{,]} + + \hlkwd{row.names}\hlstd{(xt)}\hlkwb{<-}\hlstd{xt}\hlopt{$}\hlstd{Gene} + \hlstd{xt}\hlkwb{<-}\hlstd{xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} + + \hlstd{colFig1} \hlkwb{<-} \hlstd{metColor[}\hlkwd{which}\hlstd{(metColor}\hlopt{$}\hlstd{Name} \hlopt{%in%} \hlkwd{colnames}\hlstd{(xt)) , ]} + + \hlcom{##### PART 1 : NUMBER OF METHODS} + \hlkwd{par}\hlstd{(}\hlkwc{xpd} \hlstd{=} \hlnum{NA} \hlstd{,} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{7}\hlstd{,}\hlnum{4}\hlstd{,}\hlnum{0}\hlstd{) ,} \hlkwc{oma} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{) ,} \hlkwc{mgp} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,}\hlnum{0.3}\hlstd{,}\hlnum{0}\hlstd{))} + + \hlstd{h} \hlkwb{=} \hlkwd{barplot}\hlstd{(} + \hlkwd{t}\hlstd{(xt),} + \hlkwc{border} \hlstd{=} \hlnum{NA} \hlstd{,} + \hlkwc{axes} \hlstd{= F ,} + \hlkwc{col} \hlstd{=} \hlkwd{adjustcolor}\hlstd{(colFig1}\hlopt{$}\hlstd{Col,} \hlkwc{alpha.f} \hlstd{=} \hlnum{1}\hlstd{),} + \hlkwc{horiz} \hlstd{= T ,} + \hlkwc{las} \hlstd{=} \hlnum{2} \hlstd{,} + \hlkwc{main} \hlstd{=} 
\hlstr{"Methods detecting positive selection"} \hlstd{,} + \hlkwc{cex.main} \hlstd{=} \hlnum{0.85}\hlstd{,} + \hlkwc{cex.names} \hlstd{=} \hlkwd{min}\hlstd{(}\hlnum{50}\hlopt{/}\hlkwd{nrow}\hlstd{(xt),} \hlnum{1.5}\hlstd{)} + \hlstd{)} + + \hlkwd{axis}\hlstd{(}\hlnum{3}\hlstd{,} \hlkwc{line} \hlstd{=} \hlnum{0}\hlstd{,} \hlkwc{at} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0}\hlopt{:}\hlstd{nbrMeth),} \hlkwc{label} \hlstd{=} \hlkwd{c}\hlstd{(}\hlstr{"0"}\hlstd{,} \hlkwd{rep}\hlstd{(}\hlstr{""}\hlstd{, nbrMeth} \hlopt{-}\hlnum{1}\hlstd{), nbrMeth),} \hlkwc{tck} \hlstd{=} \hlnum{0.02}\hlstd{)} + + \hlkwd{legend}\hlstd{(}\hlstr{"bottomleft"}\hlstd{,} + \hlkwc{horiz} \hlstd{= T,} + \hlkwc{border} \hlstd{= colFig1}\hlopt{$}\hlstd{Col,} + \hlkwc{legend} \hlstd{= colFig1}\hlopt{$}\hlstd{Name,} + \hlkwc{fill} \hlstd{= colFig1}\hlopt{$}\hlstd{Col,} + \hlkwc{cex} \hlstd{=} \hlnum{0.8}\hlstd{,} + \hlkwc{bty} \hlstd{=} \hlstr{"n"}\hlstd{,} + \hlkwc{xpd} \hlstd{=} \hlnum{NA} + \hlstd{)} +\hlstd{\}} +\end{alltt} +\end{kframe} +\end{knitrout} + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{source}\hlstd{(}\hlstr{"covid_comp_shiny.R"}\hlstd{)} + + +\hlstd{df}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} +\hlstr{"/data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),} + \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)} + +\hlkwd{names}\hlstd{(df)} +\end{alltt} +\begin{verbatim} +## [1] "File" "Name" "Gene" +## [4] "GeneSize" "NbSpecies" "omegaM0Bpp" +## [7] "omegaM0codeml" "BUSTED" "BUSTED.p.value" +## [10] "MEME.NbSites" "MEME.PSS" "BppM1M2" +## [13] "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS" +## [16] "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites" +## [19] "BppM7M8.PSS" "codemlM1M2" "codemlM1M2.p.value" +## [22] "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8" +## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS" +\end{verbatim} 
#' Stacked horizontal barplot of the methods detecting positive selection
#'
#' Each bar is one gene; each coloured segment is one of the five methods
#' (BUSTED, BppM1M2, BppM7M8, codemlM1M2, codemlM7M8) whose column equals
#' "Y" for that gene. Genes are drawn in increasing order of their number of
#' positive methods, so the genes with the most "Y" end up at the top of the
#' horizontal barplot.
#'
#' @param df A data frame with one row per gene. It must contain the five
#'   method columns named above (value "Y" means "detected"; anything else
#'   counts as not detected). The optional `Gene` column provides bar labels.
#' @param min_methods Minimum number of methods that must report "Y" for a
#'   gene to be drawn. The default of 3 reproduces the former hard-coded
#'   `> 2` filter.
#' @return Invisibly, the value of the final `legend()` call. If no gene
#'   passes the filter, a warning is raised and `NULL` is returned invisibly
#'   without drawing anything.
makeFig1 <- function(df, min_methods = 3) {
  # One colour per method; the names double as the required column names.
  method_cols <- c(
    BUSTED = "deepskyblue4",
    BppM1M2 = "darkorange",
    BppM7M8 = "deepskyblue3",
    codemlM1M2 = "mediumseagreen",
    codemlM7M8 = "yellow3"
  )
  methods <- names(method_cols)
  n_methods <- length(methods)

  missing_cols <- setdiff(methods, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "makeFig1(): missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # 0/1 matrix (genes x methods); NA is kept so incomplete genes can be
  # dropped explicitly below.
  hits <- 1 * (as.matrix(df[, methods, drop = FALSE]) == "Y")
  n_pos <- rowSums(hits)
  keep <- !is.na(n_pos) & n_pos >= min_methods

  genes <- df[["Gene"]]
  if (!is.null(genes)) {
    rownames(hits) <- as.character(genes)
    keep <- keep & !is.na(genes)
  }

  hits <- hits[keep, , drop = FALSE]
  if (nrow(hits) == 0) {
    warning(
      "makeFig1(): no gene detected by at least ", min_methods,
      " methods; nothing to plot",
      call. = FALSE
    )
    return(invisible(NULL))
  }

  # Ascending order: barplot(horiz = TRUE) draws the first column at the
  # bottom, so the genes with the most "Y" appear at the top of the figure.
  hits <- hits[order(rowSums(hits)), , drop = FALSE]

  ##### PART 1 : NUMBER OF METHODS
  # Restore the caller's graphical parameters when the function exits.
  old_par <- par(
    xpd = NA, mar = c(2, 7, 4, 0), oma = c(0, 0, 0, 0), mgp = c(3, 0.3, 0)
  )
  on.exit(par(old_par), add = TRUE)

  barplot(
    t(hits),
    border = NA,
    axes = FALSE,
    col = unname(method_cols),
    horiz = TRUE,
    las = 2,
    main = "Methods detecting positive selection",
    cex.main = 0.85,
    # Shrink gene labels as the number of bars grows.
    cex.names = min(50 / nrow(hits), 1.5)
  )

  # Top axis: one tick per method count, labelled only at both ends.
  axis(
    3,
    line = 0,
    at = 0:n_methods,
    labels = c("0", rep("", n_methods - 1), n_methods),
    tck = 0.02
  )

  legend(
    "bottomleft",
    horiz = TRUE,
    border = unname(method_cols),
    legend = methods,
    fill = unname(method_cols),
    cex = 0.8,
    bty = "n",
    xpd = NA
  )
}


# Load the DGINN summary table. This relies on `workdir` being defined by the
# script that sources this file; when it is not, the read is skipped so that
# sourcing the helper on its own still defines makeFig1() without failing.
if (exists("workdir")) {
  df <- read.delim(
    paste0(workdir, "/data/DGINN_202005281649summary_cleaned.csv"),
    header = TRUE, sep = ",", fill = TRUE
  )
}
c1f0484a5303c7879ba74169b79ec965169b2d29..718d1c6a0e26d203c05b3dc6a432510600210e96 100644 Binary files a/figure/mondrianprimates-1.pdf and b/figure/mondrianprimates-1.pdf differ diff --git a/figure/mondrianprimates-2.pdf b/figure/mondrianprimates-2.pdf index 923646a5f294679c8f165d8f08002655a2845893..6e5d9dd914da64e139886d4443e7e8fcf7ea4904 100644 Binary files a/figure/mondrianprimates-2.pdf and b/figure/mondrianprimates-2.pdf differ diff --git a/figure/omegaM7M8_1-1.pdf b/figure/omegaM7M8_1-1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..233cb966c4f36dab7cf54aea060017bbcccb5d38 Binary files /dev/null and b/figure/omegaM7M8_1-1.pdf differ diff --git a/figure/omegaM7M8_2-1.pdf b/figure/omegaM7M8_2-1.pdf index 3d38524b7cb31002ee689f9a342ce029ff8dbaf2..41574a85812c7d5fe763f0c944b21bcdf17b91de 100644 Binary files a/figure/omegaM7M8_2-1.pdf and b/figure/omegaM7M8_2-1.pdf differ diff --git a/figure/omegaM7M8_3-1.pdf b/figure/omegaM7M8_3-1.pdf index 5e5663cbf99cf44ff38c20c08e782d1babe547f7..99b52a0102ee0f9d715bf4598a1c4aca6eea4bc7 100644 Binary files a/figure/omegaM7M8_3-1.pdf and b/figure/omegaM7M8_3-1.pdf differ diff --git a/figure/omegaM7M8bats-1.pdf b/figure/omegaM7M8bats-1.pdf index d33aaaaacde30347b7af3a6f8f4fa683a7317aa7..56ed3b5f08d07d13a76b4e4fbd76e3faa6f02035 100644 Binary files a/figure/omegaM7M8bats-1.pdf and b/figure/omegaM7M8bats-1.pdf differ diff --git a/figure/subsetbats-1.pdf b/figure/subsetbats-1.pdf index 96dae84ba4733d1ef7b1e0c67ca05f23eb6c1b43..9c64b0e5931dc71e5880451e059b0ab80bcf7d10 100644 Binary files a/figure/subsetbats-1.pdf and b/figure/subsetbats-1.pdf differ diff --git a/figure/subsetbats-2.pdf b/figure/subsetbats-2.pdf index cf4cffe7cd14d9f6fd9a1cda317079a9152384bc..d606faa72e324e1d8d16beca74e1d2c7ad8e0ed1 100644 Binary files a/figure/subsetbats-2.pdf and b/figure/subsetbats-2.pdf differ diff --git a/figure/subsetprimates-1.pdf b/figure/subsetprimates-1.pdf index 
82a0e501d24a4cf15d2d52466bc51d57a74c5838..10efe02ed49c282afd13cb985a246fbac97c1233 100644 Binary files a/figure/subsetprimates-1.pdf and b/figure/subsetprimates-1.pdf differ diff --git a/figure/subsetprimates-2.pdf b/figure/subsetprimates-2.pdf index 088b0af6890c24fd5fe5cdb5e285b1d329052f2f..49430730afb361faa676fa46ad477e2dded77fed 100644 Binary files a/figure/subsetprimates-2.pdf and b/figure/subsetprimates-2.pdf differ diff --git a/figure/unnamed-chunk-2-1.pdf b/figure/unnamed-chunk-2-1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..cdcf109281ea18aab3f1478ebf68ac74c4aa3039 Binary files /dev/null and b/figure/unnamed-chunk-2-1.pdf differ diff --git a/figure/unnamed-chunk-3-1.pdf b/figure/unnamed-chunk-3-1.pdf index 5df305383e0d69dd3899e3342420fce4f6760620..7fe25254fd3191ffe605fe654ef8b761739a96aa 100644 Binary files a/figure/unnamed-chunk-3-1.pdf and b/figure/unnamed-chunk-3-1.pdf differ diff --git a/geneList_DGINN_full_primate_pos4.txt b/geneList_DGINN_full_primate_pos4.txt index 2b5ab577c11307f6e6f67482a055e4d0bc9eddd3..b8f446a07c096524234e109f0be8b2d1d644e85d 100644 --- a/geneList_DGINN_full_primate_pos4.txt +++ b/geneList_DGINN_full_primate_pos4.txt @@ -1,4 +1,4 @@ -Gene.name dginn-primate_BUSTED dginn-primate_BppM1M2 dginn-primate_BppM7M8 dginn-primate_codemlM1M2 dginn-primate_codemlM7M8 +Gene.name dginn.primate_BUSTED dginn.primate_BppM1M2 dginn.primate_BppM7M8 dginn.primate_codemlM1M2 dginn.primate_codemlM7M8 ACADM Y Y Y Y Y BCS1L Y N Y Y Y BRD4 Y Y Y N Y @@ -34,7 +34,7 @@ PRIM2 Y Y Y Y Y PRKAR2A N Y Y Y Y PVR Y Y Y Y Y REEP6 Y Y Y Y Y -RIPK1 Y N Y Y Y +RIPK1 N Y Y Y Y SAAL1 Y N Y Y Y SEPSECS Y Y Y Y Y SIRT5 N Y Y Y Y