Skip to content
Snippets Groups Projects
Commit 82f55ae4 authored by your name's avatar your name
Browse files

script for primates

parent 4156bc21
No related branches found
No related tags found
No related merge requests found
\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{October 2020} % Activate to display a given date or no date
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Files manipulations}
\subsection{Read Janet Young's table}
<<>>=
workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
tab<-read.delim(paste0(workdir,
"data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"),
fill=T, h=T, dec=",")
dim(tab)
#names(tab)
@
\subsection{Read DGINN Young table}
DGINN-Young-primate table correspond to DGINN results, on the SAME alignment as Young-primate.
I will merge the 2 tables.
<<>>=
dginnY<-read.delim(paste0(workdir,
"data/summary_primate_young.res"),
fill=T, h=T)
dim(dginnY)
names(dginnY)
@
\subsection{Joining Young and DGINN Young table}
\textit{I hide some code corresponding to verifications of gene names coherence between tables}
<<results="hide", echo=FALSE>>=
head(tab)[,1:5]
# gene avec un nom bizar dans certaines colomne
tab[158,1:10]
#
length(unique(dginnY$Gene))
length(unique(tab$PreyGene))
length(unique(tab$Gene.name))
#quelle paire de colonne contient le plus de noms identiques
sum(unique(dginnY$Gene) %in% unique(tab$PreyGene))
sum(unique(dginnY$Gene) %in% unique(tab$Gene.name))
# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne.
# Je soupçonne que c'est celle là:
tab[158,1:10]
# Verif:
tab[,1:10][(tab$Gene.name %in% unique(dginnY$Gene))==F,]
# yep
# Remplacement manuel par
as.character(unique(dginnY$Gene)[(unique(dginnY$Gene) %in% tab$Gene.name)==F])
# dans le tableau de Janet
val_remp=as.character(unique(dginnY$Gene)[(unique(dginnY$Gene) %in% tab$Gene.name)==F])
tab$Gene.name<-as.character(tab$Gene.name)
tab$Gene.name[158]<-val_remp
sum(unique(dginnY$Gene) %in% unique(tab$Gene.name))
@
<<>>=
add_col<-function(method="PamlM1M2"){
tmp<-dginnY[dginnY$Method==method,
c("Gene", "Omega", "PosSel", "PValue", "NbSites", "PSS")]
names(tmp)<-c("Gene.name", paste0("Omega_", method),
paste0("PosSel_", method), paste0("PValue_", method),
paste0("NbSites_", method), paste0("PSS_", method))
tab<-merge(tab, tmp, by="Gene.name")
return(tab)
}
tab<-add_col("PamlM1M2")
tab<-add_col("PamlM7M8")
tab<-add_col("BppM1M2")
tab<-add_col("BppM7M8")
# Manip pour la colonne BUSTED
tmp<-dginnY[dginnY$Method=="BUSTED",c("Gene", "Omega", "PosSel", "PValue")]
names(tmp)<-c("Gene.name", "Omega_BUSTED", "PosSel_BUSTED", "PValue_BUSTED")
tab<-merge(tab, tmp, by="Gene.name")
tmp<-dginnY[dginnY$Method=="MEME",c("Gene", "NbSites", "PSS")]
names(tmp)<-c("Gene.name", "NbSites_MEME", "PSS_MEME")
tab<-merge(tab, tmp, by="Gene.name")
@
\subsection{Read DGINN Table}
<<>>=
dginnT<-read.delim(paste0(workdir,
"/data/DGINN_202005281649summary_cleaned.csv"),
fill=T, h=T, sep=",")
dim(dginnT)
names(dginnT)
# Number of genes in dginn-primate output not present in the original table
dginnT[(dginnT$Gene %in% tab$Gene.name)==F,"Gene"]
# This includes paralogs, recombinations found by DGINN
# and additionnal genes included on purpose
# Number of genes from the original list not present in DGINN output
tab[(tab$Gene.name %in% dginnT$Gene)==F,"Gene.name"]
names(dginnT)<-c("File", "Name", "Gene.name", "GeneSize", "dginn-primate_NbSpecies", "dginn-primate_omegaM0Bpp",
"dginn-primate_omegaM0codeml", "dginn-primate_BUSTED", "dginn-primate_BUSTED.p.value",
"dginn-primate_MEME.NbSites", "dginn-primate_MEME.PSS", "dginn-primate_BppM1M2",
"dginn-primate_BppM1M2.p.value", "dginn-primate_BppM1M2.NbSites", "dginn-primate_BppM1M2.PSS",
"dginn-primate_BppM7M8", "dginn-primate_BppM7M8.p.value", "dginn-primate_BppM7M8.NbSites",
"dginn-primate_BppM7M8.PSS", "dginn-primate_codemlM1M2", "dginn-primate_codemlM1M2.p.value",
"dginn-primate_codemlM1M2.NbSites", "dginn-primate_codemlM1M2.PSS", "dginn-primate_codemlM7M8",
"dginn-primate_codemlM7M8.p.value", "dginn-primate_codemlM7M8.NbSites", "dginn-primate_codemlM7M8.PSS")
@
\subsection{Join Table and DGINN table}
<<>>=
tab<-merge(tab,dginnT, by="Gene.name", all.x=T)
@
\subsection{Write new table}
<<>>=
write.table(tab,
"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20201014.txt",
row.names=F, quote=F, sep="\t")
@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comparisons Primates}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS Janet Young's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8>>=
plot(tab$whole.gene.dN.dS.model.0, tab$Omega_PamlM7M8,
xlab="Omega Young-primate", ylab="Omega DGINN-Young-primate")
abline(0,1)
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.2 & tab$Omega_PamlM7M8>0.4,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=(outlier$Omega_PamlM7M8+0.01),
outlier$Gene.name)
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.6 & tab$Omega_PamlM7M8>0.7,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=(outlier$Omega_PamlM7M8+0.01),
outlier$Gene.name)
@
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8_2>>=
tab$'dginn-primate_omegaM0Bpp'<-as.numeric(as.character(tab$'dginn-primate_omegaM0Bpp'))
plot(tab$'dginn-primate_omegaM0Bpp', tab$Omega_PamlM7M8,
xlab="DGINN-full's", ylab="Omega DGINN-Young-primate")
abline(0,1)
outlier<-tab[tab$'dginn-primate_omegaM0Bpp'>0.4 & tab$Omega_PamlM7M8<0.2,]
text(x=outlier$'dginn-primate_omegaM0Bpp',
y=(outlier$Omega_PamlM7M8+0.01),
outlier$Gene.name)
outlier<-tab[tab$'dginn-primate_omegaM0Bpp'>0.5 & tab$Omega_PamlM7M8<0.4,]
text(x=outlier$'dginn-primate_omegaM0Bpp',
y=(outlier$Omega_PamlM7M8+0.01),
outlier$Gene.name)
@
\subsection{Janet Young's results (Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8_3>>=
plot(tab$whole.gene.dN.dS.model.0, as.numeric(as.character(tab$'dginn-primate_omegaM0Bpp')),
xlab="Omega Young-primate", ylab="DGINN-full's")
abline(0,1)
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.4 & tab$'dginn-primate_omegaM0Bpp'>0.5,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$'dginn-primate_omegaM0Bpp',
outlier$Gene.name)
@
\section{Overlap}
\subsection{Mondrian}
<<mondrianprimates>>=
library(Mondrian)
#######
monddata<-as.data.frame(tab$Gene.name)
dim(monddata)
dginnyoungtmp<-rowSums(cbind(tab$PosSel_PamlM1M2=="Y", tab$PosSel_PamlM7M8=="Y",
tab$PosSel_BppM1M2=="Y", tab$PosSel_BppM7M8=="Y", tab$PosSel_BUSTED=="Y"))
#monddata$primates_dginn_young<-ifelse(tmp$PosSel_PamlM7M8=="Y", 1,0)
dginnfulltmp<-rowSums(cbind(tab$'dginn-primate_BUSTED'=="Y", tab$'dginn-primate_BppM1M2'=="Y",
tab$'dginn-primate_BppM7M8'=="Y", tab$'dginn-primate_codemlM1M2'=="Y", tab$'dginn-primate_codemlM7M8'=="Y"))
monddata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
#monddata$primates_cooper<-ifelse(tab$cooper.primates.M7.M8_p_val<0.05, 1, 0)
monddata$primates_dginn_young<-ifelse(dginnyoungtmp>=3, 1,0)
monddata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)
mondrian(na.omit(monddata[,2:4]), labels=c("Young", "DGINN-Young >=3", "DGINN-full >=3" ))
#####
monddata$primates_dginn_young<-ifelse(dginnyoungtmp>=4, 1,0)
monddata$primates_dginn_full<-ifelse(dginnfulltmp>=4, 1,0)
mondrian(na.omit(monddata[,2:4]), labels=c("Young", "DGINN-Young >=4", "DGINN-full >=4"))
@
Comparison of results with the same method.
<<>>=
#####
monddata$primates_dginn_young<-tab$PosSel_BppM7M8=="Y"
monddata$primates_dginn_full<-tab$'dginn-primate_codemlM7M8'=="Y"
mondrian(na.omit(monddata[,2:4]), labels=c("Young", "DGINN-Young", "DGINN-full"), main="posel codeml M7M8")
@
\subsection{subsetR}
Just another representation of the same result.
<<subsetprimates>>=
library(UpSetR)
upsetdata<-as.data.frame(tab$Gene.name)
upsetdata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
###
upsetdata$primates_dginn_young<-ifelse(dginnyoungtmp>=3, 1,0)
upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)
upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
###
upsetdata$primates_dginn_young<-ifelse(dginnyoungtmp>=4, 1,0)
upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=4, 1,0)
upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
@
\section{Gene List}
Genes under positive selection for at least 4 methods.
<<>>=
dginnfulltmp<-rowSums(cbind(tab$'dginn-primate_BUSTED'=="Y",
tab$'dginn-primate_BppM1M2'=="Y",
tab$'dginn-primate_BppM7M8'=="Y",
tab$'dginn-primate_codemlM1M2'=="Y",
tab$'dginn-primate_codemlM7M8'=="Y"))
tab$Gene.name[dginnfulltmp>=4 & is.na(dginnfulltmp)==F]
tab$Gene.name[dginnfulltmp>=3 & is.na(dginnfulltmp)==F]
tmp<-tab[dginnfulltmp>=4 & is.na(dginnfulltmp)==F,
c("Gene.name","dginn-primate_BUSTED", "dginn-primate_BppM1M2",
"dginn-primate_BppM7M8","dginn-primate_codemlM1M2","dginn-primate_codemlM7M8")]
write.table(tmp, "geneList_DGINN_full_primate_pos4.txt", row.names=F, quote=F)
@
\section{Shiny like}
<<shiny, fig.height=11>>=
makeFig1 <- function(df){
# prepare data for colors etc
colMethods <- c("deepskyblue4", "darkorange" , "deepskyblue3" , "mediumseagreen" , "yellow3" , "black")
nameMethods <- c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8", "MEME")
metColor <- data.frame(Name = nameMethods , Col = colMethods , stringsAsFactors = FALSE)
# subset for this specific figure
#df <- df[df$nbY >= 1, ] # to drop genes found by 0 methods (big datasets)
xt <- df[, c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8")]
xt$Gene <- df$Gene
nbrMeth <- 5
# reverse order of dataframe so that genes with the most Y are at the bottom (to be on top of the barplot)
xt[,1:5] <- ifelse(xt[,1:5] == "Y", 1, 0)
# sort and Filter the 0 lines
xt<-xt[order(rowSums(xt[,1:5])),]
xt<-xt[rowSums(xt[,1:5])>2,]
row.names(xt)<-xt$Gene
xt<-xt[,1:5]
colFig1 <- metColor[which(metColor$Name %in% colnames(xt)) , ]
##### PART 1 : NUMBER OF METHODS
par(xpd = NA , mar=c(2,7,4,0) , oma = c(0,0,0,0) , mgp = c(3,0.3,0))
h = barplot(
t(xt),
border = NA ,
axes = F ,
col = adjustcolor(colFig1$Col, alpha.f = 1),
horiz = T ,
las = 2 ,
main = "Methods detecting positive selection" ,
cex.main = 0.85,
cex.names = min(50/nrow(xt), 1.5)
)
axis(3, line = 0, at = c(0:nbrMeth), label = c("0", rep("", nbrMeth -1), nbrMeth), tck = 0.02)
legend("bottomleft",
horiz = T,
border = colFig1$Col,
legend = colFig1$Name,
fill = colFig1$Col,
cex = 0.8,
bty = "n",
xpd = NA
)
}
df<-read.delim(paste0(workdir,
"/data/DGINN_202005281649summary_cleaned.csv"),
fill=T, h=T, sep=",")
makeFig1(df)
@
\end{document}
File added
\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb
\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{October 2020} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Files manipulations}
\subsection{Read Janet Young's table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{dec}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] 332 84
\end{verbatim}
\begin{alltt}
\hlcom{#names(tab)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Read DGINN Young table}
DGINN-Young-primate table correspond to DGINN results, on the SAME alignment as Young-primate.
I will merge the 2 tables.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnY}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"data/summary_primate_young.res"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)}
\hlkwd{dim}\hlstd{(dginnY)}
\end{alltt}
\begin{verbatim}
## [1] 1992 7
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginnY)}
\end{alltt}
\begin{verbatim}
## [1] "Gene" "Omega" "Method" "PosSel" "PValue" "NbSites" "PSS"
\end{verbatim}
\end{kframe}
\end{knitrout}
\subsection{Joining Young and DGINN Young table}
\textit{I hide some code corresponding to verifications of gene names coherence between tables}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{add_col}\hlkwb{<-}\hlkwa{function}\hlstd{(}\hlkwc{method}\hlstd{=}\hlstr{"PamlM1M2"}\hlstd{)\{}
\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstd{method,}
\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"Omega_"}\hlstd{, method),}
\hlkwd{paste0}\hlstd{(}\hlstr{"PosSel_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PValue_"}\hlstd{, method),}
\hlkwd{paste0}\hlstd{(}\hlstr{"NbSites_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PSS_"}\hlstd{, method))}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\hlkwd{return}\hlstd{(tab)}
\hlstd{\}}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM1M2"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM7M8"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM1M2"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM7M8"}\hlstd{)}
\hlcom{# Manip pour la colonne BUSTED}
\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"BUSTED"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"Omega_BUSTED"}\hlstd{,} \hlstr{"PosSel_BUSTED"}\hlstd{,} \hlstr{"PValue_BUSTED"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"MEME"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"NbSites_MEME"}\hlstd{,} \hlstr{"PSS_MEME"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Read DGINN Table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnT}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"/data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{dim}\hlstd{(dginnT)}
\end{alltt}
\begin{verbatim}
## [1] 412 27
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginnT)}
\end{alltt}
\begin{verbatim}
## [1] "File" "Name" "Gene"
## [4] "GeneSize" "NbSpecies" "omegaM0Bpp"
## [7] "omegaM0codeml" "BUSTED" "BUSTED.p.value"
## [10] "MEME.NbSites" "MEME.PSS" "BppM1M2"
## [13] "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS"
## [16] "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites"
## [19] "BppM7M8.PSS" "codemlM1M2" "codemlM1M2.p.value"
## [22] "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8"
## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS"
\end{verbatim}
\begin{alltt}
\hlcom{# Number of genes in dginn-primate output not present in the original table}
\hlstd{dginnT[(dginnT}\hlopt{$}\hlstd{Gene} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F,}\hlstr{"Gene"}\hlstd{]}
\end{alltt}
\begin{verbatim}
## [1] ACE2 ADAM9[0-3120] ADAM9[3119-3927] ATP5MGL
## [5] C1H1ORF50 CEP135[0-3264] CEP135[3263-3678] CEP43
## [9] COQ8B COQ8A CSNK2A1 CSNK2B[0-609]
## [13] CSNK2B[608-2568] CYB5R1 DDX21[0-717] DDX21[716-2538]
## [17] DDX50 DNAJC15 DPH5[0-702] DPH5[701-1326]
## [21] DPY19L2 ELOC ERO1B EXOSC3[0-1446]
## [25] EXOSC3[1445-1980] FBN3 GNB4 GNB2
## [29] GNB3 GOLGA7[0-312] GOLGA7[311-549] GPX1[0-1218]
## [33] GPX1[1217-2946] HDAC1 HS6ST3 IMPDH1
## [37] ITGB1[0-2328] ITGB1[2327-2844] LMAN2L MRPS5[0-1569]
## [41] MRPS5[1568-3783] MARC2 MGRN1 NDFIP2[0-768]
## [45] NDFIP2[767-1314] NDUFAF2[0-258] NDUFAF2[257-744] NSD2
## [49] NUP58 NUP58[0-1824] NUP58[1823-2367] PABPC3
## [53] POTPABPC1 PABPC4L PABPC5 PCSK5
## [57] PRIM2[0-1071] PRIM2[1070-1902] PRKACB PRKACG
## [61] PTGES2[0-1587] PTGES2[1586-2202] RAB8B RAB13
## [65] RAB18[0-855] RAB18[854-1815] RAB2B RAB5A
## [69] RAB5B RAB15 RALB EZR
## [73] EZR[0-1458] EZR[1457-3771] MSN RETREG3
## [77] RHOB RHOC SLC44A2[0-2577] SLC44A2[2576-3657]
## [81] SPART SRP72[0-2604] SRP72[2603-3417] STOM[0-1047]
## [85] STOM[1046-1800] STOML3 TIMM29 TLE4
## [89] TLE2 TLE2[0-1302] TLE2[1301-3987] TMPRSS2
## [93] TOMM70 TOR1B WASHC4 WFS1[0-2346]
## [97] WFS1[2345-3216] YIF1B
## 411 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ADAM9 ... ZYG11B
\end{verbatim}
\begin{alltt}
\hlcom{# This includes paralogs, recombinations found by DGINN }
\hlcom{# and additionnal genes included on purpose}
\hlcom{# Number of genes from the original list not present in DGINN output}
\hlstd{tab[(tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnT}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlstd{F,}\hlstr{"Gene.name"}\hlstd{]}
\end{alltt}
\begin{verbatim}
## [1] "ADCK4" "ARL6IP6" "ATP5L" "C19orf52" "C1orf50" "ERO1LB"
## [7] "FAM134C" "FGFR1OP" "KIAA1033" "MFGE8" "NUPL1" "SIGMAR1"
## [13] "SPG20" "TCEB1" "TCEB2" "TOMM70A" "USP13" "VIMP"
## [19] "WHSC1"
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginnT)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} \hlstr{"GeneSize"}\hlstd{,} \hlstr{"dginn-primate_NbSpecies"}\hlstd{,} \hlstr{"dginn-primate_omegaM0Bpp"}\hlstd{,}
\hlstr{"dginn-primate_omegaM0codeml"}\hlstd{,} \hlstr{"dginn-primate_BUSTED"}\hlstd{,} \hlstr{"dginn-primate_BUSTED.p.value"}\hlstd{,}
\hlstr{"dginn-primate_MEME.NbSites"}\hlstd{,} \hlstr{"dginn-primate_MEME.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,}
\hlstr{"dginn-primate_BppM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.PSS"}\hlstd{,}
\hlstr{"dginn-primate_BppM7M8"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.NbSites"}\hlstd{,}
\hlstr{"dginn-primate_BppM7M8.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.p.value"}\hlstd{,}
\hlstr{"dginn-primate_codemlM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8"}\hlstd{,}
\hlstr{"dginn-primate_codemlM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.PSS"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Join Table and DGINN table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab,dginnT,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=T)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Write new table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{write.table}\hlstd{(tab,}
\hlstr{"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20201014.txt"}\hlstd{,}
\hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comparisons Primates}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS Janet Young's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0, tab}\hlopt{$}\hlstd{Omega_PamlM7M8,}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-Young-primate"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.2} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{>}\hlnum{0.4}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.6} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{>}\hlnum{0.7}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/omegaM7M8-1}
\end{knitrout}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{))}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{, tab}\hlopt{$}\hlstd{Omega_PamlM7M8,}
\hlkwc{xlab}\hlstd{=}\hlstr{"DGINN-full's"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-Young-primate"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{<}\hlnum{0.2}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.5} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{<}\hlnum{0.4}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/omegaM7M8_2-1}
\end{knitrout}
\subsection{Janet Young's results (Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,} \hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{)),}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"DGINN-full's"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.5}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,}
\hlkwc{y}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{,}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/omegaM7M8_3-1}
\end{knitrout}
\section{Overlap}
\subsection{Mondrian}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{library}\hlstd{(Mondrian)}
\hlcom{#######}
\hlstd{monddata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)}
\hlkwd{dim}\hlstd{(monddata)}
\end{alltt}
\begin{verbatim}
## [1] 333 1
\end{verbatim}
\begin{alltt}
\hlstd{dginnyoungtmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstd{PosSel_PamlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{PosSel_PamlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstd{PosSel_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{PosSel_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{PosSel_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlcom{#monddata$primates_dginn_young<-ifelse(tmp$PosSel_PamlM7M8=="Y", 1,0)}
\hlstd{dginnfulltmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_BUSTED'}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstr{'dginn-primate_BppM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_BppM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstr{'dginn-primate_codemlM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstr{'dginn-primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{monddata}\hlopt{$}\hlstd{primates_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlcom{#monddata$primates_cooper<-ifelse(tab$cooper.primates.M7.M8_p_val<0.05, 1, 0)}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young >=3"}\hlstd{,} \hlstr{"DGINN-full >=3"} \hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianprimates-1}
\begin{kframe}\begin{alltt}
\hlcom{#####}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young >=4"}\hlstd{,} \hlstr{"DGINN-full >=4"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianprimates-2}
\end{knitrout}
Comparison of results with the same method.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{#####}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlstd{tab}\hlopt{$}\hlstd{PosSel_BppM7M8}\hlopt{==}\hlstr{"Y"}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young"}\hlstd{,} \hlstr{"DGINN-full"}\hlstd{),} \hlkwc{main}\hlstd{=}\hlstr{"posel codeml M7M8"}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/unnamed-chunk-8-1}
\end{knitrout}
\subsection{subsetR}
Just another representation of the same result.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{library}\hlstd{(UpSetR)}
\hlstd{upsetdata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlcom{###}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{upset}\hlstd{(}\hlkwd{na.omit}\hlstd{(upsetdata),} \hlkwc{nsets} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{matrix.color} \hlstd{=} \hlstr{"#DC267F"}\hlstd{,}
\hlkwc{main.bar.color} \hlstd{=} \hlstr{"#648FFF"}\hlstd{,} \hlkwc{sets.bar.color} \hlstd{=} \hlstr{"#FE6100"}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/subsetprimates-1}
\begin{kframe}\begin{alltt}
\hlcom{###}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{upset}\hlstd{(}\hlkwd{na.omit}\hlstd{(upsetdata),} \hlkwc{nsets} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{matrix.color} \hlstd{=} \hlstr{"#DC267F"}\hlstd{,}
\hlkwc{main.bar.color} \hlstd{=} \hlstr{"#648FFF"}\hlstd{,} \hlkwc{sets.bar.color} \hlstd{=} \hlstr{"#FE6100"}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/subsetprimates-2}
\end{knitrout}
\section{Gene List}
Genes under positive selection for at least 4 methods.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnfulltmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_BUSTED'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_BppM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_BppM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_codemlM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[dginnfulltmp}\hlopt{>=}\hlnum{4} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F]}
\end{alltt}
\begin{verbatim}
## [1] "ACADM" "BCS1L" "BRD4" "CDK5RAP2" "CEP135" "CEP68"
## [7] "CLIP4" "DNMT1" "DPH5" "EMC1" "FYCO1" "GCC2"
## [13] "GGH" "GHITM" "GIGYF2" "GLA" "GOLGA7" "HECTD1"
## [19] "IDE" "ITGB1" "LARP1" "LARP4B" "LMAN2" "MARK1"
## [25] "MIPOL1" "MPHOSPH10" "MYCBP2" "NDUFAF2" "NDUFB9" "PCNT"
## [31] "POLA1" "PRIM2" "PRKAR2A" "PVR" "REEP6" "RIPK1"
## [37] "SAAL1" "SEPSECS" "SIRT5" "SLC25A21" "SLC27A2" "TMEM39B"
## [43] "TOR1AIP1" "TUBGCP2" "UBAP2" "UGGT2" "VPS39" "ZNF318"
\end{verbatim}
\begin{alltt}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[dginnfulltmp}\hlopt{>=}\hlnum{3} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F]}
\end{alltt}
\begin{verbatim}
## [1] "ACADM" "ADAM9" "AP2A2" "ATE1" "BCS1L" "BRD4"
## [7] "BZW2" "CDK5RAP2" "CEP135" "CEP68" "CLIP4" "CNTRL"
## [13] "DNMT1" "DPH5" "EDEM3" "EIF4E2" "EMC1" "EXOSC2"
## [19] "FYCO1" "GCC2" "GGH" "GHITM" "GIGYF2" "GLA"
## [25] "GOLGA7" "GOLGB1" "GORASP1" "HDAC2" "HECTD1" "HS6ST2"
## [31] "IDE" "ITGB1" "LARP1" "LARP4B" "LARP7" "LMAN2"
## [37] "MARK1" "MDN1" "MIPOL1" "MOV10" "MPHOSPH10" "MRPS5"
## [43] "MYCBP2" "NAT14" "NDUFAF2" "NDUFB9" "NGLY1" "NPC2"
## [49] "PCNT" "PITRM1" "PLAT" "PLOD2" "PMPCB" "POLA1"
## [55] "POR" "PRIM2" "PRKAR2A" "PTBP2" "PVR" "RAB14"
## [61] "RAB1A" "RAB2A" "RAP1GDS1" "RBX1" "REEP6" "RIPK1"
## [67] "RPL36" "SAAL1" "SCCPDH" "SEPSECS" "SIRT5" "SLC25A21"
## [73] "SLC27A2" "STOM" "TIMM8B" "TMEM39B" "TOR1AIP1" "TRIM59"
## [79] "TRMT1" "TUBGCP2" "UBAP2" "UGGT2" "USP54" "VPS39"
## [85] "ZNF318"
\end{verbatim}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{tab[dginnfulltmp}\hlopt{>=}\hlnum{4} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F,}
\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,}\hlstr{"dginn-primate_BUSTED"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,}
\hlstr{"dginn-primate_BppM7M8"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8"}\hlstd{)]}
\hlkwd{write.table}\hlstd{(tmp,} \hlstr{"geneList_DGINN_full_primate_pos4.txt"}\hlstd{,} \hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F)}
\end{alltt}
\end{kframe}
\end{knitrout}
\section{Shiny like}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{makeFig1} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{df}\hlstd{)\{}
\hlcom{# prepare data for colors etc}
\hlstd{colMethods} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlstr{"deepskyblue4"}\hlstd{,} \hlstr{"darkorange"} \hlstd{,} \hlstr{"deepskyblue3"} \hlstd{,} \hlstr{"mediumseagreen"} \hlstd{,} \hlstr{"yellow3"} \hlstd{,} \hlstr{"black"}\hlstd{)}
\hlstd{nameMethods} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlstr{"BUSTED"}\hlstd{,} \hlstr{"BppM1M2"}\hlstd{,} \hlstr{"BppM7M8"}\hlstd{,} \hlstr{"codemlM1M2"}\hlstd{,} \hlstr{"codemlM7M8"}\hlstd{,} \hlstr{"MEME"}\hlstd{)}
\hlstd{metColor} \hlkwb{<-} \hlkwd{data.frame}\hlstd{(}\hlkwc{Name} \hlstd{= nameMethods ,} \hlkwc{Col} \hlstd{= colMethods ,} \hlkwc{stringsAsFactors} \hlstd{=} \hlnum{FALSE}\hlstd{)}
\hlcom{# subset for this specific figure}
\hlcom{#df <- df[df$nbY >= 1, ] # to drop genes found by 0 methods (big datasets)}
\hlstd{xt} \hlkwb{<-} \hlstd{df[,} \hlkwd{c}\hlstd{(}\hlstr{"BUSTED"}\hlstd{,} \hlstr{"BppM1M2"}\hlstd{,} \hlstr{"BppM7M8"}\hlstd{,} \hlstr{"codemlM1M2"}\hlstd{,} \hlstr{"codemlM7M8"}\hlstd{)]}
\hlstd{xt}\hlopt{$}\hlstd{Gene} \hlkwb{<-} \hlstd{df}\hlopt{$}\hlstd{Gene}
\hlstd{nbrMeth} \hlkwb{<-} \hlnum{5}
\hlcom{# reverse order of dataframe so that genes with the most Y are at the bottom (to be on top of the barplot)}
\hlstd{xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} \hlkwb{<-} \hlkwd{ifelse}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} \hlopt{==} \hlstr{"Y"}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlcom{# sort and Filter the 0 lines}
\hlstd{xt}\hlkwb{<-}\hlstd{xt[}\hlkwd{order}\hlstd{(}\hlkwd{rowSums}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])),]}
\hlstd{xt}\hlkwb{<-}\hlstd{xt[}\hlkwd{rowSums}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])}\hlopt{>}\hlnum{2}\hlstd{,]}
\hlkwd{row.names}\hlstd{(xt)}\hlkwb{<-}\hlstd{xt}\hlopt{$}\hlstd{Gene}
\hlstd{xt}\hlkwb{<-}\hlstd{xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]}
\hlstd{colFig1} \hlkwb{<-} \hlstd{metColor[}\hlkwd{which}\hlstd{(metColor}\hlopt{$}\hlstd{Name} \hlopt{%in%} \hlkwd{colnames}\hlstd{(xt)) , ]}
\hlcom{##### PART 1 : NUMBER OF METHODS}
\hlkwd{par}\hlstd{(}\hlkwc{xpd} \hlstd{=} \hlnum{NA} \hlstd{,} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{7}\hlstd{,}\hlnum{4}\hlstd{,}\hlnum{0}\hlstd{) ,} \hlkwc{oma} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{) ,} \hlkwc{mgp} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,}\hlnum{0.3}\hlstd{,}\hlnum{0}\hlstd{))}
\hlstd{h} \hlkwb{=} \hlkwd{barplot}\hlstd{(}
\hlkwd{t}\hlstd{(xt),}
\hlkwc{border} \hlstd{=} \hlnum{NA} \hlstd{,}
\hlkwc{axes} \hlstd{= F ,}
\hlkwc{col} \hlstd{=} \hlkwd{adjustcolor}\hlstd{(colFig1}\hlopt{$}\hlstd{Col,} \hlkwc{alpha.f} \hlstd{=} \hlnum{1}\hlstd{),}
\hlkwc{horiz} \hlstd{= T ,}
\hlkwc{las} \hlstd{=} \hlnum{2} \hlstd{,}
\hlkwc{main} \hlstd{=} \hlstr{"Methods detecting positive selection"} \hlstd{,}
\hlkwc{cex.main} \hlstd{=} \hlnum{0.85}\hlstd{,}
\hlkwc{cex.names} \hlstd{=} \hlkwd{min}\hlstd{(}\hlnum{50}\hlopt{/}\hlkwd{nrow}\hlstd{(xt),} \hlnum{1.5}\hlstd{)}
\hlstd{)}
\hlkwd{axis}\hlstd{(}\hlnum{3}\hlstd{,} \hlkwc{line} \hlstd{=} \hlnum{0}\hlstd{,} \hlkwc{at} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0}\hlopt{:}\hlstd{nbrMeth),} \hlkwc{label} \hlstd{=} \hlkwd{c}\hlstd{(}\hlstr{"0"}\hlstd{,} \hlkwd{rep}\hlstd{(}\hlstr{""}\hlstd{, nbrMeth} \hlopt{-}\hlnum{1}\hlstd{), nbrMeth),} \hlkwc{tck} \hlstd{=} \hlnum{0.02}\hlstd{)}
\hlkwd{legend}\hlstd{(}\hlstr{"bottomleft"}\hlstd{,}
\hlkwc{horiz} \hlstd{= T,}
\hlkwc{border} \hlstd{= colFig1}\hlopt{$}\hlstd{Col,}
\hlkwc{legend} \hlstd{= colFig1}\hlopt{$}\hlstd{Name,}
\hlkwc{fill} \hlstd{= colFig1}\hlopt{$}\hlstd{Col,}
\hlkwc{cex} \hlstd{=} \hlnum{0.8}\hlstd{,}
\hlkwc{bty} \hlstd{=} \hlstr{"n"}\hlstd{,}
\hlkwc{xpd} \hlstd{=} \hlnum{NA}
\hlstd{)}
\hlstd{\}}
\hlstd{df}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"/data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{makeFig1}\hlstd{(df)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/shiny-1}
\end{knitrout}
\end{document}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment