Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mcariou/2020_dginn_covid19
  • ciri/ps_sars-cov-2/2021_dginn_covid19
2 results
Show changes
Commits on Source (11)
Showing
with 3301 additions and 704 deletions
......@@ -364,7 +364,7 @@ But Genetic Innovation by splicing variants YES.
" "" "onlybats"
"350" "SCCPDH" "SCCPDH_sequences_filtered_longestORFs_mafft_mincov_prank" "SCCPDH_all" 717 23 "0.297146699972" "0.323" "Y" "0.0242" 4 "203, 437, 632, 635" "N" "0.16718066834449946" 0 "na" "N" "0.05798336825737528" 0 "na" "Y" "0.03951796106177764" 1 "590" "Y" "0.015732919159708345" 1 "590" "SCCPDH_sequences_filtered_longestORFs_mafft_prank" "SCCPDH" 654 12 "0.281129406592" "0.291" "N" 0.1315 11 "46, 70, 87, 228, 229, 231, 238, 327, 379, 594, 644" "N" "0.0747269312417341" 0 "na" "Y" "0.005045637026761042" 1 "367" "Y" "0.047028569511397056" 0 "" "Y" "0.0063709925831932045" 1 "367" "PS ok. Splice variants" "" "shared"
"351" "SDF2" "SDF2_sequences_filtered_longestORFs_mafft_mincov_prank" "SDF2_all" 364 23 "0.091229165614" "0.089" "N" "0.7862" 0 "na" "N" "0.9999997541511068" 0 "na" "N" "0.9999998942125841" 0 "na" "N" "1.0" 0 "na" "N" "0.9990004998331715" 0 "na" "SDF2_bat_select_mafft_prank" "SDF2" 246 12 "0.082797829312181" "0.080" "N" 0.2145 0 "na" "N" "0.999999439606811" 0 "na" "N" "0.999999993020992" 0 "na" "N" "1.0" 0 "na" "N" "1.0" 0 "na" "" "" "shared"
"352" "SELENOS" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA "SELENOS_bat_select_mafft_prank" "SELENOS" 218 8 "0.1693975661318" "0.172" "N" 0.8176 5 "71, 72, 85, 128, 173" "N" "0.999999579118228" 0 "na" "N" "0.389714156459387" 0 "na" "N" "0.9970044955034437" 0 "na" "N" "0.2133118712228997" 0 "na" "" "" "onlybats"
"352" "SELENOS" "SELENOS_sequences_filtered_longestORFs_mafft_mincov_prank" "SELENOS_all" 367 24 "0.207287174753623" "0.196" "Y" "0" 1 "90" "N" "0.999999228305" 0 "na" "N" "0.986116909613" 0 "na" "N" "1" 0 "na" "N" "0.692117181689" 0 "na" "SELENOS_bat_select_mafft_prank" "SELENOS" 218 8 "0.1693975661318" "0.172" "N" 0.8176 5 "71, 72, 85, 128, 173" "N" "0.999999579118228" 0 "na" "N" "0.389714156459387" 0 "na" "N" "0.9970044955034437" 0 "na" "N" "0.2133118712228997" 0 "na" "" "" "shared"
"353" "SELENOS[0-927]" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA "SELENOS_sequences_filtered_longestORFs_mafft_prank_frag0to927" "SELENOS[0-927]" 309 11 "0.213235744659" "0.250" "Y" 0.0085 3 "126, 148, 154" "N" "0.3022734485269354" 0 "na" "N" "0.1168296305173564" 0 "na" "N" "1.0" 0 "na" "N" "0.24268280925769264" 0 "na" "Recomb triggered by aln with lots of indels" "" "onlybats"
"354" "SELENOS[926-1137]" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA "SELENOS_sequences_filtered_longestORFs_mafft_prank_frag926to1137" "SELENOS[926-1137]" 70 11 "0.233147569842" "0.246" "N" 1 0 "na" "N" "0.9999999153342323" 0 "na" "N" "0.9999906640992244" 0 "na" "N" "0.9694755730759651" 0 "na" "N" "0.3348743138811512" 0 "na" "" "" "onlybats"
"355" "SEPSECS" "SEPSECS_sequences_filtered_longestORFs_mafft_mincov_prank" "SEPSECS_all" 1204 24 "0.200383584131" "0.191" "Y" "0.0012" 4 "411, 792, 871, 1131" "Y" "0.04699410636855563" 1 "871" "Y" "0.000675790377354786" 1 "871" "Y" "0.022573017543302893" 0 "" "Y" "0.002544044860029952" 1 "871" "SEPSECS_sequences_filtered_longestORFs_mafft_prank" "SEPSECS" 642 12 "0.207760285718" "0.221" "Y" 0.0113 0 "na" "N" "0.4067707665111908" 0 "na" "Y" "0.00326458340100601" 2 "577, 585" "N" "0.4269877306527835" 0 "na" "N" "0.0707218996108505" 0 "na" "" "" "shared"
......
......@@ -15,7 +15,7 @@
\title{Positive selection on genes interacting with SARS-CoV-2, comparison of different analyses}
\author{Marie Cariou}
\date{janvier 2021} % Activate to display a given date or no date
\date{Mars 2021} % Activate to display a given date or no date
\begin{document}
\maketitle
......@@ -29,12 +29,13 @@
Analyses were formatted by the script covid\_comp\_script0\_table.Rnw.
<<>>=
# Locate the project directory and load the merged comparison table.
# NOTE(review): workdir is assigned twice -- first as a literal path, then
# rebuilt from `home`; both produce the same string, so this looks like two
# revisions of one statement. Confirm which form should remain.
workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home,"CIRI_BIBS_projects/2020_05_Etienne_covid/")
# Tab-separated file read with a header row (h= partially matches header=,
# T is shorthand for TRUE).
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
# Print rows x columns as a quick check of what was loaded.
dim(tab)
# Gene.name is set twice; the second assignment reads Gene.name.x and
# overwrites the first, so only Gene.name.x determines the final value.
# NOTE(review): the first assignment looks like a leftover -- confirm.
tab$Gene.name<-as.character(tab$Gene.name)
tab$Gene.name<-as.character(tab$Gene.name.x)
# Force the gene name to "MTARC1" on rows whose PreyGene is "MTARC1".
tab$Gene.name[tab$PreyGene=="MTARC1"]<-"MTARC1"
@
......@@ -43,25 +44,38 @@ tab$Gene.name[tab$PreyGene=="MTARC1"]<-"MTARC1"
\subsection{Cooper-bats results VS DGINN-bats results}
<<omegaM7M8bats>>=
# Scatter plot of the two bat omega estimates (axis labels: "Omega
# Cooper-bats" on x, "Omega DGINN-bats" on y), with reference lines and
# labelled outliers.
# Turn the text placeholder "na" into a real NA before the as.numeric()
# conversions below.
tab$bats_omegaM0codeml[tab$bats_omegaM0codeml=="na"]<-NA
# NOTE(review): plot() is called twice with the same arguments (one-line and
# wrapped forms); the later annotations are drawn on the second plot. Looks
# like two revisions of the same call -- confirm only one is needed.
plot(tab$cooper.batsAverage_dNdS, as.numeric(as.character(tab$bats_omegaM0codeml)),
xlab="Omega Cooper-bats", ylab="Omega DGINN-bats")
plot(tab$cooper.batsAverage_dNdS,
as.numeric(as.character(tab$bats_omegaM0codeml)),
xlab="Omega Cooper-bats",
ylab="Omega DGINN-bats")
# Identity line (intercept 0, slope 1).
abline(0,1)
# Red line: linear fit of the DGINN omega on the Cooper omega.
# NOTE(review): the same fit is drawn twice (one-line and wrapped forms).
abline(lm(as.numeric(as.character(tab$bats_omegaM0codeml))~tab$cooper.batsAverage_dNdS), col="red")
abline(lm(as.numeric(as.character(tab$bats_omegaM0codeml))~
tab$cooper.batsAverage_dNdS),
col="red")
# Rows with Cooper dN/dS above 0.35 and DGINN omega below 0.3.
# NOTE(review): rows where either value is NA give NA in the logical index
# and come back as all-NA rows in `outlier` -- confirm this is acceptable.
outlier<-tab[tab$cooper.batsAverage_dNdS>0.35 &
as.numeric(as.character(tab$bats_omegaM0codeml))<0.3,]
# Label each selected point with its gene name.
text(x=outlier$cooper.batsAverage_dNdS,
y=as.numeric(as.character(outlier$bats_omegaM0codeml)),
outlier$Gene.name)
@
\subsection{Cooper-bats VS Hawkins-bats and DGINN-bats VS Hawkins-bats}
\textit{I don't think we have the omega values}
\section{Overlap}
\subsection{Data}
<<subbats>>=
# Build `tmp`: the nine listed columns of `tab`, restricted by na.omit() to
# rows with no NA in any of them, then convert the codeml M7/M8 p-value to
# numeric.
# NOTE(review): the subset and the numeric conversion each appear twice
# (one-line and wrapped forms); the second `tmp<-` discards the result of the
# first two statements. Looks like two revisions -- confirm which to keep.
tmp<-na.omit(tab[,c("Gene.name", "bats_codemlM7M8_p.value", "hawkins_Positive.Selection..M8vM8a.p.value", "cooper.batsM7.M8_p_value", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8")])
tmp$bats_codemlM7M8_p.value<-as.numeric(as.character(tmp$bats_codemlM7M8_p.value))
tmp<-na.omit(tab[,c("Gene.name", "bats_codemlM7M8_p.value",
"hawkins_Positive.Selection..M8vM8a.p.value",
"cooper.batsM7.M8_p_value", "bats_BUSTED",
"bats_BppM1M2", "bats_BppM7M8", "bats_codemlM1M2",
"bats_codemlM7M8")])
# Replace the text placeholder "na" with NA before converting to numeric.
# NOTE(review): this runs after na.omit(), so rows whose p-value was "na"
# stay in `tmp` with an NA p-value -- confirm that is intended.
tmp$bats_codemlM7M8_p.value[tmp$bats_codemlM7M8_p.value=="na"]<-NA
tmp$bats_codemlM7M8_p.value<-as.numeric(
as.character(tmp$bats_codemlM7M8_p.value))
# Print rows x columns of the retained subset.
dim(tmp)
@
......@@ -73,18 +87,25 @@ dim(tmp)
library(Mondrian)
monddata<-as.data.frame(tmp$Gene.name)
monddata$bats_hawkins<-ifelse(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0)
monddata$bats_cooper<-ifelse(tmp$cooper.batsM7.M8_p_value<0.05, 1, 0)
monddata$bats_hawkins<-ifelse(
tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0)
monddata$bats_cooper<-ifelse(
tmp$cooper.batsM7.M8_p_value<0.05, 1, 0)
dginntmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y", tmp$bats_codemlM7M8=="Y",
tmp$bats_BppM1M2=="Y", tmp$bats_BppM7M8=="Y", tmp$bats_BUSTED=="Y"))
dginntmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y",
tmp$bats_codemlM7M8=="Y",
tmp$bats_BppM1M2=="Y",
tmp$bats_BppM7M8=="Y",
tmp$bats_BUSTED=="Y"))
monddata$bats_dginn<-ifelse(dginntmp>=3, 1,0)
mondrian(monddata[,2:4], labels=c("DGINN >=3", "hawkins", "Cooper"))
mondrian(monddata[,2:4],
labels=c("DGINN >=3", "hawkins", "Cooper"))
monddata$bats_dginn<-ifelse(dginntmp>=4, 1,0)
mondrian(monddata[,2:4], labels=c("DGINN >=4", "hawkins", "Cooper"))
mondrian(monddata[,2:4],
labels=c("DGINN >=4", "hawkins", "Cooper"))
@
\subsection{UpSetR}
......@@ -93,8 +114,10 @@ mondrian(monddata[,2:4], labels=c("DGINN >=4", "hawkins", "Cooper"))
library(UpSetR)
upsetdata<-as.data.frame(tmp$Gene.name)
upsetdata$bats_hawkins<-ifelse(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0)
upsetdata$bats_cooper<-ifelse(tmp$cooper.batsM7.M8_p_value<0.05, 1, 0)
upsetdata$bats_hawkins<-ifelse(
tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0)
upsetdata$bats_cooper<-ifelse(
tmp$cooper.batsM7.M8_p_value<0.05, 1, 0)
upsetdata$bats_dginn<-ifelse(dginntmp>=3, 1,0)
......@@ -117,23 +140,25 @@ df<-read.delim(paste0(workdir,
fill=T, h=T, sep=",")
names(df)
dftmp<-tab[,c("bats_File", "bats_Name", "Gene.name",
"bats_GeneSize", "bats_NbSpecies", "bats_omegaM0Bpp",
"bats_omegaM0codeml", "bats_BUSTED", "bats_BUSTED_p.value",
"bats_MEME_NbSites", "bats_MEME_PSS", "bats_BppM1M2",
"bats_BppM1M2_p.value", "bats_BppM1M2_NbSites", "bats_BppM1M2_PSS",
"bats_BppM7M8", "bats_BppM7M8_p.value", "bats_BppM7M8_NbSites",
"bats_BppM7M8_PSS", "bats_codemlM1M2", "bats_codemlM1M2_p.value",
"bats_codemlM1M2_NbSites","bats_codemlM1M2_PSS", "bats_codemlM7M8",
"bats_codemlM7M8_p.value", "bats_codemlM7M8_NbSites" , "bats_codemlM7M8_PSS")]
dftmp<-tab[,c("bats_File", "bats_Name",
"Gene.name", "bats_GeneSize",
"bats_NbSpecies", "bats_omegaM0Bpp",
"bats_omegaM0codeml", "bats_BUSTED",
"bats_BUSTED_p.value", "bats_MEME_NbSites",
"bats_MEME_PSS", "bats_BppM1M2",
"bats_BppM1M2_p.value", "bats_BppM1M2_NbSites",
"bats_BppM1M2_PSS", "bats_BppM7M8",
"bats_BppM7M8_p.value", "bats_BppM7M8_NbSites",
"bats_BppM7M8_PSS", "bats_codemlM1M2",
"bats_codemlM1M2_p.value", "bats_codemlM1M2_NbSites",
"bats_codemlM1M2_PSS", "bats_codemlM7M8",
"bats_codemlM7M8_p.value", "bats_codemlM7M8_NbSites" ,
"bats_codemlM7M8_PSS")]
names(dftmp)<-names(df)
makeFig1(dftmp)
@
\end{document}
......
No preview for this file type
......@@ -65,7 +65,7 @@
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{janvier 2021} % Activate to display a given date or no date
\date{Mars 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
......@@ -81,17 +81,18 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/"}
\hlstd{workdir}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,}\hlstr{"CIRI_BIBS_projects/2020_05_Etienne_covid/"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"covid_comp/covid_comp_complete.txt"}\hlstd{),} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] 332 139
## [1] 332 141
\end{verbatim}
\begin{alltt}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name.x)}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[tab}\hlopt{$}\hlstd{PreyGene}\hlopt{==}\hlstr{"MTARC1"}\hlstd{]}\hlkwb{<-}\hlstr{"MTARC1"}
\end{alltt}
\end{kframe}
......@@ -104,26 +105,30 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS,} \hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml)),}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Cooper-bats"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-bats"}\hlstd{)}
\end{alltt}
\hlstd{tab}\hlopt{$}\hlstd{bats_omegaM0codeml[tab}\hlopt{$}\hlstd{bats_omegaM0codeml}\hlopt{==}\hlstr{"na"}\hlstd{]}\hlkwb{<-}\hlnum{NA}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduits lors de la conversion automatique}}\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS,}
\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml)),}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Cooper-bats"}\hlstd{,}
\hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-bats"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlkwd{lm}\hlstd{(}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml))}\hlopt{~}\hlstd{tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS),} \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlkwd{lm}\hlstd{(}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml))}\hlopt{~}
\hlstd{tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS),}
\hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS}\hlopt{>}\hlnum{0.35} \hlopt{&}
\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml))}\hlopt{<}\hlnum{0.3}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{cooper.batsAverage_dNdS,}
\hlkwc{y}\hlstd{=}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(outlier}\hlopt{$}\hlstd{bats_omegaM0codeml)),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in eval(predvars, data, env): NAs introduits lors de la conversion automatique}}\end{kframe}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/omegaM7M8bats-1}
\end{knitrout}
\subsection{Cooper-bats VS Hawkins-bats and DGINN-bats VS Hawkins-bats}
\textit{I don't think we have the omega values}
\section{Overlap}
\subsection{Data}
......@@ -131,17 +136,19 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_codemlM7M8_p.value"}\hlstd{,} \hlstr{"hawkins_Positive.Selection..M8vM8a.p.value"}\hlstd{,} \hlstr{"cooper.batsM7.M8_p_value"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{)])}
\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value))}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_codemlM7M8_p.value"}\hlstd{,}
\hlstr{"hawkins_Positive.Selection..M8vM8a.p.value"}\hlstd{,}
\hlstr{"cooper.batsM7.M8_p_value"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,}
\hlstr{"bats_BppM1M2"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,}
\hlstr{"bats_codemlM7M8"}\hlstd{)])}
\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value[tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value}\hlopt{==}\hlstr{"na"}\hlstd{]}\hlkwb{<-}\hlnum{NA}
\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}
\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value))}
\hlkwd{dim}\hlstd{(tmp)}
\end{alltt}
\begin{verbatim}
## [1] 170 9
## [1] 174 9
\end{verbatim}
\end{kframe}
\end{knitrout}
......@@ -156,21 +163,28 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\hlkwd{library}\hlstd{(Mondrian)}
\hlstd{monddata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tmp}\hlopt{$}\hlstd{Gene.name)}
\hlstd{monddata}\hlopt{$}\hlstd{bats_hawkins}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{bats_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{bats_hawkins}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(}
\hlstd{tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{bats_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(}
\hlstd{tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{dginntmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstd{bats_codemlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstd{bats_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstd{bats_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{dginntmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{monddata}\hlopt{$}\hlstd{bats_dginn}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginntmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{],} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN >=3"}\hlstd{,} \hlstr{"hawkins"}\hlstd{,} \hlstr{"Cooper"}\hlstd{))}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{],}
\hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN >=3"}\hlstd{,} \hlstr{"hawkins"}\hlstd{,} \hlstr{"Cooper"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianbats-1}
\begin{kframe}\begin{alltt}
\hlstd{monddata}\hlopt{$}\hlstd{bats_dginn}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginntmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{],} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN >=4"}\hlstd{,} \hlstr{"hawkins"}\hlstd{,} \hlstr{"Cooper"}\hlstd{))}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{],}
\hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN >=4"}\hlstd{,} \hlstr{"hawkins"}\hlstd{,} \hlstr{"Cooper"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianbats-2}
......@@ -185,8 +199,10 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\hlkwd{library}\hlstd{(UpSetR)}
\hlstd{upsetdata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tmp}\hlopt{$}\hlstd{Gene.name)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_hawkins}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_hawkins}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(}
\hlstd{tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(}
\hlstd{tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_dginn}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginntmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
......@@ -230,15 +246,20 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS"
\end{verbatim}
\begin{alltt}
\hlstd{dftmp}\hlkwb{<-}\hlstd{tab[,}\hlkwd{c}\hlstd{(}\hlstr{"bats_File"}\hlstd{,} \hlstr{"bats_Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,}
\hlstr{"bats_GeneSize"}\hlstd{,} \hlstr{"bats_NbSpecies"}\hlstd{,} \hlstr{"bats_omegaM0Bpp"}\hlstd{,}
\hlstr{"bats_omegaM0codeml"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BUSTED_p.value"}\hlstd{,}
\hlstr{"bats_MEME_NbSites"}\hlstd{,} \hlstr{"bats_MEME_PSS"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,}
\hlstr{"bats_BppM1M2_p.value"}\hlstd{,} \hlstr{"bats_BppM1M2_NbSites"}\hlstd{,} \hlstr{"bats_BppM1M2_PSS"}\hlstd{,}
\hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_BppM7M8_p.value"}\hlstd{,} \hlstr{"bats_BppM7M8_NbSites"}\hlstd{,}
\hlstr{"bats_BppM7M8_PSS"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM1M2_p.value"}\hlstd{,}
\hlstr{"bats_codemlM1M2_NbSites"}\hlstd{,}\hlstr{"bats_codemlM1M2_PSS"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{,}
\hlstr{"bats_codemlM7M8_p.value"}\hlstd{,} \hlstr{"bats_codemlM7M8_NbSites"} \hlstd{,} \hlstr{"bats_codemlM7M8_PSS"}\hlstd{)]}
\hlstd{dftmp}\hlkwb{<-}\hlstd{tab[,}\hlkwd{c}\hlstd{(}\hlstr{"bats_File"}\hlstd{,} \hlstr{"bats_Name"}\hlstd{,}
\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_GeneSize"}\hlstd{,}
\hlstr{"bats_NbSpecies"}\hlstd{,} \hlstr{"bats_omegaM0Bpp"}\hlstd{,}
\hlstr{"bats_omegaM0codeml"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,}
\hlstr{"bats_BUSTED_p.value"}\hlstd{,} \hlstr{"bats_MEME_NbSites"}\hlstd{,}
\hlstr{"bats_MEME_PSS"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,}
\hlstr{"bats_BppM1M2_p.value"}\hlstd{,} \hlstr{"bats_BppM1M2_NbSites"}\hlstd{,}
\hlstr{"bats_BppM1M2_PSS"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,}
\hlstr{"bats_BppM7M8_p.value"}\hlstd{,} \hlstr{"bats_BppM7M8_NbSites"}\hlstd{,}
\hlstr{"bats_BppM7M8_PSS"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,}
\hlstr{"bats_codemlM1M2_p.value"}\hlstd{,} \hlstr{"bats_codemlM1M2_NbSites"}\hlstd{,}
\hlstr{"bats_codemlM1M2_PSS"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{,}
\hlstr{"bats_codemlM7M8_p.value"}\hlstd{,} \hlstr{"bats_codemlM7M8_NbSites"} \hlstd{,}
\hlstr{"bats_codemlM7M8_PSS"}\hlstd{)]}
\hlkwd{names}\hlstd{(dftmp)}\hlkwb{<-}\hlkwd{names}\hlstd{(df)}
\hlkwd{makeFig1}\hlstd{(dftmp)}
......
......@@ -273,7 +273,7 @@ But Genetic Innovation by splicing variants YES.
"SCARB1" "SCARB1" "SCARB1" "nsp7" "SCARB1" "list26_COV_list4dataset2nonOrf" "scavenger receptor class B member 1" "CD36L1|CLA-1|CLA1|HDLQTL6|SR-BI|SRB1" "no" 25 1530 510 0.10383 0.62207 0.0784 0.7554 1 1 1 1 1 1 NA NA NA "" "" "" "" NA NA NA NA NA NA NA NA NA NA NA NA NA "" "SCARB1" "ENSG00000073060.15" "ENST00000545493.1" 14 146 100 0.221176368 0.1874 0.49354 0.49355 -910.195166 -910.195401 0.999765028 NA NA NA "" "SCARB1" 0.06378625 0.750735805 "" NA NA NA "" "" 1059 353 0.12908 0.34857 0.0509 0.3941 1 1 1 1 1 1 NA NA NA "" "SCARB1" "SCARB1_sequences_filtered_longestORFs_mafft_mincov_prank_part2_prank" "SCARB1_all_part2" 1623 24 "0.094947443872" "0.131" "Y" "0.0000" 11 "549, 607, 630, 703, 733, 911, 965, 993, 1432, 1531, 1622" "N" "0.9999999983037924" 0 "na" "N" "0.42297612986890076" 0 "na" "Y" "7.551887340080195e-27" 1 "1531" "na" "na" 0 "na" "SCARB1_sequences_filtered_longestORFs_mafft_prank" "SCARB1" 763 10 "0.218091439138" "0.155" "Y" 0 19 "5, 7, 132, 133, 137, 157, 176, 236, 277, 417, 476, 509, 592, 674, 688, 724, 740, 741, 763" "N" "0.9999998503026403" 0 "na" "N" "0.4515726499701246" 0 "na" "N" "1.0" 0 "na" "Y" "0.0002460440433622239" 1 "763" "PS. Splicing variants" "" "shared"
"SCCPDH" "SCCPDH" "SCCPDH" "nsp7" "SCCPDH" "list24_COV_list2nonOrf" "saccharopine dehydrogenase (putative)" "CGI-49|NET11" "no" 24 1290 430 0.26038 0.57846 0.1053 0.4043 1 1 1 1 1 1 NA NA NA "" "" "" "" NA NA NA NA NA NA NA NA NA NA NA NA NA "" "SCCPDH" "ENSG00000143653.9" "ENST00000366510.3" 15 430 97.6744186 0.196343201 0.1627 0.45905 0.45905 -2886.510879 -2886.510885 0.999994 NA NA NA "" "SCCPDH" 0.002019556 0.09129021 "" NA NA NA "" "" 999 333 0.29684 0.36519 0.074 0.2492 1 1 1 1 1 1 NA NA NA "" "SCCPDH" "SCCPDH_sequences_filtered_longestORFs_mafft_mincov_prank" "SCCPDH_all" 717 23 "0.297146699972" "0.323" "Y" "0.0242" 4 "203, 437, 632, 635" "N" "0.16718066834449946" 0 "na" "N" "0.05798336825737528" 0 "na" "Y" "0.03951796106177764" 1 "590" "Y" "0.015732919159708345" 1 "590" "SCCPDH_sequences_filtered_longestORFs_mafft_prank" "SCCPDH" 654 12 "0.281129406592" "0.291" "N" 0.1315 11 "46, 70, 87, 228, 229, 231, 238, 327, 379, 594, 644" "N" "0.0747269312417341" 0 "na" "Y" "0.005045637026761042" 1 "367" "Y" "0.047028569511397056" 0 "" "Y" "0.0063709925831932045" 1 "367" "PS ok. Splice variants" "" "shared"
"SDF2" "SDF2" "SDF2" "orf8" "SDF2" "list25_COV_list3dataset2orf" "stromal cell derived factor 2" "-" "no" 23 636 212 0.13507 0.3131 0.0364 0.2694 1 1 1 1 1 1 NA NA NA "" "SDF2" "ENSMLUG00000012419.2" "ENSMLUT00000012419.2" 15 212 98.58490566 0.06852197 0.335 0.90704 0.90706 -1790.437725 -1790.438975 0.998750781 NA NA NA "" "SDF2" "ENSG00000132581.9" "ENST00000591903.1" 19 105 88.57142857 0.073449063 4.3067 9.69242 9.69891 -726.542628 -726.543552 0.999076427 NA NA NA "" "SDF2" 0.997743245 1 "" NA NA NA "" "" 522 174 0.10167 0.24504 0.0225 0.2215 1 1 0.96895 1 1 1 NA NA NA "" "SDF2" "SDF2_sequences_filtered_longestORFs_mafft_mincov_prank" "SDF2_all" 364 23 "0.091229165614" "0.089" "N" "0.7862" 0 "na" "N" "0.9999997541511068" 0 "na" "N" "0.9999998942125841" 0 "na" "N" "1.0" 0 "na" "N" "0.9990004998331715" 0 "na" "SDF2_bat_select_mafft_prank" "SDF2" 246 12 "0.082797829312181" "0.080" "N" 0.2145 0 "na" "N" "0.999999439606811" 0 "na" "N" "0.999999993020992" 0 "na" "N" "1.0" 0 "na" "N" "1.0" 0 "na" "" "" "shared"
"SELENOS" "SELENOS" "VIMP" "nsp7" "VIMP" "list26_COV_list4dataset2nonOrf" "selenoprotein S protein-coding" "AD-015|ADO15|SBBI8|SELS|SEPS1|VIMP" "no" 24 567 189 0.21832 0.68129 0.1108 0.5076 0.93424 1 0.91359 1 0.99621 1 NA NA NA "" "" "" "" NA NA NA NA NA NA NA NA NA NA NA NA NA "" "SELENOS" "ENSG00000131871.14" "ENST00000528346.1" 9 218 83.02752294 0.295972578 0.1231 0.34109 0.34109 -1105.646929 -1105.646929 1 NA NA NA "" "" NA NA "" NA NA NA "" "" 408 136 0.1532 0.50802 0.0684 0.4462 0.65661 1 0.78342 1 0.90578 1 NA NA NA "" "SELENOS" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA "SELENOS_bat_select_mafft_prank" "SELENOS" 218 8 "0.1693975661318" "0.172" "N" 0.8176 5 "71, 72, 85, 128, 173" "N" "0.999999579118228" 0 "na" "N" "0.389714156459387" 0 "na" "N" "0.9970044955034437" 0 "na" "N" "0.2133118712228997" 0 "na" "" "" "onlybats"
"SELENOS" "SELENOS" "VIMP" "nsp7" "VIMP" "list26_COV_list4dataset2nonOrf" "selenoprotein S protein-coding" "AD-015|ADO15|SBBI8|SELS|SEPS1|VIMP" "no" 24 567 189 0.21832 0.68129 0.1108 0.5076 0.93424 1 0.91359 1 0.99621 1 NA NA NA "" "" "" "" NA NA NA NA NA NA NA NA NA NA NA NA NA "" "SELENOS" "ENSG00000131871.14" "ENST00000528346.1" 9 218 83.02752294 0.295972578 0.1231 0.34109 0.34109 -1105.646929 -1105.646929 1 NA NA NA "" "" NA NA "" NA NA NA "" "" 408 136 0.1532 0.50802 0.0684 0.4462 0.65661 1 0.78342 1 0.90578 1 NA NA NA "" "SELENOS" "SELENOS_sequences_filtered_longestORFs_mafft_mincov_prank" "SELENOS_all" 367 24 "0.207287174753623" "0.196" "Y" "0" 1 "90" "N" "0.999999228305" 0 "na" "N" "0.986116909613" 0 "na" "N" "1" 0 "na" "N" "0.692117181689" 0 "na" "SELENOS_bat_select_mafft_prank" "SELENOS" 218 8 "0.1693975661318" "0.172" "N" 0.8176 5 "71, 72, 85, 128, 173" "N" "0.999999579118228" 0 "na" "N" "0.389714156459387" 0 "na" "N" "0.9970044955034437" 0 "na" "N" "0.2133118712228997" 0 "na" "" "" "shared"
"SEPSECS" "SEPSECS" "SEPSECS" "nsp8" "SEPSECS" "list24_COV_list2nonOrf" "Sep (O-phosphoserine) tRNA:Sec (selenocysteine) tRNA synthase" "LP|PCH2D|SLA|SLA/LP" "yes" 24 1506 502 0.23138 0.39192 0.0699 0.3022 0.00014539 0.0027987575 0.00073465 0.0104755648148148 0.00073013 0.0140550025 5.009 3.57509 6 "246_D_0.971,375_M_1.000,383_H_0.956,385_D_0.950,386_E_0.925,456_K_0.990" "SEPSECS" "ENSMLUG00000005883.2" "ENSMLUT00000005885.2" 11 505 92.07920792 0.157833305 0.327 0.84904 0.84915 -3816.572052 -3816.423878 0.862281065 NA NA NA "" "" "" "" NA NA NA NA NA NA NA NA NA NA NA NA NA "" "" NA NA "" NA NA NA "" "" 1296 432 0.24565 0.28571 0.0539 0.2196 8.3292e-06 0.000458106 4.8623e-05 0.002339981875 4.9379e-05 0.002715845 1.667 8.61509 5 "325_M_1.000,333_H_0.980,345_A_0.902,394_K_0.996,428_Y_0.910" "SEPSECS" "SEPSECS_sequences_filtered_longestORFs_mafft_mincov_prank" "SEPSECS_all" 1204 24 "0.200383584131" "0.191" "Y" "0.0012" 4 "411, 792, 871, 1131" "Y" "0.04699410636855563" 1 "871" "Y" "0.000675790377354786" 1 "871" "Y" "0.022573017543302893" 0 "" "Y" "0.002544044860029952" 1 "871" "SEPSECS_sequences_filtered_longestORFs_mafft_prank" "SEPSECS" 642 12 "0.207760285718" "0.221" "Y" 0.0113 0 "na" "N" "0.4067707665111908" 0 "na" "Y" "0.00326458340100601" 2 "577, 585" "N" "0.4269877306527835" 0 "na" "N" "0.0707218996108505" 0 "na" "" "" "shared"
"SIGMAR1" "SIGMAR1" "SIGMAR1" "nsp6" "SIGMAR1" "list26_COV_list4dataset2nonOrf" "sigma non-opioid intracellular receptor 1" "ALS16|DSMA2|OPRS1|SIG-1R|SR-BP|SR-BP1|SRBP|hSigmaR1|sigma1R" "no" 24 672 224 0.05548 0.42023 0.0297 0.5354 0.38838 0.874422807017544 0.596 1 0.67815 1 NA NA NA "" "SIGMAR1" "ENSMLUG00000003061.1" "ENSMLUT00000003059.1" 9 224 98.66071429 0.085786955 0.2738 0.68602 0.68531 -1513.500222 -1513.071044 0.651044033 NA NA NA "" "SIGMAR1" "ENSG00000147955.16" "ENST00000477726.1" 16 193 87.56476684 0.045269245 0.2398 0.42429 0.42002 -1002.547639 -1002.375629 0.841970753 NA NA NA "" "" NA NA "" NA NA NA "" "" 429 143 0.0585 0.29391 0.0203 0.3471 1 1 1 1 1 1 NA NA NA "" "SIGMAR1" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA "SIGMAR1_sequences_filtered_longestORFs_mafft_prank" "SIGMAR1" 463 11 "0.063555782622" "0.062" "N" 1 0 "na" "N" "0.9999998523637823" 0 "na" "Y" "0.03935837746698778" 3 "28, 43, 296" "N" "1.0" 0 "na" "N" "0.3163202908753499" 0 "na" "" "" "onlybats"
"SIL1" "SIL1" "SIL1" "orf8" "SIL1" "list23_COV_list1orf" "SIL1 nucleotide exchange factor" "BAP|MSS|ULG5" "no" 24 1386 462 0.15942 0.59934 0.0829 0.52 0.12745 0.434232300884956 1 1 1 1 NA NA NA "" "SIL1" "ENSMLUG00000005198.2" "ENSMLUT00000005199.2" 11 385 98.44155844 0.116118369 0.3877 1.00231 1.00231 -3151.512186 -3151.512306 0.999880007 NA NA NA "" "SIL1" "ENSG00000120725.12" "ENST00000505353.1" 19 94 90.42553191 0.221006576 0.2776 0.74645 0.74672 -692.585908 -692.585983 0.999925003 NA NA NA "" "" NA NA "" NA NA NA "" "" 1059 353 0.20076 0.29004 0.047 0.2343 1 1 1 1 0.57402 1 NA NA NA "" "SIL1" "SIL1_sequences_filtered_longestORFs_mafft_mincov_prank" "SIL1_all" 586 24 "0.187714729747" "0.164" "N" "0.5261" 5 "98, 119, 143, 208, 560" "N" "0.9999991549406264" 0 "na" "N" "0.2610615228717177" 0 "na" "N" "1.0" 0 "na" "N" "0.5210027286033118" 0 "na" "SIL1_sequences_filtered_longestORFs_mafft_prank" "SIL1" 670 12 "0.168133221490" "0.160" "Y" 0 5 "212, 241, 273, 568, 647" "N" "0.8329986992390037" 0 "na" "N" "0.10828949711717219" 0 "na" "N" "0.3426656803484229" 0 "na" "N" "0.05535435177850455" 0 "na" "" "" "shared"
......
......@@ -15,7 +15,7 @@
\title{Positive selection on genes interacting with SARS-CoV-2, comparison of different analyses}
\author{Marie Cariou}
\date{Janvier 2021} % Activate to display a given date or no date
\date{March 2021} % Activate to display a given date or no date
\begin{document}
\maketitle
......@@ -28,19 +28,18 @@
Analyses were formatted by the script covid\_comp\_script0\_table.Rnw.
<<eval=FALSE>>=
# Load the complete comparison table from the project directory.
# The enclosing chunk header sets eval=FALSE.
# NOTE(review): workdir is assigned twice -- as a literal path and again via
# `home`; both produce the same string. Looks like two revisions of one
# statement -- confirm which should remain.
workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
# Tab-separated file read with a header row (h= partially matches header=,
# T is shorthand for TRUE).
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
# Print rows x columns of the loaded table.
dim(tab)
@
<<>>=
workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_alldginn.txt"), h=T, sep="\t")
......@@ -52,31 +51,42 @@ dim(tab)
\subsection{Data}
<<data>>=
# Build `tmp`: Gene.name plus the five bats_* and five dginn.primate_* method
# columns, restricted by na.omit() to rows with no NA in any of them.
# NOTE(review): `tmp` and `col` are each built twice with the same column
# names (only the line wrapping differs). Looks like two revisions of the
# same statements -- confirm which to keep.
tmp<-na.omit(tab[,c("Gene.name", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8",
"bats_codemlM1M2", "bats_codemlM7M8", "dginn.primate_codemlM1M2",
"dginn.primate_codemlM7M8", "dginn.primate_BppM1M2",
"dginn.primate_BppM7M8", "dginn.primate_BUSTED")])
# `col` holds the same column names as a character vector.
# NOTE(review): it is not used in the visible part of the file, and it shares
# its name with a common graphics argument -- verify it is needed.
col<-c("Gene.name", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8",
"bats_codemlM1M2", "bats_codemlM7M8", "dginn.primate_codemlM1M2",
"dginn.primate_codemlM7M8", "dginn.primate_BppM1M2",
"dginn.primate_BppM7M8", "dginn.primate_BUSTED")
tmp<-na.omit(tab[,c("Gene.name", "bats_BUSTED", "bats_BppM1M2",
"bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8",
"dginn.primate_codemlM1M2", "dginn.primate_codemlM7M8",
"dginn.primate_BppM1M2", "dginn.primate_BppM7M8",
"dginn.primate_BUSTED")])
col<-c("Gene.name", "bats_BUSTED", "bats_BppM1M2",
"bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8",
"dginn.primate_codemlM1M2", "dginn.primate_codemlM7M8",
"dginn.primate_BppM1M2", "dginn.primate_BppM7M8",
"dginn.primate_BUSTED")
# Print rows x columns of the retained subset.
dim(tmp)
@
\subsection{Omega plot}
<<>>=
x=as.numeric(as.character(tab$dginn.primate_omegaM0Bpp[tab$status=="shared"]))
y=as.numeric(as.character(tab$bats_omegaM0Bpp[tab$status=="shared"]))
tab$dginn.primate_omegaM0Bpp[tab$dginn.primate_omegaM0Bpp=="na"]<-NA
x=as.numeric(as.character(
tab$dginn.primate_omegaM0Bpp[tab$status=="shared"]))
tab$bats_omegaM0Bpp[tab$bats_omegaM0Bpp=="na"]<-NA
y=as.numeric(as.character(
tab$bats_omegaM0Bpp[tab$status=="shared"]))
names(x)<-tab$Gene.name[tab$status=="shared"]
plot(x,y, xlab="bpp omega primate", ylab="bpp omega bats", cex=0.5)
abline(0,1)
abline(lm(y~x), col="red")
text(x[x>0.5 &y<0.4], (y[x>0.5 &y<0.4]+0.01), names(x)[x>0.5 &y<0.4], cex=0.7)
text(x[x<0.45 &y>0.45], (y[x<0.45 &y>0.45]+0.01), names(x)[x<0.45 &y>0.45], cex=0.7)
text(x[x>0.45 &y>0.4], (y[x>0.45 &y>0.4]+0.01), names(x)[x>0.45 &y>0.4], cex=0.7)
text(x[x>0.5 &y<0.4], (y[x>0.5 &y<0.4]+0.01),
names(x)[x>0.5 &y<0.4], cex=0.7)
text(x[x<0.45 &y>0.45], (y[x<0.45 &y>0.45]+0.01),
names(x)[x<0.45 &y>0.45], cex=0.7)
text(x[x>0.45 &y>0.4], (y[x>0.45 &y>0.4]+0.01),
names(x)[x>0.45 &y>0.4], cex=0.7)
@
......@@ -87,21 +97,28 @@ library(Mondrian)
monddata<-as.data.frame(tmp$Gene.name)
batstmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y", tmp$bats_codemlM7M8=="Y",
tmp$bats_BppM1M2=="Y", tmp$bats_BppM7M8=="Y", tmp$bats_BUSTED=="Y"))
primatetmp<-rowSums(cbind(tmp$"dginn.primate_codemlM1M2"=="Y",
tmp$"dginn.primate_codemlM7M8"=="Y", tmp$"dginn.primate_BppM1M2"=="Y",
tmp$"dginn.primate_BppM7M8"=="Y", tmp$"dginn.primate_BUSTED"=="Y"))
batstmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y",
tmp$bats_codemlM7M8=="Y",
tmp$bats_BppM1M2=="Y",
tmp$bats_BppM7M8=="Y",
tmp$bats_BUSTED=="Y"))
primatetmp<-rowSums(cbind(tmp$"dginn.primate_codemlM1M2"=="Y",
tmp$"dginn.primate_codemlM7M8"=="Y",
tmp$"dginn.primate_BppM1M2"=="Y",
tmp$"dginn.primate_BppM7M8"=="Y",
tmp$"dginn.primate_BUSTED"=="Y"))
monddata$bats_dginn3<-ifelse(batstmp>=3, 1,0)
monddata$primate_dginn3<-ifelse(primatetmp>=3, 1,0)
monddata$bats_dginn4<-ifelse(batstmp>=4, 1,0)
monddata$primate_dginn4<-ifelse(primatetmp>=4, 1,0)
mondrian(monddata[,2:3], labels=c("DGINN bats >3", "DGINN primate >3"))
mondrian(monddata[,2:3],
labels=c("DGINN bats >3", "DGINN primate >3"))
mondrian(monddata[,4:5], labels=c("DGINN bats >4", "DGINN primate >4"))
mondrian(monddata[,4:5],
labels=c("DGINN bats >4", "DGINN primate >4"))
@
......@@ -173,7 +190,11 @@ tablo<-as.data.frame(tmp$Gene.name)
tablo$nbats<-batstmp
tablo$nprimates<-primatetmp
plot(NULL, xlim=c(-0.5,5.5), ylim=c(-3,5.5), xlab="bats", ylab="primates", main="Genes supported by x,y methods in bats and primates", bty="n", xaxt="n", yaxt="n")
plot(NULL, xlim=c(-0.5,5.5), ylim=c(-3,5.5),
xlab="bats", ylab="primates",
main="Genes supported by x,y methods in bats and primates",
bty="n",
xaxt="n", yaxt="n")
text(x=rep(-0.6, 6), y=0:5, 0:5)
text(y=rep(-0.65, 6), x=0:5, 0:5)
......@@ -189,11 +210,14 @@ for (p in 0:5){
for (b in 0:5){
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==b & tablo$nprimates==p]
if(length(tmp)>0 & length(tmp)<=8){
text(b,seq(from=(p-0.4), to=(p+0.4), length.out = length(tmp)), tmp, cex=0.4)
text(b,seq(from=(p-0.4), to=(p+0.4), length.out = length(tmp)),
tmp, cex=0.4)
}else if (length(tmp)>8 & length(tmp)<=16){
print(c(p, b))
text((b-0.3),seq(from=(p-0.4), to=(p+0.4), length.out = 8), tmp[1:8], cex=0.4)
text((b+0.3),seq(from=(p-0.4), to=(p+0.4), length.out = (length(tmp)-8)), tmp[9:length(tmp)], cex=0.4)
text((b-0.3),seq(from=(p-0.4), to=(p+0.4), length.out = 8),
tmp[1:8], cex=0.4)
text((b+0.3),seq(from=(p-0.4), to=(p+0.4), length.out = (length(tmp)-8)),
tmp[9:length(tmp)], cex=0.4)
}else if (length(tmp)>16){
text(b,p, paste0(length(tmp), " values"))
}
......@@ -203,13 +227,25 @@ for (p in 0:5){
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==1]
text(-0.4,-1.2, "p=1/n=0", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-1.1, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=5.5, length.out = length(tmp)-18),-1.3, tmp[19:length(tmp)], cex=0.4)
text(seq(from=0.1, to=5.5, length.out = 19),
-1.1,
tmp[1:19],
cex=0.4)
text(seq(from=0.1, to=5.5, length.out = length(tmp)-19),
-1.3,
tmp[20:length(tmp)],
cex=0.4)
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==1 & tablo$nprimates==1]
text(-0.4,-1.7, "p=1/n=1", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-1.6, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=4.5, length.out = length(tmp)-18),-1.8, tmp[19:length(tmp)], cex=0.4)
text(seq(from=0.1, to=5.5, length.out = 18),
-1.6,
tmp[1:18],
cex=0.4)
text(seq(from=0.1, to=4.5, length.out = length(tmp)-18),
-1.8,
tmp[19:length(tmp)],
cex=0.4)
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==0]
......@@ -227,10 +263,16 @@ text(seq(from=0.1, to=1, length.out = length(tmp)-18),-3.0, tmp[19:length(tmp)],
@
<<>>=
write.csv(tablo[tablo$nbats>=3,"tmp$Gene.name"], "batssup3.csv", row.names=FALSE, quote=FALSE)
write.csv(tablo[tablo$nprimates>=3,"tmp$Gene.name"], "primatessup3.csv", row.names=FALSE, quote=FALSE)
write.csv(tablo, "primatesVbats.csv", row.names=FALSE, quote=FALSE)
write.csv(tablo[tablo$nbats>=3,"tmp$Gene.name"], "batssup3.csv",
row.names=FALSE,
quote=FALSE)
write.csv(tablo[tablo$nprimates>=3,"tmp$Gene.name"], "primatessup3.csv",
row.names=FALSE,
quote=FALSE)
write.csv(tablo, "primatesVbats.csv",
row.names=FALSE,
quote=FALSE)
@
Restreindre ce tableau aux gènes présents dans l'analyse de Krogan.
......@@ -269,6 +311,201 @@ sort(krogan[krogan %in% tablo$`tmp$Gene.name`==F])
write.csv(tabloK, "primatesVbats_onlykrogan.csv", row.names=FALSE, quote=FALSE)
@
\section{Tanglegram}
<<eval=TRUE>>=
# Tanglegram comparing bats and primates: one dendrogram per clade is built
# from the per-gene counts stored in tablo (nbats / nprimates), and the two
# trees are drawn face to face in a PNG file.
#install.packages('dendextend') # stable CRAN version
library(dendextend) # load the package
#install.packages("phytools") # stable CRAN version
library(phytools) # load the package
library(ggraph)
library(igraph)
library(tidyverse)
# Keep the genes whose count is non-zero in at least one of the two clades.
tmp<-tablo[(tablo$nbats!=0 | tablo$nprimates!=0),]
#tmp<-head(tablo, 20)
#tmp<-rbind(as.matrix(tmp), c("outgroup", 50, 50))
tmp<-as.data.frame(tmp)
# Hierarchical clustering on the one-dimensional counts of each clade.
matbats<-hclust(dist(tmp$nbats))
matpri<-hclust(dist(tmp$nprimates))
# Top-level expression: prints the table sorted by nbats in the knitr output.
tmp[order(tmp$nbats),]
dendpri<-as.dendrogram(matpri)
dendbats<-as.dendrogram(matbats)
# Replace the leaf labels by gene names; the current labels are used as an
# index into the gene-name column.
# NOTE(review): assumes the labels are row positions of tmp -- confirm.
labels(dendpri)<-as.character(tmp$`tmp$Gene.name`[labels(dendpri)])
labels(dendbats)<-as.character(tmp$`tmp$Gene.name`[labels(dendbats)])
# Rotate each tree towards the gene order given by increasing count.
# The variable is named `order` like the base function; calls to order()
# still resolve to the function.
tmp[order(tmp$nprimates, decreasing=FALSE),]$'tmp$Gene.name'-> order
dendpri<-dendextend::rotate(dendpri, order=order)
tmp[order(tmp$nbats, decreasing=FALSE),]$'tmp$Gene.name'-> order
dendbats<-dendextend::rotate(dendbats, order=order)
#### Some nodes of the tree still need to be swapped
class(labels(dendpri))
# Left tree = bats, right tree = primates.
dend12 <- dendlist(dendbats, dendpri)
# Draw the tanglegram into figure/tanglegramm.png.
png("figure/tanglegramm.png", width = 1800, height = 3000)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=2,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=1,
lab.cex=1,
k_labels=6)
dev.off()
@
<<eval=TRUE>>=
# Redraw the tanglegram with grey connecting lines, except six lines drawn
# in black. Uses dendpri and dend12 built in the previous chunk.
# Hard-coded positions of the lines to highlight.
# NOTE(review): these numbers are tied to the current input table and must
# be recomputed whenever the gene list changes -- confirm they still point
# to the intended genes.
ace<-264
tmprss2<-75
znf318<-81
sepsecs<-228
tbk1<-273
ripk1<-224
# One colour per label, grey by default.
col<-rep("grey", length(labels(dendpri)))
col[ace]<-"black"
col[tmprss2]<-"black"
col[znf318]<-"black"
col[sepsecs]<-"black"
col[tbk1]<-"black"
col[ripk1]<-"black"
# Label size vector, all 1; its length is twice the number of labels.
font<-rep(1, length(labels(dendpri))*2)
#font[ace]<-1.3
#font[tmprss2]<-1.3
#font[length(labels(dendpri))+160]<-1.3
# Same file name as the png() call of the previous chunk, so this figure
# replaces the one written there.
png("figure/tanglegramm.png", width = 1800, height = 3000)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=2,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=1,
lab.cex=font,
k_labels=6,
color_lines=col)
dev.off()
@
<<>>=
# Same tanglegram construction, restricted to the genes with a count of at
# least 3 in bats or in primates.
tmp<-tablo[(tablo$nbats>=3 | tablo$nprimates>=3),]
dim(tmp)
tmp<-as.data.frame(tmp)
# Give the gene-name column a syntactic name so it can be accessed with `$`.
names(tmp)<-c("tmp.Gene.name", "nbats", "nprimates")
# Hierarchical clustering on the one-dimensional counts of each clade.
matbats<-hclust(dist(tmp$nbats))
matpri<-hclust(dist(tmp$nprimates))
#tmp[order(tmp$nbats),]
dendpri<-as.dendrogram(matpri)
dendbats<-as.dendrogram(matbats)
# Replace the leaf labels by gene names; the current labels are used as an
# index into tmp$tmp.Gene.name.
# NOTE(review): assumes the labels are row positions of tmp -- confirm.
labels(dendpri)<-as.character(tmp$tmp.Gene.name[labels(dendpri)])
labels(dendbats)<-as.character(tmp$tmp.Gene.name[labels(dendbats)])
# Rotate each tree towards the gene order given by increasing count.
# The variable is named `order` like the base function; calls to order()
# still resolve to the function.
tmp[order(tmp$nprimates, decreasing=FALSE),]$tmp.Gene.name-> order
dendpri<-dendextend::rotate(dendpri, order=order)
tmp[order(tmp$nbats, decreasing=FALSE),]$tmp.Gene.name-> order
dendbats<-dendextend::rotate(dendbats, order=order)
#### Some nodes of the trees still need to be swapped
class(labels(dendpri))
# Left tree = bats, right tree = primates.
dend12 <- dendlist(dendbats, dendpri)
# Colours of the connecting lines of the tanglegram, one entry per label.
# Genes are located by name in labels(dendbats), so no hard-coded position
# is needed (the former ace/tmprss2/... index constants were never read in
# this chunk and have been removed).
# NOTE(review): positions are taken from labels(dendbats); confirm that this
# is the ordering tanglegram() expects for color_lines.
# Default colour for every line.
col<-rep("lightblue", length(labels(dendpri)))
# Genes with a count of at least 3 in both bats and primates.
plusplus<-tmp$tmp.Gene.name[tmp$nbats>=3 & tmp$nprimates>=3]
col[which(labels(dendbats) %in% plusplus)]<-"pink"
# Genes of particular interest, highlighted in blue.
interest<-c("TMPRSS2","ZNF318", "SEPSECS","TBK1", "RIPK1")
col[which(labels(dendbats) %in% interest)]<-"blue"
# ACE2 gets its own colour; assigned last so it overrides the ones above.
interestpp<-c("ACE2")
col[which(labels(dendbats) %in% interestpp)]<-"red"
# Render the restricted tanglegram twice with the line colours in `col`:
# first as PNG, then as EPS.
# NOTE(review): the PNG name is spelled "tanglegramm..." while the EPS name
# is spelled "tanglegram..." -- confirm this is intended.
png("figure/tanglegrammsup3.png", width = 500, height = 1200)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=3,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=2,
lab.cex=1,
k_labels=6,
color_lines=col)
dev.off()
### Change the colours of the groups
## change the colours of the lines: sel vs sel or sel vs non-sel
# EPS version of the same figure; k_labels is disabled here.
setEPS()
postscript("figure/tanglegramsup3.eps", height=15, width=5)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=3,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=2,
lab.cex=1,
# k_labels=6,
color_lines=col)
dev.off()
# Colour the labels of both trees by their count (0 to 5), using six colours
# picked from the viridis palette. Each colour is repeated as many times as
# there are genes with the corresponding count.
# The counts are tabulated on the fixed levels 0:5 so that a value absent
# from the data contributes zero repetitions; tabulating only the observed
# values would give rep() a `times` vector shorter than the palette and make
# it fail.
# An earlier rainbow(15)[c(1:3, 9:11)] palette was assigned here and then
# immediately overwritten by the viridis one; only the effective assignment
# is kept.
# NOTE(review): assumes the labels of each tree are sorted by increasing
# count after rotate(), and that viridis() is attached elsewhere in the
# file -- confirm both.
labcol<-viridis(10)[c(1:3, 7:9)]
labels_colors(dend12[[1]])<-rep(labcol, table(factor(tmp$nbats, levels=0:5)))
labels_colors(dend12[[2]])<-rep(labcol, table(factor(tmp$nprimates, levels=0:5)))
# Second EPS version of the restricted tanglegram, drawn after the label
# colours of dend12 have been set; k_labels stays disabled.
setEPS()
postscript("figure/tanglegramsup3_V2.eps", height=15, width=5)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=3,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=2,
lab.cex=1,
# k_labels=6,
color_lines=col)
dev.off()
@
\end{document}
......
No preview for this file type
......@@ -65,7 +65,7 @@
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{Janvier 2021} % Activate to display a given date or no date
\date{March 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
......@@ -78,11 +78,11 @@
Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/"}
\hlstd{workdir}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,} \hlstr{"CIRI_BIBS_projects/2020_05_Etienne_covid/"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"covid_comp/covid_comp_complete.txt"}\hlstd{),} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
......@@ -91,12 +91,11 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\end{kframe}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/"}
\hlstd{workdir}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,} \hlstr{"CIRI_BIBS_projects/2020_05_Etienne_covid/"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"covid_comp/covid_comp_alldginn.txt"}\hlstd{),} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
......@@ -115,18 +114,20 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,}
\hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{,} \hlstr{"dginn.primate_codemlM1M2"}\hlstd{,}
\hlstr{"dginn.primate_codemlM7M8"}\hlstd{,} \hlstr{"dginn.primate_BppM1M2"}\hlstd{,}
\hlstr{"dginn.primate_BppM7M8"}\hlstd{,} \hlstr{"dginn.primate_BUSTED"}\hlstd{)])}
\hlstd{col}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,}
\hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{,} \hlstr{"dginn.primate_codemlM1M2"}\hlstd{,}
\hlstr{"dginn.primate_codemlM7M8"}\hlstd{,} \hlstr{"dginn.primate_BppM1M2"}\hlstd{,}
\hlstr{"dginn.primate_BppM7M8"}\hlstd{,} \hlstr{"dginn.primate_BUSTED"}\hlstd{)}
\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,}
\hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{,}
\hlstr{"dginn.primate_codemlM1M2"}\hlstd{,} \hlstr{"dginn.primate_codemlM7M8"}\hlstd{,}
\hlstr{"dginn.primate_BppM1M2"}\hlstd{,} \hlstr{"dginn.primate_BppM7M8"}\hlstd{,}
\hlstr{"dginn.primate_BUSTED"}\hlstd{)])}
\hlstd{col}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,}
\hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{,}
\hlstr{"dginn.primate_codemlM1M2"}\hlstd{,} \hlstr{"dginn.primate_codemlM7M8"}\hlstd{,}
\hlstr{"dginn.primate_BppM1M2"}\hlstd{,} \hlstr{"dginn.primate_BppM7M8"}\hlstd{,}
\hlstr{"dginn.primate_BUSTED"}\hlstd{)}
\hlkwd{dim}\hlstd{(tmp)}
\end{alltt}
\begin{verbatim}
## [1] 323 11
## [1] 324 11
\end{verbatim}
\end{kframe}
\end{knitrout}
......@@ -136,25 +137,26 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{x}\hlkwb{=}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp[tab}\hlopt{$}\hlstd{status}\hlopt{==}\hlstr{"shared"}\hlstd{]))}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt}
\hlstd{y}\hlkwb{=}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0Bpp[tab}\hlopt{$}\hlstd{status}\hlopt{==}\hlstr{"shared"}\hlstd{]))}
\end{alltt}
\hlstd{tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp[tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp}\hlopt{==}\hlstr{"na"}\hlstd{]}\hlkwb{<-}\hlnum{NA}
\hlstd{x}\hlkwb{=}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(}
\hlstd{tab}\hlopt{$}\hlstd{dginn.primate_omegaM0Bpp[tab}\hlopt{$}\hlstd{status}\hlopt{==}\hlstr{"shared"}\hlstd{]))}
\hlstd{tab}\hlopt{$}\hlstd{bats_omegaM0Bpp[tab}\hlopt{$}\hlstd{bats_omegaM0Bpp}\hlopt{==}\hlstr{"na"}\hlstd{]}\hlkwb{<-}\hlnum{NA}
\hlstd{y}\hlkwb{=}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(}
\hlstd{tab}\hlopt{$}\hlstd{bats_omegaM0Bpp[tab}\hlopt{$}\hlstd{status}\hlopt{==}\hlstr{"shared"}\hlstd{]))}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt}
\hlkwd{names}\hlstd{(x)}\hlkwb{<-}\hlstd{tab}\hlopt{$}\hlstd{Gene.name[tab}\hlopt{$}\hlstd{status}\hlopt{==}\hlstr{"shared"}\hlstd{]}
\hlkwd{plot}\hlstd{(x,y,} \hlkwc{xlab}\hlstd{=}\hlstr{"bpp omega primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"bpp omega bats"}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.5}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlkwd{lm}\hlstd{(y}\hlopt{~}\hlstd{x),} \hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{)}
\hlkwd{text}\hlstd{(x[x}\hlopt{>}\hlnum{0.5} \hlopt{&}\hlstd{y}\hlopt{<}\hlnum{0.4}\hlstd{], (y[x}\hlopt{>}\hlnum{0.5} \hlopt{&}\hlstd{y}\hlopt{<}\hlnum{0.4}\hlstd{]}\hlopt{+}\hlnum{0.01}\hlstd{),} \hlkwd{names}\hlstd{(x)[x}\hlopt{>}\hlnum{0.5} \hlopt{&}\hlstd{y}\hlopt{<}\hlnum{0.4}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\hlkwd{text}\hlstd{(x[x}\hlopt{<}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.45}\hlstd{], (y[x}\hlopt{<}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.45}\hlstd{]}\hlopt{+}\hlnum{0.01}\hlstd{),} \hlkwd{names}\hlstd{(x)[x}\hlopt{<}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.45}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\hlkwd{text}\hlstd{(x[x}\hlopt{>}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.4}\hlstd{], (y[x}\hlopt{>}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.4}\hlstd{]}\hlopt{+}\hlnum{0.01}\hlstd{),} \hlkwd{names}\hlstd{(x)[x}\hlopt{>}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.4}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\hlkwd{text}\hlstd{(x[x}\hlopt{>}\hlnum{0.5} \hlopt{&}\hlstd{y}\hlopt{<}\hlnum{0.4}\hlstd{], (y[x}\hlopt{>}\hlnum{0.5} \hlopt{&}\hlstd{y}\hlopt{<}\hlnum{0.4}\hlstd{]}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlkwd{names}\hlstd{(x)[x}\hlopt{>}\hlnum{0.5} \hlopt{&}\hlstd{y}\hlopt{<}\hlnum{0.4}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\hlkwd{text}\hlstd{(x[x}\hlopt{<}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.45}\hlstd{], (y[x}\hlopt{<}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.45}\hlstd{]}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlkwd{names}\hlstd{(x)[x}\hlopt{<}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.45}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\hlkwd{text}\hlstd{(x[x}\hlopt{>}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.4}\hlstd{], (y[x}\hlopt{>}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.4}\hlstd{]}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlkwd{names}\hlstd{(x)[x}\hlopt{>}\hlnum{0.45} \hlopt{&}\hlstd{y}\hlopt{>}\hlnum{0.4}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/unnamed-chunk-3-1}
......@@ -170,24 +172,31 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\hlstd{monddata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tmp}\hlopt{$}\hlstd{Gene.name)}
\hlstd{batstmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstd{bats_codemlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstd{bats_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstd{bats_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{batstmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{primatetmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tmp}\hlopt{$}\hlstr{"dginn.primate_codemlM1M2"}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstr{"dginn.primate_codemlM7M8"}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstr{"dginn.primate_BppM1M2"}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstr{"dginn.primate_BppM7M8"}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstr{"dginn.primate_BUSTED"}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{tmp}\hlopt{$}\hlstr{"dginn.primate_codemlM7M8"}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstr{"dginn.primate_BppM1M2"}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstr{"dginn.primate_BppM7M8"}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstr{"dginn.primate_BUSTED"}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{monddata}\hlopt{$}\hlstd{bats_dginn3}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(batstmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{primate_dginn3}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(primatetmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{bats_dginn4}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(batstmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{primate_dginn4}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(primatetmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{3}\hlstd{],} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN bats >3"}\hlstd{,} \hlstr{"DGINN primate >3"}\hlstd{))}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{3}\hlstd{],}
\hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN bats >3"}\hlstd{,} \hlstr{"DGINN primate >3"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianbats-1}
\begin{kframe}\begin{alltt}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{4}\hlopt{:}\hlnum{5}\hlstd{],} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN bats >4"}\hlstd{,} \hlstr{"DGINN primate >4"}\hlstd{))}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{4}\hlopt{:}\hlnum{5}\hlstd{],}
\hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN bats >4"}\hlstd{,} \hlstr{"DGINN primate >4"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianbats-2}
......@@ -242,9 +251,9 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 139 ITGB1 1 1 1
## 146 LMAN2 1 1 1
## 212 POLA1 1 1 1
## 263 SLC27A2 1 1 1
## 301 TOR1AIP1 1 1 1
## 314 VPS39 1 1 1
## 264 SLC27A2 1 1 1
## 302 TOR1AIP1 1 1 1
## 315 VPS39 1 1 1
## primate_dginn4
## 6 1
## 7 1
......@@ -254,9 +263,9 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 139 1
## 146 1
## 212 1
## 263 1
## 301 1
## 314 1
## 264 1
## 302 1
## 315 1
\end{verbatim}
\end{kframe}
\end{knitrout}
......@@ -285,9 +294,9 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 212 POLA1 1 1 1
## 239 RAP1GDS1 1 1 1
## 257 SCCPDH 1 1 0
## 263 SLC27A2 1 1 1
## 301 TOR1AIP1 1 1 1
## 314 VPS39 1 1 1
## 264 SLC27A2 1 1 1
## 302 TOR1AIP1 1 1 1
## 315 VPS39 1 1 1
## primate_dginn4
## 6 1
## 7 1
......@@ -304,9 +313,9 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 212 1
## 239 0
## 257 0
## 263 1
## 301 1
## 314 1
## 264 1
## 302 1
## 315 1
\end{verbatim}
\end{kframe}
\end{knitrout}
......@@ -354,15 +363,15 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 245 REEP6 0 1 0
## 248 RIPK1 0 1 0
## 253 SAAL1 0 1 0
## 259 SEPSECS 0 1 0
## 261 SIRT5 0 1 0
## 262 SLC25A21 0 1 0
## 296 TMEM39B 0 1 0
## 298 TMPRSS2 0 1 0
## 304 TUBGCP2 0 1 0
## 307 UBAP2 0 1 0
## 310 UGGT2 0 1 0
## 321 ZNF318 0 1 0
## 260 SEPSECS 0 1 0
## 262 SIRT5 0 1 0
## 263 SLC25A21 0 1 0
## 297 TMEM39B 0 1 0
## 299 TMPRSS2 0 1 0
## 305 TUBGCP2 0 1 0
## 308 UBAP2 0 1 0
## 311 UGGT2 0 1 0
## 322 ZNF318 0 1 0
## primate_dginn4
## 31 1
## 34 1
......@@ -395,15 +404,15 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 245 1
## 248 1
## 253 1
## 259 1
## 261 1
## 260 1
## 262 1
## 296 1
## 298 1
## 304 1
## 307 1
## 310 1
## 321 1
## 263 1
## 297 1
## 299 1
## 305 1
## 308 1
## 311 1
## 322 1
\end{verbatim}
\end{kframe}
\end{knitrout}
......@@ -473,20 +482,20 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 248 RIPK1 0 1 0
## 250 RPL36 0 1 0
## 253 SAAL1 0 1 0
## 259 SEPSECS 0 1 0
## 261 SIRT5 0 1 0
## 262 SLC25A21 0 1 0
## 277 STOM 0 1 0
## 290 TIMM8B 0 1 0
## 296 TMEM39B 0 1 0
## 298 TMPRSS2 0 1 0
## 302 TRIM59 0 1 0
## 303 TRMT1 0 1 0
## 304 TUBGCP2 0 1 0
## 307 UBAP2 0 1 0
## 310 UGGT2 0 1 0
## 312 USP54 0 1 0
## 321 ZNF318 0 1 0
## 260 SEPSECS 0 1 0
## 262 SIRT5 0 1 0
## 263 SLC25A21 0 1 0
## 278 STOM 0 1 0
## 291 TIMM8B 0 1 0
## 297 TMEM39B 0 1 0
## 299 TMPRSS2 0 1 0
## 303 TRIM59 0 1 0
## 304 TRMT1 0 1 0
## 305 TUBGCP2 0 1 0
## 308 UBAP2 0 1 0
## 311 UGGT2 0 1 0
## 313 USP54 0 1 0
## 322 ZNF318 0 1 0
## primate_dginn4
## 19 0
## 23 0
......@@ -544,20 +553,20 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 248 1
## 250 0
## 253 1
## 259 1
## 261 1
## 260 1
## 262 1
## 277 0
## 290 0
## 296 1
## 298 1
## 302 0
## 263 1
## 278 0
## 291 0
## 297 1
## 299 1
## 303 0
## 304 1
## 307 1
## 310 1
## 312 0
## 321 1
## 304 0
## 305 1
## 308 1
## 311 1
## 313 0
## 322 1
\end{verbatim}
\end{kframe}
\end{knitrout}
......@@ -583,8 +592,8 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 137 INHBE 1 0 1
## 231 RAB18 1 0 1
## 239 RAP1GDS1 1 1 1
## 267 SLC44A2 1 0 1
## 283 TBK1 1 0 1
## 268 SLC44A2 1 0 1
## 284 TBK1 1 0 1
## primate_dginn4
## 14 0
## 26 0
......@@ -595,8 +604,8 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 137 0
## 231 0
## 239 0
## 267 0
## 283 0
## 268 0
## 284 0
\end{verbatim}
\end{kframe}
\end{knitrout}
......@@ -626,10 +635,10 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 217 PRIM1 1 0 0
## 226 PUSL1 1 0 0
## 231 RAB18 1 0 1
## 266 SLC30A9 1 0 0
## 267 SLC44A2 1 0 1
## 268 SLC9A3R1 1 0 0
## 283 TBK1 1 0 1
## 267 SLC30A9 1 0 0
## 268 SLC44A2 1 0 1
## 269 SLC9A3R1 1 0 0
## 284 TBK1 1 0 1
## primate_dginn4
## 5 0
## 11 0
......@@ -647,10 +656,10 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
## 217 0
## 226 0
## 231 0
## 266 0
## 267 0
## 268 0
## 283 0
## 269 0
## 284 0
\end{verbatim}
\end{kframe}
\end{knitrout}
......@@ -664,7 +673,11 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\hlstd{tablo}\hlopt{$}\hlstd{nbats}\hlkwb{<-}\hlstd{batstmp}
\hlstd{tablo}\hlopt{$}\hlstd{nprimates}\hlkwb{<-}\hlstd{primatetmp}
\hlkwd{plot}\hlstd{(}\hlkwa{NULL}\hlstd{,} \hlkwc{xlim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlopt{-}\hlnum{0.5}\hlstd{,}\hlnum{5.5}\hlstd{),} \hlkwc{ylim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlopt{-}\hlnum{3}\hlstd{,}\hlnum{5.5}\hlstd{),} \hlkwc{xlab}\hlstd{=}\hlstr{"bats"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"primates"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"Genes supported by x,y methods in bats and primates"}\hlstd{,} \hlkwc{bty}\hlstd{=}\hlstr{"n"}\hlstd{,} \hlkwc{xaxt}\hlstd{=}\hlstr{"n"}\hlstd{,} \hlkwc{yaxt}\hlstd{=}\hlstr{"n"}\hlstd{)}
\hlkwd{plot}\hlstd{(}\hlkwa{NULL}\hlstd{,} \hlkwc{xlim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlopt{-}\hlnum{0.5}\hlstd{,}\hlnum{5.5}\hlstd{),} \hlkwc{ylim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlopt{-}\hlnum{3}\hlstd{,}\hlnum{5.5}\hlstd{),}
\hlkwc{xlab}\hlstd{=}\hlstr{"bats"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"primates"}\hlstd{,}
\hlkwc{main}\hlstd{=}\hlstr{"Genes supported by x,y methods in bats and primates"}\hlstd{,}
\hlkwc{bty}\hlstd{=}\hlstr{"n"}\hlstd{,}
\hlkwc{xaxt}\hlstd{=}\hlstr{"n"}\hlstd{,} \hlkwc{yaxt}\hlstd{=}\hlstr{"n"}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=}\hlkwd{rep}\hlstd{(}\hlopt{-}\hlnum{0.6}\hlstd{,} \hlnum{6}\hlstd{),} \hlkwc{y}\hlstd{=}\hlnum{0}\hlopt{:}\hlnum{5}\hlstd{,} \hlnum{0}\hlopt{:}\hlnum{5}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwc{y}\hlstd{=}\hlkwd{rep}\hlstd{(}\hlopt{-}\hlnum{0.65}\hlstd{,} \hlnum{6}\hlstd{),} \hlkwc{x}\hlstd{=}\hlnum{0}\hlopt{:}\hlnum{5}\hlstd{,} \hlnum{0}\hlopt{:}\hlnum{5}\hlstd{)}
......@@ -726,11 +739,14 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\hlkwa{for} \hlstd{(b} \hlkwa{in} \hlnum{0}\hlopt{:}\hlnum{5}\hlstd{)\{}
\hlstd{tmp}\hlkwb{<-}\hlstd{tablo}\hlopt{$}\hlstd{`tmp$Gene.name`[tablo}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlstd{b} \hlopt{&} \hlstd{tablo}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlstd{p]}
\hlkwa{if}\hlstd{(}\hlkwd{length}\hlstd{(tmp)}\hlopt{>}\hlnum{0} \hlopt{&} \hlkwd{length}\hlstd{(tmp)}\hlopt{<=}\hlnum{8}\hlstd{)\{}
\hlkwd{text}\hlstd{(b,}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=(p}\hlopt{-}\hlnum{0.4}\hlstd{),} \hlkwc{to}\hlstd{=(p}\hlopt{+}\hlnum{0.4}\hlstd{),} \hlkwc{length.out} \hlstd{=} \hlkwd{length}\hlstd{(tmp)), tmp,} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{(b,}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=(p}\hlopt{-}\hlnum{0.4}\hlstd{),} \hlkwc{to}\hlstd{=(p}\hlopt{+}\hlnum{0.4}\hlstd{),} \hlkwc{length.out} \hlstd{=} \hlkwd{length}\hlstd{(tmp)),}
\hlstd{tmp,} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlstd{\}}\hlkwa{else if} \hlstd{(}\hlkwd{length}\hlstd{(tmp)}\hlopt{>}\hlnum{8} \hlopt{&} \hlkwd{length}\hlstd{(tmp)}\hlopt{<=}\hlnum{16}\hlstd{)\{}
\hlkwd{print}\hlstd{(}\hlkwd{c}\hlstd{(p, b))}
\hlkwd{text}\hlstd{((b}\hlopt{-}\hlnum{0.3}\hlstd{),}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=(p}\hlopt{-}\hlnum{0.4}\hlstd{),} \hlkwc{to}\hlstd{=(p}\hlopt{+}\hlnum{0.4}\hlstd{),} \hlkwc{length.out} \hlstd{=} \hlnum{8}\hlstd{), tmp[}\hlnum{1}\hlopt{:}\hlnum{8}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{((b}\hlopt{+}\hlnum{0.3}\hlstd{),}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=(p}\hlopt{-}\hlnum{0.4}\hlstd{),} \hlkwc{to}\hlstd{=(p}\hlopt{+}\hlnum{0.4}\hlstd{),} \hlkwc{length.out} \hlstd{= (}\hlkwd{length}\hlstd{(tmp)}\hlopt{-}\hlnum{8}\hlstd{)), tmp[}\hlnum{9}\hlopt{:}\hlkwd{length}\hlstd{(tmp)],} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{((b}\hlopt{-}\hlnum{0.3}\hlstd{),}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=(p}\hlopt{-}\hlnum{0.4}\hlstd{),} \hlkwc{to}\hlstd{=(p}\hlopt{+}\hlnum{0.4}\hlstd{),} \hlkwc{length.out} \hlstd{=} \hlnum{8}\hlstd{),}
\hlstd{tmp[}\hlnum{1}\hlopt{:}\hlnum{8}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{((b}\hlopt{+}\hlnum{0.3}\hlstd{),}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=(p}\hlopt{-}\hlnum{0.4}\hlstd{),} \hlkwc{to}\hlstd{=(p}\hlopt{+}\hlnum{0.4}\hlstd{),} \hlkwc{length.out} \hlstd{= (}\hlkwd{length}\hlstd{(tmp)}\hlopt{-}\hlnum{8}\hlstd{)),}
\hlstd{tmp[}\hlnum{9}\hlopt{:}\hlkwd{length}\hlstd{(tmp)],} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlstd{\}}\hlkwa{else if} \hlstd{(}\hlkwd{length}\hlstd{(tmp)}\hlopt{>}\hlnum{16}\hlstd{)\{}
\hlkwd{text}\hlstd{(b,p,} \hlkwd{paste0}\hlstd{(}\hlkwd{length}\hlstd{(tmp),} \hlstr{" values"}\hlstd{))}
\hlstd{\}}
......@@ -750,13 +766,25 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{tablo}\hlopt{$}\hlstd{`tmp$Gene.name`[tablo}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{0} \hlopt{&} \hlstd{tablo}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{1}\hlstd{]}
\hlkwd{text}\hlstd{(}\hlopt{-}\hlnum{0.4}\hlstd{,}\hlopt{-}\hlnum{1.2}\hlstd{,} \hlstr{"p=1/n=0"}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.6}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=}\hlnum{0.1}\hlstd{,} \hlkwc{to}\hlstd{=}\hlnum{5.5}\hlstd{,} \hlkwc{length.out} \hlstd{=} \hlnum{18}\hlstd{),}\hlopt{-}\hlnum{1.1}\hlstd{, tmp[}\hlnum{1}\hlopt{:}\hlnum{18}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=}\hlnum{0.1}\hlstd{,} \hlkwc{to}\hlstd{=}\hlnum{5.5}\hlstd{,} \hlkwc{length.out} \hlstd{=} \hlkwd{length}\hlstd{(tmp)}\hlopt{-}\hlnum{18}\hlstd{),}\hlopt{-}\hlnum{1.3}\hlstd{, tmp[}\hlnum{19}\hlopt{:}\hlkwd{length}\hlstd{(tmp)],} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=}\hlnum{0.1}\hlstd{,} \hlkwc{to}\hlstd{=}\hlnum{5.5}\hlstd{,} \hlkwc{length.out} \hlstd{=} \hlnum{19}\hlstd{),}
\hlopt{-}\hlnum{1.1}\hlstd{,}
\hlstd{tmp[}\hlnum{1}\hlopt{:}\hlnum{19}\hlstd{],}
\hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=}\hlnum{0.1}\hlstd{,} \hlkwc{to}\hlstd{=}\hlnum{5.5}\hlstd{,} \hlkwc{length.out} \hlstd{=} \hlkwd{length}\hlstd{(tmp)}\hlopt{-}\hlnum{19}\hlstd{),}
\hlopt{-}\hlnum{1.3}\hlstd{,}
\hlstd{tmp[}\hlnum{20}\hlopt{:}\hlkwd{length}\hlstd{(tmp)],}
\hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlstd{tmp}\hlkwb{<-}\hlstd{tablo}\hlopt{$}\hlstd{`tmp$Gene.name`[tablo}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{1} \hlopt{&} \hlstd{tablo}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{1}\hlstd{]}
\hlkwd{text}\hlstd{(}\hlopt{-}\hlnum{0.4}\hlstd{,}\hlopt{-}\hlnum{1.7}\hlstd{,} \hlstr{"p=1/n=1"}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.6}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=}\hlnum{0.1}\hlstd{,} \hlkwc{to}\hlstd{=}\hlnum{5.5}\hlstd{,} \hlkwc{length.out} \hlstd{=} \hlnum{18}\hlstd{),}\hlopt{-}\hlnum{1.6}\hlstd{, tmp[}\hlnum{1}\hlopt{:}\hlnum{18}\hlstd{],} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=}\hlnum{0.1}\hlstd{,} \hlkwc{to}\hlstd{=}\hlnum{4.5}\hlstd{,} \hlkwc{length.out} \hlstd{=} \hlkwd{length}\hlstd{(tmp)}\hlopt{-}\hlnum{18}\hlstd{),}\hlopt{-}\hlnum{1.8}\hlstd{, tmp[}\hlnum{19}\hlopt{:}\hlkwd{length}\hlstd{(tmp)],} \hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=}\hlnum{0.1}\hlstd{,} \hlkwc{to}\hlstd{=}\hlnum{5.5}\hlstd{,} \hlkwc{length.out} \hlstd{=} \hlnum{18}\hlstd{),}
\hlopt{-}\hlnum{1.6}\hlstd{,}
\hlstd{tmp[}\hlnum{1}\hlopt{:}\hlnum{18}\hlstd{],}
\hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlkwd{text}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwc{from}\hlstd{=}\hlnum{0.1}\hlstd{,} \hlkwc{to}\hlstd{=}\hlnum{4.5}\hlstd{,} \hlkwc{length.out} \hlstd{=} \hlkwd{length}\hlstd{(tmp)}\hlopt{-}\hlnum{18}\hlstd{),}
\hlopt{-}\hlnum{1.8}\hlstd{,}
\hlstd{tmp[}\hlnum{19}\hlopt{:}\hlkwd{length}\hlstd{(tmp)],}
\hlkwc{cex}\hlstd{=}\hlnum{0.4}\hlstd{)}
\hlstd{tmp}\hlkwb{<-}\hlstd{tablo}\hlopt{$}\hlstd{`tmp$Gene.name`[tablo}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{0} \hlopt{&} \hlstd{tablo}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{0}\hlstd{]}
......@@ -779,10 +807,16 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{write.csv}\hlstd{(tablo[tablo}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{,}\hlstr{"tmp$Gene.name"}\hlstd{],} \hlstr{"batssup3.csv"}\hlstd{,} \hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{quote}\hlstd{=}\hlnum{FALSE}\hlstd{)}
\hlkwd{write.csv}\hlstd{(tablo[tablo}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{,}\hlstr{"tmp$Gene.name"}\hlstd{],} \hlstr{"primatessup3.csv"}\hlstd{,} \hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{quote}\hlstd{=}\hlnum{FALSE}\hlstd{)}
\hlkwd{write.csv}\hlstd{(tablo,} \hlstr{"primatesVbats.csv"}\hlstd{,} \hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{quote}\hlstd{=}\hlnum{FALSE}\hlstd{)}
\hlkwd{write.csv}\hlstd{(tablo[tablo}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{,}\hlstr{"tmp$Gene.name"}\hlstd{],} \hlstr{"batssup3.csv"}\hlstd{,}
\hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,}
\hlkwc{quote}\hlstd{=}\hlnum{FALSE}\hlstd{)}
\hlkwd{write.csv}\hlstd{(tablo[tablo}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{,}\hlstr{"tmp$Gene.name"}\hlstd{],} \hlstr{"primatessup3.csv"}\hlstd{,}
\hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,}
\hlkwc{quote}\hlstd{=}\hlnum{FALSE}\hlstd{)}
\hlkwd{write.csv}\hlstd{(tablo,} \hlstr{"primatesVbats.csv"}\hlstd{,}
\hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,}
\hlkwc{quote}\hlstd{=}\hlnum{FALSE}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
......@@ -891,13 +925,13 @@ Restreindre ce tableau aux gènes présent dans l'analyse de Krogan.
\hlkwd{dim}\hlstd{(tablo)}
\end{alltt}
\begin{verbatim}
## [1] 323 3
## [1] 324 3
\end{verbatim}
\begin{alltt}
\hlkwd{dim}\hlstd{(tabloK)}
\end{alltt}
\begin{verbatim}
## [1] 320 3
## [1] 321 3
\end{verbatim}
\begin{alltt}
\hlcom{# Les gènes perdus (dans le tableau mais pas dans la liste de Krogan)}
......@@ -913,8 +947,8 @@ Restreindre ce tableau aux gènes présent dans l'analyse de Krogan.
\end{alltt}
\begin{verbatim}
## [1] "ARL6IP6" "ATP5MG" "BCS1L" "C1orf50" "CEP43" "CYB5R3"
## [7] "ELOB" "MFGE8" "POGLUT2" "POGLUT3" "SELENOS" "SIGMAR1"
## [13] "TLE5" "USP13"
## [7] "ELOB" "MFGE8" "POGLUT2" "POGLUT3" "SIGMAR1" "TLE5"
## [13] "USP13"
\end{verbatim}
\begin{alltt}
\hlkwd{write.csv}\hlstd{(tabloK,} \hlstr{"primatesVbats_onlykrogan.csv"}\hlstd{,} \hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{quote}\hlstd{=}\hlnum{FALSE}\hlstd{)}
......@@ -922,6 +956,535 @@ Restreindre ce tableau aux gènes présent dans l'analyse de Krogan.
\end{kframe}
\end{knitrout}
\section{Tanglegram}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{#install.packages('dendextend') # stable CRAN version}
\hlkwd{library}\hlstd{(dendextend)} \hlcom{# load the package}
\hlcom{#install.packages("phytools") # stable CRAN version}
\hlkwd{library}\hlstd{(phytools)} \hlcom{# load the package}
\hlkwd{library}\hlstd{(ggraph)}
\hlkwd{library}\hlstd{(igraph)}
\hlkwd{library}\hlstd{(tidyverse)}
\hlstd{tmp}\hlkwb{<-}\hlstd{tablo[(tablo}\hlopt{$}\hlstd{nbats}\hlopt{!=}\hlnum{0} \hlopt{|} \hlstd{tablo}\hlopt{$}\hlstd{nprimates}\hlopt{!=}\hlnum{0}\hlstd{),]}
\hlcom{#tmp<-head(tablo, 20)}
\hlcom{#tmp<-rbind(as.matrix(tmp), c("outgroup", 50, 50))}
\hlstd{tmp}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tmp)}
\hlstd{matbats}\hlkwb{<-}\hlkwd{hclust}\hlstd{(}\hlkwd{dist}\hlstd{(tmp}\hlopt{$}\hlstd{nbats))}
\hlstd{matpri}\hlkwb{<-}\hlkwd{hclust}\hlstd{(}\hlkwd{dist}\hlstd{(tmp}\hlopt{$}\hlstd{nprimates))}
\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{nbats),]}
\end{alltt}
\begin{verbatim}
## tmp$Gene.name nbats nprimates
## 3 AATF 0 1
## 8 ACSL3 0 1
## 13 AKAP8L 0 1
## 16 ALG5 0 2
## 20 AP2M1 0 1
## 23 ATE1 0 3
## 24 ATP13A3 0 2
## 29 BCKDK 0 1
## 32 BZW2 0 3
## 37 CEP135 0 4
## 39 CEP350 0 2
## 42 CHPF 0 2
## 46 CLCC1 0 1
## 54 CSDE1 0 1
## 56 CSNK2B 0 1
## 60 DCAF7 0 1
## 68 DPH5 0 4
## 78 ERLEC1 0 2
## 80 ERO1B 0 4
## 83 EXOSC2 0 3
## 96 FBXL12 0 2
## 99 FKBP7 0 2
## 105 GCC2 0 4
## 107 GFER 0 1
## 110 GHITM 0 4
## 113 GNB1 0 1
## 114 GNG5 0 1
## 116 GOLGA3 0 1
## 119 GORASP1 0 3
## 121 GPX1 0 1
## 125 HDAC2 0 3
## 131 HS6ST2 0 3
## 132 HSBP1 0 1
## 144 LARP4B 0 4
## 145 LARP7 0 3
## 148 MAP7D1 0 1
## 161 MRPS2 0 1
## 168 NAT14 0 3
## 169 NDFIP2 0 1
## 170 NDUFAF1 0 2
## 171 NDUFAF2 0 5
## 172 NDUFB9 0 4
## 178 NINL 0 2
## 181 NPC2 0 3
## 190 NUP98 0 1
## 191 NUTF2 0 1
## 198 PDE4DIP 0 1
## 199 PDZD11 0 1
## 200 PIGO 0 1
## 203 PKP2 0 1
## 205 PLD3 0 1
## 209 PMPCA 0 2
## 215 PPIL3 0 1
## 218 PRIM2 0 5
## 224 PTBP2 0 3
## 230 RAB14 0 3
## 232 RAB1A 0 3
## 233 RAB2A 0 3
## 234 RAB5C 0 2
## 235 RAB7A 0 1
## 241 RBM41 0 2
## 242 RBX1 0 3
## 245 REEP6 0 5
## 246 RETREG3 0 1
## 259 SELENOS 0 1
## 263 SLC25A21 0 4
## 271 SMOC1 0 2
## 274 SRP19 0 1
## 282 TARS2 0 1
## 285 TBKBP1 0 2
## 286 TCF12 0 1
## 291 TIMM8B 0 3
## 294 TLE3 0 1
## 298 TMEM97 0 1
## 299 TMPRSS2 0 4
## 300 TOMM70 0 1
## 303 TRIM59 0 3
## 305 TUBGCP2 0 4
## 308 UBAP2 0 4
## 310 UBXN8 0 1
## 322 ZNF318 0 5
## 323 ZNF503 0 1
## 324 ZYG11B 0 2
## 4 ABCC1 1 2
## 15 ALG11 1 0
## 17 ALG8 1 1
## 19 AP2A2 1 3
## 21 AP3B1 1 1
## 27 ATP6V1A 1 1
## 30 BRD2 1 0
## 35 CENPF 1 2
## 36 CEP112 1 1
## 38 CEP250 1 2
## 40 CEP68 1 5
## 43 CHPF2 1 1
## 45 CIT 1 2
## 48 CNTRL 1 3
## 52 COQ8B 1 0
## 53 CRTC3 1 2
## 58 CWC27 1 2
## 59 CYB5B 1 0
## 64 DDX21 1 1
## 65 DNAJC11 1 0
## 67 DNMT1 1 5
## 72 EIF4E2 1 3
## 76 ERC1 1 1
## 81 ERP44 1 0
## 84 EXOSC3 1 0
## 87 F2RL1 1 0
## 89 FAM8A1 1 1
## 93 FBLN5 1 1
## 97 FKBP10 1 0
## 102 G3BP1 1 1
## 103 G3BP2 1 0
## 104 GCC1 1 0
## 108 GGCX 1 0
## 111 GIGYF2 1 4
## 112 GLA 1 4
## 126 HEATR3 1 0
## 128 HMOX1 1 1
## 135 IL17RA 1 0
## 138 INTS4 1 0
## 141 KDELC1 1 1
## 143 LARP1 1 4
## 147 LOX 1 1
## 150 MARK1 1 4
## 160 MPHOSPH10 1 4
## 162 MRPS25 1 0
## 163 MRPS27 1 2
## 165 MTCH1 1 0
## 167 NARS2 1 1
## 176 NGLY1 1 3
## 177 NIN 1 0
## 179 NLRX1 1 2
## 180 NOL10 1 1
## 182 NPTX1 1 0
## 187 NUP58 1 5
## 188 NUP62 1 1
## 194 PABPC4 1 2
## 196 PCSK5 1 0
## 202 PITRM1 1 3
## 204 PLAT 1 3
## 208 PLOD2 1 3
## 213 POLA2 1 0
## 216 PPT1 1 0
## 219 PRKACA 1 0
## 220 PRKAR2A 1 4
## 221 PRKAR2B 1 2
## 222 PRRC2B 1 0
## 228 QSOX2 1 1
## 236 RAB8A 1 0
## 237 RAE1 1 1
## 250 RPL36 1 3
## 252 RTN4 1 2
## 253 SAAL1 1 4
## 255 SCAP 1 2
## 261 SIL1 1 0
## 262 SIRT5 1 4
## 266 SLC30A7 1 1
## 270 SLU7 1 2
## 273 SPART 1 0
## 276 SRP72 1 1
## 278 STOM 1 3
## 281 TAPT1 1 2
## 288 TIMM10 1 0
## 290 TIMM29 1 1
## 295 TM2D3 1 1
## 301 TOR1A 1 1
## 304 TRMT1 1 3
## 309 UBAP2L 1 1
## 311 UGGT2 1 4
## 313 USP54 1 3
## 314 VPS11 1 1
## 319 ZC3H18 1 1
## 320 ZC3H7A 1 1
## 321 ZDHHC5 1 2
## 2 AASS 2 0
## 18 ANO6 2 2
## 25 ATP1B1 2 0
## 31 BRD4 2 4
## 41 CHMP2A 2 1
## 47 CLIP4 2 4
## 50 COLGALT1 2 1
## 51 COMT 2 0
## 57 CUL2 2 0
## 63 DDX10 2 0
## 74 ELOC 2 0
## 75 EMC1 2 5
## 79 ERMP1 2 0
## 91 FAR2 2 2
## 94 FBN1 2 2
## 95 FBN2 2 2
## 100 FOXRED2 2 0
## 101 FYCO1 2 5
## 106 GDF15 2 1
## 115 GOLGA2 2 2
## 118 GOLGB1 2 3
## 120 GPAA1 2 0
## 127 HECTD1 2 5
## 130 HS2ST1 2 0
## 133 HYOU1 2 1
## 140 JAKMIP1 2 0
## 152 MARK3 2 1
## 154 MDN1 2 3
## 155 MEPCE 2 0
## 156 MIB1 2 0
## 164 MRPS5 2 3
## 166 MYCBP2 2 5
## 173 NEK9 2 0
## 174 NEU1 2 0
## 183 NSD2 2 1
## 184 NUP210 2 2
## 195 PCNT 2 4
## 197 PCSK6 2 0
## 207 PLEKHF2 2 0
## 210 PMPCB 2 3
## 211 POFUT1 2 1
## 214 POR 2 3
## 223 PSMD8 2 1
## 225 PTGES2 2 1
## 227 PVR 2 5
## 240 RBM28 2 0
## 243 RDX 2 0
## 248 RIPK1 2 4
## 251 RRP9 2 1
## 254 SBNO1 2 2
## 256 SCARB1 2 2
## 260 SEPSECS 2 5
## 280 SUN2 2 0
## 283 TBCA 2 1
## 293 TLE1 2 1
## 296 TMED5 2 0
## 297 TMEM39B 2 4
## 306 TUBGCP3 2 1
## 307 TYSND1 2 0
## 316 WASHC4 2 2
## 317 WFS1 2 2
## 5 ACAD9 3 1
## 9 ADAM9 3 3
## 11 AGPS 3 1
## 34 CDK5RAP2 3 5
## 49 COL6A1 3 2
## 122 GRIPAP1 3 1
## 123 GRPEL1 3 0
## 151 MARK2 3 0
## 157 MIPOL1 3 4
## 159 MOV10 3 3
## 185 NUP214 3 2
## 217 PRIM1 3 2
## 226 PUSL1 3 1
## 257 SCCPDH 3 3
## 267 SLC30A9 3 0
## 269 SLC9A3R1 3 0
## 6 ACADM 4 5
## 14 AKAP9 4 2
## 26 ATP6AP1 4 1
## 44 CISD3 4 1
## 77 ERGIC1 4 0
## 134 IDE 4 4
## 136 IMPDH2 4 1
## 139 ITGB1 4 5
## 146 LMAN2 4 4
## 212 POLA1 4 4
## 7 ACE2 5 5
## 71 EDEM3 5 3
## 109 GGH 5 5
## 117 GOLGA7 5 5
## 137 INHBE 5 0
## 231 RAB18 5 1
## 239 RAP1GDS1 5 3
## 264 SLC27A2 5 4
## 268 SLC44A2 5 0
## 284 TBK1 5 2
## 302 TOR1AIP1 5 5
## 315 VPS39 5 5
\end{verbatim}
\begin{alltt}
\hlstd{dendpri}\hlkwb{<-}\hlkwd{as.dendrogram}\hlstd{(matpri)}
\hlstd{dendbats}\hlkwb{<-}\hlkwd{as.dendrogram}\hlstd{(matbats)}
\hlkwd{labels}\hlstd{(dendpri)}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{`tmp$Gene.name`[}\hlkwd{labels}\hlstd{(dendpri)])}
\hlkwd{labels}\hlstd{(dendbats)}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{`tmp$Gene.name`[}\hlkwd{labels}\hlstd{(dendbats)])}
\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{nprimates,} \hlkwc{decreasing}\hlstd{=}\hlnum{FALSE}\hlstd{),]}\hlopt{$}\hlstr{'tmp$Gene.name'}\hlkwb{->} \hlstd{order}
\hlstd{dendpri}\hlkwb{<-}\hlstd{dendextend}\hlopt{::}\hlkwd{rotate}\hlstd{(dendpri,} \hlkwc{order}\hlstd{=order)}
\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{nbats,} \hlkwc{decreasing}\hlstd{=}\hlnum{FALSE}\hlstd{),]}\hlopt{$}\hlstr{'tmp$Gene.name'}\hlkwb{->} \hlstd{order}
\hlstd{dendbats}\hlkwb{<-}\hlstd{dendextend}\hlopt{::}\hlkwd{rotate}\hlstd{(dendbats,} \hlkwc{order}\hlstd{=order)}
\hlcom{#### Il faut swapper certains neud de l'arbrese}
\hlkwd{class}\hlstd{(}\hlkwd{labels}\hlstd{(dendpri))}
\end{alltt}
\begin{verbatim}
## [1] "character"
\end{verbatim}
\begin{alltt}
\hlstd{dend12} \hlkwb{<-} \hlkwd{dendlist}\hlstd{(dendbats, dendpri)}
\hlkwd{png}\hlstd{(}\hlstr{"figure/tanglegramm.png"}\hlstd{,} \hlkwc{width} \hlstd{=} \hlnum{1800}\hlstd{,} \hlkwc{height} \hlstd{=} \hlnum{3000}\hlstd{)}
\hlkwd{tanglegram}\hlstd{(dend12,} \hlkwc{columns_width}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{3}\hlstd{,}\hlnum{3}\hlstd{),} \hlkwc{axes}\hlstd{=}\hlnum{FALSE}\hlstd{,}
\hlkwc{edge.lwd}\hlstd{=}\hlnum{0}\hlstd{,} \hlkwc{margin_inner}\hlstd{=}\hlnum{6}\hlstd{,}
\hlkwc{margin_top}\hlstd{=}\hlnum{2}\hlstd{,}
\hlkwc{main_left}\hlstd{=}\hlstr{" bats"}\hlstd{,}
\hlkwc{main_right} \hlstd{=} \hlstr{"primates "}\hlstd{,}
\hlkwc{lwd}\hlstd{=}\hlnum{0.5}\hlstd{,}
\hlkwc{cex_main}\hlstd{=}\hlnum{1}\hlstd{,}
\hlkwc{lab.cex}\hlstd{=}\hlnum{1}\hlstd{,}
\hlkwc{k_labels}\hlstd{=}\hlnum{6}\hlstd{)}
\hlkwd{dev.off}\hlstd{()}
\end{alltt}
\begin{verbatim}
## RStudioGD
## 2
\end{verbatim}
\end{kframe}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{ace}\hlkwb{<-}\hlnum{264}
\hlstd{tmprss2}\hlkwb{<-}\hlnum{75}
\hlstd{znf318}\hlkwb{<-}\hlnum{81}
\hlstd{sepsecs}\hlkwb{<-}\hlnum{228}
\hlstd{tbk1}\hlkwb{<-}\hlnum{273}
\hlstd{ripk1}\hlkwb{<-}\hlnum{224}
\hlstd{col}\hlkwb{<-}\hlkwd{rep}\hlstd{(}\hlstr{"grey"}\hlstd{,} \hlkwd{length}\hlstd{(}\hlkwd{labels}\hlstd{(dendpri)))}
\hlstd{col[ace]}\hlkwb{<-}\hlstr{"black"}
\hlstd{col[tmprss2]}\hlkwb{<-}\hlstr{"black"}
\hlstd{col[znf318]}\hlkwb{<-}\hlstr{"black"}
\hlstd{col[sepsecs]}\hlkwb{<-}\hlstr{"black"}
\hlstd{col[tbk1]}\hlkwb{<-}\hlstr{"black"}
\hlstd{col[ripk1]}\hlkwb{<-}\hlstr{"black"}
\hlstd{font}\hlkwb{<-}\hlkwd{rep}\hlstd{(}\hlnum{1}\hlstd{,} \hlkwd{length}\hlstd{(}\hlkwd{labels}\hlstd{(dendpri))}\hlopt{*}\hlnum{2}\hlstd{)}
\hlcom{#font[ace]<-1.3}
\hlcom{#font[tmprss2]<-1.3}
\hlcom{#font[length(labels(dendpri))+160]<-1.3}
\hlkwd{png}\hlstd{(}\hlstr{"figure/tanglegramm.png"}\hlstd{,} \hlkwc{width} \hlstd{=} \hlnum{1800}\hlstd{,} \hlkwc{height} \hlstd{=} \hlnum{3000}\hlstd{)}
\hlkwd{tanglegram}\hlstd{(dend12,} \hlkwc{columns_width}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{3}\hlstd{,}\hlnum{3}\hlstd{),} \hlkwc{axes}\hlstd{=}\hlnum{FALSE}\hlstd{,}
\hlkwc{edge.lwd}\hlstd{=}\hlnum{0}\hlstd{,} \hlkwc{margin_inner}\hlstd{=}\hlnum{6}\hlstd{,}
\hlkwc{margin_top}\hlstd{=}\hlnum{2}\hlstd{,}
\hlkwc{main_left}\hlstd{=}\hlstr{" bats"}\hlstd{,}
\hlkwc{main_right} \hlstd{=} \hlstr{"primates "}\hlstd{,}
\hlkwc{lwd}\hlstd{=}\hlnum{0.5}\hlstd{,}
\hlkwc{cex_main}\hlstd{=}\hlnum{1}\hlstd{,}
\hlkwc{lab.cex}\hlstd{=font,}
\hlkwc{k_labels}\hlstd{=}\hlnum{6}\hlstd{,}
\hlkwc{color_lines}\hlstd{=col)}
\hlkwd{dev.off}\hlstd{()}
\end{alltt}
\begin{verbatim}
## RStudioGD
## 2
\end{verbatim}
\end{kframe}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{tablo[(tablo}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3} \hlopt{|} \hlstd{tablo}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{),]}
\hlkwd{dim}\hlstd{(tmp)}
\end{alltt}
\begin{verbatim}
## [1] 108 3
\end{verbatim}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tmp)}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlstr{"nbats"}\hlstd{,} \hlstr{"nprimates"}\hlstd{)}
\hlstd{matbats}\hlkwb{<-}\hlkwd{hclust}\hlstd{(}\hlkwd{dist}\hlstd{(tmp}\hlopt{$}\hlstd{nbats))}
\hlstd{matpri}\hlkwb{<-}\hlkwd{hclust}\hlstd{(}\hlkwd{dist}\hlstd{(tmp}\hlopt{$}\hlstd{nprimates))}
\hlcom{#tmp[order(tmp$nbats),]}
\hlstd{dendpri}\hlkwb{<-}\hlkwd{as.dendrogram}\hlstd{(matpri)}
\hlstd{dendbats}\hlkwb{<-}\hlkwd{as.dendrogram}\hlstd{(matbats)}
\hlkwd{labels}\hlstd{(dendpri)}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{tmp.Gene.name[}\hlkwd{labels}\hlstd{(dendpri)])}
\hlkwd{labels}\hlstd{(dendbats)}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{tmp.Gene.name[}\hlkwd{labels}\hlstd{(dendbats)])}
\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{nprimates,} \hlkwc{decreasing}\hlstd{=}\hlnum{FALSE}\hlstd{),]}\hlopt{$}\hlstd{tmp.Gene.name}\hlkwb{->} \hlstd{order}
\hlstd{dendpri}\hlkwb{<-}\hlstd{dendextend}\hlopt{::}\hlkwd{rotate}\hlstd{(dendpri,} \hlkwc{order}\hlstd{=order)}
\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{nbats,} \hlkwc{decreasing}\hlstd{=}\hlnum{FALSE}\hlstd{),]}\hlopt{$}\hlstd{tmp.Gene.name}\hlkwb{->} \hlstd{order}
\hlstd{dendbats}\hlkwb{<-}\hlstd{dendextend}\hlopt{::}\hlkwd{rotate}\hlstd{(dendbats,} \hlkwc{order}\hlstd{=order)}
\hlcom{#### Il faut swapper certains neuds de l'arbres}
\hlkwd{class}\hlstd{(}\hlkwd{labels}\hlstd{(dendpri))}
\end{alltt}
\begin{verbatim}
## [1] "character"
\end{verbatim}
\begin{alltt}
\hlstd{dend12} \hlkwb{<-} \hlkwd{dendlist}\hlstd{(dendbats, dendpri)}
\hlstd{ace}\hlkwb{<-}\hlnum{97}
\hlstd{tmprss2}\hlkwb{<-}\hlnum{27}
\hlstd{znf318}\hlkwb{<-}\hlnum{31}
\hlstd{sepsecs}\hlkwb{<-}\hlnum{69}
\hlstd{tbk1}\hlkwb{<-}\hlnum{106}
\hlstd{ripk1}\hlkwb{<-}\hlnum{68}
\hlstd{col}\hlkwb{<-}\hlkwd{rep}\hlstd{(}\hlstr{"lightblue"}\hlstd{,} \hlkwd{length}\hlstd{(}\hlkwd{labels}\hlstd{(dendpri)))}
\hlstd{plusplus}\hlkwb{<-}\hlstd{tmp}\hlopt{$}\hlstd{tmp.Gene.name[tmp}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3} \hlopt{&} \hlstd{tmp}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{]}
\hlstd{col[}\hlkwd{which}\hlstd{(}\hlkwd{labels}\hlstd{(dendbats)} \hlopt{%in%} \hlstd{plusplus)]}\hlkwb{<-}\hlstr{"pink"}
\hlstd{interest}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"TMPRSS2"}\hlstd{,}\hlstr{"ZNF318"}\hlstd{,} \hlstr{"SEPSECS"}\hlstd{,}\hlstr{"TBK1"}\hlstd{,} \hlstr{"RIPK1"}\hlstd{)}
\hlstd{col[}\hlkwd{which}\hlstd{(}\hlkwd{labels}\hlstd{(dendbats)} \hlopt{%in%} \hlstd{interest)]}\hlkwb{<-}\hlstr{"blue"}
\hlstd{interestpp}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"ACE2"}\hlstd{)}
\hlstd{col[}\hlkwd{which}\hlstd{(}\hlkwd{labels}\hlstd{(dendbats)} \hlopt{%in%} \hlstd{interestpp)]}\hlkwb{<-}\hlstr{"red"}
\hlkwd{png}\hlstd{(}\hlstr{"figure/tanglegrammsup3.png"}\hlstd{,} \hlkwc{width} \hlstd{=} \hlnum{500}\hlstd{,} \hlkwc{height} \hlstd{=} \hlnum{1200}\hlstd{)}
\hlkwd{tanglegram}\hlstd{(dend12,} \hlkwc{columns_width}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{3}\hlstd{,}\hlnum{3}\hlstd{),} \hlkwc{axes}\hlstd{=}\hlnum{FALSE}\hlstd{,}
\hlkwc{edge.lwd}\hlstd{=}\hlnum{0}\hlstd{,} \hlkwc{margin_inner}\hlstd{=}\hlnum{6}\hlstd{,}
\hlkwc{margin_top}\hlstd{=}\hlnum{3}\hlstd{,}
\hlkwc{main_left}\hlstd{=}\hlstr{" bats"}\hlstd{,}
\hlkwc{main_right} \hlstd{=} \hlstr{"primates "}\hlstd{,}
\hlkwc{lwd}\hlstd{=}\hlnum{0.5}\hlstd{,}
\hlkwc{cex_main}\hlstd{=}\hlnum{2}\hlstd{,}
\hlkwc{lab.cex}\hlstd{=}\hlnum{1}\hlstd{,}
\hlkwc{k_labels}\hlstd{=}\hlnum{6}\hlstd{,}
\hlkwc{color_lines}\hlstd{=col)}
\hlkwd{dev.off}\hlstd{()}
\end{alltt}
\begin{verbatim}
## RStudioGD
## 2
\end{verbatim}
\begin{alltt}
\hlcom{### Changer couleurs des groupes}
\hlcom{## changer couleurs des lines sel vs sel or sel vs non-sel}
\hlkwd{setEPS}\hlstd{()}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/unnamed-chunk-13-1}
\begin{kframe}\begin{alltt}
\hlkwd{postscript}\hlstd{(}\hlstr{"figure/tanglegramsup3.eps"}\hlstd{,} \hlkwc{height}\hlstd{=}\hlnum{15}\hlstd{,} \hlkwc{width}\hlstd{=}\hlnum{5}\hlstd{)}
\hlkwd{tanglegram}\hlstd{(dend12,} \hlkwc{columns_width}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{3}\hlstd{,}\hlnum{3}\hlstd{),} \hlkwc{axes}\hlstd{=}\hlnum{FALSE}\hlstd{,}
\hlkwc{edge.lwd}\hlstd{=}\hlnum{0}\hlstd{,} \hlkwc{margin_inner}\hlstd{=}\hlnum{6}\hlstd{,}
\hlkwc{margin_top}\hlstd{=}\hlnum{3}\hlstd{,}
\hlkwc{main_left}\hlstd{=}\hlstr{" bats"}\hlstd{,}
\hlkwc{main_right} \hlstd{=} \hlstr{"primates "}\hlstd{,}
\hlkwc{lwd}\hlstd{=}\hlnum{0.5}\hlstd{,}
\hlkwc{cex_main}\hlstd{=}\hlnum{2}\hlstd{,}
\hlkwc{lab.cex}\hlstd{=}\hlnum{1}\hlstd{,}
\hlcom{# k_labels=6,}
\hlkwc{color_lines}\hlstd{=col)}
\hlkwd{dev.off}\hlstd{()}
\end{alltt}
\begin{verbatim}
## RStudioGD
## 2
\end{verbatim}
\begin{alltt}
\hlkwd{labels_colors}\hlstd{(dend12[[}\hlnum{1}\hlstd{]])}\hlkwb{<-}\hlkwd{rep}\hlstd{(}\hlkwd{rainbow}\hlstd{(}\hlnum{15}\hlstd{)[}\hlkwd{c}\hlstd{(}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{,} \hlnum{9}\hlopt{:}\hlnum{11}\hlstd{)],} \hlkwd{table}\hlstd{(tmp}\hlopt{$}\hlstd{nbats))}
\hlkwd{labels_colors}\hlstd{(dend12[[}\hlnum{2}\hlstd{]])}\hlkwb{<-}\hlkwd{rep}\hlstd{(}\hlkwd{rainbow}\hlstd{(}\hlnum{15}\hlstd{)[}\hlkwd{c}\hlstd{(}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{,} \hlnum{9}\hlopt{:}\hlnum{11}\hlstd{)],} \hlkwd{table}\hlstd{(tmp}\hlopt{$}\hlstd{nprimates))}
\hlkwd{labels_colors}\hlstd{(dend12[[}\hlnum{1}\hlstd{]])}\hlkwb{<-}\hlkwd{rep}\hlstd{(}\hlkwd{viridis}\hlstd{(}\hlnum{10}\hlstd{)[}\hlkwd{c}\hlstd{(}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{,} \hlnum{7}\hlopt{:}\hlnum{9}\hlstd{)],} \hlkwd{table}\hlstd{(tmp}\hlopt{$}\hlstd{nbats))}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in viridis(10): impossible de trouver la fonction "{}viridis"{}}}\begin{alltt}
\hlkwd{labels_colors}\hlstd{(dend12[[}\hlnum{2}\hlstd{]])}\hlkwb{<-}\hlkwd{rep}\hlstd{(}\hlkwd{viridis}\hlstd{(}\hlnum{10}\hlstd{)[}\hlkwd{c}\hlstd{(}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{,} \hlnum{7}\hlopt{:}\hlnum{9}\hlstd{)],} \hlkwd{table}\hlstd{(tmp}\hlopt{$}\hlstd{nprimates))}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in viridis(10): impossible de trouver la fonction "{}viridis"{}}}\begin{alltt}
\hlkwd{setEPS}\hlstd{()}
\hlkwd{postscript}\hlstd{(}\hlstr{"figure/tanglegramsup3_V2.eps"}\hlstd{,} \hlkwc{height}\hlstd{=}\hlnum{15}\hlstd{,} \hlkwc{width}\hlstd{=}\hlnum{5}\hlstd{)}
\hlkwd{tanglegram}\hlstd{(dend12,} \hlkwc{columns_width}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,} \hlnum{3}\hlstd{,}\hlnum{3}\hlstd{),} \hlkwc{axes}\hlstd{=}\hlnum{FALSE}\hlstd{,}
\hlkwc{edge.lwd}\hlstd{=}\hlnum{0}\hlstd{,} \hlkwc{margin_inner}\hlstd{=}\hlnum{6}\hlstd{,}
\hlkwc{margin_top}\hlstd{=}\hlnum{3}\hlstd{,}
\hlkwc{main_left}\hlstd{=}\hlstr{" bats"}\hlstd{,}
\hlkwc{main_right} \hlstd{=} \hlstr{"primates "}\hlstd{,}
\hlkwc{lwd}\hlstd{=}\hlnum{0.5}\hlstd{,}
\hlkwc{cex_main}\hlstd{=}\hlnum{2}\hlstd{,}
\hlkwc{lab.cex}\hlstd{=}\hlnum{1}\hlstd{,}
\hlcom{# k_labels=6,}
\hlkwc{color_lines}\hlstd{=col)}
\hlkwd{dev.off}\hlstd{()}
\end{alltt}
\begin{verbatim}
## RStudioGD
## 2
\end{verbatim}
\end{kframe}
\end{knitrout}
\end{document}
......
\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-CoV-2: comparison of different analyses, MAIC}
\author{Marie Cariou}
\date{March 2021} % Activate to display a given date or no date
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
Output from covid\_comp\_dataset.
<<>>=
# Per-gene table with the bat and primate DGINN scores (columns nbats and
# nprimates); the later chunks address its gene column as tmp.Gene.name.
# `header = TRUE` is spelled out: `h = T` relied on partial argument matching
# and on `T`, which is an ordinary variable that can be reassigned.
tablo <- read.table("primatesVbats.csv", header = TRUE, sep = ",")
@
MAIC output formatted by Léa. This table includes the DGINN ``score''.
<<>>=
# NOTE(review): absolute, machine-specific location; change `home` to run this
# document elsewhere. `workdir` keeps its trailing slash because file paths
# are built from it with paste0().
home <- "/home/adminmarie/Documents/"
workdir <- paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")

# MAIC table, read with read.table's default (whitespace) separator.
# `header = TRUE` replaces the partially matched, reassignable `h = T`.
maic <- read.table(
  paste0(workdir, "data/covid_comp_maic.txt"),
  header = TRUE
)
@
\section{MAIC}
\subsection{Boxplot}
<<boxplot, fig.height=12>>=
# Two stacked panels: MAIC rank as a function of the DGINN score,
# bats on top and primates below.
par(mfrow = c(2, 1))

# Bats: notched boxes whose widths reflect group sizes (varwidth), with every
# gene overlaid as a small jittered point.
boxplot(
  rank ~ nbats,
  data = maic,
  notch = TRUE,
  varwidth = TRUE,
  xlab = "score DGINN",
  ylab = "rank MAIC",
  main = "Bats"
)
stripchart(
  rank ~ nbats,
  data = maic,
  method = "jitter",
  vertical = TRUE,
  pch = 1,
  cex = 0.3,
  add = TRUE
)

# Primates: same display, without varwidth.
boxplot(
  rank ~ nprimates,
  data = maic,
  notch = TRUE,
  xlab = "score DGINN",
  ylab = "rank MAIC",
  main = "Primates"
)
stripchart(
  rank ~ nprimates,
  data = maic,
  method = "jitter",
  vertical = TRUE,
  pch = 1,
  cex = 0.3,
  add = TRUE
)
@
\subsection{Dotchart}
<<dotbats, fig.height=8>>=
# Dot chart of the MAIC rank for genes whose bat DGINN score is >= 3.
# which() drops rows with an NA score; a bare logical index would turn each
# of them into an all-NA row that is drawn as a phantom "NA" gene.
tmp <- maic[which(maic$nbats >= 3), c("gene", "rank", "nbats")]
tmp <- tmp[order(tmp$rank, decreasing = TRUE), ]

# ACE2 and TMPRSS2 are drawn in red, every other gene in black.
tmp$col <- ifelse(tmp$gene %in% c("ACE2", "TMPRSS2"), "red", "black")

# One plotting symbol per score. The same vector feeds the legend so that the
# points and the legend cannot drift apart; other scores get NA.
score_levels <- c(5, 4, 3)
score_pch <- c(1, 20, 4)
tmp$pch <- score_pch[match(tmp$nbats, score_levels)]

dotchart(
  tmp$rank,
  main = "Bats DGINN >=3",
  xlab = "rank MAIC",
  labels = tmp$gene,
  pch = tmp$pch,
  col = tmp$col
)
legend("topright", c("5 (score DGINN)", "4", "3"), pch = score_pch)
@
<<dotprimates, fig.height=12>>=
# Dot chart of the MAIC rank for genes whose primate DGINN score is >= 3.
# which() drops rows with an NA score; a bare logical index would turn each
# of them into an all-NA row that is drawn as a phantom "NA" gene.
tmp <- maic[which(maic$nprimates >= 3), c("gene", "rank", "nprimates")]
tmp <- tmp[order(tmp$rank, decreasing = TRUE), ]

# One plotting symbol per score. The same vector feeds the legend so that the
# points and the legend cannot drift apart; other scores get NA.
score_levels <- c(5, 4, 3)
score_pch <- c(1, 20, 4)
tmp$pch <- score_pch[match(tmp$nprimates, score_levels)]

# ACE2 and TMPRSS2 are drawn in red, every other gene in black.
tmp$col <- ifelse(tmp$gene %in% c("ACE2", "TMPRSS2"), "red", "black")

dotchart(
  tmp$rank,
  main = "Primates DGINN >=3",
  xlab = "rank MAIC",
  labels = tmp$gene,
  pch = tmp$pch,
  cex = 0.8,
  col = tmp$col
)
legend("topright", c("5 (score DGINN)", "4", "3"), pch = score_pch)
@
\section{Pan Corona}
<<>>=
# Pan-corona table: tab-separated despite the .csv extension; fill = TRUE lets
# read.table pad rows that have fewer fields than the others.
# `header = TRUE` replaces the partially matched, reassignable `h = T`.
pancorona <- read.table(
  paste0(workdir, "data/pancorona_S5.csv"),
  header = TRUE, fill = TRUE, sep = "\t"
)
# Give the first column the same name as the gene column of tablo.
names(pancorona)[1] <- "tmp.Gene.name"

# Genes present in both tables
pancorona$tmp.Gene.name[pancorona$tmp.Gene.name %in% tablo$tmp.Gene.name]
# Genes found only in the pan-corona table
sort(pancorona$tmp.Gene.name[!(pancorona$tmp.Gene.name %in% tablo$tmp.Gene.name)])
# Genes found only in tablo
sort(tablo$tmp.Gene.name[!(tablo$tmp.Gene.name %in% pancorona$tmp.Gene.name)])
@
<<pancorona, fig.height=8>>=
# Keep the gene name and the pan-corona score (TOTAL), attach the DGINN scores
# from tablo, and drop every row that still has a missing value (for example
# genes that are absent from tablo).
pancorona <- pancorona[, c("tmp.Gene.name", "TOTAL")]
pandginn <- na.omit(
  merge(pancorona, tablo, by = "tmp.Gene.name", all.x = TRUE)
)

# Sort by pan-corona score, ties broken by primate score. order() is stable,
# so this single call matches sorting by nprimates first and then by TOTAL.
pandginn <- pandginn[order(pandginn$TOTAL, pandginn$nprimates), ]

# Columns are addressed by name rather than by position, so the overlay keeps
# matching the legend if tablo ever gains or reorders columns.
dotchart(
  as.matrix(pandginn$TOTAL),
  labels = pandginn$tmp.Gene.name,
  xlim = c(0, 5)
)
# dotchart() draws row i at height i; seq_len() also copes with zero rows.
rows <- seq_len(nrow(pandginn))
points(pandginn$nprimates, rows, col = "blue", pch = 20, cex = 0.7)
points(pandginn$nbats, rows, col = "blue", pch = 4)
legend(
  "bottomright",
  c("pancorona score", "dginn primate score", "dginn bats score"),
  pch = c(1, 20, 4),
  col = c("black", "blue", "blue")
)
@
Is there an enrichment in pan-corona genes among our genes under PS?
<<>>=
# Keep every gene of tablo; genes without a pan-corona entry get NA in TOTAL.
pandginnall <- merge(pancorona, tablo, by = "tmp.Gene.name", all.y = TRUE)
dim(pandginnall)

# Independence test, bats: listed in pan-corona vs. DGINN score >= 3.
# `chi` keeps its name because chisq.test() echoes it in its printed output.
table(!is.na(pandginnall$TOTAL))
table(pandginnall$nbats >= 3)
chi <- table(!is.na(pandginnall$TOTAL), pandginnall$nbats >= 3)
chi
chisq.test(chi)

# Independence test, primates: listed in pan-corona vs. DGINN score >= 3.
table(!is.na(pandginnall$TOTAL))
table(pandginnall$nprimates >= 3)
chi <- table(!is.na(pandginnall$TOTAL), pandginnall$nprimates >= 3)
chi
chisq.test(chi)
@
No enrichment in pan-corona genes among our genes under PS.
\end{document}
File added
\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb
\usepackage{framed}
\makeatletter
% kframe: environment that typesets its content on a shaded background
% (\colorbox{shadecolor}) through \MakeFramed from the framed package.
% When it is opened in inner horizontal mode, the content is first wrapped in
% a minipage of width \columnwidth, which \at@end@of@kframe closes at the end.
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis, maic}
\author{Marie Cariou}
\date{March 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
output from covid\_comp\_dataset.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tablo}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlstr{"primatesVbats.csv"}\hlstd{,}
\hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
Output MAIC formatted by Léa. This table includes the DGINN "score".
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/"}
\hlstd{workdir}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,} \hlstr{"CIRI_BIBS_projects/2020_05_Etienne_covid/"}\hlstd{)}
\hlstd{maic}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/covid_comp_maic.txt"}\hlstd{),}
\hlkwc{h}\hlstd{=T)}
\end{alltt}
\end{kframe}
\end{knitrout}
\section{MAIC}
\subsection{Boxplot}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{1}\hlstd{))}
\hlkwd{boxplot}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nbats,} \hlkwc{notch}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{varwidth}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"score DGINN"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"Bats"}\hlstd{)}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in bxp(list(stats = structure(c(21, 825, 1664, 2860, 5392, 15, 625.5, : some notches went outside hinges ('box'): maybe set notch=FALSE}}\begin{alltt}
\hlkwd{stripchart}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nbats,} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{vertical}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.3}\hlstd{,} \hlkwc{add}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\hlkwd{boxplot}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nprimates,} \hlkwc{notch}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"score DGINN"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"Primates"}\hlstd{)}
\hlkwd{stripchart}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nprimates,} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{vertical}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.3}\hlstd{,} \hlkwc{add}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/boxplot-1}
\end{knitrout}
\subsection{Dotchart}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{maic[maic}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"gene"}\hlstd{,} \hlstr{"rank"}\hlstd{,} \hlstr{"nbats"}\hlstd{)]}
\hlstd{tmp}\hlkwb{<-}\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{decreasing} \hlstd{=} \hlnum{TRUE}\hlstd{),]}
\hlstd{tmp}\hlopt{$}\hlstd{col}\hlkwb{<-}\hlstr{"black"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"ACE2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{5}\hlstd{]}\hlkwb{<-}\hlnum{1}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlnum{20}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{3}\hlstd{]}\hlkwb{<-}\hlnum{4}
\hlkwd{dotchart}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{main}\hlstd{=}\hlstr{"Bats DGINN >=3"}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{labels}\hlstd{=tmp}\hlopt{$}\hlstd{gene,} \hlkwc{pch}\hlstd{=tmp}\hlopt{$}\hlstd{pch,} \hlkwc{col}\hlstd{=tmp}\hlopt{$}\hlstd{col)}
\hlkwd{legend}\hlstd{(}\hlstr{"topright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"5 (score DGINN)"}\hlstd{,} \hlstr{"4"}\hlstd{,} \hlstr{"3"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/dotbats-1}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{maic[maic}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"gene"}\hlstd{,} \hlstr{"rank"}\hlstd{,} \hlstr{"nprimates"}\hlstd{)]}
\hlstd{tmp}\hlkwb{<-}\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{decreasing} \hlstd{=} \hlnum{TRUE}\hlstd{),]}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{5}\hlstd{]}\hlkwb{<-}\hlnum{1}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlnum{20}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{3}\hlstd{]}\hlkwb{<-}\hlnum{4}
\hlstd{tmp}\hlopt{$}\hlstd{col}\hlkwb{<-}\hlstr{"black"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"ACE2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlkwd{dotchart}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{main}\hlstd{=}\hlstr{"Primates DGINN >=3"}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{labels}\hlstd{=tmp}\hlopt{$}\hlstd{gene,} \hlkwc{pch}\hlstd{=tmp}\hlopt{$}\hlstd{pch,} \hlkwc{cex}\hlstd{=}\hlnum{0.8}\hlstd{,} \hlkwc{col}\hlstd{=tmp}\hlopt{$}\hlstd{col)}
\hlkwd{legend}\hlstd{(}\hlstr{"topright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"5 (score DGINN)"}\hlstd{,} \hlstr{"4"}\hlstd{,} \hlstr{"3"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/dotprimates-1}
\end{knitrout}
\section{Pan Corona}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pancorona}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/pancorona_S5.csv"}\hlstd{),}
\hlkwc{h}\hlstd{=T,} \hlkwc{fill} \hlstd{=} \hlnum{TRUE}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\hlkwd{names}\hlstd{(pancorona)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwd{names}\hlstd{(pancorona)[}\hlopt{-}\hlnum{1}\hlstd{])}
\hlcom{# Genes en commun}
\hlstd{pancorona}\hlopt{$}\hlstd{tmp.Gene.name[pancorona}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{tablo}\hlopt{$}\hlstd{tmp.Gene.name]}
\end{alltt}
\begin{verbatim}
## [1] TBK1 MARK3 GIGYF2 MARK2 G3BP1 LARP1 ACE2 PABPC1
## [9] TMPRSS2 AP3B1 CLCC1 CSDE1 HECTD1 MARK1 MEPCE PDE4DIP
## [17] POR PRKAR2B RAB5C RTN4 SRP54 UBAP2 UBAP2L UBXN8
## [25] SPART BZW2 EIF4E2 SMOC1 STOML2 DDX21 FAM98A G3BP2
## [33] MOV10 PABPC4 UPF1
## 105 Levels: ACE2 ANPEP AP3B1 ATXN2L BTF3 BZW2 CKAP5 CLCC1 ... YTHDF2
\end{verbatim}
\begin{alltt}
\hlcom{# Uniquement dans le tableau pancorona}
\hlkwd{sort}\hlstd{(pancorona}\hlopt{$}\hlstd{tmp.Gene.name[(pancorona}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{tablo}\hlopt{$}\hlstd{tmp.Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] ANPEP ATXN2L BTF3 CKAP5 CTSB CTSL
## [7] CYB5R3 DDX1 DDX5 DDX58 DHX9 DNM1L
## [13] EEF1A1 EIF2A EIF3F EIF4B EZR FLNA
## [19] FURIN FUS GSK3A GSK3B HDLBP HNRNPA1
## [25] HNRNPD HNRNPF HNRNPU IFIH1 IGF2BP1 IKBKB
## [31] IKBKE IRF3 ISG15 KPNA3 KPNB1 MYH9
## [37] NCL POLD1 POLR2B PRKRA RBM14 RCHY1
## [43] RPL13A RPL26 RPS13 RPS17 RPS19 RPS9
## [49] SDCBP SERBP1 SGTA SLC1A5 SNAP47 SSB
## [55] STING1 SYNCRIP TANC1 TBCB TMPRSS11D TRAF3
## [61] TUBA4A TUBB2A TUBB4A TUBB6 USP10 VPS36
## [67] XRCC5 XRCC6 YBX1 YTHDF2
## 105 Levels: ACE2 ANPEP AP3B1 ATXN2L BTF3 BZW2 CKAP5 CLCC1 ... YTHDF2
\end{verbatim}
\begin{alltt}
\hlcom{## Uniquement dans tableau }
\hlkwd{sort}\hlstd{(tablo}\hlopt{$}\hlstd{tmp.Gene.name[(tablo}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{pancorona}\hlopt{$}\hlstd{tmp.Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] AAR2 AASS AATF ABCC1 ACAD9 ACADM
## [7] ACSL3 ADAM9 ADAMTS1 AGPS AKAP8 AKAP8L
## [13] AKAP9 ALG11 ALG5 ALG8 ANO6 AP2A2
## [19] AP2M1 ARF6 ATE1 ATP13A3 ATP1B1 ATP6AP1
## [25] ATP6V1A BAG5 BCKDK BRD2 BRD4 CCDC86
## [31] CDK5RAP2 CENPF CEP112 CEP135 CEP250 CEP350
## [37] CEP68 CHMP2A CHPF CHPF2 CISD3 CIT
## [43] CLIP4 CNTRL COL6A1 COLGALT1 COMT COQ8B
## [49] CRTC3 CSNK2A2 CSNK2B CUL2 CWC27 CYB5B
## [55] DCAF7 DCAKD DCTPP1 DDX10 DNAJC11 DNAJC19
## [61] DNMT1 DPH5 DPY19L1 ECSIT EDEM3 EIF4H
## [67] ELOC EMC1 ERC1 ERGIC1 ERLEC1 ERMP1
## [73] ERO1B ERP44 ETFA EXOSC2 EXOSC3 EXOSC5
## [79] EXOSC8 F2RL1 FAM162A FAM8A1 FAR2 FASTKD5
## [85] FBLN5 FBN1 FBN2 FBXL12 FKBP10 FKBP15
## [91] FKBP7 FOXRED2 FYCO1 GCC1 GCC2 GDF15
## [97] GFER GGCX GGH GHITM GLA GNB1
## [103] GNG5 GOLGA2 GOLGA3 GOLGA7 GOLGB1 GORASP1
## [109] GPAA1 GPX1 GRIPAP1 GRPEL1 GTF2F2 HDAC2
## [115] HEATR3 HMOX1 HOOK1 HS2ST1 HS6ST2 HSBP1
## [121] HYOU1 IDE IL17RA IMPDH2 INHBE INTS4
## [127] ITGB1 JAKMIP1 KDELC1 KDELC2 LARP4B LARP7
## [133] LMAN2 LOX MAP7D1 MARC1 MAT2B MDN1
## [139] MIB1 MIPOL1 MOGS MPHOSPH10 MRPS2 MRPS25
## [145] MRPS27 MRPS5 MTCH1 MYCBP2 NARS2 NAT14
## [151] NDFIP2 NDUFAF1 NDUFAF2 NDUFB9 NEK9 NEU1
## [157] NGDN NGLY1 NIN NINL NLRX1 NOL10
## [163] NPC2 NPTX1 NSD2 NUP210 NUP214 NUP54
## [169] NUP58 NUP62 NUP88 NUP98 NUTF2 OS9
## [175] PCNT PCSK5 PCSK6 PDZD11 PIGO PIGS
## [181] PITRM1 PKP2 PLAT PLD3 PLEKHA5 PLEKHF2
## [187] PLOD2 PMPCA PMPCB POFUT1 POLA1 POLA2
## [193] PPIL3 PPT1 PRIM1 PRIM2 PRKACA PRKAR2A
## [199] PRRC2B PSMD8 PTBP2 PTGES2 PUSL1 PVR
## [205] QSOX2 RAB10 RAB14 RAB18 RAB1A RAB2A
## [211] RAB7A RAB8A RAE1 RALA RAP1GDS1 RBM28
## [217] RBM41 RBX1 RDX REEP5 REEP6 RETREG3
## [223] RHOA RIPK1 RNF41 RPL36 RRP9 SAAL1
## [229] SBNO1 SCAP SCARB1 SCCPDH SDF2 SELENOS
## [235] SEPSECS SIL1 SIRT5 SLC25A21 SLC27A2 SLC30A6
## [241] SLC30A7 SLC30A9 SLC44A2 SLC9A3R1 SLU7 SNIP1
## [247] SRP19 SRP72 STC2 STOM SUN2 TAPT1
## [253] TARS2 TBCA TBKBP1 TCF12 THTPA TIMM10
## [259] TIMM10B TIMM29 TIMM8B TIMM9 TLE1 TLE3
## [265] TM2D3 TMED5 TMEM39B TMEM97 TOMM70 TOR1A
## [271] TOR1AIP1 TRIM59 TRMT1 TUBGCP2 TUBGCP3 TYSND1
## [277] UGGT2 USP54 VPS11 VPS39 WASHC4 WFS1
## [283] YIF1A ZC3H18 ZC3H7A ZDHHC5 ZNF318 ZNF503
## [289] ZYG11B
## 324 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pancorona}\hlkwb{<-}\hlstd{pancorona[,}\hlkwd{c}\hlstd{(}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlstr{"TOTAL"}\hlstd{)]}
\hlstd{pandginn}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(}\hlkwd{merge}\hlstd{(pancorona, tablo,} \hlkwc{by}\hlstd{=}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=}\hlnum{TRUE}\hlstd{))}
\hlstd{pandginn}\hlkwb{<-}\hlstd{pandginn[}\hlkwd{order}\hlstd{(pandginn}\hlopt{$}\hlstd{nprimates),]}
\hlstd{pandginn}\hlkwb{<-}\hlstd{pandginn[}\hlkwd{order}\hlstd{(pandginn}\hlopt{$}\hlstd{TOTAL),]}
\hlkwd{dotchart}\hlstd{(}\hlkwd{as.matrix}\hlstd{(pandginn[,}\hlnum{2}\hlstd{]),} \hlkwc{labels} \hlstd{= pandginn}\hlopt{$}\hlstd{tmp.Gene.name,} \hlkwc{xlim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwd{points}\hlstd{(pandginn[,}\hlnum{4}\hlstd{],} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(pandginn),} \hlkwc{col}\hlstd{=}\hlstr{"blue"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\hlkwd{points}\hlstd{(pandginn[,}\hlnum{3}\hlstd{],} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(pandginn),} \hlkwc{col}\hlstd{=}\hlstr{"blue"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{4}\hlstd{)}
\hlkwd{legend}\hlstd{(}\hlstr{"bottomright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"pancorona score"}\hlstd{,} \hlstr{"dginn primate score"}\hlstd{,} \hlstr{"dginn bats score"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{),} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"black"}\hlstd{,} \hlstr{"blue"}\hlstd{,} \hlstr{"blue"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/pancorona-1}
\end{knitrout}
Are Pan-corona genes enriched among our genes under positive selection (PS)?
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pandginnall}\hlkwb{<-}\hlkwd{merge}\hlstd{(pancorona, tablo,} \hlkwc{by}\hlstd{=}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=}\hlnum{FALSE}\hlstd{,}\hlkwc{all.y}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\hlkwd{dim}\hlstd{(pandginnall)}
\end{alltt}
\begin{verbatim}
## [1] 324 4
\end{verbatim}
\begin{alltt}
\hlcom{# test indépendance: under PS / in the pancorona list}
\hlkwd{table}\hlstd{(}\hlkwd{is.na}\hlstd{(pandginnall}\hlopt{$}\hlstd{TOTAL)}\hlopt{==}\hlnum{FALSE}\hlstd{)}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## 289 35
\end{verbatim}
\begin{alltt}
\hlkwd{table}\hlstd{(pandginnall}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{)}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## 286 38
\end{verbatim}
\begin{alltt}
\hlstd{chi}\hlkwb{<-}\hlkwd{table}\hlstd{(}\hlkwd{is.na}\hlstd{(pandginnall}\hlopt{$}\hlstd{TOTAL)}\hlopt{==}\hlnum{FALSE}\hlstd{,pandginnall}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{)}
\hlstd{chi}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## FALSE 255 34
## TRUE 31 4
\end{verbatim}
\begin{alltt}
\hlkwd{chisq.test}\hlstd{(chi)}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in chisq.test(chi): Chi-squared approximation may be incorrect}}\begin{verbatim}
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: chi
## X-squared = 7.6869e-31, df = 1, p-value = 1
\end{verbatim}
\begin{alltt}
\hlkwd{table}\hlstd{(}\hlkwd{is.na}\hlstd{(pandginnall}\hlopt{$}\hlstd{TOTAL)}\hlopt{==}\hlnum{FALSE}\hlstd{)}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## 289 35
\end{verbatim}
\begin{alltt}
\hlkwd{table}\hlstd{(pandginnall}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{)}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## 236 88
\end{verbatim}
\begin{alltt}
\hlstd{chi}\hlkwb{<-}\hlkwd{table}\hlstd{(}\hlkwd{is.na}\hlstd{(pandginnall}\hlopt{$}\hlstd{TOTAL)}\hlopt{==}\hlnum{FALSE}\hlstd{,pandginnall}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{)}
\hlstd{chi}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## FALSE 212 77
## TRUE 24 11
\end{verbatim}
\begin{alltt}
\hlkwd{chisq.test}\hlstd{(chi)}
\end{alltt}
\begin{verbatim}
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: chi
## X-squared = 0.15992, df = 1, p-value = 0.6892
\end{verbatim}
\end{kframe}
\end{knitrout}
No enrichment in Pan-corona genes among our genes under PS: neither chi-squared test is significant (bats: p-value = 1; primates: p-value = 0.6892).
\end{document}
File added
\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb
\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-CoV-2, comparison of different analyses, MAIC}
\author{Marie Cariou}
\date{March 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
Output from covid\_comp\_dataset.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tablo}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlstr{"primatesVbats.csv"}\hlstd{,}
\hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
MAIC output formatted by Léa. This table includes the DGINN ``score''.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/"}
\hlstd{workdir}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,} \hlstr{"CIRI_BIBS_projects/2020_05_Etienne_covid/"}\hlstd{)}
\hlstd{maic}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/covid_comp_maic.txt"}\hlstd{),}
\hlkwc{h}\hlstd{=T)}
\end{alltt}
\end{kframe}
\end{knitrout}
\section{MAIC}
\subsection{Boxplot}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{1}\hlstd{))}
\hlkwd{boxplot}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nbats,} \hlkwc{notch}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{varwidth}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"score DGINN"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"Bats"}\hlstd{)}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in bxp(list(stats = structure(c(21, 825, 1664, 2860, 5392, 15, 625.5, : some notches went outside hinges ('box'): maybe set notch=FALSE}}\begin{alltt}
\hlkwd{stripchart}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nbats,} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{vertical}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.3}\hlstd{,} \hlkwc{add}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\hlkwd{boxplot}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nprimates,} \hlkwc{notch}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"score DGINN"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"Primates"}\hlstd{)}
\hlkwd{stripchart}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nprimates,} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{vertical}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.3}\hlstd{,} \hlkwc{add}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/boxplot-1}
\end{knitrout}
\subsection{Dotchart}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{maic[maic}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"gene"}\hlstd{,} \hlstr{"rank"}\hlstd{,} \hlstr{"nbats"}\hlstd{)]}
\hlstd{tmp}\hlkwb{<-}\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{decreasing} \hlstd{=} \hlnum{TRUE}\hlstd{),]}
\hlstd{tmp}\hlopt{$}\hlstd{col}\hlkwb{<-}\hlstr{"black"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"ACE2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{5}\hlstd{]}\hlkwb{<-}\hlnum{1}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlnum{20}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{3}\hlstd{]}\hlkwb{<-}\hlnum{4}
\hlkwd{dotchart}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{main}\hlstd{=}\hlstr{"Bats DGINN >=3"}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{labels}\hlstd{=tmp}\hlopt{$}\hlstd{gene,} \hlkwc{pch}\hlstd{=tmp}\hlopt{$}\hlstd{pch,} \hlkwc{col}\hlstd{=tmp}\hlopt{$}\hlstd{col)}
\hlkwd{legend}\hlstd{(}\hlstr{"topright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"5 (score DGINN)"}\hlstd{,} \hlstr{"4"}\hlstd{,} \hlstr{"3"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/dotbats-1}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{maic[maic}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"gene"}\hlstd{,} \hlstr{"rank"}\hlstd{,} \hlstr{"nprimates"}\hlstd{)]}
\hlstd{tmp}\hlkwb{<-}\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{decreasing} \hlstd{=} \hlnum{TRUE}\hlstd{),]}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{5}\hlstd{]}\hlkwb{<-}\hlnum{1}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlnum{20}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{3}\hlstd{]}\hlkwb{<-}\hlnum{4}
\hlstd{tmp}\hlopt{$}\hlstd{col}\hlkwb{<-}\hlstr{"black"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"ACE2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlkwd{dotchart}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{main}\hlstd{=}\hlstr{"Primates DGINN >=3"}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{labels}\hlstd{=tmp}\hlopt{$}\hlstd{gene,} \hlkwc{pch}\hlstd{=tmp}\hlopt{$}\hlstd{pch,} \hlkwc{cex}\hlstd{=}\hlnum{0.8}\hlstd{,} \hlkwc{col}\hlstd{=tmp}\hlopt{$}\hlstd{col)}
\hlkwd{legend}\hlstd{(}\hlstr{"topright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"5 (score DGINN)"}\hlstd{,} \hlstr{"4"}\hlstd{,} \hlstr{"3"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/dotprimates-1}
\end{knitrout}
\section{Pan Corona}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pancorona}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/pancorona_S5.csv"}\hlstd{),}
\hlkwc{h}\hlstd{=T,} \hlkwc{fill} \hlstd{=} \hlnum{TRUE}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\hlkwd{names}\hlstd{(pancorona)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwd{names}\hlstd{(pancorona)[}\hlopt{-}\hlnum{1}\hlstd{])}
\hlcom{# Genes en commun}
\hlstd{pancorona}\hlopt{$}\hlstd{tmp.Gene.name[pancorona}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{tablo}\hlopt{$}\hlstd{tmp.Gene.name]}
\end{alltt}
\begin{verbatim}
## [1] TBK1 MARK3 GIGYF2 MARK2 G3BP1 LARP1 ACE2 PABPC1
## [9] TMPRSS2 AP3B1 CLCC1 CSDE1 HECTD1 MARK1 MEPCE PDE4DIP
## [17] POR PRKAR2B RAB5C RTN4 SRP54 UBAP2 UBAP2L UBXN8
## [25] SPART BZW2 EIF4E2 SMOC1 STOML2 DDX21 FAM98A G3BP2
## [33] MOV10 PABPC4 UPF1
## 105 Levels: ACE2 ANPEP AP3B1 ATXN2L BTF3 BZW2 CKAP5 CLCC1 ... YTHDF2
\end{verbatim}
\begin{alltt}
\hlcom{# Uniquement dans le tableau pancorona}
\hlkwd{sort}\hlstd{(pancorona}\hlopt{$}\hlstd{tmp.Gene.name[(pancorona}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{tablo}\hlopt{$}\hlstd{tmp.Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] ANPEP ATXN2L BTF3 CKAP5 CTSB CTSL
## [7] CYB5R3 DDX1 DDX5 DDX58 DHX9 DNM1L
## [13] EEF1A1 EIF2A EIF3F EIF4B EZR FLNA
## [19] FURIN FUS GSK3A GSK3B HDLBP HNRNPA1
## [25] HNRNPD HNRNPF HNRNPU IFIH1 IGF2BP1 IKBKB
## [31] IKBKE IRF3 ISG15 KPNA3 KPNB1 MYH9
## [37] NCL POLD1 POLR2B PRKRA RBM14 RCHY1
## [43] RPL13A RPL26 RPS13 RPS17 RPS19 RPS9
## [49] SDCBP SERBP1 SGTA SLC1A5 SNAP47 SSB
## [55] STING1 SYNCRIP TANC1 TBCB TMPRSS11D TRAF3
## [61] TUBA4A TUBB2A TUBB4A TUBB6 USP10 VPS36
## [67] XRCC5 XRCC6 YBX1 YTHDF2
## 105 Levels: ACE2 ANPEP AP3B1 ATXN2L BTF3 BZW2 CKAP5 CLCC1 ... YTHDF2
\end{verbatim}
\begin{alltt}
\hlcom{## Uniquement dans tableau }
\hlkwd{sort}\hlstd{(tablo}\hlopt{$}\hlstd{tmp.Gene.name[(tablo}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{pancorona}\hlopt{$}\hlstd{tmp.Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] AAR2 AASS AATF ABCC1 ACAD9 ACADM
## [7] ACSL3 ADAM9 ADAMTS1 AGPS AKAP8 AKAP8L
## [13] AKAP9 ALG11 ALG5 ALG8 ANO6 AP2A2
## [19] AP2M1 ARF6 ATE1 ATP13A3 ATP1B1 ATP6AP1
## [25] ATP6V1A BAG5 BCKDK BRD2 BRD4 CCDC86
## [31] CDK5RAP2 CENPF CEP112 CEP135 CEP250 CEP350
## [37] CEP68 CHMP2A CHPF CHPF2 CISD3 CIT
## [43] CLIP4 CNTRL COL6A1 COLGALT1 COMT COQ8B
## [49] CRTC3 CSNK2A2 CSNK2B CUL2 CWC27 CYB5B
## [55] DCAF7 DCAKD DCTPP1 DDX10 DNAJC11 DNAJC19
## [61] DNMT1 DPH5 DPY19L1 ECSIT EDEM3 EIF4H
## [67] ELOC EMC1 ERC1 ERGIC1 ERLEC1 ERMP1
## [73] ERO1B ERP44 ETFA EXOSC2 EXOSC3 EXOSC5
## [79] EXOSC8 F2RL1 FAM162A FAM8A1 FAR2 FASTKD5
## [85] FBLN5 FBN1 FBN2 FBXL12 FKBP10 FKBP15
## [91] FKBP7 FOXRED2 FYCO1 GCC1 GCC2 GDF15
## [97] GFER GGCX GGH GHITM GLA GNB1
## [103] GNG5 GOLGA2 GOLGA3 GOLGA7 GOLGB1 GORASP1
## [109] GPAA1 GPX1 GRIPAP1 GRPEL1 GTF2F2 HDAC2
## [115] HEATR3 HMOX1 HOOK1 HS2ST1 HS6ST2 HSBP1
## [121] HYOU1 IDE IL17RA IMPDH2 INHBE INTS4
## [127] ITGB1 JAKMIP1 KDELC1 KDELC2 LARP4B LARP7
## [133] LMAN2 LOX MAP7D1 MARC1 MAT2B MDN1
## [139] MIB1 MIPOL1 MOGS MPHOSPH10 MRPS2 MRPS25
## [145] MRPS27 MRPS5 MTCH1 MYCBP2 NARS2 NAT14
## [151] NDFIP2 NDUFAF1 NDUFAF2 NDUFB9 NEK9 NEU1
## [157] NGDN NGLY1 NIN NINL NLRX1 NOL10
## [163] NPC2 NPTX1 NSD2 NUP210 NUP214 NUP54
## [169] NUP58 NUP62 NUP88 NUP98 NUTF2 OS9
## [175] PCNT PCSK5 PCSK6 PDZD11 PIGO PIGS
## [181] PITRM1 PKP2 PLAT PLD3 PLEKHA5 PLEKHF2
## [187] PLOD2 PMPCA PMPCB POFUT1 POLA1 POLA2
## [193] PPIL3 PPT1 PRIM1 PRIM2 PRKACA PRKAR2A
## [199] PRRC2B PSMD8 PTBP2 PTGES2 PUSL1 PVR
## [205] QSOX2 RAB10 RAB14 RAB18 RAB1A RAB2A
## [211] RAB7A RAB8A RAE1 RALA RAP1GDS1 RBM28
## [217] RBM41 RBX1 RDX REEP5 REEP6 RETREG3
## [223] RHOA RIPK1 RNF41 RPL36 RRP9 SAAL1
## [229] SBNO1 SCAP SCARB1 SCCPDH SDF2 SELENOS
## [235] SEPSECS SIL1 SIRT5 SLC25A21 SLC27A2 SLC30A6
## [241] SLC30A7 SLC30A9 SLC44A2 SLC9A3R1 SLU7 SNIP1
## [247] SRP19 SRP72 STC2 STOM SUN2 TAPT1
## [253] TARS2 TBCA TBKBP1 TCF12 THTPA TIMM10
## [259] TIMM10B TIMM29 TIMM8B TIMM9 TLE1 TLE3
## [265] TM2D3 TMED5 TMEM39B TMEM97 TOMM70 TOR1A
## [271] TOR1AIP1 TRIM59 TRMT1 TUBGCP2 TUBGCP3 TYSND1
## [277] UGGT2 USP54 VPS11 VPS39 WASHC4 WFS1
## [283] YIF1A ZC3H18 ZC3H7A ZDHHC5 ZNF318 ZNF503
## [289] ZYG11B
## 324 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pancorona}\hlkwb{<-}\hlstd{pancorona[,}\hlkwd{c}\hlstd{(}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlstr{"TOTAL"}\hlstd{)]}
\hlstd{pandginn}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(}\hlkwd{merge}\hlstd{(pancorona, tablo,} \hlkwc{by}\hlstd{=}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=}\hlnum{TRUE}\hlstd{))}
\hlstd{pandginn}\hlkwb{<-}\hlstd{pandginn[}\hlkwd{order}\hlstd{(pandginn}\hlopt{$}\hlstd{nprimates),]}
\hlstd{pandginn}\hlkwb{<-}\hlstd{pandginn[}\hlkwd{order}\hlstd{(pandginn}\hlopt{$}\hlstd{TOTAL),]}
\hlkwd{dotchart}\hlstd{(}\hlkwd{as.matrix}\hlstd{(pandginn[,}\hlnum{2}\hlstd{]),} \hlkwc{labels} \hlstd{= pandginn}\hlopt{$}\hlstd{tmp.Gene.name,} \hlkwc{xlim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwd{points}\hlstd{(pandginn[,}\hlnum{4}\hlstd{],} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(pandginn),} \hlkwc{col}\hlstd{=}\hlstr{"blue"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\hlkwd{points}\hlstd{(pandginn[,}\hlnum{3}\hlstd{],} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(pandginn),} \hlkwc{col}\hlstd{=}\hlstr{"blue"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{4}\hlstd{)}
\hlkwd{legend}\hlstd{(}\hlstr{"bottomright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"pancorona score"}\hlstd{,} \hlstr{"dginn primate score"}\hlstd{,} \hlstr{"dginn bats score"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{),} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"black"}\hlstd{,} \hlstr{"blue"}\hlstd{,} \hlstr{"blue"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/pancorona-1}
\end{knitrout}
\end{document}
......@@ -15,7 +15,7 @@
\title{Positive selection on genes interacting with SARS-CoV-2, comparison of different analyses}
\author{Marie Cariou}
\date{October 2020} % Activate to display a given date or no date
\date{March 2021} % Activate to display a given date or no date
\begin{document}
\maketitle
......@@ -26,20 +26,21 @@
\section{Files manipulations}
\subsection{Read Janet Young's table}
\subsection{Complete table}
<<>>=
workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
tab<-read.delim(paste0(workdir,
"data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"),
fill=T, h=T, dec=",")
"covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
dim(tab)
#names(tab)
tab$Gene.name<-as.character(tab$Gene.name.x)
tab$Gene.name[tab$PreyGene=="MARC1"]<-"MARC1"
@
\subsection{Read DGINN Young table}
The DGINN-Young-primate table corresponds to DGINN results obtained on the SAME alignments as Young-primate.
......@@ -56,45 +57,6 @@ dim(dginnY)
names(dginnY)
@
\subsection{Joining Young and DGINN Young table}
\textit{I hide some code corresponding to verifications of gene names coherence between tables}
<<results="hide", echo=FALSE>>=
head(tab)[,1:5]
# gene avec un nom bizar dans certaines colomne
tab[158,1:10]
#
length(unique(dginnY$Gene))
length(unique(tab$PreyGene))
length(unique(tab$Gene.name))
#quelle paire de colonne contient le plus de noms identiques
sum(unique(dginnY$Gene) %in% unique(tab$PreyGene))
sum(unique(dginnY$Gene) %in% unique(tab$Gene.name))
# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne.
# Je soupçonne que c'est celle là:
tab[158,1:10]
# Verif:
tab[,1:10][(tab$Gene.name %in% unique(dginnY$Gene))==F,]
# yep
# Remplacement manuel par
as.character(unique(dginnY$Gene)[(unique(dginnY$Gene) %in% tab$Gene.name)==F])
# dans le tableau de Janet
val_remp=as.character(unique(dginnY$Gene)[(unique(dginnY$Gene) %in% tab$Gene.name)==F])
tab$Gene.name<-as.character(tab$Gene.name)
tab$Gene.name[158]<-val_remp
sum(unique(dginnY$Gene) %in% unique(tab$Gene.name))
@
<<>>=
add_col<-function(method="PamlM1M2"){
......@@ -127,79 +89,19 @@ tmp<-dginnY[dginnY$Method=="MEME",c("Gene", "NbSites", "PSS")]
names(tmp)<-c("Gene.name", "NbSites_MEME", "PSS_MEME")
tab<-merge(tab, tmp, by="Gene.name")
dim(tab)
@
\subsection{Read DGINN Table}
<<>>=
dginnT<-read.delim(paste0(workdir,
"/data/DGINN_202005281649summary_cleaned.csv"),
fill=T, h=T, sep=",")
dim(dginnT)
names(dginnT)
# Number of genes in dginn-primate output not present in the original table
dginnT[(dginnT$Gene %in% tab$Gene.name)==F,"Gene"]
# This includes paralogs, recombinations found by DGINN
# and additionnal genes included on purpose
# Number of genes from the original list not present in DGINN output
tab[(tab$Gene.name %in% dginnT$Gene)==F,"Gene.name"]
names(dginnT)<-c("File", "Name", "Gene.name", "GeneSize", "dginn-primate_NbSpecies", "dginn-primate_omegaM0Bpp",
"dginn-primate_omegaM0codeml", "dginn-primate_BUSTED", "dginn-primate_BUSTED.p.value",
"dginn-primate_MEME.NbSites", "dginn-primate_MEME.PSS", "dginn-primate_BppM1M2",
"dginn-primate_BppM1M2.p.value", "dginn-primate_BppM1M2.NbSites", "dginn-primate_BppM1M2.PSS",
"dginn-primate_BppM7M8", "dginn-primate_BppM7M8.p.value", "dginn-primate_BppM7M8.NbSites",
"dginn-primate_BppM7M8.PSS", "dginn-primate_codemlM1M2", "dginn-primate_codemlM1M2.p.value",
"dginn-primate_codemlM1M2.NbSites", "dginn-primate_codemlM1M2.PSS", "dginn-primate_codemlM7M8",
"dginn-primate_codemlM7M8.p.value", "dginn-primate_codemlM7M8.NbSites", "dginn-primate_codemlM7M8.PSS")
@
\subsection{Join Table and DGINN table}
<<>>=
tab<-merge(tab,dginnT, by="Gene.name", all.x=T)
@
\subsection{Write new table}
<<>>=
write.table(tab,
"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20201014.txt",
row.names=F, quote=F, sep="\t")
@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comparisons Primates}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS Janet Young's results}
Comparison of omega values: column L ``whole.gene.dN.dS.model.0'' (Young-primate) vs.\ column ``Omega\_PamlM7M8'' from the DGINN output on the same alignments (DGINN-Young-primate).
<<omegaM7M8>>=
<<omegaM7M8>>=
tab$whole.gene.dN.dS.model.0<-as.numeric(
as.character(tab$whole.gene.dN.dS.model.0))
plot(tab$whole.gene.dN.dS.model.0, tab$Omega_PamlM7M8,
xlab="Omega Young-primate", ylab="Omega DGINN-Young-primate")
abline(0,1)
......@@ -212,159 +114,207 @@ outlier<-tab[tab$whole.gene.dN.dS.model.0<0.6 & tab$Omega_PamlM7M8>0.7,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=(outlier$Omega_PamlM7M8+0.01),
outlier$Gene.name)
@
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS DGINN-full's results}
Comparison of omega values: column ``dginn.primate\_omegaM0Bpp'' (DGINN-full) vs.\ column ``Omega\_PamlM7M8'' (DGINN-Young-primate).
<<omegaM7M8_2>>=
tab$'dginn-primate_omegaM0Bpp'<-as.numeric(as.character(tab$'dginn-primate_omegaM0Bpp'))
plot(tab$'dginn-primate_omegaM0Bpp', tab$Omega_PamlM7M8,
tab$'dginn.primate_omegaM0Bpp'<-as.numeric(
as.character(tab$'dginn.primate_omegaM0Bpp'))
plot(tab$'dginn.primate_omegaM0Bpp', tab$Omega_PamlM7M8,
xlab="DGINN-full's", ylab="Omega DGINN-Young-primate")
abline(0,1)
outlier<-tab[tab$'dginn-primate_omegaM0Bpp'>0.4 & tab$Omega_PamlM7M8<0.2,]
text(x=outlier$'dginn-primate_omegaM0Bpp',
outlier<-tab[tab$'dginn.primate_omegaM0Bpp'>0.4 & tab$Omega_PamlM7M8<0.2,]
text(x=outlier$'dginn.primate_omegaM0Bpp',
y=(outlier$Omega_PamlM7M8+0.01),
outlier$Gene.name)
outlier<-tab[tab$'dginn-primate_omegaM0Bpp'>0.5 & tab$Omega_PamlM7M8<0.4,]
text(x=outlier$'dginn-primate_omegaM0Bpp',
outlier<-tab[tab$'dginn.primate_omegaM0Bpp'>0.5 & tab$Omega_PamlM7M8<0.4,]
text(x=outlier$'dginn.primate_omegaM0Bpp',
y=(outlier$Omega_PamlM7M8+0.01),
outlier$Gene.name)
outlier<-tab[tab$'dginn.primate_omegaM0Bpp'>0.2 & tab$Omega_PamlM7M8>0.6,]
text(x=outlier$'dginn.primate_omegaM0Bpp',
y=(outlier$Omega_PamlM7M8+0.01),
outlier$Gene.name)
@
\subsection{Janet Young's results (Young-primate) VS DGINN-full's results}
Comparison of omega values: column L "whole.gene.dN.dS.model.0" (Young-primate) vs. column "dginn.primate\_omegaM0Bpp" (DGINN-full).
<<omegaM7M8_3>>=
plot(tab$whole.gene.dN.dS.model.0, as.numeric(as.character(tab$'dginn-primate_omegaM0Bpp')),
plot(tab$whole.gene.dN.dS.model.0,
as.numeric(as.character(tab$'dginn.primate_omegaM0Bpp')),
xlab="Omega Young-primate", ylab="DGINN-full's")
abline(0,1)
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.4 & tab$'dginn-primate_omegaM0Bpp'>0.5,]
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.4 &
as.numeric(as.character(tab$'dginn.primate_omegaM0Bpp'))>0.5,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$'dginn-primate_omegaM0Bpp',
y=outlier$'dginn.primate_omegaM0Bpp',
outlier$Gene.name)
@
outlier<-tab[tab$whole.gene.dN.dS.model.0>0.7 &
as.numeric(as.character(tab$'dginn.primate_omegaM0Bpp'))>0,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$'dginn.primate_omegaM0Bpp',
outlier$Gene.name)
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.1 &
as.numeric(as.character(tab$'dginn.primate_omegaM0Bpp'))>0.3,]
text(x=outlier$whole.gene.dN.dS.model.0+0.03,
y=outlier$'dginn.primate_omegaM0Bpp',
outlier$Gene.name)
@
\section{Overlap}
\subsection{Mondrian}
<<mondrianprimates>>=
library(Mondrian)
#######
monddata<-as.data.frame(tab$Gene.name)
dim(monddata)
dginnyoungtmp<-rowSums(cbind(tab$PosSel_PamlM1M2=="Y", tab$PosSel_PamlM7M8=="Y",
tab$PosSel_BppM1M2=="Y", tab$PosSel_BppM7M8=="Y", tab$PosSel_BUSTED=="Y"))
#monddata$primates_dginn_young<-ifelse(tmp$PosSel_PamlM7M8=="Y", 1,0)
dginnfulltmp<-rowSums(cbind(tab$'dginn-primate_BUSTED'=="Y", tab$'dginn-primate_BppM1M2'=="Y",
tab$'dginn-primate_BppM7M8'=="Y", tab$'dginn-primate_codemlM1M2'=="Y", tab$'dginn-primate_codemlM7M8'=="Y"))
dginnyoungtmp<-rowSums(cbind(tab$PosSel_PamlM1M2=="Y",
tab$PosSel_PamlM7M8=="Y",
tab$PosSel_BppM1M2=="Y",
tab$PosSel_BppM7M8=="Y",
tab$PosSel_BUSTED=="Y"))
dginnfulltmp<-rowSums(cbind(tab$'dginn.primate_BUSTED'=="Y",
tab$'dginn.primate_BppM1M2'=="Y",
tab$'dginn.primate_BppM7M8'=="Y",
tab$'dginn.primate_codemlM1M2'=="Y",
tab$'dginn.primate_codemlM7M8'=="Y"))
monddata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
#monddata$primates_cooper<-ifelse(tab$cooper.primates.M7.M8_p_val<0.05, 1, 0)
monddata$primates_dginn_young<-ifelse(dginnyoungtmp>=3, 1,0)
monddata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)
mondrian(na.omit(monddata[,2:4]), labels=c("Young", "DGINN-Young >=3", "DGINN-full >=3" ))
mondrian(na.omit(monddata[,2:4]),
labels=c("Young", "DGINN-Young >=3", "DGINN-full >=3" ))
#####
monddata$primates_dginn_young<-ifelse(dginnyoungtmp>=4, 1,0)
monddata$primates_dginn_full<-ifelse(dginnfulltmp>=4, 1,0)
mondrian(na.omit(monddata[,2:4]), labels=c("Young", "DGINN-Young >=4", "DGINN-full >=4"))
mondrian(na.omit(monddata[,2:4]),
labels=c("Young", "DGINN-Young >=4", "DGINN-full >=4"))
@
Comparison of results with the same method.
<<>>=
#####
# Same-method comparison: take the PAML/codeml M7-vs-M8 call for DGINN-Young,
# so that all three sets (Young's pVal.M8vsM7, DGINN-Young, DGINN-full's
# codemlM7M8) come from the same test, as the plot title "posel codeml M7M8"
# states. The Bio++ column (PosSel_BppM7M8) would mix two different methods.
monddata$primates_dginn_young<-tab$PosSel_PamlM7M8=="Y"
monddata$primates_dginn_full<-tab$'dginn-primate_codemlM7M8'=="Y"
monddata$primates_dginn_full<-tab$'dginn.primate_codemlM7M8'=="Y"
mondrian(na.omit(monddata[,2:4]), labels=c("Young", "DGINN-Young", "DGINN-full"), main="posel codeml M7M8")
mondrian(na.omit(monddata[,2:4]),
labels=c("Young", "DGINN-Young", "DGINN-full"),
main="posel codeml M7M8")
@
\subsection{UpSetR}
Just another representation of the same result.
Just another representation of the same result. For now, I focus on the genes found positive by 3 methods in the DGINN analyses.
<<subsetprimates>>=
library(UpSetR)
upsetdata<-as.data.frame(tab$Gene.name)
upsetdata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
###
upsetdata$primates_dginn_young<-ifelse(dginnyoungtmp>=3, 1,0)
upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)
upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
###
upsetdata$primates_dginn_young<-ifelse(dginnyoungtmp>=4, 1,0)
upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=4, 1,0)
@
upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
\section{Gene List}
<<setup, include=FALSE, cache=FALSE, tidy=TRUE>>=
options(tidy=TRUE, width=70)
@
List of the 34 genes found under positive selection in all analyses.
<<>>=
upsetdata$`tab$Gene.name`[(upsetdata$primates_young==TRUE &
upsetdata$primates_dginn_young==TRUE &
upsetdata$primates_dginn_full==TRUE)]
@
\section{Gene List}
Genes under positive selection for at least 4 methods.
List of the 13 genes found under positive selection in both Young analysis and DGINN-Young alignments (but not full-DGINN).
<<>>=
upsetdata$`tab$Gene.name`[(upsetdata$primates_young==TRUE &
upsetdata$primates_dginn_young==TRUE &
upsetdata$primates_dginn_full==FALSE)]
@
The 1 gene found under positive selection in both DGINN analyses, but not in Young's.
<<>>=
upsetdata$`tab$Gene.name`[(upsetdata$primates_young==FALSE &
upsetdata$primates_dginn_young==TRUE &
upsetdata$primates_dginn_full==TRUE)]
@
List of the 8 genes found under positive selection in both Young's analysis and full-DGINN, but not DGINN-Young.
<<>>=
upsetdata$`tab$Gene.name`[(upsetdata$primates_young==TRUE &
upsetdata$primates_dginn_young==FALSE &
upsetdata$primates_dginn_full==TRUE)]
@
List of the 18 genes found under positive selection ONLY in Young analysis.
<<>>=
upsetdata$`tab$Gene.name`[(upsetdata$primates_young==TRUE &
upsetdata$primates_dginn_young==FALSE &
upsetdata$primates_dginn_full==FALSE)]
@
The 1 gene found under positive selection ONLY in DGINN-Young.
<<>>=
upsetdata$`tab$Gene.name`[(upsetdata$primates_young==FALSE &
upsetdata$primates_dginn_young==TRUE &
upsetdata$primates_dginn_full==FALSE)]
@
List of the 44 genes found under positive selection ONLY in full-DGINN.
<<>>=
dginnfulltmp<-rowSums(cbind(tab$'dginn-primate_BUSTED'=="Y",
tab$'dginn-primate_BppM1M2'=="Y",
tab$'dginn-primate_BppM7M8'=="Y",
tab$'dginn-primate_codemlM1M2'=="Y",
tab$'dginn-primate_codemlM7M8'=="Y"))
upsetdata$`tab$Gene.name`[(upsetdata$primates_young==FALSE &
upsetdata$primates_dginn_young==FALSE &
upsetdata$primates_dginn_full==TRUE)]
@
<<echo=FALSE, results="hide">>=
dginnfulltmp<-rowSums(cbind(tab$'dginn.primate_BUSTED'=="Y",
tab$'dginn.primate_BppM1M2'=="Y",
tab$'dginn.primate_BppM7M8'=="Y",
tab$'dginn.primate_codemlM1M2'=="Y",
tab$'dginn.primate_codemlM7M8'=="Y"))
tab$Gene.name[dginnfulltmp>=4 & is.na(dginnfulltmp)==F]
tab$Gene.name[dginnfulltmp>=3 & is.na(dginnfulltmp)==F]
tmp<-tab[dginnfulltmp>=4 & is.na(dginnfulltmp)==F,
c("Gene.name","dginn-primate_BUSTED", "dginn-primate_BppM1M2",
"dginn-primate_BppM7M8","dginn-primate_codemlM1M2","dginn-primate_codemlM7M8")]
c("Gene.name","dginn.primate_BUSTED", "dginn.primate_BppM1M2",
"dginn.primate_BppM7M8","dginn.primate_codemlM1M2","dginn.primate_codemlM7M8")]
write.table(tmp, "geneList_DGINN_full_primate_pos4.txt", row.names=F, quote=F)
@
\section{Shiny like}
<<shiny, fig.height=11>>=
<<shiny, fig.height=11, echo=FALSE, results="hide", fig="hide">>=
makeFig1 <- function(df){
# prepare data for colors etc
......@@ -416,14 +366,11 @@ makeFig1 <- function(df){
)
}
df<-read.delim(paste0(workdir,
"/data/DGINN_202005281649summary_cleaned.csv"),
fill=T, h=T, sep=",")
makeFig1(df)
#makeFig1(df)
@
\end{document}
......
No preview for this file type
......@@ -65,7 +65,7 @@
\title{Positive selection on genes interacting with SARS-CoV-2, comparison of different analyses}
\author{Marie Cariou}
\date{October 2020} % Activate to display a given date or no date
\date{March 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
......@@ -76,27 +76,29 @@
\section{Files manipulations}
\subsection{Read Janet Young's table}
\subsection{Complete table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/"}
\hlstd{workdir}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,} \hlstr{"CIRI_BIBS_projects/2020_05_Etienne_covid/"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{dec}\hlstd{=}\hlstr{","}\hlstd{)}
\hlstr{"covid_comp/covid_comp_complete.txt"}\hlstd{),} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] 332 84
## [1] 332 141
\end{verbatim}
\begin{alltt}
\hlcom{#names(tab)}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name.x)}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[tab}\hlopt{$}\hlstd{PreyGene}\hlopt{==}\hlstr{"MARC1"}\hlstd{]}\hlkwb{<-}\hlstr{"MARC1"}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Read DGINN Young table}
The DGINN-Young-primate table corresponds to DGINN results on the SAME alignments as Young-primate.
......@@ -119,17 +121,12 @@ I will merge the 2 tables.
\hlkwd{names}\hlstd{(dginnY)}
\end{alltt}
\begin{verbatim}
## [1] "Gene" "Omega" "Method" "PosSel" "PValue" "NbSites" "PSS"
## [1] "Gene" "Omega" "Method" "PosSel" "PValue" "NbSites"
## [7] "PSS"
\end{verbatim}
\end{kframe}
\end{knitrout}
\subsection{Joining Young and DGINN Young table}
\textit{I hide some code corresponding to verifications of gene names coherence between tables}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
......@@ -162,131 +159,27 @@ I will merge the 2 tables.
\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"MEME"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"NbSites_MEME"}\hlstd{,} \hlstr{"PSS_MEME"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Read DGINN Table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnT}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"/data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{dim}\hlstd{(dginnT)}
\end{alltt}
\begin{verbatim}
## [1] 412 27
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginnT)}
\end{alltt}
\begin{verbatim}
## [1] "File" "Name" "Gene" "GeneSize" "NbSpecies" "omegaM0Bpp"
## [7] "omegaM0codeml" "BUSTED" "BUSTED.p.value" "MEME.NbSites" "MEME.PSS" "BppM1M2"
## [13] "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS" "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites"
## [19] "BppM7M8.PSS" "codemlM1M2" "codemlM1M2.p.value" "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8"
## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS"
\end{verbatim}
\begin{alltt}
\hlcom{# Number of genes in dginn-primate output not present in the original table}
\hlstd{dginnT[(dginnT}\hlopt{$}\hlstd{Gene} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F,}\hlstr{"Gene"}\hlstd{]}
\end{alltt}
\begin{verbatim}
## [1] ACE2 ADAM9[0-3120] ADAM9[3119-3927] ATP5MGL C1H1ORF50 CEP135[0-3264]
## [7] CEP135[3263-3678] CEP43 COQ8B COQ8A CSNK2A1 CSNK2B[0-609]
## [13] CSNK2B[608-2568] CYB5R1 DDX21[0-717] DDX21[716-2538] DDX50 DNAJC15
## [19] DPH5[0-702] DPH5[701-1326] DPY19L2 ELOC ERO1B EXOSC3[0-1446]
## [25] EXOSC3[1445-1980] FBN3 GNB4 GNB2 GNB3 GOLGA7[0-312]
## [31] GOLGA7[311-549] GPX1[0-1218] GPX1[1217-2946] HDAC1 HS6ST3 IMPDH1
## [37] ITGB1[0-2328] ITGB1[2327-2844] LMAN2L MRPS5[0-1569] MRPS5[1568-3783] MARC2
## [43] MGRN1 NDFIP2[0-768] NDFIP2[767-1314] NDUFAF2[0-258] NDUFAF2[257-744] NSD2
## [49] NUP58 NUP58[0-1824] NUP58[1823-2367] PABPC3 POTPABPC1 PABPC4L
## [55] PABPC5 PCSK5 PRIM2[0-1071] PRIM2[1070-1902] PRKACB PRKACG
## [61] PTGES2[0-1587] PTGES2[1586-2202] RAB8B RAB13 RAB18[0-855] RAB18[854-1815]
## [67] RAB2B RAB5A RAB5B RAB15 RALB EZR
## [73] EZR[0-1458] EZR[1457-3771] MSN RETREG3 RHOB RHOC
## [79] SLC44A2[0-2577] SLC44A2[2576-3657] SPART SRP72[0-2604] SRP72[2603-3417] STOM[0-1047]
## [85] STOM[1046-1800] STOML3 TIMM29 TLE4 TLE2 TLE2[0-1302]
## [91] TLE2[1301-3987] TMPRSS2 TOMM70 TOR1B WASHC4 WFS1[0-2346]
## [97] WFS1[2345-3216] YIF1B
## 411 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ADAM9 ADAM9[0-3120] ADAM9[3119-3927] ADAMTS1 AES AGPS AKAP8 AKAP8L ... ZYG11B
\end{verbatim}
\begin{alltt}
\hlcom{# This includes paralogs, recombinations found by DGINN }
\hlcom{# and additionnal genes included on purpose}
\hlcom{# Number of genes from the original list not present in DGINN output}
\hlstd{tab[(tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnT}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlstd{F,}\hlstr{"Gene.name"}\hlstd{]}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] "ADCK4" "ARL6IP6" "ATP5L" "C19orf52" "C1orf50" "ERO1LB" "FAM134C" "FGFR1OP" "KIAA1033" "MFGE8" "NUPL1"
## [12] "SIGMAR1" "SPG20" "TCEB1" "TCEB2" "TOMM70A" "USP13" "VIMP" "WHSC1"
## [1] 332 167
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginnT)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} \hlstr{"GeneSize"}\hlstd{,} \hlstr{"dginn-primate_NbSpecies"}\hlstd{,} \hlstr{"dginn-primate_omegaM0Bpp"}\hlstd{,}
\hlstr{"dginn-primate_omegaM0codeml"}\hlstd{,} \hlstr{"dginn-primate_BUSTED"}\hlstd{,} \hlstr{"dginn-primate_BUSTED.p.value"}\hlstd{,}
\hlstr{"dginn-primate_MEME.NbSites"}\hlstd{,} \hlstr{"dginn-primate_MEME.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,}
\hlstr{"dginn-primate_BppM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.PSS"}\hlstd{,}
\hlstr{"dginn-primate_BppM7M8"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.NbSites"}\hlstd{,}
\hlstr{"dginn-primate_BppM7M8.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.p.value"}\hlstd{,}
\hlstr{"dginn-primate_codemlM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8"}\hlstd{,}
\hlstr{"dginn-primate_codemlM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.PSS"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Join Table and DGINN table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab,dginnT,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=T)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Write new table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{write.table}\hlstd{(tab,}
\hlstr{"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20201014.txt"}\hlstd{,}
\hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comparisons Primates}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS Janet Young's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}
\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0))}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0, tab}\hlopt{$}\hlstd{Omega_PamlM7M8,}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-Young-primate"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
......@@ -305,31 +198,35 @@ Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" d
\end{knitrout}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{))}
\hlstd{tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}
\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{))}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{, tab}\hlopt{$}\hlstd{Omega_PamlM7M8,}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{, tab}\hlopt{$}\hlstd{Omega_PamlM7M8,}
\hlkwc{xlab}\hlstd{=}\hlstr{"DGINN-full's"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-Young-primate"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{<}\hlnum{0.2}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{,}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{<}\hlnum{0.2}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.5} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{<}\hlnum{0.4}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{,}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.5} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{<}\hlnum{0.4}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.2} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{>}\hlnum{0.6}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
......@@ -338,22 +235,35 @@ Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" d
\end{knitrout}
\subsection{Janet Young's results (Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,} \hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{)),}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,}
\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{)),}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"DGINN-full's"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.5}\hlstd{,]}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.4} \hlopt{&}
\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{))}\hlopt{>}\hlnum{0.5}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,}
\hlkwc{y}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{,}
\hlkwc{y}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{,}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{>}\hlnum{0.7} \hlopt{&}
\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{))}\hlopt{>}\hlnum{0}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,}
\hlkwc{y}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{,}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.1} \hlopt{&}
\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{))}\hlopt{>}\hlnum{0.3}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{+}\hlnum{0.03}\hlstd{,}
\hlkwc{y}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn.primate_omegaM0Bpp'}\hlstd{,}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
\end{kframe}
......@@ -361,10 +271,8 @@ Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" d
\end{knitrout}
\section{Overlap}
\subsection{Mondrian}
\begin{knitrout}
......@@ -372,42 +280,41 @@ Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" d
\begin{alltt}
\hlkwd{library}\hlstd{(Mondrian)}
\hlcom{#######}
\hlstd{monddata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)}
\hlkwd{dim}\hlstd{(monddata)}
\end{alltt}
\begin{verbatim}
## [1] 333 1
## [1] 332 1
\end{verbatim}
\begin{alltt}
\hlstd{dginnyoungtmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstd{PosSel_PamlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{PosSel_PamlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstd{PosSel_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{PosSel_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{PosSel_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlcom{#monddata$primates_dginn_young<-ifelse(tmp$PosSel_PamlM7M8=="Y", 1,0)}
\hlstd{dginnfulltmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_BUSTED'}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstr{'dginn-primate_BppM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_BppM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstr{'dginn-primate_codemlM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstr{'dginn-primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{dginnyoungtmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstd{PosSel_PamlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstd{PosSel_PamlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstd{PosSel_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstd{PosSel_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstd{PosSel_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{dginnfulltmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstr{'dginn.primate_BUSTED'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn.primate_BppM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn.primate_BppM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn.primate_codemlM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn.primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{monddata}\hlopt{$}\hlstd{primates_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlcom{#monddata$primates_cooper<-ifelse(tab$cooper.primates.M7.M8_p_val<0.05, 1, 0)}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young >=3"}\hlstd{,} \hlstr{"DGINN-full >=3"} \hlstd{))}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),}
\hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young >=3"}\hlstd{,} \hlstr{"DGINN-full >=3"} \hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianprimates-1}
\begin{kframe}\begin{alltt}
\hlcom{#####}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young >=4"}\hlstd{,} \hlstr{"DGINN-full >=4"}\hlstd{))}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),}
\hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young >=4"}\hlstd{,} \hlstr{"DGINN-full >=4"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianprimates-2}
......@@ -418,169 +325,170 @@ Comparison of results with the same method.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{#####}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlstd{tab}\hlopt{$}\hlstd{PosSel_BppM7M8}\hlopt{==}\hlstr{"Y"}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlstd{tab}\hlopt{$}\hlstr{'dginn.primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young"}\hlstd{,} \hlstr{"DGINN-full"}\hlstd{),} \hlkwc{main}\hlstd{=}\hlstr{"posel codeml M7M8"}\hlstd{)}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),}
\hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young"}\hlstd{,} \hlstr{"DGINN-full"}\hlstd{),}
\hlkwc{main}\hlstd{=}\hlstr{"posel codeml M7M8"}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/unnamed-chunk-8-1}
\includegraphics[width=\maxwidth]{figure/unnamed-chunk-4-1}
\end{knitrout}
\subsection{subsetR}
Just another representation of the same result.
Just another representation of the same result, for now, I focuse on the gene positive in 3 methodes for DGINN analysis.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{library}\hlstd{(UpSetR)}
\hlstd{upsetdata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlcom{###}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{upset}\hlstd{(}\hlkwd{na.omit}\hlstd{(upsetdata),} \hlkwc{nsets} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{matrix.color} \hlstd{=} \hlstr{"#DC267F"}\hlstd{,}
\hlkwc{main.bar.color} \hlstd{=} \hlstr{"#648FFF"}\hlstd{,} \hlkwc{sets.bar.color} \hlstd{=} \hlstr{"#FE6100"}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/subsetprimates-1}
\begin{kframe}\begin{alltt}
\hlcom{###}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{upset}\hlstd{(}\hlkwd{na.omit}\hlstd{(upsetdata),} \hlkwc{nsets} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{matrix.color} \hlstd{=} \hlstr{"#DC267F"}\hlstd{,}
\hlkwc{main.bar.color} \hlstd{=} \hlstr{"#648FFF"}\hlstd{,} \hlkwc{sets.bar.color} \hlstd{=} \hlstr{"#FE6100"}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/subsetprimates-2}
\end{knitrout}
\section{Gene List}
Genes under positive selection for at least 4 methods.
List of the 34 genes found under positive selection in all analysis.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnfulltmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_BUSTED'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_BppM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_BppM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_codemlM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{upsetdata}\hlopt{$}\hlstd{`tab$Gene.name`[(upsetdata}\hlopt{$}\hlstd{primates_young}\hlopt{==}\hlnum{TRUE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlopt{==}\hlnum{TRUE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlopt{==}\hlnum{TRUE}\hlstd{)]}
\end{alltt}
\begin{verbatim}
## [1] ACADM ATE1 BCS1L BRD4 CDK5RAP2 CEP68
## [7] CNTRL DNMT1 EDEM3 FYCO1 GCC2 GHITM
## [13] GIGYF2 GOLGB1 GORASP1 ITGB1 MDN1 MPHOSPH10
## [19] MRPS5 NDUFAF2 PCNT PLAT POLA1 PRIM2
## [25] PVR SAAL1 SEPSECS SIRT5 SLC25A21 SLC27A2
## [31] TOR1AIP1 UGGT2 USP54 ZNF318
## 332 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACSL3 ADAM9 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[dginnfulltmp}\hlopt{>=}\hlnum{4} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F]}
List of the 13 genes found under positive selection in both Young analysis and DGINN-Young alignments (but not full-DGINN).
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{upsetdata}\hlopt{$}\hlstd{`tab$Gene.name`[(upsetdata}\hlopt{$}\hlstd{primates_young}\hlopt{==}\hlnum{TRUE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlopt{==}\hlnum{TRUE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlopt{==}\hlnum{FALSE}\hlstd{)]}
\end{alltt}
\begin{verbatim}
## [1] "ACADM" "BCS1L" "BRD4" "CDK5RAP2" "CEP135" "CEP68" "CLIP4" "DNMT1" "DPH5" "EMC1"
## [11] "FYCO1" "GCC2" "GGH" "GHITM" "GIGYF2" "GLA" "GOLGA7" "HECTD1" "IDE" "ITGB1"
## [21] "LARP1" "LARP4B" "LMAN2" "MARK1" "MIPOL1" "MPHOSPH10" "MYCBP2" "NDUFAF2" "NDUFB9" "PCNT"
## [31] "POLA1" "PRIM2" "PRKAR2A" "PVR" "REEP6" "RIPK1" "SAAL1" "SEPSECS" "SIRT5" "SLC25A21"
## [41] "SLC27A2" "TMEM39B" "TOR1AIP1" "TUBGCP2" "UBAP2" "UGGT2" "VPS39" "ZNF318"
## [1] ABCC1 ALG8 CEP250 CEP350 ERLEC1 FASTKD5 GOLGA2 MRPS27
## [9] NINL PDE4DIP PRRC2B RAB18 WFS1
## 332 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACSL3 ADAM9 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
List of the 1 gene found under positive selection in both DGINN analysis, but not Young.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[dginnfulltmp}\hlopt{>=}\hlnum{3} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F]}
\hlstd{upsetdata}\hlopt{$}\hlstd{`tab$Gene.name`[(upsetdata}\hlopt{$}\hlstd{primates_young}\hlopt{==}\hlnum{FALSE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlopt{==}\hlnum{TRUE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlopt{==}\hlnum{TRUE}\hlstd{)]}
\end{alltt}
\begin{verbatim}
## [1] "ACADM" "ADAM9" "AP2A2" "ATE1" "BCS1L" "BRD4" "BZW2" "CDK5RAP2" "CEP135" "CEP68"
## [11] "CLIP4" "CNTRL" "DNMT1" "DPH5" "EDEM3" "EIF4E2" "EMC1" "EXOSC2" "FYCO1" "GCC2"
## [21] "GGH" "GHITM" "GIGYF2" "GLA" "GOLGA7" "GOLGB1" "GORASP1" "HDAC2" "HECTD1" "HS6ST2"
## [31] "IDE" "ITGB1" "LARP1" "LARP4B" "LARP7" "LMAN2" "MARK1" "MDN1" "MIPOL1" "MOV10"
## [41] "MPHOSPH10" "MRPS5" "MYCBP2" "NAT14" "NDUFAF2" "NDUFB9" "NGLY1" "NPC2" "PCNT" "PITRM1"
## [51] "PLAT" "PLOD2" "PMPCB" "POLA1" "POR" "PRIM2" "PRKAR2A" "PTBP2" "PVR" "RAB14"
## [61] "RAB1A" "RAB2A" "RAP1GDS1" "RBX1" "REEP6" "RIPK1" "RPL36" "SAAL1" "SCCPDH" "SEPSECS"
## [71] "SIRT5" "SLC25A21" "SLC27A2" "STOM" "TIMM8B" "TMEM39B" "TOR1AIP1" "TRIM59" "TRMT1" "TUBGCP2"
## [81] "UBAP2" "UGGT2" "USP54" "VPS39" "ZNF318"
## [1] MARK1
## 332 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACSL3 ADAM9 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
List of the 8 genes found under positive selection in both Young analysis and full-DGINN, but not DGINN-young.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{tab[dginnfulltmp}\hlopt{>=}\hlnum{4} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F,}
\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,}\hlstr{"dginn-primate_BUSTED"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,}
\hlstr{"dginn-primate_BppM7M8"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8"}\hlstd{)]}
\hlkwd{write.table}\hlstd{(tmp,} \hlstr{"geneList_DGINN_full_primate_pos4.txt"}\hlstd{,} \hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F)}
\hlstd{upsetdata}\hlopt{$}\hlstd{`tab$Gene.name`[(upsetdata}\hlopt{$}\hlstd{primates_young}\hlopt{==}\hlnum{TRUE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlopt{==}\hlnum{FALSE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlopt{==}\hlnum{TRUE}\hlstd{)]}
\end{alltt}
\begin{verbatim}
## [1] <NA> LMAN2 NDUFB9 RIPK1 STOM TMEM39B TRMT1 UBAP2
## [9] VPS39
## 332 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACSL3 ADAM9 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
\section{Shiny like}
List of the 18 genes found under positive selection ONLY in Young analysis.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{makeFig1} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{df}\hlstd{)\{}
\hlcom{# prepare data for colors etc}
\hlstd{colMethods} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlstr{"deepskyblue4"}\hlstd{,} \hlstr{"darkorange"} \hlstd{,} \hlstr{"deepskyblue3"} \hlstd{,} \hlstr{"mediumseagreen"} \hlstd{,} \hlstr{"yellow3"} \hlstd{,} \hlstr{"black"}\hlstd{)}
\hlstd{nameMethods} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlstr{"BUSTED"}\hlstd{,} \hlstr{"BppM1M2"}\hlstd{,} \hlstr{"BppM7M8"}\hlstd{,} \hlstr{"codemlM1M2"}\hlstd{,} \hlstr{"codemlM7M8"}\hlstd{,} \hlstr{"MEME"}\hlstd{)}
\hlstd{metColor} \hlkwb{<-} \hlkwd{data.frame}\hlstd{(}\hlkwc{Name} \hlstd{= nameMethods ,} \hlkwc{Col} \hlstd{= colMethods ,} \hlkwc{stringsAsFactors} \hlstd{=} \hlnum{FALSE}\hlstd{)}
\hlcom{# subset for this specific figure}
\hlcom{#df <- df[df$nbY >= 1, ] # to drop genes found by 0 methods (big datasets)}
\hlstd{xt} \hlkwb{<-} \hlstd{df[,} \hlkwd{c}\hlstd{(}\hlstr{"BUSTED"}\hlstd{,} \hlstr{"BppM1M2"}\hlstd{,} \hlstr{"BppM7M8"}\hlstd{,} \hlstr{"codemlM1M2"}\hlstd{,} \hlstr{"codemlM7M8"}\hlstd{)]}
\hlstd{xt}\hlopt{$}\hlstd{Gene} \hlkwb{<-} \hlstd{df}\hlopt{$}\hlstd{Gene}
\hlstd{nbrMeth} \hlkwb{<-} \hlnum{5}
\hlcom{# reverse order of dataframe so that genes with the most Y are at the bottom (to be on top of the barplot)}
\hlstd{xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} \hlkwb{<-} \hlkwd{ifelse}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} \hlopt{==} \hlstr{"Y"}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlcom{# sort and Filter the 0 lines}
\hlstd{xt}\hlkwb{<-}\hlstd{xt[}\hlkwd{order}\hlstd{(}\hlkwd{rowSums}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])),]}
\hlstd{xt}\hlkwb{<-}\hlstd{xt[}\hlkwd{rowSums}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])}\hlopt{>}\hlnum{2}\hlstd{,]}
\hlkwd{row.names}\hlstd{(xt)}\hlkwb{<-}\hlstd{xt}\hlopt{$}\hlstd{Gene}
\hlstd{xt}\hlkwb{<-}\hlstd{xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]}
\hlstd{colFig1} \hlkwb{<-} \hlstd{metColor[}\hlkwd{which}\hlstd{(metColor}\hlopt{$}\hlstd{Name} \hlopt{%in%} \hlkwd{colnames}\hlstd{(xt)) , ]}
\hlcom{##### PART 1 : NUMBER OF METHODS}
\hlkwd{par}\hlstd{(}\hlkwc{xpd} \hlstd{=} \hlnum{NA} \hlstd{,} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{7}\hlstd{,}\hlnum{4}\hlstd{,}\hlnum{0}\hlstd{) ,} \hlkwc{oma} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{) ,} \hlkwc{mgp} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,}\hlnum{0.3}\hlstd{,}\hlnum{0}\hlstd{))}
\hlstd{h} \hlkwb{=} \hlkwd{barplot}\hlstd{(}
\hlkwd{t}\hlstd{(xt),}
\hlkwc{border} \hlstd{=} \hlnum{NA} \hlstd{,}
\hlkwc{axes} \hlstd{= F ,}
\hlkwc{col} \hlstd{=} \hlkwd{adjustcolor}\hlstd{(colFig1}\hlopt{$}\hlstd{Col,} \hlkwc{alpha.f} \hlstd{=} \hlnum{1}\hlstd{),}
\hlkwc{horiz} \hlstd{= T ,}
\hlkwc{las} \hlstd{=} \hlnum{2} \hlstd{,}
\hlkwc{main} \hlstd{=} \hlstr{"Methods detecting positive selection"} \hlstd{,}
\hlkwc{cex.main} \hlstd{=} \hlnum{0.85}\hlstd{,}
\hlkwc{cex.names} \hlstd{=} \hlkwd{min}\hlstd{(}\hlnum{50}\hlopt{/}\hlkwd{nrow}\hlstd{(xt),} \hlnum{1.5}\hlstd{)}
\hlstd{)}
\hlkwd{axis}\hlstd{(}\hlnum{3}\hlstd{,} \hlkwc{line} \hlstd{=} \hlnum{0}\hlstd{,} \hlkwc{at} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0}\hlopt{:}\hlstd{nbrMeth),} \hlkwc{label} \hlstd{=} \hlkwd{c}\hlstd{(}\hlstr{"0"}\hlstd{,} \hlkwd{rep}\hlstd{(}\hlstr{""}\hlstd{, nbrMeth} \hlopt{-}\hlnum{1}\hlstd{), nbrMeth),} \hlkwc{tck} \hlstd{=} \hlnum{0.02}\hlstd{)}
\hlkwd{legend}\hlstd{(}\hlstr{"bottomleft"}\hlstd{,}
\hlkwc{horiz} \hlstd{= T,}
\hlkwc{border} \hlstd{= colFig1}\hlopt{$}\hlstd{Col,}
\hlkwc{legend} \hlstd{= colFig1}\hlopt{$}\hlstd{Name,}
\hlkwc{fill} \hlstd{= colFig1}\hlopt{$}\hlstd{Col,}
\hlkwc{cex} \hlstd{=} \hlnum{0.8}\hlstd{,}
\hlkwc{bty} \hlstd{=} \hlstr{"n"}\hlstd{,}
\hlkwc{xpd} \hlstd{=} \hlnum{NA}
\hlstd{)}
\hlstd{\}}
\hlstd{upsetdata}\hlopt{$}\hlstd{`tab$Gene.name`[(upsetdata}\hlopt{$}\hlstd{primates_young}\hlopt{==}\hlnum{TRUE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlopt{==}\hlnum{FALSE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlopt{==}\hlnum{FALSE}\hlstd{)]}
\end{alltt}
\begin{verbatim}
## [1] AKAP9 ALG11 ALG5 C19orf52 CENPF CHMP2A COLGALT1
## [8] DCTPP1 DDX21 FBN1 FBXL12 JAKMIP1 <NA> NLRX1
## [15] NUP210 NUP98 PCSK6 PUSL1 ZYG11B
## 332 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACSL3 ADAM9 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
List of the 1 gene found under positive selection ONLY in DGINN-Young.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{upsetdata}\hlopt{$}\hlstd{`tab$Gene.name`[(upsetdata}\hlopt{$}\hlstd{primates_young}\hlopt{==}\hlnum{FALSE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlopt{==}\hlnum{TRUE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlopt{==}\hlnum{FALSE}\hlstd{)]}
\end{alltt}
\begin{verbatim}
## [1] FBN2
## 332 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACSL3 ADAM9 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
List of the 44 genes found under positive selection ONLY in full-DGINN.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{upsetdata}\hlopt{$}\hlstd{`tab$Gene.name`[(upsetdata}\hlopt{$}\hlstd{primates_young}\hlopt{==}\hlnum{FALSE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlopt{==}\hlnum{FALSE} \hlopt{&}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlopt{==}\hlnum{TRUE}\hlstd{)]}
\end{alltt}
\begin{verbatim}
## [1] ADAM9 <NA> AP2A2 <NA> <NA> BZW2 <NA>
## [8] CEP135 CLIP4 DPH5 EIF4E2 EMC1 ERO1LB EXOSC2
## [15] GGH GLA GOLGA7 HDAC2 HECTD1 HS6ST2 IDE
## [22] <NA> LARP1 LARP4B LARP7 <NA> MIPOL1 MOV10
## [29] MYCBP2 NAT14 NGLY1 NPC2 NUPL1 PITRM1 PLOD2
## [36] PMPCB POR PRKAR2A PTBP2 RAB14 RAB1A RAB2A
## [43] RAP1GDS1 RBX1 REEP6 RPL36 SCCPDH <NA> <NA>
## [50] TIMM8B TRIM59 TUBGCP2 <NA>
## 332 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACSL3 ADAM9 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
\hlstd{df}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"/data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{makeFig1}\hlstd{(df)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/shiny-1}
\end{knitrout}
\end{document}
\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analyses}
\author{Marie Cariou}
\date{October 2020} % Activate to display a given date or no date
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Files manipulations}
\subsection{Read Janet Young's table}
<<>>=
# Root directory of the project; the input files below are resolved against it.
workdir <- "/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
# Janet Young's PAML result table. read.delim() keeps its default tab
# separator; "," is declared as the decimal mark.
tab <- read.delim(
  file = paste0(workdir,
                "data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"),
  header = TRUE,
  dec = ",",
  fill = TRUE
)
# Rows x columns of the table just read.
dim(tab)
# names(tab)
@
\subsection{Read DGINN Young table}
DGINN-Young-primate table corresponds to DGINN results, on the SAME alignment as Young-primate.
I will merge the 2 tables.
<<>>=
# DGINN results obtained on Young's primate alignments (long format: the
# table carries a `Method` column that later chunks filter on).
dginnY <- read.delim(
  file = paste0(workdir,
                "data/summary_primate_young.res"),
  header = TRUE,
  fill = TRUE
)
# Dimensions and column names, shown in the report.
dim(dginnY)
names(dginnY)
@
\subsection{Joining Young and DGINN Young table}
\textit{I hide some code corresponding to verifications of gene names coherence between tables}
<<results="hide", echo=FALSE>>=
# Check that gene names agree between Young's table (`tab`) and the
# DGINN-Young table (`dginnY`), then repair the single mismatching name so
# that both tables can be merged on `Gene.name`.
head(tab)[, 1:5]
# Number of distinct identifiers in each candidate key column.
length(unique(dginnY$Gene))
length(unique(tab$PreyGene))
length(unique(tab$Gene.name))
# Which pair of columns shares the most names?
sum(unique(dginnY$Gene) %in% unique(tab$PreyGene))
sum(unique(dginnY$Gene) %in% unique(tab$Gene.name))
# dginnY$Gene and tab$Gene.name are almost identical. Locate the row(s) of
# `tab` whose name is unknown to DGINN instead of hard-coding a row number
# (the original script used row 158, which silently breaks if the input
# table is reordered or updated).
bad_rows <- which(!(tab$Gene.name %in% unique(dginnY$Gene)))
# Gene with an odd name in some columns.
tab[bad_rows, 1:10]
# Name used by DGINN that is absent from Young's table: the replacement value.
val_remp <- as.character(
  unique(dginnY$Gene)[!(unique(dginnY$Gene) %in% tab$Gene.name)]
)
val_remp
# The manual fix below is only valid for a one-to-one mismatch; stop loudly
# otherwise rather than overwriting the wrong gene name.
stopifnot(length(bad_rows) == 1L, length(val_remp) == 1L)
# Manual replacement in Janet Young's table.
tab$Gene.name <- as.character(tab$Gene.name)
tab$Gene.name[bad_rows] <- val_remp
# All DGINN gene names should now be found in tab$Gene.name.
sum(unique(dginnY$Gene) %in% unique(tab$Gene.name))
@
<<>>=
# Join the DGINN-Young results of one method onto the global table `tab`.
#
# Args:
#   method: value of `dginnY$Method` whose rows are selected.
#
# Returns:
#   The merge of the global `tab` with the selected rows on "Gene.name"
#   (default merge(), so only genes present in both tables are kept). The
#   five added columns are Omega, PosSel, PValue, NbSites and PSS, each
#   suffixed with "_<method>". Reads the globals `tab` and `dginnY`; the
#   caller is expected to assign the result back to `tab`.
add_col <- function(method = "PamlM1M2") {
  stat_cols <- c("Omega", "PosSel", "PValue", "NbSites", "PSS")
  per_method <- dginnY[dginnY$Method == method, c("Gene", stat_cols)]
  names(per_method) <- c("Gene.name", paste0(stat_cols, "_", method))
  merge(tab, per_method, by = "Gene.name")
}
# Add the per-method columns of the four site-model methods, one at a time.
tab <- add_col(method = "PamlM1M2")
tab <- add_col(method = "PamlM7M8")
tab <- add_col(method = "BppM1M2")
tab <- add_col(method = "BppM7M8")
# BUSTED is handled by hand: only omega, verdict and p-value are kept.
tmp <- dginnY[dginnY$Method == "BUSTED", c("Gene", "Omega", "PosSel", "PValue")]
colnames(tmp) <- c("Gene.name", "Omega_BUSTED", "PosSel_BUSTED", "PValue_BUSTED")
tab <- merge(x = tab, y = tmp, by = "Gene.name")
# MEME is handled by hand as well: only the site columns are kept.
tmp <- dginnY[dginnY$Method == "MEME", c("Gene", "NbSites", "PSS")]
colnames(tmp) <- c("Gene.name", "NbSites_MEME", "PSS_MEME")
tab <- merge(x = tab, y = tmp, by = "Gene.name")
@
\subsection{Read DGINN Table}
<<>>=
# DGINN-full summary (comma-separated).
dginnT <- read.delim(
  file = paste0(workdir,
                "/data/DGINN_202005281649summary_cleaned.csv"),
  header = TRUE,
  sep = ",",
  fill = TRUE
)
dim(dginnT)
names(dginnT)
# Genes in the dginn-primate output that are absent from the original table.
dginnT[!(dginnT$Gene %in% tab$Gene.name), "Gene"]
# This includes paralogs, recombinations found by DGINN
# and additional genes included on purpose.
# Genes from the original list that are absent from the DGINN output.
tab[!(tab$Gene.name %in% dginnT$Gene), "Gene.name"]
# Rename the columns by position: the gene column becomes the merge key
# "Gene.name" and every result column gets the "dginn-primate_" prefix.
colnames(dginnT) <- c(
  "File", "Name", "Gene.name", "GeneSize",
  "dginn-primate_NbSpecies",
  "dginn-primate_omegaM0Bpp",
  "dginn-primate_omegaM0codeml",
  "dginn-primate_BUSTED",
  "dginn-primate_BUSTED.p.value",
  "dginn-primate_MEME.NbSites",
  "dginn-primate_MEME.PSS",
  "dginn-primate_BppM1M2",
  "dginn-primate_BppM1M2.p.value",
  "dginn-primate_BppM1M2.NbSites",
  "dginn-primate_BppM1M2.PSS",
  "dginn-primate_BppM7M8",
  "dginn-primate_BppM7M8.p.value",
  "dginn-primate_BppM7M8.NbSites",
  "dginn-primate_BppM7M8.PSS",
  "dginn-primate_codemlM1M2",
  "dginn-primate_codemlM1M2.p.value",
  "dginn-primate_codemlM1M2.NbSites",
  "dginn-primate_codemlM1M2.PSS",
  "dginn-primate_codemlM7M8",
  "dginn-primate_codemlM7M8.p.value",
  "dginn-primate_codemlM7M8.NbSites",
  "dginn-primate_codemlM7M8.PSS"
)
@
\subsection{Join Table and DGINN table}
<<>>=
# Left join: every row of `tab` is kept; genes without a DGINN-full result
# get NA in the "dginn-primate_*" columns.
tab <- merge(x = tab, y = dginnT, by = "Gene.name", all.x = TRUE)
@
\subsection{Write new table}
<<>>=
# Export the merged table as tab-separated text, without row names or quotes.
write.table(
  x = tab,
  file = "COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20201014.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comparisons Primates}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS Janet Young's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8>>=
# Whole-gene omega from Young's table against the omega DGINN reports for
# method PamlM7M8.
plot(
  x = tab$whole.gene.dN.dS.model.0,
  y = tab$Omega_PamlM7M8,
  xlab = "Omega Young-primate",
  ylab = "Omega DGINN-Young-primate"
)
# Identity line (intercept 0, slope 1).
abline(a = 0, b = 1)
# Label the genes lying well above the identity line; the label is drawn
# slightly above the point.
outlier <- tab[tab$whole.gene.dN.dS.model.0 < 0.2 & tab$Omega_PamlM7M8 > 0.4, ]
text(
  x = outlier$whole.gene.dN.dS.model.0,
  y = outlier$Omega_PamlM7M8 + 0.01,
  labels = outlier$Gene.name
)
# Second labelling pass with higher cut-offs on both axes.
outlier <- tab[tab$whole.gene.dN.dS.model.0 < 0.6 & tab$Omega_PamlM7M8 > 0.7, ]
text(
  x = outlier$whole.gene.dN.dS.model.0,
  y = outlier$Omega_PamlM7M8 + 0.01,
  labels = outlier$Gene.name
)
@
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne "omegaM0Bpp" de la sortie DGINN-full VS colonne "Omega" (PamlM7M8) de la sortie DGINN-Young-primate.
<<omegaM7M8_2>>=
# Convert the DGINN-full omega column to numeric by way of character, so
# that a factor column yields its printed values rather than level codes.
tab[["dginn-primate_omegaM0Bpp"]] <- as.numeric(
  as.character(tab[["dginn-primate_omegaM0Bpp"]])
)
plot(
  x = tab[["dginn-primate_omegaM0Bpp"]],
  y = tab$Omega_PamlM7M8,
  xlab = "DGINN-full's",
  ylab = "Omega DGINN-Young-primate"
)
# Identity line (intercept 0, slope 1).
abline(a = 0, b = 1)
# Label the genes lying well below the identity line.
outlier <- tab[tab[["dginn-primate_omegaM0Bpp"]] > 0.4 & tab$Omega_PamlM7M8 < 0.2, ]
text(
  x = outlier[["dginn-primate_omegaM0Bpp"]],
  y = outlier$Omega_PamlM7M8 + 0.01,
  labels = outlier$Gene.name
)
# Second labelling pass with higher cut-offs on both axes.
outlier <- tab[tab[["dginn-primate_omegaM0Bpp"]] > 0.5 & tab$Omega_PamlM7M8 < 0.4, ]
text(
  x = outlier[["dginn-primate_omegaM0Bpp"]],
  y = outlier$Omega_PamlM7M8 + 0.01,
  labels = outlier$Gene.name
)
@
\subsection{Janet Young's results (Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8_3>>=
# Whole-gene omega from Young's table against the DGINN-full omega (the
# latter coerced to numeric by way of character).
plot(
  x = tab$whole.gene.dN.dS.model.0,
  y = as.numeric(as.character(tab[["dginn-primate_omegaM0Bpp"]])),
  xlab = "Omega Young-primate",
  ylab = "DGINN-full's"
)
# Identity line (intercept 0, slope 1).
abline(a = 0, b = 1)
# Label the genes lying well above the identity line.
outlier <- tab[tab$whole.gene.dN.dS.model.0 < 0.4 & tab[["dginn-primate_omegaM0Bpp"]] > 0.5, ]
text(
  x = outlier$whole.gene.dN.dS.model.0,
  y = outlier[["dginn-primate_omegaM0Bpp"]],
  labels = outlier$Gene.name
)
@
\section{Overlap}
\subsection{Mondrian}
<<mondrianprimates>>=
library(Mondrian)
#######
# One row per gene; the columns added below flag positive selection (1/0)
# in each of the three analyses.
monddata <- as.data.frame(tab$Gene.name)
dim(monddata)
# Per gene, number of DGINN-Young methods whose verdict is "Y".
dginnyoungtmp <- rowSums(do.call(cbind, lapply(
  c("PosSel_PamlM1M2", "PosSel_PamlM7M8",
    "PosSel_BppM1M2", "PosSel_BppM7M8", "PosSel_BUSTED"),
  function(column) tab[[column]] == "Y"
)))
#monddata$primates_dginn_young<-ifelse(tmp$PosSel_PamlM7M8=="Y", 1,0)
# Per gene, number of DGINN-full methods whose verdict is "Y" (NA when any of
# the five verdicts is missing).
dginnfulltmp <- rowSums(do.call(cbind, lapply(
  c("dginn-primate_BUSTED", "dginn-primate_BppM1M2",
    "dginn-primate_BppM7M8", "dginn-primate_codemlM1M2",
    "dginn-primate_codemlM7M8"),
  function(column) tab[[column]] == "Y"
)))
# Young's analysis: M8 vs M7 p-value below 0.05.
monddata$primates_young <- ifelse(tab$pVal.M8vsM7 < 0.05, 1, 0)
#monddata$primates_cooper<-ifelse(tab$cooper.primates.M7.M8_p_val<0.05, 1, 0)
# Overlap when a gene needs at least 3 positive methods.
monddata$primates_dginn_young <- ifelse(dginnyoungtmp >= 3, 1, 0)
monddata$primates_dginn_full <- ifelse(dginnfulltmp >= 3, 1, 0)
mondrian(
  na.omit(monddata[, 2:4]),
  labels = c("Young", "DGINN-Young >=3", "DGINN-full >=3" )
)
#####
# Same overlap with the stricter threshold of at least 4 positive methods.
monddata$primates_dginn_young <- ifelse(dginnyoungtmp >= 4, 1, 0)
monddata$primates_dginn_full <- ifelse(dginnfulltmp >= 4, 1, 0)
mondrian(
  na.omit(monddata[, 2:4]),
  labels = c("Young", "DGINN-Young >=4", "DGINN-full >=4")
)
@
Comparison of results with the same method.
<<>>=
#####
#####
# Same-method comparison: codeml (PAML) M7 vs M8 in all three analyses.
# `primates_young` already holds Young's M8-vs-M7 test. For DGINN-Young the
# matching verdict is the PamlM7M8 one; the previous code used PosSel_BppM7M8
# (Bio++), which contradicts the "posel codeml M7M8" title of this plot.
monddata$primates_dginn_young <- tab$PosSel_PamlM7M8 == "Y"
monddata$primates_dginn_full <- tab[["dginn-primate_codemlM7M8"]] == "Y"
# Rows with a missing flag in any analysis are dropped before plotting.
mondrian(
  na.omit(monddata[, 2:4]),
  labels = c("Young", "DGINN-Young", "DGINN-full"),
  main = "posel codeml M7M8"
)
@
\subsection{UpSetR}
Just another representation of the same result.
<<subsetprimates>>=
library(UpSetR)
# One row per gene, one 1/0 membership column per analysis.
upsetdata <- as.data.frame(tab$Gene.name)
# Young's analysis: M8 vs M7 p-value below 0.05.
upsetdata$primates_young <- ifelse(tab$pVal.M8vsM7 < 0.05, 1, 0)
###
# Set intersections when a gene needs at least 3 positive DGINN methods.
upsetdata$primates_dginn_young <- ifelse(dginnyoungtmp >= 3, 1, 0)
upsetdata$primates_dginn_full <- ifelse(dginnfulltmp >= 3, 1, 0)
upset(
  na.omit(upsetdata),
  nsets = 3,
  matrix.color = "#DC267F",
  main.bar.color = "#648FFF",
  sets.bar.color = "#FE6100"
)
###
# Same plot with the stricter threshold of at least 4 positive methods.
upsetdata$primates_dginn_young <- ifelse(dginnyoungtmp >= 4, 1, 0)
upsetdata$primates_dginn_full <- ifelse(dginnfulltmp >= 4, 1, 0)
upset(
  na.omit(upsetdata),
  nsets = 3,
  matrix.color = "#DC267F",
  main.bar.color = "#648FFF",
  sets.bar.color = "#FE6100"
)
@
\section{Gene List}
Genes under positive selection for at least 4 methods.
<<>>=
# Per gene, number of DGINN-full methods whose verdict is "Y" (NA when any of
# the five verdicts is missing).
dginnfulltmp <- rowSums(do.call(cbind, lapply(
  c("dginn-primate_BUSTED",
    "dginn-primate_BppM1M2",
    "dginn-primate_BppM7M8",
    "dginn-primate_codemlM1M2",
    "dginn-primate_codemlM7M8"),
  function(column) tab[[column]] == "Y"
)))
# which() drops both FALSE and NA, so genes without a count are left out.
# Genes positive for at least 4 methods, then for at least 3 methods.
tab$Gene.name[which(dginnfulltmp >= 4)]
tab$Gene.name[which(dginnfulltmp >= 3)]
# Per-method verdicts of the ">= 4 methods" genes, written to a text file.
tmp <- tab[
  which(dginnfulltmp >= 4),
  c("Gene.name", "dginn-primate_BUSTED", "dginn-primate_BppM1M2",
    "dginn-primate_BppM7M8", "dginn-primate_codemlM1M2",
    "dginn-primate_codemlM7M8")
]
write.table(
  x = tmp,
  file = "geneList_DGINN_full_primate_pos4.txt",
  quote = FALSE,
  row.names = FALSE
)
@
\section{Shiny like}
<<shiny, fig.height=11>>=
# Draw a horizontal stacked bar chart of the genes detected under positive
# selection: one bar per gene, one coloured unit segment per method that
# reports "Y" for that gene.
#
# Args:
#   df: data frame with a `Gene` column and the five method columns
#     "BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2" and "codemlM7M8", holding
#     "Y" where the method detects positive selection. Any other value,
#     including NA, counts as "not detected".
#   min_methods: minimum number of methods reporting "Y" for a gene to be
#     drawn. The default of 3 reproduces the former hard-coded "> 2" filter.
#
# Returns:
#   Invisibly, the value of the final legend() call.
#
# Raises an error when a required column is missing or when no gene reaches
# `min_methods`. Graphical parameters changed for the figure are restored
# when the function exits.
makeFig1 <- function(df, min_methods = 3) {
  # prepare data for colors etc
  colMethods <- c("deepskyblue4", "darkorange", "deepskyblue3",
                  "mediumseagreen", "yellow3", "black")
  nameMethods <- c("BUSTED", "BppM1M2", "BppM7M8",
                   "codemlM1M2", "codemlM7M8", "MEME")
  metColor <- data.frame(Name = nameMethods, Col = colMethods,
                         stringsAsFactors = FALSE)

  # subset for this specific figure
  methods <- c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8")
  nbrMeth <- length(methods)
  missing_cols <- setdiff(c("Gene", methods), names(df))
  if (length(missing_cols) > 0) {
    stop("makeFig1: missing column(s) in df: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # 1 where the method reports "Y", 0 otherwise. A missing verdict is counted
  # as 0: left as NA it would propagate to the row sums and create NA rows
  # (and NA row names) in the filter below.
  detected <- as.matrix(df[, methods, drop = FALSE]) == "Y"
  detected[is.na(detected)] <- FALSE
  xt <- detected * 1
  rownames(xt) <- as.character(df$Gene)

  # sort by increasing number of methods so that the genes with the most "Y"
  # end up on top of the horizontal barplot, then drop the genes below the
  # threshold
  nbY <- rowSums(xt)
  by_count <- order(nbY)
  xt <- xt[by_count, , drop = FALSE]
  xt <- xt[nbY[by_count] >= min_methods, , drop = FALSE]
  if (nrow(xt) == 0) {
    stop("makeFig1: no gene is detected by at least ", min_methods,
         " method(s)", call. = FALSE)
  }

  colFig1 <- metColor[metColor$Name %in% colnames(xt), ]

  ##### PART 1 : NUMBER OF METHODS
  # par() returns the previous values; put them back on exit so the margins
  # and clipping chosen here do not leak into later plots.
  old_par <- par(xpd = NA, mar = c(2, 7, 4, 0), oma = c(0, 0, 0, 0),
                 mgp = c(3, 0.3, 0))
  on.exit(par(old_par), add = TRUE)

  barplot(
    t(xt),
    border = NA,
    axes = FALSE,
    col = adjustcolor(colFig1$Col, alpha.f = 1),
    horiz = TRUE,
    las = 2,
    main = "Methods detecting positive selection",
    cex.main = 0.85,
    # shrink the gene labels when many genes are drawn
    cex.names = min(50 / nrow(xt), 1.5)
  )
  # top axis: tick at every count, labels only at 0 and at the maximum
  axis(3, line = 0, at = 0:nbrMeth,
       labels = c("0", rep("", nbrMeth - 1), nbrMeth), tck = 0.02)
  legend("bottomleft",
    horiz = TRUE,
    border = colFig1$Col,
    legend = colFig1$Name,
    fill = colFig1$Col,
    cex = 0.8,
    bty = "n",
    xpd = NA
  )
}
# Read the DGINN-full summary (comma-separated) and draw the figure from it.
df <- read.delim(
  file = paste0(workdir,
                "/data/DGINN_202005281649summary_cleaned.csv"),
  header = TRUE,
  sep = ",",
  fill = TRUE
)
makeFig1(df)
@
\end{document}
File added
\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb
\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{October 2020} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Files manipulations}
\subsection{Read Janet Young's table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{dec}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] 332 84
\end{verbatim}
\begin{alltt}
\hlcom{#names(tab)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Read DGINN Young table}
DGINN-Young-primate table corresponds to DGINN results, on the SAME alignment as Young-primate.
I will merge the 2 tables.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnY}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"data/summary_primate_young.res"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)}
\hlkwd{dim}\hlstd{(dginnY)}
\end{alltt}
\begin{verbatim}
## [1] 1992 7
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginnY)}
\end{alltt}
\begin{verbatim}
## [1] "Gene" "Omega" "Method" "PosSel" "PValue" "NbSites" "PSS"
\end{verbatim}
\end{kframe}
\end{knitrout}
\subsection{Joining Young and DGINN Young table}
\textit{I hide some code corresponding to verifications of gene names coherence between tables}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{add_col}\hlkwb{<-}\hlkwa{function}\hlstd{(}\hlkwc{method}\hlstd{=}\hlstr{"PamlM1M2"}\hlstd{)\{}
\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstd{method,}
\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"Omega_"}\hlstd{, method),}
\hlkwd{paste0}\hlstd{(}\hlstr{"PosSel_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PValue_"}\hlstd{, method),}
\hlkwd{paste0}\hlstd{(}\hlstr{"NbSites_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PSS_"}\hlstd{, method))}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\hlkwd{return}\hlstd{(tab)}
\hlstd{\}}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM1M2"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM7M8"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM1M2"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM7M8"}\hlstd{)}
\hlcom{# Manip pour la colonne BUSTED}
\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"BUSTED"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"Omega_BUSTED"}\hlstd{,} \hlstr{"PosSel_BUSTED"}\hlstd{,} \hlstr{"PValue_BUSTED"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"MEME"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"NbSites_MEME"}\hlstd{,} \hlstr{"PSS_MEME"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Read DGINN Table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnT}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"/data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{dim}\hlstd{(dginnT)}
\end{alltt}
\begin{verbatim}
## [1] 412 27
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginnT)}
\end{alltt}
\begin{verbatim}
## [1] "File" "Name" "Gene" "GeneSize" "NbSpecies" "omegaM0Bpp"
## [7] "omegaM0codeml" "BUSTED" "BUSTED.p.value" "MEME.NbSites" "MEME.PSS" "BppM1M2"
## [13] "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS" "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites"
## [19] "BppM7M8.PSS" "codemlM1M2" "codemlM1M2.p.value" "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8"
## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS"
\end{verbatim}
\begin{alltt}
\hlcom{# Number of genes in dginn-primate output not present in the original table}
\hlstd{dginnT[(dginnT}\hlopt{$}\hlstd{Gene} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F,}\hlstr{"Gene"}\hlstd{]}
\end{alltt}
\begin{verbatim}
## [1] ACE2 ADAM9[0-3120] ADAM9[3119-3927] ATP5MGL C1H1ORF50 CEP135[0-3264] CEP135[3263-3678]
## [8] CEP43 COQ8B COQ8A CSNK2A1 CSNK2B[0-609] CSNK2B[608-2568] CYB5R1
## [15] DDX21[0-717] DDX21[716-2538] DDX50 DNAJC15 DPH5[0-702] DPH5[701-1326] DPY19L2
## [22] ELOC ERO1B EXOSC3[0-1446] EXOSC3[1445-1980] FBN3 GNB4 GNB2
## [29] GNB3 GOLGA7[0-312] GOLGA7[311-549] GPX1[0-1218] GPX1[1217-2946] HDAC1 HS6ST3
## [36] IMPDH1 ITGB1[0-2328] ITGB1[2327-2844] LMAN2L MRPS5[0-1569] MRPS5[1568-3783] MARC2
## [43] MGRN1 NDFIP2[0-768] NDFIP2[767-1314] NDUFAF2[0-258] NDUFAF2[257-744] NSD2 NUP58
## [50] NUP58[0-1824] NUP58[1823-2367] PABPC3 POTPABPC1 PABPC4L PABPC5 PCSK5
## [57] PRIM2[0-1071] PRIM2[1070-1902] PRKACB PRKACG PTGES2[0-1587] PTGES2[1586-2202] RAB8B
## [64] RAB13 RAB18[0-855] RAB18[854-1815] RAB2B RAB5A RAB5B RAB15
## [71] RALB EZR EZR[0-1458] EZR[1457-3771] MSN RETREG3 RHOB
## [78] RHOC SLC44A2[0-2577] SLC44A2[2576-3657] SPART SRP72[0-2604] SRP72[2603-3417] STOM[0-1047]
## [85] STOM[1046-1800] STOML3 TIMM29 TLE4 TLE2 TLE2[0-1302] TLE2[1301-3987]
## [92] TMPRSS2 TOMM70 TOR1B WASHC4 WFS1[0-2346] WFS1[2345-3216] YIF1B
## 411 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ADAM9 ADAM9[0-3120] ADAM9[3119-3927] ADAMTS1 AES AGPS AKAP8 AKAP8L AKAP9 ... ZYG11B
\end{verbatim}
\begin{alltt}
\hlcom{# This includes paralogs, recombinations found by DGINN }
\hlcom{# and additionnal genes included on purpose}
\hlcom{# Number of genes from the original list not present in DGINN output}
\hlstd{tab[(tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnT}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlstd{F,}\hlstr{"Gene.name"}\hlstd{]}
\end{alltt}
\begin{verbatim}
## [1] "ADCK4" "ARL6IP6" "ATP5L" "C19orf52" "C1orf50" "ERO1LB" "FAM134C" "FGFR1OP" "KIAA1033" "MFGE8" "NUPL1" "SIGMAR1"
## [13] "SPG20" "TCEB1" "TCEB2" "TOMM70A" "USP13" "VIMP" "WHSC1"
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginnT)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} \hlstr{"GeneSize"}\hlstd{,} \hlstr{"dginn-primate_NbSpecies"}\hlstd{,} \hlstr{"dginn-primate_omegaM0Bpp"}\hlstd{,}
\hlstr{"dginn-primate_omegaM0codeml"}\hlstd{,} \hlstr{"dginn-primate_BUSTED"}\hlstd{,} \hlstr{"dginn-primate_BUSTED.p.value"}\hlstd{,}
\hlstr{"dginn-primate_MEME.NbSites"}\hlstd{,} \hlstr{"dginn-primate_MEME.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,}
\hlstr{"dginn-primate_BppM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.PSS"}\hlstd{,}
\hlstr{"dginn-primate_BppM7M8"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.NbSites"}\hlstd{,}
\hlstr{"dginn-primate_BppM7M8.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.p.value"}\hlstd{,}
\hlstr{"dginn-primate_codemlM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8"}\hlstd{,}
\hlstr{"dginn-primate_codemlM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.PSS"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Join Table and DGINN table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab,dginnT,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=T)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Write new table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{write.table}\hlstd{(tab,}
\hlstr{"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20201014.txt"}\hlstd{,}
\hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comparisons Primates}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS Janet Young's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0, tab}\hlopt{$}\hlstd{Omega_PamlM7M8,}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-Young-primate"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.2} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{>}\hlnum{0.4}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.6} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{>}\hlnum{0.7}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/omegaM7M8-1}
\end{knitrout}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne "dginn-primate\_omegaM0Bpp" (DGINN-full) VS colonne "Omega\_PamlM7M8" (DGINN-Young-primate).
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{))}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{, tab}\hlopt{$}\hlstd{Omega_PamlM7M8,}
\hlkwc{xlab}\hlstd{=}\hlstr{"DGINN-full's"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-Young-primate"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{<}\hlnum{0.2}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.5} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{<}\hlnum{0.4}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{,}
\hlkwc{y}\hlstd{=(outlier}\hlopt{$}\hlstd{Omega_PamlM7M8}\hlopt{+}\hlnum{0.01}\hlstd{),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/omegaM7M8_2-1}
\end{knitrout}
\subsection{Janet Young's results (Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,} \hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{)),}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Young-primate"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"DGINN-full's"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0}\hlopt{<}\hlnum{0.4} \hlopt{&} \hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlopt{>}\hlnum{0.5}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{whole.gene.dN.dS.model.0,}
\hlkwc{y}\hlstd{=outlier}\hlopt{$}\hlstr{'dginn-primate_omegaM0Bpp'}\hlstd{,}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/omegaM7M8_3-1}
\end{knitrout}
\section{Overlap}
\subsection{Mondrian}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{library}\hlstd{(Mondrian)}
\hlcom{#######}
\hlstd{monddata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)}
\hlkwd{dim}\hlstd{(monddata)}
\end{alltt}
\begin{verbatim}
## [1] 333 1
\end{verbatim}
\begin{alltt}
\hlstd{dginnyoungtmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstd{PosSel_PamlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{PosSel_PamlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstd{PosSel_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{PosSel_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstd{PosSel_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlcom{#monddata$primates_dginn_young<-ifelse(tmp$PosSel_PamlM7M8=="Y", 1,0)}
\hlstd{dginnfulltmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_BUSTED'}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstr{'dginn-primate_BppM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_BppM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstr{'dginn-primate_codemlM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{, tab}\hlopt{$}\hlstr{'dginn-primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{monddata}\hlopt{$}\hlstd{primates_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlcom{#monddata$primates_cooper<-ifelse(tab$cooper.primates.M7.M8_p_val<0.05, 1, 0)}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young >=3"}\hlstd{,} \hlstr{"DGINN-full >=3"} \hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianprimates-1}
\begin{kframe}\begin{alltt}
\hlcom{#####}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young >=4"}\hlstd{,} \hlstr{"DGINN-full >=4"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianprimates-2}
\end{knitrout}
Comparison of results with the same method.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{#####}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlstd{tab}\hlopt{$}\hlstd{PosSel_BppM7M8}\hlopt{==}\hlstr{"Y"}
\hlstd{monddata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}
\hlkwd{mondrian}\hlstd{(}\hlkwd{na.omit}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]),} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"Young"}\hlstd{,} \hlstr{"DGINN-Young"}\hlstd{,} \hlstr{"DGINN-full"}\hlstd{),} \hlkwc{main}\hlstd{=}\hlstr{"posel codeml M7M8"}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/unnamed-chunk-8-1}
\end{knitrout}
\subsection{UpSetR}
Just another representation of the same result.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{library}\hlstd{(UpSetR)}
\hlstd{upsetdata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tab}\hlopt{$}\hlstd{pVal.M8vsM7}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlcom{###}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{upset}\hlstd{(}\hlkwd{na.omit}\hlstd{(upsetdata),} \hlkwc{nsets} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{matrix.color} \hlstd{=} \hlstr{"#DC267F"}\hlstd{,}
\hlkwc{main.bar.color} \hlstd{=} \hlstr{"#648FFF"}\hlstd{,} \hlkwc{sets.bar.color} \hlstd{=} \hlstr{"#FE6100"}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/subsetprimates-1}
\begin{kframe}\begin{alltt}
\hlcom{###}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_young}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnyoungtmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{primates_dginn_full}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginnfulltmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{upset}\hlstd{(}\hlkwd{na.omit}\hlstd{(upsetdata),} \hlkwc{nsets} \hlstd{=} \hlnum{3}\hlstd{,} \hlkwc{matrix.color} \hlstd{=} \hlstr{"#DC267F"}\hlstd{,}
\hlkwc{main.bar.color} \hlstd{=} \hlstr{"#648FFF"}\hlstd{,} \hlkwc{sets.bar.color} \hlstd{=} \hlstr{"#FE6100"}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/subsetprimates-2}
\end{knitrout}
\section{Gene List}
Genes found under positive selection by at least 4 methods (the list for at least 3 methods is also printed for comparison).
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnfulltmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tab}\hlopt{$}\hlstr{'dginn-primate_BUSTED'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_BppM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_BppM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_codemlM1M2'}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tab}\hlopt{$}\hlstr{'dginn-primate_codemlM7M8'}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[dginnfulltmp}\hlopt{>=}\hlnum{4} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F]}
\end{alltt}
\begin{verbatim}
## [1] "ACADM" "BCS1L" "BRD4" "CDK5RAP2" "CEP135" "CEP68" "CLIP4" "DNMT1" "DPH5" "EMC1" "FYCO1"
## [12] "GCC2" "GGH" "GHITM" "GIGYF2" "GLA" "GOLGA7" "HECTD1" "IDE" "ITGB1" "LARP1" "LARP4B"
## [23] "LMAN2" "MARK1" "MIPOL1" "MPHOSPH10" "MYCBP2" "NDUFAF2" "NDUFB9" "PCNT" "POLA1" "PRIM2" "PRKAR2A"
## [34] "PVR" "REEP6" "RIPK1" "SAAL1" "SEPSECS" "SIRT5" "SLC25A21" "SLC27A2" "TMEM39B" "TOR1AIP1" "TUBGCP2"
## [45] "UBAP2" "UGGT2" "VPS39" "ZNF318"
\end{verbatim}
\begin{alltt}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[dginnfulltmp}\hlopt{>=}\hlnum{3} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F]}
\end{alltt}
\begin{verbatim}
## [1] "ACADM" "ADAM9" "AP2A2" "ATE1" "BCS1L" "BRD4" "BZW2" "CDK5RAP2" "CEP135" "CEP68" "CLIP4"
## [12] "CNTRL" "DNMT1" "DPH5" "EDEM3" "EIF4E2" "EMC1" "EXOSC2" "FYCO1" "GCC2" "GGH" "GHITM"
## [23] "GIGYF2" "GLA" "GOLGA7" "GOLGB1" "GORASP1" "HDAC2" "HECTD1" "HS6ST2" "IDE" "ITGB1" "LARP1"
## [34] "LARP4B" "LARP7" "LMAN2" "MARK1" "MDN1" "MIPOL1" "MOV10" "MPHOSPH10" "MRPS5" "MYCBP2" "NAT14"
## [45] "NDUFAF2" "NDUFB9" "NGLY1" "NPC2" "PCNT" "PITRM1" "PLAT" "PLOD2" "PMPCB" "POLA1" "POR"
## [56] "PRIM2" "PRKAR2A" "PTBP2" "PVR" "RAB14" "RAB1A" "RAB2A" "RAP1GDS1" "RBX1" "REEP6" "RIPK1"
## [67] "RPL36" "SAAL1" "SCCPDH" "SEPSECS" "SIRT5" "SLC25A21" "SLC27A2" "STOM" "TIMM8B" "TMEM39B" "TOR1AIP1"
## [78] "TRIM59" "TRMT1" "TUBGCP2" "UBAP2" "UGGT2" "USP54" "VPS39" "ZNF318"
\end{verbatim}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{tab[dginnfulltmp}\hlopt{>=}\hlnum{4} \hlopt{&} \hlkwd{is.na}\hlstd{(dginnfulltmp)}\hlopt{==}\hlstd{F,}
\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,}\hlstr{"dginn-primate_BUSTED"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,}
\hlstr{"dginn-primate_BppM7M8"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8"}\hlstd{)]}
\hlkwd{write.table}\hlstd{(tmp,} \hlstr{"geneList_DGINN_full_primate_pos4.txt"}\hlstd{,} \hlkwc{row.names}\hlstd{=F,} \hlkwc{quote}\hlstd{=F)}
\end{alltt}
\end{kframe}
\end{knitrout}
\section{Shiny like}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{makeFig1} \hlkwb{<-} \hlkwa{function}\hlstd{(}\hlkwc{df}\hlstd{)\{}
\hlcom{# prepare data for colors etc}
\hlstd{colMethods} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlstr{"deepskyblue4"}\hlstd{,} \hlstr{"darkorange"} \hlstd{,} \hlstr{"deepskyblue3"} \hlstd{,} \hlstr{"mediumseagreen"} \hlstd{,} \hlstr{"yellow3"} \hlstd{,} \hlstr{"black"}\hlstd{)}
\hlstd{nameMethods} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlstr{"BUSTED"}\hlstd{,} \hlstr{"BppM1M2"}\hlstd{,} \hlstr{"BppM7M8"}\hlstd{,} \hlstr{"codemlM1M2"}\hlstd{,} \hlstr{"codemlM7M8"}\hlstd{,} \hlstr{"MEME"}\hlstd{)}
\hlstd{metColor} \hlkwb{<-} \hlkwd{data.frame}\hlstd{(}\hlkwc{Name} \hlstd{= nameMethods ,} \hlkwc{Col} \hlstd{= colMethods ,} \hlkwc{stringsAsFactors} \hlstd{=} \hlnum{FALSE}\hlstd{)}
\hlcom{# subset for this specific figure}
\hlcom{#df <- df[df$nbY >= 1, ] # to drop genes found by 0 methods (big datasets)}
\hlstd{xt} \hlkwb{<-} \hlstd{df[,} \hlkwd{c}\hlstd{(}\hlstr{"BUSTED"}\hlstd{,} \hlstr{"BppM1M2"}\hlstd{,} \hlstr{"BppM7M8"}\hlstd{,} \hlstr{"codemlM1M2"}\hlstd{,} \hlstr{"codemlM7M8"}\hlstd{)]}
\hlstd{xt}\hlopt{$}\hlstd{Gene} \hlkwb{<-} \hlstd{df}\hlopt{$}\hlstd{Gene}
\hlstd{nbrMeth} \hlkwb{<-} \hlnum{5}
\hlcom{# reverse order of dataframe so that genes with the most Y are at the bottom (to be on top of the barplot)}
\hlstd{xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} \hlkwb{<-} \hlkwd{ifelse}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]} \hlopt{==} \hlstr{"Y"}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlcom{# sort by number of Y, then keep only genes detected by more than 2 methods}
\hlstd{xt}\hlkwb{<-}\hlstd{xt[}\hlkwd{order}\hlstd{(}\hlkwd{rowSums}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])),]}
\hlstd{xt}\hlkwb{<-}\hlstd{xt[}\hlkwd{rowSums}\hlstd{(xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])}\hlopt{>}\hlnum{2}\hlstd{,]}
\hlkwd{row.names}\hlstd{(xt)}\hlkwb{<-}\hlstd{xt}\hlopt{$}\hlstd{Gene}
\hlstd{xt}\hlkwb{<-}\hlstd{xt[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]}
\hlstd{colFig1} \hlkwb{<-} \hlstd{metColor[}\hlkwd{which}\hlstd{(metColor}\hlopt{$}\hlstd{Name} \hlopt{%in%} \hlkwd{colnames}\hlstd{(xt)) , ]}
\hlcom{##### PART 1 : NUMBER OF METHODS}
\hlkwd{par}\hlstd{(}\hlkwc{xpd} \hlstd{=} \hlnum{NA} \hlstd{,} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{7}\hlstd{,}\hlnum{4}\hlstd{,}\hlnum{0}\hlstd{) ,} \hlkwc{oma} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{) ,} \hlkwc{mgp} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{3}\hlstd{,}\hlnum{0.3}\hlstd{,}\hlnum{0}\hlstd{))}
\hlstd{h} \hlkwb{=} \hlkwd{barplot}\hlstd{(}
\hlkwd{t}\hlstd{(xt),}
\hlkwc{border} \hlstd{=} \hlnum{NA} \hlstd{,}
\hlkwc{axes} \hlstd{= F ,}
\hlkwc{col} \hlstd{=} \hlkwd{adjustcolor}\hlstd{(colFig1}\hlopt{$}\hlstd{Col,} \hlkwc{alpha.f} \hlstd{=} \hlnum{1}\hlstd{),}
\hlkwc{horiz} \hlstd{= T ,}
\hlkwc{las} \hlstd{=} \hlnum{2} \hlstd{,}
\hlkwc{main} \hlstd{=} \hlstr{"Methods detecting positive selection"} \hlstd{,}
\hlkwc{cex.main} \hlstd{=} \hlnum{0.85}\hlstd{,}
\hlkwc{cex.names} \hlstd{=} \hlkwd{min}\hlstd{(}\hlnum{50}\hlopt{/}\hlkwd{nrow}\hlstd{(xt),} \hlnum{1.5}\hlstd{)}
\hlstd{)}
\hlkwd{axis}\hlstd{(}\hlnum{3}\hlstd{,} \hlkwc{line} \hlstd{=} \hlnum{0}\hlstd{,} \hlkwc{at} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{0}\hlopt{:}\hlstd{nbrMeth),} \hlkwc{label} \hlstd{=} \hlkwd{c}\hlstd{(}\hlstr{"0"}\hlstd{,} \hlkwd{rep}\hlstd{(}\hlstr{""}\hlstd{, nbrMeth} \hlopt{-}\hlnum{1}\hlstd{), nbrMeth),} \hlkwc{tck} \hlstd{=} \hlnum{0.02}\hlstd{)}
\hlkwd{legend}\hlstd{(}\hlstr{"bottomleft"}\hlstd{,}
\hlkwc{horiz} \hlstd{= T,}
\hlkwc{border} \hlstd{= colFig1}\hlopt{$}\hlstd{Col,}
\hlkwc{legend} \hlstd{= colFig1}\hlopt{$}\hlstd{Name,}
\hlkwc{fill} \hlstd{= colFig1}\hlopt{$}\hlstd{Col,}
\hlkwc{cex} \hlstd{=} \hlnum{0.8}\hlstd{,}
\hlkwc{bty} \hlstd{=} \hlstr{"n"}\hlstd{,}
\hlkwc{xpd} \hlstd{=} \hlnum{NA}
\hlstd{)}
\hlstd{\}}
\hlstd{df}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"/data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{makeFig1}\hlstd{(df)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/shiny-1}
\end{knitrout}
\end{document}
......@@ -30,108 +30,166 @@
Analyses were formatted by the script covid\_comp\_script0\_table.Rnw.
<<>>=
workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
dim(tab)
tab$Gene.name<-as.character(tab$Gene.name)
tab$Gene.name<-as.character(tab$Gene.name.x)
tab$Gene.name[tab$PreyGene=="MTARC1"]<-"MTARC1"
@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comparisons Primates}
\subsection{Janet Young's results (Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8_1>>=
plot(tab$whole.gene.dN.dS.model.0, as.numeric(as.character(tab$dginn.primate_omegaM0Bpp)),
xlab="Omega Young-primate", ylab="DGINN-full's")
<<omegaM7M8_1>>=
tab$dginn.primate_omegaM0Bpp[tab$dginn.primate_omegaM0Bpp=="na"]<-NA
tab$dginn.primate_omegaM0Bpp<-as.numeric(as.character(
tab$dginn.primate_omegaM0Bpp))
plot(tab$whole.gene.dN.dS.model.0,
tab$dginn.primate_omegaM0Bpp,
xlab="Omega Young-primate",
ylab="DGINN-full's",
cex=0.3)
abline(0,1)
abline(lm(as.numeric(as.character(tab$dginn.primate_omegaM0Bpp))~tab$whole.gene.dN.dS.model.0), col="red")
abline(lm(tab$dginn.primate_omegaM0Bpp~tab$whole.gene.dN.dS.model.0),
col="red")
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.4 & tab$dginn.primate_omegaM0Bpp>0.5,]
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.4 &
tab$dginn.primate_omegaM0Bpp>0.5,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.1 &
tab$dginn.primate_omegaM0Bpp>0.3,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)
outlier<-tab[tab$whole.gene.dN.dS.model.0>0.33 &
tab$dginn.primate_omegaM0Bpp<0.2,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)
outlier<-tab[tab$whole.gene.dN.dS.model.0>0.6 &
tab$dginn.primate_omegaM0Bpp<0.6,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)
@
\subsection{Janet Young's results (Young-primate) VS Cooper's result}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "cooper.primates.Average\_dNdS".
<<omegaM7M8_2>>=
plot(tab$whole.gene.dN.dS.model.0, as.numeric(as.character(tab$cooper.primates.Average_dNdS)),
xlab="Omega Young-primate", ylab="Omega Cooper-primate")
<<omegaM7M8_2>>=
tab$cooper.primates.Average_dNdS<-as.numeric(as.character(
tab$cooper.primates.Average_dNdS))
plot(tab$whole.gene.dN.dS.model.0,
tab$cooper.primates.Average_dNdS,
xlab="Omega Young-primate",
ylab="Omega Cooper-primate",
cex=0.3)
abline(0,1)
abline(lm(as.numeric(as.character(tab$cooper.primates.Average_dNdS))~tab$whole.gene.dN.dS.model.0), col="red")
abline(lm(tab$cooper.primates.Average_dNdS~tab$whole.gene.dN.dS.model.0),
col="red")
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.15 &
tab$cooper.primates.Average_dNdS>0.4,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$cooper.primates.Average_dNdS,
outlier$Gene.name, cex=0.5)
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.4 & tab$cooper.primates.Average_dNdS>0.5,]
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.3 &
tab$cooper.primates.Average_dNdS>0.5,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$cooper.primates.Average_dNdS,
outlier$Gene.name)
outlier$Gene.name, cex=0.5)
outlier<-tab[tab$whole.gene.dN.dS.model.0>0.3 &
tab$cooper.primates.Average_dNdS<0.1,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$cooper.primates.Average_dNdS,
outlier$Gene.name, cex=0.5)
@
\subsection{Cooper's results (Cooper-primate) VS DGINN-full's results}
Comparaison des Omega: colonne "cooper.primates.Average\_dNdS" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8_3>>=
plot(tab$cooper.primates.Average_dNd, as.numeric(as.character(tab$dginn.primate_omegaM0Bpp)),
xlab="Omega Cooper-primate", ylab="DGINN-full's")
<<omegaM7M8_3>>=
plot(tab$cooper.primates.Average_dNd,
tab$dginn.primate_omegaM0Bpp,
xlab="Omega Cooper-primate",
ylab="DGINN-full's",
cex=0.3)
abline(0,1)
abline(lm(as.numeric(as.character(tab$dginn.primate_omegaM0Bpp))~tab$cooper.primates.Average_dNd), col="red")
abline(lm(tab$dginn.primate_omegaM0Bpp~tab$cooper.primates.Average_dNd), col="red")
outlier<-tab[tab$cooper.primates.Average_dNd<0.4 & tab$dginn.primate_omegaM0Bpp>0.5,]
outlier<-tab[tab$cooper.primates.Average_dNd<0.4 &
tab$dginn.primate_omegaM0Bpp>0.5,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)
@
outlier$Gene.name, cex=0.5)
outlier<-tab[tab$cooper.primates.Average_dNd<0.1 &
tab$dginn.primate_omegaM0Bpp>0.3,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name, cex=0.5)
outlier<-tab[tab$cooper.primates.Average_dNd>0.7 &
tab$dginn.primate_omegaM0Bpp<0.3,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name, cex=0.5)
outlier<-tab[tab$cooper.primates.Average_dNd>0.45 &
tab$dginn.primate_omegaM0Bpp<0.2,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name, cex=0.5)
@
\section{Overlap}
\subsection{Mondrian}
<<mondrianprimates>>=
library(Mondrian)
#######
monddata<-as.data.frame(tab$Gene.name)
dim(monddata)
dginnfulltmp<-rowSums(cbind(tab$dginn.primate_BUSTED=="Y",
tab$dginn.primate_BppM1M2=="Y",
tab$dginn.primate_BppM7M8=="Y",
tab$dginn.primate_codemlM1M2=="Y",
tab$dginn.primate_codemlM7M8=="Y"))
dginnfulltmp<-rowSums(cbind(tab$dginn.primate_BUSTED=="Y", tab$dginn.primate_BppM1M2=="Y",
tab$dginn.primate_BppM7M8=="Y", tab$dginn.primate_codemlM1M2=="Y", tab$dginn.primate_codemlM7M8=="Y"))
monddata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
monddata$primate_cooper<-ifelse(tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
monddata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)
monddata$primates_young<-ifelse(
tab$pVal.M8vsM7<0.05, 1, 0)
monddata$primate_cooper<-ifelse(
tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
monddata$primates_dginn_full<-ifelse(
dginnfulltmp>=3, 1,0)
mondrian(na.omit(monddata[,2:4]), labels=c("Young", "Cooper", "DGINN-full >=3" ))
mondrian(na.omit(monddata[,2:4]),
labels=c("Young", "Cooper", "DGINN-full >=3" ))
#####
monddata$primates_dginn_full<-ifelse(dginnfulltmp>=4, 1,0)
monddata$primates_dginn_full<-ifelse(
dginnfulltmp>=4, 1,0)
mondrian(na.omit(monddata[,2:4]), labels=c("Young", "Cooper", "DGINN-full >=4"))
mondrian(na.omit(monddata[,2:4]),
labels=c("Young", "Cooper", "DGINN-full >=4"))
@
......@@ -144,7 +202,8 @@ library(UpSetR)
upsetdata<-as.data.frame(tab$Gene.name)
upsetdata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
upsetdata$primate_cooper<-ifelse(tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
upsetdata$primate_cooper<-ifelse(
tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)
......