\documentclass[11pt, oneside]{article}
\usepackage[]{graphicx}
\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
  \ifdim\Gin@nat@width>\linewidth
    \linewidth
  \else
    \Gin@nat@width
  \fi
}
\makeatother

% Foreground colour selected with \color{fgcolor} at the start of each knitrout chunk.
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
% Highlighting macros used in the alltt code listings: each one typesets its
% argument in a fixed RGB colour (\hlcom additionally in italics, \hlkwa and
% \hlkwd additionally in bold). \hlipl is an alias of \hlkwb.
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb

% kframe: frame built on \MakeFramed (package framed) whose \FrameCommand puts
% the content in a \colorbox filled with shadecolor. When the environment is
% opened in inner horizontal mode (\ifinner\ifhmode), the content is first
% wrapped in a minipage of width \columnwidth; \at@end@of@kframe closes that
% minipage again at the end of the environment (it is empty otherwise).
\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
 \def\at@end@of@kframe{}%
 \ifinner\ifhmode%
  \def\at@end@of@kframe{\end{minipage}}%
  \begin{minipage}{\columnwidth}%
 \fi\fi%
 \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
 \colorbox{shadecolor}{##1}\hskip-\fboxsep
     % There is no \\@totalrightmargin, so:
     \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
 \MakeFramed {\advance\hsize-\width
   \@totalleftmargin\z@ \linewidth\hsize
   \@setminipage}}%
 {\par\unskip\endMakeFramed%
 \at@end@of@kframe}
\makeatother

% Named colours: shadecolor is the kframe background; messagecolor,
% warningcolor and errorcolor are defined here for use in chunk output.
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX

\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-CoV-2: comparison of different analyses}
\author{Marie Cariou}
\date{January 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
\tableofcontents
\newpage

\section{File manipulation}

\subsection{Read Janet Young's table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"}\hlstd{),} \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{dec}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] 332 84
\end{verbatim}
\end{kframe}
\end{knitrout}

\subsection{Read DGINN Young table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnY}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/summary_primate_young.res"}\hlstd{),} \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)}
\hlkwd{dim}\hlstd{(dginnY)}
\end{alltt}
\begin{verbatim}
## [1] 1992 7
\end{verbatim}
\end{kframe}
\end{knitrout}

\subsection{Joining Young and DGINN Young tables}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{# correct gene names
(MARC1)} \hlstd{val_remp}\hlkwb{=}\hlkwd{as.character}\hlstd{(}\hlkwd{unique}\hlstd{(dginnY}\hlopt{$}\hlstd{Gene)[(}\hlkwd{unique}\hlstd{(dginnY}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F])} \hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)} \hlstd{tab}\hlopt{$}\hlstd{Gene.name[}\hlnum{158}\hlstd{]}\hlkwb{<-}\hlstd{val_remp} \hlkwd{sum}\hlstd{(}\hlkwd{unique}\hlstd{(dginnY}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name))} \end{alltt} \begin{verbatim} ## [1] 332 \end{verbatim} \end{kframe} \end{knitrout} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlstd{add_col}\hlkwb{<-}\hlkwa{function}\hlstd{(}\hlkwc{method}\hlstd{=}\hlstr{"PamlM1M2"}\hlstd{)\{} \hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstd{method,} \hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]} \hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"Omega_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PosSel_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PValue_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"NbSites_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PSS_"}\hlstd{, method))} \hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} \hlkwd{return}\hlstd{(tab)} \hlstd{\}} \hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM1M2"}\hlstd{)} \hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM7M8"}\hlstd{)} \hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM1M2"}\hlstd{)} \hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM7M8"}\hlstd{)} \hlcom{# Manip pour la colonne BUSTED} 
\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"BUSTED"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{)]} \hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"Omega_BUSTED"}\hlstd{,} \hlstr{"PosSel_BUSTED"}\hlstd{,} \hlstr{"PValue_BUSTED"}\hlstd{)} \hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} \hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"MEME"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]} \hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"NbSites_MEME"}\hlstd{,} \hlstr{"PSS_MEME"}\hlstd{)} \hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} \end{alltt} \end{kframe} \end{knitrout} \subsection{Read DGINN Table} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlstd{dginnT}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),} \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)} \hlkwd{dim}\hlstd{(dginnT)} \end{alltt} \begin{verbatim} ## [1] 412 27 \end{verbatim} \begin{alltt} \hlkwd{names}\hlstd{(dginnT)} \end{alltt} \begin{verbatim} ## [1] "File" "Name" "Gene" ## [4] "GeneSize" "NbSpecies" "omegaM0Bpp" ## [7] "omegaM0codeml" "BUSTED" "BUSTED.p.value" ## [10] "MEME.NbSites" "MEME.PSS" "BppM1M2" ## [13] "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS" ## [16] "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites" ## [19] "BppM7M8.PSS" "codemlM1M2" "codemlM1M2.p.value" ## [22] "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8" ## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS" \end{verbatim} \begin{alltt} 
\hlcom{# Number of genes in dginn-primate output not present in the original table} \hlstd{dginnT[(dginnT}\hlopt{$}\hlstd{Gene} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F,}\hlstr{"Gene"}\hlstd{]} \end{alltt} \begin{verbatim} ## [1] ACE2 ADAM9[0-3120] ADAM9[3119-3927] ## [4] ATP5MGL C1H1ORF50 CEP135[0-3264] ## [7] CEP135[3263-3678] CEP43 COQ8B ## [10] COQ8A CSNK2A1 CSNK2B[0-609] ## [13] CSNK2B[608-2568] CYB5R1 DDX21[0-717] ## [16] DDX21[716-2538] DDX50 DNAJC15 ## [19] DPH5[0-702] DPH5[701-1326] DPY19L2 ## [22] ELOC ERO1B EXOSC3[0-1446] ## [25] EXOSC3[1445-1980] FBN3 GNB4 ## [28] GNB2 GNB3 GOLGA7[0-312] ## [31] GOLGA7[311-549] GPX1[0-1218] GPX1[1217-2946] ## [34] HDAC1 HS6ST3 IMPDH1 ## [37] ITGB1[0-2328] ITGB1[2327-2844] LMAN2L ## [40] MRPS5[0-1569] MRPS5[1568-3783] MARC2 ## [43] MGRN1 NDFIP2[0-768] NDFIP2[767-1314] ## [46] NDUFAF2[0-258] NDUFAF2[257-744] NSD2 ## [49] NUP58 NUP58[0-1824] NUP58[1823-2367] ## [52] PABPC3 POTPABPC1 PABPC4L ## [55] PABPC5 PCSK5 PRIM2[0-1071] ## [58] PRIM2[1070-1902] PRKACB PRKACG ## [61] PTGES2[0-1587] PTGES2[1586-2202] RAB8B ## [64] RAB13 RAB18[0-855] RAB18[854-1815] ## [67] RAB2B RAB5A RAB5B ## [70] RAB15 RALB EZR ## [73] EZR[0-1458] EZR[1457-3771] MSN ## [76] RETREG3 RHOB RHOC ## [79] SLC44A2[0-2577] SLC44A2[2576-3657] SPART ## [82] SRP72[0-2604] SRP72[2603-3417] STOM[0-1047] ## [85] STOM[1046-1800] STOML3 TIMM29 ## [88] TLE4 TLE2 TLE2[0-1302] ## [91] TLE2[1301-3987] TMPRSS2 TOMM70 ## [94] TOR1B WASHC4 WFS1[0-2346] ## [97] WFS1[2345-3216] YIF1B ## 411 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ... 
ZYG11B \end{verbatim} \begin{alltt} \hlcom{# This includes paralogs, recombinations found by DGINN and additionnal genes } \hlcom{# included on purpose} \hlcom{# Number of genes from the original list not present in DGINN output} \hlstd{tab[(tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnT}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlstd{F,}\hlstr{"Gene.name"}\hlstd{]} \end{alltt} \begin{verbatim} ## [1] "ADCK4" "ARL6IP6" "ATP5L" "C19orf52" "C1orf50" "ERO1LB" ## [7] "FAM134C" "FGFR1OP" "KIAA1033" "MFGE8" "NUPL1" "SIGMAR1" ## [13] "SPG20" "TCEB1" "TCEB2" "TOMM70A" "USP13" "VIMP" ## [19] "WHSC1" \end{verbatim} \begin{alltt} \hlkwd{names}\hlstd{(dginnT)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} \hlstr{"GeneSize"}\hlstd{,} \hlstr{"dginn-primate_NbSpecies"}\hlstd{,} \hlstr{"dginn-primate_omegaM0Bpp"}\hlstd{,} \hlstr{"dginn-primate_omegaM0codeml"}\hlstd{,} \hlstr{"dginn-primate_BUSTED"}\hlstd{,} \hlstr{"dginn-primate_BUSTED.p.value"}\hlstd{,} \hlstr{"dginn-primate_MEME.NbSites"}\hlstd{,} \hlstr{"dginn-primate_MEME.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.NbSites"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.PSS"}\hlstd{)} \end{alltt} \end{kframe} \end{knitrout} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} 
\begin{alltt} \hlkwd{table}\hlstd{(dginnT}\hlopt{$}\hlstd{`dginn-primate_BUSTED`)} \hlkwd{table}\hlstd{(dginnT}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2`)} \hlkwd{table}\hlstd{(dginnT}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8`)} \hlkwd{table}\hlstd{(dginnT}\hlopt{$}\hlstd{`dginn-primate_BppM1M2`)} \hlkwd{table}\hlstd{(dginnT}\hlopt{$}\hlstd{`dginn-primate_BppM7M8`)} \hlkwd{table}\hlstd{(dginnT}\hlopt{$}\hlstd{`dginn-primate_BUSTED`}\hlopt{==}\hlstr{"na"}\hlstd{,dginnT}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2`}\hlopt{==}\hlstr{"na"}\hlstd{, dginnT}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8`}\hlopt{==}\hlstr{"na"}\hlstd{,} \hlstd{dginnT}\hlopt{$}\hlstd{`dginn-primate_BppM1M2`}\hlopt{==}\hlstr{"na"}\hlstd{, dginnT}\hlopt{$}\hlstd{`dginn-primate_BppM7M8`}\hlopt{==}\hlstr{"na"} \hlstd{)} \end{alltt} \end{kframe} \end{knitrout} \subsection{Join Table and DGINN table} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab,dginnT,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=T)} \hlkwd{table}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BUSTED`)} \end{alltt} \begin{verbatim} ## ## N na Y ## 155 12 147 \end{verbatim} \begin{alltt} \hlkwd{table}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2`)} \end{alltt} \begin{verbatim} ## ## N na Y ## 216 26 72 \end{verbatim} \begin{alltt} \hlkwd{table}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8`)} \end{alltt} \begin{verbatim} ## ## N na Y ## 161 40 113 \end{verbatim} \begin{alltt} \hlkwd{table}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BppM1M2`)} \end{alltt} \begin{verbatim} ## ## N na Y ## 252 21 41 \end{verbatim} \begin{alltt} \hlkwd{table}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8`)} \end{alltt} \begin{verbatim} ## ## N na Y ## 173 22 119 \end{verbatim} \begin{alltt} \hlkwd{table}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BUSTED`}\hlopt{==}\hlstr{"na"} \hlopt{|} 
\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2`}\hlopt{==}\hlstr{"na"} \hlopt{|} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8`}\hlopt{==}\hlstr{"na"} \hlopt{|} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BppM1M2`}\hlopt{==}\hlstr{"na"}\hlopt{|} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8`}\hlopt{==}\hlstr{"na"} \hlstd{)} \end{alltt} \begin{verbatim} ## ## FALSE TRUE ## 274 40 \end{verbatim} \end{kframe} \end{knitrout} \subsection{Add DGINN results on bat dataset} DGINN results from different analyses. \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlcom{# original table} \hlstd{dginnbats}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/DGINN_202005281339summary_cleaned.tab"}\hlstd{),} \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} \hlcom{# rerun on corrected alignment} \hlstd{dginnbatsnew1}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/DGINN_202011262248_summary.tab"}\hlstd{),} \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} \hlstd{dginnbatsnew2}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/DGINN_202012192053_summary.tab"}\hlstd{),} \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} \hlcom{# column choice: BUSTED and Bppml from the first file, codeml from the other one} \hlstd{dginnbatsnew}\hlkwb{<-}\hlstd{dginnbatsnew1} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{omegaM0codeml}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{omegaM0codeml} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM1M2}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM1M2} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM1M2_p.value}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM1M2_p.value} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM1M2_NbSites}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM1M2_NbSites} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM1M2_PSS}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM1M2_PSS} 
\hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM7M8}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM7M8} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM7M8_p.value}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM7M8_p.value} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM7M8_NbSites}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM7M8_NbSites} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM7M8_PSS}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM7M8_PSS} \hlcom{####} \hlcom{## RIPK1 is actually a primat results} \hlcom{## 1. Take it and put it at the right place} \hlstd{ripk1}\hlkwb{<-}\hlkwd{as.vector}\hlstd{(dginnbatsnew[dginnbatsnew}\hlopt{$}\hlstd{Gene}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,])} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_omegaM0Bpp`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_omegaM0Bpp`))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BUSTED.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BUSTED.p.value`))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BppM1M2.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BppM1M2.p.value`))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.p.value`))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} 
\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.PSS`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.PSS`))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2.p.value`))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2.PSS`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2.PSS`))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8.p.value`))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8.PSS`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8.PSS`))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"GeneSize"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{GeneSize} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_NbSpecies"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{NbSpecies} 
\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_omegaM0Bpp"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{omegaM0Bpp} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_omegaM0codeml"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{omegaM0codeml} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BUSTED"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BUSTED} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BUSTED.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BUSTED_p.value} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_MEME.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{MEME_NbSites} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_MEME.PSS"}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(ripk1}\hlopt{$}\hlstd{MEME_PSS))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM1M2"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM1M2} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM1M2.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM1M2_p.value} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM1M2.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM1M2_NbSites} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM1M2.PSS"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM1M2_PSS} 
\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM7M8"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM7M8} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM7M8.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM7M8_p.value} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM7M8.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM7M8_NbSites} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM7M8.PSS"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM7M8_PSS} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM1M2} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM1M2_p.value} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM1M2_NbSites} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2.PSS"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM1M2_PSS} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM7M8} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM7M8_p.value} \hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM7M8_NbSites} 
\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8.PSS"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM7M8_PSS} \hlcom{## 2. Remove it} \hlstd{dginnbatsnew}\hlkwb{<-}\hlstd{dginnbatsnew[dginnbatsnew}\hlopt{$}\hlstd{Gene}\hlopt{!=}\hlstr{"RIPK1"}\hlstd{,]} \hlcom{## suppress redundant lines} \hlstd{dginnbats}\hlkwb{<-}\hlstd{dginnbats[(dginnbats}\hlopt{$}\hlstd{Gene} \hlopt{%in%} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlnum{FALSE}\hlstd{,]} \hlkwd{names}\hlstd{(dginnbatsnew)}\hlkwb{<-}\hlkwd{names}\hlstd{(dginnbats)} \hlcom{##############"} \hlstd{dginnbatsnew[,}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(dginnbatsnew[,}\hlnum{4}\hlstd{])} \hlstd{dginnbats[,}\hlnum{6}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{6}\hlstd{]))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{dginnbats[,}\hlnum{8}\hlstd{]}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{8}\hlstd{])} \hlstd{dginnbats[,}\hlnum{12}\hlstd{]}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{12}\hlstd{])} \hlstd{dginnbats[,}\hlnum{13}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{13}\hlstd{]))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlstd{dginnbats[,}\hlnum{16}\hlstd{]}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{16}\hlstd{])} \hlstd{dginnbats[,}\hlnum{17}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{17}\hlstd{]))} \end{alltt} {\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} \hlcom{## replace by new data} \hlstd{dginnbats}\hlkwb{<-}\hlkwd{rbind}\hlstd{(dginnbats, dginnbatsnew)} 
\hlkwd{names}\hlstd{(dginnbats)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"bats_Name"}\hlstd{,} \hlstr{"cooper.batsGene"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"bats_"}\hlstd{,} \hlkwd{names}\hlstd{(dginnbats)[}\hlopt{-}\hlstd{(}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{)]))} \hlkwd{names}\hlstd{(dginnbats)} \end{alltt} \begin{verbatim} ## [1] "File" "bats_Name" ## [3] "cooper.batsGene" "bats_GeneSize" ## [5] "bats_NbSpecies" "bats_omegaM0Bpp" ## [7] "bats_omegaM0codeml" "bats_BUSTED" ## [9] "bats_BUSTED.p.value" "bats_MEME.NbSites" ## [11] "bats_MEME.PSS" "bats_BppM1M2" ## [13] "bats_BppM1M2.p.value" "bats_BppM1M2.NbSites" ## [15] "bats_BppM1M2.PSS" "bats_BppM7M8" ## [17] "bats_BppM7M8.p.value" "bats_BppM7M8.NbSites" ## [19] "bats_BppM7M8.PSS" "bats_codemlM1M2" ## [21] "bats_codemlM1M2.p.value" "bats_codemlM1M2.NbSites" ## [23] "bats_codemlM1M2.PSS" "bats_codemlM7M8" ## [25] "bats_codemlM7M8.p.value" "bats_codemlM7M8.NbSites" ## [27] "bats_codemlM7M8.PSS" \end{verbatim} \begin{alltt} \hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab,dginnbats,} \hlkwc{by}\hlstd{=}\hlstr{"cooper.batsGene"}\hlstd{,} \hlkwc{all.x}\hlstd{=T)} \end{alltt} \end{kframe} \end{knitrout} \subsection{Write the new table} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlkwd{write.table}\hlstd{(tab,} \hlstr{"covid_comp_complete.txt"}\hlstd{,} \hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{quote}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)} \end{alltt} \end{kframe} \end{knitrout} \section{Second Table} Table containing the DGINN results for both primates and bats. All genes are retained. 
\subsection{Primates} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlstd{dginnT}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),} \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)} \hlkwd{dim}\hlstd{(dginnT)} \end{alltt} \begin{verbatim} ## [1] 412 27 \end{verbatim} \begin{alltt} \hlkwd{names}\hlstd{(dginnT)} \end{alltt} \begin{verbatim} ## [1] "File" "Name" "Gene" ## [4] "GeneSize" "NbSpecies" "omegaM0Bpp" ## [7] "omegaM0codeml" "BUSTED" "BUSTED.p.value" ## [10] "MEME.NbSites" "MEME.PSS" "BppM1M2" ## [13] "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS" ## [16] "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites" ## [19] "BppM7M8.PSS" "codemlM1M2" "codemlM1M2.p.value" ## [22] "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8" ## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS" \end{verbatim} \begin{alltt} \hlcom{# Rename the columns to include primate} \hlkwd{names}\hlstd{(dginnT)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} \hlstr{"GeneSize"}\hlstd{,} \hlstr{"dginn-primate_NbSpecies"}\hlstd{,} \hlstr{"dginn-primate_omegaM0Bpp"}\hlstd{,} \hlstr{"dginn-primate_omegaM0codeml"}\hlstd{,} \hlstr{"dginn-primate_BUSTED"}\hlstd{,} \hlstr{"dginn-primate_BUSTED.p.value"}\hlstd{,} \hlstr{"dginn-primate_MEME.NbSites"}\hlstd{,} \hlstr{"dginn-primate_MEME.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.NbSites"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.p.value"}\hlstd{,} 
\hlstr{"dginn-primate_codemlM1M2.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.NbSites"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.PSS"}\hlstd{)} \end{alltt} \end{kframe} \end{knitrout} \subsection{Bats} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlcom{# original table} \hlstd{dginnbats}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/DGINN_202005281339summary_cleaned-LE201108.txt"}\hlstd{),} \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} \hlcom{# rerun on corrected alignment} \hlstd{dginnbatsnew}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/DGINN_202011262248_hyphybpp-202012192053_codeml-summary.txt"}\hlstd{),} \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} \end{alltt} \end{kframe} \end{knitrout} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlcom{# Add both columns } \hlstd{dginnbatsnew}\hlopt{$}\hlstd{Lucie.s.comments}\hlkwb{<-}\hlstr{""} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{Action.taken}\hlkwb{<-}\hlstr{""} \hlcom{# Homogenize column names} \hlstd{dginnbats}\hlopt{$}\hlstd{BUSTED_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BUSTED.p.value} \hlstd{dginnbats}\hlopt{$}\hlstd{MEME_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{MEME.NbSites} \hlstd{dginnbats}\hlopt{$}\hlstd{MEME_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{MEME.PSS} \hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2.p.value} \hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2.NbSites} \hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2.PSS} 
\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8.p.value} \hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8.NbSites} \hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8.PSS} \hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2.p.value} \hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2.NbSites} \hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2.PSS} \hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8.p.value} \hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8.NbSites} \hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8.PSS} \end{alltt} \end{kframe} \end{knitrout} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlcom{# Order columns in the same order in both tables} \hlstd{dginnbats}\hlkwb{<-}\hlstd{dginnbats[,}\hlkwd{names}\hlstd{(dginnbatsnew)]} \hlkwd{names}\hlstd{(dginnbatsnew)} \hlopt{%in%} \hlkwd{names}\hlstd{(dginnbats)} \end{alltt} \begin{verbatim} ## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE ## [14] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE ## [27] TRUE TRUE TRUE \end{verbatim} \begin{alltt} \hlkwd{names}\hlstd{(dginnbats)}\hlopt{==}\hlkwd{names}\hlstd{(dginnbatsnew)} \end{alltt} \begin{verbatim} ## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE ## [14] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE ## [27] TRUE TRUE TRUE \end{verbatim} \begin{alltt} \hlcom{# Put RIPK aside} 
\hlstd{ripk1}\hlkwb{<-}\hlstd{dginnbatsnew[dginnbatsnew}\hlopt{$}\hlstd{Gene}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlnum{1}\hlopt{:}\hlnum{27}\hlstd{]} \hlcom{# Add it to the primate table} \hlkwd{names}\hlstd{(ripk1)}\hlkwb{<-}\hlkwd{names}\hlstd{(dginnT)} \hlstd{ripk1}\hlopt{$}\hlstd{`dginn-primate_omegaM0Bpp`}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(ripk1}\hlopt{$}\hlstd{`dginn-primate_omegaM0Bpp`)} \hlstd{ripk1}\hlopt{$}\hlstd{`dginn-primate_BUSTED.p.value`}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(ripk1}\hlopt{$}\hlstd{`dginn-primate_BUSTED.p.value`)} \hlstd{ripk1}\hlopt{$}\hlstd{`dginn-primate_BppM1M2.p.value`}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(ripk1}\hlopt{$}\hlstd{`dginn-primate_BppM1M2.p.value`)} \hlstd{ripk1}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.p.value`}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(ripk1}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.p.value`)} \hlstd{dginnT}\hlkwb{<-}\hlkwd{rbind}\hlstd{(dginnT, ripk1)} \hlcom{## Remove RIPK1 from bats} \hlstd{dginnbatsnew}\hlkwb{<-}\hlstd{dginnbatsnew[dginnbatsnew}\hlopt{$}\hlstd{Gene}\hlopt{!=}\hlstr{"RIPK1"}\hlstd{,]} \hlcom{## remove redundant rows} \hlstd{dginnbats}\hlkwb{<-}\hlstd{dginnbats[(dginnbats}\hlopt{$}\hlstd{Gene} \hlopt{%in%} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlnum{FALSE}\hlstd{,]} \hlkwd{names}\hlstd{(dginnbatsnew)}\hlkwb{<-}\hlkwd{names}\hlstd{(dginnbats)} \hlcom{## replace by new data} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{omegaM0Bpp}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(dginnbatsnew}\hlopt{$}\hlstd{omegaM0Bpp)} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{BppM1M2_p.value}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(dginnbatsnew}\hlopt{$}\hlstd{BppM1M2_p.value)} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{BppM7M8_p.value}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(dginnbatsnew}\hlopt{$}\hlstd{BppM7M8_p.value)} \hlstd{dginnbats}\hlkwb{<-}\hlkwd{rbind}\hlstd{(dginnbats, dginnbatsnew)} \hlkwd{names}\hlstd{(dginnbats)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"bats_File"}\hlstd{,} \hlstr{"bats_Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} 
\hlkwd{paste0}\hlstd{(}\hlstr{"bats_"}\hlstd{,} \hlkwd{names}\hlstd{(dginnbats)[}\hlopt{-}\hlstd{(}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{)]))} \hlkwd{names}\hlstd{(dginnbats)} \end{alltt} \begin{verbatim} ## [1] "bats_File" "bats_Name" ## [3] "Gene.name" "bats_GeneSize" ## [5] "bats_NbSpecies" "bats_omegaM0Bpp" ## [7] "bats_omegaM0codeml" "bats_BUSTED" ## [9] "bats_BUSTED_p.value" "bats_MEME_NbSites" ## [11] "bats_MEME_PSS" "bats_BppM1M2" ## [13] "bats_BppM1M2_p.value" "bats_BppM1M2_NbSites" ## [15] "bats_BppM1M2_PSS" "bats_BppM7M8" ## [17] "bats_BppM7M8_p.value" "bats_BppM7M8_NbSites" ## [19] "bats_BppM7M8_PSS" "bats_codemlM1M2" ## [21] "bats_codemlM1M2_p.value" "bats_codemlM1M2_NbSites" ## [23] "bats_codemlM1M2_PSS" "bats_codemlM7M8" ## [25] "bats_codemlM7M8_p.value" "bats_codemlM7M8_NbSites" ## [27] "bats_codemlM7M8_PSS" "bats_Lucie.s.comments" ## [29] "bats_Action.taken" \end{verbatim} \end{kframe} \end{knitrout} \subsection{Merged table} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlcom{#tidy.opts = list(width.cutoff = 60)} \hlkwd{dim}\hlstd{(dginnT)} \end{alltt} \begin{verbatim} ## [1] 413 27 \end{verbatim} \begin{alltt} \hlstd{dginnT}\hlopt{$}\hlstd{Gene.name} \end{alltt} \begin{verbatim} ## [1] AAR2 AASS AATF ## [4] ABCC1 ACAD9 ACADM ## [7] ACE2 ACSL3 ADAM9 ## [10] ADAM9[0-3120] ADAM9[3119-3927] ADAMTS1 ## [13] AGPS AKAP8 AKAP8L ## [16] AKAP9 ALG11 ALG5 ## [19] ALG8 ANO6 AP2A2 ## [22] AP2M1 AP3B1 ARF6 ## [25] ATE1 ATP13A3 ATP1B1 ## [28] ATP5MGL ATP6AP1 ATP6V1A ## [31] BAG5 BCKDK BCS1L ## [34] BRD2 BRD4 BZW2 ## [37] C1H1ORF50 CCDC86 CDK5RAP2 ## [40] CENPF CEP112 CEP135 ## [43] CEP135[0-3264] CEP135[3263-3678] CEP250 ## [46] CEP350 CEP43 CEP68 ## [49] CHMP2A CHPF CHPF2 ## [52] CISD3 CIT CLCC1 ## [55] CLIP4 CNTRL COL6A1 ## [58] COLGALT1 COMT COQ8B ## [61] COQ8A CRTC3 CSDE1 ## [64] CSNK2A1 CSNK2A2 CSNK2B ## [67] CSNK2B[0-609] CSNK2B[608-2568] CUL2 ## [70] CWC27 CYB5B CYB5R3 ## [73] CYB5R1 DCAF7 
DCAKD ## [76] DCTPP1 DDX10 DDX21 ## [79] DDX21[0-717] DDX21[716-2538] DDX50 ## [82] DNAJC11 DNAJC19 DNAJC15 ## [85] DNMT1 DPH5 DPH5[0-702] ## [88] DPH5[701-1326] DPY19L2 DPY19L1 ## [91] ECSIT EDEM3 EIF4E2 ## [94] EIF4H ELOC EMC1 ## [97] ERC1 ERGIC1 ERLEC1 ## [100] ERMP1 ERO1B ERP44 ## [103] ETFA EXOSC2 EXOSC3 ## [106] EXOSC3[0-1446] EXOSC3[1445-1980] EXOSC5 ## [109] EXOSC8 F2RL1 FAM162A ## [112] FAM8A1 FAM98A FAR2 ## [115] FASTKD5 FBLN5 FBN1 ## [118] FBN3 FBN2 FBXL12 ## [121] FKBP10 FKBP15 FKBP7 ## [124] FOXRED2 FYCO1 G3BP1 ## [127] G3BP2 GCC1 GCC2 ## [130] GDF15 GFER GGCX ## [133] GGH GHITM GIGYF2 ## [136] GLA GNB4 GNB2 ## [139] GNB1 GNB3 GNG5 ## [142] GNG5 GOLGA2 GOLGA3 ## [145] GOLGA7 GOLGA7[0-312] GOLGA7[311-549] ## [148] GOLGB1 GORASP1 GPAA1 ## [151] GPX1 GPX1[0-1218] GPX1[1217-2946] ## [154] GRIPAP1 GRPEL1 GTF2F2 ## [157] HDAC2 HDAC1 HEATR3 ## [160] HECTD1 HMOX1 HOOK1 ## [163] HS2ST1 HS6ST2 HS6ST3 ## [166] HSBP1 HYOU1 IDE ## [169] IL17RA IMPDH1 IMPDH2 ## [172] INHBE INTS4 ITGB1 ## [175] ITGB1[0-2328] ITGB1[2327-2844] JAKMIP1 ## [178] LARP1 LARP4B LARP7 ## [181] LMAN2 LMAN2L LOX ## [184] MAP7D1 MARK1 MARK2 ## [187] MARK3 MAT2B MDN1 ## [190] MEPCE MIB1 MIPOL1 ## [193] MOGS MOV10 MPHOSPH10 ## [196] MRPS2 MRPS25 MRPS27 ## [199] MRPS5 MRPS5[0-1569] MRPS5[1568-3783] ## [202] MARC1 MARC2 MTCH1 ## [205] MYCBP2 MGRN1 NARS2 ## [208] NAT14 NDFIP2 NDFIP2[0-768] ## [211] NDFIP2[767-1314] NDUFAF1 NDUFAF2 ## [214] NDUFAF2[0-258] NDUFAF2[257-744] NDUFB9 ## [217] NEK9 NEU1 NGDN ## [220] NGLY1 NIN NINL ## [223] NLRX1 NOL10 NPC2 ## [226] NPTX1 NSD2 NUP210 ## [229] NUP214 NUP54 NUP58 ## [232] NUP58[0-1824] NUP58[1823-2367] NUP62 ## [235] NUP88 NUP98 NUTF2 ## [238] OS9 PABPC3 POTPABPC1 ## [241] PABPC1 PABPC4 PABPC4L ## [244] PABPC5 PCNT PCSK6 ## [247] PCSK5 PDE4DIP PDZD11 ## [250] PIGO PIGS PITRM1 ## [253] PKP2 PLAT PLD3 ## [256] PLEKHA5 PLEKHF2 PLOD2 ## [259] PMPCA PMPCB POFUT1 ## [262] KDELC1 KDELC2 POLA1 ## [265] POLA2 POR PPIL3 ## [268] PPT1 PRIM1 PRIM2 ## [271] PRIM2[0-1071] 
PRIM2[1070-1902] PRKACB ## [274] PRKACG PRKACA PRKAR2A ## [277] PRKAR2B PRRC2B PSMD8 ## [280] PTBP2 PTGES2 PTGES2[0-1587] ## [283] PTGES2[1586-2202] PUSL1 PVR ## [286] QSOX2 RAB10 RAB8B ## [289] RAB13 RAB14 RAB18 ## [292] RAB18[0-855] RAB18[854-1815] RAB1A ## [295] RAB2B RAB2A RAB5C ## [298] RAB5A RAB5B RAB7A ## [301] RAB15 RAB8A RAE1 ## [304] RALB RALA RAP1GDS1 ## [307] RBM28 RBM41 RBX1 ## [310] EZR EZR[0-1458] EZR[1457-3771] ## [313] RDX MSN REEP5 ## [316] REEP6 RETREG3 RHOB ## [319] RHOC RHOA RIPK1 ## [322] RNF41 RPL36 RRP9 ## [325] RTN4 SAAL1 SBNO1 ## [328] SCAP SCARB1 SCCPDH ## [331] SDF2 SEPSECS SIL1 ## [334] SIRT5 SLC25A21 SLC27A2 ## [337] SLC30A6 SLC30A7 SLC30A9 ## [340] SLC44A2 SLC44A2[0-2577] SLC44A2[2576-3657] ## [343] SLC9A3R1 SLU7 SMOC1 ## [346] SNIP1 SPART SRP19 ## [349] SRP54 SRP72 SRP72[0-2604] ## [352] SRP72[2603-3417] STC2 STOM ## [355] STOM[0-1047] STOM[1046-1800] STOML3 ## [358] STOML2 SUN2 TAPT1 ## [361] TARS2 TBCA TBK1 ## [364] TBKBP1 TCF12 THTPA ## [367] TIMM10 TIMM10B TIMM29 ## [370] TIMM8B TIMM9 TLE1 ## [373] TLE3 TLE4 TLE2 ## [376] TLE2[0-1302] TLE2[1301-3987] AES ## [379] TM2D3 TMED5 TMEM39B ## [382] TMEM97 TMPRSS2 TOMM70 ## [385] TOR1A TOR1B TOR1AIP1 ## [388] TRIM59 TRMT1 TUBGCP2 ## [391] TUBGCP3 TYSND1 UBAP2 ## [394] UBAP2L UBXN8 UGGT2 ## [397] UPF1 USP54 VPS11 ## [400] VPS39 WASHC4 WFS1 ## [403] WFS1[0-2346] WFS1[2345-3216] YIF1A ## [406] YIF1B ZC3H18 ZC3H7A ## [409] ZDHHC5 ZNF318 ZNF503 ## [412] ZYG11B RIPK1 ## 416 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ... 
SELENOS \end{verbatim} \begin{alltt} \hlkwd{dim}\hlstd{(dginnbats)} \end{alltt} \begin{verbatim} ## [1] 353 29 \end{verbatim} \begin{alltt} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name} \end{alltt} \begin{verbatim} ## [1] AAR2 AASS AATF ## [4] ABCC1 ACAD9 ACADM ## [7] ACE2 ACSL3 ADAM9 ## [10] ADAM9[0-2769] ADAM9[2768-3030] ADAMTS1 ## [13] AGPS AKAP8 AKAP8L ## [16] AKAP9 ALG11 ALG5 ## [19] ALG8 ANO6 AP2A2 ## [22] AP2M1 AP3B1 ARF6 ## [25] ARL6IP6 ATP13A3 ATP1B1 ## [28] ATP5MG ATP6AP1 ATP6V1A ## [31] BAG5 BCKDK BCS1 ## [34] BRD2 BRD4 BZW2 ## [37] CUNH1ORF50 CCDC86 CDK5RAP2 ## [40] CENPF CEP112 CEP135 ## [43] CEP250 CEP350 CEP68 ## [46] CHMP2A CHPF CHPF2 ## [49] CISD3 CIT CLCC1 ## [52] CLIP4 CNTRL COLGALT1 ## [55] COMT CRTC3 CSDE1 ## [58] CSNK2A2 CSNK2B CUL2 ## [61] CWC27 CYB5BR3 DCAF7 ## [64] DCAKD DCTPP1 DDX10 ## [67] DNAJC11 DNAJC19 DNMT1 ## [70] DPH5 DPY19L1 ECSIT ## [73] EDEM3 EIF4E2 EIF4H ## [76] ELOC EMC1 ERC1 ## [79] ERGIC1 ERLEC1 ERMP1 ## [82] ERP44 EXOSC2 EXOSC3 ## [85] EXOSC5 EXOSC8 F2RL1 ## [88] FAM162A FAM8A1 FAM98A ## [91] FAR2 FASTKD5 FBLN5 ## [94] FBN1 FBN2 FBXL12 ## [97] FKBP10 FKBP15 FKBP7 ## [100] FOXRED2 FYCO1 G3BP1 ## [103] G3BP2 GCC1 GCC2 ## [106] GDF15 GFER GGCX ## [109] GGH GHITM GIGYF2 ## [112] GLA GNG5 GOLGA2 ## [115] GOLGA3 GOLGB1 GORASP1 ## [118] GPAA1 GPX1 GRIPAP1 ## [121] GRPEL1 GTF2F2 HDAC2 ## [124] HEATR3 HECTD1 HMOX1 ## [127] HOOK1 HS2ST1 HS6ST2 ## [130] HYOU1 IDE IDE[0-2343] ## [133] IDE[2342-3240] IDE[3239-4911] IL17RA ## [136] IMPDH2 INHBE ITGB1 ## [139] JAKMIP1 LARP1 LARP4B ## [142] LARP7 LMAN2 LOX ## [145] MAP7D1 MARK1 MARK2 ## [148] MARK3 MAT2B MDN1 ## [151] MEPCE MFGE8 MIB1 ## [154] MIPOL1 MOGS MPHOSPH10 ## [157] MRPS2 MRPS25 MRPS27 ## [160] MRPS5 MTCH1 MYCBP2 ## [163] NARS2 NAT14 NDFIP2 ## [166] NDUFAF1 NDUFAF2 NDUFB9 ## [169] NEK9 NEU1 NGDN ## [172] NGLY1 NIN NINL ## [175] NLRX1 NOL10 NPC2 ## [178] NPTX1 NSD2 NUP210 ## [181] NUP214 NUP54 NUP58 ## [184] NUP62 NUP88 NUP98 ## [187] NUTF2 OS9 PABPC4 ## [190] PCNT PCSK5 PDZD11 ## 
[193] PIGO PIGS PITRM1 ## [196] PKP2 PLAT PLD3 ## [199] PLEKHA5 PLEKHF2 PLOD2 ## [202] PMPCA PMPCB POFUT1 ## [205] KDELC1 KDELC2 POLA1 ## [208] POLA2 POR PPIL3 ## [211] PPT1 PRIM1 PRIM2 ## [214] PRKACA PRKAR2A PRKAR2B ## [217] PRRC2B PSMD8 PTBP2 ## [220] PTGES2 PTGES2[0-513] PTGES2[512-2070] ## [223] PUSL1 PVR QSOX2 ## [226] RAB10 RAB14 RAB18 ## [229] RAB1A RAB2A RAB5C ## [232] RAB7A RAB8A RAE1 ## [235] RALA RAP1GDS1 RBM28 ## [238] RBM41 RBX1 REEP5 ## [241] REEP6 RETREG3 RHOA ## [244] RIPK1 RNF41 RPL36 ## [247] RRP9 RTN4 SAAL1 ## [250] SBNO1 SCAP SCARB1 ## [253] SCARB1[0-2004] SCARB1[2003-2289] SCCPDH ## [256] SELENOS[0-927] SELENOS[926-1137] SEPSECS ## [259] SIGMAR1 SIL1 SIRT5 ## [262] SLC25A21 SLC27A2 SLC30A6 ## [265] SLC30A7 SLC30A9 SLC44A2 ## [268] SLC44A2[0-2820] SLC44A2[2819-3792] SLC9A3R1 ## [271] SLU7 SMOC1 SNIP1 ## [274] SPART SRP19 SRP54 ## [277] SRP72 STC2 STOM ## [280] STOML2 SUN2 TAPT1 ## [283] TBK1 TBKBP1 TCF12 ## [286] THTPA TIMM10 TIMM10B ## [289] TIMM29 TIMM8B TIMM9 ## [292] TLE1 TLE3 TLE5 ## [295] TM2D3 TMED5 TMEM97 ## [298] TOMM70 TOR1A TOR1AIP1 ## [301] TRIM59 TRMT1 TUBGCP2 ## [304] TUBGCP3 UBAP2 UBAP2L ## [307] UBXN8 UGGT2 UPF1 ## [310] USP13 USP54 VPS11 ## [313] VPS39 WASHC4 WFS1 ## [316] YIF1A ZC3H18 ZC3H18[0-1101] ## [319] ZC3H18[1100-3678] ZC3H7A ZDHHC5 ## [322] ZNF318 ZNF503 ZYG11B ## [325] ATE1 FGFR1OP COL6A1 ## [328] COQ8B CYB5B DDX21 ## [331] ELOB ERO1B ETFA ## [334] GNB1 GOLGA7 HSBP1 ## [337] INTS4 MOV10 MARC1 ## [340] PABPC1 PCSK6 PDE4DIP ## [343] RDX REEP6-A REEP6-B ## [346] SDF2 SELENOS TARS2 ## [349] TBCA TMEM39B TMPRSS2 ## [352] TMPRSS2 TYSND1 ## 352 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ... 
REEP6-B \end{verbatim} \end{kframe} \end{knitrout} Manual corrections: TMPRSS2 in bats \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{,]} \end{alltt} \begin{verbatim} ## bats_File bats_Name Gene.name ## 2810 TMPRSS2_bat_same_mafft_prank TMPRSS2 TMPRSS2 ## 2910 TMPRSS2_bat_select_cut_mafft_prank TMPRSS2 TMPRSS2 ## bats_GeneSize bats_NbSpecies bats_omegaM0Bpp ## 2810 1174 12 0.140290584008726 ## 2910 574 12 0.129489038364869 ## bats_omegaM0codeml bats_BUSTED bats_BUSTED_p.value ## 2810 0.145 N 0.9333 ## 2910 0.127 N 0.9358 ## bats_MEME_NbSites ## 2810 12 ## 2910 19 ## bats_MEME_PSS ## 2810 630, 644, 649, 688, 775, 888, 921, 1003, 1051, 1055, 1066, 1173 ## 2910 59, 73, 78, 108, 115, 117, 121, 133, 144, 241, 259, 288, 321, 403, 421, 451, 455, 466, 573 ## bats_BppM1M2 bats_BppM1M2_p.value bats_BppM1M2_NbSites ## 2810 N 0.999999010422051 0 ## 2910 N 0.999999906049202 0 ## bats_BppM1M2_PSS bats_BppM7M8 bats_BppM7M8_p.value ## 2810 na N 0.621882294670985 ## 2910 na N 0.334893426994811 ## bats_BppM7M8_NbSites bats_BppM7M8_PSS bats_codemlM1M2 ## 2810 0 na N ## 2910 0 na N ## bats_codemlM1M2_p.value bats_codemlM1M2_NbSites ## 2810 1.0 0 ## 2910 1.0 0 ## bats_codemlM1M2_PSS bats_codemlM7M8 bats_codemlM7M8_p.value ## 2810 na N 0.788991288016829 ## 2910 na N 0.4210515526274131 ## bats_codemlM7M8_NbSites bats_codemlM7M8_PSS ## 2810 0 na ## 2910 0 na ## bats_Lucie.s.comments bats_Action.taken ## 2810 ## 2910 \end{verbatim} \begin{alltt} \hlcom{# keeping the uncut one} \hlcom{# renaming the other one TMPRSS2_cut} \hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{bats.File}\hlopt{==}\hlstr{"TMPRSS2_bat_select_cut_mafft_prank"}\hlstd{,}\hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"TMPRSS2_cut"} \end{alltt} \end{kframe} \end{knitrout} RIPK1: an ancestral (older) version was kept in the primate table; remove it (file ``RIPK1\_sequences\_filtered\_longestORFs\_mafft\_mincov\_prank''). 
\begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlstd{dginnT}\hlkwb{<-}\hlstd{dginnT[dginnT}\hlopt{$}\hlstd{File}\hlopt{!=}\hlstr{"RIPK1_sequences_filtered_longestORFs_mafft_mincov_prank"}\hlstd{,]} \end{alltt} \end{kframe} \end{knitrout} REEP6 A and B \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene.name)} \hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{bats_File}\hlopt{==}\hlstr{"REEP6_sequences_filtered_longestORFs_D210gp1_prank"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"REEP6_old"} \hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{bats_File}\hlopt{==}\hlstr{"REEP6_LA_bat_select_mafft_prank"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"REEP6"} \hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{bats_File}\hlopt{==}\hlstr{"REEP6_LB_bat_select_mafft_prank"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"REEP6_like"} \end{alltt} \end{kframe} \end{knitrout} GNG5 \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlstd{dginnT}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnT}\hlopt{$}\hlstd{Gene.name)} \hlstd{dginnT[dginnT}\hlopt{$}\hlstd{File}\hlopt{==}\hlstr{"GNG5_sequences_filtered_longestORFs_D189gp2_prank"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"GNG5_like"} \end{alltt} \end{kframe} \end{knitrout} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlkwd{dim}\hlstd{(dginnbats)} \end{alltt} \begin{verbatim} ## [1] 353 29 \end{verbatim} \begin{alltt} \hlkwd{dim}\hlstd{(dginnT)} \end{alltt} \begin{verbatim} ## [1] 412 27 \end{verbatim} \begin{alltt} \hlcom{# genes in common} \hlstd{dginnT}\hlopt{$}\hlstd{Gene.name[dginnT}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} 
\hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name]} \end{alltt} \begin{verbatim} ## [1] "AAR2" "AASS" "AATF" "ABCC1" "ACAD9" ## [6] "ACADM" "ACE2" "ACSL3" "ADAM9" "ADAMTS1" ## [11] "AGPS" "AKAP8" "AKAP8L" "AKAP9" "ALG11" ## [16] "ALG5" "ALG8" "ANO6" "AP2A2" "AP2M1" ## [21] "AP3B1" "ARF6" "ATE1" "ATP13A3" "ATP1B1" ## [26] "ATP6AP1" "ATP6V1A" "BAG5" "BCKDK" "BRD2" ## [31] "BRD4" "BZW2" "CCDC86" "CDK5RAP2" "CENPF" ## [36] "CEP112" "CEP135" "CEP250" "CEP350" "CEP68" ## [41] "CHMP2A" "CHPF" "CHPF2" "CISD3" "CIT" ## [46] "CLCC1" "CLIP4" "CNTRL" "COL6A1" "COLGALT1" ## [51] "COMT" "COQ8B" "CRTC3" "CSDE1" "CSNK2A2" ## [56] "CSNK2B" "CUL2" "CWC27" "CYB5B" "DCAF7" ## [61] "DCAKD" "DCTPP1" "DDX10" "DDX21" "DNAJC11" ## [66] "DNAJC19" "DNMT1" "DPH5" "DPY19L1" "ECSIT" ## [71] "EDEM3" "EIF4E2" "EIF4H" "ELOC" "EMC1" ## [76] "ERC1" "ERGIC1" "ERLEC1" "ERMP1" "ERO1B" ## [81] "ERP44" "ETFA" "EXOSC2" "EXOSC3" "EXOSC5" ## [86] "EXOSC8" "F2RL1" "FAM162A" "FAM8A1" "FAM98A" ## [91] "FAR2" "FASTKD5" "FBLN5" "FBN1" "FBN2" ## [96] "FBXL12" "FKBP10" "FKBP15" "FKBP7" "FOXRED2" ## [101] "FYCO1" "G3BP1" "G3BP2" "GCC1" "GCC2" ## [106] "GDF15" "GFER" "GGCX" "GGH" "GHITM" ## [111] "GIGYF2" "GLA" "GNB1" "GNG5" "GOLGA2" ## [116] "GOLGA3" "GOLGA7" "GOLGB1" "GORASP1" "GPAA1" ## [121] "GPX1" "GRIPAP1" "GRPEL1" "GTF2F2" "HDAC2" ## [126] "HEATR3" "HECTD1" "HMOX1" "HOOK1" "HS2ST1" ## [131] "HS6ST2" "HSBP1" "HYOU1" "IDE" "IL17RA" ## [136] "IMPDH2" "INHBE" "INTS4" "ITGB1" "JAKMIP1" ## [141] "LARP1" "LARP4B" "LARP7" "LMAN2" "LOX" ## [146] "MAP7D1" "MARK1" "MARK2" "MARK3" "MAT2B" ## [151] "MDN1" "MEPCE" "MIB1" "MIPOL1" "MOGS" ## [156] "MOV10" "MPHOSPH10" "MRPS2" "MRPS25" "MRPS27" ## [161] "MRPS5" "MARC1" "MTCH1" "MYCBP2" "NARS2" ## [166] "NAT14" "NDFIP2" "NDUFAF1" "NDUFAF2" "NDUFB9" ## [171] "NEK9" "NEU1" "NGDN" "NGLY1" "NIN" ## [176] "NINL" "NLRX1" "NOL10" "NPC2" "NPTX1" ## [181] "NSD2" "NUP210" "NUP214" "NUP54" "NUP58" ## [186] "NUP62" "NUP88" "NUP98" "NUTF2" "OS9" ## [191] "PABPC1" "PABPC4" "PCNT" "PCSK6" "PCSK5" ## 
[196] "PDE4DIP" "PDZD11" "PIGO" "PIGS" "PITRM1" ## [201] "PKP2" "PLAT" "PLD3" "PLEKHA5" "PLEKHF2" ## [206] "PLOD2" "PMPCA" "PMPCB" "POFUT1" "KDELC1" ## [211] "KDELC2" "POLA1" "POLA2" "POR" "PPIL3" ## [216] "PPT1" "PRIM1" "PRIM2" "PRKACA" "PRKAR2A" ## [221] "PRKAR2B" "PRRC2B" "PSMD8" "PTBP2" "PTGES2" ## [226] "PUSL1" "PVR" "QSOX2" "RAB10" "RAB14" ## [231] "RAB18" "RAB1A" "RAB2A" "RAB5C" "RAB7A" ## [236] "RAB8A" "RAE1" "RALA" "RAP1GDS1" "RBM28" ## [241] "RBM41" "RBX1" "RDX" "REEP5" "REEP6" ## [246] "RETREG3" "RHOA" "RNF41" "RPL36" "RRP9" ## [251] "RTN4" "SAAL1" "SBNO1" "SCAP" "SCARB1" ## [256] "SCCPDH" "SDF2" "SEPSECS" "SIL1" "SIRT5" ## [261] "SLC25A21" "SLC27A2" "SLC30A6" "SLC30A7" "SLC30A9" ## [266] "SLC44A2" "SLC9A3R1" "SLU7" "SMOC1" "SNIP1" ## [271] "SPART" "SRP19" "SRP54" "SRP72" "STC2" ## [276] "STOM" "STOML2" "SUN2" "TAPT1" "TARS2" ## [281] "TBCA" "TBK1" "TBKBP1" "TCF12" "THTPA" ## [286] "TIMM10" "TIMM10B" "TIMM29" "TIMM8B" "TIMM9" ## [291] "TLE1" "TLE3" "TM2D3" "TMED5" "TMEM39B" ## [296] "TMEM97" "TMPRSS2" "TOMM70" "TOR1A" "TOR1AIP1" ## [301] "TRIM59" "TRMT1" "TUBGCP2" "TUBGCP3" "TYSND1" ## [306] "UBAP2" "UBAP2L" "UBXN8" "UGGT2" "UPF1" ## [311] "USP54" "VPS11" "VPS39" "WASHC4" "WFS1" ## [316] "YIF1A" "ZC3H18" "ZC3H7A" "ZDHHC5" "ZNF318" ## [321] "ZNF503" "ZYG11B" "RIPK1" \end{verbatim} \begin{alltt} \hlkwd{length}\hlstd{(dginnT}\hlopt{$}\hlstd{Gene.name[dginnT}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name])} \end{alltt} \begin{verbatim} ## [1] 323 \end{verbatim} \begin{alltt} \hlcom{# genes only in primates} \hlstd{dginnT}\hlopt{$}\hlstd{Gene.name[(dginnT}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{]} \end{alltt} \begin{verbatim} ## [1] "ADAM9[0-3120]" "ADAM9[3119-3927]" "ATP5MGL" ## [4] "BCS1L" "C1H1ORF50" "CEP135[0-3264]" ## [7] "CEP135[3263-3678]" "CEP43" "COQ8A" ## [10] "CSNK2A1" "CSNK2B[0-609]" "CSNK2B[608-2568]" ## [13] "CYB5R3" "CYB5R1" 
"DDX21[0-717]" ## [16] "DDX21[716-2538]" "DDX50" "DNAJC15" ## [19] "DPH5[0-702]" "DPH5[701-1326]" "DPY19L2" ## [22] "EXOSC3[0-1446]" "EXOSC3[1445-1980]" "FBN3" ## [25] "GNB4" "GNB2" "GNB3" ## [28] "GNG5_like" "GOLGA7[0-312]" "GOLGA7[311-549]" ## [31] "GPX1[0-1218]" "GPX1[1217-2946]" "HDAC1" ## [34] "HS6ST3" "IMPDH1" "ITGB1[0-2328]" ## [37] "ITGB1[2327-2844]" "LMAN2L" "MRPS5[0-1569]" ## [40] "MRPS5[1568-3783]" "MARC2" "MGRN1" ## [43] "NDFIP2[0-768]" "NDFIP2[767-1314]" "NDUFAF2[0-258]" ## [46] "NDUFAF2[257-744]" "NUP58[0-1824]" "NUP58[1823-2367]" ## [49] "PABPC3" "POTPABPC1" "PABPC4L" ## [52] "PABPC5" "PRIM2[0-1071]" "PRIM2[1070-1902]" ## [55] "PRKACB" "PRKACG" "PTGES2[0-1587]" ## [58] "PTGES2[1586-2202]" "RAB8B" "RAB13" ## [61] "RAB18[0-855]" "RAB18[854-1815]" "RAB2B" ## [64] "RAB5A" "RAB5B" "RAB15" ## [67] "RALB" "EZR" "EZR[0-1458]" ## [70] "EZR[1457-3771]" "MSN" "RHOB" ## [73] "RHOC" "SLC44A2[0-2577]" "SLC44A2[2576-3657]" ## [76] "SRP72[0-2604]" "SRP72[2603-3417]" "STOM[0-1047]" ## [79] "STOM[1046-1800]" "STOML3" "TLE4" ## [82] "TLE2" "TLE2[0-1302]" "TLE2[1301-3987]" ## [85] "AES" "TOR1B" "WFS1[0-2346]" ## [88] "WFS1[2345-3216]" "YIF1B" \end{verbatim} \begin{alltt} \hlkwd{length}\hlstd{(dginnT}\hlopt{$}\hlstd{Gene.name[(dginnT}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])} \end{alltt} \begin{verbatim} ## [1] 89 \end{verbatim} \begin{alltt} \hlcom{# genes only in bats} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name[(dginnbats}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnT}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{]} \end{alltt} \begin{verbatim} ## [1] "ADAM9[0-2769]" "ADAM9[2768-3030]" "ARL6IP6" ## [4] "ATP5MG" "BCS1" "CUNH1ORF50" ## [7] "CYB5BR3" "IDE[0-2343]" "IDE[2342-3240]" ## [10] "IDE[3239-4911]" "MFGE8" "PTGES2[0-513]" ## [13] "PTGES2[512-2070]" "REEP6_old" "SCARB1[0-2004]" ## [16] "SCARB1[2003-2289]" "SELENOS[0-927]" "SELENOS[926-1137]" ## [19] "SIGMAR1" 
"SLC44A2[0-2820]" "SLC44A2[2819-3792]" ## [22] "TLE5" "USP13" "ZC3H18[0-1101]" ## [25] "ZC3H18[1100-3678]" "FGFR1OP" "ELOB" ## [28] "REEP6_like" "SELENOS" \end{verbatim} \begin{alltt} \hlkwd{length}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene.name[(dginnbats}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnT}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])} \end{alltt} \begin{verbatim} ## [1] 29 \end{verbatim} \end{kframe} \end{knitrout} \end{document}