% covid_comp_script0_table.tex
\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color}
% \maxwidth: the width at which graphics should be included.
% It expands to the graphic's natural width (\Gin@nat@width) when that does
% not exceed \linewidth, and to \linewidth otherwise, so that graphics never
% extend past the margin.
% \makeatletter/\makeatother are needed because \Gin@nat@width contains `@'.
\makeatletter
\def\maxwidth{ %
  \ifdim\Gin@nat@width>\linewidth
    \linewidth
  \else
    \Gin@nat@width
  \fi
}
\makeatother

% Foreground colour selected with \color{fgcolor} at the start of each code chunk.
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
% Syntax-highlighting macros: each one typesets its single argument in a fixed
% RGB colour via \textcolor. In the chunks of this document they wrap numbers
% (\hlnum), strings (\hlstr), comments (\hlcom, italic), operators (\hlopt),
% plain code (\hlstd), the assignment arrow (\hlkwb), argument names (\hlkwc)
% and function names (\hlkwd, bold). \hlkwa (bold) is defined but not used in
% the visible part of this file.
% The trailing % on each line only suppresses the end-of-line space.
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
% \hlipl is an alias for \hlkwb.
\let\hlipl\hlkwb

\usepackage{framed}
% kframe: environment that sets its contents on a background of colour
% `shadecolor' (via \colorbox), built on \MakeFramed from the framed package.
% When it starts in inner horizontal mode (\ifinner\ifhmode), the contents are
% additionally wrapped in a minipage of width \columnwidth; that minipage is
% closed by \at@end@of@kframe at the end of the environment.
\makeatletter
\newenvironment{kframe}{%
 \def\at@end@of@kframe{}%
 \ifinner\ifhmode%
  \def\at@end@of@kframe{\end{minipage}}%
  \begin{minipage}{\columnwidth}%
 \fi\fi%
 \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
 \colorbox{shadecolor}{##1}\hskip-\fboxsep
     % There is no \@totalrightmargin, so:
     \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
 \MakeFramed {\advance\hsize-\width
   \@totalleftmargin\z@ \linewidth\hsize
   \@setminipage}}%
 {\par\unskip\endMakeFramed%
 \at@end@of@kframe}
\makeatother

% Default colours. shadecolor is the background used by kframe's \colorbox;
% each code chunk below redefines it locally before opening kframe.
% messagecolor, warningcolor and errorcolor are presumably meant for R
% messages, warnings and errors; they are not used in the visible part of
% this file.
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment wrapping every code chunk; may be redefined to restyle the chunks

\usepackage{alltt}   	% provides the alltt environment used below for the highlighted R code
% Note on \documentclass: use "amsart" instead of "article" for AMSLaTeX format.
%\usepackage{geometry}                		% See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper}                   		% ... or a4paper or a5paper or ...
%\geometry{landscape}                		% Activate for rotated page geometry
%\usepackage[parfill]{parskip}    		% Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx}				% Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
								% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}

\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}


\title{Positive selection on genes interacting with SARS-CoV-2: comparison of different analyses}
\author{Marie Cariou}
\date{January 2021}							% Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle

\tableofcontents

\newpage

\section{1st table}

Table containing the DGINN results for both primates and bats. All genes are kept.

\subsection{Primates}

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"}

\hlstd{dginnT}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
      \hlstr{"data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),}
      \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}

\hlkwd{dim}\hlstd{(dginnT)}
\end{alltt}
\begin{verbatim}
## [1] 412  27
\end{verbatim}
\begin{alltt}
\hlcom{#names(dginnT)}

\hlcom{# Rename the columns to include primate}
\hlkwd{names}\hlstd{(dginnT)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} \hlstr{"GeneSize"}\hlstd{,}
  \hlstr{"dginn-primate_NbSpecies"}\hlstd{,} \hlstr{"dginn-primate_omegaM0Bpp"}\hlstd{,}
  \hlstr{"dginn-primate_omegaM0codeml"}\hlstd{,} \hlstr{"dginn-primate_BUSTED"}\hlstd{,}
  \hlstr{"dginn-primate_BUSTED.p.value"}\hlstd{,} \hlstr{"dginn-primate_MEME.NbSites"}\hlstd{,}
  \hlstr{"dginn-primate_MEME.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,}
  \hlstr{"dginn-primate_BppM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.NbSites"}\hlstd{,}
  \hlstr{"dginn-primate_BppM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8"}\hlstd{,}
  \hlstr{"dginn-primate_BppM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.NbSites"}\hlstd{,}
  \hlstr{"dginn-primate_BppM7M8.PSS"}\hlstd{,}  \hlstr{"dginn-primate_codemlM1M2"}\hlstd{,}
  \hlstr{"dginn-primate_codemlM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.NbSites"}\hlstd{,}
  \hlstr{"dginn-primate_codemlM1M2.PSS"}\hlstd{,}     \hlstr{"dginn-primate_codemlM7M8"}\hlstd{,}
  \hlstr{"dginn-primate_codemlM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.NbSites"}\hlstd{,}
  \hlstr{"dginn-primate_codemlM7M8.PSS"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}

Add SELENOS

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{selenos}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
      \hlstr{"data/resSELENOS.tab"}\hlstd{))}

\hlcom{# list of columns}

\hlstd{colonnes}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"Name"}\hlstd{,} \hlstr{"Gene"}\hlstd{,} \hlstr{"GeneSize"}\hlstd{,}
  \hlstr{"NbSpecies"}\hlstd{,} \hlstr{"omegaM0Bpp"}\hlstd{,} \hlstr{"omegaM0codeml"}\hlstd{,} \hlstr{"BUSTED"}\hlstd{,}
  \hlstr{"BUSTED_p.value"}\hlstd{,}  \hlstr{"MEME_NbSites"}\hlstd{,} \hlstr{"MEME_PSS"}\hlstd{,} \hlstr{"BppM1M2"}\hlstd{,}
  \hlstr{"BppM1M2_p.value"}\hlstd{,} \hlstr{"BppM1M2_NbSites"}\hlstd{,} \hlstr{"BppM1M2_PSS"}\hlstd{,} \hlstr{"BppM7M8"}\hlstd{,}
  \hlstr{"BppM7M8_p.value"}\hlstd{,} \hlstr{"BppM7M8_NbSites"}\hlstd{,} \hlstr{"BppM7M8_PSS"}\hlstd{,}\hlstr{"codemlM1M2"}\hlstd{,}
  \hlstr{"codemlM1M2_p.value"}\hlstd{,} \hlstr{"codemlM1M2_NbSites"}\hlstd{,}  \hlstr{"codemlM1M2_PSS"}\hlstd{,} \hlstr{"codemlM7M8"}\hlstd{,}
  \hlstr{"codemlM7M8_p.value"}\hlstd{,} \hlstr{"codemlM7M8_NbSites"}\hlstd{,} \hlstr{"codemlM7M8_PSS"}\hlstd{)}

\hlstd{selenos}\hlkwb{<-}\hlstd{selenos[,colonnes]}
\end{alltt}
\end{kframe}
\end{knitrout}

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{names}\hlstd{(selenos)}\hlkwb{<-}\hlkwd{names}\hlstd{(dginnT)}
\hlstd{selenos[,}\hlnum{6}\hlstd{]}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(selenos[,}\hlnum{6}\hlstd{])}
\hlstd{selenos[,}\hlnum{9}\hlstd{]}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(selenos[,}\hlnum{9}\hlstd{])}
\hlstd{selenos[,}\hlnum{11}\hlstd{]}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(selenos[,}\hlnum{11}\hlstd{])}

\hlstd{selenos[,}\hlnum{13}\hlstd{]}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(selenos[,}\hlnum{13}\hlstd{])}
\hlstd{selenos[,}\hlnum{17}\hlstd{]}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(selenos[,}\hlnum{17}\hlstd{])}
\hlstd{selenos[,}\hlnum{21}\hlstd{]}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(selenos[,}\hlnum{21}\hlstd{])}
\hlstd{selenos[,}\hlnum{25}\hlstd{]}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(selenos[,}\hlnum{25}\hlstd{])}

\hlcom{## convert the p-values}
\hlstd{dginnT}\hlkwb{<-}\hlkwd{rbind}\hlstd{(dginnT, selenos)}
\end{alltt}
\end{kframe}
\end{knitrout}
    
\subsection{Bats}

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{# original table}
\hlstd{dginnbats}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
      \hlstr{"data/DGINN_202005281339summary_cleaned-LE201108.txt"}\hlstd{),}
                  \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)}

\hlcom{# rerun on corrected alignment}
\hlstd{dginnbatsnew}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
      \hlstr{"data/DGINN_202011262248_hyphybpp-202012192053_codeml-summary.txt"}\hlstd{),}
            \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)}
\end{alltt}
\end{kframe}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{# Add both columns }
\hlstd{dginnbatsnew}\hlopt{$}\hlstd{Lucie.s.comments}\hlkwb{<-}\hlstr{""}
\hlstd{dginnbatsnew}\hlopt{$}\hlstd{Action.taken}\hlkwb{<-}\hlstr{""}

\hlcom{# Homogenize column names}
\hlstd{dginnbats}\hlopt{$}\hlstd{BUSTED_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BUSTED.p.value}
\hlstd{dginnbats}\hlopt{$}\hlstd{MEME_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{MEME.NbSites}
\hlstd{dginnbats}\hlopt{$}\hlstd{MEME_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{MEME.PSS}

\hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2.p.value}
\hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2.NbSites}
\hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM1M2.PSS}

\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8.p.value}
\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8.NbSites}
\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{BppM7M8.PSS}

\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2.p.value}
\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2.NbSites}
\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM1M2.PSS}

\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8_p.value}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8.p.value}
\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8_NbSites}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8.NbSites}
\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8_PSS}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{codemlM7M8.PSS}
\end{alltt}
\end{kframe}
\end{knitrout}

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{# Order columns in the same order in both tables}
\hlstd{dginnbats}\hlkwb{<-}\hlstd{dginnbats[,}\hlkwd{names}\hlstd{(dginnbatsnew)]}

\hlkwd{names}\hlstd{(dginnbatsnew)} \hlopt{%in%} \hlkwd{names}\hlstd{(dginnbats)}
\end{alltt}
\begin{verbatim}
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [27] TRUE TRUE TRUE
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginnbats)}\hlopt{==}\hlkwd{names}\hlstd{(dginnbatsnew)}
\end{alltt}
\begin{verbatim}
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [27] TRUE TRUE TRUE
\end{verbatim}
\begin{alltt}
\hlcom{# Put RIPK1 aside}
\hlstd{ripk1}\hlkwb{<-}\hlstd{dginnbatsnew[dginnbatsnew}\hlopt{$}\hlstd{Gene}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlnum{1}\hlopt{:}\hlnum{27}\hlstd{]}

\hlcom{# Add it to primate table}
\hlkwd{names}\hlstd{(ripk1)}\hlkwb{<-}\hlkwd{names}\hlstd{(dginnT)}

\hlstd{ripk1}\hlopt{$}\hlstd{`dginn-primate_omegaM0Bpp`}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(ripk1}\hlopt{$}\hlstd{`dginn-primate_omegaM0Bpp`)}
\hlstd{ripk1}\hlopt{$}\hlstd{`dginn-primate_BUSTED.p.value`}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(ripk1}\hlopt{$}\hlstd{`dginn-primate_BUSTED.p.value`)}
\hlstd{ripk1}\hlopt{$}\hlstd{`dginn-primate_BppM1M2.p.value`}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(ripk1}\hlopt{$}\hlstd{`dginn-primate_BppM1M2.p.value`)}
\hlstd{ripk1}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.p.value`}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(ripk1}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.p.value`)}

\hlstd{dginnT}\hlkwb{<-}\hlkwd{rbind}\hlstd{(dginnT, ripk1)}

\hlcom{## Remove RIPK1 from bats}
\hlstd{dginnbatsnew}\hlkwb{<-}\hlstd{dginnbatsnew[dginnbatsnew}\hlopt{$}\hlstd{Gene}\hlopt{!=}\hlstr{"RIPK1"}\hlstd{,]}


\hlcom{## suppress redundant lines}
\hlstd{dginnbats}\hlkwb{<-}\hlstd{dginnbats[(dginnbats}\hlopt{$}\hlstd{Gene} \hlopt{%in%} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlnum{FALSE}\hlstd{,]}
\hlkwd{names}\hlstd{(dginnbatsnew)}\hlkwb{<-}\hlkwd{names}\hlstd{(dginnbats)}

\hlcom{## replace by new data}
\hlstd{dginnbatsnew}\hlopt{$}\hlstd{omegaM0Bpp}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(dginnbatsnew}\hlopt{$}\hlstd{omegaM0Bpp)}
\hlstd{dginnbatsnew}\hlopt{$}\hlstd{BppM1M2_p.value}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(dginnbatsnew}\hlopt{$}\hlstd{BppM1M2_p.value)}
\hlstd{dginnbatsnew}\hlopt{$}\hlstd{BppM7M8_p.value}\hlkwb{<-}\hlkwd{as.factor}\hlstd{(dginnbatsnew}\hlopt{$}\hlstd{BppM7M8_p.value)}

\hlstd{dginnbats}\hlkwb{<-}\hlkwd{rbind}\hlstd{(dginnbats, dginnbatsnew)}

\hlkwd{names}\hlstd{(dginnbats)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"bats_File"}\hlstd{,} \hlstr{"bats_Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"bats_"}\hlstd{,}
    \hlkwd{names}\hlstd{(dginnbats)[}\hlopt{-}\hlstd{(}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{)]))}
\hlkwd{names}\hlstd{(dginnbats)}
\end{alltt}
\begin{verbatim}
##  [1] "bats_File"               "bats_Name"               "Gene.name"               "bats_GeneSize"           "bats_NbSpecies"         
##  [6] "bats_omegaM0Bpp"         "bats_omegaM0codeml"      "bats_BUSTED"             "bats_BUSTED_p.value"     "bats_MEME_NbSites"      
## [11] "bats_MEME_PSS"           "bats_BppM1M2"            "bats_BppM1M2_p.value"    "bats_BppM1M2_NbSites"    "bats_BppM1M2_PSS"       
## [16] "bats_BppM7M8"            "bats_BppM7M8_p.value"    "bats_BppM7M8_NbSites"    "bats_BppM7M8_PSS"        "bats_codemlM1M2"        
## [21] "bats_codemlM1M2_p.value" "bats_codemlM1M2_NbSites" "bats_codemlM1M2_PSS"     "bats_codemlM7M8"         "bats_codemlM7M8_p.value"
## [26] "bats_codemlM7M8_NbSites" "bats_codemlM7M8_PSS"     "bats_Lucie.s.comments"   "bats_Action.taken"
\end{verbatim}
\end{kframe}
\end{knitrout}

\subsection{Merged table}

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{#tidy.opts = list(width.cutoff = 60)}
\hlkwd{dim}\hlstd{(dginnT)}
\end{alltt}
\begin{verbatim}
## [1] 414  27
\end{verbatim}
\begin{alltt}
\hlcom{#dginnT$Gene.name}
\hlkwd{dim}\hlstd{(dginnbats)}
\end{alltt}
\begin{verbatim}
## [1] 353  29
\end{verbatim}
\begin{alltt}
\hlcom{#dginnbats$Gene.name}
\end{alltt}
\end{kframe}
\end{knitrout}

Manual corrections:

TMPRSS2 in bats
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{,]}
\end{alltt}
\begin{verbatim}
##                               bats_File bats_Name Gene.name
## 2810       TMPRSS2_bat_same_mafft_prank   TMPRSS2   TMPRSS2
## 2910 TMPRSS2_bat_select_cut_mafft_prank   TMPRSS2   TMPRSS2
##      bats_GeneSize bats_NbSpecies   bats_omegaM0Bpp
## 2810          1174             12 0.140290584008726
## 2910           574             12 0.129489038364869
##      bats_omegaM0codeml bats_BUSTED bats_BUSTED_p.value
## 2810              0.145           N              0.9333
## 2910              0.127           N              0.9358
##      bats_MEME_NbSites
## 2810                12
## 2910                19
##                                                                                   bats_MEME_PSS
## 2810                            630, 644, 649, 688, 775, 888, 921, 1003, 1051, 1055, 1066, 1173
## 2910 59, 73, 78, 108, 115, 117, 121, 133, 144, 241, 259, 288, 321, 403, 421, 451, 455, 466, 573
##      bats_BppM1M2 bats_BppM1M2_p.value bats_BppM1M2_NbSites
## 2810            N    0.999999010422051                    0
## 2910            N    0.999999906049202                    0
##      bats_BppM1M2_PSS bats_BppM7M8 bats_BppM7M8_p.value
## 2810               na            N    0.621882294670985
## 2910               na            N    0.334893426994811
##      bats_BppM7M8_NbSites bats_BppM7M8_PSS bats_codemlM1M2
## 2810                    0               na               N
## 2910                    0               na               N
##      bats_codemlM1M2_p.value bats_codemlM1M2_NbSites
## 2810                     1.0                       0
## 2910                     1.0                       0
##      bats_codemlM1M2_PSS bats_codemlM7M8 bats_codemlM7M8_p.value
## 2810                  na               N       0.788991288016829
## 2910                  na               N      0.4210515526274131
##      bats_codemlM7M8_NbSites bats_codemlM7M8_PSS
## 2810                       0                  na
## 2910                       0                  na
##      bats_Lucie.s.comments bats_Action.taken
## 2810                                        
## 2910
\end{verbatim}
\begin{alltt}
\hlcom{# keeping the uncut one}
\hlcom{# renaming the other one TMPRSS2_cut}
\hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene.name)}
\hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{bats_File}\hlopt{==}\hlstr{"TMPRSS2_bat_select_cut_mafft_prank"}\hlstd{,}\hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"TMPRSS2_cut"}
\end{alltt}
\end{kframe}
\end{knitrout}

RIPK1: the ancestral version is kept; remove the entry ``RIPK1\_sequences\_filtered\_longestORFs\_mafft\_mincov\_prank''.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnT}\hlkwb{<-}\hlstd{dginnT[dginnT}\hlopt{$}\hlstd{File}\hlopt{!=}\hlstr{"RIPK1_sequences_filtered_longestORFs_mafft_mincov_prank"}\hlstd{,]}
\end{alltt}
\end{kframe}
\end{knitrout}

REEP6 A and B
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene.name)}
\hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{bats_File}\hlopt{==}\hlstr{"REEP6_sequences_filtered_longestORFs_D210gp1_prank"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"REEP6_old"}
\hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{bats_File}\hlopt{==}\hlstr{"REEP6_LA_bat_select_mafft_prank"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"REEP6"}
\hlstd{dginnbats[dginnbats}\hlopt{$}\hlstd{bats_File}\hlopt{==}\hlstr{"REEP6_LB_bat_select_mafft_prank"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"REEP6_like"}
\end{alltt}
\end{kframe}
\end{knitrout}

GNG5
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginnT}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnT}\hlopt{$}\hlstd{Gene.name)}
\hlstd{dginnT[dginnT}\hlopt{$}\hlstd{File}\hlopt{==}\hlstr{"GNG5_sequences_filtered_longestORFs_D189gp2_prank"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{]}\hlkwb{<-}\hlstr{"GNG5_like"}
\end{alltt}
\end{kframe}
\end{knitrout}


\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{dim}\hlstd{(dginnbats)}
\end{alltt}
\begin{verbatim}
## [1] 353  29
\end{verbatim}
\begin{alltt}
\hlkwd{dim}\hlstd{(dginnT)}
\end{alltt}
\begin{verbatim}
## [1] 413  27
\end{verbatim}
\begin{alltt}
\hlcom{# genes in common}
\hlstd{common}\hlkwb{<-}\hlstd{dginnT}\hlopt{$}\hlstd{Gene.name[dginnT}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name]}
\hlstd{common}
\end{alltt}
\begin{verbatim}
##   [1] "AAR2"      "AASS"      "AATF"      "ABCC1"     "ACAD9"    
##   [6] "ACADM"     "ACE2"      "ACSL3"     "ADAM9"     "ADAMTS1"  
##  [11] "AGPS"      "AKAP8"     "AKAP8L"    "AKAP9"     "ALG11"    
##  [16] "ALG5"      "ALG8"      "ANO6"      "AP2A2"     "AP2M1"    
##  [21] "AP3B1"     "ARF6"      "ATE1"      "ATP13A3"   "ATP1B1"   
##  [26] "ATP6AP1"   "ATP6V1A"   "BAG5"      "BCKDK"     "BRD2"     
##  [31] "BRD4"      "BZW2"      "CCDC86"    "CDK5RAP2"  "CENPF"    
##  [36] "CEP112"    "CEP135"    "CEP250"    "CEP350"    "CEP68"    
##  [41] "CHMP2A"    "CHPF"      "CHPF2"     "CISD3"     "CIT"      
##  [46] "CLCC1"     "CLIP4"     "CNTRL"     "COL6A1"    "COLGALT1" 
##  [51] "COMT"      "COQ8B"     "CRTC3"     "CSDE1"     "CSNK2A2"  
##  [56] "CSNK2B"    "CUL2"      "CWC27"     "CYB5B"     "DCAF7"    
##  [61] "DCAKD"     "DCTPP1"    "DDX10"     "DDX21"     "DNAJC11"  
##  [66] "DNAJC19"   "DNMT1"     "DPH5"      "DPY19L1"   "ECSIT"    
##  [71] "EDEM3"     "EIF4E2"    "EIF4H"     "ELOC"      "EMC1"     
##  [76] "ERC1"      "ERGIC1"    "ERLEC1"    "ERMP1"     "ERO1B"    
##  [81] "ERP44"     "ETFA"      "EXOSC2"    "EXOSC3"    "EXOSC5"   
##  [86] "EXOSC8"    "F2RL1"     "FAM162A"   "FAM8A1"    "FAM98A"   
##  [91] "FAR2"      "FASTKD5"   "FBLN5"     "FBN1"      "FBN2"     
##  [96] "FBXL12"    "FKBP10"    "FKBP15"    "FKBP7"     "FOXRED2"  
## [101] "FYCO1"     "G3BP1"     "G3BP2"     "GCC1"      "GCC2"     
## [106] "GDF15"     "GFER"      "GGCX"      "GGH"       "GHITM"    
## [111] "GIGYF2"    "GLA"       "GNB1"      "GNG5"      "GOLGA2"   
## [116] "GOLGA3"    "GOLGA7"    "GOLGB1"    "GORASP1"   "GPAA1"    
## [121] "GPX1"      "GRIPAP1"   "GRPEL1"    "GTF2F2"    "HDAC2"    
## [126] "HEATR3"    "HECTD1"    "HMOX1"     "HOOK1"     "HS2ST1"   
## [131] "HS6ST2"    "HSBP1"     "HYOU1"     "IDE"       "IL17RA"   
## [136] "IMPDH2"    "INHBE"     "INTS4"     "ITGB1"     "JAKMIP1"  
## [141] "LARP1"     "LARP4B"    "LARP7"     "LMAN2"     "LOX"      
## [146] "MAP7D1"    "MARK1"     "MARK2"     "MARK3"     "MAT2B"    
## [151] "MDN1"      "MEPCE"     "MIB1"      "MIPOL1"    "MOGS"     
## [156] "MOV10"     "MPHOSPH10" "MRPS2"     "MRPS25"    "MRPS27"   
## [161] "MRPS5"     "MARC1"     "MTCH1"     "MYCBP2"    "NARS2"    
## [166] "NAT14"     "NDFIP2"    "NDUFAF1"   "NDUFAF2"   "NDUFB9"   
## [171] "NEK9"      "NEU1"      "NGDN"      "NGLY1"     "NIN"      
## [176] "NINL"      "NLRX1"     "NOL10"     "NPC2"      "NPTX1"    
## [181] "NSD2"      "NUP210"    "NUP214"    "NUP54"     "NUP58"    
## [186] "NUP62"     "NUP88"     "NUP98"     "NUTF2"     "OS9"      
## [191] "PABPC1"    "PABPC4"    "PCNT"      "PCSK6"     "PCSK5"    
## [196] "PDE4DIP"   "PDZD11"    "PIGO"      "PIGS"      "PITRM1"   
## [201] "PKP2"      "PLAT"      "PLD3"      "PLEKHA5"   "PLEKHF2"  
## [206] "PLOD2"     "PMPCA"     "PMPCB"     "POFUT1"    "KDELC1"   
## [211] "KDELC2"    "POLA1"     "POLA2"     "POR"       "PPIL3"    
## [216] "PPT1"      "PRIM1"     "PRIM2"     "PRKACA"    "PRKAR2A"  
## [221] "PRKAR2B"   "PRRC2B"    "PSMD8"     "PTBP2"     "PTGES2"   
## [226] "PUSL1"     "PVR"       "QSOX2"     "RAB10"     "RAB14"    
## [231] "RAB18"     "RAB1A"     "RAB2A"     "RAB5C"     "RAB7A"    
## [236] "RAB8A"     "RAE1"      "RALA"      "RAP1GDS1"  "RBM28"    
## [241] "RBM41"     "RBX1"      "RDX"       "REEP5"     "REEP6"    
## [246] "RETREG3"   "RHOA"      "RNF41"     "RPL36"     "RRP9"     
## [251] "RTN4"      "SAAL1"     "SBNO1"     "SCAP"      "SCARB1"   
## [256] "SCCPDH"    "SDF2"      "SEPSECS"   "SIL1"      "SIRT5"    
## [261] "SLC25A21"  "SLC27A2"   "SLC30A6"   "SLC30A7"   "SLC30A9"  
## [266] "SLC44A2"   "SLC9A3R1"  "SLU7"      "SMOC1"     "SNIP1"    
## [271] "SPART"     "SRP19"     "SRP54"     "SRP72"     "STC2"     
## [276] "STOM"      "STOML2"    "SUN2"      "TAPT1"     "TARS2"    
## [281] "TBCA"      "TBK1"      "TBKBP1"    "TCF12"     "THTPA"    
## [286] "TIMM10"    "TIMM10B"   "TIMM29"    "TIMM8B"    "TIMM9"    
## [291] "TLE1"      "TLE3"      "TM2D3"     "TMED5"     "TMEM39B"  
## [296] "TMEM97"    "TMPRSS2"   "TOMM70"    "TOR1A"     "TOR1AIP1" 
## [301] "TRIM59"    "TRMT1"     "TUBGCP2"   "TUBGCP3"   "TYSND1"   
## [306] "UBAP2"     "UBAP2L"    "UBXN8"     "UGGT2"     "UPF1"     
## [311] "USP54"     "VPS11"     "VPS39"     "WASHC4"    "WFS1"     
## [316] "YIF1A"     "ZC3H18"    "ZC3H7A"    "ZDHHC5"    "ZNF318"   
## [321] "ZNF503"    "ZYG11B"    "SELENOS"   "RIPK1"
\end{verbatim}
\begin{alltt}
\hlkwd{length}\hlstd{(dginnT}\hlopt{$}\hlstd{Gene.name[dginnT}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name])}
\end{alltt}
\begin{verbatim}
## [1] 324
\end{verbatim}
\begin{alltt}
\hlcom{# genes only in primates}
\hlstd{onlyprimates}\hlkwb{<-}\hlstd{dginnT}\hlopt{$}\hlstd{Gene.name[(dginnT}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{]}
\hlstd{onlyprimates}
\end{alltt}
\begin{verbatim}
##  [1] "ADAM9[0-3120]"      "ADAM9[3119-3927]"   "ATP5MGL"           
##  [4] "BCS1L"              "C1H1ORF50"          "CEP135[0-3264]"    
##  [7] "CEP135[3263-3678]"  "CEP43"              "COQ8A"             
## [10] "CSNK2A1"            "CSNK2B[0-609]"      "CSNK2B[608-2568]"  
## [13] "CYB5R3"             "CYB5R1"             "DDX21[0-717]"      
## [16] "DDX21[716-2538]"    "DDX50"              "DNAJC15"           
## [19] "DPH5[0-702]"        "DPH5[701-1326]"     "DPY19L2"           
## [22] "EXOSC3[0-1446]"     "EXOSC3[1445-1980]"  "FBN3"              
## [25] "GNB4"               "GNB2"               "GNB3"              
## [28] "GNG5_like"          "GOLGA7[0-312]"      "GOLGA7[311-549]"   
## [31] "GPX1[0-1218]"       "GPX1[1217-2946]"    "HDAC1"             
## [34] "HS6ST3"             "IMPDH1"             "ITGB1[0-2328]"     
## [37] "ITGB1[2327-2844]"   "LMAN2L"             "MRPS5[0-1569]"     
## [40] "MRPS5[1568-3783]"   "MARC2"              "MGRN1"             
## [43] "NDFIP2[0-768]"      "NDFIP2[767-1314]"   "NDUFAF2[0-258]"    
## [46] "NDUFAF2[257-744]"   "NUP58[0-1824]"      "NUP58[1823-2367]"  
## [49] "PABPC3"             "POTPABPC1"          "PABPC4L"           
## [52] "PABPC5"             "PRIM2[0-1071]"      "PRIM2[1070-1902]"  
## [55] "PRKACB"             "PRKACG"             "PTGES2[0-1587]"    
## [58] "PTGES2[1586-2202]"  "RAB8B"              "RAB13"             
## [61] "RAB18[0-855]"       "RAB18[854-1815]"    "RAB2B"             
## [64] "RAB5A"              "RAB5B"              "RAB15"             
## [67] "RALB"               "EZR"                "EZR[0-1458]"       
## [70] "EZR[1457-3771]"     "MSN"                "RHOB"              
## [73] "RHOC"               "SLC44A2[0-2577]"    "SLC44A2[2576-3657]"
## [76] "SRP72[0-2604]"      "SRP72[2603-3417]"   "STOM[0-1047]"      
## [79] "STOM[1046-1800]"    "STOML3"             "TLE4"              
## [82] "TLE2"               "TLE2[0-1302]"       "TLE2[1301-3987]"   
## [85] "AES"                "TOR1B"              "WFS1[0-2346]"      
## [88] "WFS1[2345-3216]"    "YIF1B"
\end{verbatim}
\begin{alltt}
\hlkwd{length}\hlstd{(dginnT}\hlopt{$}\hlstd{Gene.name[(dginnT}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] 89
\end{verbatim}
\begin{alltt}
\hlcom{# genes only in bats}
\hlstd{onlybats}\hlkwb{<-}\hlstd{dginnbats}\hlopt{$}\hlstd{Gene.name[(dginnbats}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnT}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{]}
\hlstd{onlybats}
\end{alltt}
\begin{verbatim}
##  [1] "ADAM9[0-2769]"      "ADAM9[2768-3030]"   "ARL6IP6"           
##  [4] "ATP5MG"             "BCS1"               "CUNH1ORF50"        
##  [7] "CYB5BR3"            "IDE[0-2343]"        "IDE[2342-3240]"    
## [10] "IDE[3239-4911]"     "MFGE8"              "PTGES2[0-513]"     
## [13] "PTGES2[512-2070]"   "REEP6_old"          "SCARB1[0-2004]"    
## [16] "SCARB1[2003-2289]"  "SELENOS[0-927]"     "SELENOS[926-1137]" 
## [19] "SIGMAR1"            "SLC44A2[0-2820]"    "SLC44A2[2819-3792]"
## [22] "TLE5"               "USP13"              "ZC3H18[0-1101]"    
## [25] "ZC3H18[1100-3678]"  "FGFR1OP"            "ELOB"              
## [28] "REEP6_like"         "TMPRSS2_cut"
\end{verbatim}
\begin{alltt}
\hlkwd{length}\hlstd{(dginnbats}\hlopt{$}\hlstd{Gene.name[(dginnbats}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnT}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] 29
\end{verbatim}
\end{kframe}
\end{knitrout}

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(dginnT, dginnbats,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=T,} \hlkwc{all.y}\hlstd{=T)}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] 442  55
\end{verbatim}
\begin{alltt}
\hlcom{# add column "shared"/"only bats"/"only primates"}
\hlstd{tab}\hlopt{$}\hlstd{status}\hlkwb{<-}\hlstr{""}
\hlstd{tab}\hlopt{$}\hlstd{status[tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{common]}\hlkwb{<-}\hlstr{"shared"}
\hlstd{tab}\hlopt{$}\hlstd{status[tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{onlyprimates]}\hlkwb{<-}\hlstr{"onlyprimates"}
\hlstd{tab}\hlopt{$}\hlstd{status[tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{onlybats]}\hlkwb{<-}\hlstr{"onlybats"}
\hlkwd{table}\hlstd{(tab}\hlopt{$}\hlstd{status)}
\end{alltt}
\begin{verbatim}
## 
##     onlybats onlyprimates       shared 
##           29           89          324
\end{verbatim}
\begin{alltt}
\hlkwd{write.table}\hlstd{(tab,} \hlstr{"covid_comp_alldginn.txt"}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}


\section{Complete data}

Merge the previous table with J.~Young's original table.

\subsection{Read the original Young table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{young}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
  \hlstr{"data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"}\hlstd{),}
        \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{dec}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{dim}\hlstd{(young)}
\end{alltt}
\begin{verbatim}
## [1] 332  84
\end{verbatim}
\begin{alltt}
\hlstd{young}\hlopt{$}\hlstd{PreyGene}\hlkwb{<-}\hlkwd{as.character}\hlstd{(young}\hlopt{$}\hlstd{PreyGene)}
\hlstd{young}\hlopt{$}\hlstd{PreyGene[young}\hlopt{$}\hlstd{PreyGene}\hlopt{==}\hlstr{"MTARC1"}\hlstd{]}\hlkwb{<-}\hlstr{"MARC1"}
\end{alltt}
\end{kframe}
\end{knitrout}

\subsection{Read the gene names conversion table}

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{usthem}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
  \hlstr{"/data/table_gene_name_correspondence.csv"}\hlstd{),}
  \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{";"}\hlstd{)}

\hlstd{young[young}\hlopt{$}\hlstd{PreyGene} \hlopt{%in%} \hlstd{usthem}\hlopt{$}\hlstd{Us,} \hlkwd{c}\hlstd{(}\hlstr{"PreyGene"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{)]}
\end{alltt}
\begin{verbatim}
##     PreyGene  Gene.name
## 57    TIMM29   C19orf52
## 107    ERO1B     ERO1LB
## 111    NUP58      NUPL1
## 115    COQ8B      ADCK4
## 118    SPART      SPG20
## 131     NSD2      WHSC1
## 149  RETREG3    FAM134C
## 158    MARC1 01/03/2020
## 197     ELOC      TCEB1
## 268   TOMM70    TOMM70A
## 269   WASHC4   KIAA1033
\end{verbatim}
\begin{alltt}
\hlstd{usthem[}\hlkwd{order}\hlstd{(usthem}\hlopt{$}\hlstd{Us),]}
\end{alltt}
\begin{verbatim}
##         Us     Else
## 1    COQ8B    ADCK4
## 2     ELOC    TCEB1
## 3    ERO1B   ERO1LB
## 4    MARC1   MTARC1
## 5     NSD2    WHSC1
## 6    NUP58    NUPL1
## 7    PCSK5         
## 8  RETREG3  FAM134C
## 9    SPART    SPG20
## 10  TIMM29 C19orf52
## 11  TOMM70  TOMM70A
## 12  WASHC4 KIAA1033
\end{verbatim}
\end{kframe}
\end{knitrout}

\subsection{Merge Young and DGINN table}

\textbf{Based on which column?}

How many genes in the Young table are not in the DGINN table, and which ones are they?
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{table}\hlstd{(young}\hlopt{$}\hlstd{PreyGene} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
\begin{verbatim}
## 
## FALSE  TRUE 
##     3   329
\end{verbatim}
\begin{alltt}
\hlstd{young[(young}\hlopt{$}\hlstd{PreyGene} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{,} \hlstr{"PreyGene"}\hlstd{]}
\end{alltt}
\begin{verbatim}
## [1] "POGLUT3" "POGLUT2" "C1orf50"
\end{verbatim}
\begin{alltt}
\hlstd{tab[(tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{young}\hlopt{$}\hlstd{PreyGene)}\hlopt{==}\hlnum{FALSE}\hlstd{,} \hlstr{"Gene.name"}\hlstd{]}
\end{alltt}
\begin{verbatim}
##   [1] "ACE2"               "ADAM9[0-2769]"      "ADAM9[0-3120]"     
##   [4] "ADAM9[2768-3030]"   "ADAM9[3119-3927]"   "AES"               
##   [7] "ATP5MGL"            "BCS1"               "C1H1ORF50"         
##  [10] "CEP135[0-3264]"     "CEP135[3263-3678]"  "COQ8A"             
##  [13] "CSNK2A1"            "CSNK2B[0-609]"      "CSNK2B[608-2568]"  
##  [16] "CUNH1ORF50"         "CYB5BR3"            "CYB5R1"            
##  [19] "DDX21[0-717]"       "DDX21[716-2538]"    "DDX50"             
##  [22] "DNAJC15"            "DPH5[0-702]"        "DPH5[701-1326]"    
##  [25] "DPY19L2"            "EXOSC3[0-1446]"     "EXOSC3[1445-1980]" 
##  [28] "EZR"                "EZR[0-1458]"        "EZR[1457-3771]"    
##  [31] "FBN3"               "FGFR1OP"            "GNB2"              
##  [34] "GNB3"               "GNB4"               "GNG5_like"         
##  [37] "GOLGA7[0-312]"      "GOLGA7[311-549]"    "GPX1[0-1218]"      
##  [40] "GPX1[1217-2946]"    "HDAC1"              "HS6ST3"            
##  [43] "IDE[0-2343]"        "IDE[2342-3240]"     "IDE[3239-4911]"    
##  [46] "IMPDH1"             "ITGB1[0-2328]"      "ITGB1[2327-2844]"  
##  [49] "KDELC1"             "KDELC2"             "LMAN2L"            
##  [52] "MARC2"              "MGRN1"              "MRPS5[0-1569]"     
##  [55] "MRPS5[1568-3783]"   "MSN"                "NDFIP2[0-768]"     
##  [58] "NDFIP2[767-1314]"   "NDUFAF2[0-258]"     "NDUFAF2[257-744]"  
##  [61] "NUP58[0-1824]"      "NUP58[1823-2367]"   "PABPC3"            
##  [64] "PABPC4L"            "PABPC5"             "PCSK5"             
##  [67] "POTPABPC1"          "PRIM2[0-1071]"      "PRIM2[1070-1902]"  
##  [70] "PRKACB"             "PRKACG"             "PTGES2[0-1587]"    
##  [73] "PTGES2[0-513]"      "PTGES2[1586-2202]"  "PTGES2[512-2070]"  
##  [76] "RAB13"              "RAB15"              "RAB18[0-855]"      
##  [79] "RAB18[854-1815]"    "RAB2B"              "RAB5A"             
##  [82] "RAB5B"              "RAB8B"              "RALB"              
##  [85] "REEP6_like"         "REEP6_old"          "RHOB"              
##  [88] "RHOC"               "SCARB1[0-2004]"     "SCARB1[2003-2289]" 
##  [91] "SELENOS[0-927]"     "SELENOS[926-1137]"  "SLC44A2[0-2577]"   
##  [94] "SLC44A2[0-2820]"    "SLC44A2[2576-3657]" "SLC44A2[2819-3792]"
##  [97] "SRP72[0-2604]"      "SRP72[2603-3417]"   "STOM[0-1047]"      
## [100] "STOM[1046-1800]"    "STOML3"             "TLE2"              
## [103] "TLE2[0-1302]"       "TLE2[1301-3987]"    "TLE4"              
## [106] "TMPRSS2"            "TMPRSS2_cut"        "TOR1B"             
## [109] "WFS1[0-2346]"       "WFS1[2345-3216]"    "YIF1B"             
## [112] "ZC3H18[0-1101]"     "ZC3H18[1100-3678]"
\end{verbatim}
\end{kframe}
\end{knitrout}

Merge them and keep only the Krogan genes.

\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{# creation of a dedicated column}
\hlstd{young}\hlopt{$}\hlstd{merge.Gene}\hlkwb{<-}\hlstd{young}\hlopt{$}\hlstd{PreyGene}
\hlstd{tab}\hlopt{$}\hlstd{merge.Gene}\hlkwb{<-}\hlstd{tab}\hlopt{$}\hlstd{Gene.name}
\hlstd{tablo}\hlkwb{<-}\hlkwd{merge}\hlstd{(young, tab,} \hlkwc{by}\hlstd{=}\hlstr{"merge.Gene"}\hlstd{,} \hlkwc{all.x}\hlstd{=}\hlnum{TRUE}\hlstd{)}

\hlkwd{write.table}\hlstd{(tablo,} \hlstr{"covid_comp_complete.txt"}\hlstd{,} \hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{quote}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}