diff --git a/covid_comp_script0_table.Rnw b/covid_comp_script0_table.Rnw
new file mode 100644
index 0000000000000000000000000000000000000000..7243ad4eb56975729ac69194a134b2bb9983bc29
--- /dev/null
+++ b/covid_comp_script0_table.Rnw
@@ -0,0 +1,262 @@
+\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
+%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
+%\geometry{letterpaper} % ... or a4paper or a5paper or ...
+%\geometry{landscape} % Activate for rotated page geometry
+%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
+%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
+ % TeX will automatically convert eps --> pdf in pdflatex
+%\usepackage{amssymb}
+
+\usepackage[utf8]{inputenc}
+%\usepackage[cyr]{aeguill}
+%\usepackage[francais]{babel}
+%\usepackage{hyperref}
+
+
+\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analyses}
+\author{Marie Cariou}
+\date{Janvier 2021} % Activate to display a given date or no date
+
+\begin{document}
+\maketitle
+
+\tableofcontents
+
+\newpage
+
+\section{File manipulation}
+
+\subsection{Read Janet Young's table}
+
+<<>>=
+# Project root. Keep the trailing "/": the data paths are built with paste0().
+workdir <- "/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
+
+# Original table, read with "," as the decimal mark.
+tab <- read.delim(
+  paste0(workdir,
+         "data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"),
+  fill = TRUE, header = TRUE, dec = ",")
+dim(tab)
+@
+
+\subsection{Read DGINN Young table}
+
+<<>>=
+dginnY <- read.delim(paste0(workdir, "data/summary_primate_young.res"),
+                     fill = TRUE, header = TRUE)
+
+dim(dginnY)
+@
+
+\subsection{Joining Young and DGINN Young table}
+
+<<>>=
+# Correct the gene name (MARC1) that differs between the two tables. The row
+# to fix is located from the data instead of a hard-coded row number, so the
+# correction cannot hit the wrong gene if the input table is re-ordered.
+tab$Gene.name <- as.character(tab$Gene.name)
+val_remp <- setdiff(as.character(unique(dginnY$Gene)), tab$Gene.name)
+row_remp <- which(!(tab$Gene.name %in% dginnY$Gene))
+if (length(val_remp) != 1L || length(row_remp) != 1L) {
+  stop("Expected exactly one unmatched gene name per table, found ",
+       length(val_remp), " in dginnY and ", length(row_remp), " in tab",
+       call. = FALSE)
+}
+tab$Gene.name[row_remp] <- val_remp
+sum(unique(dginnY$Gene) %in% unique(tab$Gene.name))
+@
+
+<<>>=
+# Append the results of one method from a long-format results table to a gene
+# table, as columns named "<column>_<method>".
+#   method   value of results$Method whose rows are used
+#   cols     result columns to append
+#   genes    data frame keyed by "Gene.name" (defaults to the global tab)
+#   results  data frame with "Gene", "Method" and the cols columns
+# Returns merge(genes, <selected results>) on "Gene.name": an inner join, so
+# genes without a result row for this method are dropped.
+add_col <- function(method = "PamlM1M2",
+                    cols = c("Omega", "PosSel", "PValue", "NbSites", "PSS"),
+                    genes = tab, results = dginnY) {
+  tmp <- results[results$Method == method, c("Gene", cols)]
+  names(tmp) <- c("Gene.name", paste0(cols, "_", method))
+  merge(genes, tmp, by = "Gene.name")
+}
+
+for (m in c("PamlM1M2", "PamlM7M8", "BppM1M2", "BppM7M8")) {
+  tab <- add_col(m)
+}
+
+# BUSTED keeps Omega/PosSel/PValue only; MEME keeps NbSites/PSS only.
+tab <- add_col("BUSTED", cols = c("Omega", "PosSel", "PValue"))
+tab <- add_col("MEME", cols = c("NbSites", "PSS"))
+@
+
+
+\subsection{Read DGINN Table}
+
+<<>>=
+dginnT <- read.delim(
+  paste0(workdir, "data/DGINN_202005281649summary_cleaned.csv"),
+  fill = TRUE, header = TRUE, sep = ",")
+
+dim(dginnT)
+
+names(dginnT)
+
+# Genes in dginn-primate output not present in the original table
+dginnT[!(dginnT$Gene %in% tab$Gene.name), "Gene"]
+# This includes paralogs, recombinations found by DGINN and additional genes
+# included on purpose
+
+# Genes from the original list not present in DGINN output
+tab[!(tab$Gene.name %in% dginnT$Gene), "Gene.name"]
+
+# Rename the key column to "Gene.name" and prefix the result columns.
+names(dginnT) <- c(
+  "File", "Name", "Gene.name", "GeneSize",
+  "dginn-primate_NbSpecies", "dginn-primate_omegaM0Bpp",
+  "dginn-primate_omegaM0codeml", "dginn-primate_BUSTED",
+  "dginn-primate_BUSTED.p.value", "dginn-primate_MEME.NbSites",
+  "dginn-primate_MEME.PSS", "dginn-primate_BppM1M2",
+  "dginn-primate_BppM1M2.p.value", "dginn-primate_BppM1M2.NbSites",
+  "dginn-primate_BppM1M2.PSS", "dginn-primate_BppM7M8",
+  "dginn-primate_BppM7M8.p.value", "dginn-primate_BppM7M8.NbSites",
+  "dginn-primate_BppM7M8.PSS", "dginn-primate_codemlM1M2",
+  "dginn-primate_codemlM1M2.p.value", "dginn-primate_codemlM1M2.NbSites",
+  "dginn-primate_codemlM1M2.PSS", "dginn-primate_codemlM7M8",
+  "dginn-primate_codemlM7M8.p.value", "dginn-primate_codemlM7M8.NbSites",
+  "dginn-primate_codemlM7M8.PSS")
+@
+
+\subsection{Join Table and DGINN table}
+
+<<>>=
+tab <- merge(tab, dginnT, by = "Gene.name", all.x = TRUE)
+@
+
+\subsection{Add DGINN results on bat dataset}
+
+DGINN results from different analyses.
+
+<<>>=
+# original table
+dginnbats <- read.delim(
+  paste0(workdir, "data/DGINN_202005281339summary_cleaned.tab"),
+  fill = TRUE, header = TRUE)
+
+# reruns on corrected alignment
+dginnbatsnew1 <- read.delim(
+  paste0(workdir, "data/DGINN_202011262248_summary.tab"),
+  fill = TRUE, header = TRUE)
+dginnbatsnew2 <- read.delim(
+  paste0(workdir, "data/DGINN_202012192053_summary.tab"),
+  fill = TRUE, header = TRUE)
+
+# Column choice: BUSTED and Bppml from the first file, codeml from the other one
+codeml_cols <- c(
+  "omegaM0codeml",
+  "codemlM1M2", "codemlM1M2_p.value", "codemlM1M2_NbSites", "codemlM1M2_PSS",
+  "codemlM7M8", "codemlM7M8_p.value", "codemlM7M8_NbSites", "codemlM7M8_PSS")
+
+# Each codeml value has to come from the row of the same gene. Rows are used
+# as they are when both files list the genes in the same order; otherwise they
+# are aligned on Gene, which requires unique gene names in both files.
+genes1 <- as.character(dginnbatsnew1$Gene)
+genes2 <- as.character(dginnbatsnew2$Gene)
+if (identical(genes1, genes2)) {
+  rows2 <- seq_along(genes2)
+} else {
+  rows2 <- match(genes1, genes2)
+  if (anyNA(rows2) || anyDuplicated(genes1) > 0L ||
+        anyDuplicated(genes2) > 0L) {
+    stop("Cannot align the two bat reruns on their Gene column", call. = FALSE)
+  }
+}
+dginnbatsnew <- dginnbatsnew1
+dginnbatsnew[codeml_cols] <- dginnbatsnew2[rows2, codeml_cols]
+
+####
+## RIPK1 is actually a primate result
+## 1. Take it and put it at the right place
+ripk1 <- dginnbatsnew[which(dginnbatsnew$Gene == "RIPK1"), ]
+if (nrow(ripk1) != 1L) {
+  stop("Expected exactly one RIPK1 row in the bat reruns, found ", nrow(ripk1),
+       call. = FALSE)
+}
+
+# Primate columns coerced to numeric before the RIPK1 values are written
+# (entries that are not numbers become NA, hence the coercion warnings).
+# NOTE(review): three *.PSS columns are coerced too; presumably entries that
+# list several sites end up as NA -- confirm this is intended.
+num_fields <- c("omegaM0Bpp", "BUSTED.p.value", "BppM1M2.p.value",
+                "BppM7M8.p.value", "BppM7M8.PSS", "codemlM1M2.p.value",
+                "codemlM1M2.PSS", "codemlM7M8.p.value", "codemlM7M8.PSS")
+for (col in paste0("dginn-primate_", num_fields)) {
+  tab[[col]] <- as.numeric(as.character(tab[[col]]))
+}
+
+# Primate result fields. The rerun files name the same fields with "_" in
+# place of the first "." (e.g. "BUSTED.p.value" is "BUSTED_p.value" there).
+primate_fields <- c(
+  "NbSpecies", "omegaM0Bpp", "omegaM0codeml",
+  "BUSTED", "BUSTED.p.value", "MEME.NbSites", "MEME.PSS",
+  "BppM1M2", "BppM1M2.p.value", "BppM1M2.NbSites", "BppM1M2.PSS",
+  "BppM7M8", "BppM7M8.p.value", "BppM7M8.NbSites", "BppM7M8.PSS",
+  "codemlM1M2", "codemlM1M2.p.value", "codemlM1M2.NbSites", "codemlM1M2.PSS",
+  "codemlM7M8", "codemlM7M8.p.value", "codemlM7M8.NbSites", "codemlM7M8.PSS")
+bat_fields <- sub(".", "_", primate_fields, fixed = TRUE)
+
+is_ripk1 <- which(tab$Gene.name == "RIPK1")
+tab[is_ripk1, "GeneSize"] <- ripk1[["GeneSize"]]
+for (i in seq_along(primate_fields)) {
+  value <- ripk1[[bat_fields[i]]]
+  if (primate_fields[i] == "MEME.PSS") {
+    # This field is written as a number; text that is not a number becomes NA.
+    value <- as.numeric(as.character(value))
+  }
+  tab[is_ripk1, paste0("dginn-primate_", primate_fields[i])] <- value
+}
+
+## 2. Remove it
+dginnbatsnew <- dginnbatsnew[dginnbatsnew$Gene != "RIPK1", ]
+
+
+## suppress redundant lines
+dginnbats <- dginnbats[!(dginnbats$Gene %in% dginnbatsnew$Gene), ]
+names(dginnbatsnew) <- names(dginnbats)
+
+## Harmonise column types before the two tables are stacked. By position:
+## 4 GeneSize, 6 omegaM0Bpp, 8 BUSTED, 12 BppM1M2, 13 BppM1M2.p.value,
+## 16 BppM7M8, 17 BppM7M8.p.value. as.character() comes first so that a
+## factor yields its labels rather than its internal level codes.
+dginnbatsnew[, 4] <- as.numeric(as.character(dginnbatsnew[, 4]))
+for (j in c(6, 13, 17)) {
+  dginnbats[, j] <- as.numeric(as.character(dginnbats[, j]))
+}
+for (j in c(8, 12, 16)) {
+  dginnbats[, j] <- as.character(dginnbats[, j])
+}
+
+## replace by new data
+dginnbats <- rbind(dginnbats, dginnbatsnew)
+
+names(dginnbats) <- c("File", "bats_Name", "cooper.batsGene",
+                      paste0("bats_", names(dginnbats)[-(1:3)]))
+names(dginnbats)
+
+tab <- merge(tab, dginnbats, by = "cooper.batsGene", all.x = TRUE)
+
+@
+
+\subsection{Write the new table}
+
+<<>>=
+
+write.table(tab, "covid_comp_complete.txt", row.names = FALSE, quote = FALSE,
+            sep = "\t")
+@
+
+\end{document}
+
diff --git a/covid_comp_script0_table.pdf b/covid_comp_script0_table.pdf
new file mode 100644 index 0000000000000000000000000000000000000000..669dd5b72c62078bbb831a30e6085797bff4df95 Binary files /dev/null and b/covid_comp_script0_table.pdf differ diff --git a/covid_comp_script0_table.tex b/covid_comp_script0_table.tex new file mode 100644 index 0000000000000000000000000000000000000000..c8b1af626e8cf56061f18834acff7cdbb2a492db --- /dev/null +++ b/covid_comp_script0_table.tex @@ -0,0 +1,434 @@ +\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color} +% maxwidth is the original width if it is less than linewidth +% otherwise use linewidth (to make sure the graphics do not exceed the margin) +\makeatletter +\def\maxwidth{ % + \ifdim\Gin@nat@width>\linewidth + \linewidth + \else + \Gin@nat@width + \fi +} +\makeatother + +\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345} +\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}% +\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}% +\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}% +\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% +\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}% +\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}% +\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}% +\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}% +\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}% +\let\hlipl\hlkwb + +\usepackage{framed} +\makeatletter +\newenvironment{kframe}{% + \def\at@end@of@kframe{}% + \ifinner\ifhmode% + \def\at@end@of@kframe{\end{minipage}}% + \begin{minipage}{\columnwidth}% + \fi\fi% + \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep + \colorbox{shadecolor}{##1}\hskip-\fboxsep + % There is no \\@totalrightmargin, so: + \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}% + \MakeFramed {\advance\hsize-\width + \@totalleftmargin\z@ \linewidth\hsize + \@setminipage}}% + {\par\unskip\endMakeFramed% + 
\at@end@of@kframe} +\makeatother + +\definecolor{shadecolor}{rgb}{.97, .97, .97} +\definecolor{messagecolor}{rgb}{0, 0, 0} +\definecolor{warningcolor}{rgb}{1, 0, 1} +\definecolor{errorcolor}{rgb}{1, 0, 0} +\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX + +\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format +%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. +%\geometry{letterpaper} % ... or a4paper or a5paper or ... +%\geometry{landscape} % Activate for for rotated page geometry +%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent +%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode + % TeX will automatically convert eps --> pdf in pdflatex +%\usepackage{amssymb} + +\usepackage[utf8]{inputenc} +%\usepackage[cyr]{aeguill} +%\usepackage[francais]{babel} +%\usepackage{hyperref} + + +\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis} +\author{Marie Cariou} +\date{Janvier 2021} % Activate to display a given date or no date +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} +\begin{document} +\maketitle + +\tableofcontents + +\newpage + +\section{Files manipulations} + +\subsection{Read Janet Young's table} + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"} + +\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} + \hlstr{"data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"}\hlstd{),} + \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{dec}\hlstd{=}\hlstr{","}\hlstd{)} +\hlkwd{dim}\hlstd{(tab)} +\end{alltt} +\begin{verbatim} +## [1] 332 84 +\end{verbatim} +\end{kframe} +\end{knitrout} + +\subsection{Read DGINN Young table} + +\begin{knitrout} 
+\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{dginnY}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} + \hlstr{"data/summary_primate_young.res"}\hlstd{),} + \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} + +\hlkwd{dim}\hlstd{(dginnY)} +\end{alltt} +\begin{verbatim} +## [1] 1992 7 +\end{verbatim} +\end{kframe} +\end{knitrout} + +\subsection{Joining Young and DGINN Young table} + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlcom{# correct gene names (MARC1)} +\hlstd{val_remp}\hlkwb{=}\hlkwd{as.character}\hlstd{(}\hlkwd{unique}\hlstd{(dginnY}\hlopt{$}\hlstd{Gene)[(}\hlkwd{unique}\hlstd{(dginnY}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} + \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F])} +\hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)} +\hlstd{tab}\hlopt{$}\hlstd{Gene.name[}\hlnum{158}\hlstd{]}\hlkwb{<-}\hlstd{val_remp} +\hlkwd{sum}\hlstd{(}\hlkwd{unique}\hlstd{(dginnY}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name))} +\end{alltt} +\begin{verbatim} +## [1] 332 +\end{verbatim} +\end{kframe} +\end{knitrout} + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{add_col}\hlkwb{<-}\hlkwa{function}\hlstd{(}\hlkwc{method}\hlstd{=}\hlstr{"PamlM1M2"}\hlstd{)\{} + +\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstd{method,} + \hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]} + +\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"Omega_"}\hlstd{, method),} + \hlkwd{paste0}\hlstd{(}\hlstr{"PosSel_"}\hlstd{, method),} 
\hlkwd{paste0}\hlstd{(}\hlstr{"PValue_"}\hlstd{, method),} + \hlkwd{paste0}\hlstd{(}\hlstr{"NbSites_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PSS_"}\hlstd{, method))} + +\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} + +\hlkwd{return}\hlstd{(tab)} +\hlstd{\}} + +\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM1M2"}\hlstd{)} +\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM7M8"}\hlstd{)} +\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM1M2"}\hlstd{)} +\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM7M8"}\hlstd{)} + + +\hlcom{# Manip pour la colonne BUSTED} + +\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"BUSTED"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{)]} +\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"Omega_BUSTED"}\hlstd{,} \hlstr{"PosSel_BUSTED"}\hlstd{,} \hlstr{"PValue_BUSTED"}\hlstd{)} +\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} + +\hlstd{tmp}\hlkwb{<-}\hlstd{dginnY[dginnY}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"MEME"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]} +\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"NbSites_MEME"}\hlstd{,} \hlstr{"PSS_MEME"}\hlstd{)} +\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)} +\end{alltt} +\end{kframe} +\end{knitrout} + + +\subsection{Read DGINN Table} + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{dginnT}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} + \hlstr{"data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),} + \hlkwc{fill}\hlstd{=T,} 
\hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)} + +\hlkwd{dim}\hlstd{(dginnT)} +\end{alltt} +\begin{verbatim} +## [1] 412 27 +\end{verbatim} +\begin{alltt} +\hlkwd{names}\hlstd{(dginnT)} +\end{alltt} +\begin{verbatim} +## [1] "File" "Name" "Gene" "GeneSize" "NbSpecies" +## [6] "omegaM0Bpp" "omegaM0codeml" "BUSTED" "BUSTED.p.value" "MEME.NbSites" +## [11] "MEME.PSS" "BppM1M2" "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS" +## [16] "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites" "BppM7M8.PSS" "codemlM1M2" +## [21] "codemlM1M2.p.value" "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8" "codemlM7M8.p.value" +## [26] "codemlM7M8.NbSites" "codemlM7M8.PSS" +\end{verbatim} +\begin{alltt} +\hlcom{# Number of genes in dginn-primate output not present in the original table} +\hlstd{dginnT[(dginnT}\hlopt{$}\hlstd{Gene} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F,}\hlstr{"Gene"}\hlstd{]} +\end{alltt} +\begin{verbatim} +## [1] ACE2 ADAM9[0-3120] ADAM9[3119-3927] ATP5MGL C1H1ORF50 CEP135[0-3264] +## [7] CEP135[3263-3678] CEP43 COQ8B COQ8A CSNK2A1 CSNK2B[0-609] +## [13] CSNK2B[608-2568] CYB5R1 DDX21[0-717] DDX21[716-2538] DDX50 DNAJC15 +## [19] DPH5[0-702] DPH5[701-1326] DPY19L2 ELOC ERO1B EXOSC3[0-1446] +## [25] EXOSC3[1445-1980] FBN3 GNB4 GNB2 GNB3 GOLGA7[0-312] +## [31] GOLGA7[311-549] GPX1[0-1218] GPX1[1217-2946] HDAC1 HS6ST3 IMPDH1 +## [37] ITGB1[0-2328] ITGB1[2327-2844] LMAN2L MRPS5[0-1569] MRPS5[1568-3783] MARC2 +## [43] MGRN1 NDFIP2[0-768] NDFIP2[767-1314] NDUFAF2[0-258] NDUFAF2[257-744] NSD2 +## [49] NUP58 NUP58[0-1824] NUP58[1823-2367] PABPC3 POTPABPC1 PABPC4L +## [55] PABPC5 PCSK5 PRIM2[0-1071] PRIM2[1070-1902] PRKACB PRKACG +## [61] PTGES2[0-1587] PTGES2[1586-2202] RAB8B RAB13 RAB18[0-855] RAB18[854-1815] +## [67] RAB2B RAB5A RAB5B RAB15 RALB EZR +## [73] EZR[0-1458] EZR[1457-3771] MSN RETREG3 RHOB RHOC +## [79] SLC44A2[0-2577] SLC44A2[2576-3657] SPART SRP72[0-2604] SRP72[2603-3417] STOM[0-1047] +## [85] STOM[1046-1800] STOML3 
TIMM29 TLE4 TLE2 TLE2[0-1302] +## [91] TLE2[1301-3987] TMPRSS2 TOMM70 TOR1B WASHC4 WFS1[0-2346] +## [97] WFS1[2345-3216] YIF1B +## 411 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ADAM9 ADAM9[0-3120] ADAM9[3119-3927] ADAMTS1 AES AGPS AKAP8 ... ZYG11B +\end{verbatim} +\begin{alltt} +\hlcom{# This includes paralogs, recombinations found by DGINN and additionnal genes } +\hlcom{# included on purpose} + +\hlcom{# Number of genes from the original list not present in DGINN output} +\hlstd{tab[(tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlstd{dginnT}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlstd{F,}\hlstr{"Gene.name"}\hlstd{]} +\end{alltt} +\begin{verbatim} +## [1] "ADCK4" "ARL6IP6" "ATP5L" "C19orf52" "C1orf50" "ERO1LB" "FAM134C" "FGFR1OP" "KIAA1033" "MFGE8" "NUPL1" +## [12] "SIGMAR1" "SPG20" "TCEB1" "TCEB2" "TOMM70A" "USP13" "VIMP" "WHSC1" +\end{verbatim} +\begin{alltt} +\hlkwd{names}\hlstd{(dginnT)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"Name"}\hlstd{,} \hlstr{"Gene.name"}\hlstd{,} \hlstr{"GeneSize"}\hlstd{,} + \hlstr{"dginn-primate_NbSpecies"}\hlstd{,} \hlstr{"dginn-primate_omegaM0Bpp"}\hlstd{,} + \hlstr{"dginn-primate_omegaM0codeml"}\hlstd{,} \hlstr{"dginn-primate_BUSTED"}\hlstd{,} + \hlstr{"dginn-primate_BUSTED.p.value"}\hlstd{,} \hlstr{"dginn-primate_MEME.NbSites"}\hlstd{,} + \hlstr{"dginn-primate_MEME.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2"}\hlstd{,} + \hlstr{"dginn-primate_BppM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM1M2.NbSites"}\hlstd{,} + \hlstr{"dginn-primate_BppM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8"}\hlstd{,} + \hlstr{"dginn-primate_BppM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_BppM7M8.NbSites"}\hlstd{,} + \hlstr{"dginn-primate_BppM7M8.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2"}\hlstd{,} + \hlstr{"dginn-primate_codemlM1M2.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM1M2.NbSites"}\hlstd{,} + \hlstr{"dginn-primate_codemlM1M2.PSS"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8"}\hlstd{,} + 
\hlstr{"dginn-primate_codemlM7M8.p.value"}\hlstd{,} \hlstr{"dginn-primate_codemlM7M8.NbSites"}\hlstd{,} + \hlstr{"dginn-primate_codemlM7M8.PSS"}\hlstd{)} +\end{alltt} +\end{kframe} +\end{knitrout} + +\subsection{Join Table and DGINN table} + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab,dginnT,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=T)} +\end{alltt} +\end{kframe} +\end{knitrout} + +\subsection{Add DGINN results on bat dataset} + +DGINN results from different analysis. + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlcom{# original table} +\hlstd{dginnbats}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} + \hlstr{"data/DGINN_202005281339summary_cleaned.tab"}\hlstd{),} + \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} + +\hlcom{# rerun on corrected alignment} +\hlstd{dginnbatsnew1}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} + \hlstr{"data/DGINN_202011262248_summary.tab"}\hlstd{),} + \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} +\hlstd{dginnbatsnew2}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} + \hlstr{"data/DGINN_202012192053_summary.tab"}\hlstd{),} + \hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)} + +\hlcom{# colomne choice, BUSTED and Bppml form first file, codeml from the other one} + +\hlstd{dginnbatsnew}\hlkwb{<-}\hlstd{dginnbatsnew1} +\hlstd{dginnbatsnew}\hlopt{$}\hlstd{omegaM0codeml}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{omegaM0codeml} + +\hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM1M2}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM1M2} +\hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM1M2_p.value}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM1M2_p.value} 
+\hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM1M2_NbSites}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM1M2_NbSites} +\hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM1M2_PSS}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM1M2_PSS} + +\hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM7M8}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM7M8} +\hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM7M8_p.value}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM7M8_p.value} +\hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM7M8_NbSites}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM7M8_NbSites} +\hlstd{dginnbatsnew}\hlopt{$}\hlstd{codemlM7M8_PSS}\hlkwb{<-}\hlstd{dginnbatsnew2}\hlopt{$}\hlstd{codemlM7M8_PSS} + +\hlcom{####} +\hlcom{## RIPK1 is actually a primat results} +\hlcom{## 1. Take it and put it at the right place} +\hlstd{ripk1}\hlkwb{<-}\hlkwd{as.vector}\hlstd{(dginnbatsnew[dginnbatsnew}\hlopt{$}\hlstd{Gene}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,])} +\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_omegaM0Bpp`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_omegaM0Bpp`))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BUSTED.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BUSTED.p.value`))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BppM1M2.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BppM1M2.p.value`))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} 
+\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.p.value`))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.PSS`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_BppM7M8.PSS`))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2.p.value`))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2.PSS`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM1M2.PSS`))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8.p.value`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8.p.value`))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8.PSS`}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{`dginn-primate_codemlM7M8.PSS`))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} 
+\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"GeneSize"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{GeneSize} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_NbSpecies"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{NbSpecies} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_omegaM0Bpp"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{omegaM0Bpp} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_omegaM0codeml"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{omegaM0codeml} + +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BUSTED"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BUSTED} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BUSTED.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BUSTED_p.value} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_MEME.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{MEME_NbSites} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_MEME.PSS"}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(ripk1}\hlopt{$}\hlstd{MEME_PSS))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM1M2"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM1M2} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM1M2.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM1M2_p.value} 
+\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM1M2.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM1M2_NbSites} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM1M2.PSS"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM1M2_PSS} + +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM7M8"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM7M8} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM7M8.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM7M8_p.value} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM7M8.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM7M8_NbSites} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_BppM7M8.PSS"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{BppM7M8_PSS} + +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM1M2} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM1M2_p.value} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM1M2_NbSites} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM1M2.PSS"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM1M2_PSS} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM7M8} 
+\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8.p.value"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM7M8_p.value} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8.NbSites"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM7M8_NbSites} +\hlstd{tab[tab}\hlopt{$}\hlstd{Gene.name}\hlopt{==}\hlstr{"RIPK1"}\hlstd{,}\hlstr{"dginn-primate_codemlM7M8.PSS"}\hlstd{]}\hlkwb{<-}\hlstd{ripk1}\hlopt{$}\hlstd{codemlM7M8_PSS} + +\hlcom{## 2. Remove it} +\hlstd{dginnbatsnew}\hlkwb{<-}\hlstd{dginnbatsnew[dginnbatsnew}\hlopt{$}\hlstd{Gene}\hlopt{!=}\hlstr{"RIPK1"}\hlstd{,]} + + +\hlcom{## suppress redundant lines} +\hlstd{dginnbats}\hlkwb{<-}\hlstd{dginnbats[(dginnbats}\hlopt{$}\hlstd{Gene} \hlopt{%in%} \hlstd{dginnbatsnew}\hlopt{$}\hlstd{Gene)}\hlopt{==}\hlnum{FALSE}\hlstd{,]} +\hlkwd{names}\hlstd{(dginnbatsnew)}\hlkwb{<-}\hlkwd{names}\hlstd{(dginnbats)} + +\hlcom{##############"} +\hlstd{dginnbatsnew[,}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(dginnbatsnew[,}\hlnum{4}\hlstd{])} +\hlstd{dginnbats[,}\hlnum{6}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{6}\hlstd{]))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{dginnbats[,}\hlnum{8}\hlstd{]}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{8}\hlstd{])} +\hlstd{dginnbats[,}\hlnum{12}\hlstd{]}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{12}\hlstd{])} +\hlstd{dginnbats[,}\hlnum{13}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{13}\hlstd{]))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlstd{dginnbats[,}\hlnum{16}\hlstd{]}\hlkwb{<-}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{16}\hlstd{])} 
+\hlstd{dginnbats[,}\hlnum{17}\hlstd{]}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(dginnbats[,}\hlnum{17}\hlstd{]))} +\end{alltt} + + +{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt} +\hlcom{## replace by new data} +\hlstd{dginnbats}\hlkwb{<-}\hlkwd{rbind}\hlstd{(dginnbats, dginnbatsnew)} + +\hlkwd{names}\hlstd{(dginnbats)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"File"}\hlstd{,} \hlstr{"bats_Name"}\hlstd{,} \hlstr{"cooper.batsGene"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"bats_"}\hlstd{,} + \hlkwd{names}\hlstd{(dginnbats)[}\hlopt{-}\hlstd{(}\hlnum{1}\hlopt{:}\hlnum{3}\hlstd{)]))} +\hlkwd{names}\hlstd{(dginnbats)} +\end{alltt} +\begin{verbatim} +## [1] "File" "bats_Name" "cooper.batsGene" "bats_GeneSize" +## [5] "bats_NbSpecies" "bats_omegaM0Bpp" "bats_omegaM0codeml" "bats_BUSTED" +## [9] "bats_BUSTED.p.value" "bats_MEME.NbSites" "bats_MEME.PSS" "bats_BppM1M2" +## [13] "bats_BppM1M2.p.value" "bats_BppM1M2.NbSites" "bats_BppM1M2.PSS" "bats_BppM7M8" +## [17] "bats_BppM7M8.p.value" "bats_BppM7M8.NbSites" "bats_BppM7M8.PSS" "bats_codemlM1M2" +## [21] "bats_codemlM1M2.p.value" "bats_codemlM1M2.NbSites" "bats_codemlM1M2.PSS" "bats_codemlM7M8" +## [25] "bats_codemlM7M8.p.value" "bats_codemlM7M8.NbSites" "bats_codemlM7M8.PSS" +\end{verbatim} +\begin{alltt} +\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab,dginnbats,} \hlkwc{by}\hlstd{=}\hlstr{"cooper.batsGene"}\hlstd{,} \hlkwc{all.x}\hlstd{=T)} +\end{alltt} +\end{kframe} +\end{knitrout} + +\subsection{Write the new table} + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} +\begin{alltt} +\hlkwd{write.table}\hlstd{(tab,} \hlstr{"covid_comp_complete.txt"}\hlstd{,} \hlkwc{row.names}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{quote}\hlstd{=}\hlnum{FALSE}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)} +\end{alltt} +\end{kframe} +\end{knitrout} + +\end{document} +