Commit f58fd485 authored by your name's avatar your name
Browse files

rapport 2: comparaison des analyses de Jeanette et DGINN sur les mêmes alignements

parents
This source diff could not be displayed because it is too large. You can view the blob instead.
\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-CoV-2: comparison of different analyses}
\author{Marie Cariou}
\date{Mai 2020} % Activate to display a given date or no date
\begin{document}
\maketitle
\section{Files manipulations}
I will compare Janet's results with the DGINN results, both obtained on the SAME alignments.
\subsection{Read Janet table}
<<>>=
# Load Janet's PAML results table. read.delim() expects tab-separated
# fields; dec = "," makes numbers written with a decimal comma parse as numeric.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
tab <- read.delim(
  "/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv",
  fill = TRUE, header = TRUE, dec = ","
)
# Quick sanity checks on what was read: dimensions and column names.
dim(tab)
names(tab)
@
\subsection{Read DGINN table}
<<>>=
# Load the DGINN summary table (tab-separated, one row per gene and method).
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
dginn <- read.delim(
  "/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/summary.res",
  fill = TRUE, header = TRUE
)
# Quick sanity checks on what was read: dimensions and column names.
dim(dginn)
names(dginn)
@
\subsection{Joining tables}
\subsubsection{Based on which column?}
<<>>=
# First identifier columns of Janet's table.
head(tab)[, 1:5]
# Gene with an odd-looking name in some of the columns.
tab[158, 1:10]
# Number of distinct identifiers in each candidate key column.
length(unique(dginn$Gene))
length(unique(tab$PreyGene))
length(unique(tab$Gene.name))
# Which pair of columns shares the largest number of identical names?
sum(unique(dginn$Gene) %in% unique(tab$PreyGene))
sum(unique(dginn$Gene) %in% unique(tab$Gene.name))
# dginn$Gene and tab$Gene.name are almost identical, except for one row.
# The suspect is this one:
tab[158, 1:10]
# Check: rows of Janet's table whose Gene.name is unknown to DGINN.
# The row index is computed rather than hard-coded, so the fix below keeps
# targeting the right row even if the input table is reordered.
bad_rows <- which(!(tab$Gene.name %in% dginn$Gene))
tab[bad_rows, 1:10]
# Manual replacement by the DGINN gene name that is absent from Janet's table.
val_remp <- setdiff(as.character(unique(dginn$Gene)), as.character(tab$Gene.name))
val_remp
# The replacement is only meaningful for a single one-to-one mismatch;
# fail loudly otherwise instead of silently recycling or misassigning names.
stopifnot(length(bad_rows) == 1L, length(val_remp) == 1L)
tab$Gene.name <- as.character(tab$Gene.name)
tab$Gene.name[bad_rows] <- val_remp
# After the fix every DGINN gene should be found in tab$Gene.name.
sum(unique(dginn$Gene) %in% unique(tab$Gene.name))
@
\subsubsection{new columns}
<<>>=
# Append the DGINN results of one method to Janet's table.
#
# Arguments:
#   method   value of the Method column of `results` whose rows are extracted.
#   data     table to extend; must contain a Gene.name column.
#            Defaults to the global `tab` (the original behaviour).
#   results  DGINN table with columns Gene, Omega, PosSel, PValue, NbSites,
#            PSS and Method. Defaults to the global `dginn`.
#
# Returns `data` merged on Gene.name (merge() default, i.e. only genes present
# in both tables are kept) with five extra columns named
# Omega_<method>, PosSel_<method>, PValue_<method>, NbSites_<method>, PSS_<method>.
#
# Stops with an error when `method` matches no row: merging an empty
# selection would otherwise silently return a table with zero rows.
add_col <- function(method = "PamlM1M2", data = tab, results = dginn) {
  is_method <- results$Method == method
  if (!any(is_method, na.rm = TRUE)) {
    stop("no rows with Method == \"", method, "\" in the DGINN table",
         call. = FALSE)
  }
  value_cols <- c("Omega", "PosSel", "PValue", "NbSites", "PSS")
  # which() drops rows whose Method is NA instead of producing all-NA rows.
  tmp <- results[which(is_method), c("Gene", value_cols)]
  names(tmp) <- c("Gene.name", paste0(value_cols, "_", method))
  merge(data, tmp, by = "Gene.name")
}
# Add the columns of the four methods handled by add_col(), one after the other.
for (dginn_method in c("PamlM1M2", "PamlM7M8", "BppM1M2", "BppM7M8")) {
  tab <- add_col(dginn_method)
}
# BUSTED: only the Omega, PosSel and PValue columns are carried over.
is_busted <- dginn$Method == "BUSTED"
busted_cols <- dginn[is_busted, c("Gene", "Omega", "PosSel", "PValue")]
names(busted_cols) <- c("Gene.name",
                        paste0(c("Omega", "PosSel", "PValue"), "_BUSTED"))
tab <- merge(tab, busted_cols, by = "Gene.name")
# MEME: only the NbSites and PSS columns are carried over.
is_meme <- dginn$Method == "MEME"
meme_cols <- dginn[is_meme, c("Gene", "NbSites", "PSS")]
names(meme_cols) <- c("Gene.name", paste0(c("NbSites", "PSS"), "_MEME"))
tab <- merge(tab, meme_cols, by = "Gene.name")
@
\subsubsection{Write new table}
<<>>=
# Save the merged table as a tab-separated text file, without row names
# and without quoting, in the current working directory.
write.table(
  tab,
  file = "COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt",
  row.names = FALSE, quote = FALSE, sep = "\t"
)
@
\subsection{Figure}
\subsubsection{Omega}
Comparaison des Omega : colonne L \texttt{whole.gene.dN.dS.model.0} du tableau de Janet VS colonne \texttt{Omega} (méthode \texttt{PamlM7M8}) dans la sortie de DGINN.
<<omegaM7M8>>=
# Scatter plot: column whole.gene.dN.dS.model.0 of Janet's table (x axis)
# against the PamlM7M8 omega reported by DGINN (y axis).
plot(
  x = tab[["whole.gene.dN.dS.model.0"]],
  y = tab[["Omega_PamlM7M8"]],
  xlab = "Omega tabJanet",
  ylab = "Omega DGINN"
)
@
Quels sont les 2 gènes qui s'écartent de la bissectrice?
<<>>=
# Names of the genes whose DGINN omega is clearly above Janet's value.
# which() keeps only the rows where the condition is TRUE: a bare logical
# index would also return an NA entry for every gene with a missing omega.
janet_omega <- tab$whole.gene.dN.dS.model.0
dginn_omega <- tab$Omega_PamlM7M8
tab[which(janet_omega < 0.2 & dginn_omega > 0.4), "Gene.name"]
tab[which(janet_omega < 0.6 & dginn_omega > 0.7), "Gene.name"]
@
\subsubsection{pvalues pour M7M8}
Cette fois, je compare la colonne R \texttt{pVal.M8vsM7} du tableau de Janet à la colonne \texttt{PValue} des lignes \texttt{PamlM7M8} de la sortie de DGINN.
<<pvalM7M8>>=
# p-values of the M7 vs M8 comparison: Janet's table (x) against DGINN (y).
janet_p <- tab$pVal.M8vsM7
dginn_p <- tab$PValue_PamlM7M8
# Genes on which the two analyses disagree at the 0.05 threshold.
only_dginn <- janet_p > 0.05 & dginn_p < 0.05
only_janet <- janet_p < 0.05 & dginn_p > 0.05
plot(janet_p, dginn_p,
     pch = 20,
     xlab = "p-value tabJanet", ylab = "p-value DGINN", main = "M7vM8 Paml")
# Overplot the discordant genes: red = below 0.05 in DGINN only,
# green = below 0.05 in Janet's table only.
points(janet_p[only_dginn], dginn_p[only_dginn], col = "red", pch = 20)
points(janet_p[only_janet], dginn_p[only_janet], col = "green", pch = 20)
legend("topleft",
       legend = c("<0.05 in PamlM7M8 but >0.05 in Janet M8vsM7",
                  "<0.05 in Janet M8vsM7 but >0.05 in PamlM7M8"),
       pch = 20, col = c("red", "green"))
@
Quels sont les gènes en couleur:
<<>>=
# Columns shown for the genes highlighted in colour on the p-value plot.
cmp_cols <- c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8",
              "whole.gene.dN.dS.model.0", "Omega_PamlM7M8")
sig_dginn_only <- tab$pVal.M8vsM7 > 0.05 & tab$PValue_PamlM7M8 < 0.05
sig_janet_only <- tab$pVal.M8vsM7 < 0.05 & tab$PValue_PamlM7M8 > 0.05
# na.omit() removes the rows that contain an NA in any of the displayed columns.
na.omit(tab[sig_dginn_only, cmp_cols])
na.omit(tab[sig_janet_only, cmp_cols])
@
Focus sur le gène CIT pour lequel la différence est vraiment assez importante:
<<cit>>=
# Print all DGINN rows for the gene of interest, then the first 20 columns
# of its row(s) in the merged table.
focus_gene <- "CIT"
dginn[dginn$Gene == focus_gene, ]
tab[tab$Gene.name == focus_gene, seq_len(20)]
@
\subsubsection{Concordance entre méthodes}
Est-ce que les gènes avec une faible p-value sont détectés par 1, 2, 3, 4 ou 5 méthodes en général ?
<<stripchart>>=
# For each gene, count how many of the five DGINN PosSel columns equal "Y",
# separately for genes with pVal.M8vsM7 >= 0.05 ("No Janet") and < 0.05
# ("Yes Janet") in Janet's table.
possel_cols <- c("PosSel_PamlM1M2", "PosSel_PamlM7M8", "PosSel_BppM1M2",
                 "PosSel_BppM7M8", "PosSel_BUSTED")
# which() discards genes whose Janet p-value is NA; a bare logical index
# would add an all-NA row for each of them.
nontab <- tab[which(tab$pVal.M8vsM7 >= 0.05), c("Gene.name", possel_cols)]
ouitab <- tab[which(tab$pVal.M8vsM7 < 0.05), c("Gene.name", possel_cols)]
# Count over the PosSel columns only, so the Gene.name identifier is never
# compared with "Y" (apply() on the whole data frame did include it).
non <- rowSums(nontab[possel_cols] == "Y")
oui <- rowSums(ouitab[possel_cols] == "Y")
stripchart(x = list(oui, non), method = "jitter", jitter = 0.2,
           vertical = TRUE, pch = 20, cex = 0.5,
           group.names = c("Yes Janet", "No Janet"),
           ylab = "Nb YES from dginn")
@
%\subsubsection{Comparaison des codons?}
%Subtable with lines with both methods showing positive selection.
<<eval=FALSE, echo=FALSE>>=
# Genes for which both analyses give a p-value below 0.05.
both_sig <- tab$pVal.M8vsM7 < 0.05 & tab$PValue_PamlM7M8 < 0.05
sel_cols <- c(
  "Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8",
  "whole.gene.dN.dS.model.0", "Omega_PamlM7M8",
  "Number.of.codons.with.BEB....0.9",
  "Codons.under.positive.selection..BEB..0.9...alignment.position.",
  "NbSites_PamlM7M8", "PSS_PamlM7M8"
)
# na.omit() removes the rows that contain an NA in any of the kept columns.
sel <- na.omit(tab[both_sig, sel_cols])
dim(sel)
head(sel)
@
<<nsites, eval=FALSE, echo=FALSE>>=
# Number of selected codons per gene: Janet's table (x) against DGINN (y).
plot(sel$Number.of.codons.with.BEB....0.9, sel$NbSites_PamlM7M8)
# Observation from the plot: always more codons in Janet's version.

# One character vector of DGINN sites per gene, each element named "dginn".
# lapply() always returns a list; sapply() would collapse the result to a
# matrix whenever every gene has the same number of sites, which would break
# the names<- and mapply() calls below.
listdginn <- lapply(as.character(sel$PSS_PamlM7M8), function(x) {
  sites <- strsplit(x, split = ",")[[1]]
  names(sites) <- rep("dginn", length(sites))
  sites
})
names(listdginn) <- sel$Gene.name

# Same for Janet's table: split the comma-separated entries and keep, for each
# one, only the part before the first underscore. Each element is named "janet".
# vapply() returns character(0) for an empty entry list, where sapply() +
# unlist() returned NULL and made the names<- assignment fail.
listjanet <- lapply(
  as.character(sel$Codons.under.positive.selection..BEB..0.9...alignment.position.),
  function(x) {
    entries <- strsplit(x, split = ",")[[1]]
    sites <- vapply(strsplit(entries, split = "_"),
                    function(parts) parts[1],
                    character(1), USE.NAMES = FALSE)
    names(sites) <- rep("janet", length(sites))
    sites
  }
)
names(listjanet) <- sel$Gene.name

# Per gene, concatenate the DGINN sites followed by Janet's sites.
listjoined <- mapply(c, listdginn, listjanet, SIMPLIFY = FALSE)
@
\end{document}
File added
\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb
\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{Mai 2020} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
\section{Files manipulations}
I will compare Janet results to DGINN results, on the SAME alignment.
\subsection{Read Janet table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv"}\hlstd{,}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{dec}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] 332 84
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] "PreyGene"
## [2] "PreyGene_JYname"
## [3] "BaitShort"
## [4] "Gene.name"
## [5] "list"
## [6] "description"
## [7] "other.names"
## [8] "top40_posSeln"
## [9] "Num.primate.seqs"
## [10] "Alignment.length..nucleotides."
## [11] "Alignment.length..codons."
## [12] "whole.gene.dN.dS.model.0"
## [13] "total.tree.length"
## [14] "total.dN.tree.length"
## [15] "total.dS.tree.length"
## [16] "p.value.M8vsM8a..raw."
## [17] "p.value.M8vsM8a..BH.corrected."
## [18] "pVal.M8vsM7"
## [19] "pVal.M8vsM7.adj"
## [20] "pVal.M2vsM1"
## [21] "pVal.M2vsM1.adj"
## [22] "X..codons.under.positive.selection"
## [23] "dN.dS.of.positively.selected.codons"
## [24] "Number.of.codons.with.BEB....0.9"
## [25] "Codons.under.positive.selection..BEB..0.9...alignment.position."
## [26] "cooper.batsGene"
## [27] "cooper.batsGene_Ensembl_ID"
## [28] "cooper.batsIsoform_Ensembl_ID"
## [29] "cooper.batsSpecies"
## [30] "cooper.batsReference_length.aa."
## [31] "cooper.batsPercent_analyzed"
## [32] "cooper.batsAverage_dNdS"
## [33] "cooper.batsMaximum_dS"
## [34] "cooper.batsAverage_M7_tree"
## [35] "cooper.batsAverage_M8_tree"
## [36] "cooper.batsM7_log_likelihood"
## [37] "cooper.batsM8_log_likelihood"
## [38] "cooper.batsM7.M8_p_value"
## [39] "cooper.batsM8a_log_likelihood"
## [40] "cooper.batsM8.M8a_pvalue"
## [41] "cooper.batsBEB_hits.pp.0.95."
## [42] "cooper.batsBEB_sites"
## [43] "cooper.primates.Gene"
## [44] "cooper.primates.Gene_Ensembl_ID"
## [45] "cooper.primates.Isoform_Ensembl_ID"
## [46] "cooper.primates.Species"
## [47] "cooper.primates.Reference_length.aa."
## [48] "cooper.primates.Percent_analyzed"
## [49] "cooper.primates.Average_dNdS"
## [50] "cooper.primates.Maximum_dS"
## [51] "cooper.primates.Average_M7_tree"
## [52] "cooper.primates.Average_M8_tree"
## [53] "cooper.primates.M7_log_likelihood"
## [54] "cooper.primates.M8_log_likelihood"
## [55] "cooper.primates.M7.M8_p_value"
## [56] "cooper.primates.M8a_log_likelihood"
## [57] "cooper.primates.M8.M8a_pvalue"
## [58] "cooper.primates.BEB_hits.pp.0.95."
## [59] "cooper.primates.BEB_sites"
## [60] "hawkins_Gene"
## [61] "hawkins_Positive.Selection..M8vM8a.p.value"
## [62] "hawkins_Positive.Selection..M8vM8a.FDR.corrected.p.value"
## [63] "hawkins_Gene.Name.Alias"
## [64] "hawkins_Connection.to.immunity.or.pathogens"
## [65] "hawkins_Connection.to.reproduction"
## [66] "hawkins_Connection.to.collagen"
## [67] "hawkins_Connection.to.peroxisome"
## [68] "hawkins_Gene.Description.for.Human.Ortholog..from.Genbank.GENE.database."
## [69] "CpGmask.numNT"
## [70] "CpGmask.numAA"
## [71] "CpGmask.overall.dN.dS"
## [72] "CpGmask.total.tree.length"
## [73] "CpGmask.total.dN.tree.length"
## [74] "CpGmask.total.dS.tree.length"
## [75] "CpGmask.pVal.M8vsM8a"
## [76] "CpGmask.pVal.M8vsM8a.adj"
## [77] "CpGmask.pVal.M8vsM7"
## [78] "CpGmask.pVal.M8vsM7.adj"
## [79] "CpGmask.pVal.M2vsM1"
## [80] "CpGmask.pVal.M2vsM1.adj"
## [81] "CpGmask.percent.sites.under.positive.selection"
## [82] "CpGmask.dN.dS.of.selected.sites"
## [83] "CpGmask.num.sites.with.BEB...0.9"
## [84] "CpGmask.which.sites.have.BEB...0.9"
\end{verbatim}
\end{kframe}
\end{knitrout}
\subsection{Read DGINN table}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dginn}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/summary.res"}\hlstd{,}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T)}
\hlkwd{dim}\hlstd{(dginn)}
\end{alltt}
\begin{verbatim}
## [1] 1992 7
\end{verbatim}
\begin{alltt}
\hlkwd{names}\hlstd{(dginn)}
\end{alltt}
\begin{verbatim}
## [1] "Gene" "Omega" "Method" "PosSel" "PValue" "NbSites" "PSS"
\end{verbatim}
\end{kframe}
\end{knitrout}
\subsection{Joining table}
\subsubsection{Based on which column?}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{head}\hlstd{(tab)[,}\hlnum{1}\hlopt{:}\hlnum{5}\hlstd{]}
\end{alltt}
\begin{verbatim}
## PreyGene PreyGene_JYname BaitShort Gene.name list
## 1 PCNT PCNT nsp13 PCNT list26_COV_list4dataset2nonOrf
## 2 PVR PVR orf8 PVR list23_COV_list1orf
## 3 POLA1 POLA1 nsp1 POLA1 list24_COV_list2nonOrf
## 4 FASTKD5 FASTKD5 M FASTKD5 list26_COV_list4dataset2nonOrf
## 5 PRIM2 PRIM2 nsp1 PRIM2 list24_COV_list2nonOrf
## 6 ITGB1 ITGB1 orf8 ITGB1 list25_COV_list3dataset2orf
\end{verbatim}
\begin{alltt}
\hlcom{# gene avec un nom bizar dans certaines colomne}
\hlstd{tab[}\hlnum{158}\hlstd{,}\hlnum{1}\hlopt{:}\hlnum{10}\hlstd{]}
\end{alltt}
\begin{verbatim}
## PreyGene PreyGene_JYname BaitShort Gene.name list
## 158 MTARC1 01/03/2020 nsp7 01/03/2020 list24_COV_list2nonOrf
## description other.names top40_posSeln
## 158 mitochondrial amidoxime reducing component 1 MOSC1 no
## Num.primate.seqs Alignment.length..nucleotides.
## 158 24 1023
\end{verbatim}
\begin{alltt}
\hlcom{#}
\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene))}
\end{alltt}
\begin{verbatim}
## [1] 332
\end{verbatim}
\begin{alltt}
\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{PreyGene))}
\end{alltt}
\begin{verbatim}
## [1] 332
\end{verbatim}
\begin{alltt}
\hlkwd{length}\hlstd{(}\hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name))}
\end{alltt}
\begin{verbatim}
## [1] 332
\end{verbatim}
\begin{alltt}
\hlcom{#quelle paire de colonne contient le plus de noms identiques}
\hlkwd{sum}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{PreyGene))}
\end{alltt}
\begin{verbatim}
## [1] 314
\end{verbatim}
\begin{alltt}
\hlkwd{sum}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name))}
\end{alltt}
\begin{verbatim}
## [1] 331
\end{verbatim}
\begin{alltt}
\hlcom{# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne. Je soupçonne que c'est celle là:}
\hlstd{tab[}\hlnum{158}\hlstd{,}\hlnum{1}\hlopt{:}\hlnum{10}\hlstd{]}
\end{alltt}
\begin{verbatim}
## PreyGene PreyGene_JYname BaitShort Gene.name list
## 158 MTARC1 01/03/2020 nsp7 01/03/2020 list24_COV_list2nonOrf
## description other.names top40_posSeln
## 158 mitochondrial amidoxime reducing component 1 MOSC1 no
## Num.primate.seqs Alignment.length..nucleotides.
## 158 24 1023
\end{verbatim}
\begin{alltt}
\hlcom{# Verif:}
\hlstd{tab[,}\hlnum{1}\hlopt{:}\hlnum{10}\hlstd{][(tab}\hlopt{$}\hlstd{Gene.name} \hlopt{%in%} \hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene))}\hlopt{==}\hlstd{F,]}
\end{alltt}
\begin{verbatim}
## PreyGene PreyGene_JYname BaitShort Gene.name list
## 158 MTARC1 01/03/2020 nsp7 01/03/2020 list24_COV_list2nonOrf
## description other.names top40_posSeln
## 158 mitochondrial amidoxime reducing component 1 MOSC1 no
## Num.primate.seqs Alignment.length..nucleotides.
## 158 24 1023
\end{verbatim}
\begin{alltt}
\hlcom{# yep}
\hlcom{# Remplacement manuel par}
\hlkwd{as.character}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)[(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F])}
\end{alltt}
\begin{verbatim}
## [1] "MARC1"
\end{verbatim}
\begin{alltt}
\hlcom{# dans le tableau de Janet}
\hlstd{val_remp}\hlkwb{=}\hlkwd{as.character}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)[(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlstd{tab}\hlopt{$}\hlstd{Gene.name)}\hlopt{==}\hlstd{F])}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name)}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[}\hlnum{158}\hlstd{]}\hlkwb{<-}\hlstd{val_remp}
\hlkwd{sum}\hlstd{(}\hlkwd{unique}\hlstd{(dginn}\hlopt{$}\hlstd{Gene)} \hlopt{%in%} \hlkwd{unique}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name))}
\end{alltt}
\begin{verbatim}
## [1] 332
\end{verbatim}
\end{kframe}
\end{knitrout}
\subsubsection{new columns}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{add_col}\hlkwb{<-}\hlkwa{function}\hlstd{(}\hlkwc{method}\hlstd{=}\hlstr{"PamlM1M2"}\hlstd{)\{}
\hlstd{tmp}\hlkwb{<-}\hlstd{dginn[dginn}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstd{method,}
\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlkwd{paste0}\hlstd{(}\hlstr{"Omega_"}\hlstd{, method),}
\hlkwd{paste0}\hlstd{(}\hlstr{"PosSel_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PValue_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"NbSites_"}\hlstd{, method),} \hlkwd{paste0}\hlstd{(}\hlstr{"PSS_"}\hlstd{, method))}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\hlkwd{return}\hlstd{(tab)}
\hlstd{\}}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM1M2"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"PamlM7M8"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM1M2"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{add_col}\hlstd{(}\hlstr{"BppM7M8"}\hlstd{)}
\hlcom{# Manip pour la colonne BUSTED}
\hlstd{tmp}\hlkwb{<-}\hlstd{dginn[dginn}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"BUSTED"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"Omega"}\hlstd{,} \hlstr{"PosSel"}\hlstd{,} \hlstr{"PValue"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"Omega_BUSTED"}\hlstd{,} \hlstr{"PosSel_BUSTED"}\hlstd{,} \hlstr{"PValue_BUSTED"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\hlstd{tmp}\hlkwb{<-}\hlstd{dginn[dginn}\hlopt{$}\hlstd{Method}\hlopt{==}\hlstr{"MEME"}\hlstd{,}\hlkwd{c}\hlstd{(}\hlstr{"Gene"}\hlstd{,} \hlstr{"NbSites"}\hlstd{,} \hlstr{"PSS"}\hlstd{)]}
\hlkwd{names}\hlstd{(tmp)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"NbSites_MEME"}\hlstd{,} \hlstr{"PSS_MEME"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{merge}\hlstd{(tab, tmp,} \hlkwc{by}\hlstd{=}\hlstr{"Gene.name"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsubsection{Write new table}
\begin{knitrout}