Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mcariou/2020_dginn_covid19
  • ciri/ps_sars-cov-2/2021_dginn_covid19
2 results
Show changes
Commits on Source (25)
Showing
with 3788 additions and 2390 deletions
x
ACAD9
ACADM
ACE2
ADAM9
AGPS
AKAP9
ATP6AP1
CDK5RAP2
CISD3
COL6A1
EDEM3
ERGIC1
GGH
GOLGA7
GRIPAP1
GRPEL1
IDE
IMPDH2
INHBE
ITGB1
LMAN2
MARK2
MIPOL1
MOV10
NUP214
POLA1
PRIM1
PUSL1
RAB18
RAP1GDS1
SCCPDH
SLC27A2
SLC30A9
SLC44A2
SLC9A3R1
TBK1
TOR1AIP1
VPS39
\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{Mai 2020} % Activate to display a given date or no date
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Files manipulations}
I will compare Janet Young's results to DGINN results, on the SAME alignment.
\subsection{Read Janet Young's table}
<<>>=
tab<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/COVID_PAMLresults_332hits_plusBatScreens_2020_Apr14.csv",
fill=T, h=T, dec=",")
dim(tab)
names(tab)
@
\subsection{Read DGINN table}
<<>>=
dginn<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/summary.res",
fill=T, h=T)
dim(dginn)
names(dginn)
@
\subsection{Joining table}
\subsubsection{Based on which column?}
<<>>=
head(tab)[,1:5]
# gene avec un nom bizar dans certaines colomne
tab[158,1:10]
#
length(unique(dginn$Gene))
length(unique(tab$PreyGene))
length(unique(tab$Gene.name))
#quelle paire de colonne contient le plus de noms identiques
sum(unique(dginn$Gene) %in% unique(tab$PreyGene))
sum(unique(dginn$Gene) %in% unique(tab$Gene.name))
# dginn$Gene et tab$Gene.name presque identiques sauf 1 ligne.
# Je soupçonne que c'est celle là:
tab[158,1:10]
# Verif:
tab[,1:10][(tab$Gene.name %in% unique(dginn$Gene))==F,]
# yep
# Remplacement manuel par
as.character(unique(dginn$Gene)[(unique(dginn$Gene) %in% tab$Gene.name)==F])
# dans le tableau de Janet
val_remp=as.character(unique(dginn$Gene)[(unique(dginn$Gene) %in% tab$Gene.name)==F])
tab$Gene.name<-as.character(tab$Gene.name)
tab$Gene.name[158]<-val_remp
sum(unique(dginn$Gene) %in% unique(tab$Gene.name))
@
\subsubsection{New columns}
<<>>=
add_col<-function(method="PamlM1M2"){
tmp<-dginn[dginn$Method==method,
c("Gene", "Omega", "PosSel", "PValue", "NbSites", "PSS")]
names(tmp)<-c("Gene.name", paste0("Omega_", method),
paste0("PosSel_", method), paste0("PValue_", method),
paste0("NbSites_", method), paste0("PSS_", method))
tab<-merge(tab, tmp, by="Gene.name")
return(tab)
}
tab<-add_col("PamlM1M2")
tab<-add_col("PamlM7M8")
tab<-add_col("BppM1M2")
tab<-add_col("BppM7M8")
# Manip pour la colonne BUSTED
tmp<-dginn[dginn$Method=="BUSTED",c("Gene", "Omega", "PosSel", "PValue")]
names(tmp)<-c("Gene.name", "Omega_BUSTED", "PosSel_BUSTED", "PValue_BUSTED")
tab<-merge(tab, tmp, by="Gene.name")
tmp<-dginn[dginn$Method=="MEME",c("Gene", "NbSites", "PSS")]
names(tmp)<-c("Gene.name", "NbSites_MEME", "PSS_MEME")
tab<-merge(tab, tmp, by="Gene.name")
@
\subsection{Write new table}
<<>>=
write.table(tab,
"COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt",
row.names=F, quote=F, sep="\t")
@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comparisons Primates}
\subsection{DGINN results on Janet Young's alignments (DGINN-Young-primate) VS Janet Young's results}
\subsubsection{Omega}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8>>=
plot(tab$whole.gene.dN.dS.model.0, tab$Omega_PamlM7M8,
xlab="Omega Young-primate", ylab="Omega DGINN-Young-primate")
@
Quels sont les 2 gènes qui s'écartent de la bissectrice?
<<>>=
tab[tab$whole.gene.dN.dS.model.0<0.2 & tab$Omega_PamlM7M8>0.4,c("Gene.name")]
tab[tab$whole.gene.dN.dS.model.0<0.6 & tab$Omega_PamlM7M8>0.7,c("Gene.name")]
@
\subsubsection{pvalues pour M7M8}
Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne "PValue" + ligne "PamlM7M8", pour la sortie de dginn.
<<pvalM7M8>>=
plot(tab$pVal.M8vsM7, tab$PValue_PamlM7M8, pch=20,
xlab="p-value Young-primate", ylab="p-value DGINN-Young-primate", main="M7vM8 Paml")
points(tab$pVal.M8vsM7[tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05],
tab$PValue_PamlM7M8[tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05],
col="red", pch=20)
points(tab$pVal.M8vsM7[tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05],
tab$PValue_PamlM7M8[tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05],
col="green", pch=20)
legend("topleft", c("<0.05 in DGINN-Young-primate PamlM7M8 but >0.05 in Young M8vsM7",
"<0.05 in Young M8vsM7 but >0.05 in DGINN-Young-primate PamlM7M8"),
pch=20, col=c("red", "green"))
@
Quels sont les gènes en couleur:
<<>>=
na.omit(tab[(tab$pVal.M8vsM7>0.05 & tab$PValue_PamlM7M8<0.05),
c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8")])
na.omit(tab[(tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8>0.05),
c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8")])
@
Focus sur le gène CIT pour lequel la différence est vraiment assez importante:
<<cit>>=
dginn[dginn$Gene=="CIT",]
tab[tab$Gene.name=="CIT",1:20]
@
\subsubsection{Concordance des méthodes}
Est-ce que les gènes avec une faible p-value sont détecté par 1,2,3,4 ou 5 méthodes en général?
<<stripchart>>=
nontab<-tab[tab$pVal.M8vsM7>=0.05,c("Gene.name","PosSel_PamlM1M2", "PosSel_PamlM7M8","PosSel_BppM1M2",
"PosSel_BppM7M8", "PosSel_BUSTED")]
non<-apply(nontab, 1, function(x) sum(x=="Y"))
ouitab<-tab[tab$pVal.M8vsM7<0.05,c("Gene.name","PosSel_PamlM1M2", "PosSel_PamlM7M8","PosSel_BppM1M2",
"PosSel_BppM7M8", "PosSel_BUSTED")]
oui<-apply(ouitab, 1, function(x) sum(x=="Y"))
stripchart(x=list(oui, non), method="jitter", jitter=0.2,
vertical=T, pch=20, cex=0.5,
group.names=c("Yes Young", "No Young"),
ylab="Nb YES from dginn")
@
\subsection{Résultats Cooper-primate VS Young-primate}
\subsubsection{How many genes in the Cooper-primate columns?}
<<>>=
# Temporary table with necessary columns
tmp<-tab[,c("Gene.name", "whole.gene.dN.dS.model.0", "pVal.M8vsM7",
"cooper.primates.Gene", "cooper.primates.Average_dNdS",
"cooper.primates.M7.M8_p_value")]
dim(tmp)
# Lines with values in the cooper Gene names column
dim(tmp[tmp$cooper.primates.Gene!="",])
# Line with values (no NA) in the Cooper dNdS column
sum(is.na(tmp$cooper.primates.Average_dNdS)==F)
# Line with values (no NA) in the Cooper pvalue column
sum(is.na(tmp$cooper.primates.M7.M8_p_value)==F)
@
\subsubsection{Omega}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "cooper.primates.Average\_dNdS"
<<omegaM7M8coop>>=
plot(tab$whole.gene.dN.dS.model.0, tab$cooper.primates.Average_dNdS,
xlab="Omega Young-primate", ylab="Omega Cooper-primate")
@
\subsubsection{pvalues pour M7M8}
Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne cooper.primates.M7.M8\_p\_value (p-value de l'analyse de Cooper).
<<pvalM7M8coop>>=
plot(tab$pVal.M8vsM7, tab$cooper.primates.M7.M8_p_value, pch=20,
xlab="p-value Young", ylab="p-value Cooper-primate", main="M7vM8 Paml-primate")
points(tab$pVal.M8vsM7[tab$pVal.M8vsM7>0.05 & tab$cooper.primates.M7.M8_p_value<0.05],
tab$cooper.primates.M7.M8_p_value[tab$pVal.M8vsM7>0.05 & tab$cooper.primates.M7.M8_p_value<0.05],
col="red", pch=20)
points(tab$pVal.M8vsM7[tab$pVal.M8vsM7<0.05 & tab$cooper.primates.M7.M8_p_value>0.05],
tab$cooper.primates.M7.M8_p_value[tab$pVal.M8vsM7<0.05 & tab$cooper.primates.M7.M8_p_value>0.05],
col="green", pch=20)
legend("topleft", c("<0.05 in Cooper PamlM7M8 but >0.05 in Young M8vsM7",
"<0.05 in Young M8vsM7 but >0.05 in Cooper PamlM7M8"),
pch=20, col=c("red", "green"))
@
\subsection{Résultats DGINN sur alignement de Janet-Young (DGINN-Young-primate) VS Cooper-primates}
\subsubsection{Omega}
Comparaison des Omega: colonne colonne "cooper.primates.Average\_dNdS" VS omega de DGINN.
<<omegaM7M8comp3>>=
plot(tab$Omega_PamlM7M8, tab$cooper.primates.Average_dNdS,
xlab="Omega DGINN-Young-primate", ylab="Omega Cooper-primate")
@
\subsubsection{pvalues pour M7M8}
Cette fois, je compare la colonne R "pVal.M8vsM7", à la colonne "PValue" + ligne "PamlM7M8", pour la sortie de dginn.
<<pvalM7M8comp3>>=
plot(tab$PValue_PamlM7M8, tab$cooper.primates.M7.M8_p_value, pch=20,
xlab="p-value DGINN-Young-primate", ylab="p-value Cooper-primate", main="M7vM8 Paml")
points(tab$PValue_PamlM7M8[tab$PValue_PamlM7M8>0.05 & tab$cooper.primates.M7.M8_p_value<0.05],
tab$cooper.primates.M7.M8_p_value[tab$PValue_PamlM7M8>0.05 & tab$cooper.primates.M7.M8_p_value<0.05],
col="red", pch=20)
points(tab$PValue_PamlM7M8[tab$PValue_PamlM7M8<0.05 & tab$cooper.primates.M7.M8_p_value>0.05],
tab$cooper.primates.M7.M8_p_value[tab$PValue_PamlM7M8<0.05 & tab$cooper.primates.M7.M8_p_value>0.05],
col="green", pch=20)
legend("topleft", c("<0.05 in Cooper-primate PamlM7M8 but >0.05 in DGINN-Young-primate M8vsM7",
"<0.05 in DGINN-Young-primate M8vsM7 but >0.05 in Cooper-primate PamlM7M8"),
pch=20, col=c("red", "green"))
@
\subsection{Overlap}
I will draw a venn diagramm for the positive genes in the 3 analyses.
\subsubsection{Library and subtable}
<<sub>>=
library(VennDiagram)
# keeps only genes analysed in all 3 experiments
tmp<-na.omit(tab[,c("Gene.name", "pVal.M8vsM7", "cooper.primates.M7.M8_p_value",
"PosSel_PamlM7M8", "PValue_PamlM7M8")])
dim(tmp)
@
Il reste 186 gènes
<<vennprimate>>=
area1dginn<-sum(tmp$PosSel_PamlM7M8=="Y")
area2jean<-sum(tmp$pVal.M8vsM7<0.05)
area3coop<-sum(tmp$cooper.primates.M7.M8_p_val<0.05, na.rm=T)
n12<-sum(tmp$PosSel_PamlM7M8=="Y" & tmp$pVal.M8vsM7<0.05)
n23<-sum(tmp$pVal.M8vsM7<0.05 & tmp$cooper.primates.M7.M8_p_val<0.05, na.rm=T)
n13<-sum(tmp$PosSel_PamlM7M8=="Y" & tmp$cooper.primates.M7.M8_p_val<0.05, na.rm=T)
n123<-sum(tmp$PosSel_PamlM7M8=="Y" & tmp$pVal.M8vsM7<0.05 &
tmp$cooper.primates.M7.M8_p_val<0.05, na.rm=T)
draw.triple.venn(area1dginn, area2jean, area3coop,
n12, n23, n13, n123,
category=c("DGINN-Young-primate", "Young-primate", "Cooper-primate"))
@
\subsection{Mondrian}
<<mondrianprimates>>=
library(Mondrian)
monddata<-as.data.frame(tmp$Gene.name)
monddata$primates_dginn_young<-ifelse(tmp$PosSel_PamlM7M8=="Y", 1,0)
monddata$primates_young<-ifelse(tmp$pVal.M8vsM7<0.05, 1, 0)
monddata$primates_cooper<-ifelse(tmp$cooper.primates.M7.M8_p_val<0.05, 1, 0)
mondrian(monddata[,2:4])
@
%\subsection{Comparaison des codons?}
%Subtable with lines with both methods showing positive selection.
<<eval=FALSE, echo=FALSE>>=
%<<selec>>=
#cas ou selection + dans les 2 cas
sel<-na.omit(tab[(tab$pVal.M8vsM7<0.05 & tab$PValue_PamlM7M8<0.05),c("Gene.name", "pVal.M8vsM7", "PValue_PamlM7M8", "whole.gene.dN.dS.model.0", "Omega_PamlM7M8", "Number.of.codons.with.BEB....0.9", "Codons.under.positive.selection..BEB..0.9...alignment.position.", "NbSites_PamlM7M8","PSS_PamlM7M8")])
dim(sel)
head(sel)
@
<<nsites, eval=FALSE, echo=FALSE>>=
%<<nsites>>=
plot(sel$Number.of.codons.with.BEB....0.9, sel$NbSites_PamlM7M8)
# toujours plus de codon dans la version de janet Young
listdginn<-sapply(sel$PSS_PamlM7M8, function(x){
tmp<-strsplit(as.character(x), split=",")[[1]]
names(tmp)<-rep("dginn", length(tmp))
return(tmp)
})
names(listdginn)<-sel$Gene.name
listjanet<-sapply(sel$Codons.under.positive.selection..BEB..0.9...alignment.position., function(x){
tmp<-strsplit(as.character(x), split=",")[[1]]
tmp2<-sapply(tmp, function(x) strsplit(as.character(x), split="_")[[1]][1])
tmp2<-unlist(tmp2)
names(tmp2)<-rep("young", length(tmp2))
return(unlist(tmp2))
})
names(listjanet)<-sel$Gene.name
listjoined<-mapply(c, listdginn, listjanet, SIMPLIFY=FALSE)
@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Bats Comparisons}
\subsection{Add DGINN results for Bats}
Lecture du tableau.
<<readtab4test, eval=F>>=
# Tableau tel qu'il est à cet étape du script, pour travailler sur l'ajout du nouveau tableau bats sans recommencer au début à chaque fois.
tab<-read.delim("COVID_PAMLresults_332hits_plusBatScreens_plusDGINN_20200506.txt",
header=TRUE, sep="\t")
@
<<>>=
#dginnbatsold<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/2020_bats_completeResults.csv",
# fill=T, h=T)
dginnbats<-read.delim("/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/data/DGINN_202005281339summary_cleaned.tab",
fill=T, h=T)
dim(dginnbats)
names(dginnbats)
length(unique(dginnbats$Gene))
length(unique(tab$cooper.batsGene))
table(unique(tab$cooper.batsGene) %in% unique(dginnbats$Gene))
@
Which genes in the Cooper table are not in the gene output?
<<>>=
unique(tab$cooper.batsGene)[unique(tab$cooper.batsGene) %in% unique(dginnbats$Gene)==F]
@
Merge tables:
<<bats>>=
names(dginnbats)<-c("File", "bats_Name", "cooper.batsGene", paste0("bats_", names(dginnbats)[-(1:3)]))
tab<-merge(tab,dginnbats, by="cooper.batsGene", all.x=T)
@
\subsection{Cooper-bats results vs DGINN-bats results}
\subsubsection{Omega}
<<omegaM7M8bats>>=
plot(tab$cooper.batsAverage_dNdS, tab$bats_omegaM0codeml,
xlab="Omega Cooper-bats", ylab="Omega DGINN-bats")
@
\subsubsection{pvalues pour M7M8}
<<pvalM7M8bats>>=
tab$bats_codemlM7M8.p.value<-as.numeric(as.character(tab$bats_codemlM7M8.p.value))
plot(tab$cooper.batsM7.M8_p_value, tab$bats_codemlM7M8.p.value, pch=20,
xlab="p-value Cooper-bats", ylab="p-value DGINN-bats", main="M7vM8 Paml")
points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value>0.05 & tab$bats_codemlM7M8.p.value<0.05],
tab$bats_codemlM7M8.p.value[tab$cooper.batsM7.M8_p_value>0.05 & tab$bats_codemlM7M8.p.value<0.05],
col="red", pch=20)
points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value<0.05 & tab$bats_codemlM7M8.p.value>0.05],
tab$bats_codemlM7M8.p.value[tab$cooper.batsM7.M8_p_value<0.05 & tab$bats_codemlM7M8.p.value>0.05],
col="green", pch=20)
legend("topleft", c("<0.05 in DGINN-bats but >0.05 in Cooper-bats",
"<0.05 in Cooper-bats but >0.05 in DGINN-bats"),
pch=20, col=c("red", "green"))
@
\subsection{Comparaison Cooper-Hawkins}
\subsubsection{pvalues pour M7M8}
<<pvalM7M8comp2>>=
plot(tab$cooper.batsM7.M8_p_value, tab$hawkins_Positive.Selection..M8vM8a.p.value,
pch=20, xlab="p-value Cooper-bats", ylab="p-value hawkins-bats", main="M7vM8 Paml")
points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value>0.05 &
tab$hawkins_Positive.Selection..M8vM8a.p.value<0.05],
tab$hawkins_Positive.Selection..M8vM8a.p.value[tab$cooper.batsM7.M8_p_value>0.05 &
tab$hawkins_Positive.Selection..M8vM8a.p.value<0.05],
col="red", pch=20)
points(tab$cooper.batsM7.M8_p_value[tab$cooper.batsM7.M8_p_value<0.05 &
tab$hawkins_Positive.Selection..M8vM8a.p.value>0.05],
tab$hawkins_Positive.Selection..M8vM8a.p.value[tab$cooper.batsM7.M8_p_value<0.05 &
tab$hawkins_Positive.Selection..M8vM8a.p.value>0.05],
col="green", pch=20)
legend("topleft", c("<0.05 in Hawkins but >0.05 in Cooper",
"<0.05 in Cooper but >0.05 in Hawkins"),
pch=20, col=c("red", "green"))
@
\subsection{Comparaison dginn-Hawkins}
<<pvalM7M8compautre>>=
plot(tab$hawkins_Positive.Selection..M8vM8a.p.value, tab$bats_codemlM7M8.p.value,
pch=20, xlab="p-value hawkins-bats", ylab="p-value DGINN-bats", main="M7vM8 Paml")
points(tab$hawkins_Positive.Selection..M8vM8a.p.value[tab$hawkins_Positive.Selection..M8vM8a.p.value>0.05 &
tab$bats_codemlM7M8.p.value<0.05],
tab$bats_codemlM7M8.p.value[tab$hawkins_Positive.Selection..M8vM8a.p.value>0.05 &
tab$bats_codemlM7M8.p.value<0.05], col="red", pch=20)
points(tab$hawkins_Positive.Selection..M8vM8a.p.value[tab$hawkins_Positive.Selection..M8vM8a.p.value<0.05 &
tab$bats_codemlM7M8.p.value>0.05],
tab$bats_codemlM7M8.p.value[tab$hawkins_Positive.Selection..M8vM8a.p.value<0.05 &
tab$bats_codemlM7M8.p.value>0.05], col="green", pch=20)
legend("topleft", c("<0.05 in DGINN-bats but >0.05 in Hawkins",
"<0.05 in Hawkinsbut >0.05 in DGINN-bats"),
pch=20, col=c("red", "green"))
@
\subsection{Diagramme de Venn}
I will draw a venn diagramm for the positive genes in the 3 analyses.
\subsubsection{subtab}
<<subbats>>=
tmp<-na.omit(tab[,c("Gene.name", "bats_codemlM7M8.p.value", "hawkins_Positive.Selection..M8vM8a.p.value", "cooper.batsM7.M8_p_value")])
dim(tmp)
@
154 genes (present in the 3 experiments)
\subsubsection{figure}
<<vennbats>>=
area1dginn<-sum(tmp$bats_codemlM7M8.p.value<0.05, na.rm=T)
area2hawk<-sum(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, na.rm=T)
area3coop<-sum(tmp$cooper.batsM7.M8_p_value<0.05, na.rm=T)
n12<-sum(tmp$bats_codemlM7M8.p.value<0.05 & tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, na.rm=T)
n23<-sum(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05 & tmp$cooper.batsM7.M8_p_value<0.05, na.rm=T)
n13<-sum(tmp$bats_codemlM7M8.p.value<0.05 & tmp$cooper.batsM7.M8_p_value<0.05, na.rm=T)
n123<-sum(tmp$bats_codemlM7M8.p.value<0.05 & tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05 & tmp$cooper.batsM7.M8_p_value<0.05, na.rm=T)
draw.triple.venn(area1dginn, area2hawk, area3coop,
n12, n23, n13, n123,
category=c("DGINN-Young-bats", "Hawkins-bats", "Cooper-bats"))
@
\subsection{Mondrian}
<<mondrianbats>>=
library(Mondrian)
monddata<-as.data.frame(tmp$Gene.name)
monddata$bats_dginn<-ifelse(tmp$bats_codemlM7M8.p.value<0.05, 1,0)
monddata$bats_hawkins<-ifelse(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0)
monddata$bats_cooper<-ifelse(tmp$cooper.batsM7.M8_p_value<0.05, 1, 0)
mondrian(monddata[,2:4])
@
\section{To do}
Comparaison G4 pas G4
\end{document}
File deleted
This diff is collapsed.
source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -15,7 +15,7 @@
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{janvier 2021} % Activate to display a given date or no date
\date{Mars 2021} % Activate to display a given date or no date
\begin{document}
\maketitle
......@@ -29,11 +29,14 @@
Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
<<>>=
workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home,"CIRI_BIBS_projects/2020_05_Etienne_covid/")
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
dim(tab)
tab$Gene.name<-as.character(tab$Gene.name.x)
tab$Gene.name[tab$PreyGene=="MTARC1"]<-"MTARC1"
@
\section{Comparison Bats}
......@@ -41,28 +44,42 @@ dim(tab)
\subsection{Cooper-bats results VS DGINN-bats results}
<<omegaM7M8bats>>=
tab$bats_omegaM0codeml[tab$bats_omegaM0codeml=="na"]<-NA
plot(tab$cooper.batsAverage_dNdS, as.numeric(as.character(tab$bats_omegaM0codeml)),
xlab="Omega Cooper-bats", ylab="Omega DGINN-bats")
plot(tab$cooper.batsAverage_dNdS,
as.numeric(as.character(tab$bats_omegaM0codeml)),
xlab="Omega Cooper-bats",
ylab="Omega DGINN-bats")
abline(0,1)
abline(lm(as.numeric(as.character(tab$bats_omegaM0codeml))~
tab$cooper.batsAverage_dNdS),
col="red")
outlier<-tab[tab$cooper.batsAverage_dNdS>0.35 &
as.numeric(as.character(tab$bats_omegaM0codeml))<0.3,]
text(x=outlier$cooper.batsAverage_dNdS,
y=as.numeric(as.character(outlier$bats_omegaM0codeml)),
outlier$Gene.name)
@
\subsection{Cooper-bats VS Hawkins-bats and DGINN-bats VS Hawkins-bats}
\textit{I don't think we have the omega values}
\section{Overlap}
\subsection{Data}
<<subbats>>=
tmp<-na.omit(tab[,c("Gene.name", "bats_codemlM7M8.p.value", "hawkins_Positive.Selection..M8vM8a.p.value", "cooper.batsM7.M8_p_value", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8")])
tmp$bats_codemlM7M8.p.value<-as.numeric(as.character(tmp$bats_codemlM7M8.p.value))
tmp<-na.omit(tab[,c("Gene.name", "bats_codemlM7M8_p.value",
"hawkins_Positive.Selection..M8vM8a.p.value",
"cooper.batsM7.M8_p_value", "bats_BUSTED",
"bats_BppM1M2", "bats_BppM7M8", "bats_codemlM1M2",
"bats_codemlM7M8")])
tmp$bats_codemlM7M8_p.value[tmp$bats_codemlM7M8_p.value=="na"]<-NA
tmp$bats_codemlM7M8_p.value<-as.numeric(
as.character(tmp$bats_codemlM7M8_p.value))
dim(tmp)
@
174 genes (present in the 3 experiments)
170 genes (present in the 3 experiments)
\subsection{Mondrian}
......@@ -70,18 +87,25 @@ dim(tmp)
library(Mondrian)
monddata<-as.data.frame(tmp$Gene.name)
monddata$bats_hawkins<-ifelse(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0)
monddata$bats_cooper<-ifelse(tmp$cooper.batsM7.M8_p_value<0.05, 1, 0)
monddata$bats_hawkins<-ifelse(
tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0)
monddata$bats_cooper<-ifelse(
tmp$cooper.batsM7.M8_p_value<0.05, 1, 0)
dginntmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y", tmp$bats_codemlM7M8=="Y",
tmp$bats_BppM1M2=="Y", tmp$bats_BppM7M8=="Y", tmp$bats_BUSTED=="Y"))
dginntmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y",
tmp$bats_codemlM7M8=="Y",
tmp$bats_BppM1M2=="Y",
tmp$bats_BppM7M8=="Y",
tmp$bats_BUSTED=="Y"))
monddata$bats_dginn<-ifelse(dginntmp>=3, 1,0)
mondrian(monddata[,2:4], labels=c("DGINN >=3", "hawkins", "Cooper"))
mondrian(monddata[,2:4],
labels=c("DGINN >=3", "hawkins", "Cooper"))
monddata$bats_dginn<-ifelse(dginntmp>=4, 1,0)
mondrian(monddata[,2:4], labels=c("DGINN >=4", "hawkins", "Cooper"))
mondrian(monddata[,2:4],
labels=c("DGINN >=4", "hawkins", "Cooper"))
@
\subsection{subsetR}
......@@ -90,8 +114,10 @@ mondrian(monddata[,2:4], labels=c("DGINN >=4", "hawkins", "Cooper"))
library(UpSetR)
upsetdata<-as.data.frame(tmp$Gene.name)
upsetdata$bats_hawkins<-ifelse(tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0)
upsetdata$bats_cooper<-ifelse(tmp$cooper.batsM7.M8_p_value<0.05, 1, 0)
upsetdata$bats_hawkins<-ifelse(
tmp$hawkins_Positive.Selection..M8vM8a.p.value<0.05, 1, 0)
upsetdata$bats_cooper<-ifelse(
tmp$cooper.batsM7.M8_p_value<0.05, 1, 0)
upsetdata$bats_dginn<-ifelse(dginntmp>=3, 1,0)
......@@ -105,9 +131,39 @@ main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
@
<<>>=
source("covid_comp_shiny.R")
df<-read.delim(paste0(workdir,
"/data/DGINN_202005281649summary_cleaned.csv"),
fill=T, h=T, sep=",")
names(df)
dftmp<-tab[,c("bats_File", "bats_Name",
"Gene.name", "bats_GeneSize",
"bats_NbSpecies", "bats_omegaM0Bpp",
"bats_omegaM0codeml", "bats_BUSTED",
"bats_BUSTED_p.value", "bats_MEME_NbSites",
"bats_MEME_PSS", "bats_BppM1M2",
"bats_BppM1M2_p.value", "bats_BppM1M2_NbSites",
"bats_BppM1M2_PSS", "bats_BppM7M8",
"bats_BppM7M8_p.value", "bats_BppM7M8_NbSites",
"bats_BppM7M8_PSS", "bats_codemlM1M2",
"bats_codemlM1M2_p.value", "bats_codemlM1M2_NbSites",
"bats_codemlM1M2_PSS", "bats_codemlM7M8",
"bats_codemlM7M8_p.value", "bats_codemlM7M8_NbSites" ,
"bats_codemlM7M8_PSS")]
names(dftmp)<-names(df)
makeFig1(dftmp)
@
\end{document}
\end{document}
No preview for this file type
......@@ -65,7 +65,7 @@
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{janvier 2021} % Activate to display a given date or no date
\date{Mars 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
......@@ -81,15 +81,20 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{workdir}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/"}
\hlstd{workdir}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,}\hlstr{"CIRI_BIBS_projects/2020_05_Etienne_covid/"}\hlstd{)}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"covid_comp/covid_comp_complete.txt"}\hlstd{),} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] 333 161
## [1] 332 141
\end{verbatim}
\begin{alltt}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name}\hlkwb{<-}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{Gene.name.x)}
\hlstd{tab}\hlopt{$}\hlstd{Gene.name[tab}\hlopt{$}\hlstd{PreyGene}\hlopt{==}\hlstr{"MTARC1"}\hlstd{]}\hlkwb{<-}\hlstr{"MTARC1"}
\end{alltt}
\end{kframe}
\end{knitrout}
......@@ -100,13 +105,22 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS,} \hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml)),}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Cooper-bats"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-bats"}\hlstd{)}
\end{alltt}
\hlstd{tab}\hlopt{$}\hlstd{bats_omegaM0codeml[tab}\hlopt{$}\hlstd{bats_omegaM0codeml}\hlopt{==}\hlstr{"na"}\hlstd{]}\hlkwb{<-}\hlnum{NA}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduits lors de la conversion automatique}}\begin{alltt}
\hlkwd{plot}\hlstd{(tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS,}
\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml)),}
\hlkwc{xlab}\hlstd{=}\hlstr{"Omega Cooper-bats"}\hlstd{,}
\hlkwc{ylab}\hlstd{=}\hlstr{"Omega DGINN-bats"}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{1}\hlstd{)}
\hlkwd{abline}\hlstd{(}\hlkwd{lm}\hlstd{(}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml))}\hlopt{~}
\hlstd{tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS),}
\hlkwc{col}\hlstd{=}\hlstr{"red"}\hlstd{)}
\hlstd{outlier}\hlkwb{<-}\hlstd{tab[tab}\hlopt{$}\hlstd{cooper.batsAverage_dNdS}\hlopt{>}\hlnum{0.35} \hlopt{&}
\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tab}\hlopt{$}\hlstd{bats_omegaM0codeml))}\hlopt{<}\hlnum{0.3}\hlstd{,]}
\hlkwd{text}\hlstd{(}\hlkwc{x}\hlstd{=outlier}\hlopt{$}\hlstd{cooper.batsAverage_dNdS,}
\hlkwc{y}\hlstd{=}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(outlier}\hlopt{$}\hlstd{bats_omegaM0codeml)),}
\hlstd{outlier}\hlopt{$}\hlstd{Gene.name)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/omegaM7M8bats-1}
......@@ -115,8 +129,6 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\subsection{Cooper-bats VS Hawkins-bats and DGINN-bats VS Hawkins-bats}
\textit{I don't think we have the omega values}
\section{Overlap}
\subsection{Data}
......@@ -124,13 +136,15 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_codemlM7M8.p.value"}\hlstd{,} \hlstr{"hawkins_Positive.Selection..M8vM8a.p.value"}\hlstd{,} \hlstr{"cooper.batsM7.M8_p_value"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{)])}
\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8.p.value}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8.p.value))}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning: NAs introduits lors de la conversion automatique}}\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(tab[,}\hlkwd{c}\hlstd{(}\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_codemlM7M8_p.value"}\hlstd{,}
\hlstr{"hawkins_Positive.Selection..M8vM8a.p.value"}\hlstd{,}
\hlstr{"cooper.batsM7.M8_p_value"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,}
\hlstr{"bats_BppM1M2"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,}
\hlstr{"bats_codemlM7M8"}\hlstd{)])}
\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value[tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value}\hlopt{==}\hlstr{"na"}\hlstd{]}\hlkwb{<-}\hlnum{NA}
\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value}\hlkwb{<-}\hlkwd{as.numeric}\hlstd{(}
\hlkwd{as.character}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM7M8_p.value))}
\hlkwd{dim}\hlstd{(tmp)}
\end{alltt}
\begin{verbatim}
......@@ -139,7 +153,7 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\end{kframe}
\end{knitrout}
174 genes (present in the 3 experiments)
170 genes (present in the 3 experiments)
\subsection{Mondrian}
......@@ -149,21 +163,28 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\hlkwd{library}\hlstd{(Mondrian)}
\hlstd{monddata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tmp}\hlopt{$}\hlstd{Gene.name)}
\hlstd{monddata}\hlopt{$}\hlstd{bats_hawkins}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{bats_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{bats_hawkins}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(}
\hlstd{tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{monddata}\hlopt{$}\hlstd{bats_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(}
\hlstd{tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{dginntmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstd{bats_codemlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstd{bats_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{, tmp}\hlopt{$}\hlstd{bats_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{dginntmp}\hlkwb{<-}\hlkwd{rowSums}\hlstd{(}\hlkwd{cbind}\hlstd{(tmp}\hlopt{$}\hlstd{bats_codemlM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_codemlM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BppM1M2}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BppM7M8}\hlopt{==}\hlstr{"Y"}\hlstd{,}
\hlstd{tmp}\hlopt{$}\hlstd{bats_BUSTED}\hlopt{==}\hlstr{"Y"}\hlstd{))}
\hlstd{monddata}\hlopt{$}\hlstd{bats_dginn}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginntmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{],} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN >=3"}\hlstd{,} \hlstr{"hawkins"}\hlstd{,} \hlstr{"Cooper"}\hlstd{))}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{],}
\hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN >=3"}\hlstd{,} \hlstr{"hawkins"}\hlstd{,} \hlstr{"Cooper"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianbats-1}
\begin{kframe}\begin{alltt}
\hlstd{monddata}\hlopt{$}\hlstd{bats_dginn}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginntmp}\hlopt{>=}\hlnum{4}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{],} \hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN >=4"}\hlstd{,} \hlstr{"hawkins"}\hlstd{,} \hlstr{"Cooper"}\hlstd{))}
\hlkwd{mondrian}\hlstd{(monddata[,}\hlnum{2}\hlopt{:}\hlnum{4}\hlstd{],}
\hlkwc{labels}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"DGINN >=4"}\hlstd{,} \hlstr{"hawkins"}\hlstd{,} \hlstr{"Cooper"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/mondrianbats-2}
......@@ -178,8 +199,10 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\hlkwd{library}\hlstd{(UpSetR)}
\hlstd{upsetdata}\hlkwb{<-}\hlkwd{as.data.frame}\hlstd{(tmp}\hlopt{$}\hlstd{Gene.name)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_hawkins}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_hawkins}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(}
\hlstd{tmp}\hlopt{$}\hlstd{hawkins_Positive.Selection..M8vM8a.p.value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_cooper}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(}
\hlstd{tmp}\hlopt{$}\hlstd{cooper.batsM7.M8_p_value}\hlopt{<}\hlnum{0.05}\hlstd{,} \hlnum{1}\hlstd{,} \hlnum{0}\hlstd{)}
\hlstd{upsetdata}\hlopt{$}\hlstd{bats_dginn}\hlkwb{<-}\hlkwd{ifelse}\hlstd{(dginntmp}\hlopt{>=}\hlnum{3}\hlstd{,} \hlnum{1}\hlstd{,}\hlnum{0}\hlstd{)}
......@@ -199,9 +222,60 @@ Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlstr{"covid_comp_shiny.R"}\hlstd{)}
\hlstd{df}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,}
\hlstr{"/data/DGINN_202005281649summary_cleaned.csv"}\hlstd{),}
\hlkwc{fill}\hlstd{=T,} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\hlkwd{names}\hlstd{(df)}
\end{alltt}
\begin{verbatim}
## [1] "File" "Name" "Gene"
## [4] "GeneSize" "NbSpecies" "omegaM0Bpp"
## [7] "omegaM0codeml" "BUSTED" "BUSTED.p.value"
## [10] "MEME.NbSites" "MEME.PSS" "BppM1M2"
## [13] "BppM1M2.p.value" "BppM1M2.NbSites" "BppM1M2.PSS"
## [16] "BppM7M8" "BppM7M8.p.value" "BppM7M8.NbSites"
## [19] "BppM7M8.PSS" "codemlM1M2" "codemlM1M2.p.value"
## [22] "codemlM1M2.NbSites" "codemlM1M2.PSS" "codemlM7M8"
## [25] "codemlM7M8.p.value" "codemlM7M8.NbSites" "codemlM7M8.PSS"
\end{verbatim}
\begin{alltt}
\hlstd{dftmp}\hlkwb{<-}\hlstd{tab[,}\hlkwd{c}\hlstd{(}\hlstr{"bats_File"}\hlstd{,} \hlstr{"bats_Name"}\hlstd{,}
\hlstr{"Gene.name"}\hlstd{,} \hlstr{"bats_GeneSize"}\hlstd{,}
\hlstr{"bats_NbSpecies"}\hlstd{,} \hlstr{"bats_omegaM0Bpp"}\hlstd{,}
\hlstr{"bats_omegaM0codeml"}\hlstd{,} \hlstr{"bats_BUSTED"}\hlstd{,}
\hlstr{"bats_BUSTED_p.value"}\hlstd{,} \hlstr{"bats_MEME_NbSites"}\hlstd{,}
\hlstr{"bats_MEME_PSS"}\hlstd{,} \hlstr{"bats_BppM1M2"}\hlstd{,}
\hlstr{"bats_BppM1M2_p.value"}\hlstd{,} \hlstr{"bats_BppM1M2_NbSites"}\hlstd{,}
\hlstr{"bats_BppM1M2_PSS"}\hlstd{,} \hlstr{"bats_BppM7M8"}\hlstd{,}
\hlstr{"bats_BppM7M8_p.value"}\hlstd{,} \hlstr{"bats_BppM7M8_NbSites"}\hlstd{,}
\hlstr{"bats_BppM7M8_PSS"}\hlstd{,} \hlstr{"bats_codemlM1M2"}\hlstd{,}
\hlstr{"bats_codemlM1M2_p.value"}\hlstd{,} \hlstr{"bats_codemlM1M2_NbSites"}\hlstd{,}
\hlstr{"bats_codemlM1M2_PSS"}\hlstd{,} \hlstr{"bats_codemlM7M8"}\hlstd{,}
\hlstr{"bats_codemlM7M8_p.value"}\hlstd{,} \hlstr{"bats_codemlM7M8_NbSites"} \hlstd{,}
\hlstr{"bats_codemlM7M8_PSS"}\hlstd{)]}
\hlkwd{names}\hlstd{(dftmp)}\hlkwb{<-}\hlkwd{names}\hlstd{(df)}
\hlkwd{makeFig1}\hlstd{(dftmp)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/unnamed-chunk-2-1}
\end{knitrout}
\end{document}
This diff is collapsed.
source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -15,7 +15,7 @@
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{Janvier 2021} % Activate to display a given date or no date
\date{March 2021} % Activate to display a given date or no date
\begin{document}
\maketitle
......@@ -28,27 +28,68 @@
Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
<<>>=
workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
<<eval=FALSE>>=
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
dim(tab)
@
<<>>=
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_alldginn.txt"), h=T, sep="\t")
dim(tab)
@
\section{Comparison of dataset}
\subsection{Data}
<<data>>=
tmp<-na.omit(tab[,c("Gene.name", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8",
"bats_codemlM1M2", "bats_codemlM7M8", "dginn.primate_codemlM1M2",
"dginn.primate_codemlM7M8", "dginn.primate_BppM1M2",
"dginn.primate_BppM7M8", "dginn.primate_BUSTED")])
tmp<-na.omit(tab[,c("Gene.name", "bats_BUSTED", "bats_BppM1M2",
"bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8",
"dginn.primate_codemlM1M2", "dginn.primate_codemlM7M8",
"dginn.primate_BppM1M2", "dginn.primate_BppM7M8",
"dginn.primate_BUSTED")])
col<-c("Gene.name", "bats_BUSTED", "bats_BppM1M2",
"bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8",
"dginn.primate_codemlM1M2", "dginn.primate_codemlM7M8",
"dginn.primate_BppM1M2", "dginn.primate_BppM7M8",
"dginn.primate_BUSTED")
dim(tmp)
@
\subsection{Omega plot}
<<>>=
tab$dginn.primate_omegaM0Bpp[tab$dginn.primate_omegaM0Bpp=="na"]<-NA
x=as.numeric(as.character(
tab$dginn.primate_omegaM0Bpp[tab$status=="shared"]))
tab$bats_omegaM0Bpp[tab$bats_omegaM0Bpp=="na"]<-NA
y=as.numeric(as.character(
tab$bats_omegaM0Bpp[tab$status=="shared"]))
names(x)<-tab$Gene.name[tab$status=="shared"]
plot(x,y, xlab="bpp omega primate", ylab="bpp omega bats", cex=0.5)
abline(0,1)
abline(lm(y~x), col="red")
text(x[x>0.5 &y<0.4], (y[x>0.5 &y<0.4]+0.01),
names(x)[x>0.5 &y<0.4], cex=0.7)
text(x[x<0.45 &y>0.45], (y[x<0.45 &y>0.45]+0.01),
names(x)[x<0.45 &y>0.45], cex=0.7)
text(x[x>0.45 &y>0.4], (y[x>0.45 &y>0.4]+0.01),
names(x)[x>0.45 &y>0.4], cex=0.7)
@
\subsection{Mondrian}
<<mondrianbats>>=
......@@ -56,21 +97,28 @@ library(Mondrian)
monddata<-as.data.frame(tmp$Gene.name)
batstmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y", tmp$bats_codemlM7M8=="Y",
tmp$bats_BppM1M2=="Y", tmp$bats_BppM7M8=="Y", tmp$bats_BUSTED=="Y"))
primatetmp<-rowSums(cbind(tmp$"dginn.primate_codemlM1M2"=="Y",
tmp$"dginn.primate_codemlM7M8"=="Y", tmp$"dginn.primate_BppM1M2"=="Y",
tmp$"dginn.primate_BppM7M8"=="Y", tmp$"dginn.primate_BUSTED"=="Y"))
batstmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y",
tmp$bats_codemlM7M8=="Y",
tmp$bats_BppM1M2=="Y",
tmp$bats_BppM7M8=="Y",
tmp$bats_BUSTED=="Y"))
primatetmp<-rowSums(cbind(tmp$"dginn.primate_codemlM1M2"=="Y",
tmp$"dginn.primate_codemlM7M8"=="Y",
tmp$"dginn.primate_BppM1M2"=="Y",
tmp$"dginn.primate_BppM7M8"=="Y",
tmp$"dginn.primate_BUSTED"=="Y"))
monddata$bats_dginn3<-ifelse(batstmp>=3, 1,0)
monddata$primate_dginn3<-ifelse(primatetmp>=3, 1,0)
monddata$bats_dginn4<-ifelse(batstmp>=4, 1,0)
monddata$primate_dginn4<-ifelse(primatetmp>=4, 1,0)
mondrian(monddata[,2:3], labels=c("DGINN bats >3", "DGINN primate >3"))
mondrian(monddata[,2:3],
labels=c("DGINN bats >3", "DGINN primate >3"))
mondrian(monddata[,4:5], labels=c("DGINN bats >4", "DGINN primate >4"))
mondrian(monddata[,4:5],
labels=c("DGINN bats >4", "DGINN primate >4"))
@
......@@ -142,7 +190,11 @@ tablo<-as.data.frame(tmp$Gene.name)
tablo$nbats<-batstmp
tablo$nprimates<-primatetmp
plot(NULL, xlim=c(-0.5,5.5), ylim=c(-3,5.5), xlab="bats", ylab="primates", main="Genes supported by x,y methods in bats and primates", bty="n", xaxt="n", yaxt="n")
plot(NULL, xlim=c(-0.5,5.5), ylim=c(-3,5.5),
xlab="bats", ylab="primates",
main="Genes supported by x,y methods in bats and primates",
bty="n",
xaxt="n", yaxt="n")
text(x=rep(-0.6, 6), y=0:5, 0:5)
text(y=rep(-0.65, 6), x=0:5, 0:5)
......@@ -157,12 +209,15 @@ sapply(seq(from=-0.5, to=5.5, by=1), function(x){
for (p in 0:5){
for (b in 0:5){
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==b & tablo$nprimates==p]
if(length(tmp)>0 & length(tmp)<8){
text(b,seq(from=(p-0.4), to=(p+0.4), length.out = length(tmp)), tmp, cex=0.4)
}else if (length(tmp)>8 & length(tmp)<16){
if(length(tmp)>0 & length(tmp)<=8){
text(b,seq(from=(p-0.4), to=(p+0.4), length.out = length(tmp)),
tmp, cex=0.4)
}else if (length(tmp)>8 & length(tmp)<=16){
print(c(p, b))
text((b-0.3),seq(from=(p-0.4), to=(p+0.4), length.out = 8), tmp[1:8], cex=0.4)
text((b+0.3),seq(from=(p-0.4), to=(p+0.4), length.out = (length(tmp)-8)), tmp[9:length(tmp)], cex=0.4)
text((b-0.3),seq(from=(p-0.4), to=(p+0.4), length.out = 8),
tmp[1:8], cex=0.4)
text((b+0.3),seq(from=(p-0.4), to=(p+0.4), length.out = (length(tmp)-8)),
tmp[9:length(tmp)], cex=0.4)
}else if (length(tmp)>16){
text(b,p, paste0(length(tmp), " values"))
}
......@@ -172,28 +227,288 @@ for (p in 0:5){
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==1]
text(-0.4,-1.2, "p=1/n=0", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-1.1, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=3, length.out = length(tmp)-18),-1.3, tmp[19:length(tmp)], cex=0.4)
text(seq(from=0.1, to=5.5, length.out = 19),
-1.1,
tmp[1:19],
cex=0.4)
text(seq(from=0.1, to=5.5, length.out = length(tmp)-19),
-1.3,
tmp[20:length(tmp)],
cex=0.4)
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==1 & tablo$nprimates==1]
text(-0.4,-1.7, "p=1/n=1", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-1.6, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=2, length.out = length(tmp)-18),-1.8, tmp[19:length(tmp)], cex=0.4)
text(seq(from=0.1, to=5.5, length.out = 18),
-1.6,
tmp[1:18],
cex=0.4)
text(seq(from=0.1, to=4.5, length.out = length(tmp)-18),
-1.8,
tmp[19:length(tmp)],
cex=0.4)
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==0]
text(-0.4,-2.2, "p=0/n=0", cex=0.6)
text(-0.4,-2.3, "p=0/n=0", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 17),-2.1, tmp[1:17], cex=0.4)
text(seq(from=0.1, to=4.4, length.out = length(tmp)-17),-2.3, tmp[18:length(tmp)], cex=0.4)
text(seq(from=0.1, to=5.5, length.out = 17),-2.3, tmp[18:34], cex=0.4)
text(seq(from=0.1, to=5.5, length.out = length(tmp)-34),-2.5, tmp[35:length(tmp)], cex=0.4)
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==2 & tablo$nprimates==0]
text(-0.4,-2.7, "p=0/n=2", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-2.6, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=1, length.out = length(tmp)-18),-2.8, tmp[19:length(tmp)], cex=0.4)
text(-0.4,-2.9, "p=0/n=2", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-2.8, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=1, length.out = length(tmp)-18),-3.0, tmp[19:length(tmp)], cex=0.4)
@
<<>>=
write.csv(tablo[tablo$nbats>=3,"tmp$Gene.name"], "batssup3.csv",
row.names=FALSE,
quote=FALSE)
write.csv(tablo[tablo$nprimates>=3,"tmp$Gene.name"], "primatessup3.csv",
row.names=FALSE,
quote=FALSE)
write.csv(tablo, "primatesVbats.csv",
row.names=FALSE,
quote=FALSE)
@
Restreindre ce tableau aux gènes présent dans l'analyse de Krogan.
<<setup, include=FALSE, cache=FALSE, tidy=TRUE>>=
options(tidy=TRUE, width=70)
@
<<>>=
# Reading the Krogan table
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"),
fill=T, h=T, dec=",")
dim(tab)
#Adding ACE2 and TMPRSS2
krogan<-c(as.character(tab$merge.Gene), "ACE2", "TMPRSS2")
# The list
length(krogan)
krogan
#In the table, I select line that match the krogan gene name liste
tabloK<-tablo[tablo$`tmp$Gene.name` %in% krogan,]
# How many gene lost?
dim(tablo)
dim(tabloK)
# Les gènes perdus (dans le tableau mais pas dans la liste de Krogan)
sort(tablo$`tmp$Gene.name`[tablo$`tmp$Gene.name` %in% krogan==F])
# Les gènes de Krogan non présent dans cette liste
sort(krogan[krogan %in% tablo$`tmp$Gene.name`==F])
write.csv(tabloK, "primatesVbats_onlykrogan.csv", row.names=FALSE, quote=FALSE)
@
\section{Tanglegram}
<<eval=TRUE>>=
#install.packages('dendextend') # stable CRAN version
library(dendextend) # load the package
#install.packages("phytools") # stable CRAN version
library(phytools) # load the package
library(ggraph)
library(igraph)
library(tidyverse)
tmp<-tablo[(tablo$nbats!=0 | tablo$nprimates!=0),]
#tmp<-head(tablo, 20)
#tmp<-rbind(as.matrix(tmp), c("outgroup", 50, 50))
tmp<-as.data.frame(tmp)
matbats<-hclust(dist(tmp$nbats))
matpri<-hclust(dist(tmp$nprimates))
tmp[order(tmp$nbats),]
dendpri<-as.dendrogram(matpri)
dendbats<-as.dendrogram(matbats)
labels(dendpri)<-as.character(tmp$`tmp$Gene.name`[labels(dendpri)])
labels(dendbats)<-as.character(tmp$`tmp$Gene.name`[labels(dendbats)])
tmp[order(tmp$nprimates, decreasing=FALSE),]$'tmp$Gene.name'-> order
dendpri<-dendextend::rotate(dendpri, order=order)
tmp[order(tmp$nbats, decreasing=FALSE),]$'tmp$Gene.name'-> order
dendbats<-dendextend::rotate(dendbats, order=order)
#### Il faut swapper certains neud de l'arbrese
class(labels(dendpri))
dend12 <- dendlist(dendbats, dendpri)
png("figure/tanglegramm.png", width = 1800, height = 3000)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=2,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=1,
lab.cex=1,
k_labels=6)
dev.off()
@
<<eval=TRUE>>=
ace<-264
tmprss2<-75
znf318<-81
sepsecs<-228
tbk1<-273
ripk1<-224
col<-rep("grey", length(labels(dendpri)))
col[ace]<-"black"
col[tmprss2]<-"black"
col[znf318]<-"black"
col[sepsecs]<-"black"
col[tbk1]<-"black"
col[ripk1]<-"black"
font<-rep(1, length(labels(dendpri))*2)
#font[ace]<-1.3
#font[tmprss2]<-1.3
#font[length(labels(dendpri))+160]<-1.3
png("figure/tanglegramm.png", width = 1800, height = 3000)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=2,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=1,
lab.cex=font,
k_labels=6,
color_lines=col)
dev.off()
@
<<>>=
tmp<-tablo[(tablo$nbats>=3 | tablo$nprimates>=3),]
dim(tmp)
tmp<-as.data.frame(tmp)
names(tmp)<-c("tmp.Gene.name", "nbats", "nprimates")
matbats<-hclust(dist(tmp$nbats))
matpri<-hclust(dist(tmp$nprimates))
#tmp[order(tmp$nbats),]
dendpri<-as.dendrogram(matpri)
dendbats<-as.dendrogram(matbats)
labels(dendpri)<-as.character(tmp$tmp.Gene.name[labels(dendpri)])
labels(dendbats)<-as.character(tmp$tmp.Gene.name[labels(dendbats)])
tmp[order(tmp$nprimates, decreasing=FALSE),]$tmp.Gene.name-> order
dendpri<-dendextend::rotate(dendpri, order=order)
tmp[order(tmp$nbats, decreasing=FALSE),]$tmp.Gene.name-> order
dendbats<-dendextend::rotate(dendbats, order=order)
#### Il faut swapper certains neuds de l'arbres
class(labels(dendpri))
dend12 <- dendlist(dendbats, dendpri)
ace<-97
tmprss2<-27
znf318<-31
sepsecs<-69
tbk1<-106
ripk1<-68
col<-rep("lightblue", length(labels(dendpri)))
plusplus<-tmp$tmp.Gene.name[tmp$nbats>=3 & tmp$nprimates>=3]
col[which(labels(dendbats) %in% plusplus)]<-"pink"
interest<-c("TMPRSS2","ZNF318", "SEPSECS","TBK1", "RIPK1")
col[which(labels(dendbats) %in% interest)]<-"blue"
interestpp<-c("ACE2")
col[which(labels(dendbats) %in% interestpp)]<-"red"
png("figure/tanglegrammsup3.png", width = 500, height = 1200)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=3,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=2,
lab.cex=1,
k_labels=6,
color_lines=col)
dev.off()
### Changer couleurs des groupes
## changer couleurs des lines sel vs sel or sel vs non-sel
setEPS()
postscript("figure/tanglegramsup3.eps", height=15, width=5)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=3,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=2,
lab.cex=1,
# k_labels=6,
color_lines=col)
dev.off()
labels_colors(dend12[[1]])<-rep(rainbow(15)[c(1:3, 9:11)], table(tmp$nbats))
labels_colors(dend12[[2]])<-rep(rainbow(15)[c(1:3, 9:11)], table(tmp$nprimates))
labels_colors(dend12[[1]])<-rep(viridis(10)[c(1:3, 7:9)], table(tmp$nbats))
labels_colors(dend12[[2]])<-rep(viridis(10)[c(1:3, 7:9)], table(tmp$nprimates))
setEPS()
postscript("figure/tanglegramsup3_V2.eps", height=15, width=5)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=3,
main_left=" bats",
main_right = "primates ",
lwd=0.5,
cex_main=2,
lab.cex=1,
# k_labels=6,
color_lines=col)
dev.off()
@
\end{document}
No preview for this file type
This diff is collapsed.
\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis, maic}
\author{Marie Cariou}
\date{March 2021} % Activate to display a given date or no date
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
output from covid\_comp\_dataset.
<<>>=
tablo<-read.table("primatesVbats.csv",
h=T, sep=",")
@
Output MAIC formatted by Léa. This table includes the DGINN "score".
<<>>=
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
maic<-read.table(paste0(workdir, "data/covid_comp_maic.txt"),
h=T)
@
\section{MAIC}
\subsection{Boxplot}
<<boxplot, fig.height=12>>=
par(mfrow=c(2,1))
boxplot(maic$rank~maic$nbats, notch=TRUE, varwidth=TRUE, xlab="score DGINN", ylab="rank MAIC", main="Bats")
stripchart(maic$rank~maic$nbats, method="jitter", vertical=TRUE, pch=1, cex=0.3, add=TRUE)
boxplot(maic$rank~maic$nprimates, notch=TRUE, xlab="score DGINN", ylab="rank MAIC", main="Primates")
stripchart(maic$rank~maic$nprimates, method="jitter", vertical=TRUE, pch=1, cex=0.3, add=TRUE)
@
\subsection{Dotchart}
<<dotbats, fig.height=8>>=
tmp<-maic[maic$nbats>=3, c("gene", "rank", "nbats")]
tmp<-tmp[order(tmp$rank, decreasing = TRUE),]
tmp$col<-"black"
tmp$col[tmp$gene=="ACE2"]<-"red"
tmp$col[tmp$gene=="TMPRSS2"]<-"red"
tmp$pch[tmp$nbats==5]<-1
tmp$pch[tmp$nbats==4]<-20
tmp$pch[tmp$nbats==3]<-4
dotchart(tmp$rank, main="Bats DGINN >=3", xlab="rank MAIC", labels=tmp$gene, pch=tmp$pch, col=tmp$col)
legend("topright", c("5 (score DGINN)", "4", "3"), pch=c(1,20,4))
@
<<dotprimates, fig.height=12>>=
tmp<-maic[maic$nprimates>=3, c("gene", "rank", "nprimates")]
tmp<-tmp[order(tmp$rank, decreasing = TRUE),]
tmp$pch[tmp$nprimates==5]<-1
tmp$pch[tmp$nprimates==4]<-20
tmp$pch[tmp$nprimates==3]<-4
tmp$col<-"black"
tmp$col[tmp$gene=="ACE2"]<-"red"
tmp$col[tmp$gene=="TMPRSS2"]<-"red"
dotchart(tmp$rank, main="Primates DGINN >=3", xlab="rank MAIC", labels=tmp$gene, pch=tmp$pch, cex=0.8, col=tmp$col)
legend("topright", c("5 (score DGINN)", "4", "3"), pch=c(1,20,4))
@
\section{Pan Corona}
<<>>=
pancorona<-read.table(paste0(workdir, "data/pancorona_S5.csv"),
h=T, fill = TRUE, sep="\t")
names(pancorona)<-c("tmp.Gene.name", names(pancorona)[-1])
# Genes en commun
pancorona$tmp.Gene.name[pancorona$tmp.Gene.name %in% tablo$tmp.Gene.name]
# Uniquement dans le tableau pancorona
sort(pancorona$tmp.Gene.name[(pancorona$tmp.Gene.name %in% tablo$tmp.Gene.name)==FALSE])
## Uniquement dans tableau
sort(tablo$tmp.Gene.name[(tablo$tmp.Gene.name %in% pancorona$tmp.Gene.name)==FALSE])
@
<<pancorona, fig.height=8>>=
pancorona<-pancorona[,c("tmp.Gene.name", "TOTAL")]
pandginn<-na.omit(merge(pancorona, tablo, by="tmp.Gene.name", all.x=TRUE))
pandginn<-pandginn[order(pandginn$nprimates),]
pandginn<-pandginn[order(pandginn$TOTAL),]
dotchart(as.matrix(pandginn[,2]), labels = pandginn$tmp.Gene.name, xlim=c(0,5))
points(pandginn[,4], 1:nrow(pandginn), col="blue", pch=20, cex=0.7)
points(pandginn[,3], 1:nrow(pandginn), col="blue", pch=4)
legend("bottomright", c("pancorona score", "dginn primate score", "dginn bats score"), pch=c(1,20,4), col=c("black", "blue", "blue"))
@
A-t-on un enrichissement en Pan-corona dans nos gènes sous PS?
<<>>=
pandginnall<-merge(pancorona, tablo, by="tmp.Gene.name", all.x=FALSE,all.y=TRUE)
dim(pandginnall)
# test indépendance: under PS / in the pancorona list
table(is.na(pandginnall$TOTAL)==FALSE)
table(pandginnall$nbats>=3)
chi<-table(is.na(pandginnall$TOTAL)==FALSE,pandginnall$nbats>=3)
chi
chisq.test(chi)
table(is.na(pandginnall$TOTAL)==FALSE)
table(pandginnall$nprimates>=3)
chi<-table(is.na(pandginnall$TOTAL)==FALSE,pandginnall$nprimates>=3)
chi
chisq.test(chi)
@
No enrichment in PanCORONA in our genes under PS.
\end{document}
File added
\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb
\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis, maic}
\author{Marie Cariou}
\date{March 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
output from covid\_comp\_dataset.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tablo}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlstr{"primatesVbats.csv"}\hlstd{,}
\hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
Output MAIC formatted by Léa. This table includes the DGINN "score".
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/"}
\hlstd{workdir}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,} \hlstr{"CIRI_BIBS_projects/2020_05_Etienne_covid/"}\hlstd{)}
\hlstd{maic}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/covid_comp_maic.txt"}\hlstd{),}
\hlkwc{h}\hlstd{=T)}
\end{alltt}
\end{kframe}
\end{knitrout}
\section{MAIC}
\subsection{Boxplot}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{1}\hlstd{))}
\hlkwd{boxplot}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nbats,} \hlkwc{notch}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{varwidth}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"score DGINN"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"Bats"}\hlstd{)}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in bxp(list(stats = structure(c(21, 825, 1664, 2860, 5392, 15, 625.5, : some notches went outside hinges ('box'): maybe set notch=FALSE}}\begin{alltt}
\hlkwd{stripchart}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nbats,} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{vertical}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.3}\hlstd{,} \hlkwc{add}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\hlkwd{boxplot}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nprimates,} \hlkwc{notch}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"score DGINN"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"Primates"}\hlstd{)}
\hlkwd{stripchart}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nprimates,} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{vertical}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.3}\hlstd{,} \hlkwc{add}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/boxplot-1}
\end{knitrout}
\subsection{Dotchart}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{maic[maic}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"gene"}\hlstd{,} \hlstr{"rank"}\hlstd{,} \hlstr{"nbats"}\hlstd{)]}
\hlstd{tmp}\hlkwb{<-}\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{decreasing} \hlstd{=} \hlnum{TRUE}\hlstd{),]}
\hlstd{tmp}\hlopt{$}\hlstd{col}\hlkwb{<-}\hlstr{"black"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"ACE2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{5}\hlstd{]}\hlkwb{<-}\hlnum{1}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlnum{20}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{3}\hlstd{]}\hlkwb{<-}\hlnum{4}
\hlkwd{dotchart}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{main}\hlstd{=}\hlstr{"Bats DGINN >=3"}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{labels}\hlstd{=tmp}\hlopt{$}\hlstd{gene,} \hlkwc{pch}\hlstd{=tmp}\hlopt{$}\hlstd{pch,} \hlkwc{col}\hlstd{=tmp}\hlopt{$}\hlstd{col)}
\hlkwd{legend}\hlstd{(}\hlstr{"topright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"5 (score DGINN)"}\hlstd{,} \hlstr{"4"}\hlstd{,} \hlstr{"3"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/dotbats-1}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{maic[maic}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"gene"}\hlstd{,} \hlstr{"rank"}\hlstd{,} \hlstr{"nprimates"}\hlstd{)]}
\hlstd{tmp}\hlkwb{<-}\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{decreasing} \hlstd{=} \hlnum{TRUE}\hlstd{),]}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{5}\hlstd{]}\hlkwb{<-}\hlnum{1}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlnum{20}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{3}\hlstd{]}\hlkwb{<-}\hlnum{4}
\hlstd{tmp}\hlopt{$}\hlstd{col}\hlkwb{<-}\hlstr{"black"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"ACE2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlkwd{dotchart}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{main}\hlstd{=}\hlstr{"Primates DGINN >=3"}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{labels}\hlstd{=tmp}\hlopt{$}\hlstd{gene,} \hlkwc{pch}\hlstd{=tmp}\hlopt{$}\hlstd{pch,} \hlkwc{cex}\hlstd{=}\hlnum{0.8}\hlstd{,} \hlkwc{col}\hlstd{=tmp}\hlopt{$}\hlstd{col)}
\hlkwd{legend}\hlstd{(}\hlstr{"topright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"5 (score DGINN)"}\hlstd{,} \hlstr{"4"}\hlstd{,} \hlstr{"3"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/dotprimates-1}
\end{knitrout}
\section{Pan Corona}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pancorona}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/pancorona_S5.csv"}\hlstd{),}
\hlkwc{h}\hlstd{=T,} \hlkwc{fill} \hlstd{=} \hlnum{TRUE}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\hlkwd{names}\hlstd{(pancorona)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwd{names}\hlstd{(pancorona)[}\hlopt{-}\hlnum{1}\hlstd{])}
\hlcom{# Genes en commun}
\hlstd{pancorona}\hlopt{$}\hlstd{tmp.Gene.name[pancorona}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{tablo}\hlopt{$}\hlstd{tmp.Gene.name]}
\end{alltt}
\begin{verbatim}
## [1] TBK1 MARK3 GIGYF2 MARK2 G3BP1 LARP1 ACE2 PABPC1
## [9] TMPRSS2 AP3B1 CLCC1 CSDE1 HECTD1 MARK1 MEPCE PDE4DIP
## [17] POR PRKAR2B RAB5C RTN4 SRP54 UBAP2 UBAP2L UBXN8
## [25] SPART BZW2 EIF4E2 SMOC1 STOML2 DDX21 FAM98A G3BP2
## [33] MOV10 PABPC4 UPF1
## 105 Levels: ACE2 ANPEP AP3B1 ATXN2L BTF3 BZW2 CKAP5 CLCC1 ... YTHDF2
\end{verbatim}
\begin{alltt}
\hlcom{# Uniquement dans le tableau pancorona}
\hlkwd{sort}\hlstd{(pancorona}\hlopt{$}\hlstd{tmp.Gene.name[(pancorona}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{tablo}\hlopt{$}\hlstd{tmp.Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] ANPEP ATXN2L BTF3 CKAP5 CTSB CTSL
## [7] CYB5R3 DDX1 DDX5 DDX58 DHX9 DNM1L
## [13] EEF1A1 EIF2A EIF3F EIF4B EZR FLNA
## [19] FURIN FUS GSK3A GSK3B HDLBP HNRNPA1
## [25] HNRNPD HNRNPF HNRNPU IFIH1 IGF2BP1 IKBKB
## [31] IKBKE IRF3 ISG15 KPNA3 KPNB1 MYH9
## [37] NCL POLD1 POLR2B PRKRA RBM14 RCHY1
## [43] RPL13A RPL26 RPS13 RPS17 RPS19 RPS9
## [49] SDCBP SERBP1 SGTA SLC1A5 SNAP47 SSB
## [55] STING1 SYNCRIP TANC1 TBCB TMPRSS11D TRAF3
## [61] TUBA4A TUBB2A TUBB4A TUBB6 USP10 VPS36
## [67] XRCC5 XRCC6 YBX1 YTHDF2
## 105 Levels: ACE2 ANPEP AP3B1 ATXN2L BTF3 BZW2 CKAP5 CLCC1 ... YTHDF2
\end{verbatim}
\begin{alltt}
\hlcom{## Uniquement dans tableau }
\hlkwd{sort}\hlstd{(tablo}\hlopt{$}\hlstd{tmp.Gene.name[(tablo}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{pancorona}\hlopt{$}\hlstd{tmp.Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] AAR2 AASS AATF ABCC1 ACAD9 ACADM
## [7] ACSL3 ADAM9 ADAMTS1 AGPS AKAP8 AKAP8L
## [13] AKAP9 ALG11 ALG5 ALG8 ANO6 AP2A2
## [19] AP2M1 ARF6 ATE1 ATP13A3 ATP1B1 ATP6AP1
## [25] ATP6V1A BAG5 BCKDK BRD2 BRD4 CCDC86
## [31] CDK5RAP2 CENPF CEP112 CEP135 CEP250 CEP350
## [37] CEP68 CHMP2A CHPF CHPF2 CISD3 CIT
## [43] CLIP4 CNTRL COL6A1 COLGALT1 COMT COQ8B
## [49] CRTC3 CSNK2A2 CSNK2B CUL2 CWC27 CYB5B
## [55] DCAF7 DCAKD DCTPP1 DDX10 DNAJC11 DNAJC19
## [61] DNMT1 DPH5 DPY19L1 ECSIT EDEM3 EIF4H
## [67] ELOC EMC1 ERC1 ERGIC1 ERLEC1 ERMP1
## [73] ERO1B ERP44 ETFA EXOSC2 EXOSC3 EXOSC5
## [79] EXOSC8 F2RL1 FAM162A FAM8A1 FAR2 FASTKD5
## [85] FBLN5 FBN1 FBN2 FBXL12 FKBP10 FKBP15
## [91] FKBP7 FOXRED2 FYCO1 GCC1 GCC2 GDF15
## [97] GFER GGCX GGH GHITM GLA GNB1
## [103] GNG5 GOLGA2 GOLGA3 GOLGA7 GOLGB1 GORASP1
## [109] GPAA1 GPX1 GRIPAP1 GRPEL1 GTF2F2 HDAC2
## [115] HEATR3 HMOX1 HOOK1 HS2ST1 HS6ST2 HSBP1
## [121] HYOU1 IDE IL17RA IMPDH2 INHBE INTS4
## [127] ITGB1 JAKMIP1 KDELC1 KDELC2 LARP4B LARP7
## [133] LMAN2 LOX MAP7D1 MARC1 MAT2B MDN1
## [139] MIB1 MIPOL1 MOGS MPHOSPH10 MRPS2 MRPS25
## [145] MRPS27 MRPS5 MTCH1 MYCBP2 NARS2 NAT14
## [151] NDFIP2 NDUFAF1 NDUFAF2 NDUFB9 NEK9 NEU1
## [157] NGDN NGLY1 NIN NINL NLRX1 NOL10
## [163] NPC2 NPTX1 NSD2 NUP210 NUP214 NUP54
## [169] NUP58 NUP62 NUP88 NUP98 NUTF2 OS9
## [175] PCNT PCSK5 PCSK6 PDZD11 PIGO PIGS
## [181] PITRM1 PKP2 PLAT PLD3 PLEKHA5 PLEKHF2
## [187] PLOD2 PMPCA PMPCB POFUT1 POLA1 POLA2
## [193] PPIL3 PPT1 PRIM1 PRIM2 PRKACA PRKAR2A
## [199] PRRC2B PSMD8 PTBP2 PTGES2 PUSL1 PVR
## [205] QSOX2 RAB10 RAB14 RAB18 RAB1A RAB2A
## [211] RAB7A RAB8A RAE1 RALA RAP1GDS1 RBM28
## [217] RBM41 RBX1 RDX REEP5 REEP6 RETREG3
## [223] RHOA RIPK1 RNF41 RPL36 RRP9 SAAL1
## [229] SBNO1 SCAP SCARB1 SCCPDH SDF2 SELENOS
## [235] SEPSECS SIL1 SIRT5 SLC25A21 SLC27A2 SLC30A6
## [241] SLC30A7 SLC30A9 SLC44A2 SLC9A3R1 SLU7 SNIP1
## [247] SRP19 SRP72 STC2 STOM SUN2 TAPT1
## [253] TARS2 TBCA TBKBP1 TCF12 THTPA TIMM10
## [259] TIMM10B TIMM29 TIMM8B TIMM9 TLE1 TLE3
## [265] TM2D3 TMED5 TMEM39B TMEM97 TOMM70 TOR1A
## [271] TOR1AIP1 TRIM59 TRMT1 TUBGCP2 TUBGCP3 TYSND1
## [277] UGGT2 USP54 VPS11 VPS39 WASHC4 WFS1
## [283] YIF1A ZC3H18 ZC3H7A ZDHHC5 ZNF318 ZNF503
## [289] ZYG11B
## 324 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pancorona}\hlkwb{<-}\hlstd{pancorona[,}\hlkwd{c}\hlstd{(}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlstr{"TOTAL"}\hlstd{)]}
\hlstd{pandginn}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(}\hlkwd{merge}\hlstd{(pancorona, tablo,} \hlkwc{by}\hlstd{=}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=}\hlnum{TRUE}\hlstd{))}
\hlstd{pandginn}\hlkwb{<-}\hlstd{pandginn[}\hlkwd{order}\hlstd{(pandginn}\hlopt{$}\hlstd{nprimates),]}
\hlstd{pandginn}\hlkwb{<-}\hlstd{pandginn[}\hlkwd{order}\hlstd{(pandginn}\hlopt{$}\hlstd{TOTAL),]}
\hlkwd{dotchart}\hlstd{(}\hlkwd{as.matrix}\hlstd{(pandginn[,}\hlnum{2}\hlstd{]),} \hlkwc{labels} \hlstd{= pandginn}\hlopt{$}\hlstd{tmp.Gene.name,} \hlkwc{xlim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwd{points}\hlstd{(pandginn[,}\hlnum{4}\hlstd{],} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(pandginn),} \hlkwc{col}\hlstd{=}\hlstr{"blue"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\hlkwd{points}\hlstd{(pandginn[,}\hlnum{3}\hlstd{],} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(pandginn),} \hlkwc{col}\hlstd{=}\hlstr{"blue"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{4}\hlstd{)}
\hlkwd{legend}\hlstd{(}\hlstr{"bottomright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"pancorona score"}\hlstd{,} \hlstr{"dginn primate score"}\hlstd{,} \hlstr{"dginn bats score"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{),} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"black"}\hlstd{,} \hlstr{"blue"}\hlstd{,} \hlstr{"blue"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/pancorona-1}
\end{knitrout}
A-t-on un enrichissement en Pan-corona dans nos gènes sous PS?
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pandginnall}\hlkwb{<-}\hlkwd{merge}\hlstd{(pancorona, tablo,} \hlkwc{by}\hlstd{=}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=}\hlnum{FALSE}\hlstd{,}\hlkwc{all.y}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\hlkwd{dim}\hlstd{(pandginnall)}
\end{alltt}
\begin{verbatim}
## [1] 324 4
\end{verbatim}
\begin{alltt}
\hlcom{# test indépendance: under PS / in the pancorona list}
\hlkwd{table}\hlstd{(}\hlkwd{is.na}\hlstd{(pandginnall}\hlopt{$}\hlstd{TOTAL)}\hlopt{==}\hlnum{FALSE}\hlstd{)}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## 289 35
\end{verbatim}
\begin{alltt}
\hlkwd{table}\hlstd{(pandginnall}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{)}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## 286 38
\end{verbatim}
\begin{alltt}
\hlstd{chi}\hlkwb{<-}\hlkwd{table}\hlstd{(}\hlkwd{is.na}\hlstd{(pandginnall}\hlopt{$}\hlstd{TOTAL)}\hlopt{==}\hlnum{FALSE}\hlstd{,pandginnall}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{)}
\hlstd{chi}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## FALSE 255 34
## TRUE 31 4
\end{verbatim}
\begin{alltt}
\hlkwd{chisq.test}\hlstd{(chi)}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in chisq.test(chi): Chi-squared approximation may be incorrect}}\begin{verbatim}
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: chi
## X-squared = 7.6869e-31, df = 1, p-value = 1
\end{verbatim}
\begin{alltt}
\hlkwd{table}\hlstd{(}\hlkwd{is.na}\hlstd{(pandginnall}\hlopt{$}\hlstd{TOTAL)}\hlopt{==}\hlnum{FALSE}\hlstd{)}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## 289 35
\end{verbatim}
\begin{alltt}
\hlkwd{table}\hlstd{(pandginnall}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{)}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## 236 88
\end{verbatim}
\begin{alltt}
\hlstd{chi}\hlkwb{<-}\hlkwd{table}\hlstd{(}\hlkwd{is.na}\hlstd{(pandginnall}\hlopt{$}\hlstd{TOTAL)}\hlopt{==}\hlnum{FALSE}\hlstd{,pandginnall}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{)}
\hlstd{chi}
\end{alltt}
\begin{verbatim}
##
## FALSE TRUE
## FALSE 212 77
## TRUE 24 11
\end{verbatim}
\begin{alltt}
\hlkwd{chisq.test}\hlstd{(chi)}
\end{alltt}
\begin{verbatim}
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: chi
## X-squared = 0.15992, df = 1, p-value = 0.6892
\end{verbatim}
\end{kframe}
\end{knitrout}
No enrichment in PanCORONA in our genes under PS.
\end{document}
File added
\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb
\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis, maic}
\author{Marie Cariou}
\date{March 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
output from covid\_comp\_dataset.
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tablo}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlstr{"primatesVbats.csv"}\hlstd{,}
\hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{","}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
Output MAIC formatted by Léa. This table includes the DGINN "score".
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/"}
\hlstd{workdir}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,} \hlstr{"CIRI_BIBS_projects/2020_05_Etienne_covid/"}\hlstd{)}
\hlstd{maic}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/covid_comp_maic.txt"}\hlstd{),}
\hlkwc{h}\hlstd{=T)}
\end{alltt}
\end{kframe}
\end{knitrout}
\section{MAIC}
\subsection{Boxplot}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{1}\hlstd{))}
\hlkwd{boxplot}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nbats,} \hlkwc{notch}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{varwidth}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"score DGINN"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"Bats"}\hlstd{)}
\end{alltt}
{\ttfamily\noindent\color{warningcolor}{\#\# Warning in bxp(list(stats = structure(c(21, 825, 1664, 2860, 5392, 15, 625.5, : some notches went outside hinges ('box'): maybe set notch=FALSE}}\begin{alltt}
\hlkwd{stripchart}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nbats,} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{vertical}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.3}\hlstd{,} \hlkwc{add}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\hlkwd{boxplot}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nprimates,} \hlkwc{notch}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"score DGINN"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{main}\hlstd{=}\hlstr{"Primates"}\hlstd{)}
\hlkwd{stripchart}\hlstd{(maic}\hlopt{$}\hlstd{rank}\hlopt{~}\hlstd{maic}\hlopt{$}\hlstd{nprimates,} \hlkwc{method}\hlstd{=}\hlstr{"jitter"}\hlstd{,} \hlkwc{vertical}\hlstd{=}\hlnum{TRUE}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{1}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.3}\hlstd{,} \hlkwc{add}\hlstd{=}\hlnum{TRUE}\hlstd{)}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/boxplot-1}
\end{knitrout}
\subsection{Dotchart}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{maic[maic}\hlopt{$}\hlstd{nbats}\hlopt{>=}\hlnum{3}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"gene"}\hlstd{,} \hlstr{"rank"}\hlstd{,} \hlstr{"nbats"}\hlstd{)]}
\hlstd{tmp}\hlkwb{<-}\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{decreasing} \hlstd{=} \hlnum{TRUE}\hlstd{),]}
\hlstd{tmp}\hlopt{$}\hlstd{col}\hlkwb{<-}\hlstr{"black"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"ACE2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{5}\hlstd{]}\hlkwb{<-}\hlnum{1}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlnum{20}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nbats}\hlopt{==}\hlnum{3}\hlstd{]}\hlkwb{<-}\hlnum{4}
\hlkwd{dotchart}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{main}\hlstd{=}\hlstr{"Bats DGINN >=3"}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{labels}\hlstd{=tmp}\hlopt{$}\hlstd{gene,} \hlkwc{pch}\hlstd{=tmp}\hlopt{$}\hlstd{pch,} \hlkwc{col}\hlstd{=tmp}\hlopt{$}\hlstd{col)}
\hlkwd{legend}\hlstd{(}\hlstr{"topright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"5 (score DGINN)"}\hlstd{,} \hlstr{"4"}\hlstd{,} \hlstr{"3"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/dotbats-1}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{tmp}\hlkwb{<-}\hlstd{maic[maic}\hlopt{$}\hlstd{nprimates}\hlopt{>=}\hlnum{3}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"gene"}\hlstd{,} \hlstr{"rank"}\hlstd{,} \hlstr{"nprimates"}\hlstd{)]}
\hlstd{tmp}\hlkwb{<-}\hlstd{tmp[}\hlkwd{order}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{decreasing} \hlstd{=} \hlnum{TRUE}\hlstd{),]}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{5}\hlstd{]}\hlkwb{<-}\hlnum{1}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{4}\hlstd{]}\hlkwb{<-}\hlnum{20}
\hlstd{tmp}\hlopt{$}\hlstd{pch[tmp}\hlopt{$}\hlstd{nprimates}\hlopt{==}\hlnum{3}\hlstd{]}\hlkwb{<-}\hlnum{4}
\hlstd{tmp}\hlopt{$}\hlstd{col}\hlkwb{<-}\hlstr{"black"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"ACE2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlstd{tmp}\hlopt{$}\hlstd{col[tmp}\hlopt{$}\hlstd{gene}\hlopt{==}\hlstr{"TMPRSS2"}\hlstd{]}\hlkwb{<-}\hlstr{"red"}
\hlkwd{dotchart}\hlstd{(tmp}\hlopt{$}\hlstd{rank,} \hlkwc{main}\hlstd{=}\hlstr{"Primates DGINN >=3"}\hlstd{,} \hlkwc{xlab}\hlstd{=}\hlstr{"rank MAIC"}\hlstd{,} \hlkwc{labels}\hlstd{=tmp}\hlopt{$}\hlstd{gene,} \hlkwc{pch}\hlstd{=tmp}\hlopt{$}\hlstd{pch,} \hlkwc{cex}\hlstd{=}\hlnum{0.8}\hlstd{,} \hlkwc{col}\hlstd{=tmp}\hlopt{$}\hlstd{col)}
\hlkwd{legend}\hlstd{(}\hlstr{"topright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"5 (score DGINN)"}\hlstd{,} \hlstr{"4"}\hlstd{,} \hlstr{"3"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/dotprimates-1}
\end{knitrout}
\section{Pan Corona}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pancorona}\hlkwb{<-}\hlkwd{read.table}\hlstd{(}\hlkwd{paste0}\hlstd{(workdir,} \hlstr{"data/pancorona_S5.csv"}\hlstd{),}
\hlkwc{h}\hlstd{=T,} \hlkwc{fill} \hlstd{=} \hlnum{TRUE}\hlstd{,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\hlkwd{names}\hlstd{(pancorona)}\hlkwb{<-}\hlkwd{c}\hlstd{(}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwd{names}\hlstd{(pancorona)[}\hlopt{-}\hlnum{1}\hlstd{])}
\hlcom{# Genes en commun}
\hlstd{pancorona}\hlopt{$}\hlstd{tmp.Gene.name[pancorona}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{tablo}\hlopt{$}\hlstd{tmp.Gene.name]}
\end{alltt}
\begin{verbatim}
## [1] TBK1 MARK3 GIGYF2 MARK2 G3BP1 LARP1 ACE2 PABPC1
## [9] TMPRSS2 AP3B1 CLCC1 CSDE1 HECTD1 MARK1 MEPCE PDE4DIP
## [17] POR PRKAR2B RAB5C RTN4 SRP54 UBAP2 UBAP2L UBXN8
## [25] SPART BZW2 EIF4E2 SMOC1 STOML2 DDX21 FAM98A G3BP2
## [33] MOV10 PABPC4 UPF1
## 105 Levels: ACE2 ANPEP AP3B1 ATXN2L BTF3 BZW2 CKAP5 CLCC1 ... YTHDF2
\end{verbatim}
\begin{alltt}
\hlcom{# Uniquement dans le tableau pancorona}
\hlkwd{sort}\hlstd{(pancorona}\hlopt{$}\hlstd{tmp.Gene.name[(pancorona}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{tablo}\hlopt{$}\hlstd{tmp.Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] ANPEP ATXN2L BTF3 CKAP5 CTSB CTSL
## [7] CYB5R3 DDX1 DDX5 DDX58 DHX9 DNM1L
## [13] EEF1A1 EIF2A EIF3F EIF4B EZR FLNA
## [19] FURIN FUS GSK3A GSK3B HDLBP HNRNPA1
## [25] HNRNPD HNRNPF HNRNPU IFIH1 IGF2BP1 IKBKB
## [31] IKBKE IRF3 ISG15 KPNA3 KPNB1 MYH9
## [37] NCL POLD1 POLR2B PRKRA RBM14 RCHY1
## [43] RPL13A RPL26 RPS13 RPS17 RPS19 RPS9
## [49] SDCBP SERBP1 SGTA SLC1A5 SNAP47 SSB
## [55] STING1 SYNCRIP TANC1 TBCB TMPRSS11D TRAF3
## [61] TUBA4A TUBB2A TUBB4A TUBB6 USP10 VPS36
## [67] XRCC5 XRCC6 YBX1 YTHDF2
## 105 Levels: ACE2 ANPEP AP3B1 ATXN2L BTF3 BZW2 CKAP5 CLCC1 ... YTHDF2
\end{verbatim}
\begin{alltt}
\hlcom{## Uniquement dans tableau }
\hlkwd{sort}\hlstd{(tablo}\hlopt{$}\hlstd{tmp.Gene.name[(tablo}\hlopt{$}\hlstd{tmp.Gene.name} \hlopt{%in%} \hlstd{pancorona}\hlopt{$}\hlstd{tmp.Gene.name)}\hlopt{==}\hlnum{FALSE}\hlstd{])}
\end{alltt}
\begin{verbatim}
## [1] AAR2 AASS AATF ABCC1 ACAD9 ACADM
## [7] ACSL3 ADAM9 ADAMTS1 AGPS AKAP8 AKAP8L
## [13] AKAP9 ALG11 ALG5 ALG8 ANO6 AP2A2
## [19] AP2M1 ARF6 ATE1 ATP13A3 ATP1B1 ATP6AP1
## [25] ATP6V1A BAG5 BCKDK BRD2 BRD4 CCDC86
## [31] CDK5RAP2 CENPF CEP112 CEP135 CEP250 CEP350
## [37] CEP68 CHMP2A CHPF CHPF2 CISD3 CIT
## [43] CLIP4 CNTRL COL6A1 COLGALT1 COMT COQ8B
## [49] CRTC3 CSNK2A2 CSNK2B CUL2 CWC27 CYB5B
## [55] DCAF7 DCAKD DCTPP1 DDX10 DNAJC11 DNAJC19
## [61] DNMT1 DPH5 DPY19L1 ECSIT EDEM3 EIF4H
## [67] ELOC EMC1 ERC1 ERGIC1 ERLEC1 ERMP1
## [73] ERO1B ERP44 ETFA EXOSC2 EXOSC3 EXOSC5
## [79] EXOSC8 F2RL1 FAM162A FAM8A1 FAR2 FASTKD5
## [85] FBLN5 FBN1 FBN2 FBXL12 FKBP10 FKBP15
## [91] FKBP7 FOXRED2 FYCO1 GCC1 GCC2 GDF15
## [97] GFER GGCX GGH GHITM GLA GNB1
## [103] GNG5 GOLGA2 GOLGA3 GOLGA7 GOLGB1 GORASP1
## [109] GPAA1 GPX1 GRIPAP1 GRPEL1 GTF2F2 HDAC2
## [115] HEATR3 HMOX1 HOOK1 HS2ST1 HS6ST2 HSBP1
## [121] HYOU1 IDE IL17RA IMPDH2 INHBE INTS4
## [127] ITGB1 JAKMIP1 KDELC1 KDELC2 LARP4B LARP7
## [133] LMAN2 LOX MAP7D1 MARC1 MAT2B MDN1
## [139] MIB1 MIPOL1 MOGS MPHOSPH10 MRPS2 MRPS25
## [145] MRPS27 MRPS5 MTCH1 MYCBP2 NARS2 NAT14
## [151] NDFIP2 NDUFAF1 NDUFAF2 NDUFB9 NEK9 NEU1
## [157] NGDN NGLY1 NIN NINL NLRX1 NOL10
## [163] NPC2 NPTX1 NSD2 NUP210 NUP214 NUP54
## [169] NUP58 NUP62 NUP88 NUP98 NUTF2 OS9
## [175] PCNT PCSK5 PCSK6 PDZD11 PIGO PIGS
## [181] PITRM1 PKP2 PLAT PLD3 PLEKHA5 PLEKHF2
## [187] PLOD2 PMPCA PMPCB POFUT1 POLA1 POLA2
## [193] PPIL3 PPT1 PRIM1 PRIM2 PRKACA PRKAR2A
## [199] PRRC2B PSMD8 PTBP2 PTGES2 PUSL1 PVR
## [205] QSOX2 RAB10 RAB14 RAB18 RAB1A RAB2A
## [211] RAB7A RAB8A RAE1 RALA RAP1GDS1 RBM28
## [217] RBM41 RBX1 RDX REEP5 REEP6 RETREG3
## [223] RHOA RIPK1 RNF41 RPL36 RRP9 SAAL1
## [229] SBNO1 SCAP SCARB1 SCCPDH SDF2 SELENOS
## [235] SEPSECS SIL1 SIRT5 SLC25A21 SLC27A2 SLC30A6
## [241] SLC30A7 SLC30A9 SLC44A2 SLC9A3R1 SLU7 SNIP1
## [247] SRP19 SRP72 STC2 STOM SUN2 TAPT1
## [253] TARS2 TBCA TBKBP1 TCF12 THTPA TIMM10
## [259] TIMM10B TIMM29 TIMM8B TIMM9 TLE1 TLE3
## [265] TM2D3 TMED5 TMEM39B TMEM97 TOMM70 TOR1A
## [271] TOR1AIP1 TRIM59 TRMT1 TUBGCP2 TUBGCP3 TYSND1
## [277] UGGT2 USP54 VPS11 VPS39 WASHC4 WFS1
## [283] YIF1A ZC3H18 ZC3H7A ZDHHC5 ZNF318 ZNF503
## [289] ZYG11B
## 324 Levels: AAR2 AASS AATF ABCC1 ACAD9 ACADM ACE2 ACSL3 ... ZYG11B
\end{verbatim}
\end{kframe}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{pancorona}\hlkwb{<-}\hlstd{pancorona[,}\hlkwd{c}\hlstd{(}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlstr{"TOTAL"}\hlstd{)]}
\hlstd{pandginn}\hlkwb{<-}\hlkwd{na.omit}\hlstd{(}\hlkwd{merge}\hlstd{(pancorona, tablo,} \hlkwc{by}\hlstd{=}\hlstr{"tmp.Gene.name"}\hlstd{,} \hlkwc{all.x}\hlstd{=}\hlnum{TRUE}\hlstd{))}
\hlstd{pandginn}\hlkwb{<-}\hlstd{pandginn[}\hlkwd{order}\hlstd{(pandginn}\hlopt{$}\hlstd{nprimates),]}
\hlstd{pandginn}\hlkwb{<-}\hlstd{pandginn[}\hlkwd{order}\hlstd{(pandginn}\hlopt{$}\hlstd{TOTAL),]}
\hlkwd{dotchart}\hlstd{(}\hlkwd{as.matrix}\hlstd{(pandginn[,}\hlnum{2}\hlstd{]),} \hlkwc{labels} \hlstd{= pandginn}\hlopt{$}\hlstd{tmp.Gene.name,} \hlkwc{xlim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwd{points}\hlstd{(pandginn[,}\hlnum{4}\hlstd{],} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(pandginn),} \hlkwc{col}\hlstd{=}\hlstr{"blue"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.7}\hlstd{)}
\hlkwd{points}\hlstd{(pandginn[,}\hlnum{3}\hlstd{],} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(pandginn),} \hlkwc{col}\hlstd{=}\hlstr{"blue"}\hlstd{,} \hlkwc{pch}\hlstd{=}\hlnum{4}\hlstd{)}
\hlkwd{legend}\hlstd{(}\hlstr{"bottomright"}\hlstd{,} \hlkwd{c}\hlstd{(}\hlstr{"pancorona score"}\hlstd{,} \hlstr{"dginn primate score"}\hlstd{,} \hlstr{"dginn bats score"}\hlstd{),} \hlkwc{pch}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{20}\hlstd{,}\hlnum{4}\hlstd{),} \hlkwc{col}\hlstd{=}\hlkwd{c}\hlstd{(}\hlstr{"black"}\hlstd{,} \hlstr{"blue"}\hlstd{,} \hlstr{"blue"}\hlstd{))}
\end{alltt}
\end{kframe}
\includegraphics[width=\maxwidth]{figure/pancorona-1}
\end{knitrout}
\end{document}
This diff is collapsed.
No preview for this file type