Newer
Older
\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{October 2020} % Activate to display a given date or no date
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
<<>>=
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
dim(tab)
tab$Gene.name[tab$PreyGene=="MTARC1"]<-"MTARC1"
@
\section{Comparisons Primates}
\subsection{Janet Young's results (Young-primate) VS DGINN-full's results}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8_1>>=
tab$dginn.primate_omegaM0Bpp[tab$dginn.primate_omegaM0Bpp=="na"]<-NA
tab$dginn.primate_omegaM0Bpp<-as.numeric(as.character(
tab$dginn.primate_omegaM0Bpp))
plot(tab$whole.gene.dN.dS.model.0,
tab$dginn.primate_omegaM0Bpp,
xlab="Omega Young-primate",
ylab="DGINN-full's",
cex=0.3)
abline(lm(tab$dginn.primate_omegaM0Bpp~tab$whole.gene.dN.dS.model.0),
col="red")
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.4 &
tab$dginn.primate_omegaM0Bpp>0.5,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.1 &
tab$dginn.primate_omegaM0Bpp>0.3,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)
outlier<-tab[tab$whole.gene.dN.dS.model.0>0.33 &
tab$dginn.primate_omegaM0Bpp<0.2,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)
outlier<-tab[tab$whole.gene.dN.dS.model.0>0.6 &
tab$dginn.primate_omegaM0Bpp<0.6,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)
@
\subsection{Janet Young's results (Young-primate) VS Cooper's result}
Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "cooper.primates.Average\_dNdS".
<<omegaM7M8_2>>=
tab$cooper.primates.Average_dNdS<-as.numeric(as.character(
tab$cooper.primates.Average_dNdS))
plot(tab$whole.gene.dN.dS.model.0,
tab$cooper.primates.Average_dNdS,
xlab="Omega Young-primate",
ylab="Omega Cooper-primate",
cex=0.3)
abline(lm(tab$cooper.primates.Average_dNdS~tab$whole.gene.dN.dS.model.0),
col="red")
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.15 &
tab$cooper.primates.Average_dNdS>0.4,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$cooper.primates.Average_dNdS,
outlier$Gene.name, cex=0.5)
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.3 &
tab$cooper.primates.Average_dNdS>0.5,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$cooper.primates.Average_dNdS,
outlier<-tab[tab$whole.gene.dN.dS.model.0>0.3 &
tab$cooper.primates.Average_dNdS<0.1,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$cooper.primates.Average_dNdS,
outlier$Gene.name, cex=0.5)
@
\subsection{Cooper's results (Cooper-primate) VS DGINN-full's results}
Comparaison des Omega: colonne "cooper.primates.Average\_dNdS" VS colonne "omega" dans la sortie de dginn.
<<omegaM7M8_3>>=
plot(tab$cooper.primates.Average_dNd,
tab$dginn.primate_omegaM0Bpp,
xlab="Omega Cooper-primate",
ylab="DGINN-full's",
cex=0.3)
abline(lm(tab$dginn.primate_omegaM0Bpp~tab$cooper.primates.Average_dNd), col="red")
outlier<-tab[tab$cooper.primates.Average_dNd<0.4 &
tab$dginn.primate_omegaM0Bpp>0.5,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier<-tab[tab$cooper.primates.Average_dNd<0.1 &
tab$dginn.primate_omegaM0Bpp>0.3,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name, cex=0.5)
outlier<-tab[tab$cooper.primates.Average_dNd>0.7 &
tab$dginn.primate_omegaM0Bpp<0.3,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name, cex=0.5)
outlier<-tab[tab$cooper.primates.Average_dNd>0.45 &
tab$dginn.primate_omegaM0Bpp<0.2,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name, cex=0.5)
@
\section{Overlap}
\subsection{Mondrian}
<<mondrianprimates>>=
library(Mondrian)
monddata<-as.data.frame(tab$Gene.name)
dim(monddata)
dginnfulltmp<-rowSums(cbind(tab$dginn.primate_BUSTED=="Y",
tab$dginn.primate_BppM1M2=="Y",
tab$dginn.primate_BppM7M8=="Y",
tab$dginn.primate_codemlM1M2=="Y",
tab$dginn.primate_codemlM7M8=="Y"))
monddata$primates_young<-ifelse(
tab$pVal.M8vsM7<0.05, 1, 0)
monddata$primate_cooper<-ifelse(
tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
monddata$primates_dginn_full<-ifelse(
dginnfulltmp>=3, 1,0)
mondrian(na.omit(monddata[,2:4]),
labels=c("Young", "Cooper", "DGINN-full >=3" ))
monddata$primates_dginn_full<-ifelse(
dginnfulltmp>=4, 1,0)
mondrian(na.omit(monddata[,2:4]),
labels=c("Young", "Cooper", "DGINN-full >=4"))
@
\subsection{subsetR}
Just another representation of the same result.
<<subsetprimates>>=
library(UpSetR)
upsetdata<-as.data.frame(tab$Gene.name)
upsetdata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
upsetdata$primate_cooper<-ifelse(
tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)
upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
###
upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=4, 1,0)
upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
@
\section{Gene List}
Genes under positive selection for at least 4 methods.
<<>>=
dginnfulltmp<-rowSums(cbind(tab$dginn.primate_BUSTED=="Y",
tab$dginn.primate_BppM1M2=="Y",
tab$dginn.primate_BppM7M8=="Y",
tab$dginn.primate_codemlM1M2=="Y",
tab$dginn.primate_codemlM7M8=="Y"))
tab$Gene.name[dginnfulltmp>=4 & is.na(dginnfulltmp)==F]
tab$Gene.name[dginnfulltmp>=3 & is.na(dginnfulltmp)==F]
tmp<-tab[dginnfulltmp>=4 & is.na(dginnfulltmp)==F,
c("Gene.name","dginn.primate_BUSTED", "dginn.primate_BppM1M2",
"dginn.primate_BppM7M8","dginn.primate_codemlM1M2","dginn.primate_codemlM7M8")]
write.table(tmp, "geneList_DGINN_full_primate_pos4.txt", row.names=F, quote=F)
@
\section{Shiny like}
<<shiny, fig.height=11>>=
makeFig1 <- function(df){
# prepare data for colors etc
colMethods <- c("deepskyblue4", "darkorange" , "deepskyblue3" , "mediumseagreen" , "yellow3" , "black")
nameMethods <- c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8", "MEME")
metColor <- data.frame(Name = nameMethods , Col = colMethods , stringsAsFactors = FALSE)
# subset for this specific figure
#df <- df[df$nbY >= 1, ] # to drop genes found by 0 methods (big datasets)
xt <- df[, c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8")]
xt$Gene <- df$Gene
nbrMeth <- 5
# reverse order of dataframe so that genes with the most Y are at the bottom (to be on top of the barplot)
xt[,1:5] <- ifelse(xt[,1:5] == "Y", 1, 0)
# sort and Filter the 0 lines
xt<-xt[order(rowSums(xt[,1:5])),]
xt<-xt[rowSums(xt[,1:5])>2,]
row.names(xt)<-xt$Gene
xt<-xt[,1:5]
colFig1 <- metColor[which(metColor$Name %in% colnames(xt)) , ]
##### PART 1 : NUMBER OF METHODS
par(xpd = NA , mar=c(2,7,4,0) , oma = c(0,0,0,0) , mgp = c(3,0.3,0))
h = barplot(
t(xt),
border = NA ,
axes = F ,
col = adjustcolor(colFig1$Col, alpha.f = 1),
horiz = T ,
las = 2 ,
main = "Methods detecting positive selection" ,
cex.main = 0.85,
cex.names = min(50/nrow(xt), 1.5)
)
axis(3, line = 0, at = c(0:nbrMeth), label = c("0", rep("", nbrMeth -1), nbrMeth), tck = 0.02)
legend("bottomleft",
horiz = T,
border = colFig1$Col,
legend = colFig1$Name,
fill = colFig1$Col,
cex = 0.8,
bty = "n",
xpd = NA
)
}
@
<<>>=
source("covid_comp_shiny.R")
df<-read.delim(paste0(workdir,
"/data/DGINN_202005281649summary_cleaned.csv"),
fill=T, h=T, sep=",")
names(df)
dftmp<-tab[,c("File", "Name", "Gene.name",
"GeneSize", "dginn.primate_NbSpecies", "dginn.primate_omegaM0Bpp",
"dginn.primate_omegaM0codeml", "dginn.primate_BUSTED", "dginn.primate_BUSTED.p.value",
"dginn.primate_MEME.NbSites", "dginn.primate_MEME.PSS", "dginn.primate_BppM1M2",
"dginn.primate_BppM1M2.p.value", "dginn.primate_BppM1M2.NbSites", "dginn.primate_BppM1M2.PSS",
"dginn.primate_BppM7M8", "dginn.primate_BppM7M8.p.value", "dginn.primate_BppM7M8.NbSites",
"dginn.primate_BppM7M8.PSS", "dginn.primate_codemlM1M2", "dginn.primate_codemlM1M2.p.value",
"dginn.primate_codemlM1M2.NbSites","dginn.primate_codemlM1M2.PSS", "dginn.primate_codemlM7M8",
"dginn.primate_codemlM7M8.p.value", "dginn.primate_codemlM7M8.NbSites" , "dginn.primate_codemlM7M8.PSS")]
names(dftmp)<-names(df)
makeFig1(dftmp)
@
\end{document}