Skip to content
Snippets Groups Projects
covid_comp_dataset.Rnw 13.5 KiB
Newer Older
\documentclass[11pt, oneside]{article}   	% use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry}                		% See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper}                   		% ... or a4paper or a5paper or ... 
%\geometry{landscape}                		% Activate for for rotated page geometry
%\usepackage[parfill]{parskip}    		% Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx}				% Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
								% TeX will automatically convert eps --> pdf in pdflatex		
%\usepackage{amssymb}

\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}


\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
mcariou's avatar
mcariou committed
\date{March 2021}							% Activate to display a given date or no date

\begin{document}
\maketitle

\tableofcontents

\newpage

\section{Data}

Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.

mcariou's avatar
mcariou committed
<<eval=FALSE>>=
mcariou's avatar
mcariou committed
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")

tab<-read.delim(paste0(workdir, 
  "covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
dim(tab)
@

mcariou's avatar
mcariou committed
<<>>=
mcariou's avatar
mcariou committed
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
mcariou's avatar
mcariou committed

tab<-read.delim(paste0(workdir, 
  "covid_comp/covid_comp_alldginn.txt"), h=T, sep="\t")
dim(tab)
@

\section{Comparison of dataset}

\subsection{Data}

<<data>>=
mcariou's avatar
mcariou committed
tmp<-na.omit(tab[,c("Gene.name", "bats_BUSTED", "bats_BppM1M2", 
    "bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8", 
    "dginn.primate_codemlM1M2", "dginn.primate_codemlM7M8", 
    "dginn.primate_BppM1M2", "dginn.primate_BppM7M8", 
    "dginn.primate_BUSTED")])
col<-c("Gene.name", "bats_BUSTED", "bats_BppM1M2", 
    "bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8", 
    "dginn.primate_codemlM1M2", "dginn.primate_codemlM7M8", 
    "dginn.primate_BppM1M2", "dginn.primate_BppM7M8", 
    "dginn.primate_BUSTED")
mcariou's avatar
mcariou committed

\subsection{Omega plot}

<<>>=
mcariou's avatar
mcariou committed
tab$dginn.primate_omegaM0Bpp[tab$dginn.primate_omegaM0Bpp=="na"]<-NA
x=as.numeric(as.character(
  tab$dginn.primate_omegaM0Bpp[tab$status=="shared"]))

tab$bats_omegaM0Bpp[tab$bats_omegaM0Bpp=="na"]<-NA
y=as.numeric(as.character(
  tab$bats_omegaM0Bpp[tab$status=="shared"]))

mcariou's avatar
mcariou committed
names(x)<-tab$Gene.name[tab$status=="shared"]
  
plot(x,y, xlab="bpp omega primate", ylab="bpp omega bats", cex=0.5)
abline(0,1)
abline(lm(y~x), col="red")

mcariou's avatar
mcariou committed
text(x[x>0.5 &y<0.4], (y[x>0.5 &y<0.4]+0.01), 
     names(x)[x>0.5 &y<0.4], cex=0.7)
text(x[x<0.45 &y>0.45], (y[x<0.45 &y>0.45]+0.01), 
     names(x)[x<0.45 &y>0.45], cex=0.7)
text(x[x>0.45 &y>0.4], (y[x>0.45 &y>0.4]+0.01), 
     names(x)[x>0.45 &y>0.4], cex=0.7)
mcariou's avatar
mcariou committed

@

\subsection{Mondrian}

<<mondrianbats>>=
library(Mondrian)

monddata<-as.data.frame(tmp$Gene.name)

mcariou's avatar
mcariou committed
batstmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y", 
                       tmp$bats_codemlM7M8=="Y", 
                       tmp$bats_BppM1M2=="Y",
                       tmp$bats_BppM7M8=="Y", 
                       tmp$bats_BUSTED=="Y")) 

primatetmp<-rowSums(cbind(tmp$"dginn.primate_codemlM1M2"=="Y",
                          tmp$"dginn.primate_codemlM7M8"=="Y", 
                          tmp$"dginn.primate_BppM1M2"=="Y", 
                          tmp$"dginn.primate_BppM7M8"=="Y", 
                          tmp$"dginn.primate_BUSTED"=="Y")) 
                                       
monddata$bats_dginn3<-ifelse(batstmp>=3, 1,0)
monddata$primate_dginn3<-ifelse(primatetmp>=3, 1,0)
monddata$bats_dginn4<-ifelse(batstmp>=4, 1,0)
monddata$primate_dginn4<-ifelse(primatetmp>=4, 1,0)

mcariou's avatar
mcariou committed
mondrian(monddata[,2:3], 
         labels=c("DGINN bats >3", "DGINN primate >3"))
mcariou's avatar
mcariou committed
mondrian(monddata[,4:5], 
         labels=c("DGINN bats >4", "DGINN primate >4"))

@

\subsection{subsetR}

<<subsetbats>>=
library(UpSetR)

upset(monddata, nsets = 4, matrix.color = "#DC267F", 
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")

upset(monddata[,1:3], nsets = 2, matrix.color = "#DC267F", 
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")

upset(monddata[,c(1,4,5)], nsets = 2, matrix.color = "#DC267F", 
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
@

\section{Which are these genes?}

\subsection{Gene under positive selection in both bats and primates}

4 methods:

<<>>=
monddata[monddata$bats_dginn4==1 & monddata$primate_dginn4==1,]
@

3 methods:

<<>>=
monddata[monddata$bats_dginn3==1 & monddata$primate_dginn3==1,]
@


\subsection{Gene under positive selection only in primates}

4 methods:

<<>>=
monddata[monddata$bats_dginn4==0 & monddata$primate_dginn4==1,]
@

3 methods:

<<t>>=
monddata[monddata$bats_dginn3==0 & monddata$primate_dginn3==1,]
@


\subsection{Gene under positive selection only in bats}

4 methods:

<<>>=
monddata[monddata$bats_dginn4==1 & monddata$primate_dginn4==0,]
@

3 methods:

<<>>=
monddata[monddata$bats_dginn3==1 & monddata$primate_dginn3==0,]
@

\subsection{Figure tableau}

<<tablo>>=
tablo<-as.data.frame(tmp$Gene.name)
tablo$nbats<-batstmp
tablo$nprimates<-primatetmp

mcariou's avatar
mcariou committed
plot(NULL, xlim=c(-0.5,5.5), ylim=c(-3,5.5), 
     xlab="bats", ylab="primates", 
     main="Genes supported by x,y methods in bats and primates", 
     bty="n", 
     xaxt="n", yaxt="n")

text(x=rep(-0.6, 6), y=0:5, 0:5)
text(y=rep(-0.65, 6), x=0:5, 0:5)
sapply(seq(from=-0.5, to=5.5, by=1), function(x){
  segments(x0=x, x1=x, y0=-0.5, y1=5.5)
})

sapply(seq(from=-0.5, to=5.5, by=1), function(x){
  segments(x0=-0.5, x1=5.5, y0=x, y1=x)
})

for (p in 0:5){
  for (b in 0:5){
    tmp<-tablo$`tmp$Gene.name`[tablo$nbats==b & tablo$nprimates==p]
mcariou's avatar
mcariou committed
    if(length(tmp)>0 & length(tmp)<=8){
mcariou's avatar
mcariou committed
      text(b,seq(from=(p-0.4), to=(p+0.4), length.out = length(tmp)), 
           tmp, cex=0.4)
mcariou's avatar
mcariou committed
    }else if (length(tmp)>8 & length(tmp)<=16){
      print(c(p, b))
mcariou's avatar
mcariou committed
      text((b-0.3),seq(from=(p-0.4), to=(p+0.4), length.out = 8), 
           tmp[1:8], cex=0.4)      
      text((b+0.3),seq(from=(p-0.4), to=(p+0.4), length.out = (length(tmp)-8)), 
           tmp[9:length(tmp)], cex=0.4)
    }else if (length(tmp)>16){
      text(b,p, paste0(length(tmp), " values"))
      }  
  }
}


tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==1]
text(-0.4,-1.2, "p=1/n=0", cex=0.6)
mcariou's avatar
mcariou committed
text(seq(from=0.1, to=5.5, length.out = 19),
mcariou's avatar
mcariou committed
     -1.1, 
mcariou's avatar
mcariou committed
     tmp[1:19], 
mcariou's avatar
mcariou committed
     cex=0.4)
mcariou's avatar
mcariou committed
text(seq(from=0.1, to=5.5, length.out = length(tmp)-19),
mcariou's avatar
mcariou committed
     -1.3, 
mcariou's avatar
mcariou committed
     tmp[20:length(tmp)],
mcariou's avatar
mcariou committed
     cex=0.4)

tmp<-tablo$`tmp$Gene.name`[tablo$nbats==1 & tablo$nprimates==1]
text(-0.4,-1.7, "p=1/n=1", cex=0.6)
mcariou's avatar
mcariou committed
text(seq(from=0.1, to=5.5, length.out = 18),
     -1.6, 
     tmp[1:18], 
     cex=0.4)
text(seq(from=0.1, to=4.5, length.out = length(tmp)-18),
     -1.8, 
     tmp[19:length(tmp)], 
     cex=0.4)


tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==0]
mcariou's avatar
mcariou committed
text(-0.4,-2.3, "p=0/n=0", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 17),-2.1, tmp[1:17], cex=0.4)
mcariou's avatar
mcariou committed
text(seq(from=0.1, to=5.5, length.out = 17),-2.3, tmp[18:34], cex=0.4)
text(seq(from=0.1, to=5.5, length.out = length(tmp)-34),-2.5, tmp[35:length(tmp)], cex=0.4)
mcariou's avatar
mcariou committed
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==2 & tablo$nprimates==0]
text(-0.4,-2.9, "p=0/n=2", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-2.8, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=1, length.out = length(tmp)-18),-3.0, tmp[19:length(tmp)], cex=0.4)
mcariou's avatar
mcariou committed
<<>>=
mcariou's avatar
mcariou committed
write.csv(tablo[tablo$nbats>=3,"tmp$Gene.name"], "batssup3.csv", 
          row.names=FALSE, 
          quote=FALSE)

write.csv(tablo[tablo$nprimates>=3,"tmp$Gene.name"], "primatessup3.csv", 
          row.names=FALSE, 
          quote=FALSE)
write.csv(tablo, "primatesVbats.csv", 
          row.names=FALSE, 
          quote=FALSE)
mcariou's avatar
mcariou committed
@

mcariou's avatar
mcariou committed
Restreindre ce tableau aux gènes présent dans l'analyse de Krogan.
mcariou's avatar
mcariou committed
<<setup, include=FALSE, cache=FALSE, tidy=TRUE>>=
options(tidy=TRUE, width=70)
@
mcariou's avatar
mcariou committed
<<>>=
# Reading the Krogan table
tab<-read.delim(paste0(workdir, 
mcariou's avatar
mcariou committed
  "covid_comp/covid_comp_complete.txt"),
mcariou's avatar
mcariou committed
	fill=T, h=T, dec=",")
dim(tab)


#Adding ACE2 and TMPRSS2
mcariou's avatar
mcariou committed
krogan<-c(as.character(tab$merge.Gene),  "ACE2", "TMPRSS2")
mcariou's avatar
mcariou committed

# The list
length(krogan)
krogan

#In the table, I select line that match the krogan gene name liste
tabloK<-tablo[tablo$`tmp$Gene.name` %in% krogan,]

# How many gene lost?
dim(tablo)
dim(tabloK)

# Les gènes perdus (dans le tableau mais pas dans la liste de Krogan)
sort(tablo$`tmp$Gene.name`[tablo$`tmp$Gene.name` %in% krogan==F])

# Les gènes de Krogan non présent dans cette liste
sort(krogan[krogan %in% tablo$`tmp$Gene.name`==F])


write.csv(tabloK, "primatesVbats_onlykrogan.csv", row.names=FALSE, quote=FALSE)
@
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
\section{Tanglegram}

mcariou's avatar
mcariou committed
#install.packages('dendextend') # stable CRAN version
library(dendextend) # load the package
#install.packages("phytools") # stable CRAN version
library(phytools) # load the package
library(ggraph)
library(igraph)
library(tidyverse)
 
tmp<-tablo[(tablo$nbats!=0 | tablo$nprimates!=0),]
mcariou's avatar
mcariou committed
#tmp<-head(tablo, 20)
mcariou's avatar
mcariou committed
#tmp<-rbind(as.matrix(tmp), c("outgroup", 50, 50))
tmp<-as.data.frame(tmp)
matbats<-hclust(dist(tmp$nbats))
matpri<-hclust(dist(tmp$nprimates))

tmp[order(tmp$nbats),]

dendpri<-as.dendrogram(matpri)
dendbats<-as.dendrogram(matbats)

labels(dendpri)<-as.character(tmp$`tmp$Gene.name`[labels(dendpri)])
labels(dendbats)<-as.character(tmp$`tmp$Gene.name`[labels(dendbats)])


tmp[order(tmp$nprimates, decreasing=FALSE),]$'tmp$Gene.name'-> order
dendpri<-dendextend::rotate(dendpri, order=order)

tmp[order(tmp$nbats, decreasing=FALSE),]$'tmp$Gene.name'-> order
dendbats<-dendextend::rotate(dendbats, order=order)

#### Il faut swapper certains neud de l'arbrese
class(labels(dendpri))

dend12 <- dendlist(dendbats, dendpri)

png("figure/tanglegramm.png", width = 1800, height = 3000)
mcariou's avatar
mcariou committed
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, 
           edge.lwd=0, margin_inner=6, 
           margin_top=2,
           main_left="                  bats", 
           main_right = "primates                  ", 
           lwd=0.5, 
           cex_main=1,
           lab.cex=1, 
           k_labels=6)

dev.off()

@

<<eval=TRUE>>=
ace<-264
tmprss2<-75
znf318<-81
sepsecs<-228
tbk1<-273
ripk1<-224

col<-rep("grey", length(labels(dendpri)))
col[ace]<-"black" 
col[tmprss2]<-"black"
col[znf318]<-"black"
col[sepsecs]<-"black"
col[tbk1]<-"black" 
col[ripk1]<-"black"

font<-rep(1, length(labels(dendpri))*2)
#font[ace]<-1.3
#font[tmprss2]<-1.3
#font[length(labels(dendpri))+160]<-1.3

png("figure/tanglegramm.png", width = 1800, height = 3000)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, 
           edge.lwd=0, margin_inner=6, 
           margin_top=2,
           main_left="                  bats", 
           main_right = "primates                  ", 
           lwd=0.5, 
           cex_main=1,
           lab.cex=font, 
           k_labels=6,
           color_lines=col)

dev.off()
mcariou's avatar
mcariou committed

<<>>=
tmp<-tablo[(tablo$nbats>=3 | tablo$nprimates>=3),]
dim(tmp)
tmp<-as.data.frame(tmp)
names(tmp)<-c("tmp.Gene.name", "nbats", "nprimates")

matbats<-hclust(dist(tmp$nbats))
matpri<-hclust(dist(tmp$nprimates))
#tmp[order(tmp$nbats),]

dendpri<-as.dendrogram(matpri)
dendbats<-as.dendrogram(matbats)

labels(dendpri)<-as.character(tmp$tmp.Gene.name[labels(dendpri)])
labels(dendbats)<-as.character(tmp$tmp.Gene.name[labels(dendbats)])

tmp[order(tmp$nprimates, decreasing=FALSE),]$tmp.Gene.name-> order
dendpri<-dendextend::rotate(dendpri, order=order)

tmp[order(tmp$nbats, decreasing=FALSE),]$tmp.Gene.name-> order
dendbats<-dendextend::rotate(dendbats, order=order)

#### Il faut swapper certains neuds de l'arbres
class(labels(dendpri))

dend12 <- dendlist(dendbats, dendpri)

ace<-97
tmprss2<-27
znf318<-31
sepsecs<-69
tbk1<-106
ripk1<-68

col<-rep("lightblue", length(labels(dendpri)))

plusplus<-tmp$tmp.Gene.name[tmp$nbats>=3 & tmp$nprimates>=3]
col[which(labels(dendbats) %in% plusplus)]<-"pink"

interest<-c("TMPRSS2","ZNF318", "SEPSECS","TBK1", "RIPK1")
col[which(labels(dendbats) %in% interest)]<-"blue"

interestpp<-c("ACE2")
col[which(labels(dendbats) %in% interestpp)]<-"red"




png("figure/tanglegrammsup3.png", width = 500, height = 1200)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, 
           edge.lwd=0, margin_inner=6,
           margin_top=3,
           main_left="          bats", 
           main_right = "primates         ", 
           lwd=0.5, 
           cex_main=2,
           lab.cex=1, 
           k_labels=6,
           color_lines=col)

dev.off()

### Changer couleurs des groupes
## changer couleurs des lines sel vs sel or sel vs non-sel

setEPS()
postscript("figure/tanglegramsup3.eps", height=15, width=5)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, 
           edge.lwd=0, margin_inner=6,
           margin_top=3,
           main_left="          bats", 
           main_right = "primates         ", 
           lwd=0.5, 
           cex_main=2,
           lab.cex=1, 
     #      k_labels=6,
           color_lines=col)
dev.off()


labels_colors(dend12[[1]])<-rep(rainbow(15)[c(1:3, 9:11)], table(tmp$nbats))
labels_colors(dend12[[2]])<-rep(rainbow(15)[c(1:3, 9:11)], table(tmp$nprimates))


labels_colors(dend12[[1]])<-rep(viridis(10)[c(1:3, 7:9)], table(tmp$nbats))
labels_colors(dend12[[2]])<-rep(viridis(10)[c(1:3, 7:9)], table(tmp$nprimates))
setEPS()
postscript("figure/tanglegramsup3_V2.eps", height=15, width=5)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, 
           edge.lwd=0, margin_inner=6,
           margin_top=3,
           main_left="          bats", 
           main_right = "primates         ", 
           lwd=0.5, 
           cex_main=2,
           lab.cex=1, 
     #      k_labels=6,
           color_lines=col)
dev.off()

@




\end{document}