\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format %\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. %\geometry{letterpaper} % ... or a4paper or a5paper or ... %\geometry{landscape} % Activate for for rotated page geometry %\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent %\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode % TeX will automatically convert eps --> pdf in pdflatex %\usepackage{amssymb} \usepackage[utf8]{inputenc} %\usepackage[cyr]{aeguill} %\usepackage[francais]{babel} %\usepackage{hyperref} \title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis} \author{Marie Cariou} \date{March 2021} % Activate to display a given date or no date \begin{document} \maketitle \tableofcontents \newpage \section{Data} Analysis were formatted by the script covid\_comp\_script0\_table.Rnw. <<eval=FALSE>>= home<-"/home/adminmarie/Documents/" workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/") tab<-read.delim(paste0(workdir, "covid_comp/covid_comp_complete.txt"), h=T, sep="\t") dim(tab) @ <<>>= home<-"/home/adminmarie/Documents/" workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/") tab<-read.delim(paste0(workdir, "covid_comp/covid_comp_alldginn.txt"), h=T, sep="\t") dim(tab) @ \section{Comparison of dataset} \subsection{Data} <<data>>= tmp<-na.omit(tab[,c("Gene.name", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8", "dginn.primate_codemlM1M2", "dginn.primate_codemlM7M8", "dginn.primate_BppM1M2", "dginn.primate_BppM7M8", "dginn.primate_BUSTED")]) col<-c("Gene.name", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8", "bats_codemlM1M2", "bats_codemlM7M8", "dginn.primate_codemlM1M2", "dginn.primate_codemlM7M8", "dginn.primate_BppM1M2", "dginn.primate_BppM7M8", "dginn.primate_BUSTED") dim(tmp) @ \subsection{Omega plot} <<>>= tab$dginn.primate_omegaM0Bpp[tab$dginn.primate_omegaM0Bpp=="na"]<-NA x=as.numeric(as.character( tab$dginn.primate_omegaM0Bpp[tab$status=="shared"])) tab$bats_omegaM0Bpp[tab$bats_omegaM0Bpp=="na"]<-NA y=as.numeric(as.character( tab$bats_omegaM0Bpp[tab$status=="shared"])) names(x)<-tab$Gene.name[tab$status=="shared"] plot(x,y, xlab="bpp omega primate", ylab="bpp omega bats", cex=0.5) abline(0,1) abline(lm(y~x), col="red") text(x[x>0.5 &y<0.4], (y[x>0.5 &y<0.4]+0.01), names(x)[x>0.5 &y<0.4], cex=0.7) text(x[x<0.45 &y>0.45], (y[x<0.45 &y>0.45]+0.01), names(x)[x<0.45 &y>0.45], cex=0.7) text(x[x>0.45 &y>0.4], (y[x>0.45 &y>0.4]+0.01), names(x)[x>0.45 &y>0.4], cex=0.7) @ \subsection{Mondrian} <<mondrianbats>>= library(Mondrian) monddata<-as.data.frame(tmp$Gene.name) batstmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y", tmp$bats_codemlM7M8=="Y", tmp$bats_BppM1M2=="Y", tmp$bats_BppM7M8=="Y", tmp$bats_BUSTED=="Y")) primatetmp<-rowSums(cbind(tmp$"dginn.primate_codemlM1M2"=="Y", tmp$"dginn.primate_codemlM7M8"=="Y", tmp$"dginn.primate_BppM1M2"=="Y", tmp$"dginn.primate_BppM7M8"=="Y", tmp$"dginn.primate_BUSTED"=="Y")) monddata$bats_dginn3<-ifelse(batstmp>=3, 1,0) monddata$primate_dginn3<-ifelse(primatetmp>=3, 1,0) monddata$bats_dginn4<-ifelse(batstmp>=4, 1,0) monddata$primate_dginn4<-ifelse(primatetmp>=4, 1,0) mondrian(monddata[,2:3], labels=c("DGINN bats >3", "DGINN primate >3")) mondrian(monddata[,4:5], labels=c("DGINN bats >4", "DGINN primate >4")) @ \subsection{subsetR} <<subsetbats>>= library(UpSetR) upset(monddata, nsets = 4, matrix.color = "#DC267F", main.bar.color = "#648FFF", sets.bar.color = "#FE6100") upset(monddata[,1:3], nsets = 2, matrix.color = "#DC267F", main.bar.color = "#648FFF", sets.bar.color = "#FE6100") upset(monddata[,c(1,4,5)], nsets = 2, matrix.color = "#DC267F", main.bar.color = "#648FFF", sets.bar.color = "#FE6100") @ \section{Which are these genes?} \subsection{Gene under positive selection in both bats and primates} 4 methods: <<>>= monddata[monddata$bats_dginn4==1 & monddata$primate_dginn4==1,] @ 3 methods: <<>>= monddata[monddata$bats_dginn3==1 & monddata$primate_dginn3==1,] @ \subsection{Gene under positive selection only in primates} 4 methods: <<>>= monddata[monddata$bats_dginn4==0 & monddata$primate_dginn4==1,] @ 3 methods: <<t>>= monddata[monddata$bats_dginn3==0 & monddata$primate_dginn3==1,] @ \subsection{Gene under positive selection only in bats} 4 methods: <<>>= monddata[monddata$bats_dginn4==1 & monddata$primate_dginn4==0,] @ 3 methods: <<>>= monddata[monddata$bats_dginn3==1 & monddata$primate_dginn3==0,] @ \subsection{Figure tableau} <<tablo>>= tablo<-as.data.frame(tmp$Gene.name) tablo$nbats<-batstmp tablo$nprimates<-primatetmp plot(NULL, xlim=c(-0.5,5.5), ylim=c(-3,5.5), xlab="bats", ylab="primates", main="Genes supported by x,y methods in bats and primates", bty="n", xaxt="n", yaxt="n") text(x=rep(-0.6, 6), y=0:5, 0:5) text(y=rep(-0.65, 6), x=0:5, 0:5) sapply(seq(from=-0.5, to=5.5, by=1), function(x){ segments(x0=x, x1=x, y0=-0.5, y1=5.5) }) sapply(seq(from=-0.5, to=5.5, by=1), function(x){ segments(x0=-0.5, x1=5.5, y0=x, y1=x) }) for (p in 0:5){ for (b in 0:5){ tmp<-tablo$`tmp$Gene.name`[tablo$nbats==b & tablo$nprimates==p] if(length(tmp)>0 & length(tmp)<=8){ text(b,seq(from=(p-0.4), to=(p+0.4), length.out = length(tmp)), tmp, cex=0.4) }else if (length(tmp)>8 & length(tmp)<=16){ print(c(p, b)) text((b-0.3),seq(from=(p-0.4), to=(p+0.4), length.out = 8), tmp[1:8], cex=0.4) text((b+0.3),seq(from=(p-0.4), to=(p+0.4), length.out = (length(tmp)-8)), tmp[9:length(tmp)], cex=0.4) }else if (length(tmp)>16){ text(b,p, paste0(length(tmp), " values")) } } } tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==1] text(-0.4,-1.2, "p=1/n=0", cex=0.6) text(seq(from=0.1, to=5.5, length.out = 19), -1.1, tmp[1:19], cex=0.4) text(seq(from=0.1, to=5.5, length.out = length(tmp)-19), -1.3, tmp[20:length(tmp)], cex=0.4) tmp<-tablo$`tmp$Gene.name`[tablo$nbats==1 & tablo$nprimates==1] text(-0.4,-1.7, "p=1/n=1", cex=0.6) text(seq(from=0.1, to=5.5, length.out = 18), -1.6, tmp[1:18], cex=0.4) text(seq(from=0.1, to=4.5, length.out = length(tmp)-18), -1.8, tmp[19:length(tmp)], cex=0.4) tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==0] text(-0.4,-2.3, "p=0/n=0", cex=0.6) text(seq(from=0.1, to=5.5, length.out = 17),-2.1, tmp[1:17], cex=0.4) text(seq(from=0.1, to=5.5, length.out = 17),-2.3, tmp[18:34], cex=0.4) text(seq(from=0.1, to=5.5, length.out = length(tmp)-34),-2.5, tmp[35:length(tmp)], cex=0.4) tmp<-tablo$`tmp$Gene.name`[tablo$nbats==2 & tablo$nprimates==0] text(-0.4,-2.9, "p=0/n=2", cex=0.6) text(seq(from=0.1, to=5.5, length.out = 18),-2.8, tmp[1:18], cex=0.4) text(seq(from=0.1, to=1, length.out = length(tmp)-18),-3.0, tmp[19:length(tmp)], cex=0.4) @ <<>>= write.csv(tablo[tablo$nbats>=3,"tmp$Gene.name"], "batssup3.csv", row.names=FALSE, quote=FALSE) write.csv(tablo[tablo$nprimates>=3,"tmp$Gene.name"], "primatessup3.csv", row.names=FALSE, quote=FALSE) write.csv(tablo, "primatesVbats.csv", row.names=FALSE, quote=FALSE) @ Restreindre ce tableau aux gènes présent dans l'analyse de Krogan. <<setup, include=FALSE, cache=FALSE, tidy=TRUE>>= options(tidy=TRUE, width=70) @ <<>>= # Reading the Krogan table tab<-read.delim(paste0(workdir, "covid_comp/covid_comp_complete.txt"), fill=T, h=T, dec=",") dim(tab) #Adding ACE2 and TMPRSS2 krogan<-c(as.character(tab$merge.Gene), "ACE2", "TMPRSS2") # The list length(krogan) krogan #In the table, I select line that match the krogan gene name liste tabloK<-tablo[tablo$`tmp$Gene.name` %in% krogan,] # How many gene lost? dim(tablo) dim(tabloK) # Les gènes perdus (dans le tableau mais pas dans la liste de Krogan) sort(tablo$`tmp$Gene.name`[tablo$`tmp$Gene.name` %in% krogan==F]) # Les gènes de Krogan non présent dans cette liste sort(krogan[krogan %in% tablo$`tmp$Gene.name`==F]) write.csv(tabloK, "primatesVbats_onlykrogan.csv", row.names=FALSE, quote=FALSE) @ \section{Tanglegram} <<eval=TRUE>>= #install.packages('dendextend') # stable CRAN version library(dendextend) # load the package #install.packages("phytools") # stable CRAN version library(phytools) # load the package library(ggraph) library(igraph) library(tidyverse) tmp<-tablo[(tablo$nbats!=0 | tablo$nprimates!=0),] #tmp<-head(tablo, 20) #tmp<-rbind(as.matrix(tmp), c("outgroup", 50, 50)) tmp<-as.data.frame(tmp) matbats<-hclust(dist(tmp$nbats)) matpri<-hclust(dist(tmp$nprimates)) tmp[order(tmp$nbats),] dendpri<-as.dendrogram(matpri) dendbats<-as.dendrogram(matbats) labels(dendpri)<-as.character(tmp$`tmp$Gene.name`[labels(dendpri)]) labels(dendbats)<-as.character(tmp$`tmp$Gene.name`[labels(dendbats)]) tmp[order(tmp$nprimates, decreasing=FALSE),]$'tmp$Gene.name'-> order dendpri<-dendextend::rotate(dendpri, order=order) tmp[order(tmp$nbats, decreasing=FALSE),]$'tmp$Gene.name'-> order dendbats<-dendextend::rotate(dendbats, order=order) #### Il faut swapper certains neud de l'arbrese class(labels(dendpri)) dend12 <- dendlist(dendbats, dendpri) png("figure/tanglegramm.png", width = 1800, height = 3000) tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, edge.lwd=0, margin_inner=6, margin_top=2, main_left=" bats", main_right = "primates ", lwd=0.5, cex_main=1, lab.cex=1, k_labels=6) dev.off() @ <<eval=TRUE>>= ace<-264 tmprss2<-75 znf318<-81 sepsecs<-228 tbk1<-273 ripk1<-224 col<-rep("grey", length(labels(dendpri))) col[ace]<-"black" col[tmprss2]<-"black" col[znf318]<-"black" col[sepsecs]<-"black" col[tbk1]<-"black" col[ripk1]<-"black" font<-rep(1, length(labels(dendpri))*2) #font[ace]<-1.3 #font[tmprss2]<-1.3 #font[length(labels(dendpri))+160]<-1.3 png("figure/tanglegramm.png", width = 1800, height = 3000) tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, edge.lwd=0, margin_inner=6, margin_top=2, main_left=" bats", main_right = "primates ", lwd=0.5, cex_main=1, lab.cex=font, k_labels=6, color_lines=col) dev.off() @ <<>>= tmp<-tablo[(tablo$nbats>=3 | tablo$nprimates>=3),] dim(tmp) tmp<-as.data.frame(tmp) names(tmp)<-c("tmp.Gene.name", "nbats", "nprimates") matbats<-hclust(dist(tmp$nbats)) matpri<-hclust(dist(tmp$nprimates)) #tmp[order(tmp$nbats),] dendpri<-as.dendrogram(matpri) dendbats<-as.dendrogram(matbats) labels(dendpri)<-as.character(tmp$tmp.Gene.name[labels(dendpri)]) labels(dendbats)<-as.character(tmp$tmp.Gene.name[labels(dendbats)]) tmp[order(tmp$nprimates, decreasing=FALSE),]$tmp.Gene.name-> order dendpri<-dendextend::rotate(dendpri, order=order) tmp[order(tmp$nbats, decreasing=FALSE),]$tmp.Gene.name-> order dendbats<-dendextend::rotate(dendbats, order=order) #### Il faut swapper certains neuds de l'arbres class(labels(dendpri)) dend12 <- dendlist(dendbats, dendpri) ace<-97 tmprss2<-27 znf318<-31 sepsecs<-69 tbk1<-106 ripk1<-68 col<-rep("lightblue", length(labels(dendpri))) plusplus<-tmp$tmp.Gene.name[tmp$nbats>=3 & tmp$nprimates>=3] col[which(labels(dendbats) %in% plusplus)]<-"pink" interest<-c("TMPRSS2","ZNF318", "SEPSECS","TBK1", "RIPK1") col[which(labels(dendbats) %in% interest)]<-"blue" interestpp<-c("ACE2") col[which(labels(dendbats) %in% interestpp)]<-"red" png("figure/tanglegrammsup3.png", width = 500, height = 1200) tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, edge.lwd=0, margin_inner=6, margin_top=3, main_left=" bats", main_right = "primates ", lwd=0.5, cex_main=2, lab.cex=1, k_labels=6, color_lines=col) dev.off() ### Changer couleurs des groupes ## changer couleurs des lines sel vs sel or sel vs non-sel setEPS() postscript("figure/tanglegramsup3.eps", height=15, width=5) tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, edge.lwd=0, margin_inner=6, margin_top=3, main_left=" bats", main_right = "primates ", lwd=0.5, cex_main=2, lab.cex=1, # k_labels=6, color_lines=col) dev.off() labels_colors(dend12[[1]])<-rep(rainbow(15)[c(1:3, 9:11)], table(tmp$nbats)) labels_colors(dend12[[2]])<-rep(rainbow(15)[c(1:3, 9:11)], table(tmp$nprimates)) labels_colors(dend12[[1]])<-rep(viridis(10)[c(1:3, 7:9)], table(tmp$nbats)) labels_colors(dend12[[2]])<-rep(viridis(10)[c(1:3, 7:9)], table(tmp$nprimates)) setEPS() postscript("figure/tanglegramsup3_V2.eps", height=15, width=5) tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE, edge.lwd=0, margin_inner=6, margin_top=3, main_left=" bats", main_right = "primates ", lwd=0.5, cex_main=2, lab.cex=1, # k_labels=6, color_lines=col) dev.off() @ \end{document}