Skip to content
Snippets Groups Projects
Commit 5efe4925 authored by mcariou's avatar mcariou
Browse files

update covid_comp_dataset

parent ec8dfbe5
No related branches found
No related tags found
No related merge requests found
Showing
with 11651 additions and 147 deletions
......@@ -10,21 +10,35 @@ To achieve this, we characterized the evolutionary history of the SARS-CoV-2 int
## Data formatting
requisite R packages: formatR, tinytex
Requisite R packages: formatR, tinytex
~
Script to merge DGINN outputs from different batches of analysis and to include or correct rows corresponding to genes run on corrected alignments.
```
rnw_scripts/
rnw_scripts/covid_comp_script0_table.pdf
```
Input tables in **data/**.
Output tables in **out_tab**
Output tables in **out_tab/**
The tables output from this script will be used for the following analysis steps.
## Comparison between datasets primates and bats
Requisite R packages: Mondrian, UpSetR, dendextend, ggraph, igraph, tidyverse, viridis.
~
Script to compare the bat and primate screens.
```
rnw_scripts/covid_comp_dataset.pdf
```
Input tables in **out_tab/**.
Output tables in **figure/**
## Primates and bats
## Comparison with MAIC score and pancorona analysis
## Dataset comparison
makeFig1 <- function(df){
  # Draw a horizontal stacked barplot showing, for each gene, which of five
  # positive-selection methods returned "Y". Only genes flagged by more than
  # two of the five methods are drawn.
  #
  # Args:
  #   df: data.frame with a `Gene` column and the columns BUSTED, BppM1M2,
  #       BppM7M8, codemlM1M2 and codemlM7M8, in which "Y" marks a positive
  #       result. `Gene` values are used as row names, so they must be unique
  #       among the plotted genes.
  #
  # Returns:
  #   The value of the final legend() call. The function is called for its
  #   plotting side effect. NOTE: par() settings are changed on the active
  #   device and are not restored.

  # colour associated with each method name
  colMethods <- c("deepskyblue4", "darkorange", "deepskyblue3",
                  "mediumseagreen", "yellow3", "black")
  nameMethods <- c("BUSTED", "BppM1M2", "BppM7M8",
                   "codemlM1M2", "codemlM7M8", "MEME")
  metColor <- data.frame(Name = nameMethods, Col = colMethods,
                         stringsAsFactors = FALSE)

  # subset for this specific figure
  #df <- df[df$nbY >= 1, ] # to drop genes found by 0 methods (big datasets)
  xt <- df[, c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8")]
  xt$Gene <- df$Gene
  nbrMeth <- 5

  # recode the calls: "Y" -> 1, any other value -> 0 (a missing value stays NA)
  xt[, 1:5] <- ifelse(xt[, 1:5] == "Y", 1, 0)

  # sort by increasing number of positive methods so that the genes with the
  # most "Y" come last and therefore end up at the top of the horizontal plot
  xt <- xt[order(rowSums(xt[, 1:5])), ]
  # keep genes found by more than 2 methods; rows made NA by the logical
  # subsetting (genes with a missing call) are removed by na.omit()
  xt <- na.omit(xt[rowSums(xt[, 1:5]) > 2, ])
  row.names(xt) <- xt$Gene
  xt <- xt[, 1:5]

  # colours of the methods actually present in the plotted table
  colFig1 <- metColor[which(metColor$Name %in% colnames(xt)), ]

  ##### PART 1 : NUMBER OF METHODS
  par(xpd = NA, mar = c(2, 7, 4, 0), oma = c(0, 0, 0, 0), mgp = c(3, 0.3, 0))
  barplot(
    t(xt),
    border = NA,
    axes = FALSE,
    col = adjustcolor(colFig1$Col, alpha.f = 1),
    horiz = TRUE,
    las = 2,
    main = "Methods detecting positive selection",
    cex.main = 0.85,
    # shrink gene labels when many genes are displayed
    cex.names = min(50 / nrow(xt), 1.5)
  )

  # top axis: only the first (0) and last (nbrMeth) ticks carry a label
  axis(3, line = 0, at = c(0:nbrMeth),
       labels = c("0", rep("", nbrMeth - 1), nbrMeth), tck = 0.02)

  legend("bottomleft",
         horiz = TRUE,
         border = colFig1$Col,
         legend = colFig1$Name,
         fill = colFig1$Col,
         cex = 0.8,
         bty = "n",
         xpd = NA
  )
}
# Load the cleaned DGINN summary table (comma-separated, first line = header).
# `workdir` must already be defined and point to the project root.
df <- read.delim(
  paste0(workdir, "/data/DGINN_202005281649summary_cleaned.csv"),
  fill = TRUE, header = TRUE, sep = ","
)
File added
File added
File added
File added
File added
File added
File added
File added
figure/tanglegramm.png

1.29 MiB

figure/tanglegrammsup3.png

227 KiB

This diff is collapsed.
This diff is collapsed.
Gene.name dginn.primate_BUSTED dginn.primate_BppM1M2 dginn.primate_BppM7M8 dginn.primate_codemlM1M2 dginn.primate_codemlM7M8
ACADM Y Y Y Y Y
BCS1L Y N Y Y Y
BRD4 Y Y Y N Y
CDK5RAP2 Y Y Y Y Y
CEP135 N Y Y Y Y
CEP68 Y Y Y Y Y
CLIP4 Y N Y Y Y
DNMT1 Y Y Y Y Y
DPH5 N Y Y Y Y
EMC1 Y Y Y Y Y
ERO1LB Y N Y Y Y
FYCO1 Y Y Y Y Y
GCC2 Y N Y Y Y
GGH Y Y Y Y Y
GHITM N Y Y Y Y
GIGYF2 Y N Y Y Y
GLA Y N Y Y Y
GOLGA7 Y Y Y Y Y
HECTD1 Y Y Y Y Y
IDE Y Y Y Y na
ITGB1 Y Y Y Y Y
LARP1 Y N Y Y Y
LARP4B Y N Y Y Y
LMAN2 Y N Y Y Y
MARK1 Y Y Y N Y
MIPOL1 N Y Y Y Y
MPHOSPH10 Y N Y Y Y
MYCBP2 Y Y Y Y Y
NDUFAF2 Y Y Y Y Y
NDUFB9 Y N Y Y Y
NUPL1 Y Y Y Y Y
PCNT Y N Y Y Y
POLA1 Y Y Y Y na
PRIM2 Y Y Y Y Y
PRKAR2A N Y Y Y Y
PVR Y Y Y Y Y
REEP6 Y Y Y Y Y
RIPK1 N Y Y Y Y
SAAL1 Y N Y Y Y
SEPSECS Y Y Y Y Y
SIRT5 N Y Y Y Y
SLC25A21 N Y Y Y Y
SLC27A2 N Y Y Y Y
TMEM39B Y N Y Y Y
TOR1AIP1 Y Y Y Y Y
TUBGCP2 Y N Y Y Y
UBAP2 N Y Y Y Y
UGGT2 N Y Y Y Y
VPS39 Y Y Y Y Y
ZNF318 Y Y Y Y Y
File moved
File moved
File moved
File moved
......@@ -29,21 +29,30 @@
Analyses were formatted by the script covid\_comp\_script0\_table.Rnw.
<<eval=FALSE>>=
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
home<-"/home/adminmarie/Documents/CIRI_BIBS_projects/"
workdir<-paste0(home, "2020_05_Etienne_covid/2020_dginn_covid19/")
@
<<>>=
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
"out_tab/covid_comp_alldginn.txt"), h=T, sep="\t")
dim(tab)
@
Necessary packages:
<<>>=
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
library(Mondrian)
library(UpSetR)
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_alldginn.txt"), h=T, sep="\t")
dim(tab)
#install.packages('dendextend') # stable CRAN version
library(dendextend) # load the package
#install.packages("phytools") # stable CRAN version
#library(phytools) # load the package
library(ggraph)
library(igraph)
library(tidyverse)
library(viridis)
@
\section{Comparison of dataset}
......@@ -66,7 +75,7 @@ dim(tmp)
\subsection{Omega plot}
<<>>=
<<1_plot_omega, fig.path="../figure/">>=
tab$dginn.primate_omegaM0Bpp[tab$dginn.primate_omegaM0Bpp=="na"]<-NA
x=as.numeric(as.character(
tab$dginn.primate_omegaM0Bpp[tab$status=="shared"]))
......@@ -92,9 +101,7 @@ text(x[x>0.45 &y>0.4], (y[x>0.45 &y>0.4]+0.01),
\subsection{Mondrian}
<<mondrianbats>>=
library(Mondrian)
<<1_mondrianbats, fig.path="../figure/">>=
monddata<-as.data.frame(tmp$Gene.name)
batstmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y",
......@@ -124,9 +131,7 @@ mondrian(monddata[,4:5],
\subsection{subsetR}
<<subsetbats>>=
library(UpSetR)
<<1_subsetbats, fig.path="../figure/">>=
upset(monddata, nsets = 4, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
......@@ -185,7 +190,7 @@ monddata[monddata$bats_dginn3==1 & monddata$primate_dginn3==0,]
\subsection{Figure tableau}
<<tablo>>=
<<1_tablo, fig.path="../figure/">>=
tablo<-as.data.frame(tmp$Gene.name)
tablo$nbats<-batstmp
tablo$nprimates<-primatetmp
......@@ -263,14 +268,17 @@ text(seq(from=0.1, to=1, length.out = length(tmp)-18),-3.0, tmp[19:length(tmp)],
@
<<>>=
write.csv(tablo[tablo$nbats>=3,"tmp$Gene.name"], "batssup3.csv",
write.csv(tablo[tablo$nbats>=3,"tmp$Gene.name"],
paste0(workdir, "out_tab/batssup3.csv"),
row.names=FALSE,
quote=FALSE)
write.csv(tablo[tablo$nprimates>=3,"tmp$Gene.name"], "primatessup3.csv",
write.csv(tablo[tablo$nprimates>=3,"tmp$Gene.name"],
paste0(workdir, "out_tab/primatessup3.csv"),
row.names=FALSE,
quote=FALSE)
write.csv(tablo, "primatesVbats.csv",
write.csv(tablo,
paste0(workdir, "out_tab/primatesVbats.csv"),
row.names=FALSE,
quote=FALSE)
@
......@@ -282,7 +290,7 @@ options(tidy=TRUE, width=70)
<<>>=
# Reading the Krogan table
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"),
"out_tab/covid_comp_complete.txt"),
fill=T, h=T, dec=",")
dim(tab)
......@@ -308,19 +316,15 @@ sort(tablo$`tmp$Gene.name`[tablo$`tmp$Gene.name` %in% krogan==F])
sort(krogan[krogan %in% tablo$`tmp$Gene.name`==F])
write.csv(tabloK, "primatesVbats_onlykrogan.csv", row.names=FALSE, quote=FALSE)
write.csv(tabloK,
paste0(workdir, "out_tab/primatesVbats_onlykrogan.csv"),
row.names=FALSE, quote=FALSE)
@
\section{Tanglegram}
<<eval=TRUE>>=
#install.packages('dendextend') # stable CRAN version
library(dendextend) # load the package
#install.packages("phytools") # stable CRAN version
library(phytools) # load the package
library(ggraph)
library(igraph)
library(tidyverse)
<<1_tanglegram, eval=TRUE, fig.path="../figure/">>=
tmp<-tablo[(tablo$nbats!=0 | tablo$nprimates!=0),]
#tmp<-head(tablo, 20)
......@@ -349,7 +353,8 @@ class(labels(dendpri))
dend12 <- dendlist(dendbats, dendpri)
png("figure/tanglegramm.png", width = 1800, height = 3000)
png(paste0(workdir, "figure/tanglegramm.png"),
width = 1800, height = 3000)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=2,
......@@ -385,7 +390,8 @@ font<-rep(1, length(labels(dendpri))*2)
#font[tmprss2]<-1.3
#font[length(labels(dendpri))+160]<-1.3
png("figure/tanglegramm.png", width = 1800, height = 3000)
png(paste0(workdir, "figure/tanglegramm.png"),
width = 1800, height = 3000)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=2,
......@@ -402,7 +408,7 @@ dev.off()
@
<<>>=
<<1_tanglegram_tests, fig.path="../figure/">>=
tmp<-tablo[(tablo$nbats>=3 | tablo$nprimates>=3),]
dim(tmp)
tmp<-as.data.frame(tmp)
......@@ -450,7 +456,8 @@ col[which(labels(dendbats) %in% interestpp)]<-"red"
png("figure/tanglegrammsup3.png", width = 500, height = 1200)
png(paste0(workdir, "figure/tanglegrammsup3.png"),
width = 500, height = 1200)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=3,
......@@ -468,7 +475,8 @@ dev.off()
## change the colours of the lines: sel vs sel or sel vs non-sel
setEPS()
postscript("figure/tanglegramsup3.eps", height=15, width=5)
postscript(paste0(workdir, "figure/tanglegramsup3.eps"),
height=15, width=5)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=3,
......@@ -489,7 +497,8 @@ labels_colors(dend12[[2]])<-rep(rainbow(15)[c(1:3, 9:11)], table(tmp$nprimates))
labels_colors(dend12[[1]])<-rep(viridis(10)[c(1:3, 7:9)], table(tmp$nbats))
labels_colors(dend12[[2]])<-rep(viridis(10)[c(1:3, 7:9)], table(tmp$nprimates))
setEPS()
postscript("figure/tanglegramsup3_V2.eps", height=15, width=5)
postscript(paste0(workdir, "figure/tanglegramsup3_V2.eps"),
height=15, width=5)
tanglegram(dend12, columns_width=c(3, 3,3), axes=FALSE,
edge.lwd=0, margin_inner=6,
margin_top=3,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment