Commit 03fae992 authored by your name's avatar your name
Browse files

script for primate/bats comparisons

parent 82f55ae4
\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{Janvier 2021} % Activate to display a given date or no date
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.
<<>>=
workdir<-"/home/adminmarie/Documents/CIRI_BIBS_projects/2020_05_Etienne_covid/"
tab<-read.delim(paste0(workdir,
"covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
dim(tab)
@
\section{Comparison of dataset}
\subsection{Data}
<<data>>=
tmp<-na.omit(tab[,c("Gene.name", "bats_BUSTED", "bats_BppM1M2", "bats_BppM7M8",
"bats_codemlM1M2", "bats_codemlM7M8", "dginn.primate_codemlM1M2",
"dginn.primate_codemlM7M8", "dginn.primate_BppM1M2",
"dginn.primate_BppM7M8", "dginn.primate_BUSTED")])
dim(tmp)
@
\subsection{Mondrian}
<<mondrianbats>>=
library(Mondrian)
monddata<-as.data.frame(tmp$Gene.name)
batstmp<-rowSums(cbind(tmp$bats_codemlM1M2=="Y", tmp$bats_codemlM7M8=="Y",
tmp$bats_BppM1M2=="Y", tmp$bats_BppM7M8=="Y", tmp$bats_BUSTED=="Y"))
primatetmp<-rowSums(cbind(tmp$"dginn.primate_codemlM1M2"=="Y",
tmp$"dginn.primat_codemlM7M8"=="Y", tmp$"dginn.primate_BppM1M2"=="Y",
tmp$"dginn.primate_BppM7M8"=="Y", tmp$"dginn.primate_BUSTED"=="Y"))
monddata$bats_dginn3<-ifelse(batstmp>=3, 1,0)
monddata$primate_dginn3<-ifelse(primatetmp>=3, 1,0)
monddata$bats_dginn4<-ifelse(batstmp>=4, 1,0)
monddata$primate_dginn4<-ifelse(primatetmp>=4, 1,0)
mondrian(monddata[,2:3], labels=c("DGINN bats >3", "DGINN primate >3"))
mondrian(monddata[,4:5], labels=c("DGINN bats >4", "DGINN primate >4"))
@
\subsection{subsetR}
<<subsetbats>>=
library(UpSetR)
upset(monddata, nsets = 4, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
upset(monddata[,1:3], nsets = 2, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
upset(monddata[,c(1,4,5)], nsets = 2, matrix.color = "#DC267F",
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")
@
\section{Which are these genes?}
\subsection{Gene under positive selection in both bats and primates}
4 methods:
<<>>=
monddata[monddata$bats_dginn4==1 & monddata$primate_dginn4==1,]
@
3 methods:
<<>>=
monddata[monddata$bats_dginn3==1 & monddata$primate_dginn3==1,]
@
\subsection{Gene under positive selection only in primates}
4 methods:
<<>>=
monddata[monddata$bats_dginn4==0 & monddata$primate_dginn4==1,]
@
3 methods:
<<t>>=
monddata[monddata$bats_dginn3==0 & monddata$primate_dginn3==1,]
@
\subsection{Gene under positive selection only in bats}
4 methods:
<<>>=
monddata[monddata$bats_dginn4==1 & monddata$primate_dginn4==0,]
@
3 methods:
<<>>=
monddata[monddata$bats_dginn3==1 & monddata$primate_dginn3==0,]
@
\subsection{Figure tableau}
<<tablo>>=
tablo<-as.data.frame(tmp$Gene.name)
tablo$nbats<-batstmp
tablo$nprimates<-primatetmp
plot(NULL, xlim=c(-0.5,5.5), ylim=c(-3,5.5), xlab="bats", ylab="primates", main="Genes supported by x,y methods in bats and primates", bty="n", xaxt="n", yaxt="n")
text(x=rep(-0.6, 6), y=0:5, 0:5)
text(y=rep(-0.65, 6), x=0:5, 0:5)
sapply(seq(from=-0.5, to=5.5, by=1), function(x){
segments(x0=x, x1=x, y0=-0.5, y1=5.5)
})
sapply(seq(from=-0.5, to=5.5, by=1), function(x){
segments(x0=-0.5, x1=5.5, y0=x, y1=x)
})
for (p in 0:5){
for (b in 0:5){
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==b & tablo$nprimates==p]
if(length(tmp)>0 & length(tmp)<8){
text(b,seq(from=(p-0.4), to=(p+0.4), length.out = length(tmp)), tmp, cex=0.4)
}else if (length(tmp)>8 & length(tmp)<16){
print(c(p, b))
text((b-0.3),seq(from=(p-0.4), to=(p+0.4), length.out = 8), tmp[1:8], cex=0.4)
text((b+0.3),seq(from=(p-0.4), to=(p+0.4), length.out = (length(tmp)-8)), tmp[9:length(tmp)], cex=0.4)
}else if (length(tmp)>16){
text(b,p, paste0(length(tmp), " values"))
}
}
}
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==1]
text(-0.4,-1.2, "p=1/n=0", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-1.1, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=3, length.out = length(tmp)-18),-1.3, tmp[19:length(tmp)], cex=0.4)
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==1 & tablo$nprimates==1]
text(-0.4,-1.7, "p=1/n=1", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-1.6, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=2, length.out = length(tmp)-18),-1.8, tmp[19:length(tmp)], cex=0.4)
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==0 & tablo$nprimates==0]
text(-0.4,-2.2, "p=0/n=0", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 17),-2.1, tmp[1:17], cex=0.4)
text(seq(from=0.1, to=4.4, length.out = length(tmp)-17),-2.3, tmp[18:length(tmp)], cex=0.4)
tmp<-tablo$`tmp$Gene.name`[tablo$nbats==2 & tablo$nprimates==0]
text(-0.4,-2.7, "p=0/n=2", cex=0.6)
text(seq(from=0.1, to=5.5, length.out = 18),-2.6, tmp[1:18], cex=0.4)
text(seq(from=0.1, to=1, length.out = length(tmp)-18),-2.8, tmp[19:length(tmp)], cex=0.4)
@
\end{document}
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment