Skip to content
Snippets Groups Projects
covid_comp_primate.Rnw 10 KiB
Newer Older
mcariou's avatar
mcariou committed
\documentclass[11pt, oneside]{article}   	% use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry}                		% See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper}                   		% ... or a4paper or a5paper or ... 
%\geometry{landscape}                		% Activate for for rotated page geometry
%\usepackage[parfill]{parskip}    		% Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx}				% Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
								% TeX will automatically convert eps --> pdf in pdflatex		
%\usepackage{amssymb}

\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}


\title{Positive selection on genes interacting with SARS-Cov2, comparison of different analysis}
\author{Marie Cariou}
\date{October 2020}							% Activate to display a given date or no date

\begin{document}
\maketitle

\tableofcontents

\newpage


\section{Data}

Analysis were formatted by the script covid\_comp\_script0\_table.Rnw.

<<>>=
mcariou's avatar
mcariou committed
home<-"/home/adminmarie/Documents/"
workdir<-paste0(home, "CIRI_BIBS_projects/2020_05_Etienne_covid/")
mcariou's avatar
mcariou committed

tab<-read.delim(paste0(workdir, 
  "covid_comp/covid_comp_complete.txt"), h=T, sep="\t")
dim(tab)
mcariou's avatar
mcariou committed
tab$Gene.name<-as.character(tab$Gene.name.x)
mcariou's avatar
mcariou committed
tab$Gene.name[tab$PreyGene=="MTARC1"]<-"MTARC1"
@

\section{Comparisons Primates}

\subsection{Janet Young's results (Young-primate) VS DGINN-full's results}

Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "omega" dans la sortie de dginn.

mcariou's avatar
mcariou committed
<<omegaM7M8_1>>=
tab$dginn.primate_omegaM0Bpp[tab$dginn.primate_omegaM0Bpp=="na"]<-NA
tab$dginn.primate_omegaM0Bpp<-as.numeric(as.character(
  tab$dginn.primate_omegaM0Bpp))

plot(tab$whole.gene.dN.dS.model.0, 
     tab$dginn.primate_omegaM0Bpp, 
     xlab="Omega Young-primate", 
     ylab="DGINN-full's",
     cex=0.3)
mcariou's avatar
mcariou committed
abline(0,1)
mcariou's avatar
mcariou committed
abline(lm(tab$dginn.primate_omegaM0Bpp~tab$whole.gene.dN.dS.model.0), 
       col="red")
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.4 & 
               tab$dginn.primate_omegaM0Bpp>0.5,]
mcariou's avatar
mcariou committed
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)

mcariou's avatar
mcariou committed
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.1 & 
               tab$dginn.primate_omegaM0Bpp>0.3,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)  

outlier<-tab[tab$whole.gene.dN.dS.model.0>0.33 & 
               tab$dginn.primate_omegaM0Bpp<0.2,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name)  

outlier<-tab[tab$whole.gene.dN.dS.model.0>0.6 & 
               tab$dginn.primate_omegaM0Bpp<0.6,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name) 
mcariou's avatar
mcariou committed
@

\subsection{Janet Young's results (Young-primate) VS Cooper's result}

Comparaison des Omega: colonne L "whole.gene.dN.dS.model.0" VS colonne "cooper.primates.Average\_dNdS".

mcariou's avatar
mcariou committed
<<omegaM7M8_2>>=
tab$cooper.primates.Average_dNdS<-as.numeric(as.character(
  tab$cooper.primates.Average_dNdS))

plot(tab$whole.gene.dN.dS.model.0, 
     tab$cooper.primates.Average_dNdS, 
     xlab="Omega Young-primate",
     ylab="Omega Cooper-primate",
     cex=0.3)
mcariou's avatar
mcariou committed
abline(0,1)
mcariou's avatar
mcariou committed
abline(lm(tab$cooper.primates.Average_dNdS~tab$whole.gene.dN.dS.model.0), 
       col="red")
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.15 & 
               tab$cooper.primates.Average_dNdS>0.4,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$cooper.primates.Average_dNdS,
outlier$Gene.name, cex=0.5)
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
outlier<-tab[tab$whole.gene.dN.dS.model.0<0.3 & 
               tab$cooper.primates.Average_dNdS>0.5,]
mcariou's avatar
mcariou committed
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$cooper.primates.Average_dNdS,
mcariou's avatar
mcariou committed
outlier$Gene.name, cex=0.5)
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
outlier<-tab[tab$whole.gene.dN.dS.model.0>0.3 & 
               tab$cooper.primates.Average_dNdS<0.1,]
text(x=outlier$whole.gene.dN.dS.model.0,
y=outlier$cooper.primates.Average_dNdS,
outlier$Gene.name, cex=0.5)
mcariou's avatar
mcariou committed
@

\subsection{Cooper's results (Cooper-primate) VS DGINN-full's results}

Comparaison des Omega: colonne "cooper.primates.Average\_dNdS" VS colonne "omega" dans la sortie de dginn.

mcariou's avatar
mcariou committed
<<omegaM7M8_3>>=
plot(tab$cooper.primates.Average_dNd, 
     tab$dginn.primate_omegaM0Bpp, 
     xlab="Omega Cooper-primate", 
     ylab="DGINN-full's",
     cex=0.3)
mcariou's avatar
mcariou committed
abline(0,1)
mcariou's avatar
mcariou committed
abline(lm(tab$dginn.primate_omegaM0Bpp~tab$cooper.primates.Average_dNd), col="red")
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
outlier<-tab[tab$cooper.primates.Average_dNd<0.4 & 
               tab$dginn.primate_omegaM0Bpp>0.5,]
mcariou's avatar
mcariou committed
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
mcariou's avatar
mcariou committed
outlier$Gene.name, cex=0.5)
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
outlier<-tab[tab$cooper.primates.Average_dNd<0.1 & 
               tab$dginn.primate_omegaM0Bpp>0.3,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name, cex=0.5)
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
outlier<-tab[tab$cooper.primates.Average_dNd>0.7 & 
               tab$dginn.primate_omegaM0Bpp<0.3,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name, cex=0.5)
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
outlier<-tab[tab$cooper.primates.Average_dNd>0.45 & 
               tab$dginn.primate_omegaM0Bpp<0.2,]
text(x=outlier$cooper.primates.Average_dNd,
y=outlier$dginn.primate_omegaM0Bpp,
outlier$Gene.name, cex=0.5)
@
mcariou's avatar
mcariou committed

\section{Overlap}

\subsection{Mondrian}

<<mondrianprimates>>=
library(Mondrian)

monddata<-as.data.frame(tab$Gene.name)
dim(monddata)

mcariou's avatar
mcariou committed
dginnfulltmp<-rowSums(cbind(tab$dginn.primate_BUSTED=="Y", 
                            tab$dginn.primate_BppM1M2=="Y", 
                            tab$dginn.primate_BppM7M8=="Y", 
                            tab$dginn.primate_codemlM1M2=="Y", 
                            tab$dginn.primate_codemlM7M8=="Y")) 
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
monddata$primates_young<-ifelse(
  tab$pVal.M8vsM7<0.05, 1, 0)
monddata$primate_cooper<-ifelse(
  tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
monddata$primates_dginn_full<-ifelse(
  dginnfulltmp>=3, 1,0)
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
mondrian(na.omit(monddata[,2:4]), 
         labels=c("Young", "Cooper", "DGINN-full >=3" ))
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
monddata$primates_dginn_full<-ifelse(
  dginnfulltmp>=4, 1,0)
mcariou's avatar
mcariou committed

mcariou's avatar
mcariou committed
mondrian(na.omit(monddata[,2:4]), 
         labels=c("Young", "Cooper", "DGINN-full >=4"))
mcariou's avatar
mcariou committed
@


\subsection{subsetR}

Just another representation of the same result.

<<subsetprimates>>=
library(UpSetR)
upsetdata<-as.data.frame(tab$Gene.name)

upsetdata$primates_young<-ifelse(tab$pVal.M8vsM7<0.05, 1, 0)
mcariou's avatar
mcariou committed
upsetdata$primate_cooper<-ifelse(
  tab$cooper.primates.M7.M8_p_value<0.05, 1, 0)
mcariou's avatar
mcariou committed
upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=3, 1,0)


upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F", 
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")

###
upsetdata$primates_dginn_full<-ifelse(dginnfulltmp>=4, 1,0)

upset(na.omit(upsetdata), nsets = 3, matrix.color = "#DC267F", 
main.bar.color = "#648FFF", sets.bar.color = "#FE6100")

@

\section{Gene List}

Genes under positive selection for at least 4 methods.

<<>>=
dginnfulltmp<-rowSums(cbind(tab$dginn.primate_BUSTED=="Y",
 tab$dginn.primate_BppM1M2=="Y", 
tab$dginn.primate_BppM7M8=="Y", 
tab$dginn.primate_codemlM1M2=="Y", 
tab$dginn.primate_codemlM7M8=="Y")) 

tab$Gene.name[dginnfulltmp>=4 & is.na(dginnfulltmp)==F]

tab$Gene.name[dginnfulltmp>=3 & is.na(dginnfulltmp)==F]

tmp<-tab[dginnfulltmp>=4 & is.na(dginnfulltmp)==F, 
c("Gene.name","dginn.primate_BUSTED", "dginn.primate_BppM1M2",
 "dginn.primate_BppM7M8","dginn.primate_codemlM1M2","dginn.primate_codemlM7M8")]

write.table(tmp, "geneList_DGINN_full_primate_pos4.txt", row.names=F, quote=F)
@


\section{Shiny like}

<<shiny, fig.height=11>>=
makeFig1 <- function(df){

  # prepare data for colors etc
  colMethods <- c("deepskyblue4", "darkorange" , "deepskyblue3" , "mediumseagreen" , "yellow3" , "black")
  nameMethods <- c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8", "MEME")
  metColor <- data.frame(Name = nameMethods , Col = colMethods , stringsAsFactors = FALSE)
  
  # subset for this specific figure
  #df <- df[df$nbY >= 1, ] # to drop genes found by 0 methods (big datasets)
  xt <- df[, c("BUSTED", "BppM1M2", "BppM7M8", "codemlM1M2", "codemlM7M8")]
  xt$Gene <- df$Gene
  nbrMeth <- 5
  # reverse order of dataframe so that genes with the most Y are at the bottom (to be on top of the barplot)
  xt[,1:5] <- ifelse(xt[,1:5] == "Y", 1, 0)
  # sort and Filter the 0 lines
  xt<-xt[order(rowSums(xt[,1:5])),]
  xt<-xt[rowSums(xt[,1:5])>2,]  

  row.names(xt)<-xt$Gene
  xt<-xt[,1:5]

  colFig1 <- metColor[which(metColor$Name %in% colnames(xt)) , ]
  
  ##### PART 1 : NUMBER OF METHODS
  par(xpd = NA , mar=c(2,7,4,0) ,   oma = c(0,0,0,0) , mgp = c(3,0.3,0))
  
  h =  barplot(
    t(xt),
    border = NA ,
    axes = F ,
    col = adjustcolor(colFig1$Col, alpha.f = 1),
    horiz = T ,
    las  = 2 ,
    main = "Methods detecting positive selection" , 
    cex.main = 0.85,
    cex.names  = min(50/nrow(xt), 1.5)
  )
  
  axis(3, line = 0, at = c(0:nbrMeth), label = c("0", rep("", nbrMeth -1), nbrMeth), tck = 0.02)
  
  legend("bottomleft",
         horiz = T,
         border = colFig1$Col,
         legend = colFig1$Name, 
         fill = colFig1$Col,
         cex = 0.8,
         bty = "n",
         xpd = NA
  )
}
@

<<>>=
source("covid_comp_shiny.R")


df<-read.delim(paste0(workdir,
"/data/DGINN_202005281649summary_cleaned.csv"), 
      fill=T, h=T, sep=",")

names(df)
dftmp<-tab[,c("File", "Name", "Gene.name", 
       "GeneSize", "dginn.primate_NbSpecies", "dginn.primate_omegaM0Bpp", 
       "dginn.primate_omegaM0codeml", "dginn.primate_BUSTED", "dginn.primate_BUSTED.p.value",
       "dginn.primate_MEME.NbSites", "dginn.primate_MEME.PSS", "dginn.primate_BppM1M2",
       "dginn.primate_BppM1M2.p.value", "dginn.primate_BppM1M2.NbSites", "dginn.primate_BppM1M2.PSS", 
       "dginn.primate_BppM7M8", "dginn.primate_BppM7M8.p.value", "dginn.primate_BppM7M8.NbSites",
       "dginn.primate_BppM7M8.PSS", "dginn.primate_codemlM1M2", "dginn.primate_codemlM1M2.p.value", 
       "dginn.primate_codemlM1M2.NbSites","dginn.primate_codemlM1M2.PSS", "dginn.primate_codemlM7M8",
       "dginn.primate_codemlM7M8.p.value", "dginn.primate_codemlM7M8.NbSites" , "dginn.primate_codemlM7M8.PSS")]

names(dftmp)<-names(df)
makeFig1(dftmp)

@

\end{document}