Skip to content
Snippets Groups Projects
Commit 52187cdf authored by mcariou's avatar mcariou
Browse files

gwas

parent 0f72b8b0
No related branches found
No related tags found
No related merge requests found
\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparing GWAS and PS on GWAS}
\author{Marie Cariou}
\date{March 2021} % Activate to display a given date or no date
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
\section{Data}
\subsection{GWAS 1}
-Article:
https://www.nejm.org/doi/full/10.1056/NEJMoa2020283
-Data:
https://ikmb.shinyapps.io/COVID-19\_GWAS\_Browser/
-Locally:
<<>>=
home<-"/home/adminmarie/Documents/CIRI_BIBS_projects/"
datapath<-paste0(home, "2020_05_Etienne_covid/data/GWAS/")
gwas1<-paste0(datapath, "meta_analysis_II.hg38.gwascatalogformat.tsv")
@
\subsection{GWAS 2}
-Article:
Genetic mechanisms of critical illness in COVID-19
https://www.nature.com/articles/s41586-020-03065-y
-Data:
https://genomicc.org/data/
-Locally:
<<>>=
gwas2<-list.files(datapath, pattern="genomicc")
gwas2<-paste0(datapath, gwas2)
@
\subsection{DGINN data}
<<>>=
home<-"/home/adminmarie/Documents/CIRI_BIBS_projects/"
tabpath<-paste0(home, "2020_05_Etienne_covid/2020_dginn_covid19/")
@
<<>>=
#table
tab<-read.delim(paste0(tabpath,
"out_tab/covid_comp_alldginn.txt"), h=T, sep="\t")
dim(tab)
# fasta
fasta<-list.files(datapath, pattern="FYCO")
@
\subsection{FYCO1}
https://www.ensembl.org/Homo\_sapiens/Gene/Summary?g=ENSG00000163820;r=3:45917899-45995824
coordinate: Chromosome 3: 45,917,899-45,995,824
\subsection{Objectif}
Table :
\begin{itemize}
\item pos dans genome de ref
\item pos dans l'aln
\item under PS oui/non
\item GWAS1 oui/non
\end{itemize}
\section{GWAS data}
<<>>=
deb<-45917899
end<-45995824
len<-45995824-45917899
thres<-0.10
posref<-deb:end
fyco1tab<-as.data.frame(posref)
# GWAS 1
cmd<-paste0("cat ", gwas1, " | grep '^3' > tmp")
system(cmd)
gwastab<-read.table("tmp", h=T)
system("rm tmp")
gwastab<-gwastab[(gwastab$chromosome==3 & gwastab$base_pair_location>deb & gwastab$base_pair_location<end),]
## filtrer sur pvalue et créer colonne F/T GWAS
gwas1pos<-gwastab$base_pair_location[gwastab$p_value<thres]
fyco1tab$gwas1<-ifelse(posref %in% gwas1pos, TRUE, FALSE)
## Same for GWAS 2
gwastab<-read.table(gwas2[1],h=T, sep="\t")
gwastab<-gwastab[(gwastab$CHR==3 & gwastab$POS>deb & gwastab$POS<end),]
gwas1pos<-gwastab$POS[gwastab$Pval<thres]
file1<-ifelse(posref %in% gwas1pos, TRUE, FALSE)
gwastab<-read.table(gwas2[2],h=T, sep="\t")
gwastab<-gwastab[(gwastab$CHR==3 & gwastab$POS>deb & gwastab$POS<end),]
gwas1pos<-gwastab$POS[gwastab$Pval<thres]
file2<-ifelse(posref %in% gwas1pos, TRUE, FALSE)
gwastab<-read.table(gwas2[3],h=T, sep="\t")
gwastab<-gwastab[(gwastab$CHR==3 & gwastab$POS>deb & gwastab$POS<end),]
gwas1pos<-gwastab$POS[gwastab$Pval<thres]
file3<-ifelse(posref %in% gwas1pos, TRUE, FALSE)
gwastab<-read.table(gwas2[4],h=T, sep="\t")
gwastab<-gwastab[(gwastab$CHR==3 & gwastab$POS>deb & gwastab$POS<end),]
gwas1pos<-gwastab$POS[gwastab$Pval<thres]
file4<-ifelse(posref %in% gwas1pos, TRUE, FALSE)
fyco1tab$gwas2<-((file1 | file2) | file3 ) | file4
## Voir cohérence entre les 2
table(fyco1tab[,2:3])
@
How to convert coordinates from alignments to genomic coordinate to the multi species alignment coordinates?
1. I downloaded the cds sequence to know for which transcript I should refer to
FYCO1-205 is the right length. ENST00000535325.5, le cds de référence fait 4497 bp, l'alignement fait 4503 pb
FYCO1-201 has one exon missing, as moste of the sequences in the alignment
I will get the coordinates of exons within this transcript ENST00000535325.5
cat /Xnfs/ciridb/shared_data/Genomes/Ensembl/Hs/GRCh38/Annot/v77/Homo_sapiens.GRCh38.77.gtf | grep "ENST00000535325" | awk '{print $1 " " $3 " " $4 " " $5 " " $10 " " $14 " " $17 " " $18}' > human_fyco1.gtf
(())
There might be a max 6bp discrepency between positions, we will see...
gtf
Le gene est en antisens co
<<>>=
gtf<-read.table("../../data/GWAS/human_fyco1.gtf", h=F)
#gtf$V3=gtf$V3-45900000
#gtf$V4=gtf$V4-45900000
cds<-gtf[gtf$V2 %in% c("CDS"),]
cds$len=cds$V4-cds$V3
sum(cds$len)
# I think there is an exon missing outdated gtf?
#make a vector with positions
cat<-numeric()
for (i in 1:nrow(cds)){
tmp<-cds$V3[i]:cds$V4[i]
cat<-c(cat,tmp)
}
cat<-sort(cat)
cds_tab<-cbind(cat, length(cat):1)
colnames(cds_tab)<-c("posref", "pos_aln")
fyco1tab<-merge(fyco1tab, cds_tab, by="posref", all.x=T)
@
add PS sites
<<>>=
PS<-tab[tab$Gene.name=="FYCO1","dginn.primate_MEME.PSS"]
PS<-unlist(strsplit(as.character(PS), split = ", "))
@
\end{document}
File added
\documentclass[11pt, oneside]{article}\usepackage[]{graphicx}\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb
\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
\usepackage{alltt} % use "amsart" instead of "article" for AMSLaTeX format
%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots.
%\geometry{letterpaper} % ... or a4paper or a5paper or ...
%\geometry{landscape} % Activate for for rotated page geometry
%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent
%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
% TeX will automatically convert eps --> pdf in pdflatex
%\usepackage{amssymb}
\usepackage[utf8]{inputenc}
%\usepackage[cyr]{aeguill}
%\usepackage[francais]{babel}
%\usepackage{hyperref}
\title{Positive selection on genes interacting with SARS-Cov2, comparing GWAS and PS on GWAS}
\author{Marie Cariou}
\date{March 2021} % Activate to display a given date or no date
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
\tableofcontents
\newpage
\section{Data}
\section{Data}
\subsection{GWAS 1}
-Article:
https://www.nejm.org/doi/full/10.1056/NEJMoa2020283
-Data:
https://ikmb.shinyapps.io/COVID-19\_GWAS\_Browser/
-Locally:
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/"}
\hlstd{datapath}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,} \hlstr{"2020_05_Etienne_covid/data/GWAS/"}\hlstd{)}
\hlstd{gwas1}\hlkwb{<-}\hlkwd{paste0}\hlstd{(datapath,} \hlstr{"meta_analysis_II.hg38.gwascatalogformat.tsv"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{GWAS 2}
-Article:
Genetic mechanisms of critical illness in COVID-19
https://www.nature.com/articles/s41586-020-03065-y
-Data:
https://genomicc.org/data/
-Locally:
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{gwas2}\hlkwb{<-}\hlkwd{list.files}\hlstd{(datapath,} \hlkwc{pattern}\hlstd{=}\hlstr{"genomicc"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{DGINN data}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{home}\hlkwb{<-}\hlstr{"/home/adminmarie/Documents/CIRI_BIBS_projects/"}
\hlstd{tabpath}\hlkwb{<-}\hlkwd{paste0}\hlstd{(home,} \hlstr{"2020_05_Etienne_covid/2020_dginn_covid19/"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\begin{knitrout}
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlcom{#table}
\hlstd{tab}\hlkwb{<-}\hlkwd{read.delim}\hlstd{(}\hlkwd{paste0}\hlstd{(tabpath,}
\hlstr{"out_tab/covid_comp_alldginn.txt"}\hlstd{),} \hlkwc{h}\hlstd{=T,} \hlkwc{sep}\hlstd{=}\hlstr{"\textbackslash{}t"}\hlstd{)}
\hlkwd{dim}\hlstd{(tab)}
\end{alltt}
\begin{verbatim}
## [1] 442 56
\end{verbatim}
\begin{alltt}
\hlcom{# fasta}
\hlstd{fasta}\hlkwb{<-}\hlkwd{list.files}\hlstd{(datapath,} \hlkwc{pattern}\hlstd{=}\hlstr{"FYCO"}\hlstd{)}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{FYCO1}
https://www.ensembl.org/Homo\_sapiens/Gene/Summary?g=ENSG00000163820;r=3:45917899-45995824
coordinate: Chromosome 3: 45,917,899-45,995,824
\subsection{Objectif}
Table :
\begin{itemize}
\item pos dans genome de ref
\item pos dans l'aln
\item under PS oui/non
\item GWAS oui/non
\end{itemize}
\end{document}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment