From 4071b2609551e6db613e97e00fd5c4d9f112fec1 Mon Sep 17 00:00:00 2001
From: mcariou <115-mcariou@users.noreply.gitbio.ens-lyon.fr>
Date: Mon, 20 Sep 2021 10:41:19 +0200
Subject: [PATCH] clean space

---
 doc/1_reference_legio_phylo.Rnw | 134 ++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 doc/1_reference_legio_phylo.Rnw

diff --git a/doc/1_reference_legio_phylo.Rnw b/doc/1_reference_legio_phylo.Rnw
new file mode 100644
index 0000000..6928376
--- /dev/null
+++ b/doc/1_reference_legio_phylo.Rnw
@@ -0,0 +1,134 @@
+\documentclass[11pt, oneside]{article}      % use "amsart" instead of "article" for AMSLaTeX format
+%\usepackage{geometry}                      % See geometry.pdf to learn the layout options. There are lots.
+%\geometry{letterpaper}                         % ... or a4paper or a5paper or ... 
+%\geometry{landscape}                       % Activate for rotated page geometry
+%\usepackage[parfill]{parskip}          % Activate to begin paragraphs with an empty line rather than an indent
+%\usepackage{graphicx}              % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode
+                                % TeX will automatically convert eps --> pdf in pdflatex        
+%\usepackage{amssymb}
+
+\usepackage[utf8]{inputenc}
+%\usepackage[cyr]{aeguill}
+%\usepackage[francais]{babel}
+%\usepackage{hyperref}
+
+\usepackage{graphicx}
+\usepackage{titling}
+\usepackage{listings}
+\usepackage{upquote}
+\usepackage{hyperref}
+
+\usepackage{xcolor}
+%\definecolor{gray}{rgb}{0.5,0.5,0.5}
+
+\lstnewenvironment{code}[1][]{ % boxed listing environment; the optional argument takes extra listings keys
+    \lstset{
+%       upquote=true,
+        columns=flexible,
+        basicstyle=\ttfamily,
+        language=[LaTeX]TeX, % default language; override per listing, e.g. \begin{code}[language=R]
+        texcsstyle=*\color{blue},
+        commentstyle=\color{gray},
+        frame=single,
+        rulecolor=\color{green!5}, % same colour as the background, so the frame blends into it
+        backgroundcolor=\color{green!5}, #1 % caller's keys come last so they override the defaults above
+    }
+}{}
+
+
+\title{Reference phylogeny for Legionella}
+\author{Marie Cariou}
+\date{September 2021}                            % Activate to display a given date or no date
+
+\begin{document}
+\maketitle
+
+\tableofcontents
+
+\newpage
+
+\section{Introduction}
+
+\subsection{Objective}
+
+We aim at the reconstruction of a reference phylogeny for all \textit{Legionella} species (contained in Burstein et al. 2016 or Gupta et al. 2020) and 5 selected strains of \textit{Legionella pneumophila}.
+
+We chose to use the 78 genes selected by Burstein et al. 2016. The first objective is then to recover individual gene sequences for all these genomes.
+
+\subsection{Data}
+
+Data from the first publication:
+
+\url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5050043/}
+\url{https://www.ncbi.nlm.nih.gov/bioproject/PRJNA285910}
+<<>>=
+home <- "~/Documents/CIRI_BIBS_projects/2021_04_Doublet/pipeline/phylolegio/"
+data <- paste0(home, "data/")
+# List the files in the data directory whose names match "NIHMS817051".
+datalist1 <- list.files(path = data, pattern = "NIHMS817051")
+# Print the matching file names in the document.
+datalist1
+@
+
+<<>>=
+tab1 <- read.table(file = paste0(data, datalist1[1]), header = TRUE, sep = "\t", skip = 2, fill = TRUE, comment.char = "")
+head(tab1)  # preview the table read from the first matching file, after skipping its two leading lines
+@
+
+<<>>=
+tabPRJNA <- read.table(file = paste0(data, "PRJNA285910_AssemblyDetails.txt"), header = FALSE, sep = "\t", skip = 2, fill = TRUE)
+tabPRJNA <- tabPRJNA[, seq_len(6)]  # keep only the first six columns
+names(tabPRJNA) <- c("Assembly", "Level", "WGS", "BioSample", "Strain", "Taxonomy")  # names set by hand: the file is read with header = FALSE
+@
+
+\url{https://pubmed.ncbi.nlm.nih.gov/33881638/}
+
+<<>>=
+datalist2 <- list.files(path = data, pattern = "Gupta")
+# Print the names of the files that matched "Gupta".
+datalist2
+# The first two matches are read the same way: tab-separated, header row, short rows padded (fill = TRUE).
+gupta1 <- read.table(file = paste0(data, datalist2[1]), header = TRUE, sep = "\t", fill = TRUE, comment.char = "")
+
+head(gupta1)
+
+gupta2 <- read.table(file = paste0(data, datalist2[2]), header = TRUE, sep = "\t", fill = TRUE, comment.char = "")
+
+head(gupta2)
+# Show the head of tab1 (read in an earlier chunk) again, next to the two tables above.
+head(tab1)
+
+@
+
+I cannot find the correspondence between the accession numbers provided in Table 1 of the publication and the GenBank assembly accession numbers.
+
+<<>>=
+# Two semicolon-separated tables with a header row; short rows are padded (fill = TRUE).
+ncbi <- read.table(file = paste0(data, "tab_ncbi_file.csv"), header = TRUE, sep = ";", fill = TRUE, comment.char = "")
+ncbi2 <- read.table(file = paste0(data, "tab_ncbi_contigs_parsed.csv"), header = TRUE, sep = ";", fill = TRUE, comment.char = "")
+sp <- c("adelaidensis", "birminghamensis", "brunensis", "cherrii")
+@
+
+\section{Get gene sequences}
+
+
+
+
+\subsection{Get sequences from Burstein et al.}
+
+\subsection{Get 78 sequences from Gupta species}
+
+\subsection{Get Legionella pneumophila strain sequences}
+
+\section{Phylogeny}
+
+\subsection{Gene alignment}
+
+\subsection{Concatenate}
+
+\subsection{Supertree}
+
+
+\end{document}
+
+
-- 
GitLab