From 4071b2609551e6db613e97e00fd5c4d9f112fec1 Mon Sep 17 00:00:00 2001 From: mcariou <115-mcariou@users.noreply.gitbio.ens-lyon.fr> Date: Mon, 20 Sep 2021 10:41:19 +0200 Subject: [PATCH] clean space --- doc/1_reference_legio_phylo.Rnw | 134 ++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 doc/1_reference_legio_phylo.Rnw diff --git a/doc/1_reference_legio_phylo.Rnw b/doc/1_reference_legio_phylo.Rnw new file mode 100644 index 0000000..6928376 --- /dev/null +++ b/doc/1_reference_legio_phylo.Rnw @@ -0,0 +1,134 @@ +\documentclass[11pt, oneside]{article} % use "amsart" instead of "article" for AMSLaTeX format +%\usepackage{geometry} % See geometry.pdf to learn the layout options. There are lots. +%\geometry{letterpaper} % ... or a4paper or a5paper or ... +%\geometry{landscape} % Activate for rotated page geometry +%\usepackage[parfill]{parskip} % Activate to begin paragraphs with an empty line rather than an indent +%\usepackage{graphicx} % Use pdf, png, jpg, or eps with pdflatex; use eps in DVI mode + % TeX will automatically convert eps --> pdf in pdflatex +%\usepackage{amssymb} + +\usepackage[utf8]{inputenc} +%\usepackage[cyr]{aeguill} +%\usepackage[francais]{babel} +%\usepackage{hyperref} + +\usepackage{graphicx} +\usepackage{titling} +\usepackage{listings} +\usepackage{upquote} +\usepackage{hyperref} + +\usepackage{xcolor} +%\definecolor{gray}{rgb}{0.5,0.5,0.5} + +\lstnewenvironment{code}[1][]{ + \lstset{ +% upquote=true, + columns=flexible, + basicstyle=\ttfamily, + language=[LaTeX]TeX, + texcsstyle=*\color{blue}, + commentstyle=\color{gray}, + frame=single, + rulecolor=\color{green!5}, + backgroundcolor=\color{green!5}, + } +}{} + + +\title{Reference phylogeny for Legionella} +\author{Marie Cariou} +\date{September 2021} % Activate to display a given date or no date + +\begin{document} +\maketitle + +\tableofcontents + +\newpage + +\section{Introduction} + +\subsection{Objective} + +We aim at the reconstruction of a 
reference phylogeny for all \textit{Legionella} species (contained in Burstein et al. 2016 or Gupta et al. 2020) and 5 selected strains of \textit{Legionella pneumophila}. + +We chose to use the 78 genes selected by Burstein et al. 2016. The first objective is then to recover individual gene sequences for all these genomes. + +\subsection{Data} + +Data from the first publication: + +\url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5050043/} +\url{https://www.ncbi.nlm.nih.gov/bioproject/PRJNA285910} +<<>>= +home<-"~/Documents/CIRI_BIBS_projects/2021_04_Doublet/pipeline/phylolegio/" +data<-paste0(home, "data/") + +datalist1<-list.files(data, pattern="NIHMS817051") + +datalist1 +@ + +<<>>= +tab1<-read.table(paste0(data, datalist1[1]), skip=2, sep="\t", fill=TRUE, header=TRUE, comment.char = "") +head(tab1) +@ + +<<>>= +tabPRJNA<-read.table(paste0(data, "PRJNA285910_AssemblyDetails.txt"), header=FALSE, sep="\t", skip=2, fill=TRUE) +tabPRJNA<-tabPRJNA[,1:6] +names(tabPRJNA)<-c("Assembly", "Level", "WGS", "BioSample", "Strain", "Taxonomy") +@ + +\url{https://pubmed.ncbi.nlm.nih.gov/33881638/} + +<<>>= +datalist2<-list.files(data, pattern="Gupta") + +datalist2 + +gupta1<-read.table(paste0(data, datalist2[1]), sep="\t", fill=TRUE, header=TRUE, comment.char = "") + +head(gupta1) + +gupta2<-read.table(paste0(data, datalist2[2]), sep="\t", fill=TRUE, header=TRUE, comment.char = "") + +head(gupta2) + +head(tab1) + +@ + +Je ne parviens pas à retrouver la correspondance entre les numéros d'accession fournis dans le tableau 1 de la publication et les numéros d'accession d'assemblage GenBank. + +<<>>= +ncbi<-read.table(paste0(data, "tab_ncbi_file.csv"), sep=";", fill=TRUE, header=TRUE, comment.char = "") +sp<-c("adelaidensis", "birminghamensis", "brunensis", "cherrii") +ncbi2<-read.table(paste0(data, "tab_ncbi_contigs_parsed.csv"), sep=";", fill=TRUE, header=TRUE, comment.char = "") + +@ + +\section{Get gene sequences} + + + + +\subsection{Get sequences from Burstein et al.} + +\subsection{Get 78 
sequences from Gupta species} + +\subsection{Get \textit{Legionella pneumophila} strain sequences} + +\section{Phylogeny} + +\subsection{Gene alignment} + +\subsection{Concatenate} + +\subsection{Supertree} + + +\end{document} + + -- GitLab