From 1c5dabe33ed1402c0a647744a755266fa21b2392 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Fri, 4 Mar 2022 17:13:35 +0100
Subject: [PATCH] good_practices.tex: remove label{} tags

---
 .gitignore         |  1 +
 Makefile           |  6 ++--
 README.md          | 85 +++++++++++++++++-----------------------------
 good_practices.tex | 36 ++++++++++----------
 4 files changed, 53 insertions(+), 75 deletions(-)

diff --git a/.gitignore b/.gitignore
index ee2960d..7888543 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@
 *.DS_Store
 *.tags*
 *.html
+public/
diff --git a/Makefile b/Makefile
index d4a1d52..a398b38 100644
--- a/Makefile
+++ b/Makefile
@@ -6,9 +6,7 @@ public/github-pandoc.css: github-pandoc.css
 	cp github-pandoc.css public/github-pandoc.css
 
 public/good_practices.html: good_practices.tex bibliography.bib  github-pandoc.css
-	pandoc good_practices.tex --bibliography=bibliography.bib -c github-pandoc.css --citeproc --toc --standalone --number-sections -o public/good_practices.html \
-	&& sed -Ei 's|>\[sec:(.*)\]<|>\1<|g' public/good_practices.html
+	pandoc good_practices.tex --bibliography=bibliography.bib -c github-pandoc.css --citeproc --toc --standalone --number-sections -o public/good_practices.html
 
 README.md: good_practices.tex bibliography.bib
-	pandoc good_practices.tex --bibliography=bibliography.bib  --citeproc --toc --standalone --number-sections -o README.md \
-	&& sed -Ei 's|\[\\\[sec:(.*)\\\]\]|[\1]|g' README.md
\ No newline at end of file
+	pandoc good_practices.tex --bibliography=bibliography.bib  --citeproc --toc --standalone --number-sections -o README.md
diff --git a/README.md b/README.md
index 816f434..8bb88b3 100644
--- a/README.md
+++ b/README.md
@@ -9,9 +9,8 @@ title: |
 ---
 
 -   [Introduction](#introduction)
--   [[1]{.toc-section-number} Project organization
-    []{#sec:project.organisation
-    label="sec:project.organisation"}](#project-organization)
+-   [[1]{.toc-section-number} Project
+    organization](#project-organization)
     -   [[1.1]{.toc-section-number} Text files at the root of the
         project
         directory](#text-files-at-the-root-of-the-project-directory)
@@ -21,17 +20,14 @@ title: |
     -   [[1.4]{.toc-section-number} `doc` folder](#doc-folder)
     -   [[1.5]{.toc-section-number} `results` folder](#results-folder)
     -   [[1.6]{.toc-section-number} `bin` folder](#bin-folder)
--   [[2]{.toc-section-number} Data Management []{#sec:data.managment
-    label="sec:data.managment"}](#data-management)
+-   [[2]{.toc-section-number} Data Management](#data-management)
     -   [[2.1]{.toc-section-number} Public archives](#public-archives)
     -   [[2.2]{.toc-section-number} PSMN:](#psmn)
     -   [[2.3]{.toc-section-number} Code safety](#code-safety)
--   [[3]{.toc-section-number} Versioning []{#sec:versioning
-    label="sec:versioning"}](#versioning)
+-   [[3]{.toc-section-number} Versioning](#versioning)
     -   [[3.1]{.toc-section-number} Installing `git`](#installing-git)
     -   [[3.2]{.toc-section-number} Using git](#using-git)
--   [[4]{.toc-section-number} Coding []{#sec:coding
-    label="sec:coding"}](#coding)
+-   [[4]{.toc-section-number} Coding](#coding)
     -   [[4.1]{.toc-section-number} Write programs for people, not
         computers](#write-programs-for-people-not-computers)
     -   [[4.2]{.toc-section-number} Apply a naming
@@ -48,27 +44,23 @@ However, most of us are never taught how to do this correctly and
 efficiently. The resulting problems are multiple and easily avoidable.
 This document summarizes a set of good practices in bioinformatics.
 
-1.  Section [\[sec:project.organisation\]](#sec:project.organisation){reference-type="ref"
-    reference="sec:project.organisation"}, presents the organization of
-    your working folder for a given bioinformatics project.
+1.  Section 1 presents the organization of your working folder for a
+    given bioinformatics project.
 
-2.  Section [\[sec:data.managment\]](#sec:data.managment){reference-type="ref"
-    reference="sec:data.managment"}, lists the resources available to
-    manage and secure data in your project.
+2.  Section 2 lists the resources available to manage and secure data
+    in your project.
 
-3.  Section [\[sec:versioning\]](#sec:versioning){reference-type="ref"
-    reference="sec:versioning"}, presents the `git` code versioning
-    system and some examples on how to use it.
+3.  Section 3 presents the `git` code versioning system and some
+    examples on how to use it.
 
-4.  Section [\[sec:coding\]](#sec:coding){reference-type="ref"
-    reference="sec:coding"}, enumerates some rules to follow when you
-    write code. These rules will ease the reproducibility of your
-    analysis and collaborative development for your project.
+4.  Section 4 enumerates some rules to follow when you write code.
+    These rules will ease the reproducibility of your analysis and
+    collaborative development for your project.
 
 These good practices were compiled from different sources, often
-overlapping, listed in the References starting page of this document.
+overlapping, listed in the References of this document.
 
-# Project organization []{#sec:project.organisation label="sec:project.organisation"}
+# Project organization
 
 The first step at the start of a bioinformatic project is to plan for
 the structure of the project. Following this structure will facilitate
@@ -145,9 +137,7 @@ and time-table.
 
 The `data` folder must contain a `.gitignore` file whose content is
 simply "`*`". Therefore, `git` will ignore files in this folder (See
-Section [\[sec:data.managment\]](#sec:data.managment){reference-type="ref"
-reference="sec:data.managment"} for more information on data
-management).\
+Section 2 for more information on data management).\
 A general rule for data management is to have a single authoritative
 representation of every piece of data in the system.\
 The `data` folder must contain only the raw data for your project. No
@@ -172,8 +162,7 @@ can create a `data/examples` folder that contains small toy data sets to
 test your scripts or software as described in the `README` file. This
 will give to others the possibility to validate your work, and allow you
 to check if new modifications work correctly, hence saving you a lot of
-time (See Section [\[sec:coding\]](#sec:coding){reference-type="ref"
-reference="sec:coding"} for more information on testing).
+time (See Section 4 for more information on testing).
 
 ## `src` folder
 
@@ -207,8 +196,7 @@ history as long as it's gone in the final version).
 
 The `tests` folder must contain a list of tests files that can be
 executed to test your code. This will be explained in more detail in the
-Section [\[sec:coding\]](#sec:coding){reference-type="ref"
-reference="sec:coding"} on tests-driven development.
+Section 4 on test-driven development.
 
 ## `doc` folder
 
@@ -231,9 +219,7 @@ the generation of documents in the `doc` folder.
 
 The `results` folder must contain a `.gitignore` file whose content is
 simply "`*`". Therefore, `git` will ignore files in this folder (See
-Section [\[sec:data.managment\]](#sec:data.managment){reference-type="ref"
-reference="sec:data.managment"} for more information on data
-management).\
+Section 2 for more information on data management).\
 Every generated results or temporary files must go to the results
 folder. This also means that the integrality of the `results` folder can
 be regenerated from the `data`, `bin` and `src` folders. If this is not
@@ -265,7 +251,7 @@ from the internet or other sources. This folder can also be
 automatically filed if necessary by the execution of the content of the
 `src` folder.
 
-# Data Management []{#sec:data.managment label="sec:data.managment"}
+# Data Management
 
 In this section we will present some rules to manage your project data.
 Given the size of current NGS data set one must find the balance between
@@ -276,11 +262,9 @@ Your code and documentation are also valuable sets of files. Using,
 (and the computer of every collaborator in the project), on the gitbio
 server and on the backup of the gitbio server (updated every 24h). The
 details of the code and documentation management within your project are
-developed in `src` and `doc` paragraph of the
-Section [\[sec:project.organisation\]](#sec:project.organisation){reference-type="ref"
-reference="sec:project.organisation"}. In this section, we focus on
-replicating the `data` and `results` folder content of your project, on
-multiple sites, in order to secure it.\
+developed in the `src` and `doc` paragraphs of Section 1. In this
+section, we focus on replicating the `data` and `results` folder content
+of your project, on multiple sites, in order to secure it.\
 From the time spent to get the material, to the cost of the reagents and
 of the sequencing, your data are precious. Moreover for reproducibility
 concern you should always keep a raw version of your data to go back to.
@@ -332,13 +316,11 @@ working on your project if they use the PSMN computing facilities.\
 Most of the human bioinformatic work will result in the production of
 lines of code or text. While important, the size of such data is often
 quite small and should be copied to other places as often as possible.\
-When using a version control system (See
-Section [\[sec:versioning\]](#sec:versioning){reference-type="ref"
-reference="sec:versioning"}), making regular pushes to the LBMC gitbio
-server will not only make you gain time to deal with different versions
-of your project but also save a copy of your code on the server. You can
-also make instantaneous or daily backup in your home directory at the
-PSNM.
+When using a version control system (See Section 3), making regular
+pushes to the LBMC gitbio server will not only make you gain time to
+deal with different versions of your project but also save a copy of
+your code on the server. You can also make instantaneous or daily backup
+in your home directory at the PSMN.
 
 With the LBMC you can also use the [Silexe
 server](http://biowiki.biologie.ens-lyon.fr/). The CNRS provides a
@@ -347,7 +329,7 @@ synchronize folders on their servers (100Gb). The UE provides a
 synchronization service called [b2drop](https://b2drop.eudat.eu) to
 synchronize folders on their servers (20Gb).
 
-# Versioning []{#sec:versioning label="sec:versioning"}
+# Versioning
 
 Biologists keep their lab journal up to date so their future self or
 other people can check on and reproduce their work. In bioinformatics
@@ -373,8 +355,7 @@ point of your project relies on the granularity of those time points.
 Therefore, you should try to make incremental changes to your project
 and record them with the version control software as often as possible.
 This will also help you to comply with the recommendations of the
-Section [\[sec:coding\]](#sec:coding){reference-type="ref"
-reference="sec:coding"} on coding.\
+Section 4 on coding.\
 You can find the LBMC course on `git` at
 [https://gitbio.ens-lyon.fr/LBMC/hub/formations/git_basis/](https://gitbio.ens-lyon.fr/LBMC/hub/formations/git_basis/-/blob/master/README.md)
 
@@ -454,7 +435,7 @@ community around `git` so most of your problems with it should find
 their answer online or in the LBMC. Also, don't forget to go to the
 `git` formation organized in the LBMC !
 
-# Coding []{#sec:coding label="sec:coding"}
+# Coding
 
 In this section we are going to introduce some concept and rules to
 follow and implement. The goal of this section is to write better code
@@ -568,5 +549,3 @@ aim at checking the assembly of elementary components in your code.
 Integration tests can be used with the content of your `data/examples`
 folder to check after each step of your pipeline if you get the expected
 results.
-
-[]{#sec:bibliography label="sec:bibliography"}
diff --git a/good_practices.tex b/good_practices.tex
index 62a2ff7..b3f89c5 100644
--- a/good_practices.tex
+++ b/good_practices.tex
@@ -25,15 +25,15 @@ The resulting problems are multiple and easily avoidable.
 This document summarizes a set of good practices in bioinformatics.
 
 \begin{enumerate}
-  \item Section~\ref{sec:project.organisation}, presents the organization of your working folder for a given bioinformatics project.
-  \item Section~\ref{sec:data.managment}, lists the resources available to manage and secure data in your project.
-  \item Section~\ref{sec:versioning}, presents the \texttt{git} code versioning system and some examples on how to use it.
-  \item Section~\ref{sec:coding}, enumerates some rules to follow when you write code. These rules will ease the reproducibility of your analysis and collaborative development for your project.
+  \item Section 1 presents the organization of your working folder for a given bioinformatics project.
+  \item Section 2 lists the resources available to manage and secure data in your project.
+  \item Section 3 presents the \texttt{git} code versioning system and some examples on how to use it.
+  \item Section 4 enumerates some rules to follow when you write code. These rules will ease the reproducibility of your analysis and collaborative development for your project.
 \end{enumerate}
 
-These good practices were compiled from different sources, often overlapping, listed in the References starting page \pageref{sec:bibliography} of this document.
+These good practices were compiled from different sources, often overlapping, listed in the References of this document.
 
-\section{Project organization \label{sec:project.organisation}}
+\section{Project organization}
 
 The first step at the start of a bioinformatic project is to plan for the structure of the project.
 Following this structure will facilitate collaboration with others or your future self. In this section we are going to present a guide for your project organization. This guide should cover most bioinformatic project requirements. This section aims at facilitating collaboration with other bioinformaticians in the LBMC or even yourself in the future. You are strongly encouraged to follow it and to enforce its policies in your team.
@@ -77,7 +77,7 @@ You can use reputable DOI-issuing repository such as \href{https://figshare.com/
 
 \subsection{\texttt{data} folder}
 
-The \texttt{data} folder must contain a \texttt{.gitignore} file whose content is simply ``\texttt{*}''. Therefore, \texttt{git} will ignore files in this folder (See Section~\ref{sec:data.managment} for more information on data management).\\
+The \texttt{data} folder must contain a \texttt{.gitignore} file whose content is simply ``\texttt{*}''. Therefore, \texttt{git} will ignore files in this folder (See Section 2 for more information on data management).\\
 
 A general rule for data management is to have a single authoritative representation of every piece of data in the system.\\
 
@@ -85,7 +85,7 @@ The \texttt{data} folder must contain only the raw data for your project. No scr
 
 Data files in this folder, and in general, should contain some metadata like a time stamp and few biologically meaningful key words. We advise you to use the following naming convention: \texttt{2020\_12\_31-informative\_name.file}. An informative file name can for example be a compilation of the species lineage replicate and sequencing technology name. With this format, sorting the file by name will also sort them by date and the most important metadata will be kept in the file name. Avoid, space and special characters (use {\bf \_} instead). When possible use open file formats that are easier to handle with standard tools and help to promote open science.\\
 
-When writing script or code, it’s important to be able to test it. You can create a \texttt{data/examples} folder that contains small toy data sets to test your scripts or software as described in the \texttt{README} file. This will give to others the possibility to validate your work, and allow you to check if new modifications work correctly, hence saving you a lot of time (See Section~\ref{sec:coding} for more information on testing).
+When writing script or code, it’s important to be able to test it. You can create a \texttt{data/examples} folder that contains small toy data sets to test your scripts or software as described in the \texttt{README} file. This will give to others the possibility to validate your work, and allow you to check if new modifications work correctly, hence saving you a lot of time (See Section 4 for more information on testing).
 
 \subsection{\texttt{src} folder}
 
@@ -99,7 +99,7 @@ The content of the \texttt{src/tests/} folder, must be regularly commited to you
 
 \subsubsection{\texttt{tests} folder}
 
-The \texttt{tests} folder must contain a list of tests files that can be executed to test your code. This will be explained in more detail in the Section~\ref{sec:coding} on tests-driven development.
+The \texttt{tests} folder must contain a list of test files that can be executed to test your code. This will be explained in more detail in Section 4 on test-driven development.
 
 
 \subsection{\texttt{doc} folder}
@@ -110,7 +110,7 @@ We advise you to keep an electronic lab notebook in a \texttt{doc/reports} subfo
 
 \subsection{\texttt{results} folder}
 
-The \texttt{results} folder must contain a \texttt{.gitignore} file whose content is simply ``\texttt{*}''. Therefore, \texttt{git} will ignore files in this folder (See Section~\ref{sec:data.managment} for more information on data management).\\
+The \texttt{results} folder must contain a \texttt{.gitignore} file whose content is simply ``\texttt{*}''. Therefore, \texttt{git} will ignore files in this folder (See Section 2 for more information on data management).\\
 
 Every generated results or temporary files must go to the results folder. This also means that the integrality of the \texttt{results} folder can be regenerated from the \texttt{data}, \texttt{bin} and \texttt{src} folders. If this is not the case for a given result file, delete it and write the necessary code in \texttt{src} to regenerate it.\\
 
@@ -122,11 +122,11 @@ Even if we don’t enforce a backup policy for the \texttt{results} folder keep
 
 The \texttt{bin} folder which historically contains any compiled binary file must also contain third party scripts and software. You should be able to fill this folder with the information contained in the dependencies section of the \texttt{README} file or \texttt{doc/} folder. The compiled file from your work can be recompiled and the third party material can be got back from the internet or other sources. This folder can also be automatically filed if necessary by the execution of the content of the \texttt{src} folder.
 
-\section{Data Management \label{sec:data.managment}}
+\section{Data Management}
 
 In this section we will present some rules to manage your project data. Given the size of current NGS data set one must find the balance between securing the data for his/her project and avoid the needless replication of gigabytes of data.\\
 
-Your code and documentation are also valuable sets of files. Using, \texttt{git} means that a copy of these files exist at least on your computer (and the computer of every collaborator in the project), on the gitbio server and on the backup of the gitbio server (updated every 24h). The details of the code and documentation management within your project are developed in \texttt{src} and \texttt{doc} paragraph of the Section~\ref{sec:project.organisation}. In this section, we focus on replicating the \texttt{data} and \texttt{results} folder content of your project, on multiple sites, in order to secure it. \\
+Your code and documentation are also valuable sets of files. Using \texttt{git} means that a copy of these files exists at least on your computer (and the computer of every collaborator in the project), on the gitbio server and on the backup of the gitbio server (updated every 24h). The details of the code and documentation management within your project are developed in the \texttt{src} and \texttt{doc} paragraphs of Section 1. In this section, we focus on replicating the \texttt{data} and \texttt{results} folder content of your project, on multiple sites, in order to secure it. \\
 
 From the time spent to get the material, to the cost of the reagents and of the sequencing, your data are precious. Moreover for reproducibility concern you should always keep a raw version of your data to go back to. Those two points mean that you must make a backup of your raw data as soon as possible (the external hard or thumb drive on which you can get them doesn’t count). When you receive data, it’s also always important to document them. Write a simple \texttt{description.txt} file in the same folder that describes your data and how they were generated. These metadata of your data are important to archive and index them. There are numerous conventions for metadata terms that you can follow, like the \href{http://dublincore.org/documents/dcmi-terms/}{dublin core}. Metadata will also be useful for the persons that are going to reuse your data (in meta-analysis for example) and to cite them.
 
@@ -152,14 +152,14 @@ A second copy of the raw data can be placed in your PSMN team folder \texttt{/Xn
 
 Most of the human bioinformatic work will result in the production of lines of code or text. While important, the size of such data is often quite small and should be copied to other places as often as possible.\\
 
-When using a version control system (See Section~\ref{sec:versioning}), making regular pushes to the LBMC gitbio server will not only make you gain time to deal with different versions of your project but also save a copy of your code on the server.
+When using a version control system (See Section 3), making regular pushes to the LBMC gitbio server will not only make you gain time to deal with different versions of your project but also save a copy of your code on the server.
 You can also make instantaneous or daily backup in your home directory at the PSNM.
 
 With the LBMC you can also use the \href{http://biowiki.biologie.ens-lyon.fr/}{Silexe server}.
 The CNRS provides a synchronization service called \href{https://mycore.cnrs.fr/}{MyCore} to synchronize folders on their servers (100Gb).
 The UE provides a synchronization service called \href{https://b2drop.eudat.eu}{b2drop} to synchronize folders on their servers (20Gb).
 
-\section{Versioning \label{sec:versioning}}
+\section{Versioning}
 
 Biologists keep their lab journal up to date so their future self or other people can check on and reproduce their work. In bioinformatics versioning can be seen as a bioinformatic journal where you can comment the addition of new functions to your project. This also means that you can go back at any point of this journal to revert to your code at an earlier state.
 
@@ -168,7 +168,7 @@ Successful branches can then be merged back into the main branch to include new
 
 The strength of a code versioning system is to do all of the above transparently. You don’t have to keep different versions of your files; it’s the versioning software job. By going to another branch or time point, your working directory will be changed to match the status of the files at that point. If you jump back, the files will be changed back to the condition where you came from.\\
 
-The flexibility of the version control software to jump to a given time point of your project relies on the granularity of those time points. Therefore, you should try to make incremental changes to your project and record them with the version control software as often as possible. This will also help you to comply with the recommendations of the Section~\ref{sec:coding} on coding.\\
+The flexibility of the version control software to jump to a given time point of your project relies on the granularity of those time points. Therefore, you should try to make incremental changes to your project and record them with the version control software as often as possible. This will also help you to comply with the recommendations of Section 4 on coding.\\
 
 You can find the LBMC course on \texttt{git} at \href{https://gitbio.ens-lyon.fr/LBMC/hub/formations/git_basis/-/blob/master/README.md}{https://gitbio.ens-lyon.fr/LBMC/hub/formations/git\_basis/}
 
@@ -232,7 +232,7 @@ git push -u origin master
 
 The full documentation of every command and possibilities with git is well beyond the scope of this document. However, you can access a complete and well-written documentation on the website \href{https://git-scm.com/book/en/v2}{git-scm.com}. There is also a huge community around \texttt{git} so most of your problems with it should find their answer online or in the LBMC. Also, don’t forget to go to the \texttt{git} formation organized in the LBMC !
 
-\section{Coding \label{sec:coding}}
+\section{Coding}
 
 In this section we are going to introduce some concept and rules to follow and implement. The goal of this section is to write better code and scripts in your projects, with validated and reproducible results.
 
@@ -280,7 +280,7 @@ Integration tests are one level of complexity above the unit tests. They aim at
 
 \nocite{wilsonBestPracticesScientific2014, masumTenSimpleRules2013, bolandTenSimpleRules2017, listTenSimpleRules2017, nobleQuickGuideOrganizing2009, leipzigReviewBioinformaticPipeline2016, dudleyQuickGuideDeveloping2009, sandveTenSimpleRules2013, vicensTenSimpleRules2007, taschukTenSimpleRules2017, mieleNineQuickTips2019}
 
-\bibliography{bibliography.bib}
-\bibliographystyle{plain}\label{sec:bibliography}
+\bibliography*{bibliography.bib}
+\bibliographystyle{plain}
 
 \end{document}
-- 
GitLab