From 37586ce4a05b74c6a446128d939c88f55756b53e Mon Sep 17 00:00:00 2001 From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr> Date: Wed, 28 Apr 2021 19:53:32 +0200 Subject: [PATCH] doc/: add doc --- doc/Makefile | 20 +++++ doc/make.bat | 35 +++++++++ doc/source/README.rst | 152 +++++++++++++++++++++++++++++++++++++ doc/source/bed_handler.rst | 85 +++++++++++++++++++++ doc/source/conf.py | 61 +++++++++++++++ doc/source/gc_content.rst | 45 +++++++++++ doc/source/index.rst | 23 ++++++ doc/source/modules.rst | 9 +++ doc/source/visu.rst | 29 +++++++ 9 files changed, 459 insertions(+) create mode 100644 doc/Makefile create mode 100644 doc/make.bat create mode 100644 doc/source/README.rst create mode 100644 doc/source/bed_handler.rst create mode 100644 doc/source/conf.py create mode 100644 doc/source/gc_content.rst create mode 100644 doc/source/index.rst create mode 100644 doc/source/modules.rst create mode 100644 doc/source/visu.rst diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..6247f7e --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/source/README.rst b/doc/source/README.rst new file mode 100644 index 0000000..8f8cb64 --- /dev/null +++ b/doc/source/README.rst @@ -0,0 +1,152 @@ +BigWig visu +=========== + +Description +----------- + +This project contains three main submodules that can be found in the ``src/`` directory: +* The module ``bed_handler`` allows you to perform some operations on bed files. This module was designed for a particular project and it is unlikely that you will want to use it. +* The module ``gc_content`` creates violin plots displaying the GC content of the regions in the two or more bed files given as input. It also performs a Wilcoxon test to see if the regions contained in those bed files show a difference in their GC content. +* The module ``visu`` displays the coverage from bigwig files, created from different conditions, in specific genomic regions defined in two or more bed files. 
+ +Prerequisites +------------- + +This project requires ``python>=3.8`` to work correctly and the following modules must be installed: +* Lazyparser>=0.2.0 +* Pandas>=1.0.3 +* Loguru>=0.5.3 +* numpy>=1.17.4 +* pyfaidx>=0.5.7 +* biopython>=1.75 +* seaborn>=0.10.1 +* matplotlib>=3.1.2 + +Usage +----- + +``gc_content`` module +~~~~~~~~~~~~~~~~~~~~~ + +To launch the ``gc_content`` module you must enter the following command at the root of this project: + +.. code:: console + + $ python3 -m src.gc_content [PARAMS] + + +Where [PARAMS] corresponds to the parameters given to the program. The list of available params is defined below + ++---------------------+---------------------------------------------------------------------------------------+ +| Required parameters | Description | ++---------------------+---------------------------------------------------------------------------------------+ +| -B / --beds | A list of beds files containing the regions for which we want to display the coverage | ++---------------------+---------------------------------------------------------------------------------------+ +| -b / --bed_names | A list of names identifying each bed files given in the -B / --beds parameter | ++---------------------+---------------------------------------------------------------------------------------+ +| -g / --genome | A Fasta file containing the entire genome of an organism of interest | ++---------------------+---------------------------------------------------------------------------------------+ + ++---------------------+----------------------------------------------------------------------------------------------------+ +| Optional parameters | Description | ++---------------------+----------------------------------------------------------------------------------------------------+ +| -e / --environment | Number of nucleotides to display around the genomic intervals defined in the bed files (default 0) | 
++---------------------+----------------------------------------------------------------------------------------------------+ +| -f / --ft_names | A name identifying the kind of genomic intervals defined in the bed files (default: interval) | ++---------------------+----------------------------------------------------------------------------------------------------+ + + +Note that you can also display the help for this module by typing: + +.. code:: console + + $ python3 -m src.gc_content --help + + +The elements of a list must be separated by a space when you write the command. For example, to give 3 bed files to the ``-B`` parameter, you can type: + +.. code:: console + + $ python3 -m src.gc_content -B bed1.bed bed2.bed bed3.bed [...] + + +The ``[...]`` represents the remaining part of the command to write. + +``visu`` module +~~~~~~~~~~~~~~~ + + +To launch the ``visu`` module you must enter the following command at the root of this project: + +.. code:: console + + $ python3 -m src.visu [PARAMS] + + +Where [PARAMS] corresponds to the parameters given to the program. The list of available parameters is given below: + ++-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Required parameters | Description | ++-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``-d / --design`` | A tabulated file containing 3 columns. The first column contains a bigwig filename, the second contains the condition name and the last one contains the replicate of the condition. 
| ++-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``-B / --bw_folder`` | The folder containing the bigwig files mentioned in the first column of the 'design' table | ++-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``-r / --region_beds`` | A list of one or many bed files containing the genomic intervals to display | ++-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| -R / --region_names | A list of names identifying genomic intervals insides the beds given with the ``-r / --region_beds`` parameter. | ++-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +Example of the content in the design file: + ++--------+-----------+-----------+ +| bigwig | condition | replicate | ++--------+-----------+-----------+ +| bw1.bw | Cond1 | R1 | ++--------+-----------+-----------+ +| bw2.bw | Cond1 | R2 | ++--------+-----------+-----------+ +| bw1.bw | Cond2 | R1 | ++--------+-----------+-----------+ +| bw2.bw | Cond2 | R2 | ++--------+-----------+-----------+ + + + ++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Optional parameters | Description | 
++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| -n / --nb_bin | An integer corresponding to the number of bins to use to represent the genomic intervals given with -r / --region_beds arguments. (default 100) | ++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| -f / --figure_type | The kind of representation wanted (barplot or metagene) (default metagene) | ++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| -N / --norm | A number corresponding to a bin for which the coverage will become 1. 'None' for no normalisation (default 'None'). Note that this parameter can also take a file (description after this table) | ++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| -s / --show_replicates | 'y' to create a figure showing the coverage for all replicates 'n' to display only the average coverage across the conditions defined in -d / --design parameter. 
(default y) | ++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| -e / --environment | A list of two integers. The first corresponds to the number of nucleotides to display around the genomic intervals of interest defined with the -r / --region_beds parameter and the second corresponds to the number of bins to use (default 0 0) | ++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| -b / --border_name | A list of two strings. The name of the left and right border to display in the figures between the genomic intervals defined with the -r / --region_beds and their environment (default '' '') | ++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| -o / --output | Folder where the figures will be stored (default .) | ++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| -y / --ylim | A list of two integers that corresponds to the y-axis range in the figure. 
(default None) | ++------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +The file to give to the ``--norm`` parameter must be formatted like this: + ++-----------+----------+-----------+------+ +| condition | region | replicate | coef | ++-----------+----------+-----------+------+ +| Cond1 | interval | R1 | 0.5 | ++-----------+----------+-----------+------+ +| Cond1 | interval | R2 | 0.4 | ++-----------+----------+-----------+------+ +| Cond2 | interval | R1 | 0.84 | ++-----------+----------+-----------+------+ +| Cond2 | interval | R2 | 0.2 | ++-----------+----------+-----------+------+ + +* The column ``condition`` must contain the same conditions as those defined in the ``condition`` column of the design file. +* The column ``replicate`` must contain the same replicate names as those in the ``replicate`` column of the design file. +* The column ``coef`` contains the value used to normalise the coverage. diff --git a/doc/source/bed_handler.rst b/doc/source/bed_handler.rst new file mode 100644 index 0000000..d93013a --- /dev/null +++ b/doc/source/bed_handler.rst @@ -0,0 +1,85 @@ +bed\_handler package +==================== + +Submodules +---------- + +bed\_handler.bed\_resize module +------------------------------- + +.. automodule:: src.bed_handler.bed_resize + :members: + :undoc-members: + :show-inheritance: + +bed\_handler.config module +-------------------------- + +.. automodule:: src.bed_handler.config + :members: + :undoc-members: + :show-inheritance: + +bed\_handler.filter\_bed module +------------------------------- + +.. automodule:: src.bed_handler.filter_bed + :members: + :undoc-members: + :show-inheritance: + +bed\_handler.filter\_gene module +-------------------------------- + +.. 
automodule:: src.bed_handler.filter_gene + :members: + :undoc-members: + :show-inheritance: + +bed\_handler.get\_gene\_locations module +---------------------------------------- + +.. automodule:: src.bed_handler.get_gene_locations + :members: + :undoc-members: + :show-inheritance: + +bed\_handler.get\_gene\_regulated\_by\_ddx module +------------------------------------------------- + +.. automodule:: src.bed_handler.get_gene_regulated_by_ddx + :members: + :undoc-members: + :show-inheritance: + +bed\_handler.get\_last\_exons module +------------------------------------ + +.. automodule:: src.bed_handler.get_last_exons + :members: + :undoc-members: + :show-inheritance: + +bed\_handler.get\_other\_exon\_in\_same\_gene module +---------------------------------------------------- + +.. automodule:: src.bed_handler.get_other_exon_in_same_gene + :members: + :undoc-members: + :show-inheritance: + +bed\_handler.select\_regulated\_near\_ctcf\_exons module +-------------------------------------------------------- + +.. automodule:: src.bed_handler.select_regulated_near_ctcf_exons + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: src.bed_handler + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 0000000..9fc21bf --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,61 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os +import sys +#sys.path.insert(0, os.path.abspath('.')) +#sys.path.insert(0, os.path.abspath('../')) +#sys.path.insert(0, os.path.abspath('../../')) +sys.path.insert(0, os.path.abspath('../../')) +#sys.path.insert(0, os.path.abspath('../../src/bed_handler')) +#sys.path.insert(0, os.path.abspath('../../src/gc_content')) +#sys.path.insert(0, os.path.abspath('../../src/visu')) + +# -- Project information ----------------------------------------------------- + +project = 'bigwig_visu' +copyright = '2021, Fontrodona' +author = 'Fontrodona' + +# The full version, including alpha/beta/rc tags +release = '0.1' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] diff --git a/doc/source/gc_content.rst b/doc/source/gc_content.rst new file mode 100644 index 0000000..30ce374 --- /dev/null +++ b/doc/source/gc_content.rst @@ -0,0 +1,45 @@ +gc\_content package +=================== + +Submodules +---------- + +gc\_content.config module +------------------------- + +.. automodule:: src.gc_content.config + :members: + :undoc-members: + :show-inheritance: + +gc\_content.gc\_content module +------------------------------ + +.. automodule:: src.gc_content.gc_content + :members: + :undoc-members: + :show-inheritance: + +gc\_content.gc\_stats module +---------------------------- + +.. automodule:: src.gc_content.gc_stats + :members: + :undoc-members: + :show-inheritance: + +gc\_content.stat\_annot module +------------------------------ + +.. automodule:: src.gc_content.stat_annot + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: src.gc_content + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/index.rst b/doc/source/index.rst new file mode 100644 index 0000000..7c0009c --- /dev/null +++ b/doc/source/index.rst @@ -0,0 +1,23 @@ +.. bigwig_visu documentation master file, created by + sphinx-quickstart on Wed Apr 28 18:56:52 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to bigwig_visu's documentation! +======================================= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + README.rst + modules.rst + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/doc/source/modules.rst b/doc/source/modules.rst new file mode 100644 index 0000000..5d3a00d --- /dev/null +++ b/doc/source/modules.rst @@ -0,0 +1,9 @@ +src +=== + +.. 
toctree:: + :maxdepth: 4 + + bed_handler + gc_content + visu diff --git a/doc/source/visu.rst b/doc/source/visu.rst new file mode 100644 index 0000000..f9c2ba3 --- /dev/null +++ b/doc/source/visu.rst @@ -0,0 +1,29 @@ +visu package +============ + +Submodules +---------- + +visu.config module +------------------ + +.. automodule:: src.visu.config + :members: + :undoc-members: + :show-inheritance: + +visu.figure\_maker module +------------------------- + +.. automodule:: src.visu.figure_maker + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: src.visu + :members: + :undoc-members: + :show-inheritance: -- GitLab