initial commit

a6614856 · nfontrod · a6614856 · a6614856 · a6614856 · a6614856
Commit a6614856 authored Oct 21, 2020 by nfontrod
--- a/.gitignore
+++ b/.gitignore
+.idea/*
+src/visu/__pycache__/*.pyc
+src/bed_handler/__pycache__/*.pyc
--- a/data/.gitignore
+++ b/data/.gitignore
+*
+!.gitignore
--- a/results/.gitignore
+++ b/results/.gitignore
+*
+!.gitignore
--- a/src/bed_handler/__init__.py
+++ b/src/bed_handler/__init__.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description:
+"""
--- a/src/bed_handler/__main__.py
+++ b/src/bed_handler/__main__.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description: Create the bed file that will be used for the bigwig \
+visualisation
+"""
+from .filter_gene import create_filtered_bed
+from .get_gene_locations import create_region_bed
+def launcher():
+    """
+    Build every bed file required to visualise the bigwig files.
+
+    The filtered gene bed is created first because the region beds \
+    (body, tss, tts and after-gene) are built from it.
+    """
+    for build_step in (create_filtered_bed, create_region_bed):
+        build_step()
+# Module-level call: the bed files are built as soon as this module is run.
+launcher()
--- a/src/bed_handler/config.py
+++ b/src/bed_handler/config.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description: A class containing all the variables used in this submodule
+"""
+from pathlib import Path
+class OutputBed:
+    """
+    Locations of the bed files written by this submodule.
+    """
+    # Folder (two levels above this file's folder) receiving every bed file
+    output = Path(__file__).parents[2].joinpath("results", "bed_file")
+    # Genes of interest
+    filtered_gene = output.joinpath("filtered_gene.bed")
+    # Regions derived from the genes of interest
+    body_gene = output.joinpath("body_gene.bed")
+    tss_gene = output.joinpath("tss_gene.bed")
+    tts_gene = output.joinpath("tts_gene.bed")
+    after_gene = output.joinpath("after_gene.bed")
+class BedConfig:
+    """
+    Input files and parameters used to build the bed files.
+    """
+    # Two levels above the folder containing this file
+    base = Path(__file__).parents[2]
+    # File listing the genes of interest
+    ddx_genes = base.joinpath("data", "DDX5_17_genes.txt")
+    # Bed files describing the genes and their exons
+    gene_bed = base.joinpath("data", "bed", "gene.bed")
+    exon_bed = base.joinpath("data", "bed", "exon.bed")
+    # Locations of the bed files to create
+    bed = OutputBed
+    # Size (in nucleotides) of the region taken after each gene
+    size = 5000
+class TestConfig:
+    """Locations of the files used by the docstring tests"""
+    # Folder containing the test files
+    base = Path(__file__).parents[2].joinpath("tests", "files")
+    list_genes = base.joinpath("list_genes.txt")
+    gene_bed = base.joinpath("genes.bed")
+    exon_bed = base.joinpath("exons.bed")
+    small_bw = base.joinpath("small.bw")
\ No newline at end of file
--- a/src/bed_handler/filter_gene.py
+++ b/src/bed_handler/filter_gene.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description: Contains functions to filter the gene of interest in a bed file.
+"""
+import pandas as pd
+from pathlib import Path
+from doctest import testmod
+from .config import BedConfig, TestConfig
+from typing import List
+def select_gene_of_interest(gene_file: Path) -> List[int]:
+    """
+    Get the fasterDb gene ids located in the file `gene_file`.
+    :param gene_file: A file containing a list of gene of interest \
+    (one gene id per line)
+    :return: The list of gene of interest. Blank lines are ignored.
+    >>> select_gene_of_interest(TestConfig.list_genes)
+    [73, 75, 89, 123, 128]
+    """
+    with gene_file.open('r') as infile:
+        gene_list = infile.read().splitlines()
+    # The filter must test each line, not the whole list: a blank line
+    # (e.g. an empty line at the end of the file) would make int() fail.
+    return [int(gene_id) for gene_id in gene_list if gene_id.strip()]
+def filter_bed(bed_file: Path, gene_list: List[int]) -> pd.DataFrame:
+    """
+    Read a tab separated bed file of FasterDB genes and keep only the \
+    rows whose `id` column is found in `gene_list`.
+    :param bed_file: A bed file containing genes
+    :param gene_list: a list of gene of interest
+    :return: The rows of the bed file corresponding to the genes \
+    located in gene_list (the original row index is kept)
+    >>> filter_bed(TestConfig.gene_bed, [1, 5, 9])
+       #ref     start       end  id     score strand
+    0    18  28645943  28682388   1      DSC2      -
+    4    13  45766989  45775176   5     KCTD4      -
+    8    13  45967450  45992516   9  SLC25A30      -
+    """
+    bed_table = pd.read_csv(bed_file, sep="\t")
+    is_wanted = bed_table["id"].isin(gene_list)
+    return bed_table.loc[is_wanted]
+def create_filtered_bed() -> None:
+    """
+    Write the bed file restricted to the genes of interest.
+
+    The gene ids are read from `BedConfig.ddx_genes`, the genes from \
+    `BedConfig.gene_bed` and the result is written (tab separated, \
+    without index) in `BedConfig.bed.filtered_gene`. The output folder \
+    is created if needed.
+    """
+    out_conf = BedConfig.bed
+    wanted_genes = select_gene_of_interest(BedConfig.ddx_genes)
+    filtered = filter_bed(BedConfig.gene_bed, wanted_genes)
+    out_conf.output.mkdir(parents=True, exist_ok=True)
+    filtered.to_csv(out_conf.filtered_gene, sep="\t", index=False)
+# Run the docstring tests of this module when it is executed directly.
+if __name__ == "__main__":
+    testmod()
--- a/src/bed_handler/get_gene_locations.py
+++ b/src/bed_handler/get_gene_locations.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description: Create a bed file containing the gene locations of interest.
+"""
+from pathlib import Path
+from typing import Dict, List
+from .config import BedConfig, TestConfig
+from doctest import testmod
+def load_exon_bed(bed_exon: Path) -> Dict:
+    """
+    Load a bed file containing exons.
+
+    The 4th column of each line must have the form `<gene>_<position>`. \
+    Lines starting with '#' are skipped.
+    :param bed_exon: A bed of exons
+    :return: A dictionary linking each gene id to its exons: a \
+    dictionary linking the position of the exon to the columns of its line
+    >>> d = load_exon_bed(TestConfig.exon_bed)[1]
+    >>> d[1]
+    ['18', '28681865', '28682388', '1_1', '0', '-']
+    >>> d[2]
+    ['18', '28681183', '28681432', '1_2', '0', '-']
+    """
+    gene2exons = {}
+    with bed_exon.open('r') as inbed:
+        for raw_line in inbed:
+            if raw_line.startswith("#"):
+                continue
+            fields = raw_line.replace("\n", "").split("\t")
+            gene_id, exon_pos = (int(v) for v in fields[3].split("_"))
+            gene2exons.setdefault(gene_id, {})[exon_pos] = fields
+    return gene2exons
+def get_gene_body(gene: List[str], exons: Dict) -> List:
+    """
+    Turn the gene `gene` into its gene body.
+
+    Exons are ranked by their position key. On the '+' strand, the body \
+    goes from the start of the second exon to the end of the penultimate \
+    exon. On the other strand, it goes from the start of the penultimate \
+    exon to the end of the second exon. `gene` is modified in place.
+    :param gene: A gene
+    :param exons: A dictionary of the exons inside that gene
+    :return: The gene body of the gene
+    >>> e = {1: ['5', '100', '110', '1_1', 'Test', '+'],
+    ...      2: ['5', '130', '140', '1_2', 'Test', '+'],
+    ...      3: ['5', '160', '200', '1_3', 'Test', '+']}
+    >>> get_gene_body(['5', '100', '200', '1', 'Test', '+'], e)
+    ['5', '130', '140', '1', 'Test', '+']
+    >>> e = {1: ['5', '100', '110', '1_1', 'Test', '+'],
+    ...      2: ['5', '130', '140', '1_2', 'Test', '+'],
+    ...      3: ['5', '160', '170', '1_3', 'Test', '+'],
+    ...      4: ['5', '190', '200', '1_3', 'Test', '+']}
+    >>> get_gene_body(['5', '100', '200', '1', 'Test', '+'], e)
+    ['5', '130', '170', '1', 'Test', '+']
+    >>> e = {4: ['5', '100', '110', '1_4', 'Test', '-'],
+    ...      3: ['5', '130', '140', '1_3', 'Test', '-'],
+    ...      2: ['5', '160', '170', '1_2', 'Test', '-'],
+    ...      1: ['5', '190', '200', '1_1', 'Test', '-']}
+    >>> get_gene_body(['5', '100', '200', '1', 'Test', '-'], e)
+    ['5', '130', '170', '1', 'Test', '-']
+    """
+    ranks = sorted(exons)
+    second_exon = exons[ranks[1]]
+    penultimate_exon = exons[ranks[-2]]
+    if gene[5] == "+":
+        gene[1], gene[2] = second_exon[1], penultimate_exon[2]
+    else:
+        gene[1], gene[2] = penultimate_exon[1], second_exon[2]
+    return gene
+def get_gene_tss(gene: List[str], exons: Dict) -> List:
+    """
+    Turn the gene `gene` into its tss region.
+
+    Exons are ranked by their position key. On the '+' strand, the \
+    region ends at the start of the second exon. On the other strand, \
+    the region starts at the end of the second exon. `gene` is modified \
+    in place.
+    :param gene: A gene
+    :param exons: A dictionary of the exons inside that gene
+    :return: The gene tss of the gene
+    >>> e = {1: ['5', '100', '110', '1_1', 'Test', '+'],
+    ...      2: ['5', '130', '140', '1_2', 'Test', '+'],
+    ...      3: ['5', '160', '170', '1_3', 'Test', '+'],
+    ...      4: ['5', '190', '200', '1_3', 'Test', '+']}
+    >>> get_gene_tss(['5', '100', '200', '1', 'Test', '+'], e)
+    ['5', '100', '130', '1', 'Test', '+']
+    >>> e = {4: ['5', '100', '110', '1_4', 'Test', '-'],
+    ...      3: ['5', '130', '140', '1_3', 'Test', '-'],
+    ...      2: ['5', '160', '170', '1_2', 'Test', '-'],
+    ...      1: ['5', '190', '200', '1_1', 'Test', '-']}
+    >>> get_gene_tss(['5', '100', '200', '1', 'Test', '-'], e)
+    ['5', '170', '200', '1', 'Test', '-']
+    """
+    second_exon = exons[sorted(exons)[1]]
+    on_plus_strand = gene[5] == "+"
+    if on_plus_strand:
+        gene[2] = second_exon[1]
+    else:
+        gene[1] = second_exon[2]
+    return gene
+def get_gene_tts(gene: List[str], exons: Dict) -> List:
+    """
+    Turn the gene `gene` into its tts region.
+
+    Exons are ranked by their position key. On the '+' strand, the \
+    region starts at the end of the penultimate exon. On the other \
+    strand, the region ends at the start of the penultimate exon. \
+    `gene` is modified in place.
+    :param gene: A gene
+    :param exons: A dictionary of the exons inside that gene
+    :return: The gene tts of the gene
+    >>> e = {1: ['5', '100', '110', '1_1', 'Test', '+'],
+    ...      2: ['5', '130', '140', '1_2', 'Test', '+'],
+    ...      3: ['5', '160', '170', '1_3', 'Test', '+'],
+    ...      4: ['5', '190', '200', '1_3', 'Test', '+']}
+    >>> get_gene_tts(['5', '100', '200', '1', 'Test', '+'], e)
+    ['5', '170', '200', '1', 'Test', '+']
+    >>> e = {4: ['5', '100', '110', '1_4', 'Test', '-'],
+    ...      3: ['5', '130', '140', '1_3', 'Test', '-'],
+    ...      2: ['5', '160', '170', '1_2', 'Test', '-'],
+    ...      1: ['5', '190', '200', '1_1', 'Test', '-']}
+    >>> get_gene_tts(['5', '100', '200', '1', 'Test', '-'], e)
+    ['5', '100', '130', '1', 'Test', '-']
+    """
+    penultimate_exon = exons[sorted(exons)[-2]]
+    on_plus_strand = gene[5] == "+"
+    if on_plus_strand:
+        gene[1] = penultimate_exon[2]
+    else:
+        gene[2] = penultimate_exon[1]
+    return gene
+def get_after_gene(gene: List[str], size: int) -> List:
+    """
+    Get the region of `size` nucleotides located right after the gene \
+    `gene` (after its end on the '+' strand, before its start on the \
+    other strand). `gene` is modified in place.
+    :param gene: A gene
+    :param size: The size of the region after the gene to check
+    :return: The region located after the gene. Its start is never \
+    lower than 0.
+    >>> get_after_gene(['5', '100', '200', '1', 'Test', '+'], 100)
+    ['5', '200', '300', '1', 'Test', '+']
+    >>> get_after_gene(['5', '100', '200', '1', 'Test', '-'], 100)
+    ['5', '0', '100', '1', 'Test', '-']
+    >>> get_after_gene(['5', '50', '200', '1', 'Test', '-'], 100)
+    ['5', '0', '50', '1', 'Test', '-']
+    """
+    if gene[5] == "+":
+        # NOTE(review): the end is not bounded by the chromosome size,
+        # which is unknown here.
+        gene[1] = gene[2]
+        gene[2] = str(int(gene[2]) + size)
+    else:
+        gene[2] = gene[1]
+        # A bed coordinate cannot be negative: a gene closer than `size`
+        # nucleotides to the chromosome start gets a shorter region.
+        gene[1] = str(max(int(gene[1]) - size, 0))
+    return gene
+def write_bed(bed_file: Path, dic_exon: Dict, region: str,
+              outfile: Path) -> None:
+    """
+    Write a bed file containing the gene body, the gene tss or tts or \
+    the region after the gene.
+
+    Lines starting with '#' are copied as they are and blank lines are \
+    skipped.
+    :param bed_file: A bed file containing the genes of interest.
+    :param dic_exon: A dictionary containing the exons within the genes
+    :param region: The region of interest: 'body', 'tss' or 'tts'. Any \
+    other value selects the region located after the gene.
+    :param outfile: The output file of interest
+    :raises ValueError: If a gene has less than 3 exons
+    :raises KeyError: If a gene has no entry in `dic_exon`
+    """
+    with bed_file.open('r') as infile, outfile.open('w') as out:
+        for line in infile:
+            if line.startswith("#"):
+                out.write(line)
+                continue
+            if not line.strip():
+                # A blank line (e.g. at the end of the file) holds no gene
+                continue
+            cline = line.replace('\n', '').split('\t')
+            gene = int(cline[3])
+            exons = dic_exon[gene]
+            if len(exons) < 3:
+                # Name the gene so the faulty line can be found
+                raise ValueError(f"The gene {gene} has too few exons "
+                                 f"({len(exons)}). It should have at "
+                                 f"least 3 exons")
+            if region == "body":
+                cline = get_gene_body(cline, exons)
+            elif region == 'tss':
+                cline = get_gene_tss(cline, exons)
+            elif region == 'tts':
+                cline = get_gene_tts(cline, exons)
+            else:
+                cline = get_after_gene(cline, BedConfig.size)
+            out.write('\t'.join(cline) + '\n')
+def create_region_bed() -> None:
+    """
+    Create the bed files of the wanted regions (body, tss, tts and \
+    after-gene) from the filtered gene bed and the exons loaded from \
+    `BedConfig.exon_bed`.
+    """
+    exons_by_gene = load_exon_bed(BedConfig.exon_bed)
+    targets = (
+        ('body', BedConfig.bed.body_gene),
+        ('tss', BedConfig.bed.tss_gene),
+        ('tts', BedConfig.bed.tts_gene),
+        ('after', BedConfig.bed.after_gene),
+    )
+    for region, destination in targets:
+        write_bed(BedConfig.bed.filtered_gene, exons_by_gene, region,
+                  destination)
+# Run the docstring tests of this module when it is executed directly.
+if __name__ == "__main__":
+    testmod()
--- a/src/visu/__init__.py
+++ b/src/visu/__init__.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description:
+"""
--- a/src/visu/__main__.py
+++ b/src/visu/__main__.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description:  Create a figure showing the ChIP-Seq coverage of particular \
+gene regions from ChIP-seq experiment.
+"""
+from .figure_maker import create_figure
+import lazyparser as lp
+from pathlib import Path
+from typing import List
+# NOTE(review): the docstring below is left untouched because lazyparser
+# presumably reads it to build the command line help - confirm before
+# rewording it.
+@lp.parse(design='file', region_bed='file',
+          nb_bin="nb_bin > 5", figure_type=['metagene', 'barplot'],
+          show_replicate=['y', 'n', 'Y', 'N'])
+def launcher(design: str, bw_folder: str, region_bed: str,
+             region_name: str, nb_bin: int = 100,
+             figure_type: str = 'metagene',
+             show_replicate: str = 'y', environment: List[int] = (0, 0),
+             border_names: List[str] = ('', ''),
+             output: str = '.') -> None:
+    """
+    Create A metagene or a barplot figure from bigwig file on regions defined \
+    in the bed file provided with 'region_bed' parameter.
+    :param design: A tabulated file containing 3 columns. The first columns \
+    contains a bigwig filename, the second contains the condition name and \
+    the last one contains the replicate of the condition.
+    :param bw_folder: The folder containing the bigwig file mentioned in \
+    the first column of the 'design' table.
+    :param region_bed: A bed file containing the regions to visualise
+    :param region_name: The name of the region analysed
+    :param nb_bin: The number of bins used to represents the regions of \
+    'region_bed'.
+    :param figure_type: The kind of representation wanted (barplot or metagene)
+    :param show_replicate: True to create a figure showing the replicate \
+    false else.
+    :param environment: A list of two int. The first contains the number of \
+    nucleotide to represent around the region of interest and the second,
+    the number of bin used to represent those surrounding regions.
+    :param border_names: The name of the borders
+    :param output: Folder where the results will be created
+    """
+    nb_nucleotides, nb_env_bins = environment[0], environment[1]
+    if nb_nucleotides < 0 or nb_env_bins < 0 or \
+            nb_nucleotides < nb_env_bins:
+        # The message states exactly what is checked: 0 and equal values
+        # are accepted (the default is (0, 0)).
+        raise ValueError("The two values given with --environment must "
+                         "be greater than or equal to 0 and the first "
+                         "value must be greater than or equal to the "
+                         "second")
+    # Command line strings are turned into their typed counterparts
+    show_rep = show_replicate.lower() == 'y'
+    create_figure(Path(design), Path(bw_folder), Path(region_bed),
+                  region_name, nb_bin, figure_type, show_rep, environment,
+                  border_names, Path(output))
+# NOTE(review): launcher is called without arguments; presumably the
+# lp.parse decorator fills them from the command line - confirm.
+launcher()
--- a/src/visu/config.py
+++ b/src/visu/config.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description:
+"""
+from ..bed_handler.config import BedConfig
\ No newline at end of file
--- a/src/visu/figure_maker.py
+++ b/src/visu/figure_maker.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
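+Description: Build the coverage table of bigwig files on the regions of \
+a bed file and create a metagene or a barplot figure from it.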
+"""
+from pathlib import Path
+from typing import List, Union, Any
+from doctest import testmod
+from ..bed_handler.config import TestConfig
+import pandas as pd
+import pyBigWig as pbw
+import seaborn as sns
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+def load_bed(bed: Path) -> List[List[Union[int, str]]]:
+    """
+    Read a bed file and return the features within it.
+
+    Lines starting with '#' are skipped. The start, the end and the id \
+    (columns 2 to 4) are converted to int.
+    :param bed: A bed file containing the regions of interest
+    :return: The list of feature inside the bed
+    >>> load_bed(TestConfig.gene_bed)[0]
+    ['18', 28645943, 28682388, 1, 'DSC2', '-']
+    """
+    with bed.open('r') as inbed:
+        rows = (raw.replace("\n", "").split("\t")
+                for raw in inbed if not raw.startswith("#"))
+        return [[col[0], int(col[1]), int(col[2]), int(col[3]),
+                 col[4], col[5]]
+                for col in rows]
+def inspect_bigwig_regions(bw: Any, region: List,
+                           replicate: str, nb_bin: int, resize: List[int],
+                           condition_name: str,
+                           ) -> pd.DataFrame:
+    """
+    Get the binned coverage of the bigwig region `region` and, if \
+    requested, of the regions flanking it.
+    :param bw: A opened bigwig file
+    :param region: The region of interest. Only its chromosome (index 0), \
+    start (index 1), end (index 2) and strand (index 5) are read here.
+    :param replicate: The replicate name
+    :param nb_bin: The number of bin that will represent the region
+    :param resize: A list of two int: the number of nucleotide used to \
+    extend the region in both side and the number of bin used to \
+    represent each extension. Nothing is added when the first value is \
+    not greater than 0.
+    :param condition_name: the name of the condition
+    :return: a table with the coverage of this region, with the columns \
+    coverage, bin, condition and replicate (one row per bin)
+    :raises ValueError: If the bigwig does not return `nb_bin` values
+    >>> my_bw = pbw.open(str(TestConfig.small_bw))
+    >>> region = ['1', 10, 25, 1, 'Test', '+']
+    >>> inspect_bigwig_regions(my_bw, region, 'R1', 5, [4, 2], 'cond1')
+        coverage  bin condition replicate
+    0   0.000000   -2     cond1        R1
+    1   0.500000   -1     cond1        R1
+    2  75.000000    0     cond1        R1
+    3  20.000000    1     cond1        R1
+    4  10.000000    2     cond1        R1
+    5   4.666667    3     cond1        R1
+    6   2.000000    4     cond1        R1
+    7   1.000000    5     cond1        R1
+    8   0.500000    6     cond1        R1
+    >>> region = ['1', 110, 133, 1, 'Test', '-']
+    >>> inspect_bigwig_regions(my_bw, region, 'R1', 5, [4, 2], 'cond1')
+       coverage  bin condition replicate
+    0      0.00   -2     cond1        R1
+    1     12.50   -1     cond1        R1
+    2     42.00    0     cond1        R1
+    3      8.00    1     cond1        R1
+    4      4.25    2     cond1        R1
+    5      2.00    3     cond1        R1
+    6      2.00    4     cond1        R1
+    7      1.00    5     cond1        R1
+    8      1.00    6     cond1        R1
+    """
+    # One value per bin inside the region itself
+    val = bw.stats(region[0], region[1], region[2], nBins=nb_bin, exact=True)
+    # Bins of the region are numbered from 0 to nb_bin - 1
+    bins = list(range(len(val)))
+    if len(bins) != nb_bin:
+        raise ValueError("The lenght of bins should be equals to nb_bin")
+    if resize[0] > 0:
+        # Flanking region located before the start: it cannot begin
+        # before position 0.
+        max_loc = max(region[1] - resize[0], 0)
+        val_before = bw.stats(region[0], max_loc, region[1], nBins=resize[1],
+                              exact=True)
+        # Flanking region located after the end, bounded by
+        # bw.chroms(region[0]) (presumably the chromosome size - confirm).
+        min_loc = min(region[2] + resize[0], bw.chroms(region[0]))
+        val_after = bw.stats(region[0], region[2], min_loc, nBins=resize[1],
+                             exact=True)
+        if None in val_after:
+            # NOTE(review): the values are reversed when a None is found
+            # (and reversed again below on the '-' strand); the intent is
+            # not visible from here - confirm.
+            val_after = val_after[::-1]
+            print(f"Warning ! None values found in {region} - "
+                  f"{[region[0], region[2], min_loc]}")
+        if region[5] == "+":
+            # Negative bins for the values before the region, bins
+            # starting at nb_bin for the values after it.
+            bin_before = list(range(-len(val_before), 0))
+            bin_after = list(range(bins[-1] + 1,
+                                   bins[-1] + 1 + len(val_after)))
+            val = val_before + val + val_after
+        else:
+            # Any other strand: every list of values is reversed and the
+            # "after" values come first, so that they receive the
+            # negative bins.
+            bin_before = list(range(-len(val_after), 0))
+            bin_after = list(range(bins[-1] + 1,
+                                   bins[-1] + 1 + len(val_before)))
+            val = val_after[::-1] + val[::-1] + val_before[::-1]
+        bins = bin_before + bins + bin_after
+    # NOTE(review): without extension (resize[0] <= 0), the values are not
+    # reversed, whatever the strand.
+    dic = {"coverage": val, "bin": bins}
+    df = pd.DataFrame(dic)
+    # Same condition and replicate for every bin of the region
+    df['condition'] = [condition_name] * df.shape[0]
+    df['replicate'] = [replicate] * df.shape[0]
+    return df
+def create_sample_table(bw_file: Path, regions: List[List],
+                        replicate: str, nb_bin: int, resize: List[int],
+                        condition_name: str,
+                        ) -> pd.DataFrame:
+    """
+    Get the coverage table of all the regions of interest in one bigwig \
+    file.
+    :param bw_file: A bigwig file
+    :param regions: The regions of interest
+    :param replicate: The replicate name
+    :param nb_bin: The number of bin that will represent the region
+    :param resize: A list of two int: the number of nucleotide used to \
+    extend the region in both side and the number of bin used to \
+    represent each extension
+    :param condition_name: the name of the condition
+    :return: a table with the coverage of every region (the tables of \
+    each region are concatenated and the index is reset)
+    """
+    bw = pbw.open(str(bw_file))
+    try:
+        list_df = [
+            inspect_bigwig_regions(bw, region, replicate, nb_bin,
+                                   resize, condition_name)
+            for region in tqdm(regions, desc="scanning coverage ...")
+        ]
+    finally:
+        # Release the file handle even if a region cannot be read: one
+        # bigwig is opened for every line of the design file.
+        bw.close()
+    return pd.concat(list_df, axis=0, ignore_index=True)
+def create_full_table(df_exp: pd.DataFrame, regions: List[List],
+                      nb_bin: int, resize: List[int],
+                      input_folder: Path) -> pd.DataFrame:
+    """
+    Get the coverage of the regions for every bigwig file.
+    :param df_exp: A dataframe containing the bigwig file that \
+    we want to analyse. Its columns 'bigwig', 'condition' and \
+    'replicate' are read.
+    :param regions: The regions to visualise
+    :param nb_bin: The number of bin used to resize the regions
+    :param resize: A list of two int: the number of nucleotides \
+    of the regions localised at each sides of the genomic regions inside \
+    `regions` and the number of bin used to represent them.
+    :param input_folder: Folder where the bigwig file are located
+    :return: The full coverage table (one sample table per line of \
+    `df_exp`, concatenated with a new index)
+    """
+    sample_tables = []
+    for row_idx in range(len(df_exp)):
+        sample = df_exp.iloc[row_idx, :]
+        bw_file = input_folder / sample['bigwig']
+        print(f"working on file {bw_file}")
+        sample_tables.append(
+            create_sample_table(bw_file, regions, sample['replicate'],
+                                nb_bin, resize, sample['condition']))
+    return pd.concat(sample_tables, axis=0, ignore_index=True)
+def create_df_summary(df_cov: pd.DataFrame, figure_type: str, nb_bin: int,
+                      environment: List[int],
+                      region_name: str, order_condition: List[str]
+                      ) -> pd.DataFrame:
+    """
+    Summarize the data in df_cov.
+
+    The coverage is first averaged for each bin, condition and \
+    replicate. This table is returned as it is for a metagene. For any \
+    other figure type, the coverage is then averaged for each condition \
+    and replicate and, when surrounding regions are analysed \
+    (`environment[0]` different from 0), for each location (before, \
+    inside or after the region of interest).
+    :param df_cov: A dataframe of coverage for each bin.
+    :param figure_type: The kind of figure to make (metagene or barplot)
+    :param nb_bin: The number of bin used to represent the region of interest
+    :param environment: A list of two int. The first contains the number of \
+    nucleotide to represent around the region of interest and the second,
+    the number of bin used to represent those surrounding regions.
+    :param region_name: the name of the region analysed
+    :param order_condition: The order of conditions (only used when \
+    surrounding regions are analysed)
+    :return: The summarised dataframe
+    """
+    df_sum = df_cov.groupby(['bin', 'condition', 'replicate']).mean()\
+        .reset_index()
+    if figure_type == "metagene":
+        return df_sum
+    has_environment = environment[0] != 0
+
+    def _locate(bin_id: int) -> str:
+        # Negative bins are before the region, bins from nb_bin are after
+        if bin_id < 0:
+            return f"before_{region_name}"
+        if bin_id >= nb_bin:
+            return f"after_{region_name}"
+        return region_name
+
+    if has_environment:
+        # The location must come from the bins of df_sum itself: df_cov
+        # has one row per region and bin, so its index does not line up
+        # with the index of the summarised table.
+        df_sum['location'] = df_sum['bin'].apply(_locate)
+    df_sum.drop('bin', axis=1, inplace=True)
+    col_merge = ['condition', 'replicate']
+    if has_environment:
+        col_merge.append('location')
+    df_sum = df_sum.groupby(col_merge).mean().reset_index()
+    if 'location' in df_sum.columns:
+        # Ordered categories give a meaningful sort: conditions in the
+        # wanted order, then before / inside / after the region.
+        df_sum['location'] = pd.Categorical(
+            df_sum['location'], ordered=True,
+            categories=[f"before_{region_name}", region_name,
+                        f"after_{region_name}"]
+        )
+        df_sum['condition'] = pd.Categorical(
+            df_sum['condition'], ordered=True,
+            categories=order_condition
+        )
+        df_sum.sort_values(['condition', 'location'], ascending=True,
+                           inplace=True)
+    return df_sum
+def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
+                    border_names: List[str], nb_bin: int,
+                    environment: List[int], region_name: str,
+                    output: Path) -> None:
+    """
+    Create a metagene figure on the region of interest and save it as a \
+    pdf file in `output`.
+    :param df_sum: The summarized coverage table (columns bin, coverage, \
+    condition and replicate)
+    :param show_replicate: True to draw one line per replicate, false to \
+    draw one line per condition with the standard deviation
+    :param border_names: The name of borders of the region of interest. \
+    A border is only drawn when its name is not empty.
+    :param nb_bin: The number of bins representing the regions of interest
+    :param environment:  A list of two int. The first contains the number of \
+    nucleotide to represent around the region of interest and the second,
+    the number of bin used to represent those surrounding regions.
+    :param output: Folder where the figure will be created
+    :param region_name: The region of interest
+    """
+    sns.set(context='poster', style='white')
+    if show_replicate:
+        line_kwargs = {'style': 'replicate', 'ci': None}
+    else:
+        line_kwargs = {'ci': "sd"}
+    # x and y are given by keyword, as in figure_barplot: recent seaborn
+    # versions do not accept them as positional arguments.
+    g = sns.relplot(x='bin', y='coverage', hue='condition', data=df_sum,
+                    kind='line', height=12, aspect=1.7, **line_kwargs)
+    # Border names are written just below the top of the y axis
+    y_val = g.ax.get_ylim()[1] * 0.99
+    if border_names[0] != '':
+        # The region of interest starts at bin 0 ...
+        g.ax.axvline(x=0, color='k', linestyle='--', alpha=0.1)
+        g.ax.annotate(border_names[0], [0, y_val], ha="center", va='center')
+    if border_names[1] != '':
+        # ... and stops at bin nb_bin - 1
+        g.ax.axvline(x=nb_bin - 1, color='k', linestyle='--', alpha=0.1)
+        g.ax.annotate(border_names[1], [nb_bin - 1, y_val], ha="center",
+                      va='center')
+    g.set_xlabels('Bins')
+    g.set_ylabels('Coverage')
+    plt.subplots_adjust(top=0.9)
+    title = f"Average coverage in region '{region_name}'"
+    if environment[0] != 0:
+        title += f"\nand in their surrounding regions of {environment[0]} nt"
+    g.fig.suptitle(title)
+    g.savefig(output / f"metagene_{region_name}_{nb_bin}bin_"
+              f"{environment[0]}_nt-around-{environment[1]}-bin.pdf")
+    g.fig.clf()
+    # clf() only empties the figure: close it so pyplot forgets it
+    plt.close(g.fig)
+def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
+                   nb_bin: int,
+                   environment: List[int], region_name: str,
+                   output: Path) -> None:
+    """
+    Create a barplot figure on the region of interest and save it as a \
+    pdf file in `output`.
+    :param df_sum: The summarized coverage table (columns condition, \
+    coverage and replicate)
+    :param show_replicate: True to draw one bar per replicate, false to \
+    draw one bar per condition with the standard deviation
+    :param nb_bin: The number of bins representing the regions of interest
+    :param environment:  A list of two int. The first contains the number of \
+    nucleotide to represent around the region of interest and the second,
+    the number of bin used to represent those surrounding regions.
+    :param output: Folder where the figure will be created
+    :param region_name: The region of interest
+    """
+    sns.set(context='poster', style='white')
+    if show_replicate:
+        bar_kwargs = {'hue': "replicate", 'ci': None}
+    else:
+        bar_kwargs = {'ci': 'sd'}
+    g = sns.catplot(x="condition", y="coverage",
+                    kind="bar", data=df_sum, height=12, aspect=1.77,
+                    **bar_kwargs)
+    g.set_xlabels('')
+    g.set_ylabels('Coverage')
+    plt.subplots_adjust(top=0.9)
+    title = f"Average coverage in region '{region_name}'"
+    g.fig.suptitle(title)
+    g.savefig(output / f"barplot_{region_name}_{nb_bin}bin_"
+              f"{environment[0]}_nt-around-{environment[1]}-bin.pdf")
+    g.fig.clf()
+    # clf() only empties the figure: close it so pyplot forgets it (this
+    # function can be called once per location).
+    plt.close(g.fig)
+def create_figure(design: Path, bw_folder: Path, region_bed: Path,
+                  region_name: str, nb_bin: int = 100,
+                  figure_type: str = 'metagene',
+                  show_replicate: bool = True, environment: List[int] = (0, 0),
+                  border_names: List[str] = ('', ''),
+                  output: Path = Path('.')) -> None:
+    """
+    Create A metagene or a barplot figure from bigwig file on regions defined \
+    in the bed file provided with 'region_bed' parameter.
+
+    The coverage table is saved in `output` and loaded from there if it \
+    already exists.
+    :param design: A tabulated file containing 3 columns. The first columns \
+    contains a bigwig filename, the second contains the condition name and \
+    the last one contains the replicate of the condition.
+    :param bw_folder: The folder containing the bigwig file mentioned in \
+    the first column of the 'design' table.
+    :param region_bed: A bed file containing the regions to visualise
+    :param region_name: The name of the region analysed
+    :param nb_bin: The number of bins used to represents the regions of \
+    'region_bed'.
+    :param figure_type: The kind of representation wanted (barplot or metagene)
+    :param show_replicate: True to create a figure showing the replicate \
+    false else.
+    :param environment: A list of two int. The first contains the number of \
+    nucleotide to represent around the region of interest and the second,
+    the number of bin used to represent those surrounding regions.
+    :param border_names: The name of the borders
+    :param output: Folder where the results will be created (it is \
+    created if it does not exist)
+    """
+    df_exp = pd.read_csv(design, sep="\t")
+    regions = load_bed(region_bed)
+    # The coverage table and the figures are written in this folder
+    output.mkdir(parents=True, exist_ok=True)
+    region_bed_name = region_bed.name.replace('.bed', '')
+    # NOTE(review): the name of the saved table does not depend on the
+    # design file, so a table computed with another design is reused.
+    outfile = f'tmp_cov_table_{region_bed_name}_{nb_bin}bin_' \
+              f'{environment[0]}_nt-around-{environment[1]}-bin.txt.gz'
+    cov_file = output / outfile
+    if cov_file.is_file():
+        df_cov = pd.read_csv(cov_file, sep="\t", compression='gzip')
+    else:
+        df_cov = create_full_table(df_exp, regions, nb_bin, environment,
+                                   bw_folder)
+        df_cov.to_csv(cov_file, sep="\t", index=False, compression='gzip')
+    # Conditions without duplicates, in their order of appearance
+    ordered_condition = list(dict.fromkeys(df_exp['condition'].to_list()))
+    df_sum = create_df_summary(df_cov, figure_type, nb_bin, environment,
+                               region_name, ordered_condition)
+    if figure_type == "metagene":
+        figure_metagene(df_sum, show_replicate, border_names, nb_bin,
+                        environment, region_name, output)
+    elif 'location' in df_sum.columns:
+        # One barplot per location (before, inside and after the region)
+        for cur_region in df_sum['location'].unique():
+            df_tmp = df_sum.loc[df_sum['location'] == cur_region, :]
+            figure_barplot(df_tmp, show_replicate, nb_bin, environment,
+                           cur_region, output)
+    else:
+        figure_barplot(df_sum, show_replicate, nb_bin, environment,
+                       region_name, output)
+# Run the docstring tests of this module when it is executed directly.
+if __name__ == "__main__":
+    testmod()
--- a/tests/files/coverage_in_bw_file.txt
+++ b/tests/files/coverage_in_bw_file.txt
+1	0	9	0
+1	9	10	1
+1	10	11	100
+1	11	12	75
+1	12	13	50
+1	13	15	25
+1	15	20	10
+1	20	25	2
+1	25	28	1
+1	28	100	0
+1	100	110	1
+1	110	120	2
+1	120	125	5
+1	125	128	10
+1	128	130	15
+1	130	131	30
+1	131	132	50
+1	132	133	100
+1	133	134	20
+1	134	135	5
+1	135	999	0
--- a/tests/files/exons.bed
+++ b/tests/files/exons.bed
+#ref	start	end	id	score	strand
+18	28681865	28682388	1_1	0	-
+18	28681183	28681432	1_2	0	-
+18	28673521	28673606	1_3	0	-
+18	28672063	28672263	1_4	0	-
+18	28671489	28671530	1_5	0	-
+18	28670990	28671110	1_6	0	-
+18	28669401	28669557	1_7	0	-
+18	28667631	28667776	1_8	0	-
+18	28666538	28666705	1_9	0	-
--- a/tests/files/genes.bed
+++ b/tests/files/genes.bed
+#ref	start	end	id	score	strand
+18	28645943	28682388	1	DSC2	-
+18	28709190	28742819	2	DSC1	-
+18	28898050	28937394	3	DSG1	+
+18	28956739	28994869	4	DSG4	+
+13	45766989	45775176	5	KCTD4	-
+13	45911001	45915347	6	TPT1	-
+18	48918411	49088839	7	AC011260.1	+
+18	49866541	51062273	8	DCC	+
+13	45967450	45992516	9	SLC25A30	-
--- a/tests/files/list_genes.txt
+++ b/tests/files/list_genes.txt
+73
+75
+89
+123
+128
--- a/tests/files/small.bw
+++ b/tests/files/small.bw
--- a/tests/run_tests.py
+++ b/tests/run_tests.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description: Run the docstring tests of the python modules located in \
+the src folder.
+"""
+import doctest
+from pathlib import Path
+from typing import List
+import unittest
+import sys
+sys.path.insert(0, str(Path(__file__).parents[1].resolve()))
+# recover ignored files
+def get_ignored_files() -> List[str]:
+    """
+    Recover the python files listed in the .gitignore located in the \
+    parent folder of this file's folder.
+
+    :return: The ignored python files written as dotted module names \
+    ('.py' removed, '/' replaced by '.'). An empty list is returned if \
+    there is no .gitignore file.
+    """
+    gitignore = Path(__file__).parents[1] / '.gitignore'
+    if gitignore.is_file():
+        with gitignore.open('r') as handle:
+            patterns = handle.read().splitlines()
+        return [pattern.replace('.py', '').replace('/', '.')
+                for pattern in patterns if pattern.endswith('.py')]
+    return []
+# Loading every python file in this folder
+# Paths of every python file found (recursively) in the "src" folder,
+# relative to the parent folder of this file's folder.
+list_mod = [str(mfile.relative_to(Path(__file__).resolve().parents[1]))
+            for mfile in list((Path(__file__).resolve().parents[1] / "src").rglob('*.py'))]
+# Paths turned into dotted module names. Any path containing '__init__',
+# '__main__', 'test' or 'config' is left out.
+list_mod2 = [m.replace('.py', '').replace('/', '.') for m in list_mod
+             if '__init__' not in m
+             and '__main__' not in m
+             and 'test' not in m
+             and 'config' not in m]
+# Modules listed in .gitignore are not tested. NOTE: get_ignored_files()
+# is evaluated once for each module of list_mod2.
+final_mod = [mod for mod in list_mod2 if mod not in get_ignored_files()]
+def load_tests(loader, tests, ignore):
+    """
+    Add the docstring tests of every module of `final_mod` to `tests`.
+
+    :param loader: Not used here
+    :param tests: The test suite receiving the docstring tests
+    :param ignore: Not used here
+    :return: The test suite `tests`
+    """
+    tests.addTests(doctest.DocTestSuite(module_name)
+                   for module_name in final_mod)
+    return tests
+# Run the tests with unittest when this file is executed directly.
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file