figure_maker.py

#!/usr/bin/env python3

# -*- coding: UTF-8 -*-

"""
Description:
"""

from pathlib import Path
from typing import List, Union, Any, Tuple, Optional
from doctest import testmod
from ..bed_handler.config import TestConfig
import pandas as pd
import pyBigWig as pbw
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
from loguru import logger
import matplotlib as mpl
import matplotlib.font_manager
from rpy2.robjects import r, pandas2ri, FloatVector
from matplotlib import collections as mc
import numpy as np


def load_bed(bed: Path, bed_name: str) -> List[List[Union[int, str]]]:
    """
    Read a bed file and return the lines within it.

    :param bed: A bed file containing the regions of interest
    :param bed_name: The name of the regions of interest inside the bed file
    :return:The list of feature inside the bed

    >>> load_bed(TestConfig.gene_bed, 'gene_test')[0]
    ['18', 28645943, 28682388, 1, 'DSC2', '-', 'gene_test']
    """
    list_regions = []
    with bed.open('r') as inbed:
        for line in inbed:
            if not line.startswith("#"):
                cline = line.replace("\n", "").split("\t")
                my_id = int(cline[3]) if cline[3].isdigit() else cline[3]
                list_regions.append([cline[0], int(cline[1]), int(cline[2]),
                                     my_id, cline[4], cline[5],
                                     bed_name])
    return list_regions


def load_beds(beds: List[Path], bed_names: List[str]
              ) -> List[List[Union[int, str]]]:
    """
    Read a bed file and return the lines within it.

    :param beds: A list of bed files containing the regions of interest
    :param bed_names: A list of names indentifying regions insides the given \
    beds.
    :return:The list of feature inside the beds file

    >>> load_beds([TestConfig.gene_bed, TestConfig.gene_bed],
    ... ['gene1', 'gene2'])[0]
    ['18', 28645943, 28682388, 1, 'DSC2', '-', 'gene1']
    >>> load_beds([TestConfig.gene_bed, TestConfig.gene_bed],
    ... ['gene1', 'gene2'])[-1]
    ['13', 45967450, 45992516, 9, 'SLC25A30', '-', 'gene2']
    """
    regions = []
    for i in range(len(beds)):
        regions += load_bed(beds[i], bed_names[i])
    return regions


def inspect_bigwig_regions(bw: Any, region: List,
                           replicate: str, nb_bin: int, resize: List[int],
                           condition_name: str,
                           ) -> pd.DataFrame:
    """
    get the coverage value inside the bigwig region `region`.

    :param bw: A opened bigwig file
    :param region: The region of interest
    :param replicate: The replicate name
    :param nb_bin: The number of bin that will represent the region
    :param resize: The number of nucleotide used to extend the region
    in both side
    :param condition_name: the name of the condition
    :return: a table with the coverage of this region

    >>> my_bw = pbw.open(str(TestConfig.small_bw))
    >>> mregion = ['1', 10, 25, 1, 'Test', '+', 'exon']
    >>> inspect_bigwig_regions(my_bw, mregion, 'R1', 5, [4, 2], 'cond1')
        coverage  bin condition replicate region
    0   0.000000   -2     cond1        R1   exon
    1   0.500000   -1     cond1        R1   exon
    2  75.000000    0     cond1        R1   exon
    3  20.000000    1     cond1        R1   exon
    4  10.000000    2     cond1        R1   exon
    5   4.666667    3     cond1        R1   exon
    6   2.000000    4     cond1        R1   exon
    7   1.000000    5     cond1        R1   exon
    8   0.500000    6     cond1        R1   exon
    >>> mregion = ['1', 110, 133, 1, 'Test', '-', 'exon2']
    >>> inspect_bigwig_regions(my_bw, mregion, 'R1', 5, [4, 2], 'cond1')
       coverage  bin condition replicate region
    0      0.00   -2     cond1        R1  exon2
    1     12.50   -1     cond1        R1  exon2
    2     42.00    0     cond1        R1  exon2
    3      8.00    1     cond1        R1  exon2
    4      4.25    2     cond1        R1  exon2
    5      2.00    3     cond1        R1  exon2
    6      2.00    4     cond1        R1  exon2
    7      1.00    5     cond1        R1  exon2
    8      1.00    6     cond1        R1  exon2
    >>> inspect_bigwig_regions(my_bw, mregion, 'R1', 5, [0, 0], 'cond1')
       coverage  bin condition replicate region
    0     42.00    0     cond1        R1  exon2
    1      8.00    1     cond1        R1  exon2
    2      4.25    2     cond1        R1  exon2
    3      2.00    3     cond1        R1  exon2
    4      2.00    4     cond1        R1  exon2
    """
    r_start = max(region[1], 0)
    r_end = min(region[2], bw.chroms(region[0]))
    if r_end - r_start < nb_bin:
        return pd.DataFrame()
    val = bw.stats(region[0],
                   max(region[1], 0),
                   min(region[2], bw.chroms(region[0])),
                   nBins=nb_bin,
                   exact=True)
    bins = list(range(len(val)))
    if len(bins) != nb_bin:
        raise ValueError("The lenght of bins should be equals to nb_bin")
    max_loc = max(region[1] - resize[0], 0)

    min_loc = min(region[2] + resize[0], bw.chroms(region[0]))
    if resize[0] > 0:
        val_before = bw.stats(region[0], max_loc, region[1], nBins=resize[1],
                              exact=True)
        val_after = bw.stats(region[0], region[2], min_loc, nBins=resize[1],
                             exact=True)
    else:
        val_before = []
        val_after = []
    if None in val_after:
        print(f"Warning ! None values found in {region} - "
              f"{[region[0], region[2], min_loc]}")
    if region[5] == "+":
        bin_before = list(range(-len(val_before), 0))
        bin_after = list(range(bins[-1] + 1,
                               bins[-1] + 1 + len(val_after)))
        val = val_before + val + val_after
    else:
        bin_before = list(range(-len(val_after), 0))
        bin_after = list(range(bins[-1] + 1,
                               bins[-1] + 1 + len(val_before)))
        val = val_after[::-1] + val[::-1] + val_before[::-1]
    bins = bin_before + bins + bin_after
    dic = {"coverage": val, "bin": bins}
    df = pd.DataFrame(dic)
    df['condition'] = [condition_name] * df.shape[0]
    df['replicate'] = [replicate] * df.shape[0]
    df['region'] = [region[6]] * df.shape[0]
    return df


def create_sample_table(bw_file: Path, regions: List[List],
                        replicate: str, nb_bin: int, resize: List[int],
                        condition_name: str,
                        ) -> pd.DataFrame:
    """
    Get the table for all the regions of interest

    :param bw_file: A bigwig file
    :param regions: The regions of interest
    :param replicate: The replicate name
    :param nb_bin: The number of bin that will represent the region
    :param resize: The number of nucleotide used to extend the region
    in both side
    :param condition_name: the name of the condition
    :return: a table with the coverage of this region
    """
    list_df = []
    bw = pbw.open(str(bw_file))
    for region in tqdm(regions, desc="scanning coverage ..."):
        list_df.append(inspect_bigwig_regions(bw, region, replicate, nb_bin,
                                              resize, condition_name))
    r = sum([df.empty for df in list_df])
    if r > 0:
        logger.warning(f"They were {r} bed feature that could not be reported "
                       f"because of their small size")
    return pd.concat(list_df, axis=0, ignore_index=True)


def create_full_table(df_exp: pd.DataFrame, regions: List[List],
                      nb_bin: int, resize: List[int],
                      input_folder: Path) -> pd.DataFrame:
    """
    get the regions for every bigwig files.

    :param df_exp: A dataframe containing the bigwig file that \
    we want to analyse
    :param regions: The regions to visualise
    :param nb_bin: The number of bin used to resize the regions
    :param resize: The number of nucleotides \
    of the regions localised at each sides of the genomic regions inside
    `regions`.
    :param input_folder: Folder where the bigwig file are located
    :return: The full coverage table
    """
    list_df = []
    for i in range(df_exp.shape[0]):
        mline = df_exp.iloc[i, :]
        bw_file = input_folder / mline['bigwig']
        print(f"working on file {bw_file}")
        condition = mline['condition']
        replicate = mline['replicate']
        list_df.append(create_sample_table(bw_file, regions, replicate,
                                           nb_bin, resize, condition))
    return pd.concat(list_df, axis=0, ignore_index=True)


def merge_condition_region_col(df: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
    """

    :param df: A dataframe of mean coverage for each bin.
    :return: The dataframe with the region of condition column merged and \
    the name of the merged column
    """
    if len(df['region'].unique()) == 1:
        condition_col = 'condition'
        df.drop('region', axis=1, inplace=True)
    elif len(df['condition'].unique()) == 1:
        condition_col = 'region'
        df.drop('condition', axis=1, inplace=True)
    else:
        condition_col = 'condition-region'
        df[condition_col] = df['condition'] + "-" + df['region']
        df.drop(['condition', 'region'], axis=1, inplace=True)
    return df, condition_col


def create_df_summary(df_cov: pd.DataFrame, figure_type: str, nb_bin: int,
                      environment: List[int],
                      region_name: str, order_condition: List[str],
                      order_bed_name: List[str],
                      ) -> Tuple[pd.DataFrame, str]:
    """
    summarize the data in df_cov.

    :param df_cov: A dataframe of coverage for each bin.
    :param figure_type: The kind of figure to make (metagene or barplot)
    :param nb_bin: The number of bin used to represent the region of interest
    :param environment: A list of two int. The first contains the number of \
    nucleotide to represent around the region of interest and the second,
    the number of bin used to represent those surrounding regions.
    :param region_name: the name of the region analysed
    :param order_condition: The order of conditions
    :param order_bed_name: The order of bed name to respect
    :return: The summarised dataframe and the condition col
    """
    df_sum = df_cov.groupby(['bin', 'condition', 'region', 'replicate']) \
        .mean() \
        .reset_index()
    if figure_type == "metagene":
        if environment[0] != 0:
            df_sum['location'] = df_sum['bin'].apply(
                lambda x: f"before_{region_name}" if x < 0 else
                f"after_{region_name}" if x >= nb_bin else region_name)
        df_sum, condition_col = merge_condition_region_col(df_sum)
        return df_sum, condition_col
    df_sum.drop('bin', axis=1, inplace=True)
    if environment[0] != 0:
        col_merge = ['condition', 'region', 'replicate', 'location']
    else:
        col_merge = ['condition', 'region', 'replicate']
    df_sum = df_sum.groupby(col_merge).mean().reset_index()
    if 'location' in df_sum.columns:
        df_sum['location'] = pd.Categorical(
            df_sum['location'], ordered=True,
            categories=[f"before_{region_name}", region_name,
                        f"after_{region_name}"]
        )
        df_sum['condition'] = pd.Categorical(
            df_sum['condition'], ordered=True,
            categories=order_condition
        )
        df_sum['region'] = pd.Categorical(
            df_sum['region'], ordered=True,
            categories=order_bed_name
        )
        df_sum.sort_values(['condition', 'region', 'location'], ascending=True,
                           inplace=True)
    df_sum, condition_col = merge_condition_region_col(df_sum)
    return df_sum, condition_col


def line_maker(list_pval, up_value, position=0):
    """
    Create a list of lines, with their colour.

    Create a line if the a p-value in list_pval[i] is below 0.05.
    If up_mean[i] > down_mean[i] the line will be green, purple else.
    :param list_pval: (list of float), list of pvalue get by the comparison
    of a propensity scale in a particular sequence position in an
    up-regulated and down_regulated set of sequences.
    :param up_value: (int) the ordinate coordinates where the line will be
    placed.
    :param position: (int) the abscissa position to begin to draw the lines
    :return: lines - (list of list of 2 tuple), the list of 2 tuple corresponds
    to a lines with the coordinates [(x1, y1), (x2, y2)]
    """
    lcolor = []
    lines = []
    for i in range(len(list_pval)):
        if list_pval[i] <= 0.05:
            val = i + position
            lines.append([(val - 0.5, up_value), (val + 0.5, up_value)])
            lcolor.append("#000000")  # red
    return lines, lcolor


def paired_t_test(values1: List[float], values2: List[float]) -> float:
    """
    Get the p-value of a paired t-test for each bin

    :param values1: A list of values
    :param values2: Another list of values
    :return: The p-values of the paired t-test
    >>> paired_t_test([1, 2, 8], [5, 8, 15])
    0.02337551764357566
    """
    if len(values1) != len(values2):
        raise IndexError("values1 and values2 should have the same length")
    ttp = r("""
    function(values1, values2) {
        return(t.test(values1, values2, paired=T)$p.value)
    }
    """)
    return ttp(FloatVector(values1), FloatVector(values2))[0]


def compute_stats(dff: pd.DataFrame, y_line: float, group_col: str,
                  outfile: Path) -> Tuple[List[List[Tuple]], List]:
    """

    :param dff: A dataframe containing the coverage displayed in the figure
    :param y_line: The height of the p-value line
    :param group_col: A group column
    :param outfile: The file to save the stats
    :return: A list of lines coordinates in the form of [(x1, y1), (x2, y2)], \
    and the color associated to each line
    """
    df = dff.sort_values(["bin", group_col], ascending=True)
    groups = df[group_col].unique()
    if len(groups) != 2:
        raise NotImplementedError("Statistical analysis only implemented for "
                                  "2 groups of data")
    p_values_ttp = []
    grp1 = []
    vgrp1 = []
    grp2 = []
    vgrp2 = []
    for bin in df["bin"].unique():
        tmp = df[df["bin"] == bin]
        values1 = tmp.loc[tmp[group_col] == groups[0], "coverage"].values
        values2 = tmp.loc[tmp[group_col] == groups[1], "coverage"].values
        p_values_ttp.append(paired_t_test(values1, values2))
        grp1.append(np.mean(values1))
        grp2.append(np.mean(values2))
        vgrp1.append(";".join(map(str, [round(v, 3) for v in values1])))
        vgrp2.append(";".join(map(str, [round(v, 3) for v in values2])))
    stats_df = pd.DataFrame({"bin": df["bin"].unique(),
                             "p_values": p_values_ttp,
                             groups[0]: grp1, groups[1]: grp2,
                             f"val-{groups[0]}": vgrp1,
                             f"val-{groups[1]}": vgrp2,})
    stats_df.to_csv(outfile, sep="\t", index=False)
    return line_maker(p_values_ttp, y_line, min(df["bin"].unique()))


def display_stat(g: sns.FacetGrid, dff: pd.DataFrame, y_line: float,
                 group_col: str, stat: bool, outfile: Path) -> sns.FacetGrid:
    """
    Update the graphics by displaying stats.

    :param g: A seaborn FacetGrid objects corresponding to the metagene \
    figure
    :param dff: A dataframe containing the coverage displayed in the figure
    :param y_line: The height of the p-value line
    :param group_col: The column containing the groups analyzed
    :param outfile: The file where the metagene will be saved
    :return: The facetGrid with the stats
    """
    if not stat:
        return g
    stat_file = outfile.parent / f"{outfile.stem}.txt"
    lines, lcolor = compute_stats(dff, y_line, group_col, stat_file)
    lc = mc.LineCollection(lines, colors=lcolor, linewidths=5)
    g.ax.add_collection(lc)
    return g


def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
                    border_names: List[str], nb_bin: int,
                    environment: List[int], bed_name: str,
                    output: Path, norm: Union[int, Path],
                    condition_col: str, ylim: Optional[List[float]],
                    stat: bool = False) -> Path:
    """
    Create a metagene figure on the region of interest.

    :param df_sum: The summarized coverage table
    :param show_replicate: True to show the replicate, false else
    :param border_names: The name of borders of the region of interest
    :param nb_bin: The number of bins representing the regions of interest
    :param environment:  A list of two int. The first contains the number of \
    nucleotide to represent around the region of interest and the second,
    the number of bin used to represent those surrounding regions.
    :param output: Folder where the figure will be created
    :param bed_name: The name of considered regions
    :param norm: an integer corresponding to the bin used to normalise \
    the samples or a file containing the normalisations to apply to \
    each samples
    :param condition_col: The name of the condition columns
    :param ylim: The range of the y axis
    :param stat: A boolean indicating wether to perform statistical analysis
    :return: The name of the figure
    """
    font_files = matplotlib.font_manager.findSystemFonts(fontpaths=None,
                                                         fontext='ttf')
    font_list = matplotlib.font_manager.createFontList(font_files)
    matplotlib.font_manager.fontManager.ttflist.extend(font_list)

    sns.set(context='poster', style='white', font_scale=1.4)
    mpl.rcParams['font.family'] = 'Arial'
    if show_replicate:
        g = sns.relplot('bin', 'coverage', hue=condition_col, data=df_sum,
                        kind='line', style='replicate', ci=None,
                        height=12, aspect=1.7)
    else:
        g = sns.relplot('bin', 'coverage', hue=condition_col, data=df_sum,
                        kind='line', ci=None, height=12, aspect=1.7)
    y_val = g.ax.get_ylim()[1] * 0.99
    if border_names[0] != '':
        g.ax.axvline(x=0, color='k', linestyle='--', alpha=0.1)
        g.ax.annotate(border_names[0], [0, y_val], ha="center", va='center')
    if border_names[1] != '':
        g.ax.axvline(x=nb_bin - 1, color='k', linestyle='--', alpha=0.1)
        g.ax.annotate(border_names[1], [nb_bin - 1, y_val], ha="center",
                      va='center')
    g.set_xlabels('Bins')
    g.set_ylabels('Coverage')
    plt.subplots_adjust(top=0.9)
    tmp_bed_name = bed_name.replace("--", ", ")
    title = f"Average coverage in region '{tmp_bed_name}'"
    if environment[0] != 0:
        title += f" and in their surrounding regions of {environment[0]} nt"
    g.fig.suptitle(title, fontsize=30)
    tmp_bed_name = bed_name.replace("--", "-")
    outfile_title = f"metagene_{tmp_bed_name}_{nb_bin}bin_" \
                    f"{environment[0]}_nt-around-{environment[1]}-bin"
    if isinstance(norm, int):
        outfile_title += f"_b{norm}_norm"
    elif isinstance(norm, Path):
        outfile_title += f"_file_norm"
    outfile_title += ".pdf"
    if ylim[0] is not None:
        g.ax.set_ylim(ylim[0], ylim[1])
    ymin, ymax = g.ax.get_ylim()
    g = display_stat(g, df_sum, ymin + ((ymax - ymin) / 50), condition_col,
                     stat, output / outfile_title)
    g.ax.tick_params(left=True, bottom=True)
    g.savefig(output / outfile_title)
    g.fig.clf()
    return output / outfile_title


def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
                   nb_bin: int,
                   environment: List[int], region_name: str,
                   output: Path, norm: Union[int, Path],
                   condition_col: str) -> None:
    """
    Create a barplot figure on the region of interest.

    :param df_sum: The summarized coverage table
    :param show_replicate: True to show the replicate, false else
    :param nb_bin: The number of bins representing the regions of interest
    :param environment:  A list of two int. The first contains the number of \
    nucleotide to represent around the region of interest and the second,
    the number of bin used to represent those surrounding regions.
    :param output: Folder where the figure will be created
    :param region_name: The region of interest
    :param norm: an integer corresponding to the bin used to normalise \
    the samples or a file containing the normalisations to apply to \
    each samples
    :param condition_col: The name of the condition columns
    """
    sns.set(context='poster', style='white')
    if show_replicate:
        g = sns.catplot(x=condition_col, y="coverage", hue="replicate",
                        kind="bar", data=df_sum, height=12, aspect=1.77,
                        ci=None)
    else:
        g = sns.catplot(x=condition_col, y="coverage",
                        kind="bar", data=df_sum, height=12, aspect=1.77,
                        ci='sd')
    g.set_xlabels('')
    g.set_ylabels('Coverage')
    plt.subplots_adjust(top=0.9)
    rgt = region_name.replace('--', ', ')
    title = f"Average coverage in region '{rgt}'"
    g.fig.suptitle(title)
    rgt = region_name.replace('--', '-')
    outfile_title = f"barplot_{rgt}_{nb_bin}bin_" \
                    f"{environment[0]}_nt-around-{environment[1]}-bin"
    if isinstance(norm, int):
        outfile_title += f"_b{norm}_norm"
    elif isinstance(norm, Path):
        outfile_title += f"_file_norm"
    outfile_title += ".pdf"
    g.savefig(output / outfile_title)
    g.fig.clf()


def bin_normalisation(df: pd.DataFrame, norm: Union[int, Path],
                      outfile: Path) -> pd.DataFrame:
    """
    Normalise the bins coverage by the average coverage on a particular bin \
    or by a value given in a particular file.

    :param df: he dataframe of coverage
    :param norm: The bin used to normalise the sample or a file containing \
    the value used to normalise the samples.
    :param outfile: The table containing coverage values
    :return: the dataframe with normalised coverage
    """
    if isinstance(norm, int):
        if norm not in list(df['bin'].unique()):
            raise ValueError(f"the bin {norm} was not found in the coverage "
                             f"dataframe.")
        df_val = df.loc[df['bin'] == norm,
                        ['coverage', 'condition', 'region', 'replicate']] \
            .groupby(['condition', 'region', 'replicate']).mean().reset_index()
        df_val.rename({"coverage": "coef"}, axis=1, inplace=True)
        noutfile = outfile.parent / 'coef_table' / \
                   (outfile.name.replace(".txt.gz", "") +
                    f".txt")
        noutfile.parent.mkdir(exist_ok=True, parents=True)
        df_val.to_csv(noutfile, sep="\t", index=False)
    else:
        df_val = pd.read_csv(norm, sep="\t")
    if len(df_val['region'].unique()) > 1:
        df = df.merge(df_val, how="left", on=['condition', 'region',
                                              'replicate'])
    else:
        df_val.drop('region', axis=1, inplace=True)
        df = df.merge(df_val, how="left", on=['condition', 'replicate'])
    df['coverage'] = df['coverage'] / df['coef']
    df.drop('coef', axis=1, inplace=True)
    return df


def compute_diff(df_sum: pd.DataFrame) -> pd.DataFrame:
    """
    Compute the average coverage differences between two conditions.

    :param df_sum: A dataframe containing the coverage values between \
    two conditions
    :return: The dataframe with a coolumn diff

    >>> di = pd.DataFrame({'bin': [-25] * 6,
    ... 'condition': ['Ctrl', 'Ctrl', 'Ctrl', 'siDDX', 'siDDX', 'siDDX'],
    ... 'replicate': ['R1', 'R2', 'R3', 'R1', 'R2', 'R3'],
    ... 'coverage': [0.1,
    ...  0.2,
    ...  0.3,
    ...  0.15,
    ...  0.25,
    ...  0.35],
    ... 'location': ['before_ddx_down'] * 6})
    >>> compute_diff(di)
       bin         location  Ctrl  siDDX  diff
    0  -25  before_ddx_down   0.2   0.25  0.05
    """
    cond = df_sum["condition"].unique()
    df_sum = df_sum.groupby(["bin", "location", "condition"]).mean()
    if len(cond) != 2:
        raise IndexError("Only two different conditions must be given to "
                         "perform stats")
    df = df_sum.pivot_table(index=["bin", "location"],
                            columns="condition",
                            values="coverage").reset_index()
    df.columns.name = None
    df.groupby(["bin", "location"])
    df["diff"] = df[cond[1]] - df[cond[0]]
    return df


def get_shapiro_an_lm_pvalue(df: pd.DataFrame, location: str) -> pd.Series:
    """
    Return a series containing the shapiro and the t-test p-value of \
    the difference in coverage between 2 conditions at a particular location

    :param df: A dataframe containing the difference in coverage \
    between 2 conditions at a particular location
    :param location: The region of interest
    :return: a series containing the shapiro and the t-test p-value of \
    the difference in coverage between 2 conditions at a particular location
    """
    df2 = df[df["location"] == location].copy()
    df2.to_csv(f"{location}.txt", sep="\t", index=False)
    pandas2ri.activate()
    stat_s = r("""
    function(data){
        mod <- lm(diff ~ 1, data=data)
        s <- summary(mod)
        stat <- s$coefficients[1, "Pr(>|t|)"]
        shapiro <- shapiro.test(data$diff)$p.value
        return(c(shapiro, stat))
    }
    """)
    res = stat_s(df2)
    return pd.Series({"shapiro_p-value": res[0],
                      "t-test-p-value": res[1],
                      "location": location})


def create_figure(design: Path, bw_folder: Path, region_beds: List[Path],
                  bed_names: List[str], nb_bin: int = 100,
                  figure_type: str = 'metagene',
                  norm: Union[int, Path, None] = None,
                  show_replicate: bool = True, environment: List[int] = (0, 0),
                  border_names: List[str] = ('', ''),
                  output: Path = Path('.'),
                  ylim: List[float] = (None, None), stat: bool = False
                  ) -> None:
    """
    Create A metagene or a barplot figure from bigwig file on regions defined \
    in the bed file provided with 'region_bed' parameter.

    :param design: A tabulated file containing 3 columns. The first columns \
    contains a bigwig filename, the second contains the condition name and \
    the last one contains the replicate of the condition.
    :param bw_folder: The folder containing the bigwig file mentioned in \
    the first column of the 'design' table.
    :param region_beds: A list of bed files containing the regions to visualise
    :param bed_names: A list of names identifying regions insides the given \
    beds.
    :param nb_bin: The number of bins used to represents the regions of \
    'region_bed'.
    :param figure_type: The kind of representation wanted (barplot or metagene)
    :param norm: an integer corresponding to the bin used to normalise \
    the samples or a file containing the normalisations to apply to \
    each samples
    :param show_replicate: True to create a figure showing the replicate \
    false else.
    :param environment: A list of two int. The first contains the number of \
    nucleotide to represent around the region of interest and the second,
    the number of bin used to represent those surrounding regions.
    :param border_names: The name of the borders
    :param output: Folder where the results will be created
    :param ylim: The range of y-axis
    :param stat: A boolean indicating whether or not to perform \
    statistical analysis
    """
    if len(region_beds) != len(bed_names):
        raise IndexError("Parameter region_beds and bed_names should "
                         "have the same length")
    df_exp = pd.read_csv(design, sep="\t")
    regions = load_beds(region_beds, bed_names)
    region_bed_name = "-".join(b.name.replace('.bed', '') for b in region_beds)
    outfile = f'tmp_cov_table_{design.name.replace(".txt", "")}' \
              f'_{region_bed_name}_{nb_bin}bin_' \
              f'{environment[0]}_nt-around-{environment[1]}-bin'
    if isinstance(norm, int):
        outfile += f'_bin{norm}_norm'
    elif isinstance(norm, Path):
        outfile += f'_file_norm'
    outfile += '.txt.gz'
    cov_file = output / outfile
    df_cov = create_full_table(df_exp, regions, nb_bin, environment,
                               bw_folder)
    if norm is not None:
        df_cov = bin_normalisation(df_cov, norm, cov_file)
    ordered_condition = []
    for condition in df_exp['condition'].to_list():
        if condition not in ordered_condition:
            ordered_condition.append(condition)
    region_kind = "--".join(bed_names)
    df_sum, cond_col = create_df_summary(df_cov, figure_type, nb_bin,
                                         environment, region_kind,
                                         ordered_condition, bed_names)
    if figure_type == "metagene":
        figure_metagene(df_sum, show_replicate, border_names, nb_bin,
                        environment, region_kind, output, norm,
                        cond_col, ylim, stat)

    else:
        if 'location' in df_sum.columns:
            for cur_region in df_sum['location'].unique():
                df_tmp = df_sum.loc[df_sum['location'] == cur_region, :]
                figure_barplot(df_tmp, show_replicate, nb_bin, environment,
                               cur_region, output, norm, cond_col)
        else:
            figure_barplot(df_sum, show_replicate, nb_bin, environment,
                           region_kind, output, norm, cond_col)


if __name__ == "__main__":
    testmod()