Compare revisions

nfontrod · nfontrod · nfontrod · nfontrod · nfontrod · nfontrod
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ tests/files/test_genome.fa.fai
 coverage.xml
 .scannerwork/
 doc/build/*
+*.txt
+*.bed
+.vscode/*
--- a/src/bed_handler/config.py
+++ b/src/bed_handler/config.py
@@ -32,6 +32,7 @@ class BedConfig:
    sipp_vs_ctcf = base / "data" / "Sipp_exon_vs_CTCF.csv"
    all_vs_ctcf = base / 'data' / 'All_Exons_vs_CTCF.csv'
    exons_vs_ctcf = base / "data" / "All_Exons_vs_CTCF.csv"
+    expressed_genes = base / "data" / "5y_expressed_genes_basemean>5.txt"
    bed = OutputBed
    size = 5000

--- a/src/bed_handler/filter_bed.py
+++ b/src/bed_handler/filter_bed.py
@@ -8,7 +8,7 @@ a file, that can be find in a specific column in the bed file
 """
 from pathlib import Path
-from typing import List
+from typing import List, Union
 from .config import TestConfig, OutputBed
 import pandas as pd
 import lazyparser as lp
@@ -30,7 +30,8 @@ def select_ft_of_interest(gene_file: Path) -> List[int]:
            gene_id for gene_id in gene_list]
-def filter_bed(bed_file: Path, gene_list: List[int], col_name: str, keep: bool
+def filter_bed(bed_file: Path, gene_list: List[Union[int, float]],
+               col_name: str, keep: bool
               ) -> pd.DataFrame:
    """
    load a bed containing FasterDB gene and only recover the gene of \

--- a/src/bed_handler/select_regulated_near_ctcf_exons.py
+++ b/src/bed_handler/select_regulated_near_ctcf_exons.py
@@ -15,6 +15,7 @@ import warnings
 from .get_other_exon_in_same_gene import create_gene_bed4norm
 from pathlib import Path
 import lazyparser as lp
+from typing import List
 def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int,
@@ -126,6 +127,17 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int,
    return df
+def filter_expressed(exon_list: List[str]) -> List[str]:
+    """
+    Filter only expressed exons.
+    :param exon_list: A list of exons
+    :return: The list of expressed exons
+    """
+    egenes = BedConfig.expressed_genes.open('r').read().splitlines()
+    return [exon for exon in exon_list if exon.split("_")[0] in egenes]
 def create_bed_ctcf_exon(reg: str, threshold: int,
                         location: str, include0: bool = False,
                         near_ctcf: bool = True) -> None:
@@ -169,6 +181,7 @@ def create_bed_ctcf_exon(reg: str, threshold: int,
        bad_id = df['id'].to_list() if include0 \
            else df['id'].to_list() + df.loc[df['dist'] == 0, 'id'].to_list()
        list_exons = [e for e in tmp_exons if e not in bad_id]
+    list_exons = filter_expressed(list_exons)
    list_genes = [int(exon.split('_')[0]) for exon in list_exons]
    df_exon = filter_bed(BedConfig.exon_bed, list_exons)
    df_gene = filter_bed(BedConfig.gene_bed, list_genes)
@@ -239,6 +252,7 @@ def get_bed_ctcf_exon(exon_bed: str, threshold: int,
        bad_id = df['id'].to_list() if include0 \
            else df['id'].to_list() + df.loc[df['dist'] == 0, 'id'].to_list()
        list_exons = [e for e in tmp_exons if e not in bad_id]
+    list_exons = filter_expressed(list_exons)
    list_genes = [int(exon.split('_')[0]) for exon in list_exons]
    df_exon = filter_bed(BedConfig.exon_bed, list_exons)
    df_gene = filter_bed(BedConfig.gene_bed, list_genes)

--- a/src/commands.sh
+++ b/src/commands.sh
@@ -641,47 +641,58 @@ for exp in ${exps[*]}; do
 done
-bins=(0 99)
+# Figure 1C
-beds=(readthrough no_readthrough)
+names=(IP)
+designs=(data/designnew_exp_all_replicates_IP.txt)
+bins=(99) # 0)
+beds=(readthrough) # no_readthrough)
 for bed in ${beds[*]}; do
  for bin in ${bins[*]}; do
-    python3 -m src.visu \
+    for i in ${!designs[@]}; do
-        --design data/design_exp_all_replicates.txt \
+      python3 -m src.visu \
-        --bw_folder data/bigwig/ \
+          --design ${designs[i]} \
-        --region_bed results/bed_file/${bed}_expressed_gene.bed \
+          --bw_folder data/bigwig_newnorm/ \
-        --region_name ${bed} \
+          --region_bed results/bed_file/${bed}_expressed_gene.bed \
-        --output results/figures/  \
+          --region_name ${bed} \
-        --border_name TSS TTS \
+          --output results/figures/  \
-        --environment 10000 25 \
+          --border_name TSS TTS \
-        --show_replicate n \
+          --environment 10000 25 \
-        --figure_type metagene \
+          --show_replicate n \
-        --nb_bin 100 \
+          --figure_type metagene \
-        --norm ${bin}
+          --nb_bin 100 \
+          --norm ${bin} \
+          --stat True
-    mv results/figures/metagene_${bed}_100bin_10000_nt-around-25-bin_b${bin}_norm.pdf results/figures/all_replicates_${bed}_b${bin}_norm.pdf
+      mv results/figures/metagene_${bed}_100bin_10000_nt-around-25-bin_b${bin}_norm.pdf results/figures/Fig1C_${bed}_b${bin}_norm_${names[$i]}.pdf
+    done
  done
 done
-beds=(readthrough no_readthrough)
+names=(IP)
-bins=(0 99)
+designs=(data/designnew_exp_all_replicates_IP.txt)
-loc=(TSS TTS)
+beds=(readthrough) # no_readthrough)
+bins=(99) # 0)
+loc=(TTS) # TSS)
 for bed in ${beds[*]}; do
  for i in ${!bins[@]}; do
-    python3 -m src.visu \
+     for j in ${!designs[@]}; do
-          --design data/design_exp_all_replicates.txt \
+      python3 -m src.visu \
-          --bw_folder data/bigwig/ \
+            --design ${designs[i]} \
-          --region_bed results/bed_file/${bed}_expressed_gene_10kb.bed \
+            --bw_folder data/bigwig_newnorm/ \
-          --region_name ${bed} \
+            --region_bed results/bed_file/${bed}_expressed_gene_10kb.bed \
-          --output results/figures/  \
+            --region_name ${bed} \
-          --border_name TTS '' \
+            --output results/figures/  \
-          --environment 0 0 \
+            --border_name TTS '' \
-          --show_replicate n \
+            --environment 0 0 \
-          --figure_type metagene \
+            --show_replicate n \
-          --nb_bin 25 \
+            --figure_type metagene \
-          --norm "results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_${bed}_expressed_gene_100bin_10000_nt-around-25-bin_bin${bins[$i]}_norm.txt"
+            --nb_bin 25 \
+            --norm "results/figures/coef_table/tmp_cov_table_designnew_exp_all_replicates_${names[$j]}_${bed}_expressed_gene_100bin_10000_nt-around-25-bin_bin${bins[$i]}_norm.txt" \
-    mv results/figures/metagene_${bed}_25bin_0_nt-around-0-bin_file_norm.pdf results/figures/all_replicates_TTS10kb_${bed}_${loc[$i]}-bin_norm.pdf
+            --stat True
+    mv results/figures/metagene_${bed}_25bin_0_nt-around-0-bin_file_norm.pdf results/figures/Fig_1C_TTS10kb_${bed}_${loc[$i]}-bin_norm_${names[$j]}.pdf
+    done
  done
 done
@@ -772,13 +783,15 @@ python3 -m src.visu \
    --show_replicate n \
    --figure_type metagene \
    --nb_bin 100 \
-    --norm "None"
+    --norm "None" \
+    --stat True
 mv results/figures/metagene_all_gene_100bin_0_nt-around-0-bin.pdf results/figures/metagene_5y_rnaseq_TSS-2kb_all_gene.pdf
 #####################################################
 # Metagene figure of last exons in readthrough genes near (<=2000 nt) or far (> 2000nt) from a CTCF file and last exons from non readthrough genes
+# figures 5C
 #####################################################
 # Bed file containing the last exons of expressed gene with readthrough
@@ -803,8 +816,8 @@ for i in ${!list_names[*]}; do
  gbed=${bed/exon\.bed/gene-dup.bed}
  nbed=${gbed/\.bed/}
  python3 -m src.visu \
-      --design data/design_exp_all_replicates.txt \
+      --design data/designnew_exp_all_replicates_IP.txt \
-      --bw_folder data/bigwig/ \
+      --bw_folder data/bigwig_newnorm \
      --region_bed results/bed_file/${gbed} \
      --region_name ${cname} \
      --output results/figures/  \
@@ -818,8 +831,8 @@ for i in ${!list_names[*]}; do
  rm results/figures/metagene_${cname}_100bin_10000_nt-around-25-bin_b0_norm.pdf
  python3 -m src.visu \
-      --design data/design_exp_all_replicates.txt \
+      --design data/designnew_exp_all_replicates_IP.txt \
-      --bw_folder data/bigwig/ \
+      --bw_folder data/bigwig_newnorm \
      --region_bed results/bed_file/${bed} \
      --region_name ${cname} \
      --output results/figures/  \
@@ -828,10 +841,61 @@ for i in ${!list_names[*]}; do
      --show_replicate n \
      --figure_type metagene \
      --nb_bin 30 \
+      --stat True \
      -y 0.15 0.4 \
-      --norm "results/figures/coef_table/tmp_cov_table_design_exp_all_replicates_${nbed}_100bin_10000_nt-around-25-bin_bin0_norm.txt"
+      --norm "results/figures/coef_table/tmp_cov_table_designnew_exp_all_replicates_IP_${nbed}_100bin_10000_nt-around-25-bin_bin0_norm.txt" \
+      --stat True
  mv results/figures/metagene_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf results/figures/all_replicates_metaexon_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf
 done
-python3 -m src.gc_content -B results/bed_file/readthrough_last_exon_near_CTCF_2000_both_ddx_with0_exon.bed results/bed_file/readthrough_last_exon_far_CTCF_2000_both_ddx_with0_exon.bed results/bed_file/no_readthrough_expressed_last_exon.bed -b readthrough_ctcf readthrough no_readthrough -g data/Homo_sapiens.GRCh37.dna.primary_assembly.fa -f "exons" -e 2000
\ No newline at end of file
+###########################################################
+# Figures siPP vs siCTRL pour ddx_down_ctcf,
+# , ddx_down figure 5B
+###########################################################
+list_names=(ddx_down_ctcf ddx_down)
+bed_names=(CTCF_2000_both_ddx_down_with0_exon.bed Far_CTCF_2000_both_ddx_down_with0_exon.bed)
+for i in ${!list_names[*]}; do
+  cname=${list_names[i]}
+  bed=${bed_names[i]}
+  gbed=${bed/exon\.bed/gene-dup.bed}
+  nbed=${gbed/\.bed/}
+  python3 -m src.visu \
+      --design data/designnew_exp_all_replicates_IP.txt \
+      --bw_folder data/bigwig_newnorm/ \
+      --region_bed results/bed_file/${gbed} \
+      --region_name ${cname} \
+      --output results/figures/  \
+      --border_name TSS TTS \
+      --environment 10000 25 \
+      --show_replicate n \
+      --figure_type metagene \
+      --nb_bin 100 \
+      --norm '0'
+  rm results/figures/metagene_${cname}_100bin_10000_nt-around-25-bin_b0_norm.pdf
+  python3 -m src.visu \
+      --design data/designnew_exp_all_replicates_IP.txt \
+      --bw_folder data/bigwig_newnorm/ \
+      --region_bed results/bed_file/${bed} \
+      --region_name ${cname} \
+      --output results/figures/  \
+      --border_name " " " " \
+      --environment 10000 25 \
+      --show_replicate n \
+      --figure_type metagene \
+      --nb_bin 30 \
+      -y 0.15 0.4 \
+      --stat True \
+      --norm "results/figures/coef_table/tmp_cov_table_designnew_exp_all_replicates_IP_${nbed}_100bin_10000_nt-around-25-bin_bin0_norm.txt" \
+      --stat True
+  mv results/figures/metagene_${cname}_30bin_10000_nt-around-25-bin_file_norm.pdf results/figures/all_replicates_metagene_${cname}_100bin_10000_nt-around-25-bin_file_norm.pdf
+done
+python3 -m src.gc_content -B results/bed_file/readthrough_last_exon_near_CTCF_2000_both_ddx_with0_exon.bed results/bed_file/readthrough_last_exon_far_CTCF_2000_both_ddx_with0_exon.bed results/bed_file/no_readthrough_expressed_last_exon.bed -b readthrough_ctcf readthrough no_readthrough -g data/Homo_sapiens.GRCh37.dna.primary_assembly.fa -f "exons" -e 2000
--- a/src/visu/__main__.py
+++ b/src/visu/__main__.py
@@ -21,7 +21,8 @@ def launcher(design: str, bw_folder: str, region_beds: List[str],
             figure_type: str = 'metagene', norm: str = 'None',
             show_replicate: str = 'y', environment: List[int] = (0, 0),
             border_names: List[str] = ('', ''),
-             output: str = '.', ylim: List[float] = (None, None)) -> None:
+             output: str = '.', ylim: List[float] = (None, None),
+             stat: bool = False) -> None:
    """
    Create A metagene or a barplot figure from bigwig file on regions defined \
    in the bed file provided with 'region_bed' parameter.
@@ -48,6 +49,9 @@ def launcher(design: str, bw_folder: str, region_beds: List[str],
    :param border_names: The name of the borders
    :param output: Folder where the results will be created
    :param ylim: The y-axis range (default None)
+    :param stat: A boolean indicating whether to perform \
+    statistical analysis. This parameter can only be set to True \
+     if figure_type is 'metagene' (default: False)
    """
    if ylim[0] is not None and (len(ylim) != 2 or ylim[0] >= ylim[1]):
        raise ValueError("The ylim must have two values and the first value "
@@ -59,6 +63,9 @@ def launcher(design: str, bw_folder: str, region_beds: List[str],
                         f"greater than the second")
    show_rep = show_replicate.lower() == 'y'
    norm = int(norm) if norm.isdigit() else None if norm == 'None' else norm
+    if stat and figure_type != "metagene":
+        raise NotImplementedError(f"the stat parameter can only be used if "
+                                  f"figure type is metagene")
    if isinstance(norm, str):
        norm = Path(norm)
        if not norm.is_file():
@@ -66,7 +73,7 @@ def launcher(design: str, bw_folder: str, region_beds: List[str],
    reg_beds = [Path(p) for p in region_beds]
    create_figure(Path(design), Path(bw_folder), reg_beds,
                  region_names, nb_bin, figure_type, norm, show_rep,
-                  environment, border_names, Path(output), ylim)
+                  environment, border_names, Path(output), ylim, stat)
 launcher()
--- a/src/visu/figure_maker.py
+++ b/src/visu/figure_maker.py
@@ -18,6 +18,9 @@ from tqdm import tqdm
 from loguru import logger
 import matplotlib as mpl
 import matplotlib.font_manager
+from rpy2.robjects import r, pandas2ri, FloatVector
+from matplotlib import collections as mc
+import numpy as np
 def load_bed(bed: Path, bed_name: str) -> List[List[Union[int, str]]]:
@@ -259,12 +262,12 @@ def create_df_summary(df_cov: pd.DataFrame, figure_type: str, nb_bin: int,
        .mean() \
        .reset_index()
    if figure_type == "metagene":
+        if environment[0] != 0:
+            df_sum['location'] = df_sum['bin'].apply(
+                lambda x: f"before_{region_name}" if x < 0 else
+                f"after_{region_name}" if x >= nb_bin else region_name)
        df_sum, condition_col = merge_condition_region_col(df_sum)
        return df_sum, condition_col
-    if environment[0] != 0:
-        df_sum['location'] = df_sum['bin'].apply(
-            lambda x: f"before_{region_name}" if x < 0 else
-            f"after_{region_name}" if x >= nb_bin else region_name)
    df_sum.drop('bin', axis=1, inplace=True)
    if environment[0] != 0:
        col_merge = ['condition', 'region', 'replicate', 'location']
@@ -291,11 +294,118 @@ def create_df_summary(df_cov: pd.DataFrame, figure_type: str, nb_bin: int,
    return df_sum, condition_col
+def line_maker(list_pval, up_value, position=0):
+    """
+    Create a list of lines, with their colour.
+    Create a line if the a p-value in list_pval[i] is below 0.05.
+    If up_mean[i] > down_mean[i] the line will be green, purple else.
+    :param list_pval: (list of float), list of pvalue get by the comparison
+    of a propensity scale in a particular sequence position in an
+    up-regulated and down_regulated set of sequences.
+    :param up_value: (int) the ordinate coordinates where the line will be
+    placed.
+    :param position: (int) the abscissa position to begin to draw the lines
+    :return: lines - (list of list of 2 tuple), the list of 2 tuple corresponds
+    to a lines with the coordinates [(x1, y1), (x2, y2)]
+    """
+    lcolor = []
+    lines = []
+    for i in range(len(list_pval)):
+        if list_pval[i] <= 0.05:
+            val = i + position
+            lines.append([(val - 0.5, up_value), (val + 0.5, up_value)])
+            lcolor.append("#000000")  # red
+    return lines, lcolor
+def paired_t_test(values1: List[float], values2: List[float]) -> float:
+    """
+    Get the p-value of a paired t-test for each bin
+    :param values1: A list of values
+    :param values2: Another list of values
+    :return: The p-values of the paired t-test
+    >>> paired_t_test([1, 2, 8], [5, 8, 15])
+    0.02337551764357566
+    """
+    if len(values1) != len(values2):
+        raise IndexError("values1 and values2 should have the same length")
+    ttp = r("""
+    function(values1, values2) {
+        return(t.test(values1, values2, paired=T)$p.value)
+    }
+    """)
+    return ttp(FloatVector(values1), FloatVector(values2))[0]
+def compute_stats(dff: pd.DataFrame, y_line: float, group_col: str,
+                  outfile: Path) -> Tuple[List[List[Tuple]], List]:
+    """
+    :param dff: A dataframe containing the coverage displayed in the figure
+    :param y_line: The height of the p-value line
+    :param group_col: A group column
+    :param outfile: The file to save the stats
+    :return: A list of lines coordinates in the form of [(x1, y1), (x2, y2)], \
+    and the color associated to each line
+    """
+    df = dff.sort_values(["bin", group_col], ascending=True)
+    groups = df[group_col].unique()
+    if len(groups) != 2:
+        raise NotImplementedError("Statistical analysis only implemented for "
+                                  "2 groups of data")
+    p_values_ttp = []
+    grp1 = []
+    vgrp1 = []
+    grp2 = []
+    vgrp2 = []
+    for bin in df["bin"].unique():
+        tmp = df[df["bin"] == bin]
+        values1 = tmp.loc[tmp[group_col] == groups[0], "coverage"].values
+        values2 = tmp.loc[tmp[group_col] == groups[1], "coverage"].values
+        p_values_ttp.append(paired_t_test(values1, values2))
+        grp1.append(np.mean(values1))
+        grp2.append(np.mean(values2))
+        vgrp1.append(";".join(map(str, [round(v, 3) for v in values1])))
+        vgrp2.append(";".join(map(str, [round(v, 3) for v in values2])))
+    stats_df = pd.DataFrame({"bin": df["bin"].unique(),
+                             "p_values": p_values_ttp,
+                             groups[0]: grp1, groups[1]: grp2,
+                             f"val-{groups[0]}": vgrp1,
+                             f"val-{groups[1]}": vgrp2,})
+    stats_df.to_csv(outfile, sep="\t", index=False)
+    return line_maker(p_values_ttp, y_line, min(df["bin"].unique()))
+def display_stat(g: sns.FacetGrid, dff: pd.DataFrame, y_line: float,
+                 group_col: str, stat: bool, outfile: Path) -> sns.FacetGrid:
+    """
+    Update the graphics by displaying stats.
+    :param g: A seaborn FacetGrid objects corresponding to the metagene \
+    figure
+    :param dff: A dataframe containing the coverage displayed in the figure
+    :param y_line: The height of the p-value line
+    :param group_col: The column containing the groups analyzed
+    :param outfile: The file where the metagene will be saved
+    :return: The facetGrid with the stats
+    """
+    if not stat:
+        return g
+    stat_file = outfile.parent / f"{outfile.stem}.txt"
+    lines, lcolor = compute_stats(dff, y_line, group_col, stat_file)
+    lc = mc.LineCollection(lines, colors=lcolor, linewidths=5)
+    g.ax.add_collection(lc)
+    return g
 def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
                    border_names: List[str], nb_bin: int,
                    environment: List[int], bed_name: str,
                    output: Path, norm: Union[int, Path],
-                    condition_col: str, ylim: Optional[List[float]]) -> None:
+                    condition_col: str, ylim: Optional[List[float]],
+                    stat: bool = False) -> Path:
    """
    Create a metagene figure on the region of interest.
@@ -313,6 +423,8 @@ def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
    each samples
    :param condition_col: The name of the condition columns
    :param ylim: The range of the y axis
+    :param stat: A boolean indicating wether to perform statistical analysis
+    :return: The name of the figure
    """
    font_files = matplotlib.font_manager.findSystemFonts(fontpaths=None,
                                                         fontext='ttf')
@@ -354,9 +466,13 @@ def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
    outfile_title += ".pdf"
    if ylim[0] is not None:
        g.ax.set_ylim(ylim[0], ylim[1])
+    ymin, ymax = g.ax.get_ylim()
+    g = display_stat(g, df_sum, ymin + ((ymax - ymin) / 50), condition_col,
+                     stat, output / outfile_title)
    g.ax.tick_params(left=True, bottom=True)
    g.savefig(output / outfile_title)
    g.fig.clf()
+    return output / outfile_title
 def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
@@ -445,6 +561,71 @@ def bin_normalisation(df: pd.DataFrame, norm: Union[int, Path],
    return df
+def compute_diff(df_sum: pd.DataFrame) -> pd.DataFrame:
+    """
+    Compute the average coverage differences between two conditions.
+    :param df_sum: A dataframe containing the coverage values between \
+    two conditions
+    :return: The dataframe with a coolumn diff
+    >>> di = pd.DataFrame({'bin': [-25] * 6,
+    ... 'condition': ['Ctrl', 'Ctrl', 'Ctrl', 'siDDX', 'siDDX', 'siDDX'],
+    ... 'replicate': ['R1', 'R2', 'R3', 'R1', 'R2', 'R3'],
+    ... 'coverage': [0.1,
+    ...  0.2,
+    ...  0.3,
+    ...  0.15,
+    ...  0.25,
+    ...  0.35],
+    ... 'location': ['before_ddx_down'] * 6})
+    >>> compute_diff(di)
+       bin         location  Ctrl  siDDX  diff
+    0  -25  before_ddx_down   0.2   0.25  0.05
+    """
+    cond = df_sum["condition"].unique()
+    df_sum = df_sum.groupby(["bin", "location", "condition"]).mean()
+    if len(cond) != 2:
+        raise IndexError("Only two different conditions must be given to "
+                         "perform stats")
+    df = df_sum.pivot_table(index=["bin", "location"],
+                            columns="condition",
+                            values="coverage").reset_index()
+    df.columns.name = None
+    df.groupby(["bin", "location"])
+    df["diff"] = df[cond[1]] - df[cond[0]]
+    return df
+def get_shapiro_an_lm_pvalue(df: pd.DataFrame, location: str) -> pd.Series:
+    """
+    Return a series containing the shapiro and the t-test p-value of \
+    the difference in coverage between 2 conditions at a particular location
+    :param df: A dataframe containing the difference in coverage \
+    between 2 conditions at a particular location
+    :param location: The region of interest
+    :return: a series containing the shapiro and the t-test p-value of \
+    the difference in coverage between 2 conditions at a particular location
+    """
+    df2 = df[df["location"] == location].copy()
+    df2.to_csv(f"{location}.txt", sep="\t", index=False)
+    pandas2ri.activate()
+    stat_s = r("""
+    function(data){
+        mod <- lm(diff ~ 1, data=data)
+        s <- summary(mod)
+        stat <- s$coefficients[1, "Pr(>|t|)"]
+        shapiro <- shapiro.test(data$diff)$p.value
+        return(c(shapiro, stat))
+    }
+    """)
+    res = stat_s(df2)
+    return pd.Series({"shapiro_p-value": res[0],
+                      "t-test-p-value": res[1],
+                      "location": location})
 def create_figure(design: Path, bw_folder: Path, region_beds: List[Path],
                  bed_names: List[str], nb_bin: int = 100,
                  figure_type: str = 'metagene',
@@ -452,7 +633,8 @@ def create_figure(design: Path, bw_folder: Path, region_beds: List[Path],
                  show_replicate: bool = True, environment: List[int] = (0, 0),
                  border_names: List[str] = ('', ''),
                  output: Path = Path('.'),
-                  ylim: List[float] = (None, None)) -> None:
+                  ylim: List[float] = (None, None), stat: bool = False
+                  ) -> None:
    """
    Create A metagene or a barplot figure from bigwig file on regions defined \
    in the bed file provided with 'region_bed' parameter.
@@ -479,6 +661,8 @@ def create_figure(design: Path, bw_folder: Path, region_beds: List[Path],
    :param border_names: The name of the borders
    :param output: Folder where the results will be created
    :param ylim: The range of y-axis
+    :param stat: A boolean indicating whether or not to perform \
+    statistical analysis
    """
    if len(region_beds) != len(bed_names):
        raise IndexError("Parameter region_beds and bed_names should "
@@ -509,8 +693,9 @@ def create_figure(design: Path, bw_folder: Path, region_beds: List[Path],
                                         ordered_condition, bed_names)
    if figure_type == "metagene":
        figure_metagene(df_sum, show_replicate, border_names, nb_bin,
-                        environment, region_kind, output, norm, cond_col,
+                        environment, region_kind, output, norm,
-                        ylim)
+                        cond_col, ylim, stat)
    else:
        if 'location' in df_sum.columns:
            for cur_region in df_sum['location'].unique():
No results found