diff --git a/src/find_interaction_cluster/__main__.py b/src/find_interaction_cluster/__main__.py
index 71a3bcc3bec01abe717ee3219cf54c9199f32a99..37102fa6918cacba2c6e068d85b068acc3e4c4c9 100644
--- a/src/find_interaction_cluster/__main__.py
+++ b/src/find_interaction_cluster/__main__.py
@@ -13,17 +13,25 @@ from .config import ConfigGraph
 from typing import List
 from .install_hipMCL import install_hipmcl
 from .sf_and_communities import multiple_stat_launcher
-from .nt_and_community import multiple_nt_lmm_launcher
+from .nt_and_community import multiple_nt_lm_launcher
+from .create_ppi_files import ppi_stat_launcher
 import logging
 from ..logging_conf import logging_def
+from .colocalisation_n_ppi_analysis import coloc_ppi_stat_main
 
 
-@lp.parse
-def launcher(weight: List[int] = (1),
-             global_weight: List[int] = (0),
+@lp.parse(weight=range(1, 11), global_weight=range(11),
+          feature=('gene', 'exon'), region=('', 'exon', 'intron', 'gene'),
+          iteration="iteration > 20", test_type=('lm', 'perm'),
+          component_type=("nt", "aa", "dnt"))
+def launcher(weight: int = 1,
+             global_weight: int = 0,
              same_gene: bool = True,
              ps: int = ConfigGraph.cpu,
-             html_fig: bool = False, feature: str = 'exon',
+             html_fig: bool = False, feature: str = 'exon', region: str = '',
+             component_type: str = 'nt',
+             iteration: int = 1000, test_type: str = "lm",
+             project: str = "GSM1018963_GSM1018964",
              logging_level: str = "DISABLE"):
     """
     Script used to find communities inside  exon co-localized within a project
@@ -38,7 +46,17 @@ def launcher(weight: List[int] = (1),
     same gene (True) or not (False) (default True)
     :param  html_fig: True to display the html figure (default False).
     :param feature: The feature we want to analyse (default 'exon')
+    :param region: The region of a gene to analyse (used only if feature \
+    is 'gene') (default '') (can be 'gene', 'exon', 'intron').
+    :param component_type: The type of component to analyse; It \
+    can be 'nt', 'dnt' or 'aa' (default 'nt').
     :param logging_level: The level of data to display (default 'DISABLE')
+    :param iteration: the number of iteration for ppi and frequency analysis
+    :param project: The project name of interest \
+    (only used is global_weight = 0).
+    :param test_type: The king of test to perform for frequency analysis. \
+    (default 'lmm') (choose from 'lmm', 'perm')
+    :param ps: The number of processes to use
     """
     logging_def(ConfigGraph.community_folder, __file__, logging_level)
     if feature == "gene" and not same_gene:
@@ -47,12 +65,20 @@ def launcher(weight: List[int] = (1),
         same_gene = True
     if not ConfigGraph.get_hipmcl_prog().is_file():
         install_hipmcl("INFO")
-    multiple_community_launcher(1, weight, global_weight, same_gene, html_fig,
-                                feature, logging_level)
-    # multiple_stat_launcher(ps, weight, global_weight, same_gene, feature,
-    #                        logging_level)
-    multiple_nt_lmm_launcher(ps, weight, global_weight, same_gene, feature,
-                             logging_level)
+    multiple_community_launcher(weight, global_weight, project, same_gene,
+                                html_fig, feature, logging_level)
+    multiple_stat_launcher(ps, weight, global_weight, project, same_gene,
+                           feature, logging_level)
+    multiple_nt_lm_launcher(ps, weight, global_weight, project,
+                            same_gene, feature, region, component_type,
+                            test_type, iteration, logging_level)
+    if feature == "gene":
+        # ppi_stat_launcher(weight, global_weight, project, same_gene,
+        #                   ConfigGraph.ppi_threshold, iteration,
+        #                   logging_level)
+        coloc_ppi_stat_main(weight, global_weight, project, same_gene,
+                            iteration, logging_level)
+
 
 
 launcher()
\ No newline at end of file
diff --git a/src/find_interaction_cluster/colocalisation_n_ppi_analysis.py b/src/find_interaction_cluster/colocalisation_n_ppi_analysis.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e7efb6518672e2677f13baf055091c288792567
--- /dev/null
+++ b/src/find_interaction_cluster/colocalisation_n_ppi_analysis.py
@@ -0,0 +1,353 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description: The goal of this script is to check whether close \
+gene produce protein that interact between each other
+"""
+
+from .config import ConfigGraph
+from ..logging_conf import logging_def
+import logging
+from .community_finder import get_projects
+from pathlib import Path
+import pandas as pd
+from itertools import combinations, product
+from typing import List, Tuple
+from doctest import testmod
+import numpy as np
+from .ppi_scripts.config_ppi import ConfigPPI
+import seaborn as sns
+from scipy.stats import mannwhitneyu
+from ..fig_utils.stat_annot import add_stat_annotation
+
+
+def get_gene_list(gene_list: str) -> List[int]:
+    """
+    From a string containing a list of id of genes separated by a comma, \
+    return a list of those id.
+
+    :param gene_list: A list of gene id separated by a comma
+    :return: The list of gene id
+
+    >>> get_gene_list("1, 2, 3")
+    [1, 2, 3]
+    """
+    id_gene_list = list(map(int, gene_list.split(', ')))
+    return sorted(id_gene_list)
+
+
+def get_couples_in_community(row: pd.Series) -> pd.DataFrame:
+    """
+    From a line of a dataframe containing a community, return each \
+    couples of gene in that community.
+
+    :param row:  line of a dataframe containing a community
+    :return: A dataframe containing couples of gene in that community
+
+    >>> data = pd.Series({"community": "C1",
+    ...                   "nodes": 3,
+    ...                   "genes": "1, 2, 3"})
+    >>> get_couples_in_community(data)
+       gene1  gene2  community_size community
+    0      1      2               3        C1
+    1      1      3               3        C1
+    2      2      3               3        C1
+    """
+    couples = combinations(get_gene_list(row['genes']), 2)
+    dic = {"gene1": [], "gene2": []}
+    for gene1, gene2 in couples:
+        dic["gene1"].append(gene1)
+        dic["gene2"].append(gene2)
+    dic['community_size'] = [row['nodes']] * len(dic["gene1"])
+    dic['community'] = [row['community']] * len(dic["gene1"])
+    return pd.DataFrame(dic)
+
+
+def edge_community_table(df_com: pd.DataFrame) -> pd.DataFrame:
+    """
+    From a dataframe containing the communities of co-localised genes in the  \
+    nucleus, create a table showing each couple of gene in each community.
+
+    :param df_com: dataframe containing the communities of co-localised \
+    genes in the nucleus
+    :return: table showing each couple of gene in each community.
+
+    >>> data = pd.DataFrame({"community": ["C1", "C2"],
+    ...                      "nodes": [3, 2],
+    ...                      "genes": ["1, 2, 3", "4, 5"]})
+    >>> edge_community_table(data)
+       gene1  gene2  community_size community
+    0      1      2               3        C1
+    1      1      3               3        C1
+    2      2      3               3        C1
+    3      4      5               2        C2
+    """
+    list_df = [get_couples_in_community(df_com.iloc[i, :])
+               for i in range(df_com.shape[0])]
+    return pd.concat(list_df, axis=0, ignore_index=True)
+
+
+def ppi_good_order(line: str) -> List[int]:
+    """
+    From a line of a file coming containing each couple of genes whose \
+    proteins interact with each other return the line with the proper order.
+
+    :param line: a line of a file coming containing each couple of genes \
+    whose proteins interact with each other
+    :return: The lowest gene id, the highest gene id and their \
+    interaction score
+
+    >>> ppi_good_order("ENSP00000000233\\tENSP00000272298\\t490\\t" +
+    ... "5764\\t3926\\n")
+    [3926, 5764, 490]
+    """
+    cline = line.strip().split("\t")
+    return sorted([int(cline[3]), int(cline[4])]) + [int(cline[2])]
+
+
+def load_fasterdb_ppi(fasterdb_ppi: Path) -> pd.DataFrame:
+    """
+    Return a file containing each couple of genes whose proteins \
+    interact with each other.
+
+    :param fasterdb_ppi: A file containing couples of genes whose proteins \
+    interact with each other
+    :return: The dataframe of the interaction between genes \
+    at protein level and their score.
+
+    >>> from tempfile import NamedTemporaryFile
+    >>> f = NamedTemporaryFile(mode='w')
+    >>> _ = f.write("ENSP00000000233\\tENSP00000272298\\t490\\t" +
+    ... "5764\\t3926\\n" + "ENSP00000000233\\tENSP00000253401\\t198\\t" +
+    ... "5764\\t3273\\n")
+    >>> f.flush()
+    >>> mfile = Path(f.name)
+    >>> load_fasterdb_ppi(mfile)
+       gene1  gene2  string_score
+    0   3926   5764           490
+    1   3273   5764           198
+    """
+    with fasterdb_ppi.open("r") as ppi_file:
+        content = [
+            ppi_good_order(line)
+            for line in ppi_file
+            if not line.startswith("protein1")
+        ]
+    return pd.DataFrame(content, columns=['gene1', 'gene2', 'string_score'])
+
+
+def merge_community_and_score(df_cpl: pd.DataFrame, ppi_score: pd.DataFrame
+                              ) -> pd.DataFrame:
+    """
+    :param df_cpl: table showing each couple of gene in each community \
+    at gene level
+    :param ppi_score: The dataframe of the interaction between genes \
+    at protein level and their score.
+    :return: df_com with string score
+
+    >>> d1 = pd.DataFrame({"gene1": [1, 1, 2, 4], "gene2": [2, 3, 3, 5],
+    ... "community_size": [3, 3, 3, 2], "community": ["C1", "C1", "C1", "C2"]})
+    >>> d2 = pd.DataFrame({"gene1": [1, 1, 2, 9], "gene2": [2, 3, 3, 8],
+    ... "string_score": [10, 20, 30, 40]})
+    >>> merge_community_and_score(d1, d2)
+       gene1  gene2  community_size community  string_score
+    0      1      2               3        C1          10.0
+    1      1      3               3        C1          20.0
+    2      2      3               3        C1          30.0
+    3      4      5               2        C2           NaN
+    """
+    return df_cpl.merge(ppi_score, how="left", on=["gene1", "gene2"])
+
+
+def get_gene_in_community(df_comp: pd.DataFrame) -> List[int]:
+    """
+    Get all the couples of exons inter-community
+
+    :param df_comp: table showing each couple of gene in each community \
+    at gene level
+    :return: The list of gene inside community
+
+    >>> data = pd.DataFrame({"community": ["C1", "C2"],
+    ...                      "nodes": [3, 2],
+    ...                      "genes": ["1, 2, 3", "4, 5"]})
+    >>> get_gene_in_community(data)
+    [1, 2, 3, 4, 5]
+    """
+    gene_list = []
+    for i in range(df_comp.shape[0]):
+        gene_list += get_gene_list(df_comp.iloc[i, :]["genes"])
+    return gene_list
+
+
+def get_couple_of_ctrl_exon(community_gene: str, all_genes: List[int]
+                            ) -> List[Tuple[int, int]]:
+    """
+    Get every control couple of genes, (couples of genes inter-community).
+
+    :param community_gene: A string containing the genes of interest.
+    :param all_genes: Every genes in every community at dna level
+    :return: Every inter-community couple
+
+    >>> get_couple_of_ctrl_exon('1, 2', [1, 2, 3, 4, 5])
+    [(1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5)]
+    """
+    cgenes = get_gene_list(community_gene)
+    all_genes = [g for g in all_genes if g not in cgenes]
+    values = list(product(cgenes, all_genes))
+    return [v for v in values if v[0] < v[1]]
+
+
+def get_very_ctrl_couple(df_comp: pd.DataFrame) -> pd.DataFrame:
+    """
+    Get a dataframe containing all the control couple of genes
+
+    :param df_comp: dataframe containing the communities of co-localised \
+    genes in the nucleus
+    :return: A dataframe containing all the control couples of genes
+
+    >>> data = pd.DataFrame({"community": ["C1", "C2"],
+    ...                      "nodes": [3, 2],
+    ...                      "genes": ["1, 2, 3", "4, 5"]})
+    >>> get_very_ctrl_couple(data)
+       gene1  gene2  community_size community
+    0      1      4             NaN    C-CTRL
+    1      1      5             NaN    C-CTRL
+    2      2      4             NaN    C-CTRL
+    3      2      5             NaN    C-CTRL
+    4      3      4             NaN    C-CTRL
+    5      3      5             NaN    C-CTRL
+    """
+    all_genes = get_gene_in_community(df_comp)
+    ctrl_couples = []
+    for i in range(df_comp.shape[0]):
+        ctrl_couples += get_couple_of_ctrl_exon(df_comp.iloc[i, :]["genes"],
+                                                all_genes)
+    df = pd.DataFrame(ctrl_couples, columns=["gene1", "gene2"])
+    df["community_size"] = [np.nan] * df.shape[0]
+    df["community"] = ["C-CTRL"] * df.shape[0]
+    return df
+
+
+def create_scored_dataframe(df_comp: pd.DataFrame, fasterdb_ppi: Path
+                            ) -> pd.DataFrame:
+    """
+    Create a dataframe containing couple of exons \
+    inside a community at dna level and the control couples between \
+    communities and their string score.
+
+    :param df_comp: dataframe containing the communities of co-localised \
+    genes in the nucleus
+    :param fasterdb_ppi: A file containing couples of genes whose proteins \
+    interact with each other
+    :return:  a dataframe containing couple of exons \
+    inside a community at dna level and the control couples between \
+    communities and their string score.
+
+    >>> from tempfile import NamedTemporaryFile
+    >>> f = NamedTemporaryFile(mode='w')
+    >>> _ = f.write("P1\\tP2\\t490\\t1\\t2\\n" + "P3\\tP4\\t198\\t1\\t3\\n"
+    ...             "P3\\tP4\\t199\\t1\\t4\\n" + "P3\\tP4\\t200\\t1\\t5\\n"
+    ...             "P3\\tP4\\t202\\t2\\t3\\n" + "P3\\tP4\\t208\\t2\\t4\\n"
+    ...             "P3\\tP4\\t398\\t2\\t5\\n" + "P3\\tP4\\t298\\t3\\t4\\n"
+    ...             "P3\\tP4\\t498\\t5\\t3\\n")
+    >>> f.flush()
+    >>> mfile = Path(f.name)
+    >>> data = pd.DataFrame({"community": ["C1", "C2"],
+    ...                      "nodes": [3, 2],
+    ...                      "genes": ["1, 2, 3", "4, 5"]})
+    >>> create_scored_dataframe(data, mfile)
+       gene1  gene2  community_size community  string_score      group
+    0      1      2             3.0        C1         490.0  community
+    1      1      3             3.0        C1         198.0  community
+    2      2      3             3.0        C1         202.0  community
+    3      4      5             2.0        C2           NaN  community
+    4      1      4             NaN    C-CTRL         199.0       ctrl
+    5      1      5             NaN    C-CTRL         200.0       ctrl
+    6      2      4             NaN    C-CTRL         208.0       ctrl
+    7      2      5             NaN    C-CTRL         398.0       ctrl
+    8      3      4             NaN    C-CTRL         298.0       ctrl
+    9      3      5             NaN    C-CTRL         498.0       ctrl
+    """
+    df_ctrl = get_very_ctrl_couple(df_comp)
+    df_real = edge_community_table(df_comp)
+    full_df = pd.concat([df_real, df_ctrl], axis=0, ignore_index=True)
+    df_score = load_fasterdb_ppi(fasterdb_ppi)
+    full_df = merge_community_and_score(full_df, df_score)
+    full_df['group'] = ["community"] * full_df.shape[0]
+    full_df.loc[full_df['community'] == "C-CTRL", "group"] = "ctrl"
+    return full_df.drop_duplicates()
+
+
+def create_figure(df_full: pd.DataFrame, outfile: Path) -> None:
+    """
+    Create a barplot of string confidence score.
+
+    :param df_full: A dataframe showing the interactions between \
+    genes in same community.
+
+    :param outfile: The file ythat will contains the barplot
+    """
+    sns.set(context="talk")
+    g = sns.catplot(x="group", y="string_score", data=df_full, kind="bar",
+                    ci="sd", aspect=1.7, height=12)
+    _, p = mannwhitneyu(df_full.loc[df_full['group'] == 'community',
+                                    'string_score'].values,
+                        df_full.loc[df_full['group'] == 'community',
+                                    'string_score'].values)
+    add_stat_annotation(g.ax, data=df_full, x="group", y="string_score",
+                        box_pair_list=[("community", "ctrl", p)])
+    g.savefig(outfile)
+
+
+def coloc_ppi_stat_main(weight: int, global_weight: int,
+                        project: str, same_gene: bool, iteration: int = 1000,
+                        logging_level: str = "DISABLE"):
+    """
+    Launch the statistical tests allowing to determine if interaction between \
+    genes at dna level as an influence on the interactions at protein level.
+
+    :param weight: The weight of interaction to consider
+    :param global_weight: The global weighs to consider. if \
+    the global weight is equal to 0  then the project `project` is \
+    used.
+    :param project: The project name, used only if global_weight = 0
+    :param same_gene: Say if we consider as co-localised exon within the \
+    same gene
+    :param threshold: The minimum threshold needed to consider the interaction
+    :param iteration: The number of iteration to make
+    :param logging_level: Level of information to display
+    """
+    ConfigGraph.community_folder.mkdir(exist_ok=True, parents=True)
+    logging_def(ConfigGraph.community_folder, __file__, logging_level)
+    logging.info("Checking if gene communities at DNA level have an "
+                 "influence on communities at protein level")
+    project = get_projects(global_weight, project)
+    logging.debug("Calculating stats...")
+    community_file = ConfigGraph.get_community_file(project, weight,
+                                                    global_weight,
+                                                    same_gene, "gene",
+                                                    f".txt")
+
+    df_com = pd.read_csv(community_file, sep="\t")
+    df_com = df_com[df_com['nodes'] >= 10].copy()
+    full_df = create_scored_dataframe(df_com, ConfigPPI.fasterdb_ppi)
+    outfile = ConfigGraph.get_community_file(project, weight, global_weight,
+                                             same_gene, "gene",
+                                             f"_interation_gene-protein.pdf",
+                                             "community_gene_vs_protein")
+    create_figure(full_df, outfile)
+    logging.info(f"{full_df.shape[0]} total couples (with ctrl)")
+    logging.info(f"{full_df[-np.isnan(full_df['string_score'])].shape[0]} "
+                 f"couples with a defined string score")
+    tmp = full_df[full_df['group'] == 'community'].shape[0]
+    logging.info(f"{tmp} tests couples")
+    tmp = full_df.loc[(-np.isnan(full_df['string_score'])) &
+                      (full_df['group'] == 'community'), :].shape[0]
+    logging.info(f"{tmp} tests couples with a defined string score")
+
+
+
+if __name__ == "__main__":
+    testmod()
\ No newline at end of file
diff --git a/src/find_interaction_cluster/community_figures/__file__.py b/src/find_interaction_cluster/community_figures/__file__.py
new file mode 100644
index 0000000000000000000000000000000000000000..60f3a118d41cf7494b1cd067bf5d649b42ba1e05
--- /dev/null
+++ b/src/find_interaction_cluster/community_figures/__file__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description:
+"""
diff --git a/src/find_interaction_cluster/community_figures/__main__.py b/src/find_interaction_cluster/community_figures/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cef8dd4ac65fccb0d7296154482de8f1822c3dd
--- /dev/null
+++ b/src/find_interaction_cluster/community_figures/__main__.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description: Create a barplot showing the frequency/value of a particular \
+item in every community of genomic features (genes of exons) that are close \
+in the nucleus compared to a control list of features.
+"""
+
+import lazyparser as lp
+import pandas as pd
+from pathlib import Path
+from .fig_functions import create_community_fig
+
+
+class FileNameError(Exception):
+    pass
+
+
+class MissingColumnError(Exception):
+    pass
+
+
+def load_and_check_table(table: str, feature: str, target_col: str):
+    """
+    Load a file containing a dataframe. It must contains the following \
+    columns: id_feature, target_col, community and community_size.
+
+
+    :param table: A file containing a table with the id of the chosen \
+    `feature` (i.e FasterDB id of genes or exons), a column with data of \
+    interest ( this column must have the name *target_col*) and two columns \
+    with the community and the size of the community of the feature if it \
+    has one (None, else).
+    :param feature: The kind of feature analysed
+    :param target_col: The name of the column containing the data of interest
+    :return: The loaded dataframe
+    """
+    if table.endswith(".gz"):
+        df = pd.read_csv(table, sep="\t", compression="gzip")
+    else:
+        df = pd.read_csv(table, sep="\t")
+    required_cols = [f"id_{feature}", target_col, "community",
+                     "community_size"]
+    for rqd in required_cols:
+        if rqd not in df.columns:
+            raise MissingColumnError(f"The column {rqd} is missing !")
+    return df
+
+
+@lp.parse(table="file", output="folder", test_type=["lm", "permutation"],
+          iteration="20 < iteration")
+def create_community_figures(table: str, feature: str, target_col: str,
+                             output: str, outfile: str, test_type: str,
+                             target_kind: str = "",
+                             iteration: int = 10000) -> None:
+    """
+    Create a dataframe with a control community, save it as a table and \
+    as a barplot figure.
+
+    :param table: A file containing a table with the id of the chosen \
+    `feature` (i.e FasterDB id of genes or exons), a column with data of \
+     interest ( this column must have the name *target_col*) and two columns \
+     with the community and the size of the community of the feature if it \
+     has one (None, else).
+    :param feature: The kind of feature analysed (exon or gene)
+    :param target_col: The name of the column containing the data of interest
+    :param output: The output folder
+    :param outfile: The name of the output figure file (pdf format)
+    :param test_type: The type of test to make (permutation or lm)
+    :param target_kind: An optional name that describe a bit further \
+    target_col.
+    :param iteration: The number of sub samples to create. This parameter \
+    is only used if test_type = 'permutation' (default 10000).
+    """
+    df = load_and_check_table(table, feature, target_col)
+    if not outfile.endswith(".pdf"):
+        raise FileNameError("The output figure must be in pdf format !")
+    moutfile = Path(output) / outfile
+    create_community_fig(df, feature, target_col, moutfile, test_type,
+                         target_kind=target_kind, iteration=iteration)
+
+
+if __name__ == "__main__":
+    create_community_figures()
diff --git a/src/find_interaction_cluster/community_figures/fig_functions.py b/src/find_interaction_cluster/community_figures/fig_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..124bf0db76fd59256cf69caf18cfedf6d497548b
--- /dev/null
+++ b/src/find_interaction_cluster/community_figures/fig_functions.py
@@ -0,0 +1,498 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description:
+"""
+
+import pandas as pd
+from pathlib import Path
+from typing import Dict, Tuple, List, Optional
+from tqdm import tqdm
+from rpy2.robjects import r, pandas2ri
+from statsmodels.stats.multitest import multipletests
+import numpy as np
+from ..radomization_test_ppi import get_pvalue
+import seaborn as sns
+
+
+def get_community_table(communities: List[List[str]],
+                        size_threshold: int, feature: str) -> pd.DataFrame:
+    """
+    return the table indicating the name of the exons and the \
+    the name of the community.
+
+    :param communities: List of community of exons
+    :param size_threshold: The required size a community must \
+    have to be considered
+    :param feature: The kind of feature analysed
+    :return: table of community
+    >>> c = [['1_1', '2_5'], ['7_9', '4_19', '3_3']]
+    >>> get_community_table(c, 3, 'exon')
+      community id_exon  community_size
+    0        C2     7_9               3
+    1        C2    4_19               3
+    2        C2     3_3               3
+    >>> c = [['1', '2'], ['7', '49', '3']]
+    >>> get_community_table(c, 3, 'gene')
+      community id_gene  community_size
+    0        C2       7               3
+    1        C2      49               3
+    2        C2       3               3
+    """
+    dic = {"community": [], f"id_{feature}": [], "community_size": []}
+    for k, comm in enumerate(communities):
+        if len(comm) >= size_threshold:
+            name = f'C{k + 1}'
+            clen = len(comm)
+            for exon in comm:
+                dic["community"].append(name)
+                dic[f'id_{feature}'].append(exon)
+                dic["community_size"].append(clen)
+    return pd.DataFrame(dic)
+
+
+def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str
+                     ) -> pd.DataFrame:
+    """
+    Make the lm analysis to see if the exon regulated by a splicing factor \
+    are equally distributed among the communities.
+
+    :param df: A dataframe containing the id of the chosen `feature` \
+    (i.e FasterDB id of genes or exons) a column with data for interest (\
+    this column must have the name *target_col*) and the community \
+    and the size of the community of the feature if it has one (None, else).
+    :param outfile: A name of a file
+    :param target_col: The name of the column containing the data of interest
+    :return: the pvalue of lm
+    """
+    pandas2ri.activate()
+    lmf = r(
+        """
+        require("DHARMa")
+
+        function(data, folder, partial_name) {
+            mod <- lm(%s ~ log(community_size) +  community, data=data)
+            simulationOutput <- simulateResiduals(fittedModel = mod, n = 250)
+            png(paste0(folder, "/dignostics_summary", partial_name, ".png"))
+            plot(simulationOutput)
+            dev.off()
+            return(as.data.frame(summary(mod)$coefficients))
+        }
+
+        """ % target_col)
+    folder = outfile.parent / "diagnostics"
+    folder.mkdir(parents=True, exist_ok=True)
+    partial_name = outfile.name.replace('.pdf', '')
+    df.to_csv(f'frequency_{target_col}.txt', sep="\t", index=False)
+    res_df = lmf(df, str(folder), partial_name).reset_index()
+    res_df.rename({'index': 'community'}, inplace=True, axis=1)
+    res_df['community'] = res_df['community'].str.replace('community', '')
+    res_df.loc[res_df['community'] == "(Intercept)", "community"] = "C-CTRL"
+    mean_df = df[[target_col, "community", "community_size"]]. \
+        groupby(["community", "community_size"]).mean().reset_index()
+    return res_df.merge(mean_df, how="left", on="community")
+
+
+def lm_with_ctrl(df: pd.DataFrame,
+                 target_col: str, outfile: Path,
+                 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """
+    :param df: A dataframe containing the id of the chosen `feature` \
+    (i.e FasterDB id of genes or exons) a column with data for interest (\
+    this column must have the name *target_col*) and the community \
+    and the size of the community of the feature if it has one (None, else).
+    :param target_col: The name of the column containing the data of interest
+    :param outfile: File that will contains the final figure
+    :return: The dataframe with ctrl exon and \
+    The dataframe with the p-value compared to the control \
+    list of feature.
+    """
+    df['community'] = df['community'].fillna("C-CTRL")
+    size = df.loc[df["community"] == "C-CTRL", :].shape[0]
+    df['community_size'] = df['community_size'].fillna(size)
+    df['community_size'] = df['community_size'].astype(int)
+    return df, lm_maker_summary(df, outfile, target_col)
+
+
+def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame,
+                      target_col: str, feature: str) -> pd.DataFrame:
+    """
+    Merge df and rdf together.
+
+    :param df: A dataframe containing the id of the chosen `feature` \
+    (i.e FasterDB id of genes or exons) a column with data for interest (\
+    this column must have the name *target_col*) and the community \
+    and the size of the community of the feature if it has one (None, else).
+    :param rdf: The dataframe containing the mean frequency for \
+    each community and the p-value of their enrichment compared to control \
+    exons.
+    :param target_col: The name of the column containing the data of interest
+    :param feature: The feature of interest
+    :return: The merged dataframe: i.e df with the stats columns
+    """
+    p_col = "Pr(>|t|)"
+    df = df[[f"id_{feature}", target_col, "community",
+             "community_size"]].copy()
+    rdf = rdf[["community", "community_size", p_col, target_col]].copy()
+    rdf.rename({target_col: f"mean_{target_col}", p_col: "p-adj"},
+               axis=1, inplace=True)
+    df = df.merge(rdf, how="left", on=["community", "community_size"])
+    df_ctrl = df[df["community"] == "C-CTRL"]
+    df = df[df["community"] != "C-CTRL"].copy()
+    df.sort_values(f"mean_{target_col}", ascending=True, inplace=True)
+    return pd.concat([df_ctrl, df], axis=0, ignore_index=True)
+
+
+def get_permutation_mean(df_ctrl: pd.DataFrame,
+                         cpnt: str, size: int, iteration: int) -> List[float]:
+    """
+    Randomly sample `size` `feature` from `df_ctrl` to extract `iteration` \
+    of `nt` frequencies from it.
+
+    :param df_ctrl: A dataframe containing the frequency of each nucleotide \
+    in each exons/gene in fasterdb.
+    :param cpnt: The component (nt, aa, dnt) of interest
+    :param size: The size of each sub samples to create
+    :param iteration: The number of sub samples to create
+    :return: The list of mean frequencies of `nt` in each subsample
+    """
+    return [
+        float(np.mean(df_ctrl[cpnt].sample(int(size), replace=True).values))
+        for _ in range(iteration)
+    ]
+
+
+def perm_community_pval(row: pd.Series, df_ctrl: pd.DataFrame,
+                        cpnt: str, iteration: int
+                        ) -> Tuple[float, float, float, str]:
+    """
+    Randomly sample `size` `feature` from `df_ctrl` to extract `iteration` \
+    of `nt` frequencies from it.
+
+    :param row: A line of a dataframe containing the frequency of \
+    each feature inside a community.
+    :param df_ctrl: A dataframe containing the frequency of each nucleotide \
+    in each exons/gene in fasterdb.
+    :param cpnt: The component (nt, aa, dnt) of interest
+    :param iteration: The number of sub samples to create
+    :return: The ctrl mean frequency value of `nt`, its standard error \
+    the pvalue and the regulation of the enrichment/impoverishment \
+    of the community in `row` compared to control exons.
+    """
+    list_values = get_permutation_mean(df_ctrl, cpnt, row["community_size"],
+                                       iteration)
+    pval, reg = get_pvalue(np.array(list_values), row[cpnt], iteration)
+    return float(np.mean(list_values)), float(np.std(list_values)), pval, reg
+
+
+def perm_pvalues(df: pd.DataFrame, df_ctrl: pd.DataFrame, feature: str,
+                 target_col: str, iteration: int,
+                 dic_com: Dict) -> pd.DataFrame:
+    """
+    Randomly sample `size` `feature` from `df_ctrl` to extract `iteration` \
+    of `nt` frequencies from it.
+
+    :param df: A dataframe containing the frequency of each nucleotide \
+    in each exons belonging to a community.
+    :param df_ctrl: A dataframe containing the frequency of each nucleotide \
+    in each exons/gene in fasterdb.
+    :param feature: The feature of interest (gene, exon)
+    :param target_col: The name of the column containing the data of interest
+    :param iteration: The number of sub samples to create
+    :param dic_com: A dictionary linking each community to the exons \
+    it contains.
+    :return: The dataframe containing p-values and regulation \
+    indicating the enrichment of
+    """
+    list_pval, list_reg, mean_ctrl, std_ctrl = ([] for _ in range(4))
+    for i in tqdm(range(df.shape[0]), desc="performing permutations"):
+        row = df.iloc[i, :]
+        res = perm_community_pval(row,
+                                  df_ctrl.loc[
+                                    -df_ctrl[f'id_{feature}'
+                                             ].isin(dic_com[row['community']]),
+                                    :],
+                                  target_col, iteration)
+        [x.append(y) for x, y in zip([mean_ctrl, std_ctrl, list_pval,
+                                      list_reg], res)]
+    adj_pvals = multipletests(list_pval, alpha=0.05,
+                              method='fdr_bh',
+                              is_sorted=False,
+                              returnsorted=False)[1]
+    adj_regs = [list_reg[i] if adj_pvals[i] <= 0.05 else " . "
+                for i in range(len(list_reg))]
+    df[f'{target_col}_mean_{iteration}_ctrl'] = mean_ctrl
+    df[f'{target_col}_std_{iteration}_ctrl'] = std_ctrl
+    df[f'p-adj'] = adj_pvals
+    df[f'reg-adj'] = adj_regs
+    return df
+
+
+def perm_with_ctrl(df: pd.DataFrame, feature: str,
+                   target_col: str, dic_com: Dict,
+                   iteration: int) -> pd.DataFrame:
+    """
+
+    :param df: A dataframe containing the id of the chosen `feature` \
+    (i.e FasterDB id of genes or exons) a column with data for interest (\
+    this column must have the name *target_col*) and the community \
+    and the size of the community of the feature if it has one (None, else).
+    :param feature: The kind of feature analysed
+    :param target_col:  The name of the column containing the data of interest
+    :param dic_com: A dictionary linking each community to the exons \
+    it contains.
+    :param iteration: The number of sub samples to create
+    :return: The dataframe with the p-value compared to the control \
+    list of exons.
+    """
+    df_tmp = df.loc[-df["community"].isna(), :]
+    mean_df = df_tmp[[target_col, "community", "community_size"]]. \
+        groupby(["community", "community_size"]).mean().reset_index()
+    return perm_pvalues(mean_df, df, feature, target_col,
+                        iteration, dic_com)
+
+
+def create_perm_ctrl_df(ctrl_df: pd.DataFrame, order_df: pd.DataFrame,
+                        cpnt: str, feature: str, iteration: int
+                        ) -> pd.DataFrame:
+    """
+
+    :param ctrl_df: A dataframe containing the mean ctrl values, \
+    the mean control std and the community from which those control \
+    have been created
+    :param order_df: A dataframe containing the community and their final \
+    order.
+    :param cpnt: The component (nt, aa, dnt) of interest
+    :param feature: The feature of interest
+    :param iteration: The number of iteration
+    :return: The ctrl_tmp_df in good order
+    """
+    dsize = ctrl_df.shape[0]
+    ctrl_df[f"mean_{cpnt}"] = \
+        [np.mean(ctrl_df[f"{cpnt}_mean_{iteration}_ctrl"])] * dsize
+    ctrl_df[f"id_{feature}"] = ['ctrl'] * dsize
+    ctrl_df["community_size"] = [dsize] * dsize
+    ctrl_df = ctrl_df.merge(order_df, how='left', on="community")
+    ctrl_df.rename({f"{cpnt}_mean_{iteration}_ctrl": cpnt,
+                    f"{cpnt}_std_{iteration}_ctrl": 'ctrl_std'}, axis=1,
+                   inplace=True)
+    return ctrl_df.sort_values("order", ascending=True)
+
+
+def expand_results_perm(df: pd.DataFrame, rdf: pd.DataFrame, target_col: str,
+                        feature: str, iteration: int) -> pd.DataFrame:
+    """
+    Merge df and rdf together.
+
+    :param df: A dataframe containing the id of the chosen `feature` \
+    (i.e FasterDB id of genes or exons) a column with data for interest (\
+    this column must have the name *target_col*) and the community \
+    and the size of the community of the feature if it has one (None, else).
+    :param rdf: The dataframe containing the mean frequency for \
+    each community and the p-value of their enrichment compared to control \
+    exons.
+    :param target_col: The name of the column containing the data of interest
+    :param feature: The feature of interest
+    :param iteration: The number of iteration
+    :return: The merged dataframe: i.e df with the stats columns
+    """
+    df = df.loc[-df["community"].isna(),
+                [f"id_{feature}", target_col,
+                 "community", "community_size"]].copy()
+    ctrl_df = rdf[[f"{target_col}_mean_{iteration}_ctrl",
+                   f"{target_col}_std_{iteration}_ctrl", "community"]].copy()
+    rdf = rdf[["community", "community_size", target_col, "p-adj"]].copy()
+    rdf.rename({target_col: f"mean_{target_col}"}, axis=1, inplace=True)
+    df = df.merge(rdf, how="left", on=["community", "community_size"])
+    df.sort_values(f"mean_{target_col}", ascending=True, inplace=True)
+    order_df = df[["community"]].drop_duplicates().copy()
+    order_df["order"] = range(order_df.shape[0])
+    df_ctrl = create_perm_ctrl_df(ctrl_df, order_df, target_col, feature,
+                                  iteration)
+    return pd.concat([df_ctrl, df], axis=0, ignore_index=True)
+
+
+def make_barplot(df_bar: pd.DataFrame, outfile: Path,
+                 target_col: str, feature: str, target_kind: str = "") -> None:
+    """
+    Create a barplot showing the frequency of `nt` for every community \
+    of exons/gene in `df_bar`.
+
+    :param df_bar: A dataframe with the enrichment of a \
+    nucleotide frequency for every community
+    :param outfile: File were the figure will be stored
+    :param target_kind: An optional name that describe a bit further \
+    target_col.
+    :param target_col: The name of the column containing the data of interest
+    :param feature: The king of feature of interest
+    """
+    sns.set(context="poster")
+    g = sns.catplot(x="community", y=target_col, data=df_bar, kind="point",
+                    ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4,
+                    scale=0.5,
+                    palette=["red"] + ["darkgray"] * (df_bar.shape[0] - 1))
+    g2 = sns.catplot(x="community", y=target_col, data=df_bar, kind="bar",
+                     ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4,
+                     palette=["red"] + ["darkgray"] * (df_bar.shape[0] - 1))
+    g.fig.subplots_adjust(top=0.9)
+    target_kind = f" ({target_kind})" if target_kind else ""
+    g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}"
+                   f"among community of {feature}s\n"
+                   f"(stats obtained with a lm test)")
+    g.set(xticklabels=[])
+    g.ax.set_ylabel(f'Frequency of {target_col}')
+    df_bara = df_bar.drop_duplicates(subset="community", keep="first")
+    for i, p in enumerate(g2.ax.patches):
+        stats = "*" if df_bara.iloc[i, :]["p-adj"] < 0.05 else ""
+        com = df_bara.iloc[i, :]["community"]
+        csd = np.std(df_bar.loc[df_bar["community"] == com, target_col])
+        g.ax.annotate(stats,
+                      (p.get_x() + p.get_width() / 2., p.get_height() + csd),
+                      ha='center', va='center', xytext=(0, 10), fontsize=12,
+                      textcoords='offset points')
+    g.savefig(outfile)
+
+
+def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
+                      target_col: str, feature: str,
+                      target_kind: str = "") -> None:
+    """
+    Create a barplot showing the frequency of `nt` for every community \
+    of exons/gene in `df_bar`.
+
+    :param df_bar: A dataframe with the enrichment of a \
+    nucleotide frequency for every community
+    :param outfile: File were the figure will be stored
+    :param target_kind: An optional name that describe a bit further \
+    target_col.
+    :param target_col: The name of the column containing the data of interest
+    :param feature: The king of feature of interest
+    """
+    sns.set(context="poster")
+    df_ctrl = df_bar.loc[df_bar[f"id_{feature}"] == 'ctrl', :]
+    df_bar = df_bar.loc[df_bar[f"id_{feature}"] != 'ctrl', :].copy()
+    g2 = sns.catplot(x="community", y=target_col, data=df_bar, kind="bar",
+                     ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4,
+                     palette=["darkgray"] * (df_bar.shape[0]))
+    g = sns.catplot(x="community", y=target_col, data=df_bar, kind="point",
+                    ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4,
+                    scale=0.5, palette=["darkgray"] * (df_bar.shape[0]))
+    xrange = g.ax.get_xlim()
+    df_ctrl.plot(x="community", y=target_col, kind="scatter", ax=g.ax,
+                 yerr="ctrl_std", legend=False, zorder=10,
+                 color=(0.8, 0.2, 0.2, 0.4))
+    g.ax.set_xlim(xrange)
+    g.fig.subplots_adjust(top=0.9)
+    target_kind = f" ({target_kind})" if target_kind else ""
+    g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}"
+                   f"among community of {feature}s\n"
+                   f"(stats obtained with a permutation test)")
+    g.set(xticklabels=[])
+    g.ax.set_ylabel(f'Frequency of {target_col}')
+    df_bara = df_bar.drop_duplicates(subset="community", keep="first")
+    for i, p in enumerate(g2.ax.patches):
+        stats = "*" if df_bara.iloc[i, :]["p-adj"] < 0.05 else ""
+        com = df_bara.iloc[i, :]["community"]
+        csd = np.std(df_bar.loc[df_bar["community"] == com, target_col])
+        g.ax.annotate(stats,
+                      (p.get_x() + p.get_width() / 2., p.get_height() + csd),
+                      ha='center', va='center', xytext=(0, 10), fontsize=12,
+                      textcoords='offset points')
+    g.savefig(outfile)
+
+
+def barplot_creation(df_bar: pd.DataFrame, outfig: Path,
+                     cpnt: str, test_type: str, feature: str,
+                     target_kind) -> None:
+    """
+    Reformat a dataframe with the enrichment of a nucleotide frequency \
+    for every feature for every community and then create a \
+    barplot showing those frequencies.
+
+    :param df_bar: A dataframe with the enrichment of a \
+    nucleotide frequency for every community and showing the frequency \
+    of each feature in each community
+    :param outfig: File were the figure will be stored
+    :param cpnt: The component (nt, aa, dnt) of interest
+    :param test_type: The kind of test make
+    :param feature: The king of feature of interest
+    :param test_type: The type of test to make (permutation or lm)
+    :param target_kind: An optional name that describe a bit further \
+    target_col.
+    """
+    if test_type == "lm":
+        make_barplot(df_bar, outfig, cpnt, feature, target_kind)
+    else:
+        make_barplot_perm(df_bar, outfig, cpnt, feature, target_kind)
+
+
+def get_feature_by_community(df: pd.DataFrame, feature: str) -> Dict:
+    """
+    Create a dictionary containing the exons contained in each community.
+
+    :param df: A dataframe containing the frequency of each nucleotide \
+    in each exons belonging to a community.
+    :param feature: the feature of interest (exon, gene)
+    :return: A dictionary linking each community to the exons it contains
+
+    >>> dataf = pd.DataFrame({"id_gene": ['1', '2', '3', '4', '5'],
+    ... 'community': ['C1', 'C1', 'C2', 'C2', np.nan]})
+    >>> get_feature_by_community(dataf, 'gene')
+    {'C1': ['1', '2'], 'C2': ['3', '4']}
+    >>> dataf.rename({"id_gene": "id_exon"}, axis=1, inplace=True)
+    >>> get_feature_by_community(dataf, 'exon')
+    {'C1': ['1', '2'], 'C2': ['3', '4']}
+    """
+    dic = {}
+    for i in range(df.shape[0]):
+        com, id_ft = df.iloc[i, :][['community', f'id_{feature}']]
+        if com is not None:
+            if com in dic:
+                dic[com].append(id_ft)
+            else:
+                dic[com] = [id_ft]
+    return dic
+
+
+def create_community_fig(df: pd.DataFrame, feature: str,
+                         target_col: str,
+                         outfile_ctrl: Path, test_type: str,
+                         dic_com: Optional[Dict] = None,
+                         target_kind: str = "",
+                         iteration: int = 10000) -> None:
+    """
+    Create a dataframe with a control community, save it as a table and \
+    as a barplot figure.
+
+    :param df: A dataframe containing the id of the chosen `feature` \
+    (i.e FasterDB id of genes or exons) a column with data for interest (\
+    this column must have the name *target_col*) and the community \
+    and the size of the community of the feature if it has one (None, else).
+    :param feature: The kind of feature analysed
+    :param target_col: The name of the column containing the data of interest
+    :param outfile_ctrl: file used to stored the table and the figure \
+    containing the test communities and the control community
+    :param test_type: The type of test to make (permutation or lm)
+    :param dic_com: A dictionary linking each community to the exons \
+    it contains.
+    :param target_kind: An optional name that describe a bit further \
+    target_col.
+    :param iteration: The number of sub samples to create
+    """
+    if dic_com is None:
+        dic_com = {} if test_type == 'lm' \
+            else get_feature_by_community(df, feature)
+    if test_type == "lm":
+        ndf, rdf = lm_with_ctrl(df, target_col, outfile_ctrl)
+        df_bar = expand_results_lm(ndf, rdf, target_col, feature)
+    else:
+        rdf = perm_with_ctrl(df, feature, target_col, dic_com, iteration)
+        df_bar = expand_results_perm(df, rdf, target_col, feature, iteration)
+    rdf.to_csv(str(outfile_ctrl).replace(".pdf", ".txt"), sep="\t",
+               index=False)
+    bar_outfile = str(outfile_ctrl).replace(".pdf", "_bar.txt")
+    df_bar.to_csv(bar_outfile, sep="\t", index=False)
+    barplot_creation(df_bar, outfile_ctrl, target_col, test_type, feature,
+                     target_kind)
diff --git a/src/find_interaction_cluster/community_finder.py b/src/find_interaction_cluster/community_finder.py
index b4180848fd5efb1c27f06fe999bad04d58512d1a..d65d5a81cacaedc6dbe18b7885e4a89fc4568fa5 100644
--- a/src/find_interaction_cluster/community_finder.py
+++ b/src/find_interaction_cluster/community_finder.py
@@ -253,8 +253,8 @@ def get_figure_title(project, weight, global_weight, same_gene, feature):
     :param feature: The kind of analysed features
     :return: A figure title
     """
-    title = f"Co-localisation between {feature}s having a weight greater than " \
-            f"{weight} in "
+    title = f"Co-localisation between {feature}s having a weight greater " \
+            f"than {weight} in "
     if global_weight == 0:
         title += f"the project {project}"
     else:
@@ -347,7 +347,7 @@ def community_finder(weight: int, global_weight: int, project: str = "",
     logging.debug('Done !')
 
 
-def get_projects(global_weight: int) -> List[str]:
+def get_projects(global_weight: int, project: str) -> str:
     """
     Get projects name.
 
@@ -355,69 +355,38 @@ def get_projects(global_weight: int) -> List[str]:
     the global weight is equal to 0 then then density figure are calculated \
     by project, else all projet are merge together and the interaction \
     seen in `global_weight` project are taken into account
-    :return: The list of the project to consider
+    :param project: The name of a project
+    :return: The project to consider
     """
     if global_weight != 0:
-        return [f'Global-weight-{global_weight}']
+        return f'Global-weight-{global_weight}'
     else:
-        return ConfigGraph.good_projects
+        return project
 
 
-def get_projects_name(global_weights: List[int]) -> Tuple[List[str], Dict]:
-    """
-    Get projects name given a list of global_weight and a dictionary linking,
-    each project name to it's corresponding global weight.
-
-    :param global_weights: The list of global weights to consider. if \
-    the global weight is equal to 0 then then density figure are calculated \
-    by project, else all projet are merge together and the interaction \
-    seen in `global_weight` project are taken into account
-    :return: project names and a dictionary linking,
-    each name to it's corresponding global weight.
-    """
-    dic = {}
-    projects = []
-    for global_weight in global_weights:
-        tmp = get_projects(global_weight)
-        projects += tmp
-        for p in tmp:
-            dic[p] = global_weight
-    return projects, dic
-
-
-def multiple_community_launcher(ps: int, weights: List[int],
-                                global_weights: List[int],
-                                same_gene: bool, html_fig: bool = False,
+def multiple_community_launcher(weight: int,
+                                global_weight: int,
+                                project: str,
+                                same_gene: bool,
+                                html_fig: bool = False,
                                 feature: str = 'exon',
                                 logging_level: str = "DISABLE"):
     """
-    :param ps: The number of processes we want to use.
-    :param weights: The list of weights of interaction to consider
-    :param global_weights: The list global weights to consider. if \
-    the global weight is equal to 0 then then density figure are calculated \
-    by project, else all projcet are merge together and the interaction \
-    seen in `global_weight` project are taken into account
+    :param weight: The weight of interaction to consider
+    :param global_weight: The global weighs to consider. if \
+    the global weight is equal to 0  then the project `project` is \
+    used.
     :param same_gene: Say if we consider as co-localised exon within the \
     same gene
     :param html_fig: True to create an html figure, false else
     :param feature: The feature we want to analyse (default 'exon')
+    :param project: The project name, used only if global_weight = 0
     :param logging_level: Level of information to display
     """
     ConfigGraph.community_folder.mkdir(exist_ok=True, parents=True)
     logging_def(ConfigGraph.community_folder, __file__, logging_level)
-    global_weights = list(np.unique(global_weights))
-    weights = list(np.unique(weights))
-    projects, dic_project = get_projects_name(global_weights)
-    condition = list(product(projects, weights))
-    processes = []
-    pool = mp.Pool(processes=min(ps, len(condition)))
-    for project, weight in condition:
-        global_weight = dic_project[project]
-        logging.info(f'Finding community for project : {project}, '
-                     f'global_weight : {global_weight}, weight: {weight}')
-        args = [weight, global_weight, project, same_gene, html_fig, feature]
-        processes.append(pool.apply_async(community_finder, args))
-    for proc in processes:
-        proc.get(timeout=None)
-    pool.close()
-    pool.join()
+    project = get_projects(global_weight, project)
+    logging.info(f'Finding community for project : {project}, '
+                 f'global_weight : {global_weight}, weight: {weight}')
+    community_finder(weight, global_weight, project, same_gene, html_fig,
+                     feature)
diff --git a/src/find_interaction_cluster/config.py b/src/find_interaction_cluster/config.py
index dd566401ef4ca4f4af9eb380e5cb7126904fc002..ac3ea4925797c1eb72ebac6f5cd7d170229d62b2 100644
--- a/src/find_interaction_cluster/config.py
+++ b/src/find_interaction_cluster/config.py
@@ -37,7 +37,7 @@ def get_weight_folder(weight: int, global_weight: int):
 
 def get_community_file(project: str, weight: int, global_weight: int,
                        same_gene: bool, feature: str = 'exon',
-                       ext: str = ".txt", stat: bool = False):
+                       ext: str = ".txt", sub_fold: str = ''):
     """
     Get the output file of interest.
 
@@ -51,13 +51,12 @@ def get_community_file(project: str, weight: int, global_weight: int,
     same gene
     :param the kind of feature analyzed
     :param ext: The file extension
-    :param stat: True to place the result in 'sf_community_enrichment' \
-     subfolder
+    :param subfolder: if filled, then the data are recovered from a subfolder
     :return: The filename of interest
     """
     folder = get_weight_folder(weight, global_weight)
-    if stat:
-        folder = folder / 'sf_community_enrichment'
+    if sub_fold != '':
+        folder = folder / sub_fold
     folder.mkdir(exist_ok=True, parents=True)
     if global_weight != 0:
         project = f"global-weight-{global_weight}"
@@ -116,6 +115,3 @@ class ConfigGraph:
     get_hip_folder = get_hipmcl_folder
     get_hipmcl_prog = get_hipmcl_prog
     good_projects = get_good_project()
-    genes_communities = data / \
-        "global-weight-3_weight-3_same_gene-True_gene.txt"
-    genes_regulation_sf_community = results / "genes_regulation_sf_community"
diff --git a/src/find_interaction_cluster/create_ppi_files.py b/src/find_interaction_cluster/create_ppi_files.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d401736c97007912f8ec3b84253e5c7613b6fb7
--- /dev/null
+++ b/src/find_interaction_cluster/create_ppi_files.py
@@ -0,0 +1,373 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description: create a file of gene interaction at gene and protein levels.
+"""
+
+from .config import ConfigGraph
+from .ppi_scripts.config_ppi import ConfigPPI
+from typing import List
+from ..logging_conf import logging_def
+import numpy as np
+import logging
+from .community_finder import get_projects
+from itertools import product
+import multiprocessing as mp
+from pathlib import Path
+from typing import Dict, Union
+import pandas as pd
+from .community_finder import create_graph, find_communities, \
+    write_cytoscape_graph
+from doctest import testmod
+from .radomization_test_ppi import get_ppi_community_gene, \
+    get_dna_community_gene, update_overlap_df,summary_randomisation_test
+
+
+def get_community_dic_fom_community_file(mfile: Union[Path, pd.DataFrame],
+                                         community_data: bool = True) -> Dict:
+    """
+    Read a file containing community of genes and return a dictionary \
+    linking each gene to other gene in it's community.
+
+    :param mfile: A file containing community of genes
+    :return: A dictionary linking a gene to all the other in it's community
+    """
+    df = pd.read_csv(mfile, sep="\t") if isinstance(mfile, Path) else mfile
+    dic = {}
+    for i in range(df.shape[0]):
+        cseries = df.iloc[i, :]
+        list_gene = list(map(int, cseries['genes'].split(", ")))
+        if len(list_gene) > 1:
+            for gene in list_gene:
+                if community_data:
+                    dic[gene] = {"links": [g for g in list_gene if g != gene],
+                                 "community": cseries['community']}
+                else:
+                    dic[gene] = [g for g in list_gene if g != gene]
+    return dic
+
+
+def ppi_array(fasterdb_ppi: Path, threshold: int) -> np.array:
+    """
+    Return an array of gene having their proteins interaction with each \
+    other
+    :param fasterdb_ppi: A file containing ppi interaction
+    :param threshold: The minimum theshold needed to consider the interaction
+    :return: An array containing the interaction
+    """
+    df = pd.read_csv(fasterdb_ppi, sep="\t")
+    df['gene1'] = df['gene1'].astype(str)
+    df['gene2'] = df['gene2'].astype(str)
+    return df.loc[df['combined_score'] >= threshold,
+                  ['gene1', 'gene2', 'combined_score']]\
+        .drop_duplicates(keep="first")\
+        .values
+
+
+def write_interaction_ppi(arr_interaction: np.array, project: str,
+                          weight: int, global_weight: int, same_gene: bool,
+                          use_weight: bool = False):
+    """
+
+    :param arr_interaction:  Each couples of co-localized feature within a \
+    project.
+    :param project: The name of the project of interest
+    :param weight: The minimum weight of interaction to consider
+    :param global_weight: The global weight to consider. if \
+    the global weight is equal to 0 then then density figure are calculated \
+    by project, else all projet are merge together and the interaction \
+    seen in `global_weight` project are taken into account
+    :param same_gene: Say if we consider as co-localised, exons within the \
+    same gene (True) or not (False) (default False)
+    :param use_weight: Say if we want to write the weight into the result file.
+    """
+    logging.debug('Writing interaction files ...')
+    outfile = ConfigGraph.get_community_file(project, weight, global_weight,
+                                             same_gene, "gene",
+                                             f"_interation_PPI_tmp.txt",
+                                             "community_gene_vs_protein")
+    result = ConfigGraph.get_community_file(project, weight, global_weight,
+                                            same_gene, "gene",
+                                            f"_communities_PPI_tmp.txt",
+                                            "community_gene_vs_protein")
+    with outfile.open('w') as f:
+        for exon1, exon2, cweight in arr_interaction:
+            if not use_weight:
+                cweight = 1
+            f.write(f"{exon1}\t{exon2}\t{cweight}\n")
+    return outfile, result
+
+
+def ppi_community_finder(fasterdb_ppi: Path, project: str,
+                         weight: int, global_weight: int,
+                         same_gene: bool = True,
+                         threshold: int = 700):
+    """
+    Find communities inside protein-protein interaction file
+
+    :param fasterdb_ppi: File containing ppi interaction
+    :param project: The name of the project of interest
+    :param weight: The minimum weight of interaction to consider
+    :param global_weight: The global weight to consider. if \
+    the global weight is equal to 0 then then density figure are calculated \
+    by project, else all projet are merge together and the interaction \
+    seen in `global_weight` project are taken into account
+    :param same_gene: Say if we consider as co-localised, exons within the \
+    same gene (True) or not (False) (default False)
+    :param threshold: The minimum threshold needed to consider the interaction
+    """
+    interaction = ppi_array(fasterdb_ppi, threshold)
+    outfile, result_file = write_interaction_ppi(interaction, project,
+                                                 weight, global_weight,
+                                                 same_gene)
+    graph = create_graph(interaction)
+    df, dic_community = find_communities(graph, project, outfile, result_file,
+                                         "gene")
+    logging.debug('Writing results ...')
+    outfiles = [ConfigGraph.get_community_file(
+        project, weight, global_weight, same_gene, "gene", ext,
+        "community_gene_vs_protein")
+        for ext in [f'_graph_community_PPI.txt', f'_graph_community_PPI.cyjs']]
+    df.to_csv(outfiles[0], sep="\t", index=False)
+    logging.debug("Saving the graph ...")
+    write_cytoscape_graph(graph, dic_community, outfiles[1])
+    logging.debug('Done !')
+    return df
+
+
+def create_dataframe(gene_com_dic: Dict) -> pd.DataFrame:
+    """
+    Create a dataframe from a dictionary containing a community of gene.
+
+    :param gene_com_dic: A dictionary linking each gene, to it's neighbors \
+    at DNA level
+    :return: A table containing the gene, the community name of it's gene \
+    and the community size of this gene
+    """
+    content = [[gene, gene_com_dic[gene]['community'],
+                len(gene_com_dic[gene]['links'])]
+               for gene in gene_com_dic]
+    return pd.DataFrame(content, columns=["id_gene", "DNA_community",
+                                          "community_size"])
+
+
+def add_size_and_common_gene_number(df: pd.DataFrame, dic_gene: Dict,
+                                    dic_ppi: Dict, nb_col: str,
+                                    size_col: str) -> pd.DataFrame:
+    """
+    Add a column containing the number of shared interaction between genes \
+    and their product.
+
+    :param df: A dataframe containing
+    :param dic_gene: A dictionary containing the gene interacting at \
+    DNA level
+    :param dic_ppi: A dictionary contaning the interaction between protein \
+    of genes.
+    :param nb_col: The name of the column that will stored the shared \
+    interactions between genes at protein and DNA level
+    :param size_col: The size of the community
+    :return: The dataframe with two more columns.
+    """
+    nb_content_col = []
+    size_content_col = []
+    for i in range(df.shape[0]):
+        cgene = df.iloc[i, :]["id_gene"]
+        if cgene in dic_ppi:
+            common = [g for g in dic_ppi[cgene]
+                      if g in dic_gene[cgene]['links']]
+            size_content_col.append(len(dic_ppi[cgene]))
+            nb_content_col.append(len(common))
+        else:
+            size_content_col.append(0)
+            nb_content_col.append(0)
+    df[nb_col] = nb_content_col
+    df[size_col] = size_content_col
+    return df
+
+
+def filter_most_overllaping_ppi(df: pd.DataFrame, size_threshold: int
+                                ) -> pd.DataFrame:
+    """
+    Get the DNA community and the ppi community have the larger overlap \
+    possible.
+
+    :param df: The dataframe containing the number of \
+    genes that clusters in community at DNA and protein level
+    :param size_threshold: the minimum size required to keep gene
+    community at dna level.
+    :return: The dataframe with only one line per DNA community
+
+    >>> test_df = pd.DataFrame({"id_gene": range(1, 12),
+    ... "DNA_community": ["C1"] * 5 + ["C2"] * 5 + ["C3"],
+    ... "community_size": [9] * 5 + [14] * 5 + [19],
+    ... "nb_com-ppi": [0, 1, 2, 2, 0, 0, 1, 0, 0, 0, 0],
+    ... "size_com-ppi": [30, 50, 105, 102, 25, 30, 42, 47, 89, 12, 0]})
+    >>> filter_most_overllaping_ppi(test_df, 10)
+      DNA_community  community_size  nb_com-ppi  size_com-ppi
+    0            C1              10           3           105
+    1            C2              15           2            42
+    2            C3              20           0             0
+    """
+    df.drop('id_gene', axis=1, inplace=True)
+    dna_communities = df["DNA_community"].unique()
+    list_df = []
+    for my_community in dna_communities:
+        tmp_df = df.loc[(df['DNA_community'] == my_community), :]
+        val = np.max(tmp_df['nb_com-ppi'].values)
+        list_df.append(tmp_df.loc[tmp_df['nb_com-ppi'] == val, :]
+                       .sample(1, random_state=1))
+    df = pd.concat(list_df, axis=0, ignore_index=True)
+    df['nb_com-ppi'] = df.apply(lambda x: x['nb_com-ppi'] + 1
+                                if x["size_com-ppi"] > 0
+                                else x['nb_com-ppi'], axis=1)
+    df["community_size"] = df["community_size"] + 1
+    return df[df["community_size"] >= size_threshold]
+
+
+def create_community_ppi_table(community_file: Path, fasterdb_ppi: Path,
+                               project: str,
+                               weight: int, global_weight: int,
+                               same_gene: bool = True,
+                               threshold: int = 700):
+    """
+
+    :param community_file: A file containing community of gene interacting \
+    at DNA level
+    :param fasterdb_ppi: A file containing pairs of genes having their \
+    protein product interacting at protein level.
+    :param project: The name of the project of interest
+    :param weight: The weight of interaction to consider
+    :param global_weight: The global weight to consider. if \
+    the global weight is equal to 0 then then density figure are calculated \
+    by project, else all project are merge together and the interaction \
+    seen in `global_weight` project are taken into account
+    :param same_gene: Say if we consider as co-localised exon within the \
+    same gene
+    :param threshold: The minimum threshold needed to consider the interaction
+    :return:
+    """
+    outfile = ConfigGraph.get_community_file(project, weight, global_weight,
+                                             same_gene, "gene",
+                                             '_graph_community_PPI.txt',
+                                             "community_gene_vs_protein")
+    if not outfile.is_file():
+        logging.debug("Creating the community file for PPI")
+        df_comm_ppi = ppi_community_finder(fasterdb_ppi, project, weight,
+                                           global_weight, same_gene, threshold)
+    else:
+        df_comm_ppi = pd.read_csv(outfile, sep="\t")
+    logging.debug("Turning ppi community file into a dic")
+    dic_ppi_com = get_community_dic_fom_community_file(df_comm_ppi,
+                                                       community_data=False)
+    logging.debug("Loading gene community file")
+    dic_gene = get_community_dic_fom_community_file(community_file,
+                                                    community_data=True)
+    logging.debug("Creation of the gene community ad DNA level")
+    df = create_dataframe(dic_gene)
+    logging.debug("Add a column for proteins community")
+    df = add_size_and_common_gene_number(df, dic_gene, dic_ppi_com,
+                                         "nb_com-ppi", "size_com-ppi")
+    return df
+
+
+def ppi_stats_analysis(community_file: Path, fasterdb_ppi: Path,
+                       project: str,
+                       weight: int, global_weight: int,
+                       same_gene: bool = True,
+                       threshold: int = 700, iteration: int = 1000):
+    """
+    :param community_file: A file containing community of gene interacting \
+    at DNA level
+    :param fasterdb_ppi: A file containing pairs of genes having their \
+    protein product interacting at protein level.
+    :param project: The name of the project of interest
+    :param weight: The weight of interaction to consider
+    :param global_weight: The global weight to consider. if \
+    the global weight is equal to 0 then then density figure are calculated \
+    by project, else all projcet are merge together and the interaction \
+    seen in `global_weight` project are taken into account
+    :param same_gene: Say if we consider as co-localised exon within the \
+    same gene
+    :param threshold: The minimum threshold needed to consider the interaction
+    :param iteration: The number of iteration to make
+    :return:
+    """
+    outfile = ConfigGraph.get_community_file(project, weight,
+                                             global_weight, same_gene, "gene",
+                                             f"ppi_gene_complete_table.txt",
+                                             "community_gene_vs_protein")
+    ppi_outfile = ConfigGraph.get_community_file(project, weight,
+                                                 global_weight,
+                                                 same_gene, "gene",
+                                                 '_graph_community_PPI.txt',
+                                                 "community_gene_vs_protein")
+    if not outfile.is_file():
+        df = create_community_ppi_table(community_file, fasterdb_ppi,
+                                        project, weight, global_weight,
+                                        same_gene, threshold)
+        df.to_csv(outfile, sep="\t", index=False)
+    else:
+        df = pd.read_csv(outfile, sep="\t")
+    size_threshold = 10
+    df_overlap = filter_most_overllaping_ppi(df, size_threshold)
+    ppi_gene = get_ppi_community_gene(pd.read_csv(ppi_outfile, sep="\t"))
+    dic_dna_gene = get_dna_community_gene(pd.read_csv(community_file,
+                                                      sep="\t"))
+    df, dic_values = update_overlap_df(df_overlap, dic_dna_gene, ppi_gene,
+                                       iteration)
+    outstat = ConfigGraph.get_community_file(project, weight, global_weight,
+                                             same_gene, "gene",
+                                             f"ppi_gene_table_{iteration}_"
+                                             f"stat.txt",
+                                             "community_gene_vs_protein")
+    df.to_csv(outstat, sep="\t", index=False)
+    final_df = summary_randomisation_test(df, dic_dna_gene, ppi_gene,
+                                          iteration, dic_values,
+                                          use_seed=False)
+    outstat = ConfigGraph.get_community_file(project, weight, global_weight,
+                                             same_gene, "gene",
+                                             f"ppi_gene_table_{iteration}_"
+                                             f"stat_recap.txt",
+                                             "community_gene_vs_protein")
+    final_df.to_csv(outstat, sep="\t", index=False)
+
+
+def ppi_stat_launcher(weight: int,
+                      global_weight: int,
+                      project: str,
+                      same_gene: bool, threshold: int = 700,
+                      iteration: int = 1000,
+                      logging_level: str = "DISABLE"):
+    """
+    Launch the statistical allowing to determine if interaction between \
+    genes at dna level as an influence on the interactions at protein level.
+
+    :param weight: The weight of interaction to consider
+    :param global_weight: The global weighs to consider. if \
+    the global weight is equal to 0  then the project `project` is \
+    used.
+    :param project: The project name, used only if global_weight = 0
+    :param same_gene: Say if we consider as co-localised exon within the \
+    same gene
+    :param threshold: The minimum threshold needed to consider the interaction
+    :param iteration: The number of iteration to make
+    :param logging_level: Level of information to display
+    """
+    ConfigGraph.community_folder.mkdir(exist_ok=True, parents=True)
+    logging_def(ConfigGraph.community_folder, __file__, logging_level)
+    logging.info("Checking if gene communities at DNA level have an "
+                 "influence on communities at protein level")
+    project = get_projects(global_weight, project)
+    logging.debug("Calculating stats...")
+    community_file = ConfigGraph.get_community_file(project, weight,
+                                                    global_weight,
+                                                    same_gene, "gene",
+                                                    f".txt")
+    ppi_stats_analysis(community_file, ConfigPPI.fasterdb_ppi, project,
+                       weight, global_weight, same_gene, threshold, iteration)
+
+
+if __name__ == "__main__":
+    testmod()
diff --git a/src/find_interaction_cluster/nt_and_community.py b/src/find_interaction_cluster/nt_and_community.py
index 48896d40879815fffa759794518de252c62a100b..2f9d7d0e0e6608e58603d11585d8792e3c73749f 100644
--- a/src/find_interaction_cluster/nt_and_community.py
+++ b/src/find_interaction_cluster/nt_and_community.py
@@ -7,59 +7,73 @@ Description: The goal of this script is to see if the nucleotide \
 frequency is not random between communities
 """
 
-
 import pandas as pd
 import logging
 import sqlite3
 from .config import ConfigGraph, get_communities
-from typing import List
+from typing import List, Tuple, Dict
 from doctest import testmod
 from functools import reduce
 from pathlib import Path
 from rpy2.robjects import r, pandas2ri
 from statsmodels.stats.multitest import multipletests
-import numpy as np
-from .community_finder import get_projects_name
+from .community_finder import get_projects
 from ..logging_conf import logging_def
 from itertools import product
 import multiprocessing as mp
-from .sf_and_communities import get_key
+from .community_figures.fig_functions import create_community_fig
+from ..nt_composition.config import get_features
 
 
-def get_nt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
-                     feature: str) -> pd.DataFrame:
+def get_cpnt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
+                       feature: str, region: str = "",
+                       component_type: str = "nt") -> pd.DataFrame:
     """
     Get the frequency of every nucleotides for features in list_ft.
 
     :param cnx: Connection to chia-pet database
     :param list_ft: The list of exons for which we want to get
     :param feature: the kind of feature analysed
+    :param region: The region of gene analysed if feature is gene
+    :param component_type: The type of component to analyse; It \
+    can be 'nt', 'dnt' or 'aa'.
     :return: the frequency of nucleotides for the list of exons.
 
-    >>> d = get_nt_frequency(sqlite3.connect(ConfigGraph.db_file),
+    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
     ... ["1_1", "1_2"], "exon")
     >>> d[["id_exon", 'A', 'C', 'G', 'T']]
     ft id_exon         A         C         G         T
     0      1_1  16.63480  34.60803  32.12237  16.63480
     1      1_2  16.06426  26.10442  39.75904  18.07229
-    >>> d = get_nt_frequency(sqlite3.connect(ConfigGraph.db_file),
+    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
     ... ['1', '2'], "gene")
     >>> d[["id_gene", 'A', 'C', 'G', 'T']]
     ft id_gene         A         C         G         T
     0        1  29.49376  18.34271  18.43874  33.72479
     1        2  31.90401  16.40251  18.79033  32.90315
+    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
+    ... ['1', '2'], "gene", 'exon', 'aa')
+    >>> d[["id_gene", "R", "K", "D", "Q", "E"]]
+        ft id_gene        R        K        D        Q        E
+    0        1  4.75247  5.19300  5.95391  4.07997  6.96189
+    1        2  4.34203  6.23736  6.77708  5.21984  7.01769
     """
+    query_region = ""
     if feature == "gene":
         list_ft = [int(ft) for ft in list_ft]
+        if region == "":
+            region = "gene"
+        query_region = f"AND region = '{region}'"
     query = f"""
              SELECT ft, id_{feature}, frequency
              FROM cin_{feature}_frequency
              WHERE id_{feature} IN {tuple(list_ft)}
-             AND ft_type="nt" 
+             AND ft_type = '{component_type}' 
+             {query_region}
              """
     df = pd.read_sql_query(query, cnx)
-    df = df.pivot_table(index=f"id_{feature}", columns="ft", values="frequency")\
-        .reset_index()
+    df = df.pivot_table(index=f"id_{feature}", columns="ft",
+                        values="frequency").reset_index()
     df[f"id_{feature}"] = df[f"id_{feature}"].astype(str)
     return df
 
@@ -78,20 +92,20 @@ def get_community_table(communities: List[List[str]],
     >>> c = [['1_1', '2_5'], ['7_9', '4_19', '3_3']]
     >>> get_community_table(c, 3, 'exon')
       community id_exon  community_size
-    0        C1     7_9               3
-    1        C1    4_19               3
-    2        C1     3_3               3
+    0        C2     7_9               3
+    1        C2    4_19               3
+    2        C2     3_3               3
     >>> c = [['1', '2'], ['7', '49', '3']]
     >>> get_community_table(c, 3, 'gene')
       community id_gene  community_size
-    0        C1       7               3
-    1        C1      49               3
-    2        C1       3               3
+    0        C2       7               3
+    1        C2      49               3
+    2        C2       3               3
     """
     dic = {"community": [], f"id_{feature}": [], "community_size": []}
     for k, comm in enumerate(communities):
         if len(comm) >= size_threshold:
-            name = f'C{k}'
+            name = f'C{k + 1}'
             clen = len(comm)
             for exon in comm:
                 dic["community"].append(name)
@@ -100,15 +114,15 @@ def get_community_table(communities: List[List[str]],
     return pd.DataFrame(dic)
 
 
-def lmm_maker(df: pd.DataFrame, outfile: Path, nt: str) -> float:
+def lm_maker(df: pd.DataFrame, outfile: Path, nt: str) -> float:
     """
-    Make the lmm analysis to see if the exon regulated by a splicing factor \
+    Make the lm analysis to see if the exon regulated by a splicing factor \
     are equally distributed among the communities.
 
     :param df: The dataframe
     :param outfile: A name of a file
     :param nt: the nucleotide of interest
-    :return: the pvalue of lmm
+    :return: the pvalue of lm
 
     """
     pandas2ri.activate()
@@ -126,7 +140,6 @@ def lmm_maker(df: pd.DataFrame, outfile: Path, nt: str) -> float:
             dev.off()
             return(anova(mod, null_mod, test="Chisq"))
         }
-
         """ % (nt, nt))
     folder = outfile.parent / "diagnostics"
     folder.mkdir(parents=True, exist_ok=True)
@@ -135,45 +148,6 @@ def lmm_maker(df: pd.DataFrame, outfile: Path, nt: str) -> float:
     return res["Pr(>Chisq)"][1]
 
 
-def lmm_maker_summary(df: pd.DataFrame, outfile: Path, nt: str
-                      ) -> pd.DataFrame:
-    """
-    Make the lmm analysis to see if the exon regulated by a splicing factor \
-    are equally distributed among the communities.
-
-    :param df: The dataframe
-    :param outfile: A name of a file
-    :param nt: the nucleotide of interest
-    :return: the pvalue of lmm
-
-    """
-    pandas2ri.activate()
-    lmm = r(
-        """
-        require("DHARMa")
-
-        function(data, folder, partial_name) {
-            mod <- lm(%s ~ log(community_size) +  community, data=data)
-            simulationOutput <- simulateResiduals(fittedModel = mod, n = 250)
-            png(paste0(folder, "/dignostics_summary", partial_name, ".png"))
-            plot(simulationOutput)
-            dev.off()
-            return(as.data.frame(summary(mod)$coefficients))
-        }
-
-        """ % nt)
-    folder = outfile.parent / "diagnostics"
-    folder.mkdir(parents=True, exist_ok=True)
-    partial_name = outfile.name.replace('.txt', '')
-    res_df = lmm(df, str(folder), partial_name).reset_index()
-    res_df.rename({'index': 'community'}, inplace=True, axis=1)
-    res_df['community'] = res_df['community'].str.replace('community', '')
-    res_df.loc[res_df['community'] == "(Intercept)", "community"] = "C-CTRL"
-    mean_df = df[[nt, "community", "community_size"]].\
-        groupby(["community", "community_size"]).mean().reset_index()
-    return res_df.merge(mean_df, how="left", on="community")
-
-
 def get_ft_id(cnx: sqlite3.Connection, feature: str = "exon") -> List[str]:
     """
     Return the id of every gene/exons in chia-pet database.
@@ -189,36 +163,136 @@ def get_ft_id(cnx: sqlite3.Connection, feature: str = "exon") -> List[str]:
     return [str(cid[0]) for cid in res]
 
 
-def create_ctrl_community(df: pd.DataFrame, outfile: Path,
-                          feature: str = 'exon') -> pd.DataFrame:
+def create_ctrl_community(df: pd.DataFrame,
+                          feature: str = 'exon', region: str = "",
+                          cpnt_type: str = 'nt') -> pd.DataFrame:
     """
     :param df: A dataframe containing the frequency of each feature in a \
     community.
-    :param outfile: The output table containing frequencies
     :param feature: The kind of feature to analyse
+    :param region: only use if feature is 'gene'. Used to focus on \
+    a given region in genes (can be gene, exon, intron).
+    :param cpnt_type: The type of component to analyse; It \
+    can be 'nt', 'dnt' or 'aa'.
     :return: A dataframe containing the frequency of every nucleotides \
     of every exon in a large community
     """
-    if outfile.is_file():
-        return pd.read_csv(outfile, sep="\t")
     size_threshold = 10 if feature == "gene" else 50
     cnx = sqlite3.connect(ConfigGraph.db_file)
     ft_id = get_ft_id(cnx, feature)
     list_ft = [cid for cid in ft_id
                if cid not in df[f"id_{feature}"].to_list()]
-    df_nt = get_nt_frequency(cnx, list_ft, feature)
+    df_nt = get_cpnt_frequency(cnx, list_ft, feature, region, cpnt_type)
     df_com = get_community_table([list_ft], size_threshold, feature)
     df_nt = df_nt.merge(df_com, how="left", on=f"id_{feature}")
     df_nt['community'] = ['C-CTRL'] * df_nt.shape[0]
     df = pd.concat([df, df_nt], axis=0, ignore_index=True)
-    df.to_csv(outfile, sep="\t", index=False)
     return df
 
 
-def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int,
-                            global_weight: int, same_gene: bool,
-                            nt: str, feature: str = "exon",
-                            ) -> pd.Series:
+def get_feature_by_community(df: pd.DataFrame, feature: str) -> Dict:
+    """
+    Create a dictionary containing the exons contained in each community.
+
+    :param df: A dataframe containing the frequency of each nucleotide \
+    in each exons belonging to a community.
+    :param feature: the feature of interest (exon, gene)
+    :return: A dictionary linking each community to the exons it contains
+
+    >>> dataf = pd.DataFrame({"id_gene": ['1', '2', '3', '4'],
+    ... 'community': ['C1', 'C1', 'C2', 'C2']})
+    >>> get_feature_by_community(dataf, 'gene')
+    {'C1': ['1', '2'], 'C2': ['3', '4']}
+    >>> dataf.rename({"id_gene": "id_exon"}, axis=1, inplace=True)
+    >>> get_feature_by_community(dataf, 'exon')
+    {'C1': ['1', '2'], 'C2': ['3', '4']}
+    """
+    dic = {}
+    for i in range(df.shape[0]):
+        com, id_ft = df.iloc[i, :][['community', f'id_{feature}']]
+
+        if com in dic:
+            dic[com].append(id_ft)
+        else:
+            dic[com] = [id_ft]
+    return dic
+
+
+def prepare_dataframe(df: pd.DataFrame, test_type: str, nt: str,
+                      iteration: int) -> pd.DataFrame:
+    """
+
+    :param df: A dataframe with the enrichment of a \
+    nucleotide frequency for every community
+    :param test_type: The kind of test performed to
+    :param nt: The nucleotide of interest
+    :param iteration: The number of iteration permformed to \
+    produce the database
+    :return: The dataframe ready for barplot visualisation
+    """
+    if test_type == "lm":
+        # removing the size parameter
+        df = df[df["community"] != "log(_size)"].copy()
+        df.rename({"Pr(>|t|)": "p-adj"}, axis=1, inplace=True)
+        df_bar = df[['community', nt, 'p-adj']]
+        df_ctrl = df_bar[df_bar["community"] == "C-CTRL"]
+        df_bar = df_bar[df_bar["community"] != "C-CTRL"].copy()
+        df_bar.sort_values(nt, ascending=True, inplace=True)
+    else:
+        df_bar = df[["community", nt, f"{nt}_mean_{iteration}_ctrl",
+                     "p-adj"]].copy()
+        ctrl_val = df_bar[f"{nt}_mean_{iteration}_ctrl"]
+        df_bar.drop(f"{nt}_mean_{iteration}_ctrl", axis=1, inplace=True)
+        df_bar.sort_values(nt, ascending=True, inplace=True)
+        df_ctrl = pd.DataFrame({"community": ["C-CTRL"] * len(ctrl_val),
+                                nt: ctrl_val})
+    df_bar = pd.concat([df_ctrl, df_bar], axis=0, ignore_index=True)
+    return df_bar
+
+
+def create_outfiles(project: str, weight: int, global_weight: int,
+                    same_gene: bool, feature: str, cpnt_type: str,
+                    cpnt: str, test_type: str
+                    ) -> Tuple[Path, Path]:
+    """
+    Create a file used to store diagnostics and a file used to store the \
+    table containing the test communities and the control community
+
+    :param project: The name of the project of interest
+    :param weight: The minimum weight of interaction to consider
+    :param global_weight: The global weight to consider. if \
+    the global weight is equal to 0 then then density figure are calculated \
+    by project, else all projet are merge together and the interaction \
+    seen in `global_weight` project are taken into account
+    :param same_gene: Say if we consider as co-localised, exons within the \
+    same gene (True) or not (False) (default False)
+    :param cpnt_type: The type of component to analyse; It \
+    can be 'nt', 'dnt' or 'aa'.
+    :param cpnt: The component (nt, aa, dnt) of interest
+    :param feature: The kind of feature analysed
+    :param test_type: The type of test to make (permutation or lm)
+    :return: file used to store diagnostics and a file used to store the \
+    table containing the test communities and the control community
+    """
+
+    outfolder = f"community_enrichment/{cpnt_type}_analysis"
+    outfile = ConfigGraph.\
+        get_community_file(project, weight, global_weight, same_gene, feature,
+                           f"{cpnt}-{cpnt_type}_stat_{test_type}.txt",
+                           outfolder)
+    outfile_ctrl = ConfigGraph.\
+        get_community_file(project, weight, global_weight, same_gene, feature,
+                           f"{cpnt}-{cpnt_type}_VS_CTRL_stat_{test_type}.pdf",
+                           outfolder)
+    return outfile, outfile_ctrl
+
+
+def get_stat_cpnt_communities(df: pd.DataFrame, project: str, weight: int,
+                              global_weight: int, same_gene: bool,
+                              cpnt_type: str, cpnt: str,
+                              dic_com: Dict, feature: str = "exon",
+                              test_type: str = "",
+                              iteration: int = 1000) -> pd.Series:
     """
     Get data (proportion of `reg` regulated exons by a splicing factor
     `sf_name` within a community) for every community.
@@ -233,41 +307,32 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int,
     seen in `global_weight` project are taken into account
     :param same_gene: Say if we consider as co-localised, exons within the \
     same gene (True) or not (False) (default False)
-    :param nt: The nucleotide of interest
-    :param feature: The king of feature analysed
+    :param cpnt_type: The type of component to analyse; It \
+    can be 'nt', 'dnt' or 'aa'.
+    :param cpnt: The component (nt, aa, dnt) of interest
+    :param dic_com: A dictionary linking each community to the exons \
+    it contains.
+    :param feature: The kind of feature analysed
+    :param test_type: The type of test to make (permutation or lm)
+    :param iteration: The number of sub samples to create
     """
-    logging.debug(f"lmm for {project}, w:{weight}, "
-                  f"g:{global_weight} nt: {nt}")
-    res = {"project": [], "nt": [], 'pval': []}
-
-    outfile = ConfigGraph.get_community_file(project, weight,
-                                             global_weight,
-                                             same_gene, feature,
-                                             f"{nt}_stat.txt",
-                                             True)
-    nfolder = outfile.parent / "nt_analysis"
-    nfolder.mkdir(exist_ok=True, parents=True)
-    noutfile = nfolder / outfile.name
-    pval = lmm_maker(df, noutfile, nt)
-    res['project'] = project
-    res['nt'] = nt
-    res['pval'] = pval
-    nt_ctrl_table = noutfile.parent / noutfile.name.replace("_stat.txt",
-                                                            "_ctrl.txt")
-    ndf = create_ctrl_community(df, nt_ctrl_table, feature)
-    sum_df = lmm_maker_summary(ndf, outfile, nt)
-    outfile_ctrl = ConfigGraph.get_community_file(project, weight,
-                                                  global_weight,
-                                                  same_gene, feature,
-                                                  f"{nt}_VS_CTRL_stat.txt",
-                                                  True)
-    noutfile_ctrl = nfolder / outfile_ctrl.name
-    sum_df.to_csv(noutfile_ctrl, sep="\t", index=False)
+    logging.debug(f"{test_type} for {project}, w:{weight}, "
+                  f"g:{global_weight} cpnt: {cpnt}({cpnt_type})")
+    outfile, outfile_ctrl = create_outfiles(project, weight, global_weight,
+                                            same_gene, feature, cpnt_type,
+                                            cpnt, test_type)
+    res = {"project": project, "cpnt": cpnt,
+           'pval': lm_maker(df, outfile, cpnt)}
+    create_community_fig(df, feature, cpnt, outfile_ctrl, test_type,
+                         dic_com, cpnt_type, iteration)
     return pd.Series(res)
 
 
 def create_dataframe(project: str, weight: int, global_weight: int,
-                     same_gene: bool, feature: str = 'exon') -> pd.DataFrame:
+                     same_gene: bool, feature: str = 'exon',
+                     region: str = "", component_type: str = 'nt',
+                     from_communities: bool = True
+                     ) -> pd.DataFrame:
     """
     :param project: The name of the project of interest
     :param weight: The minimum weight of interaction to consider
@@ -278,91 +343,134 @@ def create_dataframe(project: str, weight: int, global_weight: int,
     :param same_gene: Say if we consider as co-localised, exons within the \
     same gene (True) or not (False)
     :param feature: The kind of feature to analyse
+    :param from_communities: True if we only select gene/exons
+    :param region: the region of interest to extract from gene
+    :param component_type: The type of component to analyse; It \
+    can be 'nt', 'dnt' or 'aa'.
     :return: A dataframe containing the frequency of every nucleotides \
     of every exon in a large community
     """
     size_threshold = 10 if feature == "gene" else 50
-    result = ConfigGraph.get_community_file(project, weight, global_weight,
-                                            same_gene, feature,
-                                            "_communities.txt")
-    communities = get_communities(result, 0)
-    ncommunities = [c for c in communities if len(c) >= size_threshold]
     cnx = sqlite3.connect(ConfigGraph.db_file)
-    list_exon = reduce(lambda x, y: x + y, ncommunities)
-    df = get_nt_frequency(cnx, list_exon, feature)
-    df_com = get_community_table(communities, size_threshold, feature)
-    df = df.merge(df_com, how="left", on=f"id_{feature}")
+    if from_communities:
+        result = ConfigGraph.get_community_file(project, weight, global_weight,
+                                                same_gene, feature,
+                                                "_communities.txt")
+        communities = get_communities(result, 0)
+        ncommunities = [c for c in communities if len(c) >= size_threshold]
+        list_exon = reduce(lambda x, y: x + y, ncommunities)
+
+        df_com = get_community_table(communities, size_threshold, feature)
+        df = get_cpnt_frequency(cnx, list_exon, feature, region,
+                                component_type)
+        df = df.merge(df_com, how="left", on=f"id_{feature}")
+    else:
+        list_exon = get_ft_id(cnx, feature)
+        df = get_cpnt_frequency(cnx, list_exon, feature, region,
+                                component_type)
     return df
 
 
-def multiple_nt_lmm_launcher(ps: int, weights: List[int],
-                             global_weights: List[int],
-                             same_gene: bool,
-                             feature: str = 'exon',
-                             logging_level: str = "DISABLE"):
+def create_dataframes(project, weight, global_weight, same_gene, feature,
+                      region, test_type, component_type: str
+                      ) -> Tuple[pd.DataFrame, Dict]:
+    """
+    :param weight: The weight of interaction to consider
+    :param global_weight: The global weighs to consider. if \
+    the global weight is equal to 0  then the project `project` is \
+    used.
+    :param project: The project name, used only if global_weight = 0
+    :param same_gene: Say if we consider as co-localised exon within the \
+    same gene
+    :param feature: The kind of analysed feature
+    :param region: the region of interest to extract from gene
+    :param test_type: The type of test to make (permutation or lm)
+    :param component_type: The type of component to analyse; It \
+    can be 'nt', 'dnt' or 'aa'.
+    """
+    df = create_dataframe(project, weight, global_weight, same_gene,
+                          feature, region, component_type)
+    df_ctrl = create_dataframe(project, weight, global_weight, same_gene,
+                               feature, region, component_type,
+                               from_communities=False)
+    df_ctrl = df_ctrl.loc[-df_ctrl[f"id_{feature}"].isin(df[f"id_{feature}"]),
+                          :].copy()
+    dic_com = {} if test_type == 'lm' \
+        else get_feature_by_community(df, feature)
+    df = pd.concat([df, df_ctrl], axis=0, ignore_index=True)
+    return df, dic_com
+
+
+def multiple_nt_lm_launcher(ps: int,
+                            weight: int,
+                            global_weight: int,
+                            project: str,
+                            same_gene: bool,
+                            feature: str = 'exon',
+                            region: str = '',
+                            component_type: str = "nt",
+                            test_type: str = "lm",
+                            iteration: int = 1000,
+                            logging_level: str = "DISABLE"):
     """
     Launch the statistical analysis for every
 
-    :param ps: The number of processes we want to use.
-    :param weights: The list of weights of interaction to consider
-    :param global_weights: The list global weights to consider. if \
-    the global weight is equal to 0 then then density figure are calculated \
-    by project, else all projcet are merge together and the interaction \
-    seen in `global_weight` project are taken into account
+    :param ps: The number of processes to use
+    :param weight: The weight of interaction to consider
+    :param global_weight: The global weighs to consider. if \
+    the global weight is equal to 0  then the project `project` is \
+    used.
+    :param project: The project name, used only if global_weight = 0
     :param same_gene: Say if we consider as co-localised exon within the \
     same gene
     :param feature: The kind of analysed feature
+    :param component_type: The type of component to analyse; It \
+    can be 'nt', 'dnt' or 'aa'.
+    :param region: the region of interest to extract from gene
+    :param test_type: The type of test to make (permutation or lmm)
+    :param iteration: The number of iteration to perform
     :param logging_level: Level of information to display
     """
     ConfigGraph.community_folder.mkdir(exist_ok=True, parents=True)
     logging_def(ConfigGraph.community_folder, __file__, logging_level)
-    logging.info("Checking if communities as an effect on nucleotide "
+    if feature == "gene" and region != "exon" and component_type == "aa":
+        msg = f"If you looking at amino acid at gene level, then the region " \
+              f"must be 'exon' not '{region}'. Setting it to exon. " \
+              f"(corresponds to the concatenation of exons for all genes)"
+        logging.warning(msg)
+        region = 'exon'
+    logging.info(f"Checking if communities as an effect on {component_type} "
                  "frequency")
-    global_weights = list(np.unique(global_weights))
-    weights = list(np.unique(weights))
-    projects, dic_project = get_projects_name(global_weights)
-    nt_list = ["A", "C", "G", "T", "S", "W"]
-    condition = list(product(projects, weights, nt_list))
-    processes = {}
+    project = get_projects(global_weight, project)
+    cpnt_list = get_features(component_type)
+    condition = list(product([project], [weight], cpnt_list))
+    processes = []
     pool = mp.Pool(processes=min(ps, len(condition)))
-    logging.debug("Calculating stats...")
-    dic_df = {}
-    for project, weight, nt in condition:
-        global_weight = dic_project[project]
-        ckey = get_key(project, weight)
-        if ckey in dic_df:
-            df = dic_df[ckey]
-        else:
-            df = create_dataframe(project, weight, global_weight, same_gene,
-                                  feature)
-            nfile_table = ConfigGraph.get_community_file(project, weight,
-                                                         global_weight,
-                                                         same_gene, feature,
-                                                         f"_nt_table.txt",
-                                                         True)
-            df.to_csv(nfile_table, sep="\t", index=False)
-            dic_df[ckey] = df
-        args = [df, project, weight, global_weight, same_gene, nt, feature]
-        if ckey not in processes.keys():
-            processes[ckey] = [pool.apply_async(get_stat_nt_communities, args)]
-        else:
-            processes[ckey].append(
-                pool.apply_async(get_stat_nt_communities, args))
-    for p, value in processes.items():
-        project, weight = p.split("_")
-        results = [p.get(timeout=None) for p in value]
-        pool.close()
-        pool.join()
-        fdf = pd.DataFrame(results)
-        fdf["padj"] = multipletests(fdf['pval'].values, method='fdr_bh')[1]
-        outfile = ConfigGraph.get_community_file(project, weight,
-                                                 dic_project[project],
-                                                 same_gene, feature,
-                                                 f"lmm-nt_stat.txt",
-                                                 True)
-        nfolder = outfile.parent / "nt_analysis"
-        noutfile = nfolder / outfile.name
-        fdf.to_csv(noutfile, sep="\t", index=False)
+    logging.debug("Creating tables")
+    df, dic_com = create_dataframes(project, weight, global_weight,
+                                    same_gene, feature, region,
+                                    test_type, component_type)
+    for project, weight, cpnt in condition:
+        nfile_table = ConfigGraph.get_community_file(
+            project, weight, global_weight, same_gene, feature,
+            f"_{component_type}_table.txt", "community_enrichment")
+        df.to_csv(nfile_table, sep="\t", index=False)
+        args = [df, project, weight, global_weight, same_gene, component_type,
+                cpnt, dic_com, feature, test_type, iteration]
+        processes.append(pool.apply_async(get_stat_cpnt_communities, args))
+    results = [p.get(timeout=None) for p in processes]
+    pool.close()
+    pool.join()
+    fdf = pd.DataFrame(results)
+    fdf["padj"] = multipletests(fdf['pval'].values, method='fdr_bh')[1]
+    outfile = ConfigGraph.get_community_file(project, weight,
+                                             global_weight,
+                                             same_gene, feature,
+                                             f"lmm-{component_type}_stat.txt",
+                                             "community_enrichment")
+    nfolder = outfile.parent / f"{component_type}_analysis"
+    noutfile = nfolder / outfile.name
+    fdf.to_csv(noutfile, sep="\t", index=False)
 
 
 if __name__ == "__main__":
diff --git a/src/find_interaction_cluster/ppi_scripts/__init__.py b/src/find_interaction_cluster/ppi_scripts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..60f3a118d41cf7494b1cd067bf5d649b42ba1e05
--- /dev/null
+++ b/src/find_interaction_cluster/ppi_scripts/__init__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description:
+"""
diff --git a/src/find_interaction_cluster/ppi_scripts/__main__.py b/src/find_interaction_cluster/ppi_scripts/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2f10e03ded5cbaf1644ba3a5f4883f1c0189483
--- /dev/null
+++ b/src/find_interaction_cluster/ppi_scripts/__main__.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description:
+"""
+
+from .get_ensembl_hg19_gene_ccoordinates import create_protein_gene_file
+from .create_gene_interaction_file import get_protein_interaction_table
+
+def launcher_ppi():
+    """
+    launch every script in this submodules.
+    """
+    create_protein_gene_file()
+    get_protein_interaction_table()
+
+
+launcher_ppi()
\ No newline at end of file
diff --git a/src/find_interaction_cluster/ppi_scripts/config_ppi.py b/src/find_interaction_cluster/ppi_scripts/config_ppi.py
new file mode 100644
index 0000000000000000000000000000000000000000..d61820b4b05874c7d86013efed9c5018f6407623
--- /dev/null
+++ b/src/find_interaction_cluster/ppi_scripts/config_ppi.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description: This script contains all the variables needed in this \
+submodules
+"""
+
+from ..config import ConfigGraph
+
+
+class ConfigPPI:
+    """
+    contains all the variables used in this submodules
+    """
+    output_folder = ConfigGraph.output_folder / "ppi_analysis"
+    data = ConfigGraph.data
+    bed_ensembl = data / "ppi_data" / "liftover_hg38gene2hg19.bed"
+    ensembl_gene = data / "ppi_data" / "mart_export_hg37_proteins_genes.txt"
+    ppi_string = data / "ppi_data" / "9606.protein.links.v11.0.txt.gz"
+    protein_gene = output_folder / "protein_gene.txt"
+    gene_ppi_file = output_folder / "ppi_gene_file.txt"
+    protein_gene_fasterdb = output_folder / "protein_gene_fasterdb.txt"
+    fasterdb_ppi = output_folder / "fasterdb_ppi.txt"
\ No newline at end of file
diff --git a/src/find_interaction_cluster/ppi_scripts/create_gene_interaction_file.py b/src/find_interaction_cluster/ppi_scripts/create_gene_interaction_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c3bebabfea017d47bcfa3ae0cdef0dc53949518
--- /dev/null
+++ b/src/find_interaction_cluster/ppi_scripts/create_gene_interaction_file.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description: Create a file of genes having their proteins interacting with \
+each other
+"""
+
+from pathlib import Path
+from ..config import Config
+from .config_ppi import ConfigPPI
+import pandas as pd
+from typing import Dict, Optional, Tuple
+
+
+def load_string_ppi_file(ppi_file: Path) -> pd.DataFrame:
+    """
+    Load the string protein protein interaction file.
+
+    :param ppi_file: A file containing the protein protein interaction.
+    :return: The loaded string ppi interaction file
+    """
+    df = pd.read_csv(ppi_file, sep=" ")
+    df['protein1'] = df['protein1'].str.replace("9606.", "",  regex=False)
+    df['protein2'] = df['protein2'].str.replace("9606.", "", regex=False)
+    return df
+
+
+def load_protein_gene_links(prot_gene_file: Path) -> pd.DataFrame:
+    """
+    Load the file linking each gene to it's hg19 coordinates and encoded \
+    proteins.
+
+    :param prot_gene_file: A file linking each gene to it's hg19 \
+    coordinates and encoded proteins.
+    :return: The loaded file
+    """
+    df = pd.read_csv(prot_gene_file, sep="\t")
+    df.rename({"Chromosome-scaffold_name": "chr", "Gene_start": "start",
+               "Gene_end": "stop", "Strand": "strand"}, axis=1, inplace=True)
+    df['Gene_name'] = df['Gene_name'].str.upper()
+    return df
+
+
+def load_fasterdb_gene(bed_gene: Path) -> Dict:
+    """
+    Load a bed file containing all fasterDB gene.
+
+    :param bed_gene: A bed file containing all fasterdb gene
+    :return: A dictionary linking each fasterDB gene name to its coordinates
+    """
+    dic = {}
+    with bed_gene.open("r") as inbed:
+        for line in inbed:
+            line = line.replace("\n", "").split("\t")
+            gene_name = line[4].upper()
+            if gene_name not in dic.keys():
+                dic[gene_name] = [[line[0], int(line[1]), int(line[2]),
+                                   int(line[3]), gene_name, line[5]]]
+            else:
+                dic[gene_name].append(
+                    [line[0], int(line[1]), int(line[2]),
+                     int(line[3]), gene_name, line[5]])
+    return dic
+
+
+def find_fasterdb_id(mseries: pd.Series, dic: Dict) -> Optional[int]:
+    """
+    Find the fasterDB gene id from a line corresponding to an ensembl gene.
+
+    :param mseries: A line containing the ensembl gene id and gene name of \
+    a gene
+    :param dic: A dictionary linking each gene name to it's coordinate
+    :return: The fasterDB id of the ensembl gene.
+    """
+    if mseries['Gene_name'].upper() not in dic.keys():
+            return None
+    genes = dic[mseries['Gene_name'].upper()]
+    if len(genes) == 1:
+        return genes[0][3]
+    elif len(genes) >= 1:
+        for gene in genes:
+            if (
+                mseries['chr'] == gene[0]
+                and mseries['start'] == gene[1]
+                and mseries['stop'] == gene[2]
+                and mseries['strand'] == gene[5]
+            ):
+                return gene[3]
+        return None
+    return None
+
+
+def add_gene_id_column(prot_gene_df: pd.DataFrame, dic: Dict) -> pd.DataFrame:
+    """
+    Add the fasterDB gene id to ensembl gene then only return lines in \
+    the dataframe with a fasterDB gene id.
+
+    :param prot_gene_df: A table containing ensembl gene id.
+    :param dic: A dictionary linking gene name to is fasterdb id.
+    :return: The table prot_gene_df with a column FasterDB_id
+    """
+    prot_gene_df['FasterDB_id'] = prot_gene_df.apply(find_fasterdb_id,
+                                                     axis=1, dic=dic)
+    df = prot_gene_df.loc[-prot_gene_df['FasterDB_id'].isna(), :].copy()
+    df['FasterDB_id'] = df['FasterDB_id'].astype(int)
+    return df
+
+
+def add_fasterdb_id_to_ppi_file(ppi_df: pd.DataFrame,
+                                df_fasterdb: pd.DataFrame) -> pd.DataFrame:
+    """
+    Add two columns to the ppi_df: one containing the fasterdb gene_id \
+    of the gene that encode the first interacting protein and the second \
+    containing the fasterdb gene id of the gene encoding the second \
+    interacting protein.
+
+    :param ppi_df: Dataframe of interaction
+    :param df_fasterdb: df with the fasterDB gene id of the gene en their \
+    encoding protein identified by ensembl protein id.
+    :return: The dataframe with the gene id of genes encoding the interacting \
+    protein.
+    """
+    df_fasterdb = df_fasterdb[["Protein_stable_ID", "FasterDB_id"]].copy()
+    df_fasterdb.rename({"FasterDB_id": "gene"}, axis=1, inplace=True)
+    ppi_df = ppi_df.merge(df_fasterdb, how="left", left_on="protein1",
+                          right_on="Protein_stable_ID")
+    ppi_df = ppi_df.merge(df_fasterdb, how="left", left_on="protein2",
+                          right_on="Protein_stable_ID", suffixes=['1', '2'])
+    ppi_df = ppi_df.loc[(-ppi_df['gene1'].isna()) &
+                        (-ppi_df['gene2'].isna()), :]
+    ppi_df.drop(["Protein_stable_ID1", "Protein_stable_ID2"], axis=1,
+                inplace=True)
+    ppi_df['gene1'] = ppi_df['gene1'].astype(int)
+    ppi_df['gene2'] = ppi_df['gene2'].astype(int)
+    return ppi_df
+
+
+def get_protein_interaction_table():
+    """
+    link ensemble gene and their encoding protein to a fasterDB gene id.
+    """
+    df_prot_gene = load_protein_gene_links(ConfigPPI.protein_gene)
+    dic_gene = load_fasterdb_gene(Config.bed_gene)
+    df_fasterdb_id = add_gene_id_column(df_prot_gene, dic_gene)
+    df_fasterdb_id.to_csv(ConfigPPI.protein_gene_fasterdb, sep="\t",
+                          index=False)
+    ppi_df = load_string_ppi_file(ConfigPPI.ppi_string)
+    ppi_fasterdb = add_fasterdb_id_to_ppi_file(ppi_df, df_fasterdb_id)
+    ppi_fasterdb.to_csv(ConfigPPI.fasterdb_ppi, sep="\t", index=False)
diff --git a/src/find_interaction_cluster/ppi_scripts/get_ensembl_hg19_gene_ccoordinates.py b/src/find_interaction_cluster/ppi_scripts/get_ensembl_hg19_gene_ccoordinates.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f2c9fa94c751d03bde3351dff9a3f5367ab6a3d
--- /dev/null
+++ b/src/find_interaction_cluster/ppi_scripts/get_ensembl_hg19_gene_ccoordinates.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description: The goal of this script is to get only coding gene from \
+ensembl..
+"""
+
+from .config_ppi import ConfigPPI
+import pandas as pd
+from pathlib import Path
+import numpy as np
+
+
+def load_genes(ensembl_file: Path) -> pd.DataFrame:
+    """
+    Load a table containing all the human protein of ensembl database \
+    and the name and location of their genes.
+
+    :param ensembl_file: A file containing all the human protein of \
+    ensembl database and the name and location of their genes.
+    :return: table containing all the human protein of ensembl database \
+    and the name and location of their genes.
+    """
+    df = pd.read_csv(ensembl_file, sep="\t",
+                     dtype={"Gene_stable_ID": str,
+                            "Protein_stable_ID": str,
+                            "Chromosome-scaffold_name": str,
+                            "Gene_start": int,
+                            "Gene_end": int,
+                            "Strand": str,
+                            "Gene_name": str})
+    df = df.loc[-df['Protein_stable_ID'].isna(), :]
+    df = df.loc[df["Chromosome-scaffold_name"].isin(
+        list(map(str, range(1, 23))) + ["X", "Y"]), :]
+    return df.drop_duplicates()
+
+
+def create_protein_gene_file():
+    """
+    Create a file linking each ensembl protein to it's gene and the hg19 \
+    coordinate of this gene.
+    """
+
+    df_gene = load_genes(ConfigPPI.ensembl_gene)
+    ConfigPPI.protein_gene.parent.mkdir(parents=True, exist_ok=True)
+    df_gene.to_csv(ConfigPPI.protein_gene, sep="\t", index=False)
diff --git a/src/find_interaction_cluster/radomization_test_ppi.py b/src/find_interaction_cluster/radomization_test_ppi.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a7e2e95c6a94ecae97d0193928a5da6f9f34c3c
--- /dev/null
+++ b/src/find_interaction_cluster/radomization_test_ppi.py
@@ -0,0 +1,447 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description: Contains all the functions needed to make randomisation tests.
+"""
+
+
+import pandas as pd
+import numpy as np
+from doctest import testmod
+from typing import Dict, Tuple
+import logging
+from tqdm import tqdm
+from statsmodels.stats.multitest import multipletests
+
+
+class IterationNumberError(Exception):
+    pass
+
+
+def get_ppi_community_gene(df: pd.DataFrame) -> np.array:
+    """
+    From a dataframe containing ppi communities return an array of \
+    the gene id found in those community
+
+    :param df: dataframe containing ppi communities
+    :return: An array containing genes belonging to PPI community
+
+    >>> df_test = pd.DataFrame({"community": ["C1", "C2"], "node": [3, 2],
+    ... "edges": [3, 1], "cov": [0.77, 0.77], "modularity": [0.5, 0.5],
+    ... "genes": ["1, 2, 3", "5, 7"], "project": ["Global-weight-3"] * 2})
+    >>> get_ppi_community_gene(df_test)
+    array([1, 2, 3, 5, 7])
+    """
+    gene_list = []
+    for i in range(df.shape[0]):
+        gene_list += df.iloc[i, :]['genes'].split(", ")
+    return np.unique(np.array(gene_list, dtype=int))
+
+
+def get_dna_community_gene(df: pd.DataFrame) -> Dict[str, np.array]:
+    """
+    Create a dictionary containing gene communities at DNA level.
+
+    :param df: A dataframe containing the community at dna level
+    :return: A dictionary linking each community to the genes it contains
+
+    >>> df_test = pd.DataFrame({"community": ["C1", "C2"], "node": [3, 2],
+    ... "edges": [3, 1], "cov": [0.77, 0.77], "modularity": [0.5, 0.5],
+    ... "genes": ["1, 2, 3", "5, 7"], "project": ["Global-weight-3"] * 2})
+    >>> get_dna_community_gene(df_test)
+    {'C1': array([1, 2, 3]), 'C2': array([5, 7])}
+    >>> df_test = pd.DataFrame({"community": ["C1", "C1"], "node": [3, 2],
+    ... "edges": [3, 1], "cov": [0.77, 0.77], "modularity": [0.5, 0.5],
+    ... "genes": ["1, 2, 3", "5, 7"], "project": ["Global-weight-3"] * 2})
+    >>> get_dna_community_gene(df_test)
+    Traceback (most recent call last):
+    ...
+    ValueError: A community name is duplicated
+    """
+    dic_dna_gene = {}
+    if df.shape[0] != len(df['community'].unique()):
+        raise ValueError("A community name is duplicated")
+    for i in range(df.shape[0]):
+        ms = df.iloc[i, :]
+        dic_dna_gene[ms["community"]] = np.array(ms["genes"].split(', '),
+                                                 dtype=int)
+    return dic_dna_gene
+
+
+def get_common_genes(dna_gene: np.array, ppi_gene: np.array) -> int:
+    """
+    Return the number of gene common between ppi_gene and size_ppi.
+
+    :param ppi_gene: A community of gene whose protein product interact \
+    between each other.
+    :param dna_gene: A community of gene at dna level
+    :return: The number of common exons between the two lists
+
+    >>> get_common_genes(np.array([1, 5, 9, 15]), np.array([5, 9, 15, 17]))
+    3
+    >>> get_common_genes(np.array([1, 5, 9, 15]), np.array([5, 9, 15, 1]))
+    4
+    >>> get_common_genes(np.array([1, 5, 9, 15]), np.array([18, 19, 115, 0]))
+    0
+    """
+    return len(np.intersect1d(dna_gene, ppi_gene))
+
+
+def get_list_common_gene(community: str, dic_dna_gene: Dict[str, np.array],
+                         ppi_size: int, ppi_gene: np.array, iteration: int,
+                         use_seed: bool = True) -> np.array:
+    """
+    Return the number of common gene between genes in the community \
+    `community`and a sublist of randomly chosen genes in `ppi_gene` of \
+    size `ppi_size`.
+
+    :param community: A name of a gene community at dna level
+    :param dic_dna_gene: The dictionary containing gene interacting at dna \
+    level
+    :param ppi_size: The size of the ppi_gene subsample
+    :param ppi_gene: All the gene interacting at protein level
+    :param iteration: The number of time we want to repeat the process
+    :param use_seed: True to use a fixed seed, false else.
+    :return: The number of gene in common between the genes in community \
+    `community` and the subsample of ppi_size
+
+    >>> dna_gene = {"C1": [1, 2, 3, 4, 5]}
+    >>> ppi_g = list(range(1, 51))
+    >>> get_list_common_gene("C1", dna_gene, 10, ppi_g, 5)
+    array([2, 1, 0, 1, 1])
+    >>> ppi_g = list(range(1, 1001))
+    >>> get_list_common_gene("C1", dna_gene, 100, ppi_g, 5)
+    array([0, 0, 1, 0, 0])
+    >>> ppi_g = list(range(1, 11))
+    >>> get_list_common_gene("C1", dna_gene, 5, ppi_g, 5)
+    array([3, 2, 2, 4, 3])
+    >>> get_list_common_gene("C1", dna_gene, 10, ppi_g, 5)
+    array([5, 5, 5, 5, 5])
+    """
+    if use_seed:
+        np.random.seed(1)
+    common = [
+        get_common_genes(
+            dic_dna_gene[community],
+            np.random.choice(ppi_gene, ppi_size, replace=False),
+        )
+        for _ in range(iteration)
+    ]
+    return np.array(common)
+
+
+def get_pvalue(values: np.array, target: float, iteration: int
+               ) -> Tuple[float, str]:
+    """
+    Return The p-value and the regulation
+    :param values:  The list of control values
+    :param target: The test value
+    :param iteration: The iteration of interest
+    :return: The p-value and the regulation
+    >>> get_pvalue(np.arange(10), 1.75, 10)
+    (0.2, ' . ')
+    >>> get_pvalue(np.arange(10), -1, 10)
+    (0.0, ' . ')
+    >>> get_pvalue(np.arange(10), 9.75, 10)
+    (0.0, ' . ')
+    >>> get_pvalue(np.arange(10), 8.8, 10)
+    (0.1, ' . ')
+    >>> get_pvalue(np.arange(100), 98.8, 100)
+    (0.01, ' + ')
+    >>> get_pvalue(np.arange(100), 100, 100)
+    (0.0, ' + ')
+    >>> get_pvalue(np.arange(100), -1, 100)
+    (0.0, ' - ')
+    >>> get_pvalue(np.arange(0, 0.10, 0.01), 0.05, 10)
+    (0.5, ' . ')
+    """
+    values = np.sort(values)
+    val_len = len(values)
+    if val_len != iteration:
+        msg = f"Error the length of values vector {len(values)} " \
+              f"is different than iteration {iteration}"
+        logging.exception(msg)
+        raise IterationNumberError(msg)
+    idx = values[values <= target].size
+    impov = idx / val_len
+    idx = values[values >= target].size
+    enrich = idx / val_len
+    regulation = " . "
+    if impov < enrich:
+        pval = impov
+        if pval <= 0.05:
+            regulation = " - "
+    else:
+        pval = enrich
+        if pval <= 0.05:
+            regulation = " + "
+    if iteration < 20:
+        regulation = " . "
+    return pval, regulation
+
+
+def adjust_regulation(x: pd.Series) -> str:
+    """
+    Return the adjusted regulation.
+
+    :param x: A row a the dataframe
+    :return: The adjusted row
+    """
+    return x["regulation"] if x["p-adjust"] <= 0.05 else " . "
+
+
+def update_overlap_df(df_overlap: pd.DataFrame,
+                      dic_dna_gene: Dict[str, np.array],
+                      ppi_gene: np.array, iteration: int,
+                      use_seed: bool = True,
+                      ) -> Tuple[pd.DataFrame, np.array]:
+    """
+    Perform a randomization test to check if communities at \
+    DNA level are more similar at protein level than randomly expected.
+
+    :param df_overlap: A dataframe containing every gene community at dna \
+    level and the number of common gene with a gne community at protein \
+    level
+    :param dic_dna_gene: The dictionary containing gene interacting at dna \
+    level
+    :param ppi_gene: All the gene interacting at protein level
+    :param iteration: The number of time we want to repeat the process
+    :param use_seed: True to use a fixed seed, false else.
+    :return: The updated Df (column p-value, p-adjust and regulation) + \
+    the list of common gene shared between gene community at DNA and \
+    protein level obtained by randmization analysis
+
+    >>> do = pd.DataFrame({"DNA_community": ["C1", "C2", "C3"],
+    ... "community_size": [2, 3, 2], "nb_com-ppi": [2, 3, 1],
+    ... "size_com-ppi": [10, 10, 10]})
+    >>> dna_gene = {"C1": [1, 2], "C2": [3, 4, 5], "C3": [6, 7]}
+    >>> ppi_g = list(range(11))
+    >>> d, di = update_overlap_df(do, dna_gene, ppi_g, 100)
+    >>> d.iloc[:, [0, 1, 2, 4, 5]]
+      DNA_community  community_size  nb_com-ppi  p-value  p-adjust
+    0            C1               2           2     0.79      0.79
+    1            C2               3           3     0.72      0.79
+    2            C3               2           1     0.18      0.54
+    >>> d.iloc[:, 6].to_list() == d.iloc[:, 7].to_list() == [" . "] * 3
+    True
+    >>> d, di = update_overlap_df(do, dna_gene, list(range(1000)), 100)
+    >>> d.iloc[:, [0, 1, 2, 4, 5]]
+      DNA_community  community_size  nb_com-ppi  p-value  p-adjust
+    0            C1               2           2     0.01      0.01
+    1            C2               3           3     0.00      0.00
+    2            C3               2           1     0.00      0.00
+    >>> d.iloc[:, 6].to_list() == d.iloc[:, 7].to_list() == [" + "] * 3
+    True
+    >>> len(di.keys()) == 3
+    True
+    >>> len(di[list(di.keys())[0]]) == 100
+    True
+    """
+    pval_list = []
+    reg_list = []
+    dic = {}
+    for i in tqdm(range(df_overlap.shape[0])):
+        community = df_overlap.iloc[i, :]["DNA_community"]
+        values = get_list_common_gene(community,
+                                      dic_dna_gene,
+                                      df_overlap.iloc[i, :]["size_com-ppi"],
+                                      ppi_gene,
+                                      iteration, use_seed)
+        dic[community] = values
+        pval, reg = get_pvalue(values, df_overlap.iloc[i, :]["nb_com-ppi"],
+                               iteration)
+        pval_list.append(pval)
+        reg_list.append(reg)
+    df_overlap["p-value"] = pval_list
+    df_overlap["p-adjust"] = multipletests(pval_list, alpha=0.05,
+                                           method='fdr_bh',
+                                           is_sorted=False,
+                                           returnsorted=False)[1]
+    df_overlap["regulation"] = reg_list
+    df_overlap["regulation-adjusted"] = \
+        df_overlap.apply(adjust_regulation, axis=1)
+    return df_overlap, dic
+
+
+def summarize_df_overlap(df_overlap: pd.DataFrame) -> pd.DataFrame:
+    """
+    Create a summary of the dataframe `df_overlap` that indicates the number \
+    of common gene shared between communities at protein and DNA level.
+
+    :param df_overlap: The dataframe containing community at DNA and \
+    protein level and their enrichment
+    :return: The dataframe showing the number of enrichment, impoverishment \
+    or no regulation
+
+    >>> do = pd.DataFrame({"DNA_community": ["C1", "C2", "C3"],
+    ... "community_size": [2, 3, 2], "nb_com-ppi": [2, 3, 1],
+    ... "size_com-ppi": [10, 10, 10], "p-value": [0.01, 0, 0],
+    ... "p-adjust": [0.01, 0.01, 0.01], "regulation": [" + "] * 3,
+    ... "regulation-adjusted": [" + "] * 3})
+    >>> summarize_df_overlap(do)
+      regulation-adjusted  community_size  nb_com-ppi  size_com-ppi  number
+    0                  +         2.333333         2.0          10.0       3
+    1                  -              NaN         NaN           NaN       0
+    2                  .              NaN         NaN           NaN       0
+    """
+    df_overlap = df_overlap[["regulation-adjusted", "community_size",
+                            "nb_com-ppi", "size_com-ppi", "p-adjust"]]
+    df_tmp = pd.DataFrame({"regulation-adjusted": [" + ", " - ", " . "],
+                           "community_size": [np.nan] * 3,
+                           "nb_com-ppi": [np.nan] * 3,
+                           "size_com-ppi": [np.nan] * 3,
+                           "p-adjust": [np.nan] * 3})
+    df = pd.concat([df_overlap, df_tmp], axis=0, ignore_index=True)
+    df = df.groupby("regulation-adjusted").agg({"community_size": "mean",
+                                                "nb_com-ppi": "mean",
+                                                "size_com-ppi": "mean",
+                                                "p-adjust": "count"})
+    return df.rename({"p-adjust": "number"}, axis=1).reset_index()
+
+
+def get_enriched_impoverished_communities(df_overlap: pd.DataFrame,
+                                          dic_dna_gene: Dict[str, np.array],
+                                          ppi_gene: np.array, iteration: int,
+                                          dic_values: Dict[str, np.array],
+                                          use_seed: bool = True
+                                         ) -> Dict[str, int]:
+    """
+    For each communities, randomly samples only once in the list \
+    of `ppi_gene` to get the number of common gene betwwen this sample \
+    and the community of gene at DNA level. Then this function determines \
+    how many communites are enriched, impovershied thanks to `di_values`.
+
+    :param df_overlap: A dataframe containing every gene community at dna \
+    level and the number of common gene with a gne community at protein \
+    level
+    :param dic_dna_gene: The dictionary containing gene interacting at dna \
+    level
+    :param ppi_gene: All the gene interacting at protein level
+    :param iteration: The number of time we want to repeat the process
+    :param dic_values: A dictionary containing the number of common \
+    gene between communities at DNA and protein level for each \
+    subsampling made from the ppi_gene list.
+    :param use_seed: True to use a fixed seed, false else.
+    :return: The number of enriched, impoverished number of \
+    genes shared betwwen gene at DNA and protein level.
+
+    >>> do = pd.DataFrame({"DNA_community": ["C1", "C2", "C3"],
+    ... "community_size": [2, 3, 2], "nb_com-ppi": [2, 3, 1],
+    ... "size_com-ppi": [10, 10, 10], "p-value": [0.01, 0, 0],
+    ... "p-adjust": [0.01, 0.01, 0.01], "regulation": [" + "] * 3,
+    ... "regulation-adjusted": [" + "] * 3})
+    >>> dna_gene = {"C1": [1, 2], "C2": [3, 4, 5], "C3": [6, 7]}
+    >>> ppi_g = list(range(11))
+    >>> dv = {"C1": [0] * 95 + [1, 1, 1, 2, 2],
+    ... "C2": [0] * 95 + [1, 1, 1, 2, 2],
+    ... "C3": [0] * 75 + [1] * 10 + [2] * 10 + [3] * 5}
+    >>> get_enriched_impoverished_communities(do, dna_gene, ppi_g, 100, dv)
+    {' + ': 2, ' . ': 1, ' - ': 0}
+    >>> dv = {"C1": [0] * 95 + [1, 1, 1, 2, 2],
+    ... "C2": [0] * 95 + [1, 1, 1, 2, 2],
+    ... "C3": [3] * 75 + [4] * 23 + [1] * 2}
+    >>> get_enriched_impoverished_communities(do, dna_gene, ppi_g, 100, dv)
+    {' + ': 2, ' . ': 0, ' - ': 1}
+    """
+    pval_list = []
+    reg_list = []
+    for i in range(df_overlap.shape[0]):
+        community = df_overlap.iloc[i, :]["DNA_community"]
+        my_value = get_list_common_gene(community,
+                                        dic_dna_gene,
+                                        df_overlap.iloc[i, :]["size_com-ppi"],
+                                        ppi_gene,
+                                        1, use_seed)[0]
+        pval, reg = get_pvalue(dic_values[community], my_value,
+                               iteration)
+        pval_list.append(pval)
+        reg_list.append(reg)
+    p_adjust = multipletests(pval_list, alpha=0.05, method='fdr_bh',
+                             is_sorted=False, returnsorted=False)[1]
+    dic = {" + ": 0, " . ": 0, " - ": 0}
+    for i, reg in enumerate(reg_list):
+        dic[reg if p_adjust[i] <= 0.05 else " . "] += 1
+    return dic
+
+
+def summary_randomisation_test(df_overlap: pd.DataFrame,
+                               dic_dna_gene: Dict[str, np.array],
+                               ppi_gene: np.array, iteration: int,
+                               dic_values: Dict[str, np.array],
+                               use_seed: bool = True
+                               ) -> pd.DataFrame:
+    """
+    Says if the number of community that shared an enriched number \
+    of common gene between the protein and DNA level is enriched as well.
+
+    :param df_overlap: A dataframe containing every gene community at dna \
+    level and the number of common gene with a gne community at protein \
+    level
+    :param dic_dna_gene: The dictionary containing gene interacting at dna \
+    level
+    :param ppi_gene: All the gene interacting at protein level
+    :param iteration: The number of time we want to repeat the process
+    :param dic_values: A dictionary containing the number of common \
+    gene between communities at DNA and protein level for each \
+    subsampling made from the ppi_gene list.
+    :param use_seed: True to use a fixed seed, false else.
+    :return: The updated summary table
+
+    >>> do = pd.DataFrame({"DNA_community": ["C1", "C2", "C3"],
+    ... "community_size": [2, 3, 2], "nb_com-ppi": [2, 3, 1],
+    ... "size_com-ppi": [10, 10, 10], "p-value": [0.01, 0, 0],
+    ... "p-adjust": [0.01, 0.01, 0.01], "regulation": [" + "] * 3,
+    ... "regulation-adjusted": [" + "] * 3})
+    >>> dna_gene = {"C1": [1, 2], "C2": [3, 4, 5], "C3": [6, 7]}
+    >>> ppi_g = list(range(11))
+    >>> dv = {"C1": [0] * 95 + [1, 1, 1, 2, 2],
+    ... "C2": [0] * 95 + [1, 1, 1, 2, 2],
+    ... "C3": [0] * 75 + [1] * 10 + [2] * 10 + [3] * 5}
+    >>> ds = summary_randomisation_test(do, dna_gene, ppi_g, 100, dv)
+    >>> ds.iloc[:, range(4)]
+      regulation-adjusted  community_size  nb_com-ppi  size_com-ppi
+    0                  +         2.333333         2.0          10.0
+    1                  -              NaN         NaN           NaN
+    2                  .              NaN         NaN           NaN
+    >>> ds.iloc[:, range(4, 7)]
+       number  mean_100_number  p-adjust
+    0       3              2.0       0.0
+    1       0              0.0       1.0
+    2       0              1.0       0.0
+    >>> ds.iloc[:, 7].to_list() == [" + ", " . ", " - "]
+    True
+    """
+    df_summarized = summarize_df_overlap(df_overlap)
+    dic_rand_sum = {k: [] for k in df_summarized["regulation-adjusted"].values}
+    for _ in tqdm(range(iteration), desc="Making stats to summarise global "
+                                         "enrichment"):
+        d = get_enriched_impoverished_communities(df_overlap, dic_dna_gene,
+                                                  ppi_gene, iteration,
+                                                  dic_values, use_seed)
+        for k, v in dic_rand_sum.items():
+            v.append(d[k])
+    pvalues = []
+    regulations = []
+    mean_number = []
+    for i in range(df_summarized.shape[0]):
+        reg, val = df_summarized.iloc[i, :][["regulation-adjusted", "number"]]
+        pval, creg = get_pvalue(dic_rand_sum[reg], val,
+                               iteration)
+        # print(reg, val, dic_rand_sum[reg])
+        pvalues.append(pval)
+        regulations.append(creg)
+        mean_number.append(np.mean(dic_rand_sum[reg]))
+    p_adjust = multipletests(pvalues, alpha=0.05, method='fdr_bh',
+                             is_sorted=False, returnsorted=False)[1]
+    reg_adj = [regulations[i] if p_adjust[i] <= 0.05 else " . "
+               for i in range(len(regulations))]
+    df_summarized[f"mean_{iteration}_number"] = mean_number
+    df_summarized[f"p-adjust"] = p_adjust
+    df_summarized[f"reg_adj"] = reg_adj
+    return df_summarized
+
+
+if __name__ == "__main__":
+    testmod()
diff --git a/src/find_interaction_cluster/sf_and_communities.py b/src/find_interaction_cluster/sf_and_communities.py
index b950c8b2c7fdf0ce4d63916a53b564b0c60e1080..d81dc5931eaa789c0ef2a119a64cd3197077571b 100644
--- a/src/find_interaction_cluster/sf_and_communities.py
+++ b/src/find_interaction_cluster/sf_and_communities.py
@@ -15,7 +15,7 @@ from .community_finder import get_communities
 import pandas as pd
 import numpy as np
 from itertools import product
-from .community_finder import get_projects_name
+from .community_finder import get_projects
 import multiprocessing as mp
 import logging
 from ..logging_conf import logging_def
@@ -182,7 +182,7 @@ def glmm_maker(expanded_df: pd.DataFrame, outfile: Path) -> float:
     ... "%reg in community": [40, 42.85], 'pval': [1, 0.5], 'padj': [1, 1]})
     >>> e_df = expand_dataframe(d)
     >>> outfile = ConfigGraph.get_community_file("Test", 1, 1, True,
-    ... "_stat.txt", True)
+    ... "_stat.txt", "sf_community_enrichment")
     >>> glmm_maker(e_df, outfile)
     1.0
     """
@@ -235,7 +235,8 @@ def glmm_statistics(df: pd.DataFrame, sf_name: str, reg: str,
     expanded_df = expand_dataframe(ndf)
     outfile = ConfigGraph.get_community_file(project, weight, global_weight,
                                              same_gene, feature,
-                                             f"{sf_name}_{reg}_stat.txt", True)
+                                             f"{sf_name}_{reg}_stat.txt",
+                                             "sf_community_enrichment")
     noutfold = outfile.parent / "expanded_df"
     noutfold.mkdir(exist_ok=True, parents=True)
     noutfile = noutfold / outfile.name
@@ -361,19 +362,21 @@ def get_key(project: str, weight: int) -> str:
     return f"{project}_{weight}"
 
 
-def multiple_stat_launcher(ps: int, weights: List[int],
-                           global_weights: List[int],
+def multiple_stat_launcher(ps: int,
+                           weight: int,
+                           global_weight: int,
+                           project: str,
                            same_gene: bool, feature: str = 'exon',
                            logging_level: str = "DISABLE"):
     """
     Launch the statistical analysis for every
 
-    :param ps: The number of processes we want to use.
-    :param weights: The list of weights of interaction to consider
-    :param global_weights: The list global weights to consider. if \
-    the global weight is equal to 0 then then density figure are calculated \
-    by project, else all projcet are merge together and the interaction \
-    seen in `global_weight` project are taken into account
+    :param ps: The number of processes to use
+    :param weight: The weight of interaction to consider
+    :param global_weight: The global weighs to consider. if \
+    the global weight is equal to 0  then the project `project` is \
+    used.
+    :param project: The project name, used only if global_weight = 0
     :param same_gene: Say if we consider as co-localised exon within the \
     same gene
     :param feature: The feature we want to analyse
@@ -384,22 +387,19 @@ def multiple_stat_launcher(ps: int, weights: List[int],
     logging.info(f"Checking if communities contains often {feature}s "
                  f"regulated by a splicing factor")
     sf_list = get_sfname()
-    global_weights = list(np.unique(global_weights))
-    weights = list(np.unique(weights))
-    projects, dic_project = get_projects_name(global_weights)
-    condition = list(product(projects, weights, sf_list, ['down', 'up']))
+    project = get_projects(global_weight, project)
+    condition = list(product([project], [weight], sf_list, ['down', 'up']))
     processes = {}
     pool = mp.Pool(processes=min(ps, len(condition)))
     logging.debug("Calculating stats...")
     for project, weight, sf_name, reg in condition:
         ckey = get_key(project, weight)
-        global_weight = dic_project[project]
         args = [sf_name, reg, project, weight, global_weight, same_gene,
                 feature]
-        if ckey not in processes.keys():
-            processes[ckey] = [pool.apply_async(get_stat4communities, args)]
-        else:
+        if ckey in processes:
             processes[ckey].append(pool.apply_async(get_stat4communities, args))
+        else:
+            processes[ckey] = [pool.apply_async(get_stat4communities, args)]
     for p, value in processes.items():
         project, weight = p.split("_")
         list_tuples = [proc.get(timeout=None) for proc in value]
@@ -409,17 +409,19 @@ def multiple_stat_launcher(ps: int, weights: List[int],
         list_series = [t[1] for t in list_tuples]
         df = pd.concat(list_df, axis=0, ignore_index=True)
         outfile = ConfigGraph.get_community_file(project, weight,
-                                                 dic_project[project],
+                                                 global_weight,
                                                  same_gene, feature,
-                                                 "_stat.txt", True)
+                                                 "_stat.txt",
+                                                 "sf_community_enrichment")
         df.to_csv(outfile, sep="\t", index=False)
         glm_df = pd.DataFrame(list_series)
         glm_df["padj"] = multipletests(glm_df['pval'].values,
                                        method='fdr_bh')[1]
         outfile = ConfigGraph.get_community_file(project, weight,
-                                                 dic_project[project],
+                                                 global_weight,
                                                  same_gene, feature,
-                                                 "_glmm_stat.txt", True)
+                                                 "_glmm_stat.txt",
+                                                 "sf_community_enrichment")
         glm_df.to_csv(outfile, sep="\t", index=False)
 
 
diff --git a/src/nt_composition/make_nt_correlation.py b/src/nt_composition/make_nt_correlation.py
index ddbb5b18f23c5119ce860dd3fc7fa7c760374a88..58969b9e7c329f8729656d85dd995a688a0b2d47 100644
--- a/src/nt_composition/make_nt_correlation.py
+++ b/src/nt_composition/make_nt_correlation.py
@@ -61,7 +61,7 @@ def get_project_colocalisation(cnx: sqlite3.Connection, project: str,
                                global_weight: int, same_gene: bool,
                                get_weight: bool = False,
                                exon_bc: Optional[np.array] = None,
-                               inter_chr: bool = False,
+                               inter_chr: bool = False, distance: int = 10000,
                                level: str = "exon") -> np.array:
     """
     Get the interactions in project `project` for gene and exons
@@ -84,8 +84,11 @@ def get_project_colocalisation(cnx: sqlite3.Connection, project: str,
     """
     logging.debug(f'Recovering interaction ({os.getpid()})')
     select_add = get_select_addition(global_weight, get_weight, same_gene)
-    inter_str = " AND level = 'inter'" if inter_chr else ""
-    inter_str_t = " AND t1.level = 'inter'" if inter_chr else ""
+    inter_str = " AND level = 'inter'" \
+        if inter_chr else f"AND (level = 'inter' OR distance >= {distance})"
+    inter_str_t = " AND t1.level = 'inter'" \
+        if inter_chr \
+        else f"AND (t1.level = 'inter' OR t1.distance >= {distance})"
     if level == "gene" and not same_gene:
         logging.warning(f"When level is equal to gene then same_gene must "
                         f"be true. Setting it to True.")