#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""
Description: Create a barplot showing the frequency/value of a particular \
item in every community of genomic features (genes or exons) that are close \
in the nucleus compared to a control list of features.
"""
+import lazyparser as lp
+import pandas as pd
+from pathlib import Path
+from .fig_functions import create_community_fig
class FileNameError(Exception):
    """Raised when the name given for the output figure is not a pdf file."""
class MissingColumnError(Exception):
    """Raised when a required column is absent from the loaded table."""
def load_and_check_table(table: str, feature: str,
                         target_col: str) -> pd.DataFrame:
    """
    Load a tab-separated file containing a dataframe. It must contains the \
    following columns: id_<feature>, target_col, community and community_size.

    :param table: A file containing a table with the id of the chosen \
    `feature` (i.e FasterDB id of genes or exons), a column with data of \
    interest (this column must have the name *target_col*) and two columns \
    with the community and the size of the community of the feature if it \
    has one (None, else).
    :param feature: The kind of feature analysed
    :param target_col: The name of the column containing the data of interest
    :return: The loaded dataframe
    :raises MissingColumnError: If at least one required column is missing
    """
    # The default ``compression="infer"`` of read_csv already handles
    # files ending with ".gz", so no explicit branch is needed.
    df = pd.read_csv(table, sep="\t")
    required_cols = [f"id_{feature}", target_col, "community",
                     "community_size"]
    # Collect every missing column so the user can fix the table in one go.
    missing = [col for col in required_cols if col not in df.columns]
    if len(missing) == 1:
        raise MissingColumnError(f"The column {missing[0]} is missing !")
    if missing:
        raise MissingColumnError(
            f"The columns {', '.join(missing)} are missing !")
    return df
# NOTE: the previous constraint was "0 < iteration < 20", which is
# incompatible with the documented default of 10000 iterations; only the
# lower bound is kept.
@lp.parse(table="file", output="folder", test_type=["lm", "permutation"],
          iteration="iteration > 0")
def create_community_figures(table: str, feature: str, target_col: str,
                             output: str, outfile: str, test_type: str,
                             target_kind: str = "",
                             iteration: int = 10000) -> None:
    """
    Create a dataframe with a control community, save it as a table and \
    as a barplot figure.
    :param table: A file containing a table with the id of the chosen \
    `feature` (i.e FasterDB id of genes or exons), a column with data of \
     interest ( this column must have the name *target_col*) and two columns \
     with the community and the size of the community of the feature if it \
     has one (None, else).
    :param feature: The kind of feature analysed (exons or genes)
    :param target_col: The name of the column containing the data of interest
    :param output: The output folder
    :param outfile: The name of the output figure file (pdf format)
    :param test_type: The type of test to make (permutation or lm)
    :param target_kind: An optional name that describe a bit further \
    target_col.
    :param iteration: The number of sub samples to create. This parameter \
    is only used if test_type = 'permutation' (default 10000).
    """
    # Validate the cheap argument first so a bad file name fails before
    # the (possibly large) table is read.
    if not outfile.endswith(".pdf"):
        raise FileNameError("The output figure must be in pdf format !")
    df = load_and_check_table(table, feature, target_col)
    moutfile = Path(output) / outfile
    create_community_fig(df, feature, target_col, moutfile, test_type,
                         target_kind=target_kind, iteration=iteration)
if __name__ == "__main__":
    # No argument is passed here: the ``lp.parse`` decorator is expected to
    # supply them from the command line.
    create_community_figures()
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+import pandas as pd
+from pathlib import Path
+from typing import Dict, Tuple, List, Optional
+import sqlite3
+from ..config import ConfigGraph
+from tqdm import tqdm
+from rpy2.robjects import r, pandas2ri
+from statsmodels.stats.multitest import multipletests
+import numpy as np
+from ..radomization_test_ppi import get_pvalue
+import seaborn as sns
def get_cpnt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
                       feature: str, region: str = "",
                       component_type: str = "nt") -> pd.DataFrame:
    """
    Get the frequency of every nucleotides for features in list_ft.

    :param cnx: Connection to chia-pet database
    :param list_ft: The list of features for which we want to get the \
    frequencies
    :param feature: the kind of feature analysed
    :param region: The region of gene analysed if feature is gene \
    (an empty string means 'gene')
    :param component_type: The type of component to analyse; It \
    can be 'nt', 'dnt' or 'aa'.
    :return: the frequency of nucleotides for the list of features: one \
    row per feature id and one column per component. If `list_ft` is empty, \
    an empty dataframe with only the id column is returned.

    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
    ... ["1_1", "1_2"], "exon")
    >>> d[["id_exon", 'A', 'C', 'G', 'T']]
    ft id_exon         A         C         G         T
    0      1_1  16.63480  34.60803  32.12237  16.63480
    1      1_2  16.06426  26.10442  39.75904  18.07229
    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
    ... ['1', '2'], "gene")
    >>> d[["id_gene", 'A', 'C', 'G', 'T']]
    ft id_gene         A         C         G         T
    0        1  29.49376  18.34271  18.43874  33.72479
    1        2  31.90401  16.40251  18.79033  32.90315
    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
    ... ['1', '2'], "gene", 'exon', 'aa')
    >>> d[["id_gene", "R", "K", "D", "Q", "E"]]
        ft id_gene        R        K        D        Q        E
    0        1  4.75247  5.19300  5.95391  4.07997  6.96189
    1        2  4.34203  6.23736  6.77708  5.21984  7.01769
    """
    id_col = f"id_{feature}"
    # Remove duplicated ids while keeping their order.
    ids = list(dict.fromkeys(list_ft))
    filters = "AND ft_type = ?"
    extra_params = [component_type]
    if feature == "gene":
        ids = list(dict.fromkeys(int(ft) for ft in ids))
        filters += " AND region = ?"
        extra_params.append(region if region != "" else "gene")
    if not ids:
        return pd.DataFrame({id_col: pd.Series([], dtype=str)})
    # Values are bound with placeholders instead of being written in the
    # query: formatting a python tuple produced invalid SQL for a single id
    # ("('1_1',)"). Ids are sent in batches to stay below the maximum
    # number of bound parameters allowed by SQLite (999 on older builds).
    batch_size = 500
    frames = []
    for start in range(0, len(ids), batch_size):
        batch = ids[start:start + batch_size]
        placeholders = ", ".join(["?"] * len(batch))
        query = f"""
                 SELECT ft, {id_col}, frequency
                 FROM cin_{feature}_frequency
                 WHERE {id_col} IN ({placeholders})
                 {filters}
                 """
        frames.append(pd.read_sql_query(query, cnx,
                                        params=batch + extra_params))
    df = pd.concat(frames, ignore_index=True)
    df = df.pivot_table(index=id_col, columns="ft",
                        values="frequency").reset_index()
    df[id_col] = df[id_col].astype(str)
    return df
def get_ft_id(cnx: sqlite3.Connection, feature: str = "exon") -> List[str]:
    """
    Return the id of every gene/exons in chia-pet database.

    :param cnx: A connection to chiapet database
    :param feature: The feature of interest (used to build the name of \
    the table queried: cin_<feature>)
    :return: The list of distinct feature ids, converted to strings
    """
    query = f"SELECT DISTINCT id FROM cin_{feature}"
    cursor = cnx.cursor()
    try:
        cursor.execute(query)
        return [str(row[0]) for row in cursor.fetchall()]
    finally:
        # Release the cursor even if the query fails.
        cursor.close()
def get_community_table(communities: List[List[str]],
                        size_threshold: int, feature: str) -> pd.DataFrame:
    """
    Return the table indicating the name of the features and the \
    name of their community.

    Communities are named C1, C2, ... according to their position in \
    `communities`; the ones smaller than `size_threshold` are skipped but \
    still counted in the numbering.

    :param communities: List of community of features
    :param size_threshold: The required size a community must \
    have to be considered
    :param feature: The kind of feature analysed
    :return: table of community

    >>> c = [['1_1', '2_5'], ['7_9', '4_19', '3_3']]
    >>> get_community_table(c, 3, 'exon')
      community id_exon  community_size
    0        C2     7_9               3
    1        C2    4_19               3
    2        C2     3_3               3
    >>> c = [['1', '2'], ['7', '49', '3']]
    >>> get_community_table(c, 3, 'gene')
      community id_gene  community_size
    0        C2       7               3
    1        C2      49               3
    2        C2       3               3
    """
    id_col = f"id_{feature}"
    dic = {"community": [], id_col: [], "community_size": []}
    for rank, members in enumerate(communities, start=1):
        size = len(members)
        if size < size_threshold:
            continue
        dic["community"].extend([f"C{rank}"] * size)
        dic[id_col].extend(members)
        dic["community_size"].extend([size] * size)
    return pd.DataFrame(dic)
def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str
                     ) -> pd.DataFrame:
    """
    Fit, in R, the linear model \
    `target_col ~ log(community_size) + community` and return its \
    coefficient table merged with the mean of `target_col` per community.

    A "diagnostics" folder is created next to `outfile`; it receives the \
    DHARMa diagnostic figure and a copy of the table given to the model.

    :param df: A dataframe containing the id of the chosen `feature` \
    (i.e FasterDB id of genes or exons) a column with data for interest (\
    this column must have the name *target_col*) and the community \
    and the size of the community of the feature if it has one (None, else).
    :param outfile: The file of the final figure; its parent folder and \
    its name are used to name the diagnostic files
    :param target_col: The name of the column containing the data of \
    interest. NOTE(review): it is inserted as is in the R formula, so it \
    must be a syntactically valid R name.
    :return: The coefficients of the model (one row per term, the \
    intercept being renamed C-CTRL) with the mean of `target_col` for \
    each community
    """
    pandas2ri.activate()
    lmf = r(
        """
        require("DHARMa")
        function(data, folder, partial_name) {
            mod <- lm(%s ~ log(community_size) +  community, data=data)
            simulationOutput <- simulateResiduals(fittedModel = mod, n = 250)
            png(paste0(folder, "/dignostics_summary", partial_name, ".png"))
            plot(simulationOutput)
            dev.off()
            return(as.data.frame(summary(mod)$coefficients))
        }
        """ % target_col)
    folder = outfile.parent / "diagnostics"
    folder.mkdir(parents=True, exist_ok=True)
    partial_name = outfile.name.replace('.pdf', '')
    # The table used by the model is kept with the other diagnostic files
    # rather than being dropped in the current working directory.
    df.to_csv(folder / f'frequency_{target_col}.txt', sep="\t", index=False)
    res_df = lmf(df, str(folder), partial_name).reset_index()
    res_df = res_df.rename(columns={'index': 'community'})
    # R prefixes the levels of the factor with its name ("communityC1"):
    # strip it to recover the community names (literal replacement).
    res_df['community'] = res_df['community'].str.replace('community', '',
                                                          regex=False)
    res_df.loc[res_df['community'] == "(Intercept)", "community"] = "C-CTRL"
    mean_df = df[[target_col, "community", "community_size"]]. \
        groupby(["community", "community_size"]).mean().reset_index()
    return res_df.merge(mean_df, how="left", on="community")
def lm_with_ctrl(df: pd.DataFrame,
                 target_col: str, outfile: Path,
                 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Gather the features without community in a control community named \
    C-CTRL and compare every community to it with a linear model.

    :param df: A dataframe containing the id of the chosen `feature` \
    (i.e FasterDB id of genes or exons) a column with data for interest (\
    this column must have the name *target_col*) and the community \
    and the size of the community of the feature if it has one (None, else).
    :param target_col: The name of the column containing the data of interest
    :param outfile: File that will contains the final figure
    :return: The dataframe with ctrl exon and \
    The dataframe with the p-value compared to the control \
    list of feature.
    """
    # Work on a copy so the dataframe of the caller is left untouched.
    ndf = df.copy()
    ndf['community'] = ndf['community'].fillna("C-CTRL")
    return ndf, lm_maker_summary(ndf, outfile, target_col)
def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame,
                      target_col: str, feature: str) -> pd.DataFrame:
    """
    Merge df and rdf together.

    The control rows (community C-CTRL) come first; the other rows are \
    sorted by increasing mean of `target_col` in their community.

    :param df: A dataframe containing the id of the chosen `feature` \
    (i.e FasterDB id of genes or exons) a column with data for interest (\
    this column must have the name *target_col*) and the community \
    and the size of the community of the feature if it has one (None, else).
    :param rdf: The dataframe containing the mean frequency for \
    each community and the p-value of their enrichment compared to control \
    exons.
    :param target_col: The name of the column containing the data of interest
    :param feature: The feature of interest
    :return: The merged dataframe: i.e df with the stats columns \
    (`p-adj`, which holds the `Pr(>|t|)` column of rdf, and \
    `mean_<target_col>`)
    """
    p_col = "Pr(>|t|)"
    keys = ["community", "community_size"]
    mean_col = f"mean_{target_col}"
    features = df[[f"id_{feature}", target_col] + keys].copy()
    stats = rdf[keys + [p_col, target_col]].rename(
        columns={target_col: mean_col, p_col: "p-adj"})
    merged = features.merge(stats, how="left", on=keys)
    is_ctrl = merged["community"] == "C-CTRL"
    communities = merged[~is_ctrl].sort_values(mean_col, ascending=True)
    return pd.concat([merged[is_ctrl], communities], axis=0,
                     ignore_index=True)
def get_permutation_mean(df_ctrl: pd.DataFrame,
                         cpnt: str, size: int, iteration: int) -> List[float]:
    """
    Randomly sample (with replacement) `size` values of the column `cpnt` \
    of `df_ctrl`, `iteration` times, and return the mean of each sample.

    :param df_ctrl: A dataframe containing the frequency of each nucleotide \
    in each exons/gene in fasterdb.
    :param cpnt: The component (nt, aa, dnt) of interest
    :param size: The size of each sub samples to create
    :param iteration: The number of sub samples to create
    :return: The list of mean frequencies of `cpnt` in each subsample
    """
    # The column is extracted once instead of at every iteration.
    values = df_ctrl[cpnt]
    # `size` may arrive as a float (e.g. 3.0) when the community_size column
    # contains missing values; sampling requires an integer.
    n_draw = int(size)
    return [
        float(np.mean(values.sample(n_draw, replace=True).values))
        for _ in range(iteration)
    ]
def perm_community_pval(row: pd.Series, df_ctrl: pd.DataFrame,
                        cpnt: str, iteration: int
                        ) -> Tuple[float, float, float, str]:
    """
    Compare the value of `cpnt` in a community to the means of `iteration` \
    random samples, of the same size as the community, taken from `df_ctrl`.

    :param row: A line of a dataframe containing the frequency of \
    each feature inside a community (the fields `community_size` and \
    `cpnt` are read).
    :param df_ctrl: A dataframe containing the frequency of each nucleotide \
    in each exons/gene in fasterdb.
    :param cpnt: The component (nt, aa, dnt) of interest
    :param iteration: The number of sub samples to create
    :return: The ctrl mean frequency value of `cpnt`, its standard \
    deviation, the pvalue and the regulation of the \
    enrichment/impoverishment of the community in `row` compared to \
    control exons (both given by `get_pvalue`).
    """
    sampled_means = np.array(
        get_permutation_mean(df_ctrl, cpnt, row["community_size"], iteration))
    pval, reg = get_pvalue(sampled_means, row[cpnt], iteration)
    return (float(np.mean(sampled_means)), float(np.std(sampled_means)),
            pval, reg)
def perm_pvalues(df: pd.DataFrame, df_ctrl: pd.DataFrame, feature: str,
                 target_col: str, iteration: int,
                 dic_com: Dict) -> pd.DataFrame:
    """
    Make, for every community in `df`, a permutation test comparing its \
    value of `target_col` to random samples of features that do not belong \
    to that community, then correct the p-values (Benjamini-Hochberg).

    :param df: A dataframe containing the frequency of each nucleotide \
    in each community (one line per community).
    :param df_ctrl: A dataframe containing the frequency of each nucleotide \
    in each exons/gene in fasterdb.
    :param feature: The feature of interest (gene, exon)
    :param target_col: The name of the column containing the data of interest
    :param iteration: The number of sub samples to create
    :param dic_com: A dictionary linking each community to the exons \
    it contains.
    :return: A copy of `df` with four additional columns: the mean and the \
    standard deviation of the control samples, the adjusted p-value \
    (`p-adj`) and the regulation (`reg-adj`, set to ' . ' when the adjusted \
    p-value is above 0.05).
    """
    # The input table is not modified: the new columns go on a copy.
    df = df.copy()
    id_col = f'id_{feature}'
    results = []
    for i in tqdm(range(df.shape[0]), desc="performing permutations"):
        row = df.iloc[i, :]
        # Features of the tested community are removed from the controls.
        in_community = df_ctrl[id_col].isin(dic_com[row['community']])
        results.append(perm_community_pval(row,
                                           df_ctrl.loc[~in_community, :],
                                           target_col, iteration))
    mean_ctrl = [res[0] for res in results]
    std_ctrl = [res[1] for res in results]
    list_pval = [res[2] for res in results]
    list_reg = [res[3] for res in results]
    if list_pval:
        adj_pvals = multipletests(list_pval, alpha=0.05,
                                  method='fdr_bh',
                                  is_sorted=False,
                                  returnsorted=False)[1]
    else:
        # Nothing to correct when no community was given.
        adj_pvals = []
    adj_regs = [reg if padj <= 0.05 else " . "
                for reg, padj in zip(list_reg, adj_pvals)]
    df[f'{target_col}_mean_{iteration}_ctrl'] = mean_ctrl
    df[f'{target_col}_std_{iteration}_ctrl'] = std_ctrl
    df['p-adj'] = adj_pvals
    df['reg-adj'] = adj_regs
    return df
def perm_with_ctrl(df: pd.DataFrame, feature: str,
                   target_col: str, dic_com: Dict,
                   iteration: int) -> pd.DataFrame:
    """
    Compute the mean of `target_col` in every community and test it \
    against control features with permutations.

    :param df: A dataframe containing the id of the chosen `feature` \
    (i.e FasterDB id of genes or exons) a column with data for interest (\
    this column must have the name *target_col*) and the community \
    and the size of the community of the feature if it has one (None, else).
    :param feature: The kind of feature analysed
    :param target_col:  The name of the column containing the data of interest
    :param dic_com: A dictionary linking each community to the exons \
    it contains.
    :param iteration: The number of sub samples to create
    :return: The dataframe with the p-value compared to the control \
    list of exons.
    """
    # Only the features that belong to a community are averaged; the whole
    # table is still given as the pool of control features.
    in_community = df["community"].notna()
    mean_df = df.loc[in_community,
                     [target_col, "community", "community_size"]]. \
        groupby(["community", "community_size"]).mean().reset_index()
    return perm_pvalues(mean_df, df, feature, target_col,
                        iteration, dic_com)
def create_perm_ctrl_df(ctrl_df: pd.DataFrame, order_df: pd.DataFrame,
                        cpnt: str, feature: str, iteration: int
                        ) -> pd.DataFrame:
    """
    Format the control values obtained by permutation like the table of \
    features, and order them like the communities.

    :param ctrl_df: A dataframe containing the mean ctrl values, \
    the mean control std and the community from which those control \
    have been created
    :param order_df: A dataframe containing the community and their final \
    order.
    :param cpnt: The component (nt, aa, dnt) of interest
    :param feature: The feature of interest
    :param iteration: The number of iteration
    :return: A new dataframe (ctrl_df is left unchanged) sorted by \
    `order`, where the control mean column is renamed `cpnt`, the control \
    std column is renamed `ctrl_std`, the id column is set to 'ctrl' and \
    `community_size` is set to the number of lines of ctrl_df.
    """
    n_rows = ctrl_df.shape[0]
    mean_name = f"{cpnt}_mean_{iteration}_ctrl"
    std_name = f"{cpnt}_std_{iteration}_ctrl"
    # assign() returns a new dataframe: the argument is not modified.
    formatted = ctrl_df.assign(**{
        f"mean_{cpnt}": np.mean(ctrl_df[mean_name]),
        f"id_{feature}": "ctrl",
        "community_size": n_rows,
    })
    formatted = formatted.merge(order_df, how='left', on="community")
    formatted = formatted.rename(columns={mean_name: cpnt,
                                          std_name: 'ctrl_std'})
    return formatted.sort_values("order", ascending=True)
def expand_results_perm(df: pd.DataFrame, rdf: pd.DataFrame, target_col: str,
                        feature: str, iteration: int) -> pd.DataFrame:
    """
    Merge df and rdf together.

    The control lines (one per community, built from the permutation \
    columns of rdf) come first, followed by the features that belong to a \
    community sorted by increasing mean of `target_col` in their community.

    :param df: A dataframe containing the id of the chosen `feature` \
    (i.e FasterDB id of genes or exons) a column with data for interest (\
    this column must have the name *target_col*) and the community \
    and the size of the community of the feature if it has one (None, else).
    :param rdf: The dataframe containing the mean frequency for \
    each community and the p-value of their enrichment compared to control \
    exons.
    :param target_col: The name of the column containing the data of interest
    :param feature: The feature of interest
    :param iteration: The number of iteration
    :return: The merged dataframe: i.e df with the stats columns
    """
    keys = ["community", "community_size"]
    mean_col = f"mean_{target_col}"
    # Features without community are not displayed with this test.
    features = df.loc[df["community"].notna(),
                      [f"id_{feature}", target_col] + keys].copy()
    ctrl_df = rdf[[f"{target_col}_mean_{iteration}_ctrl",
                   f"{target_col}_std_{iteration}_ctrl", "community"]].copy()
    stats = rdf[keys + [target_col, "p-adj"]].rename(
        columns={target_col: mean_col})
    features = features.merge(stats, how="left", on=keys)
    features = features.sort_values(mean_col, ascending=True)
    # The rank of each community is the order of its first appearance in
    # the sorted table.
    order_df = features[["community"]].drop_duplicates().copy()
    order_df["order"] = range(order_df.shape[0])
    df_ctrl = create_perm_ctrl_df(ctrl_df, order_df, target_col, feature,
                                  iteration)
    return pd.concat([df_ctrl, features], axis=0, ignore_index=True)
def make_barplot(df_bar: pd.DataFrame, outfile: Path,
                 target_col: str, feature: str, target_kind: str = "") -> None:
    """
    Create a barplot showing the frequency of `target_col` for every \
    community of exons/gene in `df_bar`.

    The first bar is drawn in red and the others in gray; a star is written \
    above the communities whose `p-adj` value is below 0.05.

    :param df_bar: A dataframe with the enrichment of a \
    nucleotide frequency for every community
    :param outfile: File were the figure will be stored
    :param target_col: The name of the column containing the data of interest
    :param feature: The kind of feature of interest
    :param target_kind: An optional name that describe a bit further \
    target_col.
    """
    sns.set(context="poster")
    g = sns.catplot(x="community", y=target_col, data=df_bar, kind="bar",
                    ci="sd", aspect=2.5, height=12, errwidth=0.5, capsize=.4,
                    palette=["red"] + ["darkgray"] * (df_bar.shape[0] - 1))
    g.fig.subplots_adjust(top=0.9)
    kind_label = f" ({target_kind})" if target_kind else ""
    # The space before "among" was missing, gluing it to the previous word.
    g.fig.suptitle(f"Mean frequency of {target_col}{kind_label} "
                   f"among community of {feature}s\n"
                   f"(stats obtained with a lm test)")
    g.set(xticklabels=[])
    g.ax.set_ylabel(f'Frequency of {target_col}')
    # One line per community, in their order of appearance in df_bar.
    # assumes the bars are drawn in this same order - TODO confirm
    first_rows = df_bar.drop_duplicates(subset="community", keep="first")
    for patch, (_, info) in zip(g.ax.patches, first_rows.iterrows()):
        stats = "*" if info["p-adj"] < 0.05 else ""
        com = info["community"]
        # The star is placed above the error bar (bar height + sd).
        csd = np.std(df_bar.loc[df_bar["community"] == com, target_col])
        g.ax.annotate(stats,
                      (patch.get_x() + patch.get_width() / 2.,
                       patch.get_height() + csd),
                      ha='center', va='center', xytext=(0, 10), fontsize=12,
                      textcoords='offset points')
    g.savefig(outfile)
def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
                      target_col: str, feature: str,
                      target_kind: str = "") -> None:
    """
    Create a figure showing the frequency of `target_col` for every \
    community of exons/gene in `df_bar`, with the control values obtained \
    by permutation displayed on top of it.

    The lines of `df_bar` whose id is 'ctrl' are the control values; \
    a star is written above the communities whose `p-adj` value is \
    below 0.05.

    :param df_bar: A dataframe with the enrichment of a \
    nucleotide frequency for every community
    :param outfile: File were the figure will be stored
    :param target_col: The name of the column containing the data of interest
    :param feature: The kind of feature of interest
    :param target_kind: An optional name that describe a bit further \
    target_col.
    """
    sns.set(context="poster")
    is_ctrl = df_bar[f"id_{feature}"] == 'ctrl'
    df_ctrl = df_bar.loc[is_ctrl, :]
    df_bar = df_bar.loc[~is_ctrl, :].copy()
    # This bar plot is never saved: its patches only give the position and
    # the height used to place the stars on the saved figure.
    g2 = sns.catplot(x="community", y=target_col, data=df_bar, kind="bar",
                     ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4,
                     palette=["darkgray"] * (df_bar.shape[0]))
    g = sns.catplot(x="community", y=target_col, data=df_bar, kind="point",
                    ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4,
                    scale=0.5, palette=["darkgray"] * (df_bar.shape[0]))
    # Keep the x limits of the point plot after the controls are added.
    xrange = g.ax.get_xlim()
    df_ctrl.plot(x="community", y=target_col, kind="scatter", ax=g.ax,
                 yerr="ctrl_std", legend=False, zorder=10,
                 color=(0.8, 0.2, 0.2, 0.4))
    g.ax.set_xlim(xrange)
    g.fig.subplots_adjust(top=0.9)
    kind_label = f" ({target_kind})" if target_kind else ""
    # The space before "among" was missing, gluing it to the previous word.
    g.fig.suptitle(f"Mean frequency of {target_col}{kind_label} "
                   f"among community of {feature}s\n"
                   f"(stats obtained with a permutation test)")
    g.set(xticklabels=[])
    g.ax.set_ylabel(f'Frequency of {target_col}')
    # One line per community, in their order of appearance in df_bar.
    # assumes the bars are drawn in this same order - TODO confirm
    first_rows = df_bar.drop_duplicates(subset="community", keep="first")
    for patch, (_, info) in zip(g2.ax.patches, first_rows.iterrows()):
        stats = "*" if info["p-adj"] < 0.05 else ""
        com = info["community"]
        csd = np.std(df_bar.loc[df_bar["community"] == com, target_col])
        g.ax.annotate(stats,
                      (patch.get_x() + patch.get_width() / 2.,
                       patch.get_height() + csd),
                      ha='center', va='center', xytext=(0, 10), fontsize=12,
                      textcoords='offset points')
    g.savefig(outfile)
def barplot_creation(df_bar: pd.DataFrame, outfig: Path,
                     cpnt: str, test_type: str, feature: str,
                     target_kind: str = "") -> None:
    """
    Create the figure showing the frequency of `cpnt` in every community \
    with the function matching the statistical test used.

    :param df_bar: A dataframe with the enrichment of a \
    nucleotide frequency for every community and showing the frequency \
    of each feature in each community
    :param outfig: File were the figure will be stored
    :param cpnt: The component (nt, aa, dnt) of interest
    :param test_type: The type of test made: 'lm' selects `make_barplot`, \
    any other value selects `make_barplot_perm`
    :param feature: The kind of feature of interest
    :param target_kind: An optional name that describe a bit further \
    target_col.
    """
    plot_function = make_barplot if test_type == "lm" else make_barplot_perm
    plot_function(df_bar, outfig, cpnt, feature, target_kind)
def get_feature_by_community(df: pd.DataFrame, feature: str) -> Dict:
    """
    Create a dictionary containing the exons contained in each community.

    Features without community (None or NaN) are ignored.

    :param df: A dataframe containing the frequency of each nucleotide \
    in each exons belonging to a community.
    :param feature: the feature of interest (exon, gene)
    :return: A dictionary linking each community to the exons it contains

    >>> dataf = pd.DataFrame({"id_gene": ['1', '2', '3', '4', '5'],
    ... 'community': ['C1', 'C1', 'C2', 'C2', np.nan]})
    >>> get_feature_by_community(dataf, 'gene')
    {'C1': ['1', '2'], 'C2': ['3', '4']}
    >>> dataf.rename({"id_gene": "id_exon"}, axis=1, inplace=True)
    >>> get_feature_by_community(dataf, 'exon')
    {'C1': ['1', '2'], 'C2': ['3', '4']}
    """
    id_col = f'id_{feature}'
    # notna() discards None as well as NaN: missing communities read from a
    # file are NaN, which a "is not None" test lets through.
    kept = df.loc[df['community'].notna(), ['community', id_col]]
    dic = {}
    for com, id_ft in zip(kept['community'], kept[id_col]):
        dic.setdefault(com, []).append(id_ft)
    return dic
def create_community_fig(df: pd.DataFrame, feature: str,
                         target_col: str,
                         outfile_ctrl: Path, test_type: str,
                         dic_com: Optional[Dict] = None,
                         target_kind: str = "",
                         iteration: int = 10000) -> None:
    """
    Create a dataframe with a control community, save it as a table and \
    as a barplot figure.

    Three files are written: the figure (`outfile_ctrl`), the table of \
    statistics (same name with the '.txt' extension) and the table used \
    to build the figure (same name ending with '_bar.txt').

    :param df: A dataframe containing the id of the chosen `feature` \
    (i.e FasterDB id of genes or exons) a column with data for interest (\
    this column must have the name *target_col*) and the community \
    and the size of the community of the feature if it has one (None, else).
    :param feature: The kind of feature analysed
    :param target_col: The name of the column containing the data of interest
    :param outfile_ctrl: file used to stored the table and the figure \
    containing the test communities and the control community
    :param test_type: The type of test to make: 'lm' for the linear model, \
    any other value for the permutation test
    :param dic_com: A dictionary linking each community to the exons \
    it contains. It is only used by the permutation test, and built from \
    `df` if not given.
    :param target_kind: An optional name that describe a bit further \
    target_col.
    :param iteration: The number of sub samples to create
    """
    out_path = Path(outfile_ctrl)
    if test_type == "lm":
        ndf, rdf = lm_with_ctrl(df, target_col, out_path)
        df_bar = expand_results_lm(ndf, rdf, target_col, feature)
    else:
        # Only the permutation test needs the content of each community.
        if dic_com is None:
            dic_com = get_feature_by_community(df, feature)
        rdf = perm_with_ctrl(df, feature, target_col, dic_com, iteration)
        df_bar = expand_results_perm(df, rdf, target_col, feature, iteration)
    # Only the extension of the file name is changed: a plain text
    # replacement of ".pdf" would also alter folder names containing it.
    rdf.to_csv(out_path.with_suffix(".txt"), sep="\t", index=False)
    bar_outfile = out_path.with_name(f"{out_path.stem}_bar.txt")
    df_bar.to_csv(bar_outfile, sep="\t", index=False)
    barplot_creation(df_bar, out_path, target_col, test_type, feature,
                     target_kind)