src/find_interaction_cluster/graph_figures/sf_community_graph.py: graph...

src/find_interaction_cluster/graph_figures/sf_community_graph.py: graph creation displaying the community regulated by many splicing factors and how they interact between them

src/find_interaction_cluster/graph_figures/sf_community_graph.py: graph...
src/find_interaction_cluster/graph_figures/sf_community_graph.py: graph creation displaying the community regulated by many splicing factors and how they interact between them
c22b1d42 · nfontrod · 28d3ff0e · c22b1d42
Commit c22b1d42 authored 3 years ago by nfontrod
--- a/src/find_interaction_cluster/graph_figures/sf_community_graph.py
+++ b/src/find_interaction_cluster/graph_figures/sf_community_graph.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description: Create a graph figure showing which community is regulated \
+by which splicing factor
+"""
+import networkx as nx
+from typing import List
+import pandas as pd
+import sqlite3
+import numpy as np
+from .graph_functions import Parameters, recover_json_graph_of_interest, \
+    Config, get_regulation_table, get_community_tables, merge_dataframes, \
+    get_regulated_community, subgraph_creation, load_graphic, check_if_exist
+from .create_community_node_graph import create_community_sized_graph
+from ..sf_and_communities import get_sfname
+import matplotlib.cm as cm
+from pathlib import Path
+import json
+from pyvis.network import Network
+import lazyparser as lp
+from matplotlib.colors import to_hex
+def add_node_color(c_graph: nx.Graph, sf_name: str, color: str,
+                   communities: List[str]) -> nx.Graph:
+    """
+    Tag every community of ``communities`` as regulated by ``sf_name``.
+    A community tagged for the first time receives ``sf_name`` in its \
+    ``reg`` attribute and ``color`` in its ``color`` attribute. A community \
+    that already has a ``reg`` attribute keeps its color, gets \
+    ``sf_name`` appended to ``reg`` and its ``shape`` is set to \
+    ``triangle``. In both cases ``sf_name`` is added to the ``title`` \
+    attribute of the node. The graph is modified in place.
+    :param c_graph: The graphic of community to update
+    :param sf_name: The name of the splicing factor regulating the \
+    communities given in communities.
+    :param color: The color to give to the communities tagged for the \
+    first time
+    :param communities: The list of community to update
+    :return: ``c_graph`` itself, once updated
+    >>> g = nx.Graph()
+    >>> g.add_nodes_from(list("ABC"))
+    >>> g.add_edges_from([("A", "B"), ("A", "C")])
+    >>> ng = add_node_color(g, "TRA2_down", "red", ["A", "B"])
+    >>> list(ng.nodes[x] for x in list("ABC")) == [{'reg': 'TRA2_down',
+    ... 'color': 'red', 'title': 'TRA2_down'}, {'reg': 'TRA2_down',
+    ... 'color': 'red', 'title': 'TRA2_down'}, {}]
+    True
+    >>> ng = add_node_color(ng, "SR1_up", "white", ["A", "C"])
+    >>> list(ng.nodes[x] for x in list("ABC")) == [{'reg':
+    ... 'TRA2_down, SR1_up', 'color': 'red', 'title': 'TRA2_down<br/>SR1_up',
+    ... 'shape': 'triangle'}, {'reg': 'TRA2_down', 'color': 'red',
+    ... 'title': 'TRA2_down'}, {'reg': 'SR1_up', 'color': 'white',
+    ... 'title': 'SR1_up'}]
+    True
+    """
+    for community in communities:
+        attrs = c_graph.nodes[community]
+        if "reg" in attrs:
+            # Community shared by several regulators: the first color is
+            # kept and the node is flagged with a dedicated shape.
+            attrs["reg"] += f", {sf_name}"
+            attrs["shape"] = 'triangle'
+        else:
+            attrs["reg"] = sf_name
+            attrs["color"] = color
+        if "title" in attrs:
+            attrs["title"] += f"<br/>{sf_name}"
+        else:
+            attrs["title"] = sf_name
+    return c_graph
+def get_regulated_communities(c_graph: nx.Graph) -> List[str]:
+    """
+    List the nodes of a community graph that carry a ``reg`` attribute, \
+    i.e. the communities tagged by ``add_node_color``.
+    :param c_graph: The graphic of community to inspect
+    :return: The regulated communities, in the iteration order of \
+    ``c_graph.nodes``
+    >>> g = nx.Graph()
+    >>> g.add_nodes_from(list("ABC"))
+    >>> g.add_edges_from([("A", "B"), ("A", "C")])
+    >>> ng = add_node_color(g, "TRA2_down", "red", ["A", "B"])
+    >>> get_regulated_communities(ng)
+    ['A', 'B']
+    """
+    regulated = []
+    for community in c_graph.nodes:
+        if "reg" in c_graph.nodes[community]:
+            regulated.append(community)
+    return regulated
+def select_splicing_factors(sf_list: List[str]) -> List[str]:
+    """
+    Resolve the ``ALL`` keyword of a splicing factor selection.
+    :param sf_list: A list of splicing factor of interest
+    :return: ``sf_list`` itself if it doesn't contain ``ALL``, otherwise \
+    the list returned by ``get_sfname()``
+    >>> select_splicing_factors(list("ABC"))
+    ['A', 'B', 'C']
+    """
+    if "ALL" in sf_list:
+        return get_sfname()
+    return sf_list
+def get_title(nb_sf: int, reg: str, threshold: float, min_reg: int,
+              feature: str, iteration: int) -> str:
+    """
+    Build the title of the community figure.
+    :param nb_sf: The number of splicing factor analyzed
+    :param reg: The name of the regulation chosen (``reg`` is displayed \
+    as "regulated", any other value as "<reg>-regulated")
+    :param threshold: Minimum frequency of gene regulated in a community \
+    to select it (but it must also contains at least min_reg gene regulated)
+    :param min_reg: The minimum of regulated exon in a community to \
+    take it into account
+    :param feature: The kind of feature we want to analyse
+    :param iteration: The number of iteration to perform. Below 20 the \
+    title describes the threshold selection, from 20 it describes the \
+    permutation test.
+    :return: The title of the figure
+    >>> get_title(5, "down", 0.1, 2, "gene", 0)
+    'Figure of the communities containing at least 10.0 % of their genes \
+(or more than 2 genes) down-regulated by 5 splicing factors'
+    >>> get_title(3, "reg", 0.07, 2, "exon", 0)
+    'Figure of the communities containing at least 7.0 % of their exons \
+(or more than 2 exons) regulated by 3 splicing factors'
+    """
+    reg = "regulated" if reg == "reg" else f"{reg}-regulated"
+    if iteration < 20:
+        # Rounding only removes the binary floating point noise of the
+        # product (0.07 * 100 == 7.000000000000001) from the title.
+        percentage = round(threshold * 100, 10)
+        return f"Figure of the communities containing at least " \
+               f"{percentage} % of their {feature}s (or more than " \
+               f"{min_reg} {feature}s) {reg} by {nb_sf} splicing factors"
+    return f"Figure of the communities enriched in {feature}s" \
+           f" {reg} by {nb_sf} splicing factors (permutation test " \
+           f"{iteration} iteration)"
+def get_outfiles(c_graph_file: Path, sf_list: List[str], threshold: float,
+                 min_reg: int, min_community: int, min_community_size: int,
+                 iteration: int) -> List[Path]:
+    """
+    Return the json and html outfiles used to store and visualize graph \
+    data. The folder ``graph_figures/community_level``, located next to \
+    ``c_graph_file``, is created if needed.
+    :param c_graph_file: A file containing a community level graph
+    :param sf_list: The list of splicing factor of interest
+    :param threshold: Minimum frequency of gene regulated in a community \
+    to select it (but it must also contains at least min_reg gene regulated)
+    :param min_reg: The minimum of regulated exon in a community to \
+    take it into account
+    :param min_community: The minimum number of enriched community \
+    required to produce a figure
+    :param min_community_size: The minimum size used to consider communities
+    :param iteration: Below 20, the file names contain ``min_reg`` and \
+    ``min_community``; from 20 they contain the number of iteration instead
+    :return: The json file then the html file
+    """
+    outfolder = c_graph_file.parent / "graph_figures" / "community_level"
+    outfolder.mkdir(parents=True, exist_ok=True)
+    prefix = f"community_graph_{len(sf_list)}_sf_t{threshold}"
+    suffix = f"min-size-{min_community_size}"
+    if iteration < 20:
+        stem = f"{prefix}_min-reg-{min_reg}_min-com_{min_community}_{suffix}"
+    else:
+        stem = f"{prefix}_iteration-{iteration}_{suffix}"
+    return [outfolder / f"{stem}.{ext}" for ext in ("json", "html")]
+def update_community_graphic(p: Parameters, c_graph: nx.Graph, color: str,
+                             sf_name: str, reg: str,
+                             threshold: float, min_reg: int = 2,
+                             iteration: int = 0, min_community: int = 3,
+                             min_community_size: int = 10) -> nx.Graph:
+    """
+    Tag, in the community graph, the communities found regulated by \
+    ``sf_name`` for the regulation ``reg``. The graph is only updated when \
+    at least ``min_community`` communities are found.
+    :param p: A class containing configurations
+    :param c_graph: A community level graph
+    :param sf_name: The name of the splicing factor of interest
+    :param reg: The name of the regulation chosen
+    :param color: color of the regulated node
+    :param threshold: Minimum frequency of gene regulated in a community \
+    to select it (but it must also contains at least min_reg gene regulated)
+    :param min_reg: The minimum of regulated exon in a community to \
+    take it into account
+    :param iteration: If this parameter is greater or equal to 20 then a \
+    permutation test is made to find the significantly enriched communities. \
+    Below 20, the selection presumably relies on ``threshold`` and \
+    ``min_reg`` (see ``get_regulated_community``)
+    :param min_community: The minimum number of enriched community \
+    required to update the graph
+    :param min_community_size: The minimum size used to consider communities
+    :return: ``c_graph``, updated by ``add_node_color`` when enough \
+    communities are regulated, unchanged otherwise
+    """
+    print(f"Working on {sf_name}, {reg}")
+    graph_file, comm_file = recover_json_graph_of_interest(p)
+    check_if_exist(graph_file)
+    # This function is called once per splicing factor: the connection is
+    # closed explicitly so that database handles do not accumulate.
+    cnx = sqlite3.connect(Config.db_file)
+    try:
+        reg_table = get_regulation_table(cnx, sf_name, reg, p.feature)
+        df_com_file = pd.read_csv(comm_file, sep="\t")
+        df_com = get_community_tables(df_com_file, p.feature)
+        full_com = merge_dataframes(reg_table, df_com, p.feature)
+    finally:
+        cnx.close()
+    # Communities below the minimum size are blanked out so that their
+    # rows no longer belong to any community.
+    full_com.loc[full_com["community_size"] < min_community_size,
+                 ["community", "community_size"]] = [np.nan, np.nan]
+    list_communities = get_regulated_community(full_com, p.feature, threshold,
+                                               min_reg, iteration)
+    if len(list_communities) >= min_community:
+        c_graph = add_node_color(c_graph, f"{sf_name}_{reg}", color,
+                                 list_communities)
+    return c_graph
+def write_figure(c_graph: nx.Graph, outfile: Path, title: str = ""
+                 ) -> None:
+    """
+    Write the network figure as an html page.
+    :param c_graph: The community graph to draw
+    :param outfile: The html file where the graphic will be created
+    :param title: The title of the figure
+    """
+    html_target = str(outfile)
+    control_panels = ["nodes", "edges", "physics"]
+    figure = Network(width="100%", height="100%", heading=title)
+    figure.from_nx(c_graph)
+    # NOTE(review): the pyvis settings below are applied in their original
+    # order; keep that order when editing, later calls may depend on it.
+    figure.force_atlas_2based()
+    figure.toggle_physics(False)
+    figure.show_buttons(filter_=control_panels)
+    figure.write_html(html_target)
+@lp.parse
+def create_community_sf_graph(project: str, weight: int, global_weight: int,
+                              same_gene: bool, inflation: float,
+                              cell_line: str, feature: str, sf_list: List[str],
+                              reg: str, threshold: float, min_reg: int = 2,
+                              iteration: int = 0, min_community: int = 3,
+                              min_community_size: int = 10):
+    """
+    Create the json and html figure of the communities regulated by the \
+    splicing factors of interest.
+    :param project: A project name of interest. Used only if \
+    global_weight is 0
+    :param weight: The weight of interaction to consider
+    :param global_weight: The global weight to consider. if \
+    the global weight is equal to 0 then then density figure are \
+    calculated by project, else all project are merge together and the \
+    interaction seen in `global_weight` project are taken into account \
+    :param same_gene: Say if we consider as co-localised, exons within \
+    the same gene (True) or not (False)
+    :param inflation: The inflation parameter
+    :param cell_line: Interactions are only selected from projects made \
+     on a specific cell line (ALL to disable this filter)
+    :param feature: The feature we want to analyse
+    :param sf_list: The list of the splicing factor of interest
+    :param reg: The name of the regulation chosen
+    :param threshold: Minimum frequency of gene regulated in a colony to \
+    select it (but it must also contains at least min_reg gene regulated)
+    :param min_reg: The minimum of regulated exon in a community to \
+    take it into account
+    :param iteration: If this parameter is greater or equal to 20 then a \
+    permutation test is made to find the significantly enriched communities. \
+    Below 20, significant communities are found
+    :param min_community: The minimum number of enriched community \
+    required to produce a figure
+    :param min_community_size: The minimum size used to consider communities
+    """
+    # NOTE(review): the ``:param`` entries above are kept verbatim because
+    # lazyparser (``@lp.parse``) presumably builds the command line help
+    # from this docstring - confirm before rewording them.
+    # ``__wrapped__`` presumably gives the undecorated function, so that it
+    # can be called with explicit arguments.
+    c_graph_file = create_community_sized_graph.__wrapped__(
+        project, weight, global_weight, same_gene, inflation, cell_line,
+        feature, min_community_size)
+    c_graph = load_graphic(c_graph_file)
+    p = Parameters(project, weight, global_weight, same_gene, inflation,
+                   cell_line, feature)
+    sf_list = select_splicing_factors(sf_list)
+    # At most 5 colors are generated. ``endpoint=False`` because the hsv
+    # color map is cyclic: sampling both 0 and 1 would give (nearly) the
+    # same red to the first and the last color.
+    nb_colors = min(len(sf_list), 5)
+    palette = [to_hex(c) for c in
+               cm.hsv(np.linspace(0, 1, nb_colors, endpoint=False))]
+    # The palette is cycled so that every splicing factor is analysed:
+    # zipping ``sf_list`` with the palette would silently drop every factor
+    # after the fifth one while the title and the file names count them all.
+    for idx, sf_name in enumerate(sf_list):
+        color = palette[idx % len(palette)]
+        c_graph = update_community_graphic(p, c_graph, color, sf_name, reg,
+                                           threshold, min_reg, iteration,
+                                           min_community, min_community_size)
+    regulated_com = get_regulated_communities(c_graph)
+    c_graph = subgraph_creation(c_graph, [regulated_com])
+    outfiles = get_outfiles(c_graph_file, sf_list, threshold, min_reg,
+                            min_community, min_community_size, iteration)
+    title = get_title(len(sf_list), reg, threshold, min_reg, feature,
+                      iteration)
+    g_json = nx.json_graph.node_link_data(c_graph)
+    # The context manager guarantees that the json file is flushed and closed
+    with outfiles[0].open('w') as json_file:
+        json.dump(g_json, json_file, indent=2)
+    write_figure(c_graph, outfiles[1], title)
+if __name__ == "__main__":
+    import sys
+    # With command line arguments the figure is created; a bare call of the
+    # script runs the doctests of the module instead.
+    if len(sys.argv) != 1:
+        create_community_sf_graph()
+    else:
+        import doctest
+        doctest.testmod()