src/find_interaction_cluster/community_finder.py: creation of...

src/find_interaction_cluster/community_finder.py: creation of write_cytoscape_graph to create a Cytoscape network + modification of find_communities to use this new function. Modification of create_figure to use a new layout for a better display. Addition of the html_fig parameter to the community_finder and multiple_community_launcher functions

src/find_interaction_cluster/community_finder.py: creation of...
4eeae6f6 · nfontrod · a2db1464 · 4eeae6f6
Commit 4eeae6f6 authored 4 years ago by nfontrod
--- a/src/find_interaction_cluster/community_finder.py
+++ b/src/find_interaction_cluster/community_finder.py
@@ -24,6 +24,7 @@ import matplotlib.cm as cm
 from matplotlib.colors import to_rgba
 from itertools import product
 import multiprocessing as mp
+import json
 def get_nodes(interaction: np.array) -> np.array:
@@ -67,6 +68,24 @@ def lighten_color(color: np.array, lighten: float = 1):
    return color
+def write_cytoscape_graph(graph: nx.Graph, dic_community: Dict[str, str],
+                          outfile: str):
+    """
+    Write the network in cyjs format, usable by cytoscape.
+    :param graph: The network
+    :param dic_community: dictionary linking each node to its community.
+    :param outfile: The file where the graph is written
+    """
+    data = nx.cytoscape_data(graph)
+    for d in data['elements']['nodes']:
+        exon = d['data']['id']
+        d['data']['community'] = dic_community[exon]['num']
+    res = json.dumps(data)
+    with open(outfile, 'w') as f:
+         f.write(res)
 def find_communities(graph: nx.Graph, project: str
                     ) -> Tuple[pd.DataFrame, Dict]:
    """
@@ -78,14 +97,15 @@ def find_communities(graph: nx.Graph, project: str
    to wich each exon belong
    """
    logging.debug("Finding community ...")
-    communities_generator = community.label_propagation_communities(graph)
+    communities_generator = community.asyn_lpa_communities(graph)
    communities = list(communities_generator)
    dic_community = {}
    cov = round(community.coverage(graph, communities), 2)
-    perf = np.nan #  community.performance(graph, communities)
+    # perf = community.performance(graph, communities)
+    modularity = community.modularity(graph, communities, weight='X')
    d = {'community': [], 'nodes': [], 'edges' : [], 'EC': [], 'HCS': [],
         '%E vs E in complete G': [],
-         'cov': [], 'perf': [], 'exons': []}
+         'cov': [], 'modularity': [], 'exons': []}
    colors = cm.hsv(np.linspace(0, 1, len(communities)))
    for k, c in enumerate(communities):
        subg = nx.subgraph(graph, c)
@@ -106,9 +126,11 @@ def find_communities(graph: nx.Graph, project: str
        d['HCS'].append(is_hc)
        d['%E vs E in complete G'].append(round(
            nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100, 2))
-        d['exons'].append(', '.join(list(c)))
+        d['exons'].append(', '.join(sorted(list(c))))
        d['cov'].append(cov)
-        d['perf'].append(perf)
+        d['modularity'].append(round(modularity, 5))
+        if nb_nodes <= 9:
+            graph.remove_nodes_from(c)
    d['project'] = [project] * len(d['community'])
    df = pd.DataFrame(d)
    return df, dic_community
@@ -121,7 +143,7 @@ def create_figure(graph: nx.Graph, outfile: Path, dic_community: Dict,
    :param graph: The network object
    :param outfile: File were the figure will be created
-    :param dic_community: dictionary linking each njode to it's community.
+    :param dic_community: dictionary linking each node to its community.
    :param title: The title of the figure
    """
    logging.debug("Creating figure")
@@ -130,7 +152,7 @@ def create_figure(graph: nx.Graph, outfile: Path, dic_community: Dict,
    middle_x = []
    middle_y = []
    weight = []
-    nodes = nx.spring_layout(graph, seed=1)
+    nodes = nx.nx_pydot.graphviz_layout(graph, prog="neato")
    for edge in graph.edges():
        x0, y0 = nodes[edge[0]]
        x1, y1 = nodes[edge[1]]
@@ -227,7 +249,8 @@ def get_figure_title(project, weight, global_weight, same_gene):
 def community_finder(weight: int, global_weight: int, project: str = "",
-                     same_gene=True, logging_level: str = "DISABLE"):
+                     same_gene=True, html_fig: bool = False,
+                     logging_level: str = "DISABLE"):
    """
    Find communities inside co-localisation between exons found in \
    a ChIA-PET project.
@@ -241,6 +264,7 @@ def community_finder(weight: int, global_weight: int, project: str = "",
    :param same_gene: Say if we consider as co-localised, exons within the \
    same gene (True) or not (False) (default False)
    :param logging_level: The level of data to display (default 'DISABLE')
+    :param html_fig: True to create an html figure, False otherwise
    """
    ConfigGraph.output_folder.mkdir(exist_ok=True, parents=True)
    logging_def(ConfigGraph.output_folder, __file__, logging_level)
@@ -249,13 +273,14 @@ def community_finder(weight: int, global_weight: int, project: str = "",
                                             global_weight, same_gene, True)
    graph = create_graph(interaction)
    df, dic_community = find_communities(graph, project)
-    outfile = ConfigGraph.get_community_file(project, weight, global_weight,
+    outfiles = [ConfigGraph.get_community_file(
-                                             same_gene, is_fig=False)
+        project, weight, global_weight, same_gene, ext)
-    df.to_csv(outfile, sep="\t", index=False)
+        for ext in ['.txt', '.cyjs', '.html']]
-    figure = ConfigGraph.get_community_file(project, weight, global_weight,
+    df.to_csv(outfiles[0], sep="\t", index=False)
-                                            same_gene, is_fig=True)
+    write_cytoscape_graph(graph, dic_community, outfiles[1])
-    fig_title = get_figure_title(project, weight, global_weight, same_gene)
+    if html_fig:
-    create_figure(graph, figure, dic_community, fig_title)
+        fig_title = get_figure_title(project, weight, global_weight, same_gene)
+        create_figure(graph, outfiles[2], dic_community, fig_title)
 def get_projects(global_weight: int) -> List[str]:
@@ -306,7 +331,7 @@ def get_projects_name(global_weights: List[int]) -> Tuple[List[str], Dict]:
 def multiple_community_launcher(ps: int, weights: List[int],
                                global_weights: List[int],
-                                same_gene: bool,
+                                same_gene: bool, html_fig: bool = False,
                                logging_level: str = "DISABLE"):
    """
    :param ps: The number of processes we want to use.
@@ -317,6 +342,7 @@ def multiple_community_launcher(ps: int, weights: List[int],
    seen in `global_weight` project are taken into account
    :param same_gene: Say if we consider as co-localised exon within the \
    same gene
+    :param html_fig: True to create an html figure, False otherwise
    :param logging_level: Level of information to display
    """
    ConfigGraph.community_folder.mkdir(exist_ok=True, parents=True)
@@ -331,7 +357,7 @@ def multiple_community_launcher(ps: int, weights: List[int],
        global_weight = dic_project[project]
        logging.info(f'Finding community for project : {project}, '
                     f'global_weight : {global_weight}, weight: {weight}')
-        args = [weight, global_weight, project, same_gene]
+        args = [weight, global_weight, project, same_gene, html_fig]
        processes.append(pool.apply_async(community_finder, args))
    for proc in processes:
        proc.get(timeout=None)