From 68c96e9a65b83c01eff536b874aa133ea8765c99 Mon Sep 17 00:00:00 2001
From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr>
Date: Mon, 6 Jul 2020 16:09:58 +0200
Subject: [PATCH] src/find_interaction_cluster/community_finder.py: find
 communities using hipMCL method

---
 .../community_finder.py                       | 90 ++++++++++++++++---
 1 file changed, 78 insertions(+), 12 deletions(-)

diff --git a/src/find_interaction_cluster/community_finder.py b/src/find_interaction_cluster/community_finder.py
index cec20332..43aab7c0 100644
--- a/src/find_interaction_cluster/community_finder.py
+++ b/src/find_interaction_cluster/community_finder.py
@@ -25,6 +25,7 @@ from matplotlib.colors import to_rgba
 from itertools import product
 import multiprocessing as mp
 import json
+import subprocess as sp
 
 
 def get_nodes(interaction: np.array) -> np.array:
@@ -83,49 +84,75 @@ def write_cytoscape_graph(graph: nx.Graph, dic_community: Dict[str, str],
         d['data']['community'] = dic_community[exon]['num']
     res = json.dumps(data)
     with open(outfile, 'w') as f:
-         f.write(res)
+        f.write(res)
 
 
-def find_communities(graph: nx.Graph, project: str
+def get_communities(result: Path) -> List[List[str]]:
+    """
+    Get the communities inside the file `result`
+
+    :param result: A file containing the communities found by the hipMCL \
+    program.
+    :return: The list of communities find by the hipMCL program.
+    """
+    communities = []
+    with result.open('r') as f:
+        for line in f:
+            communities.append(line.replace('\n', '').strip().split(' '))
+    return communities
+
+
+def find_communities(graph: nx.Graph, project: str,
+                     outfile: Path, result_file: Path
                      ) -> Tuple[pd.DataFrame, Dict]:
     """
     Find the communities within the graph.
 
     :param graph: Graph of co-localized exons
     :param project: The name of the project
+    :param outfile: A file containing interaction
     :return: The communities within the graph and the community \
     to wich each exon belong
     """
+    warn = 0
     logging.debug("Finding community ...")
-    communities_generator = community.asyn_lpa_communities(graph)
-    communities = list(communities_generator)
+    if not result_file.is_file():
+        cmd = f"mpirun -np 1 {ConfigGraph.get_hipmcl_prog()} -M {outfile} " \
+              f"-I 1.2 -per-process-mem 8 -o {result_file}"
+        sp.check_call(cmd, shell=True, stderr=sp.STDOUT)
+    communities = get_communities(result_file)
     dic_community = {}
     cov = round(community.coverage(graph, communities), 2)
-    # perf = community.performance(graph, communities)
     modularity = community.modularity(graph, communities, weight='X')
-    d = {'community': [], 'nodes': [], 'edges' : [], 'EC': [], 'HCS': [],
+    d = {'community': [], 'nodes': [], 'edges': [], 'EC': [], 'HCS': [],
          '%E vs E in complete G': [],
          'cov': [], 'modularity': [], 'exons': []}
     colors = cm.hsv(np.linspace(0, 1, len(communities)))
     for k, c in enumerate(communities):
         subg = nx.subgraph(graph, c)
         nb_nodes = len(c)
+        if nb_nodes <= 1:
+            warn += 1
         nb_edges = len(subg.edges)
+        try:
+            complete = round(nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100,
+                             2)
+        except ZeroDivisionError:
+            complete = np.nan
         edge_connectivity = nx.edge_connectivity(subg)
         is_hc = 'yes' if edge_connectivity > nb_nodes / 2 else 'no'
         for exon in c:
             dic_community[exon] = {'num': f'C{k + 1}',
                                    'col': lighten_color(colors[k])
                                    if is_hc == 'yes' else
-                                   (lighten_color(colors[k], 0.1) if nb_nodes > 2 else 'white')
-                                   }
+                                   (lighten_color(colors[k], 0.1)
+                                    if nb_nodes > 2 else 'white')}
         d['community'].append(f'C{k + 1}')
         d['nodes'].append(nb_nodes)
         d['edges'].append(nb_edges)
         d['EC'].append(edge_connectivity)
         d['HCS'].append(is_hc)
-        d['%E vs E in complete G'].append(round(
-            nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100, 2))
+        d['%E vs E in complete G'].append(complete)
         d['exons'].append(', '.join(sorted(list(c))))
         d['cov'].append(cov)
         d['modularity'].append(round(modularity, 5))
@@ -133,6 +160,9 @@ def find_communities(graph: nx.Graph, project: str
             graph.remove_nodes_from(c)
     d['project'] = [project] * len(d['community'])
     df = pd.DataFrame(d)
+    if warn > 0:
+        logging.warning(f" {warn} communities were composed by one exon or "
+                        f"less !")
     return df, dic_community
 
 
@@ -248,6 +278,38 @@ def get_figure_title(project, weight, global_weight, same_gene):
     return title
 
 
+def write_interaction_file(arr_interaction: np.array, project: str,
+                           weight: int, global_weight: int, same_gene: bool,
+                           use_weight: bool = False):
+    """
+
+    :param arr_interaction:  Each couples of co-localized exons within a \
+    project.
+    :param project: The name of the project of interest
+    :param weight: The minimum weight of interaction to consider
+    :param global_weight: The global weight to consider. if \
+    the global weight is equal to 0 then then density figure are calculated \
+    by project, else all projet are merge together and the interaction \
+    seen in `global_weight` project are taken into account
+    :param same_gene: Say if we consider as co-localised, exons within the \
+    same gene (True) or not (False) (default False)
+    :param use_weight: Say if we want to write the weight into the result file.
+    :return:
+    """
+    logging.debug('Writing interaction files ...')
+    outfile = ConfigGraph.get_community_file(project, weight, global_weight,
+                                             same_gene, "_interation.txt")
+    result = ConfigGraph.get_community_file(project, weight, global_weight,
+                                            same_gene,
+                                            "_interation_result.txt")
+    with outfile.open('w') as f:
+        for exon1, exon2, cweight in arr_interaction:
+            if not use_weight:
+                cweight = 1
+            f.write(f"{exon1}\t{exon2}\t{cweight}\n")
+    return outfile, result
+
+
 def community_finder(weight: int, global_weight: int, project: str = "",
                      same_gene=True, html_fig: bool = False,
                      logging_level: str = "DISABLE"):
@@ -271,8 +333,11 @@ def community_finder(weight: int, global_weight: int, project: str = "",
     cnx = sqlite3.connect(ConfigGraph.db_file)
     interaction = get_project_colocalisation(cnx, project, weight,
                                              global_weight, same_gene, True)
+    outfile, result_file = write_interaction_file(interaction, project,
+                                                  weight, global_weight,
+                                                  same_gene)
     graph = create_graph(interaction)
-    df, dic_community = find_communities(graph, project)
+    df, dic_community = find_communities2(graph, project, outfile, result_file)
     outfiles = [ConfigGraph.get_community_file(
         project, weight, global_weight, same_gene, ext)
         for ext in ['.txt', '.cyjs', '.html']]
@@ -307,12 +372,13 @@ def get_projects(global_weight: int) -> List[str]:
         cnx.close()
         return res
 
+
 def get_projects_name(global_weights: List[int]) -> Tuple[List[str], Dict]:
     """
     Get projects name given a list of global_weight and a dictionary linking,
     each project name to it's corresponding global weight.
 
-    :param global_weight: The list of global weights to consider. if \
+    :param global_weights: The list of global weights to consider. if \
     the global weight is equal to 0 then then density figure are calculated \
     by project, else all projet are merge together and the interaction \
     seen in `global_weight` project are taken into account
-- 
GitLab