def get_communities(result: Path, threshold: int = 0) -> List[List[str]]:
    """
    Get the communities inside the file `result`.

    :param result: A file containing the communities found by the hipMCL \
    program. Each line is read as one community whose members are \
    separated by whitespace.
    :param threshold: A community is recovered only if it contains \
    strictly more than `threshold` exons.
    :return: The list of communities found by the hipMCL program.
    """
    communities = []
    with result.open('r') as f:
        for line in f:
            # split() with no argument collapses runs of whitespace and
            # returns [] for a blank line, so a blank line can never yield
            # a bogus community made of a single empty string.
            members = line.split()
            if len(members) > threshold:
                communities.append(members)
    return communities
find_communities(graph, project, outfile, result_file) + logging.debug('Writing results ...') outfiles = [ConfigGraph.get_community_file( project, weight, global_weight, same_gene, ext) for ext in ['.txt', '.cyjs', '.html']] df.to_csv(outfiles[0], sep="\t", index=False) + logging.debug("Saving the graph ...") write_cytoscape_graph(graph, dic_community, outfiles[1]) if html_fig: fig_title = get_figure_title(project, weight, global_weight, same_gene) create_figure(graph, outfiles[2], dic_community, fig_title) + logging.debug('Done !') def get_projects(global_weight: int) -> List[str]: @@ -427,3 +435,5 @@ def multiple_community_launcher(ps: int, weights: List[int], processes.append(pool.apply_async(community_finder, args)) for proc in processes: proc.get(timeout=None) + pool.close() + pool.join()