diff --git a/src/find_interaction_cluster/community_finder.py b/src/find_interaction_cluster/community_finder.py index cec2033276b1b13153fdb4b3addb0e889cd31b3d..43aab7c0d14ac171723f4e8b9f21edad73518923 100644 --- a/src/find_interaction_cluster/community_finder.py +++ b/src/find_interaction_cluster/community_finder.py @@ -25,6 +25,7 @@ from matplotlib.colors import to_rgba from itertools import product import multiprocessing as mp import json +import subprocess as sp def get_nodes(interaction: np.array) -> np.array: @@ -83,49 +84,75 @@ def write_cytoscape_graph(graph: nx.Graph, dic_community: Dict[str, str], d['data']['community'] = dic_community[exon]['num'] res = json.dumps(data) with open(outfile, 'w') as f: - f.write(res) + f.write(res) -def find_communities(graph: nx.Graph, project: str +def get_communities(result: Path) -> List[List[str]]: + """ + Get the communities inside the file `result` + + :param result: A file containing the communities found by the hipMCL \ + program. + :return: The list of communities find by the hipMCL program. + """ + communities = [] + with result.open('r') as f: + for line in f: + communities.append(line.replace('\n', '').strip().split(' ')) + return communities + + +def find_communities(graph: nx.Graph, project: str, + outfile: Path, result_file: Path ) -> Tuple[pd.DataFrame, Dict]: """ Find the communities within the graph. :param graph: Graph of co-localized exons :param project: The name of the project + :param outfile: A file containing interaction :return: The communities within the graph and the community \ to wich each exon belong """ + warn = 0 logging.debug("Finding community ...") - communities_generator = community.asyn_lpa_communities(graph) - communities = list(communities_generator) + if not result_file.is_file(): + cmd = f"mpirun -np 1 {ConfigGraph.get_hipmcl_prog()} -M {outfile} " \ + f"-I 1.2 -per-process-mem 8 -o {result_file}" + sp.check_call(cmd, shell=True, stderr=sp.STDOUT) + communities = get_communities(result_file) dic_community = {} cov = round(community.coverage(graph, communities), 2) - #Â perf = community.performance(graph, communities) modularity = community.modularity(graph, communities, weight='X') - d = {'community': [], 'nodes': [], 'edges' : [], 'EC': [], 'HCS': [], + d = {'community': [], 'nodes': [], 'edges': [], 'EC': [], 'HCS': [], '%E vs E in complete G': [], 'cov': [], 'modularity': [], 'exons': []} colors = cm.hsv(np.linspace(0, 1, len(communities))) for k, c in enumerate(communities): subg = nx.subgraph(graph, c) nb_nodes = len(c) + if nb_nodes <= 1: + warn += 1 nb_edges = len(subg.edges) + try: + complete = round(nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100, + 2) + except ZeroDivisionError: + complete = np.nan edge_connectivity = nx.edge_connectivity(subg) is_hc = 'yes' if edge_connectivity > nb_nodes / 2 else 'no' for exon in c: dic_community[exon] = {'num': f'C{k + 1}', 'col': lighten_color(colors[k]) if is_hc == 'yes' else - (lighten_color(colors[k], 0.1) if nb_nodes > 2 else 'white') - } + (lighten_color(colors[k], 0.1) + if nb_nodes > 2 else 'white')} d['community'].append(f'C{k + 1}') d['nodes'].append(nb_nodes) d['edges'].append(nb_edges) d['EC'].append(edge_connectivity) d['HCS'].append(is_hc) - d['%E vs E in complete G'].append(round( - nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100, 2)) + d['%E vs E in complete G'].append(complete) d['exons'].append(', '.join(sorted(list(c)))) d['cov'].append(cov) d['modularity'].append(round(modularity, 5)) @@ -133,6 +160,9 @@ def find_communities(graph: nx.Graph, project: str graph.remove_nodes_from(c) d['project'] = [project] * len(d['community']) df = pd.DataFrame(d) + if warn > 0: + logging.warning(f" {warn} communities were composed by one exon or " + f"less !") return df, dic_community @@ -248,6 +278,38 @@ def get_figure_title(project, weight, global_weight, same_gene): return title +def write_interaction_file(arr_interaction: np.array, project: str, + weight: int, global_weight: int, same_gene: bool, + use_weight: bool = False): + """ + + :param arr_interaction: Each couples of co-localized exons within a \ + project. + :param project: The name of the project of interest + :param weight: The minimum weight of interaction to consider + :param global_weight: The global weight to consider. if \ + the global weight is equal to 0 then then density figure are calculated \ + by project, else all projet are merge together and the interaction \ + seen in `global_weight` project are taken into account + :param same_gene: Say if we consider as co-localised, exons within the \ + same gene (True) or not (False) (default False) + :param use_weight: Say if we want to write the weight into the result file. + :return: + """ + logging.debug('Writing interaction files ...') + outfile = ConfigGraph.get_community_file(project, weight, global_weight, + same_gene, "_interation.txt") + result = ConfigGraph.get_community_file(project, weight, global_weight, + same_gene, + "_interation_result.txt") + with outfile.open('w') as f: + for exon1, exon2, cweight in arr_interaction: + if not use_weight: + cweight = 1 + f.write(f"{exon1}\t{exon2}\t{cweight}\n") + return outfile, result + + def community_finder(weight: int, global_weight: int, project: str = "", same_gene=True, html_fig: bool = False, logging_level: str = "DISABLE"): @@ -271,8 +333,11 @@ def community_finder(weight: int, global_weight: int, project: str = "", cnx = sqlite3.connect(ConfigGraph.db_file) interaction = get_project_colocalisation(cnx, project, weight, global_weight, same_gene, True) + outfile, result_file = write_interaction_file(interaction, project, + weight, global_weight, + same_gene) graph = create_graph(interaction) - df, dic_community = find_communities(graph, project) + df, dic_community = find_communities2(graph, project, outfile, result_file) outfiles = [ConfigGraph.get_community_file( project, weight, global_weight, same_gene, ext) for ext in ['.txt', '.cyjs', '.html']] @@ -307,12 +372,13 @@ def get_projects(global_weight: int) -> List[str]: cnx.close() return res + def get_projects_name(global_weights: List[int]) -> Tuple[List[str], Dict]: """ Get projects name given a list of global_weight and a dictionary linking, each project name to it's corresponding global weight. - :param global_weight: The list of global weights to consider. if \ + :param global_weights: The list of global weights to consider. if \ the global weight is equal to 0 then then density figure are calculated \ by project, else all projet are merge together and the interaction \ seen in `global_weight` project are taken into account