diff --git a/src/find_interaction_cluster/community_finder.py b/src/find_interaction_cluster/community_finder.py index a7a36c611262e2fd1929b1f9459d7ac7a260fa53..cec2033276b1b13153fdb4b3addb0e889cd31b3d 100644 --- a/src/find_interaction_cluster/community_finder.py +++ b/src/find_interaction_cluster/community_finder.py @@ -24,6 +24,7 @@ import matplotlib.cm as cm from matplotlib.colors import to_rgba from itertools import product import multiprocessing as mp +import json def get_nodes(interaction: np.array) -> np.array: @@ -67,6 +68,24 @@ def lighten_color(color: np.array, lighten: float = 1): return color +def write_cytoscape_graph(graph: nx.Graph, dic_community: Dict[str, str], + outfile: str): + """ + Write a dictionary in cyjs format usable by cytoscape. + + :param graph: The network + :param dic_community: dictionary linking each njode to it's community. + :param outfile: The name of the graph + """ + data = nx.cytoscape_data(graph) + for d in data['elements']['nodes']: + exon = d['data']['id'] + d['data']['community'] = dic_community[exon]['num'] + res = json.dumps(data) + with open(outfile, 'w') as f: + f.write(res) + + def find_communities(graph: nx.Graph, project: str ) -> Tuple[pd.DataFrame, Dict]: """ @@ -78,14 +97,15 @@ def find_communities(graph: nx.Graph, project: str to wich each exon belong """ logging.debug("Finding community ...") - communities_generator = community.label_propagation_communities(graph) + communities_generator = community.asyn_lpa_communities(graph) communities = list(communities_generator) dic_community = {} cov = round(community.coverage(graph, communities), 2) - perf = np.nan # community.performance(graph, communities) + #Â perf = community.performance(graph, communities) + modularity = community.modularity(graph, communities, weight='X') d = {'community': [], 'nodes': [], 'edges' : [], 'EC': [], 'HCS': [], '%E vs E in complete G': [], - 'cov': [], 'perf': [], 'exons': []} + 'cov': [], 'modularity': [], 'exons': []} colors = cm.hsv(np.linspace(0, 1, len(communities))) for k, c in enumerate(communities): subg = nx.subgraph(graph, c) @@ -106,9 +126,11 @@ def find_communities(graph: nx.Graph, project: str d['HCS'].append(is_hc) d['%E vs E in complete G'].append(round( nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100, 2)) - d['exons'].append(', '.join(list(c))) + d['exons'].append(', '.join(sorted(list(c)))) d['cov'].append(cov) - d['perf'].append(perf) + d['modularity'].append(round(modularity, 5)) + if nb_nodes <= 9: + graph.remove_nodes_from(c) d['project'] = [project] * len(d['community']) df = pd.DataFrame(d) return df, dic_community @@ -121,7 +143,7 @@ def create_figure(graph: nx.Graph, outfile: Path, dic_community: Dict, :param graph: The network object :param outfile: File were the figure will be created - :param dic_community: dictionary linking each njode to it's community. + :param dic_community: dictionary linking each node to it's community. :param title: The title of the figure """ logging.debug("Creating figure") @@ -130,7 +152,7 @@ def create_figure(graph: nx.Graph, outfile: Path, dic_community: Dict, middle_x = [] middle_y = [] weight = [] - nodes = nx.spring_layout(graph, seed=1) + nodes = nx.nx_pydot.graphviz_layout(graph, prog="neato") for edge in graph.edges(): x0, y0 = nodes[edge[0]] x1, y1 = nodes[edge[1]] @@ -227,7 +249,8 @@ def get_figure_title(project, weight, global_weight, same_gene): def community_finder(weight: int, global_weight: int, project: str = "", - same_gene=True, logging_level: str = "DISABLE"): + same_gene=True, html_fig: bool = False, + logging_level: str = "DISABLE"): """ Find communities inside co-localisation between exons found in \ a ChIA-PET project. @@ -241,6 +264,7 @@ def community_finder(weight: int, global_weight: int, project: str = "", :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) :param logging_level: The level of data to display (default 'DISABLE') + :param html_fig: True to create an html figure, false else """ ConfigGraph.output_folder.mkdir(exist_ok=True, parents=True) logging_def(ConfigGraph.output_folder, __file__, logging_level) @@ -249,13 +273,14 @@ def community_finder(weight: int, global_weight: int, project: str = "", global_weight, same_gene, True) graph = create_graph(interaction) df, dic_community = find_communities(graph, project) - outfile = ConfigGraph.get_community_file(project, weight, global_weight, - same_gene, is_fig=False) - df.to_csv(outfile, sep="\t", index=False) - figure = ConfigGraph.get_community_file(project, weight, global_weight, - same_gene, is_fig=True) - fig_title = get_figure_title(project, weight, global_weight, same_gene) - create_figure(graph, figure, dic_community, fig_title) + outfiles = [ConfigGraph.get_community_file( + project, weight, global_weight, same_gene, ext) + for ext in ['.txt', '.cyjs', '.html']] + df.to_csv(outfiles[0], sep="\t", index=False) + write_cytoscape_graph(graph, dic_community, outfiles[1]) + if html_fig: + fig_title = get_figure_title(project, weight, global_weight, same_gene) + create_figure(graph, outfiles[2], dic_community, fig_title) def get_projects(global_weight: int) -> List[str]: @@ -306,7 +331,7 @@ def get_projects_name(global_weights: List[int]) -> Tuple[List[str], Dict]: def multiple_community_launcher(ps: int, weights: List[int], global_weights: List[int], - same_gene: bool, + same_gene: bool, html_fig: bool = False, logging_level: str = "DISABLE"): """ :param ps: The number of processes we want to use. @@ -317,6 +342,7 @@ def multiple_community_launcher(ps: int, weights: List[int], seen in `global_weight` project are taken into account :param same_gene: Say if we consider as co-localised exon within the \ same gene + :param html_fig: True to create an html figure, false else :param logging_level: Level of information to display """ ConfigGraph.community_folder.mkdir(exist_ok=True, parents=True) @@ -331,7 +357,7 @@ def multiple_community_launcher(ps: int, weights: List[int], global_weight = dic_project[project] logging.info(f'Finding community for project : {project}, ' f'global_weight : {global_weight}, weight: {weight}') - args = [weight, global_weight, project, same_gene] + args = [weight, global_weight, project, same_gene, html_fig] processes.append(pool.apply_async(community_finder, args)) for proc in processes: proc.get(timeout=None)