From 39d972e6fc816dfda9ab5dcb741079d12fe9f3d7 Mon Sep 17 00:00:00 2001 From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr> Date: Fri, 26 Jun 2020 17:27:50 +0200 Subject: [PATCH] src/find_interaction_cluster/community_finder.py: creation of lighten_color and modification of find_communities to add some data to the communities found and to change the color of the communities according to those data + creation of multiple_community_launcher to find communities for many projects or weights --- .../community_finder.py | 149 ++++++++++++++---- 1 file changed, 121 insertions(+), 28 deletions(-) diff --git a/src/find_interaction_cluster/community_finder.py b/src/find_interaction_cluster/community_finder.py index 8974a385..a7a36c61 100644 --- a/src/find_interaction_cluster/community_finder.py +++ b/src/find_interaction_cluster/community_finder.py @@ -19,13 +19,11 @@ import logging import plotly.graph_objects as go import plotly from pathlib import Path -from typing import Tuple, Dict +from typing import Tuple, Dict, List import matplotlib.cm as cm -from matplotlib.colors import rgb2hex - - -class BadProjectName(Exception): - pass +from matplotlib.colors import to_rgba +from itertools import product +import multiprocessing as mp def get_nodes(interaction: np.array) -> np.array: @@ -56,6 +54,19 @@ def create_graph(interaction: np.array) -> nx.Graph: return graph +def lighten_color(color: np.array, lighten: float = 1): + """ + + :param color: A tuple of 3 float + :param lighten: The alpha (opacity) value appended to the RGB color. 
+ :return: The color lightened + """ + color = list(to_rgba(color)) + color = [int(c * 255) for c in color[0:3]] + [lighten] + color = 'rgba(' + ', '.join(map(str, color)) + ')' + return color + + def find_communities(graph: nx.Graph, project: str ) -> Tuple[pd.DataFrame, Dict]: """ @@ -67,24 +78,37 @@ def find_communities(graph: nx.Graph, project: str to wich each exon belong """ logging.debug("Finding community ...") - communities_generator = community.girvan_newman(graph) - communities = next(communities_generator) + communities_generator = community.label_propagation_communities(graph) + communities = list(communities_generator) dic_community = {} - cov = community.coverage(graph, communities) - perf = community.performance(graph, communities) - d = {'community': [], 'size': [], 'cov': [], 'perf': [], 'exons': []} - colors = cm.rainbow(np.linspace(0, 1, len(communities))) + cov = round(community.coverage(graph, communities), 2) + perf = np.nan # community.performance(graph, communities) + d = {'community': [], 'nodes': [], 'edges' : [], 'EC': [], 'HCS': [], + '%E vs E in complete G': [], + 'cov': [], 'perf': [], 'exons': []} + colors = cm.hsv(np.linspace(0, 1, len(communities))) for k, c in enumerate(communities): - clen = len(c) + subg = nx.subgraph(graph, c) + nb_nodes = len(c) + nb_edges = len(subg.edges) + edge_connectivity = nx.edge_connectivity(subg) + is_hc = 'yes' if edge_connectivity > nb_nodes / 2 else 'no' for exon in c: dic_community[exon] = {'num': f'C{k + 1}', - 'col': rgb2hex(colors[k][:3]) - if clen > 2 else 'white'} + 'col': lighten_color(colors[k]) + if is_hc == 'yes' else + (lighten_color(colors[k], 0.1) if nb_nodes > 2 else 'white') + } d['community'].append(f'C{k + 1}') - d['size'].append(len(list(c))) + d['nodes'].append(nb_nodes) + d['edges'].append(nb_edges) + d['EC'].append(edge_connectivity) + d['HCS'].append(is_hc) + d['%E vs E in complete G'].append(round( + nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100, 2)) d['exons'].append(', 
'.join(list(c))) - d['cov'].append(round(cov, 5)) - d['perf'].append(round(perf, 5)) + d['cov'].append(cov) + d['perf'].append(perf) d['project'] = [project] * len(d['community']) df = pd.DataFrame(d) return df, dic_community @@ -220,16 +244,6 @@ def community_finder(weight: int, global_weight: int, project: str = "", """ ConfigGraph.output_folder.mkdir(exist_ok=True, parents=True) logging_def(ConfigGraph.output_folder, __file__, logging_level) - if project != "" and global_weight != 0: - msg = "A project name was given given along with a " \ - "global weight != 0 ! You should not give a project name if " \ - "you want to concider many project together" - logging.exception(msg) - raise BadProjectName(msg) - if project == "" and global_weight == 0: - msg = "A project name must be given when global weight == 0" - logging.exception(msg) - raise BadProjectName cnx = sqlite3.connect(ConfigGraph.db_file) interaction = get_project_colocalisation(cnx, project, weight, global_weight, same_gene, True) @@ -242,3 +256,82 @@ def community_finder(weight: int, global_weight: int, project: str = "", same_gene, is_fig=True) fig_title = get_figure_title(project, weight, global_weight, same_gene) create_figure(graph, figure, dic_community, fig_title) + + +def get_projects(global_weight: int) -> List[str]: + """ + Get projects name. + + :param global_weight: The global weight to consider. 
If \ + the global weight is equal to 0 then the density figures are calculated \ + by project, else all projects are merged together and the interactions \ + seen in `global_weight` projects are taken into account + :return: The list of the projects to consider + """ + if global_weight != 0: + return [f'Global-weight-{global_weight}'] + else: + cnx = sqlite3.connect(ConfigGraph.db_file) + c = cnx.cursor() + query = f"SELECT DISTINCT id_sample " \ + f"FROM cin_projects " \ + f"WHERE id_sample = 'GSM1872888' " + c.execute(query) + res = list(np.asarray(c.fetchall()).flatten()) + c.close() + cnx.close() + return res + +def get_projects_name(global_weights: List[int]) -> Tuple[List[str], Dict]: + """ + Get the project names for a list of global weights, and a dictionary linking + each project name to its corresponding global weight. + + :param global_weights: The list of global weights to consider. If \ + the global weight is equal to 0 then the density figures are calculated \ + by project, else all projects are merged together and the interactions \ + seen in `global_weight` projects are taken into account + :return: The project names and a dictionary linking + each name to its corresponding global weight. + """ + dic = {} + projects = [] + for global_weight in global_weights: + tmp = get_projects(global_weight) + projects += tmp + for p in tmp: + dic[p] = global_weight + return projects, dic + + +def multiple_community_launcher(ps: int, weights: List[int], + global_weights: List[int], + same_gene: bool, + logging_level: str = "DISABLE"): + """ + :param ps: The number of processes we want to use. + :param weights: The list of weights of interaction to consider + :param global_weights: The list of global weights to consider. 
If \ + the global weight is equal to 0 then the density figures are calculated \ + by project, else all projects are merged together and the interactions \ + seen in `global_weight` projects are taken into account + :param same_gene: Whether exons within the \ + same gene are considered co-localised + :param logging_level: Level of information to display + """ + ConfigGraph.community_folder.mkdir(exist_ok=True, parents=True) + logging_def(ConfigGraph.community_folder, __file__, logging_level) + global_weights = list(np.unique(global_weights)) + weights = list(np.unique(weights)) + projects, dic_project = get_projects_name(global_weights) + condition = list(product(projects, weights)) + processes = [] + pool = mp.Pool(processes=min(ps, len(condition))) + for project, weight in condition: + global_weight = dic_project[project] + logging.info(f'Finding community for project : {project}, ' + f'global_weight : {global_weight}, weight: {weight}') + args = [weight, global_weight, project, same_gene] + processes.append(pool.apply_async(community_finder, args)) + for proc in processes: + proc.get(timeout=None) -- GitLab