Skip to content
Snippets Groups Projects
Commit 4eeae6f6 authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/community_finder.py: creation of...

src/find_interaction_cluster/community_finder.py: creation of write_cytoscape_graph to create a cytoscape network + modification of find_communities to use this previous function. Modification of create_figure to use a new layout for a better display. Add parameter html_fig for community_finder and multiple_community_launcher functions
parent a2db1464
Branches
No related tags found
No related merge requests found
...@@ -24,6 +24,7 @@ import matplotlib.cm as cm ...@@ -24,6 +24,7 @@ import matplotlib.cm as cm
from matplotlib.colors import to_rgba from matplotlib.colors import to_rgba
from itertools import product from itertools import product
import multiprocessing as mp import multiprocessing as mp
import json
def get_nodes(interaction: np.array) -> np.array: def get_nodes(interaction: np.array) -> np.array:
...@@ -67,6 +68,24 @@ def lighten_color(color: np.array, lighten: float = 1): ...@@ -67,6 +68,24 @@ def lighten_color(color: np.array, lighten: float = 1):
return color return color
def write_cytoscape_graph(graph: nx.Graph, dic_community: Dict[str, Dict],
                          outfile: str):
    """
    Write the network in cyjs (cytoscape JSON) format, usable by cytoscape.

    Every node of the exported network receives a 'community' attribute
    holding the number of the community it belongs to.

    :param graph: The network
    :param dic_community: dictionary linking each node id to the
        description of its community. Each description must provide a
        'num' entry (the community number).
    :param outfile: The file where the cytoscape network will be written
    :raises KeyError: if a node of the graph has no entry in dic_community
    """
    data = nx.cytoscape_data(graph)
    for node in data['elements']['nodes']:
        node_data = node['data']
        node_data['community'] = dic_community[node_data['id']]['num']
    # Serialize before opening the file: if the data cannot be encoded in
    # JSON, no empty or truncated output file is left behind.
    content = json.dumps(data)
    with open(outfile, 'w', encoding='utf-8') as f:
        f.write(content)
def find_communities(graph: nx.Graph, project: str def find_communities(graph: nx.Graph, project: str
) -> Tuple[pd.DataFrame, Dict]: ) -> Tuple[pd.DataFrame, Dict]:
""" """
...@@ -78,14 +97,15 @@ def find_communities(graph: nx.Graph, project: str ...@@ -78,14 +97,15 @@ def find_communities(graph: nx.Graph, project: str
to wich each exon belong to wich each exon belong
""" """
logging.debug("Finding community ...") logging.debug("Finding community ...")
communities_generator = community.label_propagation_communities(graph) communities_generator = community.asyn_lpa_communities(graph)
communities = list(communities_generator) communities = list(communities_generator)
dic_community = {} dic_community = {}
cov = round(community.coverage(graph, communities), 2) cov = round(community.coverage(graph, communities), 2)
perf = np.nan # community.performance(graph, communities) # perf = community.performance(graph, communities)
modularity = community.modularity(graph, communities, weight='X')
d = {'community': [], 'nodes': [], 'edges' : [], 'EC': [], 'HCS': [], d = {'community': [], 'nodes': [], 'edges' : [], 'EC': [], 'HCS': [],
'%E vs E in complete G': [], '%E vs E in complete G': [],
'cov': [], 'perf': [], 'exons': []} 'cov': [], 'modularity': [], 'exons': []}
colors = cm.hsv(np.linspace(0, 1, len(communities))) colors = cm.hsv(np.linspace(0, 1, len(communities)))
for k, c in enumerate(communities): for k, c in enumerate(communities):
subg = nx.subgraph(graph, c) subg = nx.subgraph(graph, c)
...@@ -106,9 +126,11 @@ def find_communities(graph: nx.Graph, project: str ...@@ -106,9 +126,11 @@ def find_communities(graph: nx.Graph, project: str
d['HCS'].append(is_hc) d['HCS'].append(is_hc)
d['%E vs E in complete G'].append(round( d['%E vs E in complete G'].append(round(
nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100, 2)) nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100, 2))
d['exons'].append(', '.join(list(c))) d['exons'].append(', '.join(sorted(list(c))))
d['cov'].append(cov) d['cov'].append(cov)
d['perf'].append(perf) d['modularity'].append(round(modularity, 5))
if nb_nodes <= 9:
graph.remove_nodes_from(c)
d['project'] = [project] * len(d['community']) d['project'] = [project] * len(d['community'])
df = pd.DataFrame(d) df = pd.DataFrame(d)
return df, dic_community return df, dic_community
...@@ -121,7 +143,7 @@ def create_figure(graph: nx.Graph, outfile: Path, dic_community: Dict, ...@@ -121,7 +143,7 @@ def create_figure(graph: nx.Graph, outfile: Path, dic_community: Dict,
:param graph: The network object :param graph: The network object
:param outfile: File were the figure will be created :param outfile: File were the figure will be created
:param dic_community: dictionary linking each njode to it's community. :param dic_community: dictionary linking each node to it's community.
:param title: The title of the figure :param title: The title of the figure
""" """
logging.debug("Creating figure") logging.debug("Creating figure")
...@@ -130,7 +152,7 @@ def create_figure(graph: nx.Graph, outfile: Path, dic_community: Dict, ...@@ -130,7 +152,7 @@ def create_figure(graph: nx.Graph, outfile: Path, dic_community: Dict,
middle_x = [] middle_x = []
middle_y = [] middle_y = []
weight = [] weight = []
nodes = nx.spring_layout(graph, seed=1) nodes = nx.nx_pydot.graphviz_layout(graph, prog="neato")
for edge in graph.edges(): for edge in graph.edges():
x0, y0 = nodes[edge[0]] x0, y0 = nodes[edge[0]]
x1, y1 = nodes[edge[1]] x1, y1 = nodes[edge[1]]
...@@ -227,7 +249,8 @@ def get_figure_title(project, weight, global_weight, same_gene): ...@@ -227,7 +249,8 @@ def get_figure_title(project, weight, global_weight, same_gene):
def community_finder(weight: int, global_weight: int, project: str = "", def community_finder(weight: int, global_weight: int, project: str = "",
same_gene=True, logging_level: str = "DISABLE"): same_gene=True, html_fig: bool = False,
logging_level: str = "DISABLE"):
""" """
Find communities inside co-localisation between exons found in \ Find communities inside co-localisation between exons found in \
a ChIA-PET project. a ChIA-PET project.
...@@ -241,6 +264,7 @@ def community_finder(weight: int, global_weight: int, project: str = "", ...@@ -241,6 +264,7 @@ def community_finder(weight: int, global_weight: int, project: str = "",
:param same_gene: Say if we consider as co-localised, exons within the \ :param same_gene: Say if we consider as co-localised, exons within the \
same gene (True) or not (False) (default False) same gene (True) or not (False) (default False)
:param logging_level: The level of data to display (default 'DISABLE') :param logging_level: The level of data to display (default 'DISABLE')
:param html_fig: True to create an html figure, false else
""" """
ConfigGraph.output_folder.mkdir(exist_ok=True, parents=True) ConfigGraph.output_folder.mkdir(exist_ok=True, parents=True)
logging_def(ConfigGraph.output_folder, __file__, logging_level) logging_def(ConfigGraph.output_folder, __file__, logging_level)
...@@ -249,13 +273,14 @@ def community_finder(weight: int, global_weight: int, project: str = "", ...@@ -249,13 +273,14 @@ def community_finder(weight: int, global_weight: int, project: str = "",
global_weight, same_gene, True) global_weight, same_gene, True)
graph = create_graph(interaction) graph = create_graph(interaction)
df, dic_community = find_communities(graph, project) df, dic_community = find_communities(graph, project)
outfile = ConfigGraph.get_community_file(project, weight, global_weight, outfiles = [ConfigGraph.get_community_file(
same_gene, is_fig=False) project, weight, global_weight, same_gene, ext)
df.to_csv(outfile, sep="\t", index=False) for ext in ['.txt', '.cyjs', '.html']]
figure = ConfigGraph.get_community_file(project, weight, global_weight, df.to_csv(outfiles[0], sep="\t", index=False)
same_gene, is_fig=True) write_cytoscape_graph(graph, dic_community, outfiles[1])
fig_title = get_figure_title(project, weight, global_weight, same_gene) if html_fig:
create_figure(graph, figure, dic_community, fig_title) fig_title = get_figure_title(project, weight, global_weight, same_gene)
create_figure(graph, outfiles[2], dic_community, fig_title)
def get_projects(global_weight: int) -> List[str]: def get_projects(global_weight: int) -> List[str]:
...@@ -306,7 +331,7 @@ def get_projects_name(global_weights: List[int]) -> Tuple[List[str], Dict]: ...@@ -306,7 +331,7 @@ def get_projects_name(global_weights: List[int]) -> Tuple[List[str], Dict]:
def multiple_community_launcher(ps: int, weights: List[int], def multiple_community_launcher(ps: int, weights: List[int],
global_weights: List[int], global_weights: List[int],
same_gene: bool, same_gene: bool, html_fig: bool = False,
logging_level: str = "DISABLE"): logging_level: str = "DISABLE"):
""" """
:param ps: The number of processes we want to use. :param ps: The number of processes we want to use.
...@@ -317,6 +342,7 @@ def multiple_community_launcher(ps: int, weights: List[int], ...@@ -317,6 +342,7 @@ def multiple_community_launcher(ps: int, weights: List[int],
seen in `global_weight` project are taken into account seen in `global_weight` project are taken into account
:param same_gene: Say if we consider as co-localised exon within the \ :param same_gene: Say if we consider as co-localised exon within the \
same gene same gene
:param html_fig: True to create an html figure, false else
:param logging_level: Level of information to display :param logging_level: Level of information to display
""" """
ConfigGraph.community_folder.mkdir(exist_ok=True, parents=True) ConfigGraph.community_folder.mkdir(exist_ok=True, parents=True)
...@@ -331,7 +357,7 @@ def multiple_community_launcher(ps: int, weights: List[int], ...@@ -331,7 +357,7 @@ def multiple_community_launcher(ps: int, weights: List[int],
global_weight = dic_project[project] global_weight = dic_project[project]
logging.info(f'Finding community for project : {project}, ' logging.info(f'Finding community for project : {project}, '
f'global_weight : {global_weight}, weight: {weight}') f'global_weight : {global_weight}, weight: {weight}')
args = [weight, global_weight, project, same_gene] args = [weight, global_weight, project, same_gene, html_fig]
processes.append(pool.apply_async(community_finder, args)) processes.append(pool.apply_async(community_finder, args))
for proc in processes: for proc in processes:
proc.get(timeout=None) proc.get(timeout=None)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment