Skip to content
Snippets Groups Projects
Commit 68c96e9a authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/community_finder.py: find communities using hipMCL method

parent cf1f8a3b
No related branches found
No related tags found
No related merge requests found
......@@ -25,6 +25,7 @@ from matplotlib.colors import to_rgba
from itertools import product
import multiprocessing as mp
import json
import subprocess as sp
def get_nodes(interaction: np.array) -> np.array:
......@@ -83,49 +84,75 @@ def write_cytoscape_graph(graph: nx.Graph, dic_community: Dict[str, str],
d['data']['community'] = dic_community[exon]['num']
res = json.dumps(data)
with open(outfile, 'w') as f:
f.write(res)
f.write(res)
def find_communities(graph: nx.Graph, project: str
def get_communities(result: Path) -> List[List[str]]:
"""
Get the communities inside the file `result`
:param result: A file containing the communities found by the hipMCL \
program.
:return: The list of communities find by the hipMCL program.
"""
communities = []
with result.open('r') as f:
for line in f:
communities.append(line.replace('\n', '').strip().split(' '))
return communities
def find_communities(graph: nx.Graph, project: str,
outfile: Path, result_file: Path
) -> Tuple[pd.DataFrame, Dict]:
"""
Find the communities within the graph.
:param graph: Graph of co-localized exons
:param project: The name of the project
:param outfile: A file containing interaction
:return: The communities within the graph and the community \
to wich each exon belong
"""
warn = 0
logging.debug("Finding community ...")
communities_generator = community.asyn_lpa_communities(graph)
communities = list(communities_generator)
if not result_file.is_file():
cmd = f"mpirun -np 1 {ConfigGraph.get_hipmcl_prog()} -M {outfile} " \
f"-I 1.2 -per-process-mem 8 -o {result_file}"
sp.check_call(cmd, shell=True, stderr=sp.STDOUT)
communities = get_communities(result_file)
dic_community = {}
cov = round(community.coverage(graph, communities), 2)
# perf = community.performance(graph, communities)
modularity = community.modularity(graph, communities, weight='X')
d = {'community': [], 'nodes': [], 'edges' : [], 'EC': [], 'HCS': [],
d = {'community': [], 'nodes': [], 'edges': [], 'EC': [], 'HCS': [],
'%E vs E in complete G': [],
'cov': [], 'modularity': [], 'exons': []}
colors = cm.hsv(np.linspace(0, 1, len(communities)))
for k, c in enumerate(communities):
subg = nx.subgraph(graph, c)
nb_nodes = len(c)
if nb_nodes <= 1:
warn += 1
nb_edges = len(subg.edges)
try:
complete = round(nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100,
2)
except ZeroDivisionError:
complete = np.nan
edge_connectivity = nx.edge_connectivity(subg)
is_hc = 'yes' if edge_connectivity > nb_nodes / 2 else 'no'
for exon in c:
dic_community[exon] = {'num': f'C{k + 1}',
'col': lighten_color(colors[k])
if is_hc == 'yes' else
(lighten_color(colors[k], 0.1) if nb_nodes > 2 else 'white')
}
(lighten_color(colors[k], 0.1)
if nb_nodes > 2 else 'white')}
d['community'].append(f'C{k + 1}')
d['nodes'].append(nb_nodes)
d['edges'].append(nb_edges)
d['EC'].append(edge_connectivity)
d['HCS'].append(is_hc)
d['%E vs E in complete G'].append(round(
nb_edges / (nb_nodes * (nb_nodes - 1) / 2) * 100, 2))
d['%E vs E in complete G'].append(complete)
d['exons'].append(', '.join(sorted(list(c))))
d['cov'].append(cov)
d['modularity'].append(round(modularity, 5))
......@@ -133,6 +160,9 @@ def find_communities(graph: nx.Graph, project: str
graph.remove_nodes_from(c)
d['project'] = [project] * len(d['community'])
df = pd.DataFrame(d)
if warn > 0:
logging.warning(f" {warn} communities were composed by one exon or "
f"less !")
return df, dic_community
......@@ -248,6 +278,38 @@ def get_figure_title(project, weight, global_weight, same_gene):
return title
def write_interaction_file(arr_interaction: np.array, project: str,
weight: int, global_weight: int, same_gene: bool,
use_weight: bool = False):
"""
:param arr_interaction: Each couples of co-localized exons within a \
project.
:param project: The name of the project of interest
:param weight: The minimum weight of interaction to consider
:param global_weight: The global weight to consider. if \
the global weight is equal to 0 then then density figure are calculated \
by project, else all projet are merge together and the interaction \
seen in `global_weight` project are taken into account
:param same_gene: Say if we consider as co-localised, exons within the \
same gene (True) or not (False) (default False)
:param use_weight: Say if we want to write the weight into the result file.
:return:
"""
logging.debug('Writing interaction files ...')
outfile = ConfigGraph.get_community_file(project, weight, global_weight,
same_gene, "_interation.txt")
result = ConfigGraph.get_community_file(project, weight, global_weight,
same_gene,
"_interation_result.txt")
with outfile.open('w') as f:
for exon1, exon2, cweight in arr_interaction:
if not use_weight:
cweight = 1
f.write(f"{exon1}\t{exon2}\t{cweight}\n")
return outfile, result
def community_finder(weight: int, global_weight: int, project: str = "",
same_gene=True, html_fig: bool = False,
logging_level: str = "DISABLE"):
......@@ -271,8 +333,11 @@ def community_finder(weight: int, global_weight: int, project: str = "",
cnx = sqlite3.connect(ConfigGraph.db_file)
interaction = get_project_colocalisation(cnx, project, weight,
global_weight, same_gene, True)
outfile, result_file = write_interaction_file(interaction, project,
weight, global_weight,
same_gene)
graph = create_graph(interaction)
df, dic_community = find_communities(graph, project)
df, dic_community = find_communities2(graph, project, outfile, result_file)
outfiles = [ConfigGraph.get_community_file(
project, weight, global_weight, same_gene, ext)
for ext in ['.txt', '.cyjs', '.html']]
......@@ -307,12 +372,13 @@ def get_projects(global_weight: int) -> List[str]:
cnx.close()
return res
def get_projects_name(global_weights: List[int]) -> Tuple[List[str], Dict]:
"""
Get projects name given a list of global_weight and a dictionary linking,
each project name to it's corresponding global weight.
:param global_weight: The list of global weights to consider. if \
:param global_weights: The list of global weights to consider. if \
the global weight is equal to 0 then then density figure are calculated \
by project, else all projet are merge together and the interaction \
seen in `global_weight` project are taken into account
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment