Skip to content
Snippets Groups Projects
Commit c22b1d42 authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/graph_figures/sf_community_graph.py: graph...

src/find_interaction_cluster/graph_figures/sf_community_graph.py: graph creation displaying the community regulated by many splicing factors and how they interact between them
parent 28d3ff0e
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: Create a graph figure showing which communities are regulated \
by which splicing factor
"""
import networkx as nx
from typing import List
import pandas as pd
import sqlite3
import numpy as np
from .graph_functions import Parameters, recover_json_graph_of_interest, \
Config, get_regulation_table, get_community_tables, merge_dataframes, \
get_regulated_community, subgraph_creation, load_graphic, check_if_exist
from .create_community_node_graph import create_community_sized_graph
from ..sf_and_communities import get_sfname
import matplotlib.cm as cm
from pathlib import Path
import json
from pyvis.network import Network
import lazyparser as lp
from matplotlib.colors import to_hex
def add_node_color(c_graph: nx.Graph, sf_name: str, color: str,
                   communities: List[str]) -> nx.Graph:
    """
    Tag, inside the community graph, the communities regulated by a
    splicing factor.

    Each node named in ``communities`` is modified in place:

    * a node without a ``reg`` attribute receives ``reg = sf_name`` and
      ``color = color``;
    * a node that already has a ``reg`` attribute gets ``sf_name`` appended
      to it (separated by ``', '``), keeps its current color and has its
      ``shape`` set to ``'triangle'``;
    * ``sf_name`` is stored in ``title``, or appended to an existing title
      after a ``<br/>`` separator.

    :param c_graph: The community graph to update
    :param sf_name: The name of the splicing factor regulating the
        communities given in ``communities``
    :param color: The color given to communities tagged for the first time
    :param communities: The names of the nodes (communities) to update
    :return: ``c_graph`` itself, once updated

    >>> g = nx.Graph()
    >>> g.add_nodes_from(list("ABC"))
    >>> g.add_edges_from([("A", "B"), ("A", "C")])
    >>> ng = add_node_color(g, "TRA2_down", "red", ["A", "B"])
    >>> list(ng.nodes[x] for x in list("ABC")) == [{'reg': 'TRA2_down',
    ... 'color': 'red', 'title': 'TRA2_down'}, {'reg': 'TRA2_down',
    ... 'color': 'red', 'title': 'TRA2_down'}, {}]
    True
    >>> ng = add_node_color(ng, "SR1_up", "white", ["A", "C"])
    >>> list(ng.nodes[x] for x in list("ABC")) == [{'reg':
    ... 'TRA2_down, SR1_up', 'color': 'red', 'title': 'TRA2_down<br/>SR1_up',
    ... 'shape': 'triangle'}, {'reg': 'TRA2_down', 'color': 'red',
    ... 'title': 'TRA2_down'}, {'reg': 'SR1_up', 'color': 'white',
    ... 'title': 'SR1_up'}]
    True
    """
    for community in communities:
        # The attribute mapping of the node: writing to it updates the graph.
        attributes = c_graph.nodes[community]
        if "reg" in attributes:
            # Already regulated by another factor: extend the list and
            # switch the shape, the first color is left untouched.
            attributes["reg"] += f", {sf_name}"
            attributes["shape"] = 'triangle'
        else:
            attributes["reg"] = sf_name
            attributes["color"] = color
        if "title" in attributes:
            attributes["title"] += f"<br/>{sf_name}"
        else:
            attributes["title"] = sf_name
    return c_graph
def get_regulated_communities(c_graph: nx.Graph) -> List[str]:
    """
    List the communities that have been tagged as regulated.

    A community counts as regulated when its node carries a ``reg``
    attribute. Nodes are reported in the iteration order of
    ``c_graph.nodes``.

    :param c_graph: The community graph to inspect
    :return: The names of the nodes having a ``reg`` attribute

    >>> g = nx.Graph()
    >>> g.add_nodes_from(list("ABC"))
    >>> g.add_edges_from([("A", "B"), ("A", "C")])
    >>> ng = add_node_color(g, "TRA2_down", "red", ["A", "B"])
    >>> get_regulated_communities(ng)
    ['A', 'B']
    """
    regulated = []
    for community in c_graph.nodes:
        if "reg" in c_graph.nodes[community]:
            regulated.append(community)
    return regulated
def select_splicing_factors(sf_list: List[str]) -> List[str]:
    """
    Resolve the special ``ALL`` keyword in a list of splicing factors.

    :param sf_list: A list of splicing factors of interest
    :return: The result of ``get_sfname()`` when ``sf_list`` contains
        ``"ALL"``, otherwise ``sf_list`` itself, unchanged

    >>> select_splicing_factors(list("ABC"))
    ['A', 'B', 'C']
    """
    if "ALL" in sf_list:
        return get_sfname()
    return sf_list
def get_title(nb_sf: int, reg: str, threshold: float, min_reg: int,
              feature: str, iteration: int) -> str:
    """
    Build the title of the community figure.

    When ``iteration`` is below 20 the title describes the frequency
    threshold that was applied, otherwise it mentions the permutation test.

    :param nb_sf: The number of splicing factors analysed
    :param reg: The name of the regulation chosen. ``"reg"`` is displayed
        as ``regulated``, any other value as ``<reg>-regulated``
    :param threshold: Minimum frequency of genes regulated in a community
        to select it (displayed as a percentage)
    :param min_reg: The minimum number of regulated features in a community
        to take it into account
    :param feature: The kind of feature analysed (pluralised with an ``s``
        in the title)
    :param iteration: The number of iterations performed
    :return: The title of the figure

    >>> get_title(5, "down", 0.1, 2, "gene", 0) == (
    ...     "Figure of the communities containing at least 10.0 % of their "
    ...     "genes (or more than 2 genes) down-regulated by 5 splicing "
    ...     "factors")
    True
    >>> get_title(5, "reg", 0.1, 2, "exon", 20) == (
    ...     "Figure of the communities enriched in exons regulated by 5 "
    ...     "splicing factors (permutation test 20 iteration)")
    True
    """
    reg = "regulated" if reg == "reg" else f"{reg}-regulated"
    if iteration < 20:
        # threshold * 100 is not exact in binary floating point
        # (0.1 * 100 == 10.000000000000002): rounding removes that noise
        # from the displayed percentage while keeping real decimals.
        percentage = round(threshold * 100, 10)
        return f"Figure of the communities containing at least " \
               f"{percentage} % of their {feature}s (or more than " \
               f"{min_reg} {feature}s) {reg} by {nb_sf} splicing factors"
    return f"Figure of the communities enriched in {feature}s" \
           f" {reg} by {nb_sf} splicing factors (permutation test " \
           f"{iteration} iteration)"
def get_outfiles(c_graph_file: Path, sf_list: List[str], threshold: float,
                 min_reg: int, min_community: int, min_community_size: int,
                 iteration: int) -> List[Path]:
    """
    Build the paths of the json and html files used to store and
    visualise the graph.

    The files are located in the ``graph_figures/community_level`` folder
    next to ``c_graph_file``; this folder is created if it does not exist.

    :param c_graph_file: The file containing the community level graph
    :param sf_list: The list of splicing factors of interest (only its
        length is used in the file name)
    :param threshold: Minimum frequency of genes regulated in a community
        to select it
    :param min_reg: The minimum number of regulated features in a community
        to take it into account (only used in the name if iteration < 20)
    :param min_community: The minimum number of enriched communities
        required to produce a figure (only used in the name if
        iteration < 20)
    :param min_community_size: The minimum size used to consider communities
    :param iteration: The number of iterations of the permutation test. It
        replaces ``min_reg`` and ``min_community`` in the file name when
        it is greater than or equal to 20
    :return: Two paths sharing the same stem: the json file then the
        html file
    """
    outfolder = c_graph_file.parent / "graph_figures" / "community_level"
    outfolder.mkdir(parents=True, exist_ok=True)
    # Only the middle part of the name depends on the selection method.
    if iteration < 20:
        criteria = f"min-reg-{min_reg}_min-com_{min_community}"
    else:
        criteria = f"iteration-{iteration}"
    stem = f"community_graph_{len(sf_list)}_sf_t{threshold}_" \
           f"{criteria}_min-size-{min_community_size}"
    # The stem may contain dots (e.g. t0.1), so the extension is appended
    # as plain text.
    return [outfolder / f"{stem}.{ext}" for ext in ("json", "html")]
def update_community_graphic(p: Parameters, c_graph: nx.Graph, color: str,
                             sf_name: str, reg: str,
                             threshold: float, min_reg: int = 2,
                             iteration: int = 0, min_community: int = 3,
                             min_community_size: int = 10) -> nx.Graph:
    """
    Tag, in the community graph, the communities regulated by one
    splicing factor.

    The regulation table of ``sf_name`` is merged with the community table,
    communities smaller than ``min_community_size`` are discarded and the
    regulated communities are recovered with ``get_regulated_community``.
    The graph is only updated when at least ``min_community`` communities
    are found.

    :param p: A class containing configurations
    :param c_graph: A community level graph
    :param color: Color of the regulated nodes
    :param sf_name: The name of the splicing factor of interest
    :param reg: The name of the regulation chosen
    :param threshold: Minimum frequency of genes regulated in a community
        to select it (but it must also contain at least min_reg regulated
        genes)
    :param min_reg: The minimum number of regulated exons in a community
        to take it into account
    :param iteration: If this parameter is greater than or equal to 20 then
        a permutation test is made to find the significantly enriched
        communities
    :param min_community: The minimum number of enriched communities
        required to update the graph
    :param min_community_size: The minimum size used to consider communities
    :return: ``c_graph``, tagged with ``<sf_name>_<reg>`` on the regulated
        communities when enough of them were found, unchanged otherwise
    """
    print(f"Working on {sf_name}, {reg}")
    graph_file, comm_file = recover_json_graph_of_interest(p)
    check_if_exist(graph_file)
    # The connection is only needed to build the regulation table: close it
    # explicitly instead of leaving one open connection per splicing factor.
    # NOTE(review): assumes get_regulation_table returns a fully loaded
    # table that no longer needs the connection - confirm.
    cnx = sqlite3.connect(Config.db_file)
    try:
        reg_table = get_regulation_table(cnx, sf_name, reg, p.feature)
    finally:
        cnx.close()
    df_com_file = pd.read_csv(comm_file, sep="\t")
    df_com = get_community_tables(df_com_file, p.feature)
    full_com = merge_dataframes(reg_table, df_com, p.feature)
    # Features located in too small communities lose their community.
    full_com.loc[full_com["community_size"] < min_community_size,
                 ["community", "community_size"]] = [np.nan, np.nan]
    list_communities = get_regulated_community(full_com, p.feature, threshold,
                                               min_reg, iteration)
    if len(list_communities) >= min_community:
        c_graph = add_node_color(c_graph, f"{sf_name}_{reg}", color,
                                 list_communities)
    return c_graph
def write_figure(c_graph: nx.Graph, outfile: Path, title: str = ""
                 ) -> None:
    """
    Write the community graph as an html network figure.

    :param c_graph: The community graph to display
    :param outfile: The html file where the figure will be created
    :param title: The title (heading) of the figure
    """
    figure = Network(height="100%", width="100%", heading=title)
    figure.from_nx(c_graph)
    # The force atlas options are selected before physics is toggled off.
    # NOTE(review): the order of these two calls is kept as is; its effect
    # on the rendered page depends on pyvis - confirm before reordering.
    figure.force_atlas_2based()
    figure.toggle_physics(False)
    # Option panels shown in the html page.
    panels = ["nodes", "edges", "physics"]
    figure.show_buttons(filter_=panels)
    figure.write_html(str(outfile))
@lp.parse
def create_community_sf_graph(project: str, weight: int, global_weight: int,
                              same_gene: bool, inflation: float,
                              cell_line: str, feature: str,
                              sf_list: List[str],
                              reg: str, threshold: float, min_reg: int = 2,
                              iteration: int = 0, min_community: int = 3,
                              min_community_size: int = 10):
    """
    Create the figure showing the communities regulated by the splicing
    factors of interest, and store the resulting graph in a json file.

    :param project: A project name of interest. Used only if
        global_weight is 0
    :param weight: The weight of interaction to consider
    :param global_weight: The global weight to consider. If the global
        weight is equal to 0 then the density figures are calculated by
        project, else all projects are merged together and the
        interactions seen in `global_weight` projects are taken into account
    :param same_gene: Say if we consider as co-localised, exons within
        the same gene (True) or not (False)
    :param inflation: The inflation parameter
    :param cell_line: Interactions are only selected from projects made
        on a specific cell line (ALL to disable this filter)
    :param feature: The feature we want to analyse
    :param sf_list: The list of the splicing factors of interest
    :param reg: The name of the regulation chosen
    :param threshold: Minimum frequency of genes regulated in a community
        to select it (but it must also contain at least min_reg regulated
        genes)
    :param min_reg: The minimum number of regulated exons in a community
        to take it into account
    :param iteration: If this parameter is greater than or equal to 20 then
        a permutation test is made to find the significantly enriched
        communities
    :param min_community: The minimum number of enriched communities
        required to color the communities of a splicing factor
    :param min_community_size: The minimum size used to consider communities
    """
    # __wrapped__ gives access to the undecorated function so it can be
    # called directly with python arguments.
    c_graph_file = create_community_sized_graph.__wrapped__(
        project, weight, global_weight, same_gene, inflation, cell_line,
        feature, min_community_size)
    c_graph = load_graphic(c_graph_file)
    p = Parameters(project, weight, global_weight, same_gene, inflation,
                   cell_line, feature)
    sf_list = select_splicing_factors(sf_list)
    colors = cm.hsv(np.linspace(0, 1, min(len(sf_list), 5)))
    colors = [to_hex(c) for c in colors]
    # NOTE(review): at most 5 colors are generated and zip stops at the
    # shortest input, so only the first 5 splicing factors are processed,
    # whereas the title and the file names below use len(sf_list).
    # Confirm whether this cap is intended.
    for sf_name, color in zip(sf_list, colors):
        c_graph = update_community_graphic(p, c_graph, color, sf_name, reg,
                                           threshold, min_reg, iteration,
                                           min_community, min_community_size)
    regulated_com = get_regulated_communities(c_graph)
    c_graph = subgraph_creation(c_graph, [regulated_com])
    outfiles = get_outfiles(c_graph_file, sf_list, threshold, min_reg,
                            min_community, min_community_size, iteration)
    title = get_title(len(sf_list), reg, threshold, min_reg, feature,
                      iteration)
    g_json = nx.json_graph.node_link_data(c_graph)
    # The context manager guarantees the json file is flushed and closed
    # before the html figure is written.
    with outfiles[0].open('w') as json_file:
        json.dump(g_json, json_file, indent=2)
    write_figure(c_graph, outfiles[1], title)
if __name__ == "__main__":
    import sys
    # With command line arguments the figure is created; a bare call of
    # the script (only the program name in argv) runs the doctests.
    if len(sys.argv) != 1:
        create_community_sf_graph()
    else:
        import doctest
        doctest.testmod()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment