#!/usr/bin/env python3

# -*- coding: UTF-8 -*-

"""
Description: Create a graph figure showing which community is regulated \
by which splicing factor
"""

import json
import sqlite3
from contextlib import closing
from itertools import cycle
from pathlib import Path
from typing import List

import lazyparser as lp
import matplotlib.cm as cm
import networkx as nx
import numpy as np
import pandas as pd
from matplotlib.colors import to_hex
from pyvis.network import Network

from .graph_functions import Parameters, recover_json_graph_of_interest, \
    Config, get_regulation_table, get_community_tables, merge_dataframes, \
    get_regulated_community, subgraph_creation, load_graphic, check_if_exist
from .create_community_node_graph import create_community_sized_graph
from ..sf_and_communities import get_sfname


def add_node_color(c_graph: nx.Graph, sf_name: str, color: str,
                   communities: List[str]) -> nx.Graph:
    """
    Update the graphic of community to add the color ``color`` to \
    the community containing a enriched amount of exons or genes \
    regulated by a splicing factor.

    A community tagged for the first time receives ``color``. A community \
    that already has a regulator keeps its first color, gets the new \
    factor appended to its ``reg`` and ``title`` attributes and is drawn \
    with a ``triangle`` shape.

    :param c_graph: The graphic of community to update (modified in place)
    :param sf_name: The name of the splicing factor regulating the \
    communities given in communities.
    :param color: The color to give to the communities
    :param communities: The list of community to update
    :return: The network update

    >>> g = nx.Graph()
    >>> g.add_nodes_from(list("ABC"))
    >>> g.add_edges_from([("A", "B"), ("A", "C")])
    >>> ng = add_node_color(g, "TRA2_down", "red", ["A", "B"])
    >>> list(ng.nodes[x] for x in list("ABC")) == [{'reg': 'TRA2_down',
    ... 'color': 'red', 'title': 'TRA2_down'}, {'reg': 'TRA2_down',
    ... 'color': 'red', 'title': 'TRA2_down'}, {}]
    True
    >>> ng = add_node_color(ng, "SR1_up", "white", ["A", "C"])
    >>> list(ng.nodes[x] for x in list("ABC")) == [{'reg':
    ... 'TRA2_down, SR1_up', 'color': 'red', 'title': 'TRA2_down<br/>SR1_up',
    ... 'shape': 'triangle'}, {'reg': 'TRA2_down', 'color': 'red',
    ... 'title': 'TRA2_down'}, {'reg': 'SR1_up', 'color': 'white',
    ... 'title': 'SR1_up'}]
    True
    """
    for community in communities:
        # The node view hands back the live attribute dict of the node,
        # so the updates below are written straight into the graph.
        attributes = c_graph.nodes[community]
        if "reg" in attributes:
            attributes["reg"] += f", {sf_name}"
            attributes["shape"] = 'triangle'
        else:
            attributes["reg"] = sf_name
            attributes["color"] = color
        if "title" in attributes:
            attributes["title"] += f"<br/>{sf_name}"
        else:
            attributes["title"] = sf_name
    return c_graph


def get_regulated_communities(c_graph: nx.Graph) -> List[str]:
    """
    Get the communities regulated by a splicing factor.

    A community is considered regulated when its node carries a ``reg`` \
    attribute.

    :param c_graph: The graphic of community to inspect
    :return: The list of regulated communities

    >>> g = nx.Graph()
    >>> g.add_nodes_from(list("ABC"))
    >>> g.add_edges_from([("A", "B"), ("A", "C")])
    >>> ng = add_node_color(g, "TRA2_down", "red", ["A", "B"])
    >>> get_regulated_communities(ng)
    ['A', 'B']
    """
    return [node for node, attributes in c_graph.nodes(data=True)
            if "reg" in attributes]


def select_splicing_factors(sf_list: List[str]) -> List[str]:
    """
    Return sf_list if sf_list doesn't contain ALL else return every \
    splicing factor name given by ``get_sfname``.

    :param sf_list: A list of splicing factor of interest
    :return: sf_list if sf_list doesn't contain ALL else the result of \
    ``get_sfname()``.

    >>> select_splicing_factors(list("ABC"))
    ['A', 'B', 'C']
    """
    return sf_list if "ALL" not in sf_list else get_sfname()


def get_title(nb_sf: int, reg: str, threshold: float, min_reg: int,
              feature: str, iteration: int) -> str:
    """
    Return the title of the figure.

    :param nb_sf: The number of splicing factor analyzed
    :param reg: The name of the regulation chosen
    :param threshold: Minimum frequency of gene regulated in a colony to \
    select it (but it must also contains at least min_reg gene regulated)
    :param min_reg: The minimum of regulated exon in a community to \
    take it into account
    :param feature: The kind of feature we want to analyse
    :param iteration: The number of iteration to perform. Below 20 the \
    title describes the threshold/min_reg selection, otherwise it \
    describes a permutation test.
    :return: The title of the figure

    >>> get_title(5, "down", 0.1, 2, "gene", 0)
    'Figure of the communities containing at least 10.0 % of their genes \
(or more than 2 genes) down-regulated by 5 splicing factors'
    """
    reg = "regulated" if reg == "reg" else f"{reg}-regulated"
    if iteration < 20:
        return f"Figure of the communities containing at least " \
               f"{threshold * 100} % of their {feature}s (or more than " \
               f"{min_reg} {feature}s) {reg} by {nb_sf} splicing factors"
    return f"Figure of the communities enriched in {feature}s" \
           f" {reg} by {nb_sf} splicing factors (permutation test " \
           f"{iteration} iteration)"


def get_outfiles(c_graph_file: Path, sf_list: List[str], threshold: float,
                 min_reg: int, min_community: int, min_community_size: int,
                 iteration: int) -> List[Path]:
    """
    Return the figure and json outfile to store and visualize graph data.

    The folder ``graph_figures/community_level`` is created next to \
    ``c_graph_file`` if it does not exist yet.

    :param c_graph_file: A graph containing a community level graph
    :param sf_list: The list of splicing factor of interest
    :param threshold: Minimum frequency of gene regulated in a colony to \
    select it (but it must also contains at least min_reg gene regulated)
    :param min_reg: The minimum of regulated exon in a community to \
    take it into account
    :param min_community: The minimum number of enriched community \
    required to produce a figure
    :param min_community_size: The minimum size used to consider communities
    :param iteration: If this parameter is greater or equal to 20 the \
    file names mention the number of iteration, below 20 they mention \
    min_reg and min_community instead.
    :return: The json file then the html file, in that order
    """
    outfolder = c_graph_file.parent / "graph_figures" / "community_level"
    outfolder.mkdir(parents=True, exist_ok=True)
    prefix = f"community_graph_{len(sf_list)}_sf_t{threshold}_"
    if iteration < 20:
        criteria = f"min-reg-{min_reg}_min-com_{min_community}_"
    else:
        criteria = f"iteration-{iteration}_"
    stem = f"{prefix}{criteria}min-size-{min_community_size}"
    return [outfolder / f"{stem}.{ext}" for ext in ("json", "html")]


def update_community_graphic(p: Parameters, c_graph: nx.Graph, color: str,
                             sf_name: str, reg: str,
                             threshold: float, min_reg: int = 2,
                             iteration: int = 0, min_community: int = 3,
                             min_community_size: int = 10) -> nx.Graph:
    """
    Tag, in the community graph, the communities regulated by a \
    splicing factor.

    :param p: A class containing configurations
    :param c_graph: A community level graph
    :param sf_name: The name of the splicing factor of interest
    :param reg: The name of the regulation chosen
    :param color: color of the regulated node
    :param threshold: Minimum frequency of gene regulated in a colony to \
    select it (but it must also contains at least min_reg gene regulated)
    :param min_reg: The minimum of regulated exon in a community to \
    take it into account
    :param iteration: If this parameter is greater or equal to 20 then a \
    permutation test is made to find the significantly enriched communities.
    :param min_community: The minimum number of enriched community \
    required to tag the graph for this splicing factor
    :param min_community_size: The minimum size used to consider communities
    :return: The community graph, tagged with ``{sf_name}_{reg}`` when at \
    least min_community communities were found, unchanged otherwise
    """
    print(f"Working on {sf_name}, {reg}")
    graph_file, comm_file = recover_json_graph_of_interest(p)
    check_if_exist(graph_file)
    # sqlite3 connections are not closed by their own context manager,
    # hence ``closing``: one connection is opened per splicing factor and
    # must not be left dangling.
    # NOTE(review): assumes get_regulation_table returns a fully loaded
    # table (it is merged right below) - confirm it holds no live cursor.
    with closing(sqlite3.connect(Config.db_file)) as cnx:
        reg_table = get_regulation_table(cnx, sf_name, reg, p.feature)
    df_com_file = pd.read_csv(comm_file, sep="\t")
    df_com = get_community_tables(df_com_file, p.feature)
    full_com = merge_dataframes(reg_table, df_com, p.feature)
    # Features belonging to a too small community are detached from it
    too_small = full_com["community_size"] < min_community_size
    full_com.loc[too_small,
                 ["community", "community_size"]] = [np.nan, np.nan]
    list_communities = get_regulated_community(full_com, p.feature, threshold,
                                               min_reg, iteration)
    if len(list_communities) >= min_community:
        c_graph = add_node_color(c_graph, f"{sf_name}_{reg}", color,
                                 list_communities)
    return c_graph


def write_figure(c_graph: nx.Graph, outfile: Path, title: str = ""
                 ) -> None:
    """
    Write the network figure as an html file.

    :param c_graph: The community level graph to draw
    :param outfile: The file where the graphic will be created
    :param title: The title of the figure
    """
    net = Network(width="100%", height="100%", heading=title)
    net.from_nx(c_graph)
    net.force_atlas_2based()
    net.toggle_physics(False)
    net.show_buttons(filter_=["nodes", "edges", "physics"])
    net.write_html(str(outfile))


@lp.parse
def create_community_sf_graph(project: str, weight: int, global_weight: int,
                              same_gene: bool, inflation: float,
                              cell_line: str, feature: str, sf_list: List[str],
                              reg: str, threshold: float, min_reg: int = 2,
                              iteration: int = 0, min_community: int = 3,
                              min_community_size: int = 10):
    """
    Create the figure of the communities regulated by splicing factors.

    :param project: A project name of interest. Used only if \
    global_weight is 0
    :param weight: The weight of interaction to consider
    :param global_weight: The global weight to consider. if \
    the global weight is equal to 0 then density figure are \
    calculated by project, else all project are merge together and the \
    interaction seen in `global_weight` project are taken into account
    :param same_gene: Say if we consider as co-localised, exons within \
    the same gene (True) or not (False)
    :param inflation: The inflation parameter
    :param cell_line: Interactions are only selected from projects made \
    on a specific cell line (ALL to disable this filter)
    :param feature: The feature we want to analyse
    :param sf_list: The list of the splicing factor of interest
    :param reg: The name of the regulation chosen
    :param threshold: Minimum frequency of gene regulated in a colony to \
    select it (but it must also contains at least min_reg gene regulated)
    :param min_reg: The minimum of regulated exon in a community to \
    take it into account
    :param iteration: If this parameter is greater or equal to 20 then a \
    permutation test is made to find the significantly enriched communities. \
    Below 20, significant communities are found
    :param min_community: The minimum number of enriched community \
    required to produce a figure
    :param min_community_size: The minimum size used to consider communities
    """
    # ``__wrapped__`` bypasses the command line parsing of the decorated
    # function so it can be called like a regular function.
    c_graph_file = create_community_sized_graph.__wrapped__(
        project, weight, global_weight, same_gene, inflation, cell_line,
        feature, min_community_size)
    c_graph = load_graphic(c_graph_file)
    p = Parameters(project, weight, global_weight, same_gene, inflation,
                   cell_line, feature)
    sf_list = select_splicing_factors(sf_list)
    # The palette holds at most 5 colors.
    # NOTE(review): hsv is cyclic, so the first and the last color of the
    # palette are both red - confirm this is acceptable.
    palette = [to_hex(c)
               for c in cm.hsv(np.linspace(0, 1, min(len(sf_list), 5)))]
    # The palette is cycled so that every splicing factor is processed:
    # zipping directly with the (at most 5) colors silently dropped every
    # factor after the fifth one.
    for sf_name, color in zip(sf_list, cycle(palette)):
        c_graph = update_community_graphic(p, c_graph, color, sf_name, reg,
                                           threshold, min_reg, iteration,
                                           min_community, min_community_size)
    regulated_com = get_regulated_communities(c_graph)
    c_graph = subgraph_creation(c_graph, [regulated_com])
    outfiles = get_outfiles(c_graph_file, sf_list, threshold, min_reg,
                            min_community, min_community_size, iteration)
    title = get_title(len(sf_list), reg, threshold, min_reg, feature,
                      iteration)
    g_json = nx.json_graph.node_link_data(c_graph)
    # Context manager: the json file is flushed and closed before the
    # html figure is written.
    with outfiles[0].open('w') as json_file:
        json.dump(g_json, json_file, indent=2)
    write_figure(c_graph, outfiles[1], title)


if __name__ == "__main__":
    import sys
    # Without any command line argument the doctests are run, otherwise
    # the arguments are handed to the command line interface.
    if len(sys.argv) == 1:
        import doctest
        doctest.testmod()
    else:
        create_community_sf_graph()