import doctest
import sqlite3
from contextlib import closing
from typing import Dict, List, Tuple


def get_info_from_database(cnx: sqlite3.Connection, query: str) -> list:
    """
    Run a query and return every value of every result row as a flat list.

    :param cnx: connection to the database
    :param query: the SQL query that allows us to get data from the database
    :return: the values of the rows, concatenated row after row. Values
    keep the type handed back by sqlite3; they are not converted to str.

    >>> cnx = sqlite3.connect(":memory:")
    >>> _ = cnx.execute("CREATE TABLE t (a TEXT, b INTEGER)")
    >>> _ = cnx.executemany("INSERT INTO t VALUES (?, ?)",
    ...                     [("x", 1), ("y", 2)])
    >>> get_info_from_database(cnx, "SELECT a FROM t ORDER BY b")
    ['x', 'y']
    >>> get_info_from_database(cnx, "SELECT a, b FROM t ORDER BY b")
    ['x', 1, 'y', 2]
    """
    # closing() releases the cursor even when the query raises.
    with closing(cnx.cursor()) as cursor:
        cursor.execute(query)
        rows = cursor.fetchall()
    return [value for row in rows for value in row]


def get_co_localisation_by_project(cnx: sqlite3.Connection, id_project: str) \
        -> List[Tuple[str, str]]:
    """
    Return the pairs of exons recorded as co-localized for one project.

    :param cnx: connection to the ChIA-PET database; the query reads the
    columns exon1, exon2 and id_project of the table cin_exon_interaction
    :param id_project: the id of a project
    :return: one (exon1, exon2) tuple per row matching id_project
    """
    query = """SELECT exon1, exon2
               FROM cin_exon_interaction
               WHERE id_project = ?"""
    # closing() releases the cursor even when the query raises.
    with closing(cnx.cursor()) as cursor:
        cursor.execute(query, (id_project,))
        return cursor.fetchall()


def get_colocalized_exons(interaction: List[Tuple[str, str]], id_project: str
                          ) -> Dict[str, List]:
    """
    Get a dictionary linking each exon to its neighbors.

    Every pair is registered in both directions. Keys are suffixed with
    ".<id_project>", the listed neighbors are not.

    :param interaction: a list of pairs of co-localized exons
    :param id_project: the id of a project
    :return: dictionary linking each exon to its neighbors

    >>> interaction = [('1_1', '2_1'), ('1_1', '3_1'), ('3_1', '2_1')]
    >>> res = {'1_1.test': ['2_1', '3_1'], '2_1.test': ['1_1', '3_1'],
    ...        '3_1.test': ['1_1', '2_1']}
    >>> get_colocalized_exons(interaction, id_project='test') == res
    True
    >>> get_colocalized_exons([], id_project='test')
    {}
    """
    exons_interactions: Dict[str, List] = {}
    for exon_a, exon_b in interaction:
        for exon, neighbor in ((exon_a, exon_b), (exon_b, exon_a)):
            key = exon + "." + id_project
            exons_interactions.setdefault(key, []).append(neighbor)
    return exons_interactions


def get_exons_in_interaction(cnx: sqlite3.Connection, id_project: str) -> Dict:
    """
    Link each exon of a project to the exons it interacts with.

    The pairs are read with get_co_localisation_by_project() and grouped
    with get_colocalized_exons().

    :param cnx: connection to the ChIA-PET database
    :param id_project: the id of a project
    :return: a dictionary whose keys are "<exon>.<id_project>" and whose
    values are the lists of interacting exons, e.g:
    '9843_10.GSM1234': ['9843_2', '9843_3', '9843_4']
    """
    interaction = get_co_localisation_by_project(cnx, id_project)
    return get_colocalized_exons(interaction, id_project)


def combine_project_dictionary(projects_dictionary: Dict[str, List]
                               ) -> Dict[str, List]:
    """
    Combine the dictionary of interactions of many projects into one.

    The project suffix (everything after the first ".") is dropped from
    each key and the lists of the keys that collapse onto the same exon are
    concatenated, in iteration order. The input dictionary and its lists
    are left untouched.

    :param projects_dictionary: dictionary of interactions of many projects
    :return: the combined dictionary

    >>> res = {'1_1.test': ['2_1', '3_1'], '2_1.test': ['1_1', '3_1'],
    ...        '3_1.test': ['1_1', '2_1'],
    ...        '1_1.test2': ['2_1', '3_1', '4_1'],
    ...        '2_1.test2': ['1_1', '6_1'], '3_1.test2': ['1_1', '2_1'],
    ...        '51_1.test3': ['17_1']}
    >>> expected = {'1_1': ['2_1', '3_1', '2_1', '3_1', '4_1'],
    ...             '2_1': ['1_1', '3_1', '1_1', '6_1'],
    ...             '3_1': ['1_1', '2_1', '1_1', '2_1'],
    ...             '51_1': ['17_1']}
    >>> combine_project_dictionary(res) == expected
    True
    >>> res['1_1.test']
    ['2_1', '3_1']
    """
    final_exons_interactions: Dict[str, List] = {}
    for key, neighbors in projects_dictionary.items():
        exon = key.split(".")[0]
        # Always accumulate into a list owned by the result: storing the
        # caller's list and extending it would modify the input.
        final_exons_interactions.setdefault(exon, []).extend(neighbors)
    return final_exons_interactions


def find_controls_exons_in_interaction(final_exons_interactions: Dict,
                                       list_exons_ctrl: List) -> int:
    """
    Count the control exons interacting with at least one other control exon.

    An entry of list_exons_ctrl is counted when one of its partners in
    final_exons_interactions is a control exon with a different identifier.
    Exons missing from final_exons_interactions are not counted. An
    identifier listed several times is evaluated once per occurrence.

    :param final_exons_interactions: a dictionary with for each exon the
    list of exons with which it interacts, e.g:
    '4448_11': ['4448_20'], '4857_5': ['4878_1', '494_6']
    :param list_exons_ctrl: the list of control exons, e.g:
    ['15919_3', '7546_8', '4946_3', '3158_10']
    :return nb_exons_ctrl_int: the number of exons control in interaction
    with at least one other exon control.

    >>> inter_dic = {'1_1': ['2_1', '3_1'],
    ...              '3_1': ['2_1', '1_1'],
    ...              '5_1': ['1_1', '6_1']}
    >>> list_exons_ctrl = ['1_1', '3_1', '5_1']
    >>> find_controls_exons_in_interaction(inter_dic, list_exons_ctrl)
    3
    >>> list_exons_ctrl = ['6_1', '1_1', '3_1']
    >>> find_controls_exons_in_interaction(inter_dic, list_exons_ctrl)
    2
    >>> inter_dic = {'1_1': ['2_1', '3_1'],
    ...              '3_1': ['7_1', '19_1'],
    ...              '5_1': ['17_1', '6_1']}
    >>> find_controls_exons_in_interaction(inter_dic, list_exons_ctrl)
    1
    """
    # One set built up front replaces a filtered copy of the list per exon.
    ctrl_exons = set(list_exons_ctrl)
    count = 0
    for exon in list_exons_ctrl:
        partners = final_exons_interactions.get(exon, ())
        if any(p != exon and p in ctrl_exons for p in partners):
            count += 1
    return count


if __name__ == "__main__":
    doctest.testmod()