diff --git a/src/find_interaction_cluster/.ipynb_checkpoints/composition_heatmap-checkpoint.py b/src/find_interaction_cluster/.ipynb_checkpoints/composition_heatmap-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..abcbab1adac3ed08312b95017ee98014d06813a1 --- /dev/null +++ b/src/find_interaction_cluster/.ipynb_checkpoints/composition_heatmap-checkpoint.py @@ -0,0 +1,345 @@ +""" +description +create a cpnt composition heatmap +""" + +import pandas as pd +from pathlib import Path +import os +import lazyparser as lp +from .config import ConfigGraph +from ..nt_composition.config import get_features +import plotly +import plotly.graph_objects as go +import plotly.figure_factory as ff +from scipy.cluster.hierarchy import linkage +from matplotlib_venn import venn2 +import matplotlib.backends.backend_pdf +import matplotlib.pyplot as plt +import doctest + + +def load_input_output_folders(in_folder, out_folder="results/composition_heatmap"): + in_folder = Path(in_folder) + out_folder = Path(out_folder) + if os.path.exists(out_folder): + pass + else: + os.mkdir(out_folder) + + results_folder = out_folder / f"{in_folder.name}" + + if os.path.exists(results_folder): + pass + else: + os.mkdir(results_folder) + + return in_folder, results_folder + + +def filter_up_down(x): + if "down_figure_bar.txt" in x: + res = "down" + elif "up_figure_bar.txt" in x: + res = "up" + else: + raise ValueError("Unable to filter up and down file correctly , please check your bar files") + return res + + +def tf_filter(df, community, nb_regulated=200,alpha="0.05", fc: float = 0.5849): + """ + filtered dataframe : factors with DE genes less than a threshold (default 200) are eliminated + :param alpha: + :param df: table used to create the heatmap + :param community: the community + :param nb_regulated: the threshold (200 default) + :param fc: the fold change + :return: filtered dataframe + """ + community_name = f"{community.name.split('.txt')[0].split('.bed')[0]}.csv" + if float(format(fc, ".2f")) == 0.58: + community_recap_table_path = ConfigGraph.communities_recap_table_path_058 / community_name + print(community_recap_table_path) + else: + community_recap_table_path = ConfigGraph.communities_recap_table_path_04 / community_name + print(community_recap_table_path) + + try: + + df_f = pd.read_csv(community_recap_table_path) + df_f = df_f.loc[df_f["all_DE"] > nb_regulated] + df_f = df_f.loc[(df_f[f"up_comm_alpha_{alpha}"] != 0) | (df_f[f"down_comm_alpha_{alpha}"] != 0)] + df = df[df["Target_of_assay"].isin(df_f["Target_of_assay"])] + + + except FileNotFoundError: + + print('{} does not exist '.format(community_recap_table_path)) + + return df + + +def load_filter_reacap(in_folder: Path, results_folder: Path, cpnt_type: str, community, alpha="0.05") -> pd.DataFrame: + """ + :param community: + :param results_folder: + :param in_folder: the folder containing all the composition analysis , all nt_analysis,dnt_analysis,tnt_anlysis \ + and aa_analysis + :param cpnt_type: nt or dnt or tnt or aa + :param alpha: type one error + :return: a filtred dataframe containing only enriched vs control , alpha = ? + + >>> d = load_filter_reacap(Path(ConfigGraph.load_filter_reacap),'nt') + + >>> d["TF"] + 0 NKRF + 1 NKRF + 2 NKRF + 3 NKRF + >>> d["down_or_up"] + 0 up + 1 up + 2 down + 3 down + """ + recap_cpnt_path = in_folder / f"{cpnt_type}_analysis" / f"recap_{cpnt_type}.csv" + df = pd.read_csv(recap_cpnt_path, sep="\t") + df_only_TF = pd.read_csv(ConfigGraph.only_tf_05849) + df_only_TF = tf_filter(df_only_TF,community, alpha=alpha) + TF_list = list(df_only_TF["Target_of_assay"]) + df["TF"] = list(map(lambda x: x.split('_')[0], df["factor"])) + df["alpha"] = list(map(lambda x: x.split('_')[1], df["factor"])) + df["down_or_up"] = list(map(lambda x: filter_up_down(x), df["factor"])) + df = df.loc[df["TF"].isin(TF_list)] # to keep only the 136 TF curated manually + df = df.loc[df["alpha"] == alpha] + df = df.loc[(df["grp1"] == "ctrl") & (df["grp2"] == "enriched")] + df.to_csv(results_folder / f"filtred_recap_{cpnt_type}_{alpha}.csv", index=False) + + return df + + +def get_cpnt_row(df: pd.DataFrame, tf: str, reg: str, p_val_weight: bool) -> pd.Series: + """ + :param p_val_weight: if the parameter is set to True then the values inside the heatmap \ + cell will be multiplied by 1-padj as preponderance factor + :param reg: down or up genes + :param cpnt_type: nt or dnt or tnt or aa + :param tf: the transcription factor you want retrieve cpnt frequencies + :param df: the filtered and processed dataframe + + """ + df_reg = df.loc[df["down_or_up"] == reg] + df_reg = df_reg.loc[df["TF"] == tf] + if not df_reg.empty: + if p_val_weight: + df_reg["value"] = df_reg.apply( + lambda x: ((1 - x["p-adj"]) * (x["mean_grp2"] - x["mean_grp1"]) / x["mean_grp1"]), axis=1) + else: + df_reg["value"] = df_reg.apply(lambda x: ((x["mean_grp2"] - x["mean_grp1"]) / x["mean_grp1"]), axis=1) + + df_reg = df_reg[["cpnt", "value"]] + df_reg = df_reg.set_index("cpnt") + row = df_reg["value"] + row = row.append(pd.Series({"TF": tf})) + return row # a pd.Serie + else: + pass + + +def generate_heatmap_tables(df: pd.DataFrame, cpnt_type, p_val_weight: bool) -> (pd.DataFrame, pd.DataFrame): + """ + + :param df: filtred df + :param cpnt_type: + :param p_val_weight: if the parameter is set to True then the values inside the heatmap \ + cell will be multiplied by 1-padj as preponderance factor + :return: tuple containing two dataframe df_down and df_up + """ + tf_list = list(df["TF"].unique()) + cols = get_features(cpnt_type).append("TF") + df_up = pd.DataFrame(columns=cols) + df_down = pd.DataFrame(columns=cols) + + for tf in tf_list: + row = get_cpnt_row(df, tf, "down", p_val_weight) + df_down = df_down.append(row, ignore_index=True) + df_down = df_down.set_index("TF") + + for tf in tf_list: + row = get_cpnt_row(df, tf, "up", p_val_weight) + df_up = df_up.append(row, ignore_index=True) + df_up = df_up.set_index("TF") + + return df_up, df_down + + +def heatmap_fig(df: pd.DataFrame, outfile: Path, color_scale: str): + """ + Create a heatmap. + + :param df: List of percentage of features regulated by a factor in \ + a given spatial cluster + :param outfile: The name of the figure to produce + :param contrast: (int) the value of the contrast + :param color_scale: The name of the color scale + """ + data_array = df.values + labelsx = list(df.columns) + labelsy = list(df.index) + index_side_dic = {l: i for i, l in enumerate(labelsy)} + index_up_dic = {l: i for i, l in enumerate(labelsx)} + data_up = data_array.transpose() + # Initialize figure by creating upper dendrogram + fig = ff.create_dendrogram(data_up, orientation='bottom', labels=labelsx, + linkagefun=lambda x: linkage(x, ConfigGraph.clustering)) + for i in range(len(fig['data'])): + fig['data'][i]['yaxis'] = 'y2' + + # Create Side Dendrogram + dendro_side = ff.create_dendrogram(data_array, orientation='right', + labels=labelsy, + linkagefun=lambda x: linkage(x, ConfigGraph.clustering)) + for i in range(len(dendro_side['data'])): + dendro_side['data'][i]['xaxis'] = 'x2' + + # Create Heatmap + dendro_side_leaves = dendro_side['layout']['yaxis']['ticktext'] + fig['layout']['yaxis']['ticktext'] = dendro_side['layout']['yaxis']['ticktext'] + index_side = [index_side_dic[l] for l in dendro_side_leaves] + dendro_up_leaves = fig['layout']['xaxis']['ticktext'] + heat_data = data_array[index_side, :] + index_up = [index_up_dic[l] for l in dendro_up_leaves] + heat_data = heat_data[:, index_up] + + if color_scale == "Picnic": + heatmap = [ + go.Heatmap( + x=dendro_up_leaves, + y=dendro_side_leaves, + z=heat_data, + colorbar={"x": -0.05}, + colorscale=color_scale, + zmid=0 + ) + ] + else: + heatmap = [ + go.Heatmap( + x=dendro_up_leaves, + y=dendro_side_leaves, + z=heat_data, + colorbar={"x": -0.05}, + colorscale=color_scale + ) + ] + heatmap[0]['x'] = fig['layout']['xaxis']['tickvals'] + fig['layout']['yaxis']['tickvals'] = dendro_side['layout']['yaxis']['tickvals'] + heatmap[0]['y'] = dendro_side['layout']['yaxis']['tickvals'] + + # + # # Add Heatmap Data to Figure + for data in heatmap: + fig.add_trace(data) + + # Edit Layout + fig['layout'].update({"autosize": True, "height": 1080, "width": 1920, + 'showlegend': False, 'hovermode': 'closest', + }) + # Edit xaxis + fig['layout']['xaxis'].update({'domain': [0.15, 0.8], + 'mirror': False, + 'showgrid': False, + 'showline': False, + 'zeroline': False, + 'showticklabels': True, + 'ticks': ""}) + # Edit xaxis2 + fig['layout'].update({'xaxis2': {'domain': [0, .15], + 'mirror': False, + 'showgrid': False, + 'showline': False, + 'zeroline': False, + 'showticklabels': False, + 'ticks': ""}}) + + # Edit yaxis + fig['layout']['yaxis'].update({'domain': [0.11, .85], + 'mirror': False, + 'showgrid': False, + 'showline': False, + 'zeroline': False, + 'showticklabels': True, + 'ticks': "", + "side": "right"}) + # Edit yaxis2 + fig['layout'].update({'yaxis2': {'domain': [.825, 1], + 'mirror': False, + 'showgrid': False, + 'showline': False, + 'zeroline': False, + 'showticklabels': False, + 'ticks': ""}}) + plotly.offline.plot(fig, filename=str(outfile), auto_open=False) + + +def venn_compo(df0, results_folder, cpnt_type, alpha="0.05"): + """ + creates a venn diagramme classifying tf by the regulation type up/down + + :param df: the filtered and processed dataframe + :param results_folder: + :param cpnt_type: + :param alpha: + """ + totale_analysed_TF = len(df0["TF"].unique()) + df0 = df0[df0["grp1_vs_grp2"] != " . "] + df_down = df0[df0["down_or_up"] == "down"] + df_up = df0[df0["down_or_up"] == "up"] + + E = set(df_down["TF"].unique()) + K = set(df_up["TF"].unique()) + with matplotlib.backends.backend_pdf.PdfPages(results_folder / f"{cpnt_type}_venny_alpha_{alpha}.pdf") as pdf: + fig = plt.figure(figsize=(15, 15)) + title = "{}_{} alpha = {} ".format(results_folder.name, cpnt_type, alpha) + plt.title(title) + total = len(K.union(E)) + I = set(K.intersection(E)) + v = venn2([E, K], (f'TF showing {cpnt_type} biased down genes', f'TF showing {cpnt_type} biased up genes ')) + + if len(I) > 0: + v.get_label_by_id('100').set_text('\n'.join(E - K)) + v.get_label_by_id('110').set_text('\n'.join(I)) + v.get_label_by_id('010').set_text('\n'.join(K - E)) + + plt.text(0.6, 0.2, 'total={}/{} TF show at least one bias of composition \ + in term of {} for (down or up or both)-regulated genes'.format(total, totale_analysed_TF, cpnt_type)) + pdf.savefig() + + +@lp.parse(cpnt_type=['nt', 'dnt', 'tnt', 'aa'], alpha=["0.1", "0.05"]) +def heatmap_launcher(in_folder, cpnt_type, community: str, p_val_weight: bool = False, + out_folder="results/composition_heatmap", + alpha="0.05"): + """ + creates a cpnt composition heatmap + + :param community: the community file path annotation method + :param p_val_weight: if the parameter is set to True then the values inside the heatmap \ + cell will be multiplied by 1-padj as preponderance factor to empathize on the significance level of comparison + :param in_folder: folder containing the recap.csv file + :param cpnt_type: the type of cpnt we are interested in + :param out_folder: folder where all results will be generated + :param alpha: type one error ["0.05","0.1"] default is 0.05 + """ + in_folder, results_folder = load_input_output_folders(in_folder, out_folder) + filtered_recap = load_filter_reacap(in_folder, results_folder, cpnt_type, Path(community), alpha) + df_up, df_down = generate_heatmap_tables(filtered_recap, cpnt_type, p_val_weight) + heatmap_fig(df_up.T, results_folder / f"up_{cpnt_type}_heatmap_alpha_{alpha}.html", color_scale="Picnic") + heatmap_fig(df_down.T, results_folder / f"down_{cpnt_type}_heatmap_alpha_{alpha}.html", color_scale="Picnic") + venn_compo(filtered_recap, results_folder, cpnt_type, alpha) + + +if __name__ == "__main__": + # doctest.testmod() + heatmap_launcher() diff --git a/src/find_interaction_cluster/composition_heatmap.py b/src/find_interaction_cluster/composition_heatmap.py index c955e21b38085943e1810d62f1d741b88d5c459e..abcbab1adac3ed08312b95017ee98014d06813a1 100644 --- a/src/find_interaction_cluster/composition_heatmap.py +++ b/src/find_interaction_cluster/composition_heatmap.py @@ -47,8 +47,42 @@ def filter_up_down(x): return res -def load_filter_reacap(in_folder: Path, results_folder: Path, cpnt_type: str, alpha="0.05") -> pd.DataFrame: +def tf_filter(df, community, nb_regulated=200,alpha="0.05", fc: float = 0.5849): """ + filtered dataframe : factors with DE genes less than a threshold (default 200) are eliminated + :param alpha: + :param df: table used to create the heatmap + :param community: the community + :param nb_regulated: the threshold (200 default) + :param fc: the fold change + :return: filtered dataframe + """ + community_name = f"{community.name.split('.txt')[0].split('.bed')[0]}.csv" + if float(format(fc, ".2f")) == 0.58: + community_recap_table_path = ConfigGraph.communities_recap_table_path_058 / community_name + print(community_recap_table_path) + else: + community_recap_table_path = ConfigGraph.communities_recap_table_path_04 / community_name + print(community_recap_table_path) + + try: + + df_f = pd.read_csv(community_recap_table_path) + df_f = df_f.loc[df_f["all_DE"] > nb_regulated] + df_f = df_f.loc[(df_f[f"up_comm_alpha_{alpha}"] != 0) | (df_f[f"down_comm_alpha_{alpha}"] != 0)] + df = df[df["Target_of_assay"].isin(df_f["Target_of_assay"])] + + + except FileNotFoundError: + + print('{} does not exist '.format(community_recap_table_path)) + + return df + + +def load_filter_reacap(in_folder: Path, results_folder: Path, cpnt_type: str, community, alpha="0.05") -> pd.DataFrame: + """ + :param community: :param results_folder: :param in_folder: the folder containing all the composition analysis , all nt_analysis,dnt_analysis,tnt_anlysis \ and aa_analysis @@ -72,14 +106,16 @@ def load_filter_reacap(in_folder: Path, results_folder: Path, cpnt_type: str, al recap_cpnt_path = in_folder / f"{cpnt_type}_analysis" / f"recap_{cpnt_type}.csv" df = pd.read_csv(recap_cpnt_path, sep="\t") df_only_TF = pd.read_csv(ConfigGraph.only_tf_05849) + df_only_TF = tf_filter(df_only_TF,community, alpha=alpha) TF_list = list(df_only_TF["Target_of_assay"]) df["TF"] = list(map(lambda x: x.split('_')[0], df["factor"])) - df = df.loc[df["TF"].isin(TF_list)] # to keep only the 136 TF curated manually df["alpha"] = list(map(lambda x: x.split('_')[1], df["factor"])) - df = df.loc[df["alpha"] == alpha] df["down_or_up"] = list(map(lambda x: filter_up_down(x), df["factor"])) + df = df.loc[df["TF"].isin(TF_list)] # to keep only the 136 TF curated manually + df = df.loc[df["alpha"] == alpha] df = df.loc[(df["grp1"] == "ctrl") & (df["grp2"] == "enriched")] df.to_csv(results_folder / f"filtred_recap_{cpnt_type}_{alpha}.csv", index=False) + return df @@ -93,7 +129,6 @@ def get_cpnt_row(df: pd.DataFrame, tf: str, reg: str, p_val_weight: bool) -> pd. :param df: the filtered and processed dataframe """ - print(tf) df_reg = df.loc[df["down_or_up"] == reg] df_reg = df_reg.loc[df["TF"] == tf] if not df_reg.empty: @@ -248,7 +283,7 @@ def heatmap_fig(df: pd.DataFrame, outfile: Path, color_scale: str): plotly.offline.plot(fig, filename=str(outfile), auto_open=False) -def venn_compo(df0, results_folder, cpnt_type, alpha="0.05") : +def venn_compo(df0, results_folder, cpnt_type, alpha="0.05"): """ creates a venn diagramme classifying tf by the regulation type up/down @@ -283,11 +318,13 @@ def venn_compo(df0, results_folder, cpnt_type, alpha="0.05") : @lp.parse(cpnt_type=['nt', 'dnt', 'tnt', 'aa'], alpha=["0.1", "0.05"]) -def heatmap_launcher(in_folder, cpnt_type, p_val_weight: bool = False, out_folder="results/composition_heatmap", +def heatmap_launcher(in_folder, cpnt_type, community: str, p_val_weight: bool = False, + out_folder="results/composition_heatmap", alpha="0.05"): """ creates a cpnt composition heatmap + :param community: the community file path annotation method :param p_val_weight: if the parameter is set to True then the values inside the heatmap \ cell will be multiplied by 1-padj as preponderance factor to empathize on the significance level of comparison :param in_folder: folder containing the recap.csv file @@ -296,7 +333,7 @@ def heatmap_launcher(in_folder, cpnt_type, p_val_weight: bool = False, out_folde :param alpha: type one error ["0.05","0.1"] default is 0.05 """ in_folder, results_folder = load_input_output_folders(in_folder, out_folder) - filtered_recap = load_filter_reacap(in_folder, results_folder, cpnt_type, alpha) + filtered_recap = load_filter_reacap(in_folder, results_folder, cpnt_type, Path(community), alpha) df_up, df_down = generate_heatmap_tables(filtered_recap, cpnt_type, p_val_weight) heatmap_fig(df_up.T, results_folder / f"up_{cpnt_type}_heatmap_alpha_{alpha}.html", color_scale="Picnic") heatmap_fig(df_down.T, results_folder / f"down_{cpnt_type}_heatmap_alpha_{alpha}.html", color_scale="Picnic") diff --git a/src/find_interaction_cluster/heatmap_cluster.py b/src/find_interaction_cluster/heatmap_cluster.py index 0cfa96d162ea83e1966521ba6a96b22d7bec7b98..298d305c07a2dec70cf40e7ec92c18b4527303ab 100644 --- a/src/find_interaction_cluster/heatmap_cluster.py +++ b/src/find_interaction_cluster/heatmap_cluster.py @@ -812,7 +812,7 @@ def heatmap_creator(htype: str, community_files: List[str] = (), feature: str = "gene", nb_regulated: int = 200, iteration: int = 10000, alternative: str = "two_sided", ps: int = mp.cpu_count(), - alpha: float = -1 ,fc = 0.5849) -> None: + alpha: float = -1 , fc = 0.5849) -> None: """ Create an heatmap indicating the percentage of genes/exons regulated \ by a splicing factor in every spatial clusters. diff --git a/src/shiny_Interactive_hist_plots/.RData b/src/shiny_Interactive_hist_plots/.RData index 5103124b7263cc7593181d6019e455335c195b6a..9bc95d68410d52a107b9d1315d8e08d5bfbbb75e 100644 Binary files a/src/shiny_Interactive_hist_plots/.RData and b/src/shiny_Interactive_hist_plots/.RData differ diff --git a/src/shiny_Interactive_hist_plots/.Rhistory b/src/shiny_Interactive_hist_plots/.Rhistory index 07baa587c0fe1c04cfcb7331cafa0e45574012e6..1ee87954e097f9be0affada4a24ec23e259e3fc4 100644 --- a/src/shiny_Interactive_hist_plots/.Rhistory +++ b/src/shiny_Interactive_hist_plots/.Rhistory @@ -90,7 +90,17 @@ runApp('Desktop/Transcriptomics_analysis/Enrichement_analysis/scr/shiny_Interact shiny::runApp('Desktop/Transcriptomics_analysis/Enrichement_analysis/scr/shiny_Interactive_hist_plots') shiny::runApp('Desktop/Transcriptomics_analysis/Enrichement_analysis/scr/shiny_Interactive_hist_plots') shiny::runApp('Desktop/Transcriptomics_analysis/chia-pet_network/src/shiny_Interactive_hist_plots') +runApp('Desktop/Transcriptomics_analysis/chia-pet_network/src/shiny_Interactive_hist_plots') +runApp('Desktop/Transcriptomics_analysis/chia-pet_network/src/shiny_Interactive_hist_plots') +runApp('Desktop/Transcriptomics_analysis/chia-pet_network/src/shiny_Interactive_hist_plots') +runApp('Desktop/Transcriptomics_analysis/chia-pet_network/src/shiny_Interactive_hist_plots') +shiny::runApp('Desktop/Transcriptomics_analysis/chia-pet_network/src/shiny_Interactive_hist_plots') +shiny::runApp('Desktop/Transcriptomics_analysis/Enrichement_analysis/scr/shiny_Interactive_hist_plots') +runApp('Desktop/Transcriptomics_analysis/Enrichement_analysis/scr/shiny_Interactive_hist_plots') +runApp('Desktop/Transcriptomics_analysis/Enrichement_analysis/scr/shiny_Interactive_hist_plots') +shiny::runApp('Desktop/Transcriptomics_analysis/chia-pet_network/src/shiny_Interactive_hist_plots') +shiny::runApp('Desktop/Transcriptomics_analysis/Enrichement_analysis/scr/shiny_Interactive_hist_plots') setwd("~/Desktop/Transcriptomics_analysis/chia-pet_network/src/shiny_Interactive_hist_plots") -runApp() setwd("~/Desktop/Transcriptomics_analysis/chia-pet_network/src/shiny_Interactive_hist_plots") runApp() +runApp() diff --git a/src/shiny_Interactive_hist_plots/server.R b/src/shiny_Interactive_hist_plots/server.R index e7f48c55976612341b4d65ad184bfbd0ed010e57..3a4f3e7e1a6ef97460f3ea4ced3efe26996be274 100644 --- a/src/shiny_Interactive_hist_plots/server.R +++ b/src/shiny_Interactive_hist_plots/server.R @@ -84,6 +84,7 @@ shinyServer(function(input, output) { geom_bar(stat = 'identity', position = 'dodge')+ theme(axis.text.x=element_text(angle=90,hjust=1))+ geom_text(aes(label=value), position=position_dodge(width=0.9), vjust=-0.25) + #ggsave("/home/ali/Desktop/mtcars.png", width = 40, height = 20, units = "cm") diff --git a/src/shiny_Interactive_hist_plots/ui.R b/src/shiny_Interactive_hist_plots/ui.R index e5c014accb8a9c9e2d6d0c1504adf315eed065de..a00b15c21cc17e90a9478e90049c178f766b5126 100644 --- a/src/shiny_Interactive_hist_plots/ui.R +++ b/src/shiny_Interactive_hist_plots/ui.R @@ -23,7 +23,7 @@ shinyUI(fluidPage( selectInput('project','Select project',choice =Sys.glob("data/PR_*") ), checkboxInput("filter_zero", "Hide TF with zero DE communities", FALSE), - sliderInput("filter", "Show TFX with total number of DE genes >",min = 0, max = 1000, value = 100), + sliderInput("filter", "Show TFX with total number of DE genes >",min = 0, max = 1000, value = 200), verbatimTextOutput("text_dim"), tabsetPanel(type = "tabs",