diff --git a/src/find_interaction_cluster/community_figures/__main__.py b/src/find_interaction_cluster/community_figures/__main__.py index e01081b74bdc737a8ddba3584afa4dc77cfbe615..259b1179f4760706674a387570e16faec9b5bc6b 100644 --- a/src/find_interaction_cluster/community_figures/__main__.py +++ b/src/find_interaction_cluster/community_figures/__main__.py @@ -54,7 +54,8 @@ def load_and_check_table(table: str, feature: str, target_col: str): def create_community_figures(table: str, feature: str, target_col: str, output: str, outfile: str, test_type: str, target_kind: str = "", sd_community: str = "y", - iteration: int = 10000) -> None: + iteration: int = 10000, + display_size: bool = False) -> None: """ Create a dataframe with a control community, save it as a table and \ as a barplot figure. @@ -75,6 +76,8 @@ def create_community_figures(table: str, feature: str, target_col: str, is only used if test_type = 'permutation' (default 10000). :param sd_community: y to display the standard deviation for communities \ false else. + :param display_size: True to display the size of the community above \ + each one of them False to display nothing. 
(default False) """ df = load_and_check_table(table, feature, target_col) if not outfile.endswith(".pdf"): @@ -83,7 +86,7 @@ def create_community_figures(table: str, feature: str, target_col: str, sd_community = sd_community.lower() == 'y' create_community_fig(df, feature, target_col, moutfile, test_type, target_kind=target_kind, iteration=iteration, - sd_community=sd_community) + sd_community=sd_community, display_size=display_size) if __name__ == "__main__": diff --git a/src/find_interaction_cluster/community_figures/fig_functions.py b/src/find_interaction_cluster/community_figures/fig_functions.py index bc836380ced46fa0a32d6f20a086e50d3e73537f..a05c2561a6626fe7166b89b3593b1605bfdf321d 100644 --- a/src/find_interaction_cluster/community_figures/fig_functions.py +++ b/src/find_interaction_cluster/community_figures/fig_functions.py @@ -316,9 +316,41 @@ def expand_results_perm(df: pd.DataFrame, rdf: pd.DataFrame, target_col: str, return pd.concat([df_ctrl, df], axis=0, ignore_index=True) +def display_size_fig(g: sns.FacetGrid, display_size: bool, target_col: str, + df_bar: pd.DataFrame): + """ + Optionally plot the size of every community on a secondary y-axis. + :param g: A seaborn FacetGrid + :param display_size: True to display the size of the community above \ + each one of them, False to display nothing. 
(no default) + :param target_col: The name of the column containing the data of interest + :param df_bar: A dataframe with the enrichment of a \ + nucleotide frequency for every community (without control) + :return: The seaborn FacetGrid + """ + xrange = g.ax.get_xlim() + if display_size: + df_size = df_bar[[f"mean_{target_col}", "community", + "community_size"]].drop_duplicates() + ax2 = g.ax.twinx() + ax2.set_ylabel('community_size', color="green") + df_size.plot(x="community", y="community_size", kind="scatter", ax=ax2, + legend=False, zorder=55, + color=(0.2, 0.8, 0.2, 0.4)) + ax2.tick_params(axis='y', labelcolor="green") + ax2.grid(False) + sizes = df_size["community_size"].to_list() + if max(sizes) - min(sizes) > 500: + ax2.set_yscale("log") + g.ax.set_xlim(xrange) + g.set(xticklabels=[]) + return g + + def make_barplot(df_bar: pd.DataFrame, outfile: Path, target_col: str, feature: str, target_kind: str = "", - sd_community: Optional[str] = "sd") -> None: + sd_community: Optional[str] = "sd", + display_size: bool = False) -> None: """ Create a barplot showing the frequency of `nt` for every community \ of exons/gene in `df_bar`. @@ -332,6 +364,8 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, :param feature: The king of feature of interest :param sd_community: sd to display community error bar, None to display \ nothing + :param display_size: True to display the size of the community above \ + each one of them, False to display nothing. 
(default False) """ sns.set(context="poster") g = sns.catplot(x="community", y=target_col, data=df_bar, kind="point", @@ -346,7 +380,7 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}" f"among community of {feature}s\n" f"(stats obtained with a lm test)") - g.set(xticklabels=[]) + g = display_size_fig(g, display_size, target_col, df_bar) g.ax.set_ylabel(f'Frequency of {target_col}') df_bara = df_bar.drop_duplicates(subset="community", keep="first") for i, p in enumerate(g2.ax.patches): @@ -366,7 +400,8 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, target_col: str, feature: str, target_kind: str = "", - sd_community: Optional[str] = "sd") -> None: + sd_community: Optional[str] = "sd", + display_size: bool = False) -> None: """ Create a barplot showing the frequency of `nt` for every community \ of exons/gene in `df_bar`. @@ -380,6 +415,8 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, :param feature: The king of feature of interest :param sd_community: sd to display community error bar, None to display \ nothing + :param display_size: True to display the size of the community above \ + each one of them False to display nothing. 
(default False) """ sns.set(context="poster") df_ctrl = df_bar.loc[df_bar[f"id_{feature}"] == 'ctrl', :] @@ -404,7 +441,7 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}" f"among community of {feature}s\n" f"(stats obtained with a permutation test)") - g.set(xticklabels=[]) + g = display_size_fig(g, display_size, target_col, df_bar) g.ax.set_ylabel(f'Frequency of {target_col}') df_bara = df_bar.drop_duplicates(subset="community", keep="first") for i, p in enumerate(g2.ax.patches): @@ -423,7 +460,8 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, def barplot_creation(df_bar: pd.DataFrame, outfig: Path, cpnt: str, test_type: str, feature: str, - target_kind: str, sd_community: bool) -> None: + target_kind: str, sd_community: bool, + display_size: bool) -> None: """ Reformat a dataframe with the enrichment of a nucleotide frequency \ for every feature for every community and then create a \ @@ -441,13 +479,16 @@ def barplot_creation(df_bar: pd.DataFrame, outfig: Path, target_col. :param sd_community: True to display the errors bars for communities, False else. + :param display_size: True to display the size of the community above \ + each one of them False to display nothing. 
(no default) """ sd_community = "sd" if sd_community else None if test_type == "lm": - make_barplot(df_bar, outfig, cpnt, feature, target_kind) + make_barplot(df_bar, outfig, cpnt, feature, target_kind, + display_size=display_size) else: make_barplot_perm(df_bar, outfig, cpnt, feature, target_kind, - sd_community) + sd_community, display_size) def get_feature_by_community(df: pd.DataFrame, feature: str) -> Dict: @@ -486,7 +527,8 @@ def create_community_fig(df: pd.DataFrame, feature: str, dic_com: Optional[Dict] = None, target_kind: str = "", iteration: int = 10000, - sd_community: bool = True) -> None: + sd_community: bool = True, + display_size: bool = False) -> None: """ Create a dataframe with a control community, save it as a table and \ as a barplot figure. @@ -507,6 +549,8 @@ def create_community_fig(df: pd.DataFrame, feature: str, :param iteration: The number of sub samples to create :param sd_community: True to display the errors bars for communities, False else. + :param display_size: True to display the size of the community above \ + each one of them, False to display nothing. (default False) """ if dic_com is None: dic_com = {} if test_type == 'lm' \ @@ -522,4 +566,4 @@ def create_community_fig(df: pd.DataFrame, feature: str, bar_outfile = str(outfile_ctrl).replace(".pdf", "_bar.txt") df_bar.to_csv(bar_outfile, sep="\t", index=False) barplot_creation(df_bar, outfile_ctrl, target_col, test_type, feature, - target_kind, sd_community) + target_kind, sd_community, display_size)