Skip to content
Snippets Groups Projects
Commit a90c69e7 authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/community_figures/*.py: add a parameter...

src/find_interaction_cluster/community_figures/*.py: add a parameter display_size to display the size of every community
parent a072ad25
Branches
No related tags found
No related merge requests found
...@@ -54,7 +54,8 @@ def load_and_check_table(table: str, feature: str, target_col: str): ...@@ -54,7 +54,8 @@ def load_and_check_table(table: str, feature: str, target_col: str):
def create_community_figures(table: str, feature: str, target_col: str, def create_community_figures(table: str, feature: str, target_col: str,
output: str, outfile: str, test_type: str, output: str, outfile: str, test_type: str,
target_kind: str = "", sd_community: str = "y", target_kind: str = "", sd_community: str = "y",
iteration: int = 10000) -> None: iteration: int = 10000,
display_size: bool = False) -> None:
""" """
Create a dataframe with a control community, save it as a table and \ Create a dataframe with a control community, save it as a table and \
as a barplot figure. as a barplot figure.
...@@ -75,6 +76,8 @@ def create_community_figures(table: str, feature: str, target_col: str, ...@@ -75,6 +76,8 @@ def create_community_figures(table: str, feature: str, target_col: str,
is only used if test_type = 'permutation' (default 10000). is only used if test_type = 'permutation' (default 10000).
:param sd_community: y to display the standard deviation for communities \ :param sd_community: y to display the standard deviation for communities \
false else. false else.
:param display_size: True to display the size of the community above \
each one of them False to display nothing. (default False)
""" """
df = load_and_check_table(table, feature, target_col) df = load_and_check_table(table, feature, target_col)
if not outfile.endswith(".pdf"): if not outfile.endswith(".pdf"):
...@@ -83,7 +86,7 @@ def create_community_figures(table: str, feature: str, target_col: str, ...@@ -83,7 +86,7 @@ def create_community_figures(table: str, feature: str, target_col: str,
sd_community = sd_community.lower() == 'y' sd_community = sd_community.lower() == 'y'
create_community_fig(df, feature, target_col, moutfile, test_type, create_community_fig(df, feature, target_col, moutfile, test_type,
target_kind=target_kind, iteration=iteration, target_kind=target_kind, iteration=iteration,
sd_community=sd_community) sd_community=sd_community, display_size=display_size)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -316,9 +316,41 @@ def expand_results_perm(df: pd.DataFrame, rdf: pd.DataFrame, target_col: str, ...@@ -316,9 +316,41 @@ def expand_results_perm(df: pd.DataFrame, rdf: pd.DataFrame, target_col: str,
return pd.concat([df_ctrl, df], axis=0, ignore_index=True) return pd.concat([df_ctrl, df], axis=0, ignore_index=True)
def display_size_fig(g: sns.FacetGrid, display_size: bool, target_col: str,
                     df_bar: pd.DataFrame):
    """
    Optionally overlay the community sizes on the figure, then blank the \
    x tick labels.

    When `display_size` is True, the distinct \
    (mean_<target_col>, community, community_size) rows of `df_bar` are \
    drawn as a semi-transparent green scatter plot on a secondary y axis \
    created with `twinx`. That secondary axis is switched to a log scale \
    when the largest and the smallest sizes differ by more than 500.

    :param g: A seaborn FacetGrid
    :param display_size: True to display the size of the community above \
    each one of them, False to display nothing.
    :param target_col: The name of the column containing the data of interest
    :param df_bar: A dataframe with the enrichment of a \
    nucleotide frequency for every community (without control). It must \
    contain the columns `mean_<target_col>`, `community` and \
    `community_size` when `display_size` is True.
    :return: The same seaborn FacetGrid that was given in input
    """
    # Record the x limits before anything is drawn so they can be put back
    # once the optional overlay has been added.
    initial_xlim = g.ax.get_xlim()
    if display_size:
        wanted_cols = [f"mean_{target_col}", "community", "community_size"]
        size_table = df_bar[wanted_cols].drop_duplicates()
        size_axis = g.ax.twinx()
        size_axis.set_ylabel('community_size', color="green")
        size_table.plot(x="community", y="community_size", kind="scatter",
                        ax=size_axis, legend=False, zorder=55,
                        color=(0.2, 0.8, 0.2, 0.4))
        size_axis.tick_params(axis='y', labelcolor="green")
        size_axis.grid(False)
        all_sizes = size_table["community_size"].to_list()
        size_spread = max(all_sizes) - min(all_sizes)
        # A wide spread of sizes is shown on a logarithmic axis.
        if size_spread > 500:
            size_axis.set_yscale("log")
    g.ax.set_xlim(initial_xlim)
    g.set(xticklabels=[])
    return g
def make_barplot(df_bar: pd.DataFrame, outfile: Path, def make_barplot(df_bar: pd.DataFrame, outfile: Path,
target_col: str, feature: str, target_kind: str = "", target_col: str, feature: str, target_kind: str = "",
sd_community: Optional[str] = "sd") -> None: sd_community: Optional[str] = "sd",
display_size: bool = False) -> None:
""" """
Create a barplot showing the frequency of `nt` for every community \ Create a barplot showing the frequency of `nt` for every community \
of exons/gene in `df_bar`. of exons/gene in `df_bar`.
...@@ -332,6 +364,8 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, ...@@ -332,6 +364,8 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path,
:param feature: The king of feature of interest :param feature: The king of feature of interest
:param sd_community: sd to display community error bar, None to display \ :param sd_community: sd to display community error bar, None to display \
nothing nothing
:param display_size: True to display the size of the community above \
each one of them False to display nothing. (default False)
""" """
sns.set(context="poster") sns.set(context="poster")
g = sns.catplot(x="community", y=target_col, data=df_bar, kind="point", g = sns.catplot(x="community", y=target_col, data=df_bar, kind="point",
...@@ -346,7 +380,7 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, ...@@ -346,7 +380,7 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path,
g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}" g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}"
f"among community of {feature}s\n" f"among community of {feature}s\n"
f"(stats obtained with a lm test)") f"(stats obtained with a lm test)")
g.set(xticklabels=[]) g = display_size_fig(g, display_size, target_col, df_bar)
g.ax.set_ylabel(f'Frequency of {target_col}') g.ax.set_ylabel(f'Frequency of {target_col}')
df_bara = df_bar.drop_duplicates(subset="community", keep="first") df_bara = df_bar.drop_duplicates(subset="community", keep="first")
for i, p in enumerate(g2.ax.patches): for i, p in enumerate(g2.ax.patches):
...@@ -366,7 +400,8 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, ...@@ -366,7 +400,8 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path,
def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
target_col: str, feature: str, target_col: str, feature: str,
target_kind: str = "", target_kind: str = "",
sd_community: Optional[str] = "sd") -> None: sd_community: Optional[str] = "sd",
display_size: bool = False) -> None:
""" """
Create a barplot showing the frequency of `nt` for every community \ Create a barplot showing the frequency of `nt` for every community \
of exons/gene in `df_bar`. of exons/gene in `df_bar`.
...@@ -380,6 +415,8 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, ...@@ -380,6 +415,8 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
:param feature: The king of feature of interest :param feature: The king of feature of interest
:param sd_community: sd to display community error bar, None to display \ :param sd_community: sd to display community error bar, None to display \
nothing nothing
:param display_size: True to display the size of the community above \
each one of them False to display nothing. (default False)
""" """
sns.set(context="poster") sns.set(context="poster")
df_ctrl = df_bar.loc[df_bar[f"id_{feature}"] == 'ctrl', :] df_ctrl = df_bar.loc[df_bar[f"id_{feature}"] == 'ctrl', :]
...@@ -404,7 +441,7 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, ...@@ -404,7 +441,7 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}" g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}"
f"among community of {feature}s\n" f"among community of {feature}s\n"
f"(stats obtained with a permutation test)") f"(stats obtained with a permutation test)")
g.set(xticklabels=[]) g = display_size_fig(g, display_size, target_col, df_bar)
g.ax.set_ylabel(f'Frequency of {target_col}') g.ax.set_ylabel(f'Frequency of {target_col}')
df_bara = df_bar.drop_duplicates(subset="community", keep="first") df_bara = df_bar.drop_duplicates(subset="community", keep="first")
for i, p in enumerate(g2.ax.patches): for i, p in enumerate(g2.ax.patches):
...@@ -423,7 +460,8 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, ...@@ -423,7 +460,8 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
def barplot_creation(df_bar: pd.DataFrame, outfig: Path, def barplot_creation(df_bar: pd.DataFrame, outfig: Path,
cpnt: str, test_type: str, feature: str, cpnt: str, test_type: str, feature: str,
target_kind: str, sd_community: bool) -> None: target_kind: str, sd_community: bool,
display_size: bool) -> None:
""" """
Reformat a dataframe with the enrichment of a nucleotide frequency \ Reformat a dataframe with the enrichment of a nucleotide frequency \
for every feature for every community and then create a \ for every feature for every community and then create a \
...@@ -441,13 +479,16 @@ def barplot_creation(df_bar: pd.DataFrame, outfig: Path, ...@@ -441,13 +479,16 @@ def barplot_creation(df_bar: pd.DataFrame, outfig: Path,
target_col. target_col.
:param sd_community: True to display the errors bars for communities, :param sd_community: True to display the errors bars for communities,
False else. False else.
:param display_size: True to display the size of the community above \
each one of them False to display nothing. (default False)
""" """
sd_community = "sd" if sd_community else None sd_community = "sd" if sd_community else None
if test_type == "lm": if test_type == "lm":
make_barplot(df_bar, outfig, cpnt, feature, target_kind) make_barplot(df_bar, outfig, cpnt, feature, target_kind,
display_size=display_size)
else: else:
make_barplot_perm(df_bar, outfig, cpnt, feature, target_kind, make_barplot_perm(df_bar, outfig, cpnt, feature, target_kind,
sd_community) sd_community, display_size)
def get_feature_by_community(df: pd.DataFrame, feature: str) -> Dict: def get_feature_by_community(df: pd.DataFrame, feature: str) -> Dict:
...@@ -486,7 +527,8 @@ def create_community_fig(df: pd.DataFrame, feature: str, ...@@ -486,7 +527,8 @@ def create_community_fig(df: pd.DataFrame, feature: str,
dic_com: Optional[Dict] = None, dic_com: Optional[Dict] = None,
target_kind: str = "", target_kind: str = "",
iteration: int = 10000, iteration: int = 10000,
sd_community: bool = True) -> None: sd_community: bool = True,
display_size: bool = False) -> None:
""" """
Create a dataframe with a control community, save it as a table and \ Create a dataframe with a control community, save it as a table and \
as a barplot figure. as a barplot figure.
...@@ -507,6 +549,8 @@ def create_community_fig(df: pd.DataFrame, feature: str, ...@@ -507,6 +549,8 @@ def create_community_fig(df: pd.DataFrame, feature: str,
:param iteration: The number of sub samples to create :param iteration: The number of sub samples to create
:param sd_community: True to display the errors bars for communities, :param sd_community: True to display the errors bars for communities,
False else. False else.
:param display_size: True to display the size of the community above \
each one of them False to display nothing. (default False)
""" """
if dic_com is None: if dic_com is None:
dic_com = {} if test_type == 'lm' \ dic_com = {} if test_type == 'lm' \
...@@ -522,4 +566,4 @@ def create_community_fig(df: pd.DataFrame, feature: str, ...@@ -522,4 +566,4 @@ def create_community_fig(df: pd.DataFrame, feature: str,
bar_outfile = str(outfile_ctrl).replace(".pdf", "_bar.txt") bar_outfile = str(outfile_ctrl).replace(".pdf", "_bar.txt")
df_bar.to_csv(bar_outfile, sep="\t", index=False) df_bar.to_csv(bar_outfile, sep="\t", index=False)
barplot_creation(df_bar, outfile_ctrl, target_col, test_type, feature, barplot_creation(df_bar, outfile_ctrl, target_col, test_type, feature,
target_kind, sd_community) target_kind, sd_community, display_size)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment