Skip to content
Snippets Groups Projects
Commit 78072892 authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/community_figures/fig_functions.py: add parameter...

src/find_interaction_cluster/community_figures/fig_functions.py: add parameter alpha when selecting permutation test
parent 7d006ae8
No related branches found
No related tags found
No related merge requests found
......@@ -191,7 +191,7 @@ def get_permutation_mean(df_ctrl: pd.DataFrame,
def perm_community_pval(row: pd.Series, df_ctrl: pd.DataFrame,
cpnt: str, iteration: int
cpnt: str, iteration: int, alpha: float
) -> Tuple[float, float, float, str]:
"""
Randomly sample `size` `feature` from `df_ctrl` to extract `iteration` \
......@@ -203,19 +203,20 @@ def perm_community_pval(row: pd.Series, df_ctrl: pd.DataFrame,
in each exons/gene in fasterdb.
:param cpnt: The component (nt, aa, dnt) of interest
:param iteration: The number of sub samples to create
:param alpha: Type 1 error threshold
:return: The ctrl mean frequency value of `nt`, its standard error \
the pvalue and the regulation of the enrichment/impoverishment \
of the community in `row` compared to control exons.
"""
list_values = get_permutation_mean(df_ctrl, cpnt, row["community_size"],
iteration)
pval, reg = get_pvalue(np.array(list_values), row[cpnt], iteration)
pval, reg = get_pvalue(np.array(list_values), row[cpnt], iteration, alpha)
return float(np.mean(list_values)), float(np.std(list_values)), pval, reg
def perm_pvalues(df: pd.DataFrame, df_ctrl: pd.DataFrame, feature: str,
target_col: str, iteration: int,
dic_com: Dict) -> pd.DataFrame:
dic_com: Dict, alpha: float) -> pd.DataFrame:
"""
Randomly sample `size` `feature` from `df_ctrl` to extract `iteration` \
of `nt` frequencies from it.
......@@ -229,6 +230,7 @@ def perm_pvalues(df: pd.DataFrame, df_ctrl: pd.DataFrame, feature: str,
:param iteration: The number of sub samples to create
:param dic_com: A dictionary linking each community to the exons \
it contains.
:param alpha: Type 1 error threshold
:return: The dataframe containing p-values and regulation \
indicating the enrichment or impoverishment of each community \
compared to control.
"""
......@@ -240,14 +242,14 @@ def perm_pvalues(df: pd.DataFrame, df_ctrl: pd.DataFrame, feature: str,
-df_ctrl[f'id_{feature}'
].isin(dic_com[row['community']]),
:],
target_col, iteration)
target_col, iteration, alpha)
[x.append(y) for x, y in zip([mean_ctrl, std_ctrl, list_pval,
list_reg], res)]
adj_pvals = multipletests(list_pval, alpha=0.05,
adj_pvals = multipletests(list_pval, alpha=alpha,
method='fdr_bh',
is_sorted=False,
returnsorted=False)[1]
adj_regs = [list_reg[i] if adj_pvals[i] <= 0.05 else " . "
adj_regs = [list_reg[i] if adj_pvals[i] <= alpha else " . "
for i in range(len(list_reg))]
df[f'{target_col}_mean_{iteration}_ctrl'] = mean_ctrl
df[f'{target_col}_std_{iteration}_ctrl'] = std_ctrl
......@@ -258,7 +260,7 @@ def perm_pvalues(df: pd.DataFrame, df_ctrl: pd.DataFrame, feature: str,
def perm_with_ctrl(df: pd.DataFrame, feature: str,
target_col: str, dic_com: Dict,
iteration: int) -> pd.DataFrame:
iteration: int, alpha: float) -> pd.DataFrame:
"""
:param df: A dataframe containing the id of the chosen `feature` \
......@@ -270,6 +272,7 @@ def perm_with_ctrl(df: pd.DataFrame, feature: str,
:param dic_com: A dictionary linking each community to the exons \
it contains.
:param iteration: The number of sub samples to create
:param alpha: Type 1 error threshold
:return: The dataframe with the p-value compared to the control \
list of exons.
"""
......@@ -277,7 +280,7 @@ def perm_with_ctrl(df: pd.DataFrame, feature: str,
mean_df = df_tmp[[target_col, "community", "community_size"]]. \
groupby(["community", "community_size"]).mean().reset_index()
return perm_pvalues(mean_df, df, feature, target_col,
iteration, dic_com)
iteration, dic_com, alpha)
def create_perm_ctrl_df(ctrl_df: pd.DataFrame, order_df: pd.DataFrame,
......@@ -462,7 +465,7 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
target_col: str, feature: str,
target_kind: str = "",
sd_community: Optional[str] = "sd",
display_size: bool = False) -> None:
display_size: bool = False, alpha: float = 0.05) -> None:
"""
Create a barplot showing the frequency of `nt` for every community \
of exons/gene in `df_bar`.
......@@ -478,6 +481,7 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
nothing
:param display_size: True to display the size of the community above \
each one of them False to display nothing. (default False)
:param alpha: Type 1 error threshold
"""
sns.set(context="poster")
df_ctrl = df_bar.loc[df_bar[f"id_{feature}"] == 'ctrl', :]
......@@ -501,12 +505,12 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
target_kind = f" ({target_kind})" if target_kind else ""
g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}"
f" among community of {feature}s\n"
f"(stats obtained with a permutation test)")
f"(stats obtained with a permutation test) (alpha={alpha})")
g = display_size_fig(g, display_size, target_col, df_bar)
g.ax.set_ylabel(f'Frequency of {target_col}')
df_bara = df_bar.drop_duplicates(subset="community", keep="first")
for i, p in enumerate(g2.ax.patches):
stats = "*" if df_bara.iloc[i, :]["p-adj"] < 0.05 else ""
stats = "*" if df_bara.iloc[i, :]["p-adj"] <= alpha else ""
com = df_bara.iloc[i, :]["community"]
csd = 0
if sd_community == "sd":
......@@ -522,7 +526,7 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path,
def barplot_creation(df_bar: pd.DataFrame, outfig: Path,
cpnt: str, test_type: str, feature: str,
target_kind: str, sd_community: bool,
display_size: bool) -> None:
display_size: bool, alpha: float = 0.05) -> None:
"""
Reformat a dataframe with the enrichment of a nucleotide frequency \
for every feature for every community and then create a \
......@@ -542,6 +546,7 @@ def barplot_creation(df_bar: pd.DataFrame, outfig: Path,
False else.
:param display_size: True to display the size of the community above \
each one of them False to display nothing. (default False)
:param alpha: Type 1 error threshold
"""
sd_community = "sd" if sd_community else None
if test_type != "permutation":
......@@ -549,7 +554,7 @@ def barplot_creation(df_bar: pd.DataFrame, outfig: Path,
sd_community=sd_community, display_size=display_size)
else:
make_barplot_perm(df_bar, outfig, cpnt, feature, target_kind,
sd_community, display_size)
sd_community, display_size, alpha)
def get_feature_by_community(df: pd.DataFrame, feature: str) -> Dict:
......@@ -589,7 +594,8 @@ def create_community_fig(df: pd.DataFrame, feature: str,
target_kind: str = "",
iteration: int = 10000,
sd_community: bool = True,
display_size: bool = False) -> None:
display_size: bool = False,
alpha: float = 0.05) -> None:
"""
Create a dataframe with a control community, save it as a table and \
as a barplot figure.
......@@ -612,6 +618,7 @@ def create_community_fig(df: pd.DataFrame, feature: str,
False else.
:param display_size: True to display the size of the community above \
each one of them False to display nothing. (default False)
:param alpha: Type I error threshold
"""
df.to_csv(str(outfile_ctrl).replace(".pdf", ".tmp.txt"), sep="\t",
index=False)
......@@ -622,11 +629,12 @@ def create_community_fig(df: pd.DataFrame, feature: str,
ndf, rdf = lm_with_ctrl(df, target_col, outfile_ctrl, test_type)
df_bar = expand_results_lm(ndf, rdf, target_col, feature, test_type)
else:
rdf = perm_with_ctrl(df, feature, target_col, dic_com, iteration)
rdf = perm_with_ctrl(df, feature, target_col, dic_com, iteration,
alpha)
df_bar = expand_results_perm(df, rdf, target_col, feature, iteration)
rdf.to_csv(str(outfile_ctrl).replace(".pdf", ".txt"), sep="\t",
index=False)
bar_outfile = str(outfile_ctrl).replace(".pdf", "_bar.txt")
df_bar.to_csv(bar_outfile, sep="\t", index=False)
barplot_creation(df_bar, outfile_ctrl, target_col, test_type, feature,
target_kind, sd_community, display_size)
target_kind, sd_community, display_size, alpha)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment