Skip to content
Snippets Groups Projects
Commit a1f2fb6b authored by nfontrod
Browse files

src/find_interaction_cluster/community_figures/fig_functions.py: change in...

src/find_interaction_cluster/community_figures/fig_functions.py: change in lm_maker_summary to handle logistic regression
parent 3c50729f
Branches
No related tags found
No related merge requests found
...@@ -55,7 +55,7 @@ def get_community_table(communities: List[List[str]], ...@@ -55,7 +55,7 @@ def get_community_table(communities: List[List[str]],
def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str, def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str,
test_type: str) -> pd.DataFrame: test_type: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
""" """
Make the lm analysis to see if the exon regulated by a splicing factor \ Make the lm analysis to see if the exon regulated by a splicing factor \
are equally distributed among the communities. are equally distributed among the communities.
...@@ -67,7 +67,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str, ...@@ -67,7 +67,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str,
:param outfile: A name of a file :param outfile: A name of a file
:param target_col: The name of the column containing the data of interest :param target_col: The name of the column containing the data of interest
:param test_type: The type of test to make (permutation or lm) :param test_type: The type of test to make (permutation or lm)
:return: the pvalue of lm :return: the entry dataframe and the result dataframe post analysis
""" """
pandas2ri.activate() pandas2ri.activate()
if test_type == "lm": if test_type == "lm":
...@@ -76,6 +76,13 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str, ...@@ -76,6 +76,13 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str,
else: else:
mod = f"mod <- glm({target_col} ~ log(community_size) + community," \ mod = f"mod <- glm({target_col} ~ log(community_size) + community," \
f"data=data, family=binomial(link='logit'))" f"data=data, family=binomial(link='logit'))"
df[target_col] = df[target_col].astype(int)
tmp = df[[target_col, 'community']].groupby('community').mean().reset_index()
bad_groups = tmp.loc[tmp[target_col] == 0, "community"].to_list()
if "C-CTRL" in bad_groups:
print("Control group as a mean value equals to 0, exiting...")
exit(1)
df = df[-df["community"].isin(bad_groups)]
lmf = r( lmf = r(
""" """
require("DHARMa") require("DHARMa")
...@@ -100,7 +107,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str, ...@@ -100,7 +107,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str,
res_df.loc[res_df['community'] == "(Intercept)", "community"] = "C-CTRL" res_df.loc[res_df['community'] == "(Intercept)", "community"] = "C-CTRL"
mean_df = df[[target_col, "community", "community_size"]]. \ mean_df = df[[target_col, "community", "community_size"]]. \
groupby(["community", "community_size"]).mean().reset_index() groupby(["community", "community_size"]).mean().reset_index()
return res_df.merge(mean_df, how="left", on="community") return df, res_df.merge(mean_df, how="left", on="community")
def lm_with_ctrl(df: pd.DataFrame, def lm_with_ctrl(df: pd.DataFrame,
...@@ -122,7 +129,7 @@ def lm_with_ctrl(df: pd.DataFrame, ...@@ -122,7 +129,7 @@ def lm_with_ctrl(df: pd.DataFrame,
size = df.loc[df["community"] == "C-CTRL", :].shape[0] size = df.loc[df["community"] == "C-CTRL", :].shape[0]
df['community_size'] = df['community_size'].fillna(size) df['community_size'] = df['community_size'].fillna(size)
df['community_size'] = df['community_size'].astype(int) df['community_size'] = df['community_size'].astype(int)
return df, lm_maker_summary(df, outfile, target_col, test_type) return lm_maker_summary(df, outfile, target_col, test_type)
def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame, def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame,
...@@ -152,7 +159,8 @@ def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame, ...@@ -152,7 +159,8 @@ def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame,
df = df.merge(rdf, how="left", on=["community", "community_size"]) df = df.merge(rdf, how="left", on=["community", "community_size"])
df_ctrl = df[df["community"] == "C-CTRL"] df_ctrl = df[df["community"] == "C-CTRL"]
df = df[df["community"] != "C-CTRL"].copy() df = df[df["community"] != "C-CTRL"].copy()
df.sort_values(f"mean_{target_col}", ascending=True, inplace=True) df.sort_values([f"mean_{target_col}", "community"], ascending=True,
inplace=True)
return pd.concat([df_ctrl, df], axis=0, ignore_index=True) return pd.concat([df_ctrl, df], axis=0, ignore_index=True)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment