diff --git a/src/find_interaction_cluster/community_figures/fig_functions.py b/src/find_interaction_cluster/community_figures/fig_functions.py index 2c5080c12168edbba00bdec093f509b79d37f661..d7367e87eef4b4044a729f3b87a8656e8b8fd2ae 100644 --- a/src/find_interaction_cluster/community_figures/fig_functions.py +++ b/src/find_interaction_cluster/community_figures/fig_functions.py @@ -55,7 +55,7 @@ def get_community_table(communities: List[List[str]], def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str, - test_type: str) -> pd.DataFrame: + test_type: str) -> Tuple[pd.DataFrame, pd.DataFrame]: """ Make the lm analysis to see if the exon regulated by a splicing factor \ are equally distributed among the communities. @@ -67,7 +67,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str, :param outfile: A name of a file :param target_col: The name of the column containing the data of interest :param test_type: The type of test to make (permutation or lm) - :return: the pvalue of lm + :return: the entry dataframe and the result dataframe post analysis """ pandas2ri.activate() if test_type == "lm": @@ -76,6 +76,13 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str, else: mod = f"mod <- glm({target_col} ~ log(community_size) + community," \ f"data=data, family=binomial(link='logit'))" + df[target_col] = df[target_col].astype(int) + tmp = df[[target_col, 'community']].groupby('community').mean().reset_index() + bad_groups = tmp.loc[tmp[target_col] == 0, "community"].to_list() + if "C-CTRL" in bad_groups: + print("Control group as a mean value equals to 0, exiting...") + exit(1) + df = df[-df["community"].isin(bad_groups)] lmf = r( """ require("DHARMa") @@ -100,7 +107,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str, res_df.loc[res_df['community'] == "(Intercept)", "community"] = "C-CTRL" mean_df = df[[target_col, "community", "community_size"]]. \ groupby(["community", "community_size"]).mean().reset_index() - return res_df.merge(mean_df, how="left", on="community") + return df, res_df.merge(mean_df, how="left", on="community") def lm_with_ctrl(df: pd.DataFrame, @@ -122,7 +129,7 @@ def lm_with_ctrl(df: pd.DataFrame, size = df.loc[df["community"] == "C-CTRL", :].shape[0] df['community_size'] = df['community_size'].fillna(size) df['community_size'] = df['community_size'].astype(int) - return df, lm_maker_summary(df, outfile, target_col, test_type) + return lm_maker_summary(df, outfile, target_col, test_type) def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame, @@ -152,7 +159,8 @@ def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame, df = df.merge(rdf, how="left", on=["community", "community_size"]) df_ctrl = df[df["community"] == "C-CTRL"] df = df[df["community"] != "C-CTRL"].copy() - df.sort_values(f"mean_{target_col}", ascending=True, inplace=True) + df.sort_values([f"mean_{target_col}", "community"], ascending=True, + inplace=True) return pd.concat([df_ctrl, df], axis=0, ignore_index=True)