src/find_interaction_cluster/community_figures/fig_functions.py: change in...

src/find_interaction_cluster/community_figures/fig_functions.py: change in lm_maker_summary to handle logistic regression

src/find_interaction_cluster/community_figures/fig_functions.py: change in...
a1f2fb6b · nfontrod · 3c50729f · a1f2fb6b
Commit a1f2fb6b authored Jan 18, 2021 by nfontrod
--- a/src/find_interaction_cluster/community_figures/fig_functions.py
+++ b/src/find_interaction_cluster/community_figures/fig_functions.py
@@ -55,7 +55,7 @@ def get_community_table(communities: List[List[str]],
 def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str,
-                     test_type: str) -> pd.DataFrame:
+                     test_type: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Make the lm analysis to see if the exon regulated by a splicing factor \
    are equally distributed among the communities.
@@ -67,7 +67,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str,
    :param outfile: A name of a file
    :param target_col: The name of the column containing the data of interest
    :param test_type: The type of test to make (permutation or lm)
-    :return: the pvalue of lm
+    :return: the input dataframe (possibly filtered) and the result dataframe
    """
    pandas2ri.activate()
    if test_type == "lm":
@@ -76,6 +76,13 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str,
    else:
        mod = f"mod <- glm({target_col} ~ log(community_size) + community," \
              f"data=data, family=binomial(link='logit'))"
+        df[target_col] = df[target_col].astype(int)
+        tmp = df[[target_col, 'community']].groupby('community').mean().reset_index()
+        bad_groups = tmp.loc[tmp[target_col] == 0, "community"].to_list()
+        if "C-CTRL" in bad_groups:
+            print("Control group as a mean value equals to 0, exiting...")
+            exit(1)
+        df = df[-df["community"].isin(bad_groups)]
    lmf = r(
        """
        require("DHARMa")
@@ -100,7 +107,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str,
    res_df.loc[res_df['community'] == "(Intercept)", "community"] = "C-CTRL"
    mean_df = df[[target_col, "community", "community_size"]]. \
        groupby(["community", "community_size"]).mean().reset_index()
-    return res_df.merge(mean_df, how="left", on="community")
+    return df, res_df.merge(mean_df, how="left", on="community")
 def lm_with_ctrl(df: pd.DataFrame,
@@ -122,7 +129,7 @@ def lm_with_ctrl(df: pd.DataFrame,
    size = df.loc[df["community"] == "C-CTRL", :].shape[0]
    df['community_size'] = df['community_size'].fillna(size)
    df['community_size'] = df['community_size'].astype(int)
-    return df, lm_maker_summary(df, outfile, target_col, test_type)
+    return lm_maker_summary(df, outfile, target_col, test_type)
 def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame,
@@ -152,7 +159,8 @@ def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame,
    df = df.merge(rdf, how="left", on=["community", "community_size"])
    df_ctrl = df[df["community"] == "C-CTRL"]
    df = df[df["community"] != "C-CTRL"].copy()
-    df.sort_values(f"mean_{target_col}", ascending=True, inplace=True)
+    df.sort_values([f"mean_{target_col}", "community"], ascending=True,
+                   inplace=True)
    return pd.concat([df_ctrl, df], axis=0, ignore_index=True)