diff --git a/src/find_interaction_cluster/community_figures/__main__.py b/src/find_interaction_cluster/community_figures/__main__.py index 259b1179f4760706674a387570e16faec9b5bc6b..884f616020ab508cd8d371331b4759b5773972c2 100644 --- a/src/find_interaction_cluster/community_figures/__main__.py +++ b/src/find_interaction_cluster/community_figures/__main__.py @@ -49,7 +49,7 @@ def load_and_check_table(table: str, feature: str, target_col: str): return df -@lp.parse(table="file", test_type=["lm", "permutation"], +@lp.parse(table="file", test_type=["lm", "permutation", "logit"], iteration="20 < iteration", sd_community = ["y", "n", "Y", "N"]) def create_community_figures(table: str, feature: str, target_col: str, output: str, outfile: str, test_type: str, diff --git a/src/find_interaction_cluster/community_figures/fig_functions.py b/src/find_interaction_cluster/community_figures/fig_functions.py index a05c2561a6626fe7166b89b3593b1605bfdf321d..2c5080c12168edbba00bdec093f509b79d37f661 100644 --- a/src/find_interaction_cluster/community_figures/fig_functions.py +++ b/src/find_interaction_cluster/community_figures/fig_functions.py @@ -54,8 +54,8 @@ def get_community_table(communities: List[List[str]], return pd.DataFrame(dic) -def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str - ) -> pd.DataFrame: +def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str, + test_type: str) -> pd.DataFrame: """ Make the lm analysis to see if the exon regulated by a splicing factor \ are equally distributed among the communities. @@ -66,15 +66,22 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str and the size of the community of the feature if it has one (None, else). :param outfile: A name of a file :param target_col: The name of the column containing the data of interest + :param test_type: The type of test to make (permutation or lm) :return: the pvalue of lm """ pandas2ri.activate() + if test_type == "lm": + mod = f"mod <- lm({target_col} ~ log(community_size) + " \ + f"community, data=data)" + else: + mod = f"mod <- glm({target_col} ~ log(community_size) + community," \ + f"data=data, family=binomial(link='logit'))" lmf = r( """ require("DHARMa") function(data, folder, partial_name) { - mod <- lm(%s ~ log(community_size) + community, data=data) + %s simulationOutput <- simulateResiduals(fittedModel = mod, n = 250) png(paste0(folder, "/dignostics_summary", partial_name, ".png")) plot(simulationOutput) @@ -82,7 +89,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str return(as.data.frame(summary(mod)$coefficients)) } - """ % target_col) + """ % mod) folder = outfile.parent / "diagnostics" folder.mkdir(parents=True, exist_ok=True) partial_name = outfile.name.replace('.pdf', '') @@ -97,7 +104,7 @@ def lm_maker_summary(df: pd.DataFrame, outfile: Path, target_col: str def lm_with_ctrl(df: pd.DataFrame, - target_col: str, outfile: Path, + target_col: str, outfile: Path, test_type: str ) -> Tuple[pd.DataFrame, pd.DataFrame]: """ :param df: A dataframe containing the id of the chosen `feature` \ @@ -106,6 +113,7 @@ def lm_with_ctrl(df: pd.DataFrame, and the size of the community of the feature if it has one (None, else). :param target_col: The name of the column containing the data of interest :param outfile: File that will contains the final figure + :param test_type: The type of test to make (permutation or lm) :return: The dataframe with ctrl exon and \ The dataframe with the p-value compared to the control \ list of feature. @@ -114,11 +122,12 @@ def lm_with_ctrl(df: pd.DataFrame, size = df.loc[df["community"] == "C-CTRL", :].shape[0] df['community_size'] = df['community_size'].fillna(size) df['community_size'] = df['community_size'].astype(int) - return df, lm_maker_summary(df, outfile, target_col) + return df, lm_maker_summary(df, outfile, target_col, test_type) def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame, - target_col: str, feature: str) -> pd.DataFrame: + target_col: str, feature: str, + test_type: str) -> pd.DataFrame: """ Merge df and rdf together. @@ -131,9 +140,10 @@ def expand_results_lm(df: pd.DataFrame, rdf: pd.DataFrame, exons. :param target_col: The name of the column containing the data of interest :param feature: The feature of interest + :param test_type: The kind of test to make :return: The merged dataframe: i.e df with the stats columns """ - p_col = "Pr(>|t|)" + p_col = "Pr(>|t|)" if test_type == "lm" else "Pr(>|z|)" df = df[[f"id_{feature}", target_col, "community", "community_size"]].copy() rdf = rdf[["community", "community_size", p_col, target_col]].copy() @@ -347,7 +357,7 @@ def display_size_fig(g: sns.FacetGrid, display_size: bool, target_col: str, return g -def make_barplot(df_bar: pd.DataFrame, outfile: Path, +def make_barplot(df_bar: pd.DataFrame, outfile: Path, test_type: str, target_col: str, feature: str, target_kind: str = "", sd_community: Optional[str] = "sd", display_size: bool = False) -> None: @@ -360,6 +370,7 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, :param outfile: File were the figure will be stored :param target_kind: An optional name that describe a bit further \ target_col. + :param test_type: The kind of test to perform :param target_col: The name of the column containing the data of interest :param feature: The king of feature of interest :param sd_community: sd to display community error bar, None to display \ @@ -378,7 +389,7 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, g.fig.subplots_adjust(top=0.9) target_kind = f" ({target_kind})" if target_kind else "" g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}" - f"among community of {feature}s\n" + f" among community of {feature}s\n" f"(stats obtained with a lm test)") g = display_size_fig(g, display_size, target_col, df_bar) g.ax.set_ylabel(f'Frequency of {target_col}') @@ -439,7 +450,7 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, g.fig.subplots_adjust(top=0.9) target_kind = f" ({target_kind})" if target_kind else "" g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}" - f"among community of {feature}s\n" + f" among community of {feature}s\n" f"(stats obtained with a permutation test)") g = display_size_fig(g, display_size, target_col, df_bar) g.ax.set_ylabel(f'Frequency of {target_col}') @@ -483,8 +494,8 @@ def barplot_creation(df_bar: pd.DataFrame, outfig: Path, each one of them False to display nothing. (default False) """ sd_community = "sd" if sd_community else None - if test_type == "lm": - make_barplot(df_bar, outfig, cpnt, feature, target_kind, + if test_type != "permutation": + make_barplot(df_bar, outfig, test_type, cpnt, feature, target_kind, display_size=display_size) else: make_barplot_perm(df_bar, outfig, cpnt, feature, target_kind, @@ -553,11 +564,11 @@ def create_community_fig(df: pd.DataFrame, feature: str, each one of them False to display nothing. (default False) """ if dic_com is None: - dic_com = {} if test_type == 'lm' \ + dic_com = {} if test_type != 'permutation' \ else get_feature_by_community(df, feature) - if test_type == "lm": - ndf, rdf = lm_with_ctrl(df, target_col, outfile_ctrl) - df_bar = expand_results_lm(ndf, rdf, target_col, feature) + if test_type != "permutation": + ndf, rdf = lm_with_ctrl(df, target_col, outfile_ctrl, test_type) + df_bar = expand_results_lm(ndf, rdf, target_col, feature, test_type) else: rdf = perm_with_ctrl(df, feature, target_col, dic_com, iteration) df_bar = expand_results_perm(df, rdf, target_col, feature, iteration)