diff --git a/src/find_interaction_cluster/clip_figures/clip_compo_analyser.py b/src/find_interaction_cluster/clip_figures/clip_compo_analyser.py
index 799e398a9694bc0895d77b20dc0aca33b10c90cb..408d8ffed1a99f38e4e00795a37b43ba7fa256da 100644
--- a/src/find_interaction_cluster/clip_figures/clip_compo_analyser.py
+++ b/src/find_interaction_cluster/clip_figures/clip_compo_analyser.py
@@ -363,11 +363,34 @@ def make_barplot(df_comp: pd.DataFrame, outfile: Path, cpnt: str,
     plt.close()


+def update_composition_group(df_comp: pd.DataFrame, display_size: bool
+                             ) -> pd.DataFrame:
+    """
+    Append the size of each group to its name in the `group` column.
+
+    :param df_comp: The dataframe containing the frequency of every `cpnt_type`
+    :param display_size: True to display the size of \
+    the community. False to display nothing.
+    :return: if display_size is False return df_comp else return df_comp \
+    with the group column containing the size of the groups.
+    """
+    if not display_size:
+        return df_comp
+    d = {
+        gr: f"{gr}({df_comp[df_comp['group'] == gr].shape[0]})"
+        for gr in df_comp["group"].unique()
+    }
+    # Series.map returns a new Series: store it, otherwise nothing changes
+    df_comp = df_comp.assign(group=df_comp["group"].map(d))
+    return df_comp
+
+
 def create_figure_4_clip_table(clip_table_file: Path, feature: str,
                                threshold_feature: int, default_groups: int,
                                df_comp: pd.DataFrame, cpnt_type: str,
                                cpnt: str, df_id_ctrl: pd.DataFrame,
-                               write_comp_table: bool) -> None:
+                               write_comp_table: bool,
+                               display_size: bool) -> None:
     """
     Create a component figure for a clip file

@@ -384,19 +407,21 @@ def create_figure_4_clip_table(clip_table_file: Path, feature: str,
     :param cpnt: The kind of component of interest
     :param df_id_ctrl: A dataframe indicating control exons
     :param write_comp_table: True to save comp table, false else
+    :param display_size: True to display the size of the community.
+    False to display nothing. (default False)
     """
     logging.info(f"Working on {clip_table_file.name} - {cpnt} ({cpnt_type})")
     df_group = get_interest_groups(clip_table_file, feature, threshold_feature,
                                    default_groups)
     if df_group is None:
         return None
+    df_group = df_group.drop_duplicates()
     if feature == "gene":
         df_group["id_gene"] = df_group["id_gene"].astype(int)
         df_comp["id_gene"] = df_comp["id_gene"].astype(int)
     df_group = pd.concat([df_group, df_id_ctrl], ignore_index=True)
-
     if len(np.unique(df_group[f"id_{feature}"].values)) != df_group.shape[0]:
-
+        print(df_group[df_group.duplicated(subset=f"id_{feature}")])
         raise ValueError("Found duplicates value in df_group, exiting...")
     df_comp = df_comp.merge(df_group, how="left", on=f"id_{feature}")
     df_comp = df_comp[-df_comp["group"].isna()]
@@ -408,6 +433,7 @@ def create_figure_4_clip_table(clip_table_file: Path, feature: str,
     outfiles[0].parent.mkdir(exist_ok=True)
     df_stat = create_stat_df(df_comp, cpnt)
     df_stat.to_csv(outfiles[0], sep="\t", index=False)
+    df_comp = update_composition_group(df_comp, display_size)
     make_barplot(df_comp, outfiles[1], cpnt, cpnt_type, sf_name)
     if write_comp_table:
         df_comp.to_csv(outfiles[2], sep="\t", index=False)
@@ -416,7 +442,7 @@ def create_figure_4_clip_table(clip_table_file: Path, feature: str,
 def create_multiple_figures(clip_result_folder: Path, feature: str,
                             threshold_feature: int, default_groups: int,
                             cpnt_type: str, region: str = "gene",
-                            ps: int = 1,
+                            ps: int = 1, display_size: bool = False,
                             logging_level: str = "DEBUG") -> None:
     """
     Create multiple composition figures from a result clip folder.
@@ -433,6 +459,8 @@ def create_multiple_figures(clip_result_folder: Path, feature: str,
     :param region: Only used when feature is gene. Corresponds to \
     the region studied within genes.
     :param ps: The number of processes to create
+    :param display_size: True to display the size of the community,
+    False to display nothing. (default False)
     :param logging_level: The level of data to display (default 'DISABLE')
     """
     logging_def(ConfigGraph.community_folder, __file__, logging_level)
@@ -453,7 +481,8 @@ def create_multiple_figures(clip_result_folder: Path, feature: str,
     write_bar_table = True
     for clip_file, cpnt in conditions:
         args = [clip_file, feature, threshold_feature, default_groups,
-                df_comp, cpnt_type, cpnt, df_ctrl, write_bar_table]
+                df_comp, cpnt_type, cpnt, df_ctrl, write_bar_table,
+                display_size]
         write_bar_table = False
         processes.append(pool.apply_async(create_figure_4_clip_table, args))
     [p.get(timeout=None) for p in processes]
diff --git a/src/find_interaction_cluster/clip_figures/clip_compo_launcher.py b/src/find_interaction_cluster/clip_figures/clip_compo_launcher.py
index 8d4d05ed120b519f7f8fe60a0616b31ec86170c7..9edd36867c6e9cf1e17d8538a3df47bf6e829822 100644
--- a/src/find_interaction_cluster/clip_figures/clip_compo_launcher.py
+++ b/src/find_interaction_cluster/clip_figures/clip_compo_launcher.py
@@ -19,7 +19,8 @@ import multiprocessing as mp
 def launcher(clip_result_folder: str, feature: str,
              threshold_feature: int, default_groups: int,
              cpnt_type: str, region: str = "gene",
-             ps: int = 1, logging_level: str = "DISABLE") -> None:
+             ps: int = 1, display_size: bool = False,
+             logging_level: str = "DISABLE") -> None:
     """
     Create multiple composition figures from a result clip folder.

@@ -35,6 +36,8 @@ def launcher(clip_result_folder: str, feature: str,
     :param region: Only used when feature is gene. Corresponds to \
     the region studied within genes. (default: 'gene')
     :param ps: The number of processes to create (default 1)
+    :param display_size: True to display the size of the community. \
+    False to display nothing. (default False)
     :param logging_level: The level of data to display (default 'DISABLE')
     """
     clip_result_folder = Path(clip_result_folder)
@@ -42,7 +45,7 @@ def launcher(clip_result_folder: str, feature: str,
         raise NotADirectoryError(f"Directory {clip_result_folder} don't exist")
     create_multiple_figures(clip_result_folder, feature, threshold_feature,
                             default_groups, cpnt_type, region, ps,
-                            logging_level)
+                            display_size, logging_level)

-
-launcher()
+if __name__ == "__main__":
+    launcher()