From 6e73fade89cfa018d0f988bc37aa7c921bd1c7ff Mon Sep 17 00:00:00 2001 From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr> Date: Mon, 25 Jan 2021 17:24:33 +0100 Subject: [PATCH] src/find_interaction_cluster/clip_figures/clip_analyser.py: change in add_regulation_column to display if the gene contains an exon regulated by a splicing factor rather than the proportion of regulated exons inside each gene --- .../clip_figures/clip_analyser.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/find_interaction_cluster/clip_figures/clip_analyser.py b/src/find_interaction_cluster/clip_figures/clip_analyser.py index 96318279..5e53c78c 100644 --- a/src/find_interaction_cluster/clip_figures/clip_analyser.py +++ b/src/find_interaction_cluster/clip_figures/clip_analyser.py @@ -240,8 +240,8 @@ def select_community_file(project: str, weight: int, global_weight: int, def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str, ) -> pd.DataFrame: """ - Add a column community_data on df_table corresponding to the mean \ - number of exons regulated in each gene if feature is gene or 1 if \ + Add a column community_data on df_table corresponding to 1 \ + if an exon is regulated in a gene (0 else) if feature is gene or 1 if \ the exon is regulated. :param df_table: A dataframe containing the peak density for each gene \ @@ -263,9 +263,9 @@ def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str, ... 
'community': ['C1', 'C1', 'C2']} >>> add_regulation_column(pd.DataFrame(dgene), 'TRA2A_B', 'gene') id_gene clip_peak peak_density community community_data - 0 11553 0 0.0 C1 0.117647 - 1 3222 1 0.1 C1 0.020408 - 2 1001 0 0.0 C2 0.000000 + 0 11553 0 0.0 C1 1 + 1 3222 1 0.1 C1 1 + 2 1001 0 0.0 C2 0 >>> add_regulation_column(pd.DataFrame(dgene), 'TRAgdghfh', 'gene') id_gene clip_peak peak_density community 0 11553 0 0.0 C1 @@ -291,7 +291,7 @@ def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str, df["community_data"] = [0] * df.shape[0] df.loc[df["id_exon"].isin(exons), "community_data"] = 1 df.drop("id_exon", axis=1, inplace=True) - df = df.groupby("id_gene").mean().reset_index() + df = df.groupby("id_gene").max().reset_index() df_table = df_table.merge(df, how="left", on="id_gene") return df_table @@ -386,3 +386,7 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int, community_file, sl_reg] processes.append(pool.apply_async(create_figure, args)) [p.get(timeout=None) for p in processes] + +if __name__ == "__main__": + import doctest + doctest.testmod() \ No newline at end of file -- GitLab