From 6e73fade89cfa018d0f988bc37aa7c921bd1c7ff Mon Sep 17 00:00:00 2001
From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr>
Date: Mon, 25 Jan 2021 17:24:33 +0100
Subject: [PATCH] src/find_interaction_cluster/clip_figures/clip_analyser.py:
 change in add_regulation_column to display if the gene contains an exon
 regulated by a splicing factor rather than the proportion of regulated exons
 inside each gene

---
 .../clip_figures/clip_analyser.py                | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/find_interaction_cluster/clip_figures/clip_analyser.py b/src/find_interaction_cluster/clip_figures/clip_analyser.py
index 96318279..5e53c78c 100644
--- a/src/find_interaction_cluster/clip_figures/clip_analyser.py
+++ b/src/find_interaction_cluster/clip_figures/clip_analyser.py
@@ -240,8 +240,8 @@ def select_community_file(project: str, weight: int, global_weight: int,
 def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str,
                           ) -> pd.DataFrame:
     """
-    Add a column community_data on df_table corresponding to the mean \
-    number of exons regulated in each gene if feature is gene or 1 if \
+    Add a column community_data on df_table corresponding to 1 \
+    if an exon is regulated in a gene (0 else) if feature is gene or 1 if \
     the exon is regulated.
 
     :param df_table: A dataframe containing the peak density for each gene \
@@ -263,9 +263,9 @@ def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str,
     ...          'community': ['C1', 'C1', 'C2']}
     >>> add_regulation_column(pd.DataFrame(dgene), 'TRA2A_B', 'gene')
        id_gene  clip_peak  peak_density community  community_data
-    0    11553          0           0.0        C1        0.117647
-    1     3222          1           0.1        C1        0.020408
-    2     1001          0           0.0        C2        0.000000
+    0    11553          0           0.0        C1               1
+    1     3222          1           0.1        C1               1
+    2     1001          0           0.0        C2               0
     >>> add_regulation_column(pd.DataFrame(dgene), 'TRAgdghfh', 'gene')
        id_gene  clip_peak  peak_density community
     0    11553          0           0.0        C1
@@ -291,7 +291,7 @@ def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str,
         df["community_data"] = [0] * df.shape[0]
         df.loc[df["id_exon"].isin(exons), "community_data"] = 1
         df.drop("id_exon", axis=1, inplace=True)
-        df = df.groupby("id_gene").mean().reset_index()
+        df = df.groupby("id_gene").max().reset_index()
         df_table = df_table.merge(df, how="left", on="id_gene")
     return df_table
 
@@ -386,3 +386,7 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int,
                 community_file, sl_reg]
         processes.append(pool.apply_async(create_figure, args))
     [p.get(timeout=None) for p in processes]
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
\ No newline at end of file
-- 
GitLab