src/figures_utils/circle_packing_fc.py :script to generate circle packing of...

src/figures_utils/circle_packing_fc.py :script to generate circle packing of the most enriched components

src/figures_utils/circle_packing_fc.py :script to generate circle packing of...
a7402bf3 · nfontrod · 6ca793de · a7402bf3
Verified Commit a7402bf3 authored 3 months ago by nfontrod
--- a/src/figures_utils/circle_packing_fc.py
+++ b/src/figures_utils/circle_packing_fc.py
+#!/usr/bin/env python3
+
+# -*- coding: utf-8 -*-
+
+"""
+Description: The goal of this script is to be able to get n most enriched \
+components in communities given by the user and create a circle packing \
+visualization of these components.
+"""
+
+import circlify
+import lazyparser as lp
+import pandas as pd
+import polars as pl
+
+from .circle_packing import (
+    create_bubble_packing,
+    create_colors,
+    create_hierarchical_dictionnary,
+)
+from .community_hubs_umap import (
+    create_dataframe,
+)
+from .config_figures import Config
+
+
+@lp.parse(
+    cpnt_type=[
+        "5mer",
+        "4mer",
+        "nt",
+        "aa",
+        "codon",
+        "dnt",
+        "properties",
+        "properties_lvl2",
+        "prop",
+    ],
+    region=["gene", "premrna", "mrna", "cds", "prot"],
+)
+def main_circle_pack_hub(
+    com_file: str,
+    cpnt_type: str = "nt",
+    region: str = "gene",
+    top_motif: int = 5,
+) -> None:
+    """
+    The goal of this script is to be able to get n most enriched \
+    components in communities given by the user and create a circle packing \
+    visualization of these components.
+
+    :param com_file: A community file or leave it empty to create an umap \
+    of hubs and coloring each hub in the color of it's SPIN
+    :param cpnt_type: The component type of interest
+    :param region: The region of the gene considered
+    :param keep_all_gene: A boolean indicating whether or not to keep all gene \
+    in the analysis, not only those in a defined community
+    :param top_motif: The number of top motif to consider in each HUB
+    """
+    Config.output_hubs_umap.mkdir(exist_ok=True)
+    groups = pd.read_csv(com_file, sep="\t")["community"].unique().tolist()
+    df_freq = create_dataframe(
+        com_file, "gene", region, cpnt_type, size_threshold=0
+    ).rename({"community": "group"}, axis=1)
+    df = pl.from_pandas(df_freq)
+    df = df.drop("id_gene").group_by(["group", "community_size"]).mean()
+    for col in [c for c in df.columns if c not in ["group", "community_size"]]:
+        df = df.with_columns((pl.col(col) / pl.col(col).mean()).log(base=2))
+    df = (
+        df.melt(
+            id_vars=["community_size", "group"],
+            value_name="log2fc",
+            variable_name="cpnt",
+        )
+        .sort(["group", "log2fc"], descending=[False, True])
+        .with_columns(
+            pl.col("log2fc")
+            .rank(method="dense", descending=True)
+            .over("group")
+            .alias("rank"),
+            pl.col("log2fc").round(2),
+        )
+    )
+    cpnt_2_keep = (
+        df.filter(pl.col("rank") <= top_motif)["cpnt"].unique().to_list()
+    )
+    df = df.filter(
+        (pl.col("cpnt").is_in(cpnt_2_keep)) & (pl.col("log2fc") > 0)
+    )
+    dfp = df.to_pandas().drop(["community_size", "rank"], axis=1)
+    dfp["group"] = pd.Categorical(
+        dfp["group"], categories=groups, ordered=True
+    )
+    dfp = dfp.sort_values(["group", "log2fc"], ascending=[True, False])
+    res = [
+        create_hierarchical_dictionnary(
+            dfp,
+            name_group="group",
+            name_weight="log2fc",
+        )
+    ]
+    colors_base = create_colors(groups)
+    circles = circlify.circlify(
+        res,
+        show_enclosure=False,
+        target_enclosure=circlify.Circle(x=0, y=0, r=1),
+    )
+    create_bubble_packing(
+        circles,
+        Config.output_hubs_umap
+        / f"Circle_packing_log2fc_{region}_{cpnt_type}_{top_motif}.pdf",
+        region,
+        cpnt_type,
+        colors_base,
+    )
+
+
+# Script entry point: the function is called without arguments, so its
+# parameters are presumably filled from the command line by the
+# ``lp.parse`` decorator -- TODO confirm against the lazyparser docs.
+if __name__ == "__main__":
+    main_circle_pack_hub()