Skip to content
Snippets Groups Projects
Verified Commit a7402bf3 authored by nfontrod's avatar nfontrod
Browse files

src/figures_utils/circle_packing_fc.py :script to generate circle packing of...

src/figures_utils/circle_packing_fc.py :script to generate circle packing of the most enriched components
parent 6ca793de
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Description: The goal of this script is to be able to get n most enriched \
components in communities given by the user and create a circle packing \
visualization of these components.
"""
import circlify
import lazyparser as lp
import pandas as pd
import polars as pl
from .circle_packing import (
create_bubble_packing,
create_colors,
create_hierarchical_dictionnary,
)
from .community_hubs_umap import (
create_dataframe,
)
from .config_figures import Config
@lp.parse(
    cpnt_type=[
        "5mer",
        "4mer",
        "nt",
        "aa",
        "codon",
        "dnt",
        "properties",
        "properties_lvl2",
        "prop",
    ],
    region=["gene", "premrna", "mrna", "cds", "prot"],
)
def main_circle_pack_hub(
    com_file: str,
    cpnt_type: str = "nt",
    region: str = "gene",
    top_motif: int = 5,
) -> None:
    """
    The goal of this script is to be able to get n most enriched \
    components in communities given by the user and create a circle packing \
    visualization of these components.
    :param com_file: A tab-separated community file containing a \
    'community' column
    :param cpnt_type: The component type of interest
    :param region: The region of the gene considered
    :param top_motif: The number of top-ranked enriched components to keep \
    in each community
    """
    # parents=True so a missing parent directory does not abort the run.
    Config.output_hubs_umap.mkdir(parents=True, exist_ok=True)

    # Community labels in file order; reused below to order the groups and
    # to build the color palette.
    groups = pd.read_csv(com_file, sep="\t")["community"].unique().tolist()

    df_freq = create_dataframe(
        com_file, "gene", region, cpnt_type, size_threshold=0
    ).rename({"community": "group"}, axis=1)

    # Average every component column per (group, community_size).
    df = pl.from_pandas(df_freq)
    df = df.drop("id_gene").group_by(["group", "community_size"]).mean()

    # log2 fold change of each group mean against the mean of that column
    # over all groups. Every expression only reads its own column, so they
    # can all be evaluated in a single with_columns call.
    cpnt_cols = [
        c for c in df.columns if c not in ("group", "community_size")
    ]
    df = df.with_columns(
        [(pl.col(c) / pl.col(c).mean()).log(base=2) for c in cpnt_cols]
    )

    # Long format: one row per (group, component) with its log2fc, plus the
    # dense rank of the component inside its group (1 = most enriched).
    df = (
        df.melt(
            id_vars=["community_size", "group"],
            value_name="log2fc",
            variable_name="cpnt",
        )
        .sort(["group", "log2fc"], descending=[False, True])
        .with_columns(
            pl.col("log2fc")
            .rank(method="dense", descending=True)
            .over("group")
            .alias("rank"),
            pl.col("log2fc").round(2),
        )
    )

    # Keep every component that is in the top `top_motif` of at least one
    # group, then only the rows where it is actually enriched (log2fc > 0).
    cpnt_2_keep = (
        df.filter(pl.col("rank") <= top_motif)["cpnt"].unique().to_list()
    )
    df = df.filter(
        (pl.col("cpnt").is_in(cpnt_2_keep)) & (pl.col("log2fc") > 0)
    )

    # Back to pandas, with groups ordered as they appear in the input file.
    dfp = df.to_pandas().drop(["community_size", "rank"], axis=1)
    dfp["group"] = pd.Categorical(
        dfp["group"], categories=groups, ordered=True
    )
    dfp = dfp.sort_values(["group", "log2fc"], ascending=[True, False])

    res = [
        create_hierarchical_dictionnary(
            dfp,
            name_group="group",
            name_weight="log2fc",
        )
    ]
    colors_base = create_colors(groups)
    circles = circlify.circlify(
        res,
        show_enclosure=False,
        target_enclosure=circlify.Circle(x=0, y=0, r=1),
    )
    create_bubble_packing(
        circles,
        Config.output_hubs_umap
        / f"Circle_packing_log2fc_{region}_{cpnt_type}_{top_motif}.pdf",
        region,
        cpnt_type,
        colors_base,
    )
if __name__ == "__main__":
    # Called without arguments: they are expected to be supplied from the
    # command line by the `lp.parse` decorator applied to the function.
    main_circle_pack_hub()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment