Skip to content
Snippets Groups Projects
Verified Commit 5519d883 authored by nfontrod's avatar nfontrod
Browse files

update

parent 9c206182
Branches nf-core-template-merge-2.6
No related tags found
No related merge requests found
...@@ -14,6 +14,7 @@ from typing import Dict, List ...@@ -14,6 +14,7 @@ from typing import Dict, List
import lazyparser as lp import lazyparser as lp
import pandas as pd import pandas as pd
import polars as pl
from ...figures_utils.config_figures import Config as ConfF from ...figures_utils.config_figures import Config as ConfF
from ..config import Config from ..config import Config
...@@ -73,7 +74,6 @@ def create_df_4_a_community( ...@@ -73,7 +74,6 @@ def create_df_4_a_community(
community: str, community: str,
size: int, size: int,
dic_id: Dict[int, str], dic_id: Dict[int, str],
hg38_dic: Dict[int, str],
) -> pd.DataFrame: ) -> pd.DataFrame:
""" """
Create a small dataframe based on a string containing gene id separated \ Create a small dataframe based on a string containing gene id separated \
...@@ -84,8 +84,6 @@ def create_df_4_a_community( ...@@ -84,8 +84,6 @@ def create_df_4_a_community(
belongs belongs
:param size: The size a the community :param size: The size a the community
:param dic_id: A dicitonary linking id of gene to their symbol :param dic_id: A dicitonary linking id of gene to their symbol
:param hg38_dic: A dicitonary linking id of gene to their hg38 symbol
:return: A dataframe containing
>>> create_df_4_a_community('1, 2, 3', 'C4', 3, {1: 'DSC2', 2: 'DSC1', 3: >>> create_df_4_a_community('1, 2, 3', 'C4', 3, {1: 'DSC2', 2: 'DSC1', 3:
... 'DSG1', 4: 'DSG4', 5: 'KCTD4', 6: 'TPT1'}, {1: 'DSC2-38', 2: 'DSC1', 3: ... 'DSG1', 4: 'DSG4', 5: 'KCTD4', 6: 'TPT1'}, {1: 'DSC2-38', 2: 'DSC1', 3:
...@@ -97,18 +95,15 @@ def create_df_4_a_community( ...@@ -97,18 +95,15 @@ def create_df_4_a_community(
""" """
gene_ids = get_gene_list(gene_str) gene_ids = get_gene_list(gene_str)
gene_names = [dic_id[gn] for gn in gene_ids] gene_names = [dic_id[gn] for gn in gene_ids]
gene_names_hg38 = [hg38_dic.get(gn, "") for gn in gene_ids]
if len(gene_names) != size: if len(gene_names) != size:
raise ValueError( raise ValueError(
f"gene name size ({len(gene_names)})" f"gene name size ({len(gene_names)}) and size ({size}) differt! "
f" and size ({size}) differt! "
) )
return pd.DataFrame( return pd.DataFrame(
{ {
"cluster": [community] * size, "cluster": [community] * size,
"size": [size] * size, "size": [size] * size,
"fasterdb_symbol": gene_names, "symbol": gene_names,
"hg38_symbol": gene_names_hg38,
"gene_id": gene_ids, "gene_id": gene_ids,
} }
) )
...@@ -132,10 +127,9 @@ def create_full_df(df: pd.DataFrame) -> pd.DataFrame: ...@@ -132,10 +127,9 @@ def create_full_df(df: pd.DataFrame) -> pd.DataFrame:
4 C2 2 KCTD4 KCTD4 5 4 C2 2 KCTD4 KCTD4 5
""" """
dic_id = create_gene_dic() dic_id = create_gene_dic()
hg38_dic = create_hg38_dic()
df_list = [ df_list = [
create_df_4_a_community( create_df_4_a_community(
row["genes"], row["community"], row["nodes"], dic_id, hg38_dic row["genes"], row["community"], row["nodes"], dic_id
) )
for _, row in df.iterrows() for _, row in df.iterrows()
] ]
...@@ -156,7 +150,16 @@ def gene_table_creator(community_file: str, outname: str = "") -> None: ...@@ -156,7 +150,16 @@ def gene_table_creator(community_file: str, outname: str = "") -> None:
df = load_community_file(Path(community_file)) df = load_community_file(Path(community_file))
df = create_full_df(df) df = create_full_df(df)
outf = outname or Path(community_file).stem outf = outname or Path(community_file).stem
df.to_csv(output / f"{outf}.csv", sep="\t", index=False) df = pl.from_pandas(df)
df = df.with_columns(
symbol=pl.col("symbol")
.over("cluster", mapping_strategy="join")
.list.join(", "),
gene_id=pl.col("gene_id")
.over("cluster", mapping_strategy="join")
.list.join(", "),
).unique()
df.write_csv(output / f"{outf}.csv", separator="\t")
if __name__ == "__main__": if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment