Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
ChIA-PET_network
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
LBMC
ReGArDS
ChIA-PET_network
Commits
5519d883
Verified
Commit
5519d883
authored
2 months ago
by
nfontrod
Browse files
Options
Downloads
Patches
Plain Diff
update
parent
9c206182
Branches
nf-core-template-merge-2.6
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/find_interaction_cluster/community_file_tools/community_file_2_gene_list.py
+14
-11
14 additions, 11 deletions
...luster/community_file_tools/community_file_2_gene_list.py
with
14 additions
and
11 deletions
src/find_interaction_cluster/community_file_tools/community_file_2_gene_list.py
+
14
−
11
View file @
5519d883
...
@@ -14,6 +14,7 @@ from typing import Dict, List
...
@@ -14,6 +14,7 @@ from typing import Dict, List
import
lazyparser
as
lp
import
lazyparser
as
lp
import
pandas
as
pd
import
pandas
as
pd
import
polars
as
pl
from
...figures_utils.config_figures
import
Config
as
ConfF
from
...figures_utils.config_figures
import
Config
as
ConfF
from
..config
import
Config
from
..config
import
Config
...
@@ -73,7 +74,6 @@ def create_df_4_a_community(
...
@@ -73,7 +74,6 @@ def create_df_4_a_community(
community
:
str
,
community
:
str
,
size
:
int
,
size
:
int
,
dic_id
:
Dict
[
int
,
str
],
dic_id
:
Dict
[
int
,
str
],
hg38_dic
:
Dict
[
int
,
str
],
)
->
pd
.
DataFrame
:
)
->
pd
.
DataFrame
:
"""
"""
Create a small dataframe based on a string containing gene id separated
\
Create a small dataframe based on a string containing gene id separated
\
...
@@ -84,8 +84,6 @@ def create_df_4_a_community(
...
@@ -84,8 +84,6 @@ def create_df_4_a_community(
belongs
belongs
:param size: The size a the community
:param size: The size a the community
:param dic_id: A dicitonary linking id of gene to their symbol
:param dic_id: A dicitonary linking id of gene to their symbol
:param hg38_dic: A dicitonary linking id of gene to their hg38 symbol
:return: A dataframe containing
>>>
create_df_4_a_community
(
'
1, 2, 3
'
,
'
C4
'
,
3
,
{
1
:
'
DSC2
'
,
2
:
'
DSC1
'
,
3
:
>>>
create_df_4_a_community
(
'
1, 2, 3
'
,
'
C4
'
,
3
,
{
1
:
'
DSC2
'
,
2
:
'
DSC1
'
,
3
:
...
'
DSG1
'
,
4
:
'
DSG4
'
,
5
:
'
KCTD4
'
,
6
:
'
TPT1
'
},
{
1
:
'
DSC2-38
'
,
2
:
'
DSC1
'
,
3
:
...
'
DSG1
'
,
4
:
'
DSG4
'
,
5
:
'
KCTD4
'
,
6
:
'
TPT1
'
},
{
1
:
'
DSC2-38
'
,
2
:
'
DSC1
'
,
3
:
...
@@ -97,18 +95,15 @@ def create_df_4_a_community(
...
@@ -97,18 +95,15 @@ def create_df_4_a_community(
"""
"""
gene_ids
=
get_gene_list
(
gene_str
)
gene_ids
=
get_gene_list
(
gene_str
)
gene_names
=
[
dic_id
[
gn
]
for
gn
in
gene_ids
]
gene_names
=
[
dic_id
[
gn
]
for
gn
in
gene_ids
]
gene_names_hg38
=
[
hg38_dic
.
get
(
gn
,
""
)
for
gn
in
gene_ids
]
if
len
(
gene_names
)
!=
size
:
if
len
(
gene_names
)
!=
size
:
raise
ValueError
(
raise
ValueError
(
f
"
gene name size (
{
len
(
gene_names
)
}
)
"
f
"
gene name size (
{
len
(
gene_names
)
}
) and size (
{
size
}
) differt!
"
f
"
and size (
{
size
}
) differt!
"
)
)
return
pd
.
DataFrame
(
return
pd
.
DataFrame
(
{
{
"
cluster
"
:
[
community
]
*
size
,
"
cluster
"
:
[
community
]
*
size
,
"
size
"
:
[
size
]
*
size
,
"
size
"
:
[
size
]
*
size
,
"
fasterdb_symbol
"
:
gene_names
,
"
symbol
"
:
gene_names
,
"
hg38_symbol
"
:
gene_names_hg38
,
"
gene_id
"
:
gene_ids
,
"
gene_id
"
:
gene_ids
,
}
}
)
)
...
@@ -132,10 +127,9 @@ def create_full_df(df: pd.DataFrame) -> pd.DataFrame:
...
@@ -132,10 +127,9 @@ def create_full_df(df: pd.DataFrame) -> pd.DataFrame:
4
C2
2
KCTD4
KCTD4
5
4
C2
2
KCTD4
KCTD4
5
"""
"""
dic_id
=
create_gene_dic
()
dic_id
=
create_gene_dic
()
hg38_dic
=
create_hg38_dic
()
df_list
=
[
df_list
=
[
create_df_4_a_community
(
create_df_4_a_community
(
row
[
"
genes
"
],
row
[
"
community
"
],
row
[
"
nodes
"
],
dic_id
,
hg38_dic
row
[
"
genes
"
],
row
[
"
community
"
],
row
[
"
nodes
"
],
dic_id
)
)
for
_
,
row
in
df
.
iterrows
()
for
_
,
row
in
df
.
iterrows
()
]
]
...
@@ -156,7 +150,16 @@ def gene_table_creator(community_file: str, outname: str = "") -> None:
...
@@ -156,7 +150,16 @@ def gene_table_creator(community_file: str, outname: str = "") -> None:
df
=
load_community_file
(
Path
(
community_file
))
df
=
load_community_file
(
Path
(
community_file
))
df
=
create_full_df
(
df
)
df
=
create_full_df
(
df
)
outf
=
outname
or
Path
(
community_file
).
stem
outf
=
outname
or
Path
(
community_file
).
stem
df
.
to_csv
(
output
/
f
"
{
outf
}
.csv
"
,
sep
=
"
\t
"
,
index
=
False
)
df
=
pl
.
from_pandas
(
df
)
df
=
df
.
with_columns
(
symbol
=
pl
.
col
(
"
symbol
"
)
.
over
(
"
cluster
"
,
mapping_strategy
=
"
join
"
)
.
list
.
join
(
"
,
"
),
gene_id
=
pl
.
col
(
"
gene_id
"
)
.
over
(
"
cluster
"
,
mapping_strategy
=
"
join
"
)
.
list
.
join
(
"
,
"
),
).
unique
()
df
.
write_csv
(
output
/
f
"
{
outf
}
.csv
"
,
separator
=
"
\t
"
)
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment