Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
ChIA-PET_network
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
LBMC
ReGArDS
ChIA-PET_network
Commits
c06baf31
Commit
c06baf31
authored
4 years ago
by
nfontrod
Browse files
Options
Downloads
Patches
Plain Diff
src/find_interaction_cluster/clip_figures/clip_analyser.py: add sl_reg parameter
parent
0effc7d1
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/find_interaction_cluster/clip_figures/clip_analyser.py
+77
-7
77 additions, 7 deletions
src/find_interaction_cluster/clip_figures/clip_analyser.py
with
77 additions
and
7 deletions
src/find_interaction_cluster/clip_figures/clip_analyser.py
+
77
−
7
View file @
c06baf31
...
...
@@ -19,6 +19,9 @@ from ...logging_conf import logging_def
import
logging
from
typing
import
Tuple
import
multiprocessing
as
mp
from
...figures_utils.exons_interactions
import
get_every_events_4_a_sl
import
sqlite3
from
..sf_and_communities
import
get_sfname
def
bedtools_intersect
(
gene_bed
:
Path
,
clip_bed
:
Path
,
...
...
@@ -230,15 +233,74 @@ def select_community_file(project: str, weight: int, global_weight: int,
raise
FileNotFoundError
(
f
"
File
{
com_file
}
was not found !
"
)
tmp_name
=
com_file
.
name
.
replace
(
"
.txt
"
,
""
)
output
=
ConfigClip
.
output_folder
/
\
f
"
CLIP_community_figures-
{
feature
}
-
{
tmp_name
}
"
f
"
CLIP_community_figures-
{
feature
}
-
{
tmp_name
}
"
return
com_file
,
output
def _regulated_exons(sf_name: str, regulation: str) -> list:
    """
    Return the exons regulated by a splicing factor for one regulation.

    :param sf_name: The splicing factor of interest
    :param regulation: The regulation of interest ('up' or 'down')
    :return: The value stored under the key '<sf_name>_<regulation>' in \
    the first element returned by get_every_events_4_a_sl
    """
    cnx = sqlite3.connect(ConfigGraph.db_file)
    try:
        events, _ = get_every_events_4_a_sl(cnx, sf_name, regulation)
    finally:
        # Always release the database handle, even if the query fails.
        # Closing an already closed sqlite3 connection is harmless.
        cnx.close()
    return events[f"{sf_name}_{regulation}"]


def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str,
                          ) -> pd.DataFrame:
    """
    Add a column community_data on df_table corresponding to the mean \
    number of exons regulated in each gene if feature is gene or 1 if \
    the exon is regulated.

    If sf_name is not one of the names returned by get_sfname(), df_table \
    is returned unchanged (no community_data column is added). The input \
    dataframe is never modified in place: use the returned dataframe.

    :param df_table: A dataframe containing the peak density for each gene \
    or exons. It must contain the column 'id_exon' if feature is 'exon' and \
    the column 'id_gene' otherwise.
    :param sf_name: The splicing factor of interest
    :param feature: The kind of feature analysed ('exon' or anything else \
    for gene)
    :return: The dataframe updated

    >>> dexon = {"id_exon": ['11553_16', '3222_30', '1001_3'],
    ... 'clip_peak': [0, 1, 0], "peak_density": [0, 0.1, 0],
    ... 'community': ['C1', 'C1', 'C2']}
    >>> add_regulation_column(pd.DataFrame(dexon), 'TRA2A_B', 'exon')
        id_exon  clip_peak  peak_density  community  community_data
    0  11553_16          0           0.0         C1               1
    1   3222_30          1           0.1         C1               1
    2    1001_3          0           0.0         C2               0
    >>> dgene = {"id_gene": [11553, 3222, 1001],
    ... 'clip_peak': [0, 1, 0], "peak_density": [0, 0.1, 0],
    ... 'community': ['C1', 'C1', 'C2']}
    >>> add_regulation_column(pd.DataFrame(dgene), 'TRA2A_B', 'gene')
       id_gene  clip_peak  peak_density  community  community_data
    0    11553          0           0.0         C1        0.117647
    1     3222          1           0.1         C1        0.020408
    2     1001          0           0.0         C2        0.000000
    >>> add_regulation_column(pd.DataFrame(dgene), 'TRAgdghfh', 'gene')
       id_gene  clip_peak  peak_density  community
    0    11553          0           0.0         C1
    1     3222          1           0.1         C1
    2     1001          0           0.0         C2
    """
    if sf_name not in get_sfname():
        return df_table
    exons = _regulated_exons(sf_name, "up") + \
        _regulated_exons(sf_name, "down")
    if feature == "exon":
        # Work on a copy so the caller's dataframe is left untouched,
        # like in the gene branch where merge() builds a new dataframe.
        df_table = df_table.copy()
        df_table["community_data"] = \
            df_table["id_exon"].isin(exons).astype(int)
        return df_table
    # Only the exon identifiers of the bed file are needed here.
    df = pd.read_csv(ConfigClip.bed_exon, sep="\t",
                     names=["chr", "start", "end", "id_exon", "sc", "s"],
                     usecols=["id_exon"])
    # regex=True is mandatory: since pandas 2.0 the pattern is interpreted
    # literally by default and the exon suffix would not be removed.
    df["id_gene"] = df["id_exon"].str.replace(r"_\d+", "",
                                              regex=True).astype(int)
    df["community_data"] = df["id_exon"].isin(exons).astype(int)
    # Proportion of regulated exons in each gene
    df = df.groupby("id_gene")["community_data"].mean().reset_index()
    return df_table.merge(df, how="left", on="id_gene")
def
create_figure
(
project
:
str
,
weight
:
int
,
global_weight
:
int
,
same_gene
:
bool
,
feature
:
str
,
clip_file
:
Path
,
feature_bed
:
Path
,
test_type
:
str
=
"
permutation
"
,
iteration
:
int
=
10000
,
display_size
:
bool
=
False
,
community_file
:
str
=
""
)
->
None
:
iteration
:
int
=
10000
,
display_size
:
bool
=
False
,
community_file
:
str
=
""
,
sl_reg
:
bool
=
False
)
->
None
:
"""
Create the final figure
:param project: The name of the project of interest
...
...
@@ -261,7 +323,8 @@ def create_figure(project: str, weight: int, global_weight: int,
used to find the community files computed with ChIA-PET data.
:param display_size: True to display the size of the community.
\
False to display nothing. (default False)
:param ps: The number of processes to create (default 1)
:param sl_reg: True to display the FaRLine regulation of the
\
same factor, False to not display it.
"""
logging
.
info
(
f
"
Working on
{
clip_file
}
"
)
com_file
,
output
=
select_community_file
(
project
,
weight
,
global_weight
,
...
...
@@ -270,6 +333,10 @@ def create_figure(project: str, weight: int, global_weight: int,
output
.
mkdir
(
exist_ok
=
True
,
parents
=
True
)
outfile
=
output
/
f
"
{
clip_file
.
name
.
split
(
'
.
'
)[
0
]
}
.pdf
"
final_table
=
create_table
(
feature
,
clip_file
,
feature_bed
,
com_file
)
if
sl_reg
:
final_table
=
add_regulation_column
(
final_table
,
clip_file
.
name
.
split
(
"
_
"
)[
0
],
feature
)
create_community_fig
(
final_table
,
feature
,
"
peak_density
"
,
outfile
,
test_type
,
iteration
=
iteration
,
display_size
=
display_size
)
...
...
@@ -279,8 +346,8 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int,
global_weight
:
int
,
same_gene
:
bool
,
feature
:
str
,
test_type
:
str
=
"
permutation
"
,
iteration
:
int
=
10000
,
display_size
:
bool
=
False
,
community_file
:
str
=
""
,
ps
:
int
=
1
,
logging_level
:
str
=
"
DEBUG
"
)
->
None
:
community_file
:
str
=
""
,
sl_reg
:
bool
=
False
,
ps
:
int
=
1
,
logging_level
:
str
=
"
DEBUG
"
)
->
None
:
"""
Create the final figure
:param project: The name of the project of interest
...
...
@@ -301,6 +368,8 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int,
:param community_file: A file containing custom communities. If
\
it equals to
''
then weight, global weight and same genes parameter are
\
used to find the community files computed with ChIA-PET data.
:param sl_reg: True to display the FaRLine regulation of the
\
same factor, False to not display it.
:param ps: The number of processes to create (default 1)
:param logging_level: The level of data to display (default
'
DISABLE
'
)
"""
...
...
@@ -309,11 +378,12 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int,
else
ConfigClip
.
bed_exon
files
=
list
(
clip_folder
.
glob
(
"
*.bed
"
))
+
\
list
(
clip_folder
.
glob
(
"
*.bed.gz
"
))
files
=
[
files
[
0
]]
pool
=
mp
.
Pool
(
processes
=
min
(
len
(
files
),
ps
))
processes
=
[]
for
mfile
in
files
:
args
=
[
project
,
weight
,
global_weight
,
same_gene
,
feature
,
mfile
,
feature_bed
,
test_type
,
iteration
,
display_size
,
community_file
]
community_file
,
sl_reg
]
processes
.
append
(
pool
.
apply_async
(
create_figure
,
args
))
[
p
.
get
(
timeout
=
None
)
for
p
in
processes
]
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment