Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
ChIA-PET_network
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
LBMC
ReGArDS
ChIA-PET_network
Commits
c2cfe070
"6_unix_processes.Rmd" did not exist on "5815613c3e2f2abb6bd70cda6aa6225232c2d391"
Commit
c2cfe070
authored
4 years ago
by
nfontrod
Browse files
Options
Downloads
Patches
Plain Diff
src/find_interaction_cluster/nt_and_community.py: add a region parameter
parent
011bb626
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/find_interaction_cluster/nt_and_community.py
+17
-6
17 additions, 6 deletions
src/find_interaction_cluster/nt_and_community.py
with
17 additions
and
6 deletions
src/find_interaction_cluster/nt_and_community.py
+
17
−
6
View file @
c2cfe070
...
@@ -27,13 +27,14 @@ from .sf_and_communities import get_key
...
@@ -27,13 +27,14 @@ from .sf_and_communities import get_key
def
get_nt_frequency
(
cnx
:
sqlite3
.
Connection
,
list_ft
:
List
[
str
],
def
get_nt_frequency
(
cnx
:
sqlite3
.
Connection
,
list_ft
:
List
[
str
],
feature
:
str
)
->
pd
.
DataFrame
:
feature
:
str
,
region
:
str
=
""
)
->
pd
.
DataFrame
:
"""
"""
Get the frequency of every nucleotides for features in list_ft.
Get the frequency of every nucleotides for features in list_ft.
:param cnx: Connection to chia-pet database
:param cnx: Connection to chia-pet database
:param list_ft: The list of exons for which we want to get
:param list_ft: The list of exons for which we want to get
:param feature: the kind of feature analysed
:param feature: the kind of feature analysed
:param region: The region of gene analysed if feature is gene
:return: the frequency of nucleotides for the list of exons.
:return: the frequency of nucleotides for the list of exons.
>>>
d
=
get_nt_frequency
(
sqlite3
.
connect
(
ConfigGraph
.
db_file
),
>>>
d
=
get_nt_frequency
(
sqlite3
.
connect
(
ConfigGraph
.
db_file
),
...
@@ -49,13 +50,18 @@ def get_nt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
...
@@ -49,13 +50,18 @@ def get_nt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
0
1
29.49376
18.34271
18.43874
33.72479
0
1
29.49376
18.34271
18.43874
33.72479
1
2
31.90401
16.40251
18.79033
32.90315
1
2
31.90401
16.40251
18.79033
32.90315
"""
"""
query_region
=
""
if
feature
==
"
gene
"
:
if
feature
==
"
gene
"
:
list_ft
=
[
int
(
ft
)
for
ft
in
list_ft
]
list_ft
=
[
int
(
ft
)
for
ft
in
list_ft
]
if
region
==
""
:
region
=
"
gene
"
query_region
=
f
"
AND region =
'
{
region
}
'"
query
=
f
"""
query
=
f
"""
SELECT ft, id_
{
feature
}
, frequency
SELECT ft, id_
{
feature
}
, frequency
FROM cin_
{
feature
}
_frequency
FROM cin_
{
feature
}
_frequency
WHERE id_
{
feature
}
IN
{
tuple
(
list_ft
)
}
WHERE id_
{
feature
}
IN
{
tuple
(
list_ft
)
}
AND ft_type=
"
nt
"
AND ft_type=
"
nt
"
{
query_region
}
"""
"""
df
=
pd
.
read_sql_query
(
query
,
cnx
)
df
=
pd
.
read_sql_query
(
query
,
cnx
)
df
=
df
.
pivot_table
(
index
=
f
"
id_
{
feature
}
"
,
columns
=
"
ft
"
,
values
=
"
frequency
"
)
\
df
=
df
.
pivot_table
(
index
=
f
"
id_
{
feature
}
"
,
columns
=
"
ft
"
,
values
=
"
frequency
"
)
\
...
@@ -190,7 +196,8 @@ def get_ft_id(cnx: sqlite3.Connection, feature: str = "exon") -> List[str]:
...
@@ -190,7 +196,8 @@ def get_ft_id(cnx: sqlite3.Connection, feature: str = "exon") -> List[str]:
def
create_ctrl_community
(
df
:
pd
.
DataFrame
,
outfile
:
Path
,
def
create_ctrl_community
(
df
:
pd
.
DataFrame
,
outfile
:
Path
,
feature
:
str
=
'
exon
'
)
->
pd
.
DataFrame
:
feature
:
str
=
'
exon
'
,
region
:
str
=
""
)
->
pd
.
DataFrame
:
"""
"""
:param df: A dataframe containing the frequency of each feature in a
\
:param df: A dataframe containing the frequency of each feature in a
\
community.
community.
...
@@ -206,7 +213,7 @@ def create_ctrl_community(df: pd.DataFrame, outfile: Path,
...
@@ -206,7 +213,7 @@ def create_ctrl_community(df: pd.DataFrame, outfile: Path,
ft_id
=
get_ft_id
(
cnx
,
feature
)
ft_id
=
get_ft_id
(
cnx
,
feature
)
list_ft
=
[
cid
for
cid
in
ft_id
list_ft
=
[
cid
for
cid
in
ft_id
if
cid
not
in
df
[
f
"
id_
{
feature
}
"
].
to_list
()]
if
cid
not
in
df
[
f
"
id_
{
feature
}
"
].
to_list
()]
df_nt
=
get_nt_frequency
(
cnx
,
list_ft
,
feature
)
df_nt
=
get_nt_frequency
(
cnx
,
list_ft
,
feature
,
region
)
df_com
=
get_community_table
([
list_ft
],
size_threshold
,
feature
)
df_com
=
get_community_table
([
list_ft
],
size_threshold
,
feature
)
df_nt
=
df_nt
.
merge
(
df_com
,
how
=
"
left
"
,
on
=
f
"
id_
{
feature
}
"
)
df_nt
=
df_nt
.
merge
(
df_com
,
how
=
"
left
"
,
on
=
f
"
id_
{
feature
}
"
)
df_nt
[
'
community
'
]
=
[
'
C-CTRL
'
]
*
df_nt
.
shape
[
0
]
df_nt
[
'
community
'
]
=
[
'
C-CTRL
'
]
*
df_nt
.
shape
[
0
]
...
@@ -218,6 +225,7 @@ def create_ctrl_community(df: pd.DataFrame, outfile: Path,
...
@@ -218,6 +225,7 @@ def create_ctrl_community(df: pd.DataFrame, outfile: Path,
def
get_stat_nt_communities
(
df
:
pd
.
DataFrame
,
project
:
str
,
weight
:
int
,
def
get_stat_nt_communities
(
df
:
pd
.
DataFrame
,
project
:
str
,
weight
:
int
,
global_weight
:
int
,
same_gene
:
bool
,
global_weight
:
int
,
same_gene
:
bool
,
nt
:
str
,
feature
:
str
=
"
exon
"
,
nt
:
str
,
feature
:
str
=
"
exon
"
,
region
:
str
=
""
,
)
->
pd
.
Series
:
)
->
pd
.
Series
:
"""
"""
Get data (proportion of `reg` regulated exons by a splicing factor
Get data (proportion of `reg` regulated exons by a splicing factor
...
@@ -235,6 +243,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int,
...
@@ -235,6 +243,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int,
same gene (True) or not (False) (default False)
same gene (True) or not (False) (default False)
:param nt: The nucleotide of interest
:param nt: The nucleotide of interest
:param feature: The king of feature analysed
:param feature: The king of feature analysed
:param region: the region of interest to extract from gene
"""
"""
logging
.
debug
(
f
"
lmm for
{
project
}
, w:
{
weight
}
,
"
logging
.
debug
(
f
"
lmm for
{
project
}
, w:
{
weight
}
,
"
f
"
g:
{
global_weight
}
nt:
{
nt
}
"
)
f
"
g:
{
global_weight
}
nt:
{
nt
}
"
)
...
@@ -254,7 +263,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int,
...
@@ -254,7 +263,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int,
res
[
'
pval
'
]
=
pval
res
[
'
pval
'
]
=
pval
nt_ctrl_table
=
noutfile
.
parent
/
noutfile
.
name
.
replace
(
"
_stat.txt
"
,
nt_ctrl_table
=
noutfile
.
parent
/
noutfile
.
name
.
replace
(
"
_stat.txt
"
,
"
_ctrl.txt
"
)
"
_ctrl.txt
"
)
ndf
=
create_ctrl_community
(
df
,
nt_ctrl_table
,
feature
)
ndf
=
create_ctrl_community
(
df
,
nt_ctrl_table
,
feature
,
region
)
sum_df
=
lmm_maker_summary
(
ndf
,
outfile
,
nt
)
sum_df
=
lmm_maker_summary
(
ndf
,
outfile
,
nt
)
outfile_ctrl
=
ConfigGraph
.
get_community_file
(
project
,
weight
,
outfile_ctrl
=
ConfigGraph
.
get_community_file
(
project
,
weight
,
global_weight
,
global_weight
,
...
@@ -298,7 +307,7 @@ def create_dataframe(project: str, weight: int, global_weight: int,
...
@@ -298,7 +307,7 @@ def create_dataframe(project: str, weight: int, global_weight: int,
def
multiple_nt_lmm_launcher
(
ps
:
int
,
weights
:
List
[
int
],
def
multiple_nt_lmm_launcher
(
ps
:
int
,
weights
:
List
[
int
],
global_weights
:
List
[
int
],
global_weights
:
List
[
int
],
same_gene
:
bool
,
same_gene
:
bool
,
feature
:
str
=
'
exon
'
,
feature
:
str
=
'
exon
'
,
region
:
str
=
''
,
logging_level
:
str
=
"
DISABLE
"
):
logging_level
:
str
=
"
DISABLE
"
):
"""
"""
Launch the statistical analysis for every
Launch the statistical analysis for every
...
@@ -312,6 +321,7 @@ def multiple_nt_lmm_launcher(ps: int, weights: List[int],
...
@@ -312,6 +321,7 @@ def multiple_nt_lmm_launcher(ps: int, weights: List[int],
:param same_gene: Say if we consider as co-localised exon within the
\
:param same_gene: Say if we consider as co-localised exon within the
\
same gene
same gene
:param feature: The kind of analysed feature
:param feature: The kind of analysed feature
:param region: the region of interest to extract from gene
:param logging_level: Level of information to display
:param logging_level: Level of information to display
"""
"""
ConfigGraph
.
community_folder
.
mkdir
(
exist_ok
=
True
,
parents
=
True
)
ConfigGraph
.
community_folder
.
mkdir
(
exist_ok
=
True
,
parents
=
True
)
...
@@ -342,7 +352,8 @@ def multiple_nt_lmm_launcher(ps: int, weights: List[int],
...
@@ -342,7 +352,8 @@ def multiple_nt_lmm_launcher(ps: int, weights: List[int],
True
)
True
)
df
.
to_csv
(
nfile_table
,
sep
=
"
\t
"
,
index
=
False
)
df
.
to_csv
(
nfile_table
,
sep
=
"
\t
"
,
index
=
False
)
dic_df
[
ckey
]
=
df
dic_df
[
ckey
]
=
df
args
=
[
df
,
project
,
weight
,
global_weight
,
same_gene
,
nt
,
feature
]
args
=
[
df
,
project
,
weight
,
global_weight
,
same_gene
,
nt
,
feature
,
region
]
if
ckey
not
in
processes
.
keys
():
if
ckey
not
in
processes
.
keys
():
processes
[
ckey
]
=
[
pool
.
apply_async
(
get_stat_nt_communities
,
args
)]
processes
[
ckey
]
=
[
pool
.
apply_async
(
get_stat_nt_communities
,
args
)]
else
:
else
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment