Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
ChIA-PET_network
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
LBMC
ReGArDS
ChIA-PET_network
Commits
a520d7c7
Commit
a520d7c7
authored
Nov 24, 2020
by
nfontrod
Browse files
Options
Downloads
Patches
Plain Diff
src/find_interaction_cluster/nt_and_community.py: error bar on barplots
parent
c1ec2983
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/find_interaction_cluster/nt_and_community.py
+81
-24
81 additions, 24 deletions
src/find_interaction_cluster/nt_and_community.py
with
81 additions
and
24 deletions
src/find_interaction_cluster/nt_and_community.py
+
81
−
24
View file @
a520d7c7
...
@@ -152,7 +152,6 @@ def lmm_maker_summary(df: pd.DataFrame, outfile: Path, nt: str
...
@@ -152,7 +152,6 @@ def lmm_maker_summary(df: pd.DataFrame, outfile: Path, nt: str
:param outfile: A name of a file
:param outfile: A name of a file
:param nt: the nucleotide of interest
:param nt: the nucleotide of interest
:return: the pvalue of lmm
:return: the pvalue of lmm
"""
"""
pandas2ri
.
activate
()
pandas2ri
.
activate
()
lmm
=
r
(
lmm
=
r
(
...
@@ -222,7 +221,8 @@ def create_ctrl_community(df: pd.DataFrame,
...
@@ -222,7 +221,8 @@ def create_ctrl_community(df: pd.DataFrame,
def
lmm_with_ctrl
(
df
:
pd
.
DataFrame
,
feature
:
str
,
region
:
str
,
def
lmm_with_ctrl
(
df
:
pd
.
DataFrame
,
feature
:
str
,
region
:
str
,
nt
:
str
,
outfile_diag
:
Path
)
->
pd
.
DataFrame
:
nt
:
str
,
outfile_diag
:
Path
)
->
Tuple
[
pd
.
DataFrame
,
pd
.
DataFrame
]:
"""
"""
:param df: df: A dataframe containing the frequency of each nucleotide
\
:param df: df: A dataframe containing the frequency of each nucleotide
\
...
@@ -232,11 +232,12 @@ def lmm_with_ctrl(df: pd.DataFrame, feature: str, region: str,
...
@@ -232,11 +232,12 @@ def lmm_with_ctrl(df: pd.DataFrame, feature: str, region: str,
:param nt: The nucleotide of interest
:param nt: The nucleotide of interest
:param outfile_diag: File from which the diagnostics folder will be
\
:param outfile_diag: File from which the diagnostics folder will be
\
inferred
inferred
:return: The dataframe with the p-value compared to the control
\
:return: The dataframe with ctrl exon and
\
list of exons.
The dataframe with the p-value compared to the control
\
list of feature.
"""
"""
ndf
=
create_ctrl_community
(
df
,
feature
,
region
)
ndf
=
create_ctrl_community
(
df
,
feature
,
region
)
return
lmm_maker_summary
(
ndf
,
outfile_diag
,
nt
)
return
ndf
,
lmm_maker_summary
(
ndf
,
outfile_diag
,
nt
)
def
get_feature_by_community
(
df
:
pd
.
DataFrame
,
feature
:
str
)
->
Dict
:
def
get_feature_by_community
(
df
:
pd
.
DataFrame
,
feature
:
str
)
->
Dict
:
...
@@ -418,24 +419,78 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, nt: str, test_type: str,
...
@@ -418,24 +419,78 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, nt: str, test_type: str,
sns
.
set
()
sns
.
set
()
test_name
=
"
permutation
"
if
test_type
==
"
perm
"
else
"
lmm
"
test_name
=
"
permutation
"
if
test_type
==
"
perm
"
else
"
lmm
"
g
=
sns
.
catplot
(
x
=
"
community
"
,
y
=
nt
,
data
=
df_bar
,
kind
=
"
bar
"
,
g
=
sns
.
catplot
(
x
=
"
community
"
,
y
=
nt
,
data
=
df_bar
,
kind
=
"
bar
"
,
ci
=
"
sd
"
,
aspect
=
2.5
,
height
=
12
,
ci
=
"
sd
"
,
aspect
=
2.5
,
height
=
12
,
errwidth
=
0.5
,
capsize
=
.
4
,
palette
=
[
"
red
"
]
+
[
"
gr
e
y
"
]
*
(
df_bar
.
shape
[
0
]
-
1
))
palette
=
[
"
red
"
]
+
[
"
light
gr
a
y
"
]
*
(
df_bar
.
shape
[
0
]
-
1
))
g
.
fig
.
suptitle
(
f
"
Mean frequency of
{
nt
}
among community of
{
feature
}
s
\n
"
g
.
fig
.
suptitle
(
f
"
Mean frequency of
{
nt
}
among community of
{
feature
}
s
\n
"
f
"
(stats obtained with as
{
test_name
}
test)
"
)
f
"
(stats obtained with as
{
test_name
}
test)
"
)
g
.
set
(
xticklabels
=
[])
g
.
set
(
xticklabels
=
[])
g
.
ax
.
set_ylabel
(
f
'
Frequency of
{
nt
}
'
)
g
.
ax
.
set_ylabel
(
f
'
Frequency of
{
nt
}
'
)
if
test_type
==
"
perm
"
:
df_bara
=
df_bar
.
drop_duplicates
(
subset
=
"
community
"
,
keep
=
"
first
"
)
df_bar
=
df_bar
.
drop_duplicates
(
subset
=
"
community
"
,
keep
=
"
last
"
)
for
i
,
p
in
enumerate
(
g
.
ax
.
patches
):
for
i
,
p
in
enumerate
(
g
.
ax
.
patches
):
stats
=
"
*
"
if
df_bar
.
iloc
[
i
,
:][
"
p-adj
"
]
<
0.05
else
""
stats
=
"
*
"
if
df_bara
.
iloc
[
i
,
:][
"
p-adj
"
]
<
0.05
else
""
print
(
i
,
stats
,
df_bar
.
iloc
[
i
,
:][
"
p-adj
"
])
com
=
df_bara
.
iloc
[
i
,
:][
"
community
"
]
csd
=
np
.
std
(
df_bar
.
loc
[
df_bar
[
"
community
"
]
==
com
,
nt
])
g
.
ax
.
annotate
(
stats
,
g
.
ax
.
annotate
(
stats
,
(
p
.
get_x
()
+
p
.
get_width
()
/
2.
,
p
.
get_height
()),
(
p
.
get_x
()
+
p
.
get_width
()
/
2.
,
p
.
get_height
()
+
csd
),
ha
=
'
center
'
,
va
=
'
center
'
,
xytext
=
(
0
,
10
),
ha
=
'
center
'
,
va
=
'
center
'
,
xytext
=
(
0
,
10
),
textcoords
=
'
offset points
'
)
textcoords
=
'
offset points
'
)
g
.
savefig
(
outfile
)
g
.
savefig
(
outfile
)
def expand_results_lmm(df: pd.DataFrame, rdf: pd.DataFrame,
                       nt: str, feature: str) -> pd.DataFrame:
    """
    Attach the per-community lmm statistics to every feature row.

    :param df: A dataframe with one row per feature; it must hold the
        columns ``id_<feature>``, ``nt``, ``community`` and
        ``community_size``. Any other column is dropped.
    :param rdf: A dataframe with one row per community; it must hold the
        columns ``community``, ``community_size``, ``Pr(>|t|)`` and ``nt``
        (the latter is read as the community mean of ``nt``).
    :param nt: The nucleotide of interest (name of the frequency column)
    :param feature: The feature of interest (used to build ``id_<feature>``)
    :return: df left-joined with rdf on community and community_size, with
        the new columns ``p-adj`` and ``mean_<nt>``. Rows of the ``C-CTRL``
        community come first, the other rows follow by increasing
        ``mean_<nt>``. The index is reset.
    """
    pvalue_column = "Pr(>|t|)"
    mean_column = f"mean_{nt}"
    merge_keys = ["community", "community_size"]

    # Keep only the columns needed for the figure; inputs are left untouched.
    features = df[[f"id_{feature}", nt] + merge_keys].copy()
    stats = rdf[merge_keys + [pvalue_column, nt]].rename(
        columns={nt: mean_column, pvalue_column: "p-adj"})

    merged = features.merge(stats, how="left", on=merge_keys)

    # The control community is placed first, outside the sorted rows.
    is_ctrl = merged["community"] == "C-CTRL"
    ctrl_rows = merged[is_ctrl]
    test_rows = merged[~is_ctrl].sort_values(mean_column, ascending=True)
    return pd.concat([ctrl_rows, test_rows], axis=0, ignore_index=True)
def expand_results_perm(df: pd.DataFrame, rdf: pd.DataFrame, nt: str,
                        feature: str, iteration: int) -> pd.DataFrame:
    """
    Attach the per-community permutation statistics to every feature row
    and prepend the rows of the control community.

    :param df: A dataframe with one row per feature; it must hold the
        columns ``id_<feature>``, ``nt``, ``community`` and
        ``community_size``. Any other column is dropped.
    :param rdf: A dataframe with one row per community; it must hold the
        columns ``community``, ``community_size``, ``nt``, ``p-adj`` and
        ``<nt>_mean_<iteration>_ctrl``.
    :param nt: The nucleotide of interest (name of the frequency column)
    :param feature: The feature of interest (used to build ``id_<feature>``)
    :param iteration: The number of iteration; only used to build the name
        of the control column read from rdf
    :return: The control rows followed by df left-joined with rdf and
        sorted by increasing ``mean_<nt>``. Control rows are labelled
        ``C-CTRL`` and have no ``p-adj`` value (NaN after concatenation).
        The index is reset.
    """
    mean_column = f"mean_{nt}"
    id_column = f"id_{feature}"
    merge_keys = ["community", "community_size"]

    features = df[[id_column, nt] + merge_keys].copy()
    # Read the control values before rdf is narrowed to the stats columns.
    ctrl_values = rdf[f"{nt}_mean_{iteration}_ctrl"]
    nb_ctrl = len(ctrl_values)
    stats = rdf[merge_keys + [nt, "p-adj"]].rename(columns={nt: mean_column})

    merged = features.merge(stats, how="left", on=merge_keys)
    test_rows = merged.sort_values(mean_column, ascending=True)

    # One control row per value found in the control column of rdf; the
    # key order below fixes the column order of the returned dataframe.
    ctrl_rows = pd.DataFrame({
        nt: ctrl_values,
        mean_column: [np.mean(ctrl_values)] * nb_ctrl,
        id_column: ['ctrl'] * nb_ctrl,
        "community_size": [nb_ctrl] * nb_ctrl,
        "community": ["C-CTRL"] * nb_ctrl,
    })
    return pd.concat([ctrl_rows, test_rows], axis=0, ignore_index=True)
def
create_and_save_ctrl_dataframe
(
df
:
pd
.
DataFrame
,
feature
:
str
,
def
create_and_save_ctrl_dataframe
(
df
:
pd
.
DataFrame
,
feature
:
str
,
region
:
str
,
nt
:
str
,
outfile
:
Path
,
region
:
str
,
nt
:
str
,
outfile
:
Path
,
test_type
:
str
,
df_ctrl
:
pd
.
DataFrame
,
test_type
:
str
,
df_ctrl
:
pd
.
DataFrame
,
...
@@ -461,32 +516,34 @@ def create_and_save_ctrl_dataframe(df: pd.DataFrame, feature: str,
...
@@ -461,32 +516,34 @@ def create_and_save_ctrl_dataframe(df: pd.DataFrame, feature: str,
containing the test communities and the control community
containing the test communities and the control community
"""
"""
if
test_type
==
"
lmm
"
:
if
test_type
==
"
lmm
"
:
rdf
=
lmm_with_ctrl
(
df
,
feature
,
region
,
nt
,
ndf
,
rdf
=
lmm_with_ctrl
(
df
,
feature
,
region
,
nt
,
outfile
.
parents
[
1
]
/
outfile
.
name
)
outfile
.
parents
[
1
]
/
outfile
.
name
)
df_bar
=
expand_results_lmm
(
ndf
,
rdf
,
nt
,
feature
)
else
:
else
:
rdf
=
perm_with_ctrl
(
df
,
feature
,
nt
,
df_ctrl
,
dic_com
,
iteration
)
rdf
=
perm_with_ctrl
(
df
,
feature
,
nt
,
df_ctrl
,
dic_com
,
iteration
)
df_bar
=
expand_results_perm
(
df
,
rdf
,
nt
,
feature
,
iteration
)
rdf
.
to_csv
(
outfile_ctrl
,
sep
=
"
\t
"
,
index
=
False
)
rdf
.
to_csv
(
outfile_ctrl
,
sep
=
"
\t
"
,
index
=
False
)
barplot_creation
(
r
df
,
outfile_ctrl
,
nt
,
barplot_creation
(
df
_bar
,
outfile_ctrl
,
nt
,
test_type
,
feature
,
iteration
)
test_type
,
feature
)
def
barplot_creation
(
r
df
:
pd
.
DataFrame
,
outfile
:
Path
,
nt
:
str
,
def
barplot_creation
(
df
_bar
:
pd
.
DataFrame
,
outfile
:
Path
,
nt
:
str
,
test_type
:
str
,
feature
:
str
,
iteration
:
int
)
->
None
:
test_type
:
str
,
feature
:
str
)
->
None
:
"""
"""
Reformat a dataframe with the enrichment of a nucleotide frequency
\
Reformat a dataframe with the enrichment of a nucleotide frequency
\
for every community and then create a barplot showing those frequencies.
for every feature for every community and then create a
\
barplot showing those frequencies.
:param rdf: A dataframe with the enrichment of a
\
:param df_bar: A dataframe with the enrichment of a
\
nucleotide frequency for every community
nucleotide frequency for every community and showing the frequency
\
of each feature in each community
:param outfile: File were rdf is stored
:param outfile: File were rdf is stored
:param nt: The nucleotide for which we are seeking enrichment
:param nt: The nucleotide for which we are seeking enrichment
:param test_type: The kind of test make
:param test_type: The kind of test make
:param feature: The king of feature of interest
:param feature: The king of feature of interest
:param iteration: The number of sub samples to create
"""
"""
rdf
=
prepare_dataframe
(
rdf
,
test_type
,
nt
,
iteration
)
outfig
=
outfile
.
parent
/
outfile
.
name
.
replace
(
"
.txt
"
,
"
.pdf
"
)
outfig
=
outfile
.
parent
/
outfile
.
name
.
replace
(
"
.txt
"
,
"
.pdf
"
)
make_barplot
(
r
df
,
outfig
,
nt
,
test_type
,
feature
)
make_barplot
(
df
_bar
,
outfig
,
nt
,
test_type
,
feature
)
def
create_outfiles
(
project
:
str
,
weight
:
int
,
global_weight
:
int
,
def
create_outfiles
(
project
:
str
,
weight
:
int
,
global_weight
:
int
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment