Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
TDD_MAPKi
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
LBMC
ReGArDS
TDD_MAPKi
Commits
62d7c8ff
Commit
62d7c8ff
authored
Jun 28, 2022
by
nfontrod
Browse files
Options
Downloads
Patches
Plain Diff
src/06_compute_tdd_index.R: create barplot of number of peaks in relation to tdd + tdd correlation
parent
fb7429a7
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/06_compute_tdd_index.R
+455
-50
455 additions, 50 deletions
src/06_compute_tdd_index.R
with
455 additions
and
50 deletions
src/06_compute_tdd_index.R
+
455
−
50
View file @
62d7c8ff
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
# The goal of this script is to compute TDD index and TID index of every genes
# The goal of this script is to compute TDD index and TID index of every genes
library
(
MASS
)
library
(
tidyverse
)
library
(
tidyverse
)
library
(
genefilter
)
library
(
genefilter
)
library
(
gridExtra
)
library
(
gridExtra
)
...
@@ -13,13 +14,19 @@ library(emmeans)
...
@@ -13,13 +14,19 @@ library(emmeans)
#' @param treatment The treatment for which we want to compute the TDD
#' @param treatment The treatment for which we want to compute the TDD
#' @param mean_gene_count The minimum mean count a gene must have to compute
#' @param mean_gene_count The minimum mean count a gene must have to compute
#' its TDDindex
#' its TDDindex
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @return The count tablme with the tdd for each replicate and mean TDD and sd
#' @return The count tablme with the tdd for each replicate and mean TDD and sd
#' @import tidyverse
#' @import tidyverse
#' @import genefilter
#' @import genefilter
compute_tdd
<-
function
(
treatment
=
"BRAF"
,
mean_gene_count
=
10
)
{
compute_tdd
<-
function
(
treatment
=
"BRAF"
,
mean_gene_count
=
10
,
ercc
=
F
)
{
ercc_name
<-
""
if
(
ercc
)
{
ercc_name
<-
"_ercc"
}
norm_df
<-
read_tsv
(
norm_df
<-
read_tsv
(
paste0
(
paste0
(
"./results/deseq2_normalisation/"
,
treatment
,
"./results/deseq2_normalisation/"
,
treatment
,
ercc_name
,
"_norm_stable_gene.txt"
"_norm_stable_gene.txt"
)
)
)
)
...
@@ -64,7 +71,6 @@ compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10) {
...
@@ -64,7 +71,6 @@ compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10) {
}
}
#' Create a density figure of TDD index for a given replicate
#' Create a density figure of TDD index for a given replicate
#'
#'
#' @param rep The replicate name
#' @param rep The replicate name
...
@@ -97,7 +103,7 @@ create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") {
...
@@ -97,7 +103,7 @@ create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") {
#' @import gridExtra
#' @import gridExtra
create_density_figs
<-
function
(
tdd_tibble
,
treatment
=
"BRAF"
,
create_density_figs
<-
function
(
tdd_tibble
,
treatment
=
"BRAF"
,
output_folder
=
"./results/TDD_analysis"
)
{
output_folder
=
"./results/TDD_analysis"
)
{
dir.create
(
output_folder
)
dir.create
(
output_folder
,
showWarnings
=
F
)
rep
<-
tdd_tibble
%>%
rep
<-
tdd_tibble
%>%
select
(
-
gene
)
%>%
select
(
-
gene
)
%>%
colnames
()
%>%
colnames
()
%>%
...
@@ -128,9 +134,15 @@ create_density_figs <- function(tdd_tibble, treatment = "BRAF",
...
@@ -128,9 +134,15 @@ create_density_figs <- function(tdd_tibble, treatment = "BRAF",
#' @param treatment The treatment of samples for which we want to compute
#' @param treatment The treatment of samples for which we want to compute
#' the TDD
#' the TDD
#' @param output_folder folder where the TDD table and figure will be created
#' @param output_folder folder where the TDD table and figure will be created
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @import tidyverse
#' @import tidyverse
get_tdd_table
<-
function
(
treatment
,
output_folder
=
"./results/TDD_analysis"
)
{
get_tdd_table
<-
function
(
treatment
,
tdd_table
<-
compute_tdd
(
treatment
)
mean_gene_count
=
10
,
output_folder
=
"./results/TDD_analysis"
,
ercc
=
F
)
{
tdd_table
<-
compute_tdd
(
treatment
,
mean_gene_count
,
ercc
)
create_density_figs
(
tdd_table
,
treatment
,
output_folder
)
create_density_figs
(
tdd_table
,
treatment
,
output_folder
)
write_tsv
(
write_tsv
(
tdd_table
,
tdd_table
,
...
@@ -147,18 +159,30 @@ get_tdd_table <- function(treatment, output_folder = "./results/TDD_analysis") {
...
@@ -147,18 +159,30 @@ get_tdd_table <- function(treatment, output_folder = "./results/TDD_analysis") {
#' TDD table
#' TDD table
#'
#'
#' @param output_folder Folder where the figures and TDD tables will be created
#' @param output_folder Folder where the figures and TDD tables will be created
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @import tidyverse
#' @import tidyverse
get_final_tdd_table
<-
function
(
output_folder
=
"./results/TDD_analysis"
)
{
get_final_tdd_table
<-
function
(
mean_gene_count
=
10
,
braf_tdd
<-
get_tdd_table
(
"BRAF"
,
output_folder
)
ercc
=
F
,
dmso_tdd
<-
get_tdd_table
(
"DMSO"
,
output_folder
)
output_folder
=
"./results/TDD_analysis"
)
{
braf_tdd
<-
braf_tdd
[,
grep
(
"gene|mean|sd"
,
colnames
(
braf_tdd
))]
braf_tdd
<-
get_tdd_table
(
"BRAF"
,
mean_gene_count
,
output_folder
,
ercc
)
dmso_tdd
<-
dmso_tdd
[,
grep
(
"gene|mean|sd"
,
colnames
(
dmso_tdd
))]
dmso_tdd
<-
get_tdd_table
(
"DMSO"
,
mean_gene_count
,
output_folder
,
ercc
)
braf_tdd
<-
braf_tdd
[,
grep
(
"gene|mean|TDD|sd"
,
colnames
(
braf_tdd
))]
dmso_tdd
<-
dmso_tdd
[,
grep
(
"gene|mean|TDD|sd"
,
colnames
(
dmso_tdd
))]
tdd_full
<-
dmso_tdd
%>%
inner_join
(
braf_tdd
,
by
=
"gene"
)
# nolint
tdd_full
<-
dmso_tdd
%>%
inner_join
(
braf_tdd
,
by
=
"gene"
)
# nolint
tdd_full
<-
tdd_full
%>%
tdd_full
<-
tdd_full
%>%
mutate
(
diff_TDD
=
mean_BRAF_TDD
-
mean_DMSO_TDD
)
# nolint
mutate
(
diff_TDD
=
mean_BRAF_TDD
-
mean_DMSO_TDD
)
# nolint
ercc_name
<-
""
if
(
ercc
)
{
ercc_name
<-
"_erccnorm"
}
write_tsv
(
write_tsv
(
tdd_full
,
tdd_full
,
paste0
(
output_folder
,
"/full_TDD_table.txt"
)
paste0
(
output_folder
,
"/"
,
ercc_name
,
"_"
,
mean_gene_count
,
"_full_TDD_table.txt"
)
)
)
return
(
tdd_full
)
return
(
tdd_full
)
}
}
...
@@ -180,7 +204,7 @@ compute_stat <- function(tdd_table, output_folder = "./results/TDD_analysis",
...
@@ -180,7 +204,7 @@ compute_stat <- function(tdd_table, output_folder = "./results/TDD_analysis",
name
=
"diag"
)
{
name
=
"diag"
)
{
mod
<-
lm
(
TDD
~
group
,
data
=
tdd_table
)
mod
<-
lm
(
TDD
~
group
,
data
=
tdd_table
)
output_folderd
<-
paste0
(
output_folder
,
"/diagnostics"
)
output_folderd
<-
paste0
(
output_folder
,
"/diagnostics"
)
dir.create
(
output_folderd
)
dir.create
(
output_folderd
,
showWarnings
=
F
)
simulationOutput
<-
simulateResiduals
(
fittedModel
=
mod
,
n
=
2000
)
# nolint
simulationOutput
<-
simulateResiduals
(
fittedModel
=
mod
,
n
=
2000
)
# nolint
pdf
(
paste0
(
output_folderd
,
"/"
,
name
,
"_diag.pdf"
),
width
=
12
,
height
=
8
)
pdf
(
paste0
(
output_folderd
,
"/"
,
name
,
"_diag.pdf"
),
width
=
12
,
height
=
8
)
print
(
plot
(
simulationOutput
))
print
(
plot
(
simulationOutput
))
...
@@ -224,7 +248,7 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
...
@@ -224,7 +248,7 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
target_columns
,
outfile
,
title
=
"groups"
,
target_columns
,
outfile
,
title
=
"groups"
,
output_folder
=
"./results/TDD_analysis"
)
{
output_folder
=
"./results/TDD_analysis"
)
{
output_folder
<-
paste0
(
output_folder
,
"/boxplot_figures"
)
output_folder
<-
paste0
(
output_folder
,
"/boxplot_figures"
)
dir.create
(
output_folder
)
dir.create
(
output_folder
,
showWarnings
=
F
)
if
(
length
(
gene_files
)
!=
length
(
gene_names
)
|
if
(
length
(
gene_files
)
!=
length
(
gene_names
)
|
length
(
gene_files
)
!=
length
(
target_columns
))
{
length
(
gene_files
)
!=
length
(
target_columns
))
{
message
(
"The inputs should have the same lengths"
)
message
(
"The inputs should have the same lengths"
)
...
@@ -243,7 +267,9 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
...
@@ -243,7 +267,9 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
)
)
new_tdd
<-
rbind
(
new_tdd
,
tmp_tdd
)
new_tdd
<-
rbind
(
new_tdd
,
tmp_tdd
)
}
}
if
(
!
grepl
(
"ercc"
,
outfile
,
fixed
=
T
))
{
new_tdd
<-
new_tdd
%>%
distinct
(
gene
,
.keep_all
=
T
)
# nolint
new_tdd
<-
new_tdd
%>%
distinct
(
gene
,
.keep_all
=
T
)
# nolint
}
compute_stat
(
new_tdd
,
output_folder
,
outfile
)
compute_stat
(
new_tdd
,
output_folder
,
outfile
)
p
<-
ggplot
(
new_tdd
,
mapping
=
aes
(
x
=
group
,
y
=
TDD
))
+
p
<-
ggplot
(
new_tdd
,
mapping
=
aes
(
x
=
group
,
y
=
TDD
))
+
geom_violin
(
mapping
=
aes
(
fill
=
group
))
+
geom_violin
(
mapping
=
aes
(
fill
=
group
))
+
...
@@ -255,9 +281,29 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
...
@@ -255,9 +281,29 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
return
(
new_tdd
)
return
(
new_tdd
)
}
}
#' Create a boxplot figure to compare TDD values of 3 different sets of genes
tdd_full
<-
get_final_tdd_table
(
output_folder
=
"./results/TDD_analysis"
)
#'
#' @details Create 3 boxplot figures:
#' 1. Create a boxplot displaying the TDD of down-regulated genes by BRAF with
#' a BRAF peak
#' 2. Create a boxplot displaying the TDD of up-regulated genes by BRAF with
#' a DMSO peak
#' 3. Create a boxplot displaying the TDD of stable genes in DMSO and BRAF
#' conditions
#'
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @param ercc a bolean indicating if we want to use ercc or stable gene
#' normalisation
#' @param output_folder Folder where the figures will be created
create_figures
<-
function
(
mean_gene_count
=
10
,
ercc
=
F
,
output_folder
=
"./results/TDD_analysis"
)
{
tdd_full
<-
get_final_tdd_table
(
mean_gene_count
,
ercc
,
output_folder
)
dir
<-
"./data/gene_lists/"
dir
<-
"./data/gene_lists/"
ercc_name
<-
""
if
(
ercc
)
{
ercc_name
<-
"_erccnorm"
}
gn
<-
paste0
(
"_geneMean>"
,
mean_gene_count
)
r
<-
create_tdd_fig_on_lists
(
r
<-
create_tdd_fig_on_lists
(
tdd_df
=
tdd_full
,
tdd_df
=
tdd_full
,
gene_files
=
c
(
gene_files
=
c
(
...
@@ -266,7 +312,7 @@ r <- create_tdd_fig_on_lists(
...
@@ -266,7 +312,7 @@ r <- create_tdd_fig_on_lists(
),
),
gene_names
=
c
(
"BRAF_down_riboRNApeak"
,
"BRAF_down"
),
gene_names
=
c
(
"BRAF_down_riboRNApeak"
,
"BRAF_down"
),
target_columns
=
c
(
"mean_BRAF_TDD"
,
"mean_BRAF_TDD"
),
target_columns
=
c
(
"mean_BRAF_TDD"
,
"mean_BRAF_TDD"
),
outfile
=
"TDD
_BRAF_down-BRAFpeak"
,
outfile
=
paste0
(
"TDD"
,
ercc_name
,
"
_BRAF_down-BRAFpeak"
,
gn
),
title
=
"down-reglated mRNA without and with ribosome peaks"
title
=
"down-reglated mRNA without and with ribosome peaks"
)
)
r
<-
create_tdd_fig_on_lists
(
r
<-
create_tdd_fig_on_lists
(
...
@@ -277,18 +323,377 @@ r <- create_tdd_fig_on_lists(
...
@@ -277,18 +323,377 @@ r <- create_tdd_fig_on_lists(
),
),
gene_names
=
c
(
"BRAF_up_riboRNApeakDMSO"
,
"BRAF_up"
),
gene_names
=
c
(
"BRAF_up_riboRNApeakDMSO"
,
"BRAF_up"
),
target_columns
=
c
(
"mean_DMSO_TDD"
,
"mean_DMSO_TDD"
),
target_columns
=
c
(
"mean_DMSO_TDD"
,
"mean_DMSO_TDD"
),
outfile
=
"TDD
_BRAF_up
-
BRAFpeakDMSO"
,
outfile
=
paste0
(
"TDD"
,
ercc_name
,
"
_BRAF_up
_
BRAFpeakDMSO"
,
gn
),
title
=
"down-reglated mRNA without and with ribosome peaks"
title
=
"down-reglated mRNA without and with ribosome peaks"
)
)
dir
<-
"./results/stable_genes/"
dir
<-
"./results/stable_genes/"
r
<-
create_tdd_fig_on_lists
(
if
(
!
ercc
)
{
tdd_df
=
tdd_full
,
stable_files
<-
c
(
gene_files
=
c
(
paste0
(
dir
,
"braf_stable_genes.txt"
),
paste0
(
dir
,
"braf_stable_genes.txt"
),
paste0
(
dir
,
"dmso_stable_genes.txt"
)
paste0
(
dir
,
"dmso_stable_genes.txt"
)
),
)
}
else
{
stable_files
<-
c
(
paste0
(
dir
,
"ercc.txt"
),
paste0
(
dir
,
"ercc.txt"
)
)
}
r
<-
create_tdd_fig_on_lists
(
tdd_df
=
tdd_full
,
gene_files
=
stable_files
,
gene_names
=
c
(
"BRAF_stable"
,
"DMSO_stable"
),
gene_names
=
c
(
"BRAF_stable"
,
"DMSO_stable"
),
target_columns
=
c
(
"mean_BRAF_TDD"
,
"mean_DMSO_TDD"
),
target_columns
=
c
(
"mean_BRAF_TDD"
,
"mean_DMSO_TDD"
),
outfile
=
"BRAF_DMSO_stable"
,
outfile
=
paste0
(
"BRAF_DMSO_stable"
,
ercc_name
,
gn
),
title
=
"BRAF and DMSO stable genes"
title
=
"BRAF and DMSO stable genes"
)
)
}
#' perform a t.test on TDD values replicate
#'
#' @param row a row of a TDD dataframe
#' @return The t.test pvalue
t_test_func
<-
function
(
row
)
{
x1
<-
as.numeric
(
row
[
1
:
3
])
x2
<-
as.numeric
(
row
[
4
:
6
])
test
<-
t.test
(
x1
,
x2
)
return
(
test
$
p.value
)
# nolint
}
#' Get correlation value between BRAF and DMSO TDD of genes
#'
#' @details Get, the correlation values, the slope and the intercept of the
#' best linear model between the TDD of a given set of
#' genes in BRAF conditions or in DMSO conditions
#'
#' @param tdd_full A table with a column with the mean TDD of DMSO
#' and BRAF condition + a group column indicating if the gene is up
#' of down regulated by BRAF
#' @param my_group The group of gene of interest to select
#' @return a list with the correlation value of TDD, its intercept and
#' slope
get_cor_value
<-
function
(
tdd_full
,
my_group
=
"BRAF_DOWN"
)
{
tdd
<-
tdd_full
%>%
filter
(
group
==
my_group
)
## nolint
c
<-
cor
(
tdd
%>%
filter
(
group
==
my_group
)
%>%
# nolint
select
(
mean_DMSO_TDD
)
%>%
unlist
(),
# nolint
tdd
%>%
filter
(
group
==
my_group
)
%>%
# nolint
select
(
mean_BRAF_TDD
)
%>%
unlist
()
# nolint
)
res
<-
coef
(
lm
(
mean_BRAF_TDD
~
mean_DMSO_TDD
,
tdd
%>%
filter
(
group
==
my_group
)))
return
(
list
(
cor
=
round
(
c
,
3
),
intercept
=
res
[
1
],
slope
=
res
[
2
]))
}
#' Add a column group to a table containing a gene column
#'
#' @param tdd_full a table of tdd with a gene columne
#' @return The table with a column indicating if the genes are up, down or not
#' regulated by BRAF
get_group_columns
<-
function
(
tdd_full
)
{
down_braf
<-
read.table
(
paste0
(
dir
,
"BRAF_DOWN_1.5_name.txt"
))
$
V1
up_braf
<-
read.table
(
paste0
(
dir
,
"BRAF_UP_1.5_name.txt"
))
$
V1
tdd_full
$
group
<-
rep
(
"CTRL"
,
times
=
nrow
(
tdd_full
))
tdd_full
[
tdd_full
$
gene
%in%
down_braf
,
"group"
]
<-
"BRAF_DOWN"
tdd_full
[
tdd_full
$
gene
%in%
up_braf
,
"group"
]
<-
"BRAF_UP"
return
(
tdd_full
)
}
#' Prepare a dataframe used to compute the correlation between BRAF and DMSO TDD
#'
#' @param mean_gene_count The minimum mean gene counts to compute their TDD
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param output_folder The folder where the results will be created
#' @return the dataframe ready to be used for correlation figures
prepare_df_for_cor
<-
function
(
mean_gene_count
=
10
,
ercc
=
F
,
output_folder
=
"./results/TDD_analysis"
)
{
tdd_full
<-
get_final_tdd_table
(
mean_gene_count
=
10
,
ercc
=
F
)
tdd_full
<-
tdd_full
[
,
c
(
"gene"
,
grep
(
"X...|mean"
,
colnames
(
tdd_full
),
value
=
T
))
]
tdd_full
<-
tdd_full
%>%
mutate
(
p_value
=
apply
(
tdd_full
%>%
select
(
starts_with
(
"X"
)),
1
,
function
(
x
)
{
t_test_func
(
x
)
}
))
# nolint
tdd_full
<-
tdd_full
%>%
arrange
(
-
p_value
)
# nolint
dir
<-
"./data/gene_lists/"
down_braf
<-
read.table
(
paste0
(
dir
,
"BRAF_DOWN_1.5_name.txt"
))
$
V1
up_braf
<-
read.table
(
paste0
(
dir
,
"BRAF_UP_1.5_name.txt"
))
$
V1
tdd_full
$
group
<-
rep
(
"CTRL"
,
times
=
nrow
(
tdd_full
))
tdd_full
[
tdd_full
$
gene
%in%
down_braf
,
"group"
]
<-
"BRAF_DOWN"
tdd_full
[
tdd_full
$
gene
%in%
up_braf
,
"group"
]
<-
"BRAF_UP"
tdd_full
$
sig
<-
rep
(
""
,
times
=
nrow
(
tdd_full
))
tdd_full
[
tdd_full
$
p_value
<=
0.05
,
"sig"
]
<-
"_sig"
tdd_full
<-
tdd_full
%>%
mutate
(
kind
=
paste0
(
group
,
sig
))
# nolint
tdd_full
<-
tdd_full
%>%
filter
(
group
!=
"CTRL"
)
%>%
arrange
(
kind
)
write_tsv
(
tdd_full
,
paste0
(
output_folder
,
"/TDD_correlation_table.txt"
))
# nolint
return
(
tdd_full
)
}
#' Produce the TDD correlation figures
#'
#' @param mean_gene_count The minimum mean gene counts to compute their TDD
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param output_folder The folder where the results will be created
produce_tdd_cor_figure
<-
function
(
mean_gene_count
=
10
,
ercc
=
F
,
output_folder
=
"./results/TDD_analysis"
)
{
tdd_full
<-
prepare_df_for_cor
(
mean_gene_count
,
ercc
,
output_folder
)
downv
<-
get_cor_value
(
tdd_full
,
my_group
=
"BRAF_DOWN"
)
upv
<-
get_cor_value
(
tdd_full
,
my_group
=
"BRAF_UP"
)
p
<-
ggplot
(
tdd_full
,
mapping
=
aes
(
x
=
mean_DMSO_TDD
,
y
=
mean_BRAF_TDD
,
color
=
kind
))
+
geom_point
(
aes
(
fill
=
kind
),
colour
=
"white"
,
pch
=
21
,
size
=
3
)
+
scale_fill_manual
(
values
=
c
(
"#4169e1"
,
"darkblue"
,
"#CD5C5C"
,
"darkred"
))
+
coord_cartesian
(
xlim
=
c
(
0
,
1
),
ylim
=
c
(
0
,
1
))
+
geom_abline
(
intercept
=
downv
$
intercept
,
slope
=
downv
$
slope
,
color
=
"#4169e1"
)
+
geom_abline
(
intercept
=
upv
$
intercept
,
slope
=
upv
$
slope
,
color
=
"#CD5C5C"
)
+
ggtitle
(
paste0
(
"TDD of genes with BRAF or DMSO treatment\n"
,
"(R gene BRAF_down = "
,
downv
$
cor
,
")(R BRAF_up = "
,
upv
$
cor
,
")"
))
p_down
<-
ggplot
(
tdd_full
%>%
filter
(
group
%in%
c
(
"BRAF_DOWN"
,
"BRAF_DOWN_sig"
)),
mapping
=
aes
(
x
=
mean_DMSO_TDD
,
y
=
mean_BRAF_TDD
,
color
=
kind
)
)
+
geom_point
(
aes
(
fill
=
kind
),
colour
=
"white"
,
pch
=
21
,
size
=
3
)
+
scale_fill_manual
(
values
=
c
(
"black"
,
"red"
))
+
coord_cartesian
(
xlim
=
c
(
0
,
1
),
ylim
=
c
(
0
,
1
))
+
geom_abline
(
intercept
=
downv
$
intercept
,
slope
=
downv
$
slope
,
color
=
"#4169e1"
)
+
geom_abline
(
slope
=
1
,
color
=
"black"
)
+
ggtitle
(
paste0
(
"TDD of genes BRAF down-regulated with BRAF or DMSO"
,
"treatment\n(R gene BRAF_down = "
,
downv
$
cor
,
")"
))
p_up
<-
ggplot
(
tdd_full
%>%
filter
(
group
%in%
c
(
"BRAF_UP"
,
"BRAF_UP_sig"
)),
mapping
=
aes
(
x
=
mean_DMSO_TDD
,
y
=
mean_BRAF_TDD
,
color
=
kind
)
)
+
geom_point
(
aes
(
fill
=
kind
),
colour
=
"white"
,
pch
=
21
,
size
=
3
)
+
scale_fill_manual
(
values
=
c
(
"black"
,
"red"
))
+
coord_cartesian
(
xlim
=
c
(
0
,
1
),
ylim
=
c
(
0
,
1
))
+
geom_abline
(
intercept
=
downv
$
intercept
,
slope
=
downv
$
slope
,
color
=
"#CD5C5C"
)
+
geom_abline
(
slope
=
1
,
color
=
"black"
)
+
ggtitle
(
paste0
(
"TDD of genes BRAF up-regulated with BRAF or DMSO"
,
"treatment\n(R gene BRAF_up = "
,
upv
$
cor
,
")"
))
pdf
(
paste0
(
output_folder
,
"/TDD_correlation_figures.pdf"
),
width
=
12
,
height
=
17
)
grid.arrange
(
p
,
p_down
,
p_up
,
ncol
=
1
)
# nolint
dev.off
()
}
#' Get the negative binomial regression p-value
#'
#' @details glm negative binomial test to check if the number of peaks
#' between groups of genes is different
#'
#' @param tab a table containing the number of ribosome peaks in genes and
#' their groups
#' @param output_folder folder where the diagnotics will be created
#' @return the p-value of the test
glm_nb_pvalue
<-
function
(
tab
,
pic_col
,
output_folder
=
"./results/TDD_analysis"
)
{
mod
<-
glm.nb
(
paste0
(
pic_col
,
"~ group"
),
data
=
tab
)
# nolint
output_diag
<-
paste0
(
output_folder
,
"/diagnotics"
)
dir.create
(
output_diag
,
showWarnings
=
F
)
output_fig
<-
paste0
(
output_diag
,
"/glm_nb_"
,
pic_col
,
".pdf"
)
pdf
(
output_fig
,
width
=
12
,
height
=
8
)
plot
(
simulateResiduals
(
mod
))
# nolint
dev.off
()
return
(
summary
(
mod
)
$
coeff
[
2
,
4
])
}
#' Get the negative binomial regression p-value
#'
#' @details glm logistic test to check if the presence of peaks
#' between groups of genes is different
#'
#' @param tab a table containing the number of ribosome peaks in genes and
#' their groups
#' @param output_folder folder where the diagnotics will be created
#' @return the p-value of the test
glm_log_pvalue
<-
function
(
tab
,
pic_col
,
output_folder
=
"./results/TDD_analysis"
)
{
mod
<-
glm
(
paste0
(
pic_col
,
"~ group"
),
data
=
tab
,
family
=
binomial
(
link
=
"logit"
)
)
output_diag
<-
paste0
(
output_folder
,
"/diagnotics"
)
dir.create
(
output_diag
,
showWarnings
=
F
)
output_fig
<-
paste0
(
output_diag
,
"/glm_nb_"
,
pic_col
,
".pdf"
)
pdf
(
output_fig
,
width
=
12
,
height
=
8
)
plot
(
simulateResiduals
(
mod
))
# nolint
dev.off
()
return
(
summary
(
mod
)
$
coeff
[
2
,
4
])
}
#' Create barplots showing if different groups of genes have a different number
#' of ribosome peaks
#'
#' @param tdd_full A table with TDD and number of peaks
#' @param gene_type The selected group of gene on which we want to perform the
#' analysis
#' @param output_folder Folder where the results will be created
#' @param peak_type The type of the peak
create_pic_figs
<-
function
(
tdd_full
,
gene_type
=
"BRAF_DOWN"
,
output_folder
=
"./results/TDD_analysis"
,
peak_type
=
"RiboRNApeak"
)
{
treatment
<-
sub
(
"_.*"
,
""
,
gene_type
)
regulation
<-
sub
(
".*_"
,
""
,
gene_type
)
if
(
regulation
==
"UP"
)
{
treatment
<-
"DMSO"
}
target
<-
paste0
(
"TDD_"
,
treatment
)
tdd_filt
<-
tdd_full
%>%
filter
(
group
==
gene_type
)
# nolint
tdd_filt
<-
tdd_filt
%>%
mutate
(
group
=
ifelse
(
!!
as.symbol
(
target
),
paste0
(
group
,
"_TDD"
),
group
))
data_col
<-
"peak"
res_col
<-
paste0
(
"mean_"
,
data_col
)
has_col
<-
paste0
(
"has_"
,
data_col
)
has_res_col
<-
paste0
(
"mean_"
,
has_col
)
tdd_filt
<-
tdd_filt
%>%
mutate
(
!!
as.symbol
(
has_col
)
:=
ifelse
(
!!
as.symbol
(
data_col
)
>
0
,
1
,
0
)
)
tdd_fig
<-
tdd_filt
%>%
select
(
!!
as.symbol
(
data_col
),
!!
as.symbol
(
has_col
),
group
)
%>%
group_by
(
group
)
%>%
summarise
(
!!
as.symbol
(
res_col
)
:=
mean
(
!!
as.symbol
(
data_col
)),
!!
as.symbol
(
has_res_col
)
:=
mean
(
!!
as.symbol
(
has_col
))
)
pval
<-
glm_nb_pvalue
(
tdd_filt
,
data_col
,
output_folder
)
p1
<-
ggplot
(
tdd_fig
,
mapping
=
aes_string
(
x
=
"group"
,
y
=
res_col
,
fill
=
"group"
)
)
+
geom_bar
(
stat
=
"identity"
)
+
ggtitle
(
paste0
(
"Average proportion of "
,
treatment
,
" peaks in gene"
,
regulation
,
"-regulated by BRAF (p = "
,
pval
,
")"
))
pval
<-
glm_log_pvalue
(
tdd_filt
,
has_col
,
output_folder
)
p2
<-
ggplot
(
tdd_fig
,
mapping
=
aes_string
(
x
=
"group"
,
y
=
has_res_col
,
fill
=
"group"
))
+
geom_bar
(
stat
=
"identity"
)
+
ggtitle
(
paste0
(
"Average proportion of "
,
regulation
,
"-regulated gene by BRAF having at least a "
,
treatment
,
" peak (p = "
,
pval
,
")"
))
pdf
(
paste0
(
output_folder
,
"/barplot_pic_analysis"
,
gene_type
,
"_"
,
peak_type
,
".pdf"
),
width
=
12
,
height
=
12
)
grid.arrange
(
p1
,
p2
,
ncol
=
1
)
#nolint
dev.off
()
}
#' function that calcultaed if genes are TDD (more in BRAF or DMSO condition)
#' and if they have peaks
#'
#' @param peak_file A file containing peaks in a bed format
#' @param mean_gene_count The minimum gene count of a gene to be analysed
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
get_tdd_pic_table
<-
function
(
peak_file
,
mean_gene_count
=
10
,
ercc
=
F
)
{
peak_list
<-
read.table
(
peak_file
)
$
V1
peak_table
<-
tibble
(
gene
=
names
(
table
(
peak_list
)),
peak
=
as.vector
(
table
(
peak_list
))
)
tdd_full
<-
get_final_tdd_table
(
mean_gene_count
,
ercc
)
tdd_full
<-
tdd_full
[,
c
(
"gene"
,
grep
(
"X..."
,
colnames
(
tdd_full
),
value
=
T
))]
tdd_full
<-
tdd_full
%>%
mutate
(
TDD_BRAF
=
X276_BRAF_TDD
-
X276_DMSO_TDD
>
0.2
&
X277_BRAF_TDD
-
X277_DMSO_TDD
>
0.2
&
X278_BRAF_TDD
-
X278_DMSO_TDD
>
0.2
,
TDD_DMSO
=
X276_DMSO_TDD
-
X276_BRAF_TDD
>
0.2
&
X277_DMSO_TDD
-
X277_BRAF_TDD
>
0.2
&
X278_DMSO_TDD
-
X278_BRAF_TDD
>
0.2
)
%>%
select
(
gene
,
TDD_BRAF
,
TDD_DMSO
)
#nolint
tdd_full
<-
tdd_full
%>%
left_join
(
peak_table
)
#nolint
tdd_full
<-
tdd_full
%>%
replace_na
(
list
(
peak
=
0
))
#nolint
return
(
tdd_full
)
}
#' Create all peaks fig
#'
#' @param peak_file A file containing peaks
#' @param mean_gene_count The minimum mean count of gene for its tdd to
#' be computed
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param gene_type The selected group of gene on which we want to perform the
#' analysis
#' @param peak_type The type of peak inside the figure
build_peaks_fig
<-
function
(
peak_file
,
mean_gene_count
=
10
,
ercc
=
F
,
gene_type
=
"BRAF_DOWN"
,
output_folder
=
"./results/TDD_analysis"
,
peak_type
=
"RiboRNApeak"
)
{
tdd_table
<-
get_tdd_pic_table
(
peak_file
,
mean_gene_count
,
ercc
)
tdd_table
<-
get_group_columns
(
tdd_table
)
create_pic_figs
(
tdd_table
,
gene_type
,
output_folder
,
peak_type
=
peak_type
)
}
create_figures
(
mean_gene_count
=
0
,
ercc
=
F
,
output_folder
=
"./results/TDD_analysis"
)
create_figures
(
mean_gene_count
=
10
,
ercc
=
F
,
output_folder
=
"./results/TDD_analysis"
)
create_figures
(
mean_gene_count
=
20
,
ercc
=
F
,
output_folder
=
"./results/TDD_analysis"
)
create_figures
(
mean_gene_count
=
50
,
ercc
=
F
,
output_folder
=
"./results/TDD_analysis"
)
create_figures
(
mean_gene_count
=
0
,
ercc
=
T
,
output_folder
=
"./results/TDD_analysis"
)
produce_tdd_cor_figure
()
build_peaks_fig
()
dir
<-
"./data/gene_lists/"
files
<-
c
(
"BRAF_vs_DMSO_RepMean.merged.bed"
,
"DMSO_vs_BRAF_RepMean.merged.bed"
,
"BRAF_sorted_26-40.merged.bed"
,
"DMSO_sorted_26-40.merged.bed"
)
gene_types
<-
c
(
"BRAF_DOWN"
,
"BRAF_UP"
,
"BRAF_DOWN"
,
"BRAF_UP"
)
peak_types
<-
c
(
"RiboRNAPeak"
,
"RiboRNAPeak"
,
"RiboPeak"
,
"RiboPeak"
)
for
(
i
in
seq_len
(
length
(
files
)))
{
peak_file
<-
paste0
(
dir
,
files
[
i
])
build_peaks_fig
(
peak_file
=
peak_file
,
gene_type
=
gene_types
[
i
],
peak_type
=
peak_types
[
i
])
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment