Skip to content
Snippets Groups Projects
Verified Commit b156ab32 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

version v0.0.8

parent 85dab5be
Branches
No related tags found
No related merge requests found
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/poiss_boostrap_em.R
% Please edit documentation in R/poiss_boostrap_em.R, R/poiss_boostrap_em_sub.R
\name{poiss_compare_models}
\alias{poiss_compare_models}
\title{Function to compile statistics differences between model XY and XO on boostrap
......@@ -10,9 +10,19 @@ poiss_compare_models(
threshold = 1,
nboot = 100,
bootsize = 1,
frac = 1,
core = 1,
max_iter = 100
max_iter = 100,
max_error = 5
)
poiss_compare_models(
x,
threshold = 1,
nboot = 100,
bootsize = 1,
core = 1,
max_iter = 100,
max_error = 5
)
}
\arguments{
......@@ -24,20 +34,33 @@ poiss_compare_models(
\item{bootsize}{size of the boostrap sample (if < 0 we take a percentage of x)}
\item{frac}{(default: 1.) fraction of the data to use at each step}
\item{core}{number of cpus to use for the computation}
\item{max_iter}{(default: 100) maximum number of iteration}
\item{max_error}{(default: 5) maximum number of try}
\item{frac}{(default: 1.) fraction of the data to use at each step}
}
\value{
tibble with the statistics
tibble with the statistics
}
\description{
Function to compile statistics differences between model XY and XO on boostrap
samples of the data
Function to compile statistics differences between model XY and XO on boostrap
samples of the data
}
\examples{
res <- rbind(
extraDistr::rbvpois(1000, 10, 10, 20), # A
extraDistr::rbvpois(1000, 0, 20, 20), # X
extraDistr::rbvpois(1000, 20, 0, 5) # Y
) \%>\%
poiss_compare_models(nboot = 3, core = 1)
res <- rbind(
extraDistr::rbvpois(1000, 10, 10, 20), # A
extraDistr::rbvpois(1000, 0, 20, 20), # X
......
......@@ -103,6 +103,29 @@ test_model_XY <- function(data) {
}
model_XY <- test_model_XY(data)
plot_model(model_XY)
data <- sim_kmer(1e4, 100, "XY", count = T)
test_model_XY <- function(data) {
data %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
# compute_tpm() %>%
EM_constraint(
frac = 1,
kappa_weight = c(.5, .5, .5),
mu_weight = c(.5, .5, .5),
sigma_weight = c(.1, .1, .1),
kappa_prior = c(1/3, 1/3, 1/3),
mu_prior = list(c(1, 1) * mean(.), c(.6, 1) * mean(.), c(.5, 0) * mean(.)),
sigma_prior = list(
matrix(c(1, .95, .95, 1) * mean(.) * 10, ncol = 2),
matrix(c(1.05, 1.5, 1.5, 4.05) * mean(.) * 2, ncol = 2),
matrix(c(2, .05, .05, .05) * mean(.) * 2, ncol = 2)
)
)
}
model_XO <- test_model_XY(data)
plot_model(model_XO)
```
## XO
......
......@@ -69,13 +69,18 @@ pkgload::load_all()
## Params diff
## Get clusters
## E batch
## EM
# Clustering on simulated data
## XY
We simulate 3 clusters from bi-Poisson distribution
We simulate 3 clusters from bi-Poisson distribution and we use the `em_bipoiss_clust()` function and plot the results
```{r simulating_data_XY}
n <- 1e3
......@@ -85,28 +90,12 @@ data <- rbind(
extraDistr::rbvpois(n * .1, 200, 0, 50) # Y
)
data %>%
as_tibble() %>%
dplyr::rename(male = 'V1', female = 'V2') %>%
sample_n(1000) %>%
ggplot() +
geom_point(aes(x = male, y = female)) +
geom_abline(intercept = 0, slope = 1, color = "blue") +
geom_abline(intercept = 0, slope = 2, color = "red") +
geom_abline(intercept = 0, slope = 0, color = "green") +
coord_equal() +
theme_bw()
```
We use the `em_bipoiss_clust()` function and plot the results
res <- data %>% em_bipoiss_clust()
```{r fit_simulation}
res <- data %>% em_bipoiss_clust(frac = .1)
rbind(
extraDistr::rbvpois(1000*res[[1]]$kappa, res[[1]]$l1, res[[1]]$l2, res[[1]]$l3), # X
extraDistr::rbvpois(1000*res[[2]]$kappa, res[[2]]$l1, res[[2]]$l2, res[[2]]$l3), # A
extraDistr::rbvpois(1000*res[[3]]$kappa, res[[3]]$l1, res[[3]]$l2, res[[3]]$l3) # Y
extraDistr::rbvpois(1000*res$A$kappa, res$A$l1, res$A$l2, res$A$l3), # X
extraDistr::rbvpois(1000*res$X$kappa, res$X$l1, res$X$l2, res$X$l3), # A
extraDistr::rbvpois(1000*res$Y$kappa, res$Y$l1, res$Y$l2, res$Y$l3) # Y
) %>%
as_tibble() %>%
dplyr::rename(male = 'V1', female = 'V2') %>%
......@@ -116,6 +105,7 @@ rbind(
geom_abline(intercept = 0, slope = 2, color = "red") +
geom_abline(intercept = 0, slope = 0, color = "green") +
coord_equal() +
ylim(c(-10,500)) +
theme_bw()
```
......@@ -150,7 +140,9 @@ res <- data %>% em_bipoiss_clust(params = list(
l2 = mean(data),
l3 = mean(data)
)
), frac = .1)
),
gamma = c(round(nrow(data) * .8), round(nrow(data) * .2))
)
rbind(
extraDistr::rbvpois(1000*res[[1]]$kappa, res[[1]]$l1, res[[1]]$l2, res[[1]]$l3), # X
extraDistr::rbvpois(1000*res[[2]]$kappa, res[[2]]$l1, res[[2]]$l2, res[[2]]$l3) # A
......@@ -182,7 +174,8 @@ res <- data %>% em_bipoiss_clust(params = list(
l2 = mean(data),
l3 = mean(data)
)
), frac = .1)
),
gamma = c(nrow(data)))
extraDistr::rbvpois(1000*res[[1]]$kappa, res[[1]]$l1, res[[1]]$l2, res[[1]]$l3) %>% # X
as_tibble() %>%
dplyr::rename(male = 'V1', female = 'V2') %>%
......@@ -208,7 +201,7 @@ data <- rbind(
extraDistr::rbvpois(n, 0, 200, 200), # X
extraDistr::rbvpois(n, 200, 0, 5) # Y
)
res <- data %>% poiss_compare_models(nboot = 5, frac = 1e-1, core = 1, max_iter = 30)
res <- data %>% poiss_compare_models(nboot = 5, core = 1, max_iter = 30)
res %>%
pivot_longer(cols = -c("name"), names_to = "metric") %>%
ggplot(aes(x = name, y = value)) +
......@@ -227,7 +220,7 @@ data <- rbind(
extraDistr::rbvpois(n, 100, 100, 200), # A
extraDistr::rbvpois(n, 0, 200, 200) # X
)
res <- data %>% poiss_compare_models(nboot = 5, frac = 1e-2, core = 1, max_iter = 30)
res <- data %>% poiss_compare_models(nboot = 5, core = 1, max_iter = 30)
res %>%
pivot_longer(cols = -c("name"), names_to = "metric") %>%
ggplot(aes(x = name, y = value)) +
......@@ -243,9 +236,9 @@ We simulate 1 clusters from bi-Poisson distribution
```{r model_compare_OO}
data <- rbind(
extraDistr::rbvpois(1000, 100, 100, 200) # A
extraDistr::rbvpois(10000, 100, 100, 200) # A
)
res <- data %>% poiss_compare_models(nboot = 5, frac = 1e-2, core = 1, max_iter = 30)
res <- data %>% poiss_compare_models(nboot = 10, core = 1, max_iter = 30)
res %>%
pivot_longer(cols = -c("name"), names_to = "metric") %>%
ggplot(aes(x = name, y = value)) +
......
......@@ -64,6 +64,17 @@ data %>%
coord_fixed()
```
## OO data
```{r sim_OO}
data <- sim_kmer(1e4, 1000, "OO")
data %>%
ggplot(aes(x = count_m, y = count_f, color = sex)) +
geom_point() +
coord_fixed()
```
# Clustering
EN functions
......@@ -85,6 +96,32 @@ data %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
plot_proba(model_XY$proba, sex = "XY")
data <- sim_kmer(1e6, 1000, "XY", count = T)
model_XY <- data %>%
dplyr::select(count_m, count_f) %>%
mutate(
count_m = log1p(count_m),
count_f = log1p(count_f),
) %>%
as.matrix() %>%
EM_clust(sex = "XO")
data %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
plot_proba(model_XY$proba, sex = "XO")
data <- sim_kmer(1e6, 1000, "XY", count = T)
model_XY <- data %>%
dplyr::select(count_m, count_f) %>%
mutate(
count_m = log1p(count_m),
count_f = log1p(count_f),
) %>%
as.matrix() %>%
EM_clust(sex = "OO")
data %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
plot_proba(model_XY$proba, sex = "OO")
```
# clustering XO
......@@ -104,7 +141,7 @@ data %>%
# clustering OO
```{r clustering_OO}
data <- sim_kmer(1e4, 1000, "XO")
data <- sim_kmer(1e4, 1000, "OO")
model_XO <- data %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
......@@ -152,26 +189,39 @@ pchisq(-2 * (model_XY$loglik - model_XO$loglik), 5)
## For XY
```{r BIC_XY}
data <- sim_kmer(1e7, 1000, "XY")
res <- compare_models(data, nboot = 10, bootsize = 0.01, core = 1)
data <- sim_kmer(1e6, 1000, "XY")
res <- compare_models(data, nboot = 10, bootsize = 1, core = 1)
res %>%
ggplot(aes(x = name, y = BIC)) +
geom_violin()
res %>%
ggplot(aes(x = name, y = WSS_f / BSS)) +
ggplot(aes(x = name, y = loglik)) +
geom_violin()
```
## For XO
```{r BIC_XO}
data <- sim_kmer(1e7, 1000, "XO")
res <- compare_models(data, nboot = 10, bootsize = 0.01, core = 1)
data <- sim_kmer(1e6, 1000, "XO")
res <- compare_models(data, nboot = 10, bootsize = 1, core = 1)
res %>%
ggplot(aes(x = name, y = BIC)) +
geom_violin()
res %>%
ggplot(aes(x = name, y = loglik)) +
geom_violin()
```
## For OO
```{r BIC_OO}
data <- sim_kmer(1e6, 1000, "OO")
res <- compare_models(data, nboot = 10, bootsize = 1, core = 1)
res %>%
ggplot(aes(x = name, y = BIC)) +
geom_violin()
res %>%
ggplot(aes(x = name, y = WSS_f / BSS)) +
ggplot(aes(x = name, y = loglik)) +
geom_violin()
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment