Skip to content
Snippets Groups Projects
Verified Commit 13bcec50 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

update plot

parent bd49510e
Branches
Tags
No related merge requests found
......@@ -39,6 +39,7 @@ Simulate k-mer counts data
#' @param n_kmer the total number of kmers
#' @param mean_coef the base mean of the kmers counts
#' @param sex = c("XY", "XO") the sex of the specie
#' @param count (default = F) generate count data
#' @return a tibble with a c("sex, count_m, count_f) columns
#' @importFrom mvtnorm rmvnorm
#' @importFrom dplyr bind_rows rename
......@@ -48,7 +49,7 @@ Simulate k-mer counts data
#' @examples
#' sim_kmer(1e6, 1000, "XY")
#' sim_kmer(1e6, 1000, "XO")
sim_kmer <- function(n_kmer, mean_coef, sex = "XY") {
sim_kmer <- function(n_kmer, mean_coef, sex = "XY", count = F) {
data <- tibble(
sex = "F",
count = mvtnorm::rmvnorm(n_kmer * .1, mean = c(1, 2)*mean_coef, sigma = matrix(c(1.05, 2, 2, 4.05) * mean_coef^1.5, ncol = 2), checkSymmetry = F, method = "svd") %>%
......@@ -73,9 +74,21 @@ sim_kmer <- function(n_kmer, mean_coef, sex = "XY") {
unnest(count)
)
}
data %>%
data <- data %>%
rename(count_m = ...2,
count_f = ...3)
if (count) {
return(
data %>%
filter(count_m > 0, count_f > 0) %>%
mutate(
count_m = map(count_m, function(x){rpois(1, x)}),
count_f = map(count_f, function(x){rpois(1, x)})
) %>%
unnest(count_m, count_f)
)
}
return(data)
}
```
......@@ -293,23 +306,33 @@ plot_proba <- function(x, proba, sex = "XY") {
proba_a = proba[, 1],
proba_f = proba[, 2],
proba_m = proba[, 3],
clust_proba = rgb(proba_f, proba_m, proba_a, maxColorValue = 1)
clust_proba = rgb(proba_f, proba_m, proba_a, maxColorValue = 1),
) %>%
sample_n(100000) %>%
ggplot(aes(x = count_m, y = count_f, color = clust_proba)) +
geom_point() +
scale_color_identity()
scale_color_identity() +
coord_equal() +
theme_bw()
} else {
as_tibble(x, .name_repair = "universal") %>%
mutate(
proba_a = proba[, 1],
proba_f = proba[, 2],
clust_proba = rgb(proba_f, 0, proba_a, maxColorValue = 1)
clust_proba = rgb(proba_f, 0, proba_a, maxColorValue = 1),
) %>%
sample_n(100000) %>%
ggplot(aes(x = count_m, y = count_f, color = clust_proba)) +
geom_point() +
scale_color_identity()
scale_color_identity() +
coord_equal() +
theme_bw()
}
}
sample %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
plot_proba(model_XY$proba, sex = "XY")
```
```{r fun-init_param}
......@@ -491,7 +514,7 @@ BSS_WSS <- function(x, cluster) {
# clustering XY
```{r clustering_XY}
data <- sim_kmer(1e4, 1000, "XY")
data <- sim_kmer(1e4, 1000, "XY", count =T)
model_XY <- data %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
......@@ -502,6 +525,35 @@ data %>%
plot_proba(model_XY$proba)
```
```{r clustering_XY_flexmix}
library(flexmix)
m_xy <- flexmix(
log1p(count_m) ~ I(log1p(count_f) + I(log1p(count_f))),
k = 3,
data = data %>%
dplyr::select(count_m, count_f),
# model = FLXglm(family = "poisson")
)
summary(m_xy)
parameters(m_xy, component = 1)
parameters(m_xy, component = 2)
parameters(m_xy, component = 3)
plot(m_xy)
rm_xy <- refit(m_xy)
summary(rm_xy)
data %>%
mutate(
proba1 = m_xy@posterior$scaled[, 1],
proba2 = m_xy@posterior$scaled[, 2],
proba3 = m_xy@posterior$scaled[, 3]
) %>%
pivot_longer(cols = c(proba1, proba2, proba3)) %>%
ggplot(aes(x = log1p(count_m), y = log1p(count_f), color = value)) +
geom_point() +
facet_wrap(~name) +
theme_bw()
```
# clustering XO
```{r clustering_XO}
......@@ -516,6 +568,29 @@ data %>%
plot_proba(model_XO$proba, sex = "X0")
```
```{r clustering_X0_flexmix}
m_xo <- flexmix(
count_m ~ I(count_f + I(count_f)),
k = 2,
data = data %>%
dplyr::select(count_m, count_f)
)
summary(m_xo)
parameters(m_xo, component = 1)
parameters(m_xo, component = 2)
plot(m_xo)
rm_xo <- refit(m_xo)
summary(rm_xo)
data %>%
mutate(
proba1 = m_xo@posterior$scaled[, 1],
proba2 = m_xo@posterior$scaled[, 2],
) %>%
ggplot(aes(x = count_m, y = count_f, color = proba2)) +
geom_point() +
theme_bw()
```
# LRT
## For XY
......@@ -641,10 +716,108 @@ annotate_counts <- function(annotation, count, name) {
## M belari data
```{r dev}
load("../../results/12/mbelari/sample.Rdata", v = T)
```
```{r dev}
model_XY <- sample %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
EM_clust(sex = "XY")
model_XO <- sample %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
EM_clust(sex = "X0")
sample %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
plot_proba(model_XY$proba, sex = "XY")
sample %>%
dplyr::select(count_m, count_f) %>%
as.matrix() %>%
plot_proba(model_XO$proba, sex = "X0")
```
```{r dev}
sample %>%
select(count_m, count_f) %>%
sample_frac(0.01) %>%
mutate(
count_m = expm1(count_m),
count_f = expm1(count_f),
) %>%
ggplot(aes(x = count_m, y = count_f)) +
geom_point() +
theme_bw()
```
```{r dev}
sample %>%
select(count_m, count_f) %>%
sample_frac(0.01) %>%
ggplot(aes(x = count_m, y = count_f)) +
geom_point() +
theme_bw()
```
```{r dev}
library(flexmix)
count <- sample %>%
dplyr::select(count_m, count_f) %>%
mutate(
count_m = round(expm1(count_m)),
count_f = round(expm1(count_f)),
) %>%
filter(count_m > 0) %>%
mutate(
ratio = count_f / count_m
) %>%
sample_n(10000)
count %>%
ggplot(aes(x = ratio)) +
geom_histogram() +
scale_x_log10()
m_xy <- flexmix(
ratio ~ ratio,
k = 3,
data = count,
)
m_xo <- flexmix(
count_m ~ count_f,
k = 2,
data = count,
model = FLXglm(family = "poisson")
)
summary(m_xy)
parameters(m_xy, component = 1)
parameters(m_xy, component = 2)
parameters(m_xy, component = 3)
plot(m_xy)
rm_xy <- refit(m_xy)
summary(rm_xy)
count %>%
mutate(
proba1 = m_xy@posterior$scaled[, 1],
proba2 = m_xy@posterior$scaled[, 2],
proba3 = m_xy@posterior$scaled[, 3]
) %>%
pivot_longer(cols = c(proba1, proba2, proba3)) %>%
ggplot(aes(x = count_m, y = count_f, color = value)) +
geom_point() +
facet_wrap(~name) +
coord_equal() +
theme_bw()
```
```{r dev, eval=F}
data <- readr::read_tsv("results/12/mlongespiculosa/mlongespiculosa.csv", show_col_types = FALSE)
data <- readr::read_tsv("../../results/12/mlongespiculosa/mlongespiculosa.csv", show_col_types = FALSE)
format(object.size(data), units = "Mb")
annotation <- parse_annotation("data/sample.csv")
annotation <- parse_annotation("../../data/sample.csv")
count <- annotate_counts(annotation, data, "mlongespiculosa")
save(count, file = "results/12/mlongespiculosa/counts.Rdata")
load("results/12/mlongespiculosa/counts.Rdata")
......@@ -659,3 +832,5 @@ res %>%
ggplot(aes(x = name, y = WSS_f / BSS)) +
geom_violin()
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment