Skip to content
Snippets Groups Projects
Unverified Commit fde1f52a authored by Laurent Modolo's avatar Laurent Modolo
Browse files

clonality_paper.R: test on heatmap

parent 17425f65
No related branches found
No related tags found
No related merge requests found
...@@ -417,40 +417,6 @@ for (day in names(sce_day)) { ...@@ -417,40 +417,6 @@ for (day in names(sce_day)) {
save(sce_day, file = "results/2020_01_02_clonality_paper_sce.Rdata") save(sce_day, file = "results/2020_01_02_clonality_paper_sce.Rdata")
load(file = "results/2020_01_02_clonality_paper_sce.Rdata") load(file = "results/2020_01_02_clonality_paper_sce.Rdata")
DEA_DEA_clone_cell_type_size <- list()
min_clone_size <- 3
for (day in names(sce_day)) {
colData(sce_day[[day]])$clone_id <- colData(sce_day[[day]]) %>%
as_tibble() %>%
mutate(clone_id = ifelse(
clone_id == 0,
(max(clone_id) + 1):(max(clone_id) + length(which(clone_id == 0))),
clone_id)) %>%
pull(clone_id)
colData(sce_day[[day]])$clone_size <- colData(sce_day[[day]]) %>%
as_tibble() %>%
left_join(
colData(sce_day[[day]]) %>%
as_tibble() %>%
group_by(clone_id) %>%
dplyr::summarise(clone_size = n())
) %>%
pull(clone_size)
DEA_DEA_clone_cell_type_size[[day]] <- DEA(
sce_day[[day]][, colData(sce_day[[day]])$clone_size >= min_clone_size],
test = "~ (1|clone_id)",
formula = "count ~ p_PLS_DEA_cell_type + (1|clone_id)",
assay_name = "counts_vst",
cpus = 10
)
save(
DEA_DEA_clone_cell_type_size,
file = "results/2020_01_01_DEA_DEA_clone_size.Rdata"
)
}
genes_PLS <- read_csv("data/2017_11_28_List_Laurent_Genes_PLS.csv") %>% genes_PLS <- read_csv("data/2017_11_28_List_Laurent_Genes_PLS.csv") %>%
pivot_longer(cols = c("Genes_EFF", "Genes_MEM"), pivot_longer(cols = c("Genes_EFF", "Genes_MEM"),
names_to = "type", names_to = "type",
...@@ -509,25 +475,25 @@ for (day in names(sce_day)) { ...@@ -509,25 +475,25 @@ for (day in names(sce_day)) {
min_clone_size <- 3 min_clone_size <- 3
load(file = "results/2020_01_02_clonality_paper_sce.Rdata") load(file = "results/2020_01_02_clonality_paper_sce.Rdata")
load(file = "results/2020_01_01_DEA_DEA_clone_size.Rdata") load(file = "results/2020_01_01_DEA_DEA_clone_size.Rdata")
load(file = "results/2020_01_01_DEA_clone_PCA_cell_type_size.Rdata", v=T)
## adj pvalue ## adj pvalue
for (day in names(sce_day)) { for (day in names(sce_day)) {
print(day) print(day)
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size <- NA rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size <- NA
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size <- rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size <-
get_genes_pval(DEA_DEA_clone_cell_type_size[[day]], sce_day[[day]]) get_genes_pval(DEA_clone_PCA_cell_type_size[[day]], sce_day[[day]])
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size %>% rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size %>%
is.na() %>% is.na() %>%
table() %>% table() %>%
print() print()
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size_adj <- p.adjust( rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size_adj <- p.adjust(
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size, rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size,
method = "BH" method = "BH"
) )
table(rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size_adj < 0.05) %>% print() table(rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size_adj < 0.05) %>% print()
} }
day <- "593" day <- "593"
assays(sce_day[[day]])$logcounts <- scater::logNormCounts( assays(sce_day[[day]])$logcounts <- scater::logNormCounts(
...@@ -538,8 +504,8 @@ for (day in names(sce_day)) { ...@@ -538,8 +504,8 @@ for (day in names(sce_day)) {
assay(., "logcounts") %>% assay(., "logcounts") %>%
Matrix::Matrix(sparse = T) Matrix::Matrix(sparse = T)
sce_DEA_hm <- sce_day[[day]][ sce_DEA_hm <- sce_day[[day]][
!is.na(rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size_adj) & !is.na(rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size_adj) &
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size_adj < 0.05 rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size_adj < 0.05
] ]
sce_DEA_hm %>% dim() sce_DEA_hm %>% dim()
colData(sce_DEA_hm) <- colData(sce_DEA_hm) %>% colData(sce_DEA_hm) <- colData(sce_DEA_hm) %>%
...@@ -568,7 +534,6 @@ rm_genes <- readr::read_delim( ...@@ -568,7 +534,6 @@ rm_genes <- readr::read_delim(
future::plan("multiprocess", workers = 10) future::plan("multiprocess", workers = 10)
test <- assay(sce_DEA_hm, "logcounts") %>% test <- assay(sce_DEA_hm, "logcounts") %>%
log1p() %>%
as.matrix() %>% as.matrix() %>%
as_tibble(rownames = "gene_name") %>% as_tibble(rownames = "gene_name") %>%
tidyr::nest(counts = !c(gene_name)) %>% tidyr::nest(counts = !c(gene_name)) %>%
...@@ -584,17 +549,23 @@ rm_genes <- readr::read_delim( ...@@ -584,17 +549,23 @@ rm_genes <- readr::read_delim(
mutate( mutate(
count_var = purrr::map(.x = counts, .f = function(.x){ count_var = purrr::map(.x = counts, .f = function(.x){
var(.x$count) var(.x$count)
}),
count_mean = purrr::map(.x = counts, .f = function(.x){
(.x$count)
}) })
) %>% ) %>%
unnest(count_var) %>% unnest(count_var) %>%
filter(count_var > 0.2) %>% unnest(count_mean) %>%
select(count_var, count_mean) %>% summary()
filter(!(gene_name %in% rm_genes$geneid)) %>%
filter(count_var > 0.1) %>%
mutate( mutate(
model = furrr::future_map(.x = counts, .f = function(.x){ model = furrr::future_map(.x = counts, .f = function(.x){
model.matrix(~-1 + clone_id, data = .x) %>% model.matrix(~-1 + clone_id, data = .x) %>%
glmnet( glmnet::glmnet(
x = ., x = .,
y = (.x %>% pull(count)), y = (.x %>% pull(count)),
lambda = cv.glmnet( lambda = glmnet::cv.glmnet(
., .,
(.x %>% pull(count)) (.x %>% pull(count))
)$lambda.1se )$lambda.1se
...@@ -606,57 +577,103 @@ rm_genes <- readr::read_delim( ...@@ -606,57 +577,103 @@ rm_genes <- readr::read_delim(
select(-c(counts, model)) %>% select(-c(counts, model)) %>%
tidyr::unnest(coefs) tidyr::unnest(coefs)
gene_name <- test %>%
janitor::clean_names() %>%
dplyr::filter(
dev_ratio > 0
) %>%
group_by(gene_name) %>%
dplyr::summarize(
estimate = max(abs(estimate)),
) %>%
# filter(estimate >= quantile(estimate, 0.90)) %>%
pull(gene_name)
# cluster_row <-
test %>% cluster_row <- assay(sce_DEA_hm, "logcounts") %>%
as.matrix() %>%
as_tibble(rownames = "gene_name") %>%
filter(!(gene_name %in% rm_genes$geneid)) %>%
tidyr::nest(counts = !c(gene_name)) %>%
mutate(
counts = purrr::map(.x = counts, .f = function(.x){
tibble(
id = colnames(.x),
count = t(.x)[, 1],
clone_id = colData(sce_DEA_hm)$clone_id)
}
)
) %>%
mutate(
count_var = purrr::map(.x = counts, .f = function(.x){
var(.x$count)
})
) %>%
unnest(count_var) %>%
mutate(
model = furrr::future_map(.x = counts, .f = function(.x){
lm(count ~ clone_id, data = .x)
},
.progress = TRUE)
) %>%
mutate(coefs = map(model, tidy)) %>%
select(-c(counts, model)) %>%
tidyr::unnest(coefs) %>%
janitor::clean_names() %>% janitor::clean_names() %>%
filter(gene_name %in% gene_name) %>% dplyr::mutate(term = ifelse(
mutate(term = ifelse(
term == "(Intercept)", term == "(Intercept)",
colData(sce_DEA_hm)$clone_id %>% as.factor() %>% levels() %>% .[1] %>% colData(sce_DEA_hm)$clone_id %>% as.factor() %>% levels() %>% .[1] %>%
str_c("clone_id", .), str_c("clone_id", .),
term) term)
) %>% ) %>%
mutate( dplyr::mutate(
term = str_replace(term, "clone_id(.*)", "\\1") term = str_replace(term, "clone_id(.*)", "\\1")
) %>% ) %>%
select(gene_name, term, estimate) %>% dplyr::select(gene_name, term, estimate) %>%
mutate(id = 1:nrow(.), tidyr::pivot_wider(
estimate = ifelse(is.na(estimate), 0, estimate)) %>% id_cols = gene_name,
group_by(c(gene_name, id) %>%
summarise(estimate = sum(estimate))
summarize()
pivot_wider(
id_cols = c(id, gene_name),
names_from = term, names_from = term,
values_from = estimate values_from = estimate,
values_fill = 0,
values_fn = sum
) %>% ) %>%
select(-id) %>%
as.data.frame() as.data.frame()
rownames(cluster_row) <- cluster_row$gene_name rownames(cluster_row) <- cluster_row$gene_name
cluster_row %>% dim() cluster_row <-
sce_DEA_hm %>% dim() assay(sce_DEA_hm, "logcounts") %>%
as.matrix() %>%
as_tibble(rownames = "gene_name") %>%
filter(!(gene_name %in% rm_genes$geneid)) %>%
tidyr::nest(counts = !c(gene_name)) %>%
mutate(
counts = purrr::map(.x = counts, .f = function(.x){
tibble(
id = colnames(.x),
count = t(.x)[, 1],
clone_id = as.factor(colData(sce_DEA_hm)$clone_id)
) %>%
group_by(clone_id) %>%
dplyr::summarise(
id = id,
count = count,
clone_id = clone_id,
count_mean = max(count)
) %>%
ungroup()
}
)
) %>%
tidyr::unnest(counts) %>%
janitor::clean_names() %>%
dplyr::select(gene_name, clone_id, count_mean) %>%
tidyr::pivot_wider(
id_cols = gene_name,
names_from = clone_id,
values_from = count_mean,
values_fill = 0,
values_fn = sum
) %>%
as.data.frame()
rownames(cluster_row) <- cluster_row$gene_name
sce_DEA_hm_plot <- sce_DEA_hm[rownames(sce_DEA_hm) %in% gene_name, ] sce_DEA_hm_plot <- sce_DEA_hm[rownames(sce_DEA_hm) %in% cluster_row$gene_name, ]
rowData(sce_DEA_hm_plot)$gene_order <- cluster_row[, -1] %>% rowData(sce_DEA_hm_plot)$gene_order <- cluster_row[, -1] %>%
dist() %>% dist(method = "canberra") %>%
hclust() %>% .$order hclust() %>% .$order
rownames(sce_DEA_hm_plot) <- rowData(sce_DEA_hm_plot)$gene_name
sce_DEA_hm_plot <- sce_DEA_hm_plot[rowData(sce_DEA_hm_plot)$gene_order, ]
plotHeatmap( plotHeatmap(
sce_DEA_hm_plot, sce_DEA_hm_plot[!(rowData(sce_DEA_hm_plot)$gene_name %in% rm_genes),],
features = rownames(sce_DEA_hm_plot), features = rownames(sce_DEA_hm_plot),
order_columns_by = c("clone_id", "p_PLS_DEA_cell_type"), order_columns_by = c("clone_id", "p_PLS_DEA_cell_type"),
colour_columns_by = c("clone_id", "p_PLS_DEA_cell_type"), colour_columns_by = c("clone_id", "p_PLS_DEA_cell_type"),
...@@ -665,7 +682,4 @@ plotHeatmap( ...@@ -665,7 +682,4 @@ plotHeatmap(
zlim = c(-5, 5), zlim = c(-5, 5),
main = day, main = day,
cluster_rows = F, cluster_rows = F,
# order_rows_by = c("gene_order")
) )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment