Skip to content
Snippets Groups Projects
Unverified Commit fde1f52a authored by Laurent Modolo's avatar Laurent Modolo
Browse files

clonality_paper.R: test on heatmap

parent 17425f65
No related branches found
No related tags found
No related merge requests found
......@@ -417,40 +417,6 @@ for (day in names(sce_day)) {
save(sce_day, file = "results/2020_01_02_clonality_paper_sce.Rdata")
load(file = "results/2020_01_02_clonality_paper_sce.Rdata")
DEA_DEA_clone_cell_type_size <- list()
min_clone_size <- 3
for (day in names(sce_day)) {
colData(sce_day[[day]])$clone_id <- colData(sce_day[[day]]) %>%
as_tibble() %>%
mutate(clone_id = ifelse(
clone_id == 0,
(max(clone_id) + 1):(max(clone_id) + length(which(clone_id == 0))),
clone_id)) %>%
pull(clone_id)
colData(sce_day[[day]])$clone_size <- colData(sce_day[[day]]) %>%
as_tibble() %>%
left_join(
colData(sce_day[[day]]) %>%
as_tibble() %>%
group_by(clone_id) %>%
dplyr::summarise(clone_size = n())
) %>%
pull(clone_size)
DEA_DEA_clone_cell_type_size[[day]] <- DEA(
sce_day[[day]][, colData(sce_day[[day]])$clone_size >= min_clone_size],
test = "~ (1|clone_id)",
formula = "count ~ p_PLS_DEA_cell_type + (1|clone_id)",
assay_name = "counts_vst",
cpus = 10
)
save(
DEA_DEA_clone_cell_type_size,
file = "results/2020_01_01_DEA_DEA_clone_size.Rdata"
)
}
genes_PLS <- read_csv("data/2017_11_28_List_Laurent_Genes_PLS.csv") %>%
pivot_longer(cols = c("Genes_EFF", "Genes_MEM"),
names_to = "type",
......@@ -509,25 +475,25 @@ for (day in names(sce_day)) {
min_clone_size <- 3
load(file = "results/2020_01_02_clonality_paper_sce.Rdata")
load(file = "results/2020_01_01_DEA_DEA_clone_size.Rdata")
load(file = "results/2020_01_01_DEA_clone_PCA_cell_type_size.Rdata", v=T)
## adj pvalue
for (day in names(sce_day)) {
print(day)
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size <- NA
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size <-
get_genes_pval(DEA_DEA_clone_cell_type_size[[day]], sce_day[[day]])
rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size <- NA
rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size <-
get_genes_pval(DEA_clone_PCA_cell_type_size[[day]], sce_day[[day]])
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size %>%
rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size %>%
is.na() %>%
table() %>%
print()
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size_adj <- p.adjust(
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size,
rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size_adj <- p.adjust(
rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size,
method = "BH"
)
table(rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size_adj < 0.05) %>% print()
table(rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size_adj < 0.05) %>% print()
}
day <- "593"
assays(sce_day[[day]])$logcounts <- scater::logNormCounts(
......@@ -538,8 +504,8 @@ for (day in names(sce_day)) {
assay(., "logcounts") %>%
Matrix::Matrix(sparse = T)
sce_DEA_hm <- sce_day[[day]][
!is.na(rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size_adj) &
rowData(sce_day[[day]])$pval_DEA_clone_cell_type_size_adj < 0.05
!is.na(rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size_adj) &
rowData(sce_day[[day]])$pval_DEA_clone_PCA_cell_type_size_adj < 0.05
]
sce_DEA_hm %>% dim()
colData(sce_DEA_hm) <- colData(sce_DEA_hm) %>%
......@@ -568,7 +534,6 @@ rm_genes <- readr::read_delim(
future::plan("multiprocess", workers = 10)
test <- assay(sce_DEA_hm, "logcounts") %>%
log1p() %>%
as.matrix() %>%
as_tibble(rownames = "gene_name") %>%
tidyr::nest(counts = !c(gene_name)) %>%
......@@ -584,17 +549,23 @@ rm_genes <- readr::read_delim(
mutate(
count_var = purrr::map(.x = counts, .f = function(.x){
var(.x$count)
}),
count_mean = purrr::map(.x = counts, .f = function(.x){
(.x$count)
})
) %>%
unnest(count_var) %>%
filter(count_var > 0.2) %>%
unnest(count_mean) %>%
select(count_var, count_mean) %>% summary()
filter(!(gene_name %in% rm_genes$geneid)) %>%
filter(count_var > 0.1) %>%
mutate(
model = furrr::future_map(.x = counts, .f = function(.x){
model.matrix(~-1 + clone_id, data = .x) %>%
glmnet(
glmnet::glmnet(
x = .,
y = (.x %>% pull(count)),
lambda = cv.glmnet(
lambda = glmnet::cv.glmnet(
.,
(.x %>% pull(count))
)$lambda.1se
......@@ -605,58 +576,104 @@ rm_genes <- readr::read_delim(
mutate(coefs = map(model, tidy)) %>%
select(-c(counts, model)) %>%
tidyr::unnest(coefs)
gene_name <- test %>%
janitor::clean_names() %>%
dplyr::filter(
dev_ratio > 0
) %>%
group_by(gene_name) %>%
dplyr::summarize(
estimate = max(abs(estimate)),
) %>%
# filter(estimate >= quantile(estimate, 0.90)) %>%
pull(gene_name)
# cluster_row <-
test %>%
cluster_row <- assay(sce_DEA_hm, "logcounts") %>%
as.matrix() %>%
as_tibble(rownames = "gene_name") %>%
filter(!(gene_name %in% rm_genes$geneid)) %>%
tidyr::nest(counts = !c(gene_name)) %>%
mutate(
counts = purrr::map(.x = counts, .f = function(.x){
tibble(
id = colnames(.x),
count = t(.x)[, 1],
clone_id = colData(sce_DEA_hm)$clone_id)
}
)
) %>%
mutate(
count_var = purrr::map(.x = counts, .f = function(.x){
var(.x$count)
})
) %>%
unnest(count_var) %>%
mutate(
model = furrr::future_map(.x = counts, .f = function(.x){
lm(count ~ clone_id, data = .x)
},
.progress = TRUE)
) %>%
mutate(coefs = map(model, tidy)) %>%
select(-c(counts, model)) %>%
tidyr::unnest(coefs) %>%
janitor::clean_names() %>%
filter(gene_name %in% gene_name) %>%
mutate(term = ifelse(
dplyr::mutate(term = ifelse(
term == "(Intercept)",
colData(sce_DEA_hm)$clone_id %>% as.factor() %>% levels() %>% .[1] %>%
str_c("clone_id", .),
term)
) %>%
mutate(
dplyr::mutate(
term = str_replace(term, "clone_id(.*)", "\\1")
) %>%
select(gene_name, term, estimate) %>%
mutate(id = 1:nrow(.),
estimate = ifelse(is.na(estimate), 0, estimate)) %>%
group_by(c(gene_name, id) %>%
summarise(estimate = sum(estimate))
summarize()
pivot_wider(
id_cols = c(id, gene_name),
dplyr::select(gene_name, term, estimate) %>%
tidyr::pivot_wider(
id_cols = gene_name,
names_from = term,
values_from = estimate
values_from = estimate,
values_fill = 0,
values_fn = sum
) %>%
select(-id) %>%
as.data.frame()
as.data.frame()
rownames(cluster_row) <- cluster_row$gene_name
cluster_row %>% dim()
sce_DEA_hm %>% dim()
cluster_row <-
assay(sce_DEA_hm, "logcounts") %>%
as.matrix() %>%
as_tibble(rownames = "gene_name") %>%
filter(!(gene_name %in% rm_genes$geneid)) %>%
tidyr::nest(counts = !c(gene_name)) %>%
mutate(
counts = purrr::map(.x = counts, .f = function(.x){
tibble(
id = colnames(.x),
count = t(.x)[, 1],
clone_id = as.factor(colData(sce_DEA_hm)$clone_id)
) %>%
group_by(clone_id) %>%
dplyr::summarise(
id = id,
count = count,
clone_id = clone_id,
count_mean = max(count)
) %>%
ungroup()
}
)
) %>%
tidyr::unnest(counts) %>%
janitor::clean_names() %>%
dplyr::select(gene_name, clone_id, count_mean) %>%
tidyr::pivot_wider(
id_cols = gene_name,
names_from = clone_id,
values_from = count_mean,
values_fill = 0,
values_fn = sum
) %>%
as.data.frame()
rownames(cluster_row) <- cluster_row$gene_name
sce_DEA_hm_plot <- sce_DEA_hm[rownames(sce_DEA_hm) %in% gene_name, ]
sce_DEA_hm_plot <- sce_DEA_hm[rownames(sce_DEA_hm) %in% cluster_row$gene_name, ]
rowData(sce_DEA_hm_plot)$gene_order <- cluster_row[, -1] %>%
dist() %>%
dist(method = "canberra") %>%
hclust() %>% .$order
rownames(sce_DEA_hm_plot) <- rowData(sce_DEA_hm_plot)$gene_name
sce_DEA_hm_plot <- sce_DEA_hm_plot[rowData(sce_DEA_hm_plot)$gene_order, ]
plotHeatmap(
sce_DEA_hm_plot,
sce_DEA_hm_plot[!(rowData(sce_DEA_hm_plot)$gene_name %in% rm_genes),],
features = rownames(sce_DEA_hm_plot),
order_columns_by = c("clone_id", "p_PLS_DEA_cell_type"),
colour_columns_by = c("clone_id", "p_PLS_DEA_cell_type"),
......@@ -665,7 +682,4 @@ plotHeatmap(
zlim = c(-5, 5),
main = day,
cluster_rows = F,
# order_rows_by = c("gene_order")
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment