diff --git a/src/gradients.R b/src/gradients.R index 9e9c6710369c58c38fff329aac4a129519ae561b..963e9c4699b60c9ecd30871053c1a79ede0eaf60 100644 --- a/src/gradients.R +++ b/src/gradients.R @@ -1,8 +1,10 @@ source('src/00_utils.R') +source('src/00_base_functions.R') source('src/gradients_fcts.R') rds.name <- 'seuObj_wt_preprocessed_filtered' -output.dir <- 'results/filtered/wt' +output.dir <- 'results/filtered/230217_wt' +dir.create(output.dir) output.name <- 'ryu2019' # load data @@ -18,21 +20,29 @@ seuObj[['id']] <- id seuObj[['background']] <- 'wt' - # similarity score to final id # select terminal ref -ref_ids <- c('Atrichoblast', 'LRC') +hist(seuObj@meta.data[,3]) +ref_ids <- c('Atrichoblast', 'Trichoblast') ref.i1.cells <- seuObj@meta.data %>% - filter(zone == 'Mature' & id == ref_ids[1]) %>% + filter(zone == 'Mature' & id == ref_ids[1] & nFeature_RNA < quantile(nFeature_RNA, 0.1)) %>% rownames() length(ref.i1.cells) -DimPlot(seuObj, reduction = "umap", label = FALSE, cells.highlight = ref.i1.cells) +png(sprintf("%s/refcells_%s_%s.png", output.dir, output.name, ref_ids[1]), + width = 130, height = 130, res = 300, units = 'mm') +DimPlot(seuObj, reduction = "umap", label = FALSE, cells.highlight = ref.i1.cells) + + theme_void() + theme(legend.position = 'none') +dev.off() ref.i2.cells <- seuObj@meta.data %>% - filter(zone == 'Mature' & id == ref_ids[2]) %>% + filter(zone == 'Mature' & id == ref_ids[2] & nFeature_RNA < quantile(nFeature_RNA, 0.1)) %>% rownames() length(ref.i2.cells) -DimPlot(seuObj, reduction = "umap", label = FALSE, cells.highlight = ref.i2.cells) +png(sprintf("%s/refcells_%s_%s.png", output.dir, output.name, ref_ids[2]), + width = 130, height = 130, res = 300, units = 'mm') +DimPlot(seuObj, reduction = "umap", label = FALSE, cells.highlight = ref.i2.cells) + + theme_void() + theme(legend.position = 'none') +dev.off() # select sc data filter.cells <- rownames(seuObj@meta.data[which(seuObj@meta.data$id %in% ref_ids), ]) @@ -45,19 +55,46 @@ sc.mat <- GetAssayData(seuObj[,filter.cells], assay = Assay, slot = Slot) features <- rownames(seuObj) features <- root_genes -# compute ref based on features +# create labels based on features +labels <- labeller(list(K4_genes, K27K4_genes, K27_genes), + l_names = c('K4-only', 'Bivalent', 'K27-only'), + features) +proportions(table(labels)) + +glist <- setdiff(c(K27_genes, K27K4_genes), K27_wr) +labels <- labeller(list(glist), + l_names = c('PcG_WOX5'), + features) + +write.table(names(labels[which(labels == 'PcG_WOX5')]), + file = sprintf('%s/PcG_WOX5_root.txt', output.dir), + quote = FALSE, col.names = F, row.names = F) +FeaturePlot(seuObj, features = names(labels[which(labels == 'PcG_WOX5')][10])) # they seem to be polarized + + +# ////////////////////////////////////////////////////////////////////////////// +# Compute refs (average transcriptome) based on features ref.i1 <- rowMeans(sc.mat[features,ref.i1.cells]) ref.i2 <- rowMeans(sc.mat[features,ref.i2.cells]) -# compute similarity to the reference +# Compute similarity to the reference cor.ref <- proxyC::simil(x = t(sc.mat[features,]), y = rbind(ref.i1, ref.i2), method = 'correlation') - +saveRDS(cor.ref, file = sprintf('%s/cor_%s.rds', output.dir, paste0(ref_ids, collapse = '_'))) +cor.ref <- readRDS(sprintf('%s/cor_%s.rds', output.dir, paste0(ref_ids, collapse = '_'))) + +# mixture model +tzs <- sapply(1:2, function(idx){ + cells <- rownames(seuObj@meta.data[which(seuObj@meta.data$id == ref_ids[idx]),]) + f <- factor(seuObj@meta.data[cells, ]$zone, levels = c('Meristem', 'Elongation', 'Mature')) + + find_tzs(cor = cor.ref[,idx], cells = cells, factor = f, id_name = ref_ids[idx]) + }) +saveRDS(tzs, file = sprintf('%s/tzs_%s.rds', output.dir, paste0(ref_ids, collapse = '_'))) + +# Plot selected cells on the similarity space factor <- seuObj@meta.data[filter.cells,]$zone -cols <- brewer.pal(length(unique(factor)),"Set1") -names(cols) <- unique(factor) -cols subset.cells <- rownames(seuObj@meta.data[which(seuObj@meta.data$id %in% c('Atrichoblast') & seuObj@meta.data$background == 'wt'), ]) png(sprintf("%s/gradient_%s_%s.png", output.dir, output.name, paste0(ref_ids, collapse = '_')), @@ -67,61 +104,98 @@ plot(cor.ref[filter.cells,1], cor.ref[filter.cells,2], pch = 19, cex = 0.2, col points(cor.ref[subset.cells,1], cor.ref[subset.cells,2], pch = 19, cex = 0.4, col = 'red') dev.off() -png(sprintf("%s/gradient_zones_%s_%s.png", output.dir, output.name, paste0(ref_ids, collapse = '_')), +png(sprintf("%s/gradient_zones_%s_%s_1.png", output.dir, output.name, paste0(ref_ids, collapse = '_')), width = 130, height = 130, res = 300, units = 'mm') -plot(cor.ref[filter.cells,1], cor.ref[filter.cells,2], pch = 19, cex = 0.2, col = cols[factor],#scales::alpha(cols[factor], 1), +plot(cor.ref[filter.cells,1], cor.ref[filter.cells,2], pch = 19, cex = 0.2, col = make_pal(factor)[factor],#scales::alpha(cols[factor], 1), xlab = ref_ids[1], ylab = ref_ids[2]) +abline(v = tzs[,1], lty = 'dashed') dev.off() +png(sprintf("%s/gradient_zones_%s_%s_2.png", output.dir, output.name, paste0(ref_ids, collapse = '_')), + width = 130, height = 130, res = 300, units = 'mm') +plot(cor.ref[filter.cells,1], cor.ref[filter.cells,2], pch = 19, cex = 0.2, col = make_pal(factor)[factor],#scales::alpha(cols[factor], 1), + xlab = ref_ids[1], ylab = ref_ids[2]) +abline(h = tzs[,2], lty = 'dashed') +dev.off() +# ////////////////////////////////////////////////////////////////////////////// +# Gene expression changes along similarity axes - -# selection of cells +# Selection of cells i1_wt <- rownames(seuObj@meta.data[which(seuObj@meta.data$id %in% ref_ids[1] & seuObj@meta.data$background == 'wt'), ]) +DimPlot(seuObj, reduction = "umap", label = FALSE, cells.highlight = i1_wt) + i2_wt <- rownames(seuObj@meta.data[which(seuObj@meta.data$id %in% ref_ids[2] & seuObj@meta.data$background == 'wt'), ]) +DimPlot(seuObj, reduction = "umap", label = FALSE, cells.highlight = i2_wt) -# compute gradients +# Compute gradients step <- 0.1 -sd <- 0.1 -x <- seq(apply(cor.ref, 2, min)[1], apply(cor.ref, 2, max)[1], step) -gr_i2_wt <- epidermis_gr(x, dx = step, sd = sd, scores.mat = cor.ref, sc.mat[features,], cells = i2_wt, cores = 2) # both at the same time -gr_i1_wt <- epidermis_gr(x, dx = step, sd = sd, scores.mat = cor.ref, sc.mat[features,], cells = i1_wt, cores = 2) # both at the same time - -# plot gradients -gene_id <- 'AT5G49270' #'AT1G66470' #'AT1G27740'#'AT5G43175' #'AT4G33880' # RSL family +sd <- 0.3 +cores <- 6 +x <- seq(apply(cor.ref, 2, quantile, 0.03)[1], apply(cor.ref, 2, quantile, 0.97)[1], step) +gr_i1_wt <- epidermis_gr(x, dx = step, sd = sd, scores.mat = cor.ref, sc.mat[features,], cells = i1_wt, cores = cores) +gr_i2_wt <- epidermis_gr(x, dx = step, sd = sd, scores.mat = cor.ref, sc.mat[features,], cells = i2_wt, cores = cores) + +# plot gradients for a given feature +gene_id <- 'AT4G33880' #'AT1G66470' #'AT1G27740'#'AT5G43175' #'AT4G33880' # RSL family +pdf(file = sprintf("%s/gr_%s_%s.pdf", output.dir, gene_id, paste0(ref_ids, collapse = '_'))) FeaturePlot(seuObj, features = gene_id, split.by = 'background') -plot(x, gr_i2_wt[[1]][gene_id,], xlab = 'Identity score (i2blasts)', - ylab = 'Expression gradient', - pch = 15, main = gene_id) -abline(h = 0) - -plot(x, gr_i2_wt[[2]][gene_id,], xlab = 'Identity score (i1blasts)', - ylab = 'Expression gradient', - ) # i1blast score +plot(cor.ref[i1_wt, 1], sc.mat[gene_id, i1_wt]) +plot(cor.ref[i2_wt, 1], sc.mat[gene_id, i2_wt]) +plot_gr_feature(gr_mat = gr_i1_wt, ref_ids, feature = gene_id) +plot_gr_feature(gr_mat = gr_i2_wt, ref_ids, feature = gene_id) +dev.off() -n_features <- 500 -top_i2 <- plot_gr(gr_i2_wt[[2]], n_features, which_genes = 'top', label = NULL) +# +n_features <- 250 top_i1 <- plot_gr(gr_i1_wt[[1]], n_features, which_genes = 'top', label = NULL) +top_i2 <- plot_gr(gr_i2_wt[[2]], n_features, which_genes = 'top', label = NULL) + + +png(sprintf("%s/heatmap_top_nolab_%s_%s.png", output.dir, ref_ids[2], output.name), + width = 85, height = 75, res = 300, units = 'mm') +top_i2$heatmap +dev.off() + +png(sprintf("%s/heatmap_top_nolab_%s_%s.png", output.dir, ref_ids[1], output.name), + width = 85, height = 75, res = 300, units = 'mm') +top_i1$heatmap +dev.off() -bottom_i2 <- plot_gr(gr_i2_wt[[2]], n_features, which_genes = 'bottom', label = NULL) bottom_i1 <- plot_gr(gr_i1_wt[[1]], n_features, which_genes = 'bottom', label = NULL) +bottom_i2 <- plot_gr(gr_i2_wt[[2]], n_features, which_genes = 'bottom', label = NULL) + +# +hclust.res <- hclust(dist(gr_i2_wt[[2]][top_i2$genes, ]), method = "complete") +cl <- cutree(hclust.res, k = 3) +tapply(top_i2$which.max[top_i2$genes], cl, summary) + +tzs.delta <- sapply(tzs, function(x){ abs(top_i2$which.max - x) }) # find the closest to 0 +apply(tzs.delta, 2, function(y){ head(sort(y), 20) }, simplify = F) # gradient threshold based on full genome # top-peaking genes -png(sprintf("%s/gr_boxplot_top_%s.png", output.dir, paste0(ref_ids, collapse = '_')), +png(sprintf("%s/gr_boxplot_top_%s_%s.png", output.dir, paste0(ref_ids, collapse = '_'), n_features), width = 100, height = 130, res = 300, units = 'mm') -boxplot(list(top_i1$max, top_i2$max), ylab = 'Maximum gradient', names = ref_ids) -points(x = c(1, 2), y = c(top_i1$threshold, top_i2$threshold), col = 'red', pch = 19) +boxplot(list(top_i1$max, top_i2$max), ylab = 'Maximum gradient', names = ref_ids, cex = 0.5) +points(x = c(1, 2), y = c(top_i1$threshold, top_i2$threshold), col = 'red', pch = 19, cex = 2) +#data.frame('max' = c(top_i1$max, top_i2$max), +# 'cell_type' = rep(ref_ids, each = length(top_i1$max))) %>% +# ggplot(aes(x = cell_type, y = max, fill = cell_type)) + geom_violin() + geom_boxplot() dev.off() +write.table(intersect(top_i1$genes, top_i2$genes), + file = sprintf('%s/gr_genes_top_common_%s_%s_%s.txt', output.dir, ref_ids[1], ref_ids[2], length(top_i2$genes)), + quote = FALSE, col.names = F, row.names = F) + write.table(top_i2$genes, - file = sprintf('%s/gr_genes_top_%s_%s.txt', output.dir, ref_ids[2], n_features), + file = sprintf('%s/gr_genes_top_%s_%s.txt', output.dir, ref_ids[2], length(top_i2$genes)), quote = FALSE, col.names = F, row.names = F) write.table(top_i1$genes, - file = sprintf('%s/gr_genes_top_%s_%s.txt', output.dir, ref_ids[1], n_features), + file = sprintf('%s/gr_genes_top_%s_%s.txt', output.dir, ref_ids[1], length(top_i1$genes)), quote = FALSE, col.names = F, row.names = F) length(intersect(top_i2$genes, top_i1$genes)) #150 png(sprintf("%s/venn_top_%s_%s.png", output.dir, ref_ids[1], ref_ids[2]), @@ -132,9 +206,14 @@ dev.off() # bottom-peaking genes png(sprintf("%s/gr_boxplot_bottom_%s.png", output.dir, paste0(ref_ids, collapse = '_')), width = 100, height = 130, res = 300, units = 'mm') -boxplot(list(bottom_i1$max, bottom_i2$max), ylab = 'Maximum gradient', names = ref_ids) -points(x = c(1, 2), y = c(bottom_i1$threshold, bottom_i2$threshold), col = 'red', pch = 19) +boxplot(list(bottom_i1$max, bottom_i2$max), ylab = 'Maximum gradient', names = ref_ids, cex = 0.5) +points(x = c(1, 2), y = c(bottom_i1$threshold, bottom_i2$threshold), col = 'red', pch = 19, cex = 2) dev.off() + +write.table(intersect(bottom_i1$genes, bottom_i2$genes), + file = sprintf('%s/gr_genes_bottom_common_%s_%s_%s.txt', output.dir, ref_ids[1], ref_ids[2], length(top_i2$genes)), + quote = FALSE, col.names = F, row.names = F) + write.table(bottom_i2$genes, file = sprintf('%s/gr_genes_down_%s_%s.txt', output.dir, ref_ids[2], n_features), quote = FALSE, col.names = F, row.names = F) @@ -156,52 +235,75 @@ dev.off() # waves of down/upregulation png(sprintf("%s/gr_mean_%s.png", output.dir, ref_ids[2]), width = 130, height = 100, res = 300, units = 'mm') -plot(x, colMeans(abs(gr_i2_wt[[2]][bottom_i2$genes,])), +y <- colMeans(abs(gr_i2_wt[[2]][bottom_i2$genes,])) +z <- colMeans(abs(gr_i2_wt[[2]][top_i2$genes,])) +plot(x, y, xlab = 'Similarity score', ylab = 'Average gradient', main = ref_ids[2], - col = 'blue') # downregulated genes -points(x, colMeans(abs(gr_i2_wt[[2]][top_i2$genes,])), pch = 19, col = 'red') # upregulated genes + col = 'blue', + ylim = c(min(y, z), max(y, z))) # downregulated genes +points(x, z, pch = 19, col = 'red') # upregulated genes +legend("topleft", legend = c('Down', 'Up'), pch = c(21, 19), col = c('blue', 'red'), title = "Genes") +abline(v = tzs[,2], lty = 'dashed') dev.off() png(sprintf("%s/gr_mean_%s.png", output.dir, ref_ids[1]), width = 130, height = 100, res = 300, units = 'mm') -plot(x, colMeans(abs(gr_i1_wt[[1]][bottom_i2$genes,])), +y <- colMeans(abs(gr_i1_wt[[1]][bottom_i1$genes,])) +z <- colMeans(abs(gr_i1_wt[[1]][top_i1$genes,])) +plot(x, y, xlab = 'Similarity score', ylab = 'Average gradient', main = ref_ids[1], - col = 'blue') -points(x, colMeans(abs(gr_i1_wt[[1]][top_i2$genes,])), pch = 19, col = 'red') + col = 'blue', + ylim = c(min(y, z), max(y, z))) +points(x, z, pch = 19, col = 'red') +legend("topleft", legend = c('Down', 'Up'), pch = c(21, 19), col = c('blue', 'red'), title = "Genes") +abline(v = tzs[,1], lty = 'dashed') dev.off() # Pcg -glist <- c(FIE, K27_genes, K27K4_genes) -lab <- ifelse(rownames(sc.mat) %in% glist, 'PcG', '-') -names(lab) <- rownames(sc.mat) -top_i1 <- plot_gr(gr_i1_wt[[1]][features,], n_features, which_genes = 'top', label = lab) -top_i2 <- plot_gr(gr_i2_wt[[2]][features,], n_features, which_genes = 'top', label = lab) -proportions(table(lab[top_i2$genes])) -proportions(table(lab[top_i1$genes])) - -bottom_i1 <- plot_gr(gr_i1_wt[[1]][features,], n_features, which_genes = 'bottom', label = lab) -bottom_i2 <- plot_gr(gr_i2_wt[[2]][features,], n_features, which_genes = 'bottom', label = lab) -proportions(table(lab[bottom_i1$genes])) -proportions(table(lab[bottom_i2$genes])) -proportions(table(lab)) +head(labels) +cols <- c('lightblue', 'darkblue', 'white', 'grey') +names(cols) <- levels(labels) +top_i1 <- plot_gr(gr_i1_wt[[1]][features,], n_features, which_genes = 'top', label = labels, cols = cols) +top_i2 <- plot_gr(gr_i2_wt[[2]][features,], n_features, which_genes = 'top', label = labels, cols = cols) +proportions(table(labels[top_i2$genes])) +proportions(table(labels[top_i1$genes])) + +bottom_i1 <- plot_gr(gr_i1_wt[[1]][features,], n_features, which_genes = 'bottom', label = labels, cols = cols) +bottom_i2 <- plot_gr(gr_i2_wt[[2]][features,], n_features, which_genes = 'bottom', label = labels, cols = cols) +proportions(table(labels[bottom_i1$genes])) +proportions(table(labels[bottom_i2$genes])) +proportions(table(labels)) + +png(sprintf("%s/props_%s_%s.png", output.dir, ref_ids[2], output.name), + width = 100, height = 100, res = 300, units = 'mm') +rbind('top' = proportions(table(labels[top_i2$genes])), + 'bottom' = proportions(table(labels[bottom_i2$genes])), + 'all' = proportions(table(labels))) %>% + data.frame() %>% + rownames_to_column(var = 'set') %>% + pivot_longer(cols = c(2, 3, 4, 5), names_to = 'type', values_to = 'p') %>% + mutate(type = gsub('\\.o', '-o', type)) %>% + ggplot(aes(x = set, y = p, fill = type)) + geom_col() + + scale_fill_manual(values = cols) +dev.off() png(sprintf("%s/heatmap_bottom_%s_%s.png", output.dir, ref_ids[2], output.name), - width = 130, height = 130, res = 300, units = 'mm') + width = 100, height = 150, res = 300, units = 'mm') bottom_i2$heatmap dev.off() png(sprintf("%s/heatmap_bottom_%s_%s.png", output.dir, ref_ids[1], output.name), - width = 130, height = 130, res = 300, units = 'mm') + width = 100, height = 150, res = 300, units = 'mm') bottom_i1$heatmap dev.off() png(sprintf("%s/heatmap_top_%s_%s.png", output.dir, ref_ids[2], output.name), - width = 130, height = 130, res = 300, units = 'mm') + width = 100, height = 150, res = 300, units = 'mm') top_i2$heatmap dev.off() png(sprintf("%s/heatmap_top_%s_%s.png", output.dir, ref_ids[1], output.name), - width = 130, height = 130, res = 300, units = 'mm') + width = 100, height = 150, res = 300, units = 'mm') top_i1$heatmap dev.off() diff --git a/src/gradients_fcts.R b/src/gradients_fcts.R index d0c1424053d418b4f3f912b2062c009312d6b807..27bafee24f57817f48602ba10ace4f490be75a1a 100644 --- a/src/gradients_fcts.R +++ b/src/gradients_fcts.R @@ -23,7 +23,8 @@ epidermis_gr <- function(x, dx, sd, scores.mat, sc.mat, cells, method, cores){ return(output) } -plot_gr <- function(matrix, n_features, which_genes, label){ +# heatmap +plot_gr <- function(matrix, n_features, which_genes, label, cols = NULL){ output <- NULL if (which_genes == 'top'){ min.matrix <- apply(matrix, 1, max) @@ -48,17 +49,28 @@ plot_gr <- function(matrix, n_features, which_genes, label){ output$which.max <- which.min.matrix output$genes <- g + paletteLength <- 50 + myColor <- colorRampPalette(c("black", "white", "purple"))(paletteLength) + # length(breaks) == length(paletteLength) + 1 + # use floor and ceiling to deal with even/odd length pallettelengths + myBreaks <- c(seq(min(matrix[g,]), 0, length.out = ceiling(paletteLength/2) + 1), + seq(max(matrix[g,])/paletteLength, max(matrix[g,]), length.out = floor(paletteLength/2))) + if (is.null(label)){ hm <- pheatmap::pheatmap(matrix[g,], cluster_cols = FALSE, - cluster_rows = F, show_rownames = F, show_colnames = T) + cluster_rows = F, show_rownames = F, show_colnames = T, + color = myColor, + breaks = myBreaks) } else { - cols <- c('gray', 'red') - names(cols) <- unique(label) - print(cols) + if (is.null(cols) ) { + cols <- make_pal(label) + } hm <- pheatmap::pheatmap(matrix[g,], cluster_cols = FALSE, cluster_rows = F, show_rownames = F, show_colnames = T, annotation_row = data.frame('PcG' = label[g]), - annotation_colors = list('PcG' = cols)) + annotation_colors = list('PcG' = cols), + color = myColor, + breaks = myBreaks) } output$heatmap <- hm @@ -74,4 +86,29 @@ plot_venneuler <- function(lists, labels){ plot(v) #dev.off() return(v) -} \ No newline at end of file +} + +plot_gr_feature <- function(gr_mat, ref_ids, feature){ + plot(x, gr_mat[[1]][feature,], xlab = 'Identity score with reference', + ylab = 'Expression gradient', + pch = 15, main = feature, + ylim = c(min(gr_mat[[1]][feature,], gr_mat[[2]][feature,]), + max(gr_mat[[1]][feature,], gr_mat[[2]][feature,]))) + abline(h = 0) + points(x, gr_mat[[2]][feature,], + ylab = 'Expression gradient', pch = 21) + legend("topleft", legend = ref_ids, pch = c(15, 21), cex = 0.8, title = "Reference") +} + + +find_tzs <- function(cor, cells, factor, id_name){ + l_zone <- split(cor[cells], factor) + md <- glm(cor[cells]~factor+0, family = 'gaussian') + #zone_clusters <- kmeans(cor.ref[,1], centers = 3) + boxplot(l_zone, border = brewer.pal(length(levels(factor)), 'Set1'), col = 'white', + main = id_name) + points(x = 1:3, md$coefficients, pch = 5, col = 'black') + abline(h = md$coefficients[1:2] + diff(md$coefficients)/2, lty = 'dashed') + tzs <- md$coefficients[1:2] + diff(md$coefficients)/2 + return(tzs) +}