Practical_b.Rmd: speed-up computation

236bfdec · Laurent Modolo · 37f3c649 · 236bfdec
Verified Commit 236bfdec authored Sep 21, 2022 by Laurent Modolo
--- a/Practical_b.Rmd
+++ b/Practical_b.Rmd
@@ -74,9 +74,10 @@ if (!file.exists("practical_b.Rdata")) {
    remotes::install_github('satijalab/seurat-data')
  library(SeuratData)
  InstallData("pbmc3k")
-  pbmc <- LoadData("pbmc3k")
+  pbmc <- LoadData("pbmc3k", type = "pbmc3k.final")
  pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
  pbmc <- ScaleData(pbmc)
+  pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
  data <- Assays(pbmc, slot = "RNA")@scale.data
  cell_annotation <- pbmc@meta.data$seurat_annotations
  colnames(data) <- str_c(1:ncol(data), "_", cell_annotation)
@@ -138,10 +139,10 @@ Why do you think that we need a list of the 2000 most variable genes ?
 The clustering algorithms [seen this morning](https://lbmc.gitbiopages.ens-lyon.fr/hub/formations/ens_m1_ml/clustering.pdf) rely on Gram matrices.
 You can compute the Euclidean distance matrices of `data` with the `dist()` function (but don't try to run it on all `r nrow(data)` genes).
-The following code computes the cell-to-cell Euclidean distances for the 10 most variable genes
+The following code computes the cell-to-cell Euclidean distances for the 10 most variable genes and the first 100 cells:
 ```{r}
-c2c_dist_10 <- data[var_gene_2000[1:10], ] %>% 
+c2c_dist_10 <- data[var_gene_2000[1:10], 1:100] %>% 
  t() %>% 
  dist()
 ```
@@ -162,14 +163,14 @@ What happens when the number of dimensions increases ?
 <details><summary>Solution</summary>
 <p>
-```{r}
+```{r, cache=T}
 c2c_dist_n <- tibble(
    n_var = c(seq(from = 10, to = 200, by = 50),
-               seq(from = 200, to = 2000, by = 500))
+               seq(from = 200, to = 2000, by = 500), 2000)
  ) %>% 
  mutate(
    cell_dist = purrr::map(n_var, function(n_var, data, var_gene_2000){
-      data[var_gene_2000[1:n_var], ] %>% 
+      data[var_gene_2000[1:n_var], 1:100] %>% 
      t() %>% 
      dist() %>% 
      as.vector() %>%