From e0f4f8f572a767c1f41f15d0872f454fdaace933 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent@modolo.fr>
Date: Wed, 15 Apr 2020 09:29:08 +0200
Subject: [PATCH] fix typo in velocity section

---
 README.Rmd | 250 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 183 insertions(+), 67 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 07392f7..242e75e 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -71,7 +71,7 @@ needed_packages <- c(
 install_if_needed <- function(packages){
   if (!requireNamespace("BiocManager", quietly = TRUE))
     install.packages("BiocManager")
-  BiocManager::install(version = "3.10", ask = F)
+  BiocManager::install(version = "3.10", update = F, ask = F)
   if (!requireNamespace("devtools", quietly = TRUE))
     install.packages("devtools")
   sapply(
@@ -83,8 +83,12 @@ install_if_needed <- function(packages){
         }, error = function(e) {
           devtools::install_github(x, upgrade = "never")
         })
-      if (!stringr::str_detect(x, "/"))
+      if (!stringr::str_detect(x, "/")){
         library(x, character.only = TRUE)
+      } else {
+        library(stringr::str_replace(x, ".*/(.*)", "\\1"),
+                character.only = TRUE)
+      }
     }
   )
   return(str_c(str_c(packages, collapse = ", "), " installed and loaded."))
@@ -260,7 +264,7 @@ The `kb pipeline` uses [Kallisto](https://pachterlab.github.io/kallisto/). There
 wget ftp://ftp.ensembl.org/pub/release-99/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz -O data/hgmm_1k/hs_cdna.fa.gz
 wget ftp://ftp.ensembl.org/pub/release-99/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz -O data/hgmm_1k/mm_cdna.fa.gz
 # on pedago-ngs
-singularity exec /data/share/MADT/scrnaseq/kallistobustools_0.24.4.simg \
+singularity exec -B /data:/data /data/share/MADT/scrnaseq/kallistobustools_0.24.4.simg \
     kallisto index -i data/hgmm_1k/hs_mm_tr_index.idx data/hgmm_1k/hs_cdna.fa.gz data/hgmm_1k/mm_cdna.fa.gz
 ```
 
@@ -272,8 +276,8 @@ Also, since the reads were generated with the 10x Genomics Chromium Single Cell
 
 ```{bash quantif_kallisto, eval=F}
 mkdir -p results/hgmm_1k/
-singularity exec /data/share/MADT/scrnaseq/kallistobustools_0.24.4.simg \
 # singularity exec docker://lbmc/kallistobustools:0.24.4 \
+singularity  exec -B /data:/data /data/share/MADT/scrnaseq/kallistobustools_0.24.4.simg \
   kallisto bus \
   -i data/hgmm_1k/hs_mm_tr_index.idx \
   -o results/hgmm_1k/ -x 10xv2 -t 10 \
@@ -285,19 +289,6 @@ singularity exec /data/share/MADT/scrnaseq/kallistobustools_0.24.4.simg \
   data/hgmm_1k/fastqs/hgmm_1k_S1_L006_R1_001.fastq.gz data/hgmm_1k/fastqs/hgmm_1k_S1_L006_R2_001.fastq.gz \
   data/hgmm_1k/fastqs/hgmm_1k_S1_L007_R1_001.fastq.gz data/hgmm_1k/fastqs/hgmm_1k_S1_L007_R2_001.fastq.gz \
   data/hgmm_1k/fastqs/hgmm_1k_S1_L008_R1_001.fastq.gz data/hgmm_1k/fastqs/hgmm_1k_S1_L008_R2_001.fastq.gz
-
-singularity  exec -B /data:/data /data/share/MADT/scrnaseq/kallistobustools_0.24.4.simg \
-kallisto bus \
--i /data/share/MADT/scrnaseq/hgmm_1k/hs_mm_tr_index.idx \
--o results/hgmm_1k/ -x 10xv2 -t 10 \
-/data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L001_R1_001.fastq.gz /data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L001_R2_001.fastq.gz \
-/data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L002_R1_001.fastq.gz /data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L002_R2_001.fastq.gz \
-/data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L003_R1_001.fastq.gz /data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L003_R2_001.fastq.gz \
-/data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L004_R1_001.fastq.gz /data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L004_R2_001.fastq.gz \
-/data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L005_R1_001.fastq.gz /data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L005_R2_001.fastq.gz \
-/data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L006_R1_001.fastq.gz /data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L006_R2_001.fastq.gz \
-/data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L007_R1_001.fastq.gz /data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L007_R2_001.fastq.gz \
-/data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L008_R1_001.fastq.gz /data/share/MADT/scrnaseq/hgmm_1k/fastqs/hgmm_1k_S1_L008_R2_001.fastq.gz
 ```
 
 The quantification run in around 2 min on 10 threads and creates 4 files:
@@ -328,13 +319,6 @@ The `/data/share/MADT/scrnaseq/10xv2_whitelist.txt` contains all the barcodes kn
 We are ready to make the gene count matrix. First, `bustools` runs barcode error correction on the bus file. Then, the corrected bus file is sorted by barcode, UMI, and equivalence classes. Then the UMIs are counted and the counts are collapsed into gene level.
 
 ```{bash bustools, eval=F}
-mkdir -p results/hgmm_1k/genecount tmp
-singularity exec /data/share/MADT/scrnaseq/kallistobustools_0.24.4.simg sh -c "\
-  bustools correct -w data/whitelist_v2.txt -p results/hgmm_1k/output.bus | \
-  bustools sort -T tmp/ -t 4 -p - | \
-  bustools count -o results/hgmm_1k/genecount/genes -g results/hgmm_1k/tr2g_hgmm.tsv \
-  -e results/hgmm_1k/matrix.ec -t results/hgmm_1k/transcripts.txt --genecounts -"
-
 mkdir -p results/hgmm_1k/genecount tmp
 cp /data/share/MADT/scrnaseq/10xv2_whitelist.txt data/whitelist_v2.txt
 cp /data/share/MADT/scrnaseq/hgmm_1k/tr2g_hgmm.tsv data/
@@ -365,7 +349,7 @@ Single-cell RNA Seq counts contains a loot of zeros, to save space we can use th
 
 You can use the `read_count_output()` function to load kb output into a `sparceMatrix`, and the `SingleCellExperiment()` to create a `SingleCellExperiment` object.
 
-```{r kb_2_scexp, dependson="t2g_hgmm_1k", echo=F}
+```{r kb_2_scexp, dependson="t2g_hgmm_1k", echo = T, include=F}
 # read counts (generate a sparse matrix)
 res_mat <- read_count_output(
   dir = "results/hgmm_1k/genecount",
@@ -795,6 +779,7 @@ library("BSgenome.Mmusculus.UCSC.mm10")
 get_velocity_files(edb, L = 91, Genome = BSgenome.Mmusculus.UCSC.mm10,
                    out_path = "results/neuron10k_velocity",
                    isoform_action = "separate")
+rm(ah, edb)
 ```
 
 We build the kallisto index for the spliced and unspliced transcript with the `kallistobustools` container (`docker://lbmc/kallistobustools:0.24.4`)
@@ -828,6 +813,8 @@ singularity exec docker://lbmc/kallistobustools:0.24.4 \
   data/neuron_10k_v3_fastqs/neuron_10k_v3_S1_L001_R2_001.fastq.gz
 ```
 
+## Loading data
+
 We can load the kb outputs with the `read_velocity_output()` from the `neuron10k_velocity/kb/` folder. We will get a list of two `sparceMatrix`.
 
 ```{r velo_load, dependson="library", eval = T}
@@ -839,6 +826,7 @@ res_mat_list <- read_velocity_output(
   )
 spliced <- res_mat_list$spliced
 unspliced <- res_mat_list$unspliced
+rm(res_mat_list)
 ```
 From the total sum (`sum()`) of each of these matrix, we can compute the percentage of unspliced UMI counts.
 
@@ -856,7 +844,7 @@ We expect around 10,000 cells. There are over 10 times more barcodes here, since
 We will have to remove empty droplets with the `barcodesRanks()` function.
 
 ```{r velo_knee_plot, dependson="velo_load", eval = T}
-bc_rank <-barcodeRanks(spliced)
+bc_rank <- barcodeRanks(spliced)
 bc_uns <- barcodeRanks(unspliced)
 
 knee_plot <- function(bc_ranks) {
@@ -892,25 +880,86 @@ For now, for simplicity, the inflection point for the spliced matrix will be use
 Construct a `sf` and `uf` `sparceMatrix` without empty droplets.
 
 ```{r velo_empty_remove, dependson="velo_load", eval = T}
-tot_count <- Matrix::colSums(spliced)
-summary(tot_count)
-bcs_use <- colnames(spliced)[tot_count > metadata(bc_rank)$inflection]
+rm(bc_uns)
+rownames(spliced) <- str_replace(rownames(spliced), "(.*)\\..*", "\\1") 
+rownames(unspliced) <- str_replace(rownames(unspliced), "(.*)\\..*", "\\1") 
+bcs_use <- colnames(spliced)[
+  nexprs(spliced, byrow = F) > metadata(bc_rank)$inflection
+]
 # Remove genes that aren’t detected
-tot_genes <- Matrix::rowSums(spliced)
-genes_use <- rownames(spliced)[tot_genes > 0]
-sf <- spliced[genes_use, bcs_use]
-uf <- unspliced[genes_use, bcs_use]
+genes_use <- rownames(spliced)[
+  nexprs(spliced, byrow = T) > 0
+]
+sce <- SingleCellExperiment(
+      assays = list(
+          spliced = spliced[genes_use, bcs_use],
+          unspliced = unspliced[genes_use, bcs_use]
+      )
+  )
+if (file.exists("results/neuron10k_velocity/sce_raw.Rdata")) {
+  load(file = "results/neuron10k_velocity/sce_raw.Rdata",
+       verbose = T)
+} else {
+  save(sce, file = "results/neuron10k_velocity/sce_raw.Rdata")
+}
+rm(bcs_use, genes_use, spliced, unspliced, bc_rank)
 ```
+## Normalisation
 
-We are going to normalize for library effect with `SCTransform` but contrary to the first part of the practical we are going to work with `Seurat` objects instead of `SingleCellExperiment` objects.
-`Seurat` is an R package which contains a large set of tools for scRNASeq analysis. Of course, it also has its own way of storing single-cell data (with the `CreateSeuratObject()` function).
+We are going to normalize for library effect with `SCTransform`.
 
 ```{r velo_normalisation, dependson="velo_empty_remove", eval = T}
-seu <- CreateSeuratObject(sf, assay = "sf") %>%
-  SCTransform(assay = "sf", new.assay.name = "spliced")
+colData(sce)$umi_spliced <- colSums(assays(sce)$spliced)
+colData(sce)$log_umi_spliced <- log10(colData(sce)$umi_spliced)
+spliced_norm <- sctransform::vst(
+  assays(sce)$spliced,
+  cell_attr = colData(sce),
+  latent_var = c("log_umi_spliced"),
+  return_gene_attr = T,
+  return_cell_attr = T,
+  show_progress = F) %>% 
+  correct() %>% 
+  Matrix(sparse = T)
 ```
 
-The functionalities of a `SeuratObject` are roughly the same as the ones of an `SingleCellExperiment`.
+We then normalize and add the unspliced data to our `sce` object.
+
+```{r velo_normalisation_unspliced, dependson="velo_normalisation", eval = T}
+colData(sce)$umi_unspliced <- colSums(assays(sce)$unspliced)
+colData(sce)$log_umi_unspliced <- log10(colData(sce)$umi_unspliced)
+unspliced_norm <- sctransform::vst(
+  assays(sce)$unspliced,
+  cell_attr = colData(sce),
+  latent_var = c("log_umi_spliced"),
+  return_gene_attr = T,
+  return_cell_attr = T,
+  show_progress = F) %>% 
+  correct() %>% 
+  Matrix(sparse = T)
+u_genes <- intersect(rownames(spliced_norm), rownames(unspliced_norm))
+spliced_norm[rownames(spliced_norm) %in% u_genes, ] %>% dim()
+unspliced_norm[rownames(unspliced_norm) %in% u_genes, ] %>% dim()
+sce_norm <- SingleCellExperiment(
+      assays = list(
+          spliced = spliced_norm[rownames(spliced_norm) %in% u_genes, ] %>% 
+            Matrix(sparse = T),
+          unspliced = unspliced_norm[rownames(unspliced_norm) %in% u_genes, ] %>% 
+            Matrix(sparse = T)
+      )
+  )
+```
+We can save our `sce` object to use for future crash
+
+```{r save_velo_normalisation, dependson="velo_normalisation_unspliced", eval = T}
+if(file.exists("results/neuron10k_velocity/sce_norm.Rdata")){
+  load(file = "results/neuron10k_velocity/sce_norm.Rdata",
+       verbose = T)
+}else{
+  save(sce_norm, file = "results/neuron10k_velocity/sce_norm.Rdata")
+}
+
+rm(sce, spliced_norm, unspliced_norm, u_genes)
+```
 
 ## Cell type annotation
 
@@ -918,42 +967,59 @@ With most droplets experiments, we don’t have additional information on the ce
 
 ```{r velo_cell_type_annot, dependson="velo_normalisation", eval = T}
 mouse_rnaseq <- MouseRNAseqData(ensembl = TRUE)
+sce <- sce_norm[rownames(sce_norm) %in% rownames(mouse_rnaseq), ]
 annots <- SingleR(
-  GetAssayData(seu, assay = "spliced", slot = "data"),
+  assays(sce)$spliced,
   ref = mouse_rnaseq,
   labels = colData(mouse_rnaseq)$label.fine,
   de.method = "wilcox",
   method = "single"
 )
+rm(mouse_rnaseq)
 ```
 
 For our RNA velocity analyze, we are mostly interested in neural or glial lineages.
 For clarity, we filter out cells that are not from these two lineages.
 
-```{r velo_filter_cell_type, dependson="velo_cell_type_annot", eval = F}
+```{r velo_filter_cell_type, dependson="velo_cell_type_annot", eval = T}
 inds <- annots$pruned.labels %in% c(
   "NPCs", "Neurons", "OPCs", "Oligodendrocytes",
   "qNSCs", "aNSCs", "Astrocytes", "Ependymal"
 )
-seu <- seu[, cells_use]
-seu$cell_type <- annots$pruned.labels[inds]
-# Also only keep relevant cell types in the unspliced matrix
-uf <- uf[, cells_use]
+colData(sce)$cell_type <- annots$pruned.labels
+sce <- sce[, inds]
 ```
 
-We then normalize and add the unspliced data to our `Seurat` object.
+## Identification of highly variable
 
-```{r velo_normalisation_unspliced, dependson="velo_filter_cell_type", eval = F}
-seu[["uf"]] <- CreateAssayObject(uf)
-seu <- SCTransform(seu, assay = "uf", new.assay.name = "unspliced")
+Contrary to the first part of the practical we are going to work with `Seurat` objects instead of `SingleCellExperiment` objects, from now on.
+`Seurat` is an R package which contains a large set of tools for scRNASeq analysis. Of course, it also has its own way of storing single-cell data (with the `CreateSeuratObject()` function).
+The functionalities of a `SeuratObject` are roughly the same as the ones of an `SingleCellExperiment`.
+
+```{r velo_sce_to_seu, dependson="velo_normalisation_unspliced", eval = T}
+sce <- logNormCounts(sce, exprs_values = "spliced")
+seu <- as.Seurat(sce, counts = "spliced", assay = "spliced")
+seu[["unspliced"]] <- CreateAssayObject(assays(sce)$unspliced)
+DefaultAssay(seu) <- "spliced"
 ```
 
-## Identification of highly variable
+We can save the `seu` object.
+
+```{r save_velo_seu_annot, dependson="velo_sce_to_seu", eval = T}
+if(file.exists("results/neuron10k_velocity/seu_annot.Rdata")){
+  load(file = "results/neuron10k_velocity/seu_annot.Rdata",
+       verbose = T)
+}else{
+  save(seu, file = "results/neuron10k_velocity/seu_annot.Rdata")
+}
+
+rm(sce, sce_norm) 
+```
 
 To speedup the computation and focus on interesting genes, we are going to work only with the highly variable genes. We are going to use the `Seurat` procedure described [here](https://www.biorxiv.org/content/early/2018/11/02/460147.full.pdf).
 You can use the `FindVariableFeatures()` function to identify highly variable genes.
 
-```{r velo_hvg, dependson="velo_normalisation_unspliced", eval = F}
+```{r velo_hvg, dependson="save_velo_seu_annot", eval = T}
 seu <- FindVariableFeatures(seu, selection.method = "vst", nfeatures = 2000)
 
 # Identify the 10 most highly variable genes
@@ -976,15 +1042,15 @@ When visualizing RNA velocity on reduced dimensions, should the cell embeddings
 We can use the `DefaultAssay()` function to define the assay we will be working on in `Seurat`.
 The `RunPCA()` and `ElbowPlot()` function performe an SVD decomposition and plot the variance explained by the first axis.
 
-```{r velo_elbow_plot, dependson="velo_normalisation_unspliced", eval = F}
-DefaultAssay(seu) <- "spliced"
+```{r velo_elbow_plot, dependson="velo_normalisation_unspliced", eval = T}
+seu <- ScaleData(seu)
 seu <- RunPCA(seu, verbose = FALSE, npcs = 70)
 ElbowPlot(seu, ndims = 70)
 ```
 
 In `Seurat` when the dimension reduction is computed you can plot the results, with the function `DimPlot()`.
 
-```{r velo_pca_plot, dependson="velo_elbow_plot", eval = F}
+```{r velo_pca_plot, dependson="velo_elbow_plot", eval = T}
 DimPlot(
   seu, reduction = "pca", group.by = "cell_type",
   pt.size = 0.5, label = TRUE, repel = TRUE
@@ -994,21 +1060,21 @@ DimPlot(
 
 The `DimHeatmap()` function allows for easy exploration of the primary sources of heterogeneity in a dataset, and can be useful when trying to decide which PCs to include for further downstream analyses. Both cells and features are ordered according to their PCA scores. Setting `cells` to a number plots the ‘extreme’ cells on both ends of the spectrum, which dramatically speeds plotting for large datasets. Though clearly a supervised analysis, we find this to be a valuable tool for exploring correlated feature sets.
 
-```{r velo_pca_plot_hm, dependson="velo_pca_plot", eval = F}
-DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)
+```{r velo_pca_plot_hm, dependson="velo_pca_plot", eval = T}
+DimHeatmap(seu, dims = 1, cells = 500, balanced = TRUE)
 ```
 
 We can also use non-linear dimension reduction technique like $t$-SNE and UMAP in `Seurat`.
 Try the `RunTSNE()` and `RunUMAP()` function.
 
-```{r velo_tsne_plot, dependson="velo_pca_plot", eval = F}
+```{r velo_tsne_plot, dependson="velo_pca_plot", eval = T}
 seu <- RunTSNE(seu, dims = 1:50, verbose = FALSE)
   DimPlot(seu, reduction = "tsne",
         group.by = "cell_type", pt.size = 0.5, label = TRUE, repel = TRUE) +
   scale_color_brewer(type = "qual", palette = "Set2")
 ```
 
-```{r velo_umap_plot, dependson="velo_tsne_plot", eval = F}
+```{r velo_umap_plot, dependson="velo_tsne_plot", eval = T}
 seu <- RunUMAP(seu, dims = 1:50, umap.method = "uwot")
 DimPlot(seu, reduction = "umap",
         group.by = "cell_type", pt.size = 0.5, label = TRUE, repel = TRUE) +
@@ -1018,7 +1084,7 @@ DimPlot(seu, reduction = "umap",
 As expected, one end of the plot has mostly stem cells, and the other end has mostly neurons. Clustering should petition the big blob of NPCs that `SingleR` could not further partition due to limitations in the `SingleR` reference for mouse brains.
 Use the `FindNeighbors()` and `FindClusters()` function for the clustering.
 
-```{r velo_slingr_clust, dependson="velo_umap_plot", eval = F}
+```{r velo_slingr_clust, dependson="velo_umap_plot", eval = T}
 seu <- FindNeighbors(seu, verbose = FALSE) %>% 
   FindClusters(resolution = 1, verbose = FALSE) # Louvain
 DimPlot(seu, pt.size = 0.5, reduction = "umap", label = TRUE)
@@ -1027,13 +1093,45 @@ DimPlot(seu, pt.size = 0.5, reduction = "umap", label = TRUE)
 
 The `RunVelocity()` function compute the RNA Velocity vector of each cell.
 
-```{r compute_RNA_velocity, dependson="velo_slingr_clust", eval = F}
-seu <- RunVelocity(seu, ncores = 10, reduction = "pca", verbose = FALSE)
+We can save the `seu` object.
+
+```{r save_velo_seu_dimred, dependson="velo_slingr_clust", eval = T}
+if(file.exists("results/neuron10k_velocity/seu_dimred.Rdata")){
+  load(file = "results/neuron10k_velocity/seu_dimred.Rdata",
+       verbose = T)
+}else{
+  save(seu, file = "results/neuron10k_velocity/seu_dimred.Rdata")
+}
+if (!requireNamespace("SeuratWrappers", quietly = TRUE))
+  devtools::install_github("satijalab/seurat-wrappers")
+library("SeuratWrappers")
 ```
 
+The following step can take up to 20Gb of RAM.
+
+```{r compute_RNA_velocity, dependson="save_velo_seu_dimred", eval = F}
+seu <- RunVelocity(
+  seu,
+  ncores = 4,
+  reduction = "pca",
+  verbose = T,
+  deltaT = 1, kCells = 25, fit.quantile = 0.02
+  )
+```
+
+And we save the `seu` object.
+
+```{r save_velo_seu_velo, dependson="save_velo_seu_dimred", eval = T}
+if (file.exists("results/neuron10k_velocity/seu_velo.Rdata")) {
+  load(file = "results/neuron10k_velocity/seu_velo.Rdata",
+       verbose = T)
+} else {
+  save(seu, file = "results/neuron10k_velocity/seu_velo.Rdata")
+}
+```
 To display the results, we also need two helper functions to assign colors to each cell in base R and label the clusters.
 
-```{r velo_helper_function, dependson="compute_RNA_velocity", eval = F}
+```{r velo_helper_function, dependson="save_velo_seu_velo", eval = T}
 cell_pal <- function(cell_cats, pal_fun) {
   categories <- sort(unique(cell_cats))
   pal <- setNames(pal_fun(length(categories)), categories)
@@ -1050,7 +1148,8 @@ label_clusters <- function(labels, coords, ...) {
 
 `velocyto.R` requires that the vector of colors should have cell barcodes/IDs as names to match color to the cell.
 
-```{r velo_colors, dependson="velo_helper_function", eval = F}
+```{r velo_colors, dependson="velo_helper_function", eval = T}
+library("scales")
 cell_colors <- cell_pal(seu$cell_type, brewer_pal("qual", "Set2"))
 cell_colors_clust <- cell_pal(seu$seurat_clusters, hue_pal())
 names(cell_colors) <- names(cell_colors_clust) <- Cells(seu)
@@ -1058,15 +1157,26 @@ names(cell_colors) <- names(cell_colors_clust) <- Cells(seu)
 
 Would a clean trajectory from qNSCs to NPCs to neurons be traced? The arrows are projected onto non-linear dimension reductions by correlation between the predicted cell state and gene expression of other cells in the dataset.
 
-```{r velocyto_umap_projection, dependson="velo_colors", eval = F}
+This is a long step which takes arround 10Gb of RAM.
+
+```{r velocyto_umap_projection, dependson="velo_colors", eval = T}
 cc_umap <- show.velocity.on.embedding.cor(emb = Embeddings(seu, "umap"),
                                           vel = Tool(seu, slot = "RunVelocity"),
-                                          n.cores = 50, show.grid.flow = TRUE,
+                                          n.cores = 4, show.grid.flow = TRUE,
                                           grid.n = 50, cell.colors = cell_colors,
                                           cex = 0.5, cell.border.alpha = 0,
                                           arrow.scale = 2, arrow.lwd = 0.6,
                                           xlab = "UMAP1", ylab = "UMAP2")
 label_clusters(seu$cell_type, Embeddings(seu, "umap"), font = 2, col = "brown")
+```
+
+```{r velocyto_umap_projection_save, dependson="velo_colors", eval = T}
+if (file.exists("results/neuron10k_velocity/cc_umap.Rdata")) {
+  load(file = "results/neuron10k_velocity/cc_umap.Rdata",
+       verbose = T)
+} else {
+  save(cc_umap, file = "results/neuron10k_velocity/cc_umap.Rdata")
+}
 ```
 
  The cells labeled qNSCs and astrocytes are at the very top, going into two paths, one going down and to the right to the neurons, and the other going left towards the OPCs. There also seems to be a cycle to the left of what’s labeled qNSCs and astrocytes at the top. To the lower right of the cluster containing what’s labeled OPCs (cluster 7), there are two branches, but those also look like a cycle.
@@ -1074,10 +1184,10 @@ label_clusters(seu$cell_type, Embeddings(seu, "umap"), font = 2, col = "brown")
 This step is computationally expensive; in subsequent calls to `show.velocity.on.embedding.cor` for the same dimension reduction, the expensive part can be bypassed by supplying the output of the first call.
 
 
-```{r velocyto_umap_projection_cluster, dependson="velocyto_umap_projection", eval = F}
+```{r velocyto_umap_projection_cluster, dependson="velocyto_umap_projection_save", eval = T}
 show.velocity.on.embedding.cor(emb = Embeddings(seu, "umap"),
                                vel = Tool(seu, slot = "RunVelocity"),
-                               n.cores = 50, show.grid.flow = TRUE,
+                               n.cores = 4, show.grid.flow = TRUE,
                                grid.n = 50, cell.colors = cell_colors_clust,
                                cex = 0.5, cell.border.alpha = 0,
                                arrow.scale = 2, arrow.lwd = 0.6,
@@ -1090,10 +1200,16 @@ label_clusters(seu$seurat_clusters, Embeddings(seu, "umap"), font = 2, cex = 1.2
 
 With RNA velocity we can also compute phase portraits of genes. `Mef2c` (myocyte enhancer factor 2C), which is highly expressed in the mouse adult cortex though not much in the embryonic CNS until E18, according to the [NCBI page of this gene](https://www.ncbi.nlm.nih.gov/gene/17260).
 
-```{r velocyto_phase_protrait, eval = F}
+```{r velocyto_phase_protrait, eval = T}
+# Get gene names
+ah <- AnnotationHub()
+edb <- ah[["AH73905"]]
+gns <- tr2g_EnsDb(edb, use_gene_version = FALSE)[,c("gene", "gene_name")] %>% 
+  distinct()
 gene.relative.velocity.estimates(GetAssayData(seu, slot = "data", assay = "spliced"),
                                  GetAssayData(seu, slot = "data", assay = "unspliced"),
                                  cell.emb = Embeddings(seu, "umap"),
+                                 n.cores = 4,
                                  show.gene = gns$gene[gns$gene_name == "Mef2c"],
                                  old.fit = Tool(seu, slot = "RunVelocity"),
                                  cell.colors = cell_colors)
-- 
GitLab