From 01b816a2d1749ec4341a6cc26e503ab965303a40 Mon Sep 17 00:00:00 2001
From: Gilquin <laurent.gilquin@ens-lyon.fr>
Date: Tue, 8 Nov 2022 09:58:54 +0100
Subject: [PATCH] added scripts folder and test script

---
 .gitignore                           |  10 ++-
 5hAPF.R => scripts/5hAPF.R           |  33 ++++++---
 9396.R => scripts/9396.R             |  41 +++++++----
 WP.R => scripts/WP.R                 |  33 ++++++---
 analysis.R => scripts/analysis.R     |  44 ++++++-----
 export.R => scripts/export.R         |   8 +-
 ggtheme.R => scripts/ggtheme.R       |   0
 statistics.R => scripts/statistics.R |   1 +
 scripts/test.R                       | 106 +++++++++++++++++++++++++++
 utilities.R => scripts/utilities.R   |   0
 umap.yml => seurat.yml               |   2 +-
 11 files changed, 223 insertions(+), 55 deletions(-)
 rename 5hAPF.R => scripts/5hAPF.R (84%)
 rename 9396.R => scripts/9396.R (81%)
 rename WP.R => scripts/WP.R (85%)
 rename analysis.R => scripts/analysis.R (83%)
 rename export.R => scripts/export.R (98%)
 rename ggtheme.R => scripts/ggtheme.R (100%)
 rename statistics.R => scripts/statistics.R (99%)
 create mode 100644 scripts/test.R
 rename utilities.R => scripts/utilities.R (100%)
 rename umap.yml => seurat.yml (95%)

diff --git a/.gitignore b/.gitignore
index 802b6f8..c4d5e6b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,14 @@
-# specific files
+# specific files and folders
 mnist.R
 pbmc.R
+data/
+results/
+scripts/Python/
+
+# files at root
+*.pdf
+*.pptx
+*.xlsm
 
 # History files
 .Rhistory
diff --git a/5hAPF.R b/scripts/5hAPF.R
similarity index 84%
rename from 5hAPF.R
rename to scripts/5hAPF.R
index c3b2697..a612da9 100644
--- a/5hAPF.R
+++ b/scripts/5hAPF.R
@@ -29,20 +29,20 @@ import::from("SeuratObject", "Misc<-", .character_only=TRUE)
 import::from("SeuratDisk", "SaveH5Seurat", .character_only=TRUE)
 
 # relative import
-import::from("ggtheme.R", "theme_custom", .character_only=TRUE)
-import::from("statistics.R", "varPCA", .character_only=TRUE)
+import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
+import::from("./scripts/statistics.R", "varPCA", .character_only=TRUE)
 
 # set global ggplot2 theme
 ggplot2::theme_set(theme_custom())
 
 # load reticulate env
-reticulate::use_condaenv("umap", required = TRUE)
+reticulate::use_condaenv("seurat", required = TRUE)
 
 # path to read raw data
-datadir <- "/data"
+datadir <- "./data"
 
 # path to save figure and data
-savedir <- "../results/5hAPF"
+savedir <- "./results/5hAPF"
 
 # ---------------------------------------------------------------
 # DATA LOADING AND PREPROCESSING
@@ -68,7 +68,7 @@ Sobject <- PercentageFeatureSet(
 gg <- VlnPlot(
   Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
   )
-ggsave(file=paste0(savedir, "_vlnplot_before_QC.svg"), device="svg", gg)
+ggsave(file=file.path(savedir, "_vlnplot_before_QC.svg"), device="svg", gg)
 
 # empirical filtering based on QC metrics
 Sobject <- subset(Sobject, subset = nFeature_RNA > 600 & nFeature_RNA < 5000)
@@ -82,7 +82,7 @@ Sobject <- subset(Sobject, subset = GFP != 0 | Mef2 != 0 | twi != 0)
 gg <- VlnPlot(
   Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
   )
-ggsave(file=paste0(savedir, "_vlnplot_after_QC.svg"), device="svg", gg)
+ggsave(file=file.path(savedir, "_vlnplot_after_QC.svg"), device="svg", gg)
 
 # ---------------------------------------------------------------
 # DATA NORMALIZATION THROUGH SCTRANSFORM
@@ -115,7 +115,7 @@ gg <- vfp_labels &
   scale_y_continuous(trans='log10') & 
   theme_custom() & 
   theme(legend.position=c(0.5, 0.2))
-ggsave(file=paste0(savedir, "top_variable_features.svg"), device = "svg",
+ggsave(file=file.path(savedir, "top_variable_features.svg"), device = "svg",
        width = 10, height = 8, gg)
 
 # ---------------------------------------------------------------
@@ -135,7 +135,7 @@ ncomp <- ncol(Sobject@reductions$pca)
 # view the PCA threshold
 gg <- ElbowPlot(Sobject, ndims=ncomp) & ylab("Variance explained (%)") &
   theme_custom()
-ggsave(file=paste0(savedir, "pca.svg"), device="svg", gg)
+ggsave(file=file.path(savedir, "pca.svg"), device="svg", gg)
 ncomp <- 20
 
 # export PCA data to text file
@@ -170,7 +170,7 @@ gg <- clustree::clustree(
   scale_colour_gradientn(
   colors = rev(pals::plasma(256)[120:256])
   )
-ggsave(file=paste0(savedir, "clustree.svg"), device="svg",
+ggsave(file=file.path(savedir, "clustree.svg"), device="svg",
        width = 8, height = 12, gg)
 
 # construct UMAP embedding with custom parameters
@@ -188,5 +188,18 @@ write.table(
   row.names = FALSE, col.names=FALSE
   )
 
+# export list of missing genes
+sheets <- c(
+  "TF_with_names", "Secreted_proteins", "GPI_anchored protein",
+  "multi-pass_transmembrane", "transmembrane_Cterm", "transmembrane_Nterm"
+)
+for (sheet in sheets){
+  features <- readxl::read_excel(
+    path = file.path(datadir, "List_genes.xlsm"),
+    sheet = sheet,
+    col_names = TRUE)$SYMBOL
+  export_missing_markers(Sobject, features, savedir, prefix = sheet)
+}
+
 # save Seurat object
 SaveH5Seurat(Sobject, filename = file.path(savedir, "5hAPF.h5Seurat"))
diff --git a/9396.R b/scripts/9396.R
similarity index 81%
rename from 9396.R
rename to scripts/9396.R
index b942af8..eb3dd2e 100644
--- a/9396.R
+++ b/scripts/9396.R
@@ -29,20 +29,20 @@ import::from("SeuratObject", "Misc<-", .character_only=TRUE)
 import::from("SeuratDisk", "SaveH5Seurat", .character_only=TRUE)
 
 # relative import
-import::from("ggtheme.R", "theme_custom", .character_only=TRUE)
-import::from("statistics.R", "varPCA", .character_only=TRUE)
+import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
+import::from("./scripts/statistics.R", "varPCA", .character_only=TRUE)
 
 # set global ggplot2 theme
 ggplot2::theme_set(theme_custom())
 
 # load reticulate env
-reticulate::use_condaenv("umap", required = TRUE)
+reticulate::use_condaenv("seurat", required = TRUE)
 
 # path to read raw data
-datadir <- "/data"
+datadir <- "./data"
 
 # path to save figure and data
-savedir <- "../results/9396"
+savedir <- "./results/9396"
 
 # ---------------------------------------------------------------
 # DATA LOADING AND PREPROCESSING
@@ -68,7 +68,7 @@ Sobject <- PercentageFeatureSet(
 gg <- VlnPlot(
   Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
 )
-ggsave(file=paste0(savedir, "_vlnplot_before_QC.svg"), device="svg", gg)
+ggsave(file=file.path(savedir, "_vlnplot_before_QC.svg"), device="svg", gg)
 
 # empirical filtering based on QC metrics
 Sobject <- subset(Sobject, subset = nFeature_RNA > 600 & nFeature_RNA < 5000)
@@ -82,7 +82,7 @@ Sobject <- subset(Sobject, subset = GFP != 0 | Mef2 != 0 | twi != 0)
 gg <- VlnPlot(
   Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
 )
-ggsave(file=paste0(savedir, "_vlnplot_after_QC.svg"), device="svg", gg)
+ggsave(file=file.path(savedir, "_vlnplot_after_QC.svg"), device="svg", gg)
 
 # ---------------------------------------------------------------
 # DATA NORMALIZATION THROUGH SCTRANSFORM
@@ -115,7 +115,7 @@ gg <- vfp_labels &
   scale_y_continuous(trans='log10') & 
   theme_custom() & 
   theme(legend.position=c(0.5, 0.2))
-ggsave(file=paste0(savedir, "top_variable_features.svg"), device = "svg",
+ggsave(file=file.path(savedir, "top_variable_features.svg"), device = "svg",
        width = 10, height = 8, gg)
 
 # ---------------------------------------------------------------
@@ -135,8 +135,8 @@ ncomp <- ncol(Sobject@reductions$pca)
 # view the PCA threshold
 gg <- ElbowPlot(Sobject, ndims=ncomp) & ylab("Variance explained (%)") &
   theme_custom()
-ggsave(file=paste0(savedir, "pca.svg"), device="svg", gg)
-ncomp <- 12
+ggsave(file=file.path(savedir, "pca.svg"), device="svg", gg)
+ncomp <- 25
 
 # export PCA data to text file
 write.table(
@@ -170,23 +170,36 @@ gg <- clustree::clustree(
   scale_colour_gradientn(
     colors = rev(pals::plasma(256)[120:256])
   )
-ggsave(file=paste0(savedir, "clustree.svg"), device="svg",
+ggsave(file=file.path(savedir, "clustree.svg"), device="svg",
        width = 8, height = 12, gg)
 
 # construct UMAP embedding with custom parameters
 Sobject <- RunUMAP(
   Sobject , dims = 1:ncomp, n.neighbors = n_neighbors, metric="cosine",
   reduction = "pca", umap.method = "umap-learn", n.epochs = 1000,
-  learning.rate = 1.0, negative.sample.rate = 2, local.connectivity = 2,
-  set.op.mix.ratio = 1.0, a = 1.0, b = 1.0, seed.use = 1672071369
+  learning.rate = 1, negative.sample.rate = 9, local.connectivity = 1,
+  set.op.mix.ratio = 0.5, min.dist = 0.05, seed.use = 1672071369
   )
 
 # export clusters ids to text file
 write.table(
-  x = matrix(as.numeric(Sobject@meta.data$SCT_snn_res.0.45)-1, nrow=1),
+  x = matrix(as.numeric(Sobject@meta.data$SCT_snn_res.0.4)-1, nrow=1),
   file = file.path(savedir, "clustid.txt"), sep = "\t",
   row.names = FALSE, col.names=FALSE
 )
 
+# export list of missing genes
+sheets <- c(
+  "TF_with_names", "Secreted_proteins", "GPI_anchored protein",
+  "multi-pass_transmembrane", "transmembrane_Cterm", "transmembrane_Nterm"
+)
+for (sheet in sheets){
+  features <- readxl::read_excel(
+    path = file.path(datadir, "List_genes.xlsm"),
+    sheet = sheet,
+    col_names = TRUE)$SYMBOL
+  export_missing_markers(Sobject, features, savedir, prefix = sheet)
+}
+
 # save Seurat object
 SaveH5Seurat(Sobject, filename = file.path(savedir, "9396.h5Seurat"))
diff --git a/WP.R b/scripts/WP.R
similarity index 85%
rename from WP.R
rename to scripts/WP.R
index 530b94d..b204216 100644
--- a/WP.R
+++ b/scripts/WP.R
@@ -29,20 +29,20 @@ import::from("SeuratObject", "Misc<-", .character_only=TRUE)
 import::from("SeuratDisk", "SaveH5Seurat", .character_only=TRUE)
 
 # relative import
-import::from("ggtheme.R", "theme_custom", .character_only=TRUE)
-import::from("statistics.R", "varPCA", .character_only=TRUE)
+import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
+import::from("./scripts/statistics.R", "varPCA", .character_only=TRUE)
 
 # set global ggplot2 theme
 ggplot2::theme_set(theme_custom())
 
 # load reticulate env
-reticulate::use_condaenv("umap", required = TRUE)
+reticulate::use_condaenv("seurat", required = TRUE)
 
 # path to read raw data
-datadir <- "/data"
+datadir <- "./data"
 
 # path to save figure and data
-savedir <- "../results/WP"
+savedir <- "./results/WP"
 
 # ---------------------------------------------------------------
 # DATA LOADING AND PREPROCESSING
@@ -68,7 +68,7 @@ Sobject <- PercentageFeatureSet(
 gg <- VlnPlot(
   Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
 )
-ggsave(file=paste0(savedir, "_vlnplot_before_QC.svg"), device="svg", gg)
+ggsave(file=file.path(savedir, "_vlnplot_before_QC.svg"), device="svg", gg)
 
 # empirical filtering based on QC metrics
 Sobject <- subset(Sobject, subset = nFeature_RNA > 600 & nFeature_RNA < 5000)
@@ -82,7 +82,7 @@ Sobject <- subset(Sobject, subset = GFP != 0 | Mef2 != 0 | twi != 0)
 gg <- VlnPlot(
   Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
 )
-ggsave(file=paste0(savedir, "_vlnplot_after_QC.svg"), device="svg", gg)
+ggsave(file=file.path(savedir, "_vlnplot_after_QC.svg"), device="svg", gg)
 
 # ---------------------------------------------------------------
 # DATA NORMALIZATION THROUGH SCTRANSFORM
@@ -115,7 +115,7 @@ gg <- vfp_labels &
   scale_y_continuous(trans='log10') & 
   theme_custom() & 
   theme(legend.position=c(0.5, 0.2))
-ggsave(file=paste0(savedir, "top_variable_features.svg"), device = "svg",
+ggsave(file=file.path(savedir, "top_variable_features.svg"), device = "svg",
        width = 10, height = 8, gg)
 
 # ---------------------------------------------------------------
@@ -135,7 +135,7 @@ ncomp <- ncol(Sobject@reductions$pca)
 # view the PCA threshold
 gg <- ElbowPlot(Sobject, ndims=ncomp) & ylab("Variance explained (%)") &
   theme_custom()
-ggsave(file=paste0(savedir, "pca.svg"), device="svg", gg)
+ggsave(file=file.path(savedir, "pca.svg"), device="svg", gg)
 ncomp <- 14
 
 # export PCA data to text file
@@ -170,7 +170,7 @@ gg <- clustree::clustree(
   scale_colour_gradientn(
     colors = rev(pals::plasma(256)[120:256])
   )
-ggsave(file=paste0(savedir, "clustree.svg"), device="svg",
+ggsave(file=file.path(savedir, "clustree.svg"), device="svg",
        width = 8, height = 12, gg)
 
 # construct UMAP embedding with custom parameters
@@ -192,5 +192,18 @@ write.table(
   row.names = FALSE, col.names=FALSE
   )
 
+# export list of missing genes
+sheets <- c(
+  "TF_with_names", "Secreted_proteins", "GPI_anchored protein",
+  "multi-pass_transmembrane", "transmembrane_Cterm", "transmembrane_Nterm"
+)
+for (sheet in sheets){
+  features <- readxl::read_excel(
+    path = file.path(datadir, "List_genes.xlsm"),
+    sheet = sheet,
+    col_names = TRUE)$SYMBOL
+  export_missing_markers(Sobject, features, savedir, prefix = sheet)
+}
+
 # save Seurat object
 SaveH5Seurat(Sobject, filename = file.path(savedir, "WP.h5Seurat"))
diff --git a/analysis.R b/scripts/analysis.R
similarity index 83%
rename from analysis.R
rename to scripts/analysis.R
index 7b8ac43..e9b884d 100644
--- a/analysis.R
+++ b/scripts/analysis.R
@@ -13,7 +13,7 @@ import::from("SeuratDisk", "LoadH5Seurat", .character_only=TRUE)
 
 # local imports
 import::from(
-  "export.R",
+  "./scripts/export.R",
   c(
     "export_dotplots", "export_embeddings", "export_heatmap", 
     "export_markers", "export_markers_unique", "FindFullMarkers",
@@ -21,6 +21,8 @@ import::from(
   ),
   .character_only=TRUE
 )
+import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
+
 
 # -------------------------------------------------------------------------
 
@@ -28,21 +30,17 @@ import::from(
 ggplot2::theme_set(theme_custom())
 
 # path to the folder where the raw data are stored
-datadir <- "/data"
+datadir <- "./data"
 
 # path to the folder where to export figures and save data
-savedir <- "/results/..."
+savedir <- "./results/9396"
 
 # Load Seurat object
-Sobject <- LoadH5Seurat(file = file.path(savedir, "[...].h5Seurat"))
+Sobject <- LoadH5Seurat(file = file.path(savedir, "9396.h5Seurat"))
 
 # -------------------------------------------------------------------------
 
-# choose clustering resolution (the number of clusters)
-ident <- "SCT_snn_res.0.35"
-Idents(Sobject) <- ident
-
-# set colormap
+# set colormap (a valid string from the pals package)
 cmap <- "alphabet"
 
 # export UMAP embedding with custom coloring based on clustering
@@ -55,25 +53,34 @@ export_embeddings(Sobject, res_vec, cmap, savedir, assay="SCT", device = "png")
 
 # -------------------------------------------------------------------------
 
+# choose clustering resolution (the number of clusters)
+ident <- "SCT_snn_res.0.4"
+Idents(Sobject) <- ident
+
+# -------------------------------------------------------------------------
+
 # find all relevant markers with constrained parameters
 markers <- FindAllMarkers(
   Sobject, only.pos = TRUE, min.pct = 0.1, min.diff.pct = 0.50,
-  logfc.threshold = 0.25)
+  logfc.threshold = 0.25
+  )
 
 # save result table
 export_markers(markers, ident, 10, savedir)
 
 # save heatmap of top50 genes
 top_genes <- head(VariableFeatures(object = Sobject, assay = "SCT"), 50)
-plot_heatmap(Sobject, top_genes, "SCT", TRUE, cmap, "top50", savedir,
-             device = "png")
+export_heatmap(
+  Sobject, top_genes, "SCT", TRUE, cmap, "top50", savedir, device = "png"
+  )
 
 # -------------------------------------------------------------------------
 
 # visualization of genes: "GFP", "Mef2" and "twi"
 features <- c("GFP","Mef2","twi")
-visualize_markers(Sobject, features, ident, cmap, "GMt", savedir,
-                  device = "png")
+visualize_markers(
+  Sobject, features, ident, cmap, "GMt", savedir, device = "png"
+  )
 
 # -------------------------------------------------------------------------
 
@@ -94,8 +101,9 @@ for (sheet in sheets){
     path = file.path(datadir, "List_genes.xlsm"),
     sheet = sheet,
     col_names = TRUE)$SYMBOL
-  export_dotplots(Sobject, features, ident, savedir, prefix = sheet,
-                  device = "png")
+  export_dotplots(
+    Sobject, features, ident, savedir, prefix = sheet, device = "png"
+    )
 }
 
 # -------------------------------------------------------------------------
@@ -108,7 +116,7 @@ export_markers(markers, ident, NULL, savedir, prefix = "full_")
 top5_genes <- markers %>% 
   dplyr::group_by(cluster) %>% 
   dplyr::top_n(n = 5, wt = avg_log2FC)
-plot_heatmap(
+export_heatmap(
   Sobject, top5_genes$gene, "SCT", TRUE, cmap, "top5_perclust", savedir,
   device = "png"
   )
@@ -117,7 +125,7 @@ plot_heatmap(
 top10_genes <- markers %>% 
   dplyr::group_by(cluster) %>% 
   dplyr::top_n(n = 10, wt = avg_log2FC)
-plot_heatmap(
+export_heatmap(
   Sobject, top10_genes$gene, "SCT", TRUE, cmap, "top10_perclust", savedir,
   device = "png"
   )
diff --git a/export.R b/scripts/export.R
similarity index 98%
rename from export.R
rename to scripts/export.R
index 65e27cc..3f8114b 100644
--- a/export.R
+++ b/scripts/export.R
@@ -15,6 +15,7 @@ import::here(
     "ggsave",
     "guide_axis",
     "scale_color_gradientn",
+    "scale_color_manual",
     "scale_fill_gradientn",
     "scale_x_discrete"
     ),
@@ -490,7 +491,12 @@ export_heatmap <- function(
   # save the heatmap
   filepath <- file.path(savedir, subdir, prefix)
   gg <- suppressWarnings(suppressMessages(
-    DoHeatmap(object, assay=assay, features=features, group.colors=cols)
+    DoHeatmap(
+      object, assay=assay, features=features, group.colors=cols
+      ) &
+      scale_color_manual(
+        values = cols, name = "Identity", na.translate = FALSE
+      )
   ))
   if (!legend) { gg <- gg + NoLengend()}
   ggsave(file=paste0(filepath, "_heatmap", ".", device), device = device,
diff --git a/ggtheme.R b/scripts/ggtheme.R
similarity index 100%
rename from ggtheme.R
rename to scripts/ggtheme.R
diff --git a/statistics.R b/scripts/statistics.R
similarity index 99%
rename from statistics.R
rename to scripts/statistics.R
index 05339c0..2830183 100644
--- a/statistics.R
+++ b/scripts/statistics.R
@@ -6,6 +6,7 @@
 import::here("dplyr", c("left_join", "rename"), .character_only=TRUE)
 import::here("janitor", "adorn_totals", .character_only=TRUE)
 import::here("magrittr", "%>%", .character_only=TRUE)
+import::here("matrixStats", "rowVars", .character_only=TRUE)
 import::here("glue", "glue_collapse", .character_only=TRUE)
 import::here(
   "Seurat",
diff --git a/scripts/test.R b/scripts/test.R
new file mode 100644
index 0000000..7327ba2
--- /dev/null
+++ b/scripts/test.R
@@ -0,0 +1,106 @@
+# -------------------------------------------------------------------------
+#         Test clustering analysis on pbmc_small dataset
+# -------------------------------------------------------------------------
+
+# global imports
+import::from("magrittr", "%>%", .character_only=TRUE)
+import::from(
+  "Seurat",
+  c("FindAllMarkers", "FindMarkers", "Idents<-", "VariableFeatures"),
+  .character_only=TRUE
+)
+import::from("SeuratDisk", "LoadH5Seurat", .character_only=TRUE)
+
+# local imports
+import::from(
+  "./scripts/export.R",
+  c(
+    "export_dotplots", "export_embeddings", "export_heatmap", 
+    "export_markers", "export_markers_unique", "FindFullMarkers",
+    "visualize_markers"
+  ),
+  .character_only=TRUE
+)
+import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
+
+
+# -------------------------------------------------------------------------
+
+# set global ggplot2 theme
+ggplot2::theme_set(theme_custom())
+
+# path to the folder where the raw data are stored
+datadir <- "./data"
+
+# path to the folder where to export figures and save data
+savedir <- "./results/pbmc"
+
+# Load Seurat object
+Sobject <- LoadH5Seurat(file = file.path(savedir, "pbmc_small.h5Seurat"))
+
+# -------------------------------------------------------------------------
+
+# set colormap (a valid string from the pals package)
+cmap <- "alphabet"
+
+# export UMAP embedding with custom coloring based on clustering
+res_vec <- colnames(Sobject@meta.data) %>% 
+  strsplit(., split="SCT_snn_res.") %>%
+  sapply(., function(x) x[2]) %>% 
+  na.omit(.) %>%
+  as.numeric(.)
+export_embeddings(Sobject, res_vec, cmap, savedir, assay="SCT", device = "png")
+
+# -------------------------------------------------------------------------
+
+# choose clustering resolution (the number of clusters)
+ident <- "SCT_snn_res.1"
+Idents(Sobject) <- ident
+
+# -------------------------------------------------------------------------
+
+# find all relevant markers with constrained parameters
+markers <- FindAllMarkers(
+  Sobject, only.pos = TRUE, min.pct = 0.1, min.diff.pct = 0.50,
+  logfc.threshold = 0.25
+)
+
+# save result table
+export_markers(markers, ident, 10, savedir)
+
+# save heatmap of top50 genes
+top_genes <- head(VariableFeatures(object = Sobject, assay = "SCT"), 50)
+export_heatmap(
+  Sobject, top_genes, "SCT", TRUE, cmap, "top50", savedir, device = "png"
+)
+
+# -------------------------------------------------------------------------
+
+# export table of markers that are mostly expressed in a single cluster
+# min_pct = 5 means expressed in at least 5% of the cells of a cluster
+# threshold = 1 means expressed in less than 1% of the remaining cells
+export_markers_unique(Sobject, ident, savedir, threshold = 1, min_pct = 5)
+
+# -------------------------------------------------------------------------
+
+# get all markers per cluster irregardless of their expression power
+markers <- FindFullMarkers(Sobject, ident, savedir)
+export_markers(markers, ident, NULL, savedir, prefix = "full_")
+
+# heatmap of top5 genes per cluster
+top5_genes <- markers %>% 
+  dplyr::group_by(cluster) %>% 
+  dplyr::top_n(n = 5, wt = avg_log2FC)
+export_heatmap(
+  Sobject, top5_genes$gene, "SCT", TRUE, cmap, "top5_perclust", savedir,
+  device = "png"
+)
+
+# heatmap of top10 genes per cluster
+top10_genes <- markers %>% 
+  dplyr::group_by(cluster) %>% 
+  dplyr::top_n(n = 10, wt = avg_log2FC)
+export_heatmap(
+  Sobject, top10_genes$gene, "SCT", TRUE, cmap, "top10_perclust", savedir,
+  device = "png"
+)
diff --git a/utilities.R b/scripts/utilities.R
similarity index 100%
rename from utilities.R
rename to scripts/utilities.R
diff --git a/umap.yml b/seurat.yml
similarity index 95%
rename from umap.yml
rename to seurat.yml
index 44ff267..005159e 100644
--- a/umap.yml
+++ b/seurat.yml
@@ -1,4 +1,4 @@
-name: umap
+name: seurat
 channels:
   - bokeh
   - conda-forge
-- 
GitLab