Skip to content
Snippets Groups Projects
Commit 01b816a2 authored by Gilquin's avatar Gilquin
Browse files

added scripts folder and test script

parent bfbfc998
No related branches found
No related tags found
No related merge requests found
# specific files # specific files and folders
mnist.R mnist.R
pbmc.R pbmc.R
data/
results/
scripts/Python/
# files at root
*.pdf
*.pptx
*.xlsm
# History files # History files
.Rhistory .Rhistory
......
...@@ -29,20 +29,20 @@ import::from("SeuratObject", "Misc<-", .character_only=TRUE) ...@@ -29,20 +29,20 @@ import::from("SeuratObject", "Misc<-", .character_only=TRUE)
import::from("SeuratDisk", "SaveH5Seurat", .character_only=TRUE) import::from("SeuratDisk", "SaveH5Seurat", .character_only=TRUE)
# relative import # relative import
import::from("ggtheme.R", "theme_custom", .character_only=TRUE) import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
import::from("statistics.R", "varPCA", .character_only=TRUE) import::from("./scripts/statistics.R", "varPCA", .character_only=TRUE)
# set global ggplot2 theme # set global ggplot2 theme
ggplot2::theme_set(theme_custom()) ggplot2::theme_set(theme_custom())
# load reticulate env # load reticulate env
reticulate::use_condaenv("umap", required = TRUE) reticulate::use_condaenv("seurat", required = TRUE)
# path to read raw data # path to read raw data
datadir <- "/data" datadir <- "./data"
# path to save figure and data # path to save figure and data
savedir <- "../results/5hAPF" savedir <- "./results/5hAPF"
# --------------------------------------------------------------- # ---------------------------------------------------------------
# DATA LOADING AND PREPROCESSING # DATA LOADING AND PREPROCESSING
...@@ -68,7 +68,7 @@ Sobject <- PercentageFeatureSet( ...@@ -68,7 +68,7 @@ Sobject <- PercentageFeatureSet(
gg <- VlnPlot( gg <- VlnPlot(
Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3 Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
) )
ggsave(file=paste0(savedir, "_vlnplot_before_QC.svg"), device="svg", gg) ggsave(file=file.path(savedir, "_vlnplot_before_QC.svg"), device="svg", gg)
# empirical filtering based on QC metrics # empirical filtering based on QC metrics
Sobject <- subset(Sobject, subset = nFeature_RNA > 600 & nFeature_RNA < 5000) Sobject <- subset(Sobject, subset = nFeature_RNA > 600 & nFeature_RNA < 5000)
...@@ -82,7 +82,7 @@ Sobject <- subset(Sobject, subset = GFP != 0 | Mef2 != 0 | twi != 0) ...@@ -82,7 +82,7 @@ Sobject <- subset(Sobject, subset = GFP != 0 | Mef2 != 0 | twi != 0)
gg <- VlnPlot( gg <- VlnPlot(
Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3 Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
) )
ggsave(file=paste0(savedir, "_vlnplot_after_QC.svg"), device="svg", gg) ggsave(file=file.path(savedir, "_vlnplot_after_QC.svg"), device="svg", gg)
# --------------------------------------------------------------- # ---------------------------------------------------------------
# DATA NORMALIZATION THROUGH SCTRANSFORM # DATA NORMALIZATION THROUGH SCTRANSFORM
...@@ -115,7 +115,7 @@ gg <- vfp_labels & ...@@ -115,7 +115,7 @@ gg <- vfp_labels &
scale_y_continuous(trans='log10') & scale_y_continuous(trans='log10') &
theme_custom() & theme_custom() &
theme(legend.position=c(0.5, 0.2)) theme(legend.position=c(0.5, 0.2))
ggsave(file=paste0(savedir, "top_variable_features.svg"), device = "svg", ggsave(file=file.path(savedir, "top_variable_features.svg"), device = "svg",
width = 10, height = 8, gg) width = 10, height = 8, gg)
# --------------------------------------------------------------- # ---------------------------------------------------------------
...@@ -135,7 +135,7 @@ ncomp <- ncol(Sobject@reductions$pca) ...@@ -135,7 +135,7 @@ ncomp <- ncol(Sobject@reductions$pca)
# view the PCA threshold # view the PCA threshold
gg <- ElbowPlot(Sobject, ndims=ncomp) & ylab("Variance explained (%)") & gg <- ElbowPlot(Sobject, ndims=ncomp) & ylab("Variance explained (%)") &
theme_custom() theme_custom()
ggsave(file=paste0(savedir, "pca.svg"), device="svg", gg) ggsave(file=file.path(savedir, "pca.svg"), device="svg", gg)
ncomp <- 20 ncomp <- 20
# export PCA data to text file # export PCA data to text file
...@@ -170,7 +170,7 @@ gg <- clustree::clustree( ...@@ -170,7 +170,7 @@ gg <- clustree::clustree(
scale_colour_gradientn( scale_colour_gradientn(
colors = rev(pals::plasma(256)[120:256]) colors = rev(pals::plasma(256)[120:256])
) )
ggsave(file=paste0(savedir, "clustree.svg"), device="svg", ggsave(file=file.path(savedir, "clustree.svg"), device="svg",
width = 8, height = 12, gg) width = 8, height = 12, gg)
# construct UMAP embedding with custom parameters # construct UMAP embedding with custom parameters
...@@ -188,5 +188,18 @@ write.table( ...@@ -188,5 +188,18 @@ write.table(
row.names = FALSE, col.names=FALSE row.names = FALSE, col.names=FALSE
) )
# export list of missing genes
sheets <- c(
"TF_with_names", "Secreted_proteins", "GPI_anchored protein",
"multi-pass_transmembrane", "transmembrane_Cterm", "transmembrane_Nterm"
)
for (sheet in sheets){
features <- readxl::read_excel(
path = file.path(datadir, "List_genes.xlsm"),
sheet = sheet,
col_names = TRUE)$SYMBOL
export_missing_markers(Sobject, features, savedir, prefix = sheet)
}
# save Seurat object # save Seurat object
SaveH5Seurat(Sobject, filename = file.path(savedir, "5hAPF.h5Seurat")) SaveH5Seurat(Sobject, filename = file.path(savedir, "5hAPF.h5Seurat"))
...@@ -29,20 +29,20 @@ import::from("SeuratObject", "Misc<-", .character_only=TRUE) ...@@ -29,20 +29,20 @@ import::from("SeuratObject", "Misc<-", .character_only=TRUE)
import::from("SeuratDisk", "SaveH5Seurat", .character_only=TRUE) import::from("SeuratDisk", "SaveH5Seurat", .character_only=TRUE)
# relative import # relative import
import::from("ggtheme.R", "theme_custom", .character_only=TRUE) import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
import::from("statistics.R", "varPCA", .character_only=TRUE) import::from("./scripts/statistics.R", "varPCA", .character_only=TRUE)
# set global ggplot2 theme # set global ggplot2 theme
ggplot2::theme_set(theme_custom()) ggplot2::theme_set(theme_custom())
# load reticulate env # load reticulate env
reticulate::use_condaenv("umap", required = TRUE) reticulate::use_condaenv("seurat", required = TRUE)
# path to read raw data # path to read raw data
datadir <- "/data" datadir <- "./data"
# path to save figure and data # path to save figure and data
savedir <- "../results/9396" savedir <- "./results/9396"
# --------------------------------------------------------------- # ---------------------------------------------------------------
# DATA LOADING AND PREPROCESSING # DATA LOADING AND PREPROCESSING
...@@ -68,7 +68,7 @@ Sobject <- PercentageFeatureSet( ...@@ -68,7 +68,7 @@ Sobject <- PercentageFeatureSet(
gg <- VlnPlot( gg <- VlnPlot(
Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3 Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
) )
ggsave(file=paste0(savedir, "_vlnplot_before_QC.svg"), device="svg", gg) ggsave(file=file.path(savedir, "_vlnplot_before_QC.svg"), device="svg", gg)
# empirical filtering based on QC metrics # empirical filtering based on QC metrics
Sobject <- subset(Sobject, subset = nFeature_RNA > 600 & nFeature_RNA < 5000) Sobject <- subset(Sobject, subset = nFeature_RNA > 600 & nFeature_RNA < 5000)
...@@ -82,7 +82,7 @@ Sobject <- subset(Sobject, subset = GFP != 0 | Mef2 != 0 | twi != 0) ...@@ -82,7 +82,7 @@ Sobject <- subset(Sobject, subset = GFP != 0 | Mef2 != 0 | twi != 0)
gg <- VlnPlot( gg <- VlnPlot(
Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3 Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
) )
ggsave(file=paste0(savedir, "_vlnplot_after_QC.svg"), device="svg", gg) ggsave(file=file.path(savedir, "_vlnplot_after_QC.svg"), device="svg", gg)
# --------------------------------------------------------------- # ---------------------------------------------------------------
# DATA NORMALIZATION THROUGH SCTRANSFORM # DATA NORMALIZATION THROUGH SCTRANSFORM
...@@ -115,7 +115,7 @@ gg <- vfp_labels & ...@@ -115,7 +115,7 @@ gg <- vfp_labels &
scale_y_continuous(trans='log10') & scale_y_continuous(trans='log10') &
theme_custom() & theme_custom() &
theme(legend.position=c(0.5, 0.2)) theme(legend.position=c(0.5, 0.2))
ggsave(file=paste0(savedir, "top_variable_features.svg"), device = "svg", ggsave(file=file.path(savedir, "top_variable_features.svg"), device = "svg",
width = 10, height = 8, gg) width = 10, height = 8, gg)
# --------------------------------------------------------------- # ---------------------------------------------------------------
...@@ -135,8 +135,8 @@ ncomp <- ncol(Sobject@reductions$pca) ...@@ -135,8 +135,8 @@ ncomp <- ncol(Sobject@reductions$pca)
# view the PCA threshold # view the PCA threshold
gg <- ElbowPlot(Sobject, ndims=ncomp) & ylab("Variance explained (%)") & gg <- ElbowPlot(Sobject, ndims=ncomp) & ylab("Variance explained (%)") &
theme_custom() theme_custom()
ggsave(file=paste0(savedir, "pca.svg"), device="svg", gg) ggsave(file=file.path(savedir, "pca.svg"), device="svg", gg)
ncomp <- 12 ncomp <- 25
# export PCA data to text file # export PCA data to text file
write.table( write.table(
...@@ -170,23 +170,36 @@ gg <- clustree::clustree( ...@@ -170,23 +170,36 @@ gg <- clustree::clustree(
scale_colour_gradientn( scale_colour_gradientn(
colors = rev(pals::plasma(256)[120:256]) colors = rev(pals::plasma(256)[120:256])
) )
ggsave(file=paste0(savedir, "clustree.svg"), device="svg", ggsave(file=file.path(savedir, "clustree.svg"), device="svg",
width = 8, height = 12, gg) width = 8, height = 12, gg)
# construct UMAP embedding with custom parameters # construct UMAP embedding with custom parameters
Sobject <- RunUMAP( Sobject <- RunUMAP(
Sobject , dims = 1:ncomp, n.neighbors = n_neighbors, metric="cosine", Sobject , dims = 1:ncomp, n.neighbors = n_neighbors, metric="cosine",
reduction = "pca", umap.method = "umap-learn", n.epochs = 1000, reduction = "pca", umap.method = "umap-learn", n.epochs = 1000,
learning.rate = 1.0, negative.sample.rate = 2, local.connectivity = 2, learning.rate = 1, negative.sample.rate = 9, local.connectivity = 1,
set.op.mix.ratio = 1.0, a = 1.0, b = 1.0, seed.use = 1672071369 set.op.mix.ratio = 0.5, min.dist = 0.05, seed.use = 1672071369
) )
# export clusters ids to text file # export clusters ids to text file
write.table( write.table(
x = matrix(as.numeric(Sobject@meta.data$SCT_snn_res.0.45)-1, nrow=1), x = matrix(as.numeric(Sobject@meta.data$SCT_snn_res.0.4)-1, nrow=1),
file = file.path(savedir, "clustid.txt"), sep = "\t", file = file.path(savedir, "clustid.txt"), sep = "\t",
row.names = FALSE, col.names=FALSE row.names = FALSE, col.names=FALSE
) )
# export list of missing genes
sheets <- c(
"TF_with_names", "Secreted_proteins", "GPI_anchored protein",
"multi-pass_transmembrane", "transmembrane_Cterm", "transmembrane_Nterm"
)
for (sheet in sheets){
features <- readxl::read_excel(
path = file.path(datadir, "List_genes.xlsm"),
sheet = sheet,
col_names = TRUE)$SYMBOL
export_missing_markers(Sobject, features, savedir, prefix = sheet)
}
# save Seurat object # save Seurat object
SaveH5Seurat(Sobject, filename = file.path(savedir, "9396.h5Seurat")) SaveH5Seurat(Sobject, filename = file.path(savedir, "9396.h5Seurat"))
...@@ -29,20 +29,20 @@ import::from("SeuratObject", "Misc<-", .character_only=TRUE) ...@@ -29,20 +29,20 @@ import::from("SeuratObject", "Misc<-", .character_only=TRUE)
import::from("SeuratDisk", "SaveH5Seurat", .character_only=TRUE) import::from("SeuratDisk", "SaveH5Seurat", .character_only=TRUE)
# relative import # relative import
import::from("ggtheme.R", "theme_custom", .character_only=TRUE) import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
import::from("statistics.R", "varPCA", .character_only=TRUE) import::from("./scripts/statistics.R", "varPCA", .character_only=TRUE)
# set global ggplot2 theme # set global ggplot2 theme
ggplot2::theme_set(theme_custom()) ggplot2::theme_set(theme_custom())
# load reticulate env # load reticulate env
reticulate::use_condaenv("umap", required = TRUE) reticulate::use_condaenv("seurat", required = TRUE)
# path to read raw data # path to read raw data
datadir <- "/data" datadir <- "./data"
# path to save figure and data # path to save figure and data
savedir <- "../results/WP" savedir <- "./results/WP"
# --------------------------------------------------------------- # ---------------------------------------------------------------
# DATA LOADING AND PREPROCESSING # DATA LOADING AND PREPROCESSING
...@@ -68,7 +68,7 @@ Sobject <- PercentageFeatureSet( ...@@ -68,7 +68,7 @@ Sobject <- PercentageFeatureSet(
gg <- VlnPlot( gg <- VlnPlot(
Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3 Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
) )
ggsave(file=paste0(savedir, "_vlnplot_before_QC.svg"), device="svg", gg) ggsave(file=file.path(savedir, "_vlnplot_before_QC.svg"), device="svg", gg)
# empirical filtering based on QC metrics # empirical filtering based on QC metrics
Sobject <- subset(Sobject, subset = nFeature_RNA > 600 & nFeature_RNA < 5000) Sobject <- subset(Sobject, subset = nFeature_RNA > 600 & nFeature_RNA < 5000)
...@@ -82,7 +82,7 @@ Sobject <- subset(Sobject, subset = GFP != 0 | Mef2 != 0 | twi != 0) ...@@ -82,7 +82,7 @@ Sobject <- subset(Sobject, subset = GFP != 0 | Mef2 != 0 | twi != 0)
gg <- VlnPlot( gg <- VlnPlot(
Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3 Sobject, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3
) )
ggsave(file=paste0(savedir, "_vlnplot_after_QC.svg"), device="svg", gg) ggsave(file=file.path(savedir, "_vlnplot_after_QC.svg"), device="svg", gg)
# --------------------------------------------------------------- # ---------------------------------------------------------------
# DATA NORMALIZATION THROUGH SCTRANSFORM # DATA NORMALIZATION THROUGH SCTRANSFORM
...@@ -115,7 +115,7 @@ gg <- vfp_labels & ...@@ -115,7 +115,7 @@ gg <- vfp_labels &
scale_y_continuous(trans='log10') & scale_y_continuous(trans='log10') &
theme_custom() & theme_custom() &
theme(legend.position=c(0.5, 0.2)) theme(legend.position=c(0.5, 0.2))
ggsave(file=paste0(savedir, "top_variable_features.svg"), device = "svg", ggsave(file=file.path(savedir, "top_variable_features.svg"), device = "svg",
width = 10, height = 8, gg) width = 10, height = 8, gg)
# --------------------------------------------------------------- # ---------------------------------------------------------------
...@@ -135,7 +135,7 @@ ncomp <- ncol(Sobject@reductions$pca) ...@@ -135,7 +135,7 @@ ncomp <- ncol(Sobject@reductions$pca)
# view the PCA threshold # view the PCA threshold
gg <- ElbowPlot(Sobject, ndims=ncomp) & ylab("Variance explained (%)") & gg <- ElbowPlot(Sobject, ndims=ncomp) & ylab("Variance explained (%)") &
theme_custom() theme_custom()
ggsave(file=paste0(savedir, "pca.svg"), device="svg", gg) ggsave(file=file.path(savedir, "pca.svg"), device="svg", gg)
ncomp <- 14 ncomp <- 14
# export PCA data to text file # export PCA data to text file
...@@ -170,7 +170,7 @@ gg <- clustree::clustree( ...@@ -170,7 +170,7 @@ gg <- clustree::clustree(
scale_colour_gradientn( scale_colour_gradientn(
colors = rev(pals::plasma(256)[120:256]) colors = rev(pals::plasma(256)[120:256])
) )
ggsave(file=paste0(savedir, "clustree.svg"), device="svg", ggsave(file=file.path(savedir, "clustree.svg"), device="svg",
width = 8, height = 12, gg) width = 8, height = 12, gg)
# construct UMAP embedding with custom parameters # construct UMAP embedding with custom parameters
...@@ -192,5 +192,18 @@ write.table( ...@@ -192,5 +192,18 @@ write.table(
row.names = FALSE, col.names=FALSE row.names = FALSE, col.names=FALSE
) )
# export list of missing genes
sheets <- c(
"TF_with_names", "Secreted_proteins", "GPI_anchored protein",
"multi-pass_transmembrane", "transmembrane_Cterm", "transmembrane_Nterm"
)
for (sheet in sheets){
features <- readxl::read_excel(
path = file.path(datadir, "List_genes.xlsm"),
sheet = sheet,
col_names = TRUE)$SYMBOL
export_missing_markers(Sobject, features, savedir, prefix = sheet)
}
# save Seurat object # save Seurat object
SaveH5Seurat(Sobject, filename = file.path(savedir, "WP.h5Seurat")) SaveH5Seurat(Sobject, filename = file.path(savedir, "WP.h5Seurat"))
...@@ -13,7 +13,7 @@ import::from("SeuratDisk", "LoadH5Seurat", .character_only=TRUE) ...@@ -13,7 +13,7 @@ import::from("SeuratDisk", "LoadH5Seurat", .character_only=TRUE)
# local imports # local imports
import::from( import::from(
"export.R", "./scripts/export.R",
c( c(
"export_dotplots", "export_embeddings", "export_heatmap", "export_dotplots", "export_embeddings", "export_heatmap",
"export_markers", "export_markers_unique", "FindFullMarkers", "export_markers", "export_markers_unique", "FindFullMarkers",
...@@ -21,6 +21,8 @@ import::from( ...@@ -21,6 +21,8 @@ import::from(
), ),
.character_only=TRUE .character_only=TRUE
) )
import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
...@@ -28,21 +30,17 @@ import::from( ...@@ -28,21 +30,17 @@ import::from(
ggplot2::theme_set(theme_custom()) ggplot2::theme_set(theme_custom())
# path to the folder where the raw data are stored # path to the folder where the raw data are stored
datadir <- "/data" datadir <- "./data"
# path to the folder where to export figures and save data # path to the folder where to export figures and save data
savedir <- "/results/..." savedir <- "./results/9396"
# Load Seurat object # Load Seurat object
Sobject <- LoadH5Seurat(file = file.path(savedir, "[...].h5Seurat")) Sobject <- LoadH5Seurat(file = file.path(savedir, "9396.h5Seurat"))
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
# choose clustering resolution (the number of clusters) # set colormap (a valid string from the pals package)
ident <- "SCT_snn_res.0.35"
Idents(Sobject) <- ident
# set colormap
cmap <- "alphabet" cmap <- "alphabet"
# export UMAP embedding with custom coloring based on clustering # export UMAP embedding with custom coloring based on clustering
...@@ -55,25 +53,34 @@ export_embeddings(Sobject, res_vec, cmap, savedir, assay="SCT", device = "png") ...@@ -55,25 +53,34 @@ export_embeddings(Sobject, res_vec, cmap, savedir, assay="SCT", device = "png")
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
# choose clustering resolution (the number of clusters)
ident <- "SCT_snn_res.0.4"
Idents(Sobject) <- ident
# -------------------------------------------------------------------------
# find all relevant markers with constrained parameters # find all relevant markers with constrained parameters
markers <- FindAllMarkers( markers <- FindAllMarkers(
Sobject, only.pos = TRUE, min.pct = 0.1, min.diff.pct = 0.50, Sobject, only.pos = TRUE, min.pct = 0.1, min.diff.pct = 0.50,
logfc.threshold = 0.25) logfc.threshold = 0.25
)
# save result table # save result table
export_markers(markers, ident, 10, savedir) export_markers(markers, ident, 10, savedir)
# save heatmap of top50 genes # save heatmap of top50 genes
top_genes <- head(VariableFeatures(object = Sobject, assay = "SCT"), 50) top_genes <- head(VariableFeatures(object = Sobject, assay = "SCT"), 50)
plot_heatmap(Sobject, top_genes, "SCT", TRUE, cmap, "top50", savedir, export_heatmap(
device = "png") Sobject, top_genes, "SCT", TRUE, cmap, "top50", savedir, device = "png"
)
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
# visualization of genes: "GFP", "Mef2" and "twi" # visualization of genes: "GFP", "Mef2" and "twi"
features <- c("GFP","Mef2","twi") features <- c("GFP","Mef2","twi")
visualize_markers(Sobject, features, ident, cmap, "GMt", savedir, visualize_markers(
device = "png") Sobject, features, ident, cmap, "GMt", savedir, device = "png"
)
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
...@@ -94,8 +101,9 @@ for (sheet in sheets){ ...@@ -94,8 +101,9 @@ for (sheet in sheets){
path = file.path(datadir, "List_genes.xlsm"), path = file.path(datadir, "List_genes.xlsm"),
sheet = sheet, sheet = sheet,
col_names = TRUE)$SYMBOL col_names = TRUE)$SYMBOL
export_dotplots(Sobject, features, ident, savedir, prefix = sheet, export_dotplots(
device = "png") Sobject, features, ident, savedir, prefix = sheet, device = "png"
)
} }
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
...@@ -108,7 +116,7 @@ export_markers(markers, ident, NULL, savedir, prefix = "full_") ...@@ -108,7 +116,7 @@ export_markers(markers, ident, NULL, savedir, prefix = "full_")
top5_genes <- markers %>% top5_genes <- markers %>%
dplyr::group_by(cluster) %>% dplyr::group_by(cluster) %>%
dplyr::top_n(n = 5, wt = avg_log2FC) dplyr::top_n(n = 5, wt = avg_log2FC)
plot_heatmap( export_heatmap(
Sobject, top5_genes$gene, "SCT", TRUE, cmap, "top5_perclust", savedir, Sobject, top5_genes$gene, "SCT", TRUE, cmap, "top5_perclust", savedir,
device = "png" device = "png"
) )
...@@ -117,7 +125,7 @@ plot_heatmap( ...@@ -117,7 +125,7 @@ plot_heatmap(
top10_genes <- markers %>% top10_genes <- markers %>%
dplyr::group_by(cluster) %>% dplyr::group_by(cluster) %>%
dplyr::top_n(n = 10, wt = avg_log2FC) dplyr::top_n(n = 10, wt = avg_log2FC)
plot_heatmap( export_heatmap(
Sobject, top10_genes$gene, "SCT", TRUE, cmap, "top10_perclust", savedir, Sobject, top10_genes$gene, "SCT", TRUE, cmap, "top10_perclust", savedir,
device = "png" device = "png"
) )
...@@ -15,6 +15,7 @@ import::here( ...@@ -15,6 +15,7 @@ import::here(
"ggsave", "ggsave",
"guide_axis", "guide_axis",
"scale_color_gradientn", "scale_color_gradientn",
"scale_color_manual",
"scale_fill_gradientn", "scale_fill_gradientn",
"scale_x_discrete" "scale_x_discrete"
), ),
...@@ -490,7 +491,12 @@ export_heatmap <- function( ...@@ -490,7 +491,12 @@ export_heatmap <- function(
# save the heatmap # save the heatmap
filepath <- file.path(savedir, subdir, prefix) filepath <- file.path(savedir, subdir, prefix)
gg <- suppressWarnings(suppressMessages( gg <- suppressWarnings(suppressMessages(
DoHeatmap(object, assay=assay, features=features, group.colors=cols) DoHeatmap(
object, assay=assay, features=features, group.colors=cols
) &
scale_color_manual(
values = cols, name = "Identity", na.translate = FALSE
)
)) ))
if (!legend) { gg <- gg + NoLengend()} if (!legend) { gg <- gg + NoLengend()}
ggsave(file=paste0(filepath, "_heatmap", ".", device), device = device, ggsave(file=paste0(filepath, "_heatmap", ".", device), device = device,
......
File moved
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
import::here("dplyr", c("left_join", "rename"), .character_only=TRUE) import::here("dplyr", c("left_join", "rename"), .character_only=TRUE)
import::here("janitor", "adorn_totals", .character_only=TRUE) import::here("janitor", "adorn_totals", .character_only=TRUE)
import::here("magrittr", "%>%", .character_only=TRUE) import::here("magrittr", "%>%", .character_only=TRUE)
import::here("matrixStats", "rowVars", .character_only=TRUE)
import::here("glue", "glue_collapse", .character_only=TRUE) import::here("glue", "glue_collapse", .character_only=TRUE)
import::here( import::here(
"Seurat", "Seurat",
......
# -------------------------------------------------------------------------
# Test clustering analysis on pbmc_small dataset
# -------------------------------------------------------------------------
# global imports
import::from("magrittr", "%>%", .character_only=TRUE)
import::from(
"Seurat",
c("FindAllMarkers", "FindMarkers", "Idents<-", "VariableFeatures"),
.character_only=TRUE
)
import::from("SeuratDisk", "LoadH5Seurat", .character_only=TRUE)
# local imports
import::from(
"./scripts/export.R",
c(
"export_dotplots", "export_embeddings", "export_heatmap",
"export_markers", "export_markers_unique", "FindFullMarkers",
"visualize_markers"
),
.character_only=TRUE
)
import::from("./scripts/ggtheme.R", "theme_custom", .character_only=TRUE)
# -------------------------------------------------------------------------
# set global ggplot2 theme
ggplot2::theme_set(theme_custom())
# path to the folder where the raw data are stored
datadir <- "./data"
# path to the folder where to export figures and save data
savedir <- "./results/pbmc"
# Load Seurat object
Sobject <- LoadH5Seurat(file = file.path(savedir, "pbmc_small.h5Seurat"))
# -------------------------------------------------------------------------
# set colormap (a valid string from the pals package)
cmap <- "alphabet"
# export UMAP embedding with custom coloring based on clustering
res_vec <- colnames(Sobject@meta.data) %>%
strsplit(., split="SCT_snn_res.") %>%
sapply(., function(x) x[2]) %>%
na.omit(.) %>%
as.numeric(.)
export_embeddings(Sobject, res_vec, cmap, savedir, assay="SCT", device = "png")
# -------------------------------------------------------------------------
# choose clustering resolution (the number of clusters)
ident <- "SCT_snn_res.1"
Idents(Sobject) <- ident
# -------------------------------------------------------------------------
# find all relevant markers with constrained parameters
markers <- FindAllMarkers(
Sobject, only.pos = TRUE, min.pct = 0.1, min.diff.pct = 0.50,
logfc.threshold = 0.25
)
# save result table
export_markers(markers, ident, 10, savedir)
# save heatmap of top50 genes
top_genes <- head(VariableFeatures(object = Sobject, assay = "SCT"), 50)
export_heatmap(
Sobject, top_genes, "SCT", TRUE, cmap, "top50", savedir, device = "png"
)
# -------------------------------------------------------------------------
# export table of markers that are mostly expressed in a single cluster
# min_pct = 5 means expressed in at least 5% of the cells of a cluster
# threshold = 1 means expressed in less than 1% of the remaining cells
export_markers_unique(Sobject, ident, savedir, threshold = 1, min_pct = 5)
# -------------------------------------------------------------------------
# get all markers per cluster irregardless of their expression power
markers <- FindFullMarkers(Sobject, ident, savedir)
export_markers(markers, ident, NULL, savedir, prefix = "full_")
# heatmap of top5 genes per cluster
top5_genes <- markers %>%
dplyr::group_by(cluster) %>%
dplyr::top_n(n = 5, wt = avg_log2FC)
export_heatmap(
Sobject, top5_genes$gene, "SCT", TRUE, cmap, "top5_perclust", savedir,
device = "png"
)
# heatmap of top10 genes per cluster
top10_genes <- markers %>%
dplyr::group_by(cluster) %>%
dplyr::top_n(n = 10, wt = avg_log2FC)
export_heatmap(
Sobject, top10_genes$gene, "SCT", TRUE, cmap, "top10_perclust", savedir,
device = "png"
)
File moved
name: umap name: seurat
channels: channels:
- bokeh - bokeh
- conda-forge - conda-forge
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment