Here, we demonstrate how to use Escort to evaluate multiple embeddings and trajectories in parallel during Steps 2 and 3, assuming Step 1 was successful.
Below we show an example using a simulated single-cell RNA-seq dataset from Saelens et al., 2019, available at https://zenodo.org/records/1443566. For this example, we pre-normalized the data and removed genes with a mean expression of less than three. This simulated dataset has a linear topology.
# Load Escort and fix the RNG seed before any embedding is generated.
library(Escort)
set.seed(11111)
# Load the example dataset shipped as "exampleData_linear".
# NOTE(review): presumably this provides `norm_counts` and `rawcounts`, which
# are used below — confirm against the package data documentation.
data("exampleData_linear")
We will define a function that generates an embedding together with its fitted trajectory, and use it to generate all embeddings. We then store them in a list, which will be accessed in parallel in the following steps.
# Result of quick_model_gene_var() on the normalized counts; myembeds() below
# selects its genes from the row names of this object.
# NOTE(review): presumably rows are ordered by decreasing variability — confirm.
gene.var <- quick_model_gene_var(norm_counts)
# Mclust() and the slingshot functions are called inside myembeds().
library(mclust)
library(slingshot)
# Build one 2D embedding and fit a slingshot trajectory on it.
#
# Arguments:
#   varyg   Number of genes to use; the first `varyg` row names of the global
#           `gene.var` are selected.
#   usedim  Dimension-reduction method forwarded to getDR_2D()
#           (called below with "PCA", "UMAP" and "MDS").
#
# Returns the object produced by prepTraj(), built from the embedding, the
# slingshot pseudotime and the fitted curves.
#
# Relies on the globals `gene.var` and `norm_counts`.
myembeds <- function(varyg, usedim) {
  # Fail early with a clear message instead of silently picking one gene
  # (1:0 yields c(1, 0)) or hitting an opaque NA-subscript error.
  stopifnot(
    is.numeric(varyg), length(varyg) == 1L,
    varyg >= 1, varyg <= nrow(gene.var)
  )
  genes.HVGs <- rownames(gene.var)[seq_len(varyg)]
  # drop = FALSE keeps a matrix even when a single gene is selected.
  embedding1 <- getDR_2D(norm_counts[genes.HVGs, , drop = FALSE], usedim)
  # Cluster the cells in the embedding; slingshot needs cluster labels.
  cls1 <- Mclust(embedding1)$classification
  ti_out1 <- slingshot(data = embedding1, clusterLabels = cls1)
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  rawpse1 <- slingPseudotime(ti_out1, na = TRUE)
  # For every fitted curve, take its points in the order given by `ord`.
  ls_fitLine1 <- lapply(slingCurves(ti_out1), function(x) x$s[x$ord, ])
  prepTraj(dimred = embedding1, PT = rawpse1, fitLine = ls_fitLine1)
}
# One embedding object per (method, gene count) pair, built with myembeds().
pca_embeds <- lapply(
  c(100, 1000, 2000),
  function(n_genes) myembeds(n_genes, usedim = "PCA")
)
umap_embeds <- lapply(
  c(100, 1000, 2000),
  function(n_genes) myembeds(n_genes, usedim = "UMAP")
)
mds_embeds <- lapply(
  c(100, 1000, 2000),
  function(n_genes) myembeds(n_genes, usedim = "MDS")
)
# Flat list of nine objects, ordered PCA, UMAP, MDS.
embeddings <- c(pca_embeds, umap_embeds, mds_embeds)
We will use the mclapply() function from the parallel package to evaluate the embeddings. Each evaluation step will be performed separately, as before.
library(parallel)
# Leave one core free, but never request fewer than one worker:
# detectCores() can return 1 (giving 0 here) or NA, and mclapply() requires
# mc.cores >= 1.
n <- max(1L, detectCores() - 1L, na.rm = TRUE)
# Run the low-dimensional cluster check on each embedding in parallel.
DRLvsCs <- mclapply(
  embeddings,
  function(x) LD_DCClusterscheck(x$Embedding),
  mc.cores = n
)
sapply(DRLvsCs, function(x) x$DCcheck)
## [1] "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step."
## This was obtained in Step 1, or can be re-run here.
LvsC <- HD_DCClusterscheck(normcounts = norm_counts, rawcounts = rawcounts)
# Run Similaritycheck() on every embedding, spread over n workers.
simi_cells <- mclapply(
  embeddings,
  function(emb) Similaritycheck(dimred = emb$Embedding, clusters = LvsC),
  mc.cores = n
)
sapply(simi_cells, function(res) res$GoodRate)
## [1] 0.886 0.944 0.960 0.846 0.896 0.922 0.882 0.952 0.952
# Run GOFeval() on the coordinates of every embedding, spread over n workers.
gof_evals <- mclapply(
  embeddings,
  function(emb) GOFeval(emb$Embedding),
  mc.cores = n
)
sapply(gof_evals, function(res) res$occupiedRate)
## [1] 0.532 0.353 0.312 0.226 0.165 0.110 0.546 0.636 0.385
# UshapeDetector() receives each full embedding object, not only its
# coordinates.
ushap_evals <- mclapply(X = embeddings, FUN = UshapeDetector, mc.cores = n)
sapply(ushap_evals, function(res) res$Ambpct)
## [1] 0.032 0.006 0.004 0.008 0.000 0.000 0.028 0.002 0.012
# Collect one row of evaluation results per embedding.
scoredf <- data.frame(
  DCcheck = sapply(DRLvsCs, function(res) res$ifConnected),
  SimiRetain = sapply(simi_cells, function(res) res$GoodRate),
  GOF = sapply(gof_evals, function(res) res$occupiedRate),
  USHAPE = sapply(ushap_evals, function(res) res$Ambpct)
)
# Row labels follow the order of `embeddings`: method first, then gene count.
rownames(scoredf) <- paste(
  rep(c("PCA", "UMAP", "MDS"), each = 3),
  rep(c(100, 1000, 2000), times = 3)
)
calcScore(scoredf)
## Row.names DCcheck SimiRetain GOF USHAPE Scaled_GOF Scaled_USHAPE score ranking decision note
## 9 UMAP 2000 TRUE 0.922 0.110 0.000 0.792 1.000 1.653 1 Recommended Embedding NA
## 8 UMAP 1000 TRUE 0.896 0.165 0.000 0.689 1.000 1.513 2 Recommended Embedding NA
## 6 PCA 2000 TRUE 0.960 0.312 0.004 0.411 0.862 1.222 3 Recommended Embedding NA
## 7 UMAP 100 TRUE 0.846 0.226 0.008 0.574 0.724 1.098 4 Recommended Embedding NA
## 5 PCA 1000 TRUE 0.944 0.353 0.006 0.334 0.793 1.064 5 Recommended Embedding NA
## 3 MDS 2000 TRUE 0.952 0.385 0.012 0.274 0.586 0.819 6 Recommended Embedding NA
## 2 MDS 1000 TRUE 0.952 0.636 0.002 -0.200 0.931 0.696 7 Recommended Embedding NA
## 1 MDS 100 TRUE 0.882 0.546 0.028 -0.030 0.034 0.004 8 Recommended Embedding NA
## 4 PCA 100 TRUE 0.886 0.532 0.032 -0.004 -0.103 -0.095 9 Non-recommended Embedding NA
# Print the R version, platform and package versions of this session.
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-apple-darwin20
## Running under: macOS Ventura 13.6.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] SCORPIUS_1.0.9 umap_0.2.10.0 slingshot_2.12.0 TrajectoryUtils_1.12.0 SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0 Biobase_2.64.0 GenomicRanges_1.56.1 GenomeInfoDb_1.40.1 IRanges_2.38.1 S4Vectors_0.42.1 BiocGenerics_0.50.0 MatrixGenerics_1.16.0 matrixStats_1.3.0 princurve_2.1.6 mclust_6.1.1 Escort_0.1.13 BiocStyle_2.32.1
##
## loaded via a namespace (and not attached):
## [1] coin_1.4-3 R.methodsS3_1.8.2 SC3_1.32.0 DT_0.33 Biostrings_2.72.1 TH.data_1.1-2 vctrs_0.6.5 spatstat.random_3.3-1 digest_0.6.36 png_0.1-8 proxy_0.4-27 pcaPP_2.0-4 ggrepel_0.9.5 org.Mm.eg.db_3.19.1 deldir_2.0-4 dynparam_1.0.2 MASS_7.3-61 reshape2_1.4.4 httpuv_1.6.15 foreach_1.5.2 qvalue_2.36.0 withr_3.0.0 xfun_0.45 ggfun_0.1.5 survival_3.7-0 doRNG_1.8.6 memoise_2.0.1 proxyC_0.4.1 clusterProfiler_4.12.0 gson_0.1.0 tidytree_0.4.6 zoo_1.8-12 pbapply_1.7-2 R.oo_1.26.0 DEoptimR_1.1-3 KEGGREST_1.44.1 promises_1.3.0 httr_1.4.7
## [39] rstatix_0.7.2 ps_1.7.7 rstudioapi_0.16.0 UCSC.utils_1.0.0 generics_0.1.3 DOSE_3.30.1 processx_3.8.4 zlibbioc_1.50.0 sfsmisc_1.1-18 ggraph_2.2.1 polyclip_1.10-6 GenomeInfoDbData_1.2.12 SparseArray_1.4.8 xtable_1.8-4 stringr_1.5.1 desc_1.4.3 pracma_2.4.4 doParallel_1.0.17 evaluate_0.24.0 S4Arrays_1.4.1 hms_1.1.3 bookdown_0.40 irlba_2.3.5.1 colorspace_2.1-0 jmuOutlier_2.2 ROCR_1.0-11 reticulate_1.38.0 spatstat.data_3.1-2 magrittr_2.0.3 readr_2.1.5 later_1.3.2 viridis_0.6.5 modeltools_0.2-23 ggtree_3.12.0 lattice_0.22-6 spatstat.geom_3.3-2 robustbase_0.99-3 shadowtext_0.1.3
## [77] cowplot_1.1.3 class_7.3-22 pillar_1.9.0 nlme_3.1-165 iterators_1.0.14 compiler_4.4.1 RSpectra_0.16-1 stringi_1.8.4 shinycssloaders_1.0.0 TSP_1.2-4 plyr_1.8.9 crayon_1.5.3 abind_1.4-5 gridGraphics_0.5-1 locfit_1.5-9.10 sp_2.1-4 graphlayouts_1.1.1 org.Hs.eg.db_3.19.1 bit_4.0.5 sandwich_3.1-0 libcoin_1.0-10 dplyr_1.1.4 fastmatch_1.1-4 codetools_0.2-20 multcomp_1.4-25 openssl_2.2.0 bslib_0.7.0 e1071_1.7-14 mime_0.12 splines_4.4.1 Rcpp_1.0.12 sparseMatrixStats_1.16.0 HDO.db_0.99.1 interp_1.1-6 knitr_1.48 blob_1.2.4 utf8_1.2.4 WriteXLS_6.6.0
## [115] fs_1.6.4 alphahull_2.5 DelayedMatrixStats_1.26.0 ggplotify_0.1.2 tibble_3.2.1 Matrix_1.7-0 statmod_1.5.0 tzdb_0.4.0 tweenr_2.0.3 pkgconfig_2.0.3 pheatmap_1.0.12 sgeostat_1.0-27 tools_4.4.1 cachem_1.1.0 RSQLite_2.3.7 viridisLite_0.4.2 DBI_1.2.3 fastmap_1.2.0 rmarkdown_2.27 scales_1.3.0 grid_4.4.1 shinydashboard_0.7.2 broom_1.0.6 sass_0.4.9 patchwork_1.2.0 FNN_1.1.4 BiocManager_1.30.23 carData_3.0-5 RANN_2.6.1 farver_2.1.2 tidygraph_1.3.1 scatterpie_0.2.3 yaml_2.3.9 cli_3.6.3 purrr_1.0.2 lifecycle_1.0.4 askpass_1.2.0 mvtnorm_1.2-5
## [153] backports_1.5.0 BiocParallel_1.38.0 gtable_0.3.5 lmds_0.1.0 ape_5.8 dynutils_1.0.11 limma_3.60.3 jsonlite_1.8.8 edgeR_4.2.1 ggplot2_3.5.1 shotGroups_0.8.2 bit64_4.0.5 assertthat_0.2.1 Rtsne_0.17 yulab.utils_0.1.4 spatstat.utils_3.0-5 ranger_0.16.0 RcppParallel_5.1.8 jquerylib_0.1.4 GOSemSim_2.30.0 shinyjs_2.1.0 spatstat.univar_3.0-0 R.utils_2.12.3 rrcov_1.7-5 lazyeval_0.2.2 shiny_1.8.1.1 htmltools_0.5.8.1 enrichplot_1.24.0 GO.db_3.19.1 glue_1.7.0 carrier_0.1.1 XVector_0.44.0 treeio_1.28.0 dynwrap_1.2.4 RMTstat_0.3.1 gridExtra_2.3 boot_1.3-30 babelwhale_1.2.0
## [191] igraph_2.0.3 R6_2.5.1 tidyr_1.3.1 CompQuadForm_1.4.3 cluster_2.1.6 rngtools_1.5.2 aplot_0.2.3 DelayedArray_0.30.1 tidyselect_1.2.1 ggforce_0.4.2 ash_1.0-15 car_3.1-2 AnnotationDbi_1.66.0 fastICA_1.2-4 munsell_0.5.1 KernSmooth_2.23-24 splancs_2.01-45 data.table_1.15.4 htmlwidgets_1.6.4 fgsea_1.30.0 RColorBrewer_1.1-3 rlang_1.1.4 remotes_2.5.0 fansi_1.0.6 parallelDist_0.2.6