Contents

1 Introduction

Here, we demonstrate how to use Escort to evaluate multiple embeddings and trajectories in parallel during Steps 2 and 3, assuming Step 1 was successful.

2 Example using a simulated single-cell RNA-seq dataset

Below we show an example using a simulated single-cell RNA-seq dataset from Saelens et al. (2019), available at https://zenodo.org/records/1443566. For this example, we pre-normalized the data and removed genes with a mean expression below three. The simulated dataset has a linear topology.

# Load Escort, fix the random seed, and load the example dataset.
# NOTE(review): later chunks use `norm_counts` and `rawcounts`, presumably
# provided by this data() call — confirm.
library(Escort)
set.seed(11111)
data("exampleData_linear")

3 Generating multiple embeddings

We will define a helper function that builds one embedding together with its fitted trajectory, apply it across several settings, and store the results in a single list that is accessed in parallel in the following steps.

# Gene-level table whose row names are used by myembeds() below: the first
# `varyg` row names are taken as the highly variable genes.
# NOTE(review): this assumes the rows are sorted by decreasing variability —
# confirm against quick_model_gene_var().
gene.var <- quick_model_gene_var(norm_counts)

library(mclust)
library(slingshot)
# Build one 2D embedding and fit a Slingshot trajectory on it.
#
# Reads the global objects `gene.var` and `norm_counts`.
#
# Args:
#   varyg:  number of genes to keep, taken from the top rows of `gene.var`.
#           A value larger than the number of available genes uses all of
#           them instead of producing NA gene names.
#   usedim: dimension-reduction method forwarded to getDR_2D(); this
#           document calls it with "PCA", "UMAP" and "MDS".
#
# Returns:
#   The object produced by prepTraj() from the embedding, the pseudotime
#   matrix and the fitted curves (later accessed through `$Embedding`).
myembeds <- function(varyg, usedim) {
  # head() clamps to the available rows; `1:varyg` would index past the end
  # and yield NA names when varyg exceeds nrow(gene.var).
  genes.HVGs <- head(rownames(gene.var), varyg)
  embedding1 <- getDR_2D(norm_counts[genes.HVGs, ], usedim)

  # Cluster the cells in the embedding; the labels are handed to slingshot().
  cls1 <- Mclust(embedding1)$classification
  ti_out1 <- slingshot(data = embedding1, clusterLabels = cls1)
  rawpse1 <- slingPseudotime(ti_out1, na = TRUE)

  # For every fitted curve, keep its points `s` arranged by the curve's `ord`.
  ls_fitLine1 <- lapply(slingCurves(ti_out1), function(x) x$s[x$ord, ])

  prepTraj(dimred = embedding1, PT = rawpse1, fitLine = ls_fitLine1)
}

# Gene counts tried for every dimension-reduction method.
hvg_sizes <- c(100, 1000, 2000)

# Three embedding objects per method, in the order 100, 1000, 2000 genes.
pca_embeds <- lapply(hvg_sizes, function(k) myembeds(k, usedim = "PCA"))
umap_embeds <- lapply(hvg_sizes, function(k) myembeds(k, usedim = "UMAP"))
mds_embeds <- lapply(hvg_sizes, function(k) myembeds(k, usedim = "MDS"))

# Flat list of all nine objects: PCA first, then UMAP, then MDS.
embeddings <- c(pca_embeds, umap_embeds, mds_embeds)

4 Parallelizing embedding evaluations

We will use the mclapply() function from the parallel package to evaluate the embeddings. Each evaluation step will be performed separately, as before. Note that mclapply() relies on forking, so on Windows it can only run with a single core.

4.0.0.1 Examining cell connectivity on embeddings

library(parallel)
# Leave one core free, but never request fewer than one worker:
# detectCores() may return NA or 1, and mclapply() requires mc.cores >= 1.
# On Windows forking is unavailable, so mclapply() only accepts mc.cores = 1.
n <- if (.Platform$OS.type == "windows") {
  1L
} else {
  max(1L, detectCores() - 1L, na.rm = TRUE)
}
# Run the connectivity check on every embedding in parallel.
DRLvsCs <- mclapply(embeddings, function(x) LD_DCClusterscheck(x$Embedding), mc.cores = n)
# Show the check message reported for each embedding.
sapply(DRLvsCs, function(x) x$DCcheck)
## [1] "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step." "Congratulations! Escort did not find null spaces between clusters. Proceed to the next evaluation step."

4.0.0.2 Examining preservation of cell relationships

## This result is available from Step 1; it can also be recomputed here.
LvsC <- HD_DCClusterscheck(
  normcounts = norm_counts,
  rawcounts = rawcounts
)

# Run the similarity check on every embedding in parallel, always against
# the same `LvsC` result.
simi_cells <- mclapply(
  X = embeddings,
  FUN = function(embed_obj) {
    Similaritycheck(dimred = embed_obj$Embedding, clusters = LvsC)
  },
  mc.cores = n
)
# Collect the GoodRate value of each embedding.
sapply(X = simi_cells, FUN = function(res) res$GoodRate)
## [1] 0.886 0.944 0.960 0.846 0.896 0.922 0.882 0.952 0.952

4.0.0.3 Examining cell density

# Run GOFeval() on every embedding in parallel.
gof_evals <- mclapply(
  X = embeddings,
  FUN = function(embed_obj) {
    GOFeval(embed_obj$Embedding)
  },
  mc.cores = n
)
# Collect the occupiedRate value of each embedding.
sapply(X = gof_evals, FUN = function(res) res$occupiedRate)
## [1] 0.532 0.353 0.312 0.226 0.165 0.110 0.546 0.636 0.385

4.0.0.4 Examining ambiguous cells

# Run UshapeDetector() on every embedding object in parallel; unlike the
# other checks it receives the whole object, not just `$Embedding`.
ushap_evals <- mclapply(
  X = embeddings,
  FUN = function(embed_obj) UshapeDetector(embed_obj),
  mc.cores = n
)
# Collect the Ambpct value of each embedding.
sapply(X = ushap_evals, FUN = function(res) res$Ambpct)
## [1] 0.032 0.006 0.004 0.008 0.000 0.000 0.028 0.002 0.012

5 Scoring System

# One row per embedding, one column per evaluation result.
scoredf <- data.frame(
  DCcheck = sapply(DRLvsCs, function(res) res$ifConnected),
  SimiRetain = sapply(simi_cells, function(res) res$GoodRate),
  GOF = sapply(gof_evals, function(res) res$occupiedRate),
  USHAPE = sapply(ushap_evals, function(res) res$Ambpct)
)
# Row labels follow the order of `embeddings` (PCA, UMAP, MDS; each with
# 100, 1000, 2000 genes). paste() recycles the three gene counts across the
# nine method labels.
rownames(scoredf) <- paste(
  rep(c("PCA", "UMAP", "MDS"), each = 3),
  c(100, 1000, 2000)
)
calcScore(scoredf)
##   Row.names DCcheck SimiRetain   GOF USHAPE Scaled_GOF Scaled_USHAPE  score ranking                  decision note
## 9 UMAP 2000    TRUE      0.922 0.110  0.000      0.792         1.000  1.653       1     Recommended Embedding   NA
## 8 UMAP 1000    TRUE      0.896 0.165  0.000      0.689         1.000  1.513       2     Recommended Embedding   NA
## 6  PCA 2000    TRUE      0.960 0.312  0.004      0.411         0.862  1.222       3     Recommended Embedding   NA
## 7  UMAP 100    TRUE      0.846 0.226  0.008      0.574         0.724  1.098       4     Recommended Embedding   NA
## 5  PCA 1000    TRUE      0.944 0.353  0.006      0.334         0.793  1.064       5     Recommended Embedding   NA
## 3  MDS 2000    TRUE      0.952 0.385  0.012      0.274         0.586  0.819       6     Recommended Embedding   NA
## 2  MDS 1000    TRUE      0.952 0.636  0.002     -0.200         0.931  0.696       7     Recommended Embedding   NA
## 1   MDS 100    TRUE      0.882 0.546  0.028     -0.030         0.034  0.004       8     Recommended Embedding   NA
## 4   PCA 100    TRUE      0.886 0.532  0.032     -0.004        -0.103 -0.095       9 Non-recommended Embedding   NA

6 SessionInfo

# Print the R version, platform, and package versions of this session.
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-apple-darwin20
## Running under: macOS Ventura 13.6.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] SCORPIUS_1.0.9              umap_0.2.10.0               slingshot_2.12.0            TrajectoryUtils_1.12.0      SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0 Biobase_2.64.0              GenomicRanges_1.56.1        GenomeInfoDb_1.40.1         IRanges_2.38.1              S4Vectors_0.42.1            BiocGenerics_0.50.0         MatrixGenerics_1.16.0       matrixStats_1.3.0           princurve_2.1.6             mclust_6.1.1                Escort_0.1.13               BiocStyle_2.32.1           
## 
## loaded via a namespace (and not attached):
##   [1] coin_1.4-3                R.methodsS3_1.8.2         SC3_1.32.0                DT_0.33                   Biostrings_2.72.1         TH.data_1.1-2             vctrs_0.6.5               spatstat.random_3.3-1     digest_0.6.36             png_0.1-8                 proxy_0.4-27              pcaPP_2.0-4               ggrepel_0.9.5             org.Mm.eg.db_3.19.1       deldir_2.0-4              dynparam_1.0.2            MASS_7.3-61               reshape2_1.4.4            httpuv_1.6.15             foreach_1.5.2             qvalue_2.36.0             withr_3.0.0               xfun_0.45                 ggfun_0.1.5               survival_3.7-0            doRNG_1.8.6               memoise_2.0.1             proxyC_0.4.1              clusterProfiler_4.12.0    gson_0.1.0                tidytree_0.4.6            zoo_1.8-12                pbapply_1.7-2             R.oo_1.26.0               DEoptimR_1.1-3            KEGGREST_1.44.1           promises_1.3.0            httr_1.4.7               
##  [39] rstatix_0.7.2             ps_1.7.7                  rstudioapi_0.16.0         UCSC.utils_1.0.0          generics_0.1.3            DOSE_3.30.1               processx_3.8.4            zlibbioc_1.50.0           sfsmisc_1.1-18            ggraph_2.2.1              polyclip_1.10-6           GenomeInfoDbData_1.2.12   SparseArray_1.4.8         xtable_1.8-4              stringr_1.5.1             desc_1.4.3                pracma_2.4.4              doParallel_1.0.17         evaluate_0.24.0           S4Arrays_1.4.1            hms_1.1.3                 bookdown_0.40             irlba_2.3.5.1             colorspace_2.1-0          jmuOutlier_2.2            ROCR_1.0-11               reticulate_1.38.0         spatstat.data_3.1-2       magrittr_2.0.3            readr_2.1.5               later_1.3.2               viridis_0.6.5             modeltools_0.2-23         ggtree_3.12.0             lattice_0.22-6            spatstat.geom_3.3-2       robustbase_0.99-3         shadowtext_0.1.3         
##  [77] cowplot_1.1.3             class_7.3-22              pillar_1.9.0              nlme_3.1-165              iterators_1.0.14          compiler_4.4.1            RSpectra_0.16-1           stringi_1.8.4             shinycssloaders_1.0.0     TSP_1.2-4                 plyr_1.8.9                crayon_1.5.3              abind_1.4-5               gridGraphics_0.5-1        locfit_1.5-9.10           sp_2.1-4                  graphlayouts_1.1.1        org.Hs.eg.db_3.19.1       bit_4.0.5                 sandwich_3.1-0            libcoin_1.0-10            dplyr_1.1.4               fastmatch_1.1-4           codetools_0.2-20          multcomp_1.4-25           openssl_2.2.0             bslib_0.7.0               e1071_1.7-14              mime_0.12                 splines_4.4.1             Rcpp_1.0.12               sparseMatrixStats_1.16.0  HDO.db_0.99.1             interp_1.1-6              knitr_1.48                blob_1.2.4                utf8_1.2.4                WriteXLS_6.6.0           
## [115] fs_1.6.4                  alphahull_2.5             DelayedMatrixStats_1.26.0 ggplotify_0.1.2           tibble_3.2.1              Matrix_1.7-0              statmod_1.5.0             tzdb_0.4.0                tweenr_2.0.3              pkgconfig_2.0.3           pheatmap_1.0.12           sgeostat_1.0-27           tools_4.4.1               cachem_1.1.0              RSQLite_2.3.7             viridisLite_0.4.2         DBI_1.2.3                 fastmap_1.2.0             rmarkdown_2.27            scales_1.3.0              grid_4.4.1                shinydashboard_0.7.2      broom_1.0.6               sass_0.4.9                patchwork_1.2.0           FNN_1.1.4                 BiocManager_1.30.23       carData_3.0-5             RANN_2.6.1                farver_2.1.2              tidygraph_1.3.1           scatterpie_0.2.3          yaml_2.3.9                cli_3.6.3                 purrr_1.0.2               lifecycle_1.0.4           askpass_1.2.0             mvtnorm_1.2-5            
## [153] backports_1.5.0           BiocParallel_1.38.0       gtable_0.3.5              lmds_0.1.0                ape_5.8                   dynutils_1.0.11           limma_3.60.3              jsonlite_1.8.8            edgeR_4.2.1               ggplot2_3.5.1             shotGroups_0.8.2          bit64_4.0.5               assertthat_0.2.1          Rtsne_0.17                yulab.utils_0.1.4         spatstat.utils_3.0-5      ranger_0.16.0             RcppParallel_5.1.8        jquerylib_0.1.4           GOSemSim_2.30.0           shinyjs_2.1.0             spatstat.univar_3.0-0     R.utils_2.12.3            rrcov_1.7-5               lazyeval_0.2.2            shiny_1.8.1.1             htmltools_0.5.8.1         enrichplot_1.24.0         GO.db_3.19.1              glue_1.7.0                carrier_0.1.1             XVector_0.44.0            treeio_1.28.0             dynwrap_1.2.4             RMTstat_0.3.1             gridExtra_2.3             boot_1.3-30               babelwhale_1.2.0         
## [191] igraph_2.0.3              R6_2.5.1                  tidyr_1.3.1               CompQuadForm_1.4.3        cluster_2.1.6             rngtools_1.5.2            aplot_0.2.3               DelayedArray_0.30.1       tidyselect_1.2.1          ggforce_0.4.2             ash_1.0-15                car_3.1-2                 AnnotationDbi_1.66.0      fastICA_1.2-4             munsell_0.5.1             KernSmooth_2.23-24        splancs_2.01-45           data.table_1.15.4         htmlwidgets_1.6.4         fgsea_1.30.0              RColorBrewer_1.1-3        rlang_1.1.4               remotes_2.5.0             fansi_1.0.6               parallelDist_0.2.6