Tutorial

Data Preparation

Required Data

  • Single-cell reference data: scRNA-seq or snRNA-seq with cell type annotations
  • Spatial transcriptomics data: ST data from platforms like 10X Visium

Data Format Requirements

# R format - Seurat objects
# Single-cell reference
sc_ref <- readRDS("path/to/sc_data.rds")
# Must contain a metadata column holding the cell type annotations. The name
# of that column is what gets passed as `clust_vr` (e.g. sc_ref$celltype when
# clust_vr is "celltype").

# Spatial data
st_vis <- readRDS("path/to/st_data.rds")
# Standard Seurat object with spatial assays

# Python format - AnnData objects
import scanpy as sc

# Single-cell reference
sc_ref = sc.read("path/to/sc.h5ad")
# Must contain a column in sc_ref.obs holding the cell type annotations. The
# name of that column is what gets passed as `clust_vr`
# (e.g. sc_ref.obs['celltype'] when clust_vr is 'celltype').

# Spatial data
st_vis = sc.read("path/to/st.h5ad")

Basic Usage

R Implementation

# Load required libraries
library(Seurat)
library(dplyr)
library(tibble)
library(UCASpatial)  # provides UCASpatial_deconv()

# Load your data
st_vis <- readRDS("/datapath/ST_data.rds")
sc_ref <- readRDS("/datapath/SC_data.rds")

# Name of the sc_ref metadata column that holds the cell type annotations
# (adjust based on your data)
clust_vr <- "celltype"

# Run UCASpatial to map cell subpopulations to spatial locations
UCASpatial_result <- UCASpatial_deconv(
    sc_ref = sc_ref,
    st_vis = st_vis,
    clust_vr = clust_vr
)

# Extract and process results
# The second element of the result is the deconvolution matrix; every column
# except the last one is kept.
# NOTE(review): the last column is presumably a residual/QC column rather
# than a cell type - confirm against the package documentation.
decon_matr <- as.matrix(UCASpatial_result[[2]])
# seq_len() avoids the 1:0 trap when only one column exists, and
# drop = FALSE keeps the result a matrix even if a single column remains.
decon_matr <- decon_matr[, seq_len(ncol(decon_matr) - 1), drop = FALSE]
# Rescale each row so that the kept columns sum to 1
decon_matr <- decon_matr / rowSums(decon_matr)
rownames(decon_matr) <- colnames(st_vis)

# Create dataframe for integration with Seurat
# NOTE: data.frame() sanitizes column names by default, so a cell type such
# as "CD8 Tem" appears as "CD8.Tem" in the metadata.
decon_df <- decon_matr %>%
    data.frame() %>%
    tibble::rownames_to_column("barcodes")

# Add deconvolution results to Seurat object metadata
# (join on the barcode so that every spot keeps its existing metadata row)
st_vis@meta.data <- st_vis@meta.data %>%
    tibble::rownames_to_column("barcodes") %>%
    dplyr::left_join(decon_df, by = "barcodes") %>%
    tibble::column_to_rownames("barcodes")

Python Implementation

import UCASpatial_ds_R1
import scanpy as sc
import pandas as pd

# Read the single-cell reference and the spatial dataset
sc_ref = sc.read("path/to/sc.h5ad")
st_vis = sc.read("path/to/st.h5ad")

# Normalize the single-cell data without modifying sc_ref.X in place, then
# keep the normalized matrix in the 'data' layer
norm_out = sc.pp.normalize_total(sc_ref, target_sum=1, inplace=False)
sc_ref.layers['data'] = norm_out['X']

# Set up the UCASpatial model
ucas = UCASpatial_ds_R1.UCASpatial(
    sc_ref=sc_ref,
    st_vis=st_vis,
    clust_vr='celltype',  # annotation column name; adjust based on your data
    meta_filter=False,
    random_seed=12345,
)

# Run the deconvolution and unpack the returned entries
result = ucas.run()
nmf_components = result['nmf_components']
cell_proportions = result['proportions']

# Write the estimated proportions to disk
cell_proportions.to_csv('cell_proportions.csv')

Complete Example: Human Colorectal Cancer Analysis

Dataset Information

This example uses human colorectal cancer data from Qi et al., 2022.

Download Example Data

Download the example datasets from: Zenodo Repository

Spatial example data: Example_Data_Human_CRC_ST_data.rds

Single cell reference example data: Example_Data_sc_ref_downsampled.rds

Load and Explore Data

# Load required libraries
library(Seurat)
library(dplyr)
library(tibble)
library(UCASpatial)

# Load spatial transcriptomics data
# The file name matches the downloaded example file; adjust the path to
# wherever you saved it.
st_vis <- readRDS("./Example_Data_Human_CRC_ST_data.rds")
st_vis
    
An object of class Seurat 
36601 features across 4248 samples within 1 assay 
Active assay: Spatial (36601 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 2 dimensional reductions calculated: pca, umap
 1 image present: image

# Load single-cell reference data
# The file name matches the downloaded example file; adjust the path to
# wherever you saved it.
sc_ref <- readRDS("./Example_Data_sc_ref_downsampled.rds")
sc_ref
    
An object of class Seurat 
45217 features across 2913 samples within 7 assays 
Active assay: RNA (27905 features, 0 variable features)
 2 layers present: counts, data
 6 other assays present: integrated, source, protein, transfered_type, transfered_subtype, SCT
 2 dimensional reductions calculated: pca, umap

# Metadata column of sc_ref that stores the cell type annotation
clust_vr <- 'UCASpatial_clus_v7'

# Count the reference cells available for each annotated cell type
table(sc_ref@meta.data[[clust_vr]])
              CD8 Tem               CD8 Tex               CD8 Trm 
                  100                   100                    48 
               CD4 Tn                CD4 Tm                   Tfh 
                  100                   100                    15 
             Th1/Th17                  Treg                    NK 
                   34                   100                   100 
                  ILC                  Mast              Monocyte 
                  100                   100                   100 
           Neutrophil             IGF1+ Mac              FN1+ Mac 
                    2                   100                   100 
            PDPN+ Mac                  cDC1                  cDC2 
                   72                    58                   100 
                 cDC3                   pDC            Fibroblast 
                   58                    31                   100 
          FAP+ Myofib         POSTN+ Myofib         Smooth Muscle 
                  100                   100                   100 
             Pericyte           Endothelium                 Glial 
                  100                   100                   100 
                    B                Plasma         Epithelium c1 
                  100                   100                   100 
LEFTY1+ Epithelium c2 TM4SF1+ Epithelium c3   MUC2+ Epithelium c4 
                  100                   100                   100 
 REG1A+ Epithelium c5 
                   95 

Run UCASpatial Deconvolution

# Run UCASpatial with optimized parameters
# No output path is supplied here. As the log below shows, the function then
# falls back to the current working directory and auto-saves the marker genes
# there as cluster_markers.rds.
UCASpatial_result <- UCASpatial_deconv(
    sc_ref = sc_ref,
    st_vis = st_vis,
    clust_vr = clust_vr,
    downsample_n = 0,                    # No downsampling
    meta.filter = FALSE,                 # No meta filtering
    cos.filter.threshold = 0.05,         # Cosine similarity threshold
    ent.filter.threshold = 0.5,          # Entropy threshold
    weight.filter.threshold = 0.2        # Weight filtering threshold
)

Load required packages...
Step0    Check the variables............................
Warning: Using current path: "/data/xy/Spatial_transcriptome/eWEIDE/20251201_NC_revision_R1/R3Q8_tutorial" as the 'output_path'...
...........
Load the spatial expression matrix (row counts):st_vis@assays$Spatial@counts
Step0    Preprocess the sc_ref data.....................
Step0.1  Meta.cell filter...............................
Step1    Calculate the markers and add weights..........
Step1.1  Calculate the marker genes.....................
Change the idents of sc_ref into 'sc_ref@meta.data$UCASpatial_clus_v7'...
Calculating cluster CD8 Tem
Calculating cluster CD8 Tex
Calculating cluster CD8 Trm
Calculating cluster CD4 Tn
Calculating cluster CD4 Tm
Calculating cluster Tfh
Calculating cluster Th1/Th17
Calculating cluster Treg
Calculating cluster NK
Calculating cluster ILC
Calculating cluster Mast
Calculating cluster Monocyte
Calculating cluster Neutrophil
Calculating cluster IGF1+ Mac
Calculating cluster FN1+ Mac
Calculating cluster PDPN+ Mac
Calculating cluster cDC1
Calculating cluster cDC2
Calculating cluster cDC3
Calculating cluster pDC
Calculating cluster Fibroblast
Calculating cluster FAP+ Myofib
Calculating cluster POSTN+ Myofib
Calculating cluster Smooth Muscle
Calculating cluster Pericyte
Calculating cluster Endothelium
Calculating cluster Glial
Calculating cluster B
Calculating cluster Plasma
Calculating cluster Epithelium c1
Calculating cluster LEFTY1+ Epithelium c2
Calculating cluster TM4SF1+ Epithelium c3
Calculating cluster MUC2+ Epithelium c4
Calculating cluster REG1A+ Epithelium c5
Auto save the marker genes under the path:
/data/xy/Spatial_transcriptome/eWEIDE/20251201_NC_revision_R1/R3Q8_tutorial/cluster_markers.rds
Step1.2  Calculate the entropy-based weight.............
  |=====================================================================| 100%

Warning: Setting row names on a tibble is deprecated.

Step1.3  Calculate the cosine-based weight..............
Warning: x or y has vectors with all zero; consider setting use_nan = TRUE to set these values to NaN or use_nan = FALSE to suppress this warning
Step1.4  Filter the markers.............................
Save the markers........................................
Warning: Layer counts isn't present in the assay object; returning NULL
Step2  Train the nsNMF model............................
[1] "Preparing Gene set"
Warning: Layer counts isn't present in the assay object; returning NULL
Warning in asMethod(object) :
  sparse->dense coercion: allocating vector of size 1.2 GiB
Warning: Layer counts isn't present in the assay object; returning NULL
Normalize the sc_ref matrix...
Initialize the NMF matrices...
[1] "NMF Training..."
[1] "Time to initialize and train NMF model was 0.05mins"
Step3  Calculate the cluster-topic profile..............
Step4  Weighted-NNLS to implement the deconvolution.....
[1] "Deconvoluting spots"
  |=====================================================================| 100%

Quality Control Assessment

# Generate quality control plot
# Inputs are the `h` slot of the first model element and the second model
# element, both taken from the first entry of the UCASpatial result.
# `[[2]]` extracts the second returned element itself; `[2]` would instead
# return a one-element list wrapping it, which cannot be modified or saved
# as a plot directly.
# NOTE(review): the second element is presumably a ggplot object - confirm.
p <- dot_plot_profiles_fun(
    UCASpatial_result[[1]][[1]]@h,
    UCASpatial_result[[1]][[2]]
)[[2]]
print(p)

Quality Control Plot Example:

Quality Control Plot

Dot plot showing the quality of deconvolution for each cell type. The size represents the expression level and color intensity represents the percentage of expressing cells.

Data Source: View on GitHub

Extract and Process Results

# Extract deconvolution matrix
decon_matr <- as.matrix(UCASpatial_result[[2]])
# Keep every column except the last one.
# NOTE(review): the last column is presumably a residual/QC column rather
# than a cell type - confirm against the package documentation.
# seq_len() avoids the 1:0 trap when only one column exists, and
# drop = FALSE keeps the result a matrix even if a single column remains.
cell_proportions <- decon_matr[, seq_len(ncol(decon_matr) - 1), drop = FALSE]
# Rescale each row so that the proportions of a spot sum to 1
cell_proportions <- cell_proportions / rowSums(cell_proportions)
rownames(cell_proportions) <- colnames(st_vis)

# Create dataframe for visualization
# NOTE: data.frame() sanitizes column names by default (spaces and "+" become
# "."), so the metadata columns differ from the original cell type names.
# The Annotation assay created below is built from the matrix directly.
decon_df <- cell_proportions %>%
    data.frame() %>%
    tibble::rownames_to_column("barcodes")

# Add results to Seurat object
# (join on the barcode so that every spot keeps its existing metadata row)
st_vis@meta.data <- st_vis@meta.data %>%
    tibble::rownames_to_column("barcodes") %>%
    dplyr::left_join(decon_df, by = "barcodes") %>%
    tibble::column_to_rownames("barcodes")

# Create annotation assay for visualization
# The matrix is transposed so that cell types are features (rows) and spots
# are columns.
Annotation_assay <- CreateAssayObject(t(cell_proportions))
st_vis@assays$Annotation <- Annotation_assay
st_vis@assays$Annotation@key <- "annotation_"
DefaultAssay(st_vis) <- "Annotation"

Visualize Cell Type Distributions

# Cell types to display
cell_types_to_plot <- c('FAP+ Myofib', 'CD4 Tn', 'B')

# Draw one spatial panel per requested cell type
spatial_plots <- Seurat::SpatialFeaturePlot(
    st_vis,
    features = cell_types_to_plot,
    min.cutoff = 0.03,
    alpha = c(0.3, 1),
    pt.size.factor = 1000,
    stroke = NA
)

# Show the panels
spatial_plots

Cell Type Distribution Example:

Cell Type Distributions

Spatial distribution of selected cell types in colorectal cancer tissue. Color indicates the proportion of each cell type at each spatial location.

Data Source: View on GitHub

10X Visium HD Analysis

R Implementation for Visium HD

# For 10X Visium HD data
# The row names of the spatial object are captured first and handed to the
# function as `rowname_st_vis`.
# NOTE(review): for a Seurat object rownames() presumably returns the feature
# names of the default assay - confirm this is what UCASpatial_HD_deconv
# expects, and that the default assay matches `spatial.assay`.
rowname_st_vis <- rownames(st_vis)
UCASpatial_result <- UCASpatial_HD_deconv(
    sc_ref = sc_ref,
    st_vis = st_vis,
    spatial.assay = 'Spatial.008um',  # adjust resolution as needed
    clust_vr = clust_vr,
    rowname_st_vis = rowname_st_vis,
    meta.filter = FALSE
)

# Alternative resolution options:
# spatial.assay = 'Spatial.016um'  # 16μm resolution
# spatial.assay = 'Spatial.008um'  # 8μm resolution (highest)

Tips for HD Data

  • Use appropriate spatial resolution (e.g., 'Spatial.008um', 'Spatial.016um')
  • Consider computational resources for high-resolution data
  • Validate results with known tissue markers
  • Start with lower resolution for initial testing
  • Monitor memory usage during processing

Result Interpretation

Understanding Outputs

Cell Proportion Matrix

  • Rows: spatial spots/barcodes
  • Columns: cell types
  • Values: estimated proportion of each cell type (0-1); after the row normalization shown above, the proportions of each spot sum to 1

NMF Components

  • Represent gene expression programs for each cell type
  • Can be used to identify cell-type specific genes