Tutorial

Data Preparation

Required Data

Single-cell reference data: scRNA-seq or snRNA-seq with cell type annotations
Spatial transcriptomics data: ST data from platforms like 10X Visium

Data Format Requirements

# R format: both inputs are Seurat objects read from .rds files

# Single-cell reference. Its metadata must carry the cell type annotations
# (written as sc_ref$clust_vr in this tutorial).
sc_ref <- readRDS(file = "path/to/sc_data.rds")

# Spatial data: a standard Seurat object with spatial assays
st_vis <- readRDS(file = "path/to/st_data.rds")

# Python format - AnnData objects
import scanpy as sc  # provides sc.read, used for both inputs below

# Single-cell reference
sc_ref = sc.read("path/to/sc.h5ad")
# Must contain the cell type annotations as a column of sc_ref.obs; the name
# of that column is what gets passed as `clust_vr` when running UCASpatial

# Spatial data
st_vis = sc.read("path/to/st.h5ad")

Basic Usage

R Implementation

# Load required libraries
library(Seurat)
library(dplyr)
library(tibble)
library(UCASpatial)  # provides UCASpatial_deconv()

# Load your data
st_vis <- readRDS('/datapath/ST_data.rds')
sc_ref <- readRDS('/datapath/SC_data.rds')

# Name of the sc_ref metadata column that holds the cell type annotations
# (adjust based on your data)
clust_vr <- "celltype"

# Run UCASpatial to map cell subpopulations to spatial locations
UCASpatial_result <- UCASpatial_deconv(
    sc_ref = sc_ref,
    st_vis = st_vis,
    clust_vr = clust_vr
)

# Extract and process results.
# The second element of the result is the deconvolution matrix; its last
# column is dropped before normalizing.
# NOTE(review): the last column is presumably a residual / fit statistic
# rather than a cell type - confirm against the UCASpatial documentation.
raw_decon <- as.matrix(UCASpatial_result[[2]])
# seq_len() yields an empty selection (instead of c(1, 0)) when only one
# column exists, and drop = FALSE keeps the result a matrix in every case.
decon_matr <- raw_decon[, seq_len(ncol(raw_decon) - 1), drop = FALSE]
# Rescale every row (spot) so that its values sum to 1
decon_matr <- decon_matr / rowSums(decon_matr)
rownames(decon_matr) <- colnames(st_vis)

# Create dataframe for integration with Seurat.
# Note: data.frame() makes column names syntactic (e.g. "CD8 Tem" becomes
# "CD8.Tem"), so the metadata columns added below use the converted names.
decon_df <- decon_matr %>%
    data.frame() %>%
    tibble::rownames_to_column("barcodes")

# Add deconvolution results to Seurat object metadata, matching on barcode
st_vis@meta.data <- st_vis@meta.data %>%
    tibble::rownames_to_column("barcodes") %>%
    dplyr::left_join(decon_df, by = "barcodes") %>%
    tibble::column_to_rownames("barcodes")

Python Implementation

import UCASpatial_ds_R1
import scanpy as sc
import pandas as pd

# Read the single-cell reference and the spatial dataset
sc_ref = sc.read("path/to/sc.h5ad")
st_vis = sc.read("path/to/st.h5ad")

# Normalize the single-cell data with target_sum=1; inplace=False returns the
# normalized values instead of overwriting sc_ref, and the normalized matrix
# is stored as the 'data' layer
normalized = sc.pp.normalize_total(sc_ref, target_sum=1, inplace=False)
X_norm = normalized['X']
sc_ref.layers['data'] = X_norm

# Set up the UCASpatial model
ucas = UCASpatial_ds_R1.UCASpatial(
    sc_ref=sc_ref,
    st_vis=st_vis,
    clust_vr='celltype',  # annotation column name; adjust based on your data
    meta_filter=False,
    random_seed=12345,
)

# Run the model and unpack the two entries read from its result
result = ucas.run()
cell_proportions = result['proportions']
nmf_components = result['nmf_components']

# Write the proportions to disk
cell_proportions.to_csv('cell_proportions.csv')

Complete Example: Human Colorectal Cancer Analysis

Dataset Information

This example uses human colorectal cancer data from Qi et al., 2022.

Download Example Data

Download the example datasets from: Zenodo Repository

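Spatial example data: Example_Data_Human_CRC_ST_data.rds (the code below reads it from ./Human_CRC_ST_data.rds; rename the downloaded file or adjust the path accordingly)

Single cell reference example data: Example_Data_sc_ref_downsampled.rds (the code below reads it from ./sc_ref_down.rds; rename the downloaded file or adjust the path accordingly)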

Load and Explore Data

# Load required libraries
library(Seurat)
library(dplyr)
library(tibble)
library(UCASpatial)

# Read the spatial transcriptomics Seurat object, then print its summary
st_vis <- readRDS(file = "./Human_CRC_ST_data.rds")
st_vis
    
An object of class Seurat 
36601 features across 4248 samples within 1 assay 
Active assay: Spatial (36601 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 2 dimensional reductions calculated: pca, umap
 1 image present: image

# Read the single-cell reference Seurat object, then print its summary
sc_ref <- readRDS(file = './sc_ref_down.rds')
sc_ref
    
An object of class Seurat 
45217 features across 2913 samples within 7 assays 
Active assay: RNA (27905 features, 0 variable features)
 2 layers present: counts, data
 6 other assays present: integrated, source, protein, transfered_type, transfered_subtype, SCT
 2 dimensional reductions calculated: pca, umap

# Name of the sc_ref metadata column that holds the cell type annotation
clust_vr <- 'UCASpatial_clus_v7'

# Check cell type distribution in reference.
# Index the metadata by clust_vr (rather than repeating the column name) so
# this check always inspects the same column that is passed to UCASpatial.
table(sc_ref@meta.data[[clust_vr]])
              CD8 Tem               CD8 Tex               CD8 Trm 
                  100                   100                    48 
               CD4 Tn                CD4 Tm                   Tfh 
                  100                   100                    15 
             Th1/Th17                  Treg                    NK 
                   34                   100                   100 
                  ILC                  Mast              Monocyte 
                  100                   100                   100 
           Neutrophil             IGF1+ Mac              FN1+ Mac 
                    2                   100                   100 
            PDPN+ Mac                  cDC1                  cDC2 
                   72                    58                   100 
                 cDC3                   pDC            Fibroblast 
                   58                    31                   100 
          FAP+ Myofib         POSTN+ Myofib         Smooth Muscle 
                  100                   100                   100 
             Pericyte           Endothelium                 Glial 
                  100                   100                   100 
                    B                Plasma         Epithelium c1 
                  100                   100                   100 
LEFTY1+ Epithelium c2 TM4SF1+ Epithelium c3   MUC2+ Epithelium c4 
                  100                   100                   100 
 REG1A+ Epithelium c5 
                   95

Run UCASpatial Deconvolution

# Run UCASpatial with optimized parameters.
# Inputs: the single-cell reference (sc_ref), the spatial object (st_vis) and
# the name of the annotation column in the reference (clust_vr).
# The result is used as a list later in this tutorial: UCASpatial_result[[1]]
# feeds the quality control plot and UCASpatial_result[[2]] is converted to
# the deconvolution matrix.
# As the run log below reports, no output path is given here, so the current
# working directory is used and the marker genes are auto-saved there as
# cluster_markers.rds.
# NOTE(review): the three *.filter.threshold values presumably control the
# marker filtering in Step1.4 of the log - confirm in the package docs.
UCASpatial_result <- UCASpatial_deconv(
    sc_ref = sc_ref,
    st_vis = st_vis,
    clust_vr = clust_vr,
    downsample_n = 0,                    # No downsampling
    meta.filter = FALSE,                 # No meta filtering
    cos.filter.threshold = 0.05,         # Cosine similarity threshold
    ent.filter.threshold = 0.5,          # Entropy threshold
    weight.filter.threshold = 0.2        # Weight filtering threshold
)

Load required packages...
Step0    Check the variables............................
Warning: Using current path: "/data/xy/Spatial_transcriptome/eWEIDE/20251201_NC_revision_R1/R3Q8_tutorial" as the 'output_path'...
...........
Load the spatial expression matrix (row counts):st_vis@assays$Spatial@counts
Step0    Preprocess the sc_ref data.....................
Step0.1  Meta.cell filter...............................
Step1    Calculate the markers and add weights..........
Step1.1  Calculate the marker genes.....................
Change the idents of sc_ref into 'sc_ref@meta.data$UCASpatial_clus_v7'...
Calculating cluster CD8 Tem
Calculating cluster CD8 Tex
Calculating cluster CD8 Trm
Calculating cluster CD4 Tn
Calculating cluster CD4 Tm
Calculating cluster Tfh
Calculating cluster Th1/Th17
Calculating cluster Treg
Calculating cluster NK
Calculating cluster ILC
Calculating cluster Mast
Calculating cluster Monocyte
Calculating cluster Neutrophil
Calculating cluster IGF1+ Mac
Calculating cluster FN1+ Mac
Calculating cluster PDPN+ Mac
Calculating cluster cDC1
Calculating cluster cDC2
Calculating cluster cDC3
Calculating cluster pDC
Calculating cluster Fibroblast
Calculating cluster FAP+ Myofib
Calculating cluster POSTN+ Myofib
Calculating cluster Smooth Muscle
Calculating cluster Pericyte
Calculating cluster Endothelium
Calculating cluster Glial
Calculating cluster B
Calculating cluster Plasma
Calculating cluster Epithelium c1
Calculating cluster LEFTY1+ Epithelium c2
Calculating cluster TM4SF1+ Epithelium c3
Calculating cluster MUC2+ Epithelium c4
Calculating cluster REG1A+ Epithelium c5
Auto save the marker genes under the path:
/data/xy/Spatial_transcriptome/eWEIDE/20251201_NC_revision_R1/R3Q8_tutorial/cluster_markers.rds
Step1.2  Calculate the entropy-based weight.............
  |=====================================================================| 100%

Warning: Setting row names on a tibble is deprecated.

Step1.3  Calculate the cosine-based weight..............
Warning: x or y has vectors with all zero; consider setting use_nan = TRUE to set these values to NaN or use_nan = FALSE to suppress this warning
Step1.4  Filter the markers.............................
Save the markers........................................
Warning: Layer counts isn't present in the assay object; returning NULL
Step2  Train the nsNMF model............................
[1] "Preparing Gene set"
Warning: Layer counts isn't present in the assay object; returning NULL
Warning in asMethod(object) :
  sparse->dense coercion: allocating vector of size 1.2 GiB
Warning: Layer counts isn't present in the assay object; returning NULL
Normalize the sc_ref matrix...
Initialize the NMF matrices...
[1] "NMF Training..."
[1] "Time to initialize and train NMF model was 0.05mins"
Step3  Calculate the cluster-topic profile..............
Step4  Weighted-NNLS to implement the deconvolution.....
[1] "Deconvoluting spots"
  |=====================================================================| 100%

Quality Control Assessment

# Generate quality control plot from the first element of the result.
# NOTE(review): element [[1]][[1]] is presumably the trained NMF model (its
# `h` slot is read here) and [[1]][[2]] its accompanying labels - confirm.
nmf_mod <- UCASpatial_result[[1]][[1]]
nmf_aux <- UCASpatial_result[[1]][[2]]
qc_plots <- dot_plot_profiles_fun(nmf_mod@h, nmf_aux)

# Keep only the second entry of what dot_plot_profiles_fun() returns;
# single-bracket indexing preserves the container type.
p <- qc_plots[2]
print(p)

Quality Control Plot Example:

Dot plot showing the quality of deconvolution for each cell type. The size represents the expression level and color intensity represents the percentage of expressing cells.

Data Source: View on GitHub

Extract and Process Results

# Extract deconvolution matrix (second element of the result)
decon_matr <- as.matrix(UCASpatial_result[[2]])

# Drop the last column, then rescale every row (spot) to sum to 1.
# NOTE(review): the last column is presumably a residual / fit statistic
# rather than a cell type - confirm against the UCASpatial documentation.
# seq_len() yields an empty selection (instead of c(1, 0)) when only one
# column exists, and drop = FALSE keeps the result a matrix in every case.
cell_proportions <- decon_matr[, seq_len(ncol(decon_matr) - 1), drop = FALSE]
cell_proportions <- cell_proportions / rowSums(cell_proportions)
rownames(cell_proportions) <- colnames(st_vis)

# Create dataframe for visualization.
# Note: data.frame() makes column names syntactic (e.g. "CD8 Tem" becomes
# "CD8.Tem"), so the metadata columns added below use the converted names.
decon_df <- cell_proportions %>%
    data.frame() %>%
    tibble::rownames_to_column("barcodes")

# Add results to Seurat object metadata, matching on barcode
st_vis@meta.data <- st_vis@meta.data %>%
    tibble::rownames_to_column("barcodes") %>%
    dplyr::left_join(decon_df, by = "barcodes") %>%
    tibble::column_to_rownames("barcodes")

# Create annotation assay for visualization (features = cell types,
# cells = spots, hence the transpose).
# The assay is attached through Seurat's accessors (`Key<-`, `[[<-`) instead
# of writing to the S4 slots directly, so the object's own checks are applied.
Annotation_assay <- CreateAssayObject(t(cell_proportions))
Key(Annotation_assay) <- "annotation_"
st_vis[["Annotation"]] <- Annotation_assay
DefaultAssay(st_vis) <- "Annotation"

Visualize Cell Type Distributions

# Cell types to draw; each name is looked up as a feature of st_vis
selected_cell_types <- c('FAP+ Myofib', 'CD4 Tn', 'B')

# One spatial feature plot per selected cell type.
# NOTE(review): pt.size.factor = 1000 is far above Seurat's default and is
# presumably tuned to this dataset's image scaling - adjust for other data.
plot_list <- Seurat::SpatialFeaturePlot(
    st_vis,
    features = selected_cell_types,
    min.cutoff = 0.03,
    alpha = c(0.3, 1),
    pt.size.factor = 1000,
    stroke = NA
)

# Show the plots
plot_list

Cell Type Distribution Example:

Spatial distribution of selected cell types in colorectal cancer tissue. Color indicates the proportion of each cell type at each spatial location.