library(SingleCellMQC)
library(Seurat)
library(BPCells)
# Load a pre-existing Seurat object, or raw data already processed with
# SingleCellMQC, from an RDS file.
pbmc <- readRDS("/data/pbmc.rds")
3 QC HTML report
SingleCellMQC provides automated, interactive HTML-based QC reports using R Markdown, offering users a comprehensive overview of data quality before downstream analysis. Users can perform basic preprocessing with the ‘RunPreprocess’ function or selectively execute individual QC modules. The ‘RunReport’ function then compiles all QC results into an HTML document containing interactive plots, tables, and QC warnings at the sample, cell, feature, and batch levels. To support large-scale datasets, SingleCellMQC integrates BPCells to reduce memory usage and improve computational efficiency. The HTML QC reports for all three datasets are provided (https://doi.org/10.5281/zenodo.15120930). Notably, for the large-scale public breast tissue single-cell RNA-seq dataset containing 167 samples and over one million cells (Dataset 3), SingleCellMQC produced a complete, navigable report with the ‘RunReport’ function in approximately 22 minutes without the need for high-performance computing.
Dataset 1: an in-house single-cell multi-omics dataset generated using the 10x Genomics Chromium 5’ Gene Expression platform, which included matched scRNA-seq, surface proteomics (antibody-derived tags, ADTs), and TCR/BCR sequencing across four peripheral blood mononuclear cell (PBMC) samples (28,498 cells in total). Three samples (TP1, TP2, and TP3) were collected from the same donor at three-day intervals, and one sample (TP3) that failed initial QC was re-measured (TP3-rep). The example PBMC single-cell multi-omics data can be downloaded from https://doi.org/10.5281/zenodo.15120930.
Dataset 2: a public scRNA-seq dataset (10x Genomics) comprising 17 PBMC samples (137,214 cells). The dataset can be downloaded from GEO (accession GSE157007).
Dataset 3: a large-scale scRNA-seq dataset (10x Genomics) comprising 167 breast tissue samples and over one million cells. The dataset can be downloaded from GEO (accession GSE235326).
3.1 Dataset1
3.1.1 Load data
3.1.2 Run Preprocessing for Single-Cell Data (If no prior preprocessing has been performed)
# Preprocessing for Dataset 1. Every step takes the object and returns it
# again, so the result is re-assigned to `pbmc` each time.
# NOTE(review): `split.by = "orig.ident"` presumably runs a step separately
# for each sample -- confirm against the SingleCellMQC documentation.
# metrics
pbmc <- CalculateMetrics(pbmc)
# cell annotation
pbmc <- RunScType(pbmc, split.by = "orig.ident")
# low-quality cells: three methods in one call, using fixed cut-offs of
# percent.mt = 10, min.nFeature_RNA = 200 and min.nCount_RNA = 500
pbmc <- RunLQ(
  pbmc,
  split.by = "orig.ident",
  add.Seurat = TRUE,
  methods = c("ddqc", "miQC", "fixed"),
  percent.mt = 10,
  min.nFeature_RNA = 200,
  min.nCount_RNA = 500
)
pbmc <- RunLQ_MAD(pbmc, split.by = "orig.ident")
pbmc <- RunLQ_VDJ(pbmc)
# doublets
pbmc <- RunDbt_VDJ(pbmc)
pbmc <- RunDbt_scDblFinder(pbmc, split.by = "orig.ident", do.topscore = TRUE)
pbmc <- RunDbt_cxds(pbmc, split.by = "orig.ident")
pbmc <- RunDbt_bcds(pbmc, split.by = "orig.ident")
pbmc <- RunDbt_hybrid(pbmc, split.by = "orig.ident")
pbmc <- RunDbt_DoubletFinder(pbmc, split.by = "orig.ident")
# ADT-based doublets.
# NOTE(review): feature1[i] and feature2[i] look like marker pairs
# (CD20 vs CD3, CD19 vs CD3), so the repeated "Hu.CD3-UCHT1" appears
# intentional -- confirm.
pbmc <- RunDbt_ADT(
  pbmc,
  split.by = "orig.ident",
  add.Seurat = TRUE,
  feature1 = c("Hu.CD20-2H7", "Hu.CD19"),
  feature2 = c("Hu.CD3-UCHT1", "Hu.CD3-UCHT1")
)
# cell clustering (one run for RNA, one for ADT)
pbmc <- RunPipeline(pbmc, preprocess = "rna.umap")
pbmc <- RunPipeline(pbmc, preprocess = "adt.umap")
3.1.3 Generate QC HTML Report
# Build the Dataset 1 QC report and write it to `outputFile`.
# The stray empty argument (", ,") after `tissue` has been removed.
# NOTE(review): "rna_cluster" / "adt_cluster" are presumably the metadata
# columns created by the RunPipeline() calls -- confirm.
RunReport(
  pbmc,
  tissue = "blood",
  sample.by = "orig.ident",
  RNA_cluster_name = "rna_cluster",
  ADT_cluster_name = "adt_cluster",
  ADT.batch.by = c("orig.ident", "Batch_TotalSeq_C_Antibodies"),
  ADT.covariate.formula = ~ (1 | Batch_TotalSeq_C_Antibodies),
  outputFile = "../Dataset1_html/SingleCellMQC.html"
)
3.2 Dataset2
3.2.1 Load data
library(SingleCellMQC)
library(Seurat)
library(BPCells)
# Load a pre-existing Seurat object, or raw data already processed with
# SingleCellMQC, from an RDS file.
# The object must be called `GSE157007`: every later Dataset 2 step reads
# and re-assigns that name.
GSE157007 <- readRDS("/data/GSE157007.rds")
3.2.2 Run Preprocessing for Single-Cell Data (If no prior preprocessing has been performed)
# Preprocessing for Dataset 2. Every step takes the object and returns it
# again, so the result is re-assigned to `GSE157007` each time.
# NOTE(review): `split.by = "orig.ident"` presumably runs a step separately
# for each sample -- confirm against the SingleCellMQC documentation.
# metrics
GSE157007 <- CalculateMetrics(GSE157007)
# cell annotation
GSE157007 <- RunScType(GSE157007, split.by = "orig.ident")
# low-quality cells: one call per method
GSE157007 <- RunLQ_MAD(GSE157007, split.by = "orig.ident")
GSE157007 <- RunLQ_ddqc(GSE157007, split.by = "orig.ident")
GSE157007 <- RunLQ_miQC(GSE157007, split.by = "orig.ident")
GSE157007 <- RunLQ_VDJ(GSE157007)
GSE157007 <- RunLQ_fixed(
  GSE157007,
  min.nFeature_RNA = 200,
  percent.mt = 10
)
# doublets
GSE157007 <- RunDbt_scDblFinder(
  GSE157007,
  split.by = "orig.ident",
  do.topscore = TRUE
)
GSE157007 <- RunDbt_cxds(GSE157007, split.by = "orig.ident")
GSE157007 <- RunDbt_bcds(GSE157007, split.by = "orig.ident")
GSE157007 <- RunDbt_hybrid(GSE157007, split.by = "orig.ident")
GSE157007 <- RunDbt_VDJ(GSE157007)
GSE157007 <- RunDbt_DoubletFinder(GSE157007, split.by = "orig.ident")
# cell clustering
GSE157007 <- RunPipeline(GSE157007, preprocess = "rna.umap")
3.2.3 Generate QC HTML Report
# Build the Dataset 2 QC report and write it to `outputFile`.
# The stray empty argument (", ,") after `tissue` has been removed.
# NOTE(review): "rna_cluster" is presumably the metadata column created by
# RunPipeline(preprocess = "rna.umap") -- confirm.
RunReport(
  GSE157007,
  tissue = "blood",
  sample.by = "orig.ident",
  RNA.batch.by = c("orig.ident"),
  RNA_cluster_name = "rna_cluster",
  outputFile = "../Dataset2_html/SingleCellMQC.html"
)
3.3 Dataset3
3.3.1 Load data
library(SingleCellMQC)
library(Seurat)
library(BPCells)
# Load a pre-existing Seurat object, or raw data already processed with
# SingleCellMQC, from an RDS file.
# The object must be called `breast_process`: every later Dataset 3 step
# reads and re-assigns that name.
breast_process <- readRDS("/data/breast_process.rds")
3.3.2 Run Preprocessing for Single-Cell Data (If no prior preprocessing has been performed)
# Preprocessing for Dataset 3. Every step takes the object and returns it
# again, so the result is re-assigned to `breast_process` each time.
# NOTE(review): `split.by = "orig.ident"` presumably runs a step separately
# for each sample -- confirm against the SingleCellMQC documentation.
# metrics
breast_process <- CalculateMetrics(breast_process)
# cell annotation
breast_process <- RunScType(breast_process, split.by = "orig.ident")
# low-quality cells: fixed cut-offs, then MAD and ddqc
breast_process <- RunLQ_fixed(
  breast_process,
  min.nCount_RNA = 500,
  min.nFeature_RNA = 200,
  percent.mt = 10,
  percent.rb = 50
)
breast_process <- RunLQ_MAD(breast_process, split.by = "orig.ident")
breast_process <- RunLQ_ddqc(breast_process, split.by = "orig.ident")
# doublets
breast_process <- RunDbt_scDblFinder(
  breast_process,
  split.by = "orig.ident",
  do.topscore = TRUE
)
breast_process <- RunDbt_cxds(breast_process, split.by = "orig.ident")
breast_process <- RunDbt_bcds(breast_process, split.by = "orig.ident")
breast_process <- RunDbt_hybrid(breast_process, split.by = "orig.ident")
breast_process <- RunDbt_DoubletFinder(breast_process, split.by = "orig.ident")
# cell clustering
breast_process <- RunPipeline(breast_process, preprocess = "rna.umap")
3.3.3 Generate QC HTML Report
# Build the Dataset 3 QC report and measure how long RunReport() takes.
# Wall-clock timestamps are recorded on either side of the call;
# system.time() captures the timing of the call itself.
start_time <- Sys.time()
run_report_result <- system.time({
  RunReport(
    breast_process,
    tissue = "breast",
    sample.by = "orig.ident",
    RNA.batch.by = c("sequencing_platform", "procedure_group", "sample_source"),
    RNA_cluster_name = "rna_cluster",
    RNA.covariate.formula =
      ~ (1 | sequencing_platform) + (1 | procedure_group) + (1 | sample_source),
    outputFile = "../Dataset3_html/SingleCellMQC.html"
  )
})
end_time <- Sys.time()
# Print the timing object returned by system.time().
print("RunReport function execution time details:")
print(run_report_result)