3  QC HTML report

SingleCellMQC provides automated, interactive HTML-based QC reports built with R Markdown, offering users a comprehensive overview of data quality before downstream analysis. Users can perform basic preprocessing with the ‘RunPreprocess’ function or selectively execute individual QC modules. The ‘RunReport’ function then compiles all QC results into an HTML document containing interactive plots, tables, and QC warnings at the sample, cell, feature, and batch levels. To support large-scale datasets, SingleCellMQC integrates BPCells to reduce memory usage and improve computational efficiency. The HTML QC reports for all three datasets are available at https://doi.org/10.5281/zenodo.15120930. Notably, for the large-scale public breast tissue single-cell RNA-seq dataset containing 167 samples and over one million cells (Dataset 3), the ‘RunReport’ function produced a complete, navigable report in approximately 22 minutes without the need for high-performance computing.

3.1 Dataset1

3.1.1 Load data

library(SingleCellMQC)
library(Seurat)
library(BPCells)
# Load the input object: either a pre-existing Seurat object or raw data that
# has already been processed with SingleCellMQC.
pbmc <- readRDS(file = "/data/pbmc.rds")

3.1.2 Run Preprocessing for Single-Cell Data (If no prior preprocessing has been performed)

# Dataset 1 preprocessing. Every step takes the object and returns the updated
# object, so the result is reassigned to `pbmc` each time.
# Logical flags are written as TRUE rather than T: T is an ordinary variable
# that can be reassigned, TRUE is a reserved constant.

# QC metrics
pbmc <- CalculateMetrics(pbmc)

# Cell annotation, split by `orig.ident`
pbmc <- RunScType(pbmc, split.by = "orig.ident")

# Low-quality cell detection
# NOTE(review): the numeric cutoffs are presumably consumed by the "fixed"
# method -- confirm against the RunLQ documentation.
pbmc <- RunLQ(
  pbmc,
  split.by = "orig.ident",
  add.Seurat = TRUE,
  methods = c("ddqc", "miQC", "fixed"),
  percent.mt = 10,
  min.nFeature_RNA = 200,
  min.nCount_RNA = 500
)
pbmc <- RunLQ_MAD(pbmc, split.by = "orig.ident")
pbmc <- RunLQ_VDJ(pbmc)

# Doublet detection with several callers
pbmc <- RunDbt_VDJ(pbmc)
pbmc <- RunDbt_scDblFinder(pbmc, split.by = "orig.ident", do.topscore = TRUE)
pbmc <- RunDbt_cxds(pbmc, split.by = "orig.ident")
pbmc <- RunDbt_bcds(pbmc, split.by = "orig.ident")
pbmc <- RunDbt_hybrid(pbmc, split.by = "orig.ident")
pbmc <- RunDbt_DoubletFinder(pbmc, split.by = "orig.ident")
# ADT-based doublets: feature1 and feature2 are parallel vectors of ADT feature
# names (CD20 and CD19, each listed against CD3).
# NOTE(review): element-wise pairing is assumed from the argument layout -- confirm.
pbmc <- RunDbt_ADT(
  pbmc,
  split.by = "orig.ident",
  add.Seurat = TRUE,
  feature1 = c("Hu.CD20-2H7", "Hu.CD19"),
  feature2 = c("Hu.CD3-UCHT1", "Hu.CD3-UCHT1")
)

# Cell clustering: one pass with the "rna.umap" preset, one with "adt.umap"
pbmc <- RunPipeline(pbmc, preprocess = "rna.umap")
pbmc <- RunPipeline(pbmc, preprocess = "adt.umap")

3.1.3 Generate QC HTML Report

# Generate the HTML QC report for Dataset 1 at the path given by `outputFile`.
# The stray empty argument that followed `tissue` (", ,") has been removed; an
# empty argument is matched positionally as "missing" and can break argument
# forwarding.
RunReport(
  pbmc,
  tissue = "blood",
  sample.by = "orig.ident",
  RNA_cluster_name = "rna_cluster",
  ADT_cluster_name = "adt_cluster",
  ADT.batch.by = c("orig.ident", "Batch_TotalSeq_C_Antibodies"),
  ADT.covariate.formula = ~ (1|Batch_TotalSeq_C_Antibodies),
  outputFile = "../Dataset1_html/SingleCellMQC.html"
)

3.2 Dataset2

3.2.1 Load data

library(SingleCellMQC)
library(Seurat)
library(BPCells)
# Load a pre-existing Seurat object or processed raw data using SingleCellMQC.
# The object must be named `GSE157007`: every subsequent Dataset 2 step reads
# and reassigns that variable, so assigning to any other name leaves it
# undefined.
GSE157007 <- readRDS("/data/GSE157007.rds")

3.2.2 Run Preprocessing for Single-Cell Data (If no prior preprocessing has been performed)

# Dataset 2 preprocessing. Every step takes the object and returns the updated
# object, so the result is reassigned to `GSE157007` each time.

# QC metrics
GSE157007 <- CalculateMetrics(GSE157007)

# Cell annotation, split by `orig.ident`
GSE157007 <- RunScType(GSE157007, split.by = "orig.ident")

# Low-quality cell detection: MAD, ddqc, miQC, VDJ and fixed-cutoff variants
GSE157007 <- RunLQ_MAD(GSE157007, split.by = "orig.ident")
GSE157007 <- RunLQ_ddqc(GSE157007, split.by = "orig.ident")
GSE157007 <- RunLQ_miQC(GSE157007, split.by = "orig.ident")
GSE157007 <- RunLQ_VDJ(GSE157007)
GSE157007 <- RunLQ_fixed(GSE157007, min.nFeature_RNA = 200, percent.mt = 10)

# Doublet detection with several callers.
# TRUE is spelled out instead of T, which is a reassignable variable.
GSE157007 <- RunDbt_scDblFinder(
  GSE157007,
  split.by = "orig.ident",
  do.topscore = TRUE
)
GSE157007 <- RunDbt_cxds(GSE157007, split.by = "orig.ident")
GSE157007 <- RunDbt_bcds(GSE157007, split.by = "orig.ident")
GSE157007 <- RunDbt_hybrid(GSE157007, split.by = "orig.ident")
GSE157007 <- RunDbt_VDJ(GSE157007)
GSE157007 <- RunDbt_DoubletFinder(GSE157007, split.by = "orig.ident")

# Cell clustering with the "rna.umap" preset
GSE157007 <- RunPipeline(GSE157007, preprocess = "rna.umap")

3.2.3 Generate QC HTML Report

# Generate the HTML QC report for Dataset 2 at the path given by `outputFile`.
# The stray empty argument that followed `tissue` (", ,") has been removed; an
# empty argument is matched positionally as "missing" and can break argument
# forwarding.
RunReport(
  GSE157007,
  tissue = "blood",
  sample.by = "orig.ident",
  RNA.batch.by = c("orig.ident"),
  RNA_cluster_name = "rna_cluster",
  outputFile = "../Dataset2_html/SingleCellMQC.html"
)

3.3 Dataset3

3.3.1 Load data

library(SingleCellMQC)
library(Seurat)
library(BPCells)
# Load a pre-existing Seurat object or processed raw data using SingleCellMQC.
# The object must be named `breast_process`: every subsequent Dataset 3 step
# reads and reassigns that variable, so assigning to any other name leaves it
# undefined.
breast_process <- readRDS("/data/breast_process.rds")

3.3.2 Run Preprocessing for Single-Cell Data (If no prior preprocessing has been performed)

# Dataset 3 preprocessing. Every step takes the object and returns the updated
# object, so the result is reassigned to `breast_process` each time.

# QC metrics
breast_process <- CalculateMetrics(breast_process)

# Cell annotation, split by `orig.ident`
breast_process <- RunScType(breast_process, split.by = "orig.ident")

# Low-quality cell detection: fixed-cutoff, MAD and ddqc variants
breast_process <- RunLQ_fixed(
  breast_process,
  min.nCount_RNA = 500,
  min.nFeature_RNA = 200,
  percent.mt = 10,
  percent.rb = 50
)
breast_process <- RunLQ_MAD(breast_process, split.by = "orig.ident")
breast_process <- RunLQ_ddqc(breast_process, split.by = "orig.ident")

# Doublet detection with several callers.
# TRUE is spelled out instead of T, which is a reassignable variable.
breast_process <- RunDbt_scDblFinder(
  breast_process,
  split.by = "orig.ident",
  do.topscore = TRUE
)
breast_process <- RunDbt_cxds(breast_process, split.by = "orig.ident")
breast_process <- RunDbt_bcds(breast_process, split.by = "orig.ident")
breast_process <- RunDbt_hybrid(breast_process, split.by = "orig.ident")
breast_process <- RunDbt_DoubletFinder(breast_process, split.by = "orig.ident")

# Cell clustering with the "rna.umap" preset
breast_process <- RunPipeline(breast_process, preprocess = "rna.umap")

3.3.3 Generate QC HTML Report

# Generate the HTML QC report for Dataset 3 and measure how long it takes.
# Two measurements are taken: Sys.time() timestamps around the call, and
# system.time() for the user/system/elapsed breakdown.
start_time <- Sys.time()
run_report_result <- system.time({
  RunReport(
    breast_process,
    tissue = "breast",
    sample.by = "orig.ident",
    RNA.batch.by = c("sequencing_platform", "procedure_group", "sample_source"),
    RNA_cluster_name = "rna_cluster",
    RNA.covariate.formula = ~ (1|sequencing_platform) + (1|procedure_group) + (1|sample_source),
    outputFile = "../Dataset3_html/SingleCellMQC.html"
  )
})
end_time <- Sys.time()

# Timing breakdown reported by system.time() (in seconds).
print("RunReport function execution time details:")
print(run_report_result)
# Wall-clock duration from the recorded timestamps, in minutes; previously the
# two timestamps were captured but never reported.
print(difftime(end_time, start_time, units = "mins"))