Page Comparison

...

Code Block

title	Load modules and execute R script

module load seurat-scripts/ctr-0.0.5--r341_0

#OPEN AN IDEV SESSION:
idev -m 120 -q normal -A OTH21164 -r RNAseq2

R CMD BATCH seuratTotalScript5k.mod.3.4.R &

The script does the following (newer versions of the functions given here):

Load Data and Create Seurat Object

Code Block

title	Load data and create Seurat object

pbmc_data <- Read10X(data.dir = "filtered_feature_bc_matrix")
pbmc <- CreateSeuratObject(counts = pbmc_data)

Check what the counts matrix looks like

Code Block

title	Look at counts matrix

pbmc.data[c("CD3D", "TCL1A", "MS4A1"), 1:30]

##output                                                                   
## CD3D  4 . 10 . . 1 2 3 1 . . 2 7 1 . . 1 3 . 2  3 . . . . . 3 4 1 5
## TCL1A . .  . . . . . . 1 . . . . . . . . . . .  . 1 . . . . . . . .
## MS4A1 . 6  . . . . . . 1 1 1 . . . . . . . . . 36 1 2 . . 2 . . . .

Normalize Data Using SCTransform and Regress out Genes Related to Cell Cycle

Code Block

title	Normalize data

s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
pbmc <- CellCycleScoring(pbmc, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
pbmc$CC.Difference <- pbmc$S.Score - pbmc$G2M.Score
pbmc <- SCTransform(pbmc, vars.to.regress = "CC.Difference")

Identify Highly Variable Features

Code Block

title	Identify Highly Variable Features and Generate Plots

pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(pbmc), 10)
pdf("variableFeaturesPlot.pdf")
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
dev.off()

Scale the Data

Code Block

title	Scale data

all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)

Select Dimensions using PCA (using variable features)

Code Block

title	Perform PCA for Dimensionality Reduction

#Perform PCA for Dimensionality Reduction
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))

#Print and Examine PCA Results in Multiple Formats
pdf("findDimensions.pdf")
print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(pbmc, dims = 1:2, reduction = "pca")
DimPlot(pbmc, reduction = "pca")

#Generate Principal Component Heatmaps through PC_30
DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)
DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE)
DimHeatmap(pbmc, dims = 16:30, cells = 500, balanced = TRUE)

#Create Elbow Plot of Principal Components through PC_30
ElbowPlot(pbmc, ndims = 30, reduction = "pca")
dev.off()

Cluster the cells (using the selected number of dimensions)

Code Block

title	Cluster the Cells for the selected numeber of Principal Components

#Cluster the Cells for the selected numeber of Principal Components (we selected 14 for 5k Data)
pbmc <- FindNeighbors(pbmc, dims = 1:14)
pbmc <- FindClusters(pbmc, resolution = 0.5)

#View the Cluster IDs of the First 5 Cells
head(Idents(pbmc), 5)

Visualize the clusters using tSNE or UMAP

Code Block

title	Visualize using tSNE

pdf("tsne.pdf")
pbmcTSNE <- RunTSNE(pbmc, dims = 1:14)
DimPlot(pbmcTSNE, reduction = "tsne")
dev.off()

Find markers for each cluster

Code Block

title	Find Markers for Every Cluster and Print Top 2 Per Cluster

pbmc.markers <- FindAllMarkers(pbmcTSNE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
write.csv(pbmc.markers, file = "markersAll.csv")

#Generate Heatmap of Top 10 Markers per Cluster
top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
DoHeatmap(pbmcTSNE, features = top10$gene, size = 3) + theme(axis.text.y = element_text(size = 5)) + NoLegend()

Identify cells expressing genes of interest

Code Block

title	Identify Clusters for Marker Genes of Interest

markerGenes <- c("CD27", "CCR7", "CD8A", "CD8B", "IL7R", "GZMK", "CD79A", "CD37", "CD160", "NKG7", "GNL$
seuratClusters <- function(output, genes) {
        for (gene in genes) {
        print(output[output$gene == gene,]) }
        }
seuratClusters(pbmc.markers, markerGenes)

#Visualize Feature Expression on TSNE Plot of Top Gene per Cluster
FeaturePlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP"))
FeaturePlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3"))
FeaturePlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1"))
FeaturePlot(pbmcTSNE, features = c("PPBP","AC084033.3"))

#Visualize Expression Probability Distribution Across Clusters for Top Gene per Cluster
VlnPlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP"))
VlnPlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3"))
VlnPlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1"))
VlnPlot(pbmcTSNE, features = c("PPBP","AC084033.3"))

Label clusters as cell types based on expression of known marker genes.

...

Versions Compared

Old Version 14

New Version Current

Key