Load packages.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(Seurat)
## Attaching SeuratObject
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.12.1
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(ggalluvial)
Load data.
load("pbmc_clean.RData")
geom_hline( )
ggplot(data = pbmc[["percent.mt"]]) +
aes(x = 1, y = percent.mt) +
geom_violin(fill = "salmon") + #Add color to violin
geom_jitter(height = 0, width = 0.4, size = 0.5) + #Make points smaller
#Add nice labels
labs(x = "Identity pbmc3k",
y = "",
title = "percent.mt") +
#Format background and axes
theme_classic() +
#Remove meaningless x-axis values
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
#Add horizontal cutoff line
geom_hline(yintercept = 5, linetype = "dashed")
pbmc[["nFeature_RNA"]]
and our original
quality filtering retained cells with 200 - 2500 features.ggplot(data = pbmc[["nFeature_RNA"]]) +
aes(x = 1, y = nFeature_RNA) +
geom_violin(fill = "salmon") + #Add color to violin
geom_jitter(height = 0, width = 0.4, size = 0.5) + #Make points smaller
#Add nice labels
labs(x = "Identity pbmc3k",
y = "",
title = "nFeature_RNA") +
#Format background and axes
theme_classic() +
#Remove meaningless x-axis values
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
#Add horizontal cutoff lines. Notice how you can add 2 at once
geom_hline(yintercept = c(200, 2500), linetype = "dashed")
Re-run tSNE parts of the workshop if needed. This is copied exactly from the workshop.
Calculate tSNE.
pbmc <- RunTSNE(pbmc, dims = 1:10)
Extract tSNE data.
#Extract the XY values for UMP
tsne_xy <- as.data.frame(pbmc[["tsne"]]@cell.embeddings)
#Add cell cluster information
cell_clusters <- as.data.frame(pbmc$seurat_clusters)
#Use the same cell clusters as UMAP
#Combine
tsne_xy_clusters <- tsne_xy %>%
#move rownames to data column
rownames_to_column("cell") %>%
#merge with cluster info
full_join(
cell_clusters %>% rownames_to_column("cell")
) %>%
#clean column names
rename(ident = `pbmc$seurat_clusters`)
## Joining, by = "cell"
First, extract and format gene expression data similar to what we did for the UMAP.
#Define gene of interest
genes_of_interest <- c("CD14", "CD8A")
#Extract normalized gene expression data
pbmc_counts <- as.data.frame(GetAssayData(
object = pbmc, slot = "data"))
#Filter for gene of interest
tsne_xy_expression <- pbmc_counts %>%
rownames_to_column("gene") %>%
filter(gene %in% genes_of_interest) %>%
#long format
pivot_longer(-gene, names_to = "cell") %>%
#combine with UMAP data
full_join(tsne_xy_clusters)
## Joining, by = "cell"
Then create the plot.
ggplot(data = tsne_xy_expression) +
aes(x = tSNE_1, y = tSNE_2) +
geom_point(size = 0.5) +
aes(color = value) + #Color by expression
scale_color_gradient(low = "lightgrey",
high = "blue") + #Change colors
facet_wrap(~ gene, scales = "free") + #Split by gene
labs(color = "") +
theme_classic()
We will use the tSNE and the Wong colors from the second reference above.
#List colors
col.palette <- c("#000000","#E69F00","#56B4E9","#009E73",
"#F0E442","#0072B2","#D55E00","#CC79A7",
"#6F0A4C")
#Add names to the vector to specify which color goes to which cluster
##Here we're keeping mostly the default order but forcing the lower right
## cluster to be black instead
names(col.palette) <- c("3","0","1","2",
"4","5","6","7","8")
ggplot(data = tsne_xy_clusters) +
aes(x = tSNE_1, y = tSNE_2) +
geom_point(size = 0.5) +
aes(color = ident) +
labs(title = "ident", color = "") +
theme_classic() +
scale_color_manual(values = col.palette)
Re-run heatmap part of the workshop if needed. This is copied exactly from the workshop.
#Extract scaled expression data
pbmc_counts_log <- as.data.frame(GetAssayData(
object = pbmc,
slot = "scale.data"))
#Order cells by clusters. Remember that we originally
# got these data from as.data.frame(pbmc$seurat_clusters)
cell_order <- cell_clusters %>%
arrange(`pbmc$seurat_clusters`)
Create “top 3” data for second exercise. Again this is copied from the workshop
top3 <- pbmc_markers %>%
group_by(cluster) %>%
top_n(n = 3, wt = avg_log2FC)
pbmc_counts_log_top3 <- pbmc_counts_log %>%
#Filter top genes
rownames_to_column("gene") %>%
filter(gene %in% top3$gene) %>%
#Remove outliers
mutate_if(is.numeric, ~ifelse(. > 2.5, 2.5, .)) %>%
#Reorder cells
select(gene, all_of(rownames(cell_order))) %>%
#Reorder genes
mutate(gene = factor(gene, levels = unique(top3$gene))) %>%
arrange(gene) %>%
#Convert to matrix
column_to_rownames("gene") %>%
as.matrix()
This time, let’s select “top” as the most significant, instead of the
largest fold change. We’ll use slice_min( )
for this.
Format data.
top20 <- pbmc_markers %>%
group_by(cluster) %>%
slice_min(n = 20, order_by = p_val_adj)
pbmc_counts_log_top20 <- pbmc_counts_log %>%
#Filter top genes
rownames_to_column("gene") %>%
filter(gene %in% top20$gene) %>%
#Remove outliers
mutate_if(is.numeric, ~ifelse(. > 2.5, 2.5, .)) %>%
#Reorder cells
select(gene, all_of(rownames(cell_order))) %>%
#Reorder genes
mutate(gene = factor(gene, levels = unique(top20$gene))) %>%
arrange(gene) %>%
#Convert to matrix
column_to_rownames("gene") %>%
as.matrix()
Plot without clustering. Here we include the cluster annotation at
top but don’t recolor like Seurat
(because I personally
don’t like those colors). Since we can’t read the 200+ gene names
anyway, I will remove those as well.
#Format cluster annotation
hm_clusters <- cell_order$`pbmc$seurat_clusters`
names(hm_clusters) <- rownames(cell_order)
#Make cluster colors the same as ggplot default
hm_colors <- scales::hue_pal()(9)
names(hm_colors) <- as.character(c(0:8))
#Format ComplexHeatmap annotation
#This is the not great syntax that the reference can help with
hm_anno <- HeatmapAnnotation(
Identity = hm_clusters,
col = list(Identity = hm_colors),
show_annotation_name = FALSE)
#Make heatmap
Heatmap(pbmc_counts_log_top20, use_raster = FALSE,
cluster_rows = FALSE, cluster_columns = FALSE,
name = "Expression",
column_split = hm_clusters,
show_column_names = FALSE, #remove cell (column) labels
show_row_names = FALSE, #remove gene (row) labels
top_annotation = hm_anno) #add cluster annotation
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
And with clustering, where we see that clusters 1 and 3 are nicely resolved. In contrast, notice how clusters 0 and 2 are heavily mixed. If we knew the cell-type identities of these clusters, these results could lead to interesting findings such as showing that gene expression is more or less similar in different cell types.
Heatmap(pbmc_counts_log_top20, use_raster = FALSE,
cluster_rows = TRUE, cluster_columns = TRUE,
name = "Expression",
show_column_names = FALSE, #remove cell (column) labels
show_row_names = FALSE, #remove gene (row) labels
top_annotation = hm_anno) #add cluster annotation
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
First, list all the cells in these two clusters.
cells_c0_1 <- cell_clusters %>%
rownames_to_column("cell") %>%
filter(`pbmc$seurat_clusters` %in% c(0,1)) %>%
pull(cell)
Then filter the expression data to these cells.
pbmc_counts_log_top3_c01 <- as.data.frame(pbmc_counts_log_top3) %>%
rownames_to_column() %>%
# move cells (which are indiv columns) into 1 column so we can filter it
pivot_longer(-rowname, names_to = "cell") %>%
# filer cells
filter(cell %in% cells_c0_1) %>%
#Put back to wide matrix for heatmap
pivot_wider(names_from = "cell") %>%
column_to_rownames() %>%
as.matrix()
#also filter the cluster splitting vector
hm_clusters_01 <- hm_clusters[names(hm_clusters) %in% cells_c0_1]
#And the annotation object
hm_anno_01 <- HeatmapAnnotation(
Identity = hm_clusters_01,
col = list(Identity = hm_colors[1:2]),
show_annotation_name = FALSE)
Plot! Let’s cluster the genes (rows) to highlight similarities between the two clusters.
Heatmap(pbmc_counts_log_top3_c01, use_raster = FALSE,
cluster_rows = TRUE, cluster_columns = FALSE, #Turn clustering on
name = "Expression",
column_split = hm_clusters_01,
show_column_names = FALSE, #remove cell (column) labels
row_names_side = "left", #move gene (row) labels
top_annotation = hm_anno_01 #add cluster annotation
)