Featured image of post 基于Seurat的单细胞分析标准流程

基于Seurat的单细胞分析标准流程

这是一个副标题

数据集的准备

说明

首先在https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE224679下载GSM7029397_103_23_barcodes.tsv.gz、GSM7029397_103_23_features.tsv.gz、GSM7029397_103_23_matrix.mtx.gz;GSM7029398_103_26_barcodes.tsv.gz、GSM7029398_103_26_features.tsv.gz、GSM7029398_103_26_matrix.mtx.gz六个文件,其中前三个文件为有FAP家族史的肿瘤患者的单细胞测序文件,后三个文件为有FAP家族史的非肿瘤患者的单细胞测序文件。新建一个文件夹“scRNA”,并且在里面建立名为“tumor”和“normal”的文件夹,将前三个文件放入“tumor”文件夹,后三个文件放入“normal”文件夹。

代码

1
2
3
4
5
6
7
8
9
if (!require(Seurat)){
  BiocManager::install("Seurat")
}
library(Seurat)
library(dplyr)
library(ggplot2)
library(ggsci)
library(ggrepel)
setwd("D:\\scRNA")#将下载好的文件放于此处,前三个文件放于tumor文件夹,后三个放于normal文件夹

读取数据并构建Seurat对象

说明

将“tumor”和“normal”文件夹中的“XXX_features.tsv.tz”,“XXX_barcodes.tsv.tz”,“XXX_matrix.mtx.tz”三个文件分别重命名为:“features.tsv.tz”,“barcodes.tsv.tz”,“matrix.mtx.tz”。这样才能被Read10X函数读取。

代码

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#用Read10X函数读取肿瘤样本的单细胞数据
scRNA_data_t <- Read10X(data.dir=".\\tumor")
#Read10X函数要求文件夹里有三个文件:barcodes.tsv.tz、features.tsv.tz、matrix.mtx.tz,名字必须一致
#这三个文件分别是单细胞的barcode、单细胞的注释信息以及表达矩阵
#读取的表达矩阵是用稀疏矩阵进行存储以节省空间,若想转为普通矩阵可以用as(x,"sparseMatrix")进行转换

#查看log2x的分布情况,x是基因表达量,见fig.1A
pdf(file="hist1.pdf",width = 5, height = 5) 
hist(log2(scRNA_data_t@x),breaks=100)
dev.off()

#得到该样本的基因名列表和细胞名列表
t_gene <- dimnames(scRNA_data_t)[[1]]
t_cell <- dimnames(scRNA_data_t)[[2]]

#构建metadata,即样本的注释信息
metadata_t = data.frame(row.names = t_cell) %>%
  mutate(sample = 'GSE224679',
         type = 'Tumor') #给每个细胞加上注释信息:属于GSE224679和Tumor组

#构建Seurat对象
srt1 <- CreateSeuratObject(
  counts = scRNA_data_t, #数据来源是用Read10X函数读入的scRNA_data_t
  meta.data = metadata_t, #指定细胞注释信息
  min.cells = 3, #指定基因必须在3个及以上细胞中检测到
  min.features = 200 #每个细胞至少表达200个基因才会被保留
) 

#查看原始的counts矩阵
srt1@assays[['RNA']]@layers[['counts']] #V5中Seurat对象结构有改动,与以前的方法不一样

#提取基因名称
features = srt1@assays[['RNA']]@features
features <- dimnames(features)[[1]]

#读取normal文件夹的数据
scRNA_data_n <- Read10X(data.dir=".\\normal")
pdf(file="hist2.pdf",width = 5, height = 5) 
hist(log2(scRNA_data_n@x),breaks=100) #fig.1B
dev.off()
n_gene <- dimnames(scRNA_data_n)[[1]]
n_cell <- dimnames(scRNA_data_n)[[2]]
metadata_n = data.frame(row.names = n_cell) %>%
  mutate(sample = 'GSE224679',
         type = 'Normal') 
srt2 <- CreateSeuratObject(counts = scRNA_data_n,
                           meta.data = metadata_n,
                           min.cells = 3,
                           min.features = 200) 

#合并二个Seurat对象并保存
sc_merged <- merge(srt1,srt2)
pbmc <- sc_merged
save(pbmc,file="merged_raw_counts.rda")

#查看pbmc中count和feature的分布情况
pdf(file="nCount_RNA.pdf",width = 5, height = 5) #统计每个细胞中测到的count总和的频数分布,fig1.C
hist(pbmc$nCount_RNA)
dev.off()
pdf(file="nFeature_RNA.pdf",width = 5, height = 5) #统计每个细胞中有表达的基因的数量的频数分布,fig.1.D 
hist(pbmc$nFeature_RNA)
dev.off()

图片

fig.1

质量控制

说明

代码

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#查看线粒体基因频数分布图,pattern参数是指匹配MT开头的基因,fig.2.A
pbmc$Percent.Mito <- PercentageFeatureSet(pbmc,pattern = '^MT-')
pdf(file="Percent.mito.pdf",width = 5, height = 5)
hist(pbmc$Percent.Mito)
dev.off()

#查看外源基因频数分布图,匹配以ERCC开头的基因,fig.2.B
pbmc$Percent.ERCC <- PercentageFeatureSet(pbmc,pattern = '^ERCC')
pdf(file="Percent.ERCC.pdf",width = 5, height = 5)
hist(pbmc$Percent.ERCC)
dev.off()

#查看核糖体基因频数分布图,匹配以RPS或RPL开头的基因,fig.2.C
pbmc$Percent.Ribo <- PercentageFeatureSet(pbmc,pattern = '^RP[SL]')
pdf(file="Percent.Ribo.pdf",width = 5, height = 5)
hist(pbmc$Percent.Ribo)
dev.off()

###可视化###
#小提琴图,可以指定对哪个变量进行画图,以及指定分组依据。可以指定seurat对象中的metadata中的类别进行分组,fig.2.D、fig.2.E、fig.2.F、fig.2.G
pdf(file="nFeature_RNA_VlnPlot.pdf",width = 5, height = 5)
VlnPlot(pbmc,features = 'nFeature_RNA',group.by = 'type',
        alpha = 0.1,pt.size = 0.01)+
  geom_hline(yintercept = 2000,color = 'red')+
  scale_fill_igv()
dev.off()

pdf(file="nCount_RNA_VlnPlot.pdf",width = 5, height = 5)
VlnPlot(pbmc,features = 'nCount_RNA',group.by = 'type',
        alpha = 0.1,pt.size = 0.01)+
  geom_hline(yintercept = 5000,color = 'red')+
  scale_fill_igv()
dev.off()

pdf(file="Percent.Ribo_VlnPlot.pdf",width = 5, height = 5)
VlnPlot(pbmc,features = 'Percent.Ribo',group.by = 'type',
        alpha = 0.1,pt.size = 0.01)+
  geom_hline(yintercept = 2000,color = 'red')+
  scale_fill_igv()
dev.off()

pdf(file="Percent.Mito_VlnPlot.pdf",width = 5, height = 5)
VlnPlot(pbmc,features = 'Percent.Mito',group.by = 'type',
        alpha = 0.1,pt.size = 0.01)+
  geom_hline(yintercept = 2000,color = 'red')+
  scale_fill_igv()
dev.off()

#散点图,用来看两个metadata之间的关系,这里p1看的是nFeature和nCount的关系,p2看的是Percent.Ribo和Percent.Mito之间的关系,fig.2.H
p1 <- FeatureScatter(object = pbmc,
                     group.by = 'type',
                     raster = F,
                     shuffle = T,
                     pt.size = 0.05,
                     feature1 = "nFeature_RNA",
                     feature2 = "nCount_RNA")+
  scale_color_igv()+
  guides(color = guide_legend(override.aes = list(size = 4)))

p2 <- FeatureScatter(object = pbmc,
                     group.by = 'type',
                     raster = F,
                     shuffle = T,
                     pt.size = 0.05,
                     feature1 = "Percent.Ribo",
                     feature2 = "Percent.Mito")+
  scale_color_igv()+
  guides(color = guide_legend(override.aes = list(size = 4)))

#将两个图拼起来
p = p1 + p2;p
ggsave(p,filename = "quality.pdf",width = 10,height = 4.5)

#进一步筛选数据
pbmc_scaled <- subset(pbmc,nCount_RNA>1000) #筛选nCount_RNA>1000的细胞
pbmc_scaled = subset(pbmc_scaled,nFeature_RNA>500)#筛选nFeature_RNA>500的细胞
pbmc_scaled = subset(pbmc_scaled,Percent.Mito<20)#筛选Percent.Mito<20的细胞
pbmc_scaled = subset(pbmc_scaled,Percent.Ribo<40)#筛选Percent.Ribo<40的细胞
dim(pbmc_scaled)#查看数据维数,即查看筛选后还有多少个细胞,多少个基因

图片

fig.2

数据降维

说明

常见的降维方法有3种:主成分分析(PCA)、T-分布领域嵌入算法(t-SNE)和UMAP(统一流形逼近与投影)。三者数学原理大相径庭,但是作用都是将成千上万维的数据降低至二维(以便在二维平面上进行绘图),同时尽可能地保持高维空间内的分布特征。例如,高维空间中距离较远的点,在降至二维后距离仍应当较远,反之亦然。在本例中,对高维数据首先进行PCA降至30维,再使用t-SNE和UMAP算法,将该30维数据降至二维(直接用后两者降维可能会花费很长的时间)。此外,最终结果的展示和进一步分析推荐UMAP。

代码

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
pbmc <- pbmc_scaled
pbmc <- NormalizeData(object = pbmc) #进行数据标准化,消除不同细胞之间的测序深度差异
pbmc <- FindVariableFeatures(object = pbmc) #提取高变异基因
pdf(file="AVG-SV.pdf",width = 8, height = 5) #查看每个基因的均值与标准差,fig.3.A
VariableFeaturePlot(pbmc,cols=c('grey','steelblue'))+
  scale_y_log10()
dev.off()
pbmc <- ScaleData(object = pbmc)#进行数据缩放,将基因表达量缩放到均值为0,标准差为1

#可视化缩放后的data,fig.3.B
pdf(file="scaled_data.pdf",width = 6, height = 5)
hist(colSums(pbmc,slot='scale.data'),
     breaks = 50,
     main = 'Raw counts')
dev.off()

#PCA
pbmc <- RunPCA(object = pbmc) #进行主成分分析
pdf(file="elbow.pdf",width = 6, height = 5)#可视化不同主成分的贡献,fig.3C
ElbowPlot(pbmc,ndims = 30) 
dev.off()

#tSNE and UMAP降维,需要基于PCA的结果,用PCA的前30个主成分进行降维,最终降至二维
pbmc <- RunTSNE(pbmc,dims = 1:30,reduction = 'pca',reduction.name = 'tsne')
pbmc <- RunUMAP(object = pbmc, dims = 1:30, reduction = 'pca',reduction.name = 'umap')
pbmc <- FindNeighbors(object = pbmc, dims = 1:30, reduction = 'pca')#根据前30个主成分构建细胞间邻接图
pbmc <- FindClusters(object = pbmc,resolution = 1, cluster.name = 'unintegrated_clusters')#进行聚类(预聚类)

pdf(file="pca.pdf",width = 6, height = 5) #绘制PCA图,fig.3.D
DimPlot(object = pbmc, reduction = "pca")
dev.off()

pdf(file="tsne.pdf",width = 6, height = 5) #绘制t-SNE图,fig.3.E
DimPlot(object = pbmc, reduction = "tsne")
dev.off()

pdf(file="umap.pdf",width = 6, height = 5) #绘制UMAP图,fig.3.F
DimPlot(object = pbmc, reduction = "umap")
dev.off()

#使用不同的metadata进行分类,并分别在umap图上进行标注
pdf(file="multi_umap.pdf",width = 12, height = 5) #fig.3.F
DimPlot(object = pbmc, reduction = 'umap',
        group.by = c('type','unintegrated_clusters'),
        shuffle = T)
dev.off()

图片

fig.3

细胞分群

说明

根据细胞之间的亲疏关系(即以基因表达量为坐标,高维空间内的距离远近)进行聚类。Seurat包中聚类方法是基于K临近算法(KNN算法)开发的。通过比较不同分辨率(resolution)下的分类情况,选择合适的分辨率作为最终的分类方法。

代码

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#载入clustree包
if (!require(clustree)){
  BiocManager::install("clustree")
}
library(clustree)

pbmc_2 <- JoinLayers(pbmc)#将所有样本合在一起
pbmc_2 <- FindNeighbors(object = pbmc_2, dims = 1:30, reduction = 'pca')
pbmc_2 <- FindClusters(
  object = pbmc_2,
  resolution = c(seq(0.1,1.0,0.1))
)#分辨率0.1、0.2、0.3、...、1.0分别进行聚类,共聚类十次。

#用clustree函数来可视化不同分辨率下的分类情况,fig.4.A
pdf(file="clustree.pdf",width = 6, height = 8)
clustree(pbmc_2@meta.data,prefix = 'RNA_snn_res.')
dev.off()

#查看不同分群情况下的umap图结果,fig.4.B
p1 <- DimPlot(pbmc_2,reduction='umap',group.by = "RNA_snn_res.0.2",
              label = T,
              repel = F,
              shuffle = T);p1
p2 <- DimPlot(pbmc_2,reduction='umap',group.by = "RNA_snn_res.0.3",
              label = T,
              repel = F,
              shuffle = T);p2
p3 <- DimPlot(pbmc_2,reduction='umap',group.by = "RNA_snn_res.0.4",
              label = T,
              repel = F,
              shuffle = T);p3
p4 <- DimPlot(pbmc_2,reduction='umap',group.by = "RNA_snn_res.0.5",
              label = T,
              repel = F,
              shuffle = T);p4
p5 <- DimPlot(pbmc_2,reduction='umap',group.by = "RNA_snn_res.0.6",
              label = T,
              repel = F,
              shuffle = T);p5
p6 <- DimPlot(pbmc_2,reduction='umap',group.by = "RNA_snn_res.1",
              label = T,
              repel = F,
              shuffle = T);p6
p = p1+p2+p3+p4+p5+p6
ggsave(p,filename = "quality2.pdf",width = 13,height = 8)

#选择合适的分辨率,这里选择0.5,共分为15类,这便是最终的分类
pbmc_2$seurat_clusters = pbmc_2$RNA_snn_res.0.5

#绘制resolution=0.5时的最终umap图像,fig.4.C
p1 <- UMAPPlot(pbmc_2,
               label = T,
               repel = F,
               group.by = 'seurat_clusters'
)+
  ggtitle('UMAP')+
  theme_bw()+
  theme(
    plot.title = element_text(face = 'bold',hjust = 0.5),
    plot.background = element_rect(fill = 'transparent',color = NA)
  )+
  scale_color_igv();p1
ggsave(p1,filename = "final_umap.pdf",width = 13,height = 8)

Idents(pbmc_2) = 'seurat_clusters' 设置metadata中默认索引是seurat_clusters

图片

fig.4

差异基因提取

说明

使用FindAllMarkers函数提取每一组内的差异表达基因,分组依据是上述的resolution=0.5时的自动分组,即‘Seurat clusters’,也就是Idents(pbmc_2)。接着提取每组中差异表达最明显的3个基因提取出来,将它们在每一组细胞内的表达情况用热图、气泡图或小提琴图展示出来。注意!这三种图包含着同样的信息,选取一个即可。

此外,FeaturePlot函数允许指定多个基因,并可视化这些基因在细胞中的表达情况,可以选择感兴趣的基因进行绘图。

代码

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
sc.markers <- FindAllMarkers(
  object = pbmc_2,
  only.pos = F, #保留上调与下调的基因
  test.use = 'wilcox', #检验方法是wilcoxn检验
  slot = 'data', #默认使用data而不是counts
  min.pct = 0.25, #该基因在至少25%的细胞内表达
  logfc.threshold = 0.25 #设置log2FC的阈值
) #这个函数里没有指定分组,则它会使用默认分组依据,即Idents(pbmc_2)
save(sc.markers,file = 'ALL_MARKERS.rda')

#取出每群表达中top3高的基因
top3 <- sc.markers %>%
  filter(avg_log2FC > 0) %>%
  group_by(cluster) %>% #以metadata中的‘cluster’来进行分类
  filter(p_val_adj < 0.05) %>%
  top_n(3,avg_log2FC) %>%
  group_by()

###差异基因可视化###
#各绘图方式信息量一致,选择一种即可

#热图,fig.5.A
DoHeatmap(
  object = pbmc_2,
  features = top3$gene,
  label = TRUE
)+
  scale_fill_gradient(low = 'lightgrey',high = 'salmon')
ggsave(file = 'Heatmap.pdf',width = 10,height = 8)

#点图,fig.5.B
DotPlot(
  object = pbmc_2,
  group.by = 'seurat_clusters',
  cols = c('lightgrey','salmon'),
  features = unique(top3$gene)
)+
  coord_flip()
ggsave(file = 'dotplot.pdf',width = 10,height = 8)

#小提琴图,fig.5.C
VlnPlot(
  object = pbmc_2,
  group.by = 'seurat_clusters',
  #fill.by = 'feature',
  stack = T,
  flip = T,
  features = unique(top3$gene)
)+
  scale_fill_igv()
ggsave(file = 'vlnplot.pdf',width = 10,height = 8)

#对于感兴趣的基因进行绘图,fig.5.D
FeaturePlot(
  pbmc_2,
  features = c('CD8A','CD4','ZNF683','OLFM4'),#可以选择感兴趣的基因进行绘图
  order = T,
  alpha = 0.3,
  reduction = 'umap'
)
ggsave(file = 'Featureplot.pdf',width = 10,height = 8)

图片

fig.5

细胞注释

说明

细胞注释,即根据细胞高表达的基因的特征来为其指定一种细胞类型。所研究的marker基因可以参考相关文献、参考前人的经验、或者从CellMarker官网导出等。本例中,与免疫细胞相关的marker基因来源于别人总结好的经验,与肠道细胞相关的marker基因是从CellMarker官网上导出的:

Cell Marer 2.0

在CellMarker2.0网站上检索肠道的分子Marker,在检索页面依次选择human–gastrointestinal tract–all,导出Table.csv,这里面便包含了肠道细胞的Marker基因。

代码

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
immune_markers = c('CD8A','CD88','CD3D','CD3E', #CD8+T
                  'CD4', #CD4+T
                  'ZNF683', #NKT
                  'NKG7','FCER1G','FGFBP2','FCGR3A','GZMH','TYROBP','IGFBP7','CD160','GZMB', #NK
                  'CD74','CD79A','CD79B','MS4A1', #B cells
                  'JCHAIN','MZB1','IGHG1', #Plasma cells
                  'LYZ','S100A8','S100A9','CD14', #monocyte
                  'PPBP' #platelet
                  )#这个是参照他人整理好的内容

#再从CellMarker2.0网站上检索肠道的分子Marker,选择human-gastrointestinal tract-all,导出Table.csv
marker <- read.csv("Table.csv")
marker <- marker$Cell.marker

#绘制气泡图,fig.6.A
plotdt = sc.markers %>%
  filter(gene %in% c(c(marker),c(immune_markers))) %>%
  arrange(cluster,avg_log2FC) %>%
  mutate(gene = factor(gene,levels = unique(gene),ordered = T))
ggplot(
  plotdt,
  aes(x = cluster,y = gene,
  size = avg_log2FC,
  color = pct.1))+
  geom_point()+
  geom_text(aes(label = cluster),size = 3,color = 'black')+
  theme_bw()+
  theme(
    plot.title = element_text(face = 'bold',hjust = 0.5),
    plot.background = element_rect(fill = 'transparent',color = NA)
  )+
  scale_color_gradient2(low = 'olivedrab',high = 'salmon',
                        mid = 'yellow',midpoint = 0.5)
ggsave(file = 'Featureplot_intestine_marker.pdf',width = 10,height = 8)

#对特定的基因(这里可以是查找出来的某些差异基因)进行feature plot,fig.6.B
FeaturePlot(
  pbmc_2,
  features = c('KLRD1','TPSAB1','KIT','CD4','JCHAIN','MUC2','SELL','MS4A1','GUCA2B','CD3G','OLFM4'),
  order = T,
  alpha = 0.3,
  reduction = 'umap'
)
ggsave(file = 'all_feature.pdf',width = 10,height = 8)

#手工细胞注释,根据cell marker,用肉眼大致看一下是哪种细胞的marker,然后手工给每群细胞打上标记。不过这种方式并不一定准确
pbmc_2$cell_type = case_when(
  pbmc_2$seurat_clusters %in% c(14) ~ 'Mast cells',
  pbmc_2$seurat_clusters %in% c(13) ~ 'Unknown',
  pbmc_2$seurat_clusters %in% c(12) ~ 'CD4+T cells',
  pbmc_2$seurat_clusters %in% c(11) ~ 'Plasma cells',
  pbmc_2$seurat_clusters %in% c(10) ~ 'Intestinal epithelial cells',
  pbmc_2$seurat_clusters %in% c(5,7) ~ 'B cells',
  pbmc_2$seurat_clusters %in% c(8) ~ 'Unknown',
  pbmc_2$seurat_clusters %in% c(9) ~ 'Unknown',
  pbmc_2$seurat_clusters %in% c(1,2,3,4) ~ 'Tumor cells',
  pbmc_2$seurat_clusters %in% c(6) ~ 'T cells',
  pbmc_2$seurat_clusters %in% c(0) ~ 'NK cells',
  TRUE ~ 'Unknown'
)
p1 <- DimPlot(pbmc_2,reduction='umap',group.by = "seurat_clusters",
              label = T,
              repel = F,
              shuffle = T)
p2 <- DimPlot(pbmc_2,reduction='umap',group.by = "cell_type",
              label = T,
              repel = F,
              shuffle = T)
p3 <- DimPlot(pbmc_2,reduction='umap',group.by = "type",
              label = T,
              repel = F,
              shuffle = T)
p = p1+p2+p3
ggsave(p,filename = "contrast_umap.pdf",width = 24,height = 8)#fig.6.C

###对pbmc_2的meta.data可视化###
#观察不同的细胞类型在肿瘤病人和正常病人之间的丰度比较,fig.6.D
plotdata = pbmc_2@meta.data
p <- ggplot(
  plotdata,
  aes(x=cell_type,fill = type)
)+
  geom_bar(position = 'dodge')+
  scale_fill_igv(alpha = 0.7)
ggsave(p,filename = "barplot.pdf",width = 10,height = 6)

#观察两类病人的不同细胞的构成比,fig.6.E
p <- ggplot(
  plotdata,
  aes(x=type,fill = cell_type)
)+
  geom_bar(position = 'fill')+
  scale_fill_igv(alpha = 0.7)
ggsave(p,filename = "comparasion_barplot.pdf",width = 4,height = 8)

#对两个样本分别绘制umap,fig.6.F
p <- UMAPPlot(pbmc_2,
               label = T,
               repel = F,
               split.by = 'type',
               group.by = 'cell_type'
)+
  ggtitle('UMAP')+
  theme_bw()+
  theme(
    plot.title = element_text(face = 'bold',hjust = 0.5),
    plot.background = element_rect(fill = 'transparent',color = NA)
  )+
  scale_color_igv()
ggsave(p,filename = "splited_umap.pdf",width = 15,height = 8)

图片

fig..6

注释后差异基因可视化

说明

与上述寻找差异基因的过程类似,不过这次已经注释好了细胞类型,可以再次绘制差异表达基因的火山图、热图等,或者提取某一种特定的细胞,研究其在不同组间的表达差异。

代码

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
#之前寻找差异基因的时候用的是自动生成的unintegrated_cluster作为分类依据,现在用注释好的cell_type作为分类依据
Idents(pbmc_2) <- "cell_type" #将cell_type设置为主要的分类标签
#FindAllMarkers函数会分析每个Idents中的差异表达基因
cell.markers <- FindAllMarkers(
  object = pbmc_2,
  only.pos = F, #保留上调与下调的基因
  test.use = 'wilcox', #检验方法是wilcox检验
  slot = 'data', #默认使用data而不是counts
  min.pct = 0.25, #在至少25%的细胞内表达
  logfc.threshold = 0.25 #设置log2FC的阈值
)

#FindMarkers函数通过指定分组依据(group.by),以及两个ident,来寻找这两个ident之间的差异基因
degs = FindMarkers(
  pbmc_2,
  logfc.threshold = 0.5,
  only.pos = F,
  ident.1 = 'Tumor cells',
  ident.2 = 'Intestinal epithelial cells',#默认使用ident1 vs ident2,而不是相反
  group.by = 'cell_type'
) %>%
  mutate(gene=rownames(.))#将基因名等于行名方便后续操作

#如果想要观察特定细胞类型的差异基因,可以先提取子集,然后做类似操作。比如这里选择分析T细胞
Tcell_subtype = subset(pbmc_2 , cell_type == 'T cells')
Tcell_degs = FindMarkers(
  Tcell_subtype,
  logfc.threshold = 0.5,
  only.pos = F,
  ident.1 = 'Tumor',
  ident.2 = 'Normal',
  group.by = 'type'
) %>%
  mutate(gene=rownames(.))

###可视化###
#火山图
#对degs可视化,fig.7.A
change = as.factor(ifelse(degs$p_val_adj < 0.05 & abs(degs$avg_log2FC) > 1,
                          ifelse(degs$avg_log2FC > 1 ,'Up','Down'),'No change'))#标记上调下调基因
filtered_degs = degs %>%
  filter(p_val_adj<1e-100)%>%
  filter(abs(avg_log2FC)>3)
degs$label <- ifelse(degs$gene %in% filtered_degs$gene, degs$gene, NA)#标记相当显著的基因
pdf("Tumor_vs_Unknown_vol.pdf",width=5,height=5)#绘制Tumor cells和Unknown组之间的差异基因
ggplot(degs, aes(avg_log2FC, -log10(p_val_adj)))+
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#999999")+
  geom_vline(xintercept = c(-1,1), linetype = "dashed", color = "#999999")+
  geom_point(aes(color = change),
             size = 0.2, 
             alpha = 0.5) +
  theme_bw(base_size = 12)+
  geom_text_repel(aes(label = label), 
                  size = 3, 
                  box.padding = 0.2, 
                  max.overlaps = 10) + # 防止重叠
  scale_color_manual(values = c("Up" = "red", "Down" = "blue", "No change" = "gray"))+
  theme(panel.grid = element_blank(),
        legend.position = 'right')+
  scale_x_continuous(limits = c(-max(abs(degs$avg_log2FC)), max(abs(degs$avg_log2FC))))#沿y轴对称
dev.off()

#对Tcells在肿瘤组和正常组之间的差异表达基因进行可视化,fig.7.B
change = as.factor(ifelse(Tcell_degs$p_val_adj < 0.05 & abs(Tcell_degs$avg_log2FC) > 1,
                          ifelse(Tcell_degs$avg_log2FC > 1 ,'Up','Down'),'No change'))
filtered_Tcell_degs = Tcell_degs %>%
  filter(p_val_adj< 0.05 )%>%
  filter(abs(avg_log2FC)>1)
Tcell_degs$label <- ifelse(Tcell_degs$gene %in% filtered_Tcell_degs$gene, Tcell_degs$gene, NA)
pdf("T_vol.pdf",width=5,height=5)
ggplot(Tcell_degs, aes(avg_log2FC, -log10(p_val_adj)))+
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "#999999")+
  geom_vline(xintercept = c(-1,1), linetype = "dashed", color = "#999999")+
  geom_point(aes(color = change),
             size = 0.2, 
             alpha = 0.5) +
  theme_bw(base_size = 12)+
  geom_text_repel(aes(label = label), 
                  size = 3, 
                  box.padding = 0.2, 
                  max.overlaps = 10) + # 防止重叠
  scale_color_manual(values = c("Up" = "red", "Down" = "blue", "No change" = "gray"))+
  theme(panel.grid = element_blank(),
        legend.position = 'right')+
  scale_x_continuous(limits = c(-max(abs(Tcell_degs$avg_log2FC)), max(abs(Tcell_degs$avg_log2FC))))
dev.off()

#小提琴图,对每个类别的top3表达基因进行可视化,fig.7.C
celldiff_top3 <- cell.markers %>%
  filter(avg_log2FC > 0) %>%
  group_by(cluster) %>%
  filter(avg_log2FC >3) %>%
  top_n(3,-p_val_adj) %>%
  group_by()

VlnPlot(pbmc_2,features = celldiff_top3$gene,
        group.by = "cell_type",
        split.plot = T,
        split.by = 'type',
        cols = c('olivedrab','salmon'),
        stack = T,
        flip = T)
ggsave(file = 'vlnplot_celldiff_top3gene.pdf',width = 8,height = 14)

图片

fig.7

GO、KEGG、GSEA富集分析

说明

代码

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
library(tidyverse)
library(clusterProfiler)
library(enrichplot)

Idents(pbmc_2) <- "cell_type" #将cell_type设置为主要的分类标签
cell.markers <- FindAllMarkers(
  object = pbmc_2,
  only.pos = F,
  test.use = 'wilcox', #检验方法是wilcox检验
  slot = 'data', #默认使用data而不是counts
  min.pct = 0.25, #在至少25%的细胞内表达
  logfc.threshold = 0.25 #设置log2FC的阈值
) #以cell_type为分类依据,寻找每个类别的差异表达基因


Tumor_degs = cell.markers %>%
  filter(cluster == "Tumor cells") %>%
  filter(p_val_adj < 0.05) %>%
  filter(abs(avg_log2FC) > 1) %>%
  arrange(desc(avg_log2FC)) #过滤差异表达基因

GO_database <- 'org.Hs.eg.db'

#将SYMBOL转ENTREZID
ENTgene <- bitr(Tumor_degs$gene, fromType = 'SYMBOL', toType = 'ENTREZID', OrgDb = GO_database,drop = F) %>%
  distinct(SYMBOL,.keep_all = T) %>%
  drop_na()
ENTgene <- dplyr::distinct(ENTgene,SYMBOL,.keep_all = T)

###GO富集分析,使用enrichGO函数###
GO <- enrichGO(
  ENTgene$ENTREZID,
  OrgDb = GO_database, #即'org.Hs.eg.db',即人类数据库
  keyType = "ENTREZID",
  ont = "all",  # 获取BP, CC, MF所有本体论的富集结果
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.01,
  readable = T
)

#可视化barplot,fig.8.A
pdf("GO_Enrichment_all_ontology.pdf",width=8,height=12)
barplot(
  GO,
  split="ONTOLOGY", #按基因本体论分组
)+
  facet_wrap(~ONTOLOGY, scales="free", ncol=1)
dev.off()

#可视化dotplot,fig.8.B
pdf("GO_Enrichment_all_ontology_dot.pdf",width=10,height=20)
dotplot(GO,split = "ONTOLOGY")
dev.off()

###KEGG分析###
KEGG_database = 'hsa'
kegg_data <- enrichKEGG(
  gene = ENTgene$ENTREZID,
  keyType = 'kegg',
  organism = KEGG_database,
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05,
  use_internal_data = F
) %>%
  setReadable(.,OrgDb = GO_database,keyType = 'ENTREZID')

kegg_data = setReadable(kegg_data, #前面分析的结果
                        OrgDb = "org.Hs.eg.db", #人类数据库
                        keyType = "ENTREZID") #要转换的基因类型
write.table(kegg_data,file="kegg_data.txt",sep="\t",quote=F,row.names = F)  
#可视化barplot,fig.8.C
pdf(file="kegg_barplot.pdf",width = 7,height = 5) 
barplot(kegg_data, x = "GeneRatio", color = "p.adjust", #默认参数
        showCategory =10) #只显示前10
dev.off()

#可视化dotplot,fig.8.D
pdf(file="kegg_dotplot.pdf",width = 7,height = 5) 
dotplot(kegg_data,x = "GeneRatio", color = "p.adjust", size = "Count", #默认参数
        showCategory = 10) 
dev.off()

#可视化colored cnetplot,fig.8.E
Tumor_degs$SYMBOL = Tumor_degs$gene
cnet_data = merge(Tumor_degs,ENTgene,by = "SYMBOL")
cnet_data = data.frame(cnet_data$ENTREZID,cnet_data$gene,cnet_data$avg_log2FC)
logFC <- cnet_data$cnet_data.avg_log2FC
names(logFC) <- cnet_data$cnet_data.gene

pdf(file="colored_kegg_cnetplot.pdf",width = 10,height = 10) 
cnetplot(kegg_data, showCategory = 8, #选择top8的pathway ,这里也可以用包含pathway名称的向量           
         foldChange = logFC,
         colorEdge = T,
         node_label = 'all',
         max.overlaps = 50
)
dev.off()

###GSEA分析###
expr_matrix = data.frame(Tumor_degs$SYMBOL,Tumor_degs$avg_log2FC)
colnames(expr_matrix) <- c("SYMBOL","Log2FoldChange")
merged_matrix = merge(expr_matrix,ENTgene,by = "SYMBOL")
data_sort <- arrange(merged_matrix,desc(Log2FoldChange)) #log2FC进行排序,这是GSEA分析必要的
gene_list <- data_sort$Log2FoldChange
names(gene_list) <- data_sort$ENTREZID

KEGG_database = 'hsa'
gsea <- gseKEGG(gene_list,organism = KEGG_database, pvalueCutoff = 0.05)
gsea = setReadable(gsea, OrgDb = GO_database, keyType = "ENTREZID")
#可视化
pdf(file="gsea_dotplot.pdf", width = 12, height = 10) #fig.8.F
dotplot(gsea)
dev.off()

pdf(file="gsea_ridgeplot.pdf", width = 12, height = 10) #fig.8.G
ridgeplot(gsea,label_format = 100)
dev.off()

pdf(file="gsea_gseaplot.pdf", width = 12, height = 10) #fig.8.H
gseaplot2(gsea,c(1:10),pvalue_table = F)
dev.off()

###pathview图###
library(pathview)
library(tidyverse)

pathview_data = Tumor_degs %>%
  filter(Tumor_degs$gene %in% as.factor(ENTgene$SYMBOL))
pathview_data$SYMBOL = pathview_data$gene
pathview_data = merge(pathview_data,ENTgene,by = 'SYMBOL')
pathview_data_draw = data.frame(pathview_data$ENTREZID,pathview_data$avg_log2FC) 
rownames(pathview_data_draw) = pathview_data_draw$pathview_data.ENTREZID
colnames(pathview_data_draw) <- c('ENTREZID','log2FC')
pathview_data_draw$ENTREZID <- as.numeric(pathview_data_draw$ENTREZID)#将ENTREZID列转化为数字

#绘图所需的数据是一个dataframe,行名是ENTREZID,两列分别是ENTREZID和log2FC
#fig.8.I
pathview(gene.data = pathview_data_draw, #上面包含行名为entrezID的logFC值的数据框
         pathway.id = "hsa04390", #选择一个KEGG信号通路,这里选的是hsa04650。更多通路见KEGG官网
         species = "hsa",
         out.suffix = "Tumor1", #输出文件名
         kegg.native = T
)

#fig.8.J
pathview(gene.data = pathview_data_draw, #上面包含行名为entrezID的logFC值的数据框
         pathway.id = "hsa04390", 
         species = "hsa",
         out.suffix = "Tumor2", #输出文件名
         kegg.native = F #以pdf保存,但是格式可能不好看
)

图片

fig.8

Cell Chat分析

说明

代码

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
if (!require(CellChat)){
  devtools::install_github("jinworks/CellChat")
  devtools::install_github('immunogenomics/presto')
}
library(CellChat)
library(patchwork)

data.input <- pbmc_2[["RNA"]]$data #提取表达矩阵
labels <- Idents(pbmc_2) #提取主要分类标签,这里是cell_type
meta <- data.frame(labels = labels, row.names = names(labels))
cellchat <- createCellChat(object = pbmc_2, group.by = "ident", assay = "RNA") #构建cellchat对象
CellChatDB <- CellChatDB.human #选择人类数据库

#展示该数据库中的数据类别的构成比,fig.9.A
showDatabaseCategory(CellChatDB)
ggsave(filename='DBcategory.pdf',width = 6, height = 4)

#提取数据库的子集,选择“Sevreted Signaling”部分
CellChatDB.use <- subsetDB(CellChatDB, search = "Secreted Signaling", key = "annotation")
cellchat@DB <- CellChatDB.use #将数据库更新到cellchat对象中
cellchat <- subsetData(cellchat) #从输入数据中提取与信号通路相关的配体和受体基因,不能省略
cellchat <- identifyOverExpressedGenes(cellchat) #识别在各群体中过表达的基因
cellchat <- identifyOverExpressedInteractions(cellchat) #根据过表达基因推断潜在的细胞相互作用

#使用人类蛋白质相互作用(PPI)网络平滑信号数据,改进噪声模型
cellchat <- smoothData(cellchat, adj = PPI.human) #cellchat包作者把projectData函数改为了smoothData函数!
cellchat <- computeCommunProb(cellchat, type = "triMean") #计算细胞间的通信概率,使用三均值法作为估算方式
cellchat <- filterCommunication(cellchat, min.cells = 10) #筛选通信事件,保留至少在 10 个细胞中检测到的通信对
cellchat <- computeCommunProbPathway(cellchat) #计算细胞间通讯的路径概率
cellchat <- aggregateNet(cellchat)#将细胞间通讯的网络聚合到更高的层次
groupSize <- as.numeric(table(cellchat@idents))#计算每种细胞类型的细胞数

#绘制弦图,fig.9.B
pdf(file="Interactions_string.pdf",width = 5,height = 5) 
netVisual_circle(
  cellchat@net$count, 
  vertex.weight = groupSize, 
  weight.scale = T, 
  label.edge= F, 
  title.name = "Number of interactions"
)
dev.off()

#绘制弦图,线的粗细为weights/strength,fig.9.C
pdf(file="Interactions_string_W_S.pdf",width = 5,height = 5) 
netVisual_circle(
  cellchat@net$weight, 
  vertex.weight = groupSize, 
  weight.scale = T, 
  label.edge= F, 
  title.name = "Interaction weights/strength"
)
dev.off()

#绘制每一个类型的细胞与所有的细胞之间的通讯情况,fig.9.D
mat <- cellchat@net$weight
pdf(file='plot_all.pdf',12,6)
par(mfrow = c(2,4), xpd=TRUE)
for (i in 1:nrow(mat)) {
  mat2 <- matrix(0, nrow = nrow(mat), ncol = ncol(mat), dimnames = dimnames(mat))
  mat2[i, ] <- mat[i, ]
  netVisual_circle(
    mat2, 
    vertex.weight = groupSize, 
    weight.scale = T, 
    edge.weight.max = max(mat), 
    title.name = rownames(mat)[i]
  )
}
dev.off()

#单独显示一条通路,这里选择MIF通路。绘制弦图,fig.9.E
cellchat@netP$pathways#涉及到的所有的pathway
pathways.show <- c("MIF") 
pdf(file = 'MIF signaling pathway network2.pdf',width = 5, height = 5)
netVisual_aggregate(
  cellchat, 
  signaling = pathways.show,  
)
dev.off()

#绘制另一种弦图,fig.9.F
pdf(file = 'chord_plot.pdf',width = 10,height = 10)
netVisual_aggregate(
  cellchat, 
  signaling = pathways.show, 
  layout = "chord"
)
dev.off()

#绘制热图,fig.9.G
pdf(file = 'heatmap_plot.pdf',width = 10,height = 10)
netVisual_heatmap(
  cellchat, 
  signaling = pathways.show, 
  color.heatmap = "Reds"
)
dev.off()


#计算该通路(MIF)中一些亚型的贡献占比,fig.9.H
pdf(file = "MIF_Sub.pdf",width = 5,height = 5)
netAnalysis_contribution(cellchat, signaling = pathways.show)
dev.off()

#提取MIF通路中所有亚型
pairLR.CXCR4 <- extractEnrichedLR(
  cellchat, 
  signaling = pathways.show, 
  geneLR.return = FALSE
)

#提取MIF-(CD74+CRCX4)通路,画出某一条通路的弦图,fig.9.I
LR.show <- pairLR.CXCR4[1,] # show one ligand-receptor pair
pdf(file = 'MIF-CD74+CRCX4_signaling_pathway.pdf',width = 5,height = 5)
netVisual_individual(cellchat, signaling = pathways.show,  pairLR.use = LR.show)
dev.off()

#可视化所有通路的亚型占比,fig.9.J
pathways.show.all <- cellchat@netP$pathways
vertex.receiver = seq(1,4)
for (i in 1:length(pathways.show.all)) {
  # Visualize communication network associated with both signaling pathway and individual L-R pairs
  netVisual(cellchat, signaling = pathways.show.all[i], vertex.receiver = vertex.receiver, layout = "hierarchy")
  # Compute and visualize the contribution of each ligand-receptor pair to the overall signaling pathway
  if (i==1){
    gg <- netAnalysis_contribution(cellchat, signaling = pathways.show.all[i])
  }
  gg <- gg+netAnalysis_contribution(cellchat, signaling = pathways.show.all[i])
  #ggsave(filename=paste0(pathways.show.all[i], "_L-R_contribution.pdf"), plot=gg, width = 3, height = 2, units = 'in', dpi = 300)
}
ggsave(file = 'contribution_of_each_pathway.pdf',width = 14,height = 10)


#绘制气泡图,本质与弦图类似,fig.9.K
pdf(file = 'bubble2.pdf',width = 5,height = 5)
netVisual_bubble(
  cellchat, 
  sources.use = c(2), #指定信号来源的细胞群体
  targets.use = c(1:8), #指定信号接收的细胞群体
  signaling = c("MIF","CypA"), #指定可视化哪些信号通路
  remove.isolate = FALSE #不移除信号强度低的通信对
)
dev.off()

#指定源细胞群和目标细胞群,以及指定信号通路,绘制弦图,fig.9.L
pdf(file = 'Tumor_to_all_chord.pdf',width = 7,height = 5)
netVisual_chord_gene(
  cellchat, 
  sources.use = c(2), #指定信号来源的细胞群体,编号为 1 到 8。
  targets.use = c(1:8), #指定信号接收的细胞群体,编号为 1 到 8。
  signaling = c("MIF"),
  legend.pos.x = 8 #调整图在画布上的位置
)
dev.off()

#绘制与指定信号通路相关的基因的表达的小提琴图,fig.9.M
pdf(file = 'cell_chat_Vlnplot_MIF.pdf',width = 7,height = 5)
plotGeneExpression(
  cellchat, 
  signaling = "MIF", #指定一条信号通路
  enriched.only = TRUE, #只显示显著上调的基因
  type = "violin" #绘制小提琴图
)
dev.off()

#计算信号网络中各细胞群体的中心性指标(如度数、介数和接近中心性等)
cellchat <- netAnalysis_computeCentrality(
  cellchat, 
  slot.name = "netP" #指定数据来源为概率加权信号网络netP
)
#绘制图像,可视化在该通路下每个细胞的作用如何,分为Sender、Receiver、Mediator和Influencer,fig.9.N
pdf(file = 'SignalingRole_heatmap.pdf',width = 7,height = 5)
netAnalysis_signalingRole_network(
  cellchat, 
  signaling = pathways.show, #指定绘图的信号通路,这里是MIF
  width = 8, 
  height = 2.5, 
  font.size = 10
)
dev.off()

#生成热图,展示每个细胞群体在信号网络中作为发送者(Sender)和接收者(Receiver)的能力,fig.9.O
ht1 <- netAnalysis_signalingRole_heatmap(
  cellchat, 
  pattern = "outgoing"
)
ht2 <- netAnalysis_signalingRole_heatmap(
  cellchat, 
  pattern = "incoming"
)
pdf(file = 'O_R_heatmap.pdf',width = 12,height = 7)
ht1+ht2
dev.off()


library(NMF)
library(ggalluvial)
###outgoing###
#有两个参数:
#Cophenetic相关系数,用于衡量数据在聚类时的保真度(fidelity)
#Silhouette系数衡量每个点在聚类中的紧密度和聚类之间的分离度
#fig.9.P
K <- selectK(cellchat, pattern = "outgoing")
pdf(file = 'K_out.pdf',width = 12,height = 7)
K
dev.off()

#绘制聚类热图,fig.9.Q
pdf(file = 'CommuPattern_out.pdf',width = 12,height = 5)
nPatterns = 3
cellchat <- identifyCommunicationPatterns(cellchat, pattern = "outgoing", k = nPatterns)
dev.off()
#聚类-riverplot-out,fig.9.R
pdf(file = 'riverplot_out.pdf',width = 12,height = 7)
netAnalysis_river(cellchat, pattern = "outgoing")
dev.off()

#聚类-dotplot-out,fig.9.S
pdf(file = 'dotplot_out.pdf',width = 12,height = 7)
netAnalysis_dot(cellchat, pattern = "outgoing")
dev.off()

###incoming###
#fig.9.T
K <- selectK(cellchat, pattern = "incoming")
pdf(file = 'K_in.pdf',width = 12,height = 7)
K
dev.off()

#聚类热图,fig.9.U
pdf(file = 'CommuPattern_in.pdf',width = 12,height = 5)
nPatterns = 3
cellchat <- identifyCommunicationPatterns(cellchat, pattern = "incoming", k = nPatterns)
dev.off()

#聚类-riverplot-in,fig.9.V
pdf(file = 'riverplot_in.pdf',width = 12,height = 7)
netAnalysis_river(cellchat, pattern = "incoming")
dev.off()

#聚类-dotplot-in,fig.9.W
pdf(file = 'dotplot_in.pdf',width = 12,height = 7)
netAnalysis_dot(cellchat, pattern = "incoming")
dev.off()

图片

fig.9

Licensed under CC BY-NC-SA 4.0
welcome to my blog
使用 Hugo 构建
主题 StackJimmy 设计