【文本挖掘与文本分析】上机实验三

实验目的和要求

实验

了解使用 sklearn、gensim 进行主题分析与主题可视化的基本操作；
采集四大名著之《红楼梦》进行主题分析
对《红楼梦》的主题进行可视化
或
采集二十大报告进行主题分析；
对《二十大报告》的主题进行可视化

数据来源

《红楼梦》小说
《二十大报告》

采集红楼梦文本，加载红楼梦人物、停用词和词典，完成以下任务：
1.输出前100个人物的出场次数，然后绘制人物出现词云图，最后再绘制出现次数最多的前20个人物的出场次数柱状图。
2.利用TF-IDF算法，sklearn,gensim对红楼梦进行可视化主题分析
3.基于知识图谱对红楼梦人物关系进行可视化,梳理贾宝玉、林黛玉、薛宝钗和其他人的人物关系。

# Load the chapter texts and the auxiliary word lists, then configure the
# word-cloud renderer.
# Repairs to the pasted snippet: typographic quotes replaced by ASCII quotes,
# the `with` bodies re-indented, and the Windows path separators escaped so
# that a sequence such as "\202" is not interpreted as an octal escape.
import pandas as pd
from wordcloud import WordCloud

data = pd.read_excel("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦数据集.xlsx")
chapters = data['Artical'].tolist()  # values of the 'Artical' column, in row order

with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦人物.txt", "r", encoding="utf-8") as file:
    characters = file.read().splitlines()

with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦停用词.txt", "r", encoding="utf-8") as file:
    stopwords = file.read().splitlines()

with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦词典.txt", "r", encoding="utf-8") as file:
    dictionary = file.read().splitlines()

# font_path points at a TrueType font file so the cloud can render Chinese names
wordcloud = WordCloud(background_color='white', font_path=r"D:\coder\randomnumbers\Keywords_cloud\msyh.ttf", width=800, height=600)

人物出现次数

import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt

def _read_lines(path):
    """Return the lines of the UTF-8 text file at *path*, without line endings."""
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read().splitlines()


# Chapter texts: the 'Artical' column of the spreadsheet, in row order
data = pd.read_excel("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦数据集.xlsx")
chapters = data['Artical'].tolist()

# Character names, stop words and the custom dictionary, each read line by line
characters = _read_lines("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦人物.txt")
stopwords = _read_lines("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦停用词.txt")
dictionary = _read_lines("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦词典.txt")

# Tally every name's occurrences, summed over all chapters.
# Counting is plain substring counting (str.count), chapter by chapter.
character_counts = {}
for chapter in chapters:
    for character in characters:
        character_counts[character] = character_counts.get(character, 0) + chapter.count(character)

# Rank by total mentions, highest first, and keep the first 100 entries
ranking = sorted(character_counts.items(), key=lambda pair: pair[1], reverse=True)
sorted_characters = ranking[:100]

print("前100个人物的出场次数:")
for name, total in sorted_characters:
    print(f"{name}: {total}")

# Word cloud of character mentions, sized by the totals in character_counts.
# The figure title is Chinese, so matplotlib is given a CJK-capable font
# (same setting the later snippets in this file use); without it the title
# glyphs are drawn as empty boxes.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# In a raw string a backslash is literal, so the separators must not be doubled
wordcloud = WordCloud(background_color='white', font_path=r"D:\coder\randomnumbers\Keywords_cloud\msyh.ttf", width=800, height=600)
wordcloud.generate_from_frequencies(character_counts)
plt.figure(figsize=(10, 8))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title('红楼梦人物出现词云图')
plt.show()

# Bar chart of the 20 most-mentioned characters.
# Tick labels, axis labels and the title are Chinese, so matplotlib needs a
# CJK-capable font (same setting the later snippets in this file use);
# otherwise the glyphs are drawn as empty boxes.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# sorted_characters is already ordered by count, so the first 20 are the top 20
top20_characters = sorted_characters[:20]  # list of (name, count) pairs
top20_names = [item[0] for item in top20_characters]
top20_counts = [item[1] for item in top20_characters]

plt.figure(figsize=(10, 6))
plt.bar(top20_names, top20_counts, color='skyblue')
plt.xlabel('人物')
plt.ylabel('出场次数')
plt.title('出现次数最多的前20个人物的出场次数')
plt.xticks(rotation=45)  # rotate the names so neighbouring labels do not overlap
plt.tight_layout()
plt.show()

在这里插入图片描述

Top 10关键词

from sklearn.feature_extraction.text import TfidfVectorizer
import re

# Filler tokens that are discarded after splitting
meaningless_words = ["说", "道"]


def tokenize(text):
    """Split *text* into runs of word characters and drop filler tokens.

    A token is a maximal run of word characters delimited by word
    boundaries. Tokens listed in ``meaningless_words`` are removed; the
    remaining tokens are returned in their original order.

    NOTE: word characters include CJK characters, so Chinese text without
    spaces comes back as whole phrases between punctuation marks rather
    than as individual words.
    """
    kept = []
    for token in re.findall(r'\b\w+\b', text):
        if token in meaningless_words:
            continue
        kept.append(token)
    return kept

# Join all chapters into one document.
# NOTE: with a single document every term has the same IDF, so the ranking
# below is effectively a term-frequency ranking.
corpus = [' '.join(chapters)]

# Vectorise with the custom tokenizer and the project stop-word list
vectorizer = TfidfVectorizer(stop_words=stopwords, tokenizer=tokenize)
X = vectorizer.fit_transform(corpus)

# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back only on old installations.
if hasattr(vectorizer, "get_feature_names_out"):
    feature_names = vectorizer.get_feature_names_out()
else:
    feature_names = vectorizer.get_feature_names()

# Column indices of the single document row, highest score first
indices = X.toarray().argsort(axis=1)[:, ::-1]
top_n = min(10, len(feature_names))  # at most 10, never more than the vocabulary size
top_keywords = [feature_names[indices[0, i]] for i in range(top_n)]

print("红楼梦文本的Top 10关键词:")
for i, keyword in enumerate(top_keywords):
    print(f"{i+1}. {keyword}")

红楼梦文本的Top 10关键词:

1. 说着
2. 宝玉道
3. 宝玉笑道
4. 笑道
5. 贾母道
6. 贾政道
7. 凤姐道
8. 袭人道
9. 宝玉听了
10. 一面说

import networkx as nx
from collections import defaultdict

# Co-occurrence graph over all characters: two characters are linked when
# both names occur in the same chapter, and the edge weight is the number
# of such chapters.
from itertools import combinations

# Node labels and the title are Chinese; configure a CJK-capable font
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

G = nx.Graph()

# Pair -> number of chapters containing both names
relationships = defaultdict(int)

for chapter in chapters:
    # Test each name against the chapter once instead of once per pair;
    # list order is kept, so pairs are still (earlier name, later name).
    present = [name for name in characters if name in chapter]
    for pair in combinations(present, 2):
        relationships[pair] += 1

# One weighted edge per co-occurring pair
for (first, second), weight in relationships.items():
    G.add_edge(first, second, weight=weight)

# Draw the graph with the co-occurrence count printed on every edge
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, k=0.2)  # spring layout for node placement
nx.draw(G, pos, with_labels=True, node_size=1000, node_color='lightblue', font_size=10, font_weight='bold')
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.title('红楼梦人物关系图')
plt.show()

在这里插入图片描述

前20人物出场

import re
from collections import Counter
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # 使用微软雅黑字体

# Load the character names, one per line
with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦人物.txt", "r", encoding="utf-8") as file:
    characters = file.read().splitlines()

# The snippet used `all_text` without defining it; build it from the chapter
# list. Chapters are joined with a newline so no name can match across a
# chapter boundary.
all_text = '\n'.join(str(chapter) for chapter in chapters)

# Count non-overlapping matches of any name. Names are escaped so regex
# metacharacters are taken literally, and blank lines are skipped because an
# empty alternative would match at every position.
name_pattern = '|'.join(re.escape(name) for name in characters if name)
if name_pattern:
    character_freq = Counter(re.findall(r'(%s)' % name_pattern, all_text))
else:
    character_freq = Counter()

# Keep the 100 most frequent names, highest count first
top_characters_freq = dict(character_freq.most_common(100))

# Print the ranked list
for idx, (character, freq) in enumerate(top_characters_freq.items(), 1):
    print(f"{idx}. {character}: {freq}")

# First 20 names of the ranked mapping, and their counts in the same order
top_characters = list(top_characters_freq)[:20]
top_frequencies = [top_characters_freq[name] for name in top_characters]

# Bar chart of mention counts for those 20 characters
plt.figure(figsize=(12, 6))
plt.bar(top_characters, top_frequencies)
plt.xlabel('人物')
plt.ylabel('出场次数')
plt.title('前20个人物出场情况')
plt.xticks(rotation=45)  # rotate tick labels so they do not overlap
plt.tight_layout()  # tighten the layout so labels are not clipped
plt.show()

在这里插入图片描述

from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
from collections import Counter

# Load the character names, one per line
with open("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦人物.txt", "r", encoding="utf-8") as file:
    characters = file.read().splitlines()

# The snippet used `all_text` without defining it; build it from the chapter
# list. Chapters are joined with a newline so no name can match across a
# chapter boundary.
all_text = '\n'.join(str(chapter) for chapter in chapters)

# Count non-overlapping matches of any name. Names are escaped so regex
# metacharacters are taken literally, and blank lines are skipped because an
# empty alternative would match at every position.
name_pattern = '|'.join(re.escape(name) for name in characters if name)
if name_pattern:
    character_freq = Counter(re.findall(r'(%s)' % name_pattern, all_text))
else:
    character_freq = Counter()

# Keep the 100 most frequent names, highest count first
top_characters_freq = dict(character_freq.most_common(100))

# Print the ranked list
for idx, (character, freq) in enumerate(top_characters_freq.items(), 1):
    print(f"{idx}. {character}: {freq}")

# Word cloud of the ranked character frequencies.
# The title is Chinese, so matplotlib is given a CJK-capable font as well.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

# In a raw string a backslash is literal, so the separators must not be doubled
wordcloud = WordCloud(background_color='white', font_path=r"D:\coder\randomnumbers\Keywords_cloud\msyh.ttf")
wordcloud.generate_from_frequencies(top_characters_freq)

plt.figure(figsize=(10, 8))
plt.imshow(wordcloud, interpolation='bilinear')
plt.title('人物出现词云图')
plt.axis('off')
plt.show()

# Take the leading 20 (name, count) entries of the ranked mapping
leading = list(top_characters_freq.items())[:20]
top_characters = [name for name, _ in leading]
top_frequencies = [freq for _, freq in leading]

# Bar chart of mention counts for those 20 characters
plt.figure(figsize=(12, 6))
plt.bar(top_characters, top_frequencies)
plt.ylabel('出场次数')
plt.xlabel('人物')
plt.title('前20个人物出场情况')
plt.xticks(rotation=45)  # rotate tick labels so they do not overlap
plt.tight_layout()  # tighten the layout so labels are not clipped
plt.show()

人物出现词云

宝玉: 3810
凤姐: 1680
贾母: 1639
袭人: 1123
王夫人: 1039
宝钗: 1002
贾政: 911
贾琏: 746
平儿: 653
薛姨妈: 446
紫鹃: 427
探春: 426
鸳鸯: 406
贾珍: 382
李纨: 366
晴雯: 340
尤氏: 336
刘姥姥: 288
邢夫人: 280
小丫头: 279
薛蟠: 277
林黛玉: 268
香菱: 245
麝月: 232
贾蓉: 222
周瑞: 215
小厮: 207
贾赦: 190
贾芸: 185
惜春: 182
芳官: 156
妙玉: 153
雪雁: 151
贾环: 146
林之孝: 142
迎春: 134
莺儿: 125
赵姨娘: 122
宝蟾: 114
巧姐: 107
秦钟: 100
薛蝌: 99
贾兰: 94
秋纹: 93
茗烟: 88
尤二姐: 88
大了: 84
史湘云: 83
赖大: 82
五儿: 80
司棋: 76
秦氏: 72
贾瑞: 68
旺儿: 68
贾蔷: 67
凤丫头: 64
兴儿: 58
彩云: 57
琥珀: 57
冯紫英: 55
焙茗: 51
鲍二: 50
包勇: 50
金钏: 48
门子: 47
翠缕: 47
北静王: 45
丰儿: 43
李贵: 41
玉钏儿: 41
柳家的: 40
倪二: 39
张华: 39
板儿: 38
小红: 38
李嬷嬷: 37
王仁: 36
坠儿: 36
甄宝玉: 36
藕官: 33
春燕: 33
尤三姐: 33
秋桐: 33
琏二奶奶: 31
金荣: 29
贾芹: 29
石头: 28
玻璃: 28
王善保: 28
大姐: 27
侍书: 27
女尼: 27
李氏: 26
彩屏: 26
李纹: 26
智能: 25
翠墨: 24
张道士: 24
李十儿: 24
王子腾: 23

在这里插入图片描述

前20个人物关系

import networkx as nx
from collections import defaultdict

# Co-occurrence graph restricted to the 20 most-mentioned characters.
from itertools import combinations

G = nx.Graph()

# Pair -> number of chapters containing both names
top20_relationships = defaultdict(int)

# top20_characters is built as a slice of (name, count) pairs, and testing a
# tuple with `in` against a chapter string raises TypeError. Reduce each
# entry to its name; plain strings are accepted unchanged.
top20_name_list = [entry[0] if isinstance(entry, tuple) else entry for entry in top20_characters]

for chapter in chapters:
    # Test each name against the chapter once, then pair up the names found
    present = [name for name in top20_name_list if name in chapter]
    for pair in combinations(present, 2):
        top20_relationships[pair] += 1

# One weighted edge per co-occurring pair
for (first, second), weight in top20_relationships.items():
    G.add_edge(first, second, weight=weight)

# Draw the graph with the co-occurrence count printed on every edge
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, k=0.2)  # spring layout for node placement
nx.draw(G, pos, with_labels=True, node_size=1000, node_color='lightblue', font_size=10, font_weight='bold')
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.title('红楼梦前20个人物关系图')
plt.show()

在这里插入图片描述
在这个图中，人与人之间连线上的数字表示两个人之间的关系强度或者共现次数。在原始代码中，这些数字被命名为weight，代表边的权重。在红楼梦人物关系图中，这些权重可以表示两个人物在文本中共同出现的次数，从而反映了他们之间的关系密切程度或相关性。

如果两个人物之间的数字较大，说明他们在小说中经常一起出现，可能存在密切的关系或者故事情节联系紧密。相反，如果数字较小，则表示两个人物之间的关系不太密切。

import networkx as nx
from collections import defaultdict
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # 使用微软雅黑字体

# Co-occurrence graph over all characters, keeping only strong links
# (spring layout).
from itertools import combinations

G = nx.Graph()

# Pair -> number of chapters containing both names
relationships = defaultdict(int)

for chapter in chapters:
    # Test each name against the chapter once, then pair up the names found
    present = [name for name in characters if name in chapter]
    for pair in combinations(present, 2):
        relationships[pair] += 1

# Minimum number of shared chapters for an edge to be drawn. The original
# comment and title said 10 while the filter used 15; the filter value is
# kept and the title is now derived from it so the two cannot drift apart.
min_weight = 15
for (first, second), weight in relationships.items():
    if weight >= min_weight:
        G.add_edge(first, second, weight=weight)

# Draw the graph with the co-occurrence count printed on every edge
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, k=0.2)  # spring layout for node placement
nx.draw(G, pos, with_labels=True, node_size=1000, node_color='lightblue', font_size=10, font_weight='bold')
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.title(f'红楼梦人物关系图（关联次数大于等于{min_weight}）')
plt.show()

在这里插入图片描述

全人物关系图

import networkx as nx
from collections import defaultdict
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # 使用微软雅黑字体

# Co-occurrence graph over all characters, strong links only (circular layout)
G = nx.Graph()

# Pair -> number of chapters in which both names occur
relationships = defaultdict(int)

for chapter in chapters:
    for idx, first in enumerate(characters):
        if first not in chapter:
            continue
        # Pair the name with every later name that is also in this chapter
        for second in characters[idx + 1:]:
            if second in chapter:
                relationships[(first, second)] += 1

# Keep only pairs that share at least 15 chapters
for (first, second), times in relationships.items():
    if times < 15:
        continue
    G.add_edge(first, second, weight=times)

# Draw the graph with the co-occurrence count printed on every edge
plt.figure(figsize=(12, 8))
pos = nx.circular_layout(G)  # nodes placed on a circle
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw(G, pos, with_labels=True, node_size=1000, node_color='lightblue', font_size=10, font_weight='bold')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.title('红楼梦人物关系图（关联次数大于等于15）')
plt.show()

在这里插入图片描述

案例

## 加载所需要包
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

from sklearn.feature_extraction.text import CountVectorizer,TfidfTransformer,TfidfVectorizer

## Font used for Chinese titles and labels in the plots below
## (loaded from a Windows system font file)
fonts = FontProperties(fname = r"C:\Windows\Fonts\STXIHEI.ttf",size=14)
## 3D axes support (Axes3D is not referenced elsewhere in this section)
from mpl_toolkits.mplot3d import Axes3D
## pandas display option: show at most 8 rows when printing
pd.set_option("display.max_rows",8)

# Silence pandas' chained-assignment warning; the segmentation step below
# assigns into individual cells with chained indexing.
pd.options.mode.chained_assignment = None  # default='warn'

## Figure display settings. The two lines below are IPython/Jupyter magics
## and are not valid syntax in a plain Python script.
%matplotlib inline
%config InlineBackend.figure_format = "retina"

## Stop-word file, parsed by read_csv into a single column named "Stopwords"
stopword_path = "D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦停用词.txt"
stopword = pd.read_csv(stopword_path, header=None, names=["Stopwords"])

## Dataset of the novel, read from the Excel workbook
dataset_path = "D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦数据集.xlsx"
Red_df = pd.read_excel(dataset_path)
Red_df.head(5)

在这里插入图片描述

import jieba
## Register the custom dictionary with jieba before segmenting
jieba.load_userdict("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦词典.txt")

## Segment every chapter of the novel
## Number of rows and columns of the dataset
row,col = Red_df.shape

# `stopword` is a one-column DataFrame. Passing the DataFrame itself to
# isin() compares tokens against its column labels, so no stop word was
# ever removed; compare against the column's values instead.
stopword_set = set(stopword["Stopwords"])
# Quote/punctuation pairs filtered out after segmentation
noise_tokens = ['：“', '。”', '？”', '！”']

chapter_tokens = []
for text in Red_df.Artical:
    ## Full-mode segmentation
    cutwords = pd.Series(list(jieba.cut(text, cut_all=True)))
    ## Drop single-character tokens
    cutwords = cutwords[cutwords.apply(len) > 1]
    ## Drop stop words and the punctuation pairs
    cutwords = cutwords[~cutwords.isin(stopword_set)]
    cutwords = cutwords[~cutwords.isin(noise_tokens)]
    chapter_tokens.append(cutwords.values)

# Store one token array per row. The object array is filled element by
# element so pandas keeps each chapter's tokens as a single cell, and the
# column is assigned in one step instead of through chained indexing.
token_column = np.empty(len(chapter_tokens), dtype=object)
for position, tokens in enumerate(chapter_tokens):
    token_column[position] = tokens
Red_df["cutword"] = token_column

import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)
warnings.filterwarnings("ignore",category=FutureWarning)

分析红楼梦的人物关系

## Count how often each character is mentioned in the whole book
## Load the list of character names
role = pd.read_csv("D:\\学习\\课件\\文本挖掘\\上机实验\\实验三\\20230320162359291\\Readream\\红楼梦人物.txt",header=None,names = ["rolename"])
role.head()

## Word frequencies over all chapters
# Concatenate the per-chapter token arrays and count each distinct token
allcutword = np.concatenate(Red_df.cutword.tolist())
allcutword = pd.DataFrame({"word":allcutword})
allcutword = allcutword.groupby("word").size().reset_index(name="number")
allcutword = allcutword.sort_values(by="number",ascending=False)

## Look up each character's frequency
# A keyed lookup replaces one boolean mask per character and no longer
# depends on DataFrame(list_of_arrays) producing exactly one column.
# Names that never occur as a token get NaN.
word_to_count = allcutword.set_index("word")["number"]
role["counts"] = role.rolename.map(word_to_count)

## Drop characters that were never found, then those mentioned 5 times or fewer
role = role[role.counts.notnull()].sort_values(by="counts",ascending=False)
role = role[role.counts > 5].reset_index(drop=True)
print(role.head())

rolename counts
0 宝玉 3862.0
1 凤姐 1680.0
2 贾母 1639.0
3 袭人 1123.0
4 王夫人 1039.0

## Per-chapter mention counts for the leading characters
from collections import Counter

# Sizes are taken from the data instead of the hard-coded 10 x 120
n_chapters = len(Red_df.index)
n_roles = min(10, len(role))

# One frequency table per chapter, built once and shared by all characters
# (previously the same table was rebuilt for every character).
chapter_counts = [Counter(words) for words in Red_df.cutword]

# rolenumber[k, c] = mentions of character k in chapter c; a character that
# is absent from a chapter counts as 0.
rolenumber = np.zeros((n_roles, n_chapters))
for kk in range(n_roles):
    name = role.rolename[kk]
    rolenumber[kk,:] = [counts[name] for counts in chapter_counts]

## Plot the per-chapter frequency of the first six characters
plt.figure(figsize=(12,8))
chapter_numbers = np.arange(n_chapters) + 1  # chapters numbered from 1 on the x axis
for ii in range(min(6, n_roles)):
    plt.subplot(3,2,ii+1)
    plt.bar(chapter_numbers,rolenumber[ii,:],alpha = 1)
    plt.title(role.rolename[ii],fontproperties = fonts,size = 12)
    plt.ylabel("频次",fontproperties = fonts,size = 10)
plt.subplots_adjust(hspace = 0.25,wspace = 0.15)
plt.show()

在这里插入图片描述

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign from rendering as a box

## Correlation between the per-chapter mention counts of the leading characters
import seaborn as sns
datacor = np.corrcoef(rolenumber)  # one row of rolenumber per character
datacor = pd.DataFrame(data=datacor,columns=role.rolename[0:10],index=role.rolename[0:10])
## Correlation heat map
plt.figure(figsize=(10,10))
ax = sns.heatmap(datacor,square=True,annot=True,fmt = ".3f",
                 linewidths=.5,cmap="YlGnBu",
                 cbar_kws={"fraction":0.046, "pad":0.03})
ax.set_xticklabels(role.rolename[0:10],fontproperties = fonts)
ax.set_yticklabels(role.rolename[0:10],fontproperties = fonts)
ax.set_title("人物相关性",fontproperties = fonts)
# Blank both axis labels. The original called set_xlabel twice, which left
# the y-axis label untouched.
ax.set_xlabel("",fontproperties = fonts)
ax.set_ylabel("",fontproperties = fonts)
plt.show()

在这里插入图片描述

## Relationship weights between characters are computed from the segmentation
## result; only characters mentioned more than 100 times are analysed.
## Weight definition: when two characters occur in the same chapter, the
## weight of that pair increases by 1.
Red_df.cutword

在这里插入图片描述

## Characters whose total mention count exceeds 100
rolenew = role.loc[role["counts"] > 100]
rolenew

在这里插入图片描述

## Build the pairwise relationships between the frequently mentioned characters
from collections import Counter
from itertools import combinations
relation = combinations(rolenew.rolename,2)

# One frequency table per chapter, built once. The original recounted both
# names with an element-wise comparison for every (pair, chapter).
chapter_counts = [Counter(words) for words in Red_df.cutword]

rela = []
weight  = []
for pair in relation:
    first, second = pair
    rela.append(pair)
    ## Weight = number of chapters in which both characters qualify.
    ## NOTE(review): the stated rule ("both occur in the same chapter") would
    ## be `>= 1`; the original condition `> 1` requires at least two mentions
    ## of each name and is preserved here. Confirm which one is intended.
    weight.append(sum(1 for counts in chapter_counts
                      if counts[first] > 1 and counts[second] > 1))

# Passing the column names to the constructor also works for an empty pair list
Red_rela = pd.DataFrame(rela, columns=["First","Second"])
Red_rela["weight"] = weight
# Keep pairs that qualify in more than 20 chapters, strongest first
Red_rela = Red_rela[Red_rela.weight>20].sort_values(by="weight",ascending=False).reset_index(drop = True)
print(Red_rela.head())

在这里插入图片描述

import networkx as nx
## Visualise the character relationships (circular layout)
plt.figure(figsize=(12,12))
## Build the social-network graph
G=nx.Graph()

## Add one edge per retained pair.
## The weight is divided by 120 — presumably the number of chapters; TODO confirm.
for ii in Red_rela.index:
    G.add_edge(Red_rela.First[ii],Red_rela.Second[ii],weight = Red_rela.weight[ii] / 120)

## Split the edges into two classes by normalised weight
elarge=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] >0.25]
esmall=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <=0.25]

## Graph layout
pos=nx.circular_layout(G) # positions for all nodes

# nodes
nx.draw_networkx_nodes(G,pos,alpha=0.6,node_size=500)

# edges: heavier links in solid red, lighter links in dashed blue
nx.draw_networkx_edges(G,pos,edgelist=elarge,
                    width=1.5,alpha=0.6,edge_color='r')
nx.draw_networkx_edges(G,pos,edgelist=esmall,
                    width=1,alpha=0.8,edge_color='b',style='dashed')

# labels
nx.draw_networkx_labels(G,pos,font_size=10)

plt.axis('off')
# The keyword must be lower-case: current Matplotlib no longer normalises
# property names, so `FontProperties=` is rejected.
plt.title("《红楼梦》社交网络",fontproperties = fonts)
plt.show() # display

在这里插入图片描述

## Visualise the character relationships (spring layout)
plt.figure(figsize=(12,12))
## Build the social-network graph
G=nx.Graph()

## Add one edge per retained pair.
## The weight is divided by 120 — presumably the number of chapters; TODO confirm.
for ii in Red_rela.index:
    G.add_edge(Red_rela.First[ii],Red_rela.Second[ii],weight = Red_rela.weight[ii] / 120)

## Split the edges into two classes by normalised weight
elarge=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] >0.25]
esmall=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <=0.25]

## Graph layout
pos=nx.spring_layout(G) # positions for all nodes

# nodes
nx.draw_networkx_nodes(G,pos,alpha=0.6,node_size=500)

# edges: heavier links in solid red, lighter links in dashed blue
nx.draw_networkx_edges(G,pos,edgelist=elarge,
                    width=1.5,alpha=0.6,edge_color='r')
nx.draw_networkx_edges(G,pos,edgelist=esmall,
                    width=1,alpha=0.8,edge_color='b',style='dashed')

# labels
nx.draw_networkx_labels(G,pos,font_size=10)

plt.axis('off')
# The keyword must be lower-case: current Matplotlib no longer normalises
# property names, so `FontProperties=` is rejected.
plt.title("《红楼梦》社交网络",fontproperties = fonts)
plt.show() # display

在这里插入图片描述

## Degree of every node in the graph, as a two-column table
degree_pairs = [(node, degree) for node, degree in G.degree]
Red_degree = pd.DataFrame(degree_pairs)
Red_degree.columns = ["name","degree"]
Red_degree

在这里插入图片描述

# Bar chart of node degrees, highest degree first
Red_degree.sort_values(by="degree",ascending=False).plot(kind = "bar",x="name",y = "degree",figsize=(12,6),legend=False)
# The keyword must be lower-case: current Matplotlib no longer normalises
# property names, so `FontProperties=` is rejected.
plt.xticks(fontproperties = fonts,size = 12)
plt.ylabel("degree")
plt.show()