1.1 問題分析
英文文字 --> 中文文字
# Section label carried over from the surrounding text: "結果:" ("Result:").
# Count how often each space-separated, lower-cased token occurs in hamlet.txt.

# Read the whole file; the context manager closes the handle even if read() fails.
with open(r"e:\python實訓\python預科班\day06\hamlet.txt", "r", encoding="utf-8") as f:
    data = f.read().lower()

# Split on single spaces only, exactly as the original script does.
# NOTE(review): tokens separated by newlines, or carrying punctuation, stay
# glued together. data.split() would fix that but could change the counts
# printed after this listing, so the original separator is kept.
data_split = data.split(" ")

# Map each token to its number of occurrences.
count_data = {}
for word in data_split:
    # Start from 0 for a token not seen before, then add this occurrence.
    count_data[word] = count_data.get(word, 0) + 1

# items() yields the (word, count) pairs of the dictionary.
data_items = count_data.items()
# Materialize the pairs as a list so they can be sorted in place.
list_item = list(data_items)
def func(i):
    """Return the second element of *i* (the count of a (word, count) pair)."""
    return i[1]
# Sort the (word, count) pairs by count, highest first.
list_item.sort(key=func, reverse=True)
# Keep the ten most frequent words.
res_data = list_item[:10]
# NOTE(review): the placeholders of this f-string were lost in the source
# text; the "word count" format is reconstructed from the output lines that
# follow the listing.
for word, count in res_data:
    print(f"{word} {count}")
the 791
and 596
of 527
to 508
a 376
my 375
in 320
you 314
i 291
his 235
# Section label carried over from the surrounding text: "結果:" ("Result:").
import jieba

# @author ransysun
# @create 2019-07-19-11:08

# Read the whole novel; the context manager closes the handle even if read() fails.
with open(r"e:\python實訓\python預科班\day06\threekingdoms.txt", encoding="utf8") as f:
    data = f.read()

# Segment the Chinese text into a list of words.
data_jieba = jieba.lcut(data)

# Words that are not character names and must not be counted.
# NOTE(review): the original collection was lost from the source text
# ("if word in :"); it is left empty here and must be filled in to reproduce
# the published top-ten list.
excluded_words = set()

# Map each remaining word to its number of occurrences.
count_dic = {}
for word in data_jieba:
    # Skip single-character tokens (commas, full stops and other
    # meaningless one-character words).
    if len(word) == 1:
        continue
    # Skip words that are not character names.
    if word in excluded_words:
        continue
    # Drop the last character of any token containing "曰" — presumably so
    # that a token such as "孔明曰" is counted under "孔明"; verify against
    # the segmenter's actual output.
    if "曰" in word:
        word = word[:-1]
    # Start from 0 for a word not seen before, then add this occurrence.
    count_dic[word] = count_dic.get(word, 0) + 1
def func(i):
    """Return the second element of *i* (the count of a (word, count) pair)."""
    return i[1]
# Turn the (word, count) pairs into a list and sort by count, highest first.
data_list = list(count_dic.items())
data_list.sort(key=func, reverse=True)
# Keep the ten most frequent words.
data_res = data_list[:10]
# NOTE(review): the placeholders of this f-string were lost in the source
# text; the "word count" format is reconstructed from the output lines that
# follow the listing. The pair is unpacked into fresh names so the loop no
# longer overwrites `data`, which holds the text read from the file.
for name, count in data_res:
    print(f"{name} {count}")
孔明 1226
玄德 975
曹操 953
張飛 358
呂布 300
趙雲 278
劉備 277
雲長 265
孫權 264
魏兵 233
4.1 應用問題的擴充套件
