# -*- coding: utf-8 -*-
""""""
from os import path
from scipy.misc import imread
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
def wordcount(filename):
    '''Count word frequencies in a simple way.
    :param filename: path of the input text file
    :return: word frequencies as a list of (word, count) tuples
    '''
    wordcount = {}
    file = open(filename, 'r')
    while True:
        line = file.readline()
        if line:
            wordlist = line.split(',[')
            if len(wordlist) == 1:
                continue
            # keep only the part between ',[' and '],', then split it into words
            wordlist = wordlist[1].split('],')[0].split(',')
            for word in wordlist:
                word = word.replace(' ', '').replace("'", '')
                if word in wordcount:
                    wordcount[word] = wordcount[word] + 1
                else:
                    wordcount[word] = 1
        else:
            break
    file.close()
    return [(k, wordcount[k]) for k in wordcount.keys()]
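# Note (an assumption, not stated in the original post): the layout of 廣州.txt is
# never shown. wordcount() only keeps lines that embed a Python-style list between
# ",[" and "],", so a line such as
#     1,['廣州', '美食', '早茶'],2017-05-01
# would add 廣州, 美食 and 早茶 to the counts; lines without ",[" are skipped.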
def generatecloud(filename, imagename, cloudname, fontname):
    '''Generate the tag cloud and save it as an image.'''
    coloring = imread(imagename)               # read the background/mask image
    wc = WordCloud(background_color="white",   # background color
                   max_words=2000,             # maximum number of words shown
                   mask=coloring,              # use the image as the mask shape
                   stopwords=STOPWORDS,        # stop words
                   font_path=fontname,         # font that can render Chinese
                   max_font_size=150)          # maximum font size
    # Once the frequencies are counted, generate_from_frequencies builds the cloud.
    # txtfreq looks like [('詞a', 100), ('詞b', 90), ('詞c', 80)]
    txtfreq = wordcount(filename)
    wc.generate_from_frequencies(txtfreq)
    # draw the word cloud
    plt.imshow(wc)
    plt.axis("off")
    plt.show()
    # save the word cloud image
    wc.to_file(cloudname)
if __name__ == '__main__':
    d = path.dirname(__file__)                  # directory of the current file
    fontname = path.join(d, 'msyh.ttf')         # path of a Chinese-capable font
    filename = path.join(d, '廣州.txt')          # path of the input txt file
    imagename = path.join(d, "circle.jpg")      # path of the background image
    cloudname = path.join(d, "cloud.png")       # output path of the tag cloud
    generatecloud(filename, imagename, cloudname, fontname)
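The script above targets the library versions that were current when it was written. On a recent stack two API changes matter: scipy.misc.imread has been removed from SciPy (the mask can be read with Pillow or imageio instead), and newer releases of the wordcloud package expect generate_from_frequencies to take a dict mapping words to counts rather than a list of tuples. Below is a minimal sketch under those assumptions, reusing the wordcount function defined above (generatecloud_modern is an illustrative name, not part of the original post):

import numpy as np
from PIL import Image
from wordcloud import WordCloud, STOPWORDS

def generatecloud_modern(filename, imagename, cloudname, fontname):
    # read the mask image with Pillow instead of the removed scipy.misc.imread
    coloring = np.array(Image.open(imagename))
    wc = WordCloud(background_color="white", max_words=2000, mask=coloring,
                   stopwords=STOPWORDS, font_path=fontname, max_font_size=150)
    # newer wordcloud versions want a dict, so convert the (word, count) list
    freqs = dict(wordcount(filename))
    wc.generate_from_frequencies(freqs)
    wc.to_file(cloudname)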