import math

import numpy as np
#載入模擬資料
defloaddata():
postinglist=[['
my','
dog','
has','
flea
','problem
','help
','please'],
['maybe
','not
','take
','him
','to
','dog
','park
','stupid'],
['my
','dalmation
','is
','so
','cute
','i
','love
','him'],
['stop
','posting
','stupid
','worthless
','garbage'],
['mr
','licks
','ate
','my
','steak
','how
','to
','stop
','him'],
['quit
','buying
','worthless
','dog
','food
','stupid']]
classvec = [0,1,0,1,0,1] #
1 侮辱 0 非侮辱
return
postinglist,classvec
#建立詞彙表
defcreateset(dataset):
result =set()
for i in
dataset:
result = result |set(i)
return
list(result)
#dataset,labels = loaddata()
#vacablist = createset(dataset)
#print('外lables',labels)
#print('外dataset',dataset)
#print('外vacablist:',vacablist)
#建立和詞彙表對應的向量
defsetofword(vacablist,inputdata):
mylist = [0] *len(vacablist)
for word in
inputdata:
if word in
vacablist:
mylist[vacablist.index(word)] = 1
else
:
print('
沒有 {} 這個詞
'.format(word))
return
mylist
#setofdata = setofword(vacablist,dataset[3])
#print('外setofdata:',setofdata) #[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0]
#print(vacablist) #['garbage', 'to', 'worthless', 'ate', 'has', 'so', 'take', 'cute', 'dog', 'flea', 'buying', 'help', 'is', 'park', 'i', 'food', 'my', 'licks', 'posting', 'dalmation', 'problem', 'please', 'stop', 'how', 'stupid', 'maybe', 'love', 'steak', 'quit', 'him', 'not', 'mr']
#print(dataset[3]) #['stop', 'posting', 'stupid', 'worthless', 'garbage']
#trainmat =
#for i in dataset:##
print('外trainmat:',trainmat)
#訓練函式,算p(word\1)的概率
defp1(trainmat,labels):
plable_1 = sum(labels)/len(labels)
data_0 =np.ones(len(trainmat[0]))
count_0 = 2data_1 =np.ones(len(trainmat[0]))
count_1 = 2
for i in
range(len(labels)):
if labels[i] ==0:
data_0 +=trainmat[i]
count_0 +=sum(trainmat[i])
if labels[i] == 1:
data_1 +=trainmat[i]
count_1 +=sum(trainmat[i])
data_0 = data_0 /count_0
data_1 = data_1/count_1
print('
data_0:{},count:{}
'.format(data_0,count_0))
print('
data_1:{},count:{}
'.format(data_1, count_1))
print('
plabel_1:
',plable_1)
return
data_0,data_1,plable_1
#p1(trainmat,labels)
#用得到的概率分類
defclassfy(testset,data_0,data_1,plabel_1):
print('
開始classfy')
p1 = 1p0 = 1
for i in
range(len(testset)):
if testset[i] ==1:
p1 = p1 *data_1[i]
p0 = p0 *data_0[i]
p1 = p1 *plabel_1
p0 = p0 * (1-plabel_1)
print('
p1:{},p0:{}
'.format(p1,p0))
if p1>p0:
print('
該分類為1')
return 1
else
:
print('
該分類為0')
return0#
測試總邏輯**
deftest():
dataset,labels =loaddata()
vacablist =createset(dataset)
trainmat =
for i in dataset: #
因為訓練函式需要訓練資料是詞彙表的格式
data_0, data_1, plable_1 =p1(trainmat,labels)
testlist = ['
my','
love
','stupid']
testdata =setofword(vacablist,testlist)
classfy(testdata,data_0,data_1,plable_1)
test()
# --- Scraped related-article teasers (not code); kept as comments so the file parses ---
# 機器學習之樸素貝葉斯
# 寫在前面 本文寫自初學時,若後續學習過程中有新的理解,將不定期進行更新 若文中敘述有誤,望不吝賜教,也將及時修改 貝葉斯分類是一類分類演算法的總稱,這類演算法均以貝葉斯定理為基礎,故統稱為貝葉斯分類。而樸素貝葉斯分類是貝葉斯分類中最簡單,也是常見的一種分類方法。在理解樸素貝葉斯之前,需要對兩個數...
# 機器學習之樸素貝葉斯
# 樸素貝葉斯 1 樸素貝葉斯 2 一 高斯樸素貝葉斯 gaussiannb實現了高斯樸素貝葉斯分類演算法,設假設特徵的可能性是高斯的 p(x_i|y) = (1 / sqrt(2*pi*sigma_y^2)) * exp(-(x_i - mu_y)^2 / (2*sigma_y^2)) 引數...
# 機器學習 樸素貝葉斯
# 樸素貝葉斯原理 1.貝葉斯公式 2.樸素貝葉斯的模型 3.後驗概率最大化的含義 4.樸素貝葉斯的引數估計 4.1.特徵是離散值 假設符合多項式分布 4.2.特徵是稀疏的離散值 假設符合伯努利分布 4.3.特徵是連續值 假設符合正態分佈 5.樸素貝葉斯演算法過程 6.樸素貝葉斯演算法小結 scikit...