Naive Bayes: Training on and Generating MNIST Samples in Python

2021-09-29 02:33:24

from sklearn.datasets import fetch_mldata
from collections import Counter
import numpy as np

# Load the MNIST dataset
def load_mnist():
    mnist = fetch_mldata('MNIST original', data_home='./datasets')
    x, y = mnist["data"], mnist["target"]
    x = np.where(x > 0, 1, x)  # binarize: set every pixel greater than 0 to 1
    x_train, x_test, y_train, y_test = x[:60000], x[60000:], y[:60000], y[60000:]
    return x_train, y_train, x_test, y_test
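# Note: fetch_mldata was deprecated and later removed from scikit-learn after
# mldata.org shut down. A minimal replacement sketch using fetch_openml
# (load_mnist_openml is an added helper, assuming scikit-learn >= 0.24):
def load_mnist_openml():
    from sklearn.datasets import fetch_openml
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    x = np.where(mnist.data > 0, 1, mnist.data)
    y = mnist.target.astype(np.uint8)  # OpenML serves the labels as strings
    return x[:60000], y[:60000], x[60000:], y[60000:]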

class NaiveBayes():
    prob_c = None       # prior probability of each class c
    prob_cj = None      # likelihood of dimension j given class c
    y_pred = None       # prediction result: class labels
    y_pred_prob = None  # prediction result: class probabilities

    # Train the model: estimate the priors and the likelihoods
    def fit(self, x_train, y_train):
        # Prior probability of each class, p(y).
        # Counter preserves insertion order, so this assumes the labels
        # 0..9 appear in order; the list index then doubles as the label.
        prob_c = []
        y_count = Counter(y_train)
        for key in y_count.keys():
            prob_c.append(y_count[key] / len(y_train))
        # Conditional probability of each dimension, p(x_ij | y),
        # where x_ij is the element in row i, dimension j
        prob_cj = []  # likelihood of dimension j under class c
        for c in y_count.keys():
            prob_temp = []
            c_train = x_train[y_train == c]  # training samples of class c
            for j in range(x_train.shape[1]):
                c_train_j = c_train[:, j]  # dimension (column) j of the class-c samples
                c_train_j_1 = c_train_j[c_train_j == 1]  # entries of column j equal to 1
                # add-one smoothing for the conditional probability
                prob_1_cj = (c_train_j_1.shape[0] + 1) / (c_train_j.shape[0] + c_train.shape[1])
                prob_temp.append(prob_1_cj)
            prob_cj.append(prob_temp)
        self.prob_c = prob_c
        self.prob_cj = prob_cj
        return prob_c, prob_cj
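    # For reference, the smoothed likelihood computed above is
    #   P(x_j = 1 | y = c) = (n_cj + 1) / (n_c + d)
    # where n_cj counts class-c samples with pixel j on, n_c is the number of
    # class-c samples, and d = 784 is the feature count (c_train.shape[1]).
    # Textbook Laplace smoothing for a binary feature would divide by n_c + 2
    # instead; the n_c + d denominator still avoids zero probabilities, just
    # with stronger smoothing.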

    # Predict
    def predict(self, x_test):
        y_pred_prob = []
        # for every test sample
        for x in x_test:
            temp_list = []
            c_test_index = np.where(x == 1)[0]  # indices of the active (value 1) pixels
            # compute the posterior for every class
            for i in range(len(self.prob_c)):
                prob_cond = 1
                # multiply the likelihoods of the active pixels only
                for z in c_test_index:
                    prob_cond *= self.prob_cj[i][z]
                post_prob = prob_cond * self.prob_c[i]
                temp_list.append(post_prob)
            y_pred_prob.append(temp_list)
        # the index with the largest posterior is the predicted class
        y_pred = np.argmax(y_pred_prob, axis=1)
        self.y_pred_prob = y_pred_prob
        self.y_pred = y_pred
        return y_pred
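    # Multiplying a hundred or more probabilities below 1 pushes prob_cond
    # toward the floating-point underflow limit; the standard remedy is to add
    # log probabilities instead. A sketch of the same decision rule in log
    # space (predict_log is an added helper, not part of the original post):
    def predict_log(self, x_test):
        log_prob_c = np.log(np.array(self.prob_c))
        log_prob_cj = np.log(np.array(self.prob_cj))  # shape: (n_classes, 784)
        y_pred = []
        for x in x_test:
            on = np.where(x == 1)[0]  # indices of the active pixels
            scores = log_prob_c + log_prob_cj[:, on].sum(axis=1)
            y_pred.append(int(np.argmax(scores)))
        return np.array(y_pred)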

    # Accuracy: fraction of predictions that match the true labels
    def lossfun(self, y_pred, y_test):
        p = 0
        for i in range(len(y_pred)):
            if y_pred[i] == y_test[i]:
                p += 1
        return p / len(y_pred)
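    # (Equivalently, in one vectorized line:
    #  np.mean(np.asarray(y_pred) == np.asarray(y_test)).)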

    # Cumulative probabilities, as used in roulette-wheel selection
    def sum_prob(self, prob_list):
        sum_prob = []
        for i in range(len(prob_list)):
            temp_prob = 0
            for j in range(i + 1):
                temp_prob += prob_list[j]
            sum_prob.append(temp_prob)
        return sum_prob
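    # NumPy offers the same running totals directly: np.cumsum(prob_list)
    # reproduces this loop, and np.searchsorted(np.cumsum(prob_list),
    # np.random.rand()) picks the sampled index in one step.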

    # Generate a new sample from the learned distribution
    def generatedata(self):
        # cumulative probabilities of p(y)
        sum_prob_c = self.sum_prob(self.prob_c)
        # draw a random number in [0, 1]
        rand_prob_c = np.random.rand()
        for i in range(len(sum_prob_c)):
            # if the draw is at most the cumulative probability of class i,
            # the generated sample belongs to class i
            if rand_prob_c <= sum_prob_c[i]:
                new_c = i  # the new class
                break
        print('Class of the generated sample:', new_c)
        # generate the sample's features the same way
        new_c_j = []
        sum_prob_c_j = self.prob_cj[new_c]
        for i in range(len(sum_prob_c_j)):
            rand_prob_c_j = np.random.rand()
            if rand_prob_c_j <= sum_prob_c_j[i]:
                new_c_j.append(1)
            else:
                new_c_j.append(0)
        new_c_j = np.array(new_c_j)
        print('Generated sample data:')
        print(new_c_j.reshape(28, 28))
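    # Since each pixel is an independent Bernoulli draw under the model, the
    # feature loop above collapses to a single vectorized comparison, e.g.
    #   new_c_j = (np.random.rand(784) <= np.array(self.prob_cj[new_c])).astype(int)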

if __name__ == '__main__':
    x_train, y_train, x_test, y_test = load_mnist()
    clf = NaiveBayes()
    prob_c, prob_cj = clf.fit(x_train, y_train)  # train the model
    y_pred = clf.predict(x_test)                 # test
    accurate = clf.lossfun(y_pred, y_test)       # compute the accuracy
    print('Class counts among predictions:', Counter(y_pred))
    print('Accuracy:', accurate)
    clf.generatedata()  # generate a new random sample
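As a sanity check, scikit-learn ships this model as BernoulliNB; a minimal sketch (assuming the binarized splits returned by load_mnist) looks like:

from sklearn.naive_bayes import BernoulliNB

nb = BernoulliNB(alpha=1.0)  # alpha=1.0 is add-one smoothing
nb.fit(x_train, y_train)
print('BernoulliNB accuracy:', nb.score(x_test, y_test))

Note that BernoulliNB also factors in P(x_j = 0 | y) for the off pixels, which the hand-rolled predict above skips, so the two accuracies will differ somewhat.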
