# -*- coding: utf-8 -*-
"""~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@file : *****bayes.py
@contact : [email protected]
@author : ffzzyy
@version : 0.1
@modify time : 2019/3/15 15:04
@description
"""
import numpy as np
import pandas as pd
from functools import reduce

def acount(x1, x2, axis=0):
    """Count, column by column, how many times each element of x2 appears in
    the corresponding column of x1.

    :param x1: a numpy ndarray
    :param x2: a numpy ndarray
    :param axis: counting direction, column-wise by default
    :return: an array-like of counts

    Examples
    --------
    >>> x1 = np.array([[1, "s"], [2, "l"], [1, "l"], [3, "m"]])
    >>> x2 = np.array([2, "l"])
    >>> print(acount(x1, x2))
    [1. 2.]
    """
    result = np.zeros(len(x2))
    for i, value in enumerate(x2):
        x1_column = x1[:, i]  # slice the numpy array by column
        result[i] = np.sum(np.array(x1_column == value))  # count matches via a boolean mask
    return result

class nbclassifier:
    def __init__(self):
        self._x_train = None
        self._y_train = None
        self._class = None
        self._prior_proba = {}  # dict: prior probabilities

    def _set_class(self):
        """Build the sorted list of class labels.

        :return:
        """
        self._class = list(np.unique(self._y_train))
        self._class.sort()

    def _set_prior_proba(self):
        """Compute the prior probability of each class, P(c) = count(c) / N.

        :return:
        """
        for enum in self._class:
            count_ = list(self._y_train).count(enum)
            self._prior_proba[enum] = count_ / len(self._y_train)

    def _get_condi_proba(self, x, c):
        """Compute the conditional probability P(x | c) as the product of the
        per-feature conditional probabilities.

        :param x: one sample (array-like of feature values)
        :param c: a class label
        :return: the product of the per-feature conditional probabilities
        """
        y_train_index = [i for i, value in enumerate(self._y_train) if value == c]
        x_train_eq_c = self._x_train[y_train_index]
        condi_proba = acount(x_train_eq_c, x) / len(y_train_index)
        # When a zero probability shows up, apply the Laplace correction.
        if 0 in condi_proba:
            for i, value in enumerate(condi_proba):
                if value == 0:
                    # number of possible values of this feature
                    proba_value_count_i = len(list(np.unique(self._x_train[:, i])))
                    condi_proba[i] = 1 / (len(y_train_index) + proba_value_count_i)
        return reduce(lambda x, y: x * y, condi_proba)
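
    # For reference, the full Laplace-smoothed estimate is
    #     P(x_i | c) = (|D_{c,x_i}| + 1) / (|D_c| + S_i)
    # where |D_c| is the number of training rows of class c and S_i is the
    # number of distinct values feature i can take. The method above applies
    # it only to the zero counts, e.g. with |D_c| = 9 and S_i = 3 a zero
    # count becomes 1 / (9 + 3) = 1/12.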

    def fit(self, x_train, y_train):
        """Train the classifier.

        :param x_train:
        :param y_train:
        :return:
        """
        self._x_train = x_train
        self._y_train = y_train
        self._class = None
        self._prior_proba = {}  # dict: prior probabilities
        self._set_class()
        self._set_prior_proba()

    def _predict(self, x):
        """Predict for a single sample.

        :param x: one sample
        :return: dict mapping each class to its unnormalized posterior score
        """
        result = {}
        for enum in self._class:
            result[enum] = self._get_condi_proba(x, enum)
        for enum in self._class:
            result[enum] = result[enum] * self._prior_proba[enum]
        return result

    def predict(self, x_predict):
        """Prediction function.

        :param x_predict: ndarray-like of samples
        :return: array of score dicts, one per sample
        """
        y_predict = [self._predict(x) for x in x_predict]
        return np.array(y_predict)
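

# The scores returned by predict() are unnormalized, so a tiny helper such as
# the hypothetical pick_labels() below (not part of the original class) can be
# used to turn each score dict into the most probable class label.
def pick_labels(score_dicts):
    """Map each {class: score} dict to the class with the highest score."""
    return [max(scores, key=scores.get) for scores in score_dicts]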


def load_data(file_path):
    """Load the training set from a CSV file.

    :param file_path:
    :return: (x_train, y_train)
    """
    df = pd.read_csv(file_path, encoding='cp936')
    csv_arr = np.array(df)
    # the last column is y_train
    y_train = csv_arr[:, csv_arr.shape[1] - 1]
    x_train = csv_arr[:, 0:csv_arr.shape[1] - 1]
    return x_train, y_train
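
# load_data() expects a CSV whose last column is the class label and whose
# remaining columns are discrete feature values, for example (an assumed
# layout, not the actual contents of the files used in main()):
#
#   x1,x2,y
#   1,S,-1
#   2,S,1
#   3,L,1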


def main():
    file_path = "貝葉斯測試.csv"
    x_train, y_train = load_data(file_path)
    nb = nbclassifier()
    nb.fit(x_train, y_train)
    print(nb._class)
    print(nb._prior_proba)
    print(nb.predict([[2, 's']]))

    file_path = "西瓜資料.csv"
    x_train, y_train = load_data(file_path)
    # the watermelon data needs its first column removed
    x_train = np.delete(x_train, 0, axis=1)
    nb = nbclassifier()
    nb.fit(x_train, y_train)
    print(nb._class)
    print(nb._prior_proba)
    print(nb.predict([['淺白', '蜷縮', '濁響', '模糊', '平坦', '硬滑']]))


if __name__ == '__main__':
    main()
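
If the two CSV files are not at hand, the classifier can still be exercised with a small in-memory dataset. The snippet below is a minimal sketch run in the same module: it uses the nbclassifier and pick_labels defined above, and the arrays are illustrative values made up for this example, not the contents of 貝葉斯測試.csv.

# hypothetical in-memory training data: two discrete features, labels '1'/'-1'
x_train = np.array([
    ['1', 'S'], ['1', 'M'], ['1', 'M'], ['1', 'S'], ['1', 'S'],
    ['2', 'S'], ['2', 'M'], ['2', 'M'], ['2', 'L'], ['2', 'L'],
    ['3', 'L'], ['3', 'M'], ['3', 'M'], ['3', 'L'], ['3', 'L'],
], dtype=object)
y_train = np.array(['-1', '-1', '1', '1', '-1',
                    '-1', '-1', '1', '1', '1',
                    '1', '1', '1', '1', '-1'], dtype=object)

nb = nbclassifier()
nb.fit(x_train, y_train)
scores = nb.predict([['2', 'S']])
print(scores)               # one {class: unnormalized score} dict per sample
print(pick_labels(scores))  # -> ['-1'] for this toy data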