Python實現機器演算法 03 貝葉斯

2021-09-14 07:45:19 字數 3968 閱讀 3195

# -*- coding: utf-8 -*-

"""~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@file : *****bayes.py

@contact : [email protected]

@author : ffzzyy

@version : 0.1

@modify time : 2019/3/15 15:04

@description

"""import numpy as np

import pandas as pd

from functools import reduce

def acount(x1, x2, axis=0):
    """Count how often each element of ``x2`` occurs in the matching slice of ``x1``.

    With ``axis=0`` (the default) element ``x2[i]`` is compared against column
    ``i`` of ``x1``; with ``axis=1`` it is compared against row ``i``.

    :param x1: 2-D array-like (``numpy.ndarray`` or nested sequence)
    :param x2: 1-D array-like holding one value per column (or row) of ``x1``
    :param axis: counting direction, 0 = per column (default), 1 = per row
    :return: float ``numpy.ndarray`` of length ``len(x2)`` with the counts
    :raises ValueError: if ``axis`` is neither 0 nor 1

    examples
    --------
    >>> x1 = np.array([[1, "s"], [2, "l"], [1, "l"], [3, "m"]])
    >>> x2 = np.array([2, "l"])
    >>> print(acount(x1, x2))
    [1. 2.]
    """
    if axis not in (0, 1):
        raise ValueError("axis must be 0 (per column) or 1 (per row)")

    # Nested lists are converted with dtype=object so mixed int/str values are
    # not silently coerced to strings; ndarrays are used exactly as passed in.
    if not isinstance(x1, np.ndarray):
        x1 = np.array(x1, dtype=object)
    if axis == 1:
        # Transposing lets the column-wise loop below count along rows instead.
        x1 = x1.T

    result = np.zeros(len(x2))
    for i, value in enumerate(x2):
        # np.array(...) keeps np.sum well-defined even if the comparison
        # collapses to a scalar bool instead of an element-wise mask.
        result[i] = np.sum(np.array(x1[:, i] == value))
    return result

class nbclassifier:
    """Naive Bayes classifier for categorical feature values.

    ``fit`` stores the training data and computes the class priors;
    ``predict`` returns, for every sample, a dict mapping each class label to
    ``P(class) * prod_i P(x_i | class)`` (an unnormalised posterior score).
    """

    def __init__(self):
        self._x_train = None
        self._y_train = None
        self._class = None
        self._prior_proba = {}  # class label -> prior probability

    def _set_class(self):
        """Store the sorted list of distinct class labels found in ``_y_train``.

        :return: None
        """
        self._class = sorted(np.unique(self._y_train))

    def _set_prior_proba(self):
        """Compute the prior probability of every class in ``_class``.

        The prior is the relative frequency of the label in ``_y_train``.

        :return: None
        """
        labels = list(self._y_train)
        total = len(labels)
        for label in self._class:
            self._prior_proba[label] = labels.count(label) / total

    def _get_condi_proba(self, x, c):
        """Compute the conditional probability p(x | c).

        :param x: one sample, a sequence with one value per feature
        :param c: class label
        :return: product over all features of the estimated p(x_i | c)
        """
        rows_of_c = [i for i, label in enumerate(self._y_train) if label == c]
        x_train_eq_c = self._x_train[rows_of_c]
        condi_proba = acount(x_train_eq_c, x) / len(rows_of_c)

        # A zero estimate would wipe out the whole product, so it is replaced
        # by 1 / (N_c + S_i), where S_i is the number of distinct values that
        # feature i takes in the training data.
        # NOTE(review): only zero estimates are replaced; non-zero estimates
        # stay unsmoothed, so this is not full Laplace smoothing.
        for i, value in enumerate(condi_proba):
            if value == 0:
                n_values = len(np.unique(self._x_train[:, i]))
                condi_proba[i] = 1 / (len(rows_of_c) + n_values)

        return reduce(lambda a, b: a * b, condi_proba)

    def fit(self, x_train, y_train):
        """Store the training set and derive the classes and their priors.

        :param x_train: 2-D array-like, one row per sample
        :param y_train: 1-D array-like with the label of each row
        :return: None
        """
        # Row selection by index list in _get_condi_proba needs an ndarray;
        # dtype=object keeps mixed int/str feature values unchanged.
        if not isinstance(x_train, np.ndarray):
            x_train = np.array(x_train, dtype=object)
        self._x_train = x_train
        self._y_train = y_train
        # Reset state so that re-fitting does not keep stale priors.
        self._class = None
        self._prior_proba = {}
        self._set_class()
        self._set_prior_proba()

    def _predict(self, x):
        """Score a single sample against every known class.

        :param x: one sample, a sequence with one value per feature
        :return: dict mapping class label -> p(x | class) * P(class)
        """
        return {
            label: self._get_condi_proba(x, label) * self._prior_proba[label]
            for label in self._class
        }

    def predict(self, x_predict):
        """Score every sample in ``x_predict``.

        :param x_predict: array-like of samples
        :return: ``numpy.ndarray`` holding one score dict (see ``_predict``)
                 per sample
        """
        return np.array([self._predict(x) for x in x_predict])

def load_data(file_path, encoding='cp936'):
    """Load a training set from a CSV file.

    The file is read with ``pandas.read_csv``; the last column is taken as the
    label vector and all preceding columns as the feature matrix.

    :param file_path: path of the CSV file
    :param encoding: text encoding of the file; defaults to ``'cp936'``,
                     the encoding the original code always used
    :return: tuple ``(x_train, y_train)`` of ``numpy.ndarray``
    """
    df = pd.read_csv(file_path, encoding=encoding)
    csv_arr = np.array(df)
    # The last column is y_train, everything before it is x_train.
    y_train = csv_arr[:, -1]
    x_train = csv_arr[:, :-1]
    return x_train, y_train

def main():
    """Fit the classifier on the two sample CSV files and print the results.

    For each dataset the sorted class labels, the class priors and the score
    dict of one hand-written sample are printed.
    """
    # First dataset: used as loaded.
    features, labels = load_data("貝葉斯測試.csv")
    toy_model = nbclassifier()
    toy_model.fit(features, labels)
    print(toy_model._class)
    print(toy_model._prior_proba)
    print(toy_model.predict([[2, 's']]))

    # Second dataset: its first column has to be removed before training.
    features, labels = load_data("西瓜資料.csv")
    features = np.delete(features, 0, axis=1)
    melon_model = nbclassifier()
    melon_model.fit(features, labels)
    print(melon_model._class)
    print(melon_model._prior_proba)
    print(melon_model.predict([['淺白','蜷縮','濁響','模糊','平坦','硬滑']]))

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

樸素貝葉斯演算法Python實現

coding cp936 樸素貝葉斯分類器的實現 執行 reload docclass c1 docclass.bayes docclass.getwords docclass.sampletrain c1 c1.classify quick rabbit default unknown 構建訓練樣...

python實現樸素貝葉斯演算法

貝葉斯公式 換種說法 詳細參考文章樸素貝葉斯例題分析 也就是引數為1時的貝葉斯估計,當某個分量在總樣本某個分類中 觀察樣本庫 訓練集 從沒出現過,會導致整個例項的計算結果為0。為了解決這個問題,使用拉普拉斯平滑 加1平滑進行處理。它的思想非常簡單,就是對先驗概率的分子 劃分的計數 加1,分母加上類別...

機器學習演算法 之樸素貝葉斯的實現

為了加深對機器學習演算法的理解,以及熟悉python,pandas,scikit learn。現在自己實現一下主要的機器學習演算法,程式記錄如下 決策樹類的實現程式 from numpy import defloaddataset postinglist my dog has flea proble...