作業 利用貝葉斯分類器實現手寫數字的識別

2021-09-18 06:06:39 字數 4435 閱讀 9021

# -*- coding: utf-8 -*-

"""created on mon apr 15 12:52:24 2019

@author: lccfm

"""

import os
import struct
import sys
from collections import defaultdict

import numpy as np

def normalize(data):
    """Binarize pixel values in place.

    Every non-zero entry of ``data`` becomes 1; zero entries stay 0.

    Args:
        data: NumPy array of pixel intensities (any shape).

    Returns:
        The same array object that was passed in, now holding only 0 and 1.
    """
    # A boolean mask does the whole array in one vectorized pass instead of
    # visiting every pixel from Python.
    data[data != 0] = 1
    return data

def transforms(imgs, rows=28, cols=28):
    """Reshape flattened images into 2-D pixel grids.

    The earlier implementation replaced every row with ``0 .. l-1`` and then
    tried to store a 28x28 array inside a flat row, which raises
    ``ValueError``; this version keeps the pixel data and simply reshapes it.

    Args:
        imgs: Array of shape ``(count, rows * cols)``, one flattened image
            per row.
        rows: Height of each image (default 28).
        cols: Width of each image (default 28).

    Returns:
        Array of shape ``(count, rows, cols)`` holding the same pixel values.

    Raises:
        ValueError: If a row's length is not ``rows * cols``.
    """
    count, _ = imgs.shape
    return imgs.reshape(count, rows, cols)

def read_data_sets(dir, one_hot=True):
    """Load the MNIST IDX image and label files found in ``dir``.

    Args:
        dir: Directory that contains the image and label files.
        one_hot: When true, labels are returned as one-hot rows of width
            ``max(label) + 1``; otherwise as a 1-D array of class indices.

    Returns:
        A ``defaultdict(dict)`` with:
          * ``'img_shape'``: ``(row, column, 1)`` taken from the image header
            that was read last;
          * ``'train'`` and ``'test'``: dicts with ``'images'`` (float32
            array of shape ``(size, row * column)`` passed through
            ``normalize``) and ``'labels'``.
    """
    # NOTE(review): the original file table was missing from this listing.
    # The names below are the standard uncompressed MNIST file names --
    # confirm they match the files on disk.  Order matters: the image file
    # comes first, the label file second.
    files = {
        'train': ['train-images-idx3-ubyte', 'train-labels-idx1-ubyte'],
        'test': ['t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte'],
    }
    data_set = defaultdict(dict)
    for key, value in files.items():
        for i, fn in enumerate(value):
            # Read the whole file as bytes; ``with`` closes it even on error.
            with open(os.path.join(dir, fn), 'rb') as file:
                f = file.read()
            if not i:
                # Image file: four big-endian unsigned ints (magic, count,
                # rows, columns) followed by one unsigned byte per pixel.
                img_index = struct.calcsize('>IIII')
                _, size, row, column = struct.unpack('>IIII', f[:img_index])
                imgs = np.frombuffer(f, dtype=np.uint8,
                                     count=size * row * column,
                                     offset=img_index)
                data_set['img_shape'] = (row, column, 1)
                # astype() copies, so the result is writable for normalize().
                imgs = imgs.reshape(size, row * column).astype(np.float32)
                imgs = normalize(imgs)
                data_set[key]['images'] = imgs
            else:
                # Label file: two big-endian unsigned ints (magic, count)
                # followed by one unsigned byte per label.
                label_index = struct.calcsize('>II')
                _, size = struct.unpack('>II', f[:label_index])
                labels = np.frombuffer(f, dtype=np.uint8, count=size,
                                       offset=label_index).astype(np.int64)
                if one_hot:
                    tmp = np.zeros((size, int(np.max(labels)) + 1))
                    tmp[np.arange(size), labels] = 1
                    labels = tmp
                data_set[key]['labels'] = labels
    return data_set

def train(data_set):
    """Estimate the naive-Bayes parameters from the training split.

    Args:
        data_set: Mapping whose ``['train']['images']`` is a 2-D array with
            one flattened image per row and whose ``['train']['labels']`` is
            a 2-D array with one row per image; the class of an image is the
            arg-max of its label row.

    Returns:
        A tuple ``(label_sum, label_shape)``:
          * ``label_sum``: 1-D array, fraction of training images per class
            (the class prior).
          * ``label_shape``: array of shape ``(classes, pixels)`` holding
            ``(pixel_total + 1) / (class_count + 2)`` for each class and
            pixel, i.e. the add-one smoothed per-pixel estimate.
    """
    imgs = data_set['train']['images']
    labels = data_set['train']['labels']
    num_image, dimsnum = imgs.shape
    _, labelnum = labels.shape

    # Class index of every image, and how many images each class has.
    classes = np.argmax(labels[:num_image], axis=1)
    label_sum = np.bincount(classes, minlength=labelnum).astype(np.float64)

    # Per class, add up the pixel values of all images of that class.
    label_shape = np.zeros((labelnum, dimsnum))
    for label in range(labelnum):
        label_shape[label] = imgs[classes == label].sum(axis=0,
                                                        dtype=np.float64)

    # Add-one smoothing keeps every estimate strictly between 0 and 1.
    label_shape = (label_shape + 1) / (label_sum[:, np.newaxis] + 2)
    label_sum = label_sum / num_image  # class prior p(w_i)
    return label_sum, label_shape

def test(data_set, pyjk1, pyj):
    """Classify the test split and count the correct predictions.

    Scores are computed in log space.  Multiplying one probability per pixel
    directly underflows to 0.0 for long feature vectors, after which no class
    can win the comparison and class 0 is always reported.

    Args:
        data_set: Mapping whose ``['test']['images']`` is a 2-D array with
            one flattened image per row and whose ``['test']['labels']`` is
            a 2-D array with one row per image; the true class is the
            arg-max of the label row.
        pyjk1: Array of shape ``(classes, pixels)``; entry ``[j][k]`` is the
            probability that pixel ``k`` equals 1 given class ``j``.
        pyj: 1-D array with the prior probability of each class.

    Returns:
        A tuple ``(acc, num)``: number of correctly classified images and
        total number of test images.
    """
    imgs = data_set['test']['images']
    labels = data_set['test']['labels']
    num, dimsnum = imgs.shape
    _, labelnum = labels.shape

    pyjk1 = np.asarray(pyjk1, dtype=np.float64)
    pyj = np.asarray(pyj, dtype=np.float64)

    # A pixel counts as "on" only when it equals 1; anything else is "off".
    on = imgs == 1
    # log(0) legitimately yields -inf for impossible events; silence the
    # warning instead of treating it as an error.
    with np.errstate(divide='ignore'):
        log_on = np.log(pyjk1)
        log_off = np.log(1 - pyjk1)
        log_prior = np.log(pyj)

    # One class at a time keeps the temporary at (num, pixels).  np.where
    # selects values rather than multiplying, so 0 * -inf never produces nan.
    scores = np.empty((num, labelnum))
    for j in range(labelnum):
        scores[:, j] = log_prior[j] + np.where(on, log_on[j],
                                               log_off[j]).sum(axis=1)

    # argmax returns the first maximum, i.e. ties go to the lowest class.
    predicted = np.argmax(scores, axis=1)
    actual = np.argmax(labels[:num], axis=1)
    acc = int(np.count_nonzero(predicted == actual))
    return acc, num


# The name starts with "test", so pytest would try to collect this function
# whenever the module is imported into a test file; opt out explicitly.
test.__test__ = False

if __name__ == '__main__':
    # Directory holding the MNIST files.  The first command-line argument,
    # when given, overrides the built-in default location.
    data_dir = sys.argv[1] if len(sys.argv) > 1 else 'c:/users/lccfm/desktop/data/'
    data_set = read_data_sets(data_dir)

    # train() returns (class priors, per-class pixel probabilities);
    # test() expects them in the opposite order.
    label_sum, label_shape = train(data_set)
    acc, num = test(data_set, label_shape, label_sum)

    print(acc/num)
    print('test accuracy is: %f' % (acc/num))

利用樸素貝葉斯分類器實現手寫數字的識別

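條件:類別數一定,$\omega_i$,$i = 1, 2, 3, \dots, c$;已知類先驗概率 $P(\omega_i)$ 和類條件概率密度 $p(x \mid \omega_i)$。兩類情況:若 $P(\omega_1 \mid x) > P(\omega_2 \mid x)$,則 $x \in \omega_1$;若 $P(\omega_1 \mid x) < P(\omega_2 \mid x)$,則 $x \in \omega_2$。多類情況:若 $P(\omega_i \mid x) = \max_{j = 1, \dots, c} P(\omega_j \mid x)$,則 $x \in \omega_i$。資料集包括四部分:訓練影象、訓練標籤(表示影象為哪個數字)、測試影象、測試標籤。二值特徵提取:將影象進行分割處理,轉化為 0、1 數字資訊,方便...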

Python實現貝葉斯分類器

使用樸素貝葉斯分類器,對一篇文章進行分類處理:對中文進行分詞處理(jieba 分詞);對分開的詞語進行處理,去除重複詞彙,去除標點和單個虛擬詞彙,如「你、我、他」。選擇特徵詞很重要,要總結出符合某一型別的關鍵特徵詞。對分類器進行訓練,即傳入一些已經分好類的文章,讓分類器可以知道其中的一些特徵詞。計算出特徵詞...

matlab 實現貝葉斯分類器

網上有很多文章介紹貝葉斯原理,這裡推薦個鏈結。這裡再說貝葉斯分類器的設計步驟 1.對每個簇的資料求均值mu 和協方差矩陣sigma 2.對測試資料,將其對每個簇用均值和協方差矩陣求相關性。3.將資料分類到相關性大的簇中。分類器函式 bayesclassifer.m function labels b...