from numpy import *
import operator
import os
def classify0(inx, dataset, labels, k):
    """Classify ``inx`` by majority vote among its k nearest training rows.

    Distance is Euclidean: ``inx`` is tiled once per row of ``dataset``,
    subtracted, squared, summed along each row and square-rooted.

    Args:
        inx: Feature vector to classify; it is tiled to one copy per row of
            ``dataset`` and must then match ``dataset`` in shape.
        dataset: 2-D array with one training sample per row.
        labels: Sequence indexed like the rows of ``dataset``.
        k: Number of nearest rows that take part in the vote.

    Returns:
        The label with the most votes. On a tie, the label whose first vote
        came from the nearer sample wins (stable sort over an
        insertion-ordered dict).

    Raises:
        IndexError: If ``k`` exceeds the number of rows, or if ``k`` is not
            positive (no votes are cast).
    """
    datasetsize = dataset.shape[0]
    diffmat = tile(inx, (datasetsize, 1)) - dataset
    sqdistances = (diffmat ** 2).sum(axis=1)
    distances = sqdistances ** 0.5
    # argsort yields row indices ordered nearest-first, e.g. [2 3 1 0].
    sorteddistindicies = distances.argsort()
    classcount = {}
    for i in range(k):
        voteilabel = labels[sorteddistindicies[i]]
        classcount[voteilabel] = classcount.get(voteilabel, 0) + 1
    # `True` (not the undefined name `true`) sorts by vote count, descending.
    sortedclasscount = sorted(
        classcount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedclasscount[0][0]
def img2vector(filename):
    """Flatten a 32x32 text image into a 1x1024 row vector.

    The first 32 lines of the file are read, and the first 32 characters of
    each line are converted with ``int`` and stored row-major.

    Args:
        filename: Path of the text file holding the digit image.

    Returns:
        A ``(1, 1024)`` float array; element ``32 * i + j`` holds the value
        at line ``i``, column ``j``.

    Raises:
        IndexError: If a line has fewer than 32 characters (including when
            the file has fewer than 32 lines).
        ValueError: If one of the characters is not a decimal digit.
    """
    returnvector = zeros((1, 1024))
    # The context manager closes the file even if a conversion fails.
    with open(filename) as fr:
        for i in range(32):
            linestr = fr.readline()
            for j in range(32):
                returnvector[0, 32 * i + j] = int(linestr[j])
    return returnvector
def handwritingclasstest():
    """Evaluate the kNN digit classifier and print its error statistics.

    Every file in ``./trainingdigits/`` becomes one training row; its label
    is the integer before the first ``_`` in the file name. Every file in
    ``./testdigits/`` is then classified with ``k = 3`` and compared with
    the label parsed from its own name. Each misclassification is printed,
    followed by the error count and the error rate.
    """
    hwlabels = []
    trainingfilelist = os.listdir("./trainingdigits/")
    m = len(trainingfilelist)
    trainingmat = zeros((m, 1024))
    for i in range(m):
        filenamestr = trainingfilelist[i]
        filestr = filenamestr.split('.')[0]
        classnumstr = int(filestr.split('_')[0])
        # Row i of trainingmat and entry i of hwlabels describe the same file.
        hwlabels.append(classnumstr)
        trainingmat[i, :] = img2vector('./trainingdigits/%s' % (filenamestr))

    testfilelist = os.listdir('./testdigits/')
    errorcount = 0.0
    mtest = len(testfilelist)
    for i in range(mtest):
        filenamestr = testfilelist[i]
        filestr = filenamestr.split('.')[0]
        classnumstr = int(filestr.split('_')[0])
        vectorundertest = img2vector('./testdigits/%s' % (filenamestr))
        classifyresult = classify0(vectorundertest, trainingmat, hwlabels, 3)
        if classifyresult != classnumstr:
            errorcount += 1
            # Only misclassified samples are reported individually.
            print("識別結果是%d,真實結果是%d" % (classifyresult, classnumstr))
    print("錯誤數為%d" % (errorcount))
    # An empty test directory would otherwise divide by zero.
    errorrate = errorcount / mtest if mtest else 0.0
    print("錯誤率是%f" % (errorrate))
def main():
    """Script entry point: run the handwritten-digit kNN evaluation."""
    handwritingclasstest()
# Run the evaluation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
KNN手寫數字識別
以歐幾里得距離度量樣本間的相似程度。對於一個測試樣本,首先計算該樣本與每個訓練樣本間的距離,然後按距離值對訓練樣本進行升序排序,排序後取前k個樣本進行投票,即哪個標籤出現的次數多,就將測試樣例劃為該類。程式使用資料:預先將資料處理為,標籤資訊轉化為txt文件。from numpy import imp...
kNN 手寫數字識別
識別手寫的數字0~9,影象為32畫素×32畫素的黑白影象。1. 將影象轉換為向量:將32×32的二進位制影象矩陣轉換為1×1024的向量。將影象轉化為向量:def img2vector(filename): returnvect = zeros((1, 1024)); fr = open(filename)  # 讀取檔案的前32行 ...
KNN手寫數字識別
import numpy as np; import matplotlib.pyplot as plt; from sklearn.neighbors import KNeighborsClassifier。讀取樣本資料,目標:0,1,2,3,4,5,6,7,8,9。feature target for...