# -*- coding: utf-8 -*-
"""Created on Sun Apr 22 10:25:14 2018

@author: zhangsh
"""
import csv

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
def listtoarray(datalist):
    """Convert a flat sequence of integer-like items into a 1 x m float array.

    Every item is passed through ``int()``, so both decimal strings such as
    ``"7"`` and plain integers are accepted.

    Args:
        datalist: Sequence of values accepted by ``int()``.

    Returns:
        A ``numpy.ndarray`` of shape ``(1, len(datalist))`` with float dtype.

    Raises:
        ValueError: If a string item is not a valid integer literal.
    """
    values = [int(item) for item in datalist]
    # Build the array in one call instead of assigning element by element;
    # the explicit reshape keeps the (1, 0) shape for an empty input.
    return np.array(values, dtype=float).reshape(1, len(values))
def datanormalize(data):
    """Binarise pixel data in place: every non-zero entry becomes 1.

    Args:
        data: ``numpy.ndarray`` of grey-level values. It is modified in place.

    Returns:
        The same array object that was passed in, now holding only 0 and 1.
    """
    # Boolean-mask assignment replaces the per-element Python double loop.
    data[data != 0] = 1
    return data
def loadtraindataset():
    """Load the training set from ``train.csv`` in the working directory.

    The first row is treated as a header and skipped. In every following row
    the first column is taken as the label and the remaining columns as
    pixel values.

    Returns:
        A tuple ``(traindataarray, trainlabelarray)``: the pixel matrix of
        shape ``(rows, columns - 1)`` after ``datanormalize``, and the labels
        as an array of shape ``(rows, 1)``.

    Raises:
        ValueError: If ``train.csv`` contains no header row.
    """
    traindata = []
    trainlabel = []
    # newline='' is what the csv module asks for when opening files.
    with open('train.csv', 'r', newline='') as file:
        readcsv = csv.reader(file)
        header = next(readcsv, None)  # first row is the description row
        if header is None:
            raise ValueError('train.csv is empty: no header row found')
        n = len(header) - 1  # pixel columns: everything except the label
        for line in readcsv:
            if not line:
                continue  # csv.reader yields [] for blank lines
            # Column 0 is the only column not consumed as pixel data, so it
            # is collected as the label (one per row, reshaped below).
            trainlabel.append(line[0])
            traindata.extend(line[1:])
    # The with-block has already closed the file; no explicit close needed.
    index = len(trainlabel)
    trainlabelarray = listtoarray(trainlabel).reshape((index, 1))
    traindataarray = listtoarray(traindata).reshape((index, n))
    return datanormalize(traindataarray), trainlabelarray
def loadtestdataset():
    """Load the test set from ``test.csv`` in the working directory.

    The first row is treated as a header and skipped. Every column of each
    following row is read as a pixel value.

    Returns:
        The pixel matrix of shape ``(rows, columns)`` after ``datanormalize``.

    Raises:
        ValueError: If ``test.csv`` contains no header row.
    """
    testdata = []
    index = 0
    # newline='' is what the csv module asks for when opening files.
    with open('test.csv', 'r', newline='') as file:
        readcsv = csv.reader(file)
        header = next(readcsv, None)  # first row is skipped
        if header is None:
            raise ValueError('test.csv is empty: no header row found')
        n = len(header)
        for line in readcsv:
            if not line:
                continue  # csv.reader yields [] for blank lines
            index += 1
            testdata.extend(line)
    # The with-block has already closed the file; no explicit close needed.
    testdataarray = listtoarray(testdata).reshape((index, n))
    return datanormalize(testdataarray)
def knnclassifier():
    """Train a k-nearest-neighbours model and write test predictions to CSV.

    Loads the training and test sets, fits a ``KNeighborsClassifier`` that
    uses the ball-tree algorithm, predicts each test row in turn while
    printing a progress line, and writes the collected rows to ``result.csv``.
    """
    trainingdata, traininglabel = loadtraindataset()  # load the training set
    testingdata = loadtestdataset()  # load the test set
    print(testingdata.shape)
    knn = KNeighborsClassifier(algorithm='ball_tree')  # build the KNN model
    # Labels arrive as an (n, 1) column; fit() expects a 1-D target.
    knn.fit(trainingdata, traininglabel.ravel())
    testresult = []  # one output row per test sample
    for i, line in enumerate(testingdata, start=1):
        predictlabel = knn.predict(line.reshape((1, -1)))
        # NOTE(review): the statement that filled testresult was missing from
        # the source; the [row number, label] layout is assumed - confirm
        # against the expected submission format.
        testresult.append([i, int(predictlabel[0])])
        print('**第%d條資料' % i)
    with open('result.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerows(testresult)
# Run the full train/predict pipeline only when executed as a script.
if __name__ == "__main__":
    knnclassifier()
# 參考了這篇部落格:Kaggle 之手寫體識別
# Kaggle 位址。首先載入資料集:import pandas as pd; import numpy as np; train = pd.read_csv('/Users/frank/Documents/workspace/kaggle/dataset/digit_recognizer/train.csv') te...
# TensorFlow 之手寫體識別
# 原因:由於 TensorFlow 相對於 Caffe 更加靈活,準備轉戰 TensorFlow。昨天看了下大概的基本函式,今天打算先跑跑簡單的例子。TensorFlow 的安裝太簡單了,一行程式碼搞定,網上很多教程,不一一列出。想安裝固定 TensorFlow 版本:pip install tensorflow-gpu...
# KNN 手寫體數字識別
# 思路:首先這是一個通過 KNN 分類來完成的數字識別,資料集的格式全部是經過處理後的 32x32 的二進位制數字矩陣,把一個樣本(32x32)轉化為 1x1024 的向量,即一行代表一個樣本,然後把訓練樣本也轉化為一個數字矩陣,每次輸入一個測試集都與訓練集的矩陣進行作差,然後平方和開根號,最後將每一個輸入的...