使用KNN對鳶尾花資料集進行分類處理

"""
@author: tao
@contact: 1281538933@qq.com
@file: knn.py
@time: 2020/12/21
@software: vscode
"""
import math
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris  # loader for the iris dataset
# train_test_split is used to split the data into training and test sets.
from sklearn.model_selection import train_test_split
# StandardScaler is used to standardise the feature columns.
from sklearn.preprocessing import StandardScaler
# Load the iris dataset.
iris = load_iris()
# Hold out 20% of the samples as the test set; random_state pins the shuffle
# so the split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.20, random_state=27
)
# Standardise the features. The scaler is fitted on the training split only
# and the same transform is then applied to the test split.
ss = StandardScaler()
x_train = ss.fit_transform(x_train)
x_test = ss.transform(x_test)
class myknn():
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    The classifier memorises the training samples in ``fit`` and, for each
    query sample, returns the most common label among the ``k`` training
    samples closest to it.
    """

    def __init__(self, k):
        """Store the number of neighbours ``k`` that take part in the vote."""
        self.k = k
        self._x_train = None
        self._y_train = None

    def fit(self, x_train, y_train):
        """Remember the training features and labels.

        Args:
            x_train: sequence of training samples (one feature vector each).
            y_train: sequence of labels, aligned with ``x_train``.

        Returns:
            The classifier itself, so calls can be chained.
        """
        self._x_train = x_train
        self._y_train = y_train
        return self

    def _predict(self, x):
        """Return the predicted label for a single sample ``x``.

        Raises:
            RuntimeError: if ``fit`` has not been called yet.
        """
        if self._x_train is None or self._y_train is None:
            raise RuntimeError("fit() must be called before predict()")

        x = np.asarray(x)
        # Measure against the data handed to fit(), not against whatever
        # module-level x_train/y_train happen to exist.
        distances = [
            math.sqrt(np.sum((x - row) ** 2)) for row in self._x_train
        ]
        nearest = np.argsort(distances)
        top_k = [self._y_train[i] for i in nearest[:self.k]]
        # most_common(1) yields [(label, count)]; keep only the label.
        votes = Counter(top_k)
        return votes.most_common(1)[0][0]

    def predict(self, x_pre):
        """Return an array with one predicted label per sample in ``x_pre``."""
        y_pre = [self._predict(x) for x in x_pre]
        return np.array(y_pre)

    def accuracy(self, y_pre, y_test):
        """Return the fraction of predictions in ``y_pre`` equal to ``y_test``.

        Both arguments are converted to arrays first so that plain lists are
        compared element-wise instead of as whole objects.
        """
        matches = np.asarray(y_pre) == np.asarray(y_test)
        return float(np.sum(matches)) / len(y_test)
def get_best_k(k_values=range(4, 20)):
    """Evaluate ``myknn`` for several k and plot accuracy against k.

    For every k in ``k_values`` a classifier is fitted on the module-level
    ``x_train``/``y_train`` and scored on ``x_test``/``y_test``. The accuracy
    curve is plotted with the best point marked in red.

    Args:
        k_values: iterable of neighbour counts to try. Defaults to 4..19.

    Returns:
        The k with the highest test accuracy (the smallest such k on ties).

    Raises:
        ValueError: if ``k_values`` is empty.
    """
    k_values = list(k_values)
    if not k_values:
        raise ValueError("k_values must contain at least one k")

    # Train one classifier per k and record its accuracy on the test split.
    acc = []
    for k in k_values:
        sknn = myknn(k)
        sknn.fit(x_train, y_train)
        y_p = sknn.predict(x_test)
        acc.append(sknn.accuracy(y_p, y_test))

    # argmax gives a position in `acc`; map it back to the actual k so the
    # marker and the title report k itself rather than a list index.
    best_idx = int(np.argmax(acc))
    best_k = k_values[best_idx]
    best_acc = acc[best_idx]

    # Use a CJK-capable font for the title and keep the minus sign renderable.
    plt.rcParams['font.sans-serif'] = ['simhei']
    plt.rcParams['axes.unicode_minus'] = False
    # The label gives plt.legend() an entry to show.
    plt.plot(k_values, acc, lw=1.5, label="k_accuracy")
    plt.plot(best_k, best_acc, 'ro')
    plt.xlabel("k")
    plt.ylabel("k_accuracy")
    plt.title('精確率隨k的變化,當k=%d,精確率達到最大值:%.3f' % (best_k, best_acc))
    plt.legend()
    plt.show()
    return best_k


get_best_k()

這裡對資料進行了標準化處理

用knn演算法對鳶尾花資料集進行分類

from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from skle...

kNN處理鳶尾花資料集

kNN（k-nearest neighbor）演算法是機器學習中最基礎入門，也是最常用的演算法之一，可以解決大多數分類與回歸問題。這裡以鳶尾花資料集為例，討論分類問題中的 kNN 的思想。鳶尾花資料集內包含 3 類共 150 條記錄，每類各 50 個資料，每條記錄都有 4 項特徵：花萼長度 sepa...

利用KNN對鳶尾花資料進行分類

kNN（k-nearest neighbor）工作原理：存在一個樣本資料集合，也稱為訓練樣本集，並且樣本集中每個資料都存在標籤，即我們知道樣本集中每一資料與所屬分類對應的關係。輸入沒有標籤的資料後，將新資料中的每個特徵與樣本集中資料對應的特徵進行比較，提取出樣本集中特徵最相似資料（最近鄰）的分類標籤...

使用KNN對鳶尾花資料集進行分類處理

用knn演算法對鳶尾花資料集進行分類

kNN處理鳶尾花資料集

利用KNN對鳶尾花資料進行分類

相關推薦