The k-nearest-neighbour algorithm (KNN for short) is used for classification.
Its three key elements are the choice of k, the distance metric, and the classification decision rule (majority vote).
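For reference, these are the two distance metrics computed in the script below, written in my own notation, between the test point x and a training point t:

\[
d_{\mathrm{man}}(x, t) = \sum_{i} \lvert x_i - t_i \rvert,
\qquad
d_{\mathrm{euc}}(x, t) = \sqrt{\sum_{i} (x_i - t_i)^2}
\]

The script computes both, but uses the Euclidean distance for the actual classification.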
import numpy
import matplotlib.pyplot as plt

'''Given a training set, its class labels, and a test point:
1. Broadcast the test point to the same shape as the training set.
2. Compute the distances.
3. Sort the distance list and take the indices of the k nearest points.
4. Keep one counter per class, loop over the sorted indices, check each label, and let the majority decide.
5. Visualise the data.
'''

if __name__ == '__main__':
    k = 5  # known in advance; use an odd number
    trainlabel = ["b", "b", "a", "a", "b", "b", "a", "b", "a", "a", "b", "a"]  # class labels of the training set
    traindata = numpy.loadtxt("knndata1.txt", delimiter=",")  # coordinate data of the training set
    testdata = numpy.array([0.7, 0.7])  # test point
    # print(traindata)
    # print(testdata)

    # broadcast the test point to the same shape as the training set
    testdata = numpy.tile(testdata, (traindata.shape[0], 1))
    # broadcasting is not strictly required, but it makes plotting easier
    # print(testdata)

    # compute the distances
    mandist = numpy.sum(abs(testdata - traindata), axis=1)  # Manhattan distance
    # print(mandist)
    eucdist = numpy.sum((testdata - traindata)**2, axis=1)**0.5  # Euclidean distance (the usual choice)
    # print(eucdist)

    # sort
    sortindex = numpy.argsort(eucdist)
    # sortindex = numpy.argsort(mandist)
    # print(sortindex)

    # majority vote over the k nearest neighbours
    a = b = 0
    for i in sortindex[0:k]:
        if trainlabel[i] == "a":
            a += 1
        else:
            b += 1
    print(a, b)
    print("i am a") if a > b else print("i am b")

    # visualise the data
    plt.figure()
    plt.title("wgs")
    for i in range(traindata.shape[0]):  # one iteration per training point
        if trainlabel[i] == "a":
            plt.scatter(traindata[i, 0], traindata[i, 1], c="r")
        else:
            plt.scatter(traindata[i, 0], traindata[i, 1], c="g")
    if a > b:
        plt.scatter(testdata[0, 0], testdata[0, 1], c="r", marker="*", label="test point")
    else:
        plt.scatter(testdata[0, 0], testdata[0, 1], c="g", marker="*", label="test point")
    plt.grid(True)
    plt.legend(bbox_to_anchor=(0, 1.1), loc=2, borderaxespad=0)
    # plt.show()
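For comparison, here is a minimal scikit-learn sketch of the same classification. It assumes the same knndata1.txt file and the same label list as above; KNeighborsClassifier uses the Euclidean distance by default, so its prediction should match the hand-written majority vote.

import numpy
from sklearn.neighbors import KNeighborsClassifier

traindata = numpy.loadtxt("knndata1.txt", delimiter=",")
trainlabel = ["b", "b", "a", "a", "b", "b", "a", "b", "a", "a", "b", "a"]

# k = 5 neighbours, Euclidean distance (the library default)
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(traindata, trainlabel)
print(clf.predict([[0.7, 0.7]]))  # expected to agree with the vote above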
A small case study: the raw data is stored in dictionaries, and each sample is labelled either 愛情片 (romance film) or 動作片 (action film).
# training set: {title: [x, y, label]} (the dict values are omitted here)
traindata = {}
# test set: {title: [x, y]} (values omitted here)
testdata = {}
# extract the data
tranlable = []  # labels
getlist = []    # training-set coordinates

x1 = testdata["hi boy"]
x2 = testdata["hi boy"]
new_test = [x1[0], x2[1]]
for i in traindata.keys():
    temp = traindata[i]
    getlist.append(temp[0])    # x coordinate
    getlist.append(temp[1])    # y coordinate
    tranlable.append(temp[2])  # label
x = [i for i in getlist[0::2]]
y = [i for i in getlist[1::2]]
new_traindata = numpy.c_[x, y]

# broadcast the test point to the same shape as the training set
new_test = numpy.tile(new_test, (new_traindata.shape[0], 1))
# Euclidean distance
distance = numpy.sqrt(numpy.sum((new_test - new_traindata)**2, axis=1))
# sort and return the original indices
sortlist = numpy.argsort(distance)

# classification
k = 3
a = b = 0
for i in sortlist[0:k]:
    if tranlable[i] == "愛情片":
        a += 1
    else:
        b += 1
print("愛情片") if a > b else print("動作片")

# visualise the data
for i in range(new_traindata.shape[0]):
    if tranlable[i] == "愛情片":
        plt.scatter(new_traindata[i, 0], new_traindata[i, 1], c="r")
    else:
        plt.scatter(new_traindata[i, 0], new_traindata[i, 1], c="g")
if a > b:
    plt.scatter(new_test[0, 0], new_test[0, 1], c="r", marker="+")
else:
    plt.scatter(new_test[0, 0], new_test[0, 1], c="g", marker="+")
plt.show()
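Both scripts repeat the same broadcast / distance / sort / vote steps, so they can be folded into one small helper. The sketch below is my own wrapper, not part of the scripts above; the name knn_classify and the use of collections.Counter for the vote are assumptions, and numpy broadcasting replaces numpy.tile.

import numpy
from collections import Counter

def knn_classify(test_point, traindata, trainlabel, k=5):
    """Return the majority label among the k nearest training points (Euclidean distance)."""
    test_point = numpy.asarray(test_point, dtype=float)
    diffs = traindata - test_point                 # broadcasting, no explicit tile needed
    eucdist = numpy.sqrt(numpy.sum(diffs**2, axis=1))
    nearest = numpy.argsort(eucdist)[:k]           # indices of the k closest points
    votes = Counter(trainlabel[i] for i in nearest)
    return votes.most_common(1)[0][0]

# usage, reusing the data from the first example:
# traindata = numpy.loadtxt("knndata1.txt", delimiter=",")
# print(knn_classify([0.7, 0.7], traindata, trainlabel, k=5))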