K-means 演算法實現

import math
import numpy
class point_data_reader:
    """Load whitespace-separated numeric points from a text file.

    Each non-blank line of the file becomes one point (a list of numbers).

    NOTE(review): the source this class was recovered from had lost its
    indentation and the bodies of the ``except``/``else`` branches as well
    as the statement that stores each row. The reconstruction below
    substitutes ``num_lost`` for every field that is not a finite number
    and appends one row per non-blank line -- confirm against the original
    author's intent.
    """

    # Class-level default so the attribute exists even before __init__ runs.
    file_name = str()

    def __init__(self, file_name):
        """Remember the path of the data file to read.

        Args:
            file_name: Path of the text file holding the points.
        """
        # Stored on the instance: assigning to the class (as the earlier
        # version did) made every reader share the most recent path.
        self.file_name = file_name

    @staticmethod
    def _parse_number(token):
        """Convert one text field to an int or a finite float.

        Raises:
            ValueError: If the field is not a finite number.
        """
        # The earlier version called eval() on raw file contents, which
        # executes arbitrary code; plain numeric parsing is used instead.
        try:
            return int(token)
        except ValueError:
            value = float(token)
        if not math.isfinite(value):
            # "nan"/"inf" markers are treated as missing, not as data.
            raise ValueError("non-finite value: %r" % (token,))
        return value

    def get_data_list(self, num_lost):
        """Read the file and return its points as a list of rows.

        Args:
            num_lost: Value substituted for any field that cannot be parsed
                as a finite number.

        Returns:
            A list with one entry per non-blank line; each entry is the list
            of parsed fields of that line.

        Raises:
            OSError: If the file cannot be opened for reading.
        """
        db = []
        # Read-only mode is enough, and ``with`` closes the handle even if
        # reading fails part-way through.
        with open(self.file_name, 'r') as file_:
            for line in file_:
                fields = line.split()
                if not fields:
                    # Blank lines carry no point; skipping them keeps every
                    # row usable by the clustering code.
                    continue
                row = []
                for field in fields:
                    try:
                        row.append(self._parse_number(field))
                    except ValueError:
                        row.append(num_lost)
                db.append(row)
        return db
class k_means:
    """K-means clustering over a list of equal-length numeric points.

    NOTE(review): the source this class was recovered from had lost its
    indentation and several statements (the random coordinate draw, the
    bucket appends, the centroid append and the initial centroid seeding).
    Those statements are reconstructed here from the surrounding comments
    and call sites -- confirm against the original author's intent.
    """

    def __init__(self, point_list):
        """Store the data set and derive its dimensionality.

        Args:
            point_list: Non-empty sequence of points; each point is a
                sequence of numbers with the same length as the first one.

        Raises:
            ValueError: If ``point_list`` is empty.
        """
        if not point_list:
            raise ValueError("point_list must contain at least one point")
        # Kept on the instance: the earlier version assigned to the class,
        # so every instance shared the most recently supplied data set.
        self.point_list = point_list
        # Number of coordinates per point, taken from the first point.
        self.dimension = len(point_list[0])

    def add_random_point(self):
        """Return a random point inside the bounding box of the data.

        Returns:
            A list of ``self.dimension`` floats, each drawn uniformly
            between the smallest and largest value seen on that axis.
        """
        point = []
        for axis in range(self.dimension):
            column = [row[axis] for row in self.point_list]
            # NOTE(review): the draw itself was missing from the source;
            # a uniform sample between the axis bounds is assumed.
            point.append(float(numpy.random.uniform(min(column), max(column))))
        return point

    def seprate_data(self, gather_point_list):
        """Assign every data point to its nearest centroid.

        Args:
            gather_point_list: Non-empty list of centroids.

        Returns:
            A list with ``len(gather_point_list)`` buckets; bucket ``n``
            holds the points whose nearest centroid is
            ``gather_point_list[n]`` (ties go to the lowest index).

        Raises:
            ValueError: If ``gather_point_list`` is empty.
        """
        if not gather_point_list:
            raise ValueError("gather_point_list must contain at least one centroid")
        split_data = [[] for _ in gather_point_list]
        for data in self.point_list:
            # Euclidean distance d = sqrt(sum_j (x_j - c_j)^2). The earlier
            # comparison kept the *largest* distance, which sent each point
            # to its farthest centroid; k-means needs the nearest one.
            # Indexing by position (not list.index) also stays correct when
            # two centroids have identical coordinates.
            nearest = min(
                range(len(gather_point_list)),
                key=lambda n: self.euclidean_distance(data, gather_point_list[n]),
            )
            split_data[nearest].append(data)
        return split_data

    def euclidean_distance(self, point, gather_point):
        """Return the Euclidean distance between two points.

        Args:
            point: First point (sequence of numbers).
            gather_point: Second point, same length as ``point``.

        Returns:
            The distance as a float.
        """
        squared = sum((p - g) ** 2 for p, g in zip(point, gather_point))
        return math.sqrt(squared)

    def get_centeral_point(self, seprate_data):
        """Compute the centroid of every non-empty bucket.

        The centroid is the point x minimising sum_n sum_j (x_j^n - x_j)^2;
        setting the gradient to zero gives the per-axis mean of the bucket.

        Args:
            seprate_data: List of buckets, each a list of points.

        Returns:
            A list of centroids (lists of floats), one per non-empty bucket
            in the original order. Empty buckets are skipped, so the result
            may be shorter than ``seprate_data``.
        """
        gather_point = []
        for bucket in seprate_data:
            if not bucket:
                continue
            center = numpy.asarray(bucket, dtype=float).mean(axis=0)
            gather_point.append(center.tolist())
        return gather_point

    def get_seprate_point(self, k=2, error=0.000001, max_iter=300):
        """Run k-means from random starting centroids until convergence.

        Args:
            k: Number of random starting centroids (at least 1). Centroids
                that attract no points are dropped, so fewer than ``k``
                clusters may be returned.
            error: Stop once no centroid coordinate moves by this much or
                more between two consecutive iterations.
            max_iter: Upper bound on the number of iterations (at least 1).

        Returns:
            A tuple ``(gather_point, seprate_data)``: the final centroids
            and, index-aligned with them, the points assigned to each.

        Raises:
            ValueError: If ``k`` or ``max_iter`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be at least 1")
        if max_iter < 1:
            raise ValueError("max_iter must be at least 1")

        # Seed with k random points inside the data's bounding box.
        gather_point = [self.add_random_point() for _ in range(k)]
        seprate_data = []
        for _ in range(max_iter):
            last_gather_point = gather_point
            # Drop empty buckets right away so the buckets stay
            # index-aligned with the centroids computed from them.
            seprate_data = [
                bucket for bucket in self.seprate_data(last_gather_point) if bucket
            ]
            gather_point = self.get_centeral_point(seprate_data)
            if len(gather_point) != len(last_gather_point):
                # A centroid lost all of its points and was removed; the
                # two lists are not comparable, so iterate again.
                continue
            # Largest absolute coordinate shift. The earlier test summed
            # signed differences, so opposite moves could cancel out and
            # stop the loop before the centroids had settled.
            shift = numpy.abs(
                numpy.asarray(gather_point) - numpy.asarray(last_gather_point)
            ).max()
            if shift < error:
                break
        return gather_point, seprate_data
if __name__ == '__main__':
    # Load the points from text.dat, passing 1.0 as the num_lost value.
    db = point_data_reader('text.dat').get_data_list(num_lost=1.0)
    k = k_means(db)
    # numpy.random.random_integers is deprecated; randint excludes its upper
    # bound, so (1, 11) still draws the cluster count from 1..10 inclusive.
    gather_point, seprate_data = k.get_seprate_point(
        k=numpy.random.randint(1, 11), error=0.001)
    print(gather_point)

Kmeans演算法實現

include opencv2 highgui highgui.hpp include opencv2 core core.hpp include using namespace cv using namespace std static void help int main int argc ch...

K means演算法實現

1. 首先隨機生成k個聚類中心點。2. 根據聚類中心點，將資料分為k類；分類的原則是資料離哪個中心點近，就將它分為哪一類別。3. 再根據分好類別的資料，重新計算各類別的中心點。4. 不斷重複第2步和第3步，直到中心點不再變化。from numpy import import csv import matplotlib.pyp...

matlab實現kmeans演算法

kmeans是一種聚類演算法，屬於無監督學習演算法，分為以下幾步：1. 隨機選取k個聚類中心。2. 計算每個樣本點離哪個聚類中心最近（距離計算），就將該樣本分為這個類。3. 重新計算這k個類的聚類中心；一種簡單的計算方法為計算每個類的平均值，即為新的聚類中心。重複執行步驟2和3，直到聚類中心的變化小於給定閾值，或者達到...

K means演算法實現

Kmeans演算法實現

K means演算法實現

matlab實現kmeans演算法

相關推薦