import math
import numpy
class point_data_reader:
    """Load whitespace-separated numeric rows from a text file.

    NOTE(review): the source of this class arrived with its indentation
    stripped and with the bodies of the ``except``/``else`` branches and the
    row append missing.  They are reconstructed here from the ``num_lost``
    parameter name and the returned ``db`` list -- confirm against the
    intended behaviour.
    """

    # Class-level default kept so ``point_data_reader.file_name`` still exists.
    file_name = str()

    def __init__(self, file_name):
        """Remember the path of the data file to read.

        The path is stored on the instance (not on the class), so several
        readers can coexist without overwriting each other's file name.
        """
        self.file_name = file_name

    def get_data_list(self, num_lost):
        """Return the file contents as a list of rows of parsed values.

        Each non-blank line is split on whitespace and every token is
        evaluated as a Python expression.  A token that cannot be evaluated
        is replaced by ``num_lost``.  Blank lines are skipped so they do not
        produce empty rows.

        Args:
            num_lost: placeholder stored for every token that fails to parse.

        Returns:
            A list with one list of values per non-blank line.

        Raises:
            OSError: if the file cannot be opened for reading.
        """
        db = []
        # Read-only access is enough; ``with`` closes the file even on error.
        with open(self.file_name, 'r') as file_:
            for line in file_:
                tokens = line.split()
                if not tokens:
                    continue
                reader_list = []
                for reader_ in tokens:
                    try:
                        # SECURITY(review): eval() executes arbitrary code and
                        # can see this method's local names.  Only use on
                        # trusted data files; consider ast.literal_eval().
                        value = eval(reader_)
                    except Exception:
                        value = num_lost
                    reader_list.append(value)
                db.append(reader_list)
        return db
class k_means:
    """Plain k-means clustering over a list of equally sized points.

    NOTE(review): the source of this class arrived with its indentation
    stripped and with several ``append`` statements missing; those were
    reconstructed from the surrounding code and should be confirmed.
    """

    def __init__(self, point_list):
        """Store the data set and derive its dimensionality.

        Args:
            point_list: non-empty sequence of points; each point is a
                sequence of numbers.  The dimension is taken from the first
                point.

        Raises:
            IndexError: if ``point_list`` is empty.
        """
        # Point data.  Kept on the instance so separate k_means objects do
        # not overwrite each other's data.
        self.point_list = point_list
        # Number of coordinates per point.
        self.dimension = len(point_list[0])

    def add_random_point(self):
        """Return a random point inside the bounding box of the data.

        For every axis a value is drawn uniformly between the smallest and
        the largest coordinate found on that axis.
        """
        point = []
        for axis in range(self.dimension):
            column = [row[axis] for row in self.point_list]
            point.append(float(numpy.random.uniform(min(column), max(column))))
        return point

    def seprate_data(self, gather_point_list):
        """Partition the data set among the given cluster centres.

        Returns a list with ``len(gather_point_list)`` elements; the points
        in the n-th list are the ones assigned to ``gather_point_list[n]``.
        Every point goes to its nearest centre (Euclidean distance); on a tie
        the centre with the lowest index wins.  An empty centre list yields
        an empty result.
        """
        split_data = [[] for _ in gather_point_list]
        if not split_data:
            return split_data
        for data_reader in self.point_list:
            # Track the index directly: looking it up with list.index() would
            # pick the wrong cluster when two centres are equal.
            nearest_index = 0
            nearest_length = None
            for index, point_reader in enumerate(gather_point_list):
                now_length = self.euclidean_distance(data_reader, point_reader)
                if nearest_length is None or now_length < nearest_length:
                    nearest_length = now_length
                    nearest_index = index
            split_data[nearest_index].append(data_reader)
        return split_data

    def euclidean_distance(self, point, gather_point):
        """Return the Euclidean distance sqrt(sum_i (p_i - g_i)^2).

        Iterates over the coordinates of ``point``; ``gather_point`` must
        have at least as many coordinates.
        """
        total = 0
        for axis, coordinate in enumerate(point):
            total += (coordinate - gather_point[axis]) ** 2
        return math.sqrt(total)

    def get_centeral_point(self, seprate_data):
        """Return the centroid of every non-empty cluster.

        The point minimising the summed squared distance to the members of a
        cluster is the per-axis mean, so each centre is the coordinate-wise
        average of its cluster.  Empty clusters are skipped, which means the
        result can be shorter than ``seprate_data``.

        Returns:
            A list of centres, each a list of ``float`` coordinates.
        """
        gather_point = []
        for point_list_reader in seprate_data:
            if not point_list_reader:
                continue
            cluster = numpy.asarray(point_list_reader, dtype=float)
            gather_point.append(cluster.mean(axis=0).tolist())
        return gather_point

    def get_seprate_point(self, k=2, error=0.000001):
        """Run k-means and return the final centres and their clusters.

        Starts from ``k`` random centres and alternates between assigning
        the data to the centres and recomputing the centres.  A centre that
        ends up without any point is dropped, so fewer than ``k`` clusters
        may be returned.

        Args:
            k: number of random centres to start from.
            error: iteration stops once the total absolute movement of all
                centre coordinates between two rounds is below this value.

        Returns:
            A tuple ``(gather_point, seprate_data)`` where ``seprate_data[n]``
            holds the points whose mean is ``gather_point[n]``.
        """
        # Start from k random points.
        gather_point = [self.add_random_point() for _ in range(k)]
        while True:
            # Centres obtained in the previous round.
            last_gather_point = gather_point
            # Assign the data to the centres ...
            seprate_data = self.seprate_data(last_gather_point)
            # ... and recompute the centre coordinates from that assignment.
            gather_point = self.get_centeral_point(seprate_data)
            if len(gather_point) != len(last_gather_point):
                # A centre without points was dropped; re-assign before
                # comparing, the two lists no longer line up.
                continue
            # Sum of absolute differences: a signed sum could cancel out and
            # report convergence while the centres are still moving.
            shift = numpy.abs(
                numpy.asarray(gather_point, dtype=float)
                - numpy.asarray(last_gather_point, dtype=float)
            ).sum()
            # shift == 0 is a fixed point; stop even when error <= 0.
            if shift < error or shift == 0:
                return gather_point, seprate_data
if __name__ == '__main__':
    # Demo: cluster the points stored in 'text.dat' with a random k in 1..10
    # and print the resulting cluster centres.
    db = point_data_reader('text.dat').get_data_list(num_lost = 1.0)
    k = k_means(db)
    # numpy.random.random_integers is deprecated; randint excludes its upper
    # bound, so (1, 11) draws from the same inclusive range 1..10.
    gather_point,seprate_data = k.get_seprate_point(k = numpy.random.randint(1,11),error = 0.001)
    print(gather_point)
Kmeans演算法實現
include opencv2 highgui highgui.hpp include opencv2 core core.hpp include using namespace cv using namespace std static void help int main int argc ch...
K-means 演算法實現
1. 首先隨機生成 k 個聚類中心點。2. 根據聚類中心點,將資料分為 k 類;分類的原則是資料離哪個中心點近,就將它分為哪一類別。3. 再根據分好類別的資料,重新計算各類別的聚類中心點。4. 不斷重複第 2 和第 3 步,直到中心點不再變化。from numpy import import csv import matplotlib.pyp...
matlab實現kmeans演算法
kmeans 是一種聚類演算法(無監督學習)。演算法分為三步:1. 隨機選取 k 個聚類中心。2. 計算每個樣本點離哪個聚類中心最近(距離計算),就將該樣本分為這個類。3. 重新計算這 k 個類的聚類中心;一種簡單的計算方法為:計算每個類的平均值即為新的聚類中心。重複執行步驟 2 和 3,直到聚類中心的變化小於給定閾值,或者達到...