二分 K 均值的 C++ 實現

2021-07-11 18:51:00 字數 4247 閱讀 8737

二分k均值原理及python實現請見:

(1)(2) python實現

C++ 程式碼:基於 Python 實現的思路寫的(求指點)

#include #include #include #include #include using namespace std;

typedef vectortuple;

const int dims = 2;

const int k = 2;

const int bik = 4;

void dokmeans(vector& tuples,tuple subcentroids,vector& subclusterassment1,tuple& subclusterassment2);

void assigntuples(vectorclusters,vectortuples,tuple means);

float getdist(const tuple& t1, const tuple& t2);

float getval(vectorclusters, tuple means);

tuple updatemeans(const vector& cluster_i);

void print(vectorclustes);

int main()

//read the data讀取資料

vectortuples;

int pos = 0;

while(!file.eof())

vectorcentroids; // generated centroids 生成的聚類中心

vectorclusterassment1; // samples index in different centroids 樣本對應的簇索引

tuple clusterassment2; // samples se 每個樣本到其聚類中心的平方誤差

//initialize the first centroid 初始化第一個聚類中心(取樣本均值)

tuple centroid(dims+1,0);

for(int i = 0; i < tuples.size();i++)

for(int j = 1; j <= dims; j++)

centroid[j] /= tuples.size();

centroids.push_back(centroid);

//initialize the clusterassment1and clusterassmen2 初始化樣本簇索引為0及計算樣本到聚類中心的距離

for(int i = 0; i < tuples.size(); i++)

//dividing 二分

int test = 0;

while(centroids.size() < bik) //是否已經得到bik個聚類中心

//2means clustering //對該簇進行二聚類

tuple subcentroids[2];

vectorsubclusterassment1;

tuple subclusterassment2;

dokmeans(subdataset,subcentroids,subclusterassment1,subclusterassment2);

//compute the sum sse of cluster i //二聚類後,計算對應sse 及總的聚類代價

float ssei = 0;

float sseother = 0;

float ssesum = 0

//該簇聚類後的sse

for (int x = 0; x < subclusterassment2.size(); x++)

ssei += subclusterassment2[x];

//其他簇的sse

for (int y = 0; y < tuples.size(); y++)

ssesum = ssei + sseother;

//find the cluster who has the minimal sse

if( ssesum < minsse)

minindex = i;

minsse = ssesum;

}} //update the centroids //更新聚類中心

int yuan_size = centroids.size();

centroids[minindex].swap(minsubcentroid[0]); //minindex簇二聚類後索引為0的子簇的聚類中心取代該簇原來的聚類中心,

centroids.push_back(minsubcentroid[1]); //minindex簇二聚類後索引為1的子簇的聚類中心新增到後面

cout << "yuan minsubclusterassment1: " << endl;

for(int jj = 0; jj < minsubclusterassment1.size(); jj++)

cout << minsubclusterassment1[jj] << " ";

cout << endl;

//update the clusterassment //更新樣本的簇索引

//更新兩個新簇的索引

for(int s = 0; s < minsubclusterassment1.size(); s++)

cout << "minindex: " << minindex << endl;

cout << "updated minsubclusterassment1: " << endl;

for(int jj = 0; jj < minsubclusterassment1.size(); jj++)

cout << minsubclusterassment1[jj] << " ";

cout << endl;

//更新樣本簇索引,將樣本簇索引為minindex的全部換成兩個新子簇的索引

int count = 0;

for(int a = 0; a < clusterassment1.size(); a++)

}cout << "clusterassment1: " << endl;

for(int jj = 0; jj < clusterassment1.size(); jj++)

cout << clusterassment1[jj] << " ";

cout << endl;

} cout << "bikmenas is done!" << endl;

system("pause");

return 0;

}void dokmeans(vector& tuples,tuple subcentroids,vector& subclusterassment1,tuple& subclusterassment2)

assigntuples(clusters,tuples,means);

double newval = getval(clusters,means);

double oldval = -1;

int t = 0;

while ((abs(newval - oldval)) > 1)

subcentroids[0] = means[0];

subcentroids[1] = means[1];

for(int i = 0; i < tuples.size();i++)

subclusterassment1.push_back(label);

subclusterassment2.push_back(dist); }

} void assigntuples(vectorclusters, vectortuples,tuple means)

} clusters[label].push_back(tuples[i]);

} }

float getdist(const tuple& t1, const tuple& t2)

float getval(vectorclusters, tuple means)

return val;

}

tuple updatemeans(const vector& cluster_i)

實現結果:

K-Means(二分 k 均值演算法)

在上一節中我們已經講了 k 均值演算法。當時我們選取的質心是隨機選取的,沒有什麼依據,所以聚類的結果很可能出現誤差。為了降低這種誤差的出現,我們今天來研究一種優化的 k 均值演算法——二分 k 均值演算法。看到名稱我們就能明白,在每次劃分的時候都是將資料劃分成兩份,直到達到我們要求的聚類數。怎麼來分?選取哪一堆...

k-means(k 均值聚類)及二分 k 均值聚類

from numpy import def load data file name data fr open file name for line in fr.readlines cur line line.strip split t flt line map float,cur line retu...

二分 Ybt 最大均值

給定正整數序列 $a$,求一個平均數最大的、長度不小於 $l$ 的連續子段。為了方便計算,所有數都乘上 1000,然後二分均值。$n$ 的範圍到 $10^5$,考慮二分判斷結果如何在 $O(n)$ 或 $O(n \log n)$ 的複雜度內解決。我們...