这里主要用到的知识点是Union-find数据结构:
In this programming problem and the next you'll code up the clustering algorithm from lecture for computing a max-spacing k-clustering. Download the text file (the code below reads it as clustering1.txt). This file describes a distance function (equivalently, a complete graph with edge costs).
It has the following format:
[number_of_nodes]
[edge 1 node 1] [edge 1 node 2] [edge 1 cost]
[edge 2 node 1] [edge 2 node 2] [edge 2 cost]
...
There is one edge (i,j) for each choice of 1≤i<j≤n, where n is the number of nodes. For example, the third line of the file is "1 3 5250", indicating that the distance between nodes 1 and 3 (equivalently, the cost of the edge (1,3)) is 5250. You can assume that distances are positive, but you should NOT assume that they are distinct.
Your task in this problem is to run the clustering algorithm from lecture on this data set, where the target number k of clusters is set to 4. What is the maximum spacing of a 4-clustering?
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <algorithm>
using namespace std;
// A single weighted edge of the input graph.
struct Edge
{
    int nodes[2]; // indices of the two endpoints
    int cost;     // weight of the edge (distance between the endpoints)
};
// Ordering predicate for sorting edges: true when e1 is strictly cheaper
// than e2, so sorting with it yields ascending cost.
bool EdgeCompare(Edge e1, Edge e2)
{
    return e2.cost > e1.cost;
}
// ------------ Union-find ------------
// One element of the union-find forest.
struct UnionNode
{
    int parent; // index of this element's parent; a root is its own parent
    int rank;   // rank used to decide which root absorbs the other on union
};
// Resets the first n entries of `nodes` so that every element forms its own
// singleton set: each element is its own parent and has rank 0.
void IniUnionFind(UnionNode *nodes, int n)
{
    UnionNode *cur = nodes;
    for (int idx = 0; idx < n; ++idx, ++cur)
    {
        cur->rank = 0;
        cur->parent = idx;
    }
}
// Returns the root (set representative) of element `th` and applies path
// compression: afterwards every element on the path from `th` to the root
// points directly at the root.
//
//   nodes - union-find array
//   n     - number of elements; not consulted by the lookup itself
//   th    - index of the element to look up
int Find(UnionNode *nodes, int n, int th)
{
    (void)n;

    // Pass 1: follow parent links until reaching an element that is its own parent.
    int root = th;
    while (nodes[root].parent != root)
        root = nodes[root].parent;

    // Pass 2: re-point every element on the walked path straight at the root.
    int cur = th;
    while (nodes[cur].parent != root)
    {
        const int next = nodes[cur].parent;
        nodes[cur].parent = root;
        cur = next;
    }

    return root;
}
// Merges the sets containing th1 and th2 (union by rank). Does nothing when
// both elements already share a root.
//
//   nodes    - union-find array
//   n        - number of elements (forwarded to Find)
//   th1, th2 - indices of the two elements whose sets are merged
void Union(UnionNode *nodes, int n, int th1, int th2)
{
    const int rootA = Find(nodes, n, th1);
    const int rootB = Find(nodes, n, th2);
    if (rootA == rootB)
        return;

    const int rankA = nodes[rootA].rank;
    const int rankB = nodes[rootB].rank;

    // The lower-ranked root is hung under the higher-ranked one.
    if (rankA < rankB)
    {
        nodes[rootA].parent = rootB;
        return;
    }

    // Otherwise th1's root absorbs th2's root; on a rank tie the surviving
    // root's rank grows by one.
    nodes[rootB].parent = rootA;
    if (rankA == rankB)
        nodes[rootA].rank += 1;
}
// --------------------------------------
int ClusterMaxSpace(vector<Edge>& E, int nV, int k)
{
// ---------- Sort --------
sort(E.begin(), E.end(), EdgeCompare);
// ---------- Initial ----------
UnionNode *nodes = new UnionNode[nV];
IniUnionFind(nodes, nV);
// --------- Cluster -----------
int maxspace = 0;
int nc = nV;
int root1;
int root2;
int i = 0;
while(nc != k)
{
root1 = Find(nodes, nV, E[i].nodes[0]);
root2 = Find(nodes, nV, E[i].nodes[1]);
if (root1 != root2)
{
maxspace = E[i].cost;
nc--;
}
Union(nodes, nV, E[i].nodes[0], E[i].nodes[1]);
i++;
}
delete nodes;
return maxspace;
}
int main()
{
ifstream infile;
infile.open("clustering1.txt");
// ------------------------
string line;
stringstream ss;
getline(infile, line);
ss << line;
int nV;
ss >> nV;
// --------- Initialize ----------
vector<Edge> E;
ss.clear();
line.clear();
int n = 0;
Edge e;
while(getline(infile, line))
{
ss << line;
ss >> e.nodes[0];
ss >> e.nodes[1];
e.nodes[0]--;
e.nodes[1]--;
ss >> e.cost;
E.push_back(e);
n++;
ss.clear();
line.clear();
}
infile.close();
// --------------------------
int k = 4;
int maxspace = ClusterMaxSpace(E, nV, k-1);
cout << maxspace << endl;
return 0;
}
参考:
【1】并查集 http://zh.wikipedia.org/wiki/%E5%B9%B6%E6%9F%A5%E9%9B%86

这篇博客介绍了如何利用Union-find数据结构来实现最大间距k-聚类算法,特别是针对给定的数据集进行4-聚类。文章中提到的数据文件包含了节点间的距离信息,并要求找出具有最大间距的4个聚类。任务是根据课堂上讲解的算法处理这个数据集,确定当目标聚类数k为4时的最大间距。

680

被折叠的 条评论
为什么被折叠?



