Max-Spacing k-clustering

这篇博客介绍了如何利用 Union-find 数据结构来实现最大间距 k-聚类算法，特别是针对给定的数据集进行 4-聚类。文章中提到的数据文件包含了节点间的距离信息，并要求找出具有最大间距的 4 个聚类。任务是根据课堂上讲解的算法处理这个数据集，确定当目标聚类数 k 为 4 时的最大间距。

这里主要用到的知识点是 Union-find（并查集）数据结构。题目原文如下：

In this programming problem and the next you'll code up the clustering algorithm from lecture for computing a max-spacing k-clustering. Download the text file here. This file describes a distance function (equivalently, a complete graph with edge costs). It has the following format:
[number_of_nodes]
[edge 1 node 1] [edge 1 node 2] [edge 1 cost]
[edge 2 node 1] [edge 2 node 2] [edge 2 cost]
...
There is one edge (i,j) for each choice of 1≤i<j≤n, where n is the number of nodes. For example, the third line of the file is "1 3 5250", indicating that the distance between nodes 1 and 3 (equivalently, the cost of the edge (1,3)) is 5250. You can assume that distances are positive, but you should NOT assume that they are distinct.


Your task in this problem is to run the clustering algorithm from lecture on this data set, where the target number k of clusters is set to 4. What is the maximum spacing of a 4-clustering?


#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <algorithm>

using namespace std;

// One weighted, undirected edge of the input graph.
struct Edge
{
    int nodes[2];   // the two endpoints of the edge
    int cost;       // distance between the two endpoints
};
// Ordering predicate for sorting edges by ascending cost.
// Returns true only when e1 is strictly cheaper than e2, so edges of equal
// cost compare as equivalent.
bool EdgeCompare(Edge e1, Edge e2)
{
    return e2.cost > e1.cost;
}

// ------------ Union-find ------------
// One element of the union-find forest.
struct UnionNode
{
    int parent;   // index of the parent element; a root is its own parent
    int rank;     // rank used to decide which root survives a merge
};

// Resets the first n elements of `nodes` to n singleton sets:
// every element becomes its own parent and gets rank 0.
void IniUnionFind(UnionNode *nodes, int n)
{
    int idx = 0;
    while (idx < n)
    {
        UnionNode &slot = nodes[idx];
        slot.rank = 0;
        slot.parent = idx;
        ++idx;
    }
}

// Returns the root (set representative) of element `th`, compressing the
// path on the way: every element visited between `th` and the root is
// re-parented directly onto the root.
//
// `n` is accepted for symmetry with the other union-find helpers; it is not
// consulted, and `th` is not range-checked.
int Find(UnionNode *nodes, int n, int th)
{
    (void)n;

    // Pass 1: climb to the root.
    int root = th;
    while (nodes[root].parent != root)
        root = nodes[root].parent;

    // Pass 2: walk the same path again and hang each element on the root.
    int cur = th;
    while (cur != root)
    {
        const int up = nodes[cur].parent;
        nodes[cur].parent = root;
        cur = up;
    }

    return root;
}

// Merges the sets containing `th1` and `th2` (union by rank).
// Does nothing when both elements are already in the same set.
// The root with the lower rank is attached under the other; on a tie the
// root of `th1` survives and its rank grows by one.
void Union(UnionNode *nodes, int n, int th1, int th2)
{
    const int rootA = Find(nodes, n, th1);
    const int rootB = Find(nodes, n, th2);
    if (rootA == rootB)
        return;

    // Default: rootA survives and absorbs rootB.
    int keep = rootA;
    int absorb = rootB;

    if (nodes[rootA].rank < nodes[rootB].rank)
    {
        keep = rootB;
        absorb = rootA;
    }
    else if (nodes[rootA].rank == nodes[rootB].rank)
    {
        // Equal ranks: the merged tree is one level taller.
        nodes[rootA].rank += 1;
    }

    nodes[absorb].parent = keep;
}
// --------------------------------------
// Greedy single-link clustering (Kruskal-style).
//
// Sorts E by ascending cost (E is reordered in place), then merges the two
// clusters joined by each successive edge until only `k` clusters remain or
// the edges run out.
//
// Parameters:
//   E  - edges; endpoints must be 0-based indices in [0, nV). Endpoints are
//        NOT range-checked here.
//   nV - number of vertices.
//   k  - number of clusters to stop at.
//
// Returns the cost of the last edge that actually merged two clusters, or 0
// if no merge happened (nV <= 0, k >= nV, or no usable edge).
//
// To obtain the maximum spacing of a k-clustering, call with k-1: the merge
// that takes k clusters down to k-1 uses the cheapest edge that still
// separates two clusters of the k-clustering, which is exactly its spacing.
int ClusterMaxSpace(vector<Edge>& E, int nV, int k)
{
    // ---------- Sort --------
    sort(E.begin(), E.end(), EdgeCompare);

    if (nV <= 0)
        return 0;

    // ---------- Initial ----------
    // vector owns the storage, so there is no new[]/delete mismatch to get
    // wrong. nV > 0 here, so &nodes[0] is valid.
    vector<UnionNode> nodes(nV);
    UnionNode *forest = &nodes[0];
    IniUnionFind(forest, nV);

    // --------- Cluster -----------
    int maxspace = 0;
    int nc = nV;    // current number of clusters

    // `nc > k` (rather than `nc != k`) terminates when k > nV, and the index
    // bound stops the scan when the edges cannot reach k clusters.
    for (vector<Edge>::size_type i = 0; i < E.size() && nc > k; ++i)
    {
        const int root1 = Find(forest, nV, E[i].nodes[0]);
        const int root2 = Find(forest, nV, E[i].nodes[1]);
        if (root1 == root2)
            continue;   // edge lies inside a single cluster

        Union(forest, nV, root1, root2);
        maxspace = E[i].cost;
        nc--;
    }

    return maxspace;
}



int main()
{
    ifstream infile;
    infile.open("clustering1.txt");
    
    // ------------------------
    string line;
    stringstream ss;
    getline(infile, line);
    ss << line;

    int nV;
    ss >> nV;
    // --------- Initialize ----------
    vector<Edge> E;

    ss.clear();
    line.clear();
    int n = 0;
    Edge e;
    while(getline(infile, line))
    {
        ss << line;
        
        ss >> e.nodes[0];
        ss >> e.nodes[1];
		e.nodes[0]--;
		e.nodes[1]--;
        ss >> e.cost;
        
        E.push_back(e);
        
        n++;
        ss.clear();
        line.clear();
    }
	infile.close();
	// --------------------------

	int k = 4;
    int maxspace = ClusterMaxSpace(E, nV, k-1); 

    cout << maxspace << endl;




    return 0;
}

参考:

【1】并查集 http://zh.wikipedia.org/wiki/%E5%B9%B6%E6%9F%A5%E9%9B%86

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值