【聚类算法】高维数据的聚类

以八维数据为例
设生成数据时的真实簇数为 4,聚类时的目标簇数 K 也取 4
代码:

%% Complete 8-D clustering demo (self-generated data + printed centroids)
% function Main_Cluster()
clc
clear
close all
% K denotes the number of clusters.

%% 1. Draw 100 eight-dimensional samples from a mixture of 4 Gaussians
rng(0)                                  % fixed seed -> reproducible run
K_true = 4;                             % number of generating clusters
mu     = [1; -1; 3; -3] * ones(1,8);    % 4x8 matrix of true cluster means
sigma  = 0.6;                           % per-dimension standard deviation
nPer   = 25;                            % samples drawn from each cluster

% Fill a preallocated matrix, one block of nPer rows per cluster
% (clusters are sampled in the same order, so the random stream is unchanged).
X = zeros(K_true*nPer, 8);
for k = 1:K_true
    X((k-1)*nPer + (1:nPer), :) = mvnrnd(mu(k,:), sigma^2*eye(8), nPer);
end

% Shuffle the sample order
X = X(randperm(size(X,1)), :);

%% 2. Elbow method: total within-cluster SSE for K = 1..maxK (optional)
maxK = 10;
sse  = zeros(maxK,1);
for k = 1:maxK
    [~,~,sumd] = kmeans(X, k, 'Replicates', 10, 'Start', 'plus');
    sse(k) = sum(sumd);                 % sum of per-cluster point-to-centroid distances
end

figure;
plot(1:maxK, sse, '-o');
xlabel('K');
ylabel('SSE');
title('Elbow 方法选 K');
grid on;

%% 3. Final clustering with the chosen K (4 here)
K = 4;
[idx, C] = kmeans(X, K, 'Replicates', 20, 'Start', 'plus');

%% 4. Print the centroids
fprintf('\n==== 聚类完成 ====\n');
fprintf('K = %d 个簇的 8 维中心坐标如下:\n', K);
disp(C);                                % 4x8 matrix, one centroid per row

%% 5. Save the centroids to a tab-delimited text file
writematrix(C, 'centers8D.txt', 'Delimiter', 'tab');
fprintf('中心点已写入 centers8D.txt\n');

%% 6. 2-D projection plot (first two dimensions only)
figure;
gscatter(X(:,1), X(:,2), idx);
hold on;
plot(C(:,1), C(:,2), 'kx', 'MarkerSize', 12, 'LineWidth', 2);
title('前二维投影及中心点');
xlabel('dim1');
ylabel('dim2');
grid on;

效果:
在这里插入图片描述
(注:肘部法(Elbow Method)用于确定 K 均值聚类的最优聚类数 K。其依据是:随着 K 增大,簇内误差平方和(SSE,即各样本到所属簇中心的距离平方之和,也称 WCSS)会不断下降——起初下降很快,当 K 超过数据的真实簇数后下降明显变缓,曲线趋于平坦。曲线由陡变缓的拐点称为“肘点”(Elbow Point),通常取该点对应的 K 作为聚类数。如上图所示,肘点位于 K=4,因此可认为分成 4 类最合适。)

函数版代码

%% Complete 8-D clustering demo (self-generated data + printed centroids)
% Function-based variant: the clustering itself is delegated to the local
% function K_means_Cluster defined at the end of this file.
% function Main_Cluster()
clc
clear
close all
% K is the number of clusters.
%% 1. Generate 100 eight-dimensional samples (mixture of 4 Gaussian clusters)
rng(0)                       % fixed seed so every run is reproducible
K_true = 4;                  % true number of clusters
mu = [ 1*ones(1,8); ...
      -1*ones(1,8); ...
       3*ones(1,8); ...
      -3*ones(1,8)];         % 4x8 matrix of cluster means
sigma = 0.6;                 % standard deviation of every dimension
nPer = 25;                   % samples per cluster
% Preallocate instead of growing X inside the loop.
X = zeros(K_true*nPer, size(mu,2));
for k = 1:K_true
    X((k-1)*nPer + (1:nPer), :) = mvnrnd(mu(k,:), sigma^2*eye(8), nPer);
end
% Shuffle the sample order
X = X(randperm(size(X,1)), :);
%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 2. k-means clustering
maxK = 10;                   % upper bound of the elbow sweep inside K_means_Cluster
K = 4;                       % target cluster count; matches K_true (was 5, which
                             % split one of the four true clusters)
[idx, C] = K_means_Cluster(X, K, maxK);   % clustering is done in the local function

%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% 3. Print the centroids
fprintf('\n==== 聚类完成 ====\n');
fprintf('K = %d 个簇的 8 维中心坐标如下:\n', K);
disp(C);                     % K x 8 matrix, one centroid per row

%% 4. Save the centroids to a tab-delimited text file
writematrix(C, 'centers8D.txt', 'Delimiter', 'tab');
fprintf('中心点已写入 centers8D.txt\n');

%% 5. 2-D projection plot (first two dimensions only)
figure;
gscatter(X(:,1), X(:,2), idx);
hold on;
plot(C(:,1), C(:,2), 'kx', 'MarkerSize', 12, 'LineWidth', 2);
title('前二维投影及中心点');
xlabel('dim1'); ylabel('dim2');
grid on;

%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Elbow 方法自动选 K(可选)
function [idx, C, sse] = K_means_Cluster(X,K,maxK)
%K_MEANS_CLUSTER  k-means clustering with an optional elbow (SSE) sweep.
%   [idx, C] = K_means_Cluster(X, K, maxK) clusters the rows of X into K
%   groups with kmeans (k-means++ start, 20 replicates). idx holds the
%   cluster index of every row of X; C holds one centroid per row.
%
%   [idx, C, sse] = K_means_Cluster(X, K, maxK) additionally returns the
%   elbow curve: sse(k) is the total within-cluster sum of point-to-centroid
%   distances for k = 1..maxK clusters (10 replicates each).
%
%   The sweep used to run on every call and its result was thrown away. It
%   is now computed only when the third output is requested. Skipping it
%   changes how much of the random stream is consumed before the final
%   clustering, so cluster label numbering may differ from earlier runs.

if nargout >= 3
    % kmeans cannot form more clusters than there are distinct points.
    maxK = min(maxK, size(unique(X,'rows'),1));
    sse  = zeros(maxK,1);
    for k = 1:maxK
        [~,~,sumd] = kmeans(X,k,'Replicates',10,'Start','plus');
        sse(k) = sum(sumd);
    end
    % To inspect the elbow: figure; plot(1:numel(sse), sse, '-o'); grid on;
end

% Final clustering with the requested K
[idx, C] = kmeans(X, K, 'Replicates', 20, 'Start', 'plus');
end

举例:对一维数据进行聚类,并计算每个簇的概率(即落入该簇的样本数占样本总数的比例)

clc
clear
close all
%%
% TIR_A: 1x200 row vector of one-dimensional samples to be clustered.
% The literal was previously broken across two lines in the middle of the
% value -341.357228253509, which MATLAB parses as a second matrix row and
% rejects with a concatenation error. It is written here as a single row
% using "..." line continuations.
TIR_A=[0,10.9320141605342,22.3017103483401,7.55501061237867,-3.16309883667157, ...
    -50.3877762617543,-29.0764588447341,1.00655582158596,1.47979826116019,-29.7781258371633, ...
    9.21777505579750,-36.9681744698529,-10.2595681285202,-97.6746895975595,-69.6424068808550, ...
    4.09964442145573,-0.0589750309156270,60.9664835118606,159.272768478900,-28.3819347458263, ...
    -208.477383047218,-298.014317448484,-168.343706968087,120.211424639785,306.174532485099, ...
    349.163753069571,209.650395562140,-166.939090116214,-313.591495065766,-397.097897745068, ...
    -143.803879726548,69.4907905164423,227.223616719120,302.627035295426,152.104786472928, ...
    -15.7020977287557,-42.7964487689261,-189.667066235494,-242.093178882592,-40.1925503085135, ...
    143.669174172268,123.496485591614,167.071240164582,190.892638987677,-39.5121809010691, ...
    -95.6587806301937,-249.393467617111,-166.729107328201,-38.0150039375307,60.6083376444827, ...
    110.549042877590,213.137994990564,138.122144452505,-13.6633913356083,-85.3773217516193, ...
    -123.732695613217,-66.1934149367136,-9.09416852659231,98.5413480689020,34.8044508637293, ...
    134.607027050057,101.564153598770,25.7043470242587,-59.5965807367283,-30.6263479304648, ...
    -103.692881736932,-133.615402042692,-73.1678430143008,111.460535622717,110.925304200934, ...
    221.834285145565,110.783508652248,-23.7706667403506,-78.1410820542480,-105.417280975365, ...
    -68.3864358851021,-52.3826484391477,28.7901316898159,118.660623258025,16.1245492121155, ...
    -88.6675706680938,-163.326820336352,-72.3453624840566,73.2080359139187,259.239191893748, ...
    292.570246353440,41.8355307689670,-108.430621128346,-366.332102129192,-269.113736197274, ...
    -145.441270636588,231.287483440561,280.915587964462,159.644303150082,113.514218090324, ...
    -64.9233356111842,-142.616648254360,-104.059730731903,-34.6706115025591,-67.7458994964854, ...
    -62.7277628618223,137.286204515507,223.950879534932,58.5720938331089,-57.0305312432633, ...
    -56.9456589516629,-169.056085866569,-67.6806449282849,-63.2946866491125,44.7261830198759, ...
    132.317174481225,228.568578024462,196.578456075708,-101.050834987227,-341.357228253509, ...
    -299.319423513016,-100.520630867494,237.692701311753,435.420641175777,368.738446230842, ...
    83.2442880644332,-351.228389405920,-487.514593802370,-225.840863083046,114.671892231921, ...
    272.581597344224,396.781490338879,138.759428337858,-38.5722619035933,-312.219753900329, ...
    -420.526494839520,-309.861335445815,20.1581067940042,384.279935099987,520.997067449641, ...
    366.079755346821,-116.974973537505,-443.868248711696,-473.026841266845,-350.990352951547, ...
    4.98967783618344,290.033726284974,353.370635223207,187.735344012305,-89.6002318637028, ...
    -197.444140951982,-177.507112044056,-33.6915271713454,-62.6293293588017,-59.4424106958346, ...
    126.183807373652,36.2842781750896,-46.1980264876249,53.0744295268854,-26.1441288939619, ...
    -118.586378952139,-41.0251257999535,-20.5532477075309,122.050166563326,205.458215007567, ...
    157.622846176245,57.7896199021187,-212.556968731379,-230.438758724265,-171.470183284260, ...
    -5.61774933326351,250.294087199973,245.088267799183,100.134426077305,42.7793477887696, ...
    -52.2691496251956,-200.643095577106,-54.9315132766710,117.396738124945,185.871996303418, ...
    31.9919957550983,-49.8303772282563,1.66179443727746,87.2915688722618,35.1256873345484, ...
    -85.2113286371787,13.2253045008874,-60.1899834627178,22.8269851935815,162.103437577725, ...
    176.385374701395,-37.6986333136475,-28.1130011093737,-139.684195367391,-95.2362360117043, ...
    34.4520859594834,150.721107043641,19.6060281446815,33.7148372254880,44.7745475128671, ...
    -9.52640881989722,-157.984972446793,-34.3509639328829,49.9697266473927,-35.4690690264349];
maxK = 50;                   % upper bound of the elbow sweep inside K_means_Cluster
K = 4;                       % number of clusters
% TIR_A(:) passes the samples as a column (one observation per row),
% whatever the orientation of TIR_A.
[idx, C_Res_A] = K_means_Cluster(TIR_A(:), K, maxK);

% Share of samples assigned to each cluster, i.e. the empirical probability
% of that cluster. accumarray counts how often each label 1..K occurs in
% idx; the [K 1] size keeps one entry per cluster.
count = accumarray(idx, 1, [K 1]) / numel(idx);

Res = [C_Res_A, count];
Res_Cluster = sortrows(Res, 1);   % sort by centroid value, ascending
% Res_Cluster: column 1 is the centroid value, column 2 is the probability
% (sample share) of the cluster belonging to that centroid.
%%
function [idx, C]=K_means_Cluster(X,K,maxK)
%K_MEANS_CLUSTER  k-means clustering plus an elbow (SSE versus K) plot.
%   [idx, C] = K_means_Cluster(X, K, maxK) clusters the rows of X into K
%   groups with kmeans (k-means++ start, 20 replicates). idx holds the
%   cluster index of every row of X; C holds one centroid per row.
%
%   As a side effect, kmeans is run for k = 1..maxK (10 replicates each)
%   and the total within-cluster sum of point-to-centroid distances is
%   plotted against k in a new figure.

% kmeans cannot form more clusters than there are distinct points, so cap
% the sweep instead of erroring on small inputs.
maxK = min(maxK, size(unique(X,'rows'),1));

sse  = zeros(maxK,1);
for k = 1:maxK
    [~,~,sumd] = kmeans(X,k,'Replicates',10,'Start','plus');
    sse(k) = sum(sumd);
    % Explicit progress line. This replaces an unsuppressed "k" echo that
    % was followed by clc, which also erased the caller's console output.
    fprintf('Elbow sweep: k = %d / %d\n', k, maxK);
end

% Final clustering with the requested K
[idx, C] = kmeans(X, K, 'Replicates', 20, 'Start', 'plus');

figure;
plot(1:maxK, sse, '-o');
xlabel('K'); ylabel('SSE');
title('Elbow 方法选 K');
grid on;

end
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值