自定义数据集，使用 scikit-learn 中的 K 均值（KMeans）包进行聚类

最新推荐文章于 2026-06-19 17:00:49 发布

原创 · 最新推荐文章于 2026-06-19 17:00:49 发布 · 146 阅读

本内容遵循CC 4.0 BY-SA版权协议

标签

#python #numpy #开发语言

import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import numpy as np

# 1. Build the sample data: four groups of seven 2-D points, one group per
#    quadrant. The first-quadrant coordinates are written out once; the other
#    three groups are the same points with the sign of x and/or y flipped.
_first_quadrant = [
    [1.9, 1.2],
    [1.5, 2.1],
    [1.9, 0.5],
    [1.5, 0.9],
    [0.9, 1.2],
    [1.1, 1.7],
    [1.4, 1.1],
]

class1_points = np.array(_first_quadrant)                 # x > 0, y > 0
class2_points = class1_points * np.array([-1.0, 1.0])     # x < 0, y > 0
class3_points = class1_points * np.array([1.0, -1.0])     # x > 0, y < 0
class4_points = class1_points * np.array([-1.0, -1.0])    # x < 0, y < 0

# Stack the four groups row-wise into one (28, 2) array, in class order.
data = np.vstack((class1_points, class2_points, class3_points, class4_points))

# Two side-by-side panels: the raw data goes on the left (ax1) and the
# clustering result on the right (ax2).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# 2. Number of clusters to look for.
k = 4

# Show the raw, not-yet-clustered points in the left panel.
# (The former argument-less `ax1.plot()` call drew nothing and was removed,
# together with the commented-out leftovers.)
ax1.scatter(data[:, 0], data[:, 1], s=50)
# 3. Build the model.
# random_state is fixed so that the printed labels and the plotted colours
# are reproducible from run to run. n_init is given explicitly because its
# default differs between scikit-learn releases; running several
# initialisations and keeping the best guards against a poor local optimum.
km = KMeans(n_clusters=k, max_iter=30, n_init=10, random_state=0)

# 4. Train the model on the full data set.
km.fit(data)

# Sanity check: fetch the cluster centres and the cluster index of each point.
centeroids = km.cluster_centers_
# labels_ already holds the assignment of every training point, so there is
# no need for a second pass over the same data with predict().
y_kmean = km.labels_
print(y_kmean)

# Show the clustering result in the right panel.
# First draw a dashed black segment from every point to the centre of the
# cluster it was assigned to, one cluster at a time.
for label, (center_x, center_y) in enumerate(centeroids):
    for point_x, point_y in data[y_kmean == label]:
        ax2.plot([point_x, center_x], [point_y, center_y], 'k--')

# Then draw the points themselves, coloured by their cluster index.
ax2.scatter(data[:, 0], data[:, 1], c=y_kmean, s=50)

# Finally mark the cluster centres with larger, semi-transparent black dots.
ax2.scatter(centeroids[:, 0], centeroids[:, 1], c='black', s=100, alpha=0.5)

plt.show()