import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import numpy as np
# 1. Build the sample data: four groups of seven 2-D points, one group per
#    quadrant. Groups 2-4 are mirror images of group 1, so they are derived
#    from it by flipping the sign of x and/or y (sign flips are exact in
#    floating point, so the values match the hand-written literals).
class1_points = np.array([
    [1.9, 1.2],
    [1.5, 2.1],
    [1.9, 0.5],
    [1.5, 0.9],
    [0.9, 1.2],
    [1.1, 1.7],
    [1.4, 1.1],
])
class2_points = class1_points * np.array([-1.0, 1.0])   # mirrored across the y-axis
class3_points = class1_points * np.array([1.0, -1.0])   # mirrored across the x-axis
class4_points = class1_points * np.array([-1.0, -1.0])  # mirrored through the origin

# Stack the four groups, in order, into a single (28, 2) array.
data = np.vstack([class1_points, class2_points, class3_points, class4_points])
# Two side-by-side panels: raw points on the left, clustering result on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# 2. Number of clusters to look for.
k = 4

# Left panel: the original, unclustered points.
ax1.scatter(data[:, 0], data[:, 1], s=50)

# 3. Build the model.
#    random_state is fixed so the labels (and therefore the colours) are the
#    same on every run; n_init is set explicitly because its default value
#    differs between scikit-learn releases.
km = KMeans(n_clusters=k, max_iter=30, n_init=10, random_state=0)

# 4. Fit the model and get the cluster index assigned to every point.
y_kmean = km.fit_predict(data)

# Cluster centres, one row per cluster, indexed by cluster label.
centeroids = km.cluster_centers_
print(y_kmean)

# Right panel: show the clustering result.
# Draw a dashed line from every point to the centre of its cluster.
for label, centroid in enumerate(centeroids):
    for cluster_point in data[y_kmean == label]:
        ax2.plot(
            [cluster_point[0], centroid[0]],
            [cluster_point[1], centroid[1]],
            'k--',
        )

# Points coloured by their assigned cluster.
ax2.scatter(data[:, 0], data[:, 1], c=y_kmean, s=50)
# Cluster centres as larger, semi-transparent black markers.
ax2.scatter(centeroids[:, 0], centeroids[:, 1], c='black', s=100, alpha=0.5)

plt.show()
自定义数据集,使用 scikit-learn 中的 K 均值(KMeans)包进行聚类
最新推荐文章于 2026-06-19 17:00:49 发布

1730

被折叠的 条评论
为什么被折叠?



