# K-means implementation (TensorFlow 1.x graph API)

import tensorflow as tf
import numpy as np
import time

import matplotlib
import matplotlib.pyplot as plt

# NOTE(review): `sklearn.datasets.samples_generator` was deprecated in
# scikit-learn 0.22 and removed in 0.24; the generators live directly in
# `sklearn.datasets`. Same names, same behavior.
from sklearn.datasets import make_blobs
from sklearn.datasets import make_circles

# Number of training points to generate
N=10
# Number of clusters
K=2
# Cap on k-means iterations
MAX_ITERS=1000
# Start time for the elapsed-time report at the end of the script
start = time.time()

# Ground-truth centers used to synthesize the two blobs
centers = [(-2, -2), (2,2)]
# Generate the training data: `data` is (N, 2) points, `features` the blob
# label of each point (used only for coloring the plots)
data, features = make_blobs (n_samples=10, centers=centers, n_features = 2, cluster_std=0.8, shuffle=False, random_state=42)
# Plot the two ground-truth centers (large markers) and the generated points
fig, ax = plt.subplots()
ax.scatter(np.asarray(centers).transpose()[0], np.asarray(centers).transpose()[1], marker = 'o', s = 250)
ax.scatter(data.transpose()[0], data.transpose()[1], marker = 'o', s = 100, c = features, cmap=plt.cm.coolwarm )
plt.show()

# Load the generated training data into a TensorFlow variable
points=tf.Variable(data)
# Cluster assignment for each point, initialized to cluster 0
cluster_assignments = tf.Variable(tf.zeros([N], dtype=tf.int64))
# Seed the centroids with the first K data points
# (make_blobs was called with shuffle=False, so these are K points from the
# first blob — k-means still converges here, but it is not a random seed)
centroids = tf.Variable(tf.slice(points.initialized_value(), [0,0], [K,2]))

# TF1 graph-mode session; initialize_all_variables is the (deprecated)
# TF1 initializer for all variables declared above
sess = tf.Session()
sess.run(tf.initialize_all_variables())
r = sess.run(centroids)
print(r)

# Plot the initial centroid positions alongside the data
fig, ax = plt.subplots()
ax.scatter(np.asarray(r).transpose()[0], np.asarray(r).transpose()[1], marker = 'o', s = 250)
ax.scatter(data.transpose()[0], data.transpose()[1], marker = 'o', s = 100, c = features, cmap=plt.cm.coolwarm )
plt.show()

# Tile centroids and points into matching (N, K, 2) tensors so every
# point/centroid pair can be compared elementwise
rep_centroids = tf.reshape(tf.tile(centroids, [N, 1]), [N, K, 2])
rep_points = tf.reshape(tf.tile(points, [1, K]), [N, K, 2])
# Squared Euclidean distance from each point to each centroid, reducing
# over the coordinate axis -> shape (N, K)
sum_squares = tf.reduce_sum(tf.square(rep_points - rep_centroids), reduction_indices=2)

# Index of the nearest centroid for each point -> shape (N,)
best_centroids = tf.argmin(sum_squares, 1)

# True if any point's nearest centroid differs from its current assignment
did_assignments_change = tf.reduce_any(tf.not_equal(best_centroids, cluster_assignments))

def bucket_mean(data, bucket_ids, num_buckets):
    """Return the per-bucket mean of `data` rows grouped by `bucket_ids`.

    Sums the members of each bucket and divides by the member count,
    yielding a (num_buckets, dims) tensor of cluster means.

    NOTE(review): an empty bucket produces 0/0 -> NaN; with K=2 centroids
    seeded from the data itself this does not occur here, but confirm
    before reusing this helper elsewhere.
    """
    # Fix: the original body was not indented, which is a SyntaxError.
    total = tf.unsorted_segment_sum(data, bucket_ids, num_buckets)
    count = tf.unsorted_segment_sum(tf.ones_like(data), bucket_ids, num_buckets)
    return total / count


# New centroid positions: mean of the points currently nearest each centroid
means = bucket_mean(points, best_centroids, K)

# Force `did_assignments_change` to be evaluated before the assignments are
# overwritten, then group both updates into a single op.
# Fix: the `with` body was not indented, which is a SyntaxError.
with tf.control_dependencies([did_assignments_change]):
    do_updates = tf.group(centroids.assign(means),
                          cluster_assignments.assign(best_centroids))

# Loop state: whether the last step changed any assignment, and the step count
changed = True
iters = 0

fig, ax = plt.subplots()
# Colormap indices for the K=2 centroid markers drawn inside the loop
colourindexes=[2,1]

# Iterate until no assignment changes or the iteration cap is reached.
# Fix: the loop body was not indented, which is a SyntaxError.
while changed and iters < MAX_ITERS:
    fig, ax = plt.subplots()
    iters += 1
    # One k-means step: `changed` reports whether any point switched cluster;
    # do_updates moves the centroids and records the new assignments
    [changed, _] = sess.run([did_assignments_change, do_updates])
    [centers, assignments] = sess.run([centroids, cluster_assignments])
    # Snapshot this iteration: points colored by assignment, centroids as '^'
    ax.scatter(sess.run(points).transpose()[0], sess.run(points).transpose()[1], marker = 'o', s = 200, c = assignments, cmap=plt.cm.coolwarm )
    ax.scatter(centers[:,0],centers[:,1], marker = '^', s = 550, c = colourindexes, cmap=plt.cm.plasma)
    ax.set_title('Iteration ' + str(iters))
    plt.savefig("kmeans" + str(iters) +".png")


# Final view: points colored by their converged cluster assignment
ax.scatter(sess.run(points).transpose()[0], sess.run(points).transpose()[1], marker = 'o', s = 200, c = assignments, cmap=plt.cm.coolwarm )
plt.show()


# Report elapsed wall-clock time, iteration count, and the final clustering
end = time.time()
print(("Found in %.2f seconds" % (end-start)), iters, "iterations")
print("Centroids:")
print(centers)
print("Cluster assignments:", assignments)