"""K-means clustering demo on synthetic 2-D blobs (TensorFlow 1.x graph API).

The script generates N sample points around two fixed centres, seeds the
centroids with the first K samples, and then alternates assignment and
centroid-update steps until no assignment changes or MAX_ITERS is reached.
One scatter plot per iteration is saved as ``kmeans<iteration>.png``.
"""
import time

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.datasets import make_blobs
from sklearn.datasets import make_circles

# Number of sample points to generate.
N = 10
# Number of clusters.
K = 2
# Upper bound on the number of k-means iterations.
MAX_ITERS = 1000

# Start of the timed section.
# NOTE: the elapsed time reported at the end therefore also covers data
# generation, plotting and any time spent inside blocking plt.show() calls.
start = time.time()

# Positions of the two blob centres the samples are drawn around.
centers = [(-2, -2), (2, 2)]

# Generate the sample points; N is used here so the sample count always
# matches the [N, K, 2] reshapes in the graph below.
data, features = make_blobs(
    n_samples=N,
    centers=centers,
    n_features=2,
    cluster_std=0.8,
    shuffle=False,
    random_state=42,
)

# Plot the two blob centres together with the generated samples.
blob_centers = np.asarray(centers)
fig, ax = plt.subplots()
ax.scatter(blob_centers[:, 0], blob_centers[:, 1], marker='o', s=250)
ax.scatter(data[:, 0], data[:, 1], marker='o', s=100,
           c=features, cmap=plt.cm.coolwarm)
plt.show()

# Load the generated samples into a TensorFlow variable.
points = tf.Variable(data)

# Cluster index currently assigned to each sample (all zero initially).
cluster_assignments = tf.Variable(tf.zeros([N], dtype=tf.int64))

# Use the first K samples as the initial centroids.
centroids = tf.Variable(tf.slice(points.initialized_value(), [0, 0], [K, 2]))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Show the initial centroids against the samples.
r = sess.run(centroids)
print(r)

fig, ax = plt.subplots()
ax.scatter(r[:, 0], r[:, 1], marker='o', s=250)
ax.scatter(data[:, 0], data[:, 1], marker='o', s=100,
           c=features, cmap=plt.cm.coolwarm)
plt.show()

# Expand both tensors to [N, K, 2] so every sample is paired with every
# centroid.
rep_centroids = tf.reshape(tf.tile(centroids, [N, 1]), [N, K, 2])
rep_points = tf.reshape(tf.tile(points, [1, K]), [N, K, 2])

# Squared distance from each sample to each centroid, reduced over the
# coordinate axis to give an [N, K] tensor.
sum_squares = tf.reduce_sum(tf.square(rep_points - rep_centroids), axis=2)

# Index of the nearest centroid for every sample.
best_centroids = tf.argmin(sum_squares, 1)

# True if at least one sample would move to a different cluster.
did_assignments_change = tf.reduce_any(
    tf.not_equal(best_centroids, cluster_assignments))


def bucket_mean(data, bucket_ids, num_buckets):
    """Return the per-bucket mean of the rows of *data*.

    Rows of ``data`` are summed per bucket according to ``bucket_ids`` and
    divided by the number of rows that fell into each bucket.

    NOTE: a bucket that receives no rows has a total of 0 and a count of 0,
    so its mean is 0/0 (NaN for floating-point data).
    """
    total = tf.unsorted_segment_sum(data, bucket_ids, num_buckets)
    count = tf.unsorted_segment_sum(tf.ones_like(data), bucket_ids, num_buckets)
    return total / count


means = bucket_mean(points, best_centroids, K)

# Make sure the "did anything change" flag is evaluated before the variables
# it depends on are overwritten by the update.
with tf.control_dependencies([did_assignments_change]):
    do_updates = tf.group(
        centroids.assign(means),
        cluster_assignments.assign(best_centroids))

changed = True
iters = 0

# One colour index per centroid marker; yields [2, 1] for K == 2.
colourindexes = list(range(K, 0, -1))

while changed and iters < MAX_ITERS:
    fig, ax = plt.subplots()
    iters += 1
    [changed, _] = sess.run([did_assignments_change, do_updates])
    # From here on ``centers`` holds the current centroids (an array),
    # replacing the list of blob centres defined at the top.
    [centers, assignments] = sess.run([centroids, cluster_assignments])
    # ``points`` is never reassigned, so its value is always ``data``; plot
    # ``data`` directly instead of fetching the variable on every iteration.
    ax.scatter(data[:, 0], data[:, 1], marker='o', s=200,
               c=assignments, cmap=plt.cm.coolwarm)
    ax.scatter(centers[:, 0], centers[:, 1], marker='^', s=550,
               c=colourindexes, cmap=plt.cm.plasma)
    ax.set_title('Iteration ' + str(iters))
    plt.savefig("kmeans" + str(iters) + ".png")

# Display the per-iteration figures created in the loop.
plt.show()

end = time.time()
print(("Found in %.2f seconds" % (end-start)), iters, "iterations")
print("Centroids:")
print(centers)
print("Cluster assignments:", assignments)

# Release the resources held by the session.
sess.close()