from __future__ import division from algos import k_means, gaussian_mixture import matplotlib.pyplot as plt import numpy as np Z = np.load("Arrays\\Z.npy") variance = np.sum(Z**2) / (Z.shape[0] - 1) compressed_vars = [] for K in range(2, 15): model = k_means(K) model.train(Z) compressed_vars.append(model.compressed_variance / variance) # print "K = ", K # print "\n" # print "num_iters: ", model.num_iters # print "reconstruction error: ", model.reconstruction_error # print "cluster sizes: ", model.cluster_sizes # print "compressed variance (/variance): ", model.compressed_variance/variance # print "dunn index: ", model.dunn_index # print "\n" compressed_vars = np.array(compressed_vars) fig1 = plt.figure() sub = fig1.add_subplot(1, 1, 1) sub.plot(np.array(range(2, 15)), 100 * compressed_vars) sub.set_title("Compressed Variance") sub.set_xlabel("$K$") sub.set_ylabel("Percentage") fig1.savefig("Plots\\compressed-variance.png", bbox_inches='tight')
def random_cluster(center, radius, num):
    """Draw random 2-D points lying within ``radius`` of ``center``.

    Every point is ``center`` plus a polar offset whose distance is drawn
    uniformly from [0, radius) and whose angle is drawn uniformly from
    [0, 2*pi). Because the distance (not the area) is uniform, the points
    are denser near the centre than a uniform-over-the-disc sample would be.

    Args:
        center: Length-2 sequence giving the (x, y) centre of the cluster.
        radius: Upper bound on the distance of a point from ``center``.
        num: Number of points to draw.

    Returns:
        Array with one row per point and two columns (x, y).
    """
    r = np.random.uniform(0, radius, num)
    theta = np.random.uniform(0, 2 * np.pi, num)
    # Stack x and y offsets as rows, then transpose to one point per row.
    offsets = np.array([r * np.cos(theta), r * np.sin(theta)]).T
    return np.asarray(center) + offsets


def _report(label, value):
    """Print ``label`` and ``value`` separated by a single space.

    A single-argument ``print(...)`` produces the same text under Python 2
    and Python 3, matching the output of ``print label, value`` in Python 2.
    """
    print("%s %s" % (label, value))


# Three clusters of N // 3 points each (so 999 points in total for N = 1000).
N = 1000
C_1 = random_cluster([-3, 0], 1, N // 3)
C_2 = random_cluster([3, 0], 1, N // 3)
C_3 = random_cluster([0, 3], 1, N // 3)

# Empirical mean of each generated cluster, in generation order.
means = np.array(
    [np.mean(C_1, axis=0), np.mean(C_2, axis=0), np.mean(C_3, axis=0)])

X = np.concatenate((C_1, C_2, C_3), axis=0)

model = k_means(3)
model.train(X)

_report("num_iters: ", model.num_iters)
_report("reconstruction error: ", model.reconstruction_error)
_report("min intracluster distance: ", model.min_intracluster_distance)
_report("max intercluster distance: ", model.max_intercluster_distance)
_report("dunn index: ", model.dunn_index)

# NOTE(review): nothing visible here guarantees that the fitted centroids come
# back in the same order the clusters were generated in, so true mean k is
# paired with its nearest fitted centroid instead of with model.means[k].
# When the orders already agree this selects exactly model.means[k].
model_means = np.asarray(model.means)
for k in range(model.K):
    sq_dist = np.sum((model_means - means[k, :]) ** 2, axis=1)
    nearest = int(np.argmin(sq_dist))
    print("cluster " + str(k + 1) + ":")
    _report("true mean: ", np.round(means[k, :], 5))
    _report("model mean: ", np.round(model_means[nearest, :], 5))