from __future__ import division

import os

import matplotlib.pyplot as plt
import numpy as np

from algos import k_means, gaussian_mixture

# Train k_means on the data in Arrays/Z.npy for a range of cluster counts and
# plot each model's compressed_variance as a percentage of the data variance.

# os.path.join keeps the paths valid on non-Windows systems, where a
# hard-coded "\\" would be treated as part of the file name.
Z = np.load(os.path.join("Arrays", "Z.npy"))

# Sum of squared entries divided by (number of rows - 1).
# NOTE(review): this is the total sample variance only if Z has already been
# mean-centred -- confirm against the code that produced Z.npy.
variance = np.sum(Z**2) / (Z.shape[0] - 1)

# Cluster counts to try. Defined once so the training loop and the plot's
# x-axis cannot drift apart.
K_VALUES = range(2, 15)

compressed_vars = []
for K in K_VALUES:
    model = k_means(K)
    model.train(Z)
    # Stored as a ratio to the data variance; scaled to a percentage below.
    compressed_vars.append(model.compressed_variance / variance)
compressed_vars = np.array(compressed_vars)

fig1 = plt.figure()
sub = fig1.add_subplot(1, 1, 1)
sub.plot(np.array(K_VALUES), 100 * compressed_vars)
sub.set_title("Compressed Variance")
sub.set_xlabel("$K$")
sub.set_ylabel("Percentage")
fig1.savefig(os.path.join("Plots", "compressed-variance.png"),
             bbox_inches='tight')
# Example #2
0
def random_cluster(center, radius, num):
    """Draw ``num`` random 2-D points lying within ``radius`` of ``center``.

    Each point is ``center`` plus a polar offset whose distance is drawn
    uniformly from [0, radius) and whose angle is drawn uniformly from
    [0, 2*pi).  Because the distance (not the area) is uniform, points are
    denser near the centre than a uniform-over-the-disc sample would be.

    Args:
        center: Array-like giving the cluster centre; it is broadcast against
            the (num, 2) offsets, so a length-2 sequence is expected.
        radius: Upper bound on each point's distance from ``center``.
        num: Number of points to draw.

    Returns:
        A NumPy array with one point per row.
    """
    # Distances are sampled before angles; keeping that order means a seeded
    # global RNG yields the same points.
    distances = np.random.uniform(0, radius, num)
    angles = np.random.uniform(0, 2 * np.pi, num)

    x_offsets = distances * np.cos(angles)
    y_offsets = distances * np.sin(angles)

    # Stack as rows [x; y], then transpose so each row is one (x, y) point.
    offsets = np.transpose([x_offsets, y_offsets])
    return np.asarray(center) + offsets


# Sanity check for k_means: generate three synthetic clusters, fit a K = 3
# model, and print its diagnostics next to the empirical cluster means.


def _report(label, value):
    """Print ``label`` and ``value`` separated by a single space.

    Written as a one-argument ``print(...)`` call so the line is valid on
    both Python 2 and Python 3 while emitting the same text as the Python 2
    statement ``print label, value``.
    """
    print(label + " " + str(value))


N = 1000
# Each cluster receives N // 3 points, so X ends up with 3 * (N // 3) rows.
C_1 = random_cluster([-3, 0], 1, N // 3)
C_2 = random_cluster([3, 0], 1, N // 3)
C_3 = random_cluster([0, 3], 1, N // 3)

# Empirical mean of each generated cluster, one row per cluster.
means = np.array(
    [np.mean(C_1, axis=0),
     np.mean(C_2, axis=0),
     np.mean(C_3, axis=0)])

X = np.concatenate((C_1, C_2, C_3), axis=0)

model = k_means(3)
model.train(X)

_report("num_iters: ", model.num_iters)
_report("reconstruction error: ", model.reconstruction_error)
_report("min intracluster distance: ", model.min_intracluster_distance)
_report("max intercluster distance: ", model.max_intercluster_distance)
_report("dunn index: ", model.dunn_index)

# NOTE(review): assumes model.means is a 2-D array with one fitted mean per
# row, as the original `model.means[k, :]` indexing suggests -- confirm.
fitted_means = np.asarray(model.means)
for k, true_mean in enumerate(means):
    # The index k_means assigns to a cluster need not match the order in
    # which C_1, C_2, C_3 were generated, so pair each true mean with the
    # closest fitted mean instead of the one that shares its index.
    sq_dists = np.sum((fitted_means - true_mean) ** 2, axis=1)
    nearest = int(np.argmin(sq_dists))
    print("cluster " + str(k + 1) + ":")
    _report("true mean: ", np.round(true_mean, 5))
    _report("model mean: ", np.round(model.means[nearest, :], 5))