def demo():
    """Plot a two-component GMM fit on synthetic 2-D data.

    Seeds NumPy's global RNG, draws two Gaussian point clouds, and scatters
    them in red and green. A k-means run supplies the initial mean,
    covariance and weight of every mixture component. The component means
    are drawn before (blue 'v') and after (yellow '^') the call to
    ``train_gmm``. The visible code never calls ``plt.show()``; displaying
    the figure is left to the caller.
    """
    import matplotlib.pyplot as plt
    from kmeans import kmeans_cluster

    np.random.seed(1111)
    dim, k = 2, 2
    num = 50

    # Synthetic data: `num` points per cloud. The first cloud is sampled
    # before the second so the seeded random stream is consumed in order.
    x1 = np.random.multivariate_normal(np.zeros(dim), np.eye(dim), [num])
    x2 = np.random.multivariate_normal(
        np.full(dim, 2.0), 0.5 * np.eye(dim), [num])
    x = np.concatenate([x1, x2], 0)
    for cloud, colour in ((x1, 'r'), (x2, 'g')):
        plt.scatter(cloud[:, 0], cloud[:, 1], c=colour)

    def _mark_centers(colour, size, marker):
        # Draw the current mean of every mixture component.
        centers = np.stack([gmm[idx].mean for idx in range(gmm.k)])
        plt.scatter(centers[:, 0], centers[:, 1],
                    c=colour, s=size, marker=marker)

    # Initialise the mixture from the k-means result.
    centers, assignment = kmeans_cluster(x, k)
    components, weight = [], []
    for cluster_id in range(k):
        mean = centers[cluster_id]
        # Small diagonal term added before accumulating the scatter matrix.
        cov = np.eye(dim) * 1e-6
        count = 0.
        for row, point in enumerate(x):
            if cluster_id == assignment[row]:
                offset = mean - point
                cov += np.outer(offset, offset)
                count += 1
        # NOTE(review): a cluster with no assigned points leaves count at 0,
        # so this division produces non-finite entries - confirm that
        # kmeans_cluster never returns an empty cluster.
        cov /= count
        weight.append(count / len(x))
        components.append(Gauss(dim, mean, cov))
    gmm = GMM(components, weight)

    _mark_centers('b', 50, 'v')
    train_gmm(gmm, x, threshold=1e-4)
    _mark_centers('y', 500, '^')
# Fill rows 1-6 of the small hand-written 2-D sample `test`
# (`test` itself is created before this excerpt).
test[1][0] = 1.5
test[1][1] = 2.0
test[2][0] = 3.0
test[2][1] = 4.0
test[3][0] = 5.0
test[3][1] = 7.0
test[4][0] = 3.5
test[4][1] = 5.0
test[5][0] = 4.5
test[5][1] = 5.0
test[6][0] = 3.5
test[6][1] = 4.5
#kmeans.lloyd_iteration(test, cluster)

# Average the objective returned by 1000 independent 'kmeans++' runs.
# NOTE(review): the loop clusters `X`, not the `test` array filled in above -
# confirm that is intended.
total = 0.0
for i in range(1000):
    (C, a, obj) = kmeans.kmeans_cluster(X, 9, 'kmeans++', 1)
    total = total + obj
# Single-argument print calls produce the same output on Python 2 and are
# valid syntax on Python 3, unlike the former print statements.
print("ans:")
print(total / 1000)

# Buffer pre-filled with the largest finite float; in the visible code it is
# only consumed by the commented-out experiment below.
result = np.zeros(30)
result[:] = sys.float_info.max
#for i in range(10):
#    (C, a, obj) = kmeans.kmeans_cluster(X, i + 1, 'fixed', 10)
# TODO: Test update_assignments function, defined in kmeans.py
# TODO: Test update_centers function, defined in kmeans.py
# TODO: Test lloyd_iteration function, defined in kmeans.py
#result[i + 1] = obj
#idx = np.argmin(result)
# Standardise the training data, project it with PCA, cluster the projection
# and count the pair agreements needed for the Rand index.

# Per-column mean and standard deviation (square root of the variance).
mean = np.mean(train_set, axis=0)
s = np.var(train_set, axis=0)**0.5

# Standardise every row in place with the statistics above.
# NOTE(review): a column with zero variance makes `s` zero there and the
# division non-finite - confirm the input cannot contain constant columns.
train_set[:] = (train_set - mean) / s
print(train_set)
print(np.mean(train_set, axis=0), np.var(train_set, axis=0), sep='\n')

train_set_PCA = PCA(train_set, threshold)
if not len(train_set_PCA):
    # An empty projection is reported as a too-small threshold.
    print("Threshold is too small. Please input a bigger threshold.\n")
    sys.exit(1)

# plot_3D(train_set_PCA, label_set)
sil_coef, cluster_label = kmeans.kmeans_cluster(k, train_set_PCA)
# print(sil_coef)
sil_coef_mean = np.mean(sil_coef)

# Calculate Rand Index
# a: pairs sharing both the reference label and the cluster label.
# d: pairs differing in both.
a, d = 0, 0
n = len(label_set)
for i in range(n):
    for j in range(i + 1, n):
        same_label = label_set[i] == label_set[j]
        same_cluster = cluster_label[i] == cluster_label[j]
        if same_label and same_cluster:
            a += 1
        elif (not same_label) and (not same_cluster):
            d += 1
# Number of unordered pairs among the n samples.
pair_cnt = n * (n - 1) // 2
print("kernelval is {}".format(kernel_val.shape)) print("dim, hidden is {},{}".format(dim, hiddens)) assert kernel_val.shape == (dim, hiddens), \ 'kernel shape error at {}'.format(layer_name) except: use_pretrained_param = False print( '2.Shape of the pretrained parameter of {} does not match, ' 'use randomly initialized parameter'.format(layer_name)) if use_pretrained_param: #tk min_seed = 0 min_seed_val = float(9999999) for i in xrange(10000): kmeans1 = kmeans_cluster(kernel_val, nCluster=9800, max_iter=1000) kmeans2 = XXhash(cWeights=kernel_val, nCluster=9800, seed=i) #print("shape is {} and {}".format(kmeans1.weight().shape, kmeans2.weight().shape)) temp = sum(map(sum, abs(kmeans1.weight() - kmeans2.weight()))) print("temp is {} for {}".format(temp, i)) if temp < min_seed_val: min_seed = i min_seed_val = temp print("SEED IS THIS!!!!!!{}", min_seed) kmeans = XXhash(cWeights=kernel_val, nCluster=9800, seed=min_seed) #kmeans = XXhash(cWeights=kernel_val, nCluster=9800, blocked=True, blocked_param=64) print("Kmeans label is", kmeans.label()) print("Kmeans cluster_centers is", kmeans.centro()) print("Kmeans weight is", kmeans.weight())
# Driver script: loads the k-means test data and calls each routine of the
# local `kmeans` module once, then loads the MNIST CSV.
import os
import csv
import numpy as np
import kmeans

# Input CSV files are looked up in a `data` directory relative to the
# current working directory.
data_dir = os.path.join('', 'data')
X = np.genfromtxt(os.path.join(data_dir, 'kmeans_test_data.csv'), delimiter=',')

# NOTE(review): `C` is read on the next line before any visible assignment,
# so running the script as shown raises NameError - initial centers have to
# be supplied first.
a = kmeans.update_assignments(X, C)
C = kmeans.update_centers(X, C, a)
(C, a) = kmeans.lloyd_iteration(X, C)
obj = kmeans.kmeans_obj(X, C, a)
# presumably (data, number of clusters, init scheme, number of restarts) -
# confirm against kmeans.kmeans_cluster
(best_C, best_a, best_obj) = kmeans.kmeans_cluster(X, 5, "random", 2)

# TODO: Run experiments outlined in HW6 PDF

# For question 9 and 10
# from sklearn.decomposition import PCA
mnist_X = np.genfromtxt(os.path.join(data_dir, 'mnist_data.csv'), delimiter=',')
# Earlier experiments, kept disabled: toy data set-up, scatter plot, and the
# averaged 'kmeans++' objective.
# X[4][1] = 5.0
# X[5][0] = 4.5
# X[5][1] = 5.0
# X[6][0] = 3.5
# X[6][1] = 4.5
# x = []
# y = []
# for item in X:
#     x.append(item[0])
#     y.append(item[1])
# plt.scatter(x,y)
# plt.show()
# obj = 0
# for i in range(0,1000):
#     (best_C, best_a, best_obj) = kmeans.kmeans_cluster(X,9,'kmeans++',1)
#     obj += best_obj
#     # print best_obj
# print obj / 1000

# Load the MNIST CSV and report its dimensions and first entry.
# The print calls below emit the same text as the former Python 2 print
# statements and are also valid on Python 3.
X = np.genfromtxt(os.path.join(data_dir, 'mnist_data.csv'), delimiter=',')
print("{} {}".format(X.shape[0], X.shape[1]))
print(X[0,0])

# NOTE(review): as extracted, the PCA projection below is commented out, yet
# `x_reduced` is read right after it - confirm whether this line is meant to
# be live (it also needs `PCA` to be imported).
# x_reduced = PCA(n_components=5).fit_transform(X)
print("{} {}".format(x_reduced.shape[0], x_reduced.shape[1]))

# Cluster the reduced data and dump objective, centers and assignments.
(best_C, best_a, best_obj) = kmeans.kmeans_cluster(x_reduced, 3, 'fixed', 1)
print(best_obj)
print(best_C)
print(best_a)
# Choose the kernel/bias initialisers for this layer. A pretrained kernel is
# used only when its shape is exactly (dim, hiddens); otherwise the code
# falls back to Xavier or truncated-normal initialisation.
if use_pretrained_param:
    try:
        #kernel_val = np.transpose(kernel_val, (1,0))
        print("kernelval is {}".format(kernel_val.shape))
        print("dim, hidden is {},{}".format(dim, hiddens))
        # Explicit check instead of `assert`, so the validation still runs
        # when Python is started with -O (which strips assert statements).
        if not kernel_val.shape == (dim, hiddens):
            raise ValueError(
                'kernel shape error at {}'.format(layer_name))
    except Exception:
        # Any failure while inspecting the pretrained kernel (wrong shape,
        # missing attribute, ...) disables it. KeyboardInterrupt and
        # SystemExit are no longer swallowed, unlike with a bare `except:`.
        use_pretrained_param = False
        print(
            '2.Shape of the pretrained parameter of {} does not match, '
            'use randomly initialized parameter'.format(layer_name))

if use_pretrained_param:
    #tk
    # Cluster the pretrained kernel and initialise the layer from the
    # weights the clustering object reports.
    kmeans = kmeans_cluster(kernel_val, max_iter=1000)
    #kmeans = kmeans_hash_cluster(kernel_val)
    print("Kmeans label is", kmeans.label())
    print("Kmeans cluster_centers is", kmeans.centro())
    print("Kmeans weight is", kmeans.weight())
    kernel_init = tf.constant(kmeans.weight(), dtype=tf.float32)
    bias_init = tf.constant(bias_val, dtype=tf.float32)
elif xavier:
    kernel_init = tf.contrib.layers.xavier_initializer()
    bias_init = tf.constant_initializer(0.0)
else:
    kernel_init = tf.truncated_normal_initializer(stddev=stddev,
                                                  dtype=tf.float32)
    bias_init = tf.constant_initializer(0.0)