Пример #1
0
def demo():
    """Fit a two-component GMM to synthetic 2-D data and plot the outcome.

    Steps:
      1. Seed NumPy's global RNG and draw two Gaussian blobs of 50 points
         each (means (0, 0) and (2, 2), covariances I and 0.5 * I).
      2. Cluster the pooled points with ``kmeans_cluster`` and build one
         ``Gauss`` component per cluster from the cluster centre, the
         scatter of its members around that centre, and the cluster's
         share of the samples.
      3. Wrap the components in a ``GMM``, plot its means, refine it with
         ``train_gmm`` and plot the means again.

    Everything is drawn on the current matplotlib figure; this function
    does not call ``plt.show()`` and returns nothing.
    """
    import matplotlib.pyplot as plt
    from kmeans import kmeans_cluster

    np.random.seed(1111)
    n_dims = 2
    n_components = 2
    n_per_blob = 50

    # Synthetic data: one blob around the origin, a tighter one around (2, 2).
    blob_a = np.random.multivariate_normal(
        np.zeros(n_dims), np.eye(n_dims), [n_per_blob])
    blob_b = np.random.multivariate_normal(
        np.ones(n_dims) * 2, np.eye(n_dims) * 0.5, [n_per_blob])
    samples = np.concatenate([blob_a, blob_b], 0)

    for blob, colour in ((blob_a, 'r'), (blob_b, 'g')):
        plt.scatter(blob[:, 0], blob[:, 1], c=colour)

    # Initialise the mixture from a k-means clustering of the samples.
    # kmeans_cluster presumably returns (centres, per-sample cluster index)
    # -- verify against kmeans.py.
    centers, assignment = kmeans_cluster(samples, n_components)
    components = []
    weights = []
    for label in range(n_components):
        center = centers[label]

        # Covariance: start from a tiny diagonal term, add the outer
        # product of every member's offset from the centre, then divide
        # the whole sum by the member count.
        scatter = np.eye(n_dims) * 1e-6
        members = 0.
        for idx in range(n_per_blob * 2):
            if assignment[idx] != label:
                continue
            offset = center - samples[idx]
            scatter += np.outer(offset, offset)
            members += 1
        scatter /= members

        # Mixing weight: fraction of all samples assigned to this cluster.
        weights.append(members / len(samples))
        components.append(Gauss(n_dims, center, scatter))
    gmm = GMM(components, weights)

    def mark_means(**style):
        # Scatter the current component means with the given marker style.
        means = np.stack([gmm[i].mean for i in range(gmm.k)])
        plt.scatter(means[:, 0], means[:, 1], **style)

    # Means straight after the k-means initialisation ...
    mark_means(c='b', s=50, marker='v')

    # ... and after train_gmm has updated the mixture.
    train_gmm(gmm, samples, threshold=1e-4)
    mark_means(c='y', s=500, marker='^')
Пример #2
0
# Fill in rows 1..6 of the small hand-made 2-D point set `test`
# (`test` itself is created earlier in the script, outside this excerpt).
test[1][0] = 1.5
test[1][1] = 2.0
test[2][0] = 3.0
test[2][1] = 4.0
test[3][0] = 5.0
test[3][1] = 7.0
test[4][0] = 3.5
test[4][1] = 5.0
test[5][0] = 4.5
test[5][1] = 5.0
test[6][0] = 3.5
test[6][1] = 4.5
#kmeans.lloyd_iteration(test, cluster)

# Average the objective returned by kmeans_cluster over repeated runs on X.
# The positional arguments are presumably (data, k=9, init='kmeans++',
# restarts=1) -- verify against kmeans.py.
NUM_RUNS = 1000  # single source of truth for the loop length and the divisor
total = 0.0
for i in range(NUM_RUNS):
    (C, a, obj) = kmeans.kmeans_cluster(X, 9, 'kmeans++', 1)
    total += obj
# Single-argument print(...) calls emit the same text on Python 2 and 3;
# the original `print x` statements are a SyntaxError on Python 3.
print("ans:")
print(total / NUM_RUNS)

# 30 slots pre-filled with the largest finite float; the commented-out code
# below would overwrite them with objectives and take the argmin.
result = np.zeros(30)
result[:] = sys.float_info.max
#for i in range(10):
#  (C, a, obj) = kmeans.kmeans_cluster(X, i + 1, 'fixed', 10)
# TODO: Test update_assignments function, defined in kmeans.py

# TODO: Test update_centers function, defined in kmeans.py

# TODO: Test lloyd_iteration function, defined in kmeans.py
#result[i + 1] =  obj

#idx = np.argmin(result)
Пример #3
0
    mean = np.mean(train_set, axis=0)
    s = np.var(train_set, axis=0)**0.5
    # Normalize with the mean and standard deviation
    for i in range(train_set.shape[0]):
        train_set[i] = (train_set[i] - mean) / s
    print(train_set)
    print(np.mean(train_set, axis=0), np.var(train_set, axis=0), sep='\n')

    train_set_PCA = PCA(train_set, threshold)

    if len(train_set_PCA) == 0:
        print("Threshold is too small. Please input a bigger threshold.\n")
        sys.exit(1)
    # plot_3D(train_set_PCA, label_set)

    sil_coef, cluster_label = kmeans.kmeans_cluster(k, train_set_PCA)
    # print(sil_coef)
    sil_coef_mean = np.mean(sil_coef)

    # Calculate Rand Index
    a, d = 0, 0
    n = len(label_set)
    for i in range(n):
        for j in range(i + 1, n):
            if (label_set[i] == label_set[j]) and (cluster_label[i]
                                                   == cluster_label[j]):
                a += 1
            elif (label_set[i] != label_set[j]) and (cluster_label[i] !=
                                                     cluster_label[j]):
                d += 1
    pair_cnt = n * (n - 1) // 2
Пример #4
0
                print("kernelval is {}".format(kernel_val.shape))
                print("dim, hidden is {},{}".format(dim, hiddens))
                assert kernel_val.shape == (dim, hiddens), \
                 'kernel shape error at {}'.format(layer_name)
            except:
                use_pretrained_param = False
                print(
                    '2.Shape of the pretrained parameter of {} does not match, '
                    'use randomly initialized parameter'.format(layer_name))

    if use_pretrained_param:
        #tk
        min_seed = 0
        min_seed_val = float(9999999)
        for i in xrange(10000):
            kmeans1 = kmeans_cluster(kernel_val, nCluster=9800, max_iter=1000)
            kmeans2 = XXhash(cWeights=kernel_val, nCluster=9800, seed=i)
            #print("shape is {} and {}".format(kmeans1.weight().shape, kmeans2.weight().shape))
            temp = sum(map(sum, abs(kmeans1.weight() - kmeans2.weight())))
            print("temp is {} for {}".format(temp, i))
            if temp < min_seed_val:
                min_seed = i
                min_seed_val = temp

        print("SEED IS THIS!!!!!!{}", min_seed)
        kmeans = XXhash(cWeights=kernel_val, nCluster=9800, seed=min_seed)

        #kmeans = XXhash(cWeights=kernel_val, nCluster=9800, blocked=True, blocked_param=64)
        print("Kmeans label is", kmeans.label())
        print("Kmeans cluster_centers is", kmeans.centro())
        print("Kmeans weight is", kmeans.weight())
Пример #5
0
import os
import csv
import numpy as np
import kmeans

# Directory holding the CSV inputs; os.path.join('', 'data') is just 'data',
# i.e. a path relative to the current working directory.
data_dir = os.path.join('', 'data')

# Load the comma-separated k-means test points into a NumPy array.
X = np.genfromtxt(os.path.join(data_dir, 'kmeans_test_data.csv'),
                  delimiter=',')

# Call each helper from kmeans.py once, feeding each result into the next.
# NOTE(review): C is read on the next line before anything visible in this
# script assigns it, so as shown this raises NameError -- initial centers
# presumably have to be defined first. TODO confirm.
a = kmeans.update_assignments(X, C)
C = kmeans.update_centers(X, C, a)
(C, a) = kmeans.lloyd_iteration(X, C)
obj = kmeans.kmeans_obj(X, C, a)

# Full clustering run. The positional arguments are presumably
# (data, k=5, init="random", number of restarts=2) -- verify against kmeans.py.
(best_C, best_a, best_obj) = kmeans.kmeans_cluster(X, 5, "random", 2)

# TODO: Run experiments outlined in HW6 PDF

# For question 9 and 10
# from sklearn.decomposition import PCA
# Load the MNIST CSV the same way as the test data above.
mnist_X = np.genfromtxt(os.path.join(data_dir, 'mnist_data.csv'),
                        delimiter=',')
Пример #6
0
# X[4][1] = 5.0
# X[5][0] = 4.5
# X[5][1] = 5.0
# X[6][0] = 3.5
# X[6][1] = 4.5
# x = []
# y = []
# for item in X:
#     x.append(item[0])
#     y.append(item[1])
# plt.scatter(x,y)
# plt.show()
# obj = 0
# for i in range(0,1000):
#     (best_C, best_a, best_obj) = kmeans.kmeans_cluster(X,9,'kmeans++',1)
#     obj += best_obj
#     # print best_obj
# print obj / 1000

# Load the comma-separated MNIST data and report its size and first entry.
# The print calls below take a single argument so they emit exactly the same
# text on Python 2 and Python 3 (the original `print a, b` statements are a
# SyntaxError on Python 3, and `print(a, b)` would print a tuple on Python 2).
X = np.genfromtxt(os.path.join(data_dir, 'mnist_data.csv'), delimiter=',')
print("{} {}".format(X.shape[0], X.shape[1]))
print(X[0, 0])

# Reduce the data to 5 components before clustering.
# NOTE(review): PCA is presumably sklearn.decomposition.PCA; its import is
# not visible in this excerpt -- confirm it is in scope.
x_reduced = PCA(n_components=5).fit_transform(X)
print("{} {}".format(x_reduced.shape[0], x_reduced.shape[1]))


# Cluster the reduced data. The positional arguments are presumably
# (data, k=3, init='fixed', restarts=1) -- verify against kmeans.py.
(best_C, best_a, best_obj) = kmeans.kmeans_cluster(x_reduced, 3, 'fixed', 1)
print(best_obj)
print(best_C)
print(best_a)
Пример #7
0
        if use_pretrained_param:
            try:
                #kernel_val = np.transpose(kernel_val, (1,0))
                print("kernelval is {}".format(kernel_val.shape))
                print("dim, hidden is {},{}".format(dim, hiddens))
                assert kernel_val.shape == (dim, hiddens), \
                 'kernel shape error at {}'.format(layer_name)
            except:
                use_pretrained_param = False
                print(
                    '2.Shape of the pretrained parameter of {} does not match, '
                    'use randomly initialized parameter'.format(layer_name))

    if use_pretrained_param:
        #tk
        kmeans = kmeans_cluster(kernel_val, max_iter=1000)
        #kmeans = kmeans_hash_cluster(kernel_val)
        print("Kmeans label is", kmeans.label())
        print("Kmeans cluster_centers is", kmeans.centro())
        print("Kmeans weight is", kmeans.weight())

        kernel_init = tf.constant(kmeans.weight(), dtype=tf.float32)
        bias_init = tf.constant(bias_val, dtype=tf.float32)
    elif xavier:
        kernel_init = tf.contrib.layers.xavier_initializer()
        bias_init = tf.constant_initializer(0.0)
    else:
        kernel_init = tf.truncated_normal_initializer(stddev=stddev,
                                                      dtype=tf.float32)
        bias_init = tf.constant_initializer(0.0)