示例#1
0
def B1(pca=False):
    """
    Plot WC_SSD and SC against K for each embedding file.

    For every CSV file and every K, a KMeans model is fitted on columns 2
    onward and the first two values returned by ``get_evals()`` (stored here
    as WC_SSD and SC) are recorded. Two figures are then drawn, one per
    metric, each with one curve per file.

    The ``pca`` argument is accepted but is not used by this function.
    """
    cluster_counts = [2, 4, 6, 8, 16, 32]
    datasets = [
        'digits-embedding.csv',
        'digits-embedding-2467.csv',
        'digits-embedding-67.csv',
    ]

    # One row per dataset, one column per K.
    table_shape = (len(datasets), len(cluster_counts))
    ssd_table = zeros(table_shape)
    sc_table = zeros(table_shape)

    for row, path in enumerate(datasets):
        # Only columns 2.. are used as features.
        features = genfromtxt(path, delimiter=',')[:, 2:]
        for col, n_clusters in enumerate(cluster_counts):
            model = KMeans(n_clusters=n_clusters)
            model.fit(features)
            ssd, sc, _ = model.get_evals()
            ssd_table[row, col] = ssd
            sc_table[row, col] = sc

    # Same layout for both figures: WC_SSD first, then SC.
    panels = (
        (ssd_table, 'WC_SSD v.s. K'),
        (sc_table, 'SC v.s. K'),
    )
    for table, heading in panels:
        figure()
        for curve, path in zip(table, datasets):
            plot(cluster_counts, curve, label=path)
        legend()
        title(heading)
    show()
示例#2
0
def B4(pca=False):
    """
    Evaluate clusterings with NMI and visualize them in 2D.

    Each embedding file is clustered once with its paired number of clusters
    (8, 4 and 2 respectively). The third value returned by ``get_evals()`` is
    stored as that file's NMI. For each file a scatter plot of up to 1000
    randomly chosen points is drawn, coloured by cluster assignment using a
    random colour per cluster. The file names and NMI values are printed
    before the figures are shown.

    The ``pca`` argument is accepted but is not used by this function.
    """
    datasets = [
        'digits-embedding.csv',
        'digits-embedding-2467.csv',
        'digits-embedding-67.csv',
    ]
    cluster_counts = [8, 4, 2]
    nmi_scores = zeros(len(datasets))

    for slot, (n_clusters, path) in enumerate(zip(cluster_counts, datasets)):
        table = genfromtxt(path, delimiter=',')
        features = table[:, 2:]
        # Column 1 holds the raw labels; they are normalized before fitting.
        labels = get_normalized_labels(table[:, 1])

        model = KMeans(n_clusters=n_clusters)
        assignment = model.fit(features, labels)
        _, _, score = model.get_evals()
        nmi_scores[slot] = score

        figure()
        # Plot a random subset of at most 1000 rows.
        sample = permutation(features.shape[0])[:1000]
        shown_points = features[sample]
        shown_clusters = assignment[sample]
        # One random RGB colour per cluster, looked up per point.
        palette = rand(n_clusters, 3)
        point_colors = palette[shown_clusters, :]
        scatter(
            shown_points[:, 0],
            shown_points[:, 1],
            c=point_colors,
            alpha=0.9,
            s=30,
        )

    print(datasets)
    print("NMI =", nmi_scores)
    show()
示例#3
0
def Bonus4():
    """
    Repeat B1, B2, B4 with the PCA embedding files.

    For each PCA embedding file and each K, KMeans is fitted 10 times and the
    three values from ``get_evals()`` (stored as WC_SSD, SC and NMI) are
    recorded per run. After all fits for a file, a scatter plot of up to 1000
    random points is drawn using the assignment from the most recent fit,
    i.e. the last K in the list on its final repetition.

    The three result arrays are written to ``Bonus_*.npy`` files; WC_SSD and
    SC are then read back from disk before plotting mean and standard
    deviation error bars over the repetitions. The file names and the mean
    NMI per (file, K) are printed before the figures are shown.
    """
    cluster_counts = [2, 4, 6, 8, 16, 32]
    datasets = [
        'digits-pca-embedding.csv',
        'digits-pca-embedding-2467.csv',
        'digits-pca-embedding-67.csv',
    ]

    # Axes: dataset x K x repetition (10 repetitions).
    runs_shape = (len(datasets), len(cluster_counts), 10)
    ssd_runs = zeros(runs_shape)
    sc_runs = zeros(runs_shape)
    nmi_runs = zeros(runs_shape)

    for row, path in enumerate(datasets):
        table = genfromtxt(path, delimiter=',')
        features = table[:, 2:]
        labels = get_normalized_labels(table[:, 1])

        for col, n_clusters in enumerate(cluster_counts):
            for rep in range(10):
                model = KMeans(n_clusters=n_clusters)
                assignment = model.fit(features, labels)
                ssd, sc, nmi = model.get_evals()
                ssd_runs[row, col, rep] = ssd
                sc_runs[row, col, rep] = sc
                nmi_runs[row, col, rep] = nmi

        # NOTE: `n_clusters` and `assignment` still hold the values from the
        # last fit above (largest K, final repetition); the scatter plot
        # visualizes that clustering.
        figure()
        sample = permutation(features.shape[0])[:1000]
        shown_points = features[sample]
        shown_clusters = assignment[sample]
        palette = rand(n_clusters, 3)
        point_colors = palette[shown_clusters, :]
        scatter(
            shown_points[:, 0],
            shown_points[:, 1],
            c=point_colors,
            alpha=0.9,
            s=30,
        )

    # Persist all three result arrays.
    save('Bonus_wc_ssd_val.npy', ssd_runs)
    save('Bonus_sc_val.npy', sc_runs)
    save('Bonus_nmi_val.npy', nmi_runs)

    # WC_SSD and SC are read back from disk; NMI keeps its in-memory values.
    ssd_runs = load('Bonus_wc_ssd_val.npy')
    sc_runs = load('Bonus_sc_val.npy')

    # Aggregate over the repetition axis.
    ssd_center = mean(ssd_runs, axis=2)
    sc_center = mean(sc_runs, axis=2)
    ssd_spread = std(ssd_runs, axis=2)
    sc_spread = std(sc_runs, axis=2)

    # Same layout for both figures: WC_SSD first, then SC.
    panels = (
        (ssd_center, ssd_spread, 'WC_SSD v.s. K'),
        (sc_center, sc_spread, 'SC v.s. K'),
    )
    for centers, spreads, heading in panels:
        figure()
        for row, path in enumerate(datasets):
            errorbar(
                cluster_counts,
                centers[row],
                spreads[row],
                capsize=4,
                label=path,
            )
        legend()
        title(heading)

    print(datasets)
    print("NMI =", mean(nmi_runs, axis=2))
    show()
示例#4
0
def B3():
    """
    Repeat the K sweep 10 times per K and plot the mean with error bars.

    For each embedding file and each K, KMeans is fitted 10 times on columns
    2 onward and the first two values from ``get_evals()`` (stored as WC_SSD
    and SC) are recorded per run. Both result arrays are written to
    ``B3_*.npy`` files and read back, then the mean and standard deviation
    over the repetitions are drawn as error-bar plots, one figure per metric
    with one curve per file.
    """
    cluster_counts = [2, 4, 6, 8, 16, 32]
    datasets = [
        'digits-embedding.csv',
        'digits-embedding-2467.csv',
        'digits-embedding-67.csv',
    ]

    # Axes: dataset x K x repetition (10 repetitions).
    runs_shape = (len(datasets), len(cluster_counts), 10)
    ssd_runs = zeros(runs_shape)
    sc_runs = zeros(runs_shape)

    for row, path in enumerate(datasets):
        features = genfromtxt(path, delimiter=',')[:, 2:]
        for col, n_clusters in enumerate(cluster_counts):
            for rep in range(10):
                model = KMeans(n_clusters=n_clusters)
                model.fit(features)
                ssd, sc, _ = model.get_evals()
                ssd_runs[row, col, rep] = ssd
                sc_runs[row, col, rep] = sc

    # Persist the raw runs, then read them back from disk.
    save('B3_wc_ssd_val.npy', ssd_runs)
    save('B3_sc_val.npy', sc_runs)
    ssd_runs = load('B3_wc_ssd_val.npy')
    sc_runs = load('B3_sc_val.npy')

    # Aggregate over the repetition axis.
    ssd_center = mean(ssd_runs, axis=2)
    sc_center = mean(sc_runs, axis=2)
    ssd_spread = std(ssd_runs, axis=2)
    sc_spread = std(sc_runs, axis=2)

    # Same layout for both figures: WC_SSD first, then SC.
    panels = (
        (ssd_center, ssd_spread, 'WC_SSD v.s. K'),
        (sc_center, sc_spread, 'SC v.s. K'),
    )
    for centers, spreads, heading in panels:
        figure()
        for row, path in enumerate(datasets):
            errorbar(
                cluster_counts,
                centers[row],
                spreads[row],
                capsize=4,
                label=path,
            )
        legend()
        title(heading)
    show()