Example #1
z = z - 1  # make class labels 0-based

idx = np.arange(len(data))  # np.random.shuffle needs an array, not a range
np.random.shuffle(idx)
data = data[idx]
z = z[idx]
data = (data - data.mean(axis=0))/data.std(axis=0)

sigma2 = sum([np.linalg.norm(x-y)**2 
                for x in data for y in data])/(len(data)**2)
sigma = np.sqrt(sigma2)
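# (sigma2 above is the mean pairwise squared distance of the data; sigma is
# used as the bandwidth of the kernels rho_exp and rho_gauss below)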

rho_exp = lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)/(2*sigma))
rho_gauss = lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)**2/(2*(sigma)**2))

rho = lambda x, y: np.linalg.norm(x-y)  # energy-distance kernel, as in the other examples
G = eclust.kernel_matrix(data, rho)
#G = eclust.kernel_matrix(data, rho_gauss)
#G = eclust.kernel_matrix(data, rho_exp)

k = 3

r = []
r.append(wrapper.kmeans(k, data, run_times=5))
r.append(wrapper.gmm(k, data, run_times=5))
r.append(wrapper.spectral_clustering(k, data, G, run_times=5))
r.append(wrapper.spectral(k, data, G, run_times=5))
r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='spectral'))
r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='random'))
#r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='k-means++'))
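
# Sketch of how these results could be summarized (not part of the original
# excerpt); it assumes each wrapper call returns predicted labels, as in the
# other examples here, and that metric and PrettyTable are imported.
algos = ['k-means', 'GMM', 'spectral clustering', 'spectral',
         'kernel k-means', 'kernel k-groups']
t = PrettyTable(['Algorithm', 'Accuracy'])
for algo, zh in zip(algos, r):
    t.add_row([algo, metric.accuracy(z, zh)])
print(t)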
if __name__ == "__main__":
    
    import numpy as np
    import eclust
    import data
    import metric
    from prettytable import PrettyTable
    import sys

    n = 400
    d = 10
    n1, n2 = np.random.multinomial(n, [1/2, 1/2])
    m1 = np.zeros(d)
    m2 = 0.7*np.ones(d)
    s1 = s2 = np.eye(d)
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

    G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x-y))
    W = np.eye(n)
    k = 2

    t = PrettyTable(["Method", "Accuracy"])
    
    zh = kernel_kmeans(k, X, G, W, run_times=5, ini="k-means++")
    a = metric.accuracy(z, zh)
    t.add_row(["Kernel k-means", a])
    
    zh = kernel_kgroups(k, X, G, W, run_times=5, ini="k-means++")
    a = metric.accuracy(z, zh)
    t.add_row(["Kernel k-groups", a])
    
    zh = spectral(k, X, G, W, run_times=5)
    a = metric.accuracy(z, zh)
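    t.add_row(["Spectral", a])

    print(t)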
def generate_data(D):
    # two Gaussian clusters in R^D; only the first d coordinates of the means differ
    d = 10
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.concatenate((0.7 * np.ones(d), np.zeros(D - d)))
    s2 = np.eye(D)
    n1, n2 = np.random.multinomial(total_points, [0.5, 0.5])
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    return X, z


r = []
for _ in range(num_experiments):
    for dim in dimensions:
        X, z = generate_data(dim)
        G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x - y))

        zh = wrapper.kmeans(k, X)
        a = metric.accuracy(z, zh)
        r.append(['k-means', dim, a])

        zh = wrapper.gmm(k, X)
        a = metric.accuracy(z, zh)
        r.append(['gmm', dim, a])

        zh = wrapper.spectral_clustering(k, X, G)
        a = metric.accuracy(z, zh)
        r.append(['spectral clustering', dim, a])

        zh = wrapper.kernel_kmeans(k, X, G)
        a = metric.accuracy(z, zh)
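        r.append(['kernel k-means', dim, a])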
s1 = np.array([[1, 0], [0, 20]])
s2 = np.array([[1, 0], [0, 20]])

r1 = 1
r2 = 3
eps = 0.2

r = []
for _ in range(num_experiments):
    n1, n2 = np.random.multinomial(total_points, [0.5, 0.5])
    #X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    X, z = data.circles([r1, r2], [eps, eps], [n1, n2])

    #G = eclust.kernel_matrix(X,
    #        lambda x, y: 2 - 2*np.exp(-np.linalg.norm(x-y)/2/2))
    G = eclust.kernel_matrix(
        X, lambda x, y: 2 - 2 * np.exp(-np.linalg.norm(x - y)**2 / 2 / 1))

    row = []
    zh = wrapper.kmeans(k, X)
    a = metric.accuracy(z, zh)
    row.append(a)

    zh = wrapper.gmm(k, X)
    a = metric.accuracy(z, zh)
    row.append(a)

    zh = wrapper.spectral_clustering(k, X, G)
    a = metric.accuracy(z, zh)
    row.append(a)

    zh = wrapper.kernel_kmeans(k, X, G, ini='random')
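    a = metric.accuracy(z, zh)
    row.append(a)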
    m1 = np.zeros(D)
    s1 = 0.5*np.eye(D)
    m2 = 0.5*np.concatenate((np.ones(d), np.zeros(D-d)))
    s2 = np.eye(D)
    n1, n2 = np.random.multinomial(n, [0.5, 0.5])
    if distr_type == 'normal':
        X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    elif distr_type == 'lognormal':
        X, z = data.multivariate_lognormal([m1, m2], [s1, s2], [n1, n2])
    return X, z

r = []
for _ in range(num_experiments):
    for n in num_points:
        X, z = generate_data(n)
        G1 = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x-y))
        G2 = eclust.kernel_matrix(X, 
                lambda x, y: np.power(np.linalg.norm(x-y), 0.5))
        G3 = eclust.kernel_matrix(X, 
                lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)/2))
        
        zh = wrapper.kmeans(k, X)
        a = metric.accuracy(z, zh)
        r.append(['k-means', n, a])
        
        zh = wrapper.gmm(k, X)
        a = metric.accuracy(z, zh)
        r.append(['gmm', n, a])
        
        zh = wrapper.spectral_clustering(k, X, G3)
        a = metric.accuracy(z, zh)
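        r.append(['spectral clustering', n, a])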
rho_gauss = lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)**2/(2*(1)**2))

#df = pd.read_csv('data/wdbc.data', sep=',', header=None)
df = pd.read_csv('data/iris.data', sep=',', header=None)
classes = {
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2
}

z = np.array([classes[v] for v in df[4].values])
df = df.drop(4, axis=1)
data = df.values
data = (data - data.mean(axis=0))/data.std(axis=0)

G = eclust.kernel_matrix(data, rho_gauss)

k = 3
nt = 5
r = []
r.append(wrapper.kmeans(k, data, run_times=nt))
r.append(wrapper.gmm(k, data, run_times=nt))
r.append(wrapper.spectral_clustering(k, data, G, run_times=nt))
r.append(wrapper.spectral(k, data, G, run_times=nt))
r.append(wrapper.kernel_kmeans(k, data, G, run_times=nt, ini='spectral'))
r.append(wrapper.kernel_kgroups(k,data,G,run_times=nt, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral', 
         'kernel k-means', 'kernel k-groups']
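
# Sketch of how the table could be filled (not part of the original excerpt);
# it assumes the wrappers return predicted labels and uses scikit-learn's
# adjusted_rand_score for the A-Rand column.
from sklearn.metrics import adjusted_rand_score
for algo, zh in zip(algos, r):
    t.add_row([algo, metric.accuracy(z, zh), adjusted_rand_score(z, zh)])
print(t)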
    s1 = 0.5 * np.eye(D)
    m2 = 0.5 * np.concatenate((np.ones(d), np.zeros(D - d)))
    s2 = np.eye(D)
    n1, n2 = np.random.multinomial(n, [0.5, 0.5])
    if distr_type == 'normal':
        X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    elif distr_type == 'lognormal':
        X, z = data.multivariate_lognormal([m1, m2], [s1, s2], [n1, n2])
    return X, z


r = []
for _ in range(num_experiments):
    for n in num_points:
        X, z = generate_data(n)
        G1 = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x - y))
        G2 = eclust.kernel_matrix(
            X, lambda x, y: np.power(np.linalg.norm(x - y), 0.5))
        G3 = eclust.kernel_matrix(
            X, lambda x, y: 2 - 2 * np.exp(-np.linalg.norm(x - y) / 2))

        zh = wrapper.kmeans(k, X)
        a = metric.accuracy(z, zh)
        r.append(['k-means', n, a])

        zh = wrapper.gmm(k, X)
        a = metric.accuracy(z, zh)
        r.append(['gmm', n, a])

        zh = wrapper.spectral_clustering(k, X, G3)
        a = metric.accuracy(z, zh)
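        r.append(['spectral clustering', n, a])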
Example #8
    table = np.zeros((num_experiments, 5))
    for i in range(num_experiments):
        X, z = data.univariate_lognormal([0, -1.5], [0.3, 1.5], [100, 100])
        #X, z = data.univariate_normal([0, 5], [1, 22], [15, 15])
        Y = np.array([[x] for x in X])
        k = 2

        # 1D energy clustering
        zh, cost = two_clusters1D(X)
        table[i,0] = accuracy(z, zh)
       
        # initialization
        z0 = initialization.kmeanspp(k, Y, ret='labels')
        Z0 = eclust.ztoZ(z0)
        rho = lambda x, y: np.linalg.norm(x-y)
        G = eclust.kernel_matrix(Y, rho)
        z1 = initialization.spectral(k, G)
        Z1 = eclust.ztoZ(z1)
        
        # Hartigan's method
        zh = eclust.energy_hartigan(k, G, Z0)
        table[i,1] = accuracy(z, zh)
        
        zh = eclust.energy_hartigan(k, G, Z1)
        table[i,2] = accuracy(z, zh)
    
        # standard k-means
        km = KMeans(2)
        zh = km.fit_predict(Y)
        table[i, 3] = accuracy(z, zh)
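
        # Hypothetical fifth method (not in this excerpt): a Gaussian mixture
        # fit via scikit-learn to fill the remaining table column; a sketch,
        # assuming sklearn is available.
        from sklearn.mixture import GaussianMixture  # would normally sit at the top of the file
        zh = GaussianMixture(n_components=2).fit_predict(Y)
        table[i, 4] = accuracy(z, zh)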
idx = np.random.choice(len(data), 2000, replace=False)  # subsample without replacement
data = data[idx]
z = z[idx]
data = (data - data.mean(axis=0)) / data.std(axis=0)

sigma2 = sum([np.linalg.norm(x - y)**2 for x in data
              for y in data]) / (len(data)**2)
sigma = np.sqrt(sigma2)

rho_exp = lambda x, y: 2 - 2 * np.exp(-np.linalg.norm(x - y) / (2 * sigma))
rho_gauss = lambda x, y: 2 - 2 * np.exp(-np.linalg.norm(x - y)**2 /
                                        (2 * (sigma)**2))

G = eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

r = []
r.append(wrapper.kmeans(3, data, run_times=5))
r.append(wrapper.gmm(3, data, run_times=5))
r.append(wrapper.spectral_clustering(3, data, G, run_times=5))
r.append(wrapper.spectral(3, data, G, run_times=5))
#r.append(wrapper.kernel_kmeans(3, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kmeans(3, data, G, run_times=5, ini='k-means++'))
r.append(wrapper.kernel_kmeans(3, data, G, run_times=5, ini='spectral'))
#r.append(wrapper.kernel_kgroups(3,data,G,run_times=5, ini='random'))
#r.append(wrapper.kernel_kgroups(3,data,G,run_times=5, ini='k-means++'))
r.append(wrapper.kernel_kgroups(3, data, G, run_times=5, ini='spectral'))
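
# As in the other examples, the labels collected in r could be scored against
# the true labels z (sketch; assumes metric is imported):
algos = ['k-means', 'GMM', 'spectral clustering', 'spectral',
         'kernel k-means', 'kernel k-groups']
for algo, zh in zip(algos, r):
    print(algo, metric.accuracy(z, zh))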
s1 = np.array([[1,0], [0,20]])
s2 = np.array([[1,0], [0,20]])

r1 = 1
r2 = 3
eps = 0.2

r = []
for _ in range(num_experiments):
    n1, n2 = np.random.multinomial(total_points, [0.5, 0.5])
    #X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    X, z = data.circles([r1, r2], [eps, eps], [n1, n2])
    
    #G = eclust.kernel_matrix(X, 
    #        lambda x, y: 2 - 2*np.exp(-np.linalg.norm(x-y)/2/2))
    G = eclust.kernel_matrix(X, 
            lambda x, y: 2 - 2*np.exp(-np.linalg.norm(x-y)**2/2/1))
    
    row = []
    zh = wrapper.kmeans(k, X)
    a = metric.accuracy(z, zh)
    row.append(a)
    
    zh = wrapper.gmm(k, X)
    a = metric.accuracy(z, zh)
    row.append(a)
    
    zh = wrapper.spectral_clustering(k, X, G)
    a = metric.accuracy(z, zh)
    row.append(a)
    
    zh = wrapper.kernel_kmeans(k, X, G, ini='random')
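    a = metric.accuracy(z, zh)
    row.append(a)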