# Dimension sweep: for every repetition and every dimension, draw a labelled
# sample, cluster it with each method and record the accuracy against the
# true labels `z`.
r = []
for _ in range(num_experiments):
    for dim in dimensions:
        X, z = generate_data(dim)
        # Matrix built by eclust.kernel_matrix from the Euclidean distance
        # between points; shared by the three kernel-based methods.
        G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x - y))

        # (label, clustering function, positional arguments), executed in
        # exactly this order.
        for method_name, cluster_fn, cluster_args in (
                ('k-means', wrapper.kmeans, (k, X)),
                ('gmm', wrapper.gmm, (k, X)),
                ('spectral clustering', wrapper.spectral_clustering,
                 (k, X, G)),
                ('kernel k-means', wrapper.kernel_kmeans, (k, X, G)),
                ('kernel k-groups', wrapper.kernel_kgroups, (k, X, G)),
        ):
            zh = cluster_fn(*cluster_args)
            a = metric.accuracy(z, zh)
            r.append([method_name, dim, a])

# Build the frame straight from the row list: np.array(r) would coerce the
# mixed str/int/float rows to a single string dtype (and cannot be shaped
# into three columns when r is empty).
df = pd.DataFrame(r, columns=['method', 'dimension', 'accuracy'])
df.to_csv(output, index=False)
# Example #2
0
# Matrix computed by eclust.kernel_matrix from `data` with `rho`; the
# commented lines are the alternative functions that can be swapped in.
G = eclust.kernel_matrix(data, rho)
#G = eclust.kernel_matrix(data, rho_gauss)
#G = eclust.kernel_matrix(data, rho_exp)

k = 3

# One result per algorithm, appended in the same order as the names in
# `algos` below (the two lists are paired with zip). Toggle the commented
# lines to try a different `ini` for the kernel methods.
r = []
r.append(wrapper.kmeans(k, data, run_times=5))
r.append(wrapper.gmm(k, data, run_times=5))
r.append(wrapper.spectral_clustering(k, data, G, run_times=5))
r.append(wrapper.spectral(k, data, G, run_times=5))
r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='spectral'))
r.append(wrapper.kernel_kgroups(k, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral',
         'kernel k-means', 'kernel k-groups']

# Score every result against the reference labels `z`.
for algo, zh in zip(algos, r):
    t.add_row([algo,
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh)
    ])

# Function-call form: same output on Python 2, and valid on Python 3.
print(t)
                lambda x, y: np.power(np.linalg.norm(x-y), 0.5))
        G3 = eclust.kernel_matrix(X, 
                lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)/2))
        
        zh = wrapper.kmeans(k, X)
        a = metric.accuracy(z, zh)
        r.append(['k-means', n, a])
        
        zh = wrapper.gmm(k, X)
        a = metric.accuracy(z, zh)
        r.append(['gmm', n, a])
        
        zh = wrapper.spectral_clustering(k, X, G3)
        a = metric.accuracy(z, zh)
        r.append([r'spectral clustering $\widetilde{\rho}_1$', n, a])
        
        zh = wrapper.kernel_kgroups(k, X, G1)
        a = metric.accuracy(z, zh)
        r.append([r'kernel k-groups $\rho_{1}$', n, a])
        
        zh = wrapper.kernel_kgroups(k, X, G2)
        a = metric.accuracy(z, zh)
        r.append([r'kernel k-groups $\rho_{1/2}$', n, a])
        
        zh = wrapper.kernel_kgroups(k, X, G3)
        a = metric.accuracy(z, zh)
        r.append([r'kernel k-groups $\widetilde{\rho}_{1}$', n, a])

df = pd.DataFrame(np.array(r), columns=['method', 'points', 'accuracy'])
df.to_csv(output, index=False)
# Accuracy of each clustering method as the data dimension varies.
# Rows of `r` are [method label, dimension, accuracy].
r = []
for _ in range(num_experiments):
    for dim in dimensions:
        X, z = generate_data(dim)
        # Matrix derived from the Euclidean distance, reused by the
        # spectral and kernel methods below.
        G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x-y))
        
        zh = wrapper.kmeans(k, X)
        a = metric.accuracy(z, zh)
        r.append(['k-means', dim, a])
        
        zh = wrapper.gmm(k, X)
        a = metric.accuracy(z, zh)
        r.append(['gmm', dim, a])
        
        zh = wrapper.spectral_clustering(k, X, G)
        a = metric.accuracy(z, zh)
        r.append(['spectral clustering', dim, a])
        
        zh = wrapper.kernel_kmeans(k, X, G)
        a = metric.accuracy(z, zh)
        r.append(['kernel k-means', dim, a])
        
        zh = wrapper.kernel_kgroups(k, X, G)
        a = metric.accuracy(z, zh)
        r.append(['kernel k-groups', dim, a])

# Hand pandas the list of rows itself; wrapping it in np.array() first
# converts every cell (dimension and accuracy included) to a string.
df = pd.DataFrame(r, columns=['method', 'dimension', 'accuracy'])
df.to_csv(output, index=False)
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

# Results for six clusters, one entry per algorithm in the order of `algos`.
# NOTE(review): `G`, `data` and `z` are defined above this excerpt.
r = []
r.append(wrapper.kmeans(6, data, run_times=10))
r.append(wrapper.gmm(6, data, run_times=10))
r.append(wrapper.spectral_clustering(6, data, G, run_times=10))

r.append(wrapper.spectral(6, data, G, run_times=10))

# Only one `ini` per kernel method may be active, otherwise `r` and `algos`
# no longer line up.
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='random'))
r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'))

#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random'))
r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral', 
         'kernel k-means', 'kernel k-groups']

# Compare each result with the reference labels `z`.
for algo, zh in zip(algos, r):
    t.add_row([algo, 
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh)
    ])

# print() with parentheses runs on both Python 2 and Python 3.
print(t)

# Integer array form of eclust.ztoZ(z).
# NOTE(review): presumably a label-indicator matrix; confirm in eclust.
Z = np.array(eclust.ztoZ(z), dtype=int)
# Example #6
0
# Each element of X wrapped in its own row, giving the 2-D layout passed to
# the clustering calls below.
Y = np.array([[x] for x in X])
bw = 0.5  # bandwidth handed to both KernelDensity estimators
num_points = 1500  # number of grid points in X_plot
low = -6  # lower end of the evaluation grid
high = 6  # upper end of the evaluation grid
#low = -2
#high = 20

### clustering
# NOTE(review): `X`, `z` and `k` are defined above this excerpt.
t = PrettyTable(['Method', 'Accuracy'])
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x - y))
zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])
zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])
zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])
# Function-call form: identical output on Python 2, valid on Python 3.
print(t)

# Evenly spaced grid on [low, high], shaped as a column.
X_plot = np.linspace(low, high, num_points)[:, np.newaxis]

### kernel density estimation
# Points whose true label is 0 / 1, each reshaped to a column.
x1_true = X[np.where(z == 0)][:, np.newaxis]
x2_true = X[np.where(z == 1)][:, np.newaxis]

fig = plt.figure()
ax = fig.add_subplot(111)

# One Gaussian-kernel estimator per true class, scored on the grid.
kde1 = KernelDensity(kernel='gaussian', bandwidth=bw).fit(x1_true)
log_dens1 = kde1.score_samples(X_plot)
kde2 = KernelDensity(kernel='gaussian', bandwidth=bw).fit(x2_true)
    a = metric.accuracy(z, zh)
    row.append(a)

    zh = wrapper.gmm(k, X)
    a = metric.accuracy(z, zh)
    row.append(a)

    zh = wrapper.spectral_clustering(k, X, G)
    a = metric.accuracy(z, zh)
    row.append(a)

    zh = wrapper.kernel_kmeans(k, X, G, ini='random')
    a = metric.accuracy(z, zh)
    row.append(a)

    zh = wrapper.kernel_kgroups(k, X, G, ini='random')
    a = metric.accuracy(z, zh)
    row.append(a)

    r.append(row)
r = np.array(r)

t = PrettyTable(['Method', 'Accuracy', 'Std'])
for i, m in enumerate([
        'k-means', 'gmm', 'spectral clustering', 'kernel k-means',
        'kernel k-groups'
]):
    t.add_row([m, r[:, i].mean(), sem(r[:, i])])

print t
# Parameters of the two components, passed pairwise to the generator below.
# NOTE(review): `n1`, `n2`, `k` and the `data` module come from above this
# excerpt.
m1 = 0
s1 = 1.5
m2 = 1.5
s2 = 0.3
#X, z = data.univariate_normal([m1, m2], [s1, s2], [n1, n2])
X, z = data.univariate_lognormal([m1, m2], [s1, s2], [n1, n2])
# Each sample wrapped in its own row for the clustering calls.
Y = np.array([[x] for x in X])

### clustering
t = PrettyTable(['Method', 'Accuracy'])
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x-y))
zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])
zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])
zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])
# Function-call form: same output on Python 2, valid on Python 3.
print(t)

### estimated classes
# Split X by label 0 / 1 under the true labels and under each method's
# predicted labels.
x1_true = X[np.where(z==0)]
x2_true = X[np.where(z==1)]

x1_kmeans = X[np.where(zh_kmeans==0)]
x2_kmeans = X[np.where(zh_kmeans==1)]

x1_gmm = X[np.where(zh_gmm==0)]
x2_gmm = X[np.where(zh_gmm==1)]

x1_kgroups = X[np.where(zh_kgroups==0)]
x2_kgroups = X[np.where(zh_kgroups==1)]
# Column 4 of `df` holds the class of each row; map it through `classes` to
# obtain the reference labels, then drop it from the features.
# NOTE(review): `df`, `classes` and `rho_gauss` are defined above this
# excerpt.
z = np.array([classes[v] for v in df[4].values])
df = df.drop(4, axis=1)
data = df.values
# Standardise every column: subtract its mean and divide by its std.
data = (data - data.mean(axis=0))/data.std(axis=0)

G = eclust.kernel_matrix(data, rho_gauss)

k = 3
nt = 5  # value passed as run_times to every algorithm
# One result per algorithm, in the same order as `algos`.
r = []
r.append(wrapper.kmeans(k, data, run_times=nt))
r.append(wrapper.gmm(k, data, run_times=nt))
r.append(wrapper.spectral_clustering(k, data, G, run_times=nt))
r.append(wrapper.spectral(k, data, G, run_times=nt))
r.append(wrapper.kernel_kmeans(k, data, G, run_times=nt, ini='spectral'))
r.append(wrapper.kernel_kgroups(k,data,G,run_times=nt, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral', 
         'kernel k-means', 'kernel k-groups']

for algo, zh in zip(algos, r):
    t.add_row([algo, 
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh)
    ])

# print() with parentheses runs on both Python 2 and Python 3.
print(t)

            X, lambda x, y: np.power(np.linalg.norm(x - y), 0.5))
        G3 = eclust.kernel_matrix(
            X, lambda x, y: 2 - 2 * np.exp(-np.linalg.norm(x - y) / 2))

        zh = wrapper.kmeans(k, X)
        a = metric.accuracy(z, zh)
        r.append(['k-means', n, a])

        zh = wrapper.gmm(k, X)
        a = metric.accuracy(z, zh)
        r.append(['gmm', n, a])

        zh = wrapper.spectral_clustering(k, X, G3)
        a = metric.accuracy(z, zh)
        r.append([r'spectral clustering $\widetilde{\rho}_1$', n, a])

        zh = wrapper.kernel_kgroups(k, X, G1)
        a = metric.accuracy(z, zh)
        r.append([r'kernel k-groups $\rho_{1}$', n, a])

        zh = wrapper.kernel_kgroups(k, X, G2)
        a = metric.accuracy(z, zh)
        r.append([r'kernel k-groups $\rho_{1/2}$', n, a])

        zh = wrapper.kernel_kgroups(k, X, G3)
        a = metric.accuracy(z, zh)
        r.append([r'kernel k-groups $\widetilde{\rho}_{1}$', n, a])

df = pd.DataFrame(np.array(r), columns=['method', 'points', 'accuracy'])
df.to_csv(output, index=False)
# Matrix computed by eclust.kernel_matrix from `data` with `rho_gauss`.
# NOTE(review): `data`, `z` and `rho_gauss` are defined above this excerpt.
G = eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

# One result per algorithm for three clusters, ordered like `algos` below.
# Keep exactly one `ini` variant active per kernel method.
r = []
r.append(wrapper.kmeans(3, data, run_times=5))
r.append(wrapper.gmm(3, data, run_times=5))
r.append(wrapper.spectral_clustering(3, data, G, run_times=5))
r.append(wrapper.spectral(3, data, G, run_times=5))
#r.append(wrapper.kernel_kmeans(3, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kmeans(3, data, G, run_times=5, ini='k-means++'))
r.append(wrapper.kernel_kmeans(3, data, G, run_times=5, ini='spectral'))
#r.append(wrapper.kernel_kgroups(3,data,G,run_times=5, ini='random'))
#r.append(wrapper.kernel_kgroups(3,data,G,run_times=5, ini='k-means++'))
r.append(wrapper.kernel_kgroups(3, data, G, run_times=5, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = [
    'kmeans', 'GMM', 'spectral clustering', 'spectral', 'kernel k-means',
    'kernel k-groups'
]

# Score each result against the reference labels `z`.
for algo, zh in zip(algos, r):
    t.add_row([
        algo,
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh)
    ])

# Function-call form: same output on Python 2, valid on Python 3.
print(t)
    zh = wrapper.kmeans(k, X)
    a = metric.accuracy(z, zh)
    row.append(a)
    
    zh = wrapper.gmm(k, X)
    a = metric.accuracy(z, zh)
    row.append(a)
    
    zh = wrapper.spectral_clustering(k, X, G)
    a = metric.accuracy(z, zh)
    row.append(a)
    
    zh = wrapper.kernel_kmeans(k, X, G, ini='random')
    a = metric.accuracy(z, zh)
    row.append(a)
    
    
    zh = wrapper.kernel_kgroups(k, X, G, ini='random')
    a = metric.accuracy(z, zh)
    row.append(a)
    
    r.append(row)
r = np.array(r)

t = PrettyTable(['Method', 'Accuracy', 'Std'])
for i, m in enumerate(['k-means', 'gmm', 'spectral clustering', 
            'kernel k-means', 'kernel k-groups']):
    t.add_row([m, r[:,i].mean(), sem(r[:,i])])

print t
# Example #13
0
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

# Run every algorithm for six clusters (each called with run_times=10) and
# collect the results in the order of the names in `algos`.
# Other `ini` values tried for the two kernel methods: 'random', 'spectral'.
r = [
    wrapper.kmeans(6, data, run_times=10),
    wrapper.gmm(6, data, run_times=10),
    wrapper.spectral_clustering(6, data, G, run_times=10),
    wrapper.spectral(6, data, G, run_times=10),
    wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'),
    wrapper.kernel_kgroups(6, data, G, run_times=10, ini='k-means++'),
]

# Table with one row per algorithm: accuracy and adjusted Rand score of the
# result against the reference labels `z`.
t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral',
         'kernel k-means', 'kernel k-groups']

for algo, zh in zip(algos, r):
    t.add_row(
        [algo, metric.accuracy(z, zh),
         sklearn.metrics.adjusted_rand_score(z, zh)])