def initialize(method, k, G, X, W):
    """Build the initial label matrix ``Z0`` for the requested strategy.

    ``"spectral"`` takes its labels from ``init.topeigen(k, G, W)`` and
    ``"k-means++"`` from ``init.kmeans_plus(k, X)``.  Any other ``method``
    value draws ``len(X)`` integer labels at random from ``0 .. k - 1``.
    The label vector is passed through ``eclust.ztoZ`` before it is returned.
    """
    if method == "spectral":
        return eclust.ztoZ(init.topeigen(k, G, W))
    if method == "k-means++":
        return eclust.ztoZ(init.kmeans_plus(k, X))
    # Every other method name falls through to a random labelling.
    random_labels = np.random.randint(0, k, len(X))
    return eclust.ztoZ(random_labels)
def initialize(method, k, G, X, W):
    """Build the initial label matrix ``Z0`` for the requested strategy.

    ``"spectral"`` takes its labels from ``init.topeigen(k, G, W)`` and
    ``"k-means++"`` from ``init.kmeans_plus(k, X)``.  Any other ``method``
    value draws ``len(X)`` integer labels at random from ``0 .. k - 1``.
    The label vector is passed through ``eclust.ztoZ`` before it is returned.
    """
    if method == "spectral":
        return eclust.ztoZ(init.topeigen(k, G, W))
    if method == "k-means++":
        return eclust.ztoZ(init.kmeans_plus(k, X))
    # Every other method name falls through to a random labelling.
    random_labels = np.random.randint(0, k, len(X))
    return eclust.ztoZ(random_labels)
def spectral(k, X, G, W=None, run_times=5):
    """Return the best-scoring labelling out of several spectral runs.

    Each run calls ``init.topeigen(k, G, W, run_times=run_times)``, converts
    the labels with ``eclust.ztoZ`` and scores them with
    ``eclust.objective``.  The labels of the highest-scoring run are returned.

    Parameters
    ----------
    k : number of clusters, forwarded to ``init.topeigen``.
    X : data points; only ``len(X)`` is used, to size the default ``W``.
    G : matrix forwarded to ``init.topeigen`` and ``eclust.objective``
        (presumably the kernel/Gram matrix -- confirm against callers).
    W : weight matrix; defaults to ``np.eye(len(X))`` when ``None``.
    run_times : number of runs to compare; must be at least 1.

    Returns
    -------
    The label vector produced by the best run.

    Raises
    ------
    ValueError
        If ``run_times`` is smaller than 1 (previously this surfaced as an
        ``UnboundLocalError`` on the ``return`` statement).
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1")
    # Identity test is safe even when W is an array, unlike ``W == None``.
    if W is None:
        W = np.eye(len(X))
    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        # NOTE(review): run_times is also forwarded to topeigen, so restarts
        # may be nested (outer loop x inner restarts) -- confirm intended.
        zh = init.topeigen(k, G, W, run_times=run_times)
        score = eclust.objective(eclust.ztoZ(zh), G, W)
        if best_z is None:
            # Keep the first run as a fallback so that NaN or -inf scores
            # can never leave the result unset.
            best_z = zh
        if score > best_score:
            best_score = score
            best_z = zh
    return best_z
def spectral(k, X, G, W=None, run_times=5):
    """Return the best-scoring labelling out of several spectral runs.

    Each run calls ``init.topeigen(k, G, W, run_times=run_times)``, converts
    the labels with ``eclust.ztoZ`` and scores them with
    ``eclust.objective``.  The labels of the highest-scoring run are returned.

    Parameters
    ----------
    k : number of clusters, forwarded to ``init.topeigen``.
    X : data points; only ``len(X)`` is used, to size the default ``W``.
    G : matrix forwarded to ``init.topeigen`` and ``eclust.objective``
        (presumably the kernel/Gram matrix -- confirm against callers).
    W : weight matrix; defaults to ``np.eye(len(X))`` when ``None``.
    run_times : number of runs to compare; must be at least 1.

    Returns
    -------
    The label vector produced by the best run.

    Raises
    ------
    ValueError
        If ``run_times`` is smaller than 1 (previously this surfaced as an
        ``UnboundLocalError`` on the ``return`` statement).
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1")
    # Identity test is safe even when W is an array, unlike ``W == None``.
    if W is None:
        W = np.eye(len(X))
    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        # NOTE(review): run_times is also forwarded to topeigen, so restarts
        # may be nested (outer loop x inner restarts) -- confirm intended.
        zh = init.topeigen(k, G, W, run_times=run_times)
        score = eclust.objective(eclust.ztoZ(zh), G, W)
        if best_z is None:
            # Keep the first run as a fallback so that NaN or -inf scores
            # can never leave the result unset.
            best_z = zh
        if score > best_score:
            best_score = score
            best_z = zh
    return best_z
def kernel_kgroups(k, X, G, W=None, run_times=5, ini="k-means++"):
    """Run kernel k-groups from several initializations and keep the best.

    Each run builds a starting matrix with ``initialize(ini, k, G, X, W)``,
    refines it with ``eclust.kernel_kgroups`` (at most 300 iterations) and
    scores the result with ``eclust.objective``.  The labels of the
    highest-scoring run are returned.

    Parameters
    ----------
    k : number of clusters.
    X : data points; used by the initializer and to size the default ``W``.
    G : matrix forwarded to the initializer, ``eclust.kernel_kgroups`` and
        ``eclust.objective`` (presumably the kernel/Gram matrix -- confirm
        against callers).
    W : weight matrix; defaults to ``np.eye(len(X))`` when ``None``.
    run_times : number of restarts to compare; must be at least 1.
    ini : initialization strategy name understood by ``initialize``.

    Returns
    -------
    The label vector produced by the best run.

    Raises
    ------
    ValueError
        If ``run_times`` is smaller than 1 (previously this surfaced as an
        ``UnboundLocalError`` on the ``return`` statement).
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1")
    # Identity test is safe even when W is an array, unlike ``W == None``.
    if W is None:
        W = np.eye(len(X))
    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        Z0 = initialize(ini, k, G, X, W)
        zh = eclust.kernel_kgroups(k, G, Z0, W, max_iter=300)
        score = eclust.objective(eclust.ztoZ(zh), G, W)
        if best_z is None:
            # Keep the first run as a fallback so that NaN or -inf scores
            # can never leave the result unset.
            best_z = zh
        if score > best_score:
            best_score = score
            best_z = zh
    return best_z
def kernel_kgroups(k, X, G, W=None, run_times=5, ini="k-means++"):
    """Run kernel k-groups from several initializations and keep the best.

    Each run builds a starting matrix with ``initialize(ini, k, G, X, W)``,
    refines it with ``eclust.kernel_kgroups`` (at most 300 iterations) and
    scores the result with ``eclust.objective``.  The labels of the
    highest-scoring run are returned.

    Parameters
    ----------
    k : number of clusters.
    X : data points; used by the initializer and to size the default ``W``.
    G : matrix forwarded to the initializer, ``eclust.kernel_kgroups`` and
        ``eclust.objective`` (presumably the kernel/Gram matrix -- confirm
        against callers).
    W : weight matrix; defaults to ``np.eye(len(X))`` when ``None``.
    run_times : number of restarts to compare; must be at least 1.
    ini : initialization strategy name understood by ``initialize``.

    Returns
    -------
    The label vector produced by the best run.

    Raises
    ------
    ValueError
        If ``run_times`` is smaller than 1 (previously this surfaced as an
        ``UnboundLocalError`` on the ``return`` statement).
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1")
    # Identity test is safe even when W is an array, unlike ``W == None``.
    if W is None:
        W = np.eye(len(X))
    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        Z0 = initialize(ini, k, G, X, W)
        zh = eclust.kernel_kgroups(k, G, Z0, W, max_iter=300)
        score = eclust.objective(eclust.ztoZ(zh), G, W)
        if best_z is None:
            # Keep the first run as a fallback so that NaN or -inf scores
            # can never leave the result unset.
            best_z = zh
        if score > best_score:
            best_score = score
            best_z = zh
    return best_z
# Kernel k-means with 10 restarts.  Only the k-means++ initialization is
# enabled; the other variants are kept commented out for easy comparison.
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='random'))
r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'))

# Kernel k-groups with 10 restarts, same initialization choices as above.
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random'))
r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral', 
         'kernel k-means', 'kernel k-groups']

# NOTE(review): zip() pairs names with results purely by position and stops
# at the shorter sequence, so `r` must hold its results in `algos` order.
for algo, zh in zip(algos, r):
    t.add_row([algo, 
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh)
    ])

# A single-argument print(...) call behaves the same on Python 2 and 3,
# whereas the bare `print t` statement is a SyntaxError on Python 3.
print(t)

# `zh` is the loop variable left over from the last table row, i.e. the
# labels of the last (algo, result) pair that zip() produced.
Z = np.array(eclust.ztoZ(z), dtype=int)
Zh = np.array(eclust.ztoZ(zh), dtype=int)

# Write the true and predicted label matrices as header-less CSV files.
df = pd.DataFrame(Z)
df.to_csv('data/dermatology_true_label_matrix.csv', index=False, header=None)

df = pd.DataFrame(Zh)
df.to_csv('data/dermatology_pred_label_matrix.csv', index=False, header=None)

# Example #8
# 0
    
    # Repeat the comparison on freshly generated data; row i of `table`
    # stores the accuracies of trial i, one column per method.
    # NOTE(review): only columns 0-2 are filled in the visible part; the
    # remaining columns are presumably filled further down -- confirm.
    num_experiments = 10
    table = np.zeros((num_experiments, 5))
    for i in range(num_experiments):
        # Draw a 1D sample X with reference labels z.
        X, z = data.univariate_lognormal([0, -1.5], [0.3, 1.5], [100, 100])
        #X, z = data.univariate_normal([0, 5], [1, 22], [15, 15])
        # Wrap every scalar in its own one-element row for the methods below.
        Y = np.array([[x] for x in X])
        k = 2

        # 1D energy clustering (runs on the raw 1D sample X)
        zh, cost = two_clusters1D(X)
        table[i,0] = accuracy(z, zh)
       
        # initialization: one start from k-means++ labels (Z0) and one from
        # spectral labels (Z1), both converted with eclust.ztoZ
        z0 = initialization.kmeanspp(k, Y, ret='labels')
        Z0 = eclust.ztoZ(z0)
        # rho is the norm of the difference between two points.
        rho = lambda x, y: np.linalg.norm(x-y)
        G = eclust.kernel_matrix(Y, rho)
        z1 = initialization.spectral(k, G)
        Z1 = eclust.ztoZ(z1)
        
        # Hartigan's method, started from the k-means++ initialization
        zh = eclust.energy_hartigan(k, G, Z0)
        table[i,1] = accuracy(z, zh)
        
        # Hartigan's method again, started from the spectral initialization
        zh = eclust.energy_hartigan(k, G, Z1)
        table[i,2] = accuracy(z, zh)
    
        # standard k-means
        km = KMeans(2)
        zh = km.fit_predict(Y)
# Example #9
# 0
# Kernel k-means with 10 restarts.  Only the k-means++ initialization is
# enabled; the spectral variant is kept commented out for easy comparison.
r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'))

# Kernel k-groups with 10 restarts; only k-means++ initialization is enabled.
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random'))
r.append(wrapper.kernel_kgroups(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = [
    'kmeans', 'GMM', 'spectral clustering', 'spectral', 'kernel k-means',
    'kernel k-groups'
]

# NOTE(review): zip() pairs names with results purely by position and stops
# at the shorter sequence, so `r` must hold its results in `algos` order.
for algo, zh in zip(algos, r):
    t.add_row([
        algo,
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh)
    ])

# A single-argument print(...) call behaves the same on Python 2 and 3,
# whereas the bare `print t` statement is a SyntaxError on Python 3.
print(t)

# `zh` is the loop variable left over from the last table row, i.e. the
# labels of the last (algo, result) pair that zip() produced.
Z = np.array(eclust.ztoZ(z), dtype=int)
Zh = np.array(eclust.ztoZ(zh), dtype=int)

# Write the true and predicted label matrices as header-less CSV files.
df = pd.DataFrame(Z)
df.to_csv('data/dermatology_true_label_matrix.csv', index=False, header=None)

df = pd.DataFrame(Zh)
df.to_csv('data/dermatology_pred_label_matrix.csv', index=False, header=None)