示例#1
0
def energy1D(X, z):
    """Score one-dimensional energy clustering against the true labels.

    ``energy1d.two_clusters1D`` is called once on ``X``; it returns the
    estimated labels together with a cost, and the cost is discarded here.
    A single call is enough because the 1D procedure is exact, so repeated
    runs would not change the answer.

    Returns the value of ``metric.accuracy`` for the true labels ``z`` and
    the estimated labels.
    """
    labels_hat, _cost = energy1d.two_clusters1D(X)
    score = metric.accuracy(z, labels_hat)
    return score
def energy1D(X, z):
    """Score one-dimensional energy clustering against the true labels.

    ``energy1d.two_clusters1D`` is called once on ``X``; it returns the
    estimated labels together with a cost, and the cost is discarded here.
    A single call is enough because the 1D procedure is exact, so repeated
    runs would not change the answer.

    Returns the value of ``metric.accuracy`` for the true labels ``z`` and
    the estimated labels.
    """
    labels_hat, _cost = energy1d.two_clusters1D(X)
    score = metric.accuracy(z, labels_hat)
    return score
示例#3
0
def kmeans(k, X, z, run_times=10, init='k-means++'):
    """Cluster ``X`` with k-means and score the labels against ``z``.

    ``run_times`` is the number of times the algorithm is run; it is
    forwarded to ``KMeans`` as ``n_init``.  ``init`` selects the seeding
    scheme and is one of {'k-means++', 'random'}.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module from the true labels ``z`` and the fitted labels.
    """
    model = KMeans(k, n_init=run_times, init=init)
    model.fit(X)
    labels_hat = model.labels_
    return (metric.accuracy(z, labels_hat),
            metric.variation_information(z, labels_hat))
def kmeans(k, X, z, run_times=10, init='k-means++'):
    """Cluster ``X`` with k-means and score the labels against ``z``.

    ``run_times`` is the number of times the algorithm is run; it is
    forwarded to ``KMeans`` as ``n_init``.  ``init`` selects the seeding
    scheme and is one of {'k-means++', 'random'}.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module from the true labels ``z`` and the fitted labels.
    """
    model = KMeans(k, n_init=run_times, init=init)
    model.fit(X)
    labels_hat = model.labels_
    return (metric.accuracy(z, labels_hat),
            metric.variation_information(z, labels_hat))
示例#5
0
def spectral(k, X, G, z, run_times=10):
    """Run sklearn spectral clustering on ``G`` and score it against ``z``.

    ``G`` is handed to ``SpectralClustering`` as a precomputed affinity
    matrix, and ``run_times`` (forwarded as ``n_init``) is the number of
    runs with different initializations.  ``X`` is accepted so the
    signature matches the other clustering helpers, but it is not used.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module from the true labels ``z`` and the predicted labels.
    """
    model = SpectralClustering(k, affinity='precomputed', n_init=run_times)
    labels_hat = model.fit_predict(G)
    return (metric.accuracy(z, labels_hat),
            metric.variation_information(z, labels_hat))
def spectral(k, X, G, z, run_times=10):
    """Run sklearn spectral clustering on ``G`` and score it against ``z``.

    ``G`` is handed to ``SpectralClustering`` as a precomputed affinity
    matrix, and ``run_times`` (forwarded as ``n_init``) is the number of
    runs with different initializations.  ``X`` is accepted so the
    signature matches the other clustering helpers, but it is not used.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module from the true labels ``z`` and the predicted labels.
    """
    model = SpectralClustering(k, affinity='precomputed', n_init=run_times)
    labels_hat = model.fit_predict(G)
    return (metric.accuracy(z, labels_hat),
            metric.variation_information(z, labels_hat))
示例#7
0
def gmm(k, X, z, run_times=10, init='kmeans'):
    """Fit a sklearn Gaussian mixture to ``X`` and score it against ``z``.

    ``init`` is one of {'kmeans', 'random'} and is forwarded as
    ``init_params``; ``run_times`` (forwarded as ``n_init``) is the number
    of runs with different initializations.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module from the true labels ``z`` and the labels predicted
    for ``X`` by the fitted model.
    """
    model = GMM(k, n_init=run_times, init_params=init)
    model.fit(X)
    labels_hat = model.predict(X)
    return (metric.accuracy(z, labels_hat),
            metric.variation_information(z, labels_hat))
def gmm(k, X, z, run_times=10, init='kmeans'):
    """Fit a sklearn Gaussian mixture to ``X`` and score it against ``z``.

    ``init`` is one of {'kmeans', 'random'} and is forwarded as
    ``init_params``; ``run_times`` (forwarded as ``n_init``) is the number
    of runs with different initializations.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module from the true labels ``z`` and the labels predicted
    for ``X`` by the fitted model.
    """
    model = GMM(k, n_init=run_times, init_params=init)
    model.fit(X)
    labels_hat = model.predict(X)
    return (metric.accuracy(z, labels_hat),
            metric.variation_information(z, labels_hat))
示例#9
0
def energy_hartigan(k, X, G, z, run_times=10, init="spectral"):
    """Run energy Hartigan clustering a few times and score the best run.

    Each of the ``run_times`` restarts obtains a starting assignment from
    ``initialize(init, k, G, X)``, refines it with
    ``eclust.energy_hartigan`` (``max_iter=300``) and evaluates
    ``eclust.objective`` on the result.  The labelling with the largest
    objective value is the one compared with the true labels ``z``.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module.

    Raises ValueError if ``run_times`` is smaller than 1, since no
    labelling would exist to be scored.
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1")

    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        Z0 = initialize(init, k, G, X)
        zh = eclust.energy_hartigan(k, G, Z0, max_iter=300)
        Zh = eclust.ztoZ(zh)
        score = eclust.objective(Zh, G)

        if score > best_score:
            best_score = score
            best_z = zh
        elif best_z is None:
            # The score did not beat -inf (e.g. it is NaN).  Keep this
            # labelling as a fallback so best_z is always defined, but
            # leave best_score alone so any later valid run replaces it.
            best_z = zh

    a = metric.accuracy(z, best_z)
    v = metric.variation_information(z, best_z)
    return a, v
def energy_hartigan(k, X, G, z, run_times=10, init="spectral"):
    """Run energy Hartigan clustering a few times and score the best run.

    Each of the ``run_times`` restarts obtains a starting assignment from
    ``initialize(init, k, G, X)``, refines it with
    ``eclust.energy_hartigan`` (``max_iter=300``) and evaluates
    ``eclust.objective`` on the result.  The labelling with the largest
    objective value is the one compared with the true labels ``z``.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module.

    Raises ValueError if ``run_times`` is smaller than 1, since no
    labelling would exist to be scored.
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1")

    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        Z0 = initialize(init, k, G, X)
        zh = eclust.energy_hartigan(k, G, Z0, max_iter=300)
        Zh = eclust.ztoZ(zh)
        score = eclust.objective(Zh, G)

        if score > best_score:
            best_score = score
            best_z = zh
        elif best_z is None:
            # The score did not beat -inf (e.g. it is NaN).  Keep this
            # labelling as a fallback so best_z is always defined, but
            # leave best_score alone so any later valid run replaces it.
            best_z = zh

    a = metric.accuracy(z, best_z)
    v = metric.variation_information(z, best_z)
    return a, v
示例#11
0
def energy_spectral(k, X, G, z, run_times=10, init="random"):
    """Run energy spectral clustering a few times and score the best run.

    Each of the ``run_times`` restarts calls ``initialization.topeigen`` on
    ``G`` and evaluates ``eclust.objective`` on the labels it returns; the
    labelling with the largest objective value is compared with the true
    labels ``z``.

    ``init`` chooses the initialization for k-means, which can be
    'k-means++' or 'random'.  It is forwarded to ``topeigen``; the default
    'random' matches the value that used to be hard-coded in that call.
    ``X`` is accepted for signature parity with the other helpers but is
    not used.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module.

    Raises ValueError if ``run_times`` is smaller than 1, since no
    labelling would exist to be scored.
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1")

    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        # NOTE(review): run_times is also passed down to topeigen, so the
        # inner routine presumably restarts run_times times per outer
        # iteration as well -- kept as in the original, confirm intent.
        zh = initialization.topeigen(k, G, run_times=run_times, init=init)
        Zh = eclust.ztoZ(zh)
        score = eclust.objective(Zh, G)

        if score > best_score:
            best_score = score
            best_z = zh
        elif best_z is None:
            # The score did not beat -inf (e.g. it is NaN).  Keep this
            # labelling as a fallback so best_z is always defined, but
            # leave best_score alone so any later valid run replaces it.
            best_z = zh

    a = metric.accuracy(z, best_z)
    v = metric.variation_information(z, best_z)
    return a, v
def energy_spectral(k, X, G, z, run_times=10, init="random"):
    """Run energy spectral clustering a few times and score the best run.

    Each of the ``run_times`` restarts calls ``initialization.topeigen`` on
    ``G`` and evaluates ``eclust.objective`` on the labels it returns; the
    labelling with the largest objective value is compared with the true
    labels ``z``.

    ``init`` chooses the initialization for k-means, which can be
    'k-means++' or 'random'.  It is forwarded to ``topeigen``; the default
    'random' matches the value that used to be hard-coded in that call.
    ``X`` is accepted for signature parity with the other helpers but is
    not used.

    Returns the pair ``(accuracy, variation_information)`` computed by the
    ``metric`` module.

    Raises ValueError if ``run_times`` is smaller than 1, since no
    labelling would exist to be scored.
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1")

    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        # NOTE(review): run_times is also passed down to topeigen, so the
        # inner routine presumably restarts run_times times per outer
        # iteration as well -- kept as in the original, confirm intent.
        zh = initialization.topeigen(k, G, run_times=run_times, init=init)
        Zh = eclust.ztoZ(zh)
        score = eclust.objective(Zh, G)

        if score > best_score:
            best_score = score
            best_z = zh
        elif best_z is None:
            # The score did not beat -inf (e.g. it is NaN).  Keep this
            # labelling as a fallback so best_z is always defined, but
            # leave best_score alone so any later valid run replaces it.
            best_z = zh

    a = metric.accuracy(z, best_z)
    v = metric.variation_information(z, best_z)
    return a, v
示例#13
0
    n0 = 500
    n1 = 500
    data_class0 = data[np.where(labels == 0)]
    data_class1 = data[np.where(labels == 1)]
    idx0 = np.random.choice(range(len(data_class0)), n0, replace=True)
    idx1 = np.random.choice(range(len(data_class1)), n1, replace=True)
    data, labels = shuffle_data([data_class0[idx0], data_class1[idx1]])

    #data = (data - data.mean(axis=0))/data.std(axis=0)

    rho = lambda x, y: np.power(np.linalg.norm(x - y), 1)
    #rho = lambda x, y: 2-2*np.exp(-np.power(np.linalg.norm(x-y),1)/(2*1**2))
    G = eclust.kernel_matrix(data, rho)

    labels_hat = run_clustering.kmeans(2, data)
    print accuracy(labels, labels_hat)
    print type_errors(labels, labels_hat)
    print

    labels_hat = run_clustering.gmm(2, data)
    print accuracy(labels, labels_hat)
    print type_errors(labels, labels_hat)
    print

    labels_hat = run_clustering.energy_hartigan(2,
                                                data,
                                                G,
                                                run_times=5,
                                                init="gmm")
    print accuracy(labels, labels_hat)
    print type_errors(labels, labels_hat)
    n0 = 500
    n1 = 500
    data_class0 = data[np.where(labels==0)]
    data_class1 = data[np.where(labels==1)]
    idx0 = np.random.choice(range(len(data_class0)), n0, replace=True)
    idx1 = np.random.choice(range(len(data_class1)), n1, replace=True)
    data, labels = shuffle_data([data_class0[idx0], data_class1[idx1]])

    #data = (data - data.mean(axis=0))/data.std(axis=0)

    rho = lambda x, y: np.power(np.linalg.norm(x-y), 1)
    #rho = lambda x, y: 2-2*np.exp(-np.power(np.linalg.norm(x-y),1)/(2*1**2))
    G = eclust.kernel_matrix(data, rho)
    
    labels_hat = run_clustering.kmeans(2, data)
    print accuracy(labels, labels_hat)
    print type_errors(labels, labels_hat)
    print
    
    labels_hat = run_clustering.gmm(2, data)
    print accuracy(labels, labels_hat)
    print type_errors(labels, labels_hat)
    print
    
    labels_hat = run_clustering.energy_hartigan(2, data, G, run_times=5,
                                                    init="gmm")
    print accuracy(labels, labels_hat)
    print type_errors(labels, labels_hat)
    print