예제 #1
0
# Clustering Evaluation Imports
from functools import partial

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs as sk_make_blobs

from yellowbrick.cluster import InterclusterDistance

# Shared defaults so that every generated dataset has the same size and shape
N_SAMPLES = 1000
N_FEATURES = 12
SHUFFLE = True

# Keyword arguments pre-bound onto scikit-learn's blob generator; callers of
# ``make_blobs`` only have to supply what differs (e.g. ``centers``).
_BLOB_DEFAULTS = {
    "n_samples": N_SAMPLES,
    "n_features": N_FEATURES,
    "shuffle": SHUFFLE,
}
make_blobs = partial(sk_make_blobs, **_BLOB_DEFAULTS)

if __name__ == '__main__':
    # Generate a dataset around 12 blob centers; the labels are not needed
    features, _ = make_blobs(centers=12)

    # Wrap a KMeans estimator constructed with 9 in the intercluster
    # distance visualizer
    estimator = KMeans(9)
    visualizer = InterclusterDistance(estimator)

    # Fit the visualizer on the generated features
    visualizer.fit(features)

    # Finalize the figure, passing images/icdm.png as the output path
    visualizer.poof(outpath="images/icdm.png")
예제 #2
0
# Clustering Evaluation Imports
from functools import partial

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs as sk_make_blobs

from yellowbrick.cluster import InterclusterDistance

# Dataset defaults reused by every call to ``make_blobs`` below
N_SAMPLES = 1000
N_FEATURES = 12
SHUFFLE = True

# scikit-learn's blob generator with the defaults above already bound, so
# call sites only pass the arguments that vary (e.g. ``centers``).
_BLOB_KWARGS = dict(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    shuffle=SHUFFLE,
)
make_blobs = partial(sk_make_blobs, **_BLOB_KWARGS)


if __name__ == '__main__':
    # Build the sample data: blobs around 12 centers (labels are discarded)
    data, _ = make_blobs(centers=12)

    # The visualizer wraps a KMeans estimator constructed with 9
    kmeans = KMeans(9)
    visualizer = InterclusterDistance(kmeans)

    # Fit on the sample data, then finalize the figure with
    # images/icdm.png as the output path
    visualizer.fit(data)
    visualizer.poof(outpath="images/icdm.png")
예제 #3
0
def cluster_metrics(i_patches, a_patches, g_patches, city_names, K, save_path,
                    g_indices):
    """Cluster ``a_patches`` for every k in ``K`` and save diagnostic plots.

    For each candidate cluster count the function obtains a fitted k-means
    model from ``get_kmeans144_result`` and records four scores:

    * inertia (sum of squared distances of samples to their nearest cluster
      centre),
    * silhouette score (euclidean metric),
    * Calinski-Harabasz score,
    * Davies-Bouldin score.

    The clustered patches are also handed to ``gt_metric`` together with a
    per-k path prefix.  Afterwards each score is plotted against ``K`` via
    ``plot_figure``, one intercluster distance map is saved per fitted model
    (every k is rendered; no "best k" selection is performed), and a t-SNE
    scatter plot coloured by ``g_indices`` is saved.

    Parameters
    ----------
    i_patches, a_patches, g_patches, city_names
        Forwarded unchanged to ``dict_cluster``; ``a_patches`` is also the
        input of ``get_kmeans144_result``.
    K : sequence
        Candidate numbers of clusters.  Must be non-empty.
    save_path : str
        Prefix used for every file name written by this function.
    g_indices : array-like
        Values used as the hue of the t-SNE scatter plot.
        NOTE(review): the palette holds exactly two colours, so this
        presumably contains two distinct classes - confirm with the caller.

    Raises
    ------
    ValueError
        If ``K`` is empty.
    """
    k_values = list(K)
    if not k_values:
        # Fail early with a clear message instead of an opaque error after
        # empty plots have already been written.
        raise ValueError('K must contain at least one cluster count')

    sum_of_squared_distances = []
    silhouette_scores = []
    calinski_harabasz_scores = []
    davies_bouldin_scores = []
    k_mean_list = []
    features = None
    for k in k_values:
        # The first element of the returned triple is not needed here.
        _, k_means, features = get_kmeans144_result(a_patches, k)
        k_mean_list.append(k_means)
        sum_of_squared_distances.append(k_means.inertia_)

        labels = k_means.labels_
        silhouette_scores.append(
            metrics.silhouette_score(features, labels, metric='euclidean'))
        calinski_harabasz_scores.append(
            metrics.calinski_harabasz_score(features, labels))
        davies_bouldin_scores.append(
            metrics.davies_bouldin_score(features, labels))

        mydict = dict_cluster(i_patches, a_patches, g_patches, city_names,
                              k_means)
        # Called for whatever it writes under the per-k prefix; its return
        # value was never consumed, so it is no longer bound to a name.
        gt_metric(mydict, '{}_{}'.format(save_path, k))

    plot_figure(K, sum_of_squared_distances, save_path,
                'sum_of_squared_distances')
    plot_figure(K, silhouette_scores, save_path, 'silhouette_scores')
    plot_figure(K, calinski_harabasz_scores, save_path,
                'calinski_harabasz_scores')
    plot_figure(K, davies_bouldin_scores, save_path, 'davies_bouldin_score')

    # Save the intercluster distance map of every fitted model.  The file
    # name carries the position of k within K (not k itself), as before.
    # ``features`` is the array returned for the last k in the loop above.
    for ind, k_means in enumerate(k_mean_list):
        visualizer = InterclusterDistance(k_means)
        visualizer.fit(features)
        # A single show() both finalizes and saves; clearing the figure
        # afterwards keeps consecutive maps from being drawn on top of each
        # other.  The deprecated poof() call that re-rendered the already
        # saved figure is gone.
        visualizer.show(
            outpath='{}_{}_InterclusterDistance.png'.format(save_path, ind),
            clear_figure=True)

    # Visualize through TSNE
    embedded = TSNE().fit_transform(features)
    plt.figure()
    palette = sns.color_palette("bright", 2)
    hue = np.asarray(g_indices).astype(np.float32)
    # x and y are passed by keyword: recent seaborn releases reject them as
    # positional arguments, while older releases accept both forms.
    sns.scatterplot(x=embedded[:, 0],
                    y=embedded[:, 1],
                    hue=hue,
                    legend='full',
                    palette=palette)
    plt.savefig('{}_tsne.png'.format(save_path))