示例#1
0
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import clusters
import plot

# Fit a k-means pipeline on the project data, tag every row with its
# predicted cluster, and compute per-cluster summary statistics.
num_clusters = 4
data = clusters.get_data()

pipe = clusters.get_pipeline_kmeans(num_clusters)
pipe.fit(data)

# Only the third element of the returned triple is kept.
_, _, data_pca = clusters.get_pca_data_kmeans(num_clusters)

# NOTE: `data_with_clusters` is an alias of `data`, not a copy, so the
# 'predicted_cluster' column is added to `data` as well. The prediction is
# evaluated before the column is attached, so it does not see that column.
data['predicted_cluster'] = pipe.predict(data)
data_with_clusters = data

# Group once and reuse the grouping for every aggregate.
by_cluster = data_with_clusters.groupby('predicted_cluster', as_index=False)
mean_vals = by_cluster.mean()
max_vals = by_cluster.max()
min_vals = by_cluster.min()
std_dev_vals = by_cluster.std()

# Debug aid: print(min_vals.to_markdown())

# Set the plotting style and open an 8x8 figure for the plots that follow.
plt.style.use("fivethirtyeight")
plt.figure(figsize=(8, 8))
示例#2
0
            color = cm.nipy_spectral(float(i) / n_clusters)
            ax1.fill_betweenx(np.arange(y_lower, y_upper),
                              0,
                              ith_cluster_silhouette_values,
                              facecolor=color,
                              edgecolor=color,
                              alpha=0.7)

            # Label the silhouette plots with their cluster numbers at the middle
            ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

            # Compute the new y_lower for next plot
            y_lower = y_upper + 10  # 10 for the 0 samples

        ax1.set_title("The silhouette plot for the various clusters.")
        ax1.set_xlabel("The silhouette coefficient values")
        ax1.set_ylabel("Cluster label")

        # The vertical line for average silhouette score of all the values
        ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

        ax1.set_yticks([])  # Clear the yaxis labels / ticks
        ax1.set_xticks([-1, -0.8, -.6, -.4, -.2, 0, 0.2, 0.4, 0.6, 0.8, 1])

    plt.show()


# Load the data once and reuse it for both silhouette plots.
data = get_data()

# Plot twice: first with the function's default `usePCA` setting, then with
# `usePCA` explicitly disabled.
# NOTE(review): 8 is presumably a cluster count; confirm against the
# signature of plot_average_silhouette.
for _extra_kwargs in ({}, {"usePCA": False}):
    plot_average_silhouette(data, 8, **_extra_kwargs)