Пример #1
0
def make_elbow(X):
    """Save an elbow plot of the R-square score for KMeans with k = 1..9.

    One KMeans model is fitted on ``X`` for each candidate number of
    clusters; every fit is scored with ``r_square`` and the resulting curve
    is written to ``fig/<type_exec>/k-means_elbow_method``.

    ``X`` must expose a ``.values`` attribute (e.g. a pandas DataFrame).
    ``KMeans``, ``r_square``, ``plt`` and ``type_exec`` are taken from the
    enclosing module.
    """
    cluster_counts = range(1, 10)
    scores = []

    for n_clusters in cluster_counts:
        # fit() hands back the estimator, so build and fit in one expression.
        model = KMeans(n_clusters=n_clusters).fit(X)
        score = r_square(X.values, model.cluster_centers_, model.labels_,
                         n_clusters)
        scores.append(score)

    figure = plt.figure()
    plt.plot(cluster_counts, scores, "bx-")
    plt.gca().set(xlabel="k",
                  ylabel="RSQ score",
                  title="The Elbow Method showing the optimal k")
    figure.savefig("fig/" + type_exec + "/k-means_elbow_method")
    plt.close(figure)
Пример #2
0
# Save the current figure to 'fig/k-means_ground_truth', then close `fig`
# so it does not stay open while the following plots are built.
plt.savefig('fig/k-means_ground_truth')
plt.close(fig)

# Compute R-square, i.e. V_inter/V
from R_square_clustering import r_square
from purity import purity_score

# Plot elbow graphs for KMeans using R-square and purity scores.
# For every candidate k in 2..10 a model is fitted on X_norm and both scores
# are recorded; the two curves are then drawn on the same axes.
lst_k = range(2, 11)
lst_rsq = []
lst_purity = []
for k in lst_k:
    est = KMeans(n_clusters=k)
    est.fit(X_norm)
    lst_rsq.append(
        r_square(X_norm.to_numpy(), est.cluster_centers_, est.labels_, k))
    lst_purity.append(purity_score(y.to_numpy(), est.labels_))

fig = plt.figure()
# Label each curve: both share one axis, so without a legend the saved image
# does not say which line is R-square and which is purity.
plt.plot(lst_k, lst_rsq, 'bx-', label='RSQ')
plt.plot(lst_k, lst_purity, 'rx-', label='purity')
plt.xlabel('k')
plt.ylabel('RSQ/purity score')
plt.title('The Elbow Method showing the optimal k')
plt.legend()
plt.savefig('fig/k-means_elbow_method')
# Close the figure created above explicitly rather than "whatever is current".
plt.close(fig)

# # hierarchical clustering
from scipy.cluster.hierarchy import dendrogram, linkage

lst_labels = map(lambda pair: pair[0] + str(pair[1]),
Пример #3
0
    cercle = plt.Circle((0, 0), 1, color='blue', fill=False)
    axes.add_artist(cercle)
    plt.savefig('acp_correlation_circle_axes_' + str(x_axis) + '_' +
                str(y_axis))
    plt.close(fig)


# Run correlation_circle on `data`.
# NOTE(review): the meaning of the numeric arguments (9, 2, 3) depends on the
# function's signature, which is defined above — confirm against it.
correlation_circle(data, 9, 2, 3)

# Question 5: R-square elbow curve for KMeans with k = 2..7 on X_norm,
# saved as 'r_square'.
lst_k = range(2, 8)
lst_rsq = []
for k in lst_k:
    # fit() returns the fitted estimator, so creation and fitting are chained.
    est = KMeans(n_clusters=k).fit(X_norm)
    lst_rsq.append(
        r_square(X_norm, est.cluster_centers_, est.labels_, k))

fig = plt.figure()
plt.plot(lst_k, lst_rsq, 'bx-')
plt.gca().set(xlabel='k',
              ylabel='RSQ',
              title='The Elbow Method showing the optimal k')
fig.savefig('r_square')
plt.close(fig)

# Fit a KMeans model with 5 clusters.
# NOTE(review): this fits on X, while the elbow loop above was run on
# X_norm — confirm which input is intended here.
est = KMeans(n_clusters=5)

est.fit(X)

# print centroids associated with several countries
# (country codes whose cluster centroids are inspected below)
lst_countries = ['EL', 'FR', 'DE', 'US']
# centroid of the entire dataset
Пример #4
0
def cah_cat():
    """Run a KMeans elbow analysis on the categorical columns of dem.csv.

    Steps:
      1. Read ``../donnees/fusion/dem.csv``.
      2. Drop the columns listed below (membership flag, dates, numeric data).
      3. One-hot encode the remaining columns after casting them to ``str``.
      4. For k = 2..7, fit KMeans, record the R-square score and print the
         cluster assigned to every row.
      5. Save the elbow curve to ``../fig/k-means_elbow_method``.

    Returns nothing; output goes to stdout and to the saved figure.

    NOTE(review): ``r_square`` is not imported inside this function, so it
    must be available in the enclosing module.
    """
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn.cluster import KMeans
    print("###############################################")
    print("#####RUN KMEANS DEMISSIONAIRES CATEGORIELS#####")
    print("###############################################")
    clients_dem = pd.read_csv('../donnees/fusion/dem.csv', sep=',')
    print(clients_dem)

    # Columns removed before encoding: the first three are the membership
    # flag and date columns, the last four are the numeric data.
    dropped_columns = (
        'is_adh',
        'DTADH',
        'DTDEM',
        'MTREV',
        'AGEAD',
        'agedem',
        'adh',
    )
    for column in dropped_columns:
        del clients_dem[column]

    print(clients_dem)

    X_cat_one_hot = pd.get_dummies(clients_dem.astype(str))
    print(X_cat_one_hot)

    ####
    ## kmeans
    ####

    # Both values are the same for every k, so compute them once.
    X_values = X_cat_one_hot.to_numpy()
    # Plain 1-D list of row labels: a DataFrame used as a single dict value
    # is a 2-D object and is not a valid column for the DataFrame constructor.
    row_codes = clients_dem.index.values.tolist()

    # Plot elbow graph for KMeans using R-square, i.e. V_inter/V
    lst_k = range(2, 8)
    lst_rsq = []
    for k in lst_k:
        est = KMeans(n_clusters=k)
        est.fit(X_cat_one_hot)
        lst_rsq.append(
            r_square(X_values, est.cluster_centers_, est.labels_, k))
        # TODO: compute a purity score per k and plot it as a second curve
        print("------------- Groupe de " + str(k) + " clusters ---------")
        print(pd.DataFrame({"code": row_codes, "cluster": est.labels_}))

    fig = plt.figure()
    plt.plot(lst_k, lst_rsq, 'bx-')
    plt.xlabel('k')
    plt.ylabel('RSQ/purity score')
    plt.title('The Elbow Method showing the optimal k')
    plt.savefig('../fig/k-means_elbow_method')
    # Close the figure created above by handle.
    plt.close(fig)

    print("###############################################")
    print("#####END KMEANS DEMISSIONAIRES CATEGORIELS#####")
    print("###############################################")
Пример #5
0
    print('\tcentroid: ' + str(est.cluster_centers_[num_cluster]))

# Two separator lines before the elbow analysis.
print(
    '--------------------------------------------------------------------------',
    '--------------------------------------------------------------------------',
    sep='\n')

# R-square elbow curve for KMeans with k = 2..8 on `eurofit`.
lst_k = range(2, 9)
lst_rsq = []

for k in lst_k:
    # fit() returns the fitted estimator, so creation and fitting are chained.
    est = KMeans(n_clusters=k).fit(eurofit)
    lst_rsq.append(
        r_square(eurofit, est.cluster_centers_, est.labels_, k))

fig = plt.figure()
plt.plot(lst_k, lst_rsq, 'bx-')
plt.gca().set(xlabel='k',
              ylabel='RSQ',
              title='The Elbow Method showing the optimal k')
# Save before show(): the figure is written to 'R2' and then displayed.
plt.savefig('R2')
plt.show()
plt.close()

# Two separator lines after the elbow analysis.
print(
    '--------------------------------------------------------------------------',
    '--------------------------------------------------------------------------',
    sep='\n')