def evaluation_scores(groundtruth, labels_pred):
    """
    Score a predicted clustering against the groundtruth assignment.

    Every score is obtained by passing ``(groundtruth, labels_pred)`` to the
    matching helper of the ``metrics`` module.

    :param: groundtruth (type list): the groundtruth (GT) cluster assignment;
        each element is one item's GT cluster_id.
    :param: labels_pred (type list): the predicted cluster assignment;
        each element is one item's predicted cluster_id.
    :return: a 7-tuple, in this order: normalized mutual information,
        accuracy, F-measure, purity, random index, adjusted Rand score and
        the result of ``metrics.get_map_pairs``.
    """
    nmi = metrics.normalized_mutual_info_score(groundtruth, labels_pred)
    accuracy = metrics.accuracy(groundtruth, labels_pred)
    f_measure = metrics.f_measure(groundtruth, labels_pred)
    purity = metrics.purity(groundtruth, labels_pred)
    rand_index = metrics.random_index(groundtruth, labels_pred)
    adjusted_rand = metrics.adjusted_rand_score(groundtruth, labels_pred)
    pair_mapping = metrics.get_map_pairs(groundtruth, labels_pred)

    return (
        nmi,
        accuracy,
        f_measure,
        purity,
        rand_index,
        adjusted_rand,
        pair_mapping,
    )
# casestudy_iris_pca.py
# Case study: reduce the Iris data to two dimensions with PCA, run
# hierarchical clustering on the reduced data, then print and plot the results.
import data_iris
import hierarchical
import matplotlib.pyplot as plt
import metrics
import numpy as np
import pca
import plot_data

# (1) load data
iris = data_iris.iris()
X, class_label = iris.load()

# perform pca and reduce dimension to 2
model_pca = pca.pca()
model_pca.fit(X)
R = model_pca.data_reduced_dimension(reduced_dim=2)
plot_data.plot_scatter_class(
    R,
    class_label,
    "Iris Data Projected to 2 Dimensions using PCA",
    "u0",
    "u1",
)

# (2) create model
model = hierarchical.hierarchical()

# (3) fit model on the 2-d projection
model.fit(R)
print("Time fit: {}".format(model.time_fit))

# (4) results, all taken from the same saved clustering level
level = -3

purity_value = metrics.purity(model.clustersave[level], class_label)
print("Purity: {}".format(purity_value))

davies_bouldin_value = metrics.davies_bouldin(R, model.clustersave[level])
print("Davies-Bouldin: {}".format(davies_bouldin_value))

silhouette_value = metrics.silhouette(R, model.clustersave[level])
print("Silhouette: {}".format(silhouette_value))

model.plot_cluster(
    nlevel=level,
    title="Hierarchical Clustering for Iris Dataset reduced to 2d",
    xlabel="u0",
    ylabel="u1",
)
metrics.plot_cluster_distribution(model.clustersave[level], class_label)

# Display every figure created above.
plt.show()
# Grid-search LinearSVC hyper-parameters for the metric named by `score`,
# then report the tuned model's results on the held-out test split.
#
# NOTE(review): `time`, `score`, `param_grid`, `GridSearchCV`, `LinearSVC`,
# `classification_report`, `metrics` and the x/y train/test splits are not
# defined in this fragment; they are expected to come from earlier in the file.
# NOTE(review): indentation was lost in this fragment. If these statements
# originally sat inside a loop over `score`, re-indent them accordingly.
start = time.time()
print("# Tuning hyper-parameters for", score, "\n")

base_estimator = LinearSVC(C=1, max_iter=1000)
clf = GridSearchCV(base_estimator, param_grid, cv=5, scoring='%s' % score)
clf.fit(x_train, y_train)

print("Best parameters set found on development set:\n")
print(clf.best_params_)
print("Best value for ", score, ":\n")
print(clf.best_score_)

Y_true = y_test
Y_pred = clf.predict(x_test)

print("Report")
print(classification_report(Y_true, Y_pred, digits=6))
print(metrics.purity(Y_true, Y_pred))
print(metrics.entropy(Y_true, Y_pred))
# NOTE(review): GridSearchCV.score uses the `scoring` metric when one is
# given, so this is accuracy only if `score` names an accuracy scorer - confirm.
print("Accuracy: ", clf.score(x_test, y_test))
print("Time taken:", time.time() - start, "\n")

# `endtime` is captured before the next print, so the two totals below can
# differ slightly.
endtime = time.time()
print("svc time cost: ", time.time() - start)
print("Total time taken: ", endtime - start, "seconds.")
print("********************************************************")

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter
from sklearn import manifold, datasets
import numpy as np
import matplotlib