#username = mzr3
# Runs the expectation-maximization cluster tester over cluster counts 2..14
# on the breast-cancer-wisconsin CSV.
import pandas as pd

import EM as em

if __name__ == "__main__":
    breast_cancer = pd.read_csv('./breast-cancer-wisconsin.csv')

    # `.values` already yields one 2-D array for the whole table, so no
    # intermediate DataFrame copy is needed before slicing it.
    arr = breast_cancer.values
    y = arr[:, -1]      # last column
    X = arr[:, 0:-1]    # every column except the last

    # range(2, 15) covers 2 through 14 inclusive.
    # NOTE(review): the meaning of `plot` / `stats` is defined in the EM
    # module, which is not visible here - confirm there.
    tester = em.ExpectationMaximizationTestCluster(
        X,
        y,
        clusters=range(2, 15),
        plot=True,
        stats=True,
    )
    tester.run()
# NOTE(review): this section relies on `breast_cancer`, `em`, `kmtc`,
# `SparseRandomProjection` and `plt` being defined or imported elsewhere -
# confirm against the rest of the file.
li = list(breast_cancer)  # column labels of the frame
# Rebuild the frame from its raw values under the same column labels.
# NOTE(review): kept as-is because code outside this section may use the
# rebuilt `breast_cancer`; it does not change `arr` below.
breast_cancer = pd.DataFrame(breast_cancer.values, columns=li)
Class = li[-1]  # label of the last column

arr = breast_cancer.values
y = arr[:, -1]      # last column
X = arr[:, 0:-1]    # every column except the last

# Single definition of the swept cluster counts (2 through 14 inclusive).
# Both testers and the plot's x-axis use it, so they cannot drift apart.
clusters = range(2, 15)

# Transform X with a 4-component projection before clustering.
sp = SparseRandomProjection(n_components=4)
output = sp.fit_transform(X)

# run() is unpacked into two results per tester; the names suggest silhouette
# and V-measure scores - presumably one entry per cluster count, TODO confirm.
tester = em.ExpectationMaximizationTestCluster(
    output,
    y,
    clusters=clusters,
    plot=False,
    stats=True,
)
silhouette_EM, vmeasure_scores = tester.run()

tester = kmtc.KMeansTestCluster(
    output,
    y,
    clusters=clusters,
    plot=False,
    stats=True,
)
silhouette_kmeans, V_measure = tester.run()

# Plot the K-Means silhouette result against the cluster counts so the
# number of clusters to choose can be read off the curve.
plt.plot(clusters, silhouette_kmeans, 'r^-', label="K Means")