# Example no. 1
# 0
#username = mzr3
import EM as em
import pandas as pd

if __name__ == "__main__":
    # Baseline run: EM cluster test on the unprojected feature columns.
    # The CSV path is relative to the current working directory.
    breast_cancer = pd.read_csv('./breast-cancer-wisconsin.csv')

    # `.values` already yields the full 2-D array of the frame, so there is
    # no need to rebuild the DataFrame from its own values first.
    arr = breast_cancer.values
    y = arr[:, -1]    # last column is passed as the label vector
    X = arr[:, :-1]   # all remaining columns are passed as features

    # Candidate cluster counts are 2..14 (the range end is exclusive).
    # NOTE(review): `plot` / `stats` presumably toggle plotting and metric
    # reporting inside the tester -- confirm against the EM module.
    tester = em.ExpectationMaximizationTestCluster(X,
                                                   y,
                                                   clusters=range(2, 15),
                                                   plot=True,
                                                   stats=True)
    tester.run()
# Second experiment: project the features down to 4 components with a sparse
# random projection, then run the EM and K-means cluster tests on the
# projected data and plot the K-means silhouette scores.
#
# NOTE(review): `breast_cancer` is only assigned inside the
# `if __name__ == "__main__":` guard, and `SparseRandomProjection`, `kmtc`
# and `plt` are not imported in the visible part of this file -- confirm
# where they are expected to come from.
li = list(breast_cancer)  # column labels of the frame
breast_cancer = pd.DataFrame(breast_cancer.values, columns=li)

# Label of the last column; not used again in the visible code.
Class = li[-1]

arr = breast_cancer.values
y = arr[:, -1]    # last column is passed as the label vector
X = arr[:, 0:-1]  # all remaining columns are passed as features
# Cluster counts 2..14 (range end is exclusive); used as the x axis below.
clusters = range(2, 15)

# Reduce the feature matrix to 4 randomly projected components.
sp = SparseRandomProjection(n_components=4)
output = sp.fit_transform(X)

# EM cluster test on the projected features.
# NOTE(review): judging by the target names, run() presumably returns
# (silhouette scores, V-measure scores) -- confirm against the EM module.
tester = em.ExpectationMaximizationTestCluster(output,
                                               y,
                                               clusters=range(2, 15),
                                               plot=False,
                                               stats=True)
silhouette_EM, vmeasure_scores = tester.run()

# K-means cluster test on the same projected features; `tester` is rebound.
tester = kmtc.KMeansTestCluster(output,
                                y,
                                clusters=range(2, 15),
                                plot=False,
                                stats=True)
silhouette_kmeans, V_measure = tester.run()
"""
Plot Silhouette Score from observations from the cluster centroid
to use the Elbow Method to identify number of clusters to choose
"""
# K-means silhouette values against cluster count (red line, triangle markers).
plt.plot(clusters, silhouette_kmeans, 'r^-', label="K Means")