Exemplo n.º 1
0
def normalize_data_2d():
    """Standardize the data, project it onto two principal components and plot it.

    The training samples and their labels come from ``ia_n2020.ioValues()``
    and the samples to be predicted from ``ia_n2020.newData()``.  The scaler
    and the PCA are fitted on the training samples only; the samples to be
    predicted are transformed with those already-fitted objects so both sets
    share the same coordinate system.

    A scatter plot of the projected training samples (label ``0`` drawn as
    green crosses, label ``1`` as blue circles) is written to
    ``graphs/pca_graph.png``.

    Returns:
        A tuple ``(input_values_2d, predict_values_2d)`` holding the 2-D
        projections of the training samples and of the samples to predict.
    """
    input_values, contaminated = ia_n2020.ioValues()
    predict_values = ia_n2020.newData()

    norm = StandardScaler()
    input_values_norm = norm.fit_transform(input_values)
    # Reuse the mean/variance learned from the training data; re-fitting on
    # the new samples would scale them differently from the training set.
    predict_values_norm = norm.transform(predict_values)

    pca = PCA(n_components=2)
    input_values_2d = pca.fit_transform(input_values_norm)
    # Same reasoning: project with the components learned from the training
    # data so both outputs live in the same PC1/PC2 space.
    predict_values_2d = pca.transform(predict_values_norm)

    # Collect the coordinates per label so each class is drawn with a single
    # scatter call instead of one call per point.
    clean_x, clean_y = [], []
    dirty_x, dirty_y = [], []
    for i, point in enumerate(input_values_2d):
        if contaminated[i] == 0:
            clean_x.append(point[0])
            clean_y.append(point[1])
        elif contaminated[i] == 1:
            dirty_x.append(point[0])
            dirty_y.append(point[1])

    handles = []
    labels = []
    series = (
        (clean_x, clean_y, 'x', 'g', 'Não contaminado'),
        (dirty_x, dirty_y, 'o', 'b', 'Contaminado'),
    )
    for xs, ys, marker, color, label in series:
        # Skip classes without samples so the legend only receives real
        # artists.
        if xs:
            handles.append(plt.scatter(xs, ys, marker=marker, color=color))
            labels.append(label)

    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.grid(True)
    plt.legend(handles, labels)
    plt.savefig('graphs/pca_graph.png')
    plt.close()

    return input_values_2d, predict_values_2d
Exemplo n.º 2
0
def normalize_data_3d():
    """Standardize the data and project it onto three principal components.

    The training samples come from ``ia_n2020.ioValues()`` and the samples to
    be predicted from ``ia_n2020.newData()``.  The scaler and the PCA are
    fitted on the training samples only; the samples to be predicted are
    transformed with those already-fitted objects so both sets share the same
    coordinate system.

    Returns:
        A tuple ``(input_values_3d, predict_values_3d)`` holding the 3-D
        projections of the training samples and of the samples to predict.
    """
    # The labels returned alongside the training samples are not needed here.
    input_values, _ = ia_n2020.ioValues()
    predict_values = ia_n2020.newData()

    norm = StandardScaler()
    input_values_norm = norm.fit_transform(input_values)
    # Apply the training-set scaling; re-fitting on the new samples would
    # normalize them with different statistics.
    predict_values_norm = norm.transform(predict_values)

    pca = PCA(n_components=3)
    input_values_3d = pca.fit_transform(input_values_norm)
    # Project with the components learned from the training data.
    predict_values_3d = pca.transform(predict_values_norm)

    return input_values_3d, predict_values_3d
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import ia_n2020
import pca_graph
import pandas as pd

# Cluster the 2-D PCA projection with KMeans: draw an inertia-vs-K curve,
# then fit a 2-cluster model and append its predictions for the new samples
# to output_data/predict_data.csv.
input_values, contaminated = ia_n2020.ioValues()
input_values_2d, predict_values_2d = pca_graph.normalize_data_2d()

# Inertia of a KMeans fit for every candidate number of clusters.
cluster_counts = range(1, 11)
inertia = [
    KMeans(n_clusters=k).fit(input_values_2d).inertia_
    for k in cluster_counts
]

# Plot against the real K values; plotting the list alone would use the
# implicit indices 0..9 and shift the curve by one on the K axis.
plt.plot(cluster_counts, inertia)
plt.xlabel('# Cluster - K')
plt.ylabel("Inertia")
plt.savefig('graphs/graph_kmeans_inertia')
plt.show()

# Final model with two clusters.
kmeans_clf = KMeans(n_clusters=2)
kmeans_clf.fit(input_values_2d)
# Cluster assignment of the training samples (replaces the labels read above).
contaminated = kmeans_clf.predict(input_values_2d)
centroid = kmeans_clf.cluster_centers_

# Assign the new samples to a cluster and store the result as an extra column
# of the existing predictions spreadsheet.
contaminated_predict = kmeans_clf.predict(predict_values_2d)
print(contaminated_predict)
spreadsheet = pd.read_csv('output_data/predict_data.csv')
spreadsheet['kmeans_predict'] = contaminated_predict
spreadsheet.to_csv('output_data/predict_data.csv', index=False)