color=color, lw=1, label='ROC curve of class {0} (area = {1:0.2f})' ''.format(i, roc_auc[i])) plt.plot([0, 1], [0, 1], 'k--', lw=1) plt.xlim([-0.05, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title(title) plt.legend(loc="lower right") plt.show() if __name__ == "__main__": X_data, Y_data = load_data(ROOT_PATH + SPLASH + MFCC_FILE_PATH) X_train, X_test, Y_train, Y_test = train_test_split_by_ratio( X_data, Y_data, test_size=0.3, random_state=2333) parameters = { "svc__tol": [1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8], "svc__C": np.logspace(-2, 5, 10) } for label_index in range(Y_train.shape[1]): y_train = Y_train[:, label_index] y_test = Y_test[:, label_index] smote = SMOTE(random_state=2333) X_train_smote, y_train_smote = smote.fit_sample(X_train, y_train) X_test_smote, y_test_smote = smote.fit_sample(X_test, y_test)
__email__ = '*****@*****.**' __date__ = '10/30/2019 10:57 PM' from collections import Counter import numpy as np import pandas as pd from sklearn.cluster import KMeans from ay_hw_5._global import ROOT_PATH, SPLASH, MFCC_FILE_PATH, LABELS_NAME from ay_hw_5.util_data import load_data PREDICT_LABEL = 'predicted' if __name__ == "__main__": X_train, y_data = load_data(ROOT_PATH + SPLASH + MFCC_FILE_PATH) y_data = pd.DataFrame(y_data, columns=LABELS_NAME) hamming_loss_list = list() hamming_dist = list() avg_scores = [] for i in range(1, 10): temp_avg_scores = [] predicted_results = [] for k in range(3, 5): k_means_clf = KMeans(n_clusters=k, random_state=i) predicted_labels = k_means_clf.fit_predict(X_train) predicted_results.append(predicted_labels) temp_avg_scores.append(silhouette_score(X_train, predicted_labels)) hamming_dist.append( sum( np.min(cdist(X_train, k_means_clf.cluster_centers_,