# (e) Linear Discriminant Analysis: fit on the training set, evaluate on the
# test set via a confusion matrix and the misclassification (error) rate.
lda = LinearDiscriminantAnalysis()
lda_pred = lda.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, lda_pred),
                   index=['y=0', 'y=1'],
                   columns=['y_pred=0', 'y_pred=1']))
# BUG FIX: the label says "error rate" but the raw accuracy was printed;
# report 1 - accuracy, consistent with the error_rate computation in (i).
print('error rate: ', 1 - accuracy_score(y_test, lda_pred))  # 62.5%

# (f) Quadratic Discriminant Analysis, same evaluation protocol as (e).
qda = QuadraticDiscriminantAnalysis()
qda_pred = qda.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, qda_pred),
                   index=['y=0', 'y=1'],
                   columns=['y_pred=0', 'y_pred=1']))
print('error rate: ', 1 - accuracy_score(y_test, qda_pred))  # 58.7%

# (g) 1-nearest-neighbor classifier.
knn = KNeighborsClassifier(n_neighbors=1)
knn_pred = knn.fit(x_train, y_train).predict(x_test)
print('error rate: ', 1 - accuracy_score(y_test, knn_pred))  # 49%

# (h): Logistic and LDA models are the best.

# (i) KNN: sweep a few values of k and keep the one with the lowest test
# error rate.  Plain Python lists replace repeated np.append calls (each of
# which reallocates the whole array), and — BUG FIX — keep k as an int:
# np.append produced a float array, so best_k came out as e.g. 5.0, which
# KNeighborsClassifier(n_neighbors=...) rejects.
error_rates = []
k_values = []
for k in (5, 10, 20):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn_pred = knn.fit(x_train, y_train).predict(x_test)
    k_values.append(k)
    error_rates.append(1 - accuracy_score(y_test, knn_pred))
best_k = k_values[int(np.argmin(error_rates))]
) print("calculating DECISION TREE please waite...") dt = DecisionTreeClassifier(random_state=0) dt.fit(fea_train, np.array(labels_train)) dt.pred = dt.predict(fea_test) calculate_result(np.array(labels_test), dt.pred) print( "------------------------------------------------------------------------" ) print("calculating KNN please waite...") knn = KNeighborsClassifier(n_neighbors=5) knn.fit(fea_train, np.array(labels_train)) knn.pred = knn.predict(fea_test) calculate_result(np.array(labels_test), knn.pred) print( "------------------------------------------------------------------------" ) print("calculating Nueral networks please waite...") paramters = L_layer_model(fea_train.T, np.array(labels_train).T, [50, 100, 50, 10]) nnpred = NN_predict(fea_test.T, np.array(labels_test).T, paramters) calculate_result(np.array(labels_test), np.array(nnpred)) print( "=========================================================================" ) print("Test optimal model on test set")
plt.ylabel('Error Rate') print("the misclassification error for each k value is : ", np.round(MSE, 3)) return nearest_k query = list(range(0, 50)) optimal_k = find_optimal_k(X_train, Y_train, query) # K-NN with optimal K knn = KNeighborsClassifier(n_neighbors=optimal_k) knn.fit(X_train, Y_train) pred = knn.pred(X_test) """ Model Evaluation """ # Confusion Matrix plt.figure() cm = confusion_matrix(Y_test, pred) class_label = ["negative", "positive"] df_cm_test = pd.DataFrame(cm, index=class_label, columns=class_label) sns.heatmap(df_cm_test, annot=True, fmt="d") plt.title("Confusion Matrix for Test datas") plt.xlabel("Predicted Label") plt.ylabel("True Label") # classification report