import matplotlib.pyplot as plt
import numpy as np

from sklearn.neighbors import KNeighborsClassifier

# Project helpers; load_data lives in ay_hw_1.util (see the sibling script),
# and the split helpers and class constants are assumed to live there too.
from ay_hw_1.util import CLASS0, CLASS1, load_data, train_test_by_class_index

"""
Replace the Euclidean metric with the following metrics and test them.
Summarize the test errors (i.e., when k = k*) in a table. Use all of your
training data and select the best k from k in {1, 6, 11, ..., 196}.

i. Minkowski Distance:
    B. with log10(p) in {0.1, 0.2, 0.3, ..., 1}. In this case, use the k*
       you found for the Manhattan distance in 1(d)iA. What is the best
       log10(p)?
"""

if __name__ == "__main__":
    X_raw_data, y_raw_data = load_data('../assets/data.csv')
    split_info_dict = {CLASS0: 70, CLASS1: 140}
    X_train, X_test, y_train, y_test = train_test_by_class_index(
        X_raw_data, y_raw_data, split_info_dict)

    # log10(p) in {0.1, 0.2, ..., 1.0}, so p = 10 ** log10(p)
    alternative_p = [10 ** p for p in np.arange(0.1, 1.1, 0.1)]
    train_accuracy = np.empty(len(alternative_p))
    test_accuracy = np.empty(len(alternative_p))

    for index, p in enumerate(alternative_p):
        # KNeighborsClassifier based on the Minkowski distance.
        # In both the sk_minkowski_p1_accuracy and minkowski_p1_accuracy
        # plots, the best k values were [1, 6, 11, 26]; k* = 6 is used here.
        knn_clf = KNeighborsClassifier(n_neighbors=6, p=p)
        knn_clf.fit(X_train, y_train)
        train_accuracy[index] = knn_clf.score(X_train, y_train)
        test_accuracy[index] = knn_clf.score(X_test, y_test)

    # Report the best log10(p) by test accuracy and plot both curves.
    log10_ps = np.arange(0.1, 1.1, 0.1)
    print("best log10(p) =", log10_ps[np.argmax(test_accuracy)])
    plt.plot(log10_ps, train_accuracy, label='train accuracy')
    plt.plot(log10_ps, test_accuracy, label='test accuracy')
    plt.xlabel('log10(p)')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()
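
# For reference: sklearn's KNeighborsClassifier uses the Minkowski metric,
#     d_p(x, y) = (sum_i |x_i - y_i| ** p) ** (1 / p),
# which gives the Manhattan distance at p = 1 and the Euclidean distance at
# p = 2. The sketch below is a minimal, self-contained check of that formula
# against scipy.spatial.distance.minkowski; the vectors are illustrative and
# not taken from the assignment's dataset.
import numpy as np
from scipy.spatial.distance import minkowski


def minkowski_distance(x, y, p):
    """d_p(x, y) = (sum_i |x_i - y_i| ** p) ** (1 / p)."""
    return float(np.sum(np.abs(np.asarray(x) - np.asarray(y)) ** p) ** (1.0 / p))


x, y = np.array([1.0, 2.0, 3.0]), np.array([4.0, 0.0, 3.0])
for log10_p in (0.1, 0.5, 1.0):  # the grid above uses p = 10 ** log10(p)
    p = 10 ** log10_p
    assert np.isclose(minkowski_distance(x, y, p), minkowski(x, y, p))
    print(f"log10(p)={log10_p:.1f}  p={p:.3f}  d={minkowski_distance(x, y, p):.4f}")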
# __author__ = 'Aaron Yang'
# __email__ = '*****@*****.**'
# __date__ = '8/28/2019 1:32 PM'

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from ay_hw_1.util import load_data

'''
i. Make scatterplots of the independent variables in the dataset.
   Use color to show Classes 0 and 1.
'''

if __name__ == "__main__":
    # load data from csv file
    X_train, y_train = load_data('../assets/data.csv')

    fig, ax = plt.subplots(nrows=6, ncols=6, figsize=(32, 24))
    labels = [
        'pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle',
        'sacral_slope', 'pelvic_radius', 'degree_spondylolisthesis'
    ]
    result = ['Normal', 'Abnormal']

    # One panel per ordered pair of features; the diagonal is left empty.
    for x_index in range(0, 6):
        for y_index in range(0, 6):
            if x_index != y_index:
                # Class 0 (Normal) in blue
                ax[x_index, y_index].scatter(X_train[y_train == 0, x_index],
                                             X_train[y_train == 0, y_index],
                                             color='#0984e3',
                                             label=result[0],
                                             alpha=0.6)
                # Class 1 (Abnormal) in red
                ax[x_index, y_index].scatter(X_train[y_train == 1, x_index],
                                             X_train[y_train == 1, y_index],
                                             color='#d63031',
                                             label=result[1],
                                             alpha=0.6)
                ax[x_index, y_index].set_xlabel(labels[x_index])
                ax[x_index, y_index].set_ylabel(labels[y_index])
                ax[x_index, y_index].legend()

    plt.tight_layout()
    plt.show()
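
# Since this script already imports pandas and seaborn, the same pairwise
# scatter matrix can be drawn in one call with seaborn's pairplot. This is a
# minimal alternative sketch, not the original assignment code; it assumes
# load_data returns X_train as an (n, 6) array whose columns follow the
# `labels` order above, and y_train as 0/1 class labels.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from ay_hw_1.util import load_data

X_train, y_train = load_data('../assets/data.csv')
labels = [
    'pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle',
    'sacral_slope', 'pelvic_radius', 'degree_spondylolisthesis'
]

# Wrap the array in a DataFrame so seaborn can color points by class.
df = pd.DataFrame(X_train, columns=labels)
df['class'] = pd.Series(y_train).map({0: 'Normal', 1: 'Abnormal'})

sns.pairplot(df, hue='class', plot_kws={'alpha': 0.6})
plt.show()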