Example #1
import matplotlib.pyplot as plt
import numpy as np
import math

from sklearn.neighbors import KNeighborsClassifier
# Assumed: these project helpers live in the same util module used in Example #2.
from ay_hw_1.util import load_data, train_test_by_class_index, CLASS0, CLASS1
"""
Replace the Euclidean metric with the following metrics and test them. Summarize the test errors (i.e., when k = k∗) in a table. 
Use all of your training data and select the best k when k = {1; 6; 11; : : : ; 196}.
i. Minkowski Distance:
	
	B. with log10(p)  = {0.1; 0.2; 0.3; . . . 1}. In this case, use the k∗ you found
		for the Manhattan distance in 1(d)iA. What is the best log10(p)?
	
"""

if __name__ == "__main__":

    X_raw_data, Y_raw_data = load_data('../assets/data.csv')

    split_info_dict = {CLASS0: 70, CLASS1: 140}

    X_train, X_test, y_train, y_test = train_test_by_class_index(
        X_raw_data, Y_raw_data, split_info_dict)

    alternative_p = [10**p for p in np.arange(0.1, 1.1, 0.1)]
    train_accuracy = np.empty(len(alternative_p))
    test_accuracy = np.empty(len(alternative_p))

    for index, p in enumerate(alternative_p):
        # KNeighborsClassifier based on the Minkowski distance.
        # In both the sk_minkowski_p1_accurancy and minkowski_p1_accurancy figures,
        # the best k values were [1, 6, 11, 26]; k∗ = 6 from the Manhattan run is used here.
        knn_clf = KNeighborsClassifier(n_neighbors=6, p=p)
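        # Minimal sketch of how this loop could continue (assumed; not the original
        # code): fit on the training split and record train/test accuracy for this p.
        knn_clf.fit(X_train, y_train)
        train_accuracy[index] = knn_clf.score(X_train, y_train)
        test_accuracy[index] = knn_clf.score(X_test, y_test)

    # Report the best log10(p) by test accuracy and plot both curves.
    log10_p_values = np.log10(alternative_p)
    best_index = int(np.argmax(test_accuracy))
    print("best log10(p) = %.1f, test accuracy = %.3f"
          % (log10_p_values[best_index], test_accuracy[best_index]))

    plt.plot(log10_p_values, train_accuracy, label='Train accuracy')
    plt.plot(log10_p_values, test_accuracy, label='Test accuracy')
    plt.xlabel('log10(p)')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()
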
Example #2
#
__author__ = 'Aaron Yang'
__email__ = '*****@*****.**'
__date__ = '8/28/2019 1:32 PM'
from ay_hw_1.util import load_data
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
'''
i. Make scatterplots of the independent variables in the dataset. Use color to
show Classes 0 and 1.
'''
if __name__ == "__main__":
    # load data from csv file
    X_train, y_train = load_data('../assets/data.csv')

    fig, ax = plt.subplots(nrows=6, ncols=6, figsize=(32, 24))
    labels = [
        'pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle',
        'sacral_slope', 'pelvic_radius', 'degree_spondylolisthesis'
    ]
    result = ['Normal', 'Abnormal']
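    # A possible one-call alternative (an assumption, not the author's approach):
    # seaborn's pairplot can draw an equivalent colored scatter matrix, assuming
    # y_train holds integer class codes 0/1, e.g.
    #   df = pd.DataFrame(X_train, columns=labels)
    #   df['class'] = [result[c] for c in y_train]
    #   sns.pairplot(df, hue='class')
    # The explicit 6x6 loop below keeps finer control over colors, labels, and layout.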

    for x_index in range(0, 6):
        for y_index in range(0, 6):
            if x_index != y_index:
                ax[x_index, y_index].scatter(X_train[y_train == 0, x_index],
                                             X_train[y_train == 0, y_index],
                                             color='#0984e3',
                                             label=result[0],
                                             alpha=0.6,