def run_knn(points):
    """
    Trains a KNN(5) model on the data, prints its prediction for the first
    point next to that point's label, then runs cross validation on the data

    :param points: set of data; items must expose a ``label`` attribute
    """
    classifier = KNN(5)
    classifier.train(points)

    predicted = classifier.predict(points[0])
    print(f'predicted class: {predicted}')
    actual = points[0].label
    print(f'true class: {actual}')

    # 10 is passed as the second run_cv argument (presumably the fold count).
    validator = CrossValidation()
    validator.run_cv(points, 10, classifier, accuracy_score)
def run_knn(points):
    """
    Fits a KNN(5) model, compares its prediction for the first point with
    the point's label, and cross-validates the model on the same data

    :param points: set of data
    """
    knn = KNN(5)
    knn.train(points)

    # Sanity check on a single sample: predicted vs. stored label.
    guess = knn.predict(points[0])
    print(f'predicted class: {guess}')
    print(f'true class: {points[0].label}')

    CrossValidation().run_cv(points, 10, knn, accuracy_score)
def k_fold_cross_validation(points, k):
    """
    Cross-validates a knn with the given k on a set of data, once for each
    of the fold values 2, 10 and 20

    :param points: set of data
    :param k: value for knn
    """
    print(f"K={k}")
    for n_folds in (2, 10, 20):
        # A fresh model is built and trained for every fold value.
        model = KNN(k)
        model.train(points)
        validator = CrossValidation()
        print(f"{n_folds}-fold-cross-validation:")
        validator.run_cv(points, n_folds, model, accuracy_score, False, True)
def question_3(points, k):
    """
    question 3: cross-validate one trained KNN(k) with 2, 10 and 20 folds

    :param points: list of Point
    :param k: the best classifier for the given data, based on question 2
    """
    classifier = KNN(k)
    classifier.train(points)

    print("Question 3:")
    print("K=", k, sep="")
    for n_folds in (2, 10, 20):
        print(n_folds, "-fold-cross-validation:", sep="")
        # A new CrossValidation object is used for every fold value.
        CrossValidation().run_cv(points, n_folds, classifier, accuracy_score, False, True)
def question_4(points):
    """
    question 4: for k=5 and k=7, cross-validate a KNN on the data after each
    of the four normalizations and print the score with two decimals

    :param points: list of Point
    """
    normalizers = (
        (DummyNormalizer, "DummyNormalizer"),
        (SumNormalizer, "SumNormalizer"),
        (MinMaxNormalizer, "MinMaxNormalizer"),
        (ZNormalizer, "ZNormalizer"),
    )

    print("Question 4:")
    for k in (5, 7):
        print("K=", k, sep="")
        classifier = KNN(k)
        # The model is trained on the raw (un-normalized) points.
        classifier.train(points)
        validator = CrossValidation()
        for normalizer_cls, label in normalizers:
            normalizer = normalizer_cls()
            normalizer.fit(points)
            normalized_points = normalizer.transform(points)
            # 2 is the best n-fold
            average_score = validator.run_cv(
                normalized_points, 2, classifier, accuracy_score, False, True
            )
            print("Accuracy of", label, "is", format(average_score, ".2f"))
            print()
def knn_n_fold(k, n, points, normal_type):
    """
    run knn with n folds with normalized points

    :param k: k-nn
    :param n: n folds
    :param points: the points to use
    :param normal_type: callable applied to the points to normalize them;
        it is also handed to run_cv as its fifth argument
    :return: None
    """
    classifier = KNN(k)
    validator = CrossValidation()
    normalized_points = normal_type(points)
    validator.run_cv(
        normalized_points,
        n,
        classifier,
        accuracy_score,
        normal_type,
        print_fold_score=True,
    )
def ques_two(points):
    """
    Searches k = 1..30 for the KNN with the highest cross-validation score

    Every candidate is scored with ``len(points)`` as run_cv's second
    argument (presumably the fold count, i.e. one point per fold).

    :param points: set of data
    :return: the first k that reached the highest score, or 0 if no score
        was above 0
    """
    best_k = 0
    max_accuracy = 0
    for candidate_k in range(1, 31):
        classifier = KNN(candidate_k)
        classifier.train(points)
        score = CrossValidation().run_cv(
            points, len(points), classifier, accuracy_score, False
        )
        # Strict comparison: ties keep the smaller k found earlier.
        if score > max_accuracy:
            max_accuracy, best_k = score, candidate_k
    return best_k
def question_4(points, normalizers):
    """
    Prints, for k=5 and k=7, the cross-validation result of a KNN on the
    data after each of the given normalizations

    :param points: set of data
    :param normalizers: mapping from a display name to a normalizer class;
        each class is instantiated with no arguments, fitted on ``points``
        and used to transform them
    """
    print("Question 4:")
    # One CrossValidation object is shared by both k values.
    cv = CrossValidation()
    for k in (5, 7):
        model = KNN(k)
        # The model is trained on the raw points, not the normalized ones.
        model.train(points)
        print(f"K={k}")
        for name, normalizer_cls in normalizers.items():
            normalizer = normalizer_cls()
            normalizer.fit(points)
            new_points = normalizer.transform(points)
            # Run first so anything run_cv prints appears before the summary.
            score = cv.run_cv(new_points, 2, model, accuracy_score, False, True)
            print(f"Accuracy of {name} is " + str(score))
            print("")
def ques_four(points):
    """
    Prints, for k=5 and k=7, the cross-validation result of a KNN on the
    raw data and on the data after sum, min-max and z normalization

    A blank line follows every accuracy line except the very last one.

    :param points: set of data
    """
    print("Question 4:")
    list_of_k = [5, 7]
    # None means "no transformation": the raw points are reported under the
    # DummyNormalizer label.
    normalizers = [
        ("DummyNormalizer", None),
        ("SumNormalizer", SumNormalizer),
        ("MinMaxNormalizer", MinMaxNormalizer),
        ("ZNormalizer", ZNormalizer),
    ]
    last_position = len(normalizers) - 1

    for k in list_of_k:
        print("K={}".format(k))
        m = KNN(k)
        # The model is trained once per k, on the raw points.
        m.train(points)
        for position, (name, normalizer_cls) in enumerate(normalizers):
            if normalizer_cls is None:
                new_points = points
            else:
                normalizer = normalizer_cls()
                normalizer.fit(points)
                new_points = normalizer.transform(points)
            cv = CrossValidation()
            a = cv.run_cv(new_points, 2, m, accuracy_score, False, True)
            print(f"Accuracy of {name} is", a)
            # Separator after every line except the final one of the report.
            if position != last_position or k != list_of_k[-1]:
                print()
def question_3(points, k):
    """
    Prints the Question 3 report: one trained KNN(k) cross-validated with
    2, 10 and 20 as the fold argument

    :param points: set of data
    :param k: value for knn
    """
    classifier = KNN(k)
    classifier.train(points)
    validator = CrossValidation()

    print("Question 3:")
    print("K=" + str(k))
    fold_sections = (
        ("2-fold-cross-validation:", 2),
        ("10-fold-cross-validation:", 10),
        ("20-fold-cross-validation:", 20),
    )
    for header, n_folds in fold_sections:
        print(header)
        validator.run_cv(points, n_folds, classifier, accuracy_score, False, True)
def run_knn_k(points):
    """
    a function for question 2: tries k = 1..30 and keeps the k whose
    cross-validation score is the highest

    :param points: list of Point
    :return: a number, which is the best classifier for the given data
        (0 if no score was above 0.0)
    """
    best_accuracy_score = 0.0
    best_classifier = 0
    for k in range(1, 31):
        model = KNN(k)
        model.train(points)
        validator = CrossValidation()
        accuracy = validator.run_cv(
            points, len(points), model, accuracy_score, False, False
        )
        # Only a strictly better score replaces the current best.
        if not (accuracy > best_accuracy_score):
            continue
        best_accuracy_score = accuracy
        best_classifier = k
    return best_classifier
def run_knn(points):
    """
    Prints the Question 3 and Question 4 reports for the given data

    Question 3 cross-validates a trained KNN(k=19) with 2, 10 and 20 as the
    fold argument. Question 4 calls knn_n_fold with 2 folds for k=5 and k=7,
    once per normalization callable.

    :param points: set of data
    """
    print("Question 3:\nK=19")
    m = KNN(k=19)
    m.train(points)
    cv = CrossValidation()

    z = ZNormalizer()
    z.fit(points)
    d = DummyNormalizer()
    # Not named ``sum``: that would shadow the builtin inside this function.
    sum_normalizer = SumNormalizer()
    min_max = MinMaxNormalizer()
    min_max.fit(points)

    for n_folds in (2, 10, 20):
        print(f"{n_folds}-fold-cross-validation:")
        cv.run_cv(points, n_folds, m, accuracy_score, d.transform,
                  print_final_score=False, print_fold_score=True)

    print("Question 4:")
    # NOTE(review): only the Z and min-max normalizers are fitted above; the
    # dummy and sum normalizers are used without a fit call - confirm intended.
    transforms = (d.transform, sum_normalizer.l1, min_max.transform, z.transform)
    for k in (5, 7):
        print(f"K={k}")
        for transform in transforms:
            knn_n_fold(k, 2, points, transform)
def q3(k, points):
    """
    Prints the Question 3 report for a KNN with the given k

    :param k: value for knn
    :param points: set of data
    """
    classifier = KNN(k)
    classifier.train(points)
    validator = CrossValidation()

    def report(header, n_folds):
        # Print the section header, then let run_cv produce its own output.
        print(header)
        validator.run_cv(points, n_folds, classifier, accuracy_score, False, True)

    print("Question 3:")
    print(f'K={k}')
    report("2-fold-cross-validation:", 2)
    report("10-fold-cross-validation:", 10)
    report("20-fold-cross-validation:", 20)
def ques_three(points, best_k=19):
    """
    Prints the Question 3 report: a trained KNN(best_k) cross-validated
    with 2, 10 and 20 as the fold argument

    :param points: set of data
    :param best_k: value for knn; defaults to 19, the value that was
        previously hard-coded here (a commented-out line suggested it came
        from ques_two(points))
    """
    print("Question 3:")
    print("K={}".format(best_k))
    m = KNN(best_k)
    m.train(points)
    cv = CrossValidation()
    for n_folds in (2, 10, 20):
        print(f"{n_folds}-fold-cross-validation:")
        cv.run_cv(points, n_folds, m, accuracy_score, False, True)
def run_1_to_30_knn(points):
    """
    Runs knn with k=1 to k=30 on a given set of data, printing each
    classifier's prediction for the first point, and finally the best k

    :param points: set of data
    """
    best_k = 0
    best_accuracy = 0
    n_points = len(points)

    for k in range(1, 31):
        model = KNN(k)
        model.train(points)
        print(f"classifier {k}:")
        print(f'predicted class: {model.predict(points[0])}')
        print(f'true class: {points[0].label}')

        validator = CrossValidation()
        score = validator.run_cv(points, n_points, model, accuracy_score)
        # Strict comparison: the first k reaching the top score is kept.
        if score > best_accuracy:
            best_accuracy, best_k = score, k
        print()

    print(f"best classifier is: {best_k}, best accuracy is: {best_accuracy}")
def two_fold_cross_validation(points):
    """
    Runs cross validation with 2 folds for k=5 and k=7, each time on data
    transformed by a different normalizer

    :param points: set of data
    """
    k_values = (5, 7)
    normalizer_classes = (DummyNormalizer, SumNormalizer, MinMaxNormalizer, ZNormalizer)
    total_reports = len(k_values) * len(normalizer_classes)
    reports_done = 0

    for k in k_values:
        print(f"K={k}")
        for normalizer_cls in normalizer_classes:
            model = KNN(k)
            normalizer = normalizer_cls()
            normalizer.fit(points)
            normalized = normalizer.transform(points)
            # Here the model is trained on the normalized points.
            model.train(normalized)
            validator = CrossValidation()
            accuracy = validator.run_cv(normalized, 2, model, accuracy_score, True, True)
            print(f"Accuracy of {normalizer_cls.__name__} is {accuracy}")
            reports_done += 1
            # Blank separator after every report except the very last one.
            if reports_done < total_reports:
                print()
def run(model_id):
    """Run experiment.

    Looks up ``configs[model_id]``, builds the training arrays through a
    ``Generator``, cross-validates the configured network and saves every
    model returned by the cross-validation under ``MODELS_PATH``.
    """
    config = configs[model_id]
    logger.info('\n\n\ntrain model {}'.format(model_id))

    # prepare data: the preprocessing step is optional
    preprocess_fn = None
    preprocess_name = config['preprocess_fn']
    if preprocess_name is not None:
        preprocess_fn = partial(
            getattr(data_generator, preprocess_name),
            **config['preprocess']
        )

    generator = Generator(
        path=PATH_TRAIN,
        IDs=meta_train.index.tolist(),
        labels=meta_train[['target']],
        preprocessing_fn=preprocess_fn,
        shuffle=False,
        batch_size=64,
        **config['generator']
    )
    X, y = generate_train_data(generator, meta_train)
    logger.info('X shape: {}, y shape: {}'.format(X.shape, y.shape))

    # define model: a zero-argument factory so a new network can be built on demand
    build_network = partial(
        getattr(models, config['model_name']),
        input_shape=(X.shape[1:]),
        **config['model_params']
    )
    build_network().summary(print_fn=logger.info)
    model = KerasModel(build_network, logger=logger, **config['train'])

    # train and save model
    # NOTE(review): Xtest is the first 100 training rows - presumably a placeholder.
    cross_val = CrossValidation(X=X, y=y, Xtest=X[:100], logger=logger, **config['cv'])
    _, _, _, fold_models = cross_val.run_cv(model)
    for fold_index, fold_model in enumerate(fold_models):
        target = os.path.join(MODELS_PATH, 'model_{}_{}.h5'.format(model_id, fold_index))
        fold_model.save(target)
def implementation(points):
    """
    Runs questions 2-4 on the given data and prints the Q3 and Q4 reports

    Q2 picks the k in 1..30 with the highest cross-validation score, Q3
    cross-validates that k with 2, 10 and 20 as the fold argument, and Q4
    cross-validates k=5 and k=7 with 2 folds on the data after each
    normalization.

    :param points: set of data
    """
    # Q2: ``len(points)`` is passed as the fold argument for every candidate.
    best_score = 0
    best_k = 0
    for k in range(1, 31):
        m = KNN(k)
        m.train(points)
        cv = CrossValidation()
        score = cv.run_cv(points, len(points), m, accuracy_score,
                          print_final_score=False)
        # Strict comparison: ties keep the smaller k found earlier.
        if best_score < score:
            best_score = score
            best_k = k

    print("Question 3:")
    print(f'K={best_k}')
    k_q3 = KNN(best_k)
    k_q3.train(points)
    for n in (2, 10, 20):
        print(f'{n}-fold-cross-validation:')
        # ``cv`` is the CrossValidation object left from the last Q2 iteration.
        cv.run_cv(points, n, k_q3, accuracy_score,
                  print_final_score=False, print_fold_score=True)

    print("Question 4:")
    list_k = [5, 7]
    dummy = DummyNormalizer()
    z_norm = ZNormalizer()
    sum_norm = SumNormalizer()
    min_max_norm = MinMaxNormalizer()
    list_norm = [dummy, sum_norm, min_max_norm, z_norm]
    n_folds_q4 = 2
    for k in list_k:
        k_q4 = KNN(k)
        print(f'K={k}')
        for norm in list_norm:
            norm.fit(points)
            t_points = norm.transform(points)
            k_q4.train(t_points)
            avg_acc = cv.run_cv(t_points, n_folds_q4, k_q4, accuracy_score,
                                print_final_score=False, print_fold_score=True)
            print('Accuracy of {} is {:.2f}'.format(norm.print_name(), avg_acc))
            # Blank separator after every line except the final one.
            if not (norm == list_norm[-1] and k == list_k[-1]):
                print()
from sklearn.linear_model import Lasso
import pandas as pd
import numpy as np
from cross_validation import CrossValidation

# Synthetic data: x runs over 0..99 and y is x plus random noise scaled by 10.
d = pd.DataFrame({'x': range(100)})
d['y'] = d['x'] + np.random.rand(100) * 10


def mse(x, y):
    """Return the mean of the squared pairwise differences of x and y.

    Values are paired with ``zip``, so extra items of the longer input are
    ignored.
    """
    squared_errors = [(left - right) ** 2 for left, right in zip(x, y)]
    return np.mean(squared_errors)


# Cross-validate a Lasso model using column 'x' as feature and 'y' as target.
cv = CrossValidation(d, ['x'], 'y', mse)
cv.run_cv(Lasso())
def q4_print(points, k):
    """
    Trains a KNN(k) on the points and cross-validates it with 2 as the
    fold argument

    :param points: set of data
    :param k: value for knn
    :return: whatever run_cv returns (presumably the average score)
    """
    classifier = KNN(k)
    classifier.train(points)
    return CrossValidation().run_cv(points, 2, classifier, accuracy_score, False, True)
def q2(k, points):
    """
    Trains a KNN(k) on the points and cross-validates it with
    ``len(points)`` as the fold argument

    :param k: value for knn
    :param points: set of data
    :return: whatever run_cv returns (presumably the average score)
    """
    classifier = KNN(k)
    classifier.train(points)
    n_points = len(points)
    validator = CrossValidation()
    return validator.run_cv(points, n_points, classifier, accuracy_score, False, False)