def cv_performance(x, y, num_folds, k):
    """Return the mean k-NN accuracy over the cross-validation folds.

    Args:
        x: Indexable collection of feature vectors.
        y: Labels aligned with ``x``; ``len(y)`` is the number of examples
            handed to ``split_cv``.
        num_folds: Number of folds requested from ``split_cv``.
        k: Number of neighbours passed to ``Knearest``.

    Returns:
        ``np.mean`` of the per-fold accuracies.
    """
    accuracy_array = []
    for split in split_cv(len(y), num_folds):
        # Each split is read positionally: element 0 holds the training
        # indices and element 1 the held-out test indices.
        train_idx, test_idx = split[0], split[1]

        train_x = [x[i] for i in train_idx]
        train_y = [y[i] for i in train_idx]
        test_x = [x[i] for i in test_idx]
        test_y = [y[i] for i in test_idx]

        # Fit on the training portion, score on the held-out portion.
        knn = Knearest(train_x, train_y, k)
        confusion = knn.confusion_matrix(test_x, test_y)
        accuracy_array.append(knn.accuracy(confusion))

    return np.mean(accuracy_array)
def cv_performance(x, y, num_folds, k):
    """Average the k-NN accuracy obtained on each cross-validation fold.

    Args:
        x: Indexable collection of feature vectors.
        y: Labels aligned with ``x``.
        num_folds: Number of folds requested from ``split_cv``.
        k: Number of neighbours passed to ``Knearest``.

    Returns:
        ``np.mean`` of the per-fold accuracies.
    """

    def gather(values, indices):
        # Pick the entries of ``values`` at the given positions, in order.
        return [values[position] for position in indices]

    fold_scores = []
    for fold in split_cv(len(y), num_folds):
        held_out_x = gather(x, fold.test)
        held_out_y = gather(y, fold.test)
        fit_x = gather(x, fold.train)
        fit_y = gather(y, fold.train)

        # Train on the fold's training indices, evaluate on its test indices.
        classifier = Knearest(fit_x, fit_y, k)
        matrix = classifier.confusion_matrix(held_out_x, held_out_y)
        fold_scores.append(classifier.accuracy(matrix))

    return np.mean(fold_scores)
def cv_performance(x, y, num_folds, k):
    """Return the mean k-NN accuracy over the cross-validation folds.

    Args:
        x: Indexable collection of feature vectors.
        y: Labels aligned with ``x``.
        num_folds: Number of folds requested from ``split_cv``.
        k: Number of neighbours passed to ``Knearest``.

    Returns:
        ``np.mean`` of the per-fold accuracies.
    """
    accuracy_array = []
    for split in split_cv(len(y), num_folds):
        # Materialise each fold as arrays selected by the fold's indices.
        train_x = np.asarray([x[i] for i in split.train])
        train_y = np.asarray([y[i] for i in split.train])
        test_x = np.asarray([x[i] for i in split.test])
        test_y = np.asarray([y[i] for i in split.test])

        # Use the whole fold: indexing these arrays with [0] would hand the
        # classifier a single example instead of the training/test set.
        knn = Knearest(train_x, train_y, k)
        confusion = knn.confusion_matrix(test_x, test_y)
        accuracy_array.append(knn.accuracy(confusion))

    return np.mean(accuracy_array)
def cv_performance(x, y, num_folds, k):
    """Compute the mean accuracy of a k-NN classifier under cross validation.

    Args:
        x: Indexable collection of feature vectors.
        y: Labels aligned with ``x``.
        num_folds: Number of folds requested from ``split_cv``.
        k: Number of neighbours passed to ``Knearest``.

    Returns:
        ``np.mean`` of the per-fold accuracies.
    """
    n_examples = len(y)
    per_fold = []
    for fold in split_cv(n_examples, num_folds):
        # Collect the training examples named by this fold.
        fit_x, fit_y = [], []
        for idx in fold.train:
            fit_x.append(x[idx])
        for idx in fold.train:
            fit_y.append(y[idx])

        # Collect the held-out examples named by this fold.
        eval_x, eval_y = [], []
        for idx in fold.test:
            eval_x.append(x[idx])
        for idx in fold.test:
            eval_y.append(y[idx])

        model = Knearest(fit_x, fit_y, k)
        per_fold.append(model.accuracy(model.confusion_matrix(eval_x, eval_y)))

    return np.mean(per_fold)
def cv_performance(x, y, num_folds, k):
    """Average the k-NN accuracy across cross-validation folds.

    ``x`` and ``y`` are indexed directly with each fold's index collections,
    so they must support that form of indexing.
    # NOTE(review): presumably NumPy arrays - confirm against callers.

    Args:
        x: Feature vectors.
        y: Labels aligned with ``x``.
        num_folds: Number of folds requested from ``split_cv``.
        k: Number of neighbours passed to ``Knearest``.

    Returns:
        ``np.mean`` of the per-fold accuracies.
    """
    n_examples = len(y)
    fold_accuracies = []
    for fold in split_cv(n_examples, num_folds):
        # Fit on the fold's training rows.
        fit_features = x[fold.train]
        fit_labels = y[fold.train]
        model = Knearest(fit_features, fit_labels, k)

        # Score on the fold's held-out rows.
        eval_features = x[fold.test]
        eval_labels = y[fold.test]
        matrix = model.confusion_matrix(eval_features, eval_labels)

        fold_accuracies.append(model.accuracy(matrix))
    return np.mean(fold_accuracies)
def limit(lim, tr_x, tr_y, ts_x, ts_y, K):
    """Plot k-NN accuracy against the data-size cutoff for each k in ``K``.

    For every ``k`` in ``K`` and every cutoff in ``lim``, a ``Knearest`` model
    is built from the first ``cutoff`` training examples and scored on the
    first ``cutoff`` test examples. One curve per ``k`` is drawn, the figure
    is saved to ``question_1.png`` and then shown.

    Args:
        lim: Iterable of cutoffs; also used as the x-axis values.
        tr_x: Sliceable training features.
        tr_y: Sliceable training labels.
        ts_x: Sliceable test features.
        ts_y: Sliceable test labels.
        K: Iterable of neighbour counts to compare.

    Returns:
        None.
    """
    accuracy = {}
    for k in K:
        scores = []
        for cutoff in lim:
            knn = Knearest(tr_x[:cutoff], tr_y[:cutoff], k)
            # NOTE(review): the test set is truncated with the same cutoff as
            # the training set - confirm this is intended.
            conf = knn.confusion_matrix(ts_x[:cutoff], ts_y[:cutoff])
            scores.append(knn.accuracy(conf))
        accuracy[k] = scores

    # Interactive mode only needs to be switched on once, before drawing.
    plt.ion()
    for k_value, scores in accuracy.items():
        plt.plot(lim, scores, label="K= " + str(k_value))
    plt.xlabel("Numbers of Training")
    plt.ylabel("Accuracy")
    plt.title("Figure 1 - Accuracies Against Numbers of Training ")
    plt.legend(bbox_to_anchor=(0.5, 0.5), loc=2, borderaxespad=0.)
    plt.savefig("question_1.png")
    plt.show()
def cv_performance(x, y, num_folds, k):
    """Return the mean k-NN accuracy over the cross-validation folds.

    Args:
        x: Feature vectors (anything ``np.asarray`` accepts).
        y: Labels aligned with ``x``.
        num_folds: Number of folds requested from ``split_cv``.
        k: Number of neighbours passed to ``Knearest``.

    Returns:
        ``np.mean`` of the per-fold accuracies.
    """
    features = np.asarray(x)
    labels = np.asarray(y)

    accuracy_array = []
    for split in split_cv(len(y), num_folds):
        # Select exactly the rows each fold names. Slicing with the first
        # index (``x[:split.train[0]]``) treats an index as a count and does
        # not reproduce the fold; ``x[-0:]`` would even return everything.
        train_idx = np.asarray(split.train, dtype=int)
        test_idx = np.asarray(split.test, dtype=int)

        # Honour the caller's ``k`` instead of a fixed neighbour count.
        knn = Knearest(features[train_idx], labels[train_idx], k)
        confusion = knn.confusion_matrix(features[test_idx], labels[test_idx])
        accuracy_array.append(knn.accuracy(confusion))

    return np.mean(accuracy_array)
knn = Knearest(train_x, train_y, k) confusion = knn.confusion_matrix(test_x, test_y) accuracy = knn.accuracy(confusion) accuracy_array.append(accuracy) return np.mean(accuracy_array) if __name__ == "__main__": parser = argparse.ArgumentParser(description='KNN classifier options') parser.add_argument('--limit', type=int, default=-1, help="Restrict training to this many examples") args = parser.parse_args() data = Numbers("../data/mnist.pkl.gz") x, y = data.train_x, data.train_y if args.limit > 0: x, y = x[:args.limit], y[:args.limit] best_k, best_accuracy = -1, 0 for k in [1, 3, 5, 7, 9]: accuracy = cv_performance(x, y, 5, k) print("%d-nearest neighber accuracy: %f" % (k, accuracy)) if accuracy > best_accuracy: best_accuracy, best_k = accuracy, k knn = Knearest(x, y, best_k) confusion = knn.confusion_matrix(data.test_x, data.test_y) accuracy = knn.accuracy(confusion) print("Accuracy for chosen best k= %d: %f" % (best_k, accuracy))