def run_with_sample() -> None:
    """Repeatedly evaluate the KNN model on held-out samples and print the mean accuracy.

    For each of 100 runs, 25 rows are carved out of each of the two training
    partitions, the model is refit on the remainder, and the held-out rows are
    scored (k=3). Per-run accuracy and the overall average are printed.
    """
    accumulated_pct = 0.0
    n_runs = 100
    for _ in range(n_runs):
        train = get_train_data()
        held_out: List[FrameOrSeries] = []
        # Remove a 25-row evaluation sample from each class partition.
        train[0], held = get_sample_data(train[0], 25)  # type: FrameOrSeries
        held_out.append(held)
        train[1], held = get_sample_data(train[1], 25)  # type: FrameOrSeries
        held_out.append(held)
        # Refit on what remains after sampling.
        knn.clear()
        knn.fit(train[0].values.tolist(), train[1].values.tolist())
        correct = 0
        seen = 0
        # The sample's position in held_out doubles as its expected label.
        for expected_label, sample in enumerate(held_out):
            for row in sample.values:
                if knn.predict(row.tolist(), k=3) == expected_label:
                    correct += 1
                seen += 1
        print('point: ' + str(correct))
        print('total: ' + str(seen))
        accumulated_pct += float(correct / seen * 100)
        print(correct / seen * 100)
    print('total_point: ' + str(accumulated_pct / n_runs))
def test__predict_with_tans_fun(self):
    """With the standard transform function, predict returns the nearest neighbour's label."""
    # NOTE(review): a method with this exact name also appears later in the
    # file; Python keeps only the last definition, so one duplicate never runs.
    features = array([[2, 4], [6, 8]])
    labels = array(['a', 'b'])
    knn = KNN(trans_fun=KNN.STD_TRANS)
    knn.fit(features, labels)
    self.assertEqual(knn.predict(array([4, 5]), 1), 'a')
    self.assertEqual(knn.predict(array([5, 6]), 1), 'b')
def test__predict_with_tans_fun(self):
    """Query points on either side of the two training rows map to the nearer label."""
    # NOTE(review): this duplicates an identically named test earlier in the
    # file — only one of the two will be collected by the test runner.
    knn = KNN(trans_fun=KNN.STD_TRANS)
    knn.fit(array([[2, 4], [6, 8]]), array(['a', 'b']))
    for query, expected in ((array([4, 5]), 'a'), (array([5, 6]), 'b')):
        self.assertEqual(knn.predict(query, 1), expected)
def run() -> None:
    """Train KNN on the full training set and write test-set predictions to CSV.

    Each output row is ``[id, predicted_label]``: the first column of a raw
    test row is its id and the remaining columns are the feature vector.
    """
    test_data = get_test_data()
    data = get_train_data()
    knn.clear()
    knn.fit(data[0].values.tolist(), data[1].values.tolist())
    results = []
    for row in test_data.values.tolist():  # type: list
        # pop(0) extracts the id AND strips it from the feature vector in one
        # step (replaces the original `id = i[:1][0]; del i[0]` pair, which
        # also shadowed the builtin `id`).
        row_id = row.pop(0)
        results.append([int(row_id), knn.predict(row)])
    write_to_csv(results)
def gridsearchKNN2(parametros, xTrain, yTrain, Xval, yVal):
    """Grid-search KNN hyperparameters and return the combination with the best
    weighted F1 score on the validation set.

    Each candidate's F1 is printed as it is evaluated. Ties resolve to the
    first candidate reaching the maximum score.
    """
    scores = []
    candidates = []
    for params in ParameterGrid(parametros):
        clf = KNeighborsClassifier(n_neighbors=params['n_neighbors'],
                                   metric=params['metric'])
        clf.fit(xTrain, yTrain)
        f1 = metrics.f1_score(yVal, clf.predict(Xval), average='weighted')
        print(f1)
        scores.append(f1)
        candidates.append(params)
    return candidates[scores.index(max(scores))]
# Unpack the remaining splits of the dataset loaded above.
# NOTE(review): `dataset` and `X` are assigned earlier in the script, outside
# this fragment — confirm against the full file.
y = dataset['y']
Xtest = dataset['Xtest']
ytest = dataset['ytest']
# part 1: implement knn.predict
# part 2: print training and test errors for k=1,3,10 (use utils.classification_error)
# part 3: plot classification boundaries for k=1 (use utils.plot_2dclassifier)
# Pre-declare loop outputs so they survive past the loop body.
model = None
predict = None
yhat = None
Yhat = None
tr_err = 0
te_err = 0
for k in [1, 3, 10]:
    # knn.fit returns a model dict whose 'predict' entry is the prediction
    # function (called as predict(model, data)).
    model = knn.fit(X, y, k)
    predict = model['predict']
    yhat = predict(model, X)
    Yhat = predict(model, Xtest)
    tr_err = utils.classification_error(y, yhat)
    te_err = utils.classification_error(ytest, Yhat)
    print("Training error for k =", k, "is =", tr_err)
    print("Testing error for k =", k, "is =", te_err)
# Decision-boundary plot for a fresh k=1 model over the test split.
utils.plot_2dclassifier(knn.fit(X, y, 1), Xtest, ytest)
plt.show()
if question == '1.2':
    dataset = utils.load_dataset('citiesBig1')
    X = dataset['X']
    y = dataset['y']
import EEG_feature_extraction
import EEG_load

# main - train & test: score KNN at several neighbourhood sizes on EEG features.
# Alternative raw loader kept for reference:
#   dataset = EEG_load.load_data("s16", 20)
dataset = EEG_feature_extraction.generate_feature_data("s16", 20)

X = dataset['X_train']
y = dataset['Y_train']
Xtest = dataset['X_test']
ytest = dataset['Y_test']

k = [1, 3, 10]
for n_neighbors in k:
    model = knn.fit(X, y, n_neighbors)
    # Training error: fraction of training labels predicted incorrectly.
    y_pred = knn.predict(model, X)
    train_error = np.mean(y_pred.flatten() != y)
    print("The current training error is: %r" % train_error)
    # Test error on the held-out split.
    y_pred = knn.predict(model, Xtest)
    test_error = np.mean(y_pred.flatten() != ytest)
    print("The current test error is: %r" % test_error)

# part 3: plot classification boundaries for k=1 (use utils.plot_2dclassifier)
# NOTE(review): the model below is fit with k[2] == 10, not k=1 as the note
# above says — confirm which k is intended.
model1 = knn.fit(X, y, k[2])
utils.plot_2dclassifier(model1, X, y)
#plt.show()

# save figure
fname = "../s16-c20-mean.png"
# Tail of the argparse choices list opened earlier in the file (the
# add_argument call begins outside this fragment).
'1.1', '1.2', '2.1', '2.2', '3.1', '3.2', '3.3', '4.1', '4.2', '4.3'
])
io_args = parser.parse_args()
question = io_args.question
if question == '1.1':
    dataset = utils.load_dataset('citiesSmall')
    X = dataset['X']
    y = dataset['y']
    Xtest = dataset['Xtest']
    ytest = dataset['ytest']
    # Alternative neighbourhood sizes tried during development:
    #model = knn.fit(X,y,3)
    #model = knn.fit(X,y,1)
    model = knn.fit(X, y, 10)
    y_pred_tr = knn.predict(model, X)
    y_pred_te = knn.predict(model, Xtest)
    # NOTE(review): arguments here are (predictions, truth); other call sites
    # in this file pass (truth, predictions) — verify the expected order in
    # utils.classification_error (symmetric for simple mismatch counting,
    # but worth confirming).
    trerror = utils.classification_error(y_pred_tr, y)
    teerror = utils.classification_error(y_pred_te, ytest)
    print(trerror)
    print(teerror)
    # Decision boundary over the test split.
    utils.plot_2dclassifier(model, Xtest, ytest)
    # part 1: implement knn.predict
    # part 2: print training and test errors for k=1,3,10 (use utils.classification_error)
    # part 3: plot classification boundaries for k=1 (use utils.plot_2dclassifier)
print(feat_vec.shape)
# Keep the label column and append the extra feature block scaled by its mean.
# NOTE(review): `feat_vec` and `add` are built earlier in the script — confirm
# column 0 holds the labels.
feat_vec = np.concatenate((feat_vec[:, :1], add / np.mean(add)), axis=1)

# Average test accuracy over 100 random 80/20 train/test splits.
accuracy = 0
for _ in range(100):
    np.random.shuffle(feat_vec)
    split_at = int(0.8 * len(feat_vec))
    train_part, test_part = np.split(feat_vec, [split_at], axis=0)
    # Column 0 is the label; the rest are features.
    y_train = np.transpose(train_part[:, :1])[0]
    X_train = train_part[:, 1:]
    y_test = np.transpose(test_part[:, :1])[0]
    X_test = test_part[:, 1:]
    knn = neighbors.KNeighborsClassifier(n_neighbors=4,
                                         weights='distance',
                                         metric='manhattan')
    knn.fit(X_train, y_train)
    predicted = knn.predict(X_test)
    hits = predicted == y_test
    accuracy += hits.sum() / len(hits)
print(accuracy / 100)