from array import array

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

from skmultiflow.bayes import NaiveBayes
from skmultiflow.data import SEAGenerator
from skmultiflow.lazy import KNN


def test_knn():
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    learner = KNN(n_neighbors=8, max_window_size=2000, leaf_size=40)

    cnt = 0
    max_samples = 5000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 100
    X_batch = []
    y_batch = []

    # Interleaved test-then-train loop: predict every `wait_samples`
    # instances, then update the model with the current sample.
    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
                                       1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
                                       0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                       1, 1, 0, 1, 0, 0, 1, 0, 1])
    assert np.all(predictions == expected_predictions)

    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    expected_info = 'KNN(leaf_size=40, max_window_size=2000, ' \
                    'n_neighbors=8, nominal_attributes=None)'
    assert learner.get_info() == expected_info

    # Resetting must not change the configuration description.
    learner.reset()
    assert learner.get_info() == expected_info

    # Batch usage: fit on the first 4500 samples, predict on a held-out slice.
    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    learner.fit(X_batch[:4500], y_batch[:4500], classes=[0, 1])
    predictions = learner.predict(X_batch[4501:4550])

    expected_predictions = array('i', [1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
                                       1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
                                       1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
                                       0, 1, 1, 1, 0, 0, 1, 0, 0, 1,
                                       1, 1, 1, 1, 1, 1, 0, 1, 0])
    assert np.all(predictions == expected_predictions)

    correct_predictions = sum(predictions == y_batch[4501:4550])
    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
def hyperparametertuning_classifiers(learn, X, y, knn_max_w_size):
    cl_name = learn.__class__.__name__
    # print(cl_name)
    scor = 'balanced_accuracy'
    cv = 10

    if cl_name == 'KNN':
        # Tune n_neighbors and leaf_size with sklearn's KNeighborsClassifier
        # as a proxy, then transfer the best values to the stream KNN.
        KNN_grid = {'n_neighbors': [3, 5, 7, 10, 15],
                    'leaf_size': [3, 5, 7, 10, 15],
                    'algorithm': ['kd_tree']}
        grid_cv_KNN = GridSearchCV(estimator=KNeighborsClassifier(), cv=cv,
                                   scoring=scor, param_grid=KNN_grid)
        # grid_cv_KNN = RandomizedSearchCV(estimator=KNeighborsClassifier(), cv=cv,
        #                                  scoring=scor, param_distributions=KNN_grid)
        grid_cv_KNN.fit(X.to_numpy(), y.to_numpy().ravel())
        # print('grid_cv_KNN.best_params_: ', grid_cv_KNN.best_params_)
        n_neighbors = grid_cv_KNN.best_params_['n_neighbors']
        leaf_size = grid_cv_KNN.best_params_['leaf_size']
        tuned_params = {'n_neighbors': n_neighbors,
                        'leaf_size': leaf_size,
                        'max_window_size': knn_max_w_size}
        tuned_learn = KNN()
        tuned_learn.set_params(**tuned_params)
        tuned_learn.fit(X.to_numpy(), y.to_numpy().ravel())
    elif cl_name == 'HoeffdingTree':
        grace_period_range = np.array([25, 75, 150, 300])
        tie_threshold_range = np.linspace(0.001, 1.0, 5)
        split_confidence_range = np.linspace(1e-9, 0.1, 5)
        split_criterion_range = ['gini', 'info_gain', 'hellinger']
        leaf_prediction_range = ['mc', 'nb', 'nba']
        HT_grid = {'grace_period': grace_period_range,
                   'tie_threshold': tie_threshold_range,
                   'split_confidence': split_confidence_range,
                   'split_criterion': split_criterion_range,
                   'leaf_prediction': leaf_prediction_range}
        grid_cv_HT = GridSearchCV(estimator=learn, scoring=scor, cv=cv,
                                  param_grid=HT_grid)
        # grid_cv_HT = RandomizedSearchCV(estimator=learn, scoring=scor, cv=cv,
        #                                 param_distributions=HT_grid)
        grid_cv_HT.fit(X.to_numpy(), y.to_numpy().ravel())
        # print('grid_cv_HT.best_params_: ', grid_cv_HT.best_params_)
        tuned_params = grid_cv_HT.best_params_
        tuned_learn = grid_cv_HT.best_estimator_
    elif cl_name == 'NaiveBayes':
        # NaiveBayes has no hyperparameters worth searching; fit as-is.
        tuned_params = {'nominal_attributes': None}
        tuned_learn = NaiveBayes()
        tuned_learn.set_params(**tuned_params)
        tuned_learn.fit(X.to_numpy(), y.to_numpy().ravel())
    else:
        raise ValueError('Unsupported classifier: {}'.format(cl_name))

    # print('Final tuned algorithm: ', tuned_learn)
    return tuned_learn, tuned_params
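
# Usage sketch (illustrative, not part of the original code): a minimal,
# self-contained call to hyperparametertuning_classifiers, assuming X is a
# pandas DataFrame and y a pandas Series as the function expects. The
# synthetic dataset, the column names, and the max-window-size value of
# 2000 below are arbitrary choices for demonstration only.
if __name__ == '__main__':
    import pandas as pd

    rng = np.random.RandomState(1)
    # 200 random samples with a simple linear decision rule as labels.
    X_demo = pd.DataFrame(rng.rand(200, 3), columns=['a', 'b', 'c'])
    y_demo = pd.Series((X_demo['a'] + X_demo['b'] > 1.0).astype(int))

    # Passing a KNN instance selects the KNN branch of the tuner.
    model, params = hyperparametertuning_classifiers(KNN(), X_demo, y_demo,
                                                     knn_max_w_size=2000)
    print('tuned params:', params)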