示例#1
0
def test_knn():
    """Regression test for the KNN classifier on a seeded SEA stream.

    The test has two phases:

    1. Incremental phase: draw 5000 samples one at a time, train with
       ``partial_fit`` on each, and every 100th sample (skipping the very
       first) predict *before* training on it. The predictions and the
       number of correct ones are compared against pinned values.
    2. Batch phase: after ``reset()``, train with ``fit`` on the first 4500
       collected samples and predict on a later slice, again comparing
       against pinned values.

    It also checks ``get_info()`` before and after ``reset()`` and the
    return types of ``predict`` / ``predict_proba``.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    learner = KNN(n_neighbors=8, max_window_size=2000, leaf_size=40)
    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    correct_predictions = 0
    X_batch = []
    y_batch = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every `wait_samples` samples, predicting before the learner
        # has seen the sample (test-then-train).
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])
    # np.array_equal replaces np.alltrue(a == b): np.alltrue no longer exists
    # in NumPy 2.0, and array_equal also reports a length mismatch as False.
    assert np.array_equal(predictions, expected_predictions)

    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    expected_info = 'KNN(leaf_size=40, max_window_size=2000, n_neighbors=8, nominal_attributes=None)'
    assert learner.get_info() == expected_info

    # reset() must not change the reported configuration.
    learner.reset()
    assert learner.get_info() == expected_info

    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    learner.fit(X_batch[:4500], y_batch[:4500], classes=[0, 1])
    # The evaluation slice starts at 4501 (index 4500 is neither trained on
    # nor predicted); the pinned values below correspond to this exact slice.
    predictions = learner.predict(X_batch[4501:4550])

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        0
    ])
    assert np.array_equal(predictions, expected_predictions)

    correct_predictions = np.count_nonzero(predictions == y_batch[4501:4550])
    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    # X here is the last sample drawn from the stream in the loop above.
    assert type(learner.predict(X)) is np.ndarray
    assert type(learner.predict_proba(X)) is np.ndarray
示例#2
0
def hyperparametertuning_classifiers(learn, X, y, knn_max_w_size):
    """Tune and fit a classifier chosen by the class name of ``learn``.

    Supported class names and what is done for each:

    * ``'KNN'`` - grid-searches ``n_neighbors`` and ``leaf_size`` on a
      ``KNeighborsClassifier`` stand-in, then builds a fresh ``KNN`` with
      the best values plus ``max_window_size=knn_max_w_size`` and fits it.
    * ``'HoeffdingTree'`` - grid-searches directly on ``learn`` and returns
      the search's best estimator and best parameters.
    * ``'NaiveBayes'`` - no search; fits a fresh ``NaiveBayes`` with
      ``nominal_attributes=None``.

    Grid searches use 10-fold cross-validation scored by balanced accuracy.

    Args:
        learn: Classifier instance; only its class name is inspected, except
            in the ``'HoeffdingTree'`` branch where it is the estimator
            handed to the grid search.
        X: Feature data. Converted with ``np.asarray``, so a pandas
            DataFrame, a NumPy array or a nested list all work.
        y: Target data. Converted with ``np.asarray`` and flattened.
        knn_max_w_size: ``max_window_size`` given to the tuned ``KNN``;
            unused by the other branches.

    Returns:
        Tuple ``(tuned_learn, tuned_params)``: the fitted classifier and the
        dict of parameters selected for it.

    Raises:
        ValueError: If the class name of ``learn`` is not one of the
            supported names.
    """
    cl_name = learn.__class__.__name__

    scoring = 'balanced_accuracy'
    n_folds = 10

    # Convert once up front. np.asarray replaces DataFrame.as_matrix(), which
    # no longer exists in pandas >= 1.0, and also accepts plain arrays.
    X_values = np.asarray(X)
    y_values = np.asarray(y).ravel()

    if cl_name == 'KNN':
        knn_grid = {
            'n_neighbors': [3, 5, 7, 10, 15],
            'leaf_size': [3, 5, 7, 10, 15],
            'algorithm': ['kd_tree']
        }

        # The search runs on KNeighborsClassifier rather than on `learn`;
        # only the winning n_neighbors / leaf_size are carried over to KNN.
        knn_search = GridSearchCV(estimator=KNeighborsClassifier(),
                                  cv=n_folds,
                                  scoring=scoring,
                                  param_grid=knn_grid)
        knn_search.fit(X_values, y_values)

        tuned_params = {
            'n_neighbors': knn_search.best_params_['n_neighbors'],
            'leaf_size': knn_search.best_params_['leaf_size'],
            'max_window_size': knn_max_w_size
        }

        tuned_learn = KNN()
        tuned_learn.set_params(**tuned_params)
        tuned_learn.fit(X_values, y_values)

    elif cl_name == 'HoeffdingTree':
        ht_grid = {
            'grace_period': np.array([25, 75, 150, 300]),
            'tie_threshold': np.linspace(0.001, 1.0, 5),
            'split_confidence': np.linspace(0.000000001, 0.1, 5),
            'split_criterion': ['gini', 'info_gain', 'hellinger'],
            'leaf_prediction': ['mc', 'nb', 'nba']
        }

        ht_search = GridSearchCV(estimator=learn,
                                 scoring=scoring,
                                 cv=n_folds,
                                 param_grid=ht_grid)
        ht_search.fit(X_values, y_values)

        tuned_params = ht_search.best_params_
        tuned_learn = ht_search.best_estimator_

    elif cl_name == 'NaiveBayes':
        tuned_params = {'nominal_attributes': None}
        tuned_learn = NaiveBayes()
        tuned_learn.set_params(**tuned_params)
        tuned_learn.fit(X_values, y_values)

    else:
        # Without this branch the return below would fail with an opaque
        # UnboundLocalError for any other classifier.
        raise ValueError(
            "Unsupported classifier '{}': expected one of 'KNN', "
            "'HoeffdingTree', 'NaiveBayes'".format(cl_name))

    return tuned_learn, tuned_params