def test_dynamic_weighted_majority():
    """Check DynamicWeightedMajorityClassifier against recorded results.

    The learner is trained one sample at a time on a seeded SEAGenerator
    stream.  Every ``wait_samples`` samples (except the very first one) the
    incoming sample is predicted *before* it is learned.  The collected
    predictions, the hit count, the resulting accuracy, the return type of
    ``predict`` and the ``get_info()`` text are compared with hard-coded
    reference values.
    """
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()

    learner = DynamicWeightedMajorityClassifier(3, NaiveBayes(), beta=0.5, theta=0.01)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if cnt == 0:
            # Only the very first call passes the class labels.
            learner.partial_fit(X, y, classes=stream.target_values)
        else:
            learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
                            0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
                            0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 44
    expected_performance = 0.8979591836734694

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)

    expected_info = (
        'DynamicWeightedMajorityClassifier(base_estimator=NaiveBayes(nominal_attributes=None),\n'
        '                                  beta=0.5, n_estimators=3, period=50,\n'
        '                                  theta=0.01)'
    )
    assert learner.get_info() == expected_info
示例#2
0
def test_hoeffding_tree_coverage():
    """Smoke-test two HoeffdingTree configurations; nothing is asserted.

    First a tree created with ``max_byte_size=30`` and
    ``memory_estimate_period=100`` is fitted on 5000 SEAGenerator samples
    and then reset.  Afterwards a second tree, with attributes 0-9 declared
    nominal, is fitted on 1000 RandomTreeGenerator samples.  The test passes
    as long as no call raises.
    """
    # Cover memory management
    sea_stream = SEAGenerator(random_state=1, noise_percentage=0.05)
    sea_stream.prepare_for_use()
    features, labels = sea_stream.next_sample(5000)

    tree = HoeffdingTree(
        max_byte_size=30,
        memory_estimate_period=100,
        grace_period=10,
        leaf_prediction='mc',
    )
    tree.partial_fit(features, labels, classes=sea_stream.target_values)
    tree.reset()

    # Cover nominal attribute observer
    categorical_stream = RandomTreeGenerator(
        tree_random_state=1,
        sample_random_state=1,
        n_num_features=0,
        n_categories_per_cat_feature=2,
    )
    categorical_stream.prepare_for_use()
    features, labels = categorical_stream.next_sample(1000)

    nominal_indices = list(range(10))
    tree = HoeffdingTree(leaf_prediction='mc', nominal_attributes=nominal_indices)
    tree.partial_fit(features, labels, classes=categorical_stream.target_values)
示例#3
0
def test_online_boosting():
    """Check OnlineBoosting (NaiveBayes base learner) against recorded results.

    The ensemble is trained one sample at a time on a seeded SEAGenerator
    stream.  Every ``wait_samples`` samples (except the very first one) the
    incoming sample is predicted before it is learned.  Predictions, hit
    count and accuracy are compared with hard-coded reference values, and
    ``predict`` / ``predict_proba`` must return NumPy arrays.
    """
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    nb = NaiveBayes()
    learner = OnlineBoosting(base_estimator=nb,
                             n_estimators=3,
                             random_state=112)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if cnt == 0:
            # Only the very first call passes the class labels.
            learner.partial_fit(X, y, classes=stream.target_values)
        else:
            learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    expected_correct_predictions = 45
    expected_performance = 0.9183673469387755

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
示例#4
0
def test_leverage_bagging():
    """Check LeverageBagging (KNN base learner) against recorded results.

    The ensemble is trained one sample at a time on a seeded SEAGenerator
    stream.  Every ``wait_samples`` samples (except the very first one) the
    incoming sample is predicted before it is learned.  Predictions, hit
    count and accuracy are compared with hard-coded reference values, and
    ``predict`` / ``predict_proba`` must return NumPy arrays.
    """
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()
    knn = KNN(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = LeverageBagging(base_estimator=knn,
                              n_estimators=3,
                              random_state=112)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if cnt == 0:
            # Only the very first call passes the class labels.
            learner.partial_fit(X, y, classes=stream.target_values)
        else:
            learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    expected_correct_predictions = 42
    expected_performance = 0.8571428571428571

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_sam_knn_coverage():
    """Check SAMKNN with long-term memory enabled against recorded results.

    The learner is trained one sample at a time on a seeded SEAGenerator
    stream.  Every ``wait_samples`` samples (except the very first one) the
    incoming sample is predicted before it is learned.  The collected
    predictions and the ``get_info()`` text are compared with hard-coded
    reference values.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    hyper_params = {'maxSize': 50,
                    'n_neighbors': 3,
                    'weighting': 'uniform',
                    'stm_size_option': 'maxACC',
                    'min_stm_size': 10,
                    'use_ltm': True}

    learner = SAMKNN(n_neighbors=hyper_params['n_neighbors'],
                     max_window_size=hyper_params['maxSize'],
                     weighting=hyper_params['weighting'],
                     stm_size_option=hyper_params['stm_size_option'],
                     min_stm_size=hyper_params['min_stm_size'],
                     use_ltm=hyper_params['use_ltm'])

    max_samples = 1000
    wait_samples = 20
    predictions = array('i')

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
                                       0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
                                       1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
                                       0, 0, 0, 0, 0, 1, 1, 1, 0])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    # NOTE(review): the recorded text shows max_window_size=None although 50
    # is passed to the constructor above; kept as recorded -- confirm against
    # the library's get_info() behaviour.
    expected_info = (
        "SAMKNN(ltm_size=0.4, max_window_size=None, min_stm_size=10, n_neighbors=3,\n"
        "       stm_size_option='maxACC', use_ltm=True, weighting='uniform')"
    )
    assert learner.get_info() == expected_info
示例#6
0
def test_hoeffding_anytime_tree_coverage():
    """Smoke-test two HATT configurations; nothing is asserted.

    First a tree created with ``max_byte_size=30`` and
    ``memory_estimate_period=100`` is fitted on 5000 SEAGenerator samples
    and then reset.  Afterwards a second tree, with attributes 1-8 declared
    nominal, is fitted on 5000 RandomTreeGenerator samples.  The test passes
    as long as no call raises.
    """
    # Cover memory management
    sea_stream = SEAGenerator(random_state=1, noise_percentage=0.05)
    sea_stream.prepare_for_use()
    features, labels = sea_stream.next_sample(5000)

    tree = HATT(
        max_byte_size=30,
        memory_estimate_period=100,
        grace_period=10,
        leaf_prediction='nba',
    )
    tree.partial_fit(features, labels, classes=sea_stream.target_values)
    tree.reset()

    # Cover nominal attribute observer
    mixed_stream = RandomTreeGenerator(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=2,
        n_cat_features=2,
        n_categories_per_cat_feature=4,
        n_num_features=1,
        max_tree_depth=30,
        min_leaf_depth=10,
        fraction_leaves_per_level=0.45,
    )
    mixed_stream.prepare_for_use()
    features, labels = mixed_stream.next_sample(5000)

    nominal_indices = list(range(1, 9))
    tree = HATT(leaf_prediction='nba', nominal_attributes=nominal_indices)
    tree.partial_fit(features, labels, classes=mixed_stream.target_values)
示例#7
0
def test_evaluate_coverage(tmpdir):
    """Run EvaluatePrequential once with an output file; nothing is asserted.

    A NaiveBayes model is evaluated on a seeded SEAGenerator stream for at
    most 1000 samples with the 'running_time' and 'model_size' metrics.  The
    summary path handed to the evaluator lies inside ``tmpdir``.
    """
    from skmultiflow.data import SEAGenerator
    from skmultiflow.bayes import NaiveBayes

    # Stream
    sea_stream = SEAGenerator(random_state=1)
    sea_stream.prepare_for_use()

    # Learner
    naive_bayes = NaiveBayes()

    summary_path = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator_options = {
        'max_samples': 1000,
        'metrics': ['running_time', 'model_size'],
        'data_points_for_classification': True,
        'output_file': summary_path,
    }
    evaluator = EvaluatePrequential(**evaluator_options)

    evaluator.evaluate(stream=sea_stream, model=naive_bayes, model_names=['NB'])
def test_additive_expert_ensemble_oldest():
    """Check AdditiveExpertEnsembleClassifier with ``pruning='oldest'``.

    The ensemble is trained one sample at a time on a seeded SEAGenerator
    stream.  Every ``wait_samples`` samples (except the very first one) the
    incoming sample is predicted before it is learned.  Predictions, hit
    count and accuracy are compared with hard-coded reference values, and
    ``predict`` must return a NumPy array.
    """
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    stream.prepare_for_use()

    learner = AdditiveExpertEnsembleClassifier(10, NaiveBayes(), beta=0.5, gamma=0.1,
                                               pruning='oldest')

    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if cnt == 0:
            # Only the very first call passes the class labels.
            learner.partial_fit(X, y, classes=stream.target_values)
        else:
            learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)

    expected_predictions = [1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
                            0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
                            0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 39
    expected_performance = 0.7959183673469388

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
示例#9
0
def test_sam_knn():
    """Check SAMKNNClassifier without long-term memory against recorded results.

    The learner is trained one sample at a time on a seeded SEAGenerator
    stream.  Every ``wait_samples`` samples (except the very first one) the
    incoming sample is predicted before it is learned.  The collected
    predictions are compared with hard-coded reference values, ``predict``
    must return a NumPy array and ``predict_proba`` must raise
    ``NotImplementedError``.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    hyper_params = {'maxSize': 1000, 'nNeighbours': 5, 'knnWeights': 'distance',
                    'STMSizeAdaption': 'maxACCApprox', 'use_ltm': False}

    learner = SAMKNNClassifier(n_neighbors=hyper_params['nNeighbours'],
                               max_window_size=hyper_params['maxSize'],
                               weighting=hyper_params['knnWeights'],
                               stm_size_option=hyper_params['STMSizeAdaption'],
                               use_ltm=hyper_params['use_ltm'])

    max_samples = 5000
    wait_samples = 100
    # Typecode 'd' is kept from the original although the reference array
    # below uses 'i'; presumably predict() yields floats here -- TODO confirm.
    predictions = array('d')

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
                                       1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
                                       0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                       1, 1, 0, 1, 0, 0, 1, 0, 1])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)

    with pytest.raises(NotImplementedError):
        learner.predict_proba(X)
示例#10
0
from skmultiflow.data.file_stream import FileStream
import numpy as np
from Goowe import Goowe
from skmultiflow.data import SEAGenerator
import logging
from GooweMS import GooweMS
import random

# Root logger, configured to let INFO-level records through.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Prepare the data streams: build all three first, then ready each for use.
stream_1, stream_2, stream_3 = SEAGenerator(), SEAGenerator(), SEAGenerator()
stream_1.prepare_for_use()
stream_2.prepare_for_use()
stream_3.prepare_for_use()

# Experiment settings.
ENSEMBLE_TYPE = 'av'
instances_num = 10000
instances_counter = 0

# Stream description; feature/target counts are read from the first stream,
# class count and target values are hard-coded.
num_features = stream_1.n_features
num_targets = stream_1.n_targets
num_classes = 2
target_values = [0., 1.]
logging.info(
    "\n\tStreams are generated and prepared for use."
    "\n\tNumber of features: {0} - Number of targets: {1}"
    " - Number of classes: {2} - Target values: {3}".format(
        num_features, num_targets, num_classes, target_values))

N_MAX_CLASSIFIERS = 15
CHUNK_SIZE = 500        # User-specified
WINDOW_SIZE = 100       # User-specified
示例#11
0
def test_learn_nse():
    """Check LearnPPNSEClassifier accuracy, reset, info text and pruning modes.

    ``run_classifier`` (defined outside this function) is called three
    times with a GaussianNB base estimator: with its default settings, with
    ``pruning="error"`` and with ``pruning="age"`` (both with
    ``ensemble_size=5``).  The same stream object is passed to all three
    calls without being re-created.  Finally a fresh stream and a
    HoeffdingTreeClassifier base estimator are evaluated batch by batch
    directly in this test.
    """
    stream = SEAGenerator(random_state=2212)
    stream.prepare_for_use()
    estimator = GaussianNB()

    corrects, acc, classifier = run_classifier(estimator, stream)

    expected_correct_predictions = 1754
    expected_acc = 0.877

    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # Test reset method
    classifier.reset()
    assert len(classifier.ensemble) == 0
    assert len(classifier.ensemble_weights) == 0
    assert len(classifier.bkts) == 0
    assert len(classifier.wkts) == 0
    assert len(classifier.X_batch) == 0
    assert len(classifier.y_batch) == 0

    expected_info = (
        'LearnPPNSEClassifier(base_estimator=GaussianNB(priors=None, var_smoothing=1e-09),\n'
        '                     crossing_point=10, n_estimators=15, pruning=None,\n'
        '                     slope=0.5, window_size=250)'
    )
    assert classifier.get_info() == expected_info

    # test pruning error
    corrects, acc, classifier = run_classifier(estimator,
                                               stream,
                                               pruning="error",
                                               ensemble_size=5)

    expected_correct_predictions = 1751
    expected_acc = 0.8755

    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # test pruning age
    corrects, acc, classifier = run_classifier(estimator,
                                               stream,
                                               pruning="age",
                                               ensemble_size=5)

    expected_correct_predictions = 1774
    expected_acc = 0.887

    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # Fresh stream and a tree-based base estimator.
    stream = SEAGenerator(random_state=2212)
    stream.prepare_for_use()

    estimator = HoeffdingTreeClassifier()

    classifier = LearnPPNSEClassifier(base_estimator=estimator)

    # Keeping track of sample count and correct prediction count
    sample_count = 0
    corrects = 0

    m = 250
    # Pre training the classifier
    X, y = stream.next_sample(m)
    classifier.partial_fit(X, y, classes=stream.target_values)

    # Each batch is predicted first and only then used for training.
    for _ in range(10):
        X, y = stream.next_sample(m)
        pred = classifier.predict(X)
        classifier.partial_fit(X, y)

        if pred is not None:
            corrects += np.sum(y == pred)
        sample_count += m

    acc = corrects / sample_count
    expected_acc = 0.9436
    # Tolerant float comparison, consistent with the accuracy checks above.
    assert np.isclose(expected_acc, acc)
示例#12
0
def test_rslvq():
    """Check RSLVQ with 'vanilla' and 'adadelta' gradient descent.

    Both learners are trained one sample at a time on a seeded SEAGenerator
    stream; every ``wait_samples`` samples (except the very first one) the
    incoming sample is predicted before it is learned.  The test then
    verifies the recorded predictions, the ``get_info()`` text, batch
    ``fit`` after ``reset`` with the resulting scores, the classifier and
    return types, and the prototypes learned by the adadelta learner.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    learner_adadelta = RSLVQ(gradient_descent='adadelta')
    learner_vanilla = RSLVQ(gradient_descent='vanilla')

    max_samples = 5000
    wait_samples = 100
    y_pred_vanilla = array('i')
    y_pred_adadelta = array('i')
    X_batch = []
    y_batch = []

    # Check if predicted labels are as expected
    for cnt in range(max_samples):
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred_vanilla.append(learner_vanilla.predict(X)[0])
            y_pred_adadelta.append(learner_adadelta.predict(X)[0])
        learner_adadelta.partial_fit(X, y, classes=stream.target_values)
        learner_vanilla.partial_fit(X, y, classes=stream.target_values)

    expected_predictions_vanilla = array('i', [
        1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])

    expected_predictions_adadelta = array('i', [
        1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred_vanilla == expected_predictions_vanilla)
    assert np.all(y_pred_adadelta == expected_predictions_adadelta)

    # Check get_info method
    expected_info = "RobustSoftLearningVectorQuantization(gamma=0.9, gradient_descent='vanilla',\n                                     initial_prototypes=None,\n                                     prototypes_per_class=1, random_state=None,\n                                     sigma=1.0)"

    assert learner_vanilla.get_info() == expected_info

    # Build the train/test splits once.  NOTE: index 4500 belongs to neither
    # split; the expected scores below were recorded with exactly these
    # slices, so the boundaries are kept unchanged.
    X_train = np.array(X_batch[:4500])
    y_train = np.array(y_batch[:4500])
    X_test = np.array(X_batch[4501:])
    y_test = np.array(y_batch[4501:])

    # Check reset method
    learner_vanilla.reset()
    learner_vanilla.fit(X=X_train, y=y_train)

    learner_adadelta.reset()
    learner_adadelta.fit(X=X_train, y=y_train)

    # Check classifiers performance
    learner_w_init_ppt = RSLVQ(
        initial_prototypes=[[2.59922826, 2.57368134, 4.92501, 0],
                            [6.05801971, 6.01383352, 5.02135783, 1]],
        gradient_descent='adadelta')
    learner_w_init_ppt.fit(X=X_train, y=y_train)

    expected_score_ppt = .9539078156312625
    assert np.isclose(expected_score_ppt,
                      learner_w_init_ppt.score(X=X_test, y=y_test))

    expected_score_vanilla = .8897795591182365
    assert np.isclose(expected_score_vanilla,
                      learner_vanilla.score(X=X_test, y=y_test))

    expected_score_adadelta = .9458917835671342
    assert np.isclose(expected_score_adadelta,
                      learner_adadelta.score(X=X_test, y=y_test))

    # Check types
    assert is_classifier(learner_vanilla)
    assert is_classifier(learner_adadelta)

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner_vanilla.predict(X), np.ndarray)
    assert isinstance(learner_adadelta.predict(X), np.ndarray)

    # Check prototypes after learning
    expected_prototypes = np.array([[2.59922826, 2.57368134, 4.92501],
                                    [6.05801971, 6.01383352, 5.02135783]])

    assert np.allclose(learner_adadelta.prototypes, expected_prototypes)
示例#13
0
def test_learn_nse():
    """Check LearnNSE accuracy, reset, info text and pruning modes.

    ``run_classifier`` (defined outside this function) is called three
    times with a GaussianNB base estimator: with its default settings, with
    ``pruning="error"`` and with ``pruning="age"`` (both with
    ``ensemble_size=5``).  The same stream object is passed to all three
    calls without being re-created.  Finally a fresh stream and a
    HoeffdingTree base estimator are evaluated batch by batch directly in
    this test.
    """
    stream = SEAGenerator(random_state=2212)
    stream.prepare_for_use()
    estimator = GaussianNB()

    corrects, acc, classifier = run_classifier(estimator, stream)

    expected_correct_predictions = 1754
    expected_acc = 0.877

    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # Test reset method
    classifier.reset()
    assert len(classifier.ensemble) == 0
    assert len(classifier.ensemble_weights) == 0
    assert len(classifier.bkts) == 0
    assert len(classifier.wkts) == 0
    assert len(classifier.X_batch) == 0
    assert len(classifier.y_batch) == 0

    expected_info = (
        "LearnNSE: base_estimator: <class 'sklearn.naive_bayes.GaussianNB'> - "
        "ensemble_size: <class 'int'> - "
        "period: <class 'int'> - "
        "slope: <class 'float'> - "
        "crossing_point: <class 'int'> - "
        "pruning: <class 'NoneType'>"
    )
    assert classifier.get_info() == expected_info

    # test pruning error
    corrects, acc, classifier = run_classifier(estimator,
                                               stream,
                                               pruning="error",
                                               ensemble_size=5)

    expected_correct_predictions = 1751
    expected_acc = 0.8755

    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # test pruning age
    corrects, acc, classifier = run_classifier(estimator,
                                               stream,
                                               pruning="age",
                                               ensemble_size=5)

    expected_correct_predictions = 1774
    expected_acc = 0.887

    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions

    # Fresh stream and a tree-based base estimator.
    stream = SEAGenerator(random_state=2212)
    stream.prepare_for_use()

    estimator = HoeffdingTree()

    classifier = LearnNSE(base_estimator=estimator)

    # Keeping track of sample count and correct prediction count
    sample_count = 0
    corrects = 0

    m = 250
    # Pre training the classifier
    X, y = stream.next_sample(m)
    classifier.partial_fit(X, y, classes=stream.target_values)

    # Each batch is predicted first and only then used for training.
    for _ in range(10):
        X, y = stream.next_sample(m)
        pred = classifier.predict(X)
        classifier.partial_fit(X, y)

        if pred is not None:
            corrects += np.sum(y == pred)
        sample_count += m

    acc = corrects / sample_count
    expected_acc = 0.9436
    # Tolerant float comparison, consistent with the accuracy checks above.
    assert np.isclose(expected_acc, acc)
示例#14
0
def test_knn():
    """Check KNNClassifier incrementally and after a batch ``fit``.

    The learner is first trained one sample at a time on a seeded
    SEAGenerator stream; every ``wait_samples`` samples (except the very
    first one) the incoming sample is predicted before it is learned.
    Predictions, hit count and the whitespace-normalised ``get_info()``
    text (before and after ``reset``) are compared with reference values.
    The learner is then fitted on the first 4500 collected samples and its
    predictions for samples 4501-4549 are checked, along with the return
    types of ``predict`` and ``predict_proba``.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    learner = KNNClassifier(n_neighbors=8, max_window_size=2000, leaf_size=40)

    def _flat_info():
        # split() with no arguments already drops every run of whitespace,
        # including the line breaks in the multi-line info text.
        return " ".join(learner.get_info().split())

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    correct_predictions = 0
    X_batch = []
    y_batch = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    expected_info = 'KNNClassifier(leaf_size=40, max_window_size=2000, n_neighbors=8, nominal_attributes=None)'
    assert _flat_info() == expected_info

    # The info text must be unchanged after a reset.
    learner.reset()
    assert _flat_info() == expected_info

    # Batch training.  NOTE: index 4500 is in neither the training slice nor
    # the prediction slice; kept because the references were recorded so.
    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    learner.fit(X_batch[:4500], y_batch[:4500], classes=[0, 1])
    predictions = learner.predict(X_batch[4501:4550])

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        0
    ])
    assert np.all(predictions == expected_predictions)

    correct_predictions = sum(predictions == y_batch[4501:4550])
    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
def test_perceptron(test_path):
    """Check PerceptronMask predictions, probabilities, info text and accuracy.

    The learner is trained one sample at a time on a seeded SEAGenerator
    stream; every ``wait_samples`` samples (except the very first one) the
    incoming sample is predicted (label and probabilities) before it is
    learned.  Labels are compared with hard-coded values and probabilities
    with the array stored in ``data_perceptron_proba.npy`` under
    ``test_path``.  After a ``reset`` the learner is re-trained on the first
    4500 collected samples and its accuracy on the samples from index 4501
    on is checked.

    Parameters
    ----------
    test_path
        Directory containing ``data_perceptron_proba.npy``; joined with
        ``os.path.join``.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    learner = PerceptronMask(random_state=1)

    max_samples = 5000
    wait_samples = 100
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if cnt != 0 and cnt % wait_samples == 0:
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=stream.target_values)

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
        1
    ])
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'data_perceptron_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = (
        "PerceptronMask(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,\n"
        "               fit_intercept=True, max_iter=None, n_iter=None,\n"
        "               n_iter_no_change=5, n_jobs=None, penalty=None, random_state=1,\n"
        "               shuffle=True, tol=None, validation_fraction=0.1, verbose=0,\n"
        "               warm_start=False)"
    )
    assert learner.get_info() == expected_info

    # Coverage tests
    learner.reset()
    if not sklearn_version.startswith("0.21"):
        learner.fit(X=np.asarray(X_batch[:4500]),
                    y=np.asarray(y_batch[:4500]),
                    classes=stream.target_values)
    else:
        # Root cause of failure (TypeError: an integer is required) is in the fit() method in sklearn 0.21.0,
        # This is a workaround until a fix is made available in sklearn
        learner.partial_fit(X=np.asarray(X_batch[:4500]),
                            y=np.asarray(y_batch[:4500]),
                            classes=stream.target_values)
    # NOTE: index 4500 is in neither the training nor the evaluation slice;
    # kept because the expected accuracy was recorded with this split.
    y_pred = learner.predict(X=X_batch[4501:])
    accuracy = accuracy_score(y_true=y_batch[4501:], y_pred=y_pred)
    expected_accuracy = 0.9478957915831663
    assert np.isclose(expected_accuracy, accuracy)

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
示例#16
0
########################################
from sklearn.linear_model import SGDClassifier
from skmultiflow.data import SEAGenerator
from skmultiflow.evaluation import EvaluatePrequential

# We will use the `SEA` stream generator
stream = SEAGenerator(classification_function=2, random_state=1)
# Prepare the stream for use
stream.prepare_for_use()
# Set up a classifier, in this case `Linear SVM` with `SGD` training
classifier = SGDClassifier()
# Set up the evaluator, we will use prequential evaluation.
# The variable is called `evaluator` (not `eval`) so that the builtin
# `eval` function is not shadowed at module level.
evaluator = EvaluatePrequential(
    show_plot=True,
    max_samples=20000,
    metrics=['accuracy', 'kappa', 'running_time', 'model_size'])
# Run the evaluation
evaluator.evaluate(stream=stream, model=classifier, model_names=['SVM-SGD'])