Example #1
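These are scikit-multiflow test functions collected as usage examples; they assume roughly the imports below. The module paths follow the scikit-multiflow 0.5.x layout and the RSLVQ alias is an assumption, so adjust to your installed release if an import fails. (np.alltrue, used throughout, is a deprecated alias of np.all in recent NumPy.)

import os
from array import array

import numpy as np
import pytest
from sklearn import __version__ as sklearn_version
from sklearn.base import clone, is_classifier

from skmultiflow.anomaly_detection import HalfSpaceTrees
from skmultiflow.bayes import NaiveBayes
from skmultiflow.data import SEAGenerator
from skmultiflow.lazy import KNNClassifier, SAMKNNClassifier
from skmultiflow.meta import (AdditiveExpertEnsembleClassifier,
                              DynamicWeightedMajorityClassifier,
                              LeveragingBaggingClassifier,
                              OnlineCSB2Classifier, OnlineRUSBoostClassifier,
                              OzaBaggingADWINClassifier)
from skmultiflow.neural_networks import PerceptronMask
from skmultiflow.prototype import RobustSoftLearningVectorQuantization as RSLVQ
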
def test_half_space_trees(test_path):
    stream = SEAGenerator(classification_function=0,
                          noise_percentage=0.1,
                          random_state=1)

    learner = HalfSpaceTrees(n_estimators=13,
                             size_limit=75,
                             anomaly_threshold=0.90,
                             depth=10,
                             random_state=5)

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    y_proba = []
    wait_samples = 500

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Scale inputs to [0, 1] (SEA features lie in [0, 10])
        X = X / 10
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X)
        cnt += 1

    expected_predictions = array('i', [1, 0, 0, 0, 1, 0, 0, 1, 0])
    assert np.alltrue(y_pred == expected_predictions)
    test_file = os.path.join(test_path, 'test_half_space_trees.npy')
    expected_proba = np.load(test_file)
    assert np.allclose(y_proba, expected_proba)
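
Half-Space Trees is trained unsupervised (partial_fit takes no labels) and expects features in [0, 1], which is why the loop divides SEA's [0, 10] features by 10. A minimal standalone sketch under those assumptions:

stream = SEAGenerator(random_state=1)
detector = HalfSpaceTrees(random_state=1)
for _ in range(500):              # warm up on the stream before scoring
    X, _ = stream.next_sample()
    detector.partial_fit(X / 10)  # rescale SEA features from [0, 10] to [0, 1]
X, _ = stream.next_sample()
print(detector.predict(X / 10), detector.predict_proba(X / 10))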
Example #2
def test_clone():
    stream = SEAGenerator(random_state=1)

    learner = NaiveBayes()

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    cloned = clone(learner)

    assert learner._observed_class_distribution != {} and cloned._observed_class_distribution == {}
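
The final assertion rests on sklearn's clone semantics: the copy keeps the hyperparameters but none of the fitted state. A quick illustration (reusing the imports above; _observed_class_distribution is the NaiveBayes internal state checked by the test):

learner = NaiveBayes()
learner.partial_fit(np.array([[0.1, 0.2, 0.3]]), np.array([1]), classes=[0, 1])
fresh = clone(learner)
assert fresh.get_params() == learner.get_params()   # same configuration
assert fresh._observed_class_distribution == {}     # no learned state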
Example #3
def test_perceptron(test_path):
    stream = SEAGenerator(random_state=1)

    learner = PerceptronMask(random_state=1)

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
        1
    ])
    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'data_perceptron_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = "PerceptronMask(alpha=0.0001, class_weight=None, early_stopping=False, " \
                    "eta0=1.0, fit_intercept=True, max_iter=1000, n_iter_no_change=5, " \
                    "n_jobs=None, penalty=None, random_state=1, shuffle=True, tol=0.001, " \
                    "validation_fraction=0.1, verbose=0, warm_start=False)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    # Coverage tests
    learner.reset()
    if not sklearn_version.startswith("0.21"):
        learner.fit(X=np.asarray(X_batch[:4500]),
                    y=np.asarray(y_batch[:4500], dtype=int))
    else:
        # The root cause of the failure (TypeError: an integer is required) lies in
        # sklearn 0.21.0's fit() method; fall back to partial_fit() until sklearn ships a fix.
        learner.partial_fit(X=np.asarray(X_batch[:4500]),
                            y=np.asarray(y_batch[:4500]),
                            classes=stream.target_values)
    learner.predict(X=X_batch[4501:])  # Run for coverage

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
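
The interleaved test-then-train loop above recurs in nearly every example. A hypothetical helper (prequential_eval is not part of scikit-multiflow) that captures the pattern:

def prequential_eval(stream, learner, max_samples=5000, wait_samples=100):
    # Predict on every wait_samples-th instance before training on it,
    # then update the model; the first call declares the label space.
    predictions = []
    correct_predictions = 0
    first = True
    for cnt in range(max_samples):
        X, y = stream.next_sample()
        if cnt % wait_samples == 0 and cnt != 0:
            y_hat = learner.predict(X)[0]
            predictions.append(y_hat)
            correct_predictions += int(y_hat == y[0])
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
    return predictions, correct_predictions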
Example #4
def test_leverage_bagging():
    stream = SEAGenerator(classification_function=1,
                          noise_percentage=0.067,
                          random_state=112)
    knn = KNNClassifier(n_neighbors=8,
                        leaf_size=40,
                        max_window_size=2000)
    learner = LeveragingBaggingClassifier(base_estimator=knn,
                                          n_estimators=3,
                                          random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
                            0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
                            0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
                            1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
                            0, 1, 1, 0, 1, 0, 0, 1, 1]
    assert np.alltrue(predictions == expected_predictions)

    expected_performance = 0.8571428571428571
    assert np.isclose(expected_performance, performance)

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "LeveragingBaggingClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), " \
                    "delta=0.002, enable_code_matrix=False, leverage_algorithm='leveraging_bag'," \
                    " n_estimators=3, random_state=112, w=6)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
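
get_info() pretty-prints the estimator across several lines, so the tests collapse all whitespace before comparing against a single-line string. The recurring idiom, factored into an illustrative helper (str.split() with no argument already splits on any whitespace, making the strip() in the original one-liner redundant):

def normalized_info(estimator):
    # Illustrative helper, not part of scikit-multiflow: join the
    # whitespace-separated tokens of get_info() with single spaces.
    return " ".join(estimator.get_info().split())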
Example #5
def test_naive_bayes(test_path):
    stream = SEAGenerator(random_state=1)

    learner = NaiveBayes()

    cnt = 0
    max_samples = 5000
    y_pred = array('i')
    X_batch = []
    y_batch = []
    y_proba = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    expected_predictions = array('i', [1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
                                       1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
                                       0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
                                       1, 1, 0, 1, 0, 0, 1, 1, 1])

    assert np.alltrue(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'data_naive_bayes_proba.npy')
    y_proba_expected = np.load(test_file)
    assert np.allclose(y_proba, y_proba_expected)

    expected_info = 'NaiveBayes(nominal_attributes=None)'
    assert learner.get_info() == expected_info

    learner.reset()
    learner.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))

    expected_score = 0.9378757515030061
    assert np.isclose(expected_score, learner.score(X=np.array(X_batch[4501:]),
                                                    y=np.array(y_batch[4501:])))

    assert is_classifier(learner)

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
Example #6
def test_online_csb2():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineCSB2Classifier(base_estimator=nb,
                                   n_estimators=3,
                                   cost_positive=1,
                                   cost_negative=0.9,
                                   random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]

    expected_correct_predictions = 43
    expected_performance = 0.8775510204081632

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OnlineCSB2Classifier(base_estimator=NaiveBayes(nominal_attributes=None), cost_negative=0.9, " \
                    "cost_positive=1, drift_detection=True, n_estimators=3, random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
Example #7
def test_oza_bagging_adwin():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)
    learner = OzaBaggingADWINClassifier(base_estimator=knn,
                                        n_estimators=3,
                                        random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    assert np.alltrue(predictions == expected_predictions)

    expected_performance = 0.8979591836734694
    assert np.isclose(expected_performance, performance)

    expected_correct_predictions = 44
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray

    expected_info = "OzaBaggingADWINClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), n_estimators=3, " \
                    "random_state=112)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
Example #8
def test_dynamic_weighted_majority():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)

    learner = DynamicWeightedMajorityClassifier(3,
                                                NaiveBayes(),
                                                beta=0.5,
                                                theta=0.01)

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        1
    ]
    expected_correct_predictions = 44
    expected_performance = 0.8979591836734694

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = 'DynamicWeightedMajorityClassifier(base_estimator=NaiveBayes(nominal_attributes=None),\n' \
                    '                                  beta=0.5, n_estimators=3, period=50,\n' \
                    '                                  theta=0.01)'
    assert learner.get_info() == expected_info
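
DynamicWeightedMajorityClassifier is built here from positional arguments; spelled out with keywords (names taken from the get_info() output above), the same call reads:

learner = DynamicWeightedMajorityClassifier(n_estimators=3,
                                            base_estimator=NaiveBayes(),
                                            period=50,  # default, per get_info()
                                            beta=0.5,
                                            theta=0.01)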
Example #9
def test_online_rus_3():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)
    nb = NaiveBayes()
    learner = OnlineRUSBoostClassifier(base_estimator=nb,
                                       n_estimators=3,
                                       sampling_rate=5,
                                       algorithm=3,
                                       random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)
    expected_predictions = [
        1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
        1
    ]

    expected_correct_predictions = 35
    expected_performance = 0.7142857142857143

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
Example #10
def test_sam_knn_coverage():

    stream = SEAGenerator(random_state=1)

    hyperParams = {
        'maxSize': 50,
        'n_neighbors': 3,
        'weighting': 'uniform',
        'stm_size_option': 'maxACC',
        'min_stm_size': 10,
        'use_ltm': True
    }

    learner = SAMKNNClassifier(n_neighbors=hyperParams['n_neighbors'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['weighting'],
                               stm_size_option=hyperParams['stm_size_option'],
                               min_stm_size=hyperParams['min_stm_size'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 1000
    predictions = array('i')

    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1,
        1
    ])
    assert np.alltrue(predictions == expected_predictions)

    expected_info = "SAMKNNClassifier(ltm_size=0.4, max_window_size=None, min_stm_size=10, n_neighbors=3, " \
                    "stm_size_option='maxACC', use_ltm=True, weighting='uniform')"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
Example #11
def test_sam_knn():

    stream = SEAGenerator(random_state=1)

    hyperParams = {
        'maxSize': 1000,
        'nNeighbours': 5,
        'knnWeights': 'distance',
        'STMSizeAdaption': 'maxACCApprox',
        'use_ltm': False
    }

    learner = SAMKNNClassifier(n_neighbors=hyperParams['nNeighbours'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['knnWeights'],
                               stm_size_option=hyperParams['STMSizeAdaption'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 5000
    predictions = array('i')

    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])

    assert np.alltrue(predictions == expected_predictions)

    assert type(learner.predict(X)) == np.ndarray

    with pytest.raises(NotImplementedError):
        learner.predict_proba(X)
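
As the final assertion shows, SAMKNNClassifier leaves predict_proba unimplemented, so code that treats stream classifiers generically needs a guard such as:

try:
    proba = learner.predict_proba(X)
except NotImplementedError:
    proba = None  # SAM-kNN exposes hard predictions only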
Example #12
def test_additive_expert_ensemble_weakest():
    stream = SEAGenerator(1, noise_percentage=0.067, random_state=112)

    learner = AdditiveExpertEnsembleClassifier(3, NaiveBayes(), beta=0.5, gamma=0.1,
                                               pruning='weakest')

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0
    first = True

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1
    performance = correct_predictions / len(predictions)

    expected_predictions = [1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
                            0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
                            0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1]
    expected_correct_predictions = 45
    expected_performance = 0.9183673469387755

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray

    expected_info = "AdditiveExpertEnsembleClassifier(base_estimator=NaiveBayes(nominal_attributes=None),\n" \
                    "                                 beta=0.5, gamma=0.1, n_estimators=3,\n" \
                    "                                 pruning='weakest')"
    assert learner.get_info() == expected_info
Example #13
def test_sea_generator(test_path):
    stream = SEAGenerator(classification_function=2,
                          random_state=112,
                          balance_classes=False,
                          noise_percentage=0.28)

    # Load test data corresponding to first 10 instances
    test_file = os.path.join(test_path, 'sea_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    for j in range(0, 10):
        X, y = stream.next_sample()
        assert np.alltrue(np.isclose(X, X_expected[j]))
        assert np.alltrue(np.isclose(y[0], y_expected[j]))

    expected_info = "SEAGenerator(balance_classes=False, classification_function=2, noise_percentage=0.28, random_state=112)"
    assert stream.get_info() == expected_info
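
SEAGenerator emits three numeric features (roughly uniform in [0, 10]); the label comes from thresholding the sum of the first two features, with noise_percentage giving the fraction of flipped labels. next_sample also accepts a batch size:

stream = SEAGenerator(classification_function=2, noise_percentage=0.28,
                      random_state=112)
X, y = stream.next_sample(5)  # draw a batch of five instances
print(X.shape, y.shape)       # (5, 3) features, (5,) binary labels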
Example #14
def test_leverage_bagging_coverage():
    # Invalid leverage_algorithm
    with pytest.raises(ValueError):
        LeveragingBaggingClassifier(leverage_algorithm='invalid')

    estimator = LeveragingBaggingClassifier(random_state=4321)
    stream = SEAGenerator(random_state=4321)
    X, y = stream.next_sample()

    # classes not passed in partial_fit
    with pytest.raises(ValueError):
        estimator.partial_fit(X, y, classes=None)
    estimator.partial_fit(X, y, classes=[0, 1])
    # different observed classes
    with pytest.raises(ValueError):
        estimator.partial_fit(X, y, classes=[0, 1] + [-1])
    # Invalid leverage_algorithm, changed after initialization
    with pytest.raises(RuntimeError):
        estimator.leverage_algorithm = 'invalid'
        estimator.partial_fit(X, y, classes=[0, 1])

    # Reset ensemble
    estimator.reset()
    assert estimator.classes is None
Example #15
def test_rslvq():
    stream = SEAGenerator(random_state=1)

    learner_adadelta = RSLVQ(gradient_descent='adadelta')
    learner_vanilla = RSLVQ(gradient_descent='vanilla')

    cnt = 0
    max_samples = 5000
    y_pred_vanilla = array('i')
    y_pred_adadelta = array('i')
    X_batch = []
    y_batch = []
    wait_samples = 100

    # Check if predicted labels are as expected
    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred_vanilla.append(learner_vanilla.predict(X)[0])
            y_pred_adadelta.append(learner_adadelta.predict(X)[0])
        learner_adadelta.partial_fit(X, y, classes=[0, 1])
        learner_vanilla.partial_fit(X, y, classes=[0, 1])
        cnt += 1

    expected_predictions_vanilla = array('i', [
        1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
        0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])

    expected_predictions_adadelta = array('i', [
        1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])

    assert np.alltrue(y_pred_vanilla == expected_predictions_vanilla)
    assert np.alltrue(y_pred_adadelta == expected_predictions_adadelta)

    # Check get_info method
    expected_info = "RobustSoftLearningVectorQuantization(gamma=0.9, gradient_descent='vanilla',\n" \
                    "                                     initial_prototypes=None,\n" \
                    "                                     prototypes_per_class=1, random_state=None,\n" \
                    "                                     sigma=1.0)"

    assert learner_vanilla.get_info() == expected_info

    # Check reset method
    learner_vanilla.reset()
    learner_vanilla.fit(X=np.array(X_batch[:4500]), y=np.array(y_batch[:4500]))

    learner_adadelta.reset()
    learner_adadelta.fit(X=np.array(X_batch[:4500]),
                         y=np.array(y_batch[:4500]))

    # Check classifiers performance
    learner_w_init_ppt = RSLVQ(
        initial_prototypes=[[2.59922826, 2.57368134, 4.92501, 0],
                            [6.05801971, 6.01383352, 5.02135783, 1]],
        gradient_descent='adadelta')
    learner_w_init_ppt.fit(X=np.array(X_batch[:4500]),
                           y=np.array(y_batch[:4500]))

    expected_score_ppt = .9539078156312625
    assert np.isclose(
        expected_score_ppt,
        learner_w_init_ppt.score(X=np.array(X_batch[4501:]),
                                 y=np.array(y_batch[4501:])))

    expected_score_vanilla = .8897795591182365
    assert np.isclose(
        expected_score_vanilla,
        learner_vanilla.score(X=np.array(X_batch[4501:]),
                              y=np.array(y_batch[4501:])))

    expected_score_adadelta = .9458917835671342
    assert np.isclose(
        expected_score_adadelta,
        learner_adadelta.score(X=np.array(X_batch[4501:]),
                               y=np.array(y_batch[4501:])))

    # Check types
    assert is_classifier(learner_vanilla)
    assert is_classifier(learner_adadelta)

    assert type(learner_vanilla.predict(X)) == np.ndarray
    assert type(learner_adadelta.predict(X)) == np.ndarray

    # Check properties after learning
    expected_prototypes = np.array([[2.59922826, 2.57368134, 4.92501],
                                    [6.05801971, 6.01383352, 5.02135783]])

    assert np.allclose(learner_adadelta.prototypes, expected_prototypes)

    expected_prototypes_classes = np.array([0, 1])

    assert np.allclose(learner_adadelta.prototypes_classes,
                       expected_prototypes_classes)

    expected_class_labels = np.array([0, 1])

    assert np.allclose(learner_adadelta.class_labels, expected_class_labels)
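
Note the initial_prototypes format used above: each row is a prototype's feature vector followed by its class label, so with SEA's three features a row has four entries. A minimal sketch:

proto_learner = RSLVQ(
    gradient_descent='adadelta',
    prototypes_per_class=1,
    initial_prototypes=[[0.0, 0.0, 0.0, 0],    # three features, then class 0
                        [1.0, 1.0, 1.0, 1]])   # three features, then class 1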
Example #16
def test_knn():
    stream = SEAGenerator(random_state=1)

    learner = KNNClassifier(n_neighbors=8, max_window_size=2000, leaf_size=40)
    cnt = 0
    max_samples = 5000
    predictions = array('i')
    correct_predictions = 0
    wait_samples = 100
    X_batch = []
    y_batch = []

    while cnt < max_samples:
        X, y = stream.next_sample()
        X_batch.append(X[0])
        y_batch.append(y[0])
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])
    assert np.alltrue(predictions == expected_predictions)

    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    expected_info = "KNNClassifier(leaf_size=40, max_window_size=2000, " \
                    "metric='euclidean', n_neighbors=8)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    learner.reset()
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    X_batch = np.array(X_batch)
    y_batch = np.array(y_batch)
    learner.fit(X_batch[:4500], y_batch[:4500], classes=[0, 1])
    predictions = learner.predict(X_batch[4501:4550])

    expected_predictions = array('i', [
        1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        0
    ])
    assert np.alltrue(predictions == expected_predictions)

    correct_predictions = sum(predictions == y_batch[4501:4550])
    expected_correct_predictions = 49
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray