Python RandomTreeGenerator.prepare_for_use示例，skmultiflow.data.RandomTreeGenerator.prepare_for_use Python示例

示例#1

0

显示文件

def test_evaluate_prequential_classifier(tmpdir, test_path):
    """Prequentially evaluate a HoeffdingTree and compare the summary file.

    A seeded RandomTreeGenerator stream is evaluated with EvaluatePrequential
    for 1000 samples. The CSV summary written under ``tmpdir`` is compared
    with the reference ``prequential_summary.csv`` found in ``test_path``.
    """
    # Seeded stream generator
    generator_options = dict(tree_random_state=23,
                             sample_random_state=12,
                             n_classes=4,
                             n_cat_features=2,
                             n_num_features=5,
                             n_categories_per_cat_feature=5,
                             max_tree_depth=6,
                             min_leaf_depth=3,
                             fraction_leaves_per_level=0.15)
    stream = RandomTreeGenerator(**generator_options)
    stream.prepare_for_use()

    # Learner: feature indices from 15 onwards are declared nominal
    learner = HoeffdingTree(
        nominal_attributes=list(range(15, len(stream.feature_names))))

    # Evaluator writing its summary into the temporary directory
    summary_path = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator = EvaluatePrequential(
        max_samples=1000,
        metrics=['kappa', 'kappa_t', 'performance'],
        output_file=summary_path)

    # Evaluate; the first returned entry is the trained learner
    trained_models = evaluator.evaluate(stream=stream, model=learner)
    trained_learner = trained_models[0]

    assert isinstance(trained_learner, HoeffdingTree)

    # NOTE(review): the attribute is compared without being called; confirm
    # whether get_model_measurements is a property or a method in this version.
    assert learner.get_model_measurements == trained_learner.get_model_measurements

    reference_path = os.path.join(test_path, 'prequential_summary.csv')
    compare_files(summary_path, reference_path)

示例#2

0

显示文件

def test_evaluate_classification_coverage(tmpdir):
    """Coverage test: run EvaluatePrequential with many classification metrics.

    Only the current accuracy is asserted here; tests for the individual
    metrics are placed in the corresponding test module.
    """
    generator_options = dict(tree_random_state=23,
                             sample_random_state=12,
                             n_classes=2,
                             n_cat_features=2,
                             n_num_features=5,
                             n_categories_per_cat_feature=5,
                             max_tree_depth=6,
                             min_leaf_depth=3,
                             fraction_leaves_per_level=0.15)
    stream = RandomTreeGenerator(**generator_options)
    stream.prepare_for_use()

    # Learner: feature indices from 15 onwards are declared nominal
    learner = HoeffdingTree(
        nominal_attributes=list(range(15, len(stream.feature_names))))

    summary_path = os.path.join(str(tmpdir), "prequential_summary.csv")
    requested_metrics = ['accuracy', 'kappa', 'kappa_t', 'kappa_m', 'f1',
                         'precision', 'recall', 'gmean', 'true_vs_predicted']
    evaluator = EvaluatePrequential(max_samples=1000,
                                    metrics=requested_metrics,
                                    output_file=summary_path)

    # Evaluate
    evaluator.evaluate(stream=stream, model=learner)
    _, current = evaluator.get_measurements(model_idx=0)

    assert np.isclose(current.accuracy_score(), 0.685)

示例#3

0

显示文件

def test_kdd_tree_mixed():
    """Check KDTree nearest-neighbour queries using the 'mixed' metric.

    Builds a KDTree over 1000 samples drawn from a seeded RandomTreeGenerator
    (created with ``n_num_features=0``), queries the 4 nearest neighbours of
    10 further samples, and compares the returned indices and distances, as
    well as the tree's info string and class type, with stored expectations.
    """
    stream = RandomTreeGenerator(tree_random_state=1,
                                 sample_random_state=1,
                                 n_num_features=0)
    stream.prepare_for_use()

    X, _ = stream.next_sample(1000)
    X_test, _ = stream.next_sample(10)

    # Build tree; feature indices 0..24 are passed as categorical
    cat_features = list(range(25))
    kdtree = KDTree(X,
                    metric='mixed',
                    return_distance=True,
                    categorical_list=cat_features)

    # Query tree
    dist, idx = kdtree.query(X_test, 4)

    expected_idx = [[123, 234, 707, 654], [688, 429, 216, 627],
                    [463, 970, 566, 399], [18, 895, 640, 996],
                    [396, 612, 897, 232], [328, 54, 138, 569],
                    [253, 501, 82, 273], [38, 146, 752, 923],
                    [946, 808, 271, 363], [951, 111, 708, 5]]
    expected_dist = [[2, 2, 2, 2], [2, 2, 2, 2], [2, 2, 2, 2], [2, 2, 2, 0],
                     [2, 2, 2, 0], [2, 2, 2, 0], [2, 2, 2, 2], [2, 2, 0, 0],
                     [2, 2, 2, 0], [2, 2, 2, 2]]
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(idx == expected_idx)

    assert np.allclose(dist, expected_dist)

    expected_info = 'KDTree: - leaf_size: 40 - metric: mixed - return_distance: True'
    assert kdtree.get_info() == expected_info

    assert kdtree.get_class_type() == 'data_structure'

示例#4

0

显示文件

文件： test_extremely_fast_decision_tree.py 项目： nino2222/scikit-multiflow

def test_extremely_fast_decision_tree_nba(test_path):
    """Regression test for ExtremelyFastDecisionTreeClassifier with defaults.

    Trains the learner sample by sample on 5000 samples from a seeded
    RandomTreeGenerator, predicting every 100th sample before fitting on it.
    Predictions, class probabilities (reference stored in
    ``test_hoeffding_anytime_tree.npy`` under ``test_path``), the info string
    and the model description are compared with stored expectations.
    """
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=2,
                                 n_cat_features=2,
                                 n_categories_per_cat_feature=4,
                                 n_num_features=1,
                                 max_tree_depth=30,
                                 min_leaf_depth=10,
                                 fraction_leaves_per_level=0.45)
    stream.prepare_for_use()

    learner = ExtremelyFastDecisionTreeClassifier(
        nominal_attributes=list(range(1, 9)))

    max_samples = 5000
    predictions = array('i')
    proba_predictions = []
    wait_samples = 100

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (skipping the very first one)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
        0
    ])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_anytime_tree.npy')
    expected_proba = np.load(test_file)[:49, :]

    assert np.allclose(proba_predictions, expected_proba)

    expected_info = "ExtremelyFastDecisionTreeClassifier(binary_split=False, grace_period=200, " \
                    "leaf_prediction='nba', max_byte_size=33554432, memory_estimate_period=1000000, " \
                    "min_samples_reevaluate=20, nb_threshold=0, nominal_attributes=[1, 2, 3, 4, 5, 6, 7, 8], " \
                    "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, " \
                    "tie_threshold=0.05)"
    # Collapse every run of whitespace (including newlines) to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    expected_model = 'ifAttribute1=0.0:ifAttribute3=0.0:Leaf=Class1|{0:260.0,1:287.0}' \
                     'ifAttribute3=1.0:Leaf=Class0|{0:163.0,1:117.0}ifAttribute1=1.0:Leaf=Class0|{0:718.0,1:495.0}'

    # Compare the descriptions with newlines and spaces removed.
    assert (learner.get_model_description().replace("\n", " ").replace(
        " ", "") == expected_model.replace(" ", ""))
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

示例#5

0

显示文件

文件： test_hoeffding_adaptive_tree.py 项目： shuxiangzhang/scikit-multiflow

def test_HAT(test_path):
    """Regression test for the HAT learner on a seeded random-tree stream.

    Trains sample by sample on 5000 samples, predicting every 100th sample
    before fitting on it. Predictions, class probabilities (reference stored
    in ``test_hoeffding_adaptive_tree.npy`` under ``test_path``), the info
    string and the model description are compared with stored expectations.
    """
    stream = RandomTreeGenerator(tree_random_state=23, sample_random_state=12, n_classes=4, n_cat_features=2,
                                 n_num_features=5, n_categories_per_cat_feature=5, max_tree_depth=6, min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Feature indices from 5 up to the last feature are declared nominal
    nominal_attr_idx = list(range(5, stream.n_features))
    learner = HAT(nominal_attributes=nominal_attr_idx)

    max_samples = 5000
    predictions = array('d')
    proba_predictions = []
    wait_samples = 100

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (skipping the very first one)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('d', [2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 0.0, 1.0, 1.0, 2.0,
                                       0.0, 2.0, 1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0,
                                       1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 3.0, 2.0,
                                       1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0,
                                       0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 3.0, 2.0])

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree.npy')

    data = np.load(test_file)

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.allclose(proba_predictions, data)

    expected_info = 'HAT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200' \
                    ' - split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05' \
                    ' - binary_split: False - stop_mem_management: False - remove_poor_atts: False' \
                    ' - no_pre_prune: False - leaf_prediction: nba - nb_threshold: 0' \
                    ' - nominal_attributes: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - '

    assert learner.get_info() == expected_info

    # Several renderings of the same leaf are accepted: they differ in the
    # order of the class entries and in float formatting.
    expected_model_1 = 'Leaf = Class 1.0 | {0.0: 1367.3628584299263, 1.0: 1702.2738590243584,' \
                       ' 2.0: 952.1668539501372, 3.0: 822.1964285955778}\n'
    expected_model_2 = 'Leaf = Class 1.0 | {1.0: 1702.2738590243584, 2.0: 952.1668539501372,' \
                       ' 0.0: 1367.3628584299263, 3.0: 822.1964285955778}\n'
    expected_model_3 = 'Leaf = Class 1.0 | {1.0: 1702.2738590243584, 2.0: 952.16685395013724, ' \
                       '0.0: 1367.3628584299263, 3.0: 822.1964285955778}\n'   # Python 3.6
    expected_model_4 = 'Leaf = Class 1.0 | {0.0: 1367.3628584299263, 1.0: 1702.2738590243584,' \
                       ' 2.0: 952.16685395013724, 3.0: 822.1964285955778}\n'  # Python 3.4

    # Fetch the description once instead of once per candidate.
    assert learner.get_model_description() in (expected_model_1,
                                               expected_model_2,
                                               expected_model_3,
                                               expected_model_4)

示例#6

0

显示文件

def test_evaluate_prequential_classifier(tmpdir, test_path):
    """Prequentially evaluate a HoeffdingTree and verify summary and metrics.

    Compares the CSV summary written under ``tmpdir`` with the reference
    ``prequential_summary.csv`` in ``test_path``, then checks the mean and
    current accuracy, kappa and kappa_t values and the evaluator's info string.
    """
    # Seeded stream generator
    generator_options = dict(tree_random_state=23, sample_random_state=12,
                             n_classes=4, n_cat_features=2, n_num_features=5,
                             n_categories_per_cat_feature=5, max_tree_depth=6,
                             min_leaf_depth=3, fraction_leaves_per_level=0.15)
    stream = RandomTreeGenerator(**generator_options)
    stream.prepare_for_use()

    # Learner: feature indices from 15 onwards are declared nominal
    learner = HoeffdingTree(
        nominal_attributes=list(range(15, len(stream.feature_names))))

    # Evaluator writing its summary into the temporary directory
    summary_path = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator = EvaluatePrequential(max_samples=1000,
                                    metrics=['accuracy', 'kappa', 'kappa_t'],
                                    output_file=summary_path)

    # Evaluate; the first returned entry is the trained learner
    trained_models = evaluator.evaluate(stream=stream, model=learner)
    trained_learner = trained_models[0]

    assert isinstance(trained_learner, HoeffdingTree)

    # NOTE(review): the attribute is compared without being called; confirm
    # whether get_model_measurements is a property or a method in this version.
    assert learner.get_model_measurements == trained_learner.get_model_measurements

    reference_path = os.path.join(test_path, 'prequential_summary.csv')
    compare_files(summary_path, reference_path)

    mean_stats, current_stats = evaluator.get_measurements(model_idx=0)

    # Mean performance
    assert np.isclose(mean_stats.get_accuracy(), 0.436250)
    assert np.isclose(mean_stats.get_kappa(), 0.231791)
    assert np.isclose(mean_stats.get_kappa_t(), 0.236887)

    # Current performance
    assert np.isclose(current_stats.get_accuracy(), 0.430000)
    assert np.isclose(current_stats.get_kappa(), 0.223909)
    assert np.isclose(current_stats.get_kappa_t(), 0.240000)

    expected_info = (
        "EvaluatePrequential(batch_size=1, data_points_for_classification=False,\n"
        "                    max_samples=1000, max_time=inf,\n"
        "                    metrics=['accuracy', 'kappa', 'kappa_t'], n_wait=200,\n"
        "                    output_file='prequential_summary.csv',\n"
        "                    pretrain_size=200, restart_stream=True, show_plot=False)"
    )
    assert evaluator.get_info() == expected_info

示例#7

0

显示文件

文件： test_hoeffding_tree.py 项目： garawalid/scikit-multiflow

def test_hoeffding_tree(test_path):
    """Regression test for HoeffdingTree on a seeded random-tree stream.

    Trains sample by sample on 5000 samples, predicting every 100th sample
    before fitting on it. Predictions, class probabilities (reference stored
    in ``test_hoeffding_tree.npy`` under ``test_path``), the info string, the
    model description and the prediction return types are checked.
    """
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=4,
                                 n_cat_features=2,
                                 n_num_features=5,
                                 n_categories_per_cat_feature=5,
                                 max_tree_depth=6,
                                 min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Feature indices from 5 up to the last feature are declared nominal
    nominal_attr_idx = list(range(5, stream.n_features))
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    max_samples = 5000
    predictions = array('i')
    proba_predictions = []
    wait_samples = 100

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (skipping the very first one)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        0, 1, 3, 0, 0, 3, 0, 1, 1, 2, 0, 2, 1, 1, 2, 1, 3, 0, 1, 1, 1, 1, 0, 3,
        1, 2, 1, 1, 3, 2, 1, 2, 2, 2, 1, 1, 1, 0, 1, 2, 0, 2, 0, 0, 0, 0, 1, 3,
        2
    ])

    test_file = os.path.join(test_path, 'test_hoeffding_tree.npy')

    data = np.load(test_file)

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.allclose(proba_predictions, data)

    expected_info = 'HoeffdingTree: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200 ' \
                    '- split_criterion: info_gain - split_confidence: 1e-07 - tie_threshold: 0.05 ' \
                    '- binary_split: False - stop_mem_management: False - remove_poor_atts: False ' \
                    '- no_pre_prune: False - leaf_prediction: nba - nb_threshold: 0 - nominal_attributes: [5, 6, 7,' \
                    ' 8, 9, 10, 11, 12, 13, 14] - '
    assert learner.get_info() == expected_info

    # Two renderings of the same leaf are accepted; they differ only in the
    # order of the class entries.
    expected_model_1 = 'Leaf = Class 1.0 | {0.0: 1423.0, 1.0: 1745.0, 2.0: 978.0, 3.0: 854.0}\n'
    expected_model_2 = 'Leaf = Class 1.0 | {1.0: 1745.0, 2.0: 978.0, 0.0: 1423.0, 3.0: 854.0}\n'
    # Fetch the description once instead of once per candidate.
    assert learner.get_model_description() in (expected_model_1,
                                               expected_model_2)
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

示例#8

0

显示文件

文件： test_evaluate_holdout.py 项目： lengfab/scikit-multiflow

def test_evaluate_holdout_classifier(tmpdir, test_path):
    """Hold-out evaluate a HoeffdingTree and verify summary file and metrics.

    Compares the CSV summary written under ``tmpdir`` with the reference
    ``holdout_summary.csv`` in ``test_path``, then checks the mean and current
    accuracy, kappa and kappa_t values reported by the evaluator.
    """
    # Seeded stream generator
    generator_options = dict(tree_random_state=23,
                             sample_random_state=12,
                             n_classes=4,
                             n_cat_features=2,
                             n_num_features=5,
                             n_categories_per_cat_feature=5,
                             max_tree_depth=6,
                             min_leaf_depth=3,
                             fraction_leaves_per_level=0.15)
    stream = RandomTreeGenerator(**generator_options)
    stream.prepare_for_use()

    # Learner: feature indices from 15 onwards are declared nominal
    learner = HoeffdingTree(
        nominal_attributes=list(range(15, len(stream.feature_names))))

    # Evaluator writing its summary into the temporary directory
    summary_path = os.path.join(str(tmpdir), "holdout_summary.csv")
    evaluator = EvaluateHoldout(n_wait=200,
                                max_samples=1000,
                                test_size=50,
                                metrics=['accuracy', 'kappa', 'kappa_t'],
                                output_file=summary_path)

    # Evaluate; the first returned entry is the trained learner
    trained_models = evaluator.evaluate(stream=stream, model=learner)
    trained_learner = trained_models[0]

    assert isinstance(trained_learner, HoeffdingTree)

    # NOTE(review): the attribute is compared without being called; confirm
    # whether get_model_measurements is a property or a method in this version.
    assert learner.get_model_measurements == trained_learner.get_model_measurements

    reference_path = os.path.join(test_path, 'holdout_summary.csv')
    compare_files(summary_path, reference_path)

    mean_stats, current_stats = evaluator.get_measurements(model_idx=0)

    # Mean performance
    assert np.isclose(mean_stats.get_accuracy(), 0.344000)
    assert np.isclose(mean_stats.get_kappa(), 0.135021)
    assert np.isclose(mean_stats.get_kappa_t(), 0.180000)

    # Current performance
    assert np.isclose(current_stats.get_accuracy(), 0.360000)
    assert np.isclose(current_stats.get_kappa(), 0.152542)
    assert np.isclose(current_stats.get_kappa_t(), 0.200000)

示例#9

0

显示文件

文件： test_adaptive_random_forests.py 项目： wenhaoz-fengcai/scikit-multiflow

def test_adaptive_random_forests_nb():
    """Regression test for AdaptiveRandomForest with ``leaf_prediction='nb'``.

    Pre-trains on 150 samples from a seeded RandomTreeGenerator, then trains
    sample by sample on 5000 more, predicting every 100th sample before
    fitting on it. The predictions and the learner's info string are compared
    with stored expectations.
    """
    stream = RandomTreeGenerator(tree_random_state=112,
                                 sample_random_state=112,
                                 n_classes=2)
    stream.prepare_for_use()

    learner = AdaptiveRandomForest(n_estimators=3,
                                   random_state=112,
                                   leaf_prediction='nb')

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    max_samples = 5000
    predictions = []
    wait_samples = 100

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (skipping the very first one)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(int(learner.predict(X)[0]))

        learner.partial_fit(X, y)

    last_version_predictions = [
        1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
        1
    ]

    # Performance below does not need to be guaranteed. This check is set up so that anything that changes
    # to predictions are caught in the unit test. This helps prevent accidental changes.

    assert isinstance(learner.predict(X), np.ndarray)
    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == last_version_predictions)

    expected_info = "AdaptiveRandomForest(binary_split=False, disable_weighted_vote=False,\n" \
                    "                     drift_detection_method=ADWIN(delta=0.001), grace_period=50,\n" \
                    "                     lambda_value=6, leaf_prediction='nb',\n" \
                    "                     max_byte_size=33554432, max_features=5,\n" \
                    "                     memory_estimate_period=2000000, n_estimators=3,\n" \
                    "                     nb_threshold=0, no_preprune=False, nominal_attributes=None,\n" \
                    "                     performance_metric='acc', random_state=112,\n" \
                    "                     remove_poor_atts=False, split_confidence=0.01,\n" \
                    "                     split_criterion='info_gain', stop_mem_management=False,\n" \
                    "                     tie_threshold=0.05,\n" \
                    "                     warning_detection_method=ADWIN(delta=0.01))"
    assert learner.get_info() == expected_info

示例#10

0

显示文件

def test_hoeffding_anytime_tree(test_path):
    """Regression test for the HATT learner on a seeded random-tree stream.

    Trains sample by sample on 5000 samples, predicting every 100th sample
    before fitting on it. Predictions, class probabilities (reference stored
    in ``test_hoeffding_anytime_tree.npy`` under ``test_path``), the info
    string, the model description and the prediction return types are checked.
    """
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=2,
                                 n_cat_features=2,
                                 n_categories_per_cat_feature=4,
                                 n_num_features=1,
                                 max_tree_depth=30,
                                 min_leaf_depth=10,
                                 fraction_leaves_per_level=0.45)
    stream.prepare_for_use()

    learner = HATT(nominal_attributes=list(range(1, 9)))

    max_samples = 5000
    predictions = array('i')
    proba_predictions = []
    wait_samples = 100

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (skipping the very first one)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
        0
    ])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_anytime_tree.npy')
    expected_proba = np.load(test_file)[:49, :]

    assert np.allclose(proba_predictions, expected_proba)

    expected_info = 'HATT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200 - ' \
                    'min_samples_reevaluate: 20 - split_criterion: info_gain - split_confidence: 1e-07 - ' \
                    'tie_threshold: 0.05 - binary_split: False - stop_mem_management: False - leaf_prediction: ' \
                    'nba - nb_threshold: 0 - nominal_attributes: [1, 2, 3, 4, 5, 6, 7, 8] - '
    assert learner.get_info() == expected_info

    expected_model = 'ifAttribute1=0:ifAttribute3=0:Leaf=Class1|{0:260.0,1:287.0}' \
                     'ifAttribute3=1:Leaf=Class0|{0:163.0,1:117.0}ifAttribute1=1:Leaf=Class0|{0:718.0,1:495.0}'

    # Compare the descriptions with newlines and spaces removed.
    assert (learner.get_model_description().replace("\n", " ").replace(
        " ", "") == expected_model.replace(" ", ""))
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

示例#11

0

显示文件

文件： reccurring_concept_stream.py 项目： cpearce/scikit-ika

 def __init__(self, concept_id=0, seed=None, noise=0, desc=None):
     """Build a two-class RandomTreeGenerator stream and pass it to the parent.

     Parameters
     ----------
     concept_id
         Stored on the instance as ``self.cf``.
     seed
         Used as both the tree and the sample random state of the generator.
     noise
         Not used by this constructor.
     desc
         Optional object whose ``difficulty`` attribute sets the tree depth
         limits; when ``None`` the difficulty is 0.
     """
     self.cf = concept_id
     self.seed = seed
     # Identity check: `== None` can be hijacked by a custom __eq__ on desc.
     self.difficulty = 0 if desc is None else desc.difficulty
     stream = RandomTreeGenerator(tree_random_state=seed,
                                  sample_random_state=seed,
                                  max_tree_depth=self.difficulty + 2,
                                  min_leaf_depth=self.difficulty,
                                  n_classes=2)
     stream.prepare_for_use()
     super().__init__(stream)

示例#12

0

显示文件

def test_batch_incremental():
    """Regression test for BatchIncremental wrapping a DecisionTreeClassifier.

    Pre-trains on 150 samples from a seeded RandomTreeGenerator, then trains
    sample by sample on 5000 more, predicting every 100th sample before
    fitting on it. Predictions, the number and ratio of correct predictions,
    the prediction return type and the info string are checked.
    """
    stream = RandomTreeGenerator(tree_random_state=112, sample_random_state=112)
    stream.prepare_for_use()
    estimator = DecisionTreeClassifier(random_state=112)
    learner = BatchIncremental(base_estimator=estimator, n_estimators=10)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (skipping the very first one)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)
    expected_predictions = [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0,
                            0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0,
                            0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                            0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                            0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0]

    expected_correct_predictions = 31
    expected_performance = 0.6326530612244898

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert isinstance(learner.predict(X), np.ndarray)

    expected_info = "BatchIncremental(base_estimator=DecisionTreeClassifier(class_weight=None, " \
                    "criterion='gini', max_depth=None, max_features=None, max_leaf_nodes=None, " \
                    "min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, " \
                    "min_samples_split=2, min_weight_fraction_leaf=0.0, presort=False, random_state=112, " \
                    "splitter='best'), n_estimators=10, window_size=100)"
    # Collapse every run of whitespace (including newlines) to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

示例#13

0

显示文件

文件： test_learn_pp.py 项目： nino2222/scikit-multiflow

def test_learn_pp():
    """Regression test for LearnPPClassifier wrapping a DecisionTreeClassifier.

    Pre-trains on one batch of 200 samples from a seeded RandomTreeGenerator,
    then for 10 further batches predicts before fitting on each batch. The
    first prediction of every batch, the total number of correct predictions,
    the accuracy, the prediction return type and the info string are checked.
    """
    stream = RandomTreeGenerator(tree_random_state=2212,
                                 sample_random_state=2212)
    stream.prepare_for_use()
    estimator = DecisionTreeClassifier(random_state=2212)
    classifier = LearnPPClassifier(base_estimator=estimator,
                                   n_estimators=5,
                                   n_ensembles=5,
                                   random_state=2212)

    # Batch size, used for pre-training and for every evaluation batch
    m = 200

    # Keeping track of sample count and correct prediction count
    sample_count = 0
    corrects = 0

    # Pre training the classifier with 200 samples
    X, y = stream.next_sample(m)
    classifier.partial_fit(X, y, classes=stream.target_values)
    predictions = []

    for _ in range(10):
        # Use m here so the batch size and the sample count cannot drift apart
        X, y = stream.next_sample(m)
        pred = classifier.predict(X)
        classifier.partial_fit(X, y)

        if pred is not None:
            corrects += np.sum(y == pred)
            predictions.append(pred[0])
        sample_count += m

    acc = corrects / sample_count

    expected_correct_predictions = 1138
    expected_acc = 0.569
    expected_predictions = [0, 1, 0, 0, 1, 1, 0, 0, 0, 0]

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions
    assert isinstance(classifier.predict(X), np.ndarray)

    expected_info = "LearnPPClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, " \
                    "criterion='gini', max_depth=None, max_features=None, max_leaf_nodes=None, " \
                    "min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, " \
                    "min_samples_split=2, min_weight_fraction_leaf=0.0, presort=False, " \
                    "random_state=2212, splitter='best'), error_threshold=0.5, n_ensembles=5, " \
                    "n_estimators=5, random_state=2212, window_size=100)"
    # Collapse every run of whitespace (including newlines) to one space.
    info = " ".join(classifier.get_info().split())
    assert info == expected_info

示例#14

0

显示文件

def test_hoeffding_tree_nb(test_path):
    """Regression test for HoeffdingTree with ``leaf_prediction='nb'``.

    Trains sample by sample on 5000 samples from a seeded RandomTreeGenerator,
    predicting every 100th sample before fitting on it. The predictions and
    the learner's info string are compared with stored expectations.
    ``test_path`` is accepted but not used by this test.
    """
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=4,
                                 n_cat_features=2,
                                 n_num_features=5,
                                 n_categories_per_cat_feature=5,
                                 max_tree_depth=6,
                                 min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Feature indices from 5 up to the last feature are declared nominal
    nominal_attr_idx = list(range(5, stream.n_features))
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx,
                            leaf_prediction='nb')

    max_samples = 5000
    predictions = array('i')
    # Probabilities are collected but never asserted here; the call is kept
    # so predict_proba is still exercised.
    proba_predictions = []
    wait_samples = 100

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (skipping the very first one)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        0, 1, 3, 0, 0, 3, 0, 1, 1, 2, 0, 2, 1, 1, 2, 1, 3, 0, 1, 1, 1, 1, 0, 3,
        1, 2, 1, 1, 3, 2, 1, 2, 2, 2, 1, 1, 1, 0, 1, 2, 0, 2, 0, 0, 0, 0, 1, 3,
        2
    ])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    expected_info = "HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nb',\n" \
                    "              max_byte_size=33554432, memory_estimate_period=1000000,\n" \
                    "              nb_threshold=0, no_preprune=False,\n" \
                    "              nominal_attributes=[5, 6, 7, 8, 9, 10, 11, 12, 13, 14],\n" \
                    "              remove_poor_atts=False, split_confidence=1e-07,\n" \
                    "              split_criterion='info_gain', stop_mem_management=False,\n" \
                    "              tie_threshold=0.05)"
    assert learner.get_info() == expected_info

示例#15

0

显示文件

文件： test_extremely_fast_decision_tree.py 项目： nino2222/scikit-multiflow

def test_extremely_fast_decision_tree_nb_gini(test_path):
    """Regression test for ExtremelyFastDecisionTreeClassifier with nb/gini.

    The learner is created with ``leaf_prediction='nb'`` and
    ``split_criterion='gini'`` and trained sample by sample on 5000 samples
    from a seeded RandomTreeGenerator, predicting every 100th sample before
    fitting on it. The predictions and the info string are compared with
    stored expectations. ``test_path`` is accepted but not used by this test.
    """
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=2,
                                 n_cat_features=2,
                                 n_categories_per_cat_feature=4,
                                 n_num_features=1,
                                 max_tree_depth=30,
                                 min_leaf_depth=10,
                                 fraction_leaves_per_level=0.45)
    stream.prepare_for_use()

    learner = ExtremelyFastDecisionTreeClassifier(
        nominal_attributes=list(range(1, 9)),
        leaf_prediction='nb',
        split_criterion='gini')

    max_samples = 5000
    predictions = array('i')
    # Probabilities are collected but never asserted here; the call is kept
    # so predict_proba is still exercised.
    proba_predictions = []
    wait_samples = 100

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (skipping the very first one)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
        0
    ])

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    expected_info = "ExtremelyFastDecisionTreeClassifier(binary_split=False, grace_period=200, " \
                    "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, " \
                    "min_samples_reevaluate=20, nb_threshold=0, nominal_attributes=[1, 2, 3, 4, 5, 6, 7, 8], " \
                    "split_confidence=1e-07, split_criterion='gini', stop_mem_management=False, tie_threshold=0.05)"
    # Collapse every run of whitespace (including newlines) to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

示例#16

0

显示文件

def test_batch_incremental():
    """Regression-test BatchIncremental (wrapped in a Pipeline) on a seeded stream.

    The learner is warmed up on 150 samples, then fed 5000 samples one at a
    time. Every ``wait_samples``-th sample (except the first) is predicted
    before being learned. The predictions, the number of correct predictions
    and the resulting accuracy are compared with recorded reference values.
    """
    stream = RandomTreeGenerator(tree_random_state=112, sample_random_state=112)
    stream.prepare_for_use()
    estimator = DecisionTreeClassifier(random_state=112)
    classifier = BatchIncremental(base_estimator=estimator, n_estimators=10)

    learner = Pipeline([('classifier', classifier)])

    # Warm-up batch so the learner has seen data before the first prediction.
    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Predict first, then train, on every `wait_samples`-th sample.
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        learner.partial_fit(X, y)

    performance = correct_predictions / len(predictions)
    expected_predictions = [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0,
                            0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0,
                            0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                            0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                            0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0]

    expected_correct_predictions = 31
    expected_performance = 0.6326530612244898

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert isinstance(learner.predict(X), np.ndarray)

示例#17

0

显示文件

def test_adaptive_random_forests_labels_given():
    """Regression-test ARF ``predict_proba`` when class labels are given up front.

    The forest is warmed up on 150 samples with ``classes=[0, 1]``, then fed
    5000 samples one at a time. Every ``wait_samples``-th sample (except the
    first) is scored with ``predict_proba`` before being learned. The test
    checks that each probability vector sums to 1, that the stacked result has
    shape ``(49, 2)``, and that the arg-max labels match recorded values.
    """
    stream = RandomTreeGenerator(tree_random_state=112,
                                 sample_random_state=112,
                                 n_classes=2)
    stream.prepare_for_use()

    learner = AdaptiveRandomForest(n_estimators=3, random_state=112)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y, classes=[0, 1])

    max_samples = 5000
    wait_samples = 100
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Score first, then train, on every `wait_samples`-th sample.
        if cnt != 0 and cnt % wait_samples == 0:
            proba_predictions.append(learner.predict_proba(X)[0])

        learner.partial_fit(X, y)

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all([
        np.isclose(y_proba.sum(), 1) for y_proba in proba_predictions
    ]), "Probabilities should sum to 1."

    class_probabilities = np.asarray(proba_predictions).squeeze()
    assert class_probabilities.shape == (49, 2)

    predicted_labels = class_probabilities.argmax(axis=1)
    last_version_predictions = [
        1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
        0
    ]

    assert np.all(predicted_labels == last_version_predictions)

示例#18

0

显示文件

def test_evaluate_classification_metrics():
    """Check f1 / precision / recall / g-mean reported by EvaluatePrequential.

    A HoeffdingTree is evaluated prequentially on 1000 samples of a seeded
    two-class ``RandomTreeGenerator`` stream. Both the mean and the current
    measurements of model 0 are printed and then compared with recorded
    reference values.
    """
    generator_options = dict(tree_random_state=23, sample_random_state=12, n_classes=2,
                             n_cat_features=2, n_num_features=5, n_categories_per_cat_feature=5,
                             max_tree_depth=6, min_leaf_depth=3, fraction_leaves_per_level=0.15)
    stream = RandomTreeGenerator(**generator_options)
    stream.prepare_for_use()

    # Learner: features from index 15 onwards are declared nominal
    learner = HoeffdingTree(nominal_attributes=list(range(15, len(stream.feature_names))))

    evaluator = EvaluatePrequential(max_samples=1000,
                                    metrics=['f1', 'precision', 'recall', 'gmean'])

    # Run the evaluation and fetch the measurements of the only model
    evaluator.evaluate(stream=stream, model=learner)
    mean_performance, current_performance = evaluator.get_measurements(model_idx=0)

    # Echo the measured values (mean first, then current)
    for measurements in (mean_performance, current_performance):
        print(measurements.get_g_mean())
        print(measurements.get_recall())
        print(measurements.get_precision())
        print(measurements.get_f1_score())

    # (measurements, f1, precision, recall, g-mean): current first, then mean
    reference = (
        (current_performance,
         0.7096774193548387, 0.6814159292035398, 0.7403846153846154, 0.6802502367624613),
        (mean_performance,
         0.7009803921568628, 0.7185929648241206, 0.6842105263157895, 0.6954166367760247),
    )
    for measurements, f1_score, precision, recall, g_mean in reference:
        assert np.isclose(measurements.get_f1_score(), f1_score)
        assert np.isclose(measurements.get_precision(), precision)
        assert np.isclose(measurements.get_recall(), recall)
        assert np.isclose(measurements.get_g_mean(), g_mean)

示例#19

0

显示文件

def test_adaptive_random_forests_batch_predict_proba():
    """Regression-test ARF ``predict_proba`` on batches of five samples.

    The forest is warmed up on 150 samples with ``classes=[0, 1]``, then fed
    500 batches of 5 samples. Every ``wait_samples``-th batch (except the
    first) is scored with ``predict_proba`` before being learned, giving
    4 scored batches. The test checks the per-batch shape ``(5, 2)``, that
    every probability row sums to 1, and that the arg-max labels match
    recorded values.
    """
    stream = RandomTreeGenerator(tree_random_state=112,
                                 sample_random_state=112,
                                 n_classes=2)
    stream.prepare_for_use()

    learner = AdaptiveRandomForest(n_estimators=3, random_state=112)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y, classes=[0, 1])

    max_samples = 500
    wait_samples = 100
    predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample(5)
        # Score first, then train, on every `wait_samples`-th batch.
        if cnt != 0 and cnt % wait_samples == 0:
            p = learner.predict_proba(X)
            assert p.shape == (5, 2)
            predictions.append(p)
        learner.partial_fit(X, y)

    all_predictions = np.concatenate(predictions)

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all([
        np.isclose(y_proba.sum(), 1) for y_proba in all_predictions
    ]), "Probabilities should sum to 1."
    assert all_predictions.shape == (4 * 5, 2)

    last_version_predictions = [
        1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1
    ]
    assert isinstance(learner.predict_proba(X), np.ndarray)
    assert np.all(
        all_predictions.argmax(axis=1) == last_version_predictions)

示例#20

0

显示文件

def test_adaptive_random_forests():
    """Regression-test ARF label predictions on a seeded stream.

    The forest is warmed up on 150 samples, then fed 5000 samples one at a
    time. Every ``wait_samples``-th sample (except the first) is predicted
    before being learned, and the predictions are compared with the values
    recorded from the last known-good version (on Python 3.6+ only).
    """
    stream = RandomTreeGenerator(tree_random_state=112, sample_random_state=112)
    stream.prepare_for_use()

    learner = AdaptiveRandomForest(n_estimators=3,
                                   random_state=112)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    max_samples = 5000
    wait_samples = 100
    predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Predict first, then train, on every `wait_samples`-th sample.
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(int(learner.predict(X)[0]))

        learner.partial_fit(X, y)

    # Reference values are built once, after the loop; they were previously
    # re-created on every one of the 5000 iterations.
    last_version_predictions = [1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0,
                                1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
                                1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0]

    # Performance below does not need to be guaranteed. This check is set up so that anything that changes
    # to predictions are caught in the unit test. This helps prevent accidental changes.
    # If these tests fail, make sure that what is worked on *should* change the predictions of ARF.
    if sys.version_info.major == 3 and sys.version_info.minor >= 6:
        #  Temporary disable as pre-3.6 give different predictions than 3.6+
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        assert np.all(predictions == last_version_predictions)

    assert isinstance(learner.predict(X), np.ndarray)

示例#21

0

显示文件

def test_learn_pp():
    """Regression-test LearnPP with decision-tree base estimators.

    The classifier is pre-trained on one batch of ``m`` samples, then
    processes ten further batches of ``m`` samples, predicting each batch
    before learning it. The first prediction of every batch, the total number
    of correct predictions and the accuracy are compared with recorded values.
    """
    stream = RandomTreeGenerator(tree_random_state=2212, sample_random_state=2212)
    stream.prepare_for_use()
    estimator = DecisionTreeClassifier(random_state=2212)
    classifier = LearnPP(base_estimator=estimator, n_estimators=5, n_ensembles=5, random_state=2212)

    # Batch size, used for pre-training, for every evaluated batch and for
    # the running sample count, so the three cannot drift apart.
    m = 200

    # Keeping track of sample count and correct prediction count
    sample_count = 0
    corrects = 0

    # Pre training the classifier with m samples
    X, y = stream.next_sample(m)
    classifier.partial_fit(X, y, classes=stream.target_values)
    predictions = []

    for _ in range(10):
        X, y = stream.next_sample(m)
        pred = classifier.predict(X)
        classifier.partial_fit(X, y)

        if pred is not None:
            corrects += np.sum(y == pred)
            predictions.append(pred[0])
        sample_count += m

    acc = corrects / sample_count

    expected_correct_predictions = 1138
    expected_acc = 0.569
    expected_predictions = [0, 1, 0, 0, 1, 1, 0, 0, 0, 0]

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.isclose(expected_acc, acc)
    assert corrects == expected_correct_predictions
    assert isinstance(classifier.predict(X), np.ndarray)

示例#22

0

显示文件

文件： test_evaluate_holdout.py 项目： zhouyonglong/scikit-multiflow

def test_evaluate_holdout_classifier(tmpdir, test_path):
    """Exercise EvaluateHoldout with a HoeffdingTree in two configurations.

    Part 1 uses a four-class stream with accuracy / kappa / kappa_t metrics,
    writes a summary file under ``tmpdir`` and compares it with the reference
    file found under ``test_path``; it also checks the evaluator's
    ``get_info()`` text. Part 2 uses a two-class stream with
    f1 / precision / recall / g-mean metrics and no output file.
    """
    # Settings common to both parts of the test
    shared_stream_options = dict(tree_random_state=23,
                                 sample_random_state=12,
                                 n_cat_features=2,
                                 n_num_features=5,
                                 n_categories_per_cat_feature=5,
                                 max_tree_depth=6,
                                 min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    shared_evaluator_options = dict(n_wait=200, max_samples=1000, test_size=50)

    # ---- Part 1: four classes, accuracy / kappa / kappa_t -----------------
    stream = RandomTreeGenerator(n_classes=4, **shared_stream_options)
    stream.prepare_for_use()

    # Features from index 15 onwards are declared nominal
    learner = HoeffdingTree(
        nominal_attributes=list(range(15, len(stream.feature_names))))

    output_file = os.path.join(str(tmpdir), "holdout_summary.csv")
    evaluator = EvaluateHoldout(metrics=['accuracy', 'kappa', 'kappa_t'],
                                output_file=output_file,
                                **shared_evaluator_options)

    result = evaluator.evaluate(stream=stream, model=learner)
    result_learner = result[0]

    assert isinstance(result_learner, HoeffdingTree)

    assert learner.get_model_measurements == result_learner.get_model_measurements

    # The written summary must match the stored reference file
    expected_file = os.path.join(test_path, 'holdout_summary.csv')
    compare_files(output_file, expected_file)

    mean_performance, current_performance = evaluator.get_measurements(
        model_idx=0)

    # (getter, expected value), checked in this order
    first_run_reference = (
        (mean_performance.get_accuracy, 0.344000),
        (mean_performance.get_kappa, 0.135021),
        (mean_performance.get_kappa_t, 0.180000),
        (current_performance.get_accuracy, 0.360000),
        (current_performance.get_kappa, 0.152542),
        (current_performance.get_kappa_t, 0.200000),
    )
    for getter, expected_value in first_run_reference:
        assert np.isclose(getter(), expected_value)

    expected_info = (
        "EvaluateHoldout(batch_size=1, dynamic_test_set=False, max_samples=1000,\n"
        "                max_time=inf, metrics=['accuracy', 'kappa', 'kappa_t'],\n"
        "                n_wait=200,\n"
        "                output_file='holdout_summary.csv',\n"
        "                restart_stream=True, show_plot=False, test_size=50)"
    )
    assert evaluator.get_info() == expected_info

    # ---- Part 2: two classes, f1 / precision / recall / g-mean ------------
    stream = RandomTreeGenerator(n_classes=2, **shared_stream_options)
    stream.prepare_for_use()

    learner = HoeffdingTree(
        nominal_attributes=list(range(15, len(stream.feature_names))))

    evaluator = EvaluateHoldout(metrics=['f1', 'precision', 'recall', 'gmean'],
                                **shared_evaluator_options)

    evaluator.evaluate(stream=stream, model=learner)
    mean_performance, current_performance = evaluator.get_measurements(
        model_idx=0)

    second_run_reference = (
        (current_performance.get_f1_score, 0.6818181818181818),
        (current_performance.get_precision, 0.625),
        (current_performance.get_recall, 0.75),
        (current_performance.get_g_mean, 0.7245688373094719),
        (mean_performance.get_f1_score, 0.6431718061674009),
        (mean_performance.get_precision, 0.5748031496062992),
        (mean_performance.get_recall, 0.73),
        (mean_performance.get_g_mean, 0.6835202996254025),
    )
    for getter, expected_value in second_run_reference:
        assert np.isclose(getter(), expected_value)

示例#23

0

显示文件

def test_hoeffding_tree_nba(test_path):
    """Regression-test HoeffdingTree with its default ('nba') leaf prediction.

    Phase 1 feeds 5000 samples of a seeded four-class stream one at a time,
    predicting every ``wait_samples``-th sample (except the first) before
    learning it. Labels are compared with recorded values and probabilities
    with ``test_hoeffding_tree.npy`` under ``test_path``; the learner's info
    text and model description are checked as well.

    Phase 2 switches ``split_criterion`` to ``'hellinger'``, trains on a
    further 20000 samples in one batch and compares the rules description
    with the recorded text.
    """
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=4,
                                 n_cat_features=2,
                                 n_num_features=5,
                                 n_categories_per_cat_feature=5,
                                 max_tree_depth=6,
                                 min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Features from index 5 onwards are declared nominal
    nominal_attr_idx = list(range(5, stream.n_features))
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Predict first, then train, on every `wait_samples`-th sample.
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        0, 1, 3, 0, 0, 3, 0, 1, 1, 2, 0, 2, 1, 1, 2, 1, 3, 0, 1, 1, 1, 1, 0, 3,
        1, 2, 1, 1, 3, 2, 1, 2, 2, 2, 1, 1, 1, 0, 1, 2, 0, 2, 0, 0, 0, 0, 1, 3,
        2
    ])

    test_file = os.path.join(test_path, 'test_hoeffding_tree.npy')

    data = np.load(test_file)

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    assert np.allclose(proba_predictions, data)

    expected_info = "HoeffdingTree(binary_split=False, grace_period=200, leaf_prediction='nba',\n" \
                    "              max_byte_size=33554432, memory_estimate_period=1000000,\n" \
                    "              nb_threshold=0, no_preprune=False,\n" \
                    "              nominal_attributes=[5, 6, 7, 8, 9, 10, 11, 12, 13, 14],\n" \
                    "              remove_poor_atts=False, split_confidence=1e-07,\n" \
                    "              split_criterion='info_gain', stop_mem_management=False,\n" \
                    "              tie_threshold=0.05)"
    assert learner.get_info() == expected_info

    expected_model_1 = 'Leaf = Class 1.0 | {0.0: 1423.0, 1.0: 1745.0, 2.0: 978.0, 3.0: 854.0}\n'

    assert (learner.get_model_description() == expected_model_1)
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    # Phase 2: change the split criterion on the live learner and keep training
    X, y = stream.next_sample(20000)
    learner.split_criterion = 'hellinger'
    learner.partial_fit(X, y)

    expected_rules = 'Att (5) == 0.000 and Att (12) == 0.000 | class: 1\n' + \
        'Att (5) == 0.000 and Att (12) == 1.000 | class: 1\n' + \
        'Att (5) == 1.000 and Att (13) == 0.000 and Att (1) <= 0.550 and Att (3) <= 0.730 | class: 0\n' +\
        'Att (5) == 1.000 and Att (13) == 0.000 and Att (1) <= 0.550 and Att (3) > 0.730 | class: 2\n' + \
        'Att (5) == 1.000 and Att (13) == 0.000 and Att (1) > 0.550 and Att (1) <= 0.800 | class: 0\n' + \
        'Att (5) == 1.000 and Att (13) == 0.000 and Att (1) > 0.550 and Att (1) > 0.800 and Att (14) == 0.000 | class: 0\n' + \
        'Att (5) == 1.000 and Att (13) == 0.000 and Att (1) > 0.550 and Att (1) > 0.800 and Att (14) == 1.000 | class: 1\n' + \
        'Att (5) == 1.000 and Att (13) == 1.000 and Att (3) <= 0.730 | class: 1\n' + \
        'Att (5) == 1.000 and Att (13) == 1.000 and Att (3) > 0.730 | class: 0\n'
    assert expected_rules == learner.get_rules_description()

示例#24

0

显示文件

def test_kdd_tree_euclidean():
    """Regression-test KDTree nearest-neighbour queries with the euclidean metric.

    A tree is built from 1000 samples of a seeded ``RandomTreeGenerator``
    stream and queried with the next 10 samples for 4 neighbours each. The
    returned indices and distances, the tree's ``get_info()`` text and its
    ``get_class_type()`` value are compared with recorded reference values.
    """
    stream = RandomTreeGenerator(tree_random_state=1, sample_random_state=1)
    stream.prepare_for_use()

    # Only the feature matrices are needed; the labels are discarded.
    X, _ = stream.next_sample(1000)
    X_test, _ = stream.next_sample(10)

    # Build tree
    kdtree = KDTree(X, metric='euclidean', return_distance=True)

    # Query tree
    dist, idx = kdtree.query(X_test, 4)

    # One row per query sample, four neighbour indices per row.
    expected_idx = [[855, 466, 348, 996], [829, 654, 92, 333],
                    [227, 364, 183, 325], [439, 482, 817, 501],
                    [886, 173, 279, 470], [98, 30, 34, 580],
                    [959, 773, 374, 819], [819, 685, 59, 992],
                    [624, 665, 209, 239], [524, 807, 506, 191]]
    # Distances matching expected_idx position by position; within each
    # recorded row the values are listed from largest to smallest.
    expected_dist = [[
        1.6366216258724973, 1.631437068636607, 1.5408182139320563,
        1.4836054196064452
    ],
                     [
                         1.7839579422032452, 1.7694587302438618,
                         1.5339920309706585, 1.5228981881653287
                     ],
                     [
                         1.6512443805072872, 1.637456923425164,
                         1.61736766513639, 1.5776532815820448
                     ],
                     [
                         1.5843121606184263, 1.571918014408251,
                         1.5038147281265382, 0.7058569455034059
                     ],
                     [
                         2.052148026638031, 2.0157953468214007,
                         1.8012794130725434, 1.6572756455115591
                     ],
                     [
                         1.5844032729792423, 1.5688736638121885,
                         1.55893121879858, 1.4609657517960262
                     ],
                     [
                         1.6819916227667229, 1.6186557774269037,
                         1.5815309744477162, 1.5720184136312232
                     ],
                     [
                         1.7302164693989817, 1.5964713159009083,
                         1.4897849225874815, 1.1629448414734906
                     ],
                     [
                         1.6511813695220574, 1.6454651930288255,
                         1.5926685577827064, 1.4973008307362947
                     ],
                     [
                         1.5982346741983797, 1.5875900895982191,
                         1.4702209684850878, 1.4676217546305874
                     ]]

    # Indices must match exactly; distances are compared with a float tolerance.
    assert np.alltrue(idx == expected_idx)

    assert np.allclose(dist, expected_dist)

    expected_info = 'KDTree: - leaf_size: 40 - metric: euclidean - return_distance: True'
    assert kdtree.get_info() == expected_info

    assert kdtree.get_class_type() == 'data_structure'