def test_hoeffding_adaptive_tree_categorical_features(test_path):
    data_path = os.path.join(test_path, 'ht_categorical_features_testcase.npy')
    stream = np.load(data_path)
    # Removes the last two columns (regression targets)
    stream = stream[:, :-2]
    X, y = stream[:, :-1], stream[:, -1]

    nominal_attr_idx = np.arange(7).tolist()
    learner = HoeffdingAdaptiveTreeClassifier(
        nominal_attributes=nominal_attr_idx, random_state=1)

    learner.partial_fit(X, y, classes=np.unique(y))

    expected_description = "if Attribute 0 = -15.0:\n" \
                           "  Leaf = Class 2 | {2: 475.0}\n" \
                           "if Attribute 0 = 0.0:\n" \
                           "  Leaf = Class 0 | {0: 560.0, 1: 345.0}\n" \
                           "if Attribute 0 = 1.0:\n" \
                           "  Leaf = Class 1 | {0: 416.0, 1: 464.0}\n" \
                           "if Attribute 0 = 2.0:\n" \
                           "  Leaf = Class 1 | {0: 335.0, 1: 504.0}\n" \
                           "if Attribute 0 = 3.0:\n" \
                           "  Leaf = Class 1 | {0: 244.0, 1: 644.0}\n" \
                           "if Attribute 0 = -30.0:\n" \
                           "  Leaf = Class 3 | {3: 65.0, 4: 55.0}\n"

    assert learner.get_model_description() == expected_description
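

# Not part of the original tests: a minimal sketch of why `nominal_attributes`
# matters for the tree above. Without it the integer codes would be treated as
# ordered numbers and split with <=/> thresholds; with it the tree builds the
# equality splits asserted in the test. The toy data below is made up.
def _nominal_attributes_demo():
    import numpy as np
    from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier

    rng = np.random.RandomState(1)
    X = rng.randint(0, 3, size=(1000, 2)).astype(float)  # two categorical columns
    y = (X[:, 0] == 1).astype(int)                        # class depends on category 1
    tree = HoeffdingAdaptiveTreeClassifier(nominal_attributes=[0, 1], random_state=1)
    tree.partial_fit(X, y, classes=[0, 1])
    print(tree.get_model_description())
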
Example #2
def test_hoeffding_adaptive_tree_nb(test_path):
    stream = ConceptDriftStream(stream=SEAGenerator(random_state=1,
                                                    noise_percentage=0.05),
                                drift_stream=SEAGenerator(
                                    random_state=2,
                                    classification_function=2,
                                    noise_percentage=0.05),
                                random_state=1,
                                position=250,
                                width=10)

    learner = HoeffdingAdaptiveTreeClassifier(leaf_prediction='nb',
                                              random_state=1)

    cnt = 0
    max_samples = 1000
    y_pred = array('i')
    y_proba = []
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            y_pred.append(learner.predict(X)[0])
            y_proba.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
        1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
        1
    ])
    assert np.all(y_pred == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_adaptive_tree_nb.npy')
    data = np.load(test_file)
    assert np.allclose(y_proba, data)

    expected_info = "HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True, grace_period=200, " \
                    "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, " \
                    "no_preprune=False, nominal_attributes=None, random_state=1, remove_poor_atts=False, " \
                    "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, tie_threshold=0.05)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    assert type(learner.predict(X)) == np.ndarray
    assert type(learner.predict_proba(X)) == np.ndarray
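

# Not part of the original tests: HoeffdingAdaptiveTreeClassifier also accepts
# leaf_prediction='mc' (majority class) and 'nba' (adaptive Naive Bayes) besides
# the 'nb' used above. A minimal, illustrative sketch contrasting the three on
# the same SEA stream; the accuracies printed are not asserted anywhere.
def _leaf_prediction_modes_demo():
    from skmultiflow.data import SEAGenerator
    from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier

    for mode in ('mc', 'nb', 'nba'):
        stream = SEAGenerator(random_state=1)
        clf = HoeffdingAdaptiveTreeClassifier(leaf_prediction=mode, random_state=1)
        X, y = stream.next_sample()
        clf.partial_fit(X, y, classes=[0, 1])  # prime the tree with one sample
        correct, n = 0, 2000
        for _ in range(n):
            X, y = stream.next_sample()
            correct += int(clf.predict(X)[0] == y[0])  # test-then-train
            clf.partial_fit(X, y)
        print(mode, correct / n)
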
def test_evaluation_event_observer(test_path):
    test_file = os.path.join(test_path, 'iris.data')
    train_eval_trigger = QuantityBasedHoldoutTrigger(5, 10, 20)
    algorithm = HoeffdingAdaptiveTreeClassifier(leaf_prediction='mc',
                                                random_state=1)

    results_observer = mock()
    evaluation_event_observer = EvaluationEventObserver(
        algorithm, train_eval_trigger, [results_observer], [0, 1, 2])

    data_source = FileDataSource(record_to_dictionary,
                                 [evaluation_event_observer], test_file)
    data_source.listen_for_events()
    time.sleep(3)

    verify(results_observer, times=4).report(any, any)
def test_hoeffding_adaptive_tree_alternate_tree():
    stream = AGRAWALGenerator(random_state=7)

    learner = HoeffdingAdaptiveTreeClassifier(random_state=1)

    cnt = 0
    change_point1 = 1500
    change_point2 = 2500
    change_point3 = 4000
    max_samples = 5000

    while cnt < max_samples:
        X, y = stream.next_sample()
        learner.partial_fit(X, y)
        cnt += 1

        if cnt > change_point1:
            stream.generate_drift()
            change_point1 = float('Inf')

            expected_description = "if Attribute 2 <= 63.63636363636363:\n" \
                                   "  if Attribute 2 <= 39.54545454545455:\n" \
                                   "    Leaf = Class 0 | {0: 397.5023676194098}\n" \
                                   "  if Attribute 2 > 39.54545454545455:\n" \
                                   "    if Attribute 2 <= 58.81818181818181:\n" \
                                   "      Leaf = Class 1 | {1: 299.8923824199619}\n" \
                                   "    if Attribute 2 > 58.81818181818181:\n" \
                                   "      Leaf = Class 0 | {0: 54.0, 1: 20.107617580038095}\n" \
                                   "if Attribute 2 > 63.63636363636363:\n" \
                                   "  Leaf = Class 0 | {0: 512.5755895049351}\n"
            assert expected_description == learner.get_model_description()

        if cnt > change_point2:
            stream.generate_drift()
            change_point2 = float('Inf')
            expected_description = "if Attribute 8 <= 268547.7178694747:\n" \
                                   "  Leaf = Class 0 | {0: 446.18690518790413, 1: 80.6180778406834}\n" \
                                   "if Attribute 8 > 268547.7178694747:\n" \
                                   "  Leaf = Class 1 | {0: 36.8130948120959, 1: 356.38192215931656}\n"
            assert expected_description == learner.get_model_description()

        if cnt > change_point3:
            stream.generate_drift()
            change_point3 = float('Inf')

    expected_description = "Leaf = Class 0 | {0: 1083.0, 1: 2.0}\n"
    assert expected_description == learner.get_model_description()
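

# Not part of the original test: the adaptation checked above comes from the
# tree monitoring its error with the ADWIN change detector and growing alternate
# subtrees when a change is flagged. A minimal sketch of ADWIN on its own; the
# simulated error stream below is made up for illustration.
def _adwin_drift_demo():
    import numpy as np
    from skmultiflow.drift_detection import ADWIN

    adwin = ADWIN()
    rng = np.random.RandomState(1)
    # Error rate jumps from 10% to 60% halfway through, mimicking a concept drift
    errors = np.concatenate([rng.binomial(1, 0.1, 1000),
                             rng.binomial(1, 0.6, 1000)])
    for i, e in enumerate(errors):
        adwin.add_element(e)
        if adwin.detected_change():
            print("Change detected at sample", i)
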
Example #5
# NOTE: the original snippet begins mid-function; the header and the lines
# before the progress loop are reconstructed here (an assumption). `url`,
# `file_name` and the requests/sys/numpy imports come from the elided part.
def download_data():
    response = requests.get(url, stream=True)
    total_length = response.headers.get('content-length')
    with open(file_name, 'wb') as f:
        if total_length is None:
            f.write(response.content)
        else:
            dl = 0
            total_length = int(total_length)
            for data in response.iter_content(chunk_size=4096):
                dl += len(data)
                f.write(data)
                done = int(50 * dl / total_length)
                sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
                sys.stdout.flush()
    data = np.load(file_name, allow_pickle=True)
    return data


# data = download_data()
# If the dataset file is already downloaded
data = np.load(file_name, allow_pickle=True)

sam = SAMKNN()
hat = HoeffdingAdaptiveTreeClassifier()

stream = DataStream(data[:, 1:], data[:, 0].astype(int))
stream.prepare_for_use()

evaluator = EvaluatePrequential(max_samples=10000,
                                max_time=1000,
                                show_plot=True,
                                metrics=['accuracy', 'kappa'])

evaluator.evaluate(stream=stream,
                   model=[sam, hat],
                   model_names=['SAMkNN', 'HAT'])
ds = ConceptDriftStream(random_state=777, position=30000)
ds  # in a notebook this displays the repr reproduced in the comments below
# Output:
#ConceptDriftStream(alpha=0.0,
#                   drift_stream=AGRAWALGenerator(balance_classes=False,
#                                                 classification_function=2,
#                                                 perturbation=0.0,
#                                                 random_state=112),
#                   position=30000, random_state=777,
#                   stream=AGRAWALGenerator(balance_classes=False,
#                                           classification_function=0,
#                                           perturbation=0.0, random_state=112),
#                   width=1000)

# Instantiate the model object
model_hat = HoeffdingAdaptiveTreeClassifier()

# Prequential evaluation
eval1 = EvaluatePrequential(pretrain_size=400,
                            max_samples=300000,
                            batch_size=1,
                            n_wait=100,
                            max_time=2000,
                            show_plot=False,
                            metrics=['accuracy'])

eval1.evaluate(stream=ds, model=model_hat)

# Holdout evaluation
eval2 = EvaluateHoldout(max_samples=30000,
                        max_time=2000)  # remaining arguments are cut off in the original snippet
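
# Not part of the truncated original: presumably the holdout evaluation would be
# run the same way as the prequential one above. A minimal sketch, relying on the
# EvaluateHoldout defaults for the parameters cut off in the source:
eval2.evaluate(stream=ds, model=model_hat)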
Example #7
# Imports elided in the original snippet (added for completeness)
import glob
import os

from skmultiflow.data import FileStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.trees import HoeffdingTreeClassifier, HoeffdingAdaptiveTreeClassifier
from skmultiflow.meta import BatchIncrementalClassifier

all_Files = r'C:\Users\JZM\Desktop\concept-drift-master\concept-drift-master\data\\'
all_Datasets = glob.glob(all_Files + '*.csv')
Results = []
for x in all_Datasets:
    print("Results for " + str(os.path.basename(x)))
    try:
        # Import the data as a stream
        data_stream = FileStream(x)
        data_stream.prepare_for_use()
        #        awe = AccuracyWeightedEnsembleClassifier(n_estimators=10,
        #                                                base_estimator=HoeffdingTreeClassifier)

        # Define the classifier: HT is the plain Hoeffding Tree, while the Adaptive Hoeffding Tree uses ADWIN to minimize error
        CLF = [HoeffdingAdaptiveTreeClassifier()]

        # Define the evaluator and run prequential training/evaluation
        evaluator = EvaluatePrequential(
            show_plot=True,
            metrics=['accuracy', 'kappa', 'model_size',
                     'precision', 'recall', 'f1'],
            n_wait=100)
        evaluator.evaluate(stream=data_stream, model=CLF, model_names=['HAT'])

        clf = BatchIncrementalClassifier(
            base_estimator=HoeffdingTreeClassifier(), n_estimators=10)
        # Keeping track of sample count and correct prediction count
        sample_count = 0
        corrects = 0