def test_label_combination_hoeffding_tree_nb(test_path):
    """Regression test for LabelCombinationHoeffdingTreeClassifier with ``leaf_prediction='nb'``.

    The learner is trained one sample at a time on a seeded
    ``MultilabelGenerator`` stream. Every ``wait_samples`` samples (skipping
    sample 0) the prediction for the sample that was just learned is recorded,
    and the recorded sequence is compared against stored reference values.
    The return types of ``predict``/``predict_proba`` and the string produced
    by ``get_info()`` are checked as well.

    Parameters
    ----------
    test_path
        Not used by this test body (presumably a pytest fixture kept for
        signature consistency with sibling tests -- TODO confirm).
    """
    stream = MultilabelGenerator(n_samples=10000, n_features=15, n_targets=3, n_labels=4, random_state=112)
    stream.prepare_for_use()

    learner = LabelCombinationHoeffdingTreeClassifier(n_labels=3, leaf_prediction='nb')

    cnt = 0
    max_samples = 5000
    wait_samples = 100
    predictions = []
    # Probabilities are collected to exercise predict_proba(); no reference
    # values are asserted against them below.
    proba_predictions = []

    while cnt < max_samples:
        X, y = stream.next_sample()
        # NOTE: the model is updated *before* it is queried; the reference
        # predictions below depend on this order, so do not swap the steps.
        learner.partial_fit(X, y)
        # Test every n samples
        if cnt % wait_samples == 0 and cnt != 0:
            predictions.append(learner.predict(X)[0].tolist())
            proba_predictions.append(learner.predict_proba(X)[0])
        cnt += 1

    expected_predictions = [
        [0, 0, 1], [1, 1, 1], [0, 1, 1], [0, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 0],
        [1, 1, 1], [1, 1, 1], [1, 1, 1], [0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1],
        [1, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 1], [0, 0, 0], [1, 1, 0], [1, 0, 0],
        [1, 0, 1], [1, 1, 1], [0, 0, 1], [1, 0, 1], [1, 1, 1], [1, 0, 0], [1, 1, 1],
        [1, 1, 1], [0, 0, 1], [1, 1, 1], [0, 0, 0], [0, 1, 0], [1, 1, 1], [0, 1, 1],
        [1, 1, 0], [0, 0, 1], [0, 0, 0], [1, 1, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1],
        [1, 1, 1], [1, 1, 1], [0, 1, 0], [0, 1, 0], [1, 1, 1], [1, 1, 1], [1, 1, 1],
    ]
    # Both sides are plain nested lists (predictions were converted with
    # tolist()), so direct equality is exact. This avoids np.alltrue, which
    # was removed in NumPy 2.0.
    assert predictions == expected_predictions

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    expected_info = (
        "LabelCombinationHoeffdingTreeClassifier(binary_split=False, grace_period=200, "
        "leaf_prediction='nb', max_byte_size=33554432, memory_estimate_period=1000000, n_labels=3, "
        "nb_threshold=0, no_preprune=False, nominal_attributes=None, remove_poor_atts=False, "
        "split_confidence=1e-07, split_criterion='info_gain', stop_mem_management=False, "
        "tie_threshold=0.05)"
    )
    # Collapse every run of whitespace (including line breaks) in the info
    # string to a single space before comparing.
    info = " ".join(learner.get_info().split())
    assert info == expected_info
def test_label_combination_hoeffding_tree_coverage():
    """Exercise the memory-management path of LabelCombinationHoeffdingTreeClassifier.

    A learner capped at ``max_byte_size`` (with a short
    ``memory_estimate_period``) is trained on a single batch drawn from a
    seeded ``MultilabelGenerator`` stream. Afterwards its measured size, in
    kB, must not exceed the configured limit.
    """
    # Cover memory management
    total_samples = 10000
    size_limit_kb = 50

    generator = MultilabelGenerator(
        n_samples=10000,
        n_features=15,
        n_targets=3,
        n_labels=4,
        random_state=112,
    )

    # Unconstrained model has over 62 kB
    classifier = LabelCombinationHoeffdingTreeClassifier(
        n_labels=3,
        leaf_prediction='mc',
        memory_estimate_period=200,
        max_byte_size=size_limit_kb * 2**10,  # limit expressed in bytes
    )

    # Draw the whole batch at once and fit on it in one call.
    features, targets = generator.next_sample(total_samples)
    classifier.partial_fit(features, targets)

    measured_kb = calculate_object_size(classifier, 'kB')
    assert measured_kb <= size_limit_kb
# Example: evaluate a Label Combination Hoeffding Tree on a synthetic
# multi-label stream, predicting each sample before training on it.
from skmultiflow.meta import MultiOutputLearner
from skmultiflow.trees import (
    LabelCombinationHoeffdingTreeClassifier,
    HoeffdingTreeClassifier,
)
from skmultiflow.metrics import hamming_score

# NOTE(review): MultilabelGenerator is not imported in this span; it is
# assumed to be brought into scope earlier in the file -- confirm.

# Data stream that supplies the samples
stream = MultilabelGenerator(random_state=1, n_samples=200, n_targets=5, n_features=10)

# Classifier under evaluation, configured with one label per stream target
lc_ht = LabelCombinationHoeffdingTreeClassifier(n_labels=stream.n_targets)

# Loop control and performance bookkeeping
max_samples = 200
n_samples = 0
predicts = []
true_labels = []

# Process samples until the budget is used up or the stream runs dry
while n_samples < max_samples:
    if not stream.has_more_samples():
        break
    X, y = stream.next_sample()
    # Predict first, then learn from the same sample
    y_pred = lc_ht.predict(X)
    lc_ht.partial_fit(X, y, classes=stream.target_values)
    true_labels.extend(y)
    predicts.extend(y_pred)
    n_samples += 1

# Report the Hamming score over all processed samples
perf = hamming_score(true_labels, predicts)
print('{} samples analyzed.'.format(n_samples))
print('Label Combination Hoeffding Tree Hamming score: ' + str(perf))