def test_batch_incremental():
    """Regression test for ``BatchIncremental`` wrapped in a ``Pipeline``.

    A seeded ``RandomTreeGenerator`` stream feeds the learner: 150 samples
    are used for pre-training, then 5000 further samples are consumed one
    at a time. Every 100th sample (excluding the very first) is predicted
    *before* the learner is trained on it, and the collected predictions,
    the number of hits and the resulting accuracy are compared against
    previously recorded reference values.
    """
    stream = RandomTreeGenerator(tree_random_state=112, sample_random_state=112)
    stream.prepare_for_use()

    estimator = DecisionTreeClassifier(random_state=112)
    classifier = BatchIncremental(base_estimator=estimator, n_estimators=10)
    learner = Pipeline([('classifier', classifier)])

    # Pre-train so the ensemble can predict from the first evaluation on.
    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    wait_samples = 100
    predictions = []
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples (test-then-train: predict before fitting).
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1
        learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)

    expected_predictions = [
        1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0,
        0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0,
    ]
    expected_correct_predictions = 31
    expected_performance = 0.6326530612244898

    # np.alltrue was removed in NumPy 2.0; array_equal checks both the
    # length and every element of the two sequences.
    assert np.array_equal(predictions, expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    # The last X drawn from the stream is reused to check the return type.
    assert isinstance(learner.predict(X), np.ndarray)
def demo():
    """Run the multi-output learner demo on the music dataset.

    Reads ``../datasets/music.csv`` through a ``FileStream``, pre-trains a
    ``MultiOutputLearner`` (with a ``Perceptron`` base estimator) on the
    first 150 samples, then predicts every remaining sample one at a time.
    The elapsed time, the number of samples processed and the Hamming
    score of the predictions are written to the log.
    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # File-backed stream over the music dataset.
    # NOTE(review): the meaning of the positional arguments (0, 6) is not
    # visible here -- confirm against the FileStream signature.
    file_option = FileOption("FILE", "OPT_NAME", "../datasets/music.csv", "CSV", False)
    stream = FileStream(file_option, 0, 6)
    stream.prepare_for_use()

    # A Perceptron is passed explicitly as the base estimator instead of
    # relying on the learner's default.
    classifier = MultiOutputLearner(h=Perceptron())
    pipeline = Pipeline([('classifier', classifier)])

    # Warm-up phase: fit on an initial batch before any prediction is made.
    pretrain_size = 150
    logging.info('Pre training on %s samples', str(pretrain_size))
    X, y = stream.next_instance(pretrain_size)
    pipeline.partial_fit(X, y, classes=stream.get_classes())

    n_samples = 0
    expected = []
    predicted = []
    start = timer()

    logging.info('Evaluating...')
    while stream.has_more_instances():
        X, y = stream.next_instance()
        predicted.extend(pipeline.predict(X))
        expected.extend(y)
        n_samples += 1

    # The score is computed before the clock is read, so the reported
    # evaluation time includes the scoring step.
    score = hamming_score(expected, predicted)
    elapsed = timer() - start

    logging.info('Evaluation time: %s s', str(elapsed))
    logging.info('Total samples analyzed: %s', str(n_samples))
    logging.info("The classifier's static Hamming score : %0.3f" % score)