Пример #1
0
def test_batch_incremental():
    stream = RandomTreeGenerator(tree_random_state=112, sample_random_state=112)
    stream.prepare_for_use()
    estimator = DecisionTreeClassifier(random_state=112)
    classifier = BatchIncremental(base_estimator=estimator, n_estimators=10)

    learner = Pipeline([('classifier', classifier)])

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            true_labels.append(y[0])
            if np.array_equal(y[0], predictions[-1]):
                correct_predictions += 1

        learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0,
                            0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0,
                            0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                            0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                            0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0]

    expected_correct_predictions = 31
    expected_performance = 0.6326530612244898

    assert np.alltrue(predictions == expected_predictions)
    assert np.isclose(expected_performance, performance)
    assert correct_predictions == expected_correct_predictions

    assert type(learner.predict(X)) == np.ndarray
Пример #2
0
def demo():
    """ _test_mol

    This demo tests the MOL learner on a file stream, which reads from 
    the music.csv file.

    The test computes the performance of the MOL learner as well as 
    the time to create the structure and classify all the samples in 
    the file.

    """
    # Setup logging
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # Setup the file stream
    opt = FileOption("FILE", "OPT_NAME", "../datasets/music.csv", "CSV", False)
    stream = FileStream(opt, 0, 6)
    stream.prepare_for_use()

    # Setup the classifier, by default it uses Logistic Regression
    #classifier = MultiOutputLearner()
    #classifier = MultiOutputLearner(h=SGDClassifier(n_iter=100))
    classifier = MultiOutputLearner(h=Perceptron())

    # Setup the pipeline
    pipe = Pipeline([('classifier', classifier)])

    pretrain_size = 150
    logging.info('Pre training on %s samples', str(pretrain_size))
    X, y = stream.next_instance(pretrain_size)
    #classifier.fit(X, y)
    pipe.partial_fit(X, y, classes=stream.get_classes())
    count = 0
    true_labels = []
    predicts = []
    init_time = timer()
    logging.info('Evaluating...')
    while stream.has_more_instances():
        X, y = stream.next_instance()
        #p = classifier.predict(X)
        p = pipe.predict(X)
        predicts.extend(p)
        true_labels.extend(y)
        count += 1
    perf = hamming_score(true_labels, predicts)
    logging.info('Evaluation time: %s s', str(timer() - init_time))
    logging.info('Total samples analyzed: %s', str(count))
    logging.info('The classifier\'s static Hamming score    : %0.3f' % perf)