Example #1
def demo():
    """ _test_mol

    This demo tests the MultiOutputLearner (MOL) on a file stream that 
    reads from the music.csv file.

    The test reports the performance of the MOL learner as well as the 
    time needed to build the model and classify all the samples in the 
    file.

    """
    # Setup logging
    logging.basicConfig(format='%(message)s', level=logging.INFO)

    # Setup the file stream
    stream = FileStream("../data/datasets/music.csv", 0, 6)
    stream.prepare_for_use()

    # Setup the classifier; by default it uses Logistic Regression
    # classifier = MultiOutputLearner()
    # classifier = MultiOutputLearner(base_estimator=SGDClassifier(n_iter=100))
    classifier = MultiOutputLearner(base_estimator=Perceptron())

    # Setup the pipeline
    pipe = Pipeline([('classifier', classifier)])

    pretrain_size = 150
    logging.info('Pre-training on %s samples', str(pretrain_size))
    logging.info('Total %s samples', str(stream.n_samples))
    X, y = stream.next_sample(pretrain_size)
    # classifier.fit(X, y)
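    # target_values holds one list of class values per target on a
    # multi-target stream, so it is flattened into the unique set of
    # class values that partial_fit expects.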
    classes = stream.target_values
    classes_flat = list(set([item for sublist in classes for item in sublist]))
    pipe.partial_fit(X, y, classes=classes_flat)
    count = 0
    true_labels = []
    predicts = []
    init_time = timer()
    logging.info('Evaluating...')
    while stream.has_more_samples():
        X, y = stream.next_sample()
        # p = classifier.predict(X)
        p = pipe.predict(X)
        predicts.extend(p)
        true_labels.extend(y)
        count += 1
    perf = hamming_score(true_labels, predicts)
    logging.info('Evaluation time: %s s', str(timer() - init_time))
    logging.info('Total samples analyzed: %s', str(count))
    logging.info('The classifier\'s static Hamming score    : %0.3f' % perf)
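
The function above omits its import header; a minimal sketch of what it assumes (exact module paths vary a little across scikit-multiflow releases):

import logging
from timeit import default_timer as timer

from sklearn.linear_model import Perceptron, SGDClassifier  # SGDClassifier only for the commented-out variant

from skmultiflow.core import Pipeline
from skmultiflow.data import FileStream
from skmultiflow.meta import MultiOutputLearner
from skmultiflow.metrics import hamming_score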
Example #2
def demo():
    """ _test_pipeline
    
    This demo shows the Pipeline structure acting as a single learner 
    while being passed as a parameter to an EvaluatePrequential 
    object.
     
    """
    # # Setup the stream
    # stream = FileStream("https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
    #                     "master/covtype.csv")
    # # If used with Hoeffding Trees, the indices of the nominal attributes must be passed

    # Test with RandomTreeGenerator
    # stream = RandomTreeGenerator(n_classes=2, n_numerical_attributes=5)

    # Test with WaveformGenerator
    stream = WaveformGenerator()

    # Setup the classifier
    #classifier = PerceptronMask()
    #classifier = NaiveBayes()
    #classifier = PassiveAggressiveClassifier()
    classifier = HoeffdingTreeClassifier()

    # Setup the pipeline
    pipe = Pipeline([('Hoeffding Tree', classifier)])

    # Setup the evaluator
    evaluator = EvaluatePrequential(show_plot=True, pretrain_size=1000, max_samples=100000)

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
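
Because Pipeline exposes the same fit/partial_fit/predict interface as the learner it wraps, EvaluatePrequential can treat it like any other incremental classifier. A minimal sketch of the same pipeline driven by hand, i.e. test-then-train without the evaluator (import paths assumed, following the newer scikit-multiflow class names):

from skmultiflow.core import Pipeline
from skmultiflow.data import WaveformGenerator
from skmultiflow.trees import HoeffdingTreeClassifier

stream = WaveformGenerator()
pipe = Pipeline([('Hoeffding Tree', HoeffdingTreeClassifier())])

# Pre-train on a small batch, then test-then-train on the next samples
X, y = stream.next_sample(200)
pipe.partial_fit(X, y, classes=stream.target_values)

correct = 0
n_test = 1000
for _ in range(n_test):
    X, y = stream.next_sample()
    correct += int(pipe.predict(X)[0] == y[0])
    pipe.partial_fit(X, y)
print('Manual prequential accuracy: {:.3f}'.format(correct / n_test))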
Example #3
def test_pipeline(test_path):
    n_categories = 5

    # Load test data generated using:
    # RandomTreeGenerator(tree_random_state=1, sample_random_state=1,
    #                     n_cat_features=n_categories, n_num_features=0)
    test_file = os.path.join(test_path, 'data-one-hot.npz')
    data = np.load(test_file)
    X = data['X']
    y = data['y']
    stream = DataStream(data=X, y=y.astype(int))

    # Setup transformer
    cat_att_idx = [[i + j for i in range(n_categories)]
                   for j in range(0, n_categories * n_categories, n_categories)
                   ]
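    # With n_categories = 5 this evaluates to
    # [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], ..., [20, 21, 22, 23, 24]]:
    # each inner list groups the one-hot columns that encode one
    # original categorical attribute.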
    transformer = OneHotToCategorical(categorical_list=cat_att_idx)

    # Set up the classifier
    classifier = KNNADWINClassifier(n_neighbors=2,
                                    max_window_size=50,
                                    leaf_size=40)
    # Setup the pipeline
    pipe = Pipeline([('one-hot', transformer),
                     ('KNNADWINClassifier', classifier)])
    # Setup the evaluator
    evaluator = EvaluatePrequential(show_plot=False,
                                    pretrain_size=10,
                                    max_samples=100)
    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)

    metrics = evaluator.get_mean_measurements()

    expected_accuracy = 0.5555555555555556
    assert np.isclose(expected_accuracy, metrics[0].accuracy_score())

    expected_kappa = 0.11111111111111116
    assert np.isclose(expected_kappa, metrics[0].kappa_score())
    print(pipe.get_info())
    expected_info = "Pipeline: [OneHotToCategorical(categorical_list=[[0, 1, 2, 3, 4], " \
                    "[5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19], " \
                    "[20, 21, 22, 23, 24]]) KNNADWINClassifier(leaf_size=40, " \
                    "max_window_size=50, metric='euclidean', n_neighbors=2)]"
    info = " ".join([line.strip() for line in pipe.get_info().split()])
    assert info == expected_info
Example #4
def test_pipeline(test_path):
    n_categories = 5

    # Load test data generated using:
    # RandomTreeGenerator(tree_random_state=1, sample_random_state=1,
    #                     n_cat_features=n_categories, n_num_features=0)
    test_file = os.path.join(test_path, 'data-one-hot.npz')
    data = np.load(test_file)
    X = data['X']
    y = data['y']
    stream = DataStream(data=X, y=y)
    stream.prepare_for_use()

    # Setup transformer
    cat_att_idx = [[i + j for i in range(n_categories)]
                   for j in range(0, n_categories * n_categories, n_categories)
                   ]
    transformer = OneHotToCategorical(categorical_list=cat_att_idx)

    # Set up the classifier
    classifier = KNNAdwin(n_neighbors=2, max_window_size=50, leaf_size=40)
    # Setup the pipeline
    pipe = Pipeline([('one-hot', transformer), ('KNNAdwin', classifier)])
    # Setup the evaluator
    evaluator = EvaluatePrequential(show_plot=False,
                                    pretrain_size=10,
                                    max_samples=100)
    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)

    metrics = evaluator.get_mean_measurements()

    expected_accuracy = 0.5555555555555556
    assert np.isclose(expected_accuracy, metrics[0].get_accuracy())

    expected_kappa = 0.11111111111111116
    assert np.isclose(expected_kappa, metrics[0].get_kappa())
    print(pipe.get_info())
    expected_info = "Pipeline:\n" \
                    "[OneHotToCategorical(categorical_list=[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9],\n" \
                    "                                      [10, 11, 12, 13, 14],\n" \
                    "                                      [15, 16, 17, 18, 19],\n" \
                    "                                      [20, 21, 22, 23, 24]])\n" \
                    "KNNAdwin(leaf_size=40, max_window_size=50, n_neighbors=2,\n" \
                    "         nominal_attributes=None)]"
    assert pipe.get_info() == expected_info
Example #5
def demo(instances=2000):
    """ _test_comparison_prequential
    
    This demo runs a prequential evaluation with more than one learner, 
    which makes it a comparison task.
    
    Parameters
    ----------
    instances: int
        The evaluation's maximum number of instances.
     
    """
    # Stream setup
    stream = FileStream("../data/datasets/covtype.csv", -1, 1)
    # stream = SEAGenerator(classification_function=2, sample_seed=53432, balance_classes=False)
    stream.prepare_for_use()
    # Setup the classifier
    clf = SGDClassifier()
    # classifier = KNNAdwin(n_neighbors=8, max_window_size=2000,leaf_size=40, categorical_list=None)
    # classifier = OzaBaggingAdwin(base_estimator=KNN(n_neighbors=8, max_window_size=2000, leaf_size=30, categorical_list=None))
    clf_one = KNNAdwin(n_neighbors=8, max_window_size=1000, leaf_size=30)
    # clf_two = KNN(n_neighbors=8, max_window_size=1000, leaf_size=30)
    # clf_two = LeverageBagging(base_estimator=KNN(), n_estimators=2)

    t_one = OneHotToCategorical([[10, 11, 12, 13],
                                 [
                                     14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                                     24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
                                     34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
                                     44, 45, 46, 47, 48, 49, 50, 51, 52, 53
                                 ]])
    # t_two = OneHotToCategorical([[10, 11, 12, 13],
    #                        [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
    #                        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53]])

    pipe_one = Pipeline([('one_hot_to_categorical', t_one), ('KNN', clf_one)])
    # pipe_two = Pipeline([('one_hot_to_categorical', t_two), ('KNN', clf_two)])

    classifier = [clf, pipe_one]
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the pipeline
    # pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        output_file='test_comparison_prequential.csv',
        max_samples=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        show_plot=True,
        metrics=['performance', 'kappa_t'])

    # Evaluate
    evaluator.evaluate(stream=stream, model=classifier)
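
The evaluator also writes its measurements to output_file; a quick way to inspect them afterwards, sketched here assuming pandas is available (the exact column names depend on the scikit-multiflow version and the metrics requested):

import pandas as pd

# Lines starting with '#' hold the test configuration and are skipped here
results = pd.read_csv('test_comparison_prequential.csv', comment='#')
print(results.columns.tolist())  # per-model metric columns
print(results.tail())            # the most recent measurements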
Example #6
def test_pipeline(test_path):
    n_categories = 5

    test_file = os.path.join(test_path, 'data-one-hot.npz')
    data = np.load(test_file)

    data_as_dict = []
    for i in range(0, len(data['X'])):
        data_as_dict.append({
            'X': data['X'][i].reshape(1, 25),
            'y': np.array(data['y'][i]).reshape(1, 1)
        })

    # Setup transformer
    cat_att_idx = [[i + j for i in range(n_categories)]
                   for j in range(0, n_categories * n_categories, n_categories)
                   ]
    transformer = OneHotToCategorical(categorical_list=cat_att_idx)

    # Set up the classifier
    classifier = KNNADWINClassifier(n_neighbors=2,
                                    max_window_size=50,
                                    leaf_size=40)
    # Setup the pipeline
    pipe = Pipeline([('one-hot', transformer),
                     ('KNNADWINClassifier', classifier)])

    train_eval_trigger = PrequentialTrigger(10)
    reporter = BufferedMetricsReporter(retrieve_metrics)
    results_observer = MetricsResultObserver(ClassificationMeasurements(),
                                             reporter)
    evaluation_event_observer = EvaluationEventObserver(
        pipe, train_eval_trigger, [results_observer], [0, 1])

    data_source = ArrayDataSource(record_to_dictionary,
                                  [evaluation_event_observer], data_as_dict)

    data_source.listen_for_events()
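    # Give the data source time to push every sample through the observers
    # before the buffered metrics are read (event delivery is assumed to be
    # asynchronous here).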
    time.sleep(3)

    expected_accuracy = 0.5555555555555556
    expected_kappa = 0.11111111111111116

    assert np.isclose(expected_accuracy, reporter.get_buffer()['accuracy'])
    assert np.isclose(expected_kappa, reporter.get_buffer()['kappa'])
Example #7
def demo(output_file=None, instances=40000):
    """ _test_holdout
    
    This demo runs a holdout evaluation task with one learner. The default 
    stream is a WaveformGenerator. The default learner is an SGDClassifier, 
    which is inserted into a Pipeline structure. All the default values can 
    be changed by uncommenting/commenting the code below.
    
    Parameters
    ----------
    output_file: string
        The name of the csv output file
    
    instances: int
        The evaluation's max number of instances
         
    """
    # Setup the File Stream
    # stream = FileStream("../data/datasets/covtype.csv", -1, 1)
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    classifier = SGDClassifier()
    # classifier = PassiveAggressiveClassifier()
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator = EvaluateHoldout(test_size=2000,
                                dynamic_test_set=True,
                                max_samples=instances,
                                batch_size=1,
                                n_wait=15000,
                                max_time=1000,
                                output_file=output_file,
                                show_plot=True,
                                metrics=['kappa', 'kappa_t', 'performance'])

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
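
Like the other demos, this one omits its import header; a sketch of what the holdout demo assumes (module paths may differ slightly between scikit-multiflow releases):

from sklearn.linear_model import SGDClassifier

from skmultiflow.core import Pipeline
from skmultiflow.data import WaveformGenerator
from skmultiflow.evaluation import EvaluateHoldout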
Example #8
def demo(instances=2000):
    """ _test_comparison_prequential
    
    This demo runs a prequential evaluation with more than one learner, 
    which makes it a comparison task.
    
    Parameters
    ----------
    instances: int
        The evaluation's maximum number of instances.
     
    """
    # Stream setup
    stream = FileStream(
        "https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
        "master/covtype.csv")
    # stream = SEAGenerator(classification_function=2, sample_seed=53432, balance_classes=False)
    # Setup the classifier
    clf = SGDClassifier()
    # classifier = KNNADWINClassifier(n_neighbors=8, max_window_size=2000,leaf_size=40, nominal_attributes=None)
    # classifier = OzaBaggingADWINClassifier(base_estimator=KNNClassifier(n_neighbors=8, max_window_size=2000,
    #                                                                     leaf_size=30))
    clf_one = KNNADWINClassifier(n_neighbors=8,
                                 max_window_size=1000,
                                 leaf_size=30)
    # clf_two = KNNClassifier(n_neighbors=8, max_window_size=1000, leaf_size=30)
    # clf_two = LeveragingBaggingClassifier(base_estimator=KNNClassifier(), n_estimators=2)

    t_one = OneHotToCategorical([[10, 11, 12, 13],
                                 [
                                     14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                                     24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
                                     34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
                                     44, 45, 46, 47, 48, 49, 50, 51, 52, 53
                                 ]])
    # t_two = OneHotToCategorical([[10, 11, 12, 13],
    #                             [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
    #                              27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
    #                              40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53]])

    pipe_one = Pipeline([('one_hot_to_categorical', t_one),
                         ('KNNClassifier', clf_one)])
    # pipe_two = Pipeline([('one_hot_to_categorical', t_two), ('KNNClassifier', clf_two)])

    classifier = [clf, pipe_one]
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the pipeline
    # pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        output_file='test_comparison_prequential.csv',
        max_samples=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        show_plot=True)

    # Evaluate
    evaluator.evaluate(stream=stream, model=classifier)