def test_evaluate_classification_coverage(tmpdir):
    """Smoke-test a prequential classification run with a wide metric set.

    Detailed correctness of the individual metrics is covered in the
    dedicated metrics test module; this test only exercises the evaluation
    pipeline end to end and pins the final windowed accuracy.
    """
    # Synthetic stream: 2 classes, 5 numeric + 2 one-hot categorical features.
    stream = RandomTreeGenerator(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=2,
        n_cat_features=2,
        n_num_features=5,
        n_categories_per_cat_feature=5,
        max_tree_depth=6,
        min_leaf_depth=3,
        fraction_leaves_per_level=0.15,
    )

    # Columns from index 15 onward are the one-hot encoded categorical ones.
    nominal_attr_idx = list(range(15, len(stream.feature_names)))
    learner = HoeffdingTreeClassifier(nominal_attributes=nominal_attr_idx)

    max_samples = 1000
    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    metrics = ['accuracy', 'kappa', 'kappa_t', 'kappa_m', 'f1', 'precision',
               'recall', 'gmean', 'true_vs_predicted']
    evaluator = EvaluatePrequential(max_samples=max_samples,
                                    metrics=metrics,
                                    output_file=output_file)

    # Run the evaluation, then check the current (windowed) accuracy.
    evaluator.evaluate(stream=stream, model=learner)
    _, current_performance = evaluator.get_measurements(model_idx=0)

    expected_current_accuracy = 0.685
    assert np.isclose(current_performance.accuracy_score(),
                      expected_current_accuracy)
# Exemplo n.º 2
# 0
def demo(output_file=None, instances=40000):
    """ _test_regression

    This demo demonstrates how to evaluate a regressor. The data stream used
    is an instance of the RegressionGenerator, and the learner is a
    HoeffdingTreeRegressor evaluated on the mean square error metric.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Synthetic regression stream with 40000 samples.
    stream = RegressionGenerator(n_samples=40000)

    regressor = HoeffdingTreeRegressor()

    # Setup the evaluator: pretrain on a single sample, then evaluate
    # prequentially (test-then-train) for up to `instances` samples.
    evaluator = EvaluatePrequential(pretrain_size=1,
                                    max_samples=instances,
                                    batch_size=1,
                                    n_wait=200,
                                    max_time=1000,
                                    output_file=output_file,
                                    show_plot=False,
                                    metrics=['mean_square_error'])

    # Evaluate
    evaluator.evaluate(stream=stream, model=regressor)
def test_evaluate_multi_target_regression_coverage(tmpdir):
    """Coverage test: prequential evaluation of a multi-target regressor.

    Only checks that the run completes; metric values are tested elsewhere.
    """
    from skmultiflow.data import RegressionGenerator
    from skmultiflow.trees import iSOUPTreeRegressor

    max_samples = 1000

    # Regression stream with 7 simultaneous targets.
    stream = RegressionGenerator(n_samples=max_samples, n_features=20,
                                 n_informative=15, random_state=1, n_targets=7)

    # Multi-target regression Hoeffding tree with adaptive leaf prediction.
    mtrht = iSOUPTreeRegressor(leaf_prediction='adaptive')

    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    metrics = ['average_mean_square_error', 'average_mean_absolute_error',
               'average_root_mean_square_error']
    evaluator = EvaluatePrequential(max_samples=max_samples,
                                    metrics=metrics,
                                    output_file=output_file)

    evaluator.evaluate(stream=stream, model=mtrht, model_names=['MTRHT'])
# Exemplo n.º 4
# 0
def demo():
    """ _test_pipeline

    This demo demonstrates the Pipeline structure seemingly working as a
    learner, while being passed as parameter to an EvaluatePrequential
    object.

    """
    # # Setup the stream
    # stream = FileStream("https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
    #                     "master/covtype.csv")
    # # If used for Hoeffding Trees then need to pass indices for Nominal attributes

    # Test with RandomTreeGenerator
    # stream = RandomTreeGenerator(n_classes=2, n_numerical_attributes=5)

    # Test with WaveformGenerator
    stream = WaveformGenerator()

    # Setup the classifier
    #classifier = PerceptronMask()
    #classifier = NaiveBayes()
    #classifier = PassiveAggressiveClassifier()
    classifier = HoeffdingTreeClassifier()

    # Setup the pipeline: a single-step pipeline wrapping the tree, showing
    # that a Pipeline can be passed to the evaluator like any other learner.
    pipe = Pipeline([('Hoeffding Tree', classifier)])

    # Setup the evaluator
    evaluator = EvaluatePrequential(show_plot=True, pretrain_size=1000, max_samples=100000)

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
# Exemplo n.º 5
# 0
def test_evaluate_prequential_classifier(tmpdir, test_path):
    """Prequential evaluation of a Hoeffding tree; compares the generated
    CSV summary against a stored expected file.

    NOTE(review): uses the older ``HoeffdingTree`` / ``prepare_for_use`` API,
    unlike other tests in this file that use ``HoeffdingTreeClassifier``.
    """
    # Setup file stream
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=4,
                                 n_cat_features=2,
                                 n_num_features=5,
                                 n_categories_per_cat_feature=5,
                                 max_tree_depth=6,
                                 min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Setup learner: columns from index 15 onward are categorical.
    nominal_attr_idx = [x for x in range(15, len(stream.feature_names))]
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    # Setup evaluator
    max_samples = 1000
    metrics = ['kappa', 'kappa_t', 'performance']
    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator = EvaluatePrequential(max_samples=max_samples,
                                    metrics=metrics,
                                    output_file=output_file)

    # Evaluate: evaluate() returns the list of trained models.
    result = evaluator.evaluate(stream=stream, model=learner)
    result_learner = result[0]

    assert isinstance(result_learner, HoeffdingTree)

    # The evaluator trains the learner in place, so the returned model is
    # expected to be the same (equal measurements) as the input learner.
    assert learner.get_model_measurements == result_learner.get_model_measurements

    expected_file = os.path.join(test_path, 'prequential_summary.csv')
    compare_files(output_file, expected_file)
def demo(instances=2000):
    """ _test_comparison_prequential
    
    This demo will test a prequential evaluation when more than one learner is 
    passed, which makes it a comparison task.
    
    Parameters
    ----------
    instances: int
        The evaluation's maximum number of instances.
     
    """
    # Stream setup: local covtype CSV, last column is the class label.
    stream = FileStream("../data/datasets/covtype.csv", -1, 1)
    # stream = SEAGenerator(classification_function=2, sample_seed=53432, balance_classes=False)
    stream.prepare_for_use()
    # Setup the classifier
    clf = SGDClassifier()
    # classifier = KNNAdwin(n_neighbors=8, max_window_size=2000,leaf_size=40, categorical_list=None)
    # classifier = OzaBaggingAdwin(base_estimator=KNN(n_neighbors=8, max_window_size=2000, leaf_size=30, categorical_list=None))
    clf_one = KNNAdwin(n_neighbors=8, max_window_size=1000, leaf_size=30)
    # clf_two = KNN(n_neighbors=8, max_window_size=1000, leaf_size=30)
    # clf_two = LeverageBagging(base_estimator=KNN(), n_estimators=2)

    # Transformer: folds the one-hot encoded columns (10-13 and 14-53) back
    # into two categorical attributes before they reach the KNN learner.
    t_one = OneHotToCategorical([[10, 11, 12, 13],
                                 [
                                     14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                                     24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
                                     34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
                                     44, 45, 46, 47, 48, 49, 50, 51, 52, 53
                                 ]])
    # t_two = OneHotToCategorical([[10, 11, 12, 13],
    #                        [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
    #                        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53]])

    pipe_one = Pipeline([('one_hot_to_categorical', t_one), ('KNN', clf_one)])
    # pipe_two = Pipeline([('one_hot_to_categorical', t_two), ('KNN', clf_two)])

    # Passing a list of models makes the evaluation a comparison task.
    classifier = [clf, pipe_one]
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the pipeline
    # pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        output_file='test_comparison_prequential.csv',
        max_samples=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        show_plot=True,
        metrics=['performance', 'kappa_t'])

    # Evaluate
    evaluator.evaluate(stream=stream, model=classifier)
# Exemplo n.º 7
# 0
def demo(output_file=None, instances=50000):
    """ _test_sam_knn_prequential

    This demo shows how to produce a prequential evaluation.

    The first thing needed is a stream. For this case we use a file stream 
    which gets its samples from the movingSquares.csv file, inside the datasets 
    folder.

    Then we need to setup a classifier, which in this case is an instance 
    of scikit-multiflow's SAMKNN. Then, optionally we create a 
    pipeline structure, initialized on that classifier.

    The evaluation is then run.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Setup the File Stream: last column is the class label.
    stream = FileStream("../data/datasets/movingSquares.csv", -1, 1)
    # stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    # classifier = SGDClassifier()
    # classifier = KNNAdwin(n_neighbors=8, max_window_size=2000,leaf_size=40, categorical_list=None)
    # classifier = OzaBaggingAdwin(base_estimator=KNN(n_neighbors=8, max_window_size=2000, leaf_size=30, categorical_list=None))
    classifier = SAMKNN(n_neighbors=5,
                        weighting='distance',
                        max_window_size=1000,
                        stm_size_option='maxACCApprox',
                        use_ltm=False)
    # classifier = SGDRegressor()
    # classifier = PerceptronMask()

    # Setup the pipeline
    # pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator: no pretraining, evaluate sample by sample.
    evaluator = EvaluatePrequential(pretrain_size=0,
                                    max_samples=instances,
                                    batch_size=1,
                                    n_wait=100,
                                    max_time=1000,
                                    output_file=output_file,
                                    show_plot=True,
                                    metrics=['performance'])

    # Evaluate
    evaluator.evaluate(stream=stream, model=classifier)
# Exemplo n.º 8
# 0
def test_evaluate_prequential_classifier(tmpdir, test_path):
    """End-to-end prequential evaluation test: checks the returned model,
    the CSV summary file, pinned metric values, and the evaluator repr.
    """
    # Setup file stream
    stream = RandomTreeGenerator(tree_random_state=23, sample_random_state=12, n_classes=4, n_cat_features=2,
                                 n_num_features=5, n_categories_per_cat_feature=5, max_tree_depth=6, min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Setup learner: columns from index 15 onward are categorical.
    nominal_attr_idx = [x for x in range(15, len(stream.feature_names))]
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    # Setup evaluator
    max_samples = 1000
    metrics = ['accuracy', 'kappa', 'kappa_t']
    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator = EvaluatePrequential(max_samples=max_samples,
                                    metrics=metrics,
                                    output_file=output_file)

    # Evaluate: evaluate() returns the list of trained models.
    result = evaluator.evaluate(stream=stream, model=learner)
    result_learner = result[0]

    assert isinstance(result_learner, HoeffdingTree)

    # The evaluator trains the learner in place, so the input learner and the
    # returned model should report identical measurements.
    assert learner.get_model_measurements == result_learner.get_model_measurements

    expected_file = os.path.join(test_path, 'prequential_summary.csv')
    compare_files(output_file, expected_file)

    mean_performance, current_performance = evaluator.get_measurements(model_idx=0)

    # Pinned values below come from a previous run with these fixed seeds.
    expected_mean_accuracy = 0.436250
    assert np.isclose(mean_performance.get_accuracy(), expected_mean_accuracy)

    expected_mean_kappa = 0.231791
    assert np.isclose(mean_performance.get_kappa(), expected_mean_kappa)

    expected_mean_kappa_t = 0.236887
    assert np.isclose(mean_performance.get_kappa_t(), expected_mean_kappa_t)

    expected_current_accuracy = 0.430000
    assert np.isclose(current_performance.get_accuracy(), expected_current_accuracy)

    expected_current_kappa = 0.223909
    assert np.isclose(current_performance.get_kappa(), expected_current_kappa)

    expected_current_kappa_t = 0.240000
    assert np.isclose(current_performance.get_kappa_t(), expected_current_kappa_t)

    # get_info() must reproduce this exact multi-line repr.
    expected_info = "EvaluatePrequential(batch_size=1, data_points_for_classification=False,\n" \
                    "                    max_samples=1000, max_time=inf,\n" \
                    "                    metrics=['accuracy', 'kappa', 'kappa_t'], n_wait=200,\n" \
                    "                    output_file='prequential_summary.csv',\n" \
                    "                    pretrain_size=200, restart_stream=True, show_plot=False)"
    assert evaluator.get_info() == expected_info
# Exemplo n.º 9
# 0
def demo_parameterized(h, filename="covtype.csv", show_plot=True, model_names=None):
    """Prequentially evaluate model(s) ``h`` on a remote CSV stream."""
    # Stream the dataset straight from the streaming-datasets repository.
    url = ("https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
           "master/" + filename)
    stream = FileStream(url)

    # Pre-train on the first 100 samples, then test-then-train on the rest.
    pretrain = 100
    evaluator = EvaluatePrequential(pretrain_size=pretrain,
                                    output_file='test_parametrized.csv',
                                    max_samples=10000,
                                    batch_size=1,
                                    n_wait=500,
                                    show_plot=show_plot)
    evaluator.evaluate(stream=stream, model=h, model_names=model_names)
# Exemplo n.º 10
# 0
def demo_parameterized(h, filename="covtype.csv", show_plot=True, model_names=None):
    """Prequentially evaluate model(s) ``h`` on a local dataset file.

    Parameters
    ----------
    h: learner or list of learners
        The model(s) passed to the evaluator.
    filename: string
        CSV file name, resolved under ``../data/datasets/``.
    show_plot: bool
        Whether to display the live evaluation plot.
    model_names: list of strings, optional
        Names used for the models in the evaluator's output.
    """
    # Setup Stream
    stream = FileStream("../data/datasets/" + filename)
    stream.prepare_for_use()

    # Pre-train on the first 100 samples, then test-then-train on the rest.
    pretrain = 100
    evaluator = EvaluatePrequential(pretrain_size=pretrain, output_file='test_parametrized.csv', max_samples=10000,
                                    batch_size=1, n_wait=500, show_plot=show_plot)
    evaluator.evaluate(stream=stream, model=h, model_names=model_names)
def demo():
    """Evaluate a Hoeffding tree on the local SEA file stream, with plotting."""
    # Classifier under evaluation (other options: SAMKNNClassifier,
    # LeverageBaggingClassifier, SGD).
    tree = HoeffdingTreeClassifier()

    # Stream backed by the local sea_stream CSV file.
    stream = FileStream("../data/datasets/sea_stream.csv")

    evaluator = EvaluatePrequential(pretrain_size=100,
                                    output_file='test_filestream.csv',
                                    max_samples=10000,
                                    batch_size=1,
                                    n_wait=1000,
                                    show_plot=True)
    evaluator.evaluate(stream=stream, model=tree)
# Exemplo n.º 12
# 0
def evaluate(params, stream, study_size, metrics=None):
    """Run a prequential evaluation of an ARSLVQ classifier.

    Parameters
    ----------
    params: sequence
        Hyper-parameters; indices 1-4 are gamma, sigma, prototypes per
        class and confidence (index 0 is passed through to the result row).
    stream: Stream
        The data stream to evaluate on (will be prepared in place).
    study_size: int
        Maximum number of samples to evaluate.
    metrics: list of strings, optional
        Metrics to track; defaults to ['accuracy', 'kappa'].

    Returns
    -------
    list
        ``params`` followed by the buffered mean accuracy values.
    """
    # Avoid the mutable-default-argument pitfall: build the default per call.
    if metrics is None:
        metrics = ['accuracy', 'kappa']

    clf = ARSLVQ(gamma=params[1],
                 sigma=params[2],
                 prototypes_per_class=int(params[3]),
                 confidence=params[4])
    stream.prepare_for_use()
    evaluator = EvaluatePrequential(show_plot=False,
                                    batch_size=10,
                                    max_samples=study_size,
                                    metrics=metrics)

    evaluator.evaluate(stream=stream, model=clf)

    print(evaluator.get_mean_measurements())
    # NOTE(review): reads the evaluator's private buffer; `list(...)` makes
    # the concatenation explicit regardless of the exact sequence type.
    return list(params) + list(evaluator._data_buffer.get_data(
        metric_id="accuracy", data_id="mean"))
def demo(output_file=None, instances=50000):
    """ _test_sam_knn_prequential

    This demo shows how to produce a prequential evaluation.

    The first thing needed is a stream. For this case we use the
    moving_squares.csv dataset.

    Then we need to setup a classifier, which in this case is an instance 
    of scikit-multiflow's SAMKNNClassifier. Then, optionally we create a
    pipeline structure, initialized on that classifier.

    The evaluation is then run.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Setup the File Stream, fetched from the streaming-datasets repository.
    stream = FileStream(
        "https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
        "master/moving_squares.csv")
    # stream = WaveformGenerator()

    # Setup the classifier
    classifier = SAMKNNClassifier(n_neighbors=5,
                                  weighting='distance',
                                  max_window_size=1000,
                                  stm_size_option='maxACCApprox',
                                  use_ltm=False)

    # Setup the evaluator: no pretraining, evaluate sample by sample.
    evaluator = EvaluatePrequential(pretrain_size=0,
                                    max_samples=instances,
                                    batch_size=1,
                                    n_wait=100,
                                    max_time=1000,
                                    output_file=output_file,
                                    show_plot=True)

    # Evaluate
    evaluator.evaluate(stream=stream, model=classifier)
# Exemplo n.º 14
# 0
def test_pipeline(test_path):
    """Prequential evaluation through a Pipeline (one-hot -> KNN-ADWIN);
    pins accuracy/kappa and the pipeline's repr string.
    """
    n_categories = 5

    # Load test data generated using:
    # RandomTreeGenerator(tree_random_state=1, sample_random_state=1,
    #                     n_cat_features=n_categories, n_num_features=0)
    test_file = os.path.join(test_path, 'data-one-hot.npz')
    data = np.load(test_file)
    X = data['X']
    y = data['y']
    stream = DataStream(data=X, y=y)
    stream.prepare_for_use()

    # Setup transformer: groups of 5 consecutive one-hot columns, one group
    # per original categorical attribute (indices 0-4, 5-9, ..., 20-24).
    cat_att_idx = [[i + j for i in range(n_categories)]
                   for j in range(0, n_categories * n_categories, n_categories)
                   ]
    transformer = OneHotToCategorical(categorical_list=cat_att_idx)

    # Set up the classifier
    classifier = KNNAdwin(n_neighbors=2, max_window_size=50, leaf_size=40)
    # Setup the pipeline
    pipe = Pipeline([('one-hot', transformer), ('KNNAdwin', classifier)])
    # Setup the evaluator
    evaluator = EvaluatePrequential(show_plot=False,
                                    pretrain_size=10,
                                    max_samples=100)
    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)

    metrics = evaluator.get_mean_measurements()

    # Pinned values from a previous run with these fixed seeds.
    expected_accuracy = 0.5555555555555556
    assert np.isclose(expected_accuracy, metrics[0].get_accuracy())

    expected_kappa = 0.11111111111111116
    assert np.isclose(expected_kappa, metrics[0].get_kappa())
    print(pipe.get_info())
    # get_info() must reproduce this exact multi-line repr.
    expected_info = "Pipeline:\n" \
                    "[OneHotToCategorical(categorical_list=[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9],\n" \
                    "                                      [10, 11, 12, 13, 14],\n" \
                    "                                      [15, 16, 17, 18, 19],\n" \
                    "                                      [20, 21, 22, 23, 24]])\n" \
                    "KNNAdwin(leaf_size=40, max_window_size=50, n_neighbors=2,\n" \
                    "         nominal_attributes=None)]"
    assert pipe.get_info() == expected_info
# Exemplo n.º 15
# 0
def test_pipeline(test_path):
    """Prequential evaluation through a Pipeline (one-hot -> KNN-ADWIN);
    pins accuracy/kappa and a whitespace-normalized repr of the pipeline.
    """
    n_categories = 5

    # Load test data generated using:
    # RandomTreeGenerator(tree_random_state=1, sample_random_state=1,
    #                     n_cat_features=n_categories, n_num_features=0)
    test_file = os.path.join(test_path, 'data-one-hot.npz')
    data = np.load(test_file)
    X = data['X']
    y = data['y']
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the documented
    # replacement and produces the same default integer dtype.
    stream = DataStream(data=X, y=y.astype(int))

    # Setup transformer: groups of 5 consecutive one-hot columns, one group
    # per original categorical attribute (indices 0-4, 5-9, ..., 20-24).
    cat_att_idx = [[i + j for i in range(n_categories)]
                   for j in range(0, n_categories * n_categories, n_categories)
                   ]
    transformer = OneHotToCategorical(categorical_list=cat_att_idx)

    # Set up the classifier
    classifier = KNNADWINClassifier(n_neighbors=2,
                                    max_window_size=50,
                                    leaf_size=40)
    # Setup the pipeline
    pipe = Pipeline([('one-hot', transformer),
                     ('KNNADWINClassifier', classifier)])
    # Setup the evaluator
    evaluator = EvaluatePrequential(show_plot=False,
                                    pretrain_size=10,
                                    max_samples=100)
    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)

    metrics = evaluator.get_mean_measurements()

    # Pinned values from a previous run with these fixed seeds.
    expected_accuracy = 0.5555555555555556
    assert np.isclose(expected_accuracy, metrics[0].accuracy_score())

    expected_kappa = 0.11111111111111116
    assert np.isclose(expected_kappa, metrics[0].kappa_score())
    print(pipe.get_info())
    # Compare the repr with all runs of whitespace collapsed to single spaces,
    # so the assertion is insensitive to line wrapping.
    expected_info = "Pipeline: [OneHotToCategorical(categorical_list=[[0, 1, 2, 3, 4], " \
                    "[5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19], " \
                    "[20, 21, 22, 23, 24]]) KNNADWINClassifier(leaf_size=40, " \
                    "max_window_size=50, metric='euclidean', n_neighbors=2)]"
    info = " ".join([line.strip() for line in pipe.get_info().split()])
    assert info == expected_info
# Exemplo n.º 16
# 0
def demo():
    """Evaluate a Hoeffding tree on the remote SEA stream, with plotting."""
    # Classifier under evaluation (other options: SAMKNNClassifier,
    # LeveragingBaggingClassifier, SGD).
    tree = HoeffdingTreeClassifier()

    # Stream the dataset straight from the streaming-datasets repository.
    stream = FileStream(
        "https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
        "master/sea_stream.csv")

    evaluator = EvaluatePrequential(pretrain_size=100,
                                    output_file='test_filestream.csv',
                                    max_samples=10000,
                                    batch_size=1,
                                    n_wait=1000,
                                    show_plot=True)
    evaluator.evaluate(stream=stream, model=tree)
# Exemplo n.º 17
# 0
def test_evaluate_classification_metrics():
    """Pins f1/precision/recall/g-mean (mean and current) for a Hoeffding
    tree on a fixed-seed binary stream.
    """
    stream = RandomTreeGenerator(tree_random_state=23, sample_random_state=12, n_classes=2, n_cat_features=2,
                                 n_num_features=5, n_categories_per_cat_feature=5, max_tree_depth=6, min_leaf_depth=3,
                                 fraction_leaves_per_level=0.15)
    stream.prepare_for_use()

    # Setup learner: columns from index 15 onward are categorical.
    nominal_attr_idx = [x for x in range(15, len(stream.feature_names))]
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    max_samples = 1000
    metrics = ['f1', 'precision', 'recall', 'gmean']
    evaluator = EvaluatePrequential(max_samples=max_samples,
                                    metrics=metrics)

    # Evaluate
    evaluator.evaluate(stream=stream, model=learner)
    mean_performance, current_performance = evaluator.get_measurements(model_idx=0)

    # Pinned values from a previous run with these fixed seeds.
    expected_current_f1_score = 0.7096774193548387
    expected_current_precision = 0.6814159292035398
    expected_current_recall = 0.7403846153846154
    expected_current_g_mean = 0.6802502367624613
    expected_mean_f1_score = 0.7009803921568628
    expected_mean_precision = 0.7185929648241206
    expected_mean_recall = 0.6842105263157895
    expected_mean_g_mean = 0.6954166367760247
    # Debug output (captured by pytest); useful when the pins drift.
    print(mean_performance.get_g_mean())
    print(mean_performance.get_recall())
    print(mean_performance.get_precision())
    print(mean_performance.get_f1_score())
    print(current_performance.get_g_mean())
    print(current_performance.get_recall())
    print(current_performance.get_precision())
    print(current_performance.get_f1_score())
    assert np.isclose(current_performance.get_f1_score(), expected_current_f1_score)
    assert np.isclose(current_performance.get_precision(), expected_current_precision)
    assert np.isclose(current_performance.get_recall(), expected_current_recall)
    assert np.isclose(current_performance.get_g_mean(), expected_current_g_mean)
    assert np.isclose(mean_performance.get_f1_score(), expected_mean_f1_score)
    assert np.isclose(mean_performance.get_precision(), expected_mean_precision)
    assert np.isclose(mean_performance.get_recall(), expected_mean_recall)
    assert np.isclose(mean_performance.get_g_mean(), expected_mean_g_mean)
def demo(output_file=None, instances=40000):
    """ _test_prequential_bagging
    
    This demo shows the evaluation process of a LeverageBaggingClassifier,
    initialized with different base estimators.
    
    Parameters
    ----------
    output_file: string
        The name of the csv output file
    
    instances: int
        The evaluation's max number of instances
    
    """
    # Setup the File Stream
    # stream = FileStream("../data/datasets/sea_big.csv", -1, 1)
    #stream = SEAGenerator(classification_function=2, noise_percentage=0.0)
    #stream.prepare_for_use()
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier
    #classifier = OzaBaggingADWINClassifier(base_estimator=KNNClassifier(n_neighbors=8, max_window_size=2000,
    #                                                                    leaf_size=30))
    #classifier = LeverageBaggingClassifier(base_estimator=KNNClassifier(n_neighbors=8, max_window_size=2000,
    #                                                                    leaf_size=30),
    #                                       n_estimators=1)
    # Leveraging-bagging ensemble of 2 Hoeffding trees.
    pipe = LeverageBaggingClassifier(base_estimator=HoeffdingTreeClassifier(),
                                     n_estimators=2)

    # Setup the pipeline
    #pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator = EvaluatePrequential(pretrain_size=2000,
                                    max_samples=instances,
                                    output_file=output_file,
                                    show_plot=False)

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def test_evaluate_coverage(tmpdir):
    """Coverage test: time/size metrics with data_points_for_classification."""
    from skmultiflow.data import SEAGenerator
    from skmultiflow.bayes import NaiveBayes

    max_samples = 1000

    # Fixed-seed SEA concept stream and a Naive Bayes learner.
    stream = SEAGenerator(random_state=1)
    nb = NaiveBayes()

    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator = EvaluatePrequential(max_samples=max_samples,
                                    metrics=['running_time', 'model_size'],
                                    data_points_for_classification=True,
                                    output_file=output_file)

    evaluator.evaluate(stream=stream, model=nb, model_names=['NB'])
def test_evaluate_multi_target_classification_coverage(tmpdir):
    """Coverage test: multi-label evaluation with a MultiOutputLearner.

    Metric correctness is checked in the corresponding metrics test module;
    this only exercises the evaluation pipeline.
    """
    from skmultiflow.data import MultilabelGenerator
    from skmultiflow.meta import MultiOutputLearner

    max_samples = 1000

    # Fixed-seed multi-label stream and a multi-output wrapper learner.
    stream = MultilabelGenerator(n_samples=max_samples, random_state=1)
    mol = MultiOutputLearner()

    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator = EvaluatePrequential(
        max_samples=max_samples,
        metrics=['hamming_score', 'hamming_loss', 'exact_match', 'j_index'],
        output_file=output_file)

    evaluator.evaluate(stream=stream, model=[mol], model_names=['MOL'])
def test_evaluate_regression_coverage(tmpdir):
    """Coverage test: single-target regression evaluation.

    Metric correctness is checked in the corresponding metrics test module;
    this only exercises the evaluation pipeline.
    """
    from skmultiflow.data import RegressionGenerator
    from skmultiflow.trees import HoeffdingTreeRegressor

    max_samples = 1000

    # Synthetic regression stream and a Hoeffding tree regressor.
    stream = RegressionGenerator(n_samples=max_samples)
    htr = HoeffdingTreeRegressor()

    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator = EvaluatePrequential(
        max_samples=max_samples,
        metrics=['mean_square_error', 'mean_absolute_error'],
        output_file=output_file)

    evaluator.evaluate(stream=stream, model=htr, model_names=['HTR'])
# Exemplo n.º 22
# 0
def train(name, clusters, window, normalize=False):
    """Evaluate a Hoeffding tree and an SGD classifier prequentially on a
    prepared CSV dataset and return a per-model metrics DataFrame.

    Parameters
    ----------
    name: string
        Dataset name; combined with `clusters` and `window` to locate the
        prepared CSV under the module-level DATA_LOCATION.
    clusters, window: values interpolated into the input file name and
        echoed into the result rows.
    normalize: bool
        If True, standardize the sensor columns (all columns except
        'current_state' and 'next_state') before streaming.

    Returns
    -------
    pandas.DataFrame
        One row per model with accuracy/precision/recall/f1 columns.

    NOTE(review): relies on module-level DATA_LOCATION and MODEL_NAMES,
    which are defined elsewhere in this module.
    """
    input_csv = '{}{}_clusters={}_window={}_prepared.csv'.format(
        DATA_LOCATION, name, clusters, window)
    data = pd.read_csv(input_csv, index_col=0)

    if normalize:
        # Standardize only the sensor columns, then re-attach the state
        # columns unchanged.
        states = data.filter(['current_state', 'next_state'])
        sensors = data.drop(columns=['current_state', 'next_state'])
        scaler = StandardScaler()
        data = pd.DataFrame(data=scaler.fit_transform(X=sensors),
                            index=data.index,
                            columns=sensors.columns)
        data = pd.concat([data, states], axis='columns')

    stream = DataStream(data)

    hf = HoeffdingTreeClassifier()
    sgd = SGDClassifier()

    # Default evaluator settings; both models are evaluated side by side.
    evaluator = EvaluatePrequential()
    evaluator.evaluate(stream=stream, model=[hf, sgd])
    # print('---------------------------------------------')
    # measurements = evaluator.get_mean_measurements()[0]
    # print(measurements.confusion_matrix)
    # print(measurements.accuracy_score())
    data = []
    for i, measurements in enumerate(evaluator.get_mean_measurements()):
        data.append([
            name, clusters, window, MODEL_NAMES[i], normalize,
            measurements.accuracy_score(),
            measurements.precision_score(),
            measurements.recall_score(),
            measurements.f1_score()
        ])
    return pd.DataFrame(data=data,
                        columns=[
                            'name', 'clusters', 'window', 'model',
                            'normalized', 'accuracy', 'precision', 'recall',
                            'f1'
                        ])
def test_data_stream(test_path):
    """Smoke test: evaluate five learners side by side on a CSV-backed
    DataStream; passes as long as the evaluation completes without error.
    """
    test_file = os.path.join(test_path, 'data/data_n30000.csv')
    raw_data = pd.read_csv(test_file)
    stream = DataStream(raw_data, name='Test')
    # Three KNN variants: plain, distance-weighted, and standardized input.
    normal_knn_learner = KNNClassifier(
        n_neighbors=8,
        max_window_size=2000,
        leaf_size=40,
    )
    weighted_knn_learner = WeightedKNNClassifier(n_neighbors=8,
                                                 max_window_size=2000,
                                                 leaf_size=40)
    standardize_knn_learner = KNNClassifier(n_neighbors=8,
                                            max_window_size=2000,
                                            leaf_size=40,
                                            standardize=True)
    # Columns from index 15 onward are treated as categorical by the tree.
    nominal_attr_idx = [x for x in range(15, len(stream.feature_names))]

    hoeffding_learner = HoeffdingTreeClassifier(
        nominal_attributes=nominal_attr_idx)
    nb_learner = NaiveBayes()

    metrics = ['accuracy', 'kappa_m', 'kappa_t', 'recall']
    output_file = os.path.join(test_path, 'data/kkn_output.csv')
    evaluator = EvaluatePrequential(metrics=metrics, output_file=output_file)

    # Evaluate
    result = evaluator.evaluate(stream=stream,
                                model=[
                                    normal_knn_learner,
                                    weighted_knn_learner,
                                    standardize_knn_learner,
                                    hoeffding_learner,
                                    nb_learner,
                                ])
    mean_performance, current_performance = evaluator.get_measurements()
    # Trivial assertion: the test only verifies the run completes.
    assert 1 == 1
# Exemplo n.º 24
# 0
from skmultiflow.data import FileStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.bayes import NaiveBayes
from skmultiflow.meta import OzaBagging

from sklearn.datasets import make_classification

# Write a synthetic binary-classification dataset to CSV: ten feature
# columns followed by the class label on each row.
with open("dataset_imb.csv", "w") as f:
    X, y = make_classification(
        n_features=10, n_informative=10, n_redundant=0, n_samples=10000, weights=[0.5]
    )
    for features, label in zip(X, y):
        f.write("".join(f"{value}," for value in features))
        f.write(f"{label}\n")


# Stream the freshly written CSV back as a data stream.
generator = FileStream("dataset_imb.csv")

# Models under comparison: DPDES (dynamic ensemble selection over Naive
# Bayes pools) versus an OzaBagging ensemble of Naive Bayes learners.
dpdes = DPDESMethod(NaiveBayes(), 200, 10, KNORAU())
ozabag = OzaBagging(NaiveBayes(), n_estimators=10)

# Prequential evaluation in batches of 200 samples, tracking precision only.
evaluator = EvaluatePrequential(
    n_wait=200,
    batch_size=200,
    max_samples=10000,
    pretrain_size=0,
    metrics=["precision"],
)
evaluator.evaluate(generator, [dpdes, ozabag], ["DPDES", "Ozabag"])
Exemplo n.º 25
0
# Derive a human-readable dataset tag from the pickle file name,
# e.g. "final_800_foo.pickle" -> "foo".
ds = args.dataset 
ds = ds.replace("final_800_", "")
ds = ds.replace(".pickle", "")
#ds = ds.replace("_", " ")
# Append the dataset tag to the model name used in reports/plots.
nama_model = nama_model+" ("+ds+")"
# Audio (MFCC-feature) stream over the held-out test set.
# NOTE(review): MFCCStream/testDataset are defined elsewhere — presumably a
# project-local stream wrapper; verify against the surrounding script.
stream_wave = MFCCStream('dataset/'+test_dataset,nama_model=nama_model,additional_data= testDataset)

# CMGMM classifier with drift detection, trained offline on the 'mfcc'
# features / 'label' column before any streaming evaluation.
classifier = CMGMMClassifier( classes=stream_wave.get_target_values(),prune_component=prune_comp,drift_detector=detector)
classifier.train(train_dataset,'label','mfcc')


eval = EvaluatePrequential(show_plot=True,
                           pretrain_size=0,
                           batch_size=1,
                           metrics=['accuracy', 'f1','running_time'],
                           output_file=result_dir+file_name,
						   #data_points_for_classification=True

						   )

eval.evaluate(stream=stream_wave, model=classifier, model_names=[model_name])
print(eval._data_buffer.get_data(metric_id=constants.ACCURACY, data_id=constants.MEAN)[0])
print((eval.model[0].adaptasi))
'''

# Stream the poker-hand dataset and evaluate a linear SGD classifier.
stream = FileStream('dataset/poker.csv')
classifier = SGDClassifier()
# NOTE(review): `eval` shadows the builtin of the same name — consider
# renaming (e.g. to `evaluator`) if nothing downstream depends on it.
eval = EvaluatePrequential(show_plot=True,
                            pretrain_size=500,
                           batch_size=200,
                           metrics=['accuracy', 'kappa', 'running_time', 'model_size'])
Exemplo n.º 26
0
            f1s = []
            tprs = []
            aucs = []
            mean_fpr = np.linspace(0, 1, 100)

            for fold, split in enumerate(cross_validation.split(X_train, y_train)):
                fold_train_indexes, fold_test_indexes = split
                fold_X_train = X_train.iloc[fold_train_indexes]
                fold_y_train = y_train.iloc[fold_train_indexes]
                fold_X_test = X_train.iloc[fold_test_indexes]
                fold_y_test = y_train.iloc[fold_test_indexes]

                if (classifier_name == 'hoeffding'):
                    stream = DataStream(X, y.values.ravel())
                    stream.prepare_for_use()
                    evaluator = EvaluatePrequential(
                        show_plot=False, pretrain_size=200, metrics=['accuracy'])
                    model = evaluator.evaluate(
                        stream=stream, model=classifier)[0]
                    model.fit(fold_X_train, fold_y_train.values.ravel())

                # elif (classifier_name == 'cn2'):
                #     model = CrossValidation(
                #         table_from_frame(data), [CN2Learner()], k=5)

                else:
                    model = classifier.fit(
                        fold_X_train, fold_y_train.values.ravel())
                    y_pred = model.predict(fold_X_test)

                    accuracies.append(accuracy_score(fold_y_test, y_pred))
                    precisions.append(precision_score(
Exemplo n.º 27
0
from skmultiflow.data import FileStream
from skmultiflow.lazy.knn import KNN
from skmultiflow.evaluation import EvaluatePrequential

# --- configuration -------------------------------------------------------
# KNN hyper-parameters.
n_neighbors = 8
max_window_size = 2000
leaf_size = 30
n_estimators = 30  # not consumed by the KNN below; kept for script parity
# Prequential-evaluation settings.
show_plot = True
pretrain_size = 100
max_samples = 7000
metrics = ['accuracy']

# --- stream and model ----------------------------------------------------
stream = FileStream('data/stream1.csv')
stream.prepare_for_use()
mdl = KNN(
    n_neighbors=n_neighbors,
    max_window_size=max_window_size,
    leaf_size=leaf_size,
)

# --- evaluation ----------------------------------------------------------
evaluator = EvaluatePrequential(
    show_plot=show_plot,
    pretrain_size=pretrain_size,
    max_samples=max_samples,
    metrics=metrics,
)
evaluator.evaluate(stream=stream, model=mdl)
Exemplo n.º 28
0
def flow_detection_classifier(classifier, stream):
    """Prequentially evaluate ``classifier`` on ``stream`` with live plotting.

    Pretrains on the first 2000 samples, then processes up to 50000 samples,
    and returns the evaluator so callers can inspect its measurements.
    """
    prequential = EvaluatePrequential(
        show_plot=True,
        pretrain_size=2000,
        max_samples=50000,
    )
    prequential.evaluate(stream=stream, model=classifier)
    return prequential
Exemplo n.º 29
0
            total_length = int(total_length)
            for data in response.iter_content(chunk_size=4096):
                dl += len(data)
                f.write(data)
                done = int(50 * dl / total_length)
                sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
                sys.stdout.flush()
    data = np.load(file_name, allow_pickle=True)

    return data


# data = download_data()
# If the dataset file is already downloaded, load it directly from disk.
data = np.load(file_name, allow_pickle=True)

# Two streaming learners to compare side by side.
sam = SAMKNN()
arf = HoeffdingAdaptiveTreeClassifier()

# Column 0 holds the class label; the remaining columns are the features.
stream = DataStream(data[:, 1:], data[:, 0].astype(int))
stream.prepare_for_use()

evaluator = EvaluatePrequential(max_samples=10000,
                                max_time=1000,
                                show_plot=True,
                                metrics=['accuracy', 'kappa'])

# Fix: the second model is a Hoeffding Adaptive Tree, not an RSLVQ — the
# old 'RSLVQ' label mis-named it in plots and output.
evaluator.evaluate(stream=stream,
                   model=[sam, arf],
                   model_names=['Sam', 'HAT'])
from skmultiflow.trees import HoeffdingTreeClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.file_stream import FileStream
import pandas as pd
import numpy as np

# Read the synthetic data stream from disk.
dstream = FileStream('data_stream.csv')
dstream.prepare_for_use()

# Hoeffding tree classifier to be evaluated prequentially.
ht_class = HoeffdingTreeClassifier()

# Prequential evaluation: pretrain on 400 samples, then interleave
# test-then-train over at most 10000 samples, tracking accuracy.
evaluate1 = EvaluatePrequential(
    show_plot=False,
    pretrain_size=400,
    max_samples=10000,
    metrics=['accuracy'],
)
evaluate1.evaluate(stream=dstream, model=ht_class)

###################################################

# Hoeffding Adaptive tree
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
from skmultiflow.data import ConceptDriftStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.evaluation import EvaluateHoldout

# Simulate a sample data stream
ds = ConceptDriftStream(random_state=777, position=30000)
ds
# Output: