Exemplo n.º 1
0
def demo(output_file=None, instances=40000):
    """ _test_holdout

    Run a holdout evaluation task with a single learner.

    The stream is a WaveformGenerator and the learner is a SGDClassifier
    wrapped in a one-step Pipeline. Alternative streams and learners are
    listed as commented-out lines and can be swapped in by uncommenting them.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Data stream. File-based alternative:
    #   opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    #   stream = FileStream(opt, -1, 1)
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Learner. Alternatives:
    #   learner = PassiveAggressiveClassifier()
    #   learner = SGDRegressor()
    #   learner = PerceptronMask()
    learner = SGDClassifier()

    # Wrap the learner in a single-step pipeline
    pipeline = Pipeline([('Classifier', learner)])

    # Evaluator configuration; the two demo arguments are forwarded as
    # max_instances and output_file
    holdout_settings = dict(
        pretrain_size=10000,
        test_size=2000,
        dynamic_test_set=True,
        max_instances=instances,
        batch_size=1,
        n_wait=15000,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa', 'kappa_t', 'performance'],
    )
    evaluator = EvaluateHoldout(**holdout_settings)

    # Run the evaluation of the pipeline on the stream
    evaluator.eval(stream=stream, classifier=pipeline)
Exemplo n.º 2
0
def demo(output_file=None, instances=40000):
    """ _test_comparison_holdout

    Run a holdout evaluation task with two learners at once, which turns
    the evaluation into a comparison task.

    Parameters
    ----------
    output_file: string, optional
        If passed this parameter indicates the output file name. If left blank,
        no output file will be generated.

    instances: int (Default: 40000)
        The evaluation's maximum number of instances.

    """
    # Data stream. File-based alternative:
    #   opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    #   stream = FileStream(opt, -1, 1)
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # The learners to compare. Other candidates:
    #   PassiveAggressiveClassifier(), SGDRegressor(), PerceptronMask()
    learners = [
        SGDClassifier(),
        KNNAdwin(k=8, max_window_size=2000),
    ]

    # Evaluator configuration; the two demo arguments are forwarded as
    # max_instances and output_file
    holdout_settings = dict(
        pretrain_size=2000,
        test_size=2000,
        dynamic_test_set=True,
        max_instances=instances,
        batch_size=1,
        n_wait=5000,
        max_time=1000,
        output_file=output_file,
        task_type='classification',
        show_plot=True,
        plot_options=['kappa'],
    )
    holdout = EvaluateHoldout(**holdout_settings)

    # Evaluate both learners on the same stream
    holdout.eval(stream=stream, classifier=learners)
def demo(output_file=None, instances=40000):
    """ _test_prequential_bagging

    Run a prequential evaluation of a LeverageBagging ensemble. The ensemble
    enabled here is built on a HoeffdingTree base learner; KNN-based
    ensembles are kept as commented-out alternatives.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Data stream. Alternatives:
    #   stream = FileStream("../datasets/sea_big.csv", -1, 1)
    #   stream = SEAGenerator(classification_function=2, noise_percentage=0.0)
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Ensemble learner. Alternatives:
    #   OzaBaggingAdwin(h=KNN(k=8, max_window_size=2000, leaf_size=30, categorical_list=None))
    #   LeverageBagging(h=KNN(k=8, max_window_size=2000, leaf_size=30), ensemble_length=1)
    base_learner = HoeffdingTree()
    ensemble = LeverageBagging(h=base_learner, ensemble_length=2)

    # The ensemble is evaluated directly; to wrap it in a pipeline instead:
    #   ensemble = Pipeline([('Classifier', ensemble)])

    # Evaluator configuration; the two demo arguments are forwarded as
    # max_samples and output_file
    prequential_settings = dict(
        pretrain_size=2000,
        max_samples=instances,
        output_file=output_file,
        show_plot=False,
    )
    prequential = EvaluatePrequential(**prequential_settings)

    # Run the evaluation
    prequential.evaluate(stream=stream, model=ensemble)
Exemplo n.º 4
0
def demo():
    """ _test_pipeline

    Show a Pipeline being handed to an EvaluatePrequential object in the
    place of a learner. A HoeffdingTree is wrapped in a single-step Pipeline
    and evaluated on a WaveformGenerator stream.

    """
    # Data stream. Alternatives:
    #   File stream (if used for Hoeffding Trees then need to pass indices
    #   for Nominal attributes):
    #     opt = FileOption("FILE", "OPT_NAME", "../datasets/covtype.csv", "CSV", False)
    #     stream = FileStream(opt, -1, 1)
    #   Random tree generator:
    #     stream = RandomTreeGenerator(n_classes=2, n_numerical_attributes=5)
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Learner. Alternatives:
    #   PerceptronMask(), NaiveBayes(), PassiveAggressiveClassifier()
    tree = HoeffdingTree()

    # Wrap the learner in a single-step pipeline
    pipeline = Pipeline([('Hoeffding Tree', tree)])

    # Evaluator configuration
    prequential_settings = dict(
        show_plot=True,
        pretrain_size=1000,
        max_instances=100000,
    )
    evaluator = EvaluatePrequential(**prequential_settings)

    # Evaluate the pipeline as if it were a plain classifier
    evaluator.eval(stream=stream, classifier=pipeline)
Exemplo n.º 5
0
#
#         -Accuracy:
#
#         -Kappa statistic: k=1 the classifier is always correct.
#                           k=0 the predictions coincide with the correct ones as often as those of the chance classifier
#
#

# In[43]:

from skmultiflow.data.generators.waveform_generator import WaveformGenerator
from skmultiflow.classification.trees.hoeffding_tree import HoeffdingTree
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

# Prequential evaluation of a classifier on a generated waveform stream.
# The names bound below (stream, adf, eval) live at module level.

# 1. Create a stream
# prepare_for_use() is called on the stream before it is passed to the evaluator.
stream = WaveformGenerator()
stream.prepare_for_use()

# 2. Instantiate the classifier
# NOTE(review): AdaptiveRandomForest is not among the imports visible in this
# cell (HoeffdingTree is imported but not used here) -- confirm it is imported
# elsewhere, otherwise this line raises NameError.
adf = AdaptiveRandomForest()

# 3. Setup the evaluator
# NOTE(review): the name `eval` shadows the Python builtin of the same name
# for the rest of the module; renaming it needs a check of any later code
# that refers to it.
eval = EvaluatePrequential(show_plot=True,
                           pretrain_size=100,
                           max_instances=10000)

# 4. Run evaluation
# The classifier created in step 2 is evaluated on the stream from step 1.
eval.eval(stream=stream, classifier=adf)

#
# # Eval Prequential with datasets.csv for ARF
def _check_waveform_stream(stream, n_attributes, expected_data_file):
    """Assert the metadata and the first 10 instances of a prepared waveform stream.

    Parameters
    ----------
    stream: WaveformGenerator
        A stream on which ``prepare_for_use()`` has already been called.

    n_attributes: int
        The number of attributes the stream is expected to report; all of
        them are expected to be numerical.

    expected_data_file: string
        Path of the ``.npz`` file holding the expected ``X`` and ``y`` arrays
        for the first 10 instances.

    """
    assert stream.estimated_remaining_instances() == -1

    # Attribute names are 'att_num_0' ... 'att_num_<n_attributes - 1>'
    expected_header = ['att_num_{}'.format(i) for i in range(n_attributes)]
    assert stream.get_attributes_header() == expected_header

    expected_classes = [0, 1, 2]
    assert stream.get_classes() == expected_classes

    assert stream.get_classes_header() == ['class']

    assert stream.get_num_attributes() == n_attributes

    assert stream.get_num_nominal_attributes() == 0

    assert stream.get_num_numerical_attributes() == n_attributes

    assert stream.get_num_targets() == 3

    assert stream.get_num_values_per_nominal_attribute() == 0

    assert stream.get_plot_name() == 'Waveform Generator - 3 class labels'

    assert stream.has_more_instances() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances
    data = np.load(expected_data_file)
    X_expected = data['X']
    y_expected = data['y']

    # np.all is used instead of np.alltrue, which was removed in NumPy 2.0
    X, y = stream.next_instance()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # The last instance is compared against the same first expected instance
    X, y = stream.get_last_instance()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # After a restart, the first 10 instances are compared against the file
    stream.restart()
    X, y = stream.next_instance(10)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)


def test_waveform_generator(test_path):
    """Check WaveformGenerator metadata and output, without and with noise.

    Parameters
    ----------
    test_path: string
        Directory containing the expected-data files
        'waveform_stream.npz' and 'waveform_noise_stream.npz'.

    """
    # Without noise the stream is expected to expose 21 attributes
    stream = WaveformGenerator(seed=23, add_noise=False)
    stream.prepare_for_use()
    _check_waveform_stream(stream, 21,
                           os.path.join(test_path, 'waveform_stream.npz'))

    # Noise test: with noise the stream is expected to expose 40 attributes
    stream = WaveformGenerator(seed=23, add_noise=True)
    stream.prepare_for_use()
    _check_waveform_stream(stream, 40,
                           os.path.join(test_path, 'waveform_noise_stream.npz'))
Exemplo n.º 7
0
def test_waveform_generator_noise(test_path):
    """Check WaveformGenerator metadata and output when noise is enabled.

    Parameters
    ----------
    test_path: string
        Directory containing the expected-data file
        'waveform_noise_stream.npz'.

    """
    # Noise test
    stream = WaveformGenerator(random_state=23, has_noise=True)
    stream.prepare_for_use()

    assert stream.n_remaining_samples() == -1

    # Feature names are 'att_num_0' ... 'att_num_39'
    expected_names = ['att_num_{}'.format(i) for i in range(40)]
    assert stream.feature_names == expected_names

    expected_targets = [0, 1, 2]
    assert stream.target_values == expected_targets

    assert stream.target_names == ['target_0']

    assert stream.n_features == 40

    assert stream.n_cat_features == 0

    assert stream.n_num_features == 40

    assert stream.n_targets == 1

    assert stream.get_data_info() == 'Waveform Generator - 1 targets, 3 classes, 40 features'

    assert stream.has_more_samples() is True

    assert stream.is_restartable() is True

    # Load test data corresponding to first 10 instances
    test_file = os.path.join(test_path, 'waveform_noise_stream.npz')
    data = np.load(test_file)
    X_expected = data['X']
    y_expected = data['y']

    # np.all is used instead of np.alltrue, which was removed in NumPy 2.0
    X, y = stream.next_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # The last sample is compared against the same first expected sample
    X, y = stream.last_sample()
    assert np.all(X[0] == X_expected[0])
    assert np.all(y[0] == y_expected[0])

    # After a restart, the first 10 samples are compared against the file
    stream.restart()
    X, y = stream.next_sample(10)
    assert np.all(X == X_expected)
    assert np.all(y == y_expected)

    # The reported dimensions must agree with the shapes of the returned arrays
    assert stream.n_targets == np.array(y).ndim

    assert stream.n_features == X.shape[1]