def demo():
    """Run three ``demo_parameterized`` evaluations on independent model sets.

    Every run receives its own freshly constructed list of four classifiers
    (Hoeffding tree, SAM-kNN, leveraging bagging, SGD), so no model instance
    is shared between runs. All three lists are built before the first run
    starts.
    """

    def _fresh_models():
        # New instances on every call; order must line up with `model_names`.
        return [
            HoeffdingTreeClassifier(),
            SAMKNNClassifier(),
            LeveragingBaggingClassifier(random_state=1),
            SGDClassifier(),
        ]

    h1, h2, h3 = _fresh_models(), _fresh_models(), _fresh_models()
    model_names = ['HT', 'SAMKNNClassifier', 'LBkNN', 'SGDC']

    # Demo 1 -- plot should not fail
    demo_parameterized(h1, model_names=model_names)

    # Demo 2 -- csv output should look nice
    demo_parameterized(h2, "sea_stream.csv", False, model_names)

    # Demo 3 -- should not give "'NoneType' object is not iterable" error
    demo_parameterized(h3, "covtype.csv", False, model_names)
def test_leverage_bagging():
    """Regression test for LeveragingBaggingClassifier with a KNN base estimator.

    Draws 5000 samples from a seeded SEAGenerator. Every 100th sample (except
    the very first) is predicted *before* the learner is trained on it; every
    sample is then used for training. The collected predictions, the resulting
    accuracy, the return types of ``predict`` / ``predict_proba`` and the text
    returned by ``get_info`` are compared with previously recorded values.
    """
    stream = SEAGenerator(classification_function=1,
                          noise_percentage=0.067,
                          random_state=112)
    knn = KNNClassifier(n_neighbors=8,
                        leaf_size=40,
                        max_window_size=2000)
    learner = LeveragingBaggingClassifier(base_estimator=knn,
                                          n_estimators=3,
                                          random_state=112)
    first = True

    cnt = 0
    max_samples = 5000
    predictions = []
    wait_samples = 100
    correct_predictions = 0

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            if y[0] == predictions[-1]:
                correct_predictions += 1
        if first:
            # The set of classes is only supplied on the first training call.
            learner.partial_fit(X, y, classes=[0, 1])
            first = False
        else:
            learner.partial_fit(X, y)
        cnt += 1

    performance = correct_predictions / len(predictions)
    expected_predictions = [1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
                            0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
                            0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
                            1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
                            0, 1, 1, 0, 1, 0, 0, 1, 1]
    # np.alltrue no longer exists in NumPy 2.0; np.array_equal checks both
    # length and element-wise equality of the two sequences.
    assert np.array_equal(predictions, expected_predictions)

    expected_performance = 0.8571428571428571
    assert np.isclose(expected_performance, performance)

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)

    expected_info = "LeveragingBaggingClassifier(base_estimator=KNNClassifier(leaf_size=40, " \
                    "max_window_size=2000, metric='euclidean', n_neighbors=8), " \
                    "delta=0.002, enable_code_matrix=False, leverage_algorithm='leveraging_bag'," \
                    " n_estimators=3, random_state=112, w=6)"
    # Collapse every run of whitespace (including line breaks) to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info
def test_leverage_bagging_me():
    """Check the 'leveraging_bag_me' variant with a KNN base estimator.

    Builds a seeded SEAGenerator stream and a three-member
    LeveragingBaggingClassifier, then hands both to
    ``run_prequential_supervised`` together with the recorded expected
    predictions.
    """
    stream = SEAGenerator(classification_function=1,
                          noise_percentage=0.067,
                          random_state=112)
    knn = KNNClassifier(n_neighbors=8, leaf_size=40, max_window_size=2000)

    # leveraging_bag_me
    learner = LeveragingBaggingClassifier(
        base_estimator=knn,
        n_estimators=3,
        random_state=112,
        leverage_algorithm='leveraging_bag_me')

    # The np.int alias was removed in NumPy 1.24; the builtin int it pointed
    # to yields the same dtype.
    y_expected = np.asarray([
        0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
        1, 0
    ], dtype=int)

    run_prequential_supervised(stream,
                               learner,
                               max_samples=2000,
                               n_wait=40,
                               y_expected=y_expected)
def test_leverage_bagging_me():
    """Check the 'leveraging_bag_me' variant with a NaiveBayes base estimator.

    A five-member LeveragingBaggingClassifier and a seeded
    ConceptDriftStreamGenerator are handed to ``run_prequential_supervised``
    together with the recorded expected predictions.
    """
    # NOTE(review): a function with this same name is defined earlier in this
    # file; if both really live in one module, this definition replaces the
    # earlier one and only this test is collected -- confirm and rename.
    nb = NaiveBayes()

    # leveraging_bag_me
    learner = LeveragingBaggingClassifier(base_estimator=nb,
                                          n_estimators=5,
                                          random_state=112,
                                          leverage_algorithm='leveraging_bag_me')

    # The np.int alias was removed in NumPy 1.24; the builtin int it pointed
    # to yields the same dtype.
    y_expected = np.asarray([0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
                             0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
                             1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
                             0, 0, 0, 1, 1, 0, 1, 1, 1, 0,
                             1, 0, 1, 0, 0, 1, 1, 0, 1, 0], dtype=int)

    run_prequential_supervised(ConceptDriftStreamGenerator(position=500, width=100, random_state=112),
                               learner, max_samples=2000, n_wait=40, target_values=[0, 1], y_expected=y_expected)
def test_leverage_bagging_code_matrix():
    """Check LeveragingBaggingClassifier with ``enable_code_matrix=True``.

    A five-member ensemble of NaiveBayes learners and a seeded five-class
    RandomTreeGenerator are handed to ``run_prequential_supervised`` together
    with the recorded expected predictions.
    """
    nb = NaiveBayes()

    # enable the output detection code matrix
    learner = LeveragingBaggingClassifier(base_estimator=nb,
                                          n_estimators=5,
                                          random_state=12,
                                          enable_code_matrix=True)

    # The np.int alias was removed in NumPy 1.24; the builtin int it pointed
    # to yields the same dtype.
    y_expected = np.asarray([0, 0, 3, 2, 3, 1, 4, 1, 3, 4,
                             2, 4, 2, 2, 0, 0, 2, 4, 2, 4,
                             0, 4, 2, 4, 2, 4, 0, 4, 1, 3,
                             2, 1, 2, 4, 2, 4, 1, 3, 0, 4,
                             2, 0, 0, 4, 3, 2, 4, 4, 2, 4], dtype=int)

    run_prequential_supervised(RandomTreeGenerator(tree_random_state=1, sample_random_state=12, n_classes=5),
                               learner, max_samples=2000, n_wait=40, target_values=[0, 1, 2, 3, 4],
                               y_expected=y_expected)
def demo(output_file=None, instances=40000):
    """ _test_prequential_bagging

    Runs a prequential evaluation of a LeveragingBaggingClassifier made of
    two HoeffdingTreeClassifier members on a WaveformGenerator stream.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Data source for the evaluation
    stream = WaveformGenerator()

    # Model under evaluation: a two-member leveraging-bagging ensemble
    ensemble = LeveragingBaggingClassifier(
        base_estimator=HoeffdingTreeClassifier(),
        n_estimators=2,
    )

    # Evaluator settings: the first 2000 samples are used for pre-training
    # and plotting is switched off.
    evaluator_options = dict(
        pretrain_size=2000,
        max_samples=instances,
        output_file=output_file,
        show_plot=False,
    )
    evaluator = EvaluatePrequential(**evaluator_options)

    # Run the evaluation
    evaluator.evaluate(stream=stream, model=ensemble)
# Applying Leveraging Bagging Classifier on a synthetic data stream
from skmultiflow.meta import LeveragingBaggingClassifier
from skmultiflow.lazy import KNNADWINClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.sea_generator import SEAGenerator

# Synthetic SEA stream: classification function 2, balanced classes,
# 30% noise, fixed seed.
dstream = SEAGenerator(
    classification_function=2,
    balance_classes=True,
    noise_percentage=0.3,
    random_state=333,
)

# Leveraging Bagging ensemble of six members, each based on a KNN ADWIN
# classifier.
knn_adwin = KNNADWINClassifier(n_neighbors=10, max_window_size=1000)
leverage_class = LeveragingBaggingClassifier(
    base_estimator=knn_adwin,
    n_estimators=6,
    random_state=333,
)

# Prequential evaluation: 1000 pre-training samples, at most 10000 samples,
# accuracy only, no plot.
evaluate1 = EvaluatePrequential(
    show_plot=False,
    pretrain_size=1000,
    max_samples=10000,
    metrics=['accuracy'],
)
# Run the evaluation
evaluate1.evaluate(stream=dstream, model=leverage_class)

###################################################

# Applying Online Boosting Classifier on a synthetic data stream
from skmultiflow.meta import OnlineBoostingClassifier
from skmultiflow.evaluation import EvaluatePrequential
def test_leverage_bagging_coverage():
    """Exercise the error paths and ``reset`` of LeveragingBaggingClassifier."""
    # An unknown leverage_algorithm is expected to be rejected at construction
    with pytest.raises(ValueError):
        LeveragingBaggingClassifier(leverage_algorithm='invalid')

    learner = LeveragingBaggingClassifier(random_state=4321)
    X, y = SEAGenerator(random_state=4321).next_sample()

    # First call without the class list is expected to raise
    with pytest.raises(ValueError):
        learner.partial_fit(X, y, classes=None)

    # A valid first fit registers classes 0 and 1
    learner.partial_fit(X, y, classes=[0, 1])

    # A later call with a different class list is expected to raise
    with pytest.raises(ValueError):
        learner.partial_fit(X, y, classes=[0, 1, -1])

    # leverage_algorithm overwritten with an invalid value after construction
    with pytest.raises(RuntimeError):
        learner.leverage_algorithm = 'invalid'
        learner.partial_fit(X, y, classes=[0, 1])

    # After a reset the known classes must be cleared
    learner.reset()
    assert learner.classes is None
    if cm.sum_col[i] != 0 else 'Ill-defined'
    print("Class {}: {}".format(i, recall))
'''
#------------------------------------------------Experiment 3--------------------------------------------------------------- 
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.meta import LeveragingBaggingClassifier
# Load the data stream from the CSV file
stream = FileStream(r"C:\Users\luyj0\OneDrive\Desktop\COMPX523-Data Stream Mining\covtype_numeric.csv")

# Single (non-ensemble) classifiers
knn = MyKNNClassifier()
ht = HoeffdingTreeClassifier()
nb = NaiveBayes()
wv_knn = MyKNNClassifier(weighted_vote=True)
s_knn = MyKNNClassifier(standardize=True)

# The two ensemble algorithms
arf = AdaptiveRandomForestClassifier()
lb = LeveragingBaggingClassifier()

# Metrics requested from the evaluator
metrics = [
    'accuracy',
    'kappa',
    'kappa_m',
    'kappa_t',
    'running_time',
    'model_size',
]

# Test-then-train (prequential) evaluator shared by all runs
evaluator = EvaluatePrequential(
    max_samples=30000,
    n_wait=100,
    show_plot=False,
    metrics=metrics,
)

# Each display name paired with its model; the two parallel lists used by
# the evaluation loop are derived from these pairs.
labelled_models = [
    ('KNN', knn),
    ('HoeffdingTree', ht),
    ('NaiveBayes', nb),
    ('KNN+WeightedVote', wv_knn),
    ('KNN+Standardize', s_knn),
    ('AdaptiveRandomForest', arf),
    ('Leverage Bagging', lb),
]
model_list = [model for _, model in labelled_models]
name_list = [label for label, _ in labelled_models]
# Execute each evaluation in the list until it reaches the end
for index in range(len(model_list)):
    evaluator.evaluate(stream=stream,model=[model_list[index]],model_names=[name_list[index]])
    cm = evaluator.get_mean_measurements(0).confusion_matrix
    print("Recall per class")