示例#1
0
def run(X, y, hyperParams):
    """Run a test-then-train pass of SAMKNNClassifier over a dataset.

    Test function for SAMKNNClassifier, not integrated with evaluation modules.
    Every sample is first predicted and then used to update the model through
    ``partial_fit``. Progress and the final error rate are written to the
    logging module; nothing is returned.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The feature's matrix, coded as 64 bits.

    y: numpy.array of size n_samples
        The labels for all the samples in X coded as 8 bits.

    hyperParams: dict
        A dictionary containing the __init__ params for the SAMKNNClassifier.
        The keys read are 'nNeighbours', 'maxSize', 'knnWeights',
        'STMSizeAdaption' and 'use_ltm'.

    """
    r, _ = get_dimensions(X)
    classifier = SAMKNNClassifier(n_neighbors=hyperParams['nNeighbours'],
                                  max_window_size=hyperParams['maxSize'],
                                  weighting=hyperParams['knnWeights'],
                                  stm_size_option=hyperParams['STMSizeAdaption'],
                                  use_ltm=hyperParams['use_ltm'])
    logging.info('applying model on dataset')
    predicted_labels = []
    true_labels = []
    # Progress is reported roughly every 5% of the samples. The step is clamped
    # to at least 1 because ``r // 20`` is 0 for fewer than 20 samples, which
    # would make the modulo below raise ZeroDivisionError.
    progress_step = max(1, r // 20)
    for i in range(r):
        # Predict before training so the sample is unseen when it is scored.
        pred = classifier.predict(np.asarray([X[i]]))
        predicted_labels.append(pred[0])
        true_labels.append(y[i])
        classifier = classifier.partial_fit(np.asarray([X[i]]), np.asarray([y[i]]), None)
        if i % progress_step == 0:
            logging.info(str((i // (r / 20))*5) + "%")
    accuracy = accuracy_score(true_labels, predicted_labels)
    logging.info('error rate %.2f%%' % (100-100*accuracy))
def demo():
    """Run three parameterized demos, each on its own set of classifiers.

    Every demo receives separately constructed model instances, so no model
    object is shared between the runs.
    """

    def _fresh_models():
        # The classifiers we will use, in the same order as ``labels`` below.
        return [
            HoeffdingTreeClassifier(),
            SAMKNNClassifier(),
            LeveragingBaggingClassifier(random_state=1),
            SGDClassifier(),
        ]

    # All three sets are built up front, before the first demo starts.
    first_set, second_set, third_set = (_fresh_models() for _ in range(3))
    labels = ['HT', 'SAMKNNClassifier', 'LBkNN', 'SGDC']

    # Demo 1 -- plot should not fail
    demo_parameterized(first_set, model_names=labels)

    # Demo 2 -- csv output should look nice
    demo_parameterized(second_set, "sea_stream.csv", False, labels)

    # Demo 3 -- should not give "'NoneType' object is not iterable" error
    demo_parameterized(third_set, "covtype.csv", False, labels)
def demo(output_file=None, instances=50000):
    """Prequential evaluation of a SAMKNNClassifier on moving_squares.csv.

    The demo builds a file stream from the moving_squares.csv dataset,
    creates a scikit-multiflow SAMKNNClassifier, and hands both to an
    EvaluatePrequential evaluator, which then runs the evaluation with
    plotting enabled.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Data source: the moving squares dataset, read from its remote location.
    dataset_url = ("https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
                   "master/moving_squares.csv")
    source = FileStream(dataset_url)

    # Model under evaluation.
    model_settings = {
        'n_neighbors': 5,
        'weighting': 'distance',
        'max_window_size': 1000,
        'stm_size_option': 'maxACCApprox',
        'use_ltm': False,
    }
    model = SAMKNNClassifier(**model_settings)

    # Evaluator configuration; only the sample cap and the output file are
    # taken from the caller.
    evaluation_settings = {
        'pretrain_size': 0,
        'max_samples': instances,
        'batch_size': 1,
        'n_wait': 100,
        'max_time': 1000,
        'output_file': output_file,
        'show_plot': True,
    }
    prequential = EvaluatePrequential(**evaluation_settings)

    # Run the evaluation.
    prequential.evaluate(stream=source, model=model)
def test_sam_knn():
    """Check SAMKNNClassifier predictions on a seeded SEA stream.

    The learner is trained one sample at a time on 5000 samples. Every 100th
    sample (except the very first) is predicted before being learned, and the
    collected predictions are compared with a fixed reference sequence. The
    test also checks the return type of ``predict`` and that
    ``predict_proba`` raises NotImplementedError.
    """

    stream = SEAGenerator(random_state=1)

    hyperParams = {
        'maxSize': 1000,
        'nNeighbours': 5,
        'knnWeights': 'distance',
        'STMSizeAdaption': 'maxACCApprox',
        'use_ltm': False
    }

    learner = SAMKNNClassifier(n_neighbors=hyperParams['nNeighbours'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['knnWeights'],
                               stm_size_option=hyperParams['STMSizeAdaption'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 5000
    predictions = array('d')

    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
        1
    ])

    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    assert isinstance(learner.predict(X), np.ndarray)

    with pytest.raises(NotImplementedError):
        learner.predict_proba(X)
def test_sam_knn_coverage():
    """Check SAMKNNClassifier with the 'maxACC' option and the LTM enabled.

    The learner is trained one sample at a time on 1000 samples of a seeded
    SEA stream. Every 20th sample (except the very first) is predicted before
    being learned, and the collected predictions are compared with a fixed
    reference sequence. The whitespace-normalized ``get_info()`` string is
    checked as well.
    """

    stream = SEAGenerator(random_state=1)

    hyperParams = {
        'maxSize': 50,
        'n_neighbors': 3,
        'weighting': 'uniform',
        'stm_size_option': 'maxACC',
        'min_stm_size': 10,
        'use_ltm': True
    }

    learner = SAMKNNClassifier(n_neighbors=hyperParams['n_neighbors'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['weighting'],
                               stm_size_option=hyperParams['stm_size_option'],
                               min_stm_size=hyperParams['min_stm_size'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 1000
    predictions = array('i')

    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [
        1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1,
        1
    ])
    # np.alltrue was removed in NumPy 2.0; np.all is the supported spelling.
    assert np.all(predictions == expected_predictions)

    expected_info = "SAMKNNClassifier(ltm_size=0.4, max_window_size=None, min_stm_size=10, n_neighbors=3, " \
                    "stm_size_option='maxACC', use_ltm=True, weighting='uniform')"
    # str.split() with no arguments already yields whitespace-free tokens, so
    # joining them with single spaces normalizes line breaks and indentation.
    info = " ".join(learner.get_info().split())
    assert info == expected_info
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.sea_generator import SEAGenerator

# Simulate the data stream with a seeded SEA generator
# (classification function 2, balanced classes, noise_percentage=0.3)
dstream = SEAGenerator(classification_function=2,
                       balance_classes=True,
                       noise_percentage=0.3,
                       random_state=333)

# Draw five samples from the stream; the returned values are not stored
dstream.next_sample(5)

# Instantiate the SAM-KNN classifier (distance-weighted, 10 neighbours,
# long-term memory enabled)
sam_knn_class = SAMKNNClassifier(n_neighbors=10,
                                 weighting='distance',
                                 max_window_size=1000,
                                 stm_size_option='maxACCApprox',
                                 use_ltm=True)

# Set up the prequential evaluation: no plot, 1000 pretraining samples,
# at most 10000 samples, accuracy as the only metric
evaluate1 = EvaluatePrequential(show_plot=False,
                                pretrain_size=1000,
                                max_samples=10000,
                                metrics=['accuracy'])
# Run the evaluation of the SAM-KNN classifier on the simulated stream
evaluate1.evaluate(stream=dstream, model=sam_knn_class)

###################################################

### KNN regressor
# Import the relevant libraries
示例#7
0
                    help="Name of Detector {KD3/Adwin/PageHinkley}")

# Parse the command-line options (the parser is configured above this point).
args = parser.parse_args()

# The dataset name given on the command line is resolved to
# datasets/<name>.csv and opened as a file stream.
test_dataset = args.dataset
print("dataset:" + "datasets/" + test_dataset + '.csv')
stream = FileStream("datasets/" + test_dataset + '.csv')
#print(stream.get_target_values())

# Classifier instances created for this script.
onlineBoosting = OnlineBoostingClassifier()
knn_adwin = KNNADWINClassifier(n_neighbors=8,
                               leaf_size=40,
                               max_window_size=1000)
SAMKNN = SAMKNNClassifier(n_neighbors=10,
                          weighting='distance',
                          max_window_size=500,
                          stm_size_option='maxACCApprox',
                          use_ltm=False)
learn_pp_nse = LearnPPNSEClassifier()
SGD = SGDClassifier()
rslvq = RobustSoftLearningVectorQuantization()
#CMMM2 = CMGMMClassifier(classes=stream.get_target_values(), prune_component=True, drift_detector=None)
#CMMM.train(train_dataset, 'label', 'mfcc')
#

# Evaluator configuration; label_size comes from the command line and is
# converted to float here.
# NOTE(review): the name `eval` shadows the Python builtin of the same name
# for the rest of this module -- consider renaming together with its users.
eval = WeakEvaluatePrequential(
    show_plot=False,
    pretrain_size=1500,
    batch_size=200,
    label_size=float(args.label_size),
    metrics=['accuracy', 'f1', 'running_time', 'model_size'])
示例#8
0
    ################### Synthetic datasets ###################
    # stream = FileStream('./datasets/synthetic/HyperFast.csv')
    # stream = FileStream('./datasets/synthetic/HyperSlow.csv')
    # stream = FileStream('./datasets/synthetic/SEA_S.csv')
    # stream = FileStream('./datasets/synthetic/SEA_G.csv')




    OBA = OzaBaggingADWINClassifier(random_state=r_state)
    LB = LeveragingBaggingClassifier(random_state=r_state)
    ORUSBoost = OnlineRUSBoostClassifier(random_state=r_state)
    OAdaC2 = OnlineAdaC2Classifier(random_state=r_state)
    samknn = SAMKNNClassifier(n_neighbors=5,
                              min_stm_size=50,
                              max_window_size=5000)
    dam3 = DAM3Classifier(n_neighbors=5,
                          min_stm_size=50,
                          wm_size=0.3,
                          ltm_size=0.5,
                          max_window_size=5000,
                          drift_detector_winSize=100,
                          drift_detector_thr=0.001,
                          pretrain_size=200,
                          random_state=r_state
                          )


    models = [dam3, samknn]
    models_names = ["DAM3", "SAMkNN"]