def run(X, y, hyperParams):
    """ run

    Test function for SAMKNN, not integrated with evaluation modules.

    Every sample is first predicted and then used to train the classifier
    (test-then-train). Progress and the final error rate are written through
    the ``logging`` module; nothing is returned.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The feature's matrix, coded as 64 bits.

    y: numpy.array of size n_samples
        The labels for all the samples in X coded as 8 bits.

    hyperParams: dict
        A dictionary containing the __init__ params for the SAMKNN.
        The keys read are 'nNeighbours', 'maxSize', 'knnWeights',
        'STMSizeAdaption' and 'useLTM'.

    """
    n_samples, _ = get_dimensions(X)
    classifier = SAMKNN(n_neighbors=hyperParams['nNeighbours'],
                        max_window_size=hyperParams['maxSize'],
                        weighting=hyperParams['knnWeights'],
                        stm_size_option=hyperParams['STMSizeAdaption'],
                        use_ltm=hyperParams['useLTM'])
    logging.info('applying model on dataset')

    # Progress is reported about every 5% of the data. The interval is
    # clamped to 1 so that inputs with fewer than 20 samples do not cause a
    # modulo-by-zero (n_samples // 20 would be 0).
    log_every = max(n_samples // 20, 1)

    predicted_labels = []
    true_labels = []
    for i in range(n_samples):
        # Predict before training on the sample.
        pred = classifier.predict(np.asarray([X[i]]))
        predicted_labels.append(pred[0])
        true_labels.append(y[i])
        classifier = classifier.partial_fit(np.asarray([X[i]]),
                                            np.asarray([y[i]]), None)
        if i % log_every == 0:
            logging.info(str((i // (n_samples / 20)) * 5) + "%")

    accuracy = accuracy_score(true_labels, predicted_labels)
    logging.info('error rate %.2f%%' % (100 - 100 * accuracy))
def demo():
    """Run three parameterized demos, each with its own set of models.

    Each demo receives a freshly constructed list of the same four
    classifiers, labelled by ``model_names``.
    """
    def _build_models():
        # One new instance of every classifier, in the order of model_names.
        return [
            HoeffdingTree(),
            SAMKNN(),
            LeverageBagging(random_state=1),
            SGDClassifier(),
        ]

    # All three model sets are created up front, before any demo runs.
    models_plot = _build_models()
    models_sea = _build_models()
    models_covtype = _build_models()
    model_names = ['HT', 'SAMKNN', 'LBkNN', 'SGDC']

    # Demo 1 -- plot should not fail
    demo_parameterized(models_plot, model_names=model_names)

    # Demo 2 -- csv output should look nice
    demo_parameterized(models_sea, "sea_stream.csv", False, model_names)

    # Demo 3 -- should not give "'NoneType' object is not iterable" error
    demo_parameterized(models_covtype, "covtype.csv", False, model_names)
def test_grid():
    """Run CrossValidation.test() for three classifiers and save a summary.

    The streams are the concatenation of the real-world, standard and
    reoccurring standard streams built by the CrossValidation object.
    """
    classifiers = [AdaptiveRandomForest(), SAMKNN(), HAT()]
    cv = CrossValidation(clfs=classifiers, max_samples=1000000, test_size=1)

    # Build the three stream groups in the same order they are concatenated.
    real_world = cv.init_real_world()
    standard = cv.init_standard_streams()
    reoccurring = cv.init_reoccuring_standard_streams()
    cv.streams = real_world + standard + reoccurring

    cv.test()
    cv.save_summary()
def demo(output_file=None, instances=50000):
    """ _test_sam_knn_prequential

    Prequential evaluation demo for SAMKNN.

    A file stream is opened on the movingSquares.csv file inside the
    datasets folder, a SAMKNN classifier is configured, and an
    EvaluatePrequential evaluator is run on that stream with the classifier
    as the model. No pipeline is built; the classifier is passed directly.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances

    """
    # Stream: samples are read from the CSV file.
    stream = FileStream("../data/datasets/movingSquares.csv", -1, 1)
    stream.prepare_for_use()

    # Classifier under evaluation.
    classifier = SAMKNN(
        n_neighbors=5,
        weighting='distance',
        max_window_size=1000,
        stm_size_option='maxACCApprox',
        use_ltm=False,
    )

    # Evaluator configuration, collected in one place for readability.
    evaluator_options = {
        'pretrain_size': 0,
        'max_samples': instances,
        'batch_size': 1,
        'n_wait': 100,
        'max_time': 1000,
        'output_file': output_file,
        'show_plot': True,
        'metrics': ['performance'],
    }
    evaluator = EvaluatePrequential(**evaluator_options)

    # Run the evaluation.
    evaluator.evaluate(stream=stream, model=classifier)
def test_sam_knn():
    """Check SAMKNN predictions on a seeded SEA stream.

    The learner is trained sample by sample on 5000 samples from
    ``SEAGenerator(random_state=1)``. Every 100 samples (except the very
    first) a prediction is recorded before training on that sample, and the
    recorded sequence is compared with the expected one. The test also
    checks that ``predict`` returns a numpy array and that ``predict_proba``
    raises ``NotImplementedError``.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    hyperParams = {'maxSize': 1000,
                   'nNeighbours': 5,
                   'knnWeights': 'distance',
                   'STMSizeAdaption': 'maxACCApprox',
                   'use_ltm': False}

    learner = SAMKNN(n_neighbors=hyperParams['nNeighbours'],
                     max_window_size=hyperParams['maxSize'],
                     weighting=hyperParams['knnWeights'],
                     stm_size_option=hyperParams['STMSizeAdaption'],
                     use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 5000
    predictions = array('d')
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
                                       1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
                                       0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                       1, 1, 0, 1, 0, 0, 1, 0, 1])

    # np.alltrue was removed in NumPy 2.0; np.all is the documented
    # replacement and behaves the same on the comparison result.
    assert np.all(predictions == expected_predictions)

    assert isinstance(learner.predict(X), np.ndarray)

    with pytest.raises(NotImplementedError):
        learner.predict_proba(X)
def test_sam_knn_coverage():
    """Exercise SAMKNN with uniform weighting, 'maxACC' and the LTM enabled.

    The learner is trained sample by sample on 1000 samples from
    ``SEAGenerator(random_state=1)``. Every 20 samples (except the very
    first) a prediction is recorded before training on that sample, and the
    recorded sequence is compared with the expected one. The string returned
    by ``get_info`` is checked as well.
    """
    stream = SEAGenerator(random_state=1)
    stream.prepare_for_use()

    hyperParams = {'maxSize': 50,
                   'n_neighbors': 3,
                   'weighting': 'uniform',
                   'stm_size_option': 'maxACC',
                   'min_stm_size': 10,
                   'use_ltm': True}

    learner = SAMKNN(n_neighbors=hyperParams['n_neighbors'],
                     max_window_size=hyperParams['maxSize'],
                     weighting=hyperParams['weighting'],
                     stm_size_option=hyperParams['stm_size_option'],
                     min_stm_size=hyperParams['min_stm_size'],
                     use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 1000
    predictions = array('i')
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
                                       0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
                                       1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
                                       0, 0, 0, 0, 0, 1, 1, 1, 0])

    # np.alltrue was removed in NumPy 2.0; np.all is the documented
    # replacement and behaves the same on the comparison result.
    assert np.all(predictions == expected_predictions)

    expected_info = "SAMKNN(ltm_size=0.4, max_window_size=None, min_stm_size=10, n_neighbors=3,\n" \
                    "       stm_size_option='maxACC', use_ltm=True, weighting='uniform')"
    assert learner.get_info() == expected_info
alpha=90.0, position=N_SAMPLES / 2) stream.name = 'LED ABRUPBT' STREAMS.append(stream) """Evaluate on ARSLVQ, SAM and HAT""" # TODO NB and ARSLVQ working for stream in STREAMS: print('{}:\n'.format(stream.name)) f = open(res_file, 'a+') f.write('{}:\n'.format(stream.name)) f.close() rrslvq = RRSLVQ(prototypes_per_class=2,confidence=1e-10) high_dim_test(copy.copy(stream), copy.copy(rrslvq), N_SAMPLES) low_dim_test(copy.copy(stream), copy.copy(rrslvq), N_SAMPLES) arslvq = RSLVQ(gradient_descent='Adadelta') high_dim_test(copy.copy(stream), copy.copy(arslvq), N_SAMPLES) low_dim_test(copy.copy(stream), copy.copy(arslvq), N_SAMPLES) samknn = SAMKNN(max_window_size=5000,stm_size_option=None) high_dim_test(copy.copy(stream), copy.copy(samknn), N_SAMPLES) low_dim_test(copy.copy(stream), copy.copy(samknn), N_SAMPLES) arf = ARF() high_dim_test(copy.copy(stream), copy.copy(arf), N_SAMPLES) low_dim_test(copy.copy(stream), copy.copy(arf), N_SAMPLES)
# while 1: # line = f.readline() # if line == '': break # arr = np.array(line.split(','), dtype='float64') # labels.append(arr[1]) # f.close() # HIGH-DIM X, y = data[:, :-1], data[:, -1] clfs = [ RSLVQ(prototypes_per_class=2, gradient_descent="Adadelta"), RRSLVQ(prototypes_per_class=2, confidence=1e-10), ARF(), SAMKNN() ] for clf in clfs: acc_fold = [] kappa_fold = [] time_fold = [] for _ in range(5): _clf = copy.deepcopy(clf) start_time = time.time() y_true = [] y_pred = [] x = data[0, :-1].reshape(1, 1000) y = data[0, -1].reshape(1, 1)
def test_led():
    """Run a CrossValidation search over four LED drift streams.

    Two ConceptDriftStream and two ReoccuringDriftStream instances are
    built, each switching from an LED generator with 3 drifting features to
    one with 7. Six classifiers are searched on them and a summary is saved.
    """
    def _led(n_drift_features):
        # Noise-free LED generator with the given number of drift features.
        return LEDGeneratorDrift(has_noise=False,
                                 noise_percentage=0.0,
                                 n_drift_features=n_drift_features)

    led_a = ConceptDriftStream(stream=_led(3),
                               drift_stream=_led(7),
                               random_state=None,
                               alpha=90.0,  # angle of change grade 0 - 90
                               position=250000,
                               width=1)
    led_a.name = "led_a"

    led_g = ConceptDriftStream(stream=_led(3),
                               drift_stream=_led(7),
                               random_state=None,
                               position=250000,
                               width=50000)
    led_g.name = "led_g"

    led_fa = ReoccuringDriftStream(stream=_led(3),
                                   drift_stream=_led(7),
                                   random_state=None,
                                   alpha=90.0,  # angle of change grade 0 - 90
                                   position=2000,
                                   width=1)

    led_fg = ReoccuringDriftStream(stream=_led(3),
                                   drift_stream=_led(7),
                                   random_state=None,
                                   position=2000,
                                   width=1000)

    # Settings shared by the two LVQ-based classifiers.
    n_prototypes = 2
    sigma = 3

    clfs = [
        ARSLVQ(prototypes_per_class=n_prototypes, sigma=sigma,
               confidence=0.0001, window_size=1500),
        OzaBaggingAdwin(),
        AdaptiveRandomForest(),
        HAT(),
        RSLVQ(prototypes_per_class=n_prototypes, sigma=sigma),
        SAMKNN(),
    ]

    cv = CrossValidation(clfs=clfs, parallel=1)
    cv.streams = [led_a, led_g, led_fa, led_fg]
    cv.search()
    cv.save_summary()