import logging

import numpy as np
from sklearn.metrics import accuracy_score

from skmultiflow.lazy import SAMKNNClassifier
from skmultiflow.utils import get_dimensions


def run(X, y, hyperParams):
    """ run

    Test function for SAMKNNClassifier, not integrated with evaluation modules.

    Parameters
    ----------
    X: numpy.ndarray of shape (n_samples, n_features)
        The feature matrix, coded as 64 bits.

    y: numpy.array of size n_samples
        The labels for all the samples in X, coded as 8 bits.

    hyperParams: dict
        A dictionary containing the __init__ params for the SAMKNNClassifier.

    """
    r, c = get_dimensions(X)
    classifier = SAMKNNClassifier(n_neighbors=hyperParams['nNeighbours'],
                                  max_window_size=hyperParams['maxSize'],
                                  weighting=hyperParams['knnWeights'],
                                  stm_size_option=hyperParams['STMSizeAdaption'],
                                  use_ltm=hyperParams['use_ltm'])
    logging.info('applying model on dataset')
    predicted_labels = []
    true_labels = []
    for i in range(r):
        pred = classifier.predict(np.asarray([X[i]]))
        predicted_labels.append(pred[0])
        true_labels.append(y[i])
        classifier = classifier.partial_fit(np.asarray([X[i]]), np.asarray([y[i]]), None)
        # Log progress every 5% of the dataset
        if (i % (r // 20)) == 0:
            logging.info(str((i // (r / 20)) * 5) + "%")

    accuracy = accuracy_score(true_labels, predicted_labels)
    logging.info('error rate %.2f%%' % (100 - 100 * accuracy))
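# Usage sketch for run() (assumptions: data is drawn from a synthetic
# SEAGenerator stream and the hyperparameter values below are illustrative).
from skmultiflow.data import SEAGenerator

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    stream = SEAGenerator(random_state=1)
    X, y = stream.next_sample(5000)
    hyperParams = {'nNeighbours': 5, 'maxSize': 1000, 'knnWeights': 'distance',
                   'STMSizeAdaption': 'maxACCApprox', 'use_ltm': False}
    run(X, y, hyperParams)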
from sklearn.linear_model import SGDClassifier

from skmultiflow.lazy import SAMKNNClassifier
from skmultiflow.meta import LeveragingBaggingClassifier
from skmultiflow.trees import HoeffdingTreeClassifier


def demo():
    # The classifiers we will compare: Hoeffding Tree, SAM-kNN,
    # Leveraging Bagging, and SGD
    h1 = [HoeffdingTreeClassifier(), SAMKNNClassifier(),
          LeveragingBaggingClassifier(random_state=1), SGDClassifier()]
    h2 = [HoeffdingTreeClassifier(), SAMKNNClassifier(),
          LeveragingBaggingClassifier(random_state=1), SGDClassifier()]
    h3 = [HoeffdingTreeClassifier(), SAMKNNClassifier(),
          LeveragingBaggingClassifier(random_state=1), SGDClassifier()]
    model_names = ['HT', 'SAMKNNClassifier', 'LBkNN', 'SGDC']

    # Demo 1 -- plot should not fail
    demo_parameterized(h1, model_names=model_names)

    # Demo 2 -- csv output should look nice
    demo_parameterized(h2, "sea_stream.csv", False, model_names)

    # Demo 3 -- should not give "'NoneType' object is not iterable" error
    demo_parameterized(h3, "covtype.csv", False, model_names)
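# `demo_parameterized` is not defined in this excerpt. Below is a minimal
# sketch of what it might look like, assuming it wraps a FileStream and an
# EvaluatePrequential run; the signature is inferred from the calls above,
# and the evaluator settings are illustrative.
from skmultiflow.data import FileStream
from skmultiflow.evaluation import EvaluatePrequential


def demo_parameterized(h, filename="covtype.csv", show_plot=True, model_names=None):
    # Setup the stream from a local csv file
    stream = FileStream(filename)
    # Setup the evaluator and run all models on the same stream
    evaluator = EvaluatePrequential(pretrain_size=200, max_samples=1000,
                                    show_plot=show_plot)
    evaluator.evaluate(stream=stream, model=h, model_names=model_names)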
from skmultiflow.data import FileStream
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.lazy import SAMKNNClassifier


def demo(output_file=None, instances=50000):
    """ _test_sam_knn_prequential

    This demo shows how to produce a prequential evaluation.

    The first thing needed is a stream. In this case we use the
    moving_squares.csv dataset.

    Then we need to set up a classifier, which in this case is an instance
    of scikit-multiflow's SAMKNNClassifier. Optionally, a Pipeline structure
    could wrap the classifier, but here it is passed directly to the
    evaluator.

    The evaluation is then run.

    Parameters
    ----------
    output_file: string
        The name of the csv output file.

    instances: int
        The evaluation's maximum number of instances.

    """
    # Setup the File Stream
    stream = FileStream(
        "https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/"
        "master/moving_squares.csv")
    # stream = WaveformGenerator()

    # Setup the classifier
    classifier = SAMKNNClassifier(n_neighbors=5, weighting='distance',
                                  max_window_size=1000,
                                  stm_size_option='maxACCApprox', use_ltm=False)

    # Setup the evaluator
    evaluator = EvaluatePrequential(pretrain_size=0, max_samples=instances,
                                    batch_size=1, n_wait=100, max_time=1000,
                                    output_file=output_file, show_plot=True)

    # Evaluate
    evaluator.evaluate(stream=stream, model=classifier)
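# Usage sketch: the output file name and sample budget below are illustrative.
if __name__ == '__main__':
    demo(output_file='sam_knn_prequential.csv', instances=20000)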
from array import array

import numpy as np
import pytest

from skmultiflow.data import SEAGenerator
from skmultiflow.lazy import SAMKNNClassifier


def test_sam_knn():
    stream = SEAGenerator(random_state=1)

    hyperParams = {'maxSize': 1000, 'nNeighbours': 5, 'knnWeights': 'distance',
                   'STMSizeAdaption': 'maxACCApprox', 'use_ltm': False}

    learner = SAMKNNClassifier(n_neighbors=hyperParams['nNeighbours'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['knnWeights'],
                               stm_size_option=hyperParams['STMSizeAdaption'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 5000
    predictions = array('d')
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
                                       1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
                                       0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                       1, 1, 0, 1, 0, 0, 1, 0, 1])
    assert np.all(predictions == expected_predictions)

    assert type(learner.predict(X)) == np.ndarray

    with pytest.raises(NotImplementedError):
        learner.predict_proba(X)
def test_sam_knn_coverage():
    stream = SEAGenerator(random_state=1)

    hyperParams = {'maxSize': 50, 'n_neighbors': 3, 'weighting': 'uniform',
                   'stm_size_option': 'maxACC', 'min_stm_size': 10, 'use_ltm': True}

    learner = SAMKNNClassifier(n_neighbors=hyperParams['n_neighbors'],
                               max_window_size=hyperParams['maxSize'],
                               weighting=hyperParams['weighting'],
                               stm_size_option=hyperParams['stm_size_option'],
                               min_stm_size=hyperParams['min_stm_size'],
                               use_ltm=hyperParams['use_ltm'])

    cnt = 0
    max_samples = 1000
    predictions = array('i')
    wait_samples = 20

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                       0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
                                       0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
                                       1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
                                       0, 0, 0, 0, 0, 1, 0, 1, 1])
    assert np.all(predictions == expected_predictions)

    expected_info = "SAMKNNClassifier(ltm_size=0.4, max_window_size=None, min_stm_size=10, " \
                    "n_neighbors=3, stm_size_option='maxACC', use_ltm=True, weighting='uniform')"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info
from skmultiflow.lazy import SAMKNNClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.sea_generator import SEAGenerator

# Simulate the data stream
dstream = SEAGenerator(classification_function=2, balance_classes=True,
                       noise_percentage=0.3, random_state=333)

# Retrieve five samples
dstream.next_sample(5)

# Instantiate the SAM-kNN classifier
sam_knn_class = SAMKNNClassifier(n_neighbors=10, weighting='distance',
                                 max_window_size=1000,
                                 stm_size_option='maxACCApprox', use_ltm=True)

# Prequential evaluation
evaluate1 = EvaluatePrequential(show_plot=False, pretrain_size=1000,
                                max_samples=10000, metrics=['accuracy'])

# Run the evaluation
evaluate1.evaluate(stream=dstream, model=sam_knn_class)
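# A possible extension (a sketch; the second model and the names below are
# illustrative): the same prequential evaluation can compare SAM-kNN with and
# without the long-term memory by passing a list of models to the evaluator.
sam_knn_no_ltm = SAMKNNClassifier(n_neighbors=10, weighting='distance',
                                  max_window_size=1000,
                                  stm_size_option='maxACCApprox', use_ltm=False)
evaluate2 = EvaluatePrequential(show_plot=False, pretrain_size=1000,
                                max_samples=10000, metrics=['accuracy'])
evaluate2.evaluate(stream=dstream, model=[sam_knn_class, sam_knn_no_ltm],
                   model_names=['SAMkNN_LTM', 'SAMkNN_STM_only'])

###################################################

### KNN regressor

# Import the relevant libraries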
help="Name of Detector {KD3/Adwin/PageHinkley}") args = parser.parse_args() test_dataset = args.dataset print("dataset:" + "datasets/" + test_dataset + '.csv') stream = FileStream("datasets/" + test_dataset + '.csv') #print(stream.get_target_values()) onlineBoosting = OnlineBoostingClassifier() knn_adwin = KNNADWINClassifier(n_neighbors=8, leaf_size=40, max_window_size=1000) SAMKNN = SAMKNNClassifier(n_neighbors=10, weighting='distance', max_window_size=500, stm_size_option='maxACCApprox', use_ltm=False) learn_pp_nse = LearnPPNSEClassifier() SGD = SGDClassifier() rslvq = RobustSoftLearningVectorQuantization() #CMMM2 = CMGMMClassifier(classes=stream.get_target_values(), prune_component=True, drift_detector=None) #CMMM.train(train_dataset, 'label', 'mfcc') # eval = WeakEvaluatePrequential( show_plot=False, pretrain_size=1500, batch_size=200, label_size=float(args.label_size), metrics=['accuracy', 'f1', 'running_time', 'model_size'])
################### Synthetic datasets ###################
# stream = FileStream('./datasets/synthetic/HyperFast.csv')
# stream = FileStream('./datasets/synthetic/HyperSlow.csv')
# stream = FileStream('./datasets/synthetic/SEA_S.csv')
# stream = FileStream('./datasets/synthetic/SEA_G.csv')

OBA = OzaBaggingADWINClassifier(random_state=r_state)
LB = LeveragingBaggingClassifier(random_state=r_state)
ORUSBoost = OnlineRUSBoostClassifier(random_state=r_state)
OAdaC2 = OnlineAdaC2Classifier(random_state=r_state)
samknn = SAMKNNClassifier(n_neighbors=5, min_stm_size=50, max_window_size=5000)
dam3 = DAM3Classifier(n_neighbors=5,
                      min_stm_size=50,
                      wm_size=0.3,
                      ltm_size=0.5,
                      max_window_size=5000,
                      drift_detector_winSize=100,
                      drift_detector_thr=0.001,
                      pretrain_size=200,
                      random_state=r_state)

models = [dam3, samknn]
models_names = ["DAM3", "SAMkNN"]
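# Sketch of how these models might be evaluated together (assumptions: one of
# the FileStream lines above is uncommented as `stream`, and the evaluator
# settings below are illustrative).
from skmultiflow.evaluation import EvaluatePrequential

evaluator = EvaluatePrequential(pretrain_size=200, max_samples=20000,
                                metrics=['accuracy', 'kappa'])
evaluator.evaluate(stream=stream, model=models, model_names=models_names)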