def demo(output_file=None):
    """Evaluate an iSOUP-Tree multi-target regressor prequentially.

    A synthetic regression stream with seven targets is created and an
    ``iSOUPTreeRegressor`` with adaptive leaf prediction is evaluated on it.

    Parameters
    ----------
    output_file: string
        The name of the csv output file
    """
    # Error metrics reported by the evaluator
    error_metrics = [
        'average_mean_square_error',
        'average_mean_absolute_error',
        'average_root_mean_square_error',
    ]

    stream = RegressionGenerator(
        n_samples=5000,
        n_features=20,
        n_informative=15,
        random_state=1,
        n_targets=7,
    )
    regressor = iSOUPTreeRegressor(leaf_prediction='adaptive')

    # Setup the evaluator
    evaluator_options = dict(
        pretrain_size=1,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=error_metrics,
    )
    evaluator = EvaluatePrequential(**evaluator_options)

    # Evaluate
    evaluator.evaluate(stream=stream, model=regressor)
def test_accuracy_stream(self):
    """Check that ARSLVQ reaches the expected accuracy and kappa on a SEA stream."""
    stream = SEAGenerator(random_state=42)
    stream.prepare_for_use()

    clf = ARSLVQ(
        sigma=0.5,
        prototypes_per_class=2,
        batch_size=5,
        decay_rate=0.999,
    )

    evaluator = EvaluatePrequential(
        show_plot=False,
        max_samples=20000,
        batch_size=5,
    )
    evaluator.evaluate(stream, clf, model_names=['ARSLVQ'])

    measurements = np.asarray(evaluator.get_measurements()[0])[0]

    accuracy = measurements.get_accuracy()
    self.assertTrue(
        accuracy >= 0.84,
        msg='Accuracy was {} but has to be greater than 0.84'.format(accuracy))

    kappa = measurements.get_kappa()
    self.assertTrue(
        kappa >= 0.68,
        msg='Kappa was {} but has to be greater than 0.68'.format(kappa))
def grid_job(self, clf, stream):
    """Evaluate ``clf`` on ``stream`` ``self.test_size`` times and average the runs.

    The classifier is first configured with the parameters returned by
    ``self.search_best_parameters``. Every run writes the evaluator output to
    a CSV file in ``self.path`` (relative to the project root selected by
    ``self.chwd_root``) and reads it back to derive extra statistics.

    Parameters
    ----------
    clf:
        The classifier to evaluate.
    stream:
        The data stream; must expose ``name``, ``basename`` and
        ``prepare_for_use``.

    Returns
    -------
    list
        The classifier's class name followed by the element-wise mean, over
        all runs, of: the per-metric means held in the evaluator's data
        buffer, the total running time of the first model, the mean of CSV
        column 2, and the standard deviations of CSV columns 1 and 2.
    """
    params = self.search_best_parameters(clf)

    # NOTE: this changes the process-wide working directory.
    self.chwd_root()
    os.chdir(os.path.join(os.getcwd(), self.path))

    print(clf.__class__.__name__)
    clf = self.set_clf_params(clf, params, stream.name)

    local_result = []
    for _ in range(self.test_size):
        stream.prepare_for_use()
        # Fall back to the base name when the stream has no explicit name.
        if stream.name is None:
            stream.name = stream.basename
        path_to_save = clf.__class__.__name__ + \
            "_performance_on_"+stream.name+"_"+self.date+".csv"

        evaluator = EvaluatePrequential(
            show_plot=False,
            max_samples=self.max_samples,
            restart_stream=True,
            batch_size=10,
            metrics=self.metrics,
            output_file=path_to_save)
        evaluator.evaluate(stream=stream, model=clf)

        # Read the evaluator's CSV back; '#' lines are header comments.
        saved_metric = pd.read_csv(
            path_to_save, comment='#', header=0).astype(np.float32)
        saved_values = saved_metric.values[:, 1:3]
        saved_values.setflags(write=1)
        stds = np.std(saved_values, axis=0).tolist()
        sliding_mean = [np.mean(saved_metric.values[:, 2], axis=0)]

        # One row per metric plus a final row holding the running time.
        buffer_data = evaluator._data_buffer.data
        rows = [list(buffer_data[n]["mean"]) for n in buffer_data]
        rows.append([evaluator.running_time_measurements[0]._total_time])
        output = np.array(rows).T.flatten().tolist() + sliding_mean + stds

        print(path_to_save+" "+str(output))
        local_result.append(output)

    clf_result = np.mean(local_result, axis=0).tolist()
    return [clf.__class__.__name__]+clf_result
def demo(input_file, output_file=None):
    """Evaluate a multi-target regression Hoeffding tree prequentially.

    The data comes from a synthetic ``RegressionGenerator`` stream with seven
    targets. ``input_file`` is accepted but is not read by this function.

    Parameters
    ----------
    input_file: string
        A string describing the path for the input dataset (currently unused)

    output_file: string
        The name of the csv output file
    """
    stream = RegressionGenerator(
        n_samples=5000,
        n_features=20,
        n_informative=15,
        random_state=1,
        n_targets=7,
    )
    stream.prepare_for_use()

    # Wrap the regressor in a single-step pipeline
    classifier = MultiTargetRegressionHoeffdingTree(leaf_prediction='adaptive')
    pipeline_steps = [('Classifier', classifier)]
    pipe = Pipeline(pipeline_steps)

    # Setup the evaluator
    error_metrics = [
        'average_mean_square_error',
        'average_mean_absolute_error',
        'average_root_mean_square_error',
    ]
    evaluator = EvaluatePrequential(
        pretrain_size=1,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=error_metrics,
    )

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def run_comparison(data, window=100, estimators=50, anomaly=0.5, drift_rate=0.3, output_file='results'):
    """Compare HalfSpaceTrees and IsolationForestStream on ``data``.

    Both detectors are evaluated prequentially on the first 1000 samples of
    the stream, with plotting enabled.

    Parameters
    ----------
    data:
        The stream to evaluate on; must expose ``n_features``.
    window: int
        Window size passed to both models.
    estimators: int
        Number of estimators passed to both models.
    anomaly: float
        Anomaly threshold passed to both models.
    drift_rate: float
        Drift threshold passed to ``IsolationForestStream``.
    output_file: string
        NOTE: currently unused; results are always written to
        'results_test.csv'.
    """
    # Fix: the body referred to ``stream``, which was never bound in this
    # function; use the stream handed in by the caller.
    stream = data

    models = [
        HalfSpaceTrees(n_features=stream.n_features,
                       window_size=window,
                       n_estimators=estimators,
                       size_limit=0.1*100,
                       anomaly_threshold=anomaly,
                       depth=15,
                       random_state=2),
        IsolationForestStream(window_size=window,
                              n_estimators=estimators,
                              anomaly_threshold=anomaly,
                              drift_threshold=drift_rate,
                              random_state=None),
    ]

    # Setup the evaluator
    evaluator = EvaluatePrequential(pretrain_size=1,
                                    max_samples=1000,
                                    show_plot=True,
                                    metrics=['accuracy', 'f1', 'kappa', 'kappa_m'],
                                    batch_size=1,
                                    output_file='results_test.csv')

    # Run the evaluation
    evaluator.evaluate(stream=stream, model=models, model_names=['HSTrees','iForestASD'])
    return
def test_accuracy_stream(self):
    """Check GLVQ accuracy and kappa on a SEA stream wrapped in a concept-drift stream."""
    base_stream = SEAGenerator(random_state=112, noise_percentage=0.1)
    drifted_stream = SEAGenerator(
        random_state=112,
        classification_function=1,
        noise_percentage=0.1,
    )
    stream = ConceptDriftStream(
        stream=base_stream,
        drift_stream=drifted_stream,
        random_state=None,
        position=20000,
        width=50000,
    )
    stream.prepare_for_use()

    clf = GLVQ(
        prototypes_per_class=6,
        beta=2,
        C=None,
        decay_rate=0.9,
        gradient_descent="SGD",
    )

    evaluator = EvaluatePrequential(
        pretrain_size=1,
        show_plot=False,
        max_samples=20000,
        batch_size=1,
    )
    evaluator.evaluate(stream, clf, model_names=['GLVQ'])

    measurements = np.asarray(evaluator.get_measurements()[0])[0]

    accuracy = measurements.get_accuracy()
    self.assertTrue(
        accuracy >= 0.93,
        msg='Accuracy was {} but has to be greater than 0.93'.format(accuracy))

    kappa = measurements.get_kappa()
    self.assertTrue(
        kappa >= 0.84,
        msg='Kappa was {} but has to be greater than 0.84'.format(kappa))
def demo(instances=2000):
    """Run a prequential comparison of two learners on the covtype file stream.

    Passing more than one learner to the evaluator turns the evaluation into
    a comparison task. The learners are a plain ``SGDClassifier`` and a
    pipeline that applies ``OneHotToCategorical`` before a ``KNNAdwin``.

    Parameters
    ----------
    instances: int
        The evaluation's maximum number of instances.
    """
    # Stream setup
    stream = FileStream("../datasets/covtype.csv", -1, 1)
    stream.prepare_for_use()

    # First learner
    clf = SGDClassifier()

    # Second learner: KNN with ADWIN behind a column-merging transform.
    clf_one = KNNAdwin(k=8, max_window_size=1000, leaf_size=30)
    # Two column groups handed to the transform: indices 10-13 and 14-53.
    column_groups = [list(range(10, 14)), list(range(14, 54))]
    t_one = OneHotToCategorical(column_groups)
    pipe_one = Pipeline([('one_hot_to_categorical', t_one), ('KNN', clf_one)])

    classifier = [clf, pipe_one]

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        output_file='teste.csv',
        max_samples=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        show_plot=True,
        metrics=['performance', 'kappa_t'],
    )

    # Evaluate
    evaluator.evaluate(stream=stream, model=classifier)
def demo(output_file=None, instances=50000):
    """Run a prequential evaluation of SAMKNN on the movingSquares file stream.

    The stream reads its samples from ``movingSquares.csv`` in the datasets
    folder, the classifier is a ``SAMKNN`` instance, and the evaluation is
    run directly on the classifier (no pipeline).

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances
    """
    # Setup the File Stream
    stream = FileStream("../datasets/movingSquares.csv", -1, 1)
    stream.prepare_for_use()

    # Setup the classifier
    classifier = SAMKNN(
        n_neighbors=5,
        knnWeights='distance',
        maxSize=1000,
        STMSizeAdaption='maxACCApprox',
        useLTM=False,
    )

    # Setup the evaluator
    evaluator_options = dict(
        pretrain_size=0,
        max_samples=instances,
        batch_size=1,
        n_wait=100,
        max_time=1000,
        output_file=output_file,
        show_plot=True,
        metrics=['performance'],
    )
    evaluator = EvaluatePrequential(**evaluator_options)

    # Evaluate
    evaluator.evaluate(stream=stream, model=classifier)
def run_comparison(self, stream, stream_n_features, window=100,
                   estimators=50, anomaly=0.5, drift_rate=0.3,
                   result_folder="Generated", max_sample=100000, n_wait=200,
                   metrics=None):
    """Compare HalfSpaceTrees and IsolationForestStream on ``stream``.

    Results are written to
    ``results/<result_folder>/result_for_WS<window>_NE<estimators>.csv``.

    Parameters
    ----------
    stream:
        The data stream to evaluate on.
    stream_n_features: int
        Number of features, passed to ``HalfSpaceTrees``.
    window: int
        Window size passed to both models.
    estimators: int
        Number of estimators passed to both models.
    anomaly: float
        Anomaly threshold passed to both models.
    drift_rate: float
        Drift threshold passed to ``IsolationForestStream``.
    result_folder: string
        Sub-folder of ``results/`` that receives the csv file.
    max_sample: int
        Maximum number of samples to evaluate.
    n_wait: int
        Passed through to the evaluator as ``n_wait``.
    metrics: list of string, optional
        Metrics to record. Defaults to accuracy, f1, kappa, kappa_m,
        running_time and model_size.

    Returns
    -------
    string
        The directory that contains the result file.
    """
    from skmultiflow.anomaly_detection import HalfSpaceTrees
    from source.iforestasd_scikitmultiflow import IsolationForestStream
    from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

    # Build the default per call: a list literal in the signature would be
    # one object shared (and possibly mutated) across every call.
    if metrics is None:
        metrics = [
            'accuracy', 'f1', 'kappa', 'kappa_m', 'running_time', 'model_size'
        ]

    # Create the directory that receives the result csv
    directory_path = 'results/' + str(result_folder)
    self.check_directory(path=directory_path)
    result_file_path = directory_path + '/result_for_WS' + str(
        window) + '_NE' + str(estimators) + '.csv'

    # NOTE(review): the stream is deliberately not prepared here; the original
    # author left stream.prepare_for_use() commented out as deprecated.

    models = [
        HalfSpaceTrees(n_features=stream_n_features,
                       window_size=window,
                       n_estimators=estimators,
                       anomaly_threshold=anomaly),
        # iForestASD uses the whole window as its training sample
        IsolationForestStream(window_size=window,
                              n_estimators=estimators,
                              anomaly_threshold=anomaly,
                              drift_threshold=drift_rate),
    ]

    # Setup the evaluator
    evaluator = EvaluatePrequential(pretrain_size=1,
                                    max_samples=max_sample,
                                    show_plot=True,
                                    metrics=metrics,
                                    batch_size=1,
                                    output_file=result_file_path,
                                    n_wait=n_wait)

    # Run the evaluation
    evaluator.evaluate(stream=stream,
                       model=models,
                       model_names=['HSTrees', 'iForestASD'])

    print("")
    print("Please find evaluation results here " + result_file_path)
    return directory_path
def evaluate(stream, metrics, study_size):
    """Prequentially evaluate the classifiers from ``init_classifiers`` on ``stream``.

    Parameters
    ----------
    stream:
        The data stream; its ``name`` is used to build the output file name.
    metrics: list
        Metrics handed to the evaluator.
    study_size: int
        Maximum number of samples to evaluate.
    """
    clfs, names = init_classifiers()
    stream.prepare_for_use()

    # Results go to "<stream name>_memory_other.csv"
    report_file = stream.name + "_memory_other.csv"
    evaluator = EvaluatePrequential(
        show_plot=False,
        batch_size=10,
        max_samples=study_size,
        metrics=metrics,
        output_file=report_file,
    )
    evaluator.evaluate(stream=stream, model=clfs, model_names=names)
def demo(output_file=None, instances=40000):
    """Run a prequential evaluation of a PassiveAggressiveClassifier.

    The stream reads its samples from ``sea_big.csv`` in the datasets folder.
    The classifier is wrapped in a single-step pipeline, which is what the
    evaluator receives as the model.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances
    """
    # Setup the File Stream
    stream = FileStream("../data/datasets/sea_big.csv", -1, 1)
    stream.prepare_for_use()

    # Setup the classifier and its pipeline
    classifier = PassiveAggressiveClassifier()
    pipeline_steps = [('Classifier', classifier)]
    pipe = Pipeline(pipeline_steps)

    # Setup the evaluator
    evaluator_options = dict(
        pretrain_size=200,
        max_samples=instances,
        batch_size=1,
        n_wait=100,
        max_time=1000,
        output_file=output_file,
        show_plot=True,
        metrics=['kappa', 'kappa_t', 'performance'],
    )
    evaluator = EvaluatePrequential(**evaluator_options)

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def test_stream(self):
    """Check that RRSLVQ can be evaluated on a noisy SEA stream with accuracy >= 0.5."""
    stream = SEAGenerator(classification_function=2,
                          random_state=112,
                          balance_classes=False,
                          noise_percentage=0.28)
    stream.prepare_for_use()

    evaluator = EvaluatePrequential(show_plot=False,
                                    max_samples=5000,
                                    restart_stream=True,
                                    batch_size=10,
                                    metrics=['kappa', 'kappa_m', 'accuracy'])

    # Keep the return value: the assertion below used to receive the builtin
    # ``eval`` (always non-None), which made the check vacuous.
    result = evaluator.evaluate(stream=stream,
                                model=RRSLVQ(prototypes_per_class=4, sigma=10))

    measurements = np.asarray(evaluator.get_measurements()[0])[0]

    self.assertIsNotNone(result)
    self.assertTrue(measurements.get_accuracy() >= 0.5,
                    msg='Accuracy was {} but has to be greater than 0.5'.
                    format(measurements.get_accuracy()))
def demo_parameterized(h, filename="covtype.csv", show_plot=True):
    """Prequentially evaluate the model(s) ``h`` on a file from the datasets folder.

    Parameters
    ----------
    h:
        The model (or models) handed to the evaluator.
    filename: string
        File name inside ``../datasets/`` to stream from.
    show_plot: bool
        Whether the evaluator shows its plot.
    """
    # Setup Stream
    dataset_path = "../datasets/" + filename
    stream = FileStream(dataset_path, -1, 1)
    stream.prepare_for_use()

    pretrain = 100
    evaluator_options = dict(
        pretrain_size=pretrain,
        output_file='output.csv',
        max_samples=10000,
        batch_size=1,
        n_wait=1000,
        show_plot=show_plot,
        metrics=['performance'],
    )
    evaluator = EvaluatePrequential(**evaluator_options)
    evaluator.evaluate(stream=stream, model=h)
def test_evaluate_prequential_classifier(tmpdir, test_path):
    """Evaluate a HoeffdingTree prequentially and compare the summary file with the expected one.

    ``tmpdir`` receives the generated summary and ``test_path`` holds the
    reference ``prequential_summary.csv`` (presumably both are test fixtures).
    """
    # Setup stream
    stream = RandomTreeGenerator(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=4,
        n_cat_features=2,
        n_num_features=5,
        n_categories_per_cat_feature=5,
        max_tree_depth=6,
        min_leaf_depth=3,
        fraction_leaves_per_level=0.15,
    )
    stream.prepare_for_use()

    # Setup learner: every feature index from 15 upwards is marked nominal
    nominal_attr_idx = list(range(15, len(stream.feature_names)))
    learner = HoeffdingTree(nominal_attributes=nominal_attr_idx)

    # Setup evaluator
    output_file = os.path.join(str(tmpdir), "prequential_summary.csv")
    evaluator = EvaluatePrequential(
        max_samples=1000,
        metrics=['kappa', 'kappa_t', 'performance'],
        output_file=output_file,
    )

    # Evaluate
    result = evaluator.evaluate(stream=stream, model=learner)
    result_learner = result[0]

    assert isinstance(result_learner, HoeffdingTree)
    assert learner.get_model_measurements == result_learner.get_model_measurements

    expected_file = os.path.join(test_path, 'prequential_summary.csv')
    compare_files(output_file, expected_file)
def demo():
    """Run a prequential evaluation of a HoeffdingTree on the sea_stream file.

    Results are written to 'output.csv' and the evaluator plot is shown.
    """
    # The classifier we will use (other options: SAMKNN, LeverageBagging, SGD)
    h = HoeffdingTree()

    # Setup Stream
    stream = FileStream("../datasets/sea_stream.csv", -1, 1)
    stream.prepare_for_use()

    pretrain = 100
    evaluator_options = dict(
        pretrain_size=pretrain,
        output_file='output.csv',
        max_samples=10000,
        batch_size=1,
        n_wait=1000,
        show_plot=True,
        metrics=['performance'],
    )
    evaluator = EvaluatePrequential(**evaluator_options)
    evaluator.evaluate(stream=stream, model=h)
def demo(output_file=None, instances=40000):
    """Evaluate a multi-output learner built on sklearn's SGDClassifier.

    The data comes from a ``MultilabelGenerator`` with ``instances`` samples;
    the evaluation itself covers at most ``instances - 10000`` samples after
    a pretraining phase of 5000 samples.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances
    """
    # Setup the stream
    stream = MultilabelGenerator(n_samples=instances)
    stream.prepare_for_use()

    # Setup the classifier
    # NOTE(review): newer scikit-learn releases dropped ``n_iter`` in favour
    # of ``max_iter`` -- confirm against the targeted version.
    base_learner = SGDClassifier(n_iter=100)
    classifier = MultiOutputLearner(base_learner)

    # Setup the pipeline
    pipe = Pipeline([('Classifier', classifier)])

    # Setup the evaluator
    evaluator_options = dict(
        pretrain_size=5000,
        max_samples=instances - 10000,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=True,
        metrics=['hamming_score', 'j_index', 'exact_match'],
    )
    evaluator = EvaluatePrequential(**evaluator_options)

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def stream_example():
    """Compare RSLVQ with batch sizes 1, 5 and 10 on a noisy SEA stream."""
    # Create stream
    stream = SEAGenerator(noise_percentage=0.1)
    stream.prepare_for_use()

    # One RSLVQ per batch size, otherwise identically configured
    clf = [
        RSLVQ(sigma=5.0, batch_size=size, n_epochs=1)
        for size in (1, 5, 10)
    ]

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        max_samples=10000,
        batch_size=100,
        show_plot=True,
    )

    # Start evaluation
    evaluator.evaluate(
        stream=stream,
        model=clf,
        model_names=['bs=1', 'bs=5', 'bs=10'],
    )
def demo(output_file=None, instances=40000):
    """Evaluate a RegressionHoeffdingTree on a synthetic regression stream.

    The stream is a ``RegressionGenerator`` with a fixed 40000 samples
    (independent of ``instances``). The regressor is wrapped in a single-step
    pipeline and scored with the mean square error.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances
    """
    # Setup the stream
    stream = RegressionGenerator(n_samples=40000)

    # Setup the regressor and its pipeline
    classifier = RegressionHoeffdingTree()
    pipeline_steps = [('Classifier', classifier)]
    pipe = Pipeline(pipeline_steps)

    # Setup the evaluator
    evaluator_options = dict(
        pretrain_size=1,
        max_samples=instances,
        batch_size=1,
        n_wait=200,
        max_time=1000,
        output_file=output_file,
        show_plot=False,
        metrics=['mean_square_error'],
    )
    evaluator = EvaluatePrequential(**evaluator_options)

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def demo(output_file=None, instances=40000):
    """Evaluate a LeverageBagging ensemble of Hoeffding trees on a waveform stream.

    Parameters
    ----------
    output_file: string
        The name of the csv output file

    instances: int
        The evaluation's max number of instances
    """
    # Setup the stream
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the ensemble (two members, Hoeffding tree as base learner)
    base_learner = HoeffdingTree()
    pipe = LeverageBagging(h=base_learner, ensemble_length=2)

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        pretrain_size=2000,
        max_samples=instances,
        output_file=output_file,
        show_plot=False,
    )

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def test_accuracy_stream(self):
    """Check that GRLVQ reaches the expected accuracy and kappa on a SEA stream."""
    stream = SEAGenerator(random_state=42)
    stream.prepare_for_use()

    clf = GRLVQ(
        prototypes_per_class=2,
        regularization=5.0,
        beta=2,
        C=None,
    )

    evaluator = EvaluatePrequential(
        pretrain_size=1,
        show_plot=False,
        max_samples=20000,
        batch_size=1,
    )
    evaluator.evaluate(stream, clf, model_names=['GRLVQ'])

    measurements = np.asarray(evaluator.get_measurements()[0])[0]

    accuracy = measurements.get_accuracy()
    self.assertTrue(
        accuracy >= 0.7,
        msg='Accuracy was {} but has to be greater than 0.7'.format(accuracy))

    kappa = measurements.get_kappa()
    self.assertTrue(
        kappa >= 0.3,
        msg='Kappa was {} but has to be greater than 0.3'.format(kappa))
def evaluate_prequential(stream, model, pretrain_size=0.1, window_size=20,
                         plot=False, output=None):
    """Prequentially evaluate ``model`` on ``stream`` with multi-label metrics.

    The stream is restarted first. The pretraining set is the fraction
    ``pretrain_size`` of the remaining samples, and the batch size is the
    rest of the samples divided by ``window_size`` (both rounded).

    Parameters
    ----------
    stream:
        The data stream; must expose ``restart`` and ``n_remaining_samples``.
    model:
        The model handed to the evaluator.
    pretrain_size: float
        Fraction of the stream used for pretraining.
    window_size: int
        Number of batches the non-pretraining samples are divided into.
    plot: bool
        Whether the evaluator shows its plot.
    output: string
        The name of the csv output file
    """
    stream.restart()

    available = stream.n_remaining_samples()
    pretrain_samples = round(available * pretrain_size)
    batch_size = round((available - pretrain_samples) / window_size)

    print("Pretrain size (examples):", pretrain_samples)
    print("Batch size (examples):", batch_size)

    reported_metrics = [
        "exact_match", "hamming_score", "hamming_loss", "j_index",
        "running_time", "model_size",
    ]
    evaluator = EvaluatePrequential(
        show_plot=plot,
        pretrain_size=pretrain_samples,
        batch_size=batch_size,
        max_samples=1000000,
        metrics=reported_metrics,
        output_file=output,
    )
    evaluator.evaluate(stream=stream, model=model)
def self_job(self,stream,clf,grid,metrics,max_samples):
    """Grid-search ``clf`` over ``grid`` on ``stream`` and report the best run.

    Every parameter combination is evaluated prequentially. The per-metric
    means of all runs are rounded to three decimals and saved as a CSV in
    ``self.path``; the row with the highest "accuracy" is printed and
    returned.

    NOTE(review): the body mixes the ``grid``/``clf``/``max_samples``
    arguments with ``self.grid``/``self.clf``/``self.max_samples`` (the
    ``max_samples`` argument is never read). This is kept as-is; confirm
    they are meant to be the same objects.

    Parameters
    ----------
    stream:
        The data stream; must expose ``name``, ``basename`` and
        ``prepare_for_use``.
    clf:
        The classifier whose attributes are overwritten per combination.
    grid: dict
        Maps attribute names to iterables of candidate values; values are
        expected to expose ``dtype`` (e.g. numpy scalars).
    metrics: list
        Metrics handed to the evaluator; must include "accuracy".
    max_samples: int
        Unused (see note above).

    Returns
    -------
    list
        Stream name, classifier class name, then the values of the best row.

    Raises
    ------
    ValueError
        If ``grid`` yields no parameter combination.
    """
    param_names = list(grid.keys())
    matrix = list(itertools.product(*[list(v) for v in grid.values()]))
    if not matrix:
        # Without at least one run there is no evaluator to read metric
        # names from; fail with a clear message instead of a NameError.
        raise ValueError("grid must provide at least one value per parameter")

    results = []
    for param_tuple in matrix:
        # Bring the classifier back to a fresh state before each run.
        try:
            clf.reset()
        except NotImplementedError:
            clf.__init__()

        for name, param in zip(param_names, param_tuple):
            # Turn numpy int32 values into plain Python ints.
            clf.__dict__[name] = int(param) if param.dtype == 'int32' else param

        stream.prepare_for_use()
        evaluator = EvaluatePrequential(show_plot=False,
                                        max_samples=self.max_samples,
                                        restart_stream=True,
                                        batch_size=10,
                                        metrics=metrics)
        evaluator.evaluate(stream=stream, model=clf)

        buffer_data = evaluator._data_buffer.data
        means = np.array([list(buffer_data[n]["mean"]) for n in buffer_data])
        results.append(list(param_tuple) + means.T.flatten().tolist())

    s_name = stream.basename if stream.name is None else stream.name
    clf_name = self.clf.__class__.__name__

    # Columns: grid parameters followed by the metric names of the last run.
    metric_names = list(evaluator._data_buffer.data)
    dfr = pd.DataFrame(results, columns=list(self.grid.keys()) + metric_names)
    dfr = dfr.round(3)

    # NOTE: this changes the process-wide working directory.
    self.chwd_root()
    os.chdir(os.path.join(os.getcwd(),self.path))
    dfr.to_csv(path_or_buf="Result_"+"_"+self.date+"_"+s_name+"_"+clf_name+".csv")

    best_row = dfr.values[dfr["accuracy"].values.argmax()]
    print("\n ------------------ \n")
    print("Best run on "+s_name+" with "+" "+clf_name+" "+str(best_row))
    return [s_name]+[clf_name]+best_row.tolist()
def demo():
    """Show a Pipeline being used as the learner of a prequential evaluation.

    A ``HoeffdingTree`` is wrapped in a single-step pipeline and evaluated on
    a ``WaveformGenerator`` stream.
    """
    # Setup the stream
    stream = WaveformGenerator()
    stream.prepare_for_use()

    # Setup the classifier and the pipeline around it
    classifier = HoeffdingTree()
    pipeline_steps = [('Hoeffding Tree', classifier)]
    pipe = Pipeline(pipeline_steps)

    # Setup the evaluator
    evaluator = EvaluatePrequential(
        show_plot=True,
        pretrain_size=1000,
        max_samples=100000,
    )

    # Evaluate
    evaluator.evaluate(stream=stream, model=pipe)
def test_reoccuring(self):
    """Check OzaBaggingAdwin accuracy on a reoccurring-drift stream of two MIXED generators."""
    s1 = MIXEDGenerator(classification_function=1,
                        random_state=112,
                        balance_classes=False)
    s2 = MIXEDGenerator(classification_function=0,
                        random_state=112,
                        balance_classes=False)
    stream = ReoccuringDriftStream(
        stream=s1,
        drift_stream=s2,
        random_state=None,
        alpha=90.0,  # angle of change grade 0 - 90
        position=2000,
        width=500,
    )
    stream.prepare_for_use()

    evaluator = EvaluatePrequential(
        show_plot=False,
        batch_size=10,
        max_samples=1000,
        metrics=['accuracy', 'kappa_t', 'kappa_m', 'kappa'],
        output_file=None,
    )

    model = OzaBaggingAdwin(base_estimator=KNN())
    result = evaluator.evaluate(stream=stream, model=model)

    measurements = np.asarray(evaluator.get_measurements()[0])[0]

    self.assertIsNotNone(result)
    accuracy = measurements.get_accuracy()
    self.assertTrue(
        accuracy >= 0.6,
        msg='Accuracy was {} but has to be greater than 0.6'.format(accuracy))
from skmultiflow.data.file_stream import FileStream
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential
from skmultiflow.trees.hoeffding_tree import HoeffdingTreeClassifier

# Data source: file stream over elec.csv
stream = FileStream("elec.csv")
stream.prepare_for_use()  # Not required for v0.5.0+

# Model under evaluation
ht = HoeffdingTreeClassifier()

# Evaluator: 1000 pretraining samples, at most 10000 samples overall,
# results written to results.csv
evaluator = EvaluatePrequential(
    pretrain_size=1000,
    max_samples=10000,
    output_file='results.csv',
)

# Run the prequential evaluation
evaluator.evaluate(stream=stream, model=ht)
#stream = RandomTreeGenerator() #stream = RandomRBFGenerator() stream = WaveformGenerator(has_noise=False) stream.prepare_for_use() h = [ HoeffdingOptTree(), # HoeffdingTree() ] evaluator = EvaluatePrequential(pretrain_size=1000, max_samples=20000, show_plot=True, metrics=['accuracy'], output_file='result_' + dataset + '.csv', batch_size=1) # 4. Run evaluator.evaluate(stream=stream, model=h, model_names=["HOT"]) '''import pandas as pd from matplotlib.pyplot import * df = pd.read_csv('result_'+dataset+'.csv', comment='#') ax = df.plot(x="id", y=["mean_acc_[HoeffdintOptTree]","mean_acc_[M1]","mean_acc_[M2]"], rot=45, linewidth=3, title=dataset) #ax = df.plot(x="id", y=["current_acc_[M0]", "current_acc_[M1]", "current_acc_[M2]"], rot=30, linewidth=3, title=dataset) #ax = df.plot(x="id", y=["mean_kappa_[M0]","mean_kappa_[M1]","mean_kappa_[M2]"], rot=45, linewidth=3, title=dataset) #ax = df.plot(x="id", y=["current_kappa_[M0]", "current_kappa_[M1]", "current_kappa_[M2]"], rot=30, linewidth=3, title=dataset) ax.set_xlabel("") ax.set_title("Performance on the %s dataset" % dataset) ax.legend([r"HT"], loc='best') print("write out to %s ..." % dataset+".pdf") #savefig("result_"+dataset+".pdf") show()'''
import numpy as np
from skmultiflow.data.sea_generator import SEAGenerator
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

from rslvq import RSLVQ
from adaptive_rslvqs_batch import RSLVQ as BARSLVQ
from arslvq import RSLVQ as ARSLVQ
from rslvq_stream import RSLVQ as MasterRSLVQ

# Data source
stream = SEAGenerator()
stream.prepare_for_use()

# Four RSLVQ variants, listed in the same order as the model names below
clf = [
    RSLVQ(batch_size=5),
    BARSLVQ(
        gradient_descent='Adadelta',
        batch_size=5,
        decay_rate=0.9,
        sigma=1.0,
    ),
    RSLVQ(),
    MasterRSLVQ(
        gradient_descent='Adadelta',
        decay_rate=0.999,
        sigma=1.0,
    ),
]

evaluator = EvaluatePrequential(
    max_samples=100000,
    batch_size=5,
    show_plot=True,
)
evaluator.evaluate(
    stream=stream,
    model=clf,
    model_names=['BRSLVQ', 'BARSLVQ', 'RSLVQ', 'MRSLVQ'],
)

measurements = np.asarray(evaluator.get_measurements()[0])[0]
# initial guess: centroid centroid = points.mean(axis=0) optimize_result = minimize(aggregate_distance, centroid, method='COBYLA') return optimize_result.x if __name__ == "__main__": s1 = MIXEDGenerator(classification_function = 1, random_state= 112, balance_classes = False) s2 = MIXEDGenerator(classification_function = 0, random_state= 112, balance_classes = False) """1. Create stream""" stream = ReoccuringDriftStream(stream=s1, drift_stream=s2, random_state=None, alpha=90.0, # angle of change grade 0 - 90 position=2000, width=1) stream.prepare_for_use() rrslvq = ARSLVQ(prototypes_per_class=4, drift_detector="KS",sigma=2,confidence=0.1) model_names = ["rrslvq"] evaluator = EvaluatePrequential(show_plot=False,max_samples=5000, restart_stream=True,batch_size=50,metrics=['kappa', 'kappa_m', 'accuracy']) evaluator.evaluate(stream=stream, model=rrslvq,model_names=model_names)
from skmultiflow.data.hyper_plane_generator import HyperplaneGenerator
from skmultiflow.evaluation.evaluate_prequential import EvaluatePrequential

from ensemble import WeightedEnsembleClassifier

seed = 420

# Hyperplane data source
hyper_gen = HyperplaneGenerator(
    random_state=seed,
    n_features=10,          # number of features to generate
    n_drift_features=2,     # number of features involved in concept drift (k)
    mag_change=0.0,         # magnitude of change (t)
    noise_percentage=0.05,  # noise percentage (p)
    sigma_percentage=0.1,   # probability that the direction of change is reversed (s_i)
)
hyper_gen.prepare_for_use()

# Evaluator: accuracy and kappa, batches of 1000, results in result.csv
evaluator = EvaluatePrequential(
    pretrain_size=1000,
    max_samples=20000,
    show_plot=True,
    metrics=['accuracy', 'kappa'],
    output_file='result.csv',
    batch_size=1000,
)

clf = WeightedEnsembleClassifier()

# Run the evaluation
evaluator.evaluate(stream=hyper_gen, model=clf)
if __name__ == "__main__":
    # Two MIXED generators that differ only in their classification function
    s1 = MIXEDGenerator(classification_function=1,
                        random_state=112,
                        balance_classes=False)
    s2 = MIXEDGenerator(classification_function=0,
                        random_state=112,
                        balance_classes=False)

    # 1. Create stream
    stream = ReoccuringDriftStream(stream=s1,
                                   drift_stream=s2,
                                   random_state=None,
                                   alpha=90.0,  # angle of change grade 0 - 90
                                   position=2000,
                                   width=500)
    stream.prepare_for_use()

    # Model under evaluation
    oza = OzaBaggingAdwin(base_estimator=KNN())

    # 3. Setup evaluator
    evaluator = EvaluatePrequential(show_plot=True,
                                    batch_size=10,
                                    max_samples=5000,
                                    metrics=['accuracy', 'kappa_t', 'kappa_m', 'kappa'],
                                    output_file=None)

    # 4. Run evaluator
    evaluator.evaluate(stream=stream, model=oza)