def __init__(self,
             stream=AGRAWALGenerator(random_state=112),
             drift_stream=AGRAWALGenerator(random_state=112, classification_function=2),
             position=5000,
             width=1000,
             random_state=None,
             alpha=0.0):
    super(ConceptDriftStream, self).__init__()

    self.n_samples = stream.n_samples
    self.n_targets = stream.n_targets
    self.n_features = stream.n_features
    self.n_num_features = stream.n_num_features
    self.n_cat_features = stream.n_cat_features
    self.n_classes = stream.n_classes
    self.cat_features_idx = stream.cat_features_idx
    self.feature_names = stream.feature_names
    self.target_names = stream.target_names
    self.target_values = stream.target_values
    self.name = 'Drifting' + stream.name
    self._original_random_state = random_state
    self.random_state = None
    self.alpha = alpha
    if self.alpha != 0.0:
        if 0 < self.alpha <= 90.0:
            w = int(1 / np.tan(self.alpha * np.pi / 180))
            self.width = w if w > 0 else 1
        else:
            raise ValueError('Invalid alpha value: {}'.format(alpha))
    else:
        self.width = width
    self.position = position
    self._input_stream = stream
    self._drift_stream = drift_stream
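# A minimal standalone sketch (not taken from the class above) of the blending rule
# this constructor parameterizes: around `position`, the probability of drawing the
# next sample from the drift stream follows a sigmoid whose slope is set by `width`.
# The -4.0 scaling factor follows the usual MOA/scikit-multiflow convention and is
# an assumption here.
import numpy as np

def drift_probability(sample_idx, position=5000, width=1000):
    """Probability that the sample at `sample_idx` is drawn from the drift stream."""
    x = -4.0 * (sample_idx - position) / float(width)
    return 1.0 / (1.0 + np.exp(x))

# Before the drift position the probability is near 0, at the position it is 0.5,
# and well after it the drift stream dominates.
print(drift_probability(4000), drift_probability(5000), drift_probability(6000))
# ~0.018, 0.5, ~0.982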
def get_conceptdrift_data_generated(self, classification_function=0, noise_percentage=0.1, random_state=112,
                                    drift_classification_function=3, drift_random_state=112,
                                    drift_noise_percentage=0.0, drift_start_position=5000, drift_width=1000,
                                    n_num_features=2, n_cat_features=0):
    from skmultiflow.data import ConceptDriftStream
    from skmultiflow.data import AGRAWALGenerator

    stream = AGRAWALGenerator(
        classification_function=classification_function,
        perturbation=noise_percentage,
        random_state=random_state
        # , n_num_features=n_num_features, n_cat_features=n_cat_features
    )
    drift_stream = AGRAWALGenerator(
        classification_function=drift_classification_function,
        perturbation=drift_noise_percentage,
        random_state=drift_random_state
        # , n_num_features=n_num_features, n_cat_features=n_cat_features
    )
    return ConceptDriftStream(stream=stream,
                              drift_stream=drift_stream,
                              position=drift_start_position,
                              width=drift_width)
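# A short usage sketch for the helper above, assuming it lives on some wrapper class
# that is not part of this excerpt; here the ConceptDriftStream is built directly with
# the same arguments the helper forwards. On older scikit-multiflow releases you may
# need to call stream.prepare_for_use() before sampling.
from skmultiflow.data import AGRAWALGenerator, ConceptDriftStream

stream = ConceptDriftStream(
    stream=AGRAWALGenerator(classification_function=0, perturbation=0.1, random_state=112),
    drift_stream=AGRAWALGenerator(classification_function=3, perturbation=0.0, random_state=112),
    position=5000,
    width=1000)

X, y = stream.next_sample(10)
print(X.shape, y.shape)  # (10, 9) AGRAWAL features and (10,) labels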
def test_vfdr():
    learner = VFDR(ordered_rules=True,
                   rule_prediction='first_hit',
                   nominal_attributes=[3, 4, 5],
                   expand_criterion='info_gain',
                   remove_poor_atts=True,
                   min_weight=100,
                   nb_prediction=False)

    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    cnt = 0
    max_samples = 5000
    predictions = array('i')
    proba_predictions = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
                                       0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
                                       1, 0, 1, 0, 0, 0, 0, 0, 0])
    assert np.alltrue(predictions == expected_predictions)

    expected_info = 'VFDR: ordered_rules: True - grace_period: 200 - split_confidence: 1e-07 ' + \
                    '- tie_threshold: 0.05 - remove_poor_atts: True - rule_prediction: first_hit ' + \
                    '- nb_threshold: 0 - nominal_attributes: [3, 4, 5] - drift_detector: NoneType ' + \
                    '- Predict using Naive Bayes: False'
    assert learner.get_info() == expected_info

    expected_model_description = 'Rule 0 :Att (2) <= 39.550| class :0 {0: 1365.7101742993455}\n' + \
                                 'Rule 1 :Att (2) <= 58.180| class :1 {1: 1269.7307449971418}\n' + \
                                 'Rule 2 :Att (2) <= 60.910| class :0 {0: 66.24158839706533, 1: 54.0}\n' + \
                                 'Default Rule :| class :0 {0: 1316.7584116029348}'
    expected_model_description_ = 'Rule 0 :Att (2) <= 39.550| class :0 {0: 1365.7101742993455}\n' + \
                                  'Rule 1 :Att (2) <= 58.180| class :1 {1: 1269.7307449971418}\n' + \
                                  'Rule 2 :Att (2) <= 60.910| class :0 {0: 66.241588397065328, 1: 54.0}\n' + \
                                  'Default Rule :| class :0 {0: 1316.7584116029348}'
    assert (learner.get_model_description() == expected_model_description) or \
           (learner.get_model_description() == expected_model_description_)

    expected_model_measurements = {'Number of rules: ': 3, 'model_size in bytes': 62295}
    expected_model_measurements_ = {'Number of rules: ': 3, 'model_size in bytes': 73167}
    if sys.version_info.minor != 6:
        assert (learner.get_model_measurements() == expected_model_measurements) or \
               (learner.get_model_measurements() == expected_model_measurements_)
def test_vfdr_info_gain():
    learner = VeryFastDecisionRulesClassifier(ordered_rules=True,
                                              rule_prediction='first_hit',
                                              nominal_attributes=[3, 4, 5],
                                              expand_criterion='info_gain',
                                              remove_poor_atts=True,
                                              min_weight=100,
                                              nb_prediction=False)

    stream = AGRAWALGenerator(random_state=11)

    cnt = 0
    max_samples = 5000
    predictions = array('i')
    proba_predictions = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
                                       0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
                                       1, 0, 1, 0, 0, 0, 0, 0, 0])
    assert np.alltrue(predictions == expected_predictions)

    expected_info = "VeryFastDecisionRulesClassifier(drift_detector=None, expand_confidence=1e-07, " \
                    "expand_criterion='info_gain', grace_period=200, max_rules=1000, min_weight=100, " \
                    "nb_prediction=False, nb_threshold=0, nominal_attributes=[3, 4, 5], ordered_rules=True, " \
                    "remove_poor_atts=True, rule_prediction='first_hit', tie_threshold=0.05)"
    info = " ".join([line.strip() for line in learner.get_info().split()])
    assert info == expected_info

    expected_model_description_1 = 'Rule 0 :Att (2) <= 39.550| class :0 {0: 1365.7101742993455}\n' + \
                                   'Rule 1 :Att (2) <= 58.180| class :1 {1: 1269.7307449971418}\n' + \
                                   'Rule 2 :Att (2) <= 60.910| class :0 {0: 66.24158839706533, 1: 54.0}\n' + \
                                   'Default Rule :| class :0 {0: 1316.7584116029348}'
    expected_model_description_2 = 'Rule 0 :Att (2) <= 39.550| class :0 {0: 1365.7101742993455}\n' + \
                                   'Rule 1 :Att (2) <= 58.180| class :1 {1: 1269.7307449971418}\n' + \
                                   'Rule 2 :Att (2) <= 60.910| class :0 {0: 66.241588397065328, 1: 54.0}\n' + \
                                   'Default Rule :| class :0 {0: 1316.7584116029348}'
    assert (learner.get_model_description() == expected_model_description_1) or \
           (learner.get_model_description() == expected_model_description_2)

    # Following test only covers 'Number of rules' since 'model_size in bytes' is calculated using
    # the 'calculate_object_size' utility function which is validated in its own test
    expected_number_of_rules = 3
    assert learner.get_model_measurements()['Number of rules: '] == expected_number_of_rules
def __init__(self,
             stream=AGRAWALGenerator(random_state=112),
             drift_stream=AGRAWALGenerator(random_state=112, classification_function=2),
             pause=1000,
             random_state=None,
             alpha=0.0,
             position=0,
             width=1):
    # Mirror the metadata of the base stream
    self.n_samples = stream.n_samples
    self.n_targets = stream.n_targets
    self.n_features = stream.n_features
    self.n_num_features = stream.n_num_features
    self.n_cat_features = stream.n_cat_features
    self.n_classes = stream.n_classes
    self.cat_features_idx = stream.cat_features_idx
    self.feature_names = stream.feature_names
    self.target_names = ['target'] if self.n_targets == 1 else [
        'target_' + str(i) for i in range(self.n_targets)  # str() needed: range yields ints
    ]
    self.target_values = stream.target_values
    self.name = stream.name + "_" + drift_stream.name + "_" + str(pause) + "_" + str(width)
    self.probability_function = "sigmoid_prob"
    self.pause = pause
    self.counter = -1
    self._original_random_state = random_state
    self.random_state = None
    self.alpha = alpha
    if self.alpha != 0.0:
        if 0 < self.alpha <= 90.0:
            # Derive the transition width from the drift angle alpha (degrees)
            w = int(1 / np.tan(self.alpha * np.pi / 180))
            self.width = w if w > 0 else 1
        else:
            raise ValueError('Invalid alpha value: {}'.format(alpha))
    else:
        self.width = width
    if self.width < 0:
        raise ValueError("Width must be greater than 0")
    if self.pause < 0:
        raise ValueError("Pause must be greater than 0")
    self.position = position
    self._input_stream = stream
    self._drift_stream = drift_stream
    self._prepare_for_use()
def test_vfdr_hellinger():
    learner = VFDR(ordered_rules=False,
                   rule_prediction='weighted_sum',
                   nominal_attributes=[3, 4, 5],
                   expand_criterion='hellinger',
                   remove_poor_atts=True,
                   min_weight=100,
                   nb_prediction=True)

    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    cnt = 0
    max_samples = 5000
    predictions = array('i')
    proba_predictions = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
                                       0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1,
                                       1, 0, 1, 1, 0, 0, 0, 0, 0])
    assert np.alltrue(predictions == expected_predictions)

    expected_model_description = 'Rule 0 :Att (2) > 58.180 and Att (5) = 4.000| class :0 {0: 202.0, 1: 3.0}\n' + \
                                 'Rule 1 :Att (2) <= 41.820| class :0 {0: 1387.1186637804824, 1: 151.83928023717402}\n' + \
                                 'Default Rule :| class :1 {0: 512.8813362195176, 1: 1356.160719762826}'
    expected_model_description_ = 'Rule 0 :Att (2) > 58.180 and Att (5) = 4.000| class :0 {0: 202.0, 1: 3.0}\n' + \
                                  'Rule 1 :Att (2) <= 41.820| class :0 {0: 1387.1186637804824, 1: 151.83928023717402}\n' + \
                                  'Default Rule :| class :1 {0: 512.8813362195176, 1: 1356.1607197628259}'
    if sys.version_info.minor != 6:
        assert (learner.get_model_description() == expected_model_description) or \
               (learner.get_model_description() == expected_model_description_)
def __init__(self,
             stream=AGRAWALGenerator(random_state=112),
             drift_stream=AGRAWALGenerator(random_state=112, classification_function=2),
             position=5000,
             width=1000,
             random_state=None,
             alpha=None):
    super(ConceptDriftStream, self).__init__()

    self.n_samples = stream.n_samples
    self.n_targets = stream.n_targets
    self.n_features = stream.n_features
    self.n_num_features = stream.n_num_features
    self.n_cat_features = stream.n_cat_features
    self.n_classes = stream.n_classes
    self.cat_features_idx = stream.cat_features_idx
    self.feature_names = stream.feature_names
    self.target_names = stream.target_names
    self.target_values = stream.target_values
    self.name = 'Drifting' + stream.name
    self.random_state = random_state
    self._random_state = None  # This is the actual random_state object used internally
    self.alpha = alpha
    if self.alpha == 0:
        warnings.warn("Default value for 'alpha' has changed from 0 to None. 'alpha=0' will "
                      "throw an error from v0.7.0",
                      category=FutureWarning)
        self.alpha = None
    if self.alpha is not None:
        if 0 < self.alpha <= 90.0:
            w = int(1 / np.tan(self.alpha * np.pi / 180))
            self.width = w if w > 0 else 1
        else:
            raise ValueError('Invalid alpha value: {}'.format(alpha))
    else:
        self.width = width
    self.position = position
    self.stream = stream
    self.drift_stream = drift_stream
    self._prepare_for_use()
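# Both constructors above convert a drift angle `alpha` (in degrees) into a transition
# width via width = int(1 / tan(alpha)). A small standalone check of that mapping,
# using the same formula outside of any class state:
import numpy as np

def width_from_alpha(alpha_deg):
    if not 0 < alpha_deg <= 90.0:
        raise ValueError('Invalid alpha value: {}'.format(alpha_deg))
    w = int(1 / np.tan(alpha_deg * np.pi / 180))
    return w if w > 0 else 1

print(width_from_alpha(45.0))  # 1   -> effectively abrupt drift
print(width_from_alpha(5.0))   # 11  -> short transition
print(width_from_alpha(0.5))   # 114 -> long, gradual transition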
def test_hoeffding_adaptive_tree_alternate_tree():
    stream = AGRAWALGenerator(random_state=7)
    learner = HoeffdingAdaptiveTreeClassifier(random_state=1)

    cnt = 0
    change_point1 = 1500
    change_point2 = 2500
    change_point3 = 4000
    max_samples = 5000

    while cnt < max_samples:
        X, y = stream.next_sample()
        learner.partial_fit(X, y)
        cnt += 1

        if cnt > change_point1:
            stream.generate_drift()
            change_point1 = float('Inf')
            expected_description = "if Attribute 2 <= 63.63636363636363:\n" \
                                   " if Attribute 2 <= 39.54545454545455:\n" \
                                   "  Leaf = Class 0 | {0: 397.5023676194098}\n" \
                                   " if Attribute 2 > 39.54545454545455:\n" \
                                   "  if Attribute 2 <= 58.81818181818181:\n" \
                                   "   Leaf = Class 1 | {1: 299.8923824199619}\n" \
                                   "  if Attribute 2 > 58.81818181818181:\n" \
                                   "   Leaf = Class 0 | {0: 54.0, 1: 20.107617580038095}\n" \
                                   "if Attribute 2 > 63.63636363636363:\n" \
                                   " Leaf = Class 0 | {0: 512.5755895049351}\n"
            assert expected_description == learner.get_model_description()

        if cnt > change_point2:
            stream.generate_drift()
            change_point2 = float('Inf')
            expected_description = "if Attribute 8 <= 268547.7178694747:\n" \
                                   " Leaf = Class 0 | {0: 446.18690518790413, 1: 80.6180778406834}\n" \
                                   "if Attribute 8 > 268547.7178694747:\n" \
                                   " Leaf = Class 1 | {0: 36.8130948120959, 1: 356.38192215931656}\n"
            assert expected_description == learner.get_model_description()

        if cnt > change_point3:
            stream.generate_drift()
            change_point3 = float('Inf')
            expected_description = "Leaf = Class 0 | {0: 1083.0, 1: 2.0}\n"
            assert expected_description == learner.get_model_description()
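# An illustrative sketch (not the test logic above) of what generate_drift() does:
# AGRAWALGenerator switches to a different classification function, so a model that
# was accurate before the switch typically loses accuracy right after it. The exact
# accuracy values depend on the run and are not asserted here.
from skmultiflow.data import AGRAWALGenerator
from skmultiflow.trees import HoeffdingTreeClassifier

stream = AGRAWALGenerator(random_state=7)
learner = HoeffdingTreeClassifier()

correct_before, correct_after = 0, 0
for i in range(4000):
    X, y = stream.next_sample()
    if i >= 2000:
        correct_after += int(learner.predict(X)[0] == y[0])
    elif i >= 1000:
        correct_before += int(learner.predict(X)[0] == y[0])
    learner.partial_fit(X, y)
    if i == 1999:
        stream.generate_drift()  # concept changes here

print('accuracy before drift:', correct_before / 1000)
print('accuracy after drift:', correct_after / 2000)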
def test_vfdr_foil():
    learner = VFDR(ordered_rules=False,
                   rule_prediction='weighted_sum',
                   nominal_attributes=[3, 4, 5],
                   expand_criterion='foil_gain',
                   remove_poor_atts=True,
                   min_weight=100,
                   nb_prediction=True)

    stream = AGRAWALGenerator(random_state=11)
    stream.prepare_for_use()

    cnt = 0
    max_samples = 5000
    predictions = array('i')
    proba_predictions = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = array('i', [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
                                       0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                       0, 0, 1, 0, 0, 0, 0, 0, 0])
    assert np.alltrue(predictions == expected_predictions)

    expected_model_description = 'Rule 0 :Att (2) <= 25.450 | class: 1| class :0 {0: 464.44730579120136}\n' + \
                                 'Rule 1 :Att (4) = 3.000 | class: 0| class :0 {0: 95.0, 1: 45.0}\n' + \
                                 'Rule 2 :Att (2) <= 30.910 | class: 1| class :0 {0: 330.68821225514125}\n' + \
                                 'Default Rule :| class :0 {0: 573.0, 1: 336.0}'
    assert learner.get_model_description() == expected_model_description
def prepare_for_use(self):
    if self.generator in ['sea', 'sine']:
        self.concepts = [v for v in range(0, 4)]
    elif self.generator in ['stagger']:
        self.concepts = [v for v in range(0, 3)]
    elif self.generator in ['mixed']:
        self.concepts = [v for v in range(0, 2)]
    elif self.generator in ['led']:
        self.concepts = [v for v in range(0, 7)]
    elif self.generator in ['tree']:
        self.concepts = [2, 3, 4, 5, 6, 7, 8, 9, 10]

    if self.concept_shift_step > 0:
        for concept in self.all_concepts:
            stream = AGRAWALGenerator(classification_function=concept,
                                      random_state=self.random_state,
                                      balance_classes=False,
                                      perturbation=0.05)
            stream.prepare_for_use()
            self.streams.append(stream)
    else:
        for concept in self.concepts:
            if self.generator == 'agrawal':
                stream = AGRAWALGenerator(classification_function=concept,
                                          random_state=self.random_state,
                                          balance_classes=False,
                                          perturbation=0.05)
            elif self.generator == 'sea':
                stream = SEAGenerator(classification_function=concept,
                                      random_state=self.random_state,
                                      balance_classes=False,
                                      noise_percentage=0.05)
            elif self.generator == 'sine':
                stream = SineGenerator(classification_function=concept,
                                       random_state=self.random_state,
                                       balance_classes=False,
                                       has_noise=False)
            elif self.generator == 'stagger':
                stream = STAGGERGenerator(classification_function=concept,
                                          random_state=self.random_state,
                                          balance_classes=False)
            elif self.generator == 'mixed':
                stream = MIXEDGenerator(classification_function=concept,
                                        random_state=self.random_state,
                                        balance_classes=False)
            elif self.generator == 'led':
                stream = LEDGeneratorDrift(random_state=self.random_state,
                                           has_noise=True,
                                           n_drift_features=concept)
            elif self.generator == 'tree':
                stream = RandomTreeGenerator(tree_random_state=concept,
                                             sample_random_state=concept,
                                             max_tree_depth=concept + 2,
                                             min_leaf_depth=concept,
                                             n_classes=2)
            else:
                print(f"unknown stream generator {self.generator}")
                exit()
            stream.prepare_for_use()
            self.streams.append(stream)

    self.cur_stream = self.streams[0]
    self.drift_stream = self.streams[1]

    # Mirror the metadata of the current stream
    stream = self.cur_stream
    self.n_samples = stream.n_samples
    self.n_targets = stream.n_targets
    self.n_features = stream.n_features
    self.n_num_features = stream.n_num_features
    self.n_cat_features = stream.n_cat_features
    self.n_classes = stream.n_classes
    self.cat_features_idx = stream.cat_features_idx
    self.feature_names = stream.feature_names
    self.target_names = stream.target_names
    self.target_values = stream.target_values
    self.name = 'drifting' + stream.name

    print(f"len: {len(self.concepts)}")
    self.concept_probs = \
        self.__get_poisson_probs(len(self.concepts), self.lam)
#2  0.091587  0.977452  0.411501  0.458305  ...  0.181444  0.303406  0.174454  0.0
#3  0.635272  0.496203  0.014126  0.627222  ...  0.517752  0.570683  0.546333  1.0
#4  0.450078  0.876507  0.537356  0.495684  ...  0.606895  0.217841  0.912944  1.0
#
#[5 rows x 11 columns]

# Store it in csv
data.to_csv('data_stream_hyperplane.csv', index=False)

#####################################################################################
### Agrawal generator using scikit-multiflow
from skmultiflow.data import AGRAWALGenerator
import pandas as pd
import numpy as np

create = AGRAWALGenerator(random_state=333)
create.prepare_for_use()
X, Y = create.next_sample(10000)
data = pd.DataFrame(np.hstack((X, np.array([Y]).T)))
data.shape  # output: (10000, 10) -- 9 AGRAWAL features plus the class label

print(data.head())
# Output:
#               0             1     2  ...     7              8    9
#0   90627.841313      0.000000  33.0  ...  20.0   24151.832875  0.0
#1   33588.924462  17307.813671  72.0  ...  29.0  315025.363876  0.0
#2   24375.065287  12426.917711  39.0  ...   4.0  363158.576720  0.0
#3   82949.727691      0.000000  68.0  ...   2.0   35758.528073  0.0
#4  149423.790417      0.000000  52.0  ...  29.0   98440.362484  1.0
#
Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1uHKbJ3KLUITTHJRxegzbTvA_-6M7eO5v
"""

!pip install -U scikit-multiflow

from skmultiflow.data import AGRAWALGenerator
from skmultiflow.trees import HoeffdingTree
from skmultiflow.evaluation import EvaluatePrequential
import numpy as np

# 1. Create a stream
stream = AGRAWALGenerator()
stream.prepare_for_use()

# 2. Instantiate the HoeffdingTree classifier
ht = HoeffdingTree()

# # 3. Setup the evaluator
# evaluator = EvaluatePrequential(show_plot=False,
#                                 pretrain_size=500,
#                                 max_samples=500)

# # 4. Run evaluation
# evaluator.evaluate(stream=stream, model=ht)

def base_classifier(e, U, I, L, D, wd, ws):
    return print("I am here")
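# If the commented-out steps 3 and 4 above are enabled, the prequential evaluation
# can be run on the `stream` and `ht` objects defined in the snippet above; the
# pretrain_size/max_samples/metrics values here are only illustrative.
evaluator = EvaluatePrequential(show_plot=False,
                                pretrain_size=500,
                                max_samples=5000,
                                metrics=['accuracy'])
evaluator.evaluate(stream=stream, model=ht)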
from skmultiflow.data.file_stream import FileStream
import numpy as np
from Goowe import Goowe
from skmultiflow.data import ConceptDriftStream
from skmultiflow.data import AGRAWALGenerator
import logging
from GooweMS import GooweMS
import random

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Prepare the data stream
stream_1 = ConceptDriftStream(
    stream=AGRAWALGenerator(balance_classes=False, classification_function=1,
                            perturbation=0.0, random_state=112),
    drift_stream=AGRAWALGenerator(balance_classes=False, classification_function=2,
                                  perturbation=0.0, random_state=112),
    position=3000, width=1000, random_state=None, alpha=0.0)

stream_2 = ConceptDriftStream(
    stream=AGRAWALGenerator(balance_classes=False, classification_function=3,
                            perturbation=0.0, random_state=21),
    drift_stream=AGRAWALGenerator(balance_classes=False,