class ADWINChangeDetector(BaseDriftDetector):
    """ Drift detection method based on ADWIN.

    Parameters
    ----------
    delta : float (default=0.002)
        The delta parameter for the ADWIN algorithm.

    Notes
    -----
    ADWIN [1]_ (ADaptive WINdowing) is an adaptive sliding window algorithm
    for detecting change, and keeping updated statistics about a data stream.
    ADWIN allows algorithms not adapted for drifting data, to be resistant
    to this phenomenon.

    The general idea is to keep statistics from a window of variable size
    while detecting concept drift.

    The algorithm will decide the size of the window by cutting the
    statistics' window at different points and analysing the average of some
    statistic over these two windows. If the absolute value of the difference
    between the two averages surpasses a pre-defined threshold, change is
    detected at that point and all data before that time is discarded.

    This wrapper only reports a concept change when the wrapped ADWIN
    instance signals a change *and* its estimation is larger than it was
    before the new element was added.

    References
    ----------
    .. [1] Bifet, Albert, and Ricard Gavalda. "Learning from time-changing
       data with adaptive windowing." In Proceedings of the 2007 SIAM
       international conference on data mining, pp. 443-448. Society for
       Industrial and Applied Mathematics, 2007.

    Examples
    --------
    >>> # Imports
    >>> import numpy as np
    >>> from skmultiflow.drift_detection import ADWINChangeDetector
    >>> adwin_change_detector = ADWINChangeDetector()
    >>> # Simulating a data stream as a normal distribution of 1's and 0's
    >>> data_stream = np.random.randint(2, size=2000)
    >>> # Changing the data concept from index 999 to 2000
    >>> for i in range(999, 2000):
    ...     data_stream[i] = np.random.randint(4, high=8)
    >>> # Adding stream elements to ADWIN and verifying if drift occurred
    >>> for i in range(2000):
    ...     adwin_change_detector.add_element(data_stream[i])
    ...     if adwin_change_detector.detected_change():
    ...         print('Change detected in data: ' + str(data_stream[i]) + ' - at index: ' + str(i))

    """

    def __init__(self, delta=.002):
        super().__init__()
        self.adwin = ADWIN(delta=delta)
        # Call the base-class reset explicitly to initialise the detector flags.
        super().reset()

    def add_element(self, input_value):
        """ Feed one value to the wrapped ADWIN and refresh the detector flags.

        Parameters
        ----------
        input_value
            The new stream element, forwarded unchanged to
            ``self.adwin.add_element``.

        Notes
        -----
        After the call, ``in_warning_zone`` reflects
        ``self.adwin.detected_warning_zone()``, ``in_concept_change`` is True
        only if ADWIN detected a change and its estimation grew, and
        ``estimation`` mirrors ``self.adwin.estimation``.
        """
        # Remember the estimate before the update so growth can be detected.
        previous_estimate = self.adwin.estimation
        self.adwin.add_element(input_value)
        change_flagged = self.adwin.detected_change()

        self.in_concept_change = False
        self.in_warning_zone = bool(self.adwin.detected_warning_zone())

        # A change only counts as drift when the estimate went up;
        # a confirmed drift supersedes the warning state.
        if change_flagged and self.adwin.estimation > previous_estimate:
            self.in_concept_change = True
            self.in_warning_zone = False

        self.estimation = self.adwin.estimation
class DeepNNPytorch(BaseSKMObject, ClassifierMixin):
    """Ensemble of ANN learners with background learners and drift detection.

    A set of *foreground* nets is trained on every ``partial_fit`` call and
    is the only set used for prediction. A set of *background* nets is
    trained periodically in separate threads; once those threads are joined,
    the best background net (lowest average loss since the last detected
    drift) replaces the worst foreground net if it has a lower loss.

    Two ADWIN instances monitor whether the ensemble prediction matched the
    true label: one acts as a warning detector, the other as the drift
    detector.

    Parameters
    ----------
    class_labels : list (default=['0', '1'])
        Labels of the classes; position ``i`` is the label for class index
        ``i``.
    use_cpu : bool (default=True)
        Stored as a configuration value; not read anywhere in this class.
    process_as_a_batch : bool (default=False)
        Stored as a configuration value; not read anywhere in this class.
    use_threads : bool (default=False)
        If True, the foreground nets are trained in one thread each
        (started and joined within the same ``partial_fit`` call).
    background_training_after : int (default=4)
        Background training threads are started when ``samples_seen`` is a
        multiple of this value and joined when
        ``samples_seen % background_training_after`` equals
        ``background_training_after - 1``.

    Notes
    -----
    The net configurations are read from the module-level
    ``foreground_net_config`` and ``background_net_config`` (defined
    elsewhere in this file).
    """

    # NOTE(review): the list default for ``class_labels`` is shared between
    # instances; it is not mutated in this class, but ANN receives the same
    # object -- confirm that ANN does not modify it.
    def __init__(
            self,
            class_labels=['0', '1'],  # {'up':0,'down':1}
            use_cpu=True,
            process_as_a_batch=False,
            use_threads=False,
            background_training_after=4):
        # configuration variables (which have the same name as init
        # parameters); every init parameter is stored so that parameter
        # introspection reports the values actually passed in.
        self.class_labels = class_labels
        self.use_cpu = use_cpu
        self.process_as_a_batch = process_as_a_batch
        self.use_threads = use_threads
        self.background_training_after = background_training_after

        super().__init__()

        # status variables
        self.class_to_label = {}
        self.foreground_nets = []  # type: List[ANN]
        self.background_nets = []  # type: List[ANN]
        self.drift_detection_method = None
        self.warning_detection_method = None
        self.detected_warnings = 0
        self.samples_seen = 0
        self.last_detected_drift_around = 0
        self.background_learner_threads = []
        self.background_train_results = None
        self.foreground_train_results = None

        self.init_status_values()

    def init_status_values(self):
        """(Re)build all status variables from the configuration.

        Creates the class-index -> label map, fresh foreground and
        background nets, fresh ADWIN detectors, and zeroes the counters.
        Prints the resulting object.
        """
        self.class_to_label = dict(enumerate(self.class_labels))

        # Rebuild the net lists from scratch so that calling this method
        # more than once does not duplicate the ensemble members.
        # Indexed access is kept because the container type of the config
        # objects is not visible from this class.
        self.foreground_nets = [
            ANN(learning_rate=foreground_net_config[i]['l_rate'],
                optimizer_type=foreground_net_config[i]['optimizer_type'],
                class_labels=self.class_labels)
            for i in range(len(foreground_net_config))
        ]
        # Background nets take BOTH settings from background_net_config
        # (the learning rate used to be read from foreground_net_config).
        self.background_nets = [
            ANN(learning_rate=background_net_config[i]['l_rate'],
                optimizer_type=background_net_config[i]['optimizer_type'],
                class_labels=self.class_labels)
            for i in range(len(background_net_config))
        ]

        self.drift_detection_method = ADWIN(
            delta=1e-3, direction=ADWIN.DETECT_DOWN)
        self.warning_detection_method = ADWIN(
            delta=1e-8, direction=ADWIN.DETECT_DOWN)

        self.detected_warnings = 0
        self.samples_seen = 0
        self.last_detected_drift_around = 0
        self.background_learner_threads = []
        self.background_train_results = None
        self.foreground_train_results = None
        print(self)

    @staticmethod
    def _new_train_results(n_nets):
        """Return an empty per-net result container for ``n_nets`` nets."""
        return {
            'probas': [None] * n_nets,
            'y_hats': [None] * n_nets,
            'avg_loss_since_last_detected_drift_by_parent': [0] * n_nets
        }

    def _start_training_threads(self, nets, results, X, r, c, y):
        """Create one ``net_train`` thread per net, start them all, return them."""
        threads = [
            threading.Thread(target=net_train, args=(
                net, X, r, c, y, results, i,
                self.last_detected_drift_around,
            ))
            for i, net in enumerate(nets)
        ]
        for thread in threads:
            thread.start()
        return threads

    def _swap_best_background_with_worst_foreground(self):
        """Promote the best background net if it beats the worst foreground net."""
        key = 'avg_loss_since_last_detected_drift_by_parent'
        back_losses = self.background_train_results[key]
        fore_losses = self.foreground_train_results[key]
        min_back = np.argmin(back_losses, axis=0)
        max_fore = np.argmax(fore_losses, axis=0)
        if back_losses[min_back] < fore_losses[max_fore]:
            self.foreground_nets[max_fore], self.background_nets[min_back] = \
                self.background_nets[min_back], self.foreground_nets[max_fore]

    def _manage_background_training(self, X, r, c, y):
        """Start or join the background learner threads, depending on state."""
        if not self.background_learner_threads:
            if self.samples_seen % self.background_training_after == 0:
                self.background_train_results = self._new_train_results(
                    len(self.background_nets))
                self.background_learner_threads = self._start_training_threads(
                    self.background_nets, self.background_train_results,
                    X, r, c, y)
            return

        # There are live background learner threads: wait until
        # background_training_after instances have passed before joining.
        if self.samples_seen % self.background_training_after \
                != self.background_training_after - 1:
            return

        # TODO: under CPython the global interpreter lock keeps threads from
        # executing Python bytecode in parallel:
        # https://docs.python.org/3/library/threading.html
        # Multiprocessing is an alternative:
        # https://docs.python.org/3/library/multiprocessing.html#module-multiprocessing
        for thread in self.background_learner_threads:
            thread.join()
        self.background_learner_threads = []

        # Foreground results are None until the first foreground training.
        if self.foreground_train_results is not None:
            self._swap_best_background_with_worst_foreground()

    def _train_foreground(self, X, r, c, y):
        """Train every foreground net on (X, y), threaded or sequentially."""
        self.foreground_train_results = self._new_train_results(
            len(self.foreground_nets))

        if self.use_threads:
            threads = self._start_training_threads(
                self.foreground_nets, self.foreground_train_results,
                X, r, c, y)
            for thread in threads:
                thread.join()
        else:
            for i, net in enumerate(self.foreground_nets):
                net_train(net, X, r, c, y, self.foreground_train_results, i,
                          self.last_detected_drift_around)

    def _update_drift_detectors(self, y):
        """Feed the prediction outcome to the detectors and record drift."""
        # get predicted class and compare with actual class label
        mean_probas = np.sum(
            self.foreground_train_results['probas'], axis=0) / len(
                self.foreground_nets)
        predicted_label = vectorized_map_class_to_label(
            np.argmax(mean_probas, axis=1),
            class_to_label_map=self.class_to_label)
        # TODO: we may have to have a special case for batch processing
        predicted_matches_actual = predicted_label == y

        self.drift_detection_method.add_element(
            1 if predicted_matches_actual else 0)
        if self.warning_detection_method is not None:
            self.warning_detection_method.add_element(
                1 if predicted_matches_actual else 0)

        # Check if there was a warning
        if self.warning_detection_method is not None:
            if self.warning_detection_method.detected_change():
                self.detected_warnings += 1
        else:
            # warning detector is None, hence drift detector has warning
            # detection capability.
            if self.drift_detection_method.detected_warning_zone():
                self.detected_warnings += 1

        # Check if there was a change; 3 is the warning threshold level.
        if self.detected_warnings > 3 \
                and self.drift_detection_method.detected_change():
            print('Drift detected by {} around {} th sample.'.format(
                self.drift_detection_method, self.samples_seen))
            self.detected_warnings = 0
            self.last_detected_drift_around = self.samples_seen

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """Incrementally train the ensemble on (X, y).

        Parameters
        ----------
        X
            Input samples; its dimensions are obtained via
            ``get_dimensions``.
        y
            Target label(s) for ``X``.
        classes
            Not used by this implementation.
        sample_weight
            Not used by this implementation.

        Returns
        -------
        self
        """
        r, c = get_dimensions(X)
        self.samples_seen += r

        self._manage_background_training(X, r, c, y)
        self._train_foreground(X, r, c, y)

        if self.drift_detection_method is not None:
            self._update_drift_detectors(y)

        return self

    def predict(self, X):
        """Predict the label for X from the averaged foreground outputs.

        Returns
        -------
        The result of ``vectorized_map_class_to_label`` applied to a
        one-element array holding the index of the class with the highest
        average probability across the foreground nets.
        """
        y_proba = self.predict_proba(X)
        pred_sum_per_class = np.sum(y_proba, axis=0)
        pred_avgsum_per_class = np.divide(pred_sum_per_class,
                                          len(self.foreground_nets))
        y_pred = np.argmax(pred_avgsum_per_class, axis=0)
        return vectorized_map_class_to_label(
            np.asarray([y_pred]), class_to_label_map=self.class_to_label)

    def predict_proba(self, X):
        """Collect the output of every foreground net for X.

        Returns
        -------
        numpy.ndarray of shape (n_foreground_nets, n_class_labels)
            Row ``i`` is passed to ``net_predict_proba`` to be filled for
            foreground net ``i``.
        """
        r, c = get_dimensions(X)
        probas = np.zeros([len(self.foreground_nets), len(self.class_labels)])
        for i, net in enumerate(self.foreground_nets):
            net_predict_proba(net, X, r, c, probas, i)
        return np.asarray(probas)

    def reset(self):
        """Reset every foreground net and return self.

        Background nets, detectors and counters are left untouched.
        """
        # configuration variables (which have the same name as init
        # parameters) should be copied by the caller function
        for net in self.foreground_nets:
            net.reset()
        return self

    def __str__(self):
        return str(self.__class__) + ": " + str(self.__dict__)

    @staticmethod
    def _print_net_stats(nets):
        """Print one CSV line of optimizer, learning rate and mean losses per net."""
        for net in nets:
            print('{},{},{},{}'.format(
                net.optimizer_type,
                net.learning_rate,
                net.accumulated_loss / net.samples_seen,
                net.accumulated_loss_since_last_detected_drift_by_parent /
                net.samples_seen_after_last_detected_drift_by_parent))

    def stream_ended(self):
        """Print the configuration and per-net loss statistics."""
        print('\nNetwork configuration:\n'
              '{}\n'
              '=======================================\n'
              'Foreground Nets\n'.format(self))
        print(
            'optimizer_type,learning_rate,accumulated_loss,accumulated_loss_since_last_detected_drift_by_parent'
        )
        self._print_net_stats(self.foreground_nets)
        print('\n'
              'Background Nets\n')
        self._print_net_stats(self.background_nets)
        print('\n')