def test_ddm():
    """ DDM drift detection test.

    The first half of the data contains a sequence corresponding to a normal
    distribution with mean 0 and sigma 0.1. The second half corresponds to a
    normal distribution with mean 0.5 and sigma 0.1.
    """
    ddm = DDM()

    # Data
    np.random.seed(1)
    mu, sigma = 0, 0.1  # mean and standard deviation
    d_1 = np.random.normal(mu, sigma, 1000) > 0
    mu, sigma = 0.5, 0.1  # mean and standard deviation
    d_2 = np.random.normal(mu, sigma, 1000) > 0
    data_stream = np.concatenate((d_1.astype(int), d_2.astype(int)))
    expected_indices = [103, 1060]
    detected_indices = []

    for i in range(data_stream.size):
        ddm.add_element(data_stream[i])
        if ddm.detected_change():
            detected_indices.append(i)

    assert detected_indices == expected_indices

    expected_info = "DDM(min_num_instances=30, out_control_level=3.0, warning_level=2.0)"
    assert ddm.get_info() == expected_info
def skmultiflow_detector(drift_detector_type: str) -> BaseDriftDetector:
    if drift_detector_type == "SKMULTIFLOW_EDDM":
        multiflow_detector = EDDM()
    elif drift_detector_type == "SKMULTIFLOW_PageHinkley":
        multiflow_detector = PageHinkley()
    elif drift_detector_type == "SKMULTIFLOW_DDM":
        multiflow_detector = DDM()
    elif drift_detector_type == "SKMULTIFLOW_ADWIN":
        multiflow_detector = ADWIN()
    else:
        raise Exception("Drift detector %s not implemented" % drift_detector_type)
    return multiflow_detector
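# Hypothetical usage of the factory above. The string key comes from the snippet;
# the surrounding statements are illustrative only.
detector = skmultiflow_detector("SKMULTIFLOW_DDM")
detector.add_element(0)  # DDM expects a 0/1 error indicator per sample (1 = misclassification)
if detector.detected_change():
    print("Drift detected")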
def sim_ddm(input_stream, start_point=0):
    ddm = DDM()
    change_point = []
    detected_warning = []
    for i in range(len(input_stream)):
        ddm.add_element(input_stream[i])
        if ddm.detected_warning_zone():
            detected_warning.append(i + start_point)
        if ddm.detected_change():
            # plt.axvline(i, color='r', linestyle='dashed')
            change_point.append(i + start_point)
            # print('Change detected in data: ' + str(input_stream[i]) + ' - at index: ' + str(i) + '\n\n')
    return detected_warning, change_point
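# Illustrative call of sim_ddm on a synthetic 0/1 error stream. The data and the
# variable names below are assumptions, not part of the original code.
np.random.seed(0)
errors = (np.random.rand(2000) < 0.1).astype(int)  # ~10% error rate throughout
warnings_idx, changes_idx = sim_ddm(errors, start_point=0)
print(f"{len(warnings_idx)} warning points, {len(changes_idx)} change points")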
def ddm_test():
    ddm = DDM()
    true_occur_position = 4443
    data_stream = np.load("data/stream_acc.npy")
    for i in tqdm(range(data_stream.shape[0])):
        # print(data_stream[i])
        # print(i)
        ddm.add_element(data_stream[i])
        if ddm.detected_warning_zone():
            print('Warning zone has been detected in data: ' + str(data_stream[i]) + ' - of index: ' + str(i))
        if ddm.detected_change():
            print('Change has been detected in data: ' + str(data_stream[i]) + ' - of index: ' + str(i))
def test_ddm(test_path):
    """ DDM drift detection test.

    The first half of the stream contains a sequence corresponding to a normal
    distribution of integers from 0 to 1. From index 999 to 1999 the sequence
    is a normal distribution of integers from 0 to 7.
    """
    ddm = DDM()
    test_file = os.path.join(test_path, 'drift_stream.npy')
    data_stream = np.load(test_file)
    expected_indices = [1009]
    detected_indices = []

    for i in range(data_stream.size):
        ddm.add_element(data_stream[i])
        if ddm.detected_change():
            detected_indices.append(i)

    assert detected_indices == expected_indices
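# The 'drift_stream.npy' fixture is not included in this snippet. A stream with
# roughly the properties described in the docstring could be generated as below;
# this is an illustration (uniform integers), not the code that built the fixture.
np.random.seed(1)
first_half = np.random.randint(2, size=1000)   # integers in {0, 1}
second_half = np.random.randint(8, size=1000)  # integers in {0, ..., 7}
np.save('drift_stream.npy', np.concatenate((first_half, second_half)))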
def run(self):
    ''' Main method to simulate a new experiment. '''
    print(f"Starting Experiment:{self}")
    try:
        start_window_size = self.window_size
        num_of_correct_predictions, predictions_counter = 0, 0
        ddm = DDM()
        for record in range(self.X.shape[0]):
            x_record, y_record = np.array([self.X[record, :]]), np.ravel(np.array([self.y[record]]))
            if record < self.window_size:
                # aggregate records till window size
                continue
            elif record == self.window_size:
                # first initialization
                try:
                    self.init_ofs_ol(record)
                except Exception as e:
                    # case where ofs failed to find features - try to add more records and replay process
                    if self.window_size > start_window_size * 4:
                        raise Exception("OFS could not find features.")
                    self.window_size += 50
                    logging.info(f"Changed window size from {self.window_size - 50} to {self.window_size}")
                continue
            # predict
            my_pred = self.ol.created_model.predict(x_record) if self.ofs is None \
                else self.ol.created_model.predict(x_record[:, self.current_selected_features])
            predictions_counter += 1
            if y_record[0] == my_pred[0]:
                num_of_correct_predictions += 1
            ddm.add_element(num_of_correct_predictions / predictions_counter)  # add result to concept drift model
            self.prequential_accuracy.append(num_of_correct_predictions / predictions_counter)  # add accuracy
            self.memory_usage.append(psutil.Process(os.getpid()).memory_info().rss)  # add memory usage
            if self.ol.lazy:
                # partial fit for lazy models
                self.fit_lazy(x_record, y_record)
            if ddm.detected_change():
                # check for concept drift
                self.concept_drift_detection(start_window_size, record)
            elif record != self.X.shape[0] - 1 and self.ofs:
                self.selected_features.append(self.selected_features[-1])
    except Exception as e:
        logging.error(f"Error: {str(e)}")
ddm_param = [3, 5, 7]
ks_param1 = [100, 150, 200]
ks_param2 = [30, 50, 100]
ph_param1 = [25, 50, 75]
ph_param2 = [0.005, 0.01, 0.02]

knn = KNNClassifier()
stream = driftStreams[0]

for i in range(0, 3):
    trainX, trainY = stream.next_sample(2000)
    knn.partial_fit(trainX, trainY)

    adwin = ADWIN(delta=adwin_param[i])
    ddm = DDM(out_control_level=ddm_param[i])
    kswin1 = KSWIN(window_size=ks_param1[i])
    # kswin2 = KSWIN(stat_size=ks_param2[i])
    ph1 = PageHinkley(threshold=ph_param1[i])
    ph2 = PageHinkley(delta=ph_param2[i])

    adwin_results = []
    ddm_results = []
    kswin1_results = []
    kswin2_results = []
    ph1_results = []
    ph2_results = []

    n_samples = 0
    corrects = 0
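    # The evaluation loop itself is not part of this excerpt (adwin_param and
    # driftStreams are also defined elsewhere). A typical prequential continuation,
    # feeding each detector the classifier's 0/1 error, might look like this sketch:
    while n_samples < 1000 and stream.has_more_samples():
        X, y = stream.next_sample()
        y_pred = knn.predict(X)
        error = int(y_pred[0] != y[0])
        corrects += 1 - error
        for detector, results in ((adwin, adwin_results), (ddm, ddm_results),
                                  (kswin1, kswin1_results), (ph1, ph1_results),
                                  (ph2, ph2_results)):
            detector.add_element(error)
            results.append(detector.detected_change())
        knn.partial_fit(X, y)
        n_samples += 1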
    return stream


def drift_flow(stream, method, name, beginning_stream, end_tables):
    detected_change = []
    detected_warning = []
    number_of_changes = 0
    for i in range(len(stream)):
        method.add_element(stream[i])
        if method.detected_warning_zone():
            print(f'Warning zone has been detected in data: {stream[i]} - of index: {i}')
            detected_warning.append((stream[i]))
        if method.detected_change():
            detected_change.append(stream[i])
            print(f'Change has been detected in data: {stream[i]} - of index: {i}')
            number_of_changes += 1
        else:
            detected_change.append(None)
    print(f'{name} Detected changes: {number_of_changes}')
    print(f'{name} Detected warning zones: {str(len(detected_warning))}')
    plots(stream, detected_change, name, beginning_stream, end_tables)


stream = make_stream(PATH)
drift_flow(stream, EDDM(), 'EDDM', 0, 500)
drift_flow(stream, HDDM_A(), 'HDDM_A', 0, 500)
drift_flow(stream, HDDM_W(), 'HDDM_W', 0, 500)
drift_flow(stream, PageHinkley(), 'PH', 0, 500)
drift_flow(stream, DDM(), 'DDM', 0, 500)
file_name = "CMGMM-" + test_dataset + ".log"
DETECTOR = args.detector
nama_model = "CMGMM"
if prune_comp:
    nama_model = nama_model + "+ "
else:
    nama_model = nama_model + " "

if DETECTOR == "ADWIN":
    print("adwin")
    nama_model = nama_model + DETECTOR
    detector = ADWIN()
elif DETECTOR == "DDM":
    print("DDM")
    nama_model = nama_model + DETECTOR
    detector = DDM()
elif DETECTOR == "EDDM":
    print("EDDM")
    nama_model = nama_model + DETECTOR
    detector = EDDM()
elif DETECTOR == "HDDM_A":
    print("HDDM_A")
    nama_model = nama_model + DETECTOR
    detector = HDDM_A()
elif DETECTOR == "HDDM_W":
    print("HDDM_W")
    nama_model = nama_model + DETECTOR
    detector = HDDM_W()
elif DETECTOR == "KSWIN":
    print("KSWIN")
    nama_model = nama_model + DETECTOR
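# The chain of elif branches above could equivalently be written as a lookup
# table. This is only an alternative sketch, assuming the same skmultiflow
# detector classes are imported (each constructed with its default arguments):
_DETECTORS = {
    "ADWIN": ADWIN,
    "DDM": DDM,
    "EDDM": EDDM,
    "HDDM_A": HDDM_A,
    "HDDM_W": HDDM_W,
    "KSWIN": KSWIN,
}
if DETECTOR in _DETECTORS:
    print(DETECTOR)
    nama_model = nama_model + DETECTOR
    detector = _DETECTORS[DETECTOR]()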
def mapping_experiment(
    save_name,
    lstm_model_idx=0,
    transformer_model_trained=TransformerModel.BERT,
    transformer_model_untrained=TransformerModel.SCIBERT,
    linear=False,
    method="average",
    batch_size=1,
    transform=True,
    print_every=1,
    device="cpu",
):
    """
    Runs an adaptation experiment using the Procrustes linear mapping or the MLP mapping.

    Args:
        save_name (str): name of the file where the function saves the result
        lstm_model_idx (int): the index of the LSTM model (from the available ones)
        transformer_model_trained (TransformerModel): the embeddings on which the model was trained
        transformer_model_untrained (TransformerModel): the embeddings against which the model is compared
        linear (bool): if True, use the Procrustes linear mapping; otherwise use the MLP mapping
        method (str): method parameter used for picking the adaptation dataset
        batch_size (int): the batch size for the stream
        transform (bool): whether to transform the text or use a pre-transformed one
        print_every (int): how often to print
        device (str): cpu or cuda
    Returns:
        a dictionary with the results
    """
    # Add method to save name
    save_name += "_" + method

    # Initialize the stream that the model was trained on
    stream_trained = WOSStream(
        transformer_model=transformer_model_trained,
        transform=transform,
        test_split=False,
        device=device,
    )
    stream_trained.prepare_for_use()

    # Initialize the stream with other embeddings, to add drift
    stream_untrained = WOSStream(
        transformer_model=transformer_model_untrained,
        transform=transform,
        test_split=False,
        device=device,
    )
    stream_untrained.prepare_for_use()

    # Initialize the adaptation mapping
    if linear:
        mapping = Procrustes(method=method, x_most_common=10000)
    else:
        mapping = MLPMapping(method=method, x_most_common=10000)

    # Load the LSTM model
    model = LSTM(
        embedding_dim=utils.EMBEDDING_DIM, no_classes=stream_trained.n_classes
    ).to(device)
    model.load_state_dict(
        torch.load(LSTM_MODELS[lstm_model_idx], map_location=device), strict=False
    )
    model.eval()

    # Initialize the drift detector
    drift_detector = DDM()

    # Run streams
    print("Running trained stream...")
    trained_accuracies = run_stream_lstm(
        stream_trained,
        model,
        drift_detector,
        batch_size=batch_size,
        print_every=print_every,
        warm_start=sys.maxsize,
        device=device,
    )
    print("Running untrained stream...")
    untrained_accuracies = run_stream_lstm(
        stream_untrained,
        model,
        drift_detector,
        batch_size=batch_size,
        print_every=print_every,
        warm_start=sys.maxsize,
        device=device,
    )

    # Run the stream with a mapping
    stream_untrained.restart()
    print("Running mapping stream...")
    mapping_accuracies = run_stream_with_mapping(
        stream_untrained,
        model,
        mapping,
        batch_size=batch_size,
        print_every=print_every,
    )

    # Save the results
    to_save = {
        "trained_accuracies": trained_accuracies,
        "untrained_accuracies": untrained_accuracies,
        "mapping_accuracies": mapping_accuracies,
    }
    with open(os.path.join(PATH_RESULTS, save_name + ".pkl"), "wb") as f:
        pickle.dump(to_save, f)

    return to_save
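# Hypothetical invocation of mapping_experiment; every argument value below is
# illustrative rather than taken from the original project.
results = mapping_experiment(
    "bert_to_scibert",
    lstm_model_idx=0,
    transformer_model_trained=TransformerModel.BERT,
    transformer_model_untrained=TransformerModel.SCIBERT,
    linear=True,
    method="average",
    batch_size=32,
    device="cpu",
)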
def drift_detection_different_embeddings(
    save_name,
    lstm_model_idx=None,
    nb_model_idx=None,
    transformer_model_trained=None,
    transformer_model_untrained=None,
    batch_size=1,
    transform=True,
    print_every=1,
    device="cpu",
):
    """
    Performs an experiment with two different streams on the same model. The first
    stream uses the embeddings on which the model was trained. The second stream
    uses embeddings that differ from the ones on which the model was trained.
    The goal of the experiment is to find out whether the new embeddings can be
    substituted for the old ones, in which case no drift should occur; otherwise
    they cannot be used and drift will be detected.

    Args:
        save_name (str): name of the file where the function saves the result
        lstm_model_idx (int): the index of the LSTM model (from the available ones)
        nb_model_idx (int): the index of the Naive Bayes model (from the available ones)
        transformer_model_trained (TransformerModel): the embeddings on which the model was trained
        transformer_model_untrained (TransformerModel): the embeddings against which the model is compared
        batch_size (int): the batch size for the stream
        transform (bool): whether to transform the text or use a pre-transformed one
        print_every (int): how often to print
        device (str): cpu or cuda
    Returns:
        a dictionary with the results
    """
    # Initialize the stream that the model was trained on
    stream_trained = WOSStream(
        transformer_model=transformer_model_trained,
        transform=transform,
        test_split=False,
        device=device,
    )
    stream_trained.prepare_for_use()

    # Initialize the stream with other embeddings, to add drift
    stream_untrained = WOSStream(
        transformer_model=transformer_model_untrained,
        transform=transform,
        test_split=False,
        device=device,
    )
    stream_untrained.prepare_for_use()

    # Load the model
    model = None
    stream_runner = None
    if lstm_model_idx is None and nb_model_idx is None:
        raise ValueError("No index provided for either the LSTM or the NB model.")
    if lstm_model_idx is not None:
        # Load the LSTM model
        model = LSTM(
            embedding_dim=utils.EMBEDDING_DIM, no_classes=stream_trained.n_classes
        ).to(device)
        model.load_state_dict(
            torch.load(LSTM_MODELS[lstm_model_idx], map_location=device), strict=False
        )
        model.eval()
        stream_runner = run_stream_lstm
    elif nb_model_idx is not None:
        # Load the Naive Bayes model
        model = load(NB_MODELS[nb_model_idx])
        stream_runner = run_stream_nb

    # Initialize drift detector
    drift_detector = DDM()

    # Run streams
    print("Running trained stream...")
    trained_accuracies = stream_runner(
        stream_trained,
        model,
        drift_detector,
        batch_size=batch_size,
        print_every=print_every,
        warm_start=sys.maxsize,
        device=device,
    )
    print("Running untrained stream...")
    untrained_accuracies = stream_runner(
        stream_untrained,
        model,
        drift_detector,
        batch_size=batch_size,
        print_every=print_every,
        warm_start=sys.maxsize,
        device=device,
    )

    # Save the results
    to_save = {
        "trained_accuracies": trained_accuracies,
        "untrained_accuracies": untrained_accuracies,
    }
    with open(os.path.join(PATH_RESULTS, save_name + ".pkl"), "wb") as f:
        pickle.dump(to_save, f)

    return to_save
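# Hypothetical invocation comparing the embeddings the Naive Bayes model was
# trained on against different embeddings; the argument values are illustrative.
results = drift_detection_different_embeddings(
    "nb_bert_vs_scibert",
    nb_model_idx=0,
    transformer_model_trained=TransformerModel.BERT,
    transformer_model_untrained=TransformerModel.SCIBERT,
    batch_size=32,
    device="cpu",
)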
def drift_detection_gradual_noise(
    save_name,
    lstm_model_idx=None,
    nb_model_idx=None,
    transformer_model=TransformerModel.BERT,
    batch_size=1,
    max_std=0.1,
    warm_start=30,
    transform=True,
    print_every=1,
    device="cpu",
):
    """
    Performs an experiment with a stream on a model. The stream is gradually
    perturbed with noise so that it simulates gradual concept drift.

    Args:
        save_name (str): name of the file where the function saves the result
        lstm_model_idx (int): the index of the LSTM model (from the available ones)
        nb_model_idx (int): the index of the Naive Bayes model (from the available ones)
        transformer_model (TransformerModel): the embeddings on which the model was trained
        batch_size (int): the batch size for the stream
        max_std (float): the maximum standard deviation for the Gaussian noise
        warm_start (int): number of examples run before adding noise
        transform (bool): whether to transform the text or use a pre-transformed one
        print_every (int): how often to print
        device (str): cpu or cuda
    Returns:
        a dictionary with the results
    """
    # Initialize the stream
    stream = WOSStream(
        transformer_model=transformer_model,
        transform=transform,
        test_split=False,
        device=device,
    )
    stream.prepare_for_use()

    # Load the model
    model = None
    stream_runner = None
    if lstm_model_idx is None and nb_model_idx is None:
        raise ValueError("No index provided for either the LSTM or the NB model.")
    if lstm_model_idx is not None:
        # Load the LSTM model
        model = LSTM(embedding_dim=utils.EMBEDDING_DIM, no_classes=stream.n_classes).to(
            device
        )
        model.load_state_dict(
            torch.load(LSTM_MODELS[lstm_model_idx], map_location=device), strict=False
        )
        model.eval()
        stream_runner = run_stream_lstm
    elif nb_model_idx is not None:
        # Load the Naive Bayes model
        model = load(NB_MODELS[nb_model_idx])
        stream_runner = run_stream_nb

    # Initialize the drift detector
    drift_detector = DDM()

    n_iterations = stream.n_samples // batch_size + 1
    # Initialize the standard deviations for the normal distribution
    standard_devs = torch.arange(
        start=0, end=max_std, step=max_std / (n_iterations - warm_start)
    )

    # Run stream
    accuracies = stream_runner(
        stream,
        model,
        drift_detector,
        batch_size=batch_size,
        print_every=print_every,
        noise_stds=standard_devs,
        warm_start=warm_start,
        device=device,
    )

    to_save = {
        "accuracies": accuracies,
    }
    with open(os.path.join(PATH_RESULTS, save_name + ".pkl"), "wb") as f:
        pickle.dump(to_save, f)

    return to_save
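# Hypothetical invocation of the gradual-noise experiment with an LSTM model;
# the argument values are illustrative.
results = drift_detection_gradual_noise(
    "lstm_gradual_noise",
    lstm_model_idx=0,
    transformer_model=TransformerModel.BERT,
    batch_size=32,
    max_std=0.2,
    warm_start=50,
    device="cpu",
)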
detect_end = n_global
mine_pr = []
mine_std = []
mine_alpha = []
pr_min = []
std_min = []
pi = []
mine_x_mean = []
mine_sum = []
mine_threshold = []
pred_grace_ht = []
pred_grace_ht_p = []
ht_p = None
ML_accuracy = 0

ddm = DDM()
h = hpy()

while elec_stream.has_more_samples():
    n_global += 1
    X_test, y_test = elec_stream.next_sample()
    y_predict = ht.predict(X_test)

    ddm_start_time = time.time()
    ddm.add_element(y_test != y_predict)
    ML_accuracy += 1 if y_test == y_predict else 0
    ddm_running_time = time.time() - ddm_start_time
    RT_ddm.append(ddm_running_time)

    if (n_global > grace_end):
        if (n_global > detect_end):
            if ht_p is not None:
def test_on_data_set(data_desc, D):
    r = {data_desc: {"HDDDM": [], "SWIDD": [], "EDDM": [], "DDM": [], "ADWIN": [], "PageHinkley": []}}
    training_buffer_size = 100  # Size of training buffer of the drift detector
    n_train = 200  # Initial training set size

    concept_drifts = D["drifts"]
    X, Y = D["data"]
    data_stream = np.concatenate((X, Y.reshape(-1, 1)), axis=1)

    X0, Y0 = X[0:n_train, :], Y[0:n_train, :]  # Training dataset
    data0 = data_stream[0:n_train, :]
    X_next, Y_next = X[n_train:, :], Y[n_train:, :]  # Test set
    data_next = data_stream[n_train:, :]

    # Run unsupervised drift detectors
    dd = DriftDetectorUnsupervised(HDDDM(data0, gamma=None, alpha=0.005), batch_size=50)
    changes_detected = dd.apply_to_stream(data_next)

    # Evaluation
    scores = evaluate(concept_drifts, changes_detected)
    r[data_desc]["HDDDM"].append(scores)

    dd = DriftDetectorUnsupervised(SWIDD(max_window_size=300, min_window_size=100), batch_size=1)
    changes_detected = dd.apply_to_stream(data_next)

    # Evaluation
    scores = evaluate(concept_drifts, changes_detected)
    r[data_desc]["SWIDD"].append(scores)

    # Run supervised drift detectors
    model = GaussianNB()

    # EDDM
    drift_detector = EDDM()
    clf = Classifier(model)
    clf.flip_score = True
    clf.fit(X0, Y0.ravel())
    dd = DriftDetectorSupervised(clf=clf, drift_detector=drift_detector, training_buffer_size=training_buffer_size)
    changes_detected = dd.apply_to_stream(X_next, Y_next)

    # Evaluation
    scores = evaluate(concept_drifts, changes_detected)
    r[data_desc]["EDDM"].append(scores)

    # DDM
    drift_detector = DDM(min_num_instances=30, warning_level=2.0, out_control_level=3.0)
    clf = Classifier(model)
    clf.flip_score = True
    clf.fit(X0, Y0.ravel())
    dd = DriftDetectorSupervised(clf=clf, drift_detector=drift_detector, training_buffer_size=training_buffer_size)
    changes_detected = dd.apply_to_stream(X_next, Y_next)

    # Evaluation
    scores = evaluate(concept_drifts, changes_detected)
    r[data_desc]["DDM"].append(scores)

    # ADWIN
    drift_detector = ADWIN(delta=2.)
    clf = Classifier(model)
    clf.fit(X0, Y0.ravel())
    dd = DriftDetectorSupervised(clf=clf, drift_detector=drift_detector, training_buffer_size=training_buffer_size)
    changes_detected = dd.apply_to_stream(X_next, Y_next)

    # Evaluation
    scores = evaluate(concept_drifts, changes_detected)
    r[data_desc]["ADWIN"].append(scores)

    # PageHinkley
    drift_detector = PageHinkley()
    clf = Classifier(model)
    clf.flip_score = True
    clf.fit(X0, Y0.ravel())
    dd = DriftDetectorSupervised(clf=clf, drift_detector=drift_detector, training_buffer_size=training_buffer_size)
    changes_detected = dd.apply_to_stream(X_next, Y_next)

    # Evaluation
    scores = evaluate(concept_drifts, changes_detected)
    r[data_desc]["PageHinkley"].append(scores)

    return r
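# Hypothetical call of test_on_data_set. The dictionary layout mirrors how D is
# used above, but the concrete data here is synthetic and purely illustrative.
X = np.random.rand(3000, 5)
Y = (np.random.rand(3000, 1) > 0.5).astype(int)
D = {"drifts": [1000, 2000], "data": (X, Y)}
results = test_on_data_set("synthetic_example", D)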
    if A.detected_change():
        print('Concept Drift detected in data: ' + str(stream[j]) + ' - at index: ' + str(j))

### Output:
# Concept Drift detected in data: 8.0 - at index: 607
# Concept Drift detected in data: 5.0 - at index: 639
# Concept Drift detected in data: 6.0 - at index: 671

########
### DDM code
import numpy as np
from skmultiflow.drift_detection import DDM

# call the DDM object
d2m = DDM()

# set seed for reproducibility
np.random.seed(123)

# Simulate a data stream of size 1000 from a standard normal distribution
stream = np.random.randn(1000)
stream[:10]
## Output:
# array([-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471, -0.57860025,
#         1.65143654, -2.42667924, -0.42891263,  1.26593626, -0.8667404 ])

# Data concepts are changed from index 299 to 600
for j in range(299, 600):
    stream[j] = np.random.randint(5, high=9)
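# The excerpt stops before the detection loop. A typical continuation, feeding
# the perturbed stream into the DDM instance created above, would look roughly
# like this sketch:
for j in range(1000):
    d2m.add_element(stream[j])
    if d2m.detected_warning_zone():
        print('Warning zone detected in data: ' + str(stream[j]) + ' - at index: ' + str(j))
    if d2m.detected_change():
        print('Concept Drift detected in data: ' + str(stream[j]) + ' - at index: ' + str(j))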
def main():
    overall_kswin_tp = overall_kswin_tn = overall_kswin_fp = overall_kswin_fn = 0
    overall_adwin_tp = overall_adwin_tn = overall_adwin_fp = overall_adwin_fn = 0
    # mebwin_drifts = []
    overall_k_swmebwin_tp = overall_k_swmebwin_tn = overall_k_swmebwin_fp = overall_k_swmebwin_fn = 0
    overall_swmebwin_tp = overall_swmebwin_tn = overall_swmebwin_fp = overall_swmebwin_fn = 0
    overall_eddm_tp = overall_eddm_tn = overall_eddm_fp = overall_eddm_fn = 0
    overall_ddm_tp = overall_ddm_tn = overall_ddm_fp = overall_ddm_fn = 0

    for stream in streams:
        print(stream.name)

        f = open('drifts.txt', 'a+')
        f.write(f'**{stream.name}**\n\n')
        f.close()

        stream.prepare_for_use()
        stream.next_sample()

        # mebwin = MEBWIN(epsilon=0.1, sensitivity=0.98, w_size=100, stat_size=30)
        adwin = []
        kswin = []
        ddm = DDM(min_num_instances=30)
        eddm = EDDM()

        data = []
        labels = []
        predictions = []

        kswin_drifts = []
        adwin_drifts = []
        # mebwin_drifts = []
        k_swmebwin_drifts = []
        swmebwin_drifts = []
        eddm_drifts = []
        ddm_drifts = []

        swmebwin = SWMEBWIN(classes=stream.target_values, w_size=80, epsilon=0.05)
        # k_swmebwin = Kernel_SWMEBWIN(classes=stream.target_values, w_size=80, epsilon=0.05, gamma=10**10)
        k_swmebwin = Kernel_SWMEBWIN(classes=stream.target_values, w_size=80, epsilon=0.05)
        # gamma maybe 1.0 / stream.current_sample_x.shape[1]

        RANGE = 1000000
        DIM = 50
        # - 2 because first drift is at 2000 not 1000 and last drift is not detectable
        # COUNT_DRIFTS = RANGE / 1000 - 2

        n_rand_dims = DIM - stream.current_sample_x.size
        multiply = n_rand_dims // stream.current_sample_x.size

        # partial fit -> pretrain
        for _m in range(multiply):
            current_sample_x = np.array([[]])
            current_sample_x = np.concatenate(
                (current_sample_x, stream.current_sample_x), axis=1)

        bayes = NaiveBayes()
        bayes.partial_fit(np.array(current_sample_x), list(stream.current_sample_y.ravel()))

        for j in range(DIM):
            adwin.append(ADWIN(delta=0.002))
            kswin.append(KSWIN(w_size=300, stat_size=30, alpha=0.0001))

        """Add dims"""
        for i in range(RANGE):
            current_sample_x = np.array([[]])
            for _m in range(multiply):
                current_sample_x = np.concatenate(
                    (current_sample_x, stream.current_sample_x), axis=1)

            data.append(current_sample_x.ravel())
            labels.append(stream.current_sample_y.ravel()[0])
            predictions.append(0 if bayes.predict(current_sample_x) == labels[i] else 1)
            bayes.partial_fit(current_sample_x, list(stream.current_sample_y.ravel()))
            stream.next_sample()

        # MEBWIN
        # start = time.time()
        # for i in range(RANGE):
        #     mebwin.add_element(data[i])
        #
        #     if mebwin.change_detected is True:
        #         mebwin_drifts.append(i)
        #
        # f = open('drifts.txt', 'a+')
        # f.write(f'MEBWIN detected {len(mebwin_drifts)} drifts in {time.time() - start} {mebwin_drifts}\n\n')
        # f.close()
        # print(f'MEBWIN took {time.time() - start} sec and detected {len(mebwin_drifts)} drifts')

        # Kernel SWMEBWIN
        start = time.time()
        for i in range(RANGE):
            k_swmebwin.add_element(value=data[i], label=labels[i])

            if k_swmebwin.change_detected is True:
                k_swmebwin_drifts.append(i)

        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(k_swmebwin_drifts, RANGE)
        overall_k_swmebwin_tp += tp
        overall_k_swmebwin_tn += tn
        overall_k_swmebwin_fp += fp
        overall_k_swmebwin_fn += fn

        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')

        f = open('drifts.txt', 'a+')
        f.write(f'K-SWMEB detected {len(k_swmebwin_drifts)} drifts in {time.time() - start} {k_swmebwin_drifts}\n\n')
        f.close()
        print(f'K-SW-MEBWIN took {end} sec and detected {len(k_swmebwin_drifts)} drifts\n')

        # SWMEBWIN
        start = time.time()
        for i in range(RANGE):
            swmebwin.add_element(value=data[i], label=labels[i])

            if swmebwin.change_detected is True:
                swmebwin_drifts.append(i)

        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(swmebwin_drifts, RANGE)
        overall_swmebwin_tp += tp
        overall_swmebwin_tn += tn
        overall_swmebwin_fp += fp
        overall_swmebwin_fn += fn

        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')

        f = open('drifts.txt', 'a+')
        f.write(f'SWMEB detected {len(swmebwin_drifts)} drifts in {time.time() - start} {swmebwin_drifts}\n\n')
        f.close()
        print(f'SW-MEBWIN took {end} sec and detected {len(swmebwin_drifts)} drifts\n')

        # ADWIN
        start = time.time()
        for i in range(RANGE):
            adwin_detected = False
            for j in range(data[i].size):
                adwin[j].add_element(data[i][j])
                if adwin[j].detected_change():
                    adwin_detected = True

            if adwin_detected is True:
                adwin_drifts.append(i)

        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(adwin_drifts, RANGE)
        overall_adwin_tp += tp
        overall_adwin_tn += tn
        overall_adwin_fp += fp
        overall_adwin_fn += fn

        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')

        f = open('drifts.txt', 'a+')
        f.write(f'ADWIN detected {len(adwin_drifts)} drifts in {time.time() - start} at {adwin_drifts}\n\n')
        f.close()
        print(f'ADWIN took {end} sec and detected {len(adwin_drifts)} drifts\n')

        # KSWIN
        start = time.time()
        for i in range(RANGE):
            kswin_detected = False
            for j in range(data[i].size):
                kswin[j].add_element(data[i][j])
                if kswin[j].detected_change():
                    kswin_detected = True

            if kswin_detected is True:
                kswin_drifts.append(i)

        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(kswin_drifts, RANGE)
        overall_kswin_tp += tp
        overall_kswin_tn += tn
        overall_kswin_fp += fp
        overall_kswin_fn += fn

        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')

        f = open('drifts.txt', 'a+')
        f.write(f'KSWIN detected {len(kswin_drifts)} drifts in {time.time() - start} at {kswin_drifts}\n\n')
        f.close()
        print(f'KSWIN took {end} sec and detected {len(kswin_drifts)} drifts\n')

        # EDDM
        start = time.time()
        for i in range(RANGE):
            eddm_detected = False
            eddm.add_element(predictions[i])
            if eddm.detected_change():
                eddm_detected = True

            if eddm_detected is True:
                eddm_drifts.append(i)

        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(eddm_drifts, RANGE)
        overall_eddm_tp += tp
        overall_eddm_tn += tn
        overall_eddm_fp += fp
        overall_eddm_fn += fn

        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')

        f = open('drifts.txt', 'a+')
        f.write(f'EDDM detected {len(eddm_drifts)} drifts in {time.time() - start} at {eddm_drifts}\n\n')
        f.close()
        print(f'EDDM took {end} sec and detected {len(eddm_drifts)} drifts\n')

        # DDM
        start = time.time()
        for i in range(RANGE):
            ddm_detected = False
            ddm.add_element(predictions[i])
            if ddm.detected_change():
                ddm_detected = True

            if ddm_detected is True:
                ddm_drifts.append(i)

        end = time.time() - start

        f1, tp, fp, tn, fn = confusion_matrix_stats(ddm_drifts, RANGE)
        overall_ddm_tp += tp
        overall_ddm_tn += tn
        overall_ddm_fp += fp
        overall_ddm_fn += fn

        print(f'F1-Score: {f1}')
        print(f'{tp} true positives, {fp} false positives')
        print(f'{tn} true negatives, {fn} false negatives')

        f = open('drifts.txt', 'a+')
        f.write(f'DDM detected {len(ddm_drifts)} drifts in {time.time() - start} at {ddm_drifts}\n\n')
        f.close()
        print(f'DDM took {end} sec and detected {len(ddm_drifts)} drifts\n')

    # OVERALL STATISTICS
    print(50 * '-')
    print('K-SWMEBWIN\n')
    print(f'Overall F1: {calc_f1(overall_k_swmebwin_tp, overall_k_swmebwin_fp, overall_k_swmebwin_tn, overall_k_swmebwin_fn)}')
    print(f'{overall_k_swmebwin_tp} true positives, {overall_k_swmebwin_fp} false positives')
    print(f'{overall_k_swmebwin_tn} true negatives, {overall_k_swmebwin_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('SWMEBWIN\n')
    print(f'Overall F1: {calc_f1(overall_swmebwin_tp, overall_swmebwin_fp, overall_swmebwin_tn, overall_swmebwin_fn)}')
    print(f'{overall_swmebwin_tp} true positives, {overall_swmebwin_fp} false positives')
    print(f'{overall_swmebwin_tn} true negatives, {overall_swmebwin_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('KSWIN\n')
    print(f'Overall F1: {calc_f1(overall_kswin_tp, overall_kswin_fp, overall_kswin_tn, overall_kswin_fn)}')
    print(f'{overall_kswin_tp} true positives, {overall_kswin_fp} false positives')
    print(f'{overall_kswin_tn} true negatives, {overall_kswin_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('ADWIN\n')
    print(f'Overall F1: {calc_f1(overall_adwin_tp, overall_adwin_fp, overall_adwin_tn, overall_adwin_fn)}')
    print(f'{overall_adwin_tp} true positives, {overall_adwin_fp} false positives')
    print(f'{overall_adwin_tn} true negatives, {overall_adwin_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('DDM\n')
    print(f'Overall F1: {calc_f1(overall_ddm_tp, overall_ddm_fp, overall_ddm_tn, overall_ddm_fn)}')
    print(f'{overall_ddm_tp} true positives, {overall_ddm_fp} false positives')
    print(f'{overall_ddm_tn} true negatives, {overall_ddm_fn} false negatives')
    print(50 * '-')

    print(50 * '-')
    print('EDDM\n')
    print(f'Overall F1: {calc_f1(overall_eddm_tp, overall_eddm_fp, overall_eddm_tn, overall_eddm_fn)}')
    print(f'{overall_eddm_tp} true positives, {overall_eddm_fp} false positives')
    print(f'{overall_eddm_tn} true negatives, {overall_eddm_fn} false negatives')
    print(50 * '-')