def calculate_emr(learners, error_weight=1, memory_weight=1, runtime_weight=1, lb=1, ub=10):
    """Score learners on Error, Memory and Runtime (EMR).

    For each learner the error rate, total runtime and (optionally) memory
    footprint are collected, scaled into [lb, ub] relative to the other
    learners, and combined into a single weighted EMR score.

    :param learners: learners exposing LEARNER_NAME, get_confusion_matrix()
        and get_total_running_time()
    :param error_weight: weight of the error-rate component
    :param memory_weight: weight of the memory component; -1 disables the
        (expensive) memory measurement and records 0 instead
    :param runtime_weight: weight of the runtime component
    :param lb: lower bound of the scaling range
    :param ub: upper bound of the scaling range
    :return: tuple (emr_scores, errors, memory_usages, runtimes);
        emr_scores is None when only one learner is given, since
        min/max scaling needs at least two values to compare
    """
    # Title-cased display names; kept for parity with the original collection
    # pass (not used in the score computation itself).
    names = [entry.LEARNER_NAME.title() for entry in learners]
    errors = [PredictionEvaluator.calculate(TornadoDic.ERROR_RATE, entry.get_confusion_matrix())
              for entry in learners]
    runtimes = [entry.get_total_running_time() for entry in learners]
    if memory_weight != -1:
        memory_usages = [asizeof.asizeof(entry) for entry in learners]
    else:
        # Memory component disabled: record zeros instead of measuring.
        memory_usages = [0 for _ in learners]

    # A single learner has no peers to scale against.
    if len(learners) == 1:
        return None, errors, memory_usages, runtimes

    err_min, err_max = LearnersScoreCalculator.get_min_max(errors)
    run_min, run_max = LearnersScoreCalculator.get_min_max(runtimes)
    mem_min, mem_max = LearnersScoreCalculator.get_min_max(memory_usages)

    emr_scores = []
    for err, run, mem in zip(errors, runtimes, memory_usages):
        scaled_err = LearnersScoreCalculator.scale(err, err_min, err_max, lb, ub)
        scaled_run = LearnersScoreCalculator.scale(run, run_min, run_max, lb, ub)
        scaled_mem = LearnersScoreCalculator.scale(mem, mem_min, mem_max, lb, ub)
        emr_scores.append(LearnersScoreCalculator.__cal_emr(
            ub, scaled_err, scaled_run, scaled_mem,
            error_weight, runtime_weight, memory_weight))
    return emr_scores, errors, memory_usages, runtimes
def run(self, stream, random_seed=1):
    """Prequentially evaluate ``self.learner`` over ``stream``.

    Each record is transformed to the representation the learner expects,
    then the learner is tested on it and trained with it (test-then-train).
    The running error rate is appended to the internal error-rate array and
    final stats/plots are produced at the end.

    :param stream: full record container; ``len(stream)`` is used for the
        progress indicator, so it must be a sized collection
    :param random_seed: seed for the ``random`` module
    """
    random.seed(random_seed)
    for record in stream:
        self.__instance_counter += 1
        percentage = (self.__instance_counter / len(stream)) * 100
        print("%0.2f" % percentage + "% of instances are prequentially processed!", end="\r")

        # Skip records containing missing values.
        if "?" in record:
            self.__num_rubbish += 1
            continue

        # ---------------------
        # Data Transformation
        # ---------------------
        # Copy so the original stream record is left untouched.
        r = copy.copy(record)
        for k in range(len(r) - 1):
            if self.learner.LEARNER_CATEGORY == TornadoDic.NOM_CLASSIFIER and self.__attributes[k].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
                r[k] = Discretizer.find_bin(r[k], self.__nominal_attribute_scheme[k])
            elif self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER and self.__attributes[k].TYPE == TornadoDic.NOMINAL_ATTRIBUTE:
                r[k] = NominalToNumericTransformer.map_attribute_value(r[k], self.__numeric_attribute_scheme[k])

        # NORMALIZING NUMERIC DATA
        if self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER:
            r[0:len(r) - 1] = Normalizer.normalize(r[0:len(r) - 1], self.__numeric_attribute_scheme)

        # ----------------------
        # Prequential Learning
        # ----------------------
        if self.learner.is_ready():
            # do_testing presumably updates the learner's confusion matrix
            # (the error rate below is read from it) -- the prediction value
            # itself is unused here since this runner has no drift detection.
            self.learner.do_testing(r)
            if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                self.learner.do_training(r)
            else:
                self.learner.do_loading(r)
        else:
            # Warm-up: train once, then mark the learner ready. The first
            # instance is counted as correctly classified (real == predicted).
            if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                self.learner.do_training(r)
            else:
                self.learner.do_loading(r)
            self.learner.set_ready()
            self.learner.update_confusion_matrix(r[-1], r[-1])

        learner_error_rate = PredictionEvaluator.calculate(TornadoDic.ERROR_RATE,
                                                           self.learner.get_confusion_matrix())
        learner_error_rate = round(learner_error_rate, 4)
        self.__learner_error_rate_array.append(learner_error_rate)

    print("\n" + "The stream is completely processed.")
    self.__store_stats()
    self.__plot()
    print("THE END!")
    print("\a")
def run(self, stream_records, random_seed=1):
    """Prequentially evaluate every (learner, drift detector) pair on the stream.

    For each record, every pair is tested/trained; per-pair error-rate,
    memory and runtime stats plus detector delay/TP/FP/FN stats are appended,
    and every ``score_interval`` records the pairs are re-scored and the
    currently optimal pair is recorded.

    :param stream_records: full list of stream records (``len`` is used for
        progress display and end-of-stream checks)
    :param random_seed: seed used to break ties among equally scored pairs
    """
    random.seed(random_seed)
    for record in stream_records:
        self.__instance_counter += 1

        # Advance the "current drift" index once the acceptance window of the
        # previous actual drift has fully passed.
        if self.drift_loc_index < len(self.actual_drift_points) - 1:
            if self.__instance_counter > self.actual_drift_points[self.drift_loc_index] + self.drift_acceptance_interval:
                self.drift_loc_index += 1
        # Track how many actual drift contexts have started so far.
        if self.drift_current_context < len(self.actual_drift_points):
            if self.__instance_counter > self.actual_drift_points[self.drift_current_context]:
                self.drift_current_context += 1

        percentage = (self.__instance_counter / len(stream_records)) * 100
        print("%0.2f" % percentage + "% of instances are processed!", end="\r")

        # Records with missing values ("?") are skipped entirely.
        if record.__contains__("?"):
            self.__num_rubbish += 1
            continue

        for pair in self.pairs:
            learner = pair[0]
            detector = pair[1]
            index = self.pairs.index(pair)

            # ---------------------
            # DATA TRANSFORMATION
            # ---------------------
            # Copy so each pair sees the untouched record.
            r = copy.copy(record)
            for k in range(0, len(r) - 1):
                if learner.LEARNER_CATEGORY == TornadoDic.NOM_CLASSIFIER and self.attributes[k].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
                    r[k] = Discretizer.find_bin(r[k], self.nominal_attribute_scheme[k])
                elif learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER and self.attributes[k].TYPE == TornadoDic.NOMINAL_ATTRIBUTE:
                    r[k] = NominalToNumericTransformer.map_attribute_value(r[k], self.numeric_attribute_scheme[k])

            # NORMALIZING NUMERIC DATA
            if learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER:
                r[0:len(r) - 1] = Normalizer.normalize(r[0:len(r) - 1], self.numeric_attribute_scheme)

            # ----------------------
            # PREQUENTIAL LEARNING
            # ----------------------
            if learner.is_ready():
                real_class = r[len(r) - 1]
                predicted_class = learner.do_testing(r)
                prediction_status = True
                if real_class != predicted_class:
                    prediction_status = False

                # -----------------------
                # ANY DRIFTS DETECTED?
                # -----------------------
                warning_status, drift_status = detector.detect(prediction_status)
                if drift_status:
                    # APPEND 1 INTO LOCATED DRIFT POINTS
                    self.pair_located_drift_points[index].append(1)

                    # APPENDING ERROR-RATE, MEMORY USAGE, AND RUNTIME OF CLASSIFIER
                    learner_error_rate = PredictionEvaluator.calculate(TornadoDic.ERROR_RATE, learner.get_confusion_matrix())
                    learner_error_rate = round(learner_error_rate, 4)
                    learner_runtime = learner.get_running_time()
                    learner_mem_use = asizeof.asizeof(learner, limit=20) / 1000
                    self.learners_stats[index].append([learner_error_rate, learner_mem_use, learner_runtime])

                    # APPENDING FP, FN, MEMORY USAGE, AND RUNTIME OF DETECTOR
                    delay, [tp_loc, tp], fp, fn, mem, runtime = self.detectors_stats[index][len(self.detectors_stats[index]) - 1]
                    actual_drift_loc = self.actual_drift_points[self.drift_loc_index]
                    if actual_drift_loc <= self.__instance_counter <= actual_drift_loc + self.drift_acceptance_interval:
                        # Inside the acceptance window: a detection too close to
                        # the previous TP counts as a false positive, otherwise
                        # as a new true positive at this instance.
                        if self.__instance_counter - tp_loc < self.drift_acceptance_interval:
                            fp += 1
                        else:
                            tp += 1
                            tp_loc = self.__instance_counter
                    else:
                        fp += 1
                    mem = asizeof.asizeof(detector) / 1000
                    runtime = detector.RUNTIME
                    self.detectors_stats[index].append([delay, [tp_loc, tp], fp, fn, mem, runtime])

                    # Restart this pair after a detected drift.
                    learner.reset()
                    detector.reset()
                    continue

                if learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                    learner.do_training(r)
                else:
                    learner.do_loading(r)
            else:
                # Warm-up: train once, then mark ready; the first instance is
                # counted as correctly classified (real == predicted).
                if learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                    learner.do_training(r)
                else:
                    learner.do_loading(r)
                learner.set_ready()
                learner.update_confusion_matrix(r[len(r) - 1], r[len(r) - 1])

            self.pair_located_drift_points[index].append(0)

            # APPENDING ERROR-RATE, MEMORY USAGE, AND RUNTIME OF CLASSIFIERS
            learner_error_rate = PredictionEvaluator.calculate(TornadoDic.ERROR_RATE, learner.get_confusion_matrix())
            learner_error_rate = round(learner_error_rate, 4)
            # Memory is sampled only every feedback_interval records (asizeof is
            # expensive); otherwise the previous measurement is reused.
            if self.feedback_counter % self.feedback_interval == 0 or self.__instance_counter == len(stream_records):
                learner_mem_use = asizeof.asizeof(learner, limit=20) / 1000
            else:
                learner_mem_use = self.learners_stats[index][len(self.learners_stats[index]) - 1][1]
            learner_runtime = learner.get_running_time()
            self.learners_stats[index].append([learner_error_rate, learner_mem_use, learner_runtime])

            # APPENDING FP, FN, MEMORY USAGE, AND RUNTIME OF DRIFT DETECTORS
            if self.__instance_counter == 1:
                # No previous entry yet: start all detector stats at zero.
                delay, [tp_loc, tp], fp, fn, mem, runtime = [0, [0, 0], 0, 0, 0, 0]
            else:
                delay, [tp_loc, tp], fp, fn, mem, runtime = self.detectors_stats[index][len(self.detectors_stats[index]) - 1]
            runtime = detector.RUNTIME
            # print(runtime)
            if self.feedback_counter % self.feedback_interval == 0 or self.__instance_counter == len(stream_records):
                mem = asizeof.asizeof(detector) / 1000
            if self.drift_current_context >= 1:
                if self.__instance_counter >= self.actual_drift_points[self.drift_current_context - 1]:
                    # Started drift contexts without a matching TP count as FNs.
                    fn = self.drift_current_context - tp
                    if self.__instance_counter <= self.actual_drift_points[self.drift_current_context - 1] + self.drift_acceptance_interval:
                        # Still inside the latest drift's acceptance window and
                        # no TP recorded in it yet: accumulate detection delay.
                        if tp_loc < self.actual_drift_points[self.drift_current_context - 1] or tp_loc > self.actual_drift_points[self.drift_current_context - 1] + self.drift_acceptance_interval:
                            delay += 1
            self.detectors_stats[index].append([delay, [tp_loc, tp], fp, fn, mem, runtime])
            # print(instance_counter, detectors_stats[index][len(detectors_stats[index]) - 1])

        # CALCULATE SCORES & OPTIMAL CHOICE
        if self.score_counter % self.score_interval == 0:
            current_stats = []
            for i in range(0, len(self.pairs)):
                ce, cm, cr = self.learners_stats[i][len(self.learners_stats[i]) - 1]
                dd, [dtp_loc, dtp], dfp, dfn, dm, dr = self.detectors_stats[i][len(self.detectors_stats[i]) - 1]
                current_stats.append([ce, dd, dfp, dfn, cm + dm, cr + dr])
            # current_stats = ScoreProcessor.penalize_high_dfp(fp_level, 2, 1, current_stats)
            # ranked_current_stats = ScoreProcessor.rank_matrix(current_stats)
            scaled_current_stats = ScoreProcessor.normalize_matrix(current_stats)
            scaled_current_scores = ScoreProcessor.calculate_weighted_scores(scaled_current_stats, self.w_vec)
            self.pairs_scores.append(scaled_current_scores)
            # print(scaled_current_scores)
            # Break ties among top-scoring pairs uniformly at random.
            max_score = max(scaled_current_scores)
            indexes = numpy.argwhere(numpy.array(scaled_current_scores) == max_score).flatten().tolist()
            optimal_index = random.choice(indexes)
            # index = scaled_current_scores.index(max(scaled_current_scores))
            learner_name = self.pairs[optimal_index][0].LEARNER_NAME.upper()
            detector_name = self.pairs[optimal_index][1].DETECTOR_NAME.upper()
            optimal = learner_name + " + " + detector_name
            self.optimal_pair.append([optimal_index, optimal])
            # print(optimal)
            # for i in range(0, len(learners_detectors)):
            #     ce, cm, cr = learners_stats[i][len(learners_stats[i]) - 1]
            #     dd, [dtp_loc, dtp], dfp, dfn, dm, dr = detectors_stats[i][len(detectors_stats[i]) - 1]
            #     print("\t", learners_detectors_names[i], [ce, dd, dfp, dfn, cm + dm, cr + dr])

        self.feedback_counter += 1
        self.score_counter += 1

    self.store_stats()
    self.plot()
    self.archive()
    self.print_stats()
    print("THE END")
    print("\a")
def run(self, stream, random_seed=1):
    """Serve prequential drift detection over a blocking TCP socket.

    Each client connection delivers one comma-separated record; the learner
    is tested on it and the detector's verdict is sent back ('true' when a
    drift was detected, 'false' otherwise) before the connection is closed.

    :param stream: unused in this socket-driven variant; records arrive over
        the socket instead (parameter kept for interface parity)
    :param random_seed: seed for the ``random`` module
    """
    random.seed(random_seed)
    import socket
    HOST = '0.0.0.0'  # NOTE(review): binds every interface; use '127.0.0.1' if only local clients are expected
    PORT = 8000
    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Fix: allow immediate restarts instead of failing with "Address already
    # in use" while the old socket lingers in TIME_WAIT.
    server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server.bind((HOST, PORT))
    server.listen(10)
    while True:
        conn, _addr = server.accept()
        # Assumes one whole record fits in a single 1024-byte recv -- TODO
        # confirm the message framing contract with the client.
        client_message = str(conn.recv(1024), encoding='utf-8')
        self.__instance_counter += 1
        record = client_message.split(',')

        # The attribute transformation/normalization performed by the offline
        # runners is skipped here; records are processed as the raw list of
        # comma-separated string fields.
        r = copy.copy(record)
        print(r)

        # ----------------------
        # Prequential Learning
        # ----------------------
        if self.learner.is_ready():
            real_class = r[-1]
            predicted_class = self.learner.do_testing(r)
            prediction_status = True
            if real_class != predicted_class:
                prediction_status = False

            # -----------------------
            # Drift Detected?
            # -----------------------
            warning_status, drift_status = self.drift_detector.detect(prediction_status)
            if drift_status:
                self.__drift_points_boolean.append(1)
                self.__located_drift_points.append(self.__instance_counter)
                print("\n ->>> " + self.learner.LEARNER_NAME.title() +
                      " faced a drift at instance " + str(self.__instance_counter) + ".")
                # Snapshot stats at the drift point, then restart both the
                # learner and the detector.
                learner_error_rate = PredictionEvaluator.calculate(TornadoDic.ERROR_RATE,
                                                                   self.learner.get_global_confusion_matrix())
                self.__learner_error_rate_array.append(round(learner_error_rate, 4))
                self.__learner_memory_usage.append(asizeof.asizeof(self.learner, limit=20))
                self.__learner_runtime.append(self.learner.get_running_time())
                self.__drift_detection_memory_usage.append(asizeof.asizeof(self.drift_detector, limit=20))
                self.__drift_detection_runtime.append(self.drift_detector.RUNTIME)
                self.learner.reset()
                self.drift_detector.reset()
                server_message = 'true'
                conn.sendall(server_message.encode())
                conn.close()
                continue

            # NOTE(review): unlike the other runners, the learner is never
            # re-trained here once it is ready -- confirm this is intentional.
            server_message = 'false'
            conn.sendall(server_message.encode())
            conn.close()
        else:
            # Warm-up: train once, mark ready; first instance counted correct.
            if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                self.learner.do_training(r)
            else:
                self.learner.do_loading(r)
            self.learner.set_ready()
            self.learner.update_confusion_matrix(r[len(r) - 1], r[len(r) - 1])
            # Fix: reply and close during warm-up as well; the original left
            # the client without a response and leaked the connection.
            server_message = 'false'
            conn.sendall(server_message.encode())
            conn.close()

        learner_error_rate = PredictionEvaluator.calculate(TornadoDic.ERROR_RATE,
                                                           self.learner.get_confusion_matrix())
        learner_error_rate = round(learner_error_rate, 4)
        self.__learner_error_rate_array.append(learner_error_rate)

        # Periodic detector memory sampling (disabled when step is -1).
        if self.__memory_check_step != -1:
            if self.__instance_counter % self.__memory_check_step == 0:
                self.__drift_detection_memory_usage.append(asizeof.asizeof(self.drift_detector, limit=20))

        self.__drift_points_boolean.append(0)

    # NOTE(review): unreachable -- the accept loop above never breaks. Kept
    # for parity with the other runners; add a shutdown condition to reach it.
    print("\n" + "The stream is completely processed.")
    self.__store_stats()
    self.__plot()
    print("\n\r" + "THE END!")
    print("\a")
def run(self, stream, random_seed=1):
    """Prequentially evaluate ``self.learner`` paired with ``self.drift_detector``.

    Test-then-train over the stream; when the detector signals a drift the
    current stats are snapshotted and both learner and detector are reset.
    Supports the CDDM detector's probability-based interface as a special case.

    :param stream: full record container; ``len(stream)`` is used for the
        progress indicator, so it must be a sized collection
    :param random_seed: seed for the ``random`` module
    """
    random.seed(random_seed)
    for record in stream:
        self.__instance_counter += 1
        percentage = (self.__instance_counter / len(stream)) * 100
        print("%0.2f" % percentage + "% of instances are prequentially processed!", end="\r")

        # Skip records containing missing values.
        if record.__contains__("?"):
            self.__num_rubbish += 1
            continue

        # ---------------------
        # Data Transformation
        # ---------------------
        # Copy so the original stream record is left untouched.
        r = copy.copy(record)
        for k in range(0, len(r) - 1):
            if self.learner.LEARNER_CATEGORY == TornadoDic.NOM_CLASSIFIER and self.__attributes[k].TYPE == TornadoDic.NUMERIC_ATTRIBUTE:
                r[k] = Discretizer.find_bin(r[k], self.__nominal_attribute_scheme[k])
            elif self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER and self.__attributes[k].TYPE == TornadoDic.NOMINAL_ATTRIBUTE:
                r[k] = NominalToNumericTransformer.map_attribute_value(r[k], self.__numeric_attribute_scheme[k])

        # NORMALIZING NUMERIC DATA
        if self.learner.LEARNER_CATEGORY == TornadoDic.NUM_CLASSIFIER:
            r[0:len(r) - 1] = Normalizer.normalize(r[0:len(r) - 1], self.__numeric_attribute_scheme)

        # ----------------------
        # Prequential Learning
        # ----------------------
        if self.learner.is_ready():
            real_class = r[len(r) - 1]
            predicted_class = self.learner.do_testing(r)
            # CDDM consumes prediction probabilities; every other detector
            # consumes a boolean right/wrong prediction status.
            if self.drift_detector.DETECTOR_NAME == "CDDM":
                proba = self.learner.get_prediction_prob(r)
                warning_status, drift_status = self.drift_detector.detect(proba, real_class)
            else:
                prediction_status = True
                if real_class != predicted_class:
                    prediction_status = False
                warning_status, drift_status = self.drift_detector.detect(prediction_status)

            # -----------------------
            # Drift Detected?
            # -----------------------
            if drift_status:
                self.__drift_points_boolean.append(1)
                self.__located_drift_points.append(self.__instance_counter)
                print("\n ->>> " + self.learner.LEARNER_NAME.title() +
                      " faced a drift at instance " + str(self.__instance_counter) + ".")
                print("%0.2f" % percentage, " of instances are prequentially processed!", end="\r")
                # Snapshot stats at the drift point, then restart both the
                # learner and the detector.
                learner_error_rate = PredictionEvaluator.calculate(TornadoDic.ERROR_RATE,
                                                                   self.learner.get_global_confusion_matrix())
                self.__learner_error_rate_array.append(round(learner_error_rate, 4))
                self.__learner_memory_usage.append(asizeof.asizeof(self.learner, limit=20))
                self.__learner_runtime.append(self.learner.get_running_time())
                self.__drift_detection_memory_usage.append(asizeof.asizeof(self.drift_detector, limit=20))
                self.__drift_detection_runtime.append(self.drift_detector.RUNTIME)
                self.learner.reset()
                self.drift_detector.reset()
                continue

            if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                self.learner.do_training(r)
            else:
                self.learner.do_loading(r)
        else:
            # Warm-up: train once, then mark the learner ready. The first
            # instance is counted as correctly classified (real == predicted).
            if self.learner.LEARNER_TYPE == TornadoDic.TRAINABLE:
                self.learner.do_training(r)
            else:
                self.learner.do_loading(r)
            self.learner.set_ready()
            self.learner.update_confusion_matrix(r[len(r) - 1], r[len(r) - 1])

        learner_error_rate = PredictionEvaluator.calculate(TornadoDic.ERROR_RATE,
                                                           self.learner.get_confusion_matrix())
        learner_error_rate = round(learner_error_rate, 4)
        self.__learner_error_rate_array.append(learner_error_rate)

        # Periodic detector memory sampling (disabled when step is -1).
        if self.__memory_check_step != -1:
            if self.__instance_counter % self.__memory_check_step == 0:
                self.__drift_detection_memory_usage.append(asizeof.asizeof(self.drift_detector, limit=20))

        self.__drift_points_boolean.append(0)

    print("\n" + "The stream is completely processed.")
    self.__store_stats()
    self.__plot()
    print("\n\r" + "THE END!")
    print("\a")
def get_error(self):
    """Return the current error rate derived from this object's confusion matrix."""
    confusion_matrix = self.get_confusion_matrix()
    return PredictionEvaluator.calculate(TornadoDic.ERROR_RATE, confusion_matrix)