def work(train_files: Tuple[str, str], test_files: Tuple[str, str],
         hyperparam: dict, fault_list: List[dict]):
    """Train (or load) a Bagel model on the study-group KPI and score its test set.

    Args:
        train_files: ``(study_train_file, control_train_file)`` paths.
        test_files: ``(study_test_file, control_test_file)`` paths.
        hyperparam: nested dict; reads ``hyperparam["bagel"]["window_size"]``,
            ``["time_feature"]`` and ``["epochs"]``.
        fault_list: fault descriptors; not used by this variant — kept so the
            signature matches the sibling ``work`` implementation.
    """
    # bagel hyperparams
    bagel_window_size = hyperparam["bagel"]["window_size"]
    time_feature = hyperparam["bagel"]["time_feature"]
    epochs = hyperparam["bagel"]["epochs"]

    study_train_file, control_train_file = train_files
    study_test_file, control_test_file = test_files

    model = bagel.Bagel(window_size=bagel_window_size,
                        time_feature=time_feature)

    # study group
    study_train_kpi = bagel.utils.load_kpi(study_train_file)
    study_test_kpi = bagel.utils.load_kpi(study_test_file)

    # Checkpoint name is derived from the training-file path (first two path
    # components dropped); backslashes normalized for cross-platform paths.
    study_sign = "_".join(
        study_train_file.split(os.path.sep)[2:]).replace("\\", "/")
    study_model_save_path = os.path.join(PROJECT_PATH, "variables",
                                         study_sign).replace("\\", "/")
    # A saved checkpoint is detected via its ".index" companion file.
    if os.path.exists(
            os.path.join(PROJECT_PATH, "variables",
                         study_sign + ".index").replace("\\", "/")):
        model.load(study_model_save_path)
    else:
        model.fit(study_train_kpi, epochs=epochs, verbose=0)
        model.save(study_model_save_path)

    try:
        _, _, _, pred_data = model.predict_one(study_test_kpi)
    except Exception as exc:
        # Was a bare `except: pass`, which silently hid every failure
        # (including KeyboardInterrupt). Keep the best-effort behavior but
        # leave a trace so real bugs are visible.
        print(f"predict_one failed: {exc!r}")
def main():
    """Train and evaluate a Bagel model on every KPI file under ``input_path``,
    writing one metrics report per KPI into ``output_path``."""
    bagel.utils.mkdirs(output_path)
    for path in bagel.utils.file_list(input_path):
        kpi = bagel.utils.load_kpi(path)
        print(f'KPI: {kpi.name}')
        kpi.complete_timestamp()

        # 49/21/30 split; standardization statistics come from train only.
        train_kpi, valid_kpi, test_kpi = kpi.split((0.49, 0.21, 0.3))
        train_kpi, mean, std = train_kpi.standardize()
        valid_kpi, _, _ = valid_kpi.standardize(mean=mean, std=std)
        test_kpi, _, _ = test_kpi.standardize(mean=mean, std=std)

        model = bagel.Bagel()
        model.fit(kpi=train_kpi.use_labels(0.), validation_kpi=valid_kpi,
                  epochs=epochs, verbose=1)
        anomaly_scores = model.predict(test_kpi)

        results = bagel.testing.get_test_results(labels=test_kpi.labels,
                                                 scores=anomaly_scores,
                                                 missing=test_kpi.missing,
                                                 window_size=120)
        stats = bagel.testing.get_kpi_stats(kpi, test_kpi)

        print('Metrics')
        print(f'precision: {results.get("precision"):.3f} - '
              f'recall: {results.get("recall"):.3f} - '
              f'f1score: {results.get("f1score"):.3f}\n')

        # Assemble the whole report first, then write it in one call.
        report = (f'kpi_name={kpi.name}\n\n'
                  '[result]\n'
                  f'threshold={results.get("threshold")}\n'
                  f'precision={results.get("precision"):.3f}\n'
                  f'recall={results.get("recall"):.3f}\n'
                  f'f1_score={results.get("f1score"):.3f}\n\n'
                  '[overall]\n'
                  f'num_points={stats[0].num_points}\n'
                  f'num_missing_points={stats[0].num_missing}\n'
                  f'missing_rate={stats[0].missing_rate:.6f}\n'
                  f'num_anomaly_points={stats[0].num_anomaly}\n'
                  f'anomaly_rate={stats[0].anomaly_rate:.6f}\n\n'
                  '[test]\n'
                  f'num_points={stats[1].num_points}\n'
                  f'num_missing_points={stats[1].num_missing}\n'
                  f'missing_rate={stats[1].missing_rate:.6f}\n'
                  f'num_anomaly_points={stats[1].num_anomaly}\n'
                  f'anomaly_rate={stats[1].anomaly_rate:.6f}\n')
        with open(output_path.joinpath(f'{kpi.name}.txt'), 'w') as output:
            output.write(report)
def call(self, body: dict):
    """`API call` interface.

    Args:
        body: the ``data`` field of the request JSON, i.e. ``request_body.data``
            in ``{ "data": {{ body }} }``.
    """
    # required test series
    test_ts = body["test_ts"]
    test_value = body["test_value"]
    # optional training series
    train_ts = body.get("train_ts")
    train_value = body.get("train_value")

    # optional hyper-parameters (same defaults as the CLI flags)
    window: int = body.get("bagel_window_size", 30)
    feature: str = body.get("time_feature", "MH")
    self.bagel_epochs = body.get("bagel_epochs", 50)
    self.mad_window_size = body.get("mad_window_size", 5)
    self.spot_init_num = body.get("spot_init_num", 1000)

    self.model = bagel.Bagel(window_size=window, time_feature=feature)
    has_train = train_value is not None and train_ts is not None
    self.train_kpi = KPI(train_ts, train_value) if has_train else None
    self.test_kpi = KPI(test_ts, test_value)
def work(train_files: Tuple[str, str], test_files: Tuple[str, str],
         hyperparam: dict, fault_list: List[dict]):
    """Full anomaly-detection pipeline for one study/control KPI pair:
    Bagel scoring -> MAD filter -> SPOT thresholding -> optional plot.

    Args:
        train_files: ``(study_train_file, control_train_file)`` paths.
        test_files: ``(study_test_file, control_test_file)`` paths.
        hyperparam: nested dict with "bagel", "mad" and "spot" sections.
        fault_list: dicts with a "start" timestamp; the first fault whose
            start falls inside the test window is marked on the plot.
    """
    # bagel hyperparams
    bagel_window_size = hyperparam["bagel"]["window_size"]
    time_feature = hyperparam["bagel"]["time_feature"]
    epochs = hyperparam["bagel"]["epochs"]
    # mad hyperparams
    mad_window_size = hyperparam["mad"]["window_size"]
    # spot hyperparams
    spot_init_num = hyperparam["spot"]["init_num"]

    study_train_file, control_train_file = train_files
    study_test_file, control_test_file = test_files
    name = os.path.splitext(os.path.basename(study_train_file))[0]
    svc = os.path.basename(os.path.dirname(study_train_file))

    model = bagel.Bagel(window_size=bagel_window_size,
                        time_feature=time_feature)

    # study group
    study_train_kpi = bagel.utils.load_kpi(study_train_file)
    study_test_kpi = bagel.utils.load_kpi(study_test_file)

    # Checkpoint name derived from the training-file path (first two path
    # components dropped); a checkpoint is detected via its ".index" file.
    study_sign = "_".join(study_train_file.split(os.path.sep)[2:])
    study_model_save_path = os.path.join(PROJECT_PATH, "variables", study_sign)
    if os.path.exists(
            os.path.join(PROJECT_PATH, "variables", study_sign + ".index")):
        model.load(study_model_save_path)
    else:
        model.fit(study_train_kpi, epochs=epochs, verbose=0)
        model.save(study_model_save_path)

    try:
        anomaly_scores, x_mean, x_std = model.predict(study_test_kpi,
                                                      verbose=0)
        # fixed typo: local was named `train_data_anoamly_sc`
        train_data_anomaly_sc, _, _ = model.predict(study_train_kpi, verbose=0)
    except Exception:
        # Was a bare `except:` — that also caught SystemExit/KeyboardInterrupt.
        print("\033[36m 数据缺失... \033[0m")  # "data missing" — skip this pair
        return

    # control group
    control_train_kpi = bagel.utils.load_kpi(control_train_file)
    control_test_kpi = bagel.utils.load_kpi(control_test_file)

    # remove window_size - 1 points ahead (no complete window for them)
    _, study_test_kpi = study_test_kpi.split_by_indices(bagel_window_size - 1)
    _, control_test_kpi = control_test_kpi.split_by_indices(
        bagel_window_size - 1)

    # mad
    mad_filter = mad(study_test_kpi.raw_values, name, mad_window_size)
    # spot: initialize on the tail of the training scores
    pred_label, spot_threshold = run_spot(
        train_data_anomaly_sc[-spot_init_num:], anomaly_scores, mad_filter)

    # plot
    if PLOT_FLAG:
        try:
            post_sub_path = study_test_file.split(os.path.sep)[-5:]
            post_sub_path.remove("data")
            if "219" in post_sub_path:
                post_sub_path.remove("219")
            if "220" in post_sub_path:
                post_sub_path.remove("220")
            # NOTE(review): replaces "csv" anywhere in the path, not only the
            # file extension — fragile if a directory name contains "csv".
            save_path = os.path.join(
                PROJECT_PATH, "img",
                os.path.sep.join(post_sub_path)).replace("csv", "png")
            # Fold the service directory name into the image file name.
            split_save_path = save_path.split(os.path.sep)
            svc = split_save_path.pop(-2)
            save_path = os.path.sep.join(split_save_path)
            save_path = save_path.replace(".png", f"_{svc}.png")

            study_test_ts = study_test_kpi.timestamps
            # First fault whose start lies inside the test window, if any.
            change_ts = None
            for fault in fault_list:
                if np.min(study_test_ts) <= fault["start"] <= np.max(
                        study_test_ts):
                    change_ts = fault["start"]
                    break
            integrate_plot(study_test_kpi, control_test_kpi, anomaly_scores,
                           x_mean, x_std, pred_label, spot_threshold, name,
                           svc, save_path=save_path, change_ts=change_ts)
        # 数据缺失时不画了 (don't plot when data is missing)
        except Exception:  # was a bare `except:`
            print("\033[36m 数据缺失... \033[0m")
            return
def run(self):
    """CLI entry point.

    Usage::

        anomaly_detection = AnomalyDetection()
        anomaly_detection.run()

    Parses command-line flags, loads the data, and stores the resulting
    KPIs, hyper-parameters and Bagel model on ``self``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--data_dir", metavar="data_dir",
                        dest="data_dir", type=str, required=True,
                        help="The input data.")
    parser.add_argument("-p", "--param_dir", metavar="param_dir",
                        dest="param_dir", type=str, required=False,
                        default=None,
                        help="The directory where checkpoint is located.")
    # bagel
    parser.add_argument(
        "-bw", "--bagel_window_size", type=int, required=False,
        metavar="bagel_window_size", dest="bagel_window_size", default=30,
        help="The window size of bagel's time series. default: `30`")
    parser.add_argument(
        "-e", "--bagel_epochs", type=int, required=False, default=50,
        metavar="bagel_epochs", dest="bagel_epochs",
        help="The number of epochs during the training step. default: `50`")
    parser.add_argument(
        "-t", "--time_feature", type=str, required=False, default="MH",
        metavar="time_feature", dest="time_feature",
        help="The dimension of time feature during the time encoding stage. "
             "`a|A|w` week; `H` one day; `I` half of the day; `M` minute; "
             "`S` second; default: `MH`")
    # median diff
    parser.add_argument(
        "-mw", "--mad_window_size", type=int, required=False, default=5,
        metavar="mad_window_size", dest="mad_window_size",
        help="The window size used for comparing the difference of median. "
             "default: `5`")
    # spot
    parser.add_argument(
        "-n", "--spot_init_num", type=int, required=False, default=1000,
        metavar="spot_init_num", dest="spot_init_num",
        help="The number of points used for initializing SPOT. default `1000`")
    cfg = vars(parser.parse_args())

    # load data
    train_value, train_ts, test_value, test_ts = self.load_data(
        cfg["data_dir"])

    self.bagel_epochs = cfg["bagel_epochs"]
    self.mad_window_size = cfg["mad_window_size"]
    self.spot_init_num = cfg["spot_init_num"]

    has_train = train_value is not None and train_ts is not None
    self.train_kpi = KPI(train_ts, train_value) if has_train else None
    self.test_kpi = KPI(test_ts, test_value)
    self.model = bagel.Bagel(window_size=cfg["bagel_window_size"],
                             time_feature=cfg["time_feature"])