def evaluate_real_datasets():
    """Benchmark the detectors on KDDCup plus every pickled dataset, over RUNS seeds.

    Datasets are discovered under ``data/raw/<group>/labeled/train/*``; each
    file becomes a ``RealPickledDataset`` named after the file (with any
    ``.pkl`` removed). For every random seed the detectors are evaluated,
    the per-run plots are produced, and the benchmark results are collected.
    Afterwards the results are averaged per (dataset, algorithm), exported
    under the name ``run_real_datasets``, and box plots are created from the
    raw per-run results.

    NOTE(review): another definition named ``evaluate_real_datasets`` appears
    later in this source; if both live in the same module, the later one
    rebinds the name and this one becomes unreachable -- confirm.
    """
    REAL_DATASET_GROUP_PATH = 'data/raw/'
    real_dataset_groups = glob.glob(REAL_DATASET_GROUP_PATH + '*')
    seeds = np.random.randint(np.iinfo(np.uint32).max, size=RUNS, dtype=np.uint32)

    # Slot 0 is only a placeholder here: it is replaced by a freshly seeded
    # KDDCup instance at the start of every run below.
    datasets = [KDDCup(seed=1)]
    for real_dataset_group in real_dataset_groups:
        for data_set_path in glob.glob(real_dataset_group + '/labeled/train/*'):
            data_set_name = data_set_path.split('/')[-1].replace('.pkl', '')
            datasets.append(RealPickledDataset(data_set_name, data_set_path))

    # Collect per-run results and concatenate once after the loop.
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
    # re-copying the frame on every iteration is quadratic in the run count.
    # Assumes benchmarks() returns a DataFrame -- TODO confirm.
    run_results = []
    for seed in seeds:
        datasets[0] = KDDCup(seed)
        evaluator = Evaluator(datasets, detectors, seed=seed)
        evaluator.evaluate()
        run_results.append(evaluator.benchmarks())
        evaluator.plot_roc_curves()
        evaluator.plot_threshold_comparison()
        evaluator.plot_scores()
    results = pd.concat(run_results, ignore_index=True)

    # The evaluator from the last run is reused to export the averaged
    # results and to draw the box plots over all runs.
    avg_results = results.groupby(['dataset', 'algorithm'], as_index=False).mean()
    evaluator.set_benchmark_results(avg_results)
    evaluator.export_results('run_real_datasets')
    evaluator.create_boxplots(runs=RUNS, data=results, detectorwise=False)
    evaluator.create_boxplots(runs=RUNS, data=results, detectorwise=True)
def evaluate_real_datasets(folder_name=None, skill=None, anomaly_region=None):
    """Evaluate the detectors on one KittingExp dataset and log the benchmark.

    The benchmark result is written as text to
    ``./reports/logs/<folder_name>_result_skill_<skill>.txt``, with
    ``_<anomaly_region>`` appended to the stem when ``anomaly_region`` is
    given. ROC curves are then saved through the evaluator.

    Args:
        folder_name: Forwarded to ``KittingExp`` and used as the log file
            name prefix.
        skill: Forwarded to ``KittingExp`` and ``save_roc_curves``. It is
            formatted with ``%d`` in the log file name, so it must be a
            number; leaving the ``None`` default raises ``TypeError`` when
            the path is built.
        anomaly_region: Optional suffix for the log file name.

    NOTE(review): another definition named ``evaluate_real_datasets`` appears
    earlier in this source; if both live in the same module, this later one
    is the one that stays bound to the name -- confirm.
    """
    # A single fixed seed is used; the loop is kept so more seeds can be
    # added without restructuring.
    seeds = [0]
    for seed in seeds:
        datasets = [KittingExp(seed, folder_name=folder_name, skill=skill)]
        evaluator = Evaluator(datasets, detectors, seed=seed)
        evaluator.evaluate()
        result = evaluator.benchmarks()

        if anomaly_region is None:
            log_path = './reports/logs/%s_result_skill_%d.txt' % (folder_name, skill)
        else:
            log_path = './reports/logs/%s_result_skill_%d_%s.txt' % (
                folder_name, skill, anomaly_region)

        # The context manager closes the handle even if the write raises.
        with open(log_path, 'w') as log_file:
            log_file.write(str(result))

        evaluator.save_roc_curves(skill=skill)