Example #1
import pandas as pd

# NOTE: the module paths below are assumptions about the surrounding project
# layout (Evaluator, DAGMM and the KDDCup dataset wrapper); adjust as needed.
from src.algorithms import DAGMM
from src.datasets import KDDCup
from src.evaluation import Evaluator


def test_kdd_cup():
    def detectors():
        return [DAGMM(num_epochs=10, sequence_length=1)]

    evaluator = Evaluator(
        [KDDCup(21),
         KDDCup(22),
         KDDCup(23),
         KDDCup(24),
         KDDCup(25)], detectors)
    df_evaluation = pd.DataFrame(columns=[
        'dataset', 'algorithm', 'accuracy', 'precision', 'recall',
        'F1-score', 'F0.1-score'
    ])

    evaluator.evaluate()
    df = evaluator.benchmarks()
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent call.
    df_evaluation = pd.concat([df_evaluation, df])

    print(df_evaluation.to_string())
    assert (df_evaluation == 0).sum().sum() == 0  # No zeroes in the DataFrame
    assert df_evaluation['F1-score'].std() > 0  # Not always the same value
    # Values reported in the paper -1% each
    assert df_evaluation['precision'].mean() >= 0.91
    assert df_evaluation['recall'].mean() >= 0.93
    assert df_evaluation['F1-score'].mean() >= 0.92
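A quick interactive variant of the same pipeline is sketched below. It is a minimal sketch assuming the imports and the callable-detectors convention used above, evaluating a single KDDCup subset instead of five; the helper name is hypothetical.

# Minimal sketch (assumes the imports from Example #1): evaluate one KDDCup
# subset with a single DAGMM detector and print the benchmark table.
def single_kdd_run():
    evaluator = Evaluator([KDDCup(21)],
                          lambda: [DAGMM(num_epochs=10, sequence_length=1)])
    evaluator.evaluate()
    print(evaluator.benchmarks().to_string())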
Example #2
import pandas as pd

# NOTE: module paths are assumptions about the surrounding project layout; adjust as needed.
from src.datasets import SyntheticDataGenerator
from src.evaluation import Evaluator


def run_different_window_sizes_evaluator(detectors, seeds, runs):
    """Evaluate the detectors on the long-term-dependency synthetic datasets once
    per seed, then export the pooled benchmark results and draw box plots."""
    results = pd.DataFrame()
    for seed in seeds:
        datasets = [SyntheticDataGenerator.long_term_dependencies_width(seed),
                    SyntheticDataGenerator.long_term_dependencies_height(seed),
                    SyntheticDataGenerator.long_term_dependencies_missing(seed)]
        evaluator = Evaluator(datasets, detectors, seed=seed)
        evaluator.evaluate()
        evaluator.plot_scores()
        result = evaluator.benchmarks()
        results = pd.concat([results, result], ignore_index=True)  # DataFrame.append removed in pandas 2.0
    evaluator.set_benchmark_results(results)
    evaluator.export_results('run_different_windows')
    evaluator.create_boxplots(runs=runs, data=results, detectorwise=False)
    evaluator.create_boxplots(runs=runs, data=results, detectorwise=True)
    return evaluator
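A possible call site for this helper is sketched below. The DAGMM-based detector factory, the seed values, and the final read of benchmark_results are illustrative assumptions, not part of the original snippet; the detectors argument is passed in the same callable form used in Example #1.

# Illustrative usage sketch (assumed detector factory, seeds and import path).
from src.algorithms import DAGMM  # assumed module path; adjust to your layout

def detectors():
    return [DAGMM(num_epochs=10, sequence_length=15)]

seeds = [42, 1337, 2018]  # arbitrary example seeds, one evaluation run per seed
evaluator = run_different_window_sizes_evaluator(detectors, seeds, runs=len(seeds))
print(evaluator.benchmark_results.to_string())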
Example #3
import pandas as pd

# NOTE: the Evaluator import path is an assumption about the surrounding project
# layout; get_datasets_for_multiple_runs is expected to be defined alongside this helper.
from src.evaluation import Evaluator


def run_experiment_evaluation(detectors, seeds, runs, output_dir, anomaly_type, steps=5, outlier_type='extreme_1',
                              store_results=True):
    datasets = list(get_datasets_for_multiple_runs(anomaly_type, seeds, steps, outlier_type))
    results = pd.DataFrame()
    evaluator = None

    for index, seed in enumerate(seeds):
        evaluator = Evaluator(datasets[index], detectors, output_dir, seed=seed)
        evaluator.evaluate()
        result = evaluator.benchmarks()
        evaluator.plot_roc_curves(store=store_results)
        evaluator.plot_threshold_comparison(store=store_results)
        evaluator.plot_scores(store=store_results)
        evaluator.set_benchmark_results(result)
        evaluator.export_results(f'experiment-run-{index}-{seed}')
        results = pd.concat([results, result], ignore_index=True)  # DataFrame.append removed in pandas 2.0

    if not store_results:
        return

    # set average results from multiple pipeline runs for evaluation
    avg_results = results.groupby(['dataset', 'algorithm'], as_index=False).mean()
    evaluator.set_benchmark_results(avg_results)
    evaluator.export_results(f'experiment-{anomaly_type}')

    # Plots which need the whole data (not averaged)
    evaluator.create_boxplots(runs=runs, data=results, detectorwise=True, store=store_results)
    evaluator.create_boxplots(runs=runs, data=results, detectorwise=False, store=store_results)
    evaluator.gen_merged_tables(results, f'for_{anomaly_type}', store=store_results)

    # Plots using 'self.benchmark_results' -> using the averaged results
    evaluator.create_bar_charts(runs=runs, detectorwise=True, store=store_results)
    evaluator.create_bar_charts(runs=runs, detectorwise=False, store=store_results)
    evaluator.plot_auroc(title=f'Area under the curve for differing {anomaly_type} anomalies', store=store_results)

    # Plots using 'self.results' (need the score) -> only from the last run
    evaluator.plot_threshold_comparison(store=store_results)
    evaluator.plot_scores(store=store_results)
    evaluator.plot_roc_curves(store=store_results)

    return evaluator
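The experiment driver above can be invoked roughly as follows. The detector factory, seeds, output directory, and the 'extreme' anomaly_type label are assumptions chosen for illustration; with store_results left at its default of True, the run exports the averaged benchmarks and produces the plots listed above.

# Illustrative call (assumed detector factory, seeds, output_dir and anomaly_type).
from src.algorithms import DAGMM  # assumed module path; adjust to your layout

def detectors():
    return [DAGMM(num_epochs=10, sequence_length=15)]

seeds = [42, 1337, 2018]
evaluator = run_experiment_evaluation(detectors, seeds, runs=len(seeds),
                                      output_dir='reports/experiments',
                                      anomaly_type='extreme')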