示例#1
0
def test_y_None():
    """eval_models must raise TypeError when the label frame is None."""
    data_path = os.path.join(THIS_DIR, os.pardir, 'test/dummy.txt')
    frame = read_data(data_path)
    features = frame[['value']]
    labels = None

    detectors = [MockModel()]
    with pytest.raises(TypeError):
        eval_models(features, labels, detectors, label_col_name='is_anomaly')
示例#2
0
def test_real_model():
    """Smoke test: run eval_models with a real STL-based detector."""
    data_path = os.path.join(THIS_DIR, os.pardir, 'test/dummy.txt')
    frame = read_data(data_path)
    detectors = [StlTrendinessDetector(is_multicategory=True, freq='12H')]
    features = frame[['value']]
    labels = frame[['is_anomaly']]

    result = eval_models(features, labels, detectors,
                         label_col_name='is_anomaly')
    print(result)
示例#3
0
def test_X_None():
    """eval_models must raise TypeError when the feature frame is None."""
    X = None
    y = None
    model = MockModel()
    models = [model]
    # Consistent with test_y_None: assert the exception directly with
    # pytest.raises instead of the try/except/assert False pattern, which
    # obscures the failure reason when the wrong exception escapes.
    with pytest.raises(TypeError):
        eval_models(X, y, models, label_col_name='is_anomaly')
示例#4
0
def test_eval_models_all_false():
    """With all-zero labels: precision is 0 while f1 and recall are NaN."""
    data_path = os.path.join(THIS_DIR, os.pardir, 'test/dummy.txt')
    frame = read_data(data_path)
    frame['is_anomaly'] = 0

    detectors = [MockModel()]
    features = frame[['value']]
    labels = frame[['is_anomaly']]
    result = eval_models(features, labels, detectors,
                         label_col_name='is_anomaly')
    scores = result['MockModel']
    assert math.isnan(scores['f1'])
    assert scores['precision'] == 0
    assert math.isnan(scores['recall'])
示例#5
0
def test_eval_models_half_false():
    """With the last two rows labeled anomalous: precision 0.5, recall 1.0."""
    path = os.path.join(THIS_DIR, os.pardir, 'test/dummy2.txt')
    df = read_data(path)
    df['is_anomaly'] = 0
    # BUG FIX: `df.iloc[-1]['is_anomaly'] = 1` is chained assignment — it
    # writes into a temporary copy returned by `df.iloc[-1]` and does not
    # reliably modify df (pandas SettingWithCopy). Use a single .iloc
    # indexer with the column position so the write lands in df itself.
    label_col = df.columns.get_loc('is_anomaly')
    df.iloc[-1, label_col] = 1
    df.iloc[-2, label_col] = 1

    model = MockModel()
    models = [model]
    X = df[['value']]
    y = df[['is_anomaly']]
    res = eval_models(X, y, models, label_col_name='is_anomaly',
                      window_size_for_metrics=0)
    assert res['MockModel']['precision'] == 0.5
    assert res['MockModel']['recall'] == 1.0
示例#6
0
def _comet_log_run(dataset, datapath, model, result,
                   window_size_for_metrics, extra_params):
    """Record one grid-search run (parameters + metrics) to comet.ml.

    extra_params: dict of run-specific hyperparameters to log verbatim.
    """
    # SECURITY NOTE(review): the API key is hard-coded; it should be moved
    # to an environment variable or secrets store before this file is shared.
    experiment = Experiment(
        api_key=
        "Uv0lx3yRDH7kk8h1vtR9ZRiD2s16gnYTxfsvK2VnpV2xRrMbFobYDZRRA4tvoYiR",
        project_name="trending-topics")
    experiment.log_dataset_hash(dataset)
    experiment.log_parameter("model", model.__name__)
    experiment.log_parameter("dataset", datapath)
    experiment.log_parameter("window_size_for_metrics",
                             window_size_for_metrics)
    for name, value in extra_params.items():
        experiment.log_parameter(name, value)
    scores = result[str(model.__name__)]
    experiment.log_metric("f1", scores['f1'])
    experiment.log_metric("f0.5", scores['f0.5'])
    experiment.log_metric("precision", scores['precision'])
    experiment.log_metric("recall", scores['recall'])


def eval_model(datapath="../SF3H_labeled.csv",
               min_date='01-01-2018',
               freq='3H',
               use_comet=True):
    """Grid-search STL and Twitter anomaly detectors over a labeled dataset.

    Reads the labeled data, indexes it by (date, category), then evaluates
    each hyperparameter combination with eval_models, printing the scores
    and (optionally) logging every run to comet.ml.

    :param datapath: path to the labeled CSV consumed by read_labeled_data.
    :param min_date: keep only rows strictly after this date.
    :param freq: resampling frequency passed to the detectors.
    :param use_comet: when True, log each run to a comet.ml experiment.
    """
    dataset = read_labeled_data(datapath)
    dataset['date'] = pd.DatetimeIndex(dataset['date'])
    dataset.set_index('date', inplace=True)
    dataset = dataset[dataset.index > min_date]
    dataset = dataset.reset_index().set_index(['date', 'category'])

    # Drop duplicate (date, category) rows, keeping the first occurrence.
    dataset = dataset[~dataset.index.duplicated(keep='first')]

    X = dataset[['value']]
    y = dataset[['is_anomaly']]

    window_size_for_metrics = 3
    min_value = 15

    ## First model family: STL trendiness detector.
    anomaly_types = ['residual', 'trend', 'and', 'or']
    for num_std in [2, 2.5, 3, 3.5, 4]:
        for anomaly_type in anomaly_types:
            model = StlTrendinessDetector(is_multicategory=True,
                                          freq=freq,
                                          min_value=min_value,
                                          anomaly_type=anomaly_type,
                                          num_of_std=num_std)
            result = eval_models(
                X,
                y, [model],
                label_col_name='is_anomaly',
                train_percent=50,
                window_size_for_metrics=window_size_for_metrics)
            print('num_std = ' + str(num_std) + ', anomaly_type = ' +
                  str(anomaly_type) + ', min_value = ' + str(min_value) +
                  ', dataset = ' + datapath)
            print('F1 score = ' + str(result[model.__name__]['f1']) +
                  ", precision = " + str(result[model.__name__]['precision']) +
                  ", recall = " + str(result[model.__name__]['recall']))

            if use_comet:
                _comet_log_run(dataset, datapath, model, result,
                               window_size_for_metrics,
                               {"num_of_std": num_std,
                                "anomaly_type": anomaly_type})

    ## Second model family: Twitter anomaly detector.
    max_anoms_list = [0.05, 0.1]
    for max_anoms in max_anoms_list:
        for threshold in [None, 'med_max', 'p95', 'p99']:
            for alpha in [0.05, 0.1, 0.15]:
                model = TwitterAnomalyTrendinessDetector(is_multicategory=True,
                                                         freq=freq,
                                                         min_value=min_value,
                                                         threshold=threshold,
                                                         max_anoms=max_anoms,
                                                         longterm=False,
                                                         alpha=alpha,
                                                         seasonality_freq=7)

                result = eval_models(
                    X,
                    y, [model],
                    label_col_name='is_anomaly',
                    train_percent=50,
                    window_size_for_metrics=window_size_for_metrics)
                if threshold is None:
                    print('Threshold = None, Alpha = ' + str(alpha) +
                          ', max_anoms = None, min_value = ' + str(min_value) +
                          ', dataset = ' + datapath)
                else:
                    print('Threshold = ' + threshold + ', Alpha = ' +
                          str(alpha) + ', max_anoms = None, min_value = ' +
                          str(min_value) + ', dataset = ' + datapath)
                print('F1 score = ' + str(result[model.__name__]['f1']) +
                      ", precision = " +
                      str(result[model.__name__]['precision']) +
                      ", recall = " + str(result[model.__name__]['recall']))

                if use_comet:
                    # BUG FIX: the original logged max_anoms as the literal
                    # 0.49 and longterm as True, although the model was built
                    # with the loop's max_anoms value and longterm=False.
                    # Log the values actually used for this run.
                    _comet_log_run(dataset, datapath, model, result,
                                   window_size_for_metrics,
                                   {"max_anoms": max_anoms,
                                    "threshold": threshold,
                                    "alpha": alpha,
                                    "longterm": False})