def test_y_None():
    """eval_models must raise TypeError when the labels argument is None."""
    frame = read_data(os.path.join(THIS_DIR, os.pardir, 'test/dummy.txt'))
    features = frame[['value']]
    detectors = [MockModel()]
    # Features are valid here; only the labels are missing.
    with pytest.raises(TypeError):
        eval_models(features, None, detectors, label_col_name='is_anomaly')
def test_real_model():
    """Run eval_models end to end with a StlTrendinessDetector on the dummy data.

    This is a smoke test: it makes no assertions and only prints the result.
    """
    data_file = os.path.join(THIS_DIR, os.pardir, 'test/dummy.txt')
    frame = read_data(data_file)
    detector = StlTrendinessDetector(is_multicategory=True, freq='12H')
    scores = eval_models(
        frame[['value']],
        frame[['is_anomaly']],
        [detector],
        label_col_name='is_anomaly',
    )
    print(scores)
def test_X_None():
    """eval_models must raise TypeError when both X and y are None."""
    models = [MockModel()]
    # pytest.raises fails the test if no TypeError is raised and lets any
    # other exception type propagate, matching the intent of the previous
    # hand-written try/except/assert False construct.
    with pytest.raises(TypeError):
        eval_models(None, None, models, label_col_name='is_anomaly')
def test_eval_models_all_false():
    """Check MockModel's metrics when every label is forced to 0.

    Expected: f1 and recall are NaN and precision equals 0.
    """
    frame = read_data(os.path.join(THIS_DIR, os.pardir, 'test/dummy.txt'))
    frame['is_anomaly'] = 0
    detectors = [MockModel()]
    features = frame[['value']]
    labels = frame[['is_anomaly']]
    scores = eval_models(features, labels, detectors,
                         label_col_name='is_anomaly')
    mock_scores = scores['MockModel']
    assert math.isnan(mock_scores['f1'])
    assert mock_scores['precision'] == 0
    assert math.isnan(mock_scores['recall'])
def test_eval_models_half_false():
    """Check MockModel's metrics when only the last two rows are anomalies.

    All labels are reset to 0, the last two rows are set to 1, and the
    evaluation runs with ``window_size_for_metrics=0``. Expected: precision
    0.5 and recall 1.0.
    """
    path = os.path.join(THIS_DIR, os.pardir, 'test/dummy2.txt')
    df = read_data(path)
    df['is_anomaly'] = 0

    # Assign through a single positional indexer. The chained form
    # ``df.iloc[-1]['is_anomaly'] = 1`` writes to a temporary row object and
    # may leave ``df`` itself unchanged.
    label_col = df.columns.get_loc('is_anomaly')
    df.iloc[-1, label_col] = 1
    df.iloc[-2, label_col] = 1

    models = [MockModel()]
    X = df[['value']]
    y = df[['is_anomaly']]
    res = eval_models(X, y, models, label_col_name='is_anomaly',
                      window_size_for_metrics=0)
    assert res['MockModel']['precision'] == 0.5
    assert res['MockModel']['recall'] == 1.0
def _print_scores(scores):
    """Print the f1, precision and recall entries of one model's score mapping."""
    print('F1 score = ' + str(scores['f1']) + ", precision = " +
          str(scores['precision']) + ", recall = " + str(scores['recall']))


def _log_comet_experiment(dataset, parameters, scores):
    """Record one evaluation run as a Comet experiment in "trending-topics".

    :param dataset: data set whose hash is logged with the experiment
    :param parameters: iterable of ``(name, value)`` pairs, logged in order
    :param scores: mapping holding the 'f1', 'f0.5', 'precision' and
        'recall' metrics of the evaluated model
    """
    # NOTE(review): this credential is hard-coded in source. It is kept so
    # behaviour is unchanged, but it should be rotated and supplied through
    # configuration instead of being committed.
    experiment = Experiment(
        api_key="Uv0lx3yRDH7kk8h1vtR9ZRiD2s16gnYTxfsvK2VnpV2xRrMbFobYDZRRA4tvoYiR",
        project_name="trending-topics")
    experiment.log_dataset_hash(dataset)
    for name, value in parameters:
        experiment.log_parameter(name, value)
    for metric in ("f1", "f0.5", "precision", "recall"):
        experiment.log_metric(metric, scores[metric])


def eval_model(datapath="../SF3H_labeled.csv", min_date='01-01-2018',
               freq='3H', use_comet=True):
    """Sweep hyper-parameters of two trendiness detectors on a labeled data set.

    The data is read with ``read_labeled_data``, restricted to rows whose date
    is later than ``min_date``, indexed by ``(date, category)`` and
    de-duplicated on that index. A grid of ``StlTrendinessDetector``
    configurations is evaluated first, then a grid of
    ``TwitterAnomalyTrendinessDetector`` configurations. Every run is scored
    with ``eval_models`` and its scores are printed.

    :param datapath: path handed to ``read_labeled_data``
    :param min_date: only rows with a date strictly greater than this are kept
    :param freq: frequency string forwarded to both detectors
    :param use_comet: when true, each run is also logged as a Comet experiment
    :return: None
    """
    dataset = read_labeled_data(datapath)
    dataset['date'] = pd.DatetimeIndex(dataset['date'])
    dataset.set_index('date', inplace=True)
    dataset = dataset[dataset.index > min_date]
    dataset = dataset.reset_index().set_index(['date', 'category'])
    # Keep only the first row for every (date, category) pair.
    dataset = dataset[~dataset.index.duplicated(keep='first')]

    X = dataset[['value']]
    y = dataset[['is_anomaly']]

    window_size_for_metrics = 3
    min_value = 15

    # First model: STL
    anomaly_types = ['residual', 'trend', 'and', 'or']
    for num_std in [2, 2.5, 3, 3.5, 4]:
        for anomaly_type in anomaly_types:
            model = StlTrendinessDetector(is_multicategory=True,
                                          freq=freq,
                                          min_value=min_value,
                                          anomaly_type=anomaly_type,
                                          num_of_std=num_std)
            result = eval_models(
                X, y, [model],
                label_col_name='is_anomaly',
                train_percent=50,
                window_size_for_metrics=window_size_for_metrics)
            print('num_std = ' + str(num_std) +
                  ', anomaly_type = ' + str(anomaly_type) +
                  ', min_value = ' + str(min_value) +
                  ', dataset = ' + datapath)
            _print_scores(result[model.__name__])

            if use_comet:
                _log_comet_experiment(
                    dataset,
                    [("model", model.__name__),
                     ("dataset", datapath),
                     ("num_of_std", num_std),
                     ("anomaly_type", anomaly_type),
                     ("window_size_for_metrics", window_size_for_metrics)],
                    result[str(model.__name__)])

    # Second model: Twitter anomaly detection
    longterm = False
    max_anoms_list = [0.05, 0.1]
    for max_anoms in max_anoms_list:
        for threshold in [None, 'med_max', 'p95', 'p99']:
            for alpha in [0.05, 0.1, 0.15]:
                model = TwitterAnomalyTrendinessDetector(
                    is_multicategory=True,
                    freq=freq,
                    min_value=min_value,
                    threshold=threshold,
                    max_anoms=max_anoms,
                    longterm=longterm,
                    alpha=alpha,
                    seasonality_freq=7)
                result = eval_models(
                    X, y, [model],
                    label_col_name='is_anomaly',
                    train_percent=50,
                    window_size_for_metrics=window_size_for_metrics)
                # str() covers the threshold=None case; report the max_anoms
                # value actually given to the model.
                print('Threshold = ' + str(threshold) +
                      ', Alpha = ' + str(alpha) +
                      ', max_anoms = ' + str(max_anoms) +
                      ', min_value = ' + str(min_value) +
                      ', dataset = ' + datapath)
                _print_scores(result[model.__name__])

                if use_comet:
                    # Log the values the model was really built with, so the
                    # experiment record matches the run.
                    _log_comet_experiment(
                        dataset,
                        [("model", model.__name__),
                         ("max_anoms", max_anoms),
                         ("threshold", threshold),
                         ("alpha", alpha),
                         ("longterm", longterm),
                         ("dataset", datapath),
                         ("window_size_for_metrics", window_size_for_metrics)],
                        result[str(model.__name__)])