Exemplo n.º 1
0
def test_two_categories():
    """Two-category input should yield exactly two positive predictions."""
    data_file = os.path.join(THIS_DIR, os.pardir, 'tests/data/dummy4.txt')
    data = read_data(data_file)
    detector = MovingAverageSeasonalTrendinessDetector(freq='12H',
                                                       anomaly_type='or',
                                                       lookback='5D')
    result = detector.predict(data)
    flagged = result[result['prediction'] == 1]
    assert len(flagged) == 2
Exemplo n.º 2
0
def test_single_category():
    """A single-category frame (is_multicategory=False) should flag one anomaly."""
    data_file = os.path.join(THIS_DIR, os.pardir, 'tests/data/dummy4.txt')
    data = read_data(data_file)
    # Keep only the 'housing' category and drop that index level.
    housing_only = (
        data.loc[pd.IndexSlice[:, 'housing'], :]
        .reset_index(level='category', drop=True)
    )
    detector = MovingAverageSeasonalTrendinessDetector(freq='12H',
                                                       is_multicategory=False,
                                                       anomaly_type='or')
    result = detector.predict(housing_only)
    flagged = result[result['prediction'] == 1]
    assert len(flagged) == 1
Exemplo n.º 3
0
def test_real_model():
    """Smoke-test eval_models with a single MA-seasonal detector."""
    data_file = os.path.join(THIS_DIR, os.pardir, 'tests/data/dummy.txt')
    data = read_data(data_file)
    detector = MovingAverageSeasonalTrendinessDetector(is_multicategory=True,
                                                       freq='12H')
    features = data[['value']]
    labels = data[['is_anomaly']]
    res = eval_models(features, labels, [detector],
                      label_col_name='is_anomaly')
    print(res)
Exemplo n.º 4
0
def run_model(
    datapath,
    freq,
    min_date="01-01-2018",
    plot=True,
    model_name="stl",
    min_value=9,
    min_samples_for_category=100,
):
    """Load a labeled dataset, run one anomaly-detection model and print metrics.

    Parameters
    ----------
    datapath : str
        Path to the input data file (read with ``read_data``).
    freq : str
        Time-series frequency passed to the detector (e.g. ``"12H"``).
    min_date : str
        Earliest date to keep when reading the data.
    plot : bool
        When True, plot each category's predictions after evaluation.
    model_name : str
        One of ``"twitter"``, ``"ma_seasonal"``, ``"stl"``, ``"azure"``,
        ``"lstm"``.
    min_value : int
        Minimum-value threshold forwarded to the detector.
    min_samples_for_category : int
        Currently unused; kept for backward compatibility with callers.

    Returns
    -------
    The model's prediction frame.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    print("Loading file {0}, with frequency {1}. Model name = {2}".format(
        datapath, freq, model_name))
    dataset = read_data(datapath, min_date=min_date)
    dataset = dataset.rename(columns={"is_anomaly": "label"})

    # Multi-category data carries a 'category' column or index level.
    is_multicategory = ("category" in dataset) or ("category" in dataset.index)

    if model_name == "twitter":
        model = TwitterAnomalyTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            threshold=None,
            max_anoms=0.49,
            seasonality_freq=7,
        )
    elif model_name == "ma_seasonal":
        model = MovingAverageSeasonalTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            anomaly_type="or",
            num_of_std=3,
        )
    elif model_name == "stl":
        model = STLTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            anomaly_type="or",
            num_of_std=4,
            lo_frac=0.5,
        )
    elif model_name == "azure":
        # The Azure detector needs a subscription key from the config file.
        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, "config/config.json")
        subscription_key = get_azure_subscription_key(filename)
        model = AzureAnomalyTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            subscription_key=subscription_key,
        )
    elif model_name == "lstm":
        model = LSTMTrendinessDetector(freq=freq,
                                       is_multicategory=is_multicategory)
    else:
        # Fix: an unknown model_name previously fell through and crashed
        # later with NameError on `model`; fail fast with a clear error.
        raise ValueError("Unknown model_name: {0}".format(model_name))

    prediction = model.predict(dataset, verbose=True)
    raw_metrics = get_metrics_for_all_categories(
        dataset[["value"]],
        prediction[["prediction"]],
        dataset[["label"]],
        window_size_for_metrics=5,
    )
    metrics = get_final_metrics(raw_metrics, summarized=False)
    print(metrics)

    # Plot each category
    if plot:
        _, file = os.path.split(datapath)
        print("Plotting...")
        model.plot(labels=dataset["label"], postfix=file)

    return prediction
Exemplo n.º 5
0
def evaluate_all_models(
    datapath="SF3H_labeled.csv",
    min_date="01-01-2018",
    freq="3H",
    use_comet=False,
    models_to_run=None,
    window_size_for_metrics=1,
):
    """Run a hyper-parameter sweep of several anomaly detectors on one dataset.

    For each selected model family (LSTM, MA-seasonal, STL, Twitter, Azure) a
    grid of hyper-parameters is evaluated, the resulting metrics are printed,
    and optionally each run is logged to comet.ml.

    Parameters
    ----------
    datapath : str
        Path to the labeled input file (read with ``read_data``).
    min_date : str
        Earliest date to keep when reading the data.
    freq : str
        Time-series frequency (e.g. ``"3H"``); also used to pick ``min_value``.
    use_comet : bool
        When True, log every run's parameters and metrics to comet.ml.
    models_to_run : list of str or None
        Model names to evaluate. None or an empty list runs every model
        except LSTM (which is opt-in only).
    window_size_for_metrics : int
        Tolerance window used when matching predictions to labels.
    """
    try:
        dataset = read_data(datapath, min_date=min_date)
    except Exception:
        print("File not found or failed to read")
        return

    # Fix: the old default of None crashed on the first
    # `... in models_to_run` membership test; normalize to "run defaults".
    if models_to_run is None:
        models_to_run = []

    dataset = dataset[~dataset.index.duplicated(keep="first")]
    dataset = dataset.rename(columns={"is_anomaly": "label"})

    X = dataset[["value"]]
    y = dataset[["label"]]

    # Hourly data gets a slightly higher minimum-value threshold.
    if "H" in freq:
        min_value = 10
    else:
        min_value = 8

    print("min value for prediction = " + str(min_value))

    if use_comet:
        # Imported lazily so comet_ml is only required when logging is on.
        from comet_ml import Experiment  # noqa: F401

    # LSTM model (opt-in only: not part of the default model set)
    if "lstm" in models_to_run:
        print("Evaluating LSTM model")
        num_std = 3
        model = LSTMTrendinessDetector(
            is_multicategory=True, num_of_std=num_std, freq=freq, min_value=min_value
        )
        result = eval_models(
            X,
            y,
            [model],
            label_col_name="label",
            train_percent=20,
            window_size_for_metrics=window_size_for_metrics,
        )

        print_lstm_model(datapath, min_value, model, result)

        if use_comet:
            params = {
                "num_std": num_std,
                "window_size_for_metrics": window_size_for_metrics,
            }
            metrics = result[model.__name__]
            metrics = summarize_metrics(metrics)
            log_experiment(datapath, dataset, model, parameters=params, metrics=metrics)

    # MA model
    if "ma_seasonal" in models_to_run or len(models_to_run) == 0:
        print("Evaluating MA model")
        anomaly_types = ["residual", "trend", "and", "or"]
        for num_std in [2.5, 3, 3.5, 4]:
            for anomaly_type in anomaly_types:
                model = MovingAverageSeasonalTrendinessDetector(
                    is_multicategory=True,
                    freq=freq,
                    min_value=min_value,
                    anomaly_type=anomaly_type,
                    num_of_std=num_std,
                )
                result = eval_models_CV(
                    X,
                    y,
                    [model],
                    label_col_name="label",
                    n_splits=5,
                    window_size_for_metrics=window_size_for_metrics,
                )
                print_ma_result(
                    anomaly_type, datapath, min_value, model, num_std, result
                )

                if use_comet:
                    params = {
                        "anomaly_type": anomaly_type,
                        "num_std": num_std,
                        "window_size_for_metrics": window_size_for_metrics,
                        "min_value": min_value,
                    }
                    metrics = result[model.__name__]
                    metrics = summarize_metrics(metrics)
                    # Fix: this call used to sit outside `if use_comet:`,
                    # raising NameError on `params` when logging was off.
                    log_experiment(
                        datapath, dataset, model, parameters=params, metrics=metrics
                    )

    # STL model
    if "stl" in models_to_run or len(models_to_run) == 0:
        print("Evaluating STL model")
        anomaly_types = ["residual", "trend", "and", "or"]
        for num_std in [2.5, 3, 3.5, 4]:
            for anomaly_type in anomaly_types:
                for lo_frac in [0.1, 0.5, 1, 1.5]:
                    model = STLTrendinessDetector(
                        is_multicategory=True,
                        freq=freq,
                        min_value=min_value,
                        anomaly_type=anomaly_type,
                        num_of_std=num_std,
                        lo_frac=lo_frac,
                    )
                    result = eval_models(
                        X,
                        y,
                        [model],
                        label_col_name="label",
                        train_percent=20,
                        window_size_for_metrics=window_size_for_metrics,
                    )
                    print_stl_result(
                        anomaly_type, datapath, min_value, model, num_std, result
                    )

                    if use_comet:
                        params = {
                            "anomaly_type": anomaly_type,
                            "num_std": num_std,
                            "window_size_for_metrics": window_size_for_metrics,
                            "min_value": min_value,
                            "lo_frac": lo_frac,
                        }
                        metrics = result[model.__name__]
                        metrics = summarize_metrics(metrics)
                        log_experiment(
                            datapath, dataset, model, parameters=params, metrics=metrics
                        )
    # Twitter
    if "twitter" in models_to_run or len(models_to_run) == 0:
        print("Evaluating Twitter model")
        max_anoms_list = [0.05, 0.1]
        for max_anoms in max_anoms_list:
            for threshold in [None, "med_max", "p95", "p99"]:
                for alpha in [0.05, 0.1, 0.15]:
                    model = TwitterAnomalyTrendinessDetector(
                        is_multicategory=True,
                        freq=freq,
                        min_value=min_value,
                        threshold=threshold,
                        max_anoms=max_anoms,
                        longterm=False,
                        alpha=alpha,
                        seasonality_freq=7,
                    )

                    result = eval_models(
                        X,
                        y,
                        [model],
                        label_col_name="label",
                        train_percent=20,
                        window_size_for_metrics=window_size_for_metrics,
                    )
                    print_twitter_result(
                        alpha, datapath, min_value, model, result, threshold
                    )

                    if use_comet:
                        params = {
                            "max_anoms": max_anoms,
                            "threshold": threshold,
                            "alpha": alpha,
                            "window_size_for_metrics": window_size_for_metrics,
                            "min_value": min_value,
                        }
                        metrics = result[model.__name__]
                        metrics = summarize_metrics(metrics)
                        log_experiment(
                            datapath, dataset, model, parameters=params, metrics=metrics
                        )

    if "azure" in models_to_run or len(models_to_run) == 0:
        print("Evaluating Azure model")
        # Get Azure subscription id for the Azure Anomaly Detector
        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, "../config/config.json")
        subscription_key = get_azure_subscription_key(filename)

        model = AzureAnomalyTrendinessDetector(
            is_multicategory=True,
            freq=freq,
            min_value=min_value,
            subscription_key=subscription_key,
            sensitivity=None,
        )

        # Call Azure Anomaly Detector API once; re-tune locally per sensitivity.
        prediction = model.predict(dataset, verbose=True)

        sensitivity = [None, 0.3, 0.5, 1, 1.5, 2]
        for sens in sensitivity:
            print("sensitivity = {}".format(sens))
            prediction = model.tune_prediction(prediction, sens)
            raw_metrics = get_metrics_for_all_categories(
                X[["value"]],
                prediction[["prediction"]],
                y[["label"]],
                window_size_for_metrics=5,
            )
            metrics = get_final_metrics(raw_metrics)
            result = {}
            result[model.__name__] = metrics
            print_azure_model(datapath, min_value, model, result, sens)

            if use_comet:
                params = {
                    "sensitivity": sens,
                    "window_size_for_metrics": window_size_for_metrics,
                }
                metrics = result[model.__name__]
                metrics = summarize_metrics(metrics)
                log_experiment(
                    datapath, dataset, model, parameters=params, metrics=metrics
                )
Exemplo n.º 6
0
def run_model(datapath,
              freq,
              min_date='01-01-2018',
              plot=True,
              model_name='stl',
              min_value=9,
              min_samples_for_category=100):
    """Load a labeled dataset, run one anomaly-detection model and print metrics.

    Parameters
    ----------
    datapath : str
        Path to the input data file (read with ``read_data``).
    freq : str
        Time-series frequency passed to the detector (e.g. ``'12H'``).
    min_date : str
        Earliest date to keep when reading the data.
    plot : bool
        When True, plot each category's predictions after evaluation.
    model_name : str
        One of ``'twitter'``, ``'ma_seasonal'``, ``'stl'``, ``'azure'``,
        ``'lstm'``.
    min_value : int
        Minimum-value threshold forwarded to the detector.
    min_samples_for_category : int
        Currently unused; kept for backward compatibility with callers.

    Returns
    -------
    The model's prediction frame.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    print("Loading file {0}, with frequency {1}. Model name = {2}".format(
        datapath, freq, model_name))
    dataset = read_data(datapath, min_date=min_date)
    dataset = dataset.rename(columns={'is_anomaly': 'label'})

    # Multi-category data carries a 'category' column or index level.
    is_multicategory = ('category' in dataset) or ('category' in dataset.index)

    if model_name == 'twitter':
        model = TwitterAnomalyTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            threshold=None,
            max_anoms=0.49,
            seasonality_freq=7)
    elif model_name == 'ma_seasonal':
        model = MovingAverageSeasonalTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            anomaly_type='or',
            num_of_std=3)
    elif model_name == 'stl':
        model = STLTrendinessDetector(is_multicategory=is_multicategory,
                                      freq=freq,
                                      min_value=min_value,
                                      anomaly_type='or',
                                      num_of_std=4,
                                      lo_frac=0.5)
    elif model_name == 'azure':
        # The Azure detector needs a subscription key from the config file.
        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, 'config/config.json')
        subscription_key = get_azure_subscription_key(filename)
        model = AzureAnomalyTrendinessDetector(
            is_multicategory=is_multicategory,
            freq=freq,
            min_value=min_value,
            subscription_key=subscription_key)
    elif model_name == 'lstm':
        model = LSTMTrendinessDetector(freq=freq,
                                       is_multicategory=is_multicategory)
    else:
        # Fix: an unknown model_name previously fell through and crashed
        # later with NameError on `model`; fail fast with a clear error.
        raise ValueError("Unknown model_name: {0}".format(model_name))

    prediction = model.predict(dataset, verbose=True)
    raw_metrics = get_metrics_for_all_categories(dataset[['value']],
                                                 prediction[['prediction']],
                                                 dataset[['label']],
                                                 window_size_for_metrics=5)
    metrics = get_final_metrics(raw_metrics, summarized=False)
    print(metrics)

    # Plot each category
    if plot:
        _, file = os.path.split(datapath)
        print("Plotting...")
        model.plot(labels=dataset['label'], postfix=file)

    return prediction
Exemplo n.º 7
0
def evaluate_all_models(datapath="SF3H_labeled.csv",
                        min_date='01-01-2018',
                        freq='3H',
                        use_comet=False,
                        models_to_run=None,
                        window_size_for_metrics=1):
    """Run a hyper-parameter sweep of several anomaly detectors on one dataset.

    For each selected model family (LSTM, MA-seasonal, STL, Twitter, Azure) a
    grid of hyper-parameters is evaluated, the resulting metrics are printed,
    and optionally each run is logged to comet.ml.

    Parameters
    ----------
    datapath : str
        Path to the labeled input file (read with ``read_data``).
    min_date : str
        Earliest date to keep when reading the data.
    freq : str
        Time-series frequency (e.g. ``'3H'``); also used to pick ``min_value``.
    use_comet : bool
        When True, log every run's parameters and metrics to comet.ml.
    models_to_run : list of str or None
        Model names to evaluate. None or an empty list runs every model
        except LSTM (which is opt-in only).
    window_size_for_metrics : int
        Tolerance window used when matching predictions to labels.
    """
    try:
        dataset = read_data(datapath, min_date=min_date)
    except Exception:
        # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt.
        print("File not found or failed to read")
        return

    # Fix: the default used to be a shared mutable list ([]); use a None
    # sentinel instead and normalize to "run the default model set".
    if models_to_run is None:
        models_to_run = []

    dataset = dataset[~dataset.index.duplicated(keep='first')]
    dataset = dataset.rename(columns={'is_anomaly': 'label'})

    X = dataset[['value']]
    y = dataset[['label']]

    # Hourly data gets a slightly higher minimum-value threshold.
    if 'H' in freq:
        min_value = 10
    else:
        min_value = 8

    print("min value for prediction = " + str(min_value))

    if use_comet:
        # Imported lazily so comet_ml is only required when logging is on.
        from comet_ml import Experiment  # noqa: F401

    # LSTM model (opt-in only: not part of the default model set)
    if 'lstm' in models_to_run:
        print("Evaluating LSTM model")
        num_std = 3
        model = LSTMTrendinessDetector(is_multicategory=True,
                                       num_of_std=num_std,
                                       freq=freq,
                                       min_value=min_value)
        result = eval_models(X,
                             y, [model],
                             label_col_name='label',
                             train_percent=20,
                             window_size_for_metrics=window_size_for_metrics)

        print_lstm_model(datapath, min_value, model, result)

        if use_comet:
            params = {
                'num_std': num_std,
                'window_size_for_metrics': window_size_for_metrics
            }
            metrics = result[model.__name__]
            metrics = summarize_metrics(metrics)
            log_experiment(datapath,
                           dataset,
                           model,
                           parameters=params,
                           metrics=metrics)

    # MA model
    if 'ma_seasonal' in models_to_run or len(models_to_run) == 0:
        print("Evaluating MA model")
        anomaly_types = ['residual', 'trend', 'and', 'or']
        for num_std in [2.5, 3, 3.5, 4]:
            for anomaly_type in anomaly_types:
                model = MovingAverageSeasonalTrendinessDetector(
                    is_multicategory=True,
                    freq=freq,
                    min_value=min_value,
                    anomaly_type=anomaly_type,
                    num_of_std=num_std)
                result = eval_models_CV(
                    X,
                    y, [model],
                    label_col_name='label',
                    n_splits=5,
                    window_size_for_metrics=window_size_for_metrics)
                print_ma_result(anomaly_type, datapath, min_value, model,
                                num_std, result)

                if use_comet:
                    params = {
                        'anomaly_type': anomaly_type,
                        'num_std': num_std,
                        'window_size_for_metrics': window_size_for_metrics,
                        'min_value': min_value
                    }
                    metrics = result[model.__name__]
                    metrics = summarize_metrics(metrics)
                    # Fix: this call used to sit outside `if use_comet:`,
                    # raising NameError on `params` when logging was off.
                    log_experiment(datapath,
                                   dataset,
                                   model,
                                   parameters=params,
                                   metrics=metrics)

    # STL model
    if 'stl' in models_to_run or len(models_to_run) == 0:
        print("Evaluating STL model")
        # Fix: anomaly_types was only defined inside the MA branch, so
        # running STL without MA raised NameError; define it locally.
        anomaly_types = ['residual', 'trend', 'and', 'or']
        for num_std in [2.5, 3, 3.5, 4]:
            for anomaly_type in anomaly_types:
                for lo_frac in [0.1, 0.5, 1, 1.5]:
                    model = STLTrendinessDetector(is_multicategory=True,
                                                  freq=freq,
                                                  min_value=min_value,
                                                  anomaly_type=anomaly_type,
                                                  num_of_std=num_std,
                                                  lo_frac=lo_frac)
                    result = eval_models(
                        X,
                        y, [model],
                        label_col_name='label',
                        train_percent=20,
                        window_size_for_metrics=window_size_for_metrics)
                    print_stl_result(anomaly_type, datapath, min_value, model,
                                     num_std, result)

                    if use_comet:
                        params = {
                            'anomaly_type': anomaly_type,
                            'num_std': num_std,
                            'window_size_for_metrics': window_size_for_metrics,
                            'min_value': min_value,
                            'lo_frac': lo_frac
                        }
                        metrics = result[model.__name__]
                        metrics = summarize_metrics(metrics)
                        log_experiment(datapath,
                                       dataset,
                                       model,
                                       parameters=params,
                                       metrics=metrics)
    # Twitter
    if 'twitter' in models_to_run or len(models_to_run) == 0:
        print("Evaluating Twitter model")
        max_anoms_list = [0.05, 0.1]
        for max_anoms in max_anoms_list:
            for threshold in [None, 'med_max', 'p95', 'p99']:
                for alpha in [0.05, 0.1, 0.15]:
                    model = TwitterAnomalyTrendinessDetector(
                        is_multicategory=True,
                        freq=freq,
                        min_value=min_value,
                        threshold=threshold,
                        max_anoms=max_anoms,
                        longterm=False,
                        alpha=alpha,
                        seasonality_freq=7)

                    result = eval_models(
                        X,
                        y, [model],
                        label_col_name='label',
                        train_percent=20,
                        window_size_for_metrics=window_size_for_metrics)
                    print_twitter_result(alpha, datapath, min_value, model,
                                         result, threshold)

                    if use_comet:
                        params = {
                            'max_anoms': max_anoms,
                            'threshold': threshold,
                            'alpha': alpha,
                            'window_size_for_metrics': window_size_for_metrics,
                            'min_value': min_value
                        }
                        metrics = result[model.__name__]
                        metrics = summarize_metrics(metrics)
                        log_experiment(datapath,
                                       dataset,
                                       model,
                                       parameters=params,
                                       metrics=metrics)

    if 'azure' in models_to_run or len(models_to_run) == 0:
        print("Evaluating Azure model")
        # Get Azure subscription id for the Azure Anomaly Detector
        dirname = os.path.dirname(__file__)
        filename = os.path.join(dirname, '../config/config.json')
        subscription_key = get_azure_subscription_key(filename)

        model = AzureAnomalyTrendinessDetector(
            is_multicategory=True,
            freq=freq,
            min_value=min_value,
            subscription_key=subscription_key,
            sensitivity=None)

        # Call Azure Anomaly Detector API once; re-tune locally per sensitivity.
        prediction = model.predict(dataset, verbose=True)

        sensitivity = [None, 0.3, 0.5, 1, 1.5, 2]
        for sens in sensitivity:
            print("sensitivity = {}".format(sens))
            prediction = model.tune_prediction(prediction, sens)
            raw_metrics = get_metrics_for_all_categories(
                X[['value']],
                prediction[['prediction']],
                y[['label']],
                window_size_for_metrics=5)
            metrics = get_final_metrics(raw_metrics)
            result = {}
            result[model.__name__] = metrics
            print_azure_model(datapath, min_value, model, result, sens)

            if use_comet:
                params = {
                    'sensitivity': sens,
                    'window_size_for_metrics': window_size_for_metrics
                }
                metrics = result[model.__name__]
                metrics = summarize_metrics(metrics)
                log_experiment(datapath,
                               dataset,
                               model,
                               parameters=params,
                               metrics=metrics)