def spectral_residual(df):
    """Return the rows of ``df`` flagged as outliers by a SpectralResidual detector.

    The detector is created without a threshold; the threshold is then
    inferred from the ``'value'`` column itself at the 99th percentile, and
    the same column is scored against it.

    Args:
        df: Frame with a ``'value'`` column. Rows are treated as consecutive
            samples; their positions (0 .. len(df) - 1) are used as the
            time axis passed to the detector.

    Returns:
        The subset of ``df`` whose rows the detector marked as outliers.
    """
    detector_settings = {
        'threshold': None,  # outlier-score threshold, inferred below
        'window_amp': 20,  # window for the average log amplitude
        'window_local': 20,  # window for the average saliency map
        'n_est_points': 20,  # estimated points padded to the end of the sequence
    }
    detector = SpectralResidual(**detector_settings)

    values = np.array(df['value'])
    sample_index = np.arange(0, len(df))

    # Fit the threshold on the very data that is scored afterwards.
    detector.infer_threshold(values, sample_index, threshold_perc=99)
    prediction = detector.predict(values,
                                  sample_index,
                                  return_instance_score=True)

    outlier_mask = prediction['data']['is_outlier'] == 1
    return df.loc[outlier_mask]
# Example #2
# 0
def run_gen(
        perc,
        data_path='/Users/baconbaker/Documents/Studium/ANM/anm-project/data/train_data/host',
        info_path='kpi_summary_info.data'):
    """Infer a SpectralResidual threshold per (host, KPI) and write them to CSV.

    Every file in ``data_path`` is loaded as CSV and split into one series
    per ``(cmdb_id, name)`` pair. Series whose KPI is listed in ``info_path``
    as non-flat with a 1-minute or 5-minute interval are resampled to that
    interval, standardised, smoothed with a 10-sample rolling mean and fed to
    ``SpectralResidual.infer_threshold``. The collected thresholds are written
    to ``thresh_<perc>.data`` (with ``.`` in ``perc`` replaced by ``_``) after
    each input file, so partial results survive an interrupted run.

    Args:
        perc: Percentile passed as ``threshold_perc`` to ``infer_threshold``;
            also used to build the output file name.
        data_path: Directory containing the per-host CSV files. Each file
            needs ``cmdb_id``, ``name``, ``timestamp`` (epoch milliseconds)
            and ``value`` columns.
        info_path: CSV with ``kpi``, ``interval`` and ``is_flat`` columns.

    Returns:
        None. The result is the CSV file written to the working directory.
    """
    dfs = {}
    for path in os.listdir(data_path):
        # The key drops a 4-character suffix (e.g. '.csv') from the file name.
        dfs[path[:-4]] = pd.read_csv(os.path.join(data_path, path))

    df_info = pd.read_csv(info_path)

    window_size = 10
    od = SpectralResidual(window_amp=window_size,
                          window_local=window_size,
                          n_est_points=5,
                          n_grad_points=5)

    # Sets give O(1) membership tests in the per-series loop below.
    not_flat = df_info.is_flat == False  # noqa: E712 - element-wise comparison
    per1_kpis = set(df_info[(df_info.interval == '1min') & not_flat]['kpi'])
    per5_kpis = set(df_info[(df_info.interval == '5min') & not_flat]['kpi'])

    thresh_columns = ['name', 'host', 'thresh']
    thresh_rows = []  # accumulated across all input files
    out_path = 'thresh_' + str(perc).replace('.', '_') + '.data'

    for df_name in dfs:
        print('*' * 50)
        print('Running generation for', df_name)
        # NOTE(review): `interval` and `start_key` are not read anywhere in
        # the visible part of this function; kept in case later code uses them.
        interval = 0
        start_key = time.time()

        df = dfs[df_name]
        kpis = {}
        res = {}

        for key, frame in df.groupby(['cmdb_id', 'name']):
            # `assign` builds a new frame, so the group slice is never
            # mutated (avoids pandas' SettingWithCopy warning).
            stamped = frame.assign(timestamp=frame['timestamp'].apply(
                lambda x: datetime.fromtimestamp(x / 1000.0)))
            kpis[key] = stamped.set_index('timestamp').sort_index()

        print('Calculating rolling window')
        for key, frame in kpis.items():
            values = frame['value']
            if values.std() == 0:
                continue  # constant series carry no signal
            if key[1] in per1_kpis:
                rule = 'min'
            elif key[1] in per5_kpis:
                rule = '5min'
            else:
                continue  # KPI is flat or has an unsupported interval
            d = values.resample(rule).mean().interpolate()
            d = (d - d.mean()) / d.std()
            res[key] = d.rolling(10).mean()

        for key, smoothed in res.items():
            print('Determining threshold for', key)
            d = smoothed.dropna()
            # Check the NaN-free series: the rolling mean leaves leading NaNs,
            # so a short series can be non-empty before dropna yet empty after.
            if d.empty:
                print("ITS EMPTY", key)
                continue
            od.infer_threshold(d, threshold_perc=perc)
            thresh_rows.append({
                'name': key[1],
                'host': key[0],
                'thresh': od.threshold
            })

        # Build the frame once per file instead of appending row by row.
        df_thresh = pd.DataFrame(thresh_rows, columns=thresh_columns)
        df_thresh.to_csv(out_path, index=False)