Exemplo n.º 1
0
def show_test(test, predict_test):
    """Score the test-set predictions and hand them to ``plot_ans``.

    Args:
        test: table providing the 'label', 'timestamp' and 'value' columns.
        predict_test: predictions compared against ``test['label']``.

    Returns:
        None. The result of ``evalute_delay`` is not used here.
    """
    labels = test['label']
    evalute_delay(labels, predict_test)

    # One column 'y' from truth_predict, then the raw series copied alongside.
    frame = pd.DataFrame({'y': truth_predict(labels, predict_test)})
    for column in ('timestamp', 'value'):
        frame[column] = test[column]

    plot_ans(frame, ratio=1)
Exemplo n.º 2
0
def show_train(y_train, df):
    """Predict on the features stored in 'formal.csv', score and plot them.

    The feature matrix is read from 'formal.csv' with the 'label' column
    dropped and the column at position 1 removed; predictions come from the
    module-level ``rf`` object.

    Args:
        y_train: training labels compared against the predictions.
        df: table providing the 'timestamp' and 'value' columns.

    Returns:
        None. The result of ``evalute_delay`` is not used here.
    """
    features = pd.read_csv('formal.csv').drop(['label'], axis=1).values
    features = np.delete(features, 1, axis=1)
    predict_train = rf.predict(features)

    evalute_delay(y_train, predict_train)

    frame = pd.DataFrame({'y': truth_predict(y_train, predict_train)})
    # NOTE(review): assigning a Series aligns on index labels; if ``df`` was
    # sliced before being passed in, its index will not start at 0 -- confirm
    # the rows line up as intended.
    for column in ('timestamp', 'value'):
        frame[column] = df[column]

    plot_ans(frame, ratio=1)
Exemplo n.º 3
0
def benchmark_roubust_ksigma(num):
    """Score the robust-residual k-sigma detector on one KPI series.

    Loads ``<num>_train.csv`` and ``<num>_test.csv`` from the hard-coded
    series directory, stores both in a ``TSlist`` (gaps filled with
    ``fill_missed_median``), and for every test timestamp except the first
    feeds five concatenated windows to
    ``roubustResidual.roubust_residual_ksigma``. The collected outputs are
    passed to ``evalute_delay`` with ``delay=7`` and the resulting row is
    written to ``log/<num> roubust_ksigma.csv``.

    Args:
        num: KPI identifier used in the file names. It is converted with
            ``str()`` wherever a path is built, so ints and strings both work.

    Returns:
        None.
    """
    file_name = 'E:/javacode/AIOPS/data/series/'
    kpi_id = str(num)
    log = []
    print(num)
    train = pd.read_csv(file_name + kpi_id + '_train.csv')
    t = TSlist(train)
    test = pd.read_csv(file_name + kpi_id + '_test.csv')
    t.append_df(test)
    t.fill_missed_median()

    timestamp = test['timestamp'].values
    total = len(timestamp)
    span = timestamp[1] - timestamp[0]
    # Floor division keeps the window length an integer under both classic
    # and true division.
    w = 3600 * 3 // span
    print('sample num in each hour: %s' % (w // 3))

    three_hours = 3600 * 3
    one_day = 24 * 3600
    one_week = one_day * 7

    y = []
    # NOTE(review): the loop starts at index 1, so ``y`` holds one entry fewer
    # than ``test['label']`` -- confirm evalute_delay expects that.
    for i in range(1, total):
        if i % 5000 == 0:
            print('%s %%' % (i * 100.0 / total))
        now = timestamp[i]

        # Build the data blocks.
        w_now = t.get_series(now, w + 1)
        w_yest1 = t.get_series(now - one_day, w + 1)
        # Same day-ago point, shifted three hours later.
        w_yest2 = t.get_series(now - one_day + three_hours, w)
        w_last_week1 = t.get_series(now - one_week, w + 1)
        w_last_week2 = t.get_series(now - one_week + three_hours, w)

        # Final order: last week (both windows), yesterday (both windows), now.
        w_yest1.extend(w_yest2)
        w_last_week1.extend(w_last_week2)
        w_last_week1.extend(w_yest1)
        w_last_week1.extend(w_now)
        X_formal = np.array(w_last_week1)

        # Detection algorithm.
        y.append(roubustResidual.roubust_residual_ksigma(X_formal, w))

    tp, tn, fp, fn, precison, recall, f1 = evalute_delay(test['label'],
                                                         y,
                                                         delay=7)
    log.append([num, tp, tn, fp, fn, precison, recall, f1])
    log = pd.DataFrame(log,
                       columns=[
                           'kpi id', 'tp', 'tn', 'fp', 'fn', 'precison',
                           'recall', 'fscore'
                       ])
    log.to_csv('log/' + kpi_id + ' roubust_ksigma.csv')
Exemplo n.º 4
0
    'minus_mean_now_yes', 'minus_std_now_past', 'minus_std_now_yes',
    'r_mean_now_past', 'r_mean_now_yes', '1', '2', '3', '4', '5', '6', '7',
    '8', 'ewma', 'pre_minus', 'pre_rate', 'yes_minus', 'yes_rate', 'label'
]
# For each KPI id: train on <num>_train.csv, predict on <num>_test.csv, and
# write the scores accumulated so far to '<num>scale_smote.csv'.
for num in numbers:
    print(num)
    series_prefix = file_name + num

    df = pd.read_csv(series_prefix + '_train.csv')
    span = df['timestamp'][1] - df['timestamp'][0]
    # The TSlist is built from the full frame, before the leading rows of
    # ``df`` are dropped.
    ts = TSlist(df)
    # presumably skips one day of samples plus 15 (timestamps in seconds --
    # TODO confirm)
    df = df[3600 * 24 / span + 15:]

    print('start to fill missed value')
    ts.fill_missed_median()
    print('filling over')

    print('start to train')
    rf, scale = trainScaleSmote(df, ts, columns)
    print('training done')

    test = pd.read_csv(series_prefix + '_test.csv')
    ts.append_df(test)
    ts.fill_missed_median()
    predict_test = benchmark(ts, test, rf, scale)

    precison, recall, f1 = evalute_delay(test['label'], predict_test)
    log.append([num, precison, recall, f1])
    # ``log`` keeps growing across iterations, so each file holds every KPI
    # processed up to and including this one.
    log_tmp = pd.DataFrame(
        log, columns=['kpi id', 'precison', 'recall', 'fscore'])
    log_tmp.to_csv(num + 'scale_smote.csv')

#log = pd.DataFrame(log, columns=['kpi id','precison','recall','fscore'])
#log.to_csv('benchmark_dumpT.csv')
Exemplo n.º 5
0

from TSlist import TSlist
# Train on KPI 7 with trainDumpT, predict on its test file and print the
# result of evalute_delay.
columns = [
    'value', 'mean_now', 'std_now', 'minus_mean_now_past',
    'minus_mean_now_yes', 'minus_std_now_past', 'minus_std_now_yes',
    'r_mean_now_past', 'r_mean_now_yes', '1', '2', '3', '4', '5', '6', '7',
    '8', 'ewma', 'pre_minus', 'pre_rate', 'yes_minus', 'yes_rate', 'label'
]

df = pd.read_csv('data/series/7_train.csv')
span = df['timestamp'][1] - df['timestamp'][0]
# The TSlist is built from the full frame, before the leading rows of ``df``
# are dropped.
ts = TSlist(df)
# presumably skips one day of samples plus 5 (timestamps in seconds --
# TODO confirm)
df = df[3600 * 24 / span + 5:]

print('start to fill missed value')
ts.fill_missed_median()
print('filling over')

y_train = df['label'].values
mark = extract_Tsamples(y_train)

print('start to train')
rf = trainDumpT(df, ts, columns, mark, ratio=3)
#show_train(y_train, df)
print('training done')

test = pd.read_csv('data/series/7_test.csv')
ts.append_df(test)
ts.fill_missed_median()
predict_test = benchmark(ts, test, rf)
#show_test(test, predict_test)
print(evalute_delay(test['label'], predict_test))