示例#1
0
def get_lstm_diff(KPI_ID_name):
    """Run the per-KPI LSTM on the train and test series and return the diffs.

    The function reads ``resources/train.csv`` and ``resources/test.csv``,
    splits both per KPI with ``SplitKPIList``, builds next-step targets (each
    series shifted up by one row, with the trailing gap forward-filled),
    scales everything with the scalers fitted on the filtered training rows,
    loads the saved model for this KPI (or trains one when the model file is
    missing) and finally calls ``get_diff`` once for the test series and once
    for the complete, unfiltered training series.

    Args:
        KPI_ID_name: Identifier of the KPI to process. It must be present in
            the KPI id collections returned for both CSV files.

    Returns:
        A ``(train_diff_df, test_diff_df)`` tuple holding the two values
        returned by ``get_diff``.

    Raises:
        ValueError: Propagated from the ``.index(KPI_ID_name)`` lookups when
            the KPI is unknown (assuming the id collections are lists --
            TODO confirm against ``SplitKPIList``).
    """
    # NOTE(review): ``output_path`` is not defined inside this function; it is
    # expected to be a module-level name -- confirm it exists in this file.
    os.makedirs(output_path, exist_ok=True)

    train_data_path = 'resources/train.csv'
    test_data_path = 'resources/test.csv'

    # Same read order as before: test file first, then the training file.
    test_data_raw = pd.read_csv(test_data_path)
    train_data_raw = pd.read_csv(train_data_path)

    KPI_LIST, KPI_ID = SplitKPIList(train_data_raw)
    KPI_LIST_test, KPI_ID_test = SplitKPIList(test_data_raw)

    train_frame = pd.DataFrame(KPI_LIST[KPI_ID.index(KPI_ID_name)])
    test_frame = pd.DataFrame(KPI_LIST_test[KPI_ID_test.index(KPI_ID_name)])

    # Complete training series (no rows removed), kept for the final
    # train-side diff. The target is the 'value' column shifted by one step.
    X_train_reserve = train_frame.copy()
    X_reserve = X_train_reserve['value'].values.reshape(-1, 1)
    y_reserve = X_train_reserve[['value']].shift(-1).ffill().values

    # Test inputs are whatever columns remain after removing the id/time ones.
    X_test = test_frame.drop(columns=['KPI ID', 'timestamp'])
    y_test = X_test.shift(-1).ffill()
    test_data = X_test.values
    test_label = y_test.values

    # Rows used to fit the scalers / train the model: labels are passed through
    # ``conv2Noneflag`` and every row containing a missing value is dropped
    # (presumably this removes the rows that helper flags -- TODO confirm).
    # Working on a copy avoids assigning into the frame owned by ``KPI_LIST``.
    filtered = train_frame.copy()
    filtered['label'] = filtered['label'].map(conv2Noneflag)
    df = filtered.dropna(axis=0).drop(columns=['KPI ID', 'label', 'timestamp'])
    df_y = df.shift(-1).ffill()

    train_X_scaler, train_y_scaler, train_scaled_X, train_scaled_y = scale_data(
        df, df_y)

    test_scaled_X = train_X_scaler.transform(test_data)
    test_scaled_y = train_y_scaler.transform(test_label)

    model_file = './BasicLSTM_output/' + KPI_ID_name + '_lstm_model.h5'
    if os.path.exists(model_file):
        # Reuse the previously saved LSTM model.
        lstm_model = load_model(model_file)
    else:
        # No saved model yet: train one on the filtered training rows.
        lstm_model = fit_lstm(train_scaled_X, train_scaled_y, model_file)

    # NOTE(review): the tag here is ``<id>test`` while the train call below
    # uses ``<id>_train``; kept byte-identical because ``get_diff`` may use the
    # tag for output names -- confirm before normalising.
    test_diff_df = get_diff(test_data, test_scaled_y, test_scaled_X,
                            lstm_model, train_y_scaler, KPI_ID_name + 'test')

    # Re-scale the complete training series with the already-fitted scalers.
    full_scaled_X = train_X_scaler.transform(X_reserve)
    full_scaled_y = train_y_scaler.transform(y_reserve)

    # NOTE(review): the train call passes every column of the raw frame as the
    # first argument, whereas the test call passes only the remaining value
    # columns -- unchanged from the original; confirm ``get_diff`` expects it.
    train_diff_df = get_diff(X_train_reserve.values, full_scaled_y,
                             full_scaled_X, lstm_model, train_y_scaler,
                             KPI_ID_name + '_train')
    return train_diff_df, test_diff_df
示例#2
0
# Module-level settings followed by eager loading of the train/test data.
# NOTE(review): ``is_shift`` and ``shift`` are not used in the visible part of
# the file; presumably they toggle/size an optional shift of the series --
# confirm against the code that reads them.
is_shift = False
shift = 2
# score_threshold = 0.997
# KPI_ID_name = '76f4550c43334374' 8a20c229e9860d0c
# Input/output locations, all relative to the current working directory.
train_data_path = 'resources/train.csv'
test_data_path = 'resources/test.csv'
augment_data_path = 'resources/augment_data/'
test_augment_data_path = 'resources/test_augment_data/'
full_result_path = 'resources/result/prediction.csv'
rand_result_path = 'resources/result/rand_expand_prediction.csv'
output_path = 'resources/label_prediction_plot'
manual_features_path = 'resources/manual_feature'
# Both CSV files are read at import time (test file first, then train).
test_data_raw = pd.read_csv(test_data_path)
train_data_raw = pd.read_csv(train_data_path)

# SplitKPIList returns a pair that is unpacked into a per-KPI collection and
# the matching KPI ids (presumably index-aligned -- TODO confirm).
KPI_LIST, KPI_ID = SplitKPIList(train_data_raw)
KPI_LIST_test, KPI_ID_test = SplitKPIList(test_data_raw)

# print(KPI_LIST)
# print('KPI_ID:', KPI_ID)
# KPI_ID:
# ['71595dd7171f4540',
# '88cf3a776ba00e7c',
# 'affb01ca2b4f0b45',
# '769894baefea4e9e',
# '02e99bd4f6cfb33f',
# '54e8a140f6237526',
# 'b3b2e6d1a791d63a',
# '07927a9a18fa19ae',
# 'a40b1df87e3f1c87',
# 'c58bfcbacb2822d1',