def get_lstm_diff(KPI_ID_name):
    """Compute LSTM prediction-difference frames for one KPI series.

    Reads the train and test CSV files, selects the series whose id equals
    ``KPI_ID_name``, fits the scalers on the filtered training values, loads
    the saved LSTM model for that KPI (or trains one when the model file does
    not exist), and runs ``get_diff`` on the test series and on the complete,
    unfiltered training series.

    Args:
        KPI_ID_name: Identifier of the KPI series to process. It must occur
            in the id lists returned by ``SplitKPIList`` for both the train
            and the test data.

    Returns:
        A ``(train_diff_df, test_diff_df)`` tuple with the values returned by
        ``get_diff`` for the training series and the test series.

    Raises:
        ValueError: If ``KPI_ID_name`` is not found by ``.index()`` in the
            train or test id list.
        KeyError: If an expected column ('value', 'label', 'KPI ID',
            'timestamp') is missing from the selected frames.
    """
    # ``output_path`` is a module-level setting. ``exist_ok`` avoids the
    # check-then-create race of a separate ``os.path.exists`` test.
    os.makedirs(output_path, exist_ok=True)

    train_data_path = 'resources/train.csv'
    test_data_path = 'resources/test.csv'

    test_data_raw = pd.read_csv(test_data_path)
    train_data_raw = pd.read_csv(train_data_path)
    KPI_LIST, KPI_ID = SplitKPIList(train_data_raw)
    KPI_LIST_test, KPI_ID_test = SplitKPIList(test_data_raw)

    index = KPI_ID.index(KPI_ID_name)
    test_index = KPI_ID_test.index(KPI_ID_name)

    # Snapshot of the complete training series (all columns, all rows), kept
    # separate from the filtered frame built below so the whole series can
    # be scored at the end.
    X_train_reserve = pd.DataFrame(KPI_LIST[index]).copy()
    # Target is the next row's value; the last row has no successor, so the
    # NaN produced by the shift is forward-filled with the previous value.
    y_reserve = X_train_reserve[['value']].shift(-1).ffill().values
    X_reserve = X_train_reserve['value'].values.reshape(-1, 1)

    # Test features: everything except the id and timestamp columns.
    X_test = pd.DataFrame(KPI_LIST_test[test_index]).drop(
        columns=['KPI ID', 'timestamp'])
    y_test = X_test.shift(-1).ffill()
    test_data = X_test.values
    test_label = y_test.values

    # Training features: work on a private copy instead of mutating the
    # frame stored in KPI_LIST.
    # NOTE(review): presumably conv2Noneflag maps the labels to exclude to
    # None/NaN so that dropna() removes those rows -- confirm.
    train_frame = pd.DataFrame(KPI_LIST[index]).copy()
    train_frame['label'] = train_frame['label'].map(conv2Noneflag)
    train_frame = train_frame.dropna(axis=0)
    df = train_frame.drop(columns=['KPI ID', 'label', 'timestamp'])
    df_y = df.shift(-1).ffill()

    train_X_scaler, train_y_scaler, train_scaled_X, train_scaled_y = scale_data(
        df, df_y)
    test_scaled_X = train_X_scaler.transform(test_data)
    test_scaled_y = train_y_scaler.transform(test_label)

    model_file = './BasicLSTM_output/' + KPI_ID_name + '_lstm_model.h5'
    if os.path.exists(model_file):
        # Load the previously saved LSTM model.
        lstm_model = load_model(model_file)
    else:
        # No saved model for this KPI: train one.
        lstm_model = fit_lstm(train_scaled_X, train_scaled_y, model_file)

    # NOTE(review): the tag below is 'test' while the training call uses
    # '_train' (with underscore). Kept byte-for-byte because get_diff may
    # derive output names from it.
    test_diff_df = get_diff(test_data, test_scaled_y, test_scaled_X,
                            lstm_model, train_y_scaler, KPI_ID_name + 'test')

    # Score the complete training series with the scalers fitted on the
    # filtered rows.
    train_scaled_X = train_X_scaler.transform(X_reserve)
    train_scaled_y = train_y_scaler.transform(y_reserve)
    # NOTE(review): the first argument here is the full frame (all columns),
    # whereas the test call passes only the value column -- confirm that
    # get_diff expects this.
    train_diff_df = get_diff(X_train_reserve.values, train_scaled_y,
                             train_scaled_X, lstm_model, train_y_scaler,
                             KPI_ID_name + '_train')

    return train_diff_df, test_diff_df
# ---------------------------------------------------------------------------
# Module-level run configuration and data loading.
# ---------------------------------------------------------------------------

# Shift settings (not referenced in the visible part of this file).
is_shift, shift = False, 2
# score_threshold = 0.997
# KPI_ID_name = '76f4550c43334374' 8a20c229e9860d0c

# Input locations.
train_data_path = 'resources/train.csv'
test_data_path = 'resources/test.csv'
augment_data_path = 'resources/augment_data/'
test_augment_data_path = 'resources/test_augment_data/'

# Output locations. ``output_path`` is read by get_lstm_diff, which creates
# the directory when it is missing.
full_result_path = 'resources/result/prediction.csv'
rand_result_path = 'resources/result/rand_expand_prediction.csv'
output_path = 'resources/label_prediction_plot'
manual_features_path = 'resources/manual_feature'

# Load both data sets and split each one into per-KPI frames plus the
# matching list of KPI ids.
test_data_raw = pd.read_csv(test_data_path)
train_data_raw = pd.read_csv(train_data_path)
KPI_LIST, KPI_ID = SplitKPIList(train_data_raw)
KPI_LIST_test, KPI_ID_test = SplitKPIList(test_data_raw)

# print(KPI_LIST)
# print('KPI_ID:', KPI_ID)
# KPI_ID:
# ['71595dd7171f4540',
# '88cf3a776ba00e7c',
# 'affb01ca2b4f0b45',
# '769894baefea4e9e',
# '02e99bd4f6cfb33f',
# '54e8a140f6237526',
# 'b3b2e6d1a791d63a',
# '07927a9a18fa19ae',
# 'a40b1df87e3f1c87',
# 'c58bfcbacb2822d1',