def train_pred_eval_model(x_train_scaled,
                          y_train_scaled,
                          x_test_scaled,
                          y_test,
                          mu_teste_list,
                          std_test_list,
                          scale,
                          lstm_units=10,
                          dropout_prob=0.5,
                          optimizer='rmsprop',
                          epochs=50,
                          batch_size=64,
                          model=None):
    """Train an LSTM, forecast recursively, un-scale and evaluate.

    The forecast is rolled forward one call at a time: the first test window
    seeds the input, and every prediction is appended to the input history
    that feeds the next ``model.predict`` call.

    Args:
        x_train_scaled: Scaled training inputs; indexed as a 3-D array
            (``shape[1]`` is used as the time-step count and ``shape[2]`` as
            the feature count).
        y_train_scaled: Scaled training targets; ``shape[1]`` is used as the
            number of dense output units.
        x_test_scaled: Scaled test inputs; only ``x_test_scaled[0, :, 0]`` is
            read, as the seed window.
        y_test: Unscaled test targets; its length sets the number of
            prediction steps.
        mu_teste_list: Unused; kept for interface compatibility.
        std_test_list: Unused; kept for interface compatibility.
        scale: Object whose ``inverse_transform`` maps scaled predictions
            back to the original range.
        lstm_units: Forwarded to ``build_network`` when a model is built.
        dropout_prob: Forwarded to ``build_network`` when a model is built.
        optimizer: Forwarded to ``build_network`` when a model is built.
        epochs: Number of epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.
        model: Optional existing model. A new one is built when ``None``.

    Returns:
        Tuple ``(rmse, mape, est, model)``: root mean square error, the value
        of ``loss.get_mape``, the un-scaled predictions as an ``(N, 1)``
        array, and the fitted model.
    """
    if model is None:
        model = lstm_network.lstm_network().build_network(
            time_step=x_train_scaled.shape[1],
            features_num=x_train_scaled.shape[2],
            dropout_prob=dropout_prob,
            dense_units=y_train_scaled.shape[1],
            lstm_units=lstm_units,
            optimizer=optimizer)

    # NOTE(review): the original indentation was lost; fitting is treated as
    # unconditional, so a model passed in by the caller is trained further.
    # Confirm this is the intended behaviour.
    model.fit(x_train_scaled,
              y_train_scaled,
              epochs=epochs,
              batch_size=batch_size,
              verbose=1)

    # Recursive prediction.
    timesteps = x_test_scaled.shape[1]
    est_scaled = []
    # Running input history: seeded with the first test window (feature 0),
    # then extended with every prediction.
    pre_xlist = x_test_scaled[0, :, 0].tolist()
    for _ in range(len(y_test)):
        # The window is reshaped to a single feature, so this loop only
        # supports single-feature inputs.
        predictx = np.reshape(np.array(pre_xlist[-timesteps:]),
                              (1, timesteps, 1))
        pre_y = model.predict(predictx)
        pre_xlist.extend(pre_y[0])
        est_scaled.extend(pre_y)

    # Back to the original range, as a column vector matching y_test below.
    est_scaled = np.array(est_scaled).reshape(-1, 1)
    est = scale.inverse_transform(est_scaled)

    y_test = np.reshape(y_test, (-1, 1))
    rmse = math.sqrt(mean_squared_error(y_test, est))
    mape = loss.get_mape(y_test, est)
    return rmse, mape, est, model
def predict(model, x_test_scaled, y_test, scaler):
    """Predict with a fitted model, un-scale the output and report errors.

    Relies on the module-level names ``feature_num`` and ``cap_col_index``.

    Args:
        model: Object exposing ``predict``.
        x_test_scaled: Scaled test inputs passed straight to ``model.predict``.
        y_test: Unscaled ground-truth values; flattened to a column vector
            before the error metrics are computed.
        scaler: Object whose ``inverse_transform`` accepts an array with
            ``feature_num`` columns.

    Returns:
        Tuple ``(pre_y, mape, mse)``: the un-scaled predictions as a 1-D
        array, the value of ``loss.get_mape`` and the value of
        ``loss.get_rmse`` (labelled "mse" in the printed message).
    """
    pre_scaled_y = np.reshape(model.predict(x_test_scaled), (-1, 1))

    # Replicate the prediction across feature_num columns so the scaler can
    # invert it, then keep only the column at index cap_col_index - 1.
    construct_pre = np.tile(pre_scaled_y, (1, feature_num))
    pre_y = scaler.inverse_transform(construct_pre)[:, cap_col_index - 1]

    # Give both metric arguments the same (N, 1) shape. Passing (N, 1) against
    # (N,) would broadcast to (N, N) in any element-wise metric and silently
    # produce a wrong error value.
    y_test = np.reshape(y_test, (-1, 1))
    pre_y_column = np.reshape(pre_y, (-1, 1))
    mape = loss.get_mape(y_test, pre_y_column)
    mse = loss.get_rmse(y_test, pre_y_column)
    print("mape:{0:.4},mse:{1:.4}".format(mape, mse))
    return pre_y, mape, mse
def main():
    """Command-line entry point: train an LSTM, predict, log errors and plot.

    Parses the command-line options, loads and reshapes the data, obtains a
    model through ``get_model``, trains it, predicts on the test split,
    writes one line of error metrics to a text file under ``--output_path``
    and saves a figure through ``plot_and_save``.
    """
    # TODO: debug multi-feature input
    parser = argparse.ArgumentParser(description='LSTM RUL Prediction')
    parser.add_argument('--filename',
                        type=str,
                        default="data/2017_06_30_cell8_data.csv")
    parser.add_argument('--output_path',
                        type=str,
                        default="snapshot/single_variable")
    parser.add_argument('--predict_measure',
                        type=int,
                        default=0,
                        choices=[0, 1])
    parser.add_argument('--sequence_length',
                        type=int,
                        default=20,
                        help='time_step in lstm')
    # type=float is required: without it a value given on the command line
    # arrives as a string while the default is a float.
    parser.add_argument('--split',
                        type=float,
                        default=0.6,
                        help='split of train and test set')
    parser.add_argument('--batch_size',
                        type=int,
                        default=8,
                        help='input batch size for training (default: 8)')
    parser.add_argument('--epochs',
                        type=int,
                        default=50,
                        metavar='N',
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--saved_figure_path',
                        default='result/single_variable/')
    parser.add_argument('--feature_num',
                        type=int,
                        default=1,
                        help='single feature use 1,multi use 7')
    parser.add_argument(
        '--usecols',
        default=[9, 10],
        type=int,
        nargs='+',
        help=
        'single feature imput use [9,10], multi use [3, 4, 5, 6, 7, 8, 9, 10]')
    # type=int is required here; otherwise, when run as a script, get_model
    # ends up returning None.
    parser.add_argument(
        '--get_model_measure',
        default=1,
        type=int,
        help='0 for define model from begin, 1 for load exist model')
    # Fewer cells give more accurate results -- too little data?
    parser.add_argument('--lstm_units', default=50, type=int)
    args = parser.parse_args()

    split = args.split
    dropout_prob = args.dropout
    sequence_length = args.sequence_length
    batch_size = args.batch_size
    epochs = args.epochs
    # 0: predict one cycle; 1: predict len(test_y) cycles continuously, using
    # each predicted value as the next input.
    predict_measure = args.predict_measure
    filename = args.filename
    save_filepath = args.saved_figure_path
    feature_num = args.feature_num  # number of features per training sample
    usecols = args.usecols  # columns to read from the data file
    get_model_measure = args.get_model_measure
    lstm_units = args.lstm_units
    loss_file_path = args.output_path

    loss_file_name = 'seqlen:{0}_mse_mape_{1}.txt'.format(
        str(sequence_length), str(get_time()))

    # The context manager guarantees the log file is closed even when loading,
    # training or plotting raises.
    with open(osp.join(loss_file_path, loss_file_name), 'w') as fo:
        fo.write('N,batch_size,epochs,mse,mape\n')
        # Flush so the header is on disk before the long-running training.
        fo.flush()

        dataloader = load_data.load_data(filename,
                                         sequence_length,
                                         split,
                                         usecols=usecols)
        # Only the target scaler is needed to un-scale the outputs below.
        _sca_x, sca_y = dataloader.load_scaler()
        train_x, train_y, test_x, test_y = dataloader.get_x_y()
        all_y = dataloader.get_all_y()

        train_x = np.reshape(
            train_x, (train_x.shape[0], train_x.shape[1], feature_num))
        test_x = np.reshape(
            test_x, (test_x.shape[0], test_x.shape[1], feature_num))
        print(train_y.shape)

        lstm = lstm_model.lstm()
        model = get_model(lstm,
                          get_model_measure,
                          sequence_length,
                          feature_num,
                          dropout_prob,
                          lstm_units=lstm_units)
        lstm.train_model(model, train_x, train_y, batch_size, epochs)
        predict_y = lstm.predict(model, test_x, pre_way=predict_measure)

        # Map targets and predictions back to the original range.
        train_y = sca_y.inverse_transform(train_y)
        test_y = sca_y.inverse_transform(test_y)
        predict_y = sca_y.inverse_transform(predict_y)

        # The value stored as "mse" is whatever loss.get_rmse returns.
        mse = loss.get_rmse(test_y, predict_y)
        mape = loss.get_mape(test_y, predict_y)
        err_str = '{0},{1},{2},{3},{4}\n'.format(sequence_length, batch_size,
                                                 epochs, mse, mape)
        fo.write(err_str)
        fo.flush()

        plotfilename = 'seqLen:{0}_batchsize:{1}_epochs:{2}_preMeasure:{3}_dropout:{4}'.format(
            sequence_length, batch_size, epochs, predict_measure, dropout_prob)
        title = plotfilename + '\nmse:{0}_mape:{1}'.format(mse, mape)
        plot_and_save(title, sequence_length, plotfilename, save_filepath,
                      all_y, test_y, train_y, predict_y)
def multi_cell_file(bound, split, timestep, pre_step, batchsize, epochs,
                    dropout_prob, failure):
    """Train one LSTM on several cell files and evaluate the last two cells.

    Relies on the module-level names ``cell_nums``, ``data_path``,
    ``multi_usecols``, ``feature_num`` and ``cap_col_index``. For each of the
    last two cells it prints the error metrics, records them through
    ``error_2_csv`` and saves a figure through ``utils.plot_and_save``.

    Args:
        bound: Forwarded to ``get_origin_train_test``.
        split: Forwarded to ``get_origin_train_test`` and ``get_rul``.
        timestep: Window length used when building x/y pairs and the network.
        pre_step: Number of predicted steps; also the dense output size.
        batchsize: Batch size passed to ``model.fit``.
        epochs: Number of epochs passed to ``model.fit``.
        dropout_prob: Forwarded to ``build_network``.
        failure: Forwarded unchanged to ``error_2_csv``.

    Returns:
        None.
    """
    # TODO: use the first 600 cycles of cells 43 and 46 to train one model,
    # then use it to predict the last 0.3 of the data for both batteries.
    cell_name_header = 'Statistics_1-'
    # Only the last two loaded cells are evaluated.
    num_test_cells = 2

    # Load the raw train / test / full data of every cell file.
    train, test, all_data = [], [], []
    cell_filenames = []
    for cell_num in cell_nums:
        cell_filename = cell_name_header + cell_num
        cell_filenames.append(cell_filename)
        cell_train, cell_test, cell_all = get_origin_train_test(
            data_path,
            cell_filename,
            split,
            timestep,
            bound=bound,
            usecols=multi_usecols)
        train.append(cell_train)
        test.append(cell_test)
        all_data.append(cell_all)

    # Actual RUL information of each test cell.
    rul_list = []
    keys = ['start_pre_cycle', 'eol_cycle', 'rul', 'eol_cap', 'eol_cap_norm']
    for i in range(len(all_data) - num_test_cells, len(all_data)):
        # get_rul returns several values, i.e. a tuple, zipped with the keys.
        values = get_rul(all_data[i], split, col_index=cap_col_index)
        rul_list.append(dict(zip(keys, values)))

    # Merge the training data of all cells and scale it in one go.
    train_all = np.array(train).reshape(-1, feature_num)
    train_all_scaled, scaler = get_scaled_train(train_all)
    train_all_scaled = np.reshape(train_all_scaled,
                                  (len(train), -1, feature_num))

    # Split every cell's scaled data into x and y, then merge everything into
    # one large training set for the network. Test windows are built for every
    # cell so that their indices line up with cell_filenames / all_data.
    x_train_scaled, y_train_scaled = [], []
    x_test_scaled, y_test = [], []
    for cell_train_scaled, cell_test in zip(train_all_scaled, test):
        x_cell_train_scaled, y_cell_train_scaled = get_x_y(
            cell_train_scaled, timestep, cap_col_index - 1, pre_step)
        x_train_scaled.append(x_cell_train_scaled)
        y_train_scaled.append(y_cell_train_scaled)

        x_cell_test_scaled, y_cell_test = get_x_scaled_y(
            cell_test, timestep, scaler, cap_col_index - 1, pre_step)
        x_test_scaled.append(x_cell_test_scaled)
        y_test.append(y_cell_test)
    x_train_scaled = np.reshape(x_train_scaled, (-1, timestep, feature_num))
    y_train_scaled = np.array(y_train_scaled).reshape(-1, pre_step)

    # Train.
    model = lstm_network.lstm_network().build_network(
        timestep,
        feature_num,
        dense_units=pre_step,
        dropout_prob=dropout_prob)
    model.fit(x_train_scaled,
              y_train_scaled,
              batch_size=batchsize,
              epochs=epochs)

    # Predict on the test cells.
    first_test_index = len(x_test_scaled) - num_test_cells
    for i in range(first_test_index, len(x_test_scaled)):
        # rul_list only holds the test cells, so shift the index.
        rul_info = rul_list[i - first_test_index]

        pre_scaled_y = np.reshape(model.predict(x_test_scaled[i]), (-1, 1))
        # Replicate the prediction across feature_num columns so the scaler
        # can invert it, then keep the column at index cap_col_index - 1.
        construct_pre = np.tile(pre_scaled_y, (1, feature_num))
        pre_y = scaler.inverse_transform(construct_pre)[:, cap_col_index - 1]

        # Give both metric arguments the same (N, 1) shape; (N, 1) against
        # (N,) would broadcast to (N, N) in any element-wise metric.
        y_test_cell = np.reshape(y_test[i], (-1, 1))
        pre_y_column = np.reshape(pre_y, (-1, 1))
        mape = loss.get_mape(y_test_cell, pre_y_column)
        mse = loss.get_rmse(y_test_cell, pre_y_column)
        mape_mse_info = "mape:{0:.4},mse:{1:.4}".format(mape, mse)
        print(mape_mse_info)

        pre_rul = get_pre_rul(pre_y, rul_info['eol_cap_norm'])
        rul_error = rul_info['rul'] - pre_rul
        info = str(i) + '_RUL_actual:{0},LSTM:{1},error:{2},'.format(
            rul_info['rul'], pre_rul, rul_error) + mape_mse_info
        print(info)

        error_2_csv(cell_filenames[i], rul_info['eol_cap'],
                    rul_info['start_pre_cycle'], rul_info['eol_cycle'],
                    rul_info['rul'], pre_rul, rul_error, mape, mse, failure)

        all_y = np.array(all_data[i][:, cap_col_index]).reshape(-1, 1)
        utils.plot_and_save(rul_info['eol_cap_norm'], cell_filenames[i],
                            timestep, info, 'result/sks_figure/', all_y,
                            test[i], train[i], pre_y)