def _train_on_data(self, train_set, test_set, x_features, y_features,
                   epoch, val_split, batch_size, look_back):
    """Fit ``self.model`` on the training set and score it on the test set.

    Both data sets are turned into model inputs/targets by
    ``self._process_data``.  Training runs without shuffling and with the
    instance's early-stopping and checkpoint callbacks attached.

    Returns:
        list: ``[mse, mae, train_time]``.  The two losses are the values
        returned by ``ut.mse_loss`` / ``ut.mae_loss`` for the test-set
        predictions; ``train_time`` is the wall-clock duration of the
        ``fit`` call in seconds.
    """
    train_inputs, train_targets = self._process_data(
        train_set, x_features, y_features, look_back)
    test_inputs, test_targets = self._process_data(
        test_set, x_features, y_features, look_back)

    # NOTE(review): `nb_epoch` is the Keras 1 spelling of this argument;
    # confirm it is still accepted by the installed Keras version.
    fit_options = {
        'batch_size': batch_size,
        'verbose': 2,
        'nb_epoch': epoch,
        'validation_split': val_split,
        'callbacks': [self.early_stopping, self.checkpointer],
        'shuffle': False,
    }

    started_at = time.time()
    print('Start training. Time: ', started_at)
    # The object returned by fit() is not used here.
    self.model.fit(train_inputs, train_targets, **fit_options)
    train_time = time.time() - started_at
    print('Finish trainning. Time: ', train_time)

    # Score the model that was just trained on the processed test windows.
    predictions = self.model.predict(test_inputs)
    mse = ut.mse_loss(predictions, test_targets)
    mae = ut.mae_loss(predictions, test_targets)
    print('Mean square error on test set: ', mse)
    print('Mean absolute error on the test set: ', mae)
    return [mse, mae, train_time]
def predict_blind(model_file, data_dict, x_features, y_features, look_back):
    """Forecast the test set recursively, feeding predictions back as input.

    The model stored in ``model_file`` is loaded and seeded with the last
    ``look_back`` rows of ``data_dict['train']``.  One prediction is made
    per row of ``data_dict['test']``; after each step the oldest row of the
    window is dropped and a new row, built from the prediction plus the
    test-set values of the non-predicted input features, is appended.

    Args:
        model_file: Path handed to ``load_model``.
        data_dict: Mapping with ``'train'`` and ``'test'`` DataFrames.
        x_features: Column names used as model inputs.
        y_features: Column names the model predicts.
        look_back: Number of rows in one input window.

    Returns:
        tuple: ``(pred_df, mse, mae)`` where ``pred_df`` holds the
        predictions indexed like the test set, with one column per entry
        of ``y_features``, and the losses are the values returned by
        ``ut.mse_loss`` / ``ut.mae_loss``.
    """
    print("Predict blind")
    model = load_model(model_file)
    n_inputs = len(x_features)

    # Seed window: the last `look_back` training rows (the original comment
    # states that the train and test sets are consecutive).  The
    # `look_back` argument is used here rather than a module-level
    # `decoded` dict, so the function depends only on its own parameters.
    X_test = data_dict['train'][x_features].values[-look_back:]
    X_test = X_test.reshape(-1, look_back, n_inputs)
    y_test = data_dict['test'][y_features].values[:, :]

    # Input features that are not predicted are read from the test set.
    append_features = [name for name in x_features if name not in y_features]
    append_values = data_dict['test'][append_features].values

    pred_lstm = np.empty((0, y_test.shape[1]), int)
    for i in range(y_test.shape[0]):
        pred_lstm = np.append(pred_lstm, model.predict(X_test), axis=0)
        # NOTE(review): this concatenation assumes `x_features` lists the
        # `y_features` first (same order), followed by the remaining
        # features - confirm against the configuration.
        x_append = np.concatenate((pred_lstm[i], append_values[i]))
        # Slide the window: drop the oldest row, append the new one.
        X_test = np.append(
            X_test[0][1:], x_append.reshape((1, n_inputs)), axis=0)
        X_test = X_test.reshape(-1, look_back, n_inputs)

    mse_loss_lstm = ut.mse_loss(pred_lstm, y_test)
    mae_loss_lstm = ut.mae_loss(pred_lstm, y_test)
    print('Mean square error on test set: ', mse_loss_lstm)
    print('Mean absolute error on the test set: ', mae_loss_lstm)

    pred_df = pd.DataFrame(pred_lstm)
    # Label the columns with the features that were actually predicted.
    pred_df.columns = y_features
    pred_df = pred_df.set_index(data_dict['test'].index.values)
    return pred_df, mse_loss_lstm, mae_loss_lstm
def _train_on_data(self, train_set, test_set, x_features, y_features,
                   epoch, val_split, batch_size, look_back, blind=True):
    """Train ``self.model`` silently, then evaluate it on the test set.

    Args:
        blind: When true the test predictions come from
            ``self._predict_blind`` and are compared with every test row;
            otherwise ``self._predict_update`` is used and the first
            ``look_back`` test rows are excluded from the comparison.

    Returns:
        list: ``[mse, mae, train_time]`` - the values returned by
        ``ut.mse_loss`` / ``ut.mae_loss`` and the wall-clock duration of
        the ``fit`` call in seconds.
    """
    inputs, targets = self._process_data(
        train_set, x_features, y_features, look_back)

    tic = time.time()
    print('Start training. Time: ', tic)
    # NOTE(review): `nb_epoch` is the Keras 1 keyword - confirm it matches
    # the installed Keras version.  The fit() result is not used.
    self.model.fit(
        inputs,
        targets,
        batch_size=batch_size,
        verbose=0,
        nb_epoch=epoch,
        validation_split=val_split,
        callbacks=[self.early_stopping, self.checkpointer],
        shuffle=False,
    )
    train_time = time.time() - tic
    print('Finish trainning. Time: ', train_time)

    # Evaluate with the requested prediction strategy.
    if not blind:
        predicted = self._predict_update(
            train_set, test_set, x_features, y_features, look_back)
        expected = test_set[y_features].values[look_back:, :]
    else:
        predicted = self._predict_blind(
            train_set, test_set, x_features, y_features, look_back)
        expected = test_set[y_features].values[:, :]

    mse = ut.mse_loss(predicted, expected)
    mae = ut.mae_loss(predicted, expected)
    print('Mean square error on test set: ', mse)
    print('Mean absolute error on the test set: ', mae)
    return [mse, mae, train_time]
def _evaluate_solution(self, encoded_solution):
    """Return the metrics dict for an encoded solution, using the cache.

    The solution is decoded, and a SHA-224 digest of its ``look_back`` and
    ``weights`` is used as the cache key.  When the cache returns nothing
    for that key, an ``nn.RNNBuilder`` is built from the decoded layers and
    weights, its predictions are scored with ``ut.mse_loss`` /
    ``ut.mae_loss``, and the resulting metrics are stored under the key.

    Returns:
        dict: keys ``trainable_params``, ``num_hidden_layers``, ``layers``,
        ``mse``, ``mae``, ``num_hidden_neurons`` and ``look_back`` when
        freshly computed; otherwise whatever the cache returned.
    """
    decoded = self._decode_solution(encoded_solution)
    print('Evaluate: ' + str(decoded['layers']) + ' ...')

    # Cache key: look_back followed by the weights, both as UTF-8 text.
    digest = hashlib.sha224()
    digest.update(str(decoded['look_back']).encode('UTF-8'))
    digest.update(str(decoded['weights']).encode('UTF-8'))
    model_hash = digest.hexdigest()

    # NOTE(review): upsert_cache(key, None) appears to act as a lookup -
    # its result is treated as the cached metrics when it is not None.
    metrics = self.cache.upsert_cache(model_hash, None)
    if metrics is not None:
        print('Metrics load from cache')
    else:
        cfg = self.config
        look_back = decoded['look_back']
        rnn_solution = nn.RNNBuilder(
            decoded['layers'],
            decoded['weights'],
            dense_activation=cfg.dense_activation)

        if cfg.blind:
            y_predicted = rnn_solution.predict_blind(
                self.data['train'], self.data['test'],
                cfg.x_features, cfg.y_features, look_back)
            y_gt = self.data['test'][cfg.y_features].values[:, :]
        else:
            y_predicted = rnn_solution.predict(
                self.data[cfg.x_features], look_back)
            # The first `look_back` rows are excluded from the comparison.
            y_gt = self.data[cfg.y_features].values[look_back:, :]

        mse = ut.mse_loss(y_predicted, y_gt)
        mae = ut.mae_loss(y_predicted, y_gt)

        metrics = {}
        metrics['trainable_params'] = int(rnn_solution.trainable_params)
        metrics['num_hidden_layers'] = int(rnn_solution.hidden_layers)
        metrics['layers'] = '-'.join(map(str, decoded['layers']))
        metrics['mse'] = mse
        metrics['mae'] = mae
        # Hidden neurons: every layer size except the first and the last.
        metrics['num_hidden_neurons'] = int(np.sum(decoded['layers'][1:-1]))
        metrics['look_back'] = int(look_back)

        # Drop the network reference before storing the metrics.
        del rnn_solution
        self.cache.upsert_cache(model_hash, metrics)

    print(metrics)
    self.memory_tracker.print_diff()
    return metrics
def predict(model_file, data_dict, x_features, y_features, look_back):
    """Predict the test set step by step, reusing each prediction as input.

    The model in ``model_file`` is loaded and started from the last
    ``look_back`` rows of ``data_dict['train']``.  For every row of
    ``data_dict['test']`` one prediction is made; the window is then
    advanced by dropping its oldest row and appending the prediction
    together with the test-set values of the non-predicted input features.

    Args:
        model_file: Path handed to ``load_model``.
        data_dict: Mapping with ``'train'`` and ``'test'`` DataFrames.
        x_features: Column names used as model inputs.
        y_features: Column names the model predicts.
        look_back: Number of rows in one input window.

    Returns:
        tuple: ``(pred_df, mse, mae)``; ``pred_df`` carries the test-set
        index and one column per entry of ``y_features``, and the losses
        are the values returned by ``ut.mse_loss`` / ``ut.mae_loss``.
    """
    model = load_model(model_file)
    n_inputs = len(x_features)

    # Get the last sliding window of the training data (the original
    # comment states that the train and test sets are consecutive).  The
    # window length comes from the `look_back` argument instead of a
    # module-level `decoded` dict that this function never receives.
    X_test = data_dict['train'][x_features].values[-look_back:]
    X_test = X_test.reshape(-1, look_back, n_inputs)
    y_test = data_dict['test'][y_features].values[:, :]

    # Features fed to the model but not predicted by it; their values are
    # taken from the test set.  Extracted once, outside the loop.
    append_features = [col for col in x_features if col not in y_features]
    append_values = data_dict['test'][append_features].values

    pred_lstm = np.empty((0, y_test.shape[1]), int)
    for i in range(y_test.shape[0]):
        pred_lstm = np.append(pred_lstm, model.predict(X_test), axis=0)
        # NOTE(review): assumes `x_features` starts with the `y_features`
        # (same order) followed by the other features - TODO confirm.
        x_append = np.concatenate((pred_lstm[i], append_values[i]))
        # Advance the window by one row.
        X_test = np.append(
            X_test[0][1:], x_append.reshape((1, n_inputs)), axis=0)
        X_test = X_test.reshape(-1, look_back, n_inputs)

    mse_loss_lstm = ut.mse_loss(pred_lstm, y_test)
    mae_loss_lstm = ut.mae_loss(pred_lstm, y_test)
    print('Mean square error on test set: ', mse_loss_lstm)
    print('Mean absolute error on the test set: ', mae_loss_lstm)

    pred_df = pd.DataFrame(pred_lstm)
    # Name the columns after the features passed in by the caller.
    pred_df.columns = y_features
    pred_df = pred_df.set_index(data_dict['test'].index.values)
    return pred_df, mse_loss_lstm, mae_loss_lstm
def predict(model_file, data_dict, x_features, y_features, look_back):
    """Predict the test set from sliding windows of its own input rows.

    Every run of ``look_back`` consecutive test rows (columns
    ``x_features``) forms one model input; the prediction for a window is
    compared with the ``y_features`` values of the row that follows it.

    Args:
        model_file: Path handed to ``load_model``.
        data_dict: Mapping whose ``'test'`` entry is a DataFrame.
        x_features: Column names used as model inputs.
        y_features: Column names the model predicts.
        look_back: Number of rows in one input window.

    Returns:
        tuple: ``(pred_df, mse, mae)``; ``pred_df`` is indexed by the test
        index from position ``look_back`` onwards, with one column per
        entry of ``y_features``.  The losses are the values returned by
        ``ut.mse_loss`` / ``ut.mae_loss``.
    """
    print("Predict")
    model = load_model(model_file)
    test_df = data_dict['test']

    # Extract the input matrix once instead of once per window.
    x_values = test_df[x_features].values
    n_windows = len(x_values) - look_back
    X = np.array(
        [x_values[i:i + look_back] for i in range(n_windows)]
    ).reshape(-1, look_back, len(x_features))
    Y = model.predict(X)

    # Ground truth and column labels use the `y_features` argument, so the
    # function no longer depends on a module-level `config`.
    y_gt = test_df[y_features].values[look_back:, :]
    mse_loss_lstm = ut.mse_loss(Y, y_gt)
    mae_loss_lstm = ut.mae_loss(Y, y_gt)
    print('Mean square error on test set: ', mse_loss_lstm)
    print('Mean absolute error on the test set: ', mae_loss_lstm)

    pred_df = pd.DataFrame(Y)
    pred_df.columns = y_features
    # The first `look_back` rows have no prediction, so skip their labels.
    pred_df = pred_df.set_index(test_df.index.values[look_back:])
    return pred_df, mse_loss_lstm, mae_loss_lstm
def predict(model_file, data_dict, x_features, y_features, look_back):
    """Run the saved model over sliding windows of the test set.

    Window ``i`` consists of test rows ``i .. i + look_back - 1`` (columns
    ``x_features``).  Its prediction is scored against the ``y_features``
    values of test row ``i + look_back``.

    Args:
        model_file: Path handed to ``load_model``.
        data_dict: Mapping whose ``'test'`` entry is a DataFrame.
        x_features: Column names used as model inputs.
        y_features: Column names the model predicts.
        look_back: Number of rows in one input window.

    Returns:
        tuple: ``(pred_df, mse, mae)``.  ``pred_df`` has one column per
        entry of ``y_features`` and is indexed by the test index with its
        first ``look_back`` labels removed; the losses are the values
        returned by ``ut.mse_loss`` / ``ut.mae_loss``.
    """
    print("Predict")
    model = load_model(model_file)
    test_df = data_dict['test']
    n_inputs = len(x_features)

    # Materialise the input values a single time; slicing the array is
    # cheaper than asking the DataFrame for `.values` in every iteration.
    inputs = test_df[x_features].values
    windows = [
        inputs[start:start + look_back]
        for start in range(len(inputs) - look_back)
    ]
    X = np.array(windows).reshape(-1, look_back, n_inputs)
    Y = model.predict(X)

    # Use the caller-supplied `y_features` (previously the global
    # `config.y_features` was read and the argument was ignored).
    y_gt = test_df[y_features].values[look_back:, :]
    mse_loss_lstm = ut.mse_loss(Y, y_gt)
    mae_loss_lstm = ut.mae_loss(Y, y_gt)
    print('Mean square error on test set: ', mse_loss_lstm)
    print('Mean absolute error on the test set: ', mae_loss_lstm)

    pred_df = pd.DataFrame(Y)
    pred_df.columns = y_features
    pred_df = pred_df.set_index(test_df.index.values[look_back:])
    return pred_df, mse_loss_lstm, mae_loss_lstm
def _train_on_data(self, train_set, test_set, x_features, y_features,
                   epoch, val_split, batch_size, look_back, blind=True,
                   verbose=0):
    """Train ``self.model`` and report its losses on the test set.

    Args:
        blind: Selects the evaluation strategy.  Truthy: predictions come
            from ``self._predict_blind`` and all test rows are scored.
            Falsy: ``self._predict_update`` is used and scoring starts at
            test row ``look_back``.
        verbose: Forwarded unchanged to ``self.model.fit``.

    Returns:
        list: ``[mse, mae, train_time]`` - the values returned by
        ``ut.mse_loss`` / ``ut.mae_loss`` and the wall-clock duration of
        the ``fit`` call in seconds.
    """
    features, labels = self._process_data(
        train_set, x_features, y_features, look_back)
    callbacks = [self.early_stopping, self.checkpointer]

    t0 = time.time()
    print('Start training. Time: ', t0)
    # NOTE(review): `nb_epoch` is the Keras 1 keyword - confirm it matches
    # the installed Keras version.  The fit() result is not used.
    self.model.fit(features, labels,
                   batch_size=batch_size,
                   verbose=verbose,
                   nb_epoch=epoch,
                   validation_split=val_split,
                   callbacks=callbacks,
                   shuffle=False)
    train_time = time.time() - t0
    print('Finish trainning. Time: ', train_time)

    # Pick the prediction routine and the first scored test row together.
    run_prediction = self._predict_blind if blind else self._predict_update
    first_scored_row = 0 if blind else look_back

    forecast = run_prediction(
        train_set, test_set, x_features, y_features, look_back)
    truth = test_set[y_features].values[first_scored_row:, :]

    mse = ut.mse_loss(forecast, truth)
    mae = ut.mae_loss(forecast, truth)
    print('Mean square error on test set: ', mse)
    print('Mean absolute error on the test set: ', mae)
    return [mse, mae, train_time]
# Train the network on the loaded data with the configured settings.
train_metrics = trainer.train(
    data_dict,
    x_features=config.x_features,
    y_features=config.y_features,
    epoch=config.epoch,
    val_split=config.val_split,
    batch_size=config.batch_size,
    look_back=decoded['look_back'])

# Predict with the strategy selected on the command line.
if FLAGS.blind:
    pred_df, mse_loss_lstm, mae_loss_lstm = predict_blind(
        model_file, data_dict, config.x_features, config.y_features,
        decoded['look_back'])
else:
    pred_df, mse_loss_lstm, mae_loss_lstm = predict(
        model_file, data_dict, config.x_features, config.y_features,
        decoded['look_back'])
pred_df.to_csv(config.results_folder + model_name + '.csv')

# Per-entry errors followed by summary rows.  All summary statistics are
# computed BEFORE any row is appended: appending them one at a time made
# 'Sdev' include the freshly added 'Mean', 'Max' and 'Min' rows.
# NOTE(review): assumes ut.mse_loss / ut.mae_loss return a label-indexed
# pandas object here - confirm.
mse = ut.mse_loss(pred_df, data_dict['test'])
mse_summary = [
    ('Mean', np.mean(mse)),
    ('Max', np.max(mse)),
    ('Min', np.min(mse)),
    ('Sdev', np.std(mse)),
]
for label, value in mse_summary:
    mse.loc[label] = value

mae = ut.mae_loss(pred_df, data_dict['test'])
mae_summary = [
    ('Mean', np.mean(mae)),
    ('Max', np.max(mae)),
    ('Min', np.min(mae)),
    ('Sdev', np.std(mae)),
]
for label, value in mae_summary:
    mae.loc[label] = value

mse.to_csv(config.results_folder + model_name + '-mse.csv')
mae.to_csv(config.results_folder + model_name + '-mae.csv')
# Use an inner merge only when it was requested explicitly.
inner = FLAGS.merge == 'inner'
data_dict = reader.load_data(config.data_folder, inner)

# The network's input/output sizes follow the configured feature lists.
layer_in = len(config.x_features)
layer_out = len(config.y_features)
decoded = decode_solution(config.solution, layer_in, layer_out)

# Model name: "<arch joined by '-'>.<look_back>.<drop_out>".
model_name = '-'.join(map(str, decoded['rnn_arch'])) + '.'
model_name = model_name + str(decoded['look_back']) + '.'
model_name = model_name + str(decoded['drop_out'])
model_file = config.models_folder + model_name + '.hdf5'

# predict the occupancy
pred_df, mse_loss_lstm, mae_loss_lstm = predict(
    model_file, data_dict, config.x_features, config.y_features,
    decoded['look_back'])
pred_df.to_csv(config.results_folder + model_name + '.csv')

# Per-entry errors without the 'time' and 'weekday' entries, followed by
# summary rows.  All summary statistics are computed BEFORE any row is
# appended: appending them one at a time made 'Sdev' include the freshly
# added 'Mean', 'Max' and 'Min' rows.
# NOTE(review): assumes ut.mse_loss / ut.mae_loss return a label-indexed
# pandas object here - confirm.
mse = ut.mse_loss(pred_df, data_dict['test'])
mse = mse.drop(['time', 'weekday'])
mse_summary = [
    ('Mean', np.mean(mse)),
    ('Max', np.max(mse)),
    ('Min', np.min(mse)),
    ('Sdev', np.std(mse)),
]
for label, value in mse_summary:
    mse.loc[label] = value

mae = ut.mae_loss(pred_df, data_dict['test'])
mae = mae.drop(['time', 'weekday'])
mae_summary = [
    ('Mean', np.mean(mae)),
    ('Max', np.max(mae)),
    ('Min', np.min(mae)),
    ('Sdev', np.std(mae)),
]
for label, value in mae_summary:
    mae.loc[label] = value

mse.to_csv(config.results_folder + model_name + '-mse.csv')
mae.to_csv(config.results_folder + model_name + '-mae.csv')