def do_model(all_data, steps, run_model=True):
    """Train and score a fixed two-layer LSTM that predicts the last column
    of `all_data` `steps` rows ahead.

    Returns (metrics, model, scaler); when run_model is False only the fitted
    scaler is produced, as (None, None, scaler).
    """
    _steps = steps
    print("steps:", _steps)
    # Scale every column to [0, 1]; the scaler is returned so callers can
    # invert the transform on predictions later.
    scaler = MinMaxScaler()
    all_data = scaler.fit_transform(all_data)
    if not run_model:
        return None, None, scaler
    # Offset features/labels by _steps rows: row t predicts the last column
    # of row t + _steps.
    features = all_data[:-_steps]
    labels = all_data[_steps:, -1:]
    tts = train_test_split(features, labels, test_size=0.4)
    X_train = tts[0]
    X_test = tts[1]
    Y_train = tts[2].astype(np.float64)
    Y_test = tts[3].astype(np.float64)
    # Fixed hyper-parameters (presumably from an earlier search — the sibling
    # hyperas variants of this function sweep the same knobs).
    optimiser = 'adam'
    hidden_neurons = 200
    loss_function = 'mse'
    batch_size = 105
    dropout = 0.056
    inner_hidden_neurons = 269
    dropout_inner = 0.22
    # Keras LSTMs expect (samples, timesteps, features); a single timestep
    # per sample is used here.
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    print("X train shape:\t", X_train.shape)
    print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    # print("Steps:\t", _steps)
    in_neurons = X_train.shape[2]
    out_neurons = 1
    model = Sequential()
    gpu_cpu = 'cpu'
    best_weight = BestWeight()
    # Keras 1.x API: output_dim/input_dim/init/consume_less/nb_epoch.
    model.add(
        LSTM(output_dim=hidden_neurons, input_dim=in_neurons,
             return_sequences=True, init='uniform',
             consume_less=gpu_cpu))
    model.add(Dropout(dropout))
    dense_input = inner_hidden_neurons
    model.add(
        LSTM(output_dim=dense_input, input_dim=hidden_neurons,
             return_sequences=False, consume_less=gpu_cpu))
    model.add(Dropout(dropout_inner))
    model.add(Activation('relu'))
    model.add(Dense(output_dim=out_neurons, input_dim=dense_input))
    model.add(Activation('relu'))
    model.compile(loss=loss_function, optimizer=optimiser)
    history = model.fit(X_train, Y_train, verbose=0, batch_size=batch_size,
                        nb_epoch=30, validation_split=0.3, shuffle=False,
                        callbacks=[best_weight])
    # Roll back to the weights from the best validation epoch before scoring.
    model.set_weights(best_weight.get_best())
    # EPS keeps the percentage-style metrics below away from divide-by-zero.
    predicted = model.predict(X_test) + EPS
    rmse_val = rmse(Y_test, predicted)
    metrics = OrderedDict([
        # ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', geh(Y_test, predicted)),
        ('rmse', rmse_val),
        ('mape', mean_absolute_percentage_error(Y_test, predicted)),
        # ('smape', smape(predicted, _Y_test)),
        # ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        # ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        # ('batch_size', batch_size),
        # ('optimiser', optimiser),
        # ('dropout', dropout),
        # ('extra_layer_dropout', dropout_inner),
        # ('extra_layer_neurons', inner_hidden_neurons),
        # ('loss function', loss_function)
        # 'history': history.history
    ])
    return metrics, model, scaler
# Trim the warm-up portion of every prediction series, then print a LaTeX
# table of GEH/MAPE/RMSE per step horizon.
predictions = {
    k: np.array(v[split_idx:])
    for k, v in predictions.items()
}
print()
table = []
print(' & '.join(['step', 'geh', 'mape', 'rmse']) + ' \\\\')
for step in steps:
    # true values
    # NOTE(review): `len(predictions)` is the number of step keys in the
    # dict, not a series length — confirm this slice bound is intended.
    stepped_vals = flow_values[step:len(predictions)]
    # predicted values (eps avoids divide-by-zero in the metrics below)
    pred_vals = predictions[step][:-step] + eps
    table.append(OrderedDict([
        ('steps', step),
        ('geh', geh(stepped_vals, pred_vals)),
        ('mape', mape(stepped_vals, pred_vals)),
        ('rmse', rmse(stepped_vals, pred_vals))
    ]))
print(tabulate.tabulate(table, 'keys', 'latex'))
print("Loading matplotlib")
import matplotlib.pyplot as plt
true_y = []
true_x = []
pred_y = []
print("Predicting data rows: {}".format(data_len - row_count))
progress = pyprind.ProgBar(data_len - row_count, width=50, stream=1)
# Loop body continues beyond this chunk of the file.
for row in it:
# Split the collected (timestamp, flow) pairs into plotting arrays, persist
# them, print GEH/MAPE/RMSE, and plot readings vs. LSTM-online predictions.
true_xy = np.array(true_xy)
true_x = true_xy[:, 0]
pred_x = np.reshape(pred_xy[:, 0], (-1, 1))
pred_y = np.reshape(pred_xy[:, 1].astype(dtype=np.float32), (-1, 1))
true_y = true_xy[:, 1].astype(np.float32)
# Copy used only for the metrics: drop the final (unmatched) reading and
# replace zero flows with 1 so the percentage metrics cannot divide by zero.
true_y_max = np.copy(true_y)[:-1]
true_y_max[true_y_max == 0] = 1
np.savez('pred_data/3002-no-reset-on-error-all-sensor',
         true_x=true_x, true_y=true_y, pred_x=pred_x, pred_y=pred_y)
true_y_max = true_y_max.reshape((true_y_max.shape[0], 1))
# print ("true_y_max", true_y_max.shape)
# print("pred_y", pred_y.shape)
# pred_y[:-1] drops the last prediction so both series align element-wise.
print("GEH: ", geh(true_y_max, pred_y[:-1]))
print("MAPE: ", mape(true_y_max, pred_y[:-1]))
print("RMSE: ", rmse(true_y_max, pred_y[:-1]))
font = {'size': 30}
import matplotlib
matplotlib.rc('font', **font)
import matplotlib.pyplot as plt
plt.plot(true_x, true_y, 'b-', label='Readings')
plt.plot(pred_x, pred_y, 'r-', label='LSTM-Online Predictions')
# Title spans the first to last reading timestamp, e.g. "Monday 01 June, 2015".
df = "%A %d %B, %Y"
plt.title("3002: Traffic Flow from {} to {}".format(
    true_x[0].strftime(df), true_x[-1].strftime(df)))
plt.legend()
def do_model(all_data):
    """Hyperas search template: build, train and score one LSTM candidate.

    The ``{{choice(...)}}`` / ``{{uniform(...)}}`` placeholders are expanded
    by hyperas before execution; `steps` is injected from the hyperas data
    scope.  Returns the dict hyperopt expects from an objective function.
    """
    _steps = steps
    print("steps:", _steps)
    # Row t predicts columns 4+ of row t + _steps.
    features = all_data[:-_steps]
    labels = all_data[_steps:, 4:]
    tts = train_test_split(features, labels, test_size=0.4)
    X_train = tts[0]
    X_test = tts[1]
    Y_train = tts[2].astype(np.float64)
    Y_test = tts[3].astype(np.float64)
    optimiser = 'adam'
    hidden_neurons = {{choice([256, 300, 332])}}  # tested already on: 128, 196, 212, 230, 244
    loss_function = 'mse'
    batch_size = {{choice([96, 105, 128])}}  # already did 148, 156, 164, 196
    dropout = {{uniform(0, 0.1)}}
    hidden_inner_factor = {{uniform(0.1, 1.1)}}
    inner_hidden_neurons = int(hidden_inner_factor * hidden_neurons)
    dropout_inner = {{uniform(0, 1)}}
    extra_layer = {{choice([True, False])}}
    if not extra_layer:
        # Report 0 inner dropout when the optional second LSTM is absent.
        dropout_inner = 0
    # Keras LSTMs expect (samples, timesteps, features); one timestep here.
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    print("X train shape:\t", X_train.shape)
    # print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    # print("Steps:\t", _steps)
    print("Extra layer:\t", extra_layer)
    in_neurons = X_train.shape[2]
    out_neurons = 1
    model = Sequential()
    gpu_cpu = 'gpu'
    best_weight = BestWeight()
    dense_input = hidden_neurons
    model.add(LSTM(output_dim=hidden_neurons, input_dim=X_test.shape[2],
                   return_sequences=extra_layer, init='uniform',
                   consume_less=gpu_cpu))
    model.add(Dropout(dropout))
    if extra_layer:
        # Optional second LSTM; Dense below then takes its width instead.
        dense_input = inner_hidden_neurons
        model.add(LSTM(output_dim=dense_input, input_dim=hidden_neurons,
                       return_sequences=False, consume_less=gpu_cpu))
        model.add(Dropout(dropout_inner))
    model.add(Activation('relu'))
    model.add(Dense(output_dim=out_neurons, input_dim=dense_input))
    model.add(Activation('relu'))
    model.compile(loss=loss_function, optimizer=optimiser)
    history = model.fit(
        X_train, Y_train,
        verbose=0,
        batch_size=batch_size,
        nb_epoch=30,
        validation_split=0.3,
        shuffle=False,
        callbacks=[best_weight]
    )
    # Roll back to the best validation-epoch weights before scoring.
    model.set_weights(best_weight.get_best())
    # EPS keeps the percentage-style metrics away from divide-by-zero.
    predicted = model.predict(X_test) + EPS
    rmse_val = rmse(Y_test, predicted)
    metrics = OrderedDict([
        ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', geh(Y_test, predicted)),
        ('rmse', rmse_val),
        ('mape', mean_absolute_percentage_error(Y_test, predicted)),
        # ('smape', smape(predicted, _Y_test)),
        ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        ('batch_size', batch_size),
        ('optimiser', optimiser),
        ('dropout', dropout),
        ('extra_layer', extra_layer),
        ('extra_layer_dropout', dropout_inner),
        ('extra_layer_neurons', inner_hidden_neurons),
        ('loss function', loss_function)
        # 'history': history.history
    ])
    # print(metrics)
    # BUG FIX: hyperopt *minimises* `loss`.  The original returned
    # -rmse_val, which made the search maximise RMSE and therefore select
    # the worst hyper-parameters.  Lower RMSE is better, so return it as-is.
    return {'loss': rmse_val, 'status': STATUS_OK, 'metrics': metrics}
def do_model(all_data):
    """Hyperas search template (Keras 2, stateful LSTM): build, train and
    score one candidate configuration.

    `all_data` is split chronologically by `tts_factor`; the model trains on
    the first part and is evaluated on the remainder.  The ``{{...}}``
    placeholders are expanded by hyperas before execution.  Returns the dict
    hyperopt expects from an objective function.
    """
    _steps, tts_factor, num_epochs = get_steps_extra()
    # features = all_data[:-_steps]
    # labels = all_data[_steps:, 4:]
    # tts = train_test_split(features, labels, test_size=0.4)
    # X_train = tts[0]
    # X_test = tts[1]
    # Y_train = tts[2].astype(np.float64)
    # Y_test = tts[3].astype(np.float64)
    # Chronological split — no shuffling, so the test period follows training.
    split_pos = int(len(all_data) * tts_factor)
    train_data, test_data = all_data[:split_pos], all_data[split_pos:]
    # BUG FIX: the original built the training set from `test_data` (and
    # never used `train_data` at all), then evaluated on the very same
    # `test_data` below — training and scoring on identical rows.  Train on
    # the training split.
    dataX, dataY, fields = create_dataset(train_data, 1, _steps)
    optimiser = {{choice(['adam', 'rmsprop'])}}
    hidden_neurons = int({{quniform(16, 256, 4)}})
    loss_function = 'mse'
    batch_size = int({{quniform(1, 10, 1)}})
    dropout = {{uniform(0, 0.5)}}
    dropout_dense = {{uniform(0, 0.5)}}
    hidden_inner_factor = {{uniform(0.1, 1.9)}}
    inner_hidden_neurons = int(hidden_inner_factor * hidden_neurons)
    dropout_inner = {{uniform(0, 0.5)}}
    # Stateful models need sample counts that are exact multiples of the
    # batch size; fit_to_batch trims accordingly.
    dataX = fit_to_batch(dataX, batch_size)
    dataY = fit_to_batch(dataY, batch_size)
    extra_layer = {{choice([True, False])}}
    if not extra_layer:
        # Report 0 inner dropout when the optional second LSTM is absent.
        dropout_inner = 0
    # X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    # X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    # print("X train shape:\t", X_train.shape)
    # print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    print("Steps:\t", _steps)
    print("Extra layer:\t", extra_layer)
    print("Batch size:\t", batch_size)
    # in_neurons = X_train.shape[2]
    out_neurons = 1
    model = Sequential()
    best_weight = BestWeight()
    model.add(
        LSTM(units=hidden_neurons,
             batch_input_shape=(batch_size, 1, fields),
             return_sequences=extra_layer,
             stateful=True,
             dropout=dropout))
    model.add(Activation('relu'))
    if extra_layer:
        dense_input = inner_hidden_neurons
        model.add(
            LSTM(
                units=dense_input,
                # input_shape=hidden_neurons,
                stateful=True,
                return_sequences=False,
                dropout=dropout_inner))
        model.add(Activation('relu'))
    model.add(Dense(units=out_neurons, activation='relu'))
    model.add(Dropout(dropout_dense))
    model.compile(loss=loss_function, optimizer=optimiser)
    history = model.fit(dataX, dataY,
                        batch_size=batch_size,
                        epochs=num_epochs,
                        validation_split=0.3,
                        shuffle=False,
                        callbacks=[best_weight])
    # Roll back to the best validation-epoch weights before scoring.
    model.set_weights(best_weight.get_best())
    X_test, Y_test, _fields = create_dataset(test_data, 1, _steps)
    X_test, Y_test = fit_to_batch(X_test, batch_size), fit_to_batch(
        Y_test, batch_size)
    # EPS keeps the percentage-style metrics away from divide-by-zero.
    predicted = model.predict(X_test, batch_size=batch_size) + EPS
    rmse_val = rmse(Y_test, predicted)
    metrics = OrderedDict([
        ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', geh(Y_test, predicted)),
        ('rmse', rmse_val),
        ('mape', mean_absolute_percentage_error(Y_test, predicted)),
        # ('smape', smape(predicted, _Y_test)),
        ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        ('batch_size', batch_size),
        ('optimiser', optimiser),
        ('dropout', dropout),
        ('extra_layer', extra_layer),
        ('extra_layer_dropout', dropout_inner),
        ('dropout_dense', dropout_dense),
        ('extra_layer_neurons', inner_hidden_neurons),
        ('loss function', loss_function)
        # 'history': history.history
    ])
    print(metrics)
    # BUG FIX: hyperopt *minimises* `loss`.  The original returned
    # -rmse_val, which made the search maximise RMSE and therefore select
    # the worst hyper-parameters.  Lower RMSE is better, so return it as-is.
    return {'loss': rmse_val, 'status': STATUS_OK, 'metrics': metrics}
def do_model(all_data, steps, run_model=True):
    """Online (stateful) LSTM: learn the training split one batch at a time,
    then walk the test split predicting each batch *before* training on it.

    Hyper-parameters are chosen per `steps` horizon from earlier search
    results.  Returns (metrics, model); (None, None) when run_model is False.
    """
    _steps = steps
    print("steps:", _steps)
    all_data = all_data
    if not run_model:
        return None, None
    # Row t predicts the last column of row t + _steps.
    features = all_data[:-_steps]
    labels = all_data[_steps:, -1:]
    tts = train_test_split(features, labels, test_size=0.4)
    X_train = tts[0]
    X_test = tts[1]
    Y_train = tts[2].astype(np.float64)
    Y_test = tts[3].astype(np.float64)
    # BUG FIX: `optimiser` existed only in the commented-out block below, yet
    # model.compile(..., optimizer=optimiser) uses it — a NameError unless an
    # unseen module-level `optimiser` happened to exist.  Every sibling
    # variant of this function uses 'adam', so define it explicitly.
    optimiser = 'adam'
    # hidden_neurons = 300
    # loss_function = 'mse'
    # batch_size = 105
    # dropout = 0.056
    # inner_hidden_neurons = 269
    # dropout_inner = 0.22
    # Per-horizon hyper-parameters (from earlier hyperas searches).
    if steps == 1:
        hidden_neurons = 332
        loss_function = 'mse'
        batch_size = 128
        dropout = 0.0923
        inner_hidden_neurons = 269
        dropout_inner = 0.2269
    elif steps == 3:
        hidden_neurons = 256
        loss_function = 'mse'
        batch_size = 105
        dropout = 0.0923
        inner_hidden_neurons = 72
        dropout_inner = 0.001
    else:
        hidden_neurons = 332
        loss_function = 'mse'
        batch_size = 105
        dropout = 0.0042
        inner_hidden_neurons = 329
        dropout_inner = 0.1314
    # Online learning overrides: one sample per batch, a single pass over
    # the training data (the batch_size chosen above is deliberately unused).
    batch_size = 1
    nb_epochs = 1
    # Keras LSTMs expect (samples, timesteps, features); one timestep here.
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    print("X train shape:\t", X_train.shape)
    print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    # print("Steps:\t", _steps)
    in_neurons = X_train.shape[2]
    out_neurons = 1
    model = Sequential()
    gpu_cpu = 'cpu'
    # NOTE(review): best_weight and reset_state are constructed but never
    # registered as callbacks in this manual-training variant.
    best_weight = BestWeight()
    reset_state = ResetStatesCallback()
    model.add(LSTM(output_dim=hidden_neurons, input_dim=in_neurons,
                   batch_input_shape=(1, 1, in_neurons),
                   return_sequences=True, init='uniform',
                   consume_less=gpu_cpu, stateful=True))
    model.add(Dropout(dropout))
    dense_input = inner_hidden_neurons
    model.add(LSTM(output_dim=dense_input, input_dim=hidden_neurons,
                   return_sequences=False, consume_less=gpu_cpu,
                   stateful=True))
    model.add(Dropout(dropout_inner))
    model.add(Activation('relu'))
    model.add(Dense(output_dim=out_neurons, input_dim=dense_input))
    model.add(Activation('relu'))
    model.compile(loss=loss_function, optimizer=optimiser)
    # run through all the training data
    # learning training set
    print("Learning training set")
    # progress = pyprind.ProgBar(len(X_train)/batch_size +1, width=50, stream=1)
    for epoch in xrange(nb_epochs):
        mean_tr_loss = []
        print("Epoch {}".format(epoch))
        for x_chunk, y_chunk in chunks(X_train, Y_train, batch_size):
            tr_loss = model.train_on_batch(x_chunk, y_chunk)
            mean_tr_loss.append(tr_loss)
            # Clear LSTM state between batches of the stateful model.
            model.reset_states()
        print("Training Loss: {}".format(np.mean(mean_tr_loss)))
    geh_l = []
    rmse_l = []
    mape_l = []
    training_done = 0  # NOTE(review): unused
    # progress = pyprind.ProgBar(len(X_test) / batch_size +1, width=50, stream=1)
    # Online evaluation: predict each test batch first (collecting stats),
    # then train on it so the model adapts as it walks the test period.
    for x_chunk, y_chunk in chunks(X_test, Y_test, batch_size):
        # start collecting stats
        predicted = model.predict_on_batch(x_chunk) + EPS
        model.reset_states()
        model.train_on_batch(x_chunk, y_chunk)
        model.reset_states()
        geh_l.append(geh(y_chunk, predicted))
        rmse_l.append(rmse(y_chunk, predicted))
        mape_l.append(mape(y_chunk, predicted))
        # progress.update()
    print("Testing RMSE: {} GEH: {} MAPE: {}".format(
        np.mean(rmse_l), np.mean(geh_l), np.mean(mape_l)))
    print()
    # Per-batch metrics averaged over the whole test walk.
    metrics = OrderedDict([
        ('online', True),
        ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', np.mean(geh_l)),
        ('rmse', np.mean(rmse_l)),
        ('mape', np.mean(mape_l)),
        # ('smape', smape(predicted, _Y_test)),
        # ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        # ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        ('batch_size', batch_size),
        # ('optimiser', optimiser),
        ('dropout', dropout),
        ('extra_layer_dropout', dropout_inner),
        ('extra_layer_neurons', inner_hidden_neurons),
        # ('loss function', loss_function)
        # 'history': history.history
    ])
    # print (tabulate.tabulate([metrics], tablefmt='latex', headers='keys'))
    return metrics, model
break true_x.append(row['timestamp']) true_y.append(row['downstream']) pred_y.append(preds.inferences["multiStepBestPredictions"][1]) pred_x.append(row['timestamp'] + timedelta(minutes=5)) np.savez("pred_data/{}-htm-pred-data".format(fname), true_x=true_x, true_y=true_y, pred_x=pred_x, pred_y=pred_y) np_tx = np.array(true_x)[1:] np_ty = np.array(true_y)[1:] np_py = np.array(pred_y)[:-1] print() print("GEH: ", geh(np_ty, np_py)) print("MAPE: ", mape(np_ty, np_py)) print("RMSE: ", rmse(np_ty, np_py)) print() print("True x:", len(true_x)) print("True y:", len(true_x)) print("Pred y:", len(true_x)) plt.plot(true_x, true_y, 'b-', label='Readings') plt.plot(pred_x, pred_y, 'r-', label='Predictions') plt.legend(prop={'size': 23}) plt.grid(b=True, which='major', color='black', linestyle='-') plt.grid(b=True, which='minor', color='black', linestyle='dotted') df = "%A %d %B, %Y"