def _run_forecast_strategy(forecaster, strategy, history, model_builder, fitted_model):
    """Dispatch to the Forecaster method matching ``strategy``.

    ``direct`` receives the model-building function, ``recursive`` receives
    the already fitted model.
    """
    if strategy == "direct":
        return forecaster.direct(history, model_builder)
    if strategy == "recursive":
        return forecaster.recursive(history, fitted_model)
    raise ValueError(f"Unknown forecast strategy: {strategy!r}")


def ts_forecasting():
    """Forecast energy consumption with two ML models and print MAPE scores.

    Options come from ``input_cmd()``: ``load`` selects the dataset,
    ``steps`` is the forecast horizon and ``fcast`` selects the forecasting
    strategy (``"direct"`` or ``"recursive"``). A linear-regression model and
    an XGBoost model are fitted on lag features of the training part of the
    ``"energy"`` series, and each one is scored on the held-out horizon.

    Raises
    ------
    ValueError
        If ``args.fcast`` is neither ``"direct"`` nor ``"recursive"``.
    """
    args = input_cmd()
    strategy = args.fcast
    # Fail before loading data or training anything: an unknown strategy used
    # to surface only as an unbound forecast variable after the model fit.
    if strategy not in ("direct", "recursive"):
        raise ValueError(f"Unknown forecast strategy: {strategy!r}")
    # The printed label must follow the strategy actually used.
    strategy_label = strategy.capitalize()

    # get energy consumption data
    load = args.load
    f_steps = args.steps
    data = get_dataset(load_to_predict=load)
    c_target = data["energy"]
    t_target, f_target, fcast_range = forecast_split(c_target, n_steps=f_steps)

    # ML methods
    features, target = get_features(t_target)
    # Feature names containing "lag" carry the lag order after the first "_".
    lags = [int(f.split("_")[1]) for f in features if "lag" in f]
    forecaster = Forecaster(f_steps, lags=lags)

    print("Forecast with Linear Regression model")
    model, cv_score, test_score = linear_model(features, target)
    fcast_linear = _run_forecast_strategy(
        forecaster, strategy, t_target, linear_model, model)
    fcast_score = mape(f_target, fcast_linear)
    print(f"""
    Linear Regression scores
    --------------
    Cross-validation MAPE: {round(cv_score, 2)}%
    Test MAPE: {round(test_score, 2)}%
    {strategy_label} Forecast MAPE: {round(fcast_score, 2)}%
    """)

    print("Forecast with XGBoost model")
    model, cv_score, test_score = xgboost_model(features, target, max_evals=25)
    fcast_xgb = _run_forecast_strategy(
        forecaster, strategy, t_target, xgboost_model, model)
    fcast_score = mape(f_target, fcast_xgb)
    print(f"""
    XGBoost scores
    --------------
    Cross-validation MAPE: {round(cv_score, 2)}%
    Test MAPE: {round(test_score, 2)}%
    {strategy_label} Forecast MAPE: {round(fcast_score, 2)}%
    """)
def main():
    """Evaluate every configured predictor on every SKU and period.

    For each (model, SKU, period) combination the series up to the end of the
    period is loaded, split into train/test, fitted and forecast. The plot,
    the forecast with residuals, and the MAPE/RMSE scores are saved.
    """
    results = init_results()
    n_preds = configuration.N_PREDS

    for model_name, predictor in predictors.items():
        for sku in configuration.SKUS:
            for period_ind, period in enumerate(configuration.PERIODS):
                end_of_period = period[1]
                # Result directory: <FORECAST_RES_DIR><model>\<sku>\<period index>
                res_path = "\\".join((
                    configuration.FORECAST_RES_DIR + model_name,
                    sku,
                    str(period_ind),
                ))

                real_series = loader.load_test_sku(
                    sku,
                    base_dir=configuration.BASE_DIR,
                    end_of_period=end_of_period,
                )
                train, test = train_test_split(real_series, n_preds)
                train = utils.remove_holidays(train)

                predictor.fit(train, n_preds)
                forecast = predictor.predict(n_preds)
                resid = predictor.resid

                # Plot and RMSE use max-scaled series; MAPE uses raw values.
                forecast_scaled = utils.scale_by_max(forecast)
                test_scaled = utils.scale_by_max(test)
                save_plot(test_scaled, forecast_scaled, end_of_period, res_path)
                save_forecast_resid(forecast, resid, res_path)

                mape = utils.mape(y_true=test, y_pred=forecast)
                rmse = utils.rmse(y_true=test_scaled, y_pred=forecast_scaled)
                save_result(results, model_name, sku, period_ind, mape, rmse,
                            predictor.describe())
def test_RNN(self):
    """Train ``RNN`` on constant samples and check its accuracy thresholds.

    Accuracy is measured as ``100 - mape`` on de-normalized values: at least
    99 on the training data and at least 97 on the unseen test data.
    """
    scale = 1000
    network = RNN()

    # Normalized constant samples. The second and third inputs are the same
    # array object as the corresponding target.
    train_input_1 = np.full((100, 2), 100) / scale
    train_target = np.full((100, 1), 100) / scale
    train_input_2 = train_input_3 = train_target

    test_input_1 = np.full((50, 2), 101) / scale
    test_target = np.full((50, 1), 101) / scale
    test_input_2 = test_input_3 = test_target

    # One full-batch training call per (epoch, training sample) pair.
    epochs = 100
    for _ in range(epochs * len(train_input_1)):
        train_output = network.train(
            train_input_1, train_input_2, train_input_3, train_target)

    # Evaluate on unseen data.
    test_output = network.test(test_input_1, test_input_2, test_input_3)

    # De-normalize in place before scoring.
    train_output *= scale
    train_target *= scale
    test_output *= scale
    test_target *= scale

    train_accuracy = 100 - mape(train_target, train_output)
    self.assertGreaterEqual(train_accuracy, 99.00)
    test_accuracy = 100 - mape(test_target, test_output)
    self.assertGreaterEqual(test_accuracy, 97.00)
def test(testx, testy):
    """Forecast each test series with an ARIMA(1, 1, 0) model and score it.

    Parameters
    ----------
    testx : array-like with ``.shape``
        Input series; ``testx[idx]`` is the history used to fit the model for
        sample ``idx``.
    testy : array-like
        Targets compared against the stacked forecasts by ``utils.mape``.

    Returns
    -------
    tloss
        ``utils.mape(predictions, testy)`` averaged over axis 0.
    """
    print("Test")
    stime = time.time()
    predlist = []
    for idx in range(testx.shape[0]):
        # Fit on the idx-th series. Fitting on testx[0] every time made all
        # forecasts identical to the first sample's forecast.
        model = ARIMA(testx[idx], order=(1, 1, 0))
        model_fit = model.fit(disp=0)
        pred, _stderr, _conf_int = model_fit.forecast(config.out_seq_length)
        predlist.append(pred)
        if idx % 20 == 0:
            # Progress line: sample index, elapsed seconds, per-sample MAPE.
            print("Test %d %d" % (idx, time.time() - stime),
                  utils.mape(pred, testy[idx]))
    etime = time.time()
    print("Test %d" % (etime - stime))

    predlist = np.stack(predlist, axis=0)
    mapeloss = utils.mape(predlist, testy)
    tloss = np.mean(mapeloss, axis=0)
    print("Test ", tloss)
    return tloss
def test_FeedForward(self):
    """Train ``FeedForward`` on constant samples and check its accuracy.

    Accuracy is ``100 - mape`` on de-normalized values: at least 99 on the
    training data and at least 97 on the unseen test data.
    """
    scale = 1000
    network = FeedForward()

    # Normalized inputs and training target. The test target stays in
    # original units because it is only compared with de-normalized output.
    train_input = np.full((100, 2), 100) / scale
    train_target = np.full((100, 1), 100) / scale
    test_input = np.full((50, 2), 101) / scale
    test_target = np.full((50, 1), 101)

    # One full-batch training call per (epoch, training sample) pair.
    epochs = 100
    for _ in range(epochs * len(train_input)):
        train_output = network.train(train_input, train_target)

    # Evaluate on unseen data.
    test_output = network.test(test_input)

    # De-normalize in place before scoring.
    train_output *= scale
    train_target *= scale
    test_output *= scale

    # The network must reproduce a constant line with high accuracy.
    train_accuracy = 100 - mape(train_target, train_output)
    self.assertGreaterEqual(train_accuracy, 99.00)
    test_accuracy = 100 - mape(test_target, test_output)
    self.assertGreaterEqual(test_accuracy, 97.00)
def test_RNN_V2(self):
    """Train ``RNN_V2`` on constant samples and check its accuracy.

    Accuracy is ``100 - mape`` on de-normalized values: at least 99 on the
    training data and at least 97 on the unseen test data.
    """
    scale = 1000
    network = RNN_V2()

    # Normalized targets; the test target stays in original units because it
    # is only compared with de-normalized output.
    train_target = np.full((100, 1), 100) / scale
    test_target = np.full((50, 1), 101)

    # Normalized inputs, reshaped by to_3d into [inputs, timesteps, features].
    train_input = to_3d(np.full((100, 2), 100) / scale)
    test_input = to_3d(np.full((50, 2), 101) / scale)

    # Train, then evaluate on unseen data.
    train_output = network.train(train_input, train_target, epochs=100)
    test_output = network.test(test_input)

    # De-normalize in place before scoring.
    train_output *= scale
    train_target *= scale
    test_output *= scale

    # The network must reproduce a constant line with high accuracy.
    train_accuracy = 100 - mape(train_target, train_output)
    self.assertGreaterEqual(train_accuracy, 99.00)
    test_accuracy = 100 - mape(test_target, test_output)
    self.assertGreaterEqual(test_accuracy, 97.00)
def rank_model(self, fcst_model, act_st, fcst_st, test_type, test_st, rank_by='mae', error_by='mape'):
    """Rank models per id and period from their logged historical forecasts.

    Parameters
    ----------
    fcst_model : mapping or callable
        Passed to ``Series.map`` on the ``period`` column; must yield, per
        period, a container of the model names to keep. Periods that map to
        null are dropped.
    act_st, fcst_st
        Actuals are filled from ``act_st`` up to the day before ``fcst_st``.
    test_type : str
        ``'daily'`` keeps daily actuals; any other value converts them with
        ``TimeSeriesForecasting.daytomth``.
    test_st
        Only log rows with ``test_st <= dsr < fcst_st`` are evaluated.
    rank_by : str
        Error column (``'mae'`` or ``'mape'``) used for ranking.
    error_by : str
        Error column copied into the ``error`` output column.

    Returns
    -------
    pandas.DataFrame
        One row per (id, period, model) with mean ``mae`` and ``mape``, a
        dense ascending ``rank`` within each (id, period), and ``error``.
    """
    # Collect per-id actuals and concatenate once; DataFrame.append was
    # removed in pandas 2.0 and copied the frame on every call.
    frames = []
    for i in self.df_act['id'].unique():
        df_i = self.df_act[self.df_act['id'] == i].copy()
        df_i = TimeSeriesForecasting.filldaily(
            df_i, act_st, fcst_st + datetime.timedelta(days=-1))
        if test_type != 'daily':
            df_i = TimeSeriesForecasting.daytomth(df_i)
        df_i['id'] = i
        frames.append(df_i[['id', 'ds', 'y']])
    if frames:
        df_act = pd.concat(frames, ignore_index=True)
    else:
        # Keep the merge keys available when there are no actuals at all.
        df_act = pd.DataFrame(columns=['id', 'ds', 'y'])

    in_window = ((self.df_fcstlog['dsr'] >= test_st)
                 & (self.df_fcstlog['dsr'] < fcst_st))
    df_rank = self.df_fcstlog[in_window].copy()

    # select only in config file
    allowed = df_rank['period'].map(fcst_model)
    # An explicit boolean Series is used because DataFrame.apply(axis=1)
    # does not return a Series on an empty frame.
    keep = pd.Series(
        [bool(known) and model in models
         for known, model, models in zip(allowed.notnull(),
                                         df_rank['model'], allowed)],
        index=df_rank.index, dtype=bool)
    df_rank = df_rank[keep].copy()

    # calculate error comparing with actual
    df_rank = pd.merge(df_rank, df_act.rename(columns={'y': 'actual'}),
                       on=['id', 'ds'], how='left')
    df_rank['mae'] = (df_rank['actual'] - df_rank['forecast']).abs()
    df_rank['mape'] = pd.Series(
        [mape(actual, forecast)
         for actual, forecast in zip(df_rank['actual'], df_rank['forecast'])],
        index=df_rank.index, dtype=float)
    # Rows without a matching actual get an error of 0.
    df_rank[['mae', 'mape']] = df_rank[['mae', 'mape']].fillna(0)

    # ranking error
    df_rank = df_rank.groupby(['id', 'period', 'model'], as_index=False).agg({
        'mae': 'mean',
        'mape': 'mean'
    })
    df_rank['rank'] = df_rank.groupby(['id', 'period'])[rank_by].rank(
        method='dense', ascending=True)
    df_rank['error'] = df_rank[error_by]
    return df_rank
def baseline(df):
    """Score a naive baseline that predicts 2017 from the 2016 values.

    The 2016 slice of ``df`` is used as the prediction for 2017. In the 2017
    actuals, values below 1 are treated as missing and forward-filled from
    the last valid datapoint. RMSE and MAPE are printed; nothing is returned.

    Parameters
    ----------
    df : pandas.Series
        Series supporting label slicing by date string — presumably a
        date-indexed daily series; verify against the caller.
    """
    # predict next year by just taking the values from last year
    predicted = df.loc['2016-01-01':'2016-12-31']
    actual = df.loc['2017-01-01':'2017-12-31']

    # Treat values below 1 as missing and pad them with the last datapoint.
    # mask() builds a new object, so the caller's data is never written
    # through the slice; ffill() replaces the deprecated fillna(method='pad').
    actual = actual.mask(actual < 1).ffill()

    # transform df to series
    predicted = pd.Series(predicted)
    actual = pd.Series(actual)
    # NOTE(review): 2016 is a leap year, so with complete daily data the two
    # slices differ in length (366 vs 365) — confirm how the data is aligned.
    print("Number of actual days:", len(actual))
    print('Number of predicted days:', len(predicted))

    rmse = sqrt(mean_squared_error(actual, predicted))
    print('For 12 Months RMSE: %.3f' % rmse)
    mape_value = mape(actual, predicted)
    print("For 12 Months MAPE :", mape_value)
def test(testx, testy):
    """Score recursive multi-step forecasts of the module-level ``model``.

    For each of ``config.out_seq_length`` steps the model predicts from the
    current window; the window is then shifted left by one position and the
    prediction is written into its last column.

    Parameters
    ----------
    testx : array-like
        Input windows indexed as ``[sample, position]``. It is copied, so the
        caller's array is left unchanged.
    testy : array-like
        Targets compared against the stacked predictions by ``utils.mape``.

    Returns
    -------
    tloss
        ``utils.mape(predictions, testy)`` averaged over axis 0.
    """
    print("Test")
    stime = time.time()
    # Work on a private copy: the loop below rewrites the window in place and
    # would otherwise corrupt the caller's data for any later evaluation.
    window = np.array(testx)
    predlist = []
    for pid in range(config.out_seq_length):
        print("Test %d %d" % (pid, time.time() - stime))
        pred = model.predict(window)
        predlist.append(pred)
        # Slide the window by one step and feed the prediction back in.
        window[:, :-1] = window[:, 1:]
        window[:, -1] = pred
    etime = time.time()
    print("Test %d" % (etime - stime))

    predlist = np.stack(predlist, axis=-1)
    mapeloss = utils.mape(predlist, testy)
    tloss = np.mean(mapeloss, axis=0)
    print("Test ", tloss)
    return tloss
def test_model(model, X_test, y_test):
    """
    Get the MAPE for a given model on a test dataset

    Parameters
    ----------
    model: a model implementing the standard scikit-learn interface
    X_test: pd.DataFrame holding the features of the test set
    y_test: pd.Series holding the test set target

    Returns
    -------
    test_score: the MAPE on the test dataset, as computed by
        ``mape(y_test.values, predictions)``
    """
    predictions = model.predict(X_test)
    test_score = mape(y_test.values, predictions)
    return test_score
def error(self, data, times=None, metric='mape'):
    """
    Model prediction error.

    Parameters
    ----------
    data : sequence
        Observed values compared against the simulation.
    times : array-like, optional
        Time points passed to ``self.simulate``. Defaults to
        ``numpy.arange(len(data))``.
    metric : str
        Error metric to use. It can be "mape", "smape", "logaccratio",
        and "rmse". Default: mape.

    Returns
    -------
    The value of the chosen metric. "rmse" is the square root of the stored
    ``self.cost_`` and uses neither ``data`` nor ``times``.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported names.
    """
    if metric == 'rmse':
        # Derived from the stored cost only, so no simulation is needed.
        return numpy.sqrt(self.cost_)
    # Reject unknown metrics before paying for a simulation.
    if metric not in ('mape', 'smape', 'logaccratio'):
        raise ValueError("No such metric: {}".format(metric))

    if times is None:
        times = numpy.arange(len(data))
    y = self.simulate(times)

    if metric == 'mape':
        return mape(y, data)
    if metric == 'smape':
        return smape(y, data)
    return logaccratio(y, data)
def main():
    """Fit ``Smooth_Predictor`` on daily product counts and score the tail.

    The last 28 days of the daily series (data before 2017-10-01) are held
    out, forecast, plotted against the real values and scored with
    ``utils.mape``.
    """
    np.seterr(all='raise')
    n_preds = 28

    raw = loader.load_product_class_data("rohliky.tsv", False)
    raw = raw[raw.index < pd.Timestamp('2017-10-01')]
    daily_counts = raw.groupby(pd.Grouper(freq='D'))['product_count'].sum()
    series = daily_counts.fillna(0).astype('float')

    predictor = Smooth_Predictor()
    predictor.fit(series[:-n_preds])
    res = predictor.predict(npred=n_preds)

    plt.figure(figsize=(12, 8))
    steps = list(range(n_preds))
    plt.plot(steps, res[-n_preds:].values, label='Result')
    plt.plot(steps, series.values[-n_preds:], label='Real')

    # Forecasts are zeroed wherever the real value is zero before scoring;
    # this happens after the curves above have been drawn.
    held_out = series[-n_preds:]
    res[held_out.values == 0] = 0
    score = utils.mape(held_out, res)

    plt.legend()
    plt.show()
    print(predictor.describe(), ", mape ", score)
def test_mape(self):
    """MAPE of an array compared with itself must be exactly 0.0."""
    constant = np.full(5, 10)
    self.assertEqual(mape(constant, constant), 0.0)
def run_training(
    energy,
    T_val,
    LATENT_DIM_1,
    LATENT_DIM_2,
    BATCH_SIZE,
    LEARNING_RATE,
    ALPHA,
):
    """Train the model, keep the best-validation weights and log test MAPE.

    Weights are checkpointed per epoch as ``model_XX.h5``; the epoch with the
    lowest ``val_loss`` is reloaded, saved as ``bestmodel.json`` and
    ``bestmodel.h5``, scored on the test inputs, and finally moved to
    ``./outputs/model``. ``validationLoss`` and ``testMAPE`` are logged to
    ``run``.
    """
    from utils import create_evaluation_df, mape

    train_inputs, valid_inputs, test_inputs, y_scaler = create_input(
        energy, T_val)

    # Initialize the model
    model = get_model(LEARNING_RATE, T_val, ALPHA, LATENT_DIM_1, LATENT_DIM_2)
    callbacks = [
        EarlyStopping(monitor="val_loss", min_delta=0, patience=5),
        ModelCheckpoint(
            "model_{epoch:02d}.h5",
            save_best_only=True,
            mode="min",
            period=1,
            save_weights_only=True,
        ),
        LogRunMetrics(),
    ]

    # Train the model
    history = model.fit(
        train_inputs["X"],
        train_inputs["target"],
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=(valid_inputs["X"], valid_inputs["target"]),
        callbacks=callbacks,
        verbose=0,
    )

    # Reload the weights of the epoch with the smallest validation loss.
    # Checkpoint file names are 1-based, hence the +1.
    val_losses = np.array(history.history["val_loss"])
    best_epoch = np.argmin(val_losses) + 1
    validationLoss = np.min(val_losses)
    model.load_weights("model_{:02d}.h5".format(best_epoch))

    # Save the best model: architecture as JSON, weights as HDF5.
    model_name = "bestmodel"
    model_json = model.to_json()
    with open("{}.json".format(model_name), "w") as json_file:
        json_file.write(model_json)
    model.save_weights("{}.h5".format(model_name))

    # Compute test MAPE
    predictions = model.predict(test_inputs["X"])
    eval_df = create_evaluation_df(predictions, test_inputs, HORIZON, y_scaler)
    testMAPE = mape(eval_df["prediction"], eval_df["actual"])

    # Clean up the per-epoch checkpoint files.
    for checkpoint in glob("model_*.h5"):
        os.remove(checkpoint)

    # Log validation loss and test MAPE
    run.log("validationLoss", validationLoss)
    run.log("testMAPE", testMAPE)

    # Create ./outputs/model in the compute target; files saved under
    # "./outputs" are automatically uploaded into run history.
    os.makedirs("./outputs/model", exist_ok=True)
    for model_file in glob("bestmodel*"):
        shutil.move(model_file, "./outputs/model")
# Overlay the estimated values on the current figure, save it, then clear it.
# NOTE(review): this fragment uses Python 2 print statements; `n / 2` is used
# as a slice bound, so it assumes n is an int (floor division) — TODO confirm.
plt.hold(True)
plt.plot(x[n / 2:, :], yhat)
plt.savefig('./bases/results/polinomio_estimado')
plt.clf()
print 'Letra A'
print 'Polinomio encontrado: '
# The third coefficient uses the "space" sign flag instead of an explicit "+".
print 'y = {:3.3f} + {:3.3f}x {: 3.3f}x^2\n'.format(what[0][0], what[1][0], what[2][0])
# b) Obtain the RMSE and MAPE of the fitted model on the second half of the
# data;
print 'Letra B'
rmse = utils.rmse(y[n / 2:, :], yhat)
print 'RMSE = ' + str(rmse) + '\n'
mape = utils.mape(y[n / 2:, :], yhat)
print 'MAPE = ' + str(mape) + '\n'
# c) Estimate the model that best fits the data using all of the data.
# Report the parameters of the model found. Use the model-complexity
# determination factors to help find the model. Obtain the RMSE and MAPE
# of the resulting model on the data.
print 'Letra C'
MAXDEGREE = 5
# NOTE(review): `plt.Figure` is a bare attribute reference that is never
# called, so this statement does nothing — presumably `plt.figure()` was
# intended; confirm.
plt.Figure
plt.hold(True)
plt.grid(True)
plt.plot(x, y)
plt.title('Ajuste Polinomial')
plt.ylabel('y')
plt.xlabel('x')
2 * y_train_pred[:, 1:]) y_train_pred = y_scaler.inverse_transform(y_train_pred[:, :1]) y_test = y_scaler.inverse_transform(y_test) y_test_pred = net(x_test).cpu().detach().numpy() y_test_pred_min = y_scaler.inverse_transform(y_test_pred[:, :1] - 2 * y_test_pred[:, 1:]) y_test_pred_max = y_scaler.inverse_transform(y_test_pred[:, :1] + 2 * y_test_pred[:, 1:]) y_test_pred = y_scaler.inverse_transform(y_test_pred[:, :1]) plt.plot(y_train) plt.plot(y_train_pred) plt.fill_between(np.arange(y_train.shape[0]), y_train_pred_min.squeeze(), y_train_pred_max.squeeze(), color='b', alpha=.1) plt.plot(np.arange(y_train.shape[0], df.shape[0]), y_test) plt.plot(np.arange(y_train.shape[0], df.shape[0]), y_test_pred) plt.fill_between(np.arange(y_train.shape[0], df.shape[0]), y_test_pred_min.squeeze(), y_test_pred_max.squeeze(), color='b', alpha=.1) plt.show() print('TEST RMSE: {}'.format(rmse(y_test, y_test_pred[:, 0]))) print('TEST MAPE: {}'.format(mape(y_test, y_test_pred[:, 0])))
# Overlay the estimated values on the current figure, save it, then clear it.
# NOTE(review): this fragment uses Python 2 print statements; `n / 2` is used
# as a slice bound, so it assumes n is an int (floor division) — TODO confirm.
plt.xlabel('x')
plt.hold(True)
plt.plot(x[n / 2:, :], yhat)
plt.savefig('./bases/results/polinomio_estimado')
plt.clf()
print 'Letra A'
print 'Polinomio encontrado: '
# The third coefficient uses the "space" sign flag instead of an explicit "+".
print 'y = {:3.3f} + {:3.3f}x {: 3.3f}x^2\n'.format(what[0][0], what[1][0], what[2][0])
# b) Obtain the RMSE and MAPE of the fitted model on the second half of the
# data;
print 'Letra B'
rmse = utils.rmse(y[n / 2:, :], yhat)
print 'RMSE = ' + str(rmse) + '\n'
mape = utils.mape(y[n / 2:, :], yhat)
print 'MAPE = ' + str(mape) + '\n'
# c) Estimate the model that best fits the data using all of the data.
# Report the parameters of the model found. Use the model-complexity
# determination factors to help find the model. Obtain the RMSE and MAPE
# of the resulting model on the data.
print 'Letra C'
MAXDEGREE = 5
# NOTE(review): `plt.Figure` is a bare attribute reference that is never
# called, so this statement does nothing — presumably `plt.figure()` was
# intended; confirm.
plt.Figure
plt.hold(True)
plt.grid(True)
plt.plot(x, y)
plt.title('Ajuste Polinomial')
plt.ylabel('y')
plt.xlabel('x')
# standard deviation). Randomly select 75% of the data for training.
# Return the structure of the built tree.
nclasses = np.union1d(y, y).size
n = len(y)
randind = np.arange(0, n)
np.random.shuffle(randind)
# Slice bounds must be integers: NumPy rejects a float bound such as 0.75 * n.
n_train = int(0.75 * n)
ind_train = randind[0:n_train]
ind_test = randind[n_train:n]
tree = RegressionTree(nclasses)
tree.train(x[ind_train, :], y[ind_train], SDRMIN=0.1, NMIN=3)
g, pos = tree.gerar_grafo()
utils.draw_graph(g, pos)
# b) Use the remaining 25% of the data for evaluation. Return the MAPE and
# RMSE measures.
yhat = tree.estimate(x[ind_test, :])
rmse = utils.rmse(y[ind_test], yhat)
mape = utils.mape(y[ind_test], yhat)
# A single parenthesised argument prints identically under Python 2 and 3.
print('RMSE encontrado: {:3.2f}\nMAPE encontrado: {:3.2f}'.format(rmse, mape))
plt.plot(y[ind_test])
plt.hold(True)
plt.plot(yhat)
plt.legend(['real', 'estimado'])
plt.show()
# c) Try to obtain the decision rules from the built tree.
#Transform data data = data.map(lambda x: (x[0], x[1], transform(x[2]))) #Split train and test train, test = utils.train_test_split(data) #Labelling Points train = utils.labelled_points(train) test = utils.labelled_points(test) #Regression (this is not least square but SGD) lrm = LinearRegressionWithSGD() model = lrm.train(train) #Test mape_train = utils.mape( train.map(lambda x: (x.label, model.predict(x.features)))) mape_test = utils.mape( test.map(lambda x: (x.label, model.predict(x.features)))) #Prediction actual_pred = data.map(lambda x: (x[0], x[1], model.predict(x[2]))) #split actual = actual_pred.map(lambda x: (x[0], x[1])) prediction = actual_pred.map(lambda x: (x[0], x[2])) #denormalization actual = utils.denormalization(sc, actual, data_min, data_max) prediction = utils.denormalization(sc, prediction, data_min, data_max) #JOIN
# Build the test design matrix and target.
# NOTE(review): features are extracted with y_col='quantity' while the target
# below is the 'sales' column — confirm this is intended.
test_data_features = Features.feature_extraction(test_data, y_col='quantity')
X_test = test_data_features.toarray()
y_test = test_data['sales'].values
print("test data shape: {}".format(test_data.shape))
## Linear Regression
# X_train / y_train are defined outside this fragment.
ols = LinearRegression(fit_intercept=True)
ols.fit(X_train, y_train)
y_hat = ols.predict(X_test)
# Keep predictions next to the rows so they can be aggregated below.
test_data["y_hat"] = y_hat
# NOTE(review): metrics are called as (prediction, actual); verify this
# matches the argument order the mae/rmse/mape helpers expect.
test_mae = mae(y_hat, y_test)
test_rmse = rmse(y_hat, y_test)
test_mape = mape(y_hat, y_test)
print("--OLS--")
print("MAE - (test): {:.2f}".format(test_mae))
print("RMSE - (test): {:.2f}".format(test_rmse))
print("MAPE: - (test): {:.4f}".format(test_mape))
# Errors on totals: sales and predictions summed per (time, region) group.
prod_errors = test_data[['region', 'time', 'sales', 'y_hat']].groupby(['time', "region"]).sum()
prod_mae = mae(prod_errors.y_hat, prod_errors.sales)
prod_rmse = rmse(prod_errors.y_hat, prod_errors.sales)
prod_mape = mape(prod_errors.y_hat, prod_errors.sales)
print("Region MAE - (test): {:.2f}".format(prod_mae))
print("Region RMSE - (test): {:.2f}".format(prod_rmse))
print("Region MAPE - (test): {:.4f}".format(prod_mape))
# standard deviation). Randomly select 75% of the data for training.
# Return the structure of the built tree.
nclasses = np.union1d(y, y).size
n = len(y)
randind = np.arange(0, n)
np.random.shuffle(randind)
# Slice bounds must be integers: NumPy rejects a float bound such as 0.75 * n.
n_train = int(0.75 * n)
ind_train = randind[0:n_train]
ind_test = randind[n_train:n]
tree = RegressionTree(nclasses)
tree.train(x[ind_train, :], y[ind_train], SDRMIN=0.1, NMIN=3)
g, pos = tree.gerar_grafo()
utils.draw_graph(g, pos)
# b) Use the remaining 25% of the data for evaluation. Return the MAPE and
# RMSE measures.
yhat = tree.estimate(x[ind_test, :])
rmse = utils.rmse(y[ind_test], yhat)
mape = utils.mape(y[ind_test], yhat)
# A single parenthesised argument prints identically under Python 2 and 3.
print('RMSE encontrado: {:3.2f}\nMAPE encontrado: {:3.2f}'.format(rmse, mape))
plt.plot(y[ind_test])
plt.hold(True)
plt.plot(yhat)
plt.legend(['real', 'estimado'])
plt.show()
# c) Try to obtain the decision rules from the built tree.