import copy

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge
from fireTS.models import NARX, DirectAutoRegressor
from fireTS.utils import shift

# xgboost is optional; the DirectAutoRegressor example is skipped if absent
try:
    from xgboost import XGBRegressor
    has_xgboost = True
except ImportError:
    has_xgboost = False


def test_readme_examples():
    # Random training data
    x = np.random.randn(100, 2)
    y = np.random.randn(100)

    # Build a non-linear autoregression model with exogenous inputs
    # using Random Forest regression as the base model
    mdl1 = NARX(
        RandomForestRegressor(n_estimators=10),
        auto_order=2,
        exog_order=[2, 2],
        exog_delay=[1, 1])
    mdl1.fit(x, y)
    ypred1 = mdl1.predict(x, y, step=3)
    assert len(ypred1) == 100

    if has_xgboost:
        # Build a general autoregression model and make multi-step prediction
        # directly using XGBRegressor as the base model
        mdl2 = DirectAutoRegressor(
            XGBRegressor(n_estimators=10),
            auto_order=2,
            exog_order=[2, 2],
            exog_delay=[1, 1],
            pred_step=3)
        mdl2.fit(x, y)
        ypred2 = mdl2.predict(x, y)
        assert len(ypred2) == 100
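# The two models above differ in how they reach a multi-step horizon: NARX
# predicts one step at a time and feeds its own predictions back in as lag
# features (the manual 2-step computation in test_TimeSeriesRegressor_predict
# below does exactly this by hand), while DirectAutoRegressor trains the base
# regressor to hit pred_step steps ahead in a single shot.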
def test_forecast():
    x = np.random.randn(100, 1)
    y = np.random.randn(100)
    mdl = NARX(LinearRegression(), auto_order=2, exog_order=[2])
    mdl.fit(x, y)
    y_forecast = mdl.forecast(x, y, step=10, X_future=np.random.randn(9, 1))
    assert len(y_forecast) == 10
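# forecast() extends `step` values past the end of the supplied history, so
# X_future only needs step - 1 future rows of exogenous input: the lags taken
# from the historical x already cover the first step. A minimal sketch under
# that reading (illustrative, not part of the original suite):
def forecast_future_rows_example():
    x = np.random.randn(50, 1)
    y = np.random.randn(50)
    mdl = NARX(LinearRegression(), auto_order=2, exog_order=[2])
    mdl.fit(x, y)
    # A 5-step forecast takes the 4 exogenous rows beyond the history
    y_next = mdl.forecast(x, y, step=5, X_future=np.random.randn(4, 1))
    assert len(y_next) == 5
    return y_next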
def narx(df):
    x = df
    y = df
    mdl = NARX(RandomForestRegressor(), auto_order=2, exog_order=[2],
               exog_delay=[1])
    mdl.fit(x, y)
    ypred = mdl.predict(x, y, step=3)
    return ypred
def test_TimeSeriesRegressor_grid_search():
    np.random.seed(0)
    X = pd.DataFrame(np.random.randn(100, 2))
    y = pd.Series(np.random.randn(100))
    na = 3
    nb = [3, 3]
    nk = [1, 1]
    mdl = NARX(Ridge(), auto_order=na, exog_order=nb, exog_delay=nk)
    # The grid is keyed by the base estimator's hyperparameters (Ridge alpha)
    para_grid = {'alpha': [0, 0.1, 0.3]}
    mdl.grid_search(X, y, para_grid)
def test_TimeSeriesRegressor_create_features(na, nb, nk):
    np.random.seed(0)
    X = pd.DataFrame(np.random.randn(100, 2))
    y = pd.Series(np.random.randn(100))
    mdl = NARX(LinearRegression(), auto_order=na, exog_order=nb, exog_delay=nk)
    Xfeatures_act, ytarget_act = mdl._preprocess_data(X.values, y.values)
    Xfeatures_exp, ytarget_exp = helper_preprocess(X, y, na, nb, nk)
    np.testing.assert_array_equal(Xfeatures_act, Xfeatures_exp)
    np.testing.assert_array_equal(ytarget_act, ytarget_exp)
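# helper_preprocess is referenced by the tests but not shown here. A minimal
# sketch of what it could look like, assuming the lag layout verified in
# test_preprocess_data below (auto terms y[t-1..t-na], then
# x_j[t-nk[j]-1 .. t-nk[j]-nb[j]] for each exogenous column j); this is an
# illustration, not the actual test helper:
def helper_preprocess(X, y, na, nb, nk, removeNA=True):
    Xdf = pd.DataFrame(X)
    ys = pd.Series(np.asarray(y, dtype=float))
    # Autoregressive lags of the target
    cols = [ys.shift(i) for i in range(1, na + 1)]
    # Delayed lags of each exogenous input
    for j in range(Xdf.shape[1]):
        cols += [Xdf.iloc[:, j].shift(nk[j] + i) for i in range(1, nb[j] + 1)]
    features = pd.concat(cols, axis=1).values
    target = ys.values
    if removeNA:
        # Keep only rows with a full set of lags
        mask = ~np.isnan(features).any(axis=1)
        features, target = features[mask], target[mask]
    return features, target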
def test_NARX():
    x = np.random.randn(100, 1)
    y = np.random.randn(100)
    mdl = NARX(RandomForestRegressor(), auto_order=2, exog_order=[2])
    mdl.fit(x, y)
    ypred = mdl.predict(x, y, step=3)
    print(ypred)

    x = np.random.randn(100, 1)
    y = np.random.randn(100)
    mdl = NARX(RandomForestRegressor(), auto_order=1, exog_order=[1])
    mdl.fit(x, y)
    ypred = mdl.predict(x, y, step=3)
    print(ypred)
def test_TimeSeriesRegressor_predict():
    np.random.seed(0)
    X = pd.DataFrame(np.random.randn(100, 2))
    y = pd.Series(np.random.randn(100))
    na = 3
    nb = [3, 3]
    nk = [1, 1]
    step = 2
    mdl = NARX(LinearRegression(), auto_order=na, exog_order=nb, exog_delay=nk)
    mdl.fit(X, y)
    ypred_act = mdl.predict(X, y, step=step)
    mdl.score(X, y, step=step, method="r2")
    mdl.score(X, y, step=step, method="mse")

    # -------- manual computation ---------------
    kernel_mdl = LinearRegression()
    Xfeatures_exp, ytarget_exp = helper_preprocess(
        X, y, na, nb, nk, removeNA=False)
    mask = np.isnan(ytarget_exp) | np.isnan(Xfeatures_exp).any(axis=1)
    kernel_mdl.fit(Xfeatures_exp[~mask, :], ytarget_exp[~mask])

    # 1-step prediction on the lag features
    ypred_exp1 = np.empty(X.shape[0]) * np.nan
    ypred_exp1[~mask] = kernel_mdl.predict(Xfeatures_exp[~mask, :])

    # Shift the lag features forward one step by hand, mirroring
    # mdl._update_lag_features: the newest auto lag becomes the 1-step
    # prediction and each exogenous block moves up one lag.
    X2 = copy.deepcopy(Xfeatures_exp)
    X2[:, 1:3] = X2[:, 0:2]
    X2[:, 0] = ypred_exp1
    X2[:, 4:6] = X2[:, 3:5]
    X2[:, 3] = shift(X2[:, 3], -1)
    X2[:, 7:9] = X2[:, 6:8]
    X2[:, 6] = shift(X2[:, 6], -1)

    mask = ~np.isnan(X2).any(axis=1)
    ypred_exp2 = np.empty(X2.shape[0]) * np.nan
    ypred_exp2[mask] = kernel_mdl.predict(X2[mask, :])
    ypred_exp2 = np.concatenate([np.empty(2) * np.nan, ypred_exp2])[0:len(y)]
    np.testing.assert_array_almost_equal(ypred_act, ypred_exp2)
def test_preprocess_data():
    estimator = NARX(
        LinearRegression(), auto_order=2, exog_order=[2, 3], exog_delay=[1, 2])
    X = np.array([[1., 3.], [2., 7.], [4., 6.], [3., 8.], [5., 5.],
                  [2.5, 4.5], [3., 3.8]])
    y = np.array([1., 5., 7., 4., 6., 3., 2.])
    features, target = estimator._preprocess_data(X, y)
    y_exp = np.array([3., 2.])
    X_exp = np.array([[6., 4., 3., 4., 6., 7., 3.],
                      [3., 6., 5., 3., 8., 6., 7.]])
    np.testing.assert_array_equal(target, y_exp)
    np.testing.assert_array_equal(features, X_exp)
    estimator.fit(X, y)
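# Decoding the expected rows above: with auto_order=2, exog_order=[2, 3] and
# exog_delay=[1, 2], the features that predict y[t] are
#   [y[t-1], y[t-2], x0[t-2], x0[t-3], x1[t-3], x1[t-4], x1[t-5]],
# so the first fully-lagged sample is t=5:
#   [y[4], y[3], x0[3], x0[2], x1[2], x1[1], x1[0]] = [6, 4, 3, 4, 6, 7, 3],
# which is exactly the first row of X_exp, with y_exp[0] = y[5] = 3.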
def test_forecast_and_predict_consistency():
    np.random.seed(0)
    x = np.random.randn(10, 1)
    y = np.random.randn(10)
    mdl = NARX(LinearRegression(), auto_order=2, exog_order=[2])
    mdl.fit(x, y)

    # 1-step
    ypred = mdl.predict(x, y, step=1)
    yforecast = mdl.forecast(x[:-1, :], y[:-1], step=1)
    np.testing.assert_almost_equal(ypred[-1], yforecast[-1])

    # 2-step
    ypred = mdl.predict(x, y, step=2)
    X_future = x[-2:-1, :]
    yforecast = mdl.forecast(x[:-2, :], y[:-2], step=2, X_future=X_future)
    np.testing.assert_almost_equal(ypred[-1], yforecast[-1])

    # 3-step
    ypred = mdl.predict(x, y, step=3)
    X_future = x[-3:-1, :]
    yforecast = mdl.forecast(x[:-3, :], y[:-3], step=3, X_future=X_future)
    np.testing.assert_almost_equal(ypred[-1], yforecast[-1])
def narx_rf(df):
    x = df
    y = df
    mdl = NARX(RandomForestRegressor(), auto_order=2, exog_order=[2],
               exog_delay=[1])
    para_grid = {'n_estimators': [10, 30, 100]}
    mdl.grid_search(x, y, para_grid, verbose=2)

    # The best hyperparameters are set after the grid search; print the model
    # to see the difference
    print(mdl)

    mdl.fit(x, y)
    ypred = mdl.predict(x, y, step=3)
    return ypred
import jsonpreprocess as jp
import pandas
from datetime import datetime
from fireTS.models import NARX
from sklearn.linear_model import LinearRegression

if __name__ == "__main__":
    data = jp.getJSONObjectCP("link-chainlink", "2019-02-20", "2020-02-08")
    df = jp.convertJSONToDataFrame(
        data, datetime(2019, 2, 20), datetime(2020, 2, 8))
    # Two exogenous inputs: today's price and trading volume
    # (one key per concatenated frame)
    xtrain = pandas.concat([df["Today"][0:300], df["Volume"][0:300]],
                           axis=1, keys=["Today", "Volume"])
    ytrain = df["Today"][0:300]
    xtest = pandas.concat([df["Today"][300:], df["Volume"][300:]],
                          axis=1, keys=["Today", "Volume"])
    ytest = df["Today"][300:]
    print(xtrain)
    narx_mdl = NARX(LinearRegression(), auto_order=6, exog_order=[2, 2],
                    exog_delay=[0, 0])
    narx_mdl.fit(xtrain, ytrain)
    ypred = narx_mdl.predict(xtest, ytest, step=3)
    print(ypred)
def test_forecast_exog_delay():
    np.random.seed(0)
    x = np.random.randn(10, 1)
    y = np.random.randn(10)

    # delay 0
    mdl = NARX(LinearRegression(), auto_order=2, exog_order=[2],
               exog_delay=[0])
    mdl.fit(x, y)
    yforecast = mdl.forecast(x[:-1, :], y[:-1], step=1)
    np.testing.assert_almost_equal(yforecast, [-0.50000582])

    # delay 1
    mdl = NARX(LinearRegression(), auto_order=2, exog_order=[2],
               exog_delay=[1])
    mdl.fit(x, y)
    yforecast = mdl.forecast(x[:-1, :], y[:-1], step=1)
    np.testing.assert_almost_equal(yforecast, [-0.53345719])

    # delay 2
    mdl = NARX(LinearRegression(), auto_order=2, exog_order=[2],
               exog_delay=[2])
    mdl.fit(x, y)
    yforecast = mdl.forecast(x[:-1, :], y[:-1], step=1)
    np.testing.assert_almost_equal(yforecast, [-0.61640028])
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from fireTS.models import NARX

import modeling_utilities  # project-local helpers

# comp_df and pos_df are assumed to be loaded earlier in the script

# Scale data to range from 0 to 1
comp_scaled, comp_scaler = modeling_utilities.scale_dataset(
    comp_df['average_compound'].to_numpy())
pos_rate_scaled, pos_scaler = modeling_utilities.scale_dataset(
    pos_df['pos_rate'].to_numpy())

# Split data into training and testing
x_train, x_test, y_train, y_test = train_test_split(
    comp_scaled, pos_rate_scaled, test_size=0.20, random_state=None,
    shuffle=False)

# Create and train NARX model
model = NARX(RandomForestRegressor(), auto_order=2, exog_order=[2],
             exog_delay=[1])
model.fit(x_train, y_train)

# Use the model to create a prediction and plot the results
full_prediction = model.predict(comp_scaled, pos_rate_scaled, step=3)
full_pred_rescaled = pos_scaler.inverse_transform(
    full_prediction.reshape(-1, 1))
modeling_utilities.plot_prediction(
    comp_df['day'], pos_df['pos_rate'], full_pred_rescaled, len(y_train),
    'Prediction of COVID-19 Positivity Rate with NARX Model')

# Print MSE for the training window
mse = mean_squared_error(pos_rate_scaled[5:len(y_train)],
                         full_prediction[5:len(y_train)])
print('Training MSE =', mse)
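# Note on the slice starting at index 5: with auto_order=2, exog_order=[2] and
# exog_delay=[1], the deepest lag the model needs is max(2, 1 + 2) = 3
# samples, and the recursive 3-step prediction shifts the output a further
# step - 1 = 2 positions, so the first five entries of full_prediction are
# NaN. This is an inferred explanation of the slice, not a comment from the
# original code.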