def test_TimeSeriesRegressor_predict():
    """Compare NARX ``predict(..., step=2)`` with a hand-rolled computation.

    A NARX model wrapping ``LinearRegression`` is fitted on seeded random
    data. The expected prediction is rebuilt manually: fit the same kind of
    linear model on the lag features, predict once, feed that prediction back
    into the lag-feature matrix, and predict a second time.
    """
    np.random.seed(0)
    X = pd.DataFrame(np.random.randn(100, 2))
    y = pd.Series(np.random.randn(100))
    na = 3
    nb = [3, 3]
    nk = [1, 1]
    step = 2

    mdl = NARX(LinearRegression(), auto_order=na, exog_order=nb, exog_delay=nk)
    mdl.fit(X, y)
    ypred_act = mdl.predict(X, y, step=step)
    # Smoke checks only: the return values are not asserted, the calls just
    # have to complete without raising for both scoring methods.
    mdl.score(X, y, step=step, method="r2")
    mdl.score(X, y, step=step, method="mse")

    # -------- manual computation ---------------
    kernel_mdl = LinearRegression()
    Xfeatures_exp, ytarget_exp = helper_preprocess(
        X, y, na, nb, nk, removeNA=False)
    mask = np.isnan(ytarget_exp) | np.isnan(Xfeatures_exp).any(axis=1)
    kernel_mdl.fit(Xfeatures_exp[~mask, :], ytarget_exp[~mask])

    # First-step prediction; rows with incomplete features stay NaN.
    ypred_exp1 = np.full(X.shape[0], np.nan)
    ypred_exp1[~mask] = kernel_mdl.predict(Xfeatures_exp[~mask, :])

    # Roll the lag features forward by one step by hand.
    # NOTE(review): assumes the 9 columns are laid out as
    # [3 lags of y | 3 lags of X[0] | 3 lags of X[1]] -- confirm against
    # helper_preprocess. Presumably this mirrors what the model does
    # internally when it updates its lag features between steps.
    X2 = copy.deepcopy(Xfeatures_exp)
    X2[:, 1:3] = X2[:, 0:2]
    X2[:, 0] = ypred_exp1
    X2[:, 4:6] = X2[:, 3:5]
    X2[:, 3] = shift(X2[:, 3], -1)
    X2[:, 7:9] = X2[:, 6:8]
    X2[:, 6] = shift(X2[:, 6], -1)

    # Second-step prediction on the rows whose updated features are complete.
    mask = ~np.isnan(X2).any(axis=1)
    ypred_exp2 = np.full(X2.shape[0], np.nan)
    ypred_exp2[mask] = kernel_mdl.predict(X2[mask, :])
    # Pad two leading NaNs and trim back to len(y) so the manual result is
    # aligned with ypred_act (the 2 presumably corresponds to step -- confirm).
    ypred_exp2 = np.concatenate([np.full(2, np.nan), ypred_exp2])[0:len(y)]

    np.testing.assert_array_almost_equal(ypred_act, ypred_exp2)
def _preprocess_data(self, X, y):
    """
    Build the (features, target) pair passed on for fitting.

    The features are the lag features generated by the processor returned
    from ``self._get_lag_feature_processor(X, y)``; the target is ``y``
    shifted by ``-self.pred_step``. Any row that holds a NaN in the target or
    in one of the feature columns is removed from both outputs.
    """
    processor = self._get_lag_feature_processor(X, y)
    lagged = processor.generate_lag_features()
    shifted = shift(y, -self.pred_step)

    # Put the target next to the features so a single row-wise scan finds
    # every incomplete row.
    stacked = np.hstack((shifted.reshape(-1, 1), lagged))
    keep = ~np.isnan(stacked).any(axis=1)
    return lagged[keep], shifted[keep]
def _preprocess_data(self, X, y):
    """
    Prepare the training data handed to base_estimator.

    Generates the lag features through the processor obtained from
    ``self._get_lag_feature_processor(X, y)`` and shifts ``y`` by
    ``-self.pred_step`` to form the target. Only rows where the target and
    all feature columns are free of NaN are returned.
    """
    lag_features = self._get_lag_feature_processor(X, y).generate_lag_features()
    shifted_y = shift(y, -self.pred_step)

    # A row is usable only when none of its entries (target or feature) is
    # NaN; the shift and the lagging both leave NaN at the edges.
    joined = np.concatenate((shifted_y.reshape(-1, 1), lag_features), axis=1)
    complete_rows = np.logical_not(np.isnan(joined)).all(axis=1)
    return lag_features[complete_rows], shifted_y[complete_rows]
def test_shift():
    """Check ``shift`` on 1-D and 2-D arrays for a range of offsets.

    Covers positive and negative offsets smaller than, equal to, and larger
    than the array length, plus the zero offset. Vacated positions are
    expected to be NaN; ``assert_array_equal`` treats NaNs at matching
    positions as equal, so the expected arrays can contain NaN directly.
    """
    x = np.array([1., 2., 3., 4.])
    all_nan = np.full(4, np.nan)

    cases_1d = [
        (2, np.array([np.nan, np.nan, 1., 2.])),
        (-2, np.array([3., 4., np.nan, np.nan])),
        # Offsets beyond the array length wipe out every element.
        (6, all_nan),
        (-6, all_nan),
        (0, x),
        # Boundary: offset equal to the array length, in both directions.
        # (The negative direction was previously a duplicate of the +4 case.)
        (4, all_nan),
        (-4, all_nan),
    ]
    for k, expected in cases_1d:
        np.testing.assert_array_equal(
            shift(x, k), expected, err_msg="shift(x, {})".format(k))

    # 2-D input: the expected outputs move whole rows.
    y = np.array([[1., 2., 3.], [4., 5., 6.]])
    nan_row = [np.nan, np.nan, np.nan]
    cases_2d = [
        (-1, np.array([[4., 5., 6.], nan_row])),
        (1, np.array([nan_row, [1., 2., 3.]])),
    ]
    for k, expected in cases_2d:
        np.testing.assert_array_equal(
            shift(y, k), expected, err_msg="shift(y, {})".format(k))
def add_lags(X, y, auto_order, exog_order, exog_delay, pred_step=1):
    """
    Adds lags based on the orders and delays of the endogenous and exogenous
    variables.

    Takes the following:
    * X (dataframe): design matrix
    * y (dataframe): target variable
    * auto_order (int): the autoregressive order of the model
    * exog_order (list): the order of the exogenous variables
    * exog_delay (list): delay of the exogenous variables
    * pred_step (int, optional): offset applied to the target, i.e. the
      target is ``y`` shifted by ``-pred_step``. Defaults to 1, which is the
      offset this function always used before the parameter existed.

    Returns:
    * features (array): transformed design matrix
    * target (array): transformed target variable

    Rows containing NaN in the target or in any feature column are dropped
    from both returned arrays.
    """
    # Work on the underlying arrays throughout, so the lag processor and the
    # shift helper receive the same kind of input.
    X_values = X.values
    y_values = y.values

    m = utils.MetaLagFeatureProcessor(X_values, y_values, auto_order,
                                      exog_order, exog_delay)
    features = m.generate_lag_features()
    target = utils.shift(y_values, -pred_step)

    # Drop every row that has a NaN in either the target or the features.
    all_data = np.concatenate([target.reshape(-1, 1), features], axis=1)
    mask = np.isnan(all_data).any(axis=1)
    features, target = features[~mask], target[~mask]
    return features, target