def _check_array(y):
    """Validate ``y`` and hand the result to ``c1d``.

    ``check_array`` is called without requiring a 2-d input, without
    rejecting non-finite values, always copying, and casting to ``DTYPE``.
    """
    validated = check_array(
        y,
        ensure_2d=False,
        force_all_finite=False,
        copy=True,
        dtype=DTYPE,
    )
    return c1d(validated)
def _normalize_seasonal_periods_to_type(self, seasonal_periods, dtype):
    """Validate seasonal periods and normalize them.

    Normalization casts the periods to ``dtype``, removes duplicates, drops
    every value that is not greater than 1 (with a warning) and leaves the
    remaining values in ascending order.

    Returns
    -------
    The normalized array, or ``None`` when no periods were provided or none
    of them is greater than 1.
    """
    if seasonal_periods is None:
        return None

    # Fall back to the raw input if the exception handler chooses not to
    # raise, exactly as the flow would continue without a rebinding.
    periods = seasonal_periods
    try:
        periods = c1d(
            check_array(
                seasonal_periods,
                ensure_2d=False,
                force_all_finite=True,
                ensure_min_samples=0,
                copy=True,
                dtype=dtype,
            )
        )
    except Exception as validation_exception:
        self.context.get_exception_handler().exception(
            "seasonal_periods definition is invalid",
            error.InputArgsException,
            previous_exception=validation_exception,
        )

    # np.unique returns the distinct values already sorted ascending.
    periods = np.unique(periods)

    if np.any(periods <= 1):
        self.context.get_exception_handler().warn(
            "All seasonal periods should be values greater than 1. "
            "Ignoring all seasonal period values that do not meet this condition.",
            error.InputArgsWarning,
        )

    # Boolean-mask filtering keeps the ascending order produced above.
    periods = periods[periods > 1]

    return periods if len(periods) > 0 else None
def _validate(self, y):
    """Validate the input time series and switch Box-Cox off if needed.

    Returns
    -------
    The series as validated by ``check_array`` (float64) and passed through
    ``c1d``. If validation fails, the problem is reported through the
    context's exception handler and, should that call return, ``False`` is
    returned instead.

    Side effects
    ------------
    When the series contains any value ``<= 0``, ``self.use_box_cox`` is set
    to ``False``. A warning is emitted first if it had been explicitly set
    to ``True``.
    """
    try:
        series = c1d(
            check_array(
                y,
                ensure_2d=False,
                force_all_finite=True,
                ensure_min_samples=1,
                copy=True,
                dtype=np.float64,
            )
        )
    except Exception as validation_exception:
        self.context.get_exception_handler().exception(
            "y series is invalid",
            error.InputArgsException,
            previous_exception=validation_exception,
        )
        return False

    # Nothing to adjust when every observation is strictly positive.
    if not np.any(series <= 0):
        return series

    if self.use_box_cox is True:
        self.context.get_exception_handler().warn(
            "Box-Cox transformation (use_box_cox) was forced to True "
            "but there are negative values in input series. "
            "Setting use_box_cox to False.",
            error.InputArgsWarning,
        )
    self.use_box_cox = False
    return series
def _fit_to_observations(self, y, starting_x):
    """Fit the model to time series ``y`` starting from state ``starting_x``.

    The series is optionally validated, Box-Cox transformed via
    ``self._boxcox`` and then run through the state recursion
    ``x <- F @ x + g * e`` with ``e`` the one-step error ``yw[t] - w @ x``.

    Parameters
    ----------
    y : array-like
        Observed series. Validated as a float64 array when
        ``self.validate_input`` is truthy.
    starting_x
        Initial state used for the recursion.

    Returns
    -------
    self
        With ``is_fitted`` set to ``True`` and ``x_last``, ``resid_boxcox``,
        ``y_hat``, ``resid`` and ``aic`` populated on success. When a
        ``RuntimeWarning`` is raised during the recursion or the inverse
        Box-Cox step, a warning is recorded through ``self.add_warning`` and
        ``is_fitted`` stays ``False``.
    """
    self.warnings = []
    self.is_fitted = False

    if self.validate_input:
        try:
            y = c1d(
                check_array(
                    y,
                    ensure_2d=False,
                    force_all_finite=True,
                    ensure_min_samples=1,
                    copy=True,
                    dtype=np.float64,
                )
            )
        except Exception as validation_exception:
            self.context.get_exception_handler().exception(
                "y series is invalid",
                error.InputArgsException,
                previous_exception=validation_exception,
            )

    self.y = y
    yw = self._boxcox(y)

    matrix_builder = self.matrix
    w = matrix_builder.make_w_vector()
    g = matrix_builder.make_g_vector()
    F = matrix_builder.make_F_matrix()

    # One fitted value per observation, filled in by the recursion below.
    yw_hat = np.zeros(len(y))
    x = starting_x

    with warnings.catch_warnings():
        # Escalate warnings so numeric trouble aborts the recursion.
        warnings.filterwarnings('error')
        try:
            for t in range(len(y)):
                yw_hat[t] = w @ x
                e = yw[t] - yw_hat[t]
                x = F @ x + g * e
        except RuntimeWarning:
            # calculation issues, values close to max float value
            self.add_warning('Numeric calculation issues detected. Model is not usable.')
            self.is_fitted = False
            return self

    # store fit results
    self.x_last = x
    self.resid_boxcox = yw - yw_hat

    with warnings.catch_warnings():
        # The handler below can only fire if RuntimeWarnings are raised as
        # exceptions, so escalate them explicitly for the inverse transform.
        warnings.filterwarnings('error', category=RuntimeWarning)
        try:
            self.y_hat = self._inv_boxcox(yw_hat)
        except RuntimeWarning:
            self.add_warning('Box-Cox related numeric calculation issues detected. Model is not usable.')
            self.is_fitted = False
            return self

    self.resid = self.y - self.y_hat
    self.is_fitted = True
    self.aic = self.calculate_aic()
    return self
def find_box_cox_lambda(y, seasonal_periods=None, bounds=(-1, 2)):
    """Find a Box-Cox lambda for ``y`` by delegating to ``Guerrero``.

    Parameters
    ----------
    y : array-like
        Series to analyse. Validated as a finite, non-empty float64 array
        (not copied when no conversion is needed).
    seasonal_periods : optional
        Forwarded unchanged to ``Guerrero.find_lambda``.
    bounds : tuple, default ``(-1, 2)``
        Forwarded unchanged to ``Guerrero.find_lambda``.

    Returns
    -------
    Whatever ``Guerrero.find_lambda`` returns.
    """
    series = c1d(
        check_array(
            y,
            ensure_2d=False,
            force_all_finite=True,
            ensure_min_samples=1,
            copy=False,
            dtype=np.float64,
        )
    )
    return Guerrero().find_lambda(
        series,
        seasonal_periods=seasonal_periods,
        bounds=bounds,
    )
def inv_boxcox(y, lam, force_valid=False):
    """Invert a Box-Cox transformation.

    Parameters
    ----------
    y : array-like
        Transformed values. Validated as a finite, non-empty float64 array.
        The caller's array is never modified.
    lam : float
        Box-Cox lambda used for the forward transformation.
    force_valid : bool, default False
        For negative ``lam`` the transformed values cannot exceed
        ``-1 / lam``. When True, larger values are clipped to that bound
        instead of raising.

    Returns
    -------
    np.ndarray
        ``exp(y)`` when ``lam`` is numerically zero, otherwise
        ``sign(y * lam + 1) * |y * lam + 1| ** (1 / lam)``.

    Raises
    ------
    error.InputArgsException
        If ``lam < 0``, ``force_valid`` is False and some value exceeds
        ``-1 / lam``.
    """
    y = c1d(
        check_array(
            y,
            ensure_2d=False,
            force_all_finite=True,
            ensure_min_samples=1,
            copy=False,
            dtype=np.float64,
        )
    )

    if lam < 0:
        upper_bound = -1 / lam
        if force_valid:
            # Validation ran with copy=False, so ``y`` may still be the
            # caller's own array. np.minimum builds a new array instead of
            # clipping in place and silently altering the caller's data.
            y = np.minimum(y, upper_bound)
        elif np.any(y > upper_bound):
            raise error.InputArgsException(
                'Not possible to transform back such y values.')

    if np.isclose(0.0, lam):
        return np.exp(y)

    yy = y * lam + 1
    return np.sign(yy) * (np.abs(yy) ** (1 / lam))
def boxcox(y, lam=None, seasonal_periods=None, bounds=(-1, 2)):
    """Apply the Box-Cox transformation to ``y``.

    Parameters
    ----------
    y : array-like
        Series to transform. Validated as a finite, non-empty float64 array.
    lam : float, optional
        Box-Cox lambda. When None it is estimated with
        ``find_box_cox_lambda`` using ``seasonal_periods`` and ``bounds``.
    seasonal_periods : optional
        Only used when ``lam`` has to be estimated.
    bounds : tuple, default ``(-1, 2)``
        Only used when ``lam`` has to be estimated.

    Returns
    -------
    np.ndarray
        ``log(y)`` when ``lam`` is numerically zero, otherwise
        ``(sign(y) * |y| ** lam - 1) / lam``.

    Raises
    ------
    error.InputArgsException
        If ``y`` contains non-positive values and ``lam`` is non-positive
        or numerically zero (the logarithm branch).
    """
    y = c1d(
        check_array(
            y,
            ensure_2d=False,
            force_all_finite=True,
            ensure_min_samples=1,
            copy=False,
            dtype=np.float64,
        )
    )

    if lam is None:
        lam = find_box_cox_lambda(
            y, seasonal_periods=seasonal_periods, bounds=bounds)

    # A tiny positive lambda is treated as zero below and goes through
    # np.log, so it needs the same positivity guarantee as lam <= 0.
    use_log = np.isclose(0.0, lam)
    if (lam <= 0 or use_log) and np.any(y <= 0):
        raise error.InputArgsException(
            'y must have only positive values for box-cox transformation.')

    if use_log:
        return np.log(y)
    return (np.sign(y) * (np.abs(y) ** lam) - 1) / lam
def update(self, y, exogenous=None, maxiter=None, **kwargs):
    """Extend a fitted model with newly observed endog/exog samples.

    The new observations are appended to the data stored on the fitted
    result and the model is fit again, seeded with the current parameter
    estimates, for a limited number of iterations.

    Parameters
    ----------
    y : array-like or iterable, shape=(n_samples,)
        The new time-series observations to append to the endogenous
        samples the estimator was previously fit on. May be a Pandas
        ``Series`` or a numpy array; expected to be one-dimensional.

    exogenous : array-like, shape=[n_obs, n_vars], optional (default=None)
        Optional 2-d array of exogenous variables for the new
        observations. Required if the model was fit with exogenous
        covariates.

    maxiter : int, optional (default=None)
        Number of iterations for the refit. If None,
        ``max(5, n_samples // 10)`` is used.

    **kwargs : keyword args
        Forwarded to the internal ``_fit`` call.

    Raises
    ------
    ValueError
        If the number of exogenous columns differs from the fitted model's
        ``k_exog``, or the number of exogenous rows differs from the number
        of new samples.

    Notes
    -----
    * The refit goes through ``self._fit`` with the existing parameters
      passed as ``start_params``.
    """
    check_is_fitted(self, 'arima_res_')
    fitted = self.arima_res_

    # Validate the incoming observations.
    new_endog = c1d(
        check_array(
            y,
            ensure_2d=False,
            force_all_finite=False,
            copy=True,
            dtype=DTYPE,
        )
    )
    n_new = new_endog.shape[0]

    # Raises if exog presence does not match how the model was fit.
    exogenous = self._check_exog(exogenous)

    if exogenous is not None:
        expected_cols = fitted.model.k_exog
        n_exog, exog_dim = exogenous.shape
        if exog_dim != expected_cols:
            raise ValueError("Dim mismatch in fit exogenous (%i) and new "
                             "exogenous (%i)" % (expected_cols, exog_dim))

        # Row counts of the new endog and exog must agree.
        if n_exog != n_new:
            raise ValueError("Dim mismatch in n_samples "
                             "(endog=%i, exog=%i)" % (n_new, n_exog))

    # Stored endog may be 1-d or 2-d; squeeze after appending.
    full_endog = np.squeeze(_append_to_endog(fitted.data.endog, new_endog))

    full_exog = None
    if exogenous is not None:
        full_exog = np.concatenate((fitted.data.exog, exogenous))

    # Arbitrary cap, meant to avoid over-fitting on a small update.
    if maxiter is None:
        maxiter = max(5, n_new // 10)

    # Seed the new fit with the current estimates rather than starting
    # from scratch.
    start = fitted.params
    self._fit(full_endog, full_exog, start_params=start,
              maxiter=maxiter, **kwargs)

    # Behaves like `fit`
    return self
def fit(self, y, exogenous=None, **fit_args):
    """Fit an ARIMA to observations ``y`` with optional ``exogenous`` data.

    Parameters
    ----------
    y : array-like or iterable, shape=(n_samples,)
        The time-series to fit. May be a Pandas ``Series`` or a numpy
        array; expected to be a one-dimensional array of floats.

    exogenous : array-like, shape=[n_obs, n_vars], optional (default=None)
        Optional 2-d array of exogenous variables used as additional
        regressors. If the model is fit with exogenous features, they must
        also be supplied when predicting.

    **fit_args : dict or kwargs
        Forwarded to the internal ``_fit`` call (and to ``update`` when an
        out-of-sample score is computed).

    Raises
    ------
    ValueError
        If ``out_of_sample_size`` is not smaller than the number of samples.

    Returns
    -------
    self
    """
    y = c1d(
        check_array(
            y,
            ensure_2d=False,
            force_all_finite=False,
            copy=True,
            dtype=DTYPE,
        )
    )
    n_samples = y.shape[0]

    if exogenous is not None:
        exogenous = check_array(
            exogenous,
            ensure_2d=True,
            force_all_finite=False,
            copy=False,
            dtype=DTYPE,
        )

    # Resolve the hold-out size and the scoring callable.
    requested_holdout = self.out_of_sample_size
    scoring = get_callable(self.scoring, VALID_SCORING)

    # Negative sizes are treated as "no hold-out".
    cv = max(requested_holdout, 0)
    if cv >= n_samples:
        raise ValueError("out-of-sample size must be less than number "
                         "of samples!")

    # Hold the tail of the series (and of exog) out of the fit so the
    # score is genuinely out-of-sample. Addressed due to Issue #28
    cv_samples = cv_exog = None
    if cv:
        cv_samples, y = y[-cv:], y[:-cv]
        if exogenous is not None:
            cv_exog, exogenous = exogenous[-cv:, :], exogenous[:-cv, :]

    # Internal call
    self._fit(y, exogenous, **fit_args)

    if cv_samples is None:
        self.oob_ = np.nan
        return self

    # Forecast the held-out horizon and score it.
    pred = self.predict(n_periods=cv, exogenous=cv_exog)
    self.oob_ = scoring(cv_samples, pred, **self.scoring_args)

    # Fold the held-out samples back in so future forecasts originate
    # from the end of the full series.
    self.update(cv_samples, cv_exog, **fit_args)
    return self
def fit(self, y, exogenous=None, **fit_args):
    """Fit an ARIMA (or SARIMAX) to ``y`` with optional ``exogenous`` data.

    A statsmodels ``ARIMA`` is built when ``self.seasonal_order`` is None,
    otherwise a ``SARIMAX``. The fitted result is stored on
    ``self.arima_res_``, ``self.fit_with_exog_`` records whether exogenous
    data was supplied, and ``self.oob_`` holds the validation score (or
    ``np.nan`` when ``out_of_sample_size`` is not positive).

    Parameters
    ----------
    y : array-like or iterable, shape=(n_samples,)
        The time-series to which to fit the ``ARIMA`` estimator. This may
        either be a Pandas ``Series`` object or a numpy array. This should
        be a one-dimensional array of floats.

    exogenous : array-like, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d array of exogenous variables. If provided, these
        variables are used as additional features in the regression
        operation. This should not include a constant or trend. Note that
        if an ``ARIMA`` is fit on exogenous features, it must be provided
        exogenous features for making predictions.

    **fit_args : dict or kwargs
        Extra keyword arguments forwarded to the statsmodels ``fit`` call.

    Returns
    -------
    self
    """
    # validate the endog: copied, cast to DTYPE, passed through c1d
    y = c1d(
        check_array(y, ensure_2d=False, force_all_finite=False,
                    copy=True, dtype=DTYPE))  # type: np.ndarray
    n_samples = y.shape[0]

    # if exog was included, check the array...
    if exogenous is not None:
        exogenous = check_array(exogenous, ensure_2d=True,
                                force_all_finite=False,
                                copy=False, dtype=DTYPE)

    # determine the CV args, if any
    cv = self.out_of_sample_size
    scoring = get_callable(self.scoring, VALID_SCORING)

    # clamp the hold-out size into [0, n_samples]
    # NOTE(review): the clamp allows cv == n_samples, i.e. scoring on the
    # entire series - confirm that this is intended.
    cv = max(min(cv, n_samples), 0)

    # Builds the statsmodels model and fits it; returns (model, results).
    # Reads ``y`` and ``exogenous`` from the enclosing scope.
    def _fit_wrapper():
        # these might change depending on which one
        method = self.method

        # if not seasonal:
        if self.seasonal_order is None:
            if method is None:
                method = "css-mle"

            # create the statsmodels ARIMA
            arima = _ARIMA(endog=y, order=self.order, missing='none',
                           exog=exogenous, dates=None, freq=None)

            # there's currently a bug in the ARIMA model where on pickling
            # it tries to acquire an attribute called
            # 'self.{dates|freq|missing}', but they do not exist as class
            # attrs! They're passed up to TimeSeriesModel in base, but
            # are never set. So we inject them here so as not to get an
            # AttributeError later. (see http://bit.ly/2f7SkKH)
            for attr, val in (('dates', None), ('freq', None),
                              ('missing', 'none')):
                if not hasattr(arima, attr):
                    setattr(arima, attr, val)
        else:
            if method is None:
                method = 'lbfgs'

            # create the SARIMAX
            arima = sm.tsa.statespace.SARIMAX(
                endog=y, exog=exogenous, order=self.order,
                seasonal_order=self.seasonal_order, trend=self.trend,
                enforce_stationarity=self.transparams)

        # actually fit the model, now...
        return arima, arima.fit(start_params=self.start_params,
                                trend=self.trend, method=method,
                                transparams=self.transparams,
                                solver=self.solver, maxiter=self.maxiter,
                                disp=self.disp, callback=self.callback,
                                **fit_args)

    # sometimes too many warnings... silence them only while fitting.
    # Note that ``fit`` here is the statsmodels *model* and
    # ``self.arima_res_`` the fitted *results* object.
    if self.suppress_warnings:
        with warnings.catch_warnings(record=False):
            warnings.simplefilter('ignore')
            fit, self.arima_res_ = _fit_wrapper()
    else:
        fit, self.arima_res_ = _fit_wrapper()

    # Set df_model attribute for SARIMAXResults object (only when the
    # results object does not already expose one)
    if not hasattr(self.arima_res_, 'df_model'):
        df_model = fit.k_exog + fit.k_trend + fit.k_ar + \
            fit.k_ma + fit.k_seasonal_ar + fit.k_seasonal_ma
        setattr(self.arima_res_, 'df_model', df_model)

    # if the model is fit with an exogenous array, it must
    # be predicted with one as well.
    self.fit_with_exog_ = exogenous is not None

    # now make a prediction if we're validating
    # to save the out-of-sample value
    # NOTE(review): ``y`` and ``exogenous`` are not trimmed before fitting,
    # so the last ``cv`` points scored here were part of the training data;
    # the score looks in-sample rather than out-of-sample - confirm.
    if cv > 0:
        # get the predictions
        pred = self.arima_res_.predict(exog=exogenous,
                                       typ='linear')[-cv:]
        self.oob_ = scoring(y[-cv:], pred, **self.scoring_args)
    else:
        self.oob_ = np.nan

    return self
def add_new_observations(self, y, exogenous=None):
    """Update the endog/exog samples after a model fit.

    After fitting your model and creating forecasts, you're going to need
    to attach new samples to the data you fit on. These are used to compute
    new forecasts (but using the same estimated parameters).

    Parameters
    ----------
    y : array-like or iterable, shape=(n_samples,)
        The time-series data to add to the endogenous samples on which the
        ``ARIMA`` estimator was previously fit. This may either be a Pandas
        ``Series`` object or a numpy array. This should be a one-
        dimensional array of finite floats.

    exogenous : array-like, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d array of exogenous variables. If the model was
        fit with an exogenous array of covariates, it will be required for
        updating the observed values.

    Raises
    ------
    ValueError
        If the number of exogenous columns differs from the fitted model's
        ``k_exog``, or the number of exogenous rows differs from the number
        of new samples.

    Notes
    -----
    This does not constitute re-fitting, as the model params do not
    change, so do not use this in place of periodically refreshing the
    model. Use it only to add new observed values from which to forecast
    new values.
    """
    check_is_fitted(self, 'arima_res_')
    model_res = self.arima_res_

    # validate the new samples to add
    y = c1d(
        check_array(y, ensure_2d=False, force_all_finite=False,
                    copy=True, dtype=DTYPE))  # type: np.ndarray
    n_samples = y.shape[0]

    # if exogenous is None and new exog provided, or vice versa, raise.
    # Keep the validated array when one is returned, so that array-likes
    # without a ``.shape`` are handled the same way ``update`` handles them.
    checked_exog = self._check_exog(exogenous)
    if checked_exog is not None:
        exogenous = checked_exog

    # ensure the k_exog matches
    if exogenous is not None:
        k_exog = model_res.model.k_exog
        n_exog, exog_dim = exogenous.shape

        if exog_dim != k_exog:
            raise ValueError("Dim mismatch in fit exogenous (%i) and new "
                             "exogenous (%i)" % (k_exog, exog_dim))

        # make sure the number of samples in exogenous match the number
        # of samples in the endog
        if n_exog != n_samples:
            raise ValueError("Dim mismatch in n_samples "
                             "(endog=%i, exog=%i)"
                             % (n_samples, n_exog))

    # order of differencing; used further down for the ARIMA model arrays
    d = self.order[1]

    # first concatenate the original data (might be 2d or 1d)
    y = _append_to_endog(model_res.data.endog, y)

    # Now create the new exogenous.
    if exogenous is not None:
        exog = np.concatenate((model_res.data.exog, exogenous))
    else:
        exog = None

    # Update the arrays in the data class. The statsmodels ARIMA class
    # stores the values a bit differently than it does in the SARIMAX
    # class...
    sarimax = self.seasonal_order is not None
    if not sarimax:  # ARIMA or ARMA
        # Set the endog in two places. The undifferenced array in the
        # model_res.data, and the differenced array in the model_res.model
        model_res.data.endog = c1d(y)  # type: np.ndarray

        # The ARIMA model stores the differenced array. ARMA does not
        # (and we cannot diff for d < 1).
        if d > 0:  # ARIMA
            y_diffed = diff(y, d)
        else:  # ARMA
            y_diffed = y

        # This changes the length of the array!
        model_res.model.endog = y_diffed

        # Set the model result nobs (must be the differenced shape!)
        model_res.nobs = y_diffed.shape[0]

        if exog is not None:
            # Set in data class (this is NOT differenced, unlike the
            # model data)
            model_res.data.exog = exog

            # The model's exog carries intercept column(s) in front of the
            # user columns; rebuild them for the trimmed (d rows dropped)
            # exogenous array.
            k_intercept = (model_res.model.exog.shape[1] -
                           exogenous.shape[1])
            exog_diff = exog[d:, :]
            intercept = np.ones((exog_diff.shape[0], k_intercept))
            exog_diff = np.hstack((intercept, exog_diff))

            # set in the model itself
            model_res.model.exog = exog_diff
        else:
            # Otherwise we STILL have to set the exogenous array as an
            # intercept in the model class for both ARMA and ARIMA.
            # Make sure to use y_diffed in case d > 0, since the exog
            # array will be multiplied by the endog at some point and we
            # need the dimensions to match (Issue #30)
            model_res.model.exog = np.ones((y_diffed.shape[0], 1))

    else:  # SARIMAX
        # The SARIMAX endog is NOT diffed. Rather than patching arrays,
        # build a "pseudo new" model with the same init kwargs and load
        # the existing fit parameters into it.
        model_kwargs = model_res._init_kwds.copy()

        if exog is not None:
            model_kwargs['exog'] = exog

        new_model = sm.tsa.statespace.SARIMAX(endog=y, **model_kwargs)
        new_model.update(model_res.params)

        # Point the arima result to the new model
        self.arima_res_.model = new_model
def fit(self, y, exogenous=None, **fit_args):
    """Fit an ARIMA (or SARIMAX) to ``y`` with optional ``exogenous`` data.

    When ``out_of_sample_size`` is positive, that many trailing samples are
    held out of the fit, forecast and scored into ``self.oob_``, and then
    attached to the fitted model through ``add_new_observations``.

    Parameters
    ----------
    y : array-like or iterable, shape=(n_samples,)
        The time-series to fit. May be a Pandas ``Series`` or a numpy
        array; expected to be a one-dimensional array of floats.

    exogenous : array-like, shape=[n_obs, n_vars], optional (default=None)
        Optional 2-d array of exogenous variables used as additional
        regressors. If the model is fit with exogenous features, they must
        also be supplied when predicting.

    **fit_args : dict or kwargs
        Any keyword arguments to pass to the statsmodels ARIMA fit.

    Raises
    ------
    ValueError
        If ``out_of_sample_size`` is not smaller than the number of samples.

    Returns
    -------
    self
    """
    y = c1d(
        check_array(
            y,
            ensure_2d=False,
            force_all_finite=False,
            copy=True,
            dtype=DTYPE,
        )
    )
    n_samples = y.shape[0]

    if exogenous is not None:
        exogenous = check_array(
            exogenous,
            ensure_2d=True,
            force_all_finite=False,
            copy=False,
            dtype=DTYPE,
        )

    # Resolve the hold-out size and the scoring callable.
    requested_holdout = self.out_of_sample_size
    scoring = get_callable(self.scoring, VALID_SCORING)

    # Negative sizes are treated as "no hold-out".
    cv = max(requested_holdout, 0)
    if cv >= n_samples:
        raise ValueError("out-of-sample size must be less than number "
                         "of samples!")

    # Hold the tail of the series (and of exog) out of the fit so the
    # score is genuinely out-of-sample. Addressed due to Issue #28
    cv_samples = cv_exog = None
    if cv:
        cv_samples, y = y[-cv:], y[:-cv]
        if exogenous is not None:
            cv_exog, exogenous = exogenous[-cv:, :], exogenous[:-cv, :]

    # Builds either an ARIMA or a SARIMAX from the (possibly trimmed)
    # ``y``/``exogenous`` above and fits it; returns (model, results).
    def _fit_wrapper():
        method = self.method

        if self.seasonal_order is not None:
            if method is None:
                method = 'lbfgs'

            model = sm.tsa.statespace.SARIMAX(
                endog=y,
                exog=exogenous,
                order=self.order,
                seasonal_order=self.seasonal_order,
                trend=self.trend,
                enforce_stationarity=self.transparams,
            )
        else:
            if method is None:
                method = "css-mle"

            model = _ARIMA(
                endog=y,
                order=self.order,
                missing='none',
                exog=exogenous,
                dates=None,
                freq=None,
            )

            # On pickling, the ARIMA model looks up 'dates', 'freq' and
            # 'missing' attributes that are passed up to TimeSeriesModel
            # but never set. Inject whichever are absent to avoid an
            # AttributeError later. (see http://bit.ly/2f7SkKH)
            injected_defaults = (
                ('dates', None),
                ('freq', None),
                ('missing', 'none'),
            )
            for attr, val in injected_defaults:
                if not hasattr(model, attr):
                    setattr(model, attr, val)

        results = model.fit(
            start_params=self.start_params,
            trend=self.trend,
            method=method,
            transparams=self.transparams,
            solver=self.solver,
            maxiter=self.maxiter,
            disp=self.disp,
            callback=self.callback,
            **fit_args
        )
        return model, results

    # Optionally silence warnings, but only for the duration of the fit.
    if self.suppress_warnings:
        with warnings.catch_warnings(record=False):
            warnings.simplefilter('ignore')
            model, self.arima_res_ = _fit_wrapper()
    else:
        model, self.arima_res_ = _fit_wrapper()

    # Results objects lacking ``df_model`` get it from the model's
    # component counts.
    if not hasattr(self.arima_res_, 'df_model'):
        self.arima_res_.df_model = (
            model.k_exog + model.k_trend + model.k_ar +
            model.k_ma + model.k_seasonal_ar + model.k_seasonal_ma
        )

    # A model fit with exogenous data must be predicted with it as well.
    self.fit_with_exog_ = exogenous is not None

    if cv_samples is None:
        self.oob_ = np.nan
    else:
        # Forecast the held-out horizon (self.predict calls forecast from
        # statsmodels internally) and score it.
        pred = self.predict(n_periods=cv, exogenous=cv_exog)
        self.oob_ = scoring(cv_samples, pred, **self.scoring_args)

        # Attach the held-out samples so future forecasts originate from
        # the end of the full series.
        self.add_new_observations(cv_samples, cv_exog)

    # Save nobs since we might change it later if using OOB
    self.nobs_ = y.shape[0]

    # As of version 0.7.2, start saving the version with the model so
    # we can track changes over time.
    self.pkg_version_ = pyramid.__version__
    return self