def mase_loss(y_test, y_pred, y_train, sp=1):
    """Mean absolute scaled error.

    This scale-free error metric can be used to compare forecast methods on
    a single series and also to compare forecast accuracy between series.
    The forecast error is scaled by the in-sample mean absolute error of
    the naive seasonal forecast on the training data.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.
    y_train : pandas Series of shape = (n_obs,)
        Observed training values.
    sp : int
        Seasonal periodicity of training data.

    Returns
    -------
    loss : float
        MASE loss, or ``np.nan`` if the mean absolute error of the naive
        seasonal forecast on the training data is zero.

    Raises
    ------
    ValueError
        If the time index of `y_train` is not strictly before the time
        index of `y_test`.

    References
    ----------
    ..[1]   Hyndman, R. J. (2006). "Another look at measures of forecast
            accuracy", Foresight, Issue 4.
    """
    # input checks
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    y_train = check_y(y_train)
    check_equal_time_index(y_test, y_pred)

    # check if training set is prior to test set
    if y_train is not None:
        check_time_index(y_train.index)
        # compare time points with time points: use the index of y_test,
        # not its observed values
        if y_train.index.max() >= y_test.index.min():
            raise ValueError("Found `y_train` with time index which is not "
                             "before time index of `y_pred`")

    # naive seasonal prediction: each value is predicted by the value
    # observed `sp` steps earlier
    y_train = np.asarray(y_train)
    y_pred_naive = y_train[:-sp]

    # mean absolute error of naive seasonal prediction
    mae_naive = np.mean(np.abs(y_train[sp:] - y_pred_naive))

    # if training data is flat, mae may be zero,
    # return np.nan to avoid divide by zero error
    # and np.inf values
    if mae_naive == 0:
        return np.nan

    return np.mean(np.abs(y_test - y_pred)) / mae_naive
def remove_trend(x, coefs, time_index=None):
    """Subtract a fitted polynomial trend from each sample of `x`.

    Parameters
    ----------
    x : array_like, shape=[n_samples, n_obs]
        Time series data, each sample is de-trended separately.
    coefs : ndarray, shape=[n_samples, order + 1]
        Fitted polynomial coefficients for each sample. One column means
        order zero (mean only), two columns mean order 1 (linear), three
        columns mean order 2 (quadratic), etc.
    time_index : array-like, shape=[n_obs], optional (default=None)
        Time points at which the polynomial trend is evaluated. If None,
        ``0, 1, ..., n_obs - 1`` is used.

    Returns
    -------
    xt : ndarray
        `x` with the polynomial trend subtracted.

    Raises
    ------
    ValueError
        If a `time_index` is passed whose length differs from the number
        of columns of `x`.

    See Also
    --------
    fit_trend
    add_trend

    References
    ----------
    Adapted from statsmodels (0.9.0), see
    https://www.statsmodels.org/dev/_modules/statsmodels/tsa/tsatools.html
    #detrend
    """
    x = check_array(x)

    # the polynomial degree is implied by the number of coefficient columns
    degree = coefs.shape[1] - 1

    # degree zero: only a constant per sample has to be removed
    if degree == 0:
        return x - coefs

    if time_index is not None:
        # a user-supplied index is passed through check_time_index and must
        # have one entry per observation
        time_index = check_time_index(time_index)
        if len(time_index) != x.shape[1]:
            raise ValueError(
                'Length of passed index does not match length of passed x')
    else:
        # fall back to a plain integer range over the observations
        time_index = np.arange(x.shape[1])

    # Vandermonde matrix of the time points, one column per coefficient
    design = np.vander(time_index, N=degree + 1)
    trend = np.dot(design, coefs.T).T
    return x - trend
def add_trend(x, coefs, time_index=None):
    """Add a fitted polynomial trend to each sample of `x`.

    Inverse function to `remove_trend()`.

    Parameters
    ----------
    x : array_like, shape=[n_samples, n_obs]
        Time series data, each sample is treated separately.
    coefs : array-like, shape=[n_samples, order + 1]
        Fitted polynomial coefficients for each sample. One column means
        order zero (mean only), two columns mean order 1 (linear), three
        columns mean order 2 (quadratic), etc.
    time_index : array-like, shape=[n_obs], optional (default=None)
        Time points at which the polynomial trend is evaluated. If None,
        ``0, 1, ..., n_obs - 1`` is used.

    Returns
    -------
    xt : ndarray
        The series with added trend.

    Raises
    ------
    ValueError
        If a `time_index` is passed whose length differs from the number
        of columns of `x`.

    See Also
    --------
    fit_trend
    remove_trend
    """
    x = check_array(x)

    # the polynomial degree is implied by the number of coefficient columns
    degree = coefs.shape[1] - 1

    if degree > 0:
        if time_index is not None:
            # a user-supplied index is passed through check_time_index and
            # must have one entry per observation
            time_index = check_time_index(time_index)
            if len(time_index) != x.shape[1]:
                raise ValueError(
                    'Length of passed index does not match length of passed x')
        else:
            # fall back to a plain integer range over the observations
            time_index = np.arange(x.shape[1])

        # Vandermonde matrix of the time points, one column per coefficient
        design = np.vander(time_index, N=degree + 1)
        trend = np.dot(design, coefs.T).T
        xt = x + trend
    else:
        # degree zero: only a constant per sample has to be added back
        xt = x + coefs

    return xt
def _get_duration(x, y=None, coerce_to_int=False, unit=None):
    """Compute duration of time index `x` or durations between time points
    `x` and `y` if `y` is given.

    Parameters
    ----------
    x : pd.Index, pd.Timestamp, pd.Period, int
        Time index if `y` is None, otherwise a single time point.
    y : pd.Timestamp, pd.Period, int, optional (default=None)
        Time point subtracted from `x`. Must be of the same type as `x`;
        mixing Python and numpy integers is allowed.
    coerce_to_int : bool
        If True, duration is returned as integer value for given unit.
        Only applied when `x` is a period/datetime index or time point.
    unit : str
        Time unit. If None and coercion is requested, the unit is obtained
        from `x` via `_get_unit`.

    Returns
    -------
    ret : duration type
        ``x[-1] - x[0]`` if `y` is None, otherwise ``x - y``, optionally
        coerced to an integer.

    Raises
    ------
    AssertionError
        If `y` is given and `x` is not an int, np.integer, pd.Period or
        pd.Timestamp, or if `x` and `y` have incompatible types.
    """
    if y is None:
        x = check_time_index(x)
        duration = x[-1] - x[0]
    else:
        assert isinstance(x, (int, np.integer, pd.Period, pd.Timestamp))
        # check types allowing (np.integer, int) combinations to pass;
        # both operands have to be checked, not `x` twice
        assert type(x) is type(y) or (
            isinstance(x, (np.integer, int))
            and isinstance(y, (np.integer, int)))
        duration = x - y

    # coerce to integer result for given time unit
    if coerce_to_int and isinstance(
            x, (pd.PeriodIndex, pd.DatetimeIndex, pd.Period, pd.Timestamp)):
        if unit is None:
            # try to get the unit from the data if not given
            unit = _get_unit(x)
        duration = _coerce_duration_to_int(duration, unit=unit)

    return duration
def _check_y(y):
    """Pass the time index of `y` through `check_time_index`.

    Parameters
    ----------
    y : pd.Series or index-like
        If a pd.Series is given, its index is used; any other input is
        handed to `check_time_index` unchanged.

    Returns
    -------
    The return value of `check_time_index`.
    """
    # allow for pd.Series by unwrapping its index
    index = y.index if isinstance(y, pd.Series) else y
    return check_time_index(index)
def _set_oh_index(self, y):
    """Store the checked index of `y` on the instance as `_oh_index`.

    Parameters
    ----------
    y : object with an `index` attribute (presumably a pd.Series)
        Its index is passed through `check_time_index` before being stored.
    """
    checked_index = check_time_index(y.index)
    self._oh_index = checked_index