def __init__(self, scaler=None, name="Scaler"):
    """
    Generic wrapper class for using scalers that implement `fit()`, `transform()` and
    `inverse_transform()` methods (typically from scikit-learn) on `TimeSeries`.

    Parameters
    ----------
    scaler
        The scaler to transform the data. It must provide the `fit()`, `transform()` and
        `inverse_transform()` methods.
        Default (`None`): a new `sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))` is created
        for this instance; this will scale all the values of a time series between 0 and 1.
    name
        A specific name for the scaler

    Raises
    ------
    ValueError
        If `scaler` does not expose callable `fit`, `transform` and `inverse_transform` attributes.
    """
    super().__init__(name)

    # The default scaler is built here rather than in the signature: a default expression is
    # evaluated once at definition time, so every default-constructed wrapper would otherwise
    # share one scaler object (and whatever state `fit()` stored in it).
    if scaler is None:
        scaler = MinMaxScaler(feature_range=(0, 1))

    if (not callable(getattr(scaler, "fit", None))
            or not callable(getattr(scaler, "transform", None))
            or not callable(getattr(scaler, "inverse_transform", None))):  # noqa W503
        raise_log(
            ValueError(
                'The provided transformer object must have fit(), transform() and inverse_transform() methods'
            ),
            logger)

    self.transformer = scaler
    self.train_series = None
def __init__(
    self,
    theta: int = 2,
    seasonality_period: Optional[int] = None,
    season_mode: SeasonalityMode = SeasonalityMode.MULTIPLICATIVE,
):
    """
    An implementation of the Theta method with configurable `theta` parameter. See [1]_.

    The training time series is de-seasonalized according to `seasonality_period`,
    or an inferred seasonality period.

    `season_mode` must be a ``SeasonalityMode`` Enum member.

    You can access the Enum with ``from darts import SeasonalityMode``.

    Parameters
    ----------
    theta
        Value of the theta parameter. Defaults to 2. Cannot be set to 0.
        If `theta = 1`, then the theta method restricts to a simple exponential smoothing (SES)
    seasonality_period
        User-defined seasonality period. If not set, will be tentatively inferred from the training series upon
        calling :func:`fit()`.
    season_mode
        Type of seasonality.
        Either ``SeasonalityMode.MULTIPLICATIVE``, ``SeasonalityMode.ADDITIVE`` or ``SeasonalityMode.NONE``.
        Defaults to ``SeasonalityMode.MULTIPLICATIVE``.

    Raises
    ------
    ValueError
        If `season_mode` is not a ``SeasonalityMode`` member, or if `theta` is 0.

    References
    ----------
    .. [1] `Unmasking the Theta method <https://robjhyndman.com/papers/Theta.pdf>`_
    """
    super().__init__()

    # placeholders, all reset to neutral values at construction time
    self.model = None
    self.coef = 1
    self.alpha = 1
    self.length = 0
    self.is_seasonal = False
    self.seasonality = None
    self.season_period = None

    # user-supplied configuration
    self.theta = theta
    self.seasonality_period = seasonality_period
    self.season_mode = season_mode

    is_known_mode = season_mode in SeasonalityMode
    raise_if_not(
        is_known_mode,
        f"Unknown value for season_mode: {season_mode}.",
        logger,
    )

    if self.theta == 0:
        raise_log(ValueError("The parameter theta cannot be equal to 0."), logger)
def _create_from_cls_and_kwargs(cls, kws):
    """Instantiate `cls` with the keyword arguments `kws`.

    Parameters
    ----------
    cls
        The callable to invoke; the error message refers to it as an optimizer or
        learning rate scheduler class.
    kws
        Mapping of keyword arguments forwarded as ``cls(**kws)``.

    Returns
    -------
    The object returned by ``cls(**kws)``.

    Raises
    ------
    ValueError
        Reported through `raise_log` when the call raises a `TypeError` or `ValueError`;
        the message lists the class, the keyword arguments and the original error.
    """
    try:
        return cls(**kws)
    except (TypeError, ValueError) as e:
        raise_log(
            ValueError(
                # trailing space added: the adjacent literals used to join as "scheduler;please"
                "Error when building the optimizer or learning rate scheduler; "
                "please check the provided class and arguments"
                "\nclass: {}"
                "\narguments (kwargs): {}"
                "\nerror:\n{}".format(cls, kws, e)
            ),
            logger,
        )
def test_raise_log(self):
    """Check that `raise_log` emits an ERROR record and raises the given exception."""
    raised = False

    with LogCapture() as captured:
        test_logger = get_logger(__name__)
        # drop any attached handlers before triggering the error
        test_logger.handlers = []
        try:
            raise_log(Exception("test"), test_logger)
        except Exception:
            raised = True

    # NOTE(review): the two checks below are placed after the capture context; the collapsed
    # source does not show whether they were inside it - confirm against the original layout.
    # the record must carry this module's logger name, ERROR level and the formatted exception
    captured.check((__name__, "ERROR", "Exception: test"))

    # the exception must also have reached the caller
    self.assertTrue(raised)
def __init__(self, scaler=None, name="Scaler", n_jobs: int = 1, verbose: bool = False):
    """
    Generic wrapper class for using scalers that implement `fit()`, `transform()` and
    `inverse_transform()` methods (typically from scikit-learn) on `TimeSeries`.

    Parameters
    ----------
    scaler
        The scaler to transform the data with. It must provide `fit()`, `transform()` and
        `inverse_transform()` methods.
        Default: `sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))`; this will scale all
        the values of a time series between 0 and 1.
        In case the `Scaler` is applied to multiple `TimeSeries` objects, a deep-copy of the chosen
        scaler will be instantiated, fitted, and stored, for each `TimeSeries`.
    name
        A specific name for the scaler
    n_jobs
        The number of jobs to run in parallel. Parallel jobs are created only when a `Sequence[TimeSeries]`
        is passed as input to a method, parallelising operations regarding different `TimeSeries`.
        Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available
        processors.
        Note: for a small amount of data, the parallelisation overhead could end up increasing the
        total required amount of time.
    verbose
        Optionally, whether to print operations progress

    Raises
    ------
    ValueError
        If the scaler lacks a callable `fit`, `transform` or `inverse_transform` attribute.
    """
    super().__init__(name=name, n_jobs=n_jobs, verbose=verbose)

    chosen = MinMaxScaler(feature_range=(0, 1)) if scaler is None else scaler

    # every method of the scaler protocol has to be present and callable
    required_methods = ("fit", "transform", "inverse_transform")
    supports_protocol = all(
        callable(getattr(chosen, method_name, None)) for method_name in required_methods
    )
    if not supports_protocol:
        raise_log(
            ValueError(
                'The provided transformer object must have fit(), transform() and inverse_transform() methods'
            ),
            logger)

    self.transformer = chosen
    self.transformer_instances = None
def predict(
    self,
    n: int,
    series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
) -> Union[TimeSeries, Sequence[TimeSeries]]:
    """
    Forecasts values for a certain number of time steps after the end of the series.

    If `fit()` has been called with only one `TimeSeries` as argument, then the `series` argument of this
    function is optional, and it will simply produce the next `n` time steps forecast.

    If `fit()` has been called with `series` specified as a `Sequence[TimeSeries]`, the `series` argument must
    be specified.

    When the `series` argument is specified, this function will compute the next `n` time steps forecasts
    for the simple series (or for each series in the sequence) given by `series`.

    If covariates were specified during the training, they must also be specified here.

    Parameters
    ----------
    n
        Forecast horizon - the number of time steps after the end of the series for which to produce predictions.
    series
        The series whose future we want to predict
    covariates
        One or several covariate time series which can be fed as inputs to the model. They must match the
        covariates that have been used with the `fit()` function for training.

    Returns
    -------
    Union[TimeSeries, Sequence[TimeSeries]]
        If `series` is not specified, this function returns a single time series containing the `n`
        next points after the end of the training series.
        If `series` is specified and is a simple `TimeSeries`, this function returns the `n` next points
        after the end of `series`.
        If `series` is a sequence of several time series, this function returns a sequence where each element
        contains the corresponding `n` points forecasts.

    Raises
    ------
    ValueError
        If the instance expects covariates (`self._expect_covariates`) and none are given.
    """
    # NOTE(review): this body only performs checks and returns nothing itself; the forecast
    # is presumably produced by overriding implementations - confirm.
    if series is None and covariates is None:
        # the parent implementation is invoked and its return value discarded
        super().predict(n)

    if self._expect_covariates and covariates is None:
        raise_log(
            ValueError(
                'The model has been trained with covariates. Some matching covariates '
                'have to be provided to `predict()`.'
            ),
            logger,  # report on the module logger, like the other `predict()` check in this file
        )
def predict(self, n: int) -> TimeSeries:
    """
    Forecasts values for `n` time steps after the end of the series.

    Parameters
    ----------
    n
        Forecast horizon - the number of time steps after the end of the series for which to produce predictions.

    Returns
    -------
    TimeSeries
        A time series containing the `n` next points after the end of the training series.

    Raises
    ------
    ValueError
        If `self._fit_called` is falsy.
    """
    # NOTE(review): this body only performs the "was fit" check and returns nothing itself;
    # the forecast is presumably produced by overriding implementations - confirm.
    if not self._fit_called:
        raise_log(
            ValueError(
                # separating spaces added: the adjacent literals used to join as
                # "`predict()`.For" and "series,the"
                'The model must be fit before calling `predict()`. '
                'For global models, if `predict()` is called without specifying a series, '
                'the model must have been fit on a single training series.'
            ),
            logger)
def remove_from_series(ts: TimeSeries, other: TimeSeries, model: Union[SeasonalityMode, ModelMode]) -> TimeSeries:
    """
    Removes the TimeSeries `other` from the TimeSeries `ts` as specified by `model`.
    Use e.g. to remove an additive or multiplicative trend from a series.

    Parameters
    ----------
    ts
        The TimeSeries to be modified.
    other
        The TimeSeries to remove.
    model
        The type of model considered.
        Must be `from darts import ModelMode, SeasonalityMode` Enums member.
        Either MULTIPLICATIVE or ADDITIVE.

    Returns
    -------
    TimeSeries
        A TimeSeries defined by removing `other` from `ts`: `ts / other` for a multiplicative
        model, `ts - other` for an additive one.

    Raises
    ------
    ValueError
        If `model` is not a member of either Enum, or is a member whose value is neither
        "multiplicative" nor "additive".
    """
    ts._assert_univariate()
    raise_if_not(
        model in ModelMode or model in SeasonalityMode,
        f"Unknown value for model_mode: {model}.",
        logger,
    )

    if model.value == "multiplicative":
        new_ts = ts / other
    elif model.value == "additive":
        new_ts = ts - other
    else:
        # reached by Enum members that passed the membership check but carry another value
        raise_log(
            ValueError(
                "Invalid parameter; must be either ADDITIVE or MULTIPLICATIVE. Was: {}"
                .format(model)
            ),
            logger,  # same logger as the membership check above
        )
    return new_ts
def __init__(self, scaler=None, name="Scaler", n_jobs: int = 1, verbose: bool = False):
    """Generic wrapper class for using scalers on time series.

    The underlying `scaler` has to implement the ``fit()``, ``transform()`` and ``inverse_transform()``
    methods (typically from scikit-learn).

    When the scaler is applied on multivariate series, the scaling is done per-component.
    When the series are stochastic, the scaling is done across all samples (for each given component).
    The transformation is applied independently for each dimension (component) of the time series,
    effectively merging all samples of a component in order to compute the transform.

    Parameters
    ----------
    scaler
        The scaler to transform the data with. It must provide ``fit()``,
        ``transform()`` and ``inverse_transform()`` methods.
        Default: :class:`sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))`; this
        will scale all the values of a time series between 0 and 1.
    name
        A specific name for the scaler
    n_jobs
        The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]``
        is passed as input to a method, parallelising operations regarding different ``TimeSeries``.
        Defaults to `1` (sequential). Setting the parameter to `-1` means using all the available processors.
        Note: for a small amount of data, the parallelisation overhead could end up increasing the total
        required amount of time.
    verbose
        Optionally, whether to print operations progress

    Raises
    ------
    ValueError
        If the scaler lacks a callable ``fit``, ``transform`` or ``inverse_transform`` attribute.

    Notes
    -----
    The scaler will not scale the series' static covariates. This has to be done either before constructing the
    series, or later on by extracting the covariates, transforming the values and then reapplying them to the
    series. For this, see TimeSeries properties `TimeSeries.static_covariates` and method
    `TimeSeries.with_static_covariates()`

    In case the :class:`Scaler` is applied to multiple ``TimeSeries`` objects, a deep-copy of the chosen
    scaler will be instantiated, fitted, and stored, for each ``TimeSeries``.

    Examples
    --------
    >>> from darts.datasets import AirPassengersDataset
    >>> from sklearn.preprocessing import MinMaxScaler
    >>> from darts.dataprocessing.transformers import Scaler
    >>> series = AirPassengersDataset().load()
    >>> scaler = MinMaxScaler(feature_range=(-1, 1))
    >>> transformer = Scaler(scaler)
    >>> series_transformed = transformer.fit_transform(series)
    >>> print(min(series_transformed.values()))
    [-1.]
    >>> print(max(series_transformed.values()))
    [1.]
    """
    super().__init__(name=name, n_jobs=n_jobs, verbose=verbose)

    chosen = MinMaxScaler(feature_range=(0, 1)) if scaler is None else scaler

    # every method of the scaler protocol has to be present and callable
    required_methods = ("fit", "transform", "inverse_transform")
    supports_protocol = all(
        callable(getattr(chosen, method_name, None)) for method_name in required_methods
    )
    if not supports_protocol:
        raise_log(
            ValueError(
                "The provided transformer object must have fit(), transform() and inverse_transform() methods"
            ),
            logger,
        )

    self.transformer = chosen
    self.transformer_instances = None
def check_seasonality(ts: TimeSeries, m: Optional[int] = None, max_lag: int = 24, alpha: float = 0.05):
    """
    Checks whether the TimeSeries `ts` is seasonal with period `m` or not.

    If `m` is None, we work under the assumption that there is a unique seasonality period, which is inferred
    from the Auto-correlation Function (ACF).

    Parameters
    ----------
    ts
        The time series to check for seasonality.
    m
        The seasonality period to check.
    max_lag
        The maximal lag allowed in the ACF.
    alpha
        The desired confidence level (default 5%).

    Returns
    -------
    Tuple[bool, int]
        A tuple `(season, m)`, where season is a boolean indicating whether the series has seasonality or not
        and `m` is the seasonality period.

    Raises
    ------
    ValueError
        If `m` is given and is not an integer greater than 1, or if `m` exceeds `max_lag`.
    """
    ts._assert_univariate()

    # The type test comes first so that a non-numeric `m` (e.g. a string) is reported with the
    # intended ValueError instead of failing on the `<` comparison.
    if m is not None and (not isinstance(m, int) or m < 2):
        raise_log(ValueError("m must be an integer greater than 1."), logger)

    if m is not None and m > max_lag:
        raise_log(ValueError("max_lag must be greater than or equal to m."), logger)

    # A constant series has no seasonality to detect.
    n_unique = np.unique(ts.values()).shape[0]
    if n_unique == 1:
        return False, 0

    # `max_lag` is configurable in case the user wants to check for seasonality higher than 24 steps.
    r = acf(ts.values(), nlags=max_lag, fft=False)

    # Finds local maxima of Auto-Correlation Function
    candidates = argrelmax(r)[0]

    if len(candidates) == 0:
        return False, 0

    if m is not None:
        # A user-defined period must itself be a local maximum of the ACF.
        if m not in candidates:
            return False, m
        candidates = [m]

    # Remove r[0], the auto-correlation at lag order 0, that introduces bias.
    r = r[1:]

    # The non-adjusted upper limit of the significance interval.
    band_upper = r.mean() + norm.ppf(1 - alpha / 2) * r.var()

    # Significance test, stops at first admissible value. The two '-1' below
    # compensate for the index change due to the restriction of the original r to r[1:].
    for candidate in candidates:
        stat = _bartlett_formula(r, candidate - 1, len(ts))
        if r[candidate - 1] > stat * band_upper:
            return True, candidate
    return False, 0
def extract_trend_and_seasonality(
    ts: TimeSeries,
    freq: int = None,
    model: Union[SeasonalityMode, ModelMode] = ModelMode.MULTIPLICATIVE,
    method: str = "naive",
    **kwargs,
) -> Tuple[TimeSeries, TimeSeries]:
    """
    Extracts trend and seasonality from a TimeSeries instance using `statsmodels.tsa`.

    Parameters
    ----------
    ts
        The series to decompose
    freq
        The seasonality period to use.
    model
        The type of decomposition to use.
        Must be ``from darts import ModelMode, SeasonalityMode`` Enum member.
        Either ``MULTIPLICATIVE`` or ``ADDITIVE``.
        Defaults to ``ModelMode.MULTIPLICATIVE``.
    method
        The method to be used to decompose the series.
        - "naive" : Seasonal decomposition using moving averages [1]_.
        - "STL" : Season-Trend decomposition using LOESS [2]_. Only compatible with ``ADDITIVE`` model type.
    kwargs
        Other keyword arguments are passed down to the decomposition method.
        In this implementation they are forwarded to the "STL" method only.

    Returns
    -------
    Tuple[TimeSeries, TimeSeries]
        A tuple of (trend, seasonal) time series.

    Raises
    ------
    ValueError
        If `model` is not a member of either Enum or is ``SeasonalityMode.NONE``, if "STL" is
        requested with a non-additive model, or if `method` is unknown.

    References
    ----------
    .. [1] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.seasonal_decompose.html
    .. [2] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.STL.html
    """
    ts._assert_univariate()
    raise_if_not(
        model in ModelMode or model in SeasonalityMode,
        f"Unknown value for model_mode: {model}.",
        logger,
    )
    raise_if_not(
        model is not SeasonalityMode.NONE,
        "The model must be either MULTIPLICATIVE or ADDITIVE.",
        logger,  # same logger as every other check in this function
    )

    if method == "naive":
        decomp = seasonal_decompose(
            ts.pd_series(), period=freq, model=model.value, extrapolate_trend="freq"
        )
    elif method == "STL":
        raise_if_not(
            model in [SeasonalityMode.ADDITIVE, ModelMode.ADDITIVE],
            f"Only ADDITIVE model is compatible with the STL method. Current model is {model}.",
            logger,
        )
        decomp = STL(
            endog=ts.pd_series(),
            period=freq,
            **kwargs,
        ).fit()
    else:
        raise_log(ValueError(f"Unknown value for method: {method}"), logger)

    # Both components are rebuilt on the original time index and keep the input's
    # static covariates and hierarchy.
    season = TimeSeries.from_times_and_values(
        ts.time_index,
        decomp.seasonal,
        static_covariates=ts.static_covariates,
        hierarchy=ts.hierarchy,
    )
    trend = TimeSeries.from_times_and_values(
        ts.time_index,
        decomp.trend,
        static_covariates=ts.static_covariates,
        hierarchy=ts.hierarchy,
    )

    return trend, season
def _extend_time_index_until(
    time_index: Union[pd.DatetimeIndex, pd.RangeIndex],
    until: Optional[Union[int, str, pd.Timestamp]],
    add_length: int,
) -> Union[pd.DatetimeIndex, pd.RangeIndex]:
    """Extend `time_index` either by `add_length` steps or up to the point `until`.

    Parameters
    ----------
    time_index
        The index to extend.
    until
        The new end of the index. Must lie past the current end and be aligned with the
        index frequency. Mutually exclusive with `add_length`.
    add_length
        The number of frequency steps to append. Mutually exclusive with `until`.

    Returns
    -------
    Union[pd.DatetimeIndex, pd.RangeIndex]
        `time_index` itself when neither `until` nor `add_length` is set (both falsy);
        otherwise the result of ``pd.date_range`` from the first entry to the new end.

    Raises
    ------
    ValueError
        If both `until` and `add_length` are set, if `add_length` is negative, if the extension
        overflows the datetime bounds, if `until` has the wrong type for the index, does not
        lie past the current end, or is not aligned with the frequency.
    """
    if not add_length and not until:
        return time_index

    raise_if(
        bool(add_length) and bool(until),
        "set only one of add_length and until")

    end = time_index[-1]
    freq = time_index.freq

    if add_length:
        raise_if_not(
            add_length >= 0,
            f"Expected add_length, by which to extend the time series by, "
            f"to be positive, got {add_length}",
        )

        try:
            end += add_length * freq
        except pd.errors.OutOfBoundsDatetime:
            raise_log(
                ValueError(
                    f"the add operation between {end} and {add_length * freq} will overflow"
                ),
                logger,
            )
    else:
        datetime_index = isinstance(time_index, pd.DatetimeIndex)

        # The messages report the type of the offending argument (`until`); they previously
        # printed the type of the current end of the index, which is always valid.
        if datetime_index:
            raise_if_not(
                isinstance(until, (str, pd.Timestamp)),
                "Expected valid timestamp for TimeSeries, "
                "indexed by DatetimeIndex, "
                f"for parameter until, got {type(until)}",
                logger,
            )
        else:
            raise_if_not(
                isinstance(until, int),
                "Expected integer for TimeSeries, indexed by RangeIndex, "
                f"for parameter until, got {type(until)}",
                logger,
            )

        timestamp = pd.Timestamp(until) if datetime_index else until

        raise_if_not(
            timestamp > end,
            f"Expected until, {timestamp} to lie past end of time index {end}",
        )

        # NOTE(review): the alignment test compares against `pd.Timedelta(0)` and the result is
        # built with `pd.date_range`; both look datetime-specific - confirm RangeIndex handling.
        ahead = timestamp - end
        raise_if_not(
            (ahead % freq) == pd.Timedelta(0),
            f"End date must correspond with frequency {freq} of the time axis",
            logger,
        )

        end = timestamp

    new_time_index = pd.date_range(start=time_index[0], end=end, freq=freq)
    return new_time_index
def __init__(
    self,
    input_chunk_length: int,
    output_chunk_length: int,
    loss_fn: nn.modules.loss._Loss = nn.MSELoss(),
    torch_metrics: Optional[Union[torchmetrics.Metric, torchmetrics.MetricCollection]] = None,
    likelihood: Optional[Likelihood] = None,
    optimizer_cls: torch.optim.Optimizer = torch.optim.Adam,
    optimizer_kwargs: Optional[Dict] = None,
    lr_scheduler_cls: Optional[
        torch.optim.lr_scheduler._LRScheduler] = None,
    lr_scheduler_kwargs: Optional[Dict] = None,
) -> None:
    """
    PyTorch Lightning-based Forecasting Module.

    This class is meant to be inherited to create a new PyTorch Lightning-based forecasting module.
    When subclassing this class, please make sure to add the following methods with the given signatures:
        - :func:`PLTorchForecastingModel.__init__()`
        - :func:`PLTorchForecastingModel.forward()`
        - :func:`PLTorchForecastingModel._produce_train_output()`
        - :func:`PLTorchForecastingModel._get_batch_prediction()`

    In subclass `MyModel`'s :func:`__init__` function call ``super(MyModel, self).__init__(**kwargs)`` where
    ``kwargs`` are the parameters of :class:`PLTorchForecastingModel`.

    Parameters
    ----------
    input_chunk_length
        Number of input past time steps per chunk.
    output_chunk_length
        Number of output time steps per chunk.
    loss_fn
        PyTorch loss function used for training.
        This parameter will be ignored for probabilistic models if the ``likelihood`` parameter is specified.
        Default: ``torch.nn.MSELoss()``.
    torch_metrics
        A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found
        at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``.
    likelihood
        One of Darts' :meth:`Likelihood <darts.utils.likelihood_models.Likelihood>` models to be used for
        probabilistic forecasts. Default: ``None``.
    optimizer_cls
        The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``.
    optimizer_kwargs
        Optionally, some keyword arguments for the PyTorch optimizer (e.g., ``{'lr': 1e-3}``
        for specifying a learning rate). Otherwise the default values of the selected ``optimizer_cls``
        will be used. Default: ``None``.
    lr_scheduler_cls
        Optionally, the PyTorch learning rate scheduler class to be used. Specifying ``None`` corresponds
        to using a constant learning rate. Default: ``None``.
    lr_scheduler_kwargs
        Optionally, some keyword arguments for the PyTorch learning rate scheduler. Default: ``None``.

    Raises
    ------
    ValueError
        If `input_chunk_length` or `output_chunk_length` is ``None`` (reported through `raise_if`;
        exception type presumed from that helper - confirm).
    AttributeError
        If `torch_metrics` is neither ``None``, a ``torchmetrics.Metric`` nor a
        ``torchmetrics.MetricCollection``.
    """
    # NOTE(review): the docstring refers to `PLTorchForecastingModel` while the error message
    # below names `PLForecastingModule` - confirm which name is current.
    super().__init__()

    # save hyper parameters for saving/loading
    # do not save type nn.Module params
    # NOTE(review): kept directly after `super().__init__()`; presumably the call captures the
    # constructor arguments from this frame, so statement order may matter - confirm before moving.
    self.save_hyperparameters(ignore=["loss_fn", "torch_metrics"])

    raise_if(
        input_chunk_length is None or output_chunk_length is None,
        "Both `input_chunk_length` and `output_chunk_length` must be passed to `PLForecastingModule`",
        logger,
    )

    self.input_chunk_length = input_chunk_length
    self.output_chunk_length = output_chunk_length

    # define the loss function
    self.criterion = loss_fn
    # by default models are deterministic (i.e. not probabilistic)
    self.likelihood = likelihood

    # persist optimiser and LR scheduler parameters
    # (a missing kwargs mapping is replaced by a new empty dict)
    self.optimizer_cls = optimizer_cls
    self.optimizer_kwargs = dict() if optimizer_kwargs is None else optimizer_kwargs
    self.lr_scheduler_cls = lr_scheduler_cls
    self.lr_scheduler_kwargs = (dict() if lr_scheduler_kwargs is None else lr_scheduler_kwargs)

    # normalise `torch_metrics` to a MetricCollection: empty when None, wrapping a single Metric
    if torch_metrics is None:
        torch_metrics = torchmetrics.MetricCollection([])
    elif isinstance(torch_metrics, torchmetrics.Metric):
        torch_metrics = torchmetrics.MetricCollection([torch_metrics])
    elif isinstance(torch_metrics, torchmetrics.MetricCollection):
        pass
    else:
        raise_log(
            AttributeError(
                "`torch_metrics` only accepts type torchmetrics.Metric or torchmetrics.MetricCollection"
            ),
            logger,
        )

    # separate clones of the collection for training and validation, told apart by prefix
    self.train_metrics = torch_metrics.clone(prefix="train_")
    self.val_metrics = torch_metrics.clone(prefix="val_")

    # initialize prediction parameters
    self.pred_n: Optional[int] = None
    self.pred_num_samples: Optional[int] = None
    self.pred_roll_size: Optional[int] = None
    self.pred_batch_size: Optional[int] = None
    self.pred_n_jobs: Optional[int] = None
def __init__(
    self,
    num_layers: int,
    layer_width: int,
    nr_params: int,
    expansion_coefficient_dim: int,
    input_chunk_length: int,
    target_length: int,
    g_type: GTypes,
    batch_norm: bool,
    dropout: float,
    activation: str,
):
    """PyTorch module implementing the basic building block of the N-BEATS architecture.

    The blocks produce outputs of size (target_length, nr_params); i.e.
    "one vector per parameter". The parameters are predicted only for forecast outputs.
    Backcast outputs are in the original "domain".

    Parameters
    ----------
    num_layers
        The number of fully connected layers preceding the final forking layers.
    layer_width
        The number of neurons that make up each fully connected layer.
    nr_params
        The number of parameters of the likelihood (or 1 if no likelihood is used)
    expansion_coefficient_dim
        The dimensionality of the waveform generator parameters, also known as expansion coefficients.
        Used in the generic architecture and the trend module of the interpretable architecture, where it determines
        the degree of the polynomial basis.
    input_chunk_length
        The length of the input sequence fed to the model.
    target_length
        The length of the forecast of the model.
    g_type
        The type of function that is implemented by the waveform generator.
    batch_norm
        Whether to use batch norm
    dropout
        Dropout probability
    activation
        The activation function of encoder/decoder intermediate layer.
        Must be one of `ACTIVATIONS`; it is looked up by name on `torch.nn`.

    Raises
    ------
    ValueError
        If `activation` is not in `ACTIVATIONS`, or if `g_type` is not a supported generator type.

    Inputs
    ------
    x of shape `(batch_size, input_chunk_length)`
        Tensor containing the input sequence.

    Outputs
    -------
    x_hat of shape `(batch_size, input_chunk_length)`
        Tensor containing the 'backcast' of the block, which represents an approximation of `x`
        given the constraints of the functional space determined by `g`.
    y_hat of shape `(batch_size, output_chunk_length)`
        Tensor containing the forward forecast of the block.
    """
    super().__init__()

    self.num_layers = num_layers
    self.layer_width = layer_width
    self.target_length = target_length
    self.nr_params = nr_params
    self.g_type = g_type
    self.dropout = dropout
    self.batch_norm = batch_norm

    raise_if_not(
        activation in ACTIVATIONS,
        f"'{activation}' is not in {ACTIVATIONS}",
        logger,  # same logger as the `g_type` check at the end of this constructor
    )
    self.activation = getattr(nn, activation)()

    # fully connected stack before fork
    self.linear_layer_stack_list = [
        nn.Linear(input_chunk_length, layer_width)
    ]
    # NOTE(review): the batch-norm and dropout layers are appended inside the loop, i.e. after
    # every additional linear layer - confirm against the original indentation.
    for _ in range(num_layers - 1):
        self.linear_layer_stack_list.append(
            nn.Linear(layer_width, layer_width))

        if self.batch_norm:
            self.linear_layer_stack_list.append(
                nn.BatchNorm1d(num_features=self.layer_width))

        if self.dropout > 0:
            self.linear_layer_stack_list.append(
                MonteCarloDropout(p=self.dropout))

    self.fc_stack = nn.ModuleList(self.linear_layer_stack_list)

    # Fully connected layer producing forecast/backcast expansion coefficients (waveform generator parameters).
    # The coefficients are emitted for each parameter of the likelihood.
    if g_type == _GType.SEASONALITY:
        # the seasonality generator fixes the number of coefficients from the sequence length
        self.backcast_linear_layer = nn.Linear(
            layer_width, 2 * int(input_chunk_length / 2 - 1) + 1)
        self.forecast_linear_layer = nn.Linear(
            layer_width, nr_params * (2 * int(target_length / 2 - 1) + 1))
    else:
        self.backcast_linear_layer = nn.Linear(layer_width,
                                               expansion_coefficient_dim)
        self.forecast_linear_layer = nn.Linear(
            layer_width, nr_params * expansion_coefficient_dim)

    # waveform generator functions
    if g_type == _GType.GENERIC:
        self.backcast_g = nn.Linear(expansion_coefficient_dim,
                                    input_chunk_length)
        self.forecast_g = nn.Linear(expansion_coefficient_dim,
                                    target_length)
    elif g_type == _GType.TREND:
        self.backcast_g = _TrendGenerator(expansion_coefficient_dim,
                                          input_chunk_length)
        self.forecast_g = _TrendGenerator(expansion_coefficient_dim,
                                          target_length)
    elif g_type == _GType.SEASONALITY:
        self.backcast_g = _SeasonalityGenerator(input_chunk_length)
        self.forecast_g = _SeasonalityGenerator(target_length)
    else:
        raise_log(ValueError("g_type not supported"), logger)
def mase(
    actual_series: Union[TimeSeries, Sequence[TimeSeries]],
    pred_series: Union[TimeSeries, Sequence[TimeSeries]],
    insample: Union[TimeSeries, Sequence[TimeSeries]],
    m: Optional[int] = 1,
    intersect: bool = True,
    *,
    reduction: Callable[[np.ndarray], float] = np.mean,
    inter_reduction: Callable[[np.ndarray], Union[float, np.ndarray]] = lambda x: x,
    n_jobs: int = 1,
    verbose: bool = False
) -> Union[float, np.ndarray]:
    """Mean Absolute Scaled Error (MASE).

    See `Mean absolute scaled error wikipedia page <https://en.wikipedia.org/wiki/Mean_absolute_scaled_error>`_
    for details about the MASE and how it is computed.

    If any of the series is stochastic (containing several samples), the median sample value is considered.

    Parameters
    ----------
    actual_series
        The (sequence of) actual series.
    pred_series
        The (sequence of) predicted series.
    insample
        The training series used to forecast `pred_series` .
        This series serves to compute the scale of the error obtained by a naive forecaster on the training data.
    m
        Optionally, the seasonality to use for differencing.
        `m=1` corresponds to the non-seasonal MASE, whereas `m>1` corresponds to seasonal MASE.
        If `m=None`, it will be tentatively inferred
        from the auto-correlation function (ACF). It will fall back to a value of 1 if this fails.
    intersect
        For time series that are overlapping in time without having the same time index, setting `True`
        will consider the values only over their common time interval (intersection in time).
    reduction
        Function taking as input a ``np.ndarray`` and returning a scalar value. This function is used to aggregate
        the metrics of different components in case of multivariate ``TimeSeries`` instances.
    inter_reduction
        Function taking as input a ``np.ndarray`` and returning either a scalar value or a ``np.ndarray``.
        This function can be used to aggregate the metrics of different series in case the metric is evaluated on a
        ``Sequence[TimeSeries]``. Defaults to the identity function, which returns the pairwise metrics for each pair
        of ``TimeSeries`` received in input. Example: ``inter_reduction=np.mean``, will return the average of the
        pairwise metrics.
    n_jobs
        The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is
        passed as input, parallelising operations regarding different ``TimeSeries``. Defaults to `1`
        (sequential). Setting the parameter to `-1` means using all the available processors.
    verbose
        Optionally, whether to print operations progress

    Raises
    ------
    ValueError
        If the `insample` series is periodic ( :math:`X_t = X_{t-m}` ), if the series widths or
        sequence lengths do not match, if `pred_series` does not start right after `insample`,
        or if the input types are not supported.

    Returns
    -------
    float
        The Mean Absolute Scaled Error (MASE)
    """

    def _multivariate_mase(
        actual_series: TimeSeries,
        pred_series: TimeSeries,
        insample: TimeSeries,
        m: int,
        intersect: bool,
        reduction: Callable[[np.ndarray], float],
    ):
        """Compute the MASE of one (possibly multivariate) series, reduced over its components."""
        raise_if_not(
            actual_series.width == pred_series.width,
            "The two TimeSeries instances must have the same width.",
            logger,
        )
        raise_if_not(
            actual_series.width == insample.width,
            "The insample TimeSeries must have the same width as the other series.",
            logger,
        )
        raise_if_not(
            insample.end_time() + insample.freq == pred_series.start_time(),
            "The pred_series must be the forecast of the insample series",
            logger,
        )

        # a stochastic training series is summarised by its median sample
        insample_ = (
            insample.quantile_timeseries(quantile=0.5)
            if insample.is_stochastic
            else insample
        )

        value_list = []
        for i in range(actual_series.width):
            # old implementation of mase on univariate TimeSeries
            # NOTE(review): when `m` is None the period is inferred from the whole `insample`
            # series (not component `i`) on the first iteration and then reused for the
            # remaining components - confirm this is intended for multivariate input.
            if m is None:
                test_season, m = check_seasonality(insample)
                if not test_season:
                    warn(
                        "No seasonality found when computing MASE. Fixing the period to 1.",
                        UserWarning,
                    )
                    m = 1

            y_true, y_hat = _get_values_or_raise(
                actual_series.univariate_component(i),
                pred_series.univariate_component(i),
                intersect,
                remove_nan_union=False,
            )

            x_t = insample_.univariate_component(i).values()
            errors = np.abs(y_true - y_hat)
            # mean absolute m-step difference of the training values: the scaling factor
            scale = np.mean(np.abs(x_t[m:] - x_t[:-m]))
            raise_if_not(
                not np.isclose(scale, 0),
                "cannot use MASE with periodical signals",
                logger,
            )
            value_list.append(np.mean(errors / scale))

        return reduction(value_list)

    # All validation calls below report on the module logger, matching the checks above.
    if isinstance(actual_series, TimeSeries):
        raise_if_not(
            isinstance(pred_series, TimeSeries),
            "Expecting pred_series to be TimeSeries",
            logger,
        )
        raise_if_not(
            isinstance(insample, TimeSeries),
            "Expecting insample to be TimeSeries",
            logger,
        )
        return _multivariate_mase(
            actual_series=actual_series,
            pred_series=pred_series,
            insample=insample,
            m=m,
            intersect=intersect,
            reduction=reduction,
        )

    elif isinstance(actual_series, Sequence) and isinstance(
        actual_series[0], TimeSeries
    ):
        raise_if_not(
            isinstance(pred_series, Sequence)
            and isinstance(pred_series[0], TimeSeries),
            "Expecting pred_series to be a Sequence[TimeSeries]",
            logger,
        )
        raise_if_not(
            isinstance(insample, Sequence) and isinstance(insample[0], TimeSeries),
            "Expecting insample to be a Sequence[TimeSeries]",
            logger,
        )
        raise_if_not(
            len(pred_series) == len(actual_series)
            and len(pred_series) == len(insample),
            "The TimeSeries sequences must have the same length.",
            logger,
        )

        raise_if_not(isinstance(n_jobs, int), "n_jobs must be an integer", logger)
        raise_if_not(isinstance(verbose, bool), "verbose must be a bool", logger)

        iterator = _build_tqdm_iterator(
            iterable=zip(actual_series, pred_series, insample),
            verbose=verbose,
            total=len(actual_series),
        )

        # one metric per (actual, pred, insample) triple; `m` is passed unchanged to each call
        value_list = _parallel_apply(
            iterator=iterator,
            fn=_multivariate_mase,
            n_jobs=n_jobs,
            fn_args=dict(),
            fn_kwargs={"m": m, "intersect": intersect, "reduction": reduction},
        )
        return inter_reduction(value_list)
    else:
        raise_log(
            ValueError(
                "Input type not supported, only TimeSeries and Sequence[TimeSeries] are accepted."
            ),
            logger,
        )
def __init__(
    self,
    lags: Union[int, list] = None,
    lags_past_covariates: Union[int, List[int]] = None,
    lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
    output_chunk_length: int = 1,
    model=None,
):
    """Regression Model
    Can be used to fit any scikit-learn-like regressor class to predict the target time series from lagged values.

    Parameters
    ----------
    lags
        Lagged target values used to predict the next time step. If an integer is given the last `lags` past lags
        are used (from -1 backward). Otherwise a list of integers with lags is required (each lag must be < 0).
    lags_past_covariates
        Number of lagged past_covariates values used to predict the next time step. If an integer is given the last
        `lags_past_covariates` past lags are used (inclusive, starting from lag -1). Otherwise a list of integers
        with lags < 0 is required.
    lags_future_covariates
        Number of lagged future_covariates values used to predict the next time step. If an tuple (past, future) is
        given the last `past` lags in the past are used (inclusive, starting from lag -1) along with the first
        `future` future lags (starting from 0 - the prediction time - up to `future - 1` included). Otherwise a list
        of integers with lags is required.
    output_chunk_length
        Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
        horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
        be useful if the covariates don't extend far enough into the future.
    model
        Scikit-learn-like model with ``fit()`` and ``predict()`` methods. Also possible to use model that doesn't
        support multi-output regression for multivariate timeseries, in which case one regressor
        will be used per component in the multivariate series.
        If None, defaults to: ``sklearn.linear_model.LinearRegression(n_jobs=-1)``.

    Raises
    ------
    Exception
        If `model` has no callable ``fit()`` or ``predict()`` attribute.
    ValueError
        If any lag argument or `output_chunk_length` fails validation (reported through
        `raise_if` / `raise_if_not`; exception type presumed from those helpers - confirm).
    """
    super().__init__()

    self.model = model
    self.lags = {}
    self.output_chunk_length = None
    self.input_dim = None

    # model checks
    if self.model is None:
        self.model = LinearRegression(n_jobs=-1)

    # `logger` is an argument of `raise_log`, not of the exception: it used to be passed to
    # `Exception(...)` by a misplaced parenthesis, ending up in the exception's args.
    if not callable(getattr(self.model, "fit", None)):
        raise_log(
            Exception("Provided model object must have a fit() method"),
            logger,
        )
    if not callable(getattr(self.model, "predict", None)):
        raise_log(
            Exception("Provided model object must have a predict() method"),
            logger,
        )

    # check lags
    raise_if(
        (lags is None)
        and (lags_future_covariates is None)
        and (lags_past_covariates is None),
        "At least one of `lags`, `lags_future_covariates` or `lags_past_covariates` must be not None.",
    )

    lags_type_checks = [
        (lags, "lags"),
        (lags_past_covariates, "lags_past_covariates"),
    ]

    for _lags, lags_name in lags_type_checks:
        raise_if_not(
            isinstance(_lags, (int, list)) or _lags is None,
            f"`{lags_name}` must be of type int or list. Given: {type(_lags)}.",
        )
        # bool is a subclass of int, so it has to be rejected explicitly
        raise_if(
            isinstance(_lags, bool),
            f"`{lags_name}` must be of type int or list, not bool.",
        )

    raise_if_not(
        isinstance(lags_future_covariates, (tuple, list))
        or lags_future_covariates is None,
        f"`lags_future_covariates` must be of type tuple or list. Given: {type(lags_future_covariates)}.",
    )

    if isinstance(lags_future_covariates, tuple):
        raise_if_not(
            len(lags_future_covariates) == 2
            and isinstance(lags_future_covariates[0], int)
            and isinstance(lags_future_covariates[1], int),
            "`lags_future_covariates` tuple must be of length 2, and must contain two integers",
        )
        raise_if(
            isinstance(lags_future_covariates[0], bool)
            or isinstance(lags_future_covariates[1], bool),
            "`lags_future_covariates` tuple must contain intergers, not bool",
        )

    # set lags
    if isinstance(lags, int):
        raise_if_not(lags > 0, f"`lags` must be strictly positive. Given: {lags}.")
        # selecting last `lags` lags, starting from position 1 (skipping current, pos 0, the one we want to predict)
        self.lags["target"] = list(range(-lags, 0))
    elif isinstance(lags, list):
        for lag in lags:
            raise_if(
                not isinstance(lag, int) or (lag >= 0),
                f"Every element of `lags` must be a strictly negative integer. Given: {lags}.",
            )
        # an empty list leaves the "target" entry unset
        if lags:
            self.lags["target"] = sorted(lags)

    if isinstance(lags_past_covariates, int):
        raise_if_not(
            lags_past_covariates > 0,
            f"`lags_past_covariates` must be an integer > 0. Given: {lags_past_covariates}.",
        )
        self.lags["past"] = list(range(-lags_past_covariates, 0))
    elif isinstance(lags_past_covariates, list):
        for lag in lags_past_covariates:
            raise_if(
                not isinstance(lag, int) or (lag >= 0),
                # the message now names the actual parameter (it used to say `lags_covariates`)
                f"Every element of `lags_past_covariates` must be an integer < 0. Given: {lags_past_covariates}.",
            )
        if lags_past_covariates:
            self.lags["past"] = sorted(lags_past_covariates)

    if isinstance(lags_future_covariates, tuple):
        raise_if_not(
            lags_future_covariates[0] >= 0 and lags_future_covariates[1] >= 0,
            f"`lags_future_covariates` tuple must contain integers >= 0. Given: {lags_future_covariates}.",
        )
        if (lags_future_covariates[0] is not None
                and lags_future_covariates[1] is not None):
            # (0, 0) selects no lag at all, so the "future" entry stays unset
            if not (lags_future_covariates[0] == 0
                    and lags_future_covariates[1] == 0):
                self.lags["future"] = list(
                    range(-lags_future_covariates[0],
                          lags_future_covariates[1]))
    elif isinstance(lags_future_covariates, list):
        for lag in lags_future_covariates:
            raise_if(
                not isinstance(lag, int) or isinstance(lag, bool),
                f"Every element of `lags_future_covariates` must be an integer. Given: {lags_future_covariates}.",
            )
        if lags_future_covariates:
            self.lags["future"] = sorted(lags_future_covariates)

    # check and set output_chunk_length
    raise_if_not(
        isinstance(output_chunk_length, int) and output_chunk_length > 0,
        f"output_chunk_length must be an integer greater than 0. Given: {output_chunk_length}",
    )
    self.output_chunk_length = output_chunk_length