Example #1
def random_method(decorated: Callable[..., T]) -> Callable[..., T]:
    """Decorator usable on any method within a class that will provide an isolated torch random context.

    The decorator will store a `_random_instance` property on the object in order to persist successive calls to the RNG

    Parameters
    ----------
    decorated
        A method to be run in an isolated torch random context.

    """
    # check that @random_method has been applied to a method.
    raise_if_not(_is_method(decorated),
                 "@random_method can only be used on methods.", logger)

    @wraps(decorated)
    def decorator(self, *args, **kwargs) -> T:
        if "random_state" in kwargs.keys():
            self._random_instance = check_random_state(kwargs["random_state"])
        elif not hasattr(self, "_random_instance"):
            self._random_instance = check_random_state(
                randint(0, high=MAX_NUMPY_SEED_VALUE))

        with fork_rng():
            manual_seed(
                self._random_instance.randint(0, high=MAX_TORCH_SEED_VALUE))
            return decorated(self, *args, **kwargs)

    return decorator
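
A minimal usage sketch (the `Sampler` class below is hypothetical; it assumes `torch` and the decorator are importable): any method decorated this way draws its torch seed from the per-object `_random_instance`, so passing the same `random_state` reproduces the same draws.

import torch

class Sampler:
    @random_method
    def sample(self, n: int, random_state=None) -> torch.Tensor:
        # runs inside fork_rng() with a seed drawn from self._random_instance
        return torch.randn(n)

a = Sampler().sample(3, random_state=42)
b = Sampler().sample(3, random_state=42)
assert torch.equal(a, b)  # same random_state -> same seed sequence -> same draws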
Example #2
    def inverse_transform(self,
                          data: Union[TimeSeries, Sequence[TimeSeries]],
                          partial: bool = False) -> Union[TimeSeries, Sequence[TimeSeries]]:
        """
        For each data transformer in the pipeline, inverse-transform the data; the inverse-transformed data is then
        passed to the next transformer. Transformers are traversed in reverse order. Raises a ValueError if not all
        of the transformers are invertible and ``partial`` is set to ``False``. Set ``partial`` to ``True`` to invert
        only the invertible transformers in the pipeline.

        Parameters
        ----------
        data
            (Sequence of) TimeSeries to be inverse transformed.
        partial
            If set to `True`, the inverse transformation is applied even if the pipeline is not fully invertible,
            calling `inverse_transform()` only on the `InvertibleDataTransformer`s.

        Returns
        -------
        Union[TimeSeries, Sequence[TimeSeries]]
            Inverse transformed data.
        """
        if not partial:
            raise_if_not(self._invertible, "Not all transformers in the pipeline can perform inverse_transform", logger)

            for transformer in reversed(self._transformers):
                data = transformer.inverse_transform(data)
            return data
        else:
            for transformer in reversed(self._transformers):
                if isinstance(transformer, InvertibleDataTransformer):
                    data = transformer.inverse_transform(data)
            return data
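
A hedged usage sketch, assuming the darts `Pipeline` and the transformers shown elsewhere on this page (`MissingValuesFiller` is not invertible, `Scaler` is):

from darts.dataprocessing import Pipeline
from darts.dataprocessing.transformers import MissingValuesFiller, Scaler

pipeline = Pipeline([MissingValuesFiller(), Scaler()])
transformed = pipeline.fit_transform(series)  # `series`: a TimeSeries (assumed)
# pipeline.inverse_transform(transformed)     # would raise: the filler is not invertible
restored = pipeline.inverse_transform(transformed, partial=True)  # inverts only the Scaler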
Example #3
    def __getitem__(self, key: Union[int, slice]) -> "Pipeline":
        """
        Gets a subset of the Pipeline, based either on an index or on a slice of indexes.
        The resulting pipeline will deep-copy the transformers of the original pipeline.

        Parameters
        ----------
        key
            Either int or slice indicating the subset of data transformers to keep.

        Returns
        -------
        Pipeline
            Subset of pipeline determined by key.
        """
        raise_if_not(
            isinstance(key, int) or isinstance(key, slice),
            "key must be either an int or a slice",
            logger,
        )

        if isinstance(key, int):
            transformers = [self._transformers[key]]
        else:
            transformers = self._transformers[key]
        return Pipeline(transformers, copy=True)
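
Note that an integer key also yields a Pipeline (wrapping a single deep-copied transformer), not the transformer itself; a short sketch:

sub = pipeline[1:3]   # new Pipeline with deep copies of transformers 1 and 2
first = pipeline[0]   # also a Pipeline, containing one deep-copied transformer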
Example #4
    def __init__(self,
                 transformers: Sequence[BaseDataTransformer[TimeSeries]],
                 copy: bool = False):
        """
        Pipeline combines multiple data transformers chaining them together.

        Parameters
        ----------
        transformers
            Sequence of data transformers.
        copy
            If set, makes a deep copy of each data transformer before adding it to the pipeline.
        """
        if transformers is None or len(transformers) == 0:
            logger.warning("Empty pipeline created")
            self._transformers: Sequence[BaseDataTransformer[TimeSeries]] = []
        else:
            # validate before storing; checking for None/empty first avoids iterating over `None`
            raise_if_not(all(isinstance(t, BaseDataTransformer) for t in transformers),
                         "transformers should be objects deriving from BaseDataTransformer", logger)
            self._transformers = deepcopy(transformers) if copy else transformers

        self._invertible = all((isinstance(t, InvertibleDataTransformer) for t in self._transformers))
Example #5
    def __init__(self, index_generator: CovariateIndexGenerator,
                 attribute: Callable):
        """
        Parameters
        ----------
        index_generator
            An instance of `CovariateIndexGenerator` with methods `generate_train_series()` and
            `generate_inference_series()`. Used to generate the index for encoders.
        attribute
            A callable that takes an index `index` (either a `pd.DatetimeIndex` or a `pd.RangeIndex`) as input
            and returns an np.ndarray of shape `(len(index),)`.
            An example for a correct `attribute` for `index` of type pd.DatetimeIndex:
            ``attribute = lambda index: (index.year - 1950) / 50``. And for pd.RangeIndex:
            ``attribute = lambda index: (index - 1950) / 50``
        """
        raise_if_not(
            callable(attribute),
            f"Encountered invalid encoder argument `{attribute}` for encoder `callable`. "
            f"Attribute must be a callable that returns a `np.ndarray`.",
            logger,
        )

        super().__init__(index_generator)

        self.attribute = attribute
Example #6
    def __init__(self, index_generator: CovariateIndexGenerator,
                 attribute: str):
        """
        Parameters
        ----------
        index_generator
            An instance of `CovariateIndexGenerator` with methods `generate_train_series()` and
            `generate_inference_series()`. Used to generate the index for encoders.
        attribute
            Either 'absolute' or 'relative'. If 'absolute', the generated encoded values will lie in (0, inf) and
            the train target series will be used as a reference to set the 0-index. If 'relative', the generated
            encoded values will lie in (-inf, inf) and the train target series end time will be used as a reference
            to evaluate the relative index positions.
        """
        raise_if_not(
            isinstance(attribute, str)
            and attribute in INTEGER_INDEX_ATTRIBUTES,
            f"Encountered invalid encoder argument `{attribute}` for encoder `position`. "
            f'Attribute must be one of `("absolute", "relative")`.',
            logger,
        )

        super().__init__(index_generator)

        self.attribute = attribute
        self.reference_index: Optional[Tuple[int, Optional[Union[pd.Timestamp,
                                                                 int]]]] = None
        self.was_called = False
Example #7
    def __init__(self, models: Union[List[ForecastingModel],
                                     List[GlobalForecastingModel]]):
        raise_if_not(
            isinstance(models, list) and models,
            "Cannot instantiate EnsembleModel with an empty list of models",
            logger,
        )

        is_local_ensemble = all(
            isinstance(model, ForecastingModel)
            and not isinstance(model, GlobalForecastingModel)
            for model in models)
        self.is_global_ensemble = all(
            isinstance(model, GlobalForecastingModel) for model in models)

        raise_if_not(
            is_local_ensemble or self.is_global_ensemble,
            "All models must either be GlobalForecastingModel instances, or none of them should be.",
            logger,
        )

        raise_if(
            any([m._fit_called for m in models]),
            "Cannot instantiate EnsembleModel with trained/fitted models. "
            "Consider resetting all models with `my_model.untrained_model()`",
            logger,
        )

        super().__init__()
        self.models = models
        self.is_single_series = None
Example #8
def fill_missing_values(series: TimeSeries, fill: Union[str, float] = 'auto', **interpolate_kwargs) -> TimeSeries:
    """
    Fills missing values in the provided time series.

    Parameters
    ----------
    series
        The time series for which to fill missing values
    fill
        The value used to replace the missing values.
        If set to 'auto', will auto-fill missing values using the `pandas.DataFrame.interpolate()` method.
    interpolate_kwargs
        Keyword arguments for `pandas.DataFrame.interpolate()`, only used when `fill` is set to 'auto'.
        See `the documentation
        <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.interpolate.html>`_
        for the list of supported parameters.

    Returns
    -------
    TimeSeries
        A new TimeSeries with all missing values filled according to the rules above.
    """
    raise_if_not(isinstance(fill, str) or isinstance(fill, float),
                 "`fill` should either be a string or a float",
                 logger)
    raise_if(isinstance(fill, str) and fill != 'auto',
             "invalid string for `fill`: can only be set to 'auto'",
             logger)

    if fill == 'auto':
        return _auto_fill(series, **interpolate_kwargs)
    return _const_fill(series, fill)
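
A brief sketch of both fill modes (assumes the darts public API, `TimeSeries.from_values` and `fill_missing_values`):

import numpy as np
from darts import TimeSeries
from darts.utils.missing_values import fill_missing_values

series = TimeSeries.from_values(np.array([1.0, np.nan, 3.0]))
filled_auto = fill_missing_values(series, fill='auto', method='linear')  # kwargs go to interpolate()
filled_const = fill_missing_values(series, fill=0.0)                     # constant fill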
Example #9
    def wrapper_multivariate_support(*args, **kwargs):
        # we can avoid checks about args and kwargs since the input is adjusted by the previous decorator
        actual_series = args[0]
        pred_series = args[1]

        raise_if_not(
            actual_series.width == pred_series.width,
            "The two TimeSeries instances must have the same width.",
            logger,
        )

        value_list = []
        for i in range(actual_series.width):
            value_list.append(
                func(
                    actual_series.univariate_component(i),
                    pred_series.univariate_component(i),
                    *args[2:],
                    **kwargs
                )
            )  # [2:] since we already know the first two arguments are the series
        if "reduction" in kwargs:
            return kwargs["reduction"](value_list)
        else:
            return signature(func).parameters["reduction"].default(value_list)
Example #10
    def __init__(self,
                 fill: Union[str, float] = 'auto',
                 name: str = "MissingValuesFiller",
                 n_jobs: int = 1,
                 verbose: bool = False):
        """
        Data transformer to fill missing values in a (sequence of) TimeSeries.

        Parameters
        ----------
        fill
            The value used to replace the missing values.
            If set to 'auto', will auto-fill missing values using the `pandas.DataFrame.interpolate()` method.
        name
            A specific name for the transformer.
        n_jobs
            The number of jobs to run in parallel. Parallel jobs are created only when a `Sequence[TimeSeries]` is
            passed as input to a method, parallelising operations regarding different `TimeSeries`. Defaults to `1`
            (sequential). Setting the parameter to `-1` means using all the available processors.
            Note: for a small amount of data, the parallelisation overhead could end up increasing the total
            required amount of time.
        verbose
            Optionally, whether to print operations progress.
        """
        raise_if_not(
            isinstance(fill, str) or isinstance(fill, float),
            "`fill` should either be a string or a float", logger)
        raise_if(
            isinstance(fill, str) and fill != 'auto',
            "invalid string for `fill`: can only be set to 'auto'", logger)

        super().__init__(name=name, n_jobs=n_jobs, verbose=verbose)
        self._fill = fill
Example #11
    def __init__(self,
                 version: str = "classic",
                 alpha_d: float = None,
                 alpha_p: float = None):
        """An implementation of the `Croston method
        <https://otexts.com/fpp3/counts.html>`_ for intermittent
        count series.

        Relying on the implementation of `Statsforecasts package
        <https://github.com/Nixtla/statsforecast>`_.

        Parameters
        ----------
        version
            - "classic" corresponds to classic Croston.
            - "optimized" corresponds to optimized classic Croston, which searches
              for the optimal ``alpha`` smoothing parameter and can take longer
              to run. Otherwise, a fixed value of ``alpha=0.1`` is used.
            - "sba" corresponds to the adjustment of the Croston method known as
              the Syntetos-Boylan Approximation [1]_.
            - "tsb" corresponds to the adjustment of the Croston method proposed by
              Teunter, Syntetos and Babai [2]_. In this case, `alpha_d` and `alpha_p` must
              be set.
        alpha_d
            For the "tsb" version, the alpha smoothing parameter to apply on demand.
        alpha_p
            For the "tsb" version, the alpha smoothing parameter to apply on probability.

        References
        ----------
        .. [1] Aris A. Syntetos and John E. Boylan. The accuracy of intermittent demand estimates.
               International Journal of Forecasting, 21(2):303 – 314, 2005.
        .. [2] Ruud H. Teunter, Aris A. Syntetos, and M. Zied Babai.
               Intermittent demand: Linking forecasting to inventory obsolescence.
               European Journal of Operational Research, 214(3):606 – 615, 2011.
        """
        super().__init__()
        raise_if_not(
            version.lower() in ["classic", "optimized", "sba", "tsb"],
            'The provided "version" parameter must be set to "classic", "optimized", "sba" or "tsb".',
        )
        version = version.lower()  # normalize case so the dispatch below matches the validated value

        if version == "classic":
            self.method = croston_classic
        elif version == "optimized":
            self.method = croston_optimized
        elif version == "sba":
            self.method = croston_sba
        else:
            raise_if(
                alpha_d is None or alpha_p is None,
                'alpha_d and alpha_p must be specified when using "tsb".',
            )
            self.method = croston_tsb
            self.alpha_d = alpha_d
            self.alpha_p = alpha_p

        self.version = version
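
A usage sketch for the "tsb" variant, the only one that requires the smoothing parameters (assumes `darts.models.Croston`):

from darts.models import Croston

model = Croston(version="tsb", alpha_d=0.2, alpha_p=0.2)
model.fit(train_series)     # `train_series`: an intermittent-count TimeSeries (assumed)
forecast = model.predict(n=12)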
Example #12
    def fit(self, series: TimeSeries):
        super().fit(series)
        raise_if_not(
            len(series) >= self.K,
            f"The time series requires at least K={self.K} points",
            logger,
        )
        self.last_k_vals = series.univariate_values()[-self.K:]
        return self
Example #13
    def _check_sizes(tup, name):
        raise_if_not(
            len(tup) == num_stacks,
            f"the length of {name} must match the number of stacks.",
        )
        raise_if_not(
            all([len(i) == num_blocks for i in tup]),
            f"the length of each tuple in {name} must be `num_blocks={num_blocks}`",
        )
Example #14
    def inverse_transform(self, series: Union[TimeSeries,
                                              Sequence[TimeSeries]], *args,
                          **kwargs) -> Union[TimeSeries, List[TimeSeries]]:
        """Inverse-transform a (sequence of) series.

        In case a sequence is passed as input data, this function takes care of
        parallelising the transformation of multiple series in the sequence at the same time.

        Parameters
        ----------
        series
            The (sequence of) series to be inverse-transformed.
        args
            Additional positional arguments for the :func:`ts_inverse_transform()` method
        kwargs
            Additional keyword arguments for the :func:`ts_inverse_transform()` method

            component_mask : Optional[np.ndarray] = None
                Optionally, a 1-D boolean np.ndarray of length ``series.n_components`` that specifies
                which components of the underlying `series` the Scaler should consider.

        Returns
        -------
        Union[TimeSeries, List[TimeSeries]]
            Inverse transformed data.
        """
        if hasattr(self, "_fit_called"):
            raise_if_not(
                self._fit_called,
                "fit() must have been called before inverse_transform()",
                logger,
            )

        desc = f"Inverse ({self._name})"

        if isinstance(series, TimeSeries):
            data = [series]
        else:
            data = series

        input_iterator = _build_tqdm_iterator(
            self._inverse_transform_iterator(data),
            verbose=self._verbose,
            desc=desc,
            total=len(data),
        )

        transformed_data = _parallel_apply(
            input_iterator,
            self.__class__.ts_inverse_transform,
            self._n_jobs,
            args,
            kwargs,
        )

        return (transformed_data[0]
                if isinstance(series, TimeSeries) else transformed_data)
Example #15
    def __init__(
        self,
        theta: int = 2,
        seasonality_period: Optional[int] = None,
        season_mode: SeasonalityMode = SeasonalityMode.MULTIPLICATIVE,
    ):
        """
        An implementation of the Theta method with configurable `theta` parameter. See [1]_.

        The training time series is de-seasonalized according to `seasonality_period`,
        or an inferred seasonality period.

        `season_mode` must be a ``SeasonalityMode`` Enum member.

        You can access the Enum with ``from darts import SeasonalityMode``.

        Parameters
        ----------
        theta
            Value of the theta parameter. Defaults to 2. Cannot be set to 0.
            If `theta = 1`, then the theta method reduces to simple exponential smoothing (SES).
        seasonality_period
            User-defined seasonality period. If not set, will be tentatively inferred from the training series upon
            calling :func:`fit()`.
        season_mode
            Type of seasonality.
            Either ``SeasonalityMode.MULTIPLICATIVE``, ``SeasonalityMode.ADDITIVE`` or ``SeasonalityMode.NONE``.
            Defaults to ``SeasonalityMode.MULTIPLICATIVE``.

        References
        ----------
        .. [1] `Unmasking the Theta method <https://robjhyndman.com/papers/Theta.pdf>`_
        """

        super().__init__()

        self.model = None
        self.coef = 1
        self.alpha = 1
        self.length = 0
        self.theta = theta
        self.is_seasonal = False
        self.seasonality = None
        self.seasonality_period = seasonality_period
        self.season_period = None
        self.season_mode = season_mode

        raise_if_not(
            season_mode in SeasonalityMode,
            f"Unknown value for season_mode: {season_mode}.",
            logger,
        )

        if self.theta == 0:
            raise_log(ValueError("The parameter theta cannot be equal to 0."), logger)
Example #16
    def set_n_jobs(self, value: int):
        """Set the number of processors to be used by the transformer while processing multiple ``TimeSeries``.

        Parameters
        ----------
        value
            New n_jobs value. Set to `-1` to use all available cores.
        """

        raise_if_not(isinstance(value, int), "n_jobs must be an integer")
        self._n_jobs = value
Example #17
    def init_size(self, n: int, m: int):
        self.n = n
        self.m = m

        max_slope = self.max_slope
        diagonal_slope = m / n  # rise over run
        raise_if_not(
            max_slope > diagonal_slope,
            f"Itakura slope {max_slope} must be greater than {diagonal_slope} to form valid parallelogram.",
        )

        max_slope_angle = atan(max_slope)
        diagonal_slope_angle = atan(diagonal_slope)

        diff_slope_angle = max_slope_angle - diagonal_slope_angle
        min_slope = tan(diagonal_slope_angle - diff_slope_angle)

        # Derivation for determining how wide the steep top sides (A) and shallow bottom (D) are

        # max_slope*x + (n-x)*min_slope = m
        # max_slope*x + n*min_slope - min_slope*x = m
        # (max_slope - min_slope)*x = m - n*min_slope
        # x = (m - n*min_slope) / (max_slope - min_slope)

        ranges = np.zeros((self.n, 2), dtype=float)

        shallow_bottom = int(
            np.round((m - n * max_slope) / (min_slope - max_slope)) + 1)
        ranges[:shallow_bottom, 0] = np.arange(shallow_bottom)
        ranges[shallow_bottom:, 0] = np.arange(n - shallow_bottom) + 1

        ranges[:shallow_bottom, 0] *= min_slope
        ranges[shallow_bottom:, 0] *= max_slope
        ranges[shallow_bottom:, 0] += ranges[shallow_bottom - 1, 0]

        steep_top = int(np.round(
            (m - n * min_slope) / (max_slope - min_slope)))
        ranges[:steep_top, 1] = np.arange(steep_top) + 1
        ranges[steep_top:, 1] = np.arange(n - steep_top) + 1

        ranges[:steep_top:, 1] *= max_slope
        ranges[steep_top:, 1] *= min_slope
        ranges[steep_top:, 1] += ranges[steep_top - 1, 1]

        np.floor(ranges[:, 0], out=ranges[:, 0])
        np.ceil(ranges[:, 1], out=ranges[:, 1])

        ranges = np.maximum([0, 1], ranges)
        ranges = np.minimum([self.m - 1, self.m], ranges)
        ranges = ranges.astype(int)
        ranges[0][0] = 0

        super().__init__(n, m, ranges)
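
The derivation in the comments can be checked numerically in isolation: for a square 10x10 window with max_slope=2, the mirrored min_slope comes out to 0.5 and the steep segment covers about a third of the columns (a standalone sketch, not part of the class):

from math import atan, tan

n, m, max_slope = 10, 10, 2.0
diagonal_slope = m / n                              # 1.0
diff_angle = atan(max_slope) - atan(diagonal_slope)
min_slope = tan(atan(diagonal_slope) - diff_angle)  # 0.5: mirrored about the diagonal
x = (m - n * min_slope) / (max_slope - min_slope)   # ~3.33 columns for the steep segment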
Example #18
def remove_seasonality(
    ts: TimeSeries,
    freq: int = None,
    model: SeasonalityMode = SeasonalityMode.MULTIPLICATIVE,
    method: str = "naive",
    **kwargs,
) -> TimeSeries:
    """
    Adjusts the TimeSeries `ts` for a seasonality of order `freq` using the `model` decomposition.

    Parameters
    ----------
    ts
        The TimeSeries to adjust.
    freq
        The seasonality period to use.
    model
        The type of decomposition to use.
        Must be a ``SeasonalityMode`` Enum member (``from darts import SeasonalityMode``).
        Either SeasonalityMode.MULTIPLICATIVE or SeasonalityMode.ADDITIVE.
        Defaults to SeasonalityMode.MULTIPLICATIVE.
    method
        The method to be used to decompose the series.
        - "naive" : Seasonal decomposition using moving averages [1]_.
        - "STL" : Season-Trend decomposition using LOESS [2]_. Only compatible with the ``ADDITIVE`` model type.
        Defaults to "naive".
    kwargs
        Other keyword arguments are passed down to the decomposition method.

    Returns
    -------
    TimeSeries
        A new TimeSeries instance that corresponds to the seasonality-adjusted `ts`.

    References
    ----------
    .. [1] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.seasonal_decompose.html
    .. [2] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.STL.html
    """
    ts._assert_univariate()
    raise_if_not(
        model is not SeasonalityMode.NONE,
        "The model must be either MULTIPLICATIVE or ADDITIVE.",
    )
    raise_if(
        model not in [SeasonalityMode.ADDITIVE, ModelMode.ADDITIVE]
        and method == "STL",
        f"Only ADDITIVE seasonality is compatible with the STL method. Current model is {model}.",
        logger,
    )

    _, seasonality = extract_trend_and_seasonality(ts, freq, model, method,
                                                   **kwargs)
    new_ts = remove_from_series(ts, seasonality, model)
    return new_ts
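
A usage sketch (assumes the function lives in `darts.utils.statistics`; STL requires the ADDITIVE model, per the check above):

from darts import SeasonalityMode
from darts.utils.statistics import remove_seasonality

adjusted = remove_seasonality(series, freq=12,
                              model=SeasonalityMode.ADDITIVE, method="STL")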
Example #19
    def __init__(
        self,
        target_series: Union[TimeSeries, Sequence[TimeSeries]],
        covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
        n: int = 1,
        input_chunk_length: int = 12,
        output_chunk_length: int = 1,
        covariate_type: CovariateType = CovariateType.PAST,
    ):
        """
        Contains (past_target, past_covariates | historic_future_covariates, future_past_covariates | future_covariate,
        static_covariates).

        "future_past_covariates" are past covariates that happen to be also known in the future - those
        are needed for forecasting with n > output_chunk_length by any model relying on past covariates.
        For this reason, when n > output_chunk_length, this dataset will also emit the "future_past_covariates".

        "historic_future_covariates" are historic future covariates that are given for the input_chunk in the past.

        Parameters
        ----------
        target_series
            The target series that are to be predicted into the future.
        covariates
            Optionally, one or a sequence of `TimeSeries` containing either past or future covariates. If covariates
            were used during training, the same type of covariates must be supplied at prediction.
        n
            Forecast horizon: The number of time steps to predict after the end of the target series.
        input_chunk_length
            The length of the target series the model takes as input.
        output_chunk_length
            The length of the target series the model emits in output.
        """
        super().__init__()

        self.target_series = (
            [target_series] if isinstance(target_series, TimeSeries) else target_series
        )
        self.covariates = (
            [covariates] if isinstance(covariates, TimeSeries) else covariates
        )

        self.covariate_type = covariate_type

        self.n = n
        self.input_chunk_length = input_chunk_length
        self.output_chunk_length = output_chunk_length

        raise_if_not(
            (covariates is None or len(self.target_series) == len(self.covariates)),
            "The number of target series must be equal to the number of covariates.",
        )
Example #20
    def fit(self, series: TimeSeries) -> None:
        """ Trains the model on the provided series

        Parameters
        ----------
        series
            A target time series. The model will be trained to forecast this time series.
        """
        raise_if_not(
            len(series) >= self.min_train_series_length,
            "Train series only contains {} elements but {} model requires at least {} entries"
            .format(len(series), str(self), self.min_train_series_length))
        self.training_series = series
        self._fit_called = True
Example #21
    def _reshape_out(
        series: TimeSeries,
        vals: np.ndarray,
        component_mask: Optional[np.ndarray] = None,
    ) -> np.ndarray:
        """Reshapes the 2-D matrix coming out of a transformer into a 3-D matrix suitable to build a TimeSeries.

        The output is a 3-D matrix, built by taking each column of the 2-D matrix (the flattened components)
        and reshaping them to (len(series), n_samples), then stacking them on 2nd axis.

        Parameters
        ----------
        series
            Input TimeSeries that was fed into the transformer.
        vals
            The transformer output.
        component_mask
            Optionally, np.ndarray boolean mask of shape (n_components, 1) specifying which components were extracted
            from `series`. If given, insert `vals` back into the columns of the original array.
        """

        raise_if_not(
            component_mask is None or isinstance(component_mask, np.ndarray)
            and component_mask.dtype == bool,
            "If `component_mask` is given, must be a boolean np.ndarray`",
            logger,
        )

        series_width = series.width if component_mask is None else component_mask.sum()
        reshaped = np.stack(
            [
                vals[:, i].reshape(-1, series.n_samples)
                for i in range(series_width)
            ],
            axis=1,
        )

        if component_mask is None:
            return reshaped

        raise_if_not(
            series.width == len(component_mask),
            "mismatch between number of components in `series` and length of `component_mask`",
            logger,
        )

        series_vals = series.all_values(copy=True)
        series_vals[:, component_mask, :] = reshaped
        return series_vals
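
The core reshape can be verified with plain NumPy, shapes only (a standalone sketch; the method above additionally handles the component mask):

import numpy as np

n_time, n_components, n_samples = 4, 2, 3
vals = np.arange(n_time * n_samples * n_components, dtype=float).reshape(
    n_time * n_samples, n_components)               # 2-D transformer output
reshaped = np.stack([vals[:, i].reshape(-1, n_samples)
                     for i in range(n_components)], axis=1)
assert reshaped.shape == (n_time, n_components, n_samples)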
Example #22
    def __init__(self, sample_freq: str = "hourly", multivariate: bool = True):
        """
        Parameters
        ----------
        sample_freq: str
            The sampling frequency of the data. Can be "hourly" or "daily". Default is "hourly".
        multivariate: bool
            Whether to return a single multivariate timeseries - if False returns a list of univariate TimeSeries. Default is True.
        """
        valid_sample_freq = ["daily", "hourly"]
        raise_if_not(
            sample_freq in valid_sample_freq,
            f"sample_freq must be one of {valid_sample_freq}",
            logger,
        )

        def pre_process_fn(extracted_dir, dataset_path):
            df = pd.read_csv(
                Path(extracted_dir, "uber-raw-data-janjune-15.csv"),
                header=0,
                usecols=["Pickup_date", "locationID"],
                index_col=0,
            )

            output_dict = {}
            freq_setting = "1H" if "hourly" in str(dataset_path) else "1D"
            time_series_of_locations = list(df.groupby(by="locationID"))
            for locationID, df in time_series_of_locations:
                df = df.sort_index()  # sort_index() returns a new frame; keep the result
                df.index = pd.to_datetime(df.index)

                count_series = df.resample(rule=freq_setting).size()

                output_dict[locationID] = count_series
            output_df = pd.DataFrame(output_dict)
            output_df.to_csv(dataset_path, line_terminator="\n")

        super().__init__(metadata=DatasetLoaderMetadata(
            f"uber_tlc_{sample_freq}.csv",
            uri="https://github.com/fivethirtyeight/uber-tlc-foil-response/raw/"
            "63bb878b76f47f69b4527d50af57aac26dead983/"
            "uber-trip-data/uber-raw-data-janjune-15.csv.zip",
            hash="9ed84ebe0df4bc664748724b633b3fe6" if sample_freq ==
            "hourly" else "24f9fd67e4b9e53f0214a90268cd9bee",
            header_time="Pickup_date",
            format_time="%Y-%m-%d %H:%M",
            pre_process_zipped_csv_fn=pre_process_fn,
            multivariate=multivariate,
        ))
Example #23
    def set_verbose(self, value: bool):
        """
        Setter for the verbosity status. `True` for enabling the detailed report about the scaler's operation
        progress, `False` for no additional information.

        Parameters
        ----------
        value
            New verbosity status

        """
        raise_if_not(isinstance(value, bool),
                     "Verbosity status must be a boolean.")

        self._verbose = value
Example #24
    def test_raise_if_not(self):
        exception_was_raised = False
        with LogCapture() as lc:
            logger = get_logger(__name__)
            logger.handlers = []
            try:
                raise_if_not(True, "test", logger)
                raise_if_not(False, "test", logger)
            except Exception:
                exception_was_raised = True

        # testing correct log message
        lc.check((__name__, "ERROR", "ValueError: test"))

        # checking whether exception was properly raised
        self.assertTrue(exception_was_raised)
Example #25
    def __init__(self,
                 transformers: Sequence[BaseDataTransformer],
                 copy: bool = False,
                 verbose: bool = None,
                 n_jobs: int = None):
        """
        Pipeline combines multiple data transformers chaining them together.

        Parameters
        ----------
        transformers
            Sequence of data transformers.
        copy
            If set, makes a deep copy of each data transformer before adding it to the pipeline.
        n_jobs
            The number of jobs to run in parallel. Parallel jobs are created only when a `Sequence[TimeSeries]` is
            passed as input to a method, parallelising operations regarding different `TimeSeries`. Defaults to `1`
            (sequential). Setting the parameter to `-1` means using all the available processors.
            Note: for a small amount of data, the parallelisation overhead could end up increasing the total
            required amount of time.
            Note: this parameter will overwrite the value set in each single transformer. Leave this parameter set to
            `None` for keeping the original transformers' configurations.
        verbose
            Whether to print progress of the operations.
            Note: this parameter will overwrite the value set in each single transformer. Leave this parameter set
            to `None` for keeping the original transformers' configurations.
        """
        if transformers is None or len(transformers) == 0:
            logger.warning("Empty pipeline created")
            self._transformers: Sequence[BaseDataTransformer[TimeSeries]] = []
        else:
            # validate before storing; checking for None/empty first avoids iterating over `None`
            raise_if_not(all(isinstance(t, BaseDataTransformer) for t in transformers),
                         "transformers should be objects deriving from BaseDataTransformer", logger)
            self._transformers = deepcopy(transformers) if copy else transformers

        self._invertible = all((isinstance(t, InvertibleDataTransformer) for t in self._transformers))

        if verbose is not None:
            for transformer in self._transformers:
                transformer.set_verbose(verbose)

        if n_jobs is not None:
            for transformer in self._transformers:
                transformer.set_n_jobs(n_jobs)
Example #26
    def _prepare_pooling_downsampling(pooling_kernel_sizes, n_freq_downsample,
                                      in_len, out_len, num_blocks, num_stacks):
        def _check_sizes(tup, name):
            raise_if_not(
                len(tup) == num_stacks,
                f"the length of {name} must match the number of stacks.",
            )
            raise_if_not(
                all([len(i) == num_blocks for i in tup]),
                f"the length of each tuple in {name} must be `num_blocks={num_blocks}`",
            )

        if pooling_kernel_sizes is None:
            # make stacks handle different frequencies
            # go from in_len/2 to 1 in num_stacks steps:
            max_v = max(in_len // 2, 1)
            pooling_kernel_sizes = tuple(
                (int(v), ) * num_blocks
                for v in max_v // np.geomspace(1, max_v, num_stacks))
            logger.info(
                f"(N-HiTS): Using automatic kernel pooling size: {pooling_kernel_sizes}."
            )
        else:
            # check provided pooling format
            _check_sizes(pooling_kernel_sizes, "`pooling_kernel_sizes`")

        if n_freq_downsample is None:
            # go from out_len/2 to 1 in num_stacks steps:
            max_v = max(out_len // 2, 1)
            n_freq_downsample = tuple(
                (int(v), ) * num_blocks
                for v in max_v // np.geomspace(1, max_v, num_stacks))
            logger.info(
                f"(N-HiTS):  Using automatic downsampling coefficients: {n_freq_downsample}."
            )
        else:
            # check provided downsample format
            _check_sizes(n_freq_downsample, "`n_freq_downsample`")

            # check that last value is 1
            raise_if_not(
                n_freq_downsample[-1][-1] == 1,
                "the downsampling coefficient of the last block of the last stack must be 1 "
                + "(i.e., `n_freq_downsample[-1][-1]`).",
            )

        return pooling_kernel_sizes, n_freq_downsample
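
The automatic defaults can be reproduced in isolation; for in_len=12, num_stacks=3, num_blocks=1 the kernel sizes decay geometrically from in_len/2 down to 1 (standalone sketch):

import numpy as np

in_len, num_stacks, num_blocks = 12, 3, 1
max_v = max(in_len // 2, 1)                  # 6
sizes = tuple((int(v),) * num_blocks
              for v in max_v // np.geomspace(1, max_v, num_stacks))
# sizes == ((6,), (2,), (1,))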
Example #27
    def __init__(
        self,
        forecasting_models: Union[List[ForecastingModel],
                                  List[GlobalForecastingModel]],
        regression_train_n_points: int,
        regression_model=None,
    ):
        """
        Use a regression model for ensembling individual models' predictions.

        The provided regression model must implement ``fit()`` and ``predict()`` methods
        (e.g. scikit-learn regression models). Note that here the regression model is used to learn how to
        best ensemble the individual forecasting models' forecasts. It is not the same usage of regression
        as in :class:`RegressionModel`, where the regression model is used to produce forecasts based on the
        lagged series.

        Parameters
        ----------
        forecasting_models
            List of forecasting models whose predictions to ensemble.
        regression_train_n_points
            The number of points to use to train the regression model
        regression_model
            Any regression model with ``predict()`` and ``fit()`` methods (e.g. from scikit-learn)
            Default: ``darts.model.LinearRegressionModel(fit_intercept=False)``
        """
        super().__init__(forecasting_models)
        if regression_model is None:
            regression_model = LinearRegressionModel(
                lags=None, lags_future_covariates=[0], fit_intercept=False)
        elif isinstance(regression_model, RegressionModel):
            regression_model = regression_model
        else:
            # scikit-learn like model
            regression_model = RegressionModel(lags_future_covariates=[0],
                                               model=regression_model)

        # check lags of the regression model
        raise_if_not(
            regression_model.lags == {"future": [0]},
            f"`lags` and `lags_past_covariates` of regression model must be `None`"
            f"and `lags_future_covariates` must be [0]. Given:\n"
            f"{regression_model.lags}",
        )

        self.regression_model = regression_model
        self.train_n_points = regression_train_n_points
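
A usage sketch (assumes `darts.models` exposes `RegressionEnsembleModel` and `NaiveSeasonal`; with `regression_model=None` the default regressor is then a `LinearRegressionModel` trained on the last 24 points):

from darts.models import NaiveSeasonal, RegressionEnsembleModel

ensemble = RegressionEnsembleModel(
    forecasting_models=[NaiveSeasonal(K=1), NaiveSeasonal(K=12)],
    regression_train_n_points=24,
)
ensemble.fit(train_series)   # `train_series`: a TimeSeries (assumed)
forecast = ensemble.predict(n=12)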
Example #28
    def filter(self, series: TimeSeries) -> TimeSeries:
        """Filters a given series

        Parameters
        ----------
        series
            The series to filter.

        Returns
        -------
        TimeSeries
            A time series containing the filtered values.
        """
        raise_if_not(
            series.is_deterministic,
            "The input series must be deterministic (observations).",
        )
Example #29
    def __init__(self, n: int, m: int, ranges: np.ndarray = None):
        """
        Parameters
        ----------
        n
            The width of the window, must be equal to the length of series1
        m
            The height of the window, must be equal to the length of series2
        ranges
            Ranges of active cells within a column [[start_column0, end_column0], ...]
            with shape (n, 2) and where start >= 0 and end <= m.
        """

        self.n = n
        self.m = m

        if ranges is not None:
            raise_if_not(
                ranges.shape == (n, 2),
                f"Expects a 2d array with [start, end] for each column and shape = ({n}, 2)",
            )

            ranges = np.insert(ranges, 0, [0, 1], axis=0)
            start = ranges[:, 0]
            end = ranges[:, 1]

            raise_if(np.any(start < 0), "Start must be >=0")
            raise_if(np.any(end > m), "End must be <m")

            diff = np.maximum(end - start, 0)
            self.length = np.sum(diff)

            ranges[1:] += 1
            ranges = ranges.flatten()
        else:
            ranges = np.zeros((n + 1) * 2, dtype=int)
            ranges[0::2] = self.m  # start values: initialized to m, i.e. every column starts empty
            ranges[1::2] = 0  # end values
            ranges = array.array("i", ranges)

            ranges[0] = 0
            ranges[1] = 1
            self.length = 1

        self.column_ranges = array.array("i", ranges)
Example #30
    def init_size(self, n: int, m: int):
        self.n = n
        self.m = m

        diff = abs(n - m)
        raise_if_not(
            diff < self.window_size,
            f"Window size must at least cover size difference ({diff})",
        )

        ranges = np.repeat(np.arange(n), 2)
        ranges[0::2] -= self.window_size
        ranges[1::2] += self.window_size

        ranges[0::2] = np.maximum(0, ranges[0::2])
        ranges[1::2] = np.minimum(self.m, ranges[1::2] + 1)
        ranges = np.reshape(ranges, (-1, 2))

        super().__init__(n, m, ranges)
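
The band construction can be traced with small numbers (standalone sketch): for n = m = 5 and window_size = 2, each column i keeps the rows j with |i - j| <= 2.

import numpy as np

n, m, window_size = 5, 5, 2
ranges = np.repeat(np.arange(n), 2)
ranges[0::2] -= window_size
ranges[1::2] += window_size
ranges[0::2] = np.maximum(0, ranges[0::2])
ranges[1::2] = np.minimum(m, ranges[1::2] + 1)
print(np.reshape(ranges, (-1, 2)))
# [[0 3] [0 4] [0 5] [1 5] [2 5]]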