Пример #1
0
 def ts_transform(
     series: TimeSeries,
     fn: Union[Callable[[np.number], np.number],
               Callable[[pd.Timestamp, np.number], np.number]],
 ) -> TimeSeries:
     """
     Apply `fn` to `series` by delegating to `series.map`.

     Parameters
     ----------
     series
         The series to transform.
     fn
         The mapping function. As the annotation states, it takes either a value,
         or a timestamp and a value, and returns a value.

     Returns
     -------
     TimeSeries
         Whatever `series.map(fn)` returns.
     """
     mapped = series.map(fn)
     return mapped
Пример #2
0
 def inverse_transform(self, data: TimeSeries, *args, **kwargs):
     """
     Apply the inverse mapping function to `data`.

     The parent class's `inverse_transform` is invoked first (its return value is
     discarded), then `self._inverse_fn` is mapped over `data`.

     Parameters
     ----------
     data
         The series to transform back.
     args
         Extra positional arguments forwarded to the parent implementation.
     kwargs
         Extra keyword arguments forwarded to the parent implementation.

     Returns
     -------
     The result of `data.map(self._inverse_fn)`.
     """
     # Keyword arguments have to be forwarded with `**`: a single star would
     # unpack the dictionary's keys as additional positional arguments.
     super().inverse_transform(data, *args, **kwargs)
     return data.map(self._inverse_fn)
Пример #3
0
 def ts_transform(series: TimeSeries, fn) -> TimeSeries:
     """
     Apply `fn` to `series` by delegating to `series.map`.

     Parameters
     ----------
     series
         The series to transform.
     fn
         The function handed over to `series.map`.

     Returns
     -------
     TimeSeries
         Whatever `series.map(fn)` returns.
     """
     transformed = series.map(fn)
     return transformed
Пример #4
0
    def gridsearch(model_class,
                   parameters: dict,
                   series: TimeSeries,
                   covariates: Optional[TimeSeries] = None,
                   forecast_horizon: Optional[int] = None,
                   start: Union[pd.Timestamp, float, int] = 0.5,
                   last_points_only: bool = False,
                   val_series: Optional[TimeSeries] = None,
                   use_fitted_values: bool = False,
                   metric: Callable[[TimeSeries, TimeSeries],
                                    float] = metrics.mape,
                   reduction: Callable[[np.ndarray], float] = np.mean,
                   verbose=False) -> Tuple['ForecastingModel', Dict]:
        """
        A function for finding the best hyper-parameters among a given set.
        This function has 3 modes of operation: Expanding window mode, split mode and fitted value mode.
        The three modes of operation evaluate every possible combination of hyper-parameter values
        provided in the `parameters` dictionary by instantiating the `model_class` subclass
        of ForecastingModel with each combination, and returning the best-performing model with regards
        to the `metric` function. The `metric` function is expected to return an error value,
        thus the model resulting in the smallest `metric` output will be chosen.

        Exactly one of `forecast_horizon`, `val_series` and `use_fitted_values` must be provided;
        it selects the mode of operation and thereby the relationship between training and test data.

        Expanding window mode (activated when `forecast_horizon` is passed):
        For every hyper-parameter combination, the model is repeatedly trained and evaluated on different
        splits of `series`. This process is accomplished by using the `backtest` function as a subroutine
        to produce historic forecasts starting from `start` that are compared against the ground truth
        values of `series`. Note that the model is retrained for every single prediction, thus this mode
        is slower.

        Split window mode (activated when `val_series` is passed):
        For every hyper-parameter combination, the model is trained on `series` and
        evaluated on `val_series`.

        Fitted value mode (activated when `use_fitted_values` is set to `True`):
        For every hyper-parameter combination, the model is trained on `series`
        and evaluated on the resulting fitted values.
        Not all models have fitted values, and this method raises an error if the model doesn't have a `fitted_values`
        member. The fitted values are the result of the fit of the model on `series`. Comparing with the
        fitted values can be a quick way to assess the model, but one cannot see if the model is overfitting the series.

        Parameters
        ----------
        model_class
            The ForecastingModel subclass to be tuned for 'series'.
        parameters
            A dictionary containing as keys hyperparameter names, and as values lists of values for the
            respective hyperparameter.
        series
            The TimeSeries instance used as input and target for training.
        covariates
            An optional covariate series. This applies only if the model supports covariates.
            It must have the same time axis as `series`.
        forecast_horizon
            The integer value of the forecasting horizon used in expanding window mode.
        start
            The `int`, `float` or `pandas.Timestamp` that represents the starting point in the time index
            of `series` from which predictions will be made to evaluate the model.
            For a detailed description of how the different data types are interpreted, please see the documentation
            for `ForecastingModel.backtest`.
        last_points_only
            Whether to use the whole forecasts or only the last point of each forecast to compute the error
        val_series
            The TimeSeries instance used for validation in split mode. If provided, this series must start right after
            the end of `series`; so that a proper comparison of the forecast can be made.
        use_fitted_values
            If `True`, uses the comparison with the fitted values.
            Raises an error if `fitted_values` is not an attribute of `model_class`.
        metric
            A function that takes two TimeSeries instances as inputs and returns a float error value.
        reduction
            A reduction function (mapping array to float) describing how to aggregate the errors obtained
            on the different validation series when backtesting. By default it'll compute the mean of errors.
        verbose
            Whether to print progress.

        Returns
        -------
        ForecastingModel, Dict
            A tuple containing an untrained 'model_class' instance created from the best-performing hyper-parameters,
            along with a dictionary containing these best hyper-parameters.

        Raises
        ------
        The argument checks below are delegated to `raise_if_not`; the exception type is whatever
        that helper raises.
        """
        # Booleans add up as integers, so the sum counts how many modes were requested.
        raise_if_not(
            (forecast_horizon is not None) + (val_series is not None) +
            use_fitted_values == 1,
            "Please pass exactly one of the arguments 'forecast_horizon', "
            "'val_series' or 'use_fitted_values'.", logger)

        if use_fitted_values:
            # The check is made on a default-constructed instance of the model class.
            raise_if_not(
                hasattr(model_class(), "fitted_values"),
                "The model must have a fitted_values attribute to compare with the train TimeSeries",
                logger)

        elif val_series is not None:
            raise_if_not(
                series.width == val_series.width,
                "Training and validation series require the same number of components.",
                logger)

        if covariates is not None:
            raise_if_not(
                series.has_same_time_as(covariates),
                'The provided series and covariates must have the '
                'same time axes.', logger)

        min_error = float('inf')
        best_param_combination = {}

        # compute all hyperparameter combinations from selection
        params_cross_product = list(product(*parameters.values()))

        # TODO: We should find a better object oriented way of handling covariates in GlobalForecastingModel
        # Covariates are only passed on to `fit` / `predict` when the model's signature accepts them.
        fit_signature = signature(model_class.fit)
        predict_signature = signature(model_class.predict)

        # iterate through all combinations of the provided parameters and choose the best one
        iterator = _build_tqdm_iterator(params_cross_product, verbose)
        for param_combination in iterator:
            param_combination_dict = dict(
                zip(parameters.keys(), param_combination))
            model = model_class(**param_combination_dict)
            if use_fitted_values:  # fitted value mode
                if covariates is not None and 'covariates' in fit_signature.parameters:
                    model.fit(series, covariates=covariates)
                else:
                    model.fit(series)
                fitted_values = TimeSeries.from_times_and_values(
                    series.time_index(), model.fitted_values)
                error = metric(fitted_values, series)
            elif val_series is None:  # expanding window mode
                error = model.backtest(series,
                                       covariates,
                                       start,
                                       forecast_horizon,
                                       metric=metric,
                                       reduction=reduction,
                                       last_points_only=last_points_only)
            else:  # split mode
                if covariates is not None and 'covariates' in fit_signature.parameters:
                    model.fit(series, covariates=covariates)
                else:
                    model.fit(series)

                if covariates is not None and 'covariates' in predict_signature.parameters:
                    pred = model.predict(n=len(val_series),
                                         covariates=covariates)
                else:
                    pred = model.predict(n=len(val_series))
                error = metric(pred, val_series)
            # Strict comparison: on ties the first combination encountered is kept.
            if error < min_error:
                min_error = error
                best_param_combination = param_combination_dict
        logger.info('Chosen parameters: ' + str(best_param_combination))

        return model_class(**best_param_combination), best_param_combination
Пример #5
0
 def transform(self, data: TimeSeries, *args, **kwargs) -> TimeSeries:
     """
     Apply the mapping function to `data`.

     The parent class's `transform` is invoked first with `data` only (its return
     value is discarded), then `self._fn` is mapped over `data`.

     Parameters
     ----------
     data
         The series to transform.
     args
         Accepted but not used by this implementation.
     kwargs
         Accepted but not used by this implementation.

     Returns
     -------
     TimeSeries
         The result of `data.map(self._fn)`.
     """
     super().transform(data)
     mapped = data.map(self._fn)
     return mapped
Пример #6
0
 def ts_fit(series: TimeSeries, transformer, *args, **kwargs) -> Any:
     """
     Fit `transformer` on the values of `series`.

     The values are reshaped to two dimensions with `series.width` columns
     before being handed to `transformer.fit`.

     Parameters
     ----------
     series
         The series providing the training values.
     transformer
         The transformer object instance; it must expose a `fit` method.
     args
         Accepted but not used.
     kwargs
         Accepted but not used.

     Returns
     -------
     Any
         Whatever `transformer.fit` returns.
     """
     raw_values = series.values()
     samples = raw_values.reshape((-1, series.width))
     return transformer.fit(samples)
Пример #7
0
    def historical_forecasts(
            self,
            series: TimeSeries,
            covariates: Optional[TimeSeries] = None,
            start: Union[pd.Timestamp, float, int] = 0.5,
            forecast_horizon: int = 1,
            stride: int = 1,
            retrain: bool = True,
            overlap_end: bool = False,
            last_points_only: bool = True,
            verbose: bool = False) -> Union[TimeSeries, List[TimeSeries]]:
        """
        Computes the historical forecasts the model would have produced with an expanding training window
        and (by default) returns a time series created from the last point of each of these individual forecasts.
        To this end, it repeatedly builds a training set from the beginning of `series`. It trains the
        current model on the training set, emits a forecast of length equal to forecast_horizon, and then moves
        the end of the training set forward by `stride` time steps.

        By default, this method will return a single time series made up of the last point of each
        historical forecast. This time series will thus have a frequency of `series.freq() * stride`.
        If `last_points_only` is set to False, it will instead return a list of the historical forecasts.

        By default, this method always re-trains the models on the entire available history,
        corresponding to an expanding window strategy.
        If `retrain` is set to False (useful for models for which training might be time-consuming, such as
        deep learning models), the model will only be trained on the initial training window
        (up to `start` time stamp), and only if it has not been trained before. Then, at every iteration, the
        newly expanded input sequence will be fed to the model to produce the new output.

        Parameters
        ----------
        series
            The target time series to use to successively train and evaluate the historical forecasts
        covariates
            An optional covariate series. This applies only if the model supports covariates.
            It must have the same time index as `series`.
        start
            The first point of time at which a prediction is computed for a future time.
            This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
            In the case of `float`, the parameter will be treated as the proportion of the time series
            that should lie before the first prediction point.
            In the case of `int`, the parameter will be treated as an integer index to the time index of
            `series` that will be used as first prediction time.
            In case of `pandas.Timestamp`, this time stamp will be used to determine the first prediction time
            directly.
        forecast_horizon
            The forecast horizon for the predictions
        stride
            The number of time steps between two consecutive predictions.
        retrain
            Whether to retrain the model for every prediction or not. Currently only `TorchForecastingModel`
            instances such as `RNNModel`, `TCNModel`, `NBEATSModel` and `TransformerModel` support
            setting `retrain` to `False`.
        overlap_end
            Whether the returned forecasts can go beyond the series' end or not
        last_points_only
            Whether to retain only the last point of each historical forecast.
            If set to True, the method returns a single `TimeSeries` containing the successive point forecasts.
            Otherwise returns a list of historical `TimeSeries` forecasts.
        verbose
            Whether to print progress

        Returns
        -------
        TimeSeries or List[TimeSeries]
            By default, a single TimeSeries instance created from the last point of each individual forecast.
            If `last_points_only` is set to False, a list of the historical forecasts.
        """

        if covariates is not None:
            raise_if_not(
                series.has_same_time_as(covariates),
                'The provided series and covariates must have the same time index.'
            )

        # prepare the start parameter -> pd.Timestamp
        start = get_timestamp_at_point(start, series)

        # build the prediction times in advance (to be able to use tqdm)
        if not overlap_end:
            # the time stamp located `forecast_horizon` steps before the last one of the series
            last_valid_pred_time = series.time_index()[-1 - forecast_horizon]
        else:
            # the second-to-last time stamp of the series
            last_valid_pred_time = series.time_index()[-2]

        # prediction times start at `start` and advance by `stride` steps of the series' frequency
        pred_times = [start]
        while pred_times[-1] < last_valid_pred_time:
            # compute the next prediction time and add it to pred times
            pred_times.append(pred_times[-1] + series.freq() * stride)

        # the last prediction time computed might have overshot last_valid_pred_time
        # NOTE(review): if `start` itself lies after last_valid_pred_time, the list ends up empty
        # and no forecast is produced at all - confirm this is the intended outcome.
        if pred_times[-1] > last_valid_pred_time:
            pred_times.pop(-1)

        iterator = _build_tqdm_iterator(pred_times, verbose)

        # Either store the whole forecasts or only the last points of each forecast, depending on last_points_only
        forecasts = []

        last_points_times = []
        last_points_values = []

        # TODO: We should find a better object oriented way of handling covariates in GlobalForecastingModel
        # `covariates` / `series` are only passed on when the model's own signatures accept them.
        fit_signature = signature(self.fit)
        predict_signature = signature(self.predict)

        # iterate and forecast
        for pred_time in iterator:
            train = series.drop_after(pred_time)  # build the training series
            if covariates is not None:
                # only defined when covariates were given; every later use is guarded by the same check
                train_cov = covariates.drop_after(pred_time)

            # NOTE(review): when `retrain` is False this method never calls `fit`, so the model
            # presumably has to be trained beforehand - confirm against the docstring above.
            if retrain:
                if covariates is not None and 'covariates' in fit_signature.parameters:
                    self.fit(series=train, covariates=train_cov)
                else:
                    self.fit(series=train)

            if covariates is not None and 'covariates' in predict_signature.parameters:
                forecast = self.predict(n=forecast_horizon,
                                        series=train,
                                        covariates=train_cov)
            else:
                if 'series' in predict_signature.parameters:
                    forecast = self.predict(n=forecast_horizon, series=train)
                else:
                    forecast = self.predict(n=forecast_horizon)

            if last_points_only:
                # keep only the final value of the forecast together with its time stamp
                last_points_values.append(forecast.values()[-1])
                last_points_times.append(forecast.end_time())
            else:
                forecasts.append(forecast)

        if last_points_only:
            # consecutive last points are `stride` steps apart, hence the adjusted frequency
            return TimeSeries.from_times_and_values(
                pd.DatetimeIndex(last_points_times),
                np.array(last_points_values),
                freq=series.freq() * stride)
        return forecasts
Пример #8
0
 def ts_inverse_transform(series: TimeSeries, transformer, *args,
                          **kwargs) -> TimeSeries:
     """
     Undo a transformation on `series` using `transformer.inverse_transform`.

     The values of `series` are reshaped to two dimensions with `series.width`
     columns, passed through `transformer.inverse_transform`, and wrapped in a
     new series carrying the time index and frequency of `series`.

     Parameters
     ----------
     series
         The series to transform back.
     transformer
         The transformer object instance; it must expose `inverse_transform`.
     args
         Accepted but not used.
     kwargs
         Accepted but not used.

     Returns
     -------
     TimeSeries
         The series built by `TimeSeries.from_times_and_values`.
     """
     times = series.time_index()
     samples = series.values().reshape((-1, series.width))
     restored = transformer.inverse_transform(samples)
     return TimeSeries.from_times_and_values(times, restored, series.freq())
Пример #9
0
 def ts_transform(series: TimeSeries, transformer) -> TimeSeries:
     """
     Transform `series` using `transformer.transform`.

     The values of `series` are reshaped to two dimensions with `series.width`
     columns, passed through `transformer.transform`, and wrapped in a new
     series carrying the time index and frequency of `series`.

     Parameters
     ----------
     series
         The series to transform.
     transformer
         The transformer object instance; it must expose `transform`.

     Returns
     -------
     TimeSeries
         The series built by `TimeSeries.from_times_and_values`.
     """
     times = series.time_index()
     samples = series.values().reshape((-1, series.width))
     transformed = transformer.transform(samples)
     return TimeSeries.from_times_and_values(times, transformed, series.freq())
Пример #10
0
 def test_creation(self):
     # A pandas series indexed by plain integers must be rejected with a ValueError
     # (presumably because a datetime index is required - confirm against from_series).
     with self.assertRaises(ValueError):
         TimeSeries.from_series(pd.Series(range(10), range(10)))

     # A series built from a datetime-indexed pandas series gives that series back.
     built = TimeSeries.from_series(self.pd_series1)
     round_trip = built.pd_series()
     self.assertTrue(round_trip.equals(self.pd_series1))
Пример #11
0
    def helper_test_shift(test_case, test_series: TimeSeries):
        """
        Shared checks for `TimeSeries.shift`.

        Parameters
        ----------
        test_case
            The running test case; provides the assertion methods and `series1`.
        test_series
            The series whose `shift` behaviour is verified.
        """
        step = test_series.freq()
        index = test_series.time_index()

        # Shifting by zero leaves the series unchanged.
        # NOTE(review): this check uses test_case.series1 rather than test_series - confirm intent.
        unshifted = test_case.series1.shift(0)
        test_case.assertTrue(unshifted == test_case.series1)

        # Shifting forward by one step drops the first time stamp and adds one after the last.
        forward = test_series.shift(1)
        expected_forward = index[1:].append(
            pd.DatetimeIndex([index[-1] + step]))
        test_case.assertTrue(forward.time_index().equals(expected_forward))

        # Shifting backward by one step adds a time stamp in front and drops the last one.
        backward = test_series.shift(-1)
        expected_backward = pd.DatetimeIndex([index[0] - step
                                              ]).append(index[:-1])
        test_case.assertTrue(backward.time_index().equals(expected_backward))

        # Shifts too large to be represented must raise an OverflowError.
        with test_case.assertRaises(OverflowError):
            test_series.shift(1e6)

        monthly = TimeSeries.from_times_and_values(
            pd.date_range('20130101', '20130601', freq='m'), range(5))
        with test_case.assertRaises(OverflowError):
            monthly.shift(1e4)

        # A single-point series with an explicit daily frequency can be shifted as well.
        single_day = TimeSeries.from_times_and_values(
            pd.date_range('20130101', '20130101'), range(1), freq='D')
        next_day = single_day.shift(1)
        test_case.assertEqual(next_day.time_index()[0],
                              pd.Timestamp('20130102'))
Пример #12
0
class TimeSeriesTestCase(unittest.TestCase):
    """Unit tests for the `TimeSeries` class, run against small daily fixtures."""

    # Ten consecutive daily time stamps (2013-01-01 .. 2013-01-10) shared by all fixtures.
    times = pd.date_range('20130101', '20130110')
    # Plain pandas series used as raw input: values 0..9, 5..14 and 15..24.
    pd_series1 = pd.Series(range(10), index=times)
    pd_series2 = pd.Series(range(5, 15), index=times)
    pd_series3 = pd.Series(range(15, 25), index=times)
    # TimeSeries fixtures built from the pandas series above.
    series1: TimeSeries = TimeSeries.from_series(pd_series1)
    series2: TimeSeries = TimeSeries.from_series(pd_series2)
    # NOTE(review): series3 is built from pd_series2 (not pd_series3), so it duplicates series2.
    # The factors expected in test_rescale may rely on this - confirm before changing it.
    series3: TimeSeries = TimeSeries.from_series(pd_series2)

    @classmethod
    def setUpClass(cls):
        # Silence every log record up to and including CRITICAL before the tests run.
        # The visible code does not re-enable logging afterwards.
        silenced_level = logging.CRITICAL
        logging.disable(silenced_level)

    def test_creation(self):
        # A pandas series indexed by plain integers must be rejected with a ValueError
        # (presumably because a datetime index is required - confirm against from_series).
        with self.assertRaises(ValueError):
            TimeSeries.from_series(pd.Series(range(10), range(10)))

        # A series built from a datetime-indexed pandas series gives that series back.
        built = TimeSeries.from_series(self.pd_series1)
        round_trip = built.pd_series()
        self.assertTrue(round_trip.equals(self.pd_series1))

    def test_alt_creation(self):
        # Fewer than three points and no explicit frequency: construction must fail.
        with self.assertRaises(ValueError):
            two_days = pd.date_range('20130101', '20130102')
            first_two_values = self.pd_series1.values[:2]
            TimeSeries.from_times_and_values(two_days, first_two_values)

        # Time index and values of different lengths: construction must fail.
        with self.assertRaises(ValueError):
            TimeSeries.from_times_and_values(self.pd_series1.index,
                                             self.pd_series1.values[:-1])

        # Time stamps supplied in random order must end up sorted, values following along.
        day_numbers = np.random.permutation(range(1, 11))
        shuffled_index = pd.to_datetime(
            ['201301{:02d}'.format(day) for day in day_numbers])
        shuffled_values = self.pd_series1.values[day_numbers - 1]
        series_test = TimeSeries.from_times_and_values(shuffled_index,
                                                       shuffled_values)

        first_day = pd.to_datetime('20130101')
        last_day = pd.to_datetime('20130110')
        self.assertTrue(series_test.start_time() == first_day)
        self.assertTrue(series_test.end_time() == last_day)
        self.assertTrue(series_test.pd_series().equals(self.pd_series1))
        self.assertTrue(series_test.freq() == self.series1.freq())

    # TODO test over to_dataframe when multiple features choice is decided

    def test_eq(self):
        # Two series built from the same data compare equal, and `!=` agrees with `==`.
        twin = TimeSeries.from_series(self.pd_series1)
        self.assertTrue(self.series1 == twin)
        self.assertFalse(self.series1 != twin)

        # Same values on a time index shifted by one day: not equal.
        later_index = pd.date_range('20130102', '20130111')
        shifted = TimeSeries.from_series(pd.Series(range(10), index=later_index))
        self.assertFalse(self.series1 == shifted)

    def test_dates(self):
        # The fixture spans ten daily points: start, end and duration must reflect that.
        fixture = self.series1
        self.assertEqual(fixture.start_time(), pd.Timestamp('20130101'))
        self.assertEqual(fixture.end_time(), pd.Timestamp('20130110'))
        self.assertEqual(fixture.duration(), pd.Timedelta(days=9))

    @staticmethod
    def helper_test_slice(test_case, test_series: TimeSeries):
        """
        Shared checks for `slice`, `slice_n_points_after` and `slice_n_points_before`.

        Parameters
        ----------
        test_case
            The running test case; provides the assertion methods.
        test_series
            The series to slice; the expected values assume the daily fixture
            spanning 2013-01-01 to 2013-01-10.
        """
        ts = pd.Timestamp

        # Each entry: (slice start, slice end, expected start, expected end).
        slice_cases = (
            # base case
            ('20130104', '20130107', '20130104', '20130107'),
            # time stamp not in series
            ('20130104 12:00:00', '20130107', '20130105', '20130107'),
            # end timestamp after series
            ('20130108', '20130201', '20130108', '20130110'),
        )
        for begin, end, expected_begin, expected_end in slice_cases:
            piece = test_series.slice(ts(begin), ts(end))
            test_case.assertEqual(piece.start_time(), ts(expected_begin))
            test_case.assertEqual(piece.end_time(), ts(expected_end))

        # n points after a time stamp of the series
        after = test_series.slice_n_points_after(ts('20130102'), n=3)
        test_case.assertEqual(after.start_time(), ts('20130102'))
        test_case.assertTrue(len(after.values()) == 3)
        test_case.assertEqual(after.end_time(), ts('20130104'))

        # n points after a time stamp between two points, with n exceeding what is left
        after_clipped = test_series.slice_n_points_after(
            ts('20130107 12:00:10'), n=10)
        test_case.assertEqual(after_clipped.start_time(), ts('20130108'))
        test_case.assertEqual(after_clipped.end_time(), ts('20130110'))

        # n points before a time stamp of the series
        before = test_series.slice_n_points_before(ts('20130105'), n=3)
        test_case.assertEqual(before.end_time(), ts('20130105'))
        test_case.assertTrue(len(before.values()) == 3)
        test_case.assertEqual(before.start_time(), ts('20130103'))

        # n points before a time stamp between two points, with n exceeding what is available
        before_clipped = test_series.slice_n_points_before(
            ts('20130107 12:00:10'), n=10)
        test_case.assertEqual(before_clipped.start_time(), ts('20130101'))
        test_case.assertEqual(before_clipped.end_time(), ts('20130107'))

    @staticmethod
    def helper_test_split(test_case, test_series: TimeSeries):
        """
        Shared checks for `split_after` and `split_before`.

        Parameters
        ----------
        test_case
            The running test case; provides the assertion methods.
        test_series
            The series to split; the expected values assume the daily fixture.
        """
        pivot = pd.Timestamp('20130104')

        # split_after: the pivot is the last point of the first part.
        head_after, tail_after = test_series.split_after(pivot)
        test_case.assertEqual(head_after.end_time(), pivot)
        test_case.assertEqual(tail_after.start_time(),
                              pd.Timestamp('20130105'))

        # split_before: the pivot is the first point of the second part.
        head_before, tail_before = test_series.split_before(pivot)
        test_case.assertEqual(head_before.end_time(),
                              pd.Timestamp('20130103'))
        test_case.assertEqual(tail_before.start_time(), pivot)

        # The first parts keep the frequency string of the original series.
        for head in (head_after, head_before):
            test_case.assertEqual(test_series.freq_str(), head.freq_str())

    @staticmethod
    def helper_test_drop(test_case, test_series: TimeSeries):
        """
        Shared checks for `drop_after` and `drop_before`.

        Parameters
        ----------
        test_case
            The running test case; provides the assertion methods.
        test_series
            The series to truncate.
        """
        cut = pd.Timestamp('20130105')

        # drop_after keeps strictly earlier points only.
        earlier = test_series.drop_after(cut)
        test_case.assertEqual(earlier.end_time(), cut - test_series.freq())
        test_case.assertTrue(np.all(earlier.time_index() < cut))

        # drop_before keeps strictly later points only.
        later = test_series.drop_before(cut)
        test_case.assertEqual(later.start_time(), cut + test_series.freq())
        test_case.assertTrue(np.all(later.time_index() > cut))

        # Both remainders keep the frequency string of the original series.
        for remainder in (earlier, later):
            test_case.assertEqual(test_series.freq_str(),
                                  remainder.freq_str())

    @staticmethod
    def helper_test_intersect(test_case, test_series: TimeSeries):
        """
        Shared checks for `slice_intersect`.

        Parameters
        ----------
        test_case
            The running test case; provides the assertion methods.
        test_series
            The series to intersect; the expected values assume the daily fixture
            spanning 2013-01-01 to 2013-01-10.
        """

        def daily(values, first_day, last_day):
            # pandas series holding `values` on a daily index from first_day to last_day
            return pd.Series(values, index=pd.date_range(first_day, last_day))

        # Other series lies entirely inside the tested one.
        inner = TimeSeries.from_series(
            daily(range(2, 8), '20130102', '20130107'))
        within = test_series.slice_intersect(inner)
        test_case.assertEqual(within.start_time(), pd.Timestamp('20130102'))
        test_case.assertEqual(within.end_time(), pd.Timestamp('20130107'))

        # Outside of range
        overhanging = TimeSeries.from_series(
            daily(range(6, 13), '20130106', '20130112'))
        clipped = test_series.slice_intersect(overhanging)
        test_case.assertEqual(clipped.start_time(), pd.Timestamp('20130106'))
        test_case.assertEqual(clipped.end_time(), pd.Timestamp('20130110'))

        # Small intersect
        barely = TimeSeries.from_series(
            daily(range(9, 13), '20130109', '20130112'))
        test_case.assertEqual(len(test_series.slice_intersect(barely)), 2)

        # No intersect
        # NOTE(review): unlike the cases above, this one builds the series with the TimeSeries
        # constructor instead of from_series, inside the assertRaises block. Confirm that the
        # ValueError comes from slice_intersect and not from the construction itself.
        with test_case.assertRaises(ValueError):
            disjoint = TimeSeries(daily(range(6, 13), '20130116', '20130122'))
            test_series.slice_intersect(disjoint)

    def test_rescale(self):
        # Rescaling series1 (whose values start at 0) is expected to raise a ValueError.
        with self.assertRaises(ValueError):
            self.series1.rescale_with_value(1)

        # Rescaling with 0 yields a series made of zeros only.
        zeroed = self.series3.rescale_with_value(0)
        self.assertTrue(np.all(zeroed.values() == 0))

        # Each entry: (value passed to rescale_with_value, factor expected on series3).
        # TODO: test will fail if value > 1e24 due to num imprecision
        expectations = (
            (-5, -1.),
            (1, 0.2),
            (1e+20, 0.2e+20),
        )
        for target, factor in expectations:
            rescaled = self.series3.rescale_with_value(target)
            self.assertTrue(self.series3 * factor == rescaled)

    @staticmethod
    def helper_test_shift(test_case, test_series: TimeSeries):
        """
        Shared checks for `TimeSeries.shift`.

        Parameters
        ----------
        test_case
            The running test case; provides the assertion methods and `series1`.
        test_series
            The series whose `shift` behaviour is verified.
        """
        step = test_series.freq()
        index = test_series.time_index()

        # Shifting by zero leaves the series unchanged.
        # NOTE(review): this check uses test_case.series1 rather than test_series - confirm intent.
        unshifted = test_case.series1.shift(0)
        test_case.assertTrue(unshifted == test_case.series1)

        # Shifting forward by one step drops the first time stamp and adds one after the last.
        forward = test_series.shift(1)
        expected_forward = index[1:].append(
            pd.DatetimeIndex([index[-1] + step]))
        test_case.assertTrue(forward.time_index().equals(expected_forward))

        # Shifting backward by one step adds a time stamp in front and drops the last one.
        backward = test_series.shift(-1)
        expected_backward = pd.DatetimeIndex([index[0] - step
                                              ]).append(index[:-1])
        test_case.assertTrue(backward.time_index().equals(expected_backward))

        # Shifts too large to be represented must raise an OverflowError.
        with test_case.assertRaises(OverflowError):
            test_series.shift(1e6)

        monthly = TimeSeries.from_times_and_values(
            pd.date_range('20130101', '20130601', freq='m'), range(5))
        with test_case.assertRaises(OverflowError):
            monthly.shift(1e4)

        # A single-point series with an explicit daily frequency can be shifted as well.
        single_day = TimeSeries.from_times_and_values(
            pd.date_range('20130101', '20130101'), range(1), freq='D')
        next_day = single_day.shift(1)
        test_case.assertEqual(next_day.time_index()[0],
                              pd.Timestamp('20130102'))

    @staticmethod
    def helper_test_append(test_case, test_series: TimeSeries):
        """
        Shared checks for `TimeSeries.append`.

        Parameters
        ----------
        test_case
            The running test case; provides the assertion methods.
        test_series
            The series that is split and put back together.
        """
        # Appending the second half to the first one restores the series and its frequency.
        head, tail = test_series.split_after(pd.Timestamp('20130106'))
        test_case.assertEqual(head.append(tail), test_series)
        test_case.assertEqual(head.append(tail).freq(), test_series.freq())

        # Creating a gap is not allowed
        gapped_tail = test_series.drop_before(pd.Timestamp('20130107'))
        with test_case.assertRaises(ValueError):
            head.append(gapped_tail)

        # Changing the frequency is not allowed
        thirty_day_index = pd.date_range('20130107', '20130507', freq='30D')
        other_freq = TimeSeries.from_times_and_values(thirty_day_index,
                                                      range(5))
        with test_case.assertRaises(ValueError):
            head.append(other_freq)

    @staticmethod
    def helper_test_append_values(test_case, test_series: TimeSeries):
        """
        Shared checks for `TimeSeries.append_values`.

        Parameters
        ----------
        test_case
            The running test case; provides the assertion methods.
        test_series
            The series that is split and rebuilt from raw values.
        """
        # Rebuilding the series from its two halves, with and without an explicit index.
        head, tail = test_series.split_after(pd.Timestamp('20130106'))
        with_index = head.append_values(tail.values(), tail.time_index())
        test_case.assertEqual(with_index, test_series)
        without_index = head.append_values(tail.values())
        test_case.assertEqual(without_index, test_series)

        # Same reconstruction starting from drop_after / drop_before pieces.
        leading = test_series.drop_after(pd.Timestamp('20130105'))
        trailing = test_series.drop_before(pd.Timestamp('20130104'))
        test_case.assertEqual(leading.append_values(trailing.values()),
                              test_series)

        # Appending nothing changes nothing.
        test_case.assertEqual(head.append_values([]), head)

        # Values and time stamps given in a random (but matching) order.
        shuffle = np.random.permutation(range(len(tail.values())))
        shuffled = head.append_values(tail.values()[shuffle],
                                      tail.time_index()[shuffle])
        test_case.assertEqual(shuffled, test_series)

        # Indices that must be rejected. Each one is built lazily so that it is evaluated
        # inside the assertRaises block, right after the values.
        invalid_indices = (
            # add non consecutive index
            lambda: tail.time_index() + tail.freq(),
            # add existing indices
            lambda: tail.time_index() - 3 * tail.freq(),
            # other frequency
            lambda: pd.date_range('20130107', '20130113', freq='2d'),
        )
        for make_index in invalid_indices:
            with test_case.assertRaises(ValueError):
                test_case.assertEqual(
                    head.append_values(tail.values(), make_index()),
                    test_series)

    def test_slice(self):
        # Run the shared slicing checks against the series1 fixture.
        fixture = self.series1
        TimeSeriesTestCase.helper_test_slice(self, fixture)

    def test_split(self):
        # Run the shared splitting checks against the series1 fixture.
        fixture = self.series1
        TimeSeriesTestCase.helper_test_split(self, fixture)

    def test_drop(self):
        # Run the shared drop_after / drop_before checks against the series1 fixture.
        fixture = self.series1
        TimeSeriesTestCase.helper_test_drop(self, fixture)

    def test_intersect(self):
        # Run the shared slice_intersect checks against the series1 fixture.
        fixture = self.series1
        TimeSeriesTestCase.helper_test_intersect(self, fixture)

    def test_shift(self):
        # Run the shared shift checks against the series1 fixture.
        fixture = self.series1
        TimeSeriesTestCase.helper_test_shift(self, fixture)

    def test_append(self):
        # Run the shared append checks against the series1 fixture.
        fixture = self.series1
        TimeSeriesTestCase.helper_test_append(self, fixture)

    def test_append_values(self):
        # Run the shared append_values checks against the series1 fixture.
        fixture = self.series1
        TimeSeriesTestCase.helper_test_append_values(self, fixture)

    def test_update(self):
        # Checks TimeSeries.update on the daily fixtures. update() is used through its
        # return value everywhere below.
        # seriesA differs from series1 at positions 2, 7 and 9; seriesB has the same values as series1.
        seriesA: TimeSeries = TimeSeries.from_times_and_values(
            self.times, [0, 1, 1, 3, 4, 5, 6, 2, 8, 0])
        seriesB: TimeSeries = TimeSeries.from_times_and_values(
            self.times, range(10))

        # change nothing
        seriesC = self.series1.copy()
        # passing a time index without any values must raise
        with self.assertRaises(ValueError):
            seriesA.update(self.times)
        # updating with the values already present leaves the series equal to series1
        seriesC = seriesC.update(self.times, range(10))
        self.assertEqual(seriesC, self.series1)

        # different len
        # ten time stamps against 0, 3 or 4 values must raise
        with self.assertRaises(ValueError):
            seriesA.update(self.times, [])
        with self.assertRaises(ValueError):
            seriesA.update(self.times, np.arange(3))
        with self.assertRaises(ValueError):
            seriesA.update(self.times, np.arange(4))

        # change outside
        seriesC = seriesA.copy()
        # an index lying entirely 100 steps after the series must raise
        with self.assertRaises(ValueError):
            seriesC.update(self.times + 100 * seriesC.freq(), range(10))
        # an index mixing the series' own time stamps with dates from 2014 is accepted; the
        # result equals series1, so the 2014 entries presumably get ignored - confirm in update()
        seriesC = seriesC.update(
            self.times.append(pd.date_range('20140101', '20140110')),
            list(range(10)) + [0] * 10)
        self.assertEqual(seriesC, self.series1)

        # change random
        # only the three differing positions are updated, given in non-chronological order
        seriesC = seriesA.copy()
        seriesC = seriesC.update(
            pd.DatetimeIndex(['20130108', '20130110', '20130103']), [7, 9, 2])
        self.assertEqual(seriesC, self.series1)

        # change one of each series
        # seriesD takes over the values of seriesA; it is brought back to series1 in the nan section
        seriesD = seriesB.copy()
        seriesD = seriesD.update(self.times, seriesA.pd_series().values)
        seriesA = seriesA.update(
            pd.DatetimeIndex(['20130103', '20130108', '20130110']), [2, 7, 9])
        self.assertEqual(seriesA, self.series1)
        # every second time stamp of seriesB receives the values 0..4
        seriesB = seriesB.update(self.times[::2], range(5))
        self.assertNotEqual(seriesB, self.series2)

        # use nan
        # all entries are NaN except positions 2, 7 and 9; the result equals series1, so NaN
        # entries presumably leave the existing values untouched - confirm in update()
        new_series = np.empty(10)
        new_series[:] = np.nan
        new_series[[2, 7, 9]] = [2, 7, 9]
        seriesD = seriesD.update(self.times, new_series)
        self.assertEqual(seriesD, self.series1)

    def test_ops(self):
        """Check arithmetic operators between ``self.series1`` and a constant.

        The constant is used both as a scalar ``2`` and as a ``TimeSeries``
        filled with 2 on the index of ``self.pd_series1``. Division by zero
        (scalar or series containing a zero) must raise ZeroDivisionError.
        """
        index = self.pd_series1.index

        def as_series(values):
            # Wrap raw values into a TimeSeries on the shared index.
            return TimeSeries.from_series(pd.Series(values, index=index))

        twos = as_series([2] * 10)
        expected_sum = as_series(range(2, 12))
        expected_difference = as_series(range(-2, 8))
        expected_product = as_series(range(0, 20, 2))
        expected_quotient = as_series([i / 2 for i in range(10)])
        expected_square = as_series([float(i**2) for i in range(10)])

        # addition (series, scalar, reflected scalar)
        self.assertEqual(self.series1 + twos, expected_sum)
        self.assertEqual(self.series1 + 2, expected_sum)
        self.assertEqual(2 + self.series1, expected_sum)

        # subtraction
        self.assertEqual(self.series1 - twos, expected_difference)
        self.assertEqual(self.series1 - 2, expected_difference)

        # multiplication (series, scalar, reflected scalar)
        self.assertEqual(self.series1 * twos, expected_product)
        self.assertEqual(self.series1 * 2, expected_product)
        self.assertEqual(2 * self.series1, expected_product)

        # division and power
        self.assertEqual(self.series1 / twos, expected_quotient)
        self.assertEqual(self.series1 / 2, expected_quotient)
        self.assertEqual(self.series1**2, expected_square)

        # Cannot divide by a TimeSeries with a value 0.
        with self.assertRaises(ZeroDivisionError):
            self.series1 / self.series1

        # Cannot divide by 0.
        with self.assertRaises(ZeroDivisionError):
            self.series1 / 0

    def test_getitem(self):
        """Check ``TimeSeries.__getitem__`` for the supported key types.

        Keys exercised: a ``DatetimeIndex``, an integer slice, a single
        ``pd.Timestamp`` and a slice of timestamps. Unknown dates and a
        reversed slice are expected to raise.
        """
        seriesA: TimeSeries = self.series1.drop_after(pd.Timestamp("20130105"))
        self.assertEqual(self.series1[pd.date_range('20130101', '20130104')],
                         seriesA)
        self.assertEqual(self.series1[:4], seriesA)
        # assertEqual (instead of assertTrue(a == b)) reports both operands
        # when the comparison fails.
        self.assertEqual(
            self.series1[pd.Timestamp('20130101')],
            TimeSeries(self.series1.pd_dataframe()[:1],
                       freq=self.series1.freq()))
        self.assertEqual(
            self.series1[pd.Timestamp('20130101'):pd.Timestamp('20130104')],
            seriesA)

        # a date range with no overlap with the series
        with self.assertRaises(IndexError):
            self.series1[pd.date_range('19990101', '19990201')]

        # a string key that does not match anything in the series
        with self.assertRaises(KeyError):
            self.series1['19990101']

        # reversed slicing is rejected
        with self.assertRaises(IndexError):
            self.series1[::-1]

    def test_fill_missing_dates(self):
        """Check how ``TimeSeries.from_series`` handles holes in the time index.

        The pandas fixtures are built outside the ``assertRaises`` blocks so
        that only the ``from_series`` call can satisfy the expected
        ``ValueError``; an error raised while building a fixture would
        otherwise make the test pass for the wrong reason.
        """
        # Daily index with 2013-01-05 missing (9 timestamps).
        daily_with_hole = pd.date_range('20130101', '20130104').append(
            pd.date_range('20130106', '20130110'))
        holed_daily = pd.Series(range(9), index=daily_with_hole)

        # Daily segment followed by a 2-day-step segment (7 timestamps).
        mixed_step_index = pd.date_range('20130101', '20130104').append(
            pd.date_range('20130106', '20130110', freq='2D'))
        mixed_step = pd.Series(range(7), index=mixed_step_index)

        with self.assertRaises(ValueError):
            # Series cannot have date holes without automatic filling
            TimeSeries.from_series(holed_daily, fill_missing_dates=False)

        with self.assertRaises(ValueError):
            # Main series should have explicit frequency in case of date holes
            TimeSeries.from_series(mixed_step)

        # With default arguments the holed daily index is accepted and the
        # resulting frequency is daily.
        series_test = TimeSeries.from_series(holed_daily)
        self.assertEqual(series_test.freq_str(), 'D')

        # Two 2-day-step segments separated by a gap: 2013-01-05 is missing
        # from the input and is expected to be present as NaN afterwards.
        range_ = pd.date_range('20130101', '20130104', freq='2D') \
            .append(pd.date_range('20130107', '20130111', freq='2D'))
        series_test = TimeSeries.from_series(pd.Series(range(5), index=range_))
        self.assertEqual(series_test.freq_str(), '2D')
        self.assertEqual(series_test.start_time(), range_[0])
        self.assertEqual(series_test.end_time(), range_[-1])
        self.assertTrue(math.isnan(series_test.pd_series().get('20130105')))

    def test_resample_timeseries(self):
        """Resample a 10-day daily series to hourly and to 2-day frequency.

        For each target frequency the resulting ``freq_str`` and a few
        individual values are checked; with the 2-day frequency the skipped
        day must be absent from the index.
        """
        daily_index = pd.date_range('20130101', '20130110')
        timeseries = TimeSeries.from_series(
            pd.Series(range(10), index=daily_index))

        # Upsampling to hourly frequency.
        hourly = timeseries.resample('H')
        self.assertEqual(hourly.freq_str(), 'H')
        hourly_expectations = (
            ('20130101020000', 0),
            ('20130102020000', 1),
            ('20130109090000', 8),
        )
        for stamp, expected in hourly_expectations:
            self.assertEqual(hourly.pd_series().at[pd.Timestamp(stamp)],
                             expected)

        # Downsampling to one point every two days.
        two_daily = timeseries.resample('2D')
        self.assertEqual(two_daily.freq_str(), '2D')
        self.assertEqual(two_daily.pd_series().at[pd.Timestamp('20130101')], 0)
        with self.assertRaises(KeyError):
            two_daily.pd_series().at[pd.Timestamp('20130102')]

        self.assertEqual(two_daily.pd_series().at[pd.Timestamp('20130109')], 8)

    def test_short_series_creation(self):
        """Check construction of very short (or empty) ``TimeSeries`` objects.

        A two-point series without ``freq`` and an empty pandas series must
        raise ``ValueError``; a two-point series with an explicit ``freq``
        must be accepted.
        """
        # test missing freq argument error
        with self.assertRaises(ValueError):
            TimeSeries.from_times_and_values(
                pd.date_range('20130101', '20130102'), range(2))
        # test empty pandas series error
        # The dtype is given explicitly: the default dtype of an empty
        # pd.Series depends on the pandas version (and omitting it emits a
        # DeprecationWarning on pandas 1.x).
        empty_series = pd.Series(dtype='float64')
        with self.assertRaises(ValueError):
            TimeSeries.from_series(empty_series, freq='D')
        # test frequency mismatch case: 'M' is requested for daily
        # timestamps, and the resulting frequency is expected to be daily
        seriesA = TimeSeries.from_times_and_values(
            pd.date_range('20130101', '20130105'), range(5), freq='M')
        self.assertEqual(seriesA.freq(), 'D')
        # test successful instantiation of TimeSeries with length 2
        TimeSeries.from_times_and_values(pd.date_range('20130101', '20130102'),
                                         range(2),
                                         freq='D')

    def test_short_series_slice(self):
        """Split and slice ``self.series1`` so that very short pieces result.

        Checks the lengths of both halves for splits near either end of the
        series, and that a slice with identical start and end has length 1.
        """
        full = self.series1

        # Splits near the end of the series.
        head, tail = full.split_after(pd.Timestamp('20130108'))
        self.assertEqual((len(head), len(tail)), (8, 2))

        head, tail = full.split_after(pd.Timestamp('20130109'))
        self.assertEqual((len(head), len(tail)), (9, 1))
        self.assertEqual(tail.time_index()[0], full.time_index()[-1])

        # Splits near the start of the series.
        head, tail = full.split_before(pd.Timestamp('20130103'))
        self.assertEqual((len(head), len(tail)), (2, 8))

        head, tail = full.split_before(pd.Timestamp('20130102'))
        self.assertEqual((len(head), len(tail)), (1, 9))
        self.assertEqual(head.time_index()[-1], full.time_index()[0])

        # Slice whose start and end coincide.
        single_day = pd.Timestamp('20130105')
        one_point = full.slice(single_day, pd.Timestamp('20130105'))
        self.assertEqual(len(one_point), 1)

    def test_map(self):
        """Compare ``TimeSeries.map`` with ``DataFrame.applymap`` on column subsets.

        ``np.sin`` is applied to one, two or all three columns of a random
        3-column series, once through pandas and once through ``map``.
        """
        sine = np.sin
        series = TimeSeries.from_times_and_values(
            pd.date_range('20000101', '20000110'), np.random.randn(10, 3))

        # Four separate dataframes, one per scenario.
        first_only, last_only, first_two, all_cols = (
            series.pd_dataframe() for _ in range(4))

        # Reference results computed directly with pandas.
        first_only[["0"]] = first_only[["0"]].applymap(sine)
        last_only[["2"]] = last_only[["2"]].applymap(sine)
        first_two[["0", "1"]] = first_two[["0", "1"]].applymap(sine)
        all_cols = all_cols.applymap(sine)

        expected_0 = TimeSeries(first_only, 'D')
        expected_2 = TimeSeries(last_only, 'D')
        expected_01 = TimeSeries(first_two, 'D')
        expected_012 = TimeSeries(all_cols, 'D')

        # Mapping a column selection must match the pandas reference for the
        # same columns.
        self.assertEqual(expected_0['0'], series['0'].map(sine))
        self.assertEqual(expected_2['2'], series['2'].map(sine))
        self.assertEqual(expected_01[['0', '1']],
                         series[['0', '1']].map(sine))
        self.assertEqual(expected_012, series[['0', '1', '2']].map(sine))
        self.assertEqual(expected_012, series.map(sine))

        # expected_01 still has all three columns, so it must differ from
        # the two-column mapped selection.
        self.assertNotEqual(expected_01, series[['0', '1']].map(sine))

    def test_map_with_timestamp(self):
        """Check ``map`` with a two-argument ``(timestamp, value)`` function.

        The series runs linearly from 1 to 12 over twelve month starts
        beginning 2000-01-01; subtracting each timestamp's month is expected
        to give an all-zero series.
        """
        shared_kwargs = dict(length=12,
                             freq='MS',
                             start_ts=pd.Timestamp('2000-01-01'))
        series = linear_timeseries(start_value=1, end_value=12,
                                   **shared_kwargs)
        zeroes = constant_timeseries(value=0.0, **shared_kwargs)

        def subtract_month(ts, x):
            # ts: timestamp of the point, x: its value
            return x - ts.month

        self.assertEqual(series.map(subtract_month), zeroes)

    def test_map_wrong_fn(self):
        """Check that ``map`` rejects callables taking three arguments.

        Both a plain Python function and the NumPy ufunc built from it must
        make ``map`` raise ``ValueError``.
        """
        series = linear_timeseries(start_value=1,
                                   end_value=12,
                                   length=12,
                                   freq='MS',
                                   start_ts=pd.Timestamp('2000-01-01'))

        def three_arg_sum(x, y, z):
            return x + y + z

        unsupported_callables = (
            three_arg_sum,
            np.frompyfunc(three_arg_sum, 3, 1),
        )
        for bad_fn in unsupported_callables:
            with self.assertRaises(ValueError):
                series.map(bad_fn)