Exemplo n.º 1
0
    def test_add_holidays(self):
        """Check the component added by ``add_holidays`` on daily and hourly series.

        The test reads the last component of the series after each call and
        compares it against the expected 0/1 flag for selected timestamps.
        """

        def last_component(series):
            # The test inspects whichever component currently comes last.
            return series.pd_dataframe().iloc[:, series.width - 1]

        def assert_flags(flags, expectations):
            for stamp, expected in expectations:
                self.assertEqual(flags.at[pd.Timestamp(stamp)], expected)

        daily_index = pd.date_range(start=pd.Timestamp("20201201"),
                                    periods=30,
                                    freq="D")
        daily = TimeSeries.from_times_and_values(daily_index,
                                                 range(len(daily_index)))

        # US calendar: Dec 25th is flagged, Dec 10th and Dec 26th are not
        daily = daily.add_holidays("US")
        assert_flags(
            last_component(daily),
            [("20201225", 1), ("20201210", 0), ("20201226", 0)],
        )

        # PL calendar: Dec 25th and Dec 26th are flagged, Dec 10th is not
        daily = daily.add_holidays("PL")
        assert_flags(
            last_component(daily),
            [("20201225", 1), ("20201210", 0), ("20201226", 1)],
        )
        # one value component plus one component per add_holidays call
        self.assertEqual(daily.width, 3)

        # hourly series starting on Dec 24th
        hourly_index = pd.date_range(start=pd.Timestamp("20201224"),
                                     periods=50,
                                     freq="H")
        hourly = TimeSeries.from_times_and_values(hourly_index,
                                                  range(len(hourly_index)))
        hourly = hourly.add_holidays("US")
        assert_flags(
            last_component(hourly),
            [("2020-12-25 01:00:00", 1), ("2020-12-24 23:00:00", 0)],
        )
Exemplo n.º 2
0
    def test_gaussian_process(self):
        """GaussianProcessFilter test.

        Builds a cosine signal over five periods, adds Gaussian noise and
        checks that the filtered series deviates less from the clean signal
        than the noisy input does, both for a single sample and for the
        quantile series computed from 100 samples.
        """
        angles = np.radians(np.linspace(0, 360 * 5, 200))
        clean = TimeSeries.from_values(np.cos(angles))
        noisy = clean + TimeSeries.from_values(
            np.random.normal(0, 0.4, len(clean)))

        gp_filter = GaussianProcessFilter(
            kernel=ExpSineSquared(),
            alpha=0.2,
            n_restarts_optimizer=100,
            random_state=42,
        )

        # single-sample filtering
        single_sample = gp_filter.filter(noisy, num_samples=1)
        input_error = noisy - clean
        single_error = single_sample - clean
        self.assertGreater(input_error.values().std(),
                           single_error.values().std())

        # quantile series of a stochastic filtering result
        sampled = gp_filter.filter(noisy, num_samples=100)
        quantile_error = sampled.quantile_timeseries() - clean
        self.assertGreater(input_error.values().std(),
                           quantile_error.values().std())
Exemplo n.º 3
0
def train_theta_boxcox(ts, seasonality, n):
    """Fit a Theta model on a Box-Cox transformed series and forecast ``n`` steps.

    The series is shifted when it contains negative values, Box-Cox
    transformed, fitted with ``Theta(theta=0)`` without seasonality, and the
    forecast is mapped back with the inverse transform, un-shifted and
    multiplied by ``seasonality``. Negative forecast values are replaced by 0.

    Parameters
    ----------
    ts
        Training series; only its univariate values and time index are used.
    seasonality
        Series multiplied onto the forecast; its time index is also used as
        the time index of the forecast.
        NOTE(review): presumably it must contain exactly ``n`` time steps - confirm.
    n
        Number of steps to forecast.

    Returns
    -------
    The result assigned to ``forecast`` (see the review note near the end of
    the function about the ``update`` call).
    """
    theta_bc = Theta(theta=0, season_mode=SeasonalityMode.NONE)
    shiftdata = 0
    # Move the series upwards when any value is negative, before the Box-Cox transform.
    if (ts.univariate_values() < 0).any():
        shiftdata = -ts.min() + 100
        ts = ts + shiftdata
    # Without an explicit lambda, boxcox returns the transformed values and the fitted lambda.
    new_values, lmbd = boxcox(ts.univariate_values())
    if lmbd < 0:
        # Scan candidate lambdas in [lmbd - 1, 0] for a replacement.
        lmbds, value = boxcox_normplot(ts.univariate_values(),
                                       lmbd - 1,
                                       0,
                                       N=100)
        if np.isclose(value[0], 0):
            lmbd = lmbds[np.argmax(value)]
            new_values = boxcox(ts.univariate_values(), lmbd)
        # A transform that collapses to a constant is replaced by lambda = 0.
        if np.isclose(new_values, new_values[0]).all():
            lmbd = 0
            new_values = boxcox(ts.univariate_values(), lmbd)
    ts = TimeSeries.from_times_and_values(ts.time_index(), new_values)
    theta_bc.fit(ts)
    forecast = theta_bc.predict(n)

    # Back to the original scale: inverse Box-Cox, then undo the shift.
    new_values = inv_boxcox(forecast.univariate_values(), lmbd)
    forecast = TimeSeries.from_times_and_values(seasonality.time_index(),
                                                new_values)
    if shiftdata > 0:
        forecast = forecast - shiftdata
    # Re-apply the seasonal component multiplicatively.
    forecast = forecast * seasonality
    # Replace negative forecast values by zero.
    if (forecast.univariate_values() < 0).any():
        indices = seasonality.time_index()[forecast < 0]
        # NOTE(review): the return value of update(..., inplace=True) is assigned
        # back to `forecast`; confirm that update returns the series (and not
        # None) in in-place mode for the darts version in use.
        forecast = forecast.update(indices,
                                   np.zeros(len(indices)),
                                   inplace=True)
    return forecast
Exemplo n.º 4
0
    def test_get_item(self):
        """Check the static covariates of series obtained through ``[]``.

        Two setups are covered: static covariates with one row per component,
        and a single row of static covariates on a three-component series.
        """

        def check_selection(series, key, expected_index):
            # index the series, then verify index and container type of its covariates
            selected = series[key]
            assert selected.static_covariates.index.equals(expected_index)
            assert isinstance(selected.static_covariates, pd.DataFrame)

        # multi component static covariates
        per_component_covs = pd.DataFrame([["a", 0], ["b", 1]],
                                          columns=["cat", "num"])
        ts = TimeSeries.from_values(
            values=np.random.random((10, 2)),
            columns=["comp1", "comp2"],
        ).with_static_covariates(per_component_covs)

        assert ts.static_covariates.index.equals(ts.components)

        check_selection(ts, 0, ts.components)
        check_selection(ts, "comp1", pd.Index(["comp1"]))
        check_selection(ts, "comp2", pd.Index(["comp2"]))
        check_selection(ts, slice("comp1", "comp2"),
                        pd.Index(["comp1", "comp2"]))
        check_selection(ts, ["comp1", "comp2"], pd.Index(["comp1", "comp2"]))

        # uni/global component static covariates
        single_row_covs = pd.DataFrame([["a", 0]], columns=["cat", "num"])
        ts = TimeSeries.from_values(
            values=np.random.random((10, 3)),
            columns=["comp1", "comp2", "comp3"],
        ).with_static_covariates(single_row_covs)

        # 1) one static covariate row on a multivariate series: the covariate
        # index is expected to be the default global name
        assert ts.static_covariates.index.equals(
            pd.Index([DEFAULT_GLOBAL_STATIC_COV_NAME]))
        all_component_keys = (
            0,
            slice("comp1", "comp3"),
            ["comp1", "comp2", "comp3"],
        )
        for key in all_component_keys:
            check_selection(ts, key,
                            pd.Index([DEFAULT_GLOBAL_STATIC_COV_NAME]))

        # 2) selecting a single component: the covariate index is expected to
        # equal that component's name
        for component in ("comp1", "comp2"):
            check_selection(ts, component, pd.Index([component]))
Exemplo n.º 5
0
    def test_creation(self):
        """Check series creation from a DataFrame and the short-input rule.

        A two-row input passed to the constructor without a frequency must
        raise ``ValueError``; the same rows with an explicit ``freq`` must not.
        """
        source_frame = self.dataframe1
        created = TimeSeries.from_dataframe(source_frame)
        values_match = np.all(
            created.pd_dataframe().values == (source_frame.values))
        self.assertTrue(values_match)

        # fewer than three rows: constructing without a frequency is rejected
        with self.assertRaises(ValueError):
            TimeSeries(source_frame.iloc[:2, :])
        # ... while the same rows with an explicit frequency are accepted
        TimeSeries.from_dataframe(source_frame.iloc[:2, :], freq="D")
Exemplo n.º 6
0
    def test_granger_causality(self):
        """Check input validation and basic results of ``granger_causality_tests``."""
        max_lag = 10

        two_components = constant_timeseries(start=0, end=9999).stack(
            constant_timeseries(start=0, end=9999))
        gaussian = gaussian_timeseries(start=0, end=9999)
        short_constant = constant_timeseries(start=0, end=999)
        uniform = TimeSeries.from_values(np.random.uniform(0, 1, 10000))
        multi_sample = TimeSeries.from_values(
            np.random.uniform(0, 1, (1000, 2, 1000)))
        datetime_constant = constant_timeseries(
            start=pd.Timestamp("2000-01-01"), length=10000)

        # Inputs expected to be rejected with an AssertionError, in either
        # argument position: first the two-component series (univariate
        # check), then the multi-sample series (deterministic check).
        rejected_pairs = [
            (two_components, short_constant),
            (short_constant, two_components),
            (two_components, multi_sample),
            (multi_sample, two_components),
        ]
        for first, second in rejected_pairs:
            with self.assertRaises(AssertionError):
                granger_causality_tests(first,
                                        second,
                                        max_lag,
                                        verbose=False)

        # Integer-indexed vs. datetime-indexed series: frequency check
        with self.assertRaises(ValueError):
            granger_causality_tests(gaussian,
                                    datetime_constant,
                                    max_lag,
                                    verbose=False)

        # Basic results: p-value of the ssr F-test at lag 1
        same_series = granger_causality_tests(uniform,
                                              uniform,
                                              max_lag,
                                              verbose=False)
        self.assertTrue(same_series[1][0]["ssr_ftest"][1] > 0.99)

        independent = granger_causality_tests(gaussian,
                                              uniform,
                                              max_lag,
                                              verbose=False)
        self.assertTrue(independent[1][0]["ssr_ftest"][1] > 0.01)
Exemplo n.º 7
0
def _chunk_to_series(chunk, value_col):
    """Build an hourly TimeSeries from one chunk, indexed by its 'CHARTTIME' column."""
    return TimeSeries.from_dataframe(
        df=chunk,
        time_col='CHARTTIME',
        value_cols=[value_col],
        freq='H')


def create_time_series(resampling_methods, chunk_ids, chunk_type, original_chunks, parameter,
                       window_idx, configs, mean=0, std=1):
    """Create, fill, optionally scale and pickle one time series per chunk.

    For every resampling method a dict ``chunk_id -> TimeSeries`` is built
    and written to ``<path>/time_series/``. With min-max scaling, the fitted
    scalers of prediction chunks are additionally written to
    ``<path>/scalers/``.

    Parameters
    ----------
    resampling_methods
        Iterable of resampling names; each selects the column
        ``VITAL_PARAMTER_VALUE_<name>_RESAMPLING`` of ``original_chunks``.
    chunk_ids
        Chunk identifiers matched against the ``CHUNK_ID_FILLED_TH`` column.
    chunk_type
        Label used in the output file name; scalers are only kept for ``'pred'``.
    original_chunks
        DataFrame holding all chunks, including a ``CHARTTIME`` column.
    parameter, window_idx
        Only used to build the output file names.
    configs
        Object providing ``scaling_method`` and ``with_exogenous_input``; it is
        also passed to ``get_script_path``.
    mean, std
        Passed to ``apply_standard_scaling`` when ``scaling_method`` is ``'standard'``.

    Returns
    -------
    None
        Results are written to disk.
    """
    # Some series have missing measurements, which would lead to a ValueError
    # during prediction, so every series is passed through the filler.
    filler = MissingValuesFiller()

    for resampling in resampling_methods:
        raw_column = f'VITAL_PARAMTER_VALUE_{resampling}_RESAMPLING'
        scaled_column = f'SCALED_{resampling}'
        series_per_resampling = dict()
        pred_scalers = dict()

        for chunk_id in chunk_ids:
            # Explicit copy: a column may be added below, and assigning onto a
            # filtered slice of original_chunks is a chained-assignment hazard.
            current_chunk = original_chunks[original_chunks['CHUNK_ID_FILLED_TH'] == chunk_id].copy()

            if configs.scaling_method == 'standard':
                current_chunk[scaled_column] = apply_standard_scaling(
                    current_chunk[raw_column], mean, std)
                series_per_resampling[chunk_id] = filler.transform(
                    _chunk_to_series(current_chunk, scaled_column))

            elif configs.scaling_method == 'min-max':
                # Darts uses MinMaxScaler by default
                current_scaler = Scaler()
                series_per_resampling[chunk_id] = current_scaler.fit_transform(
                    filler.transform(_chunk_to_series(current_chunk, raw_column)))

                # Keep the scaler of prediction chunks, except for the MEDIAN
                # resampling when exogenous input is configured.
                if chunk_type == 'pred' and \
                        (not configs.with_exogenous_input or resampling != 'MEDIAN'):
                    pred_scalers[chunk_id] = current_scaler

            else:  # apply no scaling
                series_per_resampling[chunk_id] = filler.transform(
                    _chunk_to_series(current_chunk, raw_column))

        # Save series dict
        path = get_script_path(configs)
        write_pickle_file(f'{path}/time_series/time_series_{parameter}_win{window_idx}_{chunk_type}_'
                          f'{resampling.capitalize()}.pickle', series_per_resampling)

        # Save scaler dict if it was filled
        if pred_scalers:
            write_pickle_file(f'{path}/scalers/scalers_{parameter}_win{window_idx}_{resampling.capitalize()}.pickle',
                              pred_scalers)
Exemplo n.º 8
0
def remove_seasonality(
    ts: TimeSeries,
    freq: int = None,
    model: SeasonalityMode = SeasonalityMode.MULTIPLICATIVE,
    method: str = "naive",
    **kwargs,
) -> TimeSeries:
    """
    Adjusts the TimeSeries `ts` for a seasonality of order `freq` using the `model` decomposition.

    Parameters
    ----------
    ts
        The TimeSeries to adjust. Must be univariate.
    freq
        The seasonality period to use.
    model
        The type of decomposition to use.
        Must be a `from darts import SeasonalityMode` Enum member.
        Either SeasonalityMode.MULTIPLICATIVE or SeasonalityMode.ADDITIVE.
        Defaults to SeasonalityMode.MULTIPLICATIVE.
    method
        The method to be used to decompose the series.
        - "naive" : Seasonal decomposition using moving averages [1]_.
        - "STL" : Season-Trend decomposition using LOESS [2]_. Only compatible with ``ADDITIVE`` model type.
        Defaults to "naive".
    kwargs
        Other keyword arguments are passed down to the decomposition method.

    Returns
    -------
    TimeSeries
        A new TimeSeries instance that corresponds to the seasonality-adjusted 'ts'.

    References
    ----------
    .. [1] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.seasonal_decompose.html
    .. [2] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.STL.html
    """
    ts._assert_univariate()

    # A decomposition type is mandatory.
    raise_if_not(
        model is not SeasonalityMode.NONE,
        "The model must be either MULTIPLICATIVE or ADDITIVE.",
    )

    # STL can only be combined with an additive model.
    stl_requested = method == "STL"
    additive_model = model in [SeasonalityMode.ADDITIVE, ModelMode.ADDITIVE]
    raise_if(
        stl_requested and not additive_model,
        f"Only ADDITIVE seasonality is compatible with the STL method. Current model is {model}.",
        logger,
    )

    # Only the seasonal part of the decomposition is needed here.
    _trend, seasonal_component = extract_trend_and_seasonality(
        ts, freq, model, method, **kwargs)
    return remove_from_series(ts, seasonal_component, model)
Exemplo n.º 9
0
def groe_owa(ts: TimeSeries, model: ForecastingModel, n1: int, m: int, p: int,
             fq: int):
    """
    Backtesting.
    Compute the OWA score iteratively on `p` timepoints following an expanding window mode.

    For each iteration ``i`` the first ``n1 + i * m`` points are used for
    training and all remaining points for testing. The model's sMAPE and MASE
    are each divided by those of the naive2 forecast and averaged into the OWA.

    Parameters
    ----------
    ts
        TimeSeries on which backtesting will be done.
    model
        model to backtest.
    n1
        minimum number of datapoints to take during training
    m
        step size
    p
        number of steps to iterate over
    fq
        Frequency of the time series.

    Returns
    -------
    Sum of all OWA errors. Iterations whose metrics raise ``ZeroDivisionError``
    or ``ValueError`` contribute 0. The sum is 0 when no iteration is run.
    """
    n = len(ts)
    errors = []
    for i in range(p):
        ni = n1 + i * m
        # Stop as soon as the training window covers the whole series. Using
        # `>=` rather than `==` also stops when a step overshoots the end,
        # which would otherwise give a negative horizon and an empty test set.
        if ni >= n:
            break
        npred = n - ni
        train = ts[:ni]
        if npred >= 3:
            test = ts[ni:]
        else:
            # Short tails are built explicitly with the series' frequency.
            test = TimeSeries(ts.pd_series()[ni:], freq=ts.freq_str())

        # Reference scores of the naive2 forecast on the same split.
        forecast_naive2 = naive2_groe(train, npred, fq)
        error_sape_n2 = mase_m4(train, test, forecast_naive2)
        error_ase_n2 = smape_m4(test, forecast_naive2)

        model.fit(train)
        forecast = model.predict(npred)
        try:
            error_sape = mase_m4(train, test, forecast)
            error_ase = smape_m4(test, forecast)
            owa = 0.5 * (error_sape / error_sape_n2) + 0.5 * (error_ase /
                                                              error_ase_n2)
            errors.append(np.sum(owa))
        except (ZeroDivisionError, ValueError):
            errors.append(0)
    return np.sum(errors)
Exemplo n.º 10
0
 def _fit(self,
          series: TimeSeries,
          future_covariates: Optional[TimeSeries] = None):
     """Fit the wrapped model on a univariate series.

     Parameters
     ----------
     series
         Training series; must be univariate.
     future_covariates
         Optional covariates whose values are passed to the wrapped model as ``X``.

     Returns
     -------
     self
     """
     super()._fit(series, future_covariates)
     series._assert_univariate()
     # From here on the series stored on the instance is used for fitting.
     series = self.training_series

     # Compare against None explicitly: whether covariates were supplied must
     # not depend on the truth value of the series object.
     covariate_values = None
     if future_covariates is not None:
         covariate_values = future_covariates.values(copy=False)

     self.model.fit(
         series.values(copy=False).flatten(),
         X=covariate_values,
     )
     return self
Exemplo n.º 11
0
        def setUp(self):
            """Create a temporary work directory and two 100-step daily series."""
            self.temp_work_dir = tempfile.mkdtemp(prefix="darts")

            # 2013-01-01 .. 2013-04-10 spans exactly 100 days
            daily_index = pd.date_range("20130101", "20130410")

            # univariate series 0..99
            self.series = TimeSeries.from_series(
                pd.Series(range(100), index=daily_index))

            # two-component series, both components 0..99
            frame = pd.DataFrame(
                {name: range(100) for name in ("var1", "var2")},
                index=daily_index,
            )
            self.multivariate_series = TimeSeries.from_dataframe(frame)
Exemplo n.º 12
0
 def test_multivariate_fill(self):
     """Filling a stacked two-component series must equal the stacked references."""
     gap = [np.nan] * 5

     # constant 2.0 with missing values at the start, in the middle and at the end
     constant_values = gap + [2.0] * 5 + gap + [2.0] * 10 + gap
     seriesA: TimeSeries = TimeSeries.from_times_and_values(
         self.time, np.array(constant_values))

     # reference values with the ten middle entries missing
     linear_values = self.lin[:10] + [np.nan] * 10 + self.lin[-10:]
     seriesB: TimeSeries = TimeSeries.from_times_and_values(
         self.time, np.array(linear_values))

     expected = self.series1.stack(self.series2)
     filled = fill_missing_values(seriesA.stack(seriesB), "auto")
     self.assertEqual(expected, filled)
Exemplo n.º 13
0
    def warped(self) -> (TimeSeries, TimeSeries):
        """
        Warps the two time series according to the warp path returned by .path(), which minimizes
        the pair-wise distance.
        This will bring two time series that are out-of-phase back into phase.

        Returns
        -------
        (TimeSeries, TimeSeries)
            Two new TimeSeries instances of the same length, indexed by pd.RangeIndex.
        """
        first, second = self.series1, self.series2

        array1 = first.data_array(copy=False)
        array2 = second.data_array(copy=False)

        # Column 0 of the path indexes series1, column 1 indexes series2.
        alignment = self.path()
        aligned1 = array1[alignment[:, 0]]
        aligned2 = array2[alignment[:, 1]]

        # The original time indexes are dropped from both warped arrays.
        aligned1 = aligned1.reset_index(dims_or_levels=first._time_dim)
        aligned2 = aligned2.reset_index(dims_or_levels=second._time_dim)

        # todo: prevent time information being lost after warping
        # Copying the warped time index of series1 onto series2 is deliberately not done, for
        # consistency reasons:
        # applying the warp path to the dates directly will result in duplicate dates
        # and hence values being lost when converting back to a TimeSeries.
        # As a result series1 will not be warped, whereas series2 will be.
        # It could also cause the two series to have different lengths, if len(series1) < len(series2)
        # One could generate intermediate dates, but then the series would not have a consistent frequency

        return TimeSeries.from_xarray(aligned1), TimeSeries.from_xarray(
            aligned2)
Exemplo n.º 14
0
def stationarity_test_adf(
    ts: TimeSeries,
    maxlag: Union[None, int] = None,
    regression: str = "c",
    autolag: Union[None, str] = "AIC",
) -> tuple:
    """
    Provides Augmented Dickey-Fuller unit root test for a time series,
    using :func:`statsmodels.tsa.stattools.adfuller`. See [1]_.


    Parameters
    ----------
    ts
        The time series to test. Must be univariate and deterministic.
    maxlag
        Maximum lag which is included in test, default value of 12*(nobs/100)^{1/4} is used when None.
    regression
        Constant and trend order to include in regression.
        "c" : constant only (default).
        "ct" : constant and trend.
        "ctt" : constant, and linear and quadratic trend.
        "n" : no constant, no trend.
    autolag
        Method to use when automatically determining the lag length among the values 0, 1, …, maxlag.
        If "AIC" (default) or "BIC", then the number of lags is chosen to minimize the corresponding
        information criterion. "t-stat" based choice of maxlag. Starts with maxlag and drops a lag
        until the t-statistic on the last lag length is significant using a 5%-sized test.
        If None, then the number of included lags is set to maxlag.

    Returns
    -------
    tuple
        The result of ``adfuller``, unchanged and in this order:

        | adf: The test statistic.
        | pvalue: MacKinnon's approximate p-value based on [2]_.
        | usedlag: The number of lags used.
        | nobs: The number of observations used for the ADF regression and calculation of the critical values.
        | critical: Critical values for the test statistic at the 1 %, 5 %, and 10 % levels. Based on [2]_.
        | icbest: The maximized information criterion if autolag is not None.

    References
    ----------
    .. [1] https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html
    .. [2] MacKinnon (1994, 2010)
    """

    ts._assert_univariate()
    ts._assert_deterministic()

    # Options are passed by keyword so the call does not depend on the
    # positional order of adfuller's parameters.
    return adfuller(
        ts.values(copy=False),
        maxlag=maxlag,
        regression=regression,
        autolag=autolag,
    )
Exemplo n.º 15
0
def plot_residuals_analysis(residuals: TimeSeries,
                            num_bins: int = 20,
                            fill_nan: bool = True) -> None:
    """Plots data relevant to residuals.

    Draws three panels for a univariate TimeSeries of residuals: the values
    over time (top row), their ACF (bottom left) and a histogram overlaid
    with a scaled normal density (bottom right).
    Please note that if the residual TimeSeries instance contains NaN values, the plots
    might be displayed incorrectly. If `fill_nan` is set to True, the missing values will
    be filled with `fill_missing_values` before plotting.

    Parameters
    ----------
    residuals
        Univariate TimeSeries instance representing residuals.
    num_bins
        Optionally, an integer value determining the number of bins in the histogram.
    fill_nan
        A boolean value indicating whether NaN values should be filled in the residuals.
    """

    residuals._assert_univariate()

    figure = plt.figure(constrained_layout=True, figsize=(8, 6))
    grid = figure.add_gridspec(2, 2)

    if fill_nan:
        residuals = fill_missing_values(residuals)

    # top row: residual values
    values_ax = figure.add_subplot(grid[:1, :])
    residuals.plot(ax=values_ax)
    values_ax.set_ylabel("value")
    values_ax.set_title("Residual values")

    # bottom right: histogram with a normal density scaled to counts
    values = residuals.univariate_values()
    res_mean = np.mean(values)
    res_std = np.std(values)
    res_min = min(values)
    res_max = max(values)
    grid_points = np.linspace(res_min, res_max, 100)

    hist_ax = figure.add_subplot(grid[1:, 1:])
    plot_hist(residuals, bins=num_bins, ax=hist_ax)
    density = norm(res_mean, res_std).pdf(grid_points)
    hist_ax.plot(
        grid_points,
        density * len(residuals) * (res_max - res_min) / num_bins,
    )
    hist_ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))
    hist_ax.set_title("Distribution")
    hist_ax.set_ylabel("count")
    hist_ax.set_xlabel("value")

    # bottom left: autocorrelation
    acf_ax = figure.add_subplot(grid[1:, :1])
    plot_acf(residuals, axis=acf_ax)
    acf_ax.set_ylabel("ACF value")
    acf_ax.set_xlabel("lag")
    acf_ax.set_title("ACF")
Exemplo n.º 16
0
    def test_eq(self):
        """Check ``==`` / ``!=`` between series built from equal and differing frames."""
        # identical content: equal, and not unequal
        identical = TimeSeries.from_dataframe(self.dataframe1)
        self.assertTrue(self.series1 == identical)
        self.assertFalse(self.series1 != identical)

        # same values on a different date range
        shifted_frame = self.dataframe1.copy()
        shifted_frame.index = pd.date_range("20130102", "20130111")
        shifted = TimeSeries.from_dataframe(shifted_frame)
        self.assertFalse(self.series1 == shifted)

        # same dates, a single cell overwritten
        altered_frame = self.dataframe1.copy()
        altered_frame.iloc[2, 2] = 0
        altered = TimeSeries.from_dataframe(altered_frame)
        self.assertFalse(self.series1 == altered)
Exemplo n.º 17
0
    def test_stationarity_tests(self):
        """Check input validation and basic results of the stationarity tests."""
        two_components = constant_timeseries(start=0, end=9999).stack(
            constant_timeseries(start=0, end=9999))
        multi_sample = TimeSeries.from_values(
            np.random.uniform(0, 1, (1000, 2, 1000)))
        gaussian = gaussian_timeseries(start=0, end=9999)

        checks = (
            stationarity_tests,
            stationarity_test_adf,
            stationarity_test_kpss,
        )

        # Rejected inputs: first the two-component series (univariate check),
        # then the multi-sample series (deterministic check).
        for rejected in (two_components, multi_sample):
            for check in checks:
                with self.assertRaises(AssertionError):
                    check(rejected)

        # Test basics
        self.assertTrue(stationarity_test_kpss(gaussian)[1] > 0.05)
        self.assertTrue(stationarity_test_adf(gaussian)[1] < 0.05)
        # The combined test must actually be called: asserting on the function
        # object itself is always true and verifies nothing.
        self.assertTrue(stationarity_tests(gaussian))
Exemplo n.º 18
0
    def generate_train_series(
            self,
            target: TimeSeries,
            covariate: Optional[TimeSeries] = None) -> SupportedIndex:
        """Return the training index and, if configured, store a reference index.

        The reference index is only set when a reference index type other than
        ``NONE`` is configured and no reference index has been stored yet.

        Returns
        -------
        The time index of ``covariate`` when it is given, otherwise the time
        index of ``target``.
        """
        super().generate_train_series(target, covariate)

        reference_wanted = self.reference_index_type is not ReferenceIndexType.NONE
        if reference_wanted and self.reference_index is None:
            if self.reference_index_type is ReferenceIndexType.PREDICTION:
                # last position and last time step of the target series
                reference = (len(target) - 1, target.end_time())
            else:
                # the time step before the start of the target series
                reference = (-1, target.start_time() - target.freq)
            self.reference_index = reference

        if covariate is None:
            return target.time_index
        return covariate.time_index
Exemplo n.º 19
0
def lstm():
    """Train an LSTM per company and store a one-step prediction of 'Close'.

    For every entry of ``lstCompanies`` the documents of the collection
    ``db[company]`` are loaded, turned into a business-day series of the
    'Close' column, and used to fit an ``RNNModel``. The single predicted
    value, rounded to two decimals, is inserted into ``db.prediction``
    together with the current date and the company name.
    """
    for company in lstCompanies:
        records = list(db[company].find({}))
        df = pd.DataFrame(records).drop('_id', axis=1)
        for price_column in ('Open', 'Close'):
            df[price_column] = df[price_column].astype('float')

        # 'B' = business day; missing dates are added, then filled
        series = TimeSeries.from_dataframe(df,
                                           'Date', ['Close'],
                                           freq='B',
                                           fill_missing_dates=True)
        series = auto_fillna(series)

        model = RNNModel(
            model='LSTM',  # RNN module type
            output_length=1,  # one output time step
            hidden_size=25,
            n_rnn_layers=1,
            input_length=12,
            batch_size=16,
            n_epochs=200,
            optimizer_kwargs={'lr': 1e-3},
            model_name='{}_RNN'.format(company),
        )
        model.fit(series)

        # first value of the one-step forecast
        lstmPred = model.predict(1).values()[0][0]
        prediction_doc = {
            "Date": datetime.datetime.today(),
            "Company": company,
            "Prediction": round(float(lstmPred), 2)
        }
        db.prediction.insert_one(prediction_doc)
Exemplo n.º 20
0
    def test_seasonality_inference(self):
        """Check how ``ExponentialSmoothing`` infers ``seasonal_periods``."""

        # expected `seasonal_periods` per frequency string of a datetime index
        expected_periods_by_freq = [
            ("D", 7),
            ("H", 24),
            ("M", 12),
            ("W", 52),
            ("Q", 4),
            ("B", 5),
        ]
        for freq_str, seasonal_periods in expected_periods_by_freq:
            self.helper_test_seasonality_inference(freq_str, seasonal_periods)

        # integer index: the default value is expected
        integer_indexed = TimeSeries.from_values(np.arange(1, 30, 1))
        model = ExponentialSmoothing()
        model.fit(integer_indexed)
        self.assertEqual(model.seasonal_periods, 12)

        # a model that inferred a period once must infer it again on a refit
        monthly = tg.sine_timeseries(length=100, freq="M")
        daily = tg.sine_timeseries(length=100, freq="D")
        model = ExponentialSmoothing()
        for training_series in (monthly, daily):
            model.fit(training_series)
        self.assertEqual(model.seasonal_periods, 7)
Exemplo n.º 21
0
    def test_linear(self):
        """Filling a ten-step gap with "auto" must reproduce ``self.series2``."""
        # reference values with the ten middle entries replaced by NaN
        values_with_gap = self.lin[:10] + [np.nan] * 10 + self.lin[-10:]
        seriesB: TimeSeries = TimeSeries.from_times_and_values(
            self.time, np.array(values_with_gap))

        filled = fill_missing_values(seriesB, "auto")
        self.assertEqual(self.series2, filled)
Exemplo n.º 22
0
    def test_kalman(self):
        """KalmanFilter test.

        Builds an increasing sequence, adds Gaussian noise, and checks that
        the filtered values deviate less from the clean sequence than the
        noisy input does. Also checks width and sample count of the result.
        """
        clean = np.arange(1, 5, 0.1)
        noisy = clean + np.random.normal(0, 0.7, clean.shape)

        noisy_ts = TimeSeries.from_dataframe(
            pd.DataFrame(data=noisy, columns=["signal"]),
            value_cols=["signal"],
        )

        kalman = KalmanFilter(dim_x=1)
        kalman.fit(noisy_ts)
        filtered_ts = kalman.filter(noisy_ts, num_samples=1)

        input_error = noisy - clean
        filtered_error = filtered_ts.univariate_values() - clean

        self.assertGreater(input_error.std(), filtered_error.std())
        self.assertEqual(filtered_ts.width, 1)
        self.assertEqual(filtered_ts.n_samples, 1)
Exemplo n.º 23
0
    def to_darts(self) -> DartsTimeSeries:
        """Convert this TimeSeries to a Darts TimeSeries.

        Returns:
            Darts TimeSeries object built from ``self.series[TIME_SERIES_VALUES]``.
        """
        value_series = self.series[TIME_SERIES_VALUES]
        return DartsTimeSeries.from_series(value_series)
Exemplo n.º 24
0
        def test_future_covariate_handling(self):
            """Check which future-covariate setups let ``TFTModel.fit`` succeed."""
            chunk_lengths = dict(input_chunk_length=1, output_chunk_length=1)

            datetime_indexed = tg.sine_timeseries(length=2, freq="h")
            integer_indexed = TimeSeries.from_values(
                values=datetime_indexed.values())

            # neither future covariates nor an encoder: fit must fail
            plain_model = TFTModel(**chunk_lengths)
            with self.assertRaises(ValueError):
                plain_model.fit(datetime_indexed, verbose=False)

            # a cyclic "hour" encoder for the future part: fit must succeed
            encoder_model = TFTModel(
                add_encoders={"cyclic": {
                    "future": "hour"
                }},
                **chunk_lengths,
            )
            encoder_model.fit(datetime_indexed, verbose=False)

            # relative index: fit must succeed for time and integer indexes
            relative_model = TFTModel(add_relative_index=True, **chunk_lengths)
            for training_series in (datetime_indexed, integer_indexed):
                relative_model.fit(training_series, verbose=False)
Exemplo n.º 25
0
def eval_tcn_model(serialized_model, dataset):
    """Score a pickled TCN model on the validation tail of ``dataset``.

    The data is split 80/20, a fresh ``Scaler`` is fitted on the whole series,
    and the model produces one-step historical forecasts (no retraining)
    starting at the 80% mark. Forecasts and reference series are mapped back
    to the original scale before the metrics are computed.

    Args:
        serialized_model: Pickled model; the unpickled object must provide
            ``historical_forecasts``.
        dataset: Input accepted by ``pd.DataFrame.from_dict`` that yields the
            columns ``time_interval`` and ``count``.

    Returns:
        dict with the keys ``r2``, ``mase_score``, ``mae_score``,
        ``rmse_score`` and ``mape_score``. ``mape_score`` holds an explanatory
        string instead of a number when MAPE raises ``ValueError``.
    """
    # SECURITY: pickle.loads can execute arbitrary code while deserialising.
    # Only pass payloads that come from a trusted source.
    tcn_model = pickle.loads(serialized_model)
    frame = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(frame,
                                   time_col='time_interval',
                                   value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    scaler = Scaler()
    ts = scaler.fit_transform(ts)
    val_scaled = scaler.transform(val)
    train_scaled = scaler.transform(train)
    backtest = tcn_model.historical_forecasts(
        series=ts,
        start=0.8,
        forecast_horizon=1,
        stride=1,
        retrain=False,
    )

    # Everything is brought back to the original scale before scoring.
    actual = scaler.inverse_transform(val_scaled)
    backtest = scaler.inverse_transform(backtest)
    history = scaler.inverse_transform(train_scaled)

    # The first forecast point is dropped for every metric
    # (presumably to line the forecast up with `val` -- TODO confirm).
    forecast = backtest[1:]

    scores = {
        'r2': r2_score(actual, forecast),
        'mase_score': mase(actual, forecast, history),
        'mae_score': mae(actual, forecast),
        'rmse_score': np.sqrt(mse(actual, forecast)),
    }
    try:
        scores['mape_score'] = mape(actual, forecast)
    except ValueError:
        # MAPE is undefined when the actual series contains zeros. Catching
        # only ValueError keeps KeyboardInterrupt/SystemExit propagating.
        scores[
            'mape_score'] = "Could not be calculated (Zero value in time series)"
    return scores
Exemplo n.º 26
0
    def test_gaussian_process_missing_values(self):
        """Filtering a constant series of ones with an RBF GP filter must give ones back."""
        constant_series = TimeSeries.from_values(np.ones(6))
        gp_filter = GaussianProcessFilter(RBF())

        result = gp_filter.filter(constant_series).values()
        expected = np.ones_like(result)
        np.testing.assert_allclose(result, expected)
Exemplo n.º 27
0
def sine_timeseries(
    value_frequency: float = 0.1,
    value_amplitude: float = 1.0,
    value_phase: float = 0.0,
    value_y_offset: float = 0.0,
    start: Optional[Union[pd.Timestamp, int]] = pd.Timestamp("2000-01-01"),
    end: Optional[Union[pd.Timestamp, int]] = None,
    length: Optional[int] = None,
    freq: str = "D",
    column_name: Optional[str] = "sine",
    dtype: np.dtype = np.float64,
) -> TimeSeries:
    """
    Creates a univariate TimeSeries with a sinusoidal value progression with a given frequency, amplitude,
    phase and y offset.

    Parameters
    ----------
    value_frequency
        The number of periods that take place within one time unit given in `freq`.
    value_amplitude
        The maximum difference between any value of the returned TimeSeries and `value_y_offset`.
    value_phase
        The relative position within one period of the first value of the returned TimeSeries (in radians).
    value_y_offset
        The shift of the sine function along the y axis.
    start
        The start of the returned TimeSeries' index. If a pandas Timestamp is passed, the TimeSeries will have a pandas
        DatetimeIndex. If an integer is passed, the TimeSeries will have a pandas RangeIndex index. Works only with
        either `length` or `end`.
    end
        Optionally, the end of the returned index. Works only with either `start` or `length`. If `start` is
        set, `end` must be of same type as `start`. Else, it can be either a pandas Timestamp or an integer.
    length
        Optionally, the length of the returned index. Works only with either `start` or `end`.
    freq
        The time difference between two adjacent entries in the returned TimeSeries. Only effective if `start` is a
        pandas Timestamp. A DateOffset alias is expected; see
        `docs <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`_.
    column_name
        Optionally, the name of the value column for the returned TimeSeries
    dtype
        The desired NumPy dtype (np.float32 or np.float64) for the resulting series

    Returns
    -------
    TimeSeries
        A sinusoidal TimeSeries parametrized as indicated above.
    """

    index = _generate_index(start=start, end=end, freq=freq, length=length)

    # Evaluate the sine in float64 with a single vectorized NumPy expression;
    # the step counter 0, 1, 2, ... is the position within the index.
    steps = np.arange(len(index), dtype=np.float64)
    angle = 2 * np.pi * value_frequency * steps + value_phase
    values = value_amplitude * np.sin(angle) + value_y_offset

    # Honour the requested floating dtype. Any non-floating dtype keeps the
    # float64 result, which is what callers received before.
    target_dtype = np.dtype(dtype)
    if np.issubdtype(target_dtype, np.floating):
        values = values.astype(target_dtype, copy=False)

    return TimeSeries.from_times_and_values(index,
                                            values,
                                            freq=freq,
                                            columns=pd.Index([column_name]))
Exemplo n.º 28
0
def eval_sarima_model(serialized_model, dataset):
    """Score a pickled SARIMA model against the validation tail of ``dataset``.

    The data is split 80/20 and the model forecasts ``len(val)`` steps without
    being refitted (this assumes the pickled model was fitted on data ending
    where the training part ends -- confirm against the caller).

    Args:
        serialized_model: Pickled model; the unpickled object must provide
            ``predict``.
        dataset: Input accepted by ``pd.DataFrame.from_dict`` that yields the
            columns ``time_interval`` and ``count``.

    Returns:
        dict with the metric keys ``r2``, ``mase_score``, ``mae_score``,
        ``rmse_score`` and ``mape_score``, plus the empty dicts ``retrained``
        and ``not_retrained``. ``mape_score`` holds an explanatory string
        instead of a number when MAPE raises ``ValueError``.
    """
    # SECURITY: pickle.loads can execute arbitrary code while deserialising.
    # Only pass payloads that come from a trusted source.
    sarima_model = pickle.loads(serialized_model)
    frame = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(frame,
                                   time_col='time_interval',
                                   value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    no_retrain = sarima_model.predict(len(val))
    logging.debug(no_retrain)
    logging.debug(val)

    scores = {
        # Both sub-dicts stay empty: only the non-retrained forecast is
        # scored here. They remain in the result so its shape is unchanged.
        'retrained': {},
        'not_retrained': {},
        'r2': r2_score(val, no_retrain),
        'mase_score': mase(val, no_retrain, train),
        'mae_score': mae(val, no_retrain),
        'rmse_score': np.sqrt(mse(val, no_retrain)),
    }
    try:
        scores['mape_score'] = mape(val, no_retrain)
    except ValueError:
        # MAPE is undefined when the actual series contains zeros. Catching
        # only ValueError keeps KeyboardInterrupt/SystemExit propagating.
        scores[
            'mape_score'] = "Could not be calculated (Zero value in time series)"
    return scores
Exemplo n.º 29
0
    def test_rho_risk(self):
        """Exercise ``metrics.rho_risk`` on deterministic, stochastic and hand-built inputs."""
        risk = metrics.rho_risk

        # a deterministic prediction is not supported and must be rejected
        with self.assertRaises(ValueError):
            risk(self.series1, self.series1)

        # shared univariate / multivariate / multi-series / NaN checks
        self.helper_test_multivariate_duplication_equality(risk, is_stochastic=True)
        self.helper_test_multiple_ts_duplication_equality(risk, is_stochastic=True)
        self.helper_test_nan(risk, is_stochastic=True)

        # perfect predictions must yield a risk of zero
        for quantile in (0.25, 0.5):
            perfect = risk(self.series1, self.series11_stochastic, rho=quantile)
            self.assertAlmostEqual(perfect, 0.0)
        perfect_mean = risk(self.series12_mean, self.series12_stochastic, rho=0.5)
        self.assertAlmostEqual(perfect_mean, 0.0)

        # a two-sample stochastic series built from two series must reproduce
        # each of them at the 0. and 1. quantile respectively
        single = self.series1
        doubled = self.series1 * 2
        two_sample_series = TimeSeries.from_times_and_values(
            single.time_index,
            np.stack([single.values(), doubled.values()], axis=2),
        )
        for actual, quantile in ((single, 0.0), (doubled, 1.0)):
            self.assertAlmostEqual(
                risk(actual, two_sample_series, rho=quantile), 0.0
            )
Exemplo n.º 30
0
    def test_add_datetime_attribute(self):
        """Check the columns produced by ``add_datetime_attribute`` for the
        plain call, the call with ``True`` as second argument, and ``cyclic=True``.
        """
        # Plain "day" attribute: exactly one extra column whose values are 1..10
        # (presumably series1 covers the first ten days of a month -- fixture
        # is not visible here).
        seriesA = self.series1.add_datetime_attribute("day")
        self.assertEqual(seriesA.width, self.series1.width + 1)
        self.assertTrue(
            set(seriesA.pd_dataframe().iloc[:, seriesA.width -
                                            1].values.flatten()) == set(
                                                range(1, 11)))
        # Second positional argument True (presumably the one-hot flag --
        # confirm against the method signature): 31 extra columns for "day",
        # containing only 0s and 1s.
        seriesB = self.series3.add_datetime_attribute("day", True)
        self.assertEqual(seriesB.width, self.series3.width + 31)
        self.assertEqual(
            set(seriesB.pd_dataframe().iloc[:, self.series3.width:].values.
                flatten()),
            {0, 1},
        )
        # Same flag with "month": 12 extra columns.
        seriesC = self.series1.add_datetime_attribute("month", True)
        self.assertEqual(seriesC.width, self.series1.width + 12)
        # Daily index 2013-02-06 .. 2013-04-30 is 23 + 31 + 30 = 84 days.
        seriesD = TimeSeries.from_times_and_values(
            pd.date_range("20130206", "20130430"), range(84))
        seriesD = seriesD.add_datetime_attribute("month", True)
        # 1 value column + 12 month columns.
        self.assertEqual(seriesD.width, 13)
        # The added columns contribute 84 in total on top of the original
        # values, i.e. one per row on average.
        self.assertEqual(sum(seriesD.values().flatten()), sum(range(84)) + 84)
        # Column 0 is the original value; column 1 + 3 sums to 30 (the days of
        # April in the index) and column 1 + 1 to 23 (the days of February).
        self.assertEqual(sum(seriesD.values()[:, 1 + 3]), 30)
        self.assertEqual(sum(seriesD.values()[:, 1 + 1]), 23)

        # test cyclic
        times_month = pd.date_range("20130101", "20140610")
        start = times_month[0]
        end = times_month[-1]

        seriesE = TimeSeries.from_times_and_values(
            times_month, np.repeat(0.1, len(times_month)))
        seriesF = seriesE.add_datetime_attribute("day", cyclic=True)

        # Columns 1 and 2 are treated as the sine and cosine components.
        values_sin = seriesF.values()[:, 1]
        values_cos = seriesF.values()[:, 2]

        # sin^2 + cos^2 must equal 1 at every time step.
        self.assertTrue(
            np.allclose(np.add(np.square(values_sin), np.square(values_cos)),
                        1))
        # Offsets from `start` to the first day of each month in
        # [start.month, end.month) of the start year, i.e. Jan..May 2013.
        start_of_month = [
            pd.Timestamp(year=start.year, month=m, day=1) - start
            for m in range(start.month, end.month)
        ]
        # With a daily index the offset in days is the positional index.
        start_of_month_idx = [stamp.days for stamp in start_of_month]

        # On the first day of a month the encoding must be (sin, cos) = (0, 1).
        self.assertTrue(np.allclose(values_sin[start_of_month_idx], 0))
        self.assertTrue(np.allclose(values_cos[start_of_month_idx], 1))