def test_add_holidays(self):
    """Check that ``add_holidays`` appends a 0/1 holiday indicator as the last component."""

    def last_component(series):
        # The indicator added by the latest ``add_holidays`` call is the right-most column.
        return series.pd_dataframe().iloc[:, series.width - 1]

    def check_flags(flags, expectations):
        for stamp, expected in expectations:
            self.assertEqual(flags.at[pd.Timestamp(stamp)], expected)

    daily_index = pd.date_range(start=pd.Timestamp("20201201"), periods=30, freq="D")
    seriesA = TimeSeries.from_times_and_values(daily_index, range(len(daily_index)))

    # Christmas day and two regular days with the US calendar
    seriesA = seriesA.add_holidays("US")
    check_flags(
        last_component(seriesA),
        [("20201225", 1), ("20201210", 0), ("20201226", 0)],
    )

    # Same dates with the PL calendar, where Dec 26 is expected to be flagged as well
    seriesA = seriesA.add_holidays("PL")
    check_flags(
        last_component(seriesA),
        [("20201225", 1), ("20201210", 0), ("20201226", 1)],
    )
    # original value column + one indicator per calendar
    self.assertEqual(seriesA.width, 3)

    # Hourly series: the flag is looked up on individual hours
    hourly_index = pd.date_range(start=pd.Timestamp("20201224"), periods=50, freq="H")
    seriesB = TimeSeries.from_times_and_values(hourly_index, range(len(hourly_index)))
    seriesB = seriesB.add_holidays("US")
    check_flags(
        last_component(seriesB),
        [("2020-12-25 01:00:00", 1), ("2020-12-24 23:00:00", 0)],
    )
def test_gaussian_process(self):
    """GaussianProcessFilter test.

    Builds a periodic (cosine) signal, adds Gaussian noise and checks that the
    filtered series deviates less from the clean signal than the noisy one does,
    both for a single sample and for the median of 100 samples.
    """
    angles = np.radians(np.linspace(0, 360 * 5, 200))
    clean_signal = TimeSeries.from_values(np.cos(angles))
    noise = TimeSeries.from_values(np.random.normal(0, 0.4, len(clean_signal)))
    noisy_signal = clean_signal + noise

    gpf = GaussianProcessFilter(
        kernel=ExpSineSquared(),
        alpha=0.2,
        n_restarts_optimizer=100,
        random_state=42,
    )

    # Spread of the raw noise is the baseline both filtered variants must beat.
    noise_spread = (noisy_signal - clean_signal).values().std()

    single_sample = gpf.filter(noisy_signal, num_samples=1)
    single_spread = (single_sample - clean_signal).values().std()
    self.assertGreater(noise_spread, single_spread)

    sampled = gpf.filter(noisy_signal, num_samples=100)
    median_series = sampled.quantile_timeseries()
    median_spread = (median_series - clean_signal).values().std()
    self.assertGreater(noise_spread, median_spread)
def train_theta_boxcox(ts, seasonality, n):
    """Fit a Theta model on a Box-Cox transformed series and forecast ``n`` steps.

    The series is shifted to be positive if needed, Box-Cox transformed, fitted
    with ``Theta(theta=0, season_mode=SeasonalityMode.NONE)``, and the forecast is
    transformed back, un-shifted, multiplied by ``seasonality`` and clipped at 0.

    Parameters
    ----------
    ts
        Series to train on (accessed through ``univariate_values()``).
    seasonality
        Series providing the forecast time index and the multiplicative factor
        applied to the forecast.
    n
        Number of steps passed to ``predict``.

    Returns
    -------
    The value of ``forecast`` after the steps above.
    """
    theta_bc = Theta(theta=0, season_mode=SeasonalityMode.NONE)
    shiftdata = 0
    # Box-Cox needs positive data: shift the whole series when any value is negative.
    if (ts.univariate_values() < 0).any():
        shiftdata = -ts.min() + 100
        ts = ts + shiftdata
    # Without an explicit lambda, boxcox returns the transformed values and the fitted lambda.
    new_values, lmbd = boxcox(ts.univariate_values())
    # NOTE(review): the nesting of the two inner checks below is reconstructed from a
    # whitespace-collapsed source; confirm against the original file.
    if lmbd < 0:
        # Re-examine lambdas in [lmbd - 1, 0] when the fitted lambda is negative.
        lmbds, value = boxcox_normplot(ts.univariate_values(), lmbd - 1, 0, N=100)
        if np.isclose(value[0], 0):
            lmbd = lmbds[np.argmax(value)]
            new_values = boxcox(ts.univariate_values(), lmbd)
        # If the transform collapsed the data to a constant, fall back to lambda = 0.
        if np.isclose(new_values, new_values[0]).all():
            lmbd = 0
            new_values = boxcox(ts.univariate_values(), lmbd)
    ts = TimeSeries.from_times_and_values(ts.time_index(), new_values)
    theta_bc.fit(ts)
    forecast = theta_bc.predict(n)
    # Undo the Box-Cox transform and re-index the forecast on the seasonality index.
    new_values = inv_boxcox(forecast.univariate_values(), lmbd)
    forecast = TimeSeries.from_times_and_values(seasonality.time_index(), new_values)
    if shiftdata > 0:
        forecast = forecast - shiftdata
    forecast = forecast * seasonality
    # Negative forecasts are replaced by zeros.
    if (forecast.univariate_values() < 0).any():
        indices = seasonality.time_index()[forecast < 0]
        # NOTE(review): confirm that update(..., inplace=True) returns the series in the
        # darts version in use; if it returns None, ``forecast`` is lost here.
        forecast = forecast.update(indices, np.zeros(len(indices)), inplace=True)
    return forecast
def test_get_item(self):
    """Check how static covariates follow a TimeSeries through ``__getitem__``."""

    def assert_static_index(sub_series, expected_index):
        static_covariates = sub_series.static_covariates
        assert static_covariates.index.equals(expected_index)
        assert isinstance(static_covariates, pd.DataFrame)

    # multi component static covariates
    static_covs = pd.DataFrame([["a", 0], ["b", 1]], columns=["cat", "num"])
    ts = TimeSeries.from_values(
        values=np.random.random((10, 2)),
        columns=["comp1", "comp2"],
    ).with_static_covariates(static_covs)
    assert ts.static_covariates.index.equals(ts.components)

    # integer (time) indexing keeps all components
    assert_static_index(ts[0], ts.components)
    # single component selection
    assert_static_index(ts["comp1"], pd.Index(["comp1"]))
    assert_static_index(ts["comp2"], pd.Index(["comp2"]))
    # slice and list selection of all components
    both = pd.Index(["comp1", "comp2"])
    assert_static_index(ts["comp1":"comp2"], both)
    assert_static_index(ts[["comp1", "comp2"]], both)

    # uni/global component static covariates
    static_covs = pd.DataFrame([["a", 0]], columns=["cat", "num"])
    ts = TimeSeries.from_values(
        values=np.random.random((10, 3)),
        columns=["comp1", "comp2", "comp3"],
    ).with_static_covariates(static_covs)

    # 1) when static covs have 1 component but series is multivariate -> static covariate
    #    component name is set to "global_components"
    global_index = pd.Index([DEFAULT_GLOBAL_STATIC_COV_NAME])
    assert ts.static_covariates.index.equals(global_index)
    assert_static_index(ts[0], global_index)
    assert_static_index(ts["comp1":"comp3"], global_index)
    assert_static_index(ts[["comp1", "comp2", "comp3"]], global_index)

    # 2) if number of static cov components match the number of components in the series
    #    -> static covariate component names are set to be equal to series component names
    assert_static_index(ts["comp1"], pd.Index(["comp1"]))
    assert_static_index(ts["comp2"], pd.Index(["comp2"]))
def test_creation(self):
    """Check construction from a DataFrame and the short-series frequency requirement."""
    series_test = TimeSeries.from_dataframe(self.dataframe1)
    round_trip_values = series_test.pd_dataframe().values
    self.assertTrue(np.all(round_trip_values == self.dataframe1.values))

    # Series cannot be lower than three without passing frequency as argument to constructor
    first_two_rows = self.dataframe1.iloc[:2, :]
    with self.assertRaises(ValueError):
        TimeSeries(first_two_rows)
    # With an explicit frequency the same two rows must be accepted.
    TimeSeries.from_dataframe(self.dataframe1.iloc[:2, :], freq="D")
def test_granger_causality(self):
    """Check input validation and basic p-values of ``granger_causality_tests``."""
    series_cause_1 = constant_timeseries(start=0, end=9999).stack(
        constant_timeseries(start=0, end=9999)
    )
    series_cause_2 = gaussian_timeseries(start=0, end=9999)
    series_effect_1 = constant_timeseries(start=0, end=999)
    series_effect_2 = TimeSeries.from_values(np.random.uniform(0, 1, 10000))
    series_effect_3 = TimeSeries.from_values(
        np.random.uniform(0, 1, (1000, 2, 1000))
    )
    series_effect_4 = constant_timeseries(
        start=pd.Timestamp("2000-01-01"), length=10000
    )

    # Multivariate (univariate check) and stochastic (deterministic check) inputs
    # must be rejected in either argument position.
    rejected_pairs = [
        (series_cause_1, series_effect_1),
        (series_effect_1, series_cause_1),
        (series_cause_1, series_effect_3),
        (series_effect_3, series_cause_1),
    ]
    for cause, effect in rejected_pairs:
        with self.assertRaises(AssertionError):
            granger_causality_tests(cause, effect, 10, verbose=False)

    # Test Frequency
    with self.assertRaises(ValueError):
        granger_causality_tests(series_cause_2, series_effect_4, 10, verbose=False)

    # Test granger basics
    same_series = granger_causality_tests(
        series_effect_2, series_effect_2, 10, verbose=False
    )
    self.assertTrue(same_series[1][0]["ssr_ftest"][1] > 0.99)

    independent = granger_causality_tests(
        series_cause_2, series_effect_2, 10, verbose=False
    )
    self.assertTrue(independent[1][0]["ssr_ftest"][1] > 0.01)
def create_time_series(resampling_methods, chunk_ids, chunk_type, original_chunks, parameter, window_idx, configs,
                       mean=0, std=1):
    """Build filled (and optionally scaled) hourly time series per chunk and pickle them.

    For every resampling method, one series is created per chunk id, missing
    values are filled, and the resulting dict is written to a pickle file. When
    min-max scaling is configured, the fitted scalers of prediction chunks are
    pickled as well.

    Parameters
    ----------
    resampling_methods
        Iterable of resampling names used to build the value column names.
    chunk_ids
        Iterable of chunk ids, matched against the 'CHUNK_ID_FILLED_TH' column.
    chunk_type
        Chunk kind used in the output file name; scalers are only kept for 'pred'.
    original_chunks
        DataFrame holding all chunks; it is not modified by this function.
    parameter
        Name used in the output file names.
    window_idx
        Window index used in the output file names.
    configs
        Object providing ``scaling_method`` and ``with_exogenous_input``; also
        passed to ``get_script_path``.
    mean, std
        Values handed to ``apply_standard_scaling`` when 'standard' scaling is configured.
    """
    # Apply filler as some time series have missing measurements what would lead to ValueError in prediction
    filler = MissingValuesFiller()

    for resampling in resampling_methods:
        series_per_resampling = dict()
        pred_scalers = dict()

        for chunk_id in chunk_ids:
            # Work on an explicit copy: a column may be added below, and assigning to a
            # boolean-filtered slice triggers pandas' SettingWithCopyWarning.
            current_chunk = original_chunks[original_chunks['CHUNK_ID_FILLED_TH'] == chunk_id].copy()

            # Scale chunk values if it is configured and create filled time series
            if configs.scaling_method == 'standard':
                current_chunk[f'SCALED_{resampling}'] = apply_standard_scaling(
                    current_chunk[f'VITAL_PARAMTER_VALUE_{resampling}_RESAMPLING'], mean, std)

                series_per_resampling[chunk_id] = filler.transform(TimeSeries.from_dataframe(
                    df=current_chunk,
                    time_col='CHARTTIME',
                    value_cols=[f'SCALED_{resampling}'],
                    freq='H'))

            elif configs.scaling_method == 'min-max':
                # Darts uses MinMaxScaler by default
                current_scaler = Scaler()

                series_per_resampling[chunk_id] = current_scaler.fit_transform(filler.transform(
                    TimeSeries.from_dataframe(
                        df=current_chunk,
                        time_col='CHARTTIME',
                        value_cols=[f'VITAL_PARAMTER_VALUE_{resampling}_RESAMPLING'],
                        freq='H')))

                # Keep the scaler of prediction chunks, except for the MEDIAN resampling
                # when exogenous input is enabled.
                if chunk_type == 'pred' and \
                        ((configs.with_exogenous_input and resampling != 'MEDIAN') or
                         not configs.with_exogenous_input):
                    pred_scalers[chunk_id] = current_scaler

            else:  # apply no scaling
                series_per_resampling[chunk_id] = filler.transform(TimeSeries.from_dataframe(
                    df=current_chunk,
                    time_col='CHARTTIME',
                    value_cols=[f'VITAL_PARAMTER_VALUE_{resampling}_RESAMPLING'],
                    freq='H'))

        # Save series dict
        path = get_script_path(configs)
        write_pickle_file(f'{path}/time_series/time_series_{parameter}_win{window_idx}_{chunk_type}_'
                          f'{resampling.capitalize()}.pickle', series_per_resampling)

        # Save scaler dict if it was filled
        if pred_scalers:
            write_pickle_file(f'{path}/scalers/scalers_{parameter}_win{window_idx}_{resampling.capitalize()}.pickle',
                              pred_scalers)
def remove_seasonality(
    ts: TimeSeries,
    freq: int = None,
    model: SeasonalityMode = SeasonalityMode.MULTIPLICATIVE,
    method: str = "naive",
    **kwargs,
) -> TimeSeries:
    """
    Adjusts the TimeSeries `ts` for a seasonality of order `freq` using the `model` decomposition.

    Parameters
    ----------
    ts
        The TimeSeries to adjust. Must be univariate.
    freq
        The seasonality period to use.
    model
        The type of decomposition to use.
        Must be a `from darts import SeasonalityMode` Enum member.
        Either SeasonalityMode.MULTIPLICATIVE or SeasonalityMode.ADDITIVE.
        Defaults to SeasonalityMode.MULTIPLICATIVE.
    method
        The method to be used to decompose the series.

        - "naive" : Seasonal decomposition using moving averages [1]_.
        - "STL" : Season-Trend decomposition using LOESS [2]_. Only compatible with ``ADDITIVE`` model type.

        Defaults to "naive".
    kwargs
        Other keyword arguments are passed down to the decomposition method.

    Returns
    -------
    TimeSeries
        A new TimeSeries instance that corresponds to the seasonality-adjusted 'ts'.

    References
    ----------
    .. [1] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.seasonal_decompose.html
    .. [2] https://www.statsmodels.org/devel/generated/statsmodels.tsa.seasonal.STL.html
    """
    ts._assert_univariate()

    # Both validation calls report through the module logger.
    raise_if_not(
        model is not SeasonalityMode.NONE,
        "The model must be either MULTIPLICATIVE or ADDITIVE.",
        logger,
    )
    raise_if(
        model not in [SeasonalityMode.ADDITIVE, ModelMode.ADDITIVE]
        and method == "STL",
        f"Only ADDITIVE seasonality is compatible with the STL method. Current model is {model}.",
        logger,
    )

    # Only the seasonal component is needed; the trend is discarded.
    _, seasonality = extract_trend_and_seasonality(ts, freq, model, method, **kwargs)
    return remove_from_series(ts, seasonality, model)
def groe_owa(ts: TimeSeries, model: ForecastingModel, n1: int, m: int, p: int, fq: int):
    """
    Backtesting.
    Compute the OWA score iteratively on `p` timepoints following an expanding window mode.

    Parameters
    ----------
    ts
        TimeSeries on which backtesting will be done.
    model
        model to backtest.
    n1
        minimum number of datapoints to take during training
    m
        step size
    p
        number of steps to iterate over
    fq
        Frequency of the time series.

    Returns
    -------
    Sum of all OWA errors
    """
    total_length = len(ts)
    owa_per_step = []

    for step in range(p):
        train_length = n1 + step * m
        # Stop as soon as the training window covers the whole series.
        if train_length == total_length:
            break
        horizon = total_length - train_length

        train = ts[:train_length]
        if horizon < 3:
            # Short test sets are rebuilt with an explicit frequency.
            test = TimeSeries(ts.pd_series()[train_length:], freq=ts.freq_str())
        else:
            test = ts[train_length:]

        # Reference scores from the naive2 baseline
        baseline_forecast = naive2_groe(train, horizon, fq)
        baseline_mase = mase_m4(train, test, baseline_forecast)
        baseline_smape = smape_m4(test, baseline_forecast)

        model.fit(train)
        forecast = model.predict(horizon)

        try:
            model_mase = mase_m4(train, test, forecast)
            model_smape = smape_m4(test, forecast)
            owa = 0.5 * (model_mase / baseline_mase) + 0.5 * (model_smape / baseline_smape)
            owa_per_step.append(np.sum(owa))
        except (ZeroDivisionError, ValueError):
            # A failed score computation contributes 0 to the total.
            owa_per_step.append(0)

    return np.sum(owa_per_step)
def _fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = None):
    """Fit the wrapped model on a univariate series, optionally with future covariates.

    Parameters
    ----------
    series
        Target series; must be univariate.
    future_covariates
        Optional covariates whose values are passed to the wrapped model as ``X``.

    Returns
    -------
    This model instance.
    """
    super()._fit(series, future_covariates)
    series._assert_univariate()
    # Train on the series stored by the parent class.
    series = self.training_series

    # Explicit None check: presence of the argument, not its truthiness, decides
    # whether exogenous values are passed on.
    exogenous = (
        future_covariates.values(copy=False)
        if future_covariates is not None
        else None
    )
    self.model.fit(series.values(copy=False).flatten(), X=exogenous)
    return self
def setUp(self):
    """Create a temp work dir plus a univariate and a two-component daily series."""
    self.temp_work_dir = tempfile.mkdtemp(prefix="darts")

    # 100 consecutive days
    times = pd.date_range("20130101", "20130410")

    self.series = TimeSeries.from_series(pd.Series(range(100), index=times))

    columns = {name: range(100) for name in ("var1", "var2")}
    self.multivariate_series = TimeSeries.from_dataframe(
        pd.DataFrame(columns, index=times)
    )
def test_multivariate_fill(self):
    """Filling a stacked series with gaps must reproduce the stacked reference series."""
    gap5 = [np.nan] * 5
    constant_with_gaps = gap5 + [2.0] * 5 + gap5 + [2.0] * 10 + gap5
    seriesA: TimeSeries = TimeSeries.from_times_and_values(
        self.time, np.array(constant_with_gaps)
    )

    linear_with_gap = self.lin[:10] + [np.nan] * 10 + self.lin[-10:]
    seriesB: TimeSeries = TimeSeries.from_times_and_values(
        self.time, np.array(linear_with_gap)
    )

    expected = self.series1.stack(self.series2)
    filled = fill_missing_values(seriesA.stack(seriesB), "auto")
    self.assertEqual(expected, filled)
def warped(self) -> (TimeSeries, TimeSeries):
    """
    Warps the two time series according to the warp path returned by .path(),
    which minimizes the pair-wise distance.
    This will bring two time series that are out-of-phase back into phase.

    Returns
    -------
    (TimeSeries, TimeSeries)
        Two new TimeSeries instances of the same length, indexed by pd.RangeIndex.
    """
    first, second = self.series1, self.series2
    array1 = first.data_array(copy=False)
    array2 = second.data_array(copy=False)

    # Column 0 of the path indexes series1, column 1 indexes series2.
    path = self.path()
    warped_series1 = array1[path[:, 0]]
    warped_series2 = array2[path[:, 1]]

    time_dim1 = first._time_dim
    time_dim2 = second._time_dim

    # Always enabled: the time index is reset on both warped arrays.
    range_index = True
    if range_index:
        warped_series1 = warped_series1.reset_index(dims_or_levels=time_dim1)
        warped_series2 = warped_series2.reset_index(dims_or_levels=time_dim2)

    # todo: prevent time information being lost after warping
    # Applying time index from series1 to series2 (take_dates = True) is disabled for consistency reasons
    # Applying the warp path to the dates directly will result in duplicate dates
    # and hence values being lost when converting back to a TimeSeries.
    # As a result series1 will not be warped, whereas series2 will be.
    # It could also cause the two series to have different lengths, if len(series1) < len(series2)
    # One could generate intermediate dates, but then the series would not have a consistent frequency
    take_dates = False
    if take_dates:
        shared_time_index = warped_series1[time_dim1]
        shared_time_index = shared_time_index.rename({time_dim1: time_dim2})
        warped_series2[time_dim2] = shared_time_index

    result1 = TimeSeries.from_xarray(warped_series1)
    result2 = TimeSeries.from_xarray(warped_series2)
    return result1, result2
def stationarity_test_adf(
    ts: TimeSeries,
    maxlag: Union[None, int] = None,
    regression: str = "c",
    autolag: Union[None, str] = "AIC",
) -> tuple:
    """
    Provides Augmented Dickey-Fuller unit root test for a time series,
    using :func:`statsmodels.tsa.stattools.adfuller`. See [1]_.

    Parameters
    ----------
    ts
        The time series to test. Must be univariate and deterministic.
    maxlag
        Maximum lag which is included in test, default value of 12*(nobs/100)^{1/4} is used when None.
    regression
        Constant and trend order to include in regression.
        "c" : constant only (default).
        "ct" : constant and trend.
        "ctt" : constant, and linear and quadratic trend.
        "n" : no constant, no trend.
    autolag
        Method to use when automatically determining the lag length among the values 0, 1, …, maxlag.
        If "AIC" (default) or "BIC", then the number of lags is chosen to minimize the corresponding
        information criterion.
        If "t-stat", the choice is based on maxlag: starts with maxlag and drops a lag until the
        t-statistic on the last lag length is significant using a 5%-sized test.
        If None, then the number of included lags is set to maxlag.

    Returns
    -------
    tuple
        | adf: The test statistic.
        | pvalue: MacKinnon's approximate p-value based on [2]_.
        | usedlag: The number of lags used.
        | nobs: The number of observations used for the ADF regression and calculation of the critical values.
        | critical: Critical values for the test statistic at the 1 %, 5 %, and 10 % levels. Based on [2]_.
        | icbest: The maximized information criterion if autolag is not None.

    References
    ----------
    .. [1] https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html
    .. [2] MacKinnon (1994, 2010)
    """
    ts._assert_univariate()
    ts._assert_deterministic()

    # The result is returned unchanged; callers index it positionally (e.g. [1] for the p-value).
    return adfuller(ts.values(copy=False), maxlag, regression, autolag)
def plot_residuals_analysis(residuals: TimeSeries,
                            num_bins: int = 20,
                            fill_nan: bool = True) -> None:
    """Plots data relevant to residuals.

    This function takes a univariate TimeSeries instance of residuals and plots their values,
    their distribution and their ACF.
    Please note that if the residual TimeSeries instance contains NaN values, the plots
    might be displayed incorrectly. If `fill_nan` is set to True, the missing values will
    be interpolated.

    Parameters
    ----------
    residuals
        Univariate TimeSeries instance representing residuals.
    num_bins
        Optionally, an integer value determining the number of bins in the histogram.
    fill_nan
        A boolean value indicating whether NaN values should be filled in the residuals.
    """
    residuals._assert_univariate()

    fig = plt.figure(constrained_layout=True, figsize=(8, 6))
    grid = fig.add_gridspec(2, 2)

    if fill_nan:
        residuals = fill_missing_values(residuals)

    # Top row: raw residual values
    values_ax = fig.add_subplot(grid[:1, :])
    residuals.plot(ax=values_ax)
    values_ax.set_ylabel("value")
    values_ax.set_title("Residual values")

    # Bottom right: histogram overlaid with a normal density scaled to counts
    res_mean = np.mean(residuals.univariate_values())
    res_std = np.std(residuals.univariate_values())
    res_min = min(residuals.univariate_values())
    res_max = max(residuals.univariate_values())
    grid_x = np.linspace(res_min, res_max, 100)

    hist_ax = fig.add_subplot(grid[1:, 1:])
    plot_hist(residuals, bins=num_bins, ax=hist_ax)
    # density * number of points * bin width gives expected counts per bin
    scaled_density = (
        norm(res_mean, res_std).pdf(grid_x)
        * len(residuals)
        * (res_max - res_min)
        / num_bins
    )
    hist_ax.plot(grid_x, scaled_density)
    hist_ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))
    hist_ax.set_title("Distribution")
    hist_ax.set_ylabel("count")
    hist_ax.set_xlabel("value")

    # Bottom left: autocorrelation
    acf_ax = fig.add_subplot(grid[1:, :1])
    plot_acf(residuals, axis=acf_ax)
    acf_ax.set_ylabel("ACF value")
    acf_ax.set_xlabel("lag")
    acf_ax.set_title("ACF")
def test_eq(self):
    """Equality must hold for identical data and fail on a shifted index or a changed value."""
    identical = TimeSeries.from_dataframe(self.dataframe1)
    self.assertTrue(self.series1 == identical)
    self.assertFalse(self.series1 != identical)

    # with different dates
    shifted_frame = self.dataframe1.copy()
    shifted_frame.index = pd.date_range("20130102", "20130111")
    shifted = TimeSeries.from_dataframe(shifted_frame)
    self.assertFalse(self.series1 == shifted)

    # with one different value
    altered_frame = self.dataframe1.copy()
    altered_frame.iloc[2, 2] = 0
    altered = TimeSeries.from_dataframe(altered_frame)
    self.assertFalse(self.series1 == altered)
def test_stationarity_tests(self):
    """Check input validation and basic outcomes of the stationarity test helpers."""
    series_1 = constant_timeseries(start=0, end=9999).stack(
        constant_timeseries(start=0, end=9999)
    )
    series_2 = TimeSeries.from_values(np.random.uniform(0, 1, (1000, 2, 1000)))
    series_3 = gaussian_timeseries(start=0, end=9999)

    checks = (stationarity_tests, stationarity_test_adf, stationarity_test_kpss)

    # Test univariate: a multivariate series must be rejected by every helper
    for check in checks:
        with self.assertRaises(AssertionError):
            check(series_1)

    # Test deterministic: a stochastic series must be rejected by every helper
    for check in checks:
        with self.assertRaises(AssertionError):
            check(series_2)

    # Test basics
    self.assertTrue(stationarity_test_kpss(series_3)[1] > 0.05)
    self.assertTrue(stationarity_test_adf(series_3)[1] < 0.05)
    # Call the combined helper; asserting on the bare function object is always true.
    self.assertTrue(stationarity_tests(series_3))
def generate_train_series(
    self, target: TimeSeries, covariate: Optional[TimeSeries] = None
) -> SupportedIndex:
    """Return the index to encode for training and record the reference index once.

    The covariate's time index is returned when a covariate is given, otherwise
    the target's. If a reference index type is configured and no reference has
    been stored yet, it is derived from ``target`` and saved on the instance.
    """
    super().generate_train_series(target, covariate)

    # save a reference index if specified
    reference_requested = self.reference_index_type is not ReferenceIndexType.NONE
    if reference_requested and self.reference_index is None:
        if self.reference_index_type is ReferenceIndexType.PREDICTION:
            reference = (len(target) - 1, target.end_time())
        else:
            # save the time step before start of target series
            reference = (-1, target.start_time() - target.freq)
        self.reference_index = reference

    if covariate is not None:
        return covariate.time_index
    return target.time_index
def lstm():
    """Train an LSTM per company on closing prices and store a one-step prediction.

    For each entry of ``lstCompanies`` the company's collection is loaded from
    ``db``, a business-day series of 'Close' is built and filled, an ``RNNModel``
    is fitted, and the rounded one-step forecast is inserted into ``db.prediction``.
    """
    for company in lstCompanies:
        records = list(db[company].find({}))
        df = pd.DataFrame(records).drop('_id', axis=1)
        for price_column in ('Open', 'Close'):
            df[price_column] = df[price_column].astype('float')

        # 'B' = Business day
        series = TimeSeries.from_dataframe(
            df, 'Date', ['Close'], freq='B', fill_missing_dates=True)
        series = auto_fillna(series)

        model = RNNModel(
            model='LSTM',            # RNN module type: "RNN", "LSTM" or "GRU"
            output_length=1,         # number of time steps output by the forecasting module
            hidden_size=25,          # size of the feature maps of each hidden RNN layer
            n_rnn_layers=1,          # number of layers in the RNN module
            input_length=12,         # input length setting passed to the model
            batch_size=16,           # samples processed before each parameter update
            n_epochs=200,            # passes over the whole training set
            optimizer_kwargs={'lr': 1e-3},
            model_name='{}_RNN'.format(company))
        model.fit(series)

        # First value of the single predicted step
        forecast = model.predict(1)
        lstmPred = forecast.values()[0][0]

        db.prediction.insert_one({
            "Date": datetime.datetime.today(),
            "Company": company,
            "Prediction": round(float(lstmPred), 2)
        })
def test_seasonality_inference(self):
    """Check that ``seasonal_periods`` is inferred from the series index."""
    # test `seasonal_periods` inference for datetime indices
    expected_period_by_freq = [
        ("D", 7),
        ("H", 24),
        ("M", 12),
        ("W", 52),
        ("Q", 4),
        ("B", 5),
    ]
    for freq_str, expected_period in expected_period_by_freq:
        self.helper_test_seasonality_inference(freq_str, expected_period)

    # test default selection for integer index
    integer_indexed = TimeSeries.from_values(np.arange(1, 30, 1))
    model = ExponentialSmoothing()
    model.fit(integer_indexed)
    self.assertEqual(model.seasonal_periods, 12)

    # test whether a model that inferred a seasonality period before will do it again for a new series
    monthly = tg.sine_timeseries(length=100, freq="M")
    daily = tg.sine_timeseries(length=100, freq="D")
    model = ExponentialSmoothing()
    for training_series in (monthly, daily):
        model.fit(training_series)
    self.assertEqual(model.seasonal_periods, 7)
def test_linear(self):
    """A gap in the middle of a linear series must be filled back to the reference series."""
    values_with_gap = self.lin[:10] + [np.nan] * 10 + self.lin[-10:]
    seriesB: TimeSeries = TimeSeries.from_times_and_values(
        self.time, np.array(values_with_gap)
    )

    # Check for linear interpolation part
    filled = fill_missing_values(seriesB, "auto")
    self.assertEqual(self.series2, filled)
def test_kalman(self):
    """KalmanFilter test.

    Creates an increasing sequence of numbers, adds noise and
    assumes the kalman filter predicts values closer to real values
    """
    clean = np.arange(1, 5, 0.1)
    noisy = clean + np.random.normal(0, 0.7, clean.shape)

    frame = pd.DataFrame(data=noisy, columns=["signal"])
    noisy_ts = TimeSeries.from_dataframe(frame, value_cols=["signal"])

    kf = KalmanFilter(dim_x=1)
    kf.fit(noisy_ts)
    filtered_ts = kf.filter(noisy_ts, num_samples=1)

    # The filtered error must be less spread out than the injected noise.
    noise_error = noisy - clean
    filtered_error = filtered_ts.univariate_values() - clean
    self.assertGreater(noise_error.std(), filtered_error.std())

    self.assertEqual(filtered_ts.width, 1)
    self.assertEqual(filtered_ts.n_samples, 1)
def to_darts(self) -> DartsTimeSeries:
    """Convert this TimeSeries to a Darts TimeSeries.

    Returns:
        Darts TimeSeries object built from the ``TIME_SERIES_VALUES`` entry of ``self.series``
    """
    values = self.series[TIME_SERIES_VALUES]
    return DartsTimeSeries.from_series(values)
def test_future_covariate_handling(self):
    """TFTModel must require future covariates unless encoders or a relative index supply them."""
    ts_time_index = tg.sine_timeseries(length=2, freq="h")
    ts_integer_index = TimeSeries.from_values(values=ts_time_index.values())

    chunk_kwargs = dict(input_chunk_length=1, output_chunk_length=1)

    # model requires future covariates without cyclic encoding
    plain_model = TFTModel(**chunk_kwargs)
    with self.assertRaises(ValueError):
        plain_model.fit(ts_time_index, verbose=False)

    # should work with cyclic encoding for time index
    encoded_model = TFTModel(
        add_encoders={"cyclic": {"future": "hour"}},
        **chunk_kwargs,
    )
    encoded_model.fit(ts_time_index, verbose=False)

    # should work with relative index both with time index and integer index
    relative_model = TFTModel(add_relative_index=True, **chunk_kwargs)
    for series in (ts_time_index, ts_integer_index):
        relative_model.fit(series, verbose=False)
def eval_tcn_model(serialized_model, dataset):
    """Score a pickled TCN model on the last 20% of a dataset using historical forecasts.

    Parameters
    ----------
    serialized_model
        Pickled forecasting model exposing ``historical_forecasts``.
    dataset
        Mapping convertible to a DataFrame with 'time_interval' and 'count' columns.

    Returns
    -------
    dict
        Keys 'r2', 'mase_score', 'mae_score', 'rmse_score' and 'mape_score'.
        'mape_score' holds an explanatory string when MAPE cannot be computed.
    """
    # NOTE(security): pickle.loads can execute arbitrary code; only pass payloads
    # that come from a trusted source.
    tcn_model = pickle.loads(serialized_model)

    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df, time_col='time_interval', value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    # The scaler is fitted on the full series, then applied to the splits.
    scaler = Scaler()
    ts = scaler.fit_transform(ts)
    val_transformed = scaler.transform(val)
    train_transformed = scaler.transform(train)

    backtest = tcn_model.historical_forecasts(
        series=ts,
        start=0.8,
        forecast_horizon=1,
        stride=1,
        retrain=False,
    )

    # Metrics are computed on the original scale.
    val_transformed = scaler.inverse_transform(val_transformed)
    backtest = scaler.inverse_transform(backtest)
    train_transformed = scaler.inverse_transform(train_transformed)

    # The first backtest point is dropped for every metric.
    predictions = backtest[1:]

    scores = dict()
    scores['r2'] = r2_score(val_transformed, predictions)
    scores['mase_score'] = mase(val_transformed, predictions, train_transformed)
    scores['mae_score'] = mae(val_transformed, predictions)
    scores['rmse_score'] = np.sqrt(mse(val_transformed, predictions))
    try:
        scores['mape_score'] = mape(val_transformed, predictions)
    except ValueError:
        # MAPE is undefined when the actual series contains zeros; report that
        # instead of failing, without swallowing unrelated exceptions.
        scores['mape_score'] = "Could not be calculated (Zero value in time series)"
    return scores
def test_gaussian_process_missing_values(self):
    """Filtering a constant series of ones must return (approximately) ones."""
    constant_series = TimeSeries.from_values(np.ones(6))
    gp_filter = GaussianProcessFilter(RBF())

    filtered = gp_filter.filter(constant_series)
    filtered_values = filtered.values()

    expected = np.ones_like(filtered_values)
    np.testing.assert_allclose(filtered_values, expected)
def sine_timeseries(
    value_frequency: float = 0.1,
    value_amplitude: float = 1.0,
    value_phase: float = 0.0,
    value_y_offset: float = 0.0,
    start: Optional[Union[pd.Timestamp, int]] = pd.Timestamp("2000-01-01"),
    end: Optional[Union[pd.Timestamp, int]] = None,
    length: Optional[int] = None,
    freq: str = "D",
    column_name: Optional[str] = "sine",
    dtype: np.dtype = np.float64,
) -> TimeSeries:
    """
    Creates a univariate TimeSeries with a sinusoidal value progression with a given frequency, amplitude,
    phase and y offset.

    Parameters
    ----------
    value_frequency
        The number of periods that take place within one time unit given in `freq`.
    value_amplitude
        The maximum difference between any value of the returned TimeSeries and `y_offset`.
    value_phase
        The relative position within one period of the first value of the returned TimeSeries (in radians).
    value_y_offset
        The shift of the sine function along the y axis.
    start
        The start of the returned TimeSeries' index. If a pandas Timestamp is passed, the TimeSeries will have a pandas
        DatetimeIndex. If an integer is passed, the TimeSeries will have a pandas RangeIndex index. Works only with
        either `length` or `end`.
    end
        Optionally, the end of the returned index. Works only with either `start` or `length`. If `start` is
        set, `end` must be of same type as `start`. Else, it can be either a pandas Timestamp or an integer.
    length
        Optionally, the length of the returned index. Works only with either `start` or `end`.
    freq
        The time difference between two adjacent entries in the returned TimeSeries. Only effective if `start` is a
        pandas Timestamp. A DateOffset alias is expected; see
        `docs <https://pandas.pydata.org/pandas-docs/stable/user_guide/TimeSeries.html#dateoffset-objects>`_.
    column_name
        Optionally, the name of the value column for the returned TimeSeries
    dtype
        The desired NumPy dtype (np.float32 or np.float64) for the resulting series

    Returns
    -------
    TimeSeries
        A sinusoidal TimeSeries parametrized as indicated above.
    """
    index = _generate_index(start=start, end=end, freq=freq, length=length)

    # Evaluate the sine on the step numbers 0..len-1 in one vectorized call. The
    # computation runs in float64 and is cast once at the end, so the values carry
    # the requested `dtype` (an element-wise np.vectorize over math.sin produced
    # float64 regardless of `dtype`).
    steps = np.arange(len(index), dtype=np.float64)
    values = (
        value_amplitude * np.sin(2 * np.pi * value_frequency * steps + value_phase)
        + value_y_offset
    ).astype(dtype)

    return TimeSeries.from_times_and_values(
        index, values, freq=freq, columns=pd.Index([column_name])
    )
def eval_sarima_model(serialized_model, dataset):
    """Score a pickled SARIMA model on the last 20% of a dataset without retraining.

    Parameters
    ----------
    serialized_model
        Pickled, already fitted forecasting model exposing ``predict``.
    dataset
        Mapping convertible to a DataFrame with 'time_interval' and 'count' columns.

    Returns
    -------
    dict
        Keys 'r2', 'mase_score', 'mae_score', 'rmse_score' and 'mape_score', plus
        the empty placeholder dicts 'retrained' and 'not_retrained'.
        'mape_score' holds an explanatory string when MAPE cannot be computed.
    """
    # NOTE(security): pickle.loads can execute arbitrary code; only pass payloads
    # that come from a trusted source.
    sarima_model = pickle.loads(serialized_model)

    df = pd.DataFrame.from_dict(dataset)
    ts = TimeSeries.from_dataframe(df, time_col='time_interval', value_cols=['count'])
    train, val = ts.split_after(0.8)  # 80% train, 20% val

    # Forecast the whole validation span with the model as-is.
    no_retrain = sarima_model.predict(len(val))

    scores = dict()
    # Placeholders kept so the returned structure stays the same; they are never filled here.
    scores['retrained'] = dict()
    scores['not_retrained'] = dict()

    logging.debug(no_retrain)
    logging.debug(val)

    scores['r2'] = r2_score(val, no_retrain)
    scores['mase_score'] = mase(val, no_retrain, train)
    scores['mae_score'] = mae(val, no_retrain)
    scores['rmse_score'] = np.sqrt(mse(val, no_retrain))
    try:
        scores['mape_score'] = mape(val, no_retrain)
    except ValueError:
        # MAPE is undefined when the actual series contains zeros; report that
        # instead of failing, without swallowing unrelated exceptions.
        scores['mape_score'] = "Could not be calculated (Zero value in time series)"
    return scores
def test_rho_risk(self):
    """Check ``metrics.rho_risk`` input validation, shared helper checks and zero-risk cases."""
    # deterministic not supported
    with self.assertRaises(ValueError):
        metrics.rho_risk(self.series1, self.series1)

    # general univariate, multivariate and multi-ts tests
    self.helper_test_multivariate_duplication_equality(
        metrics.rho_risk, is_stochastic=True
    )
    self.helper_test_multiple_ts_duplication_equality(
        metrics.rho_risk, is_stochastic=True
    )
    self.helper_test_nan(metrics.rho_risk, is_stochastic=True)

    # test perfect predictions -> risk = 0
    for rho in [0.25, 0.5]:
        risk = metrics.rho_risk(self.series1, self.series11_stochastic, rho=rho)
        self.assertAlmostEqual(risk, 0.0)
    risk = metrics.rho_risk(self.series12_mean, self.series12_stochastic, rho=0.5)
    self.assertAlmostEqual(risk, 0.0)

    # test whether stochastic sample from two TimeSeries (ts) represents the individual ts at 0. and 1. quantiles
    lower = self.series1
    upper = self.series1 * 2
    # Stack both series along the sample axis to get a two-sample stochastic series.
    stacked_samples = np.stack([lower.values(), upper.values()], axis=2)
    two_sample_series = TimeSeries.from_times_and_values(
        lower.time_index, stacked_samples
    )
    for actual, rho in ((lower, 0.0), (upper, 1.0)):
        self.assertAlmostEqual(
            metrics.rho_risk(actual, two_sample_series, rho=rho), 0.0
        )
def test_add_datetime_attribute(self):
    """Check plain, one-hot and cyclic encodings added by ``add_datetime_attribute``."""
    # plain attribute: one extra component holding the day of month
    with_day = self.series1.add_datetime_attribute("day")
    self.assertEqual(with_day.width, self.series1.width + 1)
    day_values = with_day.pd_dataframe().iloc[:, with_day.width - 1].values.flatten()
    self.assertTrue(set(day_values) == set(range(1, 11)))

    # one-hot day: 31 extra binary components
    one_hot_day = self.series3.add_datetime_attribute("day", True)
    self.assertEqual(one_hot_day.width, self.series3.width + 31)
    encoded_part = one_hot_day.pd_dataframe().iloc[:, self.series3.width:]
    self.assertEqual(set(encoded_part.values.flatten()), {0, 1})

    # one-hot month: 12 extra components
    one_hot_month = self.series1.add_datetime_attribute("month", True)
    self.assertEqual(one_hot_month.width, self.series1.width + 12)

    seriesD = TimeSeries.from_times_and_values(
        pd.date_range("20130206", "20130430"), range(84)
    )
    seriesD = seriesD.add_datetime_attribute("month", True)
    self.assertEqual(seriesD.width, 13)
    # each of the 84 rows contributes exactly one set month flag
    self.assertEqual(sum(seriesD.values().flatten()), sum(range(84)) + 84)
    self.assertEqual(sum(seriesD.values()[:, 1 + 3]), 30)
    self.assertEqual(sum(seriesD.values()[:, 1 + 1]), 23)

    # test cyclic
    times_month = pd.date_range("20130101", "20140610")
    start, end = times_month[0], times_month[-1]

    constant_series = TimeSeries.from_times_and_values(
        times_month, np.repeat(0.1, len(times_month))
    )
    cyclic = constant_series.add_datetime_attribute("day", cyclic=True)
    values_sin = cyclic.values()[:, 1]
    values_cos = cyclic.values()[:, 2]

    # sin^2 + cos^2 == 1 on every row
    squared_norm = np.add(np.square(values_sin), np.square(values_cos))
    self.assertTrue(np.allclose(squared_norm, 1))

    # Row offsets (in days from the series start) of the first day of each checked month.
    start_of_month_idx = []
    for month in range(start.month, end.month):
        offset = pd.Timestamp(year=start.year, month=month, day=1) - start
        start_of_month_idx.append(offset.days)

    self.assertTrue(np.allclose(values_sin[start_of_month_idx], 0))
    self.assertTrue(np.allclose(values_cos[start_of_month_idx], 1))