    def test_generate_index(self):
        def test_routine(start, end=None, length=None, freq="D"):
            # testing length, correct start and if sorted (monotonic increasing)
            index = _generate_index(start=start, end=end, length=length, freq=freq)
            self.assertEqual(len(index), length_assert)
            self.assertTrue(index.is_monotonic_increasing)
            self.assertTrue(index[0] == start_assert)
            self.assertTrue(index[-1] == end_assert)

        for length_assert in [1, 2, 5, 10, 100]:
            for start_pos in [0, 1]:
                # pandas.RangeIndex
                start_assert, end_assert = start_pos, start_pos + length_assert - 1
                test_routine(start=start_assert, length=length_assert, freq="")
                test_routine(start=start_assert, length=length_assert, freq="D")
                test_routine(start=start_assert, end=end_assert)
                test_routine(start=start_assert, end=end_assert, freq="D")
                test_routine(start=None, end=end_assert, length=length_assert, freq="BH")

                # pandas.DatetimeIndex
                start_date = pd.DatetimeIndex(["2000-01-01"], freq="D")
                start_date += start_date.freq * start_pos
                dates = _generate_index(start=start_date[0], length=length_assert)
                start_assert, end_assert = dates[0], dates[-1]
                test_routine(start=start_assert, length=length_assert)
                test_routine(start=start_assert, end=end_assert)
                test_routine(start=None, end=end_assert, length=length_assert, freq="D")

        # `start`, `end` and `length` cannot all be set simultaneously
        with self.assertRaises(ValueError):
            _generate_index(start=0, end=9, length=10)
        # same as above, but `start` defaults to timestamp '2000-01-01' in all
        # timeseries generation functions
        with self.assertRaises(ValueError):
            linear_timeseries(end=9, length=10)

        # exactly two of [`start`, `end`, `length`] must be set
        with self.assertRaises(ValueError):
            test_routine(start=0)
        with self.assertRaises(ValueError):
            test_routine(start=None, end=1)
        with self.assertRaises(ValueError):
            test_routine(start=None, end=None, length=10)

        # `start` and `end` must have the same type
        with self.assertRaises(ValueError):
            test_routine(start=0, end=pd.Timestamp("2000-01-01"))
        with self.assertRaises(ValueError):
            test_routine(start=pd.Timestamp("2000-01-01"), end=10)
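# A minimal usage sketch of the behaviour exercised above, assuming
# `_generate_index` is the helper imported elsewhere in these tests from
# `darts.utils.timeseries_generation` (aliased `tg`): an integer `start`
# produces a pandas.RangeIndex, a pd.Timestamp `start` produces a
# pandas.DatetimeIndex, and exactly two of `start`, `end`, `length` may be set.
import pandas as pd

from darts.utils.timeseries_generation import _generate_index

idx_int = _generate_index(start=0, length=5, freq="D")
idx_dt = _generate_index(start=pd.Timestamp("2000-01-01"), length=5, freq="D")

assert list(idx_int) == [0, 1, 2, 3, 4]
assert idx_dt[0] == pd.Timestamp("2000-01-01") and idx_dt[-1] == pd.Timestamp("2000-01-05")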
    def test_exogenous_variables_support(self):
        # test case with pd.DatetimeIndex
        target_dt_idx = self.ts_gaussian
        fc_dt_idx = self.ts_gaussian_long

        # test case with numerical pd.RangeIndex
        target_num_idx = TimeSeries.from_times_and_values(
            times=tg._generate_index(start=0, length=len(self.ts_gaussian)),
            values=self.ts_gaussian.all_values(copy=False),
        )
        fc_num_idx = TimeSeries.from_times_and_values(
            times=tg._generate_index(start=0, length=len(self.ts_gaussian_long)),
            values=self.ts_gaussian_long.all_values(copy=False),
        )

        for target, future_covariates in zip(
            [target_dt_idx, target_num_idx], [fc_dt_idx, fc_num_idx]
        ):
            for model in dual_models:
                # skip models which do not support RangeIndex
                if isinstance(target.time_index, pd.RangeIndex):
                    try:
                        # _supports_range_index raises a ValueError if the model
                        # does not support RangeIndex
                        model._supports_range_index()
                    except ValueError:
                        continue

                # test model runnability - proper future covariates slicing
                model.fit(target, future_covariates=future_covariates)
                prediction = model.predict(
                    self.forecasting_horizon, future_covariates=future_covariates
                )
                self.assertTrue(len(prediction) == self.forecasting_horizon)

                # test mismatch in length between exogenous variables and forecasting horizon
                with self.assertRaises(ValueError):
                    model.predict(
                        self.forecasting_horizon,
                        future_covariates=tg.gaussian_timeseries(
                            start=future_covariates.start_time(),
                            length=self.forecasting_horizon - 1,
                        ),
                    )

                # test mismatch in time index/length between series and exogenous variables
                with self.assertRaises(ValueError):
                    model.fit(target, future_covariates=target[:-1])
                with self.assertRaises(ValueError):
                    model.fit(target[1:], future_covariates=target[:-1])
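# Sketch of the RangeIndex construction pattern used above, outside the test
# fixtures (hypothetical standalone example; `dual_models`, `ts_gaussian`,
# etc. are fixture attributes and are replaced here by a plain gaussian series):
import pandas as pd

from darts import TimeSeries
from darts.utils import timeseries_generation as tg

ts_dt = tg.gaussian_timeseries(length=30)  # datetime-indexed, starts 2000-01-01
ts_num = TimeSeries.from_times_and_values(
    times=tg._generate_index(start=0, length=len(ts_dt)),
    values=ts_dt.all_values(copy=False),
)
assert isinstance(ts_num.time_index, pd.RangeIndex)  # same values, integer index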
    def test_sine_timeseries(self):
        # testing parameters
        value_amplitude = 5
        value_y_offset = -3

        def test_routine(start, end=None, length=None):
            # testing for correct value range
            sine_ts = sine_timeseries(
                start=start,
                end=end,
                length=length,
                value_amplitude=value_amplitude,
                value_y_offset=value_y_offset,
            )
            self.assertTrue((sine_ts <= value_y_offset + value_amplitude).all().all())
            self.assertTrue((sine_ts >= value_y_offset - value_amplitude).all().all())
            self.assertEqual(len(sine_ts), length_assert)

        for length_assert in [1, 2, 5, 10, 100]:
            test_routine(start=0, length=length_assert)
            test_routine(start=0, end=length_assert - 1)
            test_routine(start=pd.Timestamp("2000-01-01"), length=length_assert)
            end_date = _generate_index(
                start=pd.Timestamp("2000-01-01"), length=length_assert
            )[-1]
            test_routine(start=pd.Timestamp("2000-01-01"), end=end_date)
    def test_linear_timeseries(self):
        # testing parameters
        start_value = 5
        end_value = 12

        def test_routine(start, end=None, length=None):
            # testing for start value, end value and delta between two adjacent entries
            linear_ts = linear_timeseries(
                start=start,
                end=end,
                length=length,
                start_value=start_value,
                end_value=end_value,
            )
            self.assertEqual(linear_ts.values()[0][0], start_value)
            self.assertEqual(linear_ts.values()[-1][0], end_value)
            self.assertAlmostEqual(
                linear_ts.values()[-1][0] - linear_ts.values()[-2][0],
                (end_value - start_value) / (length_assert - 1),
            )
            self.assertEqual(len(linear_ts), length_assert)

        for length_assert in [2, 5, 10, 100]:
            test_routine(start=0, length=length_assert)
            test_routine(start=0, end=length_assert - 1)
            test_routine(start=pd.Timestamp("2000-01-01"), length=length_assert)
            end_date = _generate_index(
                start=pd.Timestamp("2000-01-01"), length=length_assert
            )[-1]
            test_routine(start=pd.Timestamp("2000-01-01"), end=end_date)
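# The delta checked above follows from linear interpolation between
# `start_value` and `end_value` over `length` points:
#     step = (end_value - start_value) / (length - 1)
# A small sketch of that property, assuming `linear_timeseries` from
# `darts.utils.timeseries_generation`:
import numpy as np

from darts.utils.timeseries_generation import linear_timeseries

ts = linear_timeseries(start_value=5, end_value=12, length=8)
vals = ts.values().ravel()
assert np.allclose(np.diff(vals), (12 - 5) / (8 - 1))  # constant step of 1.0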
    def test_autoregressive_timeseries(self):
        # testing for correct length
        def test_length(start, end=None, length=None):
            autoregressive_ts = autoregressive_timeseries(
                coef=[-1, 1.618], start=start, end=end, length=length
            )
            self.assertEqual(len(autoregressive_ts), length_assert)

        # testing for correct calculation
        def test_calculation(coef):
            autoregressive_values = autoregressive_timeseries(coef=coef, length=100).values()
            for idx, val in enumerate(autoregressive_values[len(coef):]):
                self.assertTrue(
                    val == np.dot(coef, autoregressive_values[idx : idx + len(coef)].ravel())
                )

        for length_assert in [1, 2, 5, 10, 100]:
            test_length(start=0, length=length_assert)
            test_length(start=0, end=length_assert - 1)
            test_length(start=pd.Timestamp("2000-01-01"), length=length_assert)
            end_date = _generate_index(
                start=pd.Timestamp("2000-01-01"), length=length_assert
            )[-1]
            test_length(start=pd.Timestamp("2000-01-01"), end=end_date)

        for coef_assert in [[-1], [-1, 1.618], [1, 2, 3], list(range(10))]:
            test_calculation(coef=coef_assert)
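# The calculation check above asserts the autoregressive recurrence
#     x_t = coef[0] * x_{t-p} + ... + coef[p-1] * x_{t-1},   p = len(coef),
# i.e. each value is the dot product of `coef` with the preceding window.
# A minimal numpy sketch of that recurrence (not the darts implementation;
# the seed values used here are arbitrary assumptions):
import numpy as np

def ar_series(coef, length, init=None):
    p = len(coef)
    x = list(init) if init is not None else [1.0] * p  # assumed seed values
    while len(x) < length:
        x.append(float(np.dot(coef, x[-p:])))          # dot with preceding window
    return np.array(x[:length])

vals = ar_series([-1, 1.618], length=10)
assert all(
    np.isclose(vals[t], np.dot([-1, 1.618], vals[t - 2 : t])) for t in range(2, 10)
)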
    def setUpClass(cls):
        super().setUpClass()

        n_groups = 5
        len_ts = 10
        times = (
            pd.concat(
                [pd.DataFrame(_generate_index(start=pd.Timestamp(2010, 1, 1), length=len_ts))]
                * n_groups,
                axis=0,
            )
            .reset_index(drop=True)
            .rename(columns={0: "times"})
        )

        x = pd.DataFrame(np.random.randn(n_groups * len_ts, 3), columns=["a", "b", "c"])
        static_multivar = pd.DataFrame(
            [
                [i, 0 if j < (len_ts // 2) else 1]
                for i in range(n_groups)
                for j in range(len_ts)
            ],
            columns=["st1", "st2"],
        )

        df_long_multi = pd.DataFrame(pd.concat([times, x, static_multivar], axis=1))
        df_long_multi.loc[:, "constant"] = 1
        df_long_uni = df_long_multi.drop(columns=["st2"])

        cls.n_groups = n_groups
        cls.len_ts = len_ts
        cls.df_long_multi = df_long_multi
        cls.df_long_uni = df_long_uni
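# The fixture above stacks `n_groups` blocks of `len_ts` rows in long format,
# each block sharing a value of the static id column "st1". A hedged sketch of
# splitting such a frame into one TimeSeries per group (assuming
# `TimeSeries.from_group_dataframe`; the exact arguments used by the tests are
# not shown in this excerpt):
import numpy as np
import pandas as pd

from darts import TimeSeries

n_groups, len_ts = 3, 4
df = pd.DataFrame(
    {
        "times": list(pd.date_range("2010-01-01", periods=len_ts, freq="D")) * n_groups,
        "a": np.random.randn(n_groups * len_ts),
        "st1": np.repeat(range(n_groups), len_ts),
    }
)
series_list = TimeSeries.from_group_dataframe(
    df, group_cols="st1", time_col="times", value_cols="a"
)
assert len(series_list) == n_groups  # one series per "st1" group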
    def test_random_walk_timeseries(self):
        # testing for correct length
        def test_routine(start, end=None, length=None):
            random_walk_ts = random_walk_timeseries(start=start, end=end, length=length)
            self.assertEqual(len(random_walk_ts), length_assert)

        for length_assert in [1, 2, 5, 10, 100]:
            test_routine(start=0, length=length_assert)
            test_routine(start=0, end=length_assert - 1)
            test_routine(start=pd.Timestamp("2000-01-01"), length=length_assert)
            end_date = _generate_index(
                start=pd.Timestamp("2000-01-01"), length=length_assert
            )[-1]
            test_routine(start=pd.Timestamp("2000-01-01"), end=end_date)
    def test_constant_timeseries(self):
        # testing parameters
        value = 5

        def test_routine(start, end=None, length=None):
            # testing for constant value
            constant_ts = constant_timeseries(start=start, end=end, value=value, length=length)
            value_set = set(constant_ts.values().flatten())
            self.assertTrue(len(value_set) == 1)
            self.assertEqual(len(constant_ts), length_assert)

        for length_assert in [1, 2, 5, 10, 100]:
            test_routine(start=0, length=length_assert)
            test_routine(start=0, end=length_assert - 1)
            test_routine(start=pd.Timestamp("2000-01-01"), length=length_assert)
            end_date = _generate_index(
                start=pd.Timestamp("2000-01-01"), length=length_assert
            )[-1]
            test_routine(start=pd.Timestamp("2000-01-01"), end=end_date)
    def generate_inference_series(
        self, n: int, target: TimeSeries, covariate: Optional[TimeSeries] = None
    ) -> SupportedIndex:
        """For prediction (`n` is given) with future covariates we have to distinguish between two cases:
        1)  If future covariates are given, we can use them as reference
        2)  If future covariates are missing, we need to generate a time index that starts
            `input_chunk_length` before the end of `target` and ends `max(n, output_chunk_length)`
            after the end of `target`
        """
        super().generate_inference_series(n, target, covariate)
        if covariate is not None:
            return covariate.time_index
        else:
            return _generate_index(
                start=target.end_time() - target.freq * (self.input_chunk_length - 1),
                length=self.input_chunk_length + max(n, self.output_chunk_length),
                freq=target.freq,
            )
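# Worked example of the fallback index above (parameter values assumed for
# illustration): with input_chunk_length=12, output_chunk_length=6 and n=8,
# the generated index has length 12 + max(8, 6) = 20, starting 11 steps before
# the end of `target` and ending `n` steps after it.
import pandas as pd

input_chunk_length, output_chunk_length, n = 12, 6, 8
target_end = pd.Timestamp("2001-12-01")
freq = pd.tseries.frequencies.to_offset("MS")

idx = pd.date_range(
    start=target_end - freq * (input_chunk_length - 1),
    periods=input_chunk_length + max(n, output_chunk_length),
    freq=freq,
)
assert len(idx) == 20
assert idx[-1] == target_end + n * freq  # ends `n` steps after the target end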
class CovariateIndexGeneratorTestCase(DartsBaseTestClass):
    n_target = 24
    target_time = tg.linear_timeseries(length=n_target, freq="MS")
    cov_time_train = tg.datetime_attribute_timeseries(
        target_time, attribute="month", cyclic=True
    )
    cov_time_train_short = cov_time_train[1:]

    target_int = tg.linear_timeseries(length=n_target, start=2)
    cov_int_train = target_int
    cov_int_train_short = cov_int_train[1:]

    input_chunk_length = 12
    output_chunk_length = 6
    n_short = 6
    n_long = 8

    # pd.DatetimeIndex
    # target covariate for inference dataset for n <= output_chunk_length
    cov_time_inf_short = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_time.start_time(),
            length=n_target + n_short,
            freq=target_time.freq,
        ),
        np.arange(n_target + n_short),
    )
    # target covariate for inference dataset for n > output_chunk_length
    cov_time_inf_long = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_time.start_time(),
            length=n_target + n_long,
            freq=target_time.freq,
        ),
        np.arange(n_target + n_long),
    )

    # integer index
    # target covariate for inference dataset for n <= output_chunk_length
    cov_int_inf_short = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_int.start_time(),
            length=n_target + n_short,
            freq=target_int.freq,
        ),
        np.arange(n_target + n_short),
    )
    # target covariate for inference dataset for n > output_chunk_length
    cov_int_inf_long = TimeSeries.from_times_and_values(
        tg._generate_index(
            start=target_int.start_time(),
            length=n_target + n_long,
            freq=target_int.freq,
        ),
        np.arange(n_target + n_long),
    )

    def helper_test_index_types(self, ig: CovariateIndexGenerator):
        """Test the index type of the generated index."""
        # pd.DatetimeIndex
        idx = ig.generate_train_series(self.target_time, self.cov_time_train)
        self.assertTrue(isinstance(idx, pd.DatetimeIndex))
        idx = ig.generate_inference_series(
            self.n_short, self.target_time, self.cov_time_inf_short
        )
        self.assertTrue(isinstance(idx, pd.DatetimeIndex))
        idx = ig.generate_train_series(self.target_time, None)
        self.assertTrue(isinstance(idx, pd.DatetimeIndex))

        # pd.RangeIndex
        idx = ig.generate_train_series(self.target_int, self.cov_int_train)
        self.assertTrue(isinstance(idx, pd.RangeIndex))
        idx = ig.generate_inference_series(
            self.n_short, self.target_int, self.cov_int_inf_short
        )
        self.assertTrue(isinstance(idx, pd.RangeIndex))
        idx = ig.generate_train_series(self.target_int, None)
        self.assertTrue(isinstance(idx, pd.RangeIndex))

    def helper_test_index_generator_train(self, ig: CovariateIndexGenerator):
        """
        If covariates are given, the index generators should return the covariate series' index.
        If covariates are not given, the index generators should return the target series' index.
        """
        # pd.DatetimeIndex
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_time, self.cov_time_train)
        self.assertTrue(idx.equals(self.cov_time_train.time_index))
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_time, self.cov_time_train_short)
        self.assertTrue(idx.equals(self.cov_time_train_short.time_index))
        # generated index must be equal to input target index when no covariates are defined
        idx = ig.generate_train_series(self.target_time, None)
        self.assertTrue(idx.equals(self.cov_time_train.time_index))

        # integer index
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_int, self.cov_int_train)
        self.assertTrue(idx.equals(self.cov_int_train.time_index))
        # generated index must be equal to input covariate index
        idx = ig.generate_train_series(self.target_int, self.cov_int_train_short)
        self.assertTrue(idx.equals(self.cov_int_train_short.time_index))
        # generated index must be equal to input target index when no covariates are defined
        idx = ig.generate_train_series(self.target_int, None)
        self.assertTrue(idx.equals(self.cov_int_train.time_index))

    def helper_test_index_generator_inference(self, ig, is_past=False):
        """
        For prediction (`n` is given) with past covariates we have to distinguish between two cases:
        1)  if past covariates are given, we can use them as reference
        2)  if past covariates are missing, we need to generate a time index that starts
            `input_chunk_length` before the end of `target` and ends `max(0, n - output_chunk_length)`
            after the end of `target`

        For prediction (`n` is given) with future covariates we have to distinguish between two cases:
        1)  if future covariates are given, we can use them as reference
        2)  if future covariates are missing, we need to generate a time index that starts
            `input_chunk_length` before the end of `target` and ends `max(n, output_chunk_length)`
            after the end of `target`
        """
        # check generated inference index without passing covariates when n <= output_chunk_length
        idx = ig.generate_inference_series(self.n_short, self.target_time, None)
        if is_past:
            n_out = self.input_chunk_length
            last_idx = self.target_time.end_time()
        else:
            n_out = self.input_chunk_length + self.output_chunk_length
            last_idx = self.cov_time_inf_short.end_time()

        self.assertTrue(len(idx) == n_out)
        self.assertTrue(idx[-1] == last_idx)

        # check generated inference index without passing covariates when n > output_chunk_length
        idx = ig.generate_inference_series(self.n_long, self.target_time, None)
        if is_past:
            n_out = self.input_chunk_length + self.n_long - self.output_chunk_length
            last_idx = (
                self.target_time.end_time()
                + (self.n_long - self.output_chunk_length) * self.target_time.freq
            )
        else:
            n_out = self.input_chunk_length + self.n_long
            last_idx = self.cov_time_inf_long.end_time()

        self.assertTrue(len(idx) == n_out)
        self.assertTrue(idx[-1] == last_idx)

        idx = ig.generate_inference_series(
            self.n_short, self.target_time, self.cov_time_inf_short
        )
        self.assertTrue(idx.equals(self.cov_time_inf_short.time_index))
        idx = ig.generate_inference_series(
            self.n_long, self.target_time, self.cov_time_inf_long
        )
        self.assertTrue(idx.equals(self.cov_time_inf_long.time_index))
        idx = ig.generate_inference_series(
            self.n_short, self.target_int, self.cov_int_inf_short
        )
        self.assertTrue(idx.equals(self.cov_int_inf_short.time_index))
        idx = ig.generate_inference_series(
            self.n_long, self.target_int, self.cov_int_inf_long
        )
        self.assertTrue(idx.equals(self.cov_int_inf_long.time_index))
    def test_past_index_generator(self):
        ig = PastCovariateIndexGenerator(self.input_chunk_length, self.output_chunk_length)
        self.helper_test_index_types(ig)
        self.helper_test_index_generator_train(ig)
        self.helper_test_index_generator_inference(ig, is_past=True)

    def test_future_index_generator(self):
        ig = FutureCovariateIndexGenerator(self.input_chunk_length, self.output_chunk_length)
        self.helper_test_index_types(ig)
        self.helper_test_index_generator_train(ig)
        self.helper_test_index_generator_inference(ig, is_past=False)
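# A hedged usage sketch contrasting the two inference-index rules verified
# above (lengths follow the docstring of helper_test_index_generator_inference;
# the generator imports are assumed to match the ones used by these tests):
from darts.utils import timeseries_generation as tg

icl, ocl, n = 12, 6, 8
target = tg.linear_timeseries(length=24, freq="MS")

past_idx = PastCovariateIndexGenerator(icl, ocl).generate_inference_series(n, target, None)
future_idx = FutureCovariateIndexGenerator(icl, ocl).generate_inference_series(n, target, None)

assert len(past_idx) == icl + max(0, n - ocl)  # 12 + 2 = 14
assert len(future_idx) == icl + max(n, ocl)    # 12 + 8 = 20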
class EncoderTestCase(DartsBaseTestClass): n_target_1 = 12 n_target_2 = 24 shift = 50 target_1 = tg.linear_timeseries(length=n_target_1, freq="MS") target_2 = tg.linear_timeseries(start=target_1.end_time() + shift * target_1.freq, length=n_target_2, freq="MS") covariate_1 = tg.linear_timeseries(length=2 * n_target_1, freq="MS") covariate_2 = tg.linear_timeseries( start=target_1.end_time() + shift * target_1.freq, length=2 * n_target_2, freq="MS", ) target_multi = [target_1, target_2] covariate_multi = [covariate_1, covariate_2] input_chunk_length = 12 output_chunk_length = 6 n_short = 6 n_long = 8 # for the given input_chunk_length, ..., n_long from above, the time_index of the expected encoded covariate # multi-TS at prediction should be as follows inf_ts_short_future = [ TimeSeries.from_times_and_values( tg._generate_index(start=ts.end_time() + (1 - 12) * ts.freq, length=12 + 6, freq=ts.freq), np.arange(12 + 6), ) for ts in target_multi ] inf_ts_long_future = [ TimeSeries.from_times_and_values( tg._generate_index(start=ts.end_time() + (1 - 12) * ts.freq, length=12 + 8, freq=ts.freq), np.arange(12 + 8), ) for ts in target_multi ] inf_ts_short_past = [ TimeSeries.from_times_and_values( tg._generate_index(start=ts.end_time() + (1 - 12) * ts.freq, length=12, freq=ts.freq), np.arange(12), ) for ts in target_multi ] inf_ts_long_past = [ TimeSeries.from_times_and_values( tg._generate_index( start=ts.end_time() + (1 - 12) * ts.freq, length=12 + (8 - 6), freq=ts.freq, ), np.arange(12 + (8 - 6)), ) for ts in target_multi ] @unittest.skipUnless( TORCH_AVAILABLE, "Torch not available. SequentialEncoder tests with models will be skipped.", ) def test_sequence_encoder_from_model_params(self): """test if sequence encoder is initialized properly from model params""" # valid encoder model parameters are ('past', 'future') for the main key and datetime attribute for sub keys valid_encoder_args = { "cyclic": { "past": ["month"], "future": ["dayofyear", "dayofweek"] } } encoders = self.helper_encoder_from_model( add_encoder_dict=valid_encoder_args) self.assertTrue(len(encoders.past_encoders) == 1) self.assertTrue(len(encoders.future_encoders) == 2) # test if encoders have the correct attributes self.assertTrue(encoders.past_encoders[0].attribute == "month") self.assertTrue([enc.attribute for enc in encoders.future_encoders] == ["dayofyear", "dayofweek"]) valid_encoder_args = {"cyclic": {"past": ["month"]}} encoders = self.helper_encoder_from_model( add_encoder_dict=valid_encoder_args, takes_future_covariates=False) self.assertTrue(len(encoders.past_encoders) == 1) self.assertTrue(len(encoders.future_encoders) == 0) # test invalid encoder kwarg at model creation bad_encoder = {"no_encoder": {"past": ["month"]}} with self.assertRaises(ValueError): _ = self.helper_encoder_from_model(add_encoder_dict=bad_encoder) # test invalid kwargs at model creation bad_time = {"cyclic": {"ppast": ["month"]}} with self.assertRaises(ValueError): _ = self.helper_encoder_from_model(add_encoder_dict=bad_time) bad_attribute = {"cyclic": {"past": ["year"]}} with self.assertRaises(ValueError): _ = self.helper_encoder_from_model(add_encoder_dict=bad_attribute) bad_type = {"cyclic": {"past": 1}} with self.assertRaises(ValueError): _ = self.helper_encoder_from_model(add_encoder_dict=bad_type) @unittest.skipUnless( TORCH_AVAILABLE, "Torch not available. 
SequentialEncoder tests with models will be skipped.", ) def test_encoder_sequence_train(self): """Test `SequentialEncoder.encode_train()` output""" # ====> Sequential Cyclic Encoder Tests <==== encoder_args = { "cyclic": { "past": ["month"], "future": ["month", "month"] } } encoders = self.helper_encoder_from_model( add_encoder_dict=encoder_args) # ==> test training <== past_covs_train, future_covs_train = encoders.encode_train( target=self.target_multi, past_covariate=self.covariate_multi, future_covariate=self.covariate_multi, ) # encoded multi TS covariates should have same number as input covariates self.assertEqual(len(past_covs_train), 2) self.assertEqual(len(future_covs_train), 2) # each attribute (i.e., 'month', ...) generates 2 output variables (+ 1 covariate from input covariates) self.assertEqual(past_covs_train[0].n_components, 3) self.assertEqual(future_covs_train[0].n_components, 5) # check with different inputs encoder_args = {"cyclic": {"past": ["month"], "future": ["month"]}} encoders = self.helper_encoder_from_model( add_encoder_dict=encoder_args) # ==> test training <== past_covs_train, future_covs_train = encoders.encode_train( target=self.target_multi, past_covariate=self.covariate_multi, future_covariate=self.covariate_multi, ) # encoded multi TS covariates should have same number as input covariates self.assertEqual(len(past_covs_train), 2) self.assertEqual(len(future_covs_train), 2) # each attribute (i.e., 'month', ...) generates 2 output variables (+ 1 covariate from input covariates) self.assertEqual(past_covs_train[0].n_components, 3) self.assertEqual(future_covs_train[0].n_components, 3) # encoded past covariates must have equal index as input past covariates for pc, pc_in in zip(past_covs_train, self.covariate_multi): self.assertTrue(pc.time_index.equals(pc_in.time_index)) # encoded future covariates must have equal index as input future covariates for fc, fc_in in zip(future_covs_train, self.covariate_multi): self.assertTrue(fc.time_index.equals(fc_in.time_index)) # for training dataset: both encoded past and future covariates with cyclic encoder 'month' should be equal for pc, fc in zip(past_covs_train, future_covs_train): self.assertEqual(pc, fc) @unittest.skipUnless( TORCH_AVAILABLE, "Torch not available. 
SequentialEncoder tests with models will be skipped.", ) def test_encoder_sequence_inference(self): """Test `SequentialEncoder.encode_inference()` output""" # ==> test prediction <== encoder_args = {"cyclic": {"past": ["month"], "future": ["month"]}} encoders = self.helper_encoder_from_model( add_encoder_dict=encoder_args) # tests with n <= output_chunk_length # with supplying past and future covariates as input self.helper_sequence_encode_inference( encoders=encoders, n=self.n_short, past_covariates=self.covariate_multi, future_covariates=self.covariate_multi, expected_past_idx_ts=self.covariate_multi, expected_future_idx_ts=self.covariate_multi, ) # without supplying covariates as input self.helper_sequence_encode_inference( encoders=encoders, n=self.n_short, past_covariates=None, future_covariates=None, expected_past_idx_ts=self.inf_ts_short_past, expected_future_idx_ts=self.inf_ts_short_future, ) # tests with n > output_chunk_length # with supplying past covariates as input self.helper_sequence_encode_inference( encoders=encoders, n=self.n_long, past_covariates=self.covariate_multi, future_covariates=None, expected_past_idx_ts=self.covariate_multi, expected_future_idx_ts=self.inf_ts_long_future, ) # with supplying future covariates as input self.helper_sequence_encode_inference( encoders=encoders, n=self.n_long, past_covariates=None, future_covariates=self.covariate_multi, expected_past_idx_ts=self.inf_ts_long_past, expected_future_idx_ts=self.covariate_multi, ) def helper_sequence_encode_inference( self, encoders, n, past_covariates, future_covariates, expected_past_idx_ts, expected_future_idx_ts, ): """test comparisons for `SequentialEncoder.encode_inference()""" # generate encodings past_covs_pred, future_covs_pred = encoders.encode_inference( n=n, target=self.target_multi, past_covariate=past_covariates, future_covariate=future_covariates, ) # encoded past and future covariates must have equal index as expected past and future for pc, pc_in in zip(past_covs_pred, expected_past_idx_ts): self.assertTrue(pc.time_index.equals(pc_in.time_index)) for fc, fc_in in zip(future_covs_pred, expected_future_idx_ts): self.assertTrue(fc.time_index.equals(fc_in.time_index)) def helper_encoder_from_model(self, add_encoder_dict, takes_past_covariates=True, takes_future_covariates=True): """extracts encoders from parameters at model creation""" model = TFTModel( input_chunk_length=self.input_chunk_length, output_chunk_length=self.output_chunk_length, add_encoders=add_encoder_dict, ) encoders = model.initialize_encoders() # see if encoding works _ = encoders.encode_train(self.target_multi, self.covariate_multi, self.covariate_multi) _ = encoders.encode_inference(3, self.target_multi, self.covariate_multi, self.covariate_multi) return encoders def test_cyclic_encoder(self): """Test past and future `CyclicTemporalEncoder``""" attribute = "month" month_series = TimeSeries.from_times_and_values( times=tg._generate_index(start=pd.to_datetime("2000-01-01"), length=24, freq="MS"), values=np.arange(24), ) encoder = FutureCyclicEncoder(input_chunk_length=1, output_chunk_length=1, attribute="month") first_halve = encoder.encode_train(target=month_series[:12], covariate=month_series[:12], merge_covariate=False) second_halve = encoder.encode_train(target=month_series[12:], covariate=month_series[12:], merge_covariate=False) # check if encoded values for first 12 months are equal to values of last 12 months self.assertTrue((first_halve.values() == second_halve.values()).all()) # test past cyclic encoder 
self.helper_test_cyclic_encoder( PastCyclicEncoder, attribute=attribute, inf_ts_short=self.inf_ts_short_past, inf_ts_long=self.inf_ts_long_past, cyclic=True, ) # test future cyclic encoder self.helper_test_cyclic_encoder( FutureCyclicEncoder, attribute=attribute, inf_ts_short=self.inf_ts_short_future, inf_ts_long=self.inf_ts_long_future, cyclic=True, ) def test_datetime_attribute_encoder(self): """Test past and future `DatetimeAttributeEncoder`""" attribute = "month" month_series = TimeSeries.from_times_and_values( times=tg._generate_index(start=pd.to_datetime("2000-01-01"), length=24, freq="MS"), values=np.arange(24), ) encoder = FutureDatetimeAttributeEncoder(input_chunk_length=1, output_chunk_length=1, attribute="month") first_halve = encoder.encode_train(target=month_series[:12], covariate=month_series[:12], merge_covariate=False) second_halve = encoder.encode_train(target=month_series[12:], covariate=month_series[12:], merge_covariate=False) # check if encoded values for first 12 months are equal to values of last 12 months self.assertTrue((first_halve.values() == second_halve.values()).all()) # test past cyclic encoder self.helper_test_cyclic_encoder( PastDatetimeAttributeEncoder, attribute=attribute, inf_ts_short=self.inf_ts_short_past, inf_ts_long=self.inf_ts_long_past, cyclic=False, ) # test future cyclic encoder self.helper_test_cyclic_encoder( FutureDatetimeAttributeEncoder, attribute=attribute, inf_ts_short=self.inf_ts_short_future, inf_ts_long=self.inf_ts_long_future, cyclic=False, ) def test_integer_positional_encoder(self): """Test past `IntegerIndexEncoder`""" ts = tg.linear_timeseries(length=24, freq="MS") input_chunk_length = 12 output_chunk_length = 6 # ===> test absolute position encoder <=== encoder_params = {"position": {"past": ["absolute"]}} encs = SequentialEncoder( add_encoders=encoder_params, input_chunk_length=input_chunk_length, output_chunk_length=output_chunk_length, takes_past_covariates=True, takes_future_covariates=True, ) t1, _ = encs.encode_train(ts) t2, _ = encs.encode_train( TimeSeries.from_times_and_values(ts.time_index + ts.freq, ts.values())) t3, _ = encs.encode_train( TimeSeries.from_times_and_values(ts.time_index - ts.freq, ts.values())) # absolute encoder takes the first observed index as a reference (from training) vals = np.arange(len(ts)).reshape((len(ts), 1)) self.assertTrue((t1[0].time_index == ts.time_index).all() and (t1[0].values() == vals).all()) # test that the position values are updated correctly self.assertTrue((t2[0].time_index == ts.time_index + ts.freq).all() and (t2[0].values() == vals + 1).all()) self.assertTrue((t3[0].time_index == ts.time_index - ts.freq).all() and (t3[0].values() == vals - 1).all()) # quickly test inference encoding # n > output_chunk_length t4, _ = encs.encode_inference(output_chunk_length + 1, ts) self.assertTrue((t4[0].values()[:, 0] == np.arange( len(ts) - input_chunk_length, len(ts) + 1)).all()) # n <= output_chunk_length t5, _ = encs.encode_inference(output_chunk_length - 1, ts) self.assertTrue((t5[0].values()[:, 0] == np.arange( len(ts) - input_chunk_length, len(ts))).all()) # ===> test relative position encoder <=== encoder_params = {"position": {"past": ["relative"]}} encs = SequentialEncoder( add_encoders=encoder_params, input_chunk_length=input_chunk_length, output_chunk_length=output_chunk_length, takes_past_covariates=True, takes_future_covariates=True, ) t1, _ = encs.encode_train(ts) t2, _ = encs.encode_train( TimeSeries.from_times_and_values(ts.time_index + ts.freq, ts.values())) t3, _ = 
encs.encode_train( TimeSeries.from_times_and_values(ts.time_index - ts.freq, ts.values())) # relative encoder takes the end of the training series as reference vals = np.arange(-len(ts) + 1, 1).reshape((len(ts), 1)) self.assertTrue((t1[0].time_index == ts.time_index).all() and (t1[0].values() == vals).all()) self.assertTrue((t2[0].time_index == ts.time_index + ts.freq).all() and (t2[0].values() == vals + 1).all()) self.assertTrue((t3[0].time_index == ts.time_index - ts.freq).all() and (t3[0].values() == vals - 1).all()) # quickly test inference encoding # n > output_chunk_length t4, _ = encs.encode_inference(output_chunk_length + 1, ts) self.assertTrue( (t4[0].values()[:, 0] == np.arange(-input_chunk_length + 1, 1 + 1)).all()) # n <= output_chunk_length t5, _ = encs.encode_inference(output_chunk_length - 1, ts) self.assertTrue( (t5[0].values()[:, 0] == np.arange(-input_chunk_length + 1, 0 + 1)).all()) def test_callable_encoder(self): """Test `CallableIndexEncoder`""" ts = tg.linear_timeseries(length=24, freq="A") input_chunk_length = 12 output_chunk_length = 6 # ===> test absolute position encoder <=== encoder_params = { "custom": { "past": [lambda index: index.year, lambda index: index.year - 1] } } encs = SequentialEncoder( add_encoders=encoder_params, input_chunk_length=input_chunk_length, output_chunk_length=output_chunk_length, takes_past_covariates=True, takes_future_covariates=True, ) t1, _ = encs.encode_train(ts) self.assertTrue((ts.time_index.year.values == t1[0].values()[:, 0]).all()) self.assertTrue( (ts.time_index.year.values - 1 == t1[0].values()[:, 1]).all()) def test_transformer(self): ts1 = tg.linear_timeseries(start_value=1, end_value=2, length=60, freq="T", column_name="cov_in") encoder_params = { "position": { "future": ["absolute"] }, "cyclic": { "future": ["minute"] }, "transformer": Scaler(), } encs = SequentialEncoder( add_encoders=encoder_params, input_chunk_length=12, output_chunk_length=6, takes_past_covariates=True, takes_future_covariates=True, ) _, t1 = encs.encode_train(ts1, future_covariate=ts1) # ===> train set test <=== # user supplied covariates should not be transformed self.assertTrue(t1[0]["cov_in"] == ts1) # cyclic encodings should not be transformed for curve in ["sin", "cos"]: self.assertAlmostEqual( t1[0][f"minute_{curve}"].all_values(copy=False).min(), -1.0, delta=10e-9) self.assertAlmostEqual( t1[0][f"minute_{curve}"].values(copy=False).max(), 1.0, delta=10e-9) # all others should be transformed to values between 0 and 1 self.assertAlmostEqual(t1[0]["absolute_idx"].values(copy=False).min(), 0.0, delta=10e-9) self.assertAlmostEqual(t1[0]["absolute_idx"].values(copy=False).max(), 1.0, delta=10e-9) # ===> validation set test <=== ts2 = tg.linear_timeseries( start_value=1, end_value=2, start=ts1.end_time(), length=60, freq=ts1.freq, column_name="cov_in", ) _, t2 = encs.encode_train(ts2, future_covariate=ts2) # make sure that when calling encoders the second time, scalers are not fit again (for validation and inference) self.assertAlmostEqual(t2[0]["absolute_idx"].values(copy=False).min(), 1.0, delta=10e-9) self.assertAlmostEqual(t2[0]["absolute_idx"].values(copy=False).max(), 2.0, delta=10e-9) fc_inf = tg.linear_timeseries(start_value=1, end_value=3, length=80, freq="T", column_name="cov_in") _, t3 = encs.encode_inference(n=12, target=ts1, future_covariate=fc_inf) # index 0 is also start of train target series and value should be 0 self.assertAlmostEqual(t3[0]["absolute_idx"][0].values()[0, 0], 0.0) # index len(ts1) - 1 is the prediction point and 
value should be 0 self.assertAlmostEqual( t3[0]["absolute_idx"][len(ts1) - 1].values()[0, 0], 1.0) # the future should scale proportional to distance to prediction point self.assertAlmostEqual(t3[0]["absolute_idx"][80 - 1].values()[0, 0], 80 / 60, delta=0.01) def helper_test_cyclic_encoder(self, encoder_class, attribute, inf_ts_short, inf_ts_long, cyclic): """Test cases for both `PastCyclicEncoder` and `FutureCyclicEncoder`""" encoder = encoder_class( input_chunk_length=self.input_chunk_length, output_chunk_length=self.output_chunk_length, attribute=attribute, ) # covs: covariates; ds: dataset # expected generated covs when covs are supplied as input for train and inference ds result_with_cov = [ tg.datetime_attribute_timeseries(ts, attribute=attribute, cyclic=cyclic) for ts in self.covariate_multi ] # expected generated covs when covs are not supplied as input for train ds result_no_cov = [ tg.datetime_attribute_timeseries(ts, attribute=attribute, cyclic=cyclic) for ts in self.target_multi ] # expected generated covs when covs are not supplied as input for inference ds and n <= output_chunk_length result_no_cov_inf_short = [ tg.datetime_attribute_timeseries(ts, attribute=attribute, cyclic=cyclic) for ts in inf_ts_short ] # expected generated covs when covs are not supplied as input for inference ds and n > output_chunk_length result_no_cov_inf_long = [ tg.datetime_attribute_timeseries(ts, attribute=attribute, cyclic=cyclic) for ts in inf_ts_long ] # test train encoding with covariates self.helper_test_encoder_single_train( encoder=encoder, target=self.target_multi, covariate=self.covariate_multi, result=result_with_cov, merge_covariates=False, ) # test train encoding without covariates self.helper_test_encoder_single_train( encoder=encoder, target=self.target_multi, covariate=[None] * len(self.target_multi), result=result_no_cov, merge_covariates=False, ) # test inference encoding with covariates and n <= output_chunk_length self.helper_test_encoder_single_inference( encoder=encoder, n=self.n_short, target=self.target_multi, covariate=self.covariate_multi, result=result_with_cov, merge_covariates=False, ) # test inference encoding with covariates and n > output_chunk_length self.helper_test_encoder_single_inference( encoder=encoder, n=self.n_long, target=self.target_multi, covariate=self.covariate_multi, result=result_with_cov, merge_covariates=False, ) # test inference encoding without covariates and n <= output_chunk_length self.helper_test_encoder_single_inference( encoder=encoder, n=self.n_short, target=self.target_multi, covariate=[None] * len(self.target_multi), result=result_no_cov_inf_short, merge_covariates=False, ) # test inference encoding without covariates and n > output_chunk_length self.helper_test_encoder_single_inference( encoder=encoder, n=self.n_long, target=self.target_multi, covariate=[None] * len(self.target_multi), result=result_no_cov_inf_long, merge_covariates=False, ) def helper_test_encoder_single_train( self, encoder: SingleEncoder, target: Sequence[TimeSeries], covariate: Sequence[Optional[TimeSeries]], result: Sequence[TimeSeries], merge_covariates: bool = True, ): """Test `SingleEncoder.encode_train()`""" encoded = [] for ts, cov in zip(target, covariate): encoded.append( encoder.encode_train(ts, cov, merge_covariate=merge_covariates)) self.assertTrue(encoded == result) def helper_test_encoder_single_inference( self, encoder: SingleEncoder, n: int, target: Sequence[TimeSeries], covariate: Sequence[Optional[TimeSeries]], result: Sequence[TimeSeries], 
        merge_covariates: bool = True,
    ):
        """Test `SingleEncoder.encode_inference()`"""
        encoded = []
        for ts, cov in zip(target, covariate):
            encoded.append(
                encoder.encode_inference(n, ts, cov, merge_covariate=merge_covariates)
            )
        self.assertTrue(encoded == result)
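# A hedged end-to-end sketch of the `add_encoders` dictionary format exercised
# above. The keys ("cyclic", "position", "custom", "transformer") and the
# "past"/"future" sub-keys are taken from the test cases; the model, data and
# import paths are illustrative assumptions, and TFTModel requires torch (the
# same constraint the tests guard with TORCH_AVAILABLE):
from darts.dataprocessing.transformers import Scaler
from darts.models import TFTModel
from darts.utils import timeseries_generation as tg

add_encoders = {
    "cyclic": {"future": ["month"]},                 # sin/cos month encoding
    "position": {"past": ["relative"]},              # relative index position
    "custom": {"past": [lambda index: index.year]},  # callable encoder
    "transformer": Scaler(),                         # scales the generated encodings
}

model = TFTModel(input_chunk_length=12, output_chunk_length=6, add_encoders=add_encoders)
encoders = model.initialize_encoders()  # same entry point as helper_encoder_from_model

target = tg.linear_timeseries(length=24, freq="MS")
covariate = tg.linear_timeseries(length=48, freq="MS")
past_covs, future_covs = encoders.encode_train(
    target, past_covariate=covariate, future_covariate=covariate
)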