def test_seasonality_inference(self):
    """Check `seasonal_periods` inference for datetime and integer indices."""
    # test `seasonal_periods` inference for datetime indices
    freq_str_seasonality_periods_tuples = [
        ("D", 7),
        ("H", 24),
        ("M", 12),
        ("W", 52),
        ("Q", 4),
        ("B", 5),
    ]
    # fix: loop variable renamed so it no longer shadows the builtin `tuple`
    for freq_period in freq_str_seasonality_periods_tuples:
        self.helper_test_seasonality_inference(*freq_period)

    # test default selection for integer index
    series = TimeSeries.from_values(np.arange(1, 30, 1))
    model = ExponentialSmoothing()
    model.fit(series)
    self.assertEqual(model.seasonal_periods, 12)

    # test whether a model that inferred a seasonality period before will
    # do it again for a new series
    series1 = tg.sine_timeseries(length=100, freq="M")
    series2 = tg.sine_timeseries(length=100, freq="D")
    model = ExponentialSmoothing()
    model.fit(series1)
    model.fit(series2)
    self.assertEqual(model.seasonal_periods, 7)
def helper_generate_multivariate_case_data(self, season_length, n_repeat):
    """Generate multivariate test case data.

    Target series is a sine wave stacked with a repeating linear curve of
    equal seasonal length. Covariates are datetime attributes for 'hours'.
    """
    # sine-wave component
    ts_sine = tg.sine_timeseries(
        value_frequency=1 / season_length,
        length=n_repeat * season_length,
        freq="h",
    )

    # repeating linear component: one initial segment, then append the rest
    ts_linear = tg.linear_timeseries(
        0, 1, length=season_length, start=ts_sine.end_time() + ts_sine.freq
    )
    for _ in range(n_repeat - 1):
        segment = tg.linear_timeseries(
            0,
            1,
            length=season_length,
            start=ts_linear.end_time() + ts_linear.freq,
        )
        ts_linear = ts_linear.append(segment)
    # re-align the linear curve onto the sine wave's time index
    ts_linear = TimeSeries.from_times_and_values(
        times=ts_sine.time_index, values=ts_linear.values()
    )

    # multivariate target: sine stacked with linear
    ts = ts_sine.stack(ts_linear)

    # train/validation split
    val_length = 10 * season_length
    ts_train, ts_val = ts[:-val_length], ts[-val_length:]

    # scale target series
    scaler_ts = Scaler()
    ts_train_scaled = scaler_ts.fit_transform(ts_train)
    ts_val_scaled = scaler_ts.transform(ts_val)
    ts_scaled = scaler_ts.transform(ts)

    # generate long enough covariates (past and future covariates will be
    # the same for simplicity)
    long_enough_ts = tg.sine_timeseries(
        value_frequency=1 / season_length, length=1000, freq=ts.freq
    )
    covariates = tg.datetime_attribute_timeseries(long_enough_ts, attribute="hour")
    scaler_covs = Scaler()
    covariates_scaled = scaler_covs.fit_transform(covariates)

    return ts_scaled, ts_train_scaled, ts_val_scaled, covariates_scaled
def test_itakura_window(self):
    """Itakura windows: exact cell set on a small grid, then DTW sanity on sines."""
    # small hand-checked example: 6x5 grid with max slope 1.5
    window = dtw.Itakura(max_slope=1.5)
    window.init_size(6, 5)

    expected_cells = [
        (1, 1),
        (1, 2),
        (2, 1),
        (2, 2),
        (2, 3),
        (3, 1),
        (3, 2),
        (3, 3),
        (3, 4),
        (4, 2),
        (4, 3),
        (4, 4),
        (5, 2),
        (5, 3),
        (5, 4),
        (5, 5),
        (6, 4),
        (6, 5),
    ]
    self.assertEqual(list(window), expected_cells)

    # larger problems: DTW under an Itakura window should align shifted sines
    # with a small mean distance
    for rows, cols in [(10, 43), (543, 45), (34, 11)]:
        max_slope = cols / rows + 1
        series_a = tg.sine_timeseries(
            length=rows, value_frequency=1 / rows, value_phase=0
        )
        series_b = tg.sine_timeseries(
            length=cols, value_frequency=1 / cols, value_phase=np.pi / 4
        )
        dist = dtw.dtw(
            series_a, series_b, window=dtw.Itakura(max_slope)
        ).mean_distance()
        self.assertGreater(1, dist)
def test_static_covariates_support(self):
    """TFT handles static covariates and validates their dimensionality."""
    # two stacked sine components carrying a 2x2 static covariate frame
    target_multi = concatenate(
        [tg.sine_timeseries(length=10, freq="h")] * 2, axis=1
    )
    target_multi = target_multi.with_static_covariates(
        pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], index=["st1", "st2"])
    )

    # should work with cyclic encoding for time index
    model = TFTModel(
        input_chunk_length=3,
        output_chunk_length=4,
        add_encoders={"cyclic": {"future": "hour"}},
        pl_trainer_kwargs={"fast_dev_run": True},
    )
    model.fit(target_multi, verbose=False)

    n_static_covs = len(target_multi.static_covariates.columns)
    assert len(model.model.static_variables) == n_static_covs

    model.predict(n=1, series=target_multi, verbose=False)

    # raise an error when trained with static covariates of wrong dimensionality
    target_multi = target_multi.with_static_covariates(
        pd.concat([target_multi.static_covariates] * 2, axis=1)
    )
    with pytest.raises(ValueError):
        model.predict(n=1, series=target_multi, verbose=False)

    # raise an error when trained with static covariates and trying to predict without
    target_multi = target_multi.with_static_covariates(None)
    with pytest.raises(ValueError):
        model.predict(n=1, series=target_multi, verbose=False)
def helper_relevant_attributes(self, freq, length, period_attributes_tuples):
    # Helper: verify `_find_relevant_timestamp_attributes` for one frequency.
    # test random walk: no seasonality, so no relevant attributes are expected
    random_walk_ts = tg.random_walk_timeseries(freq=freq, length=length)
    self.assertEqual(_find_relevant_timestamp_attributes(random_walk_ts), set())
    for period, relevant_attributes in period_attributes_tuples:
        # test seasonal period with no noise
        seasonal_ts = tg.sine_timeseries(freq=freq, value_frequency=1 / period, length=length)
        self.assertEqual(
            _find_relevant_timestamp_attributes(seasonal_ts),
            relevant_attributes,
            "failed to recognize season in non-noisy timeseries",
        )
        # test seasonal period with added Gaussian noise
        # (comment fixed: this is the noisy case, not the noise-free one)
        seasonal_noisy_ts = seasonal_ts + tg.gaussian_timeseries(
            freq=freq, length=length)
        self.assertEqual(
            _find_relevant_timestamp_attributes(seasonal_noisy_ts),
            relevant_attributes,
            "failed to recognize season in noisy timeseries",
        )
def test_future_covariate_handling(self):
    """TFT requires future covariates: via encoders or a relative index."""
    ts_time_index = tg.sine_timeseries(length=2, freq="h")
    ts_integer_index = TimeSeries.from_values(values=ts_time_index.values())

    # with neither an encoder nor a relative index, fitting must fail
    model = TFTModel(input_chunk_length=1, output_chunk_length=1)
    with self.assertRaises(ValueError):
        model.fit(ts_time_index, verbose=False)

    # cyclic datetime encoding supplies the required future covariates
    model = TFTModel(
        input_chunk_length=1,
        output_chunk_length=1,
        add_encoders={"cyclic": {"future": "hour"}},
    )
    model.fit(ts_time_index, verbose=False)

    # a relative index works for both datetime- and integer-indexed series
    model = TFTModel(
        input_chunk_length=1, output_chunk_length=1, add_relative_index=True
    )
    model.fit(ts_time_index, verbose=False)
    model.fit(ts_integer_index, verbose=False)
def test_kalman_samples(self):
    """Filtering with num_samples > 1 yields a stochastic univariate result."""
    series = tg.sine_timeseries(length=30, value_frequency=0.1)
    kf = KalmanFilter(dim_x=1)
    kf.fit(series)
    filtered = kf.filter(series, num_samples=10)
    self.assertEqual(filtered.width, 1)
    self.assertEqual(filtered.n_samples, 10)
def test_gaussian_process_multivariate(self):
    """A 2-component input comes back with both components filtered."""
    sine_component = tg.sine_timeseries(length=30, value_frequency=0.1)
    noise_component = tg.gaussian_timeseries(length=30) * 0.1
    stacked = sine_component.stack(noise_component)
    gpf = GaussianProcessFilter()
    result = gpf.filter(stacked)
    self.assertEqual(result.width, 2)
def test_kalman_covariates(self):
    """Fitting and filtering with covariates keeps a deterministic univariate output."""
    series = tg.sine_timeseries(length=30, value_frequency=0.1)
    covariates = -series.copy()
    kf = KalmanFilter(dim_x=2)
    kf.fit(series, covariates=covariates)
    filtered = kf.filter(series, covariates=covariates)
    self.assertEqual(filtered.width, 1)
    self.assertEqual(filtered.n_samples, 1)
def test_kalman_multivariate(self):
    """Filtering a stacked sine + noise series keeps both components, deterministic."""
    kf = KalmanFilter(dim_x=3)
    sine_part = tg.sine_timeseries(length=30, value_frequency=0.1)
    noise_part = tg.gaussian_timeseries(length=30) * 0.1
    stacked = sine_part.stack(noise_part)
    kf.fit(stacked)
    result = kf.filter(stacked)
    self.assertEqual(result.width, 2)
    self.assertEqual(result.n_samples, 1)
def test_routine(start, end=None, length=None):
    # Closure helper: builds a sine series and checks its value range and length.
    # Reads value_amplitude / value_y_offset / length_assert / self from the
    # enclosing scope.
    sine_ts = sine_timeseries(
        start=start,
        end=end,
        length=length,
        value_amplitude=value_amplitude,
        value_y_offset=value_y_offset,
    )
    # all values must lie within [offset - amplitude, offset + amplitude]
    upper_bound = value_y_offset + value_amplitude
    lower_bound = value_y_offset - value_amplitude
    self.assertTrue((sine_ts <= upper_bound).all().all())
    self.assertTrue((sine_ts >= lower_bound).all().all())
    self.assertEqual(len(sine_ts), length_assert)
def test_moving_average_multivariate(self):
    """Smoothing reduces the mean absolute value of every component."""
    smoother = MovingAverage(window=3)
    sine_part = tg.sine_timeseries(length=30, value_frequency=0.1)
    noise_part = tg.gaussian_timeseries(length=30) * 0.1
    raw = sine_part.stack(noise_part)
    smoothed = smoother.filter(raw)
    for component in (0, 1):
        self.assertGreater(
            np.mean(np.abs(raw.values()[:, component])),
            np.mean(np.abs(smoothed.values()[:, component])),
        )
def test_performance(self):
    """TCN should fit a simple sine + trend series to within 0.3 MAE."""
    ts = tg.sine_timeseries(length=100) + tg.linear_timeseries(
        length=100, end_value=2
    )
    train, test = ts[:90], ts[90:]

    model = TCNModel(
        input_chunk_length=12,
        output_chunk_length=10,
        n_epochs=300,
        random_state=0,
    )
    model.fit(train)

    forecast = model.predict(n=10)
    self.assertTrue(mae(forecast, test) < 0.3)
def test_kalman_given_kf(self):
    """A pre-built nfoursid Kalman object supplies the filter's dimensions."""
    ss = state_space.StateSpace(
        a=np.eye(2), b=np.ones((2, 1)), c=np.ones((1, 2)), d=np.ones((1, 1))
    )
    given_kf = kalman.Kalman(ss, np.ones((3, 3)) * 0.1)
    # dim_x=1 is passed but the given kf is 2-dimensional; the assertions
    # below show the given kf's dimensions win
    kf = KalmanFilter(dim_x=1, kf=given_kf)

    series = tg.sine_timeseries(length=30, value_frequency=0.1)
    filtered = kf.filter(series, covariates=-series.copy())

    self.assertEqual(kf.dim_u, 1)
    self.assertEqual(kf.dim_x, 2)
    self.assertEqual(filtered.width, 1)
    self.assertEqual(filtered.n_samples, 1)
def helper_test_freq_coversion(self, test_cases):
    # NOTE(review): the method name keeps its historical typo ("coversion")
    # so existing callers are not broken.
    # every known frequency string must convert without raising
    for freq, period in test_cases.items():
        ts_sine = tg.sine_timeseries(
            value_frequency=1 / period, length=3, freq=freq
        )
        _ = Prophet._freq_to_days(freq=ts_sine.freq_str)

    # "30S" must equal thirty times the single-second conversion
    self.assertAlmostEqual(
        Prophet._freq_to_days(freq="30S"),
        30 * Prophet._freq_to_days(freq="S"),
        delta=10e-9,
    )

    # check bad frequency string
    with self.assertRaises(ValueError):
        _ = Prophet._freq_to_days(freq="30SS")
def denoising_input(self):
    """Build seeded targets and covariates for the denoising tests.

    Returns a tuple (ts_sum1, ts_cov1, ts_sum2, ts_cov2).
    """
    np.random.seed(self.RANDOM_SEED)
    periodic = tg.sine_timeseries(length=500)
    gaussian = tg.gaussian_timeseries(length=500)
    random_walk = tg.random_walk_timeseries(length=500)

    # rename the stacked covariate components via a pandas round-trip
    cov1_df = periodic.stack(gaussian).pd_dataframe()
    cov1_df.columns = ["Periodic", "Gaussian"]
    ts_cov1 = TimeSeries.from_dataframe(cov1_df)

    ts_sum1 = periodic + gaussian
    ts_cov2 = ts_sum1.stack(random_walk)
    ts_sum2 = ts_sum1 + random_walk

    return ts_sum1, ts_cov1, ts_sum2, ts_cov2
def test_kalman_missing_values(self):
    # Noisy sine with two NaN gaps; the filter must reconstruct the holes.
    sine = tg.sine_timeseries(
        length=100, value_frequency=0.05) + 0.1 * tg.gaussian_timeseries(length=100)
    values = sine.values()
    values[20:22] = np.nan
    values[28:40] = np.nan
    sine_holes = TimeSeries.from_values(values)
    # NOTE(review): this relies on `values()` returning a copy so that `sine`
    # itself keeps the original (hole-free) data — TODO confirm
    sine = TimeSeries.from_values(sine.values())

    kf = KalmanFilter(dim_x=2)
    kf.fit(sine_holes[-50:])  # fit on the part with no holes

    # reconstruction should succeed (typo fixed in comment)
    filtered_series = kf.filter(sine_holes, num_samples=100)

    # reconstruction error should be sufficiently small
    self.assertLess(rmse(filtered_series, sine), 0.1)
class BoxCoxTestCase(unittest.TestCase):
    """Tests for the BoxCox transformer on univariate and multivariate series."""

    sine_series = sine_timeseries(length=50, value_y_offset=5, value_frequency=0.05)
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxbox_lambda(self):
        # a scalar lambda is broadcast to every component
        transformer = BoxCox()
        transformer.fit(self.multi_series, 0.3)
        self.assertEqual(transformer._lmbda, [0.3, 0.3])

        # a list of lambdas is applied per component
        transformer.fit(self.multi_series, [0.3, 0.4])
        self.assertEqual(transformer._lmbda, [0.3, 0.4])

        # the lambda list length must match the number of components
        with self.assertRaises(ValueError):
            transformer.fit(self.multi_series, [0.2, 0.4, 0.5])

        # the two optimization methods should find different lambdas
        transformer.fit(self.multi_series, optim_method='mle')
        lmbda_mle = transformer._lmbda
        transformer.fit(self.multi_series, optim_method='pearsonr')
        lmbda_pearsonr = transformer._lmbda
        self.assertNotEqual(lmbda_mle.array, lmbda_pearsonr.array)

    def test_boxcox_transform(self):
        # with lambda=0 the Box-Cox transform reduces to the natural log
        boxcox = BoxCox()
        log_mapper = Mapper(lambda x: log(x))
        via_log = log_mapper.transform(self.sine_series)
        via_boxcox = boxcox.fit(self.sine_series, lmbda=0).transform(self.sine_series)
        self.assertEqual(via_log, via_boxcox)

    def test_boxcox_inverse(self):
        # inverse_transform(fit_transform(x)) should round-trip back to x
        boxcox = BoxCox()
        roundtrip = boxcox.inverse_transform(boxcox.fit_transform(self.multi_series))
        pd.testing.assert_frame_equal(
            self.multi_series._df, roundtrip._df, check_exact=False)
class RegressionEnsembleModelsTestCase(DartsBaseTestClass):
    """Tests for RegressionEnsembleModel with local, global and regression members."""

    RANDOM_SEED = 111

    # simple univariate fixtures: seasonal + trend
    sine_series = tg.sine_timeseries(value_frequency=(1 / 5), value_y_offset=10, length=50)
    lin_series = tg.linear_timeseries(length=50)
    combined = sine_series + lin_series

    # multi-series fixtures (seq*) and matching covariates (cov*)
    seq1 = [_make_ts(0), _make_ts(10), _make_ts(20)]
    cov1 = [_make_ts(5), _make_ts(15), _make_ts(25)]
    seq2 = [_make_ts(0, 20), _make_ts(10, 20), _make_ts(20, 20)]
    cov2 = [_make_ts(5, 30), _make_ts(15, 30), _make_ts(25, 30)]

    # dummy feature and target TimeSeries instances
    ts_periodic = tg.sine_timeseries(length=500)
    ts_gaussian = tg.gaussian_timeseries(length=500)
    ts_random_walk = tg.random_walk_timeseries(length=500)
    # rename the stacked covariate components via a pandas round-trip
    ts_cov1 = ts_periodic.stack(ts_gaussian)
    ts_cov1 = ts_cov1.pd_dataframe()
    ts_cov1.columns = ["Periodic", "Gaussian"]
    ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
    ts_sum1 = ts_periodic + ts_gaussian
    ts_cov2 = ts_sum1.stack(ts_random_walk)
    ts_sum2 = ts_sum1 + ts_random_walk

    def get_local_models(self):
        # baseline (non-torch) forecasting models used as ensemble members
        return [NaiveDrift(), NaiveSeasonal(5), NaiveSeasonal(10)]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def get_global_models(self, output_chunk_length=5):
        # torch-based global models used as ensemble members
        return [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=output_chunk_length,
                n_epochs=1,
                random_state=42,
            ),
        ]

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_different_regression_models(self):
        # the ensemble should accept sklearn regressors and darts regression
        # models alike (and a default when none is given)
        regr1 = LinearRegression()
        regr2 = RandomForestRegressor()
        regr3 = RandomForest(lags_future_covariates=[0])
        model0 = RegressionEnsembleModel(self.get_local_models(), 10)
        model1 = RegressionEnsembleModel(self.get_local_models(), 10, regr1)
        model2 = RegressionEnsembleModel(self.get_local_models(), 10, regr2)
        model3 = RegressionEnsembleModel(self.get_local_models(), 10, regr3)
        models = [model0, model1, model2, model3]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_accepts_one_model(self):
        # an ensemble with a single forecasting member is valid
        regr1 = LinearRegression()
        regr2 = RandomForest(lags_future_covariates=[0])
        model0 = RegressionEnsembleModel([self.get_local_models()[0]], 10)
        model1 = RegressionEnsembleModel([self.get_local_models()[0]], 10, regr1)
        model2 = RegressionEnsembleModel([self.get_local_models()[0]], 10, regr2)
        models = [model0, model1, model2]
        for model in models:
            model.fit(series=self.combined)
            model.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_n_points(self):
        # regression_train_n_points must leave enough data for members to train
        regr = LinearRegressionModel(lags_future_covariates=[0])

        # same values
        ensemble = RegressionEnsembleModel(self.get_local_models(), 5, regr)

        # too big value to perform the split
        ensemble = RegressionEnsembleModel(self.get_local_models(), 100)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        ensemble = RegressionEnsembleModel(self.get_local_models(), 50)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

        # too big value considering min_train_series_length
        ensemble = RegressionEnsembleModel(self.get_local_models(), 45)
        with self.assertRaises(ValueError):
            ensemble.fit(self.combined)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_torch_models_retrain(self):
        # the member fitted inside the ensemble should forecast (nearly) the
        # same as an identically-seeded model trained directly
        model1 = BlockRNNModel(input_chunk_length=12, output_chunk_length=1, random_state=0, n_epochs=2)
        model2 = BlockRNNModel(input_chunk_length=12, output_chunk_length=1, random_state=0, n_epochs=2)

        ensemble = RegressionEnsembleModel([model1], 5)
        ensemble.fit(self.combined)

        model1_fitted = ensemble.models[0]
        forecast1 = model1_fitted.predict(10)

        model2.fit(self.combined)
        forecast2 = model2.predict(10)

        self.assertAlmostEqual(sum(forecast1.values() - forecast2.values())[0], 0.0, places=2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_univar(self):
        # global models + a regression member on a univariate series
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(series=self.combined)
        ensemble.predict(10)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_no_covariates(self):
        # global models + a regression member on multiple series, no covariates
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(RegressionModel(lags=1))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1)
        ensemble.predict(10, self.seq1)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_train_predict_global_models_multivar_with_covariates(self):
        # global models + a regression member on multiple series with covariates
        ensemble_models = self.get_global_models(output_chunk_length=10)
        ensemble_models.append(
            RegressionModel(lags=1, lags_past_covariates=[-1]))
        ensemble = RegressionEnsembleModel(ensemble_models, 10)
        ensemble.fit(self.seq1, self.cov1)
        ensemble.predict(10, self.seq2, self.cov2)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def helper_test_models_accuracy(self, model_instance, n, series, past_covariates, min_rmse):
        # for every model, test whether it predicts the target with a minimum r2 score of `min_rmse`
        train_series, test_series = train_test_split(series, pd.Timestamp("20010101"))
        train_past_covariates, _ = train_test_split(past_covariates, pd.Timestamp("20010101"))

        model_instance.fit(series=train_series, past_covariates=train_past_covariates)
        prediction = model_instance.predict(n=n, past_covariates=past_covariates)
        current_rmse = rmse(test_series, prediction)
        self.assertTrue(
            current_rmse <= min_rmse,
            f"Model was not able to denoise data. A rmse score of {current_rmse} was recorded.",
        )

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def denoising_input(self):
        # builds seeded noisy targets (ts_sum*) and covariates (ts_cov*)
        # for the denoising tests below
        np.random.seed(self.RANDOM_SEED)
        ts_periodic = tg.sine_timeseries(length=500)
        ts_gaussian = tg.gaussian_timeseries(length=500)
        ts_random_walk = tg.random_walk_timeseries(length=500)
        ts_cov1 = ts_periodic.stack(ts_gaussian)
        ts_cov1 = ts_cov1.pd_dataframe()
        ts_cov1.columns = ["Periodic", "Gaussian"]
        ts_cov1 = TimeSeries.from_dataframe(ts_cov1)
        ts_sum1 = ts_periodic + ts_gaussian
        ts_cov2 = ts_sum1.stack(ts_random_walk)
        ts_sum2 = ts_sum1 + ts_random_walk
        return ts_sum1, ts_cov1, ts_sum2, ts_cov2

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising(self):
        # for every model, test whether it correctly denoises ts_sum using ts_gaussian and ts_sum as inputs
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        ts_sum1, ts_cov1, _, _ = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)
        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
        ]
        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum1, ts_cov1, 3)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_ensemble_models_denoising_multi_input(self):
        # for every model, test whether it correctly denoises ts_sum_2 using ts_random_multi and ts_sum_2 as inputs
        # WARNING: this test isn't numerically stable, changing self.RANDOM_SEED can lead to exploding coefficients
        horizon = 10
        _, _, ts_sum2, ts_cov2 = self.denoising_input()
        torch.manual_seed(self.RANDOM_SEED)
        ensemble_models = [
            RNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            BlockRNNModel(
                input_chunk_length=20,
                output_chunk_length=horizon,
                n_epochs=1,
                random_state=self.RANDOM_SEED,
            ),
            RegressionModel(lags_past_covariates=[-1]),
            RegressionModel(lags_past_covariates=[-1]),
        ]
        ensemble = RegressionEnsembleModel(ensemble_models, horizon)
        self.helper_test_models_accuracy(ensemble, horizon, ts_sum2, ts_cov2, 3)
class RegressionModelsTestCase(DartsBaseTestClass): np.random.seed(42) # default regression models models = [ RandomForest, LinearRegressionModel, RegressionModel, LightGBMModel ] # register likelihood regression models QuantileLightGBMModel = partialclass( LightGBMModel, likelihood="quantile", quantiles=[0.05, 0.5, 0.95], random_state=42, ) PoissonLightGBMModel = partialclass(LightGBMModel, likelihood="poisson", random_state=42) QuantileLinearRegressionModel = partialclass( LinearRegressionModel, likelihood="quantile", quantiles=[0.05, 0.5, 0.95], random_state=42, ) PoissonLinearRegressionModel = partialclass(LinearRegressionModel, likelihood="poisson", random_state=42) # targets for poisson regression must be positive, so we exclude them for some tests models.extend([ QuantileLightGBMModel, QuantileLinearRegressionModel, PoissonLightGBMModel, PoissonLinearRegressionModel, ]) # dummy feature and target TimeSeries instances target_series, past_covariates, future_covariates = dummy_timeseries( length=100, n_series=3, comps_target=3, comps_pcov=2, comps_fcov=1, multiseries_offset=10, pcov_offset=0, fcov_offset=0, ) # shift sines to positive values for poisson regressors sine_univariate1 = tg.sine_timeseries(length=100) + 1.5 sine_univariate2 = tg.sine_timeseries(length=100, value_phase=1.5705) + 1.5 sine_univariate3 = tg.sine_timeseries(length=100, value_phase=0.78525) + 1.5 sine_univariate4 = tg.sine_timeseries(length=100, value_phase=0.392625) + 1.5 sine_univariate5 = tg.sine_timeseries(length=100, value_phase=0.1963125) + 1.5 sine_univariate6 = tg.sine_timeseries(length=100, value_phase=0.09815625) + 1.5 sine_multivariate1 = sine_univariate1.stack(sine_univariate2) sine_multivariate2 = sine_univariate2.stack(sine_univariate3) sine_multiseries1 = [ sine_univariate1, sine_univariate2, sine_univariate3 ] sine_multiseries2 = [ sine_univariate4, sine_univariate5, sine_univariate6 ] lags_1 = {"target": [-3, -2, -1], "past": [-4, -2], "future": [-5, 2]} def 
test_model_construction(self): for model in self.models: # TESTING SINGLE INT # testing lags model_instance = model(lags=5) self.assertEqual(model_instance.lags.get("target"), [-5, -4, -3, -2, -1]) # testing lags_past_covariates model_instance = model(lags=None, lags_past_covariates=3) self.assertEqual(model_instance.lags.get("past"), [-3, -2, -1]) # testing lags_future covariates model_instance = model(lags=None, lags_future_covariates=(3, 5)) self.assertEqual(model_instance.lags.get("future"), [-3, -2, -1, 0, 1, 2, 3, 4]) # TESTING LIST of int # lags values = [-5, -3, -1] model_instance = model(lags=values) self.assertEqual(model_instance.lags.get("target"), values) # testing lags_past_covariates model_instance = model(lags_past_covariates=values) self.assertEqual(model_instance.lags.get("past"), values) # testing lags_future_covariates with self.assertRaises(ValueError): model() with self.assertRaises(ValueError): model(lags=0) with self.assertRaises(ValueError): model(lags=[-1, 0]) with self.assertRaises(ValueError): model(lags=[3, 5]) with self.assertRaises(ValueError): model(lags=[-3, -5.0]) with self.assertRaises(ValueError): model(lags=-5) with self.assertRaises(ValueError): model(lags=3.6) with self.assertRaises(ValueError): model(lags=None, lags_past_covariates=False) with self.assertRaises(ValueError): model(lags=None) with self.assertRaises(ValueError): model(lags=5, lags_future_covariates=True) with self.assertRaises(ValueError): model(lags=5, lags_future_covariates=(1, -3)) with self.assertRaises(ValueError): model(lags=5, lags_future_covariates=(1, 2, 3)) with self.assertRaises(ValueError): model(lags=5, lags_future_covariates=(1, True)) with self.assertRaises(ValueError): model(lags=5, lags_future_covariates=(1, 1.0)) def test_training_data_creation(self): # testing _get_training_data function model_instance = RegressionModel( lags=self.lags_1["target"], lags_past_covariates=self.lags_1["past"], lags_future_covariates=self.lags_1["future"], ) 
max_samples_per_ts = 17 training_samples, training_labels = model_instance._create_lagged_data( target_series=self.target_series, past_covariates=self.past_covariates, future_covariates=self.future_covariates, max_samples_per_ts=max_samples_per_ts, ) # checking number of dimensions self.assertEqual(len(training_samples.shape), 2) # samples, features self.assertEqual(len(training_labels.shape), 2) # samples, components (multivariate) self.assertEqual(training_samples.shape[0], training_labels.shape[0]) self.assertEqual(training_samples.shape[0], len(self.target_series) * max_samples_per_ts) self.assertEqual( training_samples.shape[1], len(self.lags_1["target"]) * self.target_series[0].width + len(self.lags_1["past"]) * self.past_covariates[0].width + len(self.lags_1["future"]) * self.future_covariates[0].width, ) # check last sample self.assertListEqual( list(training_samples[0, :]), [ 79.0, 179.0, 279.0, 80.0, 180.0, 280.0, 81.0, 181.0, 281.0, 10078.0, 10178.0, 10080.0, 10180.0, 20077.0, 20084.0, ], ) self.assertListEqual(list(training_labels[0]), [82, 182, 282]) def test_prediction_data_creation(self): # assigning correct names to variables series = [ts[:-50] for ts in self.target_series] output_chunk_length = 5 n = 12 # prediction preprocessing start covariates = { "past": (self.past_covariates, self.lags_1.get("past")), "future": (self.future_covariates, self.lags_1.get("future")), } # dictionary containing covariate data over time span required for prediction covariate_matrices = {} # dictionary containing covariate lags relative to minimum covariate lag relative_cov_lags = {} # number of prediction steps given forecast horizon and output_chunk_length n_pred_steps = math.ceil(n / output_chunk_length) for cov_type, (covs, lags) in covariates.items(): if covs is not None: relative_cov_lags[cov_type] = np.array(lags) - lags[0] covariate_matrices[cov_type] = [] for idx, (ts, cov) in enumerate(zip(series, covs)): first_pred_ts = ts.end_time() + 1 * ts.freq 
last_pred_ts = (first_pred_ts + ( (n_pred_steps - 1) * output_chunk_length) * ts.freq) first_req_ts = first_pred_ts + lags[0] * ts.freq last_req_ts = last_pred_ts + lags[-1] * ts.freq # not enough covariate data checks excluded, they are tested elsewhere if cov.has_datetime_index: covariate_matrices[cov_type].append( cov[first_req_ts:last_req_ts].values()) else: # include last_req_ts when slicing series with integer indices covariate_matrices[cov_type].append( cov[first_req_ts:last_req_ts + 1].values()) covariate_matrices[cov_type] = np.stack( covariate_matrices[cov_type]) series_matrix = None if "target" in self.lags_1: series_matrix = np.stack( [ts[self.lags_1["target"][0]:].values() for ts in series]) # prediction preprocessing end # tests self.assertTrue( all([ lag >= 0 for lags in relative_cov_lags.values() for lag in lags ])) self.assertEqual( covariate_matrices["past"].shape, ( len(series), relative_cov_lags["past"][-1] + (n_pred_steps - 1) * output_chunk_length + 1, covariates["past"][0][0].width, ), ) self.assertEqual( covariate_matrices["future"].shape, ( len(series), relative_cov_lags["future"][-1] + (n_pred_steps - 1) * output_chunk_length + 1, covariates["future"][0][0].width, ), ) self.assertEqual( series_matrix.shape, (len(series), -self.lags_1["target"][0], series[0].width), ) self.assertListEqual( list(covariate_matrices["past"][0, :, 0]), [ 10047.0, 10048.0, 10049.0, 10050.0, 10051.0, 10052.0, 10053.0, 10054.0, 10055.0, 10056.0, 10057.0, 10058.0, 10059.0, ], ) self.assertListEqual( list(covariate_matrices["future"][0, :, 0]), [ 20046.0, 20047.0, 20048.0, 20049.0, 20050.0, 20051.0, 20052.0, 20053.0, 20054.0, 20055.0, 20056.0, 20057.0, 20058.0, 20059.0, 20060.0, 20061.0, 20062.0, 20063.0, ], ) self.assertListEqual(list(series_matrix[0, :, 0]), [48.0, 49.0, 50.0]) def test_models_runnability(self): train_y, test_y = self.sine_univariate1.split_before(0.7) for model in self.models: # testing past covariates with self.assertRaises(ValueError): # 
testing lags_past_covariates None but past_covariates during training model_instance = model(lags=4, lags_past_covariates=None) model_instance.fit( series=self.sine_univariate1, past_covariates=self.sine_multivariate1, ) with self.assertRaises(ValueError): # testing lags_past_covariates but no past_covariates during fit model_instance = model(lags=4, lags_past_covariates=3) model_instance.fit(series=self.sine_univariate1) # testing future_covariates with self.assertRaises(ValueError): # testing lags_future_covariates None but future_covariates during training model_instance = model(lags=4, lags_future_covariates=None) model_instance.fit( series=self.sine_univariate1, future_covariates=self.sine_multivariate1, ) with self.assertRaises(ValueError): # testing lags_covariate but no covariate during fit model_instance = model(lags=4, lags_future_covariates=3) model_instance.fit(series=self.sine_univariate1) # testing input_dim model_instance = model(lags=4, lags_past_covariates=2) model_instance.fit( series=train_y, past_covariates=self.sine_univariate1.stack( self.sine_univariate1), ) self.assertEqual(model_instance.input_dim, { "target": 1, "past": 2, "future": None }) with self.assertRaises(ValueError): prediction = model_instance.predict(n=len(test_y) + 2) # while it should work with n = 1 prediction = model_instance.predict(n=1) self.assertTrue( len(prediction) == 1, f"Expected length 1, found {len(prediction)} instead", ) def test_fit(self): for model in self.models: # test fitting both on univariate and multivariate timeseries for series in [self.sine_univariate1, self.sine_multivariate2]: with self.assertRaises(ValueError): model_instance = model(lags=4, lags_past_covariates=4) model_instance.fit( series=series, past_covariates=self.sine_multivariate1) model_instance.predict(n=10) model_instance = model(lags=12) model_instance.fit(series=series) self.assertEqual(model_instance.lags.get("past"), None) model_instance = model(lags=12, lags_past_covariates=12) 
model_instance.fit(series=series, past_covariates=self.sine_multivariate1) self.assertEqual(len(model_instance.lags.get("past")), 12) model_instance = model(lags=12, lags_future_covariates=(0, 1)) model_instance.fit( series=series, future_covariates=self.sine_multivariate1) self.assertEqual(len(model_instance.lags.get("future")), 1) model_instance = model(lags=12, lags_past_covariates=[-1, -4, -6]) model_instance.fit(series=series, past_covariates=self.sine_multivariate1) self.assertEqual(len(model_instance.lags.get("past")), 3) model_instance = model( lags=12, lags_past_covariates=[-1, -4, -6], lags_future_covariates=[-2, 0], ) model_instance.fit( series=series, past_covariates=self.sine_multivariate1, future_covariates=self.sine_multivariate1, ) self.assertEqual(len(model_instance.lags.get("past")), 3) def helper_test_models_accuracy(self, series, past_covariates, min_rmse_model): # for every model, test whether it predicts the target with a minimum r2 score of `min_rmse` train_series, test_series = train_test_split(series, 70) train_past_covariates, _ = train_test_split(past_covariates, 70) for output_chunk_length in [1, 5]: for idx, model in enumerate(self.models): model_instance = model( lags=12, lags_past_covariates=2, output_chunk_length=output_chunk_length, ) model_instance.fit(series=train_series, past_covariates=train_past_covariates) prediction = model_instance.predict( n=len(test_series), series=train_series, past_covariates=past_covariates, ) current_rmse = rmse(prediction, test_series) # in case of multi-series take mean rmse mean_rmse = np.mean(current_rmse) self.assertTrue( mean_rmse <= min_rmse_model[idx], f"{str(model_instance)} model was not able to predict data as well as expected. 
" f"A mean rmse score of {mean_rmse} was recorded.", ) def test_models_accuracy_univariate(self): # for every model, and different output_chunk_lengths test whether it predicts the univariate time series # as well as expected self.helper_test_models_accuracy( self.sine_univariate1, self.sine_univariate2, [0.03, 1e-13, 1e-13, 0.3, 0.5, 0.8, 0.4, 0.4], ) def test_models_accuracy_multivariate(self): # for every model, and different output_chunk_lengths test whether it predicts the multivariate time series # as well as expected self.helper_test_models_accuracy( self.sine_multivariate1, self.sine_multivariate2, [0.3, 1e-13, 1e-13, 0.4, 0.4, 0.8, 0.4, 0.4], ) def test_models_accuracy_multiseries_multivariate(self): # for every model, and different output_chunk_lengths test whether it predicts the multiseries, multivariate # time series as well as expected self.helper_test_models_accuracy( self.sine_multiseries1, self.sine_multiseries2, [0.05, 1e-13, 1e-13, 0.05, 0.4, 0.8, 0.4, 0.4], ) def test_historical_forecast(self): model = self.models[1](lags=5) result = model.historical_forecasts( series=self.sine_univariate1, future_covariates=None, start=0.8, forecast_horizon=1, stride=1, retrain=True, overlap_end=False, last_points_only=True, verbose=False, ) self.assertEqual(len(result), 21) model = self.models[1](lags=5, lags_past_covariates=5) result = model.historical_forecasts( series=self.sine_univariate1, past_covariates=self.sine_multivariate1, start=0.8, forecast_horizon=1, stride=1, retrain=True, overlap_end=False, last_points_only=True, verbose=False, ) self.assertEqual(len(result), 21) model = self.models[1](lags=5, lags_past_covariates=5, output_chunk_length=5) result = model.historical_forecasts( series=self.sine_univariate1, past_covariates=self.sine_multivariate1, start=0.8, forecast_horizon=1, stride=1, retrain=True, overlap_end=False, last_points_only=True, verbose=False, ) self.assertEqual(len(result), 21) def test_multioutput_wrapper(self): lags = 12 models = 
[ (RegressionModel(lags=lags), True), (RegressionModel(lags=lags, model=LinearRegression()), True), (RegressionModel(lags=lags, model=RandomForestRegressor()), True), ( RegressionModel(lags=lags, model=HistGradientBoostingRegressor()), False, ), ] for model, supports_multioutput_natively in models: model.fit(series=self.sine_multivariate1) if supports_multioutput_natively: self.assertFalse( isinstance(model.model, MultiOutputRegressor)) else: self.assertTrue( isinstance(model.model, MultiOutputRegressor)) def test_regression_model(self): lags = 12 models = [ RegressionModel(lags=lags), RegressionModel(lags=lags, model=LinearRegression()), RegressionModel(lags=lags, model=RandomForestRegressor()), RegressionModel(lags=lags, model=HistGradientBoostingRegressor()), ] for model in models: model.fit(series=self.sine_univariate1) self.assertEqual(len(model.lags.get("target")), lags) model.predict(n=10) def test_multiple_ts(self): lags = 4 lags_past_covariates = 3 model = RegressionModel(lags=lags, lags_past_covariates=lags_past_covariates) target_series = tg.linear_timeseries(start_value=0, end_value=49, length=50) past_covariates = tg.linear_timeseries(start_value=100, end_value=149, length=50) past_covariates = past_covariates.stack( tg.linear_timeseries(start_value=400, end_value=449, length=50)) target_train, target_test = target_series.split_after(0.7) past_covariates_train, past_covariates_test = past_covariates.split_after( 0.7) model.fit( series=[target_train, target_train + 0.5], past_covariates=[ past_covariates_train, past_covariates_train + 0.5 ], ) predictions = model.predict( 10, series=[target_train, target_train + 0.5], past_covariates=[past_covariates, past_covariates + 0.5], ) self.assertEqual(len(predictions[0]), 10, f"Found {len(predictions)} instead") # multiple TS, both future and past covariates, checking that both covariates lead to better results than # using a single one (target series = past_cov + future_cov + noise) np.random.seed(42) 
linear_ts_1 = tg.linear_timeseries(start_value=10, end_value=59, length=50) linear_ts_2 = tg.linear_timeseries(start_value=40, end_value=89, length=50) past_covariates = tg.sine_timeseries(length=50) * 10 future_covariates = ( tg.sine_timeseries(length=50, value_frequency=0.015) * 50) target_series_1 = linear_ts_1 + 4 * past_covariates + 2 * future_covariates target_series_2 = linear_ts_2 + 4 * past_covariates + 2 * future_covariates target_series_1_noise = (linear_ts_1 + 4 * past_covariates + 2 * future_covariates + tg.gaussian_timeseries(std=7, length=50)) target_series_2_noise = (linear_ts_2 + 4 * past_covariates + 2 * future_covariates + tg.gaussian_timeseries(std=7, length=50)) target_train_1, target_test_1 = target_series_1.split_after(0.7) target_train_2, target_test_2 = target_series_2.split_after(0.7) ( target_train_1_noise, target_test_1_noise, ) = target_series_1_noise.split_after(0.7) ( target_train_2_noise, target_test_2_noise, ) = target_series_2_noise.split_after(0.7) # testing improved denoise with multiple TS # test 1: with single TS, 2 covariates should be better than one model = RegressionModel(lags=3, lags_past_covariates=5) model.fit([target_train_1_noise], [past_covariates]) prediction_past_only = model.predict( n=len(target_test_1), series=[target_train_1_noise, target_train_2_noise], past_covariates=[past_covariates] * 2, ) model = RegressionModel(lags=3, lags_past_covariates=5, lags_future_covariates=(5, 0)) model.fit([target_train_1_noise], [past_covariates], [future_covariates]) prediction_past_and_future = model.predict( n=len(target_test_1), series=[target_train_1_noise, target_train_2_noise], past_covariates=[past_covariates] * 2, future_covariates=[future_covariates] * 2, ) error_past_only = rmse( [target_test_1, target_test_2], prediction_past_only, inter_reduction=np.mean, ) error_both = rmse( [target_test_1, target_test_2], prediction_past_and_future, inter_reduction=np.mean, ) self.assertTrue(error_past_only > error_both) # test 
2: with both covariates, 2 TS should learn more than one (with little noise) model = RegressionModel(lags=3, lags_past_covariates=5, lags_future_covariates=(5, 0)) model.fit( [target_train_1_noise, target_train_2_noise], [past_covariates] * 2, [future_covariates] * 2, ) prediction_past_and_future_multi_ts = model.predict( n=len(target_test_1), series=[target_train_1_noise, target_train_2_noise], past_covariates=[past_covariates] * 2, future_covariates=[future_covariates] * 2, ) error_both_multi_ts = rmse( [target_test_1, target_test_2], prediction_past_and_future_multi_ts, inter_reduction=np.mean, ) self.assertTrue(error_both > error_both_multi_ts) def test_only_future_covariates(self): model = RegressionModel(lags_future_covariates=[-2]) target_series = tg.linear_timeseries(start_value=0, end_value=49, length=50) covariates = tg.linear_timeseries(start_value=100, end_value=149, length=50) covariates = covariates.stack( tg.linear_timeseries(start_value=400, end_value=449, length=50)) target_train, target_test = target_series.split_after(0.7) covariates_train, covariates_test = covariates.split_after(0.7) model.fit( series=[target_train, target_train + 0.5], future_covariates=[covariates_train, covariates_train + 0.5], ) predictions = model.predict( 10, series=[target_train, target_train + 0.5], future_covariates=[covariates, covariates + 0.5], ) self.assertEqual(len(predictions[0]), 10, f"Found {len(predictions[0])} instead") def test_not_enough_covariates(self): target_series = tg.linear_timeseries(start_value=0, end_value=100, length=50) past_covariates = tg.linear_timeseries(start_value=100, end_value=200, length=50) future_covariates = tg.linear_timeseries(start_value=200, end_value=300, length=50) model = RegressionModel( lags_past_covariates=[-10], lags_future_covariates=[-5, 5], output_chunk_length=7, ) model.fit( series=target_series, past_covariates=past_covariates, future_covariates=future_covariates, max_samples_per_ts=1, ) # output_chunk_length, 
required past_offset, required future_offset test_cases = [ (1, 0, 13), (5, -4, 9), (7, -2, 11), ] for (output_chunk_length, req_past_offset, req_future_offset) in test_cases: model = RegressionModel( lags_past_covariates=[-10], lags_future_covariates=[-4, 3], output_chunk_length=output_chunk_length, ) model.fit( series=target_series, past_covariates=past_covariates, future_covariates=future_covariates, ) # check that given the required offsets no ValueError is raised model.predict( 10, series=target_series[:-25], past_covariates=past_covariates[:-25 + req_past_offset], future_covariates=future_covariates[:-25 + req_future_offset], ) # check that one less past covariate time step causes ValueError with self.assertRaises(ValueError): model.predict( 10, series=target_series[:-25], past_covariates=past_covariates[:-26 + req_past_offset], future_covariates=future_covariates[:-25 + req_future_offset], ) # check that one less future covariate time step causes ValueError with self.assertRaises(ValueError): model.predict( 10, series=target_series[:-25], past_covariates=past_covariates[:-25 + req_past_offset], future_covariates=future_covariates[:-26 + req_future_offset], ) @patch.object( darts.models.forecasting.gradient_boosted_model.lgb.LGBMRegressor, "fit") # @patch.object(darts.models.forecasting.gradient_boosted_model.lgb.LGBMRegressor, 'fit') def test_gradient_boosted_model_with_eval_set(self, lgb_fit_patch): """Test whether these evaluation set parameters are passed to LGBRegressor""" model = LightGBMModel(lags=4, lags_past_covariates=2) model.fit( series=self.sine_univariate1, past_covariates=self.sine_multivariate1, val_series=self.sine_univariate1, val_past_covariates=self.sine_multivariate1, early_stopping_rounds=2, ) lgb_fit_patch.assert_called_once() assert lgb_fit_patch.call_args[1]["eval_set"] is not None assert lgb_fit_patch.call_args[1]["early_stopping_rounds"] == 2
def test_multiple_ts(self): lags = 4 lags_past_covariates = 3 model = RegressionModel(lags=lags, lags_past_covariates=lags_past_covariates) target_series = tg.linear_timeseries(start_value=0, end_value=49, length=50) past_covariates = tg.linear_timeseries(start_value=100, end_value=149, length=50) past_covariates = past_covariates.stack( tg.linear_timeseries(start_value=400, end_value=449, length=50)) target_train, target_test = target_series.split_after(0.7) past_covariates_train, past_covariates_test = past_covariates.split_after( 0.7) model.fit( series=[target_train, target_train + 0.5], past_covariates=[ past_covariates_train, past_covariates_train + 0.5 ], ) predictions = model.predict( 10, series=[target_train, target_train + 0.5], past_covariates=[past_covariates, past_covariates + 0.5], ) self.assertEqual(len(predictions[0]), 10, f"Found {len(predictions)} instead") # multiple TS, both future and past covariates, checking that both covariates lead to better results than # using a single one (target series = past_cov + future_cov + noise) np.random.seed(42) linear_ts_1 = tg.linear_timeseries(start_value=10, end_value=59, length=50) linear_ts_2 = tg.linear_timeseries(start_value=40, end_value=89, length=50) past_covariates = tg.sine_timeseries(length=50) * 10 future_covariates = ( tg.sine_timeseries(length=50, value_frequency=0.015) * 50) target_series_1 = linear_ts_1 + 4 * past_covariates + 2 * future_covariates target_series_2 = linear_ts_2 + 4 * past_covariates + 2 * future_covariates target_series_1_noise = (linear_ts_1 + 4 * past_covariates + 2 * future_covariates + tg.gaussian_timeseries(std=7, length=50)) target_series_2_noise = (linear_ts_2 + 4 * past_covariates + 2 * future_covariates + tg.gaussian_timeseries(std=7, length=50)) target_train_1, target_test_1 = target_series_1.split_after(0.7) target_train_2, target_test_2 = target_series_2.split_after(0.7) ( target_train_1_noise, target_test_1_noise, ) = target_series_1_noise.split_after(0.7) ( 
target_train_2_noise, target_test_2_noise, ) = target_series_2_noise.split_after(0.7) # testing improved denoise with multiple TS # test 1: with single TS, 2 covariates should be better than one model = RegressionModel(lags=3, lags_past_covariates=5) model.fit([target_train_1_noise], [past_covariates]) prediction_past_only = model.predict( n=len(target_test_1), series=[target_train_1_noise, target_train_2_noise], past_covariates=[past_covariates] * 2, ) model = RegressionModel(lags=3, lags_past_covariates=5, lags_future_covariates=(5, 0)) model.fit([target_train_1_noise], [past_covariates], [future_covariates]) prediction_past_and_future = model.predict( n=len(target_test_1), series=[target_train_1_noise, target_train_2_noise], past_covariates=[past_covariates] * 2, future_covariates=[future_covariates] * 2, ) error_past_only = rmse( [target_test_1, target_test_2], prediction_past_only, inter_reduction=np.mean, ) error_both = rmse( [target_test_1, target_test_2], prediction_past_and_future, inter_reduction=np.mean, ) self.assertTrue(error_past_only > error_both) # test 2: with both covariates, 2 TS should learn more than one (with little noise) model = RegressionModel(lags=3, lags_past_covariates=5, lags_future_covariates=(5, 0)) model.fit( [target_train_1_noise, target_train_2_noise], [past_covariates] * 2, [future_covariates] * 2, ) prediction_past_and_future_multi_ts = model.predict( n=len(target_test_1), series=[target_train_1_noise, target_train_2_noise], past_covariates=[past_covariates] * 2, future_covariates=[future_covariates] * 2, ) error_both_multi_ts = rmse( [target_test_1, target_test_2], prediction_past_and_future_multi_ts, inter_reduction=np.mean, ) self.assertTrue(error_both > error_both_multi_ts)
class EnsembleModelsTestCase(DartsBaseTestClass):
    """Tests for NaiveEnsembleModel with local (statistical) and global (torch) models."""

    # deterministic univariate fixtures
    series1 = tg.sine_timeseries(value_frequency=(1 / 5), value_y_offset=10, length=50)
    series2 = tg.linear_timeseries(length=50)

    # multi-series fixtures built with the module-level _make_ts helper;
    # seq1 is used as targets, cov1 as past covariates
    seq1 = [_make_ts(0), _make_ts(10), _make_ts(20)]
    cov1 = [_make_ts(5), _make_ts(15), _make_ts(25)]

    def test_untrained_models(self):
        # an ensemble may only be built from untrained models
        model = NaiveDrift()
        _ = NaiveEnsembleModel([model])

        # trained models should raise error
        model.fit(self.series1)
        with self.assertRaises(ValueError):
            NaiveEnsembleModel([model])

    def test_input_models_local_models(self):
        # empty list, classes instead of instances, and mixed classes/instances
        # must all be rejected; a list of instances is accepted
        with self.assertRaises(ValueError):
            NaiveEnsembleModel([])
        with self.assertRaises(ValueError):
            NaiveEnsembleModel(
                [NaiveDrift, NaiveSeasonal, Theta, ExponentialSmoothing])
        with self.assertRaises(ValueError):
            NaiveEnsembleModel(
                [NaiveDrift(), NaiveSeasonal, Theta(), ExponentialSmoothing()])
        NaiveEnsembleModel(
            [NaiveDrift(), NaiveSeasonal(), Theta(), ExponentialSmoothing()])

    def test_call_predict_local_models(self):
        # predict before fit must fail; after fit it must work
        naive_ensemble = NaiveEnsembleModel([NaiveSeasonal(), Theta()])
        with self.assertRaises(Exception):
            naive_ensemble.predict(5)
        naive_ensemble.fit(self.series1)
        naive_ensemble.predict(5)

    def test_predict_ensemble_local_models(self):
        # the naive ensemble forecast must equal the unweighted mean of the
        # individual models' forecasts
        naive = NaiveSeasonal(K=5)
        theta = Theta()
        naive_ensemble = NaiveEnsembleModel([naive, theta])
        naive_ensemble.fit(self.series1 + self.series2)
        forecast_naive_ensemble = naive_ensemble.predict(5)
        naive.fit(self.series1 + self.series2)
        theta.fit(self.series1 + self.series2)
        forecast_mean = 0.5 * naive.predict(5) + 0.5 * theta.predict(5)

        self.assertTrue(
            np.array_equal(forecast_naive_ensemble.values(),
                           forecast_mean.values()))

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_input_models_global_models(self):
        # an ensemble of only global (torch) models is accepted
        NaiveEnsembleModel([RNNModel(12), TCNModel(10, 2), NBEATSModel(10, 2)])

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_call_predict_global_models_univariate_input_no_covariates(self):
        naive_ensemble = NaiveEnsembleModel([
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ])
        # predict before fit must fail
        with self.assertRaises(Exception):
            naive_ensemble.predict(5)

        naive_ensemble.fit(self.series1)
        naive_ensemble.predict(5)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_call_predict_global_models_multivariate_input_no_covariates(self):
        naive_ensemble = NaiveEnsembleModel([
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ])
        naive_ensemble.fit(self.seq1)
        naive_ensemble.predict(n=5, series=self.seq1)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_call_predict_global_models_multivariate_input_with_covariates(
            self):
        naive_ensemble = NaiveEnsembleModel([
            RNNModel(12, n_epochs=1),
            TCNModel(10, 2, n_epochs=1),
            NBEATSModel(10, 2, n_epochs=1),
        ])
        naive_ensemble.fit(self.seq1, self.cov1)
        # covariates extend 2 steps past the prediction start (14 vs 12)
        predict_series = [s[:12] for s in self.seq1]
        predict_covariates = [c[:14] for c in self.cov1]
        naive_ensemble.predict(n=2,
                               series=predict_series,
                               past_covariates=predict_covariates)

    @unittest.skipUnless(TORCH_AVAILABLE, "requires torch")
    def test_input_models_mixed(self):
        # mixing local and global models in one ensemble is rejected
        with self.assertRaises(ValueError):
            NaiveEnsembleModel([NaiveDrift(), Theta(), RNNModel(12)])

    def test_fit_multivar_ts_with_local_models(self):
        # local models cannot be fit on a sequence of series
        naive = NaiveEnsembleModel(
            [NaiveDrift(), NaiveSeasonal(), Theta(), ExponentialSmoothing()])
        with self.assertRaises(ValueError):
            naive.fit(self.seq1)

    def test_fit_univar_ts_with_covariates_for_local_models(self):
        # local models do not accept covariates
        naive = NaiveEnsembleModel(
            [NaiveDrift(), NaiveSeasonal(), Theta(), ExponentialSmoothing()])
        with self.assertRaises(ValueError):
            naive.fit(self.series1, self.series2)
class ReconciliationTestCase(unittest.TestCase):
    """Tests for hierarchical-forecast reconciliation transformers."""

    # force collection (the base-class pattern used elsewhere sets __test__ = False)
    __test__ = True

    @classmethod
    def setUpClass(cls):
        # silence log output for the whole test case
        logging.disable(logging.CRITICAL)

    # seeded at class-body time so the random fixtures below are reproducible
    np.random.seed(42)

    """ test case with a more intricate hierarchy """
    # NOTE(review): despite the bare string above (which looks copy-pasted from the
    # complex fixture further down), this first fixture is the *simple* two-level
    # hierarchy: total "sine" = "sine_1" + "sine_2".
    LENGTH = 200
    total_series = (tg.sine_timeseries(value_frequency=0.03, length=LENGTH) + 1 +
                    tg.gaussian_timeseries(length=LENGTH) * 0.2)
    # the two children split the total roughly 1/3 vs 2/3, plus small noise
    bottom_1 = total_series / 3 + tg.gaussian_timeseries(length=LENGTH) * 0.01
    bottom_2 = 2 * total_series / 3 + tg.gaussian_timeseries(
        length=LENGTH) * 0.01
    series = concatenate([total_series, bottom_1, bottom_2], axis=1)
    hierarchy = {"sine_1": ["sine"], "sine_2": ["sine"]}
    series = series.with_hierarchy(hierarchy)

    # get a single forecast
    model = LinearRegressionModel(lags=30, output_chunk_length=10)
    model.fit(series)
    pred = model.predict(n=20)

    # get a backtest forecast to get residuals
    pred_back = model.historical_forecasts(series,
                                           start=0.75,
                                           forecast_horizon=10)
    intersection = series.slice_intersect(pred_back)
    residuals = intersection - pred_back

    """ test case with a more intricate hierarchy """
    # three-level hierarchy: ax/ay/bx/by roll up into a/b and x/y, which roll up into total
    components_complex = ["total", "a", "b", "x", "y", "ax", "ay", "bx", "by"]

    hierarchy_complex = {
        "ax": ["a", "x"],
        "ay": ["a", "y"],
        "bx": ["b", "x"],
        "by": ["b", "y"],
        "a": ["total"],
        "b": ["total"],
        "x": ["total"],
        "y": ["total"],
    }

    # stochastic series: 50 time steps, 9 components, 5 samples
    series_complex = TimeSeries.from_values(
        values=np.random.rand(50, len(components_complex), 5),
        columns=components_complex,
        hierarchy=hierarchy_complex,
    )

    def _assert_reconciliation(self, fitted_recon):
        # after reconciliation the parent must equal the sum of its children
        pred_r = fitted_recon.transform(self.pred)
        np.testing.assert_almost_equal(
            pred_r["sine"].values(copy=False),
            (pred_r["sine_1"] + pred_r["sine_2"]).values(copy=False),
        )

    def _assert_reconciliation_complex(self, fitted_recon):
        # every aggregation relation of the complex hierarchy must hold after reconciliation
        reconciled = fitted_recon.transform(self.series_complex)

        def _assert_comps(comp, comps):
            np.testing.assert_almost_equal(
                reconciled[comp].values(copy=False),
                sum(reconciled[c] for c in comps).values(copy=False),
            )

        _assert_comps("a", ["ax", "ay"])
        _assert_comps("b", ["bx", "by"])
        _assert_comps("x", ["ax", "bx"])
        _assert_comps("y", ["ay", "by"])
        _assert_comps("total", ["ax", "ay", "bx", "by"])
        _assert_comps("total", ["a", "b"])
        _assert_comps("total", ["x", "y"])

    def test_bottom_up(self):
        # bottom-up reconciliation requires no fitting
        recon = BottomUpReconciliator()
        self._assert_reconciliation(recon)

    def test_top_down(self):
        # should work when fitting on training series
        recon = TopDownReconciliator()
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # or when fitting on forecasts
        recon = TopDownReconciliator()
        recon.fit(self.pred)
        self._assert_reconciliation(recon)

    def test_mint(self):
        # all MinT variants; variance-based ones ("wls_var", "mint_cov") are fit
        # on residuals, the others on the series itself
        # ols
        recon = MinTReconciliator("ols")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_struct
        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

        # wls_var
        recon = MinTReconciliator("wls_var")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # mint_cov
        recon = MinTReconciliator("mint_cov")
        recon.fit(self.residuals)
        self._assert_reconciliation(recon)

        # wls_val
        recon = MinTReconciliator("wls_val")
        recon.fit(self.series)
        self._assert_reconciliation(recon)

    def test_summation_matrix(self):
        # rows follow components_complex order; columns are the 4 leaves (ax, ay, bx, by)
        np.testing.assert_equal(
            _get_summation_matrix(self.series_complex),
            np.array([
                [1, 1, 1, 1],
                [1, 1, 0, 0],
                [0, 0, 1, 1],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
                [1, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1],
            ]),
        )

    def test_hierarchy_preserved_after_predict(self):
        # forecasting must carry the hierarchy over to the prediction
        self.assertEqual(self.pred.hierarchy, self.series.hierarchy)

    def test_more_intricate_hierarchy(self):
        # all reconciliators must also handle the three-level hierarchy
        recon = BottomUpReconciliator()
        self._assert_reconciliation_complex(recon)

        recon = TopDownReconciliator()
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("ols")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_struct")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)

        recon = MinTReconciliator("wls_val")
        recon.fit(self.series_complex)
        self._assert_reconciliation_complex(recon)
def helper_test_prophet_model(self, period, freq, compare_all_models=False): """Test which includes adding custom seasonalities and future covariates. The tests compare the output of univariate and stochastic forecasting with the validation timeseries and Prophet's base model output. The underlying curve to forecast is a sine timeseries multiplied with another sine timeseries. The curve shape repeats every 2*period timesteps (i.e. for period=24 hours -> seasonal_periods=48). We take the second sine wave as a covariate for the model. With the added custom seasonality and covariate, the model should have a very accurate forecast. """ repetitions = 8 ts_sine1 = tg.sine_timeseries( value_frequency=1 / period, length=period * repetitions, freq=freq ) ts_sine2 = tg.sine_timeseries( value_frequency=1 / (period * 2), length=period * repetitions, freq=freq ) ts_sine = ts_sine1 * ts_sine2 covariate = ts_sine2 split = int(-period * repetitions / 2) train, val = ts_sine[:split], ts_sine[split:] train_cov, val_cov = covariate[:split], covariate[split:] supress_auto_seasonality = { "daily_seasonality": False, "weekly_seasonality": False, "yearly_seasonality": False, } custom_seasonality = { "name": "custom", "seasonal_periods": int(2 * period), "fourier_order": 4, } model = Prophet( add_seasonalities=custom_seasonality, seasonality_mode="additive", **supress_auto_seasonality ) model.fit(train, future_covariates=train_cov) # univariate, stochastic and Prophet's base model forecast pred_darts = model.predict( n=len(val), num_samples=1, future_covariates=val_cov ) compare_preds = [pred_darts] if compare_all_models: pred_darts_stochastic = model.predict( n=len(val), num_samples=200, future_covariates=val_cov ) pred_raw_df = model.predict_raw(n=len(val), future_covariates=val_cov) pred_raw = TimeSeries.from_dataframe( pred_raw_df[["ds", "yhat"]], time_col="ds" ) compare_preds += [ pred_darts_stochastic.quantile_timeseries(0.5), pred_raw, ] # all predictions should fit the underlying 
curve very well for pred in compare_preds: for val_i, pred_i in zip( val.univariate_values(), pred.univariate_values() ): self.assertAlmostEqual(val_i, pred_i, delta=0.1)
class GlobalForecastingModelsTestCase(DartsBaseTestClass):
    """Common tests run against every model in the module-level
    ``models_cls_kwargs_errs`` list of (model class, kwargs, max error) tuples."""

    # forecasting horizon used in runnability tests
    forecasting_horizon = 12

    np.random.seed(42)
    torch.manual_seed(42)

    # some arbitrary static covariates
    static_covariates = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])

    # real timeseries for functionality tests
    ts_passengers = (AirPassengersDataset().load().with_static_covariates(
        static_covariates))
    scaler = Scaler()
    ts_passengers = scaler.fit_transform(ts_passengers)
    ts_pass_train, ts_pass_val = ts_passengers[:-36], ts_passengers[-36:]

    # an additional noisy series
    ts_pass_train_1 = ts_pass_train + 0.01 * tg.gaussian_timeseries(
        length=len(ts_pass_train),
        freq=ts_pass_train.freq_str,
        start=ts_pass_train.start_time(),
    )

    # an additional time series serving as covariates
    year_series = tg.datetime_attribute_timeseries(ts_passengers,
                                                   attribute="year")
    month_series = tg.datetime_attribute_timeseries(ts_passengers,
                                                    attribute="month")
    scaler_dt = Scaler()
    time_covariates = scaler_dt.fit_transform(
        year_series.stack(month_series))
    time_covariates_train, time_covariates_val = (
        time_covariates[:-36],
        time_covariates[-36:],
    )

    # an artificial time series that is highly dependent on covariates
    ts_length = 400
    split_ratio = 0.6
    sine_1_ts = tg.sine_timeseries(length=ts_length)
    sine_2_ts = tg.sine_timeseries(length=ts_length, value_frequency=0.05)
    sine_3_ts = tg.sine_timeseries(length=ts_length,
                                   value_frequency=0.003,
                                   value_amplitude=5)
    linear_ts = tg.linear_timeseries(length=ts_length,
                                     start_value=3,
                                     end_value=8)
    covariates = sine_3_ts.stack(sine_2_ts).stack(linear_ts)
    covariates_past, _ = covariates.split_after(split_ratio)
    target = sine_1_ts + sine_2_ts + linear_ts + sine_3_ts
    target_past, target_future = target.split_after(split_ratio)

    def test_save_model_parameters(self):
        # model creation parameters were saved before; check that a re-created
        # model has the same params as the original
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            # fix: was assertTrue(a, b), which only checked truthiness of `a`
            # and used `b` as the failure message, so it could never fail
            self.assertEqual(model._model_params,
                             model.untrained_model()._model_params)

    def test_single_ts(self):
        # each model must reach its expected accuracy on a single series and
        # propagate static covariates into the prediction
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN,
                output_chunk_length=OUT_LEN,
                random_state=0,
                **kwargs,
            )
            model.fit(self.ts_pass_train)
            pred = model.predict(n=36)
            mape_err = mape(self.ts_pass_val, pred)
            self.assertTrue(
                mape_err < err,
                "Model {} produces errors too high (one time "
                "series). Error = {}".format(model_cls, mape_err),
            )
            self.assertTrue(
                pred.static_covariates.equals(
                    self.ts_passengers.static_covariates))

    def test_multi_ts(self):
        # fitting on several series requires `series` at prediction time, and
        # predicting a list of series returns a list of forecasts
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN,
                output_chunk_length=OUT_LEN,
                random_state=0,
                **kwargs,
            )
            model.fit([self.ts_pass_train, self.ts_pass_train_1])
            with self.assertRaises(ValueError):
                # when model is fit from >1 series, one must provide a series in argument
                model.predict(n=1)
            pred = model.predict(n=36, series=self.ts_pass_train)
            mape_err = mape(self.ts_pass_val, pred)
            self.assertTrue(
                mape_err < err,
                "Model {} produces errors too high (several time "
                "series). Error = {}".format(model_cls, mape_err),
            )

            # check prediction for several time series
            pred_list = model.predict(
                n=36, series=[self.ts_pass_train, self.ts_pass_train_1])
            self.assertTrue(
                len(pred_list) == 2,
                f"Model {model_cls} did not return a list of prediction",
            )
            for pred in pred_list:
                mape_err = mape(self.ts_pass_val, pred)
                self.assertTrue(
                    mape_err < err,
                    "Model {} produces errors too high (several time series 2). "
                    "Error = {}".format(model_cls, mape_err),
                )

    def test_covariates(self):
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(
                input_chunk_length=IN_LEN,
                output_chunk_length=OUT_LEN,
                random_state=0,
                **kwargs,
            )

            # Here we rely on the fact that all non-Dual models currently are Past models
            cov_name = ("future_covariates" if isinstance(
                model, DualCovariatesTorchModel) else "past_covariates")
            cov_kwargs = {
                cov_name:
                [self.time_covariates_train, self.time_covariates_train]
            }
            model.fit(series=[self.ts_pass_train, self.ts_pass_train_1],
                      **cov_kwargs)
            with self.assertRaises(ValueError):
                # when model is fit from >1 series, one must provide a series in argument
                model.predict(n=1)

            with self.assertRaises(ValueError):
                # when model is fit using multiple covariates, covariates are required at prediction time
                model.predict(n=1, series=self.ts_pass_train)

            cov_kwargs_train = {cov_name: self.time_covariates_train}
            cov_kwargs_notrain = {cov_name: self.time_covariates}
            with self.assertRaises(ValueError):
                # when model is fit using covariates, n cannot be greater than output_chunk_length...
                model.predict(n=13,
                              series=self.ts_pass_train,
                              **cov_kwargs_train)

            # ... unless future covariates are provided
            pred = model.predict(n=13,
                                 series=self.ts_pass_train,
                                 **cov_kwargs_notrain)

            pred = model.predict(n=12,
                                 series=self.ts_pass_train,
                                 **cov_kwargs_notrain)
            mape_err = mape(self.ts_pass_val, pred)
            self.assertTrue(
                mape_err < err,
                "Model {} produces errors too high (several time "
                "series with covariates). Error = {}".format(
                    model_cls, mape_err),
            )

            # when model is fit using 1 training and 1 covariate series, time series args are optional
            # fix: _is_probabilistic is a method elsewhere in this file
            # (see test_same_result_with_different_n_jobs); the bare attribute
            # access was always truthy, so the checks below never ran
            if model._is_probabilistic():
                continue
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            model.fit(series=self.ts_pass_train, **cov_kwargs_train)
            pred1 = model.predict(1)
            pred2 = model.predict(1, series=self.ts_pass_train)
            pred3 = model.predict(1, **cov_kwargs_train)
            pred4 = model.predict(1,
                                  **cov_kwargs_train,
                                  series=self.ts_pass_train)
            self.assertEqual(pred1, pred2)
            self.assertEqual(pred1, pred3)
            self.assertEqual(pred1, pred4)

    def test_future_covariates(self):
        # models with future covariates should produce better predictions over a long forecasting horizon
        # than a model trained with no covariates
        model = TCNModel(
            input_chunk_length=50,
            output_chunk_length=5,
            n_epochs=20,
            random_state=0,
        )
        model.fit(series=self.target_past)
        long_pred_no_cov = model.predict(n=160)

        model = TCNModel(
            input_chunk_length=50,
            output_chunk_length=5,
            n_epochs=20,
            random_state=0,
        )
        model.fit(series=self.target_past,
                  past_covariates=self.covariates_past)
        long_pred_with_cov = model.predict(n=160,
                                           past_covariates=self.covariates)
        self.assertTrue(
            mape(self.target_future, long_pred_no_cov) > mape(
                self.target_future, long_pred_with_cov),
            "Models with future covariates should produce better predictions.",
        )

        # block models can predict up to self.output_chunk_length points beyond the last future covariate...
        model.predict(n=165, past_covariates=self.covariates)

        # ... not more
        with self.assertRaises(ValueError):
            model.predict(n=166, series=self.ts_pass_train)

        # recurrent models can only predict data points for time steps where future covariates are available
        model = RNNModel(12, n_epochs=1)
        model.fit(series=self.target_past,
                  future_covariates=self.covariates_past)
        model.predict(n=160, future_covariates=self.covariates)
        with self.assertRaises(ValueError):
            model.predict(n=161, future_covariates=self.covariates)

    def test_batch_predictions(self):
        # predicting multiple time series at once needs to work for arbitrary batch sizes
        # univariate case
        targets_univar = [
            self.target_past,
            self.target_past[:60],
            self.target_past[:80],
        ]
        self._batch_prediction_test_helper_function(targets_univar)

        # multivariate case
        targets_multivar = [tgt.stack(tgt) for tgt in targets_univar]
        self._batch_prediction_test_helper_function(targets_multivar)

    def _batch_prediction_test_helper_function(self, targets):
        # predictions must be (numerically) independent of the batch size
        epsilon = 1e-4
        model = TCNModel(
            input_chunk_length=50,
            output_chunk_length=10,
            n_epochs=10,
            random_state=0,
        )
        model.fit(series=targets[0], past_covariates=self.covariates_past)
        preds_default = model.predict(
            n=160,
            series=targets,
            past_covariates=[self.covariates] * len(targets),
            batch_size=None,
        )

        # make batch size large enough to test stacking samples
        for batch_size in range(1, 4 * len(targets)):
            preds = model.predict(
                n=160,
                series=targets,
                past_covariates=[self.covariates] * len(targets),
                batch_size=batch_size,
            )
            for i in range(len(targets)):
                self.assertLess(
                    sum(sum((preds[i] - preds_default[i]).values())), epsilon)

    def test_predict_from_dataset_unsupported_input(self):
        # an exception should be thrown if an unsupported type is passed
        unsupported_type = "unsupported_type"
        # just need to test this with one model
        model_cls, kwargs, err = models_cls_kwargs_errs[0]
        model = model_cls(input_chunk_length=IN_LEN,
                          output_chunk_length=OUT_LEN,
                          **kwargs)
        model.fit([self.ts_pass_train, self.ts_pass_train_1])

        with self.assertRaises(ValueError):
            model.predict_from_dataset(
                n=1, input_series_dataset=unsupported_type)

    def test_prediction_with_different_n(self):
        # test model predictions for n < out_len, n == out_len and n > out_len
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            self.assertTrue(
                isinstance(
                    model,
                    (
                        PastCovariatesTorchModel,
                        DualCovariatesTorchModel,
                        MixedCovariatesTorchModel,
                    ),
                ),
                "unit test not yet defined for the given {X}CovariatesTorchModel.",
            )

            # pick the covariate kinds the model type supports
            if isinstance(model, PastCovariatesTorchModel):
                past_covs, future_covs = self.covariates, None
            elif isinstance(model, DualCovariatesTorchModel):
                past_covs, future_covs = None, self.covariates
            else:
                past_covs, future_covs = self.covariates, self.covariates

            model.fit(
                self.target_past,
                past_covariates=past_covs,
                future_covariates=future_covs,
                epochs=1,
            )

            # test prediction for n < out_len, n == out_len and n > out_len
            for n in [OUT_LEN - 1, OUT_LEN, 2 * OUT_LEN - 1]:
                pred = model.predict(n=n,
                                     past_covariates=past_covs,
                                     future_covariates=future_covs)
                self.assertEqual(len(pred), n)

    def test_same_result_with_different_n_jobs(self):
        # parallel and sequential inference must produce identical forecasts
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            multiple_ts = [self.ts_pass_train] * 10
            model.fit(multiple_ts)

            # save random state for two successive identical predictions
            if model._is_probabilistic():
                random_state = deepcopy(model._random_instance)
            else:
                random_state = None

            pred1 = model.predict(n=36, series=multiple_ts, n_jobs=1)

            if random_state is not None:
                model._random_instance = random_state

            pred2 = model.predict(
                n=36, series=multiple_ts,
                n_jobs=-1)  # assuming > 1 core available in the machine
            self.assertEqual(
                pred1,
                pred2,
                "Model {} produces different predictions with different number of jobs",
            )

    @patch(
        "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
    )
    def test_fit_with_constr_epochs(self, init_trainer):
        # without an explicit `epochs` arg, fit() must use the constructor's n_epochs
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            multiple_ts = [self.ts_pass_train] * 10
            model.fit(multiple_ts)

            init_trainer.assert_called_with(max_epochs=kwargs["n_epochs"],
                                            trainer_params=ANY)

    @patch(
        "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
    )
    def test_fit_with_fit_epochs(self, init_trainer):
        # an explicit `epochs` arg to fit() must override the constructor's n_epochs
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            multiple_ts = [self.ts_pass_train] * 10
            epochs = 3

            model.fit(multiple_ts, epochs=epochs)
            init_trainer.assert_called_with(max_epochs=epochs,
                                            trainer_params=ANY)

            model.total_epochs = epochs
            # continue training
            model.fit(multiple_ts, epochs=epochs)
            init_trainer.assert_called_with(max_epochs=epochs,
                                            trainer_params=ANY)

    @patch(
        "darts.models.forecasting.torch_forecasting_model.TorchForecastingModel._init_trainer"
    )
    def test_fit_from_dataset_with_epochs(self, init_trainer):
        # `epochs` must also be honoured when fitting directly from a dataset
        for model_cls, kwargs, err in models_cls_kwargs_errs:
            model = model_cls(input_chunk_length=IN_LEN,
                              output_chunk_length=OUT_LEN,
                              **kwargs)
            multiple_ts = [self.ts_pass_train] * 10
            train_dataset = model._build_train_dataset(
                multiple_ts,
                past_covariates=None,
                future_covariates=None,
                max_samples_per_ts=None,
            )
            epochs = 3

            model.fit_from_dataset(train_dataset, epochs=epochs)
            init_trainer.assert_called_with(max_epochs=epochs,
                                            trainer_params=ANY)

            # continue training
            model.fit_from_dataset(train_dataset, epochs=epochs)
            init_trainer.assert_called_with(max_epochs=epochs,
                                            trainer_params=ANY)

    def test_predit_after_fit_from_dataset(self):
        # NOTE: method name keeps its historical typo ("predit") to preserve the
        # test identifier; renaming would change which test names CI reports
        model_cls, kwargs, _ = models_cls_kwargs_errs[0]
        model = model_cls(input_chunk_length=IN_LEN,
                          output_chunk_length=OUT_LEN,
                          **kwargs)
        multiple_ts = [self.ts_pass_train] * 10
        train_dataset = model._build_train_dataset(
            multiple_ts,
            past_covariates=None,
            future_covariates=None,
            max_samples_per_ts=None,
        )
        model.fit_from_dataset(train_dataset, epochs=3)

        # test predict() works after fit_from_dataset()
        model.predict(n=1, series=multiple_ts[0])

    def test_sample_smaller_than_batch_size(self):
        """
        Checking that the TorchForecastingModels do not crash even if the number of available samples for training
        is strictly lower than the selected batch_size
        """
        # TS with 50 timestamps. TorchForecastingModels will use the SequentialDataset for producing training
        # samples, which means we will have 50 - 22 - 2 + 1 = 27 samples, which is < 32 (batch_size). The model
        # should still train on those samples and not crash in any way
        ts = linear_timeseries(start_value=0, end_value=1, length=50)

        model = RNNModel(input_chunk_length=20,
                         output_chunk_length=2,
                         n_epochs=2,
                         batch_size=32)
        model.fit(ts)

    def test_max_samples_per_ts(self):
        """
        Checking that we can fit TorchForecastingModels with max_samples_per_ts, without crash
        """
        ts = linear_timeseries(start_value=0, end_value=1, length=50)

        model = RNNModel(input_chunk_length=20,
                         output_chunk_length=2,
                         n_epochs=2,
                         batch_size=32)
        model.fit(ts, max_samples_per_ts=5)

    def test_residuals(self):
        """
        Torch models should not fail when computing residuals on a series
        long enough to accomodate at least one training sample.
        """
        ts = linear_timeseries(start_value=0, end_value=1, length=38)

        model = NBEATSModel(
            input_chunk_length=24,
            output_chunk_length=12,
            num_stacks=2,
            num_blocks=1,
            num_layers=1,
            layer_widths=2,
            n_epochs=2,
        )

        model.residuals(ts)
def test_moving_average_univariate(self):
    """A trailing 3-point moving average must damp a sine wave's amplitude."""
    raw_series = tg.sine_timeseries(length=30, value_frequency=0.1)
    smoother = MovingAverage(window=3, centered=False)
    smoothed_series = smoother.filter(raw_series)

    mean_abs_raw = np.mean(np.abs(raw_series.values()))
    mean_abs_smoothed = np.mean(np.abs(smoothed_series.values()))
    self.assertGreater(mean_abs_raw, mean_abs_smoothed)
def helper_test_seasonality_inference(self, freq_string, expected_seasonal_periods):
    """Fit ExponentialSmoothing on a sine series with the given pandas frequency
    string and check that the inferred ``seasonal_periods`` matches."""
    sine_series = tg.sine_timeseries(length=200, freq=freq_string)
    es_model = ExponentialSmoothing()
    es_model.fit(sine_series)
    self.assertEqual(es_model.seasonal_periods, expected_seasonal_periods)
class BoxCoxTestCase(unittest.TestCase):
    """Tests for the BoxCox transformer: lambda handling, transform/inverse
    round-trips, sequences of series, refitting, and stochastic series."""

    # shared fixtures: one sine, one linear, and their 2-component stack
    sine_series = sine_timeseries(length=50, value_y_offset=5, value_frequency=0.05)
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxbox_lambda(self):
        # NOTE(review): "boxbox" looks like a typo for "boxcox"; name kept to
        # preserve the test id.
        # scalar lmbda is broadcast to every component
        boxcox = BoxCox(lmbda=0.3)
        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.3]])

        # per-component lmbda list
        boxcox = BoxCox(lmbda=[0.3, 0.4])
        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.4]])

        # lmbda list longer than the number of components must be rejected
        with self.assertRaises(ValueError):
            boxcox = BoxCox(lmbda=[0.2, 0.4, 0.5])
            boxcox.fit(self.multi_series)

        # with no lmbda given, the two optimization methods should find
        # different lambdas
        boxcox = BoxCox(optim_method="mle")
        boxcox.fit(self.multi_series)
        lmbda1 = boxcox._fitted_params[0].tolist()

        boxcox = BoxCox(optim_method="pearsonr")
        boxcox.fit(self.multi_series)
        lmbda2 = boxcox._fitted_params[0].tolist()

        self.assertNotEqual(lmbda1, lmbda2)

    def test_boxcox_transform(self):
        """BoxCox with lmbda=0 is the natural log transform."""
        log_mapper = Mapper(lambda x: np.log(x))
        boxcox = BoxCox(lmbda=0)

        transformed1 = log_mapper.transform(self.sine_series)
        transformed2 = boxcox.fit(self.sine_series).transform(self.sine_series)

        np.testing.assert_almost_equal(
            transformed1.all_values(copy=False),
            transformed2.all_values(copy=False),
            decimal=4,
        )

    def test_boxcox_inverse(self):
        """inverse_transform(transform(x)) recovers x up to numerical noise."""
        boxcox = BoxCox()
        transformed = boxcox.fit_transform(self.multi_series)
        back = boxcox.inverse_transform(transformed)
        pd.testing.assert_frame_equal(self.multi_series.pd_dataframe(), back.pd_dataframe(), check_exact=False)

    def test_boxcox_multi_ts(self):
        """Round-trip on a list of series for every accepted lmbda form."""
        test_cases = [
            ([[0.2, 0.4], [0.3, 0.6]]),  # full lambda
            (0.4),  # single value
            None,  # None
        ]

        for lmbda in test_cases:
            box_cox = BoxCox(lmbda=lmbda)
            transformed = box_cox.fit_transform(
                [self.multi_series, self.multi_series])
            back = box_cox.inverse_transform(transformed)
            pd.testing.assert_frame_equal(
                self.multi_series.pd_dataframe(),
                back[0].pd_dataframe(),
                check_exact=False,
            )
            pd.testing.assert_frame_equal(
                self.multi_series.pd_dataframe(),
                back[1].pd_dataframe(),
                check_exact=False,
            )

    def test_boxcox_multiple_calls_to_fit(self):
        """
        This test checks whether calling the scaler twice is calculating new
        lambdas instead of keeping the old ones.
        """
        box_cox = BoxCox()

        box_cox.fit(self.sine_series)
        lambda1 = deepcopy(box_cox._fitted_params)[0].tolist()

        box_cox.fit(self.lin_series)
        lambda2 = deepcopy(box_cox._fitted_params)[0].tolist()

        self.assertNotEqual(
            lambda1, lambda2, "Lambdas should change when the transformer is retrained")

    def test_multivariate_stochastic_series(self):
        # round-trip on a probabilistic series: (10 times, 5 components, 10 samples)
        transformer = BoxCox()
        vals = np.random.rand(10, 5, 10)
        series = TimeSeries.from_values(vals)

        new_series = transformer.fit_transform(series)
        series_back = transformer.inverse_transform(new_series)

        # Test inverse transform
        np.testing.assert_allclose(series.all_values(), series_back.all_values())
class DynamicTimeWarpingTestCase(DartsBaseTestClass):
    """Tests for dynamic time warping: exact and multi-grid alignment, window
    constraints, warping, metrics, NaN handling, plotting, multivariate input."""

    # shared fixtures: two sine waves, one full period each, phase-shifted by pi/4
    length = 20
    freq = 1 / length
    series1 = tg.sine_timeseries(length=length, value_frequency=freq, value_phase=0, value_y_offset=5)
    series2 = tg.sine_timeseries(length=length, value_frequency=freq, value_phase=np.pi / 4, value_y_offset=5)

    def test_shift(self):
        # input2 is input1 delayed by one step; an exact DTW alignment should
        # match each element to its shifted twin at zero cost.
        input1 = [
            1,
            1,
            1,
            1,
            1.2,
            1.4,
            1.2,
            1,
            1,
            1,
            1,
            1,
            1,
            1.2,
            1.4,
            1.6,
            1.8,
            1.6,
            1.4,
            1.2,
            1,
            1,
        ]
        input2 = [1] + input1[:-1]

        # (0,0), then the one-step-shifted diagonal, closed at the last cell
        expected_path = ([(0, 0)] + list(
            (i - 1, i) for i in range(1, len(input1))) + [(len(input1) - 1, len(input2) - 1)])

        series1 = _series_from_values(input1)
        series2 = _series_from_values(input2)

        # multi_grid_radius=-1 requests the exact (non-approximate) algorithm
        exact_alignment = dtw.dtw(series1, series2, multi_grid_radius=-1)

        self.assertEqual(
            exact_alignment.distance(),
            0,
            "Minimum cost between two shifted series should be 0",
        )
        self.assertTrue(np.array_equal(exact_alignment.path(), expected_path), "Incorrect path")

    def test_multi_grid(self):
        # The multi-grid approximation should stay close to the exact distance.
        size = 2**5 - 1  # test odd size
        freq = 1 / size
        input1 = np.cos(np.arange(size) * 2 * np.pi * freq)
        input2 = np.sin(np.arange(size) * 2 * np.pi * freq) + 0.1 * np.random.random(size=size)

        series1 = _series_from_values(input1)
        series2 = _series_from_values(input2)

        exact_distance = dtw.dtw(series1, series2, multi_grid_radius=-1).distance()
        approx_distance = dtw.dtw(series1, series2, multi_grid_radius=1).distance()

        self.assertAlmostEqual(exact_distance, approx_distance, 3)

    def test_sakoe_chiba_window(self):
        # A Sakoe-Chiba band constrains every path cell to |i - j| <= window_size.
        window = 2
        alignment = dtw.dtw(self.series1, self.series2, window=dtw.SakoeChiba(window_size=2))
        path = alignment.path()
        for i, j in path:
            self.assertGreaterEqual(window, abs(i - j))

    def test_itakura_window(self):
        # First check the exact cell set produced by a small Itakura
        # parallelogram, then check alignment quality on larger sizes.
        n = 6
        m = 5
        slope = 1.5
        window = dtw.Itakura(max_slope=slope)
        window.init_size(n, m)

        cells = list(window)
        self.assertEqual(
            cells,
            [
                (1, 1),
                (1, 2),
                (2, 1),
                (2, 2),
                (2, 3),
                (3, 1),
                (3, 2),
                (3, 3),
                (3, 4),
                (4, 2),
                (4, 3),
                (4, 4),
                (5, 2),
                (5, 3),
                (5, 4),
                (5, 5),
                (6, 4),
                (6, 5),
            ],
        )

        sizes = [(10, 43), (543, 45), (34, 11)]

        for n, m in sizes:
            # slope large enough for the parallelogram to cover the diagonal
            slope = m / n + 1
            series1 = tg.sine_timeseries(length=n, value_frequency=1 / n, value_phase=0)
            series2 = tg.sine_timeseries(length=m, value_frequency=1 / m, value_phase=np.pi / 4)

            dist = dtw.dtw(series1, series2, window=dtw.Itakura(slope)).mean_distance()
            self.assertGreater(1, dist)

    def test_warp(self):
        # Support different time dimension names
        xa1 = self.series1.data_array().rename({"time": "time1"})
        xa2 = self.series2.data_array().rename({"time": "time2"})
        static_covs = pd.DataFrame([[0.0, 1.0]], columns=["st1", "st2"])
        series1 = TimeSeries.from_xarray(xa1).with_static_covariates(
            static_covs)
        series2 = TimeSeries.from_xarray(xa2).with_static_covariates(
            static_covs)

        alignment = dtw.dtw(series1, series2)

        # warped() aligns both series to the warp path; the mean alignment
        # distance then equals the MAE between the warped series
        warped1, warped2 = alignment.warped()
        self.assertAlmostEqual(alignment.mean_distance(), mae(warped1, warped2))

        # static covariates must survive the warping
        assert warped1.static_covariates.equals(series1.static_covariates)
        assert warped2.static_covariates.equals(series2.static_covariates)

        """
        See DTWAlignment.warped for why this functionality is currently disabled

        #Mutually Exclusive Option
        with self.assertRaises(ValueError):
            alignment.warped(take_dates=True, range_index=True)

        #Take_dates does not support indexing by RangeIndex
        with self.assertRaises(ValueError):
            xa3 = xa1.copy()
            xa3["time1"] = pd.RangeIndex(0, len(self.series1))
            dtw.dtw(TimeSeries.from_xarray(xa3), series2).warped(take_dates=True)

        warped1, warped2 = alignment.warped(take_dates=True)
        self.assertTrue(np.all(warped1.time_index == warped2.time_index))
        """

    def test_metric(self):
        # dtw_metric = metric computed on the warped (aligned) series
        metric1 = dtw_metric(self.series1, self.series2, metric=mae)
        metric2 = dtw_metric(self.series1, self.series2, metric=mape)

        self.assertGreater(0.5, metric1)
        self.assertGreater(5, metric2)

    def test_nans(self):
        # NaNs in either input must be rejected
        with self.assertRaises(ValueError):
            series1 = _series_from_values([np.nan, 0, 1, 2, 3])
            series2 = _series_from_values([0, 1, 2, 3, 4])
            dtw.dtw(series1, series2)

    def test_plot(self):
        # smoke test: plotting must not raise
        align = dtw.dtw(self.series2, self.series1)
        align.plot()
        align.plot_alignment()

    def test_multivariate(self):
        # Duplicating each univariate value into n components should not
        # change the optimal warp path.
        n = 2
        values1 = np.repeat(self.series1.univariate_values(), n)
        values2 = np.repeat(self.series2.univariate_values(), n)
        values1 = values1.reshape((-1, n))
        values2 = values2.reshape((-1, n))

        multi_series1 = TimeSeries.from_values(values1)
        multi_series2 = TimeSeries.from_values(values2)

        radius = 2
        alignment_uni = dtw.dtw(self.series1, self.series2, multi_grid_radius=radius)
        alignment_multi = dtw.dtw(multi_series1, multi_series2, multi_grid_radius=radius)

        self.assertTrue(np.all(alignment_uni.path() == alignment_multi.path()))
class BoxCoxTestCase(unittest.TestCase):
    """Tests for the BoxCox transformer: lambda handling, transform/inverse
    round-trips, sequences of series, and refitting.

    NOTE(review): this class shares its name with another ``BoxCoxTestCase``
    defined earlier in the file. When both live in the same module the later
    definition shadows the earlier one, so one copy's tests never run; this
    stale duplicate should probably be deleted.
    """

    # shared fixtures: one sine, one linear, and their 2-component stack
    sine_series = sine_timeseries(length=50, value_y_offset=5, value_frequency=0.05)
    lin_series = linear_timeseries(start_value=1, end_value=10, length=50)
    multi_series = sine_series.stack(lin_series)

    def test_boxbox_lambda(self):
        # scalar lmbda is broadcast to every component
        boxcox = BoxCox(lmbda=0.3)
        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.3]])

        # per-component lmbda list
        boxcox = BoxCox(lmbda=[0.3, 0.4])
        boxcox.fit(self.multi_series)
        self.assertEqual(boxcox._fitted_params, [[0.3, 0.4]])

        # three lambdas for a two-component series must be rejected
        with self.assertRaises(ValueError):
            boxcox = BoxCox(lmbda=[0.2, 0.4, 0.5])
            boxcox.fit(self.multi_series)

        # with no lmbda given, the two optimization methods should find
        # different lambdas
        boxcox = BoxCox(optim_method='mle')
        boxcox.fit(self.multi_series)
        lmbda1 = boxcox._fitted_params[0].tolist()

        boxcox = BoxCox(optim_method='pearsonr')
        boxcox.fit(self.multi_series)
        lmbda2 = boxcox._fitted_params[0].tolist()

        self.assertNotEqual(lmbda1, lmbda2)

    def test_boxcox_transform(self):
        """BoxCox with lmbda=0 is the natural log transform."""
        # fix: bare `log` was an undefined name — use np.log, and compare
        # values approximately instead of exact TimeSeries equality on floats
        # (consistent with the sibling BoxCoxTestCase earlier in the file).
        log_mapper = Mapper(lambda x: np.log(x))
        boxcox = BoxCox(lmbda=0)

        transformed1 = log_mapper.transform(self.sine_series)
        transformed2 = boxcox.fit(self.sine_series).transform(self.sine_series)

        np.testing.assert_almost_equal(
            transformed1.all_values(copy=False),
            transformed2.all_values(copy=False),
            decimal=4,
        )

    def test_boxcox_inverse(self):
        """inverse_transform(transform(x)) recovers x up to numerical noise."""
        boxcox = BoxCox()
        transformed = boxcox.fit_transform(self.multi_series)
        back = boxcox.inverse_transform(transformed)
        # fix: `_df` is a removed private attribute — use the public pd_dataframe()
        pd.testing.assert_frame_equal(
            self.multi_series.pd_dataframe(), back.pd_dataframe(), check_exact=False
        )

    def test_boxcox_multi_ts(self):
        """Round-trip on a list of series for every accepted lmbda form."""
        test_cases = [
            [[0.2, 0.4], [0.3, 0.6]],  # full lambda
            0.4,  # single value
            None,  # None
        ]

        for lmbda in test_cases:
            box_cox = BoxCox(lmbda=lmbda)
            transformed = box_cox.fit_transform([self.multi_series, self.multi_series])
            back = box_cox.inverse_transform(transformed)
            for recovered in back:
                # fix: `_df` is a removed private attribute — use pd_dataframe()
                pd.testing.assert_frame_equal(
                    self.multi_series.pd_dataframe(),
                    recovered.pd_dataframe(),
                    check_exact=False,
                )

    def test_boxcox_multiple_calls_to_fit(self):
        """
        This test checks whether calling the scaler twice is calculating new
        lambdas instead of keeping the old ones.
        """
        box_cox = BoxCox()

        box_cox.fit(self.sine_series)
        lambda1 = deepcopy(box_cox._fitted_params)[0].tolist()

        box_cox.fit(self.lin_series)
        lambda2 = deepcopy(box_cox._fitted_params)[0].tolist()

        self.assertNotEqual(lambda1, lambda2, "Lambdas should change when the transformer is retrained")