def test_benchmark_silverkite_template_with_real_data():
    # setting every list to 1 item to speed up test case
    forecast_horizons = [30]
    max_cvs = [3]
    fit_algorithms = ["linear"]
    metric = EvaluationMetricEnum.MeanSquaredError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name)

    # real data
    dl = DataLoader()
    data_path = dl.get_data_home(data_sub_dir="daily")
    data_name = "daily_female_births"
    df = dl.get_df(data_path=data_path, data_name="daily_female_births")
    time_col = "Date"
    value_col = "Births"
    metadata = MetadataParam(time_col=time_col, value_col=value_col, freq="D")

    result_silverkite_real = benchmark_silverkite_template(
        data_name=data_name,
        df=df,
        metadata=metadata,
        evaluation_metric=evaluation_metric,
        forecast_horizons=forecast_horizons,
        fit_algorithms=fit_algorithms,
        max_cvs=max_cvs)

    result_silverkite_real = result_silverkite_real[0]
    assert result_silverkite_real["data_name"] == data_name
    assert result_silverkite_real["forecast_model_name"] == "silverkite_linear"
    assert result_silverkite_real["train_period"] == df.shape[0]
    assert result_silverkite_real["forecast_horizon"] == 30
    assert result_silverkite_real["cv_folds"] == 3

def test_benchmark_silverkite_template_with_simulated_data():
    # setting every list to 1 item to speed up test case
    forecast_horizons = [30]
    max_cvs = [3]
    fit_algorithms = ["linear"]
    metric = EvaluationMetricEnum.MeanSquaredError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name)

    # simulated data
    data_name = "daily_simulated"
    train_period = 365
    data = generate_df_for_tests(freq="D", periods=train_period)
    df = data["df"]
    time_col, value_col = df.columns
    metadata = MetadataParam(time_col=time_col, value_col=value_col, freq="D")

    result_silverkite_simulated = benchmark_silverkite_template(
        data_name=data_name,
        df=df,
        metadata=metadata,
        evaluation_metric=evaluation_metric,
        forecast_horizons=forecast_horizons,
        fit_algorithms=fit_algorithms,
        max_cvs=max_cvs)

    result_silverkite_simulated = result_silverkite_simulated[0]
    assert result_silverkite_simulated["data_name"] == data_name
    assert result_silverkite_simulated["forecast_model_name"] == "silverkite_linear"
    assert result_silverkite_simulated["train_period"] == train_period
    assert result_silverkite_simulated["forecast_horizon"] == 30
    assert result_silverkite_simulated["cv_folds"] == 3

def test_apply_template_for_pipeline_params(df):
    mt = MyTemplate()
    config = ForecastConfig(
        metadata_param=MetadataParam(
            time_col=NEW_TIME_COL,
            value_col=NEW_VALUE_COL,
        ),
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric="MeanSquaredError"))
    original_config = dataclasses.replace(config)

    # Tests `apply_template_for_pipeline_params`
    pipeline_params = mt.apply_template_for_pipeline_params(df=df, config=config)
    assert_equal(pipeline_params["df"], df)
    assert pipeline_params["train_end_date"] is None
    estimator = pipeline_params["pipeline"].steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.coverage == mt.config.coverage
    assert mt.estimator is not estimator
    assert mt.estimator.coverage is None
    assert (
        pipeline_params["pipeline"].named_steps["input"].transformer_list[2][1]
        .named_steps["select_reg"].column_names == mt.get_regressor_cols())

    # Tests `apply_template_decorator`
    assert mt.config == mt.apply_forecast_config_defaults(config)
    assert mt.config != config  # `mt.config` has default values added
    assert config == original_config  # `config` is not modified by the function

def test_get_pipeline(df):
    mt = MyTemplate()
    # Initializes attributes needed by the function
    mt.regressor_cols = mt.get_regressor_cols()
    mt.lagged_regressor_cols = mt.get_lagged_regressor_info()["lagged_regressor_cols"]
    metric = EvaluationMetricEnum.MeanSquaredError
    mt.score_func = metric.name
    mt.score_func_greater_is_better = metric.get_metric_greater_is_better()
    mt.config = ForecastConfig(
        coverage=0.9,
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric=metric.name
        )
    )

    # Checks `get_pipeline` output
    pipeline = mt.get_pipeline()
    assert isinstance(pipeline, sklearn.pipeline.Pipeline)
    estimator = pipeline.steps[-1][-1]
    assert isinstance(estimator, SilverkiteEstimator)
    assert estimator.coverage == mt.config.coverage
    assert mt.estimator is not estimator
    assert mt.estimator.coverage is None
    expected_col_names = ["regressor1", "regressor2", "regressor_categ", "regressor_bool"]
    assert (
        pipeline.named_steps["input"].transformer_list[2][1]
        .named_steps["select_reg"].column_names == expected_col_names)
    assert_eval_function_equal(pipeline.steps[-1][-1].score_func, metric.get_metric_func())

def apply_evaluation_metric_defaults(
        evaluation: Optional[EvaluationMetricParam] = None
) -> EvaluationMetricParam:
    """Applies the default EvaluationMetricParam values to the given object.

    If an expected attribute value is provided, the value is unchanged.
    Otherwise the default value for it is used.
    Other attributes are untouched.
    If the input object is None, it creates an EvaluationMetricParam object.

    Parameters
    ----------
    evaluation : `~greykite.framework.templates.autogen.forecast_config.EvaluationMetricParam` or None
        The EvaluationMetricParam object.

    Returns
    -------
    evaluation : `~greykite.framework.templates.autogen.forecast_config.EvaluationMetricParam`
        A valid EvaluationMetricParam object with the provided attribute values,
        and default values for any attributes that were not provided.
    """
    if evaluation is None:
        evaluation = EvaluationMetricParam()
    if evaluation.cv_selection_metric is None:
        # NB: a subclass may want to override this, if designed
        # for a different objective (e.g. quantile loss)
        evaluation.cv_selection_metric = EvaluationMetricEnum.MeanAbsolutePercentError.name
    if evaluation.cv_report_metrics is None:
        evaluation.cv_report_metrics = CV_REPORT_METRICS_ALL
    return evaluation

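# A minimal usage sketch of the defaulting behavior above, assuming the same
# imports as this module. The name `example_apply_evaluation_metric_defaults`
# is illustrative and not part of the library's test suite; the asserted values
# simply mirror the function's own logic.
def example_apply_evaluation_metric_defaults():
    # None input: a new object is created and both defaults are applied.
    evaluation = apply_evaluation_metric_defaults(None)
    assert evaluation.cv_selection_metric == EvaluationMetricEnum.MeanAbsolutePercentError.name
    assert evaluation.cv_report_metrics == CV_REPORT_METRICS_ALL

    # Provided attributes are kept; only the missing ones are filled in.
    evaluation = apply_evaluation_metric_defaults(
        EvaluationMetricParam(cv_selection_metric=EvaluationMetricEnum.MeanSquaredError.name))
    assert evaluation.cv_selection_metric == EvaluationMetricEnum.MeanSquaredError.name
    assert evaluation.cv_report_metrics == CV_REPORT_METRICS_ALL
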
def test_forecast_config():
    """Tests ForecastConfig dataclass"""
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        metadata_param=MetadataParam(
            time_col="custom_time_col",
            anomaly_info=[{"key": "value"}, {"key2": "value2"}]),
        evaluation_period_param=EvaluationPeriodParam(
            test_horizon=10,
            periods_between_train_test=5,
            cv_min_train_periods=20),
        evaluation_metric_param=EvaluationMetricParam(
            cv_selection_metric=EvaluationMetricEnum.MeanSquaredError.name,
            cv_report_metrics=[
                EvaluationMetricEnum.MeanAbsoluteError.name,
                EvaluationMetricEnum.MeanAbsolutePercentError.name],
            relative_error_tolerance=0.02),
        model_components_param=ModelComponentsParam(
            autoregression={"autoreg_dict": {"autoreg_param": 0}},
            changepoints=None,
            custom={"custom_param": 1},
            growth={"growth_param": 2},
            events={"events_param": 3},
            hyperparameter_override=[{"h1": 4}, {"h2": 5}, None],
            regressors={"names": ["regressor1", "regressor2"]},
            lagged_regressors={"lagged_regressor_dict": {"lag_reg_param": 0}},
            seasonality={"seas_param": 6},
            uncertainty={"uncertainty_param": 7}),
        computation_param=ComputationParam(n_jobs=None))
    assert_forecast_config(config)

    # Tests a string passed to `cv_report_metrics`
    assert ForecastConfig(
        evaluation_metric_param=EvaluationMetricParam(
            cv_report_metrics=CV_REPORT_METRICS_ALL),
    ).to_dict()

def df_config():
    data = generate_df_with_reg_for_tests(
        freq="W-MON",
        periods=140,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]

    model_template = "SILVERKITE"
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanAbsoluteError.name,
        agg_periods=7,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.5
        })
    evaluation_period = EvaluationPeriodParam(
        test_horizon=10,
        periods_between_train_test=5,
        cv_horizon=4,
        cv_min_train_periods=80,
        cv_expanding_window=False,
        cv_periods_between_splits=20,
        cv_periods_between_train_test=3,
        cv_max_splits=3)
    model_components = ModelComponentsParam(
        regressors={"regressor_cols": reg_cols},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {"cv": 2}
            }
        })
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90

    config = ForecastConfig(
        model_template=model_template,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=model_components)
    return {
        "df": df,
        "config": config,
        "model_template": model_template,
        "reg_cols": reg_cols,
    }

def test_prophet_template_custom():
    """Tests prophet_template with custom values, with long range input"""
    # prepares input data
    data = generate_df_with_reg_for_tests(
        freq="H",
        periods=300 * 24,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    df.rename({
        cst.TIME_COL: time_col,
        cst.VALUE_COL: value_col
    }, axis=1, inplace=True)

    # prepares params and calls template
    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        cst.START_DATE_COL: [df[time_col].min()],
        cst.END_DATE_COL: [df[time_col].max()],
        cst.ADJUSTMENT_DELTA_COL: [adjustment_size],
        cst.METRIC_COL: [value_col]
    })
    anomaly_info = {
        "value_col": cst.VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": cst.START_DATE_COL,
        "end_date_col": cst.END_DATE_COL,
        "adjustment_delta_col": cst.ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {cst.METRIC_COL: cst.VALUE_COL},
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info,
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[EvaluationMetricEnum.MedianAbsolutePercentError.name],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01)
    evaluation_period = EvaluationPeriodParam(
        test_horizon=1,
        periods_between_train_test=2,
        cv_horizon=3,
        cv_min_train_periods=4,
        cv_expanding_window=True,
        cv_periods_between_splits=5,
        cv_periods_between_train_test=6,
        cv_max_splits=7)
    model_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": [True],
            "weekly_seasonality": [False],
            "daily_seasonality": [4],
            "add_seasonality_dict": [{
                "yearly": {
                    "period": 365.25,
                    "fourier_order": 20,
                    "prior_scale": 20.0
                },
                "quarterly": {
                    "period": 365.25 / 4,
                    "fourier_order": 15
                },
                "weekly": {
                    "period": 7,
                    "fourier_order": 35,
                    "prior_scale": 30.0
                }
            }]
        },
        growth={"growth_term": "linear"},
        events={
            "holiday_lookup_countries": ["UnitedStates", "UnitedKingdom", "India"],
            "holiday_pre_num_days": [2],
            "holiday_post_num_days": [3],
            "holidays_prior_scale": [5.0]
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10.0,
                    "mode": 'additive'
                },
                "regressor2": {
                    "prior_scale": 20.0,
                    "mode": 'multiplicative'
                },
            }]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "changepoints": [None],
            "n_changepoints": [50],
            "changepoint_range": [0.9]
        },
        uncertainty={
            "mcmc_samples": [500],
            "uncertainty_samples": [2000]
        },
        hyperparameter_override={
            "input__response__null__impute_algorithm": "ts_interpolate",
            "input__response__null__impute_params": {"orders": [7, 14]},
            "input__regressors_numeric__normalize__normalize_algorithm": "RobustScaler",
        })
    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1)
    forecast_horizon = 20
    coverage = 0.7
    config = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        model_components_param=model_components,
        computation_param=computation)
    template = ProphetTemplate()
    params = template.apply_template_for_pipeline_params(df=df, config=config)
    pipeline = params.pop("pipeline", None)

    # Adds start_year and end_year based on the input df
    model_components.events["start_year"] = df[time_col].min().year
    model_components.events["end_year"] = df[time_col].max().year
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits)
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)

def test_silverkite_template_custom(model_components_param):
    """Tests silverkite_template with custom parameters, and data that has regressors"""
    data = generate_df_with_reg_for_tests(
        freq="H",
        periods=300 * 24,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    df.rename({
        TIME_COL: time_col,
        VALUE_COL: value_col
    }, axis=1, inplace=True)

    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        START_DATE_COL: [df[time_col].min()],
        END_DATE_COL: [df[time_col].max()],
        ADJUSTMENT_DELTA_COL: [adjustment_size],
        METRIC_COL: [value_col]
    })
    anomaly_info = {
        "value_col": VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": START_DATE_COL,
        "end_date_col": END_DATE_COL,
        "adjustment_delta_col": ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {METRIC_COL: VALUE_COL},
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[EvaluationMetricEnum.MedianAbsolutePercentError.name],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01
    )
    evaluation_period = EvaluationPeriodParam(
        test_horizon=1,
        periods_between_train_test=2,
        cv_horizon=3,
        cv_min_train_periods=4,
        cv_expanding_window=True,
        cv_periods_between_splits=5,
        cv_periods_between_train_test=6,
        cv_max_splits=7
    )
    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1
    )
    forecast_horizon = 20
    coverage = 0.7

    template = SilverkiteTemplate()
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SK.name,
            metadata_param=metadata,
            forecast_horizon=forecast_horizon,
            coverage=coverage,
            evaluation_metric_param=evaluation_metric,
            evaluation_period_param=evaluation_period,
            model_components_param=model_components_param,
            computation_param=computation
        )
    )
    pipeline = params.pop("pipeline", None)
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits
    )
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)

def valid_configs():
    metadata = MetadataParam(
        time_col=TIME_COL,
        value_col=VALUE_COL,
        freq="D")
    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1)
    forecast_horizon = 2 * 7
    coverage = 0.90
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=EvaluationMetricEnum.MeanAbsoluteError.name,
        cv_report_metrics=None,
        agg_periods=7,
        agg_func=np.mean,
        null_model_params=None)
    evaluation_period = EvaluationPeriodParam(
        test_horizon=2 * 7,
        periods_between_train_test=2 * 7,
        cv_horizon=1 * 7,
        cv_min_train_periods=8 * 7,
        cv_expanding_window=True,
        cv_periods_between_splits=7,
        cv_periods_between_train_test=3 * 7,
        cv_max_splits=2)

    silverkite_components = ModelComponentsParam(
        seasonality={
            "yearly_seasonality": False,
            "weekly_seasonality": True
        },
        growth={"growth_term": "quadratic"},
        events={
            "holidays_to_model_separately": SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES,
            "holiday_lookup_countries": ["UnitedStates"],
            "holiday_pre_num_days": 3,
        },
        changepoints={
            "changepoints_dict": {
                "method": "uniform",
                "n_changepoints": 20,
            }
        },
        regressors={
            "regressor_cols": ["regressor1", "regressor2", "regressor3"]
        },
        uncertainty={
            "uncertainty_dict": "auto",
        },
        hyperparameter_override={"input__response__null__max_frac": 0.1},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {"normalize": True},
            },
            "feature_sets_enabled": False
        })

    prophet_components = ModelComponentsParam(
        seasonality={
            "seasonality_mode": ["additive"],
            "yearly_seasonality": ["auto"],
            "weekly_seasonality": [True],
            "daily_seasonality": ["auto"],
        },
        growth={"growth_term": ["linear"]},
        events={
            "holiday_pre_num_days": [1],
            "holiday_post_num_days": [1],
            "holidays_prior_scale": [1.0]
        },
        changepoints={
            "changepoint_prior_scale": [0.05],
            "n_changepoints": [1],
            "changepoint_range": [0.5],
        },
        regressors={
            "add_regressor_dict": [{
                "regressor1": {
                    "prior_scale": 10,
                    "standardize": True,
                    "mode": 'additive'
                },
                "regressor2": {
                    "prior_scale": 15,
                    "standardize": False,
                    "mode": 'additive'
                },
                "regressor3": {}
            }]
        },
        uncertainty={"uncertainty_samples": [10]})

    valid_prophet = ForecastConfig(
        model_template=ModelTemplateEnum.PROPHET.name,
        metadata_param=metadata,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=prophet_components)

    valid_silverkite = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        metadata_param=metadata,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=silverkite_components)

    configs = {
        "valid_prophet": valid_prophet,
        "valid_silverkite": valid_silverkite
    }
    return configs

def test_run_forecast_config_with_single_simple_silverkite_template():
    # The generic names of single simple silverkite templates are not added to `ModelTemplateEnum`,
    # therefore we test whether they are recognized.
    data = generate_df_for_tests(freq="D", periods=365)
    df = data["df"]
    metric = EvaluationMetricEnum.MeanAbsoluteError
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        agg_periods=7,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.5
        })
    evaluation_period = EvaluationPeriodParam(
        test_horizon=10,
        periods_between_train_test=5,
        cv_horizon=4,
        cv_min_train_periods=80,
        cv_expanding_window=False,
        cv_periods_between_splits=20,
        cv_periods_between_train_test=3,
        cv_max_splits=2)
    model_components = ModelComponentsParam(
        hyperparameter_override=[
            {"estimator__yearly_seasonality": 1},
            {"estimator__yearly_seasonality": 2}])
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90

    single_template_class = SimpleSilverkiteTemplateOptions(
        freq=SILVERKITE_COMPONENT_KEYWORDS.FREQ.value.DAILY,
        seas=SILVERKITE_COMPONENT_KEYWORDS.SEAS.value.NONE)
    forecast_config = ForecastConfig(
        model_template=[single_template_class, "DAILY_ALGO_SGD", "SILVERKITE_DAILY_90"],
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=model_components)
    forecaster = Forecaster()
    result = forecaster.run_forecast_config(df=df, config=forecast_config)
    summary = summarize_grid_search_results(result.grid_search)
    # `single_template_class` is 1 template,
    # "DAILY_ALGO_SGD" is 1 template and "SILVERKITE_DAILY_90" has 4 templates.
    # With 2 items in `hyperparameter_override`, there should be
    # (1 + 1 + 4) * 2 = 12 cases in total.
    assert summary.shape[0] == 12

    # Tests functionality for a single template class only.
    forecast_config = ForecastConfig(
        model_template=single_template_class,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon)
    forecaster = Forecaster()
    pipeline_parameters = forecaster.apply_forecast_config(
        df=df,
        config=forecast_config)
    assert_equal(
        actual=pipeline_parameters["hyperparameter_grid"],
        expected={
            "estimator__time_properties": [None],
            "estimator__origin_for_time_vars": [None],
            "estimator__train_test_thresh": [None],
            "estimator__training_fraction": [None],
            "estimator__fit_algorithm_dict": [{
                "fit_algorithm": "linear",
                "fit_algorithm_params": None
            }],
            "estimator__holidays_to_model_separately": [[]],
            "estimator__holiday_lookup_countries": [[]],
            "estimator__holiday_pre_num_days": [0],
            "estimator__holiday_post_num_days": [0],
            "estimator__holiday_pre_post_num_dict": [None],
            "estimator__daily_event_df_dict": [None],
            "estimator__changepoints_dict": [None],
            "estimator__seasonality_changepoints_dict": [None],
            "estimator__yearly_seasonality": [0],
            "estimator__quarterly_seasonality": [0],
            "estimator__monthly_seasonality": [0],
            "estimator__weekly_seasonality": [0],
            "estimator__daily_seasonality": [0],
            "estimator__max_daily_seas_interaction_order": [0],
            "estimator__max_weekly_seas_interaction_order": [2],
            "estimator__autoreg_dict": [None],
            "estimator__min_admissible_value": [None],
            "estimator__max_admissible_value": [None],
            "estimator__uncertainty_dict": [None],
            "estimator__growth_term": ["linear"],
            "estimator__regressor_cols": [[]],
            "estimator__feature_sets_enabled": [False],
            "estimator__extra_pred_cols": [[]]
        },
        ignore_keys={"estimator__time_properties": None})

def test_run_forecast_config_custom():
    """Tests `run_forecast_config` on weekly data with custom config:

     - numeric and categorical regressors
     - coverage
     - null model
    """
    data = generate_df_with_reg_for_tests(
        freq="W-MON",
        periods=140,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]

    metric = EvaluationMetricEnum.MeanAbsoluteError
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        agg_periods=7,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.5
        })
    evaluation_period = EvaluationPeriodParam(
        test_horizon=10,
        periods_between_train_test=5,
        cv_horizon=4,
        cv_min_train_periods=80,
        cv_expanding_window=False,
        cv_periods_between_splits=20,
        cv_periods_between_train_test=3,
        cv_max_splits=3)
    model_components = ModelComponentsParam(
        regressors={"regressor_cols": reg_cols},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {"cv": 2}
            }
        })
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90

    forecast_config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=model_components)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        forecaster = Forecaster()
        result = forecaster.run_forecast_config(df=df, config=forecast_config)

        mse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[mse] == pytest.approx(2.976, rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(1.360, rel=1e-2)
        assert result.forecast.train_evaluation[mse] == pytest.approx(2.224, rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.941, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=coverage,
            strategy=None,
            score_func=metric.name,
            greater_is_better=False)

    with pytest.raises(KeyError, match="missing_regressor"):
        model_components = ModelComponentsParam(
            regressors={"regressor_cols": ["missing_regressor"]})
        forecaster = Forecaster()
        result = forecaster.run_forecast_config(
            df=df,
            config=ForecastConfig(
                model_template=ModelTemplateEnum.SILVERKITE.name,
                model_components_param=model_components))
        check_forecast_pipeline_result(
            result,
            coverage=None,
            strategy=None,
            score_func=metric.get_metric_func(),
            greater_is_better=False)

metadata = MetadataParam(
    time_col="ts",   # name of the time column
    value_col="y",   # name of the value column
    freq="D"         # "H" for hourly, "D" for daily, "W" for weekly, etc.
)

# Defines the number of periods to forecast into the future
forecast_horizon = 7

# Specifies the intended coverage of the prediction interval
coverage = 0.95

# Defines the metrics used to evaluate the forecasts.
# We use Mean Absolute Percent Error (MAPE) in this tutorial.
evaluation_metric = EvaluationMetricParam(
    cv_selection_metric=EvaluationMetricEnum.MeanAbsolutePercentError.name,
    cv_report_metrics=None)

# Defines the cross-validation config within the pipeline
evaluation_period = EvaluationPeriodParam(
    cv_max_splits=1,  # benchmarking splits are defined in `tscv`; we don't need CV splits here to choose parameter sets
    periods_between_train_test=0,
)

# Defines parameters related to grid-search computation
computation = ComputationParam(
    hyperparameter_budget=None,
    n_jobs=-1,  # to debug, change to 1 for more informative error messages
    verbose=3)
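
# A minimal sketch of how these pieces can be bundled into a single config and
# run, following the same ForecastConfig / Forecaster pattern used in the tests
# above. It assumes a DataFrame `df` with the "ts"/"y" columns declared in
# `metadata`; the model template string is an assumption and can be swapped for
# any other supported template name.
config = ForecastConfig(
    model_template="SILVERKITE",
    forecast_horizon=forecast_horizon,
    coverage=coverage,
    metadata_param=metadata,
    evaluation_metric_param=evaluation_metric,
    evaluation_period_param=evaluation_period,
    computation_param=computation)
forecaster = Forecaster()
result = forecaster.run_forecast_config(df=df, config=config)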