def test_evaluate_no_exog_against_with_exog():
    """Check that supplying exogenous data changes the evaluation scores.

    The same ARIMA forecaster and sliding-window splitter are evaluated twice
    on the data returned by ``load_longley``: once with ``X`` and once with
    ``X=None``. The symmetric MAPE test score must differ element-wise
    between the two result tables.
    """
    y, X = load_longley()
    model = ARIMA(suppress_warnings=True)
    splitter = SlidingWindowSplitter()
    metric = MeanAbsolutePercentageError(symmetric=True)

    with_exog = evaluate(model, splitter, y, X=X, scoring=metric)
    without_exog = evaluate(model, splitter, y, X=None, scoring=metric)

    # Column under which the test score is stored in the evaluate output.
    score_column = f"test_{metric.name}"
    scores_differ = with_exog[score_column] != without_exog[score_column]
    assert np.all(scores_differ)
def test_evaluate_no_exog_against_with_exog():
    """Check that adding exogenous data produces different sMAPE scores.

    Runs ``evaluate`` with and without ``X`` using one ARIMA forecaster and
    one default sliding-window splitter, then asserts that the test-score
    columns of the two outputs differ in every position.
    """
    y, X = load_longley()
    arima = ARIMA(suppress_warnings=True)
    window_cv = SlidingWindowSplitter()
    smape = sMAPE()

    results_exog = evaluate(arima, window_cv, y, X=X, scoring=smape)
    results_plain = evaluate(arima, window_cv, y, X=None, scoring=smape)

    column = f"test_{smape.name}"
    exog_scores = results_exog[column]
    plain_scores = results_plain[column]
    assert np.all(exog_scores != plain_scores)
def _fit_and_score(params):
    """Evaluate one parameter setting and return its averaged results.

    ``self``, ``cv``, ``y``, ``X``, ``scoring``, ``scoring_name`` and
    ``fit_params`` are taken from the enclosing scope.

    Parameters
    ----------
    params : dict
        Parameters applied to a fresh clone of ``self.forecaster``.

    Returns
    -------
    The column-wise mean of the score, ``fit_time`` and ``pred_time``
    columns of the ``evaluate`` output, with labels prefixed by ``mean_``,
    plus a ``params`` entry holding ``params`` itself.
    """
    # Work on a clone so the configured forecaster itself is left untouched.
    candidate = clone(self.forecaster)
    candidate.set_params(**params)

    results = evaluate(
        candidate,
        cv,
        y,
        X,
        strategy=self.strategy,
        scoring=scoring,
        fit_params=fit_params,
    )

    # Keep only the columns of interest, then average them over the splits.
    wanted_columns = [scoring_name, "fit_time", "pred_time"]
    summary = results.filter(items=wanted_columns, axis=1).mean().add_prefix("mean_")

    # Record which parameters produced this row of the results table.
    summary["params"] = params
    return summary
def test_evaluate_common_configs(
    CV, fh, window_length, step_length, strategy, scoring
):
    """Test ``evaluate`` output and scores for common splitter configurations.

    The scores reported by ``evaluate`` are compared against scores computed
    by hand: for each split, a fresh clone of the forecaster is fitted on the
    training window and its prediction is scored on the test window.
    """
    y = make_forecasting_problem(n_timepoints=30, index_type="int")
    forecaster = NaiveForecaster()
    cv = CV(fh, window_length, step_length=step_length)

    out = evaluate(
        forecaster=forecaster, y=y, cv=cv, strategy=strategy, scoring=scoring
    )
    _check_evaluate_output(out, cv, y, scoring)

    def _reference_score(train_idx, test_idx):
        # Fit an independent clone on the training window and score it.
        y_train = y.iloc[train_idx]
        fitted = clone(forecaster)
        fitted.fit(y_train, fh=fh)
        return scoring(y.iloc[test_idx], fitted.predict(), y_train=y_train)

    # check scoring
    actual = out.loc[:, f"test_{scoring.name}"]
    expected = np.empty(cv.get_n_splits(y))
    for fold, (train_idx, test_idx) in enumerate(cv.split(y)):
        expected[fold] = _reference_score(train_idx, test_idx)

    np.testing.assert_array_equal(actual, expected)
def _get_expected_scores(forecaster, cv, param_grid, y, X, scoring):
    """Compute the mean test score of ``forecaster`` for each parameter set.

    Parameters
    ----------
    forecaster : estimator that is cloned once per parameter set.
    cv : splitter forwarded to ``evaluate``.
    param_grid : sized iterable of parameter dicts.
    y, X : data forwarded to ``evaluate``.
    scoring : metric forwarded to ``evaluate``; its ``name`` selects the
        ``test_<name>`` column that is averaged.

    Returns
    -------
    numpy.ndarray
        One mean score per entry of ``param_grid``, in iteration order.
    """

    def _mean_test_score(candidate_params):
        # Clone so that each parameter set starts from the same configuration.
        candidate = clone(forecaster)
        candidate.set_params(**candidate_params)
        results = evaluate(candidate, cv, y, X=X, scoring=scoring)
        return results.loc[:, f"test_{scoring.name}"].mean()

    expected = np.zeros(len(param_grid))
    for position, candidate_params in enumerate(param_grid):
        expected[position] = _mean_test_score(candidate_params)
    return expected
def test_evaluate():
    """Smoke test: ``evaluate`` runs with the update strategy.

    Only the type of the result is checked; the scores themselves are not
    inspected.
    """
    y = load_airline()

    # Forecast steps 1 through 12 ahead.
    horizon = list(range(1, 13))
    splitter = ExpandingWindowSplitter(
        initial_window=24,
        step_length=24,
        fh=horizon,
        window_length=10,
    )
    model = NaiveForecaster(strategy="drift", sp=12)

    result = evaluate(forecaster=model, y=y, cv=splitter, strategy="update")

    # just making sure the function is running
    assert isinstance(result, pd.DataFrame)
def _score_forecasters(forecasters, cv, y):
    """Evaluate all the forecasters on ``y`` and return the name of the best.

    Every forecaster is run through ``evaluate`` with its default scoring.
    The per-split results are averaged, and the forecaster with the lowest
    mean value in the ``test_<metric name>`` column wins, where the metric is
    the one returned by ``check_scoring(None)``. On ties the forecaster seen
    first is kept.

    Parameters
    ----------
    forecasters : iterable of (name, forecaster) pairs
        Candidates to compare.
    cv : splitter
        Forwarded to ``evaluate``.
    y : series
        Forwarded to ``evaluate``.

    Returns
    -------
    The ``name`` of the pair whose forecaster achieved the lowest mean score.

    Raises
    ------
    ValueError
        If ``forecasters`` is empty, so there is nothing to select.
    """
    scoring = check_scoring(None)
    scoring_name = f"test_{scoring.name}"

    best_score = None
    best_name = None
    for name, forecaster in forecasters:
        results = evaluate(forecaster, cv, y)
        results = results.mean()
        new_score = float(results[scoring_name])
        # Compare against None explicitly: a best score of exactly 0.0 is
        # falsy, and a truthiness check would let any later (worse)
        # forecaster overwrite it.
        if best_score is None or new_score < best_score:
            best_score = new_score
            best_name = name

    if best_score is None:
        raise ValueError("`forecasters` is empty; no forecaster to select.")
    return best_name
def test_evaluate_initial_window():
    """Test ``evaluate`` with a sliding-window splitter using an initial window.

    The first row of the output must report a training window of length
    ``initial_window``, and its score must equal the score obtained by
    fitting a fresh clone on the first split by hand.
    """
    initial_window = 20
    fh = 1

    y = make_forecasting_problem(n_timepoints=30, index_type="int")
    forecaster = NaiveForecaster()
    cv = SlidingWindowSplitter(fh=fh, initial_window=initial_window)
    scoring = sMAPE()

    out = evaluate(
        forecaster=forecaster,
        y=y,
        cv=cv,
        strategy="update",
        scoring=scoring,
    )
    _check_evaluate_output(out, cv, y, scoring)
    assert out.loc[0, "len_train_window"] == initial_window

    # check scoring
    actual = out.loc[0, f"test_{scoring.name}"]

    # Reproduce the first split manually with an independent clone.
    first_train, first_test = next(cv.split(y))
    reference = clone(forecaster)
    reference.fit(y.iloc[first_train], fh=fh)
    expected = scoring(y.iloc[first_test], reference.predict())

    np.testing.assert_equal(actual, expected)
# Forecasting horizon: steps 1 through 14 ahead.
fh = np.arange(1, 14 + 1)

# Target series: the values of trend_newCases, indexed by the `date` column
# of total_de_casos_amazonas.
y = pd.Series(data=trend_newCases.values, index=total_de_casos_amazonas.date)
# Set the index frequency to 'D' (calendar day).
y.index.freq = 'D'

# Gradient-boosting regressor with fixed hyperparameters.
# NOTE(review): the values look like the output of a tuning run — confirm.
model = LGBMRegressor(random_state=4, learning_rate=0.04591301953670739,
                      num_leaves=45, min_child_samples=1, subsample=0.05,
                      colsample_bytree=0.9828905761860228, subsample_freq=1,
                      n_estimators=685)

# Wrap the regressor as a forecaster via reduction with a 14-step window.
reg = make_reduction(estimator=model, window_length=14)

# Cross-validate with an expanding window starting at 60 observations,
# refitting on each split and keeping the per-split data in the output.
cv = ExpandingWindowSplitter(initial_window=60)
cross_val = evaluate(forecaster=reg, y=y, cv=cv, strategy="refit",
                     return_data=True)

# Fit on the full series and forecast the horizon, rounding the predictions.
reg.fit(y)
y_pred = reg.predict(fh).round()


def show_figure11():
    # Bar chart of the last 30 rows of `newCases` against `date`, with hover
    # information disabled.
    fig = go.Figure()
    fig.add_trace(
        go.Bar(x=total_de_casos_amazonas['date'].tail(30),
               y=total_de_casos_amazonas['newCases'].tail(30),
               hoverinfo='skip'))
    # Draw the bars in gray.
    fig.update_traces(marker_color='gray')