def test_train(
    self, tmp_path, capsys, use_pred_months, experiment, monthly_agg, predict_delta
):
    """Train, predict with and evaluate ``LinearRegression`` on synthetic data.

    Synthetic datasets are written into the folder layout under ``tmp_path``
    that the model is pointed at, then the test checks that:

    * training prints an ``Epoch 1, train RMSE: `` line to stdout,
    * the fitted estimator is an ``SGDRegressor``,
    * the number of fitted coefficients matches the expected feature count,
    * ``predict`` returns one prediction per target value,
    * ``evaluate(save_preds=True)`` writes ``preds_2001_12.nc`` and
      ``results.json``, and the saved predictions carry ``lat``/``lon``/``time``
      coordinates with the same time as ``y``.

    Args:
        tmp_path: Temporary directory used as the model's data root.
        capsys: Capture fixture; ``readouterr()`` supplies the training stdout.
        use_pred_months: Forwarded to the model as ``include_pred_month``.
        experiment: Experiment name; ``"nowcast"`` and ``"one_month_forecast"``
            are the only values with a known expected coefficient count.
        monthly_agg: Forwarded to the model as ``include_monthly_aggs``.
        predict_delta: Forwarded to the model as ``predict_delta``.

    Raises:
        ValueError: If ``experiment`` has no known expected coefficient count.
    """
    x, _, _ = _make_dataset(size=(5, 5), const=True)
    x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
    # The target is the final time step, taken before the extra variables
    # are merged in.
    y = x.isel(time=[-1])

    x_add1, _, _ = _make_dataset(size=(5, 5), const=True, variable_name="precip")
    x_add1 = x_add1 * 2
    x_add2, _, _ = _make_dataset(size=(5, 5), const=True, variable_name="temp")
    x_add2 = x_add2 * 3
    x = xr.merge([x, x_add1, x_add2])

    # mean 0 / std 1 for every variable -- presumably so that normalisation
    # leaves the synthetic values unchanged (confirm against the model code).
    norm_dict = {
        "VHI": {"mean": 0, "std": 1},
        "precip": {"mean": 0, "std": 1},
        "temp": {"mean": 0, "std": 1},
    }
    static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}

    experiment_dir = tmp_path / f"features/{experiment}"
    train_dir = experiment_dir / "train/2001_12"
    train_dir.mkdir(parents=True)
    test_dir = experiment_dir / "test/2001_12"
    test_dir.mkdir(parents=True)
    static_dir = tmp_path / "features/static"
    static_dir.mkdir(parents=True)

    with (experiment_dir / "normalizing_dict.pkl").open("wb") as f:
        pickle.dump(norm_dict, f)
    with (static_dir / "normalizing_dict.pkl").open("wb") as f:
        pickle.dump(static_norm_dict, f)

    # The same x / y pair is used for both the train and the test split.
    x.to_netcdf(train_dir / "x.nc")
    x.to_netcdf(test_dir / "x.nc")
    y.to_netcdf(train_dir / "y.nc")
    y.to_netcdf(test_dir / "y.nc")
    x_static.to_netcdf(static_dir / "data.nc")

    model = LinearRegression(
        tmp_path,
        include_pred_month=use_pred_months,
        experiment=experiment,
        include_monthly_aggs=monthly_agg,
        predict_delta=predict_delta,
        normalize_y=True,
    )
    model.train()

    captured = capsys.readouterr()
    expected_stdout = "Epoch 1, train RMSE: "
    assert (
        expected_stdout in captured.out
    ), f"Expected stdout to be {expected_stdout}, got {captured.out}"

    assert isinstance(
        model.model, linear_model.SGDRegressor
    ), "Model attribute not a linear regression!"

    if experiment == "nowcast":
        coef_size = (3 * 35) + 2
    elif experiment == "one_month_forecast":
        coef_size = 3 * 36
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(f"No expected coef size for experiment {experiment!r}")

    if monthly_agg:
        # Original note: "doubled including the mean, tripled including the
        # std" -- only the doubling is applied here.
        coef_size *= 2
    if use_pred_months:
        coef_size += 12

    coef_size += 3  # for the yearly aggs
    coef_size += 1  # for the static variable
    coef_size += 1  # for the prev_y_var

    assert model.model.coef_.size == coef_size, "Got unexpected coef size"

    test_arrays_dict, preds_dict = model.predict()
    assert (
        test_arrays_dict["2001_12"]["y"].size == preds_dict["2001_12"].shape[0]
    ), "Expected length of test arrays to be the same as the predictions"

    # test saving the model outputs
    model.evaluate(save_preds=True)

    save_path = model.data_path / "models" / experiment / "linear_regression"
    assert (save_path / "preds_2001_12.nc").exists()
    assert (save_path / "results.json").exists()

    # Open as a context manager so the file handle is released before the
    # temporary directory is cleaned up.
    with xr.open_dataset(save_path / "preds_2001_12.nc") as pred_ds:
        assert np.isin(["lat", "lon", "time"], list(pred_ds.coords)).all()
        assert y.time == pred_ds.time
def test_train(self, tmp_path, capsys, use_pred_months, experiment, monthly_agg):
    """Train, predict with and evaluate ``LinearRegression`` on synthetic data.

    Synthetic datasets are written into the folder layout under ``tmp_path``
    that the model is pointed at (using ``hello`` as the train / test folder
    name), then the test checks that:

    * training prints an ``Epoch 1, train RMSE: `` line to stdout,
    * the fitted estimator is an ``SGDRegressor``,
    * the number of fitted coefficients matches the expected feature count,
    * ``predict`` returns one prediction per target value,
    * ``evaluate(save_preds=True)`` writes ``preds_hello.nc`` and
      ``results.json``, and the saved predictions carry ``lat``/``lon``/``time``
      coordinates with the same time as ``y``.

    Args:
        tmp_path: Temporary directory used as the model's data root.
        capsys: Capture fixture; ``readouterr()`` supplies the training stdout.
        use_pred_months: Forwarded to the model as ``include_pred_month``.
        experiment: Experiment name; ``'nowcast'`` and ``'one_month_forecast'``
            are the only values with a known expected coefficient count.
        monthly_agg: Forwarded to the model as ``include_monthly_aggs``.

    Raises:
        ValueError: If ``experiment`` has no known expected coefficient count.
    """
    x, _, _ = _make_dataset(size=(5, 5), const=True)
    x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
    # The target is the final time step, taken before the extra variables
    # are merged in.
    y = x.isel(time=[-1])

    x_add1, _, _ = _make_dataset(size=(5, 5), const=True, variable_name='precip')
    x_add2, _, _ = _make_dataset(size=(5, 5), const=True, variable_name='temp')
    x = xr.merge([x, x_add1, x_add2])

    # mean 0 / std 1 for every variable -- presumably so that normalisation
    # leaves the synthetic values unchanged (confirm against the model code).
    norm_dict = {
        'VHI': {'mean': 0, 'std': 1},
        'precip': {'mean': 0, 'std': 1},
        'temp': {'mean': 0, 'std': 1},
    }
    static_norm_dict = {'VHI': {'mean': 0.0, 'std': 1.0}}

    experiment_dir = tmp_path / f'features/{experiment}'
    train_dir = experiment_dir / 'train/hello'
    train_dir.mkdir(parents=True)
    test_dir = experiment_dir / 'test/hello'
    test_dir.mkdir(parents=True)
    static_dir = tmp_path / 'features/static'
    static_dir.mkdir(parents=True)

    with (experiment_dir / 'normalizing_dict.pkl').open('wb') as f:
        pickle.dump(norm_dict, f)
    with (static_dir / 'normalizing_dict.pkl').open('wb') as f:
        pickle.dump(static_norm_dict, f)

    # The same x / y pair is used for both the train and the test split.
    x.to_netcdf(train_dir / 'x.nc')
    x.to_netcdf(test_dir / 'x.nc')
    y.to_netcdf(train_dir / 'y.nc')
    y.to_netcdf(test_dir / 'y.nc')
    x_static.to_netcdf(static_dir / 'data.nc')

    model = LinearRegression(
        tmp_path,
        include_pred_month=use_pred_months,
        experiment=experiment,
        include_monthly_aggs=monthly_agg,
    )
    model.train()

    captured = capsys.readouterr()
    expected_stdout = 'Epoch 1, train RMSE: '
    assert expected_stdout in captured.out, \
        f'Expected stdout to be {expected_stdout}, got {captured.out}'

    assert isinstance(model.model, linear_model.SGDRegressor), \
        'Model attribute not a linear regression!'

    if experiment == 'nowcast':
        coef_size = (3 * 35) + 2
    elif experiment == 'one_month_forecast':
        coef_size = (3 * 36)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(f'No expected coef size for experiment {experiment!r}')

    if monthly_agg:
        # Original note: "doubled including the mean, tripled including the
        # std" -- only the doubling is applied here.
        coef_size *= 2
    if use_pred_months:
        coef_size += 12

    coef_size += 3  # for the yearly aggs
    coef_size += 1  # for the static variable

    assert model.model.coef_.size == coef_size, 'Got unexpected coef size'

    test_arrays_dict, preds_dict = model.predict()
    assert (
        test_arrays_dict['hello']['y'].size == preds_dict['hello'].shape[0]
    ), 'Expected length of test arrays to be the same as the predictions'

    # test saving the model outputs
    model.evaluate(save_preds=True)

    save_path = model.data_path / 'models' / experiment / 'linear_regression'
    assert (save_path / 'preds_hello.nc').exists()
    assert (save_path / 'results.json').exists()

    # Open as a context manager so the file handle is released before the
    # temporary directory is cleaned up.
    with xr.open_dataset(save_path / 'preds_hello.nc') as pred_ds:
        assert np.isin(['lat', 'lon', 'time'], list(pred_ds.coords)).all()
        assert y.time == pred_ds.time