def test_daily_regression(get_config: Fixture[Callable[[str], dict]], single_timescale_model: Fixture[str],
                          daily_dataset: Fixture[str], single_timescale_forcings: Fixture[str]):
    """Test regression training and evaluation for daily predictions.

    NOTE(review): a second function named ``test_daily_regression`` is defined later in this
    module and shadows this one, so pytest only collects the later definition — confirm which
    of the two variants should be kept.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration to test.
    single_timescale_model : Fixture[str]
        Model to test.
    daily_dataset : Fixture[str]
        Daily dataset to use.
    single_timescale_forcings : Fixture[str]
        Daily forcings set to use.
    """
    config = get_config('daily_regression')
    # Point the base run configuration at the parametrized model/dataset/forcings combination.
    config.update_config({
        'model': single_timescale_model,
        'dataset': daily_dataset['dataset'],
        'data_dir': config.data_dir / daily_dataset['dataset'],
        'target_variables': daily_dataset['target'],
        'forcings': single_timescale_forcings['forcings'],
        'dynamic_inputs': single_timescale_forcings['variables']
    })

    # Train for one epoch, evaluate on the test period, then validate the stored results.
    start_training(config)
    start_evaluation(cfg=config, run_dir=config.run_dir, epoch=1, period='test')

    _check_results(config, '01022500')
def test_daily_regression_additional_features(get_config: Fixture[Callable[[str], dict]]):
    """Tests #38 (training and testing with additional_features).

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration
    """
    basin = '01022500'
    config = get_config('daily_regression_additional_features')
    period_start, period_end = _get_test_start_end_dates(config)

    # One training epoch followed by evaluation on the test period.
    start_training(config)
    start_evaluation(cfg=config, run_dir=config.run_dir, epoch=1, period='test')

    xr_results = _get_basin_results(config.run_dir, 1)[basin]['1D']['xr'].isel(time_step=-1)

    # The evaluation output must span exactly the configured test period.
    assert pd.to_datetime(xr_results['date'].values[0]) == period_start.date()
    assert pd.to_datetime(xr_results['date'].values[-1]) == period_end.date()

    # Stored observations must match the discharge loaded directly from disk.
    observed = _get_discharge(config, basin)
    assert observed.loc[period_start:period_end].values \
        == approx(xr_results[f'{config.target_variables[0]}_obs'].values.reshape(-1), nan_ok=True)

    # CAMELS forcings have no NaNs, so there should be no NaN predictions
    assert not pd.isna(xr_results[f'{config.target_variables[0]}_sim']).any()
def test_daily_regression(get_config: Fixture[Callable[[str], dict]], single_timescale_model: Fixture[str],
                          daily_dataset: Fixture[str], single_timescale_forcings: Fixture[str]):
    """Test regression training and evaluation for daily predictions.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration to test.
    single_timescale_model : Fixture[str]
        Model to test.
    daily_dataset : Fixture[str]
        Daily dataset to use.
    single_timescale_forcings : Fixture[str]
        Daily forcings set to use.
    """
    config = get_config('daily_regression')
    # Point the base run configuration at the parametrized model/dataset/forcings combination.
    config.update_config({
        'model': single_timescale_model,
        'dataset': daily_dataset['dataset'],
        'data_dir': config.data_dir / daily_dataset['dataset'],
        'target_variables': daily_dataset['target'],
        'forcings': single_timescale_forcings['forcings'],
        'dynamic_inputs': single_timescale_forcings['variables']
    })
    basin = '01022500'
    test_start_date, test_end_date = _get_test_start_end_dates(config)

    # Train for one epoch, then evaluate on the test period.
    start_training(config)
    start_evaluation(cfg=config, run_dir=config.run_dir, epoch=1, period='test')

    results = _get_basin_results(config.run_dir, 1)[basin]['1D']['xr'].isel(time_step=-1)

    # The evaluation output must span exactly the configured test period.
    assert pd.to_datetime(results['date'].values[0]) == test_start_date.date()
    assert pd.to_datetime(results['date'].values[-1]) == test_end_date.date()

    # Stored observations must match the discharge loaded directly from disk.
    discharge = _get_discharge(config, basin)
    assert discharge.loc[test_start_date:test_end_date].values \
        == approx(results[f'{config.target_variables[0]}_obs'].values.reshape(-1), nan_ok=True)

    # CAMELS forcings have no NaNs, so there should be no NaN predictions
    assert not pd.isna(results[f'{config.target_variables[0]}_sim']).any()
def test_transformer_daily_regression(get_config: Fixture[Callable[[str], dict]]):
    """Tests training and testing with a transformer model.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration
    """
    # Train for a single epoch, evaluate on the test period, and check the stored results.
    run_cfg = get_config('transformer_daily_regression')
    start_training(run_cfg)
    start_evaluation(cfg=run_cfg, run_dir=run_cfg.run_dir, epoch=1, period='test')
    _check_results(run_cfg, '01022500')
def test_multi_timescale_regression(get_config: Fixture[Callable[[str], dict]], multi_timescale_model: Fixture[str]):
    """Test regression training and evaluation for multi-timescale predictions.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration to test.
    multi_timescale_model : Fixture[str]
        Model to test.
    """
    config = get_config('multi_timescale_regression')
    config.update_config({'model': multi_timescale_model})
    basin = '01022500'
    test_start_date, test_end_date = _get_test_start_end_dates(config)

    # Train for one epoch, then evaluate on the test period.
    start_training(config)
    start_evaluation(cfg=config, run_dir=config.run_dir, epoch=1, period='test')

    results = _get_basin_results(config.run_dir, 1)[basin]
    # Reference hourly discharge loaded directly from disk for the test period.
    discharge = hourlycamelsus.load_hourly_us_netcdf(config.data_dir, config.forcings[0]) \
        .sel(basin=basin, date=slice(test_start_date, test_end_date))['qobs_mm_per_hour']

    # Combine date and time_step into a single hourly DatetimeIndex.
    hourly_results = results['1H']['xr'].to_dataframe().reset_index()
    hourly_results.index = hourly_results['date'] + hourly_results['time_step']
    assert hourly_results.index[0] == test_start_date
    # NOTE(review): the 'H' frequency alias is deprecated in newer pandas in favor of 'h' —
    # confirm the pinned pandas version before changing.
    assert hourly_results.index[-1] == test_end_date.floor('H')

    # Daily results must cover the same period; one daily step per 24 hourly steps.
    daily_results = results['1D']['xr']
    assert pd.to_datetime(daily_results['date'].values[0]) == test_start_date
    assert pd.to_datetime(
        daily_results['date'].values[-1]) == test_end_date.date()
    assert len(daily_results['qobs_mm_per_hour_obs']) == len(discharge) // 24

    # Stored hourly observations must match the on-disk discharge.
    assert len(discharge) == len(hourly_results)
    assert discharge.values \
        == approx(hourly_results['qobs_mm_per_hour_obs'].values, nan_ok=True)

    # Hourly CAMELS forcings have no NaNs, so there should be no NaN predictions
    assert not pd.isna(hourly_results['qobs_mm_per_hour_sim']).any()
    assert not pd.isna(daily_results['qobs_mm_per_hour_sim'].values).any()
def test_daily_regression_with_embedding(get_config: Fixture[Callable[[str], dict]],
                                         single_timescale_model: Fixture[str]):
    """Tests training and testing with static and dynamic embedding network.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration
    single_timescale_model : Fixture[str]
        Name of a single-timescale model
    """
    cfg = get_config('daily_regression_with_embedding')
    # Swap in the parametrized single-timescale model before training.
    cfg.update_config({'model': single_timescale_model})

    start_training(cfg)
    start_evaluation(cfg=cfg, run_dir=cfg.run_dir, epoch=1, period='test')

    _check_results(cfg, '01022500')
def test_daily_regression_nan_targets(get_config: Fixture[Callable[[str], dict]]):
    """Tests #112 (evaluation when target values are NaN).

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration
    """
    cfg = get_config('daily_regression_nan_targets')

    start_training(cfg)
    start_evaluation(cfg=cfg, run_dir=cfg.run_dir, epoch=1, period='test')

    # the fact that the targets are NaN should not lead the model to create NaN outputs.
    # however, we do need to pass discharge as an NaN series, because the camels discharge loader would return [],
    # as the test period is outside the part of the discharge time series that is stored on disk.
    nan_discharge = pd.Series(float('nan'), index=pd.date_range(*_get_test_start_end_dates(cfg)))
    _check_results(cfg, '01022500', discharge=nan_discharge)
def eval_run(run_dir: Path, period: str, epoch: int = None, gpu: int = None):
    """Start evaluating a trained model.

    Parameters
    ----------
    run_dir : Path
        Path to the run directory.
    period : {'train', 'validation', 'test'}
        The period to evaluate.
    epoch : int, optional
        Define a specific epoch to use. By default, the weights of the last epoch are used.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'.
    """
    cfg = Config(run_dir / "config.yml")

    # A GPU id passed on the command line takes precedence over the stored config's device.
    if gpu is not None:
        cfg.device = f"cuda:{gpu}"

    start_evaluation(cfg=cfg, run_dir=run_dir, epoch=epoch, period=period)