Example #1
def continue_run(run_dir: Path, config_file: Path = None, gpu: int = None):
    """Continue model training.
    
    Parameters
    ----------
    run_dir : Path
        Path to the run directory.
    config_file : Path, optional
        Path to an additional config file. Each config argument in this file will overwrite the original run config.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'. A value smaller than zero indicates CPU.

    """
    # load config from base run and overwrite all elements with an optional new config
    base_config = Config(run_dir / "config.yml")

    if config_file is not None:
        base_config.update_config(config_file)

    base_config.is_continue_training = True

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None and gpu >= 0:
        base_config.device = f"cuda:{gpu}"
    if gpu is not None and gpu < 0:
        base_config.device = "cpu"

    start_training(base_config)
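A minimal usage sketch for `continue_run`, assuming a hypothetical run directory and override config (`runs/my_run` and `lower_lr.yml` are placeholder names, not taken from the snippet above):

from pathlib import Path

# resume training of an existing run on GPU 0; the optional second config can
# override individual arguments of the original run config
continue_run(run_dir=Path("runs/my_run"), config_file=Path("lower_lr.yml"), gpu=0)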
Example #2
def finetune(config_file: Path = None, gpu: int = None):
    """Finetune a pre-trained model.

    Parameters
    ----------
    config_file : Path, optional
        Path to an additional config file. Each config argument in this file will overwrite the original run config.
        The config file for finetuning must contain the argument `base_run_dir`, pointing to the folder of the 
        pre-trained model, as well as 'finetune_modules' to indicate which model parts will be trained during
        fine-tuning.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'. A value smaller than zero indicates CPU.

    """
    # load finetune config and check for a non-empty list of finetune_modules
    temp_config = Config(config_file)
    if not temp_config.finetune_modules:
        raise ValueError("For finetuning, at least one model part has to be specified by 'finetune_modules'.")

    # extract base run dir, load base run config and combine with the finetune arguments
    config = Config(temp_config.base_run_dir / "config.yml")
    config.update_config({'run_dir': None, 'experiment_name': None})
    config.update_config(config_file)
    config.is_finetuning = True

    # if the base run was a continue_training run, we need to override the continue_training flag from its config.
    config.is_continue_training = False

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None and gpu >= 0:
        config.device = f"cuda:{gpu}"
    if gpu is not None and gpu < 0:
        config.device = "cpu"

    start_training(config)
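To make the two required finetuning arguments from the docstring concrete, here is a hedged sketch of a call and the config keys it expects; the file name and module names are hypothetical, not taken from the snippet:

from pathlib import Path

# hypothetical finetune.yml (sketch):
#   base_run_dir: /path/to/pretrained/run    # folder of the pre-trained model
#   finetune_modules:                        # model parts trained during fine-tuning
#     - head
#     - lstm
finetune(config_file=Path("finetune.yml"), gpu=0)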
Example #3
def test_daily_regression_additional_features(
        get_config: Fixture[Callable[[str], dict]]):
    """Tests #38 (training and testing with additional_features).

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration
    """
    config = get_config('daily_regression_additional_features')

    basin = '01022500'
    test_start_date, test_end_date = _get_test_start_end_dates(config)

    start_training(config)
    start_evaluation(cfg=config,
                     run_dir=config.run_dir,
                     epoch=1,
                     period='test')

    results = _get_basin_results(config.run_dir,
                                 1)[basin]['1D']['xr'].isel(time_step=-1)

    assert pd.to_datetime(results['date'].values[0]) == test_start_date.date()
    assert pd.to_datetime(results['date'].values[-1]) == test_end_date.date()

    discharge = _get_discharge(config, basin)

    assert discharge.loc[test_start_date:test_end_date].values \
           == approx(results[f'{config.target_variables[0]}_obs'].values.reshape(-1), nan_ok=True)

    # CAMELS forcings have no NaNs, so there should be no NaN predictions
    assert not pd.isna(results[f'{config.target_variables[0]}_sim']).any()
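The tests call a helper `_get_test_start_end_dates` that is not shown in this section. A minimal sketch, assuming the run configuration exposes the evaluation period as `test_start_date` and `test_end_date` timestamps (the actual helper may derive these dates differently):

def _get_test_start_end_dates(config):
    # sketch: return the configured evaluation period as (start, end) timestamps
    return config.test_start_date, config.test_end_date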
Example #4
def finetune(config_file: Path = None, gpu: int = None):
    """Finetune a pre-trained model.

    Parameters
    ----------
    config_file : Path, optional
        Path to an additional config file. Each config argument in this file will overwrite the original run config.
        The config file for finetuning must contain the argument `base_run_dir`, pointing to the folder of the 
        pre-trained model.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'.

    """
    # load finetune config, extract base run dir, load base run config and combine with the finetune arguments
    temp_config = Config(config_file)
    config = Config(temp_config.base_run_dir / "config.yml")
    config.force_update({'run_dir': None, 'experiment_name': None})
    config.update_config(config_file)
    config.is_finetuning = True

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None:
        config.device = f"cuda:{gpu}"

    start_training(config)
Example #5
def test_daily_regression(get_config: Fixture[Callable[[str], dict]], single_timescale_model: Fixture[str],
                          daily_dataset: Fixture[str], single_timescale_forcings: Fixture[str]):
    """Test regression training and evaluation for daily predictions.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration to test.
    single_timescale_model : Fixture[str]
        Model to test.
    daily_dataset : Fixture[str]
        Daily dataset to use.
    single_timescale_forcings : Fixture[str]
        Daily forcings set to use.
    """
    config = get_config('daily_regression')
    config.update_config({
        'model': single_timescale_model,
        'dataset': daily_dataset['dataset'],
        'data_dir': config.data_dir / daily_dataset['dataset'],
        'target_variables': daily_dataset['target'],
        'forcings': single_timescale_forcings['forcings'],
        'dynamic_inputs': single_timescale_forcings['variables']
    })

    start_training(config)
    start_evaluation(cfg=config, run_dir=config.run_dir, epoch=1, period='test')

    _check_results(config, '01022500')
Example #6
def test_daily_regression(get_config: Fixture[Callable[[str], dict]],
                          single_timescale_model: Fixture[str],
                          daily_dataset: Fixture[str],
                          single_timescale_forcings: Fixture[str]):
    """Test regression training and evaluation for daily predictions.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration to test.
    single_timescale_model : Fixture[str]
        Model to test.
    daily_dataset : Fixture[str]
        Daily dataset to use.
    single_timescale_forcings : Fixture[str]
        Daily forcings set to use.
    """
    config = get_config('daily_regression')
    config.update_config({
        'model': single_timescale_model,
        'dataset': daily_dataset['dataset'],
        'data_dir': config.data_dir / daily_dataset['dataset'],
        'target_variables': daily_dataset['target'],
        'forcings': single_timescale_forcings['forcings'],
        'dynamic_inputs': single_timescale_forcings['variables']
    })

    basin = '01022500'
    test_start_date, test_end_date = _get_test_start_end_dates(config)

    start_training(config)
    start_evaluation(cfg=config,
                     run_dir=config.run_dir,
                     epoch=1,
                     period='test')

    results = _get_basin_results(config.run_dir,
                                 1)[basin]['1D']['xr'].isel(time_step=-1)

    assert pd.to_datetime(results['date'].values[0]) == test_start_date.date()
    assert pd.to_datetime(results['date'].values[-1]) == test_end_date.date()

    discharge = _get_discharge(config, basin)

    assert discharge.loc[test_start_date:test_end_date].values \
           == approx(results[f'{config.target_variables[0]}_obs'].values.reshape(-1), nan_ok=True)

    # CAMELS forcings have no NaNs, so there should be no NaN predictions
    assert not pd.isna(results[f'{config.target_variables[0]}_sim']).any()
Example #7
def test_transformer_daily_regression(get_config: Fixture[Callable[[str], dict]]):
    """Tests training and testing with a transformer model.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration
    """
    config = get_config('transformer_daily_regression')

    start_training(config)
    start_evaluation(cfg=config, run_dir=config.run_dir, epoch=1, period='test')

    _check_results(config, '01022500')
Example #8
def test_multi_timescale_regression(get_config: Fixture[Callable[[str], dict]],
                                    multi_timescale_model: Fixture[str]):
    """Test regression training and evaluation for multi-timescale predictions.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration to test.
    multi_timescale_model : Fixture[str]
        Model to test.
    """
    config = get_config('multi_timescale_regression')
    config.update_config({'model': multi_timescale_model})

    basin = '01022500'
    test_start_date, test_end_date = _get_test_start_end_dates(config)

    start_training(config)
    start_evaluation(cfg=config,
                     run_dir=config.run_dir,
                     epoch=1,
                     period='test')

    results = _get_basin_results(config.run_dir, 1)[basin]
    discharge = hourlycamelsus.load_hourly_us_netcdf(config.data_dir, config.forcings[0]) \
        .sel(basin=basin, date=slice(test_start_date, test_end_date))['qobs_mm_per_hour']

    hourly_results = results['1H']['xr'].to_dataframe().reset_index()
    hourly_results.index = hourly_results['date'] + hourly_results['time_step']
    assert hourly_results.index[0] == test_start_date
    assert hourly_results.index[-1] == test_end_date.floor('H')

    daily_results = results['1D']['xr']
    assert pd.to_datetime(daily_results['date'].values[0]) == test_start_date
    assert pd.to_datetime(daily_results['date'].values[-1]) == test_end_date.date()
    assert len(daily_results['qobs_mm_per_hour_obs']) == len(discharge) // 24

    assert len(discharge) == len(hourly_results)
    assert discharge.values \
           == approx(hourly_results['qobs_mm_per_hour_obs'].values, nan_ok=True)

    # Hourly CAMELS forcings have no NaNs, so there should be no NaN predictions
    assert not pd.isna(hourly_results['qobs_mm_per_hour_sim']).any()
    assert not pd.isna(daily_results['qobs_mm_per_hour_sim'].values).any()
Example #9
def test_daily_regression_with_embedding(get_config: Fixture[Callable[[str], dict]],
                                         single_timescale_model: Fixture[str]):
    """Tests training and testing with static and dynamic embedding network.

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration
    single_timescale_model : Fixture[str]
        Name of a single-timescale model
    """
    config = get_config('daily_regression_with_embedding')
    config.update_config({'model': single_timescale_model})

    start_training(config)
    start_evaluation(cfg=config, run_dir=config.run_dir, epoch=1, period='test')

    _check_results(config, '01022500')
Example #10
def start_run(config_file: Path, gpu: int = None):
    """Start training a model.
    
    Parameters
    ----------
    config_file : Path
        Path to a configuration file (.yml), defining the settings for the specific run.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'.

    """

    config = Config(config_file)

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None:
        config.device = f"cuda:{gpu}"

    start_training(config)
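A hedged usage sketch for `start_run` (the config file name is a placeholder): passing `gpu=1` forces the run onto `cuda:1` no matter what the `device` entry in the YAML file says.

from pathlib import Path

# start a new training run from a YAML config; gpu overrides the config's 'device'
start_run(config_file=Path("config.yml"), gpu=1)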
Example #11
def test_daily_regression_nan_targets(get_config: Fixture[Callable[[str], dict]]):
    """Tests #112 (evaluation when target values are NaN).

    Parameters
    ----------
    get_config : Fixture[Callable[[str], dict]]
        Method that returns a run configuration
    """
    config = get_config('daily_regression_nan_targets')

    start_training(config)
    start_evaluation(cfg=config, run_dir=config.run_dir, epoch=1, period='test')

    # the fact that the targets are NaN should not lead the model to create NaN outputs.
    # however, we do need to pass discharge as a NaN series, because the CAMELS discharge loader would return [],
    # as the test period is outside the part of the discharge time series that is stored on disk.
    discharge = pd.Series(float('nan'), index=pd.date_range(*_get_test_start_end_dates(config)))
    _check_results(config, '01022500', discharge=discharge)
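Several tests above delegate their assertions to `_check_results`, which is not shown in this section. A sketch inferred from the inlined checks in Examples #3 and #6 (the real helper may differ in details):

def _check_results(config, basin, discharge=None):
    # sketch reconstructed from the inlined assertions above, not the verbatim helper
    test_start_date, test_end_date = _get_test_start_end_dates(config)
    results = _get_basin_results(config.run_dir, 1)[basin]['1D']['xr'].isel(time_step=-1)

    # the evaluated period should span exactly the configured test period
    assert pd.to_datetime(results['date'].values[0]) == test_start_date.date()
    assert pd.to_datetime(results['date'].values[-1]) == test_end_date.date()

    # compare observations against discharge loaded from disk, unless the caller
    # supplies a replacement series (e.g., the all-NaN series in Example #11)
    if discharge is None:
        discharge = _get_discharge(config, basin)
    assert discharge.loc[test_start_date:test_end_date].values \
           == approx(results[f'{config.target_variables[0]}_obs'].values.reshape(-1), nan_ok=True)

    # predictions should never be NaN for these datasets
    assert not pd.isna(results[f'{config.target_variables[0]}_sim']).any()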