Example #1
def create_results_ensemble(run_dirs: List[Path],
                            best_k: int = None,
                            metrics: List[str] = None,
                            period: str = 'test',
                            epoch: int = None) -> dict:
    """Average the predictions of several runs for the specified period and calculate new metrics.

    If `best_k` is provided, only the k runs with the best validation NSE will be used in the generated ensemble.
    
    Parameters
    ----------
    run_dirs : List[Path]
        List of directories of the runs to be merged
    best_k : int, optional
        If provided, will only merge the k best runs based on validation NSE.
    metrics : List[str], optional
        Use this parameter to override the metrics from the config files in the run directories.
    period : {'train', 'validation', 'test'}, optional
        The period to evaluate. If `best_k` is used, only 'test' is allowed.
        The run directories must contain results files for the specified period.
    epoch : int, optional
        If provided, the model predictions of this epoch are ensembled; otherwise, those of the last epoch are used.
    
    Returns
    -------
    dict
        Dictionary of ensemble predictions and metrics per basin and frequency.
    """
    if len(run_dirs) < 2:
        raise ValueError(
            'Need to provide at least two run directories to be merged.')

    if period not in ['train', 'validation', 'test']:
        raise ValueError(f'Unknown period {period}.')
    if best_k is not None:
        if period != 'test':
            raise ValueError(
                'If best_k is specified, the period must be test.')
        print('Searching for best validation runs.')
        best_val_runs = _get_best_validation_runs(run_dirs, best_k, epoch)
        best_runs = [
            _get_results_file(run_dir, period, epoch)
            for run_dir in best_val_runs
        ]
    else:
        best_runs = [
            _get_results_file(run_dir, period, epoch) for run_dir in run_dirs
        ]

    config = Config(run_dirs[0] / 'config.yml')
    if metrics is not None:
        # override metrics from config
        config.metrics = metrics

    # get frequencies from a results file.
    # (they might not be stored in the config if the native data frequency was used)
    with open(best_runs[0], 'rb') as fp:
        run_results = pickle.load(fp)
    frequencies = list(run_results[list(run_results.keys())[0]].keys())

    return _create_ensemble(best_runs, frequencies, config)
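A minimal usage sketch (the run directory paths are hypothetical placeholders): it builds a test-period ensemble from the two runs with the best validation NSE and overrides the metrics from the run configs.

from pathlib import Path

run_dirs = [Path('runs/run_01'), Path('runs/run_02'), Path('runs/run_03')]
ensemble = create_results_ensemble(run_dirs,
                                   best_k=2,
                                   metrics=['NSE', 'KGE'],
                                   period='test')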
Example #2
def _get_config(name):
    # `tmpdir` is expected to be defined in the enclosing scope (e.g., a pytest fixture).
    config_file = Path(f'./test/test_configs/{name}.test.yml')
    if not config_file.is_file():
        raise ValueError(f'Test config file not found at {config_file}.')
    config = Config(config_file)
    config.run_dir = Path(tmpdir)
    return config
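A hypothetical call from the same test scope (the config name 'daily_regression' is a placeholder; `tmpdir` would come from the surrounding pytest fixture):

config = _get_config('daily_regression')
assert config.run_dir == Path(tmpdir)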
Example #3
def continue_run(run_dir: Path, config_file: Path = None, gpu: int = None):
    """Continue model training.
    
    Parameters
    ----------
    run_dir : Path
        Path to the run directory.
    config_file : Path, optional
        Path to an additional config file. Each config argument in this file will overwrite the original run config.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'. A value smaller than zero indicates CPU.

    """
    # load config from base run and overwrite all elements with an optional new config
    base_config = Config(run_dir / "config.yml")

    if config_file is not None:
        base_config.update_config(config_file)

    base_config.is_continue_training = True

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None and gpu >= 0:
        base_config.device = f"cuda:{gpu}"
    if gpu is not None and gpu < 0:
        base_config.device = "cpu"

    start_training(base_config)
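A hypothetical invocation (the run directory and extra config file names are placeholders): it resumes training on GPU 0, optionally overriding single arguments of the original run config.

continue_run(run_dir=Path('runs/cudalstm_0305_121314'),
             config_file=Path('continue.yml'),
             gpu=0)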
Example #4
def finetune(config_file: Path = None, gpu: int = None):
    """Finetune a pre-trained model.

    Parameters
    ----------
    config_file : Path, optional
        Path to an additional config file. Each config argument in this file will overwrite the original run config.
        The config file for finetuning must contain the argument `base_run_dir`, pointing to the folder of the 
        pre-trained model.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'.

    """
    # load finetune config, extract base run dir, load base run config and combine with the finetune arguments
    temp_config = Config(config_file)
    config = Config(temp_config.base_run_dir / "config.yml")
    config.force_update({'run_dir': None, 'experiment_name': None})
    config.update_config(config_file)
    config.is_finetuning = True

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None:
        config.device = f"cuda:{gpu}"

    start_training(config)
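A hypothetical call (the file name is a placeholder); the finetune config is assumed to contain `base_run_dir` as described in the docstring above.

finetune(config_file=Path('finetune.yml'), gpu=0)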
Example #5
    def __init__(self, mass_input_size: int, aux_input_size: int,
                 hidden_size: int, cfg: Config):
        super(_MCLSTMCell, self).__init__()
        self.cfg = cfg
        self.mass_input_size = mass_input_size
        self.aux_input_size = aux_input_size
        self.hidden_size = hidden_size

        _temp_dict = cfg.as_dict()
        self._mass_in_gates = _temp_dict["mclstm_mass_in_gates"]
        self._subtract_outgoing_mass = _temp_dict["subtract_outgoing_mass"]

        gate_inputs = aux_input_size + hidden_size
        if self._mass_in_gates:
            gate_inputs += mass_input_size

        # initialize gates
        self.output_gate = _Gate(in_features=gate_inputs,
                                 out_features=hidden_size)
        self.input_gate = _NormalisedGate(
            in_features=gate_inputs,
            out_shape=(mass_input_size, hidden_size),
            normaliser=_temp_dict["mclstm_i_normaliser"])
        self.redistribution = _NormalisedGate(
            in_features=gate_inputs,
            out_shape=(hidden_size, hidden_size),
            normaliser=_temp_dict["mclstm_r_normaliser"])

        self._reset_parameters()
Example #6
def run_evaluation(run_dir: Path,
                   epoch: Optional[int] = None,
                   period: str = "test"):
    """Helper Function to run the evaluation
    (same as def start_evaluation: neuralhydrology/evaluation/evaluate.py:L7)

    Args:
        run_dir (Path): Path of the experiment run
        epoch (Optional[int], optional):
            Model epoch to evaluate. None finds the latest (highest) epoch.
            Defaults to None.
        period (str, optional): {"test", "train", "validation"}. Defaults to "test".
    """
    cfg = Config(run_dir / "config.yml")
    tester = Tester(cfg=cfg, run_dir=run_dir, period=period, init_model=True)

    if epoch is None:
        # determine the highest epoch for which a model checkpoint was saved
        all_trained_models = [d.name for d in run_dir.glob("model_epoch*.pt")]
        epoch = max(
            int(name.replace("model_epoch", "").replace(".pt", ""))
            for name in all_trained_models)
    print(f"** EVALUATING MODEL EPOCH: {epoch} **")
    tester.evaluate(epoch=epoch, save_results=True, metrics=["NSE", "KGE"])
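A hypothetical call (the run directory is a placeholder); with the default epoch=None the latest checkpoint is evaluated on the test period.

run_evaluation(Path('runs/cudalstm_0305_121314'), period='test')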
Example #7
def test_mass_conservation():
    torch.manual_seed(111)

    # create minimal config required for model initialization
    config = Config({
        'dynamic_inputs': ['tmin(C)', 'tmax(C)'],
        'hidden_size': 10,
        'initial_forget_bias': 0,
        'mass_inputs': ['prcp(mm/day)'],
        'model': 'mclstm',
        'target_variables': ['QObs(mm/d)']
    })
    model = MCLSTM(config)

    # create random inputs
    data = {
        'x_d': torch.rand(
            (1, 25, 3)
        )  # [batch size, sequence length, total number of time series inputs]
    }

    # get model outputs and intermediate states
    output = model(data)

    # the total mass within the system at each time step is the cumsum over the outgoing mass + the current cell state
    cumsum_system = output["m_out"].sum(-1).cumsum(-1) + output["c"].sum(-1)

    # the accumulated mass of the inputs at each time step
    cumsum_input = data["x_d"][:, :, 0].cumsum(-1)

    # check if the total mass is conserved at every timestep of the forward pass
    assert torch.allclose(cumsum_system, cumsum_input)
Example #8
def start_run(config_file: Path, gpu: int = None):
    """Start training a model.
    
    Parameters
    ----------
    config_file : Path
        Path to a configuration file (.yml), defining the settings for the specific run.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'.

    """

    config = Config(config_file)

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None:
        config.device = f"cuda:{gpu}"

    start_training(config)
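A hypothetical call (the config path is a placeholder); it starts training on GPU 0, overriding the 'device' argument from the config file.

start_run(config_file=Path('configs/cudalstm_1_basin.yml'), gpu=0)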
Example #9
    def __init__(self, cfg: Config):
        self._train = True
        self.log_interval = cfg.log_interval
        self.log_dir = cfg.run_dir
        self._img_log_dir = cfg.img_log_dir

        # get git commit hash if folder is a git repository
        current_dir = str(Path(__file__).absolute().parent)
        if subprocess.call(["git", "-C", current_dir, "branch"],
                           stderr=subprocess.DEVNULL,
                           stdout=subprocess.DEVNULL) == 0:
            git_output = subprocess.check_output(
                ["git", "-C", current_dir, "describe", "--always"])
            cfg.update_config(
                {'commit_hash': git_output.strip().decode('ascii')})

        # Additionally, the package version is stored in the config
        cfg.update_config({"package_version": __version__})

        # store a copy of the config into the run folder
        cfg.dump_config(folder=self.log_dir)

        self.epoch = 0
        self.update = 0
        self._metrics = defaultdict(list)
        self.writer = None
Example #10
def eval_run(run_dir: Path, period: str, epoch: int = None, gpu: int = None):
    """Start evaluating a trained model.
    
    Parameters
    ----------
    run_dir : Path
        Path to the run directory.
    period : {'train', 'validation', 'test'}
        The period to evaluate.
    epoch : int, optional
        Define a specific epoch to use. By default, the weights of the last epoch are used.  
    gpu : int, optional
        GPU id to use. Will override config argument 'device'.

    """
    config = Config(run_dir / "config.yml")

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None:
        config.device = f"cuda:{gpu}"

    start_evaluation(cfg=config, run_dir=run_dir, epoch=epoch, period=period)
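A hypothetical call (the run directory is a placeholder); it evaluates the validation period using the weights of epoch 10 on GPU 0.

eval_run(run_dir=Path('runs/cudalstm_0305_121314'),
         period='validation',
         epoch=10,
         gpu=0)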
Example #11
def create_config_files(base_config_path: Path, modify_dict: Dict[str, list],
                        output_dir: Path):
    """Create configs, given a base config and a dictionary of parameters to modify.
    
    This function will create one config file for each combination of parameters defined in the modify_dict.
    
    Parameters
    ----------
    base_config_path : Path
        Path to a base config file (.yml)
    modify_dict : dict
        Dictionary, mapping from parameter names to lists of possible parameter values.
    output_dir : Path 
        Path to a folder where the generated configs will be stored
    """
    if not output_dir.is_dir():
        output_dir.mkdir(parents=True)

    # load base config as dictionary
    base_config = Config(base_config_path)
    experiment_name = base_config.experiment_name
    option_names = list(modify_dict.keys())

    # iterate over each possible combination of hyper parameters
    for i, options in enumerate(itertools.product(*modify_dict.values())):

        for key, val in zip(option_names, options):
            base_config.force_update(key=key, value=val)

        # create a unique run name
        name = experiment_name
        for key, val in zip(option_names, options):
            name += f"_{key}{val}"
        base_config.force_update(key="experiment_name", value=name)

        base_config.dump_config(output_dir, f"config_{i+1}.yml")

    print(f"Finished. Configs are stored in {output_dir}")
Example #12
    def __init__(self, cfg: Config):
        self._train = True
        self.log_interval = cfg.log_interval
        self.log_dir = cfg.run_dir
        self._img_log_dir = cfg.img_log_dir

        # get git commit hash if folder is a git repository
        cfg.update_config({'commit_hash': get_git_hash()})

        # save git diff to file if branch is dirty
        if cfg.save_git_diff:
            save_git_diff(cfg.run_dir)

        # Additionally, the package version is stored in the config
        cfg.update_config({"package_version": __version__})

        # store a copy of the config into the run folder
        cfg.dump_config(folder=self.log_dir)

        self.epoch = 0
        self.update = 0
        self._metrics = defaultdict(list)
        self.writer = None
Example #13
def finetune(config_file: Path = None, gpu: int = None):
    """Finetune a pre-trained model.

    Parameters
    ----------
    config_file : Path, optional
        Path to an additional config file. Each config argument in this file will overwrite the original run config.
        The config file for finetuning must contain the argument `base_run_dir`, pointing to the folder of the 
        pre-trained model, as well as 'finetune_modules' to indicate which model parts will be trained during
        fine-tuning.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'. A value smaller than zero indicates CPU.

    """
    # load finetune config and check for a non-empty list of finetune_modules
    temp_config = Config(config_file)
    if not temp_config.finetune_modules:
        raise ValueError(
            "For finetuning, at least one model part has to be specified by 'finetune_modules'."
        )

    # extract base run dir, load base run config and combine with the finetune arguments
    config = Config(temp_config.base_run_dir / "config.yml")
    config.update_config({'run_dir': None, 'experiment_name': None})
    config.update_config(config_file)
    config.is_finetuning = True

    # if the base run was a continue_training run, we need to override the continue_training flag from its config.
    config.is_continue_training = False

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None and gpu >= 0:
        config.device = f"cuda:{gpu}"
    if gpu is not None and gpu < 0:
        config.device = "cpu"

    start_training(config)
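A hypothetical call (the file name, run directory, and module list are placeholders); the finetune config must point to the pre-trained run and name the modules to train, and gpu=-1 forces the CPU branch handled above.

# expected minimal content of finetune.yml (values are placeholders):
#   base_run_dir: runs/cudalstm_0305_121314
#   finetune_modules: [head]
finetune(config_file=Path('finetune.yml'), gpu=-1)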