示例#1
0
def _prepare_data(cfg: Dict, basins: List) -> Dict:
    """Create the attribute database and the training .h5 file for a run.

    Parameters
    ----------
    cfg : dict
        Dictionary containing the run config. The entries "run_dir",
        "camels_root", "train_dir", "train_start", "train_end" and
        "seq_length" are read; "db_path" and "train_file" are written.
    basins : List
        List containing the 8-digit USGS gauge id

    Returns
    -------
    dict
        The same config dictionary, updated in place with "db_path" and
        "train_file".
    """
    # The static basin attributes go into a database file inside the run
    # directory; the path is stored in the config as a plain string.
    attributes_db = str(cfg["run_dir"] / "attributes.db")
    cfg["db_path"] = attributes_db
    add_camels_attributes(cfg["camels_root"], db_path=attributes_db)

    # Only the training .h5 file is created here.
    train_h5 = cfg["train_dir"] / 'train_data.h5'
    cfg["train_file"] = train_h5
    create_h5_files(
        camels_root=cfg["camels_root"],
        out_file=train_h5,
        basins=basins,
        dates=[cfg["train_start"], cfg["train_end"]],
        with_basin_str=True,
        seq_length=cfg["seq_length"],
    )

    return cfg
def _prepare_data(cfg: Dict, basins: List) -> Dict:
    """Create the attribute database and make training data available.

    If the config carries no training file (``cfg["train_file"] is None``),
    a new .h5 training file is built in the training directory. Otherwise
    the existing scaler file is copied into the training directory and no
    .h5 file is created.

    Parameters
    ----------
    cfg : dict
        Dictionary containing the run config. "db_path" is always written;
        "train_file" and "scaler_file" are written only when a new training
        file is built.
    basins : List
        List containing the 8-digit USGS gauge id

    Returns
    -------
    dict
        The same config dictionary, updated in place.
    """
    # The static basin attributes go into a database file inside the run
    # directory; the path is stored in the config as a plain string.
    attributes_db = str(cfg["run_dir"] / "attributes.db")
    cfg["db_path"] = attributes_db
    add_camels_attributes(cfg["camels_root"], db_path=attributes_db)

    # A training file was supplied: just place a copy of its scaler file
    # next to the other training artefacts and finish.
    if cfg["train_file"] is not None:
        scaler_copy = cfg["train_dir"] / "scaler.p"
        shutil.copyfile(cfg["scaler_file"], scaler_copy)
        return cfg

    # No training file yet: register the output locations first.
    cfg["train_file"] = cfg["train_dir"] / 'train_data.h5'
    cfg["scaler_file"] = cfg["train_dir"] / "scaler.p"

    # Additional inputs are read from a pickle stored in the 'data' folder
    # next to this module.
    # NOTE: pickle.load must only ever be used on trusted files.
    features_path = Path(__file__).parent / 'data' / 'dynamic_features_nwm_v2.p'
    with features_path.open("rb") as handle:
        additional_features = pickle.load(handle)

    # Ad hoc static climate indices: these require the training period of
    # this experiment and overwrite the *_dyn type climate indices in
    # 'additional_features'.
    if not cfg['use_dynamic_climate']:
        static_climate = training_period_climate_indices(
            db_path=cfg['db_path'],
            camels_root=cfg['camels_root'],
            basins=basins,
            start_date=cfg['train_start'],
            end_date=cfg['train_end'],
        )
        for basin in basins:
            basin_indices = static_climate[basin]
            for col in basin_indices.columns:
                # Repeat the training-period values once per row of the
                # basin's feature table.
                n_rows = additional_features[basin].shape[0]
                additional_features[basin][col] = np.tile(
                    basin_indices[col].values, [n_rows, 1]
                )

    create_h5_files_v2(
        camels_root=cfg["camels_root"],
        out_file=cfg["train_file"],
        basins=basins,
        dates=[cfg["train_start"], cfg["train_end"]],
        db_path=cfg["db_path"],
        cfg=cfg,
        additional_features=additional_features,
        num_workers=cfg["num_workers"],
        seq_length=cfg["seq_length"],
    )

    return cfg