def _prepare_data(cfg: Dict, basins: List) -> Dict:
    """Preprocess training data.

    Parameters
    ----------
    cfg : dict
        Dictionary containing the run config
    basins : List
        List containing the 8-digit USGS gauge ids

    Returns
    -------
    dict
        Dictionary containing the updated run config
    """
    # create database file containing the static basin attributes
    cfg["db_path"] = str(cfg["run_dir"] / "attributes.db")
    add_camels_attributes(cfg["camels_root"], db_path=cfg["db_path"])

    # create .h5 file containing the training data
    cfg["train_file"] = cfg["train_dir"] / 'train_data.h5'
    create_h5_files(camels_root=cfg["camels_root"],
                    out_file=cfg["train_file"],
                    basins=basins,
                    dates=[cfg["train_start"], cfg["train_end"]],
                    with_basin_str=True,
                    seq_length=cfg["seq_length"])

    return cfg
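# --- Illustration only (not part of the original source) ---------------------
# A minimal sketch of the run-config keys the basic _prepare_data above reads:
# camels_root, run_dir, train_dir, train_start, train_end and seq_length. The
# helper name, paths, dates and values below are hypothetical placeholders.
def _example_run_config() -> Dict:
    """Return a minimal, hypothetical run config for illustration only."""
    from pathlib import Path  # local imports: example only

    import pandas as pd  # dates are assumed to be pandas Timestamps

    run_dir = Path("runs/run_001")                    # assumed output directory
    return {
        "camels_root": Path("/data/CAMELS"),          # assumed CAMELS location
        "run_dir": run_dir,
        "train_dir": run_dir / "train_data",
        "train_start": pd.to_datetime("1999-10-01", format="%Y-%m-%d"),
        "train_end": pd.to_datetime("2008-09-30", format="%Y-%m-%d"),
        "seq_length": 270,                            # assumed input sequence length
    }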
def _prepare_data(cfg: Dict, basins: List) -> Dict:
    """Preprocess training data.

    Parameters
    ----------
    cfg : dict
        Dictionary containing the run config
    basins : List
        List containing the 8-digit USGS gauge ids

    Returns
    -------
    dict
        Dictionary containing the updated run config
    """
    # create database file containing the static basin attributes
    cfg["db_path"] = str(cfg["run_dir"] / "attributes.db")
    add_camels_attributes(cfg["camels_root"], db_path=cfg["db_path"])

    # create .h5 file containing the training data
    if cfg["train_file"] is None:
        cfg["train_file"] = cfg["train_dir"] / 'train_data.h5'
        cfg["scaler_file"] = cfg["train_dir"] / "scaler.p"

        # load additional input features
        file_name = Path(__file__).parent / 'data' / 'dynamic_features_nwm_v2.p'
        with file_name.open("rb") as fp:
            additional_features = pickle.load(fp)

        # ad hoc static climate indices:
        # requires the training period of this experiment and
        # overwrites the *_dyn type climate indices in 'additional_features'
        if not cfg['use_dynamic_climate']:
            train_clim_indexes = training_period_climate_indices(
                db_path=cfg['db_path'],
                camels_root=cfg['camels_root'],
                basins=basins,
                start_date=cfg['train_start'],
                end_date=cfg['train_end'])
            for basin in basins:
                for col in train_clim_indexes[basin].columns:
                    additional_features[basin][col] = np.tile(
                        train_clim_indexes[basin][col].values,
                        [additional_features[basin].shape[0], 1])

        create_h5_files_v2(camels_root=cfg["camels_root"],
                           out_file=cfg["train_file"],
                           basins=basins,
                           dates=[cfg["train_start"], cfg["train_end"]],
                           db_path=cfg["db_path"],
                           cfg=cfg,
                           additional_features=additional_features,
                           num_workers=cfg["num_workers"],
                           seq_length=cfg["seq_length"])

    else:
        # copy the precomputed scaler file into the run folder
        dst = cfg["train_dir"] / "scaler.p"
        shutil.copyfile(cfg["scaler_file"], dst)

    return cfg
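# --- Illustration only (not part of the original source) ---------------------
# Hedged sketch of how the extended _prepare_data above might be invoked. It
# reuses the hypothetical _example_run_config() helper and adds the extra keys
# this version reads (train_file, use_dynamic_climate, num_workers). Because the
# extended definition appears last, the call below resolves to it. Running this
# for real would additionally require the CAMELS dataset, the pickled NWM
# feature file and the project-specific helpers imported elsewhere in the module.
if __name__ == "__main__":
    cfg = _example_run_config()
    cfg.update({
        "train_file": None,            # None -> build train_data.h5 from scratch
        "use_dynamic_climate": False,  # use static training-period climate indices
        "num_workers": 4,              # assumed number of preprocessing workers
    })
    basins = ["01013500", "01022500"]  # example 8-digit USGS gauge ids

    cfg = _prepare_data(cfg, basins)
    print(f"training data written to {cfg['train_file']}")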