def engineer(data_path, experiment='one_month_forecast', process_static=True,
             pred_months=12, expected_length=12):
    """Build an ``Engineer`` on ``data_path`` and run it with 2018 as test year.

    The target variable is fixed to 'VHI'. ``experiment`` and
    ``process_static`` are handed to the ``Engineer`` constructor;
    ``pred_months`` is handed to ``Engineer.engineer``.

    NOTE(review): ``expected_length`` is accepted but never forwarded - the
    run always receives ``expected_length=pred_months``. Confirm whether the
    parameter is meant to be honoured before relying on it.
    """
    feature_builder = Engineer(
        data_path,
        experiment=experiment,
        process_static=process_static,
    )
    run_kwargs = dict(
        test_year=2018,
        target_variable='VHI',
        pred_months=pred_months,
        # Deliberately tied to pred_months, exactly as before.
        expected_length=pred_months,
    )
    feature_builder.engineer(**run_kwargs)
def engineer(pred_months=3, target_var="VCI1M"):
    """Run the one-month-forecast engineer with 2016-2018 as test years.

    Static data processing is switched off. ``target_var`` is forwarded as
    the target variable and ``pred_months`` is used both as ``pred_months``
    and as ``expected_length`` of the run.
    """
    feature_builder = Engineer(
        get_data_path(),
        experiment="one_month_forecast",
        process_static=False,
    )
    test_years = list(range(2016, 2019))  # 2016, 2017, 2018
    feature_builder.engineer(
        test_year=test_years,
        target_variable=target_var,
        pred_months=pred_months,
        expected_length=pred_months,
    )
def eng_strato():
    """Run the 'strato' experiment engineer with 2018 as the test year.

    The target variable is fixed to 'u'. The data folder is chosen relative
    to the current working directory.
    """
    # When the working directory is already ml_drought the data folder is a
    # direct child; otherwise it is looked up one level above.
    cwd_name = Path('.').absolute().as_posix().split('/')[-1]
    data_path = Path('data') if cwd_name == 'ml_drought' else Path('../data')

    strato_builder = Engineer(data_path, experiment='strato')
    strato_builder.engineer(test_year=2018, target_variable='u')
def engineer(experiment="one_month_forecast", process_static=True, pred_months=12):
    """Run the engineer for target 'VCI' with 2011-2018 as test years.

    ``experiment`` and ``process_static`` configure the ``Engineer``
    instance; ``pred_months`` is used both as ``pred_months`` and as
    ``expected_length`` of the run.
    """
    feature_builder = Engineer(
        get_data_path(),
        experiment=experiment,
        process_static=process_static,
    )
    test_years = list(range(2011, 2019))  # 2011 .. 2018 inclusive
    feature_builder.engineer(
        test_year=test_years,
        target_variable="VCI",
        pred_months=pred_months,
        expected_length=pred_months,
    )
def engineer(experiment='one_month_forecast', process_static=True, pred_months=12):
    """Run the engineer for target 'VCI' with 2018 as the test year.

    The data folder is chosen relative to the current working directory.
    ``pred_months`` is used both as ``pred_months`` and as
    ``expected_length`` of the run.
    """
    # When the working directory is already ml_drought the data folder is a
    # direct child; otherwise it is looked up one level above.
    cwd_name = Path('.').absolute().as_posix().split('/')[-1]
    data_path = Path('data') if cwd_name == 'ml_drought' else Path('../data')

    feature_builder = Engineer(
        data_path,
        experiment=experiment,
        process_static=process_static,
    )
    feature_builder.engineer(
        test_year=2018,
        target_variable='VCI',
        pred_months=pred_months,
        expected_length=pred_months,
    )
def test_engineer(self, tmp_path):
    """Run the engineer on fixture data and verify the files it writes.

    Checks, for every month of the 2001 test year and the 2000 training
    year, that the ``x.nc`` / ``y.nc`` pair has the expected variables and
    number of time steps; that no 2001 folder ends up in the training set;
    and that the pickled normalizing dict exists with the expected contents.
    """
    # Presumably writes the preprocessed fixture files under tmp_path -
    # see _setup for what exactly is created.
    _setup(tmp_path)

    pred_months = expected_length = 11

    engineer = Engineer(tmp_path)
    engineer.engineer(
        test_year=2001,
        target_variable="a",
        pred_months=pred_months,
        expected_length=expected_length,
    )

    def check_folder(folder_path):
        """Assert that one ``<year>_<month>`` folder holds a valid x/y pair."""
        # Datasets are opened as context managers so their file handles are
        # released once the folder has been checked.
        with xr.open_dataset(folder_path / "y.nc") as y:
            assert "b" not in set(
                y.variables), "Got unexpected variables in test set"

            with xr.open_dataset(folder_path / "x.nc") as x:
                x_variables = set(x.variables)
                for expected_var in ("a", "b"):
                    assert (expected_var in x_variables
                            ), "Missing variables in testing input dataset"
                assert (len(x.time.values) == expected_length
                        ), "Wrong number of months in the test x dataset"

            assert len(
                y.time.values) == 1, "Wrong number of months in test y dataset"

    # The check for train/1999_12 is currently disabled:
    # check_folder(tmp_path / 'features/one_month_forecast/train/1999_12')
    for month in range(1, 13):
        check_folder(tmp_path / f"features/one_month_forecast/test/2001_{month}")
        check_folder(tmp_path / f"features/one_month_forecast/train/2000_{month}")

    leaked_test_folders = list(
        (tmp_path / "features/one_month_forecast/train").glob("2001_*"))
    assert len(leaked_test_folders) == 0, "Test data in the training data!"

    norm_dict_path = tmp_path / "features/one_month_forecast/normalizing_dict.pkl"
    assert norm_dict_path.exists(), "Normalizing dict not saved!"

    with norm_dict_path.open("rb") as f:
        norm_dict = pickle.load(f)

    # A mean of 1 and a std of 0 presumably reflect constant fixture data -
    # confirm against _setup.
    for key, stats in norm_dict.items():
        assert key in {"a", "b"}, "Unexpected key!"
        assert stats["mean"] == 1, "Mean incorrectly calculated!"
        assert stats["std"] == 0, "Std incorrectly calculated!"
def engineer(
    pred_months=3,
    target_var="boku_VCI",
    process_static=False,
    global_means: bool = True,
    log_vars: Optional[List[str]] = None,
):
    """Run the one-month-forecast engineer with 2016-2018 as test years.

    ``process_static`` is handed to the ``Engineer`` constructor;
    ``target_var``, ``pred_months`` and ``global_means`` are handed to
    ``Engineer.engineer``. ``pred_months`` is also used as the
    ``expected_length`` of the run.

    NOTE(review): ``log_vars`` is accepted but not used anywhere in this
    function - confirm whether it should be forwarded.
    """
    feature_builder = Engineer(
        get_data_path(),
        experiment="one_month_forecast",
        process_static=process_static,
    )
    test_years = list(range(2016, 2019))  # 2016, 2017, 2018
    feature_builder.engineer(
        test_year=test_years,
        target_variable=target_var,
        pred_months=pred_months,
        expected_length=pred_months,
        global_means=global_means,
    )
from collections import defaultdict import calendar from datetime import datetime, date from pathlib import Path import xarray as xr from typing import cast, Dict, List, Optional, Union, Tuple from typing import DefaultDict as DDict from src.engineer import Engineer from src.preprocess.base import BasePreProcessor data_path = Path("/Volumes/Lees_Extend/data/ecmwf_sowc/data") engineer = Engineer(data_path) engineer.engineer(test_year=1990, target_variable="VHI", pred_months=3, expected_length=3) # wrong shapes! datasets = engineer._get_preprocessed_files() ds_list = [xr.open_dataset(ds) for ds in datasets] dims_list = [[dim for dim in ds.dims] for ds in ds_list] variable_list = [[var for var in ds.variables if var not in dims_list[i]][0] for i, ds in enumerate(ds_list)] da_list = [ds[variable_list[i]] for i, ds in enumerate(ds_list)] # ds = engineer._make_dataset() years = [1990] train_ds, test_dict train_ds = engineer._train_test_split(ds,
def engineer(self, engineer_args: Dict) -> None:
    """Run the engineer on the data.

    ``engineer_args`` must provide two mappings: ``"init_args"`` (keyword
    arguments for the ``Engineer`` constructor) and ``"run_args"`` (keyword
    arguments for ``Engineer.engineer``).

    Note that ``engineer_args["init_args"]`` is modified in place: its
    ``"data_folder"`` entry is set to ``self.data`` before the ``Engineer``
    is constructed.
    """
    data_folder = self.data
    init_kwargs = engineer_args["init_args"]
    # In-place update: the caller's dict sees the data folder too.
    init_kwargs["data_folder"] = data_folder

    feature_builder = Engineer(**init_kwargs)
    feature_builder.engineer(**engineer_args["run_args"])