def read_all_data(
    data_dir: Path, experiment: str = "one_month_forecast", static: bool = False
) -> Tuple[xr.Dataset, ...]:
    """Load the train/test feature and target datasets for an experiment.

    Parameters
    ----------
    data_dir : Path
        Root data directory containing the ``features/`` subfolders.
    experiment : str
        Experiment name forwarded to ``read_train_data`` / ``read_test_data``.
    static : bool
        If True, also open the static features dataset and append it to the
        returned tuple.

    Returns
    -------
    Tuple[xr.Dataset, ...]
        ``(X_train, y_train, X_test, y_test)`` — or, when ``static=True``,
        ``(X_train, y_train, X_test, y_test, static_ds)``.
    """
    X_train, y_train = read_train_data(data_dir, experiment=experiment)
    X_test, y_test = read_test_data(data_dir, experiment=experiment)
    if static:
        # BUG FIX: previously this dataset was opened and then silently
        # discarded — the function returned the same 4-tuple regardless.
        static_ds = xr.open_dataset(data_dir / "features/static/data.nc")
        return (X_train, y_train, X_test, y_test, static_ds)
    return (X_train, y_train, X_test, y_test)
def predict(
    self, all_data: bool = False
) -> Tuple[Dict[str, Dict[str, np.ndarray]], Dict[str, np.ndarray]]:
    """Predict the target variable using a per-pixel monthly climatology.

    The climatology is the mean of the target variable for each calendar
    month, computed over the training period (or train + test when
    ``all_data=True``). Predictions for each test timestep are the
    climatological value for that timestep's month.

    Parameters
    ----------
    all_data : bool
        If True, compute the climatology over the merged train + test
        targets instead of the training targets alone.

    Returns
    -------
    Tuple[Dict, Dict]
        ``(test_arrays_dict, preds_dict)`` keyed by the dataloader's keys:
        the true arrays/metadata and the climatology predictions.

    Raises
    ------
    ValueError
        If the target variable is not among the loader's ``x_vars``.
    """
    _, y_train = read_train_data(self.data_path)
    ds = y_train
    if all_data:
        # include the test period so the climatology covers all years
        _, y_test = read_test_data(self.data_path)
        ds = xr.merge([y_train, y_test]).sortby("time").sortby("lat")

    # assumes a single target variable in the dataset — TODO confirm
    target_var = list(ds.data_vars)[0]

    # monthly mean across all years -> per-pixel climatology
    monmean = ds.groupby("time.month").mean(dim=["time"])[target_var]

    test_arrays_loader = self.get_dataloader(
        mode="test", shuffle_data=False, normalize=False, static=False
    )

    preds_dict: Dict[str, np.ndarray] = {}
    test_arrays_dict: Dict[str, Dict[str, np.ndarray]] = {}
    # renamed loop variable from `dict` — it shadowed the builtin
    for batch in test_arrays_loader:
        for key, val in batch.items():
            try:
                _ = val.x_vars.index(val.y_var)
            except ValueError:
                print("Target variable not in prediction data!")
                # bare raise preserves the original traceback
                raise
            preds_dict[key] = monmean.sel(
                month=val.target_time.month
            ).values.reshape(val.y.shape)
            test_arrays_dict[key] = {
                "y": val.y,
                "latlons": val.latlons,
                "time": val.target_time,
                "y_var": val.y_var,
            }
    return test_arrays_dict, preds_dict
# NOTE(review): IPython/Jupyter-style script — the `%` magic lines below are
# only valid under IPython, not plain Python.
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import sys

%load_ext autoreload
%autoreload 2
%matplotlib

# NOTE(review): the first assignment is immediately overwritten — presumably a
# local-dir vs external-drive toggle; confirm which path is intended before
# running on another machine.
data_dir = data_path = Path('data')
data_dir = data_path = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')

# make the project package importable (hard-coded machine-specific path)
sys.path.append('/Users/tommylees/github/ml_drought')

# load the trained EALSTM model from disk
from src.models import load_model

model_path = data_dir / 'models/one_month_forecast/ealstm/model.pt'
assert model_path.exists()
ealstm = load_model(model_path)

# load X / Y train and test data for the default experiment
from src.analysis import read_train_data, read_test_data

X_train, y_train = read_train_data(data_dir)
X_test, y_test = read_test_data(data_dir)
# # e = Engineer(data_dir) # data = e.engineer_class._make_dataset(static=False) from src.analysis import read_train_data, read_test_data from src.analysis.indices.utils import rolling_mean boku = True if boku: experiment = "one_month_forecast_BOKU_boku_VCI" else: experiment = "one_month_forecast" # "one_month_forecast_BOKU_boku_VCI" X_train, y_train = read_train_data(data_dir, experiment=experiment) X_test, y_test = read_test_data(data_dir, experiment=experiment) ds = xr.merge([y_train, y_test]).sortby("time").sortby("lat") d_ = xr.merge([X_train, X_test]).sortby("time").sortby("lat") ds = xr.merge([ds, d_]) # ---------------------------------------- # Create the features (pixel-by-pixel) # ---------------------------------------- """ NOTE: Nasty hack the indices.spi computation sometimes collapses the dimensionality of the groupby object ~/miniconda3/envs/crop/lib/python3.7/site-packages/xarray/core/computation.py in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, output_sizes, keep_attrs, *args) 580 data = np.expand_dims(data, -1)