def test_predict(self, tmp_path):
    self._create_train_samples(tmp_path, 12)

    # TEST with a random nan value included!
    x, _, _ = _make_dataset(size=(5, 5), random_nan=1)
    y = x.isel(time=[-1])

    test_features = tmp_path / "features/one_month_forecast/test/1980_1"
    test_features.mkdir(parents=True)
    x.to_netcdf(test_features / "x.nc")
    y.to_netcdf(test_features / "y.nc")

    predictor = Climatology(tmp_path)
    test_arrays, preds = predictor.predict()

    assert (
        test_arrays["1980_1"]["y"].shape == preds["1980_1"].shape
    ), "Shape of climatology is incorrect!"

    # calculate climatology from the training data
    _, y_train = read_train_data(tmp_path)
    ds = y_train

    nan_mask = test_arrays["1980_1"]["nan_mask"]
    # check that the nan mask sums to 1 (the one random nan value we included!)
    assert nan_mask.sum() == 1

    # the January climatology, computed the same way as the predictor
    jan_clim = ds["VHI"].groupby("time.month").mean(dim="time").sel(month=1)
    assert (
        preds["1980_1"].flatten() == jan_clim.values.flatten()[~nan_mask]
    ).all(), "Expect the month mean to be calculated from the training data"
def test_predict(self, tmp_path):
    self._create_train_samples(tmp_path, 12)

    x, _, _ = _make_dataset(size=(5, 5))
    y = x.isel(time=[-1])

    test_features = tmp_path / "features/one_month_forecast/test/1980_1"
    test_features.mkdir(parents=True)
    x.to_netcdf(test_features / "x.nc")
    y.to_netcdf(test_features / "y.nc")

    predictor = Climatology(tmp_path)
    test_arrays, preds = predictor.predict()

    assert (
        test_arrays["1980_1"]["y"].shape == preds["1980_1"].shape
    ), "Shape of climatology is incorrect!"

    # calculate climatology from the training data
    _, y_train = read_train_data(tmp_path)
    ds = y_train

    # the January climatology, computed the same way as the predictor
    jan_clim = ds["VHI"].groupby("time.month").mean(dim="time").sel(month=1)
    assert (
        preds["1980_1"].reshape(5, 5) == jan_clim.values
    ).all(), "Expect the month mean to be calculated from the training data"
def read_all_data(
    data_dir: Path, experiment: str = "one_month_forecast", static: bool = False
) -> Tuple[xr.Dataset, ...]:
    X_train, y_train = read_train_data(data_dir, experiment=experiment)
    X_test, y_test = read_test_data(data_dir, experiment=experiment)

    if static:
        static_ds = xr.open_dataset(data_dir / "features/static/data.nc")
        return (X_train, y_train, X_test, y_test, static_ds)

    return (X_train, y_train, X_test, y_test)
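# Hedged usage sketch for `read_all_data`; assumes `data_dir` points at the
# processed-data root used elsewhere in this repo. The five-element return
# with `static=True` matches the fixed return above.
from pathlib import Path

data_dir = Path("data")
X_train, y_train, X_test, y_test = read_all_data(data_dir)
X_train, y_train, X_test, y_test, static_ds = read_all_data(data_dir, static=True)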
def predict(
    self, all_data: bool = False
) -> Tuple[Dict[str, Dict[str, np.ndarray]], Dict[str, np.ndarray]]:
    _, y_train = read_train_data(self.data_path)
    ds = y_train

    if all_data:
        # calculate the climatology from train + test data
        _, y_test = read_test_data(self.data_path)
        ds = xr.merge([y_train, y_test]).sortby("time").sortby("lat")

    target_var = [v for v in ds.data_vars][0]

    # calculate climatology: mean of the target variable for each month
    monmean = ds.groupby("time.month").mean(dim=["time"])[target_var]

    test_arrays_loader = self.get_dataloader(
        mode="test", shuffle_data=False, normalize=False, static=False
    )

    preds_dict: Dict[str, np.ndarray] = {}
    test_arrays_dict: Dict[str, Dict[str, np.ndarray]] = {}

    for data_dict in test_arrays_loader:  # renamed to avoid shadowing `dict`
        for key, val in data_dict.items():
            try:
                _ = val.x_vars.index(val.y_var)
            except ValueError as e:
                print("Target variable not in prediction data!")
                raise e

            # the prediction for each test timestep is the monthly climatology
            preds_dict[key] = monmean.sel(
                month=val.target_time.month
            ).values.reshape(val.y.shape)
            test_arrays_dict[key] = {
                "y": val.y,
                "latlons": val.latlons,
                "time": val.target_time,
                "y_var": val.y_var,
            }

    return test_arrays_dict, preds_dict
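# Hedged sketch of running and scoring the climatology baseline. `Climatology`
# and the matching shapes of `test_arrays[key]["y"]` and `preds[key]` follow
# the code above; the RMSE calculation and the `data_path` variable are
# illustrative assumptions, not repo API.
import numpy as np

predictor = Climatology(data_path)  # data_path: hypothetical Path to data root
test_arrays, preds = predictor.predict()

for key in preds:
    y_true = test_arrays[key]["y"].flatten()
    y_hat = preds[key].flatten()
    rmse = np.sqrt(np.nanmean((y_true - y_hat) ** 2))
    print(f"{key}: RMSE = {rmse:.3f}")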
import sys
from pathlib import Path

import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2
%matplotlib

data_dir = data_path = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')
sys.path.append('/Users/tommylees/github/ml_drought')

# load model
from src.models import load_model

model_path = data_dir / 'models/one_month_forecast/ealstm/model.pt'
assert model_path.exists()
ealstm = load_model(model_path)

# load X / y data
from src.analysis import read_train_data, read_test_data

X_train, y_train = read_train_data(data_dir)
X_test, y_test = read_test_data(data_dir)
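# Hedged sketch: assuming the loaded EALSTM exposes the same `.predict()`
# interface as the `Climatology` baseline (a dict of test arrays and a dict of
# predictions keyed by "{year}_{month}"); check `src.models` to confirm.
test_arrays, preds = ealstm.predict()
key = list(preds.keys())[0]
print(key, preds[key].shape, test_arrays[key]["y"].shape)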
import pickle
from pathlib import Path

import xarray as xr
import pandas as pd
from geopandas import GeoDataFrame

data_dir = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')

from src.analysis.region_analysis.groupby_region import KenyaGroupbyRegion
from src.analysis.region_analysis.groupby_region import GroupbyRegion
from src.analysis import read_train_data

# ------------------------
# Read the training data
# ------------------------
X, y = read_train_data(data_dir)

# extract mean values for each region for each variable
region_grouper = KenyaGroupbyRegion(data_dir=data_dir)
region_precip_df = region_grouper.analyze(X.precip, selection='level_2')
region_precip_gdf = region_grouper.gdf.rename(columns={'mean_value': 'precip'})

region_grouper = KenyaGroupbyRegion(data_dir=data_dir)
region_E_df = region_grouper.analyze(X.E, selection='level_2')
region_E_gdf = region_grouper.gdf.rename(columns={'mean_value': 'E'})

region_grouper = KenyaGroupbyRegion(data_dir=data_dir)
region_SMsurf_df = region_grouper.analyze(X.SMsurf, selection='level_2')
region_SMsurf_gdf = region_grouper.gdf.rename(columns={'mean_value': 'SMsurf'})

region_grouper = KenyaGroupbyRegion(data_dir=data_dir)
region_VCI_df = region_grouper.analyze(y.VCI, selection='level_2')
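# Hedged sketch: join the per-variable region GeoDataFrames into one table for
# a choropleth. The join key "region_name" is an assumption; inspect
# `region_precip_gdf.columns` for the real region identifier column.
region_gdf = region_precip_gdf.merge(
    region_E_gdf[["region_name", "E"]], on="region_name"
).merge(
    region_SMsurf_gdf[["region_name", "SMsurf"]], on="region_name"
)
region_gdf.plot(column="precip", legend=True)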
from src.engineer import Engineer

# # e = Engineer(data_dir)
# data = e.engineer_class._make_dataset(static=False)

from src.analysis import read_train_data, read_test_data
from src.analysis.indices.utils import rolling_mean

boku = True
if boku:
    experiment = "one_month_forecast_BOKU_boku_VCI"
else:
    experiment = "one_month_forecast"  # "one_month_forecast_BOKU_boku_VCI"

X_train, y_train = read_train_data(data_dir, experiment=experiment)
X_test, y_test = read_test_data(data_dir, experiment=experiment)

ds = xr.merge([y_train, y_test]).sortby("time").sortby("lat")
d_ = xr.merge([X_train, X_test]).sortby("time").sortby("lat")
ds = xr.merge([ds, d_])

# ----------------------------------------
# Create the features (pixel-by-pixel)
# ----------------------------------------
"""
NOTE: Nasty hack. The indices.spi computation sometimes collapses the
dimensionality of the groupby object:

~/miniconda3/envs/crop/lib/python3.7/site-packages/xarray/core/computation.py
in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes,
output_sizes, keep_attrs, *args)
"""