Пример #1
0
    def test_regridder_save(self, tmp_path):
        """Regridding should clean up the intermediate xESMF weight file.

        xESMF names its weight file ``{method}_{in_y}x{in_x}_{out_y}x{out_x}.nc``;
        regridding a (20, 20) target onto a (10, 10) reference grid with the
        default ``nearest_s2d`` method therefore produces
        ``nearest_s2d_20x20_10x10.nc``, which ``regrid`` is expected to delete.
        """
        size_reference = (10, 10)
        size_target = (20, 20)

        reference_ds, _, _ = _make_dataset(size_reference)
        target_ds, _, _ = _make_dataset(size_target)

        processor = BasePreProcessor(tmp_path)
        processor.regrid(target_ds, reference_ds)
        # BUG FIX: the filename previously checked ('nearest_s2d_100x100_10x10.nc')
        # did not match the (20, 20) target grid built above, so the assertion
        # passed vacuously — that file could never have existed.
        weight_filename = 'nearest_s2d_20x20_10x10.nc'
        assert (processor.preprocessed_folder / weight_filename).exists() is False, \
            'Regridder weight file not deleted!'
Пример #2
0
    def test_incorrect_method(self, tmp_path):
        """Passing an unknown regridding method should raise an AssertionError
        whose message lists the acceptable methods."""
        size_reference = (10, 10)
        size_target = (100, 100)

        reference_ds, _, _ = _make_dataset(size_reference)
        target_ds, _, _ = _make_dataset(size_target)

        processor = BasePreProcessor(tmp_path)
        with pytest.raises(AssertionError) as e:
            processor.regrid(target_ds, reference_ds, method='woops!')
        expected_message_contains = 'not an acceptable regridding method. Must be one of'
        # BUG FIX: str(e) on a pytest ExceptionInfo renders "file:line: exc"
        # (and in newer pytest versions omits the message entirely); the
        # actual exception text lives on e.value.
        assert expected_message_contains in str(e.value), \
            f'Expected {e.value} to contain {expected_message_contains}'
Пример #3
0
    def test_regridding(self, tmp_path):
        """Regridding a (20, 20) dataset onto a (10, 10) reference grid should
        yield output whose spatial dimensions match the reference grid."""
        size_reference = (10, 10)
        size_target = (20, 20)

        reference_ds = _make_dataset(size_reference)[0]
        target_ds = _make_dataset(size_target)[0]

        regridded_ds = BasePreProcessor(tmp_path).regrid(target_ds, reference_ds)

        # axis 0 is time, so only the trailing (lat, lon) axes are compared
        assert regridded_ds.VHI.values.shape[1:] == size_reference, \
            f'Expected regridded Dataset to have shape {size_reference}, ' \
            f'got {regridded_ds.VHI.values.shape}'
Пример #4
0
    def test_resampling(self):
        """Resampling an already-monthly dataset to monthly ('M') frequency
        should leave the number of time steps unchanged."""
        monthly_in, _, _ = _make_dataset(size=(10, 10))

        resampled = BasePreProcessor.resample_time(monthly_in, resample_length='M')

        assert len(monthly_in.time) == len(resampled.time)
Пример #5
0
    def test_load_regridder(self, tmp_path):
        """load_reference_grid should keep only the lat/lon grid variables."""
        test_dataset = _make_dataset(size=(10, 10))[0]
        grid_file = tmp_path / 'regridder.nc'
        test_dataset.to_netcdf(grid_file)

        output = BasePreProcessor.load_reference_grid(grid_file)

        expected_vars = {'lat', 'lon'}
        assert set(output.variables) == expected_vars, \
            f'Got extra variables: {output.variables}'
Пример #6
0
    def test_load_regridder(self, tmp_path):
        """The loaded reference grid must expose exactly `lat` and `lon`."""
        test_dataset, _, _ = _make_dataset(size=(10, 10))
        grid_path = tmp_path / "regridder.nc"
        test_dataset.to_netcdf(grid_path)

        output = BasePreProcessor.load_reference_grid(grid_path)

        assert set(output.variables) == {"lat", "lon"}, \
            f"Got extra variables: {output.variables}"
Пример #7
0
    def test_chop_roi(self, tmp_path):
        """chop_roi with subset_str='east_africa' should subset the dataset to
        the east-africa bounding box (lat -11 : 23, lon 21 : 51.8), producing
        a smaller dataset than the (80, 80) original."""
        size_original = (80, 80)
        original_ds, _, _ = _make_dataset(size_original)

        original_shape = original_ds.VHI.shape

        processor = BasePreProcessor(tmp_path)
        subset_str = 'east_africa'
        new_ds = processor.chop_roi(ds=original_ds, subset_str=subset_str)
        output_shape = new_ds.VHI.shape

        assert original_shape != output_shape, f"The chop_roi should lead to\
        smaller datasets than the original. Expected output_shape: {output_shape}\
        to be different from original_shape: {original_shape}"

        assert ((new_ds.lat.values.min() >= -11) &
                (new_ds.lat.values.max() <= 23)
                ), f"Expected latitude to be in the range -11 : 23. Currently:\
        {new_ds.lat.values.min()} : {new_ds.lat.values.max()}"

        # BUG FIX: the longitude assertion's message was truncated (an
        # unterminated f-string, i.e. a SyntaxError); it is completed here to
        # mirror the latitude check above.
        assert (
            (new_ds.lon.values.min() >= 21) & (new_ds.lon.values.max() <= 51.8)
        ), f"Expected longitude to be in the range 21 : 51.8. Currently:\
        {new_ds.lon.values.min()} : {new_ds.lon.values.max()}"
Пример #8
0
# --- build a forecast_horizon coordinate --------------------------------------
# forecast horizon = lead time between each timestamp and the forecast run's
# initialisation date, expressed as a timedelta
# NOTE(review): assumes ds_new carries `time` and `initialisation_date`
# coordinates built earlier in this script — confirm against the caller.
fh = pd.to_timedelta(ds_new.time.values - ds_new.initialisation_date.values)
ds_new["time"] = fh
ds_new = ds_new.rename({"time": "forecast_horizon"})

# create a new coord: the valid time each forecast value applies to
# (initialisation date + lead time)
time = ds_new.initialisation_date + ds_new.forecast_horizon
ds_new = ds_new.assign_coords(time=time)

# ------------------------------------------------------------------------------
# Test PREPROCESSING
# ------------------------------------------------------------------------------
from src.preprocess.base import BasePreProcessor

# chop both datasets to the Kenya region of interest
# (inverse_lat handles latitudes stored in descending order)
b = BasePreProcessor()
ds1_kenya = b.chop_roi(ds1, inverse_lat=True)
ds2_kenya = b.chop_roi(ds2, inverse_lat=True)

# concat across initialisation dates
ds_kenya = xr.concat([ds1_kenya, ds2_kenya], dim="initialisation_date")
# collapse (initialisation_date, forecast_horizon) into a single MultiIndex
# "time" dimension
stacked = ds_kenya.stack(time=("initialisation_date", "forecast_horizon"))

# stack each individually
k1 = ds1_kenya.stack(time=("initialisation_date", "forecast_horizon"))
k2 = ds2_kenya.stack(time=("initialisation_date", "forecast_horizon"))

# test selectors: select by lead time, by initialisation date, and by the
# valid time after swapping it in as the indexing dimension
stacked.sel(forecast_horizon=np.timedelta64(28, "D"))
stacked.sel(initialisation_date="1997-01-01")
stacked.swap_dims({"time": "valid_time"}).sel(valid_time="1997-04")
Пример #9
0
"""Regrid/resample the preprocessed datasets onto a common grid before the
engineer step."""
# BUG FIX: Path and xr were used below without being imported.
from pathlib import Path

import xarray as xr

from src.engineer import Engineer

data_path = Path("/Volumes/Lees_Extend/data/ecmwf_sowc/data")
engineer = Engineer(data_path)
# engineer.engineer(test_year=1994, target_variable='VHI')

# wrong shapes!
datasets = engineer._get_preprocessed_files()
ds_list = [xr.open_dataset(ds) for ds in datasets]
dims_list = [list(ds.dims) for ds in ds_list]
# for each dataset, the first non-dimension variable is its data variable
variable_list = [[var for var in ds.variables if var not in dims_list[i]][0]
                 for i, ds in enumerate(ds_list)]
da_list = [ds[variable_list[i]] for i, ds in enumerate(ds_list)]

pp = BasePreProcessor(data_path)
# NOTE(review): ordering of the preprocessed files is assumed to be
# chirps, era5, vhi — confirm against _get_preprocessed_files.
c_ds = ds_list[0]
e_ds = ds_list[1]
v_ds = ds_list[2]

# resample VHI to the common temporal resolution
v_ds = pp.resample_time(v_ds)

# put chirps onto the VHI grid, then resample it too
c_ds = pp.regrid(c_ds, v_ds)
c_ds = pp.resample_time(c_ds)

# NOTE(review): `x.home() / x.parent` resolves to x.parent (joining onto an
# absolute path discards the left operand) — the `.home()` call is redundant.
v_ds.to_netcdf(vhi_path.home() / vhi_path.parent / "vhi_kenya_regrid.nc")
# BUG FIX: this previously wrote v_ds a second time; the chirps output file
# must contain the regridded chirps dataset.
c_ds.to_netcdf(chirps_path.home() / chirps_path.parent /
               "chirps_kenya_regrid.nc")

# engineer process
# BUG FIX: this was a bare attribute reference (a no-op); call the method.
engineer._get_preprocessed_files()
Пример #10
0
"""NOTE: https://github.com/esowc/ml_drought for the `src` code"""
from pathlib import Path
from src.preprocess.base import BasePreProcessor

era5_dir = Path('/soge-home/data/analysis/era5/0.28125x0.28125/hourly/')
winds = ['u_component_of_wind', 'v_component_of_wind']
wind_component = ['u', 'v']

base_our_dir = Path('/soge-home/projects/crop_yield/hackathon/')



processor = BasePreProcessor(Path('/soge-home/users/chri4118')))

for wind, component in zip(winds, wind_component):
    out_dir = base_our_dir / wind + '_surface'
    if not out_dir.exists():
        out_dir.mkdir(exist_ok=True, parents=True)

    ds = xr.open_mfdataset(base_our_dir / wind /  '*.nc')
    ds = ds.isel(level=0)
    ds.to_netcdf(out_dir / 'data_africa.nc')
Пример #11
0
"""Regrid the preprocessed VCI data onto the ERA5-Land reference grid."""
import sys

import xarray as xr

sys.path.append("../..")

from scripts.utils import get_data_path
from src.preprocess.base import BasePreProcessor


if __name__ == "__main__":
    data_dir = get_data_path()

    # source data and the grid it should be regridded onto
    vci = xr.open_dataset(data_dir / "interim/VCI_preprocessed/data_india.nc")
    regrid_ds = xr.open_dataset(
        data_dir / "interim/reanalysis-era5-land_preprocessed/data_india.nc"
    )

    print("** Begin Regridding **")
    vci = BasePreProcessor(data_dir).regrid(ds=vci, reference_ds=regrid_ds)

    print("** Saving file **")
    vci.to_netcdf(data_dir / "interim/VCI_preprocessed/regrid_data_india.nc")