def test_alternative_region(self, tmp_path):
    """Preprocessing with an Ethiopia subset writes ``data_ethiopia.nc``."""
    # build a raw global dataset in the folder the processor scans
    raw_dir = tmp_path / "raw/era5POS/global"
    raw_dir.mkdir(parents=True)
    source_ds = self._make_era5POS_dataset(size=(100, 100))
    source_ds.to_netcdf(path=raw_dir / "testy_test.nc")

    # reference grid spanning the Ethiopia bounding box
    region = get_ethiopia()
    reference_ds, _, _ = _make_dataset(
        size=(20, 20),
        latmin=region.latmin,
        latmax=region.latmax,
        lonmin=region.lonmin,
        lonmax=region.lonmax,
    )
    reference_path = tmp_path / "regridder.nc"
    reference_ds.to_netcdf(reference_path)

    PlanetOSPreprocessor(tmp_path).preprocess(
        subset_str="ethiopia", regrid=reference_path, n_processes=1
    )

    expected_out_path = tmp_path / "interim/era5POS_preprocessed/data_ethiopia.nc"
    assert (
        expected_out_path.exists()
    ), f"Expected processed file to be saved to {expected_out_path}"
def test_alternative_region(self, tmp_path):
    """Preprocess restricted to Ethiopia produces the expected output file."""
    # write a raw global NetCDF where the processor expects to find it
    (tmp_path / 'raw/era5POS/global').mkdir(parents=True)
    self._make_era5POS_dataset(size=(100, 100)).to_netcdf(
        path=tmp_path / 'raw/era5POS/global/testy_test.nc'
    )

    # target grid covering the Ethiopia bounding box
    bbox = get_ethiopia()
    target_grid, _, _ = _make_dataset(
        size=(20, 20),
        latmin=bbox.latmin,
        latmax=bbox.latmax,
        lonmin=bbox.lonmin,
        lonmax=bbox.lonmax,
    )
    regrid_path = tmp_path / 'regridder.nc'
    target_grid.to_netcdf(regrid_path)

    PlanetOSPreprocessor(tmp_path).preprocess(
        subset_str='ethiopia', regrid=regrid_path, parallel=False
    )

    expected_out_path = tmp_path / 'interim/era5POS_preprocessed/data_ethiopia.nc'
    assert expected_out_path.exists(), \
        f'Expected processed file to be saved to {expected_out_path}'
def test_get_filenames(tmp_path):
    """``get_filepaths`` picks up NetCDF files placed under raw/era5POS."""
    raw_dir = tmp_path / 'raw' / 'era5POS'
    raw_dir.mkdir(parents=True)
    test_file = raw_dir / 'testy_test.nc'
    test_file.touch()

    retrieved = PlanetOSPreprocessor(tmp_path).get_filepaths()
    assert retrieved[0] == test_file, f'Expected {test_file} to be retrieved'
def test_preprocess(self, tmp_path):
    """End-to-end preprocess: subset to Kenya, regrid to 20x20, clean up.

    Checks the output file location, the dimension names, that lat/lon
    values fall inside the Kenya bounding box, the regridded variable
    shapes, and that the interim working folder is removed afterwards.
    """
    # create a raw global dataset for the processor to consume
    (tmp_path / "raw/era5POS/global").mkdir(parents=True)
    data_path = tmp_path / "raw/era5POS/global/testy_test.nc"
    dataset = self._make_era5POS_dataset(size=(100, 100))
    dataset.to_netcdf(path=data_path)

    # reference grid spanning the Kenya bounding box
    kenya = get_kenya()
    regrid_dataset, _, _ = _make_dataset(
        size=(20, 20),
        latmin=kenya.latmin,
        latmax=kenya.latmax,
        lonmin=kenya.lonmin,
        lonmax=kenya.lonmax,
    )
    regrid_path = tmp_path / "regridder.nc"
    regrid_dataset.to_netcdf(regrid_path)

    processor = PlanetOSPreprocessor(tmp_path)
    processor.preprocess(subset_str="kenya", regrid=regrid_path, parallel=False)

    expected_out_path = tmp_path / "interim/era5POS_preprocessed/data_kenya.nc"
    assert (
        expected_out_path.exists()
    ), f"Expected processed file to be saved to {expected_out_path}"

    # check the subsetting happened correctly
    out_data = xr.open_dataset(expected_out_path)
    expected_dims = ["lat", "lon", "time"]
    assert len(list(out_data.dims)) == len(expected_dims)
    for dim in expected_dims:
        assert dim in list(
            out_data.dims
        ), f"Expected {dim} to be in the processed dataset dims"

    lons = out_data.lon.values
    assert (lons.min() >= kenya.lonmin) and (
        lons.max() <= kenya.lonmax
    ), "Longitudes not correctly subset"

    lats = out_data.lat.values
    assert (lats.min() >= kenya.latmin) and (
        lats.max() <= kenya.latmax
    ), "Latitudes not correctly subset"

    # regridded onto the 20x20 reference grid
    assert out_data.VHI.values.shape[1:] == (20, 20)
    assert out_data.precip.values.shape[1:] == (20, 20)

    # fix: dropped extraneous f-prefix — message has no placeholders (F541)
    assert (
        not processor.interim.exists()
    ), "Interim era5 folder should have been deleted"
def process_era5POS_2018():
    """Preprocess PlanetOS ERA5 data, regridding onto the VCI Kenya grid."""
    data_path = get_data_path()

    # the regrid target comes from the VCI preprocessing step and must exist
    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    PlanetOSPreprocessor(data_path).preprocess(
        subset_str="kenya",
        regrid=regrid_path,
        parallel=False,
        resample_time="M",
        upsampling=False,
    )
def process_era5POS_2018(subset_str: str = "kenya"):
    """Regrid and monthly-resample PlanetOS ERA5 data for *subset_str*."""
    data_path = get_data_path()

    # the preprocessed ERA5-Land file defines the target grid
    regrid_path = (
        data_path
        / f"interim/reanalysis-era5-land_preprocessed/data_{subset_str}.nc"
    )
    assert regrid_path.exists(), f"{regrid_path} not available"

    PlanetOSPreprocessor(data_path).preprocess(
        subset_str=subset_str,
        regrid=regrid_path,
        parallel=False,
        resample_time="M",
        upsampling=False,
    )
def process_era5POS_2018():
    """Preprocess PlanetOS ERA5 data against the VCI Kenya grid."""
    # if the working directory is already ml_drought we don't need ../data
    cwd_name = Path('.').absolute().name
    data_path = Path('data') if cwd_name == 'ml_drought' else Path('../data')

    regrid_path = data_path / 'interim/VCI_preprocessed/data_kenya.nc'
    assert regrid_path.exists(), f'{regrid_path} not available'

    PlanetOSPreprocessor(data_path).preprocess(
        subset_str='kenya',
        regrid=regrid_path,
        parallel=False,
        resample_time='M',
        upsampling=False,
    )
def test_rotate_and_filter(self):
    """``_rotate_and_filter`` maps longitudes into the (-180, 180) range."""
    dataset = self._make_era5POS_dataset(size=(100, 100)).rename(
        {'time1': 'time'})
    rotated_ds = PlanetOSPreprocessor._rotate_and_filter(dataset)
    # fix: dropped extraneous f-prefix — message has no placeholders (F541)
    assert (rotated_ds.lon.min() > -180) & (rotated_ds.lon.max() < 180), \
        'Longitudes not properly rotated!'
def test_make_filename():
    """``create_filename`` flattens the path parts and appends the subset."""
    name = PlanetOSPreprocessor.create_filename(Path('2008/01/vhi.nc'), 'kenya')
    expected_name = '2008_01_vhi_kenya.nc'
    assert name == expected_name, f'{name} generated, expected {expected_name}'
def test_init(self, tmp_path):
    """Constructing the preprocessor creates its output and interim folders."""
    PlanetOSPreprocessor(tmp_path)
    for subdir in ('interim/era5POS_preprocessed', 'interim/era5POS_interim'):
        assert (tmp_path / subdir).exists()
from pathlib import Path

from src.preprocess import PlanetOSPreprocessor

data_path = Path("data")

# target grid: the CHIRPS Kenya file must already have been preprocessed
regrid_path = data_path / "interim/chirps_preprocessed/chirps_kenya.nc"
assert regrid_path.exists(), f"{regrid_path} not available"

processor = PlanetOSPreprocessor(data_path)
# fix: preprocess() takes `subset_str` (region name string), not the
# `subset_kenya` flag — matching the other preprocess() calls in this file
processor.preprocess(
    subset_str="kenya",
    regrid=regrid_path,
    parallel=False,
    resample_time="M",
    upsampling=False,
)
def test_make_filename():
    """``create_filename`` joins path components with underscores plus subset."""
    source_path = Path("2008/01/vhi.nc")
    name = PlanetOSPreprocessor.create_filename(source_path, "kenya")
    expected_name = "2008_01_vhi_kenya.nc"
    assert name == expected_name, f"{name} generated, expected {expected_name}"