def test_make_filename():
    """``create_filename`` builds the flat output name from a raw file path.

    Uses a raw path of the form ``<dataset>/<variable>/<years>/<months>.nc``
    together with the ``"kenya"`` subset string and compares the result with
    the expected file name.
    """
    raw_path = Path("reanalysis-era5-land/2m_temperature/1979_2019/01_12.nc")
    wanted = "1979_2019_01_12_2m_temperature_kenya.nc"

    produced = ERA5LandPreprocessor.create_filename(raw_path, "kenya")

    assert produced == wanted, f"{produced} generated, expected {wanted}"
def test_init(self, tmp_path):
    """Constructing the preprocessor creates its two working folders."""
    ERA5LandPreprocessor(tmp_path)

    # Both folders are expected to exist right after construction.
    required_dirs = (
        "interim/reanalysis-era5-land_interim",
        "interim/reanalysis-era5-land_preprocessed",
    )
    for relative_dir in required_dirs:
        assert (tmp_path / relative_dir).exists()
def test_preprocess(self, tmp_path):
    """Run ``preprocess`` end to end on a synthetic ERA5-Land file.

    A dataset built with ``size=(100, 100)`` is written under the raw
    ``2m_temperature`` folder, and a ``size=(20, 20)`` dataset bounded by the
    Kenya region is written as the regridding reference. After preprocessing
    the test checks that:

    * the processed file exists at the expected location,
    * its dimensions are exactly ``lat``, ``lon`` and ``time``,
    * its coordinates lie inside the Kenya bounding box,
    * the ``t2m`` variable has a 20x20 spatial shape,
    * the interim folder has been removed.
    """
    raw_dir = tmp_path / "raw/reanalysis-era5-land/2m_temperature/1979_2019"
    raw_dir.mkdir(parents=True)
    data_path = raw_dir / "01_12.nc"

    dataset = self._make_era5_dataset(size=(100, 100))
    dataset.to_netcdf(path=data_path)

    kenya = get_kenya()
    regrid_dataset, _, _ = _make_dataset(
        size=(20, 20),
        latmin=kenya.latmin,
        latmax=kenya.latmax,
        lonmin=kenya.lonmin,
        lonmax=kenya.lonmax,
    )
    regrid_path = tmp_path / "regridder.nc"
    regrid_dataset.to_netcdf(regrid_path)

    processor = ERA5LandPreprocessor(tmp_path)
    processor.preprocess(
        subset_str="kenya",
        regrid=regrid_path,
        parallel_processes=1,
        variable="2m_temperature",
    )

    expected_out_path = (
        tmp_path
        / "interim/reanalysis-era5-land_preprocessed/reanalysis-era5-land_kenya.nc"
    )
    assert (
        expected_out_path.exists()
    ), f"Expected processed file to be saved to {expected_out_path}"

    # check the subsetting happened correctly
    expected_dims = ["lat", "lon", "time"]
    # Open the output in a context manager so the file handle is released
    # even when an assertion fails; a dangling handle can prevent pytest from
    # cleaning up ``tmp_path`` on some platforms.
    with xr.open_dataset(expected_out_path) as out_data:
        actual_dims = list(out_data.dims)
        assert len(actual_dims) == len(expected_dims)
        for dim in expected_dims:
            assert (
                dim in actual_dims
            ), f"Expected {dim} to be in the processed dataset dims"

        lons = out_data.lon.values
        assert (lons.min() >= kenya.lonmin) and (
            lons.max() <= kenya.lonmax
        ), "Longitudes not correctly subset"

        lats = out_data.lat.values
        assert (lats.min() >= kenya.latmin) and (
            lats.max() <= kenya.latmax
        ), "Latitudes not correctly subset"

        # Spatial shape must match the 20x20 regridding reference.
        assert out_data.t2m.values.shape[1:] == (20, 20)

    assert (
        not processor.interim.exists()
    ), "Interim era5 folder should have been deleted"
def test_get_filenames(tmp_path):
    """``get_filepaths`` returns the NetCDF file placed in the raw folder."""
    years_dir = (
        tmp_path / "raw/reanalysis-era5-land/2m_temperature/1979_2019"
    )
    years_dir.mkdir(parents=True)

    # NOTE(review): the file is named "1979_2019.01_12.nc" (a dot, not a
    # slash), so it sits beside the "1979_2019" directory rather than inside
    # it -- confirm this is intentional.
    nc_file = (
        tmp_path / "raw/reanalysis-era5-land/2m_temperature/1979_2019.01_12.nc"
    )
    nc_file.touch()

    found = ERA5LandPreprocessor(tmp_path).get_filepaths()

    assert found[0] == nc_file, f"Expected {nc_file} to be retrieved"
def process_era5_land(
    variables: Optional[Union[List, str]] = None,
    subset_str: str = "kenya",
    monmean: bool = True,
):
    """Preprocess one or more raw ERA5-Land variables.

    Args:
        variables: Name(s) of the entries under ``raw/reanalysis-era5-land``
            to process. A single name may be given as a string. ``None``
            processes every entry found in that directory.
        subset_str: Forwarded to ``preprocess`` as ``subset_str``.
        monmean: When true use ``ERA5LandMonthlyMeansPreprocessor``,
            otherwise ``ERA5LandPreprocessor``.

    Raises:
        AssertionError: If ``variables`` is ``None`` and the raw directory is
            empty, or if any requested variable is not present in it.
    """
    data_path = get_data_path()
    raw_dir = data_path / "raw/reanalysis-era5-land"

    # List the raw directory once and reuse the result for every check.
    available = [d.name for d in raw_dir.iterdir()]

    # Check all the provided variables exist
    if variables is None:
        variables = available
        assert (
            variables != []
        ), f"Expecting to find some variables in: {raw_dir}"
    else:
        if isinstance(variables, str):
            variables = [variables]
        # Validate each requested name individually. Testing the wrapped list
        # itself for membership (``[name] in available``) is always False and
        # made every single-string call fail.
        missing = [name for name in variables if name not in available]
        assert not missing, f"Expected to find {missing} in {raw_dir}"

    if monmean:
        processor = ERA5LandMonthlyMeansPreprocessor(data_path)
    else:
        processor = ERA5LandPreprocessor(data_path)

    # No regridding reference is supplied: ``regrid`` is always ``None``.
    for variable in variables:
        processor.preprocess(
            subset_str=subset_str,
            regrid=None,
            resample_time="M",
            upsampling=False,
            variable=variable,
        )
def process_era5_land(variable: str):
    """Preprocess a single ERA5-Land variable for the ``"kenya"`` subset.

    The data directory is ``data`` when the current working directory is
    named ``ml_drought`` and ``../data`` otherwise.

    Args:
        variable: Forwarded to ``preprocess`` as ``variable``.

    Raises:
        AssertionError: If the CHIRPS Kenya file is missing from the data
            directory.
    """
    running_from_repo_root = Path.cwd().name == "ml_drought"
    data_path = Path("data") if running_from_repo_root else Path("../data")

    # NOTE(review): this file is required to exist, yet ``regrid=None`` is
    # passed below, so it is never used for regridding -- confirm intent.
    regrid_path = data_path / "interim/chirps_preprocessed/chirps_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    preprocess_options = {
        "subset_str": "kenya",
        "regrid": None,
        "resample_time": "M",
        "upsampling": False,
        "variable": variable,
    }
    ERA5LandPreprocessor(data_path).preprocess(**preprocess_options)
# Notebook cell: set up an ERA5-Land preprocessor and list its input files.
from pathlib import Path
import xarray as xr

# IPython magics: enable the autoreload extension in mode 2.
%load_ext autoreload
%autoreload 2

from src.preprocess import ERA5LandPreprocessor

# Machine-specific absolute path to the data directory; adjust per machine.
data_dir = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')
e = ERA5LandPreprocessor(data_dir)

# get filepaths (as the last expression, the notebook displays the result)
e.get_filepaths()