Example #1
    def test_alternative_region(self, tmp_path):
        # make the dataset
        (tmp_path / "raw/gleam/monthly").mkdir(parents=True)
        data_path = tmp_path / "raw/gleam/monthly/testy_test.nc"
        dataset = self._make_gleam_dataset(size=(100, 100))
        dataset.to_netcdf(path=data_path)
        ethiopia = get_ethiopia()

        # regrid the datasets
        regrid_dataset, _, _ = _make_dataset(
            size=(20, 20),
            latmin=ethiopia.latmin,
            latmax=ethiopia.latmax,
            lonmin=ethiopia.lonmin,
            lonmax=ethiopia.lonmax,
        )
        regrid_path = tmp_path / "regridder.nc"
        regrid_dataset.to_netcdf(regrid_path)

        # build the Preprocessor object and subset with a different subset_str
        processor = GLEAMPreprocessor(tmp_path)
        processor.preprocess(subset_str="ethiopia", regrid=regrid_path)

        expected_out_path = tmp_path / "interim/gleam_preprocessed/data_ethiopia.nc"
        assert (
            expected_out_path.exists()
        ), f"Expected processed file to be saved to {expected_out_path}"
Example #2
def process_gleam():
    data_path = get_data_path()

    regrid_path = data_path / "interim/VCI_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = GLEAMPreprocessor(data_path)

    processor.preprocess(
        subset_str="kenya", regrid=regrid_path, resample_time="M", upsampling=False
    )
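
`get_data_path()` is called but never defined in this example. A minimal sketch, assuming it resolves the data folder with the same working-directory check used in Examples #5 and #9, could look like this:

from pathlib import Path


def get_data_path() -> Path:
    # Illustrative only: assume the helper checks whether the current working
    # directory is already the project root (ml_drought), as the later
    # process_gleam variants do, and otherwise looks one level up.
    if Path(".").absolute().name == "ml_drought":
        return Path("data")
    return Path("../data")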
Example #3
    def test_get_filenames(tmp_path):

        (tmp_path / 'raw/gleam/monthly').mkdir(parents=True)

        test_file = tmp_path / 'raw/gleam/monthly/testy_test.nc'
        test_file.touch()

        processor = GLEAMPreprocessor(tmp_path)

        files = processor.get_filepaths()
        assert files[0] == test_file, f'Expected {test_file} to be retrieved'
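
`get_filepaths()` itself is not shown. Assuming it simply collects the raw monthly GLEAM netCDF files (which is all this test checks), a hypothetical standalone equivalent would be:

from pathlib import Path
from typing import List


def get_gleam_filepaths(data_dir: Path) -> List[Path]:
    # Hypothetical equivalent of GLEAMPreprocessor.get_filepaths(): collect
    # all netCDF files under raw/gleam/monthly, sorted for a deterministic order.
    return sorted((data_dir / "raw" / "gleam" / "monthly").glob("*.nc"))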
Example #4
    def test_preprocess(self, tmp_path):

        (tmp_path / "raw/gleam/monthly").mkdir(parents=True)
        data_path = tmp_path / "raw/gleam/monthly/testy_test.nc"
        dataset = self._make_gleam_dataset(size=(100, 100))
        dataset.to_netcdf(path=data_path)

        kenya = get_kenya()
        regrid_dataset, _, _ = _make_dataset(
            size=(20, 20),
            latmin=kenya.latmin,
            latmax=kenya.latmax,
            lonmin=kenya.lonmin,
            lonmax=kenya.lonmax,
        )

        regrid_path = tmp_path / "regridder.nc"
        regrid_dataset.to_netcdf(regrid_path)

        processor = GLEAMPreprocessor(tmp_path)
        processor.preprocess(subset_str="kenya", regrid=regrid_path)

        expected_out_path = tmp_path / "interim/gleam_preprocessed/data_kenya.nc"
        assert (
            expected_out_path.exists()
        ), f"Expected processed file to be saved to {expected_out_path}"

        # check the subsetting happened correctly
        out_data = xr.open_dataset(expected_out_path)
        expected_dims = ["lat", "lon", "time"]
        assert len(list(out_data.dims)) == len(expected_dims)
        for dim in expected_dims:
            assert dim in list(
                out_data.dims
            ), f"Expected {dim} to be in the processed dataset dims"

        lons = out_data.lon.values
        assert (lons.min() >= kenya.lonmin) and (
            lons.max() <= kenya.lonmax
        ), "Longitudes not correctly subset"

        lats = out_data.lat.values
        assert (lats.min() >= kenya.latmin) and (
            lats.max() <= kenya.latmax
        ), "Latitudes not correctly subset"

        assert set(out_data.data_vars) == {"E"}, "Got unexpected variables!"

        assert (
            not processor.interim.exists()
        ), "Interim gleam folder should have been deleted"
Example #5
def process_gleam():
    # if the working directory is already ml_drought we don't need ../data
    if Path('.').absolute().as_posix().split('/')[-1] == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')
    regrid_path = data_path / 'interim/VCI_preprocessed/data_kenya.nc'
    assert regrid_path.exists(), f'{regrid_path} not available'

    processor = GLEAMPreprocessor(data_path)

    processor.preprocess(subset_str='kenya',
                         regrid=regrid_path,
                         resample_time='M',
                         upsampling=False)
Example #6
    def test_swapaxes(self):

        dataset = self._make_gleam_dataset(size=(20, 30))

        out = GLEAMPreprocessor._swap_dims_and_filter(dataset)

        assert out.E.values.shape[1:] == (30, 20), "Array axes not properly swapped!"
Example #7
    def test_directories_created(tmp_path):
        v = GLEAMPreprocessor(tmp_path)

        assert (tmp_path / v.preprocessed_folder / 'gleam_preprocessed').exists(), \
            'Should have created a directory tmp_path/interim/gleam_preprocessed'

        assert (tmp_path / v.preprocessed_folder / 'gleam_interim').exists(), \
            'Should have created a directory tmp_path/interim/gleam_interim'
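
This test only asserts that the two folders exist after construction. A hypothetical constructor that would satisfy it is sketched below; the class name and the `mkdir` details are assumptions, only the folder layout is taken from the test.

from pathlib import Path


class MinimalGLEAMLayout:
    # Hypothetical sketch of the directory layout the test above checks for:
    # <data_path>/interim/gleam_preprocessed and <data_path>/interim/gleam_interim.
    dataset = "gleam"

    def __init__(self, data_path: Path):
        self.preprocessed_folder = data_path / "interim"
        self.out_dir = self.preprocessed_folder / f"{self.dataset}_preprocessed"
        self.interim = self.preprocessed_folder / f"{self.dataset}_interim"
        for folder in (self.out_dir, self.interim):
            folder.mkdir(parents=True, exist_ok=True)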
Example #8
    def test_make_filename():

        test_file = 'testy_test.nc'
        expected_output = 'testy_test_kenya.nc'

        filename = GLEAMPreprocessor.create_filename(test_file, 'kenya')
        assert filename == expected_output, \
            f'Expected output to be {expected_output}, got {filename}'
Example #9
def process_gleam():
    # if the working directory is already ml_drought we don't need ../data
    if Path(".").absolute().as_posix().split("/")[-1] == "ml_drought":
        data_path = Path("data")
    else:
        data_path = Path("../data")
    regrid_path = (
        data_path /
        "interim/reanalysis-era5-single-levels-monthly-means_preprocessed/data_kenya.nc"
    )
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = GLEAMPreprocessor(data_path)

    processor.preprocess(subset_str="kenya",
                         regrid=regrid_path,
                         resample_time="M",
                         upsampling=False)
Example #10
    def test_make_filename():

        test_file = "testy_test.nc"
        expected_output = "testy_test_kenya.nc"

        filename = GLEAMPreprocessor.create_filename(test_file, "kenya")
        assert (
            filename == expected_output
        ), f"Expected output to be {expected_output}, got {filename}"
Example #11
    def test_preprocess(self, tmp_path):

        (tmp_path / 'raw/gleam/monthly').mkdir(parents=True)
        data_path = tmp_path / 'raw/gleam/monthly/testy_test.nc'
        dataset = self._make_gleam_dataset(size=(100, 100))
        dataset.to_netcdf(path=data_path)

        kenya = get_kenya()
        regrid_dataset, _, _ = _make_dataset(size=(20, 20),
                                             latmin=kenya.latmin,
                                             latmax=kenya.latmax,
                                             lonmin=kenya.lonmin,
                                             lonmax=kenya.lonmax)

        regrid_path = tmp_path / 'regridder.nc'
        regrid_dataset.to_netcdf(regrid_path)

        processor = GLEAMPreprocessor(tmp_path)
        processor.preprocess(subset_str='kenya', regrid=regrid_path)

        expected_out_path = tmp_path / 'interim/gleam_preprocessed/data_kenya.nc'
        assert expected_out_path.exists(), \
            f'Expected processed file to be saved to {expected_out_path}'

        # check the subsetting happened correctly
        out_data = xr.open_dataset(expected_out_path)
        expected_dims = ['lat', 'lon', 'time']
        assert len(list(out_data.dims)) == len(expected_dims)
        for dim in expected_dims:
            assert dim in list(out_data.dims), \
                f'Expected {dim} to be in the processed dataset dims'

        lons = out_data.lon.values
        assert (lons.min() >= kenya.lonmin) and (lons.max() <= kenya.lonmax), \
            'Longitudes not correctly subset'

        lats = out_data.lat.values
        assert (lats.min() >= kenya.latmin) and (lats.max() <= kenya.latmax), \
            'Latitudes not correctly subset'

        assert set(out_data.data_vars) == {'E'}, 'Got unexpected variables!'

        assert not processor.interim.exists(), \
            'Interim gleam folder should have been deleted'
Example #12
def preprocess_data(data_path):
    # preprocess VHI
    print('** Preprocessing VHI **')
    processor = VHIPreprocessor(data_path)
    processor.preprocess(
        subset_str='kenya', regrid=None,  # regrid_path is only defined below, from this step's output
        n_parallel_processes=1, resample_time='M',
        upsampling=False
    )

    regrid_path = data_path / 'interim' / 'vhi_preprocessed' / 'vhi_kenya.nc'

    # preprocess CHIRPS Rainfall
    print('** Preprocessing CHIRPS Precipitation **')
    processor = CHIRPSPreprocessor(data_path)
    processor.preprocess(
        subset_str='kenya', regrid=regrid_path,
        n_parallel_processes=1
    )

    # preprocess GLEAM evaporation
    print('** Preprocessing GLEAM Evaporation **')
    processor = GLEAMPreprocessor(data_path)
    processor.preprocess(
        subset_str='kenya', regrid=regrid_path,
        resample_time='M', upsampling=False
    )

    # preprocess SRTM Topography
    print('** Preprocessing SRTM Topography **')
    processor = SRTMPreprocessor(data_path)
    processor.preprocess(
        subset_str='kenya', regrid=regrid_path
    )

    # preprocess ESA CCI Landcover
    print('** Preprocessing ESA CCI Landcover **')
    processor = ESACCIPreprocessor(data_path)
    processor.preprocess(
        subset_str='kenya', regrid=regrid_path,
        resample_time='M', upsampling=False
    )
Example #13
def preprocess_data(data_path):
    # preprocess VHI
    print("** Preprocessing VHI **")
    processor = VHIPreprocessor(data_path)
    processor.preprocess(
        subset_str="kenya",
        regrid=None,  # regrid_path is only defined below, from this step's output
        n_parallel_processes=1,
        resample_time="M",
        upsampling=False,
    )

    regrid_path = data_path / "interim" / "vhi_preprocessed" / "vhi_kenya.nc"

    # preprocess CHIRPS Rainfall
    print("** Preprocessing CHIRPS Precipitation **")
    processor = CHIRPSPreprocessor(data_path)
    processor.preprocess(subset_str="kenya",
                         regrid=regrid_path,
                         n_parallel_processes=1)

    # preprocess GLEAM evaporation
    print("** Preprocessing GLEAM Evaporation **")
    processor = GLEAMPreprocessor(data_path)
    processor.preprocess(subset_str="kenya",
                         regrid=regrid_path,
                         resample_time="M",
                         upsampling=False)

    # preprocess SRTM Topography
    print("** Preprocessing SRTM Topography **")
    processor = SRTMPreprocessor(data_path)
    processor.preprocess(subset_str="kenya", regrid=regrid_path)

    # preprocess ESA CCI Landcover
    print("** Preprocessing ESA CCI Landcover **")
    processor = ESACCIPreprocessor(data_path)
    processor.preprocess(subset_str="kenya",
                         regrid=regrid_path,
                         resample_time="M",
                         upsampling=False)
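
None of the snippets show how `preprocess_data` is actually invoked. A hypothetical entry point, reusing the working-directory check from the `process_gleam` examples, might look like:

from pathlib import Path

if __name__ == "__main__":
    # Hypothetical driver: resolve the data directory relative to the project
    # root (as in the process_gleam examples) and run the full pipeline.
    if Path(".").absolute().name == "ml_drought":
        data_path = Path("data")
    else:
        data_path = Path("../data")
    preprocess_data(data_path)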