def test_raise(self):
    """A start year later than the end year must raise ``ValueError``."""
    tas = xr.open_dataset(self.nc_poslons).tas
    # Deliberately reversed period: the start comes after the end.
    reversed_period = {"start_yr": 2056, "end_yr": 2055}
    with pytest.raises(ValueError):
        subset.subset_bbox(
            tas, lon_bnds=self.lon, lat_bnds=self.lat, **reversed_period
        )
def test_simple(self):
    """Subset by bounding box, with and without a time range.

    Covers a purely spatial subset of ``nc_file``, then subsets of the
    longitude-shifted ``nc_poslons`` data bounded by both dates, by a start
    date only and by an end date only.
    """

    def _assert_inside(result, lon_bnds, lat_bnds):
        # The subset must keep some grid cells, all of them inside the box.
        assert result.lon.values.size != 0
        assert result.lat.values.size != 0
        assert np.all(result.lon >= np.min(lon_bnds))
        assert np.all(result.lon <= np.max(lon_bnds))
        assert np.all(result.lat >= np.min(lat_bnds))
        assert np.all(result.lat <= np.max(lat_bnds))

    # Spatial subset only.
    tasmax = xr.open_dataset(self.nc_file).tasmax
    spatial = subset.subset_bbox(tasmax, lon_bnds=self.lon, lat_bnds=self.lat)
    _assert_inside(spatial, self.lon, self.lat)

    # Shift the longitude coordinate by -360 before the temporal cases.
    tas = xr.open_dataset(self.nc_poslons).tas
    tas = tas.assign_coords(lon=(tas.lon - 360))
    first_year, last_year = 2050, 2059

    # Both ends of the period given.
    both = subset.subset_bbox(
        tas,
        lon_bnds=self.lonGCM,
        lat_bnds=self.latGCM,
        start_date=str(first_year),
        end_date=str(last_year),
    )
    _assert_inside(both, self.lonGCM, self.latGCM)
    np.testing.assert_array_equal(both.time.dt.year.max(), last_year)
    np.testing.assert_array_equal(both.time.dt.year.min(), first_year)

    # Start date only: the subset should run to the last year of the input.
    from_start = subset.subset_bbox(
        tas, lon_bnds=self.lon, lat_bnds=self.lat, start_date=str(first_year)
    )
    _assert_inside(from_start, self.lon, self.lat)
    np.testing.assert_array_equal(
        from_start.time.dt.year.max(), tas.time.dt.year.max()
    )
    np.testing.assert_array_equal(from_start.time.dt.year.min(), first_year)

    # End date only: the subset should begin at the first year of the input.
    until_end = subset.subset_bbox(
        tas, lon_bnds=self.lon, lat_bnds=self.lat, end_date=str(last_year)
    )
    _assert_inside(until_end, self.lon, self.lat)
    np.testing.assert_array_equal(until_end.time.dt.year.max(), last_year)
    np.testing.assert_array_equal(
        until_end.time.dt.year.min(), tas.time.dt.year.min()
    )
def test_simple(self):
    """Subset by bounding box, with and without a time range.

    Covers a purely spatial subset of ``nc_file``, then subsets of the
    longitude-shifted ``nc_poslons`` data bounded by both dates, by a start
    date only and by an end date only. The two single-bound calls are
    expected to emit a warning.
    """

    def _assert_inside(result):
        # Every remaining coordinate must lie within the requested box.
        assert np.all(result.lon >= np.min(self.lon))
        assert np.all(result.lon <= np.max(self.lon))
        assert np.all(result.lat >= np.min(self.lat))
        assert np.all(result.lat <= np.max(self.lat))

    da = xr.open_dataset(self.nc_file).tasmax
    out = subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat)
    _assert_inside(out)

    da = xr.open_dataset(self.nc_poslons).tas
    # Build a shifted coordinate rather than using ``da["lon"] -= 360``:
    # in-place arithmetic on an index coordinate is rejected by newer xarray
    # releases, whereas ``assign_coords`` works on old and new versions.
    da = da.assign_coords(lon=(da.lon - 360))
    yr_st = "2050"
    yr_ed = "2059"

    # Both ends of the period given.
    out = subset.subset_bbox(
        da, lon_bnds=self.lon, lat_bnds=self.lat, start_date=yr_st, end_date=yr_ed
    )
    _assert_inside(out)
    np.testing.assert_array_equal(out.time.dt.year.max(), np.array(int(yr_ed)))
    np.testing.assert_array_equal(out.time.dt.year.min(), np.array(int(yr_st)))

    # Start date only: a warning is expected and the end defaults to the data.
    with pytest.warns(Warning):
        out = subset.subset_bbox(
            da, lon_bnds=self.lon, lat_bnds=self.lat, start_date=yr_st
        )
    _assert_inside(out)
    np.testing.assert_array_equal(out.time.dt.year.max(), da.time.dt.year.max())
    np.testing.assert_array_equal(out.time.dt.year.min(), np.array(int(yr_st)))

    # End date only: a warning is expected and the start defaults to the data.
    with pytest.warns(Warning):
        out = subset.subset_bbox(
            da, lon_bnds=self.lon, lat_bnds=self.lat, end_date=yr_ed
        )
    _assert_inside(out)
    np.testing.assert_array_equal(out.time.dt.year.max(), np.array(int(yr_ed)))
    np.testing.assert_array_equal(out.time.dt.year.min(), da.time.dt.year.min())
def test_raise(self):
    """``subset_bbox`` must fail on a reversed period or missing lon/lat.

    First case: a start year later than the end year raises ``ValueError``.
    Second case: a DataArray stripped of its ``lon``/``lat`` coordinates
    raises some exception when subset spatially.
    """
    da = xr.open_dataset(self.nc_poslons).tas
    with pytest.raises(ValueError):
        subset.subset_bbox(
            da, lon_bnds=self.lon, lat_bnds=self.lat, start_yr=2056, end_yr=2055
        )

    # ``drop_vars`` replaces the deprecated ``DataArray.drop`` for removing
    # coordinate variables.
    da = xr.open_dataset(self.nc_2dlonlat).tasmax.drop_vars(["lon", "lat"])
    with pytest.raises(Exception):
        subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat)
def test_positive_lons(self):
    """Subset the ``nc_poslons`` data with bounds given in either convention.

    The bounds are passed once as ``self.lon`` and once shifted by +360; in
    both cases the resulting longitudes are compared against the shifted
    bounds.
    """
    tas = xr.open_dataset(self.nc_poslons).tas
    shifted_lon = np.asarray(self.lon) + 360

    # Bounds passed unshifted.
    result = subset.subset_bbox(tas, lon_bnds=self.lon, lat_bnds=self.lat)
    assert np.all(result.lon >= np.min(shifted_lon))
    assert np.all(result.lon <= np.max(shifted_lon))
    assert np.all(result.lat >= np.min(self.lat))
    assert np.all(result.lat <= np.max(self.lat))

    # Bounds passed already shifted by +360.
    result = subset.subset_bbox(
        tas, lon_bnds=np.array(self.lon) + 360, lat_bnds=self.lat
    )
    assert np.all(result.lon >= np.min(shifted_lon))
def test_raise(self):
    """``subset_bbox`` must fail on a reversed period or missing lon/lat."""
    tas = xr.open_dataset(self.nc_poslons).tas
    # Start date deliberately later than the end date.
    reversed_period = {"start_date": "2056", "end_date": "2055"}
    with pytest.raises(ValueError):
        subset.subset_bbox(
            tas, lon_bnds=self.lonGCM, lat_bnds=self.latGCM, **reversed_period
        )

    # Without lon/lat coordinates a spatial subset cannot succeed.
    tasmax = xr.open_dataset(self.nc_2dlonlat).tasmax
    stripped = tasmax.drop_vars(names=["lon", "lat"])
    with pytest.raises(Exception):
        subset.subset_bbox(stripped, lon_bnds=self.lon, lat_bnds=self.lat)
def test_simple(self):
    """Subset by bounding box, with and without a year range.

    Covers a purely spatial subset of ``nc_file``, then subsets of the
    longitude-shifted ``nc_poslons`` data bounded by both years, by a start
    year only and by an end year only.
    """

    def _assert_inside(result):
        # Every remaining coordinate must lie within the requested box.
        assert np.all(result.lon >= np.min(self.lon))
        assert np.all(result.lon <= np.max(self.lon))
        assert np.all(result.lat >= np.min(self.lat))
        assert np.all(result.lat <= np.max(self.lat))

    da = xr.open_dataset(self.nc_file).tasmax
    out = subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat)
    _assert_inside(out)

    da = xr.open_dataset(self.nc_poslons).tas
    # Build a shifted coordinate rather than using ``da["lon"] -= 360``:
    # in-place arithmetic on an index coordinate is rejected by newer xarray
    # releases, whereas ``assign_coords`` works on old and new versions.
    da = da.assign_coords(lon=(da.lon - 360))
    yr_st = 2050
    yr_ed = 2059

    # Both ends of the period given.
    out = subset.subset_bbox(
        da, lon_bnds=self.lon, lat_bnds=self.lat, start_yr=yr_st, end_yr=yr_ed
    )
    _assert_inside(out)
    np.testing.assert_array_equal(out.time.dt.year.max(), yr_ed)
    np.testing.assert_array_equal(out.time.dt.year.min(), yr_st)

    # Start year only: the subset should run to the last year of the input.
    out = subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat, start_yr=yr_st)
    _assert_inside(out)
    np.testing.assert_array_equal(out.time.dt.year.max(), da.time.dt.year.max())
    np.testing.assert_array_equal(out.time.dt.year.min(), yr_st)

    # End year only: the subset should begin at the first year of the input.
    out = subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat, end_yr=yr_ed)
    _assert_inside(out)
    np.testing.assert_array_equal(out.time.dt.year.max(), yr_ed)
    np.testing.assert_array_equal(out.time.dt.year.min(), da.time.dt.year.min())
def test_positive_lons(self):
    """Subset the ``nc_poslons`` data with bounds given in either convention.

    The bounds are passed once as ``self.lonGCM`` and once shifted by +360;
    in both cases the resulting longitudes are compared against the shifted
    bounds.
    """
    tas = xr.open_dataset(self.nc_poslons).tas
    shifted_lon = np.asarray(self.lonGCM) + 360

    # Bounds passed unshifted.
    result = subset.subset_bbox(tas, lon_bnds=self.lonGCM, lat_bnds=self.latGCM)
    assert result.lon.values.size != 0
    assert result.lat.values.size != 0
    assert np.all(result.lon >= np.min(shifted_lon))
    assert np.all(result.lon <= np.max(shifted_lon))
    assert np.all(result.lat >= np.min(self.latGCM))
    assert np.all(result.lat <= np.max(self.latGCM))

    # Bounds passed already shifted by +360.
    result = subset.subset_bbox(
        tas, lon_bnds=np.array(self.lonGCM) + 360, lat_bnds=self.latGCM
    )
    assert np.all(result.lon >= np.min(shifted_lon))
def test_single_bounds_rectilinear(self):
    """Subset along one axis only; the other axis must come back unchanged."""
    tasmax = xr.open_dataset(self.nc_file).tasmax

    # Longitude bounds only: latitude is left untouched.
    by_lon = subset.subset_bbox(tasmax, lon_bnds=self.lon)
    assert by_lon.lon.values.size != 0
    assert by_lon.lat.values.size != 0
    np.testing.assert_array_equal(by_lon.lat, tasmax.lat)
    assert np.all(by_lon.lon <= np.max(self.lon))
    assert np.all(by_lon.lon.values >= np.min(self.lon))

    # Latitude bounds only: longitude is left untouched.
    by_lat = subset.subset_bbox(tasmax, lat_bnds=self.lat)
    assert by_lat.lon.values.size != 0
    assert by_lat.lat.values.size != 0
    np.testing.assert_array_equal(by_lat.lon, tasmax.lon)
    assert np.all(by_lat.lat <= np.max(self.lat))
    assert np.all(by_lat.lat.values >= np.min(self.lat))
def test_single_bounds_curvilinear(self):
    """Subset the 2-D lon/lat data along one axis only.

    Bounds are only checked on cells that are not NaN in the first time step.
    """
    tasmax = xr.open_dataset(self.nc_2dlonlat).tasmax

    # Longitude bounds only.
    by_lon = subset.subset_bbox(tasmax, lon_bnds=self.lon)
    assert by_lon.lon.values.size != 0
    assert by_lon.lat.values.size != 0
    valid = ~(np.isnan(by_lon.sel(time=by_lon.time[0])))
    assert np.all(by_lon.lon.values[valid.values] <= np.max(self.lon))
    assert np.all(by_lon.lon.values[valid.values] >= np.min(self.lon))

    # Latitude bounds only.
    by_lat = subset.subset_bbox(tasmax, lat_bnds=self.lat)
    assert by_lat.lon.values.size != 0
    assert by_lat.lat.values.size != 0
    valid = ~(np.isnan(by_lat.sel(time=by_lat.time[0])))
    assert np.all(by_lat.lat.values[valid.values] <= np.max(self.lat))
    assert np.all(by_lat.lat.values[valid.values] >= np.min(self.lat))
def test_irregular_dataset(self):
    """Subset a whole Dataset on the 2-D lon/lat grid.

    Every data variable other than ``tasmax`` must be returned unchanged, the
    ``tasmax`` result must match subsetting the DataArray directly, and the
    call must still run once the ``rlon``/``rlat`` variables are dropped.
    """
    lon_bnds = [-150, 100]
    lat_bnds = [10, 60]
    ds = xr.open_dataset(self.nc_2dlonlat)
    out = subset.subset_bbox(ds, lon_bnds=lon_bnds, lat_bnds=lat_bnds)

    # Only tasmax should be subsetted/masked; the others remain untouched.
    untouched = list(ds.data_vars)
    untouched.remove("tasmax")
    for name in untouched:
        assert out[name].dims == ds[name].dims
        np.testing.assert_array_equal(out[name], ds[name])

    # The Dataset result must equal the result on the DataArray alone.
    from_dataarray = subset.subset_bbox(
        ds.tasmax, lon_bnds=[-150, 100], lat_bnds=[10, 60]
    )
    np.testing.assert_array_equal(from_dataarray, out.tasmax)

    # Additional check when the dimensions have no coordinates.
    ds = ds.drop_vars(["rlon", "rlat"])
    subset.subset_bbox(ds.tasmax, lon_bnds=[-150, 100], lat_bnds=[10, 60])
def test_badly_named_latlons(self):
    """Subsetting must keep working when lon/lat use alternative names.

    For each rename mapping, the renamed dimensions must still be present in
    the subset result.
    """
    ds = xr.open_dataset(self.nc_file)
    rename_cases = (
        {"lat": "latitude", "lon": "longitude"},
        {"lon": "long"},
        {"lon": "lons", "lat": "lats"},
    )
    for mapping in rename_cases:
        renamed = ds.rename(mapping)
        out = subset.subset_bbox(renamed, lon_bnds=self.lon, lat_bnds=self.lat)
        # The new names are exactly the values of the rename mapping.
        assert set(mapping.values()).issubset(out.dims)
def test_dataset(self):
    """Subset a multi-file Dataset holding both ``tasmax`` and ``tasmin``."""
    # The tasmin file path is derived from the tasmax one.
    paths = [self.nc_file, self.nc_file.replace("tasmax", "tasmin")]
    ds = xr.open_mfdataset(paths)

    result = subset.subset_bbox(ds, lon_bnds=self.lon, lat_bnds=self.lat)
    assert np.all(result.lon >= np.min(self.lon))
    assert np.all(result.lon <= np.max(self.lon))
    assert np.all(result.lat >= np.min(self.lat))
    assert np.all(result.lat <= np.max(self.lat))

    # Both variables must have been subset to the same shape.
    np.testing.assert_array_equal(result.tasmin.shape, result.tasmax.shape)
def test_time(self):
    """Check the exact first and last time steps of a temporal subset.

    With year-only dates ("2050" to "2059") the subset is expected to span
    1 January 2050 to 31 December 2059; with full dates it is expected to
    start and end on exactly the requested days.
    """

    def _assert_inside(result):
        # The subset must keep some grid cells, all of them inside the box.
        assert result.lon.values.size != 0
        assert result.lat.values.size != 0
        assert np.all(result.lon >= np.min(self.lonGCM))
        assert np.all(result.lon <= np.max(self.lonGCM))
        assert np.all(result.lat >= np.min(self.latGCM))
        assert np.all(result.lat <= np.max(self.latGCM))

    def _assert_time_span(result, first, last):
        # ``first`` and ``last`` are (year, month, day) tuples.
        t_min = result.time.min()
        t_max = result.time.max()
        np.testing.assert_array_equal(t_min.dt.year, first[0])
        np.testing.assert_array_equal(t_min.dt.month, first[1])
        np.testing.assert_array_equal(t_min.dt.day, first[2])
        np.testing.assert_array_equal(t_max.dt.year, last[0])
        np.testing.assert_array_equal(t_max.dt.month, last[1])
        np.testing.assert_array_equal(t_max.dt.day, last[2])

    da = xr.open_dataset(self.nc_poslons).tas
    # Build a shifted coordinate rather than using ``da["lon"] -= 360``:
    # in-place arithmetic on an index coordinate is rejected by newer xarray
    # releases, whereas ``assign_coords`` works on old and new versions.
    da = da.assign_coords(lon=(da.lon - 360))

    # Year-only dates.
    out = subset.subset_bbox(
        da,
        lon_bnds=self.lonGCM,
        lat_bnds=self.latGCM,
        start_date="2050",
        end_date="2059",
    )
    _assert_inside(out)
    _assert_time_span(out, (2050, 1, 1), (2059, 12, 31))

    # Full year-month-day dates.
    out = subset.subset_bbox(
        da,
        lon_bnds=self.lonGCM,
        lat_bnds=self.latGCM,
        start_date="2050-02-05",
        end_date="2059-07-15",
    )
    _assert_inside(out)
    _assert_time_span(out, (2050, 2, 5), (2059, 7, 15))
def test_warnings(self):
    """Old year arguments are rejected; date arguments emit no deprecation.

    Passing ``start_yr``/``end_yr`` must raise ``TypeError``. Passing
    ``start_date``/``end_date`` must not produce the old deprecation message.
    """
    import warnings

    da = xr.open_dataset(self.nc_poslons).tas
    da = da.assign_coords(lon=(da.lon - 360))

    with pytest.raises(TypeError):
        subset.subset_bbox(
            da, lon_bnds=self.lon, lat_bnds=self.lat, start_yr=2050, end_yr=2059
        )

    # ``pytest.warns(None)`` is deprecated in pytest 7 and an error in
    # pytest 8; record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as record:
        # Make sure no warning is swallowed by an already-triggered filter.
        warnings.simplefilter("always")
        subset.subset_bbox(
            da,
            lon_bnds=self.lon,
            lat_bnds=self.lat,
            start_date="2050",
            end_date="2055",
        )

    assert (
        '"start_yr" and "end_yr" (type: int) are being deprecated. Temporal subsets will soon exclusively'
        ' support "start_date" and "end_date" (type: str) using formats of "%Y", "%Y-%m" or "%Y-%m-%d".'
        not in [str(q.message) for q in record]
    )
def test_warnings(self):
    """Year arguments warn about deprecation; date arguments do not.

    Passing ``start_yr``/``end_yr`` must emit a ``FutureWarning``. Passing
    ``start_date``/``end_date`` must not produce the deprecation message.
    """
    import warnings

    da = xr.open_dataset(self.nc_poslons).tas
    # Build a shifted coordinate rather than using ``da["lon"] -= 360``:
    # in-place arithmetic on an index coordinate is rejected by newer xarray
    # releases, whereas ``assign_coords`` works on old and new versions.
    da = da.assign_coords(lon=(da.lon - 360))

    with pytest.warns(FutureWarning):
        subset.subset_bbox(
            da, lon_bnds=self.lon, lat_bnds=self.lat, start_yr=2050, end_yr=2059
        )

    # ``pytest.warns(None)`` is deprecated in pytest 7 and an error in
    # pytest 8; record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as record:
        # Make sure no warning is swallowed by an already-triggered filter.
        warnings.simplefilter("always")
        subset.subset_bbox(
            da,
            lon_bnds=self.lon,
            lat_bnds=self.lat,
            start_date="2050",
            end_date="2055",
        )

    # Compare against the message *text*: ``q.message`` is a Warning
    # instance, so comparing the string to it directly could never match and
    # the assertion would pass unconditionally.
    assert (
        '"start_yr" and "end_yr" (type: int) are being deprecated. Temporal subsets will soon exclusively'
        ' support "start_date" and "end_date" (type: str) using formats of "%Y", "%Y-%m" or "%Y-%m-%d".'
        not in [str(q.message) for q in record]
    )
def test_irregular(self):
    """Subset the 2-D lon/lat data and check bounds on non-NaN cells only."""
    tasmax = xr.open_dataset(self.nc_2dlonlat).tasmax
    result = subset.subset_bbox(tasmax, lon_bnds=self.lon, lat_bnds=self.lat)

    # On an irregular lat/lon grid the data matrix stays rectangular in its
    # native projection, with cells outside the bbox set to NaN. Coordinates
    # outside the bbox can therefore remain, so only the non-NaN grid cells
    # of the first time step are checked.
    valid = ~np.isnan(result.sel(time=result.time[0]))
    kept_lon = result.lon.values[valid]
    assert np.all(kept_lon >= np.min(self.lon))
    assert np.all(kept_lon <= np.max(self.lon))
    kept_lat = result.lat.values[valid]
    assert np.all(kept_lat >= np.min(self.lat))
    assert np.all(kept_lat <= np.max(self.lat))
def _subset_function(dataset): nonlocal count count += 1 percentage = start_percentage + int((count - 1) / n_files * (end_percentage - start_percentage)) self.write_log(f"Processing file {count} of {n_files}", response, percentage) dataset = dataset[variables] if variables else dataset if lat1 is None and lon1 is None: return subset_gridpoint(dataset, lon=lon0, lat=lat0, start_yr=y0, end_yr=y1) else: return subset_bbox( dataset, lon_bnds=[lon0, lon1], lat_bnds=[lat0, lat1], start_yr=y0, end_yr=y1 )
def test_inverted_coords(self):
    """Subset a Dataset whose lon and lat coordinates are in descending order."""
    lon = np.linspace(-90, -60, 200)
    lat = np.linspace(40, 80, 100)

    # Flip both axes so the coordinates are stored from high to low.
    flipped_coords = {"lon": np.flip(lon), "lat": np.flip(lat)}
    ds = xr.Dataset(data_vars=None, coords=flipped_coords)
    values = np.random.rand(lon.size, lat.size)
    ds["data"] = xr.DataArray(values, dims=["lon", "lat"])

    result = subset.subset_bbox(ds, lon_bnds=self.lon, lat_bnds=self.lat)
    assert result.lon.values.size != 0
    assert result.lat.values.size != 0

    lon_bnds = np.asarray(self.lon)
    assert np.all(result.lon >= np.min(lon_bnds))
    assert np.all(result.lon <= np.max(lon_bnds))
    assert np.all(result.lat >= np.min(self.lat))
    assert np.all(result.lat <= np.max(self.lat))