def test_time_start_only(self):
    """Subsetting with only a start date keeps data through the series end."""
    da = open_dataset(self.nc_poslons).tas
    year = "2050"  # start date only

    # "%Y-%m" start in January: selection begins in that year, end untouched.
    with pytest.warns(None):
        out = subset.subset_time(da, start_date=f"{year}-01")
    np.testing.assert_array_equal(out.time.dt.year.min(), int(year))
    np.testing.assert_array_equal(out.time.dt.year.max(), da.time.dt.year.max())

    # Mid-year "%Y-%m" start: the first retained month is July.
    with pytest.warns(None):
        out = subset.subset_time(da, start_date=f"{year}-07")
    np.testing.assert_array_equal(out.time.dt.year.min(), int(year))
    np.testing.assert_array_equal(out.time.min().dt.month, 7)
    np.testing.assert_array_equal(out.time.dt.year.max(), da.time.dt.year.max())
    np.testing.assert_array_equal(out.time.max(), da.time.max())

    # Full "%Y-%m-%d" start: the first retained day is exactly July 15.
    with pytest.warns(None):
        out = subset.subset_time(da, start_date=f"{year}-07-15")
    np.testing.assert_array_equal(out.time.dt.year.min(), int(year))
    np.testing.assert_array_equal(out.time.min().dt.month, 7)
    np.testing.assert_array_equal(out.time.min().dt.day, 15)
    np.testing.assert_array_equal(out.time.dt.year.max(), da.time.dt.year.max())
    np.testing.assert_array_equal(out.time.max(), da.time.max())
def test_warnings(self):
    """Invalid arguments to subset_time raise or warn appropriately.

    Consistency fix: use the ``open_dataset`` test helper like every sibling
    test instead of ``xr.open_dataset`` on the same ``self.nc_poslons``
    attribute; also drop the unused ``as record`` binding on ``pytest.raises``.
    """
    da = open_dataset(self.nc_poslons).tas

    # A start date after the end date is rejected outright.
    with pytest.raises(ValueError):
        subset.subset_time(da, start_date="2059", end_date="2050")
    # The retired start_yr/end_yr keywords are no longer accepted.
    with pytest.raises(TypeError):
        subset.subset_time(da, start_yr=2050, end_yr=2059)
    # Integer dates are tolerated but flagged with a format warning.
    with pytest.warns(None) as record:
        subset.subset_time(da, start_date=2050, end_date=2055)
    assert (
        'start_date and end_date require dates in (type: str) using formats of "%Y", "%Y-%m" or "%Y-%m-%d".'
        in [str(q.message) for q in record]
    )
    # Datetimes not matching an existing time step are nudged; both nudges
    # are reported, in order.
    with pytest.warns(None) as record:
        subset.subset_time(
            da, start_date="2064-01-01T00:00:00", end_date="2065-02-01T03:12:01"
        )
    assert [str(q.message) for q in record] == [
        '"start_date" has been nudged to nearest valid time step in xarray object.',
        '"end_date" has been nudged to nearest valid time step in xarray object.',
    ]
def test_simple(self):
    """Year, year-month and full-date bounds select the same 10-year span."""
    da = open_dataset(self.nc_poslons).tas
    first, last = "2050", "2059"

    by_year = subset.subset_time(da, start_date=first, end_date=last)
    by_month = subset.subset_time(
        da, start_date=f"{first}-01", end_date=f"{last}-12"
    )
    by_day = subset.subset_time(
        da, start_date=f"{first}-01-01", end_date=f"{last}-12-31"
    )

    # All three spellings of the same bounds are equivalent.
    np.testing.assert_array_equal(by_year, by_month)
    np.testing.assert_array_equal(by_year, by_day)
    # Exactly ten calendar years survive, bounded by the requested years.
    np.testing.assert_array_equal(len(np.unique(by_year.time.dt.year)), 10)
    np.testing.assert_array_equal(by_year.time.dt.year.max(), int(last))
    np.testing.assert_array_equal(by_year.time.dt.year.min(), int(first))
def test_time_incomplete_years(self):
    """Mid-year bounds: "%Y-%m" and "%Y-%m-%d" forms agree and clip precisely."""
    da = open_dataset(self.nc_poslons).tas
    start_year, end_year = "2050", "2059"

    full_dates = subset.subset_time(
        da, start_date=f"{start_year}-07-01", end_date=f"{end_year}-06-30"
    )
    year_months = subset.subset_time(
        da, start_date=f"{start_year}-07", end_date=f"{end_year}-06"
    )

    # A "%Y-%m" bound expands to the full month, matching the explicit dates.
    np.testing.assert_array_equal(full_dates, year_months)
    # Selection starts on 2050-07-01 …
    np.testing.assert_array_equal(full_dates.time.dt.year.min(), int(start_year))
    np.testing.assert_array_equal(full_dates.time.min().dt.month, 7)
    np.testing.assert_array_equal(full_dates.time.min().dt.day, 1)
    # … and ends on 2059-06-30.
    np.testing.assert_array_equal(full_dates.time.dt.year.max(), int(end_year))
    np.testing.assert_array_equal(full_dates.time.max().dt.month, 6)
    np.testing.assert_array_equal(full_dates.time.max().dt.day, 30)
def test_time_end_only(self):
    """Subsetting with only an end date keeps data from the series start."""
    da = open_dataset(self.nc_poslons).tas
    year = "2059"  # end date only

    # "%Y-%m" end: clipped to the last day of that month, start untouched.
    with pytest.warns(None):
        out = subset.subset_time(da, end_date=f"{year}-01")
    np.testing.assert_array_equal(out.time.dt.year.max(), int(year))
    np.testing.assert_array_equal(out.time.max().dt.month, 1)
    np.testing.assert_array_equal(out.time.max().dt.day, 31)
    np.testing.assert_array_equal(out.time.min(), da.time.min())

    # Full "%Y-%m-%d" end: clipped to that exact day.
    with pytest.warns(None):
        out = subset.subset_time(da, end_date=f"{year}-06-15")
    np.testing.assert_array_equal(out.time.dt.year.max(), int(year))
    np.testing.assert_array_equal(out.time.max().dt.month, 6)
    np.testing.assert_array_equal(out.time.max().dt.day, 15)
    np.testing.assert_array_equal(out.time.min(), da.time.min())
def test_time_dates_outofbounds(self):
    """Out-of-range bounds fall back to the data limits and warn about it."""
    da = open_dataset(self.nc_poslons).tas
    too_early, too_late = "1776", "2077"

    with pytest.warns(None) as record:
        out = subset.subset_time(
            da, start_date=f"{too_early}-01", end_date=f"{too_late}-01"
        )
    # The result spans the full available range.
    np.testing.assert_array_equal(out.time.dt.year.min(), da.time.dt.year.min())
    np.testing.assert_array_equal(out.time.dt.year.max(), da.time.dt.year.max())

    # Both fallbacks were announced.
    emitted = [str(q.message) for q in record]
    assert (
        '"start_date" not found within input date time range. Defaulting to minimum time step in xarray object.'
        in emitted
    )
    assert (
        '"end_date" not found within input date time range. Defaulting to maximum time step in xarray object.'
        in emitted
    )
def test_warnings(self):
    """Bad argument combinations raise, and non-string dates warn."""
    da = open_dataset(self.nc_poslons).tas

    # A start date after the end date is rejected.
    with pytest.raises(ValueError):
        subset.subset_time(da, start_date="2059", end_date="2050")
    # The retired start_yr/end_yr keywords are no longer accepted.
    with pytest.raises(TypeError):
        subset.subset_time(da, start_yr=2050, end_yr=2059)
    # Integer dates are tolerated but flagged with a format warning.
    with pytest.warns(None) as record:
        subset.subset_time(da, start_date=2050, end_date=2055)
    assert (
        'start_date and end_date require dates in (type: str) using formats of "%Y", "%Y-%m" or "%Y-%m-%d".'
        in [str(q.message) for q in record]
    )
def finch_average_shape(
    process: Process,
    netcdf_inputs: List[ComplexInput],
    request_inputs: RequestInputs,
) -> List[Path]:
    """Parse wps `request_inputs` based on their name and average `netcdf_inputs`.

    The expected names of the request_inputs are as followed (taken from `wpsio.py`):

    - shape: Polygon contour to average the data over.
    - start_date: Initial date for temporal subsetting.
    - end_date: Final date for temporal subsetting.

    Returns the list of paths to the averaged NetCDF files, one per input
    resource; resources whose spatial average is empty are skipped with a
    warning.
    """
    shp = Path(request_inputs[wpsio.shape.identifier][0].file)
    if shp.suffix == ".zip":
        shp = extract_shp(shp)

    start_date = single_input_or_none(request_inputs, wpsio.start_date.identifier)
    end_date = single_input_or_none(request_inputs, wpsio.end_date.identifier)
    tolerance = single_input_or_none(request_inputs, wpsio.tolerance.identifier)
    variables = [r.data for r in request_inputs.get("variable", [])]

    shape = gpd.read_file(shp)
    # BUGFIX: `single_input_or_none` may return None when the input is absent;
    # the previous bare `tolerance > 0` then raised TypeError.
    if tolerance is not None and tolerance > 0:
        shape["geometry"] = shape.simplify(tolerance)

    # If not subsetting by time, it's not necessary to decode times.
    # Loop-invariant, so computed once up front.
    time_subset = start_date is not None or end_date is not None

    n_files = len(netcdf_inputs)
    output_files = []

    for count, resource in enumerate(netcdf_inputs, start=1):
        dataset = try_opendap(
            resource, decode_times=time_subset, chunk_dims=["time", "realization"]
        )
        write_log(
            process,
            f"Averaging file {count} of {n_files} ({getattr(resource, resource.prop)})",
            subtask_percentage=(count - 1) * 100 // n_files,
        )
        dataset = dataset[variables] if variables else dataset
        if time_subset:
            dataset = subset_time(dataset, start_date=start_date, end_date=end_date)
        averaged = average_shape(dataset, shape)
        if not all(averaged.dims.values()):
            # BUGFIX: skip an empty result instead of the previous bare
            # `return`, which returned None (violating the List[Path]
            # annotation) and discarded files already produced for earlier
            # resources.
            LOGGER.warning(f"Average is empty for dataset: {resource.url}")
            continue
        p = make_subset_file_name(resource, kind="avg")
        output_filename = Path(process.workdir) / p
        dataset_to_netcdf(averaged, output_filename)
        output_files.append(output_filename)
    return output_files