示例#1
0
    def test_raise(self):
        """Check that ``subset_bbox`` raises on invalid input.

        Two cases are exercised: a ``start_date`` that falls after the
        ``end_date`` (expects ``ValueError``), and the ``nc_2dlonlat`` variable
        with its ``lon``/``lat`` variables dropped (expects any exception).
        """
        tas = open_dataset(self.nc_poslons).tas
        # start_date ("2056") is later than end_date ("2055")
        reversed_dates = dict(
            lon_bnds=self.lonGCM,
            lat_bnds=self.latGCM,
            start_date="2056",
            end_date="2055",
        )
        with pytest.raises(ValueError):
            subset.subset_bbox(tas, **reversed_dates)

        tasmax = open_dataset(self.nc_2dlonlat).tasmax
        without_lonlat = tasmax.drop_vars(names=["lon", "lat"])
        with pytest.raises(Exception):
            subset.subset_bbox(without_lonlat,
                               lon_bnds=self.lon,
                               lat_bnds=self.lat)
示例#2
0
    def test_single_bounds_rectilinear(self):
        """Subset ``tasmax`` from ``nc_file`` giving only one pair of bounds.

        With only ``lon_bnds`` the latitude coordinate must be returned
        unchanged, and with only ``lat_bnds`` the longitude coordinate must be
        returned unchanged; the constrained axis must lie within the bounds.
        """
        tasmax = open_dataset(self.nc_file).tasmax

        # --- longitude bounds only ---
        lon_only = subset.subset_bbox(tasmax, lon_bnds=self.lon)
        assert lon_only.lon.values.size != 0
        assert lon_only.lat.values.size != 0
        np.testing.assert_array_equal(lon_only.lat, tasmax.lat)
        lon_upper = np.max(self.lon)
        assert np.all(lon_only.lon <= lon_upper)
        lon_lower = np.min(self.lon)
        assert np.all(lon_only.lon.values >= lon_lower)

        # --- latitude bounds only ---
        lat_only = subset.subset_bbox(tasmax, lat_bnds=self.lat)
        assert lat_only.lon.values.size != 0
        assert lat_only.lat.values.size != 0
        np.testing.assert_array_equal(lat_only.lon, tasmax.lon)
        lat_upper = np.max(self.lat)
        assert np.all(lat_only.lat <= lat_upper)
        lat_lower = np.min(self.lat)
        assert np.all(lat_only.lat.values >= lat_lower)
示例#3
0
    def test_single_bounds_curvilinear(self):
        """Subset ``tasmax`` from ``nc_2dlonlat`` giving only one pair of bounds.

        Bounds are only checked on grid cells that are not NaN at the first
        time step of the subset result.
        """
        tasmax = open_dataset(self.nc_2dlonlat).tasmax

        # --- longitude bounds only ---
        lon_only = subset.subset_bbox(tasmax, lon_bnds=self.lon)
        assert lon_only.lon.values.size != 0
        assert lon_only.lat.values.size != 0
        first_step = lon_only.sel(time=lon_only.time[0])
        valid = ~(np.isnan(first_step))
        assert np.all(lon_only.lon.values[valid.values] <= np.max(self.lon))
        assert np.all(lon_only.lon.values[valid.values] >= np.min(self.lon))

        # --- latitude bounds only ---
        lat_only = subset.subset_bbox(tasmax, lat_bnds=self.lat)
        assert lat_only.lon.values.size != 0
        assert lat_only.lat.values.size != 0
        first_step = lat_only.sel(time=lat_only.time[0])
        valid = ~(np.isnan(first_step))
        assert np.all(lat_only.lat.values[valid.values] <= np.max(self.lat))
        assert np.all(lat_only.lat.values[valid.values] >= np.min(self.lat))
示例#4
0
    def test_positive_lons(self):
        """Subset the ``nc_poslons`` ``tas`` variable with two bound conventions.

        The bounds in ``self.lonGCM`` are passed once as-is and once shifted by
        +360; in both cases the resulting longitudes are compared against the
        +360-shifted bounds.
        """
        tas = open_dataset(self.nc_poslons).tas

        # bounds given as stored in self.lonGCM
        out = subset.subset_bbox(tas,
                                 lon_bnds=self.lonGCM,
                                 lat_bnds=self.latGCM)
        assert out.lon.values.size != 0
        assert out.lat.values.size != 0
        lon_floor = np.min(np.asarray(self.lonGCM) + 360)
        assert np.all(out.lon >= lon_floor)
        lon_ceiling = np.max(np.asarray(self.lonGCM) + 360)
        assert np.all(out.lon <= lon_ceiling)
        assert np.all(out.lat >= np.min(self.latGCM))
        assert np.all(out.lat <= np.max(self.latGCM))

        # bounds already shifted by +360
        shifted_bounds = np.array(self.lonGCM) + 360
        out = subset.subset_bbox(tas,
                                 lon_bnds=shifted_bounds,
                                 lat_bnds=self.latGCM)
        assert np.all(out.lon >= np.min(np.asarray(self.lonGCM) + 360))
示例#5
0
    def test_time(self):
        """Check temporal subsetting with year-only and full-date strings.

        The ``nc_poslons`` ``tas`` longitudes are shifted by -360 before
        subsetting. For each scenario the spatial extent is checked against
        ``self.lonGCM`` / ``self.latGCM`` and the first and last time stamps
        are compared with the expected (year, month, day).
        """
        tas = open_dataset(self.nc_poslons).tas
        tas = tas.assign_coords(lon=(tas.lon - 360))

        # (start_date, end_date, expected first y/m/d, expected last y/m/d)
        scenarios = (
            ("2050", "2059", (2050, 1, 1), (2059, 12, 31)),
            ("2050-02-05", "2059-07-15", (2050, 2, 5), (2059, 7, 15)),
        )

        for start, end, first_ymd, last_ymd in scenarios:
            out = subset.subset_bbox(
                tas,
                lon_bnds=self.lonGCM,
                lat_bnds=self.latGCM,
                start_date=start,
                end_date=end,
            )
            assert out.lon.values.size != 0
            assert out.lat.values.size != 0
            assert np.all(out.lon >= np.min(self.lonGCM))
            assert np.all(out.lon <= np.max(self.lonGCM))
            assert np.all(out.lat >= np.min(self.latGCM))
            assert np.all(out.lat <= np.max(self.latGCM))

            # earliest time stamp first, then the latest one
            for stamp, (year, month, day) in (
                (out.time.min(), first_ymd),
                (out.time.max(), last_ymd),
            ):
                np.testing.assert_array_equal(stamp.dt.year, year)
                np.testing.assert_array_equal(stamp.dt.month, month)
                np.testing.assert_array_equal(stamp.dt.day, day)
示例#6
0
    def test_irregular_dataset(self):
        """Subset the whole ``nc_2dlonlat`` dataset rather than one variable.

        Every data variable other than ``tasmax`` must keep its dimensions and
        values, and the ``tasmax`` result must match subsetting the
        ``tasmax`` DataArray on its own.
        """
        ds = open_dataset(self.nc_2dlonlat)
        ds_out = subset.subset_bbox(ds, lon_bnds=[-150, 100], lat_bnds=[10, 60])

        # only tasmax should be subsetted/masked others should remain untouched
        other_vars = list(ds.data_vars)
        other_vars.remove("tasmax")
        for name in other_vars:
            assert ds_out[name].dims == ds[name].dims
            np.testing.assert_array_equal(ds_out[name], ds[name])

        # ensure results are equal to previous test on DataArray only
        da_out = subset.subset_bbox(ds.tasmax,
                                    lon_bnds=[-150, 100],
                                    lat_bnds=[10, 60])
        np.testing.assert_array_equal(da_out, ds_out.tasmax)

        # additional test if dimensions have no coordinates
        # (smoke test: the call only has to complete without raising)
        ds = ds.drop_vars(["rlon", "rlat"])
        subset.subset_bbox(ds.tasmax, lon_bnds=[-150, 100], lat_bnds=[10, 60])
示例#7
0
    def test_badly_named_latlons(self):
        """Check that alternative lat/lon coordinate names are handled.

        The ``nc_file`` dataset is renamed with each mapping below, subset,
        and the new names must still be present in the result's dimensions.
        """
        ds = open_dataset(self.nc_file)

        rename_maps = (
            {"lat": "latitude", "lon": "longitude"},
            {"lon": "long"},
            {"lon": "lons", "lat": "lats"},
        )
        for mapping in rename_maps:
            renamed = ds.rename(mapping)
            out = subset.subset_bbox(renamed,
                                     lon_bnds=self.lon,
                                     lat_bnds=self.lat)
            # the new names are exactly the values of the rename mapping
            assert set(mapping.values()).issubset(out.dims)
示例#8
0
 def test_dataset(self):
     """Subset a multi-file dataset holding both ``tasmax`` and ``tasmin``.

     The second file path is derived from ``self.nc_file`` by replacing
     "tasmax" with "tasmin". After subsetting, coordinates must lie within
     the bounds and both variables must have the same shape.
     """
     paths = [self.nc_file, self.nc_file.replace("tasmax", "tasmin")]
     ds = xr.open_mfdataset(paths, combine="by_coords")

     out = subset.subset_bbox(ds, lon_bnds=self.lon, lat_bnds=self.lat)

     lon_lo = np.min(self.lon)
     assert np.all(out.lon >= lon_lo)
     lon_hi = np.max(self.lon)
     assert np.all(out.lon <= lon_hi)
     lat_lo = np.min(self.lat)
     assert np.all(out.lat >= lat_lo)
     lat_hi = np.max(self.lat)
     assert np.all(out.lat <= lat_hi)
     np.testing.assert_array_equal(out.tasmin.shape, out.tasmax.shape)
示例#9
0
    def test_warnings(self):
        """Check keyword handling around the old ``start_yr``/``end_yr`` API.

        * Passing ``start_yr`` / ``end_yr`` must raise ``TypeError``.
        * Passing ``start_date`` / ``end_date`` strings must not emit the
          ``start_yr``/``end_yr`` deprecation message.
        """
        # Local import so this method stays self-contained.
        import warnings

        da = open_dataset(self.nc_poslons).tas
        da = da.assign_coords(lon=(da.lon - 360))

        with pytest.raises(TypeError):
            subset.subset_bbox(da,
                               lon_bnds=self.lon,
                               lat_bnds=self.lat,
                               start_yr=2050,
                               end_yr=2059)

        # ``pytest.warns(None)`` was deprecated in pytest 7 and raises a
        # TypeError in pytest 8, so record warnings with the standard library.
        with warnings.catch_warnings(record=True) as record:
            # Make sure no warning is hidden by an existing filter.
            warnings.simplefilter("always")
            subset.subset_bbox(
                da,
                lon_bnds=self.lon,
                lat_bnds=self.lat,
                start_date="2050",
                end_date="2055",
            )
        assert (
            '"start_yr" and "end_yr" (type: int) are being deprecated. Temporal subsets will soon exclusively'
            ' support "start_date" and "end_date" (type: str) using formats of "%Y", "%Y-%m" or "%Y-%m-%d".'
            not in [str(q.message) for q in record])
示例#10
0
    def test_irregular(self):
        """Subset ``tasmax`` from ``nc_2dlonlat`` with both lon and lat bounds.

        Only cells that are not NaN at the first time step are compared with
        the requested bounds.
        """
        tasmax = open_dataset(self.nc_2dlonlat).tasmax

        out = subset.subset_bbox(tasmax, lon_bnds=self.lon, lat_bnds=self.lat)

        # for irregular lat lon grids data matrix remains rectangular in native proj
        # but with data outside bbox assigned nans.  This means it can have lon and lats outside the bbox.
        # Check only non-nans gridcells using mask
        first_step = out.sel(time=out.time[0])
        valid = ~(np.isnan(first_step))
        assert out.lon.values.size != 0
        assert out.lat.values.size != 0

        lon_lo, lon_hi = np.min(self.lon), np.max(self.lon)
        assert np.all(out.lon.values[valid.values] >= lon_lo)
        assert np.all(out.lon.values[valid.values] <= lon_hi)

        lat_lo, lat_hi = np.min(self.lat), np.max(self.lat)
        assert np.all(out.lat.values[valid.values] >= lat_lo)
        assert np.all(out.lat.values[valid.values] <= lat_hi)
示例#11
0
    def test_inverted_coords(self):
        """Subset a synthetic dataset whose lon/lat coordinates are reversed.

        The coordinates are built in increasing order and flipped before being
        stored, and the data variable is filled with random values. The result
        must be non-empty and lie within ``self.lon`` / ``self.lat``.
        """
        lon_increasing = np.linspace(-90, -60, 200)
        lat_increasing = np.linspace(40, 80, 100)

        reversed_coords = {
            "lon": np.flip(lon_increasing),
            "lat": np.flip(lat_increasing),
        }
        ds = xr.Dataset(data_vars=None, coords=reversed_coords)
        random_field = np.random.rand(lon_increasing.size, lat_increasing.size)
        ds["data"] = xr.DataArray(random_field, dims=["lon", "lat"])

        out = subset.subset_bbox(ds, lon_bnds=self.lon, lat_bnds=self.lat)

        assert out.lon.values.size != 0
        assert out.lat.values.size != 0
        lon_bounds = np.asarray(self.lon)
        assert np.all(out.lon >= np.min(lon_bounds))
        assert np.all(out.lon <= np.max(lon_bounds))
        assert np.all(out.lat >= np.min(self.lat))
        assert np.all(out.lat <= np.max(self.lat))
示例#12
0
    def _subset(resource):
        nonlocal count

        # if not subsetting by time, it's not necessary to decode times
        time_subset = start_date is not None or end_date is not None
        dataset = try_opendap(resource, decode_times=time_subset)

        with lock:
            count += 1
            write_log(
                process,
                f"Subsetting file {count} of {n_files} ({getattr(resource, resource.prop)})",
                subtask_percentage=(count - 1) * 100 // n_files,
            )

        dataset = dataset[variables] if variables else dataset

        try:
            subsetted = subset_bbox(
                dataset,
                lon_bnds=[lon0, lon1],
                lat_bnds=[lat0, lat1],
                start_date=start_date,
                end_date=end_date,
            )
        except ValueError:
            subsetted = False

        if subsetted is False or not all(subsetted.dims.values()):
            LOGGER.warning(f"Subset is empty for dataset: {resource.url}")
            return

        p = make_subset_file_name(resource)
        output_filename = Path(process.workdir) / p

        dataset_to_netcdf(subsetted, output_filename)

        output_files.append(output_filename)
示例#13
0
    def test_simple(self):
        """Exercise ``subset_bbox`` on regular grids, with and without dates.

        Covers four calls: spatial bounds only, both ``start_date`` and
        ``end_date``, ``start_date`` only and ``end_date`` only. When one date
        is omitted, the corresponding end of the time axis must match the
        input data.
        """

        def check_extent(result, lon_bnds, lat_bnds):
            # result must be non-empty and lie within the requested bounds
            assert result.lon.values.size != 0
            assert result.lat.values.size != 0
            assert np.all(result.lon >= np.min(lon_bnds))
            assert np.all(result.lon <= np.max(lon_bnds))
            assert np.all(result.lat >= np.min(lat_bnds))
            assert np.all(result.lat <= np.max(lat_bnds))

        # --- spatial bounds only, on the tasmax file ---
        tasmax = open_dataset(self.nc_file).tasmax
        out = subset.subset_bbox(tasmax, lon_bnds=self.lon, lat_bnds=self.lat)
        assert out.lon.values.size != 0
        assert out.lat.values.size != 0
        assert np.all(out.lon >= np.min(self.lon))
        assert np.all(out.lon <= np.max(self.lon))
        assert np.all(out.lat.values >= np.min(self.lat))
        assert np.all(out.lat <= np.max(self.lat))

        # --- temporal subsets, on tas with longitudes shifted by -360 ---
        tas = open_dataset(self.nc_poslons).tas
        tas = tas.assign_coords(lon=(tas.lon - 360))
        first_year = 2050
        last_year = 2059

        # both start and end dates
        out = subset.subset_bbox(
            tas,
            lon_bnds=self.lonGCM,
            lat_bnds=self.latGCM,
            start_date=str(first_year),
            end_date=str(last_year),
        )
        check_extent(out, self.lonGCM, self.latGCM)
        np.testing.assert_array_equal(out.time.dt.year.max(), last_year)
        np.testing.assert_array_equal(out.time.dt.year.min(), first_year)

        # start date only: the end of the series is that of the input
        out = subset.subset_bbox(tas,
                                 lon_bnds=self.lon,
                                 lat_bnds=self.lat,
                                 start_date=str(first_year))
        check_extent(out, self.lon, self.lat)
        np.testing.assert_array_equal(out.time.dt.year.max(),
                                      tas.time.dt.year.max())
        np.testing.assert_array_equal(out.time.dt.year.min(), first_year)

        # end date only: the start of the series is that of the input
        out = subset.subset_bbox(tas,
                                 lon_bnds=self.lon,
                                 lat_bnds=self.lat,
                                 end_date=str(last_year))
        check_extent(out, self.lon, self.lat)
        np.testing.assert_array_equal(out.time.dt.year.max(), last_year)
        np.testing.assert_array_equal(out.time.dt.year.min(),
                                      tas.time.dt.year.min())
示例#14
0
def get_subsetted_forecast(region_coll, ds, times, is_caspar):
    """
    Subset a forecast dataset to a region and return its spatial average.

    The dataset is first cut to the bounding box of ``region_coll`` and to the
    requested ``times``, then clipped with the first geometry of the
    collection and averaged over the two spatial dimensions.

    Parameters
    ----------
    region_coll : fiona.collection.Collection
      The region vectors. Its ``bounds`` give the bounding box, its
      ``crs["init"]`` entry gives the EPSG code and its first feature gives
      the clipping geometry.
    ds : xarray.Dataset
      The dataset containing the raw, worldwide forecast data. It must
      provide the ``tas`` and ``pr`` variables.
    times : array-like of dt.datetime
      The array of times required to do the forecast; passed to
      ``.sel(time=times)``.
    is_caspar : boolean
      True if the data comes from Caspar, false otherwise. Used to define
      lat/lon on rotated grid: the spatial dimensions are ``rlon``/``rlat``
      when true and ``lon``/``lat`` otherwise.

    Returns
    -------
    forecast : xarray.Dataset
      The forecast dataset, averaged over the spatial dimensions.

    Raises
    ------
    ValueError
      If ``region_coll.crs["init"]`` does not start with ``epsg:<digits>``.

    """
    # Extract the bounding box to subset the entire forecast grid to something
    # more manageable. ``bounds`` is read as (lon_min, lat_min, lon_max, lat_max).
    lon_min = region_coll.bounds[0]
    lon_max = region_coll.bounds[2]
    lat_min = region_coll.bounds[1]
    lat_max = region_coll.bounds[3]

    # Subset the data to the desired location (bounding box) and times
    ds = subset.subset_bbox(ds,
                            lon_bnds=[lon_min, lon_max],
                            lat_bnds=[lat_min, lat_max]).sel(time=times)

    # Rioxarray requires CRS definitions for variables
    # Get CRS, e.g. 4326
    crs_init = region_coll.crs["init"]
    epsg_match = re.match(r"epsg:(\d+)", crs_init)
    if epsg_match is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise ValueError(
            f"Cannot extract an EPSG code from region CRS {crs_init!r}; "
            "expected a value of the form 'epsg:<code>'.")
    crs = int(epsg_match.group(1))

    # Here the name of the variable could differ based on the Caspar file processing
    tas = ds.tas.rio.write_crs(crs)
    pr = ds.pr.rio.write_crs(crs)
    ds = xr.merge([tas, pr])

    # Caspar data uses the rotated-grid dimension names.
    x_dim, y_dim = ("rlon", "rlat") if is_caspar else ("lon", "lat")

    # Now apply the mask of the basin contour and average the values to get a single time series
    # NOTE(review): the return value is discarded, so this relies on
    # set_spatial_dims modifying ``ds`` in place - confirm against rioxarray.
    ds.rio.set_spatial_dims(x_dim, y_dim)
    # Shift the x coordinate by -360 (presumably from a 0..360 convention to
    # negative western longitudes - TODO confirm).
    ds[x_dim] = ds[x_dim] - 360

    # clip the netcdf and average across space.
    # Only the first feature of the collection is used as the clipping shape.
    shdf = [next(iter(region_coll))["geometry"]]
    forecast = ds.rio.clip(shdf, crs=crs)
    forecast = forecast.mean(dim={y_dim, x_dim}, keep_attrs=True)

    return forecast