def test_gridspec():
    """Exercise GridSpec tiling of a polygon, tile coordinates and the geobox cache."""
    crs_code = 'EPSG:4326'
    gs = GridSpec(crs=geometry.CRS(crs_code),
                  tile_size=(1, 1),
                  resolution=(-0.1, 0.1),
                  origin=(10, 10))

    # Closed ring (first vertex repeated at the end).
    ring = [(10, 12.2), (10.8, 13), (13, 10.8), (12.2, 10), (10, 12.2)]
    poly = geometry.polygon(ring, crs=geometry.CRS(crs_code))

    # tiles_from_geopolygon yields (tile_index, geobox) pairs
    cells = dict(gs.tiles_from_geopolygon(poly))

    expected_indexes = {(0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1)}
    assert set(cells) == expected_indexes

    # Pixel-centre coordinates of tile (2, 0); latitude is listed high-to-low.
    tile_coords = cells[(2, 0)].coordinates
    assert numpy.allclose(tile_coords['longitude'].values,
                          numpy.linspace(12.05, 12.95, num=10))
    assert numpy.allclose(tile_coords['latitude'].values,
                          numpy.linspace(10.95, 10.05, num=10))

    # check geobox_cache: a second query with the same cache dict must hand
    # back the very same geobox object
    cache = {}
    poly = gs.tile_geobox((3, 4)).extent
    [(c1, gbox1)] = gs.tiles_from_geopolygon(poly, geobox_cache=cache)
    [(c2, gbox2)] = gs.tiles_from_geopolygon(poly, geobox_cache=cache)

    assert c1 == (3, 4) and c2 == c1
    assert gbox1 is gbox2
def gs_bounds(gs: GridSpec, tiles: Tuple[Tuple[int, int], Tuple[int, int]]) -> Geometry:
    """
    Build one polygon spanning a rectangular selection of tiles.

    :param gs: GridSpec
    :param tiles: (x_range, y_range), each range given as (start, stop) with
                  start inclusive and stop exclusive, same as numpy slicing.
    :return: polygon produced by ``polygon_from_transform`` for the combined
             pixel grid of the selected tiles.
    """
    x_range, y_range = tiles
    x_start, x_stop = x_range
    y_start, y_stop = y_range

    # With a negative first resolution component the anchor tile is the one
    # with the largest Y index in the selection, otherwise the smallest.
    # NOTE(review): presumably this picks the tile holding the pixel-grid
    # origin of the whole selection -- confirm against GridSpec conventions.
    anchor_y = (y_stop - 1) if gs.resolution[0] < 0 else y_start
    anchor = gs.tile_geobox((x_start, anchor_y))

    # Total pixel counts: number of tiles along each axis times pixels per tile.
    total_cols = (x_stop - x_start) * anchor.shape[1]
    total_rows = (y_stop - y_start) * anchor.shape[0]

    return polygon_from_transform(total_cols, total_rows, anchor.affine, anchor.crs)
class AfricaGeobox:
    """
    Generate the geobox for each tile according to the longitude and
    latitude bounds.

    A ``GridSpec`` with 96 km square tiles is built in the requested CRS, and
    a fixed lon/lat box (-18, -38, 60, 30 in epsg:4326) is projected into the
    same CRS to define the area from which tiles are enumerated.
    """

    def __init__(self,
                 resolution: Tuple[int, int] = (-20, 20),
                 crs: str = "epsg:6933"):
        """
        :param resolution: pixel resolution handed to the GridSpec
        :param crs: CRS string used for the grid and for the projected bounds
        """
        self.albers_africa_N = GridSpec(
            crs=CRS(crs),
            tile_size=(96_000.0, 96_000.0),  # default
            resolution=resolution,
        )
        lonlat_bounds = box(-18, -38, 60, 30, "epsg:4326")
        self.africa_projected = lonlat_bounds.to_crs(crs, resolution=math.inf)

    def tile_geobox(self, tile_index: Tuple[int, int]) -> GeoBox:
        """Return the geobox of a single tile, delegating to the GridSpec."""
        grid = self.albers_africa_N
        return grid.tile_geobox(tile_index)

    @property
    def geobox_dict(self) -> Dict:
        """Dictionary built from the GridSpec tiles over the projected bounding box."""
        bounds = self.africa_projected.boundingbox
        tile_pairs = self.albers_africa_N.tiles(bounds)
        return dict(tile_pairs)
class TaskReader:
    """
    Produce ``Task`` objects from a dataset cache.

    The cache may be given as an already opened ``DatasetCache`` or as a
    string location. Strings starting with ``s3://`` are first fetched with
    ``s3_download``; the resulting path is removed again when the reader is
    garbage collected.
    """

    def __init__(self,
                 cache: Union[str, DatasetCache],
                 product: Optional[OutputProduct] = None):
        """
        :param cache: opened ``DatasetCache`` or a path/``s3://`` url to one
        :param product: default output product used when a method is called
                        without an explicit ``product``
        """
        # Only set when the cache was downloaded; ``__del__`` unlinks it.
        self._cache_path = None

        if isinstance(cache, str):
            if cache.startswith("s3://"):
                self._cache_path = s3_download(cache)
                cache = self._cache_path
            cache = DatasetCache.open_ro(cache)

        # TODO: verify this things are set in the file
        cfg = cache.get_info_dict("stats/config")
        grid = cfg["grid"]
        gridspec = cache.grids[grid]

        self._product = product
        self._dscache = cache
        self._cfg = cfg
        self._grid = grid
        self._gridspec = gridspec
        self._all_tiles = sorted(idx for idx, _ in cache.tiles(grid))

    def is_compatible_resolution(self, resolution: Tuple[float, float], tol=1e-8):
        """
        Check that every tile side is a whole number of pixels at ``resolution``.

        :param resolution: pixel size per axis; only the magnitude is used
        :param tol: largest accepted difference between the tile size and
                    ``npix * pixel_size``
        :return: ``True`` when all axes divide evenly, ``False`` otherwise
                 (including when a resolution component is zero)
        """
        for res, sz in zip(resolution, self._gridspec.tile_size):
            res = abs(res)
            if res == 0:
                # A zero-sized pixel can never tile anything.
                return False
            # Round to the nearest pixel count rather than truncating:
            # floating point division such as 0.7 / 0.1 yields
            # 6.999999999999999 and truncation would wrongly pick 6 pixels.
            npix = round(sz / res)
            if abs(npix * res - sz) > tol:
                return False
        return True

    def change_resolution(self, resolution: Tuple[float, float]):
        """
        Modify GridSpec to have different pixel resolution but still covering same tiles as the original.

        :raises ValueError: when ``resolution`` does not divide the tile size evenly
        """
        if not self.is_compatible_resolution(resolution):
            raise ValueError(
                "Supplied resolution is not compatible with the current GridSpec"
            )
        gs = self._gridspec
        self._gridspec = GridSpec(gs.crs,
                                  gs.tile_size,
                                  resolution=resolution,
                                  origin=gs.origin)

    def __del__(self):
        # ``getattr`` keeps finalisation quiet for instances that never ran
        # ``__init__`` (e.g. created through ``__new__``).
        cache_path = getattr(self, "_cache_path", None)
        if cache_path is not None:
            os.unlink(cache_path)

    def __repr__(self) -> str:
        grid, path, n = self._grid, str(self._dscache.path), len(self._all_tiles)
        return f"<{path}> grid:{grid} n:{n:,d}"

    def _resolve_product(self, product: Optional[OutputProduct]) -> OutputProduct:
        """
        Return ``product`` or fall back to the default given at construction.

        :raises ValueError: when neither is available
        """
        if product is None:
            product = self._product

        if product is None:
            raise ValueError("Product is not supplied and default is not set")
        return product

    @property
    def product(self) -> OutputProduct:
        """Default output product; raises ``ValueError`` when none was set."""
        return self._resolve_product(None)

    @property
    def all_tiles(self) -> List[TileIdx_txy]:
        """Sorted tile indexes found in the cache for the configured grid."""
        return self._all_tiles

    def datasets(self, tile_index: TileIdx_txy) -> Tuple[Dataset, ...]:
        """Return all datasets the cache streams for ``tile_index`` on this grid."""
        return tuple(self._dscache.stream_grid_tile(tile_index, self._grid))

    def load_task(
        self,
        tile_index: TileIdx_txy,
        product: Optional[OutputProduct] = None,
        source: Any = None,
    ) -> Task:
        """
        Assemble the ``Task`` for one tile.

        :param tile_index: tile index; its first element is passed to
                           ``DateTimeRange`` and ``_xy`` extracts the spatial part
        :param product: output product, defaults to the reader's product
        :param source: opaque value stored on the task unchanged
        """
        product = self._resolve_product(product)

        dss = self.datasets(tile_index)
        tidx_xy = _xy(tile_index)

        return Task(
            product=product,
            tile_index=tidx_xy,
            geobox=self._gridspec.tile_geobox(tidx_xy),
            time_range=DateTimeRange(tile_index[0]),
            datasets=dss,
            source=source,
        )

    def stream(self,
               tiles: Iterable[TileIdx_txy],
               product: Optional[OutputProduct] = None) -> Iterator[Task]:
        """Lazily yield one ``Task`` per entry of ``tiles``."""
        product = self._resolve_product(product)
        for tidx in tiles:
            yield self.load_task(tidx, product)

    def stream_from_sqs(
        self,
        sqs_queue,
        product: Optional[OutputProduct] = None,
        visibility_timeout: int = 3600,
        **kw,
    ) -> Iterator[Task]:
        """
        Yield tasks described by messages read from an SQS queue.

        :param sqs_queue: queue object, or a string that is resolved with ``get_queue``
        :param product: output product, defaults to the reader's product
        :param visibility_timeout: forwarded to ``get_messages`` and to the
                                   ``SQSWorkToken`` attached to each task
        :param kw: extra keyword arguments forwarded to ``get_messages``
        """
        from odc.aws.queue import get_messages, get_queue
        from ._sqs import SQSWorkToken

        product = self._resolve_product(product)

        if isinstance(sqs_queue, str):
            sqs_queue = get_queue(sqs_queue)

        for msg in get_messages(sqs_queue,
                                visibility_timeout=visibility_timeout,
                                **kw):
            # TODO: switch to JSON for SQS message body
            token = SQSWorkToken(msg, visibility_timeout)
            tidx = parse_task(msg.body)
            yield self.load_task(tidx, product, source=token)
def test_gridworkflow():
    """ Test GridWorkflow with padding option. """
    # unittest.mock is the standard-library home of the ``mock`` backport.
    from unittest.mock import MagicMock, patch
    import datetime

    # ----- fake a datacube -----
    # e.g. let there be a dataset that coincides with a grid cell

    fakecrs = geometry.CRS('EPSG:4326')

    grid = 100  # spatial frequency in crs units
    pixel = 10  # square pixel linear dimension in crs units
    # if cell(0,0) has lower left corner at grid origin,
    # and cell indices increase toward upper right,
    # then this will be cell(1,-2).
    gridspec = GridSpec(crs=fakecrs, tile_size=(grid, grid),
                        resolution=(-pixel, pixel))  # e.g. product gridspec

    fakedataset = MagicMock()
    fakedataset.extent = geometry.box(left=grid, bottom=-grid,
                                      right=2 * grid, top=-2 * grid,
                                      crs=fakecrs)
    fakedataset.center_time = t = datetime.datetime(2001, 2, 15)
    fakedataset.id = uuid.uuid4()

    fakeindex = PickableMock()
    fakeindex._db = None
    fakeindex.datasets.get_field_names.return_value = ['time']  # permit query on time
    fakeindex.datasets.search_eager.return_value = [fakedataset]

    # ------ test without padding ----

    from datacube.api.grid_workflow import GridWorkflow
    gw = GridWorkflow(fakeindex, gridspec)
    # Need to force the fake index otherwise the driver manager will
    # only take its _db
    gw.index = fakeindex
    query = dict(product='fake_product_name',
                 time=('2001-1-1 00:00:00', '2001-3-31 23:59:59'))

    # test backend : that it finds the expected cell/dataset
    assert list(gw.cell_observations(**query).keys()) == [(1, -2)]

    # again but with geopolygon
    assert list(
        gw.cell_observations(
            **query,
            geopolygon=gridspec.tile_geobox((1, -2)).extent).keys()) == [(1, -2)]

    # tile_buffer and geopolygon are mutually exclusive
    with pytest.raises(ValueError) as e:
        list(
            gw.cell_observations(
                **query,
                tile_buffer=(1, 1),
                geopolygon=gridspec.tile_geobox((1, -2)).extent).keys())
    assert str(e.value) == 'Cannot process tile_buffering and geopolygon together.'

    # test frontend
    assert len(gw.list_tiles(**query)) == 1

    # ------ introduce padding --------

    assert len(gw.list_tiles(tile_buffer=(20, 20), **query)) == 9

    # ------ add another dataset (to test grouping) -----

    # consider cell (2,-2)
    fakedataset2 = MagicMock()
    fakedataset2.extent = geometry.box(left=2 * grid, bottom=-grid,
                                       right=3 * grid, top=-2 * grid,
                                       crs=fakecrs)
    fakedataset2.center_time = t
    fakedataset2.id = uuid.uuid4()

    def search_eager(lat=None, lon=None, **kwargs):
        return [fakedataset, fakedataset2]

    fakeindex.datasets.search_eager = search_eager

    # unpadded
    assert len(gw.list_tiles(**query)) == 2
    ti = numpy.datetime64(t, 'ns')
    assert set(gw.list_tiles(**query).keys()) == {(1, -2, ti), (2, -2, ti)}

    # padded
    assert len(gw.list_tiles(tile_buffer=(20, 20), **query)) == 12  # not 18=2*9 because of grouping

    # -------- inspect particular returned tile objects --------

    # check the array shape

    tile = gw.list_tiles(**query)[1, -2, ti]  # unpadded example
    assert grid / pixel == 10
    assert tile.shape == (1, 10, 10)

    padded_tile = gw.list_tiles(tile_buffer=(20, 20), **query)[1, -2, ti]  # padded example
    # Expected: a 20-unit buffer is 2 pixels of 10 units on each side,
    # so each spatial dimension grows from 10 to 10 + 2 * 2 = 14.
    assert padded_tile.shape == (1, 14, 14)

    # count the sources

    assert len(tile.sources.isel(time=0).item()) == 1
    assert len(padded_tile.sources.isel(time=0).item()) == 2

    # check the geocoding

    assert tile.geobox.alignment == padded_tile.geobox.alignment
    assert tile.geobox.affine * (0, 0) == padded_tile.geobox.affine * (2, 2)
    assert tile.geobox.affine * (10, 10) == padded_tile.geobox.affine * (10 + 2, 10 + 2)

    # ------- check loading --------
    # GridWorkflow accesses the load_data API
    # to ultimately convert geobox,sources,measurements to xarray,
    # so only thing to check here is the call interface.

    # ``numpy.int`` was only an alias of the builtin ``int`` and no longer
    # exists in current NumPy releases, so use the builtin directly.
    measurement = dict(nodata=0, dtype=int)
    fakedataset.type.lookup_measurements.return_value = {'dummy': measurement}
    fakedataset2.type = fakedataset.type

    with patch('datacube.api.core.Datacube.load_data') as loader:
        data = GridWorkflow.load(tile)
        data2 = GridWorkflow.load(padded_tile)
        # Note, could also test Datacube.load for consistency (but may require more patching)

    assert data is data2 is loader.return_value
    assert loader.call_count == 2

    # Note, use of positional arguments here is not robust, could spec mock etc.
    for (args, kwargs), loadable in zip(loader.call_args_list, [tile, padded_tile]):
        args = list(args)
        assert args[0] is loadable.sources
        assert args[1] is loadable.geobox
        assert list(args[2].values())[0] is measurement
        assert 'resampling' in kwargs

    # ------- check single cell index extract -------
    tile = gw.list_tiles(cell_index=(1, -2), **query)
    assert len(tile) == 1
    assert tile[1, -2, ti].shape == (1, 10, 10)
    assert len(tile[1, -2, ti].sources.values[0]) == 1

    padded_tile = gw.list_tiles(cell_index=(1, -2), tile_buffer=(20, 20), **query)
    assert len(padded_tile) == 1
    assert padded_tile[1, -2, ti].shape == (1, 14, 14)
    assert len(padded_tile[1, -2, ti].sources.values[0]) == 2