Пример #1
0
def test_gridspec():
    """Check GridSpec tiling of a polygon and reuse of geoboxes via ``geobox_cache``."""
    gs = GridSpec(crs=geometry.CRS('EPSG:4326'),
                  tile_size=(1, 1),
                  resolution=(-0.1, 0.1),
                  origin=(10, 10))

    # Closed ring (first point repeated at the end) used as the query polygon.
    ring = [(10, 12.2), (10.8, 13), (13, 10.8), (12.2, 10), (10, 12.2)]
    poly = geometry.polygon(ring, crs=geometry.CRS('EPSG:4326'))

    # tiles_from_geopolygon yields (tile index, geobox) pairs.
    cells = dict(gs.tiles_from_geopolygon(poly))

    expected_indexes = {
        (0, 1), (0, 2),
        (1, 0), (1, 1), (1, 2),
        (2, 0), (2, 1),
    }
    assert set(cells) == expected_indexes

    # Pixel-centre coordinates of tile (2, 0): ten samples per axis.
    longitudes = cells[(2, 0)].coordinates['longitude'].values
    assert numpy.isclose(longitudes,
                         numpy.linspace(12.05, 12.95, num=10)).all()
    latitudes = cells[(2, 0)].coordinates['latitude'].values
    assert numpy.isclose(latitudes,
                         numpy.linspace(10.95, 10.05, num=10)).all()

    # check geobox_cache: two queries sharing one cache dict must hand back
    # the very same geobox object for the same tile.
    cache = {}
    poly = gs.tile_geobox((3, 4)).extent
    [(c1, gbox1)] = gs.tiles_from_geopolygon(poly, geobox_cache=cache)
    [(c2, gbox2)] = gs.tiles_from_geopolygon(poly, geobox_cache=cache)

    assert c1 == (3, 4)
    assert c2 == c1
    assert gbox1 is gbox2
Пример #2
0
def gs_bounds(gs: GridSpec, tiles: Tuple[Tuple[int, int],
                                         Tuple[int, int]]) -> Geometry:
    """
    Build the polygon that covers a rectangular selection of tiles.

    :param gs: GridSpec the tile indexes belong to
    :param tiles: ``(x_range, y_range)``, each range given as ``(start, stop)``
    :return: polygon produced by ``polygon_from_transform`` for the whole selection

    Both ranges include ``start`` and exclude ``stop``, like numpy slices.
    """
    x_range, y_range = tiles
    x_start, x_stop = x_range
    y_start, y_stop = y_range

    # The geobox of one "anchor" tile supplies the affine transform and CRS
    # for the whole selection. When the first resolution component is negative
    # the anchor is the tile with the highest selected y index, otherwise the
    # one with the lowest.
    # NOTE(review): presumably a negative value means a flipped y axis, so the
    # top pixel row belongs to the highest y index - confirm.
    anchor_y = y_stop - 1 if gs.resolution[0] < 0 else y_start
    anchor = gs.tile_geobox((x_start, anchor_y))

    # Tile shape is indexed as (rows, columns); scale by the number of tiles
    # selected along each axis to get the pixel size of the whole selection.
    width_px = (x_stop - x_start) * anchor.shape[1]
    height_px = (y_stop - y_start) * anchor.shape[0]
    return polygon_from_transform(width_px, height_px, anchor.affine,
                                  anchor.crs)
Пример #3
0
class AfricaGeobox:
    """
    Produce the geobox of each tile of a fixed grid, bounded by a
    longitude/latitude box that is re-projected into the grid CRS.
    """
    def __init__(self,
                 resolution: Tuple[int, int] = (-20, 20),
                 crs: str = "epsg:6933"):
        # Grid definition: 96 km x 96 km tiles in the requested CRS.
        self.albers_africa_N = GridSpec(
            crs=CRS(crs),
            tile_size=(96_000.0, 96_000.0),  # default
            resolution=resolution,
        )

        # Bounds are declared in EPSG:4326 and then converted to ``crs``.
        lonlat_bounds = box(-18, -38, 60, 30, "epsg:4326")
        self.africa_projected = lonlat_bounds.to_crs(crs,
                                                     resolution=math.inf)

    def tile_geobox(self, tile_index: Tuple[int, int]) -> GeoBox:
        """Return the geobox of ``tile_index`` on the underlying GridSpec."""
        grid = self.albers_africa_N
        return grid.tile_geobox(tile_index)

    @property
    def geobox_dict(self) -> Dict:
        """Mapping built from what ``GridSpec.tiles`` yields for the projected bounding box."""
        bbox = self.africa_projected.boundingbox
        return {
            index: geobox
            for index, geobox in self.albers_africa_N.tiles(bbox)
        }
Пример #4
0
class TaskReader:
    """
    Turn the tiles stored in a dataset cache into ``Task`` objects.

    The cache can be supplied as an already opened ``DatasetCache`` or as a
    path. Paths starting with ``s3://`` are fetched with ``s3_download`` first;
    the downloaded local file is deleted when the reader is finalized.
    """
    def __init__(self,
                 cache: Union[str, DatasetCache],
                 product: Optional[OutputProduct] = None):
        """
        :param cache: opened ``DatasetCache`` or a path (local or ``s3://``) to one
        :param product: default product used when a method is called without one
        """
        # Set first so that ``__del__`` can rely on it even if opening fails.
        self._cache_path = None
        if isinstance(cache, str):
            if cache.startswith("s3://"):
                self._cache_path = s3_download(cache)
                cache = self._cache_path
            cache = DatasetCache.open_ro(cache)

        # TODO: verify these things are set in the file
        cfg = cache.get_info_dict("stats/config")
        grid = cfg["grid"]
        gridspec = cache.grids[grid]

        self._product = product
        self._dscache = cache
        self._cfg = cfg
        self._grid = grid
        self._gridspec = gridspec
        self._all_tiles = sorted(idx for idx, _ in cache.tiles(grid))

    def is_compatible_resolution(self,
                                 resolution: Tuple[float, float],
                                 tol=1e-8):
        """
        Check that every tile side is a whole number of pixels at ``resolution``.

        :param resolution: pixel size per axis; only the magnitude is used
        :param tol: largest accepted difference between ``npix * res`` and the tile size
        :return: ``True`` when all axes divide evenly (within ``tol``), else ``False``.
                 A zero resolution is never compatible.
        """
        for res, sz in zip(resolution, self._gridspec.tile_size):
            res = abs(res)
            if res == 0:
                # Avoid ZeroDivisionError: a zero-sized pixel cannot tile anything.
                return False
            # round(), not int(): float division may land just below the exact
            # quotient (0.3 / 0.1 == 2.9999999999999996) and truncation would
            # then reject a resolution that divides the tile evenly.
            npix = round(sz / res)
            if abs(npix * res - sz) > tol:
                return False
        return True

    def change_resolution(self, resolution: Tuple[float, float]):
        """
        Modify GridSpec to have different pixel resolution but still covering same tiles as the original.

        :param resolution: new pixel resolution
        :raises ValueError: when ``resolution`` does not divide the tile size evenly
        """
        if not self.is_compatible_resolution(resolution):
            raise ValueError(
                "Supplied resolution is not compatible with the current GridSpec"
            )
        gs = self._gridspec
        self._gridspec = GridSpec(gs.crs,
                                  gs.tile_size,
                                  resolution=resolution,
                                  origin=gs.origin)

    def __del__(self):
        # getattr: the attribute is missing when the instance was created
        # without running __init__.
        cache_path = getattr(self, "_cache_path", None)
        if cache_path is not None:
            try:
                os.unlink(cache_path)
            except FileNotFoundError:
                # Temporary copy is already gone; nothing left to clean up.
                pass

    def __repr__(self) -> str:
        grid, path, n = self._grid, str(self._dscache.path), len(
            self._all_tiles)
        return f"<{path}> grid:{grid} n:{n:,d}"

    def _resolve_product(self,
                         product: Optional[OutputProduct]) -> OutputProduct:
        """
        Return ``product`` or, when it is ``None``, the default given at construction.

        :raises ValueError: when neither is available
        """
        if product is None:
            product = self._product

        if product is None:
            raise ValueError("Product is not supplied and default is not set")
        return product

    @property
    def product(self) -> OutputProduct:
        """Default product; raises ``ValueError`` when none was configured."""
        return self._resolve_product(None)

    @property
    def all_tiles(self) -> List[TileIdx_txy]:
        """Sorted tile indexes found in the cache (the internal list, not a copy)."""
        return self._all_tiles

    def datasets(self, tile_index: TileIdx_txy) -> Tuple[Dataset, ...]:
        """Return every dataset the cache streams for ``tile_index`` on this reader's grid."""
        return tuple(self._dscache.stream_grid_tile(tile_index, self._grid))

    def load_task(
        self,
        tile_index: TileIdx_txy,
        product: Optional[OutputProduct] = None,
        source: Any = None,
    ) -> Task:
        """
        Build the ``Task`` for one tile.

        :param tile_index: tile index; its first element is passed to ``DateTimeRange``
        :param product: product to attach, defaults to the reader's product
        :param source: opaque value stored on the task unchanged
        :raises ValueError: when no product is supplied and no default is set
        """
        product = self._resolve_product(product)

        dss = self.datasets(tile_index)
        # NOTE(review): _xy presumably drops the time component, leaving the
        # spatial (x, y) index - confirm against its definition.
        tidx_xy = _xy(tile_index)

        return Task(
            product=product,
            tile_index=tidx_xy,
            geobox=self._gridspec.tile_geobox(tidx_xy),
            time_range=DateTimeRange(tile_index[0]),
            datasets=dss,
            source=source,
        )

    def stream(self,
               tiles: Iterable[TileIdx_txy],
               product: Optional[OutputProduct] = None) -> Iterator[Task]:
        """
        Lazily yield one ``Task`` per index in ``tiles``.

        This is a generator, so the product is resolved (and a missing product
        reported with ``ValueError``) only once iteration starts.
        """
        product = self._resolve_product(product)
        for tidx in tiles:
            yield self.load_task(tidx, product)

    def stream_from_sqs(
        self,
        sqs_queue,
        product: Optional[OutputProduct] = None,
        visibility_timeout: int = 3600,
        **kw,
    ) -> Iterator[Task]:
        """
        Lazily yield one ``Task`` per message received from an SQS queue.

        :param sqs_queue: queue object, or a string that is passed to ``get_queue``
        :param product: product to attach, defaults to the reader's product
        :param visibility_timeout: forwarded to ``get_messages`` and to each ``SQSWorkToken``
        :param kw: extra keyword arguments forwarded to ``get_messages``

        Each task carries an ``SQSWorkToken`` wrapping its message as ``source``.
        """
        # Imported here rather than at module level, so they are only needed
        # when this method is actually used.
        from odc.aws.queue import get_messages, get_queue
        from ._sqs import SQSWorkToken

        product = self._resolve_product(product)

        if isinstance(sqs_queue, str):
            sqs_queue = get_queue(sqs_queue)

        for msg in get_messages(sqs_queue,
                                visibility_timeout=visibility_timeout,
                                **kw):
            # TODO: switch to JSON for SQS message body
            token = SQSWorkToken(msg, visibility_timeout)
            tidx = parse_task(msg.body)
            yield self.load_task(tidx, product, source=token)
Пример #5
0
def test_gridworkflow():
    """
    Test GridWorkflow with padding option.

    Runs against a mocked index holding one (later two) fake datasets, each
    aligned with a single grid cell, and checks tile listing, padding,
    grouping, geocoding and the arguments passed to ``Datacube.load_data``.
    """
    # Standard-library mock; no need for the third-party ``mock`` backport.
    from unittest.mock import MagicMock, patch
    import datetime

    # ----- fake a datacube -----
    # e.g. let there be a dataset that coincides with a grid cell

    fakecrs = geometry.CRS('EPSG:4326')

    grid = 100  # spatial frequency in crs units
    pixel = 10  # square pixel linear dimension in crs units
    # if cell(0,0) has lower left corner at grid origin,
    # and cell indices increase toward upper right,
    # then this will be cell(1,-2).
    gridspec = GridSpec(crs=fakecrs,
                        tile_size=(grid, grid),
                        resolution=(-pixel, pixel))  # e.g. product gridspec

    fakedataset = MagicMock()
    fakedataset.extent = geometry.box(left=grid,
                                      bottom=-grid,
                                      right=2 * grid,
                                      top=-2 * grid,
                                      crs=fakecrs)
    fakedataset.center_time = t = datetime.datetime(2001, 2, 15)
    fakedataset.id = uuid.uuid4()

    fakeindex = PickableMock()
    fakeindex._db = None
    fakeindex.datasets.get_field_names.return_value = [
        'time'
    ]  # permit query on time
    fakeindex.datasets.search_eager.return_value = [fakedataset]

    # ------ test without padding ----

    from datacube.api.grid_workflow import GridWorkflow
    gw = GridWorkflow(fakeindex, gridspec)
    # Need to force the fake index otherwise the driver manager will
    # only take its _db
    gw.index = fakeindex
    query = dict(product='fake_product_name',
                 time=('2001-1-1 00:00:00', '2001-3-31 23:59:59'))

    # test backend : that it finds the expected cell/dataset
    assert list(gw.cell_observations(**query).keys()) == [(1, -2)]

    # again but with geopolygon
    assert list(
        gw.cell_observations(**query,
                             geopolygon=gridspec.tile_geobox(
                                 (1, -2)).extent).keys()) == [(1, -2)]

    # tile_buffer and geopolygon are mutually exclusive
    with pytest.raises(ValueError) as e:
        list(
            gw.cell_observations(**query,
                                 tile_buffer=(1, 1),
                                 geopolygon=gridspec.tile_geobox(
                                     (1, -2)).extent).keys())
    assert str(
        e.value) == 'Cannot process tile_buffering and geopolygon together.'

    # test frontend
    assert len(gw.list_tiles(**query)) == 1

    # ------ introduce padding --------

    assert len(gw.list_tiles(tile_buffer=(20, 20), **query)) == 9

    # ------ add another dataset (to test grouping) -----

    # consider cell (2,-2)
    fakedataset2 = MagicMock()
    fakedataset2.extent = geometry.box(left=2 * grid,
                                       bottom=-grid,
                                       right=3 * grid,
                                       top=-2 * grid,
                                       crs=fakecrs)
    fakedataset2.center_time = t
    fakedataset2.id = uuid.uuid4()

    def search_eager(lat=None, lon=None, **kwargs):
        return [fakedataset, fakedataset2]

    fakeindex.datasets.search_eager = search_eager

    # unpadded
    assert len(gw.list_tiles(**query)) == 2
    ti = numpy.datetime64(t, 'ns')
    assert set(gw.list_tiles(**query).keys()) == {(1, -2, ti), (2, -2, ti)}

    # padded
    assert len(gw.list_tiles(tile_buffer=(20, 20), **
                             query)) == 12  # not 18=2*9 because of grouping

    # -------- inspect particular returned tile objects --------

    # check the array shape

    tile = gw.list_tiles(**query)[1, -2, ti]  # unpadded example
    assert grid / pixel == 10
    assert tile.shape == (1, 10, 10)

    padded_tile = gw.list_tiles(tile_buffer=(20, 20),
                                **query)[1, -2, ti]  # padded example
    # A buffer of 20 crs units at 10 units per pixel adds 2 pixels on every
    # side: 10 + 2 * 2 == 14.
    assert padded_tile.shape == (1, 14, 14)

    # count the sources

    assert len(tile.sources.isel(time=0).item()) == 1
    assert len(padded_tile.sources.isel(time=0).item()) == 2

    # check the geocoding

    assert tile.geobox.alignment == padded_tile.geobox.alignment
    assert tile.geobox.affine * (0, 0) == padded_tile.geobox.affine * (2, 2)
    assert tile.geobox.affine * (10, 10) == padded_tile.geobox.affine * (
        10 + 2, 10 + 2)

    # ------- check loading --------
    # GridWorkflow accesses the load_data API
    # to ultimately convert geobox,sources,measurements to xarray,
    # so only thing to check here is the call interface.

    # Builtin ``int``: the ``numpy.int`` alias for it no longer exists in
    # current NumPy releases.
    measurement = dict(nodata=0, dtype=int)
    fakedataset.type.lookup_measurements.return_value = {'dummy': measurement}
    fakedataset2.type = fakedataset.type

    with patch('datacube.api.core.Datacube.load_data') as loader:

        data = GridWorkflow.load(tile)
        data2 = GridWorkflow.load(padded_tile)
        # Note, could also test Datacube.load for consistency (but may require more patching)

    assert data is data2 is loader.return_value
    assert loader.call_count == 2

    # Note, use of positional arguments here is not robust, could spec mock etc.
    for (args, kwargs), loadable in zip(loader.call_args_list,
                                        [tile, padded_tile]):
        args = list(args)
        assert args[0] is loadable.sources
        assert args[1] is loadable.geobox
        assert list(args[2].values())[0] is measurement
        assert 'resampling' in kwargs

    # ------- check single cell index extract -------
    tile = gw.list_tiles(cell_index=(1, -2), **query)
    assert len(tile) == 1
    assert tile[1, -2, ti].shape == (1, 10, 10)
    assert len(tile[1, -2, ti].sources.values[0]) == 1

    padded_tile = gw.list_tiles(cell_index=(1, -2),
                                tile_buffer=(20, 20),
                                **query)
    assert len(padded_tile) == 1
    assert padded_tile[1, -2, ti].shape == (1, 14, 14)
    assert len(padded_tile[1, -2, ti].sources.values[0]) == 2