def fetch(self, grouped: VirtualDatasetBox, **load_settings: Dict[str, Any]) -> xarray.Dataset:
    """
    Load the grouped datasets into an `xarray.Dataset`.

    :param grouped: grouped datasets together with their product definitions,
        geobox / geopolygon and the `load_natively` flag
    :param load_settings: load-time overrides; only keys listed in
        `self._LOAD_KEYS` (other than 'measurements') are merged with the
        settings stored on this product
    :return: whatever `Datacube.load_data` produces for the selected
        measurements on the chosen geobox
    """
    # NOTE(review): per PEP 484 the annotation on `**load_settings` applies to
    # each keyword value, so `Any` is probably what was meant -- confirm.
    mergeable_keys = self._LOAD_KEYS - {'measurements'}
    own_settings = select_keys(self, mergeable_keys)
    call_settings = select_keys(load_settings, mergeable_keys)
    merged = merge_search_terms(own_settings, call_settings)

    product = grouped.product_definitions[self._product]

    # Measurements requested at load time must be among those this product
    # was configured with.
    if 'measurements' in self and 'measurements' in load_settings:
        for requested in load_settings['measurements']:
            self._assert(requested in self['measurements'],
                         '{} not found in {}'.format(requested, self._product))

    measurement_dicts = self.output_measurements(grouped.product_definitions,
                                                 load_settings.get('measurements'))

    if not grouped.load_natively:
        geobox = grouped.geobox
    else:
        canonical_names = [product.canonical_measurement(key)
                           for key in measurement_dicts]
        native_boxes = [
            native_geobox(ds, measurements=canonical_names, basis=merged.get('like'))
            for ds in grouped.box.sum().item()
        ]
        dataset_geobox = geobox_union_conservative(native_boxes)

        if grouped.geopolygon is None:
            geobox = dataset_geobox
        else:
            # Crop the native grid to the requested polygon; the crop must be
            # a pure pixel-aligned window of the native grid.
            polygon_geobox = GeoBox.from_geopolygon(grouped.geopolygon,
                                                    crs=dataset_geobox.crs,
                                                    align=dataset_geobox.alignment,
                                                    resolution=dataset_geobox.resolution)
            reproject_roi = compute_reproject_roi(dataset_geobox, polygon_geobox)

            self._assert(reproject_roi.is_st, "native load is not axis-aligned")
            self._assert(numpy.isclose(reproject_roi.scale, 1.0),
                         "native load should not require scaling")

            geobox = dataset_geobox[reproject_roi.roi_src]

    return Datacube.load_data(grouped.box,
                              geobox,
                              list(measurement_dicts.values()),
                              fuse_func=merged.get('fuse_func'),
                              dask_chunks=merged.get('dask_chunks'),
                              resampling=merged.get('resampling', 'nearest'))
def reproject_band(band, geobox, resampling, dims, dask_chunks=None):
    """
    Reproject a single measurement to the geobox.

    When `band.data` has no `dask` attribute, or no `dask_chunks` are given,
    the reprojection is done immediately with `reproject_array`. Otherwise a
    dask graph layer is assembled by hand with one task per output tile.

    :param band: measurement to reproject (reads `.data`, `.nodata`,
        `.geobox`, `.name`, `.dtype`)
    :param geobox: destination geobox
    :param resampling: resampling setting forwarded to `reproject_array`
    :param dims: dimensions forwarded to `wrap_in_dataarray`
    :param dask_chunks: mapping from dimension name to chunk size; missing
        spatial dimensions default to the full extent of `geobox`
    :return: result of `wrap_in_dataarray` for the reprojected data
    """
    lazy = hasattr(band.data, 'dask') and dask_chunks is not None
    if not lazy:
        warped = reproject_array(band.data, band.nodata, band.geobox, geobox, resampling)
        return wrap_in_dataarray(warped, band, geobox, dims)

    dask_name = 'warp_{name}-{token}'.format(name=band.name, token=uuid.uuid4().hex)

    spatial_chunks = tuple(dask_chunks.get(dim, geobox.shape[axis])
                           for axis, dim in enumerate(geobox.dims))
    tiles = GeoboxTiles(geobox, spatial_chunks)

    dependencies = [band.data]
    layer = {}

    for tile_index in numpy.ndindex(tiles.shape):
        task_key = (dask_name, *tile_index)
        tile_geobox = tiles[tile_index]

        # find the input array slice needed for this output tile
        roi = compute_reproject_roi(band.geobox, tile_geobox, padding=1)

        # select that slice from the input and collapse it into one chunk
        source_piece = band[(..., ) + roi.roi_src].chunk(-1)

        if min(source_piece.shape) == 0:
            # nothing to read for this tile: fill it with nodata
            layer[task_key] = (numpy.full, tile_geobox.shape, band.nodata, band.dtype)
            continue

        # register the input piece as a dependency of the new layer
        source_data = source_piece.data
        dependencies.append(source_data)
        # key of the (single) input chunk handed to `reproject_array`
        source_key = list(flatten(source_data.__dask_keys__()))[0]
        # task that reprojects the input chunk onto the output tile
        layer[task_key] = (reproject_array,
                           source_key, band.nodata, source_piece.geobox,
                           tile_geobox, resampling)

    # Create a new graph with the additional layer and pack it into a
    # dask.array. Only regular chunking is allowed through the higher level
    # dask.array interface, so manipulating the graph directly seems to be the
    # easiest way to obtain a dask.array with irregular chunks after reproject.
    graph = band.data.dask.from_collections(dask_name, layer, dependencies=dependencies)
    data = dask.array.Array(graph,
                            dask_name,
                            chunks=spatial_chunks,
                            dtype=band.dtype,
                            shape=tiles.base.shape)

    return wrap_in_dataarray(data, band, geobox, dims)
def _read(gbox,
          resampling='nearest',
          fallback_nodata=-999,
          dst_nodata=-999,
          check_paste=False):
    """
    Read band 1 of the file at `mm.path` into a fresh array shaped like `gbox`.

    :param gbox: destination geobox; its shape is the shape of the output
    :param resampling: resampling setting forwarded to `read_time_slice`
    :param fallback_nodata: nodata value handed to the data source
    :param dst_nodata: value the output is pre-filled with, also forwarded
        to `read_time_slice`
    :param check_paste: when true, assert that `can_paste` accepts the
        reader-to-`gbox` mapping before reading
    :return: `(array, roi)` where `roi` is what `read_time_slice` returned
    """
    source = RasterFileDataSource(mm.path, 1, nodata=fallback_nodata)

    with source.open() as rdr:
        if check_paste:
            # make sure this read goes through the paste code path
            mapping = compute_reproject_roi(rdr_geobox(rdr), gbox)
            paste_ok, reason = can_paste(mapping)
            assert paste_ok is True, reason

        canvas = np.full(gbox.shape, dst_nodata, dtype=rdr.dtype)
        roi = read_time_slice(rdr, canvas, gbox, resampling, dst_nodata)
        return canvas, roi
def test_compute_reproject_roi():
    """Exercise `compute_reproject_roi` for a CRS change and for plain crops."""
    src = AlbersGS.tile_geobox((15, -40))
    whole_tile = np.s_[0:src.height, 0:src.width]

    # different CRS: whole source is needed and the mapping is not a pure
    # scale + translation
    buffered_extent = src.extent.to_crs(epsg3857).buffer(10)
    dst = geometry.GeoBox.from_geopolygon(buffered_extent, resolution=src.resolution)

    result = compute_reproject_roi(src, dst)

    assert result.roi_src == whole_tile
    assert 0 < result.scale < 1
    assert result.is_st is False
    assert result.transform.linear is None
    assert result.scale in result.scale2

    # pure translation case: destination is a crop of the source
    crop = np.s_[113:-100, 33:-10]
    cropped = src[crop]
    expected_roi = roi_normalise(crop, src.shape)

    result = compute_reproject_roi(src, cropped)
    assert result.roi_src == expected_roi
    assert result.scale == 1
    assert result.is_st is True

    # same crop with padding and alignment switched off
    result = compute_reproject_roi(src, cropped, padding=0, align=0)
    assert result.roi_src == expected_roi
    assert result.scale == 1
    assert result.scale2 == (1, 1)

    # same crop with align=256: the source roi is expected to cover the
    # whole tile
    result = compute_reproject_roi(src, cropped, align=256)
    assert result.roi_src == whole_tile
    assert result.scale == 1

    # source and destination rois must have matching shapes for a crop
    result = compute_reproject_roi(src, cropped)
    assert result.scale == 1
    assert roi_shape(result.roi_src) == roi_shape(result.roi_dst)
    assert roi_shape(result.roi_dst) == cropped.shape
def test_compute_reproject_roi_issue1047():
    """
    Regression test for issue 1047.

    `compute_reproject_roi(geobox, geobox[roi])` sometimes returned
    `src_roi != roi` when `geobox` had (1) tiny pixels and (2) an oddly
    sized `alignment`. Check that the roi now round-trips.
    """
    pixel_size = 0.00027778
    transform = Affine(pixel_size, 0.0, 148.72673054908861,
                       0.0, -pixel_size, -34.98825802556622)
    geobox = GeoBox(3000, 3000, transform, "EPSG:4326")

    src_roi = np.s_[2800:2810, 10:30]
    expected_dst_roi = np.s_[0:10, 0:20]

    result = compute_reproject_roi(geobox, geobox[src_roi])

    assert result.is_st is True
    assert result.roi_src == src_roi
    assert result.roi_dst == expected_dst_roi
def _read(gbox,
          resampling='nearest',
          fallback_nodata=-999,
          dst_nodata=-999,
          check_paste=False):
    """
    Read the file at `mm.path` via `read_time_slice_v2` into an array shaped
    like `gbox`.

    :param gbox: destination geobox; its shape is the shape of the output
    :param resampling: resampling setting forwarded to `read_time_slice_v2`
    :param fallback_nodata: nodata value handed to `open_reader`
    :param dst_nodata: value the output is pre-filled with, also forwarded
        to `read_time_slice_v2`
    :param check_paste: when true, assert that `can_paste` accepts the
        reader-to-`gbox` mapping before reading
    :return: `(array, roi)`; pixels returned by the reader are written into
        `array[roi]`, everything else keeps `dst_nodata`
    """
    rdr = open_reader(mm.path, nodata=fallback_nodata)

    if check_paste:
        # make sure this read goes through the paste code path
        mapping = compute_reproject_roi(rdr_geobox(rdr), gbox)
        paste_ok, reason = can_paste(mapping)
        assert paste_ok is True, reason

    canvas = np.full(gbox.shape, dst_nodata, dtype=rdr.dtype)
    pixels, roi = read_time_slice_v2(rdr, gbox, resampling, dst_nodata)
    canvas[roi] = pixels

    return canvas, roi
def test_compute_reproject_roi_issue647():
    """
    Regression test for issue 647.

    In some scenarios non-overlapping geoboxes resulted in a non-empty
    `roi_dst` even though `roi_src` was empty. Both must be empty here.
    """
    from datacube.utils.geometry import CRS

    src_transform = Affine(10, 0, 300000, 0, -10, 5900020)
    dst_transform = Affine(10, 0, 1730240, 0, -10, -4170240)

    src = GeoBox(10980, 10980, src_transform, CRS('epsg:32756'))
    dst = GeoBox(976, 976, dst_transform, CRS('EPSG:3577'))

    # precondition: the two footprints really do not overlap
    dst_extent_in_src_crs = dst.extent.to_crs(src.crs)
    assert src.extent.overlaps(dst_extent_in_src_crs) is False

    result = compute_reproject_roi(src, dst)

    assert roi_is_empty(result.roi_src)
    assert roi_is_empty(result.roi_dst)
def check_false(dst, **kwargs):
    """
    Assert that `can_paste` rejects the mapping from `src` to `dst`.

    Extra keyword arguments are forwarded to `can_paste`. `src` comes from
    the enclosing scope.
    """
    mapping = compute_reproject_roi(src, dst)
    ok, _reason = can_paste(mapping, **kwargs)
    assert not ok, "Expected can_paste to return False, but got True"
def check_true(dst, **kwargs):
    """
    Assert that `can_paste` accepts the mapping from `src` to `dst`.

    Extra keyword arguments are forwarded to `can_paste`; on failure the
    reason it reported is used as the assertion message. `src` comes from
    the enclosing scope.
    """
    mapping = compute_reproject_roi(src, dst)
    ok, reason = can_paste(mapping, **kwargs)
    assert ok, reason
def dask_reproject(
    src: da.Array,
    src_geobox: GeoBox,
    dst_geobox: GeoBox,
    resampling: str = "nearest",
    chunks: Optional[Tuple[int, int]] = None,
    src_nodata: Optional[NodataType] = None,
    dst_nodata: Optional[NodataType] = None,
    axis: int = 0,
    name: str = "reproject",
) -> da.Array:
    """
    Reproject to GeoBox as dask operation.

    One task is created per destination tile returned by
    `GeoboxTiles.tiles(src_geobox.extent)`; every other destination block is
    filled with `dst_nodata` (or 0 when no nodata is known).

    :param src: Input src[(time,) y,x (, band)]; dimensions after Y,X are not
        supported yet (asserted)
    :param src_geobox: GeoBox of the source array, must match the Y,X shape
        of `src` (asserted)
    :param dst_geobox: GeoBox of the destination
    :param resampling: Resampling strategy as a string: nearest, bilinear,
        average, mode ...
    :param chunks: In Y,X dimensions only, default is to use same input chunk
        size
    :param src_nodata: nodata marker for source image
    :param dst_nodata: nodata marker for dst image, defaults to `src_nodata`
    :param axis: Index of Y axis (default is 0)
    :param name: Dask graph name, "reproject" is the default; it is passed
        through `randomize` before use
    :return: dask array with the Y,X axes of `src` replaced by the shape of
        `dst_geobox`
    """
    yx = slice(axis, axis + 2)

    if chunks is None:
        chunks = src.chunksize[yx]
    if dst_nodata is None:
        dst_nodata = src_nodata

    assert src.shape[yx] == src_geobox.shape

    yx_shape = dst_geobox.shape
    yx_chunks = unpack_chunks(chunks, yx_shape)

    # output keeps the non-spatial axes of the input unchanged
    dst_chunks = src.chunks[:axis] + yx_chunks + src.chunks[axis + 2:]
    dst_shape = src.shape[:axis] + yx_shape + src.shape[axis + 2:]

    # tuple(*dims1, y, x, *dims2) -- complete shape in blocks
    leading_blocks = tuple(len(c) for c in dst_chunks[:axis])
    trailing_blocks = tuple(len(c) for c in dst_chunks[axis + 2:])
    assert trailing_blocks == ()

    dependencies = [src]
    first_tile_shape = (yx_chunks[0][0], yx_chunks[1][0])
    tiles = GeoboxTiles(dst_geobox, first_tile_shape)
    tiles_with_data = list(tiles.tiles(src_geobox.extent))

    name = randomize(name)
    graph: Dict[Any, Any] = {}

    if src.dtype == "bool":
        block_impl = _reproject_block_bool_impl
    else:
        block_impl = _reproject_block_impl

    for tile_idx in tiles_with_data:
        tile_geobox = tiles[tile_idx]
        roi = compute_reproject_roi(src_geobox, tile_geobox)
        src_crop = crop_2d_dense(src, roi.roi_src, axis=axis)
        src_crop_geobox = src_geobox[roi.roi_src]
        dependencies.append(src_crop)

        for lead_idx in np.ndindex(leading_blocks):
            # TODO: band dims
            # NOTE(review): the (0, 0) spatial block index presumably relies on
            # crop_2d_dense returning a single Y,X chunk -- confirm.
            graph[(name, *lead_idx, *tile_idx)] = (
                block_impl,
                (src_crop.name, *lead_idx, 0, 0),
                src_crop_geobox,
                tile_geobox,
                resampling,
                src_nodata,
                dst_nodata,
                axis,
            )

    if dst_nodata is None:
        fill_value = 0
    else:
        fill_value = dst_nodata

    blocks_per_axis = tuple(len(c) for c in dst_chunks)
    make_empty = empty_maker(fill_value, src.dtype, graph)

    # every block that did not get a reproject task becomes an empty block
    for block_idx in np.ndindex(blocks_per_axis):
        # TODO: other dims
        key = (name, *block_idx)
        if key in graph:
            continue
        block_shape = tuple(sizes[i] for sizes, i in zip(dst_chunks, block_idx))
        graph[key] = make_empty(block_shape)

    hlg = HighLevelGraph.from_collections(name, graph, dependencies=dependencies)

    return da.Array(hlg, name, chunks=dst_chunks, dtype=src.dtype, shape=dst_shape)