def group(self, datasets: VirtualDatasetBag, **group_settings: Dict[str, Any]) -> VirtualDatasetBox:
    """
    Datasets grouped by their timestamps.

    :param datasets: the `VirtualDatasetBag` to fetch data from
    """
    geopolygon = datasets.geopolygon
    merged = merge_search_terms(self, group_settings)

    if geopolygon is None:
        selected = list(datasets.contained_datasets())
    else:
        selected = None

    geobox = output_geobox(datasets=selected,
                           output_crs=self['reproject']['output_crs'],
                           resolution=self['reproject']['resolution'],
                           align=self['reproject'].get('align'),
                           geopolygon=geopolygon)

    # load natively
    input_box = self._input.group(datasets, **reject_keys(merged, self._GEOBOX_KEYS))

    return VirtualDatasetBox(input_box.box, geobox, True,
                             datasets.product_definitions,
                             geopolygon=geopolygon)
def test_output_geobox_load_hints():
    geobox0 = AlbersGS.tile_geobox((15, -40))

    geobox = output_geobox(load_hints={'output_crs': geobox0.crs,
                                       'resolution': geobox0.resolution},
                           geopolygon=geobox0.extent)
    assert geobox == geobox0
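For comparison, here is a minimal sketch (not part of the test suite) of driving output_geobox with explicit output_crs/resolution instead of load_hints; the bounding box is made up for illustration:

from datacube.api.core import output_geobox
from datacube.utils import geometry

# hypothetical area of interest: output_crs/resolution define the grid,
# the geopolygon supplies the bounds
poly = geometry.box(148.0, -36.0, 149.0, -35.0, crs=geometry.CRS('EPSG:4326'))
gbox = output_geobox(output_crs='EPSG:3577', resolution=(-25, 25), geopolygon=poly)
print(gbox.crs, gbox.shape)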
def _product_group_(self, datasets, **search_terms):
    # select only those inside the ROI
    # ROI could be smaller than the query for the `query` method
    if query_geopolygon(**search_terms) is not None:
        geopolygon = query_geopolygon(**search_terms)
        selected = list(select_datasets_inside_polygon(datasets.pile, geopolygon))
    else:
        geopolygon = datasets.geopolygon
        selected = list(datasets.pile)

    # geobox
    merged = merge_search_terms(select_keys(self, self._NON_SPATIAL_KEYS),
                                select_keys(search_terms, self._NON_SPATIAL_KEYS))

    geobox = output_geobox(datasets=selected,
                           grid_spec=datasets.grid_spec,
                           geopolygon=geopolygon,
                           **select_keys(merged, self._GEOBOX_KEYS))

    # group by time
    group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

    # information needed for Datacube.load_data
    return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                             geobox,
                             datasets.product_definitions)
def group(self, datasets: VirtualDatasetBag, **group_settings: Dict[str, Any]) -> VirtualDatasetBox:
    geopolygon = datasets.geopolygon
    selected = list(datasets.bag)

    # geobox
    merged = merge_search_terms(self, group_settings)

    try:
        geobox = output_geobox(datasets=selected,
                               grid_spec=datasets.product_definitions[self._product].grid_spec,
                               geopolygon=geopolygon,
                               **select_keys(merged, self._GEOBOX_KEYS))
        load_natively = False
    except ValueError:
        # we are not calculating geoboxes here for the moment
        # since it may require filesystem access
        # in ODC 2.0 the dataset should know the information required
        geobox = None
        load_natively = True

    # group by time
    group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

    # information needed for Datacube.load_data
    return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                             geobox,
                             load_natively,
                             datasets.product_definitions,
                             geopolygon=None if not load_natively else geopolygon)
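The group-by-time step above pairs query_group_by with Datacube.group_datasets. A sketch of the same pairing outside a virtual product, assuming a connected Datacube instance and a hypothetical product name:

from datacube import Datacube
from datacube.api.query import query_group_by

dc = Datacube()
found = dc.find_datasets(product='ls8_nbar_albers', time=('2018-01', '2018-02'))

group_query = query_group_by(group_by='time')           # GroupBy specification
grouped = Datacube.group_datasets(found, group_query)   # DataArray of dataset tuples indexed by time
print(grouped.time.values)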
def test_output_geobox_fail_paths():
    with pytest.raises(ValueError):
        output_geobox()

    with pytest.raises(ValueError):
        output_geobox(output_crs='EPSG:4326')  # need resolution as well

    # need bounds
    with pytest.raises(ValueError):
        output_geobox(output_crs='EPSG:4326', resolution=(1, 1))
def test_output_geobox_fail_paths():
    from datacube.api.core import output_geobox

    gs_nores = GridSpec(crs=geometry.CRS('EPSG:4326'),
                        tile_size=None,
                        resolution=None)

    with pytest.raises(ValueError):
        output_geobox()

    with pytest.raises(ValueError):
        output_geobox(output_crs='EPSG:4326')  # need resolution as well

    with pytest.raises(ValueError):
        output_geobox(grid_spec=gs_nores)  # GridSpec with missing resolution
def group(self, datasets: VirtualDatasetBag, **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
    geopolygon = datasets.geopolygon
    selected = list(datasets.pile)

    # geobox
    merged = merge_search_terms(self, search_terms)
    geobox = output_geobox(datasets=selected,
                           grid_spec=datasets.product_definitions[self._product].grid_spec,
                           geopolygon=geopolygon,
                           **select_keys(merged, self._GEOBOX_KEYS))

    # group by time
    group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

    # information needed for Datacube.load_data
    return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                             geobox,
                             datasets.product_definitions)
def test_like_geobox():
    from datacube.testutils.geom import AlbersGS
    from datacube.api.core import output_geobox

    geobox = AlbersGS.tile_geobox((15, -40))
    assert output_geobox(like=geobox) is geobox
def test_like_geobox():
    geobox = AlbersGS.tile_geobox((15, -40))
    assert output_geobox(like=geobox) is geobox
def group(self, datasets, **search_terms):
    # type: (QueryResult, Dict[str, Any]) -> DatasetPile
    """
    Datasets grouped by their timestamps.

    :param datasets: the `QueryResult` to fetch data from
    :param search_terms: to specify a spatial sub-region
    """
    grid_spec = datasets.grid_spec

    if 'product' in self:
        # select only those inside the ROI
        # ROI could be smaller than the query for `query`
        spatial_query = reject_keys(search_terms, self._NON_SPATIAL_KEYS)
        selected = list(select_datasets_inside_polygon(datasets.pile,
                                                       query_geopolygon(**spatial_query)))

        # geobox
        merged = merge_search_terms(select_keys(self, self._NON_SPATIAL_KEYS),
                                    select_keys(spatial_query, self._NON_SPATIAL_KEYS))
        geobox = output_geobox(datasets=selected, grid_spec=grid_spec,
                               **select_keys(merged, self._GEOBOX_KEYS),
                               **spatial_query)

        # group by time
        group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

        def wrap(_, value):
            return QueryResult(value, grid_spec)

        # information needed for Datacube.load_data
        return DatasetPile(Datacube.group_datasets(selected, group_query), geobox).map(wrap)

    elif 'transform' in self:
        return self._input.group(datasets, **search_terms)

    elif 'collate' in self:
        self._assert(len(datasets.pile) == len(self._children), "invalid dataset pile")

        def build(source_index, product, dataset_pile):
            grouped = product.group(dataset_pile, **search_terms)

            def tag(_, value):
                in_position = [value if i == source_index else None
                               for i, _ in enumerate(datasets.pile)]
                return QueryResult(in_position, grid_spec)

            return grouped.map(tag)

        groups = [build(source_index, product, dataset_pile)
                  for source_index, (product, dataset_pile)
                  in enumerate(zip(self._children, datasets.pile))]

        return DatasetPile(xarray.concat([grouped.pile for grouped in groups], dim='time'),
                           select_unique([grouped.geobox for grouped in groups]))

    elif 'juxtapose' in self:
        self._assert(len(datasets.pile) == len(self._children), "invalid dataset pile")

        groups = [product.group(datasets, **search_terms)
                  for product, datasets in zip(self._children, datasets.pile)]

        aligned_piles = xarray.align(*[grouped.pile for grouped in groups])
        child_groups = [DatasetPile(aligned_piles[i], grouped.geobox)
                        for i, grouped in enumerate(groups)]

        def tuplify(indexes, _):
            return QueryResult([grouped.pile.sel(**indexes).item()
                                for grouped in child_groups], grid_spec)

        return DatasetPile(child_groups[0].map(tuplify).pile,
                           select_unique([grouped.geobox for grouped in groups]))

    else:
        raise VirtualProductException("virtual product was not validated")
def group(self, datasets: VirtualDatasetBag, **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
    """
    Datasets grouped by their timestamps.

    :param datasets: the `VirtualDatasetBag` to fetch data from
    :param search_terms: to specify a spatial sub-region
    """
    grid_spec = datasets.grid_spec
    geopolygon = datasets.geopolygon

    if 'product' in self:
        # select only those inside the ROI
        # ROI could be smaller than the query for the `query` method
        if query_geopolygon(**search_terms) is not None:
            geopolygon = query_geopolygon(**search_terms)
            selected = list(select_datasets_inside_polygon(datasets.pile, geopolygon))
        else:
            selected = list(datasets.pile)

        # geobox
        merged = merge_search_terms(select_keys(self, self._NON_SPATIAL_KEYS),
                                    select_keys(search_terms, self._NON_SPATIAL_KEYS))
        geobox = output_geobox(datasets=selected,
                               grid_spec=grid_spec,
                               geopolygon=geopolygon,
                               **select_keys(merged, self._GEOBOX_KEYS))

        # group by time
        group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

        # information needed for Datacube.load_data
        return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                                 geobox,
                                 datasets.product_definitions)

    elif 'transform' in self:
        return self._input.group(datasets, **search_terms)

    elif 'collate' in self:
        self._assert('collate' in datasets.pile
                     and len(datasets.pile['collate']) == len(self._children),
                     "invalid dataset pile")

        def build(source_index, product, dataset_pile):
            grouped = product.group(VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                                      datasets.geopolygon,
                                                      datasets.product_definitions),
                                    **search_terms)

            def tag(_, value):
                return {'collate': (source_index, value)}

            return grouped.map(tag)

        groups = [build(source_index, product, dataset_pile)
                  for source_index, (product, dataset_pile)
                  in enumerate(zip(self._children, datasets.pile['collate']))]

        return VirtualDatasetBox(xarray.concat([grouped.pile for grouped in groups], dim='time'),
                                 select_unique([grouped.geobox for grouped in groups]),
                                 merge_dicts([grouped.product_definitions for grouped in groups]))

    elif 'juxtapose' in self:
        self._assert('juxtapose' in datasets.pile
                     and len(datasets.pile['juxtapose']) == len(self._children),
                     "invalid dataset pile")

        groups = [product.group(VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                                  datasets.geopolygon,
                                                  datasets.product_definitions),
                                **search_terms)
                  for product, dataset_pile in zip(self._children, datasets.pile['juxtapose'])]

        aligned_piles = xarray.align(*[grouped.pile for grouped in groups])

        def tuplify(indexes, _):
            return {'juxtapose': [pile.sel(**indexes).item() for pile in aligned_piles]}

        return VirtualDatasetBox(xr_apply(aligned_piles[0], tuplify),
                                 select_unique([grouped.geobox for grouped in groups]),
                                 merge_dicts([grouped.product_definitions for grouped in groups]))

    else:
        raise VirtualProductException("virtual product was not validated")
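The 'collate' and 'juxtapose' branches above operate on recipes built with datacube.virtual.construct. A sketch of the query/group/fetch cycle that ends up calling this method, assuming a connected Datacube and hypothetical product names:

from datacube import Datacube
from datacube.virtual import construct

combined = construct(collate=[
    {'product': 'ls7_nbar_albers', 'measurements': ['red', 'nir']},
    {'product': 'ls8_nbar_albers', 'measurements': ['red', 'nir']},
])

dc = Datacube()
bag = combined.query(dc, time=('2018-01', '2018-03'), lat=(-36.0, -35.0), lon=(148.0, 149.0))
box = combined.group(bag, output_crs='EPSG:3577', resolution=(-25, 25))  # the method above
data = combined.fetch(box)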
def dc_load(
    datasets: Sequence[Dataset],
    measurements: Optional[Union[str, Sequence[str]]] = None,
    geobox: Optional[GeoBox] = None,
    groupby: Optional[str] = None,
    resampling: Optional[Union[str, Dict[str, str]]] = None,
    skip_broken_datasets: bool = False,
    chunks: Optional[Dict[str, int]] = None,
    progress_cbk: Optional[Callable[[int, int], Any]] = None,
    fuse_func=None,
    **kw,
) -> xr.Dataset:
    assert len(datasets) > 0

    # dask_chunks is a backward-compatibility alias for chunks
    if chunks is None:
        chunks = kw.pop("dask_chunks", None)
    # group_by is a backward-compatibility alias for groupby
    if groupby is None:
        groupby = kw.pop("group_by", "time")
    # bands alias for measurements
    if measurements is None:
        measurements = kw.pop("bands", None)

    # extract all "output_geobox" inputs
    geo_keys = {
        k: kw.pop(k)
        for k in [
            "like",
            "geopolygon",
            "resolution",
            "output_crs",
            "crs",
            "align",
            "x",
            "y",
            "lat",
            "lon",
        ]
        if k in kw
    }

    ds = datasets[0]
    product = ds.type

    if geobox is None:
        geobox = output_geobox(
            grid_spec=product.grid_spec,
            load_hints=product.load_hints(),
            **geo_keys,
            datasets=datasets,
        )
    elif len(geo_keys):
        warn(f"Supplied 'geobox=' parameter aliases {list(geo_keys)} inputs")

    grouped = Datacube.group_datasets(datasets, groupby)
    mm = product.lookup_measurements(measurements)

    return Datacube.load_data(
        grouped,
        geobox,
        mm,
        resampling=resampling,
        fuse_func=fuse_func,
        dask_chunks=chunks,
        skip_broken_datasets=skip_broken_datasets,
        progress_cbk=progress_cbk,
        **kw,
    )
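A sketch of calling dc_load, assuming a connected Datacube instance and a hypothetical product name; the spatial keywords are forwarded to output_geobox through geo_keys, and chunks requests a lazy dask-backed load:

from datacube import Datacube

dc = Datacube()
dss = dc.find_datasets(product='ls8_nbar_albers', time=('2018-01', '2018-02'))

xx = dc_load(
    dss,
    measurements=['red', 'nir'],
    output_crs='EPSG:3577',         # picked up by geo_keys, passed to output_geobox
    resolution=(-30, 30),
    groupby='solar_day',
    chunks={'x': 2048, 'y': 2048},  # dask chunking instead of an eager load
)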