def test_query_kwargs():
    """Exercise ``Query`` construction from keyword arguments.

    Covers a product-only query, spatial queries given as ranges and as
    single points (lat/lon and projected x/y with an explicit CRS), time
    queries, rejection of conflicting or unknown keywords, and
    ``query_group_by`` look-ups.
    """
    from mock import MagicMock

    mock_index = MagicMock()
    mock_index.datasets.get_field_names = lambda: {u'product', u'lat', u'sat_path', 'type_id', u'time', u'lon',
                                                   u'orbit', u'instrument', u'sat_row', u'platform',
                                                   'metadata_type', u'gsi', 'type', 'id'}

    query = Query(index=mock_index, product='ls5_nbar_albers')
    assert str(query)
    assert query.product == 'ls5_nbar_albers'
    assert query.search_terms['product'] == 'ls5_nbar_albers'

    # Every spatial form must yield a geopolygon plus lat/lon search terms.
    spatial_cases = [
        dict(latitude=(-35, -36), longitude=(148, 149)),
        dict(latitude=-35, longitude=148),
        dict(y=(-4174726, -4180011), x=(1515184, 1523263), crs='EPSG:3577'),
        dict(y=-4174726, x=1515184, crs='EPSG:3577'),
    ]
    for spatial_kwargs in spatial_cases:
        query = Query(index=mock_index, **spatial_kwargs)
        assert query.geopolygon
        assert 'lat' in query.search_terms
        assert 'lon' in query.search_terms

    query = Query(index=mock_index, time='2001')
    assert 'time' in query.search

    query = Query(index=mock_index, time=('2001', '2002'))
    assert 'time' in query.search

    # Two different CRS keywords at once must be rejected.
    with pytest.raises(ValueError):
        Query(index=mock_index,
              y=-4174726, coordinate_reference_system='WGS84',
              x=1515184, crs='EPSG:3577')

    # Keywords that are not known search fields must be rejected.
    with pytest.raises(LookupError):
        Query(index=mock_index, y=-4174726, x=1515184, crs='EPSG:3577', made_up_key='NotReal')

    with pytest.raises(LookupError):
        query_group_by(group_by='magic')

    gb = query_group_by('time')
    assert isinstance(gb, GroupBy)
    # An existing GroupBy instance is passed through untouched.
    assert query_group_by(group_by=gb) is gb
def check_legacy_open(index):
    """Load one time slice through ``Datacube.load`` and ``Datacube.load_data``.

    The same dataset is then listed twice so that ``load_data`` has to fuse
    two sources, and the fused result is compared with the plain load, both
    eagerly and with dask chunking.
    """
    from datacube.api.core import Datacube

    product_name = 'ls5_nbar_albers'
    timestamp = '1992-03-23T23:14:25.500000'

    dc = Datacube(index=index)

    loaded = dc.load(product=product_name,
                     measurements=['blue'],
                     time=timestamp,
                     use_threads=True)
    assert loaded['blue'].shape[0] == 1
    assert (loaded.blue != -999).any()

    # Duplicate the single dataset so the load goes through the fusing path
    found = dc.find_datasets(product=product_name, time=timestamp)
    assert len(found) == 1
    found = found * 2

    grouped = dc.group_datasets(found, query_group_by('time'))
    target_geobox = loaded.geobox
    bands = [found[0].type.measurements['blue']]

    fused = dc.load_data(grouped, target_geobox, bands)
    assert (fused == loaded).all()

    with rasterio.Env():
        fused_lazy = dc.load_data(grouped, target_geobox, bands, dask_chunks={'time': 1})
        assert fused_lazy['blue'].data.dask
        assert fused_lazy.blue[0, :, :].equals(fused.blue[0, :, :])
def check_data_with_api(index, time_slices):
    """Check retrieved data for specific values.

    The data is read on a grid 100 times coarser than the source GeoTIFF.
    The green and blue bands are compared against stored MD5 digests, and
    the last row/column element of every blue time slice must be -999.
    """
    from datacube import Datacube

    dc = Datacube(index=index)

    # Shrink the pixel count and grow the pixel size by the same factor
    cols = int(GEOTIFF['shape']['x'] / 100.0)
    rows = int(GEOTIFF['shape']['y'] / 100.0)
    res_x = int(GEOTIFF['pixel_size']['x'] * 100)
    res_y = int(GEOTIFF['pixel_size']['y'] * 100)

    product_name = 'ls5_nbar_albers'
    product = dc.index.products.get_by_name(product_name)

    transform = Affine(res_x, 0.0, GEOTIFF['ul']['x'],
                       0.0, res_y, GEOTIFF['ul']['y'])
    geobox = geometry.GeoBox(cols + 1, rows + 1, transform, geometry.CRS(GEOTIFF['crs']))

    found = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
    grouped = dc.group_datasets(found, query_group_by('time'))
    data = dc.load_data(grouped, geobox, product.measurements.values())

    green_digest = hashlib.md5(data.green.data).hexdigest()
    assert green_digest == '7f5ace486e88d33edf3512e8de6b6996'
    blue_digest = hashlib.md5(data.blue.data).hexdigest()
    assert blue_digest == 'b58204f1e10dd678b292df188c242c7e'

    for slice_no in range(time_slices):
        assert data.blue.values[slice_no][-1, -1] == -999
def __call__(self, index, product, time, group_by) -> Tile:
    """Build a ``Tile`` of grouped datasets for the configured region.

    :param index: index used to construct the ``Datacube``
    :param product: product name passed to ``find_datasets``
    :param time: time query passed to ``find_datasets``
    :param group_by: grouping spec, resolved through ``query_group_by``
    :return: ``Tile`` of the grouped sources and a geobox in the storage CRS
    """
    # Do for a specific poly whose boundary is known
    output_crs = CRS(self.storage['crs'])

    spatial_keys = ['geopolygon', 'lon', 'lat', 'longitude', 'latitude', 'x', 'y']
    spatial_query = {}
    for key, value in self.input_region.items():
        if key in spatial_keys:
            spatial_query[key] = value

    if self.feature is None:
        region = query_geopolygon(**self.input_region)
    else:
        # An explicit feature overrides any polygon in the input region
        region = self.feature.geopolygon
        spatial_query['geopolygon'] = region

    dc = Datacube(index=index)
    found = dc.find_datasets(product=product, time=time, group_by=group_by, **spatial_query)
    grouping = query_group_by(group_by=group_by)
    sources = dc.group_datasets(found, grouping)

    resolution_by_dim = self.storage['resolution']
    output_resolution = [resolution_by_dim[dim] for dim in output_crs.dimensions]

    geobox = GeoBox.from_geopolygon(region.to_crs(output_crs), resolution=output_resolution)
    return Tile(sources, geobox)
def group(self, datasets: VirtualDatasetBag, **group_settings: Dict[str, Any]) -> VirtualDatasetBox:
    """Group the datasets in the bag and work out the output geobox.

    :param datasets: the ``VirtualDatasetBag`` holding the datasets to group
    :param group_settings: settings merged with this product's own settings
    :return: ``VirtualDatasetBox`` with the grouped datasets; when no geobox
             could be computed the box is flagged for native loading and
             carries the bag's geopolygon instead
    """
    roi = datasets.geopolygon
    members = list(datasets.bag)

    settings = merge_search_terms(self, group_settings)

    try:
        geobox = output_geobox(datasets=members,
                               grid_spec=datasets.product_definitions[self._product].grid_spec,
                               geopolygon=roi,
                               **select_keys(settings, self._GEOBOX_KEYS))
    except ValueError:
        # We are not calculating geoboxes here for the moment since it may
        # require filesystem access; in ODC 2.0 the dataset should know the
        # information required.
        geobox = None
        load_natively = True
    else:
        load_natively = False

    grouping = query_group_by(**select_keys(settings, self._GROUPING_KEYS))

    # Everything Datacube.load_data needs later on
    return VirtualDatasetBox(Datacube.group_datasets(members, grouping),
                             geobox,
                             load_natively,
                             datasets.product_definitions,
                             geopolygon=roi if load_natively else None)
def _product_group_():
    """Group the datasets of a single product into a ``VirtualDatasetBox``.

    Reads ``self``, ``datasets`` and ``search_terms`` from the enclosing
    scope. If the search terms describe a region of interest, only datasets
    selected by ``select_datasets_inside_polygon`` for that region are kept;
    otherwise the whole pile is used with the geopolygon of ``datasets``.
    """
    # The ROI could be smaller than the one given to the `query` method.
    # Resolve it once instead of calling `query_geopolygon` twice.
    roi = query_geopolygon(**search_terms)
    if roi is not None:
        geopolygon = roi
        selected = list(select_datasets_inside_polygon(datasets.pile, geopolygon))
    else:
        geopolygon = datasets.geopolygon
        selected = list(datasets.pile)

    # geobox
    merged = merge_search_terms(select_keys(self, self._NON_SPATIAL_KEYS),
                                select_keys(search_terms, self._NON_SPATIAL_KEYS))

    geobox = output_geobox(datasets=selected,
                           grid_spec=datasets.grid_spec,
                           geopolygon=geopolygon,
                           **select_keys(merged, self._GEOBOX_KEYS))

    # group by time
    group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

    # information needed for Datacube.load_data
    return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                             geobox,
                             datasets.product_definitions)
def __call__(self, product, time, group_by) -> Tile:
    """Build a ``Tile`` of grouped datasets for the configured region.

    :param product: product name passed to ``find_datasets``
    :param time: time query passed to ``find_datasets``
    :param group_by: grouping spec, resolved through ``query_group_by``
    :return: ``Tile`` of the grouped sources and a geobox in the storage CRS
    """
    # Do for a specific poly whose boundary is known
    output_crs = CRS(self.storage['crs'])

    filtered_item = ['geopolygon', 'lon', 'lat', 'longitude', 'latitude', 'x', 'y']

    # Keep only the spatial entries of the input region
    filtered_dict = {k: v for k, v in self.input_region.items() if k in filtered_item}

    if 'feature_id' in self.input_region:
        # A feature was supplied: rebuild its geometry from the stored
        # 'geom_feat' and 'crs_txt' entries and query with that polygon.
        filtered_dict['geopolygon'] = Geometry(self.input_region['geom_feat'],
                                               CRS(self.input_region['crs_txt']))
        geopoly = filtered_dict['geopolygon']
    else:
        geopoly = query_geopolygon(**self.input_region)

    datasets = self.dc.find_datasets(product=product, time=time, group_by=group_by, **filtered_dict)
    group_by = query_group_by(group_by=group_by)
    sources = self.dc.group_datasets(datasets, group_by)

    output_resolution = [self.storage['resolution'][dim] for dim in output_crs.dimensions]
    geopoly = geopoly.to_crs(output_crs)
    geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)

    return Tile(sources, geobox)
def __init__(self, cache, group_by='time', key_fmt=None, grid_spec=None):
    """Store the cache together with grouping, grid and key-format settings.

    :param cache: object kept as ``self._cache``
    :param group_by: grouping spec resolved through ``query_group_by``
    :param key_fmt: key format string; ``'albers/{:03d}_{:03d}'`` when omitted
    :param grid_spec: grid spec; the result of ``gs_albers()`` when omitted
    """
    from datacube.api.query import query_group_by

    self._cache = cache
    self._grouper = query_group_by(group_by=group_by)

    if grid_spec is None:
        self._grid_spec = gs_albers()
    else:
        self._grid_spec = grid_spec

    if key_fmt is None:
        self._key_fmt = 'albers/{:03d}_{:03d}'
    else:
        self._key_fmt = key_fmt
def test_load_data(tmpdir):
    """Check ``Datacube.load_data`` for a single source and for fused sources.

    Two GeoTIFF datasets with the same timestamp and footprint are
    generated. Loading one must return the image unchanged; loading both
    with a custom fuser must call the fuser and report progress per dataset.
    """
    tmpdir = Path(str(tmpdir))

    group_by = query_group_by('time')
    spatial = dict(resolution=(15, -15),
                   offset=(11230, 1381110),)

    nodata = -999
    aa = mk_test_image(96, 64, 'int16', nodata=nodata)

    ds, gbox = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)],
                                tmpdir,
                                prefix='ds1-',
                                timestamp='2018-07-19',
                                **spatial)
    assert ds.time is not None

    ds2, _ = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)],
                              tmpdir,
                              prefix='ds2-',
                              timestamp='2018-07-19',
                              **spatial)
    # Check the second dataset (the original re-checked `ds` here).
    assert ds2.time is not None
    assert ds.time == ds2.time

    # Grouping is given once as a plain string and once as the object
    # returned by query_group_by.
    sources = Datacube.group_datasets([ds], 'time')
    sources2 = Datacube.group_datasets([ds, ds2], group_by)

    mm = ['aa']
    mm = [ds.type.measurements[k] for k in mm]

    ds_data = Datacube.load_data(sources, gbox, mm)
    assert ds_data.aa.nodata == nodata
    np.testing.assert_array_equal(aa, ds_data.aa.values[0])

    custom_fuser_call_count = 0

    def custom_fuser(dest, delta):
        nonlocal custom_fuser_call_count
        custom_fuser_call_count += 1
        dest[:] += delta

    progress_call_data = []

    def progress_cbk(n, nt):
        progress_call_data.append((n, nt))

    ds_data = Datacube.load_data(sources2, gbox, mm, fuse_func=custom_fuser,
                                 progress_cbk=progress_cbk)
    assert ds_data.aa.nodata == nodata
    assert custom_fuser_call_count > 0
    # The additive fuser is expected to add both images onto the initial
    # destination value (presumably nodata -- implied by this expectation).
    np.testing.assert_array_equal(nodata + aa + aa, ds_data.aa.values[0])

    assert progress_call_data == [(1, 2), (2, 2)]
def check_open_with_api(driver_manager, time_slices):
    """Load a 200x200 window of ``ls5_nbar_albers`` and check its shape.

    :param driver_manager: passed to both ``Datacube`` and ``load_data``
    :param time_slices: expected length of the leading axis of the blue band
    """
    from datacube import Datacube

    dc = Datacube(driver_manager=driver_manager)

    product_name = 'ls5_nbar_albers'
    product = dc.index.products.get_by_name(product_name)

    transform = Affine(25, 0.0, 638000, 0.0, -25, 6276000)
    geobox = geometry.GeoBox(200, 200, transform, geometry.CRS('EPSG:28355'))

    found = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
    grouped = dc.group_datasets(found, query_group_by('time'))

    data = dc.load_data(grouped, geobox, product.measurements.values(),
                        driver_manager=driver_manager)
    assert data.blue.shape == (time_slices, 200, 200)
def check_open_with_api(index):
    """Load a 200x200 window of ``ls5_nbar_albers`` and expect one time slice.

    :param index: index used to construct the ``Datacube``
    """
    from datacube import Datacube

    dc = Datacube(index=index)

    product_name = 'ls5_nbar_albers'
    product = dc.index.products.get_by_name(product_name)

    transform = Affine(25, 0.0, 1500000, 0.0, -25, -3900000)
    geobox = GeoBox(200, 200, transform, CRS('EPSG:3577'))

    found = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
    grouped = dc.group_datasets(found, query_group_by('time'))

    data = dc.load_data(grouped, geobox, product.measurements.values())
    assert data.blue.shape == (1, 200, 200)
def list_gqa_filtered_cells(index, gw, pix_th=None, cell_index=None, **indexers):
    """List cell sources restricted to datasets whose GQA is below a threshold.

    :param index: index used for the dataset search and the GQA look-up
    :param gw: object providing ``grid_spec`` and ``cell_sources``
    :param pix_th: threshold compared against ``get_gqa``; 1 when omitted
    :param cell_index: cell whose geobox the datasets must intersect
    :param indexers: query terms, also passed to ``query_group_by``
    :return: result of ``gw.cell_sources`` for the retained datasets
    """
    geobox = gw.grid_spec.tile_geobox(cell_index)
    query = Query(index=index, geopolygon=None, **indexers)
    observations = index.datasets.search_eager(**query.search_terms)

    # Filter now with the pixel threshold value
    datasets = {}
    if pix_th is None:
        pix_th = 1
    print("pix_th value", str(pix_th))

    for dataset in observations:
        if not check_intersect(geobox.extent, dataset.extent.to_crs(gw.grid_spec.crs)):
            continue
        if not get_gqa(index, dataset.id) < pix_th:
            continue
        cell = datasets.setdefault(cell_index, {'datasets': [], 'geobox': geobox})
        cell['datasets'].append(dataset)

    return gw.cell_sources(datasets, query_group_by(**indexers))
def check_open_with_api(index, time_slices):
    """Load a 200x200 window eagerly and lazily and compare the results.

    :param index: index used to construct the ``Datacube``
    :param time_slices: expected length of the leading axis of the blue band
    """
    with rasterio.Env():
        from datacube import Datacube

        dc = Datacube(index=index)

        product_name = 'ls5_nbar_albers'
        product = dc.index.products.get_by_name(product_name)

        transform = Affine(25, 0.0, 638000, 0.0, -25, 6276000)
        geobox = geometry.GeoBox(200, 200, transform, geometry.CRS('EPSG:28355'))

        found = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
        grouped = dc.group_datasets(found, query_group_by('time'))
        expected_shape = (time_slices, 200, 200)

        data = dc.load_data(grouped, geobox, product.measurements.values())
        assert data.blue.shape == expected_shape

        # Same load again, this time chunked with dask
        chunking = {'time': 1, 'x': 100, 'y': 100}
        lazy_data = dc.load_data(grouped, geobox, product.measurements.values(),
                                 dask_chunks=chunking)
        assert lazy_data.blue.shape == expected_shape
        assert (lazy_data.blue.load() == data.blue).all()
def check_data_with_api(index, time_slices):
    """Check retrieved data for specific values.

    The data is read on a grid 100 times coarser than the source GeoTIFF.
    The green and blue bands are compared against stored MD5 digests, and
    the last row/column element of every blue time slice must be -999.
    """
    from datacube import Datacube

    dc = Datacube(index=index)

    # TODO: this test needs to change, it tests that results are exactly the
    #       same as some time before, but with the current zoom out factor it's
    #       hard to verify that results are as expected even with human
    #       judgement. What it should test is that reading native from the
    #       ingested product gives exactly the same results as reading into the
    #       same GeoBox from the original product. Separate to that there
    #       should be a read test that confirms that what you read from native
    #       product while changing projection is of expected value

    # Read at a lower resolution
    zoom_out = 100
    cols = int(GEOTIFF['shape']['x'] / zoom_out)
    rows = int(GEOTIFF['shape']['y'] / zoom_out)
    res_x = int(GEOTIFF['pixel_size']['x'] * zoom_out)
    res_y = int(GEOTIFF['pixel_size']['y'] * zoom_out)

    product_name = 'ls5_nbar_albers'
    product = dc.index.products.get_by_name(product_name)

    transform = Affine(res_x, 0.0, GEOTIFF['ul']['x'],
                       0.0, res_y, GEOTIFF['ul']['y'])
    geobox = geometry.GeoBox(cols + 2, rows + 2, transform, geometry.CRS(GEOTIFF['crs']))

    found = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
    grouped = dc.group_datasets(found, query_group_by('time'))
    data = dc.load_data(grouped, geobox, product.measurements.values())

    green_digest = hashlib.md5(data.green.data).hexdigest()
    assert green_digest == '0f64647bad54db4389fb065b2128025e'
    blue_digest = hashlib.md5(data.blue.data).hexdigest()
    assert blue_digest == '41a7b50dfe5c4c1a1befbc378225beeb'

    for slice_no in range(time_slices):
        assert data.blue.values[slice_no][-1, -1] == -999
def group(self, datasets: VirtualDatasetBag, **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
    """Group the datasets in the bag and work out the output geobox.

    :param datasets: the ``VirtualDatasetBag`` holding the datasets to group
    :param search_terms: terms merged with this product's own settings
    :return: ``VirtualDatasetBox`` of grouped datasets, geobox and product
             definitions
    """
    roi = datasets.geopolygon
    members = list(datasets.pile)

    settings = merge_search_terms(self, search_terms)

    product_grid_spec = datasets.product_definitions[self._product].grid_spec
    geobox_settings = select_keys(settings, self._GEOBOX_KEYS)
    geobox = output_geobox(datasets=members,
                           grid_spec=product_grid_spec,
                           geopolygon=roi,
                           **geobox_settings)

    grouping = query_group_by(**select_keys(settings, self._GROUPING_KEYS))
    grouped = Datacube.group_datasets(members, grouping)

    # Everything Datacube.load_data needs later on
    return VirtualDatasetBox(grouped, geobox, datasets.product_definitions)
def multi_product_list_cells(products, gw, cell_index=None, product_query=None, **query):
    """This is similar to GridWorkflow.list_cells but generalised to multiple
    products. Only datasets that are available in all of the products are
    reported. Datasets that do not have a full set across all products are
    returned in a separate group.

    products      -- list of product names
    gw            -- Preconfigured GridWorkflow object
    cell_index    -- Limit search area to a single cell
    product_query -- Product specific query, dict product_name => product specific query
    **query       -- Common query parameters across all products

    Returns:
    co_common     -- Cell observations that have a full set across products
    co_unmatched  -- Cell observations where at least one product is missing

    Type of `co_common, co_unmatched` is list of dictionaries of tiles.

    `type(co_common[product_idx:Int][cell_idx:(Int,Int)]) == datacube.api.Tile`

    An empty `products` list yields two empty lists.
    """
    if product_query is None:
        product_query = {}

    # Stand-in for products that have no observations in a given cell
    empty_cell = dict(datasets=[], geobox=None)
    co_common = [dict() for _ in products]
    co_unmatched = [dict() for _ in products]

    group_by = query_group_by(**query)

    obs = [gw.cell_observations(product=product,
                                cell_index=cell_index,
                                **product_query.get(product, {}),
                                **query)
           for product in products]

    # Set of all cell indexes found across all products. Iterating a mapping
    # yields its keys, and `union` with no arguments gives an empty set.
    all_cell_idx = set().union(*obs)

    def cell_is_empty(c):
        return len(c['datasets']) == 0

    for cidx in all_cell_idx:
        common, unmatched = common_obs_per_cell(*[o.get(cidx, empty_cell) for o in obs])

        for i, product_obs in enumerate(obs):
            # Only report cells this product actually had observations for
            if cidx in product_obs:
                if not cell_is_empty(common[i]):
                    co_common[i][cidx] = common[i]

                if not cell_is_empty(unmatched[i]):
                    co_unmatched[i][cidx] = unmatched[i]

    co_common = [GridWorkflow.group_into_cells(c, group_by=group_by) for c in co_common]
    co_unmatched = [GridWorkflow.group_into_cells(c, group_by=group_by) for c in co_unmatched]

    return co_common, co_unmatched
def load_with_meta(dc, *args, **kwargs):
    """Load data and attach the grouped source datasets as ``sources``.

    :param dc: object providing ``load``, ``find_datasets`` and
               ``group_datasets``
    :param args: positional arguments passed to ``load`` and ``find_datasets``
    :param kwargs: keyword arguments passed to ``load`` and ``find_datasets``;
                   a ``group_by`` entry also selects how sources are grouped
    :return: the result of ``dc.load`` with ``sources`` assigned
    """
    vals = dc.load(*args, **kwargs)
    datasets = dc.find_datasets(*args, **kwargs)
    # Group the sources with the grouping the caller asked `load` for, so the
    # assigned sources are grouped the same way as the loaded data. Without a
    # `group_by` keyword this is the default grouping, as before.
    sources = dc.group_datasets(datasets, query_group_by(**kwargs))
    return vals.assign(sources=sources)
def group(self, datasets, **search_terms):
    # type: (QueryResult, Dict[str, Any]) -> DatasetPile
    """
    Datasets grouped by their timestamps.

    :param datasets: the `QueryResult` to fetch data from
    :param search_terms: query terms, e.g. to specify a spatial sub-region
    :return: `DatasetPile` of grouped datasets with the output geobox
    """
    grid_spec = datasets.grid_spec

    if 'product' in self:
        # Select only the datasets inside the ROI, which could be smaller
        # than the one used by the `query` method
        spatial_query = reject_keys(search_terms, self._NON_SPATIAL_KEYS)
        candidates = datasets.pile
        roi = query_geopolygon(**spatial_query)
        selected = list(select_datasets_inside_polygon(candidates, roi))

        # geobox
        # NOTE(review): `spatial_query` has already had the non-spatial keys
        # rejected, so the second select_keys presumably yields nothing --
        # confirm whether `search_terms` was intended here.
        own_terms = select_keys(self, self._NON_SPATIAL_KEYS)
        extra_terms = select_keys(spatial_query, self._NON_SPATIAL_KEYS)
        merged = merge_search_terms(own_terms, extra_terms)

        geobox = output_geobox(datasets=selected,
                               grid_spec=grid_spec,
                               **select_keys(merged, self._GEOBOX_KEYS),
                               **spatial_query)

        # group by time
        grouping = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

        def wrap(_, value):
            return QueryResult(value, grid_spec)

        # information needed for Datacube.load_data
        grouped_pile = DatasetPile(Datacube.group_datasets(selected, grouping), geobox)
        return grouped_pile.map(wrap)

    if 'transform' in self:
        return self._input.group(datasets, **search_terms)

    if 'collate' in self:
        self._assert(len(datasets.pile) == len(self._children), "invalid dataset pile")

        def build(source_index, product, dataset_pile):
            grouped = product.group(dataset_pile, **search_terms)

            def tag(_, value):
                # Put the value in this child's slot; other slots stay None
                slots = [value if position == source_index else None
                         for position, _ in enumerate(datasets.pile)]
                return QueryResult(slots, grid_spec)

            return grouped.map(tag)

        children_with_piles = zip(self._children, datasets.pile)
        groups = [build(source_index, product, dataset_pile)
                  for source_index, (product, dataset_pile) in enumerate(children_with_piles)]

        stacked = xarray.concat([grouped.pile for grouped in groups], dim='time')
        return DatasetPile(stacked, select_unique([grouped.geobox for grouped in groups]))

    if 'juxtapose' in self:
        self._assert(len(datasets.pile) == len(self._children), "invalid dataset pile")

        groups = [product.group(child_datasets, **search_terms)
                  for product, child_datasets in zip(self._children, datasets.pile)]

        aligned_piles = xarray.align(*[grouped.pile for grouped in groups])
        child_groups = [DatasetPile(aligned_pile, grouped.geobox)
                        for aligned_pile, grouped in zip(aligned_piles, groups)]

        def tuplify(indexes, _):
            # One entry per child, all taken at the same index
            return QueryResult([child.pile.sel(**indexes).item() for child in child_groups],
                               grid_spec)

        return DatasetPile(child_groups[0].map(tuplify).pile,
                           select_unique([grouped.geobox for grouped in groups]))

    raise VirtualProductException("virtual product was not validated")
def interval_uncertainty(polygon_id, item_polygon_path,
                         products=('ls5_pq_albers', 'ls7_pq_albers', 'ls8_pq_albers'),
                         time_period=('1986-01-01', '2017-01-01')):
    """
    Compute the standard deviation of tide heights for the Landsat
    observations falling in each ITEM 2.0 tidal interval, using the Digital
    Earth Australia archive.

    These standard deviations (one per ITEM 2.0 interval) quantify the
    'uncertainty' of each NIDEM elevation estimate: larger values indicate
    the ITEM interval was produced from a composite of images with a larger
    range of tide heights.

    Last modified: September 2018
    Author: Robbi Bishop-Taylor

    :param polygon_id:
        An integer giving the polygon ID of the desired ITEM v2.0 polygon to analyse.

    :param item_polygon_path:
        A string giving the path to the ITEM v2.0 polygon shapefile.

    :param products:
        An optional tuple of DEA Landsat product names used to calculate tide heights of all observations
        used to generate ITEM v2.0 tidal intervals. Defaults to ('ls5_pq_albers', 'ls7_pq_albers',
        'ls8_pq_albers'), which loads Landsat 5, Landsat 7 and Landsat 8.

    :param time_period:
        An optional tuple giving the start and end date to analyse. Defaults to ('1986-01-01', '2017-01-01'),
        which analyses all Landsat observations from the start of 1986 to the end of 2016.

    :return:
        An array of shape (9,) giving the standard deviation of tidal heights for all Landsat observations
        used to produce each ITEM interval.
    """
    # Read the ITEM polygons and pull out the tide post location and geometry
    item_gpd = gpd.read_file(item_polygon_path)
    polygon_rows = item_gpd[item_gpd.ID == int(polygon_id)]
    lat, lon, poly = polygon_rows[['lat', 'lon', 'geometry']].values[0]
    geom = geometry.Geometry(mapping(poly), crs=geometry.CRS(item_gpd.crs['init']))

    all_times_obs = []

    for source in products:

        # LS7 gets a fixed, shorter range; every other product uses `time_period`
        if source == 'ls7_pq_albers':
            time_range = ('1986-01-01', '2003-05-01')
        else:
            time_range = time_period

        # Find the datasets over the polygon and group them by solar day
        ds = dc.find_datasets(product=source, time=time_range, geopolygon=geom)
        group_by = query_group_by(group_by='solar_day')
        sources = dc.group_datasets(ds, group_by)

        # Collect the observation times whenever something was found
        if len(ds) > 0:
            obs_times = sources.time.data.astype('M8[s]').astype('O').tolist()
            all_times_obs.extend(obs_times)

    # Model the tide height at the tide post for every observation time
    all_times_obs.sort()
    tp_obs = [TimePoint(float(lon), float(lat), dt) for dt in all_times_obs]
    tides_obs = [tide.tide_m for tide in predict_tide(tp_obs)]

    # Data frame of tide heights indexed by observation date
    df1_obs = pd.DataFrame({'Tide_height': tides_obs}, index=pd.DatetimeIndex(all_times_obs))

    ##################
    # ITEM intervals #
    ##################

    min_height = df1_obs.Tide_height.min()
    max_height = df1_obs.Tide_height.max()
    observed_range = max_height - min_height

    # Ten bin edges at 0%, 10%, ..., 90% of the observed range bound nine
    # intervals, labelled 1 to 9
    bin_edges = [min_height + observed_range * perc * 0.1 for perc in range(0, 10, 1)]
    interval_labels = list(range(1, 10))

    # Assign each observation to its interval
    df1_obs['interval'] = pd.cut(df1_obs.Tide_height, bins=bin_edges, labels=interval_labels)

    return df1_obs.groupby('interval').std().values.flatten()
def group(self, datasets: VirtualDatasetBag, **search_terms: Dict[str, Any]) -> VirtualDatasetBox:
    """
    Datasets grouped by their timestamps.

    :param datasets: the `VirtualDatasetBag` to fetch data from
    :param search_terms: query terms, e.g. to specify a spatial sub-region
    :return: `VirtualDatasetBox` of grouped datasets, geobox and product
             definitions
    :raises VirtualProductException: if this product is none of the
             'product', 'transform', 'collate' or 'juxtapose' kinds
    """
    grid_spec = datasets.grid_spec
    geopolygon = datasets.geopolygon

    if 'product' in self:
        # Select only the datasets inside the ROI, which could be smaller
        # than the one used by the `query` method. Resolve the ROI once
        # instead of calling `query_geopolygon` twice.
        roi = query_geopolygon(**search_terms)
        if roi is not None:
            geopolygon = roi
            selected = list(select_datasets_inside_polygon(datasets.pile, geopolygon))
        else:
            selected = list(datasets.pile)

        # geobox
        merged = merge_search_terms(select_keys(self, self._NON_SPATIAL_KEYS),
                                    select_keys(search_terms, self._NON_SPATIAL_KEYS))

        geobox = output_geobox(datasets=selected,
                               grid_spec=grid_spec,
                               geopolygon=geopolygon,
                               **select_keys(merged, self._GEOBOX_KEYS))

        # group by time
        group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

        # information needed for Datacube.load_data
        return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                                 geobox,
                                 datasets.product_definitions)

    elif 'transform' in self:
        return self._input.group(datasets, **search_terms)

    elif 'collate' in self:
        self._assert('collate' in datasets.pile and len(datasets.pile['collate']) == len(self._children),
                     "invalid dataset pile")

        def build(source_index, product, dataset_pile):
            grouped = product.group(VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                                      datasets.geopolygon, datasets.product_definitions),
                                    **search_terms)

            def tag(_, value):
                # Record which child the value came from
                return {'collate': (source_index, value)}

            return grouped.map(tag)

        groups = [build(source_index, product, dataset_pile)
                  for source_index, (product, dataset_pile)
                  in enumerate(zip(self._children, datasets.pile['collate']))]

        return VirtualDatasetBox(xarray.concat([grouped.pile for grouped in groups], dim='time'),
                                 select_unique([grouped.geobox for grouped in groups]),
                                 merge_dicts([grouped.product_definitions for grouped in groups]))

    elif 'juxtapose' in self:
        self._assert('juxtapose' in datasets.pile and len(datasets.pile['juxtapose']) == len(self._children),
                     "invalid dataset pile")

        groups = [product.group(VirtualDatasetBag(dataset_pile, datasets.grid_spec,
                                                  datasets.geopolygon, datasets.product_definitions),
                                **search_terms)
                  for product, dataset_pile in zip(self._children, datasets.pile['juxtapose'])]

        aligned_piles = xarray.align(*[grouped.pile for grouped in groups])

        def tuplify(indexes, _):
            # One entry per child, all taken at the same index
            return {'juxtapose': [pile.sel(**indexes).item() for pile in aligned_piles]}

        return VirtualDatasetBox(xr_apply(aligned_piles[0], tuplify),
                                 select_unique([grouped.geobox for grouped in groups]),
                                 merge_dicts([grouped.product_definitions for grouped in groups]))

    else:
        raise VirtualProductException("virtual product was not validated")