def check_legacy_open(index):
    """Check that eager, fused, and dask-backed lazy loads of the same data agree."""
    import rasterio
    from datacube.api.core import Datacube
    from datacube.api.query import query_group_by

    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers',
                         measurements=['blue'],
                         time='1992-03-23T23:14:25.500000',
                         use_threads=True)
    assert data_array['blue'].shape[0] == 1
    assert (data_array.blue != -999).any()

    # Force a fusing load by duplicating the dataset.
    dss = dc.find_datasets(product='ls5_nbar_albers',
                           time='1992-03-23T23:14:25.500000')
    assert len(dss) == 1

    dss = dss * 2
    sources = dc.group_datasets(dss, query_group_by('time'))

    gbox = data_array.geobox
    mm = [dss[0].type.measurements['blue']]
    xx = dc.load_data(sources, gbox, mm)
    assert (xx == data_array).all()

    with rasterio.Env():
        xx_lazy = dc.load_data(sources, gbox, mm, dask_chunks={'time': 1})
        assert xx_lazy['blue'].data.dask
        assert xx_lazy.blue[0, :, :].equals(xx.blue[0, :, :])
def ingest_work(config, source_type, output_type, tile, tile_index):
    """Load one tile, write it to NetCDF, and return the datasets created."""
    _LOG.info('Starting task %s', tile_index)
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        # Only the default 'copy' fuser is supported; any other name raises KeyError.
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources, version=config['taskfile_version'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, file_path, global_attributes, variable_params)

    _LOG.info('Finished task %s', tile_index)
    return datasets
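# A minimal sketch of a custom fuse function, assuming the in-place
# (dest, src) merge convention that datacube applies when several source
# datasets fall into the same output time slice. ingest_work above only wires
# up the default 'copy' fuser (fuse_func=None); fill_empty_fuser and the
# hard-coded NODATA value are hypothetical, for illustration only.
import numpy as np

NODATA = -999  # assumed nodata value; in practice it comes from the measurement definition

def fill_empty_fuser(dest: np.ndarray, src: np.ndarray) -> None:
    """Copy src pixels into dest wherever dest still holds nodata."""
    empty = dest == NODATA
    dest[empty] = src[empty]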
def ingest_work(config, source_type, output_type, tile, tile_index):  # pylint: disable=too-many-locals
    """Load one tile, write it out through the configured storage driver, and return the datasets created."""
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('Could not find the storage driver named by the storage.driver option')

    namemap = get_namemap(config)
    # TODO: get_measurements possibly changes dtype, not sure load_data would like that
    measurements = get_measurements(source_type, config)
    resampling = get_resampling(config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]

    datasets = tile.sources.sum().item()
    for dataset in datasets:
        if not dataset.uris:
            _LOG.error('Locationless dataset found in the database: %r', dataset)

    data = Datacube.load_data(tile.sources, tile.geobox, measurements,
                              resampling=resampling,
                              fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)
    file_uri = driver.mk_uri(file_path, config['storage'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_uri,
                            app_info=get_app_metadata(config['filename']),
                            valid_data=polygon_from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    driver_data = driver.write_dataset_to_storage(nudata, file_uri,
                                                  global_attributes=global_attributes,
                                                  variable_params=variable_params,
                                                  storage_config=config['storage'])

    if (driver_data is not None) and len(driver_data) > 0:
        datasets.attrs['driver_data'] = driver_data

    _LOG.info('Finished task %s', tile_index)
    return datasets
def ingest_work(config, source_type, output_type, tile, tile_index):  # pylint: disable=too-many-locals
    """Load one tile, write it out through the configured storage driver, and return the datasets created."""
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('Could not find the storage driver named by the storage.driver option')

    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

    def mk_uri(file_path):
        # Local paths become proper file:// URIs; other schemes are prefixed verbatim.
        if driver.uri_scheme == "file":
            return file_path.absolute().as_uri()
        return '{}://{}'.format(driver.uri_scheme, file_path)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=mk_uri(file_path),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    storage_metadata = driver.write_dataset_to_storage(nudata, file_path,
                                                       global_attributes=global_attributes,
                                                       variable_params=variable_params,
                                                       storage_config=config['storage'])

    if (storage_metadata is not None) and len(storage_metadata) > 0:
        datasets.attrs['storage_metadata'] = storage_metadata

    _LOG.info('Finished task %s', tile_index)
    return datasets
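# A standalone sketch of the nested mk_uri helper above, parameterised on the
# driver's URI scheme so it can be exercised in isolation; mk_uri_standalone
# and the 's3' scheme below are hypothetical, for illustration only.
from pathlib import Path

def mk_uri_standalone(file_path: Path, uri_scheme: str) -> str:
    # Local paths become proper file:// URIs; any other scheme is prefixed verbatim.
    if uri_scheme == "file":
        return file_path.absolute().as_uri()
    return '{}://{}'.format(uri_scheme, file_path)

# e.g. mk_uri_standalone(Path('/tmp/tile.nc'), 'file') -> 'file:///tmp/tile.nc'
#      mk_uri_standalone(Path('bucket/tile.nc'), 's3')  -> 's3://bucket/tile.nc'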
def ingest_work(driver_manager, config, source_type, output_type, tile, tile_index):
    """Load one tile, write it out through the DriverManager, and return the datasets created."""
    _LOG.info('Starting task %s', tile_index)
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements,
                                  fuse_func=fuse_func, driver_manager=driver_manager)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    # Until ingest becomes a class and DriverManager an instance variable, we
    # call the constructor each time. DriverManager being a singleton, there
    # is little overhead, though.
    datasets.attrs['storage_output'] = driver_manager.write_dataset_to_storage(nudata, file_path,
                                                                               global_attributes,
                                                                               variable_params)
    _LOG.info('Finished task %s', tile_index)

    # When using the multiproc executor, the DriverManager is a clone.
    if driver_manager.is_clone:
        driver_manager.close()

    return datasets