def check_open_with_dc(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers', variables=['blue'], stack='variable')
    assert data_array.shape

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-34, -35), longitude=(149, 150),
                         stack='variable')
    assert data_array.shape

    dataset = dc.load(product='ls5_nbar_albers', variables=['blue'])
    assert dataset['blue'].size

    dataset = dc.load(product='ls5_nbar_albers',
                      latitude=(-35.2, -35.3), longitude=(149.1, 149.2))
    assert dataset['blue'].size

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-34, -35), longitude=(149, 150),
                         variables=['blue'], group_by='solar_day')

    products_df = dc.list_products()
    assert len(products_df)
    assert len(products_df[products_df['name'].isin(['ls5_nbar_albers'])])
    assert len(products_df[products_df['name'].isin(['ls5_pq_albers'])])

    assert len(dc.list_measurements())
def check_open_with_api(index):
    from datacube.api.core import Datacube
    datacube = Datacube(index=index)

    input_type_name = 'ls5_nbar_albers'
    input_type = datacube.index.datasets.types.get_by_name(input_type_name)

    geobox = GeoBox(200, 200, Affine(25, 0.0, 1500000, 0.0, -25, -3900000), CRS('EPSG:3577'))
    observations = datacube.product_observations('ls5_nbar_albers', geobox.extent)
    sources = datacube.product_sources(observations,
                                       lambda ds: ds.center_time,
                                       'time',
                                       'seconds since 1970-01-01 00:00:00')
    data = datacube.product_data(sources, geobox, input_type.measurements.values())
    assert data.blue.shape == (1, 200, 200)
def check_open_with_api(index):
    from datacube.api.core import Datacube
    datacube = Datacube(index=index)

    input_type_name = 'ls5_nbar_albers'
    input_type = datacube.index.datasets.types.get_by_name(input_type_name)

    geobox = GeoBox(200, 200, Affine(25, 0.0, 1500000, 0.0, -25, -3900000), CRS('EPSG:3577'))
    observations = datacube.product_observations(product='ls5_nbar_albers', geopolygon=geobox.extent)
    sources = datacube.product_sources(observations,
                                       lambda ds: ds.center_time,
                                       'time',
                                       'seconds since 1970-01-01 00:00:00')
    data = datacube.product_data(sources, geobox, input_type.measurements.values())
    assert data.blue.shape == (1, 200, 200)
def test_end_to_end_multitime(clirunner, index, product_def, original_data):
    """Test simple indexing but for multiple measurements and wavelengths."""
    dc = Datacube(index=index)

    # Add the GEDI Dataset Types
    clirunner(["-v", "product", "add", str(GEDI_PRODUCT.dataset_types)])

    for idx, measurement in enumerate(product_def.measurements):
        for product_id in GEDI_PRODUCT_IDS:
            index_yaml = str(product_def.index_yaml).format(
                product_id=product_id.pid,
                measurement=measurement,
            )
            # Index the Datasets
            clirunner(["-v", "dataset", "add", str(index_yaml)])

        if idx == 0:
            # Full check for the first measurement only
            # Check data for all product IDs
            check_open_with_dc_contents(dc, product_def, GEDI_PRODUCT_IDS,
                                        measurement, original_data)
            # check_open_with_grid_workflow(index)
            # Only test first product ID with dss
            check_load_via_dss(dc, product_def, GEDI_PRODUCT_IDS[:1],
                               measurement, original_data)
        else:
            check_open_with_dc_simple(dc, product_def, GEDI_PRODUCT_IDS,
                                      measurement)
def ingest_work(config, source_type, output_type, tile, tile_index):  # pylint: disable=too-many-locals
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('No driver found for the storage.driver option')

    namemap = get_namemap(config)
    # TODO: get_measurements possibly changes dtype, not sure load_data would like that
    measurements = get_measurements(source_type, config)
    resampling = get_resampling(config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]

    datasets = tile.sources.sum().item()
    for dataset in datasets:
        if not dataset.uris:
            _LOG.error('Locationless dataset found in the database: %r', dataset)

    data = Datacube.load_data(tile.sources, tile.geobox, measurements,
                              resampling=resampling,
                              fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)
    file_uri = driver.mk_uri(file_path, config['storage'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_uri,
                            app_info=get_app_metadata(config['filename']),
                            valid_data=polygon_from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    driver_data = driver.write_dataset_to_storage(nudata, file_uri,
                                                  global_attributes=global_attributes,
                                                  variable_params=variable_params,
                                                  storage_config=config['storage'])

    if (driver_data is not None) and len(driver_data) > 0:
        datasets.attrs['driver_data'] = driver_data

    _LOG.info('Finished task %s', tile_index)

    return datasets
def ingest_work(config, source_type, output_type, tile, tile_index):
    _LOG.info('Starting task %s', tile_index)
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources, version=config['taskfile_version'])

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)
    return datasets
def test_new_xr_load(data_folder):
    base = "file://" + str(data_folder) + "/metadata.yml"

    rdr = mk_rio_driver()
    assert rdr is not None

    _bands = []

    def band_info_collector(bands, ctx):
        for b in bands:
            _bands.append(b)

    tee_new_load_context(rdr, band_info_collector)

    band_a = dict(name='a', path='test.tif')
    band_b = dict(name='b', band=2, path='test.tif')

    ds = mk_sample_dataset([band_a, band_b], base)

    sources = Datacube.group_datasets([ds], 'time')

    im, meta = rio_slurp(str(data_folder) + '/test.tif')
    measurements = [ds.type.measurements[n] for n in ('a', 'b')]

    xx, _ = xr_load(sources, meta.gbox, measurements, rdr)

    assert len(_bands) == 2

    assert im[0].shape == xx.a.isel(time=0).shape
    assert im[1].shape == xx.b.isel(time=0).shape

    np.testing.assert_array_equal(im[0], xx.a.values[0])
    np.testing.assert_array_equal(im[1], xx.b.values[0])
def ingest_work(config, source_type, output_type, index, sources, geobox):
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.product_data(sources, geobox, measurements, fuse_func=fuse_func)
    nudata = data.rename(namemap)

    file_path = get_filename(config, index, sources)

    def _make_dataset(labels, sources):
        sources_union = union_points(*[source.extent.to_crs(geobox.crs).points for source in sources])
        valid_data = intersect_points(geobox.extent.points, sources_union)
        dataset = make_dataset(dataset_type=output_type,
                               sources=sources,
                               extent=geobox.extent,
                               center_time=labels['time'],
                               uri=file_path.absolute().as_uri(),
                               app_info=get_app_metadata(config, config['filename']),
                               valid_data=GeoPolygon(valid_data, geobox.crs))
        return dataset

    datasets = xr_apply(sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    write_dataset_to_netcdf(nudata, global_attributes, variable_params, file_path)
    return datasets
def check_legacy_open(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers',
                         measurements=['blue'],
                         time='1992-03-23T23:14:25.500000',
                         use_threads=True)
    assert data_array['blue'].shape[0] == 1
    assert (data_array.blue != -999).any()

    # force fusing load by duplicating dataset
    dss = dc.find_datasets(product='ls5_nbar_albers',
                           time='1992-03-23T23:14:25.500000')

    assert len(dss) == 1

    dss = dss * 2
    sources = dc.group_datasets(dss, query_group_by('time'))

    gbox = data_array.geobox
    mm = [dss[0].type.measurements['blue']]
    xx = dc.load_data(sources, gbox, mm)
    assert (xx == data_array).all()

    with rasterio.Env():
        xx_lazy = dc.load_data(sources, gbox, mm, dask_chunks={'time': 1})
        assert xx_lazy['blue'].data.dask
        assert xx_lazy.blue[0, :, :].equals(xx.blue[0, :, :])
def dask_load(sources, geobox, measurements, dask_chunks,
              skip_broken_datasets=False):
    def data_func(measurement):
        return make_dask_array(sources, geobox, measurement,
                               skip_broken_datasets=skip_broken_datasets,
                               dask_chunks=dask_chunks)

    return Datacube.create_storage(OrderedDict((dim, sources.coords[dim]) for dim in sources.dims),
                                   geobox, measurements, data_func)
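# A minimal usage sketch for dask_load above (an illustrative example, not
# part of the original code): sources, geobox and measurements are obtained
# the same way check_legacy_open obtains them, and nothing is read from disk
# until the result is computed.
def example_dask_load(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    eager = dc.load(product='ls5_nbar_albers', measurements=['blue'])
    dss = dc.find_datasets(product='ls5_nbar_albers')
    sources = dc.group_datasets(dss, query_group_by('time'))
    measurements = [dss[0].type.measurements['blue']]

    lazy = dask_load(sources, eager.geobox, measurements, dask_chunks={'time': 1})
    assert lazy['blue'].data.dask                  # still lazy: backed by a dask graph
    assert lazy.compute().blue.equals(eager.blue)  # values should match the eager load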
def test_indexing(clirunner, index, product_def):
    """Test indexing features for 2D and 3D products.

    A few no-op indexing commands are tested as well as a simple load
    with a shape check only.
    """
    product_id = GEDI_PRODUCT_IDS[0]
    measurement = product_def.measurements[0]
    index_yaml = str(product_def.index_yaml).format(
        product_id=product_id.pid,
        measurement=measurement,
    )

    # Add the GEDI Dataset Types
    clirunner(["-v", "product", "add", str(GEDI_PRODUCT.dataset_types)])

    # Index the Datasets
    # - do test run first to increase test coverage
    clirunner(["-v", "dataset", "add", "--dry-run", str(index_yaml)])
    # - do actual indexing
    clirunner(["-v", "dataset", "add", str(index_yaml)])
    # - this will be a no-op, but with ignored lineage
    clirunner([
        "-v", "dataset", "add", "--confirm-ignore-lineage",
        str(index_yaml),
    ])

    # Test no-op update
    for policy in ["archive", "forget", "keep"]:
        clirunner([
            "-v", "dataset", "update", "--dry-run",
            "--location-policy", policy,
            str(index_yaml),
        ])

        # Test "no changes needed" update
        clirunner([
            "-v", "dataset", "update",
            "--location-policy", policy,
            str(index_yaml),
        ])

    dc = Datacube(index=index)
    check_open_with_dc_simple(dc, product_def, [product_id], measurement)
def ingest_work(config, source_type, output_type, tile, tile_index):  # pylint: disable=too-many-locals
    _LOG.info('Starting task %s', tile_index)
    driver = storage_writer_by_name(config['storage']['driver'])

    if driver is None:
        _LOG.error('Failed to load storage driver %s', config['storage']['driver'])
        raise ValueError('No driver found for the storage.driver option')

    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements, fuse_func=fuse_func)

    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

    def mk_uri(file_path):
        if driver.uri_scheme == "file":
            return file_path.absolute().as_uri()
        return '{}://{}'.format(driver.uri_scheme, file_path)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=mk_uri(file_path),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    variable_params['dataset'] = {
        'chunksizes': (1,),
        'zlib': True,
        'complevel': 9,
    }

    storage_metadata = driver.write_dataset_to_storage(nudata, file_path,
                                                       global_attributes=global_attributes,
                                                       variable_params=variable_params,
                                                       storage_config=config['storage'])

    if (storage_metadata is not None) and len(storage_metadata) > 0:
        datasets.attrs['storage_metadata'] = storage_metadata

    _LOG.info('Finished task %s', tile_index)
    return datasets
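# A duck-typed stub of the storage-writer interface the ingest_work variants
# above rely on (uri_scheme, mk_uri, write_dataset_to_storage). This is an
# illustrative sketch only: real drivers are looked up via
# storage_writer_by_name and registered through datacube's driver plugin
# machinery, which is not shown here.
class StubStorageWriter:
    uri_scheme = 'file'

    def mk_uri(self, file_path, storage_config):
        # Map an output path to the URI recorded against the dataset.
        return file_path.absolute().as_uri()

    def write_dataset_to_storage(self, dataset, file_uri,
                                 global_attributes=None,
                                 variable_params=None,
                                 storage_config=None):
        # Persist `dataset` at `file_uri` and return driver-specific metadata
        # (possibly empty) to attach to the indexed datasets.
        return {}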
def ingest_cmd(index, config, dry_run, executor):
    _, config = next(read_documents(Path(config)))
    source_type = index.datasets.types.get_by_name(config['source_type'])
    if not source_type:
        _LOG.error("Source DatasetType %s does not exist", config['source_type'])
        return  # nothing to ingest without a source type

    output_type = morph_dataset_type(source_type, config)
    _LOG.info('Created DatasetType %s', output_type.name)
    output_type = index.datasets.types.add(output_type)

    datacube = Datacube(index=index)

    grid_spec = output_type.grid_spec
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    file_path_template = str(Path(config['location'], config['file_path_template']))

    bbox = BoundingBox(**config['ingestion_bounds'])
    tasks = find_diff(source_type, output_type, bbox, datacube)

    def ingest_work(tile_index, sources):
        geobox = GeoBox.from_grid_spec(grid_spec, tile_index)
        data = Datacube.product_data(sources, geobox, measurements)

        nudata = data.rename(namemap)

        file_path = file_path_template.format(
            tile_index=tile_index,
            start_time=to_datetime(sources.time.values[0]).strftime('%Y%m%d%H%M%S%f'),
            end_time=to_datetime(sources.time.values[-1]).strftime('%Y%m%d%H%M%S%f'))
        # TODO: algorithm params
        print("Writing product")
        nudatasets = write_product(nudata, sources, output_type,
                                   config['global_attributes'],
                                   variable_params, Path(file_path))
        return nudatasets

    do_work(tasks, ingest_work, index, executor)

    temp = str(Path(config['location']))
    files_path = temp + "/cache"
    # Create the archive directory if it does not exist yet
    if not os.path.isdir(temp + "/archive"):
        os.makedirs(temp + "/archive")
    print("Compressing files")
    compress(files_path)
def dask_load(sources, geobox, measurements, dask_chunks,
              skip_broken_datasets=False):
    def data_func(measurement):
        return make_dask_array(sources, geobox, measurement,
                               skip_broken_datasets=skip_broken_datasets,
                               dask_chunks=dask_chunks)

    return Datacube.create_storage(sources.coords, geobox, measurements, data_func)
def check_load_via_dss(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    dss = dc.find_datasets(product='ls5_nbar_albers')
    assert len(dss) > 0

    xx1 = dc.load(product='ls5_nbar_albers', measurements=['blue'])
    xx2 = dc.load(datasets=dss, measurements=['blue'])
    assert xx1.blue.shape
    assert (xx1.blue != -999).any()
    assert (xx1.blue == xx2.blue).all()

    xx2 = dc.load(datasets=iter(dss), measurements=['blue'])
    assert xx1.blue.shape
    assert (xx1.blue != -999).any()
    assert (xx1.blue == xx2.blue).all()

    with pytest.raises(ValueError):
        dc.load(measurements=['blue'])

    with pytest.raises(DeprecationWarning):
        dc.load(product='ls5_nbar_albers', stack=True)
def ingest_work(tile_index, sources):
    geobox = GeoBox.from_grid_spec(grid_spec, tile_index)
    data = Datacube.product_data(sources, geobox, measurements)

    nudata = data.rename(namemap)

    file_path = file_path_template.format(
        tile_index=tile_index,
        start_time=to_datetime(sources.time.values[0]).strftime('%Y%m%d%H%M%S%f'),
        end_time=to_datetime(sources.time.values[-1]).strftime('%Y%m%d%H%M%S%f'))
    # TODO: algorithm params
    print("Writing product")
    nudatasets = write_product(nudata, sources, output_type,
                               config['global_attributes'],
                               variable_params, Path(file_path))
    return nudatasets
def ingest_work(driver_manager, config, source_type, output_type, tile, tile_index):
    _LOG.info('Starting task %s', tile_index)
    namemap = get_namemap(config)
    measurements = get_measurements(source_type, config)
    variable_params = get_variable_params(config)
    global_attributes = config['global_attributes']

    with datacube.set_options(reproject_threads=1):
        fuse_func = {'copy': None}[config.get(FUSER_KEY, 'copy')]
        data = Datacube.load_data(tile.sources, tile.geobox, measurements,
                                  fuse_func=fuse_func, driver_manager=driver_manager)
    nudata = data.rename(namemap)
    file_path = get_filename(config, tile_index, tile.sources)

    def _make_dataset(labels, sources):
        return make_dataset(product=output_type,
                            sources=sources,
                            extent=tile.geobox.extent,
                            center_time=labels['time'],
                            uri=file_path.absolute().as_uri(),
                            app_info=get_app_metadata(config, config['filename']),
                            valid_data=GeoPolygon.from_sources_extents(sources, tile.geobox))

    datasets = xr_apply(tile.sources, _make_dataset, dtype='O')  # Store in DataArray to associate Time -> Dataset
    nudata['dataset'] = datasets_to_doc(datasets)

    # Until ingest becomes a class and DriverManager an instance variable, we
    # call the constructor each time. DriverManager being a singleton, there
    # is little overhead, though.
    datasets.attrs['storage_output'] = driver_manager.write_dataset_to_storage(
        nudata, file_path, global_attributes, variable_params)
    _LOG.info('Finished task %s', tile_index)

    # When using the multiproc executor, the DriverManager is a clone.
    if driver_manager.is_clone:
        driver_manager.close()

    return datasets
def test_indexing_with_spectral_map(clirunner, index, dataset_types):
    """Test indexing features with spectral map."""
    product_id = GEDI_PRODUCT_IDS[0]
    product_def = GEDI_PRODUCTS["3D"]
    measurement = product_def.measurements[0]
    index_yaml = str(product_def.index_yaml).format(
        product_id=product_id.pid,
        measurement=measurement,
    )

    # Add the GEDI Dataset Types
    clirunner(["-v", "product", "add", str(dataset_types)])
    # Index the Dataset
    clirunner(["-v", "dataset", "add", str(index_yaml)])

    dc = Datacube(index=index)
    check_open_with_dc_simple(dc, product_def, [product_id], measurement)
def xr_load(sources, geobox, measurements,
            skip_broken_datasets=False,
            use_threads=False):
    mk_new = get_loader(sources)

    data = Datacube.create_storage(OrderedDict((dim, sources.coords[dim]) for dim in sources.dims),
                                   geobox, measurements)

    # TODO: re-add use_threads
    for index, datasets in np.ndenumerate(sources.values):
        for m in measurements:
            t_slice = data[m.name].values[index]

            fuse_measurement(t_slice, datasets, geobox, m,
                             mk_new=mk_new,
                             skip_broken_datasets=skip_broken_datasets)

    return data
def xr_load(sources, geobox, measurements,
            skip_broken_datasets=False,
            use_threads=False):
    mk_new = get_loader(sources)

    data = Datacube.create_storage(sources.coords, geobox, measurements)

    if use_threads:
        def work_load_data(index, datasets, m):
            t_slice = data[m.name].values[index]
            fuse_measurement(t_slice, datasets, geobox, m,
                             mk_new=mk_new,
                             skip_broken_datasets=skip_broken_datasets)

        futures = []
        pool = ThreadPoolExecutor(cpu_count() * 2)
        for index, datasets in np.ndenumerate(sources.values):
            for m in measurements:
                futures.append(pool.submit(work_load_data, index, datasets, m))

        wait(futures)
    else:
        for index, datasets in np.ndenumerate(sources.values):
            for m in measurements:
                t_slice = data[m.name].values[index]
                fuse_measurement(t_slice, datasets, geobox, m,
                                 mk_new=mk_new,
                                 skip_broken_datasets=skip_broken_datasets)

    return data
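# A minimal sketch exercising the threaded path of the xr_load variant above
# (an illustrative example, not part of the original code). Inputs are built
# the same way check_legacy_open builds them; the threaded load should agree
# with a plain eager load of the same product.
def example_xr_load_threaded(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    dss = dc.find_datasets(product='ls5_nbar_albers')
    sources = dc.group_datasets(dss, query_group_by('time'))
    eager = dc.load(product='ls5_nbar_albers', measurements=['blue'])
    measurements = [dss[0].type.measurements['blue']]

    threaded = xr_load(sources, eager.geobox, measurements, use_threads=True)
    assert threaded.blue.equals(eager.blue)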
def check_open_with_grid_workflow(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    type_name = 'ls5_nbar_albers'
    dt = dc.index.datasets.types.get_by_name(type_name)

    from datacube.api.grid_workflow import GridWorkflow
    gw = GridWorkflow(dc, dt.grid_spec)

    cells = gw.list_cells(product=type_name)
    assert LBG_CELL in cells

    tiles = gw.list_tiles(product=type_name)
    assert tiles
    assert tiles[LBG_CELL]

    ts, tile = tiles[LBG_CELL].popitem()
    dataset_cell = gw.load(LBG_CELL, tile, measurements=['blue'])
    assert dataset_cell['blue'].size

    dataset_cell = gw.load(LBG_CELL, tile)
    assert all(m in dataset_cell for m in ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])

    tiles = gw.list_tile_stacks(product=type_name)
    assert tiles
    assert tiles[LBG_CELL]

    tile = tiles[LBG_CELL]
    dataset_cell = gw.load(LBG_CELL, tile, measurements=['blue'])
    assert dataset_cell['blue'].size

    dataset_cell = gw.load(LBG_CELL, tile)
    assert all(m in dataset_cell for m in ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])
def with_datacube(index, *args, **kwargs):
    return f(Datacube(index=index), *args, **kwargs)
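# A sketch of the decorator that presumably encloses with_datacube above: `f`
# is the wrapped function, and the decorator swaps the raw index argument for
# a ready-made Datacube. The name `pass_datacube` is assumed for illustration.
def pass_datacube(f):
    import functools

    @functools.wraps(f)
    def with_datacube(index, *args, **kwargs):
        return f(Datacube(index=index), *args, **kwargs)

    return with_datacube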
def test_end_to_end(clirunner, index, testdata_dir, ingest_configs, datacube_env_name):
    """
    Loads two dataset configurations, then ingests a sample Landsat 5 scene.

    One dataset configuration specifies the Australian Albers Equal Area
    Projection, the other is simply latitude/longitude.

    The input dataset should be recorded in the index, and two sets of
    storage units should be created on disk and recorded in the index.
    """
    lbg_nbar = testdata_dir / 'lbg' / LBG_NBAR
    lbg_pq = testdata_dir / 'lbg' / LBG_PQ
    ls5_nbar_albers_ingest_config = testdata_dir / ingest_configs['ls5_nbar_albers']
    ls5_pq_albers_ingest_config = testdata_dir / ingest_configs['ls5_pq_albers']

    # Add the LS5 Dataset Types
    clirunner(['-v', 'product', 'add', str(LS5_DATASET_TYPES)])

    # Index the Datasets
    # - do test run first to increase test coverage
    clirunner(['-v', 'dataset', 'add', '--dry-run', str(lbg_nbar), str(lbg_pq)])
    # - do actual indexing
    clirunner(['-v', 'dataset', 'add', str(lbg_nbar), str(lbg_pq)])
    # - this will be a no-op, but with ignored lineage
    clirunner(['-v', 'dataset', 'add', '--confirm-ignore-lineage',
               str(lbg_nbar), str(lbg_pq)])

    # Test no-op update
    for policy in ['archive', 'forget', 'keep']:
        clirunner(['-v', 'dataset', 'update', '--dry-run',
                   '--location-policy', policy, str(lbg_nbar), str(lbg_pq)])

        # Test "no changes needed" update
        clirunner(['-v', 'dataset', 'update',
                   '--location-policy', policy, str(lbg_nbar), str(lbg_pq)])

    # TODO: test location update
    # 1. Make a copy of a file
    # 2. Call dataset update with archive/forget
    # 3. Check location

    # Ingest NBAR
    clirunner(['-v', 'ingest', '-c', str(ls5_nbar_albers_ingest_config)])

    # Ingest PQ
    clirunner(['-v', 'ingest', '-c', str(ls5_pq_albers_ingest_config)])

    dc = Datacube(index=index)
    assert isinstance(str(dc), str)
    assert isinstance(repr(dc), str)

    with pytest.raises(ValueError):
        dc.find_datasets(time='2019')  # no product supplied, raises an exception

    check_open_with_dc(index)
    check_open_with_grid_workflow(index)
    check_load_via_dss(index)
def with_index(driver_manager, *args, **kwargs):
    return f(Datacube(driver_manager=driver_manager), *args, **kwargs)
def check_open_with_dc(index):
    from datacube.api.core import Datacube
    dc = Datacube(index=index)

    data_array = dc.load(product='ls5_nbar_albers', measurements=['blue'], stack='variable')
    assert data_array.shape
    assert (data_array != -999).any()

    data_array = dc.load(product='ls5_nbar_albers',
                         measurements=['blue'],
                         time='1992-03-23T23:14:25.500000')
    assert data_array['blue'].shape[0] == 1
    assert (data_array.blue != -999).any()

    data_array = dc.load(product='ls5_nbar_albers',
                         measurements=['blue'],
                         latitude=-35.3, longitude=149.1)
    assert data_array['blue'].shape[1:] == (1, 1)
    assert (data_array.blue != -999).any()

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-35, -36), longitude=(149, 150),
                         stack='variable')
    assert data_array.ndim == 4
    assert 'variable' in data_array.dims
    assert (data_array != -999).any()

    with rasterio.Env():
        lazy_data_array = dc.load(product='ls5_nbar_albers',
                                  latitude=(-35, -36), longitude=(149, 150),
                                  stack='variable',
                                  dask_chunks={'time': 1, 'x': 1000, 'y': 1000})
        assert lazy_data_array.data.dask
        assert lazy_data_array.ndim == data_array.ndim
        assert 'variable' in lazy_data_array.dims
        assert lazy_data_array[1, :2, 950:1050, 950:1050].equals(data_array[1, :2, 950:1050, 950:1050])

    dataset = dc.load(product='ls5_nbar_albers', measurements=['blue'])
    assert dataset['blue'].size

    dataset = dc.load(product='ls5_nbar_albers',
                      latitude=(-35.2, -35.3), longitude=(149.1, 149.2))
    assert dataset['blue'].size

    with rasterio.Env():
        lazy_dataset = dc.load(product='ls5_nbar_albers',
                               latitude=(-35.2, -35.3), longitude=(149.1, 149.2),
                               dask_chunks={'time': 1})
        assert lazy_dataset['blue'].data.dask
        assert lazy_dataset.blue[:2, :100, :100].equals(dataset.blue[:2, :100, :100])
        assert lazy_dataset.isel(time=slice(0, 2),
                                 x=slice(950, 1050),
                                 y=slice(950, 1050)).equals(
            dataset.isel(time=slice(0, 2), x=slice(950, 1050), y=slice(950, 1050)))

    dataset_like = dc.load(product='ls5_nbar_albers', measurements=['blue'], like=dataset)
    assert (dataset.blue == dataset_like.blue).all()

    data_array = dc.load(product='ls5_nbar_albers',
                         latitude=(-35, -36), longitude=(149, 150),
                         measurements=['blue'], group_by='solar_day')

    dataset = dc.load(product='ls5_nbar_albers',
                      latitude=(-35.2, -35.3), longitude=(149.1, 149.2),
                      align=(5, 20))
    assert dataset.geobox.affine.f % abs(dataset.geobox.affine.e) == 5
    assert dataset.geobox.affine.c % abs(dataset.geobox.affine.a) == 20

    dataset_like = dc.load(product='ls5_nbar_albers', measurements=['blue'], like=dataset)
    assert (dataset.blue == dataset_like.blue).all()

    products_df = dc.list_products()
    assert len(products_df)
    assert len(products_df[products_df['name'].isin(['ls5_nbar_albers'])])
    assert len(products_df[products_df['name'].isin(['ls5_pq_albers'])])

    assert len(dc.list_measurements())

    resamp = ['nearest', 'cubic', 'bilinear', 'cubic_spline', 'lanczos', 'average']
    results = {}

    # Measure the maximum pixel-to-pixel jump along the centre row and centre
    # column; smoother resampling methods should produce smaller values.
    def calc_max_change(da):
        midline = int(da.shape[0] * 0.5)
        a = int(abs(da[midline, :-1].data - da[midline, 1:].data).max())

        centerline = int(da.shape[1] * 0.5)
        b = int(abs(da[:-1, centerline].data - da[1:, centerline].data).max())
        return a + b

    for resamp_meth in resamp:
        dataset = dc.load(product='ls5_nbar_albers', measurements=['blue'],
                          latitude=(-35.28, -35.285), longitude=(149.15, 149.155),
                          output_crs='EPSG:4326', resolution=(-0.0000125, 0.0000125),
                          resampling=resamp_meth)
        results[resamp_meth] = calc_max_change(dataset.blue.isel(time=0))

    assert results['cubic_spline'] < results['nearest']
    assert results['lanczos'] < results['average']
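# A toy illustration of the calc_max_change metric used above (an assumed
# standalone example, not part of the original tests): a smooth ramp scores
# lower than a blocky, hard-edged array, which is why smoother resampling
# methods win in the assertions above.
def example_calc_max_change():
    import numpy as np
    import xarray as xr

    def calc_max_change(da):
        midline = int(da.shape[0] * 0.5)
        a = int(abs(da[midline, :-1].data - da[midline, 1:].data).max())

        centerline = int(da.shape[1] * 0.5)
        b = int(abs(da[:-1, centerline].data - da[1:, centerline].data).max())
        return a + b

    ramp = xr.DataArray(np.add.outer(np.arange(8), np.arange(8)))            # smooth gradient
    blocky = xr.DataArray(np.add.outer(np.arange(8), np.arange(8)) // 4 * 4)  # hard steps of 4

    assert calc_max_change(ramp) < calc_max_change(blocky)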