# Imports assumed by this snippet (stripped in the original extract).
from datacube import Datacube
from datacube.utils.cog import write_cog


def calculate_index_task(params):
    item = params.get('item')
    index = params.get('index', 'rgb')
    dc = Datacube(config="datacube.conf")
    product = "ls8_level1_usgs"

    x = (item["bbox"][0], item["bbox"][2])
    y = (item["bbox"][1], item["bbox"][3])
    time = item["properties"]["datetime"].split("T")[0]
    # band_2/band_3/band_4 are the blue/green/red aliases for Landsat 8 Level-1
    measurements = ["band_2", "band_3", "band_4"]

    query = {
        'x': x,
        'y': y,
        'time': time,
        'measurements': measurements,
        'output_crs': 'EPSG:4326',
        'resolution': (-0.001, 0.001),
    }

    ds = dc.load(product=product, **query)
    print(ds)

    rgb_da = ds.to_array()
    suffix = 'rgb'
    filename = f'{item["id"]}_{suffix}.tif'
    path = config.STATIC_DIR / filename
    # Write the COG to the path that is returned, not a hard-coded file name.
    write_cog(geo_im=rgb_da, fname=str(path), overwrite=True)

    return {"success": True, "url": str(path)}
def check_data_with_api(index, time_slices):
    """Check retrieved data for specific values.

    We scale down by 100 and check for predefined values in the corners.
    """
    from datacube import Datacube
    dc = Datacube(index=index)

    # Make the retrieved data 100 less granular
    shape_x = int(GEOTIFF['shape']['x'] / 100.0)
    shape_y = int(GEOTIFF['shape']['y'] / 100.0)
    pixel_x = int(GEOTIFF['pixel_size']['x'] * 100)
    pixel_y = int(GEOTIFF['pixel_size']['y'] * 100)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)
    geobox = geometry.GeoBox(shape_x + 1, shape_y + 1,
                             Affine(pixel_x, 0.0, GEOTIFF['ul']['x'],
                                    0.0, pixel_y, GEOTIFF['ul']['y']),
                             geometry.CRS(GEOTIFF['crs']))

    observations = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values())

    assert hashlib.md5(data.green.data).hexdigest() == '7f5ace486e88d33edf3512e8de6b6996'
    assert hashlib.md5(data.blue.data).hexdigest() == 'b58204f1e10dd678b292df188c242c7e'
    for time_slice in range(time_slices):
        assert data.blue.values[time_slice][-1, -1] == -999
def xadataset_from_odcdataset(datasets: Union[List[ODCDataset], ODCDataset] = None,
                              ids: Union[List[UUID], UUID] = None,
                              measurements: List[str] = None) -> xa.Dataset:
    """ Load an xarray Dataset from ODCDataset(s) or ODCDataset id(s)
    :param datasets: ODCDataset(s), optional
    :param ids: ODCDataset id(s), optional
    :param measurements: list of measurements/bands to load, optional
    :return: xa.Dataset containing the given ODCDatasets or ids
    """
    dc = Datacube(app="dataset_from_ODCDataset")

    if not datasets:
        if not isinstance(ids, list):
            ids = [ids]
        datasets = [dc.index.datasets.get(id_) for id_ in ids]

    if not isinstance(datasets, list):
        datasets = [datasets]

    product_name = datasets[0].metadata_doc["product"]["name"]
    crs = datasets[0].crs
    res = (10, -10)  # TODO: handle other resolutions

    ds = dc.load(product=product_name,
                 dask_chunks={},
                 measurements=measurements,
                 output_crs=str(crs),
                 resolution=res,
                 datasets=datasets)
    return ds
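# Hypothetical usage sketch for the loader above: the UUID is a placeholder for
# a dataset id that is assumed to exist in the index, and the band name "B04"
# is an assumed measurement name, not taken from the original snippet.
from uuid import UUID

example_ds = xadataset_from_odcdataset(
    ids=UUID("00000000-0000-0000-0000-000000000001"),
    measurements=["B04"],
)
print(example_ds)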
def __call__(self, index, product, time, group_by) -> Tile:
    # Do for a specific poly whose boundary is known
    output_crs = CRS(self.storage['crs'])
    filtered_items = ['geopolygon', 'lon', 'lat', 'longitude', 'latitude', 'x', 'y']
    filtered_dict = {k: v for k, v in self.input_region.items() if k in filtered_items}
    if self.feature is not None:
        filtered_dict['geopolygon'] = self.feature.geopolygon
        geopoly = filtered_dict['geopolygon']
    else:
        geopoly = query_geopolygon(**self.input_region)

    dc = Datacube(index=index)
    datasets = dc.find_datasets(product=product, time=time, group_by=group_by, **filtered_dict)
    group_by = query_group_by(group_by=group_by)
    sources = dc.group_datasets(datasets, group_by)
    output_resolution = [self.storage['resolution'][dim] for dim in output_crs.dimensions]
    geopoly = geopoly.to_crs(output_crs)
    geobox = GeoBox.from_geopolygon(geopoly, resolution=output_resolution)

    return Tile(sources, geobox)
class IndexGEETestCase(unittest.TestCase):

    def setUp(self):
        self.datacube = Datacube(config=DATACUBE_CONFIG)
        IndexerTestCase().test_product_generation()
        product = self.datacube.index.products.get_by_name('ls8_test')
        if product is None:
            self.skipTest('No product available to index')
        datasets = self.datacube.find_datasets(product='ls8_test')
        if datasets:
            self.skipTest('Indexed datasets already exist in database')

    def test_index_gee(self):
        product = 'ls8_test'
        latitude = (-4.15, -3.90)
        longitude = (39.50, 39.75)
        time = '2020-01'
        cmd = [
            "index_gee",
            "--product", product,
            "--latitude", str(latitude),
            "--longitude", str(longitude),
            "--time", time,
            "--config", DATACUBE_CONFIG,
            "--no_confirm",
            "-u",
        ]
        subprocess.check_output(cmd)
        datasets = self.datacube.find_datasets(product=product)
        self.assertGreater(len(datasets), 0, 'Expected to find datasets in index')
def native_load(ds, measurements=None, basis=None, **kw):
    """Load a single dataset in native resolution.

    :param ds: Dataset
    :param measurements: List of band names to load
    :param basis: Name of the band to use for computing the reference frame;
                  other bands might be reprojected if they use a different pixel grid
    :param **kw: Any other parameter load_data accepts

    :return: xarray Dataset
    """
    from datacube import Datacube
    # early exit via exception if no compatible grid exists
    geobox = native_geobox(ds, measurements, basis)

    if measurements is not None:
        mm = [ds.type.measurements[n] for n in measurements]
    else:
        mm = ds.type.measurements

    return Datacube.load_data(Datacube.group_datasets([ds], 'time'),
                              geobox,
                              measurements=mm,
                              **kw)
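# Hypothetical usage sketch for native_load: assumes a configured datacube
# index; the dataset id is a placeholder and the band names are assumptions.
from datacube import Datacube

dc = Datacube(app="native-load-example")
dataset = dc.index.datasets.get("00000000-0000-0000-0000-000000000001")  # placeholder id
native = native_load(dataset, measurements=["red", "nir"])
print(native)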
def collect_uris(prod_index, products, expressions):
    """
    Collect all URIs of datasets from products matching search expressions.
    """
    dc = Datacube(index=prod_index)
    for prod in products:
        for dataset in dc.find_datasets_lazy(product=prod, **expressions):
            yield normalize_uri(dataset.local_uri)
def list_products(index):
    """
    List products that are defined in the index
    """
    dc = Datacube(index)
    products = dc.list_products()
    echo(products.to_string(columns=('name', 'description', 'product_type',
                                     'instrument', 'format', 'platform'),
                            justify='left'))
def __get_mask_datasets(self) -> List[ODCDataset]:
    """ Finds mask datasets based on config """
    dc = Datacube(app="mosaic_creator")
    time_range = (str(self.__start_date), str(self.__end_date))
    datasets = dc.find_datasets(product=self.__product_name, time=time_range)
    if not datasets:
        LOGGER.warning("No mask datasets found for "
                       f"product={self.__product_name}, time={time_range}")
        raise ValueError("No datasets found")  # TODO: custom exception
    return datasets
def test_woffles(query, expected):
    dc = Datacube(app='test_wofls')

    bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2']  # inputs needed from EO data

    source = dc.load(product='ls8_nbar_albers', measurements=bands, **query)
    pq = dc.load(product='ls8_pq_albers', like=source)
    dsm = dc.load(product='dsm1sv10', like=source,
                  time=('1900-01-01', '2100-01-01'), resampling='cubic')

    wofls_output = woffles(*(x.isel(time=0) for x in [source, pq, dsm]))

    assert (wofls_output == expected).all()
def test_load_data(tmpdir):
    tmpdir = Path(str(tmpdir))

    group_by = query_group_by('time')
    spatial = dict(resolution=(15, -15),
                   offset=(11230, 1381110))

    nodata = -999
    aa = mk_test_image(96, 64, 'int16', nodata=nodata)

    ds, gbox = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)],
                                tmpdir,
                                prefix='ds1-',
                                timestamp='2018-07-19',
                                **spatial)
    assert ds.time is not None

    ds2, _ = gen_tiff_dataset([SimpleNamespace(name='aa', values=aa, nodata=nodata)],
                              tmpdir,
                              prefix='ds2-',
                              timestamp='2018-07-19',
                              **spatial)
    assert ds2.time is not None
    assert ds.time == ds2.time

    sources = Datacube.group_datasets([ds], 'time')
    sources2 = Datacube.group_datasets([ds, ds2], group_by)

    mm = ['aa']
    mm = [ds.type.measurements[k] for k in mm]

    ds_data = Datacube.load_data(sources, gbox, mm)
    assert ds_data.aa.nodata == nodata
    np.testing.assert_array_equal(aa, ds_data.aa.values[0])

    custom_fuser_call_count = 0

    def custom_fuser(dest, delta):
        nonlocal custom_fuser_call_count
        custom_fuser_call_count += 1
        dest[:] += delta

    progress_call_data = []

    def progress_cbk(n, nt):
        progress_call_data.append((n, nt))

    ds_data = Datacube.load_data(sources2, gbox, mm,
                                 fuse_func=custom_fuser,
                                 progress_cbk=progress_cbk)
    assert ds_data.aa.nodata == nodata
    assert custom_fuser_call_count > 0
    np.testing.assert_array_equal(nodata + aa + aa, ds_data.aa.values[0])
    assert progress_call_data == [(1, 2), (2, 2)]
def check_open_with_api(driver_manager, time_slices):
    from datacube import Datacube
    dc = Datacube(driver_manager=driver_manager)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)
    geobox = geometry.GeoBox(200, 200,
                             Affine(25, 0.0, 638000, 0.0, -25, 6276000),
                             geometry.CRS('EPSG:28355'))
    observations = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values(),
                        driver_manager=driver_manager)
    assert data.blue.shape == (time_slices, 200, 200)
def post_processing(predicted):
    """
    filter prediction results with post processing filters.
    Simplified from production code to skip segmentation, probability, and mode calcs
    """
    dc = Datacube(app='whatever')

    predict = predicted.Predictions

    # --Post process masking------------------------------------------------------
    # print(" masking with AEZ, WDPA, WOfS, slope & elevation")

    # mask out classification beyond AEZ boundary
    gdf = gpd.read_file('data/Sahel.geojson')
    with HiddenPrints():
        mask = xr_rasterize(gdf, predicted)
    predict = predict.where(mask, 0)

    # mask with WDPA
    # url_wdpa = "s3://deafrica-input-datasets/protected_areas/WDPA_southern.tif"
    # wdpa = rio_slurp_xarray(url_wdpa, gbox=predicted.geobox)
    # wdpa = wdpa.astype(bool)
    # predict = predict.where(~wdpa, 0)

    # mask with WOfS
    wofs = dc.load(product='wofs_ls_summary_annual', like=predicted.geobox, time=('2019'))
    wofs = wofs.frequency > 0.2  # threshold
    predict = predict.where(~wofs, 0)

    # mask steep slopes
    url_slope = "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif"
    slope = rio_slurp_xarray(url_slope, gbox=predicted.geobox)
    slope = slope > 50
    predict = predict.where(~slope, 0)

    # mask where the elevation is above 3600m
    elevation = dc.load(product='dem_srtm', like=predicted.geobox)
    elevation = elevation.elevation > 3600  # threshold
    predict = predict.where(~elevation.squeeze(), 0)

    # set dtype
    predict = predict.astype(np.int8)

    return predict
async def get_products(lon1: float, lat1: float, lon2: float, lat2: float,
                       date1: str = None, date2: str = None,
                       limit: int = 10, days: int = 7):
    lon1, lon2 = min(lon1, lon2), max(lon1, lon2)
    lat1, lat2 = min(lat1, lat2), max(lat1, lat2)
    if not date1 or not date2:
        dates = None
    else:
        dates = (date1, date2)

    dc = Datacube(config=config.DATACUBE_CONF)
    # NOTE: the bbox, dates, limit and days arguments are not applied to the
    # search below yet; only the first listed product is queried.
    product = dc.list_products(with_pandas=False)[0]
    print(product)

    # materialise the generator so it can be both printed and iterated
    datasets = list(dc.index.datasets.search(product=product["name"]))
    print(datasets)
    for dataset in datasets:
        print(dataset)
def test_query_dataset_multi_product(index: Index, ls5_dataset_w_children: Dataset):
    # We have one ls5 level1 and its child nbar
    dc = Datacube(index)

    # Can we query a single product name?
    datasets = dc.find_datasets(product='ls5_nbar_scene')
    assert len(datasets) == 1

    # Can we query multiple products?
    datasets = dc.find_datasets(product=['ls5_nbar_scene', 'ls5_level1_scene'])
    assert len(datasets) == 2

    # Can we query multiple products in a tuple?
    datasets = dc.find_datasets(product=('ls5_nbar_scene', 'ls5_level1_scene'))
    assert len(datasets) == 2
def check_open_with_api(index):
    from datacube import Datacube
    dc = Datacube(index=index)

    input_type_name = 'ls5_nbar_albers'
    input_type = dc.index.products.get_by_name(input_type_name)
    geobox = GeoBox(200, 200, Affine(25, 0.0, 1500000, 0.0, -25, -3900000), CRS('EPSG:3577'))
    observations = dc.find_datasets(product='ls5_nbar_albers', geopolygon=geobox.extent)
    group_by = query_group_by('time')
    sources = dc.group_datasets(observations, group_by)
    data = dc.load_data(sources, geobox, input_type.measurements.values())
    assert data.blue.shape == (1, 200, 200)
def post_processing(
    predicted: xr.Dataset,
) -> xr.DataArray:
    """
    filter prediction results with post processing filters.
    :param predicted: The prediction results
    """
    dc = Datacube(app='whatever')

    # grab predictions and proba for post process filtering
    predict = predicted.Predictions

    # mask out classification beyond AEZ boundary
    gdf = gpd.read_file('data/Western.geojson')
    with HiddenPrints():
        mask = xr_rasterize(gdf, predicted)
    predict = predict.where(mask, 0)

    # mask with WDPA
    url_wdpa = "s3://deafrica-input-datasets/protected_areas/WDPA_western.tif"
    wdpa = rio_slurp_xarray(url_wdpa, gbox=predicted.geobox)
    wdpa = wdpa.astype(bool)
    predict = predict.where(~wdpa, 0)

    # mask with WOfS
    wofs = dc.load(product='ga_ls8c_wofs_2_summary', like=predicted.geobox)
    wofs = wofs.frequency > 0.2  # threshold
    predict = predict.where(~wofs, 0)

    # mask steep slopes
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=predicted.geobox)
    slope = slope > 35
    predict = predict.where(~slope, 0)

    # mask where the elevation is above 3600m
    elevation = dc.load(product='dem_srtm', like=predicted.geobox)
    elevation = elevation.elevation > 3600  # threshold
    predict = predict.where(~elevation.squeeze(), 0)

    # set dtype
    predict = predict.astype(np.int8)

    return predict
def cli(limit, update_if_exists, bbox, product, add_product, workers):
    """
    Index the Copernicus DEM automatically.
    """
    if product not in PRODUCTS.keys():
        raise ValueError(
            f"Unknown product {product}, must be one of {' '.join(PRODUCTS.keys())}"
        )

    dc = Datacube()

    if add_product:
        add_cop_dem_product(dc, product)

    print(f"Indexing Copernicus DEM for {product} with bounding box of {bbox}")

    added, failed = cop_dem_to_dc(dc, product, bbox, limit, update_if_exists, n_workers=workers)

    print(f"Added {added} Datasets, failed {failed} Datasets")

    if failed > 0:
        sys.exit(failed)
def compute_confidence_filtered(self):
    """
    Return the wofs filtered summary band data that is 10% filtered by confidence band.
    """
    con_layer = self.compute_confidence()
    env = self.cfg.get_env_of_product('wofs_summary')

    with Datacube(app='wofs_summary', env=env) as dc:
        gwf = GridWorkflow(dc.index, self.grid_spec)
        indexed_tile = gwf.list_cells(self.tile_index, product='wofs_summary')
        # load the data of the tile
        dataset = gwf.load(tile=indexed_tile[self.tile_index], measurements=['frequency'])
        data = dataset.data_vars['frequency'].data.ravel().reshape(
            self.grid_spec.tile_resolution)

    con_filtering = self.cfg.cfg.get('confidence_filtering')
    threshold = None
    if con_filtering:
        threshold = con_filtering.get('threshold')

    if threshold:
        data[con_layer <= threshold] = DEFAULT_FLOAT_NODATA
    else:
        data[con_layer <= 0.10] = DEFAULT_FLOAT_NODATA

    return data
def load_tile_data(self, factors):
    """
    Load and return factor data for confidence band prediction.

    :param factors: List of factor info as given by Config
    """
    model_data = []
    for fac in factors:
        factor = self.cfg.get_factor_info(fac)
        with Datacube(app='confidence_layer', env=factor['env']) as dc:
            gwf = GridWorkflow(dc.index, self.grid_spec)
            indexed_tiles = gwf.list_cells(self.tile_index, product=factor['product'])
            # load the data of the tile
            dataset = gwf.load(tile=indexed_tiles[self.tile_index], measurements=[factor['band']])
            data = dataset.data_vars[factor['band']].data

        # Rescale where needed: keep an eye on this since it is to do with different
        # scaling factors used during training than what is on the datacube
        if factor['name'].startswith('phat'):
            data = data * 100.0
            data[data < 0.0] = 0.0
        if factor['name'].startswith('mrvbf'):
            data[data > 10] = 10
        if factor['name'].startswith('modis'):
            data[data > 100] = 100

        model_data.append(data.ravel())
        del data
    return np.column_stack(model_data)
def get_mapped_crses(*product_names: str, index: Index = None) -> Iterable[Dict]:
    with Datacube(index=index) as dc:
        index = dc.index
        for product_name in product_names:
            product = index.products.get_by_name(product_name)

            # SQLAlchemy queries require "column == None", not "column is None"
            # due to operator overloading:
            # pylint: disable=singleton-comparison
            res = (
                alchemy_engine(index)
                .execute(
                    select(
                        [
                            literal(product.name).label("product"),
                            get_dataset_srid_alchemy_expression(
                                product.metadata_type
                            ).label("crs"),
                        ]
                    )
                    .where(DATASET.c.dataset_type_ref == product.id)
                    .where(DATASET.c.archived == None)
                    .limit(1)
                )
                .fetchone()
            )
            if res:
                yield dict(res)
def run_one(config_file, input_dataset, environment=None):
    """
    Run with CONFIG_FILE on a single INPUT_DATASET

    INPUT_DATASET may be either a URL or a Dataset ID
    """
    alchemist = Alchemist(config_file=config_file, dc_env=environment)

    dc = Datacube(env=environment)
    try:
        ds = dc.index.datasets.get(input_dataset)
    except ValueError as e:
        _LOG.info("Couldn't find dataset with ID={} with exception {}, trying by URL".format(
            input_dataset, e
        ))
        # Couldn't find a dataset by ID, try something else
        if '://' in input_dataset:
            # Smells like a URL
            input_url = input_dataset
        else:
            # Treat the input as a local file path
            input_url = Path(input_dataset).as_uri()

        ds = dc.index.datasets.get_datasets_for_location(input_url)
        # Currently this doesn't work by URL... TODO: fixme!

    task = alchemist.generate_task(ds)
    execute_task(task)
def group(self, datasets: VirtualDatasetBag, **group_settings: Dict[str, Any]) -> VirtualDatasetBox:
    geopolygon = datasets.geopolygon
    selected = list(datasets.bag)

    # geobox
    merged = merge_search_terms(self, group_settings)

    try:
        geobox = output_geobox(datasets=selected,
                               grid_spec=datasets.product_definitions[self._product].grid_spec,
                               geopolygon=geopolygon,
                               **select_keys(merged, self._GEOBOX_KEYS))
        load_natively = False
    except ValueError:
        # we are not calculating geoboxes here for the moment
        # since it may require filesystem access
        # in ODC 2.0 the dataset should know the information required
        geobox = None
        load_natively = True

    # group by time
    group_query = query_group_by(**select_keys(merged, self._GROUPING_KEYS))

    # information needed for Datacube.load_data
    return VirtualDatasetBox(Datacube.group_datasets(selected, group_query),
                             geobox,
                             load_natively,
                             datasets.product_definitions,
                             geopolygon=None if not load_natively else geopolygon)
def cli(skip_lineage, fail_on_missing_lineage, verify_lineage, uri, product):
    """ Iterate through files in an S3 bucket and add them to datacube"""

    # Get a generator from the supplied S3 URI for metadata definitions
    fetcher = S3Fetcher()

    # TODO: Share Fetcher
    s3_obj_stream = s3_find_glob(uri, False)

    # Extract URLs from the output of the iterator before passing to the fetcher
    s3_url_stream = (o.url for o in s3_obj_stream)

    # TODO: Capture S3 URLs in batches and perform bulk_location_has

    # Consume the generator and fetch the YAMLs
    dc = Datacube()
    added, failed = dump_to_odc(
        fetcher(s3_url_stream),
        dc,
        product,
        skip_lineage=skip_lineage,
        fail_on_missing_lineage=fail_on_missing_lineage,
        verify_lineage=verify_lineage,
    )

    print(f"Added {added} Datasets, Failed {failed} Datasets")
def test_grouping_datasets():
    def group_func(d):
        return d.time

    dimension = 'time'
    units = None
    datasets = [
        SimpleNamespace(time=datetime.datetime(2016, 1, 1), value='foo', id=UUID(int=10)),
        SimpleNamespace(time=datetime.datetime(2016, 2, 1), value='bar', id=UUID(int=1)),
        SimpleNamespace(time=datetime.datetime(2016, 1, 1), value='flim', id=UUID(int=9)),
    ]

    group_by = GroupBy(dimension, group_func, units, sort_key=group_func)
    grouped = Datacube.group_datasets(datasets, group_by)

    dss = grouped.isel(time=0).values[()]
    assert isinstance(dss, tuple)
    assert len(dss) == 2
    assert [ds.value for ds in dss] == ['flim', 'foo']

    dss = grouped.isel(time=1).values[()]
    assert isinstance(dss, tuple)
    assert len(dss) == 1
    assert [ds.value for ds in dss] == ['bar']

    assert str(grouped.time.dtype) == 'datetime64[ns]'
    assert grouped.loc['2016-01-01':'2016-01-15']
def test_wofs_filtered():
    cfg = Config('../configs/template_client.yaml')
    grid_spec = GridSpec(crs=CRS('EPSG:3577'), tile_size=(100000, 100000), resolution=(-25, 25))
    cell_index = (17, -39)
    wf = WofsFiltered(cfg, grid_spec, cell_index)
    confidence = wf.compute_confidence(cell_index)
    filtered = wf.compute_confidence_filtered()

    # Display images: to be removed later
    with Datacube(app='wofs_summary', env='dev') as dc:
        gwf = GridWorkflow(dc.index, grid_spec)
        indexed_tile = gwf.list_cells(cell_index, product='wofs_statistical_summary')
        # load the data of the tile
        dataset = gwf.load(tile=indexed_tile[cell_index], measurements=['frequency'])
        frequency = dataset.data_vars['frequency'].data.ravel().reshape(
            grid_spec.tile_resolution)

    # Check with previous run
    with rasterio.open('confidenceFilteredWOfS_17_-39_epsilon=10.tiff') as f:
        data = f.read(1)

    plt.subplot(221)
    plt.imshow(frequency)
    plt.subplot(222)
    plt.imshow(data)
    plt.subplot(223)
    plt.imshow(confidence)
    plt.subplot(224)
    plt.imshow(filtered)
    plt.show()

    wf.compute_and_write()
def mk_sample_xr_dataset(crs="EPSG:3578",
                         shape=(33, 74),
                         resolution=None,
                         xy=(0, 0),
                         time='2020-02-13T11:12:13.1234567Z',
                         name='band',
                         dtype='int16',
                         nodata=-999,
                         units='1'):
    """ Note that resolution is in Y,X order to match that of GeoBox.

        shape      (height, width)
        resolution (y: float, x: float) - in YX, to match GeoBox/shape notation
        xy         (x: float, y: float) -- location of the top-left corner of the
                   top-left pixel in CRS units
    """

    if isinstance(crs, str):
        crs = CRS(crs)

    if resolution is None:
        resolution = (-10, 10) if crs is None or crs.projected else (-0.01, 0.01)

    t_coords = {}
    if time is not None:
        t_coords['time'] = mk_time_coord([time])

    transform = Affine.translation(*xy) * Affine.scale(*resolution[::-1])
    h, w = shape
    geobox = GeoBox(w, h, transform, crs)

    return Datacube.create_storage(t_coords, geobox,
                                   [Measurement(name=name, dtype=dtype, nodata=nodata, units=units)])
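# Hypothetical usage sketch for mk_sample_xr_dataset: the CRS, shape and
# resolution below are illustrative values only.
sample = mk_sample_xr_dataset(crs="EPSG:3577", shape=(100, 200), resolution=(-25, 25))
print(sample.band.dims)   # expected: ('time', 'y', 'x')
print(sample.band.shape)  # expected: (1, 100, 200)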
def get_dataset_values(product_name, product_config, time_range=None):
    """
    Extract the file list corresponding to a product for the given year and month using datacube API.
    """
    try:
        query = {**dict(product=product_name), **time_range}
    except TypeError:
        # Time range is None
        query = {**dict(product=product_name)}

    dc = Datacube(app='cog-worklist query')

    field_names = get_field_names(product_config)

    LOG.info(
        f"Perform a datacube dataset search returning only the specified fields, {field_names}."
    )
    ds_records = dc.index.datasets.search_returning(field_names=tuple(field_names), **query)

    search_results = False
    for ds_rec in ds_records:
        search_results = True
        yield check_prefix_from_query_result(ds_rec, product_config)

    if not search_results:
        LOG.warning(
            f"Datacube product query is empty for {product_name} product with time-range, {time_range}"
        )
def main():
    config_yaml = """
sources:
  - product: ls8_nbar_albers
    measurements: [red, green, blue]
    group_by: solar_day

date_ranges:
  start_date: 2014-06-01
  end_date: 2014-07-01

storage:
  # this driver enables in-memory computation
  driver: xarray

  crs: EPSG:3577
  tile_size:
    x: 40000.0
    y: 40000.0
  resolution:
    x: 25
    y: -25
  chunking:
    x: 200
    y: 200
    time: 1
  dimension_order: [time, y, x]

computation:
  chunking:
    x: 800
    y: 800

input_region:
  tile: [15, -41]

output_products:
  - name: nbar_mean
    statistic: simple
    statistic_args:
      reduction_function: mean
"""
    # or manually creating a config dictionary works too
    config = yaml.safe_load(config_yaml)
    print(yaml.dump(config, indent=4))

    dc = Datacube()
    app = StatsApp(config, dc.index)

    print('generating tasks')
    tasks = app.generate_tasks()

    print('running tasks')
    for task in tasks:
        # this method is only available for the xarray output driver
        output = app.execute_task(task)

        print('result for {}'.format(task.tile_index))
        print(output.result['nbar_mean'])
#print "number of projects: ", len(projectList) #for sfproject in projectList: #print "--" #print "name: ", sfproject.getProjectName() #print "id: ", sfproject.getProjectId() #pdl = sfproject.getProjectDependencyList() #for listItem in pdl: #print "deps: ", listItem.getTimePeriodStart(), "-", listItem.getTimePeriodEnd(), ": ", listItem.getDependencies() #print "url: ", listItem.getUrl() #print "version: ", listItem.getVersion() #print "duration %1.8f seconds" % (testEnde1 - testStart1) testStart2 = time.clock() # create datacube dataCube = Datacube() # start with an empty list of data levels dataLevelList = [] for sfproject in projectList: pdl = sfproject.getProjectDependencyList() for listItem in pdl: timePeriodStart = listItem.getTimePeriodStart() timePeriodEnd = listItem.getTimePeriodEnd() # verify datalevel existance # if not available -- create according data level entry currentTime = timePeriodStart while currentTime <= timePeriodEnd: dataLevelList.append(currentTime) currentTime = currentTime + datetime.timedelta(days=1)