def test_fractional_cover_lazy(sr_filepath, fc_filepath):
    """Fractional cover over a chunked (dask-backed) input must stay lazy
    until explicitly loaded, and must match the validation dataset."""
    sr_dataset = open_dataset(sr_filepath, chunks={'x': 50, 'y': 50})

    measurements = [
        Measurement(name='PV', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='NPV', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='BS', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='UE', dtype='int8', nodata=-1, units='1'),
    ]

    fc_dataset = fractional_cover(sr_dataset, measurements)

    # All output bands must be dask-backed (lazy) before load()
    assert fc_dataset.PV.data.dask
    assert fc_dataset.NPV.data.dask
    assert fc_dataset.BS.data.dask
    assert fc_dataset.UE.data.dask

    fc_dataset.load()

    assert set(fc_dataset.data_vars.keys()) == {m['name'] for m in measurements}

    validation_ds = open_dataset(fc_filepath)
    # NOTE: `validation_ds == fc_dataset` yields an element-wise Dataset whose
    # truth value is ambiguous, so only .equals() is a valid whole-dataset check.
    assert validation_ds.equals(fc_dataset)
def measurements(self, input_measurements):
    """Describe the outputs: the named score band plus the supporting
    'water' and 'pmask' bands."""
    score = Measurement(name=self.var_name, dtype='float32',
                        nodata=-9999, units='1')
    water = Measurement(name='water', dtype='int16', nodata=0, units='1')
    pmask = Measurement(name='pmask', dtype='bool', nodata=False, units='1')
    return {self.var_name: score, 'water': water, 'pmask': pmask}
def measurements(self, m):
    """Return the three float32 deviation outputs (sdev, edev, bcdev)."""
    band_names = ("sdev", "edev", "bcdev")
    mm = [Measurement(name=band, dtype="float32", nodata=np.nan, units="1")
          for band in band_names]
    LOG.debug("Returning measurements: %s", mm)
    return mm
def fuse_measurement(dest: np.ndarray,
                     datasets: List[Dataset],
                     geobox: GeoBox,
                     measurement: Measurement,
                     mk_new: Callable[[BandInfo], DataSource],
                     skip_broken_datasets: bool = False):
    """Reproject one measurement from several datasets and fuse it into `dest`."""
    sources = [mk_new(BandInfo(ds, measurement.name)) for ds in datasets]
    # nodata sentinel cast to the destination array's dtype
    fill_value = dest.dtype.type(measurement.nodata)
    reproject_and_fuse(sources,
                       dest,
                       geobox,
                       fill_value,
                       resampling=measurement.get('resampling_method', 'nearest'),
                       fuse_func=measurement.get('fuser', None),
                       skip_broken_datasets=skip_broken_datasets)
def output_measurements(
        self, product_definitions: Dict[str, DatasetType]) -> Dict[str, Measurement]:
    """Map measurement names to their metadata for this product node.

    :param product_definitions: mapping of product name to DatasetType
    :raises VirtualProductException: if the product is not in the
        definitions, or a requested measurement cannot be found
    """
    self._assert(
        self._product in product_definitions,
        "product {} not found in definitions".format(self._product))

    product = product_definitions[self._product]

    measurement_dicts = {
        measurement['name']: Measurement(**measurement)
        for measurement in product.definition['measurements']
    }

    # no explicit selection means "all measurements"
    if self.get('measurements') is None:
        return measurement_dicts

    try:
        return {
            name: measurement_dicts[product.canonical_measurement(name)]
            for name in self['measurements']
        }
    except KeyError as ke:
        # chain the original KeyError so the root cause stays visible
        raise VirtualProductException(
            "could not find measurement: {}".format(ke.args)) from ke
def output_measurements(
        self, product_definitions: Dict[str, DatasetType]) -> Dict[str, Measurement]:
    """Collate: all children must expose the same measurement set; optionally
    append a source-index measurement."""
    child_measurements = [child.output_measurements(product_definitions)
                          for child in self._children]

    first, *rest = child_measurements
    for other in rest:
        self._assert(
            set(other) == set(first),
            "child datasets do not all have the same set of measurements")

    name = self.get('index_measurement_name')
    if name is None:
        return first

    self._assert(
        name not in first,
        "source index measurement '{}' already present".format(name))
    first[name] = Measurement(name=name, dtype='int8', nodata=-1, units='1')
    return first
def measurements(self, input_measurements):
    """Single output band: the spectral MAD ('smad') statistic."""
    smad = Measurement(name='smad', dtype='float32', nodata=np.nan, units='1')
    return [smad]
def mk_sample_xr_dataset(crs="EPSG:3578",
                         shape=(33, 74),
                         resolution=None,
                         xy=(0, 0),
                         time='2020-02-13T11:12:13.1234567Z',
                         name='band',
                         dtype='int16',
                         nodata=-999,
                         units='1'):
    """ Note that resolution is in Y,X order to match that of GeoBox.

        shape (height, width)
        resolution (y: float, x: float) - in YX, to match GeoBox/shape notation
        xy (x: float, y: float) -- location of the top-left corner of the top-left pixel in CRS units
    """
    if isinstance(crs, str):
        crs = CRS(crs)

    if resolution is None:
        # 10m pixels for projected CRSs, 0.01 degree otherwise
        resolution = (-10, 10) if crs is None or crs.projected else (-0.01, 0.01)

    t_coords = {} if time is None else {'time': mk_time_coord([time])}

    # translate to the top-left corner, then scale per pixel (XY order)
    transform = Affine.translation(*xy) * Affine.scale(*resolution[::-1])
    h, w = shape
    geobox = GeoBox(w, h, transform, crs)

    band = Measurement(name=name, dtype=dtype, nodata=nodata, units=units)
    return Datacube.create_storage(t_coords, geobox, [band])
def fetch_child(child, source_index, r):
    """Fetch data for one child; return None for an empty raster.

    When an 'index_measurement_name' is configured, also attach a band
    recording which child (source_index) each pixel came from.
    """
    # generator expression, not a list, for the emptiness test (perf lint C419)
    if any(x == 0 for x in r.box.shape):
        # empty raster
        return None

    result = child.fetch(r, **load_settings)
    name = self.get('index_measurement_name')

    if name is None:
        return result

    # implication for dask?
    measurement = Measurement(name=name, dtype='int8', nodata=-1, units='1')
    shape = select_unique([result[band].shape for band in result.data_vars])
    array = numpy.full(shape, source_index, dtype=measurement.dtype)
    first = result[list(result.data_vars)[0]]
    result[name] = xarray.DataArray(array,
                                    dims=first.dims,
                                    coords=first.coords,
                                    name=name).assign_attrs(
                                        units=measurement.units,
                                        nodata=measurement.nodata)
    return result
def odc_style_xr_dataset():
    """An xarray.Dataset with ODC style coordinates and CRS, and no time dimension.

    Contains an EPSG:4326, single variable 'B10' of 100x100 int16 pixels."""
    transform = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
    geobox = geometry.GeoBox(100, 100, transform, geometry.CRS(GEO_PROJ))
    b10 = Measurement(name='B10', dtype='int16', nodata=0, units='1')
    return Datacube.create_storage({}, geobox, [b10])
def measurements(self, input_measurements):
    """One copy of every input band per percentile, renamed '<band>_PC_<q>',
    then expanded with per-band index metadata by PerBandIndexStat."""
    renamed = []
    for q in self.qs:
        for m in input_measurements:
            attrs = dict(vars(m))
            attrs['name'] = m.name + '_PC_' + str(q)
            renamed.append(Measurement(**attrs))
    delegate = PerBandIndexStat(per_pixel_metadata=self.per_pixel_metadata)
    return delegate.measurements(renamed)
def measurements(self, input_measurements):
    """Nine float32 outputs: exceedance percentage, mean and std for each of
    the tasseled-cap brightness/greenness/wetness components."""
    stats = ('pct_exceedance', 'mean', 'std')
    components = ('brightness', 'greenness', 'wetness')
    return [
        Measurement(name='{}_{}'.format(stat, component),
                    dtype='float32', nodata=-1, units='1')
        for stat in stats
        for component in components
    ]
def morph_measurement(src_measurements, spec):
    """Build an output Measurement from a source one, with 'name', 'nodata'
    and 'dtype' optionally overridden by `spec`.

    :raises ValueError: if `spec` names no source variable, or the named
        variable is not in `src_measurements`
    """
    src_varname = spec.get('src_varname', spec.get('name', None))
    # explicit raise instead of assert: validation must survive `python -O`
    if src_varname is None:
        raise ValueError("spec must provide 'src_varname' or 'name'")

    measurement = src_measurements.get(src_varname, None)
    if measurement is None:
        raise ValueError("No such variable in the source product: {}".format(src_varname))

    measurement.update({k: spec.get(k, measurement[k])
                        for k in ('name', 'nodata', 'dtype')})
    return Measurement(**measurement)
def measurements(self, input_measurements):
    """Outputs: wet/clear observation counts plus wet frequency.
    Requires a 'water' band among the inputs."""
    input_names = {m.name for m in input_measurements}
    assert 'water' in input_names

    frequency = Measurement(name='frequency', dtype='float32', nodata=-1, units='1')
    if self.freq_only:
        return [frequency]

    wet = Measurement(name='count_wet', dtype='int16', nodata=-1, units='1')
    dry = Measurement(name='count_clear', dtype='int16', nodata=-1, units='1')
    return [wet, dry, frequency]
def test_normalised_difference_stats(dataset, output_name):
    """NormalisedDifferenceStats: compute() output structure and
    measurements() validation behaviour."""
    var1, var2 = list(dataset.data_vars)
    ndstat = NormalisedDifferenceStats(var1, var2, output_name)

    result = ndstat.compute(dataset)
    assert isinstance(result, xr.Dataset)
    assert 'time' not in result.dims
    assert dataset.crs == result.crs

    expected_output_varnames = set(f'{output_name}_{stat_name}'
                                   for stat_name in ndstat.stats)
    assert set(result.data_vars) == expected_output_varnames

    # measurements() must reject inputs lacking the two required bands
    with pytest.raises(StatsConfigurationError):
        ndstat.measurements([Measurement(name='foo', **FAKE_MEASUREMENT_INFO)])

    # and produce the expected output names on valid input
    input_measurements = [Measurement(name=name, **FAKE_MEASUREMENT_INFO)
                          for name in (var1, var2)]
    output_measurements = ndstat.measurements(input_measurements)
    assert expected_output_varnames == {m.name for m in output_measurements}
def measurements(self, input_measurements):
    """Extend the parent's outputs with optional per-pixel provenance bands
    (source index / observation time / observation date), one per input band,
    controlled by self.per_pixel_metadata."""
    def derived(suffix, dtype, nodata, units):
        # one provenance band per input band, name suffixed accordingly
        return [Measurement(name=m.name + suffix, dtype=dtype,
                            nodata=nodata, units=units)
                for m in input_measurements]

    all_measurements = super(PerBandIndexStat, self).measurements(input_measurements)

    metadata = self.per_pixel_metadata
    if 'source' in metadata:
        all_measurements += derived('_source', 'int8', -1, '1')
    if 'observed' in metadata:
        all_measurements += derived('_observed', 'float64', 0,
                                    'seconds since 1970-01-01 00:00:00')
    if 'observed_date' in metadata:
        all_measurements += derived('_observed_date', 'int32', 0,
                                    'Date as YYYYMMDD')
    return all_measurements
def measurements(self, input_measurements):
    """One float32 output per statistic, named '<name>_<stat>'.
    Both configured input bands must be present."""
    available = {m.name for m in input_measurements}
    if not {self.band1, self.band2} <= available:
        raise StatsConfigurationError(
            'Input measurements for %s must include "%s" and "%s"' %
            (self.name, self.band1, self.band2))

    return [Measurement(name='_'.join([self.name, stat]),
                        dtype='float32', nodata=-1, units='1')
            for stat in self.stats]
def test_fractional_cover(sr_filepath, fc_filepath):
    """Eager fractional cover must produce the requested bands and match the
    validation dataset."""
    sr_dataset = open_dataset(sr_filepath)

    measurements = [
        Measurement(name='PV', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='NPV', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='BS', dtype='int8', nodata=-1, units='percent'),
        Measurement(name='UE', dtype='int8', nodata=-1, units='1'),
    ]

    fc_dataset = fractional_cover(sr_dataset, measurements)

    assert set(fc_dataset.data_vars.keys()) == {m['name'] for m in measurements}

    validation_ds = open_dataset(fc_filepath)
    # NOTE: `validation_ds == fc_dataset` yields an element-wise Dataset whose
    # truth value is ambiguous, so only .equals() is a valid whole-dataset check.
    assert validation_ds.equals(fc_dataset)
def __init__(self, metadata_type, product_type, input_measurements, storage,
             name, file_path_template, stat_name, statistic,
             output_params=None, extras=None, stats_metadata=None,
             custom_metadata=None):
    """Assemble an output product definition from a statistic and its inputs."""
    #: The product name.
    self.name = name

    self.file_path_template = file_path_template

    #: The name of the statistic. Eg, mean, max, medoid, percentile_10
    self.stat_name = stat_name

    #: The implementation of a statistic. See :class:`Statistic`.
    #: Will provide `compute` and `measurements` functions.
    self.statistic = statistic

    inputs = [Measurement(**m) for m in input_measurements]
    outputs = statistic.measurements(inputs)
    self.data_measurements = [dict(output) for output in outputs]

    #: The ODC Product (formerly DatasetType)
    self.product = self._create_product(metadata_type,
                                        product_type,
                                        self.data_measurements,
                                        storage,
                                        stats_metadata=stats_metadata or {},
                                        custom_metadata=custom_metadata or {})

    self.output_params = output_params

    #: A dictionary of extra arguments to be used through the processing chain
    #: Will be available as named argument when producing the output filename
    self.extras = extras or {}
def measurements(self, input_measurements):
    """Rename each band to '<name>_PC_<q>' for every configured percentile
    (widening int8 to int16), then expand via PerBandIndexStat."""
    renamed = []
    for m in input_measurements:
        # int8 inputs get widened to int16 for the percentile outputs
        data_type = 'int16' if m.dtype == 'int8' else m.dtype
        for q in self.qs:
            attrs = {**m,
                     'name': m.name + '_PC_' + str(q),
                     'dtype': data_type}
            renamed.append(Measurement(**attrs))
    delegate = PerBandIndexStat(per_pixel_metadata=self.per_pixel_metadata)
    return delegate.measurements(renamed)
def test_wofs_stats(dataset):
    """WofsStats: measurements() names and compute() output structure."""
    wofsstat = WofsStats()

    # Check that measurements() does something useful
    water_input = [Measurement(name='water', **FAKE_MEASUREMENT_INFO)]
    output_measurements = wofsstat.measurements(water_input)
    expected_vars = {'count_wet', 'count_clear', 'frequency'}
    assert len(output_measurements) == 3
    assert expected_vars == {m.name for m in output_measurements}

    # Check the computation returns something reasonable
    result = wofsstat.compute(dataset)
    assert isinstance(result, xr.Dataset)
    assert 'time' not in result.dims
    assert all(dim in dataset.dims for dim in result.dims)
    assert dataset.crs == result.crs
    assert set(result.data_vars) == expected_vars
def measurements(self, input_measurements):
    """Prepare bit masks from the first input's flag definitions and describe
    one int16 output band per configured variable.

    Side effects: may set self._valid_pq_mask, and stores '_mask'/'mask'
    entries on each dict in self._vars.
    """
    nodata = -1
    bit_defs = input_measurements[0].flags_definition

    if self._nodata_flags is not None:
        nodata_mask = create_mask_value(bit_defs, **self._nodata_flags)
        self._valid_pq_mask = mk_masker(*nodata_mask, invert=True)

    for v in self._vars:
        v['_mask'] = create_mask_value(bit_defs, **v['flags'])
        v['mask'] = mk_masker(*v['_mask'])

    return [Measurement(name=v['name'], dtype='int16', units='1', nodata=nodata)
            for v in self._vars]
def test_measurement():
    """Exercise the Measurement dict-like API: creation, attribute access,
    DataArray attrs, copying and required-key validation."""
    # Can create a measurement
    m = Measurement(name='t', dtype='uint8', nodata=255, units='1')

    # retrieve its vital stats
    assert (m.name, m.dtype, m.nodata, m.units) == ('t', 'uint8', 255, '1')

    # retrieve the information required for filling a DataArray
    assert m.dataarray_attrs() == {'nodata': 255, 'units': '1'}

    # Can add a new attribute by name and ensure it updates the DataArray attrs too
    m['bob'] = 10
    assert m.bob == 10
    assert m.dataarray_attrs() == {'nodata': 255, 'units': '1', 'bob': 10}

    m['none'] = None
    assert m.none is None

    # Resampling method is special and *not* needed for DataArray attrs
    m['resampling_method'] = 'cubic'
    assert 'resampling_method' not in m.dataarray_attrs()

    # It's possible to copy and update a Measurement instance
    m2 = m.copy()
    assert m2.bob == 10
    assert m2.dataarray_attrs() == m.dataarray_attrs()
    assert repr(m2) == repr(m)

    # Must specify *all* required keys: name, dtype, nodata and units
    with pytest.raises(ValueError) as e:
        Measurement(name='x', units='1', nodata=0)
    assert 'required keys missing:' in str(e.value)
    assert 'dtype' in str(e.value)
def measurements(self, input_measurements):
    """Describe one int16 output band (nodata=0, units '1') per configured band."""
    return [
        Measurement(name=band_name, dtype='int16', nodata=0, units='1')
        for band_name in self.bands
    ]
'swir1': [0.00256, 0.99467], 'swir2': [-0.00327, 1.02551] } dataset_constants = { 'product_type': 'ls8_usgs_fc_scene', 'format': { 'name': 'GeoTIFF' }, 'lineage': { 'source_datasets': {} } } fc_measurements = [ Measurement(name='BS', units='percent', dtype='int16', nodata=-1), Measurement(name='PV', units='percent', dtype='int16', nodata=-1), Measurement(name='NPV', units='percent', dtype='int16', nodata=-1), Measurement(name='UE', units='1', dtype='int16', nodata=-1) ] def fishnet(geometry, threshold): bounds = geometry.bounds xmin = int(bounds[0] // threshold) xmax = int(bounds[2] // threshold) ymin = int(bounds[1] // threshold) ymax = int(bounds[3] // threshold) ncols = int(xmax - xmin + 1) nrows = int(ymax - ymin + 1) result = []
def merge_measurement(measurement, spec):
    """Overlay truthy 'nodata' and 'dtype' values from `spec` onto
    `measurement` (mutating it) and return the result as a Measurement."""
    for key in ('nodata', 'dtype'):
        override = spec.get(key)
        if override:
            measurement[key] = override
    return Measurement(**measurement)
def measurement(self):
    """Describe the output band: int8 with -1 as nodata."""
    return Measurement(name=self._var_name,
                       dtype='int8',
                       nodata=-1,
                       units='1')
def measurement(self):
    """Describe the output band: int32 dates encoded as YYYYMMDD, nodata 0."""
    return Measurement(name=self._var_name,
                       dtype='int32',
                       nodata=0,
                       units='Date as YYYYMMDD')
def measurement(self):
    """Describe the output band: int16 day counts since self._since, nodata 0."""
    day_units = 'days since {:%Y-%m-%d %H:%M:%S}'.format(self._since)
    return Measurement(name=self._var_name,
                       dtype='int16',
                       nodata=0,
                       units=day_units)
def output_measurements(self, product_definitions):
    # type: (Dict[str, Dict]) -> Dict[str, Measurement]
    """
    A dictionary mapping names to measurement metadata.

    :param product_definitions: a dictionary mapping product names to definitions
    :raises VirtualProductException: on unknown products or measurements,
        inconsistent child measurement sets, or an unvalidated recipe
    """
    get = self.get

    if 'product' in self:
        self._assert(
            self._product in product_definitions,
            "product {} not found in definitions".format(self._product))

        measurement_docs = product_definitions[self._product]['measurements']
        measurements = {
            measurement['name']: Measurement(**measurement)
            for measurement in measurement_docs
        }

        # no explicit selection means "all measurements"
        if get('measurements') is None:
            return measurements

        try:
            return {name: measurements[name] for name in get('measurements')}
        except KeyError as ke:
            # chain the original KeyError so the root cause stays visible
            raise VirtualProductException(
                "could not find measurement: {}".format(ke.args)) from ke

    elif 'transform' in self:
        input_measurements = self._input.output_measurements(product_definitions)
        return self._transformation.measurements(input_measurements)

    elif 'collate' in self:
        input_measurements = [child.output_measurements(product_definitions)
                              for child in self._children]

        first, *rest = input_measurements
        for child in rest:
            self._assert(
                set(child) == set(first),
                "child datasets do not all have the same set of measurements")

        name = get('index_measurement_name')
        if name is None:
            return first

        self._assert(
            name not in first,
            "source index measurement '{}' already present".format(name))
        first.update({
            name: Measurement(name=name, dtype='int8', nodata=-1, units='1')
        })
        return first

    elif 'juxtapose' in self:
        input_measurements = [child.output_measurements(product_definitions)
                              for child in self._children]

        result = {}
        for measurements in input_measurements:
            # children may not share any measurement names
            common = set(result) & set(measurements)
            self._assert(
                not common,
                "common measurements {} between children".format(common))
            result.update(measurements)
        return result

    else:
        raise VirtualProductException("virtual product was not validated")