def as_geo_docs(self) -> Tuple[CRS, Dict[str, GridDoc], Dict[str, MeasurementDoc]]:
    """
    Build the CRS, grid docs and measurement docs for a metadata document.

    Returns a ``(crs, grids, measurements)`` triple, or ``(None, None, None)``
    when no measurements have been recorded.

    Raises ``ValueError`` if the named grids do not all report the same CRS.
    """
    if not self._measurements_per_grid:
        return None, None, None

    shared_crs = None
    grids: Dict[str, GridDoc] = {}
    docs: Dict[str, MeasurementDoc] = {}

    for name, (spec, paths) in self._as_named_grids().items():
        # Every grid in one dataset is expected to use a single CRS.
        if shared_crs is not None:
            # TODO: CRS equality is tricky. This may not work.
            #       We assume measurements that share a CRS also spell it
            #       the same way.
            if spec.crs != shared_crs:
                raise ValueError(
                    f"Measurements have different CRSes in the same dataset:\n"
                    f"\t{shared_crs.to_string()!r}\n"
                    f"\t{spec.crs.to_string()!r}\n"
                )
        else:
            shared_crs = spec.crs

        grids[name] = GridDoc(spec.shape, spec.transform)

        # Measurements on the "default" grid carry no explicit grid reference.
        if name != "default":
            doc_grid = name
        else:
            doc_grid = None

        for raw_name, location in paths.items():
            # Measurement groups are not kept in the doc: the ':' separator
            # becomes an underscore.
            flat_name = raw_name.replace(":", "_")
            docs[flat_name] = MeasurementDoc(
                path=location.path,
                layer=location.layer,
                grid=doc_grid,
            )

    return shared_crs, grids, docs
def _band_to_measurement(band: Dict, dataset_location: str) -> MeasurementDoc:
    """
    Convert an EO1 band dict into an EO3 measurement doc.

    Fields are read with ``dict.get``, so keys absent from ``band`` are
    passed through as ``None``. The EO1 ``label`` key becomes the EO3
    ``alias``. ``dataset_location`` is accepted but not used here.
    """
    fields = {
        "path": band.get("path"),
        "band": band.get("band"),
        "layer": band.get("layer"),
        "name": band.get("name"),
        # EO1 calls this a "label"; EO3 calls it an "alias".
        "alias": band.get("label"),
    }
    return MeasurementDoc(**fields)
def as_geo_docs(
    self,
) -> Tuple[CRS, Dict[str, GridDoc], Dict[str, MeasurementDoc]]:
    """
    Calculate combined geo information for metadata docs.

    Grids are ordered from most to fewest measurements. The first one is
    named ``"default"``; each other grid is named with whatever
    ``_find_a_common_name`` returns for its measurement names, falling back
    to the measurement names joined by underscores when that is empty.

    Returns a ``(crs, grid_docs, measurement_docs)`` triple, or
    ``(None, None, None)`` when no measurements have been recorded.

    Raises:
        ValueError: if the grids do not all report the same CRS.
        NotImplementedError: if two grids end up with the same name.
    """
    if not self._measurements_per_grid:
        return None, None, None

    # Order grids from most to fewest measurements.
    # PyCharm's typing seems to get confused by the sorted() call.
    # noinspection PyTypeChecker
    grids_by_frequency: List[Tuple[GridSpec, Dict[str, MeasPath]]] = sorted(
        self._measurements_per_grid.items(),
        key=lambda k: len(k[1]),
        reverse=True,
    )

    grid_docs: Dict[str, GridDoc] = {}
    measurement_docs: Dict[str, MeasurementDoc] = {}
    crs = grids_by_frequency[0][0].crs

    for i, (grid, measurements) in enumerate(grids_by_frequency):
        # TODO: CRS equality is tricky. This may not work.
        #       We're assuming a group of measurements specify their CRS
        #       the same way if they are the same.
        if grid.crs != crs:
            raise ValueError(
                f"Measurements have different CRSes in the same dataset:\n"
                f"\t{crs.to_string()!r}\n"
                f"\t{grid.crs.to_string()!r}\n"
            )

        # The grid with the most measurements.
        if i == 0:
            grid_name = "default"
        else:
            grid_name = _find_a_common_name(list(measurements.keys()))

            # There was no common prefix. Just concat all band names.
            # Perhaps we just fallback to enumeration in these weird cases.
            # grid a, grid b etc....
            if not grid_name:
                grid_name = "_".join(measurements.keys())

            # Check for clashes only once the final name is known, so that a
            # fallback name cannot silently overwrite an earlier grid.
            # TODO: make both grid names more specific?
            if grid_name in grid_docs:
                raise NotImplementedError(
                    f"Clashing grid names. Needs a recalculation. "
                    f"Name {grid_name!r}, but have {tuple(grid_docs.keys())!r}"
                )

        grid_docs[grid_name] = GridDoc(grid.shape, grid.transform)

        for measurement_name, measurement_path in measurements.items():
            # No measurement groups in the doc: we replace with underscores.
            measurement_name = measurement_name.replace(":", "_")

            measurement_docs[measurement_name] = MeasurementDoc(
                path=measurement_path.path,
                layer=measurement_path.layer,
                # Measurements on the default grid carry no grid reference.
                grid=grid_name if grid_name != "default" else None,
            )

    return crs, grid_docs, measurement_docs
def test_esri_prepare():
    """Check the EO3 document generated for the ESRI antimeridian-overlap tif."""
    tif_url = ESRI_ANTIMERIDIAN_OVERLAP_TIF.resolve().as_uri()

    dataset_doc = as_eo3(tif_url)
    # The processing time varies between runs, so drop it before comparing.
    dataset_doc.properties.pop("odc:processing_datetime")

    year_start = datetime(2020, 1, 1, 0, 0, tzinfo=timezone.utc)
    year_end = datetime(2021, 1, 1, 0, 0, tzinfo=timezone.utc)

    expected_properties = Eo3Dict(
        {
            "datetime": year_start,
            "dtr:end_datetime": year_end,
            "dtr:start_datetime": year_start,
            "odc:product": "esri_land_cover",
            "odc:region_code": "60K",
        }
    )

    expected_geometry = shape(
        {
            "coordinates": (
                (
                    (841600.2521048393, 7864419.112077935),
                    (841600.2521048393, 8221119.112077935),
                    (476100.25210483925, 8221119.112077935),
                    (476100.25210483925, 7864419.112077935),
                    (841600.2521048393, 7864419.112077935),
                ),
            ),
            "type": "Polygon",
        }
    )

    expected_grids = {
        "default": GridDoc(
            (128, 128),
            Affine(
                2855.46875,
                0.0,
                476100.25210483925,
                0.0,
                -2786.71875,
                8221119.112077935,
            ),
        )
    }

    expected_doc = DatasetDoc(
        id=UUID("411f3641-bc4a-501d-99e5-6c5c47379715"),
        product=ProductDoc("esri_land_cover"),
        accessories={},
        lineage={},
        label="esri_land_cover_60K_2020-01-01",
        locations=[tif_url],
        properties=expected_properties,
        crs="epsg:32760",
        measurements={
            "classification": MeasurementDoc(path="60K_20200101-20210101.tif"),
        },
        geometry=expected_geometry,
        grids=expected_grids,
    )

    assert_expected_eo3(expected_doc, dataset_doc)