def test_add_source_dataset(tmp_path: Path, inherit_geom):
    from eodatasets3 import serialise

    p = DatasetAssembler(tmp_path, naming_conventions="dea_c3")
    source_dataset = serialise.from_path(
        Path(__file__).parent / "data/LC08_L1TP_089080_20160302_20170328_01_T1.yaml"
    )
    p.add_source_dataset(
        source_dataset, auto_inherit_properties=True, inherit_geometry=inherit_geom
    )

    p.maturity = "interim"
    p.collection_number = "3"
    p.dataset_version = "1.6.0"
    p.producer = "ga.gov.au"
    p.processed = "1998-07-30T12:23:23"
    p.product_family = "wofs"
    p.write_measurement(
        "water",
        Path(__file__).parent
        / "data/wofs/ga_ls_wofs_3_099081_2020-07-26_interim_water_clipped.tif",
    )

    id, path = p.done()

    output = serialise.from_path(path)
    if inherit_geom:
        # POLYGON((609615 -3077085, 378285 -3077085, 378285 -3310515, 609615 -3310515, 609615 -3077085))
        assert output.geometry == source_dataset.geometry
    else:
        # POLYGON((684285 -3439275, 684285 -3444495, 689925 -3444495, 689925 -3439275, 684285 -3439275))
        # Geometry is not set from the source dataset, but instead from the added wofs measurement.
        assert output.geometry != source_dataset.geometry
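# The inherit_geom fixture used above isn't defined in this snippet. A minimal
# sketch of how it could be parametrised so the test exercises both branches
# (the params and ids are assumptions, not necessarily the suite's real fixture):
import pytest


@pytest.fixture(params=[True, False], ids=["inherit-geometry", "measurement-geometry"])
def inherit_geom(request):
    return request.param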
def assert_expected_eo3_path(
    expected_doc: Dict,
    expected_path: Path,
    ignore_fields=(),
):
    """
    Check an output path of an EO3 dataset matches an expected document.

    This is slightly smarter about doing geometry equality etc within the document.
    """
    __tracebackhide__ = operator.methodcaller("errisinstance", AssertionError)
    assert (
        expected_path.exists()
    ), f"Expected output EO3 path doesn't exist: {expected_path}"
    assert_same_as_file(
        expected_doc,
        expected_path,
        # We check the geometry below
        ignore_fields=("geometry",) + tuple(ignore_fields),
    )
    if "geometry" not in ignore_fields:
        # Compare geometry after parsing, rather than comparing the raw dict values.
        produced_dataset = serialise.from_path(expected_path)
        expected_dataset = serialise.from_doc(expected_doc, skip_validation=True)
        if expected_dataset.geometry is None:
            assert produced_dataset.geometry is None
        else:
            assert_shapes_mostly_equal(
                produced_dataset.geometry, expected_dataset.geometry, 0.00000001
            )
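# assert_shapes_mostly_equal isn't shown in this snippet. A sketch of the kind
# of near-equality check it performs, via shapely's symmetric difference (an
# assumption about the real helper, which may be implemented differently):
from shapely.geometry.base import BaseGeometry


def assert_shapes_mostly_equal(
    shape1: BaseGeometry, shape2: BaseGeometry, threshold: float
):
    # The shapes are "mostly equal" if the area not shared by both is tiny.
    difference = shape1.symmetric_difference(shape2).area
    assert difference < threshold, f"Shapes differ by area {difference}"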
def _load_level1_doc(
    wagl_doc: Dict,
    user_specified_l1_path: Optional[Path] = None,
    allow_missing_provenance=False,
):
    if user_specified_l1_path:
        if not user_specified_l1_path.exists():
            raise ValueError(
                f"No level1 metadata found at given path {user_specified_l1_path}"
            )
        level1_path = user_specified_l1_path
    else:
        level1_path = Path(get_path(wagl_doc, ("source_datasets", "source_level1")))

    # If a directory, assume "<dirname>.odc-metadata.yaml"
    if level1_path.is_dir():
        metadata_path = level1_path / (level1_path.name + ".odc-metadata.yaml")
    # Otherwise it's a sibling file with ".odc-metadata.yaml" suffix
    else:
        if level1_path.suffix.lower() == ".yaml":
            metadata_path = level1_path
        else:
            metadata_path = level1_path.with_suffix(".odc-metadata.yaml")

    if not metadata_path.exists():
        if not allow_missing_provenance:
            raise ValueError(
                "No level1 found or provided. "
                f"WAGL said it was at path {str(level1_path)!r}, "
                "which has no metadata doc we can find, and you didn't specify an alternative. "
                f"(allow_missing_provenance={allow_missing_provenance})"
            )
        return None

    return serialise.from_path(metadata_path)
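# Call-site sketch: load the level1 provenance referenced by a parsed WAGL
# metadata document, tolerating its absence (wagl_doc here is assumed to be the
# yaml loaded from the granule's METADATA/CURRENT field):
level1 = _load_level1_doc(wagl_doc, allow_missing_provenance=True)
if level1 is None:
    # Proceed without source provenance.
    pass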
def run(
    verbose: bool,
    odc_metadata_files: Iterable[Path],
    stac_base_url: str,
    explorer_base_url: str,
    validate: bool,
):
    for input_metadata in odc_metadata_files:
        dataset = serialise.from_path(input_metadata)

        name = input_metadata.stem.replace(".odc-metadata", "")
        output_path = input_metadata.with_name(f"{name}.stac-item.json")

        # Create STAC dict
        item_doc = dc_to_stac(
            dataset,
            input_metadata,
            output_path,
            stac_base_url,
            explorer_base_url,
            do_validate=False,
        )

        if validate:
            eo3stac.validate_item(item_doc, log=echo if verbose else lambda line: None)

        with output_path.open("w") as f:
            json.dump(item_doc, f, indent=4, default=json_fallback)

        if verbose:
            echo(f'Wrote {style(output_path.as_posix(), "green")}')
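# Direct-call sketch of the command body above, bypassing whatever CLI layer
# wraps it (the file name and URLs are illustrative placeholders):
run(
    verbose=True,
    odc_metadata_files=[Path("example.odc-metadata.yaml")],
    stac_base_url="https://data.example.org/",
    explorer_base_url="https://explorer.example.org/",
    validate=True,
)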
def run(odc_metadata_files: Iterable[Path]):
    for input_metadata in odc_metadata_files:
        dataset = serialise.from_path(input_metadata)

        project = partial(
            pyproj.transform,
            pyproj.Proj(init=dataset.crs),
            pyproj.Proj(init="epsg:4326"),
        )
        wgs84_geometry: BaseGeometry = transform(project, dataset.geometry)

        item_doc = dict(
            id=dataset.id,
            type="Feature",
            bbox=wgs84_geometry.bounds,
            geometry=wgs84_geometry.__geo_interface__,
            properties={**dataset.properties, "odc:product": dataset.product.name},
            assets={
                # TODO: Currently assuming no name collisions.
                **{name: {"href": m.path} for name, m in dataset.measurements.items()},
                **{name: {"href": m.path} for name, m in dataset.accessories.items()},
            },
            links=[
                # {
                #     "rel": "self",
                #     "href": '?',
                # },
                {"rel": "odc_product", "href": dataset.product.href},
                {
                    "rel": "alternative",
                    "type": "text/html",
                    "href": f"https://explorer.dea.ga.gov.au/dataset/{dataset.id}",
                },
            ],
        )

        name = input_metadata.stem.replace(".odc-metadata", "")
        output_path = input_metadata.with_name(f"{name}.stac-item.json")
        with output_path.open("w") as f:
            json.dump(item_doc, f, indent=4, default=json_fallback)
        echo(output_path)
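# The pyproj.Proj(init=...) / pyproj.transform style used above is the pyproj 1.x
# API and is deprecated in pyproj 2+. A sketch of the same reprojection with the
# newer Transformer API (a suggested alternative, not what this code ships with):
from pyproj import Transformer
from shapely.geometry.base import BaseGeometry
from shapely.ops import transform as shapely_transform


def to_wgs84(dataset_crs: str, geometry: BaseGeometry) -> BaseGeometry:
    # always_xy=True keeps (longitude, latitude) axis order, matching the
    # behaviour of the old Proj(init=...) style.
    transformer = Transformer.from_crs(dataset_crs, "epsg:4326", always_xy=True)
    return shapely_transform(transformer.transform, geometry)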
def test_tostac_no_grids(odc_dataset_path: Path, expected_stac_doc: Dict):
    """
    Converted EO1 datasets don't have grid information.

    Make sure it still outputs without falling over.
    """
    # Remove grids from the input...
    dataset = serialise.from_path(odc_dataset_path)
    dataset.grids = None
    serialise.to_path(odc_dataset_path, dataset)

    run_tostac(odc_dataset_path)

    expected_output_path = odc_dataset_path.with_name(
        odc_dataset_path.name.replace(".odc-metadata.yaml", ".stac-item.json")
    )

    # No longer expect proj fields (they come from grids).
    remove_stac_properties(
        expected_stac_doc, ("proj:shape", "proj:transform", "proj:epsg")
    )
    # But we do still expect a global CRS.
    expected_stac_doc["properties"]["proj:epsg"] = 32656

    output_doc = json.load(expected_output_path.open())
    assert_same(expected_stac_doc, output_doc)
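# remove_stac_properties isn't shown in this snippet. A minimal sketch of what
# it plausibly does, given how it's called above (an assumption, not the test
# suite's actual helper):
from typing import Dict, Iterable


def remove_stac_properties(stac_doc: Dict, property_names: Iterable[str]):
    for name in property_names:
        stac_doc["properties"].pop(name, None)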
def _write_stac(
    metadata_path: Path,
    task: AlchemistTask,
    dataset_assembler: DatasetAssembler,
):
    out_dataset = serialise.from_path(metadata_path)
    stac_path = Path(str(metadata_path).replace("odc-metadata.yaml", "stac-item.json"))

    # Madness in deferred destination logic
    uri_base = dataset_assembler.names.destination_folder(
        Path(task.settings.output.location)
    )
    uri_base = str(uri_base) + "/"

    stac = dc_to_stac(
        out_dataset,
        metadata_path,
        stac_path,
        uri_base.replace("s3:/", "s3://"),
        task.settings.output.explorer_url,
        False,
    )
    with stac_path.open("w") as f:
        json.dump(stac, f, default=json_fallback)
    dataset_assembler.add_accessory_file("metadata:stac", stac_path)

    # dataset_assembler._checksum.write(dataset_assembler._accessories["checksum:sha1"])
    # Need a new checksummer because EODatasets is insane
    checksummer = PackageChecksum()
    checksum_file = (
        dataset_assembler._dataset_location
        / dataset_assembler._accessories["checksum:sha1"].name
    )
    checksummer.read(checksum_file)
    checksummer.add_file(stac_path)
    checksummer.write(checksum_file)
    return stac
def check_prepare_outputs(
    invoke_script,
    run_args,
    expected_doc: Dict,
    expected_metadata_path: Path,
    ignore_fields=(),
):
    __tracebackhide__ = operator.methodcaller("errisinstance", AssertionError)
    run_prepare_cli(invoke_script, *run_args)

    assert expected_metadata_path.exists()
    assert_same_as_file(
        expected_doc,
        expected_metadata_path,
        # We check the geometry below
        ignore_fields=("geometry",) + tuple(ignore_fields),
    )

    # Compare geometry after parsing, rather than comparing the raw dict values.
    produced_dataset = serialise.from_path(expected_metadata_path)
    expected_dataset = serialise.from_doc(expected_doc, skip_validation=True)
    assert_shapes_mostly_equal(
        produced_dataset.geometry, expected_dataset.geometry, 0.00000001
    )
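# Sketch of a typical call from a prepare-script test. The script module,
# CLI arguments, and EXPECTED_DOC below are illustrative assumptions, not
# real names from this codebase:
def test_prepare_example(tmp_path: Path):
    check_prepare_outputs(
        invoke_script=example_prepare.main,  # hypothetical prepare script entry point
        run_args=["--output-base", str(tmp_path), "path/to/input-dataset.tar"],
        expected_doc=EXPECTED_DOC,  # the full expected EO3 document for the input
        expected_metadata_path=tmp_path / "example.odc-metadata.yaml",
    )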
def for_path(
    cls,
    wagl_hdf5: Path,
    granule_names: Optional[Sequence[str]] = None,
    level1_metadata_path: Optional[Path] = None,
    fmask_image_path: Optional[Path] = None,
    fmask_doc_path: Optional[Path] = None,
    gqa_doc_path: Optional[Path] = None,
):
    """
    Create granules by scanning the given hdf5 file.

    Optionally specify additional files and level1 path. If they are not
    specified, it looks for them using WAGL's output naming conventions.
    """
    if not wagl_hdf5.exists():
        raise ValueError(f"Input hdf5 doesn't exist: {wagl_hdf5}")

    with h5py.File(wagl_hdf5, "r") as fid:
        granule_names = granule_names or fid.keys()

        for granule_name in granule_names:
            if granule_name not in fid:
                raise ValueError(
                    f"Granule {granule_name!r} not found in file {wagl_hdf5}"
                )

            wagl_doc_field = get_path(fid, (granule_name, "METADATA", "CURRENT"))
            if not wagl_doc_field:
                raise ValueError(
                    f"Granule contains no wagl metadata: {granule_name} in {wagl_hdf5}"
                )
            [wagl_doc] = loads_yaml(wagl_doc_field[()])

            if not level1_metadata_path:
                level1_tar_path = Path(
                    get_path(wagl_doc, ("source_datasets", "source_level1"))
                )
                level1_metadata_path = level1_tar_path.with_suffix(
                    ".odc-metadata.yaml"
                )
            if not level1_metadata_path.exists():
                raise ValueError(
                    f"No level1 metadata found at {level1_metadata_path}"
                )
            level1 = serialise.from_path(level1_metadata_path)

            fmask_image_path = fmask_image_path or wagl_hdf5.with_name(
                f"{granule_name}.fmask.img"
            )
            if not fmask_image_path.exists():
                raise ValueError(f"No fmask image found at {fmask_image_path}")

            fmask_doc_path = fmask_doc_path or fmask_image_path.with_suffix(".yaml")
            if not fmask_doc_path.exists():
                raise ValueError(f"No fmask found at {fmask_doc_path}")
            with fmask_doc_path.open("r") as fl:
                [fmask_doc] = loads_yaml(fl)

            gqa_doc_path = gqa_doc_path or wagl_hdf5.with_name(
                f"{granule_name}.gqa.yaml"
            )
            if not gqa_doc_path.exists():
                raise ValueError(f"No gqa found at {gqa_doc_path}")
            with gqa_doc_path.open("r") as fl:
                [gqa_doc] = loads_yaml(fl)

            yield cls(
                name=granule_name,
                wagl_hdf5=wagl_hdf5,
                wagl_metadata=wagl_doc,
                source_level1_metadata=level1,
                fmask_doc=fmask_doc,
                fmask_image=fmask_image_path,
                gqa_doc=gqa_doc,
            )
def for_path(
    cls,
    wagl_hdf5: Path,
    granule_names: Optional[Sequence[str]] = None,
    level1_metadata_path: Optional[Path] = None,
    fmask_image_path: Optional[Path] = None,
    fmask_doc_path: Optional[Path] = None,
    gqa_doc_path: Optional[Path] = None,
    tesp_doc_path: Optional[Path] = None,
    allow_missing_provenance: bool = False,
):
    """
    Create granules by scanning the given hdf5 file.

    Optionally specify additional files and level1 path. If they are not
    specified, it looks for them using WAGL's output naming conventions.

    :param allow_missing_provenance: If true, allow the level1 source metadata
        to be missing (the granule will carry no source provenance).
    """
    if not wagl_hdf5.exists():
        raise ValueError(f"Input hdf5 doesn't exist: {wagl_hdf5}")

    with h5py.File(wagl_hdf5, "r") as fid:
        granule_names = granule_names or fid.keys()

        for granule_name in granule_names:
            if granule_name not in fid:
                raise ValueError(
                    f"Granule {granule_name!r} not found in file {wagl_hdf5}"
                )

            wagl_doc_field = get_path(fid, (granule_name, "METADATA", "CURRENT"))
            if not wagl_doc_field:
                raise ValueError(
                    f"Granule contains no wagl metadata: {granule_name} in {wagl_hdf5}"
                )
            [wagl_doc] = loads_yaml(wagl_doc_field[()])

            if not level1_metadata_path:
                level1_metadata_path = _get_level1_metadata_path(wagl_doc)
            if level1_metadata_path and not level1_metadata_path.exists():
                raise ValueError(
                    f"No level1 metadata found at {level1_metadata_path}"
                )
            level1 = (
                serialise.from_path(level1_metadata_path)
                if level1_metadata_path
                else None
            )
            if (not level1_metadata_path) and (not allow_missing_provenance):
                raise ValueError(
                    "No level1 found or provided. "
                    f"WAGL said it was at path {str(level1_metadata_path)!r}. "
                    "It's not, and you didn't specify an alternative. "
                    f"(allow_missing_provenance={allow_missing_provenance})"
                )

            fmask_image_path = fmask_image_path or wagl_hdf5.with_name(
                f"{granule_name}.fmask.img"
            )
            if not fmask_image_path.exists():
                raise ValueError(f"No fmask image found at {fmask_image_path}")

            fmask_doc_path = fmask_doc_path or fmask_image_path.with_suffix(".yaml")
            if not fmask_doc_path.exists():
                raise ValueError(f"No fmask found at {fmask_doc_path}")
            with fmask_doc_path.open("r") as fl:
                [fmask_doc] = loads_yaml(fl)

            gqa_doc_path = gqa_doc_path or wagl_hdf5.with_name(
                f"{granule_name}.gqa.yaml"
            )
            if not gqa_doc_path.exists():
                raise ValueError(f"No gqa found at {gqa_doc_path}")
            with gqa_doc_path.open("r") as fl:
                [gqa_doc] = loads_yaml(fl)

            # Optional doc. Initialise to None so the yield below can't hit an
            # unbound name when no tesp doc exists.
            tesp_doc = None
            if tesp_doc_path:
                # But if they gave us a path, we're strict about it existing.
                if not tesp_doc_path.exists():
                    raise ValueError(
                        f"Supplied tesp doc path doesn't exist: {tesp_doc_path}"
                    )
            else:
                tesp_doc_path = wagl_hdf5.with_name(f"{granule_name}.tesp.yaml")
            if tesp_doc_path.exists():
                with tesp_doc_path.open("r") as fl:
                    [tesp_doc] = loads_yaml(fl)

            yield cls(
                name=granule_name,
                wagl_hdf5=wagl_hdf5,
                wagl_metadata=wagl_doc,
                source_level1_metadata=level1,
                fmask_doc=fmask_doc,
                fmask_image=fmask_image_path,
                gqa_doc=gqa_doc,
                tesp_doc=tesp_doc,
            )
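# Usage sketch: for_path reads as a generator classmethod, so iterate it to get
# one granule record per granule in the file. The class name Granule and the
# hdf5 filename here are assumptions for illustration:
for granule in Granule.for_path(
    Path("LC80920842016180LGN01.wagl.h5"),
    allow_missing_provenance=True,
):
    print(granule.name, granule.fmask_image)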