def _extract_sinergise_fields(path: Path, p: DatasetAssembler) -> Iterable[Path]:
    """
    Extract Sinergise metadata from a dataset folder into the assembler.

    Reads ``productInfo.json`` and ``metadata.xml`` from *path*, merges their
    parsed properties into *p*, and registers both files as accessory
    documents on the assembler.

    :param path: Sinergise dataset folder containing the metadata files.
    :param p: Assembler that receives the extracted properties/accessories.
    :return: Iterable of the folder's ``*.jp2`` image paths (lazy glob).
    :raises ValueError: if either required metadata file is missing.
    """
    product_info_path = path / "productInfo.json"
    metadata_xml_path = path / "metadata.xml"

    if not product_info_path.exists():
        raise ValueError(
            "No productInfo.json file found. "
            "Are you sure the input is a sinergise dataset folder?"
        )
    # Mirror the productInfo.json check: without this, a missing metadata.xml
    # surfaced as an unhelpful FileNotFoundError from read_text() below.
    if not metadata_xml_path.exists():
        raise ValueError(
            "No metadata.xml file found. "
            "Are you sure the input is a sinergise dataset folder?"
        )

    p.properties.update(process_sinergise_product_info(product_info_path))
    p.add_accessory_file("metadata:sinergise_product_info", product_info_path)

    p.properties.update(process_tile_metadata(metadata_xml_path.read_text()))
    p.add_accessory_file("metadata:s2_tile", metadata_xml_path)

    # TODO: sinergise folders could `process_datastrip_metadata()` in an outer directory?
    return path.glob("*.jp2")
def _write_stac(
    metadata_path: Path,
    task: AlchemistTask,
    dataset_assembler: DatasetAssembler,
):
    """
    Write a STAC item document alongside the ODC metadata file.

    Derives the STAC item path from *metadata_path*, serialises the dataset
    as STAC JSON, registers it as an accessory on the assembler, and folds
    the new file into the package's existing sha1 checksum file.

    :param metadata_path: Path to the written ``odc-metadata.yaml``.
    :param task: Task carrying the output location / explorer settings.
    :param dataset_assembler: Assembler that produced the dataset.
    :return: The STAC item as a dict.
    """
    dataset_doc = serialise.from_path(metadata_path)
    stac_item_path = Path(
        str(metadata_path).replace("odc-metadata.yaml", "stac-item.json")
    )

    # Madness in deferred destination logic
    destination = dataset_assembler.names.destination_folder(
        Path(task.settings.output.location)
    )
    # Path() collapses "s3://" to "s3:/"; restore the scheme separator.
    base_uri = f"{destination}/".replace("s3:/", "s3://")

    stac_doc = dc_to_stac(
        dataset_doc,
        metadata_path,
        stac_item_path,
        base_uri,
        task.settings.output.explorer_url,
        False,
    )

    with stac_item_path.open("w") as out:
        json.dump(stac_doc, out, default=json_fallback)
    dataset_assembler.add_accessory_file("metadata:stac", stac_item_path)

    # dataset_assembler._checksum.write(dataset_assembler._accessories["checksum:sha1"])
    # Need a new checksummer because EODatasets is insane
    checksummer = PackageChecksum()
    checksum_file = (
        dataset_assembler._dataset_location
        / dataset_assembler._accessories["checksum:sha1"].name
    )
    checksummer.read(checksum_file)
    checksummer.add_file(stac_item_path)
    checksummer.write(checksum_file)

    return stac_doc