def upload_metadata(granule_id):
    """
    Creates metadata in STAC and EO3 formats, uploads the EO3 document,
    and returns the STAC document for the caller to upload.

    :param granule_id: the id of the granule in format 'date/tile_id'
    :return: serialised STAC metadata and its destination S3 path
    """
    local_path = Path(NCI_DIR) / granule_id
    granule_s3_path = get_granule_s3_path(granule_id)

    s3_path = f"s3://{S3_BUCKET}/{granule_s3_path}/"
    s3_eo3_path = f"{s3_path}eo3-ARD-METADATA.yaml"
    s3_stac_path = f"{s3_path}stac-ARD-METADATA.json"

    eo3 = create_eo3(local_path, granule_id)
    stac = to_stac_item(
        eo3,
        stac_item_destination_url=s3_stac_path,
        odc_dataset_metadata_url=s3_eo3_path,
        dataset_location=s3_path,
    )
    stac_dump = json.dumps(stac, default=json_fallback, indent=4)

    s3_dump(
        yaml.safe_dump(serialise.to_doc(eo3), default_flow_style=False),
        s3_eo3_path,
        ACL="bucket-owner-full-control",
        ContentType="text/vnd.yaml",
    )

    return stac_dump, s3_stac_path
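# A minimal sketch of the `json_fallback` used in `upload_metadata` above.
# The helper is assumed rather than shown in the source: `json.dumps` can't
# encode datetimes or UUIDs (both common in STAC items), so a `default=`
# hook coerces them to strings. The real implementation may differ.
import uuid
from datetime import datetime


def json_fallback(obj):
    """Fallback serialiser for types the json module doesn't know."""
    if isinstance(obj, datetime):
        return obj.isoformat()
    if isinstance(obj, uuid.UUID):
        return str(obj)
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")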
def test_valid_document_works(tmp_path: Path, example_metadata: Dict):
    generated_doc = dump_roundtrip(example_metadata)

    # Do a serialisation roundtrip and check that it's still identical.
    reserialised_doc = dump_roundtrip(
        serialise.to_doc(serialise.from_doc(generated_doc))
    )
    assert_same(generated_doc, reserialised_doc)

    assert serialise.from_doc(generated_doc) == serialise.from_doc(reserialised_doc)
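# `dump_roundtrip` is assumed by these tests but not shown. A plausible
# sketch: dump the document to YAML text and parse it straight back, so
# values degrade exactly as they would through an on-disk write/read cycle.
import yaml


def dump_roundtrip(doc):
    """Serialise a document to YAML and load it back."""
    return yaml.safe_load(yaml.safe_dump(doc, default_flow_style=False))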
def assert_unchanged_after_roundtrip(doc: Dict):
    generated_doc = dump_roundtrip(doc)

    # Do a serialisation roundtrip and check that it's still identical.
    reserialised_doc = dump_roundtrip(
        serialise.to_doc(serialise.from_doc(generated_doc))
    )

    # One allowed difference: input dates can be in many string formats,
    # but we normalise them with a timezone (UTC by default).
    _normalise_datetime_props(generated_doc)

    assert serialise.from_doc(generated_doc) == serialise.from_doc(reserialised_doc)
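# `_normalise_datetime_props` is also assumed here, not shown. Per the
# comment above, the intent is to give naive input datetimes an explicit
# timezone (UTC by default) so both sides of the comparison match. A
# minimal sketch:
from datetime import datetime, timezone
from typing import Dict


def _normalise_datetime_props(doc: Dict):
    """Attach UTC to any naive datetime in the document's properties."""
    for key, value in doc.get("properties", {}).items():
        if isinstance(value, datetime) and value.tzinfo is None:
            doc["properties"][key] = value.replace(tzinfo=timezone.utc)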
def assert_expected_eo3(
    expected_doc: DatasetDoc,
    given_doc: DatasetDoc,
    *,
    ignore_fields=(),
):
    """
    Do the two DatasetDocs match?

    (Unlike plain equality, this gives a reasonable error message of the
    differences, and compares geometry more intelligently.)
    """
    __tracebackhide__ = operator.methodcaller("errisinstance", AssertionError)
    if expected_doc.geometry is None:
        assert given_doc.geometry is None, "Expected no geometry"
    else:
        assert_shapes_mostly_equal(
            given_doc.geometry, expected_doc.geometry, 0.00000001
        )

    e = serialise.to_doc(expected_doc)
    g = serialise.to_doc(given_doc)
    for f in ("geometry",) + ignore_fields:
        e.pop(f)
        g.pop(f)
    assert_same(g, e)
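# `assert_shapes_mostly_equal` is referenced above but not shown. One common
# way to compare geometries approximately (a sketch using shapely; the real
# helper may differ): the area of the symmetric difference between the two
# shapes must fall below the given threshold.
import operator

from shapely.geometry.base import BaseGeometry


def assert_shapes_mostly_equal(
    shape1: BaseGeometry, shape2: BaseGeometry, threshold: float
):
    __tracebackhide__ = operator.methodcaller("errisinstance", AssertionError)
    difference = shape1.symmetric_difference(shape2).area
    assert difference < threshold, (
        f"Shapes differ by area {difference} (threshold {threshold})"
    )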
def test_location_single_serialisation(tmp_path: Path, l1_ls8_folder_md_expected: Dict):
    # A single location is always serialised as the singular 'location'.
    location = "https://some/test/path"

    # Given the plural 'locations' field (with a single entry)...
    l1_ls8_folder_md_expected["locations"] = [location]

    reserialised_doc = dump_roundtrip(
        serialise.to_doc(serialise.from_doc(l1_ls8_folder_md_expected))
    )

    # ...we get the singular form back.
    assert reserialised_doc["location"] == location
    assert "locations" not in reserialised_doc
def on_success(dataset: DatasetDoc, dataset_path: Path):
    """
    Index the dataset.
    """
    product_name = dataset.product.name
    product = products.get(product_name)
    if not product:
        product = index.products.get_by_name(product_name)
        if not product:
            raise ValueError(f"Product {product_name} not found in ODC index")
        products[product_name] = product

    index.datasets.add(
        Dataset(product, serialise.to_doc(dataset), uris=dataset.locations)
    )
    _LOG.debug("Indexed dataset", dataset_id=dataset.id, dataset_path=dataset_path)
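# Assumed context for `on_success` above (a sketch; this wiring isn't shown
# in the source): an open ODC index handle plus a per-run product cache that
# `on_success` fills lazily, so repeat datasets skip the index lookup.
import datacube

index = datacube.Datacube(app="eo3-indexer").index  # app name is illustrative
products = {}  # product_name -> Product, populated by on_success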
def test_in_memory_dataset(tmp_path: Path, l1_ls8_folder: Path):
    """
    You can create metadata fully in-memory, without touching paths.
    """
    out = tmp_path / "out"
    out.mkdir()

    [blue_geotiff_path] = l1_ls8_folder.rglob("L*_B2.TIF")

    dataset_location = out / "my/custom/dataset/path/ls_whatever.stac-item.json"
    p = DatasetPrepare(dataset_location=dataset_location)
    p.datetime = datetime(2019, 7, 4, 13, 7, 5)
    p.product_name = "loch_ness_sightings"
    p.processed = datetime(2019, 7, 4, 13, 8, 7)

    pretend_path = dataset_location.parent / "our_image_dont_read_it.tif"
    p.note_measurement(
        "blue",
        pretend_path,
        # We give it grid information, so it doesn't have to read it itself.
        # (Reading will fail if it tries, because the path is fake!)
        grid=GridSpec.from_path(blue_geotiff_path),
        pixels=numpy.ones((60, 60), numpy.int16),
        nodata=-1,
    )

    dataset: DatasetDoc = p.to_dataset_doc()
    doc: dict = serialise.to_doc(dataset)

    # We're testing geometry calc in other tests.
    assert doc["geometry"] is not None, "Expected geometry"
    del doc["geometry"]
    assert doc["id"] is not None, "Expected an id"
    del doc["id"]

    # Users can ask the generator for file names:
    assert (
        p.names.measurement_filename("red")
        == "loch_ness_sightings_2019-07-04_red.tif"
    )

    # The computed file paths are relative to our given dataset location.
    out_url = out.as_uri()
    assert (
        p.names.resolve_file(p.names.measurement_filename("red"))
        == f"{out_url}/my/custom/dataset/path/loch_ness_sightings_2019-07-04_red.tif"
    )

    pprint(doc)
    assert_same(
        {
            "$schema": "https://schemas.opendatacube.org/dataset",
            "label": "loch_ness_sightings_2019-07-04",
            "crs": "epsg:32655",
            "measurements": {"blue": {"path": "our_image_dont_read_it.tif"}},
            "product": {"name": "loch_ness_sightings"},
            "properties": {
                "datetime": datetime(2019, 7, 4, 13, 7, 5, tzinfo=timezone.utc),
                "odc:processing_datetime": datetime(
                    2019, 7, 4, 13, 8, 7, tzinfo=timezone.utc
                ),
                "odc:product": "loch_ness_sightings",
            },
            "grids": {
                "default": {
                    "shape": [60, 60],
                    "transform": [
                        3955.5,
                        0.0,
                        641985.0,
                        0.0,
                        -3975.5000000000005,
                        -3714585.0,
                        0.0,
                        0.0,
                        1.0,
                    ],
                }
            },
            "accessories": {},
            "lineage": {},
        },
        doc,
    )
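# `assert_same` is used throughout these tests but not defined here. A
# minimal sketch (the real helper likely pretty-prints a structured diff):
# deep-compare two documents and show both on failure.
from pprint import pformat
from typing import Dict


def assert_same(a: Dict, b: Dict):
    """Deep-compare two documents, printing both on failure."""
    __tracebackhide__ = True
    assert a == b, f"Documents differ:\n{pformat(a)}\n!=\n{pformat(b)}"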