def test_dataset_measurement_paths():
    """Check ``measurement_paths`` for a sample dataset with and without URIs.

    With a ``file://`` dataset URI every band is expected to map to
    ``file:///tmp/<band>.tiff``; once the URIs are removed the helper is
    expected to raise ``ValueError``.
    """
    fmt = 'GeoTiff'
    bands = [{'name': band, 'path': band + '.tiff'}
             for band in 'a b c'.split(' ')]

    ds = mk_sample_dataset(bands,
                           uri='file:///tmp/datataset.yml',
                           format=fmt)

    # Basic sanity of the sample dataset itself.
    assert ds.local_uri == ds.uris[0]
    assert ds.uri_scheme == 'file'
    assert ds.format == fmt

    # Every band path must be reported next to the dataset document.
    for band_name, band_path in measurement_paths(ds).items():
        expected = 'file:///tmp/' + band_name + '.tiff'
        assert band_path == expected

    # Without any URI there is no location to report paths against.
    ds.uris = None
    assert ds.local_uri is None
    with pytest.raises(ValueError):
        measurement_paths(ds)
def fuse_ds(ds_1: Dataset,
            ds_2: Dataset,
            product: Optional[DatasetType] = None) -> Dataset:
    """
    Fuse two datasets into a single new dataset.

    It requires that:
      - the products are fusable
      - grids with the same name are identical
      - labels are in the format 'product_suffix' with identical suffixes
      - CRSs' are identical
      - datetimes are identical
      - $schemas are identical

    The metadata documents of ``ds_1`` and ``ds_2`` are not modified.

    :param ds_1: first dataset to fuse
    :param ds_2: second dataset to fuse; on name clashes its grids and
                 measurements take precedence over those of ``ds_1``
    :param product: product of the fused dataset; when ``None`` it is
                    obtained from ``fuse_products(ds_1.type, ds_2.type)``
    :return: new ``Dataset`` built from the fused metadata document, with
             both inputs recorded under ``lineage.source_datasets``
    :raises AssertionError: when one of the requirements listed above is
                            violated (raised explicitly, so the checks are
                            still performed under ``python -O``)
    :raises KeyError: when a required key is missing from either document
    """
    doc_1, doc_2 = ds_1.metadata_doc, ds_2.metadata_doc

    if product is None:
        product = fuse_products(ds_1.type, ds_2.type)

    fused_doc = dict()

    # The id is derived from the product name, a fixed version string and the
    # ids of both sources.
    fused_doc["id"] = str(
        odc_uuid(product.name, "0.0.0", sources=[doc_1["id"], doc_2["id"]]))
    fused_doc["lineage"] = {"source_datasets": [doc_1["id"], doc_2["id"]]}

    # check that all grids with the same name are identical
    grids_1, grids_2 = doc_1["grids"], doc_2["grids"]
    conflicting_grids = [
        name for name in grids_1
        if name in grids_2 and not grids_1[name] == grids_2[name]
    ]
    if conflicting_grids:
        raise AssertionError(
            f"grids with the same name differ: {conflicting_grids}")

    # TODO: handle the case that grids have conflicts in a separate function
    fused_doc["grids"] = {**grids_1, **grids_2}

    # Labels are expected to look like '<product name><suffix>'; the fused
    # label keeps the common suffix and swaps in the fused product name.
    label_suffix = doc_1["label"].replace(doc_1["product"]["name"], "")
    other_suffix = doc_2["label"].replace(doc_2["product"]["name"], "")
    if label_suffix != other_suffix:
        raise AssertionError(
            f"label suffixes differ: {label_suffix!r} != {other_suffix!r}")
    fused_doc["label"] = f"{product.name}{label_suffix}"

    equal_keys = ["$schema", "crs"]
    for key in equal_keys:
        if not doc_1[key] == doc_2[key]:
            raise AssertionError(f"datasets disagree on {key!r}")
        fused_doc[key] = doc_1[key]

    props_1, props_2 = doc_1["properties"], doc_2["properties"]

    # datetime is the only mandatory property
    if not props_1["datetime"] == props_2["datetime"]:
        raise AssertionError("datasets have different datetimes")

    # copy over all identical properties; a property missing from one of the
    # documents is not considered identical, even if its value is None
    fused_doc["properties"] = {
        key: val
        for key, val in props_1.items()
        if key in props_2 and val == props_2[key]
    }

    # Copy every measurement dict before touching it: the path is rewritten
    # below and the source documents must stay untouched.
    fused_doc["measurements"] = {
        name: dict(measurement)
        for name, measurement in {
            **doc_1["measurements"],
            **doc_2["measurements"]
        }.items()
    }

    # Replace each path with the one reported by measurement_paths for the
    # dataset it came from (presumably resolved against that dataset's own
    # location -- confirm against measurement_paths).
    for key, path in {
            **measurement_paths(ds_1),
            **measurement_paths(ds_2)
    }.items():
        fused_doc["measurements"][key]["path"] = path

    return Dataset(product, prep_eo3(fused_doc), uris=[""])