示例#1
0
def bytes2ds_doc(data):
    """Convert a serialised STAC item into a prepared EO3 dataset document.

    ``data`` may be ``bytes`` (decoded as UTF-8 first) or anything else that
    ``json.loads`` accepts.  The parsed document is run through
    ``stac_transform`` and the result through ``prep_eo3``.
    """
    text = data.decode("utf-8") if isinstance(data, bytes) else data
    return prep_eo3(stac_transform(json.loads(text)))
示例#2
0
    def mk_dataset(ds, uri):
        """Build a ``Dataset`` for a document whose id is already in the index.

        :param ds: document wrapper exposing ``id`` and ``doc``.
        :param uri: location recorded as the only uri of the new dataset.
        :return: ``(dataset, existing, None)`` on success, otherwise
                 ``(None, None, error_message)``.
        """
        uuid = ds.id

        if uuid is None:
            return None, None, "Metadata document is missing id field"

        existing = index.datasets.get(uuid)
        if existing is None:
            return None, None, "No such dataset in the database: {}".format(uuid)

        # Non-eo3 documents pass through untouched because of auto_skip.
        prepared = SimpleDocNav(prep_eo3(ds.doc, auto_skip=True))

        # TODO: what about sources=?
        updated = Dataset(existing.type,
                          prepared.doc_without_lineage_sources,
                          uris=[uri])
        return updated, existing, None
示例#3
0
def test_prep_eo3(sample_doc, sample_doc_180, eo3_metadata):
    """Check prep_eo3 output through the eo3 metadata reader, plus non-eo3 handling."""
    # Document with lineage: spatial fields populated, named sources exposed.
    reader = eo3_metadata.dataset_reader(prep_eo3(sample_doc))
    assert reader.grid_spatial is not None
    assert reader.lat.end > reader.lat.begin
    assert reader.lon.end > reader.lon.begin
    for source_name in ('src_a', 'src_b1', 'src_b2'):
        assert source_name in reader.sources
    assert 'src_empty' not in reader.sources

    # Second sample: no sources, and the longitude range straddles 180.
    reader = eo3_metadata.dataset_reader(prep_eo3(sample_doc_180))
    assert reader.grid_spatial is not None
    assert reader.sources == {}
    assert reader.lat.end > reader.lat.begin
    assert reader.lon.end > reader.lon.begin
    assert reader.lon.begin < 180 < reader.lon.end

    # None is passed through; a non-eo3 doc is returned as-is only with auto_skip.
    empty_doc = {}
    assert prep_eo3(None) is None
    assert prep_eo3(empty_doc, auto_skip=True) is empty_doc

    with pytest.raises(ValueError):
        prep_eo3(empty_doc)
示例#4
0
def eo3_dataset_s2(eo3_metadata):
    """Build a ``Dataset`` for product ``s2b_msil2a`` from in-line EO3 documents.

    The dataset document goes through ``prep_eo3``; the product definition is
    wrapped in a ``DatasetType`` using the supplied ``eo3_metadata``.
    """

    def grid(pixel_size, edge):
        # The three grids share one origin and differ only in pixel size and shape.
        return {'shape': [edge, edge],
                'transform': [pixel_size, 0, 399960, 0, -pixel_size, 8700040, 0, 0, 1]}

    def band(path, grid_name=None):
        # Entries without a grid name carry only a 'path' key.
        if grid_name is None:
            return {'path': path}
        return {'grid': grid_name, 'path': path}

    def reflectance(name, *aliases):
        return {'name': name, 'dtype': 'uint16', 'units': '1', 'nodata': 0,
                'aliases': list(aliases)}

    timestamp = '2020-01-01T07:02:54.188Z'

    ds_doc = {
        '$schema': 'https://schemas.opendatacube.org/dataset',
        'id': '8b0e2770-5d4e-5238-8995-4aa91691ab85',
        'product': {'name': 's2b_msil2a'},
        'label': 'S2B_MSIL2A_20200101T070219_N0213_R120_T39LVG_20200101T091825',

        'crs': 'epsg:32739',
        'grids': {'g20m': grid(20, 5490),
                  'g60m': grid(60, 1830),
                  'default': grid(10, 10980)},
        'geometry': {'type': 'Polygon',
                     'coordinates': [[[509759.0000000001, 8590241.0],
                                      [399960.99999999977, 8590241.0],
                                      [399960.99999999977, 8700039.0],
                                      [509758.99999999965, 8700039.0],
                                      [509759.0000000001, 8590241.0]]]},
        'properties': {'eo:gsd': 10,
                       'datetime': timestamp,
                       'eo:platform': 'sentinel-2b',
                       'eo:instrument': 'msi',
                       'eo:cloud_cover': 0,
                       'odc:file_format': 'GeoTIFF',
                       'odc:region_code': '39LVG',
                       'odc:processing_datetime': timestamp},

        'measurements': {'red': band('B04.tif'),
                         'scl': band('SCL.tif', 'g20m'),
                         'blue': band('B02.tif'),
                         'green': band('B03.tif'),
                         'nir_1': band('B08.tif'),
                         'nir_2': band('B8A.tif', 'g20m'),
                         'swir_1': band('B11.tif', 'g20m'),
                         'swir_2': band('B12.tif', 'g20m'),
                         'red_edge_1': band('B05.tif', 'g20m'),
                         'red_edge_2': band('B06.tif', 'g20m'),
                         'red_edge_3': band('B07.tif', 'g20m'),
                         'water_vapour': band('B09.tif', 'g60m'),
                         'coastal_aerosol': band('B01.tif', 'g60m')},
        'lineage': {}}

    # Class labels keyed by their value rendered as a string: '0' .. '11'.
    scl_classes = ['nodata', 'defective', 'dark', 'shadow', 'vegetation', 'bare',
                   'water', 'unclassified', 'cloud medium probability',
                   'cloud high probability', 'thin cirrus', 'snow or ice']
    scl = {'name': 'scl', 'dtype': 'uint8', 'units': '1', 'nodata': 0, 'aliases': ['mask', 'qa'],
           'flags_definition': {'sca': {'description': 'Sen2Cor Scene Classification',
                                        'bits': [0, 1, 2, 3, 4, 5, 6, 7],
                                        'values': {str(code): label
                                                   for code, label in enumerate(scl_classes)}}}}

    product_doc = {
        'name': 's2b_msil2a',
        'description': 'Sentinel-2B Level 2 COGs',
        'metadata_type': 'eo3',
        'metadata': {'product': {'name': 's2b_msil2a'}},
        'measurements': [
            reflectance('coastal_aerosol', 'band_01', 'B01'),
            reflectance('blue', 'band_02', 'B02'),
            reflectance('green', 'band_03', 'B03'),
            reflectance('red', 'band_04', 'B04'),
            reflectance('red_edge_1', 'band_05', 'B05'),
            reflectance('red_edge_2', 'band_06', 'B06'),
            reflectance('red_edge_3', 'band_07', 'B07'),
            reflectance('nir_1', 'band_08', 'B08'),
            reflectance('nir_2', 'band_8a', 'B8A'),
            reflectance('water_vapour', 'band_09', 'B09'),
            reflectance('swir_1', 'band_11', 'B11'),
            reflectance('swir_2', 'band_12', 'B12'),
            scl,
        ],
    }

    product = DatasetType(eo3_metadata, product_doc)
    return Dataset(product, prep_eo3(ds_doc))
示例#5
0
def eo3_dataset_s2(eo3_metadata):
    """Return a ``Dataset`` for one ``s2b_msil2a`` scene assembled from in-line documents.

    The dataset document is passed through ``prep_eo3`` and paired with a
    ``DatasetType`` built from ``eo3_metadata`` and the product definition.
    """
    product_name = "s2b_msil2a"
    acquired = "2020-01-01T07:02:54.188Z"

    # (grid name, pixel size, edge length in pixels); all grids share one origin.
    grids = {}
    for grid_name, pixel, edge in (
        ("g20m", 20, 5490),
        ("g60m", 60, 1830),
        ("default", 10, 10980),
    ):
        grids[grid_name] = {
            "shape": [edge, edge],
            "transform": [pixel, 0, 399960, 0, -pixel, 8700040, 0, 0, 1],
        }

    # (measurement name, file, grid); None means no "grid" key is written.
    band_files = (
        ("red", "B04.tif", None),
        ("scl", "SCL.tif", "g20m"),
        ("blue", "B02.tif", None),
        ("green", "B03.tif", None),
        ("nir_1", "B08.tif", None),
        ("nir_2", "B8A.tif", "g20m"),
        ("swir_1", "B11.tif", "g20m"),
        ("swir_2", "B12.tif", "g20m"),
        ("red_edge_1", "B05.tif", "g20m"),
        ("red_edge_2", "B06.tif", "g20m"),
        ("red_edge_3", "B07.tif", "g20m"),
        ("water_vapour", "B09.tif", "g60m"),
        ("coastal_aerosol", "B01.tif", "g60m"),
    )
    ds_measurements = {}
    for name, path, grid_name in band_files:
        entry = {}
        if grid_name is not None:
            entry["grid"] = grid_name
        entry["path"] = path
        ds_measurements[name] = entry

    ds_doc = {
        "$schema": "https://schemas.opendatacube.org/dataset",
        "id": "8b0e2770-5d4e-5238-8995-4aa91691ab85",
        "product": {"name": product_name},
        "label": "S2B_MSIL2A_20200101T070219_N0213_R120_T39LVG_20200101T091825",
        "crs": "epsg:32739",
        "grids": grids,
        "geometry": {
            "type": "Polygon",
            "coordinates": [[
                [509759.0000000001, 8590241.0],
                [399960.99999999977, 8590241.0],
                [399960.99999999977, 8700039.0],
                [509758.99999999965, 8700039.0],
                [509759.0000000001, 8590241.0],
            ]],
        },
        "properties": {
            "eo:gsd": 10,
            "datetime": acquired,
            "eo:platform": "sentinel-2b",
            "eo:instrument": "msi",
            "eo:cloud_cover": 0,
            "odc:file_format": "GeoTIFF",
            "odc:region_code": "39LVG",
            "odc:processing_datetime": acquired,
        },
        "measurements": ds_measurements,
        "lineage": {},
    }

    # (measurement name, aliases) for the uint16 bands, in product order.
    uint16_bands = (
        ("coastal_aerosol", ["band_01", "B01"]),
        ("blue", ["band_02", "B02"]),
        ("green", ["band_03", "B03"]),
        ("red", ["band_04", "B04"]),
        ("red_edge_1", ["band_05", "B05"]),
        ("red_edge_2", ["band_06", "B06"]),
        ("red_edge_3", ["band_07", "B07"]),
        ("nir_1", ["band_08", "B08"]),
        ("nir_2", ["band_8a", "B8A"]),
        ("water_vapour", ["band_09", "B09"]),
        ("swir_1", ["band_11", "B11"]),
        ("swir_2", ["band_12", "B12"]),
    )
    product_measurements = [
        {
            "name": name,
            "dtype": "uint16",
            "units": "1",
            "nodata": 0,
            "aliases": aliases,
        }
        for name, aliases in uint16_bands
    ]

    # Classification labels; the dict keys are the values as strings, "0".."11".
    scl_labels = (
        "nodata",
        "defective",
        "dark",
        "shadow",
        "vegetation",
        "bare",
        "water",
        "unclassified",
        "cloud medium probability",
        "cloud high probability",
        "thin cirrus",
        "snow or ice",
    )
    product_measurements.append({
        "name": "scl",
        "dtype": "uint8",
        "units": "1",
        "nodata": 0,
        "aliases": ["mask", "qa"],
        "flags_definition": {
            "sca": {
                "description": "Sen2Cor Scene Classification",
                "bits": [0, 1, 2, 3, 4, 5, 6, 7],
                "values": {
                    str(value): label
                    for value, label in enumerate(scl_labels)
                },
            }
        },
    })

    product_doc = {
        "name": product_name,
        "description": "Sentinel-2B Level 2 COGs",
        "metadata_type": "eo3",
        "metadata": {"product": {"name": product_name}},
        "measurements": product_measurements,
    }

    dataset_type = DatasetType(eo3_metadata, product_doc)
    return Dataset(dataset_type, prep_eo3(ds_doc))
示例#6
0
def validate_dataset(
        doc: Dict,
        product_definition: Optional[Dict] = None,
        metadata_type_definition: Optional[Dict] = None,
        thorough: bool = False,
        readable_location: Optional[Union[str, Path]] = None,
        expect_extra_measurements: bool = False,
        expect_geometry: bool = True,
        nullable_fields: Iterable[str] = ("label", ),
) -> ValidationMessages:
    """
    Validate a dataset document, optionally against the given product.

    By default this will only look at the metadata, run with thorough=True to
    open the data files too.

    This is a generator: validation messages are yielded as they are found.
    Validation stops early if the document has no/unknown ``$schema`` or
    fails the dataset schema.

    :param doc: The dataset document to validate.
    :param product_definition: Optionally check that the dataset matches this product definition.
    :param metadata_type_definition:
            Optionally check that the fields configured in this metadata type
            are present (and non-null) in the document.
    :param thorough: Open the imagery too, to check that data types etc match.
    :param readable_location: Dataset location to use, if not the metadata path.
    :param expect_extra_measurements:
            Allow some dataset measurements to be missing from the product definition.
            This is (deliberately) allowed by ODC, but often a mistake.
            This flag disables the warning.
    :param expect_geometry: Passed through to the geometry validation.
    :param nullable_fields:
            Metadata-type field names for which a null value is not reported.
    """
    schema = doc.get("$schema")
    if schema is None:
        yield _error(
            "no_schema",
            f"No $schema field. "
            f"You probably want an ODC dataset schema {model.ODC_DATASET_SCHEMA_URL!r}",
        )
        return
    if schema != model.ODC_DATASET_SCHEMA_URL:
        yield _error(
            "unknown_doc_type",
            f"Unknown doc schema {schema!r}. Only ODC datasets are supported ({model.ODC_DATASET_SCHEMA_URL!r})",
        )
        return

    has_doc_errors = False
    for error in serialise.DATASET_SCHEMA.iter_errors(doc):
        has_doc_errors = True
        # Path elements can be integers (array indexes), so stringify before joining.
        displayable_path = ".".join(map(str, error.absolute_path))

        hint = None
        if displayable_path == "crs" and "not of type" in error.message:
            hint = "epsg codes should be prefixed with 'epsg:1234'"

        context = f"({displayable_path}) " if displayable_path else ""
        yield _error("structure", f"{context}{error.message} ", hint=hint)

    if has_doc_errors:
        return

    dataset = serialise.from_doc(doc, skip_validation=True)

    if not dataset.product.href:
        # The message must be yielded, otherwise it is silently discarded.
        yield _info("product_href", "A url (href) is recommended for products")

    yield from _validate_geo(dataset, expect_geometry=expect_geometry)

    # Note that a dataset may have no measurements (eg. telemetry data).
    # (TODO: a stricter mode for when we know we should have geo and measurement info)
    if dataset.measurements:
        for name, measurement in dataset.measurements.items():
            grid_name = measurement.grid
            # A reference to "default" is only checked when the dataset declares grids at all.
            if grid_name != "default" or dataset.grids:
                if grid_name not in dataset.grids:
                    yield _error(
                        "invalid_grid_ref",
                        f"Measurement {name!r} refers to unknown grid {grid_name!r}",
                    )

            if is_absolute(measurement.path):
                yield _warning(
                    "absolute_path",
                    f"measurement {name!r} has an absolute path: {measurement.path!r}",
                )

    yield from _validate_stac_properties(dataset)

    required_measurements: Dict[str, ExpectedMeasurement] = {}
    if product_definition is not None:
        required_measurements.update({
            m.name: m
            for m in map(
                ExpectedMeasurement.from_definition,
                product_definition.get("measurements") or (),
            )
        })

        product_name = product_definition.get("name")
        if product_name != dataset.product.name:
            # This is only informational as it's possible products may be indexed with finer-grained
            # categories than the original datasets: eg. a separate "nrt" product, or test product.
            yield _info(
                "product_mismatch",
                f"Dataset product name {dataset.product.name!r} "
                f"does not match the given product ({product_name!r})",
            )

        for name in required_measurements:
            if name not in dataset.measurements.keys():
                yield _error(
                    "missing_measurement",
                    f"Product {product_name} expects a measurement {name!r}",
                )
        measurements_not_in_product = set(
            dataset.measurements.keys()).difference({
                m["name"]
                for m in product_definition.get("measurements") or ()
            })
        if (not expect_extra_measurements) and measurements_not_in_product:
            things = ", ".join(sorted(measurements_not_in_product))
            yield _warning(
                "extra_measurements",
                f"Dataset has measurements not present in product definition for {product_name!r}: {things}",
                hint=
                "This may be valid, as it's allowed by ODC. Set `expect_extra_measurements` to mute this.",
            )

    if metadata_type_definition:
        # Datacube does certain transforms on an eo3 doc before storage.
        # We need to do the same, as the fields will be read from the storage.
        prepared_doc = prep_eo3(doc)

        for field_name, offsets in _get_field_offsets(
                metadata_type=metadata_type_definition):
            if not any(
                    _has_offset(prepared_doc, offset) for offset in offsets):
                readable_offsets = " or ".join("->".join(offset)
                                               for offset in offsets)
                yield _warning(
                    "missing_field",
                    f"Dataset is missing field {field_name!r}",
                    hint=f"Expected at {readable_offsets}",
                )
                continue

            if field_name not in nullable_fields:
                # A field can be configured with several alternative offsets.
                # It is only null when none of them holds a value (previously
                # only the last offset was looked at).
                if all(
                        toolz.get_in(offset, prepared_doc) is None
                        for offset in offsets):
                    yield _info(
                        "null_field",
                        f"Value is null for configured field {field_name!r}",
                    )

    dataset_location = dataset.locations[
        0] if dataset.locations else readable_location

    # Thorough mode: open each measurement's file and compare it with the
    # product definition (band index, dtype, nodata).
    # NOTE(review): dataset_location is None when the dataset has no locations
    # and no readable_location was given; uri_resolve is still called with it.
    if thorough:
        for name, measurement in dataset.measurements.items():
            full_path = uri_resolve(dataset_location, measurement.path)
            expected_measurement = required_measurements.get(name)

            band = measurement.band or 1
            with rasterio.open(full_path) as ds:
                ds: DatasetReader

                if band not in ds.indexes:
                    yield _error(
                        "incorrect_band",
                        f"Measurement {name!r} file contains no rio index {band!r}.",
                        hint=f"contains indexes {ds.indexes!r}",
                    )
                    continue

                if not expected_measurement:
                    # The measurement is not in the product definition
                    #
                    # This is only informational because a product doesn't have to define all
                    # measurements that the datasets contain.
                    #
                    # This is historically because dataset documents reflect the measurements that
                    # are stored on disk, which can differ. But products define the set of measurements
                    # that are mandatory in every dataset.
                    #
                    # (datasets differ when, for example, sensors go offline, or when there's on-disk
                    #  measurements like panchromatic that GA doesn't want in their product definitions)
                    if required_measurements:
                        yield _info(
                            "unspecified_measurement",
                            f"Measurement {name} is not in the product",
                        )
                else:
                    expected_dtype = expected_measurement.dtype
                    # rasterio band indexes are 1-based; dtypes/nodatavals are 0-based tuples.
                    band_dtype = ds.dtypes[band - 1]
                    # TODO: NaN handling
                    if expected_dtype != band_dtype:
                        yield _error(
                            "different_dtype",
                            f"{name} dtype: "
                            f"product {expected_dtype!r} != dataset {band_dtype!r}",
                        )

                    ds_nodata = ds.nodatavals[band - 1]

                    # If the dataset is missing 'nodata', we can allow anything in product 'nodata'.
                    # (In ODC, nodata might be a fill value for loading data.)
                    if ds_nodata is None:
                        continue

                    # Otherwise check that nodata matches (NaN never compares equal, so test it separately).
                    expected_nodata = expected_measurement.nodata
                    if expected_nodata != ds_nodata and not (
                            _is_nan(expected_nodata) and _is_nan(ds_nodata)):
                        yield _error(
                            "different_nodata",
                            f"{name} nodata: "
                            f"product {expected_nodata!r} != dataset {ds_nodata!r}",
                        )
示例#7
0
def fuse_ds(ds_1: Dataset,
            ds_2: Dataset,
            product: Optional[DatasetType] = None) -> Dataset:
    """
    Fuse two datasets into one dataset carrying the measurements of both.

    It requires that:
      - the products are fusable
      - grids with the same name are identical
      - labels are in the format 'product_suffix' with identical suffixes
      - CRSs' are identical
      - datetimes are identical
      - $schemas are identical

    :param ds_1: first dataset; its values lose to ``ds_2`` on key clashes.
    :param ds_2: second dataset.
    :param product: product of the fused dataset; derived with
        ``fuse_products`` from the two input products when omitted.
    :return: a new ``Dataset`` built from the fused, ``prep_eo3``-processed document.
    :raises AssertionError: when one of the requirements on grids, labels,
        ``$schema``, ``crs`` or ``datetime`` is not met. The exception is
        raised explicitly so the checks still run under ``python -O``.
    """

    doc_1, doc_2 = ds_1.metadata_doc, ds_2.metadata_doc

    if product is None:
        product = fuse_products(ds_1.type, ds_2.type)

    fused_doc = {}

    fused_doc["id"] = str(
        odc_uuid(product.name, "0.0.0", sources=[doc_1["id"], doc_2["id"]]))
    fused_doc["lineage"] = {"source_datasets": [doc_1["id"], doc_2["id"]]}

    # check that all grids with the same name are identical
    common_grids = set(doc_1["grids"].keys()).intersection(
        doc_2["grids"].keys())
    conflicting_grids = [
        g for g in common_grids if doc_1["grids"][g] != doc_2["grids"][g]
    ]
    if conflicting_grids:
        raise AssertionError(
            f"Grids differ between the datasets: {conflicting_grids!r}")

    # TODO: handle the case that grids have conflicts in a separate function
    fused_doc["grids"] = {**doc_1["grids"], **doc_2["grids"]}

    # The label with the product name removed must be the same for both inputs.
    label_suffix = doc_1["label"].replace(doc_1["product"]["name"], "")
    other_suffix = doc_2["label"].replace(doc_2["product"]["name"], "")
    if label_suffix != other_suffix:
        raise AssertionError(
            f"Label suffixes differ: {label_suffix!r} != {other_suffix!r}")
    fused_doc["label"] = f"{product.name}{label_suffix}"

    for key in ("$schema", "crs"):
        if doc_1[key] != doc_2[key]:
            raise AssertionError(
                f"{key} differs: {doc_1[key]!r} != {doc_2[key]!r}")
        fused_doc[key] = doc_1[key]

    fused_doc["properties"] = {}
    # datetime is the only mandatory property
    datetime_1 = doc_1["properties"]["datetime"]
    datetime_2 = doc_2["properties"]["datetime"]
    if datetime_1 != datetime_2:
        raise AssertionError(
            f"datetime differs: {datetime_1!r} != {datetime_2!r}")

    # copy over all identical properties
    for key, val in doc_1["properties"].items():
        if val == doc_2["properties"].get(key, None):
            fused_doc["properties"][key] = val

    fused_doc["measurements"] = {
        **doc_1["measurements"],
        **doc_2["measurements"]
    }
    # NOTE(review): the merge above is shallow, so the per-measurement dicts
    # are shared with doc_1/doc_2 and the assignment below rewrites "path" in
    # the input documents as well. Left as-is to preserve existing behaviour.
    for key, path in {
            **measurement_paths(ds_1),
            **measurement_paths(ds_2)
    }.items():
        fused_doc["measurements"][key]["path"] = path

    fused_ds = Dataset(product, prep_eo3(fused_doc), uris=[""])
    return fused_ds
示例#8
0
def test_fuse_dss(wo_definition, fc_definition):
    """fuse_ds unions the measurement paths of a WO and an FC dataset and rejects mismatches."""

    metadata_types = {
        type_doc["name"]: metadata_from_doc(type_doc)
        for type_doc in default_metadata_type_docs()
    }
    eo3 = metadata_types["eo3"]

    wo_product = DatasetType(eo3, wo_definition)
    fc_product = DatasetType(eo3, fc_definition)
    fused_product = fuse_products(wo_product, fc_product)

    def make_metadata(uuid, product_name, family, version, created, bands,
                      with_lineage):
        # The two documents differ only in the arguments; everything else is shared.
        label = f'{product_name}_091086_2020-04-04_final'
        doc = {
            'id': uuid,
            'crs': 'epsg:32655',
            'grids': {
                'default': {
                    'shape': [7211, 8311],
                    'transform':
                    [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0]
                }
            },
            'label': label,
            '$schema': 'https://schemas.opendatacube.org/dataset',
        }
        if with_lineage:
            doc['lineage'] = {'source_datasets': {}}
        doc['product'] = {'name': product_name}
        doc['properties'] = {
            'title': label,
            'eo:gsd': 30.0,
            'created': created,
            'datetime': '2020-04-04T23:33:10.644420Z',
            'proj:epsg': 32655,
            'proj:shape': [7211, 8311],
            'eo:platform': 'landsat-7',
            'odc:product': product_name,
            'odc:producer': 'ga.gov.au',
            'eo:instrument': 'ETM',
            'eo:cloud_cover': 44.870310145260326,
            'eo:sun_azimuth': 49.20198554,
            'proj:transform':
            [30.0, 0.0, 423285.0, 0.0, -30.0, -4040385.0, 0.0, 0.0, 1.0],
            'landsat:wrs_row': 86,
            'odc:file_format': 'GeoTIFF',
            'odc:region_code': '091086',
            'dtr:end_datetime': '2020-04-04T23:33:24.461679Z',
            'eo:sun_elevation': 32.7056476,
            'landsat:wrs_path': 91,
            'dtr:start_datetime': '2020-04-04T23:32:56.662365Z',
            'odc:product_family': family,
            'odc:dataset_version': version,
            'dea:dataset_maturity': 'final',
            'odc:collection_number': 3,
            'odc:naming_conventions': 'dea_c3',
            'odc:processing_datetime': '2020-04-04T23:33:10.644420Z',
            'landsat:landsat_scene_id': 'LE70910862020095ASA00',
            'landsat:collection_number': 1,
            'landsat:landsat_product_id':
            'LE07_L1TP_091086_20200404_20200501_01_T1',
            'landsat:collection_category': 'T1',
        }
        doc['measurements'] = {
            band: {'path': f'{label}_{band}.tif'}
            for band in bands
        }
        return doc

    wo_metadata = make_metadata(
        'e9fb6737-b93d-5cd9-bfe6-7e634abc9905',
        'ga_ls_wo_3',
        family='wo',
        version='1.6.0',
        created='2021-03-09T23:22:42.130266Z',
        bands=('water', ),
        with_lineage=True,
    )
    fc_metadata = make_metadata(
        '41980746-4f17-5e0c-86a0-92cca8d3c99d',
        'ga_ls_fc_3',
        family='fc',
        version='2.5.0',
        created='2021-03-10T04:14:49.645196Z',
        bands=('bs', 'pv', 'ue', 'npv'),
        with_lineage=False,
    )

    # paths get made absolute here
    # TODO: force paths to stay relative
    wo_uris = [
        "s3://dea-public-data/derivative/ga_ls_wo_3/1-6-0/091/086/2020/04/04/ga_ls_wo_3_091086_2020-04-04_final.stac-item.json"
    ]
    fc_uris = [
        "s3://dea-public-data/derivative/ga_ls_fc_3/2-5-0/091/086/2020/04/04/ga_ls_fc_3_091086_2020-04-04_final.stac-item.json"
    ]
    wo_ds = Dataset(wo_product, prep_eo3(wo_metadata), uris=wo_uris)
    fc_ds = Dataset(fc_product, prep_eo3(fc_metadata), uris=fc_uris)

    # Once with an explicit fused product, once letting fuse_ds derive it.
    for product_args in ((fused_product, ), ()):
        fused_ds = fuse_ds(wo_ds, fc_ds, *product_args)
        assert _get_msr_paths(fused_ds) == _get_msr_paths(fc_ds).union(
            _get_msr_paths(wo_ds))

    def change_datetime(doc):
        doc["properties"]["datetime"] = '2020-04-03T23:33:10.644420Z'

    def change_crs(doc):
        doc["crs"] = "epsg:32656"

    def change_grid_shape(doc):
        doc['grids']['default']['shape'] = [7212, 8311]

    def change_label(doc):
        doc['label'] += 'a'

    # Each mutation breaks exactly one fusing requirement of the FC document.
    for corrupt in (change_datetime, change_crs, change_grid_shape,
                    change_label):
        bad_metadata = deepcopy(fc_metadata)
        corrupt(bad_metadata)
        bad_ds = Dataset(fc_product, prep_eo3(bad_metadata), uris=fc_uris)
        with pytest.raises(AssertionError):
            fused_ds = fuse_ds(wo_ds, bad_ds, fused_product)