Example #1
def test_complain_about_missing_fields(tmp_path: Path, l1_ls8_folder: Path):
    """
    It should complain immediately if I add a file without enough metadata to write the filename.

    (and with a friendly error message)
    """

    out = tmp_path / "out"
    out.mkdir()

    [blue_geotiff_path] = l1_ls8_folder.rglob("L*_B2.TIF")

    # Default simple naming conventions need at least a date and family...
    with pytest.raises(
        ValueError, match="Need more properties to fulfill naming conventions."
    ):
        with DatasetAssembler(out) as p:
            p.write_measurement("blue", blue_geotiff_path)

    # It should mention the field that's missing (we added a date, so product_family is needed)
    with DatasetAssembler(out) as p:
        with pytest.raises(ValueError, match="odc:product_family"):
            p.datetime = datetime(2019, 7, 4, 13, 7, 5)
            p.write_measurement("blue", blue_geotiff_path)

    # DEA naming conventions have stricter standards, and will tell you which fields you need to add.
    with DatasetAssembler(out, naming_conventions="dea") as p:
        # We set all the fields that work in default naming conventions.
        p.datetime = datetime(2019, 7, 4, 13, 7, 5)
        p.product_family = "quaternarius"
        p.processed_now()

        # These fields are mandatory for DEA, and so should be complained about.
        expected_extra_fields_needed = (
            "eo:platform",
            "eo:instrument",
            "odc:dataset_version",
            "odc:producer",
            "odc:region_code",
        )
        with pytest.raises(ValueError) as got_error:
            p.write_measurement("blue", blue_geotiff_path)

        # All needed fields should have been in the error message.
        for needed_field_name in expected_extra_fields_needed:
            assert needed_field_name in got_error.value.args[0], (
                f"Expected field {needed_field_name} to "
                f"be listed as mandatory in the error message"
            )
Example #2
def assert_names_match(
    tmp_path: Path,
    # Given:
    conventions,
    properties: Mapping,
    # Then expect:
    expect_metadata_path: str = None,
    expect_label: str = None,
):
    """
    Easily test a set of naming conventions: do certain properties lead to the expected file names?
    """
    __tracebackhide__ = operator.methodcaller("errisinstance", AssertionError)

    with DatasetAssembler(tmp_path, naming_conventions=conventions) as p:
        p.properties.update(properties)

        dataset_id, metadata_path = p.done()

    if expect_metadata_path:
        metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()
        assert metadata_path_offset == expect_metadata_path

    with metadata_path.open("r") as f:
        doc = yaml.YAML(typ="safe").load(f)

    if expect_label:
        assert doc["label"] == expect_label, "Unexpected dataset label"
Example #3
def test_dea_interim_folder_calculation(tmp_path: Path):
    """
    DEA Naming conventions should include maturity in the folder name
    when it's not a 'final' dataset.
    """
    with DatasetAssembler(tmp_path, naming_conventions="dea") as p:
        p.platform = "landsat-7"
        # Should not end up in the path, as it's the default:
        p.product_maturity = "stable"
        p.instrument = "ETM+"
        p.datetime = datetime(1998, 7, 30)
        p.product_family = "frogs"
        p.processed = "1999-11-20 00:00:53.152462Z"
        p.maturity = "interim"
        p.producer = "ga.gov.au"
        p.properties["landsat:landsat_scene_id"] = "LE70930821999324EDC00"
        p.dataset_version = "1.2.3"
        p.region_code = "093082"

        p.done()

    [metadata_path] = tmp_path.rglob("*.odc-metadata.yaml")
    calculated_path: Path = metadata_path.relative_to(tmp_path)
    assert calculated_path == Path(
        #                                  ⇩⇩⇩⇩⇩⇩⇩⇩ Adds interim flag
        "ga_ls7e_frogs_1/093/082/1998/07/30_interim/ga_ls7e_frogs_1-2-3_093082_1998-07-30_interim.odc-metadata.yaml"
    )
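
# A companion sketch (not from the original suite): with the default "final"
# maturity, the docstring above implies the folder carries no suffix. The
# "_final" suffix in the filename is an assumption based on the interim case.
def test_dea_final_folder_calculation(tmp_path: Path):
    with DatasetAssembler(tmp_path, naming_conventions="dea") as p:
        p.platform = "landsat-7"
        p.instrument = "ETM+"
        p.datetime = datetime(1998, 7, 30)
        p.product_family = "frogs"
        p.processed = "1999-11-20 00:00:53.152462Z"
        p.maturity = "final"
        p.producer = "ga.gov.au"
        p.dataset_version = "1.2.3"
        p.region_code = "093082"
        p.done()

    [metadata_path] = tmp_path.rglob("*.odc-metadata.yaml")
    assert "_interim" not in metadata_path.relative_to(tmp_path).as_posix()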
Example #4
def test_dea_c3_naming_conventions(tmp_path: Path):
    """
    A sample scene for Alchemist C3 processing that tests the naming conventions.
    """
    p = DatasetAssembler(tmp_path, naming_conventions="dea_c3")
    p.platform = "landsat-7"
    p.datetime = datetime(1998, 7, 30)
    p.product_family = "wo"
    p.processed = "1998-07-30T12:23:23"
    p.maturity = "interim"
    p.producer = "ga.gov.au"
    p.region_code = "090081"

    # Leave out a few required fields and expect a ValueError:
    with pytest.raises(
            ValueError,
            match="Need more properties to fulfill naming conventions."):
        p.done()

    # Now supply the missing fields:
    p.dataset_version = "1.6.0"
    p.collection_number = "3"

    # Collection number returned as integer via the getter.
    assert p.collection_number == 3

    # Success case
    dataset_id, metadata_path = p.done()
    metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert (
        metadata_path_offset ==
        "ga_ls_wo_3/1-6-0/090/081/1998/07/30/ga_ls_wo_3_090081_1998-07-30_interim.odc-metadata.yaml"
    )
Example #5
def test_add_source_dataset(tmp_path: Path, inherit_geom):
    from eodatasets3 import serialise

    p = DatasetAssembler(tmp_path, naming_conventions="dea_c3")
    source_dataset = serialise.from_path(
        Path(__file__).parent /
        "data/LC08_L1TP_089080_20160302_20170328_01_T1.yaml")
    p.add_source_dataset(source_dataset,
                         auto_inherit_properties=True,
                         inherit_geometry=inherit_geom)

    p.maturity = "interim"
    p.collection_number = "3"
    p.dataset_version = "1.6.0"
    p.producer = "ga.gov.au"
    p.processed = "1998-07-30T12:23:23"
    p.product_family = "wofs"
    p.write_measurement(
        "water",
        Path(__file__).parent /
        "data/wofs/ga_ls_wofs_3_099081_2020-07-26_interim_water_clipped.tif",
    )

    id, path = p.done()

    output = serialise.from_path(path)
    if inherit_geom:
        # POLYGON((609615 -3077085, 378285 -3077085, 378285 -3310515, 609615 -3310515, 609615 -3077085))
        assert output.geometry == source_dataset.geometry
    else:
        # POLYGON((684285 -3439275, 684285 -3444495, 689925 -3444495, 689925 -3439275, 684285 -3439275))
        # Geometry is not set from the source dataset, but instead from the added wofs measurement
        assert output.geometry != source_dataset.geometry
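
# The `inherit_geom` argument above is presumably supplied by a parametrised
# pytest fixture along these lines (an assumption; the fixture isn't shown here):
@pytest.fixture(params=[True, False])
def inherit_geom(request):
    return request.param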
Example #6
def test_africa_naming_conventions(tmp_path: Path):
    """
    Minimal fields needed for DEAfrica naming conventions
    """
    with DatasetAssembler(tmp_path, naming_conventions="deafrica") as p:

        # Just the fields listed in required_fields.
        p.producer = "digitalearthafrica.org"
        p.datetime = datetime(1998, 7, 30)
        p.region_code = "090081"
        p.product_family = "wofs"
        p.platform = "LANDSAT_8"
        p.processed_now()
        p.dataset_version = "0.1.2"

        dataset_id, metadata_path = p.done()

    metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert (
        metadata_path_offset ==
        "wofs_ls/0-1-2/090/081/1998/07/30/wofs_ls_090081_1998-07-30.odc-metadata.yaml"
    )

    with DatasetAssembler(tmp_path, naming_conventions="deafrica") as p:

        # Just the fields listed in required_fields.
        p.producer = "digitalearthafrica.org"
        p.datetime = datetime(1998, 7, 30)
        p.region_code = "090081"
        p.product_family = "fc"
        p.platform = "LANDSAT_8"
        p.processed_now()
        p.dataset_version = "0.1.2"

        dataset_id, metadata_path = p.done()

    metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert (
        metadata_path_offset ==
        "fc_ls/0-1-2/090/081/1998/07/30/fc_ls_090081_1998-07-30.odc-metadata.yaml"
    )
Example #7
def test_dataset_multi_platform(tmp_path: Path):
    """Can we make a dataset derived from multiple platforms?"""

    # No platform is included in names when there's a mix.
    with DatasetAssembler(tmp_path) as p:
        p.platforms = ["Sentinel_2a", "landsat_7"]
        assert p.platform == "landsat-7,sentinel-2a"

        p.datetime = datetime(2019, 1, 1)
        p.product_family = "peanuts"
        p.processed_now()

        dataset_id, metadata_path = p.done()

    with metadata_path.open("r") as f:
        doc = yaml.YAML(typ="safe").load(f)

    assert doc["label"] == "peanuts_2019-01-01"
    metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert (metadata_path_offset ==
            "peanuts/2019/01/01/peanuts_2019-01-01.odc-metadata.yaml")

    # ... but show the platform abbreviation when there's a known group.
    with DatasetAssembler(tmp_path) as p:
        p.platforms = ["Sentinel_2a", "sentinel_2b"]
        assert p.platform == "sentinel-2a,sentinel-2b"

        p.datetime = datetime(2019, 1, 1)
        p.product_family = "peanuts"
        p.processed_now()

        dataset_id, metadata_path = p.done()

    with metadata_path.open("r") as f:
        doc = yaml.YAML(typ="safe").load(f)

    assert doc["label"] == "s2_peanuts_2019-01-01"
    metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert (metadata_path_offset ==
            "s2_peanuts/2019/01/01/s2_peanuts_2019-01-01.odc-metadata.yaml")
Example #8
def test_minimal_generated_naming_package(tmp_path: Path, l1_ls8_folder: Path):
    """
    What's the minimum number of fields we can set and still generate file/product
    names to produce a package?
    """

    out = tmp_path / "out"
    out.mkdir()

    [blue_geotiff_path] = l1_ls8_folder.rglob("L*_B2.TIF")

    with DatasetAssembler(out) as p:
        p.datetime = datetime(2019, 7, 4, 13, 7, 5)
        p.product_family = "quaternarius"
        p.processed_now()

        p.write_measurement("blue", blue_geotiff_path)

        # A friendly __str__ for notebook/terminal users:
        assert str(p) == dedent(
            f"""
            Assembling quaternarius (unfinished)
            - 1 measurements: blue
            - 4 properties: datetime, odc:file_format, odc:processing_datetime, odc:prod...
            Writing to location: {out}/quaternarius/2019/07/04/quaternarius_2019-07-04.odc-metadata.yaml
        """
        )

        # p.done() will validate the dataset and write it to the destination atomically.
        dataset_id, metadata_path = p.done()

    assert dataset_id is not None
    assert_file_structure(
        out,
        {
            "quaternarius": {
                "2019": {
                    "07": {
                        "04": {
                            # Set a dataset version to get rid of 'beta' label.
                            "quaternarius_2019-07-04.odc-metadata.yaml": "",
                            "quaternarius_2019-07-04.proc-info.yaml": "",
                            "quaternarius_2019-07-04_blue.tif": "",
                            "quaternarius_2019-07-04.sha1": "",
                        }
                    }
                }
            }
        },
    )
Example #9
def test_dataset_no_measurements(tmp_path: Path):
    """Can we make a dataset with no measurements? (eg. telemetry data)"""
    with DatasetAssembler(tmp_path) as p:
        # A custom label too.
        p.label = "chipmonk_sightings_2019"
        p.datetime = datetime(2019, 1, 1)
        p.product_family = "chipmonk_sightings"
        p.processed_now()

        dataset_id, metadata_path = p.done()

    with metadata_path.open("r") as f:
        doc = yaml.safe_load(f)

    assert doc["label"] == "chipmonk_sightings_2019", "Couldn't override label field"
Example #10
def test_minimal_s2_dataset(tmp_path: Path):
    """A minimal dataset with sentinel platform/instrument"""
    with DatasetAssembler(tmp_path) as p:
        p.platform = "sentinel-2a"
        p.instrument = "msi"
        p.datetime = datetime(2018, 11, 4)
        p.product_family = "blueberries"
        p.processed = "2018-11-05T12:23:23"

        dataset_id, metadata_path = p.done()

    with metadata_path.open("r") as f:
        doc = yaml.safe_load(f)

    assert doc["label"] == "s2am_blueberries_2018-11-04", "Unexpected dataset label"
Example #11
def test_custom_naming(tmp_path: Path):
    """
    We can create naming conventions separately, and give them to the assembler later.
    """
    p = _basic_properties_set()
    convention = namer(properties=p)
    convention.dataset_folder = Path("my/custom/folder/")

    with DatasetAssembler(tmp_path, names=convention) as a:
        dataset_id, metadata_path = a.done()

    metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert (
        metadata_path_offset ==
        "my/custom/folder/ga_s2am_tester_1-2-3_023543_2013-02-03.odc-metadata.yaml"
    )
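
# A follow-on sketch (assumed API, based on attributes used in later examples:
# `names.product_name` in Example #23 and `names.measurement_filename` in
# Example #24): the naming object can be inspected before anything is written.
def test_naming_can_be_inspected():
    convention = namer(properties=_basic_properties_set())
    assert convention.product_name  # e.g. "ga_s2am_tester_1" (illustrative)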
Example #12
def test_minimal_s1_dataset(tmp_path: Path):
    """A minimal dataset with sentinel-1a/b platform/instrument"""
    with DatasetAssembler(tmp_path) as p:
        p.platform = "sentinel-1a"
        p.instrument = "c-sar"
        p.datetime = datetime(2018, 11, 4)
        p.product_family = "bck"
        p.processed = "2018-11-05T12:23:23"

        dataset_id, metadata_path = p.done()

    with metadata_path.open("r") as f:
        doc = yaml.safe_load(f)

    assert doc["label"] == "s1ac_bck_2018-11-04", "Unexpected dataset label"
Example #13
def prepare_and_write(
    ds_path: Path,
    output_yaml: Path,
    producer: str,
) -> Tuple[uuid.UUID, Path]:
    with DatasetAssembler(
            metadata_path=output_yaml,
            dataset_location=ds_path,
    ) as p:
        p.properties["odc:producer"] = producer

        if producer == "esa.int":
            jp2_offsets = _extract_esa_fields(ds_path, p)
        elif producer == "sinergise.com":
            jp2_offsets = _extract_sinergise_fields(ds_path, p)
        else:
            raise NotImplementedError(
                f"Unknown s2 producer {producer}. Expected 'sinergise.com' or 'esa.int'"
            )

        p.dataset_id = _get_stable_id(p)
        p.properties["eo:platform"] = _get_platform_name(p.properties)
        p.properties["eo:instrument"] = "MSI"
        p.properties["odc:dataset_version"] = f"1.0.{p.processed:%Y%m%d}"

        p.properties["odc:file_format"] = "JPEG2000"
        p.properties["odc:product_family"] = "level1"

        for path in jp2_offsets:
            band_number = _extract_band_number(path.stem)
            if band_number.lower() in ("tci", "pvi", "preview"):
                continue
            if band_number not in SENTINEL_MSI_BAND_ALIASES:
                raise RuntimeError(
                    f"Unknown band number {band_number!r} in image {path}")

            p.note_measurement(
                path=path,
                name=SENTINEL_MSI_BAND_ALIASES[band_number],
                relative_to_dataset_location=True,
            )

        return p.done()
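
# A hypothetical invocation (paths are illustrative; the producer must be
# "esa.int" or "sinergise.com", as enforced above):
#
#   dataset_id, metadata_file = prepare_and_write(
#       Path("S2A_MSIL1C_20201011T000249_N0209_R030_T55HFA.SAFE"),
#       Path("S2A_MSIL1C_20201011T000249.odc-metadata.yaml"),
#       producer="esa.int",
#   )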
Example #14
def test_dataset_given_properties(tmp_path: Path):
    """Can we give existing properties to the assembler?"""

    properties = {
        "datetime": datetime(2019, 1, 1),
        "odc:product_family": "chipmonk_sightings",
        "odc:processing_datetime": "2021-06-15T01:33:43.378850",
    }
    names = namer(properties=properties)
    with DatasetAssembler(tmp_path, names=names) as p:
        # It should have normalised properties!
        assert p.processed == datetime(2021, 6, 15, 1, 33, 43, 378850, timezone.utc)

        dataset_id, metadata_path = p.done()

    relative_path = metadata_path.relative_to(tmp_path)
    assert relative_path == Path(
        "chipmonk_sightings/2019/01/01/chipmonk_sightings_2019-01-01.odc-metadata.yaml"
    )
Example #15
def test_minimal_s2_dataset_normal(tmp_path: Path):
    """A minimal dataset with sentinel platform/instrument"""
    with DatasetAssembler(tmp_path) as p:
        p.platform = "sentinel-2a"
        p.instrument = "msi"
        p.datetime = datetime(2018, 11, 4)
        p.product_family = "blueberries"
        p.processed = "2018-11-05T12:23:23"
        p.properties["sentinel:sentinel_tile_id"] = (
            "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
        )

        dataset_id, metadata_path = p.done()

    with metadata_path.open("r") as f:
        doc = yaml.YAML(typ="safe").load(f)

    metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()
    assert metadata_path_offset == (
        "s2am_blueberries/2018/11/04/s2am_blueberries_2018-11-04.odc-metadata.yaml"
    )

    assert doc["label"] == "s2am_blueberries_2018-11-04", "Unexpected dataset label"
Example #16
def test_minimal_package_with_product_name(tmp_path: Path, l1_ls8_folder: Path):
    """
    You can specify an ODC product name manually to avoid most of the name generation.
    """
    out = tmp_path / "out"
    out.mkdir()

    [blue_geotiff_path] = l1_ls8_folder.rglob("L*_B2.TIF")

    with DatasetAssembler(out) as p:
        p.datetime = datetime(2019, 7, 4, 13, 7, 5)
        p.product_name = "loch_ness_sightings"
        p.processed = datetime(2019, 7, 4, 13, 8, 7)

        p.write_measurement("blue", blue_geotiff_path)

        dataset_id, metadata_path = p.done()

    assert dataset_id is not None
    assert_file_structure(
        out,
        {
            "loch_ness_sightings": {
                "2019": {
                    "07": {
                        "04": {
                            # Set a dataset version to get rid of 'beta' label.
                            "loch_ness_sightings_2019-07-04.odc-metadata.yaml": "",
                            "loch_ness_sightings_2019-07-04.proc-info.yaml": "",
                            "loch_ness_sightings_2019-07-04_blue.tif": "",
                            "loch_ness_sightings_2019-07-04.sha1": "",
                        }
                    }
                }
            }
        },
    )
Example #17
def prepare_and_write(
    ds_path: Path,
    output_yaml_path: Path,
    source_telemetry: Path = None,
    # TODO: Can we infer producer automatically? This is bound to cause mistakes otherwise
    producer="usgs.gov",
) -> Tuple[uuid.UUID, Path]:
    """
    Prepare an eo3 metadata file for a Level1 dataset.

    Input dataset path can be a folder or a tar file.
    """
    mtl_doc, mtl_filename = get_mtl_content(ds_path)
    if not mtl_doc:
        raise ValueError(f"No MTL file found for {ds_path}")

    usgs_collection_number = mtl_doc["metadata_file_info"].get(
        "collection_number")
    if usgs_collection_number is None:
        raise NotImplementedError(
            "Dataset has no collection number: pre-collection data is not supported."
        )

    data_format = mtl_doc["product_metadata"]["output_format"]
    if data_format.upper() != "GEOTIFF":
        raise NotImplementedError(
            f"Only GTiff currently supported, got {data_format}")
    file_format = FileFormat.GeoTIFF

    # Assumed below.
    projection_params = mtl_doc["projection_parameters"]
    if ("grid_cell_size_thermal" in projection_params
            and "grid_cell_size_reflective" in projection_params
            and (projection_params["grid_cell_size_reflective"] !=
                 projection_params["grid_cell_size_thermal"])):
        raise NotImplementedError(
            "reflective and thermal have different cell sizes")
    ground_sample_distance = min(value
                                 for name, value in projection_params.items()
                                 if name.startswith("grid_cell_size_"))

    with DatasetAssembler(
            metadata_path=output_yaml_path,
            dataset_location=ds_path,
            # Deterministic ID based on USGS's product id (which changes when the scene is reprocessed by them)
            dataset_id=uuid.uuid5(
                USGS_UUID_NAMESPACE,
                mtl_doc["metadata_file_info"]["landsat_product_id"]),
            naming_conventions="dea",
            if_exists=IfExists.Overwrite,
    ) as p:
        if source_telemetry:
            # Only GA's data has source telemetry...
            assert producer == "ga.gov.au"
            p.add_source_path(source_telemetry)

        p.platform = mtl_doc["product_metadata"]["spacecraft_id"]
        p.instrument = mtl_doc["product_metadata"]["sensor_id"]
        p.product_family = "level1"
        p.producer = producer
        p.datetime = "{}T{}".format(
            mtl_doc["product_metadata"]["date_acquired"],
            mtl_doc["product_metadata"]["scene_center_time"],
        )
        p.processed = mtl_doc["metadata_file_info"]["file_date"]
        p.properties["odc:file_format"] = file_format
        p.properties["eo:gsd"] = ground_sample_distance
        cloud_cover = mtl_doc["image_attributes"]["cloud_cover"]
        # Cloud cover is -1 when missing (such as TIRS-only data)
        if cloud_cover != -1:
            p.properties["eo:cloud_cover"] = cloud_cover
        p.properties["eo:sun_azimuth"] = mtl_doc["image_attributes"][
            "sun_azimuth"]
        p.properties["eo:sun_elevation"] = mtl_doc["image_attributes"][
            "sun_elevation"]
        p.properties["landsat:collection_number"] = usgs_collection_number
        for section, fields in _COPYABLE_MTL_FIELDS:
            for field in fields:
                value = mtl_doc[section].get(field)
                if value is not None:
                    p.properties[f"landsat:{field}"] = value

        p.region_code = f"{p.properties['landsat:wrs_path']:03d}{p.properties['landsat:wrs_row']:03d}"
        org_collection_number = utils.get_collection_number(
            p.producer, p.properties["landsat:collection_number"])
        p.dataset_version = f"{org_collection_number}.0.{p.processed:%Y%m%d}"

        # NRT product?
        # Category is one of: T1, T2 or RT ('real time')
        if p.properties["landsat:collection_category"] == "RT":
            p.properties["odc:dataset_maturity"] = "nrt"

        band_aliases = get_band_alias_mappings(p.platform, p.instrument)
        for usgs_band_id, file_location in _iter_bands_paths(mtl_doc):
            p.note_measurement(
                band_aliases[usgs_band_id],
                file_location,
                relative_to_dataset_location=True,
            )

        p.add_accessory_file("metadata:landsat_mtl", Path(mtl_filename))

        return p.done()
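
# A hypothetical invocation (paths illustrative; per the docstring, the input
# may be a folder or a tar file):
#
#   dataset_uuid, metadata_file = prepare_and_write(
#       Path("LC08_L1TP_089080_20160302_20170328_01_T1.tar"),
#       Path("LC08_L1TP_089080_20160302_20170328_01_T1.odc-metadata.yaml"),
#   )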
Example #18
def prepare_and_write(
    ds_path: Path,
    collection_location: Path,
    # TODO: Can we infer producer automatically? This is bound to cause mistakes otherwise
    producer="usgs.gov",
) -> Tuple[uuid.UUID, Path]:
    """
    Prepare an eo3 metadata file for a Level2 dataset.

    Input dataset path can be a folder or a tar file.
    """
    mtl_doc, mtl_filename = get_mtl_content(
        ds_path, root_element="landsat_metadata_file")
    if not mtl_doc:
        raise ValueError(f"No MTL file found for {ds_path}")

    usgs_collection_number = mtl_doc["product_contents"].get(
        "collection_number")
    if usgs_collection_number is None:
        raise NotImplementedError(
            "Dataset has no collection number: pre-collection data is not supported."
        )

    data_format = mtl_doc["product_contents"]["output_format"]
    if data_format.upper() != "GEOTIFF":
        raise NotImplementedError(
            f"Only GTiff currently supported, got {data_format}")
    file_format = FileFormat.GeoTIFF

    # Assumed below.
    if (mtl_doc["projection_attributes"]["grid_cell_size_reflective"] !=
            mtl_doc["projection_attributes"]["grid_cell_size_thermal"]):
        raise NotImplementedError(
            "reflective and thermal have different cell sizes")
    ground_sample_distance = min(
        value for name, value in mtl_doc["projection_attributes"].items()
        if name.startswith("grid_cell_size_"))

    with DatasetAssembler(
            collection_location=collection_location,
            # Deterministic ID based on USGS's product id (which changes when the scene is reprocessed by them)
            dataset_id=uuid.uuid5(
                USGS_UUID_NAMESPACE,
                mtl_doc["product_contents"]["landsat_product_id"]),
            naming_conventions="dea",
            if_exists=IfExists.Overwrite,
    ) as p:
        p.platform = mtl_doc["image_attributes"]["spacecraft_id"]
        p.instrument = mtl_doc["image_attributes"]["sensor_id"]
        p.product_family = "level2"
        p.producer = producer
        p.datetime = "{}T{}".format(
            mtl_doc["image_attributes"]["date_acquired"],
            mtl_doc["image_attributes"]["scene_center_time"],
        )
        # p.processed = mtl_doc["metadata_file_info"]["file_date"]
        p.processed = mtl_doc["level2_processing_record"]["date_product_generated"]
        p.properties["odc:file_format"] = file_format
        p.properties["eo:gsd"] = ground_sample_distance
        p.properties["eo:cloud_cover"] = mtl_doc["image_attributes"][
            "cloud_cover"]
        p.properties["eo:sun_azimuth"] = mtl_doc["image_attributes"][
            "sun_azimuth"]
        p.properties["eo:sun_elevation"] = mtl_doc["image_attributes"][
            "sun_elevation"]
        p.properties["landsat:collection_number"] = usgs_collection_number
        for section, fields in _COPYABLE_MTL_FIELDS:
            for field in fields:
                value = mtl_doc[section].get(field)
                if value is not None:
                    p.properties[f"landsat:{field}"] = value

        p.region_code = f"{p.properties['landsat:wrs_path']:03d}{p.properties['landsat:wrs_row']:03d}"
        org_collection_number = utils.get_collection_number(
            p.producer, p.properties["landsat:collection_number"])
        p.dataset_version = f"{org_collection_number}.0.{p.processed:%Y%m%d}"

        band_aliases = get_band_alias_mappings(p.platform, p.instrument)

        bands = list(_iter_bands_paths(mtl_doc))
        # Uncomment the next line to process only the first band (handy when testing):
        # bands = bands[0:1]
        for usgs_band_id, file_location in bands:
            # p.note_measurement(
            #     band_aliases[usgs_band_id],
            #     file_location,
            #     relative_to_dataset_location=True,
            # )
            path_file = os.path.join(ds_path, file_location)
            p.write_measurement(band_aliases[usgs_band_id], path_file)

        p.add_accessory_file("metadata:landsat_mtl", Path(mtl_filename))

        return p.done()
Example #19
def package(
    out_directory: Path,
    granule: Granule,
    included_products: Iterable[str] = DEFAULT_PRODUCTS,
    include_oa: bool = True,
    oa_resolution: Optional[Tuple[float, float]] = None,
) -> Tuple[UUID, Path]:
    """
    Package an L2 product.

    :param include_oa: Whether to also package the observation attributes (OA) imagery.

    :param out_directory:
        The base directory for output datasets. A DEA-naming-conventions folder hierarchy
        will be created inside this folder.

    :param granule:
        Granule information. You probably want to make one with Granule.from_path()

    :param included_products:
        A list of imagery products to include in the package.
        Defaults to all products.

    :return:
        The dataset UUID and output metadata path
    """
    included_products = tuple(s.lower() for s in included_products)

    with h5py.File(granule.wagl_hdf5, "r") as fid:
        granule_group = fid[granule.name]

        wagl_doc = _read_wagl_metadata(granule_group)

        with DatasetAssembler(
                out_directory.absolute(),
                # WAGL stamps a good, random ID already.
                dataset_id=granule.wagl_metadata.get("id"),
                naming_conventions=(
                    "dea_s2" if "sentinel"
                    in wagl_doc["source_datasets"]["platform_id"].lower()
                    else "dea"
                ),
        ) as p:
            _apply_wagl_metadata(p, wagl_doc)

            # It's a GA ARD product.
            p.producer = "ga.gov.au"
            p.product_family = "ard"
            p.maturity = _determine_maturity(
                acq_date=p.datetime,
                processed=p.processed,
                wagl_doc=wagl_doc,
            )
            if granule.source_level1_metadata is not None:
                # For historical consistency: we want to use the instrument that the source L1 product
                # came from, not the instruments reported from the WAGL doc.
                #
                # Eg.
                #     Level 1 will say "OLI_TIRS", while wagl doc will say "OLI".
                #     Our current C3 products say "OLI_TIRS" so we need to stay consistent.
                #     (even though WAGL only *used* the OLI bands, it came from an OLI_TIRS observation)
                #
                # So delete our current wagl one, since we're adding a source dataset:
                if p.instrument is not None:
                    del p.properties["eo:instrument"]

                p.add_source_dataset(granule.source_level1_metadata,
                                     auto_inherit_properties=True)
                # When level 1 is NRT, ARD is always NRT.
                if granule.source_level1_metadata.maturity == "nrt":
                    p.maturity = "nrt"

            org_collection_number = utils.get_collection_number(
                p.platform, p.producer,
                p.properties.get("landsat:collection_number"))

            p.dataset_version = f"{org_collection_number}.2.1"
            p.region_code = _extract_reference_code(p, granule.name)

            _read_gqa_doc(p, granule.gqa_doc)
            _read_fmask_doc(p, granule.fmask_doc)
            if granule.tesp_doc:
                _take_software_versions(p, granule.tesp_doc)

            _unpack_products(p, included_products, granule_group)

            if include_oa:
                with do("Starting OA", heading=True):
                    _unpack_observation_attributes(
                        p,
                        included_products,
                        granule_group,
                        infer_datetime_range=p.platform.startswith("landsat"),
                        oa_resolution=oa_resolution,
                    )
                if granule.fmask_image:
                    with do(f"Writing fmask from {granule.fmask_image} "):
                        p.write_measurement(
                            "oa:fmask",
                            granule.fmask_image,
                            expand_valid_data=False,
                            overview_resampling=Resampling.mode,
                        )

            with do("Finishing package"):
                return p.done()
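
# A hypothetical invocation, using the Granule.from_path() constructor that
# the docstring recommends (all arguments illustrative):
#
#   dataset_uuid, metadata_file = package(
#       Path("packaged/"),
#       Granule.from_path(Path("LC80920842016180LGN01.wagl.h5")),
#   )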
Example #20
def package(
    out_directory: Path,
    granule: Granule,
    included_products: Iterable[str] = DEFAULT_PRODUCTS,
    include_oa: bool = True,
) -> Tuple[UUID, Path]:
    """
    Package an L2 product.

    :param include_oa: Whether to also package the observation attributes (OA) imagery.

    :param out_directory:
        The base directory for output datasets. A DEA-naming-conventions folder hierarchy
        will be created inside this folder.

    :param granule:
        Granule information. You probably want to make one with Granule.from_path()

    :param included_products:
        A list of imagery products to include in the package.
        Defaults to all products.

    :return:
        The dataset UUID and output metadata path
    """
    included_products = tuple(s.lower() for s in included_products)

    with h5py.File(granule.wagl_hdf5, "r") as fid:
        granule_group = fid[granule.name]

        with DatasetAssembler(
                out_directory,
                # WAGL stamps a good, random ID already.
                dataset_id=granule.wagl_metadata.get("id"),
                naming_conventions="dea",
        ) as p:
            level1 = granule.source_level1_metadata
            p.add_source_dataset(level1, auto_inherit_properties=True)

            # It's a GA ARD product.
            p.producer = "ga.gov.au"
            p.product_family = "ard"

            org_collection_number = utils.get_collection_number(
                p.producer, p.properties["landsat:collection_number"])
            # TODO: wagl's algorithm version should determine our dataset version number, right?
            p.dataset_version = f"{org_collection_number}.0.0"
            p.region_code = _extract_reference_code(p, granule.name)

            _read_wagl_metadata(p, granule_group)
            _read_gqa_doc(p, granule.gqa_doc)
            _read_fmask_doc(p, granule.fmask_doc)

            _unpack_products(p, included_products, granule_group)

            if include_oa:
                with do(f"Starting OA", heading=True):
                    _unpack_observation_attributes(
                        p,
                        included_products,
                        granule_group,
                        infer_datetime_range=level1.platform.startswith(
                            "landsat"),
                    )
                if granule.fmask_image:
                    with do(f"Writing fmask from {granule.fmask_image} "):
                        p.write_measurement(
                            "oa:fmask",
                            granule.fmask_image,
                            expand_valid_data=False,
                            overview_resampling=Resampling.mode,
                        )

            with do("Finishing package"):
                return p.done()
Example #21
def package_non_standard(outdir, granule):
    """
    yaml creator for the ard pipeline.
    """

    outdir = Path(outdir) / granule.name
    indir = granule.wagl_hdf5.parent

    if indir.is_file():
        shutil.copy(indir, outdir)
    else:
        shutil.copytree(indir, outdir)

    wagl_h5 = outdir / str(granule.name + ".wagl.h5")
    dataset_doc = outdir / str(granule.name + ".yaml")
    boolean_h5 = Path(str(wagl_h5).replace("wagl.h5", "converted.datasets.h5"))
    fmask_img = outdir / str(granule.name + ".fmask.img")

    f = h5py.File(boolean_h5, "w")  # needs write mode: converted datasets are created in it below

    with DatasetAssembler(metadata_path=dataset_doc,
                          naming_conventions="dea") as da:
        level1 = granule.source_level1_metadata
        da.add_source_dataset(level1,
                              auto_inherit_properties=True,
                              inherit_geometry=True)
        da.product_family = "ard"
        da.producer = "ga.gov.au"
        da.properties["odc:file_format"] = "HDF5"

        with h5py.File(wagl_h5, "r") as fid:
            img_paths = [ppjoin(fid.name, pth) for pth in find(fid, "IMAGE")]
            granule_group = fid[granule.name]

            try:
                wagl_path, *ancil_paths = [
                    pth for pth in find(granule_group, "SCALAR")
                    if "METADATA" in pth
                ]
            except ValueError:
                raise ValueError("No nbar metadata found in granule")

            [wagl_doc] = loads_yaml(granule_group[wagl_path][()])

            da.processed = get_path(wagl_doc,
                                    ("system_information", "time_processed"))

            platform = da.properties["eo:platform"]
            if platform == "sentinel-2a" or platform == "sentinel-2b":
                org_collection_number = 3
            else:
                org_collection_number = utils.get_collection_number(
                    platform, da.producer,
                    da.properties["landsat:collection_number"])

            da.dataset_version = f"{org_collection_number}.1.0"
            da.region_code = eodatasets3.wagl._extract_reference_code(
                da, granule.name)

            eodatasets3.wagl._read_gqa_doc(da, granule.gqa_doc)
            eodatasets3.wagl._read_fmask_doc(da, granule.fmask_doc)

            with rasterio.open(fmask_img) as ds:
                fmask_layer = "/{}/OA_FMASK/oa_fmask".format(granule.name)
                data = ds.read(1)
                fmask_ds = f.create_dataset(fmask_layer,
                                            data=data,
                                            compression="lzf",
                                            shuffle=True)
                fmask_ds.attrs["crs_wkt"] = ds.crs.wkt
                fmask_ds.attrs["geotransform"] = ds.transform.to_gdal()

                fmask_ds.attrs[
                    "description"] = "Converted from ERDAS Imagine format to HDF5 to work with the limitations of varied formats within ODC"  # noqa E501

                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=ds.transform,
                    crs=CRS.from_wkt(fmask_ds.attrs["crs_wkt"]),
                )

                measurement_name = "oa_fmask"

                pathname = str(outdir.joinpath(boolean_h5))

                no_data = fmask_ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                da._measurements.record_image(
                    measurement_name,
                    grid_spec,
                    pathname,
                    fmask_ds[:],
                    layer="/{}".format(fmask_layer),
                    nodata=no_data,
                    expand_valid_data=False,
                )

            for pathname in img_paths:
                ds = fid[pathname]
                ds_path = Path(ds.name)

                # eodatasets internally uses this grid spec to group image datasets
                grid_spec = images.GridSpec(
                    shape=ds.shape,
                    transform=Affine.from_gdal(*ds.attrs["geotransform"]),
                    crs=CRS.from_wkt(ds.attrs["crs_wkt"]),
                )

                # product group name; lambertian, nbar, nbart, oa
                if "STANDARDISED-PRODUCTS" in str(ds_path):
                    product_group = ds_path.parent.name
                elif "INTERPOLATED-ATMOSPHERIC-COEFFICIENTS" in str(ds_path):
                    product_group = "oa_{}".format(ds_path.parent.name)
                else:
                    product_group = "oa"

                # spatial resolution group
                # used to separate measurements with the same name
                resolution_group = "rg{}".format(
                    ds_path.parts[2].split("-")[-1])

                measurement_name = ("_".join([
                    resolution_group,
                    product_group,
                    ds.attrs.get("alias", ds_path.name),
                ]).replace("-",
                           "_").lower())  # we don't wan't hyphens in odc land

                # include this band in defining the valid data bounds?
                include = "nbart" in measurement_name

                no_data = ds.attrs.get("no_data_value")
                if no_data is None:
                    no_data = float("nan")

                # if we are of type bool, we'll have to convert just for GDAL
                if ds.dtype.name == "bool":
                    pathname = str(outdir.joinpath(boolean_h5))
                    out_ds = f.create_dataset(
                        measurement_name,
                        data=np.uint8(ds[:]),
                        compression="lzf",
                        shuffle=True,
                        chunks=ds.chunks,
                    )

                    for k, v in ds.attrs.items():
                        out_ds.attrs[k] = v

                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        out_ds[:],
                        layer="/{}".format(out_ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )
                else:
                    pathname = str(outdir.joinpath(wagl_h5))

                    # work around as note_measurement doesn't allow us to specify the gridspec
                    da._measurements.record_image(
                        measurement_name,
                        grid_spec,
                        pathname,
                        ds[:],
                        layer="/{}".format(ds.name),
                        nodata=no_data,
                        expand_valid_data=include,
                    )

        # the longest part here is generating the valid data bounds vector
        # landsat 7 post SLC-OFF can take a really long time
        return da.done()
Example #22
def prepare_and_write(
    dataset: Path,
    dataset_document: Path,
) -> Tuple[uuid.UUID, Path]:
    # Process esa dataset
    if dataset.suffix == ".zip":
        with zipfile.ZipFile(dataset, "r") as z:
            # Get file paths for esa metadata files
            mtd_ds_zip_path = [s for s in z.namelist() if "MTD_DS.xml" in s][0]
            mtd_tl_zip_path = [s for s in z.namelist() if "MTD_TL.xml" in s][0]
            mtd_msil1c_zip_path = [
                s for s in z.namelist() if "MTD_MSIL1C.xml" in s
            ][0]

            # Crawl through metadata files and return a dict of useful information
            mtd_ds = process_mtd_ds(z.read(mtd_ds_zip_path).decode("utf-8"))
            mtd_tl = process_mtd_tl(z.read(mtd_tl_zip_path).decode("utf-8"))
            mtd_msil1c = process_mtd_msil1c(
                z.read(mtd_msil1c_zip_path).decode("utf-8"))

            with DatasetAssembler(
                    metadata_path=dataset_document,
                    dataset_location=dataset,
            ) as p:

                p.properties["eo:instrument"] = "MSI"
                p.properties["odc:producer"] = "esa.int"
                p.properties["odc:product_family"] = "level1"
                p.properties["odc:file_format"] = "JPEG2000"

                p.properties.update(mtd_ds)
                p.properties.update(mtd_tl)
                p.properties.update(mtd_msil1c)

                p.properties["odc:dataset_version"] = f"1.0.{p.processed:%Y%m%d}"

                for file in z.namelist():
                    # T55HFA_20201011T000249_B01.jp2
                    if ".jp2" in file and "TCI" not in file and "PVI" not in file:
                        # e.g. "T55HFA_20201011T000249_B01.jp2" -> band alias key "01"
                        band_id = file.split("_")[-1].replace(".jp2", "").replace("B", "")
                        p.note_measurement(
                            path=file,
                            name=SENTINEL_MSI_BAND_ALIASES[band_id],
                            relative_to_dataset_location=True,
                        )

                p.add_accessory_file("metadata:mtd_ds", mtd_ds_zip_path)
                p.add_accessory_file("metadata:mtd_tl", mtd_tl_zip_path)
                p.add_accessory_file("metadata:mtd_msil1c",
                                     mtd_msil1c_zip_path)

                return p.done()

    # process sinergise dataset
    elif dataset.is_dir():

        # Get file paths for sinergise metadata files
        product_info_path = dataset / "productInfo.json"
        metadata_xml_path = dataset / "metadata.xml"

        if not product_info_path.exists():
            raise ValueError(
                "No productInfo.json file found. "
                "Are you sure the input is a sinergise dataset folder?")

        # Crawl through metadata files and return a dict of useful information
        product_info = process_product_info(product_info_path)
        metadata_xml = process_metadata_xml(metadata_xml_path)

        with DatasetAssembler(
                metadata_path=dataset_document,
                dataset_location=dataset,
        ) as p:
            p.properties["eo:platform"] = "sentinel-2a"
            p.properties["eo:instrument"] = "MSI"
            p.properties["odc:file_format"] = "JPEG2000"
            p.properties["odc:product_family"] = "level1"
            p.properties["odc:producer"] = "sinergise.com"

            p.properties.update(metadata_xml)
            p.properties.update(product_info)

            p.properties["odc:dataset_version"] = f"1.0.{p.processed:%Y%m%d}"

            for path in dataset.rglob("*.jp2"):
                if "preview" not in path.stem and "TCI" not in path.stem:
                    p.note_measurement(
                        path=path,
                        name=SENTINEL_MSI_BAND_ALIASES[path.stem.replace("B", "")],
                    )

            p.add_accessory_file("metadata:product_info", product_info_path)
            p.add_accessory_file("metadata:sinergise_metadata",
                                 metadata_xml_path)
            return p.done()
    else:
        raise NotImplementedError("Unknown input file type?")
Example #23
def create_eo3(granule_dir, granule_id):
    """
    Creates an eo3 document.

    :param granule_dir (Path): the granule directory
    :param granule_id (str): the granule id, used for the datatake start datetime
    :return: DatasetDoc of eo3 metadata
    """

    with open(granule_dir / "ARD-METADATA.yaml") as fin:
        metadata = yaml.safe_load(fin)

    try:
        coords = metadata['grid_spatial']['projection']['valid_data']['coordinates']
        expand_valid_data = False
    except KeyError:
        expand_valid_data = True

    assembler = DatasetAssembler(
            dataset_location=granule_dir,
            metadata_path=granule_dir / "dummy",
    )

    if "S2A" in str(granule_dir):
        assembler.product_family = "s2a_ard_granule"
        platform = "SENTINEL_2A"        
    else:
        assembler.product_family = "s2b_ard_granule"
        platform = "SENTINEL_2B"

    assembler.processed_now()

    add_datetime(assembler, granule_dir)
    add_to_eo3(assembler, granule_dir, "NBART", lambda x: code_to_band[x.split('_')[-1]], expand_valid_data)
    add_to_eo3(assembler, granule_dir, "SUPPLEMENTARY", lambda x: x[3:].lower(), expand_valid_data)
    add_to_eo3(assembler, granule_dir, "QA", lambda x: x[3:].lower().replace('combined_', ''), expand_valid_data)

    crs, grid_docs, measurement_docs = assembler._measurements.as_geo_docs()
    valid_data = assembler._measurements.consume_and_get_valid_data()

    assembler.properties["odc:region_code"] = metadata["provider"]["reference_code"]
    assembler.properties["gqa:cep90"] = metadata["gqa"]["residual"]["cep90"]
    assembler.properties["gqa:error_message"] = metadata["gqa"]["error_message"]
    assembler.properties["gqa:final_gcp_count"] =metadata["gqa"]["final_gcp_count"]
    assembler.properties["gqa:ref_source"] = metadata["gqa"]["ref_source"]
    assembler.properties["sentinel:datatake_start_datetime"] = granule_id.split("_")[-4]
    assembler.properties["eo:platform"] = platform
    assembler.properties["eo:instrument"] = "MSI"

    for key in ["abs_iterative_mean", "abs", "iterative_mean", "iterative_stddev", "mean", "stddev"]:
        assembler.properties[f"gqa:{key}_xy"] = metadata["gqa"]["residual"][key]["xy"]

    eo3 = DatasetDoc(
        id=assembler.dataset_id,
        label=assembler.label,
        product=ProductDoc(
            name=assembler.names.product_name, href=assembler.names.product_uri
        ),
        crs=assembler._crs_str(crs) if crs is not None else None,
        geometry=valid_data,
        grids=grid_docs,
        properties=assembler.properties,
        accessories={
            name: AccessoryDoc(path, name=name)
            for name, path in assembler._accessories.items()
        },
        measurements=measurement_docs,
        lineage=assembler._lineage,
    )

    if not expand_valid_data:
        eo3.geometry = Polygon(coords[0])

    for measurement in eo3.measurements.values():
        if measurement.grid is None:
            measurement.grid = 'default'

    return eo3
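
# A hypothetical follow-up: the returned DatasetDoc can be written out with
# eodatasets3.serialise (imported in Example #5). The to_path call below is
# assumed to accept the output path and the document:
#
#   doc = create_eo3(granule_dir, granule_id)
#   serialise.to_path(granule_dir / f"{granule_id}.odc-metadata.yaml", doc)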
Example #24
def package(
    out_directory: Path,
    granule: Granule,
    *,
    product_maturity: ProductMaturity = ProductMaturity.stable,
    included_products: Iterable[str] = DEFAULT_PRODUCTS,
    include_oa: bool = True,
    oa_resolution: Optional[Tuple[float, float]] = None,
    contiguity_resolution: Optional[Tuple[float, float]] = None,
) -> Tuple[UUID, Path]:
    """
    Package an L2 product.

    :param include_oa: Whether to also package the observation attributes (OA) imagery.

    :param out_directory:
        The base directory for output datasets. A DEA-naming-conventions folder hierarchy
        will be created inside this folder.

    :param granule:
        Granule information. You probably want to make one with Granule.from_path()

    :param included_products:
        A list of imagery products to include in the package.
        Defaults to all products.

    :return:
        The dataset UUID and output metadata path
    """
    included_products = tuple(s.lower() for s in included_products)

    with h5py.File(granule.wagl_hdf5, "r") as fid:
        granule_group = fid[granule.name]

        wagl_doc = _read_wagl_metadata(granule_group)

        with DatasetAssembler(
                out_directory.absolute(),
                # WAGL stamps a good, random ID already.
                dataset_id=granule.wagl_metadata.get("id"),
                naming_conventions=(
                    "dea_s2" if "sentinel"
                    in wagl_doc["source_datasets"]["platform_id"].lower()
                    else "dea"
                ),
        ) as p:
            _apply_wagl_metadata(p, wagl_doc)

            # It's a GA ARD product.
            p.producer = "ga.gov.au"
            p.product_family = "ard"
            p.maturity = _determine_maturity(
                acq_date=p.datetime,
                processed=p.processed,
                wagl_doc=wagl_doc,
            )

            # We don't bother including product maturity if it's stable, for consistency with old datasets.
            # Stable is the assumed default.
            if product_maturity is not ProductMaturity.stable:
                p.product_maturity = product_maturity

            if granule.source_level1_metadata is not None:
                # For historical consistency: we want to use the instrument that the source L1 product
                # came from, not the instruments reported from the WAGL doc.
                #
                # Eg.
                #     Level 1 will say "OLI_TIRS", while wagl doc will say "OLI".
                #     Our current C3 products say "OLI_TIRS" so we need to stay consistent.
                #     (even though WAGL only *used* the OLI bands, it came from an OLI_TIRS observation)
                #
                # So delete our current wagl one, since we're adding a source dataset:
                if p.instrument is not None:
                    del p.properties["eo:instrument"]

                p.add_source_dataset(granule.source_level1_metadata,
                                     auto_inherit_properties=True)
                # When level 1 is NRT, ARD is always NRT.
                if granule.source_level1_metadata.maturity == "nrt":
                    p.maturity = "nrt"

            org_collection_number = utils.get_collection_number(
                p.platform, p.producer,
                p.properties.get("landsat:collection_number"))

            p.dataset_version = f"{org_collection_number}.2.1"
            p.region_code = _extract_reference_code(p, granule.name)

            _read_gqa_doc(p, granule.gqa_doc)
            _read_fmask_doc(p, granule.fmask_doc)
            if granule.s2cloudless_doc:
                _read_s2cloudless_doc(p, granule.s2cloudless_doc)
            if granule.tesp_doc:
                _take_software_versions(p, granule.tesp_doc)

            _unpack_products(p, included_products, granule_group)

            if include_oa:
                with sub_product("oa", p):
                    with do("Starting OA", heading=True):
                        resolution_groups = {
                            tuple(granule_group[k].attrs["resolution"]):
                            granule_group[k]
                            for k in granule_group.keys()
                            if k.startswith("RES-GROUP-")
                        }

                        # Use the highest resolution as the ground sample distance.
                        if "eo:gsd" in p.properties:
                            del p.properties["eo:gsd"]
                        p.properties["eo:gsd"] = min(
                            min(resolution_groups.keys()))

                        _unpack_observation_attributes(
                            p,
                            get_oa_resolution_group(resolution_groups,
                                                    p.platform, oa_resolution),
                        )

                    infer_datetime_range = p.platform.startswith("landsat")

                    with do("Contiguity", timedelta=infer_datetime_range):
                        # For landsat, we want the "common" band resolution, not panchromatic. Pick lower res.
                        if contiguity_resolution is not None:
                            contiguity_res = contiguity_resolution
                        elif p.platform.startswith("landsat"):
                            contiguity_res = max(resolution_groups.keys())
                        elif p.platform.startswith("sentinel"):
                            contiguity_res = (10.0, 10.0)

                        if contiguity_res not in resolution_groups:
                            raise ValueError(
                                f"No resolution group {contiguity_res} found in {granule.name}. "
                                f"Options: {list(resolution_groups.keys())}")
                        contiguity_res_grp = resolution_groups[contiguity_res]

                        timedelta_data = (
                            contiguity_res_grp["SATELLITE-SOLAR/TIME-DELTA"]
                            if infer_datetime_range else None)
                        _create_contiguity(
                            p,
                            included_products,
                            resolution_yx=tuple(
                                contiguity_res_grp.attrs["resolution"]),
                            timedelta_data=timedelta_data,
                        )

                    if granule.fmask_image:
                        with do(f"Writing fmask from {granule.fmask_image} "):
                            p.write_measurement(
                                "oa:fmask",
                                granule.fmask_image,
                                expand_valid_data=False,
                                overview_resampling=Resampling.mode,
                                # Because of our strange sub-products and filename standards, we want the
                                # 'oa_' prefix to be included in the recorded band metadata,
                                # but not in its filename.
                                # So we manually calculate a filename without the extra prefix.
                                path=p.names.measurement_filename("fmask"),
                            )

                    if granule.s2cloudless_prob:
                        with do(f"Writing s2cloudless probability from {granule.s2cloudless_prob} "
                                ):
                            p.write_measurement(
                                "oa:s2cloudless_prob",
                                granule.s2cloudless_prob,
                                expand_valid_data=False,
                                overview_resampling=Resampling.bilinear,
                                path=p.names.measurement_filename(
                                    "s2cloudless-prob"),
                            )

                    if granule.s2cloudless_mask:
                        with do(f"Writing s2cloudless mask from {granule.s2cloudless_mask} "
                                ):
                            p.write_measurement(
                                "oa:s2cloudless_mask",
                                granule.s2cloudless_mask,
                                expand_valid_data=False,
                                overview_resampling=Resampling.mode,
                                path=p.names.measurement_filename(
                                    "s2cloudless-mask"),
                            )

            with do("Finishing package"):
                return p.done()
Example #25
def test_s2_naming_conventions(tmp_path: Path):
    """A minimal dataset with sentinel platform/instrument"""
    p = DatasetAssembler(tmp_path, naming_conventions="dea_s2")
    p.platform = "sentinel-2a"
    p.instrument = "msi"
    p.datetime = datetime(2018, 11, 4)
    p.product_family = "blueberries"
    p.processed = "2018-11-05T12:23:23"
    p.producer = "ga.gov.au"
    p.dataset_version = "1.0.0"
    p.region_code = "Oz"
    p.properties["odc:file_format"] = "GeoTIFF"
    p.properties["sentinel:sentinel_tile_id"] = (
        "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
    )

    p.note_source_datasets(
        "telemetry",
        # Accepts multiple, and they can be strings or UUIDs:
        "ca705033-0fc4-4f38-a47e-f425dfb4d0c7",
        uuid.UUID("3781e90f-b677-40af-9439-b40f6e4dfadd"),
    )
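    # These ids appear under the 'telemetry' classifier in the output document's lineage.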

    # The property normaliser should have extracted inner fields
    assert p.properties["sentinel:datatake_start_datetime"] == datetime(
        2017, 8, 22, 1, 56, 26, tzinfo=timezone.utc)

    dataset_id, metadata_path = p.done()

    # The s2 naming conventions have an extra subfolder of the datatake start time.
    metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()

    assert metadata_path_offset == (
        "ga_s2am_blueberries_1/Oz/2018/11/04/20170822T015626/"
        "ga_s2am_blueberries_1-0-0_Oz_2018-11-04.odc-metadata.yaml")

    assert_expected_eo3_path(
        {
            "$schema": "https://schemas.opendatacube.org/dataset",
            "accessories": {},
            "id": dataset_id,
            "label": "ga_s2am_blueberries_1-0-0_Oz_2018-11-04",
            "product": {
                "href":
                "https://collections.dea.ga.gov.au/product/ga_s2am_blueberries_1",
                "name": "ga_s2am_blueberries_1",
            },
            "properties": {
                "datetime":
                datetime(2018, 11, 4, 0, 0),
                "eo:instrument":
                "msi",
                "eo:platform":
                "sentinel-2a",
                "odc:dataset_version":
                "1.0.0",
                "odc:file_format":
                "GeoTIFF",
                "odc:processing_datetime":
                datetime(2018, 11, 5, 12, 23, 23),
                "odc:producer":
                "ga.gov.au",
                "odc:product_family":
                "blueberries",
                "odc:region_code":
                "Oz",
                "sentinel:datatake_start_datetime":
                datetime(2017, 8, 22, 1, 56, 26),
                "sentinel:sentinel_tile_id":
                "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05",
            },
            "lineage": {
                "telemetry": [
                    "ca705033-0fc4-4f38-a47e-f425dfb4d0c7",
                    "3781e90f-b677-40af-9439-b40f6e4dfadd",
                ]
            },
        },
        expected_path=metadata_path,
    )
Example #26
def test_dea_style_package(
    l1_ls8_dataset: DatasetDoc, l1_ls8_dataset_path: Path, tmp_path: Path
):
    out = tmp_path

    [blue_geotiff_path] = l1_ls8_dataset_path.rglob("L*_B2.TIF")

    with DatasetAssembler(out, naming_conventions="dea") as p:
        # We add a source dataset, asking to inherit the common properties (e.g. platform, instrument, datetime)
        p.add_source_path(l1_ls8_dataset_path, auto_inherit_properties=True)
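        # (The source dataset's id will also appear in the output lineage, classified as 'level1'.)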

        # It's a GA product of "numerus-unus" ("the number one").
        p.producer = "ga.gov.au"
        p.product_family = "ones"
        p.dataset_version = "3.0.0"

        # Known properties are normalised (see tests at bottom of file)
        p.platform = "LANDSAT_8"  # to: 'landsat-8'
        p.processed = "2016-03-04 14:23:30Z"  # into a date.
        p.maturity = "FINAL"  # lowercased
        p.properties["eo:off_nadir"] = "34"  # into a number

        # Write a measurement from a numpy array, using the source dataset's grid spec.
        p.write_measurement_numpy(
            "ones",
            numpy.ones((60, 60), numpy.int16),
            GridSpec.from_dataset_doc(l1_ls8_dataset),
            nodata=-999,
        )
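        # GridSpec.from_dataset_doc reuses the source level-1 dataset's grid (CRS and
        # transform), so both measurements share one grid definition in the output.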

        # Copy a measurement from an input file (it will write a COG with DEA naming conventions)
        p.write_measurement("blue", blue_geotiff_path)

        # Alternatively, all measurements could be by reference rather than a copy:
        # p.note_measurement("external_blue", blue_geotiff_path)
        # (See an example of referencing in eodatasets3/prepare/landsat_l1_prepare.py )
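        # The metadata would then point at the original file in place, rather than
        # copying it into the package.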

        # Write a thumbnail using the given bands as r/g/b.
        p.write_thumbnail("ones", "ones", "blue")
        # Write a singleband thumbnail using a bit flag
        p.write_thumbnail_singleband("blue", bit=1, kind="singleband")
        # Write a singleband thumbnail using a lookuptable
        p.write_thumbnail_singleband("blue",
                                     lookup_table={1: (0, 0, 255)},
                                     kind="singleband_lut")

        # Note any software versions important to this created data.
        p.note_software_version(
            "numerus-unus-processor",
            "https://github.com/GeoscienceAustralia/eo-datasets",
            "1.2.3",
        )
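        # These version notes are written into the *.proc-info.yaml accessory file.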

        # p.done() will validate the dataset and write it to the destination atomically.
        dataset_id, metadata_path = p.done()

    assert isinstance(dataset_id, UUID), "Expected a random UUID to be assigned"

    out = tmp_path / "ga_ls8c_ones_3/090/084/2016/01/21"
    assert out == metadata_path.parent
    assert_file_structure(
        out,
        {
            "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.odc-metadata.yaml": "",
            "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_blue.tif": "",
            "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_ones.tif": "",
            "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_thumbnail.jpg": "",
            "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.proc-info.yaml": "",
            "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.sha1": "",
            "ga_ls8c_singleband_3-0-0_090084_2016-01-21_final_thumbnail.jpg": "",
            "ga_ls8c_singleband_lut_3-0-0_090084_2016-01-21_final_thumbnail.jpg": "",
        },
    )

    # TODO: check sha1 checksum list.

    assert_same_as_file(
        {
            "$schema": "https://schemas.opendatacube.org/dataset",
            "id": dataset_id,
            "label": "ga_ls8c_ones_3-0-0_090084_2016-01-21_final",
            "product": {
                # This was added automatically because we chose 'dea' conventions.
                "href":
                "https://collections.dea.ga.gov.au/product/ga_ls8c_ones_3",
                "name": "ga_ls8c_ones_3",
            },
            "crs": "epsg:32655",
            "geometry": {
                "coordinates": [[
                    [879_315.0, -3_714_585.0],
                    [641_985.0, -3_714_585.0],
                    [641_985.0, -3_953_115.0],
                    [879_315.0, -3_953_115.0],
                    [879_315.0, -3_714_585.0],
                ]],
                "type":
                "Polygon",
            },
            "grids": {
                # Note that the two bands had identical grid specs, so it combined them into one grid.
                "default": {
                    "shape": [60, 60],
                    "transform": [
                        3955.5,
                        0.0,
                        641_985.0,
                        0.0,
                        -3975.500_000_000_000_5,
                        -3_714_585.0,
                        0.0,
                        0.0,
                        1.0,
                    ],
                }
            },
            "measurements": {
                "blue": {
                    "path":
                    "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_blue.tif"
                },
                "ones": {
                    "path":
                    "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_ones.tif"
                },
            },
            "properties": {
                "datetime": datetime(2016, 1, 21, 23, 50, 23, 54435),
                "dea:dataset_maturity": "final",
                "odc:dataset_version": "3.0.0",
                "odc:file_format": "GeoTIFF",
                "odc:processing_datetime": "2016-03-04T14:23:30",
                "odc:producer": "ga.gov.au",
                "odc:product_family": "ones",
                # The remaining fields were inherited from the source dataset
                # (because we set auto_inherit_properties=True, and they're in the whitelist)
                "eo:platform":
                "landsat-8",  # matching Stac's examples for capitalisation.
                "eo:instrument":
                "OLI_TIRS",  # matching Stac's examples for capitalisation.
                "eo:cloud_cover": 93.22,
                "eo:off_nadir": 34.0,
                "eo:gsd": 15.0,
                "eo:sun_azimuth": 74.007_443_8,
                "eo:sun_elevation": 55.486_483,
                "landsat:collection_category": "T1",
                "landsat:collection_number": 1,
                "landsat:landsat_product_id":
                "LC08_L1TP_090084_20160121_20170405_01_T1",
                "landsat:landsat_scene_id": "LC80900842016021LGN02",
                "landsat:wrs_path": 90,
                "landsat:wrs_row": 84,
                "odc:region_code": "090084",
            },
            "accessories": {
                # It wrote a checksum file for all of our files.
                "checksum:sha1": {
                    "path": "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.sha1"
                },
                # We didn't add any extra processor metadata, so this just contains
                # some software versions.
                "metadata:processor": {
                    "path":
                    "ga_ls8c_ones_3-0-0_090084_2016-01-21_final.proc-info.yaml"
                },
                # The thumbnails we made.
                "thumbnail": {
                    "path":
                    "ga_ls8c_ones_3-0-0_090084_2016-01-21_final_thumbnail.jpg"
                },
                "thumbnail:singleband": {
                    "path":
                    "ga_ls8c_singleband_3-0-0_090084_2016-01-21_final_thumbnail.jpg"
                },
                "thumbnail:singleband_lut": {
                    "path":
                    "ga_ls8c_singleband_lut_3-0-0_090084_2016-01-21_final_thumbnail.jpg"
                },
            },
            "lineage": {
                "level1": ["a780754e-a884-58a7-9ac0-df518a67f59d"]
            },
        },
        generated_file=metadata_path,
    )
Example #27
def test_s2_naming_conventions(tmp_path: Path):
    """A minimal dataset with sentinel platform/instrument"""
    p = DatasetAssembler(tmp_path, naming_conventions="dea_s2")
    p.platform = "sentinel-2a"
    p.instrument = "msi"
    p.datetime = datetime(2018, 11, 4)
    p.product_family = "blueberries"
    p.processed = "2018-11-05T12:23:23"
    p.producer = "ga.gov.au"
    p.dataset_version = "1.0.0"
    p.region_code = "Oz"
    p.properties["odc:file_format"] = "GeoTIFF"
    p.properties["sentinel:sentinel_tile_id"] = (
        "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05"
    )

    # The property normaliser should have extracted inner fields
    assert p.properties["sentinel:datatake_start_datetime"] == datetime(
        2017, 8, 22, 1, 56, 26, tzinfo=timezone.utc)

    dataset_id, metadata_path = p.done()

    # The s2 naming conventions have an extra subfolder of the datatake start time.
    metadata_path_offset = metadata_path.relative_to(tmp_path).as_posix()

    assert metadata_path_offset == (
        "ga_s2am_blueberries_1/Oz/2018/11/04/20170822T015626/"
        "ga_s2am_blueberries_1-0-0_Oz_2018-11-04.odc-metadata.yaml")

    assert_same_as_file(
        {
            "$schema": "https://schemas.opendatacube.org/dataset",
            "accessories": {
                "checksum:sha1": {
                    "path": "ga_s2am_blueberries_1-0-0_Oz_2018-11-04.sha1"
                },
                "metadata:processor": {
                    "path":
                    "ga_s2am_blueberries_1-0-0_Oz_2018-11-04.proc-info.yaml"
                },
            },
            "id": dataset_id,
            "label": "ga_s2am_blueberries_1-0-0_Oz_2018-11-04",
            "lineage": {},
            "product": {
                "href":
                "https://collections.dea.ga.gov.au/product/ga_s2am_blueberries_1",
                "name": "ga_s2am_blueberries_1",
            },
            "properties": {
                "datetime":
                datetime(2018, 11, 4, 0, 0),
                "eo:instrument":
                "msi",
                "eo:platform":
                "sentinel-2a",
                "odc:dataset_version":
                "1.0.0",
                "odc:file_format":
                "GeoTIFF",
                "odc:processing_datetime":
                datetime(2018, 11, 5, 12, 23, 23),
                "odc:producer":
                "ga.gov.au",
                "odc:product_family":
                "blueberries",
                "odc:region_code":
                "Oz",
                "sentinel:datatake_start_datetime":
                datetime(2017, 8, 22, 1, 56, 26),
                "sentinel:sentinel_tile_id":
                "S2A_OPER_MSI_L1C_TL_SGS__20170822T015626_A011310_T54KYU_N02.05",
            },
        },
        generated_file=metadata_path,
    )