Example #1
def validate_paths(
    paths: List[Path],
    thorough: bool = False
) -> Generator[Tuple[Path, List[ValidationMessage]], None, None]:
    """Validate the list of paths. Product documents can be specified before their datasets."""
    products: Dict[str, Dict] = {}

    for path in paths:
        # Load yaml. If product, add to products.
        # Otherwise validate.
        doc = serialise.load_yaml(path)
        messages: List[ValidationMessage] = []

        if is_product(doc):
            messages.extend(validate_product(doc))
            products[doc["name"]] = doc
            yield path, messages
            continue

        # TODO: follow ODC's match rules?
        product = None
        product_name = get_path(doc, ("product", "name"), default=None)

        if products:
            if len(products) == 1:
                [product] = products.values()
            elif product_name is not None:
                product = products.get(product_name)

            if product is None:
                messages.append(
                    _warning(
                        "unknown_product",
                        "Cannot match dataset to product",
                        hint=f"Nothing matches {product_name!r}"
                        if product_name else
                        "No product name in dataset (TODO: field matching)",
                    ))
        else:
            messages.append(
                ValidationMessage(
                    Level.error if thorough else Level.info,
                    "no_product",
                    "No product provided: validating dataset information alone",
                ))

        messages.extend(
            validate_dataset(
                doc,
                product_definition=product,
                readable_location=path,
                thorough=thorough,
            ))
        yield path, messages
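
For context, a minimal driver for this generator might look like the sketch below. The exit-code convention, and the assumption that ValidationMessage exposes level, code and reason fields, are illustrative rather than taken from the example above.

# A minimal driver sketch, assuming validate_paths and Level are importable
# from the module above; the exit-code convention is illustrative.
import sys
from pathlib import Path

def main(args) -> int:
    worst_exit = 0
    for path, messages in validate_paths([Path(a) for a in args], thorough=True):
        for message in messages:
            print(f"{path}: {message.code}: {message.reason}")
            if message.level == Level.error:
                worst_exit = 1
    return worst_exit

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
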
Example #2
def test_invalid_crs(input_doc_folder: Path):
    input_metadata_path = input_doc_folder.joinpath(ODC_METADATA_FILE)
    assert input_metadata_path.exists()

    input_doc = serialise.load_yaml(input_metadata_path)
    del input_doc["crs"]

    serialise.dump_yaml(input_metadata_path, input_doc)
    assert input_metadata_path.exists()

    with pytest.raises(RuntimeError) as exp:
        run_tostac(input_metadata_path)
    assert (str(exp.value) == "Expect string or any object with "
            "`.to_epsg()` or `.to_wkt()` method")
Example #3
def test_add_property(input_doc_folder: Path):
    input_metadata_path = input_doc_folder.joinpath(ODC_METADATA_FILE)
    assert input_metadata_path.exists()

    input_doc = serialise.load_yaml(input_metadata_path)
    input_doc["properties"]["test"] = "testvalue"

    serialise.dump_yaml(input_metadata_path, input_doc)
    assert input_metadata_path.exists()

    run_tostac(input_metadata_path)

    name = input_metadata_path.stem.replace(".odc-metadata", "")
    actual_stac_path = input_metadata_path.with_name(f"{name}.stac-item.json")
    assert actual_stac_path.exists()

    actual_doc = json.load(actual_stac_path.open())
    assert actual_doc["properties"]["test"] == input_doc["properties"]["test"]
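
run_tostac is likewise referenced without being defined. One plausible implementation is a thin wrapper around a click-based to-stac entry point driven by click's test runner; the tostac module path and command name below are assumptions:

# Hypothetical run_tostac helper sketch; the module path and command name
# are assumptions, not confirmed by the examples above.
from pathlib import Path

from click.testing import CliRunner

from eodatasets3.scripts import tostac  # assumed entry-point location

def run_tostac(input_metadata_path: Path) -> None:
    result = CliRunner().invoke(
        tostac.run,
        [str(input_metadata_path)],
        catch_exceptions=False,  # let pytest.raises see the real exception
    )
    assert result.exit_code == 0, result.output
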
Example #4
def update_metadata(
        nci_metadata_file, s3_bucket, s3_base_url, explorer_base_url, sns_topic, s3_path
):
    """
    Uploads updated metadata with nbar element removed, updated checksum file, STAC doc created
    and publish SNS message

    :param nci_metadata_file: Path of metadata file in NCI
    :param s3_bucket: Name of S3 bucket
    :param s3_base_url: Base URL of the S3 bucket
    :param explorer_base_url: Base URL of the explorer
    :param sns_topic: ARN of the SNS topic
    :param s3_path: Path in S3
    :return: List of errors
    """
    # Accumulators: error messages, and checksums of the newly written files
    metadata_error_list = []
    new_checksum_list = {}

    nci_metadata_file_path = Path(nci_metadata_file)
    temp_metadata = serialise.load_yaml(nci_metadata_file_path)

    # Delete NBAR-related metadata. Not every product has every band
    # (Landsat 8 adds coastal aerosol and panchromatic), so remove each
    # key only if it is present.
    nbar_measurements = (
        "nbar_blue",
        "nbar_green",
        "nbar_nir",
        "nbar_red",
        "nbar_swir_1",
        "nbar_swir_2",
        "nbar_coastal_aerosol",
        "nbar_panchromatic",
        "oa_nbar_contiguity",
    )
    for measurement in nbar_measurements:
        temp_metadata["measurements"].pop(measurement, None)
    temp_metadata["accessories"].pop("thumbnail:nbar", None)

    # Format an eo3 dataset dict for human-readable yaml serialisation.
    temp_metadata = serialise.prepare_formatting(temp_metadata)

    # Dump metadata yaml into buffer
    with io.BytesIO() as temp_yaml:
        serialise.dumps_yaml(temp_yaml, temp_metadata)
        temp_yaml.seek(0)  # Seek back to the start before the next read
        new_checksum_list[nci_metadata_file_path.name] = verify.calculate_hash(
            temp_yaml
        )

        # Write odc metadata yaml object into S3
        s3_metadata_file = f"{s3_path}/{nci_metadata_file_path.name}"
        try:
            upload_s3_resource(s3_bucket, s3_metadata_file, temp_yaml.getvalue())
            LOG.info(f"Finished uploading metadata to {s3_metadata_file}")
        except S3SyncException as exp:
            LOG.error(f"Failed uploading metadata to {s3_metadata_file} - {exp}")
            metadata_error_list.append(
                f"Failed uploading metadata to {s3_metadata_file} - {exp}"
            )

    # Create stac metadata
    name = nci_metadata_file_path.stem.replace(".odc-metadata", "")
    stac_output_file_path = nci_metadata_file_path.with_name(f"{name}.stac-item.json")
    base_url = s3_base_url if s3_base_url else boto3.client("s3").meta.endpoint_url
    stac_url_path = f"{base_url}/{s3_path}/"
    item_doc = dc_to_stac(
        serialise.from_doc(temp_metadata),
        nci_metadata_file_path,
        stac_output_file_path,
        stac_url_path,
        explorer_base_url,
        True,
    )
    stac_dump = json.dumps(item_doc, indent=4, default=json_fallback)

    # Write stac json to buffer
    with io.BytesIO() as temp_stac:
        temp_stac.write(stac_dump.encode())
        temp_stac.seek(0)  # Seek back to the start before the next read
        new_checksum_list[stac_output_file_path.name] = verify.calculate_hash(temp_stac)

        # Write stac metadata json object into S3
        s3_stac_file = f"{s3_path}/{stac_output_file_path.name}"
        try:
            upload_s3_resource(s3_bucket, s3_stac_file, temp_stac.getvalue())
            LOG.info(f"Finished uploading STAC metadata to {s3_stac_file}")
        except S3SyncException as exp:
            LOG.error(f"Failed uploading STAC metadata to {s3_stac_file} - {exp}")
            metadata_error_list.append(
                f"Failed uploading STAC metadata to {s3_stac_file} - {exp}"
            )

    # Publish message containing STAC metadata to SNS Topic
    message_attributes = get_common_message_attributes(json.loads(stac_dump))
    message_attributes.update(
        {"action": {"DataType": "String", "StringValue": "ADDED"}}
    )
    try:
        publish_sns(sns_topic, stac_dump, message_attributes)
        LOG.info(f"Finished publishing SNS Message to SNS Topic {sns_topic}")
    except S3SyncException as exp:
        LOG.error(f"Failed publishing SNS Message to SNS Topic {sns_topic} - {exp}")
        metadata_error_list.append(
            f"Failed publishing SNS Message to SNS Topic {sns_topic} - {exp}"
        )

    # Update checksum file
    # `name` (the stem without ".odc-metadata") was computed above
    checksum_file_path = nci_metadata_file_path.with_name(f"{name}.sha1")
    try:
        upload_checksum(
            nci_metadata_file_path,
            checksum_file_path,
            new_checksum_list,
            s3_bucket,
            s3_path,
        )
        LOG.info(
            f"Finished uploading checksum file {s3_path}/{checksum_file_path.name}"
        )
    except S3SyncException as exp:
        LOG.error(
            f"Failed uploading checksum file "
            f"{s3_path}/{checksum_file_path.name} - {exp}"
        )
        metadata_error_list.append(
            f"Failed uploading checksum file "
            f"{s3_path}/{checksum_file_path.name} - {exp}"
        )

    return metadata_error_list
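
An illustrative caller sketch for update_metadata; the bucket, URLs, topic ARN and paths below are placeholders, not values taken from the example above.

# Hypothetical driver: run the update for one NCI metadata file and log
# any accumulated errors. All concrete values here are placeholders.
errors = update_metadata(
    nci_metadata_file="/g/data/example/scene.odc-metadata.yaml",
    s3_bucket="example-bucket",
    s3_base_url="https://example-bucket.s3.ap-southeast-2.amazonaws.com",
    explorer_base_url="https://explorer.example.org",
    sns_topic="arn:aws:sns:ap-southeast-2:123456789012:example-topic",
    s3_path="example/product/path",
)
for error in errors:
    LOG.error(error)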