def validate_paths(
    paths: List[Path], thorough: bool = False
) -> Generator[Tuple[Path, List[ValidationMessage]], None, None]:
    """Validate the list of paths. Product documents can be specified before their datasets."""
    products: Dict[str, Dict] = {}

    for path in paths:
        # Load yaml. If product, add to products.
        # Otherwise validate.
        doc = serialise.load_yaml(path)

        messages = []
        if is_product(doc):
            messages.extend(validate_product(doc))
            products[doc["name"]] = doc
            yield path, messages
            continue

        # TODO: follow ODC's match rules?
        product = None
        product_name = get_path(doc, ("product", "name"), default=None)
        if products:
            if len(products) == 1:
                [product] = products.values()
            elif product_name is not None:
                product = products.get(product_name)
            if product is None:
                messages.append(
                    _warning(
                        "unknown_product",
                        "Cannot match dataset to product",
                        hint=f"Nothing matches {product_name!r}"
                        if product_name
                        else "No product name in dataset (TODO: field matching)",
                    )
                )
        else:
            messages.append(
                ValidationMessage(
                    Level.error if thorough else Level.info,
                    "no_product",
                    "No product provided: validating dataset information alone",
                )
            )

        messages.extend(
            validate_dataset(
                doc,
                product_definition=product,
                readable_location=path,
                thorough=thorough,
            )
        )
        yield path, messages
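
# A minimal usage sketch (not part of the original module): walk a set of
# product/dataset documents with validate_paths() and count errors.
# The function name and the `doc_paths` argument are hypothetical; only
# validate_paths(), ValidationMessage and Level come from this package.
def _example_count_validation_errors(doc_paths: List[Path]) -> int:
    errors = 0
    for doc_path, doc_messages in validate_paths(doc_paths, thorough=True):
        for message in doc_messages:
            # Report each message against the document it came from.
            print(f"{doc_path}: {message}")
            if message.level == Level.error:
                errors += 1
    return errors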
def test_invalid_crs(input_doc_folder: Path):
    input_metadata_path = input_doc_folder.joinpath(ODC_METADATA_FILE)
    assert input_metadata_path.exists()

    input_doc = serialise.load_yaml(input_metadata_path)
    del input_doc["crs"]

    serialise.dump_yaml(input_metadata_path, input_doc)
    assert input_metadata_path.exists()

    with pytest.raises(RuntimeError) as exp:
        run_tostac(input_metadata_path)
    assert (
        str(exp.value) == "Expect string or any object with "
        "`.to_epsg()` or `.to_wkt()` method"
    )
def test_add_property(input_doc_folder: Path):
    input_metadata_path = input_doc_folder.joinpath(ODC_METADATA_FILE)
    assert input_metadata_path.exists()

    input_doc = serialise.load_yaml(input_metadata_path)
    input_doc["properties"]["test"] = "testvalue"

    serialise.dump_yaml(input_metadata_path, input_doc)
    assert input_metadata_path.exists()

    run_tostac(input_metadata_path)

    name = input_metadata_path.stem.replace(".odc-metadata", "")
    actual_stac_path = input_metadata_path.with_name(f"{name}.stac-item.json")
    assert actual_stac_path.exists()

    actual_doc = json.load(actual_stac_path.open())
    assert actual_doc["properties"]["test"] == input_doc["properties"]["test"]
def update_metadata(
    nci_metadata_file, s3_bucket, s3_base_url, explorer_base_url, sns_topic, s3_path
):
    """
    Upload the updated metadata (with the NBAR measurements removed), the updated
    checksum file and the generated STAC document to S3, then publish an SNS message.

    :param nci_metadata_file: Path of metadata file in NCI
    :param s3_bucket: Name of S3 bucket
    :param s3_base_url: Base URL of the S3 bucket
    :param explorer_base_url: Base URL of the explorer
    :param sns_topic: ARN of the SNS topic
    :param s3_path: Path in S3
    :return: List of errors
    """
    # Initialise error list
    metadata_error_list = []
    # Initialise checksum list
    new_checksum_list = {}

    nci_metadata_file_path = Path(nci_metadata_file)
    temp_metadata = serialise.load_yaml(nci_metadata_file_path)

    # Delete NBAR-related measurements and accessories.
    # Landsat 8 datasets differ, so check that each field exists before removing it.
    nbar_measurements = (
        "nbar_blue",
        "nbar_green",
        "nbar_nir",
        "nbar_red",
        "nbar_swir_1",
        "nbar_swir_2",
        "nbar_coastal_aerosol",
        "nbar_panchromatic",
        "oa_nbar_contiguity",
    )
    for measurement in nbar_measurements:
        if measurement in temp_metadata["measurements"]:
            del temp_metadata["measurements"][measurement]
    if "thumbnail:nbar" in temp_metadata["accessories"]:
        del temp_metadata["accessories"]["thumbnail:nbar"]

    # Format an eo3 dataset dict for human-readable yaml serialisation.
    temp_metadata = serialise.prepare_formatting(temp_metadata)

    # Dump metadata yaml into buffer
    with io.BytesIO() as temp_yaml:
        serialise.dumps_yaml(temp_yaml, temp_metadata)
        # Seek back to the beginning of the buffer before the next read/write
        temp_yaml.seek(0)
        new_checksum_list[nci_metadata_file_path.name] = verify.calculate_hash(
            temp_yaml
        )

        # Write odc metadata yaml object into S3
        s3_metadata_file = f"{s3_path}/{nci_metadata_file_path.name}"
        try:
            upload_s3_resource(s3_bucket, s3_metadata_file, temp_yaml.getvalue())
            LOG.info(f"Finished uploading metadata to {s3_metadata_file}")
        except S3SyncException as exp:
            LOG.error(f"Failed uploading metadata to {s3_metadata_file} - {exp}")
            metadata_error_list.append(
                f"Failed uploading metadata to {s3_metadata_file} - {exp}"
            )

    # Create stac metadata
    name = nci_metadata_file_path.stem.replace(".odc-metadata", "")
    stac_output_file_path = nci_metadata_file_path.with_name(f"{name}.stac-item.json")
    stac_url_path = (
        f"{s3_base_url if s3_base_url else boto3.client('s3').meta.endpoint_url}"
        f"/{s3_path}/"
    )
    item_doc = dc_to_stac(
        serialise.from_doc(temp_metadata),
        nci_metadata_file_path,
        stac_output_file_path,
        stac_url_path,
        explorer_base_url,
        True,
    )
    stac_dump = json.dumps(item_doc, indent=4, default=json_fallback)

    # Write stac json to buffer
    with io.BytesIO() as temp_stac:
        temp_stac.write(stac_dump.encode())
        # Seek back to the beginning of the buffer before the next read/write
        temp_stac.seek(0)
        new_checksum_list[stac_output_file_path.name] = verify.calculate_hash(temp_stac)

        # Write stac metadata json object into S3
        s3_stac_file = f"{s3_path}/{stac_output_file_path.name}"
        try:
            upload_s3_resource(s3_bucket, s3_stac_file, temp_stac.getvalue())
            LOG.info(f"Finished uploading STAC metadata to {s3_stac_file}")
        except S3SyncException as exp:
            LOG.error(f"Failed uploading STAC metadata to {s3_stac_file} - {exp}")
            metadata_error_list.append(
                f"Failed uploading STAC metadata to {s3_stac_file} - {exp}"
            )

    # Publish message containing STAC metadata to SNS Topic
    message_attributes = get_common_message_attributes(json.loads(stac_dump))
    message_attributes.update(
        {"action": {"DataType": "String", "StringValue": "ADDED"}}
    )
    try:
        publish_sns(sns_topic, stac_dump, message_attributes)
        LOG.info(f"Finished publishing SNS Message to SNS Topic {sns_topic}")
    except S3SyncException as exp:
        LOG.error(f"Failed publishing SNS Message to SNS Topic {sns_topic} - {exp}")
        metadata_error_list.append(
            f"Failed publishing SNS Message to SNS Topic {sns_topic} - {exp}"
        )

    # Update checksum file
    checksum_filename = nci_metadata_file_path.stem.replace(".odc-metadata", "")
    checksum_file_path = nci_metadata_file_path.with_name(f"{checksum_filename}.sha1")
    try:
        upload_checksum(
            nci_metadata_file_path,
            checksum_file_path,
            new_checksum_list,
            s3_bucket,
            s3_path,
        )
        LOG.info(
            f"Finished uploading checksum file "
            f"{s3_path}/{checksum_file_path.name}"
        )
    except S3SyncException as exp:
        LOG.error(
            f"Failed uploading checksum file "
            f"{s3_path}/{checksum_file_path.name} - {exp}"
        )
        metadata_error_list.append(
            f"Failed uploading checksum file "
            f"{s3_path}/{checksum_file_path.name} - {exp}"
        )

    return metadata_error_list
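

# A hedged usage sketch (not part of the original module): sync a single NCI
# dataset and report the accumulated errors. The function name, bucket, URLs,
# topic ARN and S3 prefix below are placeholders, not real deployment values;
# only update_metadata() and LOG come from this module.
def _example_sync_one_dataset(metadata_file: str) -> bool:
    errors = update_metadata(
        nci_metadata_file=metadata_file,
        s3_bucket="example-bucket",  # placeholder bucket name
        s3_base_url="https://example-bucket.s3.amazonaws.com",  # placeholder URL
        explorer_base_url="https://explorer.example.com",  # placeholder URL
        sns_topic="arn:aws:sns:ap-southeast-2:123456789012:example-topic",  # placeholder ARN
        s3_path="example/prefix",  # placeholder S3 prefix
    )
    for error in errors:
        LOG.error(error)
    # True only if every upload and the SNS publish succeeded.
    return not errors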