def lambda_handler(event, context):
    """Add collections to the root catalog and announce them over SNS.

    Two event shapes are handled:

    * an event containing an ``extent`` key is added to the root catalog
      as-is and its JSON form is published to ``PUBLISH_TOPIC``;
    * an event containing a ``catalog_url`` key causes that catalog to be
      opened; each child that is a ``Collection`` is stripped of its
      ``child`` links, given a ``copied_from`` link, added to the root
      catalog and published.

    The root catalog is normalized and saved at the end in every case.
    """

    def announce(payload):
        # Publish one JSON payload and log what SNS answered.
        response = snsclient.publish(TopicArn=PUBLISH_TOPIC, Message=payload)
        logger.debug(f"SNS Publish response: {json.dumps(response)}")

    logger.debug('Event: %s', json.dumps(event))

    root_cat = get_root_catalog()

    # The event itself is a collection: add it to the static catalog and
    # send it to the Cirrus publish topic.
    if 'extent' in event:
        root_cat.add_child(event)
        announce(json.dumps(event))

    # The event points at a catalog: copy over each of its collections.
    if 'catalog_url' in event:
        source_catalog = Catalog.from_file(event['catalog_url'])
        for child in source_catalog.get_children():
            if not isinstance(child, Collection):
                continue
            child.remove_links('child')
            copied_from = Link('copied_from', child)
            child.add_link(copied_from, child.get_self_href())
            root_cat.add_child(child)
            child_json = json.dumps(child.to_dict())
            logger.debug(f"Publishing {child.id}: {child_json}")
            announce(child_json)

    root_cat.normalize_and_save(ROOT_URL, CatalogType.ABSOLUTE_PUBLISHED)
def create_stac_item(self, original_entry):
    """
    Build the STAC item via the parent adapter, then tag the mission and
    replace the item's links with a single link to the original entry.
    """
    item = super(RSAT1Adapter, self).create_stac_item(original_entry)
    item.common_metadata.mission = 'radarsat1'

    source_href = self.extract_source_link(original_entry)
    original_link = Link(
        'original',
        source_href,
        title='Original EODMS reference',
    )
    # Assigning a fresh list discards whatever links the parent produced.
    item.links = [original_link]
    return item
def source_links_for_labels(items, label_item):
    """
    Build one label-extension "source" Link per input STAC Item.

    Entries of ``items`` that are ``None`` are skipped. Every link is
    relative, typed as a GeoTIFF, carries ``"label:assets": "labels"`` in
    its properties and is owned by ``label_item``.
    """
    links = []
    for source in items:
        if source is None:
            continue
        link = Link(
            "source",
            source,
            link_type=LinkType.RELATIVE,
            media_type="image/tiff; application=geotiff",
            properties={"label:assets": "labels"},
        )
        # Keep whatever set_owner returns, exactly as the list is expected
        # to hold the result of that call.
        links.append(link.set_owner(label_item))
    return links
def test_invocation_recurses_subcatalogs(self):
    """
    Items nested two catalog levels deep are passed to ``process_item``
    in order, each with the message source expected for its catalog.
    """
    timestamp = '2020-01-01T00:00:00.000Z'

    root = Catalog('0', 'Catalog 0')
    root.add_link(
        Link('harmony_source', 'http://example.com/C0001-EXAMPLE'))
    root.add_child(Catalog('1a', 'Catalog 1a'))
    branch = Catalog('1b', 'Catalog 1b')
    root.add_child(branch)

    leaf_a = Catalog('2a', 'Catalog 2a')
    leaf_b = Catalog('2b', 'Catalog 2b')
    leaf_b.add_link(
        Link('harmony_source', 'http://example.com/C0002-EXAMPLE'))
    branch.add_children([leaf_a, leaf_b])

    message = Message(full_message)
    items_a = [
        Item('3', None, [0, 0, 1, 3], timestamp, {}),
        Item('4', None, [0, 0, 1, 4], timestamp, {}),
    ]
    items_b = [
        Item('5', None, [0, 0, 1, 5], timestamp, {}),
        Item('6', None, [0, 0, 1, 6], timestamp, {}),
    ]
    leaf_a.add_items(items_a)
    leaf_b.add_items(items_b)

    adapter = AdapterTester(message, root, config=self.config)
    adapter.invoke()

    # First every recorded item is compared, then every recorded source.
    expected_items = items_a + items_b
    for index, expected_item in enumerate(expected_items):
        self.assertEqual(
            AdapterTester.process_args[index][0].bbox, expected_item.bbox)

    expected_source_indexes = [0, 0, 1, 1]
    for index, source_index in enumerate(expected_source_indexes):
        self.assertEqual(
            AdapterTester.process_args[index][1],
            message.sources[source_index])
def _odc_links(
    explorer_base_url: str,
    dataset: DatasetDoc,
    collection_url: Optional[str],
) -> List[Link]:
    """
    Yield the links for an ODC product that belong on a STAC Item.

    This is a generator: links are produced lazily, in the order
    collection, product overview, dataset overview. The collection link
    uses ``collection_url`` when given, otherwise it is derived from
    ``explorer_base_url``. When neither is provided, nothing is yielded
    and a warning is issued.
    """
    if not collection_url and not explorer_base_url:
        warnings.warn("No collection provided for Stac Item.")
        return

    # At least one of the two URLs is set from here on.
    if collection_url:
        collection_target = collection_url
    else:
        collection_target = urljoin(
            explorer_base_url, f"/stac/collections/{dataset.product.name}"
        )
    yield Link(target=collection_target, rel="collection")

    if not explorer_base_url:
        return

    product_page = urljoin(explorer_base_url, f"product/{dataset.product.name}")
    yield Link(
        rel="product_overview",
        target=product_page,
        media_type="text/html",
        title="ODC Product Overview",
    )

    dataset_page = urljoin(explorer_base_url, f"dataset/{dataset.id}")
    yield Link(
        rel="alternative",
        target=dataset_page,
        media_type="text/html",
        title="ODC Dataset Overview",
    )
def test_altered_ids_are_retained(self):
    """
    An item whose id the adapter changes keeps that changed id in the
    output catalog.
    """
    timestamp = '2020-01-01T00:00:00.000Z'
    unit_bbox = [0, 0, 1, 1]

    catalog = Catalog('0', 'Catalog 0')
    catalog.add_link(
        Link('harmony_source', 'http://example.com/C0001-EXAMPLE'))
    message = Message(full_message)
    catalog.add_items([
        Item('mutate-me', None, list(unit_bbox), timestamp, {}),
        Item('2', None, list(unit_bbox), timestamp, {}),
    ])

    adapter = AdapterTester(message, catalog, config=self.config)
    (message, out_catalog) = adapter.invoke()

    out_items = list(out_catalog.get_items())
    self.assertEqual(out_items[0].id, 'i-mutated-you')
def test_invocation_processes_items_with_sources(self):
    """
    Each item of a catalog with a ``harmony_source`` link is processed in
    order, paired with the first source of the message.
    """
    timestamp = '2020-01-01T00:00:00.000Z'

    catalog = Catalog('0', 'Catalog 0')
    catalog.add_link(
        Link('harmony_source', 'http://example.com/C0001-EXAMPLE'))
    message = Message(full_message)
    items = [
        Item('1', None, [0, 0, 1, 1], timestamp, {}),
        Item('2', None, [0, 0, 1, 2], timestamp, {}),
    ]
    catalog.add_items(items)

    adapter = AdapterTester(message, catalog, config=self.config)
    adapter.invoke()

    # Items first, then sources, for every recorded call.
    for index, expected_item in enumerate(items):
        self.assertEqual(
            AdapterTester.process_args[index][0].bbox, expected_item.bbox)
    for index in range(len(items)):
        self.assertEqual(
            AdapterTester.process_args[index][1], message.sources[0])
def test_unaltered_ids_are_assigned_new_uuids(self):
    """
    When the adapter leaves ids alone, the output catalog and its first
    two items end up with ids different from the input ones.
    """
    timestamp = '2020-01-01T00:00:00.000Z'

    catalog = Catalog('0', 'Catalog 0')
    catalog.add_link(
        Link('harmony_source', 'http://example.com/C0001-EXAMPLE'))
    message = Message(full_message)
    items = [
        Item(item_id, None, [0, 0, 1, 1], timestamp, {})
        for item_id in ('1', '2')
    ]
    catalog.add_items(items)

    adapter = AdapterTester(message, catalog, config=self.config)
    (message, out_catalog) = adapter.invoke()

    self.assertNotEqual(out_catalog.id, catalog.id)

    out_items = list(out_catalog.get_items())
    for position in (0, 1):
        self.assertNotEqual(out_items[position].id, items[position].id)
def test_minimal_item_collection(self):
    """
    Load the minimal ItemCollection, re-home it in a temporary directory,
    save it, and validate the saved JSON against the schema.
    """
    with TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, 'item_collection.json')

        ic = ItemCollection.from_file(self.IC_MINIMAL_URI)
        ic.set_self_href(path)

        self.assertIsInstance(ic, ItemCollection)
        self.assertEqual(len(ic.links), 1)
        self.assertEqual(ic.get_self_href(), path)
        self.assertEqual(len(ic.links), 1)

        # Re-point every link at a file of the same name inside tmp_dir.
        relocated_links = []
        for link in ic.links:
            new_target = join(tmp_dir, basename(link.target))
            relocated_links.append(Link(link.rel, new_target))
        ic.links = relocated_links

        ic.save()
        self.assertTrue(isfile(path))

        with open(path) as saved_file:
            ic_val_dict = json.load(saved_file)
        SchemaValidator().validate_dict(ic_val_dict, ItemCollection)
def mapper(item):
    """
    Turn one STAC label item into the list of image items it was sourced
    from, each carrying a single "labels" link back to the label item.

    The mapping is one to many because a label item may reference several
    image scenes. Only sources whose id contains "_S1" are used; when none
    are linked, the items of the root's "S1" child are searched by id
    instead (that listing is kept in the module-level ``S1`` once loaded).
    """
    global S1

    source_links = [link for link in item.links if link.rel == "source"]
    for link in source_links:
        link.resolve_stac_object()

    source_items = []
    for link in source_links:
        if "_S1" in link.target.id:
            source_items.append(link.target.clone())

    if not source_items:
        print("WARNING: No source images for {}".format(item.id))
        # Drop the last "_"-separated component of the label item's id.
        item_id = "_".join(item.id.split("_")[:-1])
        if S1 is None:
            root_link = [link for link in item.links if link.rel == "root"][0]
            S1 = list(root_link.target.get_child("S1").get_items())
        wanted_id = f"{item_id}_S1"
        source_items = [
            candidate.clone() for candidate in S1 if candidate.id == wanted_id
        ]

    for source_item in source_items:
        label_asset = item.assets["labels"]
        # The label -> image relationship is being inverted, so the label
        # item's own "source" links are removed to avoid recursion.
        item.links = [link for link in item.links if link.rel != "source"]
        labels_link = Link(
            "labels",
            item,
            link_type=LinkType.RELATIVE,
            media_type=label_asset.media_type,
        )
        source_item.links = [labels_link.set_owner(source_item)]

    return source_items
def transform_stac_to_stac(item: Item, enable_proj: bool = True, self_link: str = None, source_link: str = None) -> Item:
    """
    Handle a 0.7.0 item and convert it to a 1.0.0.beta2 item.

    The item is modified in place and also returned.

    :param item: Item to convert. Its properties must contain
        ``eo:instrument`` and ``eo:off_nadir``; both are moved out of the
        raw properties (a missing key raises ``KeyError``).
    :param enable_proj: When true, the ``SR_B2.TIF`` asset is opened with
        rasterio to obtain shape, transform and EPSG code for the
        projection extension. If the file cannot be opened a message is
        printed and the projection fields are skipped.
    :param self_link: Optional target for a ``self`` link.
    :param source_link: Optional target for a ``derived_from`` link.
    :return: The same ``item`` instance.
    """
    # Remove USGS extension and add back eo
    item.ext.enable("eo")

    # Add and update links: existing links are dropped first.
    item.links = []
    if self_link:
        item.links.append(Link(rel="self", target=self_link))
    if source_link:
        item.links.append(
            Link(rel="derived_from",
                 target=source_link,
                 media_type="application/json"))

    # Add some common fields
    item.common_metadata.constellation = "Landsat"
    item.common_metadata.instruments = [
        i.lower() for i in item.properties["eo:instrument"].split("_")
    ]
    del item.properties["eo:instrument"]

    # Handle view extension
    item.ext.enable("view")
    item.ext.view.off_nadir = item.properties["eo:off_nadir"]
    del item.properties["eo:off_nadir"]

    if enable_proj:
        try:
            # If we can load the blue band, use it to add proj information
            blue_asset = item.assets["SR_B2.TIF"]
            # The context manager closes the dataset once the values have
            # been read, instead of leaving the file handle open.
            with rasterio.open(blue_asset.href) as blue:
                shape = [blue.height, blue.width]
                transform = blue.transform
                crs = blue.crs.to_epsg()

            # Now we have the info, we can make the fields
            item.ext.enable("projection")
            item.ext.projection.epsg = crs

            for asset in item.assets.values():
                if asset.media_type == "image/vnd.stac.geotiff; cloud-optimized=true":
                    item.ext.projection.set_transform(transform, asset=asset)
                    item.ext.projection.set_shape(shape, asset=asset)
                    asset.media_type = MediaType.COG

        except RasterioIOError:
            print("Failed to load blue band, so not handling proj fields")

    # Remove .TIF from asset names
    item.assets = {
        name.replace(".TIF", ""): asset
        for name, asset in item.assets.items()
    }

    return item
datetime.strptime("2010-01-01", "%Y-%m-%d"), ] ] ), )
# NOTE(review): the bracket run above closes a definition that starts before
# this excerpt -- presumably the extent object used below as SpotExtents;
# confirm against the full file.

# Collection for the SPOT orthoimages. SpotExtents and SpotProviders are
# referenced here but are not defined in this excerpt.
OrthoCollection = Collection(
    id="canada_spot_orthoimages",
    description="Orthoimages of Canada 2005-2010",
    extent=SpotExtents,
    title=None,
    stac_extensions=None,
    # NOTE(review): "Proprietery" looks like a misspelling of "proprietary",
    # and the license link below points at an open licence -- confirm the
    # intended value.
    license="Proprietery",
    # NOTE(review): keywords is passed as one comma-separated string rather
    # than a list of strings -- verify what Collection expects.
    keywords="SPOT, Geobase, orthoimages",
    version="0.0.1",
    providers=SpotProviders,
)

# License link attached to the collection by build_catalog().
# NOTE(review): the third and fourth positional arguments are presumably the
# media type and the title; "text" is not a full MIME type -- TODO confirm.
GeobaseLicense = Link(
    "license",
    "https://open.canada.ca/en/open-government-licence-canada",
    "text",
    "Open Government Licence Canada",
)


def build_catalog():
    # Attach the license link to the collection, add the collection as a
    # child of GeobaseCatalog (not defined in this excerpt), and return that
    # catalog. Every call repeats the add_link/add_child calls on the same
    # module-level objects.
    OrthoCollection.add_link(GeobaseLicense)
    GeobaseCatalog.add_child(OrthoCollection)
    return GeobaseCatalog
def via_link(self, base: str = DEFAULT_BASE) -> Link:
    """Return a ``via`` Link whose target is this file's href with the
    ``xml`` extension, built relative to ``base``."""
    xml_href = self._asset_href_with_extension(base, "xml")
    return Link("via", xml_href)
def to_pystac_item(
    dataset: DatasetDoc,
    stac_item_destination_url: str,
    dataset_location: Optional[str] = None,
    odc_dataset_metadata_url: Optional[str] = None,
    explorer_base_url: Optional[str] = None,
    collection_url: Optional[str] = None,
) -> pystac.Item:
    """
    Convert the given ODC Dataset into a Stac Item document.

    Note: You may want to call `validate_item(doc)` on the outputs to find any
    incomplete properties.

    :param dataset: The ODC dataset document to convert.
    :param collection_url: URL to the Stac Collection. Either this or an explorer_base_url
                           should be specified for Stac compliance.
    :param stac_item_destination_url: Public 'self' URL where the stac document will be findable.
    :param dataset_location: Use this location instead of picking from dataset.locations
                             (for calculating relative band paths)
    :param odc_dataset_metadata_url: Public URL for the original ODC dataset yaml document
    :param explorer_base_url: An Explorer instance that contains this dataset.
                              Will allow links to things such as the product definition.
    :return: The populated Stac Item, with links, extensions and assets added.
    :raises STACError: If the dataset has a geometry but neither an EPSG code
                       nor a WKT string can be determined for its CRS.
    """
    if dataset.geometry is not None:
        geom = Geometry(dataset.geometry, CRS(dataset.crs))
        wgs84_geometry = geom.to_crs(CRS("epsg:4326"), math.inf)

        geometry = wgs84_geometry.json
        bbox = wgs84_geometry.boundingbox
    else:
        geometry = None
        bbox = None

    properties = eo3_to_stac_properties(dataset, title=dataset.label)
    properties.update(_lineage_fields(dataset.lineage))

    # The datetime is passed to Item separately, so take it out of the
    # properties.
    dt = properties["datetime"]
    del properties["datetime"]

    # TODO: choose remote if there's multiple locations?
    # Without a dataset location, all paths will be relative.
    dataset_location = dataset_location or (
        dataset.locations[0] if dataset.locations else None
    )

    item = Item(
        id=str(dataset.id),
        datetime=dt,
        properties=properties,
        geometry=geometry,
        bbox=bbox,
        collection=dataset.product.name,
    )

    # Add links
    if stac_item_destination_url:
        item.links.append(
            Link(
                rel="self",
                media_type=MediaType.JSON,
                target=stac_item_destination_url,
            )
        )
    if odc_dataset_metadata_url:
        item.links.append(
            Link(
                title="ODC Dataset YAML",
                rel="odc_yaml",
                media_type="text/yaml",
                target=odc_dataset_metadata_url,
            )
        )

    for link in _odc_links(explorer_base_url, dataset, collection_url):
        item.links.append(link)

    EOExtension.ext(item, add_if_missing=True)

    # epsg is read again below when per-asset projection fields are written.
    # Bind it up front so a dataset that has grids but no geometry does not
    # hit an UnboundLocalError there.
    epsg = None
    if dataset.geometry:
        proj = ProjectionExtension.ext(item, add_if_missing=True)
        epsg, wkt = _get_projection(dataset)
        if epsg is not None:
            proj.apply(epsg=epsg, **_proj_fields(dataset.grids))
        elif wkt is not None:
            proj.apply(wkt2=wkt, **_proj_fields(dataset.grids))
        else:
            raise STACError(
                "Projection extension requires either epsg or wkt for crs."
            )

    # To pass validation, only add 'view' extension when we're using it somewhere.
    if any(k.startswith("view:") for k in properties.keys()):
        ViewExtension.ext(item, add_if_missing=True)

    # Add assets that are data
    for name, measurement in dataset.measurements.items():
        if not dataset_location and not measurement.path:
            # No URL to link to. URL is mandatory for Stac validation.
            continue

        asset = Asset(
            href=_uri_resolve(dataset_location, measurement.path),
            media_type=_media_type(Path(measurement.path)),
            title=name,
            roles=["data"],
        )
        eo = EOExtension.ext(asset)

        # TODO: pull out more information about the band
        band = Band.create(name)
        eo.apply(bands=[band])

        if dataset.grids:
            proj_fields = _proj_fields(dataset.grids, measurement.grid)
            if proj_fields is not None:
                proj = ProjectionExtension.ext(asset)
                # Not sure how this handles None for an EPSG code
                proj.apply(
                    shape=proj_fields["shape"],
                    transform=proj_fields["transform"],
                    epsg=epsg,
                )

        item.add_asset(name, asset=asset)

    # Add assets that are accessories
    for name, measurement in dataset.accessories.items():
        if not dataset_location and not measurement.path:
            # No URL to link to. URL is mandatory for Stac validation.
            continue

        asset = Asset(
            href=_uri_resolve(dataset_location, measurement.path),
            media_type=_media_type(Path(measurement.path)),
            title=_asset_title_fields(name),
            roles=_asset_roles_fields(name),
        )

        item.add_asset(name, asset=asset)

    return item
def transform_stac_to_stac(item: Item, enable_proj: bool = True, self_link: str = None, source_link: str = None) -> Item:
    """
    Handle a 0.7.0 item and convert it to a 1.0.0.beta2 item.
    If `enable_proj` is true, the assets' geotiff files must be accessible.

    The item is modified in place and also returned.

    :param item: Item to convert.
    :param enable_proj: When true, the first geotiff asset is opened with
        rasterio and its shape, transform and EPSG code are written to the
        projection extension of every geotiff asset.
    :param self_link: Optional target for a ``self`` link.
    :param source_link: Optional target for a ``derived_from`` link.
    :return: The same ``item`` instance.
    :raises STACError: If ``eo:instrument`` is missing or of an unsupported
        type, if neither ``eo:off_nadir`` nor ``view:off_nadir`` is present,
        or if projection information cannot be read from the geotiff.
    """
    # Clear hierarchical links
    item.set_parent(None)
    item.set_root(None)

    # Remove USGS extension and add back eo
    item.ext.enable("eo")

    # Add and update links
    if self_link:
        item.links.append(Link(rel="self", target=self_link))
    if source_link:
        item.links.append(
            Link(rel="derived_from",
                 target=source_link,
                 media_type="application/json"))

    # Add some common fields
    item.common_metadata.constellation = "Landsat"

    if not item.properties.get("eo:instrument"):
        raise STACError("eo:instrument missing among the properties")

    # Test if eo:instrument come as str or list
    if isinstance(item.properties["eo:instrument"], str):
        item.common_metadata.instruments = [
            i.lower() for i in item.properties.pop("eo:instrument").split("_")
        ]
    elif isinstance(item.properties["eo:instrument"], list):
        item.common_metadata.instruments = [
            i.lower() for i in item.properties.pop("eo:instrument")
        ]
    else:
        raise STACError(
            f'eo:instrument type {type(item.properties["eo:instrument"])} not supported'
        )

    # Handle view extension. A value of 0 is valid, hence the explicit
    # comparison next to the truthiness check.
    item.ext.enable("view")
    if (item.properties.get("eo:off_nadir")
            or item.properties.get("eo:off_nadir") == 0):
        item.ext.view.off_nadir = item.properties.pop("eo:off_nadir")
    elif (item.properties.get("view:off_nadir")
          or item.properties.get("view:off_nadir") == 0):
        item.ext.view.off_nadir = item.properties.pop("view:off_nadir")
    else:
        # The exception used to be constructed without being raised, so a
        # missing off-nadir value went unnoticed.
        raise STACError("eo:off_nadir or view:off_nadir is a required property")

    if enable_proj:
        # Enabled projection
        item.ext.enable("projection")

        obtained_shape = None
        obtained_transform = None
        crs = None
        for asset in item.assets.values():
            # Assets without a media type cannot be geotiffs; skip them
            # rather than failing on the substring test.
            if not asset.media_type or "geotiff" not in asset.media_type:
                continue

            # retrieve shape, transform and crs from the first geotiff file among the assets
            if not obtained_shape:
                try:
                    with rasterio.open(asset.href) as opened_asset:
                        obtained_shape = opened_asset.shape
                        obtained_transform = opened_asset.transform
                        crs = opened_asset.crs.to_epsg()
                        # Check to ensure that all information is present
                        if not obtained_shape or not obtained_transform or not crs:
                            raise STACError(
                                f"Failed setting shape, transform and csr from {asset.href}"
                            )
                except RasterioIOError as io_error:
                    raise STACError(
                        "Failed loading geotiff, so not handling proj fields"
                    ) from io_error

            item.ext.projection.set_transform(obtained_transform, asset=asset)
            item.ext.projection.set_shape(obtained_shape, asset=asset)
            asset.media_type = MediaType.COG

        # Now we have the info, we can make the fields
        item.ext.projection.epsg = crs

    # Remove .TIF from asset names
    item.assets = {
        name.replace(".TIF", ""): asset
        for name, asset in item.assets.items()
    }

    return item