def collect_items(sar_catalog, usfimr_collection,
                  flood_ids=("1", "2", "3", "15", "16")):
    """Pair SAR image items with the USFIMR label item of the same flood.

    Args:
        sar_catalog: Catalog with one child per flood id; the items of each
            child are cloned and used as images.
        usfimr_collection: Collection holding one item per flood id, each
            with a "geojson" asset. Its extent is reused for the labels
            collection.
        flood_ids: Ids looked up in both ``usfimr_collection`` and
            ``sar_catalog``. Defaults to the five floods that were
            previously hard-coded.

    Returns:
        A ``(images, labels_collection)`` tuple: the list of cloned SAR
        items (each carrying a "labels" link to its label item) and the
        collection of cloned label items.
    """
    images = []
    # A throwaway "labels" collection used to be built here and was
    # overwritten immediately; only this collection was ever used.
    labels_collection = pystac.Collection(
        "usfimr_sar_labels", "usfimr_sar_labels", usfimr_collection.extent
    )

    for flood_id in flood_ids:
        usfimr_item = usfimr_collection.get_item(flood_id)

        usfimr_geojson_asset = usfimr_item.assets["geojson"]
        usfimr_geojson_asset.set_owner(usfimr_item)

        usfimr_item_clone = usfimr_item.clone()
        # Reduce item assets to just the geojson as labels
        usfimr_item_clone.assets = {"labels": usfimr_geojson_asset}
        labels_collection.add_item(usfimr_item_clone)

        for sar_item in sar_catalog.get_child(flood_id).get_items():
            sar_item_clone = sar_item.clone()
            # Link every image to the label item of its flood.
            sar_item_clone.links.append(
                pystac.Link(
                    "labels",
                    target=usfimr_item_clone,
                    media_type="application/geo+json",
                    link_type=pystac.LinkType.RELATIVE,
                ).set_owner(sar_item_clone)
            )
            images.append(sar_item_clone)

    return images, labels_collection
def main():
    """Build and save a catalog of rasterized label items plus data splits.

    Reads the train/test/validation children of the catalog given by
    ``--mldata-catalog``, copies their items into new collections, builds one
    label item per image via ``construct_label_item`` and saves everything
    as a self-contained catalog under ``./data/catalog``.
    """
    register_s3_io()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--mldata-catalog",
        default="s3://usfimr-s1-mldata/usfimr-s1-mldata-catalog_seed42/catalog.json",
        type=str,
    )
    arg_parser.add_argument(
        "--chip-label-dir",
        default="s3://jrc-fimr-rasterized-labels/version2",
        type=str,
    )
    args = arg_parser.parse_args()

    source_catalog = pystac.Catalog.from_file(args.mldata_catalog)
    # Guarantee exactly one trailing slash on the label directory.
    chip_label_dir = args.chip_label_dir.rstrip("/") + "/"

    train = source_catalog.get_child("train")
    test = source_catalog.get_child("test")
    validation = source_catalog.get_child("validation")

    mldata_catalog = pystac.Catalog(
        "usfimr_jrc-s1-mldata-rasterized",
        "MLData STAC Catalog for usfimr+jrc labels of flood and permanent water with S1 imagery",
    )

    splits = (
        ("train", "Training collection", train),
        ("test", "Test collection", test),
        ("validation", "Validation collection", validation),
    )

    label_items = []
    split_collections = []
    for split_id, split_description, source in splits:
        split_collection = pystac.Collection(
            split_id, split_description, source.extent
        )
        for image_item in source.get_items():
            split_collection.add_item(image_item)
            label_items.append(
                construct_label_item(image_item, chip_label_dir)
            )
        split_collections.append(split_collection)

    label_catalog = pystac.Catalog(
        "usfimr_sar_labels_tif", "USFIMR + JRC labels for flood detection"
    )
    for label_item in label_items:
        label_catalog.add_item(label_item)

    # Labels first, then train / test / validation.
    mldata_catalog.add_child(label_catalog)
    for split_collection in split_collections:
        mldata_catalog.add_child(split_collection)

    mldata_catalog.normalize_and_save(
        "./data/catalog",
        catalog_type=pystac.CatalogType.SELF_CONTAINED,
    )
def test_collection_of_collection():
    """Loading a StacCollection whose child is itself a collection must not fail."""
    instant = datetime.datetime(2000, 1, 1)
    spatial = pystac.SpatialExtent([[0, 1, 2, 3]])
    temporal = pystac.TemporalExtent([instant, instant])

    child = pystac.Collection(
        'child',
        'child-description',
        extent=pystac.Extent(spatial, temporal),
    )
    parent = pystac.Collection(
        'parent',
        'parent-description',
        extent=pystac.Extent(spatial, temporal),
    )
    parent.add_child(child)

    # No assertion: the test passes as long as loading does not raise.
    wrapped = StacCollection(parent)
    wrapped._load()
def create_collection(seasons: List[int]) -> pystac.Collection:
    """Creates a STAC Collection for NAIP data.

    Args:
        seasons (List[int]): List of years that represent the NAIP
            seasons this collection represents.

    Returns:
        pystac.Collection: Collection whose temporal extent runs from
            January 1st of the earliest season to January 1st of the
            latest season, declaring an ``item_assets`` entry for the
            "image" asset.
    """
    first_season = min(seasons)
    last_season = max(seasons)

    spatial_extent = pystac.SpatialExtent(
        bboxes=[[-124.784, 24.744, -66.951, 49.346]])
    temporal_extent = pystac.TemporalExtent(intervals=[[
        pystac.utils.str_to_datetime(f"{first_season}-01-01T00:00:00Z"),
        pystac.utils.str_to_datetime(f"{last_season}-01-01T00:00:00Z"),
    ]])

    image_asset_definition = {
        "eo:bands": [band.properties for band in constants.NAIP_BANDS],
        "roles": ["data"],
        "title": "RGBIR COG tile",
        "type": pystac.MediaType.COG,
    }

    return pystac.Collection(
        id=constants.NAIP_ID,
        description=constants.NAIP_DESCRIPTION,
        title=constants.NAIP_TITLE,
        license=constants.NAIP_LICENSE,
        providers=[constants.USDA_PROVIDER],
        extent=pystac.Extent(spatial_extent, temporal_extent),
        stac_extensions=['item-assets'],
        extra_fields={'item_assets': {'image': image_asset_definition}},
    )
def get_aviris_cog_collection(level):
    """Build the STAC collection describing AVIRIS COGs of the given level.

    Args:
        level: Processing level; must be one of the module constants
            ``L1`` or ``L2``.

    Returns:
        A ``pystac.Collection`` with id ``aviris-<level>-cogs``, no links,
        and ``properties`` holding the band list and the minimum/maximum
        of ``AVIRIS_FREQS``.

    Raises:
        ValueError: If ``level`` is neither ``L1`` nor ``L2``. ValueError is
            an ``Exception`` subclass, so handlers that catch ``Exception``
            keep working.
    """
    if level not in [L1, L2]:
        raise ValueError(f'{level} is not a valid level.')

    if level == L2:
        description = 'AVIRIS L2 Refl Imagery converted to pixel-interleaved COGs'
    else:
        description = 'AVIRIS L1 Imagery converted to pixel-interleaved COGs'

    collection = pystac.Collection(
        f'aviris-{level}-cogs',
        description,
        pystac.Extent(
            pystac.SpatialExtent([[-180, -90, 180, 90]]),
            pystac.TemporalExtent([[
                datetime(2014, 1, 1, tzinfo=timezone.utc),
                datetime(2020, 1, 1, tzinfo=timezone.utc),
            ]]),
        ),
        stac_extensions=COG_COLLECTION_EXTENSIONS)

    # Drop whatever links the constructor attached.
    collection.links = []
    collection.properties = {}
    collection.properties['eo:bands'] = [{
        'name': b,
        'center_wavelength': f
    } for (b, f) in AVIRIS_BANDS_FREQS]
    collection.properties['hsi:wavelength_min'] = min(AVIRIS_FREQS)
    collection.properties['hsi:wavelength_max'] = max(AVIRIS_FREQS)
    return collection
def main():
    """Split SAR images into train/test/validation and save an ML catalog.

    Loads the USFIMR collection and the SAR catalog named on the command
    line, pairs images with labels through ``collect_items``, splits the
    images with ``train_test_val_split`` and writes a self-contained catalog
    to ``./data/mldata-catalog_seed<seed>``.
    """
    register_s3_io()

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--usfimr-collection", default="s3://usfimr-data/collection.json"
    )
    cli.add_argument("--sar-catalog", required=True, type=str)
    cli.add_argument("--random-seed", default=42, type=int)
    args = cli.parse_args()

    usfimr_collection = pystac.Collection.from_file(args.usfimr_collection)
    sar_catalog = pystac.Catalog.from_file(args.sar_catalog)

    mldata_catalog = pystac.Catalog(
        "usfimr-s1-mldata", "MLData STAC Catalog for usfimr-s1 dataset"
    )

    image_items, labels_collection = collect_items(
        sar_catalog, usfimr_collection
    )
    training, testing, validation = train_test_val_split(
        image_items, 0.2, 0.2, random_state=args.random_seed
    )

    # Every split collection reuses the extent of the USFIMR collection and
    # uses the split name as both id and description.
    split_collections = []
    for split_name, split_items in (
        ("train", training),
        ("test", testing),
        ("validation", validation),
    ):
        split_collection = pystac.Collection(
            split_name, split_name, usfimr_collection.extent
        )
        for image_item in split_items:
            split_collection.add_item(image_item)
        split_collections.append(split_collection)

    mldata_catalog.add_child(labels_collection)
    for split_collection in split_collections:
        mldata_catalog.add_child(split_collection)

    output_dir = "./data/mldata-catalog_seed{}".format(args.random_seed)
    mldata_catalog.normalize_and_save(
        output_dir, catalog_type=pystac.CatalogType.SELF_CONTAINED
    )
def test_collection(self):
    """A 'collection' link to a collection without an href serializes href as None."""
    collection = pystac.Collection('collection id', 'desc', extent=None)
    serialized = pystac.Link.collection(collection).to_dict()
    self.assertEqual(
        {
            'rel': 'collection',
            'href': None,
            'type': 'application/json',
        },
        serialized,
    )
def create_stac(self) -> pystac.Collection:
    """Create a collection with a new ULID id and placeholder metadata."""
    collection_id = str(ulid.ULID())
    # NOTE(review): `extent` receives a bare SpatialExtent built from a flat
    # bbox list rather than a pystac.Extent with nested bboxes - presumably a
    # placeholder that is replaced later; confirm before relying on it.
    placeholder_extent = pystac.SpatialExtent(bboxes=[0, 0, 0, 0])
    return pystac.Collection(
        id=collection_id,
        description=None,
        license=None,
        providers=GLOBAL_PROVIDERS,
        extent=placeholder_extent,
    )
def test_auto_title_not_found(self) -> None:
    """A link to a collection created without a title reports a title of None."""
    untitled = pystac.Collection(
        id="my_collection",
        description="Test Collection",
        extent=pystac.Extent.from_items([self.item]),
    )
    link = pystac.Link("my rel", target=untitled)

    self.assertEqual(None, link.title)
def test_auto_title_is_serialized(self) -> None:
    """The serialized link carries the title of the collection it targets."""
    titled = pystac.Collection(
        id="my_collection",
        description="Test Collection",
        extent=pystac.Extent.from_items([self.item]),
        title="Collection Title",
    )
    link = pystac.Link("my rel", target=titled)

    serialized_title = link.to_dict().get("title")
    assert serialized_title == titled.title
def test_auto_title_when_resolved(self) -> None:
    """A link whose target is a titled collection exposes that title."""
    titled = pystac.Collection(
        id="my_collection",
        description="Test Collection",
        extent=pystac.Extent.from_items([self.item]),
        title="Collection Title",
    )
    link = pystac.Link("my rel", target=titled)

    self.assertEqual(titled.title, link.title)
def setUp(self) -> None:
    """Create the collection and item fixtures shared by the tests."""
    # Show full diffs when an equality assertion fails.
    self.maxDiff = None
    self.collection = pystac.Collection(
        "collection id",
        "desc",
        extent=ARBITRARY_EXTENT,
    )
    item_fields = dict(
        id="test-item",
        geometry=None,
        bbox=None,
        datetime=TEST_DATETIME,
        properties={},
    )
    self.item = pystac.Item(**item_fields)
def test_title_as_init_argument(self) -> None:
    """An explicit link title wins over the title of the target collection."""
    explicit_title = "Link title"
    titled = pystac.Collection(
        id="my_collection",
        description="Test Collection",
        extent=pystac.Extent.from_items([self.item]),
        title="Collection Title",
    )
    link = pystac.Link("my rel", title=explicit_title, target=titled)

    assert link.title == explicit_title
    assert link.to_dict().get("title") == explicit_title
def to_stac(self, collection_id, description=None, title=None):
    """Convert the planet items of this object into a STAC collection.

    Args:
        collection_id: Id given to the new collection.
        description: Optional collection description.
        title: Optional collection title.

    Returns:
        A ``pystac.Collection`` containing one STAC item per planet item,
        with an extent computed from those items.
    """
    stac_items = []
    for planet_item in self.get_planet_items():
        stac_items.append(planet_item.to_stac())

    collection = pystac.Collection(
        id=collection_id,
        description=description,
        title=title,
        extent=pystac.Extent.from_items(stac_items),
        providers=[PLANET_PROVIDER],
    )
    collection.add_items(stac_items)
    return collection
def make_collection() -> pystac.Collection:
    """Create a whole-globe test collection with the scientific extension added."""
    start = datetime.datetime(2018, 8, 24)
    # Same duration as the positional form timedelta(5, 4, 3, 2, 1).
    end = start + datetime.timedelta(
        days=5, seconds=4, microseconds=3, milliseconds=2, minutes=1
    )
    intervals: List[List[Optional[datetime.datetime]]] = [[start, end]]

    extent = pystac.Extent(
        pystac.SpatialExtent([[-180.0, -90.0, 180.0, 90.0]]),
        pystac.TemporalExtent(intervals),
    )

    collection = pystac.Collection("my/thing", "desc", extent)
    collection.set_self_href(URL_TEMPLATE % 2019)
    ScientificExtension.add_to(collection)
    return collection
def main():
    """Build, validate and save a catalog of AVIRIS Classic and NG flight lines."""

    def build_collection(flight_lines, csv_path):
        # Load the flight-line table, then fill a new collection from it.
        frame = flight_lines.as_df(csv_path)
        new_collection = pystac.Collection(
            flight_lines.COLLECTION_NAME,
            AVIRIS_DESCRIPTION,
            pystac.Extent(
                spatial=pystac.SpatialExtent([[None, None, None, None]]),
                temporal=pystac.TemporalExtent(
                    [[datetime(1970, 1, 1, tzinfo=timezone.utc), None]]),
            ),
        )
        stacframes.df_to(new_collection, frame)
        return new_collection

    collection = build_collection(AvirisClassic, "aviris-flight-lines.csv")
    collection_ng = build_collection(AvirisNg, "aviris-ng-flight-lines.csv")

    catalog = pystac.Catalog("aviris", AVIRIS_DESCRIPTION)
    for child in (collection, collection_ng):
        catalog.add_child(child)

    # Normalize before validation to set all the required object links
    catalog_path = "./data/catalog"
    catalog.normalize_hrefs(catalog_path)

    logger.info("Validating catalog...")
    catalog.validate_all()

    logger.info("Saving catalog to {}...".format(catalog_path))
    catalog.save(catalog_type=pystac.CatalogType.SELF_CONTAINED)

    logger.info("Done!")
def make_collection(year: int) -> pystac.Collection:
    """Create a whole-globe test collection for ``year`` with the version extension enabled."""
    temporal_extent = pystac.TemporalExtent([[
        datetime.datetime(2014, 8, 10),
        datetime.datetime(year, 1, 3, 4, 5),
    ]])
    spatial_extent = pystac.SpatialExtent([[-180, -90, 180, 90]])

    collection = pystac.Collection(
        f'my/collection/of/things/{year}',
        'desc',
        pystac.Extent(spatial_extent, temporal_extent),
    )
    collection.set_self_href(URL_TEMPLATE % year)
    collection.ext.enable(pystac.Extensions.VERSION)
    return collection
def make_collection(dbs, source='NIC', region='arctic', year='All',
                    root_href='', stacid='', description='',
                    collection_license='MIT'):
    """Create a collection of STAC items read from a database.

    Args:
        dbs: A ``StackDB`` instance, or a string passed to ``StackDB`` to
            open one.
        source: Source filter forwarded to ``get_stac_items``.
        region: Region filter forwarded to ``get_stac_items``.
        year: Year filter forwarded to ``get_stac_items``.
        root_href: Root href used to normalize the collection hrefs.
        stacid: Id of the new collection.
        description: Description of the new collection.
        collection_license: License of the new collection.

    Returns:
        The populated ``pystac.Collection``, or ``None`` (after printing a
        message) when ``dbs`` is of an unsupported type or no items match.
    """
    if isinstance(dbs, str):
        db = StackDB(dbs)
    elif isinstance(dbs, StackDB):
        db = dbs
    else:
        print('Please specify a database or database name')
        return None

    cis = db.get_stac_items(source=source, region=region, year=year)
    if len(cis) < 1:
        print('No data found for source {0}, region {1}, year {2}'.format(
            source, region, year))
        return None

    # The first column of every row holds the serialized STAC object.
    stacs = [pystac.stac_object_from_dict(json.loads(cs[0])) for cs in cis]

    spatial_extent = []
    for stac in stacs:
        spatial_extent = biggest_bbox(spatial_extent, stac.bbox)

    # Take the temporal bounds straight from the items. The lower bound used
    # to be seeded with the current time, which gave a wrong interval
    # whenever every item was dated in the future.
    mindate = min(stac.datetime for stac in stacs)
    maxdate = max(stac.datetime for stac in stacs)

    extent = pystac.Extent(
        spatial=pystac.SpatialExtent(bboxes=[spatial_extent]),
        temporal=pystac.TemporalExtent([[mindate, maxdate]]))
    collection = pystac.Collection(id=stacid,
                                   description=description,
                                   extent=extent,
                                   license=collection_license)
    collection.add_items(stacs)
    collection.normalize_hrefs(root_href=root_href)
    return collection
def catalog(self):
    """Check if catalog exists and create it otherwise.

    Nothing is created when ``self.catalog_path`` is None or a catalog is
    already cached in ``self._catalog``. Otherwise any file already present
    at ``self.catalog_path`` is deleted and a new object is cached: a
    ``pystac.Collection`` when ``self.with_bbox`` is truthy, else a plain
    ``pystac.Catalog``.

    Returns:
        The cached catalog object, or None when no catalog path is set.
    """
    if self.catalog_path is not None and self._catalog is None:
        # Start from scratch: drop a catalog file left by a previous run.
        if os.path.isfile(self.catalog_path):
            os.remove(self.catalog_path)

        catalog_id = "Sen2Like_catalog" if self.sid is None else self.sid
        catalog_title = "Sen2Like Catalog" if self.title is None else self.title
        description = "Catalog containing Sen2Like generated products"

        if self.with_bbox:
            # The west bound used to be +180, which collapsed the box to a
            # line; bboxes and intervals are also lists of lists.
            extent = pystac.Extent(
                pystac.SpatialExtent([[-180, -56, 180, 83]]),
                pystac.TemporalExtent([[None, None]]))
            self._catalog = pystac.Collection(id=catalog_id,
                                              title=catalog_title,
                                              href=self.catalog_path,
                                              description=description,
                                              extent=extent)
        else:
            self._catalog = pystac.Catalog(id=catalog_id,
                                           title=catalog_title,
                                           href=self.catalog_path,
                                           description=description)
    return self._catalog
def get_planet_cog_collection(num_bands: int = 4):
    """Build the STAC collection describing Planet COGs with ``num_bands`` bands.

    Args:
        num_bands: Number of bands of the imagery. ``eo:bands`` is only set
            for 4 (the first three ``PLANET_BANDS`` plus the fifth) and
            5 (all ``PLANET_BANDS``).

    Returns:
        A ``pystac.Collection`` with no links whose ``properties`` hold the
        band list (when applicable) and fixed wavelength bounds.
    """
    collection_id = planet_cog_collection_id(num_bands)
    description = f'Planet Imagery: {source_collection_id(num_bands)}'
    extent = pystac.Extent(
        pystac.SpatialExtent([[-180, -90, 180, 90]]),
        pystac.TemporalExtent([[
            datetime(1307, 10, 13, tzinfo=timezone.utc),
            datetime(2063, 4, 5, tzinfo=timezone.utc),
        ]]),
    )
    collection = pystac.Collection(
        collection_id,
        description,
        extent,
        stac_extensions=COG_COLLECTION_EXTENSIONS,
    )
    collection.links = []

    properties = {}
    if num_bands == 4:
        properties['eo:bands'] = PLANET_BANDS[:3] + PLANET_BANDS[4:5]
    elif num_bands == 5:
        properties['eo:bands'] = PLANET_BANDS
    properties['hsi:wavelength_min'] = 440.0
    properties['hsi:wavelength_max'] = 950.0
    collection.properties = properties

    return collection
item2.add_asset(key='metadata', asset=stac.Asset(href=metapath1, media_type=stac.MediaType.XML)) item2.add_asset(key='thumbnail', asset=stac.Asset(href=thumbpath2, media_type=stac.MediaType.PNG)) ## Temporal and Spatial Extent collection_interval = sorted([item1.datetime, item2.datetime]) temporal_extent = stac.TemporalExtent(intervals=[collection_interval]) spatial_extent = stac.SpatialExtent(bboxes=[bbox1, bbox2]) collection_extent = stac.Extent(spatial=spatial_extent, temporal=temporal_extent) collection = stac.Collection(id='static-maps', description='Collection of Static GIS Maps', extent=collection_extent, license='CC-BY-SA-4.0') collection.add_items([item1, item2]) catalog.add_child(collection) catalog.describe() catalog.normalize_hrefs(os.path.join(catalog_dir, 'stac')) catalog.save(catalog_type=stac.CatalogType.RELATIVE_PUBLISHED) ##Replace local path in catalog.json with web path and upload to s3. os.system( "/usr/local/bin/aws2 s3 sync /home/rstudio/IanBreckheimer/SDP_S3_Uploads/draft_products/ s3://rmbl-sdp/data_products/draft/ --acl 'public-read'"
PRISMA_ARCHIVE_COLLECTION_ID = "prisma" COG_COLLECTION_EXTENSIONS = [ 'https://stac-extensions.github.io/eo/v1.0.0/schema.json', 'https://github.com/azavea/nasa-hyperspectral/tree/master/docs/stac/hsi/json-schema/schema.json' ] COG_ITEM_EXTENSIONS = COG_COLLECTION_EXTENSIONS + \ ['https://stac-extensions.github.io/projection/v1.0.0/schema.json'] PRISMA_COG_COLLECTION = pystac.Collection( "prisma-cogs", "PRISMA Imagery converted to pixel-interleaved COGs", pystac.Extent( pystac.SpatialExtent([[-180, -90, 180, 90]]), pystac.TemporalExtent([[ datetime(2014, 1, 1, tzinfo=timezone.utc), datetime(2020, 1, 1, tzinfo=timezone.utc), ]]), ), stac_extensions=COG_COLLECTION_EXTENSIONS) PRISMA_COG_COLLECTION.links = [] PRISMA_COG_COLLECTION.properties = {} # https://directory.eoportal.org/web/eoportal/satellite-missions/p/prisma-hyperspectral def activation_output(item_id: str): with open('/tmp/activator-output.json', 'w') as outfile: json.dump( {
def main():
    """Convert one Sentinel-2 STAC item into a COG and register it.

    Fetches the item named by ``--sentinel-stac-id`` from the Sentinel STAC
    API, derives a COG item from it, runs ``gather_sentinel`` to produce the
    raster, and posts the new item to the target STAC API (creating the COG
    collection first when it is missing).

    Returns:
        None on success or when the item already exists, -1 when the STAC
        API response for the posted item carries no id.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline-uri",
                        type=str,
                        help="A URI to JSON with instructions")
    parser.add_argument("--pipeline",
                        type=str,
                        help="JSON with instructions")
    parser.add_argument(
        "--sentinel-stac-id",
        type=str,
        help="STAC Item ID to process from the STAC collection")
    parser.add_argument(
        "--sentinel-collection-id",
        type=str,
        default=SENTINEL_ARCHIVE_COLLECTION_ID,
    )
    parser.add_argument(
        "--stac-api-uri",
        type=str,
        default=os.getenv("STAC_API_URI", "http://franklin:9090"),
    )
    parser.add_argument(
        "--stac-api-uri-sentinel",
        type=str,
        default=os.getenv("STAC_API_URI_SENTINEL",
                          "https://earth-search.aws.element84.com/v0"),
    )
    parser.add_argument("--s3-bucket",
                        type=str,
                        default=os.getenv("S3_BUCKET", "sentinel-s2-data"))
    parser.add_argument(
        "--s3-prefix",
        type=str,
        default=os.getenv("S3_PREFIX", "aviris-scene-cogs-l2"),
    )
    parser.add_argument("--temp-dir",
                        type=str,
                        default=os.getenv("TEMP_DIR"))
    parser.add_argument("--output-format",
                        type=str,
                        default=os.getenv("GDAL_OUTPUT_FORMAT", "COG"))
    parser.add_argument(
        "--keep-temp-dir",
        action="store_true",
        help="If provided, script does not delete temporary directory "
        "before script exits. Useful for debugging.",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="If provided, force reingest StacItem even though this it is "
        "already present in the catalog.",
    )

    # An unset variable means "no limit"; a non-numeric value still raises.
    raw_warp_limit = os.getenv("GDAL_WARP_MEMORY_LIMIT")
    warpMemoryLimit = None if raw_warp_limit is None else int(raw_warp_limit)

    # TODO: replace it with parser.parse_args() later
    cli_args, cli_unknown = parser.parse_known_args()

    # parse all cli arguments
    args = CliConfig(cli_args, cli_unknown)

    s3 = boto3.client("s3")
    stac_client_sentinel = STACClient(args.stac_api_uri_sentinel)
    stac_client = STACClient(args.stac_api_uri)

    source_collection = stac_client_sentinel.get_collection(
        args.sentinel_collection_id)

    SENTINEL_COG_COLLECTION = pystac.Collection(
        SENTINEL_COG_COLLECTION_ID,
        "Sentinel-2a and Sentinel-2b imagery, processed to Level 2A (Surface Reflectance) and converted to Cloud-Optimized GeoTIFFs",
        source_collection.extent,
        stac_extensions=COG_COLLECTION_EXTENSIONS)
    SENTINEL_COG_COLLECTION.links = []
    SENTINEL_COG_COLLECTION.properties = {}
    SENTINEL_COG_COLLECTION.properties['eo:bands'] = SENTINEL_BANDS
    SENTINEL_COG_COLLECTION.properties[
        'hsi:wavelength_min'] = SENTINEL_WAVELENGTH_MIN
    SENTINEL_COG_COLLECTION.properties[
        'hsi:wavelength_max'] = SENTINEL_WAVELENGTH_MAX

    # GET STAC Item from SENTINEL Catalog
    item = stac_client_sentinel.get_collection_item(
        args.sentinel_collection_id, args.sentinel_stac_id)

    assets = item.assets
    # The WVP and SCL assets are deliberately left out.
    band_names = ('B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08',
                  'B8A', 'B09', 'B11', 'B12', 'AOT')
    bands_map = {
        band: vsis3(strip_scheme(assets[band].href))
        for band in band_names
    }

    # we don't need assets here, since the gather scripts knows what and how
    # to download by the sentinel path
    properties = item.properties
    acquired = dateutil.parser.isoparse(properties['datetime'])

    # here "href": "s3://sentinel-s2-l2a/tiles/31/V/CE/2021/8/19/0/R60m/B01.jp2"
    # path is tiles/31/V/CE/2021/8/19/0
    sentintel_path = 'tiles/{}/{}/{}/{}/{}/{}/{}'.format(
        properties['sentinel:utm_zone'],
        properties['sentinel:latitude_band'],
        properties['sentinel:grid_square'],
        str(acquired.year),
        str(acquired.month),
        str(acquired.day),
        properties['sentinel:sequence'])

    # Create new COG STAC Item
    cog_collection_id = SENTINEL_COG_COLLECTION.id
    cog_item_id = "{}_{}".format(cog_collection_id, item.id)
    cog_item = pystac.Item(
        cog_item_id,
        item.geometry,
        item.bbox,
        item.datetime,
        item.properties,
        stac_extensions=COG_ITEM_EXTENSIONS,
        collection=cog_collection_id,
    )

    # Copy the band description of the collection onto the item.
    for key in ('eo:bands', 'hsi:wavelength_min', 'hsi:wavelength_max'):
        cog_item.properties[key] = SENTINEL_COG_COLLECTION.properties[key]
    cog_item.properties['proj:epsg'] = '4326'

    # Create COG Collection if it doesn't exist
    if not stac_client.has_collection(cog_collection_id):
        stac_client.post_collection(SENTINEL_COG_COLLECTION)

    if not args.force:
        # Exit early if COG STAC Item already exists
        try:
            stac_client.get_collection_item(cog_collection_id, cog_item_id)
            logger.info(f'STAC Item {cog_item_id} already exists. Exiting.')
            activation_output(cog_item_id)
            return
        except requests.exceptions.HTTPError:
            pass

    target_prefix = f's3://{args.s3_bucket}/{args.s3_prefix}/{sentintel_path}/'
    _, s3_uri = gather_sentinel(f'{cog_item_id}.tiff', target_prefix,
                                bands_map)

    # Add assets to COG STAC Item
    idx = 0
    cog_asset = pystac.Asset(s3_uri,
                             media_type=pystac.MediaType.COG,
                             roles=["data"])
    cog_item.add_asset(f'{args.output_asset_name}_{idx}', cog_asset)

    # Add COG Item to the Sentinel COG STAC Collection
    logger.info(f"POST Item {cog_item.id} to {args.stac_api_uri}")
    item_data = stac_client.post_collection_item(cog_collection_id, cog_item)
    if not item_data.get('id'):
        logger.error(f"Failure: {item_data}")
        return -1

    logger.info(f"Success: {item_data['id']}")
    activation_output(item_data['id'])
def main():
    """Pull Copernicus EU Rapid Mapping Activations data from the GeoRSS feed.

    The work happens in three stages:

    1. Download (when not already on disk) the activations feed and the
       per-event feeds, and collect every vector delineation/grading product
       of flood events dated 2019-01-01 or later.
    2. Write those products to a GeoJSON file.
    3. For each product, search for Sentinel 2 L2A scenes within 12 hours of
       the event and, when any exist, record the product and the scenes as
       linked STAC Items in a self-contained catalog.

    Raises:
        ValueError: If either SentinelHub OAuth environment variable is unset.
        AssertionError: If an event description does not contain exactly one
            event date/time, or a product id does not parse as expected.
    """
    sentinel_oauth_id = os.environ.get("SENTINELHUB_OAUTH_ID")
    sentinel_oauth_secret = os.environ.get("SENTINELHUB_OAUTH_SECRET")
    if sentinel_oauth_id is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_ID")
    if sentinel_oauth_secret is None:
        raise ValueError("Must set SENTINELHUB_OAUTH_SECRET")

    feed_url = "https://emergency.copernicus.eu/mapping/activations-rapid/feed"
    feed_file = Path("./data/copernicus-rapid-mapping-activations.xml")
    if not feed_file.is_file():
        logger.info("Pulling {}...".format(feed_url))
        urlretrieve(feed_url, str(feed_file))

    event_xml_dir = Path("./data/event-xml")
    os.makedirs(event_xml_dir, exist_ok=True)

    # Events before this instant are ignored.
    earliest_event = datetime(2019, 1, 1, 0, 0, 0, tzinfo=timezone.utc)

    # Generate a list of all unique CEMS products (combination of event, aoi,
    # monitoring type, revision and version) for all flood events in 2019 and 2020
    products = []
    feed_root = ET.parse(feed_file).getroot()
    for event in feed_root.iter("item"):
        category = event.find("category").text.strip().lower()
        if category != "flood":
            continue

        event_id = event.find("guid").text
        title = event.find("title").text
        rss_url = event.find("{http://www.iwg-sem.org/}activationRSS").text
        logger.info(title)

        description = event.find("description").text
        event_dts = re.findall(
            r"Date\/Time of Event \(UTC\):[</b>\s]*?(\d{4}-\d{1,2}-\d{1,2} \d{1,2}:\d{2}:\d{2})",
            description,
            flags=re.MULTILINE,
        )
        if len(event_dts) != 1:
            logger.warning("{}: Available event date times {}".format(
                title, event_dts))
            raise AssertionError()

        event_datetime = datetime.strptime(
            event_dts[0], "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
        if event_datetime < earliest_event:
            continue

        event_country = event.find(
            "{http://www.iwg-sem.org/}activationAffectedCountries").text

        event_xml_file = Path(event_xml_dir, event_id).with_suffix(".xml")
        if not event_xml_file.is_file():
            logger.info("\tPulling {} GeoRSS: {}...".format(
                event_id, event_xml_file))
            urlretrieve(rss_url, event_xml_file)

        event_root = ET.parse(event_xml_file).getroot()
        for item in event_root.iter("item"):
            # A missing element yields None from find(), hence the
            # AttributeError fallback to an empty string.
            try:
                data_type = item.find("{http://www.gdacs.org/}cemsctype").text
            except AttributeError:
                data_type = ""
            try:
                product_type = item.find(
                    "{http://www.gdacs.org/}cemsptype").text
            except AttributeError:
                product_type = ""

            # Only care about downloading VECTOR data for Delineation product
            # More info at https://emergency.copernicus.eu/mapping/ems/rapid-mapping-portfolio
            if data_type != "VECTOR" or product_type not in ("DEL", "GRA"):
                continue

            item_url = urlparse(item.find("link").text)
            _, _, product_id, version_id = item_url.path.lstrip("/").split("/")
            (
                product_event_id,
                aoi_id,
                product_type_id,
                monitoring_type,
                revision_id,
                data_type_id,
            ) = product_id.split("_")

            # Some sanity checks to ensure we've parsed our product id string correctly
            assert event_id == product_event_id
            assert product_type_id == product_type
            assert data_type_id == "VECTORS"

            georss_polygon = item.find(
                "{http://www.georss.org/georss}polygon").text
            # Split string, group number pairs, convert to float and swap pairs to lon first
            polygon = Polygon(
                (float(pair[1]), float(pair[0]))
                for pair in grouper(georss_polygon.split(" "), 2))

            # Rebuild product_id from scratch because we need to include version
            full_product_id = "_".join([
                event_id,
                aoi_id,
                product_type_id,
                monitoring_type,
                revision_id,
                version_id,
                data_type_id,
            ])
            products.append(
                EventProduct(
                    full_product_id,
                    event_id,
                    event_country,
                    aoi_id,
                    event_datetime.timestamp(),
                    polygon,
                    data_type_id,
                    product_type_id,
                    monitoring_type,
                    revision_id,
                    version_id,
                    urlunparse(item_url),
                ))

    df = gpd.GeoDataFrame(products)
    geojson_file = "./data/cems-rapid-mapping-flood-products-2019-2020.geojson"
    logger.info(
        "Writing GeoJSON of flood event products to {}".format(geojson_file))
    df.to_file(geojson_file, driver="GeoJSON")

    sentinel_session = get_session(sentinel_oauth_id, sentinel_oauth_secret)

    catalog = pystac.Catalog(
        "copernicus-rapid-mapping-floods-2019-2020",
        "Copernicus Rapid Mapping provisions geospatial information within hours or days from the activation in support of emergency management activities immediately following a disaster. Standardised mapping products are provided: e.g. to ascertain the situation before the event (reference product), to roughly identify and assess the most affected locations (first estimate product), assess the geographical extent of the event (delineation product) or to evaluate the intensity and scope of the damage resulting from the event (grading product). This catalog contains a subset of products for flood events from 2019-2020 that intersect with Sentinel 2 L2A Chips.",
        title="Copernicus Rapid Mapping Floods 2019-2020",
    )

    s2_collection = pystac.Collection(
        "Sentinel-2-L2A",
        "Sentinel 2 L2A images corresponding to CEMS rapid mapping floods",
        pystac.Extent(
            pystac.SpatialExtent([None, None, None, None]),
            pystac.TemporalExtent([(
                # TODO: Make this more specific by looping actual dts
                # after ingest
                datetime(2019, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
                datetime(2020, 12, 31, 23, 59, 59, tzinfo=timezone.utc),
            )]),
        ),
    )
    catalog.add_child(s2_collection)

    def by_event_id(product):
        return product.event_id

    # Loop Products grouped by event id, lookup Sentinel 2 matches for each
    # Product, and create STAC Items in catalog for any matches
    # (groupby only groups adjacent entries, so sort by the same key first)
    sorted_products = sorted(products, key=by_event_id)
    for event_id, event_products in groupby(sorted_products, key=by_event_id):
        for p in event_products:
            event_datetime = datetime.fromtimestamp(p.event_time,
                                                    tz=timezone.utc)
            search_window = timedelta(hours=12)

            # Check for sentinel 2 results before anything else, so we
            # don't do unnecessary work. We'll use these results later
            # after we've created our STAC Item
            response = stac_search(
                p.geometry.bounds,
                "sentinel-2-l2a",
                event_datetime - search_window,
                event_datetime + search_window,
                sentinel_session,
            ).json()

            if len(response["features"]) < 1:
                logger.debug("No Sentinel 2 results for {}".format(
                    p.product_id))
                continue

            event_collection = catalog.get_child(event_id)
            if event_collection is None:
                event_collection = pystac.Collection(
                    event_id,
                    "",
                    pystac.Extent(
                        pystac.SpatialExtent([None, None, None, None]),
                        pystac.TemporalExtent([(event_datetime, None)]),
                    ),
                )
                catalog.add_child(event_collection)

            item_properties = {
                "aoi_id": p.aoi_id,
                "country": p.event_country,
                "event_id": p.event_id,
                "product_type": p.product_type,
                "data_type": p.data_type,
                "monitoring_type": p.monitoring_type,
                "revision": p.revision,
                "version": p.version,
            }
            pystac_item = pystac.Item(
                p.product_id,
                mapping(p.geometry),
                p.geometry.bounds,
                event_datetime,
                properties=item_properties,
            )
            event_collection.add_item(pystac_item)

            url_link = pystac.Link("alternate",
                                   p.product_link,
                                   media_type="text/html")
            pystac_item.add_link(url_link)

            # Get or create Item in S2 collection for each match from
            # SentinelHub and add as links to our Product Item
            for feature in response["features"]:
                s2_item = s2_collection.get_item(feature["id"])
                if s2_item is None:
                    s2_item = pystac.Item.from_dict(feature)
                    s2_collection.add_item(s2_item)

                s2_link = pystac.Link(
                    "data", s2_item,
                    link_type=pystac.LinkType.RELATIVE).set_owner(pystac_item)
                pystac_item.add_link(s2_link)

            logger.info("Created STAC Item {} with {} Sentinel 2 links".format(
                p.product_id, len(response["features"])))

    # Set spatial extents
    for collection in catalog.get_children():
        if not isinstance(collection, pystac.Collection):
            continue
        item_shapes = [shape(s.geometry) for s in collection.get_all_items()]
        bounds = GeometryCollection(item_shapes).bounds
        collection.extent.spatial = pystac.SpatialExtent(bounds)

    catalog_root = "./data/catalog"
    logger.info("Writing STAC Catalog to {}...".format(catalog_root))
    catalog.normalize_and_save(catalog_root,
                               pystac.CatalogType.SELF_CONTAINED)