def create_asset(self, href: str) -> pystac.Asset:
    """Build a new :class:`~pystac.Asset` from this ``AssetDefinition``.

    Args:
        href: HREF to assign to the created asset.

    Returns:
        pystac.Asset: Asset carrying this definition's title, description,
        media type and roles, plus all remaining properties as extra fields.
    """
    # Keys already mapped onto first-class Asset attributes; everything
    # else in ``self.properties`` passes through as extra fields.
    reserved_keys = {
        ASSET_TITLE_PROP,
        ASSET_DESC_PROP,
        ASSET_TYPE_PROP,
        ASSET_ROLES_PROP,
    }
    passthrough = {
        key: value
        for key, value in self.properties.items()
        if key not in reserved_keys
    }
    return pystac.Asset(
        href=href,
        title=self.title,
        description=self.description,
        media_type=self.media_type,
        roles=self.roles,
        extra_fields=passthrough,
    )
def add_stac(self, tile):
    """Build a pystac.Item describing ``tile``, or return None when the
    tile has no polygon footprint."""
    if not tile.poly:
        return None

    item = pystac.Item(
        tile.name,
        mapping(tile.poly),
        list(tile.poly.bounds),
        datetime.datetime.now(),
        {'description': 'A USGS Lidar pointcloud in Entwine/EPT format'})

    # Pointcloud extension: one Schema per dimension in the EPT metadata.
    schemas = [Schema(dim) for dim in tile.ept['schema']]
    PointcloudExtension.add_to(item)
    PointcloudExtension.ext(item).apply(
        tile.num_points,
        PhenomenologyType.LIDAR,
        "ept",
        schemas,
    )

    # Projection extension: data is served in web mercator (EPSG:3857).
    ProjectionExtension.add_to(item)
    ProjectionExtension.ext(item).apply(3857, projjson=PROJJSON)

    item.add_asset(
        'ept.json',
        pystac.Asset(tile.url, 'entwine', 'The ept.json for accessing data'))

    item.add_links([
        pystac.Link('self', f'{self.args.stac_base_url}{tile.name}.json'),
        pystac.Link('parent', f'{self.args.stac_base_url}catalog.json'),
    ])
    return item
def generate_stac_item(filename_tiff, cog_collection, planet_id, s3_uri):
    """Create a pystac.Item for a COG, using ``gdalinfo`` for the metadata.

    Args:
        filename_tiff: Local path to the ``.tiff`` to inspect.
        cog_collection: Collection whose eo/hsi properties are copied onto
            the item.
        planet_id: Identifier to assign to the new item.
        s3_uri: Location of the COG; recorded as the item's ``tiff_0`` asset.

    Returns:
        pystac.Item: The populated item.
    """
    logger.info('Using gdalinfo to get metadata')
    filename_json = filename_tiff.replace('.tiff', '.json')
    # NOTE(review): shell interpolation of the filename; acceptable for
    # trusted local paths, but prefer subprocess.run([...]) if paths can
    # come from untrusted input.
    os.system(f'gdalinfo -proj4 -json {filename_tiff} > {filename_json}')
    with open(filename_json, 'r') as f:
        data = json.load(f)

    logger.info('Organizing metadata')
    # gdalinfo stores default-domain TIFF tags under the '' metadata key.
    tifftag_datetime = data.get('metadata').get('').get('TIFFTAG_DATETIME')
    year, month, day = [
        int(n) for n in tifftag_datetime.split(' ')[0].split(':')
    ]
    dt = datetime(year, month, day, tzinfo=timezone.utc)

    polygon = data.get('wgs84Extent')
    coords = polygon.get('coordinates')
    crs = CRS.from_string(data.get('coordinateSystem').get('proj4'))
    # Unwrap nested single-element rings until we reach the (lon, lat) pairs.
    while len(coords) == 1:
        coords = coords[0]
    lons = [lon for (lon, lat) in coords]
    lats = [lat for (lon, lat) in coords]
    bbox = [min(lons), min(lats), max(lons), max(lats)]

    props = {
        'eo:bands': cog_collection.properties['eo:bands'],
        'hsi:wavelength_min': cog_collection.properties['hsi:wavelength_min'],
        # Fixed: this entry previously duplicated the 'hsi:wavelength_min'
        # key, so the item was emitted without a wavelength maximum.
        'hsi:wavelength_max': cog_collection.properties['hsi:wavelength_max'],
        'proj:epsg': crs.to_authority()[-1],
    }

    logger.info('Creating new cog item')
    cog_item = pystac.Item(planet_id,
                           polygon,
                           bbox,
                           dt,
                           props,
                           stac_extensions=COG_ITEM_EXTENSIONS,
                           collection=cog_collection.id)
    cog_item.add_asset(
        'tiff_0',
        pystac.Asset(s3_uri, media_type=pystac.MediaType.COG, roles=['data']))
    return cog_item
def test_asset_bands(self):
    """eo bands can be read from assets, copied between them, and set on
    newly added assets."""
    item = pystac.read_file(self.LANDSAT_EXAMPLE_URI)

    # Get: a band asset reports its single band...
    bands_b1 = item.ext.eo.get_bands(item.assets['B1'])
    self.assertIsNot(None, bands_b1)
    self.assertEqual(len(bands_b1), 1)
    self.assertEqual(bands_b1[0].name, 'B1')

    # ...while a non-band asset reports none.
    self.assertIs(None, item.ext.eo.get_bands(item.assets['index']))

    # Set: copy B1's bands onto the B2 asset and observe the change.
    self.assertEqual(item.ext.eo.get_bands(item.assets['B2'])[0].name, "B2")
    item.ext.eo.set_bands(item.ext.eo.get_bands(item.assets['B1']),
                          item.assets['B2'])
    self.assertEqual(item.ext.eo.get_bands(item.assets['B2'])[0].name, 'B1')
    item.validate()

    # Check adding a new asset with its own band list.
    rgb_bands = [
        Band.create(name=color, description=Band.band_description(color))
        for color in ("red", "green", "blue")
    ]
    new_asset = pystac.Asset(href="some/path.tif",
                             media_type=pystac.MediaType.GEOTIFF)
    item.ext.eo.set_bands(rgb_bands, new_asset)
    item.add_asset("test", new_asset)
    self.assertEqual(len(item.assets["test"].properties["eo:bands"]), 3)
def add_asset(self, item: pystac.Item, mtl_metadata: MtlMetadata,
              base_href: str) -> None:
    """Attach this definition's asset to ``item`` under ``self.key``.

    Sets per-asset gsd, eo:bands, and projection shape/transform as
    dictated by the definition's flags and the MTL metadata.
    """
    asset = pystac.Asset(href=self.get_href(base_href),
                         media_type=self.media_type)
    if self.title:
        asset.title = self.title
    if self.description:
        asset.description = self.description

    # common_metadata: prefer an explicit gsd on the definition; otherwise
    # derive it from the MTL metadata, recording it on the asset only when
    # it differs from the item-level value.
    if self.gsd is not None:
        item.common_metadata.set_gsd(self.gsd, asset)
    else:
        if self.is_sr or self.is_qa:
            sr_gsd = mtl_metadata.sr_gsd
            if item.common_metadata.gsd != sr_gsd:
                item.common_metadata.set_gsd(sr_gsd, asset)
        if self.is_thermal:
            thermal_gsd = mtl_metadata.thermal_gsd
            if item.common_metadata.gsd != thermal_gsd:
                item.common_metadata.set_gsd(thermal_gsd, asset)

    # eo
    if self.bands:
        asset.properties["eo:bands"] = [band.to_dict() for band in self.bands]

    # projection
    if self.is_sr or self.is_qa:
        item.ext.projection.set_shape(mtl_metadata.sr_shape, asset)
        item.ext.projection.set_transform(mtl_metadata.sr_transform, asset)
    if self.is_thermal:
        item.ext.projection.set_shape(mtl_metadata.thermal_shape, asset)
        item.ext.projection.set_transform(mtl_metadata.thermal_transform,
                                          asset)

    item.add_asset(self.key, asset)
def add_assets(item, base_url):
    """Add the standard Landsat non-band and per-band assets to ``item``.

    Args:
        item: pystac.Item to receive the assets.
        base_url: Scene URL prefix shared by all of the scene's files.
    """
    # Add non-band assets.
    item.add_asset(
        'thumbnail',
        pystac.Asset(title='Thumbnail',
                     href=base_url + '_thumb_large.jpg',
                     media_type=pystac.MediaType.JPEG,
                     roles=['thumbnail']))
    item.add_asset(
        'index',
        pystac.Asset(title='HTML Page',
                     href=os.path.dirname(base_url) + '/index.html',
                     # Fixed: 'application/html' is not a registered media
                     # type; HTML is 'text/html'.
                     media_type='text/html'))
    item.add_asset(
        'ANG',
        pystac.Asset(title='ANG Metadata',
                     href=base_url + '_ANG.txt',
                     media_type='text/plain',
                     roles=['metadata']))
    item.add_asset(
        'MTL',
        pystac.Asset(title='MTL Metadata',
                     href=base_url + '_MTL.txt',
                     media_type='text/plain',
                     roles=['metadata']))
    item.add_asset(
        'BQA',
        pystac.Asset(title='Quality Band',
                     href=base_url + '_BQA.TIF',
                     media_type=pystac.MediaType.GEOTIFF,
                     roles=['quality']))

    # Add bands.
    for band_id, info in band_info.items():
        band_url = f"{base_url}_{band_id}.TIF"
        asset = pystac.Asset(href=band_url, media_type=pystac.MediaType.COG)
        bands = [info['band']]
        item.ext.eo.set_bands(bands, asset)
        item.add_asset(band_id, asset)

        # If this asset has a different GSD than the item, set it on the asset
        if info['gsd'] != item.common_metadata.gsd:
            item.common_metadata.set_gsd(info['gsd'], asset)
from datetime import datetime

# Two items covering related footprints: a canopy-height raster and a
# landcover raster, each with data / metadata / thumbnail assets.
item1 = stac.Item(id='canopy-height',
                  geometry=footprint1,
                  bbox=bbox1,
                  datetime=datetime(2018, 7, 5),
                  properties={})
item2 = stac.Item(id='landcover',
                  geometry=footprint2,
                  bbox=bbox2,
                  datetime=datetime(2019, 7, 5),
                  properties={})

item1.add_asset(key='data',
                asset=stac.Asset(href=path1, media_type=stac.MediaType.COG))
item1.add_asset(key='metadata',
                asset=stac.Asset(href=metapath1,
                                 media_type=stac.MediaType.XML))
item1.add_asset(key='thumbnail',
                asset=stac.Asset(href=thumbpath1,
                                 media_type=stac.MediaType.PNG))

item2.add_asset(key='data',
                asset=stac.Asset(href=path2, media_type=stac.MediaType.COG))
# Fixed: item2's metadata previously pointed at metapath1 (item1's file),
# which mismatched its own data/thumbnail paths (path2 / thumbpath2).
item2.add_asset(key='metadata',
                asset=stac.Asset(href=metapath2,
                                 media_type=stac.MediaType.XML))
item2.add_asset(key='thumbnail',
                asset=stac.Asset(href=thumbpath2,
                                 media_type=stac.MediaType.PNG))
def aviris_series_to_item(s2_scenes_map, series):
    """
    Convert AVIRIS CSV series to another Series compatible with stacframes

    s2_scenes_map is an object where the key is Flight Scene and the value
    is an ftp url to that Flight Scene's s2 atmo corrected data file

    This method is currently valid for both AVIRIS Class and AVIRIS NG
    """
    year = int(series["Year"])
    # Clamp hour/minute into valid ranges; missing columns default to 0.
    hour = min(max(int(series.get("UTC Hour", 0)), 0), 23)
    minute = min(max(int(series.get("UTC Minute", 0)), 0), 59)
    try:
        flight_dt = datetime(int(year),
                             int(series["Month"]),
                             int(series["Day"]),
                             tzinfo=timezone.utc) + timedelta(hours=hour,
                                                              minutes=minute)
    except (ValueError, OverflowError):
        # Fall back to the combined "Date" column (M/D/Y) when the separate
        # Month/Day columns cannot form a valid date.
        [month, day, year] = series["Date"].split("/")
        flight_dt = datetime(
            int(year), int(month), int(day),
            tzinfo=timezone.utc) + timedelta(hours=hour, minutes=minute)
    item_id = "aviris_{}".format(series["Flight Scene"])
    # Footprint corners arrive as Lon1..Lon4 / Lat1..Lat4 columns.
    lons = [float(series["Lon{}".format(n)]) for n in range(1, 5)]
    lats = [float(series["Lat{}".format(n)]) for n in range(1, 5)]
    bbox = [min(lons), min(lats), max(lons), max(lats)]
    try:
        geometry = kml_poly_to_geom(series["kml_poly"])
    except IndexError:
        # No usable KML polygon; fall back to the bbox rectangle.
        geometry = box(*bbox)
    # Copy the whitelisted CSV columns straight into Item properties.
    properties = {
        k: series[k]
        for k in (
            "Year",
            "Site Name",
            "NASA Log",
            "Investigator",
            "Comments",
            "Name",
            "Flight Scene",
            "RDN Ver",
            "Scene",
            "GEO Ver",
            "YY",
            "Tape",
            "Flight ID",
            "Flight",
            "Run",
            "Pixel Size",
            "Rotation",
            "Number of Lines",
            "Number of Samples",
            "Solar Elevation",
            "Solar Azimuth",
            "Mean Scene Elevation",
            "Min Scene Elevation",
            "Max Scene Elevation",
            "File Size (Bytes)",
            "Gzip File Size (Bytes)",
        )
    }
    # Add any layer ids of interest
    properties["layer:ids"] = [series["collection"]]
    # Assets are stored as plain dicts so the Series is JSON-serializable.
    assets = {
        "ftp":
        pystac.Asset(
            series["link_ftp"],
            title="ftp",
            description=
            "AVIRIS data archive. The file size is described by the 'Gzip File Size' property.",
            media_type="application/gzip",
        ).to_dict(),
        "kml_overlay":
        pystac.Asset(
            series["link_kml_overlay"],
            title="kml_overlay",
            description="KML file describing the bounding box of the flight",
            media_type="application/vnd.google-earth.kml+xml",
        ).to_dict(),
        "kml_outline":
        pystac.Asset(
            series["link_kml_outline"],
            title="kml_outline",
            description="KML file describing the flight outline",
            media_type="application/vnd.google-earth.kml+xml",
        ).to_dict(),
        "rgb":
        pystac.Asset(
            series["link_rgb"],
            title="rgb",
            description="Full resolution RGB image captured by the flight",
            media_type="image/jpeg",
        ).to_dict(),
        "rgb_small":
        pystac.Asset(
            series["link_rgb_small"],
            title="rgb_small",
            description=
            "A lower resolution thumbnail of the same image as the 'rgb' asset.",
            media_type="image/jpeg",
        ).to_dict(),
        "flight_log":
        pystac.Asset(
            series["link_log"],
            title="flight_log",
            description=
            "HTML page with table listing the runs for this flight.",
            media_type="text/html",
        ).to_dict(),
    }
    if series["Name"] in s2_scenes_map:
        # Include in properties so we can STAC API query for Items with this asset
        properties["has_refl"] = True
        assets["ftp_refl"] = pystac.Asset(
            s2_scenes_map[series["Name"]],
            title="ftp_refl",
            description=
            "AVIRIS data archive of atmospheric corrected imagery for this scene.",
            media_type="application/gzip",
        ).to_dict()
    return pd.Series({
        "id": item_id,
        "datetime": flight_dt,
        "geometry": geometry,
        "bbox": bbox,
        "properties": properties,
        "assets": assets,
        "links": [],
    })
def create_stac(self) -> pystac.Asset:
    """Return a pystac.Asset built from this object's href, properties and
    content type."""
    return pystac.Asset(href=self.href,
                        properties=self.properties,
                        media_type=self.get_content_type())
def image_asset_from_href(
        asset_href: str, item: pystac.Item,
        resolution_to_shape: Dict[int, Tuple[int, int]],
        proj_bbox: List[float],
        media_type: Optional[str] = None) -> Tuple[str, pystac.Asset]:
    """Create the (key, Asset) pair for one Sentinel-2 image file.

    Dispatches on markers in the href (``_PVI``, band ids, ``_TCI_``,
    ``_AOT_``, ``_WVP_``, ``_SCL_``) to pick the asset key, title, bands
    and per-asset projection fields.

    Raises:
        Exception: if no media type is supplied and none can be inferred
            from the file extension.
        ValueError: if the href matches none of the known image kinds.
    """
    logger.debug(f'Creating asset for image {asset_href}')

    _, ext = os.path.splitext(asset_href)
    if media_type is not None:
        asset_media_type = media_type
    else:
        # Infer the media type from the file extension when not supplied.
        if ext.lower() == '.jp2':
            asset_media_type = pystac.MediaType.JPEG2000
        elif ext.lower() in ['.tiff', '.tif']:
            asset_media_type = pystac.MediaType.GEOTIFF
        else:
            raise Exception(
                f'Must supply a media type for asset : {asset_href}')

    # Handle preview image
    if '_PVI' in asset_href:
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='True color preview',
                             roles=['data'])
        item.ext.eo.set_bands([
            SENTINEL_BANDS['B04'], SENTINEL_BANDS['B03'],
            SENTINEL_BANDS['B02']
        ], asset)
        return ('preview', asset)

    # Extract gsd and proj info
    gsd = extract_gsd(asset_href)
    shape = list(resolution_to_shape[int(gsd)])
    transform = transform_from_bbox(proj_bbox, shape)

    def set_asset_properties(asset):
        # Shared per-asset metadata: gsd plus projection shape/bbox/transform.
        item.common_metadata.set_gsd(gsd, asset)
        item.ext.projection.set_shape(shape, asset)
        item.ext.projection.set_bbox(proj_bbox, asset)
        item.ext.projection.set_transform(transform, asset)

    # Handle band image
    band_id_search = re.search(r'_(B\w{2})_', asset_href)
    if band_id_search is not None:
        band_id = band_id_search.group(1)
        band = SENTINEL_BANDS[band_id]
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title=band.description,
                             roles=['data'])
        item.ext.eo.set_bands([SENTINEL_BANDS[band_id]], asset)
        set_asset_properties(asset)
        return (band_id, asset)

    # Handle auxiliary images.
    # asset_href[-7:-4] below extracts the resolution tag embedded in the
    # filename just before the extension (used to disambiguate asset keys).
    if '_TCI_' in asset_href:
        # True color
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='True color image',
                             roles=['data'])
        item.ext.eo.set_bands([
            SENTINEL_BANDS['B04'], SENTINEL_BANDS['B03'],
            SENTINEL_BANDS['B02']
        ], asset)
        set_asset_properties(asset)
        return (f'visual-{asset_href[-7:-4]}', asset)
    if '_AOT_' in asset_href:
        # Aerosol
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='Aerosol optical thickness (AOT)',
                             roles=['data'])
        set_asset_properties(asset)
        return (f'AOT-{asset_href[-7:-4]}', asset)
    if '_WVP_' in asset_href:
        # Water vapor
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='Water vapour (WVP)',
                             roles=['data'])
        set_asset_properties(asset)
        return (f'WVP-{asset_href[-7:-4]}', asset)
    if '_SCL_' in asset_href:
        # Classification map
        asset = pystac.Asset(href=asset_href,
                             media_type=asset_media_type,
                             title='Scene classfication map (SCL)',
                             roles=['data'])
        set_asset_properties(asset)
        return (f'SCL-{asset_href[-7:-4]}', asset)
    raise ValueError(f'Unexpected asset: {asset_href}')
def create_item(
        granule_href: str,
        additional_providers: Optional[List[pystac.Provider]] = None,
        read_href_modifier: Optional[ReadHrefModifier] = None) -> pystac.Item:
    """Create a STAC Item from a Sentinel 2 granule.

    Arguments:
        granule_href: The HREF to the granule. This is expected to be a
            path to a SAFE archive, e.g.:
            https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/01/C/CV/2016/03/27/S2A_MSIL2A_20160327T204522_N0212_R128_T01CCV_20210214T042702.SAFE
        additional_providers: Optional list of additional providers to set
            into the Item
        read_href_modifier: A function that takes an HREF and returns a
            modified HREF. This can be used to modify a HREF to make it
            readable, e.g. appending an Azure SAS token or creating a
            signed URL.

    Returns:
        pystac.Item: An item representing the Sentinel 2 scene
    """  # noqa

    safe_manifest = SafeManifest(granule_href, read_href_modifier)
    product_metadata = ProductMetadata(safe_manifest.product_metadata_href,
                                       read_href_modifier)
    granule_metadata = GranuleMetadata(safe_manifest.granule_metadata_href,
                                       read_href_modifier)

    item = pystac.Item(id=product_metadata.product_id,
                       geometry=product_metadata.geometry,
                       bbox=product_metadata.bbox,
                       datetime=product_metadata.datetime,
                       properties={})

    # --Common metadata--

    item.common_metadata.providers = [SENTINEL_PROVIDER]
    if additional_providers is not None:
        item.common_metadata.providers.extend(additional_providers)
    item.common_metadata.platform = product_metadata.platform
    item.common_metadata.constellation = SENTINEL_CONSTELLATION
    item.common_metadata.instruments = SENTINEL_INSTRUMENTS

    # --Extensions--

    # eo
    item.ext.enable('eo')
    item.ext.eo.cloud_cover = granule_metadata.cloudiness_percentage

    # sat
    item.ext.enable('sat')
    item.ext.sat.orbit_state = OrbitState(product_metadata.orbit_state.lower())
    item.ext.sat.relative_orbit = product_metadata.relative_orbit

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = granule_metadata.epsg
    if item.ext.projection.epsg is None:
        raise ValueError(
            f'Could not determine EPSG code for {granule_href}; which is required.'
        )

    # s2 properties
    item.properties.update({
        **product_metadata.metadata_dict,
        **granule_metadata.metadata_dict
    })

    # --Assets--

    # Metadata: each create_asset() returns a (key, Asset) pair.
    item.add_asset(*safe_manifest.create_asset())
    item.add_asset(*product_metadata.create_asset())
    item.add_asset(*granule_metadata.create_asset())
    item.add_asset(
        INSPIRE_METADATA_ASSET_KEY,
        pystac.Asset(href=safe_manifest.inspire_metadata_href,
                     media_type=pystac.MediaType.XML,
                     roles=['metadata']))
    item.add_asset(
        DATASTRIP_METADATA_ASSET_KEY,
        pystac.Asset(href=safe_manifest.datastrip_metadata_href,
                     media_type=pystac.MediaType.XML,
                     roles=['metadata']))

    # Image assets
    proj_bbox = granule_metadata.proj_bbox
    image_assets = dict([
        image_asset_from_href(os.path.join(granule_href, image_path), item,
                              granule_metadata.resolution_to_shape, proj_bbox,
                              product_metadata.image_media_type)
        for image_path in product_metadata.image_paths
    ])
    for key, asset in image_assets.items():
        assert key not in item.assets
        item.add_asset(key, asset)

    # Thumbnail
    if safe_manifest.thumbnail_href is not None:
        # NOTE(review): media_type COG for a thumbnail looks odd — confirm
        # the preview really is a GeoTIFF rather than a PNG/JPEG.
        item.add_asset(
            "preview",
            pystac.Asset(href=safe_manifest.thumbnail_href,
                         media_type=pystac.MediaType.COG,
                         roles=['thumbnail']))

    # --Links--
    item.links.append(SENTINEL_LICENSE)

    return item
def test_asset_bands(self) -> None:
    """raster:bands round-trips through get, set, create, clear and apply."""
    item = pystac.Item.from_file(self.PLANET_EXAMPLE_URI)
    item2 = pystac.Item.from_file(self.SENTINEL2_EXAMPLE_URI)

    # Get: the data asset exposes four fully-populated raster bands.
    data_asset = item.assets["data"]
    asset_bands = RasterExtension.ext(data_asset).bands
    assert asset_bands is not None
    self.assertEqual(len(asset_bands), 4)
    self.assertEqual(asset_bands[0].nodata, 0)
    self.assertEqual(asset_bands[0].sampling, Sampling.AREA)
    self.assertEqual(asset_bands[0].unit, "W⋅sr−1⋅m−2⋅nm−1")
    self.assertEqual(asset_bands[0].data_type, DataType.UINT16)
    self.assertEqual(asset_bands[0].scale, 0.01)
    self.assertEqual(asset_bands[0].offset, 0)
    self.assertEqual(asset_bands[0].spatial_resolution, 3)

    band0_stats = asset_bands[0].statistics
    assert band0_stats is not None
    self.assertEqual(band0_stats.minimum, 1962)
    self.assertEqual(band0_stats.maximum, 32925)
    self.assertEqual(band0_stats.mean, 8498.9400644319)
    self.assertEqual(band0_stats.stddev, 5056.1292002722)
    self.assertEqual(band0_stats.valid_percent, 61.09)

    band0_hist = asset_bands[0].histogram
    assert band0_hist is not None
    self.assertEqual(band0_hist.count, 256)
    self.assertEqual(band0_hist.min, 1901.288235294118)
    self.assertEqual(band0_hist.max, 32985.71176470588)
    self.assertEqual(len(band0_hist.buckets), band0_hist.count)

    # A non-raster asset exposes no bands.
    index_asset = item.assets["metadata"]
    asset_bands = RasterExtension.ext(index_asset).bands
    self.assertIs(None, asset_bands)

    # String nodata sentinel survives the round-trip.
    b09_asset = item2.assets["B09"]
    b09_bands = RasterExtension.ext(b09_asset).bands
    assert b09_bands is not None
    self.assertEqual(b09_bands[0].nodata, "nan")

    # Set: copy B01's bands onto the B02 asset and observe the change.
    b2_asset = item2.assets["B02"]
    self.assertEqual(
        get_opt(
            get_opt(RasterExtension.ext(b2_asset).bands)
            [0].statistics).maximum,
        19264,
    )
    b1_asset = item2.assets["B01"]
    RasterExtension.ext(b2_asset).bands = RasterExtension.ext(
        b1_asset).bands

    new_b2_asset_bands = RasterExtension.ext(item2.assets["B02"]).bands

    self.assertEqual(
        get_opt(get_opt(new_b2_asset_bands)[0].statistics).maximum, 20567)

    new_b2_asset_band0 = get_opt(new_b2_asset_bands)[0]
    new_b2_asset_band0.nodata = NoDataStrings.INF

    item2.validate()

    # Check adding a new asset
    new_stats = [
        Statistics.create(minimum=0,
                          maximum=10000,
                          mean=5000,
                          stddev=10,
                          valid_percent=88),
        Statistics.create(minimum=-1,
                          maximum=1,
                          mean=0,
                          stddev=1,
                          valid_percent=100),
        Statistics.create(minimum=1,
                          maximum=255,
                          mean=200,
                          stddev=3,
                          valid_percent=100),
    ]
    # new_histograms = []
    # Histograms are loaded from a gdalinfo JSON fixture.
    with open(self.GDALINFO_EXAMPLE_URI) as gdaljson_file:
        gdaljson_data = json.load(gdaljson_file)
        new_histograms = list(
            map(
                lambda band: Histogram.from_dict(band["histogram"]),
                gdaljson_data["bands"],
            ))
    new_bands = [
        RasterBand.create(
            nodata=1,
            unit="test1",
            statistics=new_stats[0],
            histogram=new_histograms[0],
        ),
        RasterBand.create(
            nodata=2,
            unit="test2",
            statistics=new_stats[1],
            histogram=new_histograms[1],
        ),
        RasterBand.create(
            nodata=NoDataStrings.NINF,
            unit="test3",
            statistics=new_stats[2],
            histogram=new_histograms[2],
        ),
    ]
    asset = pystac.Asset(href="some/path.tif",
                         media_type=pystac.MediaType.GEOTIFF)
    RasterExtension.ext(asset).bands = new_bands
    item.add_asset("test", asset)

    self.assertEqual(len(item.assets["test"].extra_fields["raster:bands"]),
                     3)
    self.assertEqual(
        item.assets["test"].extra_fields["raster:bands"][1]["statistics"]
        ["minimum"],
        -1,
    )
    self.assertEqual(
        item.assets["test"].extra_fields["raster:bands"][1]["histogram"]
        ["min"],
        3848.354901960784,
    )
    self.assertEqual(
        item.assets["test"].extra_fields["raster:bands"][2]["nodata"],
        "-inf")

    # Clearing every field empties the underlying property dicts.
    for s in new_stats:
        s.minimum = None
        s.maximum = None
        s.mean = None
        s.stddev = None
        s.valid_percent = None
        self.assertEqual(len(s.properties), 0)

    for b in new_bands:
        b.bits_per_sample = None
        b.data_type = None
        b.histogram = None
        b.nodata = None
        b.sampling = None
        b.scale = None
        b.spatial_resolution = None
        b.statistics = None
        b.unit = None
        b.offset = None
        self.assertEqual(len(b.properties), 0)

    # apply() repopulates the cleared objects in place.
    new_stats[2].apply(minimum=0,
                       maximum=10000,
                       mean=5000,
                       stddev=10,
                       valid_percent=88)
    new_stats[1].apply(minimum=-1,
                       maximum=1,
                       mean=0,
                       stddev=1,
                       valid_percent=100)
    new_stats[0].apply(minimum=1,
                       maximum=255,
                       mean=200,
                       stddev=3,
                       valid_percent=100)

    new_bands[2].apply(
        nodata=1,
        unit="test1",
        statistics=new_stats[2],
        histogram=new_histograms[0],
    )
    new_bands[1].apply(
        nodata=2,
        unit="test2",
        statistics=new_stats[1],
        histogram=new_histograms[1],
    )
    new_bands[0].apply(
        nodata=NoDataStrings.NAN,
        unit="test3",
        statistics=new_stats[0],
        histogram=new_histograms[2],
    )
    RasterExtension.ext(item.assets["test"]).apply(new_bands)
    self.assertEqual(
        item.assets["test"].extra_fields["raster:bands"][0]["statistics"]
        ["minimum"],
        1,
    )
    self.assertEqual(
        item.assets["test"].extra_fields["raster:bands"][0]["nodata"],
        "nan")
def download_gls(year: str, s3_dst: str, workdir: Path, overwrite: bool = False):
    """Download the GLS land-cover files for ``year``, convert to COGs,
    upload to S3, and write a STAC Item next to them.

    Args:
        year: Product year; key into ``YEARS``.
        s3_dst: Destination S3 prefix.
        workdir: Local path used as the prefix for temporary work dirs.
        overwrite: When True, re-process files that already exist on S3.
    """
    log = setup_logging()
    assets = {}
    out_stac = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}.stac-item.json"

    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    # Download the files
    for name, file in FILES.items():
        # Create a temporary directory to work with.
        # Fixed: TemporaryDirectory's ``prefix`` must be a string; passing
        # the Path object raised TypeError when the directory name was built.
        with TemporaryDirectory(prefix=str(workdir)) as tmpdir:
            log.info(f"Working on {file}")
            url = URL(
                BASE_URL.format(
                    record_id=YEARS[year][1], year_key=YEARS[year][0], file=file
                )
            )

            dest_url = URL(s3_dst) / year / f"{PRODUCT_NAME}_{year}_{name}.tif"

            if s3_head_object(str(dest_url)) is None or overwrite:
                log.info(f"Downloading {url}")

                try:
                    local_file = Path(tmpdir) / str(url.name)
                    # Download the file
                    download_file(url, local_file)
                    log.info(f"Downloaded file to {local_file}")
                    local_file_small = translate_file_deafrica_extent(local_file)
                    log.info(f"Clipped Africa out and saved to {local_file_small}")
                    resampling = "nearest" if name in DO_NEAREST else "bilinear"

                    # Create a COG in memory and upload to S3
                    with MemoryFile() as mem_dst:
                        # Creating the COG, with a memory cache and no download. Shiny.
                        cog_translate(
                            local_file_small,
                            mem_dst.name,
                            cog_profiles.get("deflate"),
                            in_memory=True,
                            nodata=255,
                            overview_resampling=resampling,
                        )
                        mem_dst.seek(0)
                        s3_dump(mem_dst, str(dest_url), ACL="bucket-owner-full-control")
                        log.info(f"File written to {dest_url}")
                except Exception:
                    log.exception(f"Failed to process {url}")
                    exit(1)
            else:
                log.info(f"{dest_url} exists, skipping")

            assets[name] = pystac.Asset(
                href=str(dest_url), roles=["data"], media_type=pystac.MediaType.COG
            )

    # Write STAC document from the last-written file
    # (``dest_url`` intentionally leaks out of the loop above).
    source_doc = f"https://zenodo.org/record/{YEARS[year][1]}"
    item = create_stac_item(
        str(dest_url),
        id=str(odc_uuid("Copernicus Global Land Cover", "3.0.1", [source_doc])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links(
        [
            pystac.Link(
                target=source_doc,
                title="Source",
                rel=pystac.RelType.DERIVED_FROM,
                media_type="text/html",
            )
        ]
    )

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
def create_asset(self):
    """Return the (key, Asset) pair for this granule metadata file."""
    return (
        GRANULE_METADATA_ASSET_KEY,
        pystac.Asset(href=self.href,
                     media_type=pystac.MediaType.XML,
                     roles=['metadata']),
    )
def main():
    """CLI entry point: download an AVIRIS scene archive, convert its HDR
    data to COG(s) with GDAL, upload the results to S3, and POST a new
    Item to the target STAC collection."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline-uri",
                        type=str,
                        help="A URI to JSON with instructions")
    parser.add_argument("--pipeline", type=str, help="JSON with instructions")
    parser.add_argument(
        "--aviris-stac-id",
        type=str,
        help="STAC Item ID to process from the STAC collection")
    parser.add_argument(
        "--aviris-collection-id",
        type=str,
        default=AVIRIS_ARCHIVE_COLLECTION_ID,
    )
    parser.add_argument(
        "--stac-api-uri",
        type=str,
        default=os.environ.get("STAC_API_URI", "http://franklin:9090"),
    )
    parser.add_argument("--s3-bucket",
                        type=str,
                        default=os.environ.get("S3_BUCKET", "aviris-data"))
    parser.add_argument(
        "--s3-prefix",
        type=str,
        default=os.environ.get("S3_PREFIX"),
    )
    parser.add_argument("--temp-dir",
                        type=str,
                        default=os.environ.get("TEMP_DIR", None))
    parser.add_argument("--output-format",
                        type=str,
                        default=os.environ.get("GDAL_OUTPUT_FORMAT", "COG"))
    parser.add_argument(
        "--keep-temp-dir",
        action="store_true",
        help=
        "If provided, script does not delete temporary directory before script exits. Useful for debugging.",
    )
    parser.add_argument(
        "--skip-large",
        action="store_true",
        help=
        "If provided, script will not process any COG > 200 MB to keep processing times reasonable. Useful for debugging.",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help=
        "If provided, force reingest StacItem even though this it is already present in the catalog.",
    )
    parser.add_argument(
        "--l2",
        action="store_true",
        help="If provided, use L2 imagery instead of L1.",
    )

    # GDAL_WARP_MEMORY_LIMIT is optional; int(None) raises TypeError, which
    # leaves the limit unset.
    try:
        warpMemoryLimit = int(os.environ.get("GDAL_WARP_MEMORY_LIMIT", None))
    except TypeError:
        warpMemoryLimit = None

    # TODO: replace it with parser.parse_args() later
    cli_args, cli_unknown = parser.parse_known_args()

    # parse all cli arguments
    args = CliConfig(cli_args, cli_unknown)

    s3 = boto3.client("s3")
    stac_client = STACClient(args.stac_api_uri)
    cog_collection = get_aviris_cog_collection(args.level)

    # GET STAC Item from AVIRIS Catalog
    item = stac_client.get_collection_item(args.aviris_collection_id,
                                           args.aviris_stac_id)
    # L2 scenes carry the atmo-corrected archive under a different key.
    asset_key = 'https_refl' if args.l2 else 'https'
    asset = item.assets.get(asset_key, None)
    if asset is None:
        raise ValueError(
            f'STAC Item {args.aviris_stac_id} from {args.stac_api_uri} has no asset "{asset_key}"!'
        )
    scene_name = item.properties.get("Name")

    # Create new COG STAC Item
    cog_item_id = "{}_{}_{}".format(
        cog_collection.id,
        item.properties.get("Name"),
        item.properties.get("Scene"),
    )

    # Copy the collection-level band/wavelength metadata onto the item.
    item.properties['eo:bands'] = cog_collection.properties['eo:bands']
    item.properties['hsi:wavelength_min'] = cog_collection.properties[
        'hsi:wavelength_min']
    item.properties['hsi:wavelength_max'] = cog_collection.properties[
        'hsi:wavelength_max']
    item.properties.pop('layer:ids', None)

    cog_item = pystac.Item(
        cog_item_id,
        item.geometry,
        item.bbox,
        item.datetime,
        item.properties,
        stac_extensions=COG_ITEM_EXTENSIONS,
        collection=cog_collection.id,
    )

    # Create COG Collection if it doesn't exist
    if not stac_client.has_collection(cog_collection.id):
        stac_client.post_collection(cog_collection)

    if not args.force:
        # Exit early if COG STAC Item already exists
        try:
            stac_client.get_collection_item(cog_collection.id, cog_item_id)
            print(cog_collection.id)
            print(cog_item_id)
            logger.info(f'STAC Item {cog_item_id} already exists. Exiting.')
            activation_output(cog_item_id, cog_collection.id)
            return
        except requests.exceptions.HTTPError:
            # An HTTP error (e.g. 404) means the item is absent; continue.
            pass

    # Create tmpdir
    temp_dir = Path(args.temp_dir if args.temp_dir is not None else mkdtemp())
    temp_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Retrieve AVIRIS GZIP for matching scene name
        local_archive = Path(temp_dir, Path(asset.href).name)
        if local_archive.exists():
            logger.info(f'Using existing archive: {local_archive}')
        else:
            logger.info(f'Downloading {asset.href} archive {local_archive}...')
            gzip_https_url = asset.href
            with DownloadProgressBar(unit='B',
                                     unit_scale=True,
                                     miniters=1,
                                     desc=gzip_https_url.split('/')[-1]) as t:
                urllib.request.urlretrieve(gzip_https_url,
                                           filename=local_archive,
                                           reporthook=t.update_to)

        # Retrieve file names from archive and extract if not already extracted to temp_dir
        extract_path = Path(temp_dir, f'{scene_name}_{args.level}')
        with tarfile.open(local_archive, mode="r") as tar_gz_fp:
            logger.info(f'Retrieving filenames from {local_archive}')
            with timing("Query archive"):
                tar_files = tar_gz_fp.getnames()
            logger.info(f"Files: {tar_files}")
            if extract_path.exists():
                logger.info(f'Skipping extract, exists at {extract_path}')
            else:
                logger.info(f"Extracting {local_archive} to {extract_path}")
                with timing("Extract"):
                    # NOTE(review): extractall on a downloaded archive is
                    # path-traversal prone — confirm the source is trusted.
                    tar_gz_fp.extractall(extract_path)

        # Find HDR data files in unzipped package
        hdr_ext = '.hdr' if args.l2 else 'ort_img.hdr'
        hdr_files = [tf for tf in tar_files if tf.endswith(hdr_ext)]
        logger.info("HDR Files: {}".format(hdr_files))
        for idx, hdr_file_w_ext in enumerate(hdr_files):
            hdr_file_w_ext_path = Path(hdr_file_w_ext)
            # Strip the .hdr suffix to get the raw data file GDAL reads.
            hdr_path = Path(extract_path, hdr_file_w_ext_path.with_suffix(""))
            cog_path = Path(
                f'{hdr_path.with_suffix("")}_{args.output_asset_name}.tiff')

            if args.skip_large and os.path.getsize(hdr_path) > 0.2 * GB:
                file_mb = floor(os.path.getsize(hdr_path) / 1024 / 1024)
                logger.info(
                    "--skip-large provided. Skipping {} with size {}mb".format(
                        hdr_path, file_mb))
                continue

            # Convert HDR data to pixel interleaved COG with GDAL
            # NUM_THREADS only speeds up compression and overview generation
            # gdal.Warp is used to fix rasters rotation
            # NOTE:
            # We can't directly write TIFFs on S3 as the result of the gdal.Warp operation
            # see: https://github.com/OSGeo/gdal/issues/1189
            warp_opts = gdal.WarpOptions(callback=warp_callback,
                                         warpOptions=["NUM_THREADS=ALL_CPUS"],
                                         creationOptions=[
                                             "NUM_THREADS=ALL_CPUS",
                                             "COMPRESS=DEFLATE",
                                             "BIGTIFF=YES", "TILED=YES"
                                         ],
                                         multithread=True,
                                         warpMemoryLimit=warpMemoryLimit,
                                         format=args.output_format)
            logger.info(f"Converting {hdr_path} to {cog_path}...")
            with timing("GDAL Warp"):
                gdal.Warp(str(cog_path), str(hdr_path), options=warp_opts)

            # read metadata from the transformed TIFF
            cog_ds = gdal.Open(str(cog_path))
            cog_proj = osr.SpatialReference(wkt=cog_ds.GetProjection())
            cog_proj.AutoIdentifyEPSG()

            # set projection
            cog_item.properties['proj:epsg'] = int(
                cog_proj.GetAttrValue('AUTHORITY', 1))

            # Upload COG and metadata, if written, to S3 bucket + key
            key = Path(
                args.s3_prefix,
                str(item.properties.get("Year")),
                str(item.properties.get("Name")),
                cog_path.name,
            )
            s3_uri = f's3://{args.s3_bucket}/{key}'
            logger.info(f"Uploading {cog_path} to {s3_uri}")
            s3.upload_file(
                str(cog_path),
                args.s3_bucket,
                str(key),
                Callback=ProgressPercentage(str(cog_path)),
                Config=TransferConfig(multipart_threshold=1 * GB),
            )
            # GDAL may write an auxiliary .aux.xml sidecar next to the COG.
            cog_metadata_path = cog_path.with_suffix(".tiff.aux.xml")
            if cog_metadata_path.exists():
                metadata_key = Path(args.s3_prefix, cog_metadata_path.name)
                metadata_s3_uri = f's3://{args.s3_bucket}/{metadata_key}'
                logger.info(
                    f'Uploading {cog_metadata_path} to {metadata_s3_uri}')
                s3.upload_file(str(cog_metadata_path), args.s3_bucket,
                               str(metadata_key))

            # Add assets to COG STAC Item
            cog_item.add_asset(
                f'{args.output_asset_name}_{idx}',
                pystac.Asset(s3_uri,
                             media_type=pystac.MediaType.COG,
                             roles=["data"]),
            )
            if cog_metadata_path.exists():
                cog_item.add_asset(
                    f'metadata_{idx}',
                    pystac.Asset(
                        metadata_s3_uri,
                        media_type=pystac.MediaType.XML,
                        roles=["metadata"],
                    ),
                )
    finally:
        if not args.keep_temp_dir:
            logger.info(f"Removing temp dir: {temp_dir}")
            shutil.rmtree(temp_dir, ignore_errors=True)

    # Add COG Item to AVIRIS L2 STAC Collection
    logger.info(f"POST Item {cog_item.id} to {args.stac_api_uri}")
    item_data = stac_client.post_collection_item(cog_collection.id, cog_item)
    if item_data.get('id', None):
        logger.info(f"Success: {item_data['id']}")
        activation_output(item_data['id'], cog_collection.id)
    else:
        logger.error(f"Failure: {item_data}")
        return -1
def to_stac(self):
    """Convert this Planet order item into a :class:`pystac.Item`.

    Reads ``self.item_metadata`` (Planet API item JSON) and
    ``self.item_assets`` (the delivered asset manifest), generates a PNG
    thumbnail for imagery assets, and returns the populated STAC Item.

    Returns:
        pystac.Item: the fully populated item.
    """
    props = deepcopy(self.item_metadata['properties'])

    # Core Item properties
    item_id = self.item_metadata['id']
    geom = self.item_metadata['geometry']
    bbox = list(shape(geom).bounds)
    # Local renamed from `datetime` to avoid shadowing the module/name.
    item_datetime = str_to_datetime(props.pop('acquired'))

    item = pystac.Item(id=item_id,
                       geometry=geom,
                       bbox=bbox,
                       datetime=item_datetime,
                       properties={})

    # Common metadata
    item.common_metadata.providers = [PLANET_PROVIDER]
    item.common_metadata.gsd = props.pop('gsd')
    item.common_metadata.created = str_to_datetime(props.pop('published'))
    item.common_metadata.updated = str_to_datetime(props.pop('updated'))
    item.common_metadata.constellation = props.pop('provider')
    item.common_metadata.platform = props.pop('satellite_id')

    # Some do not have instrument (e.g. REOrthoTile)
    instrument = props.pop('instrument', None)
    if instrument is not None:
        item.common_metadata.instruments = [instrument]

    # eo
    item.ext.enable('eo')
    # STAC uses 0-100, planet 0-1
    item.ext.eo.cloud_cover = props.pop('cloud_cover') * 100

    # view
    item.ext.enable('view')
    item.ext.view.off_nadir = props.pop('view_angle')
    if 'satellite_azimuth' in props:
        item.ext.view.azimuth = props.pop('satellite_azimuth')
    item.ext.view.sun_azimuth = props.pop('sun_azimuth')
    item.ext.view.sun_elevation = props.pop('sun_elevation')

    # Add all additional properties with Planet extension designation.
    whitelisted_props = [
        'anomalous_pixels', 'ground_control', 'item_type', 'pixel_resolution',
        'quality_category', 'strip_id', 'publishing_stage', 'clear_percent'
    ]
    for name in whitelisted_props:
        if name in props:
            item.properties['{}:{}'.format(PLANET_EXTENSION_PREFIX,
                                           name)] = props[name]

    item_type = props.pop('item_type')
    planet_url = f'https://api.planet.com/data/v1/item-types/{item_type}/items/{item_id}'
    via_link = Link('via', planet_url)
    item.add_link(via_link)

    geotransform = None
    # Full-resolution (height, width) of the last imagery asset, kept for
    # the projection extension (see BUGFIX note below).
    raster_shape = None
    for planet_asset in self.item_assets:
        href = make_absolute_href(planet_asset['path'],
                                  start_href=self.base_dir,
                                  start_is_dir=True)

        media_type = planet_asset['media_type']
        asset_type = planet_asset['annotations']['planet/asset_type']
        bundle_type = planet_asset['annotations']['planet/bundle_type']

        # Planet data is delivered as COGs
        if media_type == 'image/tiff' and asset_type not in ["udm", "udm2"]:
            media_type = pystac.MediaType.COG
            roles = ['visual']

            thumbnail_path = f"{os.path.splitext(href)[0]}.thumbnail.png"
            with rasterio.open(href) as dataset:
                height, width = dataset.shape
                geotransform = dataset.transform
                # BUGFIX: remember the full raster shape before height/width
                # are rescaled to thumbnail dimensions below.
                raster_shape = [height, width]
                # Scale the longer edge down to 256 px, preserving aspect.
                if width > height:
                    width, height = 256, int(height / width * 256)
                else:
                    width, height = int(width / height * 256), 256
                profile = dataset.profile
                profile.update(driver='PNG')
                profile.update(width=width)
                profile.update(height=height)

                if "analytic" in asset_type:
                    # Analytic products are BGRN; read as RGB for the PNG.
                    data = dataset.read(indexes=[3, 2, 1],
                                        out_shape=(3, height, width),
                                        resampling=Resampling.cubic)
                    profile.update(count=3)
                else:
                    data = dataset.read(out_shape=(int(dataset.count), height,
                                                   width),
                                        resampling=Resampling.cubic)

                with rasterio.open(thumbnail_path, 'w', **profile) as dst:
                    dst.write(data)

            item.add_asset(
                'thumbnail',
                pystac.Asset(href=thumbnail_path,
                             media_type=pystac.MediaType.PNG,
                             roles=['thumbnail']))
        else:
            roles = ['metadata']

        # Use the asset type as the key if it's the same as the bundle
        # type, as this appears to be the 'main' asset of the bundle type.
        # If not, use a key that combines the bundle type and asset type.
        key = asset_type
        if asset_type != bundle_type:
            key = '{}:{}'.format(bundle_type, asset_type)

        # BUGFIX: the original added COG assets twice under the same key —
        # first with roles, then a second roles-less copy carrying the
        # eo:bands, which overwrote the first and dropped the roles. Build
        # a single asset and attach bands to it before adding it once.
        asset = pystac.Asset(href=href, media_type=media_type, roles=roles)

        if media_type == pystac.MediaType.COG:
            # add bands to asset
            if item_type.startswith('SkySat'):
                if "panchro" in asset_type:
                    bands = [SKYSAT_BANDS['PAN']]
                elif "analytic" in asset_type:
                    bands = [
                        SKYSAT_BANDS['BLUE'], SKYSAT_BANDS['GREEN'],
                        SKYSAT_BANDS['RED'], SKYSAT_BANDS['NIR']
                    ]
                else:
                    bands = [
                        SKYSAT_BANDS['RED'], SKYSAT_BANDS['GREEN'],
                        SKYSAT_BANDS['BLUE']
                    ]
                item.ext.eo.set_bands(bands, asset)

        item.add_asset(key, asset)

    # proj
    if 'epsg_code' in props:
        item.ext.enable('projection')
        item.ext.projection.epsg = props.pop('epsg_code')
        if geotransform is not None:
            item.ext.projection.transform = geotransform
            # BUGFIX: record the full raster shape, not the thumbnail's
            # rescaled height/width.
            item.ext.projection.shape = raster_shape

    if self.metadata_href:
        item.add_asset(
            'metadata',
            pystac.Asset(href=self.metadata_href,
                         media_type=pystac.MediaType.JSON,
                         roles=['metadata']))

    return item
def download_and_cog_chirps(
    year: str,
    month: str,
    s3_dst: str,
    day: str = None,
    overwrite: bool = False,
    slack_url: str = None,
):
    """Download a CHIRPS v2.0 rainfall raster, convert it to a COG and
    upload the COG plus its STAC item JSON to S3.

    Args:
        year: Year of the scene, e.g. "2020".
        month: Zero-padded month, e.g. "05".
        s3_dst: Destination S3 prefix (any trailing slash is stripped).
        day: Optional zero-padded day. When given, the daily product is
            processed; otherwise the monthly product.
        overwrite: Re-process even if the STAC item already exists at the
            destination.
        slack_url: Optional Slack webhook notified on failure.
    """
    # Cleaning and sanity checks
    s3_dst = s3_dst.rstrip("/")

    # Set up file strings
    if day is not None:
        # Set up a daily process
        in_file = f"chirps-v2.0.{year}.{month}.{day}.tif.gz"
        in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
        # /vsigzip/ over /vsicurl/ lets GDAL stream the remote gzipped file.
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.{day}.tif"
            in_href = DAILY_URL_TEMPLATE.format(year=year, in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/{year}/{month}/chirps-v2.0_{year}.{month}.{day}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        start_datetime = f"{year}-{month}-{day}T00:00:00Z"
        end_datetime = f"{year}-{month}-{day}T23:59:59Z"
        product_name = "rainfall_chirps_daily"
    else:
        # Set up a monthly process
        in_file = f"chirps-v2.0.{year}.{month}.tif.gz"
        in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
        in_data = f"/vsigzip//vsicurl/{in_href}"
        if not check_for_url_existence(in_href):
            log.warning("Couldn't find the gzipped file, trying the .tif")
            in_file = f"chirps-v2.0.{year}.{month}.tif"
            in_href = MONTHLY_URL_TEMPLATE.format(in_file=in_file)
            in_data = f"/vsicurl/{in_href}"

            if not check_for_url_existence(in_href):
                log.error("Couldn't find the .tif file either, aborting")
                sys.exit(1)

        file_base = f"{s3_dst}/chirps-v2.0_{year}.{month}"
        out_data = f"{file_base}.tif"
        out_stac = f"{file_base}.stac-item.json"

        # Last day of the month for the end of the datetime range.
        _, end = calendar.monthrange(int(year), int(month))
        start_datetime = f"{year}-{month}-01T00:00:00Z"
        end_datetime = f"{year}-{month}-{end}T23:59:59Z"
        product_name = "rainfall_chirps_monthly"

        # Set to 15 for the STAC metadata (mid-month nominal datetime).
        day = 15

    try:
        # Check if file already exists
        log.info(f"Working on {in_file}")
        if not overwrite and s3_head_object(out_stac) is not None:
            log.warning(f"File {out_stac} already exists. Skipping.")
            return

        # COG and STAC
        with MemoryFile() as mem_dst:
            # Creating the COG, with a memory cache and no download. Shiny.
            cog_translate(
                in_data,
                mem_dst.name,
                cog_profiles.get("deflate"),
                in_memory=True,
                nodata=-9999,
            )
            # Creating the STAC document with appropriate date range
            _, end = calendar.monthrange(int(year), int(month))
            item = create_stac_item(
                mem_dst,
                id=str(odc_uuid("chirps", "2.0", [in_file])),
                with_proj=True,
                input_datetime=datetime(int(year), int(month), int(day)),
                properties={
                    "odc:processing_datetime": datetime_to_str(datetime.now()),
                    "odc:product": product_name,
                    "start_datetime": start_datetime,
                    "end_datetime": end_datetime,
                },
            )
            item.set_self_href(out_stac)
            # Manually redo the asset: replace the generic "asset" key with
            # a named "rainfall" asset pointing at the final S3 location.
            del item.assets["asset"]
            item.assets["rainfall"] = pystac.Asset(
                href=out_data,
                title="CHIRPS-v2.0",
                media_type=pystac.MediaType.COG,
                roles=["data"],
            )
            # Let's add a link to the source
            item.add_links([
                pystac.Link(
                    target=in_href,
                    title="Source file",
                    rel=pystac.RelType.DERIVED_FROM,
                    media_type="application/gzip",
                )
            ])

            # Dump the data to S3
            mem_dst.seek(0)
            log.info(f"Writing DATA to: {out_data}")
            s3_dump(mem_dst, out_data, ACL="bucket-owner-full-control")
            # Write STAC to S3
            log.info(f"Writing STAC to: {out_stac}")
            s3_dump(
                json.dumps(item.to_dict(), indent=2),
                out_stac,
                ContentType="application/json",
                ACL="bucket-owner-full-control",
            )
            # All done!
            log.info(f"Completed work on {in_file}")

    except Exception as e:
        message = f"Failed to handle {in_file} with error {e}"
        if slack_url is not None:
            send_slack_notification(slack_url, "Chirps Rainfall Monthly",
                                    message)
        log.exception(message)
        exit(1)
def create_stac_item(
    source: Union[str, DatasetReader, DatasetWriter, WarpedVRT, MemoryFile],
    input_datetime: Optional[datetime.datetime] = None,
    extensions: Optional[List[str]] = None,
    collection: Optional[str] = None,
    properties: Optional[Dict] = None,
    id: Optional[str] = None,
    assets: Optional[Dict[str, pystac.Asset]] = None,
    asset_name: str = "asset",
    asset_roles: Optional[List[str]] = None,
    asset_media_type: Optional[Union[str, pystac.MediaType]] = None,
    asset_href: Optional[str] = None,
) -> pystac.Item:
    """Create a STAC Item from a raster source.

    Args:
        source (str or rasterio opened dataset): input path or rasterio dataset.
        input_datetime (datetime.datetime, optional): datetime associated with the item.
        extensions (list of str): input list of extensions to use in the item.
        collection (str, optional): name of the collection the item belongs to.
        properties (dict, optional): additional properties to add in the item.
        id (str, optional): id to assign to the item (defaults to the source basename).
        assets (dict, optional): Assets to set in the item. If set we won't create one from the source.
        asset_name (str, optional): asset name in the Assets object.
        asset_roles (list of str, optional): list of the asset's roles.
        asset_media_type (str or pystac.MediaType, optional): asset's media type;
            pass the string "auto" to detect it from the opened dataset.
        asset_href (str, optional): asset's URI (defaults to the input path).

    Returns:
        pystac.Item: valid STAC Item.

    """
    # ExitStack lets us open the source ourselves only when a path was
    # passed; an already-open dataset is used as-is and left open.
    with ExitStack() as ctx:
        if isinstance(source, (DatasetReader, DatasetWriter, WarpedVRT)):
            src_dst = source
        else:
            src_dst = ctx.enter_context(rasterio.open(source))

        meta = get_metadata(src_dst)

        media_type = (
            get_media_type(src_dst) if asset_media_type == "auto" else asset_media_type
        )

        properties = properties or {}
        extensions = extensions or []

        if "proj" in extensions:
            # Flatten the projection metadata into proj:* item properties,
            # skipping values get_metadata could not determine.
            properties.update(
                {
                    f"proj:{name}": value
                    for name, value in meta["proj"].items()
                    if value is not None
                }
            )

        # item
        item = pystac.Item(
            id=id or os.path.basename(meta["name"]),
            geometry=meta["footprint"],
            bbox=meta["bbox"],
            collection=collection,
            stac_extensions=extensions,
            datetime=input_datetime,
            properties=properties,
        )

        # item.assets
        if assets:
            # Caller-supplied assets take precedence; no asset is derived
            # from the source in that case.
            for key, asset in assets.items():
                item.add_asset(
                    key=key,
                    asset=asset,
                )
        else:
            # NOTE(review): asset_roles is accepted but not applied to the
            # generated asset here — confirm whether that is intentional.
            item.add_asset(
                key=asset_name,
                asset=pystac.Asset(href=asset_href or meta["name"], media_type=media_type),
            )

        return item
def create_item(state,
                year,
                cog_href,
                fgdc_metadata_href: Optional[str],
                thumbnail_href=None,
                additional_providers=None):
    """Creates a STAC Item from NAIP data.

    Args:
        state (str): The 2-letter state code for the state this item
            belongs to.
        year (str): The NAIP year.
        cog_href (str): The href to the image as a COG. This needs
            to be an HREF that rasterio is able to open.
        fgdc_metadata_href (str): The href to the FGDC metadata
            for this NAIP scene. Optional, as some NAIP scenes do
            not have this (e.g. 2010).
        thumbnail_href (str): Optional href for a thumbnail for this scene.
        additional_providers (List[pystac.Provider]): Optional list of
            additional providers to the USDA that will be included on
            this Item.

    This function will read the metadata file for information to place in
    the STAC item.

    Returns:
        pystac.Item: A STAC Item representing this NAIP scene.
    """
    with rio.open(cog_href) as ds:
        gsd = ds.res[0]
        epsg = int(ds.crs.to_authority()[1])
        image_shape = list(ds.shape)
        original_bbox = list(ds.bounds)
        transform = list(ds.transform)
        geom = reproject_geom(ds.crs,
                              'epsg:4326',
                              mapping(box(*ds.bounds)),
                              precision=6)

    if fgdc_metadata_href is not None:
        fgdc_metadata_text = pystac.STAC_IO.read_text(fgdc_metadata_href)
        fgdc = parse_fgdc_metadata(fgdc_metadata_text)
    else:
        fgdc = {}

    if 'Distribution_Information' in fgdc:
        resource_desc = fgdc['Distribution_Information'][
            'Resource_Description']
    else:
        resource_desc = os.path.basename(cog_href)
    item_id = naip_item_id(state, resource_desc)

    bounds = list(shape(geom).bounds)

    # Prefer the acquisition date from FGDC metadata; otherwise fall back
    # to the date encoded in the filename (6th underscore-separated field).
    if fgdc:  # idiomatic truthiness check, equivalent to any(fgdc) on a dict
        dt = str_to_datetime(
            fgdc['Identification_Information']['Time_Period_of_Content']
            ['Time_Period_Information']['Single_Date/Time']['Calendar_Date'])
    else:
        fname = os.path.splitext(os.path.basename(cog_href))[0]
        fname_date = fname.split('_')[5]
        dt = dateutil.parser.isoparse(fname_date)

    properties = {'naip:state': state, 'naip:year': year}

    item = pystac.Item(id=item_id,
                       geometry=geom,
                       bbox=bounds,
                       datetime=dt,
                       properties=properties)

    # Common metadata
    item.common_metadata.providers = [constants.USDA_PROVIDER]
    if additional_providers is not None:
        item.common_metadata.providers.extend(additional_providers)
    item.common_metadata.gsd = gsd

    # eo, for asset bands
    item.ext.enable('eo')

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = epsg
    item.ext.projection.shape = image_shape
    item.ext.projection.bbox = original_bbox
    item.ext.projection.transform = transform

    # COG
    item.add_asset(
        'image',
        pystac.Asset(href=cog_href,
                     media_type=pystac.MediaType.COG,
                     roles=['data'],
                     title="RGBIR COG tile"))

    # Metadata
    if fgdc:
        item.add_asset(
            'metadata',
            pystac.Asset(href=fgdc_metadata_href,
                         media_type=pystac.MediaType.TEXT,
                         roles=['metadata'],
                         title='FGDC Metadata'))  # BUGFIX: was 'FGDC Metdata'

    if thumbnail_href is not None:
        media_type = pystac.MediaType.JPEG
        if thumbnail_href.lower().endswith('png'):
            media_type = pystac.MediaType.PNG
        item.add_asset(
            'thumbnail',
            pystac.Asset(href=thumbnail_href,
                         media_type=media_type,
                         roles=['thumbnail'],
                         title='Thumbnail'))

    item.ext.eo.set_bands(constants.NAIP_BANDS, item.assets['image'])

    return item
def create_item(xml_href: str,
                vnir_cog_href: Optional[str],
                swir_cog_href: Optional[str],
                tir_cog_href: Optional[str],
                hdf_href: Optional[str] = None,
                vnir_browse_href: Optional[str] = None,
                tir_browse_href: Optional[str] = None,
                qa_browse_href: Optional[str] = None,
                qa_txt_href: Optional[str] = None,
                additional_providers=None,
                read_href_modifier: Optional[ReadHrefModifier] = None):
    """Creates an Item from ASTER assets.

    Args:
        xml_href: href to the scene's XML metadata file; drives the item's
            id, geometry, datetimes and extension properties.
        vnir_cog_href, swir_cog_href, tir_cog_href: optional hrefs to the
            per-sensor COGs.
        hdf_href: optional href to the original HDF-EOS file.
        vnir_browse_href, tir_browse_href, qa_browse_href: optional hrefs
            to JPEG browse (thumbnail) images.
        qa_txt_href: optional href to the QA text report.
        additional_providers: optional extra providers appended after the
            ASTER provider.
        read_href_modifier: optional callable used when reading hrefs.

    Returns:
        pystac.Item: the populated item.

    Raises:
        ValueError: if no data asset href at all is supplied.
    """
    # At least one data asset (COG or HDF) is required.
    if vnir_cog_href is None and \
            swir_cog_href is None and \
            tir_cog_href is None and \
            hdf_href is None:
        raise ValueError('Need to supply at least one data asset.')

    file_name = os.path.basename(xml_href)
    scene_id = AsterSceneId.from_path(file_name)

    xml_metadata = XmlMetadata.from_file(xml_href, read_href_modifier)

    geom, bounds = xml_metadata.geometries
    datetime = xml_metadata.item_datetime

    item = pystac.Item(
        id=scene_id.item_id,
        geometry=geom,
        bbox=bounds,
        datetime=datetime,
        properties={'aster:processing_number': scene_id.processing_number})

    # Common metadata
    item.common_metadata.providers = [ASTER_PROVIDER]
    if additional_providers is not None:
        item.common_metadata.providers.extend(additional_providers)

    item.common_metadata.created = xml_metadata.created
    item.common_metadata.platform = ASTER_PLATFORM
    item.common_metadata.instruments = [ASTER_INSTRUMENT]

    # eo
    item.ext.enable('eo')
    item.ext.eo.cloud_cover = xml_metadata.cloud_cover

    # sat
    item.ext.enable('sat')
    item.ext.sat.orbit_state = xml_metadata.orbit_state

    # view
    item.ext.enable('view')
    item.ext.view.sun_azimuth = xml_metadata.sun_azimuth

    sun_elevation = xml_metadata.sun_elevation
    # Sun elevation can be negative; if so, will break validation; leave out.
    # See https://github.com/radiantearth/stac-spec/issues/853
    # This is fixed in 1.0.0-RC1; store as an aster property
    # to be updated once upgrade to 1.0.0-RC1 happens.
    if sun_elevation >= 0.0:
        item.ext.view.sun_elevation = sun_elevation
    else:
        item.ext.view.sun_elevation = 0.0
        item.properties['aster:sun_elevation'] = str(sun_elevation)

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = xml_metadata.epsg

    # ASTER-specific properties
    item.properties.update(xml_metadata.aster_properties)

    # -- ASSETS

    # Create XML asset
    item.add_asset(
        XML_ASSET_KEY,
        pystac.Asset(href=xml_href,
                     media_type=pystac.MediaType.XML,
                     roles=['metadata'],
                     title='XML metadata'))

    # Create Assets for each of VIR, SWIR, and TIR
    _add_cog_assets(item=item,
                    xml_metadata=xml_metadata,
                    vnir_cog_href=vnir_cog_href,
                    swir_cog_href=swir_cog_href,
                    tir_cog_href=tir_cog_href,
                    read_href_modifier=read_href_modifier)

    # Create HDF EOS asset, if available
    if hdf_href is not None:
        hdf_asset = pystac.Asset(href=hdf_href,
                                 media_type=pystac.MediaType.HDF,
                                 roles=['data'],
                                 title="ASTER L1T 003 HDF-EOS")
        item.ext.eo.set_bands(ASTER_BANDS, hdf_asset)
        item.add_asset(HDF_ASSET_KEY, hdf_asset)

    # Create assets for browse files, if available
    if vnir_browse_href is not None:
        item.add_asset(
            VNIR_BROWSE_ASSET_KEY,
            pystac.Asset(href=vnir_browse_href,
                         media_type=pystac.MediaType.JPEG,
                         roles=['thumbnail'],
                         title="VNIR browse file",
                         description='Standalone reduced resolution VNIR'))

    if tir_browse_href is not None:
        # NOTE(review): unlike the VNIR asset above, the description-style
        # text appears in the title here and there is no description —
        # possibly a copy-paste slip; confirm intended title/description.
        item.add_asset(
            TIR_BROWSE_ASSET_KEY,
            pystac.Asset(href=tir_browse_href,
                         media_type=pystac.MediaType.JPEG,
                         roles=['thumbnail'],
                         title='Standalone reduced resolution TIR'))

    if qa_browse_href is not None:
        item.add_asset(
            QA_BROWSE_ASSET_KEY,
            pystac.Asset(
                href=qa_browse_href,
                media_type=pystac.MediaType.JPEG,
                roles=['thumbnail'],
                title='QA browse file',
                description=(
                    "Single-band black and white reduced resolution browse "
                    "overlaid with red, green, and blue (RGB) markers for GCPs "
                    "used during the geometric verification quality check.")))

    # Create an asset for the QA text report, if available
    if qa_txt_href:
        # NOTE(review): title says 'QA browse file' but the key and
        # description indicate a text report — confirm intended title.
        item.add_asset(
            QA_TXT_ASSET_KEY,
            pystac.Asset(href=qa_txt_href,
                         media_type=pystac.MediaType.TEXT,
                         roles=['metadata'],
                         title='QA browse file',
                         description="Geometric quality assessment report."))

    return item
def create_asset(self):
    """Build the SAFE manifest metadata asset.

    Returns:
        tuple: ``(SAFE_MANIFEST_ASSET_KEY, pystac.Asset)`` — the key and an
        XML asset pointing at ``self.href`` with the ``metadata`` role.
    """
    manifest_asset = pystac.Asset(
        href=self.href,
        media_type=pystac.MediaType.XML,
        roles=['metadata'],
    )
    return (SAFE_MANIFEST_ASSET_KEY, manifest_asset)
def download_cci_lc(year: str,
                    s3_dst: str,
                    workdir: str,
                    overwrite: bool = False):
    """Download an ESA CCI land-cover zip from the Copernicus CDS, crop it to
    Africa, write the classification band as a COG and publish COG + STAC
    item JSON to S3.

    Args:
        year: Year of the land-cover product, e.g. "2019".
        s3_dst: Destination S3 prefix.
        workdir: Local working directory; a temp dir is created inside it.
        overwrite: Re-process even if outputs already exist on S3.
    """
    log = setup_logging()
    assets = {}
    cci_lc_version = get_version_from_year(year)

    name = f"{PRODUCT_NAME}_{year}_{cci_lc_version}"

    out_cog = URL(s3_dst) / year / f"{name}.tif"
    out_stac = URL(s3_dst) / year / f"{name}.stac-item.json"

    # The STAC item is written last, so its presence marks a completed run.
    if s3_head_object(str(out_stac)) is not None and not overwrite:
        log.info(f"{out_stac} exists, skipping")
        return

    workdir = Path(workdir)
    if not workdir.exists():
        workdir.mkdir(parents=True, exist_ok=True)

    # Create a temporary directory to work with
    tmpdir = mkdtemp(prefix=str(f"{workdir}/"))
    log.info(f"Working on {year} in the path {tmpdir}")

    if s3_head_object(str(out_cog)) is None or overwrite:
        log.info(f"Downloading {year}")
        try:
            local_file = Path(tmpdir) / f"{name}.zip"
            if not local_file.exists():
                # Download the file
                c = cdsapi.Client()
                # We could also retrieve the object metadata from the CDS.
                # e.g. f = c.retrieve("series",{params}) | f.location = URL to download
                c.retrieve(
                    "satellite-land-cover",
                    {
                        "format": "zip",
                        "variable": "all",
                        "version": cci_lc_version,
                        "year": str(year),
                    },
                    local_file,
                )
                log.info(f"Downloaded file to {local_file}")
            else:
                log.info(
                    f"File {local_file} exists, continuing without downloading"
                )

            # Unzip the file
            log.info(f"Unzipping {local_file}")
            unzipped = None
            with zipfile.ZipFile(local_file, "r") as zip_ref:
                # Assumes the archive's first entry is the NetCDF dataset
                # — TODO confirm for all product years.
                unzipped = local_file.parent / zip_ref.namelist()[0]
                zip_ref.extractall(tmpdir)

            # Process data
            ds = xr.open_dataset(unzipped)
            # Subset to Africa
            ulx, uly, lrx, lry = AFRICA_BBOX
            # Note: lats are upside down!
            ds_small = ds.sel(lat=slice(uly, lry), lon=slice(ulx, lrx))
            ds_small = assign_crs(ds_small, crs="epsg:4326")

            # Create cog (in memory - :mem: returns bytes object)
            mem_dst = write_cog(
                ds_small.lccs_class,
                ":mem:",
                nodata=0,
                overview_resampling="nearest",
            )

            # Write to s3
            s3_dump(mem_dst, str(out_cog), ACL="bucket-owner-full-control")
            log.info(f"File written to {out_cog}")

        except Exception:
            log.exception(f"Failed to process {name}")
            exit(1)
    else:
        log.info(f"{out_cog} exists, skipping")

    assets["classification"] = pystac.Asset(href=str(out_cog),
                                            roles=["data"],
                                            media_type=pystac.MediaType.COG)

    # Write STAC document
    source_doc = (
        "https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover"
    )
    item = create_stac_item(
        str(out_cog),
        id=str(
            odc_uuid("Copernicus Land Cover", cci_lc_version,
                     [source_doc, name])),
        assets=assets,
        with_proj=True,
        properties={
            "odc:product": PRODUCT_NAME,
            "start_datetime": f"{year}-01-01T00:00:00Z",
            "end_datetime": f"{year}-12-31T23:59:59Z",
        },
    )
    item.add_links([
        pystac.Link(
            target=source_doc,
            title="Source",
            rel=pystac.RelType.DERIVED_FROM,
            media_type="text/html",
        )
    ])

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        str(out_stac),
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {out_stac}")
def to_representation(self, instance: models.RasterMeta) -> dict:
    """Serialize a RasterMeta record as a STAC Item dictionary.

    Builds a pystac.Item with projection and EO extensions, one asset per
    raster image plus one per ancillary file, and returns ``item.to_dict()``.

    Raises:
        ValueError: if any image file is stored internally rather than
            referenced by URL.
    """
    item = pystac.Item(
        id=instance.pk,
        geometry=json.loads(instance.footprint.json),
        bbox=instance.extent,
        # Fall back through modified/created when no acquisition date exists.
        datetime=(instance.acquisition_date or instance.modified
                  or instance.created),
        properties=dict(
            datetime=str(instance.acquisition_date),
            platform=instance.instrumentation,
        ),
    )
    # 'proj' extension
    item.ext.enable('projection')
    item.ext.projection.apply(
        epsg=CRS.from_proj4(instance.crs).to_epsg(),
        transform=instance.transform,
    )
    # 'eo' extension
    item.ext.enable('eo')
    item.ext.eo.apply(cloud_cover=instance.cloud_cover, bands=[])
    # Add assets
    for image in instance.parent_raster.image_set.images.all():
        if image.file.type != FileSourceType.URL:
            # TODO: we need fix this
            raise ValueError(
                'Files must point to valid URL resources, not internal storage.'
            )
        bands = []
        # (None, None) is an unbounded range, i.e. every band with a range.
        for bandmeta in image.bandmeta_set.filter(
                band_range__contained_by=(None, None)):
            band = pystac.extensions.eo.Band.create(
                name=f'B{bandmeta.band_number}',
                description=bandmeta.description,
            )
            # The wavelength statistics is described by either the
            # common_name or via center_wavelength and full_width_half_max.
            # We can derive our bandmeta.band_range.lower,
            # bandmeta.band_range.upper from the center_wavelength
            # and full_width_half_max.
            if (
                bandmeta.band_range.lower,
                bandmeta.band_range.upper,
            ) in BAND_RANGE_BY_COMMON_NAMES.inverse:
                band.common_name = BAND_RANGE_BY_COMMON_NAMES.inverse[(
                    bandmeta.band_range.lower, bandmeta.band_range.upper)]
            else:
                with decimal.localcontext(decimal.BasicContext):
                    band.center_wavelength = float(
                        (bandmeta.band_range.lower +
                         bandmeta.band_range.upper) / 2)
                    band.full_width_half_max = float(
                        bandmeta.band_range.upper - bandmeta.band_range.lower)
            bands.append(band)
        asset = pystac.Asset(
            href=image.file.get_url(),
            title=image.file.name,
            roles=[
                'data',
            ],
        )
        item.add_asset(f'image-{image.pk}', asset)
        # Fall back to a single placeholder band when no band metadata
        # matched the filter above.
        item.ext.eo.set_bands(
            bands=bands or [
                pystac.extensions.eo.Band.create(
                    name=image.file.name,
                    description=image.bandmeta_set.first().description,
                )
            ],
            asset=asset,
        )
    for ancillary_file in instance.parent_raster.ancillary_files.all():
        asset = pystac.Asset(
            href=ancillary_file.get_url(),
            title=ancillary_file.name,
            roles=[
                'metadata',
            ],
        )
        item.add_asset(f'ancillary-{ancillary_file.pk}', asset)

    return item.to_dict()
def create_item(metadata_href):
    """Creates a STAC Item from CORINE data.

    Args:
        metadata_href (str): The href to the XML metadata for this tif.

    This function will read the metadata file for information to place in
    the STAC item.

    Returns:
        pystac.Item: A STAC Item representing this CORINE Land Cover tile.
    """
    metadata_root = ET.parse(metadata_href).getroot()

    # Item id
    image_name_node = 'Esri/DataProperties/itemProps/itemName'
    image_name = metadata_root.find(image_name_node).text
    item_id = os.path.splitext(image_name)[0]

    # Bounding box
    bounding_box_node = 'dataIdInfo/dataExt/geoEle/GeoBndBox/{}'
    west_long = float(
        metadata_root.find(bounding_box_node.format('westBL')).text)
    east_long = float(
        metadata_root.find(bounding_box_node.format('eastBL')).text)
    south_lat = float(
        metadata_root.find(bounding_box_node.format('southBL')).text)
    north_lat = float(
        metadata_root.find(bounding_box_node.format('northBL')).text)

    geom = mapping(box(west_long, south_lat, east_long, north_lat))
    bounds = shape(geom).bounds

    # EPSG
    epsg_element = 'refSysInfo/RefSystem/refSysID/identCode'
    epsg = int(
        metadata_root.find(epsg_element).attrib['code'].replace('EPSG:', ''))

    # Item date
    id_dt_node = 'dataIdInfo/idCitation/date/pubDate'
    id_dt_text = metadata_root.find(id_dt_node).text
    id_dt = str_to_datetime(id_dt_text)

    # Title
    title_node = 'dataIdInfo/idCitation/resTitle'
    title_text = metadata_root.find(title_node).text

    item = pystac.Item(id=item_id,
                       geometry=geom,
                       bbox=bounds,
                       datetime=id_dt,
                       properties={'corine:title': title_text})

    # Common metadata
    item.common_metadata.providers = [COPERNICUS_PROVIDER]

    # proj
    item.ext.enable('projection')
    item.ext.projection.epsg = epsg

    # Tif
    # NOTE(review): href is the bare file name from the metadata, not a
    # resolved path/URL — confirm callers rewrite this asset href.
    item.add_asset(
        ITEM_TIF_IMAGE_NAME,
        pystac.Asset(href=image_name,
                     media_type=pystac.MediaType.TIFF,
                     roles=['data'],
                     title="tif image"))

    # Metadata
    item.add_asset(
        ITEM_METADATA_NAME,
        pystac.Asset(href=metadata_href,
                     media_type=pystac.MediaType.TEXT,
                     roles=['metadata'],
                     title='FGDC Metadata'))  # BUGFIX: was 'FGDC Metdata'

    return item
def render_metadata(
    product: OutputProduct,
    geobox: GeoBox,
    tile_index: TileIdx_xy,
    time_range: DateTimeRange,
    uuid: UUID,
    paths: Dict[str, str],
    metadata_path: str,
    processing_dt: Optional[datetime] = None,
) -> Dict[str, Any]:
    """
    Put together STAC metadata document for the output from the task info.

    Args:
        product: output product definition (properties, name, href).
        geobox: spatial grid of the output tile.
        tile_index: tile x/y index used to derive the region code.
        time_range: temporal extent of the output.
        uuid: deterministic dataset id, used as the item id.
        paths: mapping of band name -> output file path; one asset each.
        metadata_path: href recorded as the item's "self" link.
        processing_dt: processing timestamp; defaults to utcnow().

    Returns:
        The STAC item serialized with ``item.to_dict()``.
    """
    if processing_dt is None:
        processing_dt = datetime.utcnow()

    region_code = product.region_code(tile_index)
    # Lineage inputs are intentionally left empty here.
    inputs: List[str] = []

    properties: Dict[str, Any] = deepcopy(product.properties)
    properties["dtr:start_datetime"] = format_datetime(time_range.start)
    properties["dtr:end_datetime"] = format_datetime(time_range.end)
    properties["odc:processing_datetime"] = format_datetime(
        processing_dt, timespec="seconds")
    properties["odc:region_code"] = region_code
    properties["odc:lineage"] = dict(inputs=inputs)
    properties["odc:product"] = product.name

    # resolution=math.inf keeps the footprint unsegmented while handling
    # the antimeridian via wrapdateline.
    geobox_wgs84 = geobox.extent.to_crs("epsg:4326",
                                        resolution=math.inf,
                                        wrapdateline=True)
    bbox = geobox_wgs84.boundingbox

    item = pystac.Item(
        id=str(uuid),
        geometry=geobox_wgs84.json,
        bbox=[bbox.left, bbox.bottom, bbox.right, bbox.top],
        datetime=time_range.start.replace(tzinfo=timezone.utc),
        properties=properties,
    )

    # Enable the Projection extension
    item.ext.enable("projection")
    item.ext.projection.epsg = geobox.crs.epsg

    # Add all the assets
    for band, path in paths.items():
        asset = pystac.Asset(
            href=path,
            media_type="image/tiff; application=geotiff",
            roles=["data"],
            title=band,
        )
        item.add_asset(band, asset)
        # Per-asset projection info: all bands share the tile's grid.
        item.ext.projection.set_transform(geobox.transform, asset=asset)
        item.ext.projection.set_shape(geobox.shape, asset=asset)

    # Add links
    item.links.append(
        pystac.Link(
            rel="product_overview",
            media_type="application/json",
            target=product.href,
        ))
    item.links.append(
        pystac.Link(
            rel="self",
            media_type="application/json",
            target=metadata_path,
        ))

    return item.to_dict()
def write_stac(s3_destination: str, file_path: str, file_key: str, year: str,
               log: Logger) -> None:
    """Build and upload a STAC item for an ALOS/JERS SAR mosaic tile.

    Band file names, platform/instrument and calibration factor are chosen
    from the acquisition year (>2010: ALOS-2 F02DAR naming; 2001-2010:
    ALOS; earlier: JERS-1). The item JSON is written next to the data at
    ``s3://{s3_destination}/{file_key}.stac-item.json``.

    Note: the return annotation was corrected from ``str`` — the function
    writes to S3 and returns nothing.
    """
    region_code = file_key.split("_")[0]
    stac_href = f"s3://{s3_destination}/{file_key}.stac-item.json"

    log.info(f"Creating STAC file in memory, targeting here: {stac_href}")

    if int(year) > 2010:
        # ALOS-2 era files carry the F02DAR suffix.
        hhpath = f"{file_key}_sl_HH_F02DAR.tif"
        hvpath = f"{file_key}_sl_HV_F02DAR.tif"
        lincipath = f"{file_key}_sl_linci_F02DAR.tif"
        maskpath = f"{file_key}_sl_mask_F02DAR.tif"
        datepath = f"{file_key}_sl_date_F02DAR.tif"
        launch_date = "2014-05-24"
        shortname = "alos"
    else:
        hhpath = f"{file_key}_sl_HH.tif"
        hvpath = f"{file_key}_sl_HV.tif"
        lincipath = f"{file_key}_sl_linci.tif"
        maskpath = f"{file_key}_sl_mask.tif"
        datepath = f"{file_key}_sl_date.tif"
        if int(year) > 2000:
            launch_date = "2006-01-24"
            shortname = "alos"
        else:
            launch_date = "1992-02-11"
            shortname = "jers"

    if shortname == "alos":
        product_name = "alos_palsar_mosaic"
        platform = "ALOS/ALOS-2"
        instrument = "PALSAR/PALSAR-2"
        cf = "83.0 dB"
        bandpaths = {
            "hh": hhpath,
            "hv": hvpath,
            "linci": lincipath,
            "mask": maskpath,
            "date": datepath,
        }
    else:
        # JERS-1 has no HV polarization.
        product_name = "jers_sar_mosaic"
        platform = "JERS-1"
        instrument = "SAR"
        cf = "84.66 dB"
        bandpaths = {
            "hh": hhpath,
            "linci": lincipath,
            "mask": maskpath,
            "date": datepath,
        }

    properties = {
        "odc:product": product_name,
        "odc:region_code": region_code,
        "platform": platform,
        "instruments": [instrument],
        "cf": cf,
        "launchdate": launch_date,
        "start_datetime": f"{year}-01-01T00:00:00Z",
        "end_datetime": f"{year}-12-31T23:59:59Z",
    }

    assets = {}
    for name, path in bandpaths.items():
        href = f"s3://{s3_destination}/{path}"
        assets[name] = pystac.Asset(href=href,
                                    media_type=pystac.MediaType.COG,
                                    roles=["data"])

    item = create_stac_item(
        file_path,
        id=str(
            odc_uuid(shortname, "1", [], year=year,
                     tile=file_key.split("_")[0])),
        properties=properties,
        assets=assets,
        with_proj=True,
    )
    item.set_self_href(stac_href)

    s3_dump(
        json.dumps(item.to_dict(), indent=2),
        item.self_href,
        ContentType="application/json",
        ACL="bucket-owner-full-control",
    )
    log.info(f"STAC written to {item.self_href}")
def create_cogs(item, cog_directory=None):
    """Create COGs from the HDF asset contained in the passed in STAC item.

    Args:
        item (pystac.Item): ASTER L1T 003 Item that contains an asset
            with key equal to stactools.aster.constants.HDF_ASSET_KEY,
            which will be converted to COGs.
        cog_directory (str): A URI of a directory to store COGs. This will
            be used in conjunction with the file names based on the COG
            asset to store the COG data. If not supplied, the directory
            of the Item's self HREF will be used.

    Returns:
        pystac.Item: The same item, mutated to include assets for the
            new COGs.

    Raises:
        ValueError: if the item lacks the HDF asset, or a subdataset name
            does not match the expected ASTER L1T 003 naming.
    """
    if cog_directory is None:
        cog_directory = os.path.dirname(item.get_self_href())

    hdf_asset = item.assets.get(HDF_ASSET_KEY)
    if hdf_asset is None:
        raise ValueError(
            'Item does not have a asset with key {}.'.format(HDF_ASSET_KEY))
    hdf_href = hdf_asset.href

    with rio.open(hdf_href) as ds:
        subdatasets = ds.subdatasets

    # Gather the subdatasets by sensor, sorted by band number
    sensor_to_subdatasets = defaultdict(list)
    for sd in subdatasets:
        m = re.search(r':?([\w]+)_Swath:ImageData([\d]+)', sd)
        if m is None:
            raise ValueError(
                'Unexpected subdataset {} - is this a non-standard ASTER L1T 003 HDF-EOS file?'
                .format(sd))
        sensor_to_subdatasets[m.group(1)].append((sd, m.group(2)))

    for k in sensor_to_subdatasets:
        # Sort numerically; a plain string sort misorders multi-digit
        # band numbers (e.g. '10' < '2').
        sensor_to_subdatasets[k] = [
            x[0]
            for x in sorted(sensor_to_subdatasets[k], key=lambda x: int(x[1]))
        ]

    sensor_to_bands = defaultdict(list)
    # Gather the bands for each sensor, sorted by band number
    for band in ASTER_BANDS:
        sensor_to_bands[band.description.split('_')[0]].append(band)
    for sensor in sensor_to_bands:
        # BUGFIX: the pattern was '([d]+)', which matches the literal
        # letter 'd' (from "Band"), making the sort a no-op. Use r'(\d+)'
        # to extract the band number and sort numerically.
        sensor_to_bands[sensor] = sorted(
            sensor_to_bands[sensor],
            key=lambda b: int(re.search(r'(\d+)', b.description).group(1)))

    # Use subdataset keys, as data might be missing some sensors.
    for sensor in sensor_to_subdatasets:
        href = os.path.join(cog_directory, '{}-cog.tif'.format(sensor))
        _create_cog(item, href, sensor_to_subdatasets[sensor],
                    sensor_to_bands[sensor])

        asset = pystac.Asset(href=href,
                             media_type=pystac.MediaType.COG,
                             roles=['data'],
                             title='{} Swath data'.format(sensor))
        item.ext.eo.set_bands(sensor_to_bands[sensor], asset)
        item.assets[sensor] = asset

    # Return the mutated item, as promised by the docstring (the original
    # implicitly returned None).
    return item
def render_metadata(
        self,
        ext: str = EXT_TIFF,
        processing_dt: Optional[datetime] = None) -> Dict[str, Any]:
    """
    Put together STAC metadata document for the output of this task.

    Args:
        ext: file extension used when resolving the per-band output paths.
        processing_dt: processing timestamp; defaults to utcnow().

    Returns:
        The STAC item serialized with ``item.to_dict()``.
    """
    if processing_dt is None:
        processing_dt = datetime.utcnow()

    product = self.product
    geobox = self.geobox
    region_code = product.region_code(self.tile_index)
    inputs = list(map(str, self._lineage()))

    properties: Dict[str, Any] = deepcopy(product.properties)
    properties["dtr:start_datetime"] = format_datetime(
        self.time_range.start)
    properties["dtr:end_datetime"] = format_datetime(self.time_range.end)
    properties["odc:processing_datetime"] = format_datetime(
        processing_dt, timespec="seconds")
    properties["odc:region_code"] = region_code
    properties["odc:product"] = product.name
    properties["odc:dataset_version"] = product.version

    # resolution=math.inf keeps the footprint unsegmented while handling
    # the antimeridian via wrapdateline.
    geobox_wgs84 = geobox.extent.to_crs("epsg:4326",
                                        resolution=math.inf,
                                        wrapdateline=True)
    bbox = geobox_wgs84.boundingbox

    item = pystac.Item(
        id=str(self.uuid),
        geometry=geobox_wgs84.json,
        bbox=[bbox.left, bbox.bottom, bbox.right, bbox.top],
        datetime=self.time_range.start.replace(tzinfo=timezone.utc),
        properties=properties,
        stac_extensions=["projection"],
    )
    item.ext.projection.epsg = geobox.crs.epsg

    # Lineage last
    item.properties["odc:lineage"] = dict(inputs=inputs)

    # Add all the assets
    for band, path in self.paths(ext=ext).items():
        asset = pystac.Asset(
            href=path,
            media_type="image/tiff; application=geotiff",
            roles=["data"],
            title=band,
        )
        item.add_asset(band, asset)
        # Per-asset projection info: every band shares the tile's grid.
        item.ext.projection.set_transform(geobox.transform, asset=asset)
        item.ext.projection.set_shape(geobox.shape, asset=asset)

    # Add links
    item.links.append(
        pystac.Link(
            rel="product_overview",
            media_type="application/json",
            target=product.href,
        ))
    item.links.append(
        pystac.Link(
            rel="self",
            media_type="application/json",
            target=self.metadata_path("absolute", ext="json"),
        ))

    return item.to_dict()
def main():
    """
    CLI entry point: fetch a Sentinel-2 L2A STAC Item from the source
    catalog, gather its band assets into a single COG on S3, and POST a
    new COG Item (creating the COG Collection if needed) to the target
    STAC API.

    Returns ``None`` on success or when the Item already exists and
    ``--force`` was not supplied; returns ``-1`` if the final POST of the
    new Item does not yield an id.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pipeline-uri",
                        type=str,
                        help="A URI to JSON with instructions")
    parser.add_argument("--pipeline", type=str, help="JSON with instructions")
    parser.add_argument(
        "--sentinel-stac-id",
        type=str,
        help="STAC Item ID to process from the STAC collection")
    parser.add_argument(
        "--sentinel-collection-id",
        type=str,
        default=SENTINEL_ARCHIVE_COLLECTION_ID,
    )
    parser.add_argument(
        "--stac-api-uri",
        type=str,
        default=os.environ.get("STAC_API_URI", "http://franklin:9090"),
    )
    parser.add_argument(
        "--stac-api-uri-sentinel",
        type=str,
        default=os.environ.get("STAC_API_URI_SENTINEL",
                               "https://earth-search.aws.element84.com/v0"),
    )
    parser.add_argument("--s3-bucket",
                        type=str,
                        default=os.environ.get("S3_BUCKET",
                                               "sentinel-s2-data"))
    parser.add_argument(
        "--s3-prefix",
        type=str,
        default=os.environ.get("S3_PREFIX", "aviris-scene-cogs-l2"),
    )
    parser.add_argument("--temp-dir",
                        type=str,
                        default=os.environ.get("TEMP_DIR", None))
    parser.add_argument("--output-format",
                        type=str,
                        default=os.environ.get("GDAL_OUTPUT_FORMAT", "COG"))
    parser.add_argument(
        "--keep-temp-dir",
        action="store_true",
        help=
        "If provided, script does not delete temporary directory before script exits. Useful for debugging.",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help=
        "If provided, force reingest StacItem even though this it is already present in the catalog.",
    )

    # GDAL_WARP_MEMORY_LIMIT is optional: os.environ.get returns None when
    # unset, which makes int(None) raise TypeError.
    # NOTE(review): warpMemoryLimit is never used below — presumably read
    # by CliConfig or downstream code; confirm or remove.
    try:
        warpMemoryLimit = int(os.environ.get("GDAL_WARP_MEMORY_LIMIT", None))
    except TypeError:
        warpMemoryLimit = None

    # TODO: replace it with parser.parse_args() later
    cli_args, cli_unknown = parser.parse_known_args()

    # parse all cli arguments
    args = CliConfig(cli_args, cli_unknown)

    # NOTE(review): this S3 client is unused in this function — verify it
    # is needed before removing.
    s3 = boto3.client("s3")

    stac_client_sentinel = STACClient(args.stac_api_uri_sentinel)
    stac_client = STACClient(args.stac_api_uri)

    # Source collection provides the spatial/temporal extent reused for
    # the derived COG collection.
    collection = stac_client_sentinel.get_collection(
        args.sentinel_collection_id)

    SENTINEL_COG_COLLECTION = pystac.Collection(
        SENTINEL_COG_COLLECTION_ID,
        "Sentinel-2a and Sentinel-2b imagery, processed to Level 2A (Surface Reflectance) and converted to Cloud-Optimized GeoTIFFs",
        collection.extent,
        stac_extensions=COG_COLLECTION_EXTENSIONS)
    SENTINEL_COG_COLLECTION.links = []
    SENTINEL_COG_COLLECTION.properties = {}
    SENTINEL_COG_COLLECTION.properties['eo:bands'] = SENTINEL_BANDS
    SENTINEL_COG_COLLECTION.properties[
        'hsi:wavelength_min'] = SENTINEL_WAVELENGTH_MIN
    SENTINEL_COG_COLLECTION.properties[
        'hsi:wavelength_max'] = SENTINEL_WAVELENGTH_MAX

    # GET STAC Item from SENTINEL Catalog
    item = stac_client_sentinel.get_collection_item(
        args.sentinel_collection_id, args.sentinel_stac_id)
    assets = item.assets

    # Map band name -> /vsisil3 path derived from the asset's s3:// href.
    bands_map = {
        'B01': vsis3(strip_scheme(assets['B01'].href)),
        'B02': vsis3(strip_scheme(assets['B02'].href)),
        'B03': vsis3(strip_scheme(assets['B03'].href)),
        'B04': vsis3(strip_scheme(assets['B04'].href)),
        'B05': vsis3(strip_scheme(assets['B05'].href)),
        'B06': vsis3(strip_scheme(assets['B06'].href)),
        'B07': vsis3(strip_scheme(assets['B07'].href)),
        'B08': vsis3(strip_scheme(assets['B08'].href)),
        'B8A': vsis3(strip_scheme(assets['B8A'].href)),
        'B09': vsis3(strip_scheme(assets['B09'].href)),
        'B11': vsis3(strip_scheme(assets['B11'].href)),
        'B12': vsis3(strip_scheme(assets['B12'].href)),
        'AOT': vsis3(strip_scheme(assets['AOT'].href)),
        # 'WVP': vsis3(strip_scheme(assets['WVP'].href)),
        # 'SCL': vsis3(strip_scheme(assets['SCL'].href))
    }

    # we don't need assets here, since the gather scripts knows what and how to download by the sentinel path
    properties = item.properties
    # NOTE(review): this local shadows any imported `datetime` name for the
    # rest of the function body — it holds the parsed scene timestamp.
    datetime = dateutil.parser.isoparse(properties['datetime'])
    # here "href": "s3://sentinel-s2-l2a/tiles/31/V/CE/2021/8/19/0/R60m/B01.jp2"
    # path is tiles/31/V/CE/2021/8/19/0
    # (name `sentintel_path` is a typo kept as-is; it is only local)
    sentintel_path = 'tiles/{}/{}/{}/{}/{}/{}/{}'.format(
        properties['sentinel:utm_zone'], properties['sentinel:latitude_band'],
        properties['sentinel:grid_square'], str(datetime.year),
        str(datetime.month), str(datetime.day),
        properties['sentinel:sequence'])

    # Create new COG STAC Item
    cog_item_id = "{}_{}".format(SENTINEL_COG_COLLECTION.id, item.id)

    cog_item = pystac.Item(
        cog_item_id,
        item.geometry,
        item.bbox,
        item.datetime,
        item.properties,
        stac_extensions=COG_ITEM_EXTENSIONS,
        collection=SENTINEL_COG_COLLECTION.id,
    )

    # Copy the hyperspectral/EO summary properties from the collection
    # down onto the item.
    cog_item.properties['eo:bands'] = SENTINEL_COG_COLLECTION.properties[
        'eo:bands']
    cog_item.properties[
        'hsi:wavelength_min'] = SENTINEL_COG_COLLECTION.properties[
            'hsi:wavelength_min']
    cog_item.properties[
        'hsi:wavelength_max'] = SENTINEL_COG_COLLECTION.properties[
            'hsi:wavelength_max']
    cog_item.properties['proj:epsg'] = '4326'

    # Create COG Collection if it doesn't exist
    if not stac_client.has_collection(SENTINEL_COG_COLLECTION.id):
        stac_client.post_collection(SENTINEL_COG_COLLECTION)

    if not args.force:
        # Exit early if COG STAC Item already exists
        try:
            stac_client.get_collection_item(SENTINEL_COG_COLLECTION.id,
                                            cog_item_id)
            logger.info(f'STAC Item {cog_item_id} already exists. Exiting.')
            activation_output(cog_item_id)
            return
        except requests.exceptions.HTTPError:
            # 404 (or similar) from the API means the item is absent —
            # proceed with ingest.
            pass

    # Download the bands and assemble them into one multi-band COG on S3.
    _, s3_uri = gather_sentinel(
        f'{cog_item_id}.tiff',
        f's3://{args.s3_bucket}/{args.s3_prefix}/{sentintel_path}/',
        bands_map)

    # Add assets to COG STAC Item
    # NOTE(review): `args.output_asset_name` is not defined by the parser
    # above — presumably supplied by CliConfig; confirm.
    idx = 0
    cog_item.add_asset(
        f'{args.output_asset_name}_{idx}',
        pystac.Asset(s3_uri, media_type=pystac.MediaType.COG, roles=["data"]),
    )

    # Add COG Item to AVIRIS L2 STAC Collection
    logger.info(f"POST Item {cog_item.id} to {args.stac_api_uri}")
    item_data = stac_client.post_collection_item(SENTINEL_COG_COLLECTION.id,
                                                 cog_item)
    if item_data.get('id', None):
        logger.info(f"Success: {item_data['id']}")
        activation_output(item_data['id'])
    else:
        logger.error(f"Failure: {item_data}")
        return -1
def create_item(tif_href, additional_providers=None):
    """Creates a STAC Item from Copernicus Global Land Cover Layers data.

    Args:
        tif_href (str): The href to the metadata for this tif. This function
            will read the metadata file for information to place in
            the STAC item.
        additional_providers (list[pystac.Provider], optional): Extra
            providers recorded on the item after the Copernicus provider.

    Returns:
        pystac.Item: A STAC Item representing this Copernicus Global Land
        Cover Layers data.
    """
    with rio.open(tif_href) as f:
        tags = f.tags()
        band_tags = f.tags(1)
        bounds = f.bounds

    # Item id
    item_id = os.path.basename(tif_href).replace('.tif', '')

    # Bounds
    geom = mapping(box(bounds.left, bounds.bottom, bounds.right, bounds.top))
    bounds = shape(geom).bounds

    start_dt = str_to_datetime(tags.pop('time_coverage_start'))
    end_dt = str_to_datetime(tags.pop('time_coverage_end'))
    file_creation_dt = str_to_datetime(tags.pop('file_creation'))

    # datetime=None because this is a range product; start/end datetimes
    # are set via common_metadata below.
    item = pystac.Item(id=item_id,
                       geometry=geom,
                       bbox=bounds,
                       datetime=None,
                       properties={
                           'start_datetime': start_dt,
                           'end_datetime': end_dt,
                           'discrete_classification_class_names':
                           DISCRETE_CLASSIFICATION_CLASS_NAMES,
                           'discrete_classification_class_palette':
                           DISCRETE_CLASSIFICATION_CLASS_PALETTE
                       })

    # Common metadata
    copernicus_provider = pystac.Provider(name=PROVIDER_NAME,
                                          url=(tags.pop('doi')),
                                          roles=['producer', 'licensor'])
    # BUGFIX: build the complete provider list before a single assignment.
    # pystac's CommonMetadata.providers getter constructs a *fresh* list of
    # Provider objects from the underlying properties on every access, so
    # the previous `item.common_metadata.providers.extend(...)` mutated a
    # throwaway list and additional_providers were silently dropped.
    providers = [copernicus_provider]
    if additional_providers is not None:
        providers.extend(additional_providers)
    item.common_metadata.providers = providers

    item.common_metadata.start_datetime = start_dt
    item.common_metadata.end_datetime = end_dt
    item.common_metadata.created = file_creation_dt
    item.common_metadata.description = tags.pop('Info')
    item.common_metadata.platform = tags.pop('platform')
    item.common_metadata.title = tags.pop('title')

    # proj: EPSG code parsed out of e.g. "WGS84 (EPSG:4326)"
    item.ext.enable('projection')
    item.ext.projection.epsg = int(
        tags.pop('delivered_product_crs').replace('WGS84 (EPSG:',
                                                  '').replace(')', ''))

    # Extra fields: everything not consumed above is kept verbatim.
    for k, v in tags.items():
        item.extra_fields[k] = v

    # Bands
    long_name = band_tags.pop('long_name')
    band = pystac.extensions.eo.Band.create(
        name=long_name,
        common_name=band_tags.pop('short_name'),
        description=long_name)
    item.ext.enable('eo')
    item.ext.eo.bands = [band]

    # Tif
    item.add_asset(
        ITEM_TIF_IMAGE_NAME,
        pystac.Asset(href=tif_href,
                     media_type=pystac.MediaType.TIFF,
                     roles=['data'],
                     title="tif image"))

    return item