def publish_to_s3(self, bucket, public=False) -> List:
    """Publish all Items to s3

    Args:
        bucket (str): Name of bucket to publish to
        public (bool, optional): Make published STAC Item public. Defaults to False.

    Returns:
        List: List of s3 URLs to published Items
    """
    opts = self['process'].get('output_options', {})
    s3urls = []
    for item in self['features']:
        # determine URL of data bucket to publish to - always do this
        url = os.path.join(get_path(item, opts.get('path_template')), f"{item['id']}.json")
        if url[0:5] != 's3://':
            url = f"s3://{bucket}/{url.lstrip('/')}"
        if public:
            url = s3.s3_to_https(url)

        # add canonical and self links (and remove existing self link if present)
        item['links'] = [l for l in item['links'] if l['rel'] not in ['self', 'canonical']]
        item['links'].insert(0, {
            'rel': 'canonical',
            'href': url,
            'type': 'application/json'
        })
        item['links'].insert(0, {
            'rel': 'self',
            'href': url,
            'type': 'application/json'
        })

        # get s3 session
        s3session = get_s3_session(s3url=url)

        # publish to bucket
        headers = opts.get('headers', {})
        extra = {'ContentType': 'application/json'}
        extra.update(headers)
        s3session.upload_json(item, url, public=public, extra=extra)
        s3urls.append(url)
        logger.info(f"Uploaded STAC Item {item['id']} as {url}")

    return s3urls
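
# A minimal sketch of the path templating publish_to_s3 relies on, assuming
# get_path() substitutes item fields into a '${...}'-style template (the real
# cirrus-lib helper supports more substitutions than shown). example_get_path
# and the default template are hypothetical, for illustration only.
from string import Template

def example_get_path(item: dict, template: str = '${collection}/${id}') -> str:
    # collect the fields a template commonly references: id, collection,
    # and any STAC properties
    fields = {'id': item['id'], 'collection': item.get('collection', '')}
    fields.update(item.get('properties', {}))
    # leave any unrecognized placeholders untouched rather than raising
    return Template(template).safe_substitute(fields)

# example_get_path({'id': 'scene-1', 'collection': 'landsat'}) -> 'landsat/scene-1'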
def lambda_handler(event, context=None):
    logger.debug('Event: %s' % json.dumps(event))

    # parse input
    s3urls = event['s3urls']
    suffix = event.get('suffix', 'json')

    # process block required
    process = event['process']

    # create the SNS client and resolve the Cirrus topic once, not per item
    client = boto3.client('sns')
    SNS_TOPIC = os.getenv('CIRRUS_QUEUE_TOPIC_ARN')

    num = 0
    for s3url in s3urls:
        s3session = get_s3_session(s3url=s3url)
        logger.info(f"Searching {s3url} for STAC Items")
        # TODO - s3.find() will not work with requester pays needed for listing
        for filename in s3session.find(s3url, suffix=suffix):
            item = s3session.read_json(filename)
            # verify this is a STAC Item before continuing
            if item.get('type', '') != 'Feature':
                continue
            # update relative urls, assuming assets have relative paths for now
            for a in item['assets']:
                item['assets'][a]['href'] = os.path.join(
                    os.path.dirname(filename), item['assets'][a]['href'])
            # create catalog
            catalog = {
                'type': 'FeatureCollection',
                'collections': [],
                'features': [item],
                'process': process
            }
            # feed to cirrus through SNS topic
            client.publish(TopicArn=SNS_TOPIC, Message=json.dumps(catalog))
            if (num % 500) == 0:
                logger.debug(f"Added {num+1} items to Cirrus")
            num += 1

    logger.info(f"Published {num} catalogs")
    return num
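
# A hedged invocation sketch for the handler above. The event keys ('s3urls',
# 'suffix', 'process') come from the handler itself; the bucket URL and
# workflow name are invented for illustration. Running it for real requires
# AWS credentials and the CIRRUS_QUEUE_TOPIC_ARN environment variable.
example_event = {
    's3urls': ['s3://my-bucket/published/'],  # prefixes to crawl for STAC Items
    'suffix': 'json',                         # filename suffix to match
    'process': {'workflow': 'mirror'},        # attached verbatim to each payload
}
# lambda_handler(example_event)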
def test_get_s3_session(self):
    session = transfer.get_s3_session(region_name='us-west-2')
    buckets = session.s3.list_buckets()
    assert buckets['Buckets'][0]['Name'] == testbucket
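
# The test above assumes a module-level 'testbucket' that already exists in the
# targeted account or mock. A setup sketch using moto's mock_s3 decorator (an
# assumption: the suite may instead use a real bucket, or a newer moto release,
# where the decorator is mock_aws):
import boto3
from moto import mock_s3

testbucket = 'test-bucket'

@mock_s3
def example_setup():
    # create the bucket that test_get_s3_session expects to list first
    client = boto3.client('s3', region_name='us-west-2')
    client.create_bucket(
        Bucket=testbucket,
        CreateBucketConfiguration={'LocationConstraint': 'us-west-2'})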