def lambda_handler(payload, context={}):
    logger.debug('Payload: %s' % json.dumps(payload))

    # if this is batch, output to stdout
    if not hasattr(context, "invoked_function_arn"):
        logger.addHandler(logging.StreamHandler())

    collections = payload.get('collections')
    index = payload.get('index', 'input_state')
    state = payload.get('state', 'FAILED')
    since = payload.get('since', None)
    limit = payload.get('limit', None)
    batch = payload.get('batch', False)
    catids = payload.get('catids', [])

    # if this is a lambda and batch is set
    if batch and hasattr(context, "invoked_function_arn"):
        submit_batch_job(payload, context.invoked_function_arn, name='rerun')
        return

    if len(catids) > 0:
        catalogs = Catalogs.from_catids(catids)
        logger.debug(f"Rerunning {len(catalogs)} catalogs")
        catids = catalogs.process(replace=True)
        logger.info(f"{len(catids)} catalogs rerun")
        return catids

    catalogs = Catalogs.from_statedb(collections, state, since, index, limit=limit)
    logger.info(f"Fetched {len(catalogs.catalogs)} catalogs")
    catids = catalogs.process(replace=True)
    logger.info(f"{len(catids)} catalogs rerun")
    return catids
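
# Illustrative only: a minimal rerun payload sketch built from the keys the
# handler above reads (collections, index, state, since, limit, batch, catids).
# All values are hypothetical and not taken from any real deployment.
example_rerun_payload = {
    'collections': 'landsat-8-l1',  # hypothetical collections string for the state DB query
    'index': 'input_state',         # state DB index to query (default shown)
    'state': 'FAILED',              # rerun catalogs currently in this state
    'since': '1d',                  # hypothetical time window accepted by from_statedb
    'limit': 100,                   # cap on number of catalogs fetched
    'batch': False,                 # True submits a Batch job instead of running inline
    # 'catids': ['<catalog-id>'],   # alternatively, rerun explicit catalog IDs
}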
def lambda_handler(payload, context):
    logger.debug('Payload: %s' % json.dumps(payload))

    catids = []
    for catalog in Catalogs.from_payload(payload):
        logger.debug(f"Catalog: {json.dumps(catalog)}")
        try:
            # get workflow ARN
            arn = getenv('BASE_WORKFLOW_ARN') + catalog['process']['workflow']

            # invoke step function
            logger.info(f"Running {arn} on {catalog['id']}")
            exe_response = stepfunctions.start_execution(
                stateMachineArn=arn,
                input=json.dumps(catalog.get_payload())
            )
            logger.debug(f"Start execution response: {exe_response}")

            # set state to PROCESSING
            # TODO - what happens if the step function starts but set_processing fails - it will be stuck in queue state
            resp = statedb.set_processing(catalog['id'], exe_response['executionArn'])
            logger.debug(f"Set process response: {resp}")
            catids.append(catalog['id'])
        except Exception as err:
            msg = f"start-workflow: failed starting {catalog['id']} ({err})"
            logger.error(msg)
            logger.error(format_exc())
            statedb.set_failed(catalog['id'], msg)

    return catids
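
# Illustrative only: sketch of the catalog fields this handler relies on when
# starting a Step Functions execution. The ID and workflow name are hypothetical;
# the workflow value is appended to BASE_WORKFLOW_ARN to form the state machine ARN.
example_start_workflow_catalog = {
    'id': 'landsat-8-l1/workflow-cog/LC08_example',  # hypothetical catalog ID
    'process': {
        'workflow': 'cog',  # hypothetical workflow name suffix
    },
}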
def lambda_handler(payload, context):
    logger.debug('Payload: %s' % json.dumps(payload))
    catalogs = Catalogs.from_payload(payload)
    catids = catalogs.process()
    return catids
def lambda_handler(payload, context):
    logger.debug(json.dumps(payload))

    # Read SQS payload
    if 'Records' not in payload:
        raise ValueError("Input not from SQS")

    catalogs = []
    for record in [json.loads(r['body']) for r in payload['Records']]:
        cat = json.loads(record['Message'])
        logger.debug('cat: %s' % json.dumps(cat))
        # expand catids to full catalogs
        if 'catids' in cat:
            _cats = Catalogs.from_catids(cat['catids'])
            if 'process_update' in cat:
                logger.debug(f"Process update: {json.dumps(cat['process_update'])}")
                for c in _cats:
                    c['process'] = dict_merge(c['process'], cat['process_update'])
            catalogs += _cats
        elif cat.get('type', '') == 'Feature':
            # If Item, create Catalog and use default process for that collection
            if cat['collection'] not in PROCESSES.keys():
                raise ValueError(f"Default process not provided for collection {cat['collection']}")
            cat_json = {
                'type': 'FeatureCollection',
                'features': [cat],
                'process': PROCESSES[cat['collection']]
            }
            catalogs.append(Catalog(cat_json, update=True))
        else:
            catalogs.append(Catalog(cat, update=True))

    if len(catalogs) > 0:
        cats = Catalogs(catalogs)
        cats.process()

    return len(catalogs)
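
# Illustrative only: the shape of the SQS event this handler expects. Each record
# body is itself JSON (an SNS-notification-style envelope with a 'Message' field),
# and the message is either a list of catids, a single STAC Feature, or a full
# input catalog. All IDs and values below are hypothetical placeholders.
example_sqs_event = {
    'Records': [
        {
            'body': json.dumps({
                'Message': json.dumps({
                    'catids': ['<catalog-id-1>', '<catalog-id-2>'],
                    # optional partial process definition merged into each catalog
                    'process_update': {'workflow': 'publish-only'},
                })
            })
        }
    ]
}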
def lambda_handler(payload, context={}):
    # if this is batch, output to stdout
    if not hasattr(context, "invoked_function_arn"):
        logger.addHandler(logging.StreamHandler())
    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]

    # assign proper collection names based on ID
    catalog.assign_collections()

    # TODO - make this more general for more items/collections
    item = catalog['features'][0]  #, collection=catalog['collections'][0])

    # configuration options
    config = catalog['process']['tasks'].get('copy-assets', {})
    outopts = catalog['process'].get('output_options', {})

    # asset config
    assets = config.get('assets', item['assets'].keys())
    drop_assets = config.get('drop_assets', [])

    # drop specified assets
    for asset in [a for a in drop_assets if a in item['assets'].keys()]:
        logger.debug(f'Dropping asset {asset}')
        item['assets'].pop(asset)

    if type(assets) is str and assets == 'ALL':
        assets = item['assets'].keys()

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        # copy specified assets
        _assets = [a for a in assets if a in item['assets'].keys()]
        for asset in _assets:
            item = download_item_assets(item, path=tmpdir, assets=[asset])
            item = upload_item_assets(item, assets=[asset], **outopts)

        # replace item in catalog
        catalog['features'][0] = item
    except Exception as err:
        msg = f"copy-assets: failed processing {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s' % tmpdir)
        rmtree(tmpdir)

    return catalog
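
# Illustrative only: a copy-assets task configuration sketch using the keys read
# above ('assets', 'drop_assets'). Asset names are hypothetical; setting
# 'assets' to the string 'ALL' copies every asset on the item.
example_copy_assets_config = {
    'copy-assets': {
        'assets': ['thumbnail', 'metadata'],  # hypothetical assets to copy
        'drop_assets': ['ANG'],               # hypothetical assets removed before copying
    }
}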
def lambda_handler(payload, context):
    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]
    logger.debug('Catalog: %s' % json.dumps(catalog))

    config = catalog['process']['tasks'].get('publish', {})
    public = config.get('public', False)
    # additional SNS topics to publish to
    topics = config.get('sns', [])

    # these are the URLs to the canonical records on s3
    s3urls = []

    # assign proper collection names based on ID
    catalog.assign_collections()

    try:
        # publish to s3
        s3urls = catalog.publish_to_s3(DATA_BUCKET, public=public)

        # publish to Cirrus SNS publish topic
        catalog.publish_to_sns()

        # Deprecated additional topics
        if PUBLISH_TOPICS:
            for t in PUBLISH_TOPICS.split(','):
                catalog.publish_to_sns(t)

        for t in topics:
            catalog.publish_to_sns(t)
    except Exception as err:
        msg = f"publish: failed publishing output items in {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err

    try:
        # update processing in table
        statedb.set_completed(catalog['id'], s3urls)
        logger.info(f"publish: completed processing {catalog['id']}")
    except Exception as err:
        msg = f"publish: failed setting {catalog['id']} as complete ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err

    return catalog
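
# Illustrative only: a publish task configuration sketch using the keys read
# above ('public', 'sns'). The topic ARN is a hypothetical placeholder.
example_publish_config = {
    'publish': {
        'public': True,  # make the published s3 objects publicly readable
        'sns': ['arn:aws:sns:us-west-2:123456789012:example-topic'],  # extra topics to publish to
    }
}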
def lambda_handler(payload, context):
    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]

    url = f"s3://{CATALOG_BUCKET}/batch/{catalog['id']}/{uuid.uuid1()}.json"

    try:
        # copy payload to s3
        s3().upload_json(catalog, url)
        logger.debug(f"Uploaded {catalog['id']} to {url}")
        logger.info(f"Completed pre processing batch job for {catalog['id']}")
        return {'url': url}
    except Exception as err:
        msg = f"pre-batch: failed pre processing batch job for {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err
def lambda_handler(payload, context={}):
    # if this is batch, output to stdout
    if not hasattr(context, "invoked_function_arn"):
        logger.addHandler(logging.StreamHandler())
    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]

    # get step configuration
    config = catalog['process']['tasks'].get('add-preview', {})
    outopts = catalog['process'].get('output_options', {})
    assets = config.pop('assets', None)
    thumb = config.pop('thumbnail', False)

    if assets is None:
        msg = f"add-preview: no asset specified for preview, skipping {catalog['id']}"
        logger.error(msg)
        raise Exception(msg)

    # create temporary work directory
    tmpdir = tempfile.mkdtemp()

    items = []
    for item in catalog['features']:
        # find asset to use for preview
        asset = None
        for a in assets:
            if a in item['assets']:
                asset = a
                break
        if asset is None:
            msg = f"add-preview: no preview asset found, skipping {item['id']}"
            logger.warning(msg)
            # keep the item unchanged and move on to the next one
            items.append(item)
            continue
        try:
            # keep original href
            href = item['assets'][asset]['href']

            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])

            # add preview to item
            add_preview(item, item['assets'][asset]['href'], **config)

            if thumb:
                # add thumbnail to item
                add_thumbnail(item, item['assets']['preview']['href'])

            # put back original href
            item['assets'][asset]['href'] = href

            # set item in return catalog to this new item
            #catalog['features'][0] = item._data

            # upload these new assets
            item = upload_item_assets(item, assets=['preview', 'thumbnail'], **outopts)
            items.append(item)
        except Exception as err:
            msg = f"add-preview: failed creating preview/thumbnail for {catalog['id']} ({err})"
            logger.error(msg)
            logger.error(format_exc())
            # remove work directory....very important for Lambdas!
            logger.debug('Removing work directory %s' % tmpdir)
            shutil.rmtree(tmpdir)
            raise Exception(msg) from err

    catalog['features'] = items

    # remove work directory....very important for Lambdas!
    logger.debug('Removing work directory %s' % tmpdir)
    shutil.rmtree(tmpdir)

    return catalog
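
# Illustrative only: an add-preview task configuration sketch. 'assets' lists
# candidate assets in priority order (the first one present on the item is used);
# 'assets' and 'thumbnail' are popped above and any remaining keys are passed to
# add_preview() as keyword arguments. Asset names here are hypothetical.
example_add_preview_config = {
    'add-preview': {
        'assets': ['visual', 'B04'],  # hypothetical candidate assets for the preview
        'thumbnail': True,            # also generate a thumbnail from the preview
    }
}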
def lambda_handler(payload, context={}):
    # if this is batch, output to stdout
    if not hasattr(context, "invoked_function_arn"):
        logger.addHandler(StreamHandler())
    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]

    # TODO - make this more general for more items/collections
    item = catalog['features'][0]  #, collection=catalog['collections'][0])

    # configuration options
    config = catalog['process']['tasks'].get('convert-to-cog', {})
    outopts = catalog['process'].get('output_options', {})
    assets = config.get('assets')

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        asset_keys = [a for a in assets if a in item['assets'].keys()]

        for asset in asset_keys:
            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])
            logger.debug(f"Downloaded item: {json.dumps(item)}")

            # cogify
            fn = item['assets'][asset]['href']
            fnout = cogify(fn, op.splitext(fn)[0] + '.tif', **assets[asset])
            item['assets'][asset]['href'] = fnout
            item['assets'][asset]['type'] = "image/tiff; application=geotiff; profile=cloud-optimized"
            with rasterio.open(fnout) as src:
                item['assets'][asset]['proj:shape'] = src.shape
                item['assets'][asset]['proj:transform'] = src.transform

            # upload assets
            item = upload_item_assets(item, assets=[asset], **outopts)

            # cleanup files
            if op.exists(fn):
                remove(fn)
            if op.exists(fnout):
                remove(fnout)

        # add derived_from link
        links = [l['href'] for l in item['links'] if l['rel'] == 'self']
        if len(links) == 1:
            # add derived from link
            item['links'].append({
                'title': 'Source STAC Item',
                'rel': 'derived_from',
                'href': links[0],
                'type': 'application/json'
            })

        # drop any specified assets
        for asset in [a for a in config.get('drop_assets', []) if a in item['assets'].keys()]:
            logger.info(f"Dropping {asset}")
            item['assets'].pop(asset)

        catalog['features'][0] = item
    except CRSError as err:
        msg = f"convert-to-cog: invalid CRS for {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise InvalidInput(msg)
    except Exception as err:
        msg = f"convert-to-cog: failed creating COGs for {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s' % tmpdir)
        rmtree(tmpdir)

    return catalog
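
# Illustrative only: a convert-to-cog task configuration sketch. 'assets' maps
# each asset key to keyword arguments forwarded to cogify() above; the option
# names shown are hypothetical and depend on cogify's actual signature, and the
# asset keys are placeholders.
example_convert_to_cog_config = {
    'convert-to-cog': {
        'assets': {
            'B02': {'nodata': 0},  # hypothetical per-asset cogify options
            'B03': {'nodata': 0},
        },
        'drop_assets': ['B02_original'],  # hypothetical assets removed after conversion
    }
}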