Example #1
def lambda_handler(payload, context={}):
    logger.debug('Payload: %s' % json.dumps(payload))

    # if this is batch, output to stdout
    if not hasattr(context, "invoked_function_arn"):
        logger.addHandler(logging.StreamHandler())

    collections = payload.get('collections')
    index = payload.get('index', 'input_state')
    state = payload.get('state', 'FAILED')
    since = payload.get('since', None)
    limit = payload.get('limit', None)
    batch = payload.get('batch', False)
    catids = payload.get('catids', [])

    # if this is a lambda and batch is set
    if batch and hasattr(context, "invoked_function_arn"):
        submit_batch_job(payload, context.invoked_function_arn, name='rerun')
        return

    if len(catids) > 0:
        catalogs = Catalogs.from_catids(catids)
        logger.debug(f"Rerunning {len(catalogs)} catalogs")
        catids = catalogs.process(replace=True)
        logger.info(f"{len(catids)} catalogs rerun")
        return catids

    catalogs = Catalogs.from_statedb(collections, state, since, index, limit=limit)

    logger.info(f"Fetched {len(catalogs.catalogs)} catalogs")
    catids = catalogs.process(replace=True)
    logger.info(f"{len(catids)} catalogs rerun")

    return catids
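
A minimal local-invocation sketch for this rerun handler, assuming the module-level logger, Catalogs, and submit_batch_job shown above. The payload keys mirror the handler's .get() calls; the collection and state values are illustrative, not real ones:

# hypothetical local rerun; leaving context at its default {} means
# hasattr(context, "invoked_function_arn") is False, so logs go to stdout
payload = {
    'collections': 'landsat-8-l1',  # collections to query in the state database
    'state': 'FAILED',              # rerun only failed catalogs
    'limit': 10,                    # fetch at most 10 catalogs
}
catids = lambda_handler(payload)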
Example #2
def lambda_handler(payload, context):
    logger.debug('Payload: %s' % json.dumps(payload))

    catids = []
    for catalog in Catalogs.from_payload(payload):
        logger.debug(f"Catalog: {json.dumps(catalog)}")
        try:
            # get workflow ARN
            arn = getenv('BASE_WORKFLOW_ARN') + catalog['process']['workflow']

            # invoke step function
            logger.info(f"Running {arn} on {catalog['id']}")
            exe_response = stepfunctions.start_execution(stateMachineArn=arn, input=json.dumps(catalog.get_payload()))
            logger.debug(f"Start execution response: {exe_response}")

            # set state to PROCESSING
            # TODO - what happens if the step function starts but set_processing fails? The catalog would be stuck in a queued state
            resp = statedb.set_processing(catalog['id'], exe_response['executionArn'])
            logger.debug(f"Set process response: {resp}")
            catids.append(catalog['id'])
        except Exception as err:
            msg = f"start-workflow: failed starting {catalog['id']} ({err})"
            logger.error(msg)
            logger.error(format_exc())
            statedb.set_failed(catalog['id'], msg)

    return catids
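
The state machine ARN is built by plain string concatenation, so BASE_WORKFLOW_ARN must contain everything up to the workflow name. A sketch of the expected shapes; the region, account, and workflow name are assumptions:

# assumed environment: BASE_WORKFLOW_ARN=arn:aws:states:us-west-2:123456789012:stateMachine:
catalog_payload = {
    'type': 'FeatureCollection',
    'id': 'landsat-8-l1/cog/item1',  # illustrative catalog id
    'features': [],                  # STAC Items would go here
    'process': {'workflow': 'cog'},  # appended to BASE_WORKFLOW_ARN
}
# start_execution is then called with:
#   stateMachineArn=arn:aws:states:us-west-2:123456789012:stateMachine:cog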
Example #3
def lambda_handler(payload, context):
    logger.debug('Payload: %s' % json.dumps(payload))

    catalogs = Catalogs.from_payload(payload)

    catids = catalogs.process()

    return catids
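
A direct-invocation sketch; Catalogs.from_payload accepts the same catalog JSON shown in Example #2, and the return value is the list of processed catalog ids:

# hypothetical invocation, reusing the illustrative payload from Example #2
catids = lambda_handler(catalog_payload, None)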
Example #4
def lambda_handler(payload, context):
    logger.debug(json.dumps(payload))

    # Read SQS payload
    if 'Records' not in payload:
        raise ValueError("Input not from SQS")

    catalogs = []
    for record in [json.loads(r['body']) for r in payload['Records']]:
        cat = json.loads(record['Message'])
        logger.debug('cat: %s' % json.dumps(cat))
        # expand catids to full catalogs
        if 'catids' in cat:
            _cats = Catalogs.from_catids(cat['catids'])
            if 'process_update' in cat:
                logger.debug(
                    f"Process update: {json.dumps(cat['process_update'])}")
                for c in _cats:
                    c['process'] = dict_merge(c['process'],
                                              cat['process_update'])
            catalogs += _cats
        elif cat.get('type', '') == 'Feature':
            # If Item, create Catalog and use default process for that collection
            if cat['collection'] not in PROCESSES.keys():
                raise ValueError(
                    f"Default process not provided for collection {cat['collection']}"
                )
            cat_json = {
                'type': 'FeatureCollection',
                'features': [cat],
                'process': PROCESSES[cat['collection']]
            }
            catalogs.append(Catalog(cat_json, update=True))
        else:
            catalogs.append(Catalog(cat, update=True))

    if len(catalogs) > 0:
        cats = Catalogs(catalogs)
        cats.process()

    return len(catalogs)
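
The double json.loads reflects SNS notifications delivered through SQS: each SQS record's body is an SNS envelope whose Message field holds the actual catalog (or catids) JSON. A hand-built event in that shape, with made-up ids:

# hypothetical SNS-over-SQS event
event = {
    'Records': [{
        'body': json.dumps({            # SQS body = SNS envelope
            'Message': json.dumps({     # SNS Message = the payload itself
                'catids': ['landsat-8-l1/cog/item1'],
                'process_update': {'replace': True},  # merged into each catalog's process
            })
        })
    }]
}
lambda_handler(event, None)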
Example #5
def lambda_handler(payload, context={}):
    # if this is batch, output to stdout
    if not hasattr(context, "invoked_function_arn"):
        logger.addHandler(logging.StreamHandler())
    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]

    # assign proper collection names based on ID
    catalog.assign_collections()

    # TODO - make this more general for more items/collections
    item = catalog['features'][0]

    # configuration options
    config = catalog['process']['tasks'].get('copy-assets', {})
    outopts = catalog['process'].get('output_options', {})

    # asset config
    assets = config.get('assets', item['assets'].keys())
    drop_assets = config.get('drop_assets', [])
    # drop specified assets
    for asset in [a for a in drop_assets if a in item['assets'].keys()]:
        logger.debug(f'Dropping asset {asset}')
        item['assets'].pop(asset)
    if type(assets) is str and assets == 'ALL':
        assets = item['assets'].keys()

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        # copy specified assets
        _assets = [a for a in assets if a in item['assets'].keys()]

        for asset in _assets:
            item = download_item_assets(item, path=tmpdir, assets=[asset])

            item = upload_item_assets(item, assets=[asset], **outopts)

        # replace item in catalog
        catalog['features'][0] = item
    except Exception as err:
        msg = f"copy-assets: failed processing {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s' % tmpdir)
        rmtree(tmpdir)

    return catalog
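
A sketch of the process block this task reads. 'assets' defaults to every asset on the item unless listed explicitly (the literal string 'ALL' also selects everything), and drop_assets are removed before copying; the asset keys and template below are assumptions:

# illustrative configuration only
'process': {
    'tasks': {
        'copy-assets': {
            'assets': ['thumbnail', 'B04'],  # copy only these assets
            'drop_assets': ['ANG']           # remove this asset first
        }
    },
    'output_options': {                      # forwarded to upload_item_assets as **outopts
        'path_template': '${collection}/${id}'
    }
}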
Example #6
def lambda_handler(payload, context):
    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]

    logger.debug('Catalog: %s' % json.dumps(catalog))

    config = catalog['process']['tasks'].get('publish', {})
    public = config.get('public', False)
    # additional SNS topics to publish to
    topics = config.get('sns', [])

    # these are the URLs to the canonical records on s3
    s3urls = []

    # assign proper collection names based on ID
    catalog.assign_collections()

    try:
        # publish to s3
        s3urls = catalog.publish_to_s3(DATA_BUCKET, public=public)

        # publish to Cirrus SNS publish topic
        catalog.publish_to_sns()

        # Deprecated additional topics
        if PUBLISH_TOPICS:
            for t in PUBLISH_TOPICS.split(','):
                catalog.publish_to_sns(t)

        for t in topics:
            catalog.publish_to_sns(t)
    except Exception as err:
        msg = f"publish: failed publishing output items in {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err

    try:
        # update processing in table
        statedb.set_completed(catalog['id'], s3urls)
        logger.info(f"publish: completed processing {catalog['id']}")
    except Exception as err:
        msg = f"publish: failed setting {catalog['id']} as complete ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err

    return catalog
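
The publish task reads only two config keys, shown here with a made-up topic ARN:

# illustrative publish configuration
'process': {
    'tasks': {
        'publish': {
            'public': True,  # make the published S3 objects public
            'sns': ['arn:aws:sns:us-west-2:123456789012:extra-topic']  # additional topics
        }
    }
}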
Example #7
def lambda_handler(payload, context):
    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]

    url = f"s3://{CATALOG_BUCKET}/batch/{catalog['id']}/{uuid.uuid1()}.json"

    try:
        # copy payload to s3
        s3().upload_json(catalog, url)

        logger.debug(f"Uploaded {catalog['id']} to {url}")
        logger.info(f"Completed pre processing batch job for {catalog['id']}")
        return {'url': url}
    except Exception as err:
        msg = f"pre-batch: failed pre processing batch job for {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err
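
The returned URL is what the downstream Batch job consumes: the payload is staged in S3 rather than passed inline, since Batch job parameters are size-limited. A sketch of the read side, assuming the s3 helper exposes a read_json counterpart to the upload_json call above:

# inside the batch job (read_json is an assumption mirroring upload_json)
resp = lambda_handler(payload, context)
catalog_json = s3().read_json(resp['url'])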
Example #8
def lambda_handler(payload, context={}):
    # if this is batch, output to stdout
    if not hasattr(context, "invoked_function_arn"):
        logger.addHandler(logging.StreamHandler())

    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]

    # get step configuration
    config = catalog['process']['tasks'].get('add-preview', {})
    outopts = catalog['process'].get('output_options', {})
    assets = config.pop('assets', None)
    thumb = config.pop('thumbnail', False)

    if assets is None:
        msg = f"add-preview: no asset specified for preview, skipping {catalog['id']}"
        logger.error(msg)
        raise Exception(msg)

    # create temporary work directory
    tmpdir = tempfile.mkdtemp()
    items = []
    for item in catalog['features']:
        # find asset to use for preview
        asset = None
        for a in assets:
            if a in item['assets']:
                asset = a
                break
        if asset is None:
            msg = f"add-preview: no matching asset found for preview, skipping {item['id']}"
            logger.warning(msg)
            # keep the item unchanged rather than aborting the whole catalog
            items.append(item)
            continue

        try:
            # keep original href
            href = item['assets'][asset]['href']
            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])

            # add preview to item

            add_preview(item, item['assets'][asset]['href'], **config)
            if thumb:
                # add thumbnail to item
                add_thumbnail(item, item['assets']['preview']['href'])

            # put back original href
            item['assets'][asset]['href'] = href

            # set item in return catalog to this new item
            #catalog['features'][0] = item._data
            # upload these new assets
            item = upload_item_assets(item,
                                      assets=['preview', 'thumbnail'],
                                      **outopts)
            items.append(item)
        except Exception as err:
            msg = f"add-preview: failed creating preview/thumbnail for {catalog['id']} ({err})"
            logger.error(msg)
            logger.error(format_exc())
            # remove work directory....very important for Lambdas!
            logger.debug('Removing work directory %s' % tmpdir)
            shutil.rmtree(tmpdir)
            raise Exception(msg) from err

    catalog['features'] = items

    # remove work directory....very important for Lambdas!
    logger.debug('Removing work directory %s' % tmpdir)
    shutil.rmtree(tmpdir)

    return catalog
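
The 'assets' list is an ordered preference: the first key present on an item becomes the preview source. Everything remaining in config after 'assets' and 'thumbnail' are popped is forwarded to add_preview as keyword arguments. An illustrative config, with assumed asset keys:

# illustrative add-preview configuration
'tasks': {
    'add-preview': {
        'assets': ['visual', 'B04'],  # try 'visual' first, then 'B04'
        'thumbnail': True             # also derive a thumbnail from the preview
        # any other keys here pass straight through to add_preview(**config)
    }
}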
Example #9
def lambda_handler(payload, context={}):
    # if this is batch, output to stdout
    if not hasattr(context, "invoked_function_arn"):
        logger.addHandler(StreamHandler())
    logger.debug('Payload: %s' % json.dumps(payload))

    catalog = Catalogs.from_payload(payload)[0]

    # TODO - make this more general for more items/collections
    item = catalog['features'][0]

    # configuration options
    config = catalog['process']['tasks'].get('convert-to-cog', {})
    outopts = catalog['process'].get('output_options', {})
    # mapping of asset key -> keyword arguments for cogify()
    assets = config.get('assets', {})

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        asset_keys = [a for a in assets if a in item['assets'].keys()]

        for asset in asset_keys:
            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])
            logger.debug(f"Downloaded item: {json.dumps(item)}")

            # cogify
            fn = item['assets'][asset]['href']
            fnout = cogify(fn, op.splitext(fn)[0] + '.tif', **assets[asset])
            item['assets'][asset]['href'] = fnout
            item['assets'][asset][
                'type'] = "image/tiff; application=geotiff; profile=cloud-optimized"
            with rasterio.open(fnout) as src:
                item['assets'][asset]['proj:shape'] = src.shape
                item['assets'][asset]['proj:transform'] = src.transform

            # upload assets
            item = upload_item_assets(item, assets=[asset], **outopts)
            # cleanup files
            if op.exists(fn):
                remove(fn)
            if op.exists(fnout):
                remove(fnout)

        # add derived_from link
        links = [l['href'] for l in item['links'] if l['rel'] == 'self']
        if len(links) == 1:
            # add derived from link
            item['links'].append({
                'title': 'Source STAC Item',
                'rel': 'derived_from',
                'href': links[0],
                'type': 'application/json'
            })

        # drop any specified assets
        for asset in [
                a for a in config.get('drop_assets', [])
                if a in item['assets'].keys()
        ]:
            logger.info(f"Dropping {asset}")
            item['assets'].pop(asset)

        catalog['features'][0] = item
    except CRSError as err:
        msg = f"convert-to-cog: invalid CRS for {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise InvalidInput(msg) from err
    except Exception as err:
        msg = f"convert-to-cog: failed creating COGs for {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s' % tmpdir)
        rmtree(tmpdir)

    return catalog
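
Note that 'assets' here must be a mapping rather than a list: the handler iterates its keys and unpacks each value into cogify as keyword arguments (**assets[asset]). An illustrative config; the cogify options are assumptions:

# illustrative convert-to-cog configuration
'tasks': {
    'convert-to-cog': {
        'assets': {
            'B04': {'nodata': 0}  # asset key -> keyword arguments for cogify()
        },
        'drop_assets': ['B04_jp2']  # removed from the item after conversion
    }
}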