Example #1
def indexing_lambda(event, context):
    if 'Records' in event:
        global search_flow_verified
        global es_indexer

        if search_flow_verified is False:
            # determine the source API information to ensure the search flow is configured
            api_name = None

            # the eventSourceARN is carried on each stream record, not on the top-level event
            event_source_tokens = event['Records'][0].get("eventSourceARN").split(":")
            if event_source_tokens[2] == "dynamodb":
                table_spec_tokens = event_source_tokens[5].split("/")
                table = table_spec_tokens[1]

                # resolve the API Namespace from the table name, which carries the stage suffix
                api_name = table.replace(f"-{STAGE}", "")

            # create an API Metadata Handler
            api_metadata_handler = ApiMetadata(REGION, log)

            api_metadata = api_metadata_handler.get_api_metadata(api_name, STAGE)

            # create the streams integration handler before verifying the delivery streams
            es_indexer = StreamsIntegration(STAGE, api_metadata.get("SearchConfig"))

            # verify that the delivery streams are in place
            es_indexer.configure_search_flow(endpoints=api_metadata.get("SearchConfig").get("DeliveryStreams"),
                                             es_domain_name=event.get(params.ES_DOMAIN),
                                             firehose_delivery_role_arn=event.get(
                                                 params.FIREHOSE_DELIVERY_ROLE_ARN),
                                             failure_record_bucket=event.get(
                                                 params.DELIVERY_STREAM_FAILURE_BUCKET),
                                             kms_key_arn=event.get(params.KMS_KEY_ARN)
                                             )
            search_flow_verified = True

        output = es_indexer.forward_to_es_firehose(records=event['Records'])
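For reference, a minimal sketch of how the eventSourceARN parsing above recovers the table name from a DynamoDB Streams record (the ARN value is illustrative):

# illustrative DynamoDB Streams source ARN - account, region and table name are made up
arn = "arn:aws:dynamodb:us-east-1:123456789012:table/Customer-dev/stream/2020-01-01T00:00:00.000"

tokens = arn.split(":")
print(tokens[2])                # "dynamodb" - identifies the event source service
print(tokens[5].split("/")[1])  # "Customer-dev" - the stage-suffixed table name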
Example #2
def get_registry(region: str,
                 stage: str,
                 logger: logging.Logger = None) -> list:
    """
    Method to return the list of all API Namespaces in the Stage and Region

    :param region: The AWS Region to query
    :param stage: The Stage to query for Namespaces
    :param logger: Optional logger to use; a default logger is created when None
    :return: list:
        str: Namespace name
    """
    # create an API Metadata Handler
    global log
    if logger is None:
        logging.basicConfig()
        log = logging.getLogger(params.AWS_DATA_API_NAME)
        log.setLevel(logging.INFO)
    else:
        log = logger

    api_metadata_handler = ApiMetadata(region, log)

    all_apis = api_metadata_handler.get_all_apis()

    return_apis = []
    for a in all_apis:
        if f"-{stage}" in a:
            return_apis.append(a.replace(f"-{stage}", ""))

    return return_apis
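A hypothetical invocation (the import path is an assumption; only the signature above comes from the source):

# hypothetical import path - adjust to wherever get_registry is exposed in your deployment
from chalicelib.api_control import get_registry

# list every API Namespace deployed to the "dev" stage in us-east-1
namespaces = get_registry(region="us-east-1", stage="dev")
print(namespaces)  # e.g. ["Customer", "Product"]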
Example #3
def get_api_status(api_name: str,
                   stage: str,
                   region: str,
                   logger: logging.Logger = None) -> dict:
    """
    Method to return the status of an API Namespace for the specified Stage in a Region.

    :param api_name: The API Namespace to get the status of
    :param stage: The Stage to query for the Namespace status
    :param region: The AWS Region in which the Stage is provisioned
    :param logger: Optional logger to use; a default logger is created when None
    :return: dict:
        Status: The Status of the API Namespace in the stage
    """
    global log
    if logger is None:
        log = utils.setup_logging()
    else:
        log = logger

    api_metadata_handler = ApiMetadata(region, log)
    s = "Status"
    return {
        s:
        api_metadata_handler.get_api_metadata(api_name=api_name,
                                              stage=stage).get(s)
    }
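The status accessor can be exercised the same way (the import path and the returned status value are illustrative):

# hypothetical import path, consistent with the get_registry sketch above
from chalicelib.api_control import get_api_status

status = get_api_status(api_name="Customer", stage="dev", region="us-east-1")
print(status)  # e.g. {"Status": "Active"}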
Example #4
    def get(self, api_name):
        if api_name not in self._api_cache:
            # load from metadata
            self._logger.info(f"Cache Miss: Loading API Instance {api_name} Stage {self._stage} from Metadata service")
            api_metadata_handler = ApiMetadata(self._region, self._logger)

            api_metadata = api_metadata_handler.get_api_metadata(api_name=api_name, stage=self._stage)

            if api_metadata is None:
                msg = f"Unable to resolve API {api_name} in Stage {self._stage}"
                self._logger.error(msg)
                raise BadRequestError(msg)
            else:
                if api_metadata.get("Status") == params.STATUS_CREATING:
                    raise InvalidArgumentsException("API Not Yet Active")
                else:
                    api_metadata['app'] = self._app
                    api_metadata[params.REGION] = self._region
                    api_metadata[params.API_NAME_PARAM] = api_name
                    api_metadata[params.EXTENDED_CONFIG] = self._extended_config

                    # instantiate the API from metadata
                    api = dapi.load_api(**api_metadata)

                    # TODO add caching for CORS objects from API Metadata

                    self.add(api_name, api)

                    return api
        else:
            return self._api_cache[api_name][CONF_CACHE_HANDLER]
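A hedged sketch of how this cache getter is used; the constructor call mirrors the module-level wiring shown in Example #7, and the namespace name is illustrative:

# mirrors the module-level setup from Example #7; "Customer" is an illustrative namespace
api_cache = DataApiCache(app=app, stage=STAGE, region=REGION, logger=log)

# the first call misses the cache and hydrates the API instance from metadata;
# later calls return the cached handler directly
customer_api = api_cache.get("Customer")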
Example #5
def understander_lambda(event, context):
    log.debug(event)
    understander = None
    api_metadata_handler = ApiMetadata(REGION, log)

    def arg_handler(arg):
        if arg not in event:
            raise InvalidArgumentsException(
                f"Invocation event must include '{arg}'")
        else:
            return event.get(arg)

    prefix = arg_handler("prefix")
    id = arg_handler("id")

    if understander is None:
        understander = u.Understander(region=REGION)

    api_name = arg_handler(params.API_NAME_PARAM)
    api_stage = arg_handler(params.API_STAGE_PARAM)

    # run the understander method
    understanding = understander.understand(prefix=prefix)

    # create a metadata update structure for all non-empty understanding items
    meta_add = {}

    def _meta_adder(val):
        if val in understanding:
            v = understanding.get(val)

            # add non empty structures
            if v is not None and v != [] and v != {}:
                meta_add[val] = v

    if understanding is not None:
        # add the new metadata to the object
        for x in [u.PAGES, u.RAW_LINES, u.KEY_VALUES, u.ENTITIES, u.LANGUAGE,
                  u.SENTIMENT, u.KEY_PHRASES]:
            _meta_adder(x)

        api_metadata_handler.create_metadata(api_name,
                                             api_stage,
                                             caller_identity='System',
                                             **meta_add)
        print(f"Metadata Analysis complete for {id}")

        return understanding
    else:
        return None
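For reference, a sketch of the invocation payload this lambda expects, mirroring the args dict assembled by AwsDataAPI.understand() in Example #9 (the prefix and id values are illustrative, and the params.* constants stand in for key names not shown in these snippets):

# keys mirror the args dict built in AwsDataAPI.understand(); values are illustrative
invocation_event = {
    "prefix": "s3://my-bucket/documents/invoice-123.pdf",
    "id": "invoice-123",
    params.API_NAME_PARAM: "Customer",
    params.API_STAGE_PARAM: "dev"
}
understanding = understander_lambda(invocation_event, context=None)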
Example #6
def provisioning_lambda(event, context):
    # TODO Add support for creation of Read/Only and Read/Write IAM Roles during provisioning
    log.debug(event)
    api_name = event.pop("ApiName")

    # create an API Metadata Handler
    api_metadata_handler = ApiMetadata(REGION, log)

    # check if this API is already deployed
    table_name = utils.get_table_name(table_name=api_name, deployment_stage=STAGE)
    api_metadata = api_metadata_handler.get_api_metadata(api_name, STAGE)

    if api_metadata is None:
        log.debug(f"API {api_name} not found. Creating new Data API")
        api_metadata = {}
        # add the default parameters from the application container
        _add_api_defaults(api_metadata)
        api_metadata[params.DATA_TYPE] = api_name
        api_metadata[params.DEPLOYED_ACCOUNT] = context.invoked_function_arn.split(":")[4]
        api_metadata[params.STORAGE_TABLE] = table_name
        api_metadata[params.STORAGE_HANDLER] = params.DEFAULT_STORAGE_HANDLER
        api_metadata[params.CATALOG_DATABASE] = params.DEFAULT_CATALOG_DATABASE
    else:
        log.debug(f"API {api_name} already exists. Performing property update and instance rebuild")

        # remove last update date/by information to prevent update collision
        utils.remove_internal_attrs(api_metadata)

    # overlay the supplied parameters onto the api metadata
    api_metadata.update(event)

    # add a pending status
    api_metadata['Status'] = params.STATUS_CREATING

    # add a control table entry for this stage with the current configuration
    api_metadata_handler.create_metadata(api_name=api_name, stage=STAGE, caller_identity='System', **api_metadata)

    api_metadata[params.APP] = app

    # load the api class
    api = dapi.load_api(**api_metadata)

    # setup the search flow
    search_config = None
    if params.ES_DOMAIN in event:
        try:
            search_config = es_indexer.configure_search_flow(endpoints=api.get_endpoints(),
                                                             es_domain_name=event.get(params.ES_DOMAIN),
                                                             firehose_delivery_role_arn=event.get(
                                                                 params.FIREHOSE_DELIVERY_ROLE_ARN),
                                                             failure_record_bucket=event.get(
                                                                 params.DELIVERY_STREAM_FAILURE_BUCKET),
                                                             kms_key_arn=event.get(params.KMS_KEY_ARN)
                                                             )
        except KeyError:
            raise BadRequestError(
                f"Unable to provision search configuration without {params.ES_DOMAIN}, {params.FIREHOSE_DELIVERY_ROLE_ARN}, and {params.DELIVERY_STREAM_FAILURE_BUCKET}")

    # add the search config to metadata
    if search_config is not None:
        api_metadata_handler.update_metadata(api_name=api_name, stage=STAGE,
                                             updates=search_config, caller_identity='System')

    # destroy the cache reference to cause a reload on next invoke
    if api_cache.contains(api_name):
        log.debug(f"Invalidating API Cache")
        api_cache.remove(api_name)

    # update the metadata to show that the API is online
    api_metadata_handler.update_metadata(api_name=api_name, stage=STAGE,
                                         updates={"Status": params.STATUS_ACTIVE}, caller_identity='System')
    log.info(f"Provisioning complete. API {api_name} online in Stage {STAGE}")
Example #7
              params.COGNITO_PROVIDER_ARNS)
elif set_authorizer == params.AUTHORIZER_CUSTOM:
    use_authorizer = None
else:
    use_authorizer = None

if use_authorizer is None:
    print("Stage deployed without Authorizer")
else:
    print("Using Authorizer %s" % set_authorizer.__name__)

# setup class logger
log = utils.setup_logging()

# create an API Metadata Handler
api_metadata_handler = ApiMetadata(REGION, log)

# create a cache of all API references tracked by this deployment stage
api_cache = DataApiCache(app=app, stage=STAGE, region=REGION, logger=log)

# create the streams integration handler, which is used by the lambda function embedded at the end of this app
es_indexer = None

# module level settings used as flags for lazy initialisers in functions
search_flow_verified = False

# load the cors config
cors_config = None
cors = None
try:
    with open("chalicelib/cors.json", "r") as f:
Example #9
class AwsDataAPI:
    _full_config = None
    _app = None
    _region = None
    _deployment_stage = None
    _deployed_account = None
    _gremlin_address = None
    _gremlin_endpoint = None
    _es_client = None
    _search_config = None
    _storage_handler = None
    _catalog_database = None
    _api_name = None
    _table_name = None
    _pk_name = None
    _sts_client = None
    _cwl_client = None
    _log_group_name = None
    _last_log_info = None
    _caller_identity = None
    _simple_identity = None
    _logger = None
    _delete_mode = None
    _allow_runtime_delete_mode_change = False
    _crawler_rolename = None
    _table_indexes = None
    _metadata_indexes = None
    _schema_validation_refresh_hitcount = None
    _allow_non_itemmaster_writes = None
    _strict_occv = None
    _dynamo_helper = None
    _lambda_client = None
    _cloudwatch_emitter = None
    _api_metadata_handler = None
    _extended_config = None

    def __init__(self, **kwargs):
        self._region = kwargs.get(params.REGION, os.getenv('AWS_REGION'))

        self._full_config = kwargs
        self._api_name = kwargs.get(params.API_NAME_PARAM)
        self._table_name = kwargs.get(params.STORAGE_TABLE)

        # setup instance logger
        self._logger = utils.setup_logging(params.AWS_DATA_API_NAME)
        global log
        log = self._logger

        self._logger.debug("Constructing new Data API with Args")
        self._logger.debug(kwargs)

        # create the API metadata handler
        self._api_metadata_handler = ApiMetadata(
            self._region, self._logger, kwargs.get(params.KMS_KEY_ARN))

        # Load class properties from any supplied metadata. These will be populated when hydrating an existing API
        # namespace from DynamoDB
        self._app = kwargs.get(params.APP, None)
        self._deployment_stage = kwargs.get(params.STAGE)
        self._pk_name = kwargs.get(params.PRIMARY_KEY, None)
        self._delete_mode = kwargs.get(params.DELETE_MODE,
                                       params.DEFAULT_DELETE_MODE)
        self._allow_runtime_delete_mode_change = kwargs.get(
            params.ALLOW_RUNTIME_DELETE_MODE_CHANGE,
            params.DEFAULT_ALLOW_RUNTIME_DELETE_MODE_CHANGE)
        self._crawler_rolename = kwargs.get(params.CRAWLER_ROLENAME, None)
        self._table_indexes = kwargs.get(params.TABLE_INDEXES, None)
        self._metadata_indexes = kwargs.get(params.METADATA_INDEXES, None)
        self._schema_validation_refresh_hitcount = kwargs.get(
            params.SCHEMA_VALIDATION_REFRESH_HITCOUNT,
            params.DEFAULT_SCHEMA_VALIDATION_REFRESH_HITCOUNT)
        self._gremlin_address = kwargs.get(params.GREMLIN_ADDRESS, None)
        self._allow_non_itemmaster_writes = kwargs.get(
            params.NON_ITEM_MASTER_WRITES_ALLOWED,
            params.DEFAULT_NON_ITEM_MASTER_WRITE_ALLOWED)
        self._strict_occv = kwargs.get(params.STRICT_OCCV,
                                       params.DEFAULT_STRICT_OCCV)
        self._catalog_database = kwargs.get(params.CATALOG_DATABASE,
                                            params.DEFAULT_CATALOG_DATABASE)

        # setup the storage handler which implements the backend data api functionality
        storage_args = kwargs

        resource_schema = self._api_metadata_handler.get_schema(
            api_name=self._api_name,
            stage=self._deployment_stage,
            schema_type=params.RESOURCE)
        if resource_schema is not None:
            storage_args[params.CONTROL_TYPE_RESOURCE_SCHEMA] = resource_schema

        metadata_schema = self._api_metadata_handler.get_schema(
            api_name=self._api_name,
            stage=self._deployment_stage,
            schema_type=params.METADATA)
        if metadata_schema is not None:
            storage_args[params.CONTROL_TYPE_METADATA_SCHEMA] = metadata_schema

        storage_args["table_name"] = self._table_name
        storage_args["primary_key_attribute"] = self._pk_name
        storage_args["region"] = self._region
        storage_args["delete_mode"] = self._delete_mode
        storage_args[
            "allow_runtime_delete_mode_change"] = self._allow_runtime_delete_mode_change
        storage_args["table_indexes"] = self._table_indexes
        storage_args["metadata_indexes"] = self._metadata_indexes
        storage_args[
            "schema_validation_refresh_hitcount"] = self._schema_validation_refresh_hitcount
        storage_args["crawler_rolename"] = self._crawler_rolename
        storage_args["catalog_database"] = self._catalog_database
        storage_args[
            "allow_non_itemmaster_writes"] = self._allow_non_itemmaster_writes
        storage_args["strict_occv"] = self._strict_occv
        storage_args["deployed_account"] = kwargs.get(params.DEPLOYED_ACCOUNT,
                                                      None)
        storage_args["handler_name"] = kwargs[params.STORAGE_HANDLER]
        storage_args["pitr_enabled"] = utils.strtobool(
            kwargs.get(params.PITR_ENABLED, params.DEFAULT_PITR_ENABLED))
        storage_args["kms_key_arn"] = kwargs.get(params.STORAGE_CRYPTO_KEY_ARN,
                                                 None)

        self._storage_handler = self._get_storage_handler(**storage_args)

        # setup the gremlin integration if one has been provided
        if self._gremlin_address is not None:
            log.info(
                f"Binding new Gremlin Handler to address {self._gremlin_address}"
            )
            tokens = self._gremlin_address.split(":")
            self._gremlin_endpoint = GremlinHandler(url=tokens[0],
                                                    port=tokens[1])

        if "SearchConfig" in kwargs:
            self._search_config = kwargs.get("SearchConfig")

        log.info(
            f"AWS Data API for {self._catalog_database}.{self._table_name} Online."
        )
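
    # Note: a typical construction path is dapi.load_api(**api_metadata), as shown in Examples #4 and #6;
    # the supplied metadata must at minimum carry the params.STORAGE_HANDLER entry consumed above.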

    # method which writes a set of object references to the Gremlin helper class
    def _put_references(self, id: str, reference_doc: list):
        g = self._gremlin_endpoint
        if g is not None:
            from_id = utils.get_arn(id, self._table_name,
                                    self._deployed_account)

            ctr = 0
            exceptions = []
            for r in reference_doc:
                if params.RESOURCE not in r:
                    raise InvalidArgumentsException(
                        f"Malformed Reference: {r}. Must Contain a {params.RESOURCE}"
                    )
                else:
                    to_id = r[params.RESOURCE]

                    # remove the resource and ID keys so we can use the rest of the document for extra properties
                    del r[params.RESOURCE]

                    try:
                        g.create_relationship(label=params.REFERENCES,
                                              from_id=from_id,
                                              to_id=to_id,
                                              extra_properties=r)
                        ctr += 1
                    except Exception as e:
                        exceptions.append({"ID": to_id, "Message": str(e)})

            response = {"ReferenceCount": ctr}

            if len(exceptions) > 0:
                response["Exceptions"] = exceptions

            return response
        else:
            raise UnimplementedFeatureException(NO_GREMLIN)

    def _get_storage_handler(self, **kwargs):
        """
        Method to load a Storage Handler class based upon the provided handler name.
        """
        log.info(
            f"Creating new Data API Storage Handler from {kwargs.get(params.STORAGE_HANDLER)}"
        )
        storage_module = load_storage_handler_module(
            kwargs.get(params.STORAGE_HANDLER))
        storage_class = getattr(storage_module, "DataAPIStorageHandler")
        return storage_class(**kwargs)

    # simple accessor method for the pk_name attribute, which is required in some cases for API integration
    def get_primary_key(self):
        return self._pk_name

    # access method that returns a boolean outcome based upon if the provided ID is valid
    # @evented(api_operation="Check")
    @identity_trace
    def check(self, id):
        return self._storage_handler.check(id=id)

    # return a paginated list of elements from the API
    # @evented(api_operation="List")
    @identity_trace
    def list(self, **kwargs):
        return self._storage_handler.list_items(**kwargs)

    # return information about storage usage for this API namespace
    # @evented(api_operation="Usage")
    @identity_trace
    def get_usage(self):
        resources = self._storage_handler.get_usage(
            table_name=self._table_name)
        metadata = self._storage_handler.get_usage(
            table_name=utils.get_metaname(self._table_name))

        references = None
        # TODO figure out why the gremlin connection is failing
        # if self._gremlin_endpoint is not None:
        #    references = self._gremlin_endpoint.get_usage()

        usage = {params.RESOURCE: resources, params.METADATA: metadata}

        if references is not None:
            usage[params.REFERENCES] = {"Count": references}

        return usage

    # run the natural language understanding integration, which attaches new Metadata to the Resource
    # @evented(api_operation="Understand")
    @identity_trace
    def understand(self, id, storage_location=None):
        fetch_id = self._validate_arn_id(id)

        # validate the attribute that stores the location of the object
        if storage_location is None:
            storage_location = params.DEFAULT_STORAGE_LOCATION_ATTRIBUTE

        # fetch the resource
        item = self._storage_handler.get(id=fetch_id)

        storage_loc = None

        if item is None:
            raise ResourceNotFoundException(
                f"Unable to find Resource with ID {fetch_id}")
        else:
            if storage_location in item[params.RESOURCE]:
                storage_loc = item.get(params.RESOURCE).get(storage_location)
            else:
                # storage location may be in metadata
                meta = self._storage_handler.get_metadata(id=fetch_id)
                if storage_location in meta[params.METADATA]:
                    storage_loc = meta.get(
                        params.METADATA).get(storage_location)

        if storage_loc is None:
            raise DetailedException(
                f"Unable to run Metadata Resolver without a Storage Location Attribute in Item Resource or Metadata (Default {params.DEFAULT_STORAGE_LOCATION_ATTRIBUTE})"
            )

        if self._lambda_client is None:
            self._lambda_client = boto3.client("lambda",
                                               region_name=self._region)

        # run the understander and metadata update through an async lambda
        f = f"{params.AWS_DATA_API_NAME}-{self._deployment_stage}-{params.UNDERSTANDER_NAME}"

        args = {
            "prefix": storage_loc,
            "id": fetch_id,
            "caller": self._simple_identity,
            "primary_key_attribute": self._pk_name,
            params.API_NAME_PARAM: self._api_name,
            params.API_STAGE_PARAM: self._deployment_stage
        }
        response = self._lambda_client.invoke(FunctionName=f,
                                              InvocationType='Event',
                                              Payload=json.dumps(args))

        if "FunctionError" in response:
            if response.get("FunctionError") == "Handled":
                raise DetailedException(response.get("Payload"))
            else:
                raise DetailedException(
                    "Unhandled error occurred during submission of async Understanding request"
                )
        else:
            return response.get("StatusCode")

    def _validate_arn_id(self, id):
        # decode the ID as it forms part of the request url
        decoded_id = parser.unquote(id)

        log.debug(f"Validating Resource ARN {id}")

        if utils.get_arn_base() in decoded_id:
            # validate arn structure and then fetch by id
            arn = utils.shred_arn(decoded_id)

            if arn is None:
                raise ResourceNotFoundException(
                    f"Invalid ARN format {decoded_id}")

            if utils.get_caller_account() != arn[params.ARN_ACCOUNT]:
                raise ResourceNotFoundException(
                    "Requested resource not available from Data API Account")

            if self._table_name != arn[params.ARN_TABLE]:
                self.exception = ResourceNotFoundException(
                    f"Requested resource {arn[params.ARN_TABLE]} not available from Data API {self._table_name}"
                )
                raise self.exception

            if arn[params.ARN_REGION] != self._region:
                raise ResourceNotFoundException(
                    f"ARN Valid Region {arn[params.ARN_REGION]}")

            fetch_id = arn[params.ARN_ID]
        else:
            fetch_id = decoded_id

        return fetch_id

    # get the Resource, which may include or prefer the Item Master
    # @evented(api_operation="GetResource")
    @identity_trace
    def get(self,
            id,
            master_option,
            suppress_meta_fetch: bool = False,
            only_attributes: list = None,
            not_attributes: list = None):
        fetch_id = self._validate_arn_id(id)
        response = {}
        item = self._storage_handler.get(
            id=fetch_id,
            suppress_meta_fetch=suppress_meta_fetch,
            only_attributes=only_attributes,
            not_attributes=not_attributes)

        # set the 'Item' in the response unless master_option = prefer
        if params.ITEM_MASTER_ID not in item[params.RESOURCE] or \
                master_option is None or \
                master_option.lower() == params.ITEM_MASTER_INCLUDE.lower():
            response["Item"] = item

        # extract the master if there is one, and the provided master option is 'include' or 'prefer'
        # TODO Test what happens if we have very large Item Master hierarchies here
        if params.ITEM_MASTER_ID in item[params.RESOURCE] and \
                master_option is not None and \
                master_option.lower() in [params.ITEM_MASTER_INCLUDE.lower(),
                                          params.ITEM_MASTER_PREFER.lower()]:
            master = self._storage_handler.get(
                id=item[params.RESOURCE][params.ITEM_MASTER_ID])
            response["Master"] = master

        return response
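
    # Illustrative call shape (not from the source): api.get(id=item_id, master_option="include")
    # returns {"Item": ...} and also {"Master": ...} when the Resource is linked to an Item Master.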

    # undelete a Data API Resource that has been soft deleted (non-Tombstone)
    # @evented(api_operation="Restore")
    @identity_trace
    def restore(self, id):
        fetch_id = self._validate_arn_id(id)

        return self._storage_handler.restore(
            id=fetch_id, caller_identity=self._simple_identity)

    # get the Metadata for a Resource
    # @evented(api_operation="GetMetadata")
    @identity_trace
    def get_metadata(self, id):
        fetch_id = self._validate_arn_id(id)

        return self._storage_handler.get_metadata(id=fetch_id)

    # Delete a Resource and Metadata based upon the specified deletion mode of the system or in the request
    # @evented(api_operation="Delete")
    @identity_trace
    def delete(self, id, **kwargs):
        fetch_id = self._validate_arn_id(id)

        return self._storage_handler.delete(
            id=fetch_id, caller_identity=self._simple_identity, **kwargs)

    # Update a Data API Resource
    # @evented(api_operation="Update")
    @identity_trace
    def update_item(self, id, **kwargs):
        response = {}

        def _wrap_response(result_type, type_res):
            response[result_type] = {
                params.DATA_MODIFIED: type_res is not None
            }

            if type_res is not None and "Messages" in type_res:
                response["Messages"] = type_res.get("Messages")

        fetch_id = self._validate_arn_id(id)

        if params.REFERENCES in kwargs:
            log.debug("Creating Reference Links")
            _wrap_response(
                params.REFERENCES,
                self._put_references(id, kwargs.get(params.REFERENCES)))

        # update the item, which may update metadata and resources
        item_response = self._storage_handler.update_item(
            caller_identity=self._simple_identity, id=fetch_id, **kwargs)
        _wrap_response(params.METADATA, item_response.get(params.METADATA))
        _wrap_response(params.RESOURCE, item_response.get(params.RESOURCE))

        return response

    # Drop an entire API Namespace. This will do a backup before dropping the underlying storage tables
    # @evented(api_operation="DropAPI")
    @identity_trace
    def drop(self, do_export=True):
        # drop tables with final backup
        self._storage_handler.drop_table(table_name=self._table_name,
                                         do_export=do_export)
        self._storage_handler.drop_table(table_name=utils.get_metaname(
            self._table_name),
                                         do_export=do_export)

        # delete API information
        self._api_metadata_handler.delete_all_api_metadata(
            self._api_name, self._deployment_stage)

    # Perform a search request against the Resource or Metadata, based upon provided query args
    # @evented(api_operation="Find")
    @identity_trace
    def find(self, **kwargs):
        return self._storage_handler.find(**kwargs)

    def _get_es_endpoint(self):
        return self._search_config.get("ElasticSearchDomain").get(
            "ElasticSearchEndpoint")

    # private lazy loader method for es client to ensure that we don't get constructor stalls if VPC connections are weird
    def _get_es_client(self):
        if self._es_client is None:
            # setup a reference to ElasticSearch if a SearchConfig is setup
            self._es_client = Elasticsearch(hosts=[self._get_es_endpoint()])

        return self._es_client

    # Perform a search request against the configured ES endpoint
    # @evented(api_operation="Search")
    @identity_trace
    def search(self, search_type, **kwargs):
        if self._search_config is None:
            raise UnimplementedFeatureException(
                "No ElasticSearch Endpoint Configured")
        else:
            response = {}

            def _add_results(result_type):
                index_name = utils.get_es_index_name(self._table_name,
                                                     result_type)
                doc = utils.get_es_type_name(self._table_name, result_type)

                response[result_type] = self._get_es_client().search(
                    index=index_name, doc_type=doc, body=kwargs.get("query"))

            if search_type is not None:
                # perform a search just for the specified type of data
                _add_results(search_type)
            else:
                # perform a search across both Resource and Metadata indexes
                _add_results(params.RESOURCE)
                _add_results(params.METADATA)

            return response
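
    # Illustrative call shape (not from the source): api.search(search_type=None, query={"query": {"match_all": {}}})
    # runs the query body against both the Resource and Metadata indexes and keys the results by type.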

    # Return the API's underlying storage implementations, including tables in use, Dynamo Streams that can be processed
    # and references to Gremlin and ElasticSearch endpoints in use
    # @evented(api_operation="Endpoints")
    @identity_trace
    def get_endpoints(self):
        endpoints = self._storage_handler.get_streams()

        if self._gremlin_address is not None:
            endpoints['GraphURL'] = self._gremlin_address

        if self._search_config is not None:
            endpoints['Elasticsearch'] = self._get_es_endpoint()

        return endpoints

    # Return the JSON schema for an API Namespace
    # @evented(api_operation="GetSchema")
    @identity_trace
    def get_schema(self, schema_type):
        return self._api_metadata_handler.get_schema(
            api_name=self._api_name,
            stage=self._deployment_stage,
            schema_type=schema_type)

    # Create or Update a JSON Schema for the API Namespace Resources or Metadata
    # @evented(api_operation="PutSchema")
    @identity_trace
    def put_schema(self, schema_type, schema):
        return self._api_metadata_handler.put_schema(
            api_name=self._api_name,
            stage=self._deployment_stage,
            schema_type=schema_type,
            caller_identity=self._simple_identity,
            schema=schema).get(params.DATA_MODIFIED)

    # Remove the JSON Schema from the Namespace for Resources or Metadata
    # @evented(api_operation="DeleteSchema")
    @identity_trace
    def remove_schema(self, schema_type):
        if schema_type.lower() == params.RESOURCE.lower():
            set_schema_type = params.CONTROL_TYPE_RESOURCE_SCHEMA
        elif schema_type.lower() == params.METADATA.lower():
            set_schema_type = params.CONTROL_TYPE_METADATA_SCHEMA
        else:
            raise InvalidArgumentsException(
                f"Schema Type {schema_type} invalid. Use {params.CONTROL_TYPE_METADATA_SCHEMA} or {params.CONTROL_TYPE_RESOURCE_SCHEMA}"
            )

        return self._api_metadata_handler.delete_metadata(
            api_name=self._api_name,
            stage=self._deployment_stage,
            metadata_type=set_schema_type,
            caller_identity=self._simple_identity)

    # Setup the Item Master for a given Resource
    # @evented(api_operation="SetItemMaster")
    @identity_trace
    def item_master_update(self, **kwargs):
        return self._storage_handler.item_master_update(
            caller_identity=self._simple_identity, **kwargs)

    # Remote the specified Item Master for a given Resource
    # @evented(api_operation="RemoveItemMaster")
    @identity_trace
    def item_master_delete(self, **kwargs):
        item_id = kwargs.get(self._pk_name)

        if item_id is None:
            raise ResourceNotFoundException
        else:
            # validate that this item actually has the correct item master set
            current = self._storage_handler.get(id=item_id)
            assert_item_master = kwargs.get(params.ITEM_MASTER_ID)
            current_master = current.get(params.RESOURCE).get(
                params.ITEM_MASTER_ID, None)
            if current_master is None:
                return True
            elif current_master != assert_item_master:
                raise InvalidArgumentsException(
                    "Item Master {assert_item_master} does not match actual Item Master"
                )
            else:
                # TODO migrate this to use item_master_update with None target ID
                return self._storage_handler.remove_resource_attributes(
                    id=item_id,
                    resource_attributes=[params.ITEM_MASTER_ID],
                    caller_identity=self._simple_identity)

    # Extract the Metadata for the API itself
    # @evented(api_operation="GetApiMetadata")
    @identity_trace
    def get_table_metadata(self, attribute_filters=None):
        return self._api_metadata_handler.get_api_metadata(
            api_name=self._api_name,
            stage=self._deployment_stage,
            attribute_filters=attribute_filters)

    # Create or Update API Metadata
    # @evented(api_operation="CreateApiMetadata")
    @identity_trace
    def create_table_metadata(self, caller_identity=None, **kwargs):
        try:
            return self._dynamo_helper.create_table_metadata(
                api_name=self._table_name,
                caller_identity=self._simple_identity
                if caller_identity is None else caller_identity,
                **kwargs)
        except Exception as e:
            raise DetailedException(e)

    # Perform a search for all References in the Gremlin DB for objects that directly or indirectly reference an API Item
    # @evented(api_operation="GetDownstreamReferences")
    @identity_trace
    def get_downstream(self, id, search_depth=1):
        if self._gremlin_endpoint is not None:
            if id is None:
                raise InvalidArgumentsException(
                    "Must have ID to run lineage search")
            else:
                try:
                    return self._gremlin_endpoint.get_outbound(
                        id=utils.get_arn(id, self._table_name,
                                         self._deployed_account),
                        search_depth=search_depth)
                except ResourceNotFoundException:
                    return None
                except Exception as e:
                    raise DetailedException(e)
        else:
            raise UnimplementedFeatureException(params.NO_GREMLIN)

    # Perform a search for all References that the provided API Item references, directly or indirectly
    # @evented(api_operation="GetUpstreamReferences")
    @identity_trace
    def get_upstream(self, id, search_depth=1):
        if self._gremlin_endpoint is not None:
            if id is None:
                raise InvalidArgumentsException(
                    "Must have ID to run lineage search")
            else:
                try:
                    return self._gremlin_endpoint.get_inbound(
                        id=utils.get_arn(id, self._table_name,
                                         self._deployed_account),
                        search_depth=search_depth)
                except ResourceNotFoundException:
                    return None
                except Exception as e:
                    raise DetailedException(e)
        else:
            raise UnimplementedFeatureException(params.NO_GREMLIN)

    def _do_ddb_export_to_s3(self,
                             table_name,
                             export_path,
                             log_path,
                             read_pct,
                             dpu,
                             kms_key_arn,
                             setup_crawler,
                             catalog_database=None):
        if setup_crawler is True and self._crawler_rolename is None:
            raise InvalidArgumentsException(
                "Cannot Setup Crawler for Exported Dataset as API is not configured with a Crawler Role"
            )

        set_table_name = f"{table_name}_{utils.get_date_now()}"
        export = utils.run_glue_export(table_name=set_table_name,
                                       s3_export_path=export_path,
                                       kms_key_arn=kms_key_arn,
                                       read_pct=read_pct,
                                       log_path=log_path,
                                       export_role=self._crawler_rolename,
                                       dpu=dpu)

        if setup_crawler is True:
            crawler = utils.create_s3_crawler(
                target_entity_name=set_table_name,
                crawler_name=f"{table_name}-export",
                crawler_rolename=self._crawler_rolename,
                catalog_db=f"{self._catalog_database}-export"
                if catalog_database is None else catalog_database,
                s3_path=export_path,
                and_run=True)

            if crawler is not None:
                export['Crawler'] = crawler
            else:
                msg = "Unable to configure Export Location Crawler"
                export['Errors'] = [{"Error": msg}]
                raise DetailedException(message=msg, detail=export)

        return export

    # Get the status of an API Export to S3
    # @evented(api_operation="GetExportStatus")
    @identity_trace
    def get_export_job_status(self, job_name, run_id):
        return utils.get_glue_job_status(job_name=job_name, run_id=run_id)

    # Get a list of all export jobs running
    # @evented(api_operation="GetExportJobs")
    @identity_trace
    def get_running_export_jobs(self, job_name):
        return utils.get_running_export_jobs(job_name=job_name)

    # Start an export of API Data to S3
    # @evented(api_operation="StartExport")
    @identity_trace
    def export_to_s3(self, **kwargs):
        EXPORT_DATA = 'Data'
        EXPORT_META = 'Metadata'
        EXPORT_ALL = 'All'
        export_path = kwargs.get(params.EXPORT_S3_PATH)
        if export_path is None:
            raise Exception("Cannot export without S3 Export Path")
        dpu = int(kwargs.get(params.EXPORT_JOB_DPU, params.DEFAULT_EXPORT_DPU))
        kms_key_arn = kwargs.get(params.KMS_KEY_ARN, None)
        read_pct = int(kwargs.get(params.EXPORT_READ_PCT, 50))
        log_path = kwargs.get(params.EXPORT_LOG_PATH)
        export_type = kwargs.get(params.EXPORT_TYPE, EXPORT_DATA)
        catalog_database = kwargs.get(params.CATALOG_DATABASE)

        export_types = [EXPORT_DATA, EXPORT_META, EXPORT_ALL]
        if export_type not in export_types:
            raise InvalidArgumentsException(
                "ExportType must be one of {0}, {1}, or {2}".format(*export_types))

        def _fix_path(path):
            # ensure the export prefix ends with a trailing slash
            if not path.endswith("/"):
                path += "/"
            return path

        export_path = _fix_path(export_path)

        crawl = kwargs.get(params.EXPORT_SETUP_CRAWLER, None)

        out = {}
        # export main data to s3 location
        if export_type == EXPORT_DATA or export_type == EXPORT_ALL:
            result = self._do_ddb_export_to_s3(
                table_name=self._table_name,
                export_path=export_path,
                log_path=log_path,
                read_pct=read_pct,
                dpu=dpu,
                kms_key_arn=kms_key_arn,
                setup_crawler=crawl,
                catalog_database=catalog_database)
            if result is not None:
                out[EXPORT_DATA] = result

        # export metadata to S3
        if export_type == EXPORT_META or export_type == EXPORT_ALL:
            result = self._do_ddb_export_to_s3(
                table_name=utils.get_metaname(self._table_name),
                export_path=export_path,
                log_path=log_path,
                read_pct=read_pct,
                dpu=dpu,
                kms_key_arn=kms_key_arn,
                setup_crawler=crawl,
                catalog_database=catalog_database)
            if result is not None:
                out[EXPORT_META] = result

        return out
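To close the loop, a hedged end-to-end sketch: it assumes the cache wiring from Example #7, and the id, master_option value, and S3 paths are illustrative (params.EXPORT_* constants name the keyword keys read by export_to_s3 above):

# obtain a hydrated AwsDataAPI instance through the cache created in Example #7
customer_api = api_cache.get("Customer")

# fetch a Resource, including its Item Master if one is linked (master_option value is illustrative)
item = customer_api.get(id="item-0001", master_option="include")

# export both the Resource and Metadata tables to S3 (paths are illustrative)
export_jobs = customer_api.export_to_s3(**{
    params.EXPORT_S3_PATH: "s3://my-export-bucket/customer/",
    params.EXPORT_LOG_PATH: "s3://my-export-bucket/logs/",
    params.EXPORT_TYPE: "All"
})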