def indexing_lambda(event, context):
    if 'Records' in event:
        global search_flow_verified
        global es_indexer

        if search_flow_verified is False:
            # determine the source API information to ensure the search flow is configured
            api_name = None

            # the event source ARN is carried on each DynamoDB Streams record rather than on the top level event
            event_source_tokens = event['Records'][0].get("eventSourceARN").split(":")

            if event_source_tokens[2] == "dynamodb":
                table_spec_tokens = event_source_tokens[5].split("/")
                table = table_spec_tokens[1]

                # derive the API Namespace from the table name (assumes the table carries the "-<stage>" suffix
                # used by the Namespace registry)
                api_name = table.replace(f"-{STAGE}", "")

                # create an API Metadata Handler
                api_metadata_handler = ApiMetadata(REGION, log)
                api_metadata = api_metadata_handler.get_api_metadata(api_name, STAGE)

                # the module level es_indexer starts as None, so build the StreamsIntegration handler before configuring it
                es_indexer = StreamsIntegration(STAGE, api_metadata.get("SearchConfig"))

                # verify that the delivery streams are in place
                es_indexer.configure_search_flow(endpoints=api_metadata.get("SearchConfig").get("DeliveryStreams"),
                                                 es_domain_name=event.get(params.ES_DOMAIN),
                                                 firehose_delivery_role_arn=event.get(params.FIREHOSE_DELIVERY_ROLE_ARN),
                                                 failure_record_bucket=event.get(params.DELIVERY_STREAM_FAILURE_BUCKET),
                                                 kms_key_arn=event.get(params.KMS_KEY_ARN))

                search_flow_verified = True

        output = es_indexer.forward_to_es_firehose(records=event['Records'])

        return output
def get_registry(region: str, stage: str, logger: logging.Logger = None) -> list:
    """
    Method to return the list of all API Namespaces in the Stage and Region

    :param region: The AWS Region to query
    :param stage: The Stage to query for Namespaces
    :return: list: str: Namespace name
    """
    global log
    if logger is None:
        logging.basicConfig()
        log = logging.getLogger(params.AWS_DATA_API_NAME)
        log.setLevel(logging.INFO)
    else:
        log = logger

    # create an API Metadata Handler
    api_metadata_handler = ApiMetadata(region, log)

    all_apis = api_metadata_handler.get_all_apis()

    return_apis = []
    for a in all_apis:
        if f"-{stage}" in a:
            return_apis.append(a.replace(f"-{stage}", ""))

    return return_apis
def get_api_status(api_name: str, stage: str, region: str, logger: logging.Logger = None) -> dict:
    """
    Method to return the status of an API Namespace for the specified Stage in a Region.

    :param api_name: The API Namespace to get the status of
    :param stage: The Stage to query for the Namespace status
    :param region: The AWS Region in which the Stage is provisioned
    :return: dict: Status: The Status of the API Namespace in the stage
    """
    global log
    if logger is None:
        log = utils.setup_logging()
    else:
        log = logger

    api_metadata_handler = ApiMetadata(region, log)

    s = "Status"
    return {
        s: api_metadata_handler.get_api_metadata(api_name=api_name, stage=stage).get(s)
    }
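# A minimal usage sketch for the two module level helpers above, assuming AWS credentials and an existing
# deployment of the given Stage in the given Region. The Region and Stage values are illustrative only.
if __name__ == "__main__":
    # enumerate the Namespaces registered for the Stage, then print each Namespace's current status
    for namespace in get_registry(region="us-east-1", stage="dev"):
        print(namespace, get_api_status(api_name=namespace, stage="dev", region="us-east-1"))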
    def get(self, api_name):
        if api_name not in self._api_cache:
            # load from metadata
            self._logger.info(f"Cache Miss: Loading API Instance {api_name} Stage {self._stage} from Metadata service")
            api_metadata_handler = ApiMetadata(self._region, self._logger)
            api_metadata = api_metadata_handler.get_api_metadata(api_name=api_name, stage=self._stage)

            if api_metadata is None:
                msg = f"Unable to resolve API {api_name} in Stage {self._stage}"
                self._logger.error(msg)
                raise BadRequestError(msg)
            else:
                if api_metadata.get("Status") == params.STATUS_CREATING:
                    raise InvalidArgumentsException("API Not Yet Active")
                else:
                    api_metadata['app'] = self._app
                    api_metadata[params.REGION] = self._region
                    api_metadata[params.API_NAME_PARAM] = api_name
                    api_metadata[params.EXTENDED_CONFIG] = self._extended_config

                    # instantiate the API from metadata
                    api = dapi.load_api(**api_metadata)

                    # TODO add caching for CORS objects from API Metadata
                    self.add(api_name, api)

                    return api
        else:
            return self._api_cache[api_name][CONF_CACHE_HANDLER]
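    # A minimal usage sketch (illustrative Stage, Region and Namespace names): the first get() for a Namespace
    # hydrates it from the Metadata service and caches it, and later calls return the cached instance until
    # remove() invalidates it.
    #
    #   cache = DataApiCache(app=app, stage="dev", region="us-east-1", logger=log)
    #   api = cache.get("MyApi")     # cache miss - loads metadata and builds the API instance
    #   same = cache.get("MyApi")    # cache hit - returns the stored handler
    #   cache.remove("MyApi")        # invalidate, forcing a rebuild on the next get()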
def understander_lambda(event, context):
    log.debug(event)

    understander = None

    api_metadata_handler = ApiMetadata(REGION, log)

    def arg_handler(arg):
        if arg not in event:
            raise InvalidArgumentsException(f"Invocation event must include '{arg}'")
        else:
            return event.get(arg)

    prefix = arg_handler("prefix")
    id = arg_handler("id")

    if understander is None:
        understander = u.Understander(region=REGION)

    api_name = arg_handler(params.API_NAME_PARAM)
    api_stage = arg_handler(params.API_STAGE_PARAM)

    # run the understander method
    understanding = understander.understand(prefix=prefix)

    # create a metadata update structure for all non-empty understanding items
    meta_add = {}

    def _meta_adder(val):
        if val in understanding:
            v = understanding.get(val)

            # add non empty structures
            if v is not None and v != [] and v != {}:
                meta_add[val] = v

    if understanding is not None:
        # add the new metadata to the object
        for x in [u.PAGES, u.RAW_LINES, u.KEY_VALUES, u.ENTITIES, u.LANGUAGE, u.SENTIMENT, u.KEY_PHRASES]:
            _meta_adder(x)

        api_metadata_handler.create_metadata(api_name, api_stage, caller_identity='System', **meta_add)

        print(f"Metadata Analysis complete for {id}")

        return understanding
    else:
        return None
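# A minimal sketch of the invocation payload the understander expects, matching the args assembled by
# AwsDataAPI.understand(). The concrete values (prefix, id, caller, key attribute, names) are illustrative only.
EXAMPLE_UNDERSTANDER_EVENT = {
    "prefix": "s3://my-bucket/documents/contract-123.pdf",
    "id": "123",
    "caller": "System",
    "primary_key_attribute": "id",
    params.API_NAME_PARAM: "MyApi",
    params.API_STAGE_PARAM: "dev"
}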
def provisioning_lambda(event, context):
    # TODO Add support for creation of Read/Only and Read/Write IAM Roles during provisioning
    log.debug(event)

    api_name = event.get("ApiName")
    event.pop("ApiName")

    # create an API Metadata Handler
    api_metadata_handler = ApiMetadata(REGION, log)

    # check if this API is already deployed
    table_name = utils.get_table_name(table_name=api_name, deployment_stage=STAGE)
    api_metadata = api_metadata_handler.get_api_metadata(api_name, STAGE)

    if api_metadata is None:
        log.debug(f"API {api_name} not found. Creating new Data API")
        api_metadata = {}

        # add the default parameters from the application container
        _add_api_defaults(api_metadata)

        api_metadata[params.DATA_TYPE] = api_name
        api_metadata[params.DEPLOYED_ACCOUNT] = context.invoked_function_arn.split(":")[4]
        api_metadata[params.STORAGE_TABLE] = table_name
        api_metadata[params.STORAGE_HANDLER] = params.DEFAULT_STORAGE_HANDLER
        api_metadata[params.CATALOG_DATABASE] = params.DEFAULT_CATALOG_DATABASE
    else:
        log.debug(f"API {api_name} already exists. Performing property update and instance rebuild")

        # remove last update date/by information to prevent update collision
        utils.remove_internal_attrs(api_metadata)

    # overlay the supplied parameters onto the api metadata
    api_metadata.update(event)

    # add a pending status
    api_metadata['Status'] = params.STATUS_CREATING

    # add a control table entry for this stage with the current configuration
    api_metadata_handler.create_metadata(api_name=api_name, stage=STAGE, caller_identity='System', **api_metadata)

    api_metadata[params.APP] = app

    # load the api class
    api = dapi.load_api(**api_metadata)

    # setup the search flow
    search_config = None
    if params.ES_DOMAIN in event:
        try:
            search_config = es_indexer.configure_search_flow(endpoints=api.get_endpoints(),
                                                             es_domain_name=event.get(params.ES_DOMAIN),
                                                             firehose_delivery_role_arn=event.get(
                                                                 params.FIREHOSE_DELIVERY_ROLE_ARN),
                                                             failure_record_bucket=event.get(
                                                                 params.DELIVERY_STREAM_FAILURE_BUCKET),
                                                             kms_key_arn=event.get(params.KMS_KEY_ARN))
        except KeyError:
            raise BadRequestError(
                f"Unable to provision search configuration without {params.ES_DOMAIN}, {params.FIREHOSE_DELIVERY_ROLE_ARN}, and {params.DELIVERY_STREAM_FAILURE_BUCKET}")

    # add the search config to metadata
    if search_config is not None:
        api_metadata_handler.update_metadata(api_name=api_name, stage=STAGE, updates=search_config,
                                             caller_identity='System')

    # destroy the cache reference to cause a reload on next invoke
    if api_cache.contains(api_name):
        log.debug("Invalidating API Cache")
        api_cache.remove(api_name)

    # update the metadata to show that the API is online
    api_metadata_handler.update_metadata(api_name=api_name, stage=STAGE, updates={"Status": params.STATUS_ACTIVE},
                                         caller_identity='System')

    log.info(f"Provisioning complete. API {api_name} online in Stage {STAGE}")
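# A minimal sketch of a provisioning event. Only "ApiName" is read unconditionally by provisioning_lambda;
# the remaining keys are optional overrides and the values shown are illustrative. When the ES domain is
# supplied, the delivery role and failure bucket are required alongside it, while the KMS key is optional.
EXAMPLE_PROVISIONING_EVENT = {
    "ApiName": "MyApi",
    params.PRIMARY_KEY: "id",
    params.ES_DOMAIN: "my-es-domain",
    params.FIREHOSE_DELIVERY_ROLE_ARN: "arn:aws:iam::123456789012:role/firehose-delivery",
    params.DELIVERY_STREAM_FAILURE_BUCKET: "my-failure-bucket",
    params.KMS_KEY_ARN: None
}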
                                         params.COGNITO_PROVIDER_ARNS)
elif set_authorizer == params.AUTHORIZER_CUSTOM:
    use_authorizer = None
else:
    use_authorizer = None

if use_authorizer is None:
    print("Stage deployed without Authorizer")
else:
    print("Using Authorizer %s" % set_authorizer.__name__)

# setup class logger
log = utils.setup_logging()

# create an API Metadata Handler
api_metadata_handler = ApiMetadata(REGION, log)

# create a cache of all API references tracked by this deployment stage
api_cache = DataApiCache(app=app, stage=STAGE, region=REGION, logger=log)

# create the streams integration handler, which is used by the lambda function embedded at the end of this app
es_indexer = None

# module level settings used as flags for lazy initialisers in functions
search_flow_verified = False

# load the cors config
cors_config = None
cors = None
try:
    with open("chalicelib/cors.json", "r") as f:
class AwsDataAPI:
    _full_config = None
    _app = None
    _region = None
    _deployment_stage = None
    _deployed_account = None
    _gremlin_address = None
    _gremlin_endpoint = None
    _es_client = None
    _search_config = None
    _storage_handler = None
    _catalog_database = None
    _api_name = None
    _table_name = None
    _pk_name = None
    _sts_client = None
    _cwl_client = None
    _log_group_name = None
    _last_log_info = None
    _caller_identity = None
    _simple_identity = None
    _logger = None
    _delete_mode = None
    _allow_runtime_delete_mode_change = False
    _crawler_rolename = None
    _table_indexes = None
    _metadata_indexes = None
    _schema_validation_refresh_hitcount = None
    _allow_non_itemmaster_writes = None
    _strict_occv = None
    _dynamo_helper = None
    _lambda_client = None
    _cloudwatch_emitter = None
    _api_metadata_handler = None
    _extended_config = None

    def __init__(self, **kwargs):
        self._region = kwargs.get(params.REGION, os.getenv('AWS_REGION'))
        self._full_config = kwargs
        self._api_name = kwargs.get(params.API_NAME_PARAM)
        self._table_name = kwargs.get(params.STORAGE_TABLE)

        # setup instance logger
        self._logger = utils.setup_logging(params.AWS_DATA_API_NAME)
        global log
        log = self._logger

        self._logger.debug("Constructing new Data API with Args")
        self._logger.debug(kwargs)

        # create the API metadata handler
        self._api_metadata_handler = ApiMetadata(self._region, self._logger, kwargs.get(params.KMS_KEY_ARN))

        # Load class properties from any supplied metadata. These will be populated when hydrating an existing API
        # namespace from DynamoDB
        self._app = kwargs.get(params.APP, None)
        self._deployment_stage = kwargs.get(params.STAGE)
        self._pk_name = kwargs.get(params.PRIMARY_KEY, None)
        self._delete_mode = kwargs.get(params.DELETE_MODE, params.DEFAULT_DELETE_MODE)
        self._allow_runtime_delete_mode_change = kwargs.get(params.ALLOW_RUNTIME_DELETE_MODE_CHANGE,
                                                            params.DEFAULT_ALLOW_RUNTIME_DELETE_MODE_CHANGE)
        self._crawler_rolename = kwargs.get(params.CRAWLER_ROLENAME, None)
        self._table_indexes = kwargs.get(params.TABLE_INDEXES, None)
        self._metadata_indexes = kwargs.get(params.METADATA_INDEXES, None)
        self._schema_validation_refresh_hitcount = kwargs.get(params.SCHEMA_VALIDATION_REFRESH_HITCOUNT,
                                                              params.DEFAULT_SCHEMA_VALIDATION_REFRESH_HITCOUNT)
        self._gremlin_address = kwargs.get(params.GREMLIN_ADDRESS, None)
        self._allow_non_itemmaster_writes = kwargs.get(params.NON_ITEM_MASTER_WRITES_ALLOWED,
                                                       params.DEFAULT_NON_ITEM_MASTER_WRITE_ALLOWED)
        self._strict_occv = kwargs.get(params.STRICT_OCCV, params.DEFAULT_STRICT_OCCV)
        self._catalog_database = kwargs.get(params.CATALOG_DATABASE, params.DEFAULT_CATALOG_DATABASE)

        # setup the storage handler which implements the backend data api functionality
        storage_args = kwargs

        resource_schema = self._api_metadata_handler.get_schema(api_name=self._api_name,
                                                                stage=self._deployment_stage,
                                                                schema_type=params.RESOURCE)
        if resource_schema is not None:
            storage_args[params.CONTROL_TYPE_RESOURCE_SCHEMA] = resource_schema

        metadata_schema = self._api_metadata_handler.get_schema(api_name=self._api_name,
                                                                stage=self._deployment_stage,
                                                                schema_type=params.METADATA)
        if metadata_schema is not None:
            storage_args[params.CONTROL_TYPE_METADATA_SCHEMA] = metadata_schema

        storage_args["table_name"] = self._table_name
        storage_args["primary_key_attribute"] = self._pk_name
        storage_args["region"] = self._region
        storage_args["delete_mode"] = self._delete_mode
        storage_args["allow_runtime_delete_mode_change"] = self._allow_runtime_delete_mode_change
        storage_args["table_indexes"] = self._table_indexes
        storage_args["metadata_indexes"] = self._metadata_indexes
        storage_args["schema_validation_refresh_hitcount"] = self._schema_validation_refresh_hitcount
        storage_args["crawler_rolename"] = self._crawler_rolename
        storage_args["catalog_database"] = self._catalog_database
        storage_args["allow_non_itemmaster_writes"] = self._allow_non_itemmaster_writes
        storage_args["strict_occv"] = self._strict_occv
        storage_args["deployed_account"] = kwargs.get(params.DEPLOYED_ACCOUNT, None)
        storage_args["handler_name"] = kwargs[params.STORAGE_HANDLER]
        storage_args["pitr_enabled"] = utils.strtobool(kwargs.get(params.PITR_ENABLED, params.DEFAULT_PITR_ENABLED))
        storage_args["kms_key_arn"] = kwargs.get(params.STORAGE_CRYPTO_KEY_ARN, None)

        self._storage_handler = self._get_storage_handler(**storage_args)

        # setup the gremlin integration if one has been provided
        if self._gremlin_address is not None:
            log.info(f"Binding new Gremlin Handler to address {self._gremlin_address}")
            tokens = self._gremlin_address.split(":")
            self._gremlin_endpoint = GremlinHandler(url=tokens[0], port=tokens[1])

        if "SearchConfig" in kwargs:
            self._search_config = kwargs.get("SearchConfig")

        log.info(f"AWS Data API for {self._catalog_database}.{self._table_name} Online.")

    # method which writes a set of object references to the Gremlin helper class
    def _put_references(self, id: str, reference_doc: list):
        g = self._gremlin_endpoint

        if g is not None:
            from_id = utils.get_arn(id, self._table_name, self._deployed_account)

            ctr = 0
            exceptions = []

            for r in reference_doc:
                if params.RESOURCE not in r:
                    raise InvalidArgumentsException(f"Malformed Reference: {r}. Must Contain a {params.RESOURCE}")
                else:
                    to_id = r[params.RESOURCE]

                    # remove the resource and ID keys so we can use the rest of the document for extra properties
                    del r[params.RESOURCE]

                    try:
                        g.create_relationship(label=params.REFERENCES, from_id=from_id, to_id=to_id,
                                              extra_properties=r)
                        ctr += 1
                    except Exception as e:
                        exceptions.append({"ID": to_id, "Message": str(e)})

            response = {"ReferenceCount": ctr}

            if len(exceptions) > 0:
                response["Exceptions"] = exceptions

            return response
        else:
            raise UnimplementedFeatureException(NO_GREMLIN)

    def _get_storage_handler(self, **kwargs):
        """
        Method to load a Storage Handler class based upon the provided handler name.
""" log.info( f"Creating new Data API Storage Handler from {kwargs.get(params.STORAGE_HANDLER)}" ) storage_module = load_storage_handler_module( kwargs.get(params.STORAGE_HANDLER)) storage_class = getattr(storage_module, "DataAPIStorageHandler") return storage_class(**kwargs) # simple accessor method for the pk_name attribute, which is required in some cases for API integration def get_primary_key(self): return self._pk_name # access method that returns a boolean outcome based upon if the provided ID is valid # @evented(api_operation="Check") @identity_trace def check(self, id): return self._storage_handler.check(id=id) # return a paginated list of elements from the API # @evented(api_operation="List") @identity_trace def list(self, **kwargs): return self._storage_handler.list_items(**kwargs) # return information about storage usage for this API namespace # @evented(api_operation="Usage") @identity_trace def get_usage(self): resources = self._storage_handler.get_usage( table_name=self._table_name) metadata = self._storage_handler.get_usage( table_name=utils.get_metaname(self._table_name)) references = None # TODO figure out why the gremlin connection is failing # if self._gremlin_endpoint is not None: # references = self._gremlin_endpoint.get_usage() usage = {params.RESOURCE: resources, params.METADATA: metadata} if references is not None: usage[params.REFERENCES] = {"Count": references} return usage # run the natural language understanding integration, which attaches new Metadata to the Resource # @evented(api_operation="Understand") @identity_trace def understand(self, id, storage_location=None): fetch_id = self._validate_arn_id(id) # validate the attribute that stores the location of the object if storage_location is None: storage_location = params.DEFAULT_STORAGE_LOCATION_ATTRIBUTE # fetch the resource item = self._storage_handler.get(id=fetch_id) storage_loc = None if item is None: raise ResourceNotFoundException( f"Unable to find Resource with ID {fetch_id}") else: if storage_location in item[params.RESOURCE]: storage_loc = item.get(params.RESOURCE).get(storage_location) else: # storage location may be in metadata meta = self._storage_handler.get_metadata(id) if storage_location in meta[params.METADATA]: storage_loc = meta.get( params.METADATA).get(storage_location) if storage_loc is None: raise DetailedException( f"Unable to run Metadata Resolver without a Storage Location Attribute in Item Resource or Metadata (Default {params.DEFAULT_STORAGE_LOCATION_ATTRIBUTE})" ) if self._lambda_client is None: self._lambda_client = boto3.client("lambda", region_name=self._region) # run the understander and metadata update through an async lambda f = f"{params.AWS_DATA_API_NAME}-{self._deployment_stage}-{params.UNDERSTANDER_NAME}" args = { "prefix": storage_loc, "id": fetch_id, "caller": self._simple_identity, "primary_key_attribute": self._pk_name, params.API_NAME_PARAM: self._api_name, params.API_STAGE_PARAM: self._deployment_stage } response = self._lambda_client.invoke(FunctionName=f, InvocationType='Event', Payload=json.dumps(args)) if "FunctionError" in response: if response.get("FunctionError") == "Handled": raise DetailedException(response.get("Payload")) else: raise DetailedException( "Unhandled error occurred during submission of async Understanding request" ) else: return response.get("StatusCode") def _validate_arn_id(self, id): # decode the ID as it forms part of the request url decoded_id = parser.unquote(id) log.debug(f"Validating Resource ARN {id}") if utils.get_arn_base() in 
decoded_id: # validate arn structure and then fetch by id arn = utils.shred_arn(decoded_id) if arn is None: raise ResourceNotFoundException( f"Invalid ARN format {decoded_id}") if utils.get_caller_account() != arn[params.ARN_ACCOUNT]: raise ResourceNotFoundException( "Requested resource not available from Data API Account") if self._table_name != arn[params.ARN_TABLE]: self.exception = ResourceNotFoundException( f"Requested resource {arn[params.ARN_TABLE]} not available from Data API {self._table_name}" ) raise self.exception if arn[params.ARN_REGION] != self._region: raise ResourceNotFoundException( f"ARN Valid Region {arn[params.ARN_REGION]}") fetch_id = arn[params.ARN_ID] else: fetch_id = decoded_id return fetch_id # get the Resource, which may include or prefer the Item Master # @evented(api_operation="GetResource") @identity_trace def get(self, id, master_option, suppress_meta_fetch: bool = False, only_attributes: list = None, not_attributes: list = None): fetch_id = self._validate_arn_id(id) response = {} item = self._storage_handler.get( id=fetch_id, suppress_meta_fetch=suppress_meta_fetch, only_attributes=only_attributes, not_attributes=not_attributes) # set the 'Item' in the response unless master_option = prefer if params.ITEM_MASTER_ID not in item[params.RESOURCE] or \ master_option is None or \ master_option.lower() == params.ITEM_MASTER_INCLUDE.lower(): response["Item"] = item # extract the master if there is one, and the provided master option is 'include' or 'prefer' # TODO Test what happens if we have very large Item Master hierarchies here if params.ITEM_MASTER_ID in item[ params. RESOURCE] and master_option is not None and master_option.lower( ) in [ params.ITEM_MASTER_INCLUDE.lower(), params.ITEM_MASTER_PREFER.lower() ]: master = self._storage_handler.get( id=item[params.RESOURCE][params.ITEM_MASTER_ID]) response["Master"] = master return response # undelete a Data API Resource that has been soft deleted (non-Tombstone) # @evented(api_operation="Restore") @identity_trace def restore(self, id): fetch_id = self._validate_arn_id(id) return self._storage_handler.restore( id=fetch_id, caller_identity=self._simple_identity) # get the Metadata for a Resource # @evented(api_operation="GetMetadata") @identity_trace def get_metadata(self, id): fetch_id = self._validate_arn_id(id) return self._storage_handler.get_metadata(id=fetch_id) # Delete a Resource and Metadata based upon the specified deletion mode of the system or in the request # @evented(api_operation="Delete") @identity_trace def delete(self, id, **kwargs): fetch_id = self._validate_arn_id(id) return self._storage_handler.delete( id=fetch_id, caller_identity=self._simple_identity, **kwargs) # Update a Data API Resource # @evented(api_operation="Update") @identity_trace def update_item(self, id, **kwargs): response = {} def _wrap_response(type, type_res): response[type] = { params.DATA_MODIFIED: True if type_res is not None else False } if type_res is not None and "Messages" in type_res: response["Messages"] = type_res.get("Messages") fetch_id = self._validate_arn_id(id) if params.REFERENCES in kwargs: log.debug("Creating Reference Links") _wrap_response( params.REFERENCES, self._put_references(id, kwargs.get(params.REFERENCES))) # update the item, which may update metadata and resources item_response = self._storage_handler.update_item( caller_identity=self._simple_identity, id=fetch_id, **kwargs) _wrap_response(params.METADATA, item_response.get(params.METADATA)) _wrap_response(params.RESOURCE, 
        return response

    # Drop an entire API Namespace. This will do a backup before dropping the underlying storage tables
    # @evented(api_operation="DropAPI")
    @identity_trace
    def drop(self, do_export=True):
        # drop tables with final backup
        self._storage_handler.drop_table(table_name=self._table_name, do_export=do_export)
        self._storage_handler.drop_table(table_name=utils.get_metaname(self._table_name), do_export=do_export)

        # delete API information
        self._api_metadata_handler.delete_all_api_metadata(self._api_name, self._deployment_stage)

    # Perform a search request against the Resource or Metadata, based upon provided query args
    # @evented(api_operation="Find")
    @identity_trace
    def find(self, **kwargs):
        return self._storage_handler.find(**kwargs)

    def _get_es_endpoint(self):
        return self._search_config.get("ElasticSearchDomain").get("ElasticSearchEndpoint")

    # private lazy loader method for es client to ensure that we don't get constructor stalls if VPC connections are weird
    def _get_es_client(self):
        if self._es_client is None:
            # setup a reference to ElasticSearch if a SearchConfig is setup
            self._es_client = Elasticsearch(hosts=[self._get_es_endpoint()])

        return self._es_client

    # Perform a search request against the configured ES endpoint
    # @evented(api_operation="Search")
    @identity_trace
    def search(self, search_type, **kwargs):
        # search requires a SearchConfig to have been supplied - the ES client itself is lazy loaded
        if self._search_config is None:
            raise UnimplementedFeatureException("No ElasticSearch Endpoint Configured")
        else:
            response = {}

            def _add_results(result_type):
                index_name = utils.get_es_index_name(self._table_name, result_type)
                doc = utils.get_es_type_name(self._table_name, result_type)
                response[result_type] = self._get_es_client().search(index=index_name, doc_type=doc,
                                                                      body=kwargs.get("query"))

            if search_type is not None:
                # perform a search just for the specified type of data
                _add_results(search_type)
            else:
                # perform a search across both Resource and Metadata indexes
                _add_results(params.RESOURCE)
                _add_results(params.METADATA)

            return response

    # Return the API's underlying storage implementations, including tables in use, Dynamo Streams that can be processed
    # and references to Gremlin and ElasticSearch endpoints in use
    # @evented(api_operation="Endpoints")
    @identity_trace
    def get_endpoints(self):
        endpoints = self._storage_handler.get_streams()

        if self._gremlin_address is not None:
            endpoints['GraphURL'] = self._gremlin_address

        if self._search_config is not None:
            endpoints['Elasticsearch'] = self._get_es_endpoint()

        return endpoints

    # Return the JSON schema for an API Namespace
    # @evented(api_operation="GetSchema")
    @identity_trace
    def get_schema(self, schema_type):
        return self._api_metadata_handler.get_schema(api_name=self._api_name, stage=self._deployment_stage,
                                                     schema_type=schema_type)

    # Create or Update a JSON Schema for the API Namespace Resources or Metadata
    # @evented(api_operation="PutSchema")
    @identity_trace
    def put_schema(self, schema_type, schema):
        return self._api_metadata_handler.put_schema(api_name=self._api_name, stage=self._deployment_stage,
                                                     schema_type=schema_type,
                                                     caller_identity=self._simple_identity,
                                                     schema=schema).get(params.DATA_MODIFIED)

    # Remove the JSON Schema from the Namespace for Resources or Metadata
    # @evented(api_operation="DeleteSchema")
    @identity_trace
    def remove_schema(self, schema_type):
        if schema_type.lower() == params.RESOURCE.lower():
            set_schema_type = params.CONTROL_TYPE_RESOURCE_SCHEMA
        elif schema_type.lower() == params.METADATA.lower():
            set_schema_type = params.CONTROL_TYPE_METADATA_SCHEMA
        else:
            raise InvalidArgumentsException(
                f"Schema Type {schema_type} invalid. Use {params.CONTROL_TYPE_METADATA_SCHEMA} or {params.CONTROL_TYPE_RESOURCE_SCHEMA}")

        return self._api_metadata_handler.delete_metadata(api_name=self._api_name, stage=self._deployment_stage,
                                                          metadata_type=set_schema_type,
                                                          caller_identity=self._simple_identity)

    # Setup the Item Master for a given Resource
    # @evented(api_operation="SetItemMaster")
    @identity_trace
    def item_master_update(self, **kwargs):
        return self._storage_handler.item_master_update(caller_identity=self._simple_identity, **kwargs)

    # Remove the specified Item Master for a given Resource
    # @evented(api_operation="RemoveItemMaster")
    @identity_trace
    def item_master_delete(self, **kwargs):
        item_id = kwargs.get(self._pk_name)

        if item_id is None:
            raise ResourceNotFoundException
        else:
            # validate that this item actually has the correct item master set
            current = self._storage_handler.get(id=item_id)
            assert_item_master = kwargs.get(params.ITEM_MASTER_ID)
            current_master = current.get(params.RESOURCE).get(params.ITEM_MASTER_ID, None)

            if current_master is None:
                return True
            elif current_master != assert_item_master:
                raise InvalidArgumentsException(
                    f"Item Master {assert_item_master} does not match actual Item Master")
            else:
                # TODO migrate this to use item_master_update with None target ID
                return self._storage_handler.remove_resource_attributes(
                    id=item_id,
                    resource_attributes=[params.ITEM_MASTER_ID],
                    caller_identity=self._simple_identity)

    # Extract the Metadata for the API itself
    # @evented(api_operation="GetApiMetadata")
    @identity_trace
    def get_table_metadata(self, attribute_filters=None):
        return self._api_metadata_handler.get_api_metadata(api_name=self._api_name, stage=self._deployment_stage,
                                                           attribute_filters=attribute_filters)

    # Create or Update API Metadata
    # @evented(api_operation="CreateApiMetadata")
    @identity_trace
    def create_table_metadata(self, caller_identity=None, **kwargs):
        try:
            return self._dynamo_helper.create_table_metadata(
                api_name=self._table_name,
                caller_identity=self._simple_identity if caller_identity is None else caller_identity,
                **kwargs)
        except Exception as e:
            raise DetailedException(e)

    # Perform a search for all References in the Gremlin DB for objects that directly or indirectly reference an API Item
    # @evented(api_operation="GetDownstreamReferences")
    @identity_trace
    def get_downstream(self, id, search_depth=1):
        if self._gremlin_endpoint is not None:
            if id is None:
                raise InvalidArgumentsException("Must have ID to run lineage search")
            else:
                try:
                    return self._gremlin_endpoint.get_outbound(id=utils.get_arn(id, self._table_name,
                                                                                self._deployed_account),
                                                               search_depth=search_depth)
                except ResourceNotFoundException:
                    return None
                except Exception as e:
                    raise DetailedException(e)
        else:
            raise UnimplementedFeatureException(params.NO_GREMLIN)

    # Perform a search for all References that the provided API Item references, directly or indirectly
    # @evented(api_operation="GetUpstreamReferences")
    @identity_trace
    def get_upstream(self, id, search_depth=1):
        if self._gremlin_endpoint is not None:
            if id is None:
                raise InvalidArgumentsException("Must have ID to run lineage search")
            else:
                try:
                    return self._gremlin_endpoint.get_inbound(id=utils.get_arn(id, self._table_name,
                                                                               self._deployed_account),
                                                              search_depth=search_depth)
                except ResourceNotFoundException:
                    return None
                except Exception as e:
                    raise DetailedException(e)
        else:
            raise UnimplementedFeatureException(params.NO_GREMLIN)

    def _do_ddb_export_to_s3(self, table_name, export_path, log_path, read_pct, dpu,
                             kms_key_arn, setup_crawler, catalog_database=None):
        if setup_crawler is True and self._crawler_rolename is None:
            raise InvalidArgumentsException(
                "Cannot Setup Crawler for Exported Dataset as API is not configured with a Crawler Role")

        set_table_name = f"{table_name}_{utils.get_date_now()}"

        export = utils.run_glue_export(table_name=set_table_name,
                                       s3_export_path=export_path,
                                       kms_key_arn=kms_key_arn,
                                       read_pct=read_pct,
                                       log_path=log_path,
                                       export_role=self._crawler_rolename,
                                       dpu=dpu)

        if setup_crawler is not None:
            crawler = utils.create_s3_crawler(
                target_entity_name=set_table_name,
                crawler_name=f"{table_name}-export",
                crawler_rolename=self._crawler_rolename,
                catalog_db=f"{self._catalog_database}-export" if catalog_database is None else catalog_database,
                s3_path=export_path,
                and_run=True)

            if crawler is not None:
                export['Crawler'] = crawler
            else:
                msg = "Unable to configure Export Location Crawler"
                export['Errors'] = [{"Error": msg}]
                raise DetailedException(message=msg, detail=export)

        return export

    # Get the status of an API Export to S3
    # @evented(api_operation="GetExportStatus")
    @identity_trace
    def get_export_job_status(self, job_name, run_id):
        return utils.get_glue_job_status(job_name=job_name, run_id=run_id)

    # Get a list of all export jobs running
    # @evented(api_operation="GetExportJobs")
    @identity_trace
    def get_running_export_jobs(self, job_name):
        return utils.get_running_export_jobs(job_name=job_name)

    # Start an export of API Data to S3
    # @evented(api_operation="StartExport")
    @identity_trace
    def export_to_s3(self, **kwargs):
        EXPORT_DATA = 'Data'
        EXPORT_META = 'Metadata'
        EXPORT_ALL = 'All'

        export_path = kwargs.get(params.EXPORT_S3_PATH)
        if export_path is None:
            raise Exception("Cannot export without S3 Export Path")

        dpu = int(kwargs.get(params.EXPORT_JOB_DPU, params.DEFAULT_EXPORT_DPU))
        kms_key_arn = kwargs.get(params.KMS_KEY_ARN, None)
        read_pct = int(kwargs.get(params.EXPORT_READ_PCT, 50))
        log_path = kwargs.get(params.EXPORT_LOG_PATH)
        export_type = kwargs.get(params.EXPORT_TYPE, EXPORT_DATA)
        catalog_database = kwargs.get(params.CATALOG_DATABASE)

        export_types = [EXPORT_DATA, EXPORT_META, EXPORT_ALL]
        if not any(x in export_type for x in export_types):
            raise InvalidArgumentsException("ExportType must be one of {0}, {1}, or {2}".format(*export_types))

        # ensure the export path ends with a path separator
        def _fix_path(path):
            if not path.endswith("/"):
                path += "/"
            return path

        export_path = _fix_path(export_path)

        crawl = kwargs.get(params.EXPORT_SETUP_CRAWLER, None)

        out = {}

        # export main data to s3 location
        if export_type == EXPORT_DATA or export_type == EXPORT_ALL:
            result = self._do_ddb_export_to_s3(table_name=self._table_name,
                                               export_path=export_path,
                                               log_path=log_path,
                                               read_pct=read_pct,
                                               dpu=dpu,
                                               kms_key_arn=kms_key_arn,
                                               setup_crawler=crawl,
                                               catalog_database=catalog_database)
            if result is not None:
                out[EXPORT_DATA] = result

        # export metadata to S3
        if export_type == EXPORT_META or export_type == EXPORT_ALL:
            result = self._do_ddb_export_to_s3(table_name=utils.get_metaname(self._table_name),
                                               export_path=export_path,
                                               log_path=log_path,
                                               read_pct=read_pct,
                                               dpu=dpu,
                                               kms_key_arn=kms_key_arn,
                                               setup_crawler=crawl,
                                               catalog_database=catalog_database)
            if result is not None:
                out[EXPORT_META] = result

        return out
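# A minimal sketch of the keyword arguments accepted by AwsDataAPI.export_to_s3(). The bucket paths, DPU count
# and read percentage are illustrative only; the export type must resolve to 'Data', 'Metadata' or 'All', and
# setting up the crawler requires the API to have been configured with a Crawler Role.
EXAMPLE_EXPORT_ARGS = {
    params.EXPORT_S3_PATH: "s3://my-export-bucket/data-api/",
    params.EXPORT_LOG_PATH: "s3://my-export-bucket/logs/",
    params.EXPORT_JOB_DPU: 5,
    params.EXPORT_READ_PCT: 50,
    params.EXPORT_TYPE: "All",
    params.EXPORT_SETUP_CRAWLER: True
}

# usage (illustrative): api.export_to_s3(**EXAMPLE_EXPORT_ARGS)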