def export_to_s3(self, **kwargs):
    """Export the resource table and/or its metadata table to S3.

    Options are read from ``kwargs`` using the key constants in ``params``:

    * ``params.EXPORT_S3_PATH`` -- destination path (required). A trailing
      ``/`` is appended when missing.
    * ``params.EXPORT_JOB_DPU`` -- converted to ``int``; defaults to
      ``params.DEFAULT_EXPORT_DPU``.
    * ``params.KMS_KEY_ARN`` -- optional, forwarded unchanged.
    * ``params.EXPORT_READ_PCT`` -- converted to ``int``; defaults to 50.
    * ``params.EXPORT_LOG_PATH`` -- optional, forwarded unchanged.
    * ``params.EXPORT_TYPE`` -- ``'Data'``, ``'Metadata'`` or ``'All'``;
      defaults to ``'Data'``.
    * ``params.CATALOG_DATABASE`` -- optional, forwarded unchanged.
    * ``params.EXPORT_SETUP_CRAWLER`` -- optional, forwarded unchanged.

    Returns:
        dict: maps ``'Data'`` and/or ``'Metadata'`` to whatever
        ``_do_ddb_export_to_s3`` returned for that table. Entries whose
        result is ``None`` are omitted.

    Raises:
        Exception: if no S3 export path was supplied.
        InvalidArgumentsException: if the export type is not exactly one of
            the supported values.
    """
    EXPORT_DATA = 'Data'
    EXPORT_META = 'Metadata'
    EXPORT_ALL = 'All'

    export_path = kwargs.get(params.EXPORT_S3_PATH)
    if export_path is None:
        raise Exception("Cannot export without S3 Export Path")

    dpu = int(kwargs.get(params.EXPORT_JOB_DPU, params.DEFAULT_EXPORT_DPU))
    kms_key_arn = kwargs.get(params.KMS_KEY_ARN, None)
    read_pct = int(kwargs.get(params.EXPORT_READ_PCT, 50))
    log_path = kwargs.get(params.EXPORT_LOG_PATH)
    export_type = kwargs.get(params.EXPORT_TYPE, EXPORT_DATA)
    catalog_database = kwargs.get(params.CATALOG_DATABASE)

    # Exact membership test: the previous substring check accepted values
    # such as 'AllData', which then matched no export branch and silently
    # produced an empty result.
    export_types = [EXPORT_DATA, EXPORT_META, EXPORT_ALL]
    if export_type not in export_types:
        # str.format matches the {0}-style placeholders; the previous '%'
        # operator raised TypeError instead of this exception.
        raise InvalidArgumentsException(
            "ExportType must be one of {0}, {1}, or {2}".format(*export_types))

    # Normalise the destination so it always ends with a slash. The previous
    # helper inspected the first character and discarded its result, so the
    # path was never actually changed.
    if not export_path.endswith("/"):
        export_path += "/"

    crawl = kwargs.get(params.EXPORT_SETUP_CRAWLER, None)

    # Arguments common to the data and the metadata export.
    shared_args = {
        "export_path": export_path,
        "log_path": log_path,
        "read_pct": read_pct,
        "dpu": dpu,
        "kms_key_arn": kms_key_arn,
        "setup_crawler": crawl,
        "catalog_database": catalog_database,
    }

    out = {}

    # export main data to s3 location
    if export_type in (EXPORT_DATA, EXPORT_ALL):
        result = self._do_ddb_export_to_s3(
            table_name=self._table_name, **shared_args)
        if result is not None:
            out[EXPORT_DATA] = result

    # export metadata to S3
    if export_type in (EXPORT_META, EXPORT_ALL):
        result = self._do_ddb_export_to_s3(
            table_name=utils.get_metaname(self._table_name), **shared_args)
        if result is not None:
            out[EXPORT_META] = result

    return out
def drop(self, do_export=True):
    """Drop the resource table and its metadata table, then remove API metadata.

    Args:
        do_export: forwarded to the storage handler's ``drop_table`` for
            both tables (presumably controls whether a final backup is
            taken before dropping -- confirm against the storage handler).
    """
    storage = self._storage_handler

    # Drop the main table first, then its companion metadata table.
    storage.drop_table(table_name=self._table_name, do_export=do_export)

    meta_table = utils.get_metaname(self._table_name)
    storage.drop_table(table_name=meta_table, do_export=do_export)

    # Finally remove the API registration for this name and stage.
    self._api_metadata_handler.delete_all_api_metadata(
        self._api_name,
        self._deployment_stage,
    )
def get_usage(self):
    """Return usage information for the resource and metadata tables.

    Returns:
        dict: ``params.RESOURCE`` and ``params.METADATA`` mapped to the
        storage handler's ``get_usage`` result for the main table and the
        metadata table respectively. A ``params.REFERENCES`` entry of the
        form ``{"Count": n}`` is added only when a reference count is
        available, which is currently never the case (see TODO below).
    """
    storage = self._storage_handler

    resource_usage = storage.get_usage(table_name=self._table_name)

    meta_table = utils.get_metaname(self._table_name)
    metadata_usage = storage.get_usage(table_name=meta_table)

    # TODO figure out why the gremlin connection is failing
    # if self._gremlin_endpoint is not None:
    #     references = self._gremlin_endpoint.get_usage()
    reference_count = None

    usage = {
        params.RESOURCE: resource_usage,
        params.METADATA: metadata_usage,
    }

    if reference_count is None:
        return usage

    usage[params.REFERENCES] = {"Count": reference_count}
    return usage