def update_glue_crawler_datastores(context, datastores):
    """Add an S3 target to the Glue crawler for every table it does not yet crawl.

    The crawler description is cached in the module-level
    ``glue_crawler_response`` so repeated calls in the same process do not
    have to call ``Glue.get_crawler`` each time.

    Args:
        context: Mapping that provides ``c.KEY_LAMBDA_FUNCTION``; its value is
            passed to ``Glue.get_crawler_name`` to resolve the crawler name.
        datastores: Mapping keyed by table path (relative to the storage
            bucket).  NOTE: this mapping is mutated in place -- keys that
            already have a crawler target are deleted from it.

    Returns:
        None.  Nothing is done when the crawler cannot be found or when every
        table in ``datastores`` already has a crawler target.
    """
    global glue_crawler_response
    glue = Glue()
    crawler_name = glue.get_crawler_name(context[c.KEY_LAMBDA_FUNCTION])
    if not glue_crawler_response:
        glue_crawler_response = glue.get_crawler(crawler_name)
    if glue_crawler_response is None:
        return

    storage = os.environ[c.RES_S3_STORAGE]
    bucket = "s3://{}/".format(storage)

    # Keep every target the crawler already has, and drop the matching tables
    # from `datastores` so that only the missing ones remain afterwards.
    srcs = []
    for s3target in glue_crawler_response['Crawler']['Targets']['S3Targets']:
        table = s3target['Path'].replace(bucket, '').lower()
        if table in datastores:
            del datastores[table]
        srcs.append(s3target)

    if not datastores:
        return

    srcs.extend(
        {'Path': "s3://{}/{}".format(storage, table), 'Exclusions': []}
        for table in datastores
    )

    # Single parenthesised argument: same output on Python 2 and Python 3.
    print("Defining GLUE datastores")
    db_name = athena.get_database_name(os.environ[c.ENV_S3_STORAGE])
    table_prefix = athena.get_table_prefix(os.environ[c.ENV_S3_STORAGE])
    glue.update_crawler(crawler_name, os.environ[c.ENV_SERVICE_ROLE], db_name, table_prefix, srcs=srcs)

    # The cached description no longer lists the crawler's current targets.
    # Drop it so the next call re-reads the crawler instead of rebuilding the
    # target list from stale data (which could omit the targets added here).
    glue_crawler_response = None
def handler(event, context):
    """Create, update or delete the Glue database and crawler for a stack.

    The action is selected by ``event['RequestType']`` (compared
    case-insensitively):

    * ``delete`` -- stop and delete the crawler if it exists, then delete the
      database if it exists.
    * ``create`` -- create the database and the crawler if they are missing.
    * anything else -- create the crawler if it is missing, otherwise stop it
      and update it.

    Args:
        event: Mapping that provides ``c.ENV_STACK_ID`` and ``'RequestType'``.
        context: Unused by this function.

    Returns:
        The value of ``custom_resource_response.success_response({}, "*")``.

    Raises:
        errors.ClientError: If the stack has no resource whose logical id is
            ``c.RES_SERVICE_ROLE`` or ``c.RES_S3_STORAGE``.
    """
    # Single parenthesised argument: same output on Python 2 and Python 3.
    print("Start Glue")
    stack_id = event[c.ENV_STACK_ID]
    resources = util.get_stack_resources(stack_id)
    request_type = event['RequestType']
    db_name = athena.get_database_name(stack_id, False)
    glue = Glue()

    # Initialise both lookups so that a missing resource reaches the explicit
    # ClientError below instead of failing with an unbound local variable.
    role_name = None
    storage_physical_id = None
    for resource in resources:
        if resource.logical_id == c.RES_SERVICE_ROLE:
            role_name = resource.physical_id
        if resource.logical_id == c.RES_S3_STORAGE:
            storage_physical_id = resource.physical_id

    not_found = "The logical resource '{}' was not found. Is the resource in the cloud formation stack?"
    if role_name is None:
        raise errors.ClientError(not_found.format(c.RES_SERVICE_ROLE))
    if storage_physical_id is None:
        raise errors.ClientError(not_found.format(c.RES_S3_STORAGE))

    crawler_id_1 = glue.get_crawler_name(stack_id)
    # Default crawl targets: one "table=<event>" path per built-in event.
    srcs = [
        {
            'Path': "{}/{}{}".format(storage_physical_id, "table=", event_name),
            'Exclusions': []
        }
        for event_name in (DEFAULT_EVENTS.CLIENTINITCOMPLETE, DEFAULT_EVENTS.SESSIONSTART)
    ]

    # Space-joined to reproduce the text of the former multi-argument print
    # statement while remaining valid on Python 3.
    print(" ".join(str(part) for part in (
        request_type, db_name, crawler_id_1, "role: ", role_name, "s3: ", storage_physical_id)))

    action = request_type.lower()
    if action == 'delete':
        if glue.get_crawler(crawler_id_1) is not None:
            glue.stop_crawler(crawler_id_1)
            glue.delete_crawler(crawler_id_1)
        if glue.database_exists(db_name):
            glue.delete_database(db_name)
    elif action == 'create':
        if not glue.database_exists(db_name):
            glue.create_database(db_name)
        if glue.get_crawler(crawler_id_1) is None:
            glue.create_crawler(crawler_id_1, role_name, db_name, athena.get_table_prefix(stack_id), srcs=srcs)
    else:
        if glue.get_crawler(crawler_id_1) is None:
            glue.create_crawler(crawler_id_1, role_name, db_name, athena.get_table_prefix(stack_id), srcs=srcs)
        else:
            glue.stop_crawler(crawler_id_1)
            # NOTE(review): no `srcs` is passed on update -- presumably so that
            # targets added after creation are not overwritten; confirm.
            glue.update_crawler(crawler_id_1, role_name, db_name, athena.get_table_prefix(stack_id))

    return custom_resource_response.success_response({}, "*")