def handoff_event_to_emitter(context, bucket, key, events):
    # The project configuration bucket always supersedes the bucket argument.
    bucket = os.environ["ProjectConfigurationBucket"]
    lmdclient = Lambda(context)
    s3client = S3(context, bucket)
    parts = KeyParts(key, context[c.KEY_SEPERATOR_PARTITION])
    key = "deployment/share/emitted_event_payloads/{}/{}/{}/{}".format(
        parts.source,
        parts.event,
        parts.datetime,
        parts.filename.replace(parts.extension, 'json'))
    payload = {
        'emitted': {
            'key': key,
            'bucket': bucket,
            'type': parts.event,
            'source': parts.source,
            'buildid': parts.buildid,
            'filename': parts.filename.replace(parts.extension, 'json'),
            'datetime': parts.datetime,
            'datetimeformat': util.partition_date_format(),
            'sensitivitylevel': parts.sensitivity_level
        }
    }
    # Create a temporary file for the event emitter to read; it expires after
    # 30 minutes since the emitter only needs it transiently.
    expires = datetime.datetime.utcnow() + datetime.timedelta(minutes=30)
    s3client.put_object(key, events.to_json(orient='records'), expires)
    lmdclient.invoke(os.environ[c.ENV_EVENT_EMITTER], payload)
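# A minimal, self-contained sketch of the handoff above (hypothetical values,
# stdlib only): it mirrors how the payload key is rebuilt from the source key
# parts and how the 30-minute expiry of the temporary payload object is
# computed. The real values come from KeyParts and the environment.
def _example_handoff_key_and_expiry():
    import datetime
    source, event, dt, filename, extension = (
        'cloudgemmetric', 'sessionstart', '20180101230000', 'data.csv', 'csv')
    key = "deployment/share/emitted_event_payloads/{}/{}/{}/{}".format(
        source, event, dt, filename.replace(extension, 'json'))
    # Expiry mirrors the 30-minute window used by handoff_event_to_emitter.
    expires = datetime.datetime.utcnow() + datetime.timedelta(minutes=30)
    return key, expires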
def main(event, request):
    context = {}
    context[c.KEY_LAMBDA_FUNCTION] = request.function_name if hasattr(request, 'function_name') else None
    context[c.KEY_REQUEST_ID] = request.aws_request_id if hasattr(request, 'aws_request_id') else None
    stackid = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]
    context[c.KEY_DB] = DynamoDb(context)
    context[c.KEY_ATHENA_QUERY] = Query(stackid)
    context[c.KEY_GLUE_CRAWLER] = Glue()
    thread_pool = ThreadPool(size=3)
    crawler_name = context[c.KEY_GLUE_CRAWLER].get_crawler_name(stackid)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()
    events = glue.get_events()
    # Look back two hours, one hourly partition at a time, for any event type
    # that has produced new data.
    start = datetime.datetime.utcnow() - datetime.timedelta(hours=2)
    now = datetime.datetime.utcnow()
    found = False
    for event_type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION],
                dt.year, dt.month, dt.day, dt.hour,
                event_type,
                dt.strftime(util.partition_date_format()))
            found = crawler.exists(prefix)
            if found:
                print("FOUND new events => {}".format(prefix))
                break
            dt += datetime.timedelta(hours=1)
        if found:
            break
    if found:
        thread_pool.add(crawl, context, crawler_name, context[c.KEY_ATHENA_QUERY].execute_with_format)
        thread_pool.wait()
    return custom_resource_response.success_response({}, "*")
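# A stdlib-only sketch of the lookback loop in main() above: it yields each
# hourly datetime that would be checked for a new event prefix, walking from
# `hours` ago up to the current hour inclusive. The actual prefix string is
# produced by metric_schema.s3_key_format() in the real code.
def _example_lookback_hours(hours=2):
    import datetime
    now = datetime.datetime.utcnow()
    dt = now - datetime.timedelta(hours=hours)
    while dt <= now:
        yield dt
        dt += datetime.timedelta(hours=1)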
def launch(event, lambdacontext):
    print("Start")
    hours_delta = 36
    context = {}
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
    global threadpool
    global is_lambda
    threadpool = ThreadPool(context, 8)
    is_lambda = context[c.KEY_REQUEST_ID] is not None
    available_amoeba_lambdas = [
        c.ENV_AMOEBA_1,
        c.ENV_AMOEBA_2,
        c.ENV_AMOEBA_3,
        c.ENV_AMOEBA_4,
        c.ENV_AMOEBA_5
    ]
    db = DynamoDb(context)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()
    events = glue.get_events()
    # TODO: Adjust the amoeba tree depth so that all available amoebas are
    # fully utilized: len(available_amoeba_lambdas) * 1000.
    # Since the number of leaf nodes for the metric partitions can quickly grow
    # very large, a pool of five lambdas is used to ensure we don't hit the
    # 1000 invocation limit.
    start = datetime.datetime.utcnow() - datetime.timedelta(hours=hours_delta)
    now = datetime.datetime.utcnow()
    for event_type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION],
                dt.year, dt.month, dt.day, dt.hour,
                event_type,
                dt.strftime(util.partition_date_format()))
            threadpool.add(crawler.crawl, prefix, available_amoeba_lambdas, invoke_lambda)
            dt += datetime.timedelta(hours=1)
    threadpool.wait()
    return custom_resource_response.success_response({"StatusCode": 200}, "*")
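# A rough sizing sketch for the TODO above (assuming one crawl task per event
# type per hour, as launch() enqueues them): the lookback window produces
# hours_delta + 1 prefixes per event type, so the total task count scales
# linearly with the number of event types. This is the growth that motivates
# spreading invocations across the five amoeba lambdas.
def _example_crawl_task_count(number_of_event_types, hours_delta=36):
    # +1 because the start hour and the current hour are both inclusive.
    return number_of_event_types * (hours_delta + 1)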
def after_this_resource_group_updated(hook, deployment_name, **kwargs):
    context = hook.context
    dynamoDB = context.aws.session.client('dynamodb', region_name=context.config.project_region)
    gem_name = kwargs['resource_group_name']
    deployment_arn = context.config.get_deployment_stack_id(deployment_name)
    gem_resource_group = context.stack.describe_resources(deployment_arn, recursive=True)
    db_id = gem_resource_group[gem_name + '.MetricContext']['PhysicalResourceId']
    # Only seed the context table on first deployment; if any settings already
    # exist, leave them untouched.
    response = dynamoDB.scan(TableName=db_id)
    if response and len(response['Items']) > 0:
        return
    # Default settings, written in order as (key, attribute value) pairs using
    # DynamoDB's low-level attribute-value format.
    default_settings = [
        (c.KEY_GROWTH_RATE_BEFORE_ADDING_LAMBDAS, {"N": "0.05"}),
        (c.KEY_CSV_PARQUET_COMPRESSION_RATIO, {"N": "13"}),
        (c.KEY_FREQUENCY_TO_CHECK_TO_SPAWN_ANOTHER, {"N": "5"}),
        (c.KEY_FREQUENCY_TO_CHECK_SQS_STATE, {"N": "5"}),
        (c.KEY_BACKOFF_MAX_SECONDS, {"N": "5.0"}),
        (c.KEY_BACKOFF_BASE_SECONDS, {"N": "5.0"}),
        (c.KEY_RECORD_DETAIL_METRIC_DATA, {"BOOL": False}),
        (c.KEY_BACKOFF_MAX_TRYS, {"N": "5.0"}),
        (c.KEY_MAX_MESSAGE_RETRY, {"N": "10"}),
        (c.KEY_SEPERATOR_CSV, {"S": c.CSV_SEP_DEFAULT}),
        (c.KEY_FIFO_GROWTH_TRIGGER, {"N": "3000"}),
        (c.KEY_MEMORY_FLUSH_TRIGGER, {"N": "75"}),
        (c.KEY_AMOEBA_MEMORY_FLUSH_TRIGGER, {"N": "60"}),
        (c.KEY_TARGET_AGGREGATION_FILE_SIZE_IN_MB, {"N": "128"}),
        (c.KEY_MAX_LAMBDA_TIME, {"N": "275"}),
        (c.KEY_SEPERATOR_PARTITION, {"S": "/"}),
        (c.KEY_AGGREGATION_PERIOD_IN_SEC, {"N": "220"}),
        (c.KEY_THRESHOLD_BEFORE_SPAWN_NEW_CONSUMER, {"N": "3000"}),
        (c.KEY_MAX_INFLIGHT_MESSAGES, {"N": "12000"}),
        (c.KEY_FILTERS, {"L": []}),
        (c.KEY_PRIORITIES, {"L": []}),
        (c.KEY_NUMBER_OF_INIT_LAMBDAS, {"N": "3"}),
        (c.CLIENT_BUFFER_FLUSH_TO_FILE_IN_BYTES, {"N": "204800"}),
        (c.CLIENT_BUFFER_GAME_FLUSH_PERIOD_IN_SEC, {"N": "60"}),
        (c.CLIENT_FILE_NUM_METRICS_TO_SEND_IN_BATCH, {"N": "5"}),
        (c.CLIENT_FILE_SEND_METRICS_INTERVAL_IN_SECONDS, {"N": "300"}),
        (c.CLIENT_FILE_MAX_METRICS_TO_SEND_IN_BATCH_IN_MB, {"N": "5"}),
        (c.CLIENT_FILE_PRIORITIZATION_THRESHOLD_IN_PERC, {"N": "60"}),
        (c.KEY_HEATMAPS, {"L": []}),
        (c.KEY_SAVE_GLOBAL_COORDINATES, {"BOOL": False}),
        (c.KEY_PARTITIONS, {"L": [
            {"M": {
                "key": {"S": schema.EVENT.id},
                "parts": {"L": []},
                "type": {"S": "str"},
                "description": {"S": "The identifying name of the game event."}
            }},
            {"M": {
                "key": {"S": schema.SERVER_TIMESTAMP.id},
                "parts": {"L": [{"S": ".strftime('" + util.partition_date_format() + "')"}]},
                "type": {"S": "datetime.datetime.utcfromtimestamp"},
                "description": {"S": "The server UTC timestamp. This partition has parts associated with it. The parts result in the data being gathered in a single S3 folder named like 20180101230000."}
            }},
            {"M": {
                "key": {"S": schema.SERVER_TIMESTAMP.id},
                "parts": {"L": [{"S": ".year"}, {"S": ".month"}, {"S": ".day"}, {"S": ".hour"}]},
                "type": {"S": "datetime.datetime.utcfromtimestamp"},
                "description": {"S": "The server UTC timestamp. This partition has parts associated with it. The parts are extracted from the srv_tmutc attribute value. Example: 2018-01-01T13:00:00Z results in the S3 key path .../2018/01/01/13/..."}
            }},
            {"M": {
                "key": {"S": schema.SOURCE.id},
                "parts": {"L": []},
                "type": {"S": "str"},
                "description": {"S": "Where the data originates from. Most often this is 'cloudgemmetric', but it could be another Cloud Gem such as Cloud Gem Defect Reporter."}
            }},
            {"M": {
                "key": {"S": schema.BUILD_ID.id},
                "parts": {"L": []},
                "type": {"S": "str"},
                "description": {"S": "The build identifier the event originated on."}
            }},
            {"M": {
                "key": {"S": schema.SENSITIVITY.id},
                "parts": {"L": []},
                "type": {"S": "map"},
                "description": {"S": "A flag used to define whether the data is encrypted in S3."}
            }},
            {"M": {
                "key": {"S": schema.SCHEMA_HASH.id},
                "parts": {"L": []},
                "type": {"S": "map"},
                "description": {"S": "A hash of the event schema."}
            }}
        ]})
    ]
    for setting_key, setting_value in default_settings:
        dynamoDB.put_item(
            TableName=db_id,
            Item={"key": {"S": setting_key}, "value": setting_value})
    seed_cgp_queries(db_id, dynamoDB)
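# A hedged sketch (hypothetical helper, real boto3 API) of reading one of the
# seeded numeric settings back. Values live in DynamoDB's low-level
# attribute-value format, so a number comes back as a string under the "N"
# key; the table's hash key is assumed to be the "key" string attribute used
# by the seeding code above.
def _example_read_numeric_setting(table_name, setting_key):
    import boto3
    client = boto3.client('dynamodb')
    response = client.get_item(
        TableName=table_name,
        Key={'key': {'S': setting_key}})
    item = response.get('Item')
    return float(item['value']['N']) if item else None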