Пример #1
0
def generate_threads(functionid, threads_count, iterations_per_thread, events_per_iteration, sleep_duration, use_lambda, event_type, sensitivity_type, compression_mode):
    start = time.time()       
    context = {}            
    threadpool = ThreadPool(context, threads_count)  
    context=dict({})        
    db = DynamoDb(context) 
    print "Sleep durations: ", sleep_duration
    print "Number of threads: ", threads_count
    print "Number of iterations per thread: ", iterations_per_thread
    print "Number of events per iteration: ", events_per_iteration
    print "Using event type: ", event_type
    print "Using sensitivity type: ", sensitivity_type
    print "Using compression mode: ", compression_mode
    for i in range(0, threads_count):          
        threadpool.add(thread_job, functionid, iterations_per_thread, events_per_iteration, use_lambda, context, sleep_duration, event_type, sensitivity_type, compression_mode)                                                    
    threadpool.wait()      
    print "A total of {} metrics have been sent to the FIFO queues.".format((iterations_per_thread*events_per_iteration)*threads_count)    
    print "The overall process took {} seconds.".format(time.time() - start)
Пример #2
0
def main(event, request):
    context = dict({})
    context[c.KEY_LAMBDA_FUNCTION] = request.function_name if hasattr(
        request, 'function_name') else None
    context[c.KEY_REQUEST_ID] = request.aws_request_id if hasattr(
        request, 'aws_request_id') else None
    stackid = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]

    context[c.KEY_DB] = DynamoDb(context)
    context[c.KEY_ATHENA_QUERY] = Query(stackid)
    context[c.KEY_GLUE_CRAWLER] = Glue()
    thread_pool = ThreadPool(size=3)
    crawler_name = context[c.KEY_GLUE_CRAWLER].get_crawler_name(stackid)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()
    events = glue.get_events()

    start = datetime.datetime.utcnow() - datetime.timedelta(hours=2)
    now = datetime.datetime.utcnow()

    found = False
    for type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION], dt.year, dt.month, dt.day,
                dt.hour, type, dt.strftime(util.partition_date_format()))
            found = crawler.exists(prefix)
            if found:
                print "FOUND new events=>", prefix
                break
            dt += timedelta(hours=1)
        if found:
            break

    if found:
        thread_pool.add(crawl, context, crawler_name,
                        context[c.KEY_ATHENA_QUERY].execute_with_format)
        thread_pool.wait()

    return custom_resource_response.success_response({}, "*")
Пример #3
0
def launch(event, lambdacontext):
    print "Start"
    hours_delta = 36
    context = dict({})
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(
        lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(
        lambdacontext, 'aws_request_id') else None
    global threadpool
    global is_lambda
    threadpool = ThreadPool(context, 8)
    is_lambda = context[c.KEY_REQUEST_ID] is not None
    available_amoeba_lambdas = []
    available_amoeba_lambdas.append(c.ENV_AMOEBA_1)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_2)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_3)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_4)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_5)
    db = DynamoDb(context)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()

    events = glue.get_events()
    #TODO: adjust the amoeba tree depth so that we have fully utilized all available amoebas; len(available_amoeba_lambdas) * 1000
    #since the number of leaf nodes for the metric partitions can quickly get very large we use a 5 lambda pool to ensure we don't hit the 1000 invocation limit.

    start = datetime.datetime.utcnow() - datetime.timedelta(hours=hours_delta)
    now = datetime.datetime.utcnow()

    for type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION], dt.year, dt.month, dt.day,
                dt.hour, type, dt.strftime(util.partition_date_format()))
            threadpool.add(crawler.crawl, prefix, available_amoeba_lambdas,
                           invoke_lambda)
            dt += timedelta(hours=1)

    threadpool.wait()
    return custom_resource_response.success_response({"StatusCode": 200}, "*")
Пример #4
0
def start_crawler(event, context):
    """Resolve a crawler name from ``event`` and start it via a thread pool.

    ``Glue.start_crawler`` is queued on a default-constructed ``ThreadPool``
    with the resolved name, and the pool is waited on before returning.
    ``context`` is accepted but not read.
    """
    glue_client = Glue()
    crawler_name = glue_client.get_crawler_name(event)

    pool = ThreadPool()
    pool.add(glue_client.start_crawler, crawler_name)
    pool.wait()