Example #1
def duplication_rate(request):
    # Hourly duplication rate over the last 24 hours:
    # (total uuids - distinct uuids) / distinct uuids, computed across the
    # sessionstart and clientinitcomplete tables.
    query = Query(os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    results = query.execute_with_format('''
          with source as (
              select ''' + schema.SERVER_TIMESTAMP.long_name + ''' as srv_tmutc,
                ''' + schema.UUID.long_name + ''' as uuid
              from
                "{0}"."{1}''' + DEFAULT_EVENTS.SESSIONSTART + '''"
              WHERE p_''' + schema.SERVER_TIMESTAMP.long_name + '''_strftime > date_format((current_timestamp - interval '24' hour), '%Y%m%d%H0000')
              UNION
              select ''' + schema.SERVER_TIMESTAMP.long_name + ''' as srv_tmutc,
                ''' + schema.UUID.long_name + ''' as uuid
              from
                "{0}"."{1}''' + DEFAULT_EVENTS.CLIENTINITCOMPLETE + '''"
              WHERE p_''' + schema.SERVER_TIMESTAMP.long_name + '''_strftime > date_format((current_timestamp - interval '24' hour), '%Y%m%d%H0000')
          )

            SELECT to_unixtime(from_iso8601_timestamp(T1.tmp)) AS Timestmp,
                     round((T1.value1 - T1.value2) / (T1.value2 * 1.0), 6) AS DuplicationRate
            FROM
                (SELECT date_format(from_unixtime(srv_tmutc),
                    '%Y-%m-%dT%H:00:00Z') AS tmp, count(uuid) AS value1, count(distinct uuid) AS value2
                FROM source
                GROUP BY 1) AS T1
            ORDER BY 1 asc''')
    return convert_to_tuple_dataset(results)
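The DuplicationRate expression above works out to (total uuids - distinct uuids) / distinct uuids per hourly bucket. Below is a minimal, self-contained sketch of the same arithmetic in plain Python; the sample events and the helper name are illustrative, not part of the original module.

# Sketch of the duplication-rate arithmetic the Athena query performs.
# The event list below is made-up sample data, not output from the real tables.
from collections import defaultdict

def duplication_rate_per_hour(events):
    """events: iterable of (hour_bucket, uuid) tuples."""
    totals, distincts = defaultdict(int), defaultdict(set)
    for hour, uuid in events:
        totals[hour] += 1
        distincts[hour].add(uuid)
    return {
        hour: round((totals[hour] - len(distincts[hour])) / float(len(distincts[hour])), 6)
        for hour in totals
    }

sample = [("2020-01-01T10:00:00Z", "a"), ("2020-01-01T10:00:00Z", "a"),
          ("2020-01-01T10:00:00Z", "b")]
print(duplication_rate_per_hour(sample))   # {'2020-01-01T10:00:00Z': 0.5}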
Example #2
def platforms(request):
    # Distinct platform identifiers seen in the clientinitcomplete table.
    query = Query(os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    results = query.execute_with_format(
        "select distinct T3.plt "
        "from "
        "( "
        "SELECT distinct " + schema.PLATFORM_ID.long_name + " as plt FROM \"{0}\".\"{1}" + DEFAULT_EVENTS.CLIENTINITCOMPLETE + "\" as T1 "
        ") as T3 order by 1 asc ")
    return convert_to_dataset(results)
Example #3
def __update_partitions(paths):
    # Build a single ALTER TABLE ... ADD PARTITION statement covering every
    # matching path; the {0}.{1} database/table placeholders are left
    # unformatted here and filled in by the query layer.
    alter = StringIO()
    alter.write("ALTER TABLE {0}.{1} ADD ")
    for path in paths:
        if (path.sensitivity_level == sensitivity.SENSITIVITY_TYPE.NONE
                and path.buildid == '1.0.2'
                and path.platform in ('Android', 'OSX')):
            alter.write(
                " PARTITION (idx_source='{1}', idx_bldid='{2}', idx_year='{3}', idx_month='{4}', idx_day='{5}', idx_hour='{6}', idx_platform='{7}', idx_event='{8}') LOCATION 's3://<bucket>/{0}/{1}/{2}/{3}/{4}/{5}/{6}/{7}/{8}/'"
                .format(path.sensitivity_level, path.source, path.buildid,
                        path.year, path.month, path.day, path.hour,
                        path.platform, path.event))

    query = Query(
        type('obj', (object, ),
             {c.ENV_STACK_ID: os.environ[c.ENV_DEPLOYMENT_STACK_ARN]}))
    query.execute(alter.getvalue())
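For a rough idea of what __update_partitions assembles, the sketch below builds the same statement for one made-up path. The Path namedtuple and its field values are stand-ins for the real path objects, and the {0}.{1} database/table placeholders are left unformatted just as in the original.

# Stand-in for the real path objects; field names mirror the attributes used above.
from collections import namedtuple
from io import StringIO

Path = namedtuple("Path", "sensitivity_level source buildid year month day hour platform event")
path = Path("NONE", "cloudgemmetric", "1.0.2", "2020", "01", "15", "07", "Android", "sessionstart")

alter = StringIO()
alter.write("ALTER TABLE {0}.{1} ADD ")
alter.write(
    " PARTITION (idx_source='{1}', idx_bldid='{2}', idx_year='{3}', idx_month='{4}', "
    "idx_day='{5}', idx_hour='{6}', idx_platform='{7}', idx_event='{8}') "
    "LOCATION 's3://<bucket>/{0}/{1}/{2}/{3}/{4}/{5}/{6}/{7}/{8}/'"
    .format(path.sensitivity_level, path.source, path.buildid, path.year,
            path.month, path.day, path.hour, path.platform, path.event))
print(alter.getvalue())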
Example #4
def query_results(request, id):
    query = Query(os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    results = query.client.get_query_execution(id)
    # The JSON serializer doesn't support datetime values right now.
    del results['Status']['SubmissionDateTime']
    if 'CompletionDateTime' in results['Status']:
        del results['Status']['CompletionDateTime']
    if results['Status']['State'] == 'SUCCEEDED':
        results['Result'] = query.client.get_output(results['ResultConfiguration']['OutputLocation'])
    return results
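If the goal is only to make the response JSON-serializable, an alternative to deleting the timestamp fields would be to pass a default encoder to json.dumps. A small sketch with a made-up results dict:

import datetime
import json

# Sample shape only; the real dict comes from the query execution response.
results = {"Status": {"State": "SUCCEEDED",
                      "SubmissionDateTime": datetime.datetime(2020, 1, 15, 7, 0)}}

# str() renders datetimes as readable text instead of raising TypeError.
print(json.dumps(results, default=str))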
Example #5
def main(event, request):
    context = {}
    context[c.KEY_LAMBDA_FUNCTION] = request.function_name if hasattr(
        request, 'function_name') else None
    context[c.KEY_REQUEST_ID] = request.aws_request_id if hasattr(
        request, 'aws_request_id') else None
    stackid = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]

    context[c.KEY_DB] = DynamoDb(context)
    context[c.KEY_ATHENA_QUERY] = Query(stackid)
    context[c.KEY_GLUE_CRAWLER] = Glue()
    thread_pool = ThreadPool(size=3)
    crawler_name = context[c.KEY_GLUE_CRAWLER].get_crawler_name(stackid)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()
    events = glue.get_events()

    # Scan the last two hours of S3 partitions for newly written events.
    start = datetime.datetime.utcnow() - datetime.timedelta(hours=2)
    now = datetime.datetime.utcnow()

    found = False
    for event_type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION], dt.year, dt.month, dt.day,
                dt.hour, event_type, dt.strftime(util.partition_date_format()))
            found = crawler.exists(prefix)
            if found:
                print("FOUND new events=>", prefix)
                break
            dt += datetime.timedelta(hours=1)
        if found:
            break

    # Only kick off the crawler when at least one new prefix exists.
    if found:
        thread_pool.add(crawl, context, crawler_name,
                        context[c.KEY_ATHENA_QUERY].execute_with_format)
        thread_pool.wait()

    return custom_resource_response.success_response({}, "*")
Example #6
def query(request, sql, sync=False):
    # The request body carries the SQL statement under the "sql" key.
    sql = sql["sql"]
    query = Query(os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    return query.execute(sql, sync=sync)