Exemplo n.º 1
0
def top_filters(request):
    """Return the optimizer's popular filters for the requested tables as JSON.

    Reads ``interface`` (defaults to ``OPTIMIZER.INTERFACE``) and ``dbTables``
    (a JSON-encoded list, defaults to ``[]``) from ``request.POST``.

    When the API answer is truthy, the response carries ``status`` 0 and the
    filters under ``values``. Otherwise ``status`` stays -1 and ``message``
    holds the raw answer.

    When ``OPTIMIZER.APPLY_SENTRY_PERMISSIONS`` is enabled, a filter is kept
    only if every column of its first popular-value group survives
    ``_secure_results`` for ``request.user``.
    """
    response = {'status': -1}

    interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())
    db_tables = json.loads(request.POST.get('dbTables', '[]'))

    api = get_api(request, interface)

    data = api.top_filters(db_tables=db_tables)

    if not data:
        # Wrap in a tuple so an empty tuple/list answer still formats cleanly.
        response['message'] = 'Optimizer: %s' % (data,)
        return JsonResponse(response)

    filters = data['results']

    # Permission filtering must happen before the results are placed in the
    # response, and must use the requesting user (there is no ``self`` in a
    # function view).
    if OPTIMIZER.APPLY_SENTRY_PERMISSIONS.get():
        permitted = []
        for entry in filters:
            cols = [
                _get_table_name(col['columnName'])
                for col in entry["popularValues"][0]["group"]
            ]
            # Keep the filter only when no column was removed by the check.
            if len(cols) == len(list(_secure_results(cols, request.user))):
                permitted.append(entry)
        filters = permitted

    response['status'] = 0
    response['values'] = filters

    return JsonResponse(response)
Exemplo n.º 2
0
 def getkey(table):
     """Map a table entry to a dict with 'server', 'db' and 'table' keys.

     The database and table parts come from parsing ``table['name']`` with
     ``_get_table_name``; the server comes from ``get_hive_sentry_provider``.
     """
     parsed = _get_table_name(table['name'])
     key = {'server': get_hive_sentry_provider()}
     key['db'] = parsed['database']
     key['table'] = parsed['table']
     return key
Exemplo n.º 3
0
def top_tables(request):
    """Return the optimizer's most popular tables of a database as JSON.

    Reads from ``request.POST``: ``interface`` (defaults to
    ``OPTIMIZER.INTERFACE``), ``database`` (defaults to ``'default'``) and
    ``len`` (page size, defaults to 1000).

    When ``OPTIMIZER.APPLY_SENTRY_PERMISSIONS`` is enabled, the API results
    are narrowed to what ``checker.filter_objects`` returns for the
    ``'SELECT'`` action and the requesting user.

    The response carries ``status`` 0 and the table summaries under
    ``top_tables``.
    """
    response = {'status': -1}

    interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())
    database = request.POST.get('database', 'default')
    limit = request.POST.get('len', 1000)

    api = get_api(request, interface)

    data = api.top_tables(database_name=database, page_size=limit)

    if OPTIMIZER.APPLY_SENTRY_PERMISSIONS.get():
        # Function views have no ``self``; the user lives on the request.
        checker = get_checker(user=request.user)
        action = 'SELECT'

        def getkey(table):
            names = _get_table_name(table['name'])
            return {
                'server': get_hive_sentry_provider(),
                'db': names['database'],
                'table': names['table']
            }

        data['results'] = list(
            checker.filter_objects(data['results'], action, key=getkey))

    tables = []
    for table in data['results']:
        # Parse the qualified name once per table instead of once per field.
        names = _get_table_name(table['name'])
        tables.append({
            'eid': table['eid'],
            'database': names['database'],
            'name': names['table'],
            'popularity': table['workloadPercent'],
            'column_count': table['columnCount'],
            'patternCount': table['patternCount'],
            'total': table['total'],
            'is_fact': table['type'] != 'Dimension'
        })

    response['top_tables'] = tables
    response['status'] = 0

    return JsonResponse(response)
Exemplo n.º 4
0
def upload_table_stats(request):
    """Collect DDL, table stats and column stats for tables and upload them.

    Reads from ``request.POST``: ``interface`` (defaults to
    ``OPTIMIZER.INTERFACE``), ``db_tables`` (JSON list), ``sourcePlatform``
    (JSON string, defaults to ``"hive"``), and the JSON booleans ``with_ddl``,
    ``with_table`` and ``with_columns`` (all default to false).

    DDL collection is forced off unless ``OPTIMIZER.AUTO_UPLOAD_DDL`` is set;
    both stats collections are forced off unless
    ``OPTIMIZER.AUTO_UPLOAD_STATS`` is set.

    An exception while collecting data for one table is logged and the rest of
    that table's collection is skipped; the remaining tables are still
    processed.

    The response ``status`` is 0 unless an upload reports a state other than
    ``WAITING``, ``FINISHED`` or ``IN_PROGRESS``, in which case it is -1. Each
    performed upload also adds its raw answer (``upload_table_stats``,
    ``upload_cols_stats``, ``upload_table_ddl``) and a matching ``*_status``
    entry.
    """
    response = {'status': -1}

    interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())
    db_tables = json.loads(request.POST.get('db_tables', '[]'))
    source_platform = json.loads(request.POST.get('sourcePlatform', '"hive"'))
    with_ddl = json.loads(request.POST.get('with_ddl', 'false'))
    with_table_stats = json.loads(request.POST.get('with_table', 'false'))
    with_columns_stats = json.loads(request.POST.get('with_columns', 'false'))

    table_ddls = []
    table_stats = []
    column_stats = []

    if not OPTIMIZER.AUTO_UPLOAD_DDL.get():
        with_ddl = False

    if not OPTIMIZER.AUTO_UPLOAD_STATS.get():
        with_table_stats = with_columns_stats = False

    # The non-Impala column path iterates the column list found in the full
    # table stats, so those must be fetched even when only column stats were
    # requested.
    needs_full_table_stats = with_table_stats or (
        with_columns_stats and source_platform != 'impala')

    for db_table in db_tables:
        path = _get_table_name(db_table)

        try:
            if with_ddl:
                db = _get_db(request.user, source_type=source_platform)
                # NOTE(review): the identifiers come from the POST body and
                # are interpolated between backticks without escaping --
                # confirm _get_table_name rejects embedded backticks.
                query = hql_query(
                    'SHOW CREATE TABLE `%(database)s`.`%(table)s`' % path)
                handle = db.execute_and_wait(query, timeout_sec=5.0)

                if handle:
                    # Always release the handle, even when the fetch fails.
                    try:
                        result = db.fetch(handle, rows=5000)
                    finally:
                        db.close(handle)
                    table_ddls.append(
                        (0, 0, ' '.join([row[0] for row in result.rows()]),
                         path['database']))

            if with_table_stats or with_columns_stats:
                # Shared by the table and column paths; previously only bound
                # when table stats were requested, which made a columns-only
                # request fail with a NameError.
                mock_request = MockRequest(user=request.user,
                                           source_platform=source_platform)

            if needs_full_table_stats:
                full_table_stats = json.loads(
                    get_table_stats(mock_request,
                                    database=path['database'],
                                    table=path['table']).content)

            if with_table_stats:
                stats = dict((stat['data_type'], stat['comment'])
                             for stat in full_table_stats['stats'])

                table_stats.append({
                    'table_name':
                    '%(database)s.%(table)s' % path,  # DB Prefix
                    'num_rows':
                    stats.get('numRows', -1),
                    'last_modified_time':
                    stats.get('transient_lastDdlTime', -1),
                    'total_size':
                    stats.get('totalSize', -1),
                    'raw_data_size':
                    stats.get('rawDataSize', -1),
                    'num_files':
                    stats.get('numFiles', -1),
                    'num_partitions':
                    stats.get('numPartitions', -1),
                    # bytes_cached
                    # cache_replication
                    # format
                })

            if with_columns_stats:
                if source_platform == 'impala':
                    # column=-1 returns the stats of all columns in one call.
                    per_column_stats = json.loads(
                        get_table_stats(mock_request,
                                        database=path['database'],
                                        table=path['table'],
                                        column=-1).content)['stats']
                else:
                    # One call per column, capped at the first 25 columns.
                    per_column_stats = [
                        json.loads(
                            get_table_stats(mock_request,
                                            database=path['database'],
                                            table=path['table'],
                                            column=col).content)['stats']
                        for col in full_table_stats['columns'][:25]
                    ]

                # Flatten each column's list of dicts into a single dict,
                # replacing None values with '' so the checks below are
                # uniform.
                raw_column_stats = [
                    dict((key, val if val is not None else '')
                         for col_stat in col
                         for key, val in col_stat.items())
                    for col in per_column_stats
                ]

                for col_stats in raw_column_stats:
                    column_stats.append({
                        'table_name':
                        '%(database)s.%(table)s' % path,  # DB Prefix
                        'column_name':
                        col_stats['col_name'],
                        'data_type':
                        col_stats['data_type'],
                        # Default to '' so a missing key yields -1 instead of
                        # int(None) raising TypeError.
                        "num_distinct":
                        int(col_stats['distinct_count'])
                        if col_stats.get('distinct_count', '') != '' else -1,
                        "num_nulls":
                        int(col_stats['num_nulls'])
                        if col_stats['num_nulls'] != '' else -1,
                        "avg_col_len":
                        int(float(col_stats['avg_col_len']))
                        if col_stats['avg_col_len'] != '' else -1,
                        "max_size":
                        int(float(col_stats['max_col_len']))
                        if col_stats['max_col_len'] != '' else -1,
                        "min":
                        col_stats['min']
                        if col_stats.get('min', '') != '' else -1,
                        "max":
                        col_stats['max']
                        if col_stats.get('max', '') != '' else -1,
                        "num_trues":
                        col_stats['num_trues']
                        if col_stats.get('num_trues', '') != '' else -1,
                        "num_falses":
                        col_stats['num_falses']
                        if col_stats.get('num_falses', '') != '' else -1,
                    })
        except Exception as e:
            # Best effort: one failing table must not abort the others.
            LOG.exception('Skipping upload of %s: %s', db_table, e)

    api = get_api(request, interface)

    response['status'] = 0

    accepted_states = ('WAITING', 'FINISHED', 'IN_PROGRESS')
    # (response key, payload, data_type), in the order the uploads are sent.
    uploads = (
        ('upload_table_stats', table_stats, 'table_stats'),
        ('upload_cols_stats', column_stats, 'cols_stats'),
        ('upload_table_ddl', table_ddls, 'queries'),
    )

    for key, payload, data_type in uploads:
        if not payload:
            continue

        response[key] = api.upload(data=payload,
                                   data_type=data_type,
                                   source_platform=source_platform)
        status_key = key + '_status'
        # An accepted upload inherits the overall status so far; a rejected
        # one is -1 and drags the overall status down with it.
        if response[key]['status']['state'] in accepted_states:
            response[status_key] = response['status']
        else:
            response[status_key] = -1
        if response[status_key] != 0:
            response['status'] = response[status_key]

    return JsonResponse(response)