def top_filters(request):
    """Return the most popular filter predicates for a set of tables.

    POST params:
      interface: optimizer backend to use (defaults to OPTIMIZER.INTERFACE)
      dbTables: JSON list of table names to query
      columnName: accepted but currently unused

    When Sentry permission filtering is enabled, results referencing columns the
    user cannot read are dropped. Returns a JsonResponse with 'status' 0 and
    'values' on success, or 'status' -1 and a 'message' otherwise.
    """
    response = {'status': -1}

    interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())
    db_tables = json.loads(request.POST.get('dbTables', '[]'))
    column_name = request.POST.get('columnName')  # Unused

    api = get_api(request, interface)
    data = api.top_filters(db_tables=db_tables)

    if data:
        response['status'] = 0
        response['values'] = data['results']
    else:
        response['message'] = 'Optimizer: %s' % data

    if OPTIMIZER.APPLY_SENTRY_PERMISSIONS.get():
        # Fix: this branch previously referenced the undefined names `results` and
        # `self.user`; filter the response values with the requesting user instead.
        filtered_filters = []
        for result in response.get('values', []):
            cols = [
                _get_table_name(col['columnName'])
                for col in result["popularValues"][0]["group"]
            ]
            # Keep the result only if every referenced column survives the
            # Sentry permission check for this user.
            if len(cols) == len(list(_secure_results(cols, request.user))):
                filtered_filters.append(result)
        response['values'] = filtered_filters

    return JsonResponse(response)
def getkey(table):
    """Build the Sentry authorization key (server/db/table) for *table*."""
    parts = _get_table_name(table['name'])
    key = {
        'server': get_hive_sentry_provider(),
        'db': parts['database'],
        'table': parts['table'],
    }
    return key
def top_tables(request):
    """Return the most popular tables of a database, per the optimizer backend.

    POST params:
      interface: optimizer backend to use (defaults to OPTIMIZER.INTERFACE)
      database: database to inspect (default 'default')
      len: maximum number of tables to return (default 1000)

    When Sentry permission filtering is enabled, tables the user cannot SELECT
    from are removed. Returns a JsonResponse with 'status' 0 and 'top_tables'.
    """
    response = {'status': -1}

    interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())
    database = request.POST.get('database', 'default')
    limit = request.POST.get('len', 1000)

    api = get_api(request, interface)
    data = api.top_tables(database_name=database, page_size=limit)

    if OPTIMIZER.APPLY_SENTRY_PERMISSIONS.get():
        # Fix: was `get_checker(user=self.user)` — there is no `self` in this
        # module-level view; use the requesting user.
        checker = get_checker(user=request.user)
        action = 'SELECT'

        def getkey(table):
            # Map a result row to the server/db/table key Sentry checks against.
            names = _get_table_name(table['name'])
            return {'server': get_hive_sentry_provider(), 'db': names['database'], 'table': names['table']}

        data['results'] = list(checker.filter_objects(data['results'], action, key=getkey))

    tables = [{
        'eid': table['eid'],
        'database': _get_table_name(table['name'])['database'],
        'name': _get_table_name(table['name'])['table'],
        'popularity': table['workloadPercent'],
        'column_count': table['columnCount'],
        'patternCount': table['patternCount'],
        'total': table['total'],
        'is_fact': table['type'] != 'Dimension'
    } for table in data['results']]

    response['top_tables'] = tables
    response['status'] = 0

    return JsonResponse(response)
def upload_table_stats(request):
    """Bulk-upload table DDLs, table stats and column stats to the optimizer.

    POST params:
      interface: optimizer backend to use (defaults to OPTIMIZER.INTERFACE)
      db_tables: JSON list of 'database.table' names
      sourcePlatform: JSON string naming the engine, e.g. "hive" or "impala"
      with_ddl / with_table / with_columns: JSON booleans selecting what to upload

    Per-table read failures are logged and skipped, not fatal. Returns a
    JsonResponse whose 'status' is 0 only if every requested upload was accepted.
    """
    response = {'status': -1}

    interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())
    db_tables = json.loads(request.POST.get('db_tables', '[]'))
    source_platform = json.loads(request.POST.get('sourcePlatform', '"hive"'))
    with_ddl = json.loads(request.POST.get('with_ddl', 'false'))
    with_table_stats = json.loads(request.POST.get('with_table', 'false'))
    with_columns_stats = json.loads(request.POST.get('with_columns', 'false'))

    table_ddls = []
    table_stats = []
    column_stats = []

    # Server configuration can globally veto DDL/stats auto-upload.
    if not OPTIMIZER.AUTO_UPLOAD_DDL.get():
        with_ddl = False

    if not OPTIMIZER.AUTO_UPLOAD_STATS.get():
        with_table_stats = with_columns_stats = False

    # Fix: mock_request used to be created only inside the with_table_stats branch,
    # so requesting column stats alone raised a NameError (swallowed by the broad
    # except below, silently uploading nothing).
    mock_request = MockRequest(user=request.user, source_platform=source_platform)

    for db_table in db_tables:
        path = _get_table_name(db_table)

        try:
            if with_ddl:
                db = _get_db(request.user, source_type=source_platform)
                query = hql_query('SHOW CREATE TABLE `%(database)s`.`%(table)s`' % path)
                handle = db.execute_and_wait(query, timeout_sec=5.0)

                if handle:
                    result = db.fetch(handle, rows=5000)
                    db.close(handle)
                    table_ddls.append((0, 0, ' '.join([row[0] for row in result.rows()]), path['database']))

            # Full table stats feed the table-level upload and, on Hive, also
            # provide the column list used for per-column stats below.
            full_table_stats = None
            if with_table_stats or (with_columns_stats and source_platform != 'impala'):
                full_table_stats = json.loads(
                    get_table_stats(mock_request, database=path['database'], table=path['table']).content)

            if with_table_stats:
                stats = dict((stat['data_type'], stat['comment']) for stat in full_table_stats['stats'])

                table_stats.append({
                    'table_name': '%(database)s.%(table)s' % path,  # DB Prefix
                    'num_rows': stats.get('numRows', -1),
                    'last_modified_time': stats.get('transient_lastDdlTime', -1),
                    'total_size': stats.get('totalSize', -1),
                    'raw_data_size': stats.get('rawDataSize', -1),
                    'num_files': stats.get('numFiles', -1),
                    'num_partitions': stats.get('numPartitions', -1),
                    # bytes_cached
                    # cache_replication
                    # format
                })

            if with_columns_stats:
                if source_platform == 'impala':
                    # Impala returns stats for all columns in one call (column=-1).
                    stats_per_column = json.loads(
                        get_table_stats(mock_request, database=path['database'], table=path['table'],
                                        column=-1).content)['stats']
                else:
                    # Hive has no bulk call: fetch per column, capped at 25 columns.
                    stats_per_column = [
                        json.loads(
                            get_table_stats(mock_request, database=path['database'], table=path['table'],
                                            column=col).content)['stats']
                        for col in full_table_stats['columns'][:25]
                    ]

                # Flatten each column's list of one-entry dicts into a single dict,
                # mapping None values to '' so the sentinel checks below work.
                raw_column_stats = [
                    dict([
                        (key, val if val is not None else '')
                        for col_stat in col for key, val in col_stat.items()
                    ])
                    for col in stats_per_column
                ]

                for col_stats in raw_column_stats:
                    column_stats.append({
                        'table_name': '%(database)s.%(table)s' % path,  # DB Prefix
                        'column_name': col_stats['col_name'],
                        'data_type': col_stats['data_type'],
                        # '' means "not computed"; upload -1 as the missing marker.
                        "num_distinct": int(col_stats.get('distinct_count')) if col_stats.get('distinct_count') != '' else -1,
                        "num_nulls": int(col_stats['num_nulls']) if col_stats['num_nulls'] != '' else -1,
                        "avg_col_len": int(float(col_stats['avg_col_len'])) if col_stats['avg_col_len'] != '' else -1,
                        "max_size": int(float(col_stats['max_col_len'])) if col_stats['max_col_len'] != '' else -1,
                        "min": col_stats['min'] if col_stats.get('min', '') != '' else -1,
                        "max": col_stats['max'] if col_stats.get('max', '') != '' else -1,
                        "num_trues": col_stats['num_trues'] if col_stats.get('num_trues', '') != '' else -1,
                        "num_falses": col_stats['num_falses'] if col_stats.get('num_falses', '') != '' else -1,
                    })
        except Exception as e:
            # Best-effort per table: log and continue with the rest.
            LOG.exception('Skipping upload of %s: %s' % (db_table, e))

    api = get_api(request, interface)

    response['status'] = 0

    # Upload each collected payload; 'status' ends up -1 if any upload was not
    # accepted (accepted states: WAITING / FINISHED / IN_PROGRESS).
    if table_stats:
        response['upload_table_stats'] = api.upload(data=table_stats, data_type='table_stats', source_platform=source_platform)
        response['upload_table_stats_status'] = 0 if response['upload_table_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
        response['status'] = response['upload_table_stats_status']
    if column_stats:
        response['upload_cols_stats'] = api.upload(data=column_stats, data_type='cols_stats', source_platform=source_platform)
        response['upload_cols_stats_status'] = response['status'] if response['upload_cols_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
        if response['upload_cols_stats_status'] != 0:
            response['status'] = response['upload_cols_stats_status']
    if table_ddls:
        response['upload_table_ddl'] = api.upload(data=table_ddls, data_type='queries', source_platform=source_platform)
        response['upload_table_ddl_status'] = response['status'] if response['upload_table_ddl']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
        if response['upload_table_ddl_status'] != 0:
            response['status'] = response['upload_table_ddl_status']

    return JsonResponse(response)