Example #1
def top_tables(request):
  response = {'status': -1}

  database = request.POST.get('database', 'default')
  limit = int(request.POST.get('len', 1000))  # POST values arrive as strings

  api = OptimizerApi(user=request.user)
  data = api.top_tables(database_name=database, page_size=limit)

  tables = []
  for table in data['results']:
    path = _get_table_name(table['name'])  # split the qualified name once, not twice per table
    tables.append({
      'eid': table['eid'],
      'database': path['database'],
      'name': path['table'],
      'popularity': table['workloadPercent'],
      'column_count': table['columnCount'],
      'patternCount': table['patternCount'],
      'total': table['total'],
      'is_fact': table['type'] != 'Dimension'
    })

  response['top_tables'] = tables
  response['status'] = 0

  return JsonResponse(response)
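
A quick way to exercise a view like this is Django's built-in test client. The sketch below is illustrative only: the URL and the login credentials are assumptions, since the snippet does not show the project's URL configuration.

from django.test import Client

# Minimal sketch of calling the top_tables view above.
# The URL '/metadata/api/optimizer/top_tables' and the credentials are
# assumptions; the real routing is not part of the snippet.
client = Client()
client.login(username='admin', password='admin')

resp = client.post('/metadata/api/optimizer/top_tables', {'database': 'default', 'len': 100})
payload = resp.json()

if payload['status'] == 0:
  for table in payload['top_tables']:
    print(table['database'], table['name'], table['popularity'])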
Example #2
def upload_table_stats(request):
  response = {'status': -1}

  db_tables = json.loads(request.POST.get('db_tables', '[]'))
  source_platform = request.POST.get('sourcePlatform', 'hive')
  with_columns = json.loads(request.POST.get('with_columns', 'false'))
  with_ddl = json.loads(request.POST.get('with_ddl', 'false'))

  table_stats = []
  column_stats = []
  table_ddls = []

  for db_table in db_tables:
    path = _get_table_name(db_table)

    try:
      if with_ddl:
        db = dbms.get(request.user)
        query = hql_query('SHOW CREATE TABLE `%(database)s`.`%(table)s`' % path)
        handle = db.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          result = db.fetch(handle, rows=5000)
          db.close(handle)
          table_ddls.append((0, 0, ' '.join([row[0] for row in result.rows()]), path['database']))

      full_table_stats = json.loads(get_table_stats(request, database=path['database'], table=path['table']).content)
      stats = dict((stat['data_type'], stat['comment']) for stat in full_table_stats['stats'])

      table_stats.append({
        'table_name': path['table'],
        'num_rows': stats.get('numRows', -1),
        'last_modified_time': stats.get('transient_lastDdlTime', -1),
        'total_size': stats.get('totalSize', -1),
        'raw_data_size': stats.get('rawDataSize', -1),
        'num_files': stats.get('numFiles', -1),
        # bytes_cached
        # cache_replication
        # format
      })

      if with_columns:
        for col in full_table_stats['columns']:
          col_stats = json.loads(get_table_stats(request, database=path['database'], table=path['table'], column=col).content)['stats']
          # Flatten the list of single-key dicts into one dict per column
          col_stats = dict([(key, val) for col_stat in col_stats for key, val in col_stat.items()])

          column_stats.append({
            'table_name': path['table'],
            'column_name': col,
            'data_type': col_stats['data_type'],
            "num_distinct": int(col_stats.get('distinct_count')) if col_stats.get('distinct_count') != '' else -1,
            "num_nulls": int(col_stats['num_nulls']) if col_stats['num_nulls'] != '' else -1,
            "avg_col_len": int(float(col_stats['avg_col_len'])) if col_stats['avg_col_len'] != '' else -1,
            "max_size": int(float(col_stats['max_col_len'])) if col_stats['max_col_len'] != '' else -1,
            "min": col_stats['min'] if col_stats.get('min', '') != '' else -1,
            "max": col_stats['max'] if col_stats.get('max', '') != '' else -1,
            "num_trues": col_stats['num_trues'] if col_stats.get('num_trues', '') != '' else -1,
            "num_falses": col_stats['num_falses'] if col_stats.get('num_falses', '') != '' else -1,
          })
    except Exception as e:
      LOG.exception('Skipping upload of %s: %s' % (db_table, e))
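
Both upload examples lean on a _get_table_name helper that is not shown here. A minimal sketch of what it plausibly does, assuming it splits an optionally qualified 'database.table' identifier and falls back to the default database; the real implementation in the project may differ:

# Plausible sketch of the _get_table_name helper used above; an assumption,
# not the project's actual code.
def _get_table_name(name):
  if '.' in name:
    database, table = name.split('.', 1)
  else:
    database, table = 'default', name
  return {'database': database, 'table': table}

assert _get_table_name('web.logs') == {'database': 'web', 'table': 'logs'}
assert _get_table_name('logs') == {'database': 'default', 'table': 'logs'}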
Example #3
def upload_table_stats(request):
  response = {'status': -1}

  db_tables = json.loads(request.POST.get('db_tables', '[]'))
  source_platform = json.loads(request.POST.get('sourcePlatform', '"hive"'))
  with_ddl = json.loads(request.POST.get('with_ddl', 'false'))
  with_table_stats = json.loads(request.POST.get('with_table', 'false'))
  with_columns_stats = json.loads(request.POST.get('with_columns', 'false'))

  table_ddls = []
  table_stats = []
  column_stats = []

  if not OPTIMIZER.AUTO_UPLOAD_DDL.get():
    with_ddl = False

  if not OPTIMIZER.AUTO_UPLOAD_STATS.get():
    with_table_stats = with_columns_stats = False

  for db_table in db_tables:
    path = _get_table_name(db_table)

    try:
      if with_ddl:
        db = _get_db(request.user, source_type=source_platform)
        query = hql_query('SHOW CREATE TABLE `%(database)s`.`%(table)s`' % path)
        handle = db.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          result = db.fetch(handle, rows=5000)
          db.close(handle)
          table_ddls.append((0, 0, ' '.join([row[0] for row in result.rows()]), path['database']))

      if with_table_stats or with_columns_stats:
        # Fetched for both branches: the column-stats branch below also needs
        # mock_request and the table's column list from full_table_stats.
        mock_request = MockRequest(user=request.user, source_platform=source_platform)
        full_table_stats = json.loads(get_table_stats(mock_request, database=path['database'], table=path['table']).content)

      if with_table_stats:
        stats = dict((stat['data_type'], stat['comment']) for stat in full_table_stats['stats'])

        table_stats.append({
          'table_name': '%(database)s.%(table)s' % path, # DB Prefix
          'num_rows': stats.get('numRows', -1),
          'last_modified_time': stats.get('transient_lastDdlTime', -1),
          'total_size': stats.get('totalSize', -1),
          'raw_data_size': stats.get('rawDataSize', -1),
          'num_files': stats.get('numFiles', -1),
          'num_partitions': stats.get('numPartitions', -1),
          # bytes_cached
          # cache_replication
          # format
        })

      if with_columns_stats:
        if source_platform == 'impala':
          # Impala returns stats for every column in a single call (column=-1)
          per_column_stats = json.loads(get_table_stats(mock_request, database=path['database'], table=path['table'], column=-1).content)['stats']
        else:
          # Hive needs one call per column; cap at 25 columns to bound the number of calls
          per_column_stats = [
              json.loads(get_table_stats(mock_request, database=path['database'], table=path['table'], column=col).content)['stats']
              for col in full_table_stats['columns'][:25]
          ]

        # Flatten each column's list of single-key dicts into one dict, mapping None values to ''
        raw_column_stats = [dict([(key, val if val is not None else '') for col_stat in col for key, val in col_stat.items()]) for col in per_column_stats]

        for col_stats in raw_column_stats:
          column_stats.append({
            'table_name': '%(database)s.%(table)s' % path, # DB Prefix
            'column_name': col_stats['col_name'],
            'data_type': col_stats['data_type'],
            "num_distinct": int(col_stats.get('distinct_count')) if col_stats.get('distinct_count') != '' else -1,
            "num_nulls": int(col_stats['num_nulls']) if col_stats['num_nulls'] != '' else -1,
            "avg_col_len": int(float(col_stats['avg_col_len'])) if col_stats['avg_col_len'] != '' else -1,
            "max_size": int(float(col_stats['max_col_len'])) if col_stats['max_col_len'] != '' else -1,
            "min": col_stats['min'] if col_stats.get('min', '') != '' else -1,
            "max": col_stats['max'] if col_stats.get('max', '') != '' else -1,
            "num_trues": col_stats['num_trues'] if col_stats.get('num_trues', '') != '' else -1,
            "num_falses": col_stats['num_falses'] if col_stats.get('num_falses', '') != '' else -1,
          })
    except Exception as e:
      LOG.exception('Skipping upload of %s: %s' % (db_table, e))
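
The dict((stat['data_type'], stat['comment']) ...) expression near the top of the loop turns the DESCRIBE FORMATTED-style rows returned by get_table_stats() into a flat lookup, which is why every field can then default to -1 via stats.get(). A self-contained illustration with made-up sample rows:

# Illustration of the stats normalization used above; the sample rows are
# invented, real ones come from get_table_stats().
sample = [
  {'data_type': 'numRows', 'comment': '1500'},
  {'data_type': 'totalSize', 'comment': '204800'},
  {'data_type': 'transient_lastDdlTime', 'comment': '1500000000'},
]

stats = dict((stat['data_type'], stat['comment']) for stat in sample)

print(stats.get('numRows', -1))        # '1500' (values stay strings)
print(stats.get('numPartitions', -1))  # -1 for keys the table doesn't report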