def _get_performance_features(fuzzer_name, job_type, datetime_start,
                              datetime_end):
  """Get raw performance features stored in BigQuery."""
  query_fields = [
      fuzzer_stats.QueryField(fuzzer_stats.TestcaseQuery.ALIAS, column, None)
      for column in constants.QUERY_COLUMNS
  ]

  # TODO(mmoroz): the query should be possible for datetime objects as well.
  query = fuzzer_stats.TestcaseQuery(
      fuzzer_name=fuzzer_name,
      job_types=[job_type],
      query_fields=query_fields,
      group_by=fuzzer_stats.QueryGroupBy.GROUP_BY_NONE,
      date_start=datetime_start.date(),
      date_end=datetime_end.date())

  client = big_query.Client()

  try:
    result = client.query(query=query.build())
  except Exception as e:
    logging.error('Exception during BigQuery request: %s\n', str(e))
    raise helpers.EarlyExitException('Internal error.', 500)

  if not result.rows:
    raise helpers.EarlyExitException('No stats.', 404)

  return result
def _record_cross_pollination_stats(stats):
  """Log stats about cross pollination in BigQuery."""
  # If no stats were gathered due to a timeout or lack of corpus, return.
  if not stats:
    return

  bigquery_row = {
      'project_qualified_name': stats.project_qualified_name,
      'method': stats.method,
      'sources': stats.sources,
      'tags': stats.tags if stats.tags else '',
      'initial_corpus_size': stats.initial_corpus_size,
      'corpus_size': stats.corpus_size,
      'initial_edge_coverage': stats.initial_edge_coverage,
      'edge_coverage': stats.edge_coverage,
      'initial_feature_coverage': stats.initial_feature_coverage,
      'feature_coverage': stats.feature_coverage
  }

  # BigQuery is not available in local development. This check is necessary
  # because the untrusted runner is in a separate process and can't be easily
  # mocked. Check here instead of earlier to test as much of the function as
  # we can.
  if environment.get_value('LOCAL_DEVELOPMENT') or environment.get_value(
      'PY_UNITTESTS'):
    return

  client = big_query.Client(
      dataset_id='main', table_id='cross_pollination_statistics')
  client.insert([big_query.Insert(row=bigquery_row, insert_id=None)])
def get_last_crash_time(testcase):
  """Return timestamp for last crash with same crash params as testcase."""
  client = big_query.Client()

  where_clause = ('crash_type = {crash_type} AND '
                  'crash_state = {crash_state} AND '
                  'security_flag = {security_flag} AND '
                  'project = {project}').format(
                      crash_type=json.dumps(testcase.crash_type),
                      crash_state=json.dumps(testcase.crash_state),
                      security_flag=json.dumps(testcase.security_flag),
                      project=json.dumps(testcase.project_name),
                  )

  sql = """
SELECT hour
FROM main.crash_stats
WHERE {where_clause}
ORDER BY hour DESC
LIMIT 1
""".format(where_clause=where_clause)

  result = client.query(query=sql)
  if result and result.rows:
    return get_datetime(result.rows[0]['hour'])

  return None
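# NOTE: Illustrative sketch only, not part of the module above. It shows the
# WHERE clause get_last_crash_time() would build for hypothetical testcase
# values; json.dumps() is used because it emits double-quoted, escaped string
# literals (and `true`/`false` for the boolean security_flag) that BigQuery
# Standard SQL accepts.
import json

crash_type = 'Heap-buffer-overflow\nREAD 4'  # Hypothetical values.
crash_state = 'foo\nbar\nbaz'
security_flag = True
project_name = 'example-project'

where_clause = ('crash_type = {crash_type} AND '
                'crash_state = {crash_state} AND '
                'security_flag = {security_flag} AND '
                'project = {project}').format(
                    crash_type=json.dumps(crash_type),
                    crash_state=json.dumps(crash_state),
                    security_flag=json.dumps(security_flag),
                    project=json.dumps(project_name))
# -> crash_type = "Heap-buffer-overflow\nREAD 4" AND
#    crash_state = "foo\nbar\nbaz" AND
#    security_flag = true AND project = "example-project"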
def get(self):
  """Process all fuzz targets and update FuzzTargetJob weights."""
  client = big_query.Client()

  update_target_weights_for_engine(client, 'libFuzzer',
                                   LIBFUZZER_SPECIFICATIONS)
  update_target_weights_for_engine(client, 'afl', AFL_SPECIFICATIONS)
  update_job_weights()

  store_current_weights_in_bigquery()
def get(params, query, offset, limit):
  """Get the data from BigQuery."""
  sql = SQL.format(
      table_id='%ss' % params['type'],
      where_clause=query.get_where_clause(),
      prefix=params['type'],
      offset=offset,
      limit=limit)

  client = big_query.Client()
  result = client.query(query=sql, offset=offset, limit=limit)
  return result.rows, result.total_count
def store_current_weights_in_bigquery():
  """Update a BigQuery table containing the daily stats."""
  rows = []
  target_jobs = ndb_utils.get_all_from_model(data_types.FuzzTargetJob)
  for target_job in target_jobs:
    row = {
        'fuzzer': target_job.fuzz_target_name,
        'job': target_job.job,
        'weight': target_job.weight
    }
    rows.append(big_query.Insert(row=row, insert_id=None))

  client = big_query.Client(dataset_id='main', table_id='fuzzer_weights')
  client.insert(rows)
def _do_bigquery_query(query):
  """Return results from BigQuery."""
  logs.log(query)
  client = big_query.Client()

  try:
    results = client.raw_query(query, max_results=10000)
  except HttpError as e:
    raise helpers.EarlyExitException(str(e), 500)

  if 'rows' not in results:
    raise helpers.EarlyExitException('No stats.', 404)

  return results
def get_start_hour():
  """Get the start hour from the first crash."""
  client = big_query.Client()

  sql = """
SELECT min(CAST(FLOOR(UNIX_SECONDS(created_at) / 3600) AS INT64)) as min_hour
FROM main.crashes
"""

  result = client.query(query=sql)
  if result and result.rows:
    return result.rows[0]['min_hour']

  return 0
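# NOTE: Illustrative sketch only, not part of the module above. The "hour"
# values used by crash stats are hour indices since the Unix epoch; the SQL
# above computes FLOOR(UNIX_SECONDS(created_at) / 3600), which the Python
# below mirrors for a hypothetical creation time.
import datetime

created_at = datetime.datetime(
    2019, 1, 1, 12, 30, tzinfo=datetime.timezone.utc)
hour_index = int(created_at.timestamp()) // 3600
# 2019-01-01 12:30 UTC -> 429540 hours since the epoch; the minutes are
# dropped by the floor division.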
def _query_multi_armed_bandit_probabilities(engine):
  """Get query results.

  Queries BigQuery using the BANDIT_PROBABILITY_QUERY_FORMAT template defined
  above and returns the resulting rows, ordered by the strategies implemented.
  """
  strategy_names_list = [
      strategy_entry.name for strategy_entry in engine.query_strategy_list
  ]
  strategies_subquery = '\n'.join([
      STRATEGY_SUBQUERY_FORMAT.format(strategy_name=strategy_name)
      for strategy_name in strategy_names_list
  ])
  client = big_query.Client()
  strategies = ','.join(
      ['strategy_' + strategy_name for strategy_name in strategy_names_list])
  formatted_query = BANDIT_PROBABILITY_QUERY_FORMAT.format(
      performance_metric=engine.performance_metric,
      temperature_value=TEMPERATURE_PARAMETER,
      strategies=strategies,
      strategies_subquery=strategies_subquery,
      engine=engine.name)
  return client.query(query=formatted_query).rows
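# NOTE: Illustrative sketch only, not part of the module above. For a
# hypothetical engine whose query_strategy_list contains strategies named
# 'corpus_subset' and 'value_profile', the column list interpolated into the
# query template would look like this; strategies_subquery would be one
# STRATEGY_SUBQUERY_FORMAT expansion per strategy (defined elsewhere in the
# module), joined with newlines.
strategy_names_list = ['corpus_subset', 'value_profile']  # Hypothetical names.

strategies = ','.join(
    ['strategy_' + strategy_name for strategy_name in strategy_names_list])
assert strategies == 'strategy_corpus_subset,strategy_value_profile'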
def build(end_hour):
  """Build crash stats for the end hour."""
  logging.info('Started building crash stats for %s.',
               crash_stats.get_datetime(end_hour))
  job_id = JOB_ID_TEMPLATE.format(unique_number=int(time.time()))

  client = big_query.Client()
  make_request(client, job_id, end_hour)

  start_time = time.time()
  while (time.time() - start_time) < TIMEOUT:
    time.sleep(10)

    result = client.get_job(job_id)
    logging.info('Checking %s', json.dumps(result))

    if result['status']['state'] == 'DONE':
      if result['status'].get('errors'):
        raise Exception(json.dumps(result))
      return

  raise Exception('Building crash stats exceeded %d seconds.' % TIMEOUT)
def _store_probabilities_in_bigquery(engine, data):
  """Update a BigQuery table containing the daily updated probability
  distribution over strategies."""
  bigquery_data = []

  # TODO(mukundv): Update once we choose a temperature parameter for final
  # implementation.
  for row in data:
    bigquery_row = {
        'strategy_name': row['strategy'],
        'probability': row['bandit_weight'],
        'engine': engine.name
    }
    bigquery_data.append(big_query.Insert(row=bigquery_row, insert_id=None))

  if bigquery_data:
    client = big_query.Client(
        dataset_id='main', table_id='fuzz_strategy_probability')
    client.insert(bigquery_data)
  else:
    logs.log('No fuzz strategy distribution data was found to upload to '
             'BigQuery.')
def get(end, days, block, group_by, where_clause, group_having_clause, sort_by,
        offset, limit):
  """Query from BigQuery given the params."""
  if where_clause:
    where_clause = '(%s) AND ' % where_clause

  start = end - (days * 24) + 1
  where_clause += '(hour BETWEEN %d AND %d) AND ' % (start, end)
  where_clause += ('(_PARTITIONTIME BETWEEN TIMESTAMP_TRUNC("%s", DAY) '
                   'AND TIMESTAMP_TRUNC("%s", DAY))' %
                   (get_datetime(start).strftime('%Y-%m-%d'),
                    get_datetime(end).strftime('%Y-%m-%d')))

  time_span = 1 if block == 'hour' else 24
  remainder = get_remainder_for_index(end, time_span)

  if group_having_clause:
    group_having_clause = 'HAVING ' + group_having_clause

  if (not big_query.VALID_FIELD_NAME_REGEX.match(group_by) or
      not big_query.VALID_FIELD_NAME_REGEX.match(sort_by)):
    raise ValueError('Invalid group_by or sort_by')

  sql = SQL.format(
      time_span=time_span,
      remainder=remainder,
      group_by=group_by,
      where_clause=where_clause,
      group_having_clause=group_having_clause,
      sort_by=sort_by)

  client = big_query.Client()
  result = client.query(query=sql, offset=offset, limit=limit)

  items = []
  for row in result.rows:
    avg_crash_time_in_ms = row['sum_crash_time_in_ms'] // row['total_count']
    for group in row['groups']:
      for index in group['indices']:
        index['hour'] = convert_index_to_hour(index['index'], time_span,
                                              remainder)

    items.append({
        'projectName': row['project'],
        'crashType': row['crash_type'],
        'crashState': row['crash_state'],
        'isSecurity': row['security_flag'],
        'isReproducible': row['is_reproducible'],
        'isNew': row['is_new'],
        'totalCount': row['total_count'],
        'crashTime': {
            'min': row['min_crash_time_in_ms'],
            'max': row['max_crash_time_in_ms'],
            'avg': avg_crash_time_in_ms,
            'std': math.sqrt(
                (row['sum_square_crash_time_in_ms'] // row['total_count']) -
                (avg_crash_time_in_ms * avg_crash_time_in_ms))
        },
        'groups': row['groups'],
        'days': days,
        'block': block,
        'end': end + 1  # Convert to UI's end.
    })

  return result.total_count, items
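# NOTE: Illustrative sketch only, not part of the module above. The crashTime
# 'std' computed above relies on the identity Var(X) = E[X^2] - (E[X])^2,
# applied to the aggregated count, sum, and sum of squares returned by the
# query (with the same integer division as the code above). The hypothetical
# per-crash times below show the arithmetic.
import math

crash_times_in_ms = [100, 200, 600]  # Hypothetical per-crash times.
total_count = len(crash_times_in_ms)
sum_crash_time_in_ms = sum(crash_times_in_ms)                       # 900
sum_square_crash_time_in_ms = sum(t * t for t in crash_times_in_ms)  # 410000

avg_crash_time_in_ms = sum_crash_time_in_ms // total_count           # 300
std = math.sqrt(sum_square_crash_time_in_ms // total_count -
                avg_crash_time_in_ms * avg_crash_time_in_ms)
# sqrt(136666 - 90000) = sqrt(46666) ~= 216.0, matching the population
# standard deviation up to the rounding introduced by integer division.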