def _get_performance_features(fuzzer_name, job_type, datetime_start,
                              datetime_end):
  """Get raw performance features stored in BigQuery."""
  query_fields = [
      fuzzer_stats.QueryField(fuzzer_stats.TestcaseQuery.ALIAS, column, None)
      for column in constants.QUERY_COLUMNS
  ]

  # TODO(mmoroz): the query should be possible for datetime objects as well.
  query = fuzzer_stats.TestcaseQuery(
      fuzzer_name=fuzzer_name,
      job_types=[job_type],
      query_fields=query_fields,
      group_by=fuzzer_stats.QueryGroupBy.GROUP_BY_NONE,
      date_start=datetime_start.date(),
      date_end=datetime_end.date())

  client = big_query.Client()

  try:
    result = client.query(query=query.build())
  except Exception as e:
    logging.error('Exception during BigQuery request: %s\n', str(e))
    raise helpers.EarlyExitException('Internal error.', 500)

  if not result.rows:
    raise helpers.EarlyExitException('No stats.', 404)

  return result
def _record_cross_pollination_stats(stats):
  """Log stats about cross pollination in BigQuery."""
  # If no stats were gathered due to a timeout or lack of corpus, return.
  if not stats:
    return

  bigquery_row = {
      'project_qualified_name': stats.project_qualified_name,
      'method': stats.method,
      'sources': stats.sources,
      'tags': stats.tags if stats.tags else '',
      'initial_corpus_size': stats.initial_corpus_size,
      'corpus_size': stats.corpus_size,
      'initial_edge_coverage': stats.initial_edge_coverage,
      'edge_coverage': stats.edge_coverage,
      'initial_feature_coverage': stats.initial_feature_coverage,
      'feature_coverage': stats.feature_coverage
  }

  # BigQuery is not available in local development. This check is necessary
  # because the untrusted runner is in a separate process and can't be easily
  # mocked. Check here instead of earlier to test as much of the function as
  # we can.
  if environment.get_value('LOCAL_DEVELOPMENT') or environment.get_value(
      'PY_UNITTESTS'):
    return

  client = big_query.Client(
      dataset_id='main', table_id='cross_pollination_statistics')
  client.insert([big_query.Insert(row=bigquery_row, insert_id=None)])
def get_last_crash_time(testcase):
  """Return timestamp for last crash with same crash params as testcase."""
  client = big_query.Client()

  where_clause = ('crash_type = {crash_type} AND '
                  'crash_state = {crash_state} AND '
                  'security_flag = {security_flag} AND '
                  'project = {project}').format(
                      crash_type=json.dumps(testcase.crash_type),
                      crash_state=json.dumps(testcase.crash_state),
                      security_flag=json.dumps(testcase.security_flag),
                      project=json.dumps(testcase.project_name),
                  )

  sql = """
SELECT hour
FROM main.crash_stats
WHERE {where_clause}
ORDER BY hour DESC
LIMIT 1
""".format(where_clause=where_clause)

  result = client.query(query=sql)
  if result and result.rows:
    return get_datetime(result.rows[0]['hour'])

  return None
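# NOTE: Illustrative sketch only, not part of the module above. It shows the
# WHERE clause get_last_crash_time() would build for hypothetical testcase
# values; json.dumps() is used because it emits double-quoted, escaped string
# literals (and `true`/`false` for the boolean security_flag) that BigQuery
# Standard SQL accepts.
import json

crash_type = 'Heap-buffer-overflow\nREAD 4'  # Hypothetical values.
crash_state = 'foo\nbar\nbaz'
security_flag = True
project_name = 'example-project'

where_clause = ('crash_type = {crash_type} AND '
                'crash_state = {crash_state} AND '
                'security_flag = {security_flag} AND '
                'project = {project}').format(
                    crash_type=json.dumps(crash_type),
                    crash_state=json.dumps(crash_state),
                    security_flag=json.dumps(security_flag),
                    project=json.dumps(project_name))
# -> crash_type = "Heap-buffer-overflow\nREAD 4" AND
#    crash_state = "foo\nbar\nbaz" AND
#    security_flag = true AND project = "example-project"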
def get(self):
  """Process all fuzz targets and update FuzzTargetJob weights."""
  client = big_query.Client()

  update_target_weights_for_engine(client, 'libFuzzer',
                                   LIBFUZZER_SPECIFICATIONS)
  update_target_weights_for_engine(client, 'afl', AFL_SPECIFICATIONS)
  update_job_weights()

  store_current_weights_in_bigquery()
def get(params, query, offset, limit):
  """Get the data from BigQuery."""
  sql = SQL.format(
      table_id='%ss' % params['type'],
      where_clause=query.get_where_clause(),
      prefix=params['type'],
      offset=offset,
      limit=limit)

  client = big_query.Client()
  result = client.query(query=sql, offset=offset, limit=limit)
  return result.rows, result.total_count
def store_current_weights_in_bigquery():
  """Update a BigQuery table containing the daily stats."""
  rows = []
  target_jobs = ndb_utils.get_all_from_model(data_types.FuzzTargetJob)
  for target_job in target_jobs:
    row = {
        'fuzzer': target_job.fuzz_target_name,
        'job': target_job.job,
        'weight': target_job.weight
    }
    rows.append(big_query.Insert(row=row, insert_id=None))

  client = big_query.Client(dataset_id='main', table_id='fuzzer_weights')
  client.insert(rows)
def _do_bigquery_query(query):
  """Return results from BigQuery."""
  logs.log(query)
  client = big_query.Client()

  try:
    results = client.raw_query(query, max_results=10000)
  except HttpError as e:
    raise helpers.EarlyExitException(str(e), 500)

  if 'rows' not in results:
    raise helpers.EarlyExitException('No stats.', 404)

  return results
def get_start_hour():
  """Get the start hour from the first crash."""
  client = big_query.Client()

  sql = """
SELECT min(CAST(FLOOR(UNIX_SECONDS(created_at) / 3600) AS INT64)) as min_hour
FROM main.crashes
"""

  result = client.query(query=sql)
  if result and result.rows:
    return result.rows[0]['min_hour']

  return 0
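# NOTE: Illustrative sketch only, not part of the module above. The "hour"
# values used by crash stats are hour indices since the Unix epoch; the SQL
# above computes FLOOR(UNIX_SECONDS(created_at) / 3600), which the Python
# below mirrors for a hypothetical creation time.
import datetime

created_at = datetime.datetime(
    2019, 1, 1, 12, 30, tzinfo=datetime.timezone.utc)
hour_index = int(created_at.timestamp()) // 3600
# 2019-01-01 12:30 UTC -> 429540 hours since the epoch; the minutes are
# dropped by the floor division.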
def _query_multi_armed_bandit_probabilities(engine):
  """Get query results.

  Queries BigQuery using the BANDIT_PROBABILITY_QUERY_FORMAT template defined
  above and returns the resulting rows, ordered by the strategies implemented.
  """
  strategy_names_list = [
      strategy_entry.name for strategy_entry in engine.query_strategy_list
  ]
  strategies_subquery = '\n'.join([
      STRATEGY_SUBQUERY_FORMAT.format(strategy_name=strategy_name)
      for strategy_name in strategy_names_list
  ])
  client = big_query.Client()
  strategies = ','.join(
      ['strategy_' + strategy_name for strategy_name in strategy_names_list])
  formatted_query = BANDIT_PROBABILITY_QUERY_FORMAT.format(
      performance_metric=engine.performance_metric,
      temperature_value=TEMPERATURE_PARAMETER,
      strategies=strategies,
      strategies_subquery=strategies_subquery,
      engine=engine.name)
  return client.query(query=formatted_query).rows
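# NOTE: Illustrative sketch only, not part of the module above. For a
# hypothetical engine whose query_strategy_list contains strategies named
# 'corpus_subset' and 'value_profile', the column list interpolated into the
# query template would look like this; strategies_subquery would be one
# STRATEGY_SUBQUERY_FORMAT expansion per strategy (defined elsewhere in the
# module), joined with newlines.
strategy_names_list = ['corpus_subset', 'value_profile']  # Hypothetical names.

strategies = ','.join(
    ['strategy_' + strategy_name for strategy_name in strategy_names_list])
assert strategies == 'strategy_corpus_subset,strategy_value_profile'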
def build(end_hour):
  """Build crash stats for the end hour."""
  logging.info('Started building crash stats for %s.',
               crash_stats.get_datetime(end_hour))
  job_id = JOB_ID_TEMPLATE.format(unique_number=int(time.time()))

  client = big_query.Client()
  make_request(client, job_id, end_hour)

  start_time = time.time()
  while (time.time() - start_time) < TIMEOUT:
    time.sleep(10)

    result = client.get_job(job_id)
    logging.info('Checking %s', json.dumps(result))

    if result['status']['state'] == 'DONE':
      if result['status'].get('errors'):
        raise Exception(json.dumps(result))
      return

  raise Exception('Building crash stats exceeded %d seconds.' % TIMEOUT)
def _store_probabilities_in_bigquery(engine, data):
  """Update a BigQuery table containing the daily updated probability
  distribution over strategies."""
  bigquery_data = []

  # TODO(mukundv): Update once we choose a temperature parameter for final
  # implementation.
  for row in data:
    bigquery_row = {
        'strategy_name': row['strategy'],
        'probability': row['bandit_weight'],
        'engine': engine.name
    }
    bigquery_data.append(big_query.Insert(row=bigquery_row, insert_id=None))

  if bigquery_data:
    client = big_query.Client(
        dataset_id='main', table_id='fuzz_strategy_probability')
    client.insert(bigquery_data)
  else:
    logs.log('No fuzz strategy distribution data was found to upload to '
             'BigQuery.')
def get(end, days, block, group_by, where_clause, group_having_clause, sort_by,
        offset, limit):
  """Query from BigQuery given the params."""
  if where_clause:
    where_clause = '(%s) AND ' % where_clause

  start = end - (days * 24) + 1
  where_clause += '(hour BETWEEN %d AND %d) AND ' % (start, end)
  where_clause += ('(_PARTITIONTIME BETWEEN TIMESTAMP_TRUNC("%s", DAY) '
                   'AND TIMESTAMP_TRUNC("%s", DAY))' %
                   (get_datetime(start).strftime('%Y-%m-%d'),
                    get_datetime(end).strftime('%Y-%m-%d')))

  time_span = 1 if block == 'hour' else 24
  remainder = get_remainder_for_index(end, time_span)

  if group_having_clause:
    group_having_clause = 'HAVING ' + group_having_clause

  if (not big_query.VALID_FIELD_NAME_REGEX.match(group_by) or
      not big_query.VALID_FIELD_NAME_REGEX.match(sort_by)):
    raise ValueError('Invalid group_by or sort_by')

  sql = SQL.format(
      time_span=time_span,
      remainder=remainder,
      group_by=group_by,
      where_clause=where_clause,
      group_having_clause=group_having_clause,
      sort_by=sort_by)

  client = big_query.Client()
  result = client.query(query=sql, offset=offset, limit=limit)

  items = []
  for row in result.rows:
    avg_crash_time_in_ms = row['sum_crash_time_in_ms'] // row['total_count']
    for group in row['groups']:
      for index in group['indices']:
        index['hour'] = convert_index_to_hour(index['index'], time_span,
                                              remainder)

    items.append({
        'projectName': row['project'],
        'crashType': row['crash_type'],
        'crashState': row['crash_state'],
        'isSecurity': row['security_flag'],
        'isReproducible': row['is_reproducible'],
        'isNew': row['is_new'],
        'totalCount': row['total_count'],
        'crashTime': {
            'min': row['min_crash_time_in_ms'],
            'max': row['max_crash_time_in_ms'],
            'avg': avg_crash_time_in_ms,
            'std': math.sqrt(
                (row['sum_square_crash_time_in_ms'] // row['total_count']) -
                (avg_crash_time_in_ms * avg_crash_time_in_ms))
        },
        'groups': row['groups'],
        'days': days,
        'block': block,
        'end': end + 1  # Convert to UI's end.
    })

  return result.total_count, items
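# NOTE: Illustrative sketch only, not part of the module above. The crashTime
# 'std' computed above relies on the identity Var(X) = E[X^2] - (E[X])^2,
# applied to the aggregated count, sum, and sum of squares returned by the
# query (with the same integer division as the code above). The hypothetical
# per-crash times below show the arithmetic.
import math

crash_times_in_ms = [100, 200, 600]  # Hypothetical per-crash times.
total_count = len(crash_times_in_ms)
sum_crash_time_in_ms = sum(crash_times_in_ms)                       # 900
sum_square_crash_time_in_ms = sum(t * t for t in crash_times_in_ms)  # 410000

avg_crash_time_in_ms = sum_crash_time_in_ms // total_count           # 300
std = math.sqrt(sum_square_crash_time_in_ms // total_count -
                avg_crash_time_in_ms * avg_crash_time_in_ms)
# sqrt(136666 - 90000) = sqrt(46666) ~= 216.0, matching the population
# standard deviation up to the rounding introduced by integer division.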