示例#1
0
    def _load_data(self, bigquery, fuzzer):
        """Load yesterday's stats into BigQuery.

        Creates the per-fuzzer dataset (and each per-kind table) on demand,
        then submits one BigQuery load job per stats kind, targeting the
        date-partitioned table for yesterday's partition.
        """
        project_id = utils.get_application_id()

        # Stats for a given day are uploaded the day after; derive the
        # partition date and its GCS timestamp from "now" in UTC.
        target_date = self._utc_now().date() - datetime.timedelta(days=1)
        partition_suffix = target_date.strftime('%Y%m%d')
        stats_timestamp = utils.utc_date_to_timestamp(target_date)

        dataset_id = fuzzer_stats.dataset_name(fuzzer)
        if not self._create_dataset_if_needed(bigquery, dataset_id):
            # Without a dataset there is nothing to load into.
            return

        for kind in STATS_KINDS:
            table_id = kind.__name__
            if not self._create_table_if_needed(bigquery, dataset_id,
                                                table_id):
                # Skip this kind but keep trying the remaining ones.
                continue

            # TestcaseRun schemas are fuzzer-specific; other kinds carry a
            # static schema on the class.
            schema = (fuzzer_stats_schema.get(fuzzer)
                      if kind == fuzzer_stats.TestcaseRun else kind.SCHEMA)

            gcs_path = fuzzer_stats.get_gcs_stats_path(kind.__name__, fuzzer,
                                                       stats_timestamp)
            load = {
                'destinationTable': {
                    'projectId': project_id,
                    # "table$YYYYMMDD" addresses a single partition.
                    'tableId': '{}${}'.format(table_id, partition_suffix),
                    'datasetId': dataset_id,
                },
                'schemaUpdateOptions': [
                    'ALLOW_FIELD_ADDITION',
                ],
                'sourceFormat': 'NEWLINE_DELIMITED_JSON',
                # gcs_path is expected to start with '/', yielding "gs://...".
                'sourceUris': [''.join(['gs:/', gcs_path, '*.json'])],
                'writeDisposition': 'WRITE_TRUNCATE',
            }
            if schema is not None:
                load['schema'] = schema

            job_body = {
                'configuration': {
                    'load': load,
                },
            }

            logs.log("Uploading job to BigQuery.", job_body=job_body)
            response = bigquery.jobs().insert(projectId=project_id,
                                              body=job_body).execute()

            # We cannot really check the response here, as the query might be still
            # running, but having a BigQuery jobId in the log would make our life
            # simpler if we ever have to manually check the status of the query.
            # See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query.
            logs.log('Response from BigQuery: %s' % response)
示例#2
0
def update_matches_for_specification(specification, client, engine,
                                     matched_specifications, run_set):
    """Run a query and adjust weights based on a given query specification."""
    query = specification.formatter(specification.query_format,
                                    fuzzer_stats.dataset_name(engine))
    # Record every (fuzzer, job) pair seen; only pairs whose ratio meets the
    # specification's threshold become matches.
    for row in _query_helper(client, query):
        fuzzer_name = row['fuzzer']
        job_name = row['job']
        run_set.add((fuzzer_name, job_name))
        if row['ratio'] >= specification.threshold:
            _update_match(matched_specifications, fuzzer_name, job_name,
                          specification)
示例#3
0
def update_matches_for_specification(specification, client, engine, matches,
                                     run_set):
    """Run a query and adjust weights based on a given query specification."""
    dataset = fuzzer_stats.dataset_name(engine)
    query = specification.formatter(specification.query_format, dataset)
    # Every (fuzzer, job) pair seen goes into run_set; a weight of exactly
    # 1.0 is the neutral default and does not produce a match entry.
    for row in _query_helper(client, query):
        fuzzer_name = row['fuzzer']
        job_name = row['job']
        weight = row['new_weight']
        run_set.add((fuzzer_name, job_name))
        if weight != 1.0:
            _update_match(
                matches, fuzzer_name, job_name,
                SpecificationMatch(new_weight=weight,
                                   reason=specification.reason))