示例#1
0
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Compute the coverage snapshot of trial |trial_num| (|fuzzer| on
    |benchmark|) at |cycle|.

    Returns None when the trial directory does not exist yet or when
    extract_cycle_corpus() returns a falsy value. Otherwise returns a
    models.Snapshot whose edges_covered is the len() of the PCs reported by
    the snapshot measurer."""
    log = logs.Logger('measurer',
                      default_extras={
                          'fuzzer': fuzzer,
                          'benchmark': benchmark,
                          'trial_id': str(trial_num),
                          'cycle': str(cycle),
                      })
    measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num, log)

    # Nothing can be measured before the trial directory exists.
    if not os.path.exists(measurer.trial_dir):
        log.warning('Trial dir: %s does not exist yet.', measurer.trial_dir)
        return None

    elapsed = cycle * experiment_utils.SNAPSHOT_PERIOD

    def make_snapshot(pcs):
        """Wrap the size of |pcs| in a Snapshot for this trial and time."""
        return models.Snapshot(time=elapsed,
                               trial_id=trial_num,
                               edges_covered=len(pcs))

    # An unchanged cycle reuses the PCs that are already known.
    if measurer.is_cycle_unchanged(cycle):
        log.info('Cycle: %d is unchanged.', cycle)
        return make_snapshot(measurer.get_current_pcs())

    measurer.initialize_measurement_dirs()

    corpus_extracted = measurer.extract_cycle_corpus(cycle)
    if not corpus_extracted:
        return None

    # Run coverage on the new units and merge the resulting PCs.
    measurer.run_cov_new_units()
    result = make_snapshot(measurer.merge_new_pcs())

    # The corpus that was just measured becomes the previous corpus.
    filesystem.replace_dir(measurer.corpus_dir, measurer.prev_corpus_dir)

    # Keep a copy of this cycle's crashes.
    measurer.archive_crashes(cycle)

    log.info('Measured cycle: %d.', cycle)
    return result
示例#2
0
def get_covered_region(experiment: str, fuzzer: str, benchmark: str,
                       q: multiprocessing.Queue):
    """Collect the regions covered by every trial of |fuzzer| on |benchmark|
    in |experiment| and put them on |q|.

    The item put on |q| is a one-entry dict mapping the fuzzer-benchmark key
    to the set of covered regions (an empty set when there are no trials)."""
    initialize_logs()
    logger.debug('Measuring covered region: fuzzer: %s, benchmark: %s.',
                 fuzzer, benchmark)
    key = get_fuzzer_benchmark_key(fuzzer, benchmark)
    regions = set()
    for trial_id in get_trial_ids(experiment, fuzzer, benchmark):
        logger.info('Measuring covered region: trial_id = %d.', trial_id)
        extras = {
            'fuzzer': fuzzer,
            'benchmark': benchmark,
            'trial_id': str(trial_id),
        }
        trial_logger = logs.Logger('measurer', default_extras=extras)
        measurer = SnapshotMeasurer(fuzzer, benchmark, trial_id, trial_logger)
        # Fold this trial's regions into the running union.
        regions.update(measurer.get_current_covered_regions())
    q.put({key: regions})
    logger.debug('Done measuring covered region: fuzzer: %s, benchmark: %s.',
                 fuzzer, benchmark)
示例#3
0
from sqlalchemy import orm

from common import experiment_utils
from common import experiment_path as exp_path
from common import filesystem
from common import gsutil
from common import logs
from common import utils
from database import utils as db_utils
from database import models
from experiment.build import builder
from experiment import run_coverage
from experiment import scheduler
from third_party import sancov

# Module-level logger instance (label 'measurer').
logger = logs.Logger('measurer')  # pylint: disable=invalid-name

# Record type with the four fields fuzzer, benchmark, trial_id and cycle.
SnapshotMeasureRequest = collections.namedtuple(
    'SnapshotMeasureRequest', ['fuzzer', 'benchmark', 'trial_id', 'cycle'])

# NOTE(review): none of the constants below is used in the visible part of
# this file; the descriptions are inferred from the names - confirm.
# Presumably the number of retry attempts and the delay between them.
NUM_RETRIES = 3
RETRY_DELAY = 3
# Presumably the number of seconds to wait after a failure.
FAIL_WAIT_SECONDS = 30
# Presumably the timeout used when getting from the snapshot queue.
SNAPSHOT_QUEUE_GET_TIMEOUT = 1
# Presumably how many snapshots are saved per batch.
SNAPSHOTS_BATCH_SAVE_SIZE = 100


def get_experiment_folders_dir():
    """Return what exp_path.path() yields for 'experiment-folders'."""
    folders_dir = exp_path.path('experiment-folders')
    return folders_dir
示例#4
0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for running a sancov instrumented binary on a corpus."""
import os
import tempfile
from typing import List

from common import experiment_utils
from common import logs
from common import new_process

# Module-level logger instance (label 'run_coverage').
logger = logs.Logger('run_coverage')


def find_crashing_units(artifacts_dir: str) -> List[str]:
    """Return the unit names of the artifacts stored in |artifacts_dir|.

    Artifacts are assumed to be named "<prefix>-<SHA1 hash>" (for example
    crash-, oom-, timeout- or slow-unit-) and input units are assumed to be
    named with their hash. The unit name is everything after the LAST '-',
    so prefixes that themselves contain a dash are handled correctly.

    Only regular files are considered. Files whose name contains no '-' do
    not follow the naming scheme and are ignored instead of raising
    IndexError.

    Args:
        artifacts_dir: Directory containing the artifact files.

    Returns:
        A list of unit names, in os.listdir() order.
    """
    units = []
    for filename in os.listdir(artifacts_dir):
        if not os.path.isfile(os.path.join(artifacts_dir, filename)):
            continue
        # rpartition yields an empty separator when there is no '-' at all.
        _, separator, unit = filename.rpartition('-')
        if separator:
            units.append(unit)
    return units


# NOTE(review): these limits are not used in the visible part of this file.
# Presumably a memory cap in MB and a per-unit timeout in seconds - confirm.
RSS_LIMIT_MB = 2048
UNIT_TIMEOUT = 5
# Evaluated once at import time from experiment_utils.get_snapshot_seconds().
MAX_TOTAL_TIME = experiment_utils.get_snapshot_seconds()
示例#5
0
from common import experiment_utils
from common import fuzzer_config_utils
from common import gcloud
from common import logs
from common import yaml_utils
from database import models
from database import utils as db_utils

# Give the trial runner a little extra time to shut down and account for how
# long it can take to actually start running once an instance is started. 5
# minutes is an arbitrary amount of time.
GRACE_TIME_SECONDS = 5 * 60

# NOTE(review): presumably how long to wait after a failure before trying
# again (10 minutes) - confirm against the code that uses it.
FAIL_WAIT_SECONDS = 10 * 60

# Module-level logger instance (label 'scheduler').
logger = logs.Logger('scheduler')  # pylint: disable=invalid-name


def datetime_now() -> datetime.datetime:
    """Return the current time as a timezone-aware datetime in UTC.

    This lives in its own function so that it can be mocked."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)


# TODO(metzman): Figure out the best practices for functions that must return
# sqlalchemy.orm.Query. Importing it just for annotation might be confusing to
# readers. There may also be weird situations where it is acceptable to use
# either a list or a query (because of duck typing), but type hints prevent
# this unless handled intelligently.
def get_experiment_trials(experiment: str):
    """Returns a query of trials in |experiment|."""
示例#6
0
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Compute the coverage snapshot of trial |trial_num| (|fuzzer| on
    |benchmark|) at |cycle|.

    Returns None when copying the corpus archive for |cycle| from the
    filestore yields a non-zero return code. Otherwise returns a
    models.Snapshot carrying the current coverage and the fuzzer stats."""
    log = logs.Logger('measurer',
                      default_extras={
                          'fuzzer': fuzzer,
                          'benchmark': benchmark,
                          'trial_id': str(trial_num),
                          'cycle': str(cycle),
                      })
    measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num, log)

    started_at = time.time()
    log.info('Measuring cycle: %d.', cycle)
    elapsed = cycle * experiment_utils.get_snapshot_seconds()

    def make_snapshot():
        """Build a Snapshot from the measurer's current coverage and stats."""
        covered = measurer.get_current_coverage()
        stats = measurer.get_fuzzer_stats(cycle)
        return models.Snapshot(time=elapsed,
                               trial_id=trial_num,
                               edges_covered=covered,
                               fuzzer_stats=stats)

    # An unchanged cycle reuses the coverage that is already known.
    if measurer.is_cycle_unchanged(cycle):
        log.info('Cycle: %d is unchanged.', cycle)
        return make_snapshot()

    local_archive = os.path.join(
        measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    remote_archive = exp_path.filestore(local_archive)

    # Make sure the download destination exists.
    archive_parent = os.path.dirname(local_archive)
    if not os.path.exists(archive_parent):
        os.makedirs(archive_parent)

    copy_result = filestore_utils.cp(remote_archive,
                                     local_archive,
                                     expect_zero=False)
    if copy_result.retcode:
        log.warning('Corpus not found for cycle: %d.', cycle)
        return None

    measurer.initialize_measurement_dirs()
    measurer.extract_corpus(local_archive)
    # The archive is no longer needed once it has been extracted.
    os.remove(local_archive)

    # Run coverage on the new corpus units.
    measurer.run_cov_new_units()

    # Generate the coverage information for this cycle.
    measurer.generate_coverage_information(cycle)

    # Read back the resulting coverage and the fuzzer stats.
    result = make_snapshot()

    # Record the new corpus files.
    measurer.update_measured_files()

    # Archive crashes directory.
    measurer.archive_crashes(cycle)
    duration = round(time.time() - started_at, 2)
    log.info('Measured cycle: %d in %f seconds.', cycle, duration)
    return result
示例#7
0
    import experiment.build.gcb_build as buildlib
else:
    import experiment.build.local_build as buildlib

# FIXME: Make this configurable for users with the default quota of 10.
# Even though it says queueing happens, we end up exceeding limits on "get",
# so be conservative. Use 30 for now since this is the limit for the
# FuzzBench service.
MAX_CONCURRENT_BUILDS = 30

# Build fail retries and wait interval (5 * 60, presumably seconds).
NUM_BUILD_RETRIES = 3
BUILD_FAIL_WAIT = 5 * 60

# The 'benchmarks' directory directly under utils.ROOT_DIR.
BENCHMARKS_DIR = os.path.join(utils.ROOT_DIR, 'benchmarks')

# Module-level logger instance (label 'builder').
logger = logs.Logger('builder')  # pylint: disable=invalid-name


def build_base_images() -> Tuple[int, str]:
    """Build the base images by delegating to buildlib.build_base_images()
    and return its result unchanged."""
    result = buildlib.build_base_images()
    return result


def get_coverage_binary(benchmark: str) -> str:
    """Return the fuzz target binary of |benchmark| found in the benchmark's
    subdirectory of the coverage binaries directory."""
    binaries_root = build_utils.get_coverage_binaries_dir()
    target_name = benchmark_utils.get_fuzz_target(benchmark)
    benchmark_binaries_dir = binaries_root / benchmark
    return fuzzer_utils.get_fuzz_target_binary(benchmark_binaries_dir,
                                               fuzz_target_name=target_name)
示例#8
0
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|.

    Returns None when gsutil.cp reports a non-zero return code while copying
    the corpus archive for |cycle|. Otherwise returns a models.Snapshot whose
    edges_covered is the len() of the PCs reported by the snapshot
    measurer."""
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()
    # An unchanged cycle reuses the PCs that are already known.
    if snapshot_measurer.is_cycle_unchanged(cycle):
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        current_pcs = snapshot_measurer.get_current_pcs()
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(current_pcs))

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.gcs(corpus_archive_dst)

    # Make sure the download destination exists.
    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    if not os.path.exists(corpus_archive_dir):
        os.makedirs(corpus_archive_dir)
    # The first element of the result is the return code; non-zero is
    # treated as "the corpus for this cycle is not available".
    if gsutil.cp(corpus_archive_src,
                 corpus_archive_dst,
                 expect_zero=False,
                 parallel=False,
                 write_to_stdout=False)[0] != 0:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Get the coverage of the new corpus units.
    snapshot_measurer.run_cov_new_units()
    all_pcs = snapshot_measurer.merge_new_pcs()
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(all_pcs))

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    measuring_time = round(time.time() - measuring_start_time, 2)
    # Use a float conversion: '%d' would truncate the fractional seconds
    # that were just rounded to two decimal places.
    snapshot_logger.info('Measured cycle: %d in %.2f seconds.', cycle,
                         measuring_time)
    return snapshot
示例#9
0
import os
import json

from common import experiment_path as exp_path
from common import experiment_utils as exp_utils
from common import new_process
from common import benchmark_utils
from common import fuzzer_utils
from common import logs
from common import filestore_utils
from common import filesystem
from database import utils as db_utils
from database import models
from experiment.build import build_utils

# Module-level logger instance (label 'coverage_utils').
logger = logs.Logger('coverage_utils')  # pylint: disable=invalid-name

# NOTE(review): presumably the timeout (in seconds) used when getting from a
# queue - not used in the visible part of this file, confirm.
COV_DIFF_QUEUE_GET_TIMEOUT = 1


def get_coverage_info_dir():
    """Return the 'coverage' directory inside the experiment work directory,
    where coverage information (coverage report and json summary file) is
    stored."""
    return os.path.join(exp_utils.get_work_dir(), 'coverage')


def generate_coverage_reports(experiment_config: dict):
    """Generates coverage reports for each benchmark and fuzzer."""
    logs.initialize()
    logger.info('Start generating coverage reports.')
示例#10
0
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions for using the gsutil tool."""

from common import environment
from common import logs
from common import new_process

# Module-level logger instance (label 'gsutil').
logger = logs.Logger('gsutil')


def gsutil_command(arguments, *args, parallel=True, **kwargs):
    """Executes a gsutil command with |arguments| and returns the result."""
    if environment.get('LOCAL_EXPERIMENT'):
        logger.info('LOCAL_EXPERIMENT set, not running \'gsutil %s\'.',
                    ' '.join(arguments))
        return 0, ''
    command = ['gsutil']
    if parallel:
        command.append('-m')
    write_to_stdout = kwargs.pop('write_to_stdout', False)
    return new_process.execute(command + arguments,
                               *args,
                               write_to_stdout=write_to_stdout,
示例#11
0
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for processing crashes."""

import collections
import os
import re

from clusterfuzz import stacktraces

from common import logs
from common import new_process
from experiment.measurer import run_coverage
from experiment.measurer import sanitizer

# Module-level logger instance (label 'run_crashes').
logger = logs.Logger('run_crashes')

# Record type describing one crash: the testcase plus its type, address,
# state and stacktrace.
Crash = collections.namedtuple('Crash', [
    'crash_testcase', 'crash_type', 'crash_address', 'crash_state',
    'crash_stacktrace'
])

# Matches a whitespace character followed by either a run of digits or the
# literal "{*}" at the end of the string.
SIZE_REGEX = re.compile(r'\s([0-9]+|{\*})$', re.DOTALL)
# Matches a "<...>" group, or a "<" followed by the rest of its line when the
# group is not closed before the newline.
CPLUSPLUS_TEMPLATE_REGEX = re.compile(r'(<[^>]+>|<[^\n]+(?=\n))')


def _filter_crash_type(crash_type):
    """Return |crash_type| with a trailing size (as matched by SIZE_REGEX)
    removed."""
    return re.sub(SIZE_REGEX, '', crash_type)

示例#12
0
import argparse
import collections
import os
import re
import sys
from typing import Optional

from common import benchmark_utils
from common import logs
from common import utils
from common import yaml_utils
from database import models
from database import utils as db_utils
from experiment import run_experiment

# Module-level logger instance (label 'automatic_run_experiment').
logger = logs.Logger('automatic_run_experiment')  # pylint: disable=invalid-name

# service/experiment-config.yaml under utils.ROOT_DIR.
EXPERIMENT_CONFIG_FILE = os.path.join(utils.ROOT_DIR, 'service',
                                      'experiment-config.yaml')

# service/experiment-requests.yaml under utils.ROOT_DIR.
REQUESTED_EXPERIMENTS_PATH = os.path.join(utils.ROOT_DIR, 'service',
                                          'experiment-requests.yaml')

# Don't run an experiment if we have a "request" just containing this keyword.
# TODO(metzman): Look into replacing this mechanism for pausing the service.
PAUSE_SERVICE_KEYWORD = 'PAUSE_SERVICE'

# Matches names that start with four digits, a dash, two digits, a dash and
# two digits (a YYYY-MM-DD style prefix); anything may follow.
EXPERIMENT_NAME_REGEX = re.compile(r'^\d{4}-\d{2}-\d{2}.*')


def _get_experiment_name(experiment_config: dict) -> str:
# limitations under the License.
"""Module for starting instances to run measure workers."""
import collections
import os
import posixpath
import sys
import time

from common import experiment_utils
from common import gce
from common import gcloud
from common import logs
from common import queue_utils
from common import yaml_utils

# Module-level logger instance (label 'schedule_measure_workers').
logger = logs.Logger('schedule_measure_workers')  # pylint: disable=invalid-name

# This is the default quota on GCE.
# TODO(metzman): Use the GCE API to determine this quota.
MAX_INSTANCES_PER_GROUP = 1000


def get_instance_group_name(experiment: str):
    """Return the instance group name used for the measure workers of
    |experiment|: the experiment name prefixed with "worker-"."""
    # The prefix has to lead because the name cannot start with a number.
    return ''.join(('worker-', experiment))


def get_measure_worker_instance_template_name(experiment: str):
    """Returns an instance template name for measurer workers running in
示例#14
0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Stops a running experiment."""

import sys

from common import experiment_utils
from common import logs
from common import gce
from common import gcloud
from common import yaml_utils

# Module-level logger instance (label 'stop_experiment').
logger = logs.Logger('stop_experiment')  # pylint: disable=invalid-name


def stop_experiment(experiment_name, experiment_config_filename):
    """Stop the experiment specified by |experiment_config_filename|."""
    experiment_config = yaml_utils.read(experiment_config_filename)
    if experiment_config.get('local_experiment', False):
        raise NotImplementedError(
            'Local experiment stop logic is not implemented.')

    cloud_project = experiment_config['cloud_project']
    cloud_compute_zone = experiment_config['cloud_compute_zone']

    gce.initialize()
    instances = list(gce.get_instances(cloud_project, cloud_compute_zone))
示例#15
0
"""Utility functions for coverage data calculation."""

import collections
import itertools
import json
import posixpath
from typing import Dict, List, Tuple
import tempfile

import pandas as pd

from analysis import data_utils
from common import filestore_utils
from common import logs

# Module-level logger instance (label 'coverage_data_utils').
logger = logs.Logger('coverage_data_utils')


def fuzzer_and_benchmark_to_key(fuzzer: str, benchmark: str) -> str:
    """Return the key for |fuzzer| and |benchmark|: both names joined by a
    single space."""
    return ' '.join((fuzzer, benchmark))


def key_to_fuzzer_and_benchmark(key: str) -> Tuple[str, str]:
    """Split |key| on single spaces and return the pieces as a tuple, i.e.
    the fuzzer and the benchmark that |key| represents."""
    pieces = key.split(' ')
    return (*pieces,)


def get_experiment_filestore_path_for_fuzzer_benchmark(
    fuzzer: str,
示例#16
0
import posixpath

from common import experiment_utils
from common import experiment_path as exp_path
from common import filesystem
from common import filestore_utils
from common import logs
from common import yaml_utils
from analysis import generate_report
from analysis import data_utils

# Absolute path of service/core-fuzzers.yaml, resolved relative to the parent
# of the directory that contains this file.
CORE_FUZZERS_YAML = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', 'service',
                 'core-fuzzers.yaml'))

# Module-level logger instance (label 'reporter').
logger = logs.Logger('reporter')  # pylint: disable=invalid-name


def get_reports_dir():
    """Return what exp_path.path() yields for 'reports'."""
    reports_dir = exp_path.path('reports')
    return reports_dir


def get_core_fuzzers():
    """Return the 'fuzzers' entry of the core fuzzers YAML file, i.e. the
    list of core fuzzers to be used for merging experiment data."""
    core_fuzzers_config = yaml_utils.read(CORE_FUZZERS_YAML)
    return core_fuzzers_config['fuzzers']


def output_report(experiment_config: dict,
                  in_progress=False,
                  coverage_report=False):
示例#17
0
import argparse
import os
import sys

import pandas as pd

from analysis import data_utils
from analysis import coverage_data_utils
from analysis import experiment_results
from analysis import plotting
from analysis import queries
from analysis import rendering
from common import filesystem
from common import logs

# Module-level logger instance (label 'generate_report').
logger = logs.Logger('generate_report')


def get_arg_parser():
    """Returns argument parser."""
    parser = argparse.ArgumentParser(description='Report generator.')
    parser.add_argument('experiments', nargs='+', help='Experiment names')
    parser.add_argument(
        '-n',
        '--report-name',
        help='Name of the report. Default: name of the first experiment.')
    parser.add_argument(
        '-t',
        '--report-type',
        choices=['default', 'experimental'],
        default='default',