def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Compute the coverage snapshot at |cycle| for trial |trial_num| of
    |fuzzer| on |benchmark|.

    Returns None when the trial directory does not exist yet or when
    extract_cycle_corpus reports failure for |cycle|. Otherwise returns a
    models.Snapshot whose edges_covered is the number of PCs reported by the
    snapshot measurer."""
    log_extras = {
        'fuzzer': fuzzer,
        'benchmark': benchmark,
        'trial_id': str(trial_num),
        'cycle': str(cycle),
    }
    cycle_logger = logs.Logger('measurer', default_extras=log_extras)
    measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num, cycle_logger)

    # Nothing can be measured before the trial has produced its directory.
    trial_dir_exists = os.path.exists(measurer.trial_dir)
    if not trial_dir_exists:
        cycle_logger.warning('Trial dir: %s does not exist yet.',
                             measurer.trial_dir)
        return None

    snapshot_time = cycle * experiment_utils.SNAPSHOT_PERIOD

    # An unchanged cycle reuses the PCs that were already collected.
    if snapshot_measurer_unchanged := measurer.is_cycle_unchanged(cycle):
        del snapshot_measurer_unchanged
        cycle_logger.info('Cycle: %d is unchanged.', cycle)
        known_pcs = measurer.get_current_pcs()
        return models.Snapshot(time=snapshot_time,
                               trial_id=trial_num,
                               edges_covered=len(known_pcs))

    measurer.initialize_measurement_dirs()

    extracted = measurer.extract_cycle_corpus(cycle)
    if not extracted:
        return None

    # Run coverage on the new units and merge the resulting PCs.
    measurer.run_cov_new_units()
    covered_pcs = measurer.merge_new_pcs()
    result = models.Snapshot(time=snapshot_time,
                             trial_id=trial_num,
                             edges_covered=len(covered_pcs))

    # The corpus just measured becomes the previous corpus.
    filesystem.replace_dir(measurer.corpus_dir, measurer.prev_corpus_dir)

    # Keep a copy of the crashes directory for this cycle.
    measurer.archive_crashes(cycle)
    cycle_logger.info('Measured cycle: %d.', cycle)
    return result
def get_covered_region(experiment: str, fuzzer: str, benchmark: str,
                       q: multiprocessing.Queue):
    """Collect the covered regions of every trial of the |fuzzer|-|benchmark|
    pair in |experiment| and put the result on |q|.

    The item put on |q| is a single-entry dict that maps the
    fuzzer-benchmark key to the union of the regions of all trials."""
    initialize_logs()
    logger.debug('Measuring covered region: fuzzer: %s, benchmark: %s.',
                 fuzzer, benchmark)
    key = get_fuzzer_benchmark_key(fuzzer, benchmark)

    # Accumulate into one set instead of rebinding a new union every trial.
    merged_regions = set()
    for trial_id in get_trial_ids(experiment, fuzzer, benchmark):
        logger.info('Measuring covered region: trial_id = %d.', trial_id)
        trial_extras = {
            'fuzzer': fuzzer,
            'benchmark': benchmark,
            'trial_id': str(trial_id),
        }
        trial_logger = logs.Logger('measurer', default_extras=trial_extras)
        measurer = SnapshotMeasurer(fuzzer, benchmark, trial_id, trial_logger)
        merged_regions.update(measurer.get_current_covered_regions())

    q.put({key: merged_regions})
    logger.debug('Done measuring covered region: fuzzer: %s, benchmark: %s.',
                 fuzzer, benchmark)
from sqlalchemy import orm

from common import experiment_utils
from common import experiment_path as exp_path
from common import filesystem
from common import gsutil
from common import logs
from common import utils
from database import utils as db_utils
from database import models
from experiment.build import builder
from experiment import run_coverage
from experiment import scheduler
from third_party import sancov

logger = logs.Logger('measurer')  # pylint: disable=invalid-name

# One unit of measuring work: a single cycle of a single trial.
SnapshotMeasureRequest = collections.namedtuple(
    'SnapshotMeasureRequest', ['fuzzer', 'benchmark', 'trial_id', 'cycle'])

NUM_RETRIES = 3
RETRY_DELAY = 3
FAIL_WAIT_SECONDS = 30
SNAPSHOT_QUEUE_GET_TIMEOUT = 1
SNAPSHOTS_BATCH_SAVE_SIZE = 100


def get_experiment_folders_dir():
    """Return the path of the 'experiment-folders' directory as resolved by
    exp_path.path."""
    experiment_folders_dir = exp_path.path('experiment-folders')
    return experiment_folders_dir
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for running a sancov instrumented binary on a corpus."""
import os
import tempfile
from typing import List

from common import experiment_utils
from common import logs
from common import new_process

logger = logs.Logger('run_coverage')


def find_crashing_units(artifacts_dir: str) -> List[str]:
    """Returns the names of the crashing units found in |artifacts_dir|.

    Artifacts are assumed to be named {crash,oom,timeout,*}-$SHA1_HASH and
    input units are assumed to be named with their hash, so the unit name is
    the text after the last '-' of each regular file's name. Files whose name
    contains no '-' do not follow the convention and are skipped with a
    warning instead of raising IndexError."""
    crashing_units = []
    for filename in os.listdir(artifacts_dir):
        if not os.path.isfile(os.path.join(artifacts_dir, filename)):
            continue
        # Use the last dash so that multi-dash prefixes (e.g. "slow-unit-")
        # still yield the hash rather than a piece of the prefix.
        _, separator, unit_name = filename.rpartition('-')
        if not separator:
            logger.warning('Unexpected artifact name: %s.', filename)
            continue
        crashing_units.append(unit_name)
    return crashing_units


RSS_LIMIT_MB = 2048
UNIT_TIMEOUT = 5
MAX_TOTAL_TIME = experiment_utils.get_snapshot_seconds()
from common import experiment_utils from common import fuzzer_config_utils from common import gcloud from common import logs from common import yaml_utils from database import models from database import utils as db_utils # Give the trial runner a little extra time to shut down and account for how # long it can take to actually start running once an instance is started. 5 # minutes is an arbitrary amount of time. GRACE_TIME_SECONDS = 5 * 60 FAIL_WAIT_SECONDS = 10 * 60 logger = logs.Logger('scheduler') # pylint: disable=invalid-name def datetime_now() -> datetime.datetime: """Return datetime.datetime.utcnow(). This function is needed for mocking.""" return datetime.datetime.now(datetime.timezone.utc) # TODO(metzman): Figure out what are the best practices for the functions which # must return sqlalchemy.orm.Query. Importing it just for annotation might be # confusing to readers. There may also be weird situations where it is # acceptable to use a list or query (because of duck typing) but type hints # prevents us unless handled intelligently). def get_experiment_trials(experiment: str): """Returns a query of trials in |experiment|."""
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|.

    Returns a models.Snapshot carrying the covered regions and the fuzzer
    stats for |cycle|, or None when the corpus archive for |cycle| could not
    be copied from the filestore."""
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()

    # An unchanged cycle needs no new coverage run: report the current
    # coverage together with this cycle's fuzzer stats.
    if snapshot_measurer.is_cycle_unchanged(cycle):
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        regions_covered = snapshot_measurer.get_current_coverage()
        fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=regions_covered,
                               fuzzer_stats=fuzzer_stats_data)

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.filestore(corpus_archive_dst)

    # exist_ok avoids the race between checking for the directory and
    # creating it when another process creates it in between.
    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    os.makedirs(corpus_archive_dir, exist_ok=True)

    # A non-zero return code means the archive is not available.
    if filestore_utils.cp(corpus_archive_src,
                          corpus_archive_dst,
                          expect_zero=False).retcode:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Run coverage on the new corpus units.
    snapshot_measurer.run_cov_new_units()

    # Generate profdata and transform it into json form.
    snapshot_measurer.generate_coverage_information(cycle)

    # Get the coverage of the new corpus units.
    regions_covered = snapshot_measurer.get_current_coverage()
    fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=regions_covered,
                               fuzzer_stats=fuzzer_stats_data)

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    measuring_time = round(time.time() - measuring_start_time, 2)
    snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle,
                         measuring_time)
    return snapshot
import experiment.build.gcb_build as buildlib else: import experiment.build.local_build as buildlib # FIXME: Make this configurable for users with the default quota of 10. # Even though it says queueing happen, we end up exceeding limits on "get", so # be conservative. Use 30 for now since this is limit for FuzzBench service. MAX_CONCURRENT_BUILDS = 30 # Build fail retries and wait interval. NUM_BUILD_RETRIES = 3 BUILD_FAIL_WAIT = 5 * 60 BENCHMARKS_DIR = os.path.join(utils.ROOT_DIR, 'benchmarks') logger = logs.Logger('builder') # pylint: disable=invalid-name def build_base_images() -> Tuple[int, str]: """Build base images.""" return buildlib.build_base_images() def get_coverage_binary(benchmark: str) -> str: """Get the coverage binary for benchmark.""" coverage_binaries_dir = build_utils.get_coverage_binaries_dir() fuzz_target = benchmark_utils.get_fuzz_target(benchmark) return fuzzer_utils.get_fuzz_target_binary(coverage_binaries_dir / benchmark, fuzz_target_name=fuzz_target)
def measure_snapshot_coverage(fuzzer: str, benchmark: str, trial_num: int,
                              cycle: int) -> models.Snapshot:
    """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
    and |benchmark|.

    Returns a models.Snapshot whose edges_covered is the number of PCs
    reported by the snapshot measurer, or None when the corpus archive for
    |cycle| could not be copied."""
    snapshot_logger = logs.Logger('measurer',
                                  default_extras={
                                      'fuzzer': fuzzer,
                                      'benchmark': benchmark,
                                      'trial_id': str(trial_num),
                                      'cycle': str(cycle),
                                  })
    snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
                                         snapshot_logger)

    measuring_start_time = time.time()
    snapshot_logger.info('Measuring cycle: %d.', cycle)
    this_time = cycle * experiment_utils.get_snapshot_seconds()

    # An unchanged cycle needs no new coverage run: reuse the current PCs.
    if snapshot_measurer.is_cycle_unchanged(cycle):
        snapshot_logger.info('Cycle: %d is unchanged.', cycle)
        current_pcs = snapshot_measurer.get_current_pcs()
        return models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(current_pcs))

    corpus_archive_dst = os.path.join(
        snapshot_measurer.trial_dir, 'corpus',
        experiment_utils.get_corpus_archive_name(cycle))
    corpus_archive_src = exp_path.gcs(corpus_archive_dst)

    # exist_ok avoids the race between checking for the directory and
    # creating it when another process creates it in between.
    corpus_archive_dir = os.path.dirname(corpus_archive_dst)
    os.makedirs(corpus_archive_dir, exist_ok=True)

    # The first element of the result is the return code; non-zero means the
    # archive is not available.
    if gsutil.cp(corpus_archive_src,
                 corpus_archive_dst,
                 expect_zero=False,
                 parallel=False,
                 write_to_stdout=False)[0] != 0:
        snapshot_logger.warning('Corpus not found for cycle: %d.', cycle)
        return None

    snapshot_measurer.initialize_measurement_dirs()
    snapshot_measurer.extract_corpus(corpus_archive_dst)
    # Don't keep corpus archives around longer than they need to be.
    os.remove(corpus_archive_dst)

    # Get the coverage of the new corpus units.
    snapshot_measurer.run_cov_new_units()
    all_pcs = snapshot_measurer.merge_new_pcs()
    snapshot = models.Snapshot(time=this_time,
                               trial_id=trial_num,
                               edges_covered=len(all_pcs))

    # Record the new corpus files.
    snapshot_measurer.update_measured_files()

    # Archive crashes directory.
    snapshot_measurer.archive_crashes(cycle)

    measuring_time = round(time.time() - measuring_start_time, 2)
    # %f rather than %d: %d would truncate the fractional seconds that were
    # just rounded to two places.
    snapshot_logger.info('Measured cycle: %d in %f seconds.', cycle,
                         measuring_time)
    return snapshot
import os import json from common import experiment_path as exp_path from common import experiment_utils as exp_utils from common import new_process from common import benchmark_utils from common import fuzzer_utils from common import logs from common import filestore_utils from common import filesystem from database import utils as db_utils from database import models from experiment.build import build_utils logger = logs.Logger('coverage_utils') # pylint: disable=invalid-name COV_DIFF_QUEUE_GET_TIMEOUT = 1 def get_coverage_info_dir(): """Returns the directory to store coverage information including coverage report and json summary file.""" work_dir = exp_utils.get_work_dir() return os.path.join(work_dir, 'coverage') def generate_coverage_reports(experiment_config: dict): """Generates coverage reports for each benchmark and fuzzer.""" logs.initialize() logger.info('Start generating coverage reports.')
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Helper functions for using the gsutil tool.""" from common import environment from common import logs from common import new_process logger = logs.Logger('gsutil') def gsutil_command(arguments, *args, parallel=True, **kwargs): """Executes a gsutil command with |arguments| and returns the result.""" if environment.get('LOCAL_EXPERIMENT'): logger.info('LOCAL_EXPERIMENT set, not running \'gsutil %s\'.', ' '.join(arguments)) return 0, '' command = ['gsutil'] if parallel: command.append('-m') write_to_stdout = kwargs.pop('write_to_stdout', False) return new_process.execute(command + arguments, *args, write_to_stdout=write_to_stdout,
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for processing crashes."""
import collections
import os
import re

from clusterfuzz import stacktraces

from common import logs
from common import new_process
from experiment.measurer import run_coverage
from experiment.measurer import sanitizer

logger = logs.Logger('run_crashes')

Crash = collections.namedtuple('Crash', [
    'crash_testcase', 'crash_type', 'crash_address', 'crash_state',
    'crash_stacktrace'
])

SIZE_REGEX = re.compile(r'\s([0-9]+|{\*})$', re.DOTALL)
CPLUSPLUS_TEMPLATE_REGEX = re.compile(r'(<[^>]+>|<[^\n]+(?=\n))')


def _filter_crash_type(crash_type):
    """Returns |crash_type| with every SIZE_REGEX match (a trailing
    whitespace-separated number or {*}) removed."""
    without_size = re.sub(SIZE_REGEX, '', crash_type)
    return without_size
import argparse import collections import os import re import sys from typing import Optional from common import benchmark_utils from common import logs from common import utils from common import yaml_utils from database import models from database import utils as db_utils from experiment import run_experiment logger = logs.Logger('automatic_run_experiment') # pylint: disable=invalid-name EXPERIMENT_CONFIG_FILE = os.path.join(utils.ROOT_DIR, 'service', 'experiment-config.yaml') REQUESTED_EXPERIMENTS_PATH = os.path.join(utils.ROOT_DIR, 'service', 'experiment-requests.yaml') # Don't run an experiment if we have a "request" just containing this keyword. # TODO(metzman): Look into replacing this mechanism for pausing the service. PAUSE_SERVICE_KEYWORD = 'PAUSE_SERVICE' EXPERIMENT_NAME_REGEX = re.compile(r'^\d{4}-\d{2}-\d{2}.*') def _get_experiment_name(experiment_config: dict) -> str:
# limitations under the License. """Module for starting instances to run measure workers.""" import collections import os import posixpath import sys import time from common import experiment_utils from common import gce from common import gcloud from common import logs from common import queue_utils from common import yaml_utils logger = logs.Logger('schedule_measure_workers') # pylint: disable=invalid-name # This is the default quota on GCE. # TODO(metzman): Use the GCE API to determine this quota. MAX_INSTANCES_PER_GROUP = 1000 def get_instance_group_name(experiment: str): """Returns the name of the instance group of measure workers for |experiment|.""" # "worker-" needs to come first because name cannot start with number. return 'worker-' + experiment def get_measure_worker_instance_template_name(experiment: str): """Returns an instance template name for measurer workers running in
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Stops a running experiment.""" import sys from common import experiment_utils from common import logs from common import gce from common import gcloud from common import yaml_utils logger = logs.Logger('stop_experiment') # pylint: disable=invalid-name def stop_experiment(experiment_name, experiment_config_filename): """Stop the experiment specified by |experiment_config_filename|.""" experiment_config = yaml_utils.read(experiment_config_filename) if experiment_config.get('local_experiment', False): raise NotImplementedError( 'Local experiment stop logic is not implemented.') cloud_project = experiment_config['cloud_project'] cloud_compute_zone = experiment_config['cloud_compute_zone'] gce.initialize() instances = list(gce.get_instances(cloud_project, cloud_compute_zone))
"""Utility functions for coverage data calculation.""" import collections import itertools import json import posixpath from typing import Dict, List, Tuple import tempfile import pandas as pd from analysis import data_utils from common import filestore_utils from common import logs logger = logs.Logger('coverage_data_utils') def fuzzer_and_benchmark_to_key(fuzzer: str, benchmark: str) -> str: """Returns the key representing |fuzzer| and |benchmark|.""" return fuzzer + ' ' + benchmark def key_to_fuzzer_and_benchmark(key: str) -> Tuple[str, str]: """Returns a tuple containing the fuzzer and the benchmark represented by |key|.""" return tuple(key.split(' ')) def get_experiment_filestore_path_for_fuzzer_benchmark( fuzzer: str,
import posixpath from common import experiment_utils from common import experiment_path as exp_path from common import filesystem from common import filestore_utils from common import logs from common import yaml_utils from analysis import generate_report from analysis import data_utils CORE_FUZZERS_YAML = os.path.abspath( os.path.join(os.path.dirname(__file__), '..', 'service', 'core-fuzzers.yaml')) logger = logs.Logger('reporter') # pylint: disable=invalid-name def get_reports_dir(): """Return reports directory.""" return exp_path.path('reports') def get_core_fuzzers(): """Return list of core fuzzers to be used for merging experiment data.""" return yaml_utils.read(CORE_FUZZERS_YAML)['fuzzers'] def output_report(experiment_config: dict, in_progress=False, coverage_report=False):
import argparse import os import sys import pandas as pd from analysis import data_utils from analysis import coverage_data_utils from analysis import experiment_results from analysis import plotting from analysis import queries from analysis import rendering from common import filesystem from common import logs logger = logs.Logger('generate_report') def get_arg_parser(): """Returns argument parser.""" parser = argparse.ArgumentParser(description='Report generator.') parser.add_argument('experiments', nargs='+', help='Experiment names') parser.add_argument( '-n', '--report-name', help='Name of the report. Default: name of the first experiment.') parser.add_argument( '-t', '--report-type', choices=['default', 'experimental'], default='default',