Exemplo n.º 1
0
def render_startup_script_template(instance_name: str, fuzzer: str,
                                   benchmark: str, trial_id: int,
                                   experiment_config: dict):
    """Return the runner startup script rendered for a single trial.

    The 'runner-startup-script-template.sh' template is filled in with the
    trial parameters passed as arguments and with settings read from
    |experiment_config|. The cloud zone and cloud project are only handed to
    the template when the experiment is not a local one."""
    registry = experiment_config['docker_registry']
    image_url = benchmark_utils.get_runner_image_url(benchmark, fuzzer,
                                                     registry)
    target = benchmark_utils.get_fuzz_target(benchmark)
    is_local = experiment_utils.is_local_experiment()
    script_template = JINJA_ENV.get_template(
        'runner-startup-script-template.sh')

    template_args = dict(
        instance_name=instance_name,
        benchmark=benchmark,
        experiment=experiment_config['experiment'],
        fuzzer=fuzzer,
        trial_id=trial_id,
        max_total_time=experiment_config['max_total_time'],
        experiment_filestore=experiment_config['experiment_filestore'],
        report_filestore=experiment_config['report_filestore'],
        fuzz_target=target,
        docker_image_url=image_url,
        docker_registry=registry,
        local_experiment=is_local,
    )

    # Cloud settings are only read from the config for cloud experiments.
    if not is_local:
        template_args.update(
            cloud_compute_zone=experiment_config['cloud_compute_zone'],
            cloud_project=experiment_config['cloud_project'])

    return script_template.render(**template_args)
Exemplo n.º 2
0
def end_expired_trials(experiment_config: dict):
    """End every trial of the experiment that is past its expiry.

    Each expired trial gets its time_ended set to the current time. Unless
    the experiment is local, the instances of those trials are deleted. The
    updated trials are saved to the database only when that deletion
    succeeded (or was not needed). Nothing is returned."""
    experiment = experiment_config['experiment']
    expired_trials = get_expired_trials(experiment,
                                        experiment_config['max_total_time'])
    end_time = datetime_now()
    instance_names = []
    for trial in expired_trials:
        instance_names.append(
            experiment_utils.get_trial_instance_name(experiment, trial.id))
        trial.time_ended = end_time

    # Test the list we built rather than expired_trials: the latter is truthy
    # until it has been evaluated.
    if not instance_names:
        return

    if not experiment_utils.is_local_experiment():
        if not delete_instances(instance_names, experiment_config):
            # We can't tell which instances were actually deleted, so leave
            # the database untouched and let the next call retry.
            logger.error('Failed to delete instances after trial expiry.')
            return

    db_utils.bulk_save(expired_trials)
def initialize(experiment_config: dict):
    """Set up what is needed to schedule measure workers and return the
    work queue.

    Creates an instance template and an instance group for the experiment's
    measure workers, then initializes the queue on the configured redis
    host."""
    logger.info('Initializing worker scheduling.')
    gce.initialize()

    experiment = experiment_config['experiment']
    project = experiment_config['project']
    template_name = get_measure_worker_instance_template_name(experiment)
    registry = experiment_config['docker_registry']
    docker_image = posixpath.join(registry,
                                  'measure-worker:{}'.format(experiment))

    redis_host = experiment_config['redis_host']
    # Environment handed to the instance template. The zone is read with
    # .get() here, so it may be None in this mapping.
    worker_env = {
        'REDIS_HOST': redis_host,
        'EXPERIMENT_FILESTORE': experiment_config['experiment_filestore'],
        'EXPERIMENT': experiment,
        'LOCAL_EXPERIMENT': experiment_utils.is_local_experiment(),
        'CLOUD_COMPUTE_ZONE': experiment_config.get('cloud_compute_zone'),
    }

    # The zone is mandatory for creating the template and the group.
    zone = experiment_config['cloud_compute_zone']
    template_url = gcloud.create_instance_template(template_name,
                                                   docker_image, worker_env,
                                                   project, zone)

    group_name = get_instance_group_name(experiment)
    base_name = get_base_worker_instance_name(experiment)
    gce.create_instance_group(group_name, template_url, base_name, project,
                              zone)

    return queue_utils.initialize_queue(redis_host)
Exemplo n.º 4
0
def create_instance(instance_name: str,
                    instance_type: InstanceType,
                    config: dict,
                    startup_script: str = None,
                    preemptible: bool = False,
                    **kwargs) -> bool:
    """Creates a GCE instance with name, |instance_name|, type, |instance_type|
    and with an optionally provided |startup_script|.

    For local experiments no gcloud command is run; the result of
    run_local_instance(startup_script) is returned instead.

    Dispatcher instances use the DISPATCHER_* machine and disk constants.
    Other instances use config['runner_machine_type'] when it is not None,
    otherwise a custom machine built from config['runner_memory'] and
    config['runner_num_cpu_cores'].

    |preemptible| adds the --preemptible flag. Extra |kwargs| are forwarded
    to new_process.execute. Returns True if the gcloud command exited with
    code 0, False otherwise."""

    if experiment_utils.is_local_experiment():
        return run_local_instance(startup_script)

    command = [
        'gcloud',
        'compute',
        'instances',
        'create',
        instance_name,
        '--image-family=cos-stable',
        '--image-project=cos-cloud',
        '--zone=%s' % config['cloud_compute_zone'],
        '--scopes=cloud-platform',
    ]
    if instance_type == InstanceType.DISPATCHER:
        command.extend([
            '--machine-type=%s' % DISPATCHER_MACHINE_TYPE,
            '--boot-disk-size=%s' % DISPATCHER_BOOT_DISK_SIZE,
            '--boot-disk-type=%s' % DISPATCHER_BOOT_DISK_TYPE,
        ])
    else:
        machine_type = config['runner_machine_type']
        if machine_type is not None:
            command.append('--machine-type=%s' % machine_type)
        else:
            # Do this to support KLEE experiments.
            # extend(), not append(): both flags must be added as separate
            # string arguments, not as one nested list inside the command.
            command.extend([
                '--custom-memory=%s' % config['runner_memory'],
                '--custom-cpu=%s' % config['runner_num_cpu_cores']
            ])

        command.extend([
            '--no-address',
            '--boot-disk-size=%s' % RUNNER_BOOT_DISK_SIZE,
        ])

    if preemptible:
        command.append('--preemptible')
    if startup_script:
        command.extend(
            ['--metadata-from-file', 'startup-script=' + startup_script])

    # expect_zero=False: a failure is reported through the return value below
    # rather than by new_process.execute itself.
    result = new_process.execute(command, expect_zero=False, **kwargs)
    if result.retcode == 0:
        return True

    logs.info('Failed to create instance. Command: %s failed. Output: %s',
              command, result.output)
    return False
Exemplo n.º 5
0
def dispatcher_main():
    """Run the experiment from start to finish and report its results.

    Initializes the database, builds the images for the trials, starts the
    scheduler (thread) and the measurer (process), and outputs a report
    every LOOP_WAIT_SECONDS until both of them have finished. Finally
    records the time the experiment ended."""
    logs.info('Starting experiment.')

    # The start method is set here because setting it in the measurer led to
    # failures.
    multiprocessing.set_start_method('spawn')
    db_utils.initialize()
    if experiment_utils.is_local_experiment():
        models.Base.metadata.create_all(db_utils.engine)

    experiment = Experiment(_get_config_file_path())
    config = experiment.config

    _initialize_experiment_in_db(config)

    trials = build_images_for_trials(experiment.fuzzers, experiment.benchmarks,
                                     experiment.num_trials,
                                     experiment.preemptible,
                                     experiment.concurrent_builds)
    _initialize_trials_in_db(trials)

    create_work_subdirs(['experiment-folders', 'measurement-folders'])

    # The scheduler runs in a separate thread, the measurer in a separate
    # process.
    scheduler_thread = threading.Thread(target=scheduler.schedule_loop,
                                        args=(config, ))
    scheduler_thread.start()

    measurer_process = multiprocessing.Process(
        target=measure_manager.measure_main, args=(config, ))
    measurer_process.start()

    is_complete = False
    while not is_complete:
        time.sleep(LOOP_WAIT_SECONDS)
        # The experiment is complete once the scheduler has stopped and the
        # measurer is no longer alive either.
        if not scheduler_thread.is_alive():
            is_complete = not measurer_process.is_alive()

        # Periodic report; the last one (when complete) is the final report
        # and includes the coverage report.
        reporter.output_report(config,
                               in_progress=not is_complete,
                               coverage_report=is_complete)

    scheduler_thread.join()
    measurer_process.join()

    _record_experiment_time_ended(experiment.experiment_name)
    logs.info('Experiment ended.')
Exemplo n.º 6
0
def create_instance(instance_name: str,
                    instance_type: InstanceType,
                    config: dict,
                    metadata: dict = None,
                    startup_script: str = None,
                    **kwargs) -> bool:
    """Create the GCE instance |instance_name| of type |instance_type|.

    |metadata| (key/value pairs) and |startup_script| are passed to gcloud
    when provided. For local experiments the result of
    run_local_instance(startup_script) is returned instead. Extra |kwargs|
    are forwarded to new_process.execute. Returns True when the gcloud
    command's return code is 0."""

    if experiment_utils.is_local_experiment():
        return run_local_instance(startup_script)

    zone_flag = '--zone=%s' % config['cloud_compute_zone']
    command = [
        'gcloud', 'compute', 'instances', 'create', instance_name,
        '--image-family=cos-stable', '--image-project=cos-cloud', zone_flag,
        '--scopes=cloud-platform'
    ]

    if instance_type == InstanceType.DISPATCHER:
        command += [
            '--machine-type=%s' % DISPATCHER_MACHINE_TYPE,
            '--boot-disk-size=%s' % DISPATCHER_BOOT_DISK_SIZE,
            '--boot-disk-type=%s' % DISPATCHER_BOOT_DISK_TYPE,
        ]
    else:
        command += [
            '--no-address',
            '--machine-type=%s' % RUNNER_MACHINE_TYPE,
            '--boot-disk-size=%s' % RUNNER_BOOT_DISK_SIZE,
        ]
        if config.get('preemptible_runners'):
            # TODO(metzman): Make runners signal to scheduler that they were
            # preempted, and make scheduler+measurer tolerate preemption.
            command.append('--preemptible')

    if metadata:
        # gcloud takes the metadata as one comma-separated key=value list.
        pairs = [
            '{key}={value}'.format(key=key, value=value)
            for key, value in metadata.items()
        ]
        command += ['--metadata', ','.join(pairs)]
    if startup_script:
        command += ['--metadata-from-file', 'startup-script=' + startup_script]

    # The first element of the result is compared against 0 to decide
    # success.
    result = new_process.execute(command, expect_zero=False, **kwargs)
    return result[0] == 0
Exemplo n.º 7
0
def schedule_loop(experiment_config: dict):
    """Keep running the scheduler until every trial of the experiment has
    ended.

    multiprocessing.set_start_method('spawn') must have been called before
    this function. Otherwise the Pool is created with fork, which breaks
    logging."""
    logger.info('Starting scheduler.')
    trials_query = get_experiment_trials(experiment_config['experiment'])
    num_trials = len(trials_query.all())
    local_experiment = experiment_utils.is_local_experiment()
    if not local_experiment:
        gce.initialize()
        trial_instance_manager = TrialInstanceManager(num_trials,
                                                      experiment_config)
    experiment = experiment_config['experiment']

    # A single pool is created and reused for the whole loop to avoid
    # leaking threads and other issues.
    with multiprocessing.Pool() as pool:
        handle_preempted = False
        while not all_trials_ended(experiment):
            try:
                # Start handling preempted instances only once every initial
                # trial was started. The flag never becomes True when running
                # locally, so trial_instance_manager is only used when it
                # exists.
                if not (local_experiment or handle_preempted or
                        any_pending_trials(experiment)):
                    handle_preempted = True

                schedule(experiment_config, pool)
                if handle_preempted:
                    trial_instance_manager.handle_preempted_trials()
            except Exception:  # pylint: disable=broad-except
                logger.error('Error occurred during scheduling.')

            # Wait before the next pass. We get here after an unexpected
            # exception, or when trials remain that could not be started yet
            # (e.g. because we ran out of instance quota).
            time.sleep(FAIL_WAIT_SECONDS)

    logger.info('Finished scheduling.')
Exemplo n.º 8
0
def create_instance(instance_name: str,
                    instance_type: InstanceType,
                    config: dict,
                    startup_script: str = None,
                    preemptible: bool = False,
                    **kwargs) -> bool:
    """Create the GCE instance |instance_name| of type |instance_type|.

    |startup_script| is passed to gcloud when provided and |preemptible|
    adds the --preemptible flag. For local experiments the result of
    run_local_instance(startup_script) is returned instead. Extra |kwargs|
    are forwarded to new_process.execute. Returns True when the gcloud
    command's return code is 0."""

    if experiment_utils.is_local_experiment():
        return run_local_instance(startup_script)

    zone_flag = '--zone=%s' % config['cloud_compute_zone']
    command = [
        'gcloud', 'compute', 'instances', 'create', instance_name,
        '--image-family=cos-stable', '--image-project=cos-cloud', zone_flag,
        '--scopes=cloud-platform'
    ]

    # Machine and disk flags depend on whether this is the dispatcher or a
    # runner.
    if instance_type == InstanceType.DISPATCHER:
        type_flags = [
            '--machine-type=%s' % DISPATCHER_MACHINE_TYPE,
            '--boot-disk-size=%s' % DISPATCHER_BOOT_DISK_SIZE,
            '--boot-disk-type=%s' % DISPATCHER_BOOT_DISK_TYPE,
        ]
    else:
        type_flags = [
            '--no-address',
            '--machine-type=%s' % RUNNER_MACHINE_TYPE,
            '--boot-disk-size=%s' % RUNNER_BOOT_DISK_SIZE,
        ]
    command += type_flags

    if preemptible:
        command.append('--preemptible')
    if startup_script:
        command += ['--metadata-from-file', 'startup-script=' + startup_script]

    # The first element of the result is compared against 0 to decide
    # success.
    result = new_process.execute(command, expect_zero=False, **kwargs)
    return result[0] == 0
Exemplo n.º 9
0
def render_startup_script_template(instance_name: str, benchmark: str,
                                   fuzzer: str, trial_id: int,
                                   experiment_config: dict):
    """Render the startup script using the template and the parameters
    provided and return the result.

    |fuzzer| is a fuzzer variant name; its configuration is looked up to
    find the underlying fuzzer (used for the docker image and passed to the
    template as 'fuzzer') and any extra environment variables. For local
    experiments the HOST_GCLOUD_CONFIG environment variable must be set; a
    KeyError is raised otherwise."""
    fuzzer_config = fuzzer_config_utils.get_by_variant_name(fuzzer)
    underlying_fuzzer_name = fuzzer_config['fuzzer']
    docker_image_url = benchmark_utils.get_runner_image_url(
        benchmark, underlying_fuzzer_name, experiment_config['cloud_project'])
    fuzz_target = benchmark_utils.get_fuzz_target(benchmark)

    # Convert additional environment variables from configuration to arguments
    # that will be passed to docker.
    additional_env = ''
    if 'env' in fuzzer_config:
        # An 'env' key with no value is treated like no variables at all.
        env_vars = fuzzer_config['env'] or {}
        # shlex.quote only accepts strings, so values that are not strings
        # (e.g. numbers or booleans) are converted first.
        additional_env = ' '.join(
            '-e {k}={v}'.format(k=k, v=shlex.quote(str(v)))
            for k, v in env_vars.items())

    local_experiment = experiment_utils.is_local_experiment()
    template = JINJA_ENV.get_template('runner-startup-script-template.sh')
    kwargs = {
        'instance_name': instance_name,
        'benchmark': benchmark,
        'experiment': experiment_config['experiment'],
        'fuzzer': underlying_fuzzer_name,
        'fuzzer_variant_name': fuzzer,
        'trial_id': trial_id,
        'max_total_time': experiment_config['max_total_time'],
        'cloud_project': experiment_config['cloud_project'],
        'cloud_compute_zone': experiment_config['cloud_compute_zone'],
        'cloud_experiment_bucket':
        experiment_config['cloud_experiment_bucket'],
        'fuzz_target': fuzz_target,
        'docker_image_url': docker_image_url,
        'additional_env': additional_env,
        'local_experiment': local_experiment
    }
    if local_experiment:
        kwargs['host_gcloud_config'] = os.environ['HOST_GCLOUD_CONFIG']

    return template.render(**kwargs)
Exemplo n.º 10
0
def main():
    """Run the dispatcher and return an exit code.

    Any exception raised while conducting the experiment is logged and
    re-raised. Local experiments return 0 once the dispatcher is done. For
    other experiments the experiment is stopped afterwards: 0 is returned
    if stopping it succeeded and 1 if it did not."""
    logs.initialize(default_extras={'component': 'dispatcher'})

    try:
        dispatcher_main()
    except Exception as error:
        logs.error('Error conducting experiment.')
        raise error

    # Only non-local experiments need to be stopped.
    if experiment_utils.is_local_experiment():
        return 0

    config_path = _get_config_file_path()
    experiment_name = experiment_utils.get_experiment_name()
    stopped = stop_experiment.stop_experiment(experiment_name, config_path)
    return 0 if stopped else 1
Exemplo n.º 11
0
import tarfile
import time
from typing import Callable, List, Tuple

from common import benchmark_utils
from common import experiment_path as exp_path
from common import experiment_utils
from common import filesystem
from common import fuzzer_utils
from common import utils
from common import gsutil
from common import logs

from experiment.build import build_utils

if not experiment_utils.is_local_experiment():
    import experiment.build.gcb_build as buildlib
else:
    import experiment.build.local_build as buildlib

# Maximum number of builds to run concurrently.
# FIXME: Make this configurable for users with the default quota of 10.
# Even though builds are supposed to be queued, we end up exceeding limits on
# "get", so be conservative. Use 30 for now since this is the limit for the
# FuzzBench service.
MAX_CONCURRENT_BUILDS = 30

# Number of retries for a failed build and the wait interval between them
# (presumably in seconds, i.e. 5 minutes -- confirm at the call site).
NUM_BUILD_RETRIES = 3
BUILD_FAIL_WAIT = 5 * 60

# Directory named 'benchmarks' directly under the repository root.
BENCHMARKS_DIR = os.path.join(utils.ROOT_DIR, 'benchmarks')