Example #1
def initialize_resources_dir():
  """Download Fuchsia QEMU resources from GCS bucket."""
  # This module depends on multiprocessing, which is not available in
  # appengine, and since appengine *imports* this file (but does not run this
  # function!), we import it here.
  from google_cloud_utils import gsutil
  resources_dir = environment.get_value('RESOURCES_DIR')
  if not resources_dir:
    raise errors.FuchsiaConfigError('Could not find RESOURCES_DIR')
  fuchsia_resources_dir = os.path.join(resources_dir, 'fuchsia')

  shell.create_directory(
      fuchsia_resources_dir, create_intermediates=True, recreate=True)

  # Bucket for QEMU resources.
  fuchsia_resources_url = environment.get_value('FUCHSIA_RESOURCES_URL')
  if not fuchsia_resources_url:
    raise errors.FuchsiaConfigError(
        'Could not find path for remote '
        'Fuchsia resources bucket (FUCHSIA_RESOURCES_URL)')

  gsutil_command_arguments = [
      '-m', 'cp', '-r', fuchsia_resources_url, fuchsia_resources_dir
  ]
  logs.log("Beginning Fuchsia SDK download.")
  result = gsutil.GSUtilRunner().run_gsutil(gsutil_command_arguments)
  if result.return_code or result.timed_out:
    raise errors.FuchsiaSdkError('Failed to download Fuchsia '
                                 'resources: ' + result.output)
  logs.log("Fuchsia SDK download complete.")

  # Bucket for build resources. Necessary for fuzzer selection.
  logs.log("Fetching Fuchsia build.")
  fuchsia_build_url = environment.get_value('FUCHSIA_BUILD_URL')
  if not fuchsia_build_url:
    raise errors.FuchsiaConfigError('Could not find path for remote '
                                    'Fuchsia build bucket (FUCHSIA_BUILD_URL)')

  gsutil_command_arguments = [
      '-m', 'cp', '-r', fuchsia_build_url, fuchsia_resources_dir
  ]
  logs.log("Beginning Fuchsia build download.")
  result = gsutil.GSUtilRunner().run_gsutil(gsutil_command_arguments)
  if result.return_code or result.timed_out:
    raise errors.FuchsiaSdkError('Failed to download Fuchsia '
                                 'resources: ' + result.output)

  return fuchsia_resources_dir
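
A minimal driver sketch for the function above; the import path, the environment values, and the GCS URLs are assumptions for illustration, not part of the original example:

# Illustrative only: the 'system.environment' import path and all values
# below are assumed; set the variables however your deployment does.
from system import environment

environment.set_value('RESOURCES_DIR', '/tmp/clusterfuzz-resources')
environment.set_value('FUCHSIA_RESOURCES_URL', 'gs://example-bucket/fuchsia/*')
environment.set_value('FUCHSIA_BUILD_URL', 'gs://example-bucket/fuchsia-build/*')
fuchsia_dir = initialize_resources_dir()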
Example #2
def initialize_resources_dir():
    """Download Fuchsia QEMU resources from GCS bucket."""
    resources_dir = environment.get_value('RESOURCES_DIR')
    if not resources_dir:
        raise errors.FuchsiaConfigError('Could not find RESOURCES_DIR')
    fuchsia_resources_dir = os.path.join(resources_dir, 'fuchsia')

    shell.create_directory(fuchsia_resources_dir, recreate=True)

    fuchsia_resources_url = environment.get_value('FUCHSIA_RESOURCES_URL')
    if not fuchsia_resources_url:
        raise errors.FuchsiaConfigError(
            'Could not find path for remote '
            'Fuchsia resources bucket (FUCHSIA_RESOURCES_URL)')

    gsutil_command_arguments = [
        '-m', 'cp', '-r', fuchsia_resources_url, fuchsia_resources_dir
    ]
    logs.log("Beginning Fuchsia SDK download.")
    result = gsutil.GSUtilRunner().run_gsutil(gsutil_command_arguments)
    if result.return_code or result.timed_out:
        raise errors.FuchsiaSdkError('Failed to download Fuchsia '
                                     'resources: ' + result.output)
    logs.log("Fuchsia SDK download complete.")
    return fuchsia_resources_dir
Example #3
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory):
    """Upload test cases from the list to a cloud storage bucket."""
    bucket_name = local_config.ProjectConfig().get(
        'coverage.fuzzer-testcases.bucket')
    if not bucket_name:
        return

    # Only consider test cases in the output directory. We might upload too much
    # if we search the data directory as well, or have missing resources.
    # TODO(mbarbella): Support resources in data bundles.
    testcase_list = [
        os.path.relpath(testcase, testcase_directory)
        for testcase in testcase_list
        if testcase.startswith(testcase_directory)
    ]
    if not testcase_list:
        return

    # Bail out if this batch of test cases is too large.
    directory_size = shell.get_directory_size(testcase_directory)
    if directory_size >= MAX_TESTCASE_DIRECTORY_SIZE:
        return

    formatted_date = str(utils.utcnow().date())
    gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
        bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

    runner = gsutil.GSUtilRunner()
    batch_directory_blobs = storage.list_blobs(gcs_base_url)
    total_testcases = 0
    for blob in batch_directory_blobs:
        if not blob.endswith(LIST_FILE_BASENAME):
            continue

        list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name,
                                                     blob=blob)
        data = storage.read_data(list_gcs_url)
        if not data:
            logs.log_error(
                'Read no data from test case list at {gcs_url}'.format(
                    gcs_url=list_gcs_url))
            continue

        total_testcases += len(data.splitlines())

        # If we've already uploaded enough test cases for this fuzzer today, return.
        if total_testcases >= TESTCASES_PER_DAY:
            return

    # Upload each batch of tests to its own unique sub-bucket.
    identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
    gcs_base_url += utils.string_hash(identifier)

    list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
    if not storage.write_data('\n'.join(testcase_list), list_gcs_url):
        return

    runner.rsync(testcase_directory, gcs_base_url)
    logs.log('Synced {count} test cases to {gcs_url}'.format(
        count=len(testcase_list), gcs_url=gcs_base_url))
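
The closing rsync call mirrors the whole testcase directory into the dated, hashed sub-bucket. A standalone sketch of the same runner (paths and bucket are placeholders), using only the result fields these examples already rely on:

runner = gsutil.GSUtilRunner()
result = runner.rsync('/tmp/testcases', 'gs://example-bucket/2020-01-01/my_fuzzer/abc123')
if result.return_code:
    logs.log_error('Sync failed: ' + result.output)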
Example #4
    def setUp(self):
        test_helpers.patch_environ(self)
        test_helpers.patch(self,
                           ["system.new_process.ProcessRunner.run_and_wait"])

        test_utils.set_up_pyfakefs(self)
        self.gsutil_runner_obj = gsutil.GSUtilRunner()
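
With ProcessRunner.run_and_wait patched, tests can drive GSUtilRunner without touching gsutil or the network. An illustrative test body (not from the original; it relies on the self.mock.<name> convention visible in the other setUp examples):

    def test_rsync_invokes_gsutil(self):
        # Illustrative: the patched run_and_wait records the call instead of
        # actually spawning gsutil.
        self.gsutil_runner_obj.rsync('/local/corpus', 'gs://bucket/corpus')
        self.assertTrue(self.mock.run_and_wait.called)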
Example #5
def initialize_resources_dir():
    """Download Fuchsia QEMU resources from GCS bucket."""
    # This module depends on multiprocessing, which is not available in
    # appengine, and since appengine *imports* this file (but does not run this
    # function!), we import it here.
    from google_cloud_utils import gsutil
    resources_dir = environment.get_value('RESOURCES_DIR')
    if not resources_dir:
        raise errors.FuchsiaConfigError('Could not find RESOURCES_DIR')
    fuchsia_resources_dir = os.path.join(resources_dir, 'fuchsia')

    shell.create_directory(fuchsia_resources_dir,
                           create_intermediates=True,
                           recreate=True)

    # Bucket for QEMU resources.
    fuchsia_resources_url = environment.get_value('FUCHSIA_BUILD_URL')
    if not fuchsia_resources_url:
        raise errors.FuchsiaConfigError(
            'Could not find path for remote '
            'Fuchsia resources bucket (FUCHSIA_BUILD_URL)')

    gsutil_command_arguments = [
        '-m', 'cp', '-r', fuchsia_resources_url, fuchsia_resources_dir
    ]
    logs.log("Fetching Fuchsia build.")
    result = gsutil.GSUtilRunner().run_gsutil(gsutil_command_arguments)
    if result.return_code or result.timed_out:
        raise errors.FuchsiaSdkError('Failed to download Fuchsia '
                                     'resources: ' + result.output)

    # Chmod the symbolizers so they can be used easily.
    symbolizer_path = os.path.join(fuchsia_resources_dir, 'build', 'zircon',
                                   'prebuilt', 'downloads', 'symbolize',
                                   'linux-x64', 'symbolize')
    llvm_symbolizer_path = os.path.join(fuchsia_resources_dir, 'build',
                                        'buildtools', 'linux-x64', 'clang',
                                        'bin', 'llvm-symbolizer')
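    # Mode 0o111 (--x--x--x) marks both symbolizer binaries as executable.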
    os.chmod(symbolizer_path, 0o111)
    os.chmod(llvm_symbolizer_path, 0o111)

    logs.log("Fuchsia build download complete.")

    return fuchsia_resources_dir
Example #6
def update_data_bundle(fuzzer, data_bundle):
    """Updates a data bundle to the latest version."""
    # This module can't be in the global imports due to appengine issues
    # with multiprocessing and psutil imports.
    from google_cloud_utils import gsutil

    # If we are using a data bundle on NFS, it is expected that our testcases
    # will usually be large enough that we would fill up our tmpfs directory
    # pretty quickly. So, change it to use an on-disk directory.
    if not data_bundle.is_local:
        testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK')
        environment.set_value('FUZZ_INPUTS', testcase_disk_directory)

    data_bundle_directory = get_data_bundle_directory(fuzzer.name)
    if not data_bundle_directory:
        logs.log_error('Failed to setup data bundle %s.' % data_bundle.name)
        return False

    if not shell.create_directory(data_bundle_directory,
                                  create_intermediates=True):
        logs.log_error('Failed to create data bundle %s directory.' %
                       data_bundle.name)
        return False

    # Check if data bundle is up to date. If yes, skip the update.
    if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
        logs.log('Data bundle was recently synced, skip.')
        return True

    # Fetch lock for this data bundle.
    if not _fetch_lock_for_data_bundle_update(data_bundle):
        logs.log_error('Failed to lock data bundle %s.' % data_bundle.name)
        return False

    # Re-check if another bot did the sync already. If yes, skip.
    if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
        logs.log('Another bot finished the sync, skip.')
        _release_lock_for_data_bundle_update(data_bundle)
        return True

    time_before_sync_start = time.time()

    # No need to sync anything if this is a search index data bundle. In that
    # case, the fuzzer will generate testcases from a gcs bucket periodically.
    if not _is_search_index_data_bundle(data_bundle.name):
        bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name)

        if environment.is_trusted_host() and data_bundle.sync_to_worker:
            from bot.untrusted_runner import corpus_manager
            from bot.untrusted_runner import file_host
            worker_data_bundle_directory = file_host.rebase_to_worker_root(
                data_bundle_directory)

            file_host.create_directory(worker_data_bundle_directory,
                                       create_intermediates=True)
            result = corpus_manager.RemoteGSUtilRunner().rsync(
                bucket_url, worker_data_bundle_directory, delete=False)
        else:
            result = gsutil.GSUtilRunner().rsync(bucket_url,
                                                 data_bundle_directory,
                                                 delete=False)

        if result.return_code != 0:
            logs.log_error('Failed to sync data bundle %s: %s.' %
                           (data_bundle.name, result.output))
            _release_lock_for_data_bundle_update(data_bundle)
            return False

    # Update the testcase list file.
    testcase_manager.create_testcase_list_file(data_bundle_directory)

    # Write the last synced time to the sync file.
    sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory)
    utils.write_data_to_file(time_before_sync_start, sync_file_path)
    if environment.is_trusted_host() and data_bundle.sync_to_worker:
        from bot.untrusted_runner import file_host
        worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path)
        file_host.copy_file_to_worker(sync_file_path, worker_sync_file_path)

    # Release acquired lock.
    _release_lock_for_data_bundle_update(data_bundle)

    return True
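
The check/lock/re-check sequence above is double-checked locking: the first freshness check skips lock traffic in the common case, and the second catches a bot that completed the sync while this one was acquiring the lock. Distilled, with the helper names from the example:

if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    return True  # Fresh already; no lock needed.
if not _fetch_lock_for_data_bundle_update(data_bundle):
    return False  # Could not acquire the lock.
if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    _release_lock_for_data_bundle_update(data_bundle)
    return True  # Another bot finished the sync first.
# ... perform the sync, write the sync file, release the lock ...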
Example #7
    def setUp(self):
        """Set up."""
        super(CorpusPruningTestUntrusted, self).setUp()
        environment.set_value('JOB_NAME', 'libfuzzer_asan_job')

        helpers.patch(self, [
            'bot.fuzzers.engine.get', 'bot.tasks.setup.get_fuzzer_directory',
            'base.tasks.add_task',
            'bot.tasks.corpus_pruning_task._record_cross_pollination_stats'
        ])

        self.mock.get.return_value = libFuzzer_engine.LibFuzzerEngine()
        self.mock.get_fuzzer_directory.return_value = os.path.join(
            environment.get_value('ROOT_DIR'), 'src', 'python', 'bot',
            'fuzzers', 'libFuzzer')
        self.corpus_bucket = os.environ['CORPUS_BUCKET']
        self.quarantine_bucket = os.environ['QUARANTINE_BUCKET']
        self.backup_bucket = os.environ['BACKUP_BUCKET']

        job = data_types.Job(
            name='libfuzzer_asan_job',
            environment_string=(
                'APP_NAME = test_fuzzer\n'
                'CORPUS_BUCKET = {corpus_bucket}\n'
                'QUARANTINE_BUCKET = {quarantine_bucket}\n'
                'BACKUP_BUCKET={backup_bucket}\n'
                'RELEASE_BUILD_BUCKET_PATH = '
                'gs://clusterfuzz-test-data/test_libfuzzer_builds/'
                'test-libfuzzer-build-([0-9]+).zip\n'
                'REVISION_VARS_URL = gs://clusterfuzz-test-data/'
                'test_libfuzzer_builds/'
                'test-libfuzzer-build-%s.srcmap.json\n'.format(
                    corpus_bucket=self.corpus_bucket,
                    quarantine_bucket=self.quarantine_bucket,
                    backup_bucket=self.backup_bucket)))
        job.put()

        job = data_types.Job(
            name='libfuzzer_asan_job2',
            environment_string=(
                'APP_NAME = test2_fuzzer\n'
                'BACKUP_BUCKET = {backup_bucket}\n'
                'CORPUS_FUZZER_NAME_OVERRIDE = libfuzzer\n'.format(
                    backup_bucket=self.backup_bucket)))
        job.put()

        os.environ['PROJECT_NAME'] = 'oss-fuzz'
        data_types.FuzzTarget(engine='libFuzzer',
                              project='test',
                              binary='test_fuzzer').put()
        data_types.FuzzTargetJob(fuzz_target_name='libFuzzer_test_fuzzer',
                                 engine='libFuzzer',
                                 job='libfuzzer_asan_job',
                                 last_run=datetime.datetime.now()).put()

        data_types.FuzzTarget(engine='libFuzzer',
                              project='test2',
                              binary='fuzzer').put()
        data_types.FuzzTargetJob(fuzz_target_name='libFuzzer_test2_fuzzer',
                                 engine='libFuzzer',
                                 job='libfuzzer_asan_job2',
                                 last_run=datetime.datetime.now()).put()

        environment.set_value('USE_MINIJAIL', True)
        environment.set_value('SHARED_CORPUS_BUCKET', TEST_SHARED_BUCKET)

        # Set up remote corpora.
        self.corpus = corpus_manager.FuzzTargetCorpus('libFuzzer',
                                                      'test_fuzzer')
        self.corpus.rsync_from_disk(os.path.join(TEST_DIR, 'corpus'),
                                    delete=True)

        self.quarantine_corpus = corpus_manager.FuzzTargetCorpus(
            'libFuzzer', 'test_fuzzer', quarantine=True)
        self.quarantine_corpus.rsync_from_disk(os.path.join(
            TEST_DIR, 'quarantine'),
                                               delete=True)

        self.mock.get_data_bundle_bucket_name.return_value = TEST_GLOBAL_BUCKET
        data_types.DataBundle(name='bundle',
                              is_local=True,
                              sync_to_worker=True).put()

        data_types.Fuzzer(revision=1,
                          file_size='builtin',
                          source='builtin',
                          name='libFuzzer',
                          max_testcases=4,
                          builtin=True,
                          data_bundle_name='bundle').put()

        self.temp_dir = tempfile.mkdtemp()

        # Copy corpus backup in the older date format.
        corpus_backup_date = (
            datetime.datetime.utcnow().date() - datetime.timedelta(
                days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS))
        corpus_backup_dir = 'gs://{bucket}/corpus/libfuzzer/test2_fuzzer/'
        gsutil.GSUtilRunner().run_gsutil([
            'cp', (corpus_backup_dir +
                   'backup.zip').format(bucket=TEST2_BACKUP_BUCKET),
            (corpus_backup_dir +
             '%s.zip' % corpus_backup_date).format(bucket=self.backup_bucket)
        ])
Example #8
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
    """Upload test cases from the list to a cloud storage bucket."""
    # Since builtin fuzzers have a coverage minimized corpus, no need to upload
    # test case samples for them.
    if fuzzer_name in builtin_fuzzers.BUILTIN_FUZZERS:
        return

    bucket_name = local_config.ProjectConfig().get(
        'coverage.fuzzer-testcases.bucket')
    if not bucket_name:
        return

    files_list = []
    has_testcases_in_testcase_directory = False
    has_testcases_in_data_directory = False
    for testcase_path in testcase_list:
        if testcase_path.startswith(testcase_directory):
            files_list.append(
                os.path.relpath(testcase_path, testcase_directory))
            has_testcases_in_testcase_directory = True
        elif testcase_path.startswith(data_directory):
            files_list.append(os.path.relpath(testcase_path, data_directory))
            has_testcases_in_data_directory = True
    if not files_list:
        return

    formatted_date = str(utils.utcnow().date())
    gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
        bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

    runner = gsutil.GSUtilRunner()
    batch_directory_blobs = storage.list_blobs(gcs_base_url)
    total_testcases = 0
    for blob in batch_directory_blobs:
        if not blob.endswith(LIST_FILE_BASENAME):
            continue

        list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name,
                                                     blob=blob)
        data = storage.read_data(list_gcs_url)
        if not data:
            logs.log_error(
                'Read no data from test case list at {gcs_url}'.format(
                    gcs_url=list_gcs_url))
            continue

        total_testcases += len(data.splitlines())

        # If we've already uploaded enough test cases for this fuzzer today, return.
        if total_testcases >= TESTCASES_PER_DAY:
            return

    # Cap the number of files.
    testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
    files_list = files_list[:testcases_limit]

    # Upload each batch of tests to its own unique sub-bucket.
    identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
    gcs_base_url += utils.string_hash(identifier)

    list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
    if not storage.write_data('\n'.join(files_list), list_gcs_url):
        return

    if has_testcases_in_testcase_directory:
        # Sync everything in |testcase_directory| since it is fuzzer-generated.
        runner.rsync(testcase_directory, gcs_base_url)

    if has_testcases_in_data_directory:
        # Sync all fuzzer-generated testcases in the data bundle directory.
        runner.rsync(data_directory,
                     gcs_base_url,
                     exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
                         fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

        # Sync all possible resource dependencies as a best effort. This matches
        # the |resources-| prefix that a fuzzer can use to indicate resources, as
        # well as the resources directory that Chromium web_tests use for
        # dependencies.
        runner.rsync(data_directory,
                     gcs_base_url,
                     exclusion_pattern='(?!.*resource)')

    logs.log('Synced {count} test cases to {gcs_url}.'.format(
        count=len(files_list), gcs_url=gcs_base_url))
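
The exclusion_pattern arguments invert gsutil rsync's -x semantics with a negative lookahead: paths matching the pattern are excluded, so '(?!.*resource)' excludes every relative path that does not contain 'resource'. A quick illustration with Python's re module (gsutil's exact matching rules may differ slightly):

import re

exclude = re.compile(r'(?!.*resource)')
print(bool(exclude.match('corpus/fuzz-0001')))        # True  -> excluded from sync
print(bool(exclude.match('data/resources/img.png')))  # False -> kept and synced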
Example #9
  def setUp(self):
    """Set up."""
    super(CorpusPruningTestUntrusted, self).setUp()
    environment.set_value("JOB_NAME", "libfuzzer_asan_job")

    helpers.patch(
        self,
        [
            "bot.fuzzers.engine.get",
            "bot.fuzzers.libFuzzer.fuzzer.LibFuzzer.fuzzer_directory",
            "base.tasks.add_task",
            "datastore.data_handler.get_data_bundle_bucket_name",
        ],
    )

    self.mock.get.return_value = libFuzzer_engine.LibFuzzerEngine()
    self.mock.fuzzer_directory.return_value = os.path.join(
        environment.get_value("ROOT_DIR"),
        "src",
        "python",
        "bot",
        "fuzzers",
        "libFuzzer",
    )

    self.corpus_bucket = os.environ["CORPUS_BUCKET"]
    self.quarantine_bucket = os.environ["QUARANTINE_BUCKET"]
    self.backup_bucket = os.environ["BACKUP_BUCKET"]

    job = data_types.Job(
        name="libfuzzer_asan_job",
        environment_string=("APP_NAME = test_fuzzer\n"
                            "CORPUS_BUCKET = {corpus_bucket}\n"
                            "QUARANTINE_BUCKET = {quarantine_bucket}\n"
                            "BACKUP_BUCKET={backup_bucket}\n"
                            "RELEASE_BUILD_BUCKET_PATH = "
                            "gs://clusterfuzz-test-data/test_libfuzzer_builds/"
                            "test-libfuzzer-build-([0-9]+).zip\n"
                            "REVISION_VARS_URL = gs://clusterfuzz-test-data/"
                            "test_libfuzzer_builds/"
                            "test-libfuzzer-build-%s.srcmap.json\n".format(
                                corpus_bucket=self.corpus_bucket,
                                quarantine_bucket=self.quarantine_bucket,
                                backup_bucket=self.backup_bucket,
                            )),
    )
    job.put()

    job = data_types.Job(
        name="libfuzzer_asan_job2",
        environment_string=("APP_NAME = test2_fuzzer\n"
                            "BACKUP_BUCKET = {backup_bucket}\n"
                            "CORPUS_FUZZER_NAME_OVERRIDE = libfuzzer\n".format(
                                backup_bucket=self.backup_bucket)),
    )
    job.put()

    os.environ["PROJECT_NAME"] = "oss-fuzz"
    data_types.FuzzTarget(
        engine="libFuzzer", project="test", binary="test_fuzzer").put()
    data_types.FuzzTargetJob(
        fuzz_target_name="libFuzzer_test_fuzzer",
        engine="libFuzzer",
        job="libfuzzer_asan_job",
        last_run=datetime.datetime.now(),
    ).put()

    data_types.FuzzTarget(
        engine="libFuzzer", project="test2", binary="fuzzer").put()
    data_types.FuzzTargetJob(
        fuzz_target_name="libFuzzer_test2_fuzzer",
        engine="libFuzzer",
        job="libfuzzer_asan_job2",
        last_run=datetime.datetime.now(),
    ).put()

    environment.set_value("USE_MINIJAIL", True)
    environment.set_value("SHARED_CORPUS_BUCKET", TEST_SHARED_BUCKET)

    # Set up remote corpora.
    self.corpus = corpus_manager.FuzzTargetCorpus("libFuzzer", "test_fuzzer")
    self.corpus.rsync_from_disk(os.path.join(TEST_DIR, "corpus"), delete=True)

    self.quarantine_corpus = corpus_manager.FuzzTargetCorpus(
        "libFuzzer", "test_fuzzer", quarantine=True)
    self.quarantine_corpus.rsync_from_disk(
        os.path.join(TEST_DIR, "quarantine"), delete=True)

    self.mock.get_data_bundle_bucket_name.return_value = TEST_GLOBAL_BUCKET
    data_types.DataBundle(
        name="bundle", is_local=True, sync_to_worker=True).put()

    data_types.Fuzzer(
        revision=1,
        file_size="builtin",
        source="builtin",
        name="libFuzzer",
        max_testcases=4,
        builtin=True,
        data_bundle_name="bundle",
    ).put()

    self.temp_dir = tempfile.mkdtemp()

    # Copy corpus backup in the older date format.
    corpus_backup_date = datetime.datetime.utcnow().date() - datetime.timedelta(
        days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)
    corpus_backup_dir = "gs://{bucket}/corpus/libfuzzer/test2_fuzzer/"
    gsutil.GSUtilRunner().run_gsutil([
        "cp",
        (corpus_backup_dir + "backup.zip").format(bucket=TEST2_BACKUP_BUCKET),
        (corpus_backup_dir +
         "%s.zip" % corpus_backup_date).format(bucket=self.backup_bucket),
    ])