Example #1
def update_tests_if_needed():
    """Updates layout tests every day."""
    data_directory = environment.get_value('FUZZ_DATA')
    error_occurred = False
    expected_task_duration = 60 * 60  # 1 hour.
    retry_limit = environment.get_value('FAIL_RETRIES')
    temp_archive = os.path.join(data_directory, 'temp.zip')
    tests_url = environment.get_value('WEB_TESTS_URL')

    # Check if we have a valid tests url.
    if not tests_url:
        return

    # Layout test updates are usually disabled to speed up local testing.
    if environment.get_value('LOCAL_DEVELOPMENT'):
        return

    # |UPDATE_WEB_TESTS| env variable can be used to control our update behavior.
    if not environment.get_value('UPDATE_WEB_TESTS'):
        return

    last_modified_time = persistent_cache.get_value(
        TESTS_LAST_UPDATE_KEY, constructor=datetime.datetime.utcfromtimestamp)
    if (last_modified_time is not None
            and not dates.time_has_expired(last_modified_time,
                                           days=TESTS_UPDATE_INTERVAL_DAYS)):
        return

    logs.log('Updating layout tests.')
    tasks.track_task_start(tasks.Task('update_tests', '', ''),
                           expected_task_duration)

    # Download and unpack the tests archive.
    for _ in range(retry_limit):
        try:
            shell.remove_directory(data_directory, recreate=True)
            storage.copy_file_from(tests_url, temp_archive)
            archive.unpack(temp_archive, data_directory, trusted=True)
            shell.remove_file(temp_archive)
            error_occurred = False
            break
        except Exception:
            logs.log_error(
                'Could not retrieve and unpack layout tests archive. Retrying.'
            )
            error_occurred = True

    if not error_occurred:
        persistent_cache.set_value(TESTS_LAST_UPDATE_KEY,
                                   time.time(),
                                   persist_across_reboots=True)

    tasks.track_task_end()
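
The pattern above combines two ideas: throttle an expensive job with a cached
last-run timestamp, and wrap the flaky part in a bounded retry loop. Below is a
minimal stdlib-only sketch of the same pattern; STATE_FILE, UPDATE_INTERVAL_DAYS
and RETRY_LIMIT are hypothetical stand-ins for the persistent_cache and
environment values used above.

import datetime
import json
import os
import time

STATE_FILE = '/tmp/update_state.json'  # Hypothetical cache location.
UPDATE_INTERVAL_DAYS = 1
RETRY_LIMIT = 3


def _load_last_run():
    """Return the cached last-run time, or None if never recorded."""
    if not os.path.exists(STATE_FILE):
        return None
    with open(STATE_FILE) as f:
        return datetime.datetime.utcfromtimestamp(json.load(f)['last_run'])


def update_if_needed(do_update):
    """Run |do_update| at most once per interval, retrying on failure."""
    last_run = _load_last_run()
    now = datetime.datetime.utcnow()
    if last_run and now - last_run < datetime.timedelta(days=UPDATE_INTERVAL_DAYS):
        return  # Updated recently, nothing to do.

    for _ in range(RETRY_LIMIT):
        try:
            do_update()
        except Exception:
            continue  # Transient failure, try again.
        # Record the successful run so the next call within the interval skips.
        with open(STATE_FILE, 'w') as f:
            json.dump({'last_run': time.time()}, f)
        return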
Example #2
def get_corpus(corpus_directory, fuzzer_name):
    """Get corpus directory.

  This function will download latest corpus backup file from GCS, unzip
  the file and put them in corpus directory.

  Args:
    directory: The directory to place corpus.
    fuzzer_name: Fuzzer name, e.g. libpng_read_fuzzer, xml_parser_fuzzer, etc.

  Returns:
    True if the corpus can be acquired and False otherwise.
  """
    backup_bucket_name = environment.get_value('BACKUP_BUCKET')
    corpus_fuzzer_name = environment.get_value('CORPUS_FUZZER_NAME_OVERRIDE')

    # Get GCS backup path.
    gcs_backup_path = corpus_manager.gcs_url_for_backup_file(
        backup_bucket_name, corpus_fuzzer_name, fuzzer_name,
        corpus_manager.LATEST_BACKUP_TIMESTAMP)

    # Get local backup path.
    local_backup_name = os.path.basename(gcs_backup_path)
    local_backup_path = os.path.join(corpus_directory, local_backup_name)

    # Download latest backup.
    if not storage.copy_file_from(gcs_backup_path, local_backup_path):
        logs.log_error('Failed to download corpus from GCS bucket {}.'.format(
            gcs_backup_path))
        return False

    # Extract corpus from zip file.
    archive.unpack(local_backup_path, corpus_directory)
    shell.remove_file(local_backup_path)
    return True
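
For readers without the ClusterFuzz helpers at hand, here is a minimal
stdlib-only sketch of the same download-and-unpack flow, with urllib standing
in for storage.copy_file_from and zipfile for archive.unpack (the URL and
directory are whatever the caller supplies):

import os
import urllib.request
import zipfile


def fetch_corpus(corpus_directory, backup_url):
    """Download a corpus backup zip and extract it into |corpus_directory|."""
    os.makedirs(corpus_directory, exist_ok=True)
    local_backup_path = os.path.join(corpus_directory,
                                     os.path.basename(backup_url))
    try:
        urllib.request.urlretrieve(backup_url, local_backup_path)
    except OSError:
        print('Failed to download corpus from %s.' % backup_url)
        return False

    # Extract the corpus and delete the downloaded archive.
    with zipfile.ZipFile(local_backup_path) as backup_zip:
        backup_zip.extractall(corpus_directory)
    os.remove(local_backup_path)
    return True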
Example #3
def download_model_from_gcs(local_model_directory, fuzzer_name):
    """Pull model from GCS bucket and put them in specified model directory."""
    # ML model is stored in corpus bucket.
    gcs_corpus_bucket = environment.get_value('CORPUS_BUCKET')
    if not gcs_corpus_bucket:
        logs.log('Corpus bucket is not set. Skip generation.')
        return False

    # Get cloud storage path.
    # e.g. gs://clusterfuzz-corpus/rnn/libpng_read_fuzzer
    gcs_model_directory = 'gs://%s/%s/%s' % (
        gcs_corpus_bucket, constants.RNN_MODEL_NAME, fuzzer_name)

    logs.log('GCS model directory for fuzzer %s is %s.' %
             (fuzzer_name, gcs_model_directory))

    # The RNN model consists of two files.
    data_filename = constants.RNN_MODEL_NAME + constants.MODEL_DATA_SUFFIX
    index_filename = constants.RNN_MODEL_NAME + constants.MODEL_INDEX_SUFFIX

    # Cloud file paths.
    gcs_data_path = '%s/%s' % (gcs_model_directory, data_filename)
    gcs_index_path = '%s/%s' % (gcs_model_directory, index_filename)

    # Check if model exists.
    if not (storage.exists(gcs_data_path) and storage.exists(gcs_index_path)):
        logs.log(
            'ML RNN model for fuzzer %s does not exist. Skip generation.' %
            fuzzer_name)
        return False

    # Local file paths.
    local_data_path = os.path.join(local_model_directory, data_filename)
    local_index_path = os.path.join(local_model_directory, index_filename)

    # Download model files.
    result = (storage.copy_file_from(gcs_data_path, local_data_path)
              and storage.copy_file_from(gcs_index_path, local_index_path))

    if not result:
        logs.log(
            'Failed to download RNN model for fuzzer %s. Skip generation.' %
            fuzzer_name)
        return False

    return True
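
The check-then-download structure above avoids a partial model: both files must
exist before either is transferred. A hedged stdlib sketch of the same
all-or-nothing pattern, using HTTP HEAD requests in place of storage.exists:

import os
import urllib.error
import urllib.request


def download_model_files(urls, destination_directory):
    """Download a set of model files, or nothing at all if any is missing."""
    # Verify every file exists before transferring anything.
    for url in urls:
        try:
            with urllib.request.urlopen(urllib.request.Request(url, method='HEAD')):
                pass
        except urllib.error.URLError:
            print('Model file %s does not exist. Skip download.' % url)
            return False

    for url in urls:
        local_path = os.path.join(destination_directory, os.path.basename(url))
        urllib.request.urlretrieve(url, local_path)
    return True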
Example #4
def _download_mutator_plugin_archive(mutator_plugin_archive):
    """Downloads the |mutator_plugin_archive| from the mutator plugin storage
  bucket to the plugin archives directory. Returns the path that the archive was
  downloaded to."""
    file_path = os.path.join(_get_mutator_plugins_archives_dir(),
                             mutator_plugin_archive)
    url = '%s/%s' % (_get_mutator_plugins_bucket_url(), mutator_plugin_archive)
    if not storage.copy_file_from(url, file_path):
        logs.log_error('Failed to copy plugin archive from %s to %s' %
                       (url, file_path))
        return None

    return file_path
Example #5
def _cross_pollinate_other_fuzzer_corpuses(self):
    """Add other fuzzers' corpora to the shared corpus path for cross-pollination."""
    corpus_backup_date = utils.utcnow().date() - datetime.timedelta(
        days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

    for cross_pollinate_fuzzer in self.cross_pollinate_fuzzers:
        project_qualified_name = (
            cross_pollinate_fuzzer.fuzz_target.project_qualified_name())
        backup_bucket_name = cross_pollinate_fuzzer.backup_bucket_name
        corpus_engine_name = cross_pollinate_fuzzer.corpus_engine_name

        corpus_backup_url = corpus_manager.gcs_url_for_backup_file(
            backup_bucket_name, corpus_engine_name, project_qualified_name,
            corpus_backup_date)
        corpus_backup_local_filename = '%s-%s' % (
            project_qualified_name, os.path.basename(corpus_backup_url))
        corpus_backup_local_path = os.path.join(
            self.shared_corpus_path, corpus_backup_local_filename)

        if not storage.exists(corpus_backup_url, ignore_errors=True):
            # This can happen when a new fuzz target is checked in, or when we
            # missed capturing a backup for a particular day (for OSS-Fuzz this
            # results in a 403 instead of a 404, since that GCS path belongs to
            # another project). So, just log a warning for debugging purposes.
            logs.log_warn('Corpus backup does not exist, ignoring: %s.' %
                          corpus_backup_url)
            continue

        if not storage.copy_file_from(corpus_backup_url,
                                      corpus_backup_local_path):
            continue

        corpus_backup_output_directory = os.path.join(
            self.shared_corpus_path, project_qualified_name)
        shell.create_directory(corpus_backup_output_directory)
        result = archive.unpack(corpus_backup_local_path,
                                corpus_backup_output_directory)
        shell.remove_file(corpus_backup_local_path)

        if result:
            logs.log(
                'Corpus backup url %s successfully unpacked into shared corpus.'
                % corpus_backup_url)
        else:
            logs.log_error('Failed to unpack corpus backup from url %s.' %
                           corpus_backup_url)
Example #6
def download_recommended_dictionary_from_gcs(self, local_dict_path):
    """Download the recommended dictionary from GCS to the given location.

    Args:
      local_dict_path: Path to a dictionary file on disk.

    Returns:
      A boolean indicating whether the download succeeded.
    """
    if environment.is_lib():
        return False

    # When the fuzz target is initially created, or when it has no new
    # coverage or dictionary recommendations, we won't have a
    # recommended dictionary in GCS yet.
    if not storage.exists(self.gcs_path):
        return False

    if storage.copy_file_from(self.gcs_path, local_dict_path):
        return True

    logs.log('Downloading %s failed.' % self.gcs_path)
    return False
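
Example #6 reduces to a guard, an existence check, and a copy. A minimal local
sketch of the same shape, with an ordinary directory standing in for the GCS
bucket (remote_dict_path is hypothetical):

import os
import shutil


def download_dictionary(remote_dict_path, local_dict_path):
    """Copy a recommended dictionary into place, mirroring the checks above."""
    # A new fuzz target may simply have no recommended dictionary yet.
    if not os.path.exists(remote_dict_path):
        return False

    try:
        shutil.copyfile(remote_dict_path, local_dict_path)
        return True
    except OSError:
        print('Downloading %s failed.' % remote_dict_path)
        return False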
Example #7
def update_source_code():
    """Updates source code files with latest version from appengine."""
    process_handler.cleanup_stale_processes()
    shell.clear_temp_directory()

    root_directory = environment.get_value('ROOT_DIR')
    temp_directory = environment.get_value('BOT_TMPDIR')
    temp_archive = os.path.join(temp_directory, 'clusterfuzz-source.zip')
    try:
        storage.copy_file_from(get_source_url(), temp_archive)
    except Exception:
        logs.log_error('Could not retrieve source code archive from url.')
        return

    try:
        file_list = archive.get_file_list(temp_archive)
        zip_archive = zipfile.ZipFile(temp_archive, 'r')
    except Exception:
        logs.log_error('Bad zip file.')
        return

    src_directory = os.path.join(root_directory, 'src')
    output_directory = os.path.dirname(root_directory)
    error_occurred = False
    normalized_file_set = set()
    for filepath in file_list:
        filename = os.path.basename(filepath)

        # This file cannot be updated on the fly since it is running as a server.
        if filename == 'adb':
            continue

        absolute_filepath = os.path.join(output_directory, filepath)
        if os.path.altsep:
            absolute_filepath = absolute_filepath.replace(
                os.path.altsep, os.path.sep)

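        # Skip paths that resolve elsewhere (e.g. through symlinks), so the
        # archive cannot write outside the intended output directory.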
        if os.path.realpath(absolute_filepath) != absolute_filepath:
            continue

        normalized_file_set.add(absolute_filepath)
        try:
            file_extension = os.path.splitext(filename)[1]

            # Remove any .so files first before overwriting, as they can be loaded
            # in the memory of existing processes. Overwriting them directly causes
            # segfaults in existing processes (e.g. run.py).
            if file_extension == '.so' and os.path.exists(absolute_filepath):
                os.remove(absolute_filepath)

            # On Windows, to update DLLs (and native .pyd extensions), we rename
            # them first so that we can install the new version.
            if (environment.platform() == 'WINDOWS'
                    and file_extension in ['.dll', '.pyd']
                    and os.path.exists(absolute_filepath)):
                _rename_dll_for_update(absolute_filepath)
        except Exception:
            logs.log_error('Failed to remove or move %s before extracting new '
                           'version.' % absolute_filepath)

        try:
            extracted_path = zip_archive.extract(filepath, output_directory)
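            # Unix permission bits live in the high 16 bits of external_attr;
            # the |= 0o440 below keeps the file at least owner/group readable.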
            external_attr = zip_archive.getinfo(filepath).external_attr
            mode = (external_attr >> 16) & 0o777
            mode |= 0o440
            os.chmod(extracted_path, mode)
        except Exception:
            error_occurred = True
            logs.log_error('Failed to extract file %s from source archive.' %
                           filepath)

    zip_archive.close()

    if error_occurred:
        return

    clear_pyc_files(src_directory)
    clear_old_files(src_directory, normalized_file_set)

    local_manifest_path = os.path.join(root_directory,
                                       utils.LOCAL_SOURCE_MANIFEST)
    source_version = utils.read_data_from_file(
        local_manifest_path, eval_data=False).decode('utf-8').strip()
    logs.log('Source code updated to %s.' % source_version)
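
One detail in Example #7 worth isolating: ZipFile.extract() does not restore
Unix permissions, which is why the code reads external_attr and calls os.chmod
itself. A self-contained sketch of just that step (archive_path and member are
whatever the caller supplies):

import os
import zipfile


def extract_preserving_mode(archive_path, member, output_directory):
    """Extract |member| and restore the Unix mode stored in the zip entry."""
    with zipfile.ZipFile(archive_path) as zip_archive:
        extracted_path = zip_archive.extract(member, output_directory)
        # Unix permissions are stored in the high 16 bits of external_attr;
        # entries created on Windows have none, hence the guard.
        mode = (zip_archive.getinfo(member).external_attr >> 16) & 0o777
        if mode:
            os.chmod(extracted_path, mode | 0o440)  # Keep owner/group read.
    return extracted_path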