    def test_report_generation(self):
        """Test report generation for a directory."""
        analyzer = performance_analyzer.LibFuzzerPerformanceAnalyzer()
        report_logs_directory = os.path.join(self.libfuzzer_data_directory,
                                             'report_logs')
        stats_rows = []

        # Use default values for stats values usually provided by CF.
        stats_overrides = DEFAULT_STATS_PROVIDED_BY_CF.copy()

        for filename in sorted(os.listdir(report_logs_directory)):
            # Use different timestamp values for each log.
            stats_overrides['timestamp'] += 1

            stats_rows.append(
                _get_stats_from_log(os.path.join(report_logs_directory,
                                                 filename),
                                    stats_overrides=stats_overrides))

        performance_scores, affected_runs_percents, examples = (
            analyzer.analyze_stats(stats_rows))

        performance_issues = analyzer.get_issues(performance_scores,
                                                 affected_runs_percents,
                                                 examples)
        performance_report = performance_analyzer.generate_report(
            performance_issues, 'fuzzer1', 'job1')

        expected_report = utils.read_data_from_file(os.path.join(
            self.libfuzzer_data_directory, 'expected_report.json'),
                                                    eval_data=False)

        self.maxDiff = None  # pylint: disable=invalid-name
        self.assertEqual(json.loads(performance_report),
                         json.loads(expected_report))
Example No. 2
    def test_recommended_dictionary_merge(self):
        """Test merging with GCS copy of recommended dictionary."""
        fake_gcs_dict_path = os.path.join(
            DATA_DIRECTORY, 'fake_gcs_recommended_dictionary.txt')

        dict_manager = dictionary_manager.DictionaryManager('fuzzer_name')
        log_data = utils.read_data_from_file(os.path.join(
            DATA_DIRECTORY, 'log_with_recommended_dict.txt'),
                                             eval_data=False).decode('utf-8')

        dict_from_log = dict_manager.parse_recommended_dictionary_from_data(
            log_data)
        utils.write_data_to_file('\n'.join(dict_from_log),
                                 self.local_dict_path)

        dictionary_manager.merge_dictionary_files(self.local_dict_path,
                                                  fake_gcs_dict_path,
                                                  self.local_dict_path)

        # Compare resulting dictionary with its expected result.
        merged_dictionary = self._parse_dictionary_file(self.local_dict_path)
        expected_dictionary_path = os.path.join(
            DATA_DIRECTORY, 'expected_merged_recommended_dictionary.txt')
        expected_dictionary = self._parse_dictionary_file(
            expected_dictionary_path)

        self.assertEqual(sorted(merged_dictionary),
                         sorted(expected_dictionary))
Example No. 3
def download_artifact_if_needed(build_id, artifact_directory,
                                artifact_archive_path,
                                targets_with_type_and_san, artifact_file_name,
                                output_filename_override, build_params,
                                build_params_check_path):
    """Downloads artifact to actifacts_archive_path if needed"""
    # Check if we already have the symbols in cache.
    cached_build_params = utils.read_data_from_file(build_params_check_path,
                                                    eval_data=True)
    if cached_build_params and cached_build_params == build_params:
        # No work to do, same system symbols already in cache.
        return

    # Delete existing symbols directory first.
    shell.remove_directory(artifact_directory, recreate=True)

    # Fetch symbol file from cloud storage cache (if available).
    found_in_cache = storage.get_file_from_cache_if_exists(
        artifact_archive_path, update_modification_time_on_access=False)
    if not found_in_cache:
        for target_with_type_and_san in targets_with_type_and_san:
            # Fetch the artifact now.
            fetch_artifact.get(build_id, target_with_type_and_san,
                               artifact_file_name, artifact_directory,
                               output_filename_override)
            if os.path.exists(artifact_archive_path):
                break
Example No. 4
    def _parse_dictionary_file(self, dictionary_path):
        """Parse given dictionary file and return set of its lines."""
        data = utils.read_data_from_file(dictionary_path,
                                         eval_data=False).decode('utf-8')
        lines = [line.strip() for line in data.splitlines()]
        dictionary = {line for line in lines if line}
        return dictionary
Example No. 5
    def _validate_correction(self, input_filename, output_filename):
        """Run dictionary correction on the input and verify the written output."""
        full_input_filename = os.path.join(DATA_DIRECTORY, input_filename)
        dictionary_manager.correct_if_needed(full_input_filename)
        full_output_filename = os.path.join(DATA_DIRECTORY, output_filename)
        expected_output = utils.read_data_from_file(
            full_output_filename, eval_data=False).decode('utf-8')
        self.mock.write_data_to_file.assert_called_once_with(
            expected_output, full_input_filename)
Example No. 6
def get_cache_file_size_from_metadata(cache_file_path):
    """Return cache file size from metadata file."""
    cache_file_metadata_path = get_cache_file_metadata_path(cache_file_path)
    metadata_content = utils.read_data_from_file(cache_file_metadata_path,
                                                 eval_data=True)

    if not metadata_content or 'size' not in metadata_content:
        return None

    return metadata_content['size']
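
A minimal usage sketch (not part of the original examples), assuming the metadata file holds a Python-literal dict such as {'size': 1048576}, which is what eval_data=True implies; the cache path below is illustrative only.

import os

# Hypothetical cache path; get_cache_file_size_from_metadata is defined above.
cache_file_path = '/cache/corpus.zip'
size = get_cache_file_size_from_metadata(cache_file_path)
if size is None:
    # Metadata missing or malformed; fall back to the file's own size.
    size = os.path.getsize(cache_file_path)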
def merge_dictionary_files(original_dictionary_path,
                           recommended_dictionary_path, merged_dictionary_path):
  """Merge a list of dictionaries with given paths into a singe dictionary."""
  if original_dictionary_path and os.path.exists(original_dictionary_path):
    merged_dictionary_data = utils.read_data_from_file(
        original_dictionary_path, eval_data=False).decode('utf-8')
  else:
    merged_dictionary_data = ''

  recommended_dictionary_lines = utils.read_data_from_file(
      recommended_dictionary_path,
      eval_data=False).decode('utf-8').splitlines()

  dictionary_lines_to_add = set()
  for line in recommended_dictionary_lines:
    if line not in merged_dictionary_data:
      dictionary_lines_to_add.add(line)

  merged_dictionary_data += '\n%s\n' % RECOMMENDED_DICTIONARY_HEADER

  merged_dictionary_data += '\n'.join(dictionary_lines_to_add)
  utils.write_data_to_file(merged_dictionary_data, merged_dictionary_path)
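
A minimal usage sketch under assumed paths, mirroring the merge test shown earlier: the local dictionary serves as both the original and the destination, so the recommended entries are merged in place. Note that deduplication above is a substring check against the existing dictionary text rather than an exact line comparison.

# Hypothetical paths; merge_dictionary_files is defined above.
merge_dictionary_files(
    '/fuzzer/target.dict',             # manual dictionary (may not exist yet)
    '/tmp/recommended_from_gcs.dict',  # recommended entries pulled from GCS
    '/fuzzer/target.dict')             # merged result written in place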
Example No. 8
def _get_repo_prop_data(build_id, fuzz_target):
  """Downloads repo.prop and returuns the data based on build_id and target."""
  symbols_directory = os.path.join(
      environment.get_value('SYMBOLS_DIR'), fuzz_target)
  repro_filename = symbols_downloader.get_repo_prop_archive_filename(
      build_id, fuzz_target)

  # Grab repo.prop; it is neither on the device nor in the build_dir.
  _download_kernel_repo_prop_if_needed(symbols_directory, build_id, fuzz_target)
  local_repo_path = utils.find_binary_path(symbols_directory, repro_filename)
  if local_repo_path and os.path.exists(local_repo_path):
    return utils.read_data_from_file(local_repo_path, eval_data=False).decode()

  return None
def correct_if_needed(dict_path):
  """Corrects obvious errors such as missing quotes in a dictionary."""
  if not dict_path or not os.path.exists(dict_path):
    return

  content = utils.read_data_from_file(
      dict_path, eval_data=False).decode('utf-8')
  new_content = ''
  for current_line in content.splitlines():
    new_content += _fix_dictionary_line(current_line, dict_path) + '\n'

  # End of file newlines are inconsistent in dictionaries.
  if new_content.rstrip('\n') != content.rstrip('\n'):
    utils.write_data_to_file(new_content, dict_path)
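
The _fix_dictionary_line helper is not part of this example; the sketch below is a simplified, hypothetical stand-in that only illustrates the "missing quotes" case mentioned in the docstring.

def _example_fix_dictionary_line(line, dict_path):
  """Simplified stand-in for _fix_dictionary_line (illustration only)."""
  stripped = line.strip()
  if not stripped or stripped.startswith('#'):
    # Keep blank lines and comments untouched.
    return line
  if '"' not in stripped:
    # Wrap a bare token in quotes, the common correction for dictionary entries.
    return '"%s"' % stripped
  return line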
Example No. 10
    def test_useless_dictionary_parse(self):
        """Test parsing of useless dictionary from fuzzer log."""
        dict_manager = dictionary_manager.DictionaryManager('fuzzer_name')
        log_data = utils.read_data_from_file(os.path.join(
            DATA_DIRECTORY, 'log_with_useless_dict.txt'),
                                             eval_data=False).decode('utf-8')

        useless_dict = dict_manager.parse_useless_dictionary_from_data(
            log_data)

        expected_dictionary_path = os.path.join(
            DATA_DIRECTORY, 'expected_parsed_useless_dictionary.txt')
        expected_dictionary = self._parse_dictionary_file(
            expected_dictionary_path)

        self.assertEqual(sorted(useless_dict), sorted(expected_dictionary))
Example No. 11
def get_stats_for_dictionary_file(dictionary_path):
  """Calculate size of manual and recommended sections of given dictionary."""
  if not dictionary_path or not os.path.exists(dictionary_path):
    return 0, 0

  dictionary_content = utils.read_data_from_file(
      dictionary_path, eval_data=False).decode('utf-8')
  dictionaries = dictionary_content.split(RECOMMENDED_DICTIONARY_HEADER)

  # If there are any elements before RECOMMENDED_DICTIONARY_HEADER, those are
  # from the "manual" dictionary stored in the repository.
  manual_dictionary_size = get_dictionary_size(dictionaries[0])
  if len(dictionaries) < 2:
    return manual_dictionary_size, 0

  # Any elements after RECOMMENDED_DICTIONARY_HEADER are from the recommended
  # dictionary.
  recommended_dictionary_size = get_dictionary_size(dictionaries[1])
  return manual_dictionary_size, recommended_dictionary_size
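
A short usage sketch with a hypothetical path: everything before RECOMMENDED_DICTIONARY_HEADER is counted as the manual section, everything after it as the recommended section (get_dictionary_size itself is not shown in this example).

# Hypothetical path; get_stats_for_dictionary_file is defined above.
manual_size, recommended_size = get_stats_for_dictionary_file(
    '/fuzzer/target.dict')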
Example No. 12
def _get_authorization(force_reauthorization, configuration):
    """Get the value for an oauth authorization header."""
    # Try to read from cache unless we need to reauthorize.
    if not force_reauthorization:
        cached_authorization = utils.read_data_from_file(
            AUTHORIZATION_CACHE_FILE, eval_data=False)
        if cached_authorization:
            return cached_authorization

    # Prompt the user for a code if we don't have one or need a new one.
    oauth_url = configuration.get('oauth_url')
    print('Please login at the following URL to authenticate: {oauth_url}'.
          format(oauth_url=oauth_url))

    with SuppressOutput():
        webbrowser.open(oauth_url, new=1, autoraise=True)

    verification_code = prompts.get_string('Enter verification code')
    return 'VerificationCode {code}'.format(code=verification_code)
def _get_stats_from_log(log_path,
                        strategies=None,
                        arguments=None,
                        stats_overrides=None):
    """Calculate stats for the given log the same way as the engine does."""
    if strategies is None:
        strategies = []
    if arguments is None:
        arguments = []

    log_lines = utils.decode_to_unicode(
        utils.read_data_from_file(log_path, eval_data=False)).splitlines()
    stats = libfuzzer.parse_log_stats(log_lines)
    stats.update(
        performance_stats.parse_performance_features(log_lines, strategies,
                                                     arguments))
    if stats_overrides:
        stats.update(stats_overrides)

    return stats
Example No. 14
def get_additional_command_line_flags(testcase_path):
    """Returns additional command line flags to use for a testcase."""
    # Get the initial flags list from the environment value.
    additional_command_line_flags = (environment.get_value(
        'ADDITIONAL_COMMAND_LINE_FLAGS', ''))

    # If the testcase doesn't have the fuzz prefix, there is no need to look
    # for a flags file.
    testcase_filename = os.path.basename(testcase_path)
    if not testcase_filename.startswith(FUZZ_PREFIX):
        return additional_command_line_flags

    # Get the flags list from the flags file.
    stripped_testcase_name = testcase_filename[len(FUZZ_PREFIX):]
    flags_filename = '%s%s' % (FLAGS_PREFIX, stripped_testcase_name)
    flags_file_path = os.path.join(os.path.dirname(testcase_path),
                                   flags_filename)
    flags_file_content = utils.read_data_from_file(flags_file_path,
                                                   eval_data=False)
    if flags_file_content:
        additional_command_line_flags += ' ' + flags_file_content.decode(
            'utf-8')
    return additional_command_line_flags.strip()
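
A sketch of the file naming convention the lookup relies on, assuming FUZZ_PREFIX and FLAGS_PREFIX are simple filename prefixes (their exact values are defined elsewhere in the module); the paths below are illustrative.

# If FUZZ_PREFIX were 'fuzz-' and FLAGS_PREFIX were 'flags-' (assumed values),
# a testcase and its optional per-testcase flags file would pair up like this:
#
#   /input/fuzz-testcase-1234   <- testcase path passed to this function
#   /input/flags-testcase-1234  <- flags file read via read_data_from_file
#
flags = get_additional_command_line_flags('/input/fuzz-testcase-1234')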
Example No. 15
def _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    """Return true if the data bundle is up to date, false otherwise."""
    sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory)

    if environment.is_trusted_host() and data_bundle.sync_to_worker:
        from clusterfuzz._internal.bot.untrusted_runner import file_host
        worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path)
        shell.remove_file(sync_file_path)
        file_host.copy_file_from_worker(worker_sync_file_path, sync_file_path)

    if not os.path.exists(sync_file_path):
        return False

    last_sync_time = datetime.datetime.utcfromtimestamp(
        utils.read_data_from_file(sync_file_path))

    # Check if we recently synced.
    if not dates.time_has_expired(
            last_sync_time, seconds=_DATA_BUNDLE_SYNC_INTERVAL_IN_SECONDS):
        return True

    # For the search index data bundle, we don't sync it from the bucket.
    # Instead, we rely on the fuzzer to generate testcases periodically.
    if _is_search_index_data_bundle(data_bundle.name):
        return False

    # Check when the bucket URL was last updated. If there are no new updates,
    # there is no need to update the directory.
    bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name)
    last_updated_time = storage.last_updated(bucket_url)
    if last_updated_time and last_sync_time > last_updated_time:
        logs.log('Data bundle %s has no new content from last sync.' %
                 data_bundle.name)
        return True

    return False
Example No. 16
    def fuzz(
        self,
        fuzz_timeout,
        additional_args,
        unused_additional_args=None,
        unused_extra_env=None,
    ) -> engine.FuzzResult:
        """This is where actual syzkaller fuzzing is done.

    Args:
      fuzz_timeout (float): The maximum time in seconds that fuzz job is allowed
          to run for.
      additional_args: A sequence of additional arguments to be passed to
          the executable.
    Returns:
      engine.FuzzResult
    """

        logs.log('Running Syzkaller.')
        additional_args = copy.copy(additional_args)

        # Save kernel_bid for later in case the device is down.
        _, kernel_bid = kernel_utils.get_kernel_hash_and_build_id()

        fuzz_result = self.run_and_loop(additional_args, timeout=fuzz_timeout)
        logs.log('Syzkaller stopped, fuzzing timed out: {}'.format(
            fuzz_result.time_executed))

        fuzz_logs = (fuzz_result.output or '') + '\n'
        crashes = []
        parsed_stats = {}
        visited = set()
        for subdir, _, files in os.walk(get_work_dir()):
            for file in files:
                # Each crash typically has 2 files: reportN and logN. Similar
                # crashes are grouped together in subfolders. unique_crash joins
                # the subfolder name and reportN.
                unique_crash = os.path.join(subdir, file)
                if fnmatch.fnmatch(file,
                                   'report*') and unique_crash not in visited:
                    visited.add(unique_crash)
                    log_content = self._filter_log(
                        utils.read_data_from_file(
                            os.path.join(subdir, file),
                            eval_data=False).decode('utf-8'))
                    fuzz_logs += log_content + '\n'

                    # Each crash (report file) has a corresponding log file that
                    # contains the syscalls that caused the crash. This file is
                    # located in the same subfolder and has the same number.
                    # E.g. ./439c37d288d4f26a33a6c7e5c57a97791453a447/report15 and
                    # ./439c37d288d4f26a33a6c7e5c57a97791453a447/log15.
                    crash_testcase_file_path = os.path.join(
                        subdir, 'log' + file[len('report'):])

                    # TODO(hzawawy): Parse stats information and add them to FuzzResult.

                    if crash_testcase_file_path:
                        reproduce_arguments = [unique_crash]
                        actual_duration = int(fuzz_result.time_executed)
                        # Write the new testcase.
                        # Copy crash testcase contents into the main testcase path.
                        crashes.append(
                            engine.Crash(crash_testcase_file_path, log_content,
                                         reproduce_arguments, actual_duration))

        _upload_kernel_coverage_data(get_cover_file_path(), kernel_bid)
        return engine.FuzzResult(fuzz_logs, fuzz_result.command, crashes,
                                 parsed_stats, fuzz_result.time_executed)
Example No. 17
def update_source_code():
    """Updates source code files with latest version from appengine."""
    process_handler.cleanup_stale_processes()
    shell.clear_temp_directory()

    root_directory = environment.get_value('ROOT_DIR')
    temp_directory = environment.get_value('BOT_TMPDIR')
    temp_archive = os.path.join(temp_directory, 'clusterfuzz-source.zip')
    try:
        storage.copy_file_from(get_source_url(), temp_archive)
    except Exception:
        logs.log_error('Could not retrieve source code archive from url.')
        return

    try:
        file_list = archive.get_file_list(temp_archive)
        zip_archive = zipfile.ZipFile(temp_archive, 'r')
    except Exception:
        logs.log_error('Bad zip file.')
        return

    src_directory = os.path.join(root_directory, 'src')
    output_directory = os.path.dirname(root_directory)
    error_occurred = False
    normalized_file_set = set()
    for filepath in file_list:
        filename = os.path.basename(filepath)

        # This file cannot be updated on the fly since it is running as a server.
        if filename == 'adb':
            continue

        absolute_filepath = os.path.join(output_directory, filepath)
        if os.path.altsep:
            absolute_filepath = absolute_filepath.replace(
                os.path.altsep, os.path.sep)

        if os.path.realpath(absolute_filepath) != absolute_filepath:
            continue

        normalized_file_set.add(absolute_filepath)
        try:
            file_extension = os.path.splitext(filename)[1]

            # Remove any .so files first before overwriting, as they can be loaded
            # in the memory of existing processes. Overwriting them directly causes
            # segfaults in existing processes (e.g. run.py).
            if file_extension == '.so' and os.path.exists(absolute_filepath):
                os.remove(absolute_filepath)

            # On Windows, to update DLLs (and native .pyd extensions), we rename
            # them first so that we can install the new version.
            if (environment.platform() == 'WINDOWS'
                    and file_extension in ['.dll', '.pyd']
                    and os.path.exists(absolute_filepath)):
                _rename_dll_for_update(absolute_filepath)
        except Exception:
            logs.log_error('Failed to remove or move %s before extracting new '
                           'version.' % absolute_filepath)

        try:
            extracted_path = zip_archive.extract(filepath, output_directory)
            external_attr = zip_archive.getinfo(filepath).external_attr
            mode = (external_attr >> 16) & 0o777
            mode |= 0o440
            os.chmod(extracted_path, mode)
        except:
            error_occurred = True
            logs.log_error('Failed to extract file %s from source archive.' %
                           filepath)

    zip_archive.close()

    if error_occurred:
        return

    clear_pyc_files(src_directory)
    clear_old_files(src_directory, normalized_file_set)

    local_manifest_path = os.path.join(root_directory,
                                       utils.LOCAL_SOURCE_MANIFEST)
    source_version = utils.read_data_from_file(
        local_manifest_path, eval_data=False).decode('utf-8').strip()
    logs.log('Source code updated to %s.' % source_version)
Example No. 18
def check_symbols_cached(build_params_check_path, build_params):
  """Return whether the symbols matching build_params are already cached."""
  cached_build_params = utils.read_data_from_file(
      build_params_check_path, eval_data=True)
  return cached_build_params and cached_build_params == build_params
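
A hypothetical complement to the check above (not from the original code): persisting the same build_params after a successful download makes the next check_symbols_cached call return True, since eval_data=True reads the literal back as a dict.

def mark_symbols_cached(build_params_check_path, build_params):
  """Illustrative writer matching the reader above; the helper name is assumed."""
  # Write the dict as a Python literal so read_data_from_file(eval_data=True)
  # can evaluate it back on the next check.
  utils.write_data_to_file(repr(build_params), build_params_check_path)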