Exemplo n.º 1
0
def get_corpus(corpus_directory, fuzzer_name):
    """Fetch the latest corpus backup for |fuzzer_name| into |corpus_directory|.

    Downloads the latest corpus backup archive from GCS, extracts its
    contents into the corpus directory, then deletes the local archive.

    Args:
      corpus_directory: The directory to place corpus files in.
      fuzzer_name: Fuzzer name, e.g. libpng_read_fuzzer, xml_parser_fuzzer, etc.

    Returns:
      True if the corpus can be acquired and False otherwise.
    """
    backup_bucket_name = environment.get_value('BACKUP_BUCKET')
    corpus_fuzzer_name = environment.get_value('CORPUS_FUZZER_NAME_OVERRIDE')

    # GCS URL pointing at the most recent corpus backup file.
    gcs_backup_path = corpus_manager.gcs_url_for_backup_file(
        backup_bucket_name, corpus_fuzzer_name, fuzzer_name,
        corpus_manager.LATEST_BACKUP_TIMESTAMP)

    # Local destination mirrors the backup file's basename.
    local_backup_path = os.path.join(corpus_directory,
                                     os.path.basename(gcs_backup_path))

    # Download latest backup; bail out early on failure.
    if not storage.copy_file_from(gcs_backup_path, local_backup_path):
        logs.log_error('Failed to download corpus from GCS bucket {}.'.format(
            gcs_backup_path))
        return False

    # Extract corpus from the archive, then remove the archive itself.
    archive.unpack(local_backup_path, corpus_directory)
    shell.remove_file(local_backup_path)
    return True
Exemplo n.º 2
0
def _unpack_mutator_plugin(mutator_plugin_archive_path):
    """Extract a mutator plugin archive into the unpacked plugins directory.

    Returns the directory path the plugin was unpacked into.
    """
    # The plugin's name is the archive filename without its extension.
    archive_basename = os.path.basename(mutator_plugin_archive_path)
    plugin_name, _ = os.path.splitext(archive_basename)
    destination_dir = os.path.join(_get_mutator_plugins_unpacked_dir(),
                                   plugin_name)
    archive.unpack(mutator_plugin_archive_path, destination_dir)
    return destination_dir
Exemplo n.º 3
0
    def test_unpack_file_with_cwd_prefix(self):
        """Test unpack with trusted=False passes with file having './' prefix."""
        tgz_path = os.path.join(TESTDATA_PATH, 'cwd-prefix.tgz')
        output_directory = tempfile.mkdtemp(prefix='cwd-prefix')
        archive.unpack(tgz_path, output_directory, trusted=False)

        test_file_path = os.path.join(output_directory, 'test')
        self.assertTrue(os.path.exists(test_file_path))
        # Use a context manager so the file handle is closed deterministically
        # instead of leaking until garbage collection.
        with open(test_file_path) as test_file:
            self.assertEqual(test_file.read(), 'abc\n')

        shell.remove_directory(output_directory)
Exemplo n.º 4
0
def update_tests_if_needed():
    """Updates layout tests every day.

    Downloads and unpacks the web tests archive at most once per
    TESTS_UPDATE_INTERVAL_DAYS, retrying up to FAIL_RETRIES times.
    Skipped entirely during local development or when updates are
    disabled via |UPDATE_WEB_TESTS|.
    """
    data_directory = environment.get_value('FUZZ_DATA')
    error_occurred = False
    expected_task_duration = 60 * 60  # 1 hour.
    retry_limit = environment.get_value('FAIL_RETRIES')
    temp_archive = os.path.join(data_directory, 'temp.zip')
    tests_url = environment.get_value('WEB_TESTS_URL')

    # Check if we have a valid tests url.
    if not tests_url:
        return

    # Layout test updates are usually disabled to speedup local testing.
    if environment.get_value('LOCAL_DEVELOPMENT'):
        return

    # |UPDATE_WEB_TESTS| env variable can be used to control our update behavior.
    if not environment.get_value('UPDATE_WEB_TESTS'):
        return

    # Skip the update when the cached copy is still fresh.
    last_modified_time = persistent_cache.get_value(
        TESTS_LAST_UPDATE_KEY, constructor=datetime.datetime.utcfromtimestamp)
    if (last_modified_time is not None
            and not dates.time_has_expired(last_modified_time,
                                           days=TESTS_UPDATE_INTERVAL_DAYS)):
        return

    logs.log('Updating layout tests.')
    tasks.track_task_start(tasks.Task('update_tests', '', ''),
                           expected_task_duration)

    # Download and unpack the tests archive.
    for _ in range(retry_limit):
        try:
            shell.remove_directory(data_directory, recreate=True)
            storage.copy_file_from(tests_url, temp_archive)
            archive.unpack(temp_archive, data_directory, trusted=True)
            shell.remove_file(temp_archive)
            error_occurred = False
            break
        # Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate instead of being swallowed by retries.
        except Exception:
            logs.log_error(
                'Could not retrieve and unpack layout tests archive. Retrying.'
            )
            error_occurred = True

    # Only record a successful update timestamp.
    if not error_occurred:
        persistent_cache.set_value(TESTS_LAST_UPDATE_KEY,
                                   time.time(),
                                   persist_across_reboots=True)

    tasks.track_task_end()
Exemplo n.º 5
0
def download_system_symbols_if_needed(symbols_directory):
  """Download system libraries from |SYMBOLS_URL| and cache locally.

  Args:
    symbols_directory: Directory the symbols archive is downloaded to and
      unpacked into. A '.cached_build_params' file inside it tracks which
      build the cache corresponds to.
  """
  if not should_download_symbols():
    return

  # Get the build fingerprint parameters.
  build_params = settings.get_build_parameters()
  if not build_params:
    logs.log_error('Unable to determine build parameters.')
    return

  # Skip the download entirely if the cache already matches this build.
  build_params_check_path = os.path.join(symbols_directory,
                                         '.cached_build_params')
  if check_symbols_cached(build_params_check_path, build_params):
    return

  build_id = build_params.get('build_id')
  target = build_params.get('target')
  build_type = build_params.get('type')
  if not build_id or not target or not build_type:
    logs.log_error('Null build parameters found, exiting.')
    return

  symbols_archive_filename = f'{target}-symbols-{build_id}.zip'
  artifact_file_name = symbols_archive_filename
  output_filename_override = None

  # Include type and sanitizer information in the target.
  tool_suffix = environment.get_value('SANITIZER_TOOL_NAME')
  target_with_type_and_san = f'{target}-{build_type}'
  # Idiomatic `not in` (was `not tool_suffix in ...`).
  if tool_suffix and tool_suffix not in target_with_type_and_san:
    target_with_type_and_san += f'_{tool_suffix}'

  targets_with_type_and_san = [target_with_type_and_san]

  symbols_archive_path = os.path.join(symbols_directory,
                                      symbols_archive_filename)
  download_artifact_if_needed(build_id, symbols_directory, symbols_archive_path,
                              targets_with_type_and_san, artifact_file_name,
                              output_filename_override)
  if not os.path.exists(symbols_archive_path):
    logs.log_error(
        'Unable to locate symbols archive %s.' % symbols_archive_path)
    return

  # Store the artifact for later use or for use by other bots.
  storage.store_file_in_cache(symbols_archive_path)

  archive.unpack(symbols_archive_path, symbols_directory, trusted=True)
  shell.remove_file(symbols_archive_path)

  # Record the build params so the next call can hit the cache check above.
  utils.write_data_to_file(build_params, build_params_check_path)
Exemplo n.º 6
0
def unpack_testcase(testcase):
    """Unpack a testcase and return all files it is composed of.

    Args:
      testcase: Testcase entity with minimized/fuzzed blob keys and archive
        state flags.

    Returns:
      Tuple (file_list, input_directory, testcase_file_path). |file_list| is
      None when the blob could not be read or the expected file is missing
      from the archive.
    """
    # Figure out where the testcase file should be stored.
    input_directory, testcase_file_path = _get_testcase_file_and_path(testcase)

    # Prefer the minimized testcase when one exists ('NA' means none).
    minimized = testcase.minimized_keys and testcase.minimized_keys != 'NA'
    if minimized:
        key = testcase.minimized_keys
        archived = bool(testcase.archive_state
                        & data_types.ArchiveStatus.MINIMIZED)
    else:
        key = testcase.fuzzed_keys
        archived = bool(testcase.archive_state
                        & data_types.ArchiveStatus.FUZZED)

    # Archived testcases are downloaded to a temporary archive path first;
    # plain testcases go straight to their final location.
    if archived:
        if minimized:
            temp_filename = (os.path.join(
                input_directory,
                str(testcase.key.id()) + _TESTCASE_ARCHIVE_EXTENSION))
        else:
            temp_filename = os.path.join(input_directory,
                                         testcase.archive_filename)
    else:
        temp_filename = testcase_file_path

    if not blobs.read_blob_to_disk(key, temp_filename):
        return None, input_directory, testcase_file_path

    file_list = []
    if archived:
        archive.unpack(temp_filename, input_directory)
        file_list = archive.get_file_list(temp_filename)
        shell.remove_file(temp_filename)

        # The archive must contain the file we intend to run.
        testcase_basename = os.path.basename(testcase_file_path)
        file_exists = any(
            os.path.basename(file_name) == testcase_basename
            for file_name in file_list)

        if not file_exists:
            logs.log_error(
                'Expected file to run %s is not in archive. Base directory is %s and '
                'files in archive are [%s].' %
                (testcase_file_path, input_directory, ','.join(file_list)))
            return None, input_directory, testcase_file_path
    else:
        file_list.append(testcase_file_path)

    return file_list, input_directory, testcase_file_path
Exemplo n.º 7
0
def _download_testcase(testcase_id, testcase, configuration):
    """Download the test case and return its path."""
    print('Downloading testcase...')
    download_url = '{url}?id={id}'.format(
        url=configuration.get('testcase_download_url'), id=testcase_id)
    response, content = http_utils.request(download_url,
                                           method=http_utils.GET_METHOD,
                                           configuration=configuration)

    if response.status != 200:
        raise errors.ReproduceToolUnrecoverableError(
            'Unable to download test case.')

    bot_absolute_filename = response[FILENAME_RESPONSE_HEADER]
    # Store the test case in the config directory for debuggability.
    testcase_directory = os.path.join(CONFIG_DIRECTORY, 'current-testcase')
    shell.remove_directory(testcase_directory, recreate=True)
    environment.set_value('FUZZ_INPUTS', testcase_directory)
    testcase_path = os.path.join(testcase_directory,
                                 os.path.basename(bot_absolute_filename))

    utils.write_data_to_file(content, testcase_path)

    # Unpack the test case if it's archived.
    # TODO(mbarbella): Rewrite setup.unpack_testcase and share this code.
    minimized = testcase.minimized_keys and testcase.minimized_keys != 'NA'
    mask = (data_types.ArchiveStatus.MINIMIZED
            if minimized else data_types.ArchiveStatus.FUZZED)

    if testcase.archive_state & mask:
        archive.unpack(testcase_path, testcase_directory)
        file_list = archive.get_file_list(testcase_path)

        # Locate the testcase inside the archive by matching its basename.
        expected_basename = os.path.basename(testcase.absolute_path)
        testcase_path = next(
            (os.path.join(testcase_directory, file_name)
             for file_name in file_list
             if os.path.basename(file_name) == expected_basename), None)

        if not testcase_path:
            raise errors.ReproduceToolUnrecoverableError(
                'Test case file was not found in archive.\n'
                'Original filename: {absolute_path}.\n'
                'Archive contents: {file_list}'.format(
                    absolute_path=testcase.absolute_path, file_list=file_list))

    return testcase_path
Exemplo n.º 8
0
def setup_user_profile_directory_if_needed(user_profile_directory):
    """Set user profile directory if it does not exist."""
    if os.path.exists(user_profile_directory):
        # Already set up by an earlier run; nothing to do.
        return

    shell.create_directory(user_profile_directory)

    # Create a file in user profile directory based on format:
    # filename;base64 encoded zlib compressed file contents.
    user_profile_file = environment.get_value('USER_PROFILE_FILE')
    if user_profile_file and ';' in user_profile_file:
        profile_filename, encoded_contents = user_profile_file.split(';', 1)
        decoded_contents = zlib.decompress(base64.b64decode(encoded_contents))
        profile_file_path = os.path.join(user_profile_directory,
                                         profile_filename)
        utils.write_data_to_file(decoded_contents, profile_file_path)

    # For Firefox, we need to install a special fuzzPriv extension that exposes
    # special functions to javascript, e.g. gc(), etc.
    app_name = environment.get_value('APP_NAME')
    if not app_name.startswith('firefox'):
        return

    # Create extensions directory.
    extensions_directory = os.path.join(user_profile_directory, 'extensions')
    shell.create_directory(extensions_directory)

    # Unpack the fuzzPriv extension.
    extension_archive = os.path.join(environment.get_resources_directory(),
                                     'firefox', 'fuzzPriv-extension.zip')
    archive.unpack(extension_archive, extensions_directory)

    # Add this extension in the extensions configuration file.
    extension_config_file_path = os.path.join(user_profile_directory,
                                              'extensions.ini')
    fuzz_extension_directory = os.path.join(extensions_directory,
                                            '*****@*****.**')
    extension_config_file_contents = ('[ExtensionDirs]\r\n'
                                      'Extension0=%s\r\n'
                                      '\r\n'
                                      '[ThemeDirs]\r\n' %
                                      fuzz_extension_directory)
    utils.write_data_to_file(extension_config_file_contents,
                             extension_config_file_path)
Exemplo n.º 9
0
def download_latest_build(build_info, image_regexes, image_directory):
  """Download the latest build artifact for the given branch and target.

  Args:
    build_info: Dict with at least 'bid' (build id), 'target' and 'branch'.
    image_regexes: Regexes matching the image artifacts to fetch.
    image_directory: Directory to download (and unpack) images into.
  """
  # Check if our local build matches the latest build. If not, we will
  # download it.
  build_id = build_info['bid']
  target = build_info['target']
  last_build_info = persistent_cache.get_value(constants.LAST_FLASH_BUILD_KEY)
  if last_build_info and last_build_info['bid'] == build_id:
    return

  # Clean up the images directory first.
  shell.remove_directory(image_directory, recreate=True)
  for image_regex in image_regexes:
    image_file_path = fetch_artifact.get(build_id, target, image_regex,
                                         image_directory)
    if not image_file_path:
      # Bug fix: log the regex that failed to fetch — |image_file_path| is
      # None/empty in this branch, so the old message was always useless.
      logs.log_error('Failed to download artifact %s for '
                     'branch %s and target %s.' %
                     (image_regex, build_info['branch'], target))
      return
    # endswith accepts a tuple of suffixes — one call instead of an `or`.
    if image_file_path.endswith(('.zip', '.tar.gz')):
      archive.unpack(image_file_path, image_directory)
    def _cross_pollinate_other_fuzzer_corpuses(self):
        """Add other fuzzer corpuses to shared corpus path for cross-pollination."""
        # Use a backup old enough to be publicly available.
        backup_date = utils.utcnow().date() - datetime.timedelta(
            days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

        for pollinate_fuzzer in self.cross_pollinate_fuzzers:
            qualified_name = (
                pollinate_fuzzer.fuzz_target.project_qualified_name())
            bucket_name = pollinate_fuzzer.backup_bucket_name
            engine_name = pollinate_fuzzer.corpus_engine_name

            backup_url = corpus_manager.gcs_url_for_backup_file(
                bucket_name, engine_name, qualified_name, backup_date)
            local_filename = '%s-%s' % (qualified_name,
                                        os.path.basename(backup_url))
            local_backup_path = os.path.join(self.shared_corpus_path,
                                             local_filename)

            if not storage.exists(backup_url, ignore_errors=True):
                # This can happen in cases when a new fuzz target is checked in or if
                # missed to capture a backup for a particular day (for OSS-Fuzz, this
                # will result in a 403 instead of 404 since that GCS path belongs to
                # other project). So, just log a warning for debugging purposes only.
                logs.log_warn('Corpus backup does not exist, ignoring: %s.' %
                              backup_url)
                continue

            if not storage.copy_file_from(backup_url, local_backup_path):
                continue

            # Unpack each fuzzer's backup into its own subdirectory.
            output_directory = os.path.join(self.shared_corpus_path,
                                            qualified_name)
            shell.create_directory(output_directory)
            unpack_succeeded = archive.unpack(local_backup_path,
                                              output_directory)
            shell.remove_file(local_backup_path)

            if unpack_succeeded:
                logs.log(
                    'Corpus backup url %s successfully unpacked into shared corpus.'
                    % backup_url)
            else:
                logs.log_error('Failed to unpack corpus backup from url %s.' %
                               backup_url)
Exemplo n.º 11
0
def update_fuzzer_and_data_bundles(fuzzer_name):
    """Update the fuzzer with a given name if necessary.

    Looks up the fuzzer entity, configures fuzzer-related environment
    variables, re-downloads and unpacks the fuzzer archive when a newer
    revision is available, and sets up the data bundles the fuzzer uses.

    Args:
      fuzzer_name: Name of the fuzzer to update.

    Returns:
      The fuzzer entity on success, or None on any setup failure.

    Raises:
      errors.InvalidFuzzerError: If no fuzzer exists with |fuzzer_name|.
    """
    fuzzer = data_types.Fuzzer.query(
        data_types.Fuzzer.name == fuzzer_name).get()
    if not fuzzer:
        logs.log_error('No fuzzer exists with name %s.' % fuzzer_name)
        raise errors.InvalidFuzzerError

    # Set some helper environment variables.
    fuzzer_directory = get_fuzzer_directory(fuzzer_name)
    environment.set_value('FUZZER_DIR', fuzzer_directory)
    environment.set_value('UNTRUSTED_CONTENT', fuzzer.untrusted_content)

    # If the fuzzer generates large testcases or a large number of small ones
    # that don't fit on tmpfs, then use the larger disk directory.
    if fuzzer.has_large_testcases:
        testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK')
        environment.set_value('FUZZ_INPUTS', testcase_disk_directory)

    # Adjust the test timeout, if user has provided one.
    if fuzzer.timeout:
        environment.set_value('TEST_TIMEOUT', fuzzer.timeout)

        # Increase fuzz test timeout if the fuzzer timeout is higher than its
        # current value.
        fuzz_test_timeout = environment.get_value('FUZZ_TEST_TIMEOUT')
        if fuzz_test_timeout and fuzz_test_timeout < fuzzer.timeout:
            environment.set_value('FUZZ_TEST_TIMEOUT', fuzzer.timeout)

    # Adjust the max testcases if this fuzzer has specified a lower limit.
    max_testcases = environment.get_value('MAX_TESTCASES')
    if fuzzer.max_testcases and fuzzer.max_testcases < max_testcases:
        environment.set_value('MAX_TESTCASES', fuzzer.max_testcases)

    # Check for updates to this fuzzer. Builtin fuzzers are never re-downloaded;
    # the version file records which revision is currently on disk.
    version_file = os.path.join(fuzzer_directory, '.%s_version' % fuzzer_name)
    if (not fuzzer.builtin
            and revisions.needs_update(version_file, fuzzer.revision)):
        logs.log('Fuzzer update was found, updating.')

        # Clear the old fuzzer directory if it exists.
        if not shell.remove_directory(fuzzer_directory, recreate=True):
            logs.log_error('Failed to clear fuzzer directory.')
            return None

        # Copy the archive to local disk and unpack it.
        archive_path = os.path.join(fuzzer_directory, fuzzer.filename)
        if not blobs.read_blob_to_disk(fuzzer.blobstore_key, archive_path):
            logs.log_error('Failed to copy fuzzer archive.')
            return None

        try:
            archive.unpack(archive_path, fuzzer_directory)
        except Exception:
            error_message = (
                'Failed to unpack fuzzer archive %s '
                '(bad archive or unsupported format).') % fuzzer.filename
            logs.log_error(error_message)
            fuzzer_logs.upload_script_log('Fatal error: ' + error_message,
                                          fuzzer_name=fuzzer_name)
            return None

        # Verify the declared executable actually exists in the archive.
        fuzzer_path = os.path.join(fuzzer_directory, fuzzer.executable_path)
        if not os.path.exists(fuzzer_path):
            error_message = (
                'Fuzzer executable %s not found. '
                'Check fuzzer configuration.') % fuzzer.executable_path
            logs.log_error(error_message)
            fuzzer_logs.upload_script_log('Fatal error: ' + error_message,
                                          fuzzer_name=fuzzer_name)
            return None

        # Make fuzzer executable.
        os.chmod(fuzzer_path, 0o750)

        # Cleanup unneeded archive.
        shell.remove_file(archive_path)

        # Save the current revision of this fuzzer in a file for later checks.
        revisions.write_revision_to_revision_file(version_file,
                                                  fuzzer.revision)
        logs.log('Updated fuzzer to revision %d.' % fuzzer.revision)

    _clear_old_data_bundles_if_needed()

    # Setup data bundles associated with this fuzzer.
    data_bundles = ndb_utils.get_all_from_query(
        data_types.DataBundle.query(
            data_types.DataBundle.name == fuzzer.data_bundle_name))
    for data_bundle in data_bundles:
        if not update_data_bundle(fuzzer, data_bundle):
            return None

    # Setup environment variable for launcher script path.
    if fuzzer.launcher_script:
        fuzzer_launcher_path = os.path.join(fuzzer_directory,
                                            fuzzer.launcher_script)
        environment.set_value('LAUNCHER_PATH', fuzzer_launcher_path)

        # For launcher script usecase, we need the entire fuzzer directory on the
        # worker.
        if environment.is_trusted_host():
            from clusterfuzz._internal.bot.untrusted_runner import file_host
            worker_fuzzer_directory = file_host.rebase_to_worker_root(
                fuzzer_directory)
            file_host.copy_directory_to_worker(fuzzer_directory,
                                               worker_fuzzer_directory,
                                               replace=True)

    return fuzzer
Exemplo n.º 12
0
def execute_task(metadata_id, job_type):
    """Unpack a bundled testcase archive and create analyze jobs for each item.

    Args:
      metadata_id: Id of the BundledArchiveMetadata entity describing the
        uploaded multi-testcase archive.
      job_type: Job type, re-queued verbatim on retriable failures.
    """
    metadata = ndb.Key(data_types.BundledArchiveMetadata,
                       int(metadata_id)).get()
    if not metadata:
        logs.log_error('Invalid bundle metadata id %s.' % metadata_id)
        return

    bot_name = environment.get_value('BOT_NAME')
    upload_metadata = data_types.TestcaseUploadMetadata.query(
        data_types.TestcaseUploadMetadata.blobstore_key ==
        metadata.blobstore_key).get()
    if not upload_metadata:
        logs.log_error('Invalid upload metadata key %s.' %
                       metadata.blobstore_key)
        return

    job = data_types.Job.query(data_types.Job.name == metadata.job_type).get()
    if not job:
        logs.log_error('Invalid job_type %s.' % metadata.job_type)
        return

    # Update the upload metadata with this bot name.
    upload_metadata.bot_name = bot_name
    upload_metadata.put()

    # We can't use FUZZ_INPUTS directory since it is constrained
    # by tmpfs limits.
    testcases_directory = environment.get_value('FUZZ_INPUTS_DISK')

    # Retrieve multi-testcase archive; re-queue this task on failure.
    archive_path = os.path.join(testcases_directory, metadata.archive_filename)
    if not blobs.read_blob_to_disk(metadata.blobstore_key, archive_path):
        logs.log_error('Could not retrieve archive for bundle %d.' %
                       metadata_id)
        tasks.add_task('unpack', metadata_id, job_type)
        return

    try:
        archive.unpack(archive_path, testcases_directory)
    # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
    # are not silently swallowed; unpack failures are retried via the queue.
    except Exception:
        logs.log_error('Could not unpack archive for bundle %d.' % metadata_id)
        tasks.add_task('unpack', metadata_id, job_type)
        return

    # Get additional testcase metadata (if any).
    additional_metadata = None
    if upload_metadata.additional_metadata_string:
        additional_metadata = json.loads(
            upload_metadata.additional_metadata_string)

    archive_state = data_types.ArchiveStatus.NONE
    bundled = True
    file_list = archive.get_file_list(archive_path)

    for file_path in file_list:
        absolute_file_path = os.path.join(testcases_directory, file_path)
        filename = os.path.basename(absolute_file_path)

        # Only files are actual testcases. Skip directories.
        if not os.path.isfile(absolute_file_path):
            continue

        try:
            # Context manager guarantees the handle is closed even when
            # blobs.write_blob raises.
            with open(absolute_file_path, 'rb') as file_handle:
                blob_key = blobs.write_blob(file_handle)
        except Exception:
            blob_key = None

        if not blob_key:
            logs.log_error('Could not write testcase %s to blobstore.' %
                           absolute_file_path)
            continue

        data_handler.create_user_uploaded_testcase(
            blob_key, metadata.blobstore_key, archive_state,
            metadata.archive_filename, filename, metadata.timeout, job,
            metadata.job_queue, metadata.http_flag, metadata.gestures,
            metadata.additional_arguments, metadata.bug_information,
            metadata.crash_revision, metadata.uploader_email,
            metadata.platform_id, metadata.app_launch_command,
            metadata.fuzzer_name, metadata.overridden_fuzzer_name,
            metadata.fuzzer_binary_name, bundled, upload_metadata.retries,
            upload_metadata.bug_summary_update_flag,
            upload_metadata.quiet_flag, additional_metadata)

    # The upload metadata for the archive is not needed anymore since we created
    # one for each testcase.
    upload_metadata.key.delete()

    shell.clear_testcase_directories()