Example no. 1
0
def download_model_from_gcs(local_model_directory, fuzzer_name):
    """Fetch the RNN model for |fuzzer_name| from GCS into |local_model_directory|.

    Returns:
      True when both model files were downloaded successfully, False otherwise
      (missing bucket config, missing remote files, or a failed copy).
    """
    # The ML model is stored in the same bucket as the corpus.
    corpus_bucket = environment.get_value('CORPUS_BUCKET')
    if not corpus_bucket:
        logs.log('Corpus bucket is not set. Skip generation.')
        return False

    # Cloud storage directory holding the model,
    # e.g. gs://clusterfuzz-corpus/rnn/libpng_read_fuzzer
    gcs_model_directory = 'gs://%s/%s/%s' % (
        corpus_bucket, constants.RNN_MODEL_NAME, fuzzer_name)

    logs.log('GCS model directory for fuzzer %s is %s.' %
             (fuzzer_name, gcs_model_directory))

    # The RNN model consists of a data file and an index file.
    model_filenames = [
        constants.RNN_MODEL_NAME + suffix
        for suffix in (constants.MODEL_DATA_SUFFIX,
                       constants.MODEL_INDEX_SUFFIX)
    ]

    gcs_paths = [
        '%s/%s' % (gcs_model_directory, filename)
        for filename in model_filenames
    ]

    # Bail out if any remote model file is missing (short-circuits like the
    # original `and` chain).
    if not all(storage.exists(path) for path in gcs_paths):
        logs.log(
            'ML RNN model for fuzzer %s does not exist. Skip generation.' %
            fuzzer_name)
        return False

    local_paths = [
        os.path.join(local_model_directory, filename)
        for filename in model_filenames
    ]

    # Download both model files; stops at the first failure.
    downloaded = all(
        storage.copy_file_from(gcs_path, local_path)
        for gcs_path, local_path in zip(gcs_paths, local_paths))

    if not downloaded:
        logs.log(
            'Failed to download RNN model for fuzzer %s. Skip generation.' %
            fuzzer_name)
        return False

    return True
Example no. 2
0
def get_introspector_index():
    """Load and return the introspector projects status index from GCS."""
    # Default to an empty index when the remote file does not exist.
    introspector_index = {}
    if storage.exists(INTROSPECTOR_INDEX_JSON_URL):
        raw_index = storage.read_data(INTROSPECTOR_INDEX_JSON_URL)
        introspector_index = json.loads(raw_index)

    logs.log('Loaded introspector status: %d' % len(introspector_index))
    return introspector_index
    def _cross_pollinate_other_fuzzer_corpuses(self):
        """Add other fuzzer corpuses to shared corpus path for cross-pollination."""
        # Look at backups from a fixed number of days back.
        backup_date = utils.utcnow().date() - datetime.timedelta(
            days=data_types.CORPUS_BACKUP_PUBLIC_LOOKBACK_DAYS)

        for fuzzer in self.cross_pollinate_fuzzers:
            qualified_name = fuzzer.fuzz_target.project_qualified_name()

            backup_url = corpus_manager.gcs_url_for_backup_file(
                fuzzer.backup_bucket_name, fuzzer.corpus_engine_name,
                qualified_name, backup_date)
            local_filename = '%s-%s' % (qualified_name,
                                        os.path.basename(backup_url))
            local_backup_path = os.path.join(self.shared_corpus_path,
                                             local_filename)

            # A missing backup is expected in some cases: a newly checked-in
            # fuzz target, or a day whose backup was not captured (for OSS-Fuzz
            # this surfaces as a 403 rather than 404 since the GCS path belongs
            # to another project). Just warn for debugging purposes and move on.
            if not storage.exists(backup_url, ignore_errors=True):
                logs.log_warn('Corpus backup does not exist, ignoring: %s.' %
                              backup_url)
                continue

            if not storage.copy_file_from(backup_url, local_backup_path):
                continue

            # Unpack into a per-project subdirectory of the shared corpus, then
            # drop the downloaded archive.
            output_directory = os.path.join(self.shared_corpus_path,
                                            qualified_name)
            shell.create_directory(output_directory)
            unpacked = archive.unpack(local_backup_path, output_directory)
            shell.remove_file(local_backup_path)

            if unpacked:
                logs.log(
                    'Corpus backup url %s successfully unpacked into shared corpus.'
                    % backup_url)
            else:
                logs.log_error('Failed to unpack corpus backup from url %s.' %
                               backup_url)
  def update_recommended_dictionary(self, new_dictionary):
    """Update recommended dictionary stored in GCS with new dictionary elements.

    Args:
      new_dictionary: A set of dictionary elements to be added into dictionary.
        NOTE(review): this set is mutated in place (via `|=` below) — confirm
        callers do not rely on it being unchanged.

    Returns:
      A number of new elements actually added to the dictionary stored in GCS.
    """
    # Library mode has no GCS-backed dictionary to maintain.
    if environment.is_lib():
      return 0

    # If the dictionary does not already exist, then directly update it.
    if not storage.exists(self.gcs_path):
      storage.write_data('\n'.join(new_dictionary).encode('utf-8'),
                         self.gcs_path)
      return len(new_dictionary)

    # Read current version of the dictionary.
    old_dictionary_data = storage.read_data(self.gcs_path).decode('utf-8')

    # Use "Compare-and-swap"-like approach to avoid race conditions and also to
    # avoid having a separate job merging multiple recommended dictionaries.
    succeeded = False
    while not succeeded:
      # If old_dictionary_data is None, there is no dictionary in GCS yet, i.e.
      # it's empty. Otherwise, we parse it and use it.
      old_dictionary = set()
      if old_dictionary_data:
        old_dictionary = set(old_dictionary_data.splitlines())

      # Merge two dictionaries. Accumulates across retries: each failed swap
      # refreshes old_dictionary_data, and the next iteration folds the newer
      # remote elements into new_dictionary as well.
      new_dictionary |= old_dictionary
      if new_dictionary == old_dictionary:
        # "New dictionary" elements have been already added to GCS, bail out.
        return 0

      # On failure the swap returns the latest remote contents, which seeds
      # the next loop iteration.
      succeeded, old_dictionary_data = self._compare_and_swap_gcs_dictionary(
          old_dictionary_data, '\n'.join(new_dictionary))

    # old_dictionary here is the remote state from the final (successful)
    # iteration, so the difference counts elements we actually added.
    return len(new_dictionary) - len(old_dictionary)
  def download_recommended_dictionary_from_gcs(self, local_dict_path):
    """Download recommended dictionary from GCS to the given location.

    Args:
      local_dict_path: Path to a dictionary file on the disk.

    Returns:
      A boolean indicating whether downloading succeeded or not.
    """
    # Fix: was `return 0`, which contradicts the documented boolean contract.
    # `False == 0`, so existing truthiness-based callers are unaffected.
    if environment.is_lib():
      return False

    # When the fuzz target is initially created or when it has no new
    # coverage or dictionary recommendations, then we won't have a
    # recommended dictionary in GCS.
    if not storage.exists(self.gcs_path):
      return False

    # Copy the remote dictionary to the requested local path.
    if storage.copy_file_from(self.gcs_path, local_dict_path):
      return True

    logs.log('Downloading %s failed.' % self.gcs_path)
    return False