Example #1
def hash_compressed_content(namespace, content):
    """Decompresses and hashes given |content|.

  Returns tuple (hex digest, expanded size).

  Raises ValueError in case of errors.
  """
    expanded_size = 0
    digest = model.get_hash(namespace)
    try:
        for data in model.expand_content(namespace, [content]):
            expanded_size += len(data)
            digest.update(data)
            # Make sure the data is GC'ed.
            del data
        return digest.hexdigest(), expanded_size
    except zlib.error as e:
        raise ValueError('Data is corrupted: %s' % e)
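
The helpers in this listing depend on the isolate server's model module, which is not shown here. As a rough, self-contained sketch of what the decompress-and-hash loop above does, assuming model.get_hash(namespace) returns a hashlib object and model.expand_content yields zlib-decompressed chunks:

import hashlib
import zlib

def _hash_compressed_sketch(compressed_chunks):
    # Minimal stand-in: model.get_hash() is assumed to return a hashlib
    # object and model.expand_content() to yield decompressed chunks.
    digest = hashlib.sha1()
    decompressor = zlib.decompressobj()
    expanded_size = 0
    try:
        for chunk in compressed_chunks:
            data = decompressor.decompress(chunk)
            expanded_size += len(data)
            digest.update(data)
        data = decompressor.flush()
        expanded_size += len(data)
        digest.update(data)
    except zlib.error as e:
        raise ValueError('Data is corrupted: %s' % e)
    return digest.hexdigest(), expanded_size

For example, _hash_compressed_sketch([zlib.compress(b'hello')]) returns the SHA-1 hex digest of b'hello' and an expanded size of 5.
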
Example #2
def hash_content(namespace, content):
    """Hashes uncompressed content."""
    d = model.get_hash(namespace)
    d.update(content)
    return d.hexdigest()
Example #3
def hash_item(namespace, content):
    """Hashes uncompressed content."""
    h = model.get_hash(namespace)
    h.update(content)
    return h.hexdigest()
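
hash_content and hash_item above are thin wrappers around the namespace's hash object and can be used to check raw content against its expected key. A minimal usage sketch, assuming SHA-1 for the default namespace (the real algorithm comes from model.get_hash):

import hashlib

def verify_key(hash_key, content):
    # Re-hash the raw content and compare against the expected key,
    # mirroring how the hashes above are used for verification.
    return hashlib.sha1(content).hexdigest() == hash_key

assert verify_key(hashlib.sha1(b'payload').hexdigest(), b'payload')
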
Example #4
def _payload_to_hashes(request, namespace):
    """Converts a raw payload into hashes as bytes."""
    h = model.get_hash(namespace)
    return _split_payload(request, h.digest_size, model.MAX_KEYS_PER_DB_OPS)
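
_split_payload is not part of this listing; the digest_size and MAX_KEYS_PER_DB_OPS arguments suggest it slices the raw payload into fixed-size digests and caps how many are accepted. A hypothetical sketch of such a splitter (the name, error handling, and return type are assumptions):

def _split_payload_sketch(payload, chunk_size, max_chunks):
    # Hypothetical equivalent of _split_payload: cut |payload| into
    # |chunk_size|-byte digests and reject oversized or ragged payloads.
    if len(payload) % chunk_size:
        raise ValueError(
            'Payload length %d is not a multiple of %d' %
            (len(payload), chunk_size))
    chunks = [payload[i:i + chunk_size]
              for i in range(0, len(payload), chunk_size)]
    if len(chunks) > max_chunks:
        raise ValueError('Too many hashes: %d > %d' % (len(chunks), max_chunks))
    return chunks

For example, a 40-byte payload with 20-byte SHA-1 digests yields two hashes: len(_split_payload_sketch(b'\x00' * 40, 20, 50)) == 2.
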
Example #5
    def post(self, namespace, hash_key):
        original_request = self.request.get('req')
        entry = model.get_entry_key(namespace, hash_key).get()
        if not entry:
            logging.error('Failed to find entity\n%s', original_request)
            return
        if entry.is_verified:
            logging.warning('Was already verified\n%s', original_request)
            return
        if entry.content is not None:
            logging.error('Should not be called with inline content\n%s',
                          original_request)
            return

        # Get GS file size.
        gs_bucket = config.settings().gs_bucket
        gs_file_info = gcs.get_file_info(gs_bucket, entry.key.id())

        # It's None if the file is missing.
        if not gs_file_info:
            # According to the docs, GS is read-after-write consistent, so a file
            # is missing only if it wasn't stored at all or it was deleted; either
            # way it's not a valid ContentEntry.
            self.purge_entry(entry, 'No such GS file\n%s', original_request)
            return

        # Expected stored length and actual length should match.
        if gs_file_info.size != entry.compressed_size:
            self.purge_entry(
                entry,
                'Bad GS file: expected size is %d, actual size is %d\n%s',
                entry.compressed_size, gs_file_info.size, original_request)
            return

        save_to_memcache = (
            entry.compressed_size <= model.MAX_MEMCACHE_ISOLATED
            and entry.is_isolated)

        try:
            # crbug.com/916644: Verify 2 times to cope with possible data flakiness.
            verified = False
            for i in range(2):
                digest = model.get_hash(namespace)
                expanded_size = 0

                # Read the data in blocks and hash it incrementally.
                stream = gcs.read_file(gs_bucket, entry.key.id())
                if save_to_memcache:
                    # Wrap the stream with a generator that accumulates the data.
                    stream = Accumulator(stream)

                for data in model.expand_content(namespace, stream):
                    expanded_size += len(data)
                    digest.update(data)
                    # Make sure the data is GC'ed.
                    del data

                # Hashes should match.
                if digest.hexdigest() == hash_key:
                    verified = True
                    break
                logging.warning(
                    'SHA-1 does not match data in %d-th iteration: got %s, want %s',
                    i + 1, digest.hexdigest(), hash_key)

            if not verified:
                self.purge_entry(
                    entry, 'SHA-1 does not match data\n'
                    '%d bytes, %d bytes expanded, expected %d bytes\n%s',
                    entry.compressed_size, expanded_size, entry.expanded_size,
                    original_request)

        except gcs.NotFoundError:
            # Somebody deleted a file between get_file_info and read_file calls.
            self.purge_entry(entry, 'File was unexpectedly deleted\n%s',
                             original_request)
            return
        except (gcs.ForbiddenError, gcs.AuthorizationError) as e:
            # Misconfiguration in Google Storage ACLs. Don't delete the entry; it
            # may be fine. The ACL problems may be fixed before the next retry.
            logging.warning('CloudStorage auth issues (%s): %s',
                            e.__class__.__name__, e)
            # Abort so the job is retried automatically.
            return self.abort(500)
        except (gcs.FatalError, zlib.error, IOError) as e:
            # ForbiddenError and AuthorizationError inherit FatalError, so this except
            # block should be last.
            # The file is broken or unreadable.
            self.purge_entry(entry, 'Failed to read the file (%s): %s\n%s',
                             e.__class__.__name__, e, original_request)
            return

        # Verified. Data matches the hash.
        entry.expanded_size = expanded_size
        entry.is_verified = True
        future = entry.put_async()
        logging.info('%d bytes (%d bytes expanded) verified\n%s',
                     entry.compressed_size, expanded_size, original_request)
        if save_to_memcache:
            model.save_in_memcache(namespace, hash_key,
                                   ''.join(stream.accumulated))
        future.wait()
        return
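
Accumulator is referenced in post() but not defined in this listing. A minimal sketch of such a wrapper, assuming it only needs to re-yield the chunks read from the GCS stream while keeping them for the later memcache write:

class Accumulator(object):
    """Hypothetical stand-in: wraps a chunk iterator and records every chunk
    it yields, so ''.join(acc.accumulated) rebuilds the streamed data."""

    def __init__(self, stream):
        self._stream = stream
        self.accumulated = []

    def __iter__(self):
        for chunk in self._stream:
            self.accumulated.append(chunk)
            yield chunk

For example, after list(Accumulator(iter(['ab', 'cd']))) the wrapper's accumulated list is ['ab', 'cd'], and ''.join of it is 'abcd'.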