def hash_compressed_content(namespace, content):
  """Decompresses and hashes given |content|.

  Returns tuple (hex digest, expanded size).

  Raises ValueError in case of errors.
  """
  expanded_size = 0
  digest = model.get_hash(namespace)
  try:
    for data in model.expand_content(namespace, [content]):
      expanded_size += len(data)
      digest.update(data)
      # Make sure the data is GC'ed.
      del data
    return digest.hexdigest(), expanded_size
  except zlib.error as e:
    raise ValueError('Data is corrupted: %s' % e)
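# Usage sketch (illustrative only, not part of the module API): it assumes a
# '*-gzip' namespace whose payloads are zlib-compressed and hashed with SHA-1
# by model.get_hash; the 'default-gzip' name below is an assumption.
#
#   compressed = zlib.compress(b'hello world')
#   digest, size = hash_compressed_content('default-gzip', compressed)
#   # size == 11; digest is the hex digest of the *expanded* content.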
def hash_content(namespace, content):
  """Hashes uncompressed content."""
  d = model.get_hash(namespace)
  d.update(content)
  return d.hexdigest()
def hash_item(namespace, content):
  """Hashes uncompressed content and returns its hex digest."""
  h = model.get_hash(namespace)
  h.update(content)
  return h.hexdigest()
def _payload_to_hashes(request, namespace):
  """Converts a raw payload into hashes as bytes."""
  h = model.get_hash(namespace)
  return _split_payload(request, h.digest_size, model.MAX_KEYS_PER_DB_OPS)
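# Worked example (assumption: the namespace hashes with SHA-1, so
# h.digest_size == 20): a 60 byte request body is split into 3 raw 20 byte
# digests, with model.MAX_KEYS_PER_DB_OPS passed as the per-request limit.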
def post(self, namespace, hash_key):
  original_request = self.request.get('req')
  entry = model.get_entry_key(namespace, hash_key).get()
  if not entry:
    logging.error('Failed to find entity\n%s', original_request)
    return
  if entry.is_verified:
    logging.warning('Was already verified\n%s', original_request)
    return
  if entry.content is not None:
    logging.error(
        'Should not be called with inline content\n%s', original_request)
    return

  # Get GS file size.
  gs_bucket = config.settings().gs_bucket
  gs_file_info = gcs.get_file_info(gs_bucket, entry.key.id())

  # It's None if file is missing.
  if not gs_file_info:
    # According to the docs, GS is read-after-write consistent, so a file is
    # missing only if it wasn't stored at all or it was deleted, in any case
    # it's not a valid ContentEntry.
    self.purge_entry(entry, 'No such GS file\n%s', original_request)
    return

  # Expected stored length and actual length should match.
  if gs_file_info.size != entry.compressed_size:
    self.purge_entry(
        entry,
        'Bad GS file: expected size is %d, actual size is %d\n%s',
        entry.compressed_size, gs_file_info.size, original_request)
    return

  save_to_memcache = (
      entry.compressed_size <= model.MAX_MEMCACHE_ISOLATED and
      entry.is_isolated)

  try:
    # crbug.com/916644: Verify 2 times to cope with possible data flakiness.
    verified = False
    for i in range(2):
      digest = model.get_hash(namespace)
      expanded_size = 0

      # Start a loop where it reads the data in block.
      stream = gcs.read_file(gs_bucket, entry.key.id())
      if save_to_memcache:
        # Wraps stream with a generator that accumulates the data.
        stream = Accumulator(stream)

      for data in model.expand_content(namespace, stream):
        expanded_size += len(data)
        digest.update(data)
        # Make sure the data is GC'ed.
        del data

      # Hashes should match.
      if digest.hexdigest() == hash_key:
        verified = True
        break
      logging.warning(
          'SHA-1 do not match data in %d-th iteration: got %s, want %s',
          i + 1, digest.hexdigest(), hash_key)

    if not verified:
      self.purge_entry(
          entry,
          'SHA-1 do not match data\n'
          '%d bytes, %d bytes expanded, expected %d bytes\n%s',
          entry.compressed_size, expanded_size, entry.expanded_size,
          original_request)
      return
  except gcs.NotFoundError as e:
    # Somebody deleted a file between get_file_info and read_file calls.
    self.purge_entry(
        entry, 'File was unexpectedly deleted\n%s', original_request)
    return
  except (gcs.ForbiddenError, gcs.AuthorizationError) as e:
    # Misconfiguration in Google Storage ACLs. Don't delete an entry, it may
    # be fine. Maybe ACL problems would be fixed before the next retry.
    logging.warning(
        'CloudStorage auth issues (%s): %s', e.__class__.__name__, e)
    # Abort so the job is retried automatically.
    return self.abort(500)
  except (gcs.FatalError, zlib.error, IOError) as e:
    # ForbiddenError and AuthorizationError inherit FatalError, so this
    # except block should be last.
    # It's broken or unreadable.
    self.purge_entry(
        entry, 'Failed to read the file (%s): %s\n%s',
        e.__class__.__name__, e, original_request)
    return

  # Verified. Data matches the hash.
  entry.expanded_size = expanded_size
  entry.is_verified = True
  future = entry.put_async()
  logging.info(
      '%d bytes (%d bytes expanded) verified\n%s',
      entry.compressed_size, expanded_size, original_request)
  if save_to_memcache:
    model.save_in_memcache(namespace, hash_key, ''.join(stream.accumulated))
  future.wait()
  return
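# A minimal sketch of the Accumulator wrapper used above (an assumption about
# its shape; the real class is defined elsewhere in this module): it yields
# chunks from the underlying GCS stream unchanged while keeping a copy, so the
# fully read content can be pushed to memcache afterwards.
#
#   class Accumulator(object):
#     """Accumulates chunks produced by a source iterable."""
#
#     def __init__(self, source):
#       self.accumulated = []
#       self._source = source
#
#     def __iter__(self):
#       for chunk in self._source:
#         self.accumulated.append(chunk)
#         yield chunk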