class PickleCache(object):

    # Tests may modify this to add additional types
    _CACHEABLE_TYPES = (type, PersistentPy)
    _SWEEPABLE_TYPES = (PersistentPy,)

    total_estimated_size = 0
    cache_size_bytes = 0

    # Set by functions that sweep the entire ring (via _sweeping_ring)
    # Serves as a pseudo-lock
    _is_sweeping_ring = False

    def __init__(self, jar, target_size=0, cache_size_bytes=0):
        # TODO: forward-port Dieter's bytes stuff
        self.jar = jar
        # We expect the jars to be able to have a pointer to
        # us; this is a reference cycle, but certain
        # aspects of invalidation and accessing depend on it.
        # The actual Connection objects we're used with do set this
        # automatically, but many test objects don't.
        # TODO: track this on the persistent objects themselves?
        try:
            jar._cache = self
        except AttributeError:
            # Some ZODB tests pass in an object that cannot have an _cache
            pass
        self.cache_size = target_size
        self.drain_resistance = 0
        self.non_ghost_count = 0
        self.persistent_classes = {}
        self.data = _WeakValueDictionary()
        self.ring = Ring(self.data.cleanup_hook)
        self.cache_size_bytes = cache_size_bytes

    # IPickleCache API

    def __len__(self):
        """ See IPickleCache. """
        return (len(self.persistent_classes)
                + len(self.data))

    def __getitem__(self, oid):
        """ See IPickleCache. """
        value = self.data.get(oid, self)
        if value is not self:
            return value
        return self.persistent_classes[oid]

    def __setitem__(self, oid, value):
        """ See IPickleCache. """
        # The order of checks matters for C compatibility;
        # the ZODB tests depend on this.

        # The C impl requires either a type or a Persistent subclass
        if not isinstance(value, self._CACHEABLE_TYPES):
            raise TypeError("Cache values must be persistent objects.")

        value_oid = value._p_oid
        if not isinstance(oid, OID_TYPE) or not isinstance(value_oid, OID_TYPE):
            raise TypeError('OID must be %s: key=%s _p_oid=%s'
                            % (OID_TYPE, oid, value_oid))

        if value_oid != oid:
            raise ValueError("Cache key does not match oid")

        if oid in self.persistent_classes or oid in self.data:
            # Have to be careful here, a GC might have just run
            # and cleaned up the object
            existing_data = self.get(oid)
            if existing_data is not None and existing_data is not value:
                # Raise the same type of exception as the C impl with the same
                # message.
                raise ValueError('A different object already has the same oid')

        # Match the C impl: it requires a jar. Let this raise AttributeError
        # if no jar is found.
        jar = value._p_jar
        if jar is None:
            raise ValueError("Cached object jar missing")

        # It also requires that it cannot be cached more than one place
        existing_cache = getattr(jar, '_cache', None) # type: PickleCache
        if (existing_cache is not None
                and existing_cache is not self
                and oid in existing_cache.data):
            raise ValueError("Cache values may only be in one cache.")

        if isinstance(value, type):
            # ZODB.persistentclass.PersistentMetaClass
            self.persistent_classes[oid] = value
        else:
            self.data[oid] = value
            if _OGA(value, '_p_state') != GHOST and value not in self.ring:
                self.ring.add(value)
                self.non_ghost_count += 1
            elif self.data.cleanup_hook:
                # Ensure we begin monitoring for ``value`` to
                # be deallocated.
                self.ring.ring_node_for(value)

    def __delitem__(self, oid):
        """ See IPickleCache. """
        if not isinstance(oid, OID_TYPE):
            raise TypeError('OID must be %s: %s' % (OID_TYPE, oid))
        if oid in self.persistent_classes:
            del self.persistent_classes[oid]
        else:
            pobj = self.data.pop(oid)
            self.ring.delete(pobj)

    def get(self, oid, default=None):
        """ See IPickleCache. """
        value = self.data.get(oid, self)
        if value is not self:
            return value
        return self.persistent_classes.get(oid, default)

    def mru(self, oid):
        """ See IPickleCache. """
        if self._is_sweeping_ring:
            # Accesses during sweeping, such as with an
            # overridden _p_deactivate, don't mutate the ring
            # because that could leave it inconsistent.
            return False # marker return for tests

        value = self.data[oid]

        was_in_ring = value in self.ring
        if not was_in_ring:
            if _OGA(value, '_p_state') != GHOST:
                self.ring.add(value)
                self.non_ghost_count += 1
        else:
            self.ring.move_to_head(value)
        return None

    def ringlen(self):
        """ See IPickleCache. """
        return len(self.ring)

    def items(self):
        """ See IPickleCache. """
        return self.data.items()

    def lru_items(self):
        """ See IPickleCache. """
        return [
            (obj._p_oid, obj)
            for obj in self.ring
        ]

    def klass_items(self):
        """ See IPickleCache. """
        return self.persistent_classes.items()

    def incrgc(self, ignored=None):
        """ See IPickleCache. """
        target = self.cache_size
        if self.drain_resistance >= 1:
            size = self.non_ghost_count
            target2 = size - 1 - (size // self.drain_resistance)
            if target2 < target:
                target = target2
        # return value for testing
        return self._sweep(target, self.cache_size_bytes)

    def full_sweep(self, target=None):
        """ See IPickleCache. """
        # return value for testing
        return self._sweep(0)

    minimize = full_sweep

    def new_ghost(self, oid, obj):
        """ See IPickleCache. """
        if obj._p_oid is not None:
            raise ValueError('Object already has oid')
        if obj._p_jar is not None:
            raise ValueError('Object already has jar')
        if oid in self.persistent_classes or oid in self.data:
            raise KeyError('Duplicate OID: %s' % oid)
        obj._p_oid = oid
        obj._p_jar = self.jar
        if not isinstance(obj, type):
            if obj._p_state != GHOST:
                # The C implementation sets this stuff directly,
                # but we delegate to the class. However, we must be
                # careful to avoid broken _p_invalidate and _p_deactivate
                # that don't call the super class. See ZODB's
                # testConnection.doctest_proper_ghost_initialization_with_empty__p_deactivate
                obj._p_invalidate_deactivate_helper(False)
        self[oid] = obj

    def reify(self, to_reify):
        """ See IPickleCache. """
        if isinstance(to_reify, OID_TYPE): # bytes
            to_reify = [to_reify]
        for oid in to_reify:
            value = self[oid]
            if value._p_state == GHOST:
                value._p_activate()
                self.non_ghost_count += 1
                self.mru(oid)

    def invalidate(self, to_invalidate):
        """ See IPickleCache. """
        if isinstance(to_invalidate, OID_TYPE):
            self._invalidate(to_invalidate)
        else:
            for oid in to_invalidate:
                self._invalidate(oid)

    def debug_info(self):
        result = []
        for oid, klass in self.persistent_classes.items():
            result.append((
                oid,
                len(gc.get_referents(klass)),
                type(klass).__name__,
                klass._p_state,
            ))
        for oid, value in self.data.items():
            result.append((
                oid,
                len(gc.get_referents(value)),
                type(value).__name__,
                value._p_state,
            ))
        return result

    def update_object_size_estimation(self, oid, new_size):
        """ See IPickleCache. """
        value = self.data.get(oid)
        if value is not None:
            # Recall that while the argument is given in bytes,
            # we have to work with 64-block chunks (plus one)
            # to match the C implementation. Hence the convoluted
            # arithmetic.
            new_size_in_24 = _estimated_size_in_24_bits(new_size)
            p_est_size_in_24 = value._Persistent__size
            new_est_size_in_bytes = (new_size_in_24 - p_est_size_in_24) * 64

            self.total_estimated_size += new_est_size_in_bytes

    cache_drain_resistance = property(
        lambda self: self.drain_resistance,
        lambda self, nv: setattr(self, 'drain_resistance', nv)
    )
    cache_non_ghost_count = property(lambda self: self.non_ghost_count)
    cache_data = property(lambda self: dict(self.items()))
    cache_klass_count = property(lambda self: len(self.persistent_classes))

    # Helpers

    # Set to true when a deactivation happens in our code. For
    # compatibility with the C implementation, we can only remove the
    # node and decrement our non-ghost count if our implementation
    # actually runs (broken subclasses can forget to call super; ZODB
    # has tests for this). This gets set to false every time we examine
    # a node and checked afterwards. The C implementation has a very
    # incestuous relationship between cPickleCache and cPersistence:
    # the pickle cache calls _p_deactivate, which is responsible for
    # both decrementing the non-ghost count and removing its node from
    # the cache ring (and, if it gets deallocated, from the pickle
    # cache's dictionary). We're trying to keep that to a minimum, but
    # there's no way around it if we want full compatibility.
    _persistent_deactivate_ran = False

    @_sweeping_ring
    def _sweep(self, target, target_size_bytes=0):
        ejected = 0
        # If we find and eject objects that may have been weak referenced,
        # we need to run a garbage collection to try to clear those references.
        # Otherwise, it's highly likely that accessing those objects through those
        # references will try to ``_p_activate()`` them, and since the jar they came
        # from is probably closed, that will lead to an error. See
        # https://github.com/zopefoundation/persistent/issues/149
        had_weak_refs = False
        ring = self.ring
        for node, value in ring.iteritems():
            if ((target or target_size_bytes)
                    # pylint:disable=too-many-boolean-expressions
                    and (not target or self.non_ghost_count <= target)
                    and (self.total_estimated_size <= target_size_bytes
                         or not target_size_bytes)):
                break

            if value._p_state == UPTODATE:
                # The C implementation will only evict things that are
                # specifically in the up-to-date state.
                self._persistent_deactivate_ran = False

                # Sweeping an object out of the cache should also
                # ghost it---that's what C does. This winds up
                # calling `update_object_size_estimation`.
                # Also in C, if this was the last reference to the object,
                # it removes itself from the `data` dictionary.
                # If we're under PyPy or Jython, we need to run a GC collection
                # to make this happen...this is only noticeable, though, when
                # we eject objects. Also, note that we can only take any of these
                # actions if our _p_deactivate ran, in case of buggy subclasses;
                # see _persistent_deactivate_ran.
                if not had_weak_refs:
                    had_weak_refs |= getattr(value, '__weakref__', None) is not None

                value._p_deactivate()
                if (self._persistent_deactivate_ran
                        # Test-cases sneak in non-Persistent objects, sigh, so naturally
                        # they don't cooperate (without this check a bunch of test_picklecache
                        # breaks)
                        or not isinstance(value, self._SWEEPABLE_TYPES)):
                    ring.delete_node(node)
                    ejected += 1
                    self.non_ghost_count -= 1

        if ejected and had_weak_refs:
            # Clear the iteration variables, so the objects they point to
            # are subject to GC.
            node = None
            value = None
            gc.collect()
        return ejected

    @_sweeping_ring
    def _invalidate(self, oid):
        value = self.data.get(oid)
        if value is not None and value._p_state != GHOST:
            value._p_invalidate()
            self.ring.delete(value)
            self.non_ghost_count -= 1
        elif oid in self.persistent_classes:
            persistent_class = self.persistent_classes.pop(oid)
            try:
                # ZODB.persistentclass.PersistentMetaClass objects
                # have this method and it must be called for transaction abort
                # and other forms of invalidation to work.
                persistent_class._p_invalidate()
            except AttributeError:
                pass
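
# A minimal usage sketch (illustrative only, not part of the cache API).
# It shows the typical lifecycle: register a ghost, activate it, record the
# access, and let incremental GC sweep. The ``_DemoJar`` class and the
# ``_picklecache_usage_sketch`` function are hypothetical names used only for
# this example; in real use the jar is a ZODB Connection, which implements
# far more of the data-manager interface than the single method shown here.

def _picklecache_usage_sketch():
    class _DemoJar(object):
        # Called by ``_p_activate`` when a ghost is loaded. A real jar would
        # unpickle the object's stored state here; the sketch leaves it empty.
        def setstate(self, obj):
            pass

    jar = _DemoJar()
    cache = PickleCache(jar, target_size=10)   # also sets jar._cache = cache

    obj = PersistentPy()
    cache.new_ghost(b'\x00' * 8, obj)          # OIDs are bytes (OID_TYPE)
    obj._p_activate()                          # ghost -> up-to-date via jar.setstate
    cache.mru(obj._p_oid)                      # bubble the object in the LRU ring
    return cache.incrgc()                      # sweep back toward ``cache_size``
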
class LocalClientBucket(object):
    """
    A map that keeps a record of its approximate size.

    Keys must be ``str`` and values must be byte strings.

    This class is not threadsafe; accesses to __setitem__ and
    get_and_bubble_all must be protected by a lock.
    """

    def __init__(self, limit):
        self._dict = {}
        self._ring = Ring()
        self._hits = 0
        self._misses = 0
        self.size = 0
        self.limit = limit

    def reset_stats(self):
        self._hits = 0
        self._misses = 0

    def stats(self):
        total = self._hits + self._misses
        return {'hits': self._hits,
                'misses': self._misses,
                'ratio': self._hits/total if total else 0,
                'size': len(self._dict),
                'bytes': self.size}

    def __len__(self):
        return len(self._dict)

    def __setitem__(self, key, value):
        """
        Set an item.

        If the memory limit would be exceeded, remove old items until
        that is no longer the case.
        """
        # These types are gated by LocalClient, we don't need to double
        # check.
        #assert isinstance(key, str)
        #assert isinstance(value, bytes)
        sizedelta = len(value)
        if key in self._dict:
            entry = self._dict[key]
            oldvalue = entry.value
            sizedelta -= len(oldvalue)
            entry.value = value
            self._ring.move_to_head(entry)
        else:
            sizedelta += len(key)
            entry = _RingEntry(key, value)
            self._ring.add(entry)
            self._dict[key] = entry

        while self._dict and self.size + sizedelta > self.limit:
            oldest = next(iter(self._ring))
            if oldest._p_oid is key:
                break
            self.__delitem__(oldest._p_oid)
        self.size += sizedelta
        return True

    def __contains__(self, key):
        return key in self._dict

    def __delitem__(self, key):
        entry = self._dict[key]
        oldvalue = entry.value
        del self._dict[key]
        self._ring.delete(entry)
        sizedelta = len(key)
        sizedelta += len(oldvalue)
        self.size -= sizedelta

    def get_and_bubble_all(self, keys):
        dct = self._dict
        rng = self._ring
        res = {}
        for key in keys:
            entry = dct.get(key)
            if entry is not None:
                self._hits += 1
                rng.move_to_head(entry)
                res[key] = entry.value
            else:
                self._misses += 1
        return res

    def get(self, key):
        # Testing only. Does not bubble.
        entry = self._dict.get(key)
        if entry is not None:
            return entry.value

    def __getitem__(self, key):
        # Testing only.
        return self._dict[key].value

    def load_from_file(self, cache_file):
        now = time.time()
        unpick = Unpickler(cache_file)
        version = unpick.load()
        if version != 1: # pragma: no cover
            raise ValueError("Incorrect version of cache_file")
        count = unpick.load()
        stored = 0
        loaded_dict = unpick.load()
        if not self._dict:
            # bulk-update in C for speed
            stored = len(loaded_dict)
            self._dict.update(loaded_dict)
            for ring_entry in itervalues(loaded_dict):
                if self.size < self.limit:
                    self._ring.add(ring_entry)
                    self.size += len(ring_entry.key) + len(ring_entry.value)
                else:
                    # We're too big! ignore these things from now on.
                    # This is unlikely.
                    del self._dict[ring_entry.key]
        else:
            new_keys = set(loaded_dict.keys()) - set(self._dict.keys())
            stored += len(new_keys)
            # Loading more data into an existing bucket.
            # Load only the *new* keys, but don't care about LRU;
            # it's all screwed up anyway at this point.
            for new_key in new_keys:
                new_ring_entry = loaded_dict[new_key]
                self._dict[new_key] = new_ring_entry
                self._ring.add(new_ring_entry)
                self.size += len(new_key) + len(new_ring_entry.value)
                if self.size >= self.limit: # pragma: no cover
                    break

        then = time.time()
        log.info("Examined %d and stored %d items from %s in %s",
                 count, stored, cache_file, then - now)
        return count, stored

    def write_to_file(self, cache_file):
        now = time.time()
        # Pickling the items is about 2-3x faster than marshal.
        pickler = Pickler(cache_file, -1) # Highest protocol
        pickler.dump(1) # Version marker
        assert len(self._dict) == len(self._ring)
        pickler.dump(len(self._dict)) # How many pairs we write
        # We lose the order. We'll have to build it up again as we go.
        pickler.dump(self._dict)

        then = time.time()
        stats = self.stats()
        log.info("Wrote %d items to %s in %s. Total hits %s; misses %s; ratio %s",
                 stats['size'], cache_file, then - now,
                 stats['hits'], stats['misses'], stats['ratio'])
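
# A minimal usage sketch (illustrative only). The key and value literals are
# made up for the example; in RelStorage they come from the higher-level
# LocalClient, which also provides the locking this class requires. The
# ``_bucket_usage_sketch`` name is hypothetical.

def _bucket_usage_sketch():
    bucket = LocalClientBucket(limit=100)        # approximate budget, in bytes
    bucket['state:1'] = b'pickled-object-one'    # size counts len(key) + len(value)
    bucket['state:2'] = b'pickled-object-two'

    # Hits bubble their entries to the head of the ring; misses only count.
    found = bucket.get_and_bubble_all(('state:1', 'state:missing'))
    assert found == {'state:1': b'pickled-object-one'}

    return bucket.stats()   # e.g. {'hits': 1, 'misses': 1, 'size': 2, 'bytes': 50, ...}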