def cloneByPickle(obj, ignore_list=()):
    """Makes a copy of a ZODB object, loading ghosts as needed.

    Ignores specified objects along the way, replacing them with
    placeholder objects in the copy.
    """
    ignore_dict = {}
    for o in ignore_list:
        ignore_dict[id(o)] = o

    def persistent_id(ob, ignore_dict=ignore_dict):
        if id(ob) in ignore_dict:
            return 'ignored'
        if getattr(ob, '_p_changed', 0) is None:
            ob._p_changed = 0
        return None

    def persistent_load(ref):
        assert ref == 'ignored'
        # Return a placeholder object that will be replaced by
        # removeNonVersionedData().
        placeholder = SimpleItem()
        placeholder.id = "ignored_subobject"
        return placeholder

    stream = BytesIO()
    p = Pickler(stream, 1)
    p.persistent_id = persistent_id
    p.dump(obj)

    stream.seek(0)
    u = Unpickler(stream)
    u.persistent_load = persistent_load
    return u.load()
def __init__(self, obj=None):
    self._file = BytesIO()
    self._p = PersistentPickler(self.persistent_id, self._file, _protocol)
    self._stack = []
    if obj is not None:
        self._stack.append(obj)
        jar = obj._p_jar
        assert myhasattr(jar, "new_oid")
        self._jar = jar
def dumps(obj):
    def getpersid(obj):
        if hasattr(obj, 'getoid'):
            return obj.getoid()
        return None
    s = BytesIO()
    p = PersistentPickler(getpersid, s, _protocol)
    p.dump(obj)
    p.dump(None)
    return s.getvalue()
def __init__(self, obj=None):
    self._file = BytesIO()
    self._p = Pickler(self._file, _protocol)
    if sys.version_info[0] < 3:
        self._p.inst_persistent_id = self.persistent_id
    else:
        self._p.persistent_id = self.persistent_id
    self._stack = []
    if obj is not None:
        self._stack.append(obj)
        jar = obj._p_jar
        assert myhasattr(jar, "new_oid")
        self._jar = jar
def dumps(obj):
    def getpersid(obj):
        if hasattr(obj, 'getoid'):
            return obj.getoid()
        return None
    s = BytesIO()
    p = Pickler(s, _protocol)
    if sys.version_info[0] < 3:
        p.inst_persistent_id = getpersid
    else:
        p.persistent_id = getpersid
    p.dump(obj)
    p.dump(None)
    return s.getvalue()
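# Usage sketch (illustrative only): how the getoid()-based dumps() above emits
# a persistent reference instead of pickling the object's state.  The class
# name and oid value below are made up for this example and are not part of
# the original sources.
class FakePersistent:
    def getoid(self):
        return b'\x00' * 8  # stand-in 8-byte oid

# dumps() pickles only the persistent id returned by getpersid(); reading the
# stream back requires an unpickler configured with a matching persistent_load.
example_stream = dumps(FakePersistent())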
def _initroot(self):
    try:
        load_current(self._storage, ZERO)
    except KeyError:
        from ZODB.Connection import TransactionMetaData
        file = BytesIO()
        p = Pickler(file, _protocol)
        p.dump((PersistentMapping, None))
        p.dump({'_container': {}})
        t = TransactionMetaData()
        t.description = u'initial database creation'
        self._storage.tpc_begin(t)
        self._storage.store(ZERO, None, file.getvalue(), '', t)
        self._storage.tpc_vote(t)
        self._storage.tpc_finish(t)
def _initroot(self):
    try:
        self._storage.load(ZERO, '')
    except KeyError:
        from transaction import Transaction
        file = BytesIO()
        p = Pickler(file, _protocol)
        p.dump((PersistentMapping, None))
        p.dump({'_container': {}})
        t = Transaction()
        t.description = 'initial database creation'
        self._storage.tpc_begin(t)
        self._storage.store(ZERO, None, file.getvalue(), '', t)
        self._storage.tpc_vote(t)
        self._storage.tpc_finish(t)
def zodb_unpickle(data):
    """Unpickle an object stored using the format expected by ZODB."""
    f = BytesIO(data)
    u = Unpickler(f)
    u.persistent_load = persistent_load
    klass_info = u.load()
    if isinstance(klass_info, tuple):
        if isinstance(klass_info[0], type):
            # Unclear: what is the second part of klass_info?
            klass, xxx = klass_info
            assert not xxx
        else:
            if isinstance(klass_info[0], tuple):
                modname, klassname = klass_info[0]
            else:
                modname, klassname = klass_info
            if modname == "__main__":
                ns = globals()
            else:
                mod = import_helper(modname)
                ns = mod.__dict__
            try:
                klass = ns[klassname]
            except KeyError:
                print("can't find %s in %r" % (klassname, ns), file=sys.stderr)
        inst = klass()
    else:
        raise ValueError("expected class info: %s" % repr(klass_info))
    state = u.load()
    inst.__setstate__(state)
    return inst
def get_refs(a_pickle):
    """Return oid and class information for references in a pickle

    The result is a list of oid and class information tuples.
    If the reference doesn't contain class information, then the
    klass information is None.
    """
    refs = []
    u = PersistentUnpickler(None, refs.append, BytesIO(a_pickle))
    u.noload()
    u.noload()

    # Now we have a list of references.  Need to convert to list of
    # oids and class info:

    result = []

    for reference in refs:
        if isinstance(reference, tuple):
            oid, klass = reference
        elif isinstance(reference, (bytes, str)):
            oid, klass = reference, None
        else:
            assert isinstance(reference, list)
            continue

        if not isinstance(oid, bytes):
            assert isinstance(oid, str)
            # this happens on Python 3 when all bytes in the oid are < 0x80
            oid = oid.encode('ascii')

        result.append((oid, klass))

    return result
def state(self, oid, serial, prfactory, p=''):
    p = p or self.loadSerial(oid, serial)
    p = self._crs_untransform_record_data(p)
    file = BytesIO(p)
    unpickler = PersistentUnpickler(
        find_global, prfactory.persistent_load, file)
    unpickler.load()  # skip the class tuple
    return unpickler.load()
def zodb_pickle(obj):
    """Create a pickle in the format expected by ZODB."""
    f = BytesIO()
    p = PersistentPickler(_persistent_id, f, _protocol)
    klass = obj.__class__
    assert not hasattr(obj, '__getinitargs__'), "not ready for constructors"
    args = None

    mod = getattr(klass, '__module__', None)
    if mod is not None:
        klass = mod, klass.__name__

    state = obj.__getstate__()

    p.dump((klass, args))
    p.dump(state)
    return f.getvalue()
def __init__(self, obj=None):
    self._file = BytesIO()
    self._p = Pickler(self._file, _protocol)
    if sys.version_info[0] < 3:
        self._p.inst_persistent_id = self.persistent_id

        # PyPy uses a python implementation of cPickle in both Python 2
        # and Python 3. We can't really detect inst_persistent_id as it's
        # a magic attribute that's not readable, but it doesn't hurt to
        # simply always assign to persistent_id also
        self._p.persistent_id = self.persistent_id
    else:
        self._p.persistent_id = self.persistent_id
    self._stack = []
    if obj is not None:
        self._stack.append(obj)
        jar = obj._p_jar
        assert myhasattr(jar, "new_oid")
        self._jar = jar
def _get_unpickler(self, pickle):
    file = BytesIO(pickle)

    factory = self._factory
    conn = self._conn

    def find_global(modulename, name):
        return factory(conn, modulename, name)

    unpickler = PersistentUnpickler(
        find_global, self._persistent_load, file)

    return unpickler
def get_type(record):
    try:
        unpickled = FakeUnpickler(BytesIO(record.data)).load()
    except FakeError as err:
        return "%s.%s" % (err.module, err.name)
    classinfo = unpickled[0]
    if isinstance(classinfo, tuple):
        mod, klass = classinfo
        return "%s.%s" % (mod, klass)
    else:
        return str(classinfo)
def test_zodbcommit(zext):
    tmpd = mkdtemp('', 'zodbcommit.')
    defer(lambda: rmtree(tmpd))

    stor = storageFromURL('%s/2.fs' % tmpd)
    defer(stor.close)

    head = stor.lastTransaction()

    # commit some transactions via zodbcommit and verify if storage dump gives
    # what is expected.
    t1 = Transaction(z64, ' ', b'user name', b'description ...',
                     zext(dumps({'a': 'b'}, _protocol)), [
        ObjectData(p64(1), b'data1', 'sha1', sha1(b'data1')),
        ObjectData(p64(2), b'data2', 'sha1', sha1(b'data2'))])

    t1.tid = zodbcommit(stor, head, t1)

    t2 = Transaction(z64, ' ', b'user2', b'desc2', b'', [
        ObjectDelete(p64(2))])

    t2.tid = zodbcommit(stor, t1.tid, t2)

    buf = BytesIO()
    zodbdump(stor, p64(u64(head)+1), None, out=buf)
    dumped = buf.getvalue()

    assert dumped == b''.join([_.zdump() for _ in (t1, t2)])

    # ObjectCopy. XXX zodbcommit handled ObjectCopy by actually copying data,
    # not referencing previous transaction via backpointer.
    t3 = Transaction(z64, ' ', b'user3', b'desc3', b'', [
        ObjectCopy(p64(1), t1.tid)])

    t3.tid = zodbcommit(stor, t2.tid, t3)

    data1_1, _, _ = stor.loadBefore(p64(1), p64(u64(t1.tid)+1))
    data1_3, _, _ = stor.loadBefore(p64(1), p64(u64(t3.tid)+1))
    assert data1_1 == data1_3
    assert data1_1 == b'data1'  # just in case
def main(argv):
    try:
        optv, argv = getopt.getopt(argv[1:], "h", ["help"])
    except getopt.GetoptError as e:
        print(e, file=sys.stderr)
        usage(sys.stderr)
        sys.exit(2)

    for opt, _ in optv:
        if opt in ("-h", "--help"):
            usage(sys.stdout)
            sys.exit(0)

    if len(argv) != 2:
        usage(sys.stderr)
        sys.exit(2)

    storurl = argv[0]
    at = fromhex(argv[1])

    stor = storageFromURL(storurl)
    defer(stor.close)

    # artificial transaction header with tid=0 to request regular commit
    zin = b'txn 0000000000000000 " "\n'
    zin += sys.stdin.read()
    zin = BytesIO(zin)

    zr = zodbdump.DumpReader(zin)
    zr.lineno -= 1  # we prepended txn header
    txn = zr.readtxn()

    tail = zin.read()
    if tail:
        print('E: +%d: garbage after transaction' % zr.lineno, file=sys.stderr)
        sys.exit(1)

    tid = zodbcommit(stor, at, txn)
    print(ashex(tid))
def serializeext(ext):
    # ZODB iteration API gives us depickled extensions and only that.
    # So for dumping in raw form we need to pickle it back hopefully getting
    # something close to original raw data.
    if not ext:
        # ZODB usually does this: encode {} as empty "", not as "}."
        # https://github.com/zopefoundation/ZODB/blob/2490ae09/src/ZODB/BaseStorage.py#L194
        #
        # and here are decoders:
        # https://github.com/zopefoundation/ZODB/blob/2490ae09/src/ZODB/FileStorage/FileStorage.py#L1145
        # https://github.com/zopefoundation/ZODB/blob/2490ae09/src/ZODB/FileStorage/FileStorage.py#L1990
        # https://github.com/zopefoundation/ZODB/blob/2490ae09/src/ZODB/fstools.py#L66
        # ...
        return b""

    buf = BytesIO()
    p = XPickler(buf, _protocol)
    p.dump(ext)
    out = buf.getvalue()
    #out = pickletools.optimize(out)  # remove unneeded PUT opcodes
    assert loads(out) == ext
    return out
def get_pickle_metadata(data):
    # Returns a 2-tuple of strings.

    # ZODB's data records contain two pickles.  The first is the class
    # of the object, the second is the object.  We're only trying to
    # pick apart the first here, to extract the module and class names.
    if data[0] in (0x80,     # Py3k indexes bytes -> int
                   b'\x80',  # Python2 indexes bytes -> bytes
                   ):  # protocol marker, protocol > 1
        data = data[2:]
    if data.startswith(b'(c'):   # pickle MARK GLOBAL opcode sequence
        global_prefix = 2
    elif data.startswith(b'c'):  # pickle GLOBAL opcode
        global_prefix = 1
    else:
        global_prefix = 0

    if global_prefix:
        # Formats 1 and 2.
        # Don't actually unpickle a class, because it will attempt to
        # load the class.  Just break open the pickle and get the
        # module and class from it.  The module and class names are given by
        # newline-terminated strings following the GLOBAL opcode.
        modname, classname, rest = data.split(b'\n', 2)
        modname = modname[global_prefix:]  # strip GLOBAL opcode
        return modname.decode(), classname.decode()

    # Else there are a bunch of other possible formats.
    f = BytesIO(data)
    u = Unpickler(f)
    try:
        class_info = u.load()
    except Exception as err:
        return '', ''
    if isinstance(class_info, tuple):
        if isinstance(class_info[0], tuple):
            # Formats 3 and 4.
            modname, classname = class_info[0]
        else:
            # Formats 5 and 6 (probably) end up here.
            modname, classname = class_info
    else:
        # This isn't a known format.
        modname = repr(class_info)
        classname = ''
    return modname, classname
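# Usage sketch (illustrative only): get_pickle_metadata() reads just the first
# of the two pickles in a ZODB data record.  Here a two-pickle record is built
# by hand with the stdlib pickler; Sample is a made-up class for this example
# and is not part of the original sources.
import pickle
from io import BytesIO

class Sample:
    pass

_buf = BytesIO()
_p = pickle.Pickler(_buf, 1)
_p.dump((Sample, None))  # first pickle: class metadata
_p.dump({})              # second pickle: object state
print(get_pickle_metadata(_buf.getvalue()))  # expected: ('__main__', 'Sample')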
def is_blob_record(record):
    """Check whether a database record is a blob record.

    This is primarily intended to be used when copying data from one
    storage to another.
    """
    if record and (b'ZODB.blob' in record):
        unpickler = PersistentUnpickler(find_global_Blob, None,
                                        BytesIO(record))
        try:
            return unpickler.load() is Blob
        except (MemoryError, KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            pass

    return False
def test_persistent_id_noload(self):
    # make sure we can noload weak references and other list-based
    # references like we expect. Protect explicitly against the
    # breakage in CPython 2.7 and zodbpickle < 0.6.0
    o = PersistentObject()
    o._p_oid = b'abcd'

    top = PersistentObject()
    top._p_oid = b'efgh'
    top.ref = WeakRef(o)

    pickle = serialize.ObjectWriter().serialize(top)

    refs = []
    u = PersistentUnpickler(None, refs.append, BytesIO(pickle))
    u.noload()
    u.noload()

    self.assertEqual(refs, [['w', (b'abcd', )]])
def referencesf(p, oids=None):
    """Return a list of object ids found in a pickle

    A list may be passed in, in which case, information is
    appended to it.

    Only ordinary internal references are included.
    Weak and multi-database references are not included.
    """
    refs = []
    u = Unpickler(BytesIO(p))
    u.persistent_load = refs.append
    u.noload()
    u.noload()

    # Now we have a list of references.  Need to convert to list of
    # oids:

    if oids is None:
        oids = []

    for reference in refs:
        if isinstance(reference, tuple):
            oid = reference[0]
        elif isinstance(reference, (bytes, str)):
            oid = reference
        else:
            assert isinstance(reference, list)
            continue

        if not isinstance(oid, bytes):
            assert isinstance(oid, str)
            # this happens on Python 3 when all bytes in the oid are < 0x80
            oid = oid.encode('ascii')

        oids.append(oid)

    return oids
def tryToResolveConflict(self, oid, committedSerial, oldSerial, newpickle,
                         committedData=b''):
    # class_tuple, old, committed, newstate = ('',''), 0, 0, 0
    try:
        prfactory = PersistentReferenceFactory()
        newpickle = self._crs_untransform_record_data(newpickle)
        file = BytesIO(newpickle)
        unpickler = Unpickler(file)
        unpickler.find_global = find_global
        unpickler.persistent_load = prfactory.persistent_load
        meta = unpickler.load()
        if isinstance(meta, tuple):
            klass = meta[0]
            newargs = meta[1] or ()
            if isinstance(klass, tuple):
                klass = find_global(*klass)
        else:
            klass = meta
            newargs = ()

        if klass in _unresolvable:
            raise ConflictError

        inst = klass.__new__(klass, *newargs)

        try:
            resolve = inst._p_resolveConflict
        except AttributeError:
            _unresolvable[klass] = 1
            raise ConflictError

        oldData = self.loadSerial(oid, oldSerial)
        if not committedData:
            committedData = self.loadSerial(oid, committedSerial)

        if newpickle == oldData:
            # old -> new diff is empty, so merge is trivial
            return committedData
        if committedData == oldData:
            # old -> committed diff is empty, so merge is trivial
            return newpickle

        newstate = unpickler.load()
        old = state(self, oid, oldSerial, prfactory, oldData)
        committed = state(self, oid, committedSerial, prfactory, committedData)

        resolved = resolve(old, committed, newstate)

        file = BytesIO()
        pickler = Pickler(file, _protocol)
        if sys.version_info[0] < 3:
            pickler.inst_persistent_id = persistent_id
        else:
            pickler.persistent_id = persistent_id
        pickler.dump(meta)
        pickler.dump(resolved)
        return self._crs_transform_record_data(file.getvalue())
    except (ConflictError, BadClassName):
        pass
    except:
        # If anything else went wrong, catch it here and avoid passing an
        # arbitrary exception back to the client.  The error here will mask
        # the original ConflictError.  A client can recover from a
        # ConflictError, but not necessarily from other errors.  But log
        # the error so that any problems can be fixed.
        logger.error("Unexpected error", exc_info=True)

    raise ConflictError(oid=oid, serials=(committedSerial, oldSerial),
                        data=newpickle)
def pdumps(obj):
    s = BytesIO()
    p = Pickler(s, _protocol)
    p.dump(obj)
    p.dump(None)
    return s.getvalue()
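# Usage sketch (illustrative only): pdumps() writes two back-to-back pickles,
# the object followed by None, so a reader can load both from the same stream.
# The round trip below uses the stdlib unpickler and is not part of the
# original sources.
import pickle
from io import BytesIO

_s = BytesIO(pdumps([1, 2, 3]))
_u = pickle.Unpickler(_s)
assert _u.load() == [1, 2, 3]  # the object
assert _u.load() is None       # the trailing None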
class ObjectWriter: """Serializes objects for storage in the database. The ObjectWriter creates object pickles in the ZODB format. It also detects new persistent objects reachable from the current object. """ _jar = None def __init__(self, obj=None): self._file = BytesIO() self._p = PersistentPickler(self.persistent_id, self._file, _protocol) self._stack = [] if obj is not None: self._stack.append(obj) jar = obj._p_jar assert myhasattr(jar, "new_oid") self._jar = jar def persistent_id(self, obj): """Return the persistent id for obj. >>> from ZODB.tests.util import P >>> class DummyJar: ... xrefs = True ... def new_oid(self): ... return 42 ... def db(self): ... return self ... databases = {} >>> jar = DummyJar() >>> class O: ... _p_jar = jar >>> writer = ObjectWriter(O) Normally, object references include the oid and a cached named reference to the class. Having the class information available allows fast creation of the ghost, avoiding requiring an additional database lookup. >>> bob = P('bob') >>> oid, cls = writer.persistent_id(bob) >>> oid 42 >>> cls is P True If a persistent object does not already have an oid and jar, these will be assigned by persistent_id(): >>> bob._p_oid 42 >>> bob._p_jar is jar True If the object already has a persistent id, the id is not changed: >>> bob._p_oid = 24 >>> oid, cls = writer.persistent_id(bob) >>> oid 24 >>> cls is P True If the jar doesn't match that of the writer, an error is raised: >>> bob._p_jar = DummyJar() >>> writer.persistent_id(bob) ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS Traceback (most recent call last): ... InvalidObjectReference: ('Attempt to store an object from a foreign database connection', <ZODB.serialize.DummyJar ...>, P(bob)) Constructor arguments used by __new__(), as returned by __getnewargs__(), can affect memory allocation, but may also change over the life of the object. This makes it useless to cache even the object's class. >>> class PNewArgs(P): ... def __getnewargs__(self): ... return () >>> sam = PNewArgs('sam') >>> writer.persistent_id(sam) 42 >>> sam._p_oid 42 >>> sam._p_jar is jar True Check that simple objects don't get accused of persistence: >>> writer.persistent_id(42) >>> writer.persistent_id(object()) Check that a classic class doesn't get identified improperly: >>> class ClassicClara: ... pass >>> clara = ClassicClara() >>> writer.persistent_id(clara) """ # Most objects are not persistent. The following cheap test # identifies most of them. For these, we return None, # signalling that the object should be pickled normally. if not isinstance(obj, (Persistent, type, WeakRef)): # Not persistent, pickle normally return None # Any persistent object must have an oid: try: oid = obj._p_oid except AttributeError: # Not persistent, pickle normally return None if not (oid is None or isinstance(oid, bytes)): # Deserves a closer look: # Make sure it's not a descriptor if hasattr(oid, '__get__'): # The oid is a descriptor. That means obj is a non-persistent # class whose instances are persistent, so ... # Not persistent, pickle normally return None if oid is WeakRefMarker: # we have a weakref, see weakref.py oid = obj.oid if oid is None: target = obj() # get the referenced object oid = target._p_oid if oid is None: # Here we are causing the object to be saved in # the database. One could argue that we shouldn't # do this, because a weakref should not cause an object # to be added. We'll be optimistic, though, and # assume that the object will be added eventually. 
oid = self._jar.new_oid() target._p_jar = self._jar target._p_oid = oid self._stack.append(target) obj.oid = oid obj.dm = target._p_jar obj.database_name = obj.dm.db().database_name if obj.dm is self._jar: return ['w', (oid, )] else: return ['w', (oid, obj.database_name)] # Since we have an oid, we have either a persistent instance # (an instance of Persistent), or a persistent class. # NOTE! Persistent classes don't (and can't) subclass persistent. database_name = None if oid is None: oid = obj._p_oid = self._jar.new_oid() obj._p_jar = self._jar self._stack.append(obj) elif obj._p_jar is not self._jar: if not self._jar.db().xrefs: raise InvalidObjectReference( "Database %r doesn't allow implicit cross-database " "references" % self._jar.db().database_name, self._jar, obj) try: otherdb = obj._p_jar.db() database_name = otherdb.database_name except AttributeError: otherdb = self if self._jar.db().databases.get(database_name) is not otherdb: raise InvalidObjectReference( "Attempt to store an object from a foreign " "database connection", self._jar, obj, ) if self._jar.get_connection(database_name) is not obj._p_jar: raise InvalidObjectReference( "Attempt to store a reference to an object from " "a separate connection to the same database or " "multidatabase", self._jar, obj, ) # OK, we have an object from another database. # Lets make sure the object ws not *just* loaded. if obj._p_jar._implicitlyAdding(oid): raise InvalidObjectReference( "A new object is reachable from multiple databases. " "Won't try to guess which one was correct!", self._jar, obj, ) klass = type(obj) if hasattr(klass, '__getnewargs__'): # We don't want to save newargs in object refs. # It's possible that __getnewargs__ is degenerate and # returns (), but we don't want to have to deghostify # the object to find out. # Note that this has the odd effect that, if the class has # __getnewargs__ of its own, we'll lose the optimization # of caching the class info. if database_name is not None: return ['n', (database_name, oid)] return oid # Note that we never get here for persistent classes. # We'll use direct refs for normal classes. if database_name is not None: return ['m', (database_name, oid, klass)] return oid, klass def serialize(self, obj): # We don't use __class__ here, because obj could be a persistent proxy. # We don't want to be fooled by proxies. klass = type(obj) # We want to serialize persistent classes by name if they have # a non-None non-empty module so as not to have a direct # ref. This is important when copying. We probably want to # revisit this in the future. newargs = getattr(obj, "__getnewargs__", None) if (isinstance(getattr(klass, '_p_oid', 0), _oidtypes) and klass.__module__): # This is a persistent class with a non-empty module. This # uses pickle format #3 or #7. klass = klass.__module__, klass.__name__ if newargs is None: meta = klass, None else: meta = klass, newargs() elif newargs is None: # Pickle format #1. meta = klass else: # Pickle format #2. meta = klass, newargs() return self._dump(meta, obj.__getstate__()) def _dump(self, classmeta, state): # To reuse the existing BytesIO object, we must reset # the file position to 0 and truncate the file after the # new pickle is written. self._file.seek(0) self._p.clear_memo() self._p.dump(classmeta) self._p.dump(state) self._file.truncate() return self._file.getvalue() def __iter__(self): return NewObjectIterator(self._stack)
def loads(str, persfunc=self._cache.get):
    fp = BytesIO(str)
    u = Unpickler(fp)
    u.persistent_load = persfunc
    return u.load()
def tryToResolveConflict(self, oid, committedSerial, oldSerial, newpickle,
                         committedData=b''):
    # class_tuple, old, committed, newstate = ('',''), 0, 0, 0
    klass = 'n/a'
    try:
        prfactory = PersistentReferenceFactory()
        newpickle = self._crs_untransform_record_data(newpickle)
        file = BytesIO(newpickle)
        unpickler = PersistentUnpickler(
            find_global, prfactory.persistent_load, file)
        meta = unpickler.load()
        if isinstance(meta, tuple):
            klass = meta[0]
            newargs = meta[1] or ()
            if isinstance(klass, tuple):
                klass = find_global(*klass)
        else:
            klass = meta
            newargs = ()

        if klass in _unresolvable:
            raise ConflictError

        inst = klass.__new__(klass, *newargs)

        try:
            resolve = inst._p_resolveConflict
        except AttributeError:
            _unresolvable[klass] = 1
            raise ConflictError

        oldData = self.loadSerial(oid, oldSerial)
        if not committedData:
            committedData = self.loadSerial(oid, committedSerial)

        newstate = unpickler.load()
        old = state(self, oid, oldSerial, prfactory, oldData)
        committed = state(self, oid, committedSerial, prfactory, committedData)

        resolved = resolve(old, committed, newstate)

        file = BytesIO()
        pickler = PersistentPickler(persistent_id, file, _protocol)
        pickler.dump(meta)
        pickler.dump(resolved)
        return self._crs_transform_record_data(file.getvalue())
    except (ConflictError, BadClassName) as e:
        logger.debug("Conflict resolution on %s failed with %s: %s",
                     klass, e.__class__.__name__, str(e))
    except:
        # If anything else went wrong, catch it here and avoid passing an
        # arbitrary exception back to the client.  The error here will mask
        # the original ConflictError.  A client can recover from a
        # ConflictError, but not necessarily from other errors.  But log
        # the error so that any problems can be fixed.
        logger.exception(
            "Unexpected error while trying to resolve conflict on %s", klass)

    raise ConflictError(oid=oid, serials=(committedSerial, oldSerial),
                        data=newpickle)
def make_pickle(ob):
    sio = BytesIO()
    p = Pickler(sio, _protocol)
    p.dump(ob)
    return sio.getvalue()
def __init__(self, storage,
             pool_size=7,
             pool_timeout=1<<31,
             cache_size=400,
             cache_size_bytes=0,
             historical_pool_size=3,
             historical_cache_size=1000,
             historical_cache_size_bytes=0,
             historical_timeout=300,
             database_name='unnamed',
             databases=None,
             xrefs=True,
             large_record_size=1<<24,
             **storage_args):
    """Create an object database.

    :Parameters:
      - `storage`: the storage used by the database, e.g. FileStorage
      - `pool_size`: expected maximum number of open connections
      - `cache_size`: target size of Connection object cache
      - `cache_size_bytes`: target size measured in total estimated size
        of objects in the Connection object cache.
        "0" means unlimited.
      - `historical_pool_size`: expected maximum number of total
        historical connections
      - `historical_cache_size`: target size of Connection object cache for
        historical (`at` or `before`) connections
      - `historical_cache_size_bytes` -- similar to `cache_size_bytes` for
        the historical connection.
      - `historical_timeout`: minimum number of seconds that an unused
        historical connection will be kept, or None.
      - `xrefs` - Boolean flag indicating whether implicit cross-database
        references are allowed
    """
    if isinstance(storage, six.string_types):
        from ZODB import FileStorage
        storage = ZODB.FileStorage.FileStorage(storage, **storage_args)
    elif storage is None:
        from ZODB import MappingStorage
        storage = ZODB.MappingStorage.MappingStorage(**storage_args)

    # Allocate lock.
    x = threading.RLock()
    self._a = x.acquire
    self._r = x.release

    # pools and cache sizes
    self.pool = ConnectionPool(pool_size, pool_timeout)
    self.historical_pool = KeyedConnectionPool(historical_pool_size,
                                               historical_timeout)
    self._cache_size = cache_size
    self._cache_size_bytes = cache_size_bytes
    self._historical_cache_size = historical_cache_size
    self._historical_cache_size_bytes = historical_cache_size_bytes

    # Setup storage
    self.storage = storage
    self.references = ZODB.serialize.referencesf
    try:
        storage.registerDB(self)
    except TypeError:
        storage.registerDB(self, None)  # Backward compat

    if (not hasattr(storage, 'tpc_vote')) and not storage.isReadOnly():
        warnings.warn(
            "Storage doesn't have a tpc_vote and this violates "
            "the storage API. Violently monkeypatching in a do-nothing "
            "tpc_vote.",
            DeprecationWarning, 2)
        storage.tpc_vote = lambda *args: None

    if IMVCCStorage.providedBy(storage):
        temp_storage = storage.new_instance()
    else:
        temp_storage = storage
    try:
        try:
            temp_storage.load(z64, '')
        except KeyError:
            # Create the database's root in the storage if it doesn't exist
            from persistent.mapping import PersistentMapping
            root = PersistentMapping()
            # Manually create a pickle for the root to put in the storage.
            # The pickle must be in the special ZODB format.
            file = BytesIO()
            p = Pickler(file, _protocol)
            p.dump((root.__class__, None))
            p.dump(root.__getstate__())
            t = transaction.Transaction()
            t.description = 'initial database creation'
            temp_storage.tpc_begin(t)
            temp_storage.store(z64, None, file.getvalue(), '', t)
            temp_storage.tpc_vote(t)
            temp_storage.tpc_finish(t)
    finally:
        if IMVCCStorage.providedBy(temp_storage):
            temp_storage.release()

    # Multi-database setup.
    if databases is None:
        databases = {}
    self.databases = databases
    self.database_name = database_name
    if database_name in databases:
        raise ValueError("database_name %r already in databases" %
                         database_name)
    databases[database_name] = self
    self.xrefs = xrefs

    self.large_record_size = large_record_size
def zodbdump(stor, tidmin, tidmax, hashonly=False, pretty='raw',
             out=asbinstream(sys.stdout)):
    def badpretty():
        raise ValueError("invalid pretty format %s" % pretty)

    for txn in stor.iterator(tidmin, tidmax):
        # XXX .status not covered by IStorageTransactionInformation
        # XXX but covered by BaseStorage.TransactionRecord
        out.write(b"txn %s %s\nuser %s\ndescription %s\n" % (
            ashex(txn.tid), qq(txn.status),
            qq(txn.user), qq(txn.description)))

        # extension is saved by ZODB as either empty or as pickle dump of an object
        rawext = txn_raw_extension(stor, txn)
        if pretty == 'raw':
            out.write(b"extension %s\n" % qq(rawext))
        elif pretty == 'zpickledis':
            if len(rawext) == 0:
                out.write(b'extension ""\n')
            else:
                out.write(b"extension\n")
                extf = BytesIO(rawext)
                disf = BytesIO()
                pickletools.dis(extf, disf)
                out.write(indent(disf.getvalue(), " "))
                extra = extf.read()
                if len(extra) > 0:
                    out.write(b" + extra data %s\n" % qq(extra))
        else:
            badpretty()

        objv = txnobjv(txn)

        for obj in objv:
            entry = b"obj %s " % ashex(obj.oid)
            write_data = False

            if obj.data is None:
                entry += b"delete"

            # was undo and data taken from obj.data_txn
            elif obj.data_txn is not None:
                entry += b"from %s" % ashex(obj.data_txn)

            else:
                # XXX sha1 is hardcoded for now. Dump format allows other hashes.
                entry += b"%i sha1:%s" % (len(obj.data), ashex(sha1(obj.data)))
                write_data = True

            out.write(b(entry))

            if write_data:
                if hashonly:
                    out.write(b" -")
                else:
                    out.write(b"\n")
                    if pretty == 'raw':
                        out.write(obj.data)
                    elif pretty == 'zpickledis':
                        # https://github.com/zopefoundation/ZODB/blob/5.6.0-55-g1226c9d35/src/ZODB/serialize.py#L24-L29
                        dataf = BytesIO(obj.data)
                        disf = BytesIO()
                        pickletools.dis(dataf, disf)  # class
                        pickletools.dis(dataf, disf)  # state
                        out.write(indent(disf.getvalue(), " "))
                        extra = dataf.read()
                        if len(extra) > 0:
                            out.write(b" + extra data %s\n" % qq(extra))
                    else:
                        badpretty()

            out.write(b"\n")

        out.write(b"\n")
def tryToResolveConflict(self, oid, committedSerial, oldSerial, newpickle,
                         committedData=b''):
    # class_tuple, old, committed, newstate = ('',''), 0, 0, 0
    klass = 'n/a'
    try:
        prfactory = PersistentReferenceFactory()
        newpickle = self._crs_untransform_record_data(newpickle)
        file = BytesIO(newpickle)
        unpickler = PersistentUnpickler(
            find_global, prfactory.persistent_load, file)
        meta = unpickler.load()
        if isinstance(meta, tuple):
            klass = meta[0]
            newargs = meta[1] or ()
            if isinstance(klass, tuple):
                klass = find_global(*klass)
        else:
            klass = meta
            newargs = ()

        if klass in _unresolvable:
            raise ConflictError

        inst = klass.__new__(klass, *newargs)

        try:
            resolve = inst._p_resolveConflict
        except AttributeError:
            _unresolvable[klass] = 1
            raise ConflictError

        oldData = self.loadSerial(oid, oldSerial)
        if not committedData:
            committedData = self.loadSerial(oid, committedSerial)

        newstate = unpickler.load()
        old = state(self, oid, oldSerial, prfactory, oldData)
        committed = state(self, oid, committedSerial, prfactory, committedData)

        resolved = resolve(old, committed, newstate)

        file = BytesIO()
        pickler = PersistentPickler(persistent_id, file, _protocol)
        pickler.dump(meta)
        pickler.dump(resolved)
        return self._crs_transform_record_data(file.getvalue())
    except (ConflictError, BadClassName) as e:
        logger.debug(
            "Conflict resolution on %s failed with %s: %s",
            klass, e.__class__.__name__, str(e))
    except:
        # If anything else went wrong, catch it here and avoid passing an
        # arbitrary exception back to the client.  The error here will mask
        # the original ConflictError.  A client can recover from a
        # ConflictError, but not necessarily from other errors.  But log
        # the error so that any problems can be fixed.
        logger.exception(
            "Unexpected error while trying to resolve conflict on %s", klass)

    raise ConflictError(oid=oid, serials=(committedSerial, oldSerial),
                        data=newpickle)
def __init__(self, storage,
             pool_size=7,
             pool_timeout=1 << 31,
             cache_size=400,
             cache_size_bytes=0,
             historical_pool_size=3,
             historical_cache_size=1000,
             historical_cache_size_bytes=0,
             historical_timeout=300,
             database_name='unnamed',
             databases=None,
             xrefs=True,
             large_record_size=1 << 24,
             **storage_args):
    """Create an object database.

    :Parameters:
      - `storage`: the storage used by the database, e.g. FileStorage
      - `pool_size`: expected maximum number of open connections
      - `cache_size`: target size of Connection object cache
      - `cache_size_bytes`: target size measured in total estimated size
        of objects in the Connection object cache.
        "0" means unlimited.
      - `historical_pool_size`: expected maximum number of total
        historical connections
      - `historical_cache_size`: target size of Connection object cache for
        historical (`at` or `before`) connections
      - `historical_cache_size_bytes` -- similar to `cache_size_bytes` for
        the historical connection.
      - `historical_timeout`: minimum number of seconds that an unused
        historical connection will be kept, or None.
      - `xrefs` - Boolean flag indicating whether implicit cross-database
        references are allowed
    """
    if isinstance(storage, six.string_types):
        from ZODB import FileStorage
        storage = ZODB.FileStorage.FileStorage(storage, **storage_args)
    elif storage is None:
        from ZODB import MappingStorage
        storage = ZODB.MappingStorage.MappingStorage(**storage_args)

    # Allocate lock.
    x = threading.RLock()
    self._a = x.acquire
    self._r = x.release

    # pools and cache sizes
    self.pool = ConnectionPool(pool_size, pool_timeout)
    self.historical_pool = KeyedConnectionPool(historical_pool_size,
                                               historical_timeout)
    self._cache_size = cache_size
    self._cache_size_bytes = cache_size_bytes
    self._historical_cache_size = historical_cache_size
    self._historical_cache_size_bytes = historical_cache_size_bytes

    # Setup storage
    self.storage = storage
    self.references = ZODB.serialize.referencesf
    try:
        storage.registerDB(self)
    except TypeError:
        storage.registerDB(self, None)  # Backward compat

    if (not hasattr(storage, 'tpc_vote')) and not storage.isReadOnly():
        warnings.warn(
            "Storage doesn't have a tpc_vote and this violates "
            "the storage API. Violently monkeypatching in a do-nothing "
            "tpc_vote.",
            DeprecationWarning, 2)
        storage.tpc_vote = lambda *args: None

    if IMVCCStorage.providedBy(storage):
        temp_storage = storage.new_instance()
    else:
        temp_storage = storage
    try:
        try:
            temp_storage.load(z64, '')
        except KeyError:
            # Create the database's root in the storage if it doesn't exist
            from persistent.mapping import PersistentMapping
            root = PersistentMapping()
            # Manually create a pickle for the root to put in the storage.
            # The pickle must be in the special ZODB format.
            file = BytesIO()
            p = Pickler(file, _protocol)
            p.dump((root.__class__, None))
            p.dump(root.__getstate__())
            t = transaction.Transaction()
            t.description = 'initial database creation'
            temp_storage.tpc_begin(t)
            temp_storage.store(z64, None, file.getvalue(), '', t)
            temp_storage.tpc_vote(t)
            temp_storage.tpc_finish(t)
    finally:
        if IMVCCStorage.providedBy(temp_storage):
            temp_storage.release()

    # Multi-database setup.
    if databases is None:
        databases = {}
    self.databases = databases
    self.database_name = database_name
    if database_name in databases:
        raise ValueError("database_name %r already in databases" %
                         database_name)
    databases[database_name] = self
    self.xrefs = xrefs

    self.large_record_size = large_record_size
def _importDuringCommit(self, transaction, f, return_oid_list): """Import data during two-phase commit. Invoked by the transaction manager mid commit. Appends one item, the OID of the first object created, to return_oid_list. """ oids = {} # IMPORTANT: This code should be consistent with the code in # serialize.py. It is currently out of date and doesn't handle # weak references. def persistent_load(ooid): """Remap a persistent id to a new ID and create a ghost for it.""" klass = None if isinstance(ooid, tuple): ooid, klass = ooid if not isinstance(ooid, bytes): assert isinstance(ooid, str) # this happens on Python 3 when all bytes in the oid are < 0x80 ooid = ooid.encode('ascii') if ooid in oids: oid = oids[ooid] else: if klass is None: oid = self._storage.new_oid() else: oid = self._storage.new_oid(), klass oids[ooid] = oid return Ghost(oid) while 1: header = f.read(16) if header == export_end_marker: break if len(header) != 16: raise ExportError("Truncated export file") # Extract header information ooid = header[:8] length = u64(header[8:16]) data = f.read(length) if len(data) != length: raise ExportError("Truncated export file") if oids: oid = oids[ooid] if isinstance(oid, tuple): oid = oid[0] else: oids[ooid] = oid = self._storage.new_oid() return_oid_list.append(oid) # Blob support blob_begin = f.read(len(blob_begin_marker)) if blob_begin == blob_begin_marker: # Copy the blob data to a temporary file # and remember the name blob_len = u64(f.read(8)) blob_filename = mktemp() blob_file = open(blob_filename, "wb") cp(f, blob_file, blob_len) blob_file.close() else: f.seek(-len(blob_begin_marker),1) blob_filename = None pfile = BytesIO(data) unpickler = Unpickler(pfile) unpickler.persistent_load = persistent_load newp = BytesIO() pickler = PersistentPickler(persistent_id, newp, _protocol) pickler.dump(unpickler.load()) pickler.dump(unpickler.load()) data = newp.getvalue() if blob_filename is not None: self._storage.storeBlob(oid, None, data, blob_filename, '', transaction) else: self._storage.store(oid, None, data, '', transaction)
def _importDuringCommit(self, transaction, f, return_oid_list): """Import data during two-phase commit. Invoked by the transaction manager mid commit. Appends one item, the OID of the first object created, to return_oid_list. """ oids = {} # IMPORTANT: This code should be consistent with the code in # serialize.py. It is currently out of date and doesn't handle # weak references. def persistent_load(ooid): """Remap a persistent id to a new ID and create a ghost for it.""" klass = None if isinstance(ooid, tuple): ooid, klass = ooid if not isinstance(ooid, bytes): assert isinstance(ooid, str) # this happens on Python 3 when all bytes in the oid are < 0x80 ooid = ooid.encode('ascii') if ooid in oids: oid = oids[ooid] else: if klass is None: oid = self._storage.new_oid() else: oid = self._storage.new_oid(), klass oids[ooid] = oid return Ghost(oid) while 1: header = f.read(16) if header == export_end_marker: break if len(header) != 16: raise ExportError("Truncated export file") # Extract header information ooid = header[:8] length = u64(header[8:16]) data = f.read(length) if len(data) != length: raise ExportError("Truncated export file") if oids: oid = oids[ooid] if isinstance(oid, tuple): oid = oid[0] else: oids[ooid] = oid = self._storage.new_oid() return_oid_list.append(oid) # Blob support blob_begin = f.read(len(blob_begin_marker)) if blob_begin == blob_begin_marker: # Copy the blob data to a temporary file # and remember the name blob_len = u64(f.read(8)) blob_filename = mktemp() blob_file = open(blob_filename, "wb") cp(f, blob_file, blob_len) blob_file.close() else: f.seek(-len(blob_begin_marker), 1) blob_filename = None pfile = BytesIO(data) unpickler = Unpickler(pfile) unpickler.persistent_load = persistent_load newp = BytesIO() pickler = PersistentPickler(persistent_id, newp, _protocol) pickler.dump(unpickler.load()) pickler.dump(unpickler.load()) data = newp.getvalue() if blob_filename is not None: self._storage.storeBlob(oid, None, data, blob_filename, '', transaction) else: self._storage.store(oid, None, data, '', transaction)
class ObjectWriter: """Serializes objects for storage in the database. The ObjectWriter creates object pickles in the ZODB format. It also detects new persistent objects reachable from the current object. """ _jar = None def __init__(self, obj=None): self._file = BytesIO() self._p = Pickler(self._file, _protocol) if sys.version_info[0] < 3: self._p.inst_persistent_id = self.persistent_id # PyPy uses a python implementation of cPickle in both Python 2 # and Python 3. We can't really detect inst_persistent_id as its # a magic attribute that's not readable, but it doesn't hurt to # simply always assign to persistent_id also self._p.persistent_id = self.persistent_id else: self._p.persistent_id = self.persistent_id self._stack = [] if obj is not None: self._stack.append(obj) jar = obj._p_jar assert myhasattr(jar, "new_oid") self._jar = jar def persistent_id(self, obj): """Return the persistent id for obj. >>> from ZODB.tests.util import P >>> class DummyJar: ... xrefs = True ... def new_oid(self): ... return 42 ... def db(self): ... return self ... databases = {} >>> jar = DummyJar() >>> class O: ... _p_jar = jar >>> writer = ObjectWriter(O) Normally, object references include the oid and a cached named reference to the class. Having the class information available allows fast creation of the ghost, avoiding requiring an additional database lookup. >>> bob = P('bob') >>> oid, cls = writer.persistent_id(bob) >>> oid 42 >>> cls is P True If a persistent object does not already have an oid and jar, these will be assigned by persistent_id(): >>> bob._p_oid 42 >>> bob._p_jar is jar True If the object already has a persistent id, the id is not changed: >>> bob._p_oid = 24 >>> oid, cls = writer.persistent_id(bob) >>> oid 24 >>> cls is P True If the jar doesn't match that of the writer, an error is raised: >>> bob._p_jar = DummyJar() >>> writer.persistent_id(bob) ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS Traceback (most recent call last): ... InvalidObjectReference: ('Attempt to store an object from a foreign database connection', <ZODB.serialize.DummyJar ...>, P(bob)) Constructor arguments used by __new__(), as returned by __getnewargs__(), can affect memory allocation, but may also change over the life of the object. This makes it useless to cache even the object's class. >>> class PNewArgs(P): ... def __getnewargs__(self): ... return () >>> sam = PNewArgs('sam') >>> writer.persistent_id(sam) 42 >>> sam._p_oid 42 >>> sam._p_jar is jar True Check that simple objects don't get accused of persistence: >>> writer.persistent_id(42) >>> writer.persistent_id(object()) Check that a classic class doesn't get identified improperly: >>> class ClassicClara: ... pass >>> clara = ClassicClara() >>> writer.persistent_id(clara) """ # Most objects are not persistent. The following cheap test # identifies most of them. For these, we return None, # signalling that the object should be pickled normally. if not isinstance(obj, (Persistent, type, WeakRef)): # Not persistent, pickle normally return None # Any persistent object must have an oid: try: oid = obj._p_oid except AttributeError: # Not persistent, pickle normally return None if not (oid is None or isinstance(oid, bytes)): # Deserves a closer look: # Make sure it's not a descriptor if hasattr(oid, "__get__"): # The oid is a descriptor. That means obj is a non-persistent # class whose instances are persistent, so ... 
# Not persistent, pickle normally return None if oid is WeakRefMarker: # we have a weakref, see weakref.py oid = obj.oid if oid is None: target = obj() # get the referenced object oid = target._p_oid if oid is None: # Here we are causing the object to be saved in # the database. One could argue that we shouldn't # do this, because a weakref should not cause an object # to be added. We'll be optimistic, though, and # assume that the object will be added eventually. oid = self._jar.new_oid() target._p_jar = self._jar target._p_oid = oid self._stack.append(target) obj.oid = oid obj.dm = target._p_jar obj.database_name = obj.dm.db().database_name if obj.dm is self._jar: return ["w", (oid,)] else: return ["w", (oid, obj.database_name)] # Since we have an oid, we have either a persistent instance # (an instance of Persistent), or a persistent class. # NOTE! Persistent classes don't (and can't) subclass persistent. database_name = None if oid is None: oid = obj._p_oid = self._jar.new_oid() obj._p_jar = self._jar self._stack.append(obj) elif obj._p_jar is not self._jar: if not self._jar.db().xrefs: raise InvalidObjectReference( "Database %r doesn't allow implicit cross-database " "references" % self._jar.db().database_name, self._jar, obj, ) try: otherdb = obj._p_jar.db() database_name = otherdb.database_name except AttributeError: otherdb = self if self._jar.db().databases.get(database_name) is not otherdb: raise InvalidObjectReference( "Attempt to store an object from a foreign " "database connection", self._jar, obj ) if self._jar.get_connection(database_name) is not obj._p_jar: raise InvalidObjectReference( "Attempt to store a reference to an object from " "a separate connection to the same database or " "multidatabase", self._jar, obj, ) # OK, we have an object from another database. # Lets make sure the object ws not *just* loaded. if obj._p_jar._implicitlyAdding(oid): raise InvalidObjectReference( "A new object is reachable from multiple databases. " "Won't try to guess which one was correct!", self._jar, obj, ) klass = type(obj) if hasattr(klass, "__getnewargs__"): # We don't want to save newargs in object refs. # It's possible that __getnewargs__ is degenerate and # returns (), but we don't want to have to deghostify # the object to find out. # Note that this has the odd effect that, if the class has # __getnewargs__ of its own, we'll lose the optimization # of caching the class info. if database_name is not None: return ["n", (database_name, oid)] return oid # Note that we never get here for persistent classes. # We'll use direct refs for normal classes. if database_name is not None: return ["m", (database_name, oid, klass)] return oid, klass def serialize(self, obj): # We don't use __class__ here, because obj could be a persistent proxy. # We don't want to be fooled by proxies. klass = type(obj) # We want to serialize persistent classes by name if they have # a non-None non-empty module so as not to have a direct # ref. This is important when copying. We probably want to # revisit this in the future. newargs = getattr(obj, "__getnewargs__", None) if isinstance(getattr(klass, "_p_oid", 0), _oidtypes) and klass.__module__: # This is a persistent class with a non-empty module. This # uses pickle format #3 or #7. klass = klass.__module__, klass.__name__ if newargs is None: meta = klass, None else: meta = klass, newargs() elif newargs is None: # Pickle format #1. meta = klass else: # Pickle format #2. 
meta = klass, newargs() return self._dump(meta, obj.__getstate__()) def _dump(self, classmeta, state): # To reuse the existing BytesIO object, we must reset # the file position to 0 and truncate the file after the # new pickle is written. self._file.seek(0) self._p.clear_memo() self._p.dump(classmeta) self._p.dump(state) self._file.truncate() return self._file.getvalue() def __iter__(self): return NewObjectIterator(self._stack)