def getB64TStampToInt(ts, TimeStamp=TimeStamp.TimeStamp,
                      a2b=binascii.a2b_base64):
    """Decode a getB64TStamp() string back to its timeTime() value.

    Undoes the URL-safe character substitutions made by the encoder
    ('.' -> '/', '-' -> '+'), restores the '=' padding that the
    encoder stripped, base-64 decodes the raw 8-byte stamp, and
    returns the resulting TimeStamp's timeTime() value.
    """
    b64 = ts.replace('.', '/').replace('-', '+') + '='
    return TimeStamp(a2b(b64)).timeTime()
def getB64TStamp(b2a=binascii.b2a_base64, gmtime=time.gmtime,
                 time=time.time, TimeStamp=TimeStamp.TimeStamp,
                 ):
    """Return the current time encoded as a URL/file-name-safe string.

    Builds a TimeStamp for "now" (UTC fields plus fractional seconds),
    base-64 encodes its raw 8 bytes, keeps everything before the '='
    padding (which also drops b2a's trailing newline), and substitutes
    '.' for '/' and '-' for '+'.  getB64TStampToInt reverses this.
    """
    now = time()
    stamp = TimeStamp(*gmtime(now)[:5] + (now % 60,))
    encoded = b2a(stamp.raw())
    # Everything before the '=' padding; [:-1] mirrors the original
    # slice-then-index idiom exactly.
    trimmed = encoded.split(b'=')[:-1][0]
    safe = trimmed.replace(b'/', b'.').replace(b'+', b'-')
    return safe.decode('ascii')
def getB64TStamp(b2a=binascii.b2a_base64, gmtime=time.gmtime,
                 time=time.time, b64_trans=b64_trans,
                 split=string.split, TimeStamp=TimeStamp.TimeStamp,
                 translate=string.translate):
    # NOTE(review): legacy Python 2-only implementation -- the backquote
    # repr syntax, string.split, and string.translate were all removed in
    # Python 3.  A bytes-based Python 3 variant of this function exists
    # elsewhere in this file; confirm which one is actually in use.
    # Encode "now" as a URL/file-name-safe base-64 timestamp string.
    t = time()
    # `...` is Python 2 backquote repr; presumably repr of a py2
    # TimeStamp is its raw 8-byte value -- TODO confirm.  [:-1] drops
    # b2a's trailing newline; split(..., '=')[0] keeps everything
    # before the '=' padding.
    ts = split(b2a(`TimeStamp(*gmtime(t)[:5]+(t%60,))`)[:-1], '=')[0]
    # b64_trans presumably maps '/' -> '.' and '+' -> '-' (the inverse
    # table b64_untrans is used by the decoder) -- confirm against the
    # table definitions.
    return translate(ts, b64_trans)
def getB64TStampToInt(ts, TimeStamp=TimeStamp.TimeStamp,
                      b64_untrans=b64_untrans,
                      a2b=binascii.a2b_base64,
                      translate=string.translate):
    # NOTE(review): legacy Python 2-only implementation --
    # string.translate was removed in Python 3; a py3 variant of this
    # decoder exists elsewhere in this file.
    # Reverse the encoder's character substitution via b64_untrans,
    # restore the stripped '=' padding, base-64 decode the raw stamp,
    # and return its timeTime() value.
    return TimeStamp(a2b(translate(ts + '=', b64_untrans))).timeTime()
def gc_(close, conf, days, ignore, conf2, fs, untransform, ptid):
    """Find and delete garbage (unreachable) objects in ZODB databases.

    Scans every storage in two passes -- recent records (at/after
    ``ptid``) and older ones -- maintaining three oid collections:
    ``good`` (reachable), ``bad`` (candidate garbage) and ``deleted``
    (latest record is a delete).  Objects left in ``bad`` are then
    removed from the primary databases in throttled batches.

    Parameters:

    close
        Mutable list; every resource needing cleanup (databases,
        iterators, the Bad set) is appended so the caller can close
        them even if this function raises.
    conf
        Path to the primary ZODB configuration file.
    days
        Days of recent history treated as good; used to compute
        ``ptid`` (now - 86400 * days) when ``ptid`` is None.
    ignore
        Passed through to ``getrefs`` -- presumably oids/names to skip
        during reference extraction; TODO confirm against getrefs.
    conf2
        Optional secondary configuration used for the analysis passes;
        must define the same set of database names as ``conf``.
    fs
        Mapping from storage name to a FileStorage path; named storages
        are scanned via FileIterator instead of ``storage.iterator``.
    untransform
        Optional callable applied to each record's data before
        reference extraction (e.g. to undo a storage-level transform).
    ptid
        Raw 8-byte timestamp splitting "recent" from "old" records;
        derived from ``days`` when None.

    Returns the ``Bad`` collection describing the removed garbage.
    """
    FileIterator = ZODB.FileStorage.FileIterator
    if untransform is not None:
        # Shadow the stock FileIterator with a wrapper that runs every
        # record's data through untransform() before it is examined.
        def FileIterator(*args):
            def transit(trans):
                for record in trans:
                    if record.data:
                        record.data = untransform(record.data)
                    yield record
            zfsit = ZODB.FileStorage.FileIterator(*args)
            try:
                for t in zfsit:
                    yield transit(t)
            finally:
                zfsit.close()

    def iter_storage(name, storage, start=None, stop=None):
        # Iterate a storage's transactions, preferring a direct file
        # scan when a FileStorage path was supplied in ``fs``.
        fsname = name or ''
        if fsname in fs:
            it = FileIterator(fs[fsname], start, stop)
        else:
            it = storage.iterator(start, stop)
        # We need to be sure to always close iterators
        # in case we raise an exception
        close.append(it)
        return it

    with open(conf) as f:
        db1 = ZODB.config.databaseFromFile(f)
    close.append(db1)
    if conf2 is None:
        db2 = db1
    else:
        logger.info("Using secondary configuration, %s, for analysis", conf2)
        with open(conf2) as f:
            db2 = ZODB.config.databaseFromFile(f)
        close.append(db2)
        if set(db1.databases) != set(db2.databases):
            raise ValueError("primary and secondary databases don't match.")

    databases = db2.databases
    storages = sorted((name, d.storage) for (name, d) in databases.items())

    if ptid is None:
        # Records at/after this stamp count as "recent".
        ptid = TimeStamp.TimeStamp(
            *time.gmtime(time.time() - 86400 * days)[:6]
        ).raw()

    good = oidset(databases)      # reachable oids
    bad = Bad(databases)          # candidate garbage
    close.append(bad)
    deleted = oidset(databases)   # oids whose latest record is a delete

    # Pass 1: seed reachability from each storage's root, then treat
    # all non-deleted recent records (and what they reference) as good.
    for name, storage in storages:
        fsname = name or ''
        logger.info("%s: roots", fsname)
        # Make sure we can get the roots
        data, s = storage.load(z64, '')
        good.insert(name, z64)
        for ref in getrefs(data, name, ignore):
            good.insert(*ref)

        n = 0
        if days:
            # All non-deleted new records are good
            logger.info("%s: recent", name)

            for trans in iter_storage(name, storage, start=ptid):
                for record in trans:
                    if n and n % 10000 == 0:
                        logger.info("%s: %s recent", name, n)
                    n += 1

                    oid = record.oid
                    data = record.data
                    if data:
                        if deleted.has(name, oid):
                            # A data record for an oid already seen as
                            # deleted in this scan is unexpected.
                            raise AssertionError(
                                "Non-deleted record after deleted")
                        good.insert(name, oid)

                        # and anything they reference
                        for ref_name, ref_oid in getrefs(data, name, ignore):
                            if not deleted.has(ref_name, ref_oid):
                                good.insert(ref_name, ref_oid)
                                bad.remove(ref_name, ref_oid)
                    else:
                        # deleted record
                        deleted.insert(name, oid)
                        good.remove(name, oid)

    # Pass 2: scan records older than ptid.
    # NOTE(review): ``n`` is not reset here, so the "old" progress count
    # continues from the last storage of pass 1 -- logging only, but
    # confirm that is intended.
    for name, storage in storages:
        # Now iterate over older records
        for trans in iter_storage(name, storage, start=None, stop=ptid):
            for record in trans:
                if n and n % 10000 == 0:
                    logger.info("%s: %s old", name, n)
                n += 1

                oid = record.oid
                data = record.data
                if data:
                    if deleted.has(name, oid):
                        # Object was deleted later; ignore stale data.
                        continue
                    if good.has(name, oid):
                        # A good object's references are good too.  If a
                        # reference was previously marked bad, rescue it
                        # and, transitively, everything it references.
                        # NOTE(review): this relies on insert() returning
                        # a truthy value only for newly-added oids --
                        # confirm against oidset.insert.
                        for ref in getrefs(data, name, ignore):
                            if deleted.has(*ref):
                                continue
                            if good.insert(*ref) and bad.has(*ref):
                                to_do = [ref]
                                while to_do:
                                    for ref in bad.pop(*to_do.pop()):
                                        if good.insert(*ref) and bad.has(*ref):
                                            to_do.append(ref)
                    else:
                        # Not (yet known to be) reachable: record it,
                        # with its references, as candidate garbage.
                        bad.insert(name, oid, record.tid,
                                   getrefs(data, name, ignore))
                else:
                    # deleted record
                    if good.has(name, oid):
                        good.remove(name, oid)
                    elif bad.has(name, oid):
                        bad.remove(name, oid)
                    deleted.insert(name, oid)

    if conf2 is not None:
        # Analysis finished; the secondary databases are no longer needed.
        for db in db2.databases.values():
            db.close()
        close.remove(db2)

    # Now, we have the garbage in bad. Remove it.
    batch_size = 100
    for name, db in sorted(db1.databases.items()):
        logger.info("%s: remove garbage", name)
        storage = db.storage
        nd = 0
        t = transaction.begin()
        txn_meta = TransactionMetaData()
        storage.tpc_begin(txn_meta)
        start = time.time()
        for oid, tid in bad.iterator(name):
            try:
                storage.deleteObject(oid, tid, txn_meta)
            except (ZODB.POSException.POSKeyError,
                    ZODB.POSException.ConflictError):
                # Object already gone, or changed since analysis --
                # skip it rather than abort the whole run.
                continue
            nd += 1
            if (nd % batch_size) == 0:
                # Commit a batch, then throttle: sleep twice as long as
                # the batch took and retune batch_size toward ~0.5s per
                # batch (never below 10).
                storage.tpc_vote(txn_meta)
                storage.tpc_finish(txn_meta)
                t.commit()
                logger.info("%s: deleted %s", name, nd)
                duration = time.time() - start
                time.sleep(duration * 2)
                batch_size = max(10, int(batch_size * .5 / duration))
                t = transaction.begin()
                txn_meta = TransactionMetaData()
                storage.tpc_begin(txn_meta)
                start = time.time()
        logger.info("Removed %s objects from %s", nd, name)
        if nd:
            # Commit the final partial batch.
            storage.tpc_vote(txn_meta)
            storage.tpc_finish(txn_meta)
            t.commit()
        else:
            storage.tpc_abort(txn_meta)
            t.abort()

    return bad