def packer():
    """Generate the steps of packing the storage into a new shelf file.

    This is a generator: each ``next()`` performs one increment of work.
    It yields timestamped status strings, whatever ``Shelf.generate_shelf``
    yields, and each hole reported by the new shelf's offset map.

    ``self``, ``file`` and ``file_path`` are not defined here; they are
    expected to come from the enclosing scope.

    Side effects on completion: oids found in holes are added to
    ``self.invalid``, ``self.shelf`` is replaced by the newly written
    shelf, and ``self.pack_extra`` is reset to ``None``.
    """
    yield "started %s" % datetime.now()

    # Shared with gen_oid_record so the oids reached in the first pass are
    # known when the pack_extra oids are handled below.
    reached = IntSet()
    live_records = self.gen_oid_record(
        start_oid=int8_to_str(0), seen=reached)
    for progress in Shelf.generate_shelf(file, live_records):
        yield progress
    file.flush()
    file.fsync()
    packed_shelf = Shelf(file)
    yield "base written %s" % datetime.now()

    # Invalidate oids that have been removed: a hole in the new shelf whose
    # oid still has a position in the current shelf.
    for gap in packed_shelf.get_offset_map().gen_holes():
        yield gap
        gap_oid = int8_to_str(gap)
        if self.shelf.get_position(gap_oid) is None:
            continue
        assert packed_shelf.get_position(gap_oid) is None
        self.invalid.add(gap_oid)
    yield "invalidations identified %s" % datetime.now()

    # Every pack_extra oid is un-marked before any of them is traversed,
    # presumably so that each one is walked again even if the first pass
    # (or an earlier pack_extra traversal) had already reached it.
    for extra_oid in self.pack_extra:
        reached.discard(str_to_int8(extra_oid))
    for extra_oid in self.pack_extra:
        packed_shelf.store(
            self.gen_oid_record(start_oid=extra_oid, seen=reached))
    file.flush()
    file.fsync()

    # Keep the previous file under a '.prepack' name (unless it is a
    # temporary file), then move the new file into place.
    if not self.shelf.get_file().is_temporary():
        self.shelf.get_file().rename(file_path + '.prepack')
        self.shelf.get_file().close()
    packed_shelf.get_file().rename(file_path)
    self.shelf = packed_shelf
    self.pack_extra = None
    yield "finished %s" % datetime.now()
def a(self):
    """Exercise IntSet membership while adding, then discarding, 0..999."""
    limit = 1000
    members = IntSet(size=limit)

    # Add phase: a value is absent before add(), present afterwards, and
    # the previously added value is still present.
    for value in range(limit):
        assert value not in members
        members.add(value)
        assert value in members
        assert value <= 0 or value - 1 in members

    # Discard phase: a value is absent after discard(), and the previously
    # discarded value stays absent.
    for value in range(limit):
        members.discard(value)
        assert value not in members
        assert value <= 0 or value - 1 not in members
def gen_oid_record(self, start_oid=None, **other):
    """Generate stored records, optionally limited to those reachable
    from start_oid.

    When start_oid is None, every item produced by iteritems(self.shelf)
    is yielded unchanged.  Otherwise the traversal starts at start_oid and
    follows the oids found in each record's reference data, yielding an
    (oid, record) pair once for every oid reached.

    Additional keyword arguments are accepted and ignored.
    """
    if start_oid is None:
        for item in iteritems(self.shelf):
            yield item
        return

    pending = [start_oid]
    visited = IntSet()  # This eventually contains them all.
    while pending:
        # The most recently added oid is taken first.
        oid = pending.pop()
        oid_number = str_to_int8(oid)
        if oid_number in visited:
            continue
        visited.add(oid_number)
        record = self.load(oid)
        record_oid, _data, refdata = unpack_record(record)
        # The loaded record must be the one that was asked for.
        assert oid == record_oid
        pending.extend(split_oids(refdata))
        yield oid, record
def gen_oid_record(self, start_oid=None, seen=None, **other):
    """Generate stored records, optionally limited to those reachable
    from start_oid.

    When start_oid is None, every item produced by iteritems(self.shelf)
    is yielded unchanged.  Otherwise the traversal starts at start_oid and
    follows the oids found in each record's reference data, yielding an
    (oid, record) pair once for every oid that is not already in `seen`.

    `seen` is an optional set of integer oids to skip.  It is updated in
    place as oids are visited, so a caller can share one set across several
    traversals.  A fresh IntSet is used when it is None.

    Additional keyword arguments are accepted and ignored.
    """
    if start_oid is None:
        for item in iteritems(self.shelf):
            yield item
        return

    if seen is None:
        seen = IntSet()  # This eventually contains them all.
    pending = [start_oid]  # A one-element list is already a valid heap.
    while pending:
        # The smallest pending oid is always processed next.
        oid = heapq.heappop(pending)
        oid_number = str_to_int8(oid)
        if oid_number in seen:
            continue
        seen.add(oid_number)
        record = self.load(oid)
        record_oid, _data, refdata = unpack_record(record)
        # The loaded record must be the one that was asked for.
        assert oid == record_oid
        for referenced_oid in split_oids(refdata):
            heapq.heappush(pending, referenced_oid)
        yield oid, record