def generate_shelf(klass, file, items):
    """(File, [(str, str)])
    This returns a generator that writes a new Shelf into file, iterating
    once through the given items.  The use of an iterator makes it possible
    to build a new Shelf incrementally.  Yields progress counters while
    records are written, then the steps of the offset-map construction.
    """
    # A Shelf is always generated from scratch: the file must be empty.
    file.seek_end()
    if not file.tell() == 0:
        raise ValueError("Expected %s to be empty." % file)
    write(file, klass.prefix)
    if not items:
        # Just write an empty transaction.
        write_int8(file, 0)
        # Write an empty index array.
        offset_map = OffsetMap(file)
    else:
        # Write a transaction here with the given items.
        transaction_start = file.tell()
        # Write a placeholder for the length; patched once the size is known.
        write_int8(file, 0)
        # Loop over the items, writing their records.
        # Keep track of max_key and max_offset (they size the offset map).
        max_key = 0
        max_offset = 0
        n = 0
        for name, value in items:
            max_key = max(max_key, str_to_int8(name))
            max_offset = max(max_offset, file.tell())
            # Record layout: 8-byte length, then name bytes, then value bytes.
            write_int8(file, len(name) + len(value))
            write(file, name)
            write(file, value)
            n += 1
            yield n
        transaction_end = file.tell()
        # Write the correct transaction length over the placeholder,
        # excluding the 8 bytes of the length field itself.
        file.seek(transaction_start)
        write_int8(file, transaction_end - transaction_start - 8)
        # Write the empty array with the calculated dimensions.
        file.seek(transaction_end)
        for step in OffsetMap.generate(file, max_key, max_offset):
            yield step
        offset_map = OffsetMap(file)
        # Now read through the records and record the offsets in the array.
        file.seek(transaction_start + 8)
        while file.tell() < transaction_end:
            position = file.tell()
            record_length = read_int8(file)
            name = read(file, 8)  # keys are always 8-byte names
            k = str_to_int8(name)
            offset_map[k] = position
            # Skip past this record: 8-byte length field + payload.
            file.seek(position + 8 + record_length)
            n -= 1
            yield n
    for index in offset_map.gen_stitch():
        yield index
def gen_items(records):
    """Unpack raw records into (int_oid, data_bytes, ref_bytes) triples.

    NOTE(review): `self` is a free variable here, so this is presumably a
    closure defined inside a method -- confirm against the enclosing scope.
    """
    for record in records:
        oid, data, refdata = unpack_record(record)
        yield str_to_int8(oid), as_bytes(data), as_bytes(refdata)
        # A pack is in progress when pack_extra is not None:
        # ensure object and refs are marked alive and not removed
        if self.pack_extra is not None:
            self.pack_extra.append(oid)
def __init__(self, file=None, readonly=False, repair=False):
    """(file:str=None, readonly:bool=False, repair:bool=False)
    If file is empty (or None), a temporary file will be used.
    """
    self.oid = -1
    if file is None:
        # A temporary file is always writable and never needs repair.
        file = File()
        assert not readonly
        assert not repair
    elif not hasattr(file, 'seek'):
        # A path string was given: wrap it in a File object.
        file = File(file, readonly=readonly)
    if not readonly:
        file.obtain_lock()
    self.fp = file
    self.pending_records = {}
    self.pack_extra = None  # set to a list of oids while a pack is running
    self.fp.seek(0, 2)  # seek to end to see whether the file is empty
    if self.fp.tell() != 0:
        # Existing data: verify the file is in the expected format.
        assert self.has_format(self.fp)
    else:
        # Write header for new file.
        # NOTE(review): this seeks past len(self.MAGIC) without writing
        # MAGIC here -- presumably _write_header/has_format handle the
        # magic prefix; confirm against those methods.
        self.fp.seek(len(self.MAGIC))
        self._write_header(self.fp)
        self._write_index(self.fp, {})
    self.index = {}
    self._build_index(repair)
    # Track the highest allocated oid so allocation can continue from it.
    max_oid = -1
    for oid in self.index:
        max_oid = max(max_oid, str_to_int8(oid))
    self.oid = max_oid
    self.invalid = set()
def packer():
    """Generator that incrementally packs the storage into `file`.

    Closure: uses `self`, `file`, and `file_path` from the enclosing
    scope.  Yields progress markers (counters and status strings) so the
    caller can interleave other work between steps.
    """
    yield "started %s" % datetime.now()
    seen = IntSet()
    # Walk every record reachable from the root oid (0).
    items = self.gen_oid_record(start_oid=int8_to_str(0), seen=seen)
    for step in Shelf.generate_shelf(file, items):
        yield step
    file.flush()
    file.fsync()
    shelf = Shelf(file)
    yield "base written %s" % datetime.now()
    # Invalidate oids that have been removed.
    for hole in shelf.get_offset_map().gen_holes():
        yield hole
        oid = int8_to_str(hole)
        if self.shelf.get_position(oid) is not None:
            # Present in the old shelf but not the new one: unreachable.
            assert shelf.get_position(oid) is None
            self.invalid.add(oid)
    yield "invalidations identified %s" % datetime.now()
    # Objects stored during the pack must be revisited, so un-mark them.
    for oid in self.pack_extra:
        seen.discard(str_to_int8(oid))
    for oid in self.pack_extra:
        shelf.store(self.gen_oid_record(start_oid=oid, seen=seen))
    file.flush()
    file.fsync()
    if not self.shelf.get_file().is_temporary():
        # Keep the old file around as a .prepack backup.
        self.shelf.get_file().rename(file_path + '.prepack')
        self.shelf.get_file().close()
    shelf.get_file().rename(file_path)
    self.shelf = shelf
    self.pack_extra = None
    yield "finished %s" % datetime.now()
def _get_refs(self, oid):
    """Return the oids referenced by the object stored under `oid`.

    Raises KeyError if no row exists for this oid.
    """
    cursor = self._conn.cursor()
    cursor.execute('SELECT refs FROM objects WHERE id = ?',
                   (str_to_int8(oid),))
    row = cursor.fetchone()
    if row is None:
        raise KeyError(oid)
    return split_oids(row[0])
def gen_oid_record(self, start_oid=None, **other):
    """Yield (oid, record) pairs.

    Without start_oid, iterate the whole shelf.  With start_oid, do a
    depth-first traversal of every record reachable from it.
    """
    if start_oid is None:
        for pair in iteritems(self.shelf):
            yield pair
        return
    pending = [start_oid]
    visited = IntSet()  # This eventually contains them all.
    while pending:
        oid = pending.pop()
        key = str_to_int8(oid)
        if key in visited:
            continue
        visited.add(key)
        record = self.load(oid)
        record_oid, data, refdata = unpack_record(record)
        assert oid == record_oid
        pending.extend(split_oids(refdata))
        yield oid, record
def b(self):
    """Exercise WordArray read/write round-trips on an in-memory buffer."""
    count = 1000
    buf = BytesIO()
    words = WordArray(file=buf, bytes_per_word=8, number_of_words=count)
    # Fill the array and check the last slot via negative indexing.
    for idx in xrange(count):
        words[idx] = int8_to_str(idx)
    assert words[-1] == int8_to_str(count - 1)
    # Read back, overwrite with doubled values, and read back again.
    for idx in xrange(count):
        assert idx == str_to_int8(words[idx])
        words[idx] = int8_to_str(2 * idx)
        assert idx == str_to_int8(words[idx]) / 2
    assert len(words) == count
    assert raises(IndexError, words.__getitem__, count + 1)
    # Reopen from the same buffer and verify writes persist.
    buf.seek(0)
    reopened = WordArray(file=buf)
    reopened[-1] = as_bytes('mmmmmmmm')
    assert reopened[-1] == as_bytes('mmmmmmmm')
def __str__(self):
    """Summarize the conflict by its first oid, noting if there are more."""
    if self.oids is None:
        return "conflicting oids not available"
    template = "oids=[%s ...]" if len(self.oids) > 1 else "oids=[%s]"
    head = self.oids[0]
    return template % (head and str_to_int8(head))
def gen_oid_record(self, start_oid=None, seen=None, **other):
    """Yield (oid, record) pairs.

    Without start_oid, iterate the whole shelf.  With start_oid, walk
    every record reachable from it in heap (smallest-oid-first) order,
    recording visited oids in `seen` (a fresh IntSet if not supplied).
    """
    if start_oid is None:
        for pair in iteritems(self.shelf):
            yield pair
        return
    if seen is None:
        seen = IntSet()  # This eventually contains them all.
    heap = [start_oid]
    while heap:
        oid = heapq.heappop(heap)
        key = str_to_int8(oid)
        if key in seen:
            continue
        seen.add(key)
        record = self.load(oid)
        record_oid, data, refdata = unpack_record(record)
        assert oid == record_oid
        for ref in split_oids(refdata):
            heapq.heappush(heap, ref)
        yield oid, record
def handle_commit(self, client, db_name):
    """Handle a commit ('C') request from `client` against `db_name`.

    Sends the client its pending invalidations, reads the transaction
    data, stores each record, and finishes with STATUS_OKAY or
    STATUS_INVALID.  Yields I/O operations for the async driver.

    Raises ClientError when the transaction contains an oid reserved
    (but unused) by another client.
    """
    # C
    log(20, 'Commit %s' % db_name)
    storage = self.storages[db_name]
    self._sync_storage(db_name, storage)
    # Push the accumulated invalidations back to this client first.
    invalid = client.invalid[db_name]
    yield client.write(int4_to_str(len(invalid)))
    yield client.write(join_bytes(invalid))
    yield client.flush()
    invalid.clear()
    tdata_len = str_to_int4((yield client.read(4)))
    if tdata_len == 0:
        # Client decided not to commit (e.g. conflict)
        return
    tdata = yield client.read(tdata_len)
    logging_debug = is_logging(10)
    logging_debug and log(10, 'Committing %s bytes', tdata_len)
    storage.begin()
    # Wire format: repeated (4-byte length, 8-byte oid, record bytes).
    i = 0
    oids = []
    while i < tdata_len:
        rlen = str_to_int4(tdata[i:i+4])
        i += 4
        oid = tdata[i:i+8]
        record = tdata[i+8:i+rlen]
        i += rlen
        if logging_debug:
            class_name = extract_class_name(record)
            log(10, ' oid=%-6s rlen=%-6s %s',
                str_to_int8(oid), rlen, class_name)
        storage.store(oid, record)
        oids.append(oid)
    assert i == tdata_len
    oid_set = set(oids)
    for c in self.clients:
        if c is not client:
            # Fix: report an oid that actually conflicts; the old code
            # reported whatever 'oid' was left over from the parse loop.
            conflicting = oid_set.intersection(c.unused_oids[db_name])
            if conflicting:
                raise ClientError(
                    'invalid oid: %r' % sorted(conflicting)[0])
    try:
        handle_invalidations = (
            lambda oids: self._handle_invalidations(db_name, oids))
        storage.end(handle_invalidations=handle_invalidations)
    except ConflictError:
        log(20, 'Conflict during commit')
        yield client.write(STATUS_INVALID)
    else:
        self._report_load_record(storage)
        log(20, 'Committed %3s objects %s bytes at %s',
            len(oids), tdata_len, datetime.now())
        yield client.write(STATUS_OKAY)
        client.unused_oids[db_name] -= oid_set
        for c in self.clients:
            if c is not client:
                c.invalid[db_name].update(oids)
        storage.d_bytes_since_pack += tdata_len + 8
def load(self, oid):
    """(str) -> str
    Return object record identified by 'oid'.

    Raises KeyError when the oid is not present in the database.
    """
    cursor = self._conn.cursor()
    cursor.execute('SELECT id, data, refs FROM objects WHERE id = ?',
                   (str_to_int8(oid),))
    row = cursor.fetchone()
    if row is None:
        raise KeyError(oid)
    return pack_record(int8_to_str(row[0]), row[1], row[2])
def _send_load_response(self, s, oid):
    """Write the record for `oid` (or an error status) to socket `s`."""
    if oid in self._find_client(s).invalid:
        write(s, STATUS_INVALID)
        return
    try:
        record = self.storage.load(oid)
    except KeyError:
        log(10, 'KeyError %s', str_to_int8(oid))
        write(s, STATUS_KEYERROR)
    except ReadConflictError:
        log(10, 'ReadConflictError %s', str_to_int8(oid))
        write(s, STATUS_INVALID)
    else:
        if is_logging(5):
            # Count loads per class for the periodic load report.
            class_name = extract_class_name(record)
            counts = self.load_record
            counts[class_name] = counts.get(class_name, 0) + 1
            log(4, 'Load %-7s %s', str_to_int8(oid), class_name)
        write(s, STATUS_OKAY)
        write_int4_str(s, record)
def _send_load_response(self, client, db_name, storage, oid):
    """Send the record for `oid` (or an error status) to `client`.

    Yields I/O operations for the async driver.
    """
    if oid in client.invalid[db_name]:
        yield client.write(STATUS_INVALID)
        return
    try:
        record = storage.load(oid)
    except KeyError:
        log(10, 'KeyError %s', str_to_int8(oid))
        yield client.write(STATUS_KEYERROR)
    except ReadConflictError:
        log(10, 'ReadConflictError %s', str_to_int8(oid))
        yield client.write(STATUS_INVALID)
    else:
        if is_logging(5):
            # Count loads per class for the periodic load report.
            class_name = extract_class_name(record)
            counts = storage.d_load_record
            counts[class_name] = counts.get(class_name, 0) + 1
            log(4, 'Load %-7s %s', str_to_int8(oid), class_name)
        yield client.write(STATUS_OKAY)
        yield client.write(int4_to_str(len(record)))
        yield client.write(record)
def handle_C(self, s):
    """Handle a commit ('C') request on socket `s`.

    Sends the client its pending invalidations, reads the transaction
    data, stores each record, then writes STATUS_OKAY or STATUS_INVALID.

    Raises ClientError when the transaction contains an oid reserved
    (but unused) by another client.
    """
    # commit
    self._sync_storage()
    client = self._find_client(s)
    # Push the accumulated invalidations back to this client first.
    write_all(s, int4_to_str(len(client.invalid)),
              join_bytes(client.invalid))
    client.invalid.clear()
    tdata = read_int4_str(s)
    if len(tdata) == 0:
        return  # client decided not to commit (e.g. conflict)
    logging_debug = is_logging(10)
    logging_debug and log(10, 'Committing %s bytes', len(tdata))
    self.storage.begin()
    # Wire format: repeated (4-byte length, 8-byte oid, record bytes).
    i = 0
    oids = []
    while i < len(tdata):
        rlen = str_to_int4(tdata[i:i + 4])
        i += 4
        oid = tdata[i:i + 8]
        record = tdata[i + 8:i + rlen]
        i += rlen
        if logging_debug:
            class_name = extract_class_name(record)
            log(10, ' oid=%-6s rlen=%-6s %s',
                str_to_int8(oid), rlen, class_name)
        self.storage.store(oid, record)
        oids.append(oid)
    assert i == len(tdata)
    oid_set = set(oids)
    for other_client in self.clients:
        if other_client is not client:
            # Fix: report an oid that actually conflicts; the old code
            # reported whatever 'oid' was left over from the parse loop.
            conflicting = oid_set.intersection(other_client.unused_oids)
            if conflicting:
                raise ClientError(
                    "invalid oid: %r" % sorted(conflicting)[0])
    try:
        self.storage.end(handle_invalidations=self._handle_invalidations)
    except ConflictError:
        log(20, 'Conflict during commit')
        write(s, STATUS_INVALID)
    else:
        self._report_load_record()
        log(20, 'Committed %3s objects %s bytes at %s',
            len(oids), len(tdata), datetime.now())
        write(s, STATUS_OKAY)
        client.unused_oids -= oid_set
        for c in self.clients:
            if c is not client:
                c.invalid.update(oids)
        self.bytes_since_pack += len(tdata) + 8
def handle_C(self, s):
    """Handle a commit ('C') request on socket `s`.

    Sends the client its pending invalidations, reads the transaction
    data, stores each record, then writes STATUS_OKAY or STATUS_INVALID.

    Raises ClientError when the transaction contains an oid reserved
    (but unused) by another client.
    """
    # commit
    self._sync_storage()
    client = self._find_client(s)
    # Push the accumulated invalidations back to this client first.
    write_all(s, int4_to_str(len(client.invalid)),
              join_bytes(client.invalid))
    client.invalid.clear()
    tdata = read_int4_str(s)
    if len(tdata) == 0:
        return  # client decided not to commit (e.g. conflict)
    logging_debug = is_logging(10)
    logging_debug and log(10, 'Committing %s bytes', len(tdata))
    self.storage.begin()
    # Wire format: repeated (4-byte length, 8-byte oid, record bytes).
    i = 0
    oids = []
    while i < len(tdata):
        rlen = str_to_int4(tdata[i:i+4])
        i += 4
        oid = tdata[i:i+8]
        record = tdata[i+8:i+rlen]
        i += rlen
        if logging_debug:
            class_name = extract_class_name(record)
            log(10, ' oid=%-6s rlen=%-6s %s',
                str_to_int8(oid), rlen, class_name)
        self.storage.store(oid, record)
        oids.append(oid)
    assert i == len(tdata)
    oid_set = set(oids)
    for other_client in self.clients:
        if other_client is not client:
            # Fix: report an oid that actually conflicts; the old code
            # reported whatever 'oid' was left over from the parse loop.
            conflicting = oid_set.intersection(other_client.unused_oids)
            if conflicting:
                raise ClientError(
                    "invalid oid: %r" % sorted(conflicting)[0])
    try:
        self.storage.end(handle_invalidations=self._handle_invalidations)
    except ConflictError:
        log(20, 'Conflict during commit')
        write(s, STATUS_INVALID)
    else:
        self._report_load_record()
        log(20, 'Committed %3s objects %s bytes at %s',
            len(oids), len(tdata), datetime.now())
        write(s, STATUS_OKAY)
        client.unused_oids -= oid_set
        for c in self.clients:
            if c is not client:
                c.invalid.update(oids)
        self.bytes_since_pack += len(tdata) + 8
def get_position(self, name):
    """(str) -> int
    Return the position of the most recent value with this name.

    Returns None when the name is unknown or only appears at or past
    the offset map's start.  Raises ValueError for keys that are not
    exactly 8 bytes long.
    """
    if len(name) != 8:
        raise ValueError("Expected a string with 8 bytes.")
    # In-memory index takes precedence over the on-disk offset map.
    cached = self.memory_index.get(name, None)
    if cached is not None:
        return cached
    # The offset-map lookup moves the file pointer; save and restore it.
    saved = self.file.tell()
    result = self.offset_map.get(str_to_int8(name), None)
    self.file.seek(saved)
    if result is None or result >= self.offset_map.get_start():
        return None
    return result
def __str__(self):
    """Render the first conflicting oid as a decimal string."""
    head = self.oids[0]
    return str(head and str_to_int8(head))
def _p_format_oid(self):
    """Return this persistent object's oid formatted as a decimal string."""
    raw = self._p_oid
    return str(raw and str_to_int8(as_bytes(raw)))
def check_int8_to_str_str_to_int8(self):
    """int8_to_str/str_to_int8 are inverses over an 8-byte encoding."""
    for value in (0, 1, 2):
        encoded = int8_to_str(value)
        assert len(encoded) == 8
        assert str_to_int8(encoded) == value
def gen_ids():
    """Yield one-element tuples of integer ids (closure over `oids`)."""
    for item in oids:
        yield (str_to_int8(item),)