def _copy_list_ptr(src, p, src_pos, dst, dst_pos):
    """
    Copy the list of pointers referenced by ``p`` from ``src`` into ``dst``.

    ``src_pos`` is the offset against which ``p`` is dereferenced in ``src``;
    ``dst_pos`` is forwarded to ``dst.alloc_list``, whose return value is
    used as the start of the destination body.
    """
    body_start = ptr.deref(p, src_pos)
    n_items = ptr.list_item_count(p)
    n_bytes = n_items * 8  # every item is one 8-byte pointer word
    dst_body = dst.alloc_list(dst_pos, ptr.LIST_SIZE_PTR, n_items, n_bytes)
    # make sure the whole source body is readable before following pointers
    check_bounds(src, n_bytes, body_start)
    _copy_many_ptrs(n_items, src, body_start, dst, dst_body)
def _copy_list_composite(src, p, src_pos, dst, dst_pos):
    """
    Copy the composite list referenced by ``p`` from ``src`` into ``dst``.

    The body is made of one tag word followed by the elements. The tag is
    decoded to get the element count and the data/ptrs section sizes of each
    element. The whole body is copied in one go, then the pointers section
    of every element is re-copied through ``_copy_many_ptrs``.
    """
    body_start = ptr.deref(p, src_pos)
    n_words = ptr.list_item_count(p)   # words in the body, tag excluded
    n_bytes = (n_words + 1) * 8        # bytes in the body, tag included
    #
    # a single bounds check covers both the tag and everything after it
    check_bounds(src, n_bytes, body_start)
    tag = read_int64_fast(src, body_start)
    n_items = ptr.offset(tag)
    data_words = ptr.struct_data_size(tag)
    ptrs_words = ptr.struct_ptrs_size(tag)
    #
    # allocate the destination list and bulk-copy tag + elements
    dst_body = dst.alloc_list(dst_pos, ptr.LIST_SIZE_COMPOSITE,
                              n_words, n_bytes)
    dst.write_slice(dst_body, src, body_start, n_bytes)
    #
    # walk the elements: ``rel`` is the offset (from the start of the body)
    # of the current element's pointers section. It starts after the tag
    # word and the first data section, and advances by one element at a time
    stride = (data_words + ptrs_words) * 8
    rel = 8 + data_words * 8
    for _ in range(n_items):
        _copy_many_ptrs(ptrs_words,
                        src, body_start + rel,
                        dst, dst_body + rel)
        rel += stride
def visit(self, buf, p, offset):
    """
    Dispatch to the ``visit_*`` method matching the kind of pointer ``p``.

    ``offset`` is dereferenced through ``p`` before dispatching. Raises
    NotImplementedError for FAR pointers.
    """
    kind = ptr.kind(p)
    offset = ptr.deref(p, offset)
    if kind == ptr.STRUCT:
        data_size = ptr.struct_data_size(p)
        ptrs_size = ptr.struct_ptrs_size(p)
        return self.visit_struct(buf, p, offset, data_size, ptrs_size)
    if kind == ptr.LIST:
        size_tag = ptr.list_size_tag(p)
        count = ptr.list_item_count(p)
        if size_tag == ptr.LIST_SIZE_COMPOSITE:
            # for composite lists the real item count and the struct
            # layout come from the tag word at the start of the body
            tag = buf.read_ptr(offset)
            count = ptr.offset(tag)
            data_size = ptr.struct_data_size(tag)
            ptrs_size = ptr.struct_ptrs_size(tag)
            return self.visit_list_composite(buf, p, offset, count,
                                             data_size, ptrs_size)
        if size_tag == ptr.LIST_SIZE_PTR:
            return self.visit_list_ptr(buf, p, offset, count)
        if size_tag == ptr.LIST_SIZE_BIT:
            return self.visit_list_bit(buf, p, offset, count)
        return self.visit_list_primitive(buf, p, offset, size_tag, count)
    if kind == ptr.FAR:
        raise NotImplementedError('Far pointer not supported')
    assert False, 'unknown ptr kind'
def hash_str(self, p, offset, default_, additional_size):
    """
    Hash the bytes referenced by the list pointer ``p``.

    Returns ``default_`` when ``p`` is 0. Otherwise ``p`` must be a list
    pointer with size tag ``LIST_SIZE_8``; the hashed range starts where
    ``p`` (dereferenced against ``offset``) points and is
    ``item count + additional_size`` long.
    """
    if p == 0:
        return default_
    assert ptr.kind(p) == ptr.LIST
    assert ptr.list_size_tag(p) == ptr.LIST_SIZE_8
    first_byte = ptr.deref(p, offset)
    n_bytes = ptr.list_item_count(p) + additional_size
    return _hash.strhash(self.buf, first_byte, n_bytes)
def start_of_ptrs(self, buf, offset, ptrs_size):
    """
    Return the target offset of the first non-null pointer among the
    ``ptrs_size`` pointer words starting at ``offset``, or -1 if all of
    them are null.
    """
    slot = offset
    stop = offset + ptrs_size * 8
    while slot < stop:
        candidate = buf.read_ptr(slot)
        if candidate:
            return ptr.deref(candidate, slot)
        slot += 8  # next 8-byte pointer word
    return -1
def _copy_struct(src, p, src_pos, dst, dst_pos):
    """
    Copy the struct referenced by ``p`` from ``src`` into ``dst``.

    ``src_pos`` is the offset against which ``p`` is dereferenced in ``src``;
    ``dst_pos`` is where the destination pointer lives in ``dst``.

    A struct with no data and no pointers is not allocated at all: it is
    encoded as a struct pointer with offset -1 and zero sizes, so that it
    cannot be confused with a NULL pointer (an all-zero word).
    """
    src_pos = ptr.deref(p, src_pos)
    data_size = ptr.struct_data_size(p)
    ptrs_size = ptr.struct_ptrs_size(p)
    if data_size + ptrs_size == 0:
        # "empty" struct, no need to allocate
        dst.write_int64(dst_pos, ptr.new_struct(-1, 0, 0))
        return
    ds = data_size * 8  # length of the data section, in bytes
    dst_pos = dst.alloc_struct(dst_pos, data_size, ptrs_size)
    check_bounds(src, ds, src_pos)
    dst.write_slice(dst_pos, src, src_pos, ds)  # copy data section
    # the pointers section immediately follows the data section
    _copy_many_ptrs(ptrs_size, src, src_pos + ds, dst, dst_pos + ds)
def test__as_pointer():
    """``Struct._as_pointer`` builds a struct pointer back to the struct."""
    raw = ('garbage0'
           '\x01\x00\x00\x00\x00\x00\x00\x00'    # 1
           '\x02\x00\x00\x00\x00\x00\x00\x00')   # 2
    buf = b(raw)
    where = 24  # arbitrary offset
    struct_obj = Struct.from_buffer(buf, 8, data_size=2, ptrs_size=0)
    p = struct_obj._as_pointer(where)
    assert ptr.kind(p) == ptr.STRUCT
    # dereferencing from the same offset must lead back to the struct body
    assert ptr.deref(p, where) == 8
    assert ptr.struct_data_size(p) == 2
    assert ptr.struct_ptrs_size(p) == 0
def read_item(self, lst, i):
    """
    Build the nested ``List`` stored as the ``i``-th pointer of ``lst``.

    Raises NotImplementedError if that pointer is a FAR pointer.
    """
    seg = lst._seg
    offset = lst._offset + (i * 8)  # items are 8-byte pointer words
    p = seg.read_ptr(offset)
    if ptr.kind(p) == ptr.FAR:
        raise NotImplementedError('FAR pointers not supported here')
    # bypass the normal constructor and initialise the instance by hand
    inner = List.__new__(List)
    body_offset = ptr.deref(p, offset)
    size_tag = ptr.list_size_tag(p)
    item_count = ptr.list_item_count(p)
    inner._init_from_buffer(seg, body_offset, size_tag, item_count,
                            self.inner_item_type)
    return inner
def read_item(self, lst, i):
    """
    Build the nested ``List`` stored as the ``i``-th pointer of ``lst``.

    If that pointer is a FAR pointer it is resolved through
    ``lst._seg.read_far_ptr``, which supplies the offset and the pointer to
    use instead.
    """
    seg = lst._seg
    offset = lst._offset + (i * 8)  # items are 8-byte pointer words
    p = seg.read_ptr(offset)
    if ptr.kind(p) == ptr.FAR:
        offset, p = seg.read_far_ptr(offset)
    # bypass the normal constructor and initialise the instance by hand
    inner = List.__new__(List)
    body_offset = ptr.deref(p, offset)
    size_tag = ptr.list_size_tag(p)
    item_count = ptr.list_item_count(p)
    inner._init_from_buffer(seg, body_offset, size_tag, item_count,
                            self.inner_item_type)
    return inner
def _get_extra_start(self):
    """
    Return ``self._ptrs_offset`` plus the dereferenced target of the first
    non-null pointer, or ``self._get_body_end()`` when there are no
    pointers or they are all null.

    FAR pointers are not expected here (asserted).
    """
    for index in range(self._ptrs_size):
        slot = index * 8
        p = self._read_raw_ptr(slot)
        assert ptr.kind(p) != ptr.FAR
        if p != 0:
            return self._ptrs_offset + ptr.deref(p, slot)
    #
    # either there are no pointers at all, or every one of them is null
    return self._get_body_end()
def _copy_list_primitive(src, p, src_pos, dst, dst_pos):
    """
    Copy the primitive list referenced by ``p`` from ``src`` into ``dst``.

    ``src_pos`` is the offset against which ``p`` is dereferenced in ``src``;
    ``dst_pos`` is forwarded to ``dst.alloc_list``, whose return value is
    used as the start of the destination body.

    The body length is ``count * item length`` bytes, except for bit lists
    where it is the number of bytes needed to hold ``count`` bits.
    """
    src_pos = ptr.deref(p, src_pos)
    count = ptr.list_item_count(p)
    size_tag = ptr.list_size_tag(p)
    body_length = 0
    if size_tag == ptr.LIST_SIZE_BIT:
        # divide by 8 and round up; ``//`` keeps the result an integer even
        # when ``/`` means true division
        body_length = (count + 8 - 1) // 8
    else:
        body_length = count * ptr.list_item_length(size_tag)
    #
    dst_pos = dst.alloc_list(dst_pos, size_tag, count, body_length)
    check_bounds(src, body_length, src_pos)
    dst.write_slice(dst_pos, src, src_pos, body_length)
def _copy_struct(src, p, src_pos, dst, dst_pos):
    """
    Copy the struct referenced by ``p`` from ``src`` into ``dst``.

    Structs with neither data nor pointers are not allocated: a struct
    pointer built with ``ptr.new_struct(-1, 0, 0)`` is written at
    ``dst_pos`` instead.
    """
    body_start = ptr.deref(p, src_pos)
    data_words = ptr.struct_data_size(p)
    ptrs_words = ptr.struct_ptrs_size(p)
    if data_words + ptrs_words == 0:
        # "empty" struct: write the pointer only, nothing to allocate
        empty_ptr = ptr.new_struct(-1, 0, 0)
        dst.write_int64(dst_pos, empty_ptr)
        return
    data_bytes = data_words * 8
    dst_body = dst.alloc_struct(dst_pos, data_words, ptrs_words)
    check_bounds(src, data_bytes, body_start)
    # data section first...
    dst.write_slice(dst_body, src, body_start, data_bytes)
    # ...then the pointers section, which starts right after it
    _copy_many_ptrs(ptrs_words,
                    src, body_start + data_bytes,
                    dst, dst_body + data_bytes)
def _get_extra_start(self):
    """
    Return ``self._ptrs_offset`` plus the dereferenced target of the first
    non-null pointer, or ``self._get_body_end()`` when there are no
    pointers or they are all null.

    FAR pointers are not expected here (asserted).
    """
    if self._ptrs_size == 0:
        return self._get_body_end()
    slot = 0
    stop = self._ptrs_size * 8
    while slot < stop:
        p = self._read_fast_ptr(slot)
        assert ptr.kind(p) != ptr.FAR
        if p != 0:
            return self._ptrs_offset + ptr.deref(p, slot)
        slot += 8  # next 8-byte pointer word
    #
    # every pointer was null
    return self._get_body_end()
def visit_ptrs(self, buf, offset, ptrs_size, current_end):
    """
    Visit the ``ptrs_size`` pointer words starting at ``offset``.

    Null pointers are skipped. Every other pointer must point exactly at
    ``current_end``, otherwise NotCompact is raised; ``current_end`` is then
    replaced by the result of ``self.visit`` on that pointer. Returns the
    final ``current_end``.
    """
    p_offset = offset
    stop = offset + ptrs_size * 8
    while p_offset < stop:
        p = buf.read_ptr(p_offset)
        if p:
            if ptr.deref(p, p_offset) != current_end:
                raise NotCompact
            current_end = self.visit(buf, p, p_offset)
        p_offset += 8  # next 8-byte pointer word
    #
    return current_end
def _endof_ptrs(seg, offset, ptrs_size, current_end):
    """
    Walk the ``ptrs_size`` pointer words starting at ``offset``.

    Null pointers are skipped. Every other pointer must point exactly at
    ``current_end``, otherwise -1 is returned; ``current_end`` is then
    replaced by the result of ``endof`` on that pointer. Returns the final
    ``current_end``.
    """
    p_offset = offset
    stop = offset + ptrs_size * 8
    while p_offset < stop:
        p = seg.read_ptr(p_offset)
        if p:
            if ptr.deref(p, p_offset) != current_end:
                return -1
            current_end = endof(seg, p, p_offset)
        p_offset += 8  # next 8-byte pointer word
    #
    return current_end
def read_str(self, p, offset, default_, additional_size):
    """
    Return the slice of ``self.buf`` holding the Text or Data referenced
    by ``p``; ``offset`` is where ``p`` was read from.

    Use ``additional_size=-1`` for a Text, so that the trailing NUL
    terminator is left out, and ``additional_size=0`` for a Data.
    ``default_`` is returned when ``p`` is 0.
    """
    if p == 0:
        return default_
    assert ptr.kind(p) == ptr.LIST
    assert ptr.list_size_tag(p) == ptr.LIST_SIZE_8
    first = ptr.deref(p, offset)
    length = ptr.list_item_count(p) + additional_size
    return self.buf[first:first + length]
def _read_list(self, offset, item_type, default_=None):
    """
    Read the list pointer at ``offset`` and return the ``List`` it refers
    to, with items of type ``item_type``.

    FAR pointers are resolved through ``self._read_far_ptr``. Returns
    ``default_`` if the resulting pointer is 0.
    """
    p = self._read_fast_ptr(offset)
    if ptr.kind(p) != ptr.FAR:
        # plain pointer: dereference it against its position, offset by
        # self._ptrs_offset
        ptr_pos = offset + self._ptrs_offset
    else:
        ptr_pos, p = self._read_far_ptr(offset)
    if p == 0:
        return default_
    assert ptr.kind(p) == ptr.LIST
    body_offset = ptr.deref(p, ptr_pos)
    # List.from_buffer would be the natural choice, but Cython cannot
    # compile classmethods: build the instance by hand instead
    result = List.__new__(List)
    size_tag = ptr.list_size_tag(p)
    item_count = ptr.list_item_count(p)
    result._init_from_buffer(self._seg, body_offset, size_tag, item_count,
                             item_type)
    return result
def _read_list_or_struct(self, ptr_offset, default_=None):
    """
    Read the pointer at ``ptr_offset`` and return the object it refers to.

    Returns ``default_`` for a null pointer, a ``Struct`` for a struct
    pointer and a ``List`` (with ``StructItemType(Blob)`` items) for a list
    pointer. Any other pointer kind trips an assertion.
    """
    ptr_offset, p = self._read_ptr_generic(ptr_offset)
    if p == 0:
        return default_
    blob_offset = ptr.deref(p, ptr_offset)
    kind = ptr.kind(p)
    if kind == ptr.STRUCT:
        # looked up at call time rather than imported by name
        Struct = capnpy.struct_.Struct
        return Struct.from_buffer(self._buf, blob_offset,
                                  ptr.struct_data_size(p),
                                  ptr.struct_ptrs_size(p))
    elif kind == ptr.LIST:
        List = capnpy.list.List
        return List.from_buffer(self._buf, blob_offset,
                                ptr.list_size_tag(p),
                                ptr.list_item_count(p),
                                capnpy.list.StructItemType(Blob))
    else:
        assert False, 'Unknown pointer kind: %s' % kind
def ptr(self, offset, s):
    """
    Render the 8-byte word ``s``, found at ``offset``, as a pointer
    description.

    Returns blank padding when the word does not decode to a known pointer
    kind, ``NULL`` for a zero word, and otherwise a ``<descr> to <dest>``
    line. Lines containing a ``?`` (a field outside the "reasonable" range)
    are wrapped in light gray.
    """
    p = struct.unpack('q', s)[0]
    kind = ptr.kind(p)
    if kind not in (ptr.STRUCT, ptr.LIST, ptr.FAR):
        return ' ' * 25
    #
    # only "reasonable" values are displayed: a field that is too big most
    # likely means that this word is not a pointer at all
    def if_in_range(value, lower, upper):
        return str(value) if lower <= value < upper else '?'
    #
    if p == 0:
        return 'NULL'.ljust(25)
    if kind == ptr.STRUCT:
        data_size = if_in_range(ptr.struct_data_size(p), 0, 100)
        ptrs_size = if_in_range(ptr.struct_ptrs_size(p), 0, 100)
        descr = 'struct {:>4} {:>3}'.format(data_size, ptrs_size)
    elif kind == ptr.LIST:
        tag = '<%s>' % self._list_tag(ptr.list_size_tag(p))
        item_count = if_in_range(ptr.list_item_count(p), 0, 65536)
        descr = 'list{:<5} {:>5}'.format(tag, item_count)
    elif kind == ptr.FAR:
        descr = 'far {:>7} {:>3}'.format(
            ptr.far_landing_pad(p),
            if_in_range(ptr.far_target(p), 0, 100))
    else:
        descr = 'unknown ptr '
    #
    if -1000 < ptr.offset(p) < 1000:
        dest = self.addr(ptr.deref(p, offset)).ljust(16)
    else:
        # NOTE(review): this placeholder is narrower than the 16-column
        # padding used above -- confirm that this is intended
        dest = '? '
    line = '{0} to {1}'.format(descr, dest)
    if '?' in line:
        return Color.set(Color.lightgray, line)
    return line
def endof(seg, p, offset):
    """
    Compute the end boundary of the object referenced by ``p``, provided
    that the object is compact; return -1 if it is not.

    To be compact, an object must satisfy all of the following:

      1. its data section and its ptrs section are contiguous
      2. its children are contiguous with each other
      3. each of its children is compact as well
      4. it contains no FAR pointers
    """
    kind = ptr.kind(p)
    offset = ptr.deref(p, offset)
    if kind == ptr.STRUCT:
        data_size = ptr.struct_data_size(p)
        ptrs_size = ptr.struct_ptrs_size(p)
        return _endof_struct(seg, p, offset, data_size, ptrs_size)
    if kind == ptr.LIST:
        size_tag = ptr.list_size_tag(p)
        count = ptr.list_item_count(p)
        if size_tag == ptr.LIST_SIZE_COMPOSITE:
            # for composite lists the real item count and the struct
            # layout come from the tag word at the start of the body
            tag = seg.read_ptr(offset)
            count = ptr.offset(tag)
            data_size = ptr.struct_data_size(tag)
            ptrs_size = ptr.struct_ptrs_size(tag)
            return _endof_list_composite(seg, p, offset, count,
                                         data_size, ptrs_size)
        if size_tag == ptr.LIST_SIZE_PTR:
            return _endof_list_ptr(seg, p, offset, count)
        if size_tag == ptr.LIST_SIZE_BIT:
            return _endof_list_bit(seg, p, offset, count)
        return _endof_list_primitive(seg, p, offset, size_tag, count)
    if kind == ptr.FAR:
        return -1
    assert False, 'unknown ptr kind'
def _copy_struct_inline(src, p, src_pos, dst, dst_pos):
    """
    Copy the struct referenced by ``p`` into space which has already been
    allocated at ``dst_pos`` (e.g. when writing structs into a list).
    """
    # This is _copy_struct minus the allocation step.
    #
    # The duplication is deliberate. Several ways of sharing the code were
    # tried (for instance a do_allocation parameter to decide whether to
    # allocate), and each of them made things ~30% slower. The slowdown
    # seems to depend on how many call sites _copy_struct has: at the time
    # of writing it is called only from copy_pointer, and adding one more
    # call site -- even one which is never executed -- was enough to
    # trigger it. Possibly GCC stops inlining it. Duplicating part of the
    # code was the only fix found.
    #
    body_start = ptr.deref(p, src_pos)
    data_words = ptr.struct_data_size(p)
    ptrs_words = ptr.struct_ptrs_size(p)
    data_bytes = data_words * 8
    check_bounds(src, data_bytes, body_start)
    # data section first...
    dst.write_slice(dst_pos, src, body_start, data_bytes)
    # ...then the pointers section, which starts right after it
    _copy_many_ptrs(ptrs_words,
                    src, body_start + data_bytes,
                    dst, dst_pos + data_bytes)
def _init_from_pointer(self, buf, offset, p):
    """
    Initialise this object from the struct pointer ``p``.

    ``p`` is dereferenced against ``offset`` to locate the struct inside
    ``buf``; the section sizes are taken from ``p`` itself.
    """
    assert ptr.kind(p) == ptr.STRUCT
    self._init_from_buffer(buf,
                           ptr.deref(p, offset),
                           ptr.struct_data_size(p),
                           ptr.struct_ptrs_size(p))
def test_deref():
    """``ptr.deref`` resolves a pointer relative to where it was read."""
    p = 0x0004000200000190
    assert ptr.offset(p) == 100
    target = ptr.deref(p, 8)
    assert target == 816
def test_read_ptr():
    """A pointer read through ``CapnpBuffer.read_ptr`` can be dereferenced."""
    raw = '\x90\x01\x00\x00\x02\x00\x04\x00'
    capnp_buf = CapnpBuffer(raw)
    p = capnp_buf.read_ptr(0)
    assert ptr.deref(p, 0) == 808