def to_string(self):
    """Serialize this term info into a byte string.

    The result is laid out as:

    1. one unsigned byte: 1 if the postings are inlined, otherwise 0;
    2. the statistics packed with ``self.struct``;
    3. either the pickled inline postings (minus the first two bytes and
       the last byte of the pickle), or the postings pointer packed with
       ``pack_long``.
    """
    # Lengths are stored as single-byte codes; a missing minimum length is
    # written as 0.
    if self._minlength is None:
        min_len_byte = 0
    else:
        min_len_byte = length_to_byte(self._minlength)
    max_len_byte = length_to_byte(self._maxlength)

    # A missing document ID is replaced by the out-of-band NO_ID value so
    # the field can still be packed as a number.
    min_id = self._minid if self._minid is not None else NO_ID
    max_id = self._maxid if self._maxid is not None else NO_ID

    # The literal 0 fills a slot in the struct layout
    # (presumably unused/reserved - confirm against the reader).
    header = self.struct.pack(
        self._weight,
        self._df,
        min_len_byte,
        max_len_byte,
        self._maxweight,
        0,
        min_id,
        max_id,
    )

    postings = self.postings
    if isinstance(postings, tuple):
        # A tuple means the postings are stored inline. They are pickled
        # with the highest available protocol, then the two leading bytes
        # and the trailing byte of the pickle are dropped.
        inline_flag = 1
        tail = dumps(postings, -1)[2:-1]
    else:
        # Otherwise ``postings`` is a pointer to the postings. It may be
        # None when there is nothing to point at; that case is written
        # as -1 so it can still be packed as a long.
        inline_flag = 0
        tail = pack_long(-1 if postings is None else postings)

    # The inline flag byte goes in front of everything else.
    return pack("B", inline_flag) + header + tail
def to_bytes(self):
    """Serialize this term info into bytes.

    The fixed-size header packed with ``self._struct`` comes first
    (starting with the inlined flag), followed by either the pickled
    inline postings or the packed postings offset and length.
    """
    inlined = self.is_inlined()

    # Lengths are stored as single-byte codes; a missing minimum length is
    # written as 0.
    if self._minlength is None:
        min_len_byte = 0
    else:
        min_len_byte = length_to_byte(self._minlength)
    max_len_byte = length_to_byte(self._maxlength)

    # 0xFFFFFFFF stands in for a missing document ID so the field can
    # still be packed as a number.
    min_id = self._minid if self._minid is not None else 0xFFFFFFFF
    max_id = self._maxid if self._maxid is not None else 0xFFFFFFFF

    header = self._struct.pack(
        inlined,
        self._weight,
        self._df,
        min_len_byte,
        max_len_byte,
        self._maxweight,
        min_id,
        max_id,
    )

    if inlined:
        # Inline postings are pickled with protocol 2.
        tail = dumps(self._inlined, 2)
    else:
        # Otherwise record the postings' offset followed by their length.
        tail = pack_long(self._offset) + pack_int(self._length)

    return header + tail
def digest(self):
    """Return the SHA-1 digest of this object's arcs.

    The digest is computed on first use, stored in ``self._digest``, and
    returned from there on later calls.
    """
    cached = self._digest
    if cached is not None:
        return cached

    hasher = sha1()
    value_type = self.owner.vtype
    for arc in self.arcs:
        hasher.update(arc.label)

        # A truthy target contributes its packed form; a falsy one
        # contributes the marker byte "z" instead.
        target_bytes = pack_long(arc.target) if arc.target else b("z")
        hasher.update(target_bytes)

        # Only truthy values are hashed, encoded by the owner's value type.
        if arc.value:
            hasher.update(value_type.to_bytes(arc.value))

        # Accepting arcs add the marker byte "T".
        if arc.accept:
            hasher.update(b("T"))

    self._digest = hasher.digest()
    return self._digest
def to_bytes(self):
    """Serialize this term info into bytes.

    The output is the header packed with ``self._struct`` (whose first
    field is the inlined flag), followed by the postings payload: the
    pickled inline postings when inlined, otherwise the packed offset and
    length of the postings.
    """
    inlined = self.is_inlined()

    # Squeeze the lengths into single-byte codes. A missing minimum
    # length is stored as 0.
    min_len_byte = (
        length_to_byte(self._minlength) if self._minlength is not None else 0
    )
    max_len_byte = length_to_byte(self._maxlength)

    # A missing document ID is stored as the out-of-band value 0xffffffff.
    if self._minid is None:
        min_id = 0xffffffff
    else:
        min_id = self._minid
    if self._maxid is None:
        max_id = 0xffffffff
    else:
        max_id = self._maxid

    packed = self._struct.pack(
        inlined,
        self._weight,
        self._df,
        min_len_byte,
        max_len_byte,
        self._maxweight,
        min_id,
        max_id,
    )

    if inlined:
        # Inline postings are pickled with the highest available protocol.
        payload = dumps(self._inlined, -1)
    else:
        # Otherwise store the postings' offset followed by their length.
        payload = pack_long(self._offset) + pack_int(self._length)

    return packed + payload
def valuecoder(self, offset):
    """Return ``offset`` encoded with ``pack_long``."""
    encoded = pack_long(offset)
    return encoded
def write_long(self, n):
    """Write ``n``, encoded with ``pack_long``, to ``self.file``."""
    out = self.file
    out.write(pack_long(n))