예제 #1
0
    def to_string(self):
        """Serialize this term info into a byte string.

        The result is laid out as: one flag byte (1 if the postings are
        inlined, 0 if not), the statistics packed with ``self.struct``, and
        finally either the pickled inline postings or the postings pointer
        packed as a long.
        """
        # Lengths are stored as 0-255 byte values; a missing minimum length
        # is written as 0
        if self._minlength is None:
            minlen_byte = 0
        else:
            minlen_byte = length_to_byte(self._minlength)
        maxlen_byte = length_to_byte(self._maxlength)

        # The IDs are stored as unsigned ints, so None is swapped for the
        # out-of-band NO_ID constant
        if self._minid is None:
            min_id = NO_ID
        else:
            min_id = self._minid
        if self._maxid is None:
            max_id = NO_ID
        else:
            max_id = self._maxid

        # Fixed-size statistics record. The sixth field is always written as
        # 0 (presumably a reserved/unused slot -- confirm against the reader).
        header = self.struct.pack(
            self._weight,
            self._df,
            minlen_byte,
            maxlen_byte,
            self._maxweight,
            0,
            min_id,
            max_id,
        )

        postings = self.postings
        if isinstance(postings, tuple):
            # A tuple means the postings are inlined: pickle them and drop
            # the first two bytes and the last byte of the pickle.
            # NOTE(review): this presumably strips the protocol header and
            # the trailing STOP opcode; it relies on the layout produced by
            # the highest available pickle protocol -- confirm the reader
            # still reconstructs it on the Python versions in use.
            flag = 1
            tail = dumps(postings, -1)[2:-1]
        else:
            # Otherwise store a pointer to the postings as a long. A term
            # info may have no postings on disk (None), which is written
            # as -1.
            flag = 0
            tail = pack_long(-1 if postings is None else postings)

        # The inlined/not-inlined flag byte goes in front of everything else
        return pack("B", flag) + header + tail
예제 #2
0
    def to_bytes(self):
        """Serialize this term info into bytes.

        The output is the record packed with ``self._struct`` (which starts
        with the inlined flag), followed by either the pickled inline
        postings or the packed offset and length of the postings.
        """
        inlined_flag = self.is_inlined()

        # Lengths are squeezed into 0-255 values; no minimum length -> 0
        if self._minlength is None:
            minlen_byte = 0
        else:
            minlen_byte = length_to_byte(self._minlength)
        maxlen_byte = length_to_byte(self._maxlength)

        # IDs are stored as unsigned ints, so a None ID is replaced by the
        # out-of-band value 0xFFFFFFFF
        lowest_id = self._minid
        if lowest_id is None:
            lowest_id = 0xFFFFFFFF
        highest_id = self._maxid
        if highest_id is None:
            highest_id = 0xFFFFFFFF

        # Fixed-size part of the record
        header = self._struct.pack(
            inlined_flag,
            self._weight,
            self._df,
            minlen_byte,
            maxlen_byte,
            self._maxweight,
            lowest_id,
            highest_id,
        )

        if inlined_flag:
            # Inlined postings are appended as a protocol-2 pickle
            return header + dumps(self._inlined, 2)

        # Otherwise append where the postings live: offset, then length
        return header + pack_long(self._offset) + pack_int(self._length)
예제 #3
0
파일: fst.py 프로젝트: adamhorner/yaki-tng
 def digest(self):
     """Return the SHA-1 digest of this object's arcs.

     The digest is computed the first time it is requested and cached in
     ``self._digest``; later calls return the cached value.
     """
     if self._digest is not None:
         return self._digest

     hasher = sha1()
     value_type = self.owner.vtype
     for arc in self.arcs:
         hasher.update(arc.label)
         # A falsy target is hashed as the marker b("z") instead of a
         # packed long
         hasher.update(pack_long(arc.target) if arc.target else b("z"))
         # Only truthy values contribute, encoded by the owner's value type
         if arc.value:
             hasher.update(value_type.to_bytes(arc.value))
         # Accepting arcs are tagged with an extra b("T")
         if arc.accept:
             hasher.update(b("T"))

     self._digest = hasher.digest()
     return self._digest
예제 #4
0
파일: dawg.py 프로젝트: skrieder/microblog
 def digest(self):
     """Compute (once) and return a SHA-1 digest over ``self.arcs``.

     The result is memoized in ``self._digest``, so the arcs are only
     hashed on the first call.
     """
     if self._digest is None:
         sha = sha1()
         feed = sha.update
         vtype = self.owner.vtype
         for arc in self.arcs:
             feed(arc.label)

             # Target: packed as a long when truthy, otherwise the
             # placeholder b("z")
             if not arc.target:
                 feed(b("z"))
             else:
                 feed(pack_long(arc.target))

             # Value: included only when truthy, serialized through the
             # owner's value type
             if arc.value:
                 feed(vtype.to_bytes(arc.value))

             # Accept flag: an accepting arc adds b("T")
             if arc.accept:
                 feed(b("T"))
         self._digest = sha.digest()
     return self._digest
    def to_bytes(self):
        """Return this term info encoded as bytes.

        The encoding is the ``self._struct`` record (inlined flag first)
        followed by a tail: the pickled inline postings when the flag is
        set, or the packed postings offset and length when it is not.
        """
        inlined = self.is_inlined()

        # Lengths are encoded as 0-255 values; None minimum length -> 0
        minlen_byte = 0
        if self._minlength is not None:
            minlen_byte = length_to_byte(self._minlength)
        maxlen_byte = length_to_byte(self._maxlength)

        # None IDs become the out-of-band value 0xffffffff so they fit in
        # the unsigned int fields
        first_id = self._minid if self._minid is not None else 0xffffffff
        last_id = self._maxid if self._maxid is not None else 0xffffffff

        # Fixed-size part of the record
        record = self._struct.pack(
            inlined, self._weight, self._df, minlen_byte, maxlen_byte,
            self._maxweight, first_id, last_id)

        if inlined:
            # Inlined postings are pickled. A negative protocol selects the
            # highest pickle protocol the running interpreter supports, so
            # the exact bytes depend on the Python version.
            # NOTE(review): confirm readers on older interpreters can load
            # what newer ones write.
            tail = dumps(self._inlined, -1)
        else:
            # Pointer to the postings: offset followed by length
            tail = pack_long(self._offset) + pack_int(self._length)

        return record + tail
예제 #6
0
 def valuecoder(self, offset):
     """Return ``offset`` encoded by ``pack_long``."""
     encoded = pack_long(offset)
     return encoded
예제 #7
0
 def write_long(self, n):
     """Write ``n``, encoded by ``pack_long``, to ``self.file``."""
     write = self.file.write
     write(pack_long(n))
예제 #8
0
 def valuecoder(self, offset):
     """Encode ``offset`` with ``pack_long`` and return the result."""
     packed_offset = pack_long(offset)
     return packed_offset
예제 #9
0
 def write_long(self, n):
     """Encode ``n`` with ``pack_long`` and write it to ``self.file``."""
     emit = self.file.write
     emit(pack_long(n))