def from_bytes(cls, s):
    """Deserialize a term info object from the bytes written by ``to_bytes()``.

    The fixed-size header is unpacked with ``cls._struct``; depending on the
    flags byte, the trailing bytes hold either a pickled inline posting list
    or a (long offset, int length) pair pointing into the posting file.

    :param s: a ``bytes`` object containing the serialized term info.
    :return: a new instance of ``cls`` with its stats attributes populated.
    """

    st = cls._struct
    vals = st.unpack(s[:st.size])
    terminfo = cls()

    flags = vals[0]
    terminfo._weight = vals[1]
    terminfo._df = vals[2]
    # Min/max lengths are stored as single compressed bytes; decode them
    terminfo._minlength = byte_to_length(vals[3])
    terminfo._maxlength = byte_to_length(vals[4])
    terminfo._maxweight = vals[5]
    # 0xffffffff is the sentinel for "no ID recorded"
    terminfo._minid = None if vals[6] == 0xffffffff else vals[6]
    terminfo._maxid = None if vals[7] == 0xffffffff else vals[7]

    if flags:
        # Postings are stored inline as a pickle.
        # NOTE(review): unpickling index data -- only safe for trusted files.
        terminfo._inlined = loads(s[st.size:])
    else:
        # Last bytes are a pointer into the posting file and a length
        offpos = st.size
        lenpos = st.size + _LONG_SIZE
        terminfo._offset = unpack_long(s[offpos:lenpos])[0]
        # BUG FIX: the unpack call returns a tuple, so take the first
        # element -- consistent with the unpack_long call above
        terminfo._length = unpack_int(s[lenpos:lenpos + _INT_SIZE])[0]

    return terminfo
def from_string(cls, s):
    """Recreate a term info object from a string written by ``to_string()``.

    Handles two on-disk layouts: the current format (a header byte followed
    by a packed stats struct and the posting info) and a legacy format that
    pickled a variable-length tuple.
    """

    header_byte = ord(s[0:1])

    if header_byte < 2:
        # Current format: unpack the stats struct that follows the header
        # byte -- freq, doc freq, min len, max len, max weight, max WOL,
        # min ID, max ID
        fmt = cls.struct
        f, df, ml, xl, xw, xwol, mid, xid = fmt.unpack(s[1:fmt.size + 1])
        if mid == NO_ID:
            mid = None
        if xid == NO_ID:
            xid = None

        # Whatever remains after the struct is the posting info
        tail = s[fmt.size + 1:]
        if header_byte == 0:
            p = unpack_long(tail)[0]
        else:
            p = loads(tail + b("."))
    else:
        # Old format was encoded as a variable length pickled tuple
        legacy = loads(s + b("."))
        if len(legacy) == 1:
            f = df = 1
            p = legacy[0]
        elif len(legacy) == 2:
            p = legacy[0]
            f = df = legacy[1]
        else:
            f, p, df = legacy
        # Stats weren't stored in the old format; substitute fake values
        ml = 1
        xl = 106374
        xw = 999999999
        xwol = 999999999
        mid = -1
        xid = -1

    return cls(f, df, ml, xl, xw, xwol, mid, xid, p)
def from_file(file, stringids=False):
    """Read a BlockInfo header from the current position of an open file.

    :param file: a file-like object (with ``tell``/``read``/``read_uint``/
        ``read_string`` methods) positioned at the start of a block header.
    :param stringids: if True, the block's max ID is stored as a UTF-8
        string; otherwise it is an unsigned int.
    :return: a ``BlockInfo`` populated from the header fields.
    """

    here = file.tell()
    encoded_header = file.read(BlockInfo._struct.size)
    header = BlockInfo._struct.unpack(encoded_header)
    (flags, _, _, nextoffset, idslen, weightslen, postcount,
     maxweight, maxwol, _, minlength) = header

    if not flags:
        # Old format: the next-block pointer is an absolute long stored at
        # the start of the header.
        # BUG FIX: unpack_long returns a tuple, so take the first element
        # -- consistent with every other unpack_long call in this module
        nextoffset = unpack_long(encoded_header[:8])[0]
    else:
        # New format stores the offset relative to the header position
        nextoffset = here + nextoffset

    assert postcount > 0
    # The min length is stored as a single compressed byte; decode it
    minlength = byte_to_length(minlength)

    if stringids:
        maxid = utf8decode(file.read_string())[0]
    else:
        maxid = file.read_uint()

    # Postings data begins immediately after the header and max ID
    dataoffset = file.tell()
    return BlockInfo(flags=flags, nextoffset=nextoffset,
                     postcount=postcount, maxweight=maxweight,
                     maxwol=maxwol, maxid=maxid, minlength=minlength,
                     dataoffset=dataoffset, idslen=idslen,
                     weightslen=weightslen)
def from_string(cls, s):
    """Rebuild a term info object from the string written by ``to_string()``.

    Supports the current layout (header byte + packed stats struct +
    posting info) as well as a legacy layout that pickled a
    variable-length tuple.
    """

    assert isinstance(s, bytes_type)

    # The first byte identifies the on-disk format
    if isinstance(s, string_type):
        first = ord(s[0])  # Python 2.x - str
    else:
        first = s[0]  # Python 3 - bytes

    if first < 2:
        # Current format -- weight, doc freq, min len, max len,
        # max weight, unused, min ID, max ID
        fmt = cls.struct
        w, df, ml, xl, xw, _, mid, xid = fmt.unpack(s[1:fmt.size + 1])
        if mid == NO_ID:
            mid = None
        if xid == NO_ID:
            xid = None

        # The remainder of the string holds the posting info
        rest = s[fmt.size + 1:]
        if first == 0:
            p = unpack_long(rest)[0]
        else:
            p = loads(rest + b("."))
    else:
        # Old format was encoded as a variable length pickled tuple
        tup = loads(s + b("."))
        if len(tup) == 1:
            w = df = 1
            p = tup[0]
        elif len(tup) == 2:
            p = tup[0]
            w = df = tup[1]
        else:
            w, p, df = tup
        # Stats that weren't stored before get placeholder values
        ml = 1
        xl = 255
        xw = 999999999
        mid = -1
        xid = -1

    # Lengths are stored as compressed single bytes; decode them
    ml = byte_to_length(ml)
    xl = byte_to_length(xl)
    obj = cls(w, df, ml, xl, xw, mid, xid)
    obj.postings = p
    return obj
def valuedecoder(self, v):
    """Decode a stored value by unpacking it as a single long integer."""
    unpacked = unpack_long(v)
    return unpacked[0]
def get_long(self, position):
    """Return the long integer stored in the memory map at *position*."""
    end = position + _LONG_SIZE
    return unpack_long(self.map[position:end])[0]
def read_long(self):
    """Read and return the next long integer from the underlying file."""
    raw = self.file.read(_LONG_SIZE)
    return unpack_long(raw)[0]
def get_long(self, position):
    """Return the long integer stored at *position*."""
    raw = self.get(position, _LONG_SIZE)
    return unpack_long(raw)[0]