def _btexts(self, ixreader):
    """Yield the byte-string terms of this query's field that fall in range.

    Terms come from ``ixreader.terms_from(fieldname, start)``, which is
    iterated as ``(fieldname, term)`` pairs. Iteration stops as soon as a
    pair belongs to a different field or a term lies past the upper bound.
    Nothing is yielded when either bound is rejected by the field's
    ``to_bytes`` with a ``ValueError``.

    :param ixreader: reader object providing ``schema`` and ``terms_from``.
    """
    name = self.fieldname
    field = ixreader.schema[name]
    skip_lower = self.startexcl
    stop_at_upper = self.endexcl

    # Lower bound: an open start means "begin at the empty byte string".
    if self.start is not None:
        try:
            lower = field.to_bytes(self.start)
        except ValueError:
            return
    else:
        lower = b("")

    # Upper bound: an open end uses a run of 0xFF bytes as the ceiling.
    if self.end is not None:
        try:
            upper = field.to_bytes(self.end)
        except ValueError:
            return
    else:
        upper = b("\xFF\xFF\xFF\xFF")

    for term_field, term in ixreader.terms_from(name, lower):
        if term_field != name:
            # The reader has moved on to another field's terms.
            return
        if skip_lower and term == lower:
            continue
        if term > upper or (stop_at_upper and term == upper):
            return
        yield term
def decode_positions(self, valuestring):
    """Return the absolute positions stored in an encoded posting value.

    Everything after the first ``_INT_SIZE`` bytes is handed to ``loads``
    (a trailing ``b"."`` is appended first if it is missing). The first
    item of each decoded entry is an offset from the previous position, so
    the result is the running total of those offsets.

    :param valuestring: the encoded posting value.
    :returns: a list of positions.
    """
    terminator = b(".")
    if not valuestring.endswith(terminator):
        valuestring += terminator

    # NOTE(review): ``loads`` is presumably pickle's -- confirm against the
    # module imports.
    running = 0
    positions = []
    for entry in loads(valuestring[_INT_SIZE:]):
        running = entry[0] + running
        positions.append(running)
    return positions
def decode_position_boosts(self, valuestring):
    """Return ``(position, boost)`` pairs from an encoded posting value.

    The first ``_INT_SIZE + _FLOAT_SIZE`` bytes are skipped and the rest is
    handed to ``loads`` (a trailing ``b"."`` is appended first if it is
    missing). Item 0 of each decoded entry is an offset from the previous
    position; item 1 is passed through unchanged as the second element of
    the pair.

    :param valuestring: the encoded posting value.
    :returns: a list of ``(position, entry[1])`` tuples.
    """
    terminator = b(".")
    if not valuestring.endswith(terminator):
        valuestring += terminator

    header_size = _INT_SIZE + _FLOAT_SIZE
    running = 0
    pairs = []
    for entry in loads(valuestring[header_size:]):
        running = entry[0] + running
        pairs.append((running, entry[1]))
    return pairs
def decode_character_boosts(self, valuestring):
    """Return ``(position, startchar, endchar, boost)`` tuples.

    The first ``_INT_SIZE + _FLOAT_SIZE`` bytes are skipped and the rest is
    handed to ``loads`` (a trailing ``b"."`` is appended first if it is
    missing). Each decoded entry holds relative values:

    * item 0 -- offset from the previous position;
    * item 1 -- offset of the start character from the previous end
      character;
    * item 2 -- length from the start character to the end character;
    * item 3 -- passed through unchanged (the boost, going by the method
      name).

    :param valuestring: the encoded posting value.
    :returns: a list of 4-tuples with absolute position and character
        offsets.
    """
    terminator = b(".")
    if not valuestring.endswith(terminator):
        valuestring += terminator

    header_size = _INT_SIZE + _FLOAT_SIZE
    pos = 0
    last_end = 0
    decoded = []
    for entry in loads(valuestring[header_size:]):
        pos = pos + entry[0]
        first = last_end + entry[1]
        last_end = first + entry[2]
        decoded.append((pos, first, last_end, entry[3]))
    return decoded
def decode_characters(self, valuestring):
    """Return ``(position, startchar, endchar)`` tuples from a posting value.

    Everything after the first ``_INT_SIZE`` bytes is handed to ``loads``
    (a trailing ``b"."`` is appended first if it is missing). Each decoded
    entry holds relative values: item 0 is an offset from the previous
    position, item 1 an offset from the previous end character, and item 2
    the distance from the start character to the end character.

    :param valuestring: the encoded posting value.
    :returns: a list of 3-tuples with absolute position and character
        offsets.
    """
    terminator = b(".")
    if not valuestring.endswith(terminator):
        valuestring += terminator

    pos = 0
    last_end = 0
    decoded = []
    for entry in loads(valuestring[_INT_SIZE:]):
        pos = entry[0] + pos
        first = entry[1] + last_end
        last_end = entry[2] + first
        decoded.append((pos, first, last_end))
    return decoded
def __init__(self, fixedlen, default=None):
    """
    :param fixedlen: the fixed length of byte strings in this column.
    :param default: the default value to use for documents that don't
        specify a value. If you don't specify a default, the column will
        use ``b'\\x00' * fixedlen``.
    :raises ValueError: if ``default`` is given and its length differs
        from ``fixedlen``.
    """

    self._fixedlen = fixedlen

    if default is None:
        # No explicit default: pad with NUL bytes to the fixed length.
        default = b("\x00") * fixedlen
    elif len(default) != fixedlen:
        # Say what was expected and what was received so the caller can
        # fix the mismatch without reading this source.
        raise ValueError(
            "default must be exactly %r bytes long, got %r bytes"
            % (fixedlen, len(default))
        )
    self._default = default
def __init__(self, fixedlen=0, default=None):
    """
    :param fixedlen: an optional fixed length for the values. If you
        specify a number other than 0, the column will require all values
        to be the specified length.
    :param default: a default value to use for documents that don't specify
        one. If you don't specify a default, the column will use an empty
        bytestring (``b''``), or if you specify a fixed length,
        ``b'\\x00' * fixedlen``.
    :raises ValueError: if ``fixedlen`` is non-zero and ``default`` is
        given with a different length.
    """

    self._fixedlen = fixedlen

    if default is None:
        # Fixed-length columns pad with NUL bytes; variable-length columns
        # fall back to the empty bytestring.
        default = b("\x00") * fixedlen if fixedlen else emptybytes
    elif fixedlen and len(default) != fixedlen:
        # Say what was expected and what was received so the caller can
        # fix the mismatch without reading this source.
        raise ValueError(
            "default must be exactly %r bytes long, got %r bytes"
            % (fixedlen, len(default))
        )
    self._default = default
def _prep_vectors(self):
    """Create the vector postings file and write its leading header bytes.

    The file is created with ``self._create_file`` using the
    ``W3Codec.VPOSTS_EXT`` extension and kept on ``self._vpostfile``.
    """
    vpostfile = self._create_file(W3Codec.VPOSTS_EXT)
    self._vpostfile = vpostfile

    # An offset of 0 is reserved to mean "no vectors", so real postings must
    # never begin at position 0. Writing a short header first ensures that.
    vpostfile.write(b("VPST"))
pack_ushort, unpack_int, unpack_long, unpack_ushort, ) from whoosh.util.numeric import byte_to_length, length_to_byte from whoosh.util.numlists import delta_decode, delta_encode try: import zlib except ImportError: zlib = None # This byte sequence is written at the start of a posting list to identify the # codec/version WHOOSH3_HEADER_MAGIC = b("W3Bl") # Column type to store field length info LENGTHS_COLUMN = columns.NumericColumn("B", default=0) # Column type to store pointers to vector posting lists VECTOR_COLUMN = columns.NumericColumn("I") # Column type to store vector posting list lengths VECTOR_LEN_COLUMN = columns.NumericColumn("i") # Column type to store values of stored fields STORED_COLUMN = columns.PickleColumn(columns.CompressedBytesColumn()) class W3Codec(base.Codec): # File extensions TERMS_EXT = ".trm" # Term index POSTS_EXT = ".pst" # Term postings