    @classmethod
    def from_bytes(cls, s):
        st = cls._struct
        vals = st.unpack(s[:st.size])
        terminfo = cls()

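        # Fixed-size header: flags, weight, doc frequency, min/max length
        # bytes, max weight, and min/max doc IDs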
        flags = vals[0]
        terminfo._weight = vals[1]
        terminfo._df = vals[2]
        terminfo._minlength = byte_to_length(vals[3])
        terminfo._maxlength = byte_to_length(vals[4])
        terminfo._maxweight = vals[5]
        terminfo._minid = None if vals[6] == 0xffffffff else vals[6]
        terminfo._maxid = None if vals[7] == 0xffffffff else vals[7]

        if flags:
            # Postings are stored inline
            terminfo._inlined = loads(s[st.size:])
        else:
            # Last bytes are pointer into posting file and length
            offpos = st.size
            lenpos = st.size + _LONG_SIZE
            terminfo._offset = unpack_long(s[offpos:lenpos])[0]
            terminfo._length = unpack_int(s[lenpos:lenpos + _INT_SIZE])

        return terminfo
Example #2
    def _goto(self, position):
        # Read the posting block at the given position

        postfile = self._postfile

        # Reset block data -- we'll lazy load the data from the new block as
        # needed
        self._data = None
        self._ids = None
        self._weights = None
        self._values = None
        # Reset pointer into the block
        self._i = 0

        # Seek to the start of the block
        postfile.seek(position)
        # Read the block length
        length = postfile.read_int()
        # If the block length is negative, that means this is the last block
        if length < 0:
            self._lastblock = True
            length *= -1

        # Remember the offset of the next block
        self._nextoffset = position + _INT_SIZE + length
        # Read the pickled block info tuple
        info = postfile.read_pickle()
        # Remember the offset of the block's data
        self._dataoffset = postfile.tell()

        # Decompose the info tuple to set the current block info
        (self._blocklength, self._maxid, self._maxweight, self._compression,
         mnlen, mxlen) = info
        self._minlength = byte_to_length(mnlen)
        self._maxlength = byte_to_length(mxlen)
Example #5
    def _minmax_length(self, fieldname, op, cache):
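        # op (typically min or max) is applied over the field's length column
        # reader, and the result is cached per field name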
        if fieldname in cache:
            return cache[fieldname]

        lenfield = _lenfield(fieldname)
        reader = self._cached_reader(lenfield, LENGTHS_COLUMN)
        length = byte_to_length(op(reader))
        cache[fieldname] = length
        return length
Example #7
    def doc_field_length(self, docnum, fieldname, default=0):
        if docnum > self._doccount:
            raise IndexError("Asked for docnum %r of %d" %
                             (docnum, self._doccount))

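        # Per-document field lengths are stored as single bytes in a length
        # column named for the field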
        lenfield = _lenfield(fieldname)
        reader = self._cached_reader(lenfield, LENGTHS_COLUMN)
        if reader is None:
            return default

        lbyte = reader[docnum]
        if lbyte:
            return byte_to_length(lbyte)
        # Fall back to the default when no length byte was stored for this doc
        return default
Example #9
def test_many_lengths():
    domain = u("alfa bravo charlie delta echo").split()
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    for i, word in enumerate(domain):
        length = (i + 1) ** 6
        w.add_document(text=" ".join(word for _ in xrange(length)))
    w.commit()

    s = ix.searcher()
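    # Each word appears in exactly one document, so the term's min and max
    # field lengths both equal that document's quantized length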
    for i, word in enumerate(domain):
        target = byte_to_length(length_to_byte((i + 1) ** 6))
        ti = s.term_info("text", word)
        assert ti.min_length() == target
        assert ti.max_length() == target
Example #11
def test_lengths():
    s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                      f2=fields.KEYWORD(stored=True, scorable=True))
    with TempIndex(s, "testlengths") as ix:
        w = ix.writer()
        items = u("ABCDEFG")
        from itertools import cycle, islice
        lengths = [10, 20, 2, 102, 45, 3, 420, 2]
        for length in lengths:
            w.add_document(f2=u(" ").join(islice(cycle(items), length)))
        w.commit()

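        # f1 was never given any text, so its lengths are all zero; f2 lengths
        # should round-trip through the one-byte encoding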
        with ix.reader() as dr:
            ls1 = [dr.doc_field_length(i, "f1")
                   for i in xrange(0, len(lengths))]
            assert ls1 == [0] * len(lengths)
            ls2 = [dr.doc_field_length(i, "f2")
                   for i in xrange(0, len(lengths))]
            assert ls2 == [byte_to_length(length_to_byte(l)) for l in lengths]
Example #13
    @classmethod
    def read_min_and_max_length(cls, dbfile, datapos):
        # The two length bytes follow a single byte, a float, and an int at
        # the start of the term info record
        lenpos = datapos + 1 + _FLOAT_SIZE + _INT_SIZE
        ml = byte_to_length(dbfile.get_byte(lenpos))
        xl = byte_to_length(dbfile.get_byte(lenpos + 1))
        return ml, xl
Example #14
def _byten(n):
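    # Round-trip a length through the one-byte encoding to get the quantized
    # value the index actually stores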
    return byte_to_length(length_to_byte(n))
Example #15
def test_length_byte():
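    # Encoding then decoding lengths 0-10 should be lossless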
    source = list(range(11))
    xform = [length_to_byte(n) for n in source]
    result = [byte_to_length(n) for n in xform]
    assert source == result
Example #18
def _discreet(length):
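    # Round-trip a length through the one-byte encoding to get its quantized
    # stored value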
    return byte_to_length(length_to_byte(length))