Пример #1
0
def test_block():
    """Check a posting block's statistics before and after a file round trip.

    Builds a block of four postings, verifies the block's and a
    ``FileTermInfo``'s aggregate stats, writes the block to a RAM-backed file,
    reads it back and verifies the same stats on the re-read block.
    """

    def check_shared_stats(obj):
        # The same four expectations hold for the in-memory block, the term
        # info built from it, and the block read back from the file.
        assert_equal(obj.min_length(), 1)
        assert_equal(obj.max_length(), byte_to_length(length_to_byte(420)))
        assert_equal(obj.max_weight(), 12.0)
        assert_equal(obj.max_wol(), 2.0)

    storage = RamStorage()
    postfile = storage.create_file("postfile")

    block = current(postfile, 0)
    # Arguments are passed positionally to append(); the last element appears
    # to be a length (it drives the min/max length checks) -- TODO confirm.
    for posting in ((0, 1.0, '', 1),
                    (1, 2.0, '', 2),
                    (2, 12.0, '', 6),
                    (5, 6.5, '', 420)):
        block.append(*posting)
    assert block

    assert_equal(len(block), 4)
    assert_equal(list(block.ids), [0, 1, 2, 5])
    assert_equal(list(block.weights), [1.0, 2.0, 12.0, 6.5])
    assert_equal(block.values, None)
    check_shared_stats(block)

    terminfo = FileTermInfo()
    terminfo.add_block(block)
    assert_equal(terminfo.weight(), 21.5)
    assert_equal(terminfo.doc_frequency(), 4)
    check_shared_stats(terminfo)

    # Persist the block, then reopen the file and load it again.
    block.write(compression=3)
    postfile.close()
    postfile = storage.open_file("postfile")
    reread = current.from_file(postfile, 0)

    reread.read_ids()
    assert_equal(list(reread.ids), [0, 1, 2, 5])
    reread.read_weights()
    assert_equal(list(reread.weights), [1.0, 2.0, 12.0, 6.5])
    reread.read_values()
    # NOTE(review): this inspects the original block, not the one just read
    # back; possibly ``reread.values`` was intended -- confirm before changing.
    assert_equal(block.values, None)
    check_shared_stats(reread)
Пример #2
0
class FilePostingWriter(PostingWriter):
    """Writes posting lists to a file in blocks.

    Postings are buffered in a block object (an instance of ``blockclass``)
    and flushed to ``postfile`` each time the block reaches ``blocklimit``
    entries. The call sequence for one posting list is ``start()``, any
    number of ``write()`` calls, then ``finish()``. ``self.block`` is ``None``
    whenever the writer is outside that sequence.
    """

    blockclass = postblocks.current

    def __init__(self, postfile, stringids=False, blocklimit=128,
                 compression=3):
        """
        :param postfile: the file object posting blocks are written to.
        :param stringids: passed through to every block that is created.
        :param blocklimit: number of buffered postings that triggers a block
            flush; must be between 1 and 255 inclusive.
        :param compression: passed to the block's ``write()`` method.
        :raises ValueError: if ``blocklimit`` is outside 1..255.
        """
        self.postfile = postfile
        self.stringids = stringids

        if blocklimit > 255:
            raise ValueError("blocklimit argument must be <= 255")
        elif blocklimit < 1:
            raise ValueError("blocklimit argument must be > 0")
        self.blocklimit = blocklimit
        self.compression = compression
        # None means "not currently inside a start()/finish() pair".
        self.block = None

    def _reset_block(self):
        """Replace ``self.block`` with a newly constructed block."""
        self.block = self.blockclass(self.postfile, self.format.posting_size,
                                     stringids=self.stringids)

    def start(self, format):
        """Begin a new posting list and return its starting file offset.

        Writes the block class's magic number followed by a placeholder for
        the block count, which ``finish()`` fills in later.

        :param format: object whose ``posting_size`` is used for new blocks.
        :raises Exception: if a posting list is already in progress.
        """
        if self.block is not None:
            raise Exception("Called start() in a block")

        self.format = format
        self.blockcount = 0
        self.startoffset = self.postfile.tell()
        self.terminfo = FileTermInfo()

        # Magic number
        self.postfile.write_int(self.blockclass.magic)
        # Placeholder for block count
        self.postfile.write_uint(0)

        self._reset_block()
        return self.startoffset

    def write(self, id, weight, valuestring, dfl):
        """Append one posting to the current block.

        The block is flushed to the file as soon as it holds ``blocklimit``
        postings. All four arguments are handed unchanged to the block's
        ``append()`` method.
        """
        self.block.append(id, weight, valuestring, dfl)
        if len(self.block) >= self.blocklimit:
            self._write_block()

    def finish(self, inlinelimit=1):
        """End the current posting list and return its term info object.

        If no block has been flushed yet and the pending block is non-empty
        with at most ``inlinelimit`` postings, nothing more is written and
        ``terminfo.postings`` is set to an ``(ids, weights, values)`` tuple
        (``values`` is ``None`` when the block has none). Otherwise any
        pending postings are flushed, the block count is written over the
        placeholder left by ``start()``, and ``terminfo.postings`` is set to
        the list's starting offset.

        :param inlinelimit: largest posting count that is returned inline.
        :raises TypeError: if ``inlinelimit`` is not an integer.
        :raises Exception: if no posting list is in progress.
        """
        # Explicit check rather than ``assert`` so that the validation is not
        # stripped when Python runs with -O.
        if not isinstance(inlinelimit, integer_types):
            raise TypeError("inlinelimit must be an integer, got %r"
                            % (inlinelimit,))
        if self.block is None:
            raise Exception("Called finish() when not in a block")

        block = self.block
        terminfo = self.terminfo

        if self.blockcount < 1 and block and len(block) <= inlinelimit:
            terminfo.add_block(block)
            vals = None if not block.values else tuple(block.values)
            postings = (tuple(block.ids), tuple(block.weights), vals)
        else:
            if block:
                self._write_block()

            # Seek back to the start of this list of posting blocks and write
            # the number of blocks
            pf = self.postfile
            pf.flush()
            offset = pf.tell()
            # Skip the magic number that start() wrote before the placeholder.
            pf.seek(self.startoffset + _INT_SIZE)
            pf.write_uint(self.blockcount)
            pf.seek(offset)
            postings = self.startoffset

        self.block = None

        terminfo.postings = postings
        return terminfo

    def close(self):
        """Close the underlying posting file.

        :raises Exception: if a posting list was started but not finished.
        """
        # Compare against None, as start() and finish() do: a started list
        # whose block is still empty is unfinished too, and a plain truth
        # test would let it through if empty blocks are falsy.
        if self.block is not None:
            raise Exception("Closed posting writer without finishing")
        self.postfile.close()

    def block_stats(self):
        """Return the result of the current block's ``stats()`` method."""
        return self.block.stats()

    def _write_block(self):
        """Flush the current block to the file and start a new one.

        The flushed block is also added to the running term info and the
        block counter is incremented.
        """
        self.block.write(compression=self.compression)
        self.terminfo.add_block(self.block)
        self._reset_block()
        self.blockcount += 1
Пример #3
0
class FilePostingWriter(PostingWriter):
    """Block-based writer for posting lists stored in a file.

    Postings accumulate in an instance of ``blockclass`` and are flushed to
    ``postfile`` whenever the block holds ``blocklimit`` entries. One posting
    list is produced by calling ``start()``, then ``write()`` zero or more
    times, then ``finish()``. Outside that sequence ``self.block`` is
    ``None``.
    """

    blockclass = postblocks.current

    def __init__(self,
                 postfile,
                 stringids=False,
                 blocklimit=128,
                 compression=3):
        """
        :param postfile: the file object posting blocks are written to.
        :param stringids: forwarded to each block that gets created.
        :param blocklimit: how many buffered postings trigger a block flush;
            must be between 1 and 255 inclusive.
        :param compression: forwarded to the block's ``write()`` method.
        :raises ValueError: if ``blocklimit`` is outside 1..255.
        """
        self.postfile = postfile
        self.stringids = stringids

        if blocklimit > 255:
            raise ValueError("blocklimit argument must be <= 255")
        elif blocklimit < 1:
            raise ValueError("blocklimit argument must be > 0")
        self.blocklimit = blocklimit
        self.compression = compression
        # None marks "no posting list in progress".
        self.block = None

    def _reset_block(self):
        """Point ``self.block`` at a newly constructed block."""
        self.block = self.blockclass(self.postfile,
                                     self.format.posting_size,
                                     stringids=self.stringids)

    def start(self, format):
        """Open a new posting list and return the offset where it begins.

        The block class's magic number and a placeholder block count are
        written immediately; ``finish()`` overwrites the placeholder.

        :param format: object providing ``posting_size`` for new blocks.
        :raises Exception: if a posting list is already in progress.
        """
        if self.block is not None:
            raise Exception("Called start() in a block")

        self.format = format
        self.blockcount = 0
        self.startoffset = self.postfile.tell()
        self.terminfo = FileTermInfo()

        # Magic number
        self.postfile.write_int(self.blockclass.magic)
        # Placeholder for block count
        self.postfile.write_uint(0)

        self._reset_block()
        return self.startoffset

    def write(self, id, weight, valuestring, dfl):
        """Add one posting to the current block.

        The arguments are forwarded as-is to the block's ``append()``; the
        block is flushed once it contains ``blocklimit`` postings.
        """
        self.block.append(id, weight, valuestring, dfl)
        if len(self.block) >= self.blocklimit:
            self._write_block()

    def finish(self, inlinelimit=1):
        """Close the current posting list and return its term info object.

        When nothing has been flushed yet and the pending block is non-empty
        with no more than ``inlinelimit`` postings, the postings are returned
        inline: ``terminfo.postings`` becomes an ``(ids, weights, values)``
        tuple, with ``values`` set to ``None`` if the block has none.
        In every other case pending postings are flushed, the real block
        count replaces the placeholder written by ``start()``, and
        ``terminfo.postings`` becomes the list's starting offset.

        :param inlinelimit: maximum posting count eligible for inlining.
        :raises TypeError: if ``inlinelimit`` is not an integer.
        :raises Exception: if no posting list is in progress.
        """
        # Raise explicitly instead of asserting: assertions disappear when
        # the interpreter runs with -O.
        if not isinstance(inlinelimit, integer_types):
            raise TypeError("inlinelimit must be an integer, got %r"
                            % (inlinelimit,))
        if self.block is None:
            raise Exception("Called finish() when not in a block")

        block = self.block
        terminfo = self.terminfo

        if self.blockcount < 1 and block and len(block) <= inlinelimit:
            terminfo.add_block(block)
            vals = None if not block.values else tuple(block.values)
            postings = (tuple(block.ids), tuple(block.weights), vals)
        else:
            if block:
                self._write_block()

            # Seek back to the start of this list of posting blocks and write
            # the number of blocks
            pf = self.postfile
            pf.flush()
            offset = pf.tell()
            # The placeholder sits right after the magic number.
            pf.seek(self.startoffset + _INT_SIZE)
            pf.write_uint(self.blockcount)
            pf.seek(offset)
            postings = self.startoffset

        self.block = None

        terminfo.postings = postings
        return terminfo

    def close(self):
        """Close the underlying posting file.

        :raises Exception: if a posting list was started but not finished.
        """
        # Use the same None sentinel as start()/finish(); a truth test would
        # miss a started list whose block is still empty, assuming empty
        # blocks are falsy.
        if self.block is not None:
            raise Exception("Closed posting writer without finishing")
        self.postfile.close()

    def block_stats(self):
        """Return whatever the current block's ``stats()`` method returns."""
        return self.block.stats()

    def _write_block(self):
        """Write out the current block and begin a new one.

        Also records the written block in the term info and bumps the block
        counter.
        """
        self.block.write(compression=self.compression)
        self.terminfo.add_block(self.block)
        self._reset_block()
        self.blockcount += 1