def test_huge_postfile():
    """Write ten postings past the 5 GB mark of a file and read them back.

    The file position is first moved to 5 GB (beyond what a 32-bit offset
    can address) before the posting writer is started, so the offset
    returned by ``start`` and handed to the reader is presumably a
    large-file offset.

    Besides comparing each posting's id, weight and value, the test checks
    that exactly ten postings were read; without that check the test would
    pass even if the reader reported no postings at all.
    """
    with TempStorage("hugeindex") as st:
        pf = st.create_file("test.pst")

        # Seek to 5 GB and write four bytes so the file really extends there.
        gb5 = 5 * 1024 * 1024 * 1024
        pf.seek(gb5)
        pf.write("\x00\x00\x00\x00")
        assert_equal(pf.tell(), gb5 + 4)

        fpw = FilePostingWriter(pf)
        fmt = formats.Frequency(None)
        offset = fpw.start(fmt)
        for i in xrange(10):
            fpw.write(i, float(i), struct.pack("!I", i), 10)
        posttotal = fpw.finish()
        assert_equal(posttotal, 10)
        fpw.close()

        pf = st.open_file("test.pst")
        try:
            pfr = FilePostingReader(pf, offset, fmt)
            count = 0
            while pfr.is_active():
                assert_equal(pfr.id(), count)
                assert_equal(pfr.weight(), float(count))
                assert_equal(pfr.value(), struct.pack("!I", count))
                pfr.next()
                count += 1
            # Guard against a reader that yields fewer (or no) postings.
            assert_equal(count, 10)
        finally:
            # Release the handle even when an assertion above fails, so the
            # temporary storage is not cleaned up around an open file.
            pf.close()
예제 #2
0
def roundtrip(postings, format, astype):
    """Write ``postings`` to a temporary posting file and read them back.

    Every ``(id, value)`` pair is encoded with ``format.encode``; the weight
    passed to the writer is decoded from that encoded value, and the final
    write argument is always 0. The file is then reopened and read from
    offset 0.

    :param postings: iterable of ``(id, value)`` pairs.
    :param format: posting format providing ``encode`` and ``decoder``.
    :param astype: name handed to ``items_as``; also used as the file name.
    :returns: list built from ``FilePostingReader.items_as(astype)``.
    """
    with TempStorage("roundtrip") as storage:
        outfile = storage.create_file(astype)
        weight_of = format.decoder("weight")
        writer = FilePostingWriter(outfile, blocklimit=8)
        writer.start(format)
        for docnum, value in postings:
            encoded = format.encode(value)
            writer.write(docnum, weight_of(encoded), encoded, 0)
        writer.finish()
        writer.close()

        infile = storage.open_file(astype)
        reader = FilePostingReader(infile, 0, format)
        items = list(reader.items_as(astype))
        infile.close()
        return items
예제 #3
0
def test_readwrite():
    """Write the postings from ``make_postings`` and compare the read-back.

    Postings are written with a ``Frequency`` format through a
    ``FilePostingWriter`` (``blocklimit=8``) and read back from offset 0 as
    ``"frequency"`` items, which must equal the original postings.
    """
    filename = "readwrite"
    with TempStorage("readwrite") as storage:
        fmt = Frequency()
        expected = make_postings()

        outfile = storage.create_file(filename)
        writer = FilePostingWriter(outfile, blocklimit=8)
        writer.start(fmt)
        for docnum, freq in expected:
            writer.write(docnum, float(freq), fmt.encode(freq), 0)
        writer.finish()
        writer.close()

        infile = storage.open_file(filename)
        reader = FilePostingReader(infile, 0, fmt)
        readback = list(reader.items_as("frequency"))
        assert_equal(expected, readback)
        infile.close()
예제 #4
0
    def roundtrip(self, postings, format, astype):
        """Write ``postings`` to a file named ``astype`` and read them back.

        Each ``(id, value)`` pair is written as ``(id, format.encode(value))``.
        The file is reopened, read from offset 0 via ``all_as(astype)``, and
        always deleted afterwards, whether or not an error occurred.

        :param postings: iterable of ``(id, value)`` pairs.
        :param format: posting format providing ``encode``.
        :param astype: name handed to ``all_as``; also used as the file name.
        :returns: list built from ``FilePostingReader.all_as(astype)``.
        """
        outfile = self.make_file(astype)
        try:
            writer = FilePostingWriter(outfile, blocklimit=8)
            writer.start(format)
            for docnum, value in postings:
                writer.write(docnum, format.encode(value))
            writer.close()

            infile = self.open_file(astype)
            reader = FilePostingReader(infile, 0, format)
            result = list(reader.all_as(astype))
            reader.close()
            # The finally clause still runs before this value is returned.
            return result
        finally:
            self.delete_file(astype)
예제 #5
0
 def roundtrip(self, postings, format, astype):
     """Round-trip ``postings`` through a posting file named ``astype``.

     The pairs are written as ``(id, format.encode(value))``, the file is
     reopened and read from offset 0 with ``all_as(astype)``, and the file
     is deleted in every case before control leaves this method.

     :param postings: iterable of ``(id, value)`` pairs.
     :param format: posting format providing ``encode``.
     :param astype: name handed to ``all_as``; also used as the file name.
     :returns: list built from ``FilePostingReader.all_as(astype)``.
     """
     written = self.make_file(astype)
     try:
         writer = FilePostingWriter(written, blocklimit=8)
         writer.start(format)
         for docnum, value in postings:
             writer.write(docnum, format.encode(value))
         writer.close()

         reopened = self.open_file(astype)
         reader = FilePostingReader(reopened, 0, format)
         items = list(reader.all_as(astype))
         reader.close()
         # Cleanup in the finally clause happens before the return completes.
         return items
     finally:
         self.delete_file(astype)
예제 #6
0
def roundtrip(postings, format, astype):
    """Store ``postings`` in a temporary posting file and return the read-back.

    For each ``(id, value)`` pair the value is encoded with
    ``format.encode``, its weight is recovered from the encoded form via
    ``format.decoder("weight")``, and both are written with a trailing 0.
    The file is afterwards reopened and read from offset 0.

    :param postings: iterable of ``(id, value)`` pairs.
    :param format: posting format providing ``encode`` and ``decoder``.
    :param astype: name handed to ``items_as``; also used as the file name.
    :returns: list built from ``FilePostingReader.items_as(astype)``.
    """
    with TempStorage("roundtrip") as tempstore:
        target = tempstore.create_file(astype)
        decode_weight = format.decoder("weight")
        writer = FilePostingWriter(target, blocklimit=8)
        writer.start(format)
        for docnum, raw in postings:
            packed = format.encode(raw)
            writer.write(docnum, decode_weight(packed), packed, 0)
        writer.finish()
        writer.close()

        source = tempstore.open_file(astype)
        reader = FilePostingReader(source, 0, format)
        result = list(reader.items_as(astype))
        source.close()
        return result
예제 #7
0
def test_readwrite():
    """Check that frequency postings survive a write/read cycle unchanged.

    The postings from ``make_postings`` are written with a ``Frequency``
    format (``blocklimit=8``), then read back from offset 0 as
    ``"frequency"`` items and compared with the originals.
    """
    name = "readwrite"
    with TempStorage("readwrite") as tempstore:
        freq_format = Frequency()
        original = make_postings()

        target = tempstore.create_file(name)
        writer = FilePostingWriter(target, blocklimit=8)
        writer.start(freq_format)
        for docnum, freq in original:
            writer.write(docnum, float(freq), freq_format.encode(freq), 0)
        writer.finish()
        writer.close()

        source = tempstore.open_file(name)
        reader = FilePostingReader(source, 0, freq_format)
        recovered = list(reader.items_as("frequency"))
        assert_equal(original, recovered)
        source.close()
예제 #8
0
    def test_readwrite(self):
        """Write frequency postings to a file and open a reader on it.

        The postings from ``self.make_postings`` are written with a
        ``Frequency(None)`` format (``blocklimit=8``). The file named
        "readwrite" is deleted afterwards in every case.
        """
        fmt = Frequency(None)
        postings = self.make_postings()

        outfile = self.make_file("readwrite")
        try:
            writer = FilePostingWriter(outfile, blocklimit=8)
            writer.start(fmt)
            for docnum, freq in postings:
                writer.write(docnum, fmt.encode(freq))
            writer.close()

            infile = self.open_file("readwrite")
            reader = FilePostingReader(infile, 0, fmt)
            # NOTE(review): the read-back comparison is disabled, so this test
            # currently only checks that writing and opening do not raise.
            #self.assertEqual(postings, list(reader.items_as("frequency")))
            reader.close()
        finally:
            self.delete_file("readwrite")
예제 #9
0
 def test_readwrite(self):
     """Write frequency postings to "readwrite" and open a reader on it.

     Uses a ``Frequency(None)`` format and a writer with ``blocklimit=8``.
     The file is removed in the ``finally`` clause whether or not an error
     occurred.
     """
     freq_format = Frequency(None)
     postings = self.make_postings()

     written = self.make_file("readwrite")
     try:
         writer = FilePostingWriter(written, blocklimit=8)
         writer.start(freq_format)
         for docnum, freq in postings:
             writer.write(docnum, freq_format.encode(freq))
         writer.close()

         reopened = self.open_file("readwrite")
         reader = FilePostingReader(reopened, 0, freq_format)
         # NOTE(review): the comparison below is commented out, so nothing
         # about the read-back contents is verified here.
         #self.assertEqual(postings, list(reader.items_as("frequency")))
         reader.close()
     finally:
         self.delete_file("readwrite")
예제 #10
0
def test_lowlevel_block_writing():
    """Check the term statistics returned by ``FilePostingWriter.finish``.

    Eight postings are written to an in-memory file with ``blocklimit=4``
    (presumably producing two blocks of four). The object returned by
    ``finish`` must report the summed weight, the posting count, the
    minimum and maximum lengths, the maximum weight and the maximum
    weight-over-length ratio.
    """
    storage = RamStorage()
    postfile = storage.create_file("postfile")
    writer = FilePostingWriter(postfile, blocklimit=4)
    fmt = formats.Frequency()
    writer.start(fmt)

    # (id, weight, length) for each posting; the weights add up to 134.0.
    rows = (
        (0, 1.0, 1),
        (1, 2.0, 2),
        (2, 12.0, 6),
        (5, 6.5, 420),
        (11, 1.5, 1),
        (12, 2.5, 2),
        (26, 100.5, 21),
        (50, 8.0, 1020),
    )
    for docnum, weight, length in rows:
        writer.write(docnum, weight, fmt.encode(weight), length)
    terminfo = writer.finish()

    assert_equal(terminfo.weight(), 134.0)
    assert_equal(terminfo.doc_frequency(), 8)
    assert_equal(terminfo.min_length(), 1)
    # Lengths are compared after a length -> byte -> length round trip.
    assert_equal(terminfo.max_length(),
                 byte_to_length(length_to_byte(1020)))
    assert_equal(terminfo.max_weight(), 100.5)
    assert_equal(terminfo.max_wol(),
                 100.5 / byte_to_length(length_to_byte(21)))