def test_huge_postfile():
    """Round-trip postings written past the 5 GiB mark of a posting file.

    The file position is first moved beyond 5 GiB (more than fits in a
    32-bit offset) so that the postings written afterwards, and the offset
    returned by the writer, exercise large-file handling.  The postings are
    then read back and compared one by one with what was written.
    """
    num_postings = 10

    with TempStorage("hugeindex") as st:
        pf = st.create_file("test.pst")

        # Jump 5 GiB into the file and write four bytes there, so the
        # posting data that follows starts at an offset above 2**32.
        gb5 = 5 * 1024 * 1024 * 1024
        pf.seek(gb5)
        pf.write("\x00\x00\x00\x00")
        assert_equal(pf.tell(), gb5 + 4)

        fpw = FilePostingWriter(pf)
        postformat = formats.Frequency(None)
        offset = fpw.start(postformat)
        for i in xrange(num_postings):
            fpw.write(i, float(i), struct.pack("!I", i), 10)
        posttotal = fpw.finish()
        assert_equal(posttotal, 10)
        fpw.close()

        pf = st.open_file("test.pst")
        try:
            pfr = FilePostingReader(pf, offset, postformat)
            seen = 0
            while pfr.is_active():
                assert_equal(pfr.id(), seen)
                assert_equal(pfr.weight(), float(seen))
                assert_equal(pfr.value(), struct.pack("!I", seen))
                pfr.next()
                seen += 1
            # Without this check the test would pass vacuously if the
            # reader reported itself inactive before yielding any posting.
            assert_equal(seen, num_postings)
        finally:
            # Release the handle even when an assertion above fails, so the
            # temporary storage is not left with an open file.
            pf.close()
예제 #2
0
 def first_id(self, fieldname, text):
     """Return the first ID recorded for the term ``(fieldname, text)``.

     The second element of the term's ``termsindex`` entry decides where
     the ID comes from: an integer is used as an offset into
     ``self.postfile`` and the ID is read through a ``FilePostingReader``;
     anything else is indexed as ``entry[0][0]`` (presumably the ID of the
     first posting stored inline in the index -- confirm against the
     writer).
     """
     self._test_field(fieldname)
     postformat = self.format(fieldname)

     location = self.termsindex[(fieldname, text)][1]
     if not isinstance(location, (int, long)):
         # Non-integer entry: take the leading element of its first item.
         return location[0][0]

     # Integer entry: position a reader at that offset and ask for its ID.
     reader = FilePostingReader(self.postfile, location, postformat)
     return reader.id()
예제 #3
0
 def first_ids(self, fieldname):
     """Yield ``(text, first_id)`` for the terms of ``fieldname``.

     Iterates ``termsindex`` starting from ``(fieldname, '')`` and stops at
     the first key whose field name differs from ``fieldname``.  For each
     term, an integer offset is resolved through a ``FilePostingReader``
     on ``self.postfile``; any other value is indexed as ``value[0][0]``
     (presumably the ID of the first inline posting -- confirm against the
     writer).
     """
     self._test_field(fieldname)
     postformat = self.format(fieldname)

     entries = self.termsindex.items_from((fieldname, ''))
     for (termfield, termtext), (_totalfreq, location, _postcount) in entries:
         if termfield != fieldname:
             # Moved past the last term belonging to the requested field.
             break

         if isinstance(location, (int, long)):
             reader = FilePostingReader(self.postfile, location, postformat)
             first = reader.id()
         else:
             first = location[0][0]

         yield (termtext, first)