def test_huge_postfile():
    """Write postings beyond the 5 GB mark of a file and read them back.

    The file position is first moved to 5 GB and four zero bytes are
    written there, so everything the posting writer emits afterwards sits
    at an offset that does not fit in 32 bits.  The offset returned by
    ``start()`` is handed to the reader, which must yield the same ten
    (id, weight, value) postings that were written.
    """
    with TempStorage("hugeindex") as st:
        pf = st.create_file("test.pst")

        # Advance the write position past 5 GB before any posting is written.
        gb5 = 5 * 1024 * 1024 * 1024
        pf.seek(gb5)
        pf.write("\x00\x00\x00\x00")
        assert_equal(pf.tell(), gb5 + 4)

        fpw = FilePostingWriter(pf)
        fmt = formats.Frequency(None)
        offset = fpw.start(fmt)
        for i in xrange(10):
            fpw.write(i, float(i), struct.pack("!I", i), 10)
        posttotal = fpw.finish()
        assert_equal(posttotal, 10)
        fpw.close()

        pf = st.open_file("test.pst")
        try:
            pfr = FilePostingReader(pf, offset, fmt)
            count = 0
            while pfr.is_active():
                assert_equal(pfr.id(), count)
                assert_equal(pfr.weight(), float(count))
                assert_equal(pfr.value(), struct.pack("!I", count))
                pfr.next()
                count += 1
            # Without this check a reader that is never active would pass.
            assert_equal(count, 10)
        finally:
            # Close the file even when an assertion above fails.
            pf.close()
def roundtrip(postings, format, astype):
    """Write ``postings`` to a temporary posting file and read them back.

    Each ``(id, value)`` pair is encoded with ``format``; the weight passed
    to the writer is decoded from that encoded value.  Returns the list
    produced by the reader's ``items_as(astype)``.
    """
    with TempStorage("roundtrip") as storage:
        outfile = storage.create_file(astype)
        weight_of = format.decoder("weight")

        writer = FilePostingWriter(outfile, blocklimit=8)
        writer.start(format)
        for docnum, raw in postings:
            encoded = format.encode(raw)
            writer.write(docnum, weight_of(encoded), encoded, 0)
        writer.finish()
        writer.close()

        # Reopen the same file and read from offset 0.
        infile = storage.open_file(astype)
        reader = FilePostingReader(infile, 0, format)
        result = list(reader.items_as(astype))
        infile.close()
        return result
def test_readwrite():
    """Check that frequency postings survive a write/read cycle unchanged."""
    with TempStorage("readwrite") as storage:
        fmt = Frequency()
        expected = make_postings()

        outfile = storage.create_file("readwrite")
        writer = FilePostingWriter(outfile, blocklimit=8)
        writer.start(fmt)
        for docnum, freq in expected:
            writer.write(docnum, float(freq), fmt.encode(freq), 0)
        writer.finish()
        writer.close()

        # Reopen the file and compare what the reader yields with the input.
        infile = storage.open_file("readwrite")
        reader = FilePostingReader(infile, 0, fmt)
        actual = list(reader.items_as("frequency"))
        assert_equal(expected, actual)
        infile.close()
def roundtrip(self, postings, format, astype):
    """Write ``postings`` to a scratch file, read them back, and clean up.

    Values are encoded with ``format`` before being written.  Returns the
    list produced by the reader's ``all_as(astype)``, or ``None`` if the
    list was never built.  The scratch file is deleted in every case.
    """
    outfile = self.make_file(astype)
    result = None
    try:
        writer = FilePostingWriter(outfile, blocklimit=8)
        writer.start(format)
        for docnum, raw in postings:
            writer.write(docnum, format.encode(raw))
        writer.close()

        # Reopen the file and read from offset 0.
        infile = self.open_file(astype)
        reader = FilePostingReader(infile, 0, format)
        result = list(reader.all_as(astype))
        reader.close()
    finally:
        self.delete_file(astype)
    return result
def roundtrip(self, postings, format, astype):
    """Push ``postings`` through a posting file and return what is read back.

    Every value is encoded with ``format`` on the way in; the return value
    is ``list(reader.all_as(astype))``.  The file named ``astype`` is
    removed afterwards whether or not the round trip succeeded.
    """
    scratch = self.make_file(astype)
    readback = None
    try:
        writer = FilePostingWriter(scratch, blocklimit=8)
        writer.start(format)
        for docnum, raw in postings:
            encoded = format.encode(raw)
            writer.write(docnum, encoded)
        writer.close()

        reader = FilePostingReader(self.open_file(astype), 0, format)
        readback = list(reader.all_as(astype))
        reader.close()
    finally:
        self.delete_file(astype)
    return readback
def roundtrip(postings, format, astype):
    """Round-trip ``postings`` through a posting file in temporary storage.

    Each value is encoded with ``format`` and written together with the
    weight decoded from the encoded form.  The return value is
    ``list(reader.items_as(astype))`` read from the reopened file.
    """
    with TempStorage("roundtrip") as storage:
        target = storage.create_file(astype)
        decode_weight = format.decoder("weight")

        writer = FilePostingWriter(target, blocklimit=8)
        writer.start(format)
        for docnum, raw in postings:
            encoded = format.encode(raw)
            writer.write(docnum, decode_weight(encoded), encoded, 0)
        writer.finish()
        writer.close()

        # Read everything back starting at offset 0 of the reopened file.
        source = storage.open_file(astype)
        items = list(FilePostingReader(source, 0, format).items_as(astype))
        source.close()
        return items
def test_readwrite():
    """Write frequency postings, read them back, and expect equality."""
    with TempStorage("readwrite") as storage:
        fmt = Frequency()
        written = make_postings()

        target = storage.create_file("readwrite")
        writer = FilePostingWriter(target, blocklimit=8)
        writer.start(fmt)
        for docnum, freq in written:
            encoded = fmt.encode(freq)
            writer.write(docnum, float(freq), encoded, 0)
        writer.finish()
        writer.close()

        source = storage.open_file("readwrite")
        reader = FilePostingReader(source, 0, fmt)
        assert_equal(written, list(reader.items_as("frequency")))
        source.close()
def test_readwrite(self):
    """Write frequency postings to a file and open a reader on the result.

    NOTE(review): the comparison of the read-back items against the input
    (``postings == list(fpr.items_as("frequency"))``) is disabled in this
    test, so it only checks that writing, reopening and closing do not
    raise.  Confirm which reader method applies before re-enabling it.
    """
    fmt = Frequency(None)
    postings = self.make_postings()
    outfile = self.make_file("readwrite")
    try:
        writer = FilePostingWriter(outfile, blocklimit=8)
        writer.start(fmt)
        for docnum, freq in postings:
            writer.write(docnum, fmt.encode(freq))
        writer.close()

        infile = self.open_file("readwrite")
        reader = FilePostingReader(infile, 0, fmt)
        reader.close()
    finally:
        # Remove the scratch file whether or not the steps above succeeded.
        self.delete_file("readwrite")
def test_readwrite(self):
    """Exercise the posting write path and construct a reader on the file.

    NOTE(review): the equality check between the written postings and
    ``list(fpr.items_as("frequency"))`` is commented out in the original,
    so nothing about the read-back content is asserted here.
    """
    fmt = Frequency(None)
    written = self.make_postings()
    scratch = self.make_file("readwrite")
    try:
        writer = FilePostingWriter(scratch, blocklimit=8)
        writer.start(fmt)
        for docnum, freq in written:
            encoded = fmt.encode(freq)
            writer.write(docnum, encoded)
        writer.close()

        reader = FilePostingReader(self.open_file("readwrite"), 0, fmt)
        reader.close()
    finally:
        self.delete_file("readwrite")
def test_lowlevel_block_writing():
    """Check the summary statistics returned by ``finish()``.

    Eight postings are written with ``blocklimit=4`` (presumably so that
    more than one block is produced -- confirm against the writer).  The
    object returned by ``finish()`` must report the total weight, the
    posting count, the minimum and maximum lengths, the maximum weight and
    the maximum weight/length ratio of what was written.
    """
    storage = RamStorage()
    outfile = storage.create_file("postfile")
    writer = FilePostingWriter(outfile, blocklimit=4)
    fmt = formats.Frequency()
    writer.start(fmt)

    # (id, weight, length) for each posting; the encoded value is derived
    # from the weight.
    rows = (
        (0, 1.0, 1),
        (1, 2.0, 2),
        (2, 12.0, 6),
        (5, 6.5, 420),
        (11, 1.5, 1),
        (12, 2.5, 2),
        (26, 100.5, 21),
        (50, 8.0, 1020),
    )
    for docnum, weight, length in rows:
        writer.write(docnum, weight, fmt.encode(weight), length)

    terminfo = writer.finish()
    assert_equal(terminfo.weight(), 134.0)
    assert_equal(terminfo.doc_frequency(), 8)
    assert_equal(terminfo.min_length(), 1)
    # Expected lengths go through the byte round trip before comparison.
    longest = byte_to_length(length_to_byte(1020))
    assert_equal(terminfo.max_length(), longest)
    assert_equal(terminfo.max_weight(), 100.5)
    heaviest_len = byte_to_length(length_to_byte(21))
    assert_equal(terminfo.max_wol(), 100.5 / heaviest_len)