def test_posboost_postings(self):
    """Check PositionBoosts postings against each format they are read as.

    Random (pos, boost) postings are generated for three documents and
    handed to ``self.roundtrip`` with a ``PositionBoosts(None)`` format
    under the names "position_boosts", "positions" and "frequency". Each
    result must equal the matching projection of the generated postings.
    """
    postings = []
    current_doc = 0
    for _ in xrange(0, 3):
        # The increment is at least 1, so document numbers keep increasing.
        current_doc += randint(1, 10)
        entries = []
        current_pos = 0
        for __ in xrange(0, randint(1, 3)):
            current_pos += randint(1, 10)
            # The boost is converted to a byte and back before being stored,
            # presumably so the expected value is one the encoding can hold.
            weight = byte_to_float(float_to_byte(random() * 2))
            entries.append((current_pos, weight))
        postings.append((current_doc, entries))

    # Full (position, boost) data.
    self.assertEqual(
        postings,
        self.roundtrip(postings, PositionBoosts(None), "position_boosts"))

    # Positions only: the boost is dropped from every entry.
    positions_only = [(doc, [p for p, _boost in items])
                      for doc, items in postings]
    self.assertEqual(
        positions_only,
        self.roundtrip(postings, PositionBoosts(None), "positions"))

    # Frequency: the number of entries recorded for each document.
    frequencies = [(doc, len(items)) for doc, items in postings]
    self.assertEqual(
        frequencies,
        self.roundtrip(postings, PositionBoosts(None), "frequency"))
def test_charboost_postings():
    """Check CharacterBoosts postings against each format they are read as.

    Random (pos, startchar, endchar, boost) postings are generated for 20
    documents and passed to ``roundtrip`` with a ``CharacterBoosts()``
    format under the names "character_boosts", "characters",
    "position_boosts", "positions" and "frequency". Each result must equal
    the matching projection of the generated postings.
    """
    postings = []
    doc_id = 0
    for _ in xrange(0, 20):
        doc_id += randint(1, 10)
        spans = []
        position = 0
        span_end = 0
        for __ in xrange(0, randint(1, 10)):
            position += randint(1, 10)
            # Every span starts at least 3 past the end of the previous one,
            # so the character ranges within a document never overlap.
            span_start = span_end + randint(3, 10)
            span_end = span_start + randint(3, 10)
            # Byte round trip of the boost, presumably so the expected value
            # is one the encoding can represent.
            weight = byte_to_float(float_to_byte(random() * 2))
            spans.append((position, span_start, span_end, weight))
        postings.append((doc_id, spans))

    # Full data: positions, character ranges and boosts.
    assert_equal(postings,
                 roundtrip(postings, CharacterBoosts(), "character_boosts"))

    # Positions with character ranges, boosts dropped.
    chars_only = [(doc, [(p, start, end) for p, start, end, _w in items])
                  for doc, items in postings]
    assert_equal(chars_only,
                 roundtrip(postings, CharacterBoosts(), "characters"))

    # Positions with boosts, character ranges dropped.
    pos_boosts = [(doc, [(p, w) for p, _start, _end, w in items])
                  for doc, items in postings]
    assert_equal(pos_boosts,
                 roundtrip(postings, CharacterBoosts(), "position_boosts"))

    # Bare positions.
    positions_only = [(doc, [p for p, _start, _end, _w in items])
                      for doc, items in postings]
    assert_equal(positions_only,
                 roundtrip(postings, CharacterBoosts(), "positions"))

    # Frequency is counted from the positions projection built above.
    frequencies = [(doc, len(plist)) for doc, plist in positions_only]
    assert_equal(frequencies,
                 roundtrip(postings, CharacterBoosts(), "frequency"))
def write_8bitfloat(self, f, mantissabits=5, zeroexp=2):
    """Write the floating point value ``f`` to the wrapped file as one byte.

    :param f: the floating point value to write.
    :param mantissabits: the number of bits to use for the mantissa; the
        remaining bits are used for the exponent.
    :param zeroexp: the zero point for the exponent.
    """
    # Encode first, then hand the result to the byte writer.
    encoded = float_to_byte(f, mantissabits, zeroexp)
    self.write_byte(encoded)
def write_8bitfloat(self, f, mantissabits=5, zeroexp=2):
    """Store ``f`` in the wrapped file using a byte-sized float encoding.

    The value is converted with ``float_to_byte`` and the result is passed
    to ``self.write_byte``.

    :param f: the floating point value to write.
    :param mantissabits: how many bits of the byte hold the mantissa (the
        rest are used for the exponent).
    :param zeroexp: the zero point for the exponent.
    """
    byte_value = float_to_byte(f, mantissabits, zeroexp)
    self.write_byte(byte_value)
def test_docboost_postings(self):
    """Check that DocBoosts postings come back unchanged from a round trip.

    Twenty (docnum, (freq, boost)) postings are generated at random and
    passed to ``self.roundtrip`` with a ``DocBoosts(None)`` format under the
    name "docboosts"; the result must equal the generated postings.
    """
    postings = []
    current_doc = 0
    for _ in xrange(0, 20):
        current_doc += randint(1, 10)
        frequency = randint(1, 1000)
        # Byte round trip of the boost, presumably so the expected value is
        # one the encoding can represent.
        weight = byte_to_float(float_to_byte(random() * 2))
        postings.append((current_doc, (frequency, weight)))

    self.assertEqual(
        postings,
        self.roundtrip(postings, DocBoosts(None), "docboosts"))
def encode(self, posns_boosts):
    """Encode a sequence of (pos, boost) pairs into one string.

    The result starts with a header packed as "!If" holding the number of
    pairs and the sum of their boosts. After it, each pair contributes
    ``varint`` of the position's difference from the previous position
    (the first is relative to 0) followed by ``float_to_byte`` of its boost.

    :param posns_boosts: ``[(pos, boost), ...]``
    """
    pieces = []
    previous = 0
    total_boost = 0
    for position, weight in posns_boosts:
        total_boost += weight
        # Positions are stored as deltas from the preceding position.
        pieces.append(varint(position - previous))
        pieces.append(float_to_byte(weight))
        previous = position

    header = pack("!If", len(posns_boosts), total_boost)
    return header + "".join(pieces)
def test_docboost_postings(self):
    """Round-trip random DocBoosts postings and expect them back unchanged.

    Builds 20 postings of the form (docnum, (freq, boost)) and compares them
    with what ``self.roundtrip`` returns for a ``DocBoosts(None)`` format
    and the name "docboosts".
    """
    postings = []
    doc_id = 0
    for _ in xrange(0, 20):
        # The increment is at least 1, so document numbers keep increasing.
        doc_id += randint(1, 10)
        count = randint(1, 1000)
        # The boost goes through float_to_byte/byte_to_float before use,
        # presumably so the expected value survives the encoding.
        boost_value = byte_to_float(float_to_byte(random() * 2))
        entry = (doc_id, (count, boost_value))
        postings.append(entry)

    self.assertEqual(
        postings,
        self.roundtrip(postings, DocBoosts(None), "docboosts"))
def encode(self, posns_chars_boosts):
    """Encode (pos, startchar, endchar, boost) tuples into one string.

    The result starts with a header packed as "!If" holding the number of
    tuples and the sum of their boosts. Each tuple then contributes four
    pieces, in order:

    * ``varint`` of the position minus the previous position,
    * ``varint`` of startchar minus the previous tuple's endchar,
    * ``varint`` of endchar minus startchar,
    * ``float_to_byte`` of the boost.

    Both running bases start at 0.

    :param posns_chars_boosts: ``[(pos, startchar, endchar, boost), ...]``
    """
    pieces = []
    last_pos = 0
    last_end = 0
    total_boost = 0
    for position, start, end, weight in posns_chars_boosts:
        total_boost += weight

        pieces.append(varint(position - last_pos))
        last_pos = position

        # The start is relative to the previous end; the end is stored as
        # the length of the span.
        pieces.append(varint(start - last_end))
        pieces.append(varint(end - start))
        pieces.append(float_to_byte(weight))
        last_end = end

    return pack("!If", len(posns_chars_boosts), total_boost) + "".join(pieces)
def encode(self, posns_chars_boosts):
    """Serialize positions, character ranges and boosts into one string.

    Output layout: a "!If"-packed header with the tuple count and the summed
    boost. It is followed, per tuple, by ``varint(pos delta)``,
    ``varint(startchar - previous endchar)``, ``varint(endchar - startchar)``
    and ``float_to_byte(boost)``. The position and character bases both
    start at 0.

    :param posns_chars_boosts: ``[(pos, startchar, endchar, boost), ...]``
    """
    encoded = []
    pos_base = 0
    char_base = 0
    boost_sum = 0
    for pos, char_from, char_to, boost in posns_chars_boosts:
        boost_sum += boost

        pos_delta = varint(pos - pos_base)
        pos_base = pos

        start_delta = varint(char_from - char_base)
        span_length = varint(char_to - char_from)
        boost_byte = float_to_byte(boost)
        char_base = char_to

        encoded.extend((pos_delta, start_delta, span_length, boost_byte))

    header = pack("!If", len(posns_chars_boosts), boost_sum)
    return header + "".join(encoded)
def test_posboost_postings():
    """Round-trip random PositionBoosts postings through three formats.

    Three documents with one to three (pos, boost) entries each are
    generated and passed to ``roundtrip`` with a ``PositionBoosts()``
    format. The results for "position_boosts", "positions" and "frequency"
    must match the full postings, the positions alone and the per-document
    entry counts respectively.
    """
    postings = []
    doc_id = 0
    for _ in xrange(0, 3):
        doc_id += randint(1, 10)
        pairs = []
        position = 0
        for __ in xrange(0, randint(1, 3)):
            position += randint(1, 10)
            # Byte round trip of the boost, presumably so the expected value
            # is one the encoding can represent.
            boost_value = byte_to_float(float_to_byte(random() * 2))
            pairs.append((position, boost_value))
        postings.append((doc_id, pairs))

    assert_equal(postings,
                 roundtrip(postings, PositionBoosts(), "position_boosts"))

    # Strip the boosts to get the expected "positions" view.
    expected_positions = [(doc, [p for p, _b in items])
                          for doc, items in postings]
    assert_equal(expected_positions,
                 roundtrip(postings, PositionBoosts(), "positions"))

    # One count per document for the "frequency" view.
    expected_freqs = [(doc, len(items)) for doc, items in postings]
    assert_equal(expected_freqs,
                 roundtrip(postings, PositionBoosts(), "frequency"))
def encode(self, freq_docboost):
    """Encode a (freq, docboost) pair.

    The frequency is packed with the "!I" struct format and the result of
    ``float_to_byte(docboost)`` is appended to it.

    :param freq_docboost: a two-item sequence ``(freq, docboost)``.
    """
    count, boost = freq_docboost
    packed_count = pack("!I", count)
    return packed_count + float_to_byte(boost)