Пример #1
0
 def read_8bitfloat(self, mantissabits=5, zeroexp=2):
     """Read one byte and decode it into a floating point value.

     The byte is obtained from ``self.read_byte()`` and converted with
     ``byte_to_float``.

     mantissabits -- number of bits used for the mantissa (the remaining
         bits hold the exponent).
     zeroexp -- zero point for the exponent.
     """
     raw = self.read_byte()
     return byte_to_float(raw, mantissabits, zeroexp)
Пример #2
0
    def test_posboost_postings(self):
        """Round-trip random (position, boost) postings through PositionBoosts.

        Three postings are generated; each document number and each position
        grows by ``randint(1, 10)`` from the previous one. The postings are
        then read back as "position_boosts", "positions" and "frequency" and
        compared with the matching projection of the generated data.
        """
        generated = []
        doc_id = 0
        for _ in xrange(3):
            doc_id += randint(1, 10)
            entries = []
            position = 0
            for __ in xrange(randint(1, 3)):
                position += randint(1, 10)
                # The boost goes through the byte codec before being stored
                # as the expected value (presumably so that it is a value the
                # one-byte encoding can reproduce -- confirm in the codec).
                weight = byte_to_float(float_to_byte(random() * 2))
                entries.append((position, weight))
            generated.append((doc_id, entries))

        # Full view: positions together with their boosts.
        full_view = self.roundtrip(generated, PositionBoosts(None),
                                   "position_boosts")
        self.assertEqual(generated, full_view)

        # Positions-only view: the boost is dropped from every entry.
        as_posns = []
        for doc_id, entries in generated:
            as_posns.append((doc_id, [p for p, _boost in entries]))
        posns_view = self.roundtrip(generated, PositionBoosts(None),
                                    "positions")
        self.assertEqual(as_posns, posns_view)

        # Frequency view: just the number of entries per document.
        as_freq = []
        for doc_id, entries in generated:
            as_freq.append((doc_id, len(entries)))
        freq_view = self.roundtrip(generated, PositionBoosts(None),
                                   "frequency")
        self.assertEqual(as_freq, freq_view)
Пример #3
0
def test_charboost_postings():
    """Round-trip random character/boost postings through CharacterBoosts.

    Twenty postings are generated, each holding 1-10
    ``(pos, startchar, endchar, boost)`` entries whose document numbers,
    positions and character offsets only ever grow. The postings are then
    read back as "character_boosts", "characters", "position_boosts",
    "positions" and "frequency", and every result is compared with the
    matching projection of the generated data.
    """
    generated = []
    doc_id = 0
    for _ in xrange(20):
        doc_id += randint(1, 10)
        entries = []
        position = 0
        char_end = 0
        for __ in xrange(randint(1, 10)):
            position += randint(1, 10)
            char_start = char_end + randint(3, 10)
            char_end = char_start + randint(3, 10)
            # The boost goes through the byte codec before being stored as
            # the expected value (presumably so that it is a value the
            # one-byte encoding can reproduce -- confirm in the codec).
            weight = byte_to_float(float_to_byte(random() * 2))
            entries.append((position, char_start, char_end, weight))
        generated.append((doc_id, entries))

    # Full view: every field comes back.
    full_view = roundtrip(generated, CharacterBoosts(), "character_boosts")
    assert_equal(generated, full_view)

    # Characters view: the boost is dropped.
    as_chars = []
    for doc_id, entries in generated:
        as_chars.append((doc_id, [(p, s, e) for p, s, e, _w in entries]))
    chars_view = roundtrip(generated, CharacterBoosts(), "characters")
    assert_equal(as_chars, chars_view)

    # Position/boost view: the character offsets are dropped.
    as_posbsts = []
    for doc_id, entries in generated:
        as_posbsts.append((doc_id, [(p, w) for p, _s, _e, w in entries]))
    posbsts_view = roundtrip(generated, CharacterBoosts(), "position_boosts")
    assert_equal(as_posbsts, posbsts_view)

    # Positions-only view.
    as_posns = []
    for doc_id, entries in generated:
        as_posns.append((doc_id, [p for p, _s, _e, _w in entries]))
    posns_view = roundtrip(generated, CharacterBoosts(), "positions")
    assert_equal(as_posns, posns_view)

    # Frequency view: number of positions per document.
    as_freq = []
    for doc_id, positions in as_posns:
        as_freq.append((doc_id, len(positions)))
    freq_view = roundtrip(generated, CharacterBoosts(), "frequency")
    assert_equal(as_freq, freq_view)
Пример #4
0
 def read_8bitfloat(self, mantissabits=5, zeroexp=2):
     """Decode the next byte as a compact floating point number.

     A single byte is fetched with ``self.read_byte()`` and handed to
     ``byte_to_float`` together with the encoding parameters.

     mantissabits -- how many bits are used for the mantissa; the rest
         are used for the exponent.
     zeroexp -- the zero point for the exponent.
     """
     encoded = self.read_byte()
     value = byte_to_float(encoded, mantissabits, zeroexp)
     return value
Пример #5
0
def test_charboost_postings():
    """Verify every CharacterBoosts view against randomly built postings.

    Builds twenty postings of ``(docnum, [(pos, startchar, endchar, boost)])``
    with monotonically growing document numbers, positions and character
    offsets, then checks the "character_boosts", "characters",
    "position_boosts", "positions" and "frequency" views returned by
    ``roundtrip`` against projections of that same data.
    """
    source = []
    current_doc = 0
    for _ in xrange(20):
        current_doc += randint(1, 10)
        items = []
        where = 0
        last_end = 0
        for __ in xrange(randint(1, 10)):
            where += randint(1, 10)
            begin = last_end + randint(3, 10)
            last_end = begin + randint(3, 10)
            # Boost is encoded to a byte and decoded again before use as the
            # expected value (presumably to match what the format can store
            # -- confirm in the codec).
            encoded_boost = float_to_byte(random() * 2)
            items.append((where, begin, last_end,
                          byte_to_float(encoded_boost)))
        source.append((current_doc, items))

    # Everything: positions, character ranges and boosts.
    result = roundtrip(source, CharacterBoosts(), "character_boosts")
    assert_equal(source, result)

    # Positions with character ranges, without boosts.
    as_chars = [(d, [(p, sc, ec) for p, sc, ec, _b in items])
                for d, items in source]
    result = roundtrip(source, CharacterBoosts(), "characters")
    assert_equal(as_chars, result)

    # Positions with boosts, without character ranges.
    as_posbsts = [(d, [(p, b) for p, _sc, _ec, b in items])
                  for d, items in source]
    result = roundtrip(source, CharacterBoosts(), "position_boosts")
    assert_equal(as_posbsts, result)

    # Bare positions.
    as_posns = [(d, [p for p, _sc, _ec, _b in items])
                for d, items in source]
    result = roundtrip(source, CharacterBoosts(), "positions")
    assert_equal(as_posns, result)

    # Number of positions per document.
    as_freq = [(d, len(plist)) for d, plist in as_posns]
    result = roundtrip(source, CharacterBoosts(), "frequency")
    assert_equal(as_freq, result)
Пример #6
0
 def test_docboost_postings(self):
     """Round-trip random (freq, boost) postings through DocBoosts.

     Twenty postings of ``(docnum, (freq, boost))`` are generated, with the
     document number growing by ``randint(1, 10)`` each time, and the
     "docboosts" view returned by ``self.roundtrip`` must equal them.
     """
     expected = []
     doc_id = 0
     for _ in xrange(20):
         doc_id += randint(1, 10)
         frequency = randint(1, 1000)
         # The boost goes through the byte codec before being stored as the
         # expected value (presumably so that it is a value the one-byte
         # encoding can reproduce -- confirm in the codec).
         weight = byte_to_float(float_to_byte(random() * 2))
         expected.append((doc_id, (frequency, weight)))

     actual = self.roundtrip(expected, DocBoosts(None), "docboosts")
     self.assertEqual(expected, actual)
Пример #7
0
    def test_docboost_postings(self):
        """Check that DocBoosts returns random postings unchanged.

        Generates twenty ``(docnum, (freq, boost))`` postings with growing
        document numbers and asserts that reading them back through
        ``self.roundtrip`` as "docboosts" yields the same list.
        """
        source = []
        current_doc = 0
        for _ in xrange(20):
            current_doc += randint(1, 10)
            count = randint(1, 1000)
            # Boost is encoded to a byte and decoded again before use as the
            # expected value (presumably to match what the format can store
            # -- confirm in the codec).
            encoded_boost = float_to_byte(random() * 2)
            source.append((current_doc, (count, byte_to_float(encoded_boost))))

        result = self.roundtrip(source, DocBoosts(None), "docboosts")
        self.assertEqual(source, result)
Пример #8
0
    def decode_position_boosts(self, valuestring):
        """Decode ``valuestring`` into a list of ``(position, boost)`` pairs.

        Layout as read here: a "!I"-packed entry count, ``_FLOAT_SIZE`` bytes
        that are skipped (the summed boost), then for each entry a varint
        position delta followed by one byte that ``byte_to_float`` converts
        into the boost.
        """
        stream = StringIO(valuestring)
        header = stream.read(_INT_SIZE)
        count = unpack("!I", header)[0]

        # Step over the summed boost; whence=1 seeks relative to the current
        # offset.
        stream.seek(_FLOAT_SIZE, 1)

        decoded = []
        current = 0
        for _ in xrange(count):
            # Positions are stored as deltas from the previous position.
            current += read_varint(stream.read)
            weight = byte_to_float(stream.read(1))
            decoded.append((current, weight))
        return decoded
Пример #9
0
 def decode_position_boosts(self, valuestring):
     """Return the ``(position, boost)`` pairs stored in ``valuestring``.

     The value starts with a "!I"-packed number of entries and
     ``_FLOAT_SIZE`` bytes holding the summed boost, which is not needed
     here. Each entry that follows is a varint added to the running
     position, then a single byte decoded with ``byte_to_float``.
     """
     buf = StringIO(valuestring)
     entry_count = unpack("!I", buf.read(_INT_SIZE))[0]

     # The summed boost is not part of the result; whence=1 moves the
     # offset forward from where it currently is.
     buf.seek(_FLOAT_SIZE, 1)

     pairs = []
     running_pos = 0
     for _ in xrange(entry_count):
         # Each varint is an offset from the previous position.
         running_pos += read_varint(buf.read)
         pairs.append((running_pos, byte_to_float(buf.read(1))))
     return pairs
Пример #10
0
    def decode_character_boosts(self, valuestring):
        """Decode ``valuestring`` into ``(position, startchar, endchar, boost)``
        tuples.

        Layout as read here: a "!I"-packed entry count, ``_FLOAT_SIZE`` bytes
        that are skipped (the summed boost), then for each entry three
        varints (position delta, start offset from the previous end, length)
        followed by one byte that ``byte_to_float`` converts into the boost.
        """
        stream = StringIO(valuestring)
        header = stream.read(_INT_SIZE)
        count = unpack("!I", header)[0]

        # Step over the summed boost; whence=1 seeks relative to the current
        # offset.
        stream.seek(_FLOAT_SIZE, 1)

        decoded = []
        current = 0
        char_end = 0
        for _ in xrange(count):
            # Position is a delta from the previous position.
            current += read_varint(stream.read)
            # Start is relative to the previous end; end is relative to start.
            char_start = char_end + read_varint(stream.read)
            char_end = char_start + read_varint(stream.read)
            weight = byte_to_float(stream.read(1))
            decoded.append((current, char_start, char_end, weight))
        return decoded
Пример #11
0
 def decode_character_boosts(self, valuestring):
     """Return the ``(position, startchar, endchar, boost)`` tuples stored
     in ``valuestring``.

     The value starts with a "!I"-packed number of entries and
     ``_FLOAT_SIZE`` bytes holding the summed boost, which is not needed
     here. Each entry that follows consists of three varints and one byte:
     the varints advance the running position, the start character (from
     the previous end) and the end character (from that start); the byte is
     decoded with ``byte_to_float``.
     """
     buf = StringIO(valuestring)
     entry_count = unpack("!I", buf.read(_INT_SIZE))[0]

     # The summed boost is not part of the result; whence=1 moves the
     # offset forward from where it currently is.
     buf.seek(_FLOAT_SIZE, 1)

     records = []
     running_pos = 0
     last_end = 0
     for _ in xrange(entry_count):
         running_pos += read_varint(buf.read)
         begin = last_end + read_varint(buf.read)
         last_end = begin + read_varint(buf.read)
         records.append((running_pos, begin, last_end,
                         byte_to_float(buf.read(1))))
     return records
Пример #12
0
def test_posboost_postings():
    """Verify every PositionBoosts view against randomly built postings.

    Builds three postings of ``(docnum, [(pos, boost), ...])`` with growing
    document numbers and positions, then checks the "position_boosts",
    "positions" and "frequency" views returned by ``roundtrip`` against
    projections of that same data.
    """
    source = []
    current_doc = 0
    for _ in xrange(3):
        current_doc += randint(1, 10)
        items = []
        where = 0
        for __ in xrange(randint(1, 3)):
            where += randint(1, 10)
            # Boost is encoded to a byte and decoded again before use as the
            # expected value (presumably to match what the format can store
            # -- confirm in the codec).
            encoded_boost = float_to_byte(random() * 2)
            items.append((where, byte_to_float(encoded_boost)))
        source.append((current_doc, items))

    # Positions together with boosts.
    result = roundtrip(source, PositionBoosts(), "position_boosts")
    assert_equal(source, result)

    # Bare positions.
    as_posns = [(d, [p for p, _b in items]) for d, items in source]
    result = roundtrip(source, PositionBoosts(), "positions")
    assert_equal(as_posns, result)

    # Number of positions per document.
    as_freq = [(d, len(items)) for d, items in source]
    result = roundtrip(source, PositionBoosts(), "frequency")
    assert_equal(as_freq, result)
Пример #13
0
 def decode_weight(self, valuestring):
     """Compute the weight encoded in ``valuestring``.

     The first ``_INT_SIZE`` bytes hold a "!I"-packed frequency and the
     last element holds the document boost in ``byte_to_float`` form. The
     result is frequency * document boost * ``self.field_boost``.
     """
     header = valuestring[:_INT_SIZE]
     frequency = unpack("!I", header)[0]
     doc_weight = byte_to_float(valuestring[-1])
     boosted = frequency * doc_weight
     return boosted * self.field_boost
Пример #14
0
 def decode_docboosts(self, valuestring):
     """Decode ``valuestring`` into a ``(freq, docboost)`` tuple.

     The first ``_INT_SIZE`` bytes hold a "!I"-packed frequency and the
     last element holds the document boost in ``byte_to_float`` form.
     """
     header = valuestring[:_INT_SIZE]
     frequency = unpack("!I", header)[0]
     doc_weight = byte_to_float(valuestring[-1])
     return frequency, doc_weight
Пример #15
0
 def decode_weight(self, valuestring):
     """Return the boosted weight stored in ``valuestring``.

     Unpacks the leading "!I" integer (the frequency), decodes the final
     element with ``byte_to_float`` (the document boost), and multiplies
     the two together with ``self.field_boost``.
     """
     count = unpack("!I", valuestring[:_INT_SIZE])[0]
     boost_factor = byte_to_float(valuestring[-1])
     weight = count * boost_factor
     weight = weight * self.field_boost
     return weight
Пример #16
0
 def decode_docboosts(self, valuestring):
     """Return the ``(freq, docboost)`` pair stored in ``valuestring``.

     Unpacks the leading "!I" integer as the frequency and decodes the
     final element with ``byte_to_float`` as the document boost.
     """
     count = unpack("!I", valuestring[:_INT_SIZE])[0]
     boost_factor = byte_to_float(valuestring[-1])
     result = (count, boost_factor)
     return result