Exemplo n.º 1
0
    def test_posboost_postings(self):
        """Check PositionBoosts postings under three decodings.

        Random ``(pos, boost)`` postings are built for three documents and
        handed to ``self.roundtrip`` (defined elsewhere) under the names
        "position_boosts", "positions" and "frequency". Each result is
        compared with the matching projection of the generated data.
        """
        postings = []
        doc_id = 0
        for _ in xrange(3):
            doc_id += randint(1, 10)
            entries = []
            position = 0
            for __ in xrange(randint(1, 3)):
                position += randint(1, 10)
                # Pass the boost through the byte form and back; presumably
                # so the expected value is one the encoding can represent.
                weight = byte_to_float(float_to_byte(random() * 2))
                entries.append((position, weight))
            postings.append((doc_id, entries))

        # Full (position, boost) data.
        decoded = self.roundtrip(postings, PositionBoosts(None),
                                 "position_boosts")
        self.assertEqual(postings, decoded)

        # Positions only.
        only_positions = []
        for number, entries in postings:
            only_positions.append((number, [entry[0] for entry in entries]))
        decoded = self.roundtrip(postings, PositionBoosts(None), "positions")
        self.assertEqual(only_positions, decoded)

        # Number of positions per document.
        counts = [(number, len(entries)) for number, entries in postings]
        decoded = self.roundtrip(postings, PositionBoosts(None), "frequency")
        self.assertEqual(counts, decoded)
Exemplo n.º 2
0
def test_charboost_postings():
    """Check CharacterBoosts postings under every decoding requested here.

    Random ``(pos, startchar, endchar, boost)`` postings are generated for
    20 documents and passed to ``roundtrip`` (defined elsewhere) under the
    names "character_boosts", "characters", "position_boosts", "positions"
    and "frequency". Each result is compared with the matching projection
    of the generated data.
    """
    postings = []
    doc_id = 0
    for _ in xrange(20):
        doc_id += randint(1, 10)
        entries = []
        position = 0
        char_end = 0
        for __ in xrange(randint(1, 10)):
            position += randint(1, 10)
            # Each span starts after the previous one ended.
            char_start = char_end + randint(3, 10)
            char_end = char_start + randint(3, 10)
            # Pass the boost through the byte form and back; presumably so
            # the expected value is one the encoding can represent.
            weight = byte_to_float(float_to_byte(random() * 2))
            entries.append((position, char_start, char_end, weight))
        postings.append((doc_id, entries))

    def check(expected, name):
        # Compare one projection with the round-tripped postings.
        assert_equal(expected, roundtrip(postings, CharacterBoosts(), name))

    check(postings, "character_boosts")

    with_chars = [(number, [(p, start, end) for p, start, end, _w in items])
                  for number, items in postings]
    check(with_chars, "characters")

    with_boosts = [(number, [(p, w) for p, _start, _end, w in items])
                   for number, items in postings]
    check(with_boosts, "position_boosts")

    only_positions = [(number, [p for p, _start, _end, _w in items])
                      for number, items in postings]
    check(only_positions, "positions")

    counts = [(number, len(found)) for number, found in only_positions]
    check(counts, "frequency")
Exemplo n.º 3
0
def test_charboost_postings():
    """Round-trip random character/boost postings via CharacterBoosts.

    Builds 20 documents of ``(pos, startchar, endchar, boost)`` tuples, then
    asks ``roundtrip`` (defined elsewhere) for the "character_boosts",
    "characters", "position_boosts", "positions" and "frequency" views and
    asserts that each equals the projection computed from the input.
    """
    postings = []
    current_doc = 0
    for _ in xrange(20):
        current_doc += randint(1, 10)
        tuples = []
        where = 0
        last_end = 0
        for __ in xrange(randint(1, 10)):
            where += randint(1, 10)
            # Spans never overlap: each begins past the previous end.
            begin = last_end + randint(3, 10)
            last_end = begin + randint(3, 10)
            # The boost goes through float_to_byte/byte_to_float first;
            # presumably so it compares equal after being stored.
            bst = byte_to_float(float_to_byte(random() * 2))
            tuples.append((where, begin, last_end, bst))
        postings.append((current_doc, tuples))

    # Full tuples.
    result = roundtrip(postings, CharacterBoosts(), "character_boosts")
    assert_equal(postings, result)

    # (pos, startchar, endchar) triples.
    expected = [(dn, [item[:3] for item in items]) for dn, items in postings]
    result = roundtrip(postings, CharacterBoosts(), "characters")
    assert_equal(expected, result)

    # (pos, boost) pairs.
    expected = [(dn, [(item[0], item[3]) for item in items])
                for dn, items in postings]
    result = roundtrip(postings, CharacterBoosts(), "position_boosts")
    assert_equal(expected, result)

    # Bare positions.
    positions = [(dn, [item[0] for item in items]) for dn, items in postings]
    result = roundtrip(postings, CharacterBoosts(), "positions")
    assert_equal(positions, result)

    # Position counts, derived from the position lists above.
    expected = [(dn, len(plist)) for dn, plist in positions]
    result = roundtrip(postings, CharacterBoosts(), "frequency")
    assert_equal(expected, result)
Exemplo n.º 4
0
    def write_8bitfloat(self, f, mantissabits=5, zeroexp=2):
        """Write the floating point value ``f`` as a single byte.

        :param f: the floating point value to store.
        :param mantissabits: how many bits hold the mantissa; the remaining
            bits hold the exponent.
        :param zeroexp: the zero point for the exponent.
        """

        encoded = float_to_byte(f, mantissabits, zeroexp)
        self.write_byte(encoded)
Exemplo n.º 5
0
 def write_8bitfloat(self, f, mantissabits=5, zeroexp=2):
     """Store ``f`` in the wrapped file using a byte-sized float encoding.

     ``mantissabits`` bits are used for the mantissa and the rest for the
     exponent; ``zeroexp`` is the zero point for the exponent.
     """

     as_byte = float_to_byte(f, mantissabits, zeroexp)
     self.write_byte(as_byte)
Exemplo n.º 6
0
 def test_docboost_postings(self):
     """Check that DocBoosts postings come back unchanged.

     Twenty ``(docnum, (freq, boost))`` postings with random values are
     passed to ``self.roundtrip`` (defined elsewhere) under the name
     "docboosts" and the result is compared with the input.
     """
     postings = []
     doc_id = 0
     for _ in xrange(20):
         doc_id += randint(1, 10)
         frequency = randint(1, 1000)
         # Pass the boost through the byte form and back; presumably so
         # the expected value is one the encoding can represent.
         weight = byte_to_float(float_to_byte(random() * 2))
         postings.append((doc_id, (frequency, weight)))

     decoded = self.roundtrip(postings, DocBoosts(None), "docboosts")
     self.assertEqual(postings, decoded)
Exemplo n.º 7
0
    def encode(self, posns_boosts):
        """Serialize a list of ``(pos, boost)`` pairs.

        The result is a packed ``"!If"`` header holding the number of pairs
        and the sum of their boosts, followed by one varint position delta
        and one byte-encoded boost per pair.
        """
        chunks = []
        previous = 0
        total_boost = 0
        for position, weight in posns_boosts:
            total_boost += weight
            # Positions are stored as the difference from the previous one.
            chunks.append(varint(position - previous))
            chunks.append(float_to_byte(weight))
            previous = position

        header = pack("!If", len(posns_boosts), total_boost)
        return header + "".join(chunks)
Exemplo n.º 8
0
 def encode(self, posns_boosts):
     """Encode ``[(pos, boost), ...]`` as a header plus per-pair codes.

     The header is ``pack("!If", count, summed_boost)``; each pair adds a
     varint of the position delta and the byte form of its boost.
     """
     pieces = []
     last_pos = 0
     boost_sum = 0
     for pos, bst in posns_boosts:
         boost_sum += bst
         # Delta against the previous position, not the absolute value.
         delta = varint(pos - last_pos)
         pieces.extend((delta, float_to_byte(bst)))
         last_pos = pos

     prefix = pack("!If", len(posns_boosts), boost_sum)
     return prefix + "".join(pieces)
Exemplo n.º 9
0
    def test_docboost_postings(self):
        """Round-trip random ``(freq, boost)`` postings through DocBoosts.

        Generates 20 postings with increasing document numbers and asserts
        that ``self.roundtrip`` (defined elsewhere), asked for "docboosts",
        returns exactly the generated list.
        """
        postings = []
        current_doc = 0
        for _ in xrange(20):
            current_doc += randint(1, 10)
            count = randint(1, 1000)
            # The boost goes through float_to_byte/byte_to_float first;
            # presumably so it compares equal after being stored.
            bst = byte_to_float(float_to_byte(random() * 2))
            value = (count, bst)
            postings.append((current_doc, value))

        result = self.roundtrip(postings, DocBoosts(None), "docboosts")
        self.assertEqual(postings, result)
Exemplo n.º 10
0
    def encode(self, posns_chars_boosts):
        """Serialize a list of ``(pos, startchar, endchar, boost)`` tuples.

        The result is a packed ``"!If"`` header holding the number of tuples
        and the sum of their boosts. Four codes follow per tuple: the
        position delta, the start character's offset from the previous end
        character, the span length, and the byte-encoded boost.
        """
        chunks = []

        previous_pos = 0
        previous_end = 0
        total_boost = 0
        for position, first, last, weight in posns_chars_boosts:
            total_boost += weight
            chunks.append(varint(position - previous_pos))
            previous_pos = position
            # Start is relative to the previous end; end is relative to start.
            chunks.append(varint(first - previous_end))
            chunks.append(varint(last - first))
            chunks.append(float_to_byte(weight))
            previous_end = last

        header = pack("!If", len(posns_chars_boosts), total_boost)
        return header + "".join(chunks)
Exemplo n.º 11
0
 def encode(self, posns_chars_boosts):
     """Encode ``[(pos, startchar, endchar, boost), ...]`` into one string.

     Output is ``pack("!If", count, summed_boost)`` followed, for every
     tuple, by varints for the position delta, the gap from the previous
     end character to this start character, and the span length, and then
     the byte form of the boost.
     """
     pieces = []

     last_pos = 0
     last_end = 0
     boost_sum = 0
     for pos, begin, finish, bst in posns_chars_boosts:
         boost_sum += bst
         pieces.append(varint(pos - last_pos))
         last_pos = pos
         # Character offsets are deltas: gap since last end, then length.
         gap = varint(begin - last_end)
         length = varint(finish - begin)
         pieces.extend((gap, length, float_to_byte(bst)))
         last_end = finish

     prefix = pack("!If", len(posns_chars_boosts), boost_sum)
     return prefix + "".join(pieces)
Exemplo n.º 12
0
def test_posboost_postings():
    """Round-trip random position/boost postings via PositionBoosts.

    Builds three documents of ``(pos, boost)`` pairs, then asks ``roundtrip``
    (defined elsewhere) for the "position_boosts", "positions" and
    "frequency" views and asserts that each equals the projection computed
    from the input.
    """
    postings = []
    doc_id = 0
    for _ in xrange(3):
        doc_id += randint(1, 10)
        pairs = []
        position = 0
        for __ in xrange(randint(1, 3)):
            position += randint(1, 10)
            # Pass the boost through the byte form and back; presumably so
            # the expected value is one the encoding can represent.
            weight = byte_to_float(float_to_byte(random() * 2))
            pairs.append((position, weight))
        postings.append((doc_id, pairs))

    def check(expected, name):
        # Compare one projection with the round-tripped postings.
        assert_equal(expected, roundtrip(postings, PositionBoosts(), name))

    check(postings, "position_boosts")

    only_positions = [(number, [p for p, _w in items])
                      for number, items in postings]
    check(only_positions, "positions")

    counts = [(number, len(items)) for number, items in postings]
    check(counts, "frequency")
Exemplo n.º 13
0
 def encode(self, freq_docboost):
     """Serialize a ``(freq, docboost)`` pair.

     The frequency is packed with format ``"!I"`` and the byte form of the
     boost is appended to it.
     """
     frequency, boost = freq_docboost
     packed_freq = pack("!I", frequency)
     return packed_freq + float_to_byte(boost)
Exemplo n.º 14
0
 def encode(self, freq_docboost):
     """Encode ``(freq, docboost)`` as a packed count plus a boost byte.

     ``freq`` goes through ``pack("!I", ...)``; ``docboost`` goes through
     ``float_to_byte`` and is concatenated after it.
     """
     count, bst = freq_docboost
     head = pack("!I", count)
     tail = float_to_byte(bst)
     return head + tail