示例#1
0
    def word_values(self, value, analyzer, **kwargs):
        """Yield one tuple per distinct token text found in ``value``.

        ``value`` is run through ``tokens`` with positions, character
        offsets and boosts switched on.  Occurrences are grouped by token
        text and each group is yielded as
        ``(text, count, summed_boost * self.field_boost, encoded)``.

        ``encoded`` is ``pack_uint(count)``, then ``pack_float`` of the
        scaled boost sum, then the ``dumps`` output of the delta list with
        its first two bytes and last byte removed.
        """
        fb = self.field_boost

        # These analyzer options are always forced on, overriding the caller.
        kwargs.update(positions=True, chars=True, boosts=True)

        occurrences = defaultdict(list)
        for token in tokens(value, analyzer, kwargs):
            occurrences[token.text].append(
                (token.pos, token.startchar, token.endchar, token.boost))

        for text, spans in iteritems(occurrences):
            # Each entry is (position delta, gap since the previous token's
            # end character, token length in characters, boost).
            deltas = []
            prev_pos = prev_end = 0
            total_boost = 0
            for pos, start, end, boost in spans:
                deltas.append((pos - prev_pos, start - prev_end,
                               end - start, boost))
                total_boost += boost
                prev_pos, prev_end = pos, end

            count = len(spans)
            weight = total_boost * fb
            # [2:-1] trims the first two bytes and the final byte of the
            # dumps output (presumably a pickle header and STOP opcode --
            # confirm against the matching decoder).
            encoded = (pack_uint(count) + pack_float(weight)
                       + dumps(deltas, -1)[2:-1])

            yield (text, count, weight, encoded)
示例#2
0
 def encode(self, poses):
     """Encode a sequence of ``(pos, boost)`` pairs into one value.

     The result is ``pack_uint(len(poses))``, then ``pack_float`` of the
     summed boosts, then the ``dumps`` output of the delta-encoded pairs
     with its first two bytes and last byte removed.
     """
     deltas = []
     previous = 0
     total = 0
     for position, boost in poses:
         total += boost
         # Store the distance from the previous position, not the position.
         deltas.append((position - previous, boost))
         previous = position

     header = pack_uint(len(poses)) + pack_float(total)
     return header + dumps(deltas, -1)[2:-1]
示例#3
0
 def encode(self, poses):
     """Return the encoded form of ``poses``, a sequence of (pos, boost).

     Positions are stored as differences from the preceding position.
     The encoding is the packed entry count, the packed sum of all boosts,
     and the ``dumps`` serialisation of the delta list minus its first two
     bytes and final byte.
     """
     boost_sum = 0
     last_pos = 0
     encoded_pairs = []
     for pos, boost in poses:
         boost_sum += boost
         encoded_pairs.append((pos - last_pos, boost))
         last_pos = pos

     count_bytes = pack_uint(len(poses))
     boost_bytes = pack_float(boost_sum)
     return count_bytes + boost_bytes + dumps(encoded_pairs, -1)[2:-1]
示例#4
0
    def encode(self, posns_chars_boosts):
        """Encode ``(pos, startchar, endchar, boost)`` tuples into one value.

        Every tuple is rewritten as ``(position delta, gap since the
        previous endchar, character length, boost)``.  The result is
        ``pack_uint`` of the tuple count, ``pack_float`` of the summed
        boosts, and the ``dumps`` output of the rewritten list with its
        first two bytes and last byte removed.
        """
        deltas = []
        total_boost = 0
        prev_pos, prev_end = 0, 0
        for pos, start, end, boost in posns_chars_boosts:
            entry = (pos - prev_pos, start - prev_end, end - start, boost)
            deltas.append(entry)
            prev_pos, prev_end = pos, end
            total_boost += boost

        header = pack_uint(len(posns_chars_boosts)) + pack_float(total_boost)
        return header + dumps(deltas, -1)[2:-1]
示例#5
0
文件: formats.py 项目: ljarufe/mp100
 def encode(self, posns_chars_boosts):
     """Return the encoded form of a list of character-position postings.

     ``posns_chars_boosts`` holds ``(pos, startchar, endchar, boost)``
     tuples.  Positions are stored relative to the previous position and
     start offsets relative to the previous end offset; the end offset is
     replaced by the token length.  The packed count and the packed boost
     total are put in front of the ``dumps`` output, which has its first
     two bytes and last byte stripped.
     """
     rel_entries = []
     last_pos = 0
     last_endchar = 0
     boost_total = 0
     for pos, startchar, endchar, boost in posns_chars_boosts:
         length = endchar - startchar
         rel_entries.append(
             (pos - last_pos, startchar - last_endchar, length, boost))
         last_pos = pos
         last_endchar = endchar
         boost_total += boost

     count_bytes = pack_uint(len(posns_chars_boosts))
     boost_bytes = pack_float(boost_total)
     return count_bytes + boost_bytes + dumps(rel_entries, -1)[2:-1]
示例#6
0
    def encode(self, poses):
        """Encode ``(pos, startchar, endchar, boost)`` tuples.

        Returns a 2-tuple ``(encoded, summed_boost)``.  ``encoded`` is
        ``pack_uint`` of the tuple count, ``pack_float`` of the boost sum
        multiplied by ``self.field_boost``, and the complete
        ``dumps(..., 2)`` output of the delta-encoded tuples.
        ``summed_boost`` is the boost sum *without* the field boost applied.
        """
        fb = self.field_boost

        deltas = []
        total_boost = 0
        prev_pos = prev_end = 0
        for pos, start, end, boost in poses:
            # (position delta, gap since previous end, length, boost)
            deltas.append((pos - prev_pos, start - prev_end,
                           end - start, boost))
            prev_pos, prev_end = pos, end
            total_boost += boost

        header = pack_uint(len(poses)) + pack_float(total_boost * fb)
        encoded = header + dumps(deltas, 2)
        return (encoded, total_boost)
示例#7
0
    def encode(self, poses):
        """Return ``(encoded_bytes, raw_boost_sum)`` for ``poses``.

        ``poses`` is a sequence of ``(pos, startchar, endchar, boost)``.
        Each item becomes ``(pos delta, startchar minus previous endchar,
        endchar - startchar, boost)``.  The encoded bytes are the packed
        item count, the packed boost sum scaled by ``self.field_boost``,
        and ``dumps`` of the rewritten items using protocol 2.  The second
        element of the result is the unscaled boost sum.
        """
        fb = self.field_boost

        rel_items = []
        last_pos = 0
        last_endchar = 0
        boost_sum = 0
        for pos, startchar, endchar, boost in poses:
            span_length = endchar - startchar
            rel_items.append(
                (pos - last_pos, startchar - last_endchar, span_length, boost))
            last_pos = pos
            last_endchar = endchar
            boost_sum += boost

        count_bytes = pack_uint(len(poses))
        boost_bytes = pack_float(boost_sum * fb)
        return (count_bytes + boost_bytes + dumps(rel_items, 2), boost_sum)
示例#8
0
    def word_values(self, value, analyzer, **kwargs):
        """Yield one tuple per distinct token text found in ``value``.

        ``value`` is run through ``tokens`` with positions and boosts
        switched on.  Occurrences are grouped by token text and each group
        is yielded as ``(text, count, boost_sum * self.field_boost,
        encoded)``.

        ``encoded`` is ``pack_uint(count)``, then ``pack_float`` of the
        boost sum, then the ``dumps`` output of the ``(position delta,
        boost)`` list with its first two bytes and last byte removed.
        """
        fb = self.field_boost

        # These analyzer options are always forced on, overriding the caller.
        kwargs.update(positions=True, boosts=True)

        occurrences = defaultdict(list)
        for token in tokens(value, analyzer, kwargs):
            entry = (token.pos, token.boost)
            occurrences[token.text].append(entry)

        for text, pairs in iteritems(occurrences):
            deltas = []
            previous = 0
            total_boost = 0
            for pos, boost in pairs:
                total_boost += boost
                deltas.append((pos - previous, boost))
                previous = pos

            # NOTE(review): the boost packed into the encoded value is NOT
            # multiplied by field_boost, while the yielded weight below is
            # -- confirm this asymmetry is intended.
            encoded = (pack_uint(len(pairs)) + pack_float(total_boost)
                       + dumps(deltas, -1)[2:-1])
            weight = sum(p[1] for p in pairs) * fb
            yield (text, len(pairs), weight, encoded)
示例#9
0
 def write_float(self, n):
     """Write ``n`` to ``self.file`` in the form ``pack_float`` produces."""
     sink = self.file
     sink.write(pack_float(n))
示例#10
0
 def write_float(self, n):
     """Pack ``n`` with ``pack_float`` and write it to ``self.file``.

     Nothing is returned; the result of the underlying ``write`` call is
     discarded.
     """
     write = self.file.write
     write(pack_float(n))