Пример #1
0
    def word_datas(self, value, **kwargs):
        """Count token occurrences in ``value``.

        Returns a generator of ``(text, freq, freq)`` tuples, one per
        distinct token text coming out of ``unstopped(self.analyzer(...))``.
        When ``self.boost_as_freq`` is true, the count for a text is the
        sum of ``int(t.boost)`` over its tokens; otherwise every token
        counts as 1.

        ``**kwargs`` is accepted but not forwarded to the analyzer.
        """
        seen = defaultdict(int)
        if self.boost_as_freq:
            for t in unstopped(self.analyzer(value, boosts=True)):
                seen[t.text] += int(t.boost)
        else:
            for t in unstopped(self.analyzer(value)):
                seen[t.text] += 1

        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        return ((w, freq, freq) for w, freq in seen.items())
Пример #2
0
    def word_datas(self, value, **kwargs):
        """Tally how often each token text occurs in ``value``.

        The tokens are taken from ``unstopped(self.analyzer(value))``. If
        ``self.boost_as_freq`` is set, the analyzer is asked for boosts and
        each token contributes ``int(t.boost)`` to its text's total;
        otherwise each token contributes 1.

        Returns a generator yielding ``(text, freq, freq)`` per distinct
        text. ``**kwargs`` is ignored by this method.
        """
        seen = defaultdict(int)
        if self.boost_as_freq:
            for t in unstopped(self.analyzer(value, boosts=True)):
                seen[t.text] += int(t.boost)
        else:
            for t in unstopped(self.analyzer(value)):
                seen[t.text] += 1

        # Use items(): iteritems() does not exist on Python 3 dicts.
        return ((w, freq, freq) for w, freq in seen.items())
Пример #3
0
    def word_values(self, value, **kwargs):
        """Count token occurrences in ``value`` and encode each count.

        ``**kwargs`` is forwarded to ``self.analyzer``. When
        ``self.boost_as_freq`` is true, the analyzer is called with
        ``boosts=True`` and each token adds ``int(t.boost)`` to its text's
        count; otherwise each token adds 1.

        Returns a generator of
        ``(text, freq, float(freq), self.encode(freq))`` tuples, one per
        distinct token text.
        """
        seen = defaultdict(int)
        if self.boost_as_freq:
            for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
                seen[t.text] += int(t.boost)
        else:
            for t in unstopped(self.analyzer(value, **kwargs)):
                seen[t.text] += 1

        encode = self.encode
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        return ((w, freq, float(freq), encode(freq))
                for w, freq in seen.items())
Пример #4
0
 def word_values(self, value, **kwargs):
     """Tally token texts in ``value`` and encode each tally.

     Extra keyword arguments are passed through to ``self.analyzer``.
     With ``self.boost_as_freq`` set, boosts are requested from the
     analyzer and ``int(t.boost)`` is added per token; without it, each
     token adds 1.

     Returns a generator yielding, for every distinct token text,
     ``(text, freq, float(freq), self.encode(freq))``.
     """
     seen = defaultdict(int)
     if self.boost_as_freq:
         for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
             seen[t.text] += int(t.boost)
     else:
         for t in unstopped(self.analyzer(value, **kwargs)):
             seen[t.text] += 1

     encode = self.encode
     # Use items(): iteritems() does not exist on Python 3 dicts.
     return ((w, freq, float(freq), encode(freq))
             for w, freq in seen.items())
Пример #5
0
 def word_values(self, value, doc_boost=1.0, **kwargs):
     """Count token occurrences and encode them together with ``doc_boost``.

     ``**kwargs`` is forwarded to ``self.analyzer``. Every token coming
     out of ``unstopped(...)`` adds 1 to the count of its text.

     Returns a generator of
     ``(text, freq, self.encode((freq, doc_boost)))`` tuples, one per
     distinct token text.
     """
     seen = defaultdict(int)
     for t in unstopped(self.analyzer(value, **kwargs)):
         seen[t.text] += 1

     encode = self.encode
     # items() works on both Python 2 and 3; iteritems() is Python 2 only.
     return ((w, freq, encode((freq, doc_boost)))
             for w, freq in seen.items())
Пример #6
0
    def word_datas(self, value, start_pos=0, **kwargs):
        """Collect the positions at which each token text occurs.

        The analyzer is called with ``positions=True`` and ``start_pos``;
        ``**kwargs`` is accepted but not forwarded.

        Returns a generator of ``(text, len(positions), positions)``
        tuples, where ``positions`` is the list of ``start_pos + t.pos``
        values for that text, in token order.
        """
        seen = defaultdict(list)
        for t in unstopped(
                self.analyzer(value, positions=True, start_pos=start_pos)):
            # NOTE(review): start_pos is passed to the analyzer and also
            # added here; if the analyzer already offsets t.pos this
            # double-counts -- confirm against the analyzer contract.
            seen[t.text].append(start_pos + t.pos)

        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        return ((w, len(poslist), poslist) for w, poslist in seen.items())
Пример #7
0
 def word_values(self, value, start_pos=0, **kwargs):
     """Collect and encode the positions of each token text in ``value``.

     The analyzer is called with ``positions=True``, ``start_pos`` and
     any extra ``**kwargs``.

     Returns a generator of
     ``(text, len(positions), self.encode(positions))`` tuples, where
     ``positions`` lists ``start_pos + t.pos`` for every token with that
     text, in token order.
     """
     seen = defaultdict(list)
     for t in unstopped(self.analyzer(value, positions=True,
                                      start_pos=start_pos, **kwargs)):
         # NOTE(review): start_pos is passed to the analyzer and also
         # added here; confirm the analyzer does not already offset t.pos.
         seen[t.text].append(start_pos + t.pos)

     encode = self.encode
     # Use items(): iteritems() does not exist on Python 3 dicts.
     return ((w, len(poslist), encode(poslist))
             for w, poslist in seen.items())
Пример #8
0
    def word_values(self, value, doc_boost=1.0, **kwargs):
        """Tally token texts and encode each tally paired with ``doc_boost``.

        Extra keyword arguments go straight to ``self.analyzer``; each
        token yielded by ``unstopped(...)`` increments its text's count
        by 1.

        Returns a generator yielding
        ``(text, freq, self.encode((freq, doc_boost)))`` for every
        distinct token text.
        """
        seen = defaultdict(int)
        for t in unstopped(self.analyzer(value, **kwargs)):
            seen[t.text] += 1

        encode = self.encode
        # Use items(): iteritems() does not exist on Python 3 dicts.
        return ((w, freq, encode((freq, doc_boost)))
                for w, freq in seen.items())
Пример #9
0
    def word_datas(self, value, start_pos=0, **kwargs):
        """Collect ``(pos, boost)`` pairs for each token text in ``value``.

        The analyzer is called with ``positions=True``, ``boosts=True``
        and ``start_pos``; ``**kwargs`` is accepted but not forwarded.

        Returns a generator of ``(text, len(pairs), pairs)`` tuples,
        where ``pairs`` is the list of ``(t.pos, t.boost)`` for every
        token with that text, in token order.
        """
        # The factory must be ``list``: ``iter`` cannot be called with no
        # arguments, so defaultdict(iter) raised TypeError on the first
        # missing key (and an iterator has no ``append`` anyway).
        seen = defaultdict(list)
        for t in unstopped(self.analyzer(value, positions=True, boosts=True,
                                         start_pos=start_pos)):
            seen[t.text].append((t.pos, t.boost))

        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        return ((w, len(poslist), poslist) for w, poslist in seen.items())
Пример #10
0
 def word_datas(self, value, start_pos=0, start_char=0, **kwargs):
     """Collect position and character-range triples per token text.

     The analyzer is called with ``positions=True``, ``chars=True``,
     ``start_pos`` and ``start_char``; ``**kwargs`` is accepted but not
     forwarded.

     Returns a generator of ``(text, len(entries), entries)`` tuples,
     where each entry is
     ``(t.pos, start_char + t.startchar, start_char + t.endchar)``.
     """
     seen = defaultdict(list)

     for t in unstopped(self.analyzer(value, positions=True, chars=True,
                                      start_pos=start_pos,
                                      start_char=start_char)):
         # NOTE(review): start_char is passed to the analyzer and also
         # added here; confirm the analyzer does not already offset the
         # character indices.
         seen[t.text].append((t.pos, start_char + t.startchar,
                              start_char + t.endchar))

     # items() works on both Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(ls), ls) for w, ls in seen.items())
Пример #11
0
 def word_datas(self, value, start_pos=0, start_char=0, **kwargs):
     """Group token positions and character spans by token text.

     Tokens come from ``unstopped(self.analyzer(...))`` with positions
     and chars enabled and with ``start_pos`` / ``start_char`` passed
     through. ``**kwargs`` is ignored by this method.

     Returns a generator yielding ``(text, len(entries), entries)`` per
     distinct text; each entry is the tuple
     ``(t.pos, start_char + t.startchar, start_char + t.endchar)``.
     """
     seen = defaultdict(list)

     for t in unstopped(self.analyzer(value, positions=True, chars=True,
                                      start_pos=start_pos,
                                      start_char=start_char)):
         # NOTE(review): start_char is both given to the analyzer and
         # added below -- verify this is not a double offset.
         seen[t.text].append((t.pos, start_char + t.startchar,
                              start_char + t.endchar))

     # Use items(): iteritems() does not exist on Python 3 dicts.
     return ((w, len(ls), ls) for w, ls in seen.items())
Пример #12
0
 def word_values(self, value, doc_boost=1.0, **kwargs):
     """Count tokens and sum their boosts per token text.

     The analyzer is called with ``boosts=True`` plus any ``**kwargs``.
     For each token, its text's frequency grows by 1 and its weight by
     ``t.boost``.

     Returns a generator of
     ``(text, freq, weight * doc_boost, self.encode((freq, doc_boost)))``
     tuples, one per distinct token text.
     """
     freqs = defaultdict(int)
     weights = defaultdict(float)
     for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
         weights[t.text] += t.boost
         freqs[t.text] += 1

     encode = self.encode
     # items() works on both Python 2 and 3; iteritems() is Python 2 only.
     return ((w, freq, weights[w] * doc_boost, encode((freq, doc_boost)))
             for w, freq in freqs.items())
Пример #13
0
 def word_values(self, value, start_pos=0, **kwargs):
     """Collect and encode ``(pos, boost)`` pairs for each token text.

     The analyzer is called with ``positions=True``, ``boosts=True``,
     ``start_pos`` and any extra ``**kwargs``.

     Returns a generator of
     ``(text, len(pairs), total_boost, self.encode(pairs))`` tuples,
     where ``pairs`` lists ``(t.pos, t.boost)`` for every token with that
     text and ``total_boost`` is the sum of the boosts in ``pairs``.
     """
     # The factory must be ``list``: ``iter`` cannot be called with no
     # arguments, so defaultdict(iter) raised TypeError on the first
     # missing key (and an iterator has no ``append`` anyway).
     seen = defaultdict(list)
     for t in unstopped(self.analyzer(value, positions=True, boosts=True,
                                      start_pos=start_pos, **kwargs)):
         seen[t.text].append((t.pos, t.boost))

     encode = self.encode
     # items() works on both Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(poslist), sum(p[1] for p in poslist), encode(poslist))
             for w, poslist in seen.items())
Пример #14
0
 def word_values(self, value, start_pos=0, **kwargs):
     """Collect positions and summed boosts per token text.

     The analyzer is called with ``positions=True``, ``start_pos`` and
     any extra ``**kwargs``.

     Returns a generator of
     ``(text, len(positions), weight, self.encode(positions))`` tuples,
     where ``positions`` lists ``start_pos + t.pos`` for every token with
     that text and ``weight`` is the sum of those tokens' ``t.boost``.
     """
     poses = defaultdict(list)
     weights = defaultdict(float)
     for t in unstopped(self.analyzer(value, positions=True,
                                      start_pos=start_pos, **kwargs)):
         # NOTE(review): start_pos is passed to the analyzer and also
         # added here; confirm the analyzer does not already offset t.pos.
         poses[t.text].append(start_pos + t.pos)
         # NOTE(review): t.boost is read although boosts=True is not
         # requested here (unless supplied via kwargs) -- confirm tokens
         # always carry a boost.
         weights[t.text] += t.boost

     encode = self.encode
     # items() works on both Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(poslist), weights[w], encode(poslist))
             for w, poslist in poses.items())
Пример #15
0
    def word_values(self, value, **kwargs):
        """Count tokens and sum their boosts per token text.

        The analyzer is called with ``boosts=True`` plus any ``**kwargs``.
        Each token adds 1 to its text's frequency and ``t.boost`` to its
        text's weight.

        Returns a generator of
        ``(text, freq, weight * self.field_boost, self.encode(freq))``
        tuples, one per distinct token text.
        """
        fb = self.field_boost
        freqs = defaultdict(int)
        weights = defaultdict(float)

        for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
            freqs[t.text] += 1
            weights[t.text] += t.boost

        encode = self.encode
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        return ((w, freq, weights[w] * fb, encode(freq))
                for w, freq in freqs.items())
Пример #16
0
    def word_values(self, value, start_pos=0, **kwargs):
        """Collect and encode the positions of each token text.

        The analyzer is called with ``positions=True``, ``start_pos`` and
        any extra ``**kwargs``.

        Returns a generator of
        ``(text, n, float(n), self.encode(positions))`` tuples, where
        ``positions`` lists ``start_pos + t.pos`` for every token with
        that text and ``n`` is ``len(positions)``.
        """
        seen = defaultdict(list)
        for t in unstopped(
                self.analyzer(value,
                              positions=True,
                              start_pos=start_pos,
                              **kwargs)):
            # NOTE(review): start_pos is passed to the analyzer and also
            # added here; confirm the analyzer does not already offset
            # t.pos.
            seen[t.text].append(start_pos + t.pos)

        encode = self.encode
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        return ((w, len(poslist), float(len(poslist)), encode(poslist))
                for w, poslist in seen.items())
Пример #17
0
    def word_values(self, value, **kwargs):
        """Tally frequency and accumulated boost for each token text.

        Tokens come from ``unstopped(self.analyzer(value, boosts=True,
        **kwargs))``. Per token, the frequency of its text is incremented
        and ``t.boost`` is added to the text's weight.

        Returns a generator yielding, for every distinct token text,
        ``(text, freq, weight * self.field_boost, self.encode(freq))``.
        """
        fb = self.field_boost
        freqs = defaultdict(int)
        weights = defaultdict(float)

        for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
            freqs[t.text] += 1
            weights[t.text] += t.boost

        encode = self.encode
        # Use items(): iteritems() does not exist on Python 3 dicts.
        return ((w, freq, weights[w] * fb, encode(freq))
                for w, freq in freqs.items())
Пример #18
0
 def word_values(self, value, start_pos=0, start_char=0, **kwargs):
     """Collect and encode position/char-range/boost tuples per token text.

     The analyzer is called with ``positions=True``, ``characters=True``,
     ``boosts=True``, ``start_pos``, ``start_char`` and any ``**kwargs``.

     Returns a generator of
     ``(text, len(entries), self.encode(entries))`` tuples, where each
     entry is ``(t.pos, start_char + t.startchar,
     start_char + t.endchar, t.boost)``.
     """
     # The factory must be ``list``: ``iter`` cannot be called with no
     # arguments, so defaultdict(iter) raised TypeError on the first
     # missing key (and an iterator has no ``append`` anyway).
     seen = defaultdict(list)
     # NOTE(review): the keyword used here is ``characters`` -- confirm
     # this is the name the analyzer expects.
     for t in unstopped(self.analyzer(value, positions=True,
                                      characters=True, boosts=True,
                                      start_pos=start_pos,
                                      start_char=start_char, **kwargs)):
         seen[t.text].append((t.pos,
                              start_char + t.startchar,
                              start_char + t.endchar,
                              t.boost))

     encode = self.encode
     # items() works on both Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(poslist), encode(poslist))
             for w, poslist in seen.items())
Пример #19
0
 def word_values(self, value, start_pos=0, start_char=0, **kwargs):
     """Collect position/char-range/boost tuples and total boost per text.

     The analyzer is called with ``positions=True``, ``characters=True``,
     ``boosts=True``, ``start_pos``, ``start_char`` and any ``**kwargs``.

     Returns a generator of
     ``(text, len(entries), total_boost, self.encode(entries))`` tuples.
     Each entry is ``(t.pos, start_char + t.startchar,
     start_char + t.endchar, t.boost)`` and ``total_boost`` is the sum of
     the fourth element over the entries.
     """
     # The factory must be ``list``: ``iter`` cannot be called with no
     # arguments, so defaultdict(iter) raised TypeError on the first
     # missing key (and an iterator has no ``append`` anyway).
     seen = defaultdict(list)
     # NOTE(review): the keyword used here is ``characters`` -- confirm
     # this is the name the analyzer expects.
     for t in unstopped(self.analyzer(value, positions=True,
                                      characters=True, boosts=True,
                                      start_pos=start_pos,
                                      start_char=start_char, **kwargs)):
         seen[t.text].append((t.pos,
                              start_char + t.startchar,
                              start_char + t.endchar,
                              t.boost))

     encode = self.encode
     # items() works on both Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(poslist), sum(p[3] for p in poslist), encode(poslist))
             for w, poslist in seen.items())
Пример #20
0
    def word_values(self, value, start_pos=0, start_char=0, **kwargs):
        """Collect position/char-range triples and summed boost per text.

        The analyzer is called with ``positions=True``, ``chars=True``,
        ``boosts=True``, ``start_pos``, ``start_char`` and any
        ``**kwargs``.

        Returns a generator of
        ``(text, len(entries), weight * self.field_boost,
        self.encode(entries))`` tuples. Each entry is
        ``(t.pos, start_char + t.startchar, start_char + t.endchar)`` and
        ``weight`` is the sum of ``t.boost`` over that text's tokens.
        """
        fb = self.field_boost
        seen = defaultdict(list)
        weights = defaultdict(float)

        for t in unstopped(self.analyzer(value, positions=True, chars=True,
                                         boosts=True, start_pos=start_pos,
                                         start_char=start_char, **kwargs)):
            # NOTE(review): start_char is passed to the analyzer and also
            # added here; confirm this is not a double offset.
            seen[t.text].append((t.pos, start_char + t.startchar,
                                 start_char + t.endchar))
            weights[t.text] += t.boost

        encode = self.encode
        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        return ((w, len(ls), weights[w] * fb, encode(ls))
                for w, ls in seen.items())
Пример #21
0
    def word_values(self, value, start_pos=0, start_char=0, **kwargs):
        """Group positions and character spans by token text, with weights.

        Tokens come from ``unstopped(self.analyzer(...))`` with positions,
        chars and boosts enabled, ``start_pos`` / ``start_char`` passed
        through, and any extra ``**kwargs`` forwarded.

        Returns a generator yielding, per distinct token text,
        ``(text, len(entries), weight * self.field_boost,
        self.encode(entries))``. Each entry is the tuple
        ``(t.pos, start_char + t.startchar, start_char + t.endchar)``;
        ``weight`` accumulates ``t.boost`` for that text.
        """
        fb = self.field_boost
        seen = defaultdict(list)
        weights = defaultdict(float)

        for t in unstopped(
                self.analyzer(value,
                              positions=True,
                              chars=True,
                              boosts=True,
                              start_pos=start_pos,
                              start_char=start_char,
                              **kwargs)):
            # NOTE(review): start_char is both given to the analyzer and
            # added below -- verify this is not a double offset.
            seen[t.text].append(
                (t.pos, start_char + t.startchar, start_char + t.endchar))
            weights[t.text] += t.boost

        encode = self.encode
        # Use items(): iteritems() does not exist on Python 3 dicts.
        return ((w, len(ls), weights[w] * fb, encode(ls))
                for w, ls in seen.items())
Пример #22
0
def tokens(value, analyzer, kwargs):
    """Return the token stream for ``value`` wrapped in ``unstopped``.

    A ``tuple`` or ``list`` value is handed to ``entoken``; anything else
    is handed to ``analyzer``. In both cases the ``kwargs`` dict is
    expanded into keyword arguments of the call.
    """
    if isinstance(value, (tuple, list)):
        return unstopped(entoken(value, **kwargs))
    return unstopped(analyzer(value, **kwargs))
Пример #23
0
def tokens(value, analyzer, kwargs):
    """Build a token generator for ``value`` and pass it to ``unstopped``.

    ``entoken`` is used when ``value`` is a ``tuple`` or ``list``;
    otherwise ``analyzer`` is called. ``kwargs`` is a dict that is
    unpacked into the chosen call.
    """
    is_sequence = isinstance(value, (tuple, list))
    stream = entoken(value, **kwargs) if is_sequence \
        else analyzer(value, **kwargs)
    return unstopped(stream)
Пример #24
0
 def word_values(self, value, **kwargs):
     """Yield one entry per distinct token text in ``value``.

     ``**kwargs`` is forwarded to ``self.analyzer``. Returns a generator
     of ``(text, 1, self.field_boost, '')`` tuples.
     """
     boost = self.field_boost
     texts = set()
     for token in unstopped(self.analyzer(value, **kwargs)):
         texts.add(token.text)
     return ((text, 1, boost, '') for text in texts)
Пример #25
0
 def word_values(self, value, **kwargs):
     """Yield ``(text, 1, '')`` for each distinct token text in ``value``.

     ``**kwargs`` is forwarded to ``self.analyzer``; the result is a
     generator over the set of token texts.
     """
     distinct = set()
     for token in unstopped(self.analyzer(value, **kwargs)):
         distinct.add(token.text)
     return ((text, 1, '') for text in distinct)
Пример #26
0
    def word_datas(self, value, doc_boost=1.0, **kwargs):
        """Count token occurrences and pair each count with ``doc_boost``.

        Tokens come from ``unstopped(self.analyzer(value))``;
        ``**kwargs`` is accepted but not forwarded.

        Returns a generator of ``(text, freq, (freq, doc_boost))``
        tuples, one per distinct token text.
        """
        seen = defaultdict(int)
        for t in unstopped(self.analyzer(value)):
            seen[t.text] += 1

        # items() works on both Python 2 and 3; iteritems() is Python 2 only.
        return ((w, freq, (freq, doc_boost)) for w, freq in seen.items())
Пример #27
0
 def word_values(self, value, **kwargs):
     """Yield ``(text, 1, 1.0, '')`` for each distinct token text.

     ``**kwargs`` is forwarded to ``self.analyzer``; the result is a
     generator over the set of token texts.
     """
     unique_texts = set()
     for token in unstopped(self.analyzer(value, **kwargs)):
         unique_texts.add(token.text)
     return ((text, 1, 1.0, '') for text in unique_texts)
Пример #28
0
    def word_datas(self, value, **kwargs):
        """Yield ``(text, 1, None)`` for each distinct token text.

        Tokens come from ``unstopped(self.analyzer(value))``;
        ``**kwargs`` is accepted but not forwarded.
        """
        texts = set(tok.text for tok in unstopped(self.analyzer(value)))
        return ((text, 1, None) for text in texts)
Пример #29
0
    def word_datas(self, value, doc_boost=1.0, **kwargs):
        """Tally token texts and attach ``doc_boost`` to every tally.

        Each token from ``unstopped(self.analyzer(value))`` increments
        its text's count by 1. ``**kwargs`` is ignored by this method.

        Returns a generator yielding ``(text, freq, (freq, doc_boost))``
        for every distinct token text.
        """
        seen = defaultdict(int)
        for t in unstopped(self.analyzer(value)):
            seen[t.text] += 1

        # Use items(): iteritems() does not exist on Python 3 dicts.
        return ((w, freq, (freq, doc_boost)) for w, freq in seen.items())
Пример #30
0
 def word_values(self, value, **kwargs):
     """Yield ``(text, 1, '')`` for each distinct token text in ``value``.

     ``**kwargs`` is forwarded to ``self.analyzer``.
     """
     token_stream = unstopped(self.analyzer(value, **kwargs))
     distinct = set(tok.text for tok in token_stream)
     return ((text, 1, '') for text in distinct)
Пример #31
0
    def word_datas(self, value, **kwargs):
        """Yield one ``(text, 1, None)`` tuple per distinct token text.

        The tokens are read from ``unstopped(self.analyzer(value))``;
        ``**kwargs`` is ignored by this method.
        """
        distinct = set()
        distinct.update(tok.text for tok in unstopped(self.analyzer(value)))
        return ((text, 1, None) for text in distinct)