예제 #1
0
파일: fields.py 프로젝트: Mplsbeb/whoosh
    def word_datas(self, value, **kwargs):
        """Return ``(text, freq, freq)`` for every distinct token in *value*.

        *value* is run through ``self.analyzer`` and the tokens are passed
        through ``unstopped`` (defined elsewhere; presumably drops stop-word
        tokens -- confirm).  When ``self.boost_as_freq`` is true,
        ``int(t.boost)`` is accumulated per token text instead of a plain
        occurrence count.

        :param value: the content handed to the analyzer.
        :param kwargs: accepted but not used by this implementation.
        :returns: a generator of ``(text, freq, freq)`` tuples.
        """
        seen = defaultdict(int)
        if self.boost_as_freq:
            for t in unstopped(self.analyzer(value, boosts=True)):
                seen[t.text] += int(t.boost)
        else:
            for t in unstopped(self.analyzer(value)):
                seen[t.text] += 1

        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        return ((w, freq, freq) for w, freq in seen.items())
예제 #2
0
    def word_datas(self, value, **kwargs):
        """Return ``(text, freq, freq)`` tuples, one per distinct token text.

        Tokens come from ``self.analyzer(value)`` filtered by ``unstopped``
        (defined elsewhere; presumably removes stopped tokens -- confirm).
        If ``self.boost_as_freq`` is set, the analyzer is asked for boosts and
        the truncated integer boost of each token is summed; otherwise each
        token counts as one.

        :param value: the content handed to the analyzer.
        :param kwargs: accepted but not used by this implementation.
        :returns: a generator of ``(text, freq, freq)`` tuples.
        """
        counts = defaultdict(int)
        if self.boost_as_freq:
            for token in unstopped(self.analyzer(value, boosts=True)):
                counts[token.text] += int(token.boost)
        else:
            for token in unstopped(self.analyzer(value)):
                counts[token.text] += 1

        # dict.iteritems() is gone in Python 3; items() is available in both.
        return ((w, freq, freq) for w, freq in counts.items())
예제 #3
0
    def word_values(self, value, **kwargs):
        """Return ``(text, freq, weight, encoded)`` for each distinct token.

        Tokens come from ``self.analyzer`` (which receives ``**kwargs``) and
        are filtered by ``unstopped`` (defined elsewhere; presumably drops
        stop-word tokens -- confirm).  With ``self.boost_as_freq`` set, the
        frequency is the sum of ``int(t.boost)``; otherwise it is the number
        of occurrences.

        :param value: the content handed to the analyzer.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of
            ``(text, freq, float(freq), self.encode(freq))`` tuples.
        """
        seen = defaultdict(int)
        if self.boost_as_freq:
            for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
                seen[t.text] += int(t.boost)
        else:
            for t in unstopped(self.analyzer(value, **kwargs)):
                seen[t.text] += 1

        encode = self.encode
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        return ((w, freq, float(freq), encode(freq))
                for w, freq in seen.items())
예제 #4
0
 def word_values(self, value, **kwargs):
     """Return ``(text, freq, weight, encoded)`` for each distinct token.

     The analyzer is called with ``**kwargs`` and its output is filtered by
     ``unstopped`` (defined elsewhere; presumably removes stopped tokens --
     confirm).  When ``self.boost_as_freq`` is true the per-text frequency
     is the sum of ``int(t.boost)``, otherwise a plain occurrence count.

     :param value: the content handed to the analyzer.
     :param kwargs: extra keyword arguments forwarded to the analyzer.
     :returns: a generator of
         ``(text, freq, float(freq), self.encode(freq))`` tuples.
     """
     counts = defaultdict(int)
     if self.boost_as_freq:
         for token in unstopped(self.analyzer(value, boosts=True, **kwargs)):
             counts[token.text] += int(token.boost)
     else:
         for token in unstopped(self.analyzer(value, **kwargs)):
             counts[token.text] += 1

     encode = self.encode
     # dict.iteritems() is gone in Python 3; items() is available in both.
     return ((w, freq, float(freq), encode(freq))
             for w, freq in counts.items())
예제 #5
0
 def word_values(self, value, doc_boost=1.0, **kwargs):
     """Return ``(text, freq, encoded)`` for each distinct token in *value*.

     Tokens come from ``self.analyzer(value, **kwargs)`` filtered by
     ``unstopped`` (defined elsewhere; presumably drops stop-word tokens --
     confirm).  The encoded value is ``self.encode((freq, doc_boost))``.

     :param value: the content handed to the analyzer.
     :param doc_boost: value paired with the frequency before encoding.
     :param kwargs: extra keyword arguments forwarded to the analyzer.
     :returns: a generator of ``(text, freq, encoded)`` tuples.
     """
     seen = defaultdict(int)
     for t in unstopped(self.analyzer(value, **kwargs)):
         seen[t.text] += 1

     encode = self.encode
     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     return ((w, freq, encode((freq, doc_boost)))
             for w, freq in seen.items())
예제 #6
0
    def word_datas(self, value, start_pos=0, **kwargs):
        """Return ``(text, count, positions)`` for each distinct token.

        The analyzer is called with ``positions=True`` and *start_pos*; the
        tokens are filtered by ``unstopped`` (defined elsewhere; presumably
        drops stop-word tokens -- confirm).  Each recorded position is
        ``start_pos + t.pos``.

        :param value: the content handed to the analyzer.
        :param start_pos: offset passed to the analyzer and added to each
            token position.
        :param kwargs: accepted but not used by this implementation.
        :returns: a generator of ``(text, len(positions), positions)``.
        """
        seen = defaultdict(list)
        for t in unstopped(
                self.analyzer(value, positions=True, start_pos=start_pos)):
            seen[t.text].append(start_pos + t.pos)

        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        return ((w, len(poslist), poslist) for w, poslist in seen.items())
예제 #7
0
 def word_values(self, value, start_pos=0, **kwargs):
     """Return ``(text, count, encoded_positions)`` for each distinct token.

     The analyzer is called with ``positions=True``, *start_pos* and
     ``**kwargs``; tokens are filtered by ``unstopped`` (defined elsewhere;
     presumably drops stop-word tokens -- confirm).  Each recorded position
     is ``start_pos + t.pos``.

     :param value: the content handed to the analyzer.
     :param start_pos: offset passed to the analyzer and added to each
         token position.
     :param kwargs: extra keyword arguments forwarded to the analyzer.
     :returns: a generator of
         ``(text, len(positions), self.encode(positions))`` tuples.
     """
     seen = defaultdict(list)
     for t in unstopped(self.analyzer(value, positions=True,
                                      start_pos=start_pos, **kwargs)):
         seen[t.text].append(start_pos + t.pos)

     encode = self.encode
     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(poslist), encode(poslist))
             for w, poslist in seen.items())
예제 #8
0
파일: formats.py 프로젝트: gnuaha7/tagfs
    def word_values(self, value, doc_boost=1.0, **kwargs):
        """Return ``(text, freq, encoded)`` tuples, one per distinct token.

        Occurrences are counted over ``self.analyzer(value, **kwargs)`` after
        filtering with ``unstopped`` (defined elsewhere; presumably removes
        stopped tokens -- confirm).  ``self.encode`` receives the pair
        ``(freq, doc_boost)``.

        :param value: the content handed to the analyzer.
        :param doc_boost: value paired with the frequency before encoding.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of ``(text, freq, encoded)`` tuples.
        """
        counts = defaultdict(int)
        for token in unstopped(self.analyzer(value, **kwargs)):
            counts[token.text] += 1

        encode = self.encode
        # dict.iteritems() is gone in Python 3; items() is available in both.
        return ((w, freq, encode((freq, doc_boost)))
                for w, freq in counts.items())
예제 #9
0
파일: fields.py 프로젝트: Mplsbeb/whoosh
    def word_datas(self, value, start_pos=0, **kwargs):
        """Return ``(text, count, [(pos, boost), ...])`` per distinct token.

        The analyzer is called with ``positions=True``, ``boosts=True`` and
        *start_pos*; tokens are filtered by ``unstopped`` (defined elsewhere;
        presumably drops stop-word tokens -- confirm).

        :param value: the content handed to the analyzer.
        :param start_pos: starting position passed to the analyzer.
        :param kwargs: accepted but not used by this implementation.
        :returns: a generator of ``(text, len(entries), entries)`` where each
            entry is a ``(t.pos, t.boost)`` pair.
        """
        # The factory must be ``list``: defaultdict calls it with no
        # arguments, and ``iter()`` without arguments raises TypeError.
        seen = defaultdict(list)
        for t in unstopped(self.analyzer(value, positions=True, boosts=True,
                                         start_pos=start_pos)):
            seen[t.text].append((t.pos, t.boost))

        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        return ((w, len(poslist), poslist) for w, poslist in seen.items())
예제 #10
0
파일: fields.py 프로젝트: artemrizhov/itcm
 def word_datas(self, value, start_pos=0, start_char=0, **kwargs):
     """Return ``(text, count, spans)`` for each distinct token in *value*.

     The analyzer is called with ``positions=True``, ``chars=True``,
     *start_pos* and *start_char*; tokens are filtered by ``unstopped``
     (defined elsewhere; presumably drops stop-word tokens -- confirm).

     :param value: the content handed to the analyzer.
     :param start_pos: starting position passed to the analyzer.
     :param start_char: passed to the analyzer and added to each token's
         ``startchar`` and ``endchar``.
     :param kwargs: accepted but not used by this implementation.
     :returns: a generator of ``(text, len(spans), spans)`` where each span
         is ``(pos, start_char + startchar, start_char + endchar)``.
     """
     seen = defaultdict(list)

     for t in unstopped(self.analyzer(value, positions=True, chars=True,
                                      start_pos=start_pos,
                                      start_char=start_char)):
         seen[t.text].append((t.pos, start_char + t.startchar,
                              start_char + t.endchar))

     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(ls), ls) for w, ls in seen.items())
예제 #11
0
 def word_datas(self, value, start_pos=0, start_char=0, **kwargs):
     """Return ``(text, count, spans)`` tuples, one per distinct token text.

     Tokens are produced by ``self.analyzer`` with positions and character
     offsets enabled and filtered by ``unstopped`` (defined elsewhere;
     presumably removes stopped tokens -- confirm).

     :param value: the content handed to the analyzer.
     :param start_pos: starting position passed to the analyzer.
     :param start_char: passed to the analyzer and added to each token's
         ``startchar`` and ``endchar``.
     :param kwargs: accepted but not used by this implementation.
     :returns: a generator of ``(text, len(spans), spans)`` where each span
         is ``(pos, start_char + startchar, start_char + endchar)``.
     """
     spans_by_text = defaultdict(list)

     token_stream = self.analyzer(value, positions=True, chars=True,
                                  start_pos=start_pos, start_char=start_char)
     for token in unstopped(token_stream):
         span = (token.pos,
                 start_char + token.startchar,
                 start_char + token.endchar)
         spans_by_text[token.text].append(span)

     # dict.iteritems() is gone in Python 3; items() is available in both.
     return ((w, len(ls), ls) for w, ls in spans_by_text.items())
예제 #12
0
파일: formats.py 프로젝트: oier/Yaki
 def word_values(self, value, doc_boost=1.0, **kwargs):
     """Return ``(text, freq, weight, encoded)`` for each distinct token.

     The analyzer is called with ``boosts=True`` and ``**kwargs``; tokens
     are filtered by ``unstopped`` (defined elsewhere; presumably drops
     stop-word tokens -- confirm).  For each token text the occurrences are
     counted and the token boosts are summed.

     :param value: the content handed to the analyzer.
     :param doc_boost: multiplier applied to the summed boosts; also paired
         with the frequency before encoding.
     :param kwargs: extra keyword arguments forwarded to the analyzer.
     :returns: a generator of ``(text, freq, summed_boost * doc_boost,
         self.encode((freq, doc_boost)))`` tuples.
     """
     freqs = defaultdict(int)
     weights = defaultdict(float)
     for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
         weights[t.text] += t.boost
         freqs[t.text] += 1

     encode = self.encode
     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     return ((w, freq, weights[w] * doc_boost, encode((freq, doc_boost)))
             for w, freq in freqs.items())
예제 #13
0
파일: formats.py 프로젝트: oier/Yaki
 def word_values(self, value, start_pos=0, **kwargs):
     """Return ``(text, count, weight, encoded)`` for each distinct token.

     The analyzer is called with ``positions=True``, ``boosts=True``,
     *start_pos* and ``**kwargs``; tokens are filtered by ``unstopped``
     (defined elsewhere; presumably drops stop-word tokens -- confirm).
     Every occurrence is stored as a ``(t.pos, t.boost)`` pair.

     :param value: the content handed to the analyzer.
     :param start_pos: starting position passed to the analyzer.
     :param kwargs: extra keyword arguments forwarded to the analyzer.
     :returns: a generator of ``(text, len(pairs), sum of boosts,
         self.encode(pairs))`` tuples.
     """
     # The factory must be ``list``: defaultdict calls it with no arguments,
     # and ``iter()`` without arguments raises TypeError.
     seen = defaultdict(list)
     for t in unstopped(self.analyzer(value, positions=True, boosts=True,
                                      start_pos=start_pos, **kwargs)):
         seen[t.text].append((t.pos, t.boost))

     encode = self.encode
     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(poslist), sum(p[1] for p in poslist), encode(poslist))
             for w, poslist in seen.items())
예제 #14
0
파일: formats.py 프로젝트: oier/Yaki
 def word_values(self, value, start_pos=0, **kwargs):
     """Return ``(text, count, weight, encoded_positions)`` per token text.

     The analyzer is called with ``positions=True``, *start_pos* and
     ``**kwargs``; tokens are filtered by ``unstopped`` (defined elsewhere;
     presumably drops stop-word tokens -- confirm).  Each recorded position
     is ``start_pos + t.pos`` and token boosts are summed per text.

     :param value: the content handed to the analyzer.
     :param start_pos: offset passed to the analyzer and added to each
         token position.
     :param kwargs: extra keyword arguments forwarded to the analyzer.
     :returns: a generator of ``(text, len(positions), summed_boost,
         self.encode(positions))`` tuples.
     """
     poses = defaultdict(list)
     weights = defaultdict(float)
     # NOTE(review): t.boost is read below although the analyzer is not
     # called with boosts=True here -- confirm tokens always carry a boost.
     for t in unstopped(self.analyzer(value, positions=True,
                                      start_pos=start_pos, **kwargs)):
         poses[t.text].append(start_pos + t.pos)
         weights[t.text] += t.boost

     encode = self.encode
     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(poslist), weights[w], encode(poslist))
             for w, poslist in poses.items())
예제 #15
0
파일: formats.py 프로젝트: MapofLife/MOL
    def word_values(self, value, **kwargs):
        """Return ``(text, freq, weight, encoded)`` for each distinct token.

        The analyzer is called with ``boosts=True`` and ``**kwargs``; tokens
        are filtered by ``unstopped`` (defined elsewhere; presumably drops
        stop-word tokens -- confirm).  Occurrences are counted and token
        boosts are summed per token text.

        :param value: the content handed to the analyzer.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of ``(text, freq,
            summed_boost * self.field_boost, self.encode(freq))`` tuples.
        """
        fb = self.field_boost
        freqs = defaultdict(int)
        weights = defaultdict(float)

        for t in unstopped(self.analyzer(value, boosts=True, **kwargs)):
            freqs[t.text] += 1
            weights[t.text] += t.boost

        encode = self.encode
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        return ((w, freq, weights[w] * fb, encode(freq))
                for w, freq in freqs.items())
예제 #16
0
    def word_values(self, value, start_pos=0, **kwargs):
        """Return ``(text, count, weight, encoded_positions)`` per token text.

        The analyzer is called with ``positions=True``, *start_pos* and
        ``**kwargs``; tokens are filtered by ``unstopped`` (defined elsewhere;
        presumably drops stop-word tokens -- confirm).  Each recorded
        position is ``start_pos + t.pos``.

        :param value: the content handed to the analyzer.
        :param start_pos: offset passed to the analyzer and added to each
            token position.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of ``(text, len(positions),
            float(len(positions)), self.encode(positions))`` tuples.
        """
        seen = defaultdict(list)
        for t in unstopped(
                self.analyzer(value,
                              positions=True,
                              start_pos=start_pos,
                              **kwargs)):
            seen[t.text].append(start_pos + t.pos)

        encode = self.encode
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        return ((w, len(poslist), float(len(poslist)), encode(poslist))
                for w, poslist in seen.items())
예제 #17
0
    def word_values(self, value, **kwargs):
        """Return ``(text, freq, weight, encoded)`` tuples per token text.

        Tokens are produced by ``self.analyzer`` with boosts enabled and
        filtered by ``unstopped`` (defined elsewhere; presumably removes
        stopped tokens -- confirm).  The weight is the sum of the token
        boosts for that text multiplied by ``self.field_boost``.

        :param value: the content handed to the analyzer.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of
            ``(text, freq, weight, self.encode(freq))`` tuples.
        """
        fb = self.field_boost
        counts = defaultdict(int)
        boost_totals = defaultdict(float)

        for token in unstopped(self.analyzer(value, boosts=True, **kwargs)):
            counts[token.text] += 1
            boost_totals[token.text] += token.boost

        encode = self.encode
        # dict.iteritems() is gone in Python 3; items() is available in both.
        return ((w, freq, boost_totals[w] * fb, encode(freq))
                for w, freq in counts.items())
예제 #18
0
 def word_values(self, value, start_pos=0, start_char=0, **kwargs):
     """Return ``(text, count, encoded)`` for each distinct token in *value*.

     The analyzer is called with ``positions=True``, ``characters=True``,
     ``boosts=True``, *start_pos*, *start_char* and ``**kwargs``; tokens are
     filtered by ``unstopped`` (defined elsewhere; presumably drops
     stop-word tokens -- confirm).  Every occurrence is stored as
     ``(pos, start_char + startchar, start_char + endchar, boost)``.

     :param value: the content handed to the analyzer.
     :param start_pos: starting position passed to the analyzer.
     :param start_char: passed to the analyzer and added to each token's
         ``startchar`` and ``endchar``.
     :param kwargs: extra keyword arguments forwarded to the analyzer.
     :returns: a generator of
         ``(text, len(entries), self.encode(entries))`` tuples.
     """
     # The factory must be ``list``: defaultdict calls it with no arguments,
     # and ``iter()`` without arguments raises TypeError.
     seen = defaultdict(list)
     for t in unstopped(self.analyzer(value, positions=True, characters=True,
                                      boosts=True,
                                      start_pos=start_pos,
                                      start_char=start_char,
                                      **kwargs)):
         seen[t.text].append((t.pos,
                              start_char + t.startchar,
                              start_char + t.endchar,
                              t.boost))

     encode = self.encode
     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(poslist), encode(poslist))
             for w, poslist in seen.items())
예제 #19
0
파일: formats.py 프로젝트: oier/Yaki
 def word_values(self, value, start_pos=0, start_char=0, **kwargs):
     """Return ``(text, count, weight, encoded)`` for each distinct token.

     The analyzer is called with ``positions=True``, ``characters=True``,
     ``boosts=True``, *start_pos*, *start_char* and ``**kwargs``; tokens are
     filtered by ``unstopped`` (defined elsewhere; presumably drops
     stop-word tokens -- confirm).  Every occurrence is stored as
     ``(pos, start_char + startchar, start_char + endchar, boost)``.

     :param value: the content handed to the analyzer.
     :param start_pos: starting position passed to the analyzer.
     :param start_char: passed to the analyzer and added to each token's
         ``startchar`` and ``endchar``.
     :param kwargs: extra keyword arguments forwarded to the analyzer.
     :returns: a generator of ``(text, len(entries), sum of boosts,
         self.encode(entries))`` tuples.
     """
     # The factory must be ``list``: defaultdict calls it with no arguments,
     # and ``iter()`` without arguments raises TypeError.
     seen = defaultdict(list)
     for t in unstopped(self.analyzer(value, positions=True,
                                      characters=True, boosts=True,
                                      start_pos=start_pos,
                                      start_char=start_char, **kwargs)):
         seen[t.text].append((t.pos,
                              start_char + t.startchar,
                              start_char + t.endchar,
                              t.boost))

     encode = self.encode
     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     return ((w, len(poslist), sum(p[3] for p in poslist), encode(poslist))
             for w, poslist in seen.items())
예제 #20
0
파일: formats.py 프로젝트: MapofLife/MOL
    def word_values(self, value, start_pos=0, start_char=0, **kwargs):
        """Return ``(text, count, weight, encoded_spans)`` per token text.

        The analyzer is called with ``positions=True``, ``chars=True``,
        ``boosts=True``, *start_pos*, *start_char* and ``**kwargs``; tokens
        are filtered by ``unstopped`` (defined elsewhere; presumably drops
        stop-word tokens -- confirm).  Every occurrence is stored as
        ``(pos, start_char + startchar, start_char + endchar)`` and token
        boosts are summed per text.

        :param value: the content handed to the analyzer.
        :param start_pos: starting position passed to the analyzer.
        :param start_char: passed to the analyzer and added to each token's
            ``startchar`` and ``endchar``.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of ``(text, len(spans),
            summed_boost * self.field_boost, self.encode(spans))`` tuples.
        """
        fb = self.field_boost
        seen = defaultdict(list)
        weights = defaultdict(float)

        for t in unstopped(self.analyzer(value, positions=True, chars=True,
                                         boosts=True, start_pos=start_pos,
                                         start_char=start_char, **kwargs)):
            seen[t.text].append((t.pos, start_char + t.startchar,
                                 start_char + t.endchar))
            weights[t.text] += t.boost

        encode = self.encode
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        return ((w, len(ls), weights[w] * fb, encode(ls))
                for w, ls in seen.items())
예제 #21
0
    def word_values(self, value, start_pos=0, start_char=0, **kwargs):
        """Return ``(text, count, weight, encoded_spans)`` tuples.

        Tokens are produced by ``self.analyzer`` with positions, character
        offsets and boosts enabled, then filtered by ``unstopped`` (defined
        elsewhere; presumably removes stopped tokens -- confirm).  One
        ``(pos, start_char + startchar, start_char + endchar)`` span is kept
        per occurrence, and boosts are summed per token text.

        :param value: the content handed to the analyzer.
        :param start_pos: starting position passed to the analyzer.
        :param start_char: passed to the analyzer and added to each token's
            ``startchar`` and ``endchar``.
        :param kwargs: extra keyword arguments forwarded to the analyzer.
        :returns: a generator of ``(text, len(spans),
            summed_boost * self.field_boost, self.encode(spans))`` tuples.
        """
        fb = self.field_boost
        spans_by_text = defaultdict(list)
        boost_totals = defaultdict(float)

        token_stream = self.analyzer(value,
                                     positions=True,
                                     chars=True,
                                     boosts=True,
                                     start_pos=start_pos,
                                     start_char=start_char,
                                     **kwargs)
        for token in unstopped(token_stream):
            spans_by_text[token.text].append(
                (token.pos,
                 start_char + token.startchar,
                 start_char + token.endchar))
            boost_totals[token.text] += token.boost

        encode = self.encode
        # dict.iteritems() is gone in Python 3; items() is available in both.
        return ((w, len(ls), boost_totals[w] * fb, encode(ls))
                for w, ls in spans_by_text.items())
예제 #22
0
def tokens(value, analyzer, kwargs):
    """Build a token stream for *value* and pass it through ``unstopped``.

    A tuple or list is handed to ``entoken``; anything else is handed to
    *analyzer*.  In both cases *kwargs* (a dict) is expanded into keyword
    arguments.  ``entoken`` and ``unstopped`` are defined elsewhere.
    """
    # Pick the token producer first, then make the single call.
    producer = entoken if isinstance(value, (tuple, list)) else analyzer
    return unstopped(producer(value, **kwargs))
예제 #23
0
def tokens(value, analyzer, kwargs):
    """Return ``unstopped`` applied to the tokens generated from *value*.

    Tuples and lists go through ``entoken``; every other value goes through
    *analyzer*.  *kwargs* is a dict expanded as keyword arguments for
    whichever callable is used.  ``entoken`` and ``unstopped`` are defined
    elsewhere.
    """
    if isinstance(value, (tuple, list)):
        return unstopped(entoken(value, **kwargs))
    return unstopped(analyzer(value, **kwargs))
예제 #24
0
파일: formats.py 프로젝트: MapofLife/MOL
 def word_values(self, value, **kwargs):
     """Return ``(text, 1, self.field_boost, '')`` per distinct token text.

     Tokens come from ``self.analyzer(value, **kwargs)`` filtered by
     ``unstopped`` (defined elsewhere; presumably drops stop-word tokens --
     confirm).  Duplicate texts are collapsed with a set.
     """
     boost = self.field_boost
     unique_texts = set()
     for token in unstopped(self.analyzer(value, **kwargs)):
         unique_texts.add(token.text)
     return ((text, 1, boost, '') for text in unique_texts)
예제 #25
0
파일: formats.py 프로젝트: gnuaha7/tagfs
 def word_values(self, value, **kwargs):
     """Return ``(text, 1, '')`` for each distinct token text in *value*.

     Tokens come from ``self.analyzer(value, **kwargs)`` filtered by
     ``unstopped`` (defined elsewhere; presumably drops stop-word tokens --
     confirm).  Duplicate texts are collapsed with a set.
     """
     unique_texts = set()
     for token in unstopped(self.analyzer(value, **kwargs)):
         unique_texts.add(token.text)
     return ((text, 1, '') for text in unique_texts)
예제 #26
0
    def word_datas(self, value, doc_boost=1.0, **kwargs):
        """Return ``(text, freq, (freq, doc_boost))`` per distinct token.

        Occurrences are counted over ``self.analyzer(value)`` after filtering
        with ``unstopped`` (defined elsewhere; presumably drops stop-word
        tokens -- confirm).

        :param value: the content handed to the analyzer.
        :param doc_boost: value paired with the frequency in the result.
        :param kwargs: accepted but not used by this implementation.
        :returns: a generator of ``(text, freq, (freq, doc_boost))`` tuples.
        """
        seen = defaultdict(int)
        for t in unstopped(self.analyzer(value)):
            seen[t.text] += 1

        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        return ((w, freq, (freq, doc_boost)) for w, freq in seen.items())
예제 #27
0
파일: formats.py 프로젝트: 20after4/Yaki
 def word_values(self, value, **kwargs):
     """Return ``(text, 1, 1.0, '')`` for each distinct token text.

     Tokens come from ``self.analyzer(value, **kwargs)`` filtered by
     ``unstopped`` (defined elsewhere; presumably drops stop-word tokens --
     confirm).  Duplicate texts are collapsed with a set.
     """
     unique_texts = set()
     for token in unstopped(self.analyzer(value, **kwargs)):
         unique_texts.add(token.text)
     return ((text, 1, 1.0, '') for text in unique_texts)
예제 #28
0
    def word_datas(self, value, **kwargs):
        """Return ``(text, 1, None)`` for each distinct token text in *value*.

        Tokens come from ``self.analyzer(value)`` filtered by ``unstopped``
        (defined elsewhere; presumably drops stop-word tokens -- confirm).
        *kwargs* is accepted but not used.
        """
        unique_texts = set(
            token.text for token in unstopped(self.analyzer(value)))
        return ((text, 1, None) for text in unique_texts)
예제 #29
0
파일: fields.py 프로젝트: Mplsbeb/whoosh
    def word_datas(self, value, doc_boost=1.0, **kwargs):
        """Return ``(text, freq, (freq, doc_boost))`` tuples per token text.

        Tokens are produced by ``self.analyzer(value)`` and filtered by
        ``unstopped`` (defined elsewhere; presumably removes stopped tokens
        -- confirm); each occurrence adds one to that text's frequency.

        :param value: the content handed to the analyzer.
        :param doc_boost: value paired with the frequency in the result.
        :param kwargs: accepted but not used by this implementation.
        :returns: a generator of ``(text, freq, (freq, doc_boost))`` tuples.
        """
        counts = defaultdict(int)
        for token in unstopped(self.analyzer(value)):
            counts[token.text] += 1

        # dict.iteritems() is gone in Python 3; items() is available in both.
        return ((w, freq, (freq, doc_boost)) for w, freq in counts.items())
예제 #30
0
 def word_values(self, value, **kwargs):
     """Return ``(text, 1, '')`` for each distinct token text in *value*.

     Tokens come from ``self.analyzer(value, **kwargs)`` filtered by
     ``unstopped`` (defined elsewhere; presumably drops stop-word tokens --
     confirm).
     """
     token_stream = unstopped(self.analyzer(value, **kwargs))
     unique_texts = set(token.text for token in token_stream)
     return ((text, 1, '') for text in unique_texts)
예제 #31
0
파일: fields.py 프로젝트: Mplsbeb/whoosh
    def word_datas(self, value, **kwargs):
        """Return ``(text, 1, None)`` tuples, one per distinct token text.

        The texts are collected from ``self.analyzer(value)`` after filtering
        with ``unstopped`` (defined elsewhere; presumably removes stopped
        tokens -- confirm).  *kwargs* is accepted but not used.
        """
        token_stream = unstopped(self.analyzer(value))
        distinct = set(token.text for token in token_stream)
        return ((text, 1, None) for text in distinct)