def fuzzy(qs, prefix=0, maxdist=2):
    """Build a ``query.FuzzyTerm`` from the first element of *qs*.

    :param qs: indexable collection; only ``qs[0]`` is used, and it must
        expose ``fieldname`` and ``text`` attributes.
    :param prefix: coerced with ``int()`` and passed as ``prefixlength``.
    :param maxdist: coerced with ``int()`` and passed as ``maxdist``.
    :returns: the constructed ``query.FuzzyTerm``.
    """
    # Coerce first so bad numeric arguments fail before qs is touched.
    prefix_length = int(prefix)
    max_distance = int(maxdist)

    first = qs[0]
    return query.FuzzyTerm(
        first.fieldname,
        first.text,
        prefixlength=prefix_length,
        maxdist=max_distance,
    )
def test_fuzzyterm():
    """Check that a fuzzy search for "brave" returns documents 1 and 2.

    Four documents are indexed in a ``RamStorage`` index; only the first
    two contain the word "bravo".
    """
    schema = fields.Schema(id=fields.STORED, f=fields.TEXT)
    ix = RamStorage().create_index(schema)

    contents = (
        "alfa bravo charlie delta",
        "bravo charlie delta echo",
        "charlie delta echo foxtrot",
        "delta echo foxtrot golf",
    )

    w = ix.writer()
    # Document ids are assigned sequentially, starting at 1.
    for doc_id, text in enumerate(contents, 1):
        w.add_document(id=doc_id, f=u(text))
    w.commit()

    with ix.searcher() as s:
        q = query.FuzzyTerm("f", "brave")
        hits = s.search(q)
        found_ids = [hit["id"] for hit in hits]
        assert_equal(found_ids, [1, 2])
def find(self, wordlist):
    """Search the "content" field for documents fuzzily matching every word.

    Words are kept only when their length lies between ``self.minlength``
    and ``self.maxlength`` (inclusive) and ``self.is_stopword`` rejects
    their upper-cased form. The surviving words are lower-cased, wrapped
    in ``query.FuzzyTerm`` and combined with ``query.And``.

    :param wordlist: iterable of words to look up.
    :returns: a list of tuples, one per hit, obtained by splitting the
        hit's "identifier" value on ``':'``. If no word survives the
        filtering, an empty dictionary is returned instead.

    NOTE(review): the early exit returns ``{}`` while the normal path
    returns a list; kept as-is because callers may depend on it.
    """
    usable = []
    for word in wordlist:
        # Skip words whose length is outside the configured bounds.
        if not (self.minlength <= len(word) <= self.maxlength):
            continue
        # Stopwords are checked in upper case.
        if self.is_stopword(word.upper()):
            continue
        usable.append(word)

    if not usable:
        return {}

    searcher = self._get_searcher()
    terms = [query.FuzzyTerm("content", word.lower()) for word in usable]
    q = query.And(terms)
    # limit=None is passed through to the searcher unchanged.
    results = searcher.search(q, limit=None)

    identifiers = []
    for hit in results:
        identifiers.append(tuple(hit["identifier"].split(':')))
    return identifiers