def _btexts(self, ixreader):
    """Yield the encoded variations of ``self.text`` that exist in the index.

    Each word produced by ``variations(self.text)`` is encoded with the
    ``to_bytes`` method of this query's field (looked up in
    ``ixreader.schema``) and yielded only if ``(fieldname, encoded)`` is
    contained in ``ixreader``.

    :param ixreader: reader exposing ``schema`` and supporting
        ``(fieldname, term) in ixreader`` membership tests.
    :returns: a generator of encoded terms.
    """
    fieldname = self.fieldname
    to_bytes = ixreader.schema[fieldname].to_bytes
    for word in variations(self.text):
        # A generated variation is not guaranteed to be encodable by the
        # field; skip it instead of aborting the whole expansion. This
        # matches the other ``_btexts`` implementation in this file.
        try:
            btext = to_bytes(word)
        except ValueError:
            continue
        if (fieldname, btext) in ixreader:
            yield btext
def queryParsing(query):
    """Expand a query string into stems and morphological variations.

    The query is tokenized, lowercased and stripped of stop words. For every
    remaining token the Porter stem is appended to the result, followed by
    each entry of ``variations(token)``.

    :param query: the raw query text.
    :returns: a flat list of terms (stem first, then variations, per token).
        Duplicates are not removed.
    """
    from whoosh.analysis import LowercaseFilter, RegexTokenizer, StopFilter
    from whoosh.lang.morph_en import variations
    from whoosh.lang.porter import stem

    print("inside queryParsing")

    # Lowercase BEFORE removing stop words: the stop list is lowercase, so
    # filtering first would let capitalised stop words ("The", "And") through.
    analyzer = RegexTokenizer() | LowercaseFilter() | StopFilter()

    return_list = []
    for t in analyzer(query):
        # Read t.text immediately; the analyzer may reuse the token object
        # between iterations.
        return_list.append(stem(t.text))
        return_list.extend(variations(t.text))
    return return_list
def prepare(self, obj):
    """Return the prepared field text with word variations appended.

    The parent class's prepared value is kept as the first line. For every
    word (``\\w+`` run) found in it, one extra line is appended containing
    the space-separated ``variations`` of the lowercased word.

    :param obj: the object being prepared; passed through to the parent
        ``prepare``.
    :returns: the expanded text, or ``None`` if the parent returned ``None``.
    """
    res = super(VariationCharField, self).prepare(obj)
    if res is None:
        # NOTE(review): assumes the parent may return None for a missing
        # value; previously this crashed inside re.findall with TypeError.
        return None

    # Raw string avoids the invalid "\w" escape warning. No IGNORECASE flag:
    # \w already matches both cases, so the flag had no effect.
    all_terms = re.findall(r"\w+", res)

    lines = [res]
    lines.extend(' '.join(variations(term.lower())) for term in all_terms)
    # Single join instead of repeated string concatenation; same output.
    return '\n'.join(lines)
def _btexts(self, ixreader):
    """Generate the encoded forms of this query's variations found in the index.

    Words from ``variations(self.text)`` are encoded with the field's
    ``to_bytes``; words that raise ``ValueError`` during encoding are
    skipped, as are encoded terms for which ``(fieldname, term)`` is not in
    ``ixreader``.

    :param ixreader: reader providing ``schema`` and term membership tests.
    :returns: a generator of encoded terms.
    """
    field = self.fieldname
    encode = ixreader.schema[field].to_bytes
    for candidate in variations(self.text):
        try:
            encoded = encode(candidate)
        except ValueError:
            # This variation cannot be encoded for the field; ignore it.
            continue
        if (field, encoded) not in ixreader:
            continue
        yield encoded
def prepare(self, obj):
    """Prepare the field value and append the variations of each word in it.

    The value returned by the parent ``prepare`` is followed by one line per
    word found in it; each such line holds the space-separated
    ``variations`` of that word, lowercased.

    :param obj: the object being prepared; forwarded to the parent
        ``prepare``.
    :returns: the expanded text, or ``None`` if the parent returned ``None``.
    """
    res = super(VariationCharField, self).prepare(obj)
    if res is None:
        # NOTE(review): presumably the parent can return None when there is
        # no value; pass it through instead of raising TypeError in
        # re.findall.
        return None

    # r"\w+" is the same pattern as the old "[\w]+" without the invalid
    # escape sequence; re.IGNORECASE was dropped because it cannot affect \w.
    all_terms = re.findall(r"\w+", res)
    all_variations = [
        ' '.join(variations(term.lower()))
        for term in all_terms
    ]
    # One join rather than += in a loop; produces the identical string.
    return '\n'.join([res] + all_variations)
def queryParsing(query):
    """Expand a query string into stems and morphological variations.

    The query is lowercased, tokenized with ``RegexTokenizer`` and passed
    through ``StopFilter``. For each remaining token the result receives
    ``stem(token)`` followed by every entry of ``variations(token)``.

    :param query: the raw query text.
    :returns: a flat list of terms; duplicates are not removed.
    """
    print("inside queryParsing")
    tokenizer = RegexTokenizer()
    stopper = StopFilter()

    # Lowercase the text BEFORE stop-word removal. Lowercasing each token
    # after the filter (as before) let capitalised stop words such as "The"
    # slip through, because the stop list is lowercase.
    tokens = stopper(tokenizer(query.lower()))

    return_list = []
    for t in tokens:
        # stem first, then the variations of the same (lowercase) token
        return_list.append(stem(t.text))
        return_list.extend(variations(t.text))
    return return_list
def __init__(self, fieldname, text, boost=1.0):
    """Store the query parameters and precompute the variations of ``text``.

    :param fieldname: name of the field this query applies to.
    :param text: the word whose variations are computed.
    :param boost: stored as ``self.boost``; defaults to ``1.0``.
    """
    self.fieldname, self.text, self.boost = fieldname, text, boost
    # Computed once at construction time from the stored text.
    self.words = variations(self.text)
def get_variations(term):
    """Return ``variations(term)`` unchanged.

    Thin pass-through around ``variations``.

    :param term: the word to expand.
    :returns: whatever ``variations`` returns for ``term``.
    """
    forms = variations(term)
    return forms
def _words(self, ixreader):
    """Return the variations of ``self.text`` that are present in the index.

    :param ixreader: reader supporting ``(fieldname, word) in ixreader``.
    :returns: a list of the words from ``variations(self.text)`` for which
        ``(self.fieldname, word)`` is contained in ``ixreader``.
    """
    field = self.fieldname
    present = []
    for candidate in variations(self.text):
        if (field, candidate) in ixreader:
            present.append(candidate)
    return present
writer = ix.writer() writer.add_document(title=u"My the document", content=myfile.read(), path=u"/a") writer.add_document(title=u"My the document two", content=u"This is my third test document!", path=u"/a") '''writer.add_document(title=u"Second try", content=u"This is the second third example.", path=u"/b") writer.add_document(title=u"Third time's the charm", content=u"Examples are third many.", path=u"/c")''' writer.commit() from whoosh.qparser import QueryParser with ix.searcher() as s: qp = QueryParser("content", schema=ix.schema) for i in variations("enhanced"): q = qp.parse(i) #q = stem(q) results = s.search(q) print(results) print(variations("enhanced")) '''from whoosh.index import create_in from whoosh.fields import * schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) ix = create_in("indexdir", schema) writer = ix.writer() writer.add_document(title=u"First document", path=u"/a",content=u"This is the first document we've added!") writer.add_document(title=u"Second document", path=u"/b",content=u"The second one is even added more interesting!") writer.commit() from whoosh.qparser import QueryParser
def _words(self, ixreader):
    """List the variations of this query's text that occur in ``ixreader``.

    :param ixreader: reader supporting ``(fieldname, word) in ixreader``.
    :returns: a list holding every word of ``variations(self.text)`` whose
        ``(self.fieldname, word)`` pair is contained in ``ixreader``.
    """
    field = self.fieldname

    def is_indexed(word):
        # Membership is tested on the (field name, word) pair.
        return (field, word) in ixreader

    return [word for word in variations(self.text) if is_indexed(word)]