示例#1
0
 def _btexts(self, ixreader):
     """Yield the encoded variations of ``self.text`` that exist in the index.

     Each form returned by ``variations(self.text)`` is converted with the
     ``to_bytes`` method of the field looked up in ``ixreader.schema`` and
     yielded only when ``(fieldname, encoded_term)`` is contained in
     ``ixreader``.

     :param ixreader: reader exposing ``schema`` and supporting
         ``(fieldname, term) in ixreader`` membership tests.
     """
     fieldname = self.fieldname
     to_bytes = ixreader.schema[fieldname].to_bytes
     for word in variations(self.text):
         try:
             btext = to_bytes(word)
         except ValueError:
             # This form cannot be encoded for the field (presumably e.g. a
             # non-numeric form against a numeric field); skip it rather
             # than abort the whole expansion.
             continue

         if (fieldname, btext) in ixreader:
             yield btext
示例#2
0
 def _btexts(self, ixreader):
     """Generate the byte forms of this term's variations found in the index.

     The variations of ``self.text`` are encoded one by one with the field's
     ``to_bytes`` converter (taken from ``ixreader.schema``); an encoded form
     is yielded only if ``(fieldname, encoded_form)`` is in ``ixreader``.

     :param ixreader: reader providing ``schema`` and term membership tests.
     """
     fieldname = self.fieldname
     to_bytes = ixreader.schema[fieldname].to_bytes
     for word in variations(self.text):
         try:
             btext = to_bytes(word)
         except ValueError:
             # A single unencodable variation must not terminate the
             # generator; ignore it and continue with the remaining forms.
             continue

         if (fieldname, btext) in ixreader:
             yield btext
示例#3
0
def queryParsing(query):
    """Expand a free-text query into stems and morphological variations.

    The query is tokenized, lower-cased and stripped of stop words. For every
    remaining token the Porter stem is appended to the result, followed by
    all forms returned by ``variations`` for the lower-cased token.

    :param query: the query text to expand.
    :returns: a list of strings (stem first, then variations, per token).
    """
    from whoosh.analysis import LowercaseFilter
    from whoosh.analysis import RegexTokenizer
    from whoosh.analysis import StopFilter
    from whoosh.lang.morph_en import variations
    from whoosh.lang.porter import stem

    print("inside queryParsing")
    tokenizer = RegexTokenizer()
    lowercase = LowercaseFilter()
    stopper = StopFilter()

    # Lower-case BEFORE removing stop words: the stop filter compares token
    # text as-is, so capitalised stop words ("The", "AND") would otherwise
    # slip through and be stemmed/expanded like content words.
    tokens = stopper(lowercase(tokenizer(query)))

    return_list = []
    for t in tokens:
        return_list.append(stem(t.text))
        return_list.extend(variations(t.text))

    return return_list
示例#4
0
    def prepare(self, obj):
        """Return the prepared text followed by variations of each word.

        The value prepared by the parent class is scanned for runs of word
        characters. Every word is lower-cased and expanded with
        ``variations``; the forms of one word are joined with spaces and
        appended to the prepared text on a line of their own.

        :param obj: the object being prepared, passed through to the parent.
        :returns: the parent's prepared text plus one extra line per word.
        """
        res = super(VariationCharField, self).prepare(obj)
        # Raw string: "\w" inside a plain literal is an invalid escape
        # sequence (a SyntaxWarning on Python 3.12+). re.IGNORECASE was
        # dropped because it has no effect on \w.
        all_terms = re.findall(r"\w+", res)
        variation_lines = [
            ' '.join(variations(term.lower())) for term in all_terms
        ]

        # A single join instead of repeated string concatenation.
        return '\n'.join([res] + variation_lines)
    def _btexts(self, ixreader):
        """Yield encoded variations of ``self.text`` that occur in the index.

        Variations whose encoding raises ``ValueError`` are skipped; the
        remaining ones are yielded only when ``(fieldname, encoded)`` is
        contained in ``ixreader``.
        """
        name = self.fieldname
        encode = ixreader.schema[name].to_bytes
        for variant in variations(self.text):
            try:
                encoded = encode(variant)
            except ValueError:
                # Not representable for this field: ignore this form.
                pass
            else:
                if (name, encoded) in ixreader:
                    yield encoded
示例#6
0
    def _btexts(self, ixreader):
        """Generate the indexed byte forms of this term's variations.

        Each variation of ``self.text`` is converted with the field's
        ``to_bytes``; forms that raise ``ValueError`` are dropped, and a
        converted form is yielded only if ``ixreader`` contains it for this
        field.
        """
        field = self.fieldname
        convert = ixreader.schema[field].to_bytes
        for form in variations(self.text):
            try:
                as_bytes = convert(form)
            except ValueError:
                # Conversion failed for this form; move on to the next one.
                pass
            else:
                if (field, as_bytes) in ixreader:
                    yield as_bytes
示例#7
0
    def prepare(self, obj):
        """Prepare the field value and append variations of every word in it.

        Words (runs of word characters) are extracted from the text prepared
        by the parent class, lower-cased and expanded with ``variations``.
        The forms for each word are space-joined and added as an extra line
        after the prepared text.

        :param obj: the object being prepared, forwarded to the parent class.
        :returns: the prepared text followed by one line of forms per word.
        """
        res = super(VariationCharField, self).prepare(obj)
        # The pattern is a raw string because "\w" in a normal literal is an
        # invalid escape sequence (SyntaxWarning from Python 3.12). The
        # IGNORECASE flag was removed: it does not change what \w matches.
        all_terms = re.findall(r"\w+", res)
        all_variations = [
            ' '.join(variations(term.lower())) for term in all_terms
        ]

        # Build the result in one pass rather than with += in a loop.
        return '\n'.join([res] + all_variations)
def queryParsing(query):
    """Turn a query string into a flat list of stems and word variations.

    Tokens are produced by ``RegexTokenizer``, lower-cased, and filtered
    through ``StopFilter``. For each surviving token the result receives its
    Porter stem followed by every form returned by ``variations``.

    :param query: the query text to expand.
    :returns: a list of strings (stem first, then variations, per token).
    """
    # Imported locally so this function does not depend on the module's
    # import block providing LowercaseFilter.
    from whoosh.analysis import LowercaseFilter

    print("inside queryParsing")
    tokenizer = RegexTokenizer()
    lowercase = LowercaseFilter()
    stopper = StopFilter()

    # Stop-word removal must see lower-cased text; filtering first and
    # lower-casing afterwards lets capitalised stop words through.
    tokens = stopper(lowercase(tokenizer(query)))

    return_list = []
    for t in tokens:
        return_list.append(stem(t.text))
        return_list.extend(variations(t.text))

    return return_list
示例#9
0
 def __init__(self, fieldname, text, boost=1.0):
     """Store the field name, text and boost, and expand the text.

     ``self.words`` holds the result of ``variations`` applied to the text,
     computed once at construction time.
     """
     self.fieldname = fieldname
     self.text = text
     self.boost = boost
     expanded = variations(self.text)
     self.words = expanded
示例#10
0
def get_variations(term):
    """Return whatever ``variations`` produces for *term*."""
    forms = variations(term)
    return forms
示例#11
0
def get_variations(term):
    """Delegate to ``variations`` and hand back its result for *term*."""
    result = variations(term)
    return result
示例#12
0
 def _words(self, ixreader):
     """Return the variations of ``self.text`` that occur in the index.

     A variation is kept only when ``(fieldname, variation)`` is contained
     in ``ixreader``; the result is a list in iteration order.
     """
     field = self.fieldname
     present = []
     for candidate in variations(self.text):
         if (field, candidate) in ixreader:
             present.append(candidate)
     return present
示例#13
0
# Add two documents to the index and commit them.
# `ix` and `myfile` are not defined in this excerpt; they are expected to be
# created earlier in the script.
writer = ix.writer()
writer.add_document(title=u"My the document", content=myfile.read(),
                    path=u"/a")
# NOTE(review): this document reuses path u"/a" from the one above --
# confirm that sharing a path is intended.
writer.add_document(title=u"My the document two", content=u"This is my third test document!",
                    path=u"/a")
# The bare string literal below is disabled code: as an expression statement
# it has no effect, so these two documents are NOT added.
'''writer.add_document(title=u"Second try", content=u"This is the second third example.",
              path=u"/b")
writer.add_document(title=u"Third time's the charm", content=u"Examples are third many.",
                    path=u"/c")'''
writer.commit()

from whoosh.qparser import QueryParser
# Search the "content" field once for every variation of "enhanced" and
# print each result object.
with ix.searcher() as s:
	qp = QueryParser("content", schema=ix.schema)
	for i in variations("enhanced"):
		q = qp.parse(i)
	# Disabled: stemming of the parsed query -- q = stem(q)
		results = s.search(q)
		print(results)

# Show the forms that were searched for.
print(variations("enhanced"))
'''from whoosh.index import create_in
from whoosh.fields import *
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
ix = create_in("indexdir", schema)
writer = ix.writer()
writer.add_document(title=u"First document", path=u"/a",content=u"This is the first document we've added!")
writer.add_document(title=u"Second document", path=u"/b",content=u"The second one is even added more interesting!")
writer.commit()
from whoosh.qparser import QueryParser
示例#14
0
文件: terms.py 项目: intabeta/inta
 def _words(self, ixreader):
     """List the variations of ``self.text`` found in ``ixreader``.

     Each form from ``variations`` is tested as ``(fieldname, form)``
     against the reader; only the forms it contains are returned.
     """
     name = self.fieldname
     matches = []
     for form in variations(self.text):
         if (name, form) in ixreader:
             matches.append(form)
     return matches
示例#15
0
 def __init__(self, fieldname, text, boost=1.0):
     """Record the query parameters and precompute the text's variations.

     The forms returned by ``variations`` for the stored text are kept in
     ``self.words``.
     """
     self.fieldname = fieldname
     self.text = text
     self.boost = boost
     word_forms = variations(self.text)
     self.words = word_forms