Python variations示例，whoosh.lang.morph_en.variations Python示例

示例#1

0

显示文件

文件： terms.py 项目： mykytamorachov/outpost

 def _btexts(self, ixreader):
     """Yield the encoded form of each variation of ``self.text`` in the index.

     Every word produced by ``variations(self.text)`` is converted with the
     ``to_bytes`` method of this query's field (looked up in
     ``ixreader.schema``). Only values for which ``(fieldname, value)`` is
     contained in ``ixreader`` are yielded.

     :param ixreader: index reader providing ``schema`` and supporting
         ``(fieldname, bytes) in ixreader`` membership tests.
     """
     fieldname = self.fieldname
     to_bytes = ixreader.schema[fieldname].to_bytes
     for word in variations(self.text):
         try:
             btext = to_bytes(word)
         except ValueError:
             # A variation the field cannot encode cannot be in the index
             # either; skip it instead of aborting the whole expansion.
             continue
         if (fieldname, btext) in ixreader:
             yield btext

示例#2

0

显示文件

文件： terms.py 项目： adamhorner/yaki-tng

 def _btexts(self, ixreader):
     """Generate the indexed byte terms matching variations of ``self.text``.

     Each variation returned by ``variations(self.text)`` is encoded with
     the field's ``to_bytes`` converter taken from ``ixreader.schema`` and
     yielded only when ``(fieldname, encoded)`` is present in ``ixreader``.

     :param ixreader: index reader exposing ``schema`` and term membership.
     """
     fieldname = self.fieldname
     to_bytes = ixreader.schema[fieldname].to_bytes
     for word in variations(self.text):
         try:
             btext = to_bytes(word)
         except ValueError:
             # The field rejected this variation; ignore it and keep
             # checking the remaining ones rather than failing outright.
             continue
         if (fieldname, btext) in ixreader:
             yield btext

示例#3

0

显示文件

文件： views.py 项目： ashiq-techie/lsiSearch

def queryParsing(query):
    """Expand a query string into a flat list of stems and word variations.

    The query is tokenized with ``RegexTokenizer`` and passed through
    ``StopFilter``. For every remaining token the text is lower-cased in
    place, then its stem is appended to the result, followed by each item
    returned by ``variations`` for the lower-cased text.

    :param query: the raw query text to tokenize.
    :returns: list containing, per token, the stem and then its variations.
    """
    from whoosh import qparser
    from whoosh.analysis import RegexTokenizer
    from whoosh.lang.porter import stem
    from whoosh.lang.morph_en import variations
    from whoosh.analysis import StopFilter
    print("inside queryParsing")

    tokenize = RegexTokenizer()
    expanded = []

    # Stop words are filtered out of the token stream before expansion.
    drop_stop_words = StopFilter()

    for token in drop_stop_words(tokenize(query)):
        # Normalise case on the token itself, as later steps read it back.
        token.text = token.text.lower()
        lowered = token.text

        # Stem first, then every variation of the same lower-cased word.
        expanded.append(stem(lowered))
        expanded.extend(variations(lowered))

    return expanded

示例#4

0

显示文件

文件： search_indexes.py 项目： dstl/lighthouse

    def prepare(self, obj):
        """Return the prepared field text followed by word variations.

        The value from the parent class's ``prepare`` is split into word
        tokens. For each token, the variations of its lower-cased form are
        joined with single spaces and appended to the text on a new line.

        :param obj: the object being prepared; passed to the parent unchanged.
        :returns: the parent's text with one extra line per word, or the
            parent's value unchanged when it is ``None``.
        """
        res = super(VariationCharField, self).prepare(obj)
        if res is None:
            # NOTE(review): presumably the parent returns None for an empty
            # field; re.findall would raise TypeError on it -- confirm.
            return res

        # Raw string: "\w" in a plain literal is an invalid escape sequence.
        # re.IGNORECASE was dropped because \w is not case-sensitive.
        all_terms = re.findall(r"\w+", res)
        variation_lines = [
            '\n%s' % ' '.join(variations(term.lower())) for term in all_terms
        ]

        # Single join instead of repeated string concatenation in a loop.
        return res + ''.join(variation_lines)

示例#5

0

显示文件

文件： terms.py 项目： 32footsteps/SpecialCollectionsProject

    def _btexts(self, ixreader):
        """Yield encoded variations of ``self.text`` that exist in the index.

        Each word from ``variations(self.text)`` is encoded with the field's
        ``to_bytes`` method (field looked up in ``ixreader.schema``). Words
        whose encoding raises ``ValueError`` are skipped; the rest are
        yielded only if ``(fieldname, encoded)`` is contained in ``ixreader``.

        :param ixreader: index reader providing ``schema`` and membership tests.
        """
        field = self.fieldname
        encode = ixreader.schema[field].to_bytes
        for variant in variations(self.text):
            try:
                encoded = encode(variant)
            except ValueError:
                # Not encodable for this field: nothing to look up.
                pass
            else:
                if (field, encoded) in ixreader:
                    yield encoded

示例#6

0

显示文件

文件： terms.py 项目： MadAd360/GoGramming

    def _btexts(self, ixreader):
        """Generate the byte terms for variations of ``self.text`` in the index.

        Variations come from ``variations(self.text)`` and are converted via
        the ``to_bytes`` method of the field found in ``ixreader.schema``.
        A variation is dropped when conversion raises ``ValueError`` or when
        ``(fieldname, converted)`` is not contained in ``ixreader``.

        :param ixreader: index reader providing ``schema`` and membership tests.
        """
        name = self.fieldname
        convert = ixreader.schema[name].to_bytes
        for candidate in variations(self.text):
            try:
                term_bytes = convert(candidate)
            except ValueError:
                # Conversion failed, so this candidate is ignored.
                continue

            if (name, term_bytes) not in ixreader:
                continue
            yield term_bytes

示例#7

0

显示文件

    def prepare(self, obj):
        """Return the parent's prepared text with word variations appended.

        Word tokens are extracted from the text returned by the parent
        class's ``prepare``. For each token, the variations of its
        lower-cased form are space-joined and added as a new line.

        :param obj: the object being prepared; forwarded to the parent.
        :returns: the extended text, or the parent's value unchanged when
            it is ``None``.
        """
        res = super(VariationCharField, self).prepare(obj)
        if res is None:
            # NOTE(review): the parent presumably yields None for a missing
            # value; re.findall cannot scan None -- confirm against parent.
            return res

        # Use a raw string so "\w" is not parsed as an invalid string escape.
        # The IGNORECASE flag was removed: \w is not case-sensitive.
        all_terms = re.findall(r"\w+", res)
        all_variations = [
            ' '.join(variations(term.lower())) for term in all_terms
        ]

        # Build the result in one pass rather than with += inside a loop.
        return res + ''.join('\n%s' % variation for variation in all_variations)

示例#8

0

显示文件

文件： query_parsing.py 项目： josnancy/semanticRetrieval

def queryParsing(query):
    """Turn a query string into a list of stems and word variations.

    Tokens are produced by ``RegexTokenizer`` and filtered by ``StopFilter``.
    Each surviving token has its text lower-cased in place. Its stem is then
    added to the result, followed by every item from ``variations`` for the
    lower-cased text.

    :param query: the raw query text.
    :returns: list with, for each token, the stem and then its variations.
    """
    print("inside queryParsing")
    split_words = RegexTokenizer()
    terms = []

    # Stop words are removed from the token stream.
    without_stop_words = StopFilter()

    for tok in without_stop_words(split_words(query)):
        # Lower-case on the token object itself, matching what is read below.
        tok.text = tok.text.lower()
        word = tok.text

        # The stem goes in first, then all variations of the same word.
        terms.append(stem(word))
        terms.extend(variations(word))

    return terms

示例#9

0

显示文件

文件： query.py 项目： parulian1/fashionment

 def __init__(self, fieldname, text, boost=1.0):
     """Store the query parameters and the variations of ``text``.

     :param fieldname: value stored as ``self.fieldname``.
     :param text: value stored as ``self.text`` and passed to ``variations``.
     :param boost: value stored as ``self.boost``; defaults to 1.0.
     """
     self.fieldname, self.text, self.boost = fieldname, text, boost
     # Computed once at construction time from the stored text.
     self.words = variations(self.text)

示例#10

0

显示文件

def get_variations(term):
    """Return the result of ``variations`` for ``term``, unchanged."""
    expanded = variations(term)
    return expanded

示例#11

0

显示文件

文件： gazetteerExtractor.py 项目： jerrygaoLondon/SPTR

def get_variations(term):
    """Delegate to ``variations`` and hand back whatever it returns.

    :param term: the value forwarded to ``variations``.
    """
    term_variations = variations(term)
    return term_variations

示例#12

0

显示文件

文件： terms.py 项目： skrieder/microblog

 def _words(self, ixreader):
     """Return the variations of ``self.text`` that are present in the index.

     A variation is kept when ``(self.fieldname, variation)`` is contained
     in ``ixreader``; the order produced by ``variations`` is preserved.

     :param ixreader: index reader supporting ``(fieldname, word)`` membership.
     :returns: list of matching variations.
     """
     field = self.fieldname
     present = []
     for candidate in variations(self.text):
         if (field, candidate) in ixreader:
             present.append(candidate)
     return present

示例#13

0

显示文件

文件： Whoosh_usage.py 项目： shivam1104/Dhund

# NOTE(review): `ix`, `myfile` and `variations` are not defined in this
# excerpt; they are presumably set up earlier in the script -- confirm.

# Add two documents through one writer. Both are given the same path u"/a".
writer = ix.writer()
writer.add_document(title=u"My the document", content=myfile.read(),
                    path=u"/a")
writer.add_document(title=u"My the document two", content=u"This is my third test document!",
                    path=u"/a")
# The two add_document calls below are disabled: they sit inside a string
# literal that is evaluated and discarded.
'''writer.add_document(title=u"Second try", content=u"This is the second third example.",
              path=u"/b")
writer.add_document(title=u"Third time's the charm", content=u"Examples are third many.",
                    path=u"/c")'''
writer.commit()

from whoosh.qparser import QueryParser
with ix.searcher() as s:
	# Queries are parsed against the "content" field of the index schema.
	qp = QueryParser("content", schema=ix.schema)
	# Run one separate search per variation of "enhanced" and print each
	# result object.
	for i in variations("enhanced"):
		q = qp.parse(i)
	#q = stem(q)
		results = s.search(q)
		print(results)

# Print the variations themselves for reference.
print(variations("enhanced"))
'''from whoosh.index import create_in
from whoosh.fields import *
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
ix = create_in("indexdir", schema)
writer = ix.writer()
writer.add_document(title=u"First document", path=u"/a",content=u"This is the first document we've added!")
writer.add_document(title=u"Second document", path=u"/b",content=u"The second one is even added more interesting!")
writer.commit()
from whoosh.qparser import QueryParser

示例#14

0

显示文件

文件： terms.py 项目： intabeta/inta

 def _words(self, ixreader):
     """Collect the variations of ``self.text`` that the index contains.

     Each word from ``variations(self.text)`` is kept only when the pair
     ``(self.fieldname, word)`` is contained in ``ixreader``.

     :param ixreader: index reader supporting ``(fieldname, word)`` membership.
     :returns: list of the variations found, in the order they were produced.
     """
     name = self.fieldname
     found = []
     for variant in variations(self.text):
         if (name, variant) not in ixreader:
             continue
         found.append(variant)
     return found

示例#15

0

显示文件

 def __init__(self, fieldname, text, boost=1.0):
     """Record the field name, text and boost, then expand the text.

     :param fieldname: stored as ``self.fieldname``.
     :param text: stored as ``self.text``; its variations become ``self.words``.
     :param boost: stored as ``self.boost``; defaults to 1.0.
     """
     self.fieldname, self.text, self.boost = fieldname, text, boost
     # The expansion is taken from the stored text, once, at construction.
     self.words = variations(self.text)