示例#1
0
	def checkWords(self, words):
		"""Return suggestions for the words that ``self.check`` would change.

		Each word is mapped to ``self.check(word)``; only entries whose
		suggestion differs from ``strip_accents(word)`` are kept.

		:param words: indexable collection of words to check.
		:return: dict of ``word -> suggestion``; empty when ``words`` is empty
			or its first element is falsy.
		"""
		# Guard clause: nothing to do for empty input or an empty leading word.
		if not words or not words[0]:
			return {}

		# First pass: ask the checker for every word.
		checked = {}
		for word in words:
			checked[word] = self.check(word)

		# Second pass: keep only words whose suggestion is a real change.
		corrections = {}
		for word, suggestion in checked.items():
			if strip_accents(word) != suggestion:
				corrections[word] = suggestion
		return corrections
示例#2
0
	def check(self, word):
		"""Return the closest known word for ``word``, or the word itself.

		The word is first passed through ``strip_accents``. If the result is
		empty or already present in ``self.allWords`` it is returned as is.
		Otherwise the vocabulary is narrowed to entries of similar length that
		share the first or the last character with the word, and the best
		``get_close_matches`` hit (cutoff 0.7) is returned.

		:param word: the word to check.
		:return: the accent-stripped word, or the best matching entry of
			``self.allWords`` when one is found.
		"""
		word = strip_accents(word)

		# Empty input and known words are returned unchanged.
		if not word or word in self.allWords:
			return word

		length = len(word)
		# Tolerate one character of length difference per five characters,
		# but always at least one.
		max_diffs = max(1, length // 5)
		first, last = word[0], word[-1]
		# startswith/endswith instead of indexing: an empty string in the
		# vocabulary is skipped rather than raising IndexError.
		candidates = [
			known for known in self.allWords
			if abs(len(known) - length) <= max_diffs
			and (known.startswith(first) or known.endswith(last))
		]
		matches = get_close_matches(word, candidates, cutoff=0.7)
		return matches[0] if matches else word
示例#3
0
	def _getStemDict(self, keywords):
		"""Map each keyword stem to one representative original word.

		Walks the module-level ``savedStems`` mapping (word -> stem), strips
		accents from every stem and groups the words whose stem is among the
		requested keywords. For each group the word with the highest
		``wordCounter[word]`` value is chosen; on ties the first one wins.

		:param keywords: iterable of indexable items; element ``[0]`` of each
			item is used as the stem to look for.
		:return: dict of ``stem -> chosen word``.
		"""
		wanted = {entry[0] for entry in keywords}

		# Group the original words by their accent-stripped stem.
		grouped = {}
		for word, raw_stem in savedStems.items():
			plain_stem = strip_accents(raw_stem)
			if plain_stem not in wanted:
				continue
			grouped.setdefault(plain_stem, []).append(word)

		# Pick the word with the largest counter value for every stem.
		return {
			plain_stem: max(group, key=lambda candidate: wordCounter[candidate])
			for plain_stem, group in grouped.items()
		}
示例#4
0
 def normalizeQuery(self, query):
     """Run ``query`` through normalize_text, createStem and strip_accents.

     The three helpers are applied in that order and the final result is
     returned.
     """
     cleaned = normalize_text(query)
     stemmed = createStem(cleaned)
     return strip_accents(stemmed)
示例#5
0
	def normalizeQuery(self, query):
		"""Run ``query`` through normalize_text, createStem and strip_accents.

		The three helpers are applied in that order and the final result is
		returned.
		"""
		cleaned = normalize_text(query)
		stemmed = createStem(cleaned)
		return strip_accents(stemmed)
示例#6
0
def stripAccents(words):
    """Return the set obtained by applying ``strip_accents`` to each word.

    Duplicates that collapse to the same result appear once.
    """
    return set(map(strip_accents, words))
示例#7
0
def getstem(word, lang):
    """Return the stem of ``word`` for ``lang``.

    The word goes through ``normalize_text``, then ``createStem`` (which
    also receives ``lang``), and finally ``strip_accents``.
    """
    return strip_accents(createStem(normalize_text(word), lang))
示例#8
0
	def test_strip_accents(self):
		"""strip_accents turns accented lower/upper-case letters into plain ones."""
		accented = 'ěščřžýáíéúůĚŠČŘŽÝÁÍÉÚŮ'
		expected = 'escrzyaieuuESCRZYAIEUU'
		self.assertEqual(expected, strip_accents(accented))
示例#9
0
def stripAccents(words):
	"""Return the set obtained by applying ``strip_accents`` to each word.

	Duplicates that collapse to the same result appear once.
	"""
	return set(map(strip_accents, words))
示例#10
0
def getstem(word, lang):
	"""Return the stem of ``word`` for ``lang``.

	The word goes through ``normalize_text``, then ``createStem`` (which
	also receives ``lang``), and finally ``strip_accents``.
	"""
	return strip_accents(createStem(normalize_text(word), lang))
示例#11
0
 def test_strip_accents(self):
     """strip_accents turns accented lower/upper-case letters into plain ones."""
     accented = 'ěščřžýáíéúůĚŠČŘŽÝÁÍÉÚŮ'
     expected = 'escrzyaieuuESCRZYAIEUU'
     self.assertEqual(expected, strip_accents(accented))