Пример #1
0
class DidYouMean:
    """Normalise words into a coarse key so that similar spellings collide."""

    def __init__(self):
        # Porter is not defined in this block; specialhash() only requires
        # that the object expose a ``stem(str)`` method.
        self.stemmer = Porter()

    def specialhash(self, s):
        """Return the normalised key for the word *s*.

        The word is lower-cased, every "z" is replaced by "s", every "h" is
        dropped, each doubled letter "aa".."zz" is collapsed to a single
        letter, and the result is passed through the stemmer exactly once.
        """
        s = s.lower()
        s = s.replace("z", "s")
        s = s.replace("h", "")
        # One non-overlapping pass per letter: "ss" -> "s", but "sss" -> "ss".
        for letter in "abcdefghijklmnopqrstuvwxyz":
            s = s.replace(letter + letter, letter)
        # Stem once, after all letter-level normalisation is finished
        # (stemming inside the loop re-stemmed the word on every iteration).
        return self.stemmer.stem(s)
Пример #2
0
class DidYouMean:
    """Suggest previously seen words whose normalised key matches a token.

    Call learn() to build the lookup table, then test() to query it.
    """

    def __init__(self):
        # Porter is not defined in this block; specialhash() only requires
        # that the object expose a ``stem(str)`` method.
        self.stemmer = Porter()
        # Maps specialhash(word) -> {lower-cased word: occurrence count}.
        # Starts empty so test() can be called before learn().
        self.learned = {}

    def specialhash(self, s):
        """Return the normalised key for the word *s*.

        The word is lower-cased, every "z" is replaced by "s", every "h" is
        dropped, each doubled letter "aa".."zz" is collapsed to a single
        letter, and the result is stemmed once.
        """
        s = s.lower()
        s = s.replace("z", "s")
        s = s.replace("h", "")
        # One non-overlapping pass per letter: "ss" -> "s", but "sss" -> "ss".
        for letter in "abcdefghijklmnopqrstuvwxyz":
            s = s.replace(letter + letter, letter)
        return self.stemmer.stem(s)

    def test(self, token):
        """Return a human-readable verdict string for *token*.

        If no learned word shares the token's key, a "not found" message is
        returned.  If the token itself is among the learned words for that
        key, it is reported as OK.  Otherwise the learned words for the key
        are offered as suggestions, in the order produced by sortby().
        """
        hashed = self.specialhash(token)
        if hashed not in self.learned:
            return "I can't found similar word in my learned db"

        # Copy into a real list: the result is handed to sortby() and then
        # indexed, neither of which works on a Python 3 dict view.
        words = list(self.learned[hashed].items())
        # NOTE(review): sortby is defined outside this block; presumably it
        # sorts in place on column 1 (the count), highest first - confirm.
        sortby(words, 1, reverse=1)

        # NOTE: learn() stores words lower-cased while token is compared
        # as given, so a token containing upper-case letters never matches.
        if any(entry[0] == token for entry in words):
            return 'This word seems OK'
        if len(words) == 1:
            return 'Did you mean "%s" ?' % words[0][0]
        alternatives = ", ".join('"' + entry[0] + '"' for entry in words[1:])
        return 'Did you mean "%s" ? (or %s)' % (words[0][0], alternatives)

    def learn(self, listofsentences=None, n=2000):
        """Rebuild the lookup table from at most *n* sentences.

        listofsentences: iterable of sentences, each an iterable of word
            strings.  When omitted (or an empty list), ``brown.raw()`` is
            used instead.
        n: maximum number of sentences to consume.

        Any previously learned data is discarded.
        """
        # NOTE(review): mydict is defined outside this block; it must give
        # unseen words a numeric default so that ``+= 1`` works - confirm.
        self.learned = defaultdict(mydict)
        # An explicit [] keeps its historical meaning of "use the corpus".
        if listofsentences is None or listofsentences == []:
            # NOTE(review): brown is defined outside this block; assumed to
            # yield sentences as sequences of tokens - confirm.
            listofsentences = brown.raw()
        for i, sent in enumerate(listofsentences):
            if i >= n:  # Limit to the first n sentences of the corpus
                break
            for word in sent:
                self.learned[self.specialhash(word)][word.lower()] += 1
Пример #3
0
 def __init__(self):
     """Build a Porter object and keep it on the instance as ``stemmer``."""
     porter = Porter()
     self.stemmer = porter
Пример #4
0
	def __init__(self):
		"""Build a Porter object and keep it on the instance as ``stemmer``."""
		porter = Porter()
		self.stemmer = porter