class DidYouMean:
    """Reduce words to a crude, spelling-tolerant key.

    The key is produced by ``specialhash``; two spellings that reduce to the
    same key are treated as variants of the same word.
    """

    def __init__(self):
        # NOTE(review): ``Porter`` is not defined in this block; it must be
        # provided elsewhere in the module. Only its ``stem(word)`` method is
        # used here.
        self.stemmer = Porter()

    def specialhash(self, s):
        """Return the normalised key for the word ``s``.

        The steps are applied in this order:

        1. lower-case the word;
        2. replace every ``z`` with ``s``;
        3. remove every ``h``;
        4. collapse every run of one repeated ASCII letter (``a``-``z``)
           into a single letter;
        5. pass the result through ``self.stemmer.stem``.

        Characters other than ``a``-``z`` are never collapsed.
        """
        s = s.lower()
        s = s.replace("z", "s")
        s = s.replace("h", "")
        for offset in range(26):
            letter = chr(ord("a") + offset)
            doubled = letter + letter
            # One ``replace`` pass only halves a run ("lll" -> "ll"), so keep
            # going until no doubled letter is left; otherwise "helllo" and
            # "hello" would end up with different keys.
            while doubled in s:
                s = s.replace(doubled, letter)
        return self.stemmer.stem(s)
class DidYouMean:
    """Suggest known spellings for a word using a crude normalising key.

    ``learn`` groups the words it sees by ``specialhash`` key and counts how
    often each lower-cased spelling occurs; ``test`` looks a token up by the
    same key and reports the spellings recorded for it.
    """

    def __init__(self):
        # NOTE(review): ``Porter`` is not defined in this block; it must be
        # provided elsewhere in the module. Only its ``stem(word)`` method is
        # used here.
        self.stemmer = Porter()
        # Start with an empty table so that ``test`` can be called before
        # ``learn`` without raising AttributeError; ``learn`` replaces it.
        self.learned = {}

    def specialhash(self, s):
        """Return the normalised key for the word ``s``.

        The steps are applied in this order:

        1. lower-case the word;
        2. replace every ``z`` with ``s``;
        3. remove every ``h``;
        4. collapse every run of one repeated ASCII letter (``a``-``z``)
           into a single letter;
        5. pass the result through ``self.stemmer.stem``.

        Characters other than ``a``-``z`` are never collapsed.
        """
        s = s.lower()
        s = s.replace("z", "s")
        s = s.replace("h", "")
        for offset in range(26):
            letter = chr(ord("a") + offset)
            doubled = letter + letter
            # One ``replace`` pass only halves a run ("lll" -> "ll"), so keep
            # going until no doubled letter is left; otherwise "helllo" and
            # "hello" would end up with different keys.
            while doubled in s:
                s = s.replace(doubled, letter)
        return self.stemmer.stem(s)

    def test(self, token):
        """Return a human-readable verdict about ``token``.

        The token is looked up by its ``specialhash`` key:

        * no spellings recorded for the key -> the "can't found" message;
        * the lower-cased token is one of the recorded spellings ->
          ``'This word seems OK'``;
        * otherwise -> a ``'Did you mean ...'`` message listing the recorded
          spellings, most frequent first.
        """
        counts = self.learned.get(self.specialhash(token))
        if not counts:
            return "I can't found similar word in my learned db"

        # ``sorted`` builds a real list: on Python 3 ``dict.items()`` is a
        # view that can be neither sorted in place nor indexed.
        ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
        spellings = [word for word, _count in ranked]

        # ``learn`` stores spellings lower-cased, so compare in lower case;
        # otherwise a capitalised known word could never be reported as OK.
        if token.lower() in spellings:
            return 'This word seems OK'
        if len(spellings) == 1:
            return 'Did you mean "%s" ?' % (spellings[0],)
        alternatives = ", ".join('"' + word + '"' for word in spellings[1:])
        return 'Did you mean "%s" ? (or %s)' % (spellings[0], alternatives)

    def learn(self, listofsentences=None, n=2000):
        """Rebuild the lookup table from at most ``n`` sentences.

        ``listofsentences`` is an iterable of sentences, each sentence being
        an iterable of word strings. When it is omitted (or an empty list)
        ``brown.raw()`` is used instead. Any previously learned table is
        discarded.
        """
        # NOTE(review): ``mydict`` is defined elsewhere; the ``+= 1`` below
        # requires it to build a mapping whose missing keys default to a
        # number -- confirm.
        self.learned = defaultdict(mydict)
        if listofsentences is None or listofsentences == []:
            # NOTE(review): assumes ``brown.raw()`` yields sentences as
            # iterables of word strings -- confirm against the corpus API
            # this module imports.
            listofsentences = brown.raw()
        for i, sent in enumerate(listofsentences):
            if i >= n:  # Limit to the first n sentences of the corpus
                break
            for word in sent:
                self.learned[self.specialhash(word)][word.lower()] += 1
def __init__(self):
    """Create a stemmer and store it on ``self.stemmer``."""
    # NOTE(review): ``Porter`` is not defined in this block; presumably a
    # Porter stemmer class imported elsewhere in the module -- confirm.
    stemmer = Porter()
    self.stemmer = stemmer