def __init__(self, double_metaphone=False, metaphone=False, soundex=False, nysiis=False, caverphone=False, *args, **kwargs):
    """Select the phonetic encoders to use as features, then run the superclass initializer.

    Each true flag registers one callable in ``self.funs``, keyed by the
    feature name that ``feature_detector`` will store its result under.

    Args:
        double_metaphone: register ``dm`` (applied to the token coerced to
            text) under ``'double-metaphone'``.
        metaphone: register ``phonetics.metaphone`` under ``'metaphone'``.
        soundex: register ``phonetics.soundex`` under ``'soundex'``.
        nysiis: register ``phonetics.nysiis`` under ``'nysiis'``.
        caverphone: register ``phonetics.caverphone`` under ``'caverphone'``.
        *args: forwarded unchanged to ``ClassifierBasedPOSTagger.__init__``.
        **kwargs: forwarded unchanged to ``ClassifierBasedPOSTagger.__init__``.

    Note:
        The five flags come before ``*args`` in the signature, so positional
        arguments bind to the flags first; pass superclass arguments by
        keyword unless all five flags are also given positionally.
    """
    self.funs = {}

    if double_metaphone:
        # ``unicode`` only exists on Python 2; on Python 3 ``str`` is already
        # the text type. Resolve it once here so the feature function does not
        # raise NameError when it is eventually called on Python 3.
        try:
            text_type = unicode
        except NameError:
            text_type = str
        self.funs['double-metaphone'] = lambda s: dm(text_type(s))

    # The phonetics attribute is only looked up when its flag is set.
    if metaphone:
        self.funs['metaphone'] = phonetics.metaphone

    if soundex:
        self.funs['soundex'] = phonetics.soundex

    if nysiis:
        self.funs['nysiis'] = phonetics.nysiis

    if caverphone:
        self.funs['caverphone'] = phonetics.caverphone

    # The superclass initializer must run last: feature_detector reads
    # self.funs, and the superclass initializer presumably calls
    # feature_detector while training (TODO confirm against the NLTK version
    # in use), so self.funs has to exist before this call.
    ClassifierBasedPOSTagger.__init__(self, *args, **kwargs)
def feature_detector(self, tokens, index, history):
    """Return the superclass features for a token, extended with phonetic encodings.

    Args:
        tokens: indexable sequence of tokens; only ``tokens[index]`` is read
            here, the whole sequence is passed on to the superclass.
        index: position of the token being featurized.
        history: passed through unchanged to the superclass feature detector.

    Returns:
        The object returned by ``ClassifierBasedPOSTagger.feature_detector``,
        with one extra entry per function registered in ``self.funs``: the
        entry's key is the registered name and its value is that function
        applied to ``tokens[index]``.
    """
    feats = ClassifierBasedPOSTagger.feature_detector(self, tokens, index, history)
    token = tokens[index]

    # dict.iteritems() was removed in Python 3; items() yields the same
    # (key, function) pairs on both Python 2 and Python 3.
    for key, fun in self.funs.items():
        feats[key] = fun(token)

    return feats