def __init__(self, params):
    """Build the hundisambig-backed morphological analyzer.

    @param params: mapping that must contain the keys
                   'hundisambig_runnable', 'hundisambig_model' and
                   'hundisambig_morphtable'; 'ocamorph_encoding' is
                   optional and falls back to 'iso-8859-2'.
    """
    # Required settings: a missing key raises KeyError, in this order.
    runnable = params['hundisambig_runnable']
    model = params['hundisambig_model']
    morphtable = params['hundisambig_morphtable']
    # Optional setting with a default.
    encoding = params.get('ocamorph_encoding', 'iso-8859-2')
    # NOTE(review): the meaning of the trailing True flag is defined by
    # Hundisambig, which is not visible here -- confirm before changing.
    disambiguator = Hundisambig(runnable, model, morphtable, encoding, True)
    self.morph_analyzer = HundisambigAnalyzer(disambiguator)
class HundisambigWrapper(PosTaggerWrapper, LemmatizerWrapper):
    """POS tagger and lemmatizer wrapper backed by hundisambig."""

    def __init__(self, params):
        """Create the analyzer from the configuration mapping @c params.

        Required keys: 'hundisambig_runnable', 'hundisambig_model',
        'hundisambig_morphtable'.  Optional key: 'ocamorph_encoding'
        (defaults to 'iso-8859-2').
        """
        hundisambig = Hundisambig(
            params['hundisambig_runnable'],
            params['hundisambig_model'],
            params['hundisambig_morphtable'],
            params.get('ocamorph_encoding', 'iso-8859-2'),
            True)
        self.morph_analyzer = HundisambigAnalyzer(hundisambig)

    def add_pos_and_stems(self, tokens):
        """Adds POS tags and lemmatizes the words in @c tokens.

        @param tokens: a list of sentences; each sentence is a list of
                       tokens, and each token is a list whose first
                       element is the word form.  Two fields taken from
                       the analysis are appended to every token in
                       place.  Nothing is returned.

        A token whose analysis is missing or malformed is left untouched
        and reported with a warning; processing continues.
        """
        for sen in tokens:
            # Skip empty sentences: there is nothing to analyze.
            if not sen:
                continue
            # TODO The API expects [sentences+], but it can only handle one :(
            words = [word[0] for word in sen]
            ret = list(self.morph_analyzer.analyze([words]))[0]
            for tok_i, token in enumerate(sen):
                try:
                    # The analysis has three '|'-separated fields; splitting
                    # from the right keeps any extra '|' in the first field.
                    spl = ret[tok_i][1].rsplit('|', 2)
                    # Read both fields before appending, so a short analysis
                    # never leaves a half-updated token behind.
                    # NOTE(review): presumably last = POS tag and
                    # first = lemma -- confirm against hundisambig output.
                    last_field, first_field = spl[2], spl[0]
                    token.append(last_field)
                    token.append(first_field)
                except Exception as e:
                    # Best effort: log and carry on with the next token.
                    # The message needs a %s placeholder, otherwise the
                    # logging module fails to format the record.
                    logging.warning("Exception: %s", e)
                    logging.warning(unicode(token).encode('utf-8'))
class HundisambigWrapper(PosTaggerWrapper, LemmatizerWrapper):
    """Wraps hundisambig so it can be used for POS tagging and lemmatization."""

    def __init__(self, params):
        """Set up @c self.morph_analyzer from the settings in @c params.

        @param params: configuration mapping.  The keys
                       'hundisambig_runnable', 'hundisambig_model' and
                       'hundisambig_morphtable' are mandatory;
                       'ocamorph_encoding' defaults to 'iso-8859-2'.
        """
        hundisambig = Hundisambig(
            params['hundisambig_runnable'],
            params['hundisambig_model'],
            params['hundisambig_morphtable'],
            params.get('ocamorph_encoding', 'iso-8859-2'),
            True)
        self.morph_analyzer = HundisambigAnalyzer(hundisambig)

    def add_pos_and_stems(self, tokens):
        """Adds POS tags and lemmatizes the words in @c tokens.

        Each sentence in @c tokens is analyzed on its own, and every token
        (a list starting with the word form) is extended in place with the
        third and then the first '|'-separated field of its analysis.
        Failures on individual tokens are logged as warnings and skipped.
        """
        for sen_i, sen in enumerate(tokens):
            if not sen:
                # Empty sentence: nothing to send to the analyzer.
                continue
            # TODO The API expects [sentences+], but it can only handle one :(
            ret = list(self.morph_analyzer.analyze(
                [[word[0] for word in sen]]))[0]
            for tok_i, _ in enumerate(sen):
                try:
                    spl = ret[tok_i][1].rsplit('|', 2)
                    # spl[2] is read first, so an analysis with fewer than
                    # three fields fails before the token is modified.
                    tokens[sen_i][tok_i].append(spl[2])
                    tokens[sen_i][tok_i].append(spl[0])
                except Exception as e:
                    # Deliberately broad: one bad token must not abort the
                    # whole batch.  The format string carries a %s so the
                    # exception text is actually logged.
                    logging.warning("Exception: %s", str(e))
                    logging.warning(unicode(sen[tok_i]).encode('utf-8'))
def __init__(self, params):
    """Create @c self.morph_analyzer from the settings in @c params.

    Reads 'hundisambig_runnable', 'hundisambig_model' and
    'hundisambig_morphtable' (all mandatory) plus the optional
    'ocamorph_encoding', which defaults to 'iso-8859-2'.
    """
    # Positional arguments for Hundisambig, collected in call order so
    # that a missing mandatory key fails exactly as a direct call would.
    # NOTE(review): the final True is an opaque flag of Hundisambig --
    # its meaning is not visible from this block.
    ctor_args = (
        params['hundisambig_runnable'],
        params['hundisambig_model'],
        params['hundisambig_morphtable'],
        params.get('ocamorph_encoding', 'iso-8859-2'),
        True,
    )
    self.morph_analyzer = HundisambigAnalyzer(Hundisambig(*ctor_args))