def __init__(self, map, options):
    """
    Create a Tokenizer from a map.

    @param map: A map associating tokens to their possible recognitions.
    @type map: dict (unicode -> list of object)
    @param options: The options to use. Required information is:
        - A separator
    @type options: dict
    """
    self._separator = options["separator"]
    fsa = self.__create_key_fsa(map)
    Parser.__init__(self, fsa)
    self.__dict = map
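
# A minimal sketch of the expected arguments (illustrative only: the token
# strings and tags below are made up, and the enclosing class is assumed
# to be named Tokenizer):
#
#   token_map = {u"the": [u"DET"], u"watch": [u"NOUN", u"VERB"]}
#   tokenizer = Tokenizer(token_map, {"separator": u" "})
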
def __call__(self, stream):
    """
    Tokenize a character stream.

    @param stream: A character stream.
    @type stream: unicode
    @return: The result of the parsing.
    @rtype: OptionTree
    @raise tokenizer.UnknownTokenException: If an unexpected token is encountered.
    """
    def explode_list(dct, lst, pos):
        # Recursively expand the token sequence lst[pos:] into an
        # OptionTree, branching over each possible recognition of a token.
        t = OptionTree()
        if pos < len(lst):
            for obj in dct[lst[pos]]:
                c = explode_list(dct, lst, pos + 1)
                c.element = obj
                t.append(c)
        return t

    # Append the separator so the final token is properly delimited.
    terminated = stream + self._separator
    try:
        p = Parser.__call__(self, terminated)
    except ParseError as pe:
        # The length of the ParseError marks how far the parser got
        # before failing.
        dead_end = len(pe) - 1
        # Widen the failure point to the enclosing separator boundaries,
        # so the whole offending token is reported.
        rgt = max(stream.rfind(self._separator, 0, dead_end - 1) + 1, 0)
        lft = stream.find(self._separator, dead_end + 1)
        if lft != -1:
            ell = u"..."
        else:
            # No later separator: take everything up to the end of the
            # stream and omit the ellipsis.
            lft = len(stream)
            ell = u""
        raise UnknownTokenException(stream[rgt:lft] + ell)
    # Expand the recognized token sequence into the tree of its
    # possible readings.
    return explode_list(self.__dict, p, 0)
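
# Usage sketch (an assumption, not part of the original module): tokenizing
# a stream yields an OptionTree whose branches cover every combination of
# readings, while a token absent from the map raises UnknownTokenException
# carrying the offending span. The class name Tokenizer and the sample
# tokens are hypothetical.
#
#   tokenizer = Tokenizer({u"a": [u"DET"], u"watch": [u"NOUN", u"VERB"]},
#                         {"separator": u" "})
#   tree = tokenizer(u"a watch")   # two branches: DET->NOUN and DET->VERB
#   tokenizer(u"a dog")            # raises, e.g., UnknownTokenException(u"dog")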