Example #1
	def __init__(self, map, options):
		"""
		Create a Tokenizer from a map.
		@param map: A map associating tokens to their possible recognitions.
		@type map: dict (unicode -> list of object)
		@param options: The options to use.
			Required information is:
				- A separator
		@type options: dict
		"""
		self._separator = options["separator"]
		# Build a finite-state automaton over the map's keys and hand it
		# to the underlying Parser; keep the map for later lookups.
		fsa = self.__create_key_fsa(map)
		Parser.__init__(self, fsa)
		self.__dict = map
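
For orientation, a minimal usage sketch for this constructor. The token map, the separator value, and the class name Tokenizer are illustrative assumptions, not taken from the example:

# Hypothetical usage sketch: token_map associates each token with the
# list of objects it may be recognized as; "separator" is required.
token_map = {
	u"the": ["DET"],
	u"duck": ["NOUN", "VERB"],  # an ambiguous token has several recognitions
	u" ": ["SEP"],
}
tokenizer = Tokenizer(token_map, {"separator": u" "})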
Example #2
	def __call__(self, stream):
		"""
		Tokenize a character stream.
		@param stream: A character stream.
		@type stream: unicode
		@return: The result of the parsing.
		@rtype: OptionTree

		@raise tokenizer.UnknownTokenException: If an unexpected token is encountered.
		"""
		def explode_list(dct, lst, pos):
			# Recursively expand lst[pos:] into an OptionTree: each
			# possible recognition of the token at pos becomes a child
			# whose subtree covers the remaining tokens.
			t = OptionTree()
			if pos < len(lst):
				for obj in dct[lst[pos]]:
					c = explode_list(dct, lst, pos + 1)
					c.element = obj
					t.append(c)
			return t

		terminated = stream + self._separator
		try:
			p = Parser.__call__(self, terminated)
		except ParseError as pe:
			# The parse died at offset len(pe) - 1; report the stretch of
			# the stream around the dead end, delimited by separators.
			dead_end = len(pe) - 1
			rgt = max(stream.rfind(self._separator, 0, dead_end - 1) + 1, 0)
			lft = stream.find(self._separator, dead_end + 1)
			if lft != -1:
				ell = u"..."
			else:
				ell = u""
			raise UnknownTokenException(stream[rgt:lft] + ell)
		# On success, expand the parsed token sequence into an OptionTree
		# of all possible recognitions, as promised by the @return contract.
		return explode_list(self.__dict, p, 0)
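
And a sketch of calling the tokenizer built in the first example; the input string and the error handling are again illustrative:

# Hypothetical usage sketch, continuing the constructor example above.
try:
	tree = tokenizer(u"the duck")  # an OptionTree of all recognitions
except UnknownTokenException as ute:
	# The exception message carries the unrecognized stretch of input,
	# followed by u"..." when more of the stream remained.
	print(ute)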