def list_tokenize(self, instr):
    """
    Tokenizes the given input and returns the result as a TokenList.

    Other classes (e.g. the ContainerToken class) may call this
    method when they need nested input processed by the same parser.

    Arguments:
    instr -- a string containing the input to tokenize.
    """
    tokens = TokenList()

    # Characters that neither helper recognises are collected here
    # until they can be emitted as one plain Token.
    # NOTE(review): presumably the _parse_* helpers flush this buffer
    # themselves before pushing a recognised token -- confirm.
    self._unknown = ""

    while instr:
        # Containers are tried first at each position.
        instr, matched = self._parse_containers(instr, tokens)
        if matched:
            continue

        # No container here; fall back to ordinary tokens.
        instr, matched = self._parse_tokens(instr, tokens)
        if matched:
            continue

        # Nothing recognised: move one character into the
        # unknown buffer and retry from the next position.
        self._unknown += instr[0]
        instr = instr[1:]

    # Emit whatever unrecognised text is left once input is exhausted.
    if self._unknown:
        tokens.push(Token(self._unknown))
        self._unknown = ""

    return tokens
class ContainerToken(Token):

    """
    Container token class.

    Allows a container (e.g. matched pair of parentheses, braces,
    brackets) to be represented by a single token in another list
    of tokens, and treated as such.

    Instance attributes:
    _tlist : a TokenList instance holding, in order, the opening
             token, the parsed contents and the closing token
    """

    def __init__(self, value, parser):
        """
        Class initializer.

        Arguments:
        value -- the string to be tokenized, delimiters included. Its
                 first character is used as the opening delimiter, its
                 last character as the closing delimiter, and everything
                 in between is tokenized as the contents.
        parser -- a reference to the parser which instantiated the
                  instance; its list_tokenize() method is used to
                  parse the contained tokens.
        """
        Token.__init__(self, value)
        self._token_type_string = "Container"
        self._tlist = TokenList()

        # Push opening container token, parsed contained
        # tokens, and closing container token onto the list
        c_open, c_middle, c_close = value[0], value[1:-1], value[-1]
        self._tlist.push(OpeningToken(c_open))
        self._tlist.push(parser.list_tokenize(c_middle))
        self._tlist.push(ClosingToken(c_close))

    def read(self, decorated=False, html=False, tree=False):
        """
        Returns a string representing the unmodified or modified tokens.

        The work is delegated to the contained TokenList's read(),
        with all three arguments forwarded unchanged.

        Arguments:
        decorated -- set to True to decorate with dec_open and dec_close
        html -- set to True to HTML-escape the characters
        tree -- set to True to output in an indented tree view
        """
        if not tree:
            return self._tlist.read(decorated=decorated, html=html, tree=tree)

        # Tree view: the contained tokens are rendered with a longer
        # Token.prefix. The prefix is a class attribute shared by all
        # tokens, so it must be put back even if rendering raises;
        # otherwise later output would keep the extra indentation.
        old_prefix = Token.prefix
        Token.prefix += " "
        try:
            return self._tlist.read(decorated=decorated, html=html, tree=tree)
        finally:
            Token.prefix = old_prefix