def test_recognises_a_word(self):
    # A plain word is split into one character token per letter, each one
    # located on line 1 at the (1-based) column of that letter.
    word = "hello"
    self._text = word
    expected = [
        self._tokens.character(Position(1, column), letter)
        for column, letter in enumerate(word, start=1)
    ]
    self._verify_tokens(*expected)
def test_recognises_a_comment(self):
    # The comment token holds the text up to (but excluding) the line break;
    # the line break then comes out as its own token, followed by the two
    # commands found on the second line.
    self._text = "%This is a comment\n\\def\\foo"
    expected = [
        self._tokens.comment(Position(1, 1), "%This is a comment"),
        # NOTE(review): this expected location repeats the comment's (1, 1);
        # confirm whether token equality takes the location into account.
        self._tokens.new_line(Position(1, 1)),
        self._tokens.command(Position(2, 1), r"\def"),
        self._tokens.command(Position(2, 5), r"\foo"),
    ]
    self._verify_tokens(*expected)
def test_recognises_an_opening_group(self):
    # A lone opening brace must yield exactly one begin-group token.
    self._text = "{"
    expected = self._tokens.begin_group(Position(1, 1))
    self._verify_tokens(expected)
def test_recognises_two_commands_separated_by_white_spaces(self):
    # The input is `\def`, then space-tab-space, then `\foo`: the run of
    # blanks must come out as a single white-space token between the two
    # commands.
    self._text = "\\def \t \\foo"
    self._verify_tokens(
        self._tokens.command(Position(1, 1), r"\def"),
        self._tokens.white_space(Position(1, 5), " \t "),
        # Counting one column per character, `\def` covers columns 1-4 and
        # the three blanks columns 5-7, so `\foo` starts at column 8.
        self._tokens.command(Position(1, 8), r"\foo"))
def _reset(self):
    """
    Rewind the lexer to the beginning of its source.

    The position is set to line 1, column 0 of the named source, and a
    fresh Stream is opened over the source content with `_on_take`
    registered as its callback.
    """
    source = self._source
    self._position = Position(1, 0, source.name)
    characters = iter(source.content)
    self._input = Stream(characters, self._on_take)
def test_print_properly(self):
    # repr() of the fixture token must match the Token.DISPLAY template
    # filled in with the token's text, category and location.
    expected = Token.DISPLAY.format(
        text="a",
        category="character",
        location=Position(1, 1))
    actual = repr(self._token)
    self.assertEqual(expected, actual)
def test_equals_a_similar_tokens(self):
    # A token built with the same arguments as the fixture must compare
    # equal to it.
    twin = self._tokens.character(Position(1, 1), "a")
    self.assertEqual(twin, self._token)
def test_recognises_a_complete_macro_definition(self):
    # A full `\def` statement: the two commands, the two parameters of the
    # signature, then the body enclosed in a group.
    self._text = "\\def\\point#1#2{(#2,#1)}"
    signature = [
        self._tokens.command(Position(1, 1), r"\def"),
        self._tokens.command(Position(1, 5), r"\point"),
        self._tokens.parameter(Position(1, 11), "#1"),
        self._tokens.parameter(Position(1, 13), "#2"),
    ]
    body = [
        self._tokens.begin_group(Position(1, 15), "{"),
        self._tokens.others(Position(1, 16), "("),
        self._tokens.parameter(Position(1, 17), "#2"),
        self._tokens.others(Position(1, 19), ","),
        self._tokens.parameter(Position(1, 20), "#1"),
        self._tokens.others(Position(1, 22), ")"),
        self._tokens.end_group(Position(1, 23), "}"),
    ]
    self._verify_tokens(*signature, *body)
def test_recognises_a_single_character(self):
    # The smallest possible input: one letter gives one character token.
    letter = "b"
    self._text = letter
    expected = self._tokens.character(Position(1, 1), letter)
    self._verify_tokens(expected)
def test_recognises_subscript(self):
    # An underscore alone must yield exactly one subscript token.
    self._text = "_"
    expected = self._tokens.subscript(Position(1, 1))
    self._verify_tokens(expected)
def test_recognises_non_breaking_space(self):
    # A tilde alone must yield exactly one non-breaking-space token.
    self._text = "~"
    expected = self._tokens.non_breaking_space(Position(1, 1))
    self._verify_tokens(expected)
def test_recognises_math_mode(self):
    # A dollar sign alone must yield exactly one math token.
    self._text = "$"
    expected = self._tokens.math(Position(1, 1))
    self._verify_tokens(expected)
class Lexer:
    """
    Scan a stream of characters and yield a stream of tokens.

    The lexer shall define a handler for each category of symbols. These
    handlers are selected automatically using reflection: the handler of a
    category is the method named "_read_<category>", where <category> is
    the lower-cased name of that category (e.g. "_read_comment").

    A lexer is its own iterator: iterating over it yields tokens until the
    input has no character left.
    """

    def __init__(self, symbols, source):
        """
        Create a lexer reading `source` according to `symbols`.

        :param symbols: the symbol table; it must provide `category_of`
            and one collection of symbols per category (`NEW_LINE`,
            `CONTROL`, `CHARACTER`, ...).
        :param source: the text to scan; it must expose `name` and
            `content` attributes.
        """
        self._source = source
        self._symbols = symbols
        self._tokens = TokenFactory(self._symbols)
        self._reset()

    def _reset(self):
        """
        Rewind the lexer to the beginning of its source.

        The position starts at line 1, column 0 and `_on_take` is handed
        to the input stream as its callback.
        """
        self._position = Position(1, 0, self._source.name)
        self._input = Stream(iter(self._source.content), self._on_take)

    def _on_take(self, character):
        """
        Update the current position according to the given character.

        A new-line symbol moves the position to the next line; any other
        character moves it to the next character.
        """
        # Presumably invoked by the Stream for each character it hands
        # out -- confirm against the Stream implementation.
        if character in self._symbols.NEW_LINE:
            self._position = self._position.next_line()
        else:
            self._position = self._position.next_character()

    @property
    def position(self):
        """The current position of the lexer in its source."""
        return self._position

    def _take(self):
        """Consume and return the next character of the input."""
        return self._input.take()

    @property
    def _next(self):
        """The upcoming character, as given by the stream's look-ahead."""
        return self._input.look_ahead()

    def __iter__(self):
        return self

    def __next__(self):
        """
        Return the next token.

        :raises StopIteration: when the look-ahead is None, which is
            treated as the end of the input.
        """
        if self._next is None:
            raise StopIteration()
        return self._one_token()

    def _one_token(self):
        """Read one token using the handler of the upcoming symbol."""
        handler = self._handler_for(self._symbols.category_of(self._next))
        return handler()

    def _handler_for(self, category):
        """
        Return the bound method that reads symbols of the given category.

        :param category: an object whose `name` identifies the category.
        :raises AttributeError: if the lexer defines no "_read_<category>"
            method for that category.
        """
        handler_name = "_read_" + category.name.lower()
        # A two-argument getattr would raise its own generic error before
        # any check could run; look the handler up with a default so that
        # the failure names the category that lacks a handler.
        handler = getattr(self, handler_name, None)
        if handler is None:
            raise AttributeError(
                "Lexer has no handler for '%s' symbols" % category.name)
        return handler

    def _take_while(self, predicate):
        """Consume characters while `predicate` holds; return them joined."""
        return "".join(self._input.take_while(predicate))

    # In every handler below, the location is read right after the marker
    # has been taken: by then the position is expected to have been moved
    # onto that marker (see `_on_take`).

    def _read_character(self):
        """Read a single character token."""
        character = self._take()
        return self._tokens.character(self._position, character)

    def _read_control(self):
        """
        Read a command: a control marker followed by its name.

        The name is either the run of CHARACTER symbols that follows the
        marker (e.g. "\\def"), or the single symbol that follows it when
        that symbol is not a CHARACTER (e.g. "\\%").
        """
        marker = self._take()
        location = self._position
        assert marker in self._symbols.CONTROL
        if self._next not in self._symbols.CHARACTER:
            name = self._take()
        else:
            name = self._take_while(lambda c: c in self._symbols.CHARACTER)
        return self._tokens.command(location, marker + name)

    def _read_comment(self):
        """Read a comment: its marker and everything up to a new line."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.COMMENT
        # The new-line symbol itself is left in the input.
        text = self._take_while(lambda c: c not in self._symbols.NEW_LINE)
        return self._tokens.comment(location, marker + text)

    def _read_white_spaces(self):
        """Read a run of consecutive white spaces as a single token."""
        marker = self._take()
        location = self._position
        spaces = self._take_while(lambda c: c in self._symbols.WHITE_SPACES)
        return self._tokens.white_space(location, marker + spaces)

    def _read_new_line(self):
        """Read a single new-line token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NEW_LINE
        return self._tokens.new_line(location, marker)

    def _read_begin_group(self):
        """Read a single begin-group token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.BEGIN_GROUP
        return self._tokens.begin_group(location, marker)

    def _read_end_group(self):
        """Read a single end-group token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.END_GROUP
        return self._tokens.end_group(location, marker)

    def _read_parameter(self):
        """Read a parameter: its marker followed by any digits (e.g. "#1")."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.PARAMETER
        text = marker + self._take_while(lambda c: c.isdigit())
        return self._tokens.parameter(location, text)

    def _read_math(self):
        """Read a single math token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.MATH
        # Unlike the other handlers, the marker is not passed on: the
        # token is built from its location only.
        return self._tokens.math(location)

    def _read_superscript(self):
        """Read a single superscript token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUPERSCRIPT
        return self._tokens.superscript(location, marker)

    def _read_subscript(self):
        """Read a single subscript token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.SUBSCRIPT
        return self._tokens.subscript(location, marker)

    def _read_non_breaking_space(self):
        """Read a single non-breaking-space token."""
        marker = self._take()
        location = self._position
        assert marker in self._symbols.NON_BREAKING_SPACE
        return self._tokens.non_breaking_space(location, marker)

    def _read_others(self):
        """Read a single token for a symbol of the OTHERS category."""
        marker = self._take()
        location = self._position
        # NOTE(review): unlike its siblings, this handler does not assert
        # that the marker belongs to its category (self._symbols.OTHERS);
        # that check was disabled -- confirm whether this is intentional.
        return self._tokens.others(location, marker)
def test_recognises_an_ending_group(self):
    # A lone closing brace must yield exactly one end-group token.
    self._text = "}"
    expected = self._tokens.end_group(Position(1, 1))
    self._verify_tokens(expected)
def test_recognises_a_single_command(self):
    # A backslash followed by letters is a single command token whose text
    # keeps the backslash.
    self._text = r"\myMacro"
    expected = self._tokens.command(Position(1, 1), r"\myMacro")
    self._verify_tokens(expected)
def test_recognises_an_parameter(self):
    # A command immediately followed by a parameter: the `#` ends the
    # command name and starts a parameter token.
    # The input is a raw string because "\d" is not a valid escape
    # sequence in a regular string literal.
    self._text = r"\def#1"
    self._verify_tokens(
        self._tokens.command(Position(1, 1), r"\def"),
        self._tokens.parameter(Position(1, 5), "#1"))
def test_recognises_a_single_special_character_command(self):
    # A backslash followed by a non-letter symbol is a command made of
    # exactly those two symbols.
    self._text = r"\%"
    expected = self._tokens.command(Position(1, 1), r"\%")
    self._verify_tokens(expected)
def setUp(self):
    # Fixture: a token factory over the default symbol table, and the
    # character token "a" at line 1, column 1 that the tests compare with.
    factory = TokenFactory(SymbolTable.default())
    self._tokens = factory
    self._token = factory.character(Position(1, 1), "a")
def test_recognises_sequences_of_single_character_command(self):
    # Three two-symbol commands back to back: each one starts two columns
    # after the previous one.
    self._text = r"\%\$\\"
    expected = [
        self._tokens.command(Position(1, 1), r"\%"),
        self._tokens.command(Position(1, 3), r"\$"),
        self._tokens.command(Position(1, 5), r"\\"),
    ]
    self._verify_tokens(*expected)
def test_differs_from_a_different_character(self):
    # Same category and location as the fixture, but another letter: the
    # two tokens must not compare equal.
    other = self._tokens.character(Position(1, 1), "b")
    self.assertNotEqual(other, self._token)
def test_recognises_two_commands(self):
    # Two commands with nothing in between: the second backslash ends the
    # first command's name.
    self._text = r"\def\foo"
    expected = [
        self._tokens.command(Position(1, 1), r"\def"),
        self._tokens.command(Position(1, 5), r"\foo"),
    ]
    self._verify_tokens(*expected)
def _at(line, column):
    """Shorthand building the Position at the given line and column."""
    position = Position(line, column)
    return position