from io import StringIO
from textwrap import dedent
from typing import Any, Type

import pytest

# Lexer, LexError, and the tokens module come from the project's lexer
# package (import paths not shown in this excerpt).


def simple_test(
    token_type: Type[tokens.Token],
    lex_method: str,
    src: str,
    expected_value: Any,
) -> None:
    """Lexes ``src`` and asserts the first real token matches the expectation.

    ``lex_method`` is accepted but unused by this helper as written.
    """
    filepath = "test"
    lexer = Lexer(source=StringIO(src), path=filepath)
    lexer.next()  # skip the leading nodent
    t = lexer.next()
    assert t == token_type(expected_value)
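# A hypothetical usage sketch of the helper above (not from the original
# suite). The ``lex_method`` strings are placeholder names, since the helper
# ignores that argument; the token classes follow the tests below.
def test_simple_test_usage_sketch() -> None:
    simple_test(tokens.Integer, "lex_number", "42", 42)
    simple_test(tokens.Float, "lex_number", "4.2", 4.2)
    simple_test(tokens.String, "lex_string", '"hi"', "hi")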
def test_lex_program() -> None:
    # Source layout reconstructed to match the expected indent/dedent tokens.
    src = dedent(
        """
        ()[]{}:`,;
            0b10 0o51 0Xa8  # comment
        12 -12 -12.01 -12e3
        -12.01e30 12. 12.E30
        0 -0
            -
                --wow abc
        "wow"
        """
    )
    filepath = "test"
    expected = [
        tokens.Nodent(),
        tokens.LParen(),
        tokens.RParen(),
        tokens.LBracket(),
        tokens.RBracket(),
        tokens.LCurly(),
        tokens.RCurly(),
        tokens.Colon(),
        tokens.Tick(),
        tokens.Comma(),
        tokens.Semicolon(),
        tokens.Indent(),
        tokens.Integer(0b10),
        tokens.Integer(0o51),
        tokens.Integer(0xA8),
        tokens.Dedent(),
        tokens.Integer(12),
        tokens.Integer(-12),
        tokens.Float(-12.01),
        tokens.Float(-12e3),
        tokens.Nodent(),
        tokens.Float(-12.01e30),
        tokens.Float(12.0),
        tokens.Float(12.0e30),
        tokens.Nodent(),
        tokens.Integer(0),
        tokens.Integer(-0),
        tokens.Indent(),
        tokens.Identifier("-"),
        tokens.Indent(),
        tokens.Identifier("--wow"),
        tokens.Identifier("abc"),
        tokens.Dedent(),
        tokens.Dedent(),
        tokens.String("wow"),
    ]
    actual = list(Lexer(source=src, path=filepath))
    assert actual == expected
def test_lexer_token_iter() -> None:
    lexer = Lexer(source="123 abc", path="")
    assert lexer.next() == tokens.Nodent()
    assert lexer.next() == tokens.Integer(123)
    assert lexer.peek() == tokens.Identifier("abc")
    assert lexer.peek(2) == tokens.End()
    assert lexer.next() == tokens.Identifier("abc")
    assert lexer.next() == tokens.End()
    assert lexer.next() == tokens.End()
import typing
from pathlib import Path  # assumed: the stdlib Path for the annotation below

# Lexer, tokens, the project's `ast` node module (not the stdlib ast), and
# ParseError come from the surrounding package (import paths not shown).


class Parser:
    lexer: Lexer

    def __init__(
        self,
        source: typing.Union[str, typing.TextIO],
        path: Path,
        indentation: typing.Optional[str] = None,
    ):
        self.lexer = Lexer(source=source, path=path, indentation=indentation)

    def parse_module(self) -> ast.Module:
        """Parses a whole module: a sequence of blocks separated by nodents."""
        self._assert(tokens.Start)
        lines: typing.List[ast.Node] = []
        while type(self.lexer.curr) != tokens.End:
            self._assert(tokens.Nodent)
            lines.append(self._parse_block())
        return ast.Module(lines)

    def _parse_block(self) -> ast.Node:
        exprs: typing.List[ast.Node] = []
        exprs.append(self._parse_expr())
        # A single expression followed by a line break stands on its own.
        if type(self.lexer.curr) in (
            tokens.Indent,
            tokens.Nodent,
            tokens.Dedent,
            tokens.End,
        ):
            return exprs[0]
        while type(self.lexer.curr) not in (
            tokens.Semicolon,
            tokens.Indent,
            tokens.Nodent,
            tokens.Dedent,
            tokens.End,
        ):
            exprs.append(self._parse_expr())
        self._optional(tokens.Semicolon)
        if type(self.lexer.curr) in (tokens.Nodent, tokens.Dedent, tokens.End):
            if len(exprs) < 2:
                self._error("line must contain at least 2 sub-expressions")
            return ast.Line(exprs)
        if type(self.lexer.curr) == tokens.Colon:
            # A trailing colon introduces an indented paragraph of blocks.
            self.lexer.next()
            if type(self.lexer.curr) == tokens.Indent:
                self.lexer.next()
                exprs.append(self._parse_block())
                while type(self.lexer.curr) != tokens.Dedent:
                    self._assert(tokens.Nodent)
                    exprs.append(self._parse_block())
                self.lexer.next()  # consume the dedent
                return ast.Paragraph(exprs)
        self._fail()

    def parse_block(self) -> ast.Node:
        self._assert(tokens.Start)
        self._assert(tokens.Nodent)
        return self._parse_block()

    def _parse_expr(self) -> ast.Node:
        if type(self.lexer.curr) == tokens.LParen:
            return self._parse_list_expr()
        return self._parse_value()

    def parse_expr(self) -> ast.Node:
        self._assert(tokens.Start)
        self._assert(tokens.Nodent)
        return self._parse_expr()

    def _parse_list_expr(self) -> ast.Expression:
        self._assert(tokens.LParen)
        exprs: typing.List[ast.Node] = []
        while type(self.lexer.curr) != tokens.RParen:
            exprs.append(self._parse_expr())
            self._optional(tokens.Comma)
        self._assert(tokens.RParen)
        return ast.Expression(exprs)

    def _parse_value(self) -> ast.Value:
        if type(self.lexer.curr) in (tokens.String, tokens.Integer, tokens.Float):
            res = ast.Literal(self.lexer.curr.value)
            self.lexer.next()
            return res
        if type(self.lexer.curr) == tokens.Identifier:
            # Identifier tokens (as produced by the lexer) become ast.Name nodes.
            res2 = ast.Name(typing.cast(str, self.lexer.curr.value))
            self.lexer.next()
            return res2
        if type(self.lexer.curr) == tokens.LBracket:
            return self._parse_list()
        if type(self.lexer.curr) == tokens.LCurly:
            return self._parse_set_or_map()
        self._fail()

    def _parse_list(self) -> ast.List:
        self._assert(tokens.LBracket)
        elems: typing.List[ast.Node] = []
        while type(self.lexer.curr) != tokens.RBracket:
            elems.append(self._parse_expr())
            self._assert(tokens.Comma)  # every element ends with a comma
        self.lexer.next()  # consume the closing bracket
        return ast.List(elems)

    def _parse_set_or_map(self) -> typing.Union[ast.Set, ast.Map]:
        self._assert(tokens.LCurly)
        if type(self.lexer.curr) == tokens.RCurly:
            # empty map literal "{}"
            self.lexer.next()
            return ast.Map(())
        first = self._parse_expr()
        if type(self.lexer.curr) == tokens.Colon:
            # parse as map; like lists, every entry ends with a comma
            exprs: typing.List[typing.Tuple[ast.Node, ast.Node]] = []
            self.lexer.next()
            second = self._parse_expr()
            exprs.append((first, second))
            self._assert(tokens.Comma)
            while type(self.lexer.curr) != tokens.RCurly:
                first = self._parse_expr()
                self._assert(tokens.Colon)
                second = self._parse_expr()
                exprs.append((first, second))
                self._assert(tokens.Comma)
            self.lexer.next()  # consume the closing brace
            return ast.Map(exprs)
        elif type(self.lexer.curr) == tokens.Comma:
            # parse as set
            exprs2: typing.List[ast.Node] = [first]
            self.lexer.next()
            while type(self.lexer.curr) != tokens.RCurly:
                exprs2.append(self._parse_expr())
                self._assert(tokens.Comma)
            self.lexer.next()  # consume the closing brace
            return ast.Set(exprs2)
        self._fail()
    def _assert(self, expected: typing.Type[tokens.Token]) -> None:
        """
        Checks that the current token is of the expected type and consumes it.

        Raises a :class:`ParseError` if the token does not match.
        """
        if type(self.lexer.curr) != expected:
            self._error(
                f"expected token of type: {expected.__name__}, found: {self.lexer.curr}"
            )
        self.lexer.next()

    def _fail(self) -> typing.NoReturn:
        self._error(f"unexpected token: {self.lexer.curr}")

    def _optional(self, expected: typing.Type[tokens.Token]) -> None:
        """
        Checks if the current token is the expected one and consumes it only
        if it is.
        """
        if type(self.lexer.curr) == expected:
            self.lexer.next()

    def _error(self, msg: str) -> typing.NoReturn:
        """Formats and raises a :class:`ParseError`."""
        raise ParseError(msg)
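# A minimal usage sketch of the Parser above (hypothetical test, assuming the
# token and AST classes behave as exercised elsewhere in this suite; the
# asserts are deliberately loose).
def test_parser_usage_sketch() -> None:
    # Two expressions on one line parse into an ast.Line inside the module.
    module = Parser(source="abc 12", path="").parse_module()
    assert isinstance(module, ast.Module)

    # A trailing-comma map literal parses as ast.Map via parse_expr().
    node = Parser(source="{1: 2,}", path="").parse_expr()
    assert isinstance(node, ast.Map)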
def test_lexer_peek_at_end() -> None:
    actuals = list(Lexer(source="0", path=""))
    expecteds = [tokens.Nodent(), tokens.Integer(0)]
    assert expecteds == actuals
def test_lexer_bad_indentation() -> None:
    with pytest.raises(LexError):
        list(Lexer(source=" a\n\tb", path=""))
    with pytest.raises(LexError):
        list(Lexer(source=" \tstuff", path=""))
def test_lexer_unexpected_char() -> None:
    with pytest.raises(LexError):
        list(Lexer(source="\0", path=""))