def finish_tuple(
    parser: Parser,
    parse_func: Callable[[Parser], Result[T]],
    end: lx.TokenType = lx.TokenType.RIGHT_PAREN,
) -> Result[List[T]]:
    """
    Parse the comma-delimited elements of an already-opened tuple form
    `<opener> a,b,... <end>` into a list, calling `parse_func` for each
    element. The closing token defaults to ')'.
    """
    opening = parser.prev()
    # Empty tuple: the end token immediately follows the opener.
    if parser.match(end):
        return []
    head = parse_func(parser)
    if isinstance(head, er.CompileError):
        return head
    elements = [head]
    while True:
        if parser.match(end):
            return elements
        if parser.done():
            return er.CompileError(
                message=f"expected '{end}' before EOF", regions=[opening.lexeme]
            )
        if not parser.match(lx.TokenType.COMMA):
            return er.CompileError(
                f"expected ',' delimiter or '{end}'", [parser.curr_region()]
            )
        nxt = parse_func(parser)
        if isinstance(nxt, er.CompileError):
            return nxt
        elements.append(nxt)
def finish_set_stmt(parser: Parser) -> Result[ast.AstSetStmt]:
    """
    Parse a set statement, the "set" token having been consumed already.

    AstSetStmt : "set" AstExpr "=" AstExpr ";" ;
    """
    keyword = parser.prev().lexeme
    ident = parse_token(parser, [lx.TokenType.IDENTIFIER])
    if ident is None:
        return er.CompileError(
            message="expected identifier expression to set",
            regions=[parser.curr_region()],
        )
    target_expr = ast.AstIdentExpr.make(token=ident)
    if not parser.match(lx.TokenType.EQUALS):
        return er.CompileError(
            message="expected '=' for value to set",
            regions=[parser.curr_region()],
        )
    new_value = parse_expr(parser)
    if isinstance(new_value, er.CompileError):
        return new_value
    if not parser.match(lx.TokenType.SEMICOLON):
        return er.CompileError(
            message="expected ';' to end set statement",
            regions=[new_value.region, parser.curr_region()],
        )
    return ast.AstSetStmt(
        target=target_expr,
        value=new_value,
        region=er.SourceView.range(keyword, parser.prev().lexeme),
    )
def parse_alias(parser: Parser) -> Result[Tuple[ast.AstExpr, ast.AstBinding]]:
    """
    Parse an alias (a target expression plus the binding it is bound to)
    from the parser or return an error.

    ALIAS : "(" AstExpr ")" "as" AstBinding | AstIdentExpr ;
    """
    if not parser.match(lx.TokenType.LEFT_PAREN):
        return er.CompileError(
            message="expected '(' to open target expression",
            regions=[parser.curr_region()],
        )
    target = finish_group_expr(parser)
    if isinstance(target, er.CompileError):
        return target
    if parser.match(lx.TokenType.AS):
        # Explicit binding: "( expr ) as name".
        binding = parse_binding(parser)
        if isinstance(binding, er.CompileError):
            return binding
    else:
        if isinstance(target, ast.AstIdentExpr):
            # "<ident>" is syntax sugar for "<ident> as <ident>"
            binding = ast.AstBinding(name=target.name, region=target.region)
        else:
            # Fix: was an f-string with no placeholders (needless `f` prefix).
            return er.CompileError(
                message="missing binding for non-identifier target expression",
                regions=[target.region, parser.curr_region()],
            )
    return target, binding
def parse_case(
    parser: Parser
) -> Result[Union[Tuple[ast.AstType, ast.AstExpr], ast.AstExpr]]:
    """
    Parse one case of a case expression: either a `<type> : <value>` pair, or
    the `else : <value>` fallback (returned as a bare expression).
    """
    if parser.match(lx.TokenType.ELSE):
        # Fallback branch: "else" ":" value "}"
        if not parser.match(lx.TokenType.COLON):
            return er.CompileError(
                message="expected ':' before fallback value",
                regions=[parser.curr_region()],
            )
        fallback = parse_expr(parser, precedence=Precedence.TUPLE.next())
        if isinstance(fallback, er.CompileError):
            return fallback
        if parser.match(lx.TokenType.RIGHT_BRACE):
            return fallback
        return er.CompileError(
            message="expected '}' after fallback case",
            regions=[parser.curr_region()],
        )
    # Typed branch: type ":" value
    branch_type = parse_type(parser)
    if isinstance(branch_type, er.CompileError):
        return branch_type
    if not parser.match(lx.TokenType.COLON):
        return er.CompileError(
            message="expected ':' before case value", regions=[parser.curr_region()]
        )
    branch_value = parse_expr(parser, precedence=Precedence.TUPLE.next())
    if isinstance(branch_value, er.CompileError):
        return branch_value
    return branch_type, branch_value
def finish_case_expr(parser: Parser) -> Result[ast.AstExpr]:
    """
    Parse a case expression from the parser or return an error. Assumes that
    the "case" token has already been consumed.

    NOTE(review): the visible body ends inside the while loop after `break`
    with no return statement and without appending non-fallback cases — this
    function appears truncated; confirm against the full source.
    """
    start = parser.prev().lexeme
    # The aliased target expression being cased on.
    alias = parse_alias(parser)
    if isinstance(alias, er.CompileError):
        return alias
    if not parser.match(lx.TokenType.LEFT_BRACE):
        return er.CompileError(message="expected '{' to open case block", regions=[parser.curr_region()])
    opener = parser.prev()
    cases: List[Tuple[ast.AstType, ast.AstExpr]] = []
    fallback: Result[Optional[ast.AstExpr]] = None
    while not parser.match(lx.TokenType.RIGHT_BRACE):
        if parser.done():
            return er.CompileError(
                message="unclosed '{'", regions=[opener.lexeme, parser.curr_region()])
        # Cases after the first must be preceded by a comma delimiter.
        if cases and not parser.match(lx.TokenType.COMMA):
            return er.CompileError(message=f"expected ',' to delimit cases", regions=[parser.curr_region()])
        case = parse_case(parser)
        if isinstance(case, er.CompileError):
            return case
        # A bare expression result marks the "else" fallback, which ends the block.
        if isinstance(case, ast.AstExpr):
            fallback = case
            break
def finish_while_stmt(parser: Parser) -> Result[ast.AstWhileStmt]:
    """
    Parse a while statement, the "while" token having been consumed already.

    AstWhileStmt : "while" ( "(" AstExpr ")" )? AstBlockStmt ;
    """
    keyword = parser.prev().lexeme
    condition = None
    if parser.match(lx.TokenType.LEFT_PAREN):
        # Optional parenthesized condition.
        parsed = parse_expr(parser)
        if isinstance(parsed, er.CompileError):
            return parsed
        condition = parsed
        if not parser.match(lx.TokenType.RIGHT_PAREN):
            return er.CompileError(
                message="expected ')' to end condition",
                regions=[parser.curr_region()],
            )
    if not parser.match(lx.TokenType.LEFT_BRACE):
        return er.CompileError(message="expected block", regions=[parser.curr_region()])
    body = finish_block_stmt(parser)
    if isinstance(body, er.CompileError):
        return body
    return ast.AstWhileStmt(
        cond=condition,
        block=body,
        region=er.SourceView.range(keyword, parser.prev().lexeme),
    )
def finish_block_stmt(parser: Parser) -> Result[ast.AstBlockStmt]:
    """
    Parse a block statement, the opening brace having been consumed already.

    AstBlockStmt : "{" AstDecl* "}" ;
    """
    brace = parser.prev()
    declarations = []
    while not parser.match(lx.TokenType.RIGHT_BRACE):
        if parser.done():
            # Hit EOF before the matching '}'.
            return er.CompileError(
                message="unclosed '{'",
                regions=[brace.lexeme, parser.curr_region()],
            )
        declaration = parse_decl(parser)
        if isinstance(declaration, er.CompileError):
            return declaration
        declarations.append(declaration)
    return ast.AstBlockStmt(
        decls=declarations,
        region=er.SourceView.range(brace.lexeme, parser.prev().lexeme),
    )
def finish_print_stmt(parser: Parser) -> Result[ast.AstPrintStmt]:
    """
    Parse a print statement, the "print" token having been consumed already.

    AstPrintStmt : "print" AstExpr? ";" ;
    """
    keyword = parser.prev().lexeme

    def span_to_prev() -> er.SourceView:
        # Region from the "print" keyword to the most recently consumed token.
        return er.SourceView.range(keyword, parser.prev().lexeme)

    # A bare "print;" carries no expression.
    if parser.match(lx.TokenType.SEMICOLON):
        return ast.AstPrintStmt(expr=None, region=span_to_prev())
    expression = parse_expr(parser)
    if isinstance(expression, er.CompileError):
        return expression
    if not parser.match(lx.TokenType.SEMICOLON):
        return er.CompileError(
            message="expected ';' to end print statement",
            regions=[span_to_prev(), parser.curr_region()],
        )
    return ast.AstPrintStmt(expr=expression, region=span_to_prev())
def finish_if_stmt(parser: Parser) -> Result[ast.AstIfStmt]:
    """
    Parse an if statement, the "if" token having been consumed already.

    AstIfStmt : "if" "(" AstExpr ")" AstBlockStmt
                ( "else" "if" "(" AstExpr ")" AstBlockStmt )*
                ( "else" AstBlockStmt )?
              ;
    """
    keyword = parser.prev().lexeme

    def parse_branch() -> Result[Tuple[ast.AstExpr, ast.AstBlockStmt]]:
        # Parse one "( condition ) { block }" pair.
        if not parser.match(lx.TokenType.LEFT_PAREN):
            return er.CompileError(
                message="expected '(' to start condition",
                regions=[parser.curr_region()],
            )
        condition = parse_expr(parser)
        if isinstance(condition, er.CompileError):
            return condition
        if not parser.match(lx.TokenType.RIGHT_PAREN):
            return er.CompileError(
                message="expected ')' to end condition",
                regions=[parser.curr_region()],
            )
        if not parser.match(lx.TokenType.LEFT_BRACE):
            return er.CompileError(
                message="expected block after condition",
                regions=[parser.curr_region()],
            )
        body = finish_block_stmt(parser)
        if isinstance(body, er.CompileError):
            return body
        return condition, body

    first_branch = parse_branch()
    if isinstance(first_branch, er.CompileError):
        return first_branch
    elif_branches = []
    else_branch = None
    while parser.match(lx.TokenType.ELSE):
        if parser.match(lx.TokenType.IF):
            branch = parse_branch()
            if isinstance(branch, er.CompileError):
                return branch
            elif_branches.append(branch)
            continue
        # A final bare "else" block terminates the chain.
        if not parser.match(lx.TokenType.LEFT_BRACE):
            return er.CompileError(
                message="expected else block", regions=[parser.curr_region()]
            )
        final_block = finish_block_stmt(parser)
        if isinstance(final_block, er.CompileError):
            return final_block
        else_branch = final_block
        break
    return ast.AstIfStmt(
        if_part=first_branch,
        elif_parts=elif_branches,
        else_part=else_branch,
        region=er.SourceView.range(keyword, parser.prev().lexeme),
    )
def parse_init(parser: Parser) -> Result[Tuple[ast.AstLabel, ast.AstExpr]]:
    """
    Parse a single "<field> = <value>" initializer from the parser, returning
    the field label and its value expression or an error.
    """
    if not parser.match(lx.TokenType.IDENTIFIER):
        return er.CompileError(
            message="expected identifier for field specification",
            regions=[parser.curr_region()],
        )
    field_token = parser.prev()
    if not parser.match(lx.TokenType.EQUALS):
        return er.CompileError(
            message="expected '=' for field value",
            regions=[parser.curr_region()],
        )
    init_value = parse_expr(parser, precedence=Precedence.TUPLE.next())
    if isinstance(init_value, er.CompileError):
        return init_value
    return ast.AstLabel.make(field_token), init_value
def parse_prefix(parser: Parser, table: PrattTable[T], expected: str) -> Result[T]:
    """
    Parse a prefix element from the parser using a Pratt table, reporting
    `expected` in the error message on failure.
    """
    token = parser.advance()
    if not token:
        # Nothing left to consume.
        return er.CompileError(
            message=f"unexpected EOF; expected {expected}",
            regions=[parser.curr_region()],
        )
    prefix_rule = table[token.kind].prefix
    if not prefix_rule:
        # This token kind cannot begin the expected element.
        return er.CompileError(
            message=f"unexpected token; expected {expected}",
            regions=[token.lexeme],
        )
    return prefix_rule(parser)
def finish_value_decl(parser: Parser) -> Result[ast.AstValueDecl]:
    """
    Parse a value declaration from the parser or return an error. Assumes that the "val" token
    along with any decorators have already been consumed.

    AstValueDecl : "val" AstBinding ( ", " AstBinding )* ":" AstType? "=" AstExpr ";" ;
    """
    start = parser.prev().lexeme
    # The binding list is terminated by the ':' that precedes the optional type.
    bindings = finish_tuple(parser, parse_binding, end=lx.TokenType.COLON)
    if isinstance(bindings, er.CompileError):
        return bindings
    # There has to be at least one
    if not bindings:
        return er.CompileError(message="expected value name", regions=[parser.prev().lexeme])
    val_type = None
    if not parser.match(lx.TokenType.EQUALS):
        # No immediate '=': an explicit type annotation precedes the initializer.
        val_type_ = parse_type(parser)
        if isinstance(val_type_, er.CompileError):
            return val_type_
        val_type = val_type_
        # NOTE(review): this check is nested so it only fires after an explicit
        # type; at the outer level it would reject the untyped "val x : = ..."
        # form whose '=' was already consumed above — confirm against original.
        if not parser.match(lx.TokenType.EQUALS):
            return er.CompileError(
                message="expected '=' for value initializer",
                regions=[parser.curr_region()],
            )
    expr = parse_expr(parser)
    if isinstance(expr, er.CompileError):
        return expr
    if not parser.match(lx.TokenType.SEMICOLON):
        value_range = er.SourceView.range(start, parser.prev().lexeme)
        return er.CompileError(
            message="expected ';' to end value initializer",
            regions=[value_range, parser.curr_region()],
        )
    region = er.SourceView.range(start, parser.prev().lexeme)
    return ast.AstValueDecl(bindings=bindings, val_type=val_type, val_init=expr, region=region)
def parse_cond() -> Result[Tuple[ast.AstExpr, ast.AstBlockStmt]]:
    """
    Parse one "( AstExpr ) AstBlockStmt" condition/block pair or return an error.

    NOTE(review): takes no parameters and reads `parser` from an enclosing
    scope — this matches the closure nested inside finish_if_stmt, so it is not
    usable as a free-standing top-level function. Confirm intended placement.
    """
    if not parser.match(lx.TokenType.LEFT_PAREN):
        return er.CompileError(
            message="expected '(' to start condition",
            regions=[parser.curr_region()],
        )
    cond = parse_expr(parser)
    if isinstance(cond, er.CompileError):
        return cond
    if not parser.match(lx.TokenType.RIGHT_PAREN):
        return er.CompileError(message="expected ')' to end condition", regions=[parser.curr_region()])
    if not parser.match(lx.TokenType.LEFT_BRACE):
        return er.CompileError(message="expected block after condition", regions=[parser.curr_region()])
    block = finish_block_stmt(parser)
    if isinstance(block, er.CompileError):
        return block
    return cond, block
def finish_int_expr(parser: Parser) -> Result[ast.AstExpr]:
    """
    Parse an int expression, the literal token having been consumed already.
    Rejects values that overflow a signed 32-bit integer.
    """
    token = parser.prev()
    # Strip the trailing 'i' suffix of the int literal before converting.
    magnitude = int(str(token)[:-1])
    if magnitude > 2**31 - 1:
        return er.CompileError(
            message="literal value is too large", regions=[token.lexeme]
        )
    return ast.AstIntExpr(literal=token)
def finish_str_expr(parser: Parser) -> Result[ast.AstExpr]:
    """
    Parse a str expression, the literal token having been consumed already.
    Rejects literals whose content exceeds 512 characters.
    """
    token = parser.prev()
    # The lexeme includes the two surrounding quote characters.
    max_lexeme_length = 512 + 2
    if len(str(token)) > max_lexeme_length:
        return er.CompileError(
            message="string literal too long, max length is 512",
            regions=[token.lexeme],
        )
    return ast.AstStrExpr(literal=token)
def parse_binding(parser: Parser) -> Result[ast.AstBinding]:
    """
    Parse a value binding (a single identifier) from the parser or return an
    error.

    AstBinding : IDENTIFIER ;
    """
    name_token = parse_token(parser, [lx.TokenType.IDENTIFIER])
    if name_token is None:
        return er.CompileError(
            message="expected value name", regions=[parser.curr_region()]
        )
    return ast.AstBinding(name=str(name_token), region=name_token.lexeme)
def parse_field(parser: Parser) -> Result[Union[ast.AstParam, ast.AstNameDecl]]:
    """
    Parse a single struct field: either a name declaration, or a parameter
    terminated by ';'.
    """
    declared = parse_name_decl(parser)
    if declared is not None:
        return declared
    field_param = parse_param(parser)
    if isinstance(field_param, er.CompileError):
        return field_param
    if parser.match(lx.TokenType.SEMICOLON):
        return field_param
    return er.CompileError(
        message="expected ';' after field", regions=[parser.curr_region()]
    )
def finish_struct_decl(parser: Parser) -> Result[ast.AstStructDecl]:
    """
    Parse a struct declaration, the "struct" token having been consumed already.

    AstStructDecl : "struct" AstBinding "{" ( AstParam ";" | AstValueDecl | AstFuncDecl )* "}"
    """
    keyword = parser.prev().lexeme
    name_token = parse_token(parser, [lx.TokenType.IDENTIFIER])
    if name_token is None:
        return er.CompileError(
            message="expected struct name", regions=[parser.curr_region()]
        )
    if not parser.match(lx.TokenType.LEFT_BRACE):
        return er.CompileError(
            message="expected '{' for struct body", regions=[parser.curr_region()]
        )

    def next_field(parser: Parser) -> Result[Union[ast.AstParam, ast.AstNameDecl]]:
        # A field is either a full declaration or a "param ;" entry.
        declared = parse_name_decl(parser)
        if declared is not None:
            return declared
        field_param = parse_param(parser)
        if isinstance(field_param, er.CompileError):
            return field_param
        if parser.match(lx.TokenType.SEMICOLON):
            return field_param
        return er.CompileError(
            message="expected ';' after field", regions=[parser.curr_region()]
        )

    members = []
    while not parser.match(lx.TokenType.RIGHT_BRACE):
        if parser.done():
            return er.CompileError(message="unclosed '{'", regions=[keyword])
        member = next_field(parser)
        if isinstance(member, er.CompileError):
            return member
        members.append(member)
    region = er.SourceView.range(keyword, parser.prev().lexeme)
    return ast.AstStructDecl(name=str(name_token), fields=members, region=region)
def finish_func_decl(parser: Parser) -> Result[ast.AstFuncDecl]:
    """
    Parse a function declaration, the "func" token (and any decorators) having
    been consumed already.

    AstFuncDecl : "func" IDENTIFIER "(" ( AstParam ( "," AstParam )* )? ")" AstType AstBlockStmt ;
    """
    keyword = parser.prev().lexeme
    name_binding = parse_binding(parser)
    if isinstance(name_binding, er.CompileError):
        return name_binding
    if not parser.match(lx.TokenType.LEFT_PAREN):
        return er.CompileError(
            message="expected '(' to begin parameters", regions=[parser.curr_region()]
        )
    param_list = finish_tuple(parser, parse_param)
    if isinstance(param_list, er.CompileError):
        return param_list
    result_type = parse_type(parser)
    if isinstance(result_type, er.CompileError):
        return result_type
    if not parser.match(lx.TokenType.LEFT_BRACE):
        return er.CompileError(
            message="expected function body", regions=[parser.curr_region()]
        )
    body = finish_block_stmt(parser)
    if isinstance(body, er.CompileError):
        return body
    return ast.AstFuncDecl(
        binding=name_binding,
        params=param_list,
        return_type=result_type,
        block=body,
        region=er.SourceView.range(keyword, parser.prev().lexeme),
    )
def finish_num_expr(parser: Parser) -> Result[ast.AstExpr]:
    """
    Parse a num expression, the literal token having been consumed already.
    Rejects literals with more than 7 decimal places.
    """
    token = parser.prev()
    literal = str(token)
    if "." in literal:
        decimals = literal.split(".")[1]
        if len(decimals) > 7:
            # Fix: corrected "palces" -> "places" in the user-facing message.
            return er.CompileError(
                message="too many decimal places, precision up to only 7 is supported",
                regions=[token.lexeme],
            )
    return ast.AstNumExpr(literal=token)
def finish_group_expr(parser: Parser) -> Result[ast.AstExpr]:
    """
    Parse a parenthesized expression, the '(' having been consumed already.
    The inner expression's region is widened to cover both parentheses.
    """
    left_paren = parser.prev()
    inner = parse_expr(parser)
    if isinstance(inner, er.CompileError):
        return inner
    if not parser.match(lx.TokenType.RIGHT_PAREN):
        return er.CompileError(
            message="expected ')' to finish expression",
            regions=[left_paren.lexeme, inner.region],
        )
    # Widen the region so diagnostics include the parentheses.
    inner.region = er.SourceView.range(left_paren.lexeme, parser.prev().lexeme)
    return inner
def finish_group_type(parser: Parser) -> Result[ast.AstType]:
    """
    Parse a parenthesized type, the '(' having been consumed already. The
    inner type's region is widened to cover both parentheses.
    """
    left_paren = parser.prev().lexeme
    inner_type = parse_type(parser)
    if isinstance(inner_type, er.CompileError):
        return inner_type
    if not parser.match(lx.TokenType.RIGHT_PAREN):
        return er.CompileError(
            message="expected ')' to end type grouping",
            regions=[left_paren, parser.curr_region()],
        )
    # Widen the region so diagnostics include the parentheses.
    inner_type.region = er.SourceView.range(left_paren, parser.prev().lexeme)
    return inner_type
def finish_access_expr(parser: Parser, target: ast.AstExpr) -> Result[ast.AstExpr]:
    """
    Parse a field access on `target`, the access token having been consumed
    already, leaving only the field identifier to read.
    """
    if not parser.match(lx.TokenType.IDENTIFIER):
        # Fix: was an f-string with no placeholders (needless `f` prefix).
        return er.CompileError(
            message="expected identifier for field access",
            regions=[parser.curr_region()],
        )
    ident = parser.prev()
    return ast.AstAccessExpr(
        target=target,
        name=str(ident),
        region=er.SourceView.range(target.region, ident.lexeme),
    )
def finish_lambda_expr(parser: Parser) -> Result[ast.AstExpr]:
    """
    Parse a lambda expression, the "func" token having been consumed already.
    """
    start = parser.prev().lexeme
    if not parser.match(lx.TokenType.LEFT_PAREN):
        # Fix: was an f-string with no placeholders (needless `f` prefix).
        return er.CompileError(
            message="expected '(' to start parameters",
            regions=[parser.curr_region()],
        )
    params = finish_tuple(parser, parse_param)
    if isinstance(params, er.CompileError):
        return params
    value = parse_expr(parser, precedence=Precedence.LAMBDA)
    if isinstance(value, er.CompileError):
        return value
    region = er.SourceView.range(start, parser.prev().lexeme)
    return ast.AstLambdaExpr(params=params, value=value, region=region)
def parse_expr_stmt(parser: Parser) -> Result[ast.AstExprStmt]:
    """
    Parse an expression statement from the parser or return an error.

    AstExprStmt : AstExpr ";" ;
    """
    expression = parse_expr(parser)
    if isinstance(expression, er.CompileError):
        return expression
    if parser.match(lx.TokenType.SEMICOLON):
        return ast.AstExprStmt(
            expr=expression,
            region=er.SourceView.range(expression.region, parser.prev().lexeme),
        )
    return er.CompileError(
        message="expected ';' to end expression statement",
        regions=[parser.prev().lexeme, parser.curr_region()],
    )
def finish_return_stmt(parser: Parser) -> Result[ast.AstReturnStmt]:
    """
    Parse a return statement, the "return" token having been consumed already.

    AstReturnStmt : "return" AstExpr? ";" ;
    """
    keyword = parser.prev()
    result_expr = None
    # A bare "return;" carries no expression; otherwise parse one and then
    # require the terminating ';'.
    if not parser.match(lx.TokenType.SEMICOLON):
        parsed = parse_expr(parser)
        if isinstance(parsed, er.CompileError):
            return parsed
        result_expr = parsed
        if not parser.match(lx.TokenType.SEMICOLON):
            return er.CompileError(
                message="expected ';' to end return statement",
                regions=[parser.prev().lexeme, parser.curr_region()],
            )
    return ast.AstReturnStmt(
        expr=result_expr,
        region=er.SourceView.range(keyword.lexeme, parser.prev().lexeme),
    )
def finish_func_type(parser: Parser) -> Result[ast.AstFuncType]:
    """
    Parse a function type, the "func" token having been consumed already.
    """
    keyword = parser.prev().lexeme
    if not parser.match(lx.TokenType.LEFT_PAREN):
        return er.CompileError(
            message="expected '(' to begin parameter types",
            regions=[parser.curr_region()],
        )

    def param_type(subparser: Parser) -> Result[ast.AstType]:
        # Parse each parameter type above tuple precedence so ',' delimits.
        return parse_type(subparser, Precedence.TUPLE.next())

    param_types = finish_tuple(parser, param_type)
    if isinstance(param_types, er.CompileError):
        return param_types
    result_type = parse_type(parser)
    if isinstance(result_type, er.CompileError):
        return result_type
    return ast.AstFuncType(
        params=param_types,
        return_type=result_type,
        region=er.SourceView.range(keyword, parser.prev().lexeme),
    )
def tokenize_source(source: str) -> Tuple[List["Token"], List[er.CompileError]]:
    """
    Given a string of Clear source code, lexes it into a list of tokens.

    Returns a pair: (tokens with keyword identifiers reclassified, one
    CompileError per unrecognized character).
    """
    skip_rules = [r"//.*", r"\s+"]  # line comments and whitespace
    consume_rules = [
        (r"[a-zA-Z_][a-zA-Z0-9_]*", TokenType.IDENTIFIER),
        (r"[0-9]+i", TokenType.INT_LITERAL),
        (r"[0-9]+(\.[0-9]+)?", TokenType.NUM_LITERAL),
        (r"\".*?\"", TokenType.STR_LITERAL),
        # Two-character operators must precede their one-character prefixes.
        (r"==", TokenType.DOUBLE_EQUALS),
        (r"!=", TokenType.NOT_EQUALS),
        (r"<=", TokenType.LESS_EQUALS),
        (r"<", TokenType.LESS),
        (r">=", TokenType.GREATER_EQUALS),
        (r">", TokenType.GREATER),
        (r"=", TokenType.EQUALS),
        (r",", TokenType.COMMA),
        (r";", TokenType.SEMICOLON),
        (r":", TokenType.COLON),
        (r"\|", TokenType.VERT),
        (r"{", TokenType.LEFT_BRACE),
        (r"}", TokenType.RIGHT_BRACE),
        (r"\(", TokenType.LEFT_PAREN),
        (r"\)", TokenType.RIGHT_PAREN),
        (r"\?", TokenType.QUESTION_MARK),
        (r"\+", TokenType.PLUS),
        (r"-", TokenType.MINUS),
        (r"\*", TokenType.STAR),
        (r"/", TokenType.SLASH),
        (r"\.", TokenType.DOT),
        (r"@", TokenType.AT),
    ]
    fallback_rule = (r".", TokenType.ERROR)
    lexer = Lexer(source)
    lexer.run(consume_rules, skip_rules, fallback_rule)

    # Perf fix: build the keyword lookup table once, instead of rebuilding it
    # inside keywordize for every token.
    keyword_set = {
        TokenType.VAL,
        TokenType.FUNC,
        TokenType.VOID,
        TokenType.IF,
        TokenType.ELSE,
        TokenType.WHILE,
        TokenType.RETURN,
        TokenType.PRINT,
        TokenType.OR,
        TokenType.AND,
        TokenType.TRUE,
        TokenType.NIL,
        TokenType.FALSE,
        TokenType.AS,
        TokenType.CASE,
        TokenType.STRUCT,
        TokenType.THIS,
        TokenType.SET,
    }
    keywords = {keyword.value: keyword for keyword in keyword_set}

    def keywordize(token: "Token") -> "Token":
        # Reclassify identifiers whose lexeme matches a reserved word.
        if token.kind == TokenType.IDENTIFIER:
            lexeme = str(token.lexeme)
            if lexeme in keywords:
                token.kind = keywords[lexeme]
        return token

    return (
        [
            keywordize(token)
            for token in lexer.tokens
            if token.kind != TokenType.ERROR
        ],
        [
            er.CompileError(message=f"unexpected token {token}", regions=[token.lexeme])
            for token in lexer.tokens
            if token.kind == TokenType.ERROR
        ],
    )