def atom(self):
    """Parse an atom: a number, a variable access, or a parenthesised expression.

    Returns the ParseResult for this rule. On success it wraps a
    NumberNode, a VarAccessNode, or the node of the inner expression; on
    failure it wraps an InvalidSyntaxError located at the offending token.
    """
    outcome = ParseResult()
    first = self.current_token

    # Single-token atoms: numeric literals.
    if first.type in (TT_INT, TT_DEC):
        outcome.register_advancement()
        self.advance()
        return outcome.success(NumberNode(first))

    # Single-token atoms: identifiers.
    if first.type == TT_ID:
        outcome.register_advancement()
        self.advance()
        return outcome.success(VarAccessNode(first))

    # Anything that is not an opening parenthesis cannot start an atom.
    if first.type != TT_LPAREN:
        return outcome.failure(InvalidSyntaxError(
            first.pos_start,
            first.pos_end,
            f"Unexpected token '{first.type}', expected operator or identifier"
        ))

    # '(' expr ')'
    outcome.register_advancement()
    self.advance()
    inner = outcome.register(self.expr())
    if outcome.error:
        return outcome

    if self.current_token.type != TT_RPAREN:
        return outcome.failure(InvalidSyntaxError(
            self.current_token.pos_start,
            self.current_token.pos_end,
            f"Unexpected token '{self.current_token.type}', expected ')' in parenthetical"
        ))

    outcome.register_advancement()
    self.advance()
    return outcome.success(inner)
def factor(self):
    """Parse a factor: a signed factor, a number, or a parenthesised expression.

    Returns the ParseResult for this rule: a UnaryOperationNode for a
    leading '+'/'-', a NumberNode for an int/float token, the inner
    expression node for '(' expression ')', or an InvalidSyntaxError.
    """
    result = ParseResult()
    start_token = self.current_token
    kind = start_token.type

    # Unary plus / minus applied to another factor.
    if kind in (TOKENTYPE_PLUS, TOKENTYPE_MINUS):
        result.register(self.advance())
        operand = result.register(self.factor())
        if result.error:
            return result
        return result.success(UnaryOperationNode(start_token, operand))

    # Numeric literal.
    if kind in (TOKENTYPE_INT, TOKENTYPE_FLOAT):
        result.register(self.advance())
        return result.success(NumberNode(start_token))

    # Parenthesised sub-expression.
    if kind == TOKENTYPE_LEFTPARENTESIS:
        result.register(self.advance())
        inner = result.register(self.expression())
        if result.error:
            return result
        if self.current_token.type != TOKENTYPE_RIGHTPARENTESIS:
            return result.failure(InvalidSyntaxError(
                self.current_token.position_start,
                self.current_token.position_end,
                "Expected ')'"
            ))
        result.register(self.advance())
        return result.success(inner)

    return result.failure(InvalidSyntaxError(
        start_token.position_start,
        start_token.position_end,
        "Expected int or float"
    ))
def while_expr(self):
    """Parse a ``while`` loop.

    Grammar::

        while-expr : KEYWORD:while expr KEYWORD:then
                     statement
                   | (NEWLINE statements KEYWORD:end)

    Example::

        var i = 0
        while i < 10 then var i = i + 1

    :return: ParserResult wrapping a WhileNode whose third argument is
        True for the ``NEWLINE ... end`` form and False for the
        single-statement form, or a failure carrying an InvalidSyntaxError.
    """
    result = ParserResult()

    if not self.current_tok.matches(TT_KEYWORD, 'while'):
        return result.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'while'"
        ))
    result.register_advancement()
    self.advance()

    loop_test = result.register(self.expr())
    if result.error:
        return result

    if not self.current_tok.matches(TT_KEYWORD, 'then'):
        return result.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'then'"
        ))
    result.register_advancement()
    self.advance()

    # Single-statement form: the body follows 'then' directly.
    if self.current_tok.type != TT_NEWLINE:
        single = result.register(self.statement())
        if result.error:
            return result
        return result.success(WhileNode(loop_test, single, False))

    # Block form: NEWLINE, a run of statements, then the 'end' keyword.
    result.register_advancement()
    self.advance()

    block = result.register(self.statements())
    if result.error:
        return result

    if not self.current_tok.matches(TT_KEYWORD, 'end'):
        return result.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'end'"
        ))
    result.register_advancement()
    self.advance()

    return result.success(WhileNode(loop_test, block, True))
def expr(self):
    """Parse an expression.

    Grammar::

        expr : KEYWORD:var IDENTIFIER EQ expr
             : comp-expr ((KEYWORD:and|KEYWORD:or) comp-expr)*

    :return: ParserResult wrapping a VarAssignNode for a declaration, the
        node produced by ``bin_op`` otherwise, or a failure carrying an
        InvalidSyntaxError.
    """
    res = ParserResult()

    # A leading 'var' keyword means this is a declaration/assignment.
    if self.current_tok.matches(TT_KEYWORD, 'var'):
        res.register_advancement()
        self.advance()

        if self.current_tok.type != TT_IDENTIFIER:
            # 'var' must be followed by a variable name.
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected identifier"
            ))

        var_name = self.current_tok
        res.register_advancement()
        self.advance()

        if self.current_tok.type != TT_EQ:
            # The variable name must be followed by '='.
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected '='"
            ))

        res.register_advancement()
        self.advance()

        # The right-hand side is itself an expr, so recurse. The '=' token
        # does not appear in the tree:
        #   var a = 1  ->  [KEYWORD:var, IDENTIFIER:a, EQ, INT:1, EOF]
        #              ->  VarAssignNode(IDENTIFIER:a, INT:1)
        value = res.register(self.expr())
        if res.error:
            return res
        return res.success(VarAssignNode(var_name, value))

    node = res.register(self.bin_op(
        self.comp_expr, ((TT_KEYWORD, 'and'), (TT_KEYWORD, 'or'))
    ))
    if res.error:
        # The error span must run from the token's start to its end; the
        # expected-token list includes 'var' because a declaration is legal here.
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'var', int, float, identifier, '+', '-', '(' or 'not'"
        ))
    return res.success(node)
def call(self):
    """Parse an atom optionally followed by a call argument list.

    Grammar::

        call : atom (LPAREN (expr (COMMA expr)*)? RPAREN)?

    Example: ``a(1+2, 3+4)``

    :return: ParserResult wrapping a CallNode when an argument list is
        present, the bare atom node otherwise, or a failure carrying an
        InvalidSyntaxError.
    """
    result = ParserResult()

    callee = result.register(self.atom())
    if result.error:
        return result

    # No '(' after the atom: it is not a call.
    if self.current_tok.type != TT_LPAREN:
        return result.success(callee)

    result.register_advancement()
    self.advance()
    arguments = []

    # A non-empty argument list: first expr, then any number of ', expr'.
    if self.current_tok.type != TT_RPAREN:
        first_arg = result.register(self.expr())
        arguments.append(first_arg)
        if result.error:
            return result.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected ')', 'VAR', 'IF', 'FOR', 'WHILE', 'FUN', int, float, identifier, '+', '-', '(' or 'NOT'"
            ))

        while self.current_tok.type == TT_COMMA:
            result.register_advancement()
            self.advance()

            next_arg = result.register(self.expr())
            arguments.append(next_arg)
            if result.error:
                return result

        if self.current_tok.type != TT_RPAREN:
            return result.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected ',' or ')'"
            ))

    # Consume the closing ')' (reached for both empty and non-empty lists).
    result.register_advancement()
    self.advance()
    return result.success(CallNode(callee, arguments))
def list_expr(self):
    """Parse a list literal such as ``[]`` or ``[1, 2, 3]``.

    Grammar::

        list-expr : LSQUARE (expr (COMMA expr)*)? RSQUARE

    :return: ParserResult wrapping a ListNode built from the element
        nodes, or a failure carrying an InvalidSyntaxError.
    """
    res = ParserResult()
    element_nodes = []
    pos_start = self.current_tok.pos_start.copy()

    if self.current_tok.type != TT_LSQUARE:
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected '['"
        ))

    res.register_advancement()
    self.advance()

    if self.current_tok.type == TT_RSQUARE:
        # Empty list: []
        res.register_advancement()
        self.advance()
    else:
        # Non-empty list: [1, 2, 3]
        element_nodes.append(res.register(self.expr()))
        if res.error:
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected ']', 'VAR', 'IF', 'FOR', 'WHILE', 'FUN', int, float, identifier, '+', '-', '(', '[' or 'NOT'"
            ))

        # Remaining elements, each preceded by a comma.
        while self.current_tok.type == TT_COMMA:
            res.register_advancement()
            self.advance()

            element_nodes.append(res.register(self.expr()))
            if res.error:
                return res

        if self.current_tok.type != TT_RSQUARE:
            # Message previously carried a stray trailing quote: "']''".
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected ',' or ']'"
            ))

        res.register_advancement()
        self.advance()

    # NOTE(review): the end position is taken from the token *after* ']'
    # because the parser has already advanced past it — confirm this is intended.
    return res.success(ListNode(
        element_nodes, pos_start, self.current_tok.pos_end.copy()
    ))
def if_expr_c(self):
    """Parse an optional ``else`` branch.

    Grammar::

        if-expr-c : KEYWORD:else
                    statement
                  | (NEWLINE statements KEYWORD:end)

    :return: ParserResult wrapping ``None`` when there is no ``else``
        keyword, ``(statements, True)`` for the block form,
        ``(statement, False)`` for the single-statement form, or a failure
        carrying an InvalidSyntaxError.
    """
    result = ParserResult()

    # No 'else' keyword: succeed with no else-case and consume nothing.
    if not self.current_tok.matches(TT_KEYWORD, 'else'):
        return result.success(None)

    result.register_advancement()
    self.advance()

    # Single-statement form: 'else' followed directly by a statement.
    if self.current_tok.type != TT_NEWLINE:
        single = result.register(self.statement())
        if result.error:
            return result
        return result.success((single, False))

    # Block form: NEWLINE statements 'end'.
    result.register_advancement()
    self.advance()

    block = result.register(self.statements())
    if result.error:
        return result

    if not self.current_tok.matches(TT_KEYWORD, 'end'):
        return result.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'end'"
        ))

    result.register_advancement()
    self.advance()
    return result.success((block, True))
def parse(self):
    """Parse the token stream as a single expression.

    Returns the result of ``self.expression()``. If that succeeded but the
    parser did not stop on the EOF token, the result is turned into a
    failure carrying an InvalidSyntaxError at the current token.
    """
    outcome = self.expression()

    # Either parsing already failed, or every token was consumed: done.
    if outcome.error or self.current_token.type == TOKENTYPE_EOF:
        return outcome

    leftover = InvalidSyntaxError(
        self.current_token.position_start,
        self.current_token.position_end,
        "Expected '+', '-', '*', or '/'"
    )
    return outcome.failure(leftover)
def parse(self):
    """Parse the token stream starting from the ``expr`` rule.

    Returns the result of ``self.expr()``. When that succeeded but tokens
    remain before EOF, the result is converted into a failure carrying an
    InvalidSyntaxError that names the unexpected token type.
    """
    outcome = self.expr()

    # Nothing more to check if parsing failed or we stopped exactly at EOF.
    if outcome.error or self.current_token.type == TT_EOF:
        return outcome

    return outcome.failure(InvalidSyntaxError(
        self.current_token.pos_start,
        self.current_token.pos_end,
        f"Unexpected token '{self.current_token.type}', expected expression"
    ))
def parse(self):
    """Parse the token stream starting from the ``statements`` rule.

    The start non-terminal is ``statements``; its node is the root of the
    AST.

    :return: the ParserResult of ``self.statements()``; if that succeeded
        but the current token is not EOF, a failure carrying an
        InvalidSyntaxError at the leftover token.
    """
    res = self.statements()
    if not res.error and self.current_tok.type != TT_EOF:
        # Tokens remain after a complete parse, so an operator was expected.
        # (The '<=' entry previously lacked its opening quote.)
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected '+', '-', '*', '/', '^', '==', '!=', '<', '>', '<=', '>=', 'AND' or 'OR'"
        ))
    return res
def expr(self):
    """Parse an expression.

    Handles two forms:

    * ``SET <identifier> = <expr>`` — produces a VarAssignNode.
    * ``comp_expr ((AND|OR) comp_expr)*`` — delegated to ``bin_op``.

    Returns the ParseResult for this rule; failures carry an
    InvalidSyntaxError naming the unexpected token type.
    """
    res = ParseResult()

    if self.current_token.matches(TT_KEY, "SET"):
        res.register_advancement()
        self.advance()

        if self.current_token.type != TT_ID:
            error = InvalidSyntaxError(
                self.current_token.pos_start,
                self.current_token.pos_end,
                f"Unexpected token '{self.current_token.type}', expected identifier"
            )
            return res.failure(error)

        var_name = self.current_token
        res.register_advancement()
        self.advance()

        if self.current_token.type != TT_EQ:
            # Message previously ended with a stray quote: "expected '=''".
            error = InvalidSyntaxError(
                self.current_token.pos_start,
                self.current_token.pos_end,
                f"Unexpected token '{self.current_token.type}', expected '='"
            )
            return res.failure(error)

        res.register_advancement()
        self.advance()

        # The assigned value is itself an expression, so recurse.
        value = res.register(self.expr())
        if res.error:
            return res
        return res.success(VarAssignNode(var_name, value))

    node = res.register(
        self.bin_op(self.comp_expr, ((TT_KEY, "AND"), (TT_KEY, "OR")))
    )
    if res.error:
        error = InvalidSyntaxError(
            self.current_token.pos_start,
            self.current_token.pos_end,
            f"Unexpected token '{self.current_token.type}', expected 'SET', operator or identifier"
        )
        return res.failure(error)
    return res.success(node)
def comp_expr(self):
    """Parse a comparison expression.

    Grammar::

        comp-expr : KEYWORD:not comp-expr
                  : arith-expr ((EE|NE|LT|GT|LTE|GTE) arith-expr)*

    :return: ParserResult wrapping a UnaryOpNode for the ``not`` form, the
        node produced by ``bin_op`` otherwise, or a failure carrying an
        InvalidSyntaxError.
    """
    result = ParserResult()

    # Comparison form: no leading 'not'.
    if not self.current_tok.matches(TT_KEYWORD, 'not'):
        comparison_ops = (TT_EE, TT_NE, TT_LT, TT_GT, TT_LTE, TT_GTE)
        compared = result.register(self.bin_op(self.arith_expr, comparison_ops))
        if result.error:
            return result.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected int, float, identifier, '+', '-', '(' or 'not'"
            ))
        return result.success(compared)

    # Negation form: 'not' applied to another comp-expr.
    not_tok = self.current_tok
    result.register_advancement()
    self.advance()

    operand = result.register(self.comp_expr())
    if result.error:
        return result
    return result.success(UnaryOpNode(not_tok, operand))
def statement(self):
    """Parse a single statement.

    Grammar::

        statement : KEYWORD:return expr?
                  : KEYWORD:continue
                  : KEYWORD:break
                  : expr

    :return: ParserResult wrapping a ReturnNode, ContinueNode, BreakNode
        or the expression node, or a failure carrying an InvalidSyntaxError.
    """
    res = ParserResult()
    pos_start = self.current_tok.pos_start.copy()

    if self.current_tok.matches(TT_KEYWORD, 'return'):
        res.register_advancement()
        self.advance()

        # 'expr?' means zero or one expression: a bare 'return' is legal.
        # If no expression could be parsed, rewind the tokens that the
        # failed attempt consumed.
        expr = res.try_register(self.expr())
        if not expr:
            self.reverse(res.to_reverse_count)
        return res.success(
            ReturnNode(expr, pos_start, self.current_tok.pos_start.copy())
        )

    if self.current_tok.matches(TT_KEYWORD, 'continue'):
        res.register_advancement()
        self.advance()
        return res.success(
            ContinueNode(pos_start, self.current_tok.pos_start.copy())
        )

    if self.current_tok.matches(TT_KEYWORD, 'break'):
        res.register_advancement()
        self.advance()
        return res.success(
            BreakNode(pos_start, self.current_tok.pos_start.copy())
        )

    expr = res.register(self.expr())
    if res.error:
        # The keyword list names 'while' and 'func', the keywords the
        # parser actually matches (it previously said 'where' and 'fun').
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'return', 'continue', 'break', 'var', 'if', 'for', 'while', 'func', int, float, identifier, '+', '-', '(', '[' or 'not'"
        ))
    return res.success(expr)
def comp_expr(self):
    """Parse a comparison expression.

    Handles two forms:

    * ``NOT comp_expr`` — produces a UnaryOpNode.
    * ``arith_expr ((EE|NE|LT|GT|LTE|GTE) arith_expr)*`` — delegated to
      ``bin_op``.

    Returns the ParseResult for this rule; a failure in the comparison
    form carries an InvalidSyntaxError naming the unexpected token type.
    """
    res = ParseResult()

    if self.current_token.matches(TT_KEY, "NOT"):
        op = self.current_token
        res.register_advancement()
        # Must actually call advance(); a bare `self.advance` leaves the
        # parser on the NOT token and the recursive call below never ends.
        self.advance()

        node = res.register(self.comp_expr())
        if res.error:
            return res
        return res.success(UnaryOpNode(op, node))

    node = res.register(
        self.bin_op(self.arith_expr, (TT_EE, TT_NE, TT_LT, TT_GT, TT_LTE, TT_GTE))
    )
    if res.error:
        # Interpolate the offending token type; the message used to be a
        # plain string that printed the literal text "{token.type}".
        return res.failure(InvalidSyntaxError(
            self.current_token.pos_start,
            self.current_token.pos_end,
            f"Unexpected token '{self.current_token.type}', expected operator or identifier"
        ))
    return res.success(node)
def if_expr_cases(self, case_keyword):
    """Parse an ``if`` or ``elif`` case and everything that follows it.

    Grammar::

        KEYWORD:case_keyword expr KEYWORD:then
          (statement if-expr-b | if-expr-c?)
        | (NEWLINE statements KEYWORD:end | if-expr-b | if-expr-c)

    :param case_keyword: the keyword that must open this case, ``'if'`` or
        ``'elif'``.
    :return: ParserResult wrapping ``(cases, else_case)``. Each entry of
        ``cases`` is ``(condition, body, is_block)`` where ``is_block`` is
        True for the NEWLINE form. On a syntax error, a failure carrying
        an InvalidSyntaxError.
    """
    res = ParserResult()
    cases = []
    else_case = None

    # The case must open with the requested keyword (if / elif).
    if not self.current_tok.matches(TT_KEYWORD, case_keyword):
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            f"Expected '{case_keyword}'"
        ))

    res.register_advancement()
    self.advance()

    # The expression after if/elif is the branch condition.
    condition = res.register(self.expr())
    if res.error:
        return res

    if not self.current_tok.matches(TT_KEYWORD, 'then'):
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'then'"
        ))

    res.register_advancement()
    self.advance()

    if self.current_tok.type == TT_NEWLINE:
        # Block form: the body is a run of statements on following lines.
        res.register_advancement()
        self.advance()

        statements = res.register(self.statements())
        if res.error:
            return res
        cases.append((condition, statements, True))

        if self.current_tok.matches(TT_KEYWORD, 'end'):
            # A single-level if:
            #   if <expr> then
            #     <statements>
            #   end
            res.register_advancement()
            self.advance()
        else:
            # More levels follow (elif ... / else ... end).
            # Check for an error BEFORE unpacking: on failure the
            # registered value is not guaranteed to be a 2-tuple.
            all_cases = res.register(self.if_expr_b_or_c())
            if res.error:
                return res
            new_cases, else_case = all_cases
            cases.extend(new_cases)
    else:
        # Inline form: `if <expr> then <statement>`. Later levels may still
        # use the block form, so delegate them to if_expr_b_or_c.
        expr = res.register(self.statement())
        if res.error:
            return res
        cases.append((condition, expr, False))

        all_cases = res.register(self.if_expr_b_or_c())
        if res.error:
            return res
        new_cases, else_case = all_cases
        cases.extend(new_cases)

    return res.success((cases, else_case))
def for_expr(self):
    """Parse a ``for`` loop.

    Grammar::

        for-expr : KEYWORD:for IDENTIFIER EQ expr KEYWORD:to expr
                   (KEYWORD:step expr)? KEYWORD:then
                   statement
                 | (NEWLINE statements KEYWORD:end)

    Example::

        var res = 1
        for i = 1 to 10 then var res = res * i

    :return: ParserResult wrapping a ForNode whose last argument is True
        for the ``NEWLINE ... end`` form and False for the
        single-statement form, or a failure carrying an InvalidSyntaxError.
    """
    res = ParserResult()

    if not self.current_tok.matches(TT_KEYWORD, 'for'):
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'for'"
        ))

    res.register_advancement()
    self.advance()

    if self.current_tok.type != TT_IDENTIFIER:
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'identifier'"
        ))

    var_name = self.current_tok
    res.register_advancement()
    self.advance()

    if self.current_tok.type != TT_EQ:
        # The missing token here is '=', not an identifier.
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected '='"
        ))

    res.register_advancement()
    self.advance()

    # Loop start value.
    start_value = res.register(self.expr())
    if res.error:
        return res

    if not self.current_tok.matches(TT_KEYWORD, 'to'):
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'to'"
        ))

    res.register_advancement()
    self.advance()

    # Loop end value.
    end_value = res.register(self.expr())
    if res.error:
        return res

    # Optional step: how far the loop variable moves per iteration.
    if self.current_tok.matches(TT_KEYWORD, 'step'):
        res.register_advancement()
        self.advance()

        step_value = res.register(self.expr())
        if res.error:
            return res
    else:
        step_value = None

    if not self.current_tok.matches(TT_KEYWORD, 'then'):
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'then'"
        ))

    res.register_advancement()
    self.advance()

    if self.current_tok.type == TT_NEWLINE:
        # Block form: NEWLINE statements 'end'.
        res.register_advancement()
        self.advance()

        body = res.register(self.statements())
        if res.error:
            return res

        if not self.current_tok.matches(TT_KEYWORD, 'end'):
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected 'end'"
            ))

        res.register_advancement()
        self.advance()

        return res.success(
            ForNode(var_name, start_value, end_value, step_value, body, True)
        )

    # Single-statement form.
    body = res.register(self.statement())
    if res.error:
        return res

    return res.success(
        ForNode(var_name, start_value, end_value, step_value, body, False)
    )
def atom(self):
    """Parse an atom, the smallest unit of an expression.

    Grammar::

        atom : INT|FLOAT|STRING|IDENTIFIER
             : LPAREN expr RPAREN
             : list-expr
             : if-expr
             : for-expr
             : while-expr
             : func-expr

    :return: ParserResult wrapping the parsed node, or a failure carrying
        an InvalidSyntaxError when the current token cannot start an atom.
    """
    res = ParserResult()
    tok = self.current_tok

    # atom : INT|FLOAT
    if tok.type in (TT_INT, TT_FLOAT):
        res.register_advancement()
        self.advance()
        return res.success(NumberNode(tok))

    # atom : STRING
    elif tok.type == TT_STRING:
        res.register_advancement()
        self.advance()
        return res.success(StringNode(tok))

    # atom : IDENTIFIER (a variable access is just the bare name)
    elif tok.type == TT_IDENTIFIER:
        res.register_advancement()
        self.advance()
        return res.success(VarAccessNode(tok))

    # atom : LPAREN expr RPAREN  =>  (1 + 2) * 3
    elif tok.type == TT_LPAREN:
        # Every advance() must be paired with register_advancement() so the
        # result's advance count matches the tokens actually consumed.
        res.register_advancement()
        self.advance()
        expr = res.register(self.expr())
        if res.error:
            return res
        if self.current_tok.type == TT_RPAREN:
            res.register_advancement()
            self.advance()
            return res.success(expr)
        else:
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected ')'"
            ))

    # atom : list-expr
    elif tok.type == TT_LSQUARE:
        list_expr = res.register(self.list_expr())
        if res.error:
            return res
        return res.success(list_expr)

    # atom : if-expr
    elif tok.matches(TT_KEYWORD, 'if'):
        if_expr = res.register(self.if_expr())
        if res.error:
            return res
        return res.success(if_expr)

    # atom : for-expr
    elif tok.matches(TT_KEYWORD, 'for'):
        for_expr = res.register(self.for_expr())
        if res.error:
            return res
        return res.success(for_expr)

    # atom : while-expr
    elif tok.matches(TT_KEYWORD, 'while'):
        while_expr = res.register(self.while_expr())
        if res.error:
            return res
        return res.success(while_expr)

    # atom : func-expr
    elif tok.matches(TT_KEYWORD, 'func'):
        func_expr = res.register(self.func_expr())
        if res.error:
            return res
        return res.success(func_expr)

    # 'var' is deliberately absent from the expected list: although the
    # grammar reaches expr through LPAREN expr RPAREN, an assignment cannot
    # start an atom. Error hints matter, so keep this list accurate.
    return res.failure(InvalidSyntaxError(
        tok.pos_start, tok.pos_end,
        "Expected int, float, identifier, '+', '-', '(', 'IF', 'FOR', 'WHILE', 'FUN'"
    ))
def func_expr(self):
    """Parse a function definition.

    Grammar::

        func-expr : KEYWORD:func IDENTIFIER?
                    LPAREN (IDENTIFIER (COMMA IDENTIFIER)*)? RPAREN
                    (ARROW expr)
                  | (NEWLINE statements KEYWORD:end)

    :return: ParserResult wrapping a FuncNode whose last argument is True
        for the one-line ``-> expr`` form and False for the multi-line
        ``... end`` form, or a failure carrying an InvalidSyntaxError.
    """
    res = ParserResult()

    if not self.current_tok.matches(TT_KEYWORD, 'func'):
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'func'"
        ))

    res.register_advancement()
    self.advance()

    if self.current_tok.type == TT_IDENTIFIER:
        # Named function: the name must be followed by '('  =>  func a(
        var_name_tok = self.current_tok
        res.register_advancement()
        self.advance()
        if self.current_tok.type != TT_LPAREN:
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected '('"
            ))
    else:
        # Anonymous function: '(' follows 'func' directly.
        var_name_tok = None
        if self.current_tok.type != TT_LPAREN:
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected identifier or '('"
            ))

    res.register_advancement()
    self.advance()
    arg_name_toks = []

    if self.current_tok.type == TT_IDENTIFIER:
        # First parameter name  =>  func a(x
        arg_name_toks.append(self.current_tok)
        res.register_advancement()
        self.advance()

        # Further parameters, each preceded by a comma  =>  func a(x, y
        while self.current_tok.type == TT_COMMA:
            res.register_advancement()
            self.advance()

            if self.current_tok.type != TT_IDENTIFIER:
                return res.failure(InvalidSyntaxError(
                    self.current_tok.pos_start, self.current_tok.pos_end,
                    "Expected identifier"
                ))

            arg_name_toks.append(self.current_tok)
            res.register_advancement()
            self.advance()

        # The parameter list must be closed  =>  func a(x, y)
        if self.current_tok.type != TT_RPAREN:
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected ',' or ')'"
            ))
    else:
        # A function may take no parameters  =>  func a()
        if self.current_tok.type != TT_RPAREN:
            return res.failure(InvalidSyntaxError(
                self.current_tok.pos_start, self.current_tok.pos_end,
                "Expected identifier or ')'"
            ))

    res.register_advancement()
    self.advance()

    # One-line form: func a(x, y) -> expr
    if self.current_tok.type == TT_ARROW:
        res.register_advancement()
        self.advance()

        # The body expression is the function's value.
        node_to_return = res.register(self.expr())
        if res.error:
            return res

        # Final argument True marks the auto-return (one-line) form: no
        # 'return' keyword is needed, e.g. func add(a, b) -> a + b
        return res.success(
            FuncNode(var_name_tok, arg_name_toks, node_to_return, True)
        )

    if self.current_tok.type != TT_NEWLINE:
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected '->' or ';' "
        ))

    res.register_advancement()
    self.advance()

    body = res.register(self.statements())
    if res.error:
        return res

    if not self.current_tok.matches(TT_KEYWORD, 'end'):
        return res.failure(InvalidSyntaxError(
            self.current_tok.pos_start, self.current_tok.pos_end,
            "Expected 'end' "
        ))

    res.register_advancement()
    self.advance()

    # Multi-line form: final argument False, so a value is produced only
    # through an explicit 'return', e.g. func add(a, b); return a + b; end
    return res.success(FuncNode(var_name_tok, arg_name_toks, body, False))