def stmtlist(self, pass_in_ATexpr=None):
    """Gather statements into a StmtList until a block boundary is seen.

    Parsing stops, without consuming the token, at END, DOWHILE or DEF,
    and also at ELSE / ATRATEOF / CASE / OTHERWISE while ``inside_if``
    is set.  ``pass_in_ATexpr`` is forwarded to the first ``stmt`` call
    only; later calls receive ``None``.
    """
    self.dbg_msg(u" STMTLIST ")
    collected = StmtList()
    pending_expr = pass_in_ATexpr
    while True:
        if self.lex.end_of_tokens():
            break
        self.dbg_msg(u"STMTLIST => STMT")
        upcoming = self.peek()
        self.dbg_msg(u"STMTLIST "+unicode(upcoming))
        if self.debug:
            print(u"peek @ ",unicode(upcoming))
        if upcoming.kind == EzhilToken.END:
            self.dbg_msg(u"End token found")
            break
        if upcoming.kind == EzhilToken.DOWHILE:
            if self.debug:
                print("DOWHILE token found")
            break
        # Branch keywords only close the list while an if/switch is open.
        if self.inside_if and (upcoming.kind == EzhilToken.ELSE
                               or upcoming.kind == EzhilToken.ATRATEOF
                               or upcoming.kind == EzhilToken.CASE
                               or upcoming.kind == EzhilToken.OTHERWISE):
            break
        if upcoming.kind == EzhilToken.DEF:
            break
        node = self.stmt(pending_expr)
        pending_expr = None  # the handed-in expression is used just once
        collected.append(node)
    return collected
def stmtlist(self):
    """Gather statements into a StmtList until a block boundary is seen.

    Stops, without consuming the token, at END, and at ELSE / ELSEIF
    when ``inside_if`` is not set.
    """
    self.dbg_msg(" STMTLIST ")
    collected = StmtList()
    while True:
        if self.lex.end_of_tokens():
            break
        self.dbg_msg("STMTLIST => STMT")
        upcoming = self.peek()
        if upcoming.kind == Token.END:
            break
        if not self.inside_if:
            if upcoming.kind == Token.ELSE or upcoming.kind == Token.ELSEIF:
                break
        node = self.stmt()
        collected.append(node)
    return collected
def stmtlist(self):
    """Read statements one by one and return them as a StmtList.

    The loop ends at end of input, at an END token, or (when
    ``inside_if`` is false) at an ELSE / ELSEIF token; the stopping
    token is left in the lexer.
    """
    self.dbg_msg(" STMTLIST ")
    result = StmtList()
    while not self.lex.end_of_tokens():
        self.dbg_msg("STMTLIST => STMT")
        lookahead = self.peek()
        if lookahead.kind == Token.END:
            return result
        if not self.inside_if:
            if lookahead.kind == Token.ELSE or lookahead.kind == Token.ELSEIF:
                return result
        result.append(self.stmt())
    return result
def parse(self):
    """Parse the whole token stream and return the resulting AST.

    Function definitions are stored in ``function_map``; every other
    statement is appended to ``self.ast`` unless ``parsing_function``
    is set.
    """
    self.ast = StmtList()
    while True:
        if self.lex.end_of_tokens():
            break
        self.dbg_msg("AST length = %d" % len(self.ast))
        starts_function = self.lex.peek().kind == Token.DEF
        if not starts_function:
            self.dbg_msg("parsing for stmt")
            node = self.stmt()
            if not self.parsing_function:
                self.ast.append(node)
            continue
        self.dbg_msg("parsing for function")
        # Save the function in the global table.
        definition = self.function()
        self.warn_function_overrides(definition.name)
        self.function_map[definition.name] = definition
    return self.ast
def stmtlist(self, pass_in_ATexpr=None):
    """Gather statements into a StmtList until a block boundary is seen.

    Parsing stops, without consuming the token, at END or DOWHILE, and
    also at ELSE / ATRATEOF / CASE / OTHERWISE while ``inside_if`` is
    set.  ``pass_in_ATexpr`` is forwarded to the first ``stmt`` call
    only; later calls receive ``None``.
    """
    self.dbg_msg(" STMTLIST ")
    collected = StmtList()
    pending_expr = pass_in_ATexpr
    while True:
        if self.lex.end_of_tokens():
            break
        self.dbg_msg("STMTLIST => STMT")
        upcoming = self.peek()
        self.dbg_msg("STMTLIST " + str(upcoming))
        if self.debug:
            print("peek @ ", str(upcoming))
        if upcoming.kind == EzhilToken.END:
            self.dbg_msg("End token found")
            break
        if upcoming.kind == EzhilToken.DOWHILE:
            if self.debug:
                print("DOWHILE token found")
            break
        # Branch keywords only close the list while an if/switch is open.
        if self.inside_if and (upcoming.kind == EzhilToken.ELSE
                               or upcoming.kind == EzhilToken.ATRATEOF
                               or upcoming.kind == EzhilToken.CASE
                               or upcoming.kind == EzhilToken.OTHERWISE):
            break
        node = self.stmt(pending_expr)
        pending_expr = None  # the handed-in expression is used just once
        collected.append(node)
    return collected
def parse(self):
    """Drive the parser over all tokens and return ``self.ast``.

    A DEF token starts a function definition, which is registered in
    ``function_map``.  Anything else is parsed as a single statement and
    appended to the AST when ``parsing_function`` is false.
    """
    self.ast = StmtList()
    while not self.lex.end_of_tokens():
        self.dbg_msg("AST length = %d"%len(self.ast))
        head = self.lex.peek()
        if head.kind == Token.DEF:
            self.dbg_msg("parsing for function")
            # Save the function in the global table.
            fn = self.function()
            self.warn_function_overrides(fn.name)
            self.function_map[fn.name] = fn
            continue
        self.dbg_msg("parsing for stmt")
        parsed = self.stmt()
        if self.parsing_function:
            continue
        self.ast.append(parsed)
    return self.ast
def parse(self):
    """Parse the whole token stream into a top-level StmtList.

    Function definitions are registered in ``function_map`` and also
    recorded in the AST as a DeclarationStmt.  Other input is parsed
    with ``stmtlist`` and appended to the AST unless
    ``parsing_function`` is set.
    """
    self.ast = StmtList(istoplevel=True)
    self.dbg_msg(u" entering parser " )
    while True:
        if self.lex.end_of_tokens():
            break
        self.dbg_msg(u"AST length = %d"%len(self.ast))
        starts_function = self.lex.peek().kind == EzhilToken.DEF
        if not starts_function:
            self.dbg_msg(u"parsing for stmt")
            block = self.stmtlist()
            if not self.parsing_function:
                self.ast.append(block)
            continue
        self.dbg_msg(u"parsing for function")
        # Save the function in the global table.
        definition = self.function()
        self.warn_function_overrides(definition.name)
        self.function_map[definition.name] = definition
        self.ast.append(DeclarationStmt(definition))  # add to AST
    return self.ast
class EzhilParser(Parser):
    """Recursive-descent parser for the Ezhil token stream.

    When you add a new language feature, add an AST class and its
    evaluate methods, and also add a parser method here.
    """

    def __init__(self, lexer, fcn_map, builtin_map, dbg=False):
        """Create a parser bound to an EzhilLex lexer.

        Raises RuntimeException when ``lexer`` is not an EzhilLex.
        """
        if not isinstance(lexer, EzhilLex):
            raise RuntimeException(u"Cannot find Ezhil lexer class")
        Parser.__init__(self, lexer, fcn_map, builtin_map, dbg)
        self.open_if_stmts = 0
        self.backtrack_atexpr = None

    @staticmethod
    def factory(lexer, fcn_map, builtin_map, dbg=False):
        """Factory method: build and return an EzhilParser."""
        return EzhilParser(lexer, fcn_map, builtin_map, dbg)

    def match(self, kind):
        """Dequeue the next token and return it if it has kind ``kind``.

        Raises ParseException when the dequeued token is of another kind.
        """
        tok = self.dequeue()
        if tok.kind != kind:
            raise ParseException(u"cannot find token "
                                 + EzhilToken.get_name(kind) + u" got "
                                 + unicode(tok)
                                 + u" instead!")
        return tok

    def parse(self):
        """Parse the whole token stream and return the AST (a StmtList).

        Function definitions are stored in ``function_map`` and recorded
        in the AST as DeclarationStmt nodes.  Other input is parsed with
        ``stmtlist`` and appended to the AST unless ``parsing_function``
        is set.

        Raises ParseException when a block-terminating token appears
        where no block is open.
        """
        self.ast = StmtList()
        self.dbg_msg(u" entering parser ")
        while not self.lex.end_of_tokens():
            self.dbg_msg(u"AST length = %d" % len(self.ast))
            if self.lex.peek().kind == EzhilToken.DEF:
                self.dbg_msg(u"parsing for function")
                # save function in a global table.
                func = self.function()
                self.warn_function_overrides(func.name)
                self.function_map[func.name] = func
                self.ast.append(DeclarationStmt(func))  # add to AST
            else:
                self.dbg_msg(u"parsing for stmt")
                st = self.stmtlist()
                if len(st) == 0:
                    # stmtlist() stops on END / DOWHILE (and on the
                    # if/switch branch keywords while inside_if is set)
                    # without consuming the token.  An empty result means
                    # no progress was made, so looping again would never
                    # terminate; report the stray token instead.
                    raise ParseException(u"unexpected token "
                                         + unicode(self.peek())
                                         + u" outside of any block")
                if not self.parsing_function:
                    self.ast.append(st)
        return self.ast

    def stmtlist(self, pass_in_ATexpr=None):
        """Parse statements until a block boundary and return a StmtList.

        Stops, without consuming the token, at END, DOWHILE or DEF, and at
        ELSE / ATRATEOF / CASE / OTHERWISE while ``inside_if`` is set.
        ``pass_in_ATexpr`` is an already-parsed ``@( ... )`` value list
        that is handed to the first ``stmt`` call only.
        """
        self.dbg_msg(u" STMTLIST ")
        stlist = StmtList()
        while not self.lex.end_of_tokens():
            self.dbg_msg(u"STMTLIST => STMT")
            ptok = self.peek()
            self.dbg_msg(u"STMTLIST "+unicode(ptok))
            if self.debug:
                print(u"peek @ ",unicode(ptok))
            if ptok.kind == EzhilToken.END:
                self.dbg_msg(u"End token found")
                break
            elif ptok.kind == EzhilToken.DOWHILE:
                if self.debug:
                    print("DOWHILE token found")
                break
            elif (self.inside_if and
                  (ptok.kind == EzhilToken.ELSE
                   or ptok.kind == EzhilToken.ATRATEOF
                   or ptok.kind == EzhilToken.CASE
                   or ptok.kind == EzhilToken.OTHERWISE)):
                break
            elif ptok.kind == EzhilToken.DEF:
                break
            st = self.stmt(pass_in_ATexpr)
            pass_in_ATexpr = None  # use it just once
            stlist.append(st)
        return stlist

    def parseSwitchStmt(self, exp):
        """Parse a SWITCH statement and return it as an IfStmt chain.

        Grammar:
          @ <ID/EXPR> SWITCH @( expr ) CASE {stmtlist}
                             @( expr ) CASE {stmtlist}
                             OTHERWISE {stmtlist} END

        ``exp`` is the value list parsed before the SWITCH token; its
        first element is compared for equality against each CASE value.
        """
        # implemented as an if-elseif-else statement
        self.dbg_msg("parsing SWITCH statement")
        sw_tok = self.dequeue()
        [l, c] = sw_tok.get_line_col()
        self.inside_if = True
        lhs = exp[0]
        # enter this if-statement always
        ifstmt = IfStmt(Number(1), None, None, l, c, self.debug)
        self.if_stack.append(ifstmt)
        self.dbg_msg("parsing SWITCH-body")
        ptok = self.peek()
        equality_token = EzhilLexeme("=", EzhilToken.EQUALITY)
        while (ptok.kind == EzhilToken.ATRATEOF
               or ptok.kind == EzhilToken.OTHERWISE):
            self.inside_if = True
            [l, c] = ptok.get_line_col()
            if ptok.kind == EzhilToken.ATRATEOF:
                # parse a CASE branch (treated like an elseif)
                self.dbg_msg("parsing CASE")
                self.match(EzhilToken.ATRATEOF)
                exp = self.valuelist()
                self.dbg_msg("parsing CASE EXPR")
                self.match(EzhilToken.CASE)
                next_stmt = self.stmtlist()
                expr = Expr(lhs, equality_token, exp[0], l, c, self.debug)
                self.dbg_msg("building an Expr "+unicode(expr))
                if not ifstmt.body:
                    # first CASE: becomes the head of the if-chain
                    ifstmt.expr = expr
                    ifstmt.body = next_stmt
                else:
                    case_stmt = IfStmt(expr, next_stmt, None, l, c, self.debug)
                    ifstmt.append_stmt(case_stmt)
            elif ptok.kind == EzhilToken.OTHERWISE:
                # parse the OTHERWISE branch (treated like an else)
                self.dbg_msg("parsing OTHERWISE: ")
                self.match(EzhilToken.OTHERWISE)
                self.dbg_msg("parsing OTHERWISE-Body")
                self.inside_if = False
                body = self.stmtlist()
                else_stmt = ElseStmt(body, l, c, self.debug)
                if not ifstmt.body:
                    ifstmt.body = else_stmt
                else:
                    ifstmt.append_stmt(else_stmt)
                break
            else:
                self.inside_if = False
                raise ParseException("SWITCH-CASE-OTHERWISE statement syntax is messed up")
            ptok = self.peek()
            self.dbg_msg("parsing SWITCH-CASE next bits "+unicode(ptok))
        self.match(EzhilToken.END)
        self.inside_if = False
        self.dbg_msg("parsing -SWITCH-CASE- complete")
        return ifstmt

    def parseIfStmt(self, exp):
        """Parse an IF statement and return the IfStmt node.

        Grammar:
          @ <expression> IF {stmtlist}
          @ <expr> ELSEIF {stmtlist}
          ELSE {stmtlist} END

        ``exp`` is the value list parsed before the IF token; its first
        element is the condition.
        """
        self.dbg_msg(u"parsing IF statement")
        if_tok = self.dequeue()
        [l, c] = if_tok.get_line_col()
        self.inside_if = True
        ifstmt = IfStmt(exp[0], None, None, l, c, self.debug)
        self.if_stack.append(ifstmt)
        self.dbg_msg(u"parsing IF-body")
        body = self.stmtlist()
        prev_body = body
        ifstmt.set_body(body)
        ptok = self.peek()
        while (ptok.kind == EzhilToken.ATRATEOF
               or ptok.kind == EzhilToken.ELSE):
            self.inside_if = True
            [l, c] = ptok.get_line_col()
            if ptok.kind == EzhilToken.ATRATEOF:
                # parse elseif branch
                self.dbg_msg(u"parsing ELSE-IF")
                self.match(EzhilToken.ATRATEOF)
                exp = self.valuelist()
                self.dbg_msg(u"parsing ELSE-IF EXPR")
                tok = self.peek()
                if tok.kind != EzhilToken.ELSEIF:
                    # maybe another IF statement, SWITCH-CASE or a WHILE
                    # loop, DO-WHILE loop etc.: parse it with the value
                    # list already consumed and append it to the
                    # previously scanned body.
                    next_stmt = self.stmtlist(exp)
                    prev_body.append(next_stmt)
                else:
                    self.dbg_msg(u"parsing ELSE-IF-body")
                    self.match(EzhilToken.ELSEIF)
                    body = self.stmtlist()
                    prev_body = body
                    next_stmt = IfStmt(exp[0], body, None, l, c, self.debug)
                    self.dbg_msg(u"ELSEIF parsed correctly => "+unicode(next_stmt))
                    ifstmt.append_stmt(next_stmt)
            elif ptok.kind == EzhilToken.ELSE:
                # parse else branch
                self.dbg_msg(u"parsing stmt else: ")
                self.match(EzhilToken.ELSE)
                self.dbg_msg(u"parsing ELSE-Body")
                self.inside_if = False
                body = self.stmtlist()
                prev_body = body
                else_stmt = ElseStmt(body, l, c, self.debug)
                ifstmt.append_stmt(else_stmt)
                break
            else:
                self.inside_if = False
                raise ParseException(u"If-Else-If statement syntax is messed up")
            ptok = self.peek()
            self.dbg_msg(u"parsing -IF next bits "+unicode(ptok))
        self.match(EzhilToken.END)
        self.inside_if = False
        self.dbg_msg(u"parsing -IF-complete")
        return ifstmt

    def stmt(self, pass_in_ATexpr=None):
        """Parse one statement and return its AST node.

        Handles return, print, the ``@( ... )``-prefixed forms (if, while,
        switch, foreach, for), do-while, break, continue, and finally
        assignment / bare expression.  ``pass_in_ATexpr`` is an
        already-parsed ``@( ... )`` value list to use instead of reading
        one from the token stream.

        Raises ParseException for a return outside a function, a
        malformed FOR-EACH, break/continue outside a loop, or an
        ``@( ... )`` prefix not followed by a known keyword.
        """
        self.dbg_msg(u" STMT ")
        ptok = self.peek()
        self.dbg_msg(u"stmt: peeking at "+unicode(ptok))
        if ptok.kind == EzhilToken.RETURN:
            # return <expression>
            self.dbg_msg('enter->return: <expression>')
            ret_tok = self.dequeue()
            [l, c] = ret_tok.get_line_col()
            if not self.parsing_function:
                raise ParseException(u"return statement outside of function body "+unicode(ret_tok))
            rstmt = ReturnStmt(self.expr(), l, c, self.debug)
            self.dbg_msg(u"return statement parsed")
            return rstmt
        elif ptok.kind == EzhilToken.PRINT:
            self.dbg_msg(u"stmt : print ")
            self.currently_parsing.append(ptok)
            # print <expression>
            print_tok = self.dequeue()
            [l, c] = print_tok.get_line_col()
            exprlist_val = self.exprlist()
            self.currently_parsing.pop()
            return PrintStmt(exprlist_val, l, c, self.debug)
        elif ptok.kind == EzhilToken.ATRATEOF or pass_in_ATexpr:
            # @ <expression> {if | while | elseif}
            if not pass_in_ATexpr:
                at_tok = self.match(EzhilToken.ATRATEOF)
                self.currently_parsing.append(at_tok)
                exp = self.valuelist()
                self.currently_parsing.pop()
            else:
                exp = pass_in_ATexpr
                pass_in_ATexpr = None  # use it just once
            if self.debug:
                print ("return from valuelist ",unicode(exp))
            ptok = self.peek()
            if ptok.kind == EzhilToken.IF:
                return self.parseIfStmt(exp)
            elif ptok.kind == EzhilToken.WHILE:
                # @ ( expr ) while { body } end
                self.loop_stack.append(True)
                self.dbg_msg(u"while-statement")
                while_tok = self.dequeue()
                self.currently_parsing.append(while_tok)
                [l, c] = while_tok.get_line_col()
                wexpr = exp[0]
                body = self.stmtlist()
                self.match(EzhilToken.END)
                whilestmt = WhileStmt(wexpr, body, l, c, self.debug)
                self.loop_stack.pop()
                self.currently_parsing.pop()
                return whilestmt
            elif ptok.kind == EzhilToken.SWITCH:
                return self.parseSwitchStmt(exp)
            elif ptok.kind == EzhilToken.FOREACH:
                foreach_tok = self.dequeue()
                self.currently_parsing.append(foreach_tok)
                [l, c] = foreach_tok.get_line_col()
                if self.debug:
                    print(u"parsing FOREACH stmt")
                self.loop_stack.append(True)
                self.dbg_msg(u"foreach-statement")
                # convert to a for statement - building Ezhil AST - transformations
                if not isinstance(exp[1], Identifier):
                    raise ParseException(u" FOR-EACH statement "+unicode(foreach_tok))
                foreach_iter = exp[1]
                # hidden counter variable, named after the user's loop variable
                index_var = Identifier("__"+foreach_iter.id, l=0, c=-1)
                eq_token = EzhilLexeme(u"=", EzhilToken.EQUALS)
                plus_token = EzhilLexeme(u"+", EzhilToken.PLUS)
                lt_token = EzhilLexeme(u"<", EzhilToken.LT)
                if self.debug:
                    print(u"build init assign stmt")
                init = AssignStmt(index_var, eq_token, Number(0), l, c, self.debug)
                if self.debug:
                    print(u"build cond expr")
                VL1 = ValueList([exp[0]], l, c, self.debug)
                cond = Expr(index_var, lt_token,
                            ExprCall(Identifier("len", l, c), VL1, l, c, self.debug),
                            l, c, self.debug)
                if self.debug:
                    print("build plus1 stmt")
                plus1_iter = Expr(index_var, plus_token, Number(1), l, c, self.debug)
                if self.debug:
                    print(u"build equals stmt")
                update = AssignStmt(index_var, eq_token, plus1_iter, l, c, self.debug)
                body = self.stmtlist()  # parse body
                # and insert, at the top of the body, the assignment
                #   <loop variable> = __getitem__(<collection>, <counter>)
                VL2 = ValueList([exp[0], index_var], l, c, self.debug)
                extract_foreach_iter_from_list = ExprCall(
                    Identifier("__getitem__", l, c), VL2, l, c, self.debug)
                foreach_iter_Assign = AssignStmt(
                    foreach_iter, eq_token, extract_foreach_iter_from_list,
                    l, c, self.debug)
                body.List.insert(0, foreach_iter_Assign)
                # complete FOREACH stmt
                self.match(EzhilToken.END)
                self.currently_parsing.pop()
                foreach_stmt = ForStmt(init, cond, update, body, l, c, self.debug)
                self.loop_stack.pop()
                if self.debug:
                    print(u"completed parsing FOR-EACH loop",unicode(foreach_stmt))
                return foreach_stmt
            elif ptok.kind == EzhilToken.FOR:
                # Fixme : empty for loops not allowed.
                # @( exp1 , exp2 , exp3 ) FOR stmtlist END
                if self.debug:
                    print("parsing FOR stmt")
                self.loop_stack.append(True)
                self.dbg_msg("for-statement")
                for_tok = self.peek()
                self.currently_parsing.append(for_tok)
                if self.debug:
                    print("matching for STMT",unicode(self.peek()))
                self.match(EzhilToken.FOR)
                if self.debug:
                    print("matched for STMT",unicode(self.peek()))
                [l, c] = for_tok.get_line_col()
                init, cond, update = exp[0], exp[1], exp[2]
                if self.debug:
                    print("extract 3 parts",unicode(init),unicode(cond),unicode(update))
                body = self.stmtlist()
                self.match(EzhilToken.END)
                self.currently_parsing.pop()
                if self.debug:
                    print("body of loop",unicode(body))
                forstmt = ForStmt(init, cond, update, body, l, c, self.debug)
                self.loop_stack.pop()
                if self.debug:
                    print("completed parsing FOR loop",unicode(forstmt))
                return forstmt
            # No recognised keyword after "@( ... )": falls through to the
            # ParseException at the end of this method.
        elif ptok.kind == EzhilToken.DO:
            # DO {body} DOWHILE @( expr )
            if self.debug:
                print("parsing DO-WHILE statement")
            self.loop_stack.append(True)
            do_tok = self.dequeue()
            self.currently_parsing.append(do_tok)
            [l, c] = do_tok.get_line_col()
            body = self.stmtlist()
            if self.debug:
                print("parsed body")
            self.match(EzhilToken.DOWHILE)
            self.match(EzhilToken.ATRATEOF)
            exp = self.valuelist()
            if self.debug:
                print("parsed EXP",exp[0])
            doWhileStmt = DoWhileStmt(exp[0], body, l, c, self.debug)
            self.loop_stack.pop()
            self.currently_parsing.pop()
            return doWhileStmt
        elif ptok.kind == EzhilToken.BREAK:
            # break, must be in loop-environment
            self.dbg_msg("break-statement")
            break_tok = self.dequeue()
            [l, c] = break_tok.get_line_col()
            self.check_loop_stack()  # raises a parse error
            brkstmt = BreakStmt(l, c, self.debug)
            return brkstmt
        elif ptok.kind == EzhilToken.CONTINUE:
            # continue, must be in loop-environment
            self.dbg_msg("continue-statement")
            cont_tok = self.dequeue()
            [l, c] = cont_tok.get_line_col()
            self.check_loop_stack()  # raises a parse error
            cntstmt = ContinueStmt(l, c, self.debug)
            return cntstmt
        else:
            # lval := rval
            ptok = self.peek()
            self.currently_parsing.append(ptok)
            [l, c] = ptok.get_line_col()
            lhs = self.expr()
            self.dbg_msg("parsing expr: "+unicode(lhs))
            ptok = self.peek()
            if ptok.kind in EzhilToken.ASSIGNOP:
                assign_tok = self.dequeue()
                rhs = self.expr()
                [l, c] = assign_tok.get_line_col()
                self.currently_parsing.pop()
                return AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)
            self.currently_parsing.pop()
            return EvalStmt(lhs, l, c, self.debug)
        raise ParseException("parsing Statement, unknown operators" + unicode(ptok))

    def function(self):
        """Parse ``def[kw] fname[id] (arglist) {body} end[kw]``.

        Returns the Function node.  Raises ParseException when called
        while ``parsing_function`` is already set, or when the DEF keyword
        or the function name is missing.
        """
        if self.parsing_function:
            # clear the flag so the parser is usable after this error
            self.parsing_function = False
            raise ParseException(u" Nested functions not allowed! "+unicode(self.last_token()))
        self.parsing_function = True
        def_tok = self.dequeue()
        if def_tok.kind != EzhilToken.DEF:
            raise ParseException(u"unmatched 'def' in function " +unicode(def_tok))
        id_tok = self.dequeue()
        if id_tok.kind != EzhilToken.ID:
            raise ParseException(u"expected identifier in function"+unicode(id_tok))
        arglist = self.arglist()
        self.dbg_msg(u"finished parsing arglist")
        body = self.stmtlist()
        self.match(EzhilToken.END)
        [l, c] = def_tok.get_line_col()
        fval = Function(id_tok.val, arglist, body, l, c, self.debug)
        self.parsing_function = False
        self.dbg_msg(u"finished parsing function")
        return fval

    def valuelist(self):
        """Parse ``( expr_1 , expr_2, ... )`` and return a ValueList.

        An element may also be an assignment (``lhs <assign-op> rhs``), in
        which case an AssignStmt is stored.  Raises ParseException when an
        element is followed by anything other than a comma or ``)``.
        """
        valueList = list()
        self.dbg_msg(u"valuelist: ")
        lparen_tok = self.match(EzhilToken.LPAREN)
        while self.peek().kind != EzhilToken.RPAREN:
            val = self.expr()
            if self.debug:
                print(u"val = ",unicode(val))
            ptok = self.peek()
            if self.debug:
                print(u"ptok = ",unicode(ptok),unicode(ptok.kind),unicode(EzhilToken.ASSIGNOP))
            if ptok.kind in EzhilToken.ASSIGNOP:
                assign_tok = self.dequeue()
                rhs = self.expr()
                [l, c] = assign_tok.get_line_col()
                lhs = val
                val = AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)
                if self.debug:
                    print(u"AssignStmt = ",unicode(val))
                ptok = self.peek()
            else:
                if self.debug:
                    print(u"No-Assign // Expr =",unicode(val))
            self.dbg_msg(u"valuelist-expr: "+unicode(val))
            valueList.append(val)
            if ptok.kind == EzhilToken.RPAREN:
                break
            elif ptok.kind == EzhilToken.COMMA:
                self.match(EzhilToken.COMMA)
            else:
                raise ParseException(u" function call argument list "+unicode(ptok))
        self.match(EzhilToken.RPAREN)
        [l, c] = lparen_tok.get_line_col()
        return ValueList(valueList, l, c, self.debug)

    def arglist(self):
        """Parse ``( arg_1, arg_2, ... )`` and return an ArgList of names.

        Raises ParseException when an argument is followed by anything
        other than a comma or ``)``.
        """
        self.dbg_msg(u" ARGLIST ")
        args = list()
        lparen_tok = self.match(EzhilToken.LPAREN)
        while self.peek().kind != EzhilToken.RPAREN:
            arg_name = self.dequeue()
            args.append(arg_name.val)
            ptok = self.peek()
            if ptok.kind == EzhilToken.RPAREN:
                break
            elif ptok.kind == EzhilToken.COMMA:
                self.match(EzhilToken.COMMA)
            else:
                raise ParseException(u" function definition argument list "
                                     +unicode(ptok))
        self.match(EzhilToken.RPAREN)
        [l, c] = lparen_tok.get_line_col()
        return ArgList(args, l, c, self.debug)

    def exprlist(self):
        """Parse a comma-separated list of expressions into an ExprList.

        Grammar:
          EXPRLIST : EXPR, EXPRLIST
          EXPRLIST : EXPR

        The node's line/column come from the first comma, or are 0, 0
        when the list holds a single expression.
        """
        self.dbg_msg(u" EXPRLIST ")
        exprs = []
        comma_tok = None
        l = 0
        c = 0
        while not self.lex.end_of_tokens():
            exprs.append(self.expr())
            if self.lex.peek().kind != EzhilToken.COMMA:
                break
            tok = self.match(EzhilToken.COMMA)
            if not comma_tok:
                comma_tok = tok
        if comma_tok:
            [l, c] = comma_tok.get_line_col()
        self.dbg_msg(u"finished expression list")
        return ExprList(exprs, l, c, self.debug)

    def expr(self):
        """Parse an expression and return its AST node.

        Reads a term, then an optional add/subtract tail or call argument
        list, then any number of trailing BINOP operators.  Raises
        ParseException when ``(`` follows something that is not an
        Identifier.
        """
        self.dbg_msg(u" EXPR ")
        val1 = self.term()
        res = val1
        ptok = self.peek()
        if ptok.kind in EzhilToken.ADDSUB:
            binop = self.dequeue()
            if ptok.kind == EzhilToken.MINUS:
                # the right operand of '-' is a single term, not a full
                # expression
                val2 = self.term()
            else:
                val2 = self.expr()
            [l, c] = binop.get_line_col()
            res = Expr(val1, binop, val2, l, c, self.debug)
        elif ptok.kind == EzhilToken.LPAREN:
            # function call
            if not isinstance(res, Identifier):
                raise ParseException(u"invalid function call"+unicode(ptok))
            [l, c] = ptok.get_line_col()
            vallist = self.valuelist()
            res = ExprCall(res, vallist, l, c, self.debug)
        ptok = self.peek()
        while ptok.kind in EzhilToken.BINOP:
            binop = self.dequeue()
            [l, c] = binop.get_line_col()
            res = Expr(res, binop, self.expr(), l, c, self.debug)
            ptok = self.peek()
        return res

    def term(self):
        """Parse a term: a factor optionally followed by an operator and term.

        This is a grammar abstraction; the AST only has Expr elements.
        The operators accepted here are the MULDIV, COMPARE, EXPMOD and
        BITWISE_AND_LOGICAL token groups.
        """
        self.dbg_msg("term")
        val1 = self.factor()
        res = val1
        tok = self.peek()
        if (tok.kind in EzhilToken.MULDIV
                or tok.kind in EzhilToken.COMPARE
                or tok.kind in EzhilToken.EXPMOD
                or tok.kind in EzhilToken.BITWISE_AND_LOGICAL):
            binop = self.dequeue()
            val2 = self.term()
            [l, c] = binop.get_line_col()
            res = Expr(val1, binop, val2, l, c, self.debug)
        return res

    def factor(self):
        """Parse a factor and return its AST node.

        A factor is a parenthesised expression, number, logical-not
        expression, identifier (optionally called or indexed), string,
        signed term, ``{key: value, ...}`` literal or ``[item, ...]``
        literal.

        Raises SyntaxError for a missing ``)`` and ParseException for an
        unexpected token.  Malformed ``{...}`` / ``[...]`` separators are
        currently reported through ``assert``.
        NOTE(review): those asserts vanish under ``python -O``; consider
        raising ParseException instead once callers are checked.
        """
        self.dbg_msg("factor")
        tok = self.peek()
        if tok.kind == EzhilToken.LPAREN:
            lparen_tok = self.dequeue()
            val = self.expr()
            if self.dequeue().kind != EzhilToken.RPAREN:
                raise SyntaxError("Missing Parens "+unicode(self.last_token()))
        elif tok.kind == EzhilToken.NUMBER:
            tok_num = self.dequeue()
            [l, c] = tok_num.get_line_col()
            val = Number(tok.val, l, c, self.debug)
        elif tok.kind == EzhilToken.LOGICAL_NOT:
            tok_not = self.dequeue()
            [l, c] = tok_not.get_line_col()
            val = UnaryExpr(self.expr(), tok_not, l, c, self.debug)
            self.dbg_msg("completed parsing unary expression"+unicode(val))
        elif tok.kind == EzhilToken.ID:
            tok_id = self.dequeue()
            [l, c] = tok_id.get_line_col()
            val = Identifier(tok.val, l, c, self.debug)
            ptok = self.peek()
            self.dbg_msg(u"factor: "+unicode(ptok) + u" / "+ unicode(tok))
            if ptok.kind == EzhilToken.LPAREN:
                # function call
                [l, c] = ptok.get_line_col()
                vallist = self.valuelist()
                val = ExprCall(val, vallist, l, c, self.debug)
            elif ptok.kind == EzhilToken.LSQRBRACE:
                # indexing an array type variable or ID:
                # replace with a call to __getitem__
                [l, c] = ptok.get_line_col()
                exp = self.factor()
                if hasattr(exp, '__getitem__'):
                    VL2 = ValueList([val, exp[0]], l, c, self.debug)
                else:
                    # when exp is an expression
                    VL2 = ValueList([val, exp], l, c, self.debug)
                val = ExprCall(Identifier("__getitem__", l, c), VL2, l, c, self.debug)
                # further indices nest additional __getitem__ calls
                for itr in range(1, len(exp)):
                    VL2 = ValueList([val, exp[itr]], l, c, self.debug)
                    val = ExprCall(Identifier("__getitem__", l, c), VL2, l, c, self.debug)
            elif ptok.kind == EzhilToken.LCURLBRACE:
                val = None
                raise ParseException("dictionary indexing implemented"+unicode(ptok))
        elif tok.kind == EzhilToken.STRING:
            str_tok = self.dequeue()
            [l, c] = str_tok.get_line_col()
            val = String(tok.val, l, c, self.debug)
        elif tok.kind in EzhilToken.ADDSUB:
            # unary sign: represented as (0 <op> term)
            unop = self.dequeue()
            [l, c] = unop.get_line_col()
            val = Expr(Number(0), unop, self.term(), l, c, self.debug)
        elif tok.kind == EzhilToken.LCURLBRACE:
            # creating a list/dictionary expression
            dict_start = self.dequeue()
            val = Dict()
            while True:
                if self.peek().kind == EzhilToken.RCURLBRACE:
                    break
                exprkey = self.expr()
                tok_colon = self.match(EzhilToken.COLON)
                exprval = self.expr()
                val.update({exprkey: exprval})
                if self.debug:
                    print(self.peek().__class__,self.peek())
                if self.peek().kind == EzhilToken.RCURLBRACE:
                    break
                else:
                    assert(self.peek().kind == EzhilToken.COMMA)
                    self.dequeue()
            assert(self.peek().kind == EzhilToken.RCURLBRACE)
            list_end = self.dequeue()
        elif tok.kind == EzhilToken.LSQRBRACE:
            # creating a list/array expression
            list_start = self.dequeue()
            val = Array()
            while True:
                if self.peek().kind == EzhilToken.RSQRBRACE:
                    break
                exprval = self.expr()
                val.append(exprval)
                if self.debug:
                    print(self.peek().__class__,self.peek())
                if self.peek().kind == EzhilToken.RSQRBRACE:
                    break
                else:
                    assert(self.peek().kind == EzhilToken.COMMA)
                    self.dequeue()
            assert(self.peek().kind == EzhilToken.RSQRBRACE)
            list_end = self.dequeue()
        else:
            raise ParseException("Expected Number, found something "+unicode(tok))
        self.dbg_msg(u"factor-returning: "+unicode(val))
        return val
class Parser(DebugUtils):
    """Recursive-descent parser that turns lexer tokens into an AST.

    When you add a new language feature, add an AST class and its
    evaluate methods, and also add a parser method here.
    """

    def __init__(self, lexer, fcn_map, builtin_map, dbg=False):
        """Bind the parser to ``lexer`` and the two function tables."""
        DebugUtils.__init__(self, dbg)
        self.parsing_function = False
        self.lex = lexer
        self.ast = None
        self.currently_parsing = []  # stack, just in case we should have parse errors
        self.function_map = fcn_map  # parsed functions
        self.builtin_map = builtin_map  # pointers to builtin functions
        self.if_stack = []  # parsing if-statements
        self.loop_stack = []  # parsing while-statements
        self.last_tok = None  # handle to last token dequeued
        self.inside_if = False

    def reset(self):
        """Reset parser and lexer state after parsing has gone wrong."""
        self.inside_if = False
        # A parse error inside a function body leaves this flag set; if it
        # is not cleared, every later function() call reports a nested
        # function and parse() stops appending statements to the AST.
        self.parsing_function = False
        self.loop_stack = []
        self.if_stack = []
        self.currently_parsing = []
        self.lex.reset()
        return

    def check_loop_stack(self):
        """Return the loop nesting depth; raise ParseException when it is 0."""
        if len(self.loop_stack) == 0:
            raise ParseException(u"break/continue statement outside any loop, near" + str(self.last_token()))
        return len(self.loop_stack)

    def check_if_stack(self):
        """Return the if-stack depth; raise ParseException when it is 0."""
        if len(self.if_stack) == 0:
            raise ParseException(u"unmatched else statement, near" + str(self.last_token()))
        return len(self.if_stack)

    def last_token(self):
        """Return the token most recently taken with ``dequeue``."""
        return self.last_tok

    def peek(self):
        """Return the next token without consuming it."""
        ptok = self.lex.peek()
        self.dbg_msg(u"peek: " + unicode(ptok))
        return ptok

    def dequeue(self):
        """Consume and return the next token, remembering it as last token."""
        tok = self.lex.dequeue()
        self.last_tok = tok
        self.dbg_msg(u"deqeue: " + unicode(tok))
        return tok

    def match(self, kind):
        """Dequeue the next token and return it if it has kind ``kind``.

        Raises ParseException when the dequeued token is of another kind.
        """
        tok = self.dequeue()
        if tok.kind != kind:
            raise ParseException(u"cannot find token "
                                 + Token.get_name(kind) + u" got "
                                 + unicode(tok) + u" instead!")
        return tok

    def __repr__(self):
        """Return a text dump of the parsed functions and the AST."""
        rval = u"[Interpreter: "
        rval = rval + u"[Functions["
        for k in list(self.function_map.keys()):
            rval = rval + u"\n "+ str(self.function_map[k])
        rval = rval + u"]] "+ str(self.ast) + u"]\n"
        return rval

    def warn_function_overrides(self, func_name):
        """Warn when ``func_name`` is already a user or builtin function.

        Used when defining user functions to see if they shadow an
        existing one.  Returns True when a warning was printed.
        """
        val = (func_name in self.function_map
               or func_name in self.builtin_map)
        if val:
            print(u"WARNING: function %s overrides builtin"%(func_name))
        return val

    def parse(self):
        """Parse the whole token stream and return the AST (a StmtList).

        Function definitions are stored in ``function_map``; every other
        statement is appended to the AST unless ``parsing_function`` is
        set.
        """
        self.ast = StmtList()
        while not self.lex.end_of_tokens():
            self.dbg_msg("AST length = %d"%len(self.ast))
            if self.lex.peek().kind == Token.DEF:
                self.dbg_msg("parsing for function")
                # save function in a global table.
                func = self.function()
                self.warn_function_overrides(func.name)
                self.function_map[func.name] = func
            else:
                self.dbg_msg("parsing for stmt")
                st = self.stmt()
                if not self.parsing_function:
                    self.ast.append(st)
        return self.ast

    def stmtlist(self):
        """Parse statements until a block boundary and return a StmtList.

        Stops, without consuming the token, at END, and at ELSE / ELSEIF
        when ``inside_if`` is not set.
        """
        self.dbg_msg(" STMTLIST ")
        stlist = StmtList()
        while not self.lex.end_of_tokens():
            self.dbg_msg("STMTLIST => STMT")
            ptok = self.peek()
            if ptok.kind == Token.END:
                break
            if (not self.inside_if
                    and (ptok.kind == Token.ELSE
                         or ptok.kind == Token.ELSEIF)):
                break
            st = self.stmt()
            stlist.append(st)
        return stlist

    def stmt(self):
        """Parse one statement and return its AST node.

        Tries, in order: return, print, if, elseif, else, for, while,
        break, continue, and finally assignment / bare expression.
        Raises ParseException (through the helpers it calls) for an
        unmatched else/elseif or a break/continue outside a loop.
        """
        self.dbg_msg(" STMT ")
        ptok = self.peek()
        self.dbg_msg("stmt: peeking at "+unicode(ptok))
        if ptok.kind == Token.RETURN:
            # return <expression>
            ret_tok = self.dequeue()
            [l, c] = ret_tok.get_line_col()
            rstmt = ReturnStmt(self.expr(), l, c, self.debug)
            self.dbg_msg("return statement parsed")
            return rstmt
        elif ptok.kind == Token.PRINT:
            # print <expression>
            print_tok = self.dequeue()
            [l, c] = print_tok.get_line_col()
            return PrintStmt(self.exprlist(), l, c, self.debug)
        elif ptok.kind == Token.IF:
            # if <expression> stmtlist end
            if_tok = self.dequeue()
            [l, c] = if_tok.get_line_col()
            exp = self.expr()
            ifstmt = IfStmt(exp, None, None, l, c, self.debug)
            self.if_stack.append(ifstmt)
            body = self.stmtlist()
            ifstmt.set_body(body)
            ptok = self.peek()
            if ptok.kind in [Token.ELSEIF, Token.ELSE]:
                # with inside_if set, stmtlist() no longer stops at
                # ELSE/ELSEIF, so the branches are parsed by stmt()
                self.inside_if = True
                next_stmt = self.stmtlist()
                self.inside_if = False
                ifstmt.append_stmt(next_stmt)
            self.match(Token.END)
            return ifstmt
        elif ptok.kind == Token.ELSEIF:
            # elseif <expression> stmtlist
            elseif_tok = self.dequeue()
            [l, c] = elseif_tok.get_line_col()
            self.check_if_stack()
            exp = self.expr()
            elseif_stmt = IfStmt(exp, None, None, l, c, self.debug)
            ifstmt = self.if_stack[-1]
            ifstmt.append_stmt(elseif_stmt)
            # the new branch replaces its predecessor on top of the stack
            self.if_stack.pop()
            self.if_stack.append(elseif_stmt)
            body = self.stmtlist()
            elseif_stmt.set_body(body)
            return elseif_stmt
        elif ptok.kind == Token.ELSE:
            # else stmtlist
            self.check_if_stack()
            ifstmt = self.if_stack.pop()
            self.dbg_msg("stmt-else: ")
            else_tok = self.dequeue()
            [l, c] = else_tok.get_line_col()
            body = self.stmtlist()
            else_stmt = ElseStmt(body, l, c, self.debug)
            ifstmt.append_stmt(else_stmt)
            return else_stmt
        elif ptok.kind == Token.FOR:
            # Fixme : empty for loops not allowed.
            # for ( exp1 , exp2 , exp3 ) stmtlist end
            self.loop_stack.append(True)
            self.dbg_msg("for-statement")
            for_tok = self.dequeue()
            self.match(Token.LPAREN)
            lhs = self.expr()
            init = lhs
            ptok = self.peek()
            if ptok.kind in Token.ASSIGNOP:
                assign_tok = self.dequeue()
                [l, c] = assign_tok.get_line_col()
                rhs = self.expr()
                init = AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)
            self.match(Token.COMMA)
            cond = self.expr()
            self.match(Token.COMMA)
            lhs = self.expr()
            update = lhs
            ptok = self.peek()
            if ptok.kind in Token.ASSIGNOP:
                assign_tok = self.dequeue()
                [l, c] = assign_tok.get_line_col()
                rhs = self.expr()
                update = AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)
            self.match(Token.RPAREN)
            body = self.stmtlist()
            self.match(Token.END)
            [l, c] = for_tok.get_line_col()
            forstmt = ForStmt(init, cond, update, body, l, c, self.debug)
            self.loop_stack.pop()
            return forstmt
        elif ptok.kind == Token.WHILE:
            # while ( expr ) body end
            self.loop_stack.append(True)
            self.dbg_msg("while-statement")
            while_tok = self.dequeue()
            [l, c] = while_tok.get_line_col()
            wexpr = self.expr()
            body = self.stmtlist()
            self.match(Token.END)
            whilestmt = WhileStmt(wexpr, body, l, c, self.debug)
            self.loop_stack.pop()
            return whilestmt
        elif ptok.kind == Token.BREAK:
            # break, must be in loop-environment
            self.dbg_msg("break-statement")
            break_tok = self.dequeue()
            [l, c] = break_tok.get_line_col()
            self.check_loop_stack()  # raises a parse error
            brkstmt = BreakStmt(l, c, self.debug)
            return brkstmt
        elif ptok.kind == Token.CONTINUE:
            # continue, must be in loop-environment
            self.dbg_msg("continue-statement")
            cont_tok = self.dequeue()
            [l, c] = cont_tok.get_line_col()
            self.check_loop_stack()  # raises a parse error
            cntstmt = ContinueStmt(l, c, self.debug)
            return cntstmt
        else:
            # lval := rval
            ptok = self.peek()
            [l, c] = ptok.get_line_col()
            lhs = self.expr()
            self.dbg_msg("parsing expr: "+str(lhs))
            ptok = self.peek()
            if ptok.kind in Token.ASSIGNOP:
                assign_tok = self.dequeue()
                rhs = self.expr()
                [l, c] = assign_tok.get_line_col()
                return AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)
            return EvalStmt(lhs, l, c, self.debug)
        # Defensive guard: every branch above returns or raises.
        raise ParseException("parsing Statement, unkown operators" + unicode(ptok))

    def function(self):
        """Parse ``def[kw] fname[id] (arglist) {body} end[kw]``.

        Returns the Function node.  Raises ParseException when called
        while ``parsing_function`` is already set, or when the DEF keyword
        or the function name is missing.
        """
        if self.parsing_function:
            raise ParseException(" Nested functions not allowed! ")
        self.parsing_function = True
        def_tok = self.dequeue()
        if def_tok.kind != Token.DEF:
            raise ParseException("unmatched 'def' in function " +str(def_tok))
        id_tok = self.dequeue()
        if id_tok.kind != Token.ID:
            raise ParseException("expected identifier in function"+str(id_tok))
        arglist = self.arglist()
        self.dbg_msg("finished parsing arglist")
        body = self.stmtlist()
        self.match(Token.END)
        [l, c] = def_tok.get_line_col()
        fval = Function(id_tok.val, arglist, body, l, c, self.debug)
        self.parsing_function = False
        self.dbg_msg("finished parsing function")
        return fval

    def valuelist(self):
        """Parse ``( expr_1 , expr_2, ... )`` and return a ValueList.

        Raises ParseException when an expression is followed by anything
        other than a comma or ``)``.
        """
        valueList = list()
        self.dbg_msg("valuelist: ")
        lparen_tok = self.match(Token.LPAREN)
        while self.peek().kind != Token.RPAREN:
            val = self.expr()
            self.dbg_msg("valuelist-expr: "+str(val))
            valueList.append(val)
            ptok = self.peek()
            if ptok.kind == Token.RPAREN:
                break
            elif ptok.kind == Token.COMMA:
                self.match(Token.COMMA)
            else:
                raise ParseException(" function call argument list "+unicode(ptok))
        self.match(Token.RPAREN)
        [l, c] = lparen_tok.get_line_col()
        return ValueList(valueList, l, c, self.debug)

    def arglist(self):
        """Parse ``( arg_1, arg_2, ... )`` and return an ArgList of names.

        Raises ParseException when an argument is followed by anything
        other than a comma or ``)``.
        """
        self.dbg_msg(" ARGLIST ")
        args = list()
        lparen_tok = self.match(Token.LPAREN)
        while self.peek().kind != Token.RPAREN:
            arg_name = self.dequeue()
            args.append(arg_name.val)
            ptok = self.peek()
            if ptok.kind == Token.RPAREN:
                break
            elif ptok.kind == Token.COMMA:
                self.match(Token.COMMA)
            else:
                raise ParseException(" function definition argument list "
                                     +unicode(ptok))
        self.match(Token.RPAREN)
        [l, c] = lparen_tok.get_line_col()
        return ArgList(args, l, c, self.debug)

    def exprlist(self):
        """Parse a comma-separated list of expressions into an ExprList.

        Grammar:
          EXPRLIST : EXPR, EXPRLIST
          EXPRLIST : EXPR

        The node's line/column come from the first comma, or are 0, 0
        when the list holds a single expression.
        """
        self.dbg_msg(" EXPRLIST ")
        exprs = []
        comma_tok = None
        l = 0
        c = 0
        while not self.lex.end_of_tokens():
            exprs.append(self.expr())
            if self.lex.peek().kind != Token.COMMA:
                break
            tok = self.match(Token.COMMA)
            if not comma_tok:
                comma_tok = tok
        if comma_tok:
            [l, c] = comma_tok.get_line_col()
        self.dbg_msg("finished expression list")
        return ExprList(exprs, l, c, self.debug)

    def expr(self):
        """Parse an expression and return its AST node.

        Reads a term, then either an add/subtract operator followed by
        another expression, or a call argument list.  Raises
        ParseException when ``(`` follows something that is not an
        Identifier.
        """
        self.dbg_msg(" EXPR ")
        val1 = self.term()
        res = val1
        ptok = self.peek()
        if ptok.kind in Token.ADDSUB:
            binop = self.dequeue()
            val2 = self.expr()
            [l, c] = binop.get_line_col()
            res = Expr(val1, binop, val2, l, c, self.debug)
        elif ptok.kind == Token.LPAREN:
            # function call -OR- array type.
            if res.__class__ != Identifier:
                raise ParseException("invalid function call"+unicode(ptok))
            [l, c] = ptok.get_line_col()
            vallist = self.valuelist()
            res = ExprCall(res, vallist, l, c, self.debug)
        return res

    def term(self):
        """Parse a term: a factor optionally followed by an operator and expr.

        This is a grammar abstraction; the AST only has Expr elements.
        The operators accepted here are the MULDIV, COMPARE and EXPMOD
        token groups.
        """
        self.dbg_msg("term")
        val1 = self.factor()
        res = val1
        tok = self.peek()
        if (tok.kind in Token.MULDIV
                or tok.kind in Token.COMPARE
                or tok.kind in Token.EXPMOD):
            binop = self.dequeue()
            val2 = self.expr()
            [l, c] = binop.get_line_col()
            res = Expr(val1, binop, val2, l, c, self.debug)
        return res

    def factor(self):
        """Parse a factor and return its AST node.

        A factor is a parenthesised expression, a number, an identifier
        (optionally followed by a call argument list) or a string.
        Raises SyntaxError for a missing ``)`` and ParseException for
        array indexing or any other unexpected token.
        """
        self.dbg_msg("factor")
        tok = self.peek()
        if tok.kind == Token.LPAREN:
            lparen_tok = self.dequeue()
            val = self.expr()
            if self.dequeue().kind != Token.RPAREN:
                raise SyntaxError("Missing Parens")
        elif tok.kind == Token.NUMBER:
            tok_num = self.dequeue()
            [l, c] = tok_num.get_line_col()
            val = Number(tok.val, l, c, self.debug)
        elif tok.kind == Token.ID:
            tok_id = self.dequeue()
            [l, c] = tok_id.get_line_col()
            val = Identifier(tok.val, l, c, self.debug)
            ptok = self.peek()
            self.dbg_msg("factor: "+unicode(ptok) + " / "+str(tok))
            if ptok.kind == Token.LPAREN:
                # function call
                [l, c] = ptok.get_line_col()
                vallist = self.valuelist()
                val = ExprCall(val, vallist, l, c, self.debug)
            elif ptok.kind == Token.LSQRBRACE:
                # array type
                val = None
                raise ParseException("arrays not implemented"+unicode(ptok))
        elif tok.kind == Token.STRING:
            str_tok = self.dequeue()
            [l, c] = str_tok.get_line_col()
            val = String(tok.val, l, c, self.debug)
        else:
            raise ParseException("Expected Number, found something "+str(tok))
        self.dbg_msg("factor-returning: "+str(val))
        return val
class Parser(DebugUtils):
    """Recursive-descent parser that builds an AST from the lexer's tokens.

    When you add a new language feature, add an AST class and its evaluate
    methods. Also add a parser method here.

    The parser keeps a few pieces of mutable state while it runs:
    ``parsing_function`` (True while a ``def`` body is being parsed),
    ``if_stack`` / ``loop_stack`` (open if- and loop-statements) and
    ``inside_if`` (controls whether ``stmtlist`` stops at else/elseif).
    """

    def __init__(self, lexer, fcn_map, builtin_map, dbg=False):
        """Store the lexer and the function tables; start with empty state."""
        DebugUtils.__init__(self, dbg)
        self.parsing_function = False
        self.lex = lexer
        self.ast = None
        # stack, just in case we should have parse errors
        self.currently_parsing = []
        self.function_map = fcn_map  # parsed functions
        self.builtin_map = builtin_map  # pointers to builtin functions
        self.if_stack = []  # parsing if-statements
        self.loop_stack = []  # parsing while-statements
        self.last_tok = None  # handle to last token dequeued
        self.inside_if = False

    def reset(self):
        """Reset parser, and lexer, when stuff gets messed up."""
        self.inside_if = False
        # A parse error inside a function body must not leave the parser
        # believing it is still inside that function.
        self.parsing_function = False
        self.loop_stack = []
        self.if_stack = []
        self.currently_parsing = []
        self.lex.reset()
        return

    def check_loop_stack(self):
        """Return the loop nesting depth; raise ParseException when it is 0."""
        if len(self.loop_stack) == 0:
            raise ParseException(
                "break/continue statement outside any loop, near" +
                str(self.last_token()))
        return len(self.loop_stack)

    def check_if_stack(self):
        """Return the if nesting depth; raise ParseException when it is 0."""
        if len(self.if_stack) == 0:
            raise ParseException("unmatched else statement, near" +
                                 str(self.last_token()))
        return len(self.if_stack)

    def last_token(self):
        """Return the token most recently removed by ``dequeue``."""
        return self.last_tok

    def peek(self):
        """Return the lexer's next token without consuming it."""
        ptok = self.lex.peek()
        self.dbg_msg("peek: " + str(ptok))
        return ptok

    def dequeue(self):
        """Consume and return the next token, remembering it as last_tok."""
        tok = self.lex.dequeue()
        self.last_tok = tok
        self.dbg_msg("deqeue: " + str(tok))
        return tok

    def match(self, kind):
        """Consume the next token and return it if it has the given kind.

        Raises ParseException otherwise. Note the token is consumed even
        when the match fails.
        """
        tok = self.dequeue()
        if tok.kind != kind:
            raise ParseException("cannot find token " +
                                 Token.get_name(kind) + " got " +
                                 str(tok) +
                                 " instead!")
        return tok

    def __repr__(self):
        rval = "[Interpreter: "
        rval = rval + "[Functions["
        for k in list(self.function_map.keys()):
            rval = rval + "\n " + str(self.function_map[k])
        rval = rval + "]] " + str(self.ast) + "]\n"
        return rval

    def warn_function_overrides(self, func_name):
        """Print a warning when func_name is already a known function.

        Used when defining user-functions to see if they shadow builtins
        (or previously parsed functions). Returns the truth value of that
        check.
        """
        val = (func_name in self.function_map
               or func_name in self.builtin_map)
        if val:
            print("WARNING: function %s overrides builtin" % (func_name))
        return val

    def parse(self):
        """Parser routine: consume all tokens and return the top-level AST.

        Function definitions are stored in ``function_map``; every other
        statement is appended to ``self.ast``.
        """
        self.ast = StmtList()
        while not self.lex.end_of_tokens():
            self.dbg_msg("AST length = %d" % len(self.ast))
            if self.lex.peek().kind == Token.DEF:
                self.dbg_msg("parsing for function")
                # save function in a global table.
                func = self.function()
                self.warn_function_overrides(func.name)
                self.function_map[func.name] = func
            else:
                self.dbg_msg("parsing for stmt")
                st = self.stmt()
                if not self.parsing_function:
                    self.ast.append(st)
        return self.ast

    def stmtlist(self):
        """Parse a bunch of statements.

        Stops (without consuming) at END, and at ELSE/ELSEIF unless
        ``inside_if`` is set, in which case those are parsed as statements.
        """
        self.dbg_msg(" STMTLIST ")
        stlist = StmtList()
        while not self.lex.end_of_tokens():
            self.dbg_msg("STMTLIST => STMT")
            ptok = self.peek()
            if ptok.kind == Token.END:
                break
            if (not self.inside_if and
                    (ptok.kind == Token.ELSE or ptok.kind == Token.ELSEIF)):
                break
            st = self.stmt()
            stlist.append(st)
        return stlist

    def stmt(self):
        """Try an assign, print, return, if, loop or eval statement.

        Dispatches on the kind of the next token and returns the AST node
        for the parsed statement. Raises ParseException on malformed input
        (via ``match``, ``check_*_stack`` or the expression parsers).
        """
        self.dbg_msg(" STMT ")
        ptok = self.peek()
        self.dbg_msg("stmt: peeking at " + str(ptok))
        if ptok.kind == Token.RETURN:
            # return <expression>
            ret_tok = self.dequeue()
            [l, c] = ret_tok.get_line_col()
            rstmt = ReturnStmt(self.expr(), l, c, self.debug)
            self.dbg_msg("return statement parsed")
            return rstmt
        elif ptok.kind == Token.PRINT:
            # print <expression>
            print_tok = self.dequeue()
            [l, c] = print_tok.get_line_col()
            return PrintStmt(self.exprlist(), l, c, self.debug)
        elif ptok.kind == Token.IF:
            # if <expression> stmtlist end
            if_tok = self.dequeue()
            [l, c] = if_tok.get_line_col()
            exp = self.expr()
            ifstmt = IfStmt(exp, None, None, l, c, self.debug)
            self.if_stack.append(ifstmt)
            body = self.stmtlist()
            ifstmt.set_body(body)
            ptok = self.peek()
            if ptok.kind in [Token.ELSEIF, Token.ELSE]:
                # With inside_if set, stmtlist parses the else/elseif
                # branches as statements instead of stopping at them.
                self.inside_if = True
                next_stmt = self.stmtlist()
                self.inside_if = False
                ifstmt.append_stmt(next_stmt)
            self.match(Token.END)
            return ifstmt
        elif ptok.kind == Token.ELSEIF:
            # elseif <expression> stmtlist
            elseif_tok = self.dequeue()
            [l, c] = elseif_tok.get_line_col()
            self.check_if_stack()
            exp = self.expr()
            elseif_stmt = IfStmt(exp, None, None, l, c, self.debug)
            ifstmt = self.if_stack[-1]
            ifstmt.append_stmt(elseif_stmt)
            # the elseif replaces its parent as the innermost open if
            self.if_stack.pop()
            self.if_stack.append(elseif_stmt)
            body = self.stmtlist()
            elseif_stmt.set_body(body)
            return elseif_stmt
        elif ptok.kind == Token.ELSE:
            # else stmtlist
            self.check_if_stack()
            ifstmt = self.if_stack.pop()
            self.dbg_msg("stmt-else: ")
            else_tok = self.dequeue()
            [l, c] = else_tok.get_line_col()
            body = self.stmtlist()
            else_stmt = ElseStmt(body, l, c, self.debug)
            ifstmt.append_stmt(else_stmt)
            return else_stmt
        elif ptok.kind == Token.FOR:
            # Fixme : empty for loops not allowed.
            # For ( exp1 , exp2 , exp3 ) stmtlist end
            # (the three parts are separated by COMMA tokens)
            self.loop_stack.append(True)
            self.dbg_msg("for-statement")
            for_tok = self.dequeue()
            self.match(Token.LPAREN)

            lhs = self.expr()
            init = lhs
            ptok = self.peek()
            if ptok.kind in Token.ASSIGNOP:
                assign_tok = self.dequeue()
                [l, c] = assign_tok.get_line_col()
                rhs = self.expr()
                init = AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)

            self.match(Token.COMMA)
            cond = self.expr()
            self.match(Token.COMMA)

            lhs = self.expr()
            update = lhs
            ptok = self.peek()
            if ptok.kind in Token.ASSIGNOP:
                assign_tok = self.dequeue()
                [l, c] = assign_tok.get_line_col()
                rhs = self.expr()
                update = AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)

            self.match(Token.RPAREN)
            body = self.stmtlist()
            self.match(Token.END)
            [l, c] = for_tok.get_line_col()
            forstmt = ForStmt(init, cond, update, body, l, c, self.debug)
            self.loop_stack.pop()
            return forstmt
        elif ptok.kind == Token.WHILE:
            # while ( expr ) body end
            self.loop_stack.append(True)
            self.dbg_msg("while-statement")
            while_tok = self.dequeue()
            [l, c] = while_tok.get_line_col()
            wexpr = self.expr()
            body = self.stmtlist()
            self.match(Token.END)
            whilestmt = WhileStmt(wexpr, body, l, c, self.debug)
            self.loop_stack.pop()
            return whilestmt
        elif ptok.kind == Token.BREAK:
            # break, must be in loop-environment
            self.dbg_msg("break-statement")
            break_tok = self.dequeue()
            [l, c] = break_tok.get_line_col()
            self.check_loop_stack()  # raises a parse error
            brkstmt = BreakStmt(l, c, self.debug)
            return brkstmt
        elif ptok.kind == Token.CONTINUE:
            # continue, must be in loop-environment
            self.dbg_msg("continue-statement")
            cont_tok = self.dequeue()
            [l, c] = cont_tok.get_line_col()
            self.check_loop_stack()  # raises a parse error
            cntstmt = ContinueStmt(l, c, self.debug)
            return cntstmt
        else:
            # lval := rval, or a bare expression
            ptok = self.peek()
            [l, c] = ptok.get_line_col()
            lhs = self.expr()
            self.dbg_msg("parsing expr: " + str(lhs))
            ptok = self.peek()
            if ptok.kind in Token.ASSIGNOP:
                assign_tok = self.dequeue()
                rhs = self.expr()
                [l, c] = assign_tok.get_line_col()
                return AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)
            return EvalStmt(lhs, l, c, self.debug)

    def function(self):
        """Parse: def[kw] fname[id] (arglist) {body} end[kw].

        Returns the Function AST node. Raises ParseException for a nested
        definition or a malformed header.
        """
        if self.parsing_function:
            raise ParseException(" Nested functions not allowed! ")

        self.parsing_function = True
        try:
            def_tok = self.dequeue()
            if def_tok.kind != Token.DEF:
                raise ParseException("unmatched 'def' in function " +
                                     str(def_tok))

            id_tok = self.dequeue()
            if id_tok.kind != Token.ID:
                raise ParseException("expected identifier in function" +
                                     str(id_tok))

            arglist = self.arglist()
            self.dbg_msg("finished parsing arglist")
            body = self.stmtlist()

            self.match(Token.END)
            [l, c] = def_tok.get_line_col()
            fval = Function(id_tok.val, arglist, body, l, c, self.debug)
        finally:
            # Always clear the flag: if a parse error escaped with it still
            # set, every later definition would be rejected as "nested".
            self.parsing_function = False

        self.dbg_msg("finished parsing function")
        return fval

    def valuelist(self):
        """Parse: ( expr_1 , expr_2, ... ) and return a ValueList."""
        valueList = list()
        self.dbg_msg("valuelist: ")
        lparen_tok = self.match(Token.LPAREN)
        while self.peek().kind != Token.RPAREN:
            val = self.expr()
            self.dbg_msg("valuelist-expr: " + str(val))
            valueList.append(val)
            ptok = self.peek()
            if ptok.kind == Token.RPAREN:
                break
            elif ptok.kind == Token.COMMA:
                self.match(Token.COMMA)
            else:
                raise ParseException(" function call argument list " +
                                     str(ptok))
        self.match(Token.RPAREN)
        [l, c] = lparen_tok.get_line_col()
        return ValueList(valueList, l, c, self.debug)

    def arglist(self):
        """Parse: ( arg_1, arg_2, ... ) and return an ArgList of names.

        Each argument token is taken as-is; its kind is not checked here.
        """
        self.dbg_msg(" ARGLIST ")
        args = list()
        lparen_tok = self.match(Token.LPAREN)
        while self.peek().kind != Token.RPAREN:
            arg_name = self.dequeue()
            args.append(arg_name.val)
            ptok = self.peek()
            if ptok.kind == Token.RPAREN:
                break
            elif ptok.kind == Token.COMMA:
                self.match(Token.COMMA)
            else:
                raise ParseException(" function definition argument list " +
                                     str(ptok))
        self.match(Token.RPAREN)
        [l, c] = lparen_tok.get_line_col()
        return ArgList(args, l, c, self.debug)

    def exprlist(self):
        """Parse a comma-separated list of expressions.

        EXPRLIST : EXPR, EXPRLIST
        EXPRLIST : EXPR

        The node's line/column come from the first comma, or are 0, 0 when
        the list has a single expression.
        """
        self.dbg_msg(" EXPRLIST ")
        exprs = []
        comma_tok = None
        l = 0
        c = 0
        while not self.lex.end_of_tokens():
            exprs.append(self.expr())
            if self.lex.peek().kind != Token.COMMA:
                break
            tok = self.match(Token.COMMA)
            if not comma_tok:
                comma_tok = tok

        if comma_tok:
            [l, c] = comma_tok.get_line_col()
        self.dbg_msg("finished expression list")
        return ExprList(exprs, l, c, self.debug)

    def expr(self):
        """Parse a term, optionally followed by +/- and another expression."""
        self.dbg_msg(" EXPR ")
        val1 = self.term()
        res = val1
        ptok = self.peek()
        if ptok.kind in Token.ADDSUB:
            binop = self.dequeue()
            val2 = self.expr()
            [l, c] = binop.get_line_col()
            res = Expr(val1, binop, val2, l, c, self.debug)
        elif ptok.kind == Token.LPAREN:
            # function call -OR- array type.
            if res.__class__ != Identifier:
                raise ParseException("invalid function call" + str(ptok))
            [l, c] = ptok.get_line_col()
            vallist = self.valuelist()
            res = ExprCall(res, vallist, l, c, self.debug)
        return res

    def term(self):
        """This is a grammar abstraction; but AST only has Expr elements.

        Parses a factor, optionally followed by a multiplicative,
        comparison or exponent/modulo operator and another expression.
        """
        self.dbg_msg("term")
        val1 = self.factor()
        res = val1
        tok = self.peek()
        if (tok.kind in Token.MULDIV or tok.kind in Token.COMPARE
                or tok.kind in Token.EXPMOD):
            binop = self.dequeue()
            val2 = self.expr()
            [l, c] = binop.get_line_col()
            res = Expr(val1, binop, val2, l, c, self.debug)
        return res

    def factor(self):
        """Parse a parenthesised expression, number, identifier/call or string.

        Raises SyntaxError for a missing closing paren and ParseException
        for array indexing or any other unexpected token.
        """
        self.dbg_msg("factor")
        tok = self.peek()
        if tok.kind == Token.LPAREN:
            self.dequeue()
            val = self.expr()
            if self.dequeue().kind != Token.RPAREN:
                raise SyntaxError("Missing Parens")
        elif tok.kind == Token.NUMBER:
            tok_num = self.dequeue()
            [l, c] = tok_num.get_line_col()
            val = Number(tok.val, l, c, self.debug)
        elif tok.kind == Token.ID:
            tok_id = self.dequeue()
            [l, c] = tok_id.get_line_col()
            val = Identifier(tok.val, l, c, self.debug)
            ptok = self.peek()
            self.dbg_msg("factor: " + str(ptok) + " / " + str(tok))
            if ptok.kind == Token.LPAREN:
                # function call
                [l, c] = ptok.get_line_col()
                vallist = self.valuelist()
                val = ExprCall(val, vallist, l, c, self.debug)
            elif ptok.kind == Token.LSQRBRACE:
                # array type
                raise ParseException("arrays not implemented" + str(ptok))
        elif tok.kind == Token.STRING:
            str_tok = self.dequeue()
            [l, c] = str_tok.get_line_col()
            val = String(tok.val, l, c, self.debug)
        else:
            raise ParseException("Expected Number, found something " +
                                 str(tok))

        self.dbg_msg("factor-returning: " + str(val))
        return val
class EzhilParser(Parser):
    """Parser for the Ezhil token set, built on top of ``Parser``.

    When you add a new language feature, add an AST class and its evaluate
    methods. Also add a parser method here.

    Control statements are written with the condition first:
    ``@ ( expr ) IF|WHILE|SWITCH|FOREACH|FOR ... END``; see ``stmt``.
    """

    def __init__(self, lexer, fcn_map, builtin_map, dbg=False):
        """Initialise the base parser; the lexer must be an EzhilLex."""
        if not isinstance(lexer, EzhilLex):
            raise RuntimeException("Cannot find Ezhil lexer class")
        Parser.__init__(self, lexer, fcn_map, builtin_map, dbg)

    @staticmethod
    def factory(lexer, fcn_map, builtin_map, dbg=False):
        """Factory method: return a new EzhilParser."""
        return EzhilParser(lexer, fcn_map, builtin_map, dbg)

    def match(self, kind):
        """Consume the next token and return it if it has the given kind.

        Raises ParseException otherwise. Note the token is consumed even
        when the match fails.
        """
        tok = self.dequeue()
        if tok.kind != kind:
            raise ParseException("cannot find token " +
                                 EzhilToken.get_name(kind) + " got " +
                                 str(tok) +
                                 " instead!")
        return tok

    def parse(self):
        """Parser routine: consume all tokens and return the top-level AST.

        Function definitions are stored in ``function_map``; every other
        statement is appended to ``self.ast``.
        """
        self.ast = StmtList()
        while not self.lex.end_of_tokens():
            self.dbg_msg("AST length = %d" % len(self.ast))
            if self.lex.peek().kind == EzhilToken.DEF:
                self.dbg_msg("parsing for function")
                # save function in a global table.
                func = self.function()
                self.warn_function_overrides(func.name)
                self.function_map[func.name] = func
            else:
                self.dbg_msg("parsing for stmt")
                st = self.stmt()
                if not self.parsing_function:
                    self.ast.append(st)
        return self.ast

    def stmtlist(self, pass_in_ATexpr=None):
        """Parse a bunch of statements.

        Stops (without consuming) at END or DOWHILE, and, while
        ``inside_if`` is set, at ELSE / ATRATEOF / CASE / OTHERWISE.

        pass_in_ATexpr: an already-parsed ``@ ( ... )`` value list that is
        handed to the first statement only.
        """
        self.dbg_msg(" STMTLIST ")
        stlist = StmtList()
        while not self.lex.end_of_tokens():
            self.dbg_msg("STMTLIST => STMT")
            ptok = self.peek()
            self.dbg_msg("STMTLIST " + str(ptok))
            if self.debug:
                print("peek @ ", str(ptok))
            if ptok.kind == EzhilToken.END:
                self.dbg_msg("End token found")
                break
            elif ptok.kind == EzhilToken.DOWHILE:
                if self.debug:
                    print("DOWHILE token found")
                break
            elif (self.inside_if and
                  (ptok.kind == EzhilToken.ELSE
                   or ptok.kind == EzhilToken.ATRATEOF
                   or ptok.kind == EzhilToken.CASE
                   or ptok.kind == EzhilToken.OTHERWISE)):
                break
            st = self.stmt(pass_in_ATexpr)
            pass_in_ATexpr = None
            stlist.append(st)
        return stlist

    def parseSwitchStmt(self, exp):
        """Parse a SWITCH statement; ``exp`` is the leading ``@ ( ... )``.

        @ <ID/EXPR> SWITCH
            @( expr ) CASE {stmtlist}
            @( expr ) CASE {stmtlist}
            OTHERWISE {stmtlist}
        END

        Implemented as an if-elseif-else statement: each CASE becomes an
        equality test of ``exp[0]`` against the case expression.
        """
        self.dbg_msg("parsing SWITCH statement")
        sw_tok = self.dequeue()
        [l, c] = sw_tok.get_line_col()
        self.inside_if = True
        lhs = exp[0]
        # enter this if-statement always
        ifstmt = IfStmt(Number(1), None, None, l, c, self.debug)
        self.if_stack.append(ifstmt)
        self.dbg_msg("parsing SWITCH-body")
        ptok = self.peek()
        equality_token = EzhilLexeme("=", EzhilToken.EQUALITY)
        while (ptok.kind == EzhilToken.ATRATEOF
               or ptok.kind == EzhilToken.OTHERWISE):
            self.inside_if = True
            [l, c] = ptok.get_line_col()
            if ptok.kind == EzhilToken.ATRATEOF:
                # parse elseif branch
                self.dbg_msg("parsing CASE")
                self.match(EzhilToken.ATRATEOF)
                exp = self.valuelist()
                self.dbg_msg("parsing CASE EXPR")
                self.match(EzhilToken.CASE)
                next_stmt = self.stmtlist()
                expr = Expr(lhs, equality_token, exp[0], l, c, self.debug)
                self.dbg_msg("building an Expr " + str(expr))
                if not ifstmt.body:
                    # first CASE: reuse the placeholder if-statement
                    ifstmt.expr = expr
                    ifstmt.body = next_stmt
                else:
                    case_stmt = IfStmt(expr, next_stmt, None, l, c,
                                       self.debug)
                    ifstmt.append_stmt(case_stmt)
            elif ptok.kind == EzhilToken.OTHERWISE:
                # parse else branch
                self.dbg_msg("parsing OTHERWISE: ")
                self.match(EzhilToken.OTHERWISE)
                self.dbg_msg("parsing OTHERWISE-Body")
                self.inside_if = False
                body = self.stmtlist()
                else_stmt = ElseStmt(body, l, c, self.debug)
                if not ifstmt.body:
                    ifstmt.body = else_stmt
                else:
                    ifstmt.append_stmt(else_stmt)
                break
            else:
                # Not reachable given the loop condition; kept as a guard.
                # NOTE(review): ParseError is not defined in the visible
                # part of this file (ParseException is used elsewhere) -
                # confirm it is importable.
                self.inside_if = False
                raise ParseError(
                    "SWITCH-CASE-OTHERWISE statement syntax is messed up")
            ptok = self.peek()
            self.dbg_msg("parsing SWITCH-CASE next bits " + str(ptok))
        self.match(EzhilToken.END)
        self.inside_if = False
        self.dbg_msg("parsing -SWITCH-CASE- complete")
        return ifstmt

    def parseIfStmt(self, exp):
        """Parse an IF statement; ``exp`` is the leading ``@ ( ... )``.

        @ <expression> if { stmtlist }
            @<expr> ELSEIF {stmtlist}
            ELSE <stmtlist>
        END
        """
        self.dbg_msg("parsing IF statement")
        if_tok = self.dequeue()
        [l, c] = if_tok.get_line_col()
        self.inside_if = True
        ifstmt = IfStmt(exp[0], None, None, l, c, self.debug)
        self.if_stack.append(ifstmt)
        self.dbg_msg("parsing IF-body")
        body = self.stmtlist()
        prev_body = body
        ifstmt.set_body(body)
        ptok = self.peek()
        while (ptok.kind == EzhilToken.ATRATEOF
               or ptok.kind == EzhilToken.ELSE):
            self.inside_if = True
            [l, c] = ptok.get_line_col()
            if ptok.kind == EzhilToken.ATRATEOF:
                # parse elseif branch
                self.dbg_msg("parsing ELSE-IF")
                self.match(EzhilToken.ATRATEOF)
                exp = self.valuelist()
                self.dbg_msg("parsing ELSE-IF EXPR")
                tok = self.peek()
                if tok.kind != EzhilToken.ELSEIF:
                    # maybe another IF statement, SWITCH-CASE or a WHILE
                    # loop, DO-WHILE loop etc.
                    next_stmt = self.stmtlist(exp)  # pass in the expression
                    # append to previously scanned body.
                    prev_body.append(next_stmt)
                else:
                    self.dbg_msg("parsing ELSE-IF-body")
                    self.match(EzhilToken.ELSEIF)
                    body = self.stmtlist()
                    prev_body = body
                    next_stmt = IfStmt(exp[0], body, None, l, c, self.debug)
                    ifstmt.append_stmt(next_stmt)
            elif ptok.kind == EzhilToken.ELSE:
                # parse else branch
                self.dbg_msg("parsing stmt else: ")
                self.match(EzhilToken.ELSE)
                self.dbg_msg("parsing ELSE-Body")
                self.inside_if = False
                body = self.stmtlist()
                prev_body = body
                else_stmt = ElseStmt(body, l, c, self.debug)
                ifstmt.append_stmt(else_stmt)
                break
            else:
                # Not reachable given the loop condition; kept as a guard.
                # NOTE(review): ParseError is not defined in the visible
                # part of this file - confirm it is importable.
                self.inside_if = False
                raise ParseError("If-Else-If statement syntax is messed up")
            ptok = self.peek()
            self.dbg_msg("parsing -IF next bits " + str(ptok))
        self.match(EzhilToken.END)
        self.inside_if = False
        self.dbg_msg("parsing -IF-complete")
        return ifstmt

    def stmt(self, pass_in_ATexpr=None):
        """Try an assign, print, return, if, loop, switch or eval statement.

        pass_in_ATexpr: when given, it is used as the ``@ ( ... )`` value
        list of a control statement instead of parsing one from the tokens.

        Returns the AST node for the statement. Raises ParseException when
        an ``@ ( ... )`` prefix is not followed by IF, WHILE, SWITCH,
        FOREACH or FOR, and on other malformed input.
        """
        self.dbg_msg(" STMT ")
        ptok = self.peek()
        self.dbg_msg("stmt: peeking at " + str(ptok))
        if ptok.kind == EzhilToken.RETURN:
            # return <expression>
            self.dbg_msg('enter->return: <expression>')
            ret_tok = self.dequeue()
            [l, c] = ret_tok.get_line_col()
            rstmt = ReturnStmt(self.expr(), l, c, self.debug)
            self.dbg_msg("return statement parsed")
            return rstmt
        elif ptok.kind == EzhilToken.PRINT:
            self.currently_parsing.append(ptok)
            # print <expression>
            print_tok = self.dequeue()
            [l, c] = print_tok.get_line_col()
            exprlist_val = self.exprlist()
            self.currently_parsing.pop()
            return PrintStmt(exprlist_val, l, c, self.debug)
        elif ptok.kind == EzhilToken.ATRATEOF or pass_in_ATexpr:
            # @ <expression> {if | while | elseif}
            if not pass_in_ATexpr:
                at_tok = self.match(EzhilToken.ATRATEOF)
                self.currently_parsing.append(at_tok)
                exp = self.valuelist()
                self.currently_parsing.pop()
            else:
                exp = pass_in_ATexpr
            if self.debug:
                print("return from valuelist ", str(exp))
            ptok = self.peek()
            if ptok.kind == EzhilToken.IF:
                return self.parseIfStmt(exp)
            elif ptok.kind == EzhilToken.WHILE:
                # @ ( expr ) while { body } end
                self.loop_stack.append(True)
                self.dbg_msg("while-statement")
                while_tok = self.dequeue()
                self.currently_parsing.append(while_tok)
                [l, c] = while_tok.get_line_col()
                wexpr = exp[0]
                body = self.stmtlist()
                self.match(EzhilToken.END)
                whilestmt = WhileStmt(wexpr, body, l, c, self.debug)
                self.loop_stack.pop()
                self.currently_parsing.pop()
                return whilestmt
            elif ptok.kind == EzhilToken.SWITCH:
                return self.parseSwitchStmt(exp)
            elif ptok.kind == EzhilToken.FOREACH:
                foreach_tok = self.dequeue()
                self.currently_parsing.append(foreach_tok)
                [l, c] = foreach_tok.get_line_col()
                if self.debug:
                    print("parsing FOREACH stmt")
                self.loop_stack.append(True)
                self.dbg_msg("foreach-statement")
                # convert to a for statement - building Ezhil AST -
                # transformations
                if not isinstance(exp[1], Identifier):
                    # NOTE(review): ParseError is not defined in the visible
                    # part of this file - confirm it is importable.
                    raise ParseError(" FOR-EACH statement " +
                                     str(foreach_tok))
                foreach_iter = exp[1]
                # hidden index variable, named after the loop variable
                index_var = Identifier("__" + foreach_iter.id, l=0, c=-1)
                eq_token = EzhilLexeme("=", EzhilToken.EQUALS)
                plus_token = EzhilLexeme("+", EzhilToken.PLUS)
                lt_token = EzhilLexeme("<", EzhilToken.LT)
                if self.debug:
                    print("build init assign stmt")
                init = AssignStmt(index_var, eq_token, Number(0), l, c,
                                  self.debug)
                if self.debug:
                    print("build cond expr")
                VL1 = ValueList([exp[0]], l, c, self.debug)
                cond = Expr(
                    index_var, lt_token,
                    ExprCall(Identifier("len", l, c), VL1, l, c, self.debug),
                    l, c, self.debug)
                if self.debug:
                    print("build plus1 stmt")
                plus1_iter = Expr(index_var, plus_token, Number(1), l, c,
                                  self.debug)
                if self.debug:
                    print("build equals stmt")
                update = AssignStmt(index_var, eq_token, plus1_iter, l, c,
                                    self.debug)
                body = self.stmtlist()  # parse body
                # and insert artifical update variable in body
                VL2 = ValueList([exp[0], index_var], l, c, self.debug)
                extract_foreach_iter_from_list = ExprCall(
                    Identifier("__getitem__", l, c), VL2, l, c, self.debug)
                foreach_iter_Assign = AssignStmt(
                    foreach_iter, eq_token, extract_foreach_iter_from_list,
                    l, c, self.debug)
                body.List.insert(0, foreach_iter_Assign)
                # complete FOREACH stmt
                self.match(EzhilToken.END)
                self.currently_parsing.pop()
                foreach_stmt = ForStmt(init, cond, update, body, l, c,
                                       self.debug)
                self.loop_stack.pop()
                if self.debug:
                    print("completed parsing FOR-EACH loop",
                          str(foreach_stmt))
                return foreach_stmt
            elif ptok.kind == EzhilToken.FOR:
                # Fixme : empty for loops not allowed.
                # For ( exp1 , exp2 , exp3 ) stmtlist end
                if self.debug:
                    print("parsing FOR stmt")
                self.loop_stack.append(True)
                self.dbg_msg("for-statement")
                for_tok = self.peek()
                self.currently_parsing.append(for_tok)
                if self.debug:
                    print("matching for STMT", str(self.peek()))
                self.match(EzhilToken.FOR)
                if self.debug:
                    print("matched for STMT", str(self.peek()))
                [l, c] = for_tok.get_line_col()
                init, cond, update = exp[0], exp[1], exp[2]
                if self.debug:
                    print("extract 3 parts", str(init), str(cond),
                          str(update))
                body = self.stmtlist()
                self.match(EzhilToken.END)
                self.currently_parsing.pop()
                if self.debug:
                    print("body of loop", str(body))
                forstmt = ForStmt(init, cond, update, body, l, c, self.debug)
                self.loop_stack.pop()
                if self.debug:
                    print("completed parsing FOR loop", str(forstmt))
                return forstmt
            # any other token after "@ ( ... )" falls through to the
            # ParseException at the end of this method
        elif ptok.kind == EzhilToken.DO:
            # do { body } dowhile @ ( expr )
            if self.debug:
                print("parsing DO-WHILE statement")
            self.loop_stack.append(True)
            do_tok = self.dequeue()
            self.currently_parsing.append(do_tok)
            [l, c] = do_tok.get_line_col()
            body = self.stmtlist()
            if self.debug:
                print("parsed body")
            self.match(EzhilToken.DOWHILE)
            self.match(EzhilToken.ATRATEOF)
            exp = self.valuelist()
            if self.debug:
                print("parsed EXP", exp[0])
            doWhileStmt = DoWhileStmt(exp[0], body, l, c, self.debug)
            self.loop_stack.pop()
            self.currently_parsing.pop()
            return doWhileStmt
        elif ptok.kind == EzhilToken.BREAK:
            # break, must be in loop-environment
            self.dbg_msg("break-statement")
            break_tok = self.dequeue()
            [l, c] = break_tok.get_line_col()
            self.check_loop_stack()  # raises a parse error
            brkstmt = BreakStmt(l, c, self.debug)
            return brkstmt
        elif ptok.kind == EzhilToken.CONTINUE:
            # continue, must be in loop-environment
            self.dbg_msg("continue-statement")
            cont_tok = self.dequeue()
            [l, c] = cont_tok.get_line_col()
            self.check_loop_stack()  # raises a parse error
            cntstmt = ContinueStmt(l, c, self.debug)
            return cntstmt
        else:
            # lval := rval, or a bare expression
            ptok = self.peek()
            self.currently_parsing.append(ptok)
            [l, c] = ptok.get_line_col()
            lhs = self.expr()
            self.dbg_msg("parsing expr: " + str(lhs))
            ptok = self.peek()
            if ptok.kind in EzhilToken.ASSIGNOP:
                assign_tok = self.dequeue()
                rhs = self.expr()
                [l, c] = assign_tok.get_line_col()
                self.currently_parsing.pop()
                return AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)
            self.currently_parsing.pop()
            return EvalStmt(lhs, l, c, self.debug)
        raise ParseException("parsing Statement, unkown operators" +
                             str(ptok))

    def function(self):
        """Parse: def[kw] fname[id] (arglist) {body} end[kw].

        Returns the Function AST node. Raises ParseException for a nested
        definition or a malformed header.
        """
        if self.parsing_function:
            self.parsing_function = False
            raise ParseException(" Nested functions not allowed! " +
                                 str(self.last_token()))

        self.parsing_function = True
        try:
            def_tok = self.dequeue()
            if def_tok.kind != EzhilToken.DEF:
                raise ParseException("unmatched 'def' in function " +
                                     str(def_tok))

            id_tok = self.dequeue()
            if id_tok.kind != EzhilToken.ID:
                raise ParseException("expected identifier in function" +
                                     str(id_tok))

            arglist = self.arglist()
            self.dbg_msg("finished parsing arglist")
            body = self.stmtlist()

            self.match(EzhilToken.END)
            [l, c] = def_tok.get_line_col()
            fval = Function(id_tok.val, arglist, body, l, c, self.debug)
        finally:
            # Always clear the flag: if a parse error escaped with it still
            # set, the next definition would be rejected as "nested".
            self.parsing_function = False

        self.dbg_msg("finished parsing function")
        return fval

    def valuelist(self):
        """Parse: ( expr_1 , expr_2, ... ) and return a ValueList.

        An element may also be an assignment (``lhs <assignop> rhs``), in
        which case it is stored as an AssignStmt.
        """
        valueList = list()
        self.dbg_msg("valuelist: ")
        lparen_tok = self.match(EzhilToken.LPAREN)
        while self.peek().kind != EzhilToken.RPAREN:
            val = self.expr()
            if self.debug:
                print("val = ", str(val))
            ptok = self.peek()
            if self.debug:
                print("ptok = ", str(ptok), str(ptok.kind),
                      str(EzhilToken.ASSIGNOP))
            if ptok.kind in EzhilToken.ASSIGNOP:
                assign_tok = self.dequeue()
                rhs = self.expr()
                [l, c] = assign_tok.get_line_col()
                lhs = val
                val = AssignStmt(lhs, assign_tok, rhs, l, c, self.debug)
                if self.debug:
                    print("AssignStmt = ", str(val))
                ptok = self.peek()
            else:
                if self.debug:
                    print("No-Assign // Expr =", str(val))
            self.dbg_msg("valuelist-expr: " + str(val))
            valueList.append(val)
            if ptok.kind == EzhilToken.RPAREN:
                break
            elif ptok.kind == EzhilToken.COMMA:
                self.match(EzhilToken.COMMA)
            else:
                raise ParseException(" function call argument list " +
                                     str(ptok))
        self.match(EzhilToken.RPAREN)
        [l, c] = lparen_tok.get_line_col()
        return ValueList(valueList, l, c, self.debug)

    def arglist(self):
        """Parse: ( arg_1, arg_2, ... ) and return an ArgList of names.

        Each argument token is taken as-is; its kind is not checked here.
        """
        self.dbg_msg(" ARGLIST ")
        args = list()
        lparen_tok = self.match(EzhilToken.LPAREN)
        while self.peek().kind != EzhilToken.RPAREN:
            arg_name = self.dequeue()
            args.append(arg_name.val)
            ptok = self.peek()
            if ptok.kind == EzhilToken.RPAREN:
                break
            elif ptok.kind == EzhilToken.COMMA:
                self.match(EzhilToken.COMMA)
            else:
                raise ParseException(" function definition argument list " +
                                     str(ptok))
        self.match(EzhilToken.RPAREN)
        [l, c] = lparen_tok.get_line_col()
        return ArgList(args, l, c, self.debug)

    def exprlist(self):
        """Parse a comma-separated list of expressions.

        EXPRLIST : EXPR, EXPRLIST
        EXPRLIST : EXPR

        The node's line/column come from the first comma, or are 0, 0 when
        the list has a single expression.
        """
        self.dbg_msg(" EXPRLIST ")
        exprs = []
        comma_tok = None
        l = 0
        c = 0
        while not self.lex.end_of_tokens():
            exprs.append(self.expr())
            if self.lex.peek().kind != EzhilToken.COMMA:
                break
            tok = self.match(EzhilToken.COMMA)
            if not comma_tok:
                comma_tok = tok

        if comma_tok:
            [l, c] = comma_tok.get_line_col()
        self.dbg_msg("finished expression list")
        return ExprList(exprs, l, c, self.debug)

    def expr(self):
        """Parse an expression built from terms and binary operators.

        After a MINUS the right operand is a single term rather than a
        whole expression; any operators that remain are folded onto the
        result by the trailing loop.
        """
        self.dbg_msg(" EXPR ")
        val1 = self.term()
        res = val1
        ptok = self.peek()
        if ptok.kind in EzhilToken.ADDSUB:
            binop = self.dequeue()
            if ptok.kind == EzhilToken.MINUS:
                val2 = self.term()
            else:
                val2 = self.expr()
            [l, c] = binop.get_line_col()
            res = Expr(val1, binop, val2, l, c, self.debug)
        elif ptok.kind == EzhilToken.LPAREN:
            # function call
            if res.__class__ != Identifier:
                raise ParseException("invalid function call" + str(ptok))
            [l, c] = ptok.get_line_col()
            vallist = self.valuelist()
            res = ExprCall(res, vallist, l, c, self.debug)

        ptok = self.peek()
        while ptok.kind in EzhilToken.BINOP:
            binop = self.dequeue()
            [l, c] = binop.get_line_col()
            res = Expr(res, binop, self.expr(), l, c, self.debug)
            ptok = self.peek()
        return res

    def term(self):
        """This is a grammar abstraction; but AST only has Expr elements.

        Parses a factor, optionally followed by a multiplicative,
        comparison, exponent/modulo or bitwise/logical operator and
        another term.
        """
        self.dbg_msg("term")
        val1 = self.factor()
        res = val1
        tok = self.peek()
        if (tok.kind in EzhilToken.MULDIV or tok.kind in EzhilToken.COMPARE
                or tok.kind in EzhilToken.EXPMOD
                or tok.kind in EzhilToken.BITWISE_AND_LOGICAL):
            binop = self.dequeue()
            val2 = self.term()
            [l, c] = binop.get_line_col()
            res = Expr(val1, binop, val2, l, c, self.debug)
        return res

    def factor(self):
        """Parse one operand.

        Handles a parenthesised expression, number, logical-not expression,
        identifier or call, string, unary +/- (built as ``0 <op> term``)
        and a list literal ``[ e1, e2, ... ]`` (which may be empty).

        Raises SyntaxError for a missing closing paren and ParseException
        for array indexing, a malformed list literal or any other
        unexpected token.
        """
        self.dbg_msg("factor")
        tok = self.peek()
        if tok.kind == EzhilToken.LPAREN:
            self.dequeue()
            val = self.expr()
            if self.dequeue().kind != EzhilToken.RPAREN:
                raise SyntaxError("Missing Parens " + str(self.last_token()))
        elif tok.kind == EzhilToken.NUMBER:
            tok_num = self.dequeue()
            [l, c] = tok_num.get_line_col()
            val = Number(tok.val, l, c, self.debug)
        elif tok.kind == EzhilToken.LOGICAL_NOT:
            tok_not = self.dequeue()
            [l, c] = tok_not.get_line_col()
            val = UnaryExpr(self.expr(), tok_not, l, c, self.debug)
            self.dbg_msg("completed parsing unary expression" + str(val))
        elif tok.kind == EzhilToken.ID:
            tok_id = self.dequeue()
            [l, c] = tok_id.get_line_col()
            val = Identifier(tok.val, l, c, self.debug)
            ptok = self.peek()
            self.dbg_msg("factor: " + str(ptok) + " / " + str(tok))
            if ptok.kind == EzhilToken.LPAREN:
                # function call
                [l, c] = ptok.get_line_col()
                vallist = self.valuelist()
                val = ExprCall(val, vallist, l, c, self.debug)
            elif ptok.kind == EzhilToken.LSQRBRACE:
                # indexing a array type variable or ID
                raise ParseException("arrays not implemented" + str(ptok))
        elif tok.kind == EzhilToken.STRING:
            str_tok = self.dequeue()
            [l, c] = str_tok.get_line_col()
            val = String(tok.val, l, c, self.debug)
        elif tok.kind in EzhilToken.ADDSUB:
            unop = self.dequeue()
            [l, c] = unop.get_line_col()
            val = Expr(Number(0), unop, self.term(), l, c, self.debug)
        elif tok.kind == EzhilToken.LSQRBRACE:
            # creating a list/array expression
            self.dequeue()
            val = Array()
            # An immediately closed bracket is the empty list; otherwise
            # read "expr (, expr)*" up to the closing bracket.
            if self.peek().kind != EzhilToken.RSQRBRACE:
                while True:
                    exprval = self.expr()
                    val.append(exprval)
                    ptok = self.peek()
                    if self.debug:
                        print(ptok.__class__, ptok)
                    if ptok.kind == EzhilToken.RSQRBRACE:
                        break
                    # Report bad syntax as a parse error; an assert would
                    # vanish under "python -O".
                    if ptok.kind != EzhilToken.COMMA:
                        raise ParseException(
                            "expected ',' or ']' in list expression, found " +
                            str(ptok))
                    self.dequeue()
            self.dequeue()  # the closing bracket peeked above
        else:
            raise ParseException("Expected Number, found something " +
                                 str(tok))

        self.dbg_msg("factor-returning: " + str(val))
        return val