def __init__(self, parent=None):
    """Initialize the PostScript lexer; '%' starts a line comment.

    Args:
        parent: optional parent passed to QsciLexerPostScript.
    """
    QsciLexerPostScript.__init__(self, parent)
    Lexer.__init__(self)
    self.commentString = "%"
def parse_file(self, fpath):
    """Lex the file at *fpath* and parse the resulting token stream.

    Raises:
        ParseError: if the file cannot be read.
    """
    lexer = Lexer()
    try:
        lexer.parse_file(fpath)
    except IOError as exc:
        # FIX: `raise ParseError, "..."` is Python-2-only syntax (a
        # SyntaxError on py3); also chain the original IOError for context.
        raise ParseError("file not accessible") from exc
    return self.parse_with_lexer(lexer)
def parse(filename, context):
    """Parse a command file of the form NAME ( ARG ARG ... ) and run each
    command against *context*.

    Raises:
        UnexpectedTokenError: on any token that breaks the grammar.
        LanguageError: re-raised with file/line information filled in.
    """
    # FIX: `file` shadowed the builtin and was closed via try/finally;
    # a `with` block is equivalent and exception-safe.
    with open(filename, 'r') as source:
        lexer = Lexer(source, context)
        try:
            token = lexer.getToken()
            while token.type != 'EOF':
                if token.type != 'STRING':
                    raise UnexpectedTokenError(token.value, 'STRING', 'STRING',
                                               line=lexer.getLine())
                cmd_name = token.value
                token = lexer.getToken()
                if token.type != '(':
                    raise UnexpectedTokenError(token.value, '(', 'STRING',
                                               line=lexer.getLine())
                cmd_args = []
                token = lexer.getToken()
                while token.type != ')':
                    if token.type != 'STRING':
                        # NOTE(review): this call passes one fewer positional
                        # argument than the two above — confirm against
                        # UnexpectedTokenError's signature.
                        raise UnexpectedTokenError(token.value, 'STRING',
                                                   line=lexer.getLine())
                    cmd_args.append(token.value)
                    token = lexer.getToken()
                context.run_cmd(cmd_name, cmd_args)
                token = lexer.getToken()
        except LanguageError as e:
            # Fix up filename and line number if needed and re-raise.
            if not e.getLine():
                e.setLine(lexer.getLine())
            if not e.getFile():
                e.setFile(filename)
            # FIX: bare `raise` preserves the original traceback
            # (`raise e` restarted it from here).
            raise
def testLexer():
    """Small demonstration of how the Lexer is used."""
    sample = " append (bd fdfdg)"
    analyzer = Lexer()             # create the lexical analyzer
    analyzer.analyzeString(sample)  # tokenize the sample string
    # Pop and print every token until end of input.
    while not analyzer.isEOF():
        print(analyzer.popToken())
def __init__(self, parent=None):
    """Initialize the properties-file lexer; '#' starts a line comment.

    Args:
        parent: optional parent passed to QsciLexerProperties.
    """
    QsciLexerProperties.__init__(self, parent)
    Lexer.__init__(self)
    self.commentString = "#"
def test_lexer_extracts_single_integer():
    """Lexing the text "11" yields a token whose value is the integer 11."""
    source = "11"
    lexer = Lexer()
    lexer.lex(source)
    token = lexer.get_next_token()
    assert token.value == 11
def __init__(self, parent=None):
    """Initialize the YAML lexer; '#' starts a line comment.

    Args:
        parent: optional parent passed to QsciLexerYAML.
    """
    QsciLexerYAML.__init__(self, parent)
    Lexer.__init__(self)
    self.commentString = "#"
def __init__(self, parent=None):
    """Initialize the Lua lexer; '--' starts a line comment.

    Args:
        parent: optional parent passed to QsciLexerLua.
    """
    QsciLexerLua.__init__(self, parent)
    Lexer.__init__(self)
    self.commentString = "--"
def __init__(self, parent=None):
    """Initialize the Fortran lexer; '!' starts a line comment.

    Args:
        parent: optional parent passed to QsciLexerFortran.
    """
    QsciLexerFortran.__init__(self, parent)
    Lexer.__init__(self)
    self.commentString = "!"
def vhdl_unit_name(file):
    """
    Given the name of a VHDL file, attempts to find the unit
    (entity or package) name in this file. If several units
    are present, the first is returned. None is returned if
    no unit name is found.
    """
    # FIX: regex patterns are now raw strings — '\w' and '\s' in plain
    # strings are invalid escape sequences on modern Python.
    rules = [
        (r'--[^\n]*\n', 'COMMENT'),
        (r'\n+', 'NEWLINE'),
        (r'\w+', 'ID'),
        (r'\s+', 'WHITESPACE'),
        (r'[^\w\n]+', 'NONE'),
    ]
    lx = Lexer(rules, skip_whitespace=False)
    # FIX: the file handle from open() was never closed.
    with open(file) as source:
        lx.input(source.read())
    window = [None, None, None]
    try:
        for tok in lx.tokens():
            # Implements a simple sliding window looking for
            #   (entity|package) <name> is
            # as 3 consecutive IDs.
            if tok.type == 'ID':
                window = window[1:3] + [tok.val.lower()]
                if (window[0] in ('entity', 'package')
                        and window[2] == 'is'):
                    return window[1]
    # FIX: `except LexerError, err` is Python-2-only syntax.
    except LexerError:
        return None
def __init__(self, parent=None):
    """Initialize the base Qsci lexer and remember the owning editor.

    Args:
        parent: optional parent widget; also stored as the editor.
    """
    QsciLexer.__init__(self, parent)
    Lexer.__init__(self)
    self.editor = parent
def __init__(self, parent=None):
    """Initialize the batch-file lexer; 'REM ' starts a line comment.

    Args:
        parent: optional parent passed to QsciLexerBatch.
    """
    QsciLexerBatch.__init__(self, parent)
    Lexer.__init__(self)
    # Trailing space is intentional: "REM" must be followed by whitespace.
    self.commentString = "REM "
def testParser():
    """End-to-end demo: lex, parse, and run a quicksort program.

    FIX: the original mixed Python-2 print statements (`print frame.locals`)
    with py3-style calls; all prints now use the function form. The dead
    commented-out sample programs were removed.
    """
    s = """
    function quicksort(seznam) {
        x = len(seznam);
        if (x <= 1) {
            return seznam;
        } else {
            pivot = pop(seznam,0);
            mali = [];
            velci = [];
            for prvek in seznam {
                if (prvek<pivot) {
                    append(mali,prvek);
                } else {
                    append(velci,prvek);
                };
            };
            vystup1 = quicksort(mali);
            vystup2 = quicksort(velci);
            vystup = join (vystup1, pivot, vystup2);
            return vystup;
        };
    };
    a = [7,758,6,87,25465,487,654,87,564,687,65];
    b = quicksort(a);
    print b;
    """
    l = Lexer()          # create the lexical analyzer
    l.analyzeString(s)   # tokenize the source string
    p = Parser(l)        # the parser reads tokens from the lexer
    ast = p.parse()      # parse the token stream and build the AST
    frame = Frame(None)
    ffy = FunctionFrame()
    print(ast)           # show the tree
    ast.run(frame, ffy)
    print(frame.locals)
def __init__(self, parent=None):
    """Initialize the HTML lexer; comments use the <!-- ... --> stream form.

    Args:
        parent: optional parent passed to QsciLexerHTML.
    """
    QsciLexerHTML.__init__(self, parent)
    Lexer.__init__(self)
    self.streamCommentString = {'start': '<!-- ', 'end': ' -->'}
def __init__(self, parent=None):
    """Initialize the Makefile lexer; '#' starts a line comment.

    Args:
        parent: optional parent passed to QsciLexerMakefile.
    """
    QsciLexerMakefile.__init__(self, parent)
    Lexer.__init__(self)
    self.commentString = "#"
    # Makefiles require literal tab characters; never convert them.
    self._alwaysKeepTabs = True
def __init__(self, parent=None):
    """Initialize the CSS lexer with line and /* */ stream comments.

    Args:
        parent: optional parent passed to QsciLexerCSS.
    """
    QsciLexerCSS.__init__(self, parent)
    Lexer.__init__(self)
    self.commentString = "#"
    self.streamCommentString = {'start': '/* ', 'end': ' */'}
def __init__(self, parent=None):
    """Initialize the Pascal lexer with '//' line and { } stream comments.

    Args:
        parent: optional parent passed to QsciLexerPascal.
    """
    QsciLexerPascal.__init__(self, parent)
    Lexer.__init__(self)
    self.commentString = "//"
    self.streamCommentString = {'start': '{ ', 'end': ' }'}
def parseHTML(fileName):
    """Parse an HTML file into a DOMBody of its top-level elements.

    FIX: the file handle is now closed via a context manager — the original
    leaked it whenever parseSingleElement raised.
    """
    with open(fileName) as fileObj:
        lexer = Lexer(fileName, fileObj)
        items = []
        while not lexer.isEnd():
            items.append(parseSingleElement(lexer))
    return DOMBody(items)
def __init__(self, parent=None):
    """Initialize the C# lexer with line, stream, and box comment styles.

    Args:
        parent: optional parent passed to QsciLexerCSharp.
    """
    QsciLexerCSharp.__init__(self, parent)
    Lexer.__init__(self)
    self.commentString = "//"
    self.streamCommentString = {'start': '/* ', 'end': ' */'}
    self.boxCommentString = {'start': '/* ', 'middle': ' * ', 'end': ' */'}
def test_function_property(self):
    """A `property var x: function(...) {...}` parses as a var property."""
    src = """Item { property var fnProperty: function (arg1, arg2) { return arg1 + arg2; } }"""
    lexer = Lexer(src)
    lexer.tokenize()
    parsed = QmlClass("Foo")
    qmlparser.parse(lexer.tokens, parsed)
    prop = parsed.properties[0]
    self.assertEqual(prop.name, "fnProperty")
    self.assertEqual(prop.type, "var")
def test(self):
    """Two sibling function declarations are both recorded, in order."""
    src = "Item { function foo() {} function bar() {} }"
    lexer = Lexer(src)
    lexer.tokenize()
    parsed = QmlClass("Foo")
    qmlparser.parse(lexer.tokens, parsed)
    self.assertEqual(parsed.base_name, "Item")
    self.assertEqual(parsed.functions[0].name, "foo")
    self.assertEqual(parsed.functions[1].name, "bar")
    self.assertEqual(len(parsed.functions), 2)
def __init__(self, parent=None, caseInsensitiveKeywords=False):
    """Initialize the C++ lexer with line, stream, and box comment styles.

    Args:
        parent: optional parent passed to QsciLexerCPP.
        caseInsensitiveKeywords: forwarded to QsciLexerCPP.
    """
    QsciLexerCPP.__init__(self, parent, caseInsensitiveKeywords)
    Lexer.__init__(self)
    self.commentString = "//"
    self.streamCommentString = {'start': '/* ', 'end': ' */'}
    self.boxCommentString = {'start': '/* ', 'middle': ' * ', 'end': ' */'}
def feed(self, text):
    """Lex and parse *text*, recording the lexer's tokens and errors on self.

    The yacc parser is generated from this module's grammar rules
    (``module=self``) and driven with the freshly built lexer.
    """
    lexer = Lexer()
    lexer.build()
    # TODO is this a hack — yacc needs the token list exposed on the module
    # object it is built from.
    self.tokens = lexer.tokens
    parser = yacc.yacc(module=self)
    parser.parse(text,lexer=lexer.lexer)
    # Errors accumulated during lexing become available to the caller.
    self.errors = lexer.errors
    # print self.classes
def test_var_property(self):
    """A `property var x: {...}` object literal parses as a var property."""
    src = """Item { property var varProperty: { "key1": "value1", "key2": "value2" } }"""
    lexer = Lexer(src)
    lexer.tokenize()
    parsed = QmlClass("Foo")
    qmlparser.parse(lexer.tokens, parsed)
    prop = parsed.properties[0]
    self.assertEqual(prop.name, "varProperty")
    self.assertEqual(prop.type, "var")
def test_normal_arguments(self):
    """A function with plain argument names parses with a void return type."""
    src = """Item { function foo(arg1, arg2) { return arg1 + arg2; } }"""
    lexer = Lexer(src)
    lexer.tokenize()
    parsed = QmlClass("Foo")
    qmlparser.parse(lexer.tokens, parsed)
    fn = parsed.functions[0]
    self.assertEqual(fn.name, "foo")
    self.assertEqual(fn.type, "void")
def test_keyword_arguments(self):
    """Argument names containing QML keywords still parse correctly."""
    src = """Item { function foo(propertyArgument, signalArgument) { return propertyArgument + signalArgument; } }"""
    lexer = Lexer(src)
    lexer.tokenize()
    parsed = QmlClass("Foo")
    qmlparser.parse(lexer.tokens, parsed)
    fn = parsed.functions[0]
    self.assertEqual(fn.name, "foo")
    self.assertEqual(fn.type, "void")
def test(text):
    """Run the full pipeline on *text*: lex, parse, format, interpret.

    FIX: the original used Python-2 print statements; converted to the
    print() function.
    """
    print()
    print(text)
    lexer = Lexer()
    tokens = lexer.tokenize(text)
    print(tokens)
    parser = Parser()
    ast = parser.parse(tokens)
    print(ast)
    print(fmt(ast))
    interpreter = Interpreter()
    interpreter.interpret(ast, None)
def main():
    """Parse a file named on the command line, or run an interactive REPL.

    FIX: the REPL rebuilt the yacc parser (`pg.build()`) on every prompt;
    it is now built once. The py2 `raw_input` became `input`.
    """
    if len(sys.argv) > 1:
        fname = sys.argv[1]
        lexer = Lexer(fname)
        parser = pg.build()
        mypar = parser.parse(lexer.lex())
        recprint(mypar, 0)
    else:
        parser = pg.build()  # build once, reuse for every prompt
        while True:
            # A fresh Lexer per line; "hw.scala" is a placeholder filename
            # required by the Lexer constructor.
            lexer = Lexer("hw.scala")
            mypar = parser.parse(lexer.lex(input("scaladoll> ")))
            recprint(mypar, 0)
def __init__(self,afterparse,debug):
    """Parse input from argv[1] (batch mode) or from an interactive prompt,
    handing each resulting parse tree to *afterparse*.

    :param afterparse: callback invoked with every parse tree produced
    :param debug: when truthy, the tree is also pretty-printed and the
        flag is forwarded to the Lexer
    """
    parser=self.pg.build()
    if(len(sys.argv)>1):
        # Batch mode: parse the named file once.
        fname=sys.argv[1]
        lexer = Lexer(fname,debug)
        mypar=parser.parse(lexer.lex())
        if(debug):
            self.recprint(mypar,0)
        afterparse(mypar)
    else:
        # REPL mode: a fresh Lexer per prompt (no filename).
        while(1):
            lexer=Lexer(None,debug)
            mypar=parser.parse(lexer.lex(self.readfromprompt()))
            if(debug):
                self.recprint(mypar,0)
            afterparse(mypar)
def __init__(self, code, whitespaces=False):
    """Tokenize *code* eagerly and initialize parser state.

    :param code: grammar source text to parse
    :param whitespaces: whether whitespace tokens are significant
    """
    self.lexer = Lexer(code)
    self.lexer.lex()          # tokenize the whole input up front
    self.curtok = 0           # index of the current token
    self.start_symbol = None  # presumably set when the start rule is seen — confirm
    self.rules = {}           # rule name -> parsed productions
    self.whitespaces = whitespaces
import sys
import unittest
from lexer import Lexer, TokenKind
from parser import Parser

# Read the source file named on the command line (default: sample.txt).
file_name = sys.argv[1] if len(sys.argv) > 1 else 'sample.txt'
# FIX: the file handle was opened and never closed; use a context manager.
with open(file_name, 'r') as file:
    content = file.read()
tokens = Lexer(content).tokenize()
parser = Parser()
print(*tokens, sep='\n')
output = None
'''
try:
    output = parser.parse(tokens)
except SyntaxError as e:
    output = 'Syntax Error at {line}:{col}'.format(line=e.message.line, col=e.message.col)
print(output)
'''
def __init__(self, parent=None):
    """Initialize the SQL lexer (no extra comment configuration).

    Args:
        parent: optional parent passed to QsciLexerSQL.
    """
    QsciLexerSQL.__init__(self, parent)
    Lexer.__init__(self)
from lexer import Lexer
from syntacticalAnalyzer.analyzer import syntacticalAnalyzer
from semanticAnalyzer.analyzer import semanticAnalyzer

# Smart-home command pipeline: lex the sentence, run the syntax check,
# then map the recognized tokens onto rooms/objects.
lexer = Lexer()
tokens = []
print('enter your command: ')
#input_command = input()
#input_command = 'i want to turn on tv latte in the bedroom'
input_command = 'i want to turn on coffee-machine tea in the kitchen'
#input_command = 'i want to turn in plate degree = 200 in the kitchen'
print(input_command)
divided_command = lexer.get_command(input_command.lower())
lexer.get_tokens(divided_command, tokens)
tokens_types = []
# FIX: idiomatic truthiness test (was `len(tokens) == 0`).
if not tokens:
    print('wrong command')
else:
    synAnalyzer = syntacticalAnalyzer()
    words = synAnalyzer.get_sentence(input_command)
    syntax_error = synAnalyzer.createSentenceTree(words)
    # FIX: test the flag directly instead of `== True` (PEP 8).
    if syntax_error:
        print('you have a syntax error, please recheck your commnad')
    else:
        semAnalyzer = semanticAnalyzer()
        result = semAnalyzer.room_to_object(tokens)
from lexer import Lexer
from parser import Parser
import os

# Truncate the generated-code scratch files left over from a previous run.
open("func.txt", "w").close()
open("main.txt", "w").close()

# Lex the source program and dump the token stream.
program = open("test.roc").read()
tokens = Lexer(program).tokenize()
for token in tokens:
    print(token)

# Parse and execute; presumably execution emits the generated C++ into
# func.txt / main.txt read below — confirm against the AST node classes.
program = Parser(tokens).parse()
program.execute()
print("----------------------------------------")

# Stitch the runtime prelude (roc.cpp), the generated functions, and the
# generated main body into com.cpp, then compile and run it with g++.
start = open("roc.cpp").read()
f2 = open("com.cpp", "w")
f2.write(start)
f2.close()
f2 = open("com.cpp", "a")
f2.write(open("func.txt", "r").read() + "\n")
f2.write("int main(){\n" + open("main.txt", "r").read())
f2.write("return 0;\n}\n")
f2.close()
os.system("g++ com.cpp -o com.out")
os.system("./com.out")
# NOTE(review): the triple quote below opens a block that continues past
# this excerpt; left exactly as found.
"""
def test_next_token(self) -> None:
    """The lexer emits the expected (type, literal) sequence for a program
    exercising every token kind."""
    source = """let five = 5;
let ten = 10;
let add = fn(x, y) { x + y; };
let result = add(five, ten);
!-/*5;
5 < 10 > 5;
if (5 < 10) { return true; } else { return false; }
10 == 10;
10 != 9;
"foobar" "foo bar"
[1, 2];
{"foo": "bar"}"""
    expected = [
        (TokenType.LET, "let"), (TokenType.IDENT, "five"), (TokenType.ASSIGN, "="),
        (TokenType.INT, "5"), (TokenType.SEMICOLON, ";"),
        (TokenType.LET, "let"), (TokenType.IDENT, "ten"), (TokenType.ASSIGN, "="),
        (TokenType.INT, "10"), (TokenType.SEMICOLON, ";"),
        (TokenType.LET, "let"), (TokenType.IDENT, "add"), (TokenType.ASSIGN, "="),
        (TokenType.FUNCTION, "fn"), (TokenType.LPAREN, "("), (TokenType.IDENT, "x"),
        (TokenType.COMMA, ","), (TokenType.IDENT, "y"), (TokenType.RPAREN, ")"),
        (TokenType.LBRACE, "{"), (TokenType.IDENT, "x"), (TokenType.PLUS, "+"),
        (TokenType.IDENT, "y"), (TokenType.SEMICOLON, ";"), (TokenType.RBRACE, "}"),
        (TokenType.SEMICOLON, ";"),
        (TokenType.LET, "let"), (TokenType.IDENT, "result"), (TokenType.ASSIGN, "="),
        (TokenType.IDENT, "add"), (TokenType.LPAREN, "("), (TokenType.IDENT, "five"),
        (TokenType.COMMA, ","), (TokenType.IDENT, "ten"), (TokenType.RPAREN, ")"),
        (TokenType.SEMICOLON, ";"),
        (TokenType.BANG, "!"), (TokenType.MINUS, "-"), (TokenType.SLASH, "/"),
        (TokenType.ASTERISK, "*"), (TokenType.INT, "5"), (TokenType.SEMICOLON, ";"),
        (TokenType.INT, "5"), (TokenType.LT, "<"), (TokenType.INT, "10"),
        (TokenType.GT, ">"), (TokenType.INT, "5"), (TokenType.SEMICOLON, ";"),
        (TokenType.IF, "if"), (TokenType.LPAREN, "("), (TokenType.INT, "5"),
        (TokenType.LT, "<"), (TokenType.INT, "10"), (TokenType.RPAREN, ")"),
        (TokenType.LBRACE, "{"), (TokenType.RETURN, "return"), (TokenType.TRUE, "true"),
        (TokenType.SEMICOLON, ";"), (TokenType.RBRACE, "}"),
        (TokenType.ELSE, "else"), (TokenType.LBRACE, "{"), (TokenType.RETURN, "return"),
        (TokenType.FALSE, "false"), (TokenType.SEMICOLON, ";"), (TokenType.RBRACE, "}"),
        (TokenType.INT, "10"), (TokenType.EQ, "=="), (TokenType.INT, "10"),
        (TokenType.SEMICOLON, ";"),
        (TokenType.INT, "10"), (TokenType.NOT_EQ, "!="), (TokenType.INT, "9"),
        (TokenType.SEMICOLON, ";"),
        (TokenType.STRING, "foobar"), (TokenType.STRING, "foo bar"),
        (TokenType.LBRACKET, "["), (TokenType.INT, "1"), (TokenType.COMMA, ","),
        (TokenType.INT, "2"), (TokenType.RBRACKET, "]"), (TokenType.SEMICOLON, ";"),
        (TokenType.LBRACE, "{"), (TokenType.STRING, "foo"), (TokenType.COLON, ":"),
        (TokenType.STRING, "bar"), (TokenType.RBRACE, "}"),
        (TokenType.EOF, ""),
    ]
    lexer = Lexer(source)
    for want_type, want_literal in expected:
        token_ = lexer.next_token()
        self.assertEqual(token_.type_, want_type)
        self.assertEqual(token_.literal, want_literal)
def parse(self, text):
    """Tokenize *text*, run the parser, check for errors, and return the
    resulting program AST."""
    parser = Parser(Lexer(text))
    program = parser.parse_program()
    self.check_parser_errors(parser)
    return program
def highlight_line(content):
    """Syntax-highlight *content* for terminal output, without appending a
    trailing newline."""
    lexer = Lexer(ensurenl=False)
    formatter = TerminalFormatter()
    return highlight(content, lexer, formatter)
def main(input_filename):
    """Parse the program contained in *input_filename*."""
    with open(input_filename) as stream:
        parser = Parser(Lexer(stream.read()))
        parser.get_token()  # prime the parser with the first token
        parser.n_prog()
elif kind == N.CONST: self.const(ops) elif kind == N.EMPTY: return else: self.fail(f'Unexpected opcode {kind}') def run(self, ast): if ast.kind != N.PROGRAM: self.fail('Bad AST tree root type') for op in ast.ops: self.run_op(op) if __name__ == '__main__': lex = Lexer( sys.argv[1] if len(sys.argv) > 1 else 'code-samples/hello-world.xp' ) parser = Parser(lex) ast = parser.parse() if '--ast' in sys.argv: print('AST', ast) vm = VM() print('RESULT: ') vm.run(ast)
def main():
    """Interactive REPL for the BASIC interpreter.

    Reads one statement per prompt. Statements beginning with a line number
    are stored in the program; recognized command tokens (RUN, LIST, SAVE,
    LOAD, NEW, EXIT) are executed immediately; anything else is rejected.
    """
    banner = ("""
R O B O T   B A S I C
""")
    print(banner)
    lexer = Lexer()
    program = Program()
    # Continuously accept user input and act on it until
    # the user enters 'EXIT'
    while True:
        stmt = input('> ')
        #print('stmt:',stmt,len(stmt))
        try:
            tokenlist = lexer.tokenize(stmt)
            #print('tokenlist:',tokenlist)
            # Execute commands directly, otherwise
            # add program statements to the stored
            # BASIC program
            if len(tokenlist) > 0:
                # Exit the interpreter
                if tokenlist[0].category == Token.EXIT:
                    break
                # Add a new program statement, beginning
                # a line number
                elif tokenlist[0].category == Token.UNSIGNEDINT\
                        and len(tokenlist) > 1:
                    program.add_stmt(tokenlist)
                # Delete a statement from the program
                # (a bare line number deletes that line)
                elif tokenlist[0].category == Token.UNSIGNEDINT \
                        and len(tokenlist) == 1:
                    program.delete_statement(int(tokenlist[0].lexeme))
                # Execute the program
                elif tokenlist[0].category == Token.RUN:
                    try:
                        program.execute()
                    except KeyboardInterrupt:
                        # Ctrl-C aborts the running program, not the REPL.
                        print("Program terminated")
                # List the program
                elif tokenlist[0].category == Token.LIST:
                    program.list()
                # Save the program to disk
                elif tokenlist[0].category == Token.SAVE:
                    program.save(tokenlist[1].lexeme)
                    print("Program written to file")
                # Load the program from disk
                elif tokenlist[0].category == Token.LOAD:
                    program.load(tokenlist[1].lexeme)
                    print("Program read from file")
                # Delete the program from memory
                elif tokenlist[0].category == Token.NEW:
                    program.delete()
                # Unrecognised input: echo the offending tokens
                else:
                    print("Unrecognised input", file=stderr)
                    for token in tokenlist:
                        token.print_lexeme()
                    print(flush=True)
        # Trap all exceptions so that interpreter
        # keeps running
        except Exception as e:
            print(e, file=stderr, flush=True)
r = stack.pop() l = stack.pop() result = { 'ADD': l + r, 'SUB': l - r, 'MUL': l * r, 'DIV': l // r, 'MOD': l % r, } val = result[t] stack.append(val) return stack.pop() if __name__ == '__main__': lex = Lexer(terminals) text = ''' a * 13 + b * 100 / 10 ''' tokens = lex.tokenize(text) rpn_tokens = shunting_yard(tokens) accs = [acc for _, acc in rpn_tokens] result = rpn_eval(rpn_tokens, {'a': 10, 'b': 20}) print('Input:', text) print('RPN:', ' '.join(accs)) print() print('Result:', result) print() print('RPN tokens:') for t in rpn_tokens: print(t)
from lexer import Lexer
from semantics import Parser

# Source program to run.
inputfile = "program.joy"
with open(inputfile) as f:
    text_input = f.read()

# Build the lexer and tokenize the source.
lexer = Lexer().get_lexer()
tokens = lexer.lex(text_input)

# NOTE(review): pg.parse() is called before get_parser(); presumably it
# registers the grammar productions on the builder — confirm against
# semantics.Parser.
pg = Parser()
pg.parse()
parser = pg.get_parser()
# Parse the token stream and evaluate the resulting AST.
parser.parse(tokens).eval()
from lexer import Lexer
import parser
import sys
import os.path

lexer = Lexer()

# This is the main entry point for code execution of Cookpy code.
# This file takes a single argument, which should point to the Cookpy code
# you want to run. If no argument is given, or if the file doesn't exist,
# an error is raised.
if len(sys.argv) > 1:
    if os.path.exists(sys.argv[1]):
        sourcefile = open(sys.argv[1])
    else:
        # FIX: StandardError was removed in Python 3; raising it would fail
        # with NameError and mask the real problem.
        raise RuntimeError("Usage Error: The SourceFile Doesn't Exist.")
else:
    raise RuntimeError(
        'Usage Error: You Must Provide a Valid Source File to Run.')

# The lexer will read the file and return a list of valid words.
# Comments will be ignored.
tokens = lexer.languagelexer(sourcefile.read())
sourcefile.close()

# The parser will handle the rest of code execution.
parser.parser(tokens)

# If no error was raised in the parser, we exit with a 'clean' exit code.
from parser import Parser
from code_generator import CodeGen

# text_input = """
# print(6 - 10 + 2);
# """

# Input filename
fname = "input.toy"

# Open input file
with open(fname) as f:
    text_input = f.read()

# Initialize new Lexer object
# NOTE(review): Lexer is not imported in this excerpt — presumably imported
# elsewhere in the file; confirm.
lexer = Lexer().create()

# Tokenize the text input with the lexer object
tokens = lexer.lex(text_input)

# Test print statement
for token in tokens:
    print(token)

# Above print statement outputs:
# Token('PRINT', 'print')
# Token('OPEN_PAREN', '(')
# Token('NUMBER', '4')
# Token('SUM', '+')
# Token('NUMBER', '4')
# Token('SUB', '-')
def __init__(self, input):
    """Create a parser over *input*, priming a two-token lookahead.

    The two sequential next_token() calls fill current_token and
    peek_token, in that order.
    """
    self.lexer = Lexer(input)
    self.current_token = self.lexer.next_token()
    self.peek_token = self.lexer.next_token()
from lexer import Lexer

# Build the lexer once; feed it the whole test file and dump every token
# both to stdout and to a result file.
object1 = Lexer().build()

# FIX: all file handles are now closed via context managers — the input
# and output files leaked on any error in the token loop.
with open("test1.txt") as inputFile:
    inputText = inputFile.read()

with open("test1 result.txt", "w") as outputFile:
    object1.input(inputText)
    while True:
        tok = object1.token()
        if not tok:
            break
        print(tok)
        outputFile.write(str(tok) + "\n")
from gen_code import CodeGenerator if __name__ == "__main__": if len(sys.argv) != 2: print(f"Usage: {sys.argv[0]} c_file.c") exit(1) try: # for path in os.listdir("tests/stage_1/valid"): c_file = open(sys.argv[1]) # c_file = open("tests/stage_1/valid/" + path) src_code = c_file.read() c_file.close() # print(src_code) lexer = Lexer(src_code) # lexer.print_all_tokens() parser = Parser(lexer) ast_root = parser.parse_program() # print("parsing successful") # exit(0) out = open("assembly.s", "w") # sys.stdout CodeGenerator(ast_root, out).generate_code() out.close() # print("Code Generated.\nto get executable use this:") # print(f"\tgcc -m32 {out.name}") # print("then execute by ./a.out") # print("to verify the return value run echo $?") except Exception as ex:
def __init__(self):
    """Build the lexer and a debug-enabled yacc parser from this module's
    grammar rules."""
    self.accept = True  # presumably cleared when a parse error occurs — confirm
    self.lexer = Lexer()
    self.parser = yacc.yacc(module=self, debug=True)
FALSE False
NAME test_Name-name
NUMBER 6956
STRING \"A_String!\" """


def test_lexer():
    """Drive the lexer over each (token-name, literal) pair scraped from the
    `tokens` table above and check nextToken() classifies each literal."""
    token_string_dict = {}
    # Keep only lines of the table that are exactly "NAME literal" pairs.
    for line in tokens.split('\n'):
        if len(pair := line.split()) == 2:
            name, string = line.split()
            token_string_dict[name] = string
    #print(token_string_dict)
    import time
    test_lexer = Lexer(input=' ', fpath=grammar_fpath)
    for name, string in token_string_dict.items():
        #print("Help me"); time.sleep(0.1)
        # Re-point the lexer at the literal by resetting its buffer,
        # position, and current character directly.
        test_lexer.input = list(string)
        test_lexer.p = 0
        test_lexer.c = test_lexer.input[0]
        token = test_lexer.nextToken()
        assert token._tname == name


if __name__ == "__main__":
    test_lexer()
        temp = cur;
        cur = prev + cur;
        prev = temp;
        counter = counter + 1;
    }
    printf("%d ", cur);
    x = x +1;
    }
}
"""

from tests.system_tests.code_runner import run_code
from lexer import Lexer
from parser_ import Parser
from compiler.compiler import Compiler
from ast.visualizer import Visualizer

# Compile the sample program and print the generated output.
l = Lexer(code2)
p = Parser(l)
c = Compiler(p)
print("=" * 10)
print(c.compile())
print("=" * 10)

# Fresh lexer/parser pipeline for visualization — presumably the first
# pipeline is consumed by compile(); confirm.
l = Lexer(code2)
p = Parser(l)
v = Visualizer(p)
print(run_code(code2))
v.visualize()
from lexer import Lexer
from parser import Parser
from grammar import Grammar

# Lex the source file, load the augmented grammar and its parse table,
# and run the table-driven parser.
lexer = Lexer('phase_2.vas')
grammar = Grammar("augmented_grammar.txt", "table.txt")
parser = Parser(lexer, grammar)
# NOTE(review): the meaning of the argument 1 (phase? verbosity?) is not
# visible here — confirm against Parser.parse.
parser.parse(1)
from tag import Tag
from lexer import Lexer
from analisadorParser import AnalisadorParser

if __name__ == "__main__":
    # Lex and parse the Pascal source, then dump the symbol table.
    lexer = Lexer('teste.pasc')
    parser = AnalisadorParser(lexer)
    parser.prog()
    # FIX: the original had `parser.lexer.closeFile` — a bare attribute
    # access missing the call parentheses, i.e. a silent no-op. The lexer
    # is properly closed via lexer.closeFile() below, so the dead statement
    # was removed.
    print("\n=>Tabela de simbolos:")
    lexer.printTS()
    lexer.closeFile()
    print('\n=> Fim da compilacao')
from lexer import Lexer
from parser import Parser
from evaluator import init_env, evaluate, EvalExeption

# Evaluate a small arithmetic program end to end: lex -> parse -> evaluate.
try:
    lexer = Lexer()
    tokens = lexer.tokenize('print(9+2.2*2)')
    parser = Parser()
    ast = parser.parse(tokens)
    #print('==== AST ====\n%s' % ast)
    result = evaluate(ast, init_env())
    #print('==== RESULT ====\n%s' % result)
except EvalExeption as e:  # sic: "EvalExeption" is the name evaluator exports
    print('[ERROR]' + str(e))
except KeyboardInterrupt:
    print('\nInterrupted')
def main():
    """norminette entry point: collect the target C/header files (from the
    command line, --cfile/--hfile content, or the current tree) and run the
    rule registry on each, exiting non-zero if any file had errors."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "file",
        help=
        "File(s) or folder(s) you wanna run the parser on. If no file provided, runs on current folder.",
        default=[],
        action='append',
        nargs='*')
    parser.add_argument("-d",
                        "--debug",
                        action="count",
                        help="Debug output (multiple values available)",
                        default=0)
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='norminette ' + str(__version__))
    #parser.add_argument('-s', '--sentry', action='store_true', default=False)
    parser.add_argument(
        '--cfile',
        action='store',
        help="Store C file content directly instead of filename")
    parser.add_argument(
        '--hfile',
        action='store',
        help="Store header file content directly instead of filename")
    args = parser.parse_args()
    registry = Registry()
    targets = []
    has_err = None
    content = None
    debug = args.debug
    #if args.sentry == True:
    #sentry_sdk.init("https://[email protected]/72")
    if args.cfile != None or args.hfile != None:
        # Content passed directly on the command line; use a dummy filename.
        targets = ['file.c'] if args.cfile else ['file.h']
        content = args.cfile if args.cfile else args.hfile
    else:
        args.file = args.file[0]
        if args.file == [[]] or args.file == []:
            targets = glob.glob("**/*.[ch]", recursive=True)
            # FIX: list.sort() sorts in place and returns None; the result
            # was previously assigned to an unused variable.
            targets.sort()
        else:
            for arg in args.file:
                if os.path.exists(arg) is False:
                    print(f"'{arg}' no such file or directory")
                elif os.path.isdir(arg):
                    if arg[-1] != '/':
                        arg = arg + '/'
                    targets.extend(glob.glob(arg + '**/*.[ch]',
                                             recursive=True))
                elif os.path.isfile(arg):
                    targets.append(arg)
    event = []
    for target in targets:
        if target[-2:] not in [".c", ".h"]:
            # FIX: the message referred to `arg`, which is only bound on the
            # directory-walking path above; report the offending target.
            print(f"{target} is not valid C or C header file")
        else:
            with configure_scope() as scope:
                scope.set_extra("File", target)
                try:
                    event.append(Event())
                    #if args.sentry == True:
                    #    proc = Thread(target=timeout, args=(event[-1], 5, ))
                    #    proc.daemon = True
                    #    proc.start()
                    if content == None:
                        with open(target) as f:
                            #print ("Running on", target)
                            source = f.read()
                    else:
                        source = content
                    lexer = Lexer(source)
                    tokens = lexer.get_tokens()
                    context = Context(target, tokens, debug)
                    registry.run(context, source)
                    event[-1].set()
                    # FIX: `context.errors is not []` compared identity with
                    # a fresh list and was therefore always True; test
                    # truthiness instead.
                    if context.errors:
                        has_err = True
                except TokenError as e:
                    has_err = True
                    print(target + f": KO!\n\t{colors(e.msg, 'red')}")
                    event[-1].set()
                except CParsingError as e:
                    has_err = True
                    print(target + f": KO!\n\t{colors(e.msg, 'red')}")
                    event[-1].set()
                except KeyboardInterrupt as e:
                    event[-1].set()
                    sys.exit(1)
    sys.exit(1 if has_err else 0)
text += self._expr_src(defn.kids[1], 1) return text def _expr_src(self, expr, depth=0): indent = ' ' * (depth * SrcBuilder._INDENT) text = indent if expr.tag == 'APP': kids_text = ([ self._expr_src(e, depth + 1) for e in expr.kids[0].kids ]) text += f'( {expr.name}\n{"".join(kids_text)}{indent})\n' elif expr.tag == 'REF': text += f'{expr.name}\n' elif expr.tag == 'INT': text += f'{expr.value}\n' elif SrcBuilder._TAGS_MAP.get(expr.tag, None): kids_text = [self._expr_src(e, depth + 1) for e in expr.kids] text += f'{SrcBuilder._TAGS_MAP[expr.tag]}\n{"".join(kids_text)}' else: assert False, f'unhandled tag {expr.tag}' return text if __name__ == '__main__': import sys from lexer import Lexer lexer = Lexer(sys.argv[1:]) parser = Parser(lexer) ast, errors = parser.parse() print(ast, errors)
def run1():
    """Parse an abstract declarator from a C declaration fragment and dump
    the resulting AST.

    FIX: the original used a Python-2 print statement.
    """
    cstr = "char *(*)() ,"
    node = AbstractDeclarator()
    node.parse(Lexer(cstr, True), Symbols())
    print(node.deepstr())
def coordinate(p, token_idx):
    """Return (line, column) for the token at *token_idx* in production *p*.

    The column is computed from the offset of the last newline before the
    token in the raw lexer input.
    """
    last_cr = p.lexer.lexdata.rfind('\n', 0, p.lexpos(token_idx))
    if last_cr < 0:
        last_cr = -1
    column = (p.lexpos(token_idx) - (last_cr))
    return (p.lineno(token_idx), column)


class ParseError(Exception):
    # Raised on grammar errors; carries a "coord: message" string.
    pass


def parse_error(self, msg, coord):
    """Abort parsing with a ParseError that includes the source coordinate."""
    raise ParseError("%s: %s" % (coord, msg))


# Module-level lexer: built once; its token list is shared with yacc below.
buffer = Lexer(parse_error)
buffer.run()
tokens = buffer.tokens

# Operator precedence table for yacc, from lowest to highest precedence.
precedence = (
    ('left', 'OR'),
    ('left', 'EQUAL'),
    ('left', 'GT', 'LT'),
    ('left', 'PLUS', 'MINUS'),
    ('left', 'INC', 'DEC')
)
# Token table for the ~ATH lexer: (regex, token-kind) pairs, tried in order.
# Patterns with kind None are discarded (comments, whitespace).
ath_lexer = Lexer([
    (r'(?s)/\*.*?\*/', None),  # Multi-line comment
    (r'//[^\n]*', None),       # Single-line comment
    (r'\s+', None),            # Whitespace
    # Code enclosures
    (r'\(', 'BUILTIN'),  # Conditional/Call open
    (r'\)', 'BUILTIN'),  # Conditional/Call close
    (r'{', 'BUILTIN'),   # Suite open
    (r'}', 'BUILTIN'),   # Suite close
    (r'\[', 'BUILTIN'),  # Symbol slice open
    (r'\]', 'BUILTIN'),  # Symbol slice close
    # Separators
    (r';', 'BUILTIN'),   # Statement separator
    (r'\.', 'BUILTIN'),  # Lookup operator
    (r',', 'BUILTIN'),   # Group operator
    # Arithmetic in-place operators
    (r'\+=', 'BUILTIN'),    # Add
    (r'-=', 'BUILTIN'),     # Sub
    (r'\*\*=', 'BUILTIN'),  # Pow
    (r'\*=', 'BUILTIN'),    # Mul
    (r'/_=', 'BUILTIN'),    # FloorDiv
    (r'/=', 'BUILTIN'),     # TrueDiv
    (r'%=', 'BUILTIN'),     # Modulo
    # Arithmetic operators
    (r'\+', 'BUILTIN'),   # Add, UnaryPos
    (r'-', 'BUILTIN'),    # Sub, UnaryInv
    (r'\*\*', 'BUILTIN'), # Pow
    (r'\*', 'BUILTIN'),   # Mul
    (r'/_', 'BUILTIN'),   # FloorDiv
    (r'/', 'BUILTIN'),    # TrueDiv
    (r'%', 'BUILTIN'),    # Modulo
    # Symbol operators
    (r'!=!', 'BUILTIN'),   # Assert Both
    (r'!=\?', 'BUILTIN'),  # Assert Left
    (r'\?=!', 'BUILTIN'),  # Assert Right
    (r'~=!', 'BUILTIN'),   # Negate Left
    (r'!=~', 'BUILTIN'),   # Negate Right
    (r'~=~', 'BUILTIN'),   # Negate Both
    # Bitwise shift in-place operators
    (r'<<=', 'BUILTIN'),  # Bitwise lshift
    (r'>>=', 'BUILTIN'),  # Bitwise rshift
    # Bitwise shift operators
    (r'<<', 'BUILTIN'),  # Bitwise lshift
    (r'>>', 'BUILTIN'),  # Bitwise rshift
    # Value operators
    (r'<=', 'BUILTIN'),  # Less than or equal to
    (r'<', 'BUILTIN'),   # Less than
    (r'>=', 'BUILTIN'),  # Greater than or equal to
    (r'>', 'BUILTIN'),   # Greater than
    (r'~=', 'BUILTIN'),  # Not equal to
    (r'==', 'BUILTIN'),  # Equal to
    # Boolean operators
    (r'&&', 'BUILTIN'),    # Boolean AND
    (r'\|\|', 'BUILTIN'),  # Boolean OR
    (r'\^\^', 'BUILTIN'),  # Boolean XOR
    (r'!', 'BUILTIN'),     # Boolean NOT
    # Statement keywords
    (r'DIE', 'BUILTIN'),        # Kill symbol
    (r'~ATH', 'BUILTIN'),       # Loop
    (r'print', 'BUILTIN'),      # Output
    (r'input', 'BUILTIN'),      # Input
    (r'import', 'BUILTIN'),     # Import another file
    (r'DEBATE', 'BUILTIN'),     # Conditional Consequent
    (r'UNLESS', 'BUILTIN'),     # Conditional Alternative
    (r'EXECUTE', 'BUILTIN'),    # Subroutine execution
    (r'DIVULGATE', 'BUILTIN'),  # Return a symbol
    (r'FABRICATE', 'BUILTIN'),  # Subroutine declaration
    (r'PROCREATE', 'BUILTIN'),  # Value declaration
    (r'REPLICATE', 'BUILTIN'),  # Deep copy
    (r'BIFURCATE', 'BUILTIN'),  # Split a symbol
    (r'AGGREGATE', 'BUILTIN'),  # Merge a symbol
    # Bitwise in-place operators
    (r'&=', 'BUILTIN'),   # Bitwise and
    (r'\|=', 'BUILTIN'),  # Bitwise or
    (r'\^=', 'BUILTIN'),  # Bitwise xor
    # Bitwise operators
    (r'&', 'BUILTIN'),   # Bitwise and
    (r'\|', 'BUILTIN'),  # Bitwise or
    (r'\^', 'BUILTIN'),  # Bitwise xor
    (r'~', 'BUILTIN'),   # Bitwise not
    # Other identifiers
    # FIX: the STRING pattern was ([\'"])[^\1]*?\1 — backreferences are not
    # interpreted inside a character class, so [^\1] excluded the literal
    # \x01 byte rather than the opening quote. A lazy (?s).*? up to the
    # matching quote expresses the intent (and the de-facto behavior).
    (r'(?s)([\'"]).*?\1', 'STRING'),
    (r'(\d+\.(\d*)?|\.\d+)([eE][-+]?\d+)?', 'FLOAT'),
    (r'\d+', 'INT'),
    (r'[a-zA-Z]\w*', 'SYMBOL'),
])
class Parser:
    """Recursive-descent parser for the SLU-C language.

    Tokens come from ``Lexer`` as tuples; throughout this class
    ``currtok[0]`` is the lexeme, ``currtok[1].name`` the token kind,
    and ``currtok[2]`` the (1-past) line number used in error messages.
    """

    def __init__(self, fn: str):
        """
        :param fn: a file_name str that it will read through

        lex: a Lexer that will split up all the tokens in the file
        tg: the token generator for the Lexer
        currtok: the current token being looked at
        ids: a dictionary of all the ids that have been declared
        functions: maps declared function names to their return types
        """
        self.lex = Lexer(fn)
        self.tg = self.lex.token_generator()
        self.currtok = next(self.tg)
        self.ids = dict()
        self.functions = dict()

    def Program(self):
        """Parse the highest level: zero or more function definitions.

        :return: the AST of the list of function definitions
        """
        funcs = list()
        while self.currtok[1].name in {"INT", "FLOAT", "BOOLEAN"}:
            func = self.FunctionDef()
            funcs.append(func)
        return Program(funcs)

    def FunctionDef(self):
        """Parse one function definition: return type, identifier,
        parameters, then the declarations and statements of the body.

        :return: a FunctionDef abstract syntax tree
        :raises SLUCSyntaxError: missing parentheses around the
            parameter list following the function's identifier
        :raises SLUCSyntaxError: missing curly braces around the body
        """
        ret_type = self.Type()
        key = self.currtok[0]
        # Register the name as "first_call" before parsing it so that
        # primary() accepts the not-yet-complete function's own name.
        self.functions[self.currtok[0]] = "first_call"
        ident = self.primary()
        self.functions[key] = ret_type
        if self.currtok[1].name == "LPAREN":
            self.currtok = next(self.tg)
            params = self.Params()
            if self.currtok[1].name == "RPAREN":
                self.currtok = next(self.tg)
                if self.currtok[1].name == "LCURLY":
                    self.currtok = next(self.tg)
                    decs = self.Declarations()
                    states = self.Statements()
                    if self.currtok[1].name == "RCURLY":
                        self.currtok = next(self.tg)
                        return FunctionDef(ret_type, ident, params, decs,
                                           states)
                    # NOTE(review): "- 1 - 1" differs from every other
                    # error message's "- 1" offset — confirm intended.
                    raise SLUCSyntaxError(
                        "ERROR: Missing Right Curly Brace on line {0}".format(
                            str(self.currtok[2] - 1 - 1)))
                raise SLUCSyntaxError(
                    "ERROR: Missing Left Curly Brace on line {0}".format(
                        str(self.currtok[2] - 1)))
            raise SLUCSyntaxError(
                "ERROR: Missing Right Paren on line {0}".format(
                    str(self.currtok[2] - 1)))
        raise SLUCSyntaxError("ERROR: Missing Left Paren on line {0}".format(
            str(self.currtok[2] - 1)))

    def Params(self):
        """Collect a function's (possibly empty) parameter list; each
        parameter identifier is also recorded in ``self.ids``.

        :return: a Params abstract syntax tree
        """
        params = list()
        if self.currtok[1].name in {"INT", "FLOAT", "BOOLEAN"}:
            decl_type = self.Type()
            self.ids[self.currtok[0]] = decl_type
            ident = self.primary()
            par = Param(decl_type, ident)
            params.append(par)
            while self.currtok[1].name in {"COMMA"}:
                self.currtok = next(self.tg)
                decl_type = self.Type()
                self.ids[self.currtok[0]] = decl_type
                ident = self.primary()
                par = Param(decl_type, ident)
                params.append(par)
        return Params(params)

    def Declarations(self):
        """Parse zero or more declarations via :meth:`Declaration`.

        :return: a DeclarationsExpr abstract syntax tree
        """
        decs = list()
        while self.currtok[1].name in {"INT", "FLOAT", "BOOLEAN"}:
            dec = self.Declaration()
            decs.append(dec)
        return DeclarationsExpr(decs)

    def Declaration(self):
        """Parse a single declaration and record the identifier's type
        in ``self.ids``.

        :return: a DeclarationExpr abstract syntax tree
        :raises SLUCSyntaxError: missing semicolon after the declaration
        :raises SLUCSyntaxError: no identifier was provided
        """
        decl_type = self.Type()
        if self.currtok[1].name == "IDENT":
            self.ids[self.currtok[0]] = decl_type
            ident = self.primary()
            if self.currtok[1].name == "SEMI":
                self.currtok = next(self.tg)
                return DeclarationExpr(decl_type, ident)
            raise SLUCSyntaxError("ERROR Missing Semicolon on line {0}".format(
                str(self.currtok[2] - 1)))
        raise SLUCSyntaxError("ERROR Missing Identifier on line {0}".format(
            str(self.currtok[2] - 1)))

    def Type(self):
        """Consume a type keyword (used for returns, params, declarations).

        :return: the type token's lexeme
        :raises SLUCSyntaxError: the current token is not a valid type
        """
        if self.currtok[1].name in {"INT", "FLOAT", "BOOLEAN"}:
            tok_type = self.currtok[0]
            self.currtok = next(self.tg)
            return tok_type
        raise SLUCSyntaxError("ERROR: Unexpected token {0} on line {1}".format(
            self.currtok[1], str(self.currtok[2] - 1)))

    def Statements(self):
        """Parse zero or more statements into a list.

        :return: a StatementsStmt abstract syntax tree
        """
        states = list()
        while self.currtok[1].name in {
                "SEMI", "LCURLY", "IDENT", "if", "print", "while", "return"
        }:
            state = self.Statement()
            states.append(state)
        return StatementsStmt(states)

    def Statement(self):
        """Dispatch to the parser for the current statement token.

        :return: the AST matching the statement
        :raises SLUCSyntaxError: the token does not start a statement
        """
        if self.currtok[1].name == "SEMI":
            self.currtok = next(self.tg)
            return semicolon()
        if self.currtok[1].name == "LCURLY":
            return self.Block()
        if self.currtok[1].name == "IDENT":
            # A known function name means a call; otherwise assignment.
            if self.functions.get(self.currtok[0]) is None:
                return self.Assignment()
            else:
                return self.FunctionCall()
        if self.currtok[1].name == "if":
            return self.IfStatement()
        if self.currtok[1].name == "print":
            return self.PrintStmt()
        if self.currtok[1].name == "while":
            return self.WhileStatement()
        if self.currtok[1].name == "return":
            return self.ReturnStmt()
        raise SLUCSyntaxError("ERROR: Unexpected token {0} on line {1}".format(
            self.currtok[1], str(self.currtok[2] - 1)))

    def ReturnStmt(self):
        """Parse ``return <expression> ;``.

        :return: a returnStmt abstract syntax tree
        :raises SLUCSyntaxError: missing semicolon after the expression
        """
        self.currtok = next(self.tg)
        express = self.Expression()
        if self.currtok[1].name == "SEMI":
            self.currtok = next(self.tg)
            return returnStmt(express)
        raise SLUCSyntaxError("ERROR: Missing Semicolon on line {0}".format(
            str(self.currtok[2] - 1)))

    def Block(self):
        """Parse a set of zero or more statements within curly braces.

        :return: a BlockExpr abstract syntax tree
        :raises SLUCSyntaxError: missing the closing right curly brace
        """
        self.currtok = next(self.tg)
        statements = self.Statements()
        if self.currtok[1].name == "RCURLY":
            self.currtok = next(self.tg)
            return BlockExpr(statements.get_lst())
        raise SLUCSyntaxError("ERROR: Right Curly Brace on line {0}".format(
            str(self.currtok[2] - 1)))

    def Assignment(self):
        """Parse assignment of a value to a declared identifier.

        :return: an assignmentStmt abstract syntax tree
        :raises SLUCSyntaxError: missing semicolon after the value
        :raises SLUCSyntaxError: missing the assignment operator
        """
        ident = self.primary()
        if self.currtok[1].name == "DECLERATION":
            self.currtok = next(self.tg)
            if self.functions.get(self.currtok[0]) is not None:
                # NOTE(review): the function-call branch returns without
                # requiring a trailing SEMI; the SEMI then parses as an
                # empty statement in Statements(). Confirm intended.
                express = self.FunctionCall()
                return assignmentStmt(ident, express)
            else:
                express = self.Expression()
                if self.currtok[1].name == "SEMI":
                    self.currtok = next(self.tg)
                    return assignmentStmt(ident, express)
                raise SLUCSyntaxError(
                    "ERROR: Missing Semicolon on line {0}".format(
                        str(self.currtok[2] - 1)))
        raise SLUCSyntaxError("ERROR: Missing assignment on line {0}".format(
            str(self.currtok[2] - 1)))

    def IfStatement(self):
        """Parse ``if (<expression>) <statement> [else <statement>]``.

        :return: an ifStmt or ifelseStmt abstract syntax tree
        :raises SLUCSyntaxError: missing left paren after ``if``
        :raises SLUCSyntaxError: missing right paren after the condition
        """
        self.currtok = next(self.tg)
        if self.currtok[1].name == "LPAREN":
            self.currtok = next(self.tg)
            express = self.Expression()
            if self.currtok[1].name == "RPAREN":
                self.currtok = next(self.tg)
                state = self.Statement()
                if self.currtok[1].name == "else":
                    self.currtok = next(self.tg)
                    state2 = self.Statement()
                    return ifelseStmt(express, state, state2)
                else:
                    return ifStmt(express, state)
            raise SLUCSyntaxError(
                "ERROR: Missing right paren on line {0}".format(
                    str(self.currtok[2] - 1)))
        raise SLUCSyntaxError("ERROR: Missing left paren on line {0}".format(
            str(self.currtok[2] - 1)))

    def WhileStatement(self):
        """Parse ``while (<expression>) <statement>``.

        :return: a whileStmt abstract syntax tree
        :raises SLUCSyntaxError: missing left paren after ``while``
        :raises SLUCSyntaxError: missing right paren after the condition
        """
        self.currtok = next(self.tg)
        if self.currtok[1].name == "LPAREN":
            self.currtok = next(self.tg)
            express = self.Expression()
            if self.currtok[1].name == "RPAREN":
                self.currtok = next(self.tg)
                state = self.Statement()
                return whileStmt(express, state)
            raise SLUCSyntaxError(
                "ERROR: Missing right paren on line {0}".format(
                    str(self.currtok[2] - 1)))
        raise SLUCSyntaxError("ERROR: Missing left paren on line {0}".format(
            str(self.currtok[2] - 1)))

    def PrintStmt(self):
        """Parse ``print ( <arg> {, <arg>} )`` followed by a semicolon.

        :return: a printstmtStmt abstract syntax tree
        :raises SLUCSyntaxError: missing left paren after ``print``
        :raises SLUCSyntaxError: missing right paren or separating comma
        :raises SLUCSyntaxError: missing the trailing semicolon
        """
        args = list()
        self.currtok = next(self.tg)
        if self.currtok[1].name == "LPAREN":
            self.currtok = next(self.tg)
            arg = self.PrintArg()
            args.append(arg)
            while self.currtok[1].name == "COMMA":
                self.currtok = next(self.tg)
                arg = self.PrintArg()
                args.append(arg)
            if self.currtok[1].name == "RPAREN":
                self.currtok = next(self.tg)
                if self.currtok[1].name == "SEMI":
                    # NOTE(review): the SEMI is checked but not consumed
                    # here (unlike ReturnStmt); it is later eaten as an
                    # empty statement. Confirm intended.
                    return printstmtStmt(args)
                raise SLUCSyntaxError(
                    "ERROR: Missing right semicolon line {0}".format(
                        str(self.currtok[2] - 1)))
            raise SLUCSyntaxError(
                "ERROR: Missing right paren or a comma line {0}".format(
                    str(self.currtok[2] - 1)))
        raise SLUCSyntaxError("ERROR: Missing left paren on line {0}".format(
            str(self.currtok[2] - 1)))

    def PrintArg(self):
        """Parse one print argument: a string literal, a function call,
        or an expression.

        :return: a printArg abstract syntax tree
        """
        if self.currtok[1].name == "STRING_LIT":
            arg = String_LitExpr(self.currtok[0])
            self.currtok = next(self.tg)
            return printArg(arg)
        if self.functions.get(self.currtok[0]) is not None:
            arg = self.FunctionCall()
            return printArg(arg)
        arg = self.Expression()
        return printArg(arg)

    def FunctionCall(self):
        """Parse a call: identifier plus comma-separated arguments in
        parentheses.

        :return: a FuncIDExpr abstract syntax tree
        :raises SLUCSyntaxError: missing comma between two arguments
        :raises SLUCSyntaxError: missing left paren after the identifier
        :raises SLUCSyntaxError: missing right paren after the arguments
        """
        ident = self.currtok[0]
        self.currtok = next(self.tg)
        if self.currtok[1].name == "LPAREN":
            self.currtok = next(self.tg)
            params = list()
            while self.currtok[1].name in {"BOOL", "INTLIT", "IDENT", "REAL"}:
                param = self.Expression()
                if self.currtok[1].name != "RPAREN":
                    if self.currtok[1].name == "COMMA":
                        self.currtok = next(self.tg)
                    else:
                        raise SLUCSyntaxError(
                            "ERROR: Missing comma on line {0}".format(
                                str(self.currtok[2] - 1)))
                params.append(param)
            if self.currtok[1].name == "RPAREN":
                self.currtok = next(self.tg)
                return FuncIDExpr(ident, params)
            raise SLUCSyntaxError(
                "ERROR: Missing right paren on line {0}".format(
                    str(self.currtok[2] - 1)))
        raise SLUCSyntaxError("ERROR: Missing left paren on line {0}".format(
            str(self.currtok[2] - 1)))

    def Expression(self, paren=False):
        """One Conjunction, possibly followed by more separated by ``||``.

        :return: an abstract syntax tree
        """
        left = self.Conjunction(paren)
        while self.currtok[1].name == "OR":
            op = self.currtok[0]
            self.currtok = next(self.tg)
            # NOTE(review): paren is not forwarded here, unlike the
            # analogous loops below — confirm intended.
            right = self.Conjunction()
            left = BinaryExpr(op, left, right, paren)
        return left

    def Conjunction(self, paren=False):
        """One Equality, possibly followed by more separated by ``&&``.

        :return: an abstract syntax tree
        """
        left = self.Equality(paren)
        while self.currtok[1].name == "AND":
            op = self.currtok[0]
            self.currtok = next(self.tg)
            right = self.Equality(paren)
            left = BinaryExpr(op, left, right, paren)
        return left

    def Equality(self, paren=False):
        """One Relation, optionally ``==`` or ``!=`` one more Relation.

        :return: an abstract syntax tree
        """
        left = self.Relation(paren)
        if self.currtok[1].name in {"EQULITY", "NOTEQUAL"}:
            op = self.currtok[0]
            self.currtok = next(self.tg)
            right = self.Relation(paren)
            left = BinaryExpr(op, left, right, paren)
        return left

    def Relation(self, paren=False):
        """One Addition, optionally compared (``>``, ``<``, ``>=``,
        ``<=``) with one more Addition.

        :return: an abstract syntax tree
        """
        left = self.Addition(paren)
        if self.currtok[1].name in {"GREATERTHAN", "LESSTHAN", "LET", "GET"}:
            op = self.currtok[0]
            self.currtok = next(self.tg)
            right = self.Addition(paren)
            left = BinaryExpr(op, left, right, paren)
        return left

    def Addition(self, paren=False):
        """One Term, possibly followed by more separated by ``+``/``-``.

        :return: an abstract syntax tree
        """
        left = self.Term(paren)
        while self.currtok[1].name in {"PLUS", "MINUS"}:
            op = self.currtok[0]
            self.currtok = next(self.tg)
            right = self.Term(paren)
            left = BinaryExpr(op, left, right, paren)
        return left

    def Term(self, paren=False):
        """One Factor, possibly followed by more separated by ``*``,
        ``/`` or ``%``.

        :return: an abstract syntax tree
        """
        left = self.Factor()
        while self.currtok[1].name in {"TIMES", "DIVISION", "MOD"}:
            op = self.currtok[0]
            self.currtok = next(self.tg)
            right = self.Factor()
            left = BinaryExpr(op, left, right, paren)
        return left

    def Factor(self):
        """A primary expression optionally preceded by ``!`` or ``-``.

        :return: an abstract syntax tree
        """
        if self.currtok[1].name in {"MINUS", "NOT"}:
            op = self.currtok[0]
            self.currtok = next(self.tg)
            prime = self.primary()
            # Factor here is the module-level AST node, not this method.
            return Factor(op, prime)
        return self.primary()

    def primary(self):
        """Parse an identifier, literal (int/bool/real/string), function
        call, or a parenthesized expression.

        :return: an abstract syntax tree
        :raises SLUCSyntaxError: an identifier that was never declared
        :raises SLUCSyntaxError: missing right paren after ``( expr``
        :raises SLUCSyntaxError: an invalid token was passed in
        """
        if self.currtok[1].name == "IDENT":
            tmp = self.currtok
            # BUGFIX: the original used `is`/`is not` to compare against
            # the string "first_call" — identity, not equality.
            if self.functions.get(tmp[0]) != "first_call" and \
                    self.functions.get(tmp[0]) is not None:
                func = self.FunctionCall()
                return func
            elif self.ids.get(tmp[0]) is not None or \
                    self.functions.get(tmp[0]) == "first_call":
                self.currtok = next(self.tg)
                return IDExpr(tmp[0])
            else:
                raise SLUCSyntaxError(
                    "ERROR: Given ID {0} was not declared above on line {1}".
                    format(tmp[0], str(self.currtok[2] - 1)))
        if self.currtok[1].name == "INTLIT":
            tmp = self.currtok
            self.currtok = next(self.tg)
            return IntLitExpr(tmp[0])
        if self.currtok[1].name == "BOOL":
            # BUGFIX: the original bound tmp = self.currtok[0] and then
            # built BoolExpr(tmp[0]) — the first CHARACTER of the lexeme.
            # Now consistent with the INTLIT/REAL branches.
            tmp = self.currtok
            self.currtok = next(self.tg)
            return BoolExpr(tmp[0])
        if self.currtok[1].name == "REAL":
            tmp = self.currtok
            self.currtok = next(self.tg)
            return Real_Expr(tmp[0])
        if self.currtok[1].name == "STRING_LIT":
            tmp = self.currtok
            self.currtok = next(self.tg)
            return String_LitExpr(tmp[0])
        if self.currtok[1].name == "LPAREN":
            self.currtok = next(self.tg)
            tree = self.Expression(True)
            if self.currtok[1].name == "RPAREN":
                self.currtok = next(self.tg)
                return tree
            else:
                raise SLUCSyntaxError(
                    "ERROR: Missing right paren on line {0}".format(
                        str(self.currtok[2] - 1)))
        raise SLUCSyntaxError("ERROR: Unexpected token {0} on line {1}".format(
            self.currtok[1], str(self.currtok[2] - 1)))
def test_next_token():
    """Run the lexer over a snippet exercising every token type and
    check each emitted token's type and literal against expectations.

    Note: renamed the source variable from ``input`` (shadowed the
    builtin) and replaced the hand-maintained counter with enumerate.
    """
    source = """let five = 5;
let ten = 10;
let add = fn(x, y){
x + y;
};
let result = add(five, ten);
!-/*5;
5 < 10 > 5;
if 5 == 10 != 8 {
return true;
} else {
return false;
}
# this is a comment it should all be ignored #
not_a_comment; # comments again let return {}
a_b;
"foobar"
"foo bar"
[1, 2];
{5: 2};
for(x in v){break;};
while(true){continue;};
"""
    tests = [
        (tokens.LET, "let"),
        (tokens.IDENT, "five"),
        (tokens.ASSIGN, "="),
        (tokens.INT, "5"),
        (tokens.SEMICOLON, ";"),
        (tokens.LET, "let"),
        (tokens.IDENT, "ten"),
        (tokens.ASSIGN, "="),
        (tokens.INT, "10"),
        (tokens.SEMICOLON, ";"),
        (tokens.LET, "let"),
        (tokens.IDENT, "add"),
        (tokens.ASSIGN, "="),
        (tokens.FUNCTION, "fn"),
        (tokens.LPAREN, "("),
        (tokens.IDENT, "x"),
        (tokens.COMMA, ","),
        (tokens.IDENT, "y"),
        (tokens.RPAREN, ")"),
        (tokens.LBRACE, "{"),
        (tokens.IDENT, "x"),
        (tokens.PLUS, "+"),
        (tokens.IDENT, "y"),
        (tokens.SEMICOLON, ";"),
        (tokens.RBRACE, "}"),
        (tokens.SEMICOLON, ";"),
        (tokens.LET, "let"),
        (tokens.IDENT, "result"),
        (tokens.ASSIGN, "="),
        (tokens.IDENT, "add"),
        (tokens.LPAREN, "("),
        (tokens.IDENT, "five"),
        (tokens.COMMA, ","),
        (tokens.IDENT, "ten"),
        (tokens.RPAREN, ")"),
        (tokens.SEMICOLON, ";"),
        (tokens.BANG, "!"),
        (tokens.MINUS, "-"),
        (tokens.SLASH, "/"),
        (tokens.ASTERISK, "*"),
        (tokens.INT, "5"),
        (tokens.SEMICOLON, ";"),
        (tokens.INT, "5"),
        (tokens.LT, "<"),
        (tokens.INT, "10"),
        (tokens.GT, ">"),
        (tokens.INT, "5"),
        (tokens.SEMICOLON, ";"),
        (tokens.IF, "if"),
        (tokens.INT, "5"),
        (tokens.EQ, "=="),
        (tokens.INT, "10"),
        (tokens.NOT_EQ, "!="),
        (tokens.INT, "8"),
        (tokens.LBRACE, "{"),
        (tokens.RETURN, "return"),
        (tokens.TRUE, "true"),
        (tokens.SEMICOLON, ";"),
        (tokens.RBRACE, "}"),
        (tokens.ELSE, "else"),
        (tokens.LBRACE, "{"),
        (tokens.RETURN, "return"),
        (tokens.FALSE, "false"),
        (tokens.SEMICOLON, ";"),
        (tokens.RBRACE, "}"),
        (tokens.IDENT, "not_a_comment"),
        (tokens.SEMICOLON, ";"),
        (tokens.IDENT, "a_b"),
        (tokens.SEMICOLON, ";"),
        (tokens.STRING, "foobar"),
        (tokens.STRING, "foo bar"),
        (tokens.LBRACKET, "["),
        (tokens.INT, "1"),
        (tokens.COMMA, ","),
        (tokens.INT, "2"),
        (tokens.RBRACKET, "]"),
        (tokens.SEMICOLON, ";"),
        (tokens.LBRACE, "{"),
        (tokens.INT, "5"),
        (tokens.COLON, ":"),
        (tokens.INT, "2"),
        (tokens.RBRACE, "}"),
        (tokens.SEMICOLON, ";"),
        (tokens.FOR, "for"),
        (tokens.LPAREN, "("),
        (tokens.IDENT, "x"),
        (tokens.IN, "in"),
        (tokens.IDENT, "v"),
        (tokens.RPAREN, ")"),
        (tokens.LBRACE, "{"),
        (tokens.BREAK, "break"),
        (tokens.SEMICOLON, ";"),
        (tokens.RBRACE, "}"),
        (tokens.SEMICOLON, ";"),
        (tokens.WHILE, "while"),
        (tokens.LPAREN, "("),
        (tokens.TRUE, "true"),
        (tokens.RPAREN, ")"),
        (tokens.LBRACE, "{"),
        (tokens.CONTINUE, "continue"),
        (tokens.SEMICOLON, ";"),
        (tokens.RBRACE, "}"),
        (tokens.SEMICOLON, ";"),
        (tokens.EOF, ""),
    ]
    l = Lexer(source)
    for i, (expected_type, expected_literal) in enumerate(tests):
        tok: tokens.Token = l.next_token()
        assert tok.typ == expected_type, f"Token type wrong: {i}. got {tok.typ}, wanted {expected_type}. token: {tok}"
        assert tok.literal == expected_literal, f"Token literal wrong: {i}"