def _t(inputs, outputs):
    """Run main() against an in-memory fake filesystem and check outputs.

    Args:
        inputs: mapping of filename -> initial file contents.
        outputs: mapping of filename -> expected final contents.

    FIX: the failure messages were f-strings with no interpolation
    ('File (unknown) missing'), so a failing run could not say which
    file was wrong; they now include the filename.
    """
    from mtots import test
    import contextlib

    # Working copy so the caller's dict is never mutated.
    contents = dict(inputs)

    @contextlib.contextmanager
    def open(name, mode='r'):
        # Fake stand-in for builtins.open, injected into main().
        if mode == 'r':
            yield FakeFile('r', contents[name])
        elif mode == 'w':
            fake_file = FakeFile('w', '')
            yield fake_file
            # Persist what main() wrote once the context closes.
            contents[name] = fake_file.read()
        else:
            assert False, mode

    main(open)

    for filename in outputs:
        test.that(filename in contents, f'File {filename} missing')
        # Prefix both sides with the filename so mismatch reports
        # identify which file's contents differed.
        test.equal(
            f'FILE({filename}):{contents[filename]}',
            f'FILE({filename}):{outputs[filename]}',
        )
def test_lexer_with_adapter():
    """Adapters registered on the builder post-process the token stream.

    The adapter below duplicates every NAME token, so each name should
    appear twice in the output while EOF passes through once.

    FIX: regex patterns are now raw strings -- '\\s' in a plain string
    literal is an invalid escape sequence (DeprecationWarning since
    Python 3.6, slated to become a SyntaxError).
    """
    @Lexer.new
    def lexer(builder):
        @builder.add(r'\s+')
        def spaces(m, mark):
            return ()

        @builder.add(r'\w+')
        def name(m, mark):
            return [Token(mark, 'NAME', m.group())]

        @builder.add_adapter
        def double_every_name_token(tokens):
            for token in tokens:
                if token.type == 'NAME':
                    yield token
                    yield token
                else:
                    yield token

    test.equal(list(lexer.lex_string('a b cc')), [
        Token(None, 'NAME', 'a'),
        Token(None, 'NAME', 'a'),
        Token(None, 'NAME', 'b'),
        Token(None, 'NAME', 'b'),
        Token(None, 'NAME', 'cc'),
        Token(None, 'NAME', 'cc'),
        Token(None, 'EOF', None),
    ])
def test_empty():
    """Whitespace-only input lexes to a single NEWLINE followed by EOF."""
    actual = list(lex_string(r"""
"""))
    expected = [
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_simple_arithmetic_grammar():
    """Left-recursive arithmetic grammar: +, -, *, /, % and parentheses.

    Forward() delays resolution so the mutually recursive rules can
    reference each other before they are all defined.
    """
    expr = Forward(name='expr', parser_factory=lambda: addexpr)

    # A number, or a parenthesized subexpression (keep the middle value).
    atom = Any('NUMBER', All('(', expr, ')').map(lambda args: args[1]))

    mulexpr = Forward(
        name='mulexpr',
        parser_factory=lambda: (
            All(mulexpr, '*', atom).map(lambda args: args[0] * args[2]) |
            All(mulexpr, '/', atom).map(lambda args: args[0] / args[2]) |
            All(mulexpr, '%', atom).map(lambda args: args[0] % args[2]) |
            atom
        ),
    )

    addexpr = Forward(
        name='addexpr',
        parser_factory=lambda: (
            All(addexpr, '+', mulexpr).map(lambda args: args[0] + args[2]) |
            All(addexpr, '-', mulexpr).map(lambda args: args[0] - args[2]) |
            mulexpr
        ),
    )

    def parse(text):
        return expr.parse(test_lexer.lex_string(text))

    # Expected values are spelled as the same Python expression so the
    # grammar's precedence must agree with Python's.
    test.equal(parse('1 + 2 - 7 * 2'), Success(None, 1 + 2 - 7 * 2))
    test.equal(parse('1 + (2 - 7) * 2'), Success(None, 1 + (2 - 7) * 2))
def test_sample_lexer():
    """Basic builder usage: skip spaces, emit NAME tokens, reject junk.

    FIX: regex patterns are now raw strings -- '\\s' in a plain string
    literal is an invalid escape sequence (DeprecationWarning since
    Python 3.6, slated to become a SyntaxError).
    """
    builder = Lexer.Builder()

    @builder.add(r'\s+')
    def spaces(m, mark):
        return ()

    @builder.add(r'\w+')
    def name(m, mark):
        return [Token(mark, 'NAME', m.group())]

    lexer = builder.build()

    test.equal(list(lexer.lex_string('a b cc')), [
        Token(None, 'NAME', 'a'),
        Token(None, 'NAME', 'b'),
        Token(None, 'NAME', 'cc'),
        Token(None, 'EOF', None),
    ])

    # NOTE(review): the message's line layout was reconstructed from a
    # whitespace-mangled source -- confirm against the lexer's actual
    # error format.
    @test.throws(Error, """Unrecognized token
<string> line 1
&
*
""")
    def lex_invalid_token():
        list(lexer.lex_string('&'))
def test_funny_sample():
    """Two statements exercising IDs, strings, and punctuation together."""
    actual = list(lex_string(r"""
print("Hello world!")
File file = fopen("setup.py", "r")
"""))
    expected = [
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'ID', 'print'),
        base.Token(None, '(', None),
        base.Token(None, 'STR', 'Hello world!'),
        base.Token(None, ')', None),
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'ID', 'File'),
        base.Token(None, 'ID', 'file'),
        base.Token(None, '=', None),
        base.Token(None, 'ID', 'fopen'),
        base.Token(None, '(', None),
        base.Token(None, 'STR', 'setup.py'),
        base.Token(None, ',', None),
        base.Token(None, 'STR', 'r'),
        base.Token(None, ')', None),
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_sample_simple_python_code():
    """Python-like input produces NEWLINE/INDENT/DEDENT structure tokens.

    NOTE(review): the source layout was reconstructed from a
    whitespace-mangled file; the expected stream implies two NEWLINEs
    before 'def' (blank first line plus the comment line) -- confirm.
    """
    actual = list(lex_string(r"""
# Some comments
def foo(
):
    pass
"""))
    expected = [
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'def', None),
        base.Token(None, 'ID', 'foo'),
        base.Token(None, '(', None),
        base.Token(None, ')', None),
        base.Token(None, ':', None),
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'INDENT', None),
        base.Token(None, 'pass', None),
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'DEDENT', None),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_struct_defn():
    """struct definitions parse with and without the 'native' qualifier."""

    def parse(s):
        # Peek('EOF') anchors the parse so trailing garbage is rejected.
        return (
            All(struct_defn, Peek('EOF'))
            .map(lambda args: args[0])
            .parse(lexer.lex_string(s))
        )

    def expected(native):
        # Same struct body either way; only the 'native' flag differs.
        return base.Success(
            None,
            ast.StructDefinition(
                mark=None,
                native=native,
                name='Foo',
                fields=[
                    ast.Field(
                        mark=None,
                        name='b',
                        type=types.NamedType('Bar'),
                    ),
                    ast.Field(
                        mark=None,
                        name='x',
                        type=types.PointerType(types.INT),
                    ),
                ],
            ),
        )

    test.equal(
        parse("""
        struct Foo {
            Bar b;
            int* x;
        }
        """),
        expected(native=False),
    )
    test.equal(
        parse("""
        native struct Foo {
            Bar b;
            int* x;
        }
        """),
        expected(native=True),
    )
def test_triple_quote():
    """Triple-quoted strings lex to STR tokens with either quote style."""
    # Both inputs should produce the identical token stream.
    expected = [
        base.Token(None, '(', None),
        base.Token(None, 'STR', 'hi'),
        base.Token(None, 'STR', 'world'),
        base.Token(None, ')', None),
        base.Token(None, 'EOF', None),
    ]
    test.equal(list(lex_string(r'''( """hi""" """world""" )''')), expected)
    test.equal(list(lex_string(r"""( '''hi''' '''world''' )""")), expected)
def test_id():
    """A bare word lexes as a single ID token."""
    actual = list(lex_string('hi'))
    expected = [
        base.Token(None, 'ID', 'hi'),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_keyword():
    """A reserved word lexes as its own token type, with no value."""
    actual = list(lex_string('for'))
    expected = [
        base.Token(None, 'for', None),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_line_comment():
    """A '#' line comment is consumed entirely and emits no tokens."""
    actual = list(lex_string("""
# this is a comment
hi
"""))
    expected = [
        base.Token(None, 'ID', 'hi'),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_triple_quote():
    """Adjacent triple-quoted strings lex as separate STR tokens."""
    actual = list(lex_string(r''' """hi""" """world""" '''))
    expected = [
        base.Token(None, 'STR', 'hi'),
        base.Token(None, 'STR', 'world'),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_block_comment():
    """A '/* ... */' block comment spanning lines emits no tokens."""
    actual = list(lex_string("""
/* this is a comment
 * this is a block comment */
hi
"""))
    expected = [
        base.Token(None, 'ID', 'hi'),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_id():
    """Backtick-quoted names lex as IDs, even language keywords.

    Reserved C keywords remain forbidden and raise LexError.
    """
    test.equal(list(lex_string('`hi`')), [
        base.Token(None, 'ID', 'hi'),
        base.Token(None, 'EOF', None),
    ])
    # Backticks allow language keywords to be used as identifiers.
    test.equal(list(lex_string('`class`')), [
        base.Token(None, 'ID', 'class'),
        base.Token(None, 'EOF', None),
    ])

    @test.throws(errors.LexError)
    def on_c_keyword():
        list(lex_string('`struct`'))
def test_eof_dedents():
    """EOF closes every open indentation block with a matching DEDENT."""
    # Note: the input deliberately ends without a trailing newline.
    actual = list(lex_string(r"""
foo
    bar"""))
    expected = [
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'ID', 'foo'),
        base.Token(None, 'NEWLINE', None),
        base.Token(None, 'INDENT', None),
        base.Token(None, 'ID', 'bar'),
        base.Token(None, 'DEDENT', None),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_blob():
    """blob.repeat() tokenizes raw code into nested bracket groups."""

    def parse(s):
        return (
            All(blob.repeat(), Peek('EOF'))
            .map(lambda args: args[0])
            .parse(lexer.lex_string(s))
        )

    blob_result = parse(r"""
# Hi, this is some code
import <stdio.h>

def main() int {
    printf("Hello world!\n");
    return 0;
}
""")

    # '{...}' bodies nest as sublists; string/number literals are
    # carried through as Python values ('Hello world!\n', 0).
    test.equal(
        blob_result,
        base.Success(
            None,
            [
                'import', '<', 'stdio', '.', 'h', '>',
                'def', 'main', '(', ')', 'int',
                [
                    '{',
                    [
                        'printf', '(', 'Hello world!\n', ')', ';',
                        'return', 0, ';',
                    ],
                    '}',
                ],
            ],
        ),
    )
def test_header():
    """A full header parses into imports plus function declarations."""

    def parse(s):
        return (
            All(header, Peek('EOF'))
            .map(lambda args: args[0])
            .parse(lexer.lex_string(s))
        )

    result = parse("""
    import <stdio.h>

    int main() {
        print("Hello world!");
    }

    void print(char* x);
    """)

    test.equal(
        result,
        base.Success(
            None,
            ast.Header(
                None,
                imports=[ast.AngleBracketImport(None, path='stdio.h')],
                decls=[
                    # Definition of main (body ignored at header level).
                    ast.FunctionDeclaration(
                        None,
                        native=False,
                        rtype=types.INT,
                        name='main',
                        params=[],
                        varargs=False,
                        attrs=[],
                    ),
                    # Forward declaration of print.
                    ast.FunctionDeclaration(
                        None,
                        native=False,
                        rtype=types.VOID,
                        name='print',
                        attrs=[],
                        params=[
                            ast.Param(
                                None,
                                type=types.PointerType(types.CHAR),
                                name='x',
                            ),
                        ],
                        varargs=False,
                    ),
                ],
            ),
        ),
    )
def test_left_recursive_grammar():
    """Left recursion associates left: 1+2+3 -> [[1, '+', 2], '+', 3]."""
    atom = Any('NAME', 'NUMBER')
    addexpr = Forward(
        name='addexpr',
        parser_factory=lambda: Any(
            All(addexpr, '+', atom),
            atom,
        ),
    )
    expr = addexpr

    def parse(text):
        return expr.parse(test_lexer.lex_string(text))

    test.equal(parse("1 + 2 + 3"), Success(None, [[1, '+', 2], '+', 3]))
def test_string_and_char_literals():
    """String vs char literals, plus escape and octal-escape handling."""
    actual = list(lex_string(r"""
"hi" 'h' "h\nh" "\123"
"""))
    expected = [
        base.Token(None, 'STR', 'hi'),
        # Single quotes produce a CHAR token, not STR.
        base.Token(None, 'CHAR', "h"),
        # \n in the source unescapes to a real newline.
        base.Token(None, 'STR', 'h\nh'),
        # \123 is an octal escape.
        base.Token(None, 'STR', chr(0o123)),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_struct_decl():
    """'struct Foo;' declares a struct; a missing semicolon is an error."""

    def parse(s):
        return (
            All(struct_decl, Peek('EOF'))
            .map(lambda args: args[0])
            .parse(lexer.lex_string(s))
        )

    test.equal(
        parse('struct Foo;'),
        base.Success(None, ast.StructDeclaration(None, 'Foo')),
    )
    # Dropping the ';' yields a Failure, not an exception.
    test.equal(
        parse('struct Foo'),
        base.Failure(None, 'Expected ; but got EOF'),
    )
def test_separators_and_operators():
    """Punctuation lexes greedily: '+=' wins over '+' followed by '='."""
    actual = list(lex_string(r"""
( ) , . += +
"""))
    expected = [
        base.Token(None, '(', None),
        base.Token(None, ')', None),
        base.Token(None, ',', None),
        base.Token(None, '.', None),
        base.Token(None, '+=', None),
        base.Token(None, '+', None),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def sample_test(): test.equal( gen_header(parser.parse_header(r""" import <stdio.h> int main() { printf("Hello world!\n"); return 0; } """)), r"""#ifndef __MAIN___NC_H #define __MAIN___NC_H /* (NC HEADER) __main__ */ #include "__main__.nc.fwd.h" #include <stdio.h> int main(); #endif/*__MAIN___NC_H*/ """, )
def test_decimal_int():
    """Decimal integers; an 'l'/'L' suffix selects LONG over INT."""
    actual = list(lex_string("""
11l 22L 33 44 0
"""))
    expected = [
        base.Token(None, 'LONG', 11),
        base.Token(None, 'LONG', 22),
        base.Token(None, 'INT', 33),
        base.Token(None, 'INT', 44),
        base.Token(None, 'INT', 0),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_newline_and_grouping():
    """NEWLINE tokens appear only at top level or inside '{}' groups.

    Newlines within '()' and '[]' are suppressed, and mismatched
    grouping symbols raise InvalidGrouping.
    """
    test.equal(
        list(lex_string(r"""
(
)[
]{
}""")),
        [
            base.Token(None, 'NEWLINE', None),
            base.Token(None, '(', None),
            base.Token(None, ')', None),
            base.Token(None, '[', None),
            base.Token(None, ']', None),
            base.Token(None, '{', None),
            # Only the newline inside '{}' survives.
            base.Token(None, 'NEWLINE', None),
            base.Token(None, '}', None),
            base.Token(None, 'EOF', None),
        ],
    )
    test.equal(
        list(lex_string(r"""({
})""")),
        [
            base.Token(None, '(', None),
            base.Token(None, '{', None),
            base.Token(None, 'NEWLINE', None),
            base.Token(None, '}', None),
            base.Token(None, ')', None),
            base.Token(None, 'EOF', None),
        ],
    )

    @test.throws(errors.InvalidGrouping)
    def mismatched_close():
        list(lex_string('( ]'))

    @test.throws(errors.InvalidGrouping)
    def unopened_close():
        list(lex_string(']'))

    # A properly matched pair lexes without raising.
    list(lex_string('[ ]'))
def test_sample_parser():
    """Mutually recursive atom/sexpr grammar parses nested s-expressions."""
    sexpr = Forward(
        name='sexpr',
        parser_factory=(
            lambda: All('(', expr.repeat(), ')').map(lambda args: args[1])),
    )
    atom = Any('NAME', 'NUMBER')
    expr = atom | sexpr
    prog = All(expr.repeat(), 'EOF').map(lambda args: args[0])

    def parse(text):
        return prog.parse(test_lexer.lex_string(text))

    result = parse("""
    (1)
    (begin
        (a b c)
    )
    """)
    test.equal(result, Success(None, [[1], ['begin', ['a', 'b', 'c']]]))
def test_decimal_float():
    """Float literals: bare/leading-dot are DOUBLE; f/F forces FLOAT,
    d/D forces DOUBLE."""
    actual = list(lex_string("""
1.0 .5 1.5f 1.5F 1.5D 1.5d
"""))
    expected = [
        base.Token(None, 'DOUBLE', 1.0),
        base.Token(None, 'DOUBLE', 0.5),
        base.Token(None, 'FLOAT', 1.5),
        base.Token(None, 'FLOAT', 1.5),
        base.Token(None, 'DOUBLE', 1.5),
        base.Token(None, 'DOUBLE', 1.5),
        base.Token(None, 'EOF', None),
    ]
    test.equal(actual, expected)
def test_import_stmt():
    """All three import forms: angle-bracket, quoted, dotted-absolute."""

    def parse(s):
        return (
            All(import_stmt.repeat(), Peek('EOF'))
            .map(lambda args: args[0])
            .parse(lexer.lex_string(s))
        )

    test.equal(
        parse("""
        import <stdio.h>
        import "stdlib.h"
        import a.b.c
        """),
        base.Success(
            None,
            [
                ast.AngleBracketImport(None, path='stdio.h'),
                ast.QuoteImport(None, path='stdlib.h'),
                ast.AbsoluteImport(None, path='a.b.c'),
            ],
        ),
    )
def test_non_greedy_str_literals():
    """String matching must be non-greedy: two adjacent literals stay
    separate tokens instead of one literal spanning both."""
    # All three quote combinations must yield the same stream.
    expected = [
        base.Token(None, 'STR', 'hi'),
        base.Token(None, 'STR', 'world'),
        base.Token(None, 'EOF', None),
    ]
    test.equal(list(lex_string(r''' "hi" "world" ''')), expected)
    test.equal(list(lex_string(r''' "hi" """world""" ''')), expected)
    test.equal(list(lex_string(r''' """hi""" "world" ''')), expected)
def test_struct():
    """Struct parser maps matched fields into NamedTuple instances."""

    class Foo(typing.NamedTuple):
        abc: float
        xyz: str

    foo_parser = Struct(Foo, [
        ['abc', 'NUMBER'],
        '+',
        ['xyz', 'NAME'],
    ])

    def parse(parser, text):
        return parser.parse(test_lexer.lex_string(text))

    test.equal(parse(foo_parser, "924 + hi"), Success(None, Foo(924, 'hi')))

    class Bar(typing.NamedTuple):
        mark: base.Mark
        abc: float
        xyz: str

    # include_mark=True injects the source Mark as the first field.
    bar_parser = Struct(
        Bar,
        [
            ['abc', 'NUMBER'],
            '+',
            ['xyz', 'NAME'],
        ],
        include_mark=True,
    )

    m = parse(bar_parser, "924 + hi")
    test.that(isinstance(m.mark, base.Mark))
    test.that(isinstance(m, Success))
    test.that(hasattr(m.value, 'mark'))
    # Reuse the parsed mark so the equality check covers the data fields.
    test.equal(m, Success(None, Bar(m.value.mark, 924, 'hi')))