def test_invalid_rule_2(l):
    """
    Register an invalid 'def' rule on lexer *l*.

    The first alternative deliberately sends Token.LexingFailure whenever
    the previous token was Token.Var; otherwise a plain Token.Def is sent.
    """
    failing_alt = Alt(
        prev_token_cond=(Token.Var, ),
        send=Token.LexingFailure,
        match_size=3,
    )
    default_alt = Alt(send=Token.Def, match_size=3)
    l.add_rules(Case(Literal('def'), failing_alt, default_alt))
def lower_matcher(expr):
    """
    Lower a token matcher to our internals.

    :type expr: liblktlang.GrammarExpr
    :rtype: langkit.lexer.Matcher
    """
    with ctx.lkt_context(expr):
        # Dispatch on the concrete node kind; keep the original check
        # order in case liblktlang node types are related by inheritance.
        if isinstance(expr, liblktlang.TokenLit):
            return Literal(json.loads(text_as_str(expr)))
        if isinstance(expr, liblktlang.TokenNoCaseLit):
            return NoCaseLit(json.loads(text_as_str(expr)))
        if isinstance(expr, liblktlang.TokenPatternLit):
            return Pattern(pattern_as_str(expr))
        # No known matcher kind: emit a diagnostic in this expression's
        # context.
        check_source_language(False, 'Invalid lexing expression')
# NOTE(review): tail of a Token class body (the `class` header is outside this
# view, so the code is left byte-identical) followed by the construction of
# foo_lexer and its complete rule set: whitespace is ignored, keywords and
# punctuation map to their tokens, and number/identifier patterns come last.
LPar = WithText() RPar = WithText() LBrace = WithText() RBrace = WithText() Equal = WithText() Plus = WithText() Number = WithText() Identifier = WithSymbol() foo_lexer = Lexer(Token) foo_lexer.add_rules( (Pattern(r'[ \n\r\t]+'), Ignore()), (Eof(), Token.Termination), (Literal('def'), Token.Def), (Literal('error'), Token.Error), (Literal('example'), Token.Example), (Literal('null'), Token.Null), (Literal(','), Token.Comma), (Literal('.'), Token.Dot), (Literal('('), Token.LPar), (Literal(')'), Token.RPar), (Literal('{'), Token.LBrace), (Literal('}'), Token.RBrace), (Literal('='), Token.Equal), (Literal('+'), Token.Plus), (Pattern('[0-9]+'), Token.Number), (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier), )
# NOTE(review): tail of a Token class body plus the start of the LKQL lexer's
# rule list; the `add_rules` call is cut off mid-list at the sub-block literal
# pattern, so the fragment is kept byte-identical rather than reformatted.
LCurl = WithText() RCurl = WithText() At = WithText() Pipe = WithText() LArrow = WithText() BigRArrow = WithText() Box = WithText() SubBlockLiteral = WithText() Comment = WithTrivia() Whitespace = WithTrivia() lkql_lexer = Lexer(Token) lkql_lexer.add_rules( (Pattern(r"[ \t\n\r]"), Token.Whitespace), (Literal("."), Token.Dot), (Literal("?."), Token.QuestionDot), (Literal("?["), Token.QuestionBrack), (Literal("?"), Token.Question), (Literal(","), Token.Coma), (Literal(";"), Token.SemiCol), (Literal(":"), Token.Colon), (Literal("_"), Token.UnderScore), (Literal("="), Token.Eq), (Literal("=="), Token.EqEq), (Literal("!="), Token.Neq), (Literal("!!"), Token.ExclExcl), (Literal("<"), Token.Lt), (Literal("<="), Token.LEq), (Literal(">"), Token.Gt), (Literal(">="), Token.GEq), (Literal("and"), Token.And), (Literal("or"), Token.Or), (Literal("+"), Token.Plus), (Literal("-"), Token.Minus), (Literal("*"), Token.Mul), (Literal("/"), Token.Div), (Literal("&"), Token.Amp), (Literal("("), Token.LPar), (Literal(")"), Token.RPar), (Literal("{"), Token.LCurl), (Literal("}"), Token.RCurl), (Literal("["), Token.LBrack), (Literal("]"), Token.RBrack), (Literal("@"), Token.At), (Pattern(r"\|\"(.?)+"), Token.SubBlockLiteral),
def test_invalid_rule_1(l):
    """
    Register an invalid rule on lexer *l*: map the 'def' literal to the
    predefined Token.Termination token.
    """
    rule = (Literal('def'), Token.Termination)
    l.add_rules(rule)
# NOTE(review): RecordFlux lexer chunk. First add_rules call (complete)
# ignores blanks and tags "--" comments; the second call builds Case rules so
# keywords that double as attribute names (e.g. 'First after a tick) lex as
# the attribute token. The (text, token) input list is cut off mid-list, so
# the fragment is kept byte-identical.
# String StringLiteral = WithText() rflx_lexer = Lexer(Token) rflx_lexer.add_rules( (Pattern(r"[ \t\r\n]+"), Ignore()), (Pattern(r"--.*"), Token.Comment), ) # Hack to support keywords that equal attributes # Inspired by Libadalang grammar (ada/language/lexer.py) rflx_lexer.add_rules(*[ Case( Literal(text), Alt( prev_token_cond=(Token.Tick, ), send=token, match_size=len(text), ), Alt(send=Token.UnqualifiedIdentifier, match_size=len(text)), ) for text, token in [ ("First", Token.First), ("Last", Token.Last), ("Size", Token.Size), ("Valid_Checksum", Token.ValidChecksum), ("Has_Data", Token.HasData), ("Head", Token.Head), ("Opaque", Token.Opaque), ("Present", Token.Present),
# NOTE(review): tail of a Token class body (header out of view) plus a
# complete indentation-tracking foo_lexer: note the whitespace pattern omits
# \n because track_indent=True makes newlines significant. Kept byte-identical
# since the enclosing class cannot be reconstructed from this view.
Comma = WithText() LPar = WithText() RPar = WithText() LBrace = WithText() RBrace = WithText() Plus = WithText() Number = WithText() Identifier = WithSymbol() foo_lexer = Lexer(Token, track_indent=True) foo_lexer.add_rules( (Pattern(r'[ \r\t]+'), Ignore()), (Eof(), Token.Termination), (Literal("example"), Token.Example), (Literal("null"), Token.Null), (Literal(','), Token.Comma), (Literal('('), Token.LPar), (Literal(')'), Token.RPar), (Literal('{'), Token.LBrace), (Literal('}'), Token.RBrace), (Literal('+'), Token.Plus), (Pattern('[0-9]+'), Token.Number), (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier), ) L = foo_lexer
# NOTE(review): Python-language lexer chunk. Starts mid-call (the pre_rules
# keyword closing an earlier constructor) and is cut off mid rule list, so it
# is kept byte-identical. add_patterns registers named string-literal
# sub-patterns that the String rule then interpolates via {NAME}.
pre_rules=[(Pattern(r'\\\n[ \r\t]*'), Ignore())]) python_lexer.add_patterns( ("STRING_DBQ", r'\"(\\\"|[^\n\"])*\"'), ("STRING_SQ", r"'(\\'|[^\n'])*'"), ("MLSTRING_DBQ", r'\"\"\"([^"]|("[^"])|(""[^"])|\n)*\"\"\"'), ("MLSTRING_SQ", r"'''([^']|('[^'])|(''[^'])|\n)*'''"), ) python_lexer.add_rules( (Pattern('(u|U)?(r|R)?' '({MLSTRING_SQ}|{MLSTRING_DBQ}' '|{STRING_SQ}|{STRING_DBQ})'), Token.String), (Pattern(r'[ \r\t]+'), Ignore()), (Pattern(r"#(.?)+"), Token.Comment), (Literal('>>='), Token.RshAssign), (Literal('is'), Token.Is), (Literal('=='), Token.Equals), (Literal('def'), Token.Def), (Literal('<='), Token.Lte), (Literal('raise'), Token.Raise), (Literal('%'), Token.Mod), (Literal('yield'), Token.Yield), (Literal('^='), Token.XorAssign), (Literal('as'), Token.As), (Literal('lambda'), Token.Lambda), (Literal('`'), Token.Backtick), (Literal('try'), Token.Try), (Literal('/'), Token.Divide), (Literal('~'), Token.Invert), (Literal('return'), Token.Return),
# NOTE(review): same indentation-tracking foo_lexer chunk as elsewhere in this
# file, here followed by a Diagnostics source-dir registration. The leading
# token declarations belong to a class body whose header is out of view, so
# the code is kept byte-identical.
Comma = WithText() LPar = WithText() RPar = WithText() LBrace = WithText() RBrace = WithText() Plus = WithText() Number = WithText() Identifier = WithSymbol() foo_lexer = Lexer(Token, track_indent=True) foo_lexer.add_rules( (Pattern(r'[ \r\t]+'), Ignore()), (Eof(), Token.Termination), (Literal("example"), Token.Example), (Literal("null"), Token.Null), (Literal(','), Token.Comma), (Literal('('), Token.LPar), (Literal(')'), Token.RPar), (Literal('{'), Token.LBrace), (Literal('}'), Token.RBrace), (Literal('+'), Token.Plus), (Pattern('[0-9]+'), Token.Number), (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier), ) L = foo_lexer Diagnostics.set_lang_source_dir(os.path.abspath(__file__))
from langkit.lexer import (Ignore, Lexer, LexerToken, Literal, Pattern,
                           WithText, WithTrivia)
from langkit.parsers import Grammar, List

from utils import build_and_run


class Token(LexerToken):
    # Token kinds: one real token plus a trivia comment token.
    Example = WithText()
    Comment = WithTrivia()


# Build the lexer: skip blanks, recognize the 'example' keyword, and keep
# '#' comments as trivia.
foo_lexer = Lexer(Token)
foo_lexer.add_rules(
    (Pattern(r'[ \n\r\t]+'), Ignore()),
    (Literal('example'), Token.Example),
    (Pattern('#(.?)+'), Token.Comment),
)


class FooNode(ASTNode):
    pass


class Example(FooNode):
    pass


# Grammar: the main rule is a list of Example nodes, one per 'example'
# keyword. Then generate and exercise the library.
g = Grammar('main_rule')
g.add_rules(main_rule=List(Example('example')))
build_and_run(g, lexer=foo_lexer, py_script='main.py', ada_main='main.adb')
class Token(LexerToken):
    # Identifiers carry a symbol; punctuation carries plain text.
    Ident = WithSymbol()
    Colon = WithText()
    Arrow = WithText()
    Equal = WithText()
    ParOpen = WithText()
    ParClose = WithText()
    Backslash = WithText()
    Dot = WithText()
    # Horizontal blanks and comments are trivia; newline runs are a real
    # token.
    Comment = WithTrivia()
    Whitespace = WithTrivia()
    Newlines = WithText()


dependz_lexer = Lexer(Token)
dependz_lexer.add_rules(
    # Blanks and trivia
    (Pattern(r"[ \r\t]+"), Token.Whitespace),
    (Pattern(r"[\n]+"), Token.Newlines),
    (Pattern(r"#(.?)+"), Token.Comment),
    # Identifiers, then single- and multi-character punctuation
    (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Ident),
    (Literal(':'), Token.Colon),
    (Literal('->'), Token.Arrow),
    (Literal('='), Token.Equal),
    (Literal('('), Token.ParOpen),
    (Literal(')'), Token.ParClose),
    (Literal('\\'), Token.Backslash),
    (Literal('.'), Token.Dot))
# NOTE(review): test-script chunk: imports, Token class, an
# indentation-tracking lexer, and the start of the AST node declarations.
# The trailing `class Def` may be truncated by the chunk boundary, so the
# code is kept byte-identical.
from utils import build_and_run class Token(LexerToken): Def = WithText() LPar = WithText() RPar = WithText() Comma = WithText() Identifier = WithSymbol() Comment = WithTrivia() foo_lexer = Lexer(Token, track_indent=True) foo_lexer.add_rules( (Pattern(r'[ \r\t]+'), Ignore()), (Literal('def'), Token.Def), (Literal(','), Token.Comma), (Literal('('), Token.LPar), (Literal(')'), Token.RPar), (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier), (Pattern('#.*'), Token.Comment), ) L = foo_lexer class FooNode(ASTNode): pass class Def(FooNode): name = Field()
# NOTE(review): Lkt lexer chunk: last token declaration of a class body
# (header out of view), string-literal sub-patterns, then the operator rules.
# The add_rules call is cut off mid-list, so the fragment is kept
# byte-identical.
NullKw = WithText() lkt_lexer = Lexer(Token) lkt_lexer.add_patterns( ("STRING_DBQ", r'\"(\\\"|[^\n\"])*\"'), ("STRING_SQ", r"'(\\'|[^\n'])*'"), ) lkt_lexer.add_rules( # Whitespace & EOF (Pattern(r"[ \t\r\n\f]+"), Ignore()), # Operators (Literal('!'), Token.ExclMark), (Literal(':'), Token.Colon), (Literal('?'), Token.IntMark), (Literal('/'), Token.Div), (Literal('*'), Token.Times), (Literal('+'), Token.Plus), (Literal('|'), Token.Pipe), (Literal('<-'), Token.LeftArrow), (Literal('.'), Token.Dot), (Literal('('), Token.LPar), (Literal(')'), Token.RPar), (Literal('['), Token.LBrack), (Literal(']'), Token.RBrack), (Literal('{'), Token.LBrace), (Literal('}'), Token.RBrace), (Literal('|>'), Token.Comb),
# NOTE(review): Kconfig lexer chunk: tail of a Token class body (header out
# of view), then the lexer with blank/comment skipping and keyword rules.
# Cut off mid rule list, so the fragment is kept byte-identical.
LPar = WithText() RPar = WithText() Equal = WithText() Different = WithText() Not = WithText() Or = WithText() And = WithText() kconfig_lexer = Lexer(Token) kconfig_lexer.add_rules( (Pattern(r"[ \t\r\n]+"), Ignore()), (Pattern(r"#.*"), Ignore()), # Keywords (Literal("config"), Token.Config), (Literal("menuconfig"), Token.Menuconfig), (Literal("choice"), Token.Choice), (Literal("endchoice"), Token.Endchoice), (Literal("comment"), Token.Comment), (Literal("menu"), Token.Menu), (Literal("endmenu"), Token.Endmenu), (Literal("if"), Token.If), (Literal("endif"), Token.Endif), (Literal("source"), Token.Source), (Literal("mainmenu"), Token.Mainmenu), (Literal("depends"), Token.Depends), (Literal("on"), Token.On), (Literal("help"), Token.Help), (Literal("--help--"), Token.Help), (Literal("prompt"), Token.Prompt),
# NOTE(review): tail of an add_rules call from a case-insensitive (Ada-like)
# language lexer: remaining NoCaseLit keywords, punctuation literals, and the
# number/identifier/string patterns that close the call. The opening of the
# call is outside this view, so the fragment is kept byte-identical.
(NoCaseLit("end"), Token.End), (NoCaseLit("for"), Token.For), (NoCaseLit("is"), Token.Is), (NoCaseLit("limited"), Token.Limited), (NoCaseLit("private"), Token.Private), (NoCaseLit("null"), Token.Null), (NoCaseLit("others"), Token.Others), (NoCaseLit("package"), Token.Package), (NoCaseLit("renames"), Token.Renames), (NoCaseLit("type"), Token.Type), (NoCaseLit("use"), Token.Use), (NoCaseLit("pragma"), Token.Pragma), (NoCaseLit("when"), Token.When), (NoCaseLit("with"), Token.With), (NoCaseLit("extends"), Token.Extends), (Literal("("), Token.ParOpen), (Literal(")"), Token.ParClose), (Literal(";"), Token.Semicolon), (Literal(":"), Token.Colon), (Literal(","), Token.Comma), (Literal("."), Token.Dot), (Literal("&"), Token.Amp), (Literal("'"), Token.Tick), (Literal("|"), Token.Pipe), (Literal(":="), Token.Assign), (Literal("=>"), Token.Arrow), (Pattern("{integer}"), Token.Number), (Pattern(r"[_a-zA-Z][_a-zA-Z0-9]*"), Token.Identifier), (Pattern("{p_string}"), Token.String), )
# NOTE(review): tail of a Token class body declaring token kinds and
# TokenFamily groupings (used e.g. for unparsing/spacing rules), then the
# start of the foo_lexer rules. Cut off mid rule list, so the fragment is
# kept byte-identical.
String = WithText() Comment = WithTrivia() Whitespace = WithTrivia() Alphanumericals = TokenFamily(Def, Error, Example, Null, Number, Identifier) Punctuation = TokenFamily(Comma, Dot, Semicolon, LPar, RPar, LBrace, RBrace, Equal, Plus) Comments = TokenFamily(Comment) foo_lexer = Lexer(Token) foo_lexer.add_rules( (Pattern(r'[ \n\r\t]+'), Token.Whitespace), (Literal('def'), Token.Def), (Literal('var'), Token.Var), (Literal('error'), Token.Error), (Literal('example'), Token.Example), (Literal('null'), Token.Null), (Literal(','), Token.Comma), (Literal('.'), Token.Dot), (Literal(';'), Token.Semicolon), (Literal('('), Token.LPar), (Literal(')'), Token.RPar), (Literal('{'), Token.LBrace), (Literal('}'), Token.RBrace), (Literal('='), Token.Equal), (Literal('+'), Token.Plus), (Literal('-'), Token.Minus), (Literal('<'), Token.LessThan),
# NOTE(review): Lkt lexer chunk: named sub-patterns for hex digits and
# string/char literals ({NAME} interpolation), then the operator rules.
# Cut off mid rule list, so the fragment is kept byte-identical.
lkt_lexer.add_patterns( ("HEX_DIGIT", r'[0-9a-fA-F]'), ("HEX_DIGITS_2", r'{HEX_DIGIT}{HEX_DIGIT}'), ("HEX_DIGITS_4", r'{HEX_DIGITS_2}{HEX_DIGITS_2}'), ("HEX_DIGITS_8", r'{HEX_DIGITS_4}{HEX_DIGITS_4}'), ("STRING_LIT", r'\"(\\\"|[^\n\"])*\"'), ("CHAR_LIT", r"'(\\'|[^\n']|\\x{HEX_DIGITS_2}|\\u{HEX_DIGITS_4}|\\U{HEX_DIGITS_8})'"), ) lkt_lexer.add_rules( # Whitespace & EOF (Pattern(r"[ \t\r\n\f]+"), Ignore()), # Operators (Literal('!'), Token.ExclMark), (Literal(';'), Token.Semicolon), (Literal(':'), Token.Colon), (Literal('?'), Token.IntMark), (Literal('/'), Token.Div), (Literal('*'), Token.Times), (Literal('+'), Token.Plus), (Literal('&'), Token.Amp), (Literal('-'), Token.Minus), (Literal('|'), Token.Pipe), (Literal('<-'), Token.LeftArrow), (Literal('->'), Token.RightArrow), (Literal('.'), Token.Dot), (Literal('('), Token.LPar), (Literal(')'), Token.RPar), (Literal('['), Token.LBrack),
# NOTE(review): middle of an add_rules call from an Ada-like lexer:
# case-insensitive keywords followed by punctuation, with two-character
# operators (.., :=, <=, =>) listed before their one-character prefixes.
# Both ends of the call are outside this view; kept byte-identical.
(NoCaseLit("use"), Token.Use), (NoCaseLit("declare"), Token.Declare), (NoCaseLit("delay"), Token.Delay), (NoCaseLit("limited"), Token.Limited), (NoCaseLit("record"), Token.Record), (NoCaseLit("when"), Token.When), (NoCaseLit("loop"), Token.Loop), (NoCaseLit("rem"), Token.Rem), (NoCaseLit("while"), Token.While), (NoCaseLit("renames"), Token.Renames), (NoCaseLit("with"), Token.With), (NoCaseLit("do"), Token.Do), (NoCaseLit("xor"), Token.Xor), # Punctuation (Literal("("), Token.ParOpen), (Literal(")"), Token.ParClose), (Literal(";"), Token.Semicolon), (Literal(":"), Token.Colon), (Literal(","), Token.Comma), (Literal(".."), Token.Doubledot), (Literal(":="), Token.Assign), (Literal("."), Token.Dot), (Literal("<>"), Token.Diamond), (Literal("<="), Token.Lte), (Literal(">="), Token.Gte), (Literal("=>"), Token.Arrow), (Literal("="), Token.Equal), (Literal("<"), Token.Lt), (Literal(">"), Token.Gt), (Literal("+"), Token.Plus),
# NOTE(review): another indentation-tracking foo_lexer chunk (no Eof rule in
# this variant), ending with the root AST node declaration. The leading token
# declarations belong to a class body whose header is out of view; kept
# byte-identical.
Comma = WithText() LPar = WithText() RPar = WithText() LBrace = WithText() RBrace = WithText() Plus = WithText() Number = WithText() Identifier = WithSymbol() foo_lexer = Lexer(Token, track_indent=True) foo_lexer.add_rules( (Pattern(r'[ \r\t]+'), Ignore()), (Literal("example"), Token.Example), (Literal("null"), Token.Null), (Literal(','), Token.Comma), (Literal('('), Token.LPar), (Literal(')'), Token.RPar), (Literal('{'), Token.LBrace), (Literal('}'), Token.RBrace), (Literal('+'), Token.Plus), (Pattern('[0-9]+'), Token.Number), (Pattern('[a-zA-Z_][a-zA-Z0-9_]*'), Token.Identifier), ) L = foo_lexer class FooNode(ASTNode): pass