def lexer_from_mapping(mapping):
    """Build a lexer whose special-character tokens come from *mapping*'s keys.

    Each key becomes its own token type; anything that is not an operator
    or mapped character lexes as DATA.
    """
    lg = LexerGenerator()
    # Data may be delimited with forward slashes (non-greedy).
    lg.add("DATA", r'/.+?/')
    # One token per mapped special character.  Escape with a SINGLE
    # backslash so the regex matches the character itself -- the original
    # used r"\\" + char, which matched a literal backslash followed by
    # the character.
    for char in mapping.keys():
        lg.add(char, "\\" + char)
    # Normal operator tokens.
    lg.add("TYPE", r':')
    lg.add("AND", r'\&')
    lg.add("OR", r'\|')
    lg.add("L_PAREN", r'\(')
    lg.add("R_PAREN", r'\)')
    lg.add("EQUAL", r'=')
    lg.add("CHILD", r'>')
    lg.add("PARENT", r'<')
    lg.add("NOT", r'!')
    # Everything else is data: a negated character class excluding the
    # operators above plus every mapped special character.
    excluded_chars = r'^<>=&|():!'
    for char in mapping.keys():
        excluded_chars += "\\" + char
    lg.add("DATA", "[{excluded}]+".format(excluded=excluded_chars))
    lg.ignore(r'\s+')
    return lg.build()
def test_ignore_recursion(self):
    """Lexing a long run of ignored characters must not blow the stack."""
    generator = LexerGenerator()
    generator.ignore(r"\s")
    lexer = generator.build()
    tokens = list(lexer.lex(" " * 2000))
    assert tokens == []
def test_basic_lexer(self):
    """Translated lexer loop: sum the NUMBER tokens of "n+n+n"."""
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    l = lg.build()

    def f(n):
        tokens = l.lex("%d+%d+%d" % (n, n, n))
        total = 0
        for idx in range(5):
            t = tokens.next()
            if idx % 2 == 0:
                # Even positions must be numbers.
                if t.name != "NUMBER":
                    return -1
                total += int(t.value)
            else:
                # Odd positions must be the "+" operator.
                if t.name != "PLUS":
                    return -2
                if t.value != "+":
                    return -3
        # The stream must now be exhausted (this lexer returns None).
        if tokens.next() is not None:
            return -4
        return total

    assert self.run(f, [14]) == 42
def test_arithmetic(self):
    """Lexer + parser integration: "3*4+5" parses under precedence rules."""
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    lg.add("TIMES", r"\*")

    pg = ParserGenerator(
        ["NUMBER", "PLUS", "TIMES"],
        precedence=[("left", ["PLUS"]), ("left", ["TIMES"])],
    )

    @pg.production("main : expr")
    def main(p):
        return p[0]

    @pg.production("expr : expr PLUS expr")
    @pg.production("expr : expr TIMES expr")
    def expr_binop(p):
        op = {"+": operator.add, "*": operator.mul}[p[1].getstr()]
        return BoxInt(op(p[0].getint(), p[2].getint()))

    @pg.production("expr : NUMBER")
    def expr_num(p):
        return BoxInt(int(p[0].getstr()))

    assert pg.build().parse(lg.build().lex("3*4+5"))
def test_position(self):
    """Tokens report line/column source positions, including across newlines."""
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    lg.ignore(r"\s+")
    l = lg.build()

    cases = [
        ("2 + 3", [(1, 1), (1, 3), (1, 5)]),
        ("2 +\n 37", [(1, 1), (1, 3), (2, 5)]),
    ]
    for source, positions in cases:
        stream = l.lex(source)
        for lineno, colno in positions:
            t = stream.next()
            assert t.source_pos.lineno == lineno
            assert t.source_pos.colno == colno
        # Each stream is exhausted after the expected tokens.
        with raises(StopIteration):
            stream.next()
def test_regex_flags_ignore(self):
    """An re.DOTALL ignore rule swallows the whole input, newline included."""
    lg = LexerGenerator()
    lg.add("ALL", r".*", re.DOTALL)
    lg.ignore(r".*", re.DOTALL)
    stream = lg.build().lex("test\ndotall")
    with raises(StopIteration):
        stream.next()
def test_error(self):
    """Unlexable input raises LexingError carrying a SourcePosition."""
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    stream = lg.build().lex('fail')
    with raises(LexingError) as excinfo:
        stream.next()
    assert 'SourcePosition(' in repr(excinfo.value)
def construct_lexer():
    """Build the language lexer.

    Rules are registered most-specific first.  Fixes over the original:
    - NAME used '*' and so matched the empty string (a zero-width token
      rule makes the lexer loop); it now uses '+'.
    - Multi-character operators ('**', '<=', '>=', '==', '!=', '->',
      '=>', '...') are registered before their single-character
      prefixes, which previously shadowed them.
    - Word keywords are registered before NAME (which swallowed them)
      and carry a (?!\\w) guard so "android" is a NAME, not AND+"roid".
    """
    lg = LexerGenerator()
    # Literals
    lg.add('NUMBER', r'\d+(\.\d+)?')
    lg.add('STRING', r'\".*?\"')
    # Brackets and punctuation
    lg.add('OPEN_PAREN', r'\(')
    lg.add('CLOSE_PAREN', r'\)')
    lg.add('INDEX_OPEN', r'\[')
    lg.add('INDEX_CLOSE', r'\]')
    lg.add('RANGE', r'\.\.\.')
    lg.add('COMMA', ',')
    # Operators -- longest first
    lg.add('EXPONENTIATION', r'\*\*')
    lg.add('SINGLE_ARROW', '->')
    lg.add('DOUBLE_ARROW', '=>')
    lg.add('LE', '<=')
    lg.add('GE', '>=')
    lg.add('EQ', '==')
    lg.add('NE', '!=')
    lg.add('ADD', r'\+')
    lg.add('SUBTRACT', r'-')
    lg.add('MULTIPLY', r'\*')
    lg.add('DIVIDE', '/')
    lg.add('SELF_APPLY', '!')
    lg.add('DOT', r'\.')
    lg.add('GT', '>')
    lg.add('LT', '<')
    # Word operators and keywords, boundary-guarded
    lg.add('AND', r'and(?!\w)')
    lg.add('OR', r'or(?!\w)')
    lg.add('NOT', r'not(?!\w)')
    lg.add('XOR', r'xor(?!\w)')
    lg.add('IN', r'in(?!\w)')
    lg.add('IF', r'if(?!\w)')
    lg.add('ELSE', r'else(?!\w)')
    lg.add('DO', r'do(?!\w)')
    lg.add('END', r'end(?!\w)')
    lg.add('DEF', r'def(?!\w)')
    lg.add('LET', r'let(?!\w)')
    lg.add('WHILE', r'while(?!\w)')
    lg.add('FOR', r'for(?!\w)')
    # Identifiers come last so they never shadow the rules above.
    lg.add('NAME', r'[a-zA-Z0-9_]+')
    # Whitespace
    lg.ignore(r"\s+")
    return lg.build()
def test_regex_flags(self):
    """An re.DOTALL rule matches across the newline as a single token."""
    lg = LexerGenerator()
    lg.add("ALL", r".*", re.DOTALL)
    stream = lg.build().lex("test\ndotall")
    token = stream.next()
    assert token.source_pos.lineno == 1
    assert token.source_pos.colno == 1
    assert token.getstr() == "test\ndotall"
    with raises(StopIteration):
        stream.next()
def test_repr(self):
    """str() of a lexer stream works both before and during iteration."""
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    lg.ignore(r"\s+")
    stream = lg.build().lex("2 + 3")
    assert str(stream) is not None
    token = stream.next()
    assert (token.name, token.value) == ("NUMBER", "2")
    assert str(stream) is not None
    token = stream.next()
    assert token.name == "PLUS"
def test_newline_position(self):
    """Column numbers reset to 1 on the line following a NEWLINE token."""
    lg = LexerGenerator()
    lg.add("NEWLINE", r"\n")
    lg.add("SPACE", r" ")
    stream = lg.build().lex(" \n ")
    for lineno, colno in [(1, 1), (1, 2), (2, 1)]:
        token = stream.next()
        assert token.source_pos.lineno == lineno
        assert token.source_pos.colno == colno
class Lexer():
    """Builds the lexer for the language (keywords, punctuation, IDs)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules.

        Fixes over the original: keywords are guarded with (?!\\w) so
        e.g. "nextval" lexes as one ID instead of NEXT + "val", and the
        ID class [\\w|_]+ (which wrongly matched literal '|') is now \\w+.
        """
        # Keywords
        self.lexer.add('START', r'start(?!\w)')
        self.lexer.add('DONE', r'done(?!\w)')
        self.lexer.add('NEXT', r'next(?!\w)')
        # Parenthesis, assorted
        self.lexer.add('LPAREN', r'\(')
        self.lexer.add('RPAREN', r'\)')
        self.lexer.add('LBRACE', r'\{')
        self.lexer.add('RBRACE', r'\}')
        self.lexer.add('LBRACKET', r'\[')
        self.lexer.add('RBRACKET', r'\]')
        # Delimiters
        self.lexer.add('COLON', r'\:')
        self.lexer.add('SEMI', r'\;')
        self.lexer.add('LEFT_ARROW', r'<=')
        self.lexer.add('EQUAL', r'=')
        self.lexer.add('COMMA', r',')
        # Operators
        self.lexer.add('ADD', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MOD', r'mod(?!\w)')
        self.lexer.add('NOT_EQ', r'\!\=')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Identifier (\w already includes '_')
        self.lexer.add('ID', r'\w+')
        # Ignore whitespace
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the Portuguese-keyword language (imprime/se/senao...)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def addTokens(self):
        """Register all token rules.

        Fix over the original: EVERY word keyword now carries the
        (?!\\w) guard that only IF/ELSE/FUNC had, so identifiers that
        merely start with a keyword ("elefante" vs "e", "our" vs "ou")
        lex as IDENTIFIER instead of keyword + remainder.
        """
        self.lexer.add('NUMBER', r'\d+')
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MINUS', r'-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')
        self.lexer.add('BIGGER', r'\>')
        self.lexer.add('SMALLER', r'\<')
        # self.lexer.add('BIGGEREQ', r'\>=')
        # self.lexer.add('SMALLEREQ', r'\<=')
        self.lexer.add('EQUAL', r'==')
        self.lexer.add('ASSINGMENT', r'=')
        self.lexer.add('DIFF', r'\!=')
        self.lexer.add('COMMA', r',')
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('OPEN_BRACKET', r'\{')
        self.lexer.add('CLOSE_BRACKET', r'\}')
        self.lexer.add('PRINT', r'imprime(?!\w)')
        self.lexer.add('IF', r'se(?!\w)')
        self.lexer.add('ELSE', r'senao(?!\w)')
        self.lexer.add('WHILE', r'enquanto(?!\w)')
        self.lexer.add('FUNC', r'func(?!\w)')
        self.lexer.add('AND', r'e(?!\w)')
        self.lexer.add('OR', r'ou(?!\w)')
        self.lexer.add('NOT', r'inv(?!\w)')
        self.lexer.add('NEWLINE', r'[\r\n]+')
        # Identifiers come last, so they do not match other tokens
        self.lexer.add('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*')
        # Ignore intra-line whitespace (newlines are real tokens)
        self.lexer.ignore(r'[ \t\r\f\v]+')

    def createLexer(self):
        self.addTokens()
        return self.lexer.build()
class Lexer:
    """Lexer builder for the language (let/fn/return, pointers, literals)."""

    def __init__(self):
        self.lg = LexerGenerator()

    def build(self):
        """Register all rules and return the built lexer.

        Fix over the original: word keywords carry a (?!\\w) guard so
        identifiers that merely start with a keyword ("letter", "fnord",
        "pointer", "torch"...) lex as IDENTIFIER rather than keyword +
        remainder -- keyword rules are tried before IDENTIFIER.
        """
        # --- Keywords --- #
        self.lg.add("LET", r"let(?!\w)")
        self.lg.add("FN", r"fn(?!\w)")
        self.lg.add("RET", r"return(?!\w)")
        self.lg.add("TO", r"to(?!\w)")
        # --- Reserved --- #
        self.lg.add("TRUE", r"true(?!\w)")
        self.lg.add("FALSE", r"false(?!\w)")
        # --- Modifiers --- #
        self.lg.add("PTR", r"ptr(?!\w)")
        self.lg.add("REF", r"ref(?!\w)")
        self.lg.add("DEREF", r"deref(?!\w)")
        self.lg.add("ADDR", r"addr(?!\w)")
        # --- Punctuations --- #
        self.lg.add("(", r"\(")
        self.lg.add(")", r"\)")
        self.lg.add("{", r"\{")
        self.lg.add("}", r"\}")
        self.lg.add("=", r"\=")
        self.lg.add(";", r"\;")
        self.lg.add(",", r"\,")
        # --- Base Tokens --- #
        # FLOAT before NUMBER so "1.5" is not lexed as NUMBER + "." + NUMBER.
        self.lg.add("FLOAT", r"[-]?\d+[.]\d+")
        self.lg.add("NUMBER", r"[-]?\d+")
        self.lg.add("IDENTIFIER", r"[_\w]+[_\w0-9]*")
        self.lg.ignore(r"\s+")
        return self.lg.build()
class RegoLexer:
    """Lexer for the uppercase-keyword pseudocode language (OUTPUT/IF/...)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def create_tokens(self):
        """Register all token rules.

        Fix over the original: the STRING bodies are now non-greedy --
        with the greedy '.+' a line containing two literals such as
        "a" "b" lexed as ONE string spanning both.  (The non-greedy
        '.*?' also covers the empty-string cases the original listed
        separately.)  Patterns are raw strings to avoid invalid-escape
        warnings.
        """
        self.lexer.add('PRINT', r'OUTPUT')
        self.lexer.add('NUMBER', r'-?\d+')
        self.lexer.add('STRING', r'(".*?")|(\'.*?\')')
        self.lexer.add('NEWLINE', r'\n')
        self.lexer.add('NEWTAB', r'\t')
        # operations
        self.lexer.add('ADD', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'DIV')
        self.lexer.add('MOD', r'MOD')
        self.lexer.add('POW', r'POW')
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # conditionals
        self.lexer.add('IF', r'IF')
        self.lexer.add('THEN', r'THEN')
        self.lexer.add('ELSE', r'ELSE')
        self.lexer.add('END_IF', r'ENDIF')
        # comparatives -- two-character operators before their prefixes
        self.lexer.add("GTE", r"(>=)")
        self.lexer.add("LTE", r"(<=)")
        self.lexer.add("EQ", r"(=)")
        self.lexer.add("LT", r"(<)")
        self.lexer.add("GT", r"(>)")
        # spaces are insignificant; '\n' and '\t' are real tokens
        self.lexer.ignore(r'[ \r\f\v]+')

    def get_lexer(self):
        self.create_tokens()
        return self.lexer.build()
class Lexer():
    """Minimal lexer: integers, +/- operators, parens and semicolons."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every (name, pattern) token rule."""
        rules = [
            ('NUMBER', r'\d+'),
            # Operators
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            # Signs
            ('OPEN_PARENS', r'\('),
            ('CLOSE_PARENS', r'\)'),
            ('SEMI_COLON', r'\;'),
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Whitespace carries no meaning.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
def test_simple(self):
    """Lex "2+3" into NUMBER, PLUS, NUMBER; the stream then yields None."""
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    stream = lg.build().lex("2+3")
    for name, value in [("NUMBER", "2"), ("PLUS", "+"), ("NUMBER", "3")]:
        token = stream.next()
        assert token.name == name
        assert token.value == value
    # The last token ("3") sits at character index 2.
    assert token.source_pos.idx == 2
    assert stream.next() is None
class Lexer():
    """Lexer for the Portuguese-keyword language (SE/SENAO, PRINT...)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules.

        Fix over the original: word keywords (var/SENAO/senao/SE/se/
        PRINT/print) carry a (?!\\w) guard so identifiers that merely
        start with a keyword (e.g. "seja") lex as IDENTIFIER instead of
        IF('se') + remainder -- keyword rules are tried before IDENTIFIER.
        """
        self.lexer.add('NUMBER', r'\d+')
        # Operators
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MINUS', r'-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')
        # Comparisons
        self.lexer.add('BIGGER', r'\>')
        self.lexer.add('SMALLER', r'\<')
        self.lexer.add('EQUAL', r'\=')
        self.lexer.add('DIFF', r'\!=')
        self.lexer.add('OPEN_PARENS', r'\(')
        self.lexer.add('CLOSE_PARENS', r'\)')
        self.lexer.add('OPEN_BRACKETS', r'\{')
        self.lexer.add('CLOSE_BRACKETS', r'\}')
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('QUOTE', r'\"')
        # Variables
        self.lexer.add('ATTRIBUTION', r':=')
        self.lexer.add('VAR', r'var(?!\w)')
        # Else (both cases)
        self.lexer.add('ELSE', r'SENAO(?!\w)')
        self.lexer.add('ELSE', r'senao(?!\w)')
        # If (both cases)
        self.lexer.add('IF', r'SE(?!\w)')
        self.lexer.add('IF', r'se(?!\w)')
        # Print (both cases)
        self.lexer.add('PRINT', r'PRINT(?!\w)')
        self.lexer.add('PRINT', r'print(?!\w)')
        # Identifier
        self.lexer.add('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9]*')
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Syntax:
    """Token definitions for the struct/fn language grammar."""

    def __init__(self) -> None:
        self.lg = LexerGenerator()

    def Build(self):
        """Register all rules and return the built lexer.

        Fixes over the original: patterns are raw strings -- the
        non-raw STRING pattern silently collapsed '\\\\.' to '\\.'
        (matching only a literal dot instead of any escaped character)
        -- and the duplicate, dead '*' rule is removed.
        """
        self.lg.add(";", r";")
        self.lg.add(".", r"\.")
        self.lg.add(",", r",")
        self.lg.add("(", r"\(")
        self.lg.add(")", r"\)")
        self.lg.add("{", r"\{")
        self.lg.add("}", r"\}")
        self.lg.add("[", r"\[")
        self.lg.add("]", r"\]")
        self.lg.add("=", r"\=")
        self.lg.add("->", r"\-\>")
        self.lg.add("*", r"\*")
        # Quoted string; bodies may contain escaped characters and
        # escaped newlines.
        self.lg.add("STRING", r'["]([^"\\\n]|\\.|\\\n)*["]')
        self.lg.add("&", r"\&")
        self.lg.add("@", r"\@")
        self.lg.add("NUMBER", r"[-]*[0-9]+")
        # Keywords are matched WITH a trailing space, as in the original
        # grammar.  NOTE(review): "structX" therefore lexes as an
        # IDENTIFIER -- confirm the trailing-space convention is intended.
        self.lg.add("STRUCT", r"struct ")
        self.lg.add("FN", r"fn ")
        self.lg.add("RETURN", r"return ")
        self.lg.add("TO", r"to ")
        self.lg.add("IDENTIFIER", r"[_\w][_\w0-9]*")
        self.lg.ignore(r"\s+")
        return self.lg.build()
class Lexer():
    """Lexer for the print-only calculator language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        '''
        Definitions for all possible tokens
        '''
        rules = [
            ('PRINT', r'print'),        # print statement
            ('OPEN_PAREN', r'\('),      # left parenthesis
            ('CLOSE_PAREN', r'\)'),     # right parenthesis
            ('SEMI_COLON', r'\;'),      # statement terminator
            ('SUM', r'\+'),             # binary addition
            ('SUB', r'\-'),             # binary subtraction
            # TODO: unary operators
            # TODO: conditional statements
            # TODO: loop statements
            ('NUMBER', r'\d+'),         # integer literal
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Whitespace is not significant.
        self.lexer.ignore(r'\s+')

    def create(self):
        self._add_tokens()
        return self.lexer.build()
def test_ignore(self):
    """Ignored whitespace is skipped but still advances source positions."""
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    lg.ignore(r"\s+")
    stream = lg.build().lex("2 + 3")
    for name, value in [("NUMBER", "2"), ("PLUS", "+"), ("NUMBER", "3")]:
        token = stream.next()
        assert token.name == name
        assert token.value == value
    # "3" sits at character index 4 (spaces counted, though ignored).
    assert token.source_pos.idx == 4
    with raises(StopIteration):
        stream.next()
class Lexer():
    """Lexer for the print/sum/sub toy language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every (name, pattern) token rule."""
        rules = [
            ('PRINT', r'print'),
            ('OPEN_PAREN', r'\('),
            ('CLOSE_PAREN', r'\)'),
            ('SEMI_COLON', r'\;'),   # statement terminator
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            ('NUMBER', r'\d+'),
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the calculator language with the Portuguese print keyword."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every (name, pattern) token rule."""
        rules = [
            ('IMPRIMA', r'mostra_ai'),   # print-statement keyword
            ('ABRE_PAR', r'\('),         # open parenthesis
            ('FECHA_PAR', r'\)'),        # close parenthesis
            ('PONTO_VIRGULA', r'\;'),    # semicolon
            ('SOMA', r'\+'),             # addition
            ('SUB', r'\-'),              # subtraction
            ('NUM', r'\d+'),             # integer literal
            ('MULT', r'\*'),             # multiplication
            ('DIV', r'\/'),              # division
            ('POT', r'\^'),              # exponentiation
            ('REST', r'\%'),             # remainder
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Whitespace is insignificant.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the print/sum/sub language (no semicolons)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every (name, pattern) token rule."""
        rules = [
            ('PRINT', r'print'),      # print statement
            ('OPEN_PAREN', r'\('),    # parentheses
            ('CLOSE_PAREN', r'\)'),
            ('SUM', r'\+'),           # operators
            ('SUB', r'\-'),
            ('NUMBER', r'\d+'),       # integer literal
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Whitespace is insignificant.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the mini language (print/main/if/else/while/let)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules.

        Fix over the original: PRINT and MAIN now carry the same (?!\\w)
        guard as IF/ELSE/WHILE/LET; without it "printer" lexed as
        PRINT + "er" instead of a single IDENTIFIER.
        """
        self.lexer.add('PRINT', r'print(?!\w)')
        self.lexer.add('MAIN', r'main(?!\w)')
        self.lexer.add('NUMBER', r'\d+(\.\d+)?')
        self.lexer.add('STRING', '(""".*?""")|(".*?")|(\'.*?\')')
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('WHILE', r'while(?!\w)')
        self.lexer.add('LET', r'let(?!\w)')
        self.lexer.add('IDENTIFIER', r"[a-zA-Z_][a-zA-Z0-9_]*")
        # Two-character comparison operators before their one-char prefixes.
        self.lexer.add('==', r'==')
        self.lexer.add('!=', r'!=')
        self.lexer.add('>=', r'>=')
        self.lexer.add('<=', r'<=')
        self.lexer.add('>', r'>')
        self.lexer.add('<', r'<')
        self.lexer.add('=', r'=')
        self.lexer.add('{', r'\{')
        self.lexer.add('}', r'\}')
        self.lexer.add('|', r'\|')
        self.lexer.add('SEMICOLON', r';')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')
        self.lexer.add('(', r'\(')
        self.lexer.add(')', r'\)')
        self.lexer.add('NEWLINE', r'\n')
        # Ignore whitespace EXCEPT newlines, which are real tokens.
        self.lexer.ignore('[ \t\r\f\v]+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
def build_lexer():
    """Build the assembly-style lexer.

    Command names are registered in descending sorted order so that a
    longer command sharing a prefix with a shorter one is tried first.
    """
    lg = LexerGenerator()
    all_commands = sorted(
        itertools.chain(common_commands, lmao_commands, rofl_commands),
        reverse=True,
    )
    for command in all_commands:
        lg.add(command, command)
    lg.add('NEWLINE', r'\n')
    lg.add('SCALAR_VAR', r's\d+')
    lg.add('ARRAY_VAR', r'a\d+')
    lg.add('REGISTER', r'reg[A-H]')
    lg.add('LABEL', r'[a-zA-Z_][a-zA-Z_0-9]*')
    lg.add('NUM_LITERAL', r'-?((\d+)(\.\d+)?)|(\.\d+)')
    lg.add('CHAR_LITERAL', r"'([^\\']|\\n|\\t|\\'|\\\\)'")
    lg.add('COLON', r':')
    # Spaces/tabs and '#' comments are skipped.
    lg.ignore(r'[ \t]')
    lg.ignore(r'\#.*')
    # Catch-all so unknown characters surface as ERROR tokens.
    lg.add('ERROR', r'.')
    return lg.build()
class Lexer:
    """Lexer for the five-operator print calculator."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every (name, pattern) token rule."""
        rules = [
            ('PRINT', r'print'),
            ('OPEN_PAREN', r'\('),
            ('CLOSE_PAREN', r'\)'),
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            ('MUL', r'\*'),
            ('DIV', r'\/'),
            ('MOD', r'\%'),
            ('NUMBER', r'\d+'),
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the Portuguese pseudo-language (leia/escreva...)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def addTokens(self):
        """Register all token rules.

        Fixes over the original:
        - The '^(...)' anchors are removed.  The lexer matches rules at
          the current stream position, where '^' anchors to the start of
          the WHOLE input, so those rules could only ever match the very
          first token.
        - DOT is escaped; a bare '.' matched any character.
        - Word keywords get a (?!\\w) guard so identifiers like "iffy"
          stay ID instead of IF + remainder.
        """
        self.lexer.add('NUMBER', r'(-?)(\d+)((\.\d+)?)')
        # Two-character comparisons before their one-char prefixes.
        self.lexer.add('EQUAL', r'==')
        self.lexer.add('LESSEQUAL', r'<=')
        self.lexer.add('GREATEREQUAL', r'>=')
        self.lexer.add('DIFFERENT', r'!=')
        # Logical operators and keywords, boundary-guarded.
        self.lexer.add('OPR_LOG', r'not(?!\w)')
        self.lexer.add('OPR_LOG', r'or(?!\w)')
        self.lexer.add('OPR_LOG', r'and(?!\w)')
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('WHILE', r'while(?!\w)')
        self.lexer.add('LEIA', r'leia(?!\w)')
        self.lexer.add('ESCREVA', r'escreva(?!\w)')
        self.lexer.add('ID', r'[a-zA-Z]([a-zA-Z]|\d)*')
        self.lexer.add('GRE', r'>')
        self.lexer.add('LES', r'<')
        self.lexer.add('ATRIB', r'=')
        self.lexer.add('COM', r',')
        self.lexer.add('SEM', r';')
        self.lexer.add('ADD', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('POW', r'\^')
        self.lexer.add('DIV', r'/')
        self.lexer.add('MOD', r'%')
        self.lexer.add('LPA', r'\(')
        self.lexer.add('RPA', r'\)')
        self.lexer.add('DOT', r'\.')
        self.lexer.ignore(r'\s+')

    def getLexer(self):
        self.addTokens()
        return self.lexer.build()
class Lexer(object):
    '''Defines a lexer for the PseudoExe language.'''

    def __init__(self):
        '''Inits the lexer.'''
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        '''Register every (name, pattern) pair from the TOKENS table.'''
        for token_spec in TOKENS:
            self.lexer.add(*token_spec)
        # Whitespace and newlines carry no meaning.
        self.lexer.ignore(r'\s+')
        self.lexer.ignore(r"\n+")

    def get_lexer(self):
        '''build and return the lexer.'''
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the word-operator language (plus/minus/times/divide)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def addTokens(self):
        """Register every (name, pattern) token rule."""
        rules = [
            ('NUMBER', r'\d+'),
            # Word operators
            ('PLUS', r'plus'),
            ('MINUS', r'minus'),
            ('MUL', r'times'),
            ('DIV', r'divide'),
            ('GREATER', r'greater'),
            ('LESS', r'less'),
            # '==' before '=' so comparison wins over assignment
            ('EQUAL', r'=='),
            ('ASSINGMENT', r'='),
            ('COMMA', r','),
            # Unusual bracket choices are part of the language
            ('OPEN_PAREN', r'\['),
            ('CLOSE_PAREN', r'\]'),
            ('OPEN_BRACKET', r'\/'),
            ('CLOSE_BRACKET', r'\\'),
            # Keywords, boundary-guarded so "iffy" stays an identifier
            ('PRINT', r'print(?!\w)'),
            ('IF', r'if(?!\w)'),
            ('ELSE', r'else(?!\w)'),
            ('WHILE', r'while(?!\w)'),
            ('FUNC', r'def(?!\w)'),
            ('AND', r'and(?!\w)'),
            ('OR', r'or(?!\w)'),
            ('NOT', r'not(?!\w)'),
            ('NEWLINE', r'[\r\n]+'),
            # Identifiers come last so they do not shadow other tokens
            ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*'),
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Ignore intra-line whitespace; newlines are real tokens.
        self.lexer.ignore(r'[ \t\r\f\v]+')

    def createLexer(self):
        self.addTokens()
        return self.lexer.build()
class Lexer(object):
    """Lexer for the C-like mini language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules.

        Fixes over the original:
        - NOT_EQUALS is registered before NOT; previously "!=" lexed as
          NOT followed by a bare '=' that no rule matched (LexingError).
        - Word keywords carry a (?!\\w) guard; without it "printer"
          lexed as PRINT + "er" and "integer" as int + "eger", because
          keyword rules are tried before IDENTIFIER.
        """
        self.lexer.add('PRINT', r'print(?!\w)')
        self.lexer.add('INPUT', r'input(?!\w)')
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('SEMICOLON', r';')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('NUMBER', r'\d+')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')
        self.lexer.add('NOT_EQUALS', r'!=')
        self.lexer.add('NOT', r'!')
        self.lexer.add('COMPLEMENT', r'~')
        self.lexer.add('PRIMITIVE_DATA_TYPE', r'int(?!\w)')
        self.lexer.add('OPEN_CURLY', r'{')
        self.lexer.add('CLOSE_CURLY', r'}')
        self.lexer.add('RETURN', r'return(?!\w)')
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('EQUALS', r'==')
        self.lexer.add('FOR', r'for(?!\w)')
        self.lexer.add('LESS_EQ', r'<=')
        self.lexer.add('GREATER_EQ', r'>=')
        self.lexer.add('LESS', r'<')
        self.lexer.add('GREATER', r'>')
        self.lexer.add('EQUAL_SIGN', r':=')
        self.lexer.add('COMMA', r',')
        self.lexer.add('IDENTIFIER', r'[a-zA-Z]\w*')
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the print/sum/sub language with semicolons."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every (name, pattern) token rule."""
        rules = [
            ('PRINT', r'print'),       # print statement
            ('OPEN_PAREN', r'\('),     # parentheses
            ('CLOSE_PAREN', r'\)'),
            ('SEMI_COLON', r'\;'),     # statement terminator
            ('SUM', r'\+'),            # additive operators
            ('SUB', r'\-'),
            ('NUMBER', r'\d+'),        # integer literal
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer for the Spanish-keyword calculator (mostrarenpantalla/suma/resta)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every (name, pattern) token rule."""
        rules = [
            ('PRINT', r'mostrarenpantalla'),  # print statement
            ('OPEN_PAREN', r'\('),            # parentheses
            ('CLOSE_PAREN', r'\)'),
            ('SEMI_COLON', r'\;'),            # statement terminator
            ('SUM', r'suma'),                 # word operators
            ('SUB', r'resta'),
            ('NUMBER', r'\d+'),               # integer literal
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Whitespace is insignificant.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Lexer combining dynamic ID tokens with fixed punctuation rules."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules.

        Fixes over the original:
        - The STRING pattern was a triple-quoted raw string with spaces
          INSIDE the quotes, so the regex required a literal space on
          each side of the string literal; the spaces are removed.
        - IDENTIFIER was missing the trailing '*', so it matched
          identifiers of exactly two characters and nothing else.
        """
        # dynamic tokens
        for item in get_id_tokens():
            self.lexer.add(item[0], item[1])
        # braces
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('OPEN_CURLY', r'{')
        self.lexer.add('CLOSE_CURLY', r'}')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Comma
        self.lexer.add('COMMA', r',')
        # Operators
        self.lexer.add('MUL', r'\*')
        self.lexer.add('EQU', r'=')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # String: quoted, allowing escaped characters inside
        self.lexer.add('STRING', r'\"([^\\\"]|\\.)*\"')
        # Identifier
        self.lexer.add('IDENTIFIER', r"[a-zA-Z_][a-zA-Z0-9_]*")
        # Ignore spaces
        self.lexer.ignore(r'\s+')

    def get_rules(self):
        """Expose the registered rule names (useful for parser setup)."""
        return [rule.name for rule in self.lexer.rules]

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
def test_states(self):
    """Lexer state switching: '[' enters the vector state, ']' leaves it."""
    lg = LexerGenerator(initial_state="scalar")
    # Rules active in the default "scalar" state.
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    lg.ignore(r"\s+")
    # '[' transitions into the "vector" state...
    lg.add("OPEN_BRACKET", r"\[", to_state="vector")
    # ...where a different rule set applies.
    lg.add("PLUS", r"\+", state="vector")
    lg.add("NUMBER", r"\d+", state="vector")
    lg.add("NEW_LINE", r"\n+", state="vector")
    # ']' transitions back to "scalar".
    lg.add("CLOSE_BRACKET", r"\]", state="vector", to_state="scalar")
    lg.ignore(r" +", state="vector")

    stream = lg.build().lex("2 + [ 3 + 4 \n\n 5 + 6 ] + 7")
    expected = [
        ("NUMBER", "2", "scalar"),
        ("PLUS", "+", "scalar"),
        ("OPEN_BRACKET", "[", "scalar"),
        ("NUMBER", "3", "vector"),
        ("PLUS", "+", "vector"),
        ("NUMBER", "4", "vector"),
        ("NEW_LINE", "\n\n", "vector"),
        ("NUMBER", "5", "vector"),
        ("PLUS", "+", "vector"),
        ("NUMBER", "6", "vector"),
        ("CLOSE_BRACKET", "]", "vector"),
        ("PLUS", "+", "scalar"),
        ("NUMBER", "7", "scalar"),
    ]
    for (name, value, state), token in zip(expected, stream):
        assert token.name == name
        assert token.value == value
        assert token.state == state
class Lexer():
    """Lexer for the out/if language with braces and three operators."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every (name, pattern) token rule."""
        rules = [
            ('OUT', r'out'),           # print statement
            ('IF', r'if'),             # conditional
            ('OPEN_PAREN', r'\('),     # parentheses
            ('CLOSE_PAREN', r'\)'),
            ('OPEN_BRACE', r'\{'),     # braces
            ('CLOSE_BRACE', r'\}'),
            ('SEMI_COLON', r'\;'),     # statement terminator
            ('SUM', r'\+'),            # operators
            ('SUB', r'\-'),
            ('MULTIPLY', r'\*'),
            ('NUMBER', r'\d+'),        # integer literal
        ]
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer:
    """Lexer + tokenizer for the small scripting language.

    The token rules are registered in the constructor; lex() exhausts
    the stream for the stored source into a list.
    """

    def __init__(self, code):
        self.code = code
        self.lg = LexerGenerator()
        self.lg.ignore(r'\s+')
        # NOTE(review): COMMENT matches only the ';' character itself,
        # not the text after it -- confirm downstream handles that.
        self.lg.add('COMMENT', r';')
        self.lg.add('STRING', r'".*"')
        self.lg.add('STRING', r'\'.*\'')
        self.lg.add('IF', r'if')
        self.lg.add('ELSE', r'else')
        self.lg.add('LPAREN', r'\(')
        self.lg.add('RPAREN', r'\)')
        self.lg.add('LBRACE', r'\{')
        self.lg.add('RBRACE', r'\}')
        self.lg.add('IS_EQUAL_TO', r'==')
        self.lg.add('EQUAL', r'=')
        self.lg.add('GREATER_EQUAL', r'>=')
        self.lg.add('LESSER_EQUAL', r'<=')
        self.lg.add('LESSER', r'<')
        self.lg.add('GREATER', r'>')
        # Fixed: the original registered PLUS with pattern '-' and
        # MINUS with pattern '+' (swapped).
        self.lg.add('PLUS', r'\+')
        self.lg.add('MINUS', r'-')
        self.lg.add('COMMA', r',')
        self.lg.add('NUMBER', r'\d+')
        self.lg.add('PRINT', r'print')
        self.lg.add('NAME', r'[a-zA-Z_][a-zA-Z0-9_]*')
        self.lexer = self.lg.build()

    def lex(self):
        """Exhaust the token stream for self.code into a list."""
        tokens = []
        stream = self.lexer.lex(self.code)
        while True:
            try:
                tokens += [stream.next()]
            except StopIteration:
                break
        return tokens
def test_basic_lexer(self):
    """Translated lexer loop: sum NUMBER tokens; stream ends via StopIteration."""
    lg = LexerGenerator()
    lg.add("NUMBER", r"\d+")
    lg.add("PLUS", r"\+")
    l = lg.build()

    def f(n):
        tokens = l.lex("%d+%d+%d" % (n, n, n))
        total = 0
        for idx in range(5):
            t = tokens.next()
            if idx % 2 == 0:
                # Even positions must be numbers.
                if t.name != "NUMBER":
                    return -1
                total += int(t.value)
            else:
                # Odd positions must be the "+" operator.
                if t.name != "PLUS":
                    return -2
                if t.value != "+":
                    return -3
        # This lexer signals exhaustion via StopIteration.
        ended = False
        try:
            tokens.next()
        except StopIteration:
            ended = True
        if not ended:
            return -4
        return total

    assert self.run(f, [14]) == 42
def generateLexer():
    """Build and return the lexer for the language.

    NOTE(review): keyword rules ("if", "else", ...) have no trailing
    word-boundary guard and are tried before WORD, so an identifier that
    begins with a keyword (e.g. "ifx") lexes as IF + remainder --
    confirm this is intended.
    """
    afiLex = LexerGenerator()
    # Whitespace is insignificant.
    afiLex.ignore(r'\s+')
    # Brackets, parens, braces, and the bare quote character.
    afiLex.add("LBRACKET", r'\[')
    afiLex.add("RBRACKET", r'\]')
    afiLex.add("LPARENS", r'\(')
    afiLex.add("RPARENS", r'\)')
    afiLex.add("LBRACE", r'\{')
    afiLex.add("RBRACE", r'\}')
    afiLex.add("QUOTE", r'\"')
    # Keywords.
    afiLex.add("IF", r"if")
    afiLex.add("ELSE", r"else")
    afiLex.add("ELIF", r"elif")
    afiLex.add("WHILE", r"while")
    # Literals and identifiers.
    afiLex.add("NUMBER", r'\d+')
    # NOTE(review): WORD consumes word characters plus ONE extra
    # trailing character that is not '^' or ';' -- this looks
    # accidental; verify against the parser before changing.
    afiLex.add("WORD", r"\w+[^\^;]")
    # Operators and terminator.
    afiLex.add("EQUAL", r'\=')
    afiLex.add("ADD", r'\+')
    afiLex.add("SUB", r'\-')
    afiLex.add("MULT", r'\*')
    afiLex.add("DIV", r'\/')
    afiLex.add("POW", r'\^')
    afiLex.add("SEMICOLON", r'\;')
    return afiLex.build()
# Lexer for the s-expression reader: parens, bare identifiers, and
# '#' line comments / whitespace ignored.
from rply import LexerGenerator

lg = LexerGenerator()
lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
# lg.add('LBRACKET', r'\[')
# lg.add('RBRACKET', r'\]')
# Any run of characters that is not a delimiter, whitespace, or '#'.
lg.add("IDENTIFIER", r"[^()\[\]{}\s#]+")
# '#' comments run up to (but not past) the end of the line.
lg.ignore(r"#.*(?=\r|\n|$)")
lg.ignore(r"\s+")
# Module-level singleton lexer.
lexer = lg.build()
@pg.production("hashtags : hashtags HASHTAG")
@pg.production("hashtags : HASHTAG")
def hashtags(p):
    # Collect HASHTAG tokens; multi-token parses are flattened into
    # one list via the module's flattened() helper.
    if len(p) == 1:
        return p
    else:
        return list(flattened(p))

@pg.production("topics : FOR hashtags")
@pg.production("topics : ")
def topics(p):
    # Build the topics dict; an absent clause yields an empty list.
    if len(p) == 0:
        return { u'topics': [] }
    else:
        # Strip the leading '#' from each hashtag token value.
        topics = [tok.value.strip('#') for tok in p[1]]
        return { u'topics': topics }

@pg.error
def error_handler(token):
    # Report the unexpected token with its stream offset when known.
    pos = token.getsourcepos()
    if pos:
        offset = "offset {}".format(pos.idx)
    else:
        offset = u"end of stream"
    raise ValueError("Ran into a {0} where it wasn't expected at {1}".format(token.gettokentype(), offset))

# Module-level singletons built from the generators defined above.
TweetLexer = lex.build()
TweetParser = pg.build()
    "-D"
    return -int_from_digit(p[1])

@PG.production("charge : + DIGIT")
def charge_production_plus_many(p):
    "+D"
    return +int_from_digit(p[1])

##### ATOM CLASS #####
# class ::= ':' NUMBER
# class :: int
@PG.production("class : colon NUMBER")
def class_production(p):
    "return :: int."
    return p[1]

@PG.error
def error_handler(token, expected=None):
    "Handle parser errors."
    # Debug builds surface full token/position detail; release builds
    # only warn (Python 2 print statement).
    if DEBUG:
        raise ValueError(("Ran into a %s (%s) where it wasn't expected."+\
            "At %s. Instead expected: %s.") % (repr(token.name), \
            repr(token.value), dictof(token.source_pos), repr(expected)))
    else:
        print "Warning: parser error"

# Build the module-level lexer and parser singletons.
LEXER = LG.build()
PARSER = PG.build()
def create_generator(rules):
    """Build a lexer from an iterable of (pattern, name) rule pairs."""
    generator = LexerGenerator()
    for rule in rules:
        # rule[0] is the regex pattern, rule[1] the token name.
        generator.add(rule[1], rule[0])
    return generator.build()
## generator will build lexer in runtime lexer_generator = LexerGenerator() ## drop comments lexer_generator.ignore(r'[\\\#].*|\(.*\)') ## drop spaces lexer_generator.ignore(r'\s+') ## number parsing lexer_generator.add('HEX', '0x[0-9A-Fa-f]+') lexer_generator.add('BIN', '0b[01]+') lexer_generator.add('NUMBER', r'[\+\-]?[0-9]+(\.[0-9]*)?') ## FORTH word names lexer_generator.add('WORD', r'[A-Za-z0-9_]+') ## build resulting lexer lexer = lexer_generator.build() ## parse next token from ## @param[in] source stream def WORD(source): try: token = source.next() D.append(token) return token except StopIteration: return None ## REPL loop def INTERPRET(SRC=''):
class Lexer():
    """Lexer for the Spanish-commented scripting language (disp/set/...)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules.

        Fixes over the original: the duplicate SET_ARRAY registration is
        removed, and word keywords carry a (?!\\w) guard so identifiers
        that merely start with a keyword ("iffy", "strings", "getter")
        lex as VAR_NAME instead of keyword + remainder.
        """
        # Utilities
        self.lexer.add('DISP', r'disp(?!\w)')
        self.lexer.add('GET', r'get(?!\w)')
        self.lexer.add('TO-STRING', r'stringify(?!\w)')
        self.lexer.add('TO-FLOAT', r'floatify(?!\w)')
        self.lexer.add('TO-INT', r'intfy(?!\w)')
        self.lexer.add('TO-BOOL', r'boolfy(?!\w)')
        # Variable-declaration keywords
        self.lexer.add('SET_INT', r'integer(?!\w)')
        self.lexer.add('SET_CHAR', r'char(?!\w)')
        self.lexer.add('SET_FLOAT', r'float(?!\w)')
        self.lexer.add('SET_ARRAY', r'array(?!\w)')
        self.lexer.add('SET_STRING', r'string(?!\w)')
        self.lexer.add('SET_BOOL', r'bool(?!\w)')
        # Control-flow keywords ('do-while' before 'while')
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('DOWHILE', r'do\-while(?!\w)')
        self.lexer.add('WHILE', r'while(?!\w)')
        self.lexer.add('FOR', r'for(?!\w)')
        self.lexer.add('IN', r'\#in')
        self.lexer.add('SET', r'set(?!\w)')
        # Operators -- two-character forms before their prefixes
        self.lexer.add('EQ', r'==')
        self.lexer.add('GE', r'>=')
        self.lexer.add('LE', r'<=')
        self.lexer.add('NE', r'!=')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        self.lexer.add('POW', r'\^')
        self.lexer.add('EQL', r'\=')
        self.lexer.add('MOD', r'\%')
        self.lexer.add('GT', r'>')
        self.lexer.add('LT', r'<')
        self.lexer.add('AND', r'&&')
        self.lexer.add('OR', r'\|\|')
        self.lexer.add('NOT', r'!')
        # Parentheses
        self.lexer.add('OPEN_BRACKET', r'\(')
        self.lexer.add('CLOSED_BRACKET', r'\)')
        # Literals -- FLOAT before INT so "1.5" is one token
        self.lexer.add('FLOAT', r'-?\d+\.\d+')
        self.lexer.add('INT', r'-?\d+')
        self.lexer.add('STRING', r'\"[^"]*\"')
        self.lexer.add('TRUE', r'#t')
        self.lexer.add('FALSE', r'#f')
        self.lexer.add('VAR_NAME', r'[a-zA-Z0-9]+')
        self.lexer.add('CHAR', r'\'[^ ]{1}\'')
        # Brace-delimited value lists of numbers/strings
        self.lexer.add(
            'VALUES_STRING',
            r'\{((-?\d+\.\d+|-?\d+|\"[^",]*\")[\ ]*,[\ ]*)*[\ ]*(-?\d+\.\d+|-?\d+|\"[^",]*\")\}'
        )
        # Whitespace and bracketed comments are insignificant
        self.lexer.ignore(r'\s+')
        self.lexer.ignore(r'\[[^\[\]]*\]')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
("GT", r">"), # Punctuation ("LPAREN", r"\("), ("RPAREN", r"\)"), ("LBRACE", r"{"), ("RBRACE", r"}"), ("COMMA", r","), ("LBRACK", r"\["), ("RBRACK", r"\]"), # Literals ("TRUE", r"true\b"), ("FALSE", r"false\b"), ("FLOAT", r"(((0|[1-9][0-9]*)(\.[0-9]*)+)|(\.[0-9]+))([eE][\+\-]?[0-9]*)?"), ("INTEGER", r"-?(0|[1-9][0-9]*)"), ("STRING", r"\"([^\"\\]|\\.)*\""), ("IDENTIFIER", r"[a-zA-Z_$][a-zA-Z_0-9]*"), # Others ("EQUAL", r"="), ] tokens = get_tokens() for token in tokens: lexer_gen.add(token[0], token[1]) LEXER = lexer_gen.build() def get_lexer(): return LEXER
# Character-literal and symbol rules; NS_SYMBOL and SYMBOL_RE are the
# shared symbol regexes defined earlier in this module.
lg.add("colon", r":")
lg.add("char_nl", r"\\newline")
lg.add("char_tab", r"\\tab")
lg.add("char_return", r"\\return")
lg.add("char_space", r"\\space")
lg.add("char", r"\\.")  # any other single escaped character
lg.add("ns_symbol", NS_SYMBOL)
lg.add("symbol", SYMBOL_RE)
lg.add("string", r'"(\\\^.|\\.|[^\"])*"')
# '#'-prefixed tags reuse the symbol regexes.
lg.add("ns_tag", "#" + NS_SYMBOL)
lg.add("tag", "#" + SYMBOL_RE)
# Commas are treated like whitespace; ';' comments run to end of line.
lg.ignore(r"[\s,\n]+")
lg.ignore(r";.*\n")

lexer = lg.build()

# Grammar terminals for the parser over the token names above.
pg = ParserGenerator([
    "boolean", "nil", "float", "number", "olist", "clist", "omap",
    "cmap", "ovec", "cvec", "oset", "colon", "char_nl", "char_tab",
    "char_return", "char_space", "char", "symbol", "ns_symbol",
    "string", "tag", "ns_tag"
])

class Char(TaggedValue):
    # A character literal carried as a TaggedValue with tag 'char'.
    def __init__(self, rep):
        TaggedValue.__init__(self, 'char', rep)

# Shared character constants.
NL = Char('\n')
TAB = Char('\t')
def __init__(self):
    """Build the scanner from the module-level grammar rule table."""
    generator = LexerGenerator()
    for rule in grammar:
        # rule[0] is the token name, rule[1] the regex pattern.
        generator.add(rule[0], rule[1])
    generator.ignore(r'\s+')
    self._scanner = generator.build()