示例#1
0
def lexer_from_mapping(mapping):
    """Build a lexer whose special tokens are taken from ``mapping``.

    Args:
        mapping: Mapping whose keys are the special characters. Each key
            becomes a token named after itself, written in the input as a
            backslash followed by that key.

    Returns:
        The lexer returned by ``LexerGenerator.build()``.
    """
    lg = LexerGenerator()
    special_chars = list(mapping.keys())

    # Escape data with forward slashes
    lg.add("DATA", r'/.+?/')

    # Add the special characters
    for char in special_chars:
        lg.add(char, r"\\" + char)

    # Normal tokens
    lg.add("TYPE", r':')
    lg.add("AND", r'\&')
    lg.add("OR", r'\|')
    lg.add("L_PAREN", r'\(')
    lg.add("R_PAREN", r'\)')
    lg.add("EQUAL", r'=')
    lg.add("CHILD", r'>')
    lg.add("PARENT", r'<')
    lg.add("NOT", r'!')

    # Everything else is data. The exclusion set is completed first and the
    # fallback rule is registered exactly once: adding it inside the loop
    # produced one rule per key (the first, least restrictive one always
    # winning) and no fallback at all for an empty mapping.
    excluded_chars = r'^<>=&|():!' + "".join(
        r"\\" + char for char in special_chars)
    lg.add("DATA", "[{excluded}]+".format(excluded=excluded_chars))

    lg.ignore(r'\s+')
    return lg.build()
示例#2
0
文件: test_lexer.py 项目: alex/rply
    def test_ignore_recursion(self):
        """Lexing a long run of ignored characters yields no tokens."""
        generator = LexerGenerator()
        generator.ignore(r"\s")
        lexer = generator.build()

        # 2000 single-character ignore matches in a row.
        blanks = " " * 2000
        produced = list(lexer.lex(blanks))
        assert produced == []
    def test_basic_lexer(self):
        """Lex ``n+n+n`` inside a helper and check the token sequence.

        The helper ``f`` reports failures through negative return codes
        instead of assertions; on success it returns the sum of the three
        numbers, so running it with 14 is expected to give 42.
        """
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")

        l = lg.build()

        def f(n):
            # NOTE(review): f is executed through self.run (not visible
            # here) -- presumably a restricted/translated runner, which
            # would explain the explicit while loop and the integer error
            # codes; confirm before restructuring.
            tokens = l.lex("%d+%d+%d" % (n, n, n))
            i = 0
            s = 0
            # Five tokens are expected: NUMBER PLUS NUMBER PLUS NUMBER.
            while i < 5:
                t = tokens.next()
                if i % 2 == 0:
                    # Even positions must be numbers; accumulate them.
                    if t.name != "NUMBER":
                        return -1
                    s += int(t.value)
                else:
                    # Odd positions must be the "+" operator.
                    if t.name != "PLUS":
                        return -2
                    if t.value != "+":
                        return -3
                i += 1
            # After the fifth token, next() must return None.
            if tokens.next() is not None:
                return -4
            return s

        assert self.run(f, [14]) == 42
示例#4
0
文件: test_both.py 项目: DasIch/rply
    def test_arithmetic(self):
        """Parse ``3*4+5`` with a generated lexer and parser.

        TIMES is declared after PLUS in the precedence list, so the
        expression is expected to evaluate as ``(3*4)+5``.
        """
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")
        lg.add("TIMES", r"\*")

        pg = ParserGenerator(["NUMBER", "PLUS", "TIMES"], precedence=[
            ("left", ["PLUS"]),
            ("left", ["TIMES"]),
        ])

        @pg.production("main : expr")
        def main(p):
            return p[0]

        @pg.production("expr : expr PLUS expr")
        @pg.production("expr : expr TIMES expr")
        def expr_binop(p):
            # p[1] is the operator token; its text selects the operation.
            return BoxInt({
                "+": operator.add,
                "*": operator.mul
            }[p[1].getstr()](p[0].getint(), p[2].getint()))

        @pg.production("expr : NUMBER")
        def expr_num(p):
            return BoxInt(int(p[0].getstr()))

        lexer = lg.build()
        parser = pg.build()

        # Check the computed value rather than mere truthiness of the
        # result object, which could never catch a wrong evaluation.
        result = parser.parse(lexer.lex("3*4+5"))
        assert result.getint() == 17
示例#5
0
文件: test_lexer.py 项目: alex/rply
    def test_position(self):
        """Check the line and column reported for every token."""
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        generator.ignore(r"\s+")
        lexer = generator.build()

        # (input text, expected (lineno, colno) of each token in order)
        scenarios = [
            ("2 + 3", [(1, 1), (1, 3), (1, 5)]),
            ("2 +\n    37", [(1, 1), (1, 3), (2, 5)]),
        ]
        for text, positions in scenarios:
            token_stream = lexer.lex(text)
            for lineno, colno in positions:
                token = token_stream.next()
                assert token.source_pos.lineno == lineno
                assert token.source_pos.colno == colno
            # The stream must be exhausted after the listed tokens.
            with raises(StopIteration):
                token_stream.next()
示例#6
0
文件: test_lexer.py 项目: alex/rply
    def test_regex_flags_ignore(self):
        """An ignore rule registered with re.DOTALL consumes the input."""
        everything = r".*"
        generator = LexerGenerator()
        generator.add("ALL", everything, re.DOTALL)
        generator.ignore(everything, re.DOTALL)
        lexer = generator.build()

        token_stream = lexer.lex("test\ndotall")

        # No token is expected: the first next() already ends the stream.
        with raises(StopIteration):
            token_stream.next()
示例#7
0
文件: test_lexer.py 项目: alex/rply
    def test_error(self):
        """Unlexable input raises LexingError carrying a source position."""
        generator = LexerGenerator()
        for token_name, regex in (("NUMBER", r"\d+"), ("PLUS", r"\+")):
            generator.add(token_name, regex)
        lexer = generator.build()

        token_stream = lexer.lex('fail')
        with raises(LexingError) as excinfo:
            token_stream.next()

        error_repr = repr(excinfo.value)
        assert 'SourcePosition(' in error_repr
示例#8
0
def construct_lexer():
    """Build and return the lexer for the language.

    Rules are tried in the order they are added, so longer operators are
    registered before the operators that are their prefixes, and keywords
    are registered before the general NAME rule.

    Returns:
        The lexer returned by ``LexerGenerator.build()``.
    """
    lg = LexerGenerator()

    # Literals
    lg.add('NUMBER', r'\d+(\.\d+)?')
    lg.add('STRING', r'\".*?\"')

    # Brackets and separators
    lg.add('OPEN_PAREN', r'\(')
    lg.add('CLOSE_PAREN', r'\)')
    lg.add('INDEX_OPEN', r'\[')
    lg.add('INDEX_CLOSE', r'\]')
    lg.add('RANGE', r'\.\.\.')
    lg.add('DOT', r'\.')
    lg.add('COMMA', ',')

    # Multi-character operators: must precede their one-character prefixes
    # ('*', '-', '<', '>', '!'), otherwise they can never match.
    lg.add('EXPONENTIATION', r'\*\*')
    lg.add('SINGLE_ARROW', '->')
    lg.add('DOUBLE_ARROW', '=>')
    lg.add('LE', '<=')
    lg.add('GE', '>=')
    lg.add('EQ', '==')
    lg.add('NE', '!=')

    # Single-character operators
    lg.add('ADD', r'\+')
    lg.add('SUBTRACT', r'-')
    lg.add('MULTIPLY', r'\*')
    lg.add('DIVIDE', '/')
    lg.add('SELF_APPLY', '!')
    lg.add('GT', '>')
    lg.add('LT', '<')

    # Word operators and keywords. The trailing \b keeps them from matching
    # the beginning of a longer name (e.g. "index" is not IN + NAME).
    reserved_words = (
        ('AND', 'and'),
        ('OR', 'or'),
        ('NOT', 'not'),
        ('XOR', 'xor'),
        ('IN', 'in'),
        ('IF', 'if'),
        ('ELSE', 'else'),
        ('DO', 'do'),
        ('END', 'end'),
        ('DEF', 'def'),
        ('LET', 'let'),
        ('WHILE', 'while'),
        ('FOR', 'for'),
    )
    for token_name, word in reserved_words:
        lg.add(token_name, word + r'\b')

    # Names come last and must contain at least one character: the previous
    # pattern could match the empty string, which never advances the lexer.
    lg.add('NAME', r'[a-zA-Z_][a-zA-Z0-9_]*')

    # Whitespace
    lg.ignore(r"\s+")

    return lg.build()
示例#9
0
文件: test_lexer.py 项目: alex/rply
    def test_regex_flags(self):
        """A rule registered with re.DOTALL matches across the newline."""
        source = "test\ndotall"
        generator = LexerGenerator()
        generator.add("ALL", r".*", re.DOTALL)
        lexer = generator.build()

        token_stream = lexer.lex(source)
        token = token_stream.next()
        position = token.source_pos
        assert position.lineno == 1
        assert position.colno == 1
        assert token.getstr() == "test\ndotall"

        # The single token covers the whole input.
        with raises(StopIteration):
            token_stream.next()
示例#10
0
    def test_repr(self):
        """A token stream can be stringified while it is being consumed."""
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        generator.ignore(r"\s+")
        lexer = generator.build()

        token_stream = lexer.lex("2 + 3")

        # Before the first token is read.
        assert str(token_stream) is not None
        number = token_stream.next()
        assert number.name == "NUMBER"
        assert number.value == "2"

        # After one token has been read.
        assert str(token_stream) is not None
        plus = token_stream.next()
        assert plus.name == "PLUS"
示例#11
0
文件: test_lexer.py 项目: alex/rply
    def test_newline_position(self):
        """Check positions when the newline itself is a token."""
        generator = LexerGenerator()
        generator.add("NEWLINE", r"\n")
        generator.add("SPACE", r" ")
        lexer = generator.build()

        token_stream = lexer.lex(" \n ")
        # Expected (lineno, colno) for: space, newline, space.
        for lineno, colno in ((1, 1), (1, 2), (2, 1)):
            token = token_stream.next()
            assert token.source_pos.lineno == lineno
            assert token.source_pos.colno == colno
示例#12
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""
        # Keywords. The (?!\w) guard stops them from matching the start of
        # a longer identifier (e.g. "nextval" is an ID, not NEXT + ID).
        self.lexer.add('START', r'start(?!\w)')
        self.lexer.add('DONE', r'done(?!\w)')
        self.lexer.add('NEXT', r'next(?!\w)')

        # Parenthesis, assorted
        self.lexer.add('LPAREN', r'\(')
        self.lexer.add('RPAREN', r'\)')

        self.lexer.add('LBRACE', r'\{')
        self.lexer.add('RBRACE', r'\}')

        self.lexer.add('LBRACKET', r'\[')
        self.lexer.add('RBRACKET', r'\]')

        # Delimiters ('<=' before '=' so the longer token wins)
        self.lexer.add('COLON', r'\:')
        self.lexer.add('SEMI', r'\;')
        self.lexer.add('LEFT_ARROW', r'<=')
        self.lexer.add('EQUAL', r'=')
        self.lexer.add('COMMA', r',')

        # Operators
        self.lexer.add('ADD', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MOD', r'mod(?!\w)')
        self.lexer.add('NOT_EQ', r'\!\=')

        # Number, then identifier. Inside a character class '|' is a
        # literal, so the former [\w|_] accepted '|' in identifiers; \w
        # already includes the underscore.
        self.lexer.add('NUMBER', r'\d+')
        self.lexer.add('ID', r'\w+')

        # Ignore spaces (raw string: '\s' is not a valid str escape)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#13
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def addTokens(self):
        """Register every token rule and the ignored whitespace."""
        self.lexer.add('NUMBER', r'\d+')
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MINUS', r'-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')

        self.lexer.add('BIGGER', r'\>')
        self.lexer.add('SMALLER', r'\<')
        # '==' must be tried before '='.
        self.lexer.add('EQUAL', r'==')
        self.lexer.add('ASSINGMENT', r'=')
        self.lexer.add('DIFF', r'\!=')
        self.lexer.add('COMMA', r',')

        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('OPEN_BRACKET', r'\{')
        self.lexer.add('CLOSE_BRACKET', r'\}')

        # Keywords and word operators. Every one carries the (?!\w) guard:
        # without it 'e' (AND) would match the first letter of any
        # identifier starting with "e", 'ou' the start of "outro", etc.
        self.lexer.add('PRINT', r'imprime(?!\w)')
        self.lexer.add('IF', r'se(?!\w)')
        self.lexer.add('ELSE', r'senao(?!\w)')
        self.lexer.add('WHILE', r'enquanto(?!\w)')
        self.lexer.add('FUNC', r'func(?!\w)')
        self.lexer.add('AND', r'e(?!\w)')
        self.lexer.add('OR', r'ou(?!\w)')
        self.lexer.add('NOT', r'inv(?!\w)')
        self.lexer.add('NEWLINE', r'[\r\n]+')

        # Identifiers comes last, so it does not match other tokens
        self.lexer.add('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*')
        self.lexer.ignore('[ \t\r\f\v]+')  # Ignores whitespace

    def createLexer(self):
        """Register the rules and return the built lexer."""
        self.addTokens()
        return self.lexer.build()
示例#14
0
class Lexer:
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lg = LexerGenerator()

    def build(self):
        """Register every rule and return the built lexer."""
        # Keywords, reserved words and modifiers. They are registered
        # before IDENTIFIER, so each carries a (?!\w) guard; without it
        # "total" would lex as TO followed by the identifier "tal".
        reserved_words = (
            # --- Keywords --- #
            ("LET", "let"),
            ("FN", "fn"),
            ("RET", "return"),
            ("TO", "to"),
            # --- Reserved --- #
            ("TRUE", "true"),
            ("FALSE", "false"),
            # --- Modifiers --- #
            ("PTR", "ptr"),
            ("REF", "ref"),
            ("DEREF", "deref"),
            ("ADDR", "addr"),
        )
        for token_name, word in reserved_words:
            self.lg.add(token_name, word + r"(?!\w)")

        # --- Punctuations --- #
        self.lg.add("(", r"\(")
        self.lg.add(")", r"\)")

        self.lg.add("{", r"\{")
        self.lg.add("}", r"\}")

        self.lg.add("=", r"\=")
        self.lg.add(";", r"\;")
        self.lg.add(",", r"\,")

        # --- Base Tokens --- #
        # FLOAT precedes NUMBER so "1.5" is not split at the dot.
        self.lg.add("FLOAT", r"[-]?\d+[.]\d+")
        self.lg.add("NUMBER", r"[-]?\d+")
        self.lg.add("IDENTIFIER", r"[_\w]+[_\w0-9]*")

        self.lg.ignore(r"\s+")

        return self.lg.build()
示例#15
0
class RegoLexer:
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def create_tokens(self):
        """Register every token rule and the ignored whitespace."""
        self.lexer.add('PRINT', r'OUTPUT')
        self.lexer.add('NUMBER', r'-?\d+')
        # Non-greedy so that two strings on one line ("a" + "b") are two
        # tokens; the previous greedy pattern ran to the last quote.
        # Empty strings are covered by the same alternatives.
        self.lexer.add('STRING', r'(".*?")|(\'.*?\')')
        # Newline and tab are tokens here, which is why the ignore rule
        # below does not include them.
        self.lexer.add('NEWLINE', '\n')
        self.lexer.add('NEWTAB', '\t')

        # operations
        self.lexer.add('ADD', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'DIV')
        self.lexer.add('MOD', r'MOD')
        self.lexer.add('POW', r'POW')

        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')

        # conditionals
        self.lexer.add('IF', r'IF')
        self.lexer.add('THEN', r'THEN')
        self.lexer.add('ELSE', r'ELSE')
        self.lexer.add('END_IF', r'ENDIF')

        # comparatives (two-character forms first)
        self.lexer.add("GTE", r"(>=)")
        self.lexer.add("LTE", r"(<=)")
        self.lexer.add("EQ", r"(=)")
        self.lexer.add("LT", r"(<)")
        self.lexer.add("GT", r"(>)")

        self.lexer.ignore('[ \r\f\v]+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self.create_tokens()

        return self.lexer.build()
示例#16
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register the number, operator and sign rules, then whitespace."""
        token_table = (
            ('NUMBER', r'\d+'),
            # Operators
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            # Signs
            ('OPEN_PARENS', r'\('),
            ('CLOSE_PARENS', r'\)'),
            ('SEMI_COLON', r'\;'),
        )
        for token_name, regex in token_table:
            self.lexer.add(token_name, regex)
        # Ignore spaces
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        built = self.lexer.build()
        return built
示例#17
0
    def test_simple(self):
        """Lex "2+3" and check each token, then the end-of-stream value."""
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        lexer = generator.build()

        token_stream = lexer.lex("2+3")
        expected = (("NUMBER", "2"), ("PLUS", "+"), ("NUMBER", "3"))
        for token_name, text in expected:
            token = token_stream.next()
            assert token.name == token_name
            assert token.value == text
        # "3" is the last token read and sits at index 2 of the input.
        assert token.source_pos.idx == 2
        # Here the exhausted stream returns None rather than raising.
        end_marker = token_stream.next()
        assert end_marker is None
示例#18
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""
        self.lexer.add('NUMBER', r'\d+')
        # Operators
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MINUS', r'-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')
        # Comp
        self.lexer.add('BIGGER', r'\>')
        self.lexer.add('SMALLER', r'\<')
        self.lexer.add('EQUAL', r'\=')
        self.lexer.add('DIFF', r'\!=')
        self.lexer.add('OPEN_PARENS', r'\(')
        self.lexer.add('CLOSE_PARENS', r'\)')
        self.lexer.add('OPEN_BRACKETS', r'\{')
        self.lexer.add('CLOSE_BRACKETS', r'\}')
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('QUOTE', r'\"')
        # Vars
        self.lexer.add('ATTRIBUTION', r':=')
        # Keywords are registered before IDENTIFIER, so each carries a
        # (?!\w) guard; without it "variable" would lex as VAR + "iable"
        # and "senha" as IF + "nha".
        self.lexer.add('VAR', r'var(?!\w)')
        # Else
        self.lexer.add('ELSE', r'SENAO(?!\w)')
        self.lexer.add('ELSE', r'senao(?!\w)')
        # If
        self.lexer.add('IF', r'SE(?!\w)')
        self.lexer.add('IF', r'se(?!\w)')
        # Print
        self.lexer.add('PRINT', r'PRINT(?!\w)')
        self.lexer.add('PRINT', r'print(?!\w)')
        # Identifier
        self.lexer.add('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9]*')
        # Raw string: '\s' is not a valid str escape sequence.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#19
0
class Syntax:
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self) -> None:
        self.lg = LexerGenerator()

    def Build(self):
        """Register every rule and return the built lexer.

        All patterns are raw strings so that regex escapes reach the regex
        engine unchanged.
        """
        self.lg.add(";", r";")
        self.lg.add(".", r"\.")
        self.lg.add(",", r",")
        self.lg.add("(", r"\(")
        self.lg.add(")", r"\)")
        self.lg.add("{", r"\{")
        self.lg.add("}", r"\}")
        self.lg.add("[", r"\[")
        self.lg.add("]", r"\]")

        self.lg.add("=", r"\=")

        self.lg.add("->", r"\-\>")
        self.lg.add("*", r"\*")

        # A double-quoted string: any character except quote, backslash
        # and newline, or a backslash escape (including backslash-newline).
        # As a non-raw literal the backslashes were consumed by Python, so
        # an escaped quote ended the string early.
        self.lg.add("STRING", r'["]([^"\\\n]|\\.|\\\n)*["]')

        # "*" is already registered above; a second identical rule could
        # never be reached, so it is not repeated here.
        self.lg.add("&", r"\&")
        self.lg.add("@", r"\@")

        self.lg.add("NUMBER", r"[-]*[0-9]+")

        # These keywords include the trailing space in the matched text.
        self.lg.add("STRUCT", r"struct ")

        self.lg.add("FN", r"fn ")
        self.lg.add("RETURN", r"return ")

        self.lg.add("TO", r"to ")

        self.lg.add("IDENTIFIER", r"[_\w][_\w0-9]*")

        self.lg.ignore(r"\s+")

        return self.lg.build()
示例#20
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Definitions for all possible tokens."""

        # Print statement
        self.lexer.add('PRINT', r'print')

        # Left Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')

        # Close Parenthesis
        self.lexer.add('CLOSE_PAREN', r'\)')

        # Semicolon
        self.lexer.add('SEMI_COLON', r'\;')

        # Binary Operators
        self.lexer.add('SUM', r'\+')  # Addition
        self.lexer.add('SUB', r'\-')  # Subtraction

        # TODO:
        # Unary Operators

        # TODO:
        # Conditional Statements

        # TODO:
        # Loop Statements

        # Number
        self.lexer.add('NUMBER', r'\d+')

        # Ignore whitespace (raw string: '\s' is not a valid str escape and
        # triggers an invalid-escape warning in a plain literal)
        self.lexer.ignore(r'\s+')

    def create(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#21
0
文件: test_lexer.py 项目: olasd/rply
    def test_ignore(self):
        """Ignored whitespace is skipped but still counted in positions."""
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        generator.ignore(r"\s+")
        lexer = generator.build()

        token_stream = lexer.lex("2 + 3")
        for token_name, text in (("NUMBER", "2"), ("PLUS", "+"), ("NUMBER", "3")):
            token = token_stream.next()
            assert token.name == token_name
            assert token.value == text
        # The last token ("3") starts at index 4 of the original input.
        assert token.source_pos.idx == 4
        with raises(StopIteration):
            token_stream.next()
示例#22
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""
        self.lexer.add('PRINT', r'print')
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # semicolon
        self.lexer.add('SEMI_COLON', r'\;')

        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')

        self.lexer.add('NUMBER', r'\d+')

        # Raw string: '\s' is not a valid str escape and triggers an
        # invalid-escape warning in a plain literal.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#23
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""
        self.lexer.add('IMPRIMA', r'mostra_ai')  # print keyword
        self.lexer.add('ABRE_PAR', r'\(')
        self.lexer.add('FECHA_PAR', r'\)')
        self.lexer.add('PONTO_VIRGULA', r'\;')
        self.lexer.add('SOMA', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('NUM', r'\d+')
        # Additional arithmetic operators
        self.lexer.add('MULT', r'\*')
        self.lexer.add('DIV', r'\/')
        self.lexer.add('POT', r'\^')
        self.lexer.add('REST', r'\%')
        # Raw string: '\s' is not a valid str escape and triggers an
        # invalid-escape warning in a plain literal.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""
        # Print
        self.lexer.add('PRINT', r'print')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Ignore spaces (raw string: '\s' is not a valid str escape and
        # triggers an invalid-escape warning in a plain literal)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#25
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""
        # Keywords come before IDENTIFIER and all carry the (?!\w) guard.
        # PRINT and MAIN lacked it, so "printer" lexed as PRINT + "er".
        self.lexer.add('PRINT', r'print(?!\w)')
        self.lexer.add('MAIN', r'main(?!\w)')
        self.lexer.add('NUMBER', r'\d+(\.\d+)?')
        self.lexer.add('STRING', '(""".*?""")|(".*?")|(\'.*?\')')
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('WHILE', r'while(?!\w)')
        self.lexer.add('LET', r'let(?!\w)')
        self.lexer.add('IDENTIFIER', r"[a-zA-Z_][a-zA-Z0-9_]*")
        # Two-character comparison operators before their prefixes.
        self.lexer.add('==', r'==')
        self.lexer.add('!=', r'!=')
        self.lexer.add('>=', r'>=')
        self.lexer.add('<=', r'<=')
        self.lexer.add('>', r'>')
        self.lexer.add('<', r'<')
        self.lexer.add('=', r'=')
        self.lexer.add('{', r'\{')
        self.lexer.add('}', r'\}')
        self.lexer.add('|', r'\|')
        self.lexer.add('SEMICOLON', r';')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')
        self.lexer.add('(', r'\(')
        self.lexer.add(')', r'\)')
        self.lexer.add('NEWLINE', r'\n')

        # ignore whitespace; '\n' is left out because NEWLINE is a token
        self.lexer.ignore('[ \t\r\f\v]+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#26
0
def build_lexer():
    """Build the lexer for the command language.

    Every name from ``common_commands``, ``lmao_commands`` and
    ``rofl_commands`` becomes a token whose pattern is the command text
    itself.

    Returns:
        The lexer returned by ``LexerGenerator.build()``.
    """
    lg = LexerGenerator()
    # Descending order -- presumably so that a command which extends
    # another one is registered before its shorter prefix.
    commands = sorted(
        itertools.chain(common_commands, lmao_commands, rofl_commands),
        reverse=True)
    for command in commands:
        lg.add(command, command)
    lg.add('NEWLINE', r'\n')

    lg.add('SCALAR_VAR', r's\d+')
    lg.add('ARRAY_VAR', r'a\d+')

    lg.add('REGISTER', r'reg[A-H]')
    lg.add('LABEL', r'[a-zA-Z_][a-zA-Z_0-9]*')
    # The optional sign applies to both number forms. Previously the
    # alternation bound looser than '-?', so "-.5" was not a number.
    lg.add('NUM_LITERAL', r'-?(\d+(\.\d+)?|\.\d+)')
    lg.add('CHAR_LITERAL', r"'([^\\']|\\n|\\t|\\'|\\\\)'")
    lg.add('COLON', r':')

    lg.ignore(r'[ \t]')
    lg.ignore(r'\#.*')
    # Catch-all: any other single character becomes an ERROR token.
    lg.add('ERROR', r'.')

    return lg.build()
示例#27
0
class Lexer:
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""
        self.lexer.add('PRINT', r'print')

        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')

        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        self.lexer.add('MOD', r'\%')

        self.lexer.add('NUMBER', r'\d+')
        # Raw string: '\s' is not a valid str escape and triggers an
        # invalid-escape warning in a plain literal.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#28
0
    def test_ignore(self):
        """Whitespace is dropped from the stream; indices still count it."""
        generator = LexerGenerator()
        for token_name, regex in (("NUMBER", r"\d+"), ("PLUS", r"\+")):
            generator.add(token_name, regex)
        generator.ignore(r"\s+")
        lexer = generator.build()

        token_stream = lexer.lex("2 + 3")

        left = token_stream.next()
        assert (left.name == "NUMBER")
        assert (left.value == "2")

        operator_token = token_stream.next()
        assert (operator_token.name == "PLUS")
        assert (operator_token.value == "+")

        right = token_stream.next()
        assert (right.name == "NUMBER")
        assert (right.value == "3")
        # "3" is the fifth character of the input.
        assert (right.source_pos.idx == 4)

        with raises(StopIteration):
            token_stream.next()
示例#29
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def addTokens(self):
        """Register every token rule and the ignored whitespace.

        No pattern is anchored with '^'. A lexer matches each rule at the
        current offset of the input, and '^' only succeeds at the real
        start of the string, so the anchored rules could never match after
        the first token.
        """
        self.lexer.add('NUMBER', r'-?\d+(\.\d+)?')
        # Two-character comparison operators before '<', '>', '='.
        self.lexer.add('EQUAL', r'==')
        self.lexer.add('LESSEQUAL', r'<=')
        self.lexer.add('GREATEREQUAL', r'>=')
        self.lexer.add('DIFFERENT', r'!=')
        # Keywords precede ID; (?!\w) keeps them from matching the start
        # of a longer identifier (e.g. "order" is not OPR_LOG + "der").
        self.lexer.add('OPR_LOG', r'not(?!\w)')
        self.lexer.add('OPR_LOG', r'or(?!\w)')
        self.lexer.add('OPR_LOG', r'and(?!\w)')
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('WHILE', r'while(?!\w)')
        self.lexer.add('LEIA', r'leia(?!\w)')
        self.lexer.add('ESCREVA', r'escreva(?!\w)')
        self.lexer.add('ID', r'[a-zA-Z]([a-zA-Z]|\d)*')
        self.lexer.add('GRE', r'>')
        self.lexer.add('LES', r'<')
        self.lexer.add('ATRIB', r'=')
        self.lexer.add('COM', r',')
        self.lexer.add('SEM', r';')
        self.lexer.add('ADD', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('POW', r'\^')
        self.lexer.add('DIV', r'/')
        self.lexer.add('MOD', r'%')
        self.lexer.add('LPA', r'\(')
        self.lexer.add('RPA', r'\)')
        # Escaped: a bare '.' matches any character, which turned DOT into
        # a catch-all for every otherwise unlexable character.
        self.lexer.add('DOT', r'\.')

        self.lexer.ignore(r'\s+')

    def getLexer(self):
        """Register the rules and return the built lexer."""
        self.addTokens()
        return self.lexer.build()
示例#30
0
class Lexer(object):
    """Defines a lexer for the PseudoExe language."""

    def __init__(self):
        """Create the underlying LexerGenerator."""
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every entry of TOKENS, then the ignored whitespace."""
        # Each entry of TOKENS is unpacked into the arguments of add().
        for t in TOKENS:
            self.lexer.add(*t)

        # Ignore spaces (raw string: '\s' is not a valid str escape and
        # triggers an invalid-escape warning in a plain literal)
        self.lexer.ignore(r'\s+')
        self.lexer.ignore("\n+")

    def get_lexer(self):
        """Build and return the lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#31
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def addTokens(self):
        """Register every token rule and the ignored whitespace."""
        self.lexer.add('NUMBER', r'\d+')
        # Word operators. Like the keywords below they precede IDENTIFIER,
        # so they need the same (?!\w) guard; without it "lesson" lexed as
        # LESS followed by the identifier "on".
        self.lexer.add('PLUS', r'plus(?!\w)')
        self.lexer.add('MINUS', r'minus(?!\w)')
        self.lexer.add('MUL', r'times(?!\w)')
        self.lexer.add('DIV', r'divide(?!\w)')

        self.lexer.add('GREATER', r'greater(?!\w)')
        self.lexer.add('LESS', r'less(?!\w)')
        # '==' must be tried before '='.
        self.lexer.add('EQUAL', r'==')
        self.lexer.add('ASSINGMENT', r'=')
        self.lexer.add('COMMA', r',')

        # This language uses [ ] as parentheses and / \ as block brackets.
        self.lexer.add('OPEN_PAREN', r'\[')
        self.lexer.add('CLOSE_PAREN', r'\]')
        self.lexer.add('OPEN_BRACKET', r'\/')
        self.lexer.add('CLOSE_BRACKET', r'\\')

        self.lexer.add('PRINT', r'print(?!\w)')
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('WHILE', r'while(?!\w)')
        self.lexer.add('FUNC', r'def(?!\w)')
        self.lexer.add('AND', r'and(?!\w)')
        self.lexer.add('OR', r'or(?!\w)')
        self.lexer.add('NOT', r'not(?!\w)')
        self.lexer.add('NEWLINE', r'[\r\n]+')

        # Identifiers last so they do not shadow the word tokens above.
        self.lexer.add('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*')
        self.lexer.ignore('[ \t\r\f\v]+')

    def createLexer(self):
        """Register the rules and return the built lexer."""
        self.addTokens()
        return self.lexer.build()
示例#32
0
class Lexer(object):
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace.

        Keywords carry a (?!\\w) guard because they precede IDENTIFIER;
        without it "integer" lexed as PRIMITIVE_DATA_TYPE + "eger".
        """
        self.lexer.add('PRINT', r'print(?!\w)')
        self.lexer.add('INPUT', r'input(?!\w)')
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('SEMICOLON', r';')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('NUMBER', r'\d+')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')
        # '!=' has to be registered before '!': with NOT first, the '!' of
        # "!=" was consumed on its own and the remaining '=' had no rule.
        self.lexer.add('NOT_EQUALS', r'!=')
        self.lexer.add('NOT', r'!')
        self.lexer.add('COMPLEMENT', r'~')
        self.lexer.add('PRIMITIVE_DATA_TYPE', r'int(?!\w)')
        self.lexer.add('OPEN_CURLY', r'{')
        self.lexer.add('CLOSE_CURLY', r'}')
        self.lexer.add('RETURN', r'return(?!\w)')
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('EQUALS', r'==')
        self.lexer.add('FOR', r'for(?!\w)')
        self.lexer.add('LESS_EQ', r'<=')
        self.lexer.add('GREATER_EQ', r'>=')
        self.lexer.add('LESS', r'<')
        self.lexer.add('GREATER', r'>')
        self.lexer.add('EQUAL_SIGN', r':=')
        self.lexer.add('COMMA', r',')
        self.lexer.add('IDENTIFIER', r'[a-zA-Z]\w*')
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#33
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""
        # print
        self.lexer.add('PRINT', r'print')
        # parentheses
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # semicolon
        self.lexer.add('SEMI_COLON', r'\;')
        # addition and subtraction operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        # number
        self.lexer.add('NUMBER', r'\d+')
        # ignore spaces (raw string: '\s' is not a valid str escape and
        # triggers an invalid-escape warning in a plain literal)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#34
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""
        # Print
        self.lexer.add('PRINT', r'mostrarenpantalla')
        # Parentheses
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Operators (spelled as words in this language)
        self.lexer.add('SUM', r'suma')
        self.lexer.add('SUB', r'resta')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Ignore spaces (raw string: '\s' is not a valid str escape and
        # triggers an invalid-escape warning in a plain literal)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#35
0
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register the dynamic and the fixed token rules."""
        # dynamic tokens: each item supplies (name, pattern)
        for item in get_id_tokens():
            self.lexer.add(item[0], item[1])
        # braces
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('OPEN_CURLY', r'{')
        self.lexer.add('CLOSE_CURLY', r'}')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Comma
        self.lexer.add('COMMA', r',')
        # Operators
        self.lexer.add('MUL', r'\*')
        self.lexer.add('EQU', r'=')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # String: double quotes around non-quote/non-backslash characters
        # or backslash escapes. Kept on one line: the former triple-quoted
        # pattern contained its own newlines and indentation, which the
        # regex (compiled without re.VERBOSE) required literally in the
        # input, so ordinary strings never matched.
        self.lexer.add('STRING', r'\"([^\\\"]|\\.)*\"')
        # Identifier: the trailing '*' was missing, limiting identifiers
        # to exactly two characters.
        self.lexer.add('IDENTIFIER', r"[a-zA-Z_][a-zA-Z0-9_]*")
        # Ignore spaces
        self.lexer.ignore(r'\s+')

    def get_rules(self):
        """Return the names of the rules currently registered."""
        return [rule.name for rule in self.lexer.rules]

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#36
0
    def test_states(self):
        """Tokens carry the lexer state in which they were matched."""
        generator = LexerGenerator(initial_state="scalar")

        # Rules of the initial ("scalar") state.
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        generator.ignore(r"\s+")

        # "[" switches to the "vector" state, "]" switches back.
        generator.add("OPEN_BRACKET", r"\[", to_state="vector")
        generator.add("PLUS", r"\+", state="vector")
        generator.add("NUMBER", r"\d+", state="vector")
        generator.add("NEW_LINE", r"\n+", state="vector")
        generator.add("CLOSE_BRACKET", r"\]", state="vector", to_state="scalar")
        generator.ignore(r" +", state="vector")

        lexer = generator.build()
        token_stream = lexer.lex("2 + [ 3 + 4 \n\n 5 + 6 ] + 7")

        # (name, value, state) expected for each token, in order.
        expected = [
            ("NUMBER", "2", "scalar"),
            ("PLUS", "+", "scalar"),
            ("OPEN_BRACKET", "[", "scalar"),
            ("NUMBER", "3", "vector"),
            ("PLUS", "+", "vector"),
            ("NUMBER", "4", "vector"),
            ("NEW_LINE", "\n\n", "vector"),
            ("NUMBER", "5", "vector"),
            ("PLUS", "+", "vector"),
            ("NUMBER", "6", "vector"),
            ("CLOSE_BRACKET", "]", "vector"),
            ("PLUS", "+", "scalar"),
            ("NUMBER", "7", "scalar"),
        ]

        for (name, value, state), token in zip(expected, token_stream):
            assert token.name == name
            assert token.value == value
            assert token.state == state
示例#37
0
文件: mlexer.py 项目: Elisis/Mendl
class Lexer():
    """Registers the token rules on a LexerGenerator and builds the lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule and the ignored whitespace."""

        # Print
        self.lexer.add('OUT', r'out')

        # If
        self.lexer.add('IF', r'if')

        # Parentheses
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')

        # Braces
        self.lexer.add('OPEN_BRACE', r'\{')
        self.lexer.add('CLOSE_BRACE', r'\}')

        # Semicolon
        self.lexer.add('SEMI_COLON', r'\;')

        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MULTIPLY', r'\*')

        # Number
        self.lexer.add('NUMBER', r'\d+')

        # Ignore spaces (raw string: '\s' is not a valid str escape and
        # triggers an invalid-escape warning in a plain literal)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#38
0
文件: lexer.py 项目: coolq1000/USPIL
class Lexer:
    """Tokenizer for one source string.

    The rules are registered on a ``LexerGenerator`` in ``__init__`` and the
    built lexer is kept in ``self.lexer``; :meth:`lex` runs it over
    ``self.code``.
    """

    def __init__(self, code):
        """Store ``code`` and build the lexer.

        :param code: source text that :meth:`lex` will tokenize.
        """
        self.code = code
        self.lg = LexerGenerator()
        self.lg.ignore(r'\s+')
        self.lg.add('COMMENT', r';')
        # Non-greedy bodies: a greedy '.*' would swallow everything between
        # the first and the last quote on a line, merging separate literals.
        self.lg.add('STRING', r'".*?"')
        self.lg.add('STRING', r'\'.*?\'')
        # '\b' keeps keywords from matching the start of a longer name
        # (e.g. "iffy" must lex as one NAME, not IF followed by NAME).
        self.lg.add('IF', r'if\b')
        self.lg.add('ELSE', r'else\b')
        self.lg.add('LPAREN', r'\(')
        self.lg.add('RPAREN', r'\)')
        self.lg.add('LBRACE', r'\{')
        self.lg.add('RBRACE', r'\}')
        # Two-character operators are listed before their one-character
        # prefixes ('==' before '=', '>=' before '>', '<=' before '<').
        self.lg.add('IS_EQUAL_TO', r'==')
        self.lg.add('EQUAL', r'=')
        self.lg.add('GREATER_EQUAL', r'>=')
        self.lg.add('LESSER_EQUAL', r'<=')
        self.lg.add('LESSER', r'<')
        self.lg.add('GREATER', r'>')
        # PLUS is '+', MINUS is '-' (the two patterns used to be swapped).
        self.lg.add('PLUS', r'\+')
        self.lg.add('MINUS', r'-')
        self.lg.add('COMMA', r',')
        self.lg.add('NUMBER', r'\d+')
        self.lg.add('PRINT', r'print\b')
        self.lg.add('NAME', r'[a-zA-Z_][a-zA-Z0-9_]*')
        self.lexer = self.lg.build()

    def lex(self):
        """Return all tokens of ``self.code`` as a list.

        The token stream is consumed to exhaustion; any error raised by the
        stream while lexing propagates to the caller.
        """
        return list(self.lexer.lex(self.code))
示例#39
0
    def test_basic_lexer(self):
        """Lex "n+n+n" with a NUMBER/PLUS lexer and check the token sequence.

        The inner ``f`` returns the sum of the three numbers on success and a
        distinct negative code for each kind of mismatch; with n=14 the
        expected result is 42.
        """
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")

        l = lg.build()

        def f(n):
            tokens = l.lex("%d+%d+%d" % (n, n, n))
            i = 0
            s = 0
            # Exactly five tokens are expected: NUMBER PLUS NUMBER PLUS NUMBER.
            while i < 5:
                t = tokens.next()
                if i % 2 == 0:
                    # Even positions must be numbers; accumulate their values.
                    if t.name != "NUMBER":
                        return -1
                    s += int(t.value)
                else:
                    # Odd positions must be the literal "+" operator.
                    if t.name != "PLUS":
                        return -2
                    if t.value != "+":
                        return -3
                i += 1

            # After the fifth token the stream must be exhausted, which it
            # signals by raising StopIteration.
            ended = False
            try:
                tokens.next()
            except StopIteration:
                ended = True

            if not ended:
                return -4

            return s

        # NOTE(review): self.run is defined outside this snippet; presumably
        # it executes f with the given argument list -- confirm.
        assert self.run(f, [14]) == 42
示例#40
0
def generateLexer():
    """Build and return the lexer.

    Whitespace is registered as ignored first; the token rules are then
    added in the order they appear in ``token_rules``.
    """
    token_rules = (
        # Delimiters
        ("LBRACKET", r'\['),
        ("RBRACKET", r'\]'),
        ("LPARENS", r'\('),
        ("RPARENS", r'\)'),
        ("LBRACE", r'\{'),
        ("RBRACE", r'\}'),
        ("QUOTE", r'\"'),
        # Keywords
        ("IF", r"if"),
        ("ELSE", r"else"),
        ("ELIF", r"elif"),
        ("WHILE", r"while"),
        # Literals and words
        ("NUMBER", r'\d+'),
        ("WORD", r"\w+[^\^;]"),
        # Operators and punctuation
        ("EQUAL", r'\='),
        ("ADD", r'\+'),
        ("SUB", r'\-'),
        ("MULT", r'\*'),
        ("DIV", r'\/'),
        ("POW", r'\^'),
        ("SEMICOLON", r'\;'),
    )

    generator = LexerGenerator()
    generator.ignore(r'\s+')
    for token_name, pattern in token_rules:
        generator.add(token_name, pattern)
    return generator.build()
示例#41
0
from rply import LexerGenerator


lg = LexerGenerator()

# Token rules, registered in the order listed.
for _token_name, _pattern in (
    ("LPAREN", r"\("),
    ("RPAREN", r"\)"),
    # Bracket tokens are currently disabled:
    # ('LBRACKET', r'\['),
    # ('RBRACKET', r'\]'),
    ("IDENTIFIER", r"[^()\[\]{}\s#]+"),
):
    lg.add(_token_name, _pattern)

# Skipped input: a '#' comment up to (but not including) the line break or
# end of input, then any run of whitespace.
for _skip_pattern in (r"#.*(?=\r|\n|$)", r"\s+"):
    lg.ignore(_skip_pattern)

lexer = lg.build()
示例#42
0
@pg.production("hashtags : hashtags HASHTAG")
@pg.production("hashtags : HASHTAG")
def hashtags(p):
    """Collect HASHTAG symbols into one flat list.

    A single-symbol match is returned as-is (it is already a one-element
    list); the recursive form is passed through ``flattened`` and turned
    into a list.
    """
    if len(p) != 1:
        return list(flattened(p))
    return p

@pg.production("topics : FOR hashtags")
@pg.production("topics : ")
def topics(p):
    """Return ``{u'topics': [...]}`` with the hashtag names, '#' stripped.

    The empty production yields an empty list; otherwise the names come
    from the ``value`` of each item in ``p[1]``.
    """
    hashtag_tokens = p[1] if len(p) != 0 else []
    return { u'topics': [tok.value.strip('#') for tok in hashtag_tokens] }

@pg.error
def error_handler(token):
    """Raise ``ValueError`` naming the unexpected token and where it occurred.

    The location is the token's source offset when ``getsourcepos()`` returns
    a truthy position, otherwise the text "end of stream".
    """
    source_pos = token.getsourcepos()
    offset = "offset {}".format(source_pos.idx) if source_pos else u"end of stream"
    message = "Ran into a {0} where it wasn't expected at {1}".format(token.gettokentype(), offset)
    raise ValueError(message)

# Module-level lexer and parser, built once when this module is imported
# from the `lex` and `pg` generators.
TweetLexer = lex.build()
TweetParser = pg.build()

示例#43
0
    "-D"
    return -int_from_digit(p[1])
@PG.production("charge : + DIGIT")
def charge_production_plus_many(p):
    """Handle '+' followed by a DIGIT.

    Returns the result of ``int_from_digit`` for the DIGIT symbol with a
    unary plus applied.
    """
    magnitude = int_from_digit(p[1])
    return +magnitude


##### ATOM CLASS #####

# class ::= ':' NUMBER
# class :: int
@PG.production("class : colon NUMBER")
def class_production(p):
    """Handle ':' NUMBER by returning the NUMBER symbol's value unchanged.

    NOTE(review): the grammar comment declares this as an int; that holds
    only if the NUMBER symbol has already been reduced to an int -- confirm.
    """
    number = p[1]
    return number

@PG.error
def error_handler(token, expected=None):
    """Handle parser errors.

    With ``DEBUG`` set, raise ``ValueError`` describing the offending token
    (name, value, source position) and what was expected instead. Otherwise
    only print a warning and return ``None``.

    :param token: the token the parser could not accept.
    :param expected: optional description of what was expected.
    :raises ValueError: when ``DEBUG`` is truthy.
    """
    if not DEBUG:
        # The parenthesised form prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("Warning: parser error")
        return

    # The two sentences need a separating space; they were previously
    # concatenated as "...expected.At ...".
    # NOTE(review): dictof is defined elsewhere; presumably it renders the
    # source position as a dict -- confirm.
    raise ValueError(
        ("Ran into a %s (%s) where it wasn't expected. "
         "At %s. Instead expected: %s.")
        % (repr(token.name), repr(token.value),
           dictof(token.source_pos), repr(expected)))


# Module-level lexer and parser, built once at import time from the LG and
# PG generators.
LEXER = LG.build()
PARSER = PG.build()
示例#44
0
def create_generator(rules):
    """Build a lexer from ``rules`` and return it.

    Each rule is read by index: ``rule[0]`` is passed as the second argument
    to ``LexerGenerator.add`` and ``rule[1]`` as the first. Despite the
    function's name, the return value is the built lexer, not the generator.
    """
    generator = LexerGenerator()
    for entry in rules:
        first_arg, second_arg = entry[1], entry[0]
        generator.add(first_arg, second_arg)
    return generator.build()
示例#45
0
文件: vm.py 项目: ponyatov/proto
## lexer generator: the lexer itself is built at runtime, below
lexer_generator = LexerGenerator()

## skipped input, registered in this order
for _skipped in (
        r'[\\\#].*|\(.*\)',  # comments
        r'\s+',              # spaces
):
    lexer_generator.ignore(_skipped)

## token rules, registered in this order
for _token, _regex in (
        ('HEX', '0x[0-9A-Fa-f]+'),                # number parsing
        ('BIN', '0b[01]+'),
        ('NUMBER', r'[\+\-]?[0-9]+(\.[0-9]*)?'),
        ('WORD', r'[A-Za-z0-9_]+'),               # FORTH word names
):
    lexer_generator.add(_token, _regex)

## build resulting lexer
lexer = lexer_generator.build()


## fetch the next token from the stream and append it to D
## @param[in] source token stream exposing next()
## @returns the token, or None once the stream is exhausted
def WORD(source):
    try:
        token = source.next()
    except StopIteration:
        return None
    D.append(token)
    return token


## REPL loop
def INTERPRET(SRC=''):
示例#46
0
class Lexer():
    """Collects the language's token rules on a ``LexerGenerator``.

    The generator is created empty; the rules are only registered when
    :meth:`get_lexer` is called.
    """

    def __init__(self):
        # Empty generator; rules are added by _add_tokens().
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule, then the two ignore rules.

        Longer keywords are listed before keywords they start with
        ('floatify' before 'float', 'stringify' before 'string',
        'do-while' before 'while', '==' before '='), and the original
        registration order is preserved here.
        """

        # Utilities
        self.lexer.add('DISP', r'disp')
        self.lexer.add('GET', r'get')
        self.lexer.add('TO-STRING', r'stringify')
        self.lexer.add('TO-FLOAT', r'floatify')
        self.lexer.add('TO-INT', r'intfy')
        self.lexer.add('TO-BOOL', r'boolfy')

        # Variable storage (SET_ARRAY used to be registered twice; the
        # second, identical rule was redundant and has been dropped)
        self.lexer.add('SET_INT', r'integer')
        self.lexer.add('SET_CHAR', r'char')
        self.lexer.add('SET_FLOAT', r'float')
        self.lexer.add('SET_ARRAY', r'array')
        self.lexer.add('SET_STRING', r'string')
        self.lexer.add('SET_BOOL', r'bool')

        # Built-in constructs
        self.lexer.add('IF', r'if')
        self.lexer.add('DOWHILE', r'do\-while')
        self.lexer.add('WHILE', r'while')
        self.lexer.add('FOR', r'for')
        self.lexer.add('IN', r'\#in')
        self.lexer.add('SET', r'set')

        # Operators
        self.lexer.add('EQ', r'==')
        self.lexer.add('GE', r'>=')
        self.lexer.add('LE', r'<=')
        self.lexer.add('NE', r'!=')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        self.lexer.add('POW', r'\^')
        self.lexer.add('EQL', r'\=')
        self.lexer.add('MOD', r'\%')
        self.lexer.add('GT', r'>')
        self.lexer.add('LT', r'<')
        self.lexer.add('AND', r'&&')
        self.lexer.add('OR', r'\|\|')
        self.lexer.add('NOT', r'!')

        # Parentheses and brackets
        self.lexer.add('OPEN_BRACKET', r'\(')
        self.lexer.add('CLOSED_BRACKET', r'\)')

        # Data types
        self.lexer.add('FLOAT', r'-?\d+\.\d+')
        self.lexer.add('INT', r'-?\d+')
        self.lexer.add('STRING', r'\"[^"]*\"')
        self.lexer.add('TRUE', r'#t')
        self.lexer.add('FALSE', r'#f')
        self.lexer.add('VAR_NAME', r'[a-zA-Z0-9]+')
        self.lexer.add('CHAR', r'\'[^ ]{1}\'')
        # Brace-delimited, comma-separated list of float / int / string
        # literals with at least one element.
        self.lexer.add(
            'VALUES_STRING',
            r'\{((-?\d+\.\d+|-?\d+|\"[^",]*\")[\ ]*,[\ ]*)*[\ ]*(-?\d+\.\d+|-?\d+|\"[^",]*\")\}'
        )

        # Ignore whitespace, and any [ ... ] span that contains no nested
        # square brackets. Raw strings: plain '\s' and '\[' are invalid
        # escape sequences that newer Python versions warn about.
        self.lexer.ignore(r'\s+')
        self.lexer.ignore(r'\[[^\[\]]*\]')

    def get_lexer(self):
        """Register the token rules and return ``self.lexer.build()``.

        Every call runs ``_add_tokens`` again, so the rules are re-added to
        the same generator each time this is called.
        """
        self._add_tokens()
        return self.lexer.build()
示例#47
0
文件: lexer.py 项目: samgiles/naulang
        ("GT", r">"),
        # Punctuation
        ("LPAREN", r"\("),
        ("RPAREN", r"\)"),
        ("LBRACE", r"{"),
        ("RBRACE", r"}"),
        ("COMMA", r","),
        ("LBRACK", r"\["),
        ("RBRACK", r"\]"),
        # Literals
        ("TRUE", r"true\b"),
        ("FALSE", r"false\b"),
        ("FLOAT", r"(((0|[1-9][0-9]*)(\.[0-9]*)+)|(\.[0-9]+))([eE][\+\-]?[0-9]*)?"),
        ("INTEGER", r"-?(0|[1-9][0-9]*)"),
        ("STRING", r"\"([^\"\\]|\\.)*\""),
        ("IDENTIFIER", r"[a-zA-Z_$][a-zA-Z_0-9]*"),
        # Others
        ("EQUAL", r"="),
    ]

# Fetch the rule table once at import time.
tokens = get_tokens()

# Register each rule on the shared generator; every entry is indexed as
# (token name, pattern).
for token in tokens:
    lexer_gen.add(token[0], token[1])

# Module-level lexer, built once and handed out by get_lexer().
LEXER = lexer_gen.build()


def get_lexer():
    """Return the module-level ``LEXER`` object (the same one on every call)."""
    return LEXER
示例#48
0
文件: edn.py 项目: salewski/transito
lg.add("colon", r":")
# Named character literals are listed before the generic single-character
# rule so that e.g. \newline is not lexed as \n followed by a symbol.
lg.add("char_nl", r"\\newline")
lg.add("char_tab", r"\\tab")
lg.add("char_return", r"\\return")
lg.add("char_space", r"\\space")
lg.add("char", r"\\.")
lg.add("ns_symbol", NS_SYMBOL)
lg.add("symbol", SYMBOL_RE)
lg.add("string", r'"(\\\^.|\\.|[^\"])*"')
lg.add("ns_tag", "#" + NS_SYMBOL)
lg.add("tag", "#" + SYMBOL_RE)

# Whitespace and commas between tokens are skipped.
lg.ignore(r"[\s,\n]+")
# ';' comments run to the end of the line. The newline is deliberately not
# part of the pattern, so a comment on the last line of input (with no
# trailing newline) is still skipped; the newline itself is consumed by the
# whitespace rule above.
lg.ignore(r";.*")

lexer = lg.build()

# Token names the parser accepts.
pg = ParserGenerator([
    "boolean", "nil", "float", "number", "olist", "clist", "omap", "cmap",
    "ovec", "cvec", "oset", "colon", "char_nl", "char_tab", "char_return",
    "char_space", "char", "symbol", "ns_symbol", "string", "tag", "ns_tag"
])


class Char(TaggedValue):
    """A ``TaggedValue`` whose tag is fixed to ``'char'``."""

    def __init__(self, rep):
        # rep is forwarded unchanged as the value for the 'char' tag.
        # NOTE(review): TaggedValue is defined elsewhere; presumably its
        # signature is (tag, rep) -- confirm.
        TaggedValue.__init__(self, 'char', rep)


# Pre-built Char instances for the newline and tab characters.
NL = Char('\n')
TAB = Char('\t')
示例#49
0
 def __init__(self):
     """Build the scanner from ``grammar`` and store it in ``self._scanner``.

     Each grammar entry is read by index: ``entry[0]`` and ``entry[1]`` are
     passed, in that order, to ``LexerGenerator.add``. Whitespace is ignored.
     """
     generator = LexerGenerator()
     for entry in grammar:
         generator.add(entry[0], entry[1])
     generator.ignore(r'\s+')
     self._scanner = generator.build()