示例#1
0
class Lexer:
    """
    Splits the program text into Tokens.
    """

    # (token name, regex) pairs, tried in registration order by the lexer.
    _RULES = (
        ('PRINT', r'print'),
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        ('SEMI_COLON', r'\;'),
        ('ADD', r'\+'),
        ('SUB', r'\-'),
        ('NUMBER', r'\d+'),
    )

    def __init__(self):
        self.lexer = LexerGenerator()

    def __add_tokens(self):
        """Register every token rule and the whitespace-ignore rule."""
        for token_name, pattern in self._RULES:
            self.lexer.add(token_name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self.__add_tokens()
        return self.lexer.build()
示例#2
0
class Lexer():
    """Tokenizer for the lambda-calculus language.

    Wraps rply's LexerGenerator; call get_lexer() to obtain a built lexer.
    """

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules; earlier rules win, so order matters."""
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # lambda
        self.lexer.add('LAMBDA', r'λ')
        # BUG FIX: the dot must be escaped; an unescaped '.' matches ANY
        # character, which made every later rule (NUMBER, VAR, ...) unreachable.
        self.lexer.add('DOT', r'\.')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Single lowercase letter variable
        self.lexer.add('VAR', r'[a-z]')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        # BUG FIX: DIV previously reused the MUL pattern r'\*' (copy-paste
        # error), so '/' could never be tokenized.
        self.lexer.add('DIV', r'\/')

        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#3
0
class Lexer():
    """Tokenizer for the 'whole/frac/word' typed language."""

    def __init__(self):
        self.lg = LexerGenerator()

    def add_tokens(self):
        """Register token rules; keywords precede IDENTIFIER so they win."""
        # Literals.
        # BUG FIX: the dot was unescaped ('-?\d+.\d+'), so any character
        # between digits (e.g. '12x34') matched as FRAC; '\.' matches only
        # a decimal point.
        self.lg.add('FRAC', '-?\d+\.\d+')
        self.lg.add('WHOLE', '-?\d+')
        # BUG FIX: '.?' matches at most ONE character, so multi-character
        # strings failed to lex; '.*?' (non-greedy any) was intended.
        self.lg.add('WORD', '(""".*?""")|(".*?")|(\'.*?\')')
        self.lg.add('BOOL', 'true(?!\w)|false(?!\w)')

        # Type keywords ('(?!\w)' stops them matching identifier prefixes).
        self.lg.add('K_WHOLE', 'whole(?!\w)')
        self.lg.add('K_FRAC', 'frac(?!\w)')
        self.lg.add('K_WORD', 'word(?!\w)')
        self.lg.add('K_BOOL', 'bool(?!\w)')
        self.lg.add('K_LETTER', 'letter(?!\w)')

        # Conditionals
        self.lg.add('IF', 'if(?!\w)')
        self.lg.add('OTHER', 'other(?!\w)')
        self.lg.add('OTIF', 'otif(?!\w)')

        # Loops
        self.lg.add('LOOP', 'loop(?!\w)')
        self.lg.add('LOOPTILL', 'looptill(?!\w)')
        self.lg.add('ENDNOW', 'endnow(?!\w)')
        self.lg.add('RESUME', 'resume(?!\w)')

        # Boolean operators
        self.lg.add('AND', 'and(?!\w)')
        self.lg.add('OR', 'or(?!\w)')
        self.lg.add('NOT', 'not(?!\w)')

        self.lg.add('FUNCTION', 'func(?!\w)')
        # BUG FIX: '+' required identifiers to be at least two characters,
        # so single-letter names like 'x' could not be lexed; '*' allows
        # one-character identifiers.
        self.lg.add('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*")
        # Two-character comparison operators before their one-char prefixes.
        self.lg.add('EQUAL', '\==')
        self.lg.add('NOT_EQUAL', '\!=')
        self.lg.add('GREATER_EQUAL', '\>=')
        self.lg.add('LESS_EQUAL', '\<=')
        self.lg.add('GREATER_THAN', '\>')
        self.lg.add('LESS_THAN', '\<')
        self.lg.add('ASSIGNMENT', '\=')
        self.lg.add('LEFT_BRACKET', '\[')
        self.lg.add('RIGHT_BRACKET', '\]')
        self.lg.add('LEFT_PARAN', '\{')
        self.lg.add('RIGHT_PARAN', '\}')
        self.lg.add('COMMA', '\,')
        self.lg.add('DOT', '\.')
        self.lg.add('COLON', '\:')
        self.lg.add('PLUS', '\+')
        self.lg.add('MINUS', '\-')
        self.lg.add('MUL', '\*')
        self.lg.add('DIV', '\/')
        self.lg.add('MOD', '\%')
        # NOTE(review): the BRACES names are swapped relative to the
        # characters they match ('(' is RIGHT_BRACES); kept as-is because
        # the parser refers to these names.
        self.lg.add('RIGHT_BRACES', '\(')
        self.lg.add('LEFT_BRACES', '\)')
        self.lg.add('NEWLINE', '\n')

        # Ignore horizontal whitespace (newlines are tokens)
        self.lg.ignore('[ \t\r\f\v]+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self.add_tokens()
        return self.lg.build()
示例#4
0
def Lexer():
    """Create and build the lexer for the two-letter-keyword language."""
    generator = LexerGenerator()

    # Keywords come before IDENTIFIER so they win; then punctuation,
    # operators, literals, and finally the catch-all identifier rule.
    token_rules = [
        ('WHILE', r'wh'),
        ('PRINTF', r'pf'),
        ('IF', r'if'),
        ('ELSE', r'el'),
        ('MAIN', r'mn'),
        ('RETURN', r'rt'),
        ('LEFT_PAREN', r'\('),
        ('RIGHT_PAREN', r'\)'),
        ('SEMI_COLON', r'\;'),
        ('COMMA', r'\,'),
        ('PLUS', r'\+'),
        ('MINUS', r'\-'),
        ('MULT', r'\*'),
        ('DIV', r'\/'),
        ('RIGHT_BRACKETS', r'\}'),
        ('LEFT_BRACKETS', r'\{'),
        ('EQUAL', r'='),
        ('E_EQUAL', r'sm'),
        ('BT', r'bt'),
        ('LT', r'lt'),
        ('OR', r'or'),
        ('AND', r'and'),
        ('NOT', r'not'),
        ('INT', r'\d+'),
        ('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*"),
    ]
    for name, pattern in token_rules:
        generator.add(name, pattern)
    generator.ignore('\s+')

    return generator.build()
示例#5
0
class Lexer:
    """Lazily-built lexer for boolean-logic expressions."""

    def __init__(self):
        self.lexer = LexerGenerator()
        self.build = None  # built rply lexer, created on first use

    def _add_tokens(self):
        """Register operator, parenthesis, constant and literal rules."""
        rules = (
            ('->', r'\-\>'),
            ('<->', r'\<\-\>'),
            ('+', r'\+'),
            ('*', r'\*'),
            ('!', r'\!'),
            ('(', r'\('),
            (')', r'\)'),
            ('false', r'false'),
            ('true', r'true'),
            ('xor', r'xor'),
            ('LITERAL', r'[A-Za-z]+'),
        )
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules, build the lexer, and return self for chaining."""
        self._add_tokens()
        self.build = self.lexer.build()
        return self

    def lex(self, input):
        """Tokenize *input*, building the lexer on first call."""
        if self.build is None:
            self.get_lexer()

        return self.build.lex(input)
示例#6
0
class Lexer():
    """Token definitions for the French-keyword toy language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules (registration order = match priority)."""
        rules = (
            ('MON', r'montrer'),        # print keyword
            ('PARENTESE1', r'\('),      # parentheses
            ('PARENTESE2', r'\)'),
            ('POINT_VERG', r'\;'),      # semicolon
            ('PLUS', r'\+'),            # operators
            ('MOINS', r'\-'),
            ('NOMBRE', r'\d+'),         # number
            ('FOIS', r'\*'),            # multiply
            ('DIVI', r'\/'),            # divide
            ('TERM', r'[a-zA-Z0-9]+'),  # word / identifier
            ('EGAL', r'\=+'),           # '=' or '=='
            ('QUOTE', r'\''),
            ('VERGULE', r','),          # comma
            ('DOLLAR', r'\$'),
        )
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Ignore whitespace
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#7
0
class Lexer():
    """Tokenizer for the 'val' assignment language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register token rules; keywords come before the TEXT fallback."""
        # Keywords
        self.lexer.add('PRINT', r'print')
        self.lexer.add('VAL', r'val')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi Colon / assignment
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('EQUAL', r'\=')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        # NOTE(review): r'\\' matches a literal backslash, not '/'.  Kept
        # as-is in case '\' really is this language's division symbol, but
        # confirm it was not meant to be r'\/'.
        self.lexer.add('DIV', r'\\')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # BUG FIX: the TEXT pattern used '*', which can match the empty
        # string; a zero-width match never advances the lexer and hangs it
        # on any unrecognized character.  '+' requires at least one char.
        self.lexer.add('TEXT', r'[A-Za-z_0-9]+')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#8
0
class Lexer(object):
    """Tokenizer for the SQL-like document query language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register token rules.

        Keywords are registered before the generic word rules (NUMBER/ID/
        STRING/CHAR); rply tries rules in registration order, so the
        original ordering made every keyword unreachable (any word
        character matched CHAR first).
        """
        # Keywords first so they win over the generic word rules.
        self.lexer.add("SELECT", r'SELECT')
        self.lexer.add("FROM", r'FROM')
        self.lexer.add("WHERE", r'WHERE')
        self.lexer.add("PATH", r'PATH')
        self.lexer.add("INSERT", r'INSERT')
        self.lexer.add("INTO", r'INTO')
        self.lexer.add("CREATE", r'CREATE')
        self.lexer.add("DOCUMENT", r'DOCUMENT')
        self.lexer.add("AT", r'AT')
        self.lexer.add("IS", r'IS')
        self.lexer.add("TEMPLATE", r'TEMPLATE')
        # Punctuation / operators
        self.lexer.add("ASTERISK", r'\*')
        self.lexer.add("OPEN_CLAUSE", r'\(')
        self.lexer.add("CLOSE_CLAUSE", r'\)')
        self.lexer.add("CITE", r'\'')
        self.lexer.add("COMMA", r'\,')
        # Literals / generic words.  ID subsumes STRING and CHAR; the two
        # are kept (after ID) for parser compatibility.
        self.lexer.add("NUMBER", r'\d+')
        self.lexer.add("ID", r'\w+')
        # BUG FIX: STRING used r'\w*', which can match the empty string; a
        # zero-width match never advances the lexer and hangs it.
        self.lexer.add("STRING", r'\w+')
        self.lexer.add("CHAR", r'\w')
        self.lexer.ignore('\s+')

    def get_lexer(self):
        # BUG FIX: _add_tokens() was called without `self.`, raising
        # NameError at runtime.
        self._add_tokens()
        return self.lexer.build()
示例#9
0
class Lexer:
    """Tokenizer for the small puts/eval scripting language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules and the whitespace-ignore rule."""
        # Print
        self.lexer.add('PUTS', r'puts')
        # Eval
        self.lexer.add('EVAL', r'eval')
        # Parenthesis
        self.lexer.add('(', r'\(')
        self.lexer.add(')', r'\)')
        # Semi Colon
        self.lexer.add(';', r'\;')
        # Operators
        self.lexer.add('+', r'\+')
        self.lexer.add('-', r'\-')
        self.lexer.add('*', r'\*')
        self.lexer.add('/', r'\/')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # String
        self.lexer.add('STRING', r'\".*\"')
        # Functions
        self.lexer.add('{', r'\{')
        # BUG FIX: the close-brace rule was registered under the name '{'
        # (copy-paste error), so '}' tokens were indistinguishable from '{'.
        self.lexer.add('}', r'\}')
        self.lexer.add('FUNCTION', r'fun')

        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#10
0
class Lexer:
    """Token rules for the printf-style toy language."""

    # (name, pattern) pairs; earlier entries take priority.
    _RULES = (
        ('PRINT', r'printf'),
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        ('SEMI_COLON', r'\;'),
        ('SUM', r'\+'),
        ('SUB', r'\-'),
        ('MUL', r'\*'),
        ('DIV', r'\/'),
        ('NUMBER', r'\d+'),
        ('STRING', r'"[^"]*"'),  # double-quoted string, no embedded quotes
    )

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every rule from _RULES, then ignore whitespace."""
        for name, pattern in self._RULES:
            self.lexer.add(name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#11
0
class Lexer():
    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Image
        self.lexer.add('IMAGE', r'[^\s]+(\.(?i)(jpg|png|gif|bmp|jpeg))')
        # Position
        self.lexer.add('POSITION', r'position')
        # Scale
        self.lexer.add('SCALE', r'scale')
        # Move
        self.lexer.add('MOVE', r'move')
        # Dimensions
        self.lexer.add('DIMENSIONS', r'dimensions')
        # Total
        self.lexer.add('TOTAL', r'total')
        # Print
        self.lexer.add('PRINT', r'print')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Comma separator
        self.lexer.add('COMMA', r'\,')
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#12
0
class Lexer():
    """Token rules for the print/arithmetic language with identifiers."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules (registration order = match priority)."""
        rules = (
            ('PRINT', r'print'),          # print keyword
            ('OPEN_PAREN', r'\('),        # parentheses
            ('CLOSE_PAREN', r'\)'),
            ('SEMI_COLON', r'\;'),        # statement terminator
            ('SUM', r'\+'),               # operators
            ('SUB', r'\-'),
            ('MUL', r'\*'),
            ('DIV', r'\/'),
            ('NUMBER', r'\d+'),           # integer literal
            ('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*"),
        )
        for name, pattern in rules:
            self.lexer.add(name, pattern)
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#13
0
class Lexer():
    """Builds the lexer for the print-and-arithmetic grammar."""

    def __init__(self):
        self.lexer = LexerGenerator()

    # Token set defined according to the programming language's grammar.
    def _add_tokens(self):
        """Register the token rules required by the grammar."""
        add = self.lexer.add
        add('PRINT', r'print')        # print keyword
        add('OPEN_PAREN', r'\(')      # parentheses
        add('CLOSE_PAREN', r'\)')
        add('SEMI_COLON', r'\;')      # statement terminator
        add('SUM', r'\+')             # operators
        add('SUB', r'\-')
        add('NUMBER', r'\d+')         # integer literal
        self.lexer.ignore('\s+')      # skip whitespace

    def get_lexer(self):
        """Register rules and build the final lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#14
0
class Lexer():
    """Tokenizer for the Vietnamese-keyword toy language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules (order = matching priority)."""
        # Print keyword
        self.lexer.add('IN_RA', r'in_ra')
        # Parentheses
        self.lexer.add('MO_NGOAC_TRON', r'\(')
        self.lexer.add('DONG_NGOAC_TRON', r'\)')
        # End of statement: a newline instead of ';'
        self.lexer.add('HET_DONG', r'(\n)|(\r\n)')
        # Arithmetic operators
        self.lexer.add('CONG', r'\+')
        self.lexer.add('TRU', r'\-')
        self.lexer.add('NHAN', r'\*')
        self.lexer.add('CHIA', r'\/')
        # Comparison operators
        self.lexer.add('BANG', r'\=\=')
        self.lexer.add('LON_HON', r'\>')
        # BUG FIX: the token name contained a stray ')' ('NHO_HON)'),
        # which would not match the name the parser refers to.
        self.lexer.add('NHO_HON', r'\<')
        self.lexer.add('KHAC', r'\!\=')
        # Integer literal
        self.lexer.add('SO_NGUYEN', r'\d+')
        # Ignore horizontal whitespace (newlines are HET_DONG tokens)
        self.lexer.ignore(r'(^\s+)|( )+|\t+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#15
0
class Lexer:
    """Token definitions for the brace-and-bracket arithmetic language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register keyword, bracket, operator and literal rules."""
        rules = [
            # Print
            ('PRINT', r'print'),
            # Braces / brackets / parens
            ('OPEN_PAREN', r'\('),
            ('CLOSE_PAREN', r'\)'),
            ('OPEN_BRACE', r'{'),
            ('CLOSE_BRACE', r'}'),
            ('OPEN_BRAKET', r'\['),
            ('CLOSE_BRAKET', r'\]'),
            # Operations
            ('SUM', r'\+'),
            ('SUB', r'-'),
            ('MUL', r'\*'),
            ('DIV', r'/'),
            # Datatype
            ('NUMBER', r'\d+'),
            # Semicolon
            ('SEMI_COLON', r'\;'),
        ]
        for token_name, regex in rules:
            self.lexer.add(token_name, regex)
        # Skip whitespace entirely
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#16
0
class Lexer():
    """Token rules for the if/else print language with identifiers."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register keywords, punctuation, operators and literals in order."""
        for name, pattern in (
            # Keywords
            ('PRINT', r'print'),
            ('IF', r'if'),
            ('ELSE', 'else'),
            # Parenthesis
            ('OPEN_PAREN', r'\('),
            ('CLOSE_PAREN', r'\)'),
            # Punctuation
            ('SEMI_COLON', r'\;'),
            ('COLON', r'\:'),
            # Operators
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            ('MUL', r'\*'),
            ('DIV', r'/'),
            ('MORE', r'\>'),
            ('LESS', r'\<'),
            ('EQUALS', r'\='),
            # Literals / identifiers
            ('NUMBER', r'\d+'),
            ('ID', r'\w+'),
        ):
            self.lexer.add(name, pattern)
        # Ignore spaces
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#17
0
class Lex():
    """Wrapper around LexerGenerator for the basic print language."""

    def __init__(self):
        self.lex = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule plus the whitespace-ignore rule."""
        for token_name, pattern in (
            ('PRINT', r'print'),       # print keyword
            ('LEFT_PAREN', r'\('),     # parentheses
            ('RIGHT_PAREN', r'\)'),
            ('EOL', r'\;'),            # end of line
            ('SUM', r'\+'),            # arithmetic operators
            ('SUB', r'\-'),
            ('MUL', r'\*'),
            ('DIV', r'\/'),
            ('NUMBER', r'\d+'),        # integer literal
        ):
            self.lex.add(token_name, pattern)
        self.lex.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lex.build()
示例#18
0
    def test_position(self):
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")
        lg.ignore(r"\s+")

        lexer = lg.build()

        # Each source string must yield tokens at exactly these
        # (lineno, colno) positions, then exhaust the stream.
        for source, positions in [
            ("2 + 3", [(1, 1), (1, 3), (1, 5)]),
            ("2 +\n    37", [(1, 1), (1, 3), (2, 5)]),
        ]:
            stream = lexer.lex(source)
            for lineno, colno in positions:
                token = stream.next()
                assert token.source_pos.lineno == lineno
                assert token.source_pos.colno == colno
            with raises(StopIteration):
                stream.next()
示例#19
0
class Lexer:
    """Token rules for the 'Var' assignment language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register keywords before the generic VAR rule so they win."""
        add = self.lexer.add
        add('PRINT', r'print')      # keyword: print
        add('ASSIGN', r'Var')       # keyword: variable declaration
        add('VAR', r'[a-zA-Z]+')    # variable name (after keywords)

        add('SEMI_COLON', r'\;')
        add('COMMA', r',')
        add('SUB', r'\-')
        add('SUM', r'\+')
        add('MUL', r'\*')
        add('DIV', r'\/')
        add('VALUE_SETTER', r'=')
        add('NUMBER', r'\d+')
        add('OPEN_PAREN', r'\(')
        add('CLOSE_PAREN', r'\)')

        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#20
0
class Lexer():
    """Tokenizer for the single-letter-keyword C-like language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register token rules.

        rply tries rules in registration order, so multi-character
        operators must come before any single-character operator that is
        a prefix of them.
        """
        # Parenthesis / braces
        self.lexer.add('OPEN_PAR', r'\(')
        self.lexer.add('CLOSE_PAR', r'\)')
        self.lexer.add('OPEN_KEY', r'\{')
        self.lexer.add('CLOSE_KEY', r'\}')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Comma
        self.lexer.add('COMMA', r'\,')
        # Quote
        self.lexer.add('QUOTE', r'\"')
        # Dots
        self.lexer.add('DOTS', r'\:')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'-')
        self.lexer.add('MULT', r'\*')
        # BUG FIX: OR ('//') must be registered before DIV ('/');
        # otherwise '//' always lexed as two DIV tokens.
        self.lexer.add('OR', r'\//')
        self.lexer.add('DIV', r'/')
        # Boolean Operators
        self.lexer.add('AND', r'\&&')
        # Relational Operators (two-character forms first)
        self.lexer.add('EQUAL_TO', r'\==')
        # BUG FIX: DIFF ('!=') must be registered before NOT ('!');
        # otherwise '!=' always lexed as NOT followed by ASSIGN.
        self.lexer.add('DIFF', r'\!=')
        self.lexer.add('NOT', r'\!')
        self.lexer.add('BIGGER_THAN', r'\>')
        self.lexer.add('SMALLER_THAN', r'\<')
        self.lexer.add('ASSIGN', r'=')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Types
        self.lexer.add('INT', r'IN')
        self.lexer.add('CHAR', r'CH')
        self.lexer.add('VOID', r'VO')
        # While
        self.lexer.add('WHILE', r'W')
        # If - else
        self.lexer.add('IF', r'I')
        self.lexer.add('ELSE', r'E')
        # Print
        self.lexer.add('PRINT', r'P')
        # Scanf
        self.lexer.add('SCANF', r'S')
        # Function
        self.lexer.add('FUNC', r'F')
        # Main
        self.lexer.add('MAIN', r'M')
        # Identifier (registered after all keywords so keywords win)
        self.lexer.add('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*")
        # Ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#21
0
class Lexer:
    """Builds lexer rules from TokenEnum and pre-cleans source text."""

    def __init__(self):
        self.lg = LexerGenerator()
        self._build_lex_rules()

    def _build_lex_rules(self):
        """Register one lexer rule per TokenEnum member, then skip spaces."""
        for enum in TokenEnum:
            self.lg.add(enum.name, enum.value)
        self.lg.ignore(AppConstant.SPACE_REGEX)

    def build_lexer(self):
        """Return the built rply lexer."""
        return self.lg.build()

    @staticmethod
    def _strip_all(pattern, source_code):
        """Repeatedly delete group 1 of *pattern* from *source_code*.

        Extracted helper: clean_source previously duplicated this loop
        verbatim for comments and empty lines.
        """
        match = re.search(pattern, source_code)
        while match is not None:
            start, end = match.span(1)
            assert start >= 0 and end >= 0
            source_code = source_code[0:start] + source_code[end:]
            match = re.search(pattern, source_code)
        return source_code

    def clean_source(self, source_code):
        """Remove comments and empty lines from the raw source text."""
        source_code = self._strip_all(AppConstant.COMMENT_REGEX, source_code)
        return self._strip_all(AppConstant.EMPTY_LINE_REGEX, source_code)
示例#22
0
def lexer_from_mapping(mapping):
    """Build a lexer whose special-character tokens come from *mapping*.

    Each key of *mapping* becomes its own token matching the
    backslash-escaped character; anything not recognized as a special
    character or operator is lexed as DATA.
    """
    lg = LexerGenerator()

    # Escape data with forward slashes
    lg.add("DATA", r'/.+?/')

    # One token per special character, matching its escaped form.
    for char in mapping.keys():
        lg.add(char, r"\\" + char)

    # Normal tokens
    lg.add("TYPE", r':')
    lg.add("AND", r'\&')
    lg.add("OR", r'\|')
    lg.add("L_PAREN", r'\(')
    lg.add("R_PAREN", r'\)')
    lg.add("EQUAL", r'=')
    lg.add("CHILD", r'>')
    lg.add("PARENT", r'<')
    lg.add("NOT", r'!')

    # Everything else is data.
    # BUG FIX: the fallback DATA rule was registered INSIDE the loop, so
    # it was added once per mapping key with a partially-built exclusion
    # set (and never at all for an empty mapping).  Build the full
    # character class first, then register the rule exactly once.
    excluded_chars = r'^<>=&|():!'
    for char in mapping.keys():
        excluded_chars += r"\\" + char
    lg.add("DATA", "[{excluded}]+".format(excluded=excluded_chars))

    lg.ignore(r'\s+')
    lexer = lg.build()
    return lexer
示例#23
0
class Lexer():
    """Token rules for the Portuguese-keyword print language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules, then ignore whitespace."""
        for name, pattern in (
            ('ESCREVA', r'escreva'),   # print keyword
            ('APAR', r'\('),           # open paren
            ('FPAR', r'\)'),           # close paren
            ('PONTO_VIRGULA', r'\;'),  # semicolon
            ('SOMA', r'\+'),           # operators
            ('SUB', r'\-'),
            ('MUL', r'\*'),
            ('DIV', r'\/'),
            ('NUMERO', r'\d+'),        # number
        ):
            self.lexer.add(name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
class Lexer():
    """Token rules for the basic print/arithmetic language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules, then ignore whitespace."""
        for name, pattern in (
            ('PRINT', r'print'),       # print keyword
            ('OPEN_PAREN', r'\('),     # parentheses
            ('CLOSE_PAREN', r'\)'),
            ('SEMI_COLON', r'\;'),     # statement terminator
            ('PLUS', r'\+'),           # operators
            ('MINUS', r'\-'),
            # NOTE(review): r'\\' matches a literal backslash, not '/';
            # confirm the divide symbol really is '\' in this language.
            ('DIVIDE', r'\\'),
            ('MULTIPLY', r'\*'),
            ('NUMBER', r'\d+'),        # integer literal
        ):
            self.lexer.add(name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#25
0
class Lexer():
    """Token rules for the Spanish-keyword 'Nucleo' language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register keywords first, then identifiers, operators, literals."""
        add = self.lexer.add
        # Keywords (capitalized, so they cannot collide with identifiers)
        add('BEGIN', r'Begin')
        add('NUCLEO', r'Nucleo')
        add('SI', r'Si')
        add('IMPRIMIR', r'Imprimir')
        add('FIN', r'Fin')
        # Parentheses
        add('OPEN_PAREN', r'\(')
        add('CLOSE_PAREN', r'\)')
        # Identifier: lowercase letters optionally followed by digits
        add('IDENTIFICADOR', r'[a-z]+[0-9]*')
        # Operators
        add('IGUAL', r'\=')
        add('MAS', r'\+')
        # Integer literal
        add('INT', r'\d+')
        # Double-quoted string
        add('STRING', r'".*"')
        # Ignore whitespace
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#26
0
文件: lexer.py 项目: imraghava/nolang
def get_lexer():
    """Build a lexer from the module-level RULES table, skipping whitespace."""
    generator = LexerGenerator()

    for token_name, pattern in RULES:
        generator.add(token_name, pattern)
    generator.ignore('\s+')
    return generator.build()
示例#27
0
class Lexer():
    """Token rules for a tiny C subset (printf hello-world programs)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register preprocessor, keyword, operator and literal rules."""
        for name, pattern in (
            # Core constructs
            ('INCLUDE', r"#include "),
            ('MAIN', r"main\(\)"),
            ('PRINT', r"printf"),
            ('NUMBER', r'\d+'),
            # Libraries
            ('LIBstdio.h', r"<stdio.h>"),
            # Operators
            ('SUM', r'\+'),
            ('SUB', r'\-'),
            # Signs
            ('START', '{'),
            ('FINISH', '}'),
            ('OPEN_PARENS', r'\('),
            ('CLOSE_PARENS', r'\)'),
            ('SEMI_COLON', r'\;'),
            # Double-quoted string (non-greedy)
            ('STR', r'"(.*?)"'),
        ):
            self.lexer.add(name, pattern)
        # Ignore spaces
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#28
0
def tokenize():
    """Build the lexer for the Portuguese-keyword imperative language.

    rply tries rules in registration order, so two-character operators are
    registered before the one-character operators they start with.
    """
    lg = LexerGenerator()

    lg.add('NUMBER', r'\d+')
    lg.add('PLUS', r'\+')
    lg.add('MINUS', r'-')
    lg.add('MUL', r'\*')
    lg.add('DIV', r'/')
    lg.add('OPEN_PAR', r'\(')
    lg.add('CLOSE_PAR', r'\)')
    lg.add('OPEN_BLOCK', r'\{')
    lg.add('CLOSE_BLOCK', r'\}')
    lg.add('COMMA', r',')
    lg.add('IMPRIME', r'imprime')
    lg.add('ESCANEIA', r'escaneia')
    lg.add('CMD_END', r';')
    lg.add('GE', r'>=')
    lg.add('LE', r'<=')
    # BUG FIX: EQUALS ('==') must be registered before EQUAL ('=');
    # otherwise '==' always lexed as two EQUAL tokens.
    lg.add('EQUALS', r'==')
    lg.add('EQUAL', r'=')
    lg.add('GREATER', r'>')
    lg.add('LESS', r'<')
    lg.add('ENQUANTO', r'enquanto')
    lg.add('E', r'e')
    lg.add('OU', r'ou')
    lg.add('SENAO', r'senao')
    lg.add('NAO', r'nao')
    lg.add('SE', r'se')

    # Identifiers come after all keywords; whitespace is skipped.
    lg.add('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*")
    lg.ignore('\s+')

    return lg.build()
示例#29
0
    def test_ignore_recursion(self):
        generator = LexerGenerator()
        generator.ignore(r"\s")

        lexer = generator.build()

        # A long all-whitespace input must be fully ignored without
        # blowing the stack or producing tokens.
        tokens = list(lexer.lex(" " * 2000))
        assert tokens == []
class Lexer():
    """Token rules for the file/vector charting language."""

    # (name, pattern) pairs in matching-priority order.
    _RULES = (
        ('FILE', r'fileinitializer'),
        ('VARIABLE_FILE', r'[w]\d+'),      # file variable, e.g. w1
        # TODO: change v1 and f1 to varV1 and varF1
        ('VARIABLE_VECTOR', r'[v]\d+'),    # vector variable, e.g. v1
        ('TIME_STAMP', r'timestamp'),
        ('VECTOR', r'vector'),
        ('ERROR', r'error'),
        ('2D_CHART', r'2dchart'),
        ('PRINT', r'print'),
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        ('OPEN_SQUARE_PAREN', r'\['),
        ('CLOSE_SQUARE_PAREN', r'\]'),
        ('EQUAL', r'\='),
        ('SEMI_COLON', r'\;'),
        ('SUM', r'\+'),
        ('SUB', r'\-'),
        ('NUMBER', r'\d+'),
        ('STRING', r'".+"'),               # quoted text, any symbols inside
    )

    def __init__(self):
        super().__init__()
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every rule from _RULES, then ignore whitespace."""
        for name, pattern in self._RULES:
            self.lexer.add(name, pattern)
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#31
0
class Lexer():
    """Tokenizer for the Portuguese-keyword imperative language.

    Rules are tried in registration order: multi-character comparison
    operators come before their one-character prefixes, and keywords come
    before the generic IDENT rule so they remain reachable.
    """

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules, then ignore horizontal whitespace."""
        # Keywords for printing and type names
        self.lexer.add('PRINT', r'print')
        self.lexer.add('INT', r'int')
        self.lexer.add('CHAR', r'char')
        self.lexer.add('FLOAT', r'float')
        # Comparison operators ('==', '>=', '<=' before '<', '>')
        self.lexer.add('IGUAL', r'\==')
        self.lexer.add('MAIOROUIGUAL', r'\>=')
        self.lexer.add('MENOROUIGUAL', r'\<=')
        self.lexer.add('MENOR', r'\<')
        self.lexer.add('MAIOR', r'\>')
        self.lexer.add('DIFERENTE', r'\!=')
        # Logical operators
        self.lexer.add('AND', r'\&')
        self.lexer.add('OR', r'\|')
        self.lexer.add('NOT', r'\!')
        # Arithmetic operators
        self.lexer.add('SOMA', r'\+')
        # NOTE(review): '-' is registered before REAL/NUMERO, so a literal
        # like '-3.5' lexes as SUBTRACAO followed by a positive REAL, and
        # the optional leading '-' in those rules never matches — confirm
        # this is intended.
        self.lexer.add('SUBTRACAO', '-')
        self.lexer.add('MULTIPLICACAO', r'\*')
        self.lexer.add('DIVISAO', r'\/')
        self.lexer.add('RESTO', r'mod')
        # Assignment ('=' after '==' so comparison wins)
        self.lexer.add('ATRIBUICAO', r'\=')
        # self.lexer.add('ASPASSIMPLES', '\'')
        # self.lexer.add('ASPASDUPLAS', '"')
        # self.lexer.add('PONTO', '\.')
        # self.lexer.add('VIRGULA', '\,')
        # self.lexer.add('DOISPONTOS', r'\:')
        self.lexer.add('PONTOEVIRGULA', r'\;')
        self.lexer.add('ABREPARENTESES', r'\(')
        self.lexer.add('FECHAPARENTESES', r'\)')
        # self.lexer.add('ABRECHAVES', r'\{')
        # self.lexer.add('FECHACHAVES', r'\}')
        # Block delimiters and control-flow keywords (before IDENT)
        self.lexer.add('INICIOBLOCO', r'begin')
        self.lexer.add('FIMBLOCO', r'end')
        self.lexer.add('FOR', r'for')
        self.lexer.add('IF', r'if')
        self.lexer.add('ELSE', r'else')
        self.lexer.add('BREAK', r'break')
        self.lexer.add('WHILE', r'while')
        self.lexer.add('READ', r'read')
        # Literals: float before int so '1.5' is not split
        self.lexer.add('REAL', r'-?\d+[.]\d+')
        self.lexer.add('NUMERO', r'-?\d+')
        self.lexer.add('BOOLEANO', "true(?!\w)|false(?!\w)")
        # NOTE(review): inside this pattern '\.' matches only a literal
        # dot; '\\.' (any escaped character) was probably intended for the
        # escape-sequence alternative — confirm.
        self.lexer.add('STRING', r'"(?:\.|(\\\")|[^\""\n])*"')
        self.lexer.add('IDENT', r'[a-zA-Z][a-zA-Z0-9]*')
        self.lexer.add('CARACTER', r'\'[a-zA-Z]\'')
        # self.lexer.add('STRING', r'[a-zA-Z][a-zA-Z]+}')
        # self.lexer.add('IDENT', r'[a-zA-Z][a-zA-Z0-9]*')
        # self.lexer.add('CARACTER', r'[a-zA-Z]')

        # Ignore spaces
        self.lexer.ignore('[\s\t \r\f\v]+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#32
0
文件: test_lexer.py 项目: alex/rply
    def test_regex_flags_ignore(self):
        generator = LexerGenerator()
        generator.add("ALL", r".*", re.DOTALL)
        generator.ignore(r".*", re.DOTALL)

        lexer = generator.build()

        token_stream = lexer.lex("test\ndotall")

        # The DOTALL ignore rule consumes the entire input (newline
        # included), so the stream is immediately exhausted.
        with raises(StopIteration):
            token_stream.next()
示例#33
0
def construct_lexer():
    """Build the lexer for the expression language.

    rply matches rules in registration order, so multi-character operators
    come before their single-character prefixes, and keywords come before
    the generic NAME rule.  The original order made every keyword
    unreachable (NAME matched first) and split '**', '->', '<=', '>=',
    '==' and '!=' into separate tokens.
    """
    lg = LexerGenerator()

    # Literals
    lg.add('NUMBER', r'\d+(\.\d+)?')
    lg.add('STRING', r'\".*?\"')

    # Grouping / punctuation ('...' before '.')
    lg.add('OPEN_PAREN', r'\(')
    lg.add('CLOSE_PAREN', r'\)')
    lg.add('INDEX_OPEN', r'\[')
    lg.add('INDEX_CLOSE', r'\]')
    lg.add('RANGE', r'\.\.\.')
    lg.add('COMMA', ',')

    # Multi-character operators before their prefixes
    lg.add('EXPONENTIATION', r'\*\*')
    lg.add('SINGLE_ARROW', '->')
    lg.add('DOUBLE_ARROW', '=>')
    lg.add('LE', '<=')
    lg.add('GE', '>=')
    lg.add('EQ', '==')
    lg.add('NE', '!=')

    # Single-character operators
    lg.add('ADD', r'\+')
    lg.add('SUBTRACT', r'-')
    lg.add('MULTIPLY', r'\*')
    lg.add('DIVIDE', '/')
    lg.add('SELF_APPLY', '!')
    lg.add('DOT', r'\.')
    lg.add('GT', '>')
    lg.add('LT', '<')

    # Word operators and keywords (before NAME so they are reachable)
    lg.add('AND', 'and')
    lg.add('OR', 'or')
    lg.add('NOT', 'not')
    lg.add('XOR', 'xor')
    lg.add('IN', 'in')
    lg.add('IF', 'if')
    lg.add('ELSE', 'else')
    lg.add('DO', 'do')
    lg.add('END', 'end')
    lg.add('DEF', 'def')
    lg.add('LET', 'let')
    lg.add('WHILE', 'while')
    lg.add('FOR', 'for')

    # BUG FIX: NAME used '*', which can match the empty string and hang
    # the lexer; it is now '+'.
    # NOTE(review): keywords still match as prefixes of longer names
    # (e.g. 'door' lexes as DO + NAME); consider '(?!\w)' lookaheads.
    lg.add('NAME', r'[a-zA-Z0-9_]+')

    # Whitespace
    lg.ignore(r"\s+")

    return lg.build()
示例#34
0
    def test_repr(self):
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        generator.ignore(r"\s+")

        stream = generator.build().lex("2 + 3")

        # The stream must be printable both before and after tokens are
        # consumed.
        assert str(stream) is not None
        token = stream.next()
        assert token.name == "NUMBER"
        assert token.value == "2"
        assert str(stream) is not None
        token = stream.next()
        assert token.name == "PLUS"
示例#35
0
文件: test_lexer.py 项目: olasd/rply
    def test_ignore(self):
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        generator.ignore(r"\s+")

        stream = generator.build().lex("2 + 3")

        # Whitespace is skipped, leaving exactly NUMBER, PLUS, NUMBER.
        for expected_name, expected_value in [
            ("NUMBER", "2"),
            ("PLUS", "+"),
            ("NUMBER", "3"),
        ]:
            token = stream.next()
            assert token.name == expected_name
            assert token.value == expected_value
        # The final NUMBER starts at index 4 of the source string.
        assert token.source_pos.idx == 4
        with raises(StopIteration):
            stream.next()
示例#36
0
    def test_states(self):
        lg = LexerGenerator(initial_state="scalar")
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")
        lg.ignore(r"\s+")

        lg.add("OPEN_BRACKET", r"\[", to_state="vector")
        lg.add("PLUS", r"\+", state="vector")
        lg.add("NUMBER", r"\d+", state="vector")
        lg.add("NEW_LINE", r"\n+", state="vector")
        lg.add("CLOSE_BRACKET", r"\]", state="vector", to_state="scalar")
        lg.ignore(r" +", state="vector")

        stream = lg.build().lex("2 + [ 3 + 4 \n\n 5 + 6 ] + 7")

        expected = [
            ("NUMBER", "2", "scalar"),
            ("PLUS", "+", "scalar"),
            ("OPEN_BRACKET", "[", "scalar"),
            ("NUMBER", "3", "vector"),
            ("PLUS", "+", "vector"),
            ("NUMBER", "4", "vector"),
            ("NEW_LINE", "\n\n", "vector"),
            ("NUMBER", "5", "vector"),
            ("PLUS", "+", "vector"),
            ("NUMBER", "6", "vector"),
            ("CLOSE_BRACKET", "]", "vector"),
            ("PLUS", "+", "scalar"),
            ("NUMBER", "7", "scalar"),
        ]

        # Each produced token must carry the name, value and lexer state
        # recorded in the expected list.
        for (name, value, state), token in zip(expected, stream):
            assert token.name == name
            assert token.value == value
            assert token.state == state
示例#37
0
文件: lexer.py 项目: joshsharp/braid
lg.add('{', '\{')
lg.add('}', '\}')
lg.add('|', '\|')
lg.add(',', ',')
lg.add('DOT', '\.')
lg.add('COLON', ':')
lg.add('MINUS', '-')
lg.add('MUL', '\*')
lg.add('DIV', '/')
lg.add('MOD', '%')
lg.add('(', '\(')
lg.add(')', '\)')
lg.add('NEWLINE', '\n')

# ignore whitespace
lg.ignore('[ \t\r\f\v]+')

lexer = lg.build()

def lex(source):
    """Pre-process *source* by deleting '#' comments in place.

    NOTE(review): this definition is truncated by the example boundary;
    the actual tokenization step presumably follows the loop below.
    """
    # Group 1 captures the comment body; the trailing newline/EOF is not
    # captured so it survives the deletion.
    comments = r'(#.*)(?:\n|\Z)'
    multiline = r'([\s]+)(?:\n)'  # unused in the visible span — likely used further down
    
    comment = re.search(comments,source)
    while comment is not None:
        start, end = comment.span(1)
        assert start >= 0 and end >= 0
        source = source[0:start] + source[end:] #remove string part that was a comment
        comment = re.search(comments,source)
示例#38
0
from datetime import datetime

from rply import LexerGenerator, ParserGenerator

# strftime/strptime layout for DATETIME tokens ("Zulu"-suffixed timestamps).
ISO8601_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

lg = LexerGenerator()

# Skipped input: whitespace and "# ..." comments.
lg.ignore(r"\s+")
lg.ignore(r"\# .*")

# rply tries rules in insertion order, so the more specific patterns come
# first: DATETIME before FLOAT/INTEGER, FLOAT before INTEGER, and BOOLEAN
# before KEY (otherwise "true"/"false" would lex as KEY).
lg.add("COLON", r":")
lg.add("LCURLY", r"\{")
lg.add("RCURLY", r"\}")
lg.add("LBRACKET", r"\[")
lg.add("RBRACKET", r"\]")
lg.add("COMMA", r",")
lg.add("EQUALS", r"=")
lg.add("BOOLEAN", r"true|false")
lg.add("DATETIME", r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z")
lg.add("FLOAT", r"-?\d+\.\d+")
lg.add("INTEGER", r"-?\d+")
lg.add("STRING", r'"(\\"|[^"])*"')
lg.add("KEY", r"[a-zA-Z_][a-zA-Z0-9_#\?\.]*")

lexer = lg.build()
# Feed the parser exactly the token names registered on the lexer.
pg = ParserGenerator([rule.name for rule in lg.rules], cache_id="libtoml")

@pg.production("main : statements")
def main(p):
    # Grammar start symbol: unwrap the parsed statements node.
    return p[0]
示例#39
0
文件: lexer.py 项目: xando/herbert
from rply import LexerGenerator


lg = LexerGenerator()

# Single-character robot commands: step, turn left, turn right.
lg.add("STEP", r"s")
lg.add("TURN_LEFT", r"l")
lg.add("TURN_RIGHT", r"r")
# FUNC covers every lowercase letter NOT already used as a command (s/l/r).
lg.add("FUNC", r"a|b|c|d|e|f|g|h|i|j|k|m|n|o|p|q|t|u|v|w|x|y|z")
lg.add("COLON", r"\:")
lg.add("NEWLINE", r"\n+ *\n*")
lg.add("NAME", r"[A-Z]")
lg.add("NUMBER", r"\d+")
lg.add("PLUS", r"\+")
lg.add("MINUS", r"\-")
# These three use the literal character as the token name.
lg.add("(", r"\(")
lg.add(")", r"\)")
lg.add(",", r"\,")

# Skip runs of spaces and '#' comments; newlines stay significant.
lg.ignore(r" +")
lg.ignore(r"\#.*")

# Token-name list, e.g. for feeding a ParserGenerator.
TOKENS = [r.name for r in lg.rules]

lexer = lg.build()
示例#40
0
from rply import LexerGenerator


lg = LexerGenerator()

lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
# lg.add('LBRACKET', r'\[')
# lg.add('RBRACKET', r'\]')

# Anything that is not a delimiter, whitespace, or comment marker is an
# identifier (Lisp-style: numbers and symbols alike).
lg.add("IDENTIFIER", r"[^()\[\]{}\s#]+")

# '#' comments run to (but do not consume) the end of the line.
lg.ignore(r"#.*(?=\r|\n|$)")
lg.ignore(r"\s+")

lexer = lg.build()
示例#41
0
# Lexer rules: grouping delimiters, identifiers, and integer literals.
# Whitespace and '#' comments are discarded.

# Grouping
lg.add('LPAREN',   r'\(')
lg.add('RPAREN',   r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LBRACE',   r'\{')
lg.add('RBRACE',   r'\}')

# Identifiers
lg.add('ID',       r'[a-zA-Z_][a-zA-Z_0-9]*')

# Literals
lg.add('NUMBER',   r'\d+')
#lg.add('QUOTE',    r'\"')

lg.ignore(r' ')
lg.ignore(r'\n')
lg.ignore(r'\t')
lg.ignore(r'\#.*')

lexer = lg.build()


def lex(text):
    """Tokenize *text* with the module-level lexer, yielding tokens.

    The original pulled tokens with ``stream.next()`` until StopIteration
    escaped; under PEP 479 (Python 3.7+) a StopIteration raised inside a
    generator becomes a RuntimeError, so the call is wrapped and the
    generator returns cleanly instead.
    """
    stream = lexer.lex(text)

    while True:
        try:
            tok = stream.next()
        except StopIteration:
            return
        # Preserve the original sentinel check in case the stream signals
        # exhaustion by returning None rather than raising.
        if tok is None:
            return
        yield tok
示例#42
0
# edn-style token rules. NS_SYMBOL and SYMBOL_RE are defined elsewhere in
# this module (not visible here) — presumably namespaced vs. plain symbol
# regexes.
lg.add("ovec", r"\[")
lg.add("cvec", r"\]")
lg.add("oset", r"#{")
lg.add("colon", r":")
# Named character literals must precede the generic "char" catch-all.
lg.add("char_nl", r"\\newline")
lg.add("char_tab", r"\\tab")
lg.add("char_return", r"\\return")
lg.add("char_space", r"\\space")
lg.add("char", r"\\.")
lg.add("ns_symbol", NS_SYMBOL)
lg.add("symbol", SYMBOL_RE)
lg.add("string", r'"(\\\^.|\\.|[^\"])*"')
# Tagged literals: '#' followed by a (namespaced) symbol.
lg.add("ns_tag", "#" + NS_SYMBOL)
lg.add("tag", "#" + SYMBOL_RE)

# edn treats commas as whitespace; ';' comments run to end of line.
lg.ignore(r"[\s,\n]+")
lg.ignore(r";.*\n")

lexer = lg.build()

pg = ParserGenerator(["boolean", "nil", "float", "number", "olist", "clist",
"omap", "cmap", "ovec", "cvec", "oset", "colon", "char_nl", "char_tab",
"char_return", "char_space", "char", "symbol", "ns_symbol", "string",
"tag", "ns_tag"])

class Char(TaggedValue):
    """A character literal, represented as a TaggedValue with tag 'char'."""
    def __init__(self, rep):
        # rep is the character's printed representation.
        TaggedValue.__init__(self, 'char', rep)

# Shared singletons for the common named character literals.
NL = Char('\n')
TAB = Char('\t')
示例#43
0
# Arithmetic/punctuation token rules. NOTE(review): the bare `print`
# statements in the __main__ block below make this Python 2 code.
lg.add("PLUS", r"\+")
lg.add("MINUS", r"-")
# expo
lg.add("POWER",r"\^")
# mul div
lg.add("MUL", r"\*")
lg.add("DIV", r"/")
lg.add("MOD",r"%")
# ()
lg.add("LPAREN",r"\(")
lg.add("RPAREN",r"\)")

#other = ?
lg.add("EQU",r"=")
# question put print
lg.add("QPUT",r"\?")
lg.add("DOT",r"\.")
lg.add("COMMA",r",")
lg.add("MICRO", r"&")
# function
#procedure
lg.ignore(r"\s+")
# '*'-prefixed lines and '&&' trailers are treated as comments.
lg.ignore( r"\s*\*[^\n]*")
lg.ignore( r"&&[^\n]*")
lexer = lg.build()
if __name__=='__main__':
    # Smoke test: dump rule names and tokenize two sample inputs.
    print [i.name for i in lg.rules]
    for i in lexer.lex("12.2+0x12f+0.2+12if+ 'IF' \"HELLO\" <> # != ** ^ ! if ass"):
        print i
    for i in lexer.lex("[z]"):
        print i
示例#44
0
    def __init__(self):
        """Build a lexer and parser for a shell-like command language.

        Statements are sequences of atoms (names, ints, strings, paths,
        URLs) separated by ';' or newlines. The resulting parse tree is a
        plain dict/list structure. Lexer, parser, and their generators are
        exposed as instance attributes.
        """
        lg = LexerGenerator()
        # (name, regex) pairs, tried in order; the more specific PATH and
        # NAME variants precede the general ones. Duplicate names are fine:
        # both rules produce the same token type.
        tokens = [
            ("PROTO", r"[a-zA-Z]+://[^ ]+"),
            ("INT", r"\d+"),
            ("STRING", r"'[^']+'|\"[^\"]+\""),
            ("NAME", r"--colors=always"),
            ("PATH", r"([a-zA-Z0-9/._-]|\\ )+"),
            ("PATH", r"~([a-zA-Z0-9/._-]|\\ )*"),
            ("NAME", r"([a-zA-Z0-9_-]|\\ )+"),
            ("SEMICOLON", r";"),
            ("ENDL", r"\r?\n"),
        ]

        for token in tokens:
            lg.add(*token)

        # Skip spaces and tabs (the character class is "space, tab").
        lg.ignore(r"[ 	]+")

        pg = ParserGenerator([x[0] for x in tokens])

        @pg.production("main : statements")
        def main(args):
            # Start symbol: unwrap the statements node.
            return args[0]

        @pg.production("statements : statement")
        def statements_one(args):
            expression, = args
            return {
                "type": "statement",
                "content": expression,
            }

        @pg.production("statements : statement separator statements")
        def statements_many(args):
            # Right-recursive list: builds a nested binary operator tree.
            statement, separtor, statements = args
            return {
                "type": "statement_infix_operator",
                "content": {
                    "left": {
                        "type": "statement",
                        "content": statement,
                    },
                    "right": statements,
                    "operator": separtor,
                }
            }

        @pg.production("separator : SEMICOLON")
        @pg.production("separator : ENDL")
        def separator(args):
            # don't care
            return args[0].value

        @pg.production("statement : atom")
        def expression_one(args):
            atom, = args
            return [atom]

        @pg.production("statement : atom atoms")
        def expression_many(args):
            atom, atoms = args
            return [atom] + atoms

        @pg.production("atoms : atom")
        def atoms_one(args):
            atom, = args
            return [atom]

        @pg.production("atoms : atom atoms")
        def atoms_many(args):
            atom, atoms = args
            return [atom] + atoms

        @pg.production("atom : NAME")
        @pg.production("atom : INT")
        @pg.production("atom : STRING")
        @pg.production("atom : PATH")
        @pg.production("atom : PROTO")
        def atom(args):
            # All atom kinds reduce to their raw string value.
            name, = args
            return name.value

        self.pg = pg
        self.lg = lg

        self.lexer = self.lg.build()
        self.parser = self.pg.build()
示例#45
0
文件: lexer.py 项目: blakev/LTPyB
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# >>
#     LTPyB, 2016
# <<

from rply import LexerGenerator

lg = LexerGenerator()

# rply tries rules in the order they were added, so the longer / more
# specific pattern must come first. The original had both pairs inverted:
# INTEGER before FLOAT (so "1.5" lexed as INTEGER "1" and then failed on
# the "."), and OP_ASSIGNMENT before OP_EQUAL (so "==" lexed as two
# OP_ASSIGNMENT tokens and OP_EQUAL was unreachable).
lg.add('FLOAT',         r'\-?\d+\.\d+')
lg.add('INTEGER',       r'\-?\d+')
lg.add('OP_EQUAL',      r'==')
lg.add('OP_ASSIGNMENT', r'=')

lg.ignore(r'\s+')    # ignore whitespace
lg.ignore(r'#.*\n')  # ignore comments

lexer = lg.build()
示例#46
0
 def __init__(self):
     """Build the scanner from the module-level `grammar` table.

     NOTE(review): `grammar` is defined elsewhere — presumably a sequence
     of (token_name, regex) pairs; confirm against the enclosing module.
     """
     _lg = LexerGenerator()
     for r in grammar:
         _lg.add(r[0], r[1])
     # Whitespace is not significant in this language.
     _lg.ignore(r'\s+')
     self._scanner = _lg.build()
示例#47
0
文件: lexer.py 项目: sarostru/kite
from rply import LexerGenerator

lg = LexerGenerator()

# NUMBER covers both integers and decimals in a single rule.
lg.add('NUMBER', r'\d+(\.\d+)?')
lg.add('PLUS', r'\+')
lg.add('MINUS', r'-')
lg.add('MUL', r'\*')
lg.add('DIV', r'/')
lg.add('OPEN_PARENS', r'\(')
lg.add('CLOSE_PARENS', r'\)')
lg.add('EQUALS', r'=')
# Catch-all: any non-space run not starting with a digit. Must be last so
# the operator rules above get first chance at the input.
lg.add('SYMBOL', r'[^\s0-9][^\s]*')

lg.ignore(r'\s+')

lexer = lg.build()
示例#48
0
""":mod:`stencil_lang.matrix.lexer` -- Matrix scanner
"""

from rply import LexerGenerator

from stencil_lang.matrix.tokens import TOKENS, IGNORES

lg = LexerGenerator()

# TOKENS maps rule name -> regex; iteritems() makes this Python 2 code
# (RPython toolchain), dict order determines rule precedence.
for rule_name, regex in TOKENS.iteritems():
    lg.add(rule_name, regex)

for regex in IGNORES:
    lg.ignore(regex)

# This has to be called outside a function because the parser must be generated
# in Python during translation, not in RPython during runtime.
_lexer = lg.build()
"""This intepreter's lexer instance."""


def lex(text):
    """Scan text using the generated lexer.

    :param text: text to lex
    :type text: :class:`str`
    :return: parsed stream
    :rtype: :class:`rply.lexer.LexerStream`
    """
    return _lexer.lex(text)
示例#49
0
文件: lexer.py 项目: 0atman/hy
# Quoting reader macros. `end_quote` is defined earlier in this module
# (not visible here) — presumably a lookahead that stops the quote at a
# delimiter.
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
lg.add('HASHBANG', r'#!.*[^\r\n]')
lg.add('HASHREADER', r'#.')

# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter
partial_string = r'''(?x)
    (?:u|r|ur|ru)? # prefix
    "  # start string
    (?:
       | [^"\\]             # non-quote or backslash
       | \\.                # or escaped single character
       | \\x[0-9a-fA-F]{2}  # or escaped raw character
       | \\u[0-9a-fA-F]{4}  # or unicode escape
       | \\U[0-9a-fA-F]{8}  # or long unicode escape
    )* # one or more times
'''

# A complete STRING is a partial string plus the closing quote; the
# PARTIAL_STRING rule (no closing quote) must come second.
lg.add('STRING', r'%s"' % partial_string)
lg.add('PARTIAL_STRING', partial_string)

lg.add('IDENTIFIER', r'[^()\[\]{}\'"\s;]+')


# ';' comments run to (but do not consume) end of line.
lg.ignore(r';.*(?=\r|\n|$)')
lg.ignore(r'\s+')


lexer = lg.build()
示例#50
0
文件: lexer.py 项目: samgiles/naulang
from rply import LexerGenerator

lexer_gen = LexerGenerator()

# Skip all whitespace (including newlines) and '#' comments to end of line.
lexer_gen.ignore(r"([\s\f\t\n\r\v]+)|#.*$")

def get_tokens():
    return [
        # Keywords
        ("IF", r"if\b"),
        ("PRINT", r"print\b"),
        ("FN", r"fn\b"),
        ("WHILE", r"while\b"),
        ("RETURN", r"return\b"),
        ("LET", r"let\b"),
        ("BREAK", r"break\b"),
        ("CONTINUE", r"continue\b"),
        ("ASYNC", r"async\b"),
        # Channel Operators
        ("CHAN_OUT", r"<:"),
        ("CHAN_IN", r"<-"),
        # Arithmetic Operators
        ("MUL", r"\*"),
        ("DIV", r"/"),
        ("MOD", r"%"),
        ("PLUS", r"\+"),
        ("MINUS", r"-"),
        # Logical Operators
        ("AND", r"and\b"),
        ("OR", r"or\b"),
示例#51
0
文件: l4850.py 项目: ZachOrr/L4850
from rply import ParserGenerator, LexerGenerator
from rply.token import BaseBox
import AST, Visitor, Environment, sys

lg = LexerGenerator()

# skip these
lg.ignore(r" ")
lg.ignore(r"\r")
lg.ignore(r"\t")
lg.ignore(r"\n")
# NOTE(review): this pattern is written in JavaCC-style notation, not
# Python regex — it likely never matches a real '//' comment; verify.
lg.ignore('// (~["\n","\r"])* ("\r")? "\n"')

# built in functions
# Rule order matters in rply: LISTP precedes LIST so "listp" is not split
# into LIST + an identifier.
lg.add("FIRST", r"first")
lg.add("REST", r"rest")
lg.add("INSERT", r"insert")
lg.add("LISTP", r"listp")
lg.add("LIST", r"list")
lg.add("EMPTYP", r"emptyp")
lg.add("PAIRP", r"pairp")
lg.add("EQUALP", r"equalp")
lg.add("LENGTH", r"length")
lg.add("NUMBERP", r"numberp")
lg.add("EXIT", r"exit")

# keywords
# DEFUNC/DEFCLASS precede FUNC so the longer keyword wins.
lg.add("DEFUNC", r"defunc")
lg.add("DEFCLASS", r"defclass")
lg.add("METHOD", r"method")
lg.add("FUNC", r"func")
示例#52
0
from rply import LexerGenerator

lg = LexerGenerator()

# Arithmetic operator tokens. The original had the MUL and DIV patterns
# swapped (MUL matched "/" and DIV matched "*"); corrected here so each
# token name matches its operator.
lg.add("PLUS", r"\+")
lg.add("MINUS", r"-")
lg.add("MUL", r"\*")
lg.add("DIV", r"/")

lg.add("NUMBER", r"\d+")

lg.ignore(r"\s+")

lexer = lg.build()
示例#53
0
# NOTE(review): the ur'...' literals below are Python 2-only syntax
# (removed in Python 3); this module cannot run unmodified on Python 3.
import re
import itertools
from collections import deque
from rply import ParserGenerator, LexerGenerator
from graphextractor.rfc3987 import UrlPattern
from graphextractor.flattened import flattened

__all__ = ['TweetLexer', 'TweetParser']

lex = LexerGenerator()
# Separators: punctuation, whitespace, and the words "and"/"or".
lex.ignore(ur'(?:[,;\s]+|\band\b|\bor\b)+')
lex.add(u'URL', UrlPattern)
# The specific #betterthan hashtags precede the generic HASHTAG rule.
lex.add(u'BTHASH', ur'#betterthan')
lex.add(u'IBTHASH', ur'#isbetterthan')
lex.add(u'HASHTAG', ur'#[a-zA-Z0-9_]+')
lex.add(u'MENTION', ur'@[a-zA-Z0-9_]+')
lex.add(u'FOR', ur'(for|FOR|For)')
lex.add(u'WORD', ur'[\w]+')

pg = ParserGenerator([u'URL',
                      u'BTHASH',
                      u'IBTHASH',
                      u'HASHTAG',
                      u'MENTION',
                      u'FOR',
                      u'WORD'
                     ], 
                     cache_id=u'graphextractor.tweetparser')

@pg.production("betterthan : words URL bthash URL topics words")
def betterthan(p):
示例#54
0
文件: lexer.py 项目: RobotDisco/hy
# Reader-macro tokens. `end_quote` is defined earlier in this module (not
# visible here) — presumably a lookahead stopping the quote at a delimiter.
lg.add('RCURLY', r'\}')
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
lg.add('HASHBANG', r'#!.*[^\r\n]')


# Complete string literal with optional prefix, escapes, and closing quote.
lg.add('STRING', r'''(?x)
    (?:u|r|ur|ru)? # prefix
    "  # start string
    (?:
       | [^"\\]             # non-quote or backslash
       | \\.                # or escaped single character
       | \\x[0-9a-fA-F]{2}  # or escaped raw character
       | \\u[0-9a-fA-F]{4}  # or unicode escape
       | \\U[0-9a-fA-F]{8}  # or long unicode escape
    )* # one or more times
    "  # end string
''')


# Anything that is not a delimiter, quote, whitespace, or ';' is a symbol.
lg.add('IDENTIFIER', r'[^()\[\]{}\'"\s;]+')


# ';' comments are consumed together with their trailing newline(s).
lg.ignore(r';.*[\r\n]+')
lg.ignore(r'\s+')


lexer = lg.build()
示例#55
0
文件: edn.py 项目: havleoto/plang
lg = LexerGenerator()

# Symbols: a leading non-digit symbol character followed by symbol chars.
SYMBOL_RE = r"[<>\.\*\/\+\!\-\_\?\$%&=a-zA-Z][<>\.\*\+\!\-\_\?\$%&=a-zA-Z0-9:#]*"

# NOTE(review): nil/true/false have no word-boundary guard and are tried
# before "symbol", so a symbol like "nile" lexes as nil + "e" — verify
# this is intended.
lg.add("nil", r"nil")
lg.add("true", r"true")
lg.add("false", r"false")
lg.add("float", r"\d+\.\d+")
lg.add("number", r"\d+")
lg.add("olist", r"\(")
lg.add("clist", r"\)")
lg.add("symbol", SYMBOL_RE)
lg.add("string", r'"(\\\^.|\\.|[^\"])*"')

# edn treats commas as whitespace; ';' comments run to end of line.
lg.ignore(r"[\s,\r\n\t]+")
lg.ignore(r";.*\n")

lexer = lg.build()

pg = ParserGenerator(["true", "false", "nil", "float", "number", "olist",
                        "clist", "symbol", "string"])

class State(object):
    """Empty parser-state placeholder passed through rply productions."""
    def __init__(self):
        pass

class ValueList(BaseBox):
    """BaseBox wrapper holding a list of parsed values."""
    def __init__(self, value):
        # value: the wrapped payload (RPython requires boxed values).
        self.value = value
示例#56
0
文件: parser.py 项目: chajadan/mochi
# Comparison operators: two-character forms (<=, >=, ==, !=) are added
# before the single-character ones so rply's first-match ordering does not
# split them.
lg.add('OPLEQ', r'<=')
lg.add('OPGEQ', r'>=')
lg.add('OPEQ', r'==')
lg.add('OPNEQ', r'!=')
lg.add('OPLT', r'<')
lg.add('OPGT', r'>')
lg.add('BANG', r'!')

lg.add('EQUALS', r'=')
lg.add('SEMI', r';')
lg.add('AT', r'@')
lg.add('AMP', r'\&')
lg.add('BACKSLASH', r'\\')

# A NEWLINE token swallows following blank lines and leading indentation.
lg.add('NEWLINE', r'(?:(?:\r?\n)[\t ]*)+')
lg.ignore(r'[ \t\f\v]+')
lg.ignore(r'#.*(?:\n|\r|\r\n|\n\r|$)')  # comment

# Second lexer for keywords: each pattern is anchored (^...$) so it only
# matches a whole candidate word, not a prefix.
klg = LexerGenerator()
klg.add('IMPORT', r'^import$')
klg.add('MODULE', r'^module$')
klg.add('REQUIRE', r'^require$')
klg.add('EXPORT', r'^export$')
klg.add('VAR', r'^var$')
klg.add('LET', r'^let$')
klg.add('DEF', r'^def$')
klg.add('DEFM', r'^defm$')
klg.add('FN', r'^fn$')
klg.add('TRUE', r'^True$')
klg.add('FALSE', r'^False$')
klg.add('DOC', r'^doc:$')
示例#57
0
    'MINUS': r'-',
    'MUL': r'\*',
    'NUMBER_SEP': r'/',
    'EXPR_OPEN': r'\(',
    'EXPR_CLOSE': r'\)',
    'AND': r'&',
    'OR': r'\|',
    'NOT': r'!',
    'EQ': r'\?\s*=',
    'GT': r'>',
    'LT': r'<',
    'BOWL': r':',
    'BOWL_OPEN': r'{',
    'BOWL_CLOSE': r'}',
    'NOODLE_OPEN': r'\[',
    'NOODLE_SEP': r';',
    'NOODLE_CLOSE': r'\]',
    'ASSIGN': r'=',
    'DENO': r'\^',
    'MEM': r'@',
}

# Build the lexer from the op_map table (token name -> regex) defined above.
lg = LexerGenerator()
for name, regex in op_map.items():
    lg.add(name, regex)

# Raw strings replace the original non-raw '\s+' literals: '\s' is not a
# valid Python string escape (DeprecationWarning, and a SyntaxWarning on
# modern interpreters); the raw form has the identical runtime value.
lg.ignore(r'\s+')
# Block comments of the form ~ # ... # ~ (no nested '#~').
lg.ignore(r'~\s*#((?!#~).)*#\s*~')

lexer = lg.build()
示例#58
0
文件: lexer.py 项目: mio-lang/mio
"""Lexer"""


from rply import LexerGenerator


from .tokens import TOKENS, IGNORES


lg = LexerGenerator()

# TOKENS maps name -> regex; iteritems() makes this Python 2 code
# (RPython toolchain), and dict order determines rule precedence.
for name, rule in TOKENS.iteritems():
    lg.add(name, rule)

# IGNORES entries are argument tuples, hence the unpacking.
for rule in IGNORES:
    lg.ignore(*rule)

# This has to be called outside a function because the parser must be generated
# in Python during translation, not in RPython during runtime.
lexer = lg.build()


def lex(text):
    """Scan text using the generated lexer.

    :param text: text to lex
    :type text: :class:`str`

    :return: parsed stream
    :rtype: :class:`rply.lexer.LexerStream`
    """