def test_basic_lexer(self):
        """Lex alternating NUMBER/PLUS tokens inside a translated function.

        ``f`` lexes "n+n+n", checks the token pattern, sums the numbers,
        and returns a negative sentinel on any mismatch; the JIT/translation
        harness (``self.run``) must produce 14+14+14 == 42.
        """
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")

        l = lg.build()

        def f(n):
            tokens = l.lex("%d+%d+%d" % (n, n, n))
            i = 0
            s = 0
            # 5 tokens expected: NUMBER PLUS NUMBER PLUS NUMBER
            while i < 5:
                t = tokens.next()
                if i % 2 == 0:
                    if t.name != "NUMBER":
                        return -1
                    s += int(t.value)
                else:
                    if t.name != "PLUS":
                        return -2
                    if t.value != "+":
                        return -3
                i += 1
            # Stream must be exhausted after the fifth token.
            if tokens.next() is not None:
                return -4
            return s

        assert self.run(f, [14]) == 42
示例#2
0
文件: test_lexer.py 项目: alex/rply
    def test_position(self):
        """Tokens carry 1-based line/column source positions; newlines bump lineno."""
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")
        lg.ignore(r"\s+")

        l = lg.build()

        stream = l.lex("2 + 3")
        t = stream.next()
        assert t.source_pos.lineno == 1
        assert t.source_pos.colno == 1
        t = stream.next()
        assert t.source_pos.lineno == 1
        assert t.source_pos.colno == 3
        t = stream.next()
        assert t.source_pos.lineno == 1
        assert t.source_pos.colno == 5
        with raises(StopIteration):
            stream.next()

        # A newline in ignored whitespace advances lineno and resets colno.
        stream = l.lex("2 +\n    37")
        t = stream.next()
        assert t.source_pos.lineno == 1
        assert t.source_pos.colno == 1
        t = stream.next()
        assert t.source_pos.lineno == 1
        assert t.source_pos.colno == 3
        t = stream.next()
        assert t.source_pos.lineno == 2
        assert t.source_pos.colno == 5
        with raises(StopIteration):
            stream.next()
示例#3
0
文件: test_lexer.py 项目: alex/rply
    def test_regex_flags_ignore(self):
        """re flags are honored by ignore rules: DOTALL '.*' swallows all input."""
        lg = LexerGenerator()
        lg.add("ALL", r".*", re.DOTALL)
        lg.ignore(r".*", re.DOTALL)

        l = lg.build()

        stream = l.lex("test\ndotall")

        # Ignore rules are tried first, so the whole input is discarded.
        with raises(StopIteration):
            stream.next()
示例#4
0
文件: test_lexer.py 项目: alex/rply
    def test_error(self):
        """Unlexable input raises LexingError whose repr includes the source position."""
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")

        l = lg.build()

        stream = l.lex('fail')
        with raises(LexingError) as excinfo:
            stream.next()

        assert 'SourcePosition(' in repr(excinfo.value)
示例#5
0
文件: test_lexer.py 项目: alex/rply
    def test_regex_flags(self):
        """re flags are honored by token rules: DOTALL '.*' matches across newlines."""
        lg = LexerGenerator()
        lg.add("ALL", r".*", re.DOTALL)

        l = lg.build()

        stream = l.lex("test\ndotall")
        t = stream.next()
        assert t.source_pos.lineno == 1
        assert t.source_pos.colno == 1
        # One token spanning the embedded newline.
        assert t.getstr() == "test\ndotall"

        with raises(StopIteration):
            stream.next()
示例#6
0
    def test_repr(self):
        """str() of a LexerStream works before and during iteration.

        NOTE(review): ``str(x) is not None`` is always true; these asserts
        only prove that computing str(stream) does not raise.
        """
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")
        lg.ignore(r"\s+")
 
        l = lg.build()
 
        stream = l.lex("2 + 3")
        assert str(stream) is not None
        t = stream.next()
        assert t.name == "NUMBER"
        assert t.value == "2"
        assert str(stream) is not None
        t = stream.next()
        assert t.name == "PLUS"
示例#7
0
文件: test_lexer.py 项目: alex/rply
    def test_newline_position(self):
        """A NEWLINE token itself is positioned on the line it terminates."""
        lg = LexerGenerator()
        lg.add("NEWLINE", r"\n")
        lg.add("SPACE", r" ")

        l = lg.build()

        stream = l.lex(" \n ")
        t = stream.next()
        assert t.source_pos.lineno == 1
        assert t.source_pos.colno == 1
        t = stream.next()
        # The newline token is reported at line 1, column 2 ...
        assert t.source_pos.lineno == 1
        assert t.source_pos.colno == 2
        t = stream.next()
        # ... and the following token starts line 2, column 1.
        assert t.source_pos.lineno == 2
        assert t.source_pos.colno == 1
    def test_simple(self):
        """Basic NUMBER/PLUS lexing of "2+3", including the 0-based idx position.

        NOTE(review): this (older-API) variant expects ``next()`` to return
        None at end of stream rather than raising StopIteration.
        """
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")

        l = lg.build()

        stream = l.lex("2+3")
        t = stream.next()
        assert t.name == "NUMBER"
        assert t.value == "2"
        t = stream.next()
        assert t.name == "PLUS"
        assert t.value == "+"
        t = stream.next()
        assert t.name == "NUMBER"
        assert t.value == "3"
        assert t.source_pos.idx == 2
        t = stream.next()
        assert t is None
示例#9
0
文件: test_lexer.py 项目: olasd/rply
    def test_ignore(self):
        """Ignored whitespace is skipped but still counts toward idx positions."""
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")
        lg.ignore(r"\s+")

        l = lg.build()

        stream = l.lex("2 + 3")
        t = stream.next()
        assert t.name == "NUMBER"
        assert t.value == "2"
        t = stream.next()
        assert t.name == "PLUS"
        assert t.value == "+"
        t = stream.next()
        assert t.name == "NUMBER"
        assert t.value == "3"
        # "3" sits at index 4 of "2 + 3": skipped spaces advance idx.
        assert t.source_pos.idx == 4
        with raises(StopIteration):
            stream.next()
示例#10
0
文件: test_both.py 项目: DasIch/rply
    def test_arithmetic(self):
        """End-to-end lexer + parser: precedence makes "3*4+5" parse as (3*4)+5."""
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")
        lg.add("TIMES", r"\*")

        # PLUS listed first => lower precedence than TIMES.
        pg = ParserGenerator(["NUMBER", "PLUS", "TIMES"], precedence=[
            ("left", ["PLUS"]),
            ("left", ["TIMES"]),
        ])

        @pg.production("main : expr")
        def main(p):
            return p[0]

        @pg.production("expr : expr PLUS expr")
        @pg.production("expr : expr TIMES expr")
        def expr_binop(p):
            # Dispatch on the operator token's text.
            return BoxInt({
                "+": operator.add,
                "*": operator.mul
            }[p[1].getstr()](p[0].getint(), p[2].getint()))

        @pg.production("expr : NUMBER")
        def expr_num(p):
            return BoxInt(int(p[0].getstr()))

        lexer = lg.build()
        parser = pg.build()

        assert parser.parse(lexer.lex("3*4+5"))
示例#11
0
 def __init__(self):
     """Build the scanner from the module-level ``grammar`` rule table.

     Each entry of ``grammar`` is assumed to be a (token_name, pattern)
     pair — TODO confirm against the module that defines it.  Whitespace
     is ignored.
     """
     _lg = LexerGenerator()
     for r in grammar:
         _lg.add(r[0], r[1])
     _lg.ignore(r'\s+')
     self._scanner = _lg.build()
示例#12
0
def lexer_from_mapping(mapping):
    """Build an rply lexer for the query language.

    ``mapping`` is a dict whose keys are single special characters; each
    key K gets its own token matching the backslash-escaped form ``\\K``.
    Everything that is neither an operator nor an escaped special
    character is lexed as DATA.
    """
    lg = LexerGenerator()

    # Escape data with forward slashes
    lg.add("DATA", r'/.+?/')

    # Add the special characters
    for char in mapping.keys():
        lg.add(char, r"\\" + char)

    # Normal tokens
    lg.add("TYPE", r':')
    lg.add("AND", r'\&')
    lg.add("OR", r'\|')
    lg.add("L_PAREN", r'\(')
    lg.add("R_PAREN", r'\)')
    lg.add("EQUAL", r'=')
    lg.add("CHILD", r'>')
    lg.add("PARENT", r'<')
    lg.add("NOT", r'!')

    # Everything else is data.
    # BUG FIX: lg.add("DATA", ...) used to sit *inside* this loop, so a
    # DATA rule excluding only a prefix of the mapped characters was
    # registered on every iteration.  rply tries rules in registration
    # order, so the earliest (most permissive) rule won and swallowed
    # special characters added later in the mapping.  Accumulate the full
    # exclusion set first, then register the rule exactly once.
    excluded_chars = r'^<>=&|():!'
    for char in mapping.keys():
        excluded_chars += r"\\" + char
    lg.add("DATA", "[{excluded}]+".format(excluded=excluded_chars))

    lg.ignore(r'\s+')
    lexer = lg.build()
    return lexer
示例#13
0
def add_infix_1_macro_name(macro_name):
    """Register *macro_name* in the module-level infix-1 macro registry."""
    infix_1_macro_names.append(macro_name)


def add_infix_2_macro_name(macro_name):
    """Register *macro_name* in the module-level infix-2 macro registry."""
    infix_2_macro_names.append(macro_name)


def add_user_defined_keyword(keyword):
    """Register *keyword* in the module-level user-defined keyword list."""
    user_defined_keywords.append(keyword)


lg = LexerGenerator()

lg.add(
    'TQUOTE_STR',
    r'(?x)"""(?:|[^\\]|\\.|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*"""'
)
lg.add(
    'SQUOTE_STR',
    r"(?x)'(?:|[^'\\]|\\.|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*'"
)
lg.add(
    'DQUOTE_STR',
    r'(?x)"(?:|[^"\\]|\\.|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*"'
)

lg.add(
    'TQUOTE_RAW_STR',
    r'(?x)r"""(?:|[^\\]|\\.|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*"""'
)
lg.add(
示例#14
0
class Lexer:
    """Builds the rply lexer for the toy calculator/script language."""

    def __init__(self):
        self.lexer = LexerGenerator()
        self.__add_tokens()

    def __add_tokens(self):
        """Register all token rules.  Order matters: rply tries rules in
        registration order, so keywords must precede IDENTIFIER."""
        # Constants
        self.lexer.add('E', r'-?__E__')
        self.lexer.add('PI', r'-?__PI__')
        self.lexer.add('FLOAT', r'-?\d+\.\d+')
        self.lexer.add('INTEGER', r'-?\d+')
        # BUG FIX: the string patterns were greedy (".*"), so two string
        # literals on one line were merged into a single token; use
        # non-greedy repetition to stop at the first closing quote.
        self.lexer.add('STRING', r'(""".*?""")|(".*?")|(\'.*?\')')
        self.lexer.add(
            'BOOLEAN',
            r'true(?!\w)|false(?!\w)|True(?!\w)|False(?!\w)|TRUE(?!\w)|FALSE(?!\w)'
        )
        # Mathematical operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        # Binary / comparison operators
        self.lexer.add('AND', r'and(?!\w)')
        self.lexer.add('OR', r'or(?!\w)')
        self.lexer.add('==', r'\=\=')
        self.lexer.add('!=', r'\!\=')
        self.lexer.add('>=', r'\>\=')
        self.lexer.add('<=', r'\<\=')
        self.lexer.add('>', r'\>')
        self.lexer.add('<', r'\<')
        self.lexer.add('=', r'\=')
        # Statements
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('NOT', r'not(?!\w)')
        # Separators
        self.lexer.add(';', r'\;')
        self.lexer.add(',', r'\,')
        # Parentheses / braces
        self.lexer.add('(', r'\(')
        self.lexer.add(')', r'\)')
        self.lexer.add('{', r'\{')
        self.lexer.add('}', r'\}')
        # Built-in function names.
        # BUG FIX: these lacked the (?!\w) word boundary used by the other
        # keywords, so e.g. "inputs" lexed as CONSOLE_INPUT + IDENTIFIER.
        self.lexer.add('CONSOLE_INPUT', r'input(?!\w)')
        self.lexer.add('FUNCTION', r'function(?!\w)')
        self.lexer.add('PRINT', r'print(?!\w)')
        self.lexer.add('ABSOLUTE', r'abs(?!\w)')
        self.lexer.add('SIN', r'sin(?!\w)')
        self.lexer.add('COS', r'cos(?!\w)')
        self.lexer.add('TAN', r'tan(?!\w)')
        self.lexer.add('POWER', r'pow(?!\w)')
        # Assignment
        self.lexer.add('LET', r'let(?!\w)')
        self.lexer.add('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*")
        # Ignore whitespace (raw string: '\s' is an invalid escape in a
        # plain string literal and warns on Python >= 3.12).
        self.lexer.ignore(r'\s+')

    def build(self):
        """Return the built rply Lexer."""
        return self.lexer.build()
示例#15
0
from rply import LexerGenerator

lg = LexerGenerator()

# Operator tokens.
# BUG FIX: MUL and DIV had their patterns swapped — MUL matched "/" and
# DIV matched "*".
lg.add("PLUS", r"\+")
lg.add("MINUS", r"-")
lg.add("MUL", r"\*")
lg.add("DIV", r"/")

lg.add("NUMBER", r"\d+")

lg.ignore(r"\s+")

# Module-level lexer instance used by the rest of the package.
lexer = lg.build()
示例#16
0
文件: __init__.py 项目: lloeki/wasp
from rply import ParserGenerator, LexerGenerator
import box


lg = LexerGenerator()
lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
lg.add("QUOTE", r"'")
lg.add("ATOM", r"[^\s()]+")
lg.ignore(r"\s+")

pg = ParserGenerator(["QUOTE", "LPAREN", "RPAREN", "ATOM"],
                     precedence=[],
                     cache_id="wasp")


@pg.error
def error_handler(token):
    """Parser error callback: raise ValueError for an unexpected token.

    Includes the (lineno, colno) source position when the token has one.
    """
    # Renamed from 'type', which shadowed the builtin.
    token_type = token.gettokentype()
    pos = token.getsourcepos()
    if pos is None:
        raise ValueError("unexpected %s" % token_type)
    else:
        raise ValueError("unexpected %s at (%s, %s)" %
                         (token_type, pos.lineno, pos.colno))


@pg.production("main : sexpr")
def main(p):
    """Start rule: the program is its single child s-expression."""
    (sexpr,) = p
    return sexpr
示例#17
0
from rply import ParserGenerator, LexerGenerator
from rply.token import BaseBox


class BoxString(BaseBox):
    """Box wrapping a plain string so it can flow through an rply parser."""

    def __init__(self, value):
        # The wrapped string.
        self.value = value

    def getstr(self):
        """Return the wrapped string."""
        return self.value


lg = LexerGenerator()

lg.add('GT', r'\bgt\b')
lg.add('GE', r'\bge\b')
lg.add('LT', r'\blt\b')
lg.add('LE', r'\ble\b')

lg.add('EQ', r'\beq\b')
lg.add('NE', r'\bne\b')
lg.add('IS', r'\bis\b')
lg.add('LIKE', r'\blike\b')

lg.add('AND', r'\band\b')
lg.add('OR', r'\bor\b')
lg.add('NOT', r'\bnot\b')

lg.add('NONE', r'\bnull\b')
lg.add('OPEN_PARENS', r'\(')
lg.add('CLOSE_PARENS', r'\)')
lg.add('NUMBER', r'[\d]{1,99}([.]\d{1,99})?')
示例#18
0
]

operators = OrderedDict([
    ("COMMA", ","),
    ("PAREN_L", r"\("),
    ("PAREN_R", r"\)"),
    ("ASSIGN", "<-"),
    ("MULTIPLY", r"\*"),
    ("DIVIDE", r"/"),
    ("PLUS", r"\+"),
    ("MINUS", r"-"),
])

lg = LexerGenerator()

lg.add("NUM", r"\d+")
lg.add("ID", r"[a-zA-Z][a-zA-Z0-9]*")

for key, value in operators.items():
    lg.add(key, value)


def id_reserved(token):
    """Promote an ID token to a reserved-word token when applicable.

    If the token text (case-insensitively) appears in the module-level
    ``reserved`` collection, return a new Token named after the
    upper-cased text; otherwise return the token unchanged.
    """
    if token.value.lower() in reserved:
        return Token(token.value.upper(), token.value)
    return token


callbacks = {
    "ID": [id_reserved],
}  # type: Dict[str, List[Callable[[Token], Token]]]
示例#19
0
class Lexer():
    """Builds the rply lexer for the toy language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules (rply tries them in registration order)."""
        # Print
        self.lexer.add('PRINT', r'print')

        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('WQ', r'\"')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUP', r'\*')
        self.lexer.add('DIV', r'\/')

        self.lexer.add('EQUAL', r'=')

        self.lexer.add('GREATER', r'\>')
        self.lexer.add('SMALLER', r'\<')
        self.lexer.add('NOT', r'\!')
        # semi colon
        self.lexer.add('SEMI_COLON', r'\;')

        # logical
        self.lexer.add('AND', r'and')
        self.lexer.add('OR', r'or')

        # colon
        self.lexer.add('COLON', r'\:')
        # comma
        self.lexer.add('COMMA', ',')

        # if / else.
        # BUG FIX: these rules were previously registered *after* STRING
        # and VAR, whose patterns match any word, so IF/ELSE tokens could
        # never be produced.  Keywords must precede the catch-all rules.
        self.lexer.add('IF', r'if')
        self.lexer.add('ELSE', r'else')

        # Number
        self.lexer.add('NUMBER', r'\d+')
        # String.
        # BUG FIX: was r'\w*', which also matches the empty string; a
        # zero-width token rule makes rply loop forever without consuming
        # input.  Require at least one character.
        self.lexer.add('STRING', r'\w+')
        # Variables.
        # NOTE(review): this pattern does not actually exclude the words
        # 'and'/'or'/'if'/'else' — [^...] negates single characters, not
        # words.  Kept as-is to preserve the original token boundaries;
        # rewrite deliberately if keyword exclusion is really intended.
        self.lexer.add('VAR', r'[^(and)|(or)|(if)|(else)|(:) ]\w*')

        # Ignore spaces (raw string: '\s' is an invalid escape in a plain
        # string literal and warns on Python >= 3.12).
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the tokens and return the built rply Lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#20
0
from rply import LexerGenerator

lg = LexerGenerator()

lg.add("BEGIN_STATEMENT", r"begin")
lg.add("END_STATEMENT", r"end")
lg.add("FLOAT_LITERAL", r"\d+\.\d+")
lg.add("INTEGER_LITERAL", r"[0-9]+")
lg.add("STRING_LITERAL", r"\"[^\"]*\"")
lg.add("CHARACTER_LITERAL", r"\'.\'")
lg.add("BOOLEAN_LITERAL", r"(true)|(false)")
lg.add("FLOAT_TYPENAME", r"Float")
lg.add("INTEGER_TYPENAME", r"Integer")
lg.add("STRING_TYPENAME", r"String")
lg.add("CHARACTER_TYPENAME", r"Character")
lg.add("BOOLEAN_TYPENAME", r"Boolean")
lg.add("PLUS", r"\+")
lg.add("MINUS", r"\-")
lg.add("MULTIPLICATION", r"\*")
lg.add("CONCATENATION", r"&")
lg.add("DIVISION", r"/")
lg.add("MODULO", r"mod")
lg.add("ASSIGNMENT", r":=")
lg.add("TYPE_DECLARATION", r":")
lg.add("EQUALS", r"=")
lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
lg.add("LBRACE", r"\{")
lg.add("RBRACE", r"\}")
lg.add("IF_CONDITIONAL", r"if")
lg.add("THEN_CONDITIONAL", r"then")
示例#21
0
文件: lexer.py 项目: zenhack/hy
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

from rply import LexerGenerator

lg = LexerGenerator()

# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote = r'(?![\s\)\]\}])'

lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
lg.add('HASHBANG', r'#!.*[^\r\n]')
lg.add('HASHREADER', r'#.')

lg.add(
    'STRING', r'''(?x)
    (?:u|r|ur|ru)? # prefix
示例#22
0
class Lexer():
    """Builds the rply lexer for a propositional-logic definition language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules.

        The DEF_* rules must precede the bare connective rules so that
        e.g. "def ~" is one token rather than an identifier plus NOT.
        """
        # Parentheses
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')

        # Definitions (must come before the bare connectives)
        self.lexer.add('DEF_NOT', r'def \~')
        self.lexer.add('DEF_IMPLIE', r'def \->')
        self.lexer.add('DEF_AND', r'def \&')
        self.lexer.add('DEF_OR', r'def \|')
        self.lexer.add('DEF_IFF', r'def \<->')
        self.lexer.add('DEF_BASE', r'def A')

        # Connectives
        self.lexer.add('NOT', r'\~')
        self.lexer.add('IMPLIE', r'\->')
        self.lexer.add('AND', r'\&')
        self.lexer.add('OR', r'\|')
        self.lexer.add('IFF', r'\<->')

        # Hyphen
        self.lexer.add('HYPHEN', r'\-')

        # Punctuation
        self.lexer.add('DOT', r'\.')
        self.lexer.add('COMMA', r'\,')

        # Number
        self.lexer.add('NUMBER', r'\d+')

        # Atom
        self.lexer.add('ATHOM', r'[a-zA-Z][a-zA-Z0-9]*')

        # Ignore spaces (raw string: '\s' is an invalid escape in a plain
        # string literal and warns on Python >= 3.12).
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the tokens and return the built rply Lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#23
0
from __future__ import print_function

import re
import ast
import collections

from rply import ParserGenerator, LexerGenerator
from ytypes import *

lg = LexerGenerator()

SYMBOL_RE = r"[\.\*\+\!\-\_\?\$%&=a-zA-Z][\.\*\+\!\-\_\?\$%&=a-zA-Z0-9:#]*"
NS_SYMBOL = SYMBOL_RE + "/" + SYMBOL_RE

lg.add("boolean", r"(true|false)")
lg.add("nil", r"nil")
lg.add("float", r"\d+\.\d+")
lg.add("number", r"[-+]?\d+")
lg.add("olist", r"\(")
lg.add("clist", r"\)")
lg.add("omap", r"{")
lg.add("cmap", r"}")
lg.add("ovec", r"\[")
lg.add("cvec", r"\]")
lg.add("oset", r"#{")
lg.add("colon", r":")
lg.add("char_nl", r"\\newline")
lg.add("char_tab", r"\\tab")
lg.add("char_return", r"\\return")
lg.add("char_space", r"\\space")
lg.add("char", r"\\.")
示例#24
0
    'MINUS': r'-',
    'MUL': r'\*',
    'NUMBER_SEP': r'/',
    'EXPR_OPEN': r'\(',
    'EXPR_CLOSE': r'\)',
    'AND': r'&',
    'OR': r'\|',
    'NOT': r'!',
    'EQ': r'\?\s*=',
    'GT': r'>',
    'LT': r'<',
    'BOWL': r':',
    'BOWL_OPEN': r'{',
    'BOWL_CLOSE': r'}',
    'NOODLE_OPEN': r'\[',
    'NOODLE_SEP': r';',
    'NOODLE_CLOSE': r'\]',
    'ASSIGN': r'=',
    'DENO': r'\^',
    'MEM': r'@',
}

lg = LexerGenerator()
for name, regex in op_map.items():
    lg.add(name, regex)

lg.ignore('\s+')
lg.ignore('~\s*#((?!#~).)*#\s*~')

lexer = lg.build()
示例#25
0
    def test_states(self):
        """Lexer states: OPEN_BRACKET switches to "vector", CLOSE_BRACKET back.

        Rules registered without ``state`` belong to the initial "scalar"
        state; each produced token records the state it was matched in.
        """
        lg = LexerGenerator(initial_state="scalar")
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")
        lg.ignore(r"\s+")

        # "vector"-state rules; OPEN_BRACKET transitions into the state,
        # CLOSE_BRACKET transitions back out.
        lg.add("OPEN_BRACKET", r"\[", to_state="vector")
        lg.add("PLUS", r"\+", state="vector")
        lg.add("NUMBER", r"\d+", state="vector")
        lg.add("NEW_LINE", r"\n+", state="vector")
        lg.add("CLOSE_BRACKET", r"\]", state="vector", to_state="scalar")
        lg.ignore(r" +", state="vector")

        l = lg.build()

        stream = l.lex("2 + [ 3 + 4 \n\n 5 + 6 ] + 7")
        tokens = [
            ("NUMBER", "2", "scalar"),
            ("PLUS", "+", "scalar"),
            ("OPEN_BRACKET", "[", "scalar"),
            ("NUMBER", "3", "vector"),
            ("PLUS", "+", "vector"),
            ("NUMBER", "4", "vector"),
            ("NEW_LINE", "\n\n", "vector"),
            ("NUMBER", "5", "vector"),
            ("PLUS", "+", "vector"),
            ("NUMBER", "6", "vector"),
            ("CLOSE_BRACKET", "]", "vector"),
            ("PLUS", "+", "scalar"),
            ("NUMBER", "7", "scalar"),
        ]

        for compare_token, token in zip(tokens, stream):
            name, value, state = compare_token
            assert token.name == name
            assert token.value == value
            assert token.state == state
示例#26
0
class Lexer():
    """Builds the rply lexer for a small Vietnamese-keyword language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules."""
        # Print
        self.lexer.add('IN_RA', r'in_ra')
        # Parentheses
        self.lexer.add('MO_NGOAC_TRON', r'\(')
        self.lexer.add('DONG_NGOAC_TRON', r'\)')
        # Statement terminator (newline rather than semicolon)
        self.lexer.add('HET_DONG', r'(\n)|(\r\n)')
        # Arithmetic operators
        self.lexer.add('CONG', r'\+')
        self.lexer.add('TRU', r'\-')
        self.lexer.add('NHAN', r'\*')
        self.lexer.add('CHIA', r'\/')
        # Comparison operators
        self.lexer.add('BANG', r'\=\=')
        self.lexer.add('LON_HON', r'\>')
        # BUG FIX: the token name contained a stray closing parenthesis
        # ('NHO_HON)'), inconsistent with every other token name.
        self.lexer.add('NHO_HON', r'\<')
        self.lexer.add('KHAC', r'\!\=')
        # Integer literals
        self.lexer.add('SO_NGUYEN', r'\d+')
        # Ignore horizontal whitespace (newlines are significant above)
        self.lexer.ignore(r'(^\s+)|( )+|\t+')

    def get_lexer(self):
        """Register the tokens and return the built rply Lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#27
0
    def __init__(self):
        """Build a shell-command lexer and parser.

        Statements are sequences of atoms (names, ints, strings, paths,
        URLs) separated by ';' or newlines; the parse result is a nested
        dict describing the statement tree.
        """
        lg = LexerGenerator()
        # (name, pattern) pairs, in match-priority order.  PATH and NAME
        # each appear twice on purpose: rply tries rules in order.
        tokens = [
            ("PROTO", r"[a-zA-Z]+://[^ ]+"),
            ("INT", r"\d+"),
            ("STRING", r"'[^']+'|\"[^\"]+\""),
            ("NAME", r"--colors=always"),
            ("PATH", r"([a-zA-Z0-9/._-]|\\ )+"),
            ("PATH", r"~([a-zA-Z0-9/._-]|\\ )*"),
            ("NAME", r"([a-zA-Z0-9_-]|\\ )+"),
            ("SEMICOLON", r";"),
            ("ENDL", r"\r?\n"),
        ]

        for token in tokens:
            lg.add(*token)

        # Ignore spaces and tabs (the class contains a literal tab).
        lg.ignore(r"[ 	]+")

        pg = ParserGenerator([x[0] for x in tokens])

        @pg.production("main : statements")
        def main(args):
            return args[0]

        @pg.production("statements : statement")
        def statements_one(args):
            expression, = args
            return {
                "type": "statement",
                "content": expression,
            }

        @pg.production("statements : statement separator statements")
        def statements_many(args):
            # Right-recursive: statements nest to the right.
            statement, separtor, statements = args
            return {
                "type": "statement_infix_operator",
                "content": {
                    "left": {
                        "type": "statement",
                        "content": statement,
                    },
                    "right": statements,
                    "operator": separtor,
                }
            }

        @pg.production("separator : SEMICOLON")
        @pg.production("separator : ENDL")
        def separator(args):
            # don't care
            return args[0].value

        @pg.production("statement : atom")
        def expression_one(args):
            atom, = args
            return [atom]

        @pg.production("statement : atom atoms")
        def expression_many(args):
            atom, atoms = args
            return [atom] + atoms

        @pg.production("atoms : atom")
        def atoms_one(args):
            atom, = args
            return [atom]

        @pg.production("atoms : atom atoms")
        def atoms_many(args):
            atom, atoms = args
            return [atom] + atoms

        @pg.production("atom : NAME")
        @pg.production("atom : INT")
        @pg.production("atom : STRING")
        @pg.production("atom : PATH")
        @pg.production("atom : PROTO")
        def atom(args):
            # Atoms reduce to their raw text.
            name, = args
            return name.value

        self.pg = pg
        self.lg = lg

        self.lexer = self.lg.build()
        self.parser = self.pg.build()
示例#28
0
文件: lexer.py 项目: seeeturtle/ltns
from rply import LexerGenerator

lg = LexerGenerator()

# Tag delimiters.
lg.add('LSLASHANGLE', r'</')
lg.add('RSLASHANGLE', r'/>')
lg.add('LANGLE', r'<')
lg.add('RANGLE', r'>')
lg.add('LSQUARE', r'\[')
lg.add('RSQUARE', r'\]')
lg.add('EQUAL', r'=')

# Optionally "r"-prefixed double-quoted string (verbose regex).
lg.add('STRING', r'''(?x)
(r)?
"
[^"]*
"
''')
lg.add('IDENTIFIER', r'[^<>\[\]{}=/\s"]+')

# Comments.
# BUG FIX: the pattern was greedy — with two comments in the input it
# matched from the first '<!--' to the last '-->', swallowing everything
# in between.  Non-greedy repetition stops at the first '-->'.
lg.ignore(r'<!--(.|\s)*?-->')
lg.ignore(r'\s+')

# Module-level lexer instance.
lexer = lg.build()
示例#29
0
文件: parser.py 项目: seanrife/mochi
    def __eq__(self, other):
        """Keywords are equal iff *other* is exactly a Keyword with the same name.

        Note the strict type check: subclasses deliberately do not compare
        equal.
        """
        return type(other) is Keyword and self.name == other.name

    def __hash__(self):
        """Hash by name, consistent with __eq__."""
        return hash(self.name)


lg = LexerGenerator()

lg.add('SQUOTE_STR', r"(?x)'(?:|[^'\\]|\\.|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*'")
lg.add('DQUOTE_STR', r'(?x)"(?:|[^"\\]|\\.|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})*"')

lg.add('UNTERMINATED_STRING', r"[\"\'].*")
lg.add('NUMBER', r'-?[0-9]+(?:\.[0-9]+)?')
lg.add('NAME', r'\&?[_a-zA-Z$][-_a-zA-Z0-9]*')
lg.add('PIPELINE_FIRST_BIND', r'\|>1\?')
lg.add('PIPELINE_FIRST', r'\|>1')
lg.add('PIPELINE_BIND', r'\|>\?')
lg.add('PIPELINE', r'\|>')
lg.add('PIPELINE_SEND', r'!>')
lg.add('PIPELINE_MULTI_SEND', r'!&>')
lg.add('BAR', r'\|')
lg.add('LBRACK', r'\[')
lg.add('RBRACK', r'\]')
lg.add('LBRACE', r'\{')
示例#30
0
文件: lexer.py 项目: aa10000/cycy
    "!=",
    "<=",
    ">=",
    "<",
    ">",
    "=",
    ",",
    "+",
    "-",
    ";",
    "*",
    "/",
    "%",
]
lg = LexerGenerator()
lg.add("INCLUDE", "#include")
lg.add("ASM", "__asm__")
lg.add("ASM", "asm")
lg.add("FLOAT_LITERAL", "\d+\.\d+")
lg.add("INTEGER_LITERAL", "\d+")
lg.add("CHAR_LITERAL", "'\\\\?.'")
lg.add("STRING_LITERAL", "\".*\"")
lg.add("CHAR", "char")
lg.add("SHORT", "short")
lg.add("INT", "int")
lg.add("LONG", "long")
lg.add("FLOAT", "float")
lg.add("DOUBLE", "double")
lg.add("null", "NULL")
lg.add("CONST", "const")
lg.add("UNSIGNED", "unsigned")
示例#31
0
class Lexer():
    """Builds the rply lexer for a Jack-like language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register ignore rules and token rules (tried in registration order)."""
        # Block comments, line comments, whitespace (raw strings: '\s'
        # etc. are invalid escapes in plain string literals and warn on
        # Python >= 3.12).
        self.lexer.ignore(r'/\*((.|[\r\n])*?)\*/')
        self.lexer.ignore(r'//.*\n')
        self.lexer.ignore(r'\s+')

        self.lexer.add('NUMBER', r'\d+')
        # BUG FIX: was r'"(.*)"' — greedy, so two string constants on one
        # line merged into a single token.  A quote cannot appear inside a
        # string constant, so match non-quote characters instead.
        self.lexer.add('STRING_CONST', r'"([^"]*)"')

        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('OPEN_CURLY_PAREN', r'\{')
        self.lexer.add('CLOSE_CURLY_PAREN', r'\}')
        self.lexer.add('OPEN_INDEX_PAREN', r'\[')
        self.lexer.add('CLOSE_INDEX_PAREN', r'\]')
        self.lexer.add('DOT', r'\.')
        self.lexer.add('COMMA', r'\,')
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')
        self.lexer.add('AND', r'&')
        self.lexer.add('OR', r'\|')
        self.lexer.add('LT', r'<')
        self.lexer.add('GT', r'>')
        self.lexer.add('EQUAL', r'=')
        self.lexer.add('NOT', '~')

        # Keywords.
        # BUG FIX: keywords had no word boundary, so e.g. "classy" lexed
        # as CLASS + IDENTIFIER('y') and 'DO' required a literal trailing
        # space (r'do ').  (?!\w) ensures a keyword is not a prefix of an
        # identifier.
        self.lexer.add('CLASS', r'class(?!\w)')
        self.lexer.add('CONSTRUCTOR', r'constructor(?!\w)')
        self.lexer.add('FUNCTION', r'function(?!\w)')
        self.lexer.add('METHOD', r'method(?!\w)')
        self.lexer.add('FIELD', r'field(?!\w)')
        self.lexer.add('STATIC', r'static(?!\w)')
        self.lexer.add('VAR', r'var(?!\w)')
        self.lexer.add('INT', r'int(?!\w)')
        self.lexer.add('CHAR', r'char(?!\w)')
        self.lexer.add('BOOLEAN', r'boolean(?!\w)')
        self.lexer.add('VOID', r'void(?!\w)')
        self.lexer.add('TRUE', r'true(?!\w)')
        self.lexer.add('FALSE', r'false(?!\w)')
        self.lexer.add('NULL', r'null(?!\w)')
        self.lexer.add('THIS', r'this(?!\w)')
        self.lexer.add('LET', r'let(?!\w)')
        self.lexer.add('DO', r'do(?!\w)')
        self.lexer.add('IF', r'if(?!\w)')
        self.lexer.add('ELSE', r'else(?!\w)')
        self.lexer.add('WHILE', r'while(?!\w)')
        self.lexer.add('RETURN', r'return(?!\w)')
        self.lexer.add('IDENTIFIER', r'[A-Za-z_](\w*)')

    def get_lexer(self):
        """Register the tokens and return the built rply Lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#32
0
    'OP_NOT_LIKE',
    'OP_IN',
    'OP_NOT_IN',
    'OP_LSHIFT',
    'OP_RSHIFT',
]

SINGLE_OPERATORS = [
    'OP_NOT',
    'OP_BITWISE_NOT',
    'OP_ABSOLUTE',
    'OP_ADD',
    'OP_SUB',
]

lg.add('SELECT', r'SELECT\b', flags=re.IGNORECASE)
lg.add('FROM', r'FROM\b', flags=re.IGNORECASE)
lg.add('AS', r'AS\b', flags=re.IGNORECASE)
lg.add('WHERE', r'WHERE\b', flags=re.IGNORECASE)
lg.add('LIMIT', r'LIMIT\b', flags=re.IGNORECASE)
lg.add('OFFSET', r'OFFSET\b', flags=re.IGNORECASE)
lg.add('GROUP_BY', r'GROUP\s+BY\b', flags=re.IGNORECASE)
lg.add('FLOAT', r'[+-]*(\d*\.\d+|\d+\.)')
lg.add('INTEGER', r'[+-]*\d+')
lg.add('STRING', r"'(\\'|[^'])+'")
lg.add('BOOL', r'TRUE\b|YES\b|NO\b|FALSE\b', flags=re.IGNORECASE)
lg.add('NULL', r'NULL\b', flags=re.IGNORECASE)
lg.add('PAREN_LEFT', r'\(')
lg.add('PAREN_RIGHT', r'\)')
lg.add('BRACKET_LEFT', r'\[')
lg.add('BRACKET_RIGHT', r'\]')
示例#33
0
class Lexer():
    """rply-based lexer wrapper for the toy language.

    Token rules are registered once at construction; ``input()`` then
    returns rply's lazy token stream for a given source string.

    NOTE(review): ``_next``/``next``/``peek`` operate on ``self.tokens``
    and ``self.valid_tokens``, which are only populated by an older eager
    lexing path that is no longer reachable (see ``input``); they are
    kept for interface compatibility.
    """

    def __init__(self, input=None):
        # Initialize the lexer and register all token rules.
        self.lexer = LexerGenerator()
        self._initialize_tokens()
        self.built_lexer = self.lexer.build()
        self.tokens = None
        self.valid_tokens = []
        # Position/bookkeeping counters used by the next()/peek() helpers.
        self.char = 0
        self.line = 0
        self.token_pos = 0

        # Lex the input immediately, if one was given.
        if input:
            self.input(input)

    def _initialize_tokens(self):
        """Register all token rules; keywords must precede ID."""
        self.lexer.add('KW_ARRAY', r'array')
        self.lexer.add('OP_DOTDOT', r'\.\.')
        self.lexer.add('LBRAK', r'\[')
        self.lexer.add('RBRAK', r'\]')
        self.lexer.add('SEMI', r'\;')
        self.lexer.add('KW_TUPLE', r'tuple')
        self.lexer.add('KW_LOCAL', r'local')
        self.lexer.add('KW_GLOBAL', r'global')
        self.lexer.add('KW_DEFUN', r'defun')
        self.lexer.add('LPAR', r'\(')
        self.lexer.add('RPAR', r'\)')
        self.lexer.add('OP_COMMA', r'\,')
        self.lexer.add('KW_END', r'end')
        self.lexer.add('KW_WHILE', r'while')
        self.lexer.add('KW_DO', r'do')
        self.lexer.add('KW_IF', r'if')
        self.lexer.add('KW_THEN', r'then')
        self.lexer.add('KW_ELSIF', r'elsif')
        self.lexer.add('KW_ELSE', r'else')
        # KW_FOREACH before KW_FOR: "foreach" must not lex as KW_FOR + ID.
        self.lexer.add('KW_FOREACH', r'foreach')
        self.lexer.add('KW_FOR', r'for')
        self.lexer.add('KW_IN', r'in')
        self.lexer.add('OP_DOT', r'\.')
        self.lexer.add('INT_LIT', r'\d+')
        self.lexer.add('RETURN', r'return')
        self.lexer.add('PRINT', r'print')
        self.lexer.add('EXCHANGE', r'\<\-\>')
        self.lexer.add('OP_LESSEQUAL', r'\<\=')
        self.lexer.add('OP_GREATEREQUAL', r'\>\=')
        self.lexer.add('OP_LESS', r'\<')
        self.lexer.add('OP_GREATER', r'\>')
        self.lexer.add('OP_EQUAL', r'\=\=')
        self.lexer.add('OP_NOTEQUA', r'\!\=')
        self.lexer.add('ASSIGN', r'\=')
        self.lexer.add('OP_PLUS', r'\+')
        self.lexer.add('OP_MINUS', r'\-')
        self.lexer.add('OP_MULT', r'\*')
        self.lexer.add('OP_DIV', r'\/')
        self.lexer.add('ID', r'[A-Za-z_]+')
        # Catch-all for anything unrecognized.
        self.lexer.add('UNKNOWN', r'.')

        # Ignore '***' comments, line endings, and whitespace.
        self.lexer.ignore(r'\*\*\*.*[^\r\n]')
        self.lexer.ignore(r'\r\n|\n\r|\r|\n')
        self.lexer.ignore(r'\s+')

    def input(self, input):
        """Reset the position counters and lex *input*.

        Returns rply's lazy token stream.

        BUG FIX: roughly twenty lines of unreachable code followed this
        return (an older eager-lexing implementation that truncated long
        IDs, range-checked INT_LITs, and finally referenced an undefined
        name ``raw_output``); the dead code has been removed.  Behavior
        is unchanged.
        """
        self.char = 0
        self.line = 0
        self.token_pos = 0
        return self.built_lexer.lex(input)

    def _next(self):
        """Return the next valid token from ``self.tokens``, skipping
        WS/END-OF-LINE/UNKNOWN entries (legacy eager-lexing helper)."""
        if self.token_pos < len(self.tokens):
            token = self.tokens[self.token_pos]

            if token.name != "WS" and token.name != "END-OF-LINE"\
               and token.name != "UNKNOWN":
                char_pos = self.char
                self.char += len(token.value)
                self.token_pos += 1

                return Token(token.name, token.value, self.line,
                             char_pos)

            elif token.name == "WS":
                self.char += len(token.value)
                self.token_pos += 1
                return self._next()

            elif token.name == "END-OF-LINE":
                self.line += 1
                self.char = 0
                self.token_pos += 1
                return self._next()

            elif token.name == "UNKNOWN":
                print("ERROR: " + token.value + " is not a valid token")
                self.char += len(token.value)
                self.token_pos += 1
                return self._next()

        else:
            return None

    def next(self):
        """Return the next token from ``self.valid_tokens`` and advance,
        or None when exhausted."""
        pos = self.token_pos
        if pos < len(self.valid_tokens):
            self.token_pos += 1
            return self.valid_tokens[pos]
        else:
            return None

    def peek(self):
        """Return the next token without advancing, or None when exhausted."""
        pos = self.token_pos
        if pos < len(self.valid_tokens):
            return self.valid_tokens[pos]
        else:
            return None
示例#34
0
文件: lexer.py 项目: supsub/LOUDXD
class Lexer():
    """Token definitions for a Polish-language toy programming language.

    rply tries rules in registration order, so the multi-word keyword
    phrases are registered before the generic VARIABLE rule.
    """

    def __init__(self):
        # The generator accumulates rules; build() produces the lexer.
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Print statement: "[Ww]ypisz na ekranie" ("print on screen").
        self.lexer.add('PRINT', r'[Ww]ypisz na ekranie')
        # NOTE(review): '\w' matches any single word character here; the
        # PRINT rule above suggests '[Ww]' was intended -- confirm.
        self.lexer.add("FORMAT", r'\w puste miejsce wpisz ')
        # Argument separator ("oraz" == "and").
        self.lexer.add("SEPARATOR", r'oraz')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        ###
        # if / else block keywords
        ###
        self.lexer.add('IF', r'[Jj]eżeli')
        self.lexer.add('ELSE', r'W przeciwnym razie')
        self.lexer.add('START_BLOCK', r'to')
        self.lexer.add('END_BLOCK', r'Tyle')
        ###
        # for loop keywords (not implemented yet)
        ###
        # Statement terminator
        self.lexer.add('DOT', r'\.')
        # Operators +,-
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # "Zwiększ" == increment, "Zmniejsz" == decrement
        self.lexer.add('INCREMENT', r'Zwiększ')
        self.lexer.add('DECREMENT', r'Zmniejsz')
        # +,- operations helper ("o" == "by")
        self.lexer.add('ADDSUB_HELPER', r'o')
        # *,/ operations helper ("przez" == "by")
        self.lexer.add('DIVMUL_HELPER', r'przez')
        # assignment ("jest równe" == "is equal to")
        self.lexer.add('ASSIGN', r'jest równe')
        # comparison operators: >, <, ==, !=
        self.lexer.add('BIGGER', r'jest wieksze od')
        self.lexer.add('SMALLER', r'jest mniejsze od')
        self.lexer.add('EQUAL', r'równa się')
        self.lexer.add('DIFFER', r'jest różne od')

        # variable name, ex. response_time or latencySegID
        self.lexer.add('VARIABLE', r'(_|[a-zA-Z])(_|[a-zA-Z]|[0-9])*')

        self.lexer.add("STRING", r'\"[^\"]*\"')

        self.lexer.add("COMMA", r'\,')

        # FIX: raw string avoids the invalid "\s" escape-sequence warning
        # (SyntaxWarning on Python 3.12+); the pattern value is unchanged.
        self.lexer.ignore(r'\s')

    def get_lexer(self):
        """Register all rules and return the built rply lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#35
0
文件: lexer.py 项目: RobotDisco/hy
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

from rply import LexerGenerator


lg = LexerGenerator()


# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote = r'(?![\s\)\]\}])'


# Bracketing tokens.
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
# Reader-macro operators; the lookahead rejects a quote immediately
# followed by whitespace or a closing delimiter.
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
# "~@" is registered before "~" so the longer operator wins
# (rply picks the first rule that matches).
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
# Shebang line, e.g. "#!/usr/bin/env hy".
lg.add('HASHBANG', r'#!.*[^\r\n]')


lg.add('STRING', r'''(?x)
    (?:u|r|ur|ru)? # prefix
    "  # start string
示例#36
0
from rply import LexerGenerator

try:
    # Prefer the RPython regex engine when translating with RPython;
    # fall back to the CPython stdlib module otherwise.
    import rpython.rlib.rsre.rsre_re as re
except ImportError:
    # FIX: narrowed from a bare "except:", which would also swallow
    # KeyboardInterrupt and SystemExit.
    import re

lg = LexerGenerator()

# build up a set of token names and regexes they match
# (raw strings throughout: "\d" etc. are invalid escape sequences in
# plain literals and raise SyntaxWarning on Python 3.12+)
lg.add('FLOAT', r'-?\d+\.\d+')   # before INTEGER so "1.5" is one token
lg.add('INTEGER', r'-?\d+')
lg.add('STRING', '(""".*?""")|(".*?")|(\'.*?\')')
# lg.add('PRINT', 'print(?!\w)') # put this before variable which would otherwise match
# Keywords use a (?!\w) lookahead so identifiers that merely start with
# a keyword ("iffy", "format") are not split.
lg.add('BOOLEAN', r"true(?!\w)|false(?!\w)")
lg.add('IF', r'if(?!\w)')
lg.add('ELSE', r'else(?!\w)')
lg.add('END', r'end(?!\w)')
lg.add('AND', r"and(?!\w)")
lg.add('OR', r"or(?!\w)")
lg.add('NOT', r"not(?!\w)")
lg.add('LET', r'let(?!\w)')
lg.add('FOR', r'for(?!\w)')
lg.add('WHILE', r'while(?!\w)')
lg.add('BREAK', r'break(?!\w)')
lg.add('CONTINUE', r'continue(?!\w)')
lg.add('MATCH', r'match(?!\w)')
lg.add('ENUM', r'enum(?!\w)')
lg.add('NEW', r'new(?!\w)')
lg.add('RETURN', r'return(?!\w)')
lg.add('TYPE', r'type(?!\w)')
示例#37
0
文件: lexer.py 项目: samgiles/naulang
        ("GT", r">"),
        # Punctuation
        ("LPAREN", r"\("),
        ("RPAREN", r"\)"),
        ("LBRACE", r"{"),
        ("RBRACE", r"}"),
        ("COMMA", r","),
        ("LBRACK", r"\["),
        ("RBRACK", r"\]"),
        # Literals
        ("TRUE", r"true\b"),
        ("FALSE", r"false\b"),
        ("FLOAT", r"(((0|[1-9][0-9]*)(\.[0-9]*)+)|(\.[0-9]+))([eE][\+\-]?[0-9]*)?"),
        ("INTEGER", r"-?(0|[1-9][0-9]*)"),
        ("STRING", r"\"([^\"\\]|\\.)*\""),
        ("IDENTIFIER", r"[a-zA-Z_$][a-zA-Z_0-9]*"),
        # Others
        ("EQUAL", r"="),
    ]

# Materialise the (name, regex) token table once at import time.
tokens = get_tokens()

# Register every pair with the generator in table order.
for token in tokens:
    lexer_gen.add(token[0], token[1])

# Build a single shared lexer instance at import time.
LEXER = lexer_gen.build()


def get_lexer():
    """Return the shared lexer instance built once at module import."""
    return LEXER
示例#38
0
class Lexer():
    """Lexer for a minimal calculator language with a print statement."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Keyword and punctuation rules.
        self.lexer.add('PRINT', r'print')
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('SEMI_COLON', r'\;')
        # Arithmetic operators.
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        # Integer literals.
        self.lexer.add('NUMBER', r'\d+')
        # FIX: raw string avoids the invalid "\s" escape-sequence warning
        # (SyntaxWarning on Python 3.12+); the pattern value is unchanged.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#39
0
文件: parser.py 项目: dirk/stalk
from rply import ParserGenerator, LexerGenerator

lg = LexerGenerator()

# NOTE(review): the "-?" binds only to the "0" alternative here, so a
# negative multi-digit integer lexes as MINUS-like input -- confirm intent.
lg.add("INTEGER", r"-?0|([1-9][0-9]*)")
# Fractional part only; "12.5" lexes as INTEGER then DECIMAL.
lg.add("DECIMAL", r"\.[0-9]+")
# Shared identifier core, reused by KEYWORD ("name:") and SYMBOL (":name").
_id = r"[A-Za-z][A-Za-z0-9_]*"
lg.add("KEYWORD", _id + r":")
lg.add("IDENTIFIER", _id)
lg.add("SYMBOL", r":" + _id)
# "#" comments run to end of line (leading blanks included).
_comment = r"[ \t]*#[^\n]*"
lg.add("COMMENT", _comment)
# Brackets absorb adjacent layout whitespace so the parser never sees it.
lg.add("LPAREN", r"\([ \t\n]*")
lg.add("RPAREN", r"[ \t\n]*\)")
# TODO: Maybe clear this up to be prettier.
lg.add("PREFACE", r"<[A-Za-z0-9_:@, \t\n]+>[ \t\n]*")
lg.add("LBRACK", r"{[ \t\n]*")
lg.add("RBRACK", r"[ \t\n]*}")
lg.add("VERT", r"\|[ \t\n]*")
lg.add("LSQ", r"\[[ \t\n]*")
lg.add("RSQ", r"[ \t\n]*\]")
# Line continuation: trailing backslash, optional comment, newline.
lg.add("CONT", r"[ \t]+\\(" + _comment + r")?\n[ \t]*")
lg.add("SWS", r"[ \t]+")
lg.add("COMMA", ",[ \t\n]*")
lg.add("TERMINAL", r"[ \t]*\n[ \t\n]*")
# TODO: Improve string parsing (no escape sequences supported yet).
lg.add("STRING", r"\"[^\"]*\"")
# TODO: Finalize operators
lg.add("OPERATOR", r"[+\-=*/\^]")
示例#40
0
class Lexer():
    """Lexer for a small image-manipulation DSL."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Image file name.  FIX: the original pattern used a mid-pattern
        # global flag "(?i)", which raises ValueError on Python 3.11+
        # ("global flags not at the start of the expression"); the scoped
        # inline group "(?i:...)" accepts the same inputs.
        self.lexer.add('IMAGE', r'[^\s]+(\.(?i:jpg|png|gif|bmp|jpeg))')
        # Command keywords.
        self.lexer.add('POSITION', r'position')
        self.lexer.add('SCALE', r'scale')
        self.lexer.add('MOVE', r'move')
        self.lexer.add('DIMENSIONS', r'dimensions')
        self.lexer.add('TOTAL', r'total')
        self.lexer.add('PRINT', r'print')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Comma separator
        self.lexer.add('COMMA', r'\,')
        # FIX: raw string avoids the invalid "\s" escape-sequence warning
        # (SyntaxWarning on Python 3.12+); the pattern value is unchanged.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#41
0
文件: lexer.py 项目: xando/herbert
from rply import LexerGenerator


lg = LexerGenerator()

# Single-letter movement commands.
lg.add("STEP", r"s")
lg.add("TURN_LEFT", r"l")
lg.add("TURN_RIGHT", r"r")
# Any other lowercase letter names a user-defined function.
lg.add("FUNC", r"a|b|c|d|e|f|g|h|i|j|k|m|n|o|p|q|t|u|v|w|x|y|z")
lg.add("COLON", r"\:")
lg.add("NEWLINE", r"\n+ *\n*")
# Uppercase letters are parameter names.
lg.add("NAME", r"[A-Z]")
lg.add("NUMBER", r"\d+")
lg.add("PLUS", r"\+")
lg.add("MINUS", r"\-")
lg.add("(", r"\(")
lg.add(")", r"\)")
lg.add(",", r"\,")

# Spaces and "#" comments carry no meaning.
lg.ignore(r" +")
lg.ignore(r"\#.*")

# Expose every rule name so the parser can declare its token set.
TOKENS = [r.name for r in lg.rules]

lexer = lg.build()
示例#42
0
def build_lexer():
    """Assemble and build the rply lexer.

    Command names are registered in reverse-sorted order so that a
    command that is a prefix of a later one cannot shadow it.
    """
    gen = LexerGenerator()
    all_commands = itertools.chain(
        common_commands, lmao_commands, rofl_commands)
    for cmd in sorted(all_commands, reverse=True):
        gen.add(cmd, cmd)
    gen.add('NEWLINE', r'\n')

    # Scalar/array variables are an "s"/"a" prefix plus an index.
    gen.add('SCALAR_VAR', r's\d+')
    gen.add('ARRAY_VAR', r'a\d+')

    gen.add('REGISTER', r'reg[A-H]')
    gen.add('LABEL', r'[a-zA-Z_][a-zA-Z_0-9]*')
    gen.add('NUM_LITERAL', r'-?((\d+)(\.\d+)?)|(\.\d+)')
    gen.add('CHAR_LITERAL', r"'([^\\']|\\n|\\t|\\'|\\\\)'")
    gen.add('COLON', r':')

    # Blanks and "#" comments are skipped; any leftover char is an ERROR.
    gen.ignore(r'[ \t]')
    gen.ignore(r'\#.*')
    gen.add('ERROR', r'.')

    return gen.build()
示例#43
0
文件: lexer.py 项目: sarostru/kite
from rply import LexerGenerator

lg = LexerGenerator()

# Integer or decimal literal.
lg.add('NUMBER', r'\d+(\.\d+)?')
lg.add('PLUS', r'\+')
lg.add('MINUS', r'-')
lg.add('MUL', r'\*')
lg.add('DIV', r'/')
lg.add('OPEN_PARENS', r'\(')
lg.add('CLOSE_PARENS', r'\)')
lg.add('EQUALS', r'=')
# A symbol is any non-space run that does not start with a digit.
lg.add('SYMBOL', r'[^\s0-9][^\s]*')

lg.ignore(r'\s+')

lexer = lg.build()
示例#44
0
文件: lexer.py 项目: zhihuizhiming/hy
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

from rply import LexerGenerator

lg = LexerGenerator()

# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote = r'(?![\s\)\]\}])'

# Delimiters.
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
# "#{" (set literal) is registered before the generic HASHREADER rule.
lg.add('HLCURLY', r'#\{')
# Reader macros; "~@" before "~" so the longer operator wins
# (rply picks the first rule that matches).
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
# Shebang line; HASHREADER catches "#" + any char except "{".
lg.add('HASHBANG', r'#!.*[^\r\n]')
lg.add('HASHREADER', r'#[^{]')

# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter
示例#45
0
文件: lexer.py 项目: blakev/LTPyB
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# >>
#     LTPyB, 2016
# <<

from rply import LexerGenerator

lg = LexerGenerator()

# FIX: rply tries rules in registration order, so the longer / more
# specific pattern must come first:
#   FLOAT before INTEGER   -- otherwise "1.5" lexed as INTEGER 1, then
#                             failed on the dangling ".5"
#   OP_EQUAL before OP_ASSIGNMENT -- otherwise "==" lexed as two "="
lg.add('FLOAT',         r'\-?\d+\.\d+')
lg.add('INTEGER',       r'\-?\d+')
lg.add('OP_EQUAL',      r'==')
lg.add('OP_ASSIGNMENT', r'=')

lg.ignore(r'\s+')    # ignore whitespace
lg.ignore(r'#.*\n')  # ignore comments

lexer = lg.build()
示例#46
0
文件: lex.py 项目: jRimbault/lispy
from rply import LexerGenerator

lg = LexerGenerator()

# A quoting operator must not be immediately followed by whitespace or a
# closing delimiter.
end_quote = r"(?![\s\)\]\}])"

# Any run of characters that cannot delimit or quote a form.
identifier = r'[^()\[\]{}\'"\s;]+'

lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
lg.add("LBRACKET", r"\[")
lg.add("RBRACKET", r"\]")
lg.add("LCURLY", r"\{")
lg.add("RCURLY", r"\}")
# "#{" (set literal) is registered before the generic "#..." rules below.
lg.add("HLCURLY", r"#\{")
lg.add("QUOTE", r"\'%s" % end_quote)
lg.add("QUASIQUOTE", r"`%s" % end_quote)
# "~@" before "~" so the longer operator wins (rply is first-match).
lg.add("UNQUOTESPLICE", r"~@%s" % end_quote)
lg.add("UNQUOTE", r"~%s" % end_quote)
lg.add("DISCARD", r"#_")
lg.add("HASHSTARS", r"#\*+")
# Long bracket string: the opener's inner text is captured and
# backreferenced, so the closer must repeat it exactly.
lg.add(
    "BRACKETSTRING",
    r"""(?x)
    \# \[ ( [^\[\]]* ) \[
    \n?
    ((?:\n|.)*?)
    \] \1 \]
    """,
)
lg.add("HASHOTHER", r"#%s" % identifier)
示例#47
0
# -*- coding:utf-8 -*-
from rply import  LexerGenerator
from rply.token import BaseBox
lg = LexerGenerator()
# Add takes a rule name, and a regular expression that defines the rule.
#lg.add("COMMENT", r"\s*\*[^\n]*")
#  ([0-9]+)|([0-9]*\.[0-9]+)|(0x[0-9A-Fa-f]+)
# Date literal: "0d" followed by YYYYMMDD.
lg.add("DATE", r"0d[0-9]{8}")
# Hex, decimal or integer literal (hex first so "0x" is not split).
lg.add("NUMBER", r"(0x[0-9A-Fa-f]+)|([0-9]*\.[0-9]+)|([0-9]+)")
#if
lg.add("IF", r"if|IF")
#lg.add("THEN",r"then|THEN")
# FIX: ELSEIF must be registered before ELSE.  rply picks the first rule
# that matches, so with the original order the input "elseif" lexed as
# ELSE followed by IF and the ELSEIF rule was unreachable.
lg.add("ELSEIF", "ELSEIF|elseif")
lg.add("ELSE", r"ELSE|else")
lg.add("ENDIF", "endif|ENDIF")
# do
lg.add("DO", "do|DO")
# do while
lg.add("WHILE", r"while|WHILE")
# end do
lg.add("ENDDO", r"ENDDO|enddo")
# do case
lg.add("CASE", r"case|CASE")
lg.add("ENDCASE", r"ENDCASE|endcase")
# otherwise
lg.add("OTHERWISE", r"otherwise|OTHERWISE")
# exit
lg.add("EXIT", r"exit|EXIT")
# for, for each
lg.add("FOR", r"for|FOR")
lg.add("TO", r"to|TO")
示例#48
0
文件: lexer.py 项目: 0atman/hy
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

from rply import LexerGenerator


lg = LexerGenerator()


# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote = r'(?![\s\)\]\}])'


# Delimiters.
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
# "~@" before "~" so the longer operator wins (rply is first-match).
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
# Shebang line; HASHREADER handles "#" plus any other single character.
lg.add('HASHBANG', r'#!.*[^\r\n]')
lg.add('HASHREADER', r'#.')

# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter
partial_string = r'''(?x)
示例#49
0
class BoxInt(BaseBox):
    """Minimal box around a plain integer value (BaseBox subclass)."""

    def __init__(self, value):
        # The wrapped integer.
        self.value = value

    def getint(self):
        """Return the wrapped integer."""
        return self.value

'''

from rply import ParserGenerator, LexerGenerator
from rply.token import BaseBox

lexgen = LexerGenerator()

# Word-operator tokens; each keyword maps to its own token type.
lexgen.add('AND', r"(and)")
lexgen.add('WITHOUT', r"(without)")
lexgen.add('DIVIDE', r"(divide)")
# FIX: the original line was `lexgen.add('MULTIPLY', )` -- a SyntaxError
# (missing pattern argument).  The pattern below follows the convention
# of the rules above; confirm the intended keyword spelling.
lexgen.add('MULTIPLY', r"(multiply)")



keywords = {
        "return": Keyword("RETURN", "RETURN", EXPR_MID),
        "if": Keyword("IF", "IF_MOD", EXPR_BEG),
        "unless": Keyword("UNLESS", "UNLESS_MOD", EXPR_BEG),
        "then": Keyword("THEN", "THEN", EXPR_BEG),
        "elsif": Keyword("ELSIF", "ELSIF", EXPR_BEG),
        "else": Keyword("ELSE", "ELSE", EXPR_BEG),
        "while": Keyword("WHILE", "WHILE_MOD", EXPR_BEG),
        "until": Keyword("UNTIL", "UNTIL_MOD", EXPR_BEG),
示例#50
0
import re
import itertools
from collections import deque
from rply import ParserGenerator, LexerGenerator
from graphextractor.rfc3987 import UrlPattern
from graphextractor.flattened import flattened

__all__ = ['TweetLexer', 'TweetParser']

# NOTE: Python 2 source -- the ur'...' literals are invalid in Python 3.
lex = LexerGenerator()
# Commas, semicolons, whitespace and the words "and"/"or" are separators.
lex.ignore(ur'(?:[,;\s]+|\band\b|\bor\b)+')
lex.add(u'URL', UrlPattern)
# The two specific hashtags are registered before the generic HASHTAG
# rule so they are not swallowed by it.
lex.add(u'BTHASH', ur'#betterthan')
lex.add(u'IBTHASH', ur'#isbetterthan')
lex.add(u'HASHTAG', ur'#[a-zA-Z0-9_]+')
lex.add(u'MENTION', ur'@[a-zA-Z0-9_]+')
lex.add(u'FOR', ur'(for|FOR|For)')
lex.add(u'WORD', ur'[\w]+')

# The parser declares exactly the token names registered above.
pg = ParserGenerator([u'URL',
                      u'BTHASH',
                      u'IBTHASH',
                      u'HASHTAG',
                      u'MENTION',
                      u'FOR',
                      u'WORD'
                     ], 
                     cache_id=u'graphextractor.tweetparser')

@pg.production("betterthan : words URL bthash URL topics words")
def betterthan(p):
示例#51
0
def build_lexer():
    """Build the rply lexer for a LOLCODE-like language.

    Rule order matters: rply picks the first rule that matches, so the
    keywords precede IDENTIFIER and LETTR_LITERAL is the catch-all.
    """
    lexer = LexerGenerator()

    # Lexer Analysis Rules
    lexer.ignore(' ')
    lexer.add("WHATEVR", r"WHATEVR")
    lexer.add("VISIBLE", r"VISIBLE")
    lexer.add("KTHXBAI", r"KTHXBAI")
    lexer.add("GIMME", r"GIMME")
    lexer.add("MKAY", r"MKAY")
    lexer.add("HAS", r"HAS")
    lexer.add("HAI", r"HAI")
    lexer.add("ITZ", r"ITZ")
    lexer.add("OF", r"OF")
    lexer.add("BANG", r"!")
    lexer.add("BY", r"BY")
    lexer.add("AN", r"AN")
    lexer.add("A", r"A")
    lexer.add("R", r"R")
    lexer.add("I", r"I")
    # FIX: the original pattern r"OBTW [.*|\n]TDLR" used a character
    # class instead of a body and could never match.  Match lazily
    # across lines up to the closing keyword.  NOTE(review): original
    # wrote "TDLR"; LOLCODE's closer is "TLDR" -- confirm the spelling.
    lexer.add("MULTI_COMMENT", r"OBTW(.|\n)*?TLDR")
    lexer.add("NEWLINE", "\n")
    lexer.add("PRIMITIVE_TYPE", r"NUMBR|NUMBAR|LETTR|TROOF")
    # FIX: the dot must be escaped; "-?\d+.\d+" accepted "12x34".
    lexer.add("NUMBAR_LITERAL", r"-?\d+\.\d+")
    lexer.add("NUMBR_LITERAL", r"-?\d+")
    # FIX: "[WIN|FAIL]" was a character class matching a single letter,
    # which swallowed the first letter of many identifiers.
    lexer.add("TROOF_LITERAL", r"(WIN|FAIL)")
    lexer.add("YARN_LITERAL", r"[\"|\'].*[\"|\']")
    lexer.add("MATH_BINARY_OPERATOR",
              r"SUM|DIFF|PRODUKT|QUOSHUNT|BIGGR|SMALLR")
    lexer.add("MATH_UNARY_OPERATOR", r"FLIP|SQUAR")
    lexer.add("LOGICAL_BINARY_OPERATOR", r"BOTH|EIHER|WON")
    lexer.add("LOGICAL_UNARY_OPERATOR", r"NOT")
    lexer.add("LOGICAL_VARIABLE_OPERATOR", r"ALL|ANY")
    lexer.add("COMPARISON_BINARY_OPERATOR",
              r"SAEM|DIFFRINT|FURSTSMALLR|FURSTBIGGR")
    lexer.add("ASSIGNMENT_OPERATOR", r"CORRECT_THIS")
    lexer.add(
        "SINGLE_COMMENT",
        r"BTW.*\n")  # New line required to be added to tokens list prior!
    lexer.add("IDENTIFIER", r"[a-zA-Z][a-zA-Z_]*")
    # Catch-alls: any single character, then ERROR (rarely reachable
    # because LETTR_LITERAL's "." matches first).
    lexer.add("LETTR_LITERAL", r".")
    lexer.add("ERROR", r"^[.]*")

    return lexer.build()
示例#52
0
文件: parser.py 项目: cheery/suchlog
from rply import Token, LexerGenerator, ParserGenerator
from rply.token import BaseBox
from objects import Atom, Compound, Variable, known_atoms, atom, as_list
from objects import parse_integer

leg = LexerGenerator()
# "#" line comments and whitespace are skipped.
leg.ignore(r'#.*\n')
leg.ignore(r'\s+')
# Prolog-style conventions: atoms start lowercase, variables start with
# an uppercase letter or underscore.
leg.add('ATOM',         r'[a-z][a-zA-Z0-9_]*')
leg.add('VARIABLE',     r'[A-Z_][a-zA-Z0-9_]*')
leg.add('INTEGER',      r'[0-9]+')
leg.add('IMPLICATION',  r"<-")
leg.add('LEFTPAREN',    r"\(")
leg.add('RIGHTPAREN',   r"\)")
leg.add('LEFTBRACKET',  r"\[")
leg.add('RIGHTBRACKET', r"\]")
leg.add('COMMA',        r",")
leg.add('AT',           r"@")
leg.add('VBAR',         r"\|")
# Multi-character operators are registered before the single "=" rule.
leg.add('SIMP',         r"<=>")
leg.add('PROP',         r"==>")
leg.add('UNIFY',        r"=")
leg.add('COLON',        r":")
leg.add('SEMICOLON',    r";")
lexer = leg.build()

pg = ParserGenerator(
    ['ATOM', 'VARIABLE', 'IMPLICATION',
     'UNIFY', 'LEFTPAREN', 'RIGHTPAREN',
     'LEFTBRACKET', 'RIGHTBRACKET', 'COLON',
     'INTEGER',
示例#53
0
from rply import LexerGenerator

lg = LexerGenerator()
lg.ignore(r"\s+")
lg.add("NUMBER", r"\d+")
# FIX: keyword rules now carry a (?!\w) lookahead so that identifiers
# which merely start with a keyword ("printer", "Truely") fall through
# to the NAME rule instead of being split (rply is first-match).
lg.add("BOOLEAN", r"(True|False)(?!\w)")
lg.add("ADD", r"\+")
lg.add("SUB", r"\-")
lg.add("MULT", r"\*")
lg.add("DIV", r"\/")
lg.add("SEMICOLON", r";")
lg.add("PRINT", r"print(?!\w)")
lg.add("NAME", r"[a-zA-Z_][a-zA-Z0-9_]*")
# "==" is registered before "=" so equality is not split into two
# assignment tokens.
lg.add("EQUALS", r"==")
lg.add("ASSIGN", r"=")
lexer = lg.build()
示例#54
0
文件: lexer.py 项目: longquan7/hy
# Copyright 2017 the authors.
# This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE.

from rply import LexerGenerator

lg = LexerGenerator()

# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote = r'(?![\s\)\]\}])'

# Any run of characters that cannot delimit or quote a form.
identifier = r'[^()\[\]{}\'"\s;]+'

lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
# "#{" (set literal) is registered before the generic "#..." rules below.
lg.add('HLCURLY', r'#\{')
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
# "~@" before "~" so the longer operator wins (rply is first-match).
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
lg.add('DISCARD', r'#_')
lg.add('HASHSTARS', r'#\*+')
lg.add('HASHOTHER', r'#%s' % identifier)

# A regexp which matches incomplete strings, used to support
# multi-line strings in the interpreter
示例#55
0
""":mod:`stencil_lang.matrix.lexer` -- Matrix scanner
"""

from rply import LexerGenerator

from stencil_lang.matrix.tokens import TOKENS, IGNORES

lg = LexerGenerator()

# Register every rule from the shared token table.  NOTE: ``iteritems``
# -- this module targets Python 2 / RPython.
for rule_name, regex in TOKENS.iteritems():
    lg.add(rule_name, regex)

for regex in IGNORES:
    lg.ignore(regex)

# This has to be called outside a function because the parser must be generated
# in Python during translation, not in RPython during runtime.
_lexer = lg.build()
"""This intepreter's lexer instance."""


def lex(text):
    """Scan *text* with the module-level generated lexer.

    :param text: text to lex
    :type text: :class:`str`
    :return: parsed stream
    :rtype: :class:`rply.lexer.LexerStream`
    """
    stream = _lexer.lex(text)
    return stream
示例#56
0
class Lexer():
    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # print
        self.lexer.add('PRINT', r'print')
        # parentheses
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # semicolon
        self.lexer.add('SEMI_COLON', r'\;')
        # addition and subtraction operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        # number
        self.lexer.add('NUMBER', r'\d+')
        # ignore spaces
        self.lexer.ignore('\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
示例#57
0
import collections

from transit.transit_types import Keyword, Symbol, TaggedValue, List, Vector
import transit.transit_types

# Aliases for the transit singleton boolean values.
transit_true = transit.transit_types.true
transit_false = transit.transit_types.false

from rply import ParserGenerator, LexerGenerator

lg = LexerGenerator()

# An EDN-style symbol; the first character may not be a digit.
SYMBOL_RE = r"[\.\*\+\!\-\_\?\$%&=a-zA-Z][\.\*\+\!\-\_\?\$%&=a-zA-Z0-9:#]*"
NS_SYMBOL = SYMBOL_RE + "/" + SYMBOL_RE

lg.add("boolean", r"(true|false)")
lg.add("nil", r"nil")
# float is registered before number so "1.5" is not consumed as "1".
lg.add("float", r"\d+\.\d+")
lg.add("number", r"[-+]?\d+")
# Collection delimiters.
lg.add("olist", r"\(")
lg.add("clist", r"\)")
lg.add("omap", r"{")
lg.add("cmap", r"}")
lg.add("ovec", r"\[")
lg.add("cvec", r"\]")
lg.add("oset", r"#{")
lg.add("colon", r":")
# Named character literals.
lg.add("char_nl", r"\\newline")
lg.add("char_tab", r"\\tab")
lg.add("char_return", r"\\return")
lg.add("char_space", r"\\space")
示例#58
0
class Lexer():
    """Lexer for a small accumulator-machine assembly language."""

    # Opcode mnemonics in registration order.  Order matters for prefix
    # pairs: PUSHI must precede PUSH and POPI must precede POP so the
    # longer opcode wins.
    _OPCODES = (
        'LODD', 'STOD', 'ADDD', 'SUBD', 'JPOS', 'JZER', 'JUMP', 'LOCO',
        'LODL', 'STOL', 'ADDL', 'SUBL', 'JNEG', 'JNZE', 'CALL', 'PUSHI',
        'POPI', 'PUSH', 'POP', 'RETN', 'SWAP', 'INSP', 'DESP', 'INPAC',
        'OUTAC', 'HALT',
    )

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Every opcode must be delimited by non-word characters on both
        # sides, exactly as in the original hand-written rules.
        for opcode in self._OPCODES:
            self.lexer.add(opcode, r'(?<!\w)' + opcode + r'(?!\w)')
        # ETIQUETA: label definition, e.g. "@loop".
        self.lexer.add('ETIQUETA', r'\@[A-Za-z]\w*')
        # VARIABLE: any other word starting with a letter.
        self.lexer.add('VARIABLE', r'(?<!\w)[A-Za-z]\w*')
        # DIRECCION: two-digit uppercase-hex address, e.g. "0x1F".
        self.lexer.add('DIRECCION', r'(?<!\w)0x[A-F0-9][A-F0-9](?!\w)')
        # NUMERO: decimal literal not adjacent to letters.
        self.lexer.add('NUMERO', r'(?<![A-Za-z])\d+(?![A-Za-z])')
        # Whitespace and "%" line comments carry no meaning.
        self.lexer.ignore(r'\s+')
        self.lexer.ignore(r'\%.*')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()
示例#59
0
from rply import LexerGenerator


lg = LexerGenerator()

lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
# lg.add('LBRACKET', r'\[')
# lg.add('RBRACKET', r'\]')

# Any run of characters that is not a delimiter, whitespace or "#".
lg.add("IDENTIFIER", r"[^()\[\]{}\s#]+")

# "#" starts a comment running to end of line; whitespace separates tokens.
lg.ignore(r"#.*(?=\r|\n|$)")
lg.ignore(r"\s+")

lexer = lg.build()
示例#60
0
class Lexer():
    """Lexer for a four-operator arithmetic language with "out" printing."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Print ("out" keyword)
        self.lexer.add('PRINT', r'out')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # FIX: raw string avoids the invalid "\s" escape-sequence warning
        # (SyntaxWarning on Python 3.12+); the pattern value is unchanged.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register the rules and return the built lexer."""
        self._add_tokens()
        return self.lexer.build()