示例#1
0
文件: _functions.py 项目: goodmami/pe
def compile(source: Union[str, Grammar],
            actions: Dict[str, Callable] = None,
            parser: str = 'packrat',
            flags: Flag = Flag.OPTIMIZE) -> Parser:
    """Build a parser object from *source*.

    *source* is either a string, which is parsed with ``loads`` and
    combined with *actions* into a new ``Grammar``, or an existing
    ``Grammar``, which is used as-is (passing *actions* as well is an
    error). *parser* selects the backend case-insensitively:
    ``'packrat'``, ``'machine'`` or ``'machine-python'``. *flags* is
    forwarded to the backend; when ``Flag.DEBUG`` is set the grammar is
    printed, and if ``Flag.OPTIMIZE`` is set too the backend's
    ``modified_grammar`` is printed after construction.

    Raises ``Error`` for an unsupported *parser* name or when *actions*
    are supplied together with a prepared ``Grammar``.
    """
    # Backends are imported inside the branches, so only the selected
    # one is imported by this call.
    backend = parser.lower()
    if backend == 'machine-python':
        from pe._py_machine import MachineParser as parser_class  # type: ignore
    elif backend == 'machine':
        from pe.machine import MachineParser as parser_class  # type: ignore
    elif backend == 'packrat':
        from pe.packrat import PackratParser as parser_class
    else:
        raise Error(f'unsupported parser: {parser}')

    if not isinstance(source, Grammar):
        assert isinstance(source, str)
        start, defmap = loads(source)
        grammar = Grammar(defmap, actions=actions, start=start)
    elif actions:
        raise Error('cannot assign actions to prepared grammar')
    else:
        grammar = source

    debugging = flags & Flag.DEBUG
    if debugging:
        print('## Grammar ##')
        print(grammar)

    built = parser_class(grammar, flags=flags)

    if debugging and (flags & Flag.OPTIMIZE):
        print('## Modified Grammar ##')
        print(built.modified_grammar)

    return built
示例#2
0
def _format(defn: Definition,
            prev_op: Operator) -> str:
    """Format *defn* using the formatter registered for its operator.

    The formatter is looked up in ``_format_map`` by ``defn.op`` and is
    called with *defn* and *prev_op*. Raises ``Error`` when no formatter
    is registered for the operator.
    """
    try:
        formatter = _format_map[defn.op]
    except KeyError:
        raise Error(f'invalid operation: {defn.op!r}')
    else:
        # Called outside the try so a KeyError raised by the formatter
        # itself is not reported as an invalid operation.
        return formatter(defn, prev_op)
示例#3
0
def regex(defn: Definition):
    """Return *defn* as a regular-expression definition.

    Only DOT, LIT, CLS and RGX definitions are accepted. An RGX
    definition is returned unchanged; the other three are converted
    with ``_regex``. Any other operator raises ``Error``.
    """
    # this can be expanded if there are no nonterminals, captures, or actions
    if defn.op in (DOT, LIT, CLS, RGX):
        if defn.op == RGX:
            return defn
        return _regex(defn, {}, count(start=1))
    raise Error(f'cannot convert {defn.op} to a regular expression')
示例#4
0
文件: _parse.py 项目: goodmami/pe
def _make_prioritized(exprs):
    if len(exprs) == 1:
        return exprs[0]
    elif len(exprs) > 1:
        return Choice(*exprs)
    else:
        raise Error(f'empty choice: {exprs}')
示例#5
0
文件: _parse.py 项目: goodmami/pe
def _make_sequential(exprs):
    if len(exprs) == 1:
        return exprs[0]
    elif len(exprs) > 1:
        return Sequence(*exprs)
    else:
        raise Error(f'empty sequence: {exprs}')
示例#6
0
 def _def_to_expr(self, definition: Definition):
     """Convert *definition* into an expression object.

     A SYM definition resolves to the entry of ``self._exprs`` for the
     symbol's name; a new ``Rule(name)`` is stored there if the name is
     not present yet. Every other operator is dispatched through
     ``self._op_map``. Raises ``Error`` when the operator has no entry
     in that map.
     """
     op = definition.op
     if op == Operator.SYM:
         symbol = definition.args[0]
         return self._exprs.setdefault(symbol, Rule(symbol))

     try:
         builder = self._op_map[op]
     except KeyError:
         raise Error(f'invalid definition: {definition!r}')
     # The map entries are called with self passed explicitly.
     return builder(self, definition)
示例#7
0
文件: _parse.py 项目: goodmami/pe
def loads(source: str) -> Tuple[str, Dict[str, Definition]]:
    """Parse the PEG in *source* into a start name and definition map.

    Returns ``(start, defmap)``. When the parse yields a single
    ``Definition`` it is stored under the name ``'Start'``; otherwise
    the parse yields a tuple of ``(name, definition)`` pairs and the
    first pair's name is the start symbol. Raises ``Error`` if *source*
    does not match the grammar syntax.
    """
    match = _parser.match(source, flags=pe.STRICT | pe.MEMOIZE)
    if not match:
        raise Error('invalid grammar')

    parsed = match.value()
    if not isinstance(parsed, Definition):
        assert isinstance(parsed, tuple)
        pairs = cast(Tuple[Tuple[str, Definition], ...], parsed)
        return pairs[0][0], dict(pairs)

    # A bare expression: expose it under the default start name.
    return 'Start', {'Start': parsed}
示例#8
0
文件: _grammar.py 项目: goodmami/pe
def _finalize(expr, defs, structured):
    """Recursively check that nonterminals used in *expr* are defined.

    Walks *expr* and raises ``Error`` for any SYM whose name is not a
    key of *defs*. *structured* is passed down unchanged to
    subexpressions, except beneath a CAP where ``False`` is passed; it
    is not otherwise used in this function. Returns ``None``.
    """
    op, args = expr.op, expr.args

    if op == Operator.SYM:
        name = args[0]
        if name not in defs:
            raise Error(f'undefined nonterminal: {args[0]}')
        return

    # Terminals have nothing to descend into.
    if op in (Operator.DOT, Operator.LIT, Operator.CLS, Operator.RGX):
        return

    # Sequences and choices hold their subexpressions in args[0].
    if op in (Operator.SEQ, Operator.CHC):
        for term in args[0]:
            _finalize(term, defs, structured)
        return

    # Everything else wraps a single subexpression in args[0].
    _finalize(args[0], defs, False if op == Operator.CAP else structured)
示例#9
0
    def _grammar_to_packrat(self, grammar):
        """Fill ``self._exprs`` from *grammar* and return it.

        Every definition in ``grammar.definitions`` is converted with
        ``self._def_to_expr`` and stored under its name. Afterwards each
        entry is checked, and ``Error`` is raised for any name whose
        entry is ``None`` or a ``Rule`` without an expression.
        """
        table = self._exprs
        for name, definition in grammar.definitions.items():
            converted = self._def_to_expr(definition)
            if name not in table:
                table[name] = converted
                continue

            # The name was already seen as a nonterminal in some other
            # rule; update that object in place instead of replacing it,
            # or the call chain will break.
            if isinstance(converted, Rule):
                action, inner = converted.action, converted.expression
            else:
                action, inner = None, converted
            existing = table[name]
            existing.expression = inner
            existing.action = action

        # ensure all symbols are defined
        for name, entry in table.items():
            if entry is None:
                raise Error(f'undefined rule: {name}')
            if isinstance(entry, Rule) and entry.expression is None:
                raise Error(f'undefined rule: {name}')
        return table
示例#10
0
def _parsing_instructions(defn):  # noqa: C901
    """Return the result of the ``_op_map`` handler for ``defn.op``.

    Raises ``Error`` when a ``KeyError`` occurs. Both the map lookup
    and the handler call are inside the ``try``, so a ``KeyError``
    raised by the handler is reported the same way as a missing
    operator.
    """
    try:
        handler = _op_map[defn.op]
        return handler(defn)
    except KeyError:
        raise Error(f'invalid definition: {defn!r}')
示例#11
0
def _match(  # noqa: C901
    pi: _Program,
    idx: int,
    s: str,
    pos: int,
    args: List[Any],
    kwargs: List[_Binding],
    memo: Optional[Memo],
) -> int:
    """Execute the instructions of *pi*, starting at *idx*, on *s*.

    Matching starts at offset *pos* of *s*. Emitted values are written
    into *args* and key/value bindings into *kwargs*; both lists are
    modified in place and are truncated again when the machine
    backtracks. *memo* is accepted but not referenced in this function.

    Each stack entry is a 5-tuple ``(idx, pos, mark, argidx, kwidx)``:
    the instruction index to continue at, the input position to restore
    (negative for entries that are not backtracking points), the start
    offset recorded for a pending capture or action, and the lengths of
    *args* and *kwargs* at the time of the push.

    Returns the current position when a PASS instruction ends the loop
    with entries still on the stack, or -1 when the stack has been
    emptied.

    Raises ``TypeError`` if *s*, *args* or *kwargs* is ``None`` and
    ``Error`` for an opcode that is not handled below.
    """
    if s is None:
        raise TypeError
    if args is None:
        raise TypeError
    if kwargs is None:
        raise TypeError

    stack: List[_State] = [
        (0, 0, -1, 0, 0),  # failure (top-level backtrack entry)
        (-1, -1, -1, 0, 0),  # success
    ]

    # lookup optimizations
    push = stack.append
    pop = stack.pop
    slen = len(s)

    while stack:
        # print(idx, pos, s[pos], len(stack))
        # print(pi[idx])
        # Each instruction is a 7-field record.
        opcode, oploc, scanner, marking, capturing, action, name = pi[idx]

        # Record where this instruction started (input position and the
        # current lengths of args/kwargs) for the capture/action handling
        # at the bottom of the loop.
        if marking:
            push((0, -1, pos, len(args), len(kwargs)))

        if opcode == SCAN:
            assert scanner is not None
            # The scanner returns the new position; negative means no match.
            pos = scanner._scan(s, pos, slen)
            if pos < 0:
                idx = FAILURE

        elif opcode == BRANCH:
            # Push a backtracking entry: on failure, continue at
            # idx + oploc from the current position.
            push((idx + oploc, pos, -1, len(args), len(kwargs)))
            idx += 1
            continue

        elif opcode == CALL:
            # Push a return entry (pos < 0, so it is skipped when
            # backtracking) and jump to the absolute index oploc.
            push((idx + 1, -1, -1, -1, -1))
            idx = oploc
            continue

        elif opcode == COMMIT:
            # Discard the top entry and jump by the relative offset oploc.
            pop()
            idx += oploc
            continue

        elif opcode == UPDATE:
            # Replace the top entry with one that keeps its target index
            # and mark but records the current position and list lengths.
            next_idx, _, prev_mark, _, _ = pop()
            push((next_idx, pos, prev_mark, len(args), len(kwargs)))
            idx += oploc
            continue

        elif opcode == RESTORE:
            # Go back to the position saved in the top entry, then jump.
            pos = pop()[1]
            idx += oploc
            continue

        elif opcode == FAILTWICE:
            # Drop the top entry, then fail as well.
            pos = pop()[1]
            idx = FAILURE

        elif opcode == RETURN:
            # Continue at the index stored in the top entry.
            idx = pop()[0]
            continue

        elif opcode == PASS:
            break

        elif opcode == FAIL:
            idx = FAILURE

        elif opcode != NOOP:
            raise Error(f'invalid operation: {opcode}')

        if idx == FAILURE:
            # Unwind to the nearest backtracking entry, then restore the
            # instruction index, the position, and the lengths of
            # args/kwargs that were saved in it.
            idx, pos, _, argidx, kwidx = pop()
            while pos < 0:  # pos is >= 0 only for backtracking entries
                idx, pos, _, argidx, kwidx = pop()
            args[argidx:] = []
            if kwargs:
                kwargs[kwidx:] = []
        else:
            # NOTE(review): the capture and the action below each pop one
            # entry, while `marking` pushes only one; presumably an
            # instruction sets at most one of them -- confirm.
            if capturing:
                # Replace everything emitted since the mark with the
                # matched substring and drop the bindings made since then.
                _, _, mark, argidx, kwidx = pop()
                args[argidx:] = [s[mark:pos]]
                kwargs[kwidx:] = []

            if action:
                # The action receives the matched span plus the values and
                # bindings emitted since the mark; what it returns
                # replaces them.
                _, _, mark, argidx, kwidx = pop()
                _args, _kwargs = action(s, mark, pos, args[argidx:],
                                        dict(kwargs[kwidx:]))
                args[argidx:] = _args
                if not _kwargs:
                    kwargs[kwidx:] = []
                else:
                    kwargs[kwidx:] = _kwargs.items()

            idx += 1

    # An empty stack means the loop ended without reaching PASS.
    if not stack:
        return -1
    return pos