def compile(source: Union[str, Grammar],
            actions: Dict[str, Callable] = None,
            parser: str = 'packrat',
            flags: Flag = Flag.OPTIMIZE) -> Parser:
    """Build a parser from the expression or grammar given as *source*.

    Args:
        source: a grammar/expression string, or an already prepared
            Grammar object.
        actions: actions passed to the Grammar built from a string
            *source*; must be empty when *source* is a Grammar.
        parser: backend name, compared case-insensitively; one of
            'packrat', 'machine', or 'machine-python'.
        flags: flags handed to the parser class; DEBUG additionally
            prints the grammar (and, together with OPTIMIZE, the
            parser's modified grammar).

    Returns:
        An instance of the selected parser class.

    Raises:
        Error: if *parser* names an unsupported backend, or if
            *actions* are given together with a prepared Grammar.
    """
    backend = parser.lower()
    # Each backend is imported inside its own branch, so only the
    # selected implementation gets imported.
    if backend == 'packrat':
        from pe.packrat import PackratParser as parser_class
    elif backend == 'machine':
        from pe.machine import MachineParser as parser_class  # type: ignore
    elif backend == 'machine-python':
        from pe._py_machine import MachineParser as parser_class  # type: ignore
    else:
        raise Error(f'unsupported parser: {parser}')

    if not isinstance(source, Grammar):
        assert isinstance(source, str)
        start, defmap = loads(source)
        grammar = Grammar(defmap, actions=actions, start=start)
    elif actions:
        raise Error('cannot assign actions to prepared grammar')
    else:
        grammar = source

    debugging = flags & Flag.DEBUG
    if debugging:
        print('## Grammar ##')
        print(grammar)

    compiled = parser_class(grammar, flags=flags)

    if debugging and (flags & Flag.OPTIMIZE):
        print('## Modified Grammar ##')
        print(compiled.modified_grammar)

    return compiled
def _format(defn: Definition, prev_op: Operator) -> str:
    """Format *defn* using the formatter registered for its operator.

    The formatter is looked up in ``_format_map`` by ``defn.op`` and is
    called with *defn* and *prev_op*.

    Raises:
        Error: if no formatter is registered for ``defn.op``.
    """
    try:
        formatter = _format_map[defn.op]
    except KeyError:
        raise Error(f'invalid operation: {defn.op!r}')
    else:
        return formatter(defn, prev_op)
def regex(defn: Definition):
    """Return *defn* as a regular-expression definition.

    Only DOT, LIT, CLS and RGX definitions are accepted.  An RGX
    definition is returned unchanged; the others are converted by
    ``_regex``.

    Raises:
        Error: if ``defn.op`` is not one of the accepted operators.
    """
    # this can be expanded if there are no nonterminals, captures, or actions
    op = defn.op
    if op not in (DOT, LIT, CLS, RGX):
        raise Error(f'cannot convert {op} to a regular expression')
    if op != RGX:
        return _regex(defn, {}, count(start=1))
    return defn
def _make_prioritized(exprs): if len(exprs) == 1: return exprs[0] elif len(exprs) > 1: return Choice(*exprs) else: raise Error(f'empty choice: {exprs}')
def _make_sequential(exprs): if len(exprs) == 1: return exprs[0] elif len(exprs) > 1: return Sequence(*exprs) else: raise Error(f'empty sequence: {exprs}')
def _def_to_expr(self, definition: Definition):
    """Convert *definition* into an expression object.

    A SYM definition resolves to the entry stored under its name in
    ``self._exprs``; a new ``Rule(name)`` is stored there if the name
    has no entry yet.  Any other operator is dispatched through
    ``self._op_map``.

    Raises:
        Error: if the operator has no entry in ``self._op_map``.
    """
    op = definition.op
    if op == Operator.SYM:
        name = definition.args[0]
        return self._exprs.setdefault(name, Rule(name))

    try:
        convert = self._op_map[op]
    except KeyError:
        raise Error(f'invalid definition: {definition!r}')
    return convert(self, definition)
def loads(source: str) -> Tuple[str, Dict[str, Definition]]:
    """Parse the PEG in *source* into a start symbol and definitions.

    Returns:
        A pair ``(start, defmap)``.  When the parse yields a single
        Definition it is stored under the name ``'Start'``; otherwise
        the parse yields a tuple of ``(name, definition)`` pairs and
        the start symbol is the name of the first pair.

    Raises:
        Error: if the parser returns a false match object.
    """
    match = _parser.match(source, flags=pe.STRICT | pe.MEMOIZE)
    if not match:
        raise Error('invalid grammar')

    parsed = match.value()
    if isinstance(parsed, Definition):
        # a bare expression becomes the only rule of the grammar
        return 'Start', {'Start': parsed}

    assert isinstance(parsed, tuple)
    rules = cast(Tuple[Tuple[str, Definition], ...], parsed)
    return rules[0][0], dict(rules)
def _finalize(expr, defs, structured):
    """Recursively check that every nonterminal in *expr* is in *defs*.

    Args:
        expr: object with ``op`` and ``args`` attributes.
        defs: container of defined nonterminal names.
        structured: value passed down unchanged to sub-expressions,
            except below a CAP where ``False`` is passed instead.

    Raises:
        Error: if a SYM names a nonterminal missing from *defs*.
    """
    op, args = expr.op, expr.args

    if op == Operator.SYM:
        name = args[0]
        if name not in defs:
            raise Error(f'undefined nonterminal: {name}')
        return

    # terminals have nothing to descend into
    if op in (Operator.DOT, Operator.LIT, Operator.CLS, Operator.RGX):
        return

    # sequences and choices hold their sub-expressions in args[0]
    if op in (Operator.SEQ, Operator.CHC):
        for term in args[0]:
            _finalize(term, defs, structured)
        return

    # every remaining operator wraps a single sub-expression
    if op == Operator.CAP:
        _finalize(args[0], defs, False)
    else:
        _finalize(args[0], defs, structured)
def _grammar_to_packrat(self, grammar): exprs = self._exprs for name, _def in grammar.definitions.items(): expr = self._def_to_expr(_def) # if name is already in exprs, that means it was seen as a # nonterminal in some other rule, so don't replace the object # or the call chain will break. if name in exprs: if isinstance(expr, Rule): action = expr.action expr = expr.expression else: action = None exprs[name].expression = expr exprs[name].action = action else: exprs[name] = expr # ensure all symbols are defined for name, expr in exprs.items(): if expr is None or (isinstance(expr, Rule) and expr.expression is None): raise Error(f'undefined rule: {name}') return exprs
def _parsing_instructions(defn):  # noqa: C901
    """Build the parsing instructions for *defn*.

    The builder is looked up in ``_op_map`` by ``defn.op`` and called
    with *defn*; its result is returned unchanged.

    Raises:
        Error: if ``defn.op`` has no entry in ``_op_map``.
    """
    # Only the table lookup is guarded.  Wrapping the builder call as
    # well would turn any KeyError raised inside a builder into a
    # misleading 'invalid definition' error.
    try:
        build = _op_map[defn.op]
    except KeyError:
        raise Error(f'invalid definition: {defn!r}')
    return build(defn)
def _match(  # noqa: C901
    pi: _Program,
    idx: int,
    s: str,
    pos: int,
    args: List[Any],
    kwargs: List[_Binding],
    memo: Optional[Memo],
) -> int:
    """Run the instruction list *pi* against *s* and return the end position.

    Execution starts at instruction *idx* with the input position *pos*.
    *args* and *kwargs* are modified in place as instructions capture
    text, run actions, or backtrack.  *memo* is accepted but not used in
    this function body.

    Returns:
        The current position when a PASS instruction is reached, or -1
        when the stack is emptied by failure handling.

    Raises:
        TypeError: if *s*, *args*, or *kwargs* is None.
        Error: if an instruction has an opcode that is not handled.
    """
    if s is None:
        raise TypeError
    if args is None:
        raise TypeError
    if kwargs is None:
        raise TypeError
    # Stack entries are 5-tuples, unpacked below as
    # (idx, pos, mark, argidx, kwidx).
    stack: List[_State] = [
        (0, 0, -1, 0, 0),    # failure (top-level backtrack entry)
        (-1, -1, -1, 0, 0),  # success
    ]
    # lookup optimizations
    push = stack.append
    pop = stack.pop
    slen = len(s)
    while stack:
        # print(idx, pos, s[pos], len(stack))
        # print(pi[idx])
        # `name` is unpacked here but not used in this function.
        opcode, oploc, scanner, marking, capturing, action, name = pi[idx]

        if marking:
            # Remember the current position and the arg/kwarg depths;
            # pos is -1, so failure handling skips this entry.
            # NOTE(review): presumably popped by the capturing/action
            # handling below -- confirm against the program builder.
            push((0, -1, pos, len(args), len(kwargs)))

        if opcode == SCAN:
            assert scanner is not None
            pos = scanner._scan(s, pos, slen)
            if pos < 0:
                idx = FAILURE
        elif opcode == BRANCH:
            # Push a backtrack entry targeting idx + oploc at the
            # current position, then go on to the next instruction.
            push((idx + oploc, pos, -1, len(args), len(kwargs)))
            idx += 1
            continue
        elif opcode == CALL:
            # Push the return index (pos -1: not a backtrack entry) and
            # jump to oploc, used here as an absolute index.
            push((idx + 1, -1, -1, -1, -1))
            idx = oploc
            continue
        elif opcode == COMMIT:
            # Drop the top entry and jump by the relative offset.
            pop()
            idx += oploc
            continue
        elif opcode == UPDATE:
            # Re-push the top entry with the current position and
            # arg/kwarg depths, keeping its target index and mark.
            next_idx, _, prev_mark, _, _ = pop()
            push((next_idx, pos, prev_mark, len(args), len(kwargs)))
            idx += oploc
            continue
        elif opcode == RESTORE:
            # Reset the position from the popped entry, then jump.
            pos = pop()[1]
            idx += oploc
            continue
        elif opcode == FAILTWICE:
            # Pop one entry (taking its position) and then fail, which
            # unwinds further in the failure handling below.
            pos = pop()[1]
            idx = FAILURE
        elif opcode == RETURN:
            # Jump to the index stored in the popped entry.
            idx = pop()[0]
            continue
        elif opcode == PASS:
            # The stack is non-empty here, so `pos` is returned below.
            break
        elif opcode == FAIL:
            idx = FAILURE
        elif opcode != NOOP:
            raise Error(f'invalid operation: {opcode}')

        if idx == FAILURE:
            # Unwind to the nearest backtrack entry and discard the
            # args/kwargs added since it was pushed.
            idx, pos, _, argidx, kwidx = pop()
            while pos < 0:  # pos is >= 0 only for backtracking entries
                idx, pos, _, argidx, kwidx = pop()
            args[argidx:] = []
            if kwargs:
                kwargs[kwidx:] = []
        else:
            if capturing:
                # Replace the args from argidx on with the substring
                # between the popped mark and the current position, and
                # drop the kwargs from kwidx on.
                _, _, mark, argidx, kwidx = pop()
                args[argidx:] = [s[mark:pos]]
                kwargs[kwidx:] = []
            if action:
                # Pops its own entry (separate from the capture's) and
                # replaces the args/kwargs from those depths on with
                # the action's results.
                _, _, mark, argidx, kwidx = pop()
                _args, _kwargs = action(
                    s, mark, pos, args[argidx:], dict(kwargs[kwidx:])
                )
                args[argidx:] = _args
                if not _kwargs:
                    kwargs[kwidx:] = []
                else:
                    kwargs[kwidx:] = _kwargs.items()
            idx += 1

    # The loop only ends without `break` once failure handling has
    # emptied the stack.
    if not stack:
        return -1
    return pos