def matchPreviousExpr(expr):
    """Build an expression that matches a 'repeat' of what ``expr`` matched.

    The returned expression parses with a copy of ``expr`` and then compares
    the (flattened) tokens it produced against the (flattened) tokens most
    recently produced by ``expr`` itself.  For example::

        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this matches by
    expressions, will *not* match the leading ``"1:1"`` in ``"1:10"``;
    the expressions are evaluated first, and then compared, so ``"1"`` is
    compared with ``"10"``.

    Do *not* use with packrat parsing enabled.

    :param expr: the earlier expression whose match must be repeated
    :return: a ``Forward`` that wraps a copy of ``expr``
    :raises ParseException: raised by the installed parse action when the
        repeated tokens differ from the previously matched tokens
    """
    rep = Forward()
    e2 = expr.copy()
    rep <<= e2

    def copyTokenToRepeater(t, l, s):
        # remember what `expr` just matched; the nested action compares to it
        matchTokens = _flatten(t)

        def mustMatchTheseTokens(t, l, s):
            theseTokens = _flatten(t)
            if theseTokens != matchTokens:
                # report the real expression, location and input, in the same
                # (expr, loc, string, msg) form used by the other helpers
                raise ParseException(
                    t.type, l, s, "tokens do not repeat the previous match"
                )

        # NOTE(review): other helpers in this file use the value returned by
        # addParseAction; here it is discarded -- confirm it mutates in place
        rep.addParseAction(mustMatchTheseTokens, callDuringTry=True)

    # NOTE(review): return value discarded here as well -- see note above
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.set_parser_name("(prev) " + text(expr))
    return rep
def matchPreviousLiteral(expr):
    """Build an expression that matches a literal repeat of a previous match.

    A parse action registered on ``expr`` rebinds the returned ``Forward``
    each time it runs, so that it matches the tokens ``expr`` produced.
    For example::

        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this matches a
    previous literal, will also match the leading ``"1:1"`` in ``"1:10"``.
    If this is not desired, use `matchPreviousExpr`.

    Do *not* use with packrat parsing enabled.

    :param expr: the earlier expression whose matched text must be repeated
    :return: a ``Forward`` that is (re)defined whenever the parse action runs
    """
    rep = Forward()

    def copyTokenToRepeater(t, l, s):
        if not t:
            # nothing was matched, so the repeat matches nothing either
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match each one in sequence
            rep << And(Literal(tt) for tt in _flatten(t))

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.set_parser_name("(prev) " + text(expr))
    return rep
def countedArray(expr, intExpr=None):
    """Define a counted list of expressions.

    The pattern has the form::

        integer expr expr expr...

    where the leading integer tells how many ``expr`` expressions follow.
    The matched tokens are the ``expr`` tokens as a list; the parse action on
    the count returns an empty list, so the count itself is not in the output.

    If ``intExpr`` is specified, it should be a mo_parsing expression that
    produces an integer value.

    Example::

        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').addParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']

    :param expr: expression for one array element
    :param intExpr: optional expression yielding the element count
    :return: the combined count-then-array expression
    """
    if intExpr is None:
        intExpr = Word(nums).addParseAction(lambda t: int(t[0]))

    arrayExpr = Forward()

    def countFieldParseAction(t, l, s):
        # the count is only known once parsed; define the array body now
        arrayExpr << Group(Many(expr, exact=t[0]))
        return []

    counter = intExpr.set_parser_name("arrayLen")
    counter = counter.addParseAction(countFieldParseAction, callDuringTry=True)
    combined = counter + arrayExpr
    return combined.set_parser_name("(len) " + text(expr) + "...")
def indentedBlock(blockStatementExpr, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    Indentation levels are tracked on ``_indent_stack``, which is defined
    outside this function; each entry is ``(column, PEER, DEDENT)``.

    A valid block must contain at least one ``blockStatement``.

    :return: the grouped block expression, named ``"indented block"``
    :raises ParseException: raised by the installed parse actions when a
        statement is at an unexpected column
    """
    blockStatementExpr.engine.add_ignore("\\" + LineEnd())

    PEER = Forward()
    DEDENT = Forward()

    def _reset_stack(p=None, l=None, s=None, ex=None):
        # fail action: drop the top level and restore its saved expressions
        oldCol, oldPeer, oldDedent = _indent_stack.pop()
        PEER << oldPeer
        DEDENT << oldDedent

    def peer_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                # at end of input there is no column to check
                return
            curCol = col(l, s)
            if curCol != expectedCol:
                if curCol > expectedCol:
                    raise ParseException(t.type, l, s, "illegal nesting")
                raise ParseException(t.type, l, s, "not a peer entry")

        return output

    def dedent_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            # a dedent must land on a column already recorded on the stack
            if curCol not in (i for i, _, _ in _indent_stack):
                raise ParseException(t.type, l, s, "not an unindent")
            if curCol < _indent_stack[-1][0]:
                oldCol, oldPeer, oldDedent = _indent_stack.pop()
                PEER << oldPeer
                DEDENT << oldDedent

        return output

    def indent_stack(t, l, s):
        # block opener when the block must be deeper than the current level
        curCol = col(l, s)
        if curCol > _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    def nodent_stack(t, l, s):
        # block opener when the block sits at the current level
        curCol = col(l, s)
        if curCol == _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    NL = OneOrMore(LineEnd().suppress())
    INDENT = Empty().addParseAction(indent_stack)
    NODENT = Empty().addParseAction(nodent_stack)

    # the two variants differ only in the expression that opens the block
    opening = INDENT if indent else NODENT
    smExpr = Group(
        Optional(NL)
        + opening
        + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
        + DEDENT
    )
    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters ("(" and ")" are the default).

    Parameters:

     - opener - opening character for a nested list
       (default= ``"("``); can also be a mo_parsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a mo_parsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= `quotedString`)

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignoreExpr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quotedString or
    a comment expression.  Specify multiple expressions using an
    `Or` or `MatchFirst`. The default is `quotedString`, but if no
    expressions are to be ignored, then pass ``None`` for this argument.

    :return: a ``Forward`` matching one (possibly nested) delimited group
    :raises ValueError: if ``opener == closer``, or if ``content`` is not
        given and ``opener``/``closer`` are not both strings
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not isinstance(opener, text) or not isinstance(closer, text):
            raise ValueError(
                "opening and closing arguments must be strings if no content expression"
                " is given"
            )

        # capture the active whitespace characters before switching engines
        ignore_chars = engine.CURRENT.white_chars
        # NOTE(review): Engine("") presumably disables whitespace skipping
        # while the default content expression is built -- confirm
        with Engine(""):

            def scrub(t):
                return t[0].strip()

            if len(opener) == 1 and len(closer) == 1:
                if ignoreExpr is not None:
                    content = Combine(OneOrMore(
                        ~ignoreExpr
                        + CharsNotIn(opener + closer + "".join(ignore_chars), exact=1)
                    )).addParseAction(scrub)
                else:
                    # use an Empty() instance, as everywhere else in this
                    # file; the bare class is not a parser expression
                    content = Empty() + CharsNotIn(
                        opener + closer + "".join(ignore_chars)
                    ).addParseAction(scrub)
            else:
                # multi-character delimiters cannot be excluded per character,
                # so each character is guarded by negative lookaheads
                if ignoreExpr is not None:
                    content = Combine(OneOrMore(
                        ~ignoreExpr
                        + ~Literal(opener)
                        + ~Literal(closer)
                        + CharsNotIn(ignore_chars, exact=1)
                    )).addParseAction(scrub)
                else:
                    content = Combine(OneOrMore(
                        ~Literal(opener)
                        + ~Literal(closer)
                        + CharsNotIn(ignore_chars, exact=1)
                    )).addParseAction(scrub)

    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret
def infixNotation(baseExpr, spec, lpar=Suppress("("), rpar=Suppress(")")):
    """
    Build an operator-precedence expression parser.

    The grammar first matches a flat sequence of operands and operators;
    the ``make_tree`` parse action then folds that flat sequence into
    nested results, trying the operators in the order they appear in ``spec``.

    :param baseExpr: expression representing the most basic operand of the
        nested expression
    :param spec: list of tuples, one for each operator precedence level
        in the expression grammar; each tuple is of the form ``(opExpr,
        numTerms, rightLeftAssoc, parseAction)``, where:

        - opExpr is the mo_parsing expression for the operator; may also
          be a string, which will be converted to a Literal; if numTerms
          is 3, opExpr is a tuple of two expressions, for the two
          operators separating the 3 terms
        - numTerms is the number of terms for this operator (must be 1,
          2, or 3)
        - rightLeftAssoc is the indicator whether the operator is right
          or left associative, using the mo_parsing-defined constants
          ``RIGHT_ASSOC`` and ``LEFT_ASSOC``.
        - parseAction is the parse action to be associated with
          expressions matching this operator expression (the parse action
          tuple member may be omitted); if the parse action is passed
          a tuple or list of functions, this is equivalent to calling
          ``setParseAction(*fn)``
          (:class:`ParserElement.addParseAction`)
    :param lpar: expression for matching left-parentheses
        (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses
        (default= ``Suppress(')')``)
    :return: ParserElement
    """

    # cache of normalized operators, keyed by id() of the operator as given
    all_op = {}

    def norm(op):
        # return (is_suppressed, operator expression), reusing a cached entry
        output = all_op.get(id(op))
        if output:
            return output

        def record_self(tok):
            # NOTE(review): the ParseResults built here is discarded (there is
            # no `return`) -- confirm whether a return was intended
            ParseResults(tok.type, [tok.type.parser_name])

        output = engine.CURRENT.normalize(op)
        is_suppressed = isinstance(output, Suppress)
        if is_suppressed:
            # match with the wrapped expression; suppression is applied
            # later, in make_tree, using the is_suppressed flag
            output = output.expr
        output = output.addParseAction(record_self)
        all_op[id(op)] = is_suppressed, output
        return is_suppressed, output

    opList = []
    """ SCRUBBED LIST OF OPERATORS * expr - used exclusively for ParseResult(expr, [...]), not used to match * op - used to match * arity - same * assoc - same * parse_actions - same """

    for operDef in spec:
        # the parse action member is optional, hence the slice for `rest`
        op, arity, assoc, rest = operDef[0], operDef[1], operDef[2], operDef[3:]
        parse_actions = list(map(wrap_parse_action, listwrap(rest[0]))) if rest else []
        if arity == 1:
            is_suppressed, op = norm(op)
            if assoc == RIGHT_ASSOC:
                # NOTE(review): this group is built as operand-then-operator,
                # while make_tree treats RIGHT_ASSOC unary operators as
                # prefix; per the list description above the group is only
                # used as the result type -- confirm
                opList.append((
                    Group(baseExpr + op),
                    op,
                    is_suppressed,
                    arity,
                    assoc,
                    parse_actions,
                ))
            else:
                opList.append((
                    Group(op + baseExpr),
                    op,
                    is_suppressed,
                    arity,
                    assoc,
                    parse_actions,
                ))
        elif arity == 2:
            is_suppressed, op = norm(op)
            opList.append((
                Group(baseExpr + op + baseExpr),
                op,
                is_suppressed,
                arity,
                assoc,
                parse_actions,
            ))
        elif arity == 3:
            # zip pairs up the two flags and the two operator expressions:
            # is_suppressed == (s0, s1), op == (op0, op1)
            is_suppressed, op = zip(norm(op[0]), norm(op[1]))
            opList.append((
                Group(baseExpr + op[0] + baseExpr + op[1] + baseExpr),
                op,
                is_suppressed,
                arity,
                assoc,
                parse_actions,
            ))
    opList = tuple(opList)

    def record_op(op):
        # parse action factory: tag the matched tokens with the expression
        # that produced them, so make_tree can recognise operators
        def output(tokens):
            return ParseResults(NO_PARSER, [(tokens, op)])

        return output

    # unary RIGHT_ASSOC operators are matched before the operand (prefix)
    prefix_ops = MatchFirst([
        op.addParseAction(record_op(op))
        for expr, op, is_suppressed, arity, assoc, pa in opList
        if arity == 1 and assoc == RIGHT_ASSOC
    ])
    # unary LEFT_ASSOC operators are matched after the operand (suffix)
    suffix_ops = MatchFirst([
        op.addParseAction(record_op(op))
        for expr, op, is_suppressed, arity, assoc, pa in opList
        if arity == 1 and assoc == LEFT_ASSOC
    ])
    # binary operators and both parts of each ternary operator
    ops = Or([
        opPart.addParseAction(record_op(opPart))
        for expr, op, is_suppressed, arity, assoc, pa in opList
        if arity > 1
        for opPart in (op if isinstance(op, tuple) else [op])
    ])

    def make_tree(tokens, loc, string):
        # fold the flat list of (result, tag) pairs into one nested result;
        # after every reduction the search restarts at the first operator
        flat_tokens = list(tokens)
        num = len(opList)
        op_index = 0
        while len(flat_tokens) > 1 and op_index < num:
            expr, op, is_suppressed, arity, assoc, parse_actions = opList[op_index]
            if arity == 1:
                if assoc == RIGHT_ASSOC:
                    # PREFIX OPERATOR -3
                    # scan right-to-left; i is the operator's index and the
                    # operand is at i + 1
                    todo = list(reversed(list(enumerate(flat_tokens[:-1]))))
                    for i, (r, o) in todo:
                        if o == op:
                            if is_suppressed:
                                result = ParseResults(expr, (flat_tokens[i + 1][0],))
                            else:
                                result = ParseResults(expr, (r, flat_tokens[i + 1][0]))
                            break
                    else:
                        # operator not present: try the next operator
                        op_index += 1
                        continue
                else:
                    # SUFFIX OPERATOR 3!
                    # enumerating from element 1 makes i the operand's index;
                    # the operator is at i + 1
                    todo = list(enumerate(flat_tokens[1:]))
                    for i, (r, o) in todo:
                        if o == op:
                            if is_suppressed:
                                result = ParseResults(expr, (flat_tokens[i][0],))
                            else:
                                result = ParseResults(expr, (flat_tokens[i][0], r,))
                            break
                    else:
                        op_index += 1
                        continue
            elif arity == 2:
                # i is the left operand's index; operator at i + 1, right
                # operand at i + 2
                todo = list(enumerate(flat_tokens[1:-1]))
                if assoc == RIGHT_ASSOC:
                    todo = list(reversed(todo))
                for i, (r, o) in todo:
                    if o == op:
                        if is_suppressed:
                            result = ParseResults(
                                expr, (flat_tokens[i][0], flat_tokens[i + 2][0])
                            )
                        else:
                            result = ParseResults(
                                expr, (flat_tokens[i][0], r, flat_tokens[i + 2][0])
                            )
                        break
                else:
                    op_index += 1
                    continue
            else:  # arity==3
                # i is the first operand's index; the two operators are at
                # i + 1 and i + 3
                todo = list(enumerate(flat_tokens[1:-3]))
                if assoc == RIGHT_ASSOC:
                    todo = list(reversed(todo))
                for i, (r0, o0) in todo:
                    if o0 == op[0]:
                        r1, o1 = flat_tokens[i + 3]
                        if o1 == op[1]:
                            seq = [
                                flat_tokens[i][0],
                                flat_tokens[i + 2][0],
                                flat_tokens[i + 4][0],
                            ]
                            s0, s1 = is_suppressed
                            # insert the later operator first so the earlier
                            # insertion index stays valid
                            if not s1:
                                seq.insert(2, r1)
                            if not s0:
                                seq.insert(1, r0)
                            result = ParseResults(expr, seq)
                            break
                else:
                    op_index += 1
                    continue

            for p in parse_actions:
                result = p(result, -1, string)
            # number of flat entries consumed by an operator of this arity
            offset = (0, 2, 3, 5)[arity]
            flat_tokens[i : i + offset] = [(result, (expr,))]
            op_index = 0

        return flat_tokens[0][0]

    flat = Forward()
    # a parenthesised sub-expression counts as an operand
    iso = lpar.suppress() + flat + rpar.suppress()
    atom = (baseExpr | iso).addParseAction(record_op(baseExpr))
    modified = ZeroOrMore(prefix_ops) + atom + ZeroOrMore(suffix_ops)
    flat << (modified + ZeroOrMore(ops + modified)).addParseAction(make_tree)
    return flat
).addParseAction(lambda t: Literal(unichr(int(t.value()[2:], 8)))) singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range) brackets = ( "[" + Optional("^")("negate") + OneOrMore(Group(charRange | singleChar | macro)("body")) + "]" ).addParseAction(to_bracket) ######################################################################################### # REGEX regex = Forward() line_start = Literal("^").addParseAction(lambda: LineStart()) line_end = Literal("$").addParseAction(lambda: LineEnd()) word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar)) simple_char = Word( printables, exclude=r".^$*+{}[]\|()" ).addParseAction(lambda t: Literal(t.value())) esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1])) with Engine(): # ALLOW SPACES IN THE RANGE repetition = ( Word(nums)("exact") + "}" | Word(nums)("min") + "," + Word(nums)("max") + "}" | Word(nums)("min") + "," + "}"