def repeat(tokens):
    """Turn a parsed ``operand [quantifier]`` pair into a repetition parser.

    Parameters:

     - tokens - parse result holding either a lone operand, or an
       ``(operand, operator)`` pair. ``operator`` is indexed by the keys
       ``"mode"``, ``"exact"``, ``"min"`` and ``"max"``.

    Returns the operand unchanged when no quantifier was parsed; otherwise
    a ``Many``, ``ZeroOrMore``, ``OneOrMore`` or ``Optional`` wrapping the
    operand and built with ``PLAIN_ENGINE``.

    Unrecognised input is reported through ``Log.error("not expected")``.
    """
    if tokens.length() == 1:
        # no quantifier follows the operand
        return tokens.value()

    try:
        operand, operator = tokens
    except Exception as cause:
        Log.error("not expected", cause=cause)

    mode = operator["mode"]
    if not mode:
        # counted repetition: either an exact count or a min/max pair
        if operator["exact"]:
            return Many(operand, PLAIN_ENGINE, exact=int(operator["exact"]))
        return Many(
            operand,
            PLAIN_ENGINE,
            min_match=int(operator["min"]),
            max_match=int(operator["max"]),
        )

    # Compare against whole symbols. The previous `mode in "*?"` was a
    # substring test, so a bare "?" matched it and produced ZeroOrMore,
    # leaving the Optional branch below unreachable.
    # The "?"-suffixed forms (presumably lazy quantifiers) are handled the
    # same way as the plain forms.
    if mode in ("*", "*?"):
        return ZeroOrMore(operand, PLAIN_ENGINE)
    if mode in ("+", "+?"):
        return OneOrMore(operand, PLAIN_ENGINE)
    if mode == "?":
        return Optional(operand, PLAIN_ENGINE)
    Log.error("not expected")
def makeHTMLTags(tagStr, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Build the opening-tag and closing-tag expressions for one HTML tag.

    Tag names match in either upper or lower case; attributes may carry
    namespaces and may have quoted or unquoted values.

    Parameters:

     - tagStr - tag name as text, or an existing expression whose
       ``parser_name`` supplies the name
     - suppress_LT - expression used for the ``<`` of the opening tag
     - suppress_GT - expression used for the ``>`` of the opening tag

    Returns the pair ``(openTag, closeTag)``.
    """
    if not isinstance(tagStr, text):
        resname = tagStr.parser_name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=True)

    attr_name = Word(alphas, alphanums + "_-:")
    attr_value = quotedString.addParseAction(removeQuotes) | Word(
        printables, exclude=">"
    )

    # e.g. "ns:tag" -> "NsTag"; used as the suffix of the token names
    name_parts = resname.replace(":", " ").title().split()
    simpler_name = "".join(name_parts)

    # attribute names are lower-cased; the "=value" part is optional
    attribute = Group(
        attr_name.addParseAction(downcaseTokens)
        + Optional(Suppress("=") + attr_value)
    )
    attributes = OpenDict(ZeroOrMore(attribute))

    # the "empty" token is computed as: was a trailing "/" present?
    self_closing = Optional("/", default=[False])("empty").addParseAction(
        lambda t, l, s: t[0] == "/"
    )

    open_expr = suppress_LT + tagStr("tag") + attributes + self_closing + suppress_GT
    openTag = open_expr.set_token_name("start" + simpler_name)
    openTag = openTag.set_parser_name("<%s>" % resname)

    close_expr = Combine(Literal("</") + tagStr + ">")
    closeTag = close_expr.set_token_name("end" + simpler_name)
    closeTag = closeTag.set_parser_name("</%s>" % resname)

    return openTag, closeTag
def repeat(tokens):
    """Turn a parsed ``operand [quantifier]`` pair into a repetition parser.

    Parameters:

     - tokens - parse result holding either a lone operand, or an
       ``(operand, operator)`` pair. ``operator`` is indexed by the keys
       ``"mode"``, ``"exact"``, ``"min"`` and ``"max"``.

    Returns the operand unchanged when no quantifier was parsed; otherwise
    a ``Many``, ``ZeroOrMore``, ``OneOrMore`` or ``Optional`` wrapping the
    operand.

    An unrecognised mode is reported through ``Log.error("not expected")``.
    """
    if tokens.length() == 1:
        # no quantifier follows the operand
        return tokens.value()

    operand, operator = tokens
    mode = operator["mode"]
    if not mode:
        # counted repetition: either an exact count or a min/max pair
        if operator["exact"]:
            return Many(operand, exact=int(operator["exact"]))
        return Many(
            operand, min_match=int(operator["min"]), max_match=int(operator["max"])
        )

    # Compare against whole symbols. The previous `mode in "*?"` was a
    # substring test, so a bare "?" matched it and produced ZeroOrMore,
    # leaving the Optional branch below unreachable.
    # The "?"-suffixed forms (presumably lazy quantifiers) are handled the
    # same way as the plain forms.
    if mode in ("*", "*?"):
        return ZeroOrMore(operand)
    if mode in ("+", "+?"):
        return OneOrMore(operand)
    if mode == "?":
        return Optional(operand)
    Log.error("not expected")
def indentedBlock(blockStatementExpr, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    Indentation state is kept in ``_indent_stack``, a list of
    ``(column, PEER, DEDENT)`` entries that is defined outside this
    function; it is not a parameter. The top entry is read before anything
    is pushed, so the stack is presumably seeded by the module.

    Side effect: registers a backslash followed by a line end as ignorable
    text on ``blockStatementExpr.engine``.

    A valid block must contain at least one ``blockStatement``.
    """
    blockStatementExpr.engine.add_ignore("\\" + LineEnd())

    # Forward placeholders, re-pointed every time a block level is entered
    # or left.
    PEER = Forward()
    DEDENT = Forward()

    def _reset_stack(p=None, l=None, s=None, ex=None):
        # fail action: drop the top level and restore the parsers it recorded
        oldCol, oldPeer, oldDedent = _indent_stack.pop()
        PEER << oldPeer
        DEDENT << oldDedent

    def peer_stack(expectedCol):
        # build an action that insists the statement sits at expectedCol
        def output(t, l, s):
            if l >= len(s):
                # at or past the end of the input: nothing to check
                return
            curCol = col(l, s)
            if curCol != expectedCol:
                # NOTE: every ParseException in this function now uses the
                # same (expr, loc, string, msg) order; the original mixed
                # (s, l) / (l, s) and one call omitted t.type.
                if curCol > expectedCol:
                    raise ParseException(t.type, l, s, "illegal nesting")
                raise ParseException(t.type, l, s, "not a peer entry")

        return output

    def dedent_stack(expectedCol):
        # build an action that accepts a return to any recorded column
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            if curCol not in (i for i, _, _ in _indent_stack):
                raise ParseException(t.type, l, s, "not an unindent")
            if curCol < _indent_stack[-1][0]:
                # left of the current level: leave it
                oldCol, oldPeer, oldDedent = _indent_stack.pop()
                PEER << oldPeer
                DEDENT << oldDedent

        return output

    def indent_stack(t, l, s):
        # entering an indented block: column must be deeper than the top level
        curCol = col(l, s)
        if curCol > _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    def nodent_stack(t, l, s):
        # entering a non-indented block: column must equal the top level
        curCol = col(l, s)
        if curCol == _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    NL = OneOrMore(LineEnd().suppress())
    INDENT = Empty().addParseAction(indent_stack)
    NODENT = Empty().addParseAction(nodent_stack)

    # the two grammars differ only in which entry check opens the block
    if indent:
        smExpr = Group(
            Optional(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    else:
        smExpr = Group(
            Optional(NL)
            + NODENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")
# Everything up to, but not including, the next newline (may be empty).
restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line")

# "//" comment; a backslash directly before a newline continues the comment
# onto the following line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")

# Either a "/* ... */" block comment or a "//" comment. `+` binds tighter
# than `|`, so the "*/" terminator belongs to the block-comment branch only.
cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment
).set_parser_name("C++ style comment")

# Alias: the very same expression object as cppStyleComment.
javaStyleComment = cppStyleComment

# "#" through to the end of the line.
pythonStyleComment = Regex(r"#[^\n]*").set_parser_name("Python style comment")

# One item of a comma-separated list: runs of printable, non-comma characters,
# each optionally followed by spaces/tabs as long as the `~` checks on "," and
# line end pass (presumably negative lookaheads). The pieces are combined and
# the resulting text is stripped.
_commasepitem = (
    Combine(OneOrMore(
        Word(printables, exclude=",")
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    ))
    .addParseAction(lambda t: text(t).strip())
    .set_parser_name("commaItem")
)

# Delimited list whose items are quoted strings or bare items; an item that
# matches neither takes the default "".
commaSeparatedList = delimitedList(Optional(
    quotedString | _commasepitem, default=""
)).set_parser_name("commaSeparatedList")

# Parse actions built by tokenMap from int / float.
convertToInteger = tokenMap(int)
convertToFloat = tokenMap(float)

# A word made of `nums` characters, with convertToInteger as its parse action.
integer = Word(nums).set_parser_name("integer").addParseAction(convertToInteger)
hex_integer = (
Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) ret.set_parser_name("nested %s%s expression" % (opener, closer)) return ret with Engine(""): _escapedPunc = Word("\\", r"\[]-*.$+^?()~ ", exact=2).addParseAction(lambda t, l, s: t[0][1]) _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction( lambda t: unichr(int(t[0].lstrip("\\").lstrip("0").lstrip("xX"), 16))) _escapedOctChar = Regex(r"\\0[0-7]+").addParseAction( lambda t, l, s: unichr(int(t[0][1:], 8))) _singleChar = (_escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)) _charRange = Group(_singleChar + Suppress("-") + _singleChar) _reBracketExpr = ("[" + Optional("^").set_token_name("negate") + Group( OneOrMore(_charRange | _singleChar)).set_token_name("body") + "]") def srange(s): r"""Helper to easily define string ranges for use in Word construction. Borrows syntax from regexp '[]' string range definitions:: srange("[0-9]") -> "0123456789" srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" The input string must be enclosed in []'s, and the returned string is the expanded character set joined into a single string. The values enclosed in the []'s may be:
def __or__(self, other):
    """Implement ``self | other``.

    ``self | ...`` (a literal ``Ellipsis``) yields a ``_PendingSkip``
    around ``Optional(self)``. Any other operand is normalised by the
    current engine and paired with ``self`` in a streamlined
    ``MatchFirst``.
    """
    if other is not Ellipsis:
        alternative = engine.CURRENT.normalize(other)
        first_match = MatchFirst([self, alternative])
        return first_match.streamline()
    return _PendingSkip(Optional(self))
# Hex escape: one of the prefixes `\0x`, `\x` or `\X` followed by one or more
# hex digits, combined into one token and passed to hex_to_char (defined
# elsewhere; presumably yields the matcher for that code point).
escapedHexChar = Combine(
    (Literal("\\0x") | Literal("\\x") | Literal("\\X"))  # lookup literals is faster
    + OneOrMore(Char(hexnums))
).addParseAction(hex_to_char)

# Octal escape: `\0` followed by one or more octal digits. The action skips
# the two-character `\0` prefix, reads the remaining digits in base 8 and
# returns a Literal for that character.
escapedOctChar = Combine(
    Literal("\\0") + OneOrMore(Char("01234567"))
).addParseAction(lambda t: Literal(unichr(int(t.value()[2:], 8))))

# A single character: hex escape, octal escape, escapedChar or plainChar
# (the last two are defined elsewhere).
singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

# Range such as `a-z`: the two endpoints are named "min" and "max" and the
# group is handed to to_range.
charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range)

# Bracket expression: "[", an optional "^" named "negate", one or more
# grouped members each named "body", then "]"; handed to to_bracket.
brackets = (
    "["
    + Optional("^")("negate")
    + OneOrMore(Group(charRange | singleChar | macro)("body"))
    + "]"
).addParseAction(to_bracket)

#########################################################################################
# REGEX
# Forward placeholder for the full regex expression; not bound in this span.
regex = Forward()

# "^" and "$" are replaced by LineStart() / LineEnd() parsers.
line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())

# `\b` is replaced by NotAny(any_wordchar).
# NOTE(review): if NotAny is a negative lookahead, this only checks that the
# next character is not a word character, which is narrower than the usual
# regex word-boundary test - confirm this is intended.
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))

# A run of printable characters, none of them from the excluded set
# . ^ $ * + { } [ ] \ | ( ), turned into a Literal of the matched text.
simple_char = Word(
    printables, exclude=r".^$*+{}[]\|()"
).addParseAction(lambda t: Literal(t.value()))

# Backslash followed by any character: becomes a Literal of that second
# character (index 1 of the matched text).
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))
# Hex escape: one of the prefixes `\0x`, `\x` or `\X` followed by one or more
# hex digits, combined into one token and passed to hex_to_char (defined
# elsewhere; presumably yields the matcher for that code point).
escapedHexChar = Combine((Literal("\\0x") | Literal("\\x") | Literal("\\X"))  # lookup literals is faster
                         + OneOrMore(Char(hexnums))).addParseAction(hex_to_char)

# Octal escape: `\0` followed by one or more octal digits. The action skips
# the two-character `\0` prefix, reads the remaining digits in base 8 and
# returns a Literal for that character.
escapedOctChar = Combine(Literal("\\0") + OneOrMore(Char("01234567"))).addParseAction(
    lambda t: Literal(unichr(int(t.value()[2:], 8))))

# A single character: hex escape, octal escape, escapedChar or plainChar
# (the last two are defined elsewhere).
singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

# Range such as `a-z`: the two endpoints are named "min" and "max" and the
# group is handed to to_range.
charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range)

# Bracket expression: "[", an optional "^" named "negate", one or more
# grouped members each named "body", then "]"; handed to to_bracket.
brackets = ("[" + Optional("^")("negate") + OneOrMore(Group(charRange | singleChar | macro)("body"))
            + "]").addParseAction(to_bracket)

#########################################################################################
# REGEX
# Forward placeholder for the full regex expression; not bound in this span.
regex = Forward()

# "^" and "$" are replaced by LineStart() / LineEnd() parsers.
line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())

# `\b` is replaced by NotAny(any_wordchar).
# NOTE(review): if NotAny is a negative lookahead, this only checks that the
# next character is not a word character, which is narrower than the usual
# regex word-boundary test - confirm this is intended.
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))

# A run of printable characters, none of them from the excluded set
# . ^ $ * + { } [ ] \ | ( ), turned into a Literal of the matched text.
simple_char = Word(
    printables, exclude=r".^$*+{}[]\|()").addParseAction(lambda t: Literal(t.value()))

# Backslash followed by any character: becomes a Literal of that second
# character (index 1 of the matched text).
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))
# Hex escape: one of the prefixes `\0x`, `\x` or `\X` followed by one or more
# hex digits (repetition built with PLAIN_ENGINE), combined into one token and
# passed to hex_to_char (defined elsewhere; presumably yields the matcher for
# that code point).
escapedHexChar = Combine(
    (Literal("\\0x") | Literal("\\x") | Literal("\\X"))  # lookup literals is faster
    + OneOrMore(Char(hexnums), PLAIN_ENGINE)).addParseAction(hex_to_char)

# Octal escape: `\0` followed by one or more octal digits. The action skips
# the two-character `\0` prefix, reads the remaining digits in base 8 and
# returns a Literal for that character.
escapedOctChar = Combine(
    Literal("\\0") + OneOrMore(Char("01234567"), PLAIN_ENGINE)).addParseAction(
    lambda t: Literal(unichr(int(t.value()[2:], 8))))

# A single character: hex escape, octal escape, escapedChar or plainChar
# (the last two are defined elsewhere).
singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

# Range such as `a-z`: the two endpoints are named "min" and "max" and the
# group is handed to to_range.
charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range)

# Bracket expression: "[", an optional "^" named "negate", one or more
# grouped members each named "body", then "]"; handed to to_bracket.
# The Optional and OneOrMore wrappers are built with PLAIN_ENGINE.
brackets = (
    "["
    + Optional("^", PLAIN_ENGINE)("negate")
    + OneOrMore(Group(charRange | singleChar | macro)("body"), PLAIN_ENGINE)
    + "]").addParseAction(to_bracket)

#########################################################################################
# REGEX
# Forward placeholder for the full regex expression; not bound in this span.
regex = Forward()

# "^" and "$" are replaced by LineStart() / LineEnd() parsers.
line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())

# `\b` is replaced by NotAny(any_wordchar).
# NOTE(review): if NotAny is a negative lookahead, this only checks that the
# next character is not a word character, which is narrower than the usual
# regex word-boundary test - confirm this is intended.
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))

# A run of printable characters, none of them from the excluded set
# . ^ $ * + { } [ ] \ | ( ), turned into a Literal of the matched text.
simple_char = Word(
    printables, exclude=r".^$*+{}[]\|()").addParseAction(lambda t: Literal(t.value()))

# Backslash followed by any character: becomes a Literal of that second
# character (index 1 of the matched text).
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))