Exemplo n.º 1
0
def makeHTMLTags(tagStr, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.
    """
    if isinstance(tagStr, text):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=True)
    else:
        resname = tagStr.parser_name

    tagAttrName = Word(alphas, alphanums + "_-:")
    tagAttrValue = quotedString.addParseAction(removeQuotes) | Word(
        printables, exclude=">"
    )
    simpler_name = "".join(resname.replace(":", " ").title().split())

    openTag = (
        (
            suppress_LT
            + tagStr("tag")
            + OpenDict(ZeroOrMore(Group(
                tagAttrName.addParseAction(downcaseTokens)
                + Optional(Suppress("=") + tagAttrValue)
            )))
            + Optional(
                "/", default=[False]
            )("empty").addParseAction(lambda t, l, s: t[0] == "/")
            + suppress_GT
        )
        .set_token_name("start" + simpler_name)
        .set_parser_name("<%s>" % resname)
    )

    closeTag = (
        Combine(Literal("</") + tagStr + ">")
        .set_token_name("end" + simpler_name)
        .set_parser_name("</%s>" % resname)
    )

    # openTag.tag = resname
    # closeTag.tag = resname
    # openTag.tag_body = SkipTo(closeTag)

    return openTag, closeTag
Exemplo n.º 2
0
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::

        integer expr expr expr...

    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the
    leading count token is suppressed.

    If ``intExpr`` is specified, it should be a mo_parsing expression
    that produces an integer value.

    Example::

        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').addParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    if intExpr is None:
        intExpr = Word(nums).addParseAction(lambda t: int(t[0]))

    arrayExpr = Forward()

    def countFieldParseAction(t, l, s):
        n = t[0]
        arrayExpr << Group(Many(expr, exact=n))
        return []

    intExpr = (intExpr.set_parser_name("arrayLen").addParseAction(
        countFieldParseAction, callDuringTry=True))
    return (intExpr + arrayExpr).set_parser_name("(len) " + text(expr) + "...")
Exemplo n.º 3
0
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    else:
        smExpr = Group(
            Optional(NL)
            + NODENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")


anyOpenTag, anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums + "_:").set_parser_name("any tag")
)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), "><& \"'"))
commonHTMLEntity = Regex(
    "&(?P<entity>" + "|".join(_htmlEntityMap.keys()) + ");"
).set_parser_name("common HTML entity")


def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    return _htmlEntityMap.get(t.entity)


# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/"
Exemplo n.º 4
0
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret


# convenience constants for positional expressions
empty = Empty().set_parser_name("empty")
lineStart = LineStart().set_parser_name("lineStart")
lineEnd = LineEnd().set_parser_name("lineEnd")
stringStart = StringStart().set_parser_name("stringStart")
stringEnd = StringEnd().set_parser_name("stringEnd")


_escapedPunc = Word(
    _bslash, r"\[]-*.$+^?()~ ", exact=2
).addParseAction(lambda t, l, s: t[0][1])
_escapedHexChar = (
    Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(lambda t: unichr(int(
        t[0].lstrip('\\').lstrip('0').lstrip('xX'), 16
    )))
)
_escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(lambda t, l, s: unichr(int(
    t[0][1:], 8
)))
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
Exemplo n.º 5
0
brackets = (
    "["
    + Optional("^")("negate")
    + OneOrMore(Group(charRange | singleChar | macro)("body"))
    + "]"
).addParseAction(to_bracket)

#########################################################################################
# REGEX
regex = Forward()

line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))
simple_char = Word(
    printables, exclude=r".^$*+{}[]\|()"
).addParseAction(lambda t: Literal(t.value()))
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))

with Engine():
    # ALLOW SPACES IN THE RANGE
    repetition = (
        Word(nums)("exact") + "}"
        | Word(nums)("min") + "," + Word(nums)("max") + "}"
        | Word(nums)("min") + "," + "}"
        | "," + Word(nums)("max") + "}"
    )

repetition = Group(
    "{" + repetition | (Literal("*?") | Literal("+?") | Char("*+?"))("mode")
)
Exemplo n.º 6
0
charRange = Group(singleChar("min") + "-" +
                  singleChar("max")).addParseAction(to_range)

brackets = ("[" + Optional("^")("negate") +
            OneOrMore(Group(charRange | singleChar | macro)("body")) +
            "]").addParseAction(to_bracket)

#########################################################################################
# REGEX
regex = Forward()

line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))
simple_char = Word(
    printables,
    exclude=r".^$*+{}[]\|()").addParseAction(lambda t: Literal(t.value()))
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))

with Engine():
    # ALLOW SPACES IN THE RANGE
    repetition = (Word(nums)("exact") + "}"
                  | Word(nums)("min") + "," + Word(nums)("max") + "}"
                  | Word(nums)("min") + "," + "}"
                  | "," + Word(nums)("max") + "}")

repetition = Group("{" + repetition
                   | (Literal("*?") | Literal("+?") | Char("*+?"))("mode"))

LB = Char("(")