Exemplo n.º 1
0
def indentedBlock(blockStatementExpr, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    Indentation levels are tracked on ``_indent_stack``, a list that is
    defined outside this function (it is not a parameter), so every
    expression built by this helper works against the same stack. Each
    entry is a ``(column, PEER, DEDENT)`` tuple.

    Returns a ``Group`` expression named "indented block" whose fail action
    pops the top entry of ``_indent_stack``.

    A valid block must contain at least one ``blockStatement``.
    """
    # Register a backslash followed by a line end as ignorable text on the
    # statement expression's engine.
    blockStatementExpr.engine.add_ignore("\\" + LineEnd())

    PEER = Forward()
    DEDENT = Forward()

    def _reset_stack(p=None, l=None, s=None, ex=None):
        # Pop the innermost level and rebind PEER/DEDENT to the expressions
        # stored in the popped entry. The arguments are accepted (and
        # ignored) so this can also be installed as the fail action.
        _, oldPeer, oldDedent = _indent_stack.pop()
        PEER << oldPeer
        DEDENT << oldDedent

    def _push_level(curCol):
        # Bind PEER/DEDENT to checks for this column and record the level.
        PEER << Empty().addParseAction(peer_stack(curCol))
        DEDENT << Empty().addParseAction(dedent_stack(curCol))
        _indent_stack.append((curCol, PEER, DEDENT))

    def peer_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                # at end of input there is no column to check
                return
            curCol = col(l, s)
            if curCol != expectedCol:
                if curCol > expectedCol:
                    raise ParseException(t.type, l, s, "illegal nesting")
                raise ParseException(t.type, l, s, "not a peer entry")

        return output

    def dedent_stack(expectedCol):
        # expectedCol is unused; it is kept so the factory mirrors peer_stack.
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            # a dedent must land on a column that is already on the stack
            if curCol not in (i for i, _, _ in _indent_stack):
                raise ParseException(t.type, l, s, "not an unindent")
            if curCol < _indent_stack[-1][0]:
                _reset_stack()

        return output

    def indent_stack(t, l, s):
        # opening an indented block requires a column deeper than the current one
        curCol = col(l, s)
        if curCol <= _indent_stack[-1][0]:
            raise ParseException(t.type, l, s, "not a subentry")
        _push_level(curCol)

    def nodent_stack(t, l, s):
        # opening a non-indented block requires staying on the current column
        curCol = col(l, s)
        if curCol != _indent_stack[-1][0]:
            raise ParseException(t.type, l, s, "not a subentry")
        _push_level(curCol)

    NL = OneOrMore(LineEnd().suppress())
    INDENT = Empty().addParseAction(indent_stack)
    NODENT = Empty().addParseAction(nodent_stack)

    # The two block forms differ only in the check that opens the block.
    opener = INDENT if indent else NODENT
    smExpr = Group(
        Optional(NL)
        + opener
        + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
        + DEDENT
    )
    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")
Exemplo n.º 2
0
    # Everything from the current position up to, but not including, the
    # next newline (may match the empty string).
    restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line")

    # "//" comment running to the end of the line; a backslash immediately
    # followed by a newline is consumed as part of the comment, so the
    # comment carries on into the next line.
    dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")

    # Either a "/* ... */" block comment or a "//" comment. "+" binds tighter
    # than "|", so the closing "*/" belongs to the block-comment alternative.
    cppStyleComment = Combine(
        Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment
    ).set_parser_name("C++ style comment")

    # Alias: the same expression object as cppStyleComment.
    javaStyleComment = cppStyleComment

    # "#" through the end of the line.
    pythonStyleComment = Regex(r"#[^\n]*").set_parser_name("Python style comment")

# One unquoted item of a comma-separated list: one or more runs of printable
# characters other than ",", each optionally followed by spaces/tabs as long
# as that whitespace is not directly followed by a comma or a line end.
# The pieces are combined and the parse action returns text(t).strip()
# (text is defined elsewhere; presumably it yields the matched text).
_commasepitem = (
    Combine(OneOrMore(
        Word(printables, exclude=",")
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    ))
    .addParseAction(lambda t: text(t).strip())
    .set_parser_name("commaItem")
)
# Delimited list whose items are either a quoted string or an unquoted item;
# each item is Optional with default="" (presumably so that an empty slot
# between delimiters yields an empty string -- confirm against Optional).
commaSeparatedList = delimitedList(Optional(
    quotedString | _commasepitem, default=""
)).set_parser_name("commaSeparatedList")


# Parse actions built with tokenMap from the int / float constructors.
convertToInteger = tokenMap(int)
convertToFloat = tokenMap(float)

# A run of characters from nums, passed through convertToInteger.
integer = Word(nums).set_parser_name("integer").addParseAction(convertToInteger)

hex_integer = (
Exemplo n.º 3
0
    # "//" comment running to the end of the line; a backslash immediately
    # followed by a newline is consumed as part of the comment, so the
    # comment carries on into the next line.
    dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")

    # Either a "/* ... */" block comment or a "//" comment. "+" binds tighter
    # than "|", so the closing "*/" belongs to the block-comment alternative.
    cppStyleComment = Combine(
        Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/"
        | dblSlashComment).set_parser_name("C++ style comment")

    # Alias: the same expression object as cppStyleComment.
    javaStyleComment = cppStyleComment

    # "#" through the end of the line.
    pythonStyleComment = Regex(r"#[^\n]*").set_parser_name(
        "Python style comment")

# One unquoted item of a comma-separated list: one or more runs of printable
# characters other than ",", each optionally followed by spaces/tabs as long
# as that whitespace is not directly followed by a comma or a line end.
# The pieces are combined and the parse action returns text(t).strip()
# (text is defined elsewhere; presumably it yields the matched text).
# NOTE(review): this passes excludeChars= to Word while other Word calls in
# this file pass exclude= -- confirm which keyword this version accepts.
_commasepitem = (Combine(
    OneOrMore(
        Word(printables, excludeChars=",") +
        Optional(Word(" \t") + ~Literal(",") + ~LineEnd()))).addParseAction(
            lambda t: text(t).strip()).set_parser_name("commaItem"))
# Delimited list whose items are either a quoted string or an unquoted item;
# each item is Optional with default="" (presumably so that an empty slot
# between delimiters yields an empty string -- confirm against Optional).
commaSeparatedList = delimitedList(
    Optional(quotedString | _commasepitem,
             default="")).set_parser_name("commaSeparatedList")
"""Here are some common low-level expressions that may be useful in
jump-starting parser development:

 - numeric forms (`integers<integer>`, `reals<real>`,
   `scientific notation<sci_real>`)
 - common `programming identifiers<identifier>`
 - network addresses (`MAC<mac_address>`,
   `IPv4<ipv4_address>`, `IPv6<ipv6_address>`)
 - ISO8601 `dates<iso8601_date>` and
   `datetime<iso8601_datetime>`
 - `UUID<uuid>`
Exemplo n.º 4
0
                    )).addParseAction(scrub)
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret


# Convenience constants for positional expressions: one ready-made, named
# instance of each of Empty, LineStart, LineEnd, StringStart and StringEnd.
empty = Empty().set_parser_name("empty")
lineStart = LineStart().set_parser_name("lineStart")
lineEnd = LineEnd().set_parser_name("lineEnd")
stringStart = StringStart().set_parser_name("stringStart")
stringEnd = StringEnd().set_parser_name("stringEnd")


# Escaped punctuation: exactly two characters, the first drawn from _bslash
# (defined elsewhere; presumably a backslash) and the second from the listed
# punctuation. The parse action keeps only the second character.
_escapedPunc = Word(
    _bslash, r"\[]-*.$+^?()~ ", exact=2
).addParseAction(lambda t, l, s: t[0][1])
# Hex escape such as "\x41" or "\0x41": strip the leading backslash, the
# optional "0" and the x/X marker, then convert the remaining hex digits to
# a character with unichr (defined elsewhere in the file).
_escapedHexChar = (
    Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(lambda t: unichr(int(
        t[0].lstrip('\\').lstrip('0').lstrip('xX'), 16
    )))
)
# Octal escape such as "\012": drop the backslash and read the rest as a
# base-8 code point.
_escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(lambda t, l, s: unichr(int(
    t[0][1:], 8
)))
Exemplo n.º 5
0
# A character range such as "a-z": two singleChar matches labelled "min" and
# "max" around a "-", grouped and handed to to_range (defined elsewhere).
charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range)

# A bracket expression: "[", an optional "^" labelled "negate", one or more
# body items (a range, a single character or a macro), then "]". The match is
# handed to to_bracket (defined elsewhere).
brackets = (
    "["
    + Optional("^")("negate")
    + OneOrMore(Group(charRange | singleChar | macro)("body"))
    + "]"
).addParseAction(to_bracket)

#########################################################################################
# REGEX
# Forward placeholder for the regex grammar; its definition is not in this
# part of the file.
regex = Forward()

# The parse actions below return parser expressions rather than text
# (presumably the returned expression replaces the matched token -- confirm).
# "^" -> LineStart(), "$" -> LineEnd()
line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())
# "\b" -> NotAny(any_wordchar); any_wordchar is defined elsewhere.
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))
# A run of printable characters excluding the regex metacharacters listed in
# exclude=; returned as a Literal of the matched value.
simple_char = Word(
    printables, exclude=r".^$*+{}[]\|()"
).addParseAction(lambda t: Literal(t.value()))
# Backslash followed by any character; returned as a Literal of the character
# after the backslash (index 1 of the matched value).
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))

# NOTE(review): Engine() is defined elsewhere; presumably expressions created
# inside this block use that engine's whitespace handling -- confirm.
with Engine():
    # ALLOW SPACES IN THE RANGE
    # Contents of a "{...}" repetition count through the closing brace (the
    # opening "{" is not matched here). Alternatives, in the order written:
    #   n}        -> "exact"
    #   min,max}  -> "min" and "max"
    #   min,}     -> "min" only
    #   ,max}     -> "max" only
    repetition = (
        Word(nums)("exact") + "}"
        | Word(nums)("min") + "," + Word(nums)("max") + "}"
        | Word(nums)("min") + "," + "}"
        | "," + Word(nums)("max") + "}"
    )