Exemplo n.º 1
0
 def nodent_stack(t, l, s):
     curCol = col(l, s)
     if curCol == _indent_stack[-1][0]:
         PEER << Empty().addParseAction(peer_stack(curCol))
         DEDENT << Empty().addParseAction(dedent_stack(curCol))
         _indent_stack.append((curCol, PEER, DEDENT))
     else:
         raise ParseException(t.type, s, l, "not a subentry")
Exemplo n.º 2
0
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given
    expression.  Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns astring containing the original parsed text.

    If the optional ``asString`` argument is passed as
    ``False``, then the return value is
    a `ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string.  So if the expression passed to
    `originalTextFor` contains expressions with defined
    results names, you must set ``asString`` to ``False`` if you
    want to preserve those results name values.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    locMarker = Empty().addParseAction(lambda t, l, s: l)
    matchExpr = locMarker("_original_start") + Group(expr) + locMarker("_original_end")
    matchExpr = matchExpr.addParseAction(extractText)
    return matchExpr
Exemplo n.º 3
0
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call `ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    locator = Empty().addParseAction(lambda t, l, s: l)
    return Group(locator("locn_start") + Group(expr)("value") + locator("locn_end"))
Exemplo n.º 4
0
    def streamline(self):
        if self.streamlined:
            return self
        self.streamlined = True

        # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if not self.is_annotated() and not self.exprs:
            return Empty(self.parser_name)

        acc = []
        same = True
        for e in self.exprs:
            f = e.streamline()
            same = same and f is e
            if f.is_annotated():
                acc.append(f)
            elif isinstance(f, self.__class__):
                same = False
                acc.extend(f.exprs)
            else:
                acc.append(f)

        if same:
            return self

        output = self.copy()
        output.exprs = acc
        output.streamlined = True
        return output
Exemplo n.º 5
0
    def streamline(self):
        if self.streamlined:
            return self

        output = ParseExpression.streamline(self)

        if isinstance(output, Empty):
            return output
        if not output.is_annotated():
            if len(output.exprs) == 0:
                output = Empty()
            if len(output.exprs) == 1:
                output = output.exprs[0]

        output.streamlined = True
        output.checkRecursion()
        return output
Exemplo n.º 6
0
 def copyTokenToRepeater(t, l, s):
     if t:
         if len(t) == 1:
             rep << t[0]
         else:
             # flatten t tokens
             tflat = _flatten(t)
             rep << And(Literal(tt) for tt in tflat)
     else:
         rep << Empty()
Exemplo n.º 7
0
    def streamline(self):
        if self.streamlined:
            return self
        if not self.exprs:
            return Empty(self.parser_name)
        if len(self.exprs) == 1 and not self.is_annotated():
            return self.exprs[0].streamline()

        # collapse any _PendingSkip's
        same = True
        exprs = self.exprs
        if any(
            isinstance(e, ParseExpression)
            and e.exprs
            and isinstance(e.exprs[-1], _PendingSkip)
            for e in exprs[:-1]
        ):
            same = False
            for i, e in enumerate(exprs[:-1]):
                if (
                    isinstance(e, ParseExpression)
                    and e.exprs
                    and isinstance(e.exprs[-1], _PendingSkip)
                ):
                    ee = e.exprs[-1] + exprs[i + 1]
                    e.exprs[-1] = ee
                    e.streamlined = False
                    exprs[i + 1] = None

        # streamline INDIVIDUAL EXPRESSIONS
        acc = []
        for e in exprs:
            if e is None:
                continue
            f = e.streamline()
            same = same and f is e
            if f.is_annotated():
                acc.append(f)
            elif isinstance(f, And) and f.parser_config.engine is self.parser_config.engine:
                same = False
                acc.extend(f.exprs)
            else:
                acc.append(f)

        if same:
            self.streamlined = True
            return self

        output = self.copy()
        output.exprs = acc
        output.streamlined = True
        return output
Exemplo n.º 8
0
 def __init__(self, exprs):
     if exprs and Ellipsis in exprs:
         tmp = []
         for i, expr in enumerate(exprs):
             if expr is Ellipsis:
                 if i < len(exprs) - 1:
                     skipto_arg = (Empty() + exprs[i + 1]).exprs[-1]
                     tmp.append(SkipTo(skipto_arg)("_skipped"))
                 else:
                     raise Exception(
                         "cannot construct And with sequence ending in ...")
             else:
                 tmp.append(expr)
         exprs[:] = tmp
     super(And, self).__init__(exprs)
Exemplo n.º 9
0
    def streamline(self):
        if self.streamlined:
            return self
        self.streamlined = True

        # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if not self.exprs:
            return Empty(self.parser_name)

        acc = []
        for e in self.exprs:
            e = e.streamline()
            if isinstance(e, self.__class__) and not is_decorated(e):
                acc.extend(e.exprs)
            else:
                acc.append(e)

        self.exprs = acc
        return self
Exemplo n.º 10
0
    def streamline(self):
        if self.streamlined:
            return self
        self.streamlined = True

        # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if not self.is_annotated() and not self.exprs:
            return Empty(self.parser_name)

        acc = []
        same = True
        clazz = self.__class__
        if clazz == Or:
            clazz = (
                Or,
                MatchFirst,
            )  # TODO: not correct, but allows merging of the two to a single longer list
        for e in self.exprs:
            f = e.streamline()
            same = same and f is e
            if f.is_annotated():
                acc.append(f)
            elif isinstance(f, clazz):
                same = False
                acc.extend(f.exprs)
            else:
                acc.append(f)

        if same:
            return self

        output = self.copy()
        output.exprs = acc
        output.streamlined = True
        return output
Exemplo n.º 11
0
def indentedBlock(blockStatementExpr, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.
    """
    blockStatementExpr.engine.add_ignore("\\" + LineEnd())

    PEER = Forward()
    DEDENT = Forward()

    def _reset_stack(p=None, l=None, s=None, ex=None):
        oldCol, oldPeer, oldDedent = _indent_stack.pop()
        PEER << oldPeer
        DEDENT << oldDedent

    def peer_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            if curCol != expectedCol:
                if curCol > expectedCol:
                    raise ParseException(t.type, s, l, "illegal nesting")
                raise ParseException(t.type, l, s, "not a peer entry")

        return output

    def dedent_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            if curCol not in (i for i, _, _ in _indent_stack):
                raise ParseException(s, l, "not an unindent")
            if curCol < _indent_stack[-1][0]:
                oldCol, oldPeer, oldDedent = _indent_stack.pop()
                PEER << oldPeer
                DEDENT << oldDedent

        return output

    def indent_stack(t, l, s):
        curCol = col(l, s)
        if curCol > _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    def nodent_stack(t, l, s):
        curCol = col(l, s)
        if curCol == _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, s, l, "not a subentry")

    NL = OneOrMore(LineEnd().suppress())
    INDENT = Empty().addParseAction(indent_stack)
    NODENT = Empty().addParseAction(nodent_stack)

    if indent:
        smExpr = Group(
            Optional(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    else:
        smExpr = Group(
            Optional(NL)
            + NODENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")
Exemplo n.º 12
0
                        + ~Literal(closer)
                        + CharsNotIn(ignore_chars, exact=1)
                    )).addParseAction(scrub)
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret


# convenience constants for positional expressions
empty = Empty().set_parser_name("empty")
lineStart = LineStart().set_parser_name("lineStart")
lineEnd = LineEnd().set_parser_name("lineEnd")
stringStart = StringStart().set_parser_name("stringStart")
stringEnd = StringEnd().set_parser_name("stringEnd")


_escapedPunc = Word(
    _bslash, r"\[]-*.$+^?()~ ", exact=2
).addParseAction(lambda t, l, s: t[0][1])
_escapedHexChar = (
    Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(lambda t: unichr(int(
        t[0].lstrip('\\').lstrip('0').lstrip('xX'), 16
    )))
)
_escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(lambda t, l, s: unichr(int(
Exemplo n.º 13
0
    singles = [s for s in symbols if len(s) == 1]
    rest = list(
        sorted([s for s in symbols if len(s) != 1], key=lambda s: -len(s)))

    acc = []
    acc.extend(re.escape(sym) for sym in rest)
    if singles:
        acc.append(regex_range("".join(singles)))
    regex = "|".join(acc)

    return Regex(regex).streamline()


LEFT_ASSOC = object()
RIGHT_ASSOC = object()
_no_op = Empty()


def infixNotation(baseExpr, spec, lpar=Suppress("("), rpar=Suppress(")")):
    """
    :param baseExpr: expression representing the most basic element for the
       nested
    :param spec: list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(opExpr,
       numTerms, rightLeftAssoc, parseAction)``, where:

       - opExpr is the mo_parsing expression for the operator; may also
         be a string, which will be converted to a Literal; if numTerms
         is 3, opExpr is a tuple of two expressions, for the two
         operators separating the 3 terms
       - numTerms is the number of terms for this operator (must be 1,