示例#1
0
 def convert(expansion):
     """Return a strategy for strings matched by this expansion.

     Uses ``strategies``, ``ignore_names`` and ``nonterminals`` from the
     enclosing scope.
     """
     pieces = []
     for symbol in expansion:
         if pieces and ignore_names:
             # Offer a chance to slip an ignored substring (such as the
             # whitespace between JSON values) in between real tokens.
             ignored = st.one_of([strategies[name] for name in ignore_names])
             pieces.append(st.just(u"") | ignored)
         if symbol.name not in strategies:
             # First time we've met this NonTerminal: recurse to handle it
             # (lazy strategy instantiation allows forward references), and
             # record the result in the cache to avoid infinite loops.
             assert isinstance(symbol, lark.grammar.NonTerminal)
             strategies[symbol.name] = st.one_of(
                 [convert(ex) for ex in nonterminals[symbol.name]])
         # Either a Terminal, or a NonTerminal handled above or earlier.
         pieces.append(strategies[symbol.name])
     # Rules with a single expansion are returned directly -- worthwhile
     # when that part is a terminal.  Otherwise join the generated parts.
     if len(pieces) == 1:
         return pieces[0]
     return st.tuples(*pieces).map(u"".join)
示例#2
0
def scalar_dtypes():
    # type: () -> st.SearchStrategy[np.dtype]
    """Return a strategy that can return any non-flexible scalar dtype."""
    # Every non-flexible scalar dtype family; one_of also accepts a single
    # iterable of strategies, equivalent to passing them as varargs.
    dtype_strategies = [
        boolean_dtypes(),
        integer_dtypes(),
        unsigned_integer_dtypes(),
        floating_dtypes(),
        complex_number_dtypes(),
        datetime64_dtypes(),
        timedelta64_dtypes(),
    ]
    return st.one_of(dtype_strategies)
示例#3
0
def all_types(draw):
    """Draw a single value from one of several assorted strategies."""
    assorted = one_of(
        text(),
        integers(),
        none(),
        booleans(),
        floats(),
        tuples(),
        times(),
        uuids(),
        lists(integers()),
        dictionaries(text(), text()),
    )
    return draw(assorted)
示例#4
0
def urls():
    """A strategy for :rfc:`3986`, generating http/https URLs."""
    # Characters allowed to appear literally in a path segment; anything
    # else is percent-encoded.  Hoisted out of url_encode so the set is
    # built once per urls() call instead of once per generated string.
    safe_chars = set(string.ascii_letters + string.digits + "$-_.+!*'(),")

    def url_encode(s):
        # Percent-encode each character of ``s`` outside ``safe_chars``.
        # Inputs come from st.text(string.printable), so every ord(c) fits
        # in two hex digits.
        return "".join(c if c in safe_chars else "%%%02X" % ord(c) for c in s)

    schemes = st.sampled_from(["http", "https"])
    # Valid port numbers are 0..65535, rendered as ":<port>".
    ports = st.integers(min_value=0, max_value=2 ** 16 - 1).map(":{}".format)
    # Zero or more encoded segments, each preceded by "/".
    paths = st.lists(st.text(string.printable).map(url_encode)).map(
        lambda path: "/".join([""] + path)
    )

    return st.builds(
        "{}://{}{}{}".format, schemes, domains(), st.one_of(st.just(""), ports), paths
    )
示例#5
0
def from_lark(grammar, start=None):
    # type: (lark.lark.Lark, Text) -> st.SearchStrategy[Text]
    """A strategy for strings accepted by the given context-free grammar.

    ``grammar`` must be a ``Lark`` object, which wraps an EBNF specification.
    The Lark EBNF grammar reference can be found
    `here <https://lark-parser.readthedocs.io/en/latest/grammar/>`_.

    ``from_lark`` will automatically generate strings matching the
    nonterminal ``start`` symbol in the grammar, which was supplied as an
    argument to the Lark class.  To generate strings matching a different
    symbol, including terminals, you can override this by passing the
    ``start`` argument to ``from_lark``.
    """
    check_type(lark.lark.Lark, grammar, "grammar")
    if start is None:
        # Default to the start symbol the Lark object was constructed with.
        start = grammar.options.start

    # Compiling the EBNF grammar to a sanitised and canonicalised BNF
    # format makes further transformations much easier.
    terminals, rules, ignore_names = grammar.grammar.compile()

    # Map all terminals to the corresponding regular expression, and
    # thence to a strategy for producing matching strings.
    # We'll add strategies for non-terminals to this mapping later.
    strategies = {
        t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
        for t in terminals
    }
    if start in strategies:
        # ``start`` names a terminal, so its regex strategy is the answer.
        return strategies[start]

    # Reshape our flat list of rules into a dict of rulename to list of
    # possible productions for that rule.  We sort productions by increasing
    # number of parts as a heuristic for shrinking order.
    nonterminals = {
        origin.name:
        sorted([rule.expansion for rule in rules if rule.origin == origin],
               key=len)
        for origin in set(rule.origin for rule in rules)
    }

    @st.cacheable
    @st.defines_strategy_with_reusable_values
    def convert(expansion):
        # Build a strategy for strings matching ``expansion``, a sequence
        # of Terminal/NonTerminal symbols from one production.
        parts = []
        for p in expansion:
            if parts and ignore_names:
                # Chance to insert ignored substrings between meaningful
                # tokens, e.g. whitespace between values in JSON.
                parts.append(
                    st.just(u"")
                    | st.one_of([strategies[name] for name in ignore_names]))
            if p.name in strategies:
                # This might be a Terminal, or it might be a NonTerminal
                # that we've previously handled.
                parts.append(strategies[p.name])
            else:
                # It must be the first time we've encountered this NonTerminal.
                # Recurse to handle it, relying on lazy strategy instantiation
                # to allow forward references, then add it to the strategies
                # cache to avoid infinite loops.
                # NOTE(review): the cache entry is written only after the
                # recursive convert() calls return, so a rule whose expansion
                # re-enters itself before any cached symbol would recurse
                # unboundedly -- presumably Lark's compiled output avoids
                # this; confirm against recursive grammars.
                assert isinstance(p, lark.grammar.NonTerminal)
                s = st.one_of([convert(ex) for ex in nonterminals[p.name]])
                parts.append(s)
                strategies[p.name] = s
        # Special-case rules with only one expansion; it's worthwhile being
        # efficient when this includes terminals!  Otherwise, join the parts.
        if len(parts) == 1:
            return parts[0]
        return st.tuples(*parts).map(u"".join)

    # Most grammars describe several production rules, so we check the start
    # option passed to Lark to see which nonterminal we're going to produce.
    return st.one_of([convert(ex) for ex in nonterminals[start]])