def scalar_dtypes():
    # type: () -> st.SearchStrategy[np.dtype]
    """Return a strategy that can return any non-flexible scalar dtype."""
    return st.one_of(
        boolean_dtypes(),
        integer_dtypes(),
        unsigned_integer_dtypes(),
        floating_dtypes(),
        complex_number_dtypes(),
        datetime64_dtypes(),
        timedelta64_dtypes(),
    )
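

# A minimal usage sketch (not part of the library source): a property-based
# test that draws an arbitrary non-flexible scalar dtype and checks that
# numpy can build a zero-filled array of that dtype.  The test name and the
# assertion are illustrative assumptions.
import numpy as np

from hypothesis import given


@given(scalar_dtypes())
def test_can_build_array_of_any_scalar_dtype(dtype):
    assert np.zeros(3, dtype=dtype).dtype == dtype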
def all_types(draw):
    return draw(
        one_of(
            text(),
            integers(),
            none(),
            booleans(),
            floats(),
            tuples(),
            times(),
            uuids(),
            lists(integers()),
            dictionaries(text(), text()),
        )
    )
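

# A minimal usage sketch: ``all_types`` follows the draw-based pattern used
# with hypothesis's ``composite`` decorator, so wrapping it below turns it
# into a strategy we can pass to ``@given``.  We assume the bare strategy
# names it uses (``one_of``, ``text``, ...) are imported from
# ``hypothesis.strategies`` elsewhere in this module; the test body is an
# illustrative assumption, not part of the original source.
import datetime
import uuid

from hypothesis import given
from hypothesis.strategies import composite

all_types_strategy = composite(all_types)()


@given(all_types_strategy)
def test_all_types_draws_a_known_type(value):
    assert value is None or isinstance(
        value,
        (bool, int, float, str, tuple, list, dict, datetime.time, uuid.UUID),
    )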
def urls():
    """A strategy for :rfc:`3986`, generating http/https URLs."""

    def url_encode(s):
        safe_chars = set(string.ascii_letters + string.digits + "$-_.+!*'(),")
        return "".join(c if c in safe_chars else "%%%02X" % ord(c) for c in s)

    schemes = st.sampled_from(["http", "https"])
    ports = st.integers(min_value=0, max_value=2 ** 16 - 1).map(":{}".format)
    paths = st.lists(st.text(string.printable).map(url_encode)).map(
        lambda path: "/".join([""] + path)
    )

    return st.builds(
        "{}://{}{}{}".format, schemes, domains(), st.one_of(st.just(""), ports), paths
    )
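

# A minimal usage sketch: every generated value should parse as a URL with
# an http or https scheme.  ``urlparse`` is the standard-library parser
# (Python 3 here); the test itself is an illustrative assumption rather
# than the library's own test suite.
from urllib.parse import urlparse

from hypothesis import given


@given(urls())
def test_generated_urls_have_an_http_scheme(url):
    assert urlparse(url).scheme in ("http", "https")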
def from_lark(grammar, start=None):
    # type: (lark.lark.Lark, Text) -> st.SearchStrategy[Text]
    """A strategy for strings accepted by the given context-free grammar.

    ``grammar`` must be a ``Lark`` object, which wraps an EBNF
    specification.  The Lark EBNF grammar reference can be found
    `here <https://lark-parser.readthedocs.io/en/latest/grammar/>`_.

    ``from_lark`` will automatically generate strings matching the
    nonterminal ``start`` symbol in the grammar, which was supplied as an
    argument to the Lark class.  To generate strings matching a different
    symbol, including terminals, you can override this by passing the
    ``start`` argument to ``from_lark``.
    """
    check_type(lark.lark.Lark, grammar, "grammar")
    if start is None:
        start = grammar.options.start

    # Compiling the EBNF grammar to a sanitised and canonicalised BNF
    # format makes further transformations much easier.
    terminals, rules, ignore_names = grammar.grammar.compile()

    # Map all terminals to the corresponding regular expression, and
    # thence to a strategy for producing matching strings.
    # We'll add strategies for non-terminals to this mapping later.
    strategies = {
        t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
        for t in terminals
    }
    if start in strategies:
        return strategies[start]

    # Reshape our flat list of rules into a dict of rulename to list of
    # possible productions for that rule.  We sort productions by increasing
    # number of parts as a heuristic for shrinking order.
    nonterminals = {
        origin.name: sorted(
            [rule.expansion for rule in rules if rule.origin == origin], key=len
        )
        for origin in set(rule.origin for rule in rules)
    }

    @st.cacheable
    @st.defines_strategy_with_reusable_values
    def convert(expansion):
        parts = []
        for p in expansion:
            if parts and ignore_names:
                # Chance to insert ignored substrings between meaningful
                # tokens, e.g. whitespace between values in JSON.
                parts.append(
                    st.just(u"")
                    | st.one_of([strategies[name] for name in ignore_names])
                )
            if p.name in strategies:
                # This might be a Terminal, or it might be a NonTerminal
                # that we've previously handled.
                parts.append(strategies[p.name])
            else:
                # It must be the first time we've encountered this NonTerminal.
                # Recurse to handle it, relying on lazy strategy instantiation
                # to allow forward references, then add it to the strategies
                # cache to avoid infinite loops.
                assert isinstance(p, lark.grammar.NonTerminal)
                s = st.one_of([convert(ex) for ex in nonterminals[p.name]])
                parts.append(s)
                strategies[p.name] = s
        # Special-case rules with only one expansion; it's worthwhile being
        # efficient when this includes terminals!  Otherwise, join the parts.
        if len(parts) == 1:
            return parts[0]
        return st.tuples(*parts).map(u"".join)

    # Most grammars describe several production rules, so we check the start
    # option passed to Lark to see which nonterminal we're going to produce.
    return st.one_of([convert(ex) for ex in nonterminals[start]])
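

# A minimal usage sketch: build a Lark parser for a toy grammar of nested
# "a...b" strings and check that every generated string is accepted by the
# same parser.  The grammar text and test are illustrative assumptions;
# only ``lark.Lark`` and ``from_lark`` come from the code above, and we
# assume a lark version whose API matches what that code expects.
import lark

from hypothesis import given

TOY_PARSER = lark.Lark(
    """
    start: "a" start "b" | "ab"
    """,
    start="start",
)


@given(from_lark(TOY_PARSER))
def test_generated_strings_are_parseable(s):
    TOY_PARSER.parse(s)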