def run(self, state_machine, print_steps=None):
    # Drive `state_machine` for up to `self.n_steps` steps drawn from
    # `self.data`.  When `print_steps` is None it is enabled only at
    # debug verbosity.
    if print_steps is None:
        print_steps = current_verbosity() >= Verbosity.debug
    self.data.hypothesis_runner = state_machine
    # `cu.many` decides one coin-flip at a time whether to keep stepping,
    # which lets the conjecture engine shrink the number of steps.
    should_continue = cu.many(
        self.data,
        min_size=1,
        max_size=self.n_steps,
        average_size=self.n_steps,
    )
    try:
        if print_steps:
            state_machine.print_start()
        state_machine.check_invariants()
        while should_continue.more():
            value = self.data.draw(state_machine.steps())
            if print_steps:
                state_machine.print_step(value)
            state_machine.execute_step(value)
            state_machine.check_invariants()
    finally:
        # Teardown must run even if a step or invariant check raised.
        if print_steps:
            state_machine.print_end()
        state_machine.teardown()
def run_state_machine(factory, data):
    # Build a state machine from `factory` and execute a bounded random run.
    machine = factory()
    check_type(GenericStateMachine, machine, "state_machine_factory()")
    data.conjecture_data.hypothesis_runner = machine
    n_steps = settings.stateful_step_count
    # Coin-flip helper: lets the engine shrink the number of steps taken.
    should_continue = cu.many(data.conjecture_data,
                              min_size=1, max_size=n_steps,
                              average_size=n_steps)
    # Print each step when replaying the final failing example or debugging.
    print_steps = (current_build_context().is_final or
                   current_verbosity() >= Verbosity.debug)
    try:
        if print_steps:
            machine.print_start()
        machine.check_invariants()
        while should_continue.more():
            value = data.conjecture_data.draw(machine.steps())
            if print_steps:
                machine.print_step(value)
            machine.execute_step(value)
            machine.check_invariants()
    finally:
        # Always tear down, even when a step or invariant check fails.
        if print_steps:
            machine.print_end()
        machine.teardown()
def do_draw(self, data):
    # Draw a list whose elements are unique under every key in self.keys.
    # An empty element strategy can only satisfy min_size == 0.
    if self.element_strategy.is_empty:
        assert self.min_size == 0
        return []
    elements = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    # One `seen` set per uniqueness key; a value is accepted only when it
    # is new under every key.
    seen_sets = tuple(set() for _ in self.keys)
    result = []

    # We construct a filtered strategy here rather than using a check-and-reject
    # approach because some strategies have special logic for generation under a
    # filter, and FilteredStrategy can consolidate multiple filters.
    filtered = self.element_strategy.filter(lambda val: all(
        key(val) not in seen for (key, seen) in zip(self.keys, seen_sets)))
    while elements.more():
        value = filtered.filtered_strategy.do_filtered_draw(
            data=data, filter_strategy=filtered)
        if value is filter_not_satisfied:
            # Could not find a fresh value; count it as a rejection so
            # `many` eventually gives up instead of looping forever.
            elements.reject()
        else:
            for key, seen in zip(self.keys, seen_sets):
                seen.add(key(value))
            result.append(value)
    assert self.max_size >= len(result) >= self.min_size
    return result
def do_draw(self, data):
    # Draw a list of values unique under each of self.keys, optionally
    # extending each value into a tuple via self.tuple_suffixes.
    if self.element_strategy.is_empty:
        assert self.min_size == 0
        return []
    elements = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    # One `seen` set per uniqueness key.
    seen_sets = tuple(set() for _ in self.keys)
    result = []

    # We construct a filtered strategy here rather than using a check-and-reject
    # approach because some strategies have special logic for generation under a
    # filter, and FilteredStrategy can consolidate multiple filters.
    def not_yet_in_unique_list(val):
        return all(key(val) not in seen for key, seen in zip(self.keys, seen_sets))

    filtered = self.element_strategy._filter_for_filtered_draw(
        not_yet_in_unique_list
    )
    while elements.more():
        value = filtered.do_filtered_draw(data)
        if value is filter_not_satisfied:
            # No fresh value found; count a rejection so `many` terminates.
            elements.reject()
        else:
            for key, seen in zip(self.keys, seen_sets):
                seen.add(key(value))
            if self.tuple_suffixes is not None:
                # Used when only the first element of a tuple must be
                # unique (e.g. the keys of a dictionary strategy).
                value = (value,) + data.draw(self.tuple_suffixes)
            result.append(value)
    assert self.max_size >= len(result) >= self.min_size
    return result
def do_draw(self, data):
    # Draw a list whose elements are unique under every key in self.keys.
    if self.element_strategy.is_empty:
        assert self.min_size == 0
        return []
    elements = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    # One `seen` set per uniqueness key.
    seen_sets = tuple(set() for _ in self.keys)
    result = []

    # We construct a filtered strategy here rather than using a check-and-reject
    # approach because some strategies have special logic for generation under a
    # filter, and FilteredStrategy can consolidate multiple filters.
    filtered = self.element_strategy.filter(
        lambda val: all(
            key(val) not in seen for (key, seen) in zip(self.keys, seen_sets)
        )
    )
    while elements.more():
        value = filtered.filtered_strategy.do_filtered_draw(
            data=data, filter_strategy=filtered
        )
        if value is filter_not_satisfied:
            # No fresh value satisfied the filter; reject so `many` stops.
            elements.reject()
        else:
            for key, seen in zip(self.keys, seen_sets):
                seen.add(key(value))
            result.append(value)
    assert self.max_size >= len(result) >= self.min_size
    return result
def do_draw(self, data):
    # Sample without replacement from self.element_strategy.elements,
    # keeping only values unique under every key in self.keys.
    should_draw = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    seen_sets = tuple(set() for _ in self.keys)
    result = []
    # Lazily-copied pool of candidates; every iteration pops one, so the
    # pool strictly shrinks even when a candidate is rejected.
    remaining = LazySequenceCopy(self.element_strategy.elements)
    # BUGFIX: also stop once the candidate pool is exhausted.  Previously,
    # if every element had been popped (duplicates rejected) while
    # should_draw.more() was still True, the next iteration would call
    # cu.integer_range(data, 0, -1) and pop from an empty sequence.
    while remaining and should_draw.more():
        # Pick a uniform index, swap it to the end, and pop it off - an
        # incremental Fisher-Yates shuffle that shrinks well.
        i = len(remaining) - 1
        j = cu.integer_range(data, 0, i)
        if j != i:
            remaining[i], remaining[j] = remaining[j], remaining[i]
        value = remaining.pop()
        if all(key(value) not in seen for (key, seen) in zip(self.keys, seen_sets)):
            for key, seen in zip(self.keys, seen_sets):
                seen.add(key(value))
            result.append(value)
        else:
            # Duplicate under some uniqueness key; tell `many` so that
            # generation eventually gives up.
            should_draw.reject()
    assert self.max_size >= len(result) >= self.min_size
    return result
def do_draw(self, data):
    # Build a syntactically plausible internet domain name no longer than
    # self.max_length.
    # 1 - Select a valid top-level domain (TLD) name
    # 2 - Check that the number of characters in our selected TLD won't
    # prevent us from generating at least a 1 character subdomain.
    # 3 - Randomize the TLD between upper and lower case characters.
    domain = data.draw(
        st.sampled_from(TOP_LEVEL_DOMAINS)
        .filter(lambda tld: len(tld) + 2 <= self.max_length)
        .flatmap(lambda tld: st.tuples(
            *[st.sampled_from([c.lower(), c.upper()]) for c in tld]
        ).map(u"".join)))
    # The maximum possible number of subdomains is 126,
    # 1 character subdomain + 1 '.' character, * 126 = 252,
    # with a max of 255, that leaves 3 characters for a TLD.
    # Allowing any more subdomains would not leave enough
    # characters for even the shortest possible TLDs.
    elements = cu.many(data, min_size=1, average_size=1, max_size=126)
    while elements.more():
        # Generate a new valid subdomain using the regex strategy.
        sub_domain = data.draw(
            st.from_regex(self.label_regex, fullmatch=True))
        if len(domain) + len(sub_domain) >= self.max_length:
            # Adding this label would exceed max_length: discard the bytes
            # drawn for it (keeps shrinking clean) and stop extending.
            data.stop_example(discard=True)
            break
        domain = sub_domain + "." + domain
    return domain
def run_state_machine(factory, data):
    # Create the machine under test and run it for a shrinkable number of
    # steps, printing each step when debugging or replaying a failure.
    machine = factory()
    check_type(GenericStateMachine, machine, "state_machine_factory()")
    data.conjecture_data.hypothesis_runner = machine
    n_steps = settings.stateful_step_count
    should_continue = cu.many(
        data.conjecture_data, min_size=1, max_size=n_steps, average_size=n_steps
    )
    print_steps = (
        current_build_context().is_final or current_verbosity() >= Verbosity.debug
    )
    try:
        if print_steps:
            machine.print_start()
        machine.check_invariants()
        while should_continue.more():
            value = data.conjecture_data.draw(machine.steps())
            if print_steps:
                machine.print_step(value)
            machine.execute_step(value)
            machine.check_invariants()
    finally:
        # Teardown is unconditional so resources are released on failure too.
        if print_steps:
            machine.print_end()
        machine.teardown()
def do_draw(self, data):
    # Draw values for a pandas Index, optionally enforcing uniqueness.
    result = []
    seen = set()
    iterator = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=(self.min_size + self.max_size) / 2,
    )
    while iterator.more():
        elt = data.draw(self.elements)
        if self.unique:
            if elt in seen:
                # Duplicate: reject so `many` still terminates.
                iterator.reject()
                continue
            seen.add(elt)
        result.append(elt)
    # When no dtype was given, infer one from the generated values.
    dtype = infer_dtype_if_necessary(dtype=self.dtype, values=result,
                                     elements=self.elements, draw=data.draw)
    return pandas.Index(result, dtype=dtype, tupleize_cols=False)
def do_draw(self, data):
    # Sample without replacement from the sampled_from pool, keeping only
    # values unique under every key in self.keys.
    should_draw = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    seen_sets = tuple(set() for _ in self.keys)
    result = []
    # Lazily-copied candidate pool; each is popped at most once, so the
    # loop also stops when the pool is exhausted.
    remaining = LazySequenceCopy(self.element_strategy.elements)
    while remaining and should_draw.more():
        # Pick a uniform index, swap it to the end, and pop it off - an
        # incremental Fisher-Yates shuffle that shrinks well.
        i = len(remaining) - 1
        j = cu.integer_range(data, 0, i)
        if j != i:
            remaining[i], remaining[j] = remaining[j], remaining[i]
        # _transform applies any .map/.filter wrapped around the base
        # strategy; it may return the filter_not_satisfied sentinel.
        value = self.element_strategy._transform(remaining.pop())
        if value is not filter_not_satisfied and all(
            key(value) not in seen for key, seen in zip(self.keys, seen_sets)
        ):
            for key, seen in zip(self.keys, seen_sets):
                seen.add(key(value))
            if self.tuple_suffixes is not None:
                # Extend into a tuple when only the first element must be
                # unique (e.g. dictionary keys).
                value = (value,) + data.draw(self.tuple_suffixes)
            result.append(value)
        else:
            # Filtered out or duplicate: count as a rejection.
            should_draw.reject()
    assert self.max_size >= len(result) >= self.min_size
    return result
def do_draw(self, data):
    # Draw a shape that broadcasts against self.shape, with between
    # min_dims and max_dims dimensions and sides in [min_side, max_side].
    elements = cu.many(
        data,
        min_size=self.min_dims,
        max_size=self.max_dims,
        average_size=min(
            max(self.min_dims * 2, self.min_dims + 5),
            0.5 * (self.min_dims + self.max_dims),
        ),
    )
    result = []
    # Work from the trailing dimension outwards, since broadcasting aligns
    # shapes from the right.
    reversed_shape = tuple(self.shape[::-1])
    while elements.more():
        if len(result) < len(self.shape):
            # Shrinks towards original shape
            if reversed_shape[len(result)] == 1:
                # Base dim is 1: anything broadcasts against it, so
                # sometimes keep 1 and sometimes pick an arbitrary side.
                if self.min_side <= 1 and not data.draw(st.booleans()):
                    side = 1
                else:
                    side = data.draw(self.side_strat)
            elif self.max_side >= reversed_shape[len(result)] and (
                not self.min_side <= 1 <= self.max_side or data.draw(st.booleans())
            ):
                # Match the base dimension exactly (always compatible).
                side = reversed_shape[len(result)]
            else:
                # Otherwise the only broadcast-compatible choice is 1.
                side = 1
        else:
            # Beyond the base shape's rank any side is compatible.
            side = data.draw(self.side_strat)
        result.append(side)
    assert self.min_dims <= len(result) <= self.max_dims
    assert all(self.min_side <= s <= self.max_side for s in result)
    return tuple(reversed(result))
def do_draw(self, data):
    # Draw values for a pandas Index, optionally enforcing uniqueness.
    result = []
    seen = set()
    iterator = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=(self.min_size + self.max_size) / 2,
    )
    while iterator.more():
        elt = data.draw(self.elements)
        if self.unique:
            if elt in seen:
                # Duplicate: reject so generation still terminates.
                iterator.reject()
                continue
            seen.add(elt)
        result.append(elt)
    # When no dtype was given, infer one from the generated values.
    dtype = infer_dtype_if_necessary(
        dtype=self.dtype, values=result, elements=self.elements, draw=data.draw
    )
    return pandas.Index(result, dtype=dtype, tupleize_cols=False)
def test_many_with_max_size():
    # max_size=2 caps generation at exactly two elements, regardless of
    # what the data buffer would otherwise allow.
    data = ConjectureData.for_buffer([1] * 10)
    many = cu.many(data, min_size=0, max_size=2, average_size=1)
    observed = [many.more() for _ in range(3)]
    assert observed == [True, True, False]
def test_fixed_size_draw_many():
    # With min_size == max_size no decision bits are needed at all, so an
    # empty buffer is enough: exactly three elements are produced.
    data = ConjectureData.for_buffer([])
    many = cu.many(data, min_size=3, max_size=3, average_size=3)
    flags = [many.more() for _ in range(4)]
    assert flags == [True, True, True, False]
def do_draw(self, data):
    # Draw a variable-length list by repeatedly asking the size helper
    # whether to append another element.
    should_continue = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_length,
    )
    out = []
    while should_continue.more():
        out.append(data.draw(self.element_strategy))
    return out
def test_rejection_eventually_terminates_many_invalid_for_min_size():
    # When min_size > 0 and every candidate is rejected, `many` must abort
    # the whole test case (StopTest) and mark the data invalid.
    data = ConjectureData.for_buffer([1] * 1000)
    many = cu.many(data, min_size=1, max_size=1000, average_size=100)
    attempts = 0
    with pytest.raises(StopTest):
        while many.more():
            many.reject()
            attempts += 1
    assert data.status == Status.INVALID
def from_object_schema(draw: Any) -> Any:
    """Here, we do some black magic with private Hypothesis internals.

    It's unfortunate, but also the only way that I know of to satisfy all
    the interacting constraints without making shrinking totally hopeless.

    If any Hypothesis maintainers are reading this... I'm so, so sorry.
    """
    elements = cu.many(  # type: ignore
        draw(st.data()).conjecture_data,
        min_size=min_size,
        max_size=max_size,
        average_size=min(min_size + 5, (min_size + max_size) // 2),
    )
    out: dict = {}
    while elements.more():
        # Choose the next key: missing required keys first...
        for key in required:
            if key not in out:
                break
        else:
            # ...then names demanded by dependencies of keys already drawn...
            for k in dep_names:
                if k in out:
                    key = next((n for n in dep_names[k] if n not in out), None)
                    if key is not None:
                        break
            else:
                # ...otherwise any name not yet present.
                key = draw(
                    all_names_strategy.filter(lambda s: s not in out))
        if key in properties:
            out[key] = draw(from_schema(properties[key]))
        else:
            for rgx, matching_schema in patterns.items():
                if re.search(rgx, string=key) is not None:
                    out[key] = draw(from_schema(matching_schema))
                    # Check for overlapping conflicting schemata
                    for rgx, matching_schema in patterns.items():
                        if re.search(
                            rgx, string=key) is not None and not is_valid(
                                out[key], matching_schema):
                            out.pop(key)
                            elements.reject()
                            break
                    break
            else:
                out[key] = draw(from_schema(additional))
        # Undo the draw and reject when a dependent schema is violated.
        for k, v in dep_schemas.items():
            if k in out and not is_valid(out, v):
                out.pop(key)
                elements.reject()
    for k in dep_names:
        if k in out:
            assume(all(n in out for n in dep_names[k]))
    return out
def from_object_schema(draw: Any) -> Any: """Do some black magic with private Hypothesis internals for objects. It's unfortunate, but also the only way that I know of to satisfy all the interacting constraints without making shrinking totally hopeless. If any Hypothesis maintainers are reading this... I'm so, so sorry. """ # Hypothesis internals are not type-annotated... I do mean *black* magic! elements = cu.many( draw(st.data()).conjecture_data, min_size=min_size, max_size=max_size, average_size=min(min_size + 5, (min_size + max_size) / 2), ) out: dict = {} while elements.more(): for key in required: if key not in out: break else: for k in set(dep_names).intersection(out): # pragma: no cover # nocover because some of these conditionals are rare enough # that not all test runs hit them, but are still essential. key = next((n for n in dep_names[k] if n not in out), None) if key is not None: break else: key = draw( all_names_strategy.filter(lambda s: s not in out)) pattern_schemas = [ patterns[rgx] for rgx in sorted(patterns) if re.search(rgx, string=key) is not None ] if key in properties: pattern_schemas.insert(0, properties[key]) if pattern_schemas: out[key] = draw( merged_as_strategies(pattern_schemas, custom_formats)) else: out[key] = draw( from_schema(additional, custom_formats=custom_formats)) for k, v in dep_schemas.items(): if k in out and not make_validator(v).is_valid(out): out.pop(key) elements.reject() for k in set(dep_names).intersection(out): assume(set(out).issuperset(dep_names[k])) return out
def do_draw(self, data):
    # Start from the mandatory part of the dictionary, then sprinkle in a
    # random subset of the optional keys.
    result = data.draw(self.fixed)
    candidates = [k for k in self.optional_keys if not self.optional[k].is_empty]
    should_draw = cu.many(
        data,
        min_size=0,
        max_size=len(candidates),
        average_size=len(candidates) / 2,
    )
    while should_draw.more():
        # Choose an index uniformly, move that key to the tail and pop it,
        # so each optional key can be selected at most once.
        j = cu.integer_range(data, 0, len(candidates) - 1)
        candidates[-1], candidates[j] = candidates[j], candidates[-1]
        key = candidates.pop()
        result[key] = data.draw(self.optional[key])
    return result
def do_draw(self, data):
    # An empty element strategy can only ever produce the empty list.
    if self.element_strategy.is_empty:
        assert self.min_size == 0
        return []
    should_continue = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    out = []
    while should_continue.more():
        out.append(data.draw(self.element_strategy))
    return out
def do_draw(self, data):
    # With no drawable elements the only possible list is the empty one.
    if self.element_strategy.is_empty:
        assert self.min_size == 0
        return []
    more = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    values = []
    while more.more():
        values.append(data.draw(self.element_strategy))
    return values
def test_rejection_eventually_terminates_many():
    # With min_size=0, `many` gives up after a bounded number of
    # rejections rather than looping forever.
    data = ConjectureData.for_buffer([1] * 1000)
    many = cu.many(data, min_size=0, max_size=1000, average_size=100)
    rejections = 0
    while many.more():
        many.reject()
        rejections += 1
    assert rejections <= 100
def run_state_machine(factory, data):
    # Build the machine under test; legacy GenericStateMachine subclasses
    # are still accepted but warn, everything else must be rule-based.
    machine = factory()
    if isinstance(machine, GenericStateMachine) and not isinstance(
            machine, RuleBasedStateMachine):
        note_deprecation(
            "%s inherits from GenericStateMachine, which is deprecated. Use a "
            "RuleBasedStateMachine, or a test function with st.data(), instead."
            % (type(machine).__name__,),
            since="2019-05-29",
        )
    else:
        check_type(RuleBasedStateMachine, machine, "state_machine_factory()")
    data.conjecture_data.hypothesis_runner = machine
    n_steps = settings.stateful_step_count
    # Coin-flip helper: lets the engine shrink the number of steps taken.
    should_continue = cu.many(data.conjecture_data,
                              min_size=1, max_size=n_steps,
                              average_size=n_steps)
    print_steps = (current_build_context().is_final or
                   current_verbosity() >= Verbosity.debug)
    try:
        if print_steps:
            machine.print_start()
        machine.check_invariants()
        while should_continue.more():
            value = data.conjecture_data.draw(machine.steps())
            # Assign 'result' here in case 'execute_step' fails below
            result = multiple()
            try:
                result = machine.execute_step(value)
            finally:
                if print_steps:
                    # 'result' is only used if the step has target bundles.
                    # If it does, and the result is a 'MultipleResult',
                    # then 'print_step' prints a multi-variable assignment.
                    machine.print_step(value, result)
            machine.check_invariants()
    finally:
        # Teardown runs even when a step or invariant check raised.
        if print_steps:
            machine.print_end()
        machine.teardown()
def do_draw(self, data):
    # Build a list whose elements are pairwise distinct under self.key.
    more = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    seen_keys = set()
    values = []
    while more.more():
        candidate = data.draw(self.element_strategy)
        candidate_key = self.key(candidate)
        if candidate_key not in seen_keys:
            seen_keys.add(candidate_key)
            values.append(candidate)
        else:
            # Duplicate key: count as a rejection so generation terminates.
            more.reject()
    assert self.max_size >= len(values) >= self.min_size
    return values
def run_state_machine(factory, data):
    # Build the machine under test; legacy GenericStateMachine subclasses
    # are still accepted but warn, everything else must be rule-based.
    machine = factory()
    if isinstance(machine, GenericStateMachine) and not isinstance(
        machine, RuleBasedStateMachine
    ):
        note_deprecation(
            "%s inherits from GenericStateMachine, which is deprecated. Use a "
            "RuleBasedStateMachine, or a test function with st.data(), instead."
            % (type(machine).__name__,),
            since="2019-05-29",
        )
    else:
        check_type(RuleBasedStateMachine, machine, "state_machine_factory()")
    data.conjecture_data.hypothesis_runner = machine
    n_steps = settings.stateful_step_count
    # Coin-flip helper: lets the engine shrink the number of steps taken.
    should_continue = cu.many(
        data.conjecture_data, min_size=1, max_size=n_steps, average_size=n_steps
    )
    print_steps = (
        current_build_context().is_final or current_verbosity() >= Verbosity.debug
    )
    try:
        if print_steps:
            machine.print_start()
        machine.check_invariants()
        while should_continue.more():
            value = data.conjecture_data.draw(machine.steps())
            if print_steps:
                machine.print_step(value)
            machine.execute_step(value)
            machine.check_invariants()
    finally:
        # Teardown runs even when a step or invariant check raised.
        if print_steps:
            machine.print_end()
        machine.teardown()
def do_draw(self, data):
    # No possible elements: only the empty list can be drawn.
    if self.element_strategy.is_empty:
        assert self.min_size == 0
        return []
    should_draw = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    keys_seen = set()
    out = []
    while should_draw.more():
        value = data.draw(self.element_strategy)
        derived = self.key(value)
        if derived not in keys_seen:
            keys_seen.add(derived)
            out.append(value)
        else:
            # Duplicate under self.key: reject so generation terminates.
            should_draw.reject()
    assert self.max_size >= len(out) >= self.min_size
    return out
def do_draw(self, data):
    # Draw an array via the Array API namespace self.xp.
    if 0 in self.shape:
        # Zero-sized shapes contain no elements; nothing to draw.
        return self.xp.zeros(self.shape, dtype=self.dtype)

    if self.fill.is_empty:
        # We have no fill value (either because the user explicitly
        # disabled it or because the default behaviour was used and our
        # elements strategy does not produce reusable values), so we must
        # generate a fully dense array with a freshly drawn value for each
        # entry.
        elems = data.draw(
            st.lists(
                self.elements_strategy,
                min_size=self.array_size,
                max_size=self.array_size,
                unique=self.unique,
            ))
        try:
            result = self.xp.asarray(elems, dtype=self.dtype)
        except Exception as e:
            # Build a readable error: abbreviate long element lists and
            # report the distinct element types.
            if len(elems) <= 6:
                f_elems = str(elems)
            else:
                f_elems = f"[{elems[0]}, {elems[1]}, ..., {elems[-2]}, {elems[-1]}]"
            types = tuple(
                sorted({type(e) for e in elems}, key=lambda t: t.__name__))
            f_types = f"type {types[0]}" if len(types) == 1 else f"types {types}"
            raise InvalidArgument(
                f"Generated elements {f_elems} from strategy "
                f"{self.elements_strategy} could not be converted "
                f"to array of dtype {self.dtype}. "
                f"Consider if elements of {f_types} "
                f"are compatible with {self.dtype}.") from e
        for i in range(self.array_size):
            # Verify the namespace stored each value faithfully.
            self.check_set_value(elems[i], result[i], self.elements_strategy)
    else:
        # We draw arrays as "sparse with an offset". We assume not every
        # element will be assigned and so first draw a single value from our
        # fill strategy to create a full array. We then draw a collection of
        # index assignments within the array and assign fresh values from
        # our elements strategy to those indices.
        fill_val = data.draw(self.fill)
        try:
            result = self.xp.full(self.array_size, fill_val, dtype=self.dtype)
        except Exception as e:
            raise InvalidArgument(
                f"Could not create full array of dtype={self.dtype} "
                f"with fill value {fill_val!r}") from e
        sample = result[0]
        self.check_set_value(fill_val, sample, self.fill)
        if self.unique and not self.xp.all(self.xp.isnan(result)):
            # A unique array may only be filled with NaN (the one value
            # that never collides with itself).
            raise InvalidArgument(
                f"Array module {self.xp.__name__} did not recognise fill "
                f"value {fill_val!r} as NaN - instead got {sample!r}. "
                "Cannot fill unique array with non-NaN values.")

        elements = cu.many(
            data,
            min_size=0,
            max_size=self.array_size,
            # sqrt isn't chosen for any particularly principled reason. It
            # just grows reasonably quickly but sublinearly, and for small
            # arrays it represents a decent fraction of the array size.
            average_size=min(
                0.9 * self.array_size,  # ensure small arrays sometimes use fill
                max(10, math.sqrt(self.array_size)),  # ...but *only* sometimes
            ),
        )

        assigned = set()
        seen = set()

        while elements.more():
            i = cu.integer_range(data, 0, self.array_size - 1)
            if i in assigned:
                # Index already assigned: reject so `many` terminates.
                elements.reject()
                continue
            val = data.draw(self.elements_strategy)
            if self.unique:
                if val in seen:
                    elements.reject()
                    continue
                else:
                    seen.add(val)
            try:
                result[i] = val
            except Exception as e:
                raise InvalidArgument(
                    f"Could not add generated array element {val!r} "
                    f"of type {type(val)} to array of dtype {result.dtype}."
                ) from e
            self.check_set_value(val, result[i], self.elements_strategy)
            assigned.add(i)

    result = self.xp.reshape(result, self.shape)

    return result
def do_draw(self, data):
    # Draw a numpy array of self.shape / self.dtype.
    if 0 in self.shape:
        # Zero-sized shapes contain no elements; nothing to draw.
        return np.zeros(dtype=self.dtype, shape=self.shape)

    # Because Numpy allocates memory for strings at array creation, if we have
    # an unsized string dtype we'll fill an object array and then cast it back.
    unsized_string_dtype = (
        self.dtype.kind in ("S", "a", "U") and self.dtype.itemsize == 0)

    # This could legitimately be a np.empty, but the performance gains for
    # that would be so marginal that there's really not much point risking
    # undefined behaviour shenanigans.
    result = np.zeros(
        shape=self.array_size,
        dtype=object if unsized_string_dtype else self.dtype)

    if self.fill.is_empty:
        # We have no fill value (either because the user explicitly
        # disabled it or because the default behaviour was used and our
        # elements strategy does not produce reusable values), so we must
        # generate a fully dense array with a freshly drawn value for each
        # entry.
        if self.unique:
            elems = st.lists(
                self.element_strategy,
                min_size=self.array_size,
                max_size=self.array_size,
                unique=True,
            )
            for i, v in enumerate(data.draw(elems)):
                self.set_element(v, result, i)
        else:
            for i in range(len(result)):
                self.set_element(data.draw(self.element_strategy), result, i)
    else:
        # We draw numpy arrays as "sparse with an offset". We draw a
        # collection of index assignments within the array and assign
        # fresh values from our elements strategy to those indices. If at
        # the end we have not assigned every element then we draw a single
        # value from our fill strategy and use that to populate the
        # remaining positions with that strategy.
        elements = cu.many(
            data,
            min_size=0,
            max_size=self.array_size,
            # sqrt isn't chosen for any particularly principled reason. It
            # just grows reasonably quickly but sublinearly, and for small
            # arrays it represents a decent fraction of the array size.
            average_size=min(
                0.9 * self.array_size,  # ensure small arrays sometimes use fill
                max(10, math.sqrt(self.array_size)),  # ...but *only* sometimes
            ),
        )

        needs_fill = np.full(self.array_size, True)
        seen = set()
        while elements.more():
            i = cu.integer_range(data, 0, self.array_size - 1)
            if not needs_fill[i]:
                # Index already assigned: reject so `many` terminates.
                elements.reject()
                continue
            self.set_element(data.draw(self.element_strategy), result, i)
            if self.unique:
                if result[i] in seen:
                    elements.reject()
                    continue
                else:
                    seen.add(result[i])
            needs_fill[i] = False
        if needs_fill.any():
            # We didn't fill all of the indices in the early loop, so we
            # put a fill value into the rest.

            # We have to do this hilarious little song and dance to work
            # around numpy's special handling of iterable values. If the
            # value here were e.g. a tuple then neither array creation
            # nor putmask would do the right thing. But by creating an
            # array of size one and then assigning the fill value as a
            # single element, we both get an array with the right value in
            # it and putmask will do the right thing by repeating the
            # values of the array across the mask.
            one_element = np.zeros(
                shape=1, dtype=object if unsized_string_dtype else self.dtype)
            self.set_element(data.draw(self.fill), one_element, 0, fill=True)
            if unsized_string_dtype:
                one_element = one_element.astype(self.dtype)
            fill_value = one_element[0]
            if self.unique:
                # Only NaN can fill a unique array without collisions.
                try:
                    is_nan = np.isnan(fill_value)
                except TypeError:
                    is_nan = False

                if not is_nan:
                    raise InvalidArgument(
                        f"Cannot fill unique array with non-NaN value {fill_value!r}"
                    )

            np.putmask(result, needs_fill, one_element)

    if unsized_string_dtype:
        # Cast the object array back, and reject values that would be
        # silently truncated or otherwise altered by the cast.
        out = result.astype(self.dtype)
        mismatch = out != result
        if mismatch.any():
            raise InvalidArgument(
                "Array elements %r cannot be represented as dtype %r - instead "
                "they become %r. Use a more precise strategy, e.g. without "
                "trailing null bytes, as this will be an error future versions."
                % (result[mismatch], self.dtype, out[mismatch]))
        result = out

    result = result.reshape(self.shape).copy()
    # The copy above guarantees we return an array that owns its memory.
    assert result.base is None
    return result
def do_draw(self, data):
    # Draw a numpy array of self.shape / self.dtype.
    if 0 in self.shape:
        # Zero-sized shapes contain no elements; nothing to draw.
        return np.zeros(dtype=self.dtype, shape=self.shape)

    # This could legitimately be a np.empty, but the performance gains for
    # that would be so marginal that there's really not much point risking
    # undefined behaviour shenanigans.
    result = np.zeros(shape=self.array_size, dtype=self.dtype)

    if self.fill.is_empty:
        # We have no fill value (either because the user explicitly
        # disabled it or because the default behaviour was used and our
        # elements strategy does not produce reusable values), so we must
        # generate a fully dense array with a freshly drawn value for each
        # entry.
        if self.unique:
            seen = set()
            elements = cu.many(
                data,
                min_size=self.array_size,
                max_size=self.array_size,
                average_size=self.array_size,
            )
            i = 0
            while elements.more():
                # We assign first because this means we check for
                # uniqueness after numpy has converted it to the relevant
                # type for us. Because we don't increment the counter on
                # a duplicate we will overwrite it on the next draw.
                result[i] = data.draw(self.element_strategy)
                if result[i] not in seen:
                    seen.add(result[i])
                    i += 1
                else:
                    elements.reject()
        else:
            for i in hrange(len(result)):
                result[i] = data.draw(self.element_strategy)
    else:
        # We draw numpy arrays as "sparse with an offset". We draw a
        # collection of index assignments within the array and assign
        # fresh values from our elements strategy to those indices. If at
        # the end we have not assigned every element then we draw a single
        # value from our fill strategy and use that to populate the
        # remaining positions with that strategy.
        elements = cu.many(
            data,
            min_size=0,
            max_size=self.array_size,
            # sqrt isn't chosen for any particularly principled reason. It
            # just grows reasonably quickly but sublinearly, and for small
            # arrays it represents a decent fraction of the array size.
            average_size=math.sqrt(self.array_size),
        )

        needs_fill = np.full(self.array_size, True)
        seen = set()
        while elements.more():
            i = cu.integer_range(data, 0, self.array_size - 1)
            if not needs_fill[i]:
                # Index already assigned: reject so `many` terminates.
                elements.reject()
                continue
            result[i] = data.draw(self.element_strategy)
            if self.unique:
                if result[i] in seen:
                    elements.reject()
                    continue
                else:
                    seen.add(result[i])
            needs_fill[i] = False
        if needs_fill.any():
            # We didn't fill all of the indices in the early loop, so we
            # put a fill value into the rest.

            # We have to do this hilarious little song and dance to work
            # around numpy's special handling of iterable values. If the
            # value here were e.g. a tuple then neither array creation
            # nor putmask would do the right thing. But by creating an
            # array of size one and then assigning the fill value as a
            # single element, we both get an array with the right value in
            # it and putmask will do the right thing by repeating the
            # values of the array across the mask.
            one_element = np.zeros(shape=1, dtype=self.dtype)
            one_element[0] = data.draw(self.fill)
            fill_value = one_element[0]
            if self.unique:
                # Only NaN can fill a unique array without collisions.
                try:
                    is_nan = np.isnan(fill_value)
                except TypeError:
                    is_nan = False

                if not is_nan:
                    raise InvalidArgument(
                        'Cannot fill unique array with non-NaN '
                        'value %r' % (fill_value, ))

            np.putmask(result, needs_fill, one_element)

    return result.reshape(self.shape)
def do_draw(self, data):
    # Draw a numpy array of self.shape / self.dtype.
    if 0 in self.shape:
        # Zero-sized shapes contain no elements; nothing to draw.
        return np.zeros(dtype=self.dtype, shape=self.shape)

    # This could legitimately be a np.empty, but the performance gains for
    # that would be so marginal that there's really not much point risking
    # undefined behaviour shenanigans.
    result = np.zeros(shape=self.array_size, dtype=self.dtype)

    if self.fill.is_empty:
        # We have no fill value (either because the user explicitly
        # disabled it or because the default behaviour was used and our
        # elements strategy does not produce reusable values), so we must
        # generate a fully dense array with a freshly drawn value for each
        # entry.
        if self.unique:
            seen = set()
            elements = cu.many(
                data,
                min_size=self.array_size,
                max_size=self.array_size,
                average_size=self.array_size,
            )
            i = 0
            while elements.more():
                # We assign first because this means we check for
                # uniqueness after numpy has converted it to the relevant
                # type for us. Because we don't increment the counter on
                # a duplicate we will overwrite it on the next draw.
                result[i] = data.draw(self.element_strategy)
                if result[i] not in seen:
                    seen.add(result[i])
                    i += 1
                else:
                    elements.reject()
        else:
            for i in hrange(len(result)):
                result[i] = data.draw(self.element_strategy)
    else:
        # We draw numpy arrays as "sparse with an offset". We draw a
        # collection of index assignments within the array and assign
        # fresh values from our elements strategy to those indices. If at
        # the end we have not assigned every element then we draw a single
        # value from our fill strategy and use that to populate the
        # remaining positions with that strategy.
        elements = cu.many(
            data,
            min_size=0,
            max_size=self.array_size,
            # sqrt isn't chosen for any particularly principled reason. It
            # just grows reasonably quickly but sublinearly, and for small
            # arrays it represents a decent fraction of the array size.
            average_size=math.sqrt(self.array_size),
        )

        needs_fill = np.full(self.array_size, True)
        seen = set()
        while elements.more():
            i = cu.integer_range(data, 0, self.array_size - 1)
            if not needs_fill[i]:
                # Index already assigned: reject so `many` terminates.
                elements.reject()
                continue
            result[i] = data.draw(self.element_strategy)
            if self.unique:
                if result[i] in seen:
                    elements.reject()
                    continue
                else:
                    seen.add(result[i])
            needs_fill[i] = False
        if needs_fill.any():
            # We didn't fill all of the indices in the early loop, so we
            # put a fill value into the rest.

            # We have to do this hilarious little song and dance to work
            # around numpy's special handling of iterable values. If the
            # value here were e.g. a tuple then neither array creation
            # nor putmask would do the right thing. But by creating an
            # array of size one and then assigning the fill value as a
            # single element, we both get an array with the right value in
            # it and putmask will do the right thing by repeating the
            # values of the array across the mask.
            one_element = np.zeros(shape=1, dtype=self.dtype)
            one_element[0] = data.draw(self.fill)
            fill_value = one_element[0]
            if self.unique:
                # Only NaN can fill a unique array without collisions.
                try:
                    is_nan = np.isnan(fill_value)
                except TypeError:
                    is_nan = False

                if not is_nan:
                    raise InvalidArgument(
                        'Cannot fill unique array with non-NaN '
                        'value %r' % (fill_value,))

            np.putmask(result, needs_fill, one_element)

    return result.reshape(self.shape)
def test_astronomically_unlikely_draw_many():
    # Our internal helper doesn't underflow to zero or negative, but nor
    # will we ever generate an element for such a low average size.
    data = ConjectureData.for_buffer(1024 * [255])
    many = cu.many(data, min_size=0, max_size=10, average_size=1e-5)
    assert not many.more()