def test_will_shrink_covering_examples():
    best = [None]
    replaced = []

    def tagged(data):
        b = hbytes(data.draw_bytes(4))
        if any(b):
            data.add_tag('nonzero')
            if best[0] is None:
                best[0] = b
            elif b < best[0]:
                replaced.append(best[0])
                best[0] = b

    db = InMemoryExampleDatabase()
    runner = ConjectureRunner(tagged, settings=settings(
        max_examples=100, max_iterations=10000, max_shrinks=0,
        buffer_size=1024, database=db,
    ), database_key=b'stuff')
    runner.run()
    saved = set(all_values(db))
    assert best[0] in saved
    for r in replaced:
        assert r not in saved
def run_to_data(f):
    with deterministic_PRNG():
        runner = ConjectureRunner(f, settings=TEST_SETTINGS)
        runner.run()
        assert runner.interesting_examples
        last_data, = runner.interesting_examples.values()
        return last_data
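# Hedged usage sketch (hypothetical test, not from the source): run_to_data
# drives a test function until it finds one interesting example and hands
# back the final ConjectureData, so assertions can inspect the shrunk
# buffer directly. The test name and expected buffer are illustrative.
def test_run_to_data_returns_shrunk_interesting_example():
    def f(data):
        if data.draw_bits(8) == 7:
            data.mark_interesting()

    result = run_to_data(f)
    assert result.status == Status.INTERESTING
    # With the shrink phase enabled, the buffer should reduce to the
    # single byte that triggers mark_interesting.
    assert result.buffer == hbytes([7])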
def test_can_cover_without_a_database_key():
    def tagged(data):
        data.add_tag(0)

    runner = ConjectureRunner(tagged, settings=settings(), database_key=None)
    runner.run()
    assert len(runner.covering_examples) == 1
def test_saves_data_while_shrinking(monkeypatch):
    key = b'hi there'
    n = 5
    db = InMemoryExampleDatabase()
    assert list(db.fetch(key)) == []
    seen = set()

    monkeypatch.setattr(
        ConjectureRunner, 'generate_new_examples',
        lambda runner: runner.test_function(
            ConjectureData.for_buffer([255] * 10)))

    def f(data):
        x = data.draw_bytes(10)
        if sum(x) >= 2000 and len(seen) < n:
            seen.add(hbytes(x))
        if hbytes(x) in seen:
            data.mark_interesting()

    runner = ConjectureRunner(
        f, settings=settings(database=db), database_key=key)
    runner.run()
    assert runner.interesting_examples
    assert len(seen) == n
    in_db = non_covering_examples(db)
    assert in_db.issubset(seen)
    assert in_db == seen
def test_debug_data(capsys):
    buf = [0, 1, 2]

    def f(data):
        for x in hbytes(buf):
            if data.draw_bits(8) != x:
                data.mark_invalid()
            data.start_example(1)
            data.stop_example()
        data.mark_interesting()

    runner = ConjectureRunner(
        f,
        settings=settings(
            max_examples=5000,
            buffer_size=1024,
            database=None,
            suppress_health_check=HealthCheck.all(),
            verbosity=Verbosity.debug,
        ),
    )
    runner.cached_test_function(buf)
    runner.run()

    out, _ = capsys.readouterr()
    assert re.match(u"\\d+ bytes \\[.*\\] -> ", out)
    assert "INTERESTING" in out
def test_terminates_shrinks(n, monkeypatch):
    from hypothesis.internal.conjecture import engine

    db = InMemoryExampleDatabase()

    def generate_new_examples(self):
        def draw_bytes(data, n):
            return hbytes([255] * n)

        self.test_function(self.new_conjecture_data(draw_bytes))

    monkeypatch.setattr(
        ConjectureRunner, "generate_new_examples", generate_new_examples
    )
    monkeypatch.setattr(engine, "MAX_SHRINKS", n)

    runner = ConjectureRunner(
        slow_shrinker(),
        settings=settings(max_examples=5000, database=db),
        random=Random(0),
        database_key=b"key",
    )
    runner.run()
    last_data, = runner.interesting_examples.values()
    assert last_data.status == Status.INTERESTING
    assert runner.shrinks == n
    in_db = set(db.data[runner.secondary_key])
    assert len(in_db) == n
def test_can_delete_intervals(monkeypatch):
    def generate_new_examples(self):
        self.test_function(
            ConjectureData.for_buffer(hbytes([255] * 10 + [1, 3])))

    monkeypatch.setattr(
        ConjectureRunner, 'generate_new_examples', generate_new_examples)
    monkeypatch.setattr(
        Shrinker, 'shrink', fixate(Shrinker.adaptive_example_deletion)
    )

    def f(data):
        while True:
            n = data.draw_bits(8)
            if n == 255:
                continue
            elif n == 1:
                break
            else:
                data.mark_invalid()
        if data.draw_bits(8) == 3:
            data.mark_interesting()

    runner = ConjectureRunner(f, settings=settings(database=None))
    runner.run()
    x, = runner.interesting_examples.values()
    assert x.buffer == hbytes([1, 3])
def run_benchmark_for_sizes(benchmark, n_runs):
    click.echo('Calculating data for %s' % (benchmark.name,))
    total_sizes = []

    with click.progressbar(range(n_runs)) as runs:
        for _ in runs:
            sizes = []
            valid_seed = random.getrandbits(64).to_bytes(8, 'big')
            interesting_seed = random.getrandbits(64).to_bytes(8, 'big')

            def test_function(data):
                try:
                    try:
                        value = data.draw(benchmark.strategy)
                    except UnsatisfiedAssumption:
                        data.mark_invalid()
                    if not data.frozen:
                        if not benchmark.valid(valid_seed, data, value):
                            data.mark_invalid()
                        if benchmark.interesting(
                            interesting_seed, data, value
                        ):
                            data.mark_interesting()
                finally:
                    sizes.append(len(data.buffer))

            engine = ConjectureRunner(
                test_function, settings=BENCHMARK_SETTINGS, random=random
            )
            engine.run()
            assert len(sizes) > 0
            total_sizes.append(sum(sizes))
    return total_sizes
def test_database_clears_secondary_key():
    key = b"key"
    database = InMemoryExampleDatabase()

    def f(data):
        if data.draw_bits(8) == 10:
            data.mark_interesting()
        else:
            data.mark_invalid()

    runner = ConjectureRunner(
        f,
        settings=settings(
            max_examples=1,
            buffer_size=1024,
            database=database,
            suppress_health_check=HealthCheck.all(),
        ),
        database_key=key,
    )

    for i in range(10):
        database.save(runner.secondary_key, hbytes([i]))

    runner.cached_test_function([10])
    assert runner.interesting_examples

    assert len(set(database.fetch(key))) == 1
    assert len(set(database.fetch(runner.secondary_key))) == 10

    runner.clear_secondary_key()

    assert len(set(database.fetch(key))) == 1
    assert len(set(database.fetch(runner.secondary_key))) == 0
def test_database_uses_values_from_secondary_key():
    key = b'key'
    database = InMemoryExampleDatabase()

    def f(data):
        if data.draw_bits(8) >= 5:
            data.mark_interesting()
        else:
            data.mark_invalid()

    runner = ConjectureRunner(f, settings=settings(
        max_examples=1, buffer_size=1024, database=database,
        suppress_health_check=HealthCheck.all(),
    ), database_key=key)

    for i in range(10):
        database.save(runner.secondary_key, hbytes([i]))

    runner.test_function(ConjectureData.for_buffer(hbytes([10])))
    assert runner.interesting_examples

    assert len(set(database.fetch(key))) == 1
    assert len(set(database.fetch(runner.secondary_key))) == 10

    runner.clear_secondary_key()

    assert len(set(database.fetch(key))) == 1
    assert set(
        map(int_from_bytes, database.fetch(runner.secondary_key))
    ) == set(range(6, 11))

    v, = runner.interesting_examples.values()

    assert list(v.buffer) == [5]
def test_run_nothing():
    def f(data):
        assert False

    runner = ConjectureRunner(f, settings=settings(phases=()))
    runner.run()
    assert runner.call_count == 0
def test_saves_data_while_shrinking():
    key = b'hi there'
    n = 5
    db = ExampleDatabase(':memory:')
    assert list(db.fetch(key)) == []
    seen = set()

    def f(data):
        x = data.draw_bytes(512)
        if sum(x) >= 5000 and len(seen) < n:
            seen.add(hbytes(x))
        if hbytes(x) in seen:
            data.mark_interesting()

    runner = ConjectureRunner(
        f, settings=settings(database=db), database_key=key)
    runner.run()
    assert runner.last_data.status == Status.INTERESTING
    assert len(seen) == n
    in_db = set(
        v
        for vs in db.data.values()
        for v in vs
    )
    assert in_db.issubset(seen)
    assert in_db == seen
def test_clears_out_its_database_on_shrinking(
    initial_attempt, skip_target, monkeypatch
):
    def generate_new_examples(self):
        self.test_function(
            ConjectureData.for_buffer(hbytes([initial_attempt])))

    monkeypatch.setattr(
        ConjectureRunner, 'generate_new_examples', generate_new_examples)

    key = b'key'
    db = InMemoryExampleDatabase()

    def f(data):
        if data.draw_bits(8) >= 127:
            data.mark_interesting()

    runner = ConjectureRunner(
        f, settings=settings(database=db, max_examples=256),
        database_key=key, random=Random(0),
    )

    for n in hrange(256):
        if n != 127 or not skip_target:
            db.save(runner.secondary_key, hbytes([n]))
    runner.run()
    assert len(runner.interesting_examples) == 1
    for b in db.fetch(runner.secondary_key):
        assert b[0] >= 127
    assert len(list(db.fetch(runner.database_key))) == 1
def test_note_events(event):
    def f(data):
        data.note_event(event)
        data.draw_bytes(1)

    runner = ConjectureRunner(f)
    runner.run()

    assert runner.event_call_counts[str(event)] == runner.call_count > 0
def test_overruns_if_prefix():
    runner = ConjectureRunner(
        lambda data: [data.draw_bits(1) for _ in range(2)],
        settings=TEST_SETTINGS,
        random=Random(0),
    )
    runner.cached_test_function(b"\0\0")
    assert runner.tree.rewrite(b"\0")[1] == Status.OVERRUN
def test_exhaust_space():
    with deterministic_PRNG():
        runner = ConjectureRunner(
            lambda data: data.draw_bits(1), settings=TEST_SETTINGS
        )
        runner.run()
        assert runner.tree.is_exhausted
        assert runner.valid_examples == 2
def test_stops_after_max_examples_when_generating():
    seen = []

    def f(data):
        seen.append(data.draw_bytes(1))

    runner = ConjectureRunner(
        f, settings=settings(max_examples=1, database=None))
    runner.run()
    assert len(seen) == 1
def test_detects_too_small_block_starts():
    def f(data):
        data.draw_bytes(8)
        data.mark_interesting()

    runner = ConjectureRunner(f, settings=settings(database=None))
    r = ConjectureData.for_buffer(hbytes(8))
    runner.test_function(r)
    assert r.status == Status.INTERESTING
    assert not runner.prescreen_buffer(hbytes([255] * 7))
def run_to_buffer(f):
    runner = ConjectureRunner(f, settings=settings(
        max_examples=5000, max_iterations=10000, max_shrinks=MAX_SHRINKS,
        buffer_size=1024, database=None,
    ))
    runner.run()
    assert runner.last_data.status == Status.INTERESTING
    return hbytes(runner.last_data.buffer)
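# Hedged usage sketch (hypothetical test, not from the source): helpers of
# this shape are typically applied as decorators, rebinding the decorated
# name to the shrunk buffer itself.
@run_to_buffer
def minimal_nonzero(data):
    # Interesting whenever the first byte is nonzero; the shrinker should
    # then reduce that byte to its smallest nonzero value.
    if data.draw_bits(8) != 0:
        data.mark_interesting()

assert minimal_nonzero == hbytes([1])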
def test_always_reduces_integers_to_smallest_suitable_sizes(problem):
    n, blob = problem
    blob = hbytes(blob)

    try:
        d = ConjectureData.for_buffer(blob)
        k = d.draw(st.integers())
        stop = blob[len(d.buffer)]
    except (StopTest, IndexError):
        reject()

    assume(k > n)
    assume(stop > 0)

    def f(data):
        k = data.draw(st.integers())
        data.output = repr(k)
        if data.draw_bits(8) == stop and k >= n:
            data.mark_interesting()

    runner = ConjectureRunner(f, random=Random(0), settings=settings(
        suppress_health_check=HealthCheck.all(), timeout=unlimited,
        phases=(Phase.shrink,), database=None, verbosity=Verbosity.debug
    ), database_key=None)

    runner.test_function(ConjectureData.for_buffer(blob))

    assert runner.interesting_examples
    v, = runner.interesting_examples.values()

    shrinker = runner.new_shrinker(
        v, lambda x: x.status == Status.INTERESTING)

    shrinker.clear_passes()
    shrinker.add_new_pass('minimize_individual_blocks')

    shrinker.shrink()

    v = shrinker.shrink_target

    m = ConjectureData.for_buffer(v.buffer).draw(st.integers())
    assert m == n

    # Upper bound on the length needed is calculated as follows:
    # * We have an initial byte at the beginning to decide the length of the
    #   integer.
    # * We have a terminal byte as the stop value.
    # * The rest is the integer payload. This should be n. Including the sign
    #   bit, n needs (1 + n.bit_length()) / 8 bytes (rounded up). But we only
    #   have power of two sizes, so it may be up to a factor of two more than
    #   that.
    bits_needed = 1 + n.bit_length()
    actual_bits_needed = min(
        [s for s in WideRangeIntStrategy.sizes if s >= bits_needed])
    bytes_needed = actual_bits_needed // 8
    # 3 extra bytes: two for the sampler, one for the capping value.
    assert len(v.buffer) == 3 + bytes_needed
def accept(f):
    runner = ConjectureRunner(f, settings=settings(
        max_examples=100, phases=no_shrink, buffer_size=1024,
        database=None, **kwargs
    ))

    with pytest.raises(FailedHealthCheck) as e:
        runner.run()
    assert e.value.health_check == label
    assert not runner.interesting_examples
def test_run_with_timeout_while_boring():
    def f(data):
        time.sleep(0.1)

    runner = ConjectureRunner(
        f, settings=settings(database=None, timeout=0.2))
    start = time.time()
    runner.run()
    assert time.time() <= start + 1
    assert runner.last_data.status == Status.VALID
def test_run_with_timeout_while_boring():
    def f(data):
        time.sleep(0.1)

    runner = ConjectureRunner(
        f, settings=settings(database=None, timeout=0.2))
    start = time.time()
    runner.run()
    assert time.time() <= start + 1
    assert runner.valid_examples > 0
def test_can_navigate_to_a_valid_example():
    def f(data):
        i = int_from_bytes(data.draw_bytes(2))
        data.draw_bytes(i)
        data.mark_interesting()

    runner = ConjectureRunner(f, settings=settings(
        max_examples=5000, buffer_size=2, database=None,
    ))
    runner.run()
    assert runner.interesting_examples
def run_to_buffer(f):
    with deterministic_PRNG():
        runner = ConjectureRunner(f, settings=settings(
            max_examples=5000, buffer_size=1024, database=None,
            suppress_health_check=HealthCheck.all(),
        ))
        runner.run()
        assert runner.interesting_examples
        last_data, = runner.interesting_examples.values()
        return hbytes(last_data.buffer)
def x(data):
    rnd = Random(data.draw_bytes(1))

    def g(d2):
        d2.draw_bytes(1)
        data.mark_interesting()

    runner = ConjectureRunner(g, random=rnd)
    children.append(runner)
    runner.run()
    if runner.interesting_examples:
        data.mark_interesting()
def test_max_shrinks_can_disable_shrinking():
    seen = set()

    def f(data):
        seen.add(hbytes(data.draw_bytes(32)))
        data.mark_interesting()

    runner = ConjectureRunner(
        f, settings=settings(database=None, max_shrinks=0))
    runner.run()
    assert len(seen) == 1
def test_phases_can_disable_shrinking():
    seen = set()

    def f(data):
        seen.add(hbytes(data.draw_bytes(32)))
        data.mark_interesting()

    runner = ConjectureRunner(f, settings=settings(
        database=None, phases=(Phase.reuse, Phase.generate),
    ))
    runner.run()
    assert len(seen) == 1
def accept(f):
    with deterministic_PRNG():
        runner = ConjectureRunner(f, settings=settings(
            max_examples=5000, buffer_size=1024, database=None,
            suppress_health_check=HealthCheck.all(),
        ))
        runner.test_function(ConjectureData.for_buffer(start))
        assert runner.interesting_examples
        last_data, = runner.interesting_examples.values()
        return runner.new_shrinker(
            last_data, lambda d: d.status == Status.INTERESTING
        )
def test_saves_on_interrupt():
    def interrupts(data):
        raise KeyboardInterrupt()

    db = InMemoryExampleDatabase()
    runner = ConjectureRunner(
        interrupts, settings=settings(database=db), database_key=b'key')

    with pytest.raises(KeyboardInterrupt):
        runner.run()
    assert db.data
def learner_for(strategy):
    """Returns an LStar learner that predicts whether a buffer corresponds
    to a discard free choice sequence leading to a valid value for this
    strategy."""
    try:
        return LEARNERS[strategy]
    except KeyError:
        pass

    def test_function(data):
        try:
            data.draw(strategy)
        except UnsatisfiedAssumption:
            data.mark_invalid()
        data.mark_interesting()

    runner = ConjectureRunner(
        test_function,
        settings=settings(
            database=None,
            verbosity=Verbosity.quiet,
            suppress_health_check=HealthCheck.all(),
        ),
        random=Random(0),
        ignore_limits=True,
    )

    def predicate(s):
        result = runner.cached_test_function(s)
        if result.status < Status.VALID:
            return False
        if result.has_discards:
            return False
        return result.buffer == s

    learner = LStar(predicate)

    runner.run()

    (v,) = runner.interesting_examples.values()

    # We make sure the learner has properly learned small examples.
    # This is all fairly ad hoc but is mostly designed to get it
    # to understand what the smallest example is and avoid any
    # loops at the beginning of the DFA that don't really exist.
    learner.learn(v.buffer)
    for n in [1, 2, 3]:
        for _ in range(5):
            learner.learn(uniform(runner.random, n) + v.buffer)

    prev = -1
    while learner.generation != prev:
        prev = learner.generation

        for _ in range(10):
            s = uniform(runner.random, len(v.buffer)) + bytes(BUFFER_SIZE)
            learner.learn(s)
            data = runner.cached_test_function(s)
            if data.status >= Status.VALID:
                learner.learn(data.buffer)

    LEARNERS[strategy] = learner
    return learner
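# Hedged usage sketch (hypothetical, not from the source): only LStar
# methods already used above (`learn`, `generation`) are assumed here.
# The learner is cached per strategy, so repeated calls are cheap.
learner = learner_for(st.booleans())
before = learner.generation
# Feed a known-small buffer; generation only advances when the learner
# revises its DFA in response to a counterexample.
learner.learn(hbytes([0]))
assert learner.generation >= before
assert learner_for(st.booleans()) is learner  # cached in LEARNERS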
def find(specifier, condition, settings=None, random=None, database_key=None):
    """Returns the minimal example from the given strategy ``specifier`` that
    matches the predicate function ``condition``."""
    settings = settings or Settings(
        max_examples=2000,
        min_satisfying_examples=0,
        max_shrinks=2000,
    )
    settings = Settings(settings, perform_health_check=False)

    if database_key is None and settings.database is not None:
        database_key = function_digest(condition)

    if not isinstance(specifier, SearchStrategy):
        raise InvalidArgument(
            'Expected SearchStrategy but got %r of type %s' % (
                specifier, type(specifier).__name__))
    specifier.validate()

    search = specifier

    random = random or new_random()
    successful_examples = [0]
    last_data = [None]

    def template_condition(data):
        with BuildContext(data):
            try:
                data.is_find = True
                result = data.draw(search)
                data.note(result)
                success = condition(result)
            except UnsatisfiedAssumption:
                data.mark_invalid()

        if success:
            successful_examples[0] += 1

        if settings.verbosity == Verbosity.verbose:
            if not successful_examples[0]:
                report(lambda: u'Trying example %s' % (nicerepr(result),))
            elif success:
                if successful_examples[0] == 1:
                    report(lambda: u'Found satisfying example %s' % (
                        nicerepr(result),))
                    last_data[0] = data
                elif (sort_key(hbytes(data.buffer)) <
                        sort_key(last_data[0].buffer)):
                    report(lambda: u'Shrunk example to %s' % (
                        nicerepr(result),))
                    last_data[0] = data
        if success and not data.frozen:
            data.mark_interesting()

    start = time.time()
    runner = ConjectureRunner(
        template_condition, settings=settings, random=random,
        database_key=database_key,
    )
    runner.run()
    note_engine_for_statistics(runner)
    run_time = time.time() - start
    if runner.interesting_examples:
        data = ConjectureData.for_buffer(
            list(runner.interesting_examples.values())[0].buffer)
        with BuildContext(data):
            return data.draw(search)
    if (runner.valid_examples <= settings.min_satisfying_examples and
            runner.exit_reason != ExitReason.finished):
        if settings.timeout > 0 and run_time > settings.timeout:
            raise Timeout(
                ('Ran out of time before finding enough valid examples for '
                 '%s. Only %d valid examples found in %.2f seconds.') % (
                    get_pretty_function_description(condition),
                    runner.valid_examples, run_time))
        else:
            raise Unsatisfiable(
                ('Unable to satisfy assumptions of '
                 '%s. Only %d examples considered satisfied assumptions') % (
                    get_pretty_function_description(condition),
                    runner.valid_examples,))

    raise NoSuchExample(get_pretty_function_description(condition))
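# Hedged usage sketch (hypothetical, not from the source; assumes
# `hypothesis.strategies` is importable as `st`): find returns the shrunk
# value itself, so the minimal list of integers summing to at least 10
# should be a single element.
smallest = find(st.lists(st.integers()), lambda xs: sum(xs) >= 10)
assert smallest == [10]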
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    database_key = str_to_bytes(fully_qualified_name(self.test))
    start_time = time.time()
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings, random=self.random,
        database_key=database_key,
    )
    runner.run()
    note_engine_for_statistics(runner)
    run_time = time.time() - start_time
    timed_out = (self.settings.timeout > 0 and
                 run_time >= self.settings.timeout)
    if runner.last_data is None:
        return
    if runner.last_data.status == Status.INTERESTING:
        self.falsifying_example = runner.last_data.buffer
        if self.settings.database is not None:
            self.settings.database.save(
                database_key, self.falsifying_example)
    else:
        if runner.valid_examples < min(
            self.settings.min_satisfying_examples,
            self.settings.max_examples,
        ) and not (runner.exit_reason == ExitReason.finished and
                   self.at_least_one_success):
            if timed_out:
                raise Timeout(
                    ('Ran out of time before finding a satisfying '
                     'example for %s. Only found %d examples '
                     'in %.2fs.') % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples, run_time))
            else:
                raise Unsatisfiable(
                    ('Unable to satisfy assumptions of hypothesis '
                     '%s. Only %d examples considered '
                     'satisfied assumptions') % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples,))

    if self.falsifying_example is None:
        return

    assert self.last_exception is not None

    try:
        with self.settings:
            self.test_runner(
                ConjectureData.for_buffer(self.falsifying_example),
                reify_and_execute(
                    self.search_strategy, self.test,
                    print_example=True, is_final=True))
    except (UnsatisfiedAssumption, StopTest):
        report(traceback.format_exc())
        raise Flaky(
            'Unreliable assumption: An example which satisfied '
            'assumptions on the first run now fails it.')

    report(
        'Failed to reproduce exception. Expected: \n' +
        self.last_exception,
    )

    filter_message = (
        'Unreliable test data: Failed to reproduce a failure '
        'and then when it came to recreating the example in '
        'order to print the test data with a flaky result '
        'the example was filtered out (by e.g. a '
        'call to filter in your strategy) when we didn\'t '
        'expect it to be.')

    try:
        self.test_runner(
            ConjectureData.for_buffer(self.falsifying_example),
            reify_and_execute(
                self.search_strategy,
                test_is_flaky(self.test, self.repr_for_last_exception),
                print_example=True, is_final=True))
    except (UnsatisfiedAssumption, StopTest):
        raise Flaky(filter_message)
def normalize(
    base_name,
    test_function,
    *,
    required_successes=100,
    allowed_to_update=False,
    max_dfas=10,
):
    """Attempt to ensure that this test function successfully normalizes -
    i.e. whenever it declares a test case to be interesting, we are able to
    shrink that to the same interesting test case (which logically should be
    the shortlex minimal interesting test case, though we may not be able to
    detect if it is).

    Will run until we have seen ``required_successes`` many interesting
    test cases in a row normalize to the same value.

    If ``allowed_to_update`` is True, whenever we fail to normalize we will
    learn a new DFA-based shrink pass that allows us to make progress. Any
    learned DFAs will be written back into the learned DFA file at the end
    of this function. If ``allowed_to_update`` is False, this will raise an
    error as soon as it encounters a failure to normalize.

    Additionally, if more than ``max_dfas`` DFAs are required to normalize
    this test function, this function will raise an error - it's essentially
    designed for small patches that other shrink passes don't cover, and if
    it's learning too many patches then you need a better shrink pass than
    this can provide.
    """
    # Need import inside the function to avoid circular imports
    from hypothesis.internal.conjecture.engine import (
        BUFFER_SIZE,
        ConjectureRunner,
    )

    runner = ConjectureRunner(
        test_function,
        settings=settings(
            database=None, suppress_health_check=HealthCheck.all()
        ),
        ignore_limits=True,
    )

    seen = set()

    dfas_added = 0

    found_interesting = False
    consecutive_successes = 0
    failures_to_find_interesting = 0
    while consecutive_successes < required_successes:
        attempt = runner.cached_test_function(b"", extend=BUFFER_SIZE)
        if attempt.status < Status.INTERESTING:
            failures_to_find_interesting += 1
            assert (
                found_interesting or failures_to_find_interesting <= 1000
            ), "Test function seems to have no interesting test cases"
            continue

        found_interesting = True

        target = attempt.interesting_origin

        def shrinking_predicate(d):
            return (
                d.status == Status.INTERESTING
                and d.interesting_origin == target
            )

        if target not in seen:
            seen.add(target)
            runner.shrink(attempt, shrinking_predicate)
            continue

        previous = fully_shrink(
            runner, runner.interesting_examples[target], shrinking_predicate
        )
        current = fully_shrink(runner, attempt, shrinking_predicate)

        if current.buffer == previous.buffer:
            consecutive_successes += 1
            continue

        consecutive_successes = 0

        if not allowed_to_update:
            raise FailedToNormalise(
                "Shrinker failed to normalize %r to %r and we are not "
                "allowed to learn new DFAs." % (previous.buffer, current.buffer)
            )

        if dfas_added >= max_dfas:
            raise FailedToNormalise(
                "Test function is too hard to learn: Added %d DFAs and "
                "still not done." % (dfas_added,)
            )

        dfas_added += 1

        new_dfa = learn_a_new_dfa(
            runner, previous.buffer, current.buffer, shrinking_predicate
        )

        name = (
            base_name
            + "-"
            + hashlib.sha256(repr(new_dfa).encode("utf-8")).hexdigest()[:10]
        )

        # If there is a name collision this DFA should already be being
        # used for shrinking, so we should have already been able to shrink
        # v further.
        assert name not in SHRINKING_DFAS
        SHRINKING_DFAS[name] = new_dfa

    if dfas_added > 0:
        # We've learned one or more DFAs in the course of normalising, so now
        # we update the file to record those for posterity.
        update_learned_dfas()
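# Hedged usage sketch (hypothetical test function, not from the source):
# normalize is run over a named test function; with allowed_to_update=False
# it raises FailedToNormalise rather than learning new DFA shrink passes
# whenever two interesting inputs fail to shrink to the same buffer.
def always_interesting_on_large_sum(data):
    if sum(data.draw_bytes(4)) >= 100:
        data.mark_interesting()

normalize(
    "always_interesting_on_large_sum",
    always_interesting_on_large_sum,
    allowed_to_update=False,  # raise instead of learning new DFAs
)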
def find(
    specifier,  # type: SearchStrategy
    condition,  # type: Callable[[Any], bool]
    settings=None,  # type: Settings
    random=None,  # type: Any
    database_key=None,  # type: bytes
):
    # type: (...) -> Any
    """Returns the minimal example from the given strategy ``specifier`` that
    matches the predicate function ``condition``."""
    note_deprecation(
        "`find(s, f)` is deprecated, because it is rarely used but takes "
        "ongoing work to maintain as we upgrade other parts of Hypothesis.",
        since="2019-07-11",
    )
    if settings is None:
        settings = Settings(max_examples=2000)
    settings = Settings(settings, suppress_health_check=HealthCheck.all())

    if database_key is None and settings.database is not None:
        database_key = function_digest(condition)

    if not isinstance(specifier, SearchStrategy):
        raise InvalidArgument(
            "Expected SearchStrategy but got %r of type %s"
            % (specifier, type(specifier).__name__)
        )
    specifier.validate()

    search = specifier

    random = random or new_random()
    successful_examples = [0]
    last_data = [None]
    last_repr = [None]

    def template_condition(data):
        with deterministic_PRNG():
            with BuildContext(data):
                try:
                    data.is_find = True
                    result = data.draw(search)
                    data.note(result)
                    success = condition(result)
                except UnsatisfiedAssumption:
                    data.mark_invalid()

        if success:
            successful_examples[0] += 1

        if settings.verbosity >= Verbosity.verbose:
            if not successful_examples[0]:
                report(
                    u"Tried non-satisfying example %s" % (nicerepr(result),))
            elif success:
                if successful_examples[0] == 1:
                    last_repr[0] = nicerepr(result)
                    report(u"Found satisfying example %s" % (last_repr[0],))
                    last_data[0] = data
                elif (
                    sort_key(hbytes(data.buffer))
                    < sort_key(last_data[0].buffer)
                ) and nicerepr(result) != last_repr[0]:
                    last_repr[0] = nicerepr(result)
                    report(u"Shrunk example to %s" % (last_repr[0],))
                    last_data[0] = data

        if success and not data.frozen:
            data.mark_interesting()

    runner = ConjectureRunner(
        template_condition,
        settings=settings,
        random=random,
        database_key=database_key,
    )
    runner.run()
    note_engine_for_statistics(runner)
    if runner.interesting_examples:
        data = ConjectureData.for_buffer(
            list(runner.interesting_examples.values())[0].buffer
        )
        with deterministic_PRNG():
            with BuildContext(data):
                return data.draw(search)
    if runner.valid_examples == 0 and (
        runner.exit_reason != ExitReason.finished
    ):
        raise Unsatisfiable(
            "Unable to satisfy assumptions of %s."
            % (get_pretty_function_description(condition),)
        )

    raise NoSuchExample(get_pretty_function_description(condition))
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    if global_force_seed is None:
        database_key = function_digest(self.test)
    else:
        database_key = None
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings,
        random=self.random,
        database_key=database_key,
    )
    try:
        runner.run()
    finally:
        self.used_examples_from_database = \
            runner.used_examples_from_database
    note_engine_for_statistics(runner)

    if runner.call_count == 0:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer),
            reverse=True,
        )
    else:
        if runner.valid_examples == 0:
            raise Unsatisfiable(
                "Unable to satisfy assumptions of hypothesis %s."
                % (get_pretty_function_description(self.test),))

    if not self.falsifying_examples:
        return

    self.failed_normally = True

    flaky = 0

    for falsifying_example in self.falsifying_examples:
        ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
        self.__was_flaky = False
        assert falsifying_example.__expected_exception is not None
        try:
            self.execute(
                ran_example,
                print_example=True,
                is_final=True,
                expected_failure=(
                    falsifying_example.__expected_exception,
                    falsifying_example.__expected_traceback,
                ),
            )
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                "Unreliable assumption: An example which satisfied "
                "assumptions on the first run now fails it.")
        except BaseException as e:
            if len(self.falsifying_examples) <= 1:
                raise
            tb = get_trimmed_traceback()
            report("".join(traceback.format_exception(type(e), e, tb)))
        finally:  # pragma: no cover
            # This section is in fact entirely covered by the tests in
            # test_reproduce_failure, but it seems to trigger a lovely set
            # of coverage bugs: The branches show up as uncovered (despite
            # definitely being covered - you can add an assert False else
            # branch to verify this and see it fail - and additionally the
            # second branch still complains about lack of coverage even if
            # you add a pragma: no cover to it!
            # See https://bitbucket.org/ned/coveragepy/issues/623/
            if self.settings.print_blob is not PrintSettings.NEVER:
                failure_blob = encode_failure(falsifying_example.buffer)
                # Have to use the example we actually ran, not the original
                # falsifying example! Otherwise we won't catch problems
                # where the repr of the generated example doesn't parse.
                can_use_repr = ran_example.can_reproduce_example_from_repr
                if self.settings.print_blob is PrintSettings.ALWAYS or (
                    self.settings.print_blob is PrintSettings.INFER
                    and self.settings.verbosity >= Verbosity.normal
                    and not can_use_repr
                    and len(failure_blob) < 200
                ):
                    report((
                        "\nYou can reproduce this example by temporarily "
                        "adding @reproduce_failure(%r, %r) as a decorator "
                        "on your test case") % (__version__, failure_blob))
        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky(
            ("Hypothesis found %d distinct failures, but %d of them "
             "exhibited some sort of flaky behaviour.") % (
                len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures(
            ("Hypothesis found %d distinct failures.") % (
                len(self.falsifying_examples),))
def test_always_reduces_integers_to_smallest_suitable_sizes(problem):
    n, blob = problem
    blob = hbytes(blob)

    try:
        d = ConjectureData.for_buffer(blob)
        k = d.draw(st.integers())
        stop = blob[len(d.buffer)]
    except (StopTest, IndexError):
        reject()

    assume(k > n)
    assume(stop > 0)

    def f(data):
        k = data.draw(st.integers())
        data.output = repr(k)
        if data.draw_bits(8) == stop and k >= n:
            data.mark_interesting()

    runner = ConjectureRunner(
        f,
        random=Random(0),
        settings=settings(
            suppress_health_check=HealthCheck.all(),
            phases=(Phase.shrink,),
            database=None,
            verbosity=Verbosity.debug,
        ),
        database_key=None,
    )

    runner.cached_test_function(blob)

    assert runner.interesting_examples
    (v,) = runner.interesting_examples.values()

    shrinker = runner.new_shrinker(
        v, lambda x: x.status == Status.INTERESTING)

    shrinker.fixate_shrink_passes(["minimize_individual_blocks"])

    v = shrinker.shrink_target

    m = ConjectureData.for_buffer(v.buffer).draw(st.integers())
    assert m == n

    # Upper bound on the length needed is calculated as follows:
    # * We have an initial byte at the beginning to decide the length of the
    #   integer.
    # * We have a terminal byte as the stop value.
    # * The rest is the integer payload. This should be n. Including the sign
    #   bit, n needs (1 + n.bit_length()) / 8 bytes (rounded up). But we only
    #   have power of two sizes, so it may be up to a factor of two more than
    #   that.
    bits_needed = 1 + n.bit_length()
    actual_bits_needed = min(
        [s for s in WideRangeIntStrategy.sizes if s >= bits_needed]
    )
    bytes_needed = actual_bits_needed // 8
    # 3 extra bytes: two for the sampler, one for the capping value.
    assert len(v.buffer) == 3 + bytes_needed
def test_overruns_if_not_enough_bytes_for_block():
    runner = ConjectureRunner(
        lambda data: data.draw_bytes(2),
        settings=TEST_SETTINGS,
        random=Random(0),
    )
    runner.cached_test_function(b"\0\0")
    assert runner.tree.rewrite(b"\0")[1] == Status.OVERRUN
def wrapped_test(*arguments, **kwargs):
    settings = wrapped_test._hypothesis_internal_use_settings
    if wrapped_test._hypothesis_internal_use_seed is not None:
        random = Random(wrapped_test._hypothesis_internal_use_seed)
    elif settings.derandomize:
        random = Random(function_digest(test))
    else:
        random = new_random()

    import hypothesis.strategies as sd

    selfy = None
    arguments, kwargs = convert_positional_arguments(
        wrapped_test, arguments, kwargs)

    # If the test function is a method of some kind, the bound object
    # will be the first named argument if there are any, otherwise the
    # first vararg (if any).
    if argspec.args:
        selfy = kwargs.get(argspec.args[0])
    elif arguments:
        selfy = arguments[0]
    test_runner = new_style_executor(selfy)

    for example in reversed(
            getattr(wrapped_test, 'hypothesis_explicit_examples', ())):
        if example.args:
            if len(example.args) > len(original_argspec.args):
                raise InvalidArgument(
                    'example has too many arguments for test. '
                    'Expected at most %d but got %d' % (
                        len(original_argspec.args), len(example.args)))
            example_kwargs = dict(zip(
                original_argspec.args[-len(example.args):],
                example.args))
        else:
            example_kwargs = example.kwargs
        if Phase.explicit not in settings.phases:
            continue
        example_kwargs.update(kwargs)
        # Note: Test may mutate arguments and we can't rerun explicit
        # examples, so we have to calculate the failure message at this
        # point rather than later.
        message_on_failure = 'Falsifying example: %s(%s)' % (
            test.__name__, arg_string(test, arguments, example_kwargs))
        try:
            with BuildContext(None) as b:
                test_runner(
                    None,
                    lambda data: test(*arguments, **example_kwargs))
        except BaseException:
            traceback.print_exc()
            report(message_on_failure)
            for n in b.notes:
                report(n)
            raise

    if settings.max_examples <= 0:
        return

    arguments = tuple(arguments)

    given_specifier = sd.tuples(
        sd.just(arguments),
        sd.fixed_dictionaries(generator_kwargs).map(
            lambda args: dict(args, **kwargs)))

    def fail_health_check(message, label):
        if label in settings.suppress_health_check:
            return
        message += (
            '\nSee https://hypothesis.readthedocs.io/en/latest/health'
            'checks.html for more information about this. ')
        message += (
            'If you want to disable just this health check, add %s '
            'to the suppress_health_check settings for this test.') % (
                label,)
        raise FailedHealthCheck(message)

    search_strategy = given_specifier
    if selfy is not None:
        search_strategy = WithRunner(search_strategy, selfy)

    search_strategy.validate()

    perform_health_check = settings.perform_health_check
    perform_health_check &= Settings.default.perform_health_check

    from hypothesis.internal.conjecture.data import ConjectureData, \
        Status, StopTest

    if not (Phase.reuse in settings.phases or
            Phase.generate in settings.phases):
        return

    if perform_health_check:
        health_check_random = Random(random.getrandbits(128))
        # We "pre warm" the health check with one draw to give it some
        # time to calculate any cached data. This prevents the case
        # where the first draw of the health check takes ages because
        # of loading unicode data the first time.
        data = ConjectureData(
            max_length=settings.buffer_size,
            draw_bytes=lambda data, n, distribution:
            distribution(health_check_random, n))
        with Settings(settings, verbosity=Verbosity.quiet):
            try:
                test_runner(data, reify_and_execute(
                    search_strategy,
                    lambda *args, **kwargs: None,
                ))
            except BaseException:
                pass
        count = 0
        overruns = 0
        filtered_draws = 0
        start = time.time()
        while (count < 10 and time.time() < start + 1 and
               filtered_draws < 50 and overruns < 20):
            try:
                data = ConjectureData(
                    max_length=settings.buffer_size,
                    draw_bytes=lambda data, n, distribution:
                    distribution(health_check_random, n))
                with Settings(settings, verbosity=Verbosity.quiet):
                    test_runner(data, reify_and_execute(
                        search_strategy,
                        lambda *args, **kwargs: None,
                    ))
                count += 1
            except UnsatisfiedAssumption:
                filtered_draws += 1
            except StopTest:
                if data.status == Status.INVALID:
                    filtered_draws += 1
                else:
                    assert data.status == Status.OVERRUN
                    overruns += 1
            except InvalidArgument:
                raise
            except Exception:
                if (HealthCheck.exception_in_generation in
                        settings.suppress_health_check):
                    raise
                report(traceback.format_exc())
                if test_runner is default_new_style_executor:
                    fail_health_check(
                        'An exception occurred during data '
                        'generation in initial health check. '
                        'This indicates a bug in the strategy. '
                        'This could either be a Hypothesis bug or '
                        "an error in a function you've passed to "
                        'it to construct your data.',
                        HealthCheck.exception_in_generation,
                    )
                else:
                    fail_health_check(
                        'An exception occurred during data '
                        'generation in initial health check. '
                        'This indicates a bug in the strategy. '
                        'This could either be a Hypothesis bug or '
                        "an error in a function you've passed to "
                        'it to construct your data. Additionally, '
                        'you have a custom executor, which means '
                        'that this could be your executor failing '
                        'to handle a function which returns None. ',
                        HealthCheck.exception_in_generation,
                    )
        if overruns >= 20 or (not count and overruns > 0):
            fail_health_check((
                'Examples routinely exceeded the max allowable size. '
                '(%d examples overran while generating %d valid ones)'
                '. Generating examples this large will usually lead to'
                ' bad results. You should try setting average_size or '
                'max_size parameters on your collections and turning '
                'max_leaves down on recursive() calls.') % (
                    overruns, count), HealthCheck.data_too_large)
        if filtered_draws >= 50 or (not count and filtered_draws > 0):
            fail_health_check((
                'It looks like your strategy is filtering out a lot '
                'of data. Health check found %d filtered examples but '
                'only %d good ones. This will make your tests much '
                'slower, and also will probably distort the data '
                'generation quite a lot. You should adapt your '
                'strategy to filter less. This can also be caused by '
                'a low max_leaves parameter in recursive() calls') % (
                    filtered_draws, count), HealthCheck.filter_too_much)
        runtime = time.time() - start
        if runtime > 1.0 or count < 10:
            fail_health_check((
                'Data generation is extremely slow: Only produced '
                '%d valid examples in %.2f seconds (%d invalid ones '
                'and %d exceeded maximum size). Try decreasing '
                "size of the data you're generating (with e.g. "
                'average_size or max_leaves parameters).') % (
                    count, runtime, filtered_draws, overruns),
                HealthCheck.too_slow,
            )

    last_exception = [None]
    repr_for_last_exception = [None]

    def evaluate_test_data(data):
        try:
            result = test_runner(data, reify_and_execute(
                search_strategy, test,
            ))
            if result is not None and settings.perform_health_check:
                fail_health_check((
                    'Tests run under @given should return None, but '
                    '%s returned %r instead.') % (
                        test.__name__, result), HealthCheck.return_value)
            return False
        except UnsatisfiedAssumption:
            data.mark_invalid()
        except (
            HypothesisDeprecationWarning, FailedHealthCheck, StopTest,
        ):
            raise
        except Exception:
            last_exception[0] = traceback.format_exc()
            verbose_report(last_exception[0])
            data.mark_interesting()

    from hypothesis.internal.conjecture.engine import ConjectureRunner

    falsifying_example = None

    database_key = str_to_bytes(fully_qualified_name(test))
    start_time = time.time()
    runner = ConjectureRunner(
        evaluate_test_data,
        settings=settings, random=random,
        database_key=database_key,
    )
    runner.run()
    note_engine_for_statistics(runner)
    run_time = time.time() - start_time
    timed_out = (settings.timeout > 0 and
                 run_time >= settings.timeout)
    if runner.last_data is None:
        return
    if runner.last_data.status == Status.INTERESTING:
        falsifying_example = runner.last_data.buffer
        if settings.database is not None:
            settings.database.save(database_key, falsifying_example)
    else:
        if runner.valid_examples < min(
            settings.min_satisfying_examples,
            settings.max_examples,
        ):
            if timed_out:
                raise Timeout(
                    ('Ran out of time before finding a satisfying '
                     'example for %s. Only found %d examples '
                     'in %.2fs.') % (
                        get_pretty_function_description(test),
                        runner.valid_examples, run_time))
            else:
                raise Unsatisfiable(
                    ('Unable to satisfy assumptions of hypothesis '
                     '%s. Only %d examples considered '
                     'satisfied assumptions') % (
                        get_pretty_function_description(test),
                        runner.valid_examples,))
        return

    assert last_exception[0] is not None

    try:
        with settings:
            test_runner(
                ConjectureData.for_buffer(falsifying_example),
                reify_and_execute(
                    search_strategy, test,
                    print_example=True, is_final=True))
    except (UnsatisfiedAssumption, StopTest):
        report(traceback.format_exc())
        raise Flaky(
            'Unreliable assumption: An example which satisfied '
            'assumptions on the first run now fails it.')

    report(
        'Failed to reproduce exception. Expected: \n' +
        last_exception[0],
    )

    filter_message = (
        'Unreliable test data: Failed to reproduce a failure '
        'and then when it came to recreating the example in '
        'order to print the test data with a flaky result '
        'the example was filtered out (by e.g. a '
        'call to filter in your strategy) when we didn\'t '
        'expect it to be.')

    try:
        test_runner(
            ConjectureData.for_buffer(falsifying_example),
            reify_and_execute(
                search_strategy,
                test_is_flaky(test, repr_for_last_exception[0]),
                print_example=True, is_final=True))
    except (UnsatisfiedAssumption, StopTest):
        raise Flaky(filter_message)
def find(
    specifier,  # type: SearchStrategy
    condition,  # type: Callable[[Any], bool]
    settings=None,  # type: Settings
    random=None,  # type: Any
    database_key=None,  # type: bytes
):
    # type: (...) -> Any
    """Returns the minimal example from the given strategy ``specifier`` that
    matches the predicate function ``condition``."""
    if settings is None:
        settings = Settings(max_examples=2000)
    settings = Settings(settings, suppress_health_check=HealthCheck.all())

    if database_key is None and settings.database is not None:
        database_key = function_digest(condition)

    if not isinstance(specifier, SearchStrategy):
        raise InvalidArgument(
            'Expected SearchStrategy but got %r of type %s' % (
                specifier, type(specifier).__name__
            ))
    specifier.validate()

    search = specifier

    random = random or new_random()
    successful_examples = [0]
    last_data = [None]
    last_repr = [None]

    def template_condition(data):
        with BuildContext(data):
            try:
                data.is_find = True
                with deterministic_PRNG():
                    result = data.draw(search)
                    data.note(result)
                    success = condition(result)
            except UnsatisfiedAssumption:
                data.mark_invalid()

        if success:
            successful_examples[0] += 1

        if settings.verbosity >= Verbosity.verbose:
            if not successful_examples[0]:
                report(
                    u'Tried non-satisfying example %s' % (nicerepr(result),))
            elif success:
                if successful_examples[0] == 1:
                    last_repr[0] = nicerepr(result)
                    report(u'Found satisfying example %s' % (last_repr[0],))
                    last_data[0] = data
                elif (
                    sort_key(hbytes(data.buffer)) <
                    sort_key(last_data[0].buffer)
                ) and nicerepr(result) != last_repr[0]:
                    last_repr[0] = nicerepr(result)
                    report(u'Shrunk example to %s' % (last_repr[0],))
                    last_data[0] = data

        if success and not data.frozen:
            data.mark_interesting()

    start = benchmark_time()
    runner = ConjectureRunner(
        template_condition, settings=settings, random=random,
        database_key=database_key,
    )
    runner.run()
    note_engine_for_statistics(runner)
    run_time = benchmark_time() - start
    if runner.interesting_examples:
        data = ConjectureData.for_buffer(
            list(runner.interesting_examples.values())[0].buffer)
        with BuildContext(data):
            with deterministic_PRNG():
                return data.draw(search)
    if runner.valid_examples == 0 and (
        runner.exit_reason != ExitReason.finished
    ):
        if settings.timeout > 0 and run_time > settings.timeout:
            raise Timeout((  # pragma: no cover
                'Ran out of time before finding enough valid examples for '
                '%s. Only %d valid examples found in %.2f seconds.'
            ) % (
                get_pretty_function_description(condition),
                runner.valid_examples, run_time))
        else:
            raise Unsatisfiable(
                'Unable to satisfy assumptions of %s.' % (
                    get_pretty_function_description(condition),)
            )

    raise NoSuchExample(get_pretty_function_description(condition))
def find(specifier, condition, settings=None, random=None, database_key=None):
    settings = settings or Settings(
        max_examples=2000,
        min_satisfying_examples=0,
        max_shrinks=2000,
    )

    if database_key is None and settings.database is not None:
        database_key = function_digest(condition)

    if not isinstance(specifier, SearchStrategy):
        raise InvalidArgument(
            'Expected SearchStrategy but got %r of type %s' % (
                specifier, type(specifier).__name__))
    specifier.validate()

    search = specifier

    random = random or new_random()
    successful_examples = [0]
    last_data = [None]

    def template_condition(data):
        with BuildContext(data):
            try:
                data.is_find = True
                result = data.draw(search)
                data.note(result)
                success = condition(result)
            except UnsatisfiedAssumption:
                data.mark_invalid()

        if success:
            successful_examples[0] += 1

        if settings.verbosity == Verbosity.verbose:
            if not successful_examples[0]:
                report(lambda: u'Trying example %s' % (nicerepr(result),))
            elif success:
                if successful_examples[0] == 1:
                    report(lambda: u'Found satisfying example %s' % (
                        nicerepr(result),))
                else:
                    report(lambda: u'Shrunk example to %s' % (
                        nicerepr(result),))
                last_data[0] = data
        if success and not data.frozen:
            data.mark_interesting()

    from hypothesis.internal.conjecture.engine import ConjectureRunner
    from hypothesis.internal.conjecture.data import ConjectureData, Status

    start = time.time()
    runner = ConjectureRunner(
        template_condition, settings=settings, random=random,
        database_key=database_key,
    )
    runner.run()
    note_engine_for_statistics(runner)
    run_time = time.time() - start
    if runner.last_data.status == Status.INTERESTING:
        data = ConjectureData.for_buffer(runner.last_data.buffer)
        with BuildContext(data):
            return data.draw(search)
    if runner.valid_examples <= settings.min_satisfying_examples:
        if settings.timeout > 0 and run_time > settings.timeout:
            raise Timeout(
                ('Ran out of time before finding enough valid examples for '
                 '%s. Only %d valid examples found in %.2f seconds.') % (
                    get_pretty_function_description(condition),
                    runner.valid_examples, run_time))
        else:
            raise Unsatisfiable(
                ('Unable to satisfy assumptions of '
                 '%s. Only %d examples considered satisfied assumptions') % (
                    get_pretty_function_description(condition),
                    runner.valid_examples,))

    raise NoSuchExample(get_pretty_function_description(condition))
def run_engine(self):
    """Run the test function many times, on database input and generated
    input, using the Conjecture engine.
    """
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    try:
        database_key = self.wrapped_test._hypothesis_internal_database_key
    except AttributeError:
        if global_force_seed is None:
            database_key = function_digest(self.test)
        else:
            database_key = None

    runner = ConjectureRunner(
        self._execute_once_for_engine,
        settings=self.settings,
        random=self.random,
        database_key=database_key,
    )
    # Use the Conjecture engine to run the test function many times
    # on different inputs.
    runner.run()
    note_engine_for_statistics(runner)

    if runner.call_count == 0:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            runner.interesting_examples.values(),
            key=lambda d: sort_key(d.buffer),
            reverse=True,
        )
    else:
        if runner.valid_examples == 0:
            raise Unsatisfiable(
                "Unable to satisfy assumptions of hypothesis %s."
                % (get_pretty_function_description(self.test),)
            )

    if not self.falsifying_examples:
        return
    elif not self.settings.report_multiple_bugs:
        # Pretend that we only found one failure, by discarding the others.
        del self.falsifying_examples[:-1]

    # The engine found one or more failures, so we need to reproduce and
    # report them.
    self.failed_normally = True

    flaky = 0

    for falsifying_example in self.falsifying_examples:
        info = falsifying_example.extra_information

        ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
        self.__was_flaky = False
        assert info.__expected_exception is not None
        try:
            self.execute_once(
                ran_example,
                print_example=not self.is_find,
                is_final=True,
                expected_failure=(
                    info.__expected_exception,
                    info.__expected_traceback,
                ),
            )
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                "Unreliable assumption: An example which satisfied "
                "assumptions on the first run now fails it."
            )
        except BaseException as e:
            if len(self.falsifying_examples) <= 1:
                # There is only one failure, so we can report it by raising
                # it directly.
                raise

            # We are reporting multiple failures, so we need to manually
            # print each exception's stack trace and information.
            tb = get_trimmed_traceback()
            report("".join(traceback.format_exception(type(e), e, tb)))
        finally:  # pragma: no cover
            # Mostly useful for ``find`` and ensuring that objects that
            # hold on to a reference to ``data`` know that it's now been
            # finished and they shouldn't attempt to draw more data from
            # it.
            ran_example.freeze()

            # This section is in fact entirely covered by the tests in
            # test_reproduce_failure, but it seems to trigger a lovely set
            # of coverage bugs: The branches show up as uncovered (despite
            # definitely being covered - you can add an assert False else
            # branch to verify this and see it fail - and additionally the
            # second branch still complains about lack of coverage even if
            # you add a pragma: no cover to it!
            # See https://bitbucket.org/ned/coveragepy/issues/623/
            if self.settings.print_blob:
                report(
                    (
                        "\nYou can reproduce this example by temporarily "
                        "adding @reproduce_failure(%r, %r) as a decorator "
                        "on your test case"
                    )
                    % (__version__, encode_failure(falsifying_example.buffer))
                )
        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky(
            (
                "Hypothesis found %d distinct failures, but %d of them "
                "exhibited some sort of flaky behaviour."
            )
            % (len(self.falsifying_examples), flaky)
        )
    else:
        raise MultipleFailures(
            ("Hypothesis found %d distinct failures.")
            % (len(self.falsifying_examples),)
        )
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    database_key = str_to_bytes(fully_qualified_name(self.test))
    self.start_time = time.time()
    global in_given
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings, random=self.random,
        database_key=database_key,
    )

    if in_given or self.collector is None:
        runner.run()
    else:  # pragma: no cover
        in_given = True
        original_trace = sys.gettrace()
        try:
            sys.settrace(None)
            runner.run()
        finally:
            in_given = False
            sys.settrace(original_trace)
    note_engine_for_statistics(runner)
    run_time = time.time() - self.start_time
    timed_out = runner.exit_reason == ExitReason.timeout
    if runner.last_data is None:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer),
            reverse=True)
    else:
        if timed_out:
            note_deprecation((
                'Your tests are hitting the settings timeout (%.2fs). '
                'This functionality will go away in a future release '
                'and you should not rely on it. Instead, try setting '
                'max_examples to be some value lower than %d (the number '
                'of examples your test successfully ran here). Or, if you '
                'would prefer your tests to run to completion, regardless '
                'of how long they take, you can set the timeout value to '
                'hypothesis.unlimited.') % (
                    self.settings.timeout, runner.valid_examples),
                self.settings)
        if runner.valid_examples < min(
            self.settings.min_satisfying_examples,
            self.settings.max_examples,
        ) and not (runner.exit_reason == ExitReason.finished and
                   self.at_least_one_success):
            if timed_out:
                raise Timeout(
                    ('Ran out of time before finding a satisfying '
                     'example for %s. Only found %d examples '
                     'in %.2fs.') % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples, run_time))
            else:
                raise Unsatisfiable(
                    ('Unable to satisfy assumptions of hypothesis '
                     '%s. Only %d examples considered '
                     'satisfied assumptions') % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples,))

    if not self.falsifying_examples:
        return

    flaky = 0

    self.__in_final_replay = True

    for falsifying_example in self.falsifying_examples:
        self.__was_flaky = False
        raised_exception = False
        try:
            with self.settings:
                self.test_runner(
                    ConjectureData.for_buffer(falsifying_example.buffer),
                    reify_and_execute(
                        self.search_strategy, self.test,
                        print_example=True, is_final=True))
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                'Unreliable assumption: An example which satisfied '
                'assumptions on the first run now fails it.')
        except BaseException:
            if len(self.falsifying_examples) <= 1:
                raise
            raised_exception = True
            report(traceback.format_exc())

        if not raised_exception:
            if (isinstance(falsifying_example.__expected_exception,
                           DeadlineExceeded) and
                    self.__test_runtime is not None):
                report((
                    'Unreliable test timings! On an initial run, this '
                    'test took %.2fms, which exceeded the deadline of '
                    '%.2fms, but on a subsequent run it took %.2f ms, '
                    'which did not. If you expect this sort of '
                    'variability in your test timings, consider turning '
                    'deadlines off for this test by setting '
                    'deadline=None.') % (
                        falsifying_example.__expected_exception.runtime,
                        self.settings.deadline, self.__test_runtime))
            else:
                report(
                    'Failed to reproduce exception. Expected: \n' +
                    falsifying_example.__expected_traceback,
                )

            filter_message = (
                'Unreliable test data: Failed to reproduce a failure '
                'and then when it came to recreating the example in '
                'order to print the test data with a flaky result '
                'the example was filtered out (by e.g. a '
                'call to filter in your strategy) when we didn\'t '
                'expect it to be.')

            try:
                self.test_runner(
                    ConjectureData.for_buffer(falsifying_example.buffer),
                    reify_and_execute(
                        self.search_strategy,
                        test_is_flaky(
                            self.test, self.repr_for_last_exception),
                        print_example=True, is_final=True))
            except (UnsatisfiedAssumption, StopTest):
                self.__flaky(filter_message)
            except Flaky as e:
                if len(self.falsifying_examples) > 1:
                    self.__flaky(e.args[0])
                else:
                    raise

        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky(
            ('Hypothesis found %d distinct failures, but %d of them '
             'exhibited some sort of flaky behaviour.') % (
                len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures(
            ('Hypothesis found %d distinct failures.') % (
                len(self.falsifying_examples),))
def test_can_reduce_poison_from_any_subtree(size, seed):
    """This test validates that we can minimize to any leaf node of a binary
    tree, regardless of where in the tree the leaf is."""
    random = Random(seed)

    # Initially we create the minimal tree of size n, regardless of whether
    # it is poisoned (which it won't be - the poison event essentially never
    # happens when drawing uniformly at random).

    # Choose p so that the expected size of the tree is equal to the desired
    # size.
    p = 1.0 / (2.0 - 1.0 / size)
    strat = PoisonedTree(p)

    def test_function(data):
        v = data.draw(strat)
        if len(v) >= size:
            data.mark_interesting()

    runner = ConjectureRunner(
        test_function, random=random, settings=TEST_SETTINGS)

    while not runner.interesting_examples:
        runner.test_function(
            runner.new_conjecture_data(lambda data, n: uniform(random, n))
        )

    runner.shrink_interesting_examples()

    data, = runner.interesting_examples.values()

    assert len(ConjectureData.for_buffer(data.buffer).draw(strat)) == size

    starts = [b.start for b in data.blocks if b.length == 2]
    assert len(starts) % 2 == 0

    for i in hrange(0, len(starts), 2):
        # Now for each leaf position in the tree we try inserting a poison
        # value artificially. Additionally, we add a marker to the end that
        # must be preserved. The marker means that we are not allowed to
        # rely on discarding the end of the buffer to get the desired
        # shrink.
        u = starts[i]
        marker = hbytes([1, 2, 3, 4])

        def test_function_with_poison(data):
            v = data.draw(strat)
            m = data.draw_bytes(len(marker))
            if POISON in v and m == marker:
                data.mark_interesting()

        runner = ConjectureRunner(
            test_function_with_poison, random=random, settings=TEST_SETTINGS
        )

        runner.cached_test_function(
            data.buffer[:u] + hbytes([255]) * 4 + data.buffer[u + 4:] + marker
        )

        assert runner.interesting_examples
        runner.shrink_interesting_examples()

        shrunk, = runner.interesting_examples.values()

        assert ConjectureData.for_buffer(
            shrunk.buffer).draw(strat) == (POISON,)
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    if global_force_seed is None:
        database_key = str_to_bytes(fully_qualified_name(self.test))
    else:
        database_key = None
    self.start_time = time.time()
    global in_given
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings, random=self.random,
        database_key=database_key,
    )

    if in_given or self.collector is None:
        runner.run()
    else:  # pragma: no cover
        in_given = True
        original_trace = sys.gettrace()
        try:
            sys.settrace(None)
            runner.run()
        finally:
            in_given = False
            sys.settrace(original_trace)
    note_engine_for_statistics(runner)
    run_time = time.time() - self.start_time
    self.used_examples_from_database = \
        runner.used_examples_from_database

    if runner.used_examples_from_database:
        if self.settings.derandomize:
            note_deprecation(
                'In future derandomize will imply database=None, but your '
                'test is currently using examples from the database. To '
                'get the future behaviour, update your settings to '
                'include database=None.')
        if self.__had_seed:
            note_deprecation(
                'In future use of @seed will imply database=None in your '
                'settings, but your test is currently using examples from '
                'the database. To get the future behaviour, update your '
                'settings for this test to include database=None.')

    timed_out = runner.exit_reason == ExitReason.timeout
    if runner.call_count == 0:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer),
            reverse=True)
    else:
        if timed_out:
            note_deprecation((
                'Your tests are hitting the settings timeout (%.2fs). '
                'This functionality will go away in a future release '
                'and you should not rely on it. Instead, try setting '
                'max_examples to be some value lower than %d (the number '
                'of examples your test successfully ran here). Or, if you '
                'would prefer your tests to run to completion, regardless '
                'of how long they take, you can set the timeout value to '
                'hypothesis.unlimited.') % (
                    self.settings.timeout, runner.valid_examples),
                self.settings)
        if runner.valid_examples < min(
            self.settings.min_satisfying_examples,
            self.settings.max_examples,
        ) and not (runner.exit_reason == ExitReason.finished and
                   self.at_least_one_success):
            if timed_out:
                raise Timeout(
                    ('Ran out of time before finding a satisfying '
                     'example for %s. Only found %d examples '
                     'in %.2fs.') % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples, run_time))
            else:
                raise Unsatisfiable(
                    ('Unable to satisfy assumptions of hypothesis '
                     '%s. Only %d examples considered '
                     'satisfied assumptions') % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples,))

    if not self.falsifying_examples:
        return

    self.failed_normally = True

    flaky = 0

    for falsifying_example in self.falsifying_examples:
        ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
        self.__was_flaky = False
        assert falsifying_example.__expected_exception is not None
        try:
            self.execute(
                ran_example,
                print_example=True, is_final=True,
                expected_failure=(
                    falsifying_example.__expected_exception,
                    falsifying_example.__expected_traceback,
                ))
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                'Unreliable assumption: An example which satisfied '
                'assumptions on the first run now fails it.')
        except BaseException:
            if len(self.falsifying_examples) <= 1:
                raise
            report(traceback.format_exc())
        finally:  # pragma: no cover
            # This section is in fact entirely covered by the tests in
            # test_reproduce_failure, but it seems to trigger a lovely set
            # of coverage bugs: The branches show up as uncovered (despite
            # definitely being covered - you can add an assert False else
            # branch to verify this and see it fail - and additionally the
            # second branch still complains about lack of coverage even if
            # you add a pragma: no cover to it!
            # See https://bitbucket.org/ned/coveragepy/issues/623/
            if self.settings.print_blob is not PrintSettings.NEVER:
                failure_blob = encode_failure(falsifying_example.buffer)
                # Have to use the example we actually ran, not the original
                # falsifying example! Otherwise we won't catch problems
                # where the repr of the generated example doesn't parse.
                can_use_repr = ran_example.can_reproduce_example_from_repr
                if (self.settings.print_blob is PrintSettings.ALWAYS or (
                        self.settings.print_blob is PrintSettings.INFER and
                        not can_use_repr and len(failure_blob) < 200)):
                    report((
                        '\n'
                        'You can reproduce this example by temporarily '
                        'adding @reproduce_failure(%r, %r) as a decorator '
                        'on your test case') % (
                            __version__, failure_blob,))
        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky(
            ('Hypothesis found %d distinct failures, but %d of them '
             'exhibited some sort of flaky behaviour.') % (
                len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures(
            ('Hypothesis found %d distinct failures.') % (
                len(self.falsifying_examples),))
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    database_key = str_to_bytes(fully_qualified_name(self.test))
    self.start_time = time.time()
    global in_given
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings, random=self.random,
        database_key=database_key,
    )

    if in_given or self.collector is None:
        runner.run()
    else:  # pragma: no cover
        in_given = True
        original_trace = sys.gettrace()
        try:
            sys.settrace(None)
            runner.run()
        finally:
            in_given = False
            sys.settrace(original_trace)
    note_engine_for_statistics(runner)
    run_time = time.time() - self.start_time
    timed_out = runner.exit_reason == ExitReason.timeout
    if runner.last_data is None:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer),
            reverse=True)
    else:
        if timed_out:
            note_deprecation((
                'Your tests are hitting the settings timeout (%.2fs). '
                'This functionality will go away in a future release '
                'and you should not rely on it. Instead, try setting '
                'max_examples to be some value lower than %d (the number '
                'of examples your test successfully ran here). Or, if you '
                'would prefer your tests to run to completion, regardless '
                'of how long they take, you can set the timeout value to '
                'hypothesis.unlimited.') % (
                    self.settings.timeout, runner.valid_examples),
                self.settings)
        if runner.valid_examples < min(
            self.settings.min_satisfying_examples,
            self.settings.max_examples,
        ) and not (runner.exit_reason == ExitReason.finished and
                   self.at_least_one_success):
            if timed_out:
                raise Timeout(
                    ('Ran out of time before finding a satisfying '
                     'example for %s. Only found %d examples '
                     'in %.2fs.') % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples, run_time))
            else:
                raise Unsatisfiable(
                    ('Unable to satisfy assumptions of hypothesis '
                     '%s. Only %d examples considered '
                     'satisfied assumptions') % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples,))

    if not self.falsifying_examples:
        return

    flaky = 0

    for falsifying_example in self.falsifying_examples:
        self.__was_flaky = False
        assert falsifying_example.__expected_exception is not None
        try:
            self.execute(
                ConjectureData.for_buffer(falsifying_example.buffer),
                print_example=True, is_final=True,
                expected_failure=(
                    falsifying_example.__expected_exception,
                    falsifying_example.__expected_traceback,
                ))
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                'Unreliable assumption: An example which satisfied '
                'assumptions on the first run now fails it.')
        except BaseException:
            if len(self.falsifying_examples) <= 1:
                raise
            report(traceback.format_exc())
        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky(
            ('Hypothesis found %d distinct failures, but %d of them '
             'exhibited some sort of flaky behaviour.') % (
                len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures(
            ('Hypothesis found %d distinct failures.') % (
                len(self.falsifying_examples),))