def run(data):
    if not hasattr(data, 'can_reproduce_example_from_repr'):
        data.can_reproduce_example_from_repr = True
    with local_settings(self.settings):
        with BuildContext(data, is_final=is_final):
            with deterministic_PRNG():
                args, kwargs = data.draw(self.search_strategy)
            if expected_failure is not None:
                text_repr[0] = arg_string(test, args, kwargs)

            if print_example:
                example = '%s(%s)' % (
                    test.__name__, arg_string(test, args, kwargs))
                try:
                    ast.parse(example)
                except SyntaxError:
                    data.can_reproduce_example_from_repr = False
                report('Falsifying example: %s' % (example,))
            elif current_verbosity() >= Verbosity.verbose:
                report(
                    lambda: 'Trying example: %s(%s)' % (
                        test.__name__, arg_string(test, args, kwargs)))

            with deterministic_PRNG():
                return test(*args, **kwargs)
def print_step(self, step): """Print a step to the current reporter. This is called right before a step is executed. """ self.step_count = getattr(self, u'step_count', 0) + 1 report(u'Step #%d: %s' % (self.step_count, nicerepr(step)))
def wrapped_test(*arguments, **kwargs):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True

    settings = wrapped_test._hypothesis_internal_use_settings

    random = get_random_for_wrapped_test(test, wrapped_test)

    if infer in generator_kwargs.values():
        hints = get_type_hints(test)
        for name in [name for name, value in generator_kwargs.items()
                     if value is infer]:
            if name not in hints:
                raise InvalidArgument(
                    'passed %s=infer for %s, but %s has no type annotation'
                    % (name, test.__name__, name))
            generator_kwargs[name] = st.from_type(hints[name])

    processed_args = process_arguments_to_given(
        wrapped_test, arguments, kwargs, generator_arguments,
        generator_kwargs, argspec, test, settings
    )
    arguments, kwargs, test_runner, search_strategy = processed_args

    execute_explicit_examples(
        test_runner, test, wrapped_test, settings, arguments, kwargs
    )

    if settings.max_examples <= 0:
        return

    if not (
        Phase.reuse in settings.phases or Phase.generate in settings.phases
    ):
        return

    try:
        perform_health_checks(
            random, settings, test_runner, search_strategy)

        state = StateForActualGivenExecution(
            test_runner, search_strategy, test, settings, random)
        state.run()
    except:
        generated_seed = \
            wrapped_test._hypothesis_internal_use_generated_seed
        if generated_seed is not None:
            if running_under_pytest:
                report((
                    'You can add @seed(%(seed)d) to this test or run '
                    'pytest with --hypothesis-seed=%(seed)d to '
                    'reproduce this failure.') % {'seed': generated_seed},)
            else:
                report((
                    'You can add @seed(%d) to this test to reproduce '
                    'this failure.') % (generated_seed,))
        raise
def note(value):
    # type: (AnyStr) -> None
    """Report this value in the final execution."""
    context = _current_build_context.value
    if context is None:
        raise InvalidArgument("Cannot make notes outside of a test")
    context.notes.append(value)
    if context.is_final or settings.default.verbosity >= Verbosity.verbose:
        report(value)
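# Usage sketch (not taken from the snippets above; the test name and strategy
# are illustrative only): `note` records a value that Hypothesis replays
# alongside the minimal falsifying example, which is how the reporting hooks
# above surface user annotations.
from hypothesis import given, note, strategies as st

@given(st.lists(st.integers()))
def test_sorting_is_idempotent(xs):
    ys = sorted(xs)
    note('sorted once: %r' % (ys,))  # printed together with any falsifying example
    assert sorted(ys) == ys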
def close(self):
    any_failed = False
    for task in self.tasks:
        try:
            task()
        except:
            any_failed = True
            report(traceback.format_exc())
    return any_failed
def note(value):
    """Report this value in the final execution."""
    context = _current_build_context.value
    if context is None:
        raise InvalidArgument(
            'Cannot make notes outside of a test')
    context.notes.append(value)
    if context.is_final:
        report(value)
def run():
    with BuildContext():
        args, kwargs = search_strategy.reify(template)
        text_version = arg_string(test, args, kwargs)
        if print_example:
            report(lambda: "Falsifying example: %s(%s)" % (
                test.__name__, text_version))
        elif current_verbosity() >= Verbosity.verbose or always_print:
            report(lambda: "Trying example: %s(%s)" % (
                test.__name__, text_version))
        if record_repr is not None:
            record_repr[0] = text_version
        return test(*args, **kwargs)
def print_step(self, step):
    rule, data = step
    data_repr = {}
    for k, v in data.items():
        data_repr[k] = self.__pretty(v)
    self.step_count = getattr(self, u'step_count', 0) + 1
    report(u'%sstate.%s(%s)' % (
        u'%s = ' % (self.upcoming_name(),) if rule.targets else u'',
        rule.function.__name__,
        u', '.join(u'%s=%s' % kv for kv in data_repr.items())
    ))
def __exit__(self, exc_type, exc_value, tb):
    any_failed = False
    for task in self.tasks:
        try:
            task()
        except:
            any_failed = True
            report(traceback.format_exc())
    self.assign_variable.__exit__(exc_type, exc_value, tb)
    if exc_type is None and any_failed:
        raise CleanupFailed()
def run(data):
    with BuildContext(is_final=is_final):
        args, kwargs = data.draw(search_strategy)
        if print_example:
            report(
                lambda: 'Falsifying example: %s(%s)' % (
                    test.__name__, arg_string(test, args, kwargs)))
        elif current_verbosity() >= Verbosity.verbose:
            report(
                lambda: 'Trying example: %s(%s)' % (
                    test.__name__, arg_string(test, args, kwargs)))
        return test(*args, **kwargs)
def run(data):
    from hypothesis.control import note

    with BuildContext(is_final=is_final):
        seed = data.draw(random_module()).seed
        if seed != 0:
            note("random.seed(%d)" % (seed,))
        args, kwargs = data.draw(search_strategy)
        if print_example:
            report(lambda: "Falsifying example: %s(%s)" % (
                test.__name__, arg_string(test, args, kwargs)))
        elif current_verbosity() >= Verbosity.verbose:
            report(lambda: "Trying example: %s(%s)" % (
                test.__name__, arg_string(test, args, kwargs)))
        return test(*args, **kwargs)
def note(value):
    """Report this value in the final execution.

    The string conversion of the value is always performed, even when
    nothing will be printed, so that execution is consistent.
    """
    context = _current_build_context.value
    if context is None:
        raise InvalidArgument(
            'Cannot make notes outside of a build context')
    context.notes.append(value)
    if context.is_final:
        report(value)
def print_step(self, step):
    rule, data = step
    data_repr = {}
    for k, v in data.items():
        data_repr[k] = self.__pretty(v)
    self.step_count = getattr(self, u"step_count", 0) + 1
    report(
        u"%sstate.%s(%s)"
        % (
            u"%s = " % (self.upcoming_name(),) if rule.targets else u"",
            rule.function.__name__,
            u", ".join(u"%s=%s" % kv for kv in data_repr.items()),
        )
    )
def print_step(self, step):
    if isinstance(step, ShuffleBundle):
        return
    rule, data = step
    data_repr = {}
    for k, v in data.items():
        data_repr[k] = self.__pretty(v)
    self.step_count = getattr(self, u'step_count', 0) + 1
    report(u'Step #%d: %s%s(%s)' % (
        self.step_count,
        u'%s = ' % (self.upcoming_name(),) if rule.targets else u'',
        rule.function.__name__,
        u', '.join(u'%s=%s' % kv for kv in data_repr.items())
    ))
def print_step(self, step):
    rule, data = step
    data_repr = {}
    for k, v in data.items():
        if isinstance(v, VarReference):
            data_repr[k] = v.name
        else:
            data_repr[k] = repr(v)
    self.step_count = getattr(self, u'step_count', 0) + 1
    report(u'Step #%d: %s%s(%s)' % (
        self.step_count,
        u'%s = ' % (self.upcoming_name(),) if rule.targets else u'',
        rule.function.__name__,
        u', '.join(u'%s=%s' % kv for kv in data_repr.items())
    ))
def execute_explicit_examples(
    test_runner, test, wrapped_test, settings, arguments, kwargs
):
    original_argspec = getfullargspec(test)

    for example in reversed(getattr(
        wrapped_test, 'hypothesis_explicit_examples', ()
    )):
        example_kwargs = dict(original_argspec.kwonlydefaults or {})
        if example.args:
            if len(example.args) > len(original_argspec.args):
                raise InvalidArgument(
                    'example has too many arguments for test. '
                    'Expected at most %d but got %d' % (
                        len(original_argspec.args), len(example.args)))
            example_kwargs.update(dict(zip(
                original_argspec.args[-len(example.args):],
                example.args
            )))
        else:
            example_kwargs.update(example.kwargs)
        if Phase.explicit not in settings.phases:
            continue
        example_kwargs.update(kwargs)
        # Note: Test may mutate arguments and we can't rerun explicit
        # examples, so we have to calculate the failure message at this
        # point rather than later.
        example_string = '%s(%s)' % (
            test.__name__, arg_string(test, arguments, example_kwargs)
        )
        with local_settings(settings):
            try:
                with BuildContext(None) as b:
                    verbose_report('Trying example: ' + example_string)
                    test_runner(
                        None,
                        lambda data: test(*arguments, **example_kwargs)
                    )
            except BaseException:
                report('Falsifying example: ' + example_string)
                for n in b.notes:
                    report(n)
                raise
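# Usage sketch (assumption, not part of execute_explicit_examples itself): the
# explicit examples it replays are the ones registered with @example, tried
# before any generated data and reported via the "Falsifying example" path
# above when they fail. The test below is purely illustrative.
from hypothesis import example, given, strategies as st

@given(st.integers())
@example(0)      # always executed during the explicit phase
@example(-1)
def test_abs_is_non_negative(n):
    assert abs(n) >= 0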
def run(data):
    with BuildContext(data, is_final=is_final):
        orig = sys.gettrace()
        try:  # pragma: no cover
            sys.settrace(None)
            import random as rnd_module
            rnd_module.seed(0)
        finally:  # pragma: no cover
            sys.settrace(orig)
        args, kwargs = data.draw(search_strategy)
        if print_example:
            report(
                lambda: 'Falsifying example: %s(%s)' % (
                    test.__name__, arg_string(test, args, kwargs)))
        elif current_verbosity() >= Verbosity.verbose:
            report(
                lambda: 'Trying example: %s(%s)' % (
                    test.__name__, arg_string(test, args, kwargs)))
        return test(*args, **kwargs)
def run():
    args, kwargs = search_strategy.reify(template)
    if print_example:
        report(
            lambda: 'Falsifying example: %s(%s)' % (
                test.__name__, arg_string(test, args, kwargs)))
    else:
        verbose_report(
            lambda: 'Trying example: %s(%s)' % (
                test.__name__, arg_string(test, args, kwargs)))
    return test(*args, **kwargs)
def run():
    args, kwargs = search_strategy.reify(template)
    if print_example:
        report(
            lambda: 'Falsifying example: %s(%s)' % (
                test.__name__, arg_string(test, args, kwargs)))
    elif current_verbosity() >= Verbosity.verbose or always_print:
        report(
            lambda: 'Trying example: %s(%s)' % (
                test.__name__, arg_string(test, args, kwargs)))
    return test(*args, **kwargs)
def template_condition(data):
    with BuildContext(data):
        try:
            data.is_find = True
            result = data.draw(search)
            data.note(result)
            success = condition(result)
        except UnsatisfiedAssumption:
            data.mark_invalid()

    if success:
        successful_examples[0] += 1

    if settings.verbosity == Verbosity.verbose:
        if not successful_examples[0]:
            report(lambda: u'Trying example %s' % (
                nicerepr(result),
            ))
        elif success:
            if successful_examples[0] == 1:
                report(lambda: u'Found satisfying example %s' % (
                    nicerepr(result),
                ))
            else:
                report(lambda: u'Shrunk example to %s' % (
                    nicerepr(result),
                ))
            last_data[0] = data
    if success and not data.frozen:
        data.mark_interesting()
def template_condition(data):
    with BuildContext(data):
        try:
            data.is_find = True
            with deterministic_PRNG():
                result = data.draw(search)
                data.note(result)
                success = condition(result)
        except UnsatisfiedAssumption:
            data.mark_invalid()

    if success:
        successful_examples[0] += 1

    if settings.verbosity >= Verbosity.verbose:
        if not successful_examples[0]:
            report(
                u'Tried non-satisfying example %s' % (nicerepr(result),))
        elif success:
            if successful_examples[0] == 1:
                last_repr[0] = nicerepr(result)
                report(u'Found satisfying example %s' % (last_repr[0],))
                last_data[0] = data
            elif (
                sort_key(hbytes(data.buffer)) <
                sort_key(last_data[0].buffer)
            ) and nicerepr(result) != last_repr[0]:
                last_repr[0] = nicerepr(result)
                report(u'Shrunk example to %s' % (last_repr[0],))
                last_data[0] = data
    if success and not data.frozen:
        data.mark_interesting()
def run(data):
    with BuildContext(data, is_final=is_final):
        import random as rnd_module
        rnd_module.seed(0)
        args, kwargs = data.draw(search_strategy)
        if print_example:
            report(
                lambda: 'Falsifying example: %s(%s)' % (
                    test.__name__, arg_string(test, args, kwargs)))
        elif current_verbosity() >= Verbosity.verbose:
            report(
                lambda: 'Trying example: %s(%s)' % (
                    test.__name__, arg_string(test, args, kwargs)))
        if collector is None:
            return test(*args, **kwargs)
        else:  # pragma: no cover
            try:
                collector.start()
                return test(*args, **kwargs)
            finally:
                collector.stop()
def print_start(self):
    report(u'state = %s()' % (self.__class__.__name__,))
def print_end(self):
    report(u'state.teardown()')
def execute_once(self, data, print_example=False, is_final=False,
                 expected_failure=None):
    """Run the test function once, using ``data`` as input.

    If the test raises an exception, it will propagate through to the
    caller of this method. Depending on its type, this could represent
    an ordinary test failure, or a fatal error, or a control exception.

    If this method returns normally, the test might have passed, or
    it might have placed ``data`` in an unsuccessful state and then
    swallowed the corresponding control exception.
    """
    text_repr = [None]
    if self.settings.deadline is None:
        test = self.test
    else:

        @proxies(self.test)
        def test(*args, **kwargs):
            self.__test_runtime = None
            initial_draws = len(data.draw_times)
            start = benchmark_time()
            result = self.test(*args, **kwargs)
            finish = benchmark_time()
            internal_draw_time = sum(data.draw_times[initial_draws:])
            runtime = datetime.timedelta(
                seconds=finish - start - internal_draw_time)
            self.__test_runtime = runtime
            current_deadline = self.settings.deadline
            if not is_final:
                current_deadline = (current_deadline // 4) * 5
            if runtime >= current_deadline:
                raise DeadlineExceeded(runtime, self.settings.deadline)
            return result

    def run(data):
        # Set up dynamic context needed by a single test run.
        with local_settings(self.settings):
            with deterministic_PRNG():
                with BuildContext(data, is_final=is_final):
                    # Generate all arguments to the test function.
                    args, kwargs = data.draw(self.search_strategy)
                    if expected_failure is not None:
                        text_repr[0] = arg_string(test, args, kwargs)

                    if print_example:
                        example = "%s(%s)" % (
                            test.__name__,
                            arg_string(test, args, kwargs),
                        )
                        report("Falsifying example: %s" % (example,))
                    elif current_verbosity() >= Verbosity.verbose:
                        report(lambda: "Trying example: %s(%s)" % (
                            test.__name__, arg_string(test, args, kwargs)))

                    return test(*args, **kwargs)

    # Run the test function once, via the executor hook.
    # In most cases this will delegate straight to `run(data)`.
    result = self.test_runner(data, run)

    # If a failure was expected, it should have been raised already, so
    # instead raise an appropriate diagnostic error.
    if expected_failure is not None:
        exception, traceback = expected_failure
        if (isinstance(exception, DeadlineExceeded)
                and self.__test_runtime is not None):
            report(
                ("Unreliable test timings! On an initial run, this "
                 "test took %.2fms, which exceeded the deadline of "
                 "%.2fms, but on a subsequent run it took %.2f ms, "
                 "which did not. If you expect this sort of "
                 "variability in your test timings, consider turning "
                 "deadlines off for this test by setting deadline=None.")
                % (
                    exception.runtime.total_seconds() * 1000,
                    self.settings.deadline.total_seconds() * 1000,
                    self.__test_runtime.total_seconds() * 1000,
                ))
        else:
            report("Failed to reproduce exception. Expected: \n" + traceback)
        self.__flaky(
            ("Hypothesis %s(%s) produces unreliable results: Falsified"
             " on the first call but did not on a subsequent one")
            % (test.__name__, text_repr[0]))
    return result
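# A minimal sketch of the deadline grace calculation used in execute_once
# above (assumption: the deadline is a datetime.timedelta, as it is there).
# Non-final runs are allowed to exceed the configured deadline by 25% so that
# timing jitter near the limit is only reported after the deterministic
# final replay.
import datetime

deadline = datetime.timedelta(milliseconds=200)
grace_deadline = (deadline // 4) * 5  # floor-divide then scale: 250ms
assert grace_deadline == datetime.timedelta(milliseconds=250)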
def wrapped_test(*arguments, **kwargs):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    if getattr(test, 'is_hypothesis_test', False):
        note_deprecation(
            'You have applied @given to a test more than once. In '
            'future this will be an error. Applying @given twice '
            'wraps the test twice, which can be extremely slow. A '
            'similar effect can be gained by combining the arguments '
            'of the two calls to given. For example, instead of '
            '@given(booleans()) @given(integers()), you could write '
            '@given(booleans(), integers())')

    settings = wrapped_test._hypothesis_internal_use_settings

    random = get_random_for_wrapped_test(test, wrapped_test)

    if infer in generator_kwargs.values():
        hints = get_type_hints(test)
        for name in [
            name for name, value in generator_kwargs.items()
            if value is infer
        ]:
            if name not in hints:
                raise InvalidArgument(
                    'passed %s=infer for %s, but %s has no type annotation'
                    % (name, test.__name__, name))
            generator_kwargs[name] = st.from_type(hints[name])

    processed_args = process_arguments_to_given(
        wrapped_test, arguments, kwargs, generator_arguments,
        generator_kwargs, argspec, test, settings)
    arguments, kwargs, test_runner, search_strategy = processed_args

    runner = getattr(search_strategy, 'runner', None)
    if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
        msg = ('You have applied @given to the method %s, which is '
               'used by the unittest runner but is not itself a test.'
               ' This is not useful in any way.' % test.__name__)
        fail_health_check(settings, msg, HealthCheck.not_a_test_method)
    if bad_django_TestCase(runner):  # pragma: no cover
        # Covered by the Django tests, but not the pytest coverage task
        raise InvalidArgument(
            'You have applied @given to a method on %s, but this '
            'class does not inherit from the supported versions in '
            '`hypothesis.extra.django`. Use the Hypothesis variants '
            'to ensure that each example is run in a separate '
            'database transaction.' % qualname(type(runner)))

    state = StateForActualGivenExecution(
        test_runner, search_strategy, test, settings, random,
        had_seed=wrapped_test._hypothesis_internal_use_seed)

    reproduce_failure = \
        wrapped_test._hypothesis_internal_use_reproduce_failure

    if reproduce_failure is not None:
        expected_version, failure = reproduce_failure
        if expected_version != __version__:
            raise InvalidArgument(
                ('Attempting to reproduce a failure from a different '
                 'version of Hypothesis. This failure is from %s, but '
                 'you are currently running %r. Please change your '
                 'Hypothesis version to a matching one.')
                % (expected_version, __version__))
        try:
            state.execute(
                ConjectureData.for_buffer(decode_failure(failure)),
                print_example=True, is_final=True,
            )
            raise DidNotReproduce(
                'Expected the test to raise an error, but it '
                'completed successfully.')
        except StopTest:
            raise DidNotReproduce(
                'The shape of the test data has changed in some way '
                'from where this blob was defined. Are you sure '
                "you're running the same test?")
        except UnsatisfiedAssumption:
            raise DidNotReproduce(
                'The test data failed to satisfy an assumption in the '
                'test. Have you added it since this blob was '
                'generated?')

    execute_explicit_examples(
        test_runner, test, wrapped_test, settings, arguments, kwargs)

    if settings.max_examples <= 0:
        return

    if not (Phase.reuse in settings.phases or
            Phase.generate in settings.phases):
        return

    try:
        if isinstance(runner, TestCase) and hasattr(runner, 'subTest'):
            subTest = runner.subTest
            try:
                setattr(runner, 'subTest', fake_subTest)
                state.run()
            finally:
                setattr(runner, 'subTest', subTest)
        else:
            state.run()
    except BaseException:
        generated_seed = \
            wrapped_test._hypothesis_internal_use_generated_seed
        if generated_seed is not None and not state.failed_normally:
            if running_under_pytest:
                report(
                    ('You can add @seed(%(seed)d) to this test or run '
                     'pytest with --hypothesis-seed=%(seed)d to '
                     'reproduce this failure.') % {'seed': generated_seed},
                )
            else:
                report(
                    ('You can add @seed(%d) to this test to reproduce '
                     'this failure.') % (generated_seed,))
        raise
def execute( self, data, print_example=False, is_final=False, expected_failure=None, collect=False, ): text_repr = [None] if self.settings.deadline is None: test = self.test else: @proxies(self.test) def test(*args, **kwargs): self.__test_runtime = None initial_draws = len(data.draw_times) start = benchmark_time() result = self.test(*args, **kwargs) finish = benchmark_time() internal_draw_time = sum(data.draw_times[initial_draws:]) runtime = (finish - start - internal_draw_time) * 1000 self.__test_runtime = runtime if self.settings.deadline is not_set: if ( not self.__warned_deadline and runtime >= 200 ): self.__warned_deadline = True note_deprecation(( 'Test: %s took %.2fms to run. In future the ' 'default deadline setting will be 200ms, which ' 'will make this an error. You can set deadline to ' 'an explicit value of e.g. %d to turn tests ' 'slower than this into an error, or you can set ' 'it to None to disable this check entirely.') % ( self.test.__name__, runtime, ceil(runtime / 100) * 100, )) else: current_deadline = self.settings.deadline if not is_final: current_deadline *= 1.25 if runtime >= current_deadline: raise DeadlineExceeded(runtime, self.settings.deadline) return result def run(data): if not hasattr(data, 'can_reproduce_example_from_repr'): data.can_reproduce_example_from_repr = True with local_settings(self.settings): with BuildContext(data, is_final=is_final): with deterministic_PRNG(): args, kwargs = data.draw(self.search_strategy) if expected_failure is not None: text_repr[0] = arg_string(test, args, kwargs) if print_example: example = '%s(%s)' % ( test.__name__, arg_string(test, args, kwargs)) try: ast.parse(example) except SyntaxError: data.can_reproduce_example_from_repr = False report('Falsifying example: %s' % (example,)) elif current_verbosity() >= Verbosity.verbose: report( lambda: 'Trying example: %s(%s)' % ( test.__name__, arg_string(test, args, kwargs))) with deterministic_PRNG(): return test(*args, **kwargs) result = self.test_runner(data, run) if expected_failure is not None: exception, traceback = expected_failure if ( isinstance( exception, DeadlineExceeded ) and self.__test_runtime is not None ): report(( 'Unreliable test timings! On an initial run, this ' 'test took %.2fms, which exceeded the deadline of ' '%.2fms, but on a subsequent run it took %.2f ms, ' 'which did not. If you expect this sort of ' 'variability in your test timings, consider turning ' 'deadlines off for this test by setting deadline=None.' ) % ( exception.runtime, self.settings.deadline, self.__test_runtime )) else: report( 'Failed to reproduce exception. Expected: \n' + traceback, ) self.__flaky(( 'Hypothesis %s(%s) produces unreliable results: Falsified' ' on the first call but did not on a subsequent one' ) % (test.__name__, text_repr[0],)) return result
def wrapped_test(*arguments, **kwargs): settings = wrapped_test._hypothesis_internal_use_settings if wrapped_test._hypothesis_internal_use_seed is not None: random = Random(wrapped_test._hypothesis_internal_use_seed) elif settings.derandomize: random = Random(function_digest(test)) else: random = new_random() import hypothesis.strategies as sd selfy = None arguments, kwargs = convert_positional_arguments( wrapped_test, arguments, kwargs) # If the test function is a method of some kind, the bound object # will be the first named argument if there are any, otherwise the # first vararg (if any). if argspec.args: selfy = kwargs.get(argspec.args[0]) elif arguments: selfy = arguments[0] test_runner = new_style_executor(selfy) for example in reversed( getattr(wrapped_test, 'hypothesis_explicit_examples', ())): if example.args: if len(example.args) > len(original_argspec.args): raise InvalidArgument( 'example has too many arguments for test. ' 'Expected at most %d but got %d' % (len(original_argspec.args), len(example.args))) example_kwargs = dict( zip(original_argspec.args[-len(example.args):], example.args)) else: example_kwargs = example.kwargs if Phase.explicit not in settings.phases: continue example_kwargs.update(kwargs) # Note: Test may mutate arguments and we can't rerun explicit # examples, so we have to calculate the failure message at this # point rather than than later. message_on_failure = 'Falsifying example: %s(%s)' % ( test.__name__, arg_string(test, arguments, example_kwargs)) try: with BuildContext() as b: test_runner( None, lambda data: test(*arguments, **example_kwargs)) except BaseException: report(message_on_failure) for n in b.notes: report(n) raise if settings.max_examples <= 0: return arguments = tuple(arguments) given_specifier = sd.tuples( sd.just(arguments), sd.fixed_dictionaries(generator_kwargs).map( lambda args: dict(args, **kwargs))) def fail_health_check(message, label): if label in settings.suppress_health_check: return message += ( '\nSee http://hypothesis.readthedocs.org/en/latest/health' 'checks.html for more information about this. ') message += ( 'If you want to disable just this health check, add %s ' 'to the suppress_health_check settings for this test.') % ( label, ) raise FailedHealthCheck(message) search_strategy = given_specifier if selfy is not None: search_strategy = WithRunner(search_strategy, selfy) search_strategy.validate() perform_health_check = settings.perform_health_check perform_health_check &= Settings.default.perform_health_check from hypothesis.internal.conjecture.data import TestData, Status, \ StopTest if not (Phase.reuse in settings.phases or Phase.generate in settings.phases): return if perform_health_check: initial_state = getglobalrandomstate() health_check_random = Random(random.getrandbits(128)) # We "pre warm" the health check with one draw to give it some # time to calculate any cached data. This prevents the case # where the first draw of the health check takes ages because # of loading unicode data the first time. 
data = TestData(max_length=settings.buffer_size, draw_bytes=lambda data, n, distribution: distribution(health_check_random, n)) with Settings(settings, verbosity=Verbosity.quiet): try: test_runner( data, reify_and_execute( search_strategy, lambda *args, **kwargs: None, )) except BaseException: pass count = 0 overruns = 0 filtered_draws = 0 start = time.time() while (count < 10 and time.time() < start + 1 and filtered_draws < 50 and overruns < 20): try: data = TestData( max_length=settings.buffer_size, draw_bytes=lambda data, n, distribution: distribution(health_check_random, n)) with Settings(settings, verbosity=Verbosity.quiet): test_runner( data, reify_and_execute( search_strategy, lambda *args, **kwargs: None, )) count += 1 except UnsatisfiedAssumption: filtered_draws += 1 except StopTest: if data.status == Status.INVALID: filtered_draws += 1 else: assert data.status == Status.OVERRUN overruns += 1 except InvalidArgument: raise except Exception: report(traceback.format_exc()) if test_runner is default_new_style_executor: fail_health_check( 'An exception occurred during data ' 'generation in initial health check. ' 'This indicates a bug in the strategy. ' 'This could either be a Hypothesis bug or ' "an error in a function you've passed to " 'it to construct your data.', HealthCheck.exception_in_generation, ) else: fail_health_check( 'An exception occurred during data ' 'generation in initial health check. ' 'This indicates a bug in the strategy. ' 'This could either be a Hypothesis bug or ' 'an error in a function you\'ve passed to ' 'it to construct your data. Additionally, ' 'you have a custom executor, which means ' 'that this could be your executor failing ' 'to handle a function which returns None. ', HealthCheck.exception_in_generation, ) if overruns >= 20 or (not count and overruns > 0): fail_health_check(( 'Examples routinely exceeded the max allowable size. ' '(%d examples overran while generating %d valid ones)' '. Generating examples this large will usually lead to' ' bad results. You should try setting average_size or ' 'max_size parameters on your collections and turning ' 'max_leaves down on recursive() calls.') % (overruns, count), HealthCheck.data_too_large) if filtered_draws >= 50 or (not count and filtered_draws > 0): fail_health_check(( 'It looks like your strategy is filtering out a lot ' 'of data. Health check found %d filtered examples but ' 'only %d good ones. This will make your tests much ' 'slower, and also will probably distort the data ' 'generation quite a lot. You should adapt your ' 'strategy to filter less. This can also be caused by ' 'a low max_leaves parameter in recursive() calls') % (filtered_draws, count), HealthCheck.filter_too_much) runtime = time.time() - start if runtime > 1.0 or count < 10: fail_health_check( ('Data generation is extremely slow: Only produced ' '%d valid examples in %.2f seconds (%d invalid ones ' 'and %d exceeded maximum size). Try decreasing ' "size of the data you're generating (with e.g." 'average_size or max_leaves parameters).') % (count, runtime, filtered_draws, overruns), HealthCheck.too_slow, ) if getglobalrandomstate() != initial_state: fail_health_check( 'Data generation depends on global random module. ' 'This makes results impossible to replay, which ' 'prevents Hypothesis from working correctly. ' 'If you want to use methods from random, use ' 'randoms() from hypothesis.strategies to get an ' 'instance of Random you can use. 
Alternatively, you ' 'can use the random_module() strategy to explicitly ' 'seed the random module.', HealthCheck.random_module, ) last_exception = [None] repr_for_last_exception = [None] performed_random_check = [False] def evaluate_test_data(data): if perform_health_check and not performed_random_check[0]: initial_state = getglobalrandomstate() performed_random_check[0] = True else: initial_state = None try: result = test_runner( data, reify_and_execute( search_strategy, test, )) if result is not None and settings.perform_health_check: fail_health_check( ('Tests run under @given should return None, but ' '%s returned %r instead.') % (test.__name__, result), HealthCheck.return_value) return False except UnsatisfiedAssumption: data.mark_invalid() except ( HypothesisDeprecationWarning, FailedHealthCheck, StopTest, ): raise except Exception: last_exception[0] = traceback.format_exc() verbose_report(last_exception[0]) data.mark_interesting() finally: if (initial_state is not None and getglobalrandomstate() != initial_state): fail_health_check( 'Your test used the global random module. ' 'This is unlikely to work correctly. You should ' 'consider using the randoms() strategy from ' 'hypothesis.strategies instead. Alternatively, ' 'you can use the random_module() strategy to ' 'explicitly seed the random module.', HealthCheck.random_module, ) from hypothesis.internal.conjecture.engine import TestRunner falsifying_example = None database_key = str_to_bytes(fully_qualified_name(test)) start_time = time.time() runner = TestRunner( evaluate_test_data, settings=settings, random=random, database_key=database_key, ) runner.run() run_time = time.time() - start_time timed_out = (settings.timeout > 0 and run_time >= settings.timeout) if runner.last_data is None: return if runner.last_data.status == Status.INTERESTING: falsifying_example = runner.last_data.buffer if settings.database is not None: settings.database.save(database_key, falsifying_example) else: if runner.valid_examples < min( settings.min_satisfying_examples, settings.max_examples, ): if timed_out: raise Timeout( ('Ran out of time before finding a satisfying ' 'example for ' '%s. Only found %d examples in ' + '%.2fs.') % (get_pretty_function_description(test), runner.valid_examples, run_time)) else: raise Unsatisfiable( ('Unable to satisfy assumptions of hypothesis ' '%s. Only %d examples considered ' 'satisfied assumptions') % ( get_pretty_function_description(test), runner.valid_examples, )) return assert last_exception[0] is not None try: with settings: test_runner( TestData.for_buffer(falsifying_example), reify_and_execute(search_strategy, test, print_example=True, is_final=True)) except (UnsatisfiedAssumption, StopTest): report(traceback.format_exc()) raise Flaky( 'Unreliable assumption: An example which satisfied ' 'assumptions on the first run now fails it.') report( 'Failed to reproduce exception. Expected: \n' + last_exception[0], ) filter_message = ( 'Unreliable test data: Failed to reproduce a failure ' 'and then when it came to recreating the example in ' 'order to print the test data with a flaky result ' 'the example was filtered out (by e.g. a ' 'call to filter in your strategy) when we didn\'t ' 'expect it to be.') try: test_runner( TestData.for_buffer(falsifying_example), reify_and_execute(search_strategy, test_is_flaky( test, repr_for_last_exception[0]), print_example=True, is_final=True)) except (UnsatisfiedAssumption, StopTest): raise Flaky(filter_message)
def test_can_print_bytes():
    with capture_out() as o:
        with reporting.with_reporter(reporting.default):
            report(b"hi")
    assert o.getvalue() == u"hi\n"
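# Usage sketch (assumption, mirroring the test above): with_reporter swaps the
# callable that report() sends its output to, which is how these tests capture
# what Hypothesis would otherwise print to the terminal.
from hypothesis import reporting
from hypothesis.reporting import report

collected = []
with reporting.with_reporter(collected.append):
    report('falsifying example goes here')
# `collected` now holds the reported text instead of it being printed.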
def execute( self, data, print_example=False, is_final=False, expected_failure=None, collect=False, ): text_repr = [None] if self.settings.deadline is None: test = self.test else: @proxies(self.test) def test(*args, **kwargs): self.__test_runtime = None initial_draws = len(data.draw_times) start = benchmark_time() result = self.test(*args, **kwargs) finish = benchmark_time() internal_draw_time = sum(data.draw_times[initial_draws:]) runtime = (finish - start - internal_draw_time) * 1000 self.__test_runtime = runtime if self.settings.deadline is not_set: if (not self.__warned_deadline and runtime >= 200): self.__warned_deadline = True note_deprecation( ('Test took %.2fms to run. In future the default ' 'deadline setting will be 200ms, which will ' 'make this an error. You can set deadline to ' 'an explicit value of e.g. %d to turn tests ' 'slower than this into an error, or you can set ' 'it to None to disable this check entirely.') % ( runtime, ceil(runtime / 100) * 100, )) else: current_deadline = self.settings.deadline if not is_final: current_deadline *= 1.25 if runtime >= current_deadline: raise DeadlineExceeded(runtime, self.settings.deadline) return result def run(data): if not hasattr(data, 'can_reproduce_example_from_repr'): data.can_reproduce_example_from_repr = True with local_settings(self.settings): with BuildContext(data, is_final=is_final): with deterministic_PRNG(): args, kwargs = data.draw(self.search_strategy) if expected_failure is not None: text_repr[0] = arg_string(test, args, kwargs) if print_example: example = '%s(%s)' % (test.__name__, arg_string(test, args, kwargs)) try: ast.parse(example) except SyntaxError: data.can_reproduce_example_from_repr = False report('Falsifying example: %s' % (example, )) elif current_verbosity() >= Verbosity.verbose: report(lambda: 'Trying example: %s(%s)' % (test.__name__, arg_string(test, args, kwargs))) if self.collector is None or not collect: with deterministic_PRNG(): return test(*args, **kwargs) else: # pragma: no cover try: self.collector.start() with deterministic_PRNG(): return test(*args, **kwargs) finally: self.collector.stop() result = self.test_runner(data, run) if expected_failure is not None: exception, traceback = expected_failure if (isinstance(exception, DeadlineExceeded) and self.__test_runtime is not None): report( ('Unreliable test timings! On an initial run, this ' 'test took %.2fms, which exceeded the deadline of ' '%.2fms, but on a subsequent run it took %.2f ms, ' 'which did not. If you expect this sort of ' 'variability in your test timings, consider turning ' 'deadlines off for this test by setting deadline=None.') % (exception.runtime, self.settings.deadline, self.__test_runtime)) else: report( 'Failed to reproduce exception. Expected: \n' + traceback, ) self.__flaky( ('Hypothesis %s(%s) produces unreliable results: Falsified' ' on the first call but did not on a subsequent one') % ( test.__name__, text_repr[0], )) return result
def run_state_machine(factory, data):
    cd = data.conjecture_data
    machine = factory()
    check_type(RuleBasedStateMachine, machine, "state_machine_factory()")
    cd.hypothesis_runner = machine

    print_steps = (
        current_build_context().is_final
        or current_verbosity() >= Verbosity.debug
    )
    try:
        if print_steps:
            report(f"state = {machine.__class__.__name__}()")
        machine.check_invariants()
        max_steps = settings.stateful_step_count
        steps_run = 0

        while True:
            # We basically always want to run the maximum number of steps,
            # but need to leave a small probability of terminating early
            # in order to allow for reducing the number of steps once we
            # find a failing test case, so we stop with probability of
            # 2 ** -16 during normal operation but force a stop when we've
            # generated enough steps.
            cd.start_example(STATE_MACHINE_RUN_LABEL)
            if steps_run == 0:
                cd.draw_bits(16, forced=1)
            elif steps_run >= max_steps:
                cd.draw_bits(16, forced=0)
                break
            else:
                # All we really care about is whether this value is zero
                # or non-zero, so if it's > 1 we discard it and insert a
                # replacement value after
                cd.start_example(SHOULD_CONTINUE_LABEL)
                should_continue_value = cd.draw_bits(16)
                if should_continue_value > 1:
                    cd.stop_example(discard=True)
                    cd.draw_bits(16, forced=int(bool(should_continue_value)))
                else:
                    cd.stop_example()
                if should_continue_value == 0:
                    break
            steps_run += 1

            # Choose a rule to run, preferring an initialize rule if there
            # are any which have not been run yet.
            if machine._initialize_rules_to_run:
                init_rules = [
                    st.tuples(st.just(rule), st.fixed_dictionaries(rule.arguments))
                    for rule in machine._initialize_rules_to_run
                ]
                rule, data = cd.draw(st.one_of(init_rules))
                machine._initialize_rules_to_run.remove(rule)
            else:
                rule, data = cd.draw(machine._rules_strategy)

            # Pretty-print the values this rule was called with *before*
            # calling _add_result_to_targets, to avoid printing arguments
            # which are also a return value using the variable name they
            # are assigned to.
            # See https://github.com/HypothesisWorks/hypothesis/issues/2341
            if print_steps:
                data_to_print = {
                    k: machine._pretty_print(v) for k, v in data.items()
                }

            # Assign 'result' here in case executing the rule fails below
            result = multiple()
            try:
                data = dict(data)
                for k, v in list(data.items()):
                    if isinstance(v, VarReference):
                        data[k] = machine.names_to_values[v.name]
                result = rule.function(machine, **data)
                if rule.targets:
                    if isinstance(result, MultipleResults):
                        for single_result in result.values:
                            machine._add_result_to_targets(
                                rule.targets, single_result)
                    else:
                        machine._add_result_to_targets(rule.targets, result)
            finally:
                if print_steps:
                    # 'result' is only used if the step has target bundles.
                    # If it does, and the result is a 'MultipleResult',
                    # then 'print_step' prints a multi-variable assignment.
                    machine._print_step(rule, data_to_print, result)
            machine.check_invariants()
            cd.stop_example()
    finally:
        if print_steps:
            report("state.teardown()")
        machine.teardown()
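# Usage sketch (assumption, not part of run_state_machine itself): a minimal
# rule-based state machine. On failure, Hypothesis replays it in the format
# produced by the report() calls above: "state = NumberModel()", one
# "state.add(...)" line per step, and finally "state.teardown()".
from hypothesis import strategies as st
from hypothesis.stateful import RuleBasedStateMachine, rule

class NumberModel(RuleBasedStateMachine):
    def __init__(self):
        super().__init__()
        self.total = 0

    @rule(n=st.integers(min_value=0, max_value=10))
    def add(self, n):
        self.total += n
        assert self.total >= 0

TestNumberModel = NumberModel.TestCase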
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    if global_force_seed is None:
        database_key = function_digest(self.test)
    else:
        database_key = None
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings,
        random=self.random,
        database_key=database_key,
    )
    try:
        runner.run()
    finally:
        self.used_examples_from_database = runner.used_examples_from_database
    note_engine_for_statistics(runner)
    self.used_examples_from_database = runner.used_examples_from_database

    if runner.call_count == 0:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer),
            reverse=True,
        )
    else:
        if runner.valid_examples == 0:
            raise Unsatisfiable(
                "Unable to satisfy assumptions of hypothesis %s."
                % (get_pretty_function_description(self.test),))

    if not self.falsifying_examples:
        return
    elif not self.settings.report_multiple_bugs:
        del self.falsifying_examples[:-1]

    self.failed_normally = True

    flaky = 0

    for falsifying_example in self.falsifying_examples:
        info = falsifying_example.extra_information

        ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
        self.__was_flaky = False
        assert info.__expected_exception is not None
        try:
            self.execute(
                ran_example,
                print_example=True,
                is_final=True,
                expected_failure=(
                    info.__expected_exception,
                    info.__expected_traceback,
                ),
            )
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                "Unreliable assumption: An example which satisfied "
                "assumptions on the first run now fails it.")
        except BaseException as e:
            if len(self.falsifying_examples) <= 1:
                raise
            tb = get_trimmed_traceback()
            report("".join(traceback.format_exception(type(e), e, tb)))
        finally:  # pragma: no cover
            # This section is in fact entirely covered by the tests in
            # test_reproduce_failure, but it seems to trigger a lovely set
            # of coverage bugs: The branches show up as uncovered (despite
            # definitely being covered - you can add an assert False else
            # branch to verify this and see it fail - and additionally the
            # second branch still complains about lack of coverage even if
            # you add a pragma: no cover to it!
            # See https://bitbucket.org/ned/coveragepy/issues/623/
            if self.settings.print_blob is not PrintSettings.NEVER:
                failure_blob = encode_failure(falsifying_example.buffer)
                # Have to use the example we actually ran, not the original
                # falsifying example! Otherwise we won't catch problems
                # where the repr of the generated example doesn't parse.
                can_use_repr = ran_example.can_reproduce_example_from_repr
                if self.settings.print_blob is PrintSettings.ALWAYS or (
                    self.settings.print_blob is PrintSettings.INFER
                    and self.settings.verbosity >= Verbosity.normal
                    and not can_use_repr
                    and len(failure_blob) < 200
                ):
                    report((
                        "\nYou can reproduce this example by temporarily "
                        "adding @reproduce_failure(%r, %r) as a decorator "
                        "on your test case") % (__version__, failure_blob))
        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky(
            ("Hypothesis found %d distinct failures, but %d of them "
             "exhibited some sort of flaky behaviour.")
            % (len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures(
            "Hypothesis found %d distinct failures."
            % (len(self.falsifying_examples)))
def test_can_print_bytes():
    with capture_out() as o:
        with reporting.with_reporter(reporting.default):
            report(b'hi')
    assert o.getvalue() == u'hi\n'
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    if global_force_seed is None:
        database_key = str_to_bytes(fully_qualified_name(self.test))
    else:
        database_key = None
    self.start_time = time.time()
    global in_given
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings, random=self.random,
        database_key=database_key,
    )

    if in_given or self.collector is None:
        runner.run()
    else:  # pragma: no cover
        in_given = True
        original_trace = sys.gettrace()
        try:
            sys.settrace(None)
            runner.run()
        finally:
            in_given = False
            sys.settrace(original_trace)
    self.used_examples_from_database = \
        runner.used_examples_from_database
    note_engine_for_statistics(runner)
    run_time = time.time() - self.start_time

    self.used_examples_from_database = runner.used_examples_from_database

    if runner.used_examples_from_database:
        if self.settings.derandomize:
            note_deprecation(
                'In future derandomize will imply database=None, but your '
                'test is currently using examples from the database. To '
                'get the future behaviour, update your settings to '
                'include database=None.')
        if self.__had_seed:
            note_deprecation(
                'In future use of @seed will imply database=None in your '
                'settings, but your test is currently using examples from '
                'the database. To get the future behaviour, update your '
                'settings for this test to include database=None.')

    timed_out = runner.exit_reason == ExitReason.timeout
    if runner.call_count == 0:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer), reverse=True)
    else:
        if timed_out:
            note_deprecation((
                'Your tests are hitting the settings timeout (%.2fs). '
                'This functionality will go away in a future release '
                'and you should not rely on it. Instead, try setting '
                'max_examples to be some value lower than %d (the number '
                'of examples your test successfully ran here). Or, if you '
                'would prefer your tests to run to completion, regardless '
                'of how long they take, you can set the timeout value to '
                'hypothesis.unlimited.') % (
                    self.settings.timeout, runner.valid_examples),
                self.settings)
        if runner.valid_examples == 0:
            if timed_out:
                raise Timeout(
                    ('Ran out of time before finding a satisfying '
                     'example for %s. Only found %d examples in %.2fs.')
                    % (get_pretty_function_description(self.test),
                       runner.valid_examples, run_time))
            else:
                raise Unsatisfiable(
                    'Unable to satisfy assumptions of hypothesis %s.'
                    % (get_pretty_function_description(self.test),))

    if not self.falsifying_examples:
        return

    self.failed_normally = True

    flaky = 0

    for falsifying_example in self.falsifying_examples:
        ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
        self.__was_flaky = False
        assert falsifying_example.__expected_exception is not None
        try:
            self.execute(
                ran_example,
                print_example=True, is_final=True,
                expected_failure=(
                    falsifying_example.__expected_exception,
                    falsifying_example.__expected_traceback,
                ))
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                'Unreliable assumption: An example which satisfied '
                'assumptions on the first run now fails it.')
        except BaseException:
            if len(self.falsifying_examples) <= 1:
                raise
            report(traceback.format_exc())
        finally:  # pragma: no cover
            # This section is in fact entirely covered by the tests in
            # test_reproduce_failure, but it seems to trigger a lovely set
            # of coverage bugs: The branches show up as uncovered (despite
            # definitely being covered - you can add an assert False else
            # branch to verify this and see it fail - and additionally the
            # second branch still complains about lack of coverage even if
            # you add a pragma: no cover to it!
            # See https://bitbucket.org/ned/coveragepy/issues/623/
            if self.settings.print_blob is not PrintSettings.NEVER:
                failure_blob = encode_failure(falsifying_example.buffer)
                # Have to use the example we actually ran, not the original
                # falsifying example! Otherwise we won't catch problems
                # where the repr of the generated example doesn't parse.
                can_use_repr = ran_example.can_reproduce_example_from_repr
                if (self.settings.print_blob is PrintSettings.ALWAYS or (
                        self.settings.print_blob is PrintSettings.INFER and
                        not can_use_repr and len(failure_blob) < 200)):
                    report((
                        '\n'
                        'You can reproduce this example by temporarily '
                        'adding @reproduce_failure(%r, %r) as a decorator '
                        'on your test case') % (__version__, failure_blob))
        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky(
            ('Hypothesis found %d distinct failures, but %d of them '
             'exhibited some sort of flaky behaviour.')
            % (len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures(
            'Hypothesis found %d distinct failures.'
            % len(self.falsifying_examples))
def wrapped_test(*arguments, **kwargs): settings = wrapped_test._hypothesis_internal_use_settings if wrapped_test._hypothesis_internal_use_seed is not None: random = Random(wrapped_test._hypothesis_internal_use_seed) elif settings.derandomize: random = Random(function_digest(test)) else: random = new_random() import hypothesis.strategies as sd selfy = None arguments, kwargs = convert_positional_arguments(wrapped_test, arguments, kwargs) # If the test function is a method of some kind, the bound object # will be the first named argument if there are any, otherwise the # first vararg (if any). if argspec.args: selfy = kwargs.get(argspec.args[0]) elif arguments: selfy = arguments[0] test_runner = new_style_executor(selfy) for example in reversed(getattr(wrapped_test, "hypothesis_explicit_examples", ())): if example.args: if len(example.args) > len(original_argspec.args): raise InvalidArgument( "example has too many arguments for test. " "Expected at most %d but got %d" % (len(original_argspec.args), len(example.args)) ) example_kwargs = dict(zip(original_argspec.args[-len(example.args) :], example.args)) else: example_kwargs = example.kwargs if Phase.explicit not in settings.phases: continue example_kwargs.update(kwargs) # Note: Test may mutate arguments and we can't rerun explicit # examples, so we have to calculate the failure message at this # point rather than than later. message_on_failure = "Falsifying example: %s(%s)" % ( test.__name__, arg_string(test, arguments, example_kwargs), ) try: with BuildContext() as b: test_runner(None, lambda data: test(*arguments, **example_kwargs)) except BaseException: report(message_on_failure) for n in b.notes: report(n) raise if settings.max_examples <= 0: return arguments = tuple(arguments) given_specifier = sd.tuples( sd.just(arguments), sd.fixed_dictionaries(generator_kwargs).map(lambda args: dict(args, **kwargs)) ) def fail_health_check(message, label): if label in settings.suppress_health_check: return message += ( "\nSee https://hypothesis.readthedocs.io/en/latest/health" "checks.html for more information about this. " ) message += ( "If you want to disable just this health check, add %s " "to the suppress_health_check settings for this test." ) % (label,) raise FailedHealthCheck(message) search_strategy = given_specifier if selfy is not None: search_strategy = WithRunner(search_strategy, selfy) search_strategy.validate() perform_health_check = settings.perform_health_check perform_health_check &= Settings.default.perform_health_check from hypothesis.internal.conjecture.data import TestData, Status, StopTest if not (Phase.reuse in settings.phases or Phase.generate in settings.phases): return if perform_health_check: health_check_random = Random(random.getrandbits(128)) # We "pre warm" the health check with one draw to give it some # time to calculate any cached data. This prevents the case # where the first draw of the health check takes ages because # of loading unicode data the first time. 
data = TestData( max_length=settings.buffer_size, draw_bytes=lambda data, n, distribution: distribution(health_check_random, n), ) with Settings(settings, verbosity=Verbosity.quiet): try: test_runner(data, reify_and_execute(search_strategy, lambda *args, **kwargs: None)) except BaseException: pass count = 0 overruns = 0 filtered_draws = 0 start = time.time() while count < 10 and time.time() < start + 1 and filtered_draws < 50 and overruns < 20: try: data = TestData( max_length=settings.buffer_size, draw_bytes=lambda data, n, distribution: distribution(health_check_random, n), ) with Settings(settings, verbosity=Verbosity.quiet): test_runner(data, reify_and_execute(search_strategy, lambda *args, **kwargs: None)) count += 1 except UnsatisfiedAssumption: filtered_draws += 1 except StopTest: if data.status == Status.INVALID: filtered_draws += 1 else: assert data.status == Status.OVERRUN overruns += 1 except InvalidArgument: raise except Exception: if HealthCheck.exception_in_generation in settings.suppress_health_check: raise report(traceback.format_exc()) if test_runner is default_new_style_executor: fail_health_check( "An exception occurred during data " "generation in initial health check. " "This indicates a bug in the strategy. " "This could either be a Hypothesis bug or " "an error in a function you've passed to " "it to construct your data.", HealthCheck.exception_in_generation, ) else: fail_health_check( "An exception occurred during data " "generation in initial health check. " "This indicates a bug in the strategy. " "This could either be a Hypothesis bug or " "an error in a function you've passed to " "it to construct your data. Additionally, " "you have a custom executor, which means " "that this could be your executor failing " "to handle a function which returns None. ", HealthCheck.exception_in_generation, ) if overruns >= 20 or (not count and overruns > 0): fail_health_check( ( "Examples routinely exceeded the max allowable size. " "(%d examples overran while generating %d valid ones)" ". Generating examples this large will usually lead to" " bad results. You should try setting average_size or " "max_size parameters on your collections and turning " "max_leaves down on recursive() calls." ) % (overruns, count), HealthCheck.data_too_large, ) if filtered_draws >= 50 or (not count and filtered_draws > 0): fail_health_check( ( "It looks like your strategy is filtering out a lot " "of data. Health check found %d filtered examples but " "only %d good ones. This will make your tests much " "slower, and also will probably distort the data " "generation quite a lot. You should adapt your " "strategy to filter less. This can also be caused by " "a low max_leaves parameter in recursive() calls" ) % (filtered_draws, count), HealthCheck.filter_too_much, ) runtime = time.time() - start if runtime > 1.0 or count < 10: fail_health_check( ( "Data generation is extremely slow: Only produced " "%d valid examples in %.2f seconds (%d invalid ones " "and %d exceeded maximum size). Try decreasing " "size of the data you're generating (with e.g." "average_size or max_leaves parameters)." 
) % (count, runtime, filtered_draws, overruns), HealthCheck.too_slow, ) last_exception = [None] repr_for_last_exception = [None] def evaluate_test_data(data): try: result = test_runner(data, reify_and_execute(search_strategy, test)) if result is not None and settings.perform_health_check: fail_health_check( ("Tests run under @given should return None, but " "%s returned %r instead.") % (test.__name__, result), HealthCheck.return_value, ) return False except UnsatisfiedAssumption: data.mark_invalid() except (HypothesisDeprecationWarning, FailedHealthCheck, StopTest): raise except Exception: last_exception[0] = traceback.format_exc() verbose_report(last_exception[0]) data.mark_interesting() from hypothesis.internal.conjecture.engine import TestRunner falsifying_example = None database_key = str_to_bytes(fully_qualified_name(test)) start_time = time.time() runner = TestRunner(evaluate_test_data, settings=settings, random=random, database_key=database_key) runner.run() run_time = time.time() - start_time timed_out = settings.timeout > 0 and run_time >= settings.timeout if runner.last_data is None: return if runner.last_data.status == Status.INTERESTING: falsifying_example = runner.last_data.buffer if settings.database is not None: settings.database.save(database_key, falsifying_example) else: if runner.valid_examples < min(settings.min_satisfying_examples, settings.max_examples): if timed_out: raise Timeout( ( "Ran out of time before finding a satisfying " "example for " "%s. Only found %d examples in " + "%.2fs." ) % (get_pretty_function_description(test), runner.valid_examples, run_time) ) else: raise Unsatisfiable( ( "Unable to satisfy assumptions of hypothesis " "%s. Only %d examples considered " "satisfied assumptions" ) % (get_pretty_function_description(test), runner.valid_examples) ) return assert last_exception[0] is not None try: with settings: test_runner( TestData.for_buffer(falsifying_example), reify_and_execute(search_strategy, test, print_example=True, is_final=True), ) except (UnsatisfiedAssumption, StopTest): report(traceback.format_exc()) raise Flaky( "Unreliable assumption: An example which satisfied " "assumptions on the first run now fails it." ) report("Failed to reproduce exception. Expected: \n" + last_exception[0]) filter_message = ( "Unreliable test data: Failed to reproduce a failure " "and then when it came to recreating the example in " "order to print the test data with a flaky result " "the example was filtered out (by e.g. a " "call to filter in your strategy) when we didn't " "expect it to be." ) try: test_runner( TestData.for_buffer(falsifying_example), reify_and_execute( search_strategy, test_is_flaky(test, repr_for_last_exception[0]), print_example=True, is_final=True, ), ) except (UnsatisfiedAssumption, StopTest): raise Flaky(filter_message)
def wrapped_test(*arguments, **kwargs):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    if getattr(test, 'is_hypothesis_test', False):
        note_deprecation((
            'You have applied @given to a test more than once. In '
            'future this will be an error. Applying @given twice '
            'wraps the test twice, which can be extremely slow. A '
            'similar effect can be gained by combining the arguments '
            'to the two calls to given. For example, instead of '
            '@given(booleans()) @given(integers()), you could write '
            '@given(booleans(), integers())'))

    settings = wrapped_test._hypothesis_internal_use_settings

    random = get_random_for_wrapped_test(test, wrapped_test)

    if infer in generator_kwargs.values():
        hints = get_type_hints(test)
        for name in [
            name for name, value in generator_kwargs.items()
            if value is infer
        ]:
            if name not in hints:
                raise InvalidArgument(
                    'passed %s=infer for %s, but %s has no type annotation'
                    % (name, test.__name__, name))
            generator_kwargs[name] = st.from_type(hints[name])

    processed_args = process_arguments_to_given(
        wrapped_test, arguments, kwargs, generator_arguments,
        generator_kwargs, argspec, test, settings)
    arguments, kwargs, test_runner, search_strategy = processed_args

    execute_explicit_examples(
        test_runner, test, wrapped_test, settings, arguments, kwargs)

    if settings.max_examples <= 0:
        return

    if not (Phase.reuse in settings.phases or
            Phase.generate in settings.phases):
        return

    try:
        state = StateForActualGivenExecution(
            test_runner, search_strategy, test, settings, random)
        state.run()
    except BaseException:
        generated_seed = \
            wrapped_test._hypothesis_internal_use_generated_seed
        if generated_seed is not None:
            if running_under_pytest:
                report(
                    ('You can add @seed(%(seed)d) to this test or run '
                     'pytest with --hypothesis-seed=%(seed)d to '
                     'reproduce this failure.') % {'seed': generated_seed},
                )
            else:
                report(
                    ('You can add @seed(%d) to this test to reproduce '
                     'this failure.') % (generated_seed,))
        raise
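# Usage sketch (assumption, illustrating the @seed hint reported above): the
# seed value and the test itself are hypothetical, but @seed is the public
# decorator that pins the generation order suggested after a failure.
from hypothesis import given, seed, strategies as st

@seed(12345)  # replays the generation order named in the report() hint
@given(st.floats(allow_nan=False))
def test_double_negation(x):
    assert -(-x) == x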
def print_end(self):
    report(u"state.teardown()")
def run(self): # Tell pytest to omit the body of this function from tracebacks __tracebackhide__ = True database_key = str_to_bytes(fully_qualified_name(self.test)) self.start_time = time.time() global in_given runner = ConjectureRunner( self.evaluate_test_data, settings=self.settings, random=self.random, database_key=database_key, ) if in_given or self.collector is None: runner.run() else: # pragma: no cover in_given = True original_trace = sys.gettrace() try: sys.settrace(None) runner.run() finally: in_given = False sys.settrace(original_trace) note_engine_for_statistics(runner) run_time = time.time() - self.start_time timed_out = runner.exit_reason == ExitReason.timeout if runner.last_data is None: return if runner.interesting_examples: self.falsifying_examples = sorted( [d for d in runner.interesting_examples.values()], key=lambda d: sort_key(d.buffer), reverse=True) else: if timed_out: note_deprecation(( 'Your tests are hitting the settings timeout (%.2fs). ' 'This functionality will go away in a future release ' 'and you should not rely on it. Instead, try setting ' 'max_examples to be some value lower than %d (the number ' 'of examples your test successfully ran here). Or, if you ' 'would prefer your tests to run to completion, regardless ' 'of how long they take, you can set the timeout value to ' 'hypothesis.unlimited.') % (self.settings.timeout, runner.valid_examples), self.settings) if runner.valid_examples < min( self.settings.min_satisfying_examples, self.settings.max_examples, ) and not (runner.exit_reason == ExitReason.finished and self.at_least_one_success): if timed_out: raise Timeout( ('Ran out of time before finding a satisfying ' 'example for ' '%s. Only found %d examples in ' + '%.2fs.') % (get_pretty_function_description( self.test), runner.valid_examples, run_time)) else: raise Unsatisfiable( ('Unable to satisfy assumptions of hypothesis ' '%s. Only %d examples considered ' 'satisfied assumptions') % ( get_pretty_function_description(self.test), runner.valid_examples, )) if not self.falsifying_examples: return flaky = 0 for falsifying_example in self.falsifying_examples: self.__was_flaky = False assert falsifying_example.__expected_exception is not None try: self.execute(ConjectureData.for_buffer( falsifying_example.buffer), print_example=True, is_final=True, expected_failure=( falsifying_example.__expected_exception, falsifying_example.__expected_traceback, )) except (UnsatisfiedAssumption, StopTest): report(traceback.format_exc()) self.__flaky( 'Unreliable assumption: An example which satisfied ' 'assumptions on the first run now fails it.') except BaseException: if len(self.falsifying_examples) <= 1: raise report(traceback.format_exc()) if self.__was_flaky: flaky += 1 # If we only have one example then we should have raised an error or # flaky prior to this point. assert len(self.falsifying_examples) > 1 if flaky > 0: raise Flaky( ('Hypothesis found %d distinct failures, but %d of them ' 'exhibited some sort of flaky behaviour.') % (len(self.falsifying_examples), flaky)) else: raise MultipleFailures(('Hypothesis found %d distinct failures.') % (len(self.falsifying_examples, )))
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    database_key = str_to_bytes(fully_qualified_name(self.test))
    self.start_time = time.time()
    global in_given
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings, random=self.random,
        database_key=database_key,
    )

    if in_given or self.collector is None:
        runner.run()
    else:  # pragma: no cover
        in_given = True
        original_trace = sys.gettrace()
        try:
            sys.settrace(None)
            runner.run()
        finally:
            in_given = False
            sys.settrace(original_trace)

    note_engine_for_statistics(runner)
    run_time = time.time() - self.start_time
    timed_out = runner.exit_reason == ExitReason.timeout
    if runner.last_data is None:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer), reverse=True)
    else:
        if timed_out:
            note_deprecation((
                'Your tests are hitting the settings timeout (%.2fs). '
                'This functionality will go away in a future release '
                'and you should not rely on it. Instead, try setting '
                'max_examples to be some value lower than %d (the number '
                'of examples your test successfully ran here). Or, if you '
                'would prefer your tests to run to completion, regardless '
                'of how long they take, you can set the timeout value to '
                'hypothesis.unlimited.'
            ) % (self.settings.timeout, runner.valid_examples),
                self.settings)
        if runner.valid_examples < min(
            self.settings.min_satisfying_examples,
            self.settings.max_examples,
        ) and not (runner.exit_reason == ExitReason.finished and
                   self.at_least_one_success):
            if timed_out:
                raise Timeout((
                    'Ran out of time before finding a satisfying '
                    'example for '
                    '%s. Only found %d examples in ' +
                    '%.2fs.'
                ) % (
                    get_pretty_function_description(self.test),
                    runner.valid_examples, run_time))
            else:
                raise Unsatisfiable((
                    'Unable to satisfy assumptions of hypothesis '
                    '%s. Only %d examples considered '
                    'satisfied assumptions'
                ) % (
                    get_pretty_function_description(self.test),
                    runner.valid_examples,
                ))

    if not self.falsifying_examples:
        return

    flaky = 0

    self.__in_final_replay = True

    for falsifying_example in self.falsifying_examples:
        self.__was_flaky = False
        try:
            with self.settings:
                self.test_runner(
                    ConjectureData.for_buffer(falsifying_example.buffer),
                    reify_and_execute(
                        self.search_strategy, self.test,
                        print_example=True, is_final=True))
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                'Unreliable assumption: An example which satisfied '
                'assumptions on the first run now fails it.')
        except BaseException:
            if len(self.falsifying_examples) <= 1:
                raise
            report(traceback.format_exc())
        else:
            if (isinstance(falsifying_example.__expected_exception,
                           DeadlineExceeded) and
                    self.__test_runtime is not None):
                report((
                    'Unreliable test timings! On an initial run, this '
                    'test took %.2fms, which exceeded the deadline of '
                    '%.2fms, but on a subsequent run it took %.2f ms, '
                    'which did not. If you expect this sort of '
                    'variability in your test timings, consider turning '
                    'deadlines off for this test by setting deadline=None.'
                ) % (
                    falsifying_example.__expected_exception.runtime,
                    self.settings.deadline, self.__test_runtime))
            else:
                report(
                    'Failed to reproduce exception. Expected: \n' +
                    falsifying_example.__expected_traceback,
                )

            filter_message = (
                'Unreliable test data: Failed to reproduce a failure '
                'and then when it came to recreating the example in '
                'order to print the test data with a flaky result '
                'the example was filtered out (by e.g. a '
                'call to filter in your strategy) when we didn\'t '
                'expect it to be.')

            try:
                self.test_runner(
                    ConjectureData.for_buffer(falsifying_example.buffer),
                    reify_and_execute(
                        self.search_strategy,
                        test_is_flaky(
                            self.test, self.repr_for_last_exception),
                        print_example=True, is_final=True))
            except (UnsatisfiedAssumption, StopTest):
                self.__flaky(filter_message)
            except Flaky as e:
                if len(self.falsifying_examples) > 1:
                    self.__flaky(e.args[0])
                else:
                    raise

        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky((
            'Hypothesis found %d distinct failures, but %d of them '
            'exhibited some sort of flaky behaviour.') % (
                len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures(
            ('Hypothesis found %d distinct failures.') %
            (len(self.falsifying_examples),))
def run(self):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True
    if global_force_seed is None:
        database_key = str_to_bytes(fully_qualified_name(self.test))
    else:
        database_key = None
    self.start_time = benchmark_time()
    runner = ConjectureRunner(
        self.evaluate_test_data,
        settings=self.settings, random=self.random,
        database_key=database_key,
    )
    try:
        runner.run()
    finally:
        self.used_examples_from_database = \
            runner.used_examples_from_database
    note_engine_for_statistics(runner)
    run_time = benchmark_time() - self.start_time

    self.used_examples_from_database = runner.used_examples_from_database

    if runner.used_examples_from_database:
        if self.settings.derandomize:
            note_deprecation((
                'In future derandomize will imply database=None, but your '
                'test: %s is currently using examples from the database. '
                'To get the future behaviour, update your settings to '
                'include database=None.') % (self.test.__name__,))
        if self.__had_seed:
            note_deprecation((
                'In future use of @seed will imply database=None in your '
                'settings, but your test: %s is currently using examples '
                'from the database. To get the future behaviour, update '
                'your settings for this test to include database=None.')
                % (self.test.__name__,))

    timed_out = runner.exit_reason == ExitReason.timeout
    if runner.call_count == 0:
        return
    if runner.interesting_examples:
        self.falsifying_examples = sorted(
            [d for d in runner.interesting_examples.values()],
            key=lambda d: sort_key(d.buffer), reverse=True
        )
    else:
        if runner.valid_examples == 0:
            if timed_out:
                raise Timeout((
                    'Ran out of time before finding a satisfying '
                    'example for %s. Only found %d examples in %.2fs.'
                ) % (
                    get_pretty_function_description(self.test),
                    runner.valid_examples, run_time
                ))
            else:
                raise Unsatisfiable(
                    'Unable to satisfy assumptions of hypothesis %s.' %
                    (get_pretty_function_description(self.test),)
                )

    if not self.falsifying_examples:
        return

    self.failed_normally = True

    flaky = 0

    for falsifying_example in self.falsifying_examples:
        ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
        self.__was_flaky = False
        assert falsifying_example.__expected_exception is not None
        try:
            self.execute(
                ran_example,
                print_example=True, is_final=True,
                expected_failure=(
                    falsifying_example.__expected_exception,
                    falsifying_example.__expected_traceback,
                )
            )
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            self.__flaky(
                'Unreliable assumption: An example which satisfied '
                'assumptions on the first run now fails it.'
            )
        except BaseException:
            if len(self.falsifying_examples) <= 1:
                raise
            report(traceback.format_exc())
        finally:  # pragma: no cover
            # This section is in fact entirely covered by the tests in
            # test_reproduce_failure, but it seems to trigger a lovely set
            # of coverage bugs: The branches show up as uncovered (despite
            # definitely being covered - you can add an assert False else
            # branch to verify this and see it fail - and additionally the
            # second branch still complains about lack of coverage even if
            # you add a pragma: no cover to it!
            # See https://bitbucket.org/ned/coveragepy/issues/623/
            if self.settings.print_blob is not PrintSettings.NEVER:
                failure_blob = encode_failure(falsifying_example.buffer)
                # Have to use the example we actually ran, not the original
                # falsifying example! Otherwise we won't catch problems
                # where the repr of the generated example doesn't parse.
                can_use_repr = ran_example.can_reproduce_example_from_repr
                if (
                    self.settings.print_blob is PrintSettings.ALWAYS or (
                        self.settings.print_blob is PrintSettings.INFER and
                        self.settings.verbosity >= Verbosity.normal and
                        not can_use_repr and
                        len(failure_blob) < 200
                    )
                ):
                    report((
                        '\n'
                        'You can reproduce this example by temporarily '
                        'adding @reproduce_failure(%r, %r) as a decorator '
                        'on your test case') % (
                            __version__, failure_blob,))
        if self.__was_flaky:
            flaky += 1

    # If we only have one example then we should have raised an error or
    # flaky prior to this point.
    assert len(self.falsifying_examples) > 1

    if flaky > 0:
        raise Flaky((
            'Hypothesis found %d distinct failures, but %d of them '
            'exhibited some sort of flaky behaviour.') % (
                len(self.falsifying_examples), flaky))
    else:
        raise MultipleFailures((
            'Hypothesis found %d distinct failures.') % (
                len(self.falsifying_examples),))
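# Usage sketch (assumption: this illustrates the @reproduce_failure hint
# printed above; it is not code from this module). The version string and
# blob are placeholders - in practice you paste exactly what Hypothesis
# reports, and the decorator deliberately errors if the version or blob
# do not match.
from hypothesis import given, reproduce_failure
from hypothesis.strategies import integers


@reproduce_failure('3.56.5', b'AXicY2BgAAAABAAB')  # placeholder version/blob
@given(integers())
def test_replays_reported_failure(n):
    assert n < 100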
def test_function(self, data):
    if self.__pending_call_explanation is not None:
        self.debug(self.__pending_call_explanation)
        self.__pending_call_explanation = None

    assert isinstance(data.observer, TreeRecordingObserver)
    self.call_count += 1

    interrupted = False
    try:
        self.__stoppable_test_function(data)
    except KeyboardInterrupt:
        interrupted = True
        raise
    except BaseException:
        self.save_buffer(data.buffer)
        raise
    finally:
        # No branch, because if we're interrupted we always raise
        # the KeyboardInterrupt, never continue to the code below.
        if not interrupted:  # pragma: no branch
            data.freeze()
            call_stats = {
                "status": data.status.name.lower(),
                "runtime": data.finish_time - data.start_time,
                "drawtime": math.fsum(data.draw_times),
                "events": sorted(
                    {self.event_to_string(e) for e in data.events}
                ),
            }
            self.stats_per_test_case.append(call_stats)
            self.__data_cache[data.buffer] = data.as_result()

    self.debug_data(data)

    if self.pareto_front is not None and self.pareto_front.add(
        data.as_result()
    ):
        self.save_buffer(data.buffer, sub_key=b"pareto")

    assert len(data.buffer) <= BUFFER_SIZE

    if data.status >= Status.VALID:
        for k, v in data.target_observations.items():
            self.best_observed_targets[k] = max(
                self.best_observed_targets[k], v
            )

            if k not in self.best_examples_of_observed_targets:
                self.best_examples_of_observed_targets[k] = data.as_result()
                continue

            existing_example = self.best_examples_of_observed_targets[k]
            existing_score = existing_example.target_observations[k]

            if v < existing_score:
                continue

            if v > existing_score or sort_key(data.buffer) < sort_key(
                existing_example.buffer
            ):
                self.best_examples_of_observed_targets[k] = data.as_result()

    if data.status == Status.VALID:
        self.valid_examples += 1

    if data.status == Status.INTERESTING:
        key = data.interesting_origin
        changed = False
        try:
            existing = self.interesting_examples[key]
        except KeyError:
            changed = True
            self.last_bug_found_at = self.call_count
            if self.first_bug_found_at is None:
                self.first_bug_found_at = self.call_count
        else:
            if sort_key(data.buffer) < sort_key(existing.buffer):
                self.shrinks += 1
                self.downgrade_buffer(existing.buffer)
                self.__data_cache.unpin(existing.buffer)
                changed = True

        if changed:
            self.save_buffer(data.buffer)
            self.interesting_examples[key] = data.as_result()
            self.__data_cache.pin(data.buffer)
            self.shrunk_examples.discard(key)

        if self.shrinks >= MAX_SHRINKS:
            self.exit_with(ExitReason.max_shrinks)

    if (
        not self.ignore_limits
        and self.finish_shrinking_deadline is not None
        and self.finish_shrinking_deadline < time.perf_counter()
    ):
        # See https://github.com/HypothesisWorks/hypothesis/issues/2340
        report(
            "WARNING: Hypothesis has spent more than five minutes working to shrink "
            "a failing example, and stopped because it is making very slow "
            "progress. When you re-run your tests, shrinking will resume and "
            "may take this long before aborting again.\n"
            "PLEASE REPORT THIS if you can provide a reproducing example, so that "
            "we can improve shrinking performance for everyone."
        )
        self.exit_with(ExitReason.very_slow_shrinking)

    if not self.interesting_examples:
        # Note that this logic is reproduced to end the generation phase when
        # we have interesting examples. Update that too if you change this!
        # (The doubled implementation is because here we exit the engine entirely,
        # while in the other case below we just want to move on to shrinking.)
        if self.valid_examples >= self.settings.max_examples:
            self.exit_with(ExitReason.max_examples)
        if self.call_count >= max(
            self.settings.max_examples * 10,
            # We have a high-ish default max iterations, so that tests
            # don't become flaky when max_examples is too low.
            1000,
        ):
            self.exit_with(ExitReason.max_iterations)

    if self.__tree_is_exhausted():
        self.exit_with(ExitReason.finished)

    self.record_for_health_check(data)
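# Usage sketch (assumption: standard public settings API, not part of the
# engine code above). max_examples bounds the number of valid examples before
# the runner exits with ExitReason.max_examples; the engine above also caps
# total calls at max(10 * max_examples, 1000).
from hypothesis import given, settings
from hypothesis.strategies import integers


@settings(max_examples=50)
@given(integers())
def test_runs_at_most_fifty_valid_examples(n):
    # Trivially true property; the point is the example budget.
    assert n == n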
def wrapped_test(*arguments, **kwargs):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True

    test = wrapped_test.hypothesis.inner_test

    if getattr(test, 'is_hypothesis_test', False):
        note_deprecation((
            'You have applied @given to test: %s more than once. In '
            'future this will be an error. Applying @given twice '
            'wraps the test twice, which can be extremely slow. A '
            'similar effect can be gained by combining the arguments '
            'of the two calls to given. For example, instead of '
            '@given(booleans()) @given(integers()), you could write '
            '@given(booleans(), integers())') % (test.__name__,))

    settings = wrapped_test._hypothesis_internal_use_settings

    random = get_random_for_wrapped_test(test, wrapped_test)

    if infer in generator_kwargs.values():
        hints = get_type_hints(test)
        for name in [name for name, value in generator_kwargs.items()
                     if value is infer]:
            if name not in hints:
                raise InvalidArgument(
                    'passed %s=infer for %s, but %s has no type annotation'
                    % (name, test.__name__, name))
            generator_kwargs[name] = st.from_type(hints[name])

    processed_args = process_arguments_to_given(
        wrapped_test, arguments, kwargs, generator_arguments,
        generator_kwargs, argspec, test, settings
    )
    arguments, kwargs, test_runner, search_strategy = processed_args

    runner = getattr(search_strategy, 'runner', None)
    if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
        msg = ('You have applied @given to the method %s, which is '
               'used by the unittest runner but is not itself a test.'
               ' This is not useful in any way.' % test.__name__)
        fail_health_check(settings, msg, HealthCheck.not_a_test_method)
    if bad_django_TestCase(runner):  # pragma: no cover
        # Covered by the Django tests, but not the pytest coverage task
        raise InvalidArgument(
            'You have applied @given to a method on %s, but this '
            'class does not inherit from the supported versions in '
            '`hypothesis.extra.django`. Use the Hypothesis variants '
            'to ensure that each example is run in a separate '
            'database transaction.' % qualname(type(runner))
        )

    state = StateForActualGivenExecution(
        test_runner, search_strategy, test, settings, random,
        had_seed=wrapped_test._hypothesis_internal_use_seed
    )

    reproduce_failure = \
        wrapped_test._hypothesis_internal_use_reproduce_failure

    if reproduce_failure is not None:
        expected_version, failure = reproduce_failure
        if expected_version != __version__:
            raise InvalidArgument((
                'Attempting to reproduce a failure from a different '
                'version of Hypothesis. This failure is from %s, but '
                'you are currently running %r. Please change your '
                'Hypothesis version to a matching one.'
            ) % (expected_version, __version__))
        try:
            state.execute(
                ConjectureData.for_buffer(decode_failure(failure)),
                print_example=True, is_final=True,
            )
            raise DidNotReproduce(
                'Expected the test to raise an error, but it '
                'completed successfully.'
            )
        except StopTest:
            raise DidNotReproduce(
                'The shape of the test data has changed in some way '
                'from where this blob was defined. Are you sure '
                "you're running the same test?"
            )
        except UnsatisfiedAssumption:
            raise DidNotReproduce(
                'The test data failed to satisfy an assumption in the '
                'test. Have you added it since this blob was '
                'generated?'
            )

    execute_explicit_examples(
        test_runner, test, wrapped_test, settings, arguments, kwargs
    )

    if settings.max_examples <= 0:
        return

    if not (
        Phase.reuse in settings.phases or
        Phase.generate in settings.phases
    ):
        return

    try:
        if isinstance(runner, TestCase) and hasattr(runner, 'subTest'):
            subTest = runner.subTest
            try:
                setattr(runner, 'subTest', fake_subTest)
                state.run()
            finally:
                setattr(runner, 'subTest', subTest)
        else:
            state.run()
    except BaseException:
        generated_seed = \
            wrapped_test._hypothesis_internal_use_generated_seed
        if generated_seed is not None and not state.failed_normally:
            with local_settings(settings):
                if running_under_pytest:
                    report(
                        'You can add @seed(%(seed)d) to this test or '
                        'run pytest with --hypothesis-seed=%(seed)d '
                        'to reproduce this failure.' % {
                            'seed': generated_seed})
                else:
                    report(
                        'You can add @seed(%d) to this test to '
                        'reproduce this failure.' % (generated_seed,))
        raise
def wrapped_test(*arguments, **kwargs):
    # Tell pytest to omit the body of this function from tracebacks
    __tracebackhide__ = True

    test = wrapped_test.hypothesis.inner_test

    if getattr(test, "is_hypothesis_test", False):
        raise InvalidArgument((
            "You have applied @given to the test %s more than once, which "
            "wraps the test several times and is extremely slow. A "
            "similar effect can be gained by combining the arguments "
            "of the two calls to given. For example, instead of "
            "@given(booleans()) @given(integers()), you could write "
            "@given(booleans(), integers())") % (test.__name__,))

    settings = wrapped_test._hypothesis_internal_use_settings

    random = get_random_for_wrapped_test(test, wrapped_test)

    if infer in generator_kwargs.values():
        hints = get_type_hints(test)
        for name in [
            name for name, value in generator_kwargs.items()
            if value is infer
        ]:
            if name not in hints:
                raise InvalidArgument(
                    "passed %s=infer for %s, but %s has no type annotation"
                    % (name, test.__name__, name))
            generator_kwargs[name] = st.from_type(hints[name])

    processed_args = process_arguments_to_given(
        wrapped_test, arguments, kwargs, generator_arguments,
        generator_kwargs, argspec, test, settings,
    )
    arguments, kwargs, test_runner, search_strategy = processed_args

    runner = getattr(search_strategy, "runner", None)
    if isinstance(runner, TestCase) and test.__name__ in dir(TestCase):
        msg = ("You have applied @given to the method %s, which is "
               "used by the unittest runner but is not itself a test."
               " This is not useful in any way." % test.__name__)
        fail_health_check(settings, msg, HealthCheck.not_a_test_method)
    if bad_django_TestCase(runner):  # pragma: no cover
        # Covered by the Django tests, but not the pytest coverage task
        raise InvalidArgument(
            "You have applied @given to a method on %s, but this "
            "class does not inherit from the supported versions in "
            "`hypothesis.extra.django`. Use the Hypothesis variants "
            "to ensure that each example is run in a separate "
            "database transaction." % qualname(type(runner)))

    state = StateForActualGivenExecution(
        test_runner, search_strategy, test, settings, random,
        had_seed=wrapped_test._hypothesis_internal_use_seed,
    )

    reproduce_failure = wrapped_test._hypothesis_internal_use_reproduce_failure

    if reproduce_failure is not None:
        expected_version, failure = reproduce_failure
        if expected_version != __version__:
            raise InvalidArgument((
                "Attempting to reproduce a failure from a different "
                "version of Hypothesis. This failure is from %s, but "
                "you are currently running %r. Please change your "
                "Hypothesis version to a matching one.")
                % (expected_version, __version__))
        try:
            state.execute(
                ConjectureData.for_buffer(decode_failure(failure)),
                print_example=True,
                is_final=True,
            )
            raise DidNotReproduce(
                "Expected the test to raise an error, but it "
                "completed successfully.")
        except StopTest:
            raise DidNotReproduce(
                "The shape of the test data has changed in some way "
                "from where this blob was defined. Are you sure "
                "you're running the same test?")
        except UnsatisfiedAssumption:
            raise DidNotReproduce(
                "The test data failed to satisfy an assumption in the "
                "test. Have you added it since this blob was "
                "generated?")

    execute_explicit_examples(
        test_runner, test, wrapped_test, settings, arguments, kwargs)

    if settings.max_examples <= 0:
        return

    if not (
        Phase.reuse in settings.phases or
        Phase.generate in settings.phases
    ):
        return

    try:
        if isinstance(runner, TestCase) and hasattr(runner, "subTest"):
            subTest = runner.subTest
            try:
                setattr(runner, "subTest", fake_subTest)
                state.run()
            finally:
                setattr(runner, "subTest", subTest)
        else:
            state.run()
    except BaseException as e:
        generated_seed = wrapped_test._hypothesis_internal_use_generated_seed
        with local_settings(settings):
            if not (state.failed_normally or generated_seed is None):
                if running_under_pytest:
                    report(
                        "You can add @seed(%(seed)d) to this test or "
                        "run pytest with --hypothesis-seed=%(seed)d "
                        "to reproduce this failure."
                        % {"seed": generated_seed})
                else:
                    report(
                        "You can add @seed(%d) to this test to "
                        "reproduce this failure." % (generated_seed,))
            # The dance here is to avoid showing users long tracebacks
            # full of Hypothesis internals they don't care about.
            # We have to do this inline, to avoid adding another
            # internal stack frame just when we've removed the rest.
            if PY2:
                # Python 2 doesn't have Exception.with_traceback(...);
                # instead it has a three-argument form of the `raise`
                # statement. Unfortunately this is a SyntaxError on
                # Python 3, and before Python 2.7.9 it was *also* a
                # SyntaxError to use it in a nested function, so we
                # can't `exec` or `eval` our way out (BPO-21591).
                # So unless we break some versions of Python 2, none
                # of them get traceback elision.
                raise
            # On Python 3, we swap out the real traceback for our
            # trimmed version. Using a variable ensures that the line
            # which will actually appear in tracebacks is as clear as
            # possible - "raise the_error_hypothesis_found".
            the_error_hypothesis_found = e.with_traceback(
                get_trimmed_traceback())
            raise the_error_hypothesis_found
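# Usage sketch (assumption: the public @seed decorator referenced by the
# report() messages above; the seed value is a placeholder copied from a
# failure report).
from hypothesis import given, seed
from hypothesis.strategies import integers, lists


@seed(12345)  # placeholder seed taken from Hypothesis's failure report
@given(lists(integers()))
def test_reproduces_with_fixed_seed(xs):
    # Trivially true property; the point is that generation is now repeatable.
    assert sum(xs) == sum(reversed(xs))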
def __flaky(self, message):
    if len(self.falsifying_examples) <= 1:
        raise Flaky(message)
    else:
        self.__was_flaky = True
        report('Flaky example! ' + message)
def custom_reporter(value):
    """Custom reporter used to slightly modify hypothesis output."""
    textified = f"{value}".replace("state", "machine")
    if "teardown" not in textified and "check" not in textified:
        print(textified)


if __name__ == "__main__":
    args = parser().parse_args()
    with reporter.with_value(custom_reporter):
        try:
            run_state_machine_as_test(
                lambda: state_machine_factory(
                    args.path,
                    args.seed,
                    config.states,
                    config.inputs,
                    config.outputs,
                ),
                settings=settings(max_examples=1000),
            )
        except AssertionError as e:
            report(e)
            report(
                "Implementation contains errors, correct them and try again!",
            )
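# Usage sketch (assumption: the `reporter` object used above is the dynamic
# variable from hypothesis.reporting, and with_reporter is its context-manager
# helper; quiet_reporter and test_quietly are illustrative names). The same
# trick can quiet everything except falsifying examples for an ordinary
# @given test.
from hypothesis import given
from hypothesis.reporting import with_reporter
from hypothesis.strategies import integers


def quiet_reporter(value):
    text = str(value)
    if text.startswith("Falsifying example"):
        print(text)


@given(integers())
def test_quietly(n):
    assert n >= 0  # deliberately falsifiable so that a report is produced


with with_reporter(quiet_reporter):
    try:
        test_quietly()
    except AssertionError:
        pass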
def wrapped_test(*arguments, **kwargs):
    selfy = None
    arguments, kwargs = convert_positional_arguments(
        wrapped_test, arguments, kwargs)

    # Because we converted all kwargs to given into real args and
    # error if we have neither args nor kwargs, this should always
    # be valid
    assert argspec.args

    selfy = kwargs.get(argspec.args[0])
    if isinstance(selfy, HypothesisProvided):
        selfy = None
    test_runner = executor(selfy)

    for example in getattr(wrapped_test, "hypothesis_explicit_examples", ()):
        if example.args:
            example_kwargs = dict(
                zip(argspec.args[-len(example.args):], example.args))
        else:
            example_kwargs = dict(example.kwargs)
        for k, v in kwargs.items():
            if not isinstance(v, HypothesisProvided):
                example_kwargs[k] = v
        # Note: Test may mutate arguments and we can't rerun explicit
        # examples, so we have to calculate the failure message at this
        # point rather than later.
        message_on_failure = "Falsifying example: %s(%s)" % (
            test.__name__,
            arg_string(test, arguments, example_kwargs),
        )
        try:
            test_runner(lambda: test(*arguments, **example_kwargs))
        except BaseException:
            report(message_on_failure)
            raise

    if not any(isinstance(x, HypothesisProvided)
               for xs in (arguments, kwargs.values())
               for x in xs):
        # All arguments have been satisfied without needing to invoke
        # hypothesis
        test_runner(lambda: test(*arguments, **kwargs))
        return

    def convert_to_specifier(v):
        if isinstance(v, HypothesisProvided):
            return strategy(v.value, settings)
        else:
            return sd.just(v)

    given_specifier = sd.tuples(
        sd.tuples(*map(convert_to_specifier, arguments)),
        sd.fixed_dictionaries(dict(
            (k, convert_to_specifier(v)) for (k, v) in kwargs.items())),
    )

    search_strategy = strategy(given_specifier, settings)

    if settings.database:
        storage = settings.database.storage(fully_qualified_name(test))
    else:
        storage = None

    last_exception = [None]
    repr_for_last_exception = [None]

    def is_template_example(xs):
        record_repr = [None]
        try:
            test_runner(reify_and_execute(
                search_strategy, xs, test,
                always_print=settings.max_shrinks <= 0,
                record_repr=record_repr,
            ))
            return False
        except UnsatisfiedAssumption as e:
            raise e
        except Exception as e:
            if settings.max_shrinks <= 0:
                raise e
            last_exception[0] = traceback.format_exc()
            repr_for_last_exception[0] = record_repr[0]
            verbose_report(last_exception[0])
            return True

    is_template_example.__name__ = test.__name__
    is_template_example.__qualname__ = qualname(test)

    falsifying_template = None
    try:
        falsifying_template = best_satisfying_template(
            search_strategy, random, is_template_example, settings, storage
        )
    except NoSuchExample:
        return

    assert last_exception[0] is not None

    with settings:
        test_runner(reify_and_execute(
            search_strategy, falsifying_template, test,
            print_example=True,
        ))

        report("Failed to reproduce exception. Expected: \n" +
               last_exception[0])

        test_runner(reify_and_execute(
            search_strategy,
            falsifying_template,
            test_is_flaky(test, repr_for_last_exception[0]),
            print_example=True,
        ))
def report_buffered():
    for f in fragments_reported:
        report(f)
    del fragments_reported[:]
def wrapped_test(*arguments, **kwargs):
    import hypothesis.strategies as sd
    from hypothesis.internal.strategymethod import strategy

    selfy = None
    arguments, kwargs = convert_positional_arguments(
        wrapped_test, arguments, kwargs)

    # Anything in unused_kwargs hasn't been injected through
    # argspec.defaults, so we need to add them.
    for k in unused_kwargs:
        if k not in kwargs:
            kwargs[k] = unused_kwargs[k]

    # If the test function is a method of some kind, the bound object
    # will be the first named argument if there are any, otherwise the
    # first vararg (if any).
    if argspec.args:
        selfy = kwargs.get(argspec.args[0])
    elif arguments:
        selfy = arguments[0]
    if isinstance(selfy, HypothesisProvided):
        selfy = None
    test_runner = executor(selfy)

    for example in getattr(
        wrapped_test, u'hypothesis_explicit_examples', ()
    ):
        if example.args:
            example_kwargs = dict(zip(
                argspec.args[-len(example.args):], example.args
            ))
        else:
            example_kwargs = dict(example.kwargs)
        for k, v in kwargs.items():
            if not isinstance(v, HypothesisProvided):
                example_kwargs[k] = v
        # Note: Test may mutate arguments and we can't rerun explicit
        # examples, so we have to calculate the failure message at this
        # point rather than later.
        message_on_failure = u'Falsifying example: %s(%s)' % (
            test.__name__,
            arg_string(test, arguments, example_kwargs)
        )
        try:
            test_runner(
                lambda: test(*arguments, **example_kwargs)
            )
        except BaseException:
            report(message_on_failure)
            raise

    if not any(
        isinstance(x, HypothesisProvided)
        for xs in (arguments, kwargs.values())
        for x in xs
    ):
        # All arguments have been satisfied without needing to invoke
        # hypothesis
        test_runner(lambda: test(*arguments, **kwargs))
        return

    def convert_to_specifier(v):
        if isinstance(v, HypothesisProvided):
            return strategy(v.value, settings)
        else:
            return sd.just(v)

    given_specifier = sd.tuples(
        sd.tuples(*map(convert_to_specifier, arguments)),
        sd.fixed_dictionaries(dict(
            (k, convert_to_specifier(v)) for (k, v) in kwargs.items()))
    )

    search_strategy = strategy(given_specifier, settings)

    if settings.database:
        storage = settings.database.storage(
            fully_qualified_name(test))
    else:
        storage = None

    last_exception = [None]
    repr_for_last_exception = [None]

    def is_template_example(xs):
        record_repr = [None]
        try:
            test_runner(reify_and_execute(
                search_strategy, xs, test,
                always_print=settings.max_shrinks <= 0,
                record_repr=record_repr,
            ))
            return False
        except UnsatisfiedAssumption as e:
            raise e
        except Exception as e:
            if settings.max_shrinks <= 0:
                raise e
            last_exception[0] = traceback.format_exc()
            repr_for_last_exception[0] = record_repr[0]
            verbose_report(last_exception[0])
            return True

    is_template_example.__name__ = test.__name__
    is_template_example.__qualname__ = qualname(test)

    falsifying_template = None
    try:
        falsifying_template = best_satisfying_template(
            search_strategy, random, is_template_example,
            settings, storage
        )
    except NoSuchExample:
        return

    assert last_exception[0] is not None

    with settings:
        test_runner(reify_and_execute(
            search_strategy, falsifying_template, test,
            print_example=True, is_final=True
        ))

        report(
            u'Failed to reproduce exception. Expected: \n' +
            last_exception[0],
        )

        test_runner(reify_and_execute(
            search_strategy, falsifying_template,
            test_is_flaky(test, repr_for_last_exception[0]),
            print_example=True, is_final=True
        ))
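# Usage sketch (assumption: the hypothesis_explicit_examples attribute walked
# above is what the public @example decorator attaches; this is not code from
# this module). Explicit examples run in addition to generated ones, and a
# failure is reported with the "Falsifying example: ..." message built above.
from hypothesis import example, given
from hypothesis.strategies import text


@example("")  # always exercised, alongside generated strings
@given(text())
def test_roundtrips_even_for_empty_string(s):
    assert s == "".join(list(s))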
def execute(
    self, data,
    print_example=False,
    is_final=False,
    expected_failure=None,
    collect=False,
):
    text_repr = [None]
    if self.settings.deadline is None:
        test = self.test
    else:
        @proxies(self.test)
        def test(*args, **kwargs):
            self.__test_runtime = None
            initial_draws = len(data.draw_times)
            start = benchmark_time()
            result = self.test(*args, **kwargs)
            finish = benchmark_time()
            internal_draw_time = sum(data.draw_times[initial_draws:])
            runtime = (finish - start - internal_draw_time) * 1000
            self.__test_runtime = runtime
            current_deadline = self.settings.deadline
            if not is_final:
                current_deadline *= 1.25
            if runtime >= current_deadline:
                raise DeadlineExceeded(runtime, self.settings.deadline)
            return result

    def run(data):
        if not hasattr(data, "can_reproduce_example_from_repr"):
            data.can_reproduce_example_from_repr = True
        with local_settings(self.settings):
            with BuildContext(data, is_final=is_final):
                with deterministic_PRNG():
                    args, kwargs = data.draw(self.search_strategy)
                if expected_failure is not None:
                    text_repr[0] = arg_string(test, args, kwargs)

                if print_example:
                    example = "%s(%s)" % (
                        test.__name__,
                        arg_string(test, args, kwargs),
                    )
                    try:
                        ast.parse(example)
                    except SyntaxError:
                        data.can_reproduce_example_from_repr = False
                    report("Falsifying example: %s" % (example,))
                elif current_verbosity() >= Verbosity.verbose:
                    report(lambda: "Trying example: %s(%s)" % (
                        test.__name__, arg_string(test, args, kwargs)))
                return test(*args, **kwargs)

    result = self.test_runner(data, run)
    if expected_failure is not None:
        exception, traceback = expected_failure
        if (isinstance(exception, DeadlineExceeded) and
                self.__test_runtime is not None):
            report((
                "Unreliable test timings! On an initial run, this "
                "test took %.2fms, which exceeded the deadline of "
                "%.2fms, but on a subsequent run it took %.2f ms, "
                "which did not. If you expect this sort of "
                "variability in your test timings, consider turning "
                "deadlines off for this test by setting deadline=None."
            ) % (
                exception.runtime, self.settings.deadline,
                self.__test_runtime))
        else:
            report("Failed to reproduce exception. Expected: \n" + traceback)
        self.__flaky((
            "Hypothesis %s(%s) produces unreliable results: Falsified"
            " on the first call but did not on a subsequent one"
        ) % (test.__name__, text_repr[0]))
    return result
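# Usage sketch (assumption: the public deadline setting enforced by the
# proxied test above, not code from this module). Hypothesis raises
# DeadlineExceeded when a single example runs past the deadline, and
# deadline=None disables the check entirely, as the "Unreliable test
# timings!" message suggests.
from hypothesis import given, settings
from hypothesis.strategies import integers


@settings(deadline=None)  # opt out of per-example timing for slow tests
@given(integers())
def test_without_deadline(n):
    assert n * 0 == 0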