Example No. 1
def function_digest(function):
    """Returns a string that is stable across multiple invocations across
    multiple processes and is prone to changing significantly in response to
    minor changes to the function.

    No guarantee of uniqueness, though it usually will be.

    """
    hasher = hashlib.md5()
    try:
        hasher.update(to_unicode(inspect.getsource(function)).encode('utf-8'))
    # Different errors on different versions of python. What fun.
    except (OSError, IOError, TypeError):
        pass
    try:
        hasher.update(str_to_bytes(function.__name__))
    except AttributeError:
        pass
    try:
        # function.__module__ is already a string, so hash it directly;
        # it has no __name__ attribute of its own.
        hasher.update(function.__module__.encode('utf-8'))
    except AttributeError:
        pass
    try:
        hasher.update(str_to_bytes(repr(getargspec(function))))
    except TypeError:
        pass
    return hasher.digest()
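
A note on use: this digest is what powers derandomization. In the wrapped_test decorator of Example No. 8 below, the digest seeds a Random instance (random = Random(function_digest(test))), so the same function source always produces the same example stream. A minimal sketch of that pattern, assuming function_digest from above is in scope:

from random import Random

def derandomized_rng(test):
    # function_digest() returns bytes; Random() accepts a bytes seed, so
    # the same function source always yields the same RNG stream.
    return Random(function_digest(test))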
Example No. 2
def function_digest(function):
    """Returns a string that is stable across multiple invocations across
    multiple processes and is prone to changing significantly in response to
    minor changes to the function.

    No guarantee of uniqueness, though it usually will be.
    """
    hasher = hashlib.sha384()
    try:
        hasher.update(to_unicode(inspect.getsource(function)).encode("utf-8"))
    except (OSError, TypeError):
        pass
    try:
        hasher.update(str_to_bytes(function.__name__))
    except AttributeError:
        pass
    try:
        # function.__module__ is already a string, so hash it directly;
        # it has no __name__ attribute of its own.
        hasher.update(function.__module__.encode("utf-8"))
    except AttributeError:
        pass
    try:
        hasher.update(str_to_bytes(repr(inspect.getfullargspec(function))))
    except TypeError:
        pass
    try:
        hasher.update(function._hypothesis_internal_add_digest)
    except AttributeError:
        pass
    return hasher.digest()
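
Compared with Example No. 1, this variant also folds in an optional function._hypothesis_internal_add_digest attribute, letting callers perturb the digest without editing the function's source. A sketch of attaching that hook (the attribute name comes from the code above; the test and payload are illustrative):

def test_something():
    pass

# Extra bytes that function_digest() above will mix into the hash,
# changing the digest without touching the function's source.
test_something._hypothesis_internal_add_digest = b'schema-v2'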
Example No. 3
def function_digest(function):
    """Returns a string that is stable across multiple invocations across
    multiple processes and is prone to changing significantly in response to
    minor changes to the function.

    No guarantee of uniqueness, though it usually will be.

    """
    hasher = hashlib.md5()
    try:
        hasher.update(to_unicode(inspect.getsource(function)).encode('utf-8'))
    # Different errors on different versions of python. What fun.
    except (OSError, IOError, TypeError):
        pass
    try:
        hasher.update(str_to_bytes(function.__name__))
    except AttributeError:
        pass
    try:
        # function.__module__ is already a string, so hash it directly;
        # it has no __name__ attribute of its own.
        hasher.update(function.__module__.encode('utf-8'))
    except AttributeError:
        pass
    try:
        hasher.update(str_to_bytes(repr(getfullargspec(function))))
    except TypeError:
        pass
    return hasher.digest()
Example No. 4
def source_exec_as_module(source):
    try:
        return eval_cache[source]
    except KeyError:
        pass

    result = ModuleType('hypothesis_temporary_module_%s' %
                        (hashlib.sha1(str_to_bytes(source)).hexdigest(), ))
    assert isinstance(source, str)
    exec(source, result.__dict__)
    eval_cache[source] = result
    return result
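
Usage is simple: the source string is executed once into a fresh module, and the module is memoized in eval_cache, so repeated calls with identical source return the identical module object. A small sketch, assuming source_exec_as_module and its cache are in scope:

src = 'def double(x):\n    return 2 * x\n'
mod = source_exec_as_module(src)
assert mod.double(21) == 42
# The cache guarantees object identity on repeated calls.
assert source_exec_as_module(src) is mod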
Example No. 5
def source_exec_as_module(source):
    try:
        return eval_cache[source]
    except KeyError:
        pass

    result = ModuleType('hypothesis_temporary_module_%s' % (
        hashlib.sha1(str_to_bytes(source)).hexdigest(),
    ))
    assert isinstance(source, str)
    exec(source, result.__dict__)
    eval_cache[source] = result
    return result
Example No. 6
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        if global_force_seed is None:
            database_key = str_to_bytes(fully_qualified_name(self.test))
        else:
            database_key = None
        self.start_time = benchmark_time()
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings, random=self.random,
            database_key=database_key,
        )
        try:
            runner.run()
        finally:
            self.used_examples_from_database = \
                runner.used_examples_from_database
        note_engine_for_statistics(runner)
        run_time = benchmark_time() - self.start_time

        self.used_examples_from_database = runner.used_examples_from_database

        if runner.used_examples_from_database:
            if self.settings.derandomize:
                note_deprecation((
                    'In future derandomize will imply database=None, but your '
                    'test: %s is currently using examples from the database. '
                    'To get the future behaviour, update your settings to '
                    'include database=None.') % (self.test.__name__, )
                )
            if self.__had_seed:
                note_deprecation((
                    'In future use of @seed will imply database=None in your '
                    'settings, but your test: %s is currently using examples '
                    'from the database. To get the future behaviour, update '
                    'your settings for this test to include database=None.')
                    % (self.test.__name__,)
                )

        timed_out = runner.exit_reason == ExitReason.timeout
        if runner.call_count == 0:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                runner.interesting_examples.values(),
                key=lambda d: sort_key(d.buffer), reverse=True
            )
        else:
            if runner.valid_examples == 0:
                if timed_out:
                    raise Timeout((
                        'Ran out of time before finding a satisfying '
                        'example for %s. Only found %d examples in %.2fs.'
                    ) % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples, run_time
                    ))
                else:
                    raise Unsatisfiable(
                        'Unable to satisfy assumptions of hypothesis %s.' %
                        (get_pretty_function_description(self.test),)
                    )

        if not self.falsifying_examples:
            return

        self.failed_normally = True

        flaky = 0

        for falsifying_example in self.falsifying_examples:
            ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
            self.__was_flaky = False
            assert falsifying_example.__expected_exception is not None
            try:
                self.execute(
                    ran_example,
                    print_example=True, is_final=True,
                    expected_failure=(
                        falsifying_example.__expected_exception,
                        falsifying_example.__expected_traceback,
                    )
                )
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.'
                )
            except BaseException:
                if len(self.falsifying_examples) <= 1:
                    raise
                report(traceback.format_exc())
            finally:  # pragma: no cover
                # This section is in fact entirely covered by the tests in
                # test_reproduce_failure, but it seems to trigger a lovely set
                # of coverage bugs: the branches show up as uncovered (despite
                # definitely being covered; you can add an "assert False" else
                # branch to verify this and see it fail), and the second
                # branch still complains about lack of coverage even if you
                # add a "pragma: no cover" to it!
                # See https://bitbucket.org/ned/coveragepy/issues/623/
                if self.settings.print_blob is not PrintSettings.NEVER:
                    failure_blob = encode_failure(falsifying_example.buffer)
                    # Have to use the example we actually ran, not the original
                    # falsifying example! Otherwise we won't catch problems
                    # where the repr of the generated example doesn't parse.
                    can_use_repr = ran_example.can_reproduce_example_from_repr
                    if (
                        self.settings.print_blob is PrintSettings.ALWAYS or (
                            self.settings.print_blob is PrintSettings.INFER and
                            self.settings.verbosity >= Verbosity.normal and
                            not can_use_repr and
                            len(failure_blob) < 200
                        )
                    ):
                        report((
                            '\n'
                            'You can reproduce this example by temporarily '
                            'adding @reproduce_failure(%r, %r) as a decorator '
                            'on your test case') % (
                                __version__, failure_blob,))
            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error or
        # marked it as flaky before this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky((
                'Hypothesis found %d distinct failures, but %d of them '
                'exhibited some sort of flaky behaviour.') % (
                    len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures((
                'Hypothesis found %d distinct failures.') % (
                    len(self.falsifying_examples,)))
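
The report emitted in the finally block above points users at Hypothesis's public @reproduce_failure decorator. Acting on that report looks roughly like the following sketch; the version string and blob are placeholders standing in for the pair the report prints:

from hypothesis import given, reproduce_failure, strategies as st

@reproduce_failure('3.57.0', b'AXicY2BgAAAABAAB')  # placeholder values
@given(st.integers())
def test_was_failing(x):
    assert x < 100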
Example No. 7
def calc_label(cls):
    name = str_to_bytes(qualname(cls))
    hashed = hashlib.md5(name).digest()
    return int_from_bytes(hashed[:8])
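
calc_label reduces a class's qualified name to a stable 64-bit integer: hash the name with MD5, keep the first 8 bytes, and read them as an integer. A self-contained equivalent with the compat helpers inlined (assuming qualname maps to __qualname__, str_to_bytes is UTF-8 encoding, and int_from_bytes is big-endian):

import hashlib

def calc_label_inlined(cls):
    name = cls.__qualname__.encode('utf-8')
    hashed = hashlib.md5(name).digest()
    # First 8 bytes of the digest, interpreted big-endian (assumed to
    # match the int_from_bytes compat helper).
    return int.from_bytes(hashed[:8], 'big')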
Example No. 8
        def wrapped_test(*arguments, **kwargs):
            settings = wrapped_test._hypothesis_internal_use_settings
            if wrapped_test._hypothesis_internal_use_seed is not None:
                random = Random(wrapped_test._hypothesis_internal_use_seed)
            elif settings.derandomize:
                random = Random(function_digest(test))
            else:
                random = new_random()

            import hypothesis.strategies as sd

            selfy = None
            arguments, kwargs = convert_positional_arguments(wrapped_test, arguments, kwargs)

            # If the test function is a method of some kind, the bound object
            # will be the first named argument if there are any, otherwise the
            # first vararg (if any).
            if argspec.args:
                selfy = kwargs.get(argspec.args[0])
            elif arguments:
                selfy = arguments[0]
            test_runner = new_style_executor(selfy)

            for example in reversed(getattr(wrapped_test, "hypothesis_explicit_examples", ())):
                if example.args:
                    if len(example.args) > len(original_argspec.args):
                        raise InvalidArgument(
                            "example has too many arguments for test. "
                            "Expected at most %d but got %d" % (len(original_argspec.args), len(example.args))
                        )
                    example_kwargs = dict(zip(original_argspec.args[-len(example.args) :], example.args))
                else:
                    example_kwargs = example.kwargs
                if Phase.explicit not in settings.phases:
                    continue
                example_kwargs.update(kwargs)
                # Note: Test may mutate arguments and we can't rerun explicit
                # examples, so we have to calculate the failure message at this
                # point rather than later.
                message_on_failure = "Falsifying example: %s(%s)" % (
                    test.__name__,
                    arg_string(test, arguments, example_kwargs),
                )
                try:
                    with BuildContext() as b:
                        test_runner(None, lambda data: test(*arguments, **example_kwargs))
                except BaseException:
                    report(message_on_failure)
                    for n in b.notes:
                        report(n)
                    raise
            if settings.max_examples <= 0:
                return

            arguments = tuple(arguments)

            given_specifier = sd.tuples(
                sd.just(arguments), sd.fixed_dictionaries(generator_kwargs).map(lambda args: dict(args, **kwargs))
            )

            def fail_health_check(message, label):
                if label in settings.suppress_health_check:
                    return
                message += (
                    "\nSee https://hypothesis.readthedocs.io/en/latest/health"
                    "checks.html for more information about this. "
                )
                message += (
                    "If you want to disable just this health check, add %s "
                    "to the suppress_health_check settings for this test."
                ) % (label,)
                raise FailedHealthCheck(message)

            search_strategy = given_specifier
            if selfy is not None:
                search_strategy = WithRunner(search_strategy, selfy)

            search_strategy.validate()

            perform_health_check = settings.perform_health_check
            perform_health_check &= Settings.default.perform_health_check

            from hypothesis.internal.conjecture.data import TestData, Status, StopTest

            if not (Phase.reuse in settings.phases or Phase.generate in settings.phases):
                return

            if perform_health_check:
                health_check_random = Random(random.getrandbits(128))
                # We "pre warm" the health check with one draw to give it some
                # time to calculate any cached data. This prevents the case
                # where the first draw of the health check takes ages because
                # of loading unicode data the first time.
                data = TestData(
                    max_length=settings.buffer_size,
                    draw_bytes=lambda data, n, distribution: distribution(health_check_random, n),
                )
                with Settings(settings, verbosity=Verbosity.quiet):
                    try:
                        test_runner(data, reify_and_execute(search_strategy, lambda *args, **kwargs: None))
                    except BaseException:
                        pass
                count = 0
                overruns = 0
                filtered_draws = 0
                start = time.time()
                while count < 10 and time.time() < start + 1 and filtered_draws < 50 and overruns < 20:
                    try:
                        data = TestData(
                            max_length=settings.buffer_size,
                            draw_bytes=lambda data, n, distribution: distribution(health_check_random, n),
                        )
                        with Settings(settings, verbosity=Verbosity.quiet):
                            test_runner(data, reify_and_execute(search_strategy, lambda *args, **kwargs: None))
                        count += 1
                    except UnsatisfiedAssumption:
                        filtered_draws += 1
                    except StopTest:
                        if data.status == Status.INVALID:
                            filtered_draws += 1
                        else:
                            assert data.status == Status.OVERRUN
                            overruns += 1
                    except InvalidArgument:
                        raise
                    except Exception:
                        if HealthCheck.exception_in_generation in settings.suppress_health_check:
                            raise
                        report(traceback.format_exc())
                        if test_runner is default_new_style_executor:
                            fail_health_check(
                                "An exception occurred during data "
                                "generation in initial health check. "
                                "This indicates a bug in the strategy. "
                                "This could either be a Hypothesis bug or "
                                "an error in a function you've passed to "
                                "it to construct your data.",
                                HealthCheck.exception_in_generation,
                            )
                        else:
                            fail_health_check(
                                "An exception occurred during data "
                                "generation in initial health check. "
                                "This indicates a bug in the strategy. "
                                "This could either be a Hypothesis bug or "
                                "an error in a function you've passed to "
                                "it to construct your data. Additionally, "
                                "you have a custom executor, which means "
                                "that this could be your executor failing "
                                "to handle a function which returns None. ",
                                HealthCheck.exception_in_generation,
                            )
                if overruns >= 20 or (not count and overruns > 0):
                    fail_health_check(
                        (
                            "Examples routinely exceeded the max allowable size. "
                            "(%d examples overran while generating %d valid ones)"
                            ". Generating examples this large will usually lead to"
                            " bad results. You should try setting average_size or "
                            "max_size parameters on your collections and turning "
                            "max_leaves down on recursive() calls."
                        )
                        % (overruns, count),
                        HealthCheck.data_too_large,
                    )
                if filtered_draws >= 50 or (not count and filtered_draws > 0):
                    fail_health_check(
                        (
                            "It looks like your strategy is filtering out a lot "
                            "of data. Health check found %d filtered examples but "
                            "only %d good ones. This will make your tests much "
                            "slower, and also will probably distort the data "
                            "generation quite a lot. You should adapt your "
                            "strategy to filter less. This can also be caused by "
                            "a low max_leaves parameter in recursive() calls"
                        )
                        % (filtered_draws, count),
                        HealthCheck.filter_too_much,
                    )
                runtime = time.time() - start
                if runtime > 1.0 or count < 10:
                    fail_health_check(
                        (
                            "Data generation is extremely slow: Only produced "
                            "%d valid examples in %.2f seconds (%d invalid ones "
                            "and %d exceeded maximum size). Try decreasing "
                            "size of the data you're generating (with e.g."
                            "average_size or max_leaves parameters)."
                        )
                        % (count, runtime, filtered_draws, overruns),
                        HealthCheck.too_slow,
                    )
            last_exception = [None]
            repr_for_last_exception = [None]

            def evaluate_test_data(data):
                try:
                    result = test_runner(data, reify_and_execute(search_strategy, test))
                    if result is not None and settings.perform_health_check:
                        fail_health_check(
                            ("Tests run under @given should return None, but " "%s returned %r instead.")
                            % (test.__name__, result),
                            HealthCheck.return_value,
                        )
                    return False
                except UnsatisfiedAssumption:
                    data.mark_invalid()
                except (HypothesisDeprecationWarning, FailedHealthCheck, StopTest):
                    raise
                except Exception:
                    last_exception[0] = traceback.format_exc()
                    verbose_report(last_exception[0])
                    data.mark_interesting()

            from hypothesis.internal.conjecture.engine import TestRunner

            falsifying_example = None
            database_key = str_to_bytes(fully_qualified_name(test))
            start_time = time.time()
            runner = TestRunner(evaluate_test_data, settings=settings, random=random, database_key=database_key)
            runner.run()
            run_time = time.time() - start_time
            timed_out = settings.timeout > 0 and run_time >= settings.timeout
            if runner.last_data is None:
                return
            if runner.last_data.status == Status.INTERESTING:
                falsifying_example = runner.last_data.buffer
                if settings.database is not None:
                    settings.database.save(database_key, falsifying_example)
            else:
                if runner.valid_examples < min(settings.min_satisfying_examples, settings.max_examples):
                    if timed_out:
                        raise Timeout(
                            (
                                "Ran out of time before finding a satisfying "
                                "example for "
                                "%s. Only found %d examples in " + "%.2fs."
                            )
                            % (get_pretty_function_description(test), runner.valid_examples, run_time)
                        )
                    else:
                        raise Unsatisfiable(
                            (
                                "Unable to satisfy assumptions of hypothesis "
                                "%s. Only %d examples considered "
                                "satisfied assumptions"
                            )
                            % (get_pretty_function_description(test), runner.valid_examples)
                        )
                return

            assert last_exception[0] is not None

            try:
                with settings:
                    test_runner(
                        TestData.for_buffer(falsifying_example),
                        reify_and_execute(search_strategy, test, print_example=True, is_final=True),
                    )
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                raise Flaky(
                    "Unreliable assumption: An example which satisfied " "assumptions on the first run now fails it."
                )

            report("Failed to reproduce exception. Expected: \n" + last_exception[0])

            filter_message = (
                "Unreliable test data: Failed to reproduce a failure "
                "and then when it came to recreating the example in "
                "order to print the test data with a flaky result "
                "the example was filtered out (by e.g. a "
                "call to filter in your strategy) when we didn't "
                "expect it to be."
            )

            try:
                test_runner(
                    TestData.for_buffer(falsifying_example),
                    reify_and_execute(
                        search_strategy,
                        test_is_flaky(test, repr_for_last_exception[0]),
                        print_example=True,
                        is_final=True,
                    ),
                )
            except (UnsatisfiedAssumption, StopTest):
                raise Flaky(filter_message)
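
Throughout this example the failure database is keyed by str_to_bytes(fully_qualified_name(test)). A minimal stand-in for that key derivation, assuming fully_qualified_name produces the usual module-plus-qualname string (an assumption, since the helper's definition is not shown here):

def database_key_for(test):
    # Sketch only; the real fully_qualified_name() helper may differ in
    # edge cases such as nested or dynamically created functions.
    return ('%s.%s' % (test.__module__, test.__qualname__)).encode('utf-8')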
Example No. 9
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        if global_force_seed is None:
            database_key = str_to_bytes(fully_qualified_name(self.test))
        else:
            database_key = None
        self.start_time = time.time()
        global in_given
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings,
            random=self.random,
            database_key=database_key,
        )

        if in_given or self.collector is None:
            runner.run()
        else:  # pragma: no cover
            in_given = True
            original_trace = sys.gettrace()
            try:
                sys.settrace(None)
                runner.run()
            finally:
                in_given = False
                sys.settrace(original_trace)
                self.used_examples_from_database = \
                    runner.used_examples_from_database
        note_engine_for_statistics(runner)
        run_time = time.time() - self.start_time

        self.used_examples_from_database = runner.used_examples_from_database

        if runner.used_examples_from_database:
            if self.settings.derandomize:
                note_deprecation(
                    'In future derandomize will imply database=None, but your '
                    'test is currently using examples from the database. To '
                    'get the future behaviour, update your settings to '
                    'include database=None.')
            if self.__had_seed:
                note_deprecation(
                    'In future use of @seed will imply database=None in your '
                    'settings, but your test is currently using examples from '
                    'the database. To get the future behaviour, update your '
                    'settings for this test to include database=None.')

        timed_out = runner.exit_reason == ExitReason.timeout
        if runner.call_count == 0:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                runner.interesting_examples.values(),
                key=lambda d: sort_key(d.buffer),
                reverse=True)
        else:
            if timed_out:
                note_deprecation((
                    'Your tests are hitting the settings timeout (%.2fs). '
                    'This functionality will go away in a future release '
                    'and you should not rely on it. Instead, try setting '
                    'max_examples to be some value lower than %d (the number '
                    'of examples your test successfully ran here). Or, if you '
                    'would prefer your tests to run to completion, regardless '
                    'of how long they take, you can set the timeout value to '
                    'hypothesis.unlimited.') % (self.settings.timeout,
                                                runner.valid_examples),
                                 self.settings)
            if runner.valid_examples == 0:
                if timed_out:
                    raise Timeout(
                        ('Ran out of time before finding a satisfying '
                         'example for %s. Only found %d examples in %.2fs.') %
                        (get_pretty_function_description(
                            self.test), runner.valid_examples, run_time))
                else:
                    raise Unsatisfiable(
                        'Unable to satisfy assumptions of hypothesis %s.' %
                        (get_pretty_function_description(self.test), ))

        if not self.falsifying_examples:
            return

        self.failed_normally = True

        flaky = 0

        for falsifying_example in self.falsifying_examples:
            ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
            self.__was_flaky = False
            assert falsifying_example.__expected_exception is not None
            try:
                self.execute(ran_example,
                             print_example=True,
                             is_final=True,
                             expected_failure=(
                                 falsifying_example.__expected_exception,
                                 falsifying_example.__expected_traceback,
                             ))
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.')
            except BaseException:
                if len(self.falsifying_examples) <= 1:
                    raise
                report(traceback.format_exc())
            finally:  # pragma: no cover
                # This section is in fact entirely covered by the tests in
                # test_reproduce_failure, but it seems to trigger a lovely set
                # of coverage bugs: the branches show up as uncovered (despite
                # definitely being covered; you can add an "assert False" else
                # branch to verify this and see it fail), and the second
                # branch still complains about lack of coverage even if you
                # add a "pragma: no cover" to it!
                # See https://bitbucket.org/ned/coveragepy/issues/623/
                if self.settings.print_blob is not PrintSettings.NEVER:
                    failure_blob = encode_failure(falsifying_example.buffer)
                    # Have to use the example we actually ran, not the original
                    # falsifying example! Otherwise we won't catch problems
                    # where the repr of the generated example doesn't parse.
                    can_use_repr = ran_example.can_reproduce_example_from_repr
                    if (self.settings.print_blob is PrintSettings.ALWAYS or
                        (self.settings.print_blob is PrintSettings.INFER
                         and not can_use_repr and len(failure_blob) < 200)):
                        report((
                            '\n'
                            'You can reproduce this example by temporarily '
                            'adding @reproduce_failure(%r, %r) as a decorator '
                            'on your test case') % (
                                __version__,
                                failure_blob,
                            ))
            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error or
        # marked it as flaky before this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky(
                ('Hypothesis found %d distinct failures, but %d of them '
                 'exhibited some sort of flaky behaviour.') %
                (len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures(('Hypothesis found %d distinct failures.') %
                                   (len(self.falsifying_examples, )))
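
The in_given flag and the sys.settrace calls above exist to suspend any active tracer (for example, coverage measurement) while the Conjecture engine runs, then restore it no matter what. The save-disable-restore pattern in isolation looks like this sketch:

import sys

original_trace = sys.gettrace()  # stash whatever tracer is installed
try:
    sys.settrace(None)           # run the engine untraced
    pass                         # runner.run() would go here
finally:
    sys.settrace(original_trace)  # always restore, even on error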
Example No. 10
def calc_label_from_name(name):
    hashed = hashlib.md5(str_to_bytes(name)).digest()
    return int_from_bytes(hashed[:8])
Example No. 11
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        database_key = str_to_bytes(fully_qualified_name(self.test))
        self.start_time = time.time()
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings,
            random=self.random,
            database_key=database_key,
        )
        runner.run()
        note_engine_for_statistics(runner)
        run_time = time.time() - self.start_time
        timed_out = runner.exit_reason == ExitReason.timeout
        if runner.last_data is None:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                runner.interesting_examples.values(),
                key=lambda d: sort_key(d.buffer),
                reverse=True)
        else:
            if timed_out:
                note_deprecation((
                    'Your tests are hitting the settings timeout (%.2fs). '
                    'This functionality will go away in a future release '
                    'and you should not rely on it. Instead, try setting '
                    'max_examples to be some value lower than %d (the number '
                    'of examples your test successfully ran here). Or, if you '
                    'would prefer your tests to run to completion, regardless '
                    'of how long they take, you can set the timeout value to '
                    'hypothesis.unlimited.') % (self.settings.timeout,
                                                runner.valid_examples),
                                 self.settings)
            if runner.valid_examples < min(
                    self.settings.min_satisfying_examples,
                    self.settings.max_examples,
            ) and not (runner.exit_reason == ExitReason.finished
                       and self.at_least_one_success):
                if timed_out:
                    raise Timeout(
                        ('Ran out of time before finding a satisfying '
                         'example for '
                         '%s. Only found %d examples in ' + '%.2fs.') %
                        (get_pretty_function_description(
                            self.test), runner.valid_examples, run_time))
                else:
                    raise Unsatisfiable(
                        ('Unable to satisfy assumptions of hypothesis '
                         '%s. Only %d examples considered '
                         'satisfied assumptions') % (
                             get_pretty_function_description(self.test),
                             runner.valid_examples,
                         ))

        if not self.falsifying_examples:
            return

        flaky = 0

        for falsifying_example in self.falsifying_examples:
            self.__was_flaky = False
            raised_exception = False
            try:
                with self.settings:
                    self.test_runner(
                        ConjectureData.for_buffer(falsifying_example.buffer),
                        reify_and_execute(self.search_strategy,
                                          self.test,
                                          print_example=True,
                                          is_final=True))
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.')
            except BaseException:
                if len(self.falsifying_examples) <= 1:
                    raise
                raised_exception = True
                report(traceback.format_exc())

            if not raised_exception:
                report(
                    'Failed to reproduce exception. Expected: \n' +
                    falsifying_example.__expected_exception, )

                filter_message = (
                    'Unreliable test data: Failed to reproduce a failure '
                    'and then when it came to recreating the example in '
                    'order to print the test data with a flaky result '
                    'the example was filtered out (by e.g. a '
                    'call to filter in your strategy) when we didn\'t '
                    'expect it to be.')

                try:
                    self.test_runner(
                        ConjectureData.for_buffer(falsifying_example.buffer),
                        reify_and_execute(self.search_strategy,
                                          test_is_flaky(
                                              self.test,
                                              self.repr_for_last_exception),
                                          print_example=True,
                                          is_final=True))
                except (UnsatisfiedAssumption, StopTest):
                    self.__flaky(filter_message)
                except Flaky as e:
                    if len(self.falsifying_examples) > 1:
                        self.__flaky(e.args[0])
                    else:
                        raise

            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error or
        # marked it as flaky before this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky(
                ('Hypothesis found %d distinct failures, but %d of them '
                 'exhibited some sort of flaky behaviour.') %
                (len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures(('Hypothesis found %d distinct failures.') %
                                   (len(self.falsifying_examples, )))
Example No. 12
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        database_key = str_to_bytes(fully_qualified_name(self.test))
        self.start_time = time.time()
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings,
            random=self.random,
            database_key=database_key,
        )
        runner.run()
        note_engine_for_statistics(runner)
        run_time = time.time() - self.start_time
        timed_out = runner.exit_reason == ExitReason.timeout
        if runner.last_data is None:
            return
        if runner.last_data.status == Status.INTERESTING:
            self.falsifying_example = runner.last_data.buffer
            if self.settings.database is not None:
                self.settings.database.save(database_key,
                                            self.falsifying_example)
        else:
            if timed_out:
                note_deprecation((
                    'Your tests are hitting the settings timeout (%.2fs). '
                    'This functionality will go away in a future release '
                    'and you should not rely on it. Instead, try setting '
                    'max_examples to be some value lower than %d (the number '
                    'of examples your test successfully ran here). Or, if you '
                    'would prefer your tests to run to completion, regardless '
                    'of how long they take, you can set the timeout value to '
                    'hypothesis.unlimited.') % (self.settings.timeout,
                                                runner.valid_examples),
                                 self.settings)
            if runner.valid_examples < min(
                    self.settings.min_satisfying_examples,
                    self.settings.max_examples,
            ) and not (runner.exit_reason == ExitReason.finished
                       and self.at_least_one_success):
                if timed_out:
                    raise Timeout(
                        ('Ran out of time before finding a satisfying '
                         'example for '
                         '%s. Only found %d examples in ' + '%.2fs.') %
                        (get_pretty_function_description(
                            self.test), runner.valid_examples, run_time))
                else:
                    raise Unsatisfiable(
                        ('Unable to satisfy assumptions of hypothesis '
                         '%s. Only %d examples considered '
                         'satisfied assumptions') % (
                             get_pretty_function_description(self.test),
                             runner.valid_examples,
                         ))

        if self.falsifying_example is None:
            return

        assert self.last_exception is not None

        try:
            with self.settings:
                self.test_runner(
                    ConjectureData.for_buffer(self.falsifying_example),
                    reify_and_execute(self.search_strategy,
                                      self.test,
                                      print_example=True,
                                      is_final=True))
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            raise Flaky('Unreliable assumption: An example which satisfied '
                        'assumptions on the first run now fails it.')

        report(
            'Failed to reproduce exception. Expected: \n' +
            self.last_exception, )

        filter_message = (
            'Unreliable test data: Failed to reproduce a failure '
            'and then when it came to recreating the example in '
            'order to print the test data with a flaky result '
            'the example was filtered out (by e.g. a '
            'call to filter in your strategy) when we didn\'t '
            'expect it to be.')

        try:
            self.test_runner(
                ConjectureData.for_buffer(self.falsifying_example),
                reify_and_execute(self.search_strategy,
                                  test_is_flaky(self.test,
                                                self.repr_for_last_exception),
                                  print_example=True,
                                  is_final=True))
        except (UnsatisfiedAssumption, StopTest):
            raise Flaky(filter_message)
Example No. 13
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        if global_force_seed is None:
            database_key = str_to_bytes(fully_qualified_name(self.test))
        else:
            database_key = None
        self.start_time = time.time()
        global in_given
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings, random=self.random,
            database_key=database_key,
        )

        if in_given or self.collector is None:
            runner.run()
        else:  # pragma: no cover
            in_given = True
            original_trace = sys.gettrace()
            try:
                sys.settrace(None)
                runner.run()
            finally:
                in_given = False
                sys.settrace(original_trace)
        note_engine_for_statistics(runner)
        run_time = time.time() - self.start_time

        self.used_examples_from_database = runner.used_examples_from_database

        if runner.used_examples_from_database:
            if self.settings.derandomize:
                note_deprecation(
                    'In future derandomize will imply database=None, but your '
                    'test is currently using examples from the database. To '
                    'get the future behaviour, update your settings to '
                    'include database=None.'
                )
            if self.__had_seed:
                note_deprecation(
                    'In future use of @seed will imply database=None in your '
                    'settings, but your test is currently using examples from '
                    'the database. To get the future behaviour, update your '
                    'settings for this test to include database=None.'
                )

        timed_out = runner.exit_reason == ExitReason.timeout
        if runner.last_data is None:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                runner.interesting_examples.values(),
                key=lambda d: sort_key(d.buffer), reverse=True
            )
        else:
            if timed_out:
                note_deprecation((
                    'Your tests are hitting the settings timeout (%.2fs). '
                    'This functionality will go away in a future release '
                    'and you should not rely on it. Instead, try setting '
                    'max_examples to be some value lower than %d (the number '
                    'of examples your test successfully ran here). Or, if you '
                    'would prefer your tests to run to completion, regardless '
                    'of how long they take, you can set the timeout value to '
                    'hypothesis.unlimited.'
                ) % (
                    self.settings.timeout, runner.valid_examples),
                    self.settings)
            if runner.valid_examples < min(
                self.settings.min_satisfying_examples,
                self.settings.max_examples,
            ) and not (
                runner.exit_reason == ExitReason.finished and
                self.at_least_one_success
            ):
                if timed_out:
                    raise Timeout((
                        'Ran out of time before finding a satisfying '
                        'example for '
                        '%s. Only found %d examples in ' +
                        '%.2fs.'
                    ) % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples, run_time
                    ))
                else:
                    raise Unsatisfiable((
                        'Unable to satisfy assumptions of hypothesis '
                        '%s. Only %d examples considered '
                        'satisfied assumptions'
                    ) % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples,))

        if not self.falsifying_examples:
            return

        flaky = 0

        for falsifying_example in self.falsifying_examples:
            self.__was_flaky = False
            assert falsifying_example.__expected_exception is not None
            try:
                self.execute(
                    ConjectureData.for_buffer(falsifying_example.buffer),
                    print_example=True, is_final=True,
                    expected_failure=(
                        falsifying_example.__expected_exception,
                        falsifying_example.__expected_traceback,
                    )
                )
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.'
                )
            except BaseException:
                if len(self.falsifying_examples) <= 1:
                    raise
                report(traceback.format_exc())
            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error or
        # marked it as flaky before this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky((
                'Hypothesis found %d distinct failures, but %d of them '
                'exhibited some sort of flaky behaviour.') % (
                    len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures((
                'Hypothesis found %d distinct failures.') % (
                    len(self.falsifying_examples,)))
Example No. 14
        def wrapped_test(*arguments, **kwargs):
            settings = wrapped_test._hypothesis_internal_use_settings
            if wrapped_test._hypothesis_internal_use_seed is not None:
                random = Random(
                    wrapped_test._hypothesis_internal_use_seed)
            elif settings.derandomize:
                random = Random(function_digest(test))
            else:
                random = new_random()

            import hypothesis.strategies as sd

            selfy = None
            arguments, kwargs = convert_positional_arguments(
                wrapped_test, arguments, kwargs)

            # If the test function is a method of some kind, the bound object
            # will be the first named argument if there are any, otherwise the
            # first vararg (if any).
            if argspec.args:
                selfy = kwargs.get(argspec.args[0])
            elif arguments:
                selfy = arguments[0]
            test_runner = new_style_executor(selfy)

            for example in reversed(getattr(
                wrapped_test, 'hypothesis_explicit_examples', ()
            )):
                if example.args:
                    if len(example.args) > len(original_argspec.args):
                        raise InvalidArgument(
                            'example has too many arguments for test. '
                            'Expected at most %d but got %d' % (
                                len(original_argspec.args), len(example.args)))
                    example_kwargs = dict(zip(
                        original_argspec.args[-len(example.args):],
                        example.args
                    ))
                else:
                    example_kwargs = example.kwargs
                example_kwargs.update(kwargs)
                # Note: Test may mutate arguments and we can't rerun explicit
                # examples, so we have to calculate the failure message at this
                # point rather than later.
                message_on_failure = 'Falsifying example: %s(%s)' % (
                    test.__name__, arg_string(test, arguments, example_kwargs)
                )
                try:
                    with BuildContext() as b:
                        test_runner(
                            None,
                            lambda data: test(*arguments, **example_kwargs)
                        )
                except BaseException:
                    report(message_on_failure)
                    for n in b.notes:
                        report(n)
                    raise
            if settings.max_examples <= 0:
                return

            arguments = tuple(arguments)

            given_specifier = sd.tuples(
                sd.just(arguments),
                sd.fixed_dictionaries(generator_kwargs).map(
                    lambda args: dict(args, **kwargs)
                )
            )

            def fail_health_check(message):
                message += (
                    '\nSee http://hypothesis.readthedocs.org/en/latest/health'
                    'checks.html for more information about this.'
                )
                raise FailedHealthCheck(message)

            search_strategy = given_specifier
            search_strategy.validate()

            perform_health_check = settings.perform_health_check
            perform_health_check &= Settings.default.perform_health_check

            from hypothesis.internal.conjecture.data import TestData, Status, \
                StopTest

            if perform_health_check:
                initial_state = getglobalrandomstate()
                health_check_random = Random(random.getrandbits(128))
                # We "pre warm" the health check with one draw to give it some
                # time to calculate any cached data. This prevents the case
                # where the first draw of the health check takes ages because
                # of loading unicode data the first time.
                data = TestData(
                    max_length=settings.buffer_size,
                    draw_bytes=lambda data, n, distribution:
                    distribution(health_check_random, n)
                )
                with Settings(settings, verbosity=Verbosity.quiet):
                    try:
                        test_runner(data, reify_and_execute(
                            search_strategy,
                            lambda *args, **kwargs: None,
                        ))
                    except BaseException:
                        pass
                count = 0
                overruns = 0
                filtered_draws = 0
                start = time.time()
                while (
                    count < 10 and time.time() < start + 1 and
                    filtered_draws < 50 and overruns < 20
                ):
                    try:
                        data = TestData(
                            max_length=settings.buffer_size,
                            draw_bytes=lambda data, n, distribution:
                            distribution(health_check_random, n)
                        )
                        with Settings(settings, verbosity=Verbosity.quiet):
                            test_runner(data, reify_and_execute(
                                search_strategy,
                                lambda *args, **kwargs: None,
                            ))
                        count += 1
                    except UnsatisfiedAssumption:
                        filtered_draws += 1
                    except StopTest:
                        if data.status == Status.INVALID:
                            filtered_draws += 1
                        else:
                            assert data.status == Status.OVERRUN
                            overruns += 1
                    except Exception:
                        report(traceback.format_exc())
                        if test_runner is default_new_style_executor:
                            fail_health_check(
                                'An exception occurred during data '
                                'generation in initial health check. '
                                'This indicates a bug in the strategy. '
                                'This could either be a Hypothesis bug or '
                                "an error in a function yo've passed to "
                                'it to construct your data.'
                            )
                        else:
                            fail_health_check(
                                'An exception occurred during data '
                                'generation in initial health check. '
                                'This indicates a bug in the strategy. '
                                'This could either be a Hypothesis bug or '
                                'an error in a function you\'ve passed to '
                                'it to construct your data. Additionally, '
                                'you have a custom executor, which means '
                                'that this could be your executor failing '
                                'to handle a function which returns None. '
                            )
                if overruns >= 20 or (
                    not count and overruns > 0
                ):
                    fail_health_check((
                        'Examples routinely exceeded the max allowable size. '
                        '(%d examples overran while generating %d valid ones)'
                        '. Generating examples this large will usually lead to'
                        ' bad results. You should try setting average_size or '
                        'max_size parameters on your collections and turning '
                        'max_leaves down on recursive() calls.') % (
                        overruns, count
                    ))
                if filtered_draws >= 50 or (
                    not count and filtered_draws > 0
                ):
                    fail_health_check((
                        'It looks like your strategy is filtering out a lot '
                        'of data. Health check found %d filtered examples but '
                        'only %d good ones. This will make your tests much '
                        'slower, and also will probably distort the data '
                        'generation quite a lot. You should adapt your '
                        'strategy to filter less. This can also be caused by '
                        'a low max_leaves parameter in recursive() calls') % (
                        filtered_draws, count
                    ))
                runtime = time.time() - start
                if runtime > 1.0 or count < 10:
                    fail_health_check((
                        'Data generation is extremely slow: Only produced '
                        '%d valid examples in %.2f seconds (%d invalid ones '
                        'and %d exceeded maximum size). Try decreasing '
                        "size of the data you're generating (with e.g."
                        'average_size or max_leaves parameters).'
                    ) % (count, runtime, filtered_draws, overruns))
                if getglobalrandomstate() != initial_state:
                    fail_health_check(
                        'Data generation depends on global random module. '
                        'This makes results impossible to replay, which '
                        'prevents Hypothesis from working correctly. '
                        'If you want to use methods from random, use '
                        'randoms() from hypothesis.strategies to get an '
                        'instance of Random you can use. Alternatively, you '
                        'can use the random_module() strategy to explicitly '
                        'seed the random module.'
                    )
            last_exception = [None]
            repr_for_last_exception = [None]
            performed_random_check = [False]

            def evaluate_test_data(data):
                if perform_health_check and not performed_random_check[0]:
                    initial_state = getglobalrandomstate()
                    performed_random_check[0] = True
                else:
                    initial_state = None
                try:
                    result = test_runner(data, reify_and_execute(
                        search_strategy, test,
                    ))
                    if result is not None and settings.perform_health_check:
                        raise FailedHealthCheck((
                            'Tests run under @given should return None, but '
                            '%s returned %r instead.'
                        ) % (test.__name__, result), settings)
                    return False
                except UnsatisfiedAssumption:
                    data.mark_invalid()
                except (
                    HypothesisDeprecationWarning, FailedHealthCheck,
                    StopTest,
                ):
                    raise
                except Exception:
                    last_exception[0] = traceback.format_exc()
                    verbose_report(last_exception[0])
                    data.mark_interesting()
                finally:
                    if (
                        initial_state is not None and
                        getglobalrandomstate() != initial_state
                    ):
                        fail_health_check(
                            'Your test used the global random module. '
                            'This is unlikely to work correctly. You should '
                            'consider using the randoms() strategy from '
                            'hypothesis.strategies instead. Alternatively, '
                            'you can use the random_module() strategy to '
                            'explicitly seed the random module.')

            from hypothesis.internal.conjecture.engine import TestRunner

            falsifying_example = None
            database_key = str_to_bytes(fully_qualified_name(test))
            start_time = time.time()
            runner = TestRunner(
                evaluate_test_data,
                settings=settings, random=random,
                database_key=database_key,
            )
            runner.run()
            run_time = time.time() - start_time
            timed_out = (
                settings.timeout > 0 and
                run_time >= settings.timeout
            )
            if runner.last_data.status == Status.INTERESTING:
                falsifying_example = runner.last_data.buffer
                if settings.database is not None:
                    settings.database.save(
                        database_key, falsifying_example
                    )
            else:
                if runner.valid_examples < min(
                    settings.min_satisfying_examples,
                    settings.max_examples,
                ):
                    if timed_out:
                        raise Timeout((
                            'Ran out of time before finding a satisfying '
                            'example for '
                            '%s. Only found %d examples in %.2fs.'
                        ) % (
                            get_pretty_function_description(test),
                            runner.valid_examples, run_time
                        ))
                    else:
                        raise Unsatisfiable((
                            'Unable to satisfy assumptions of hypothesis '
                            '%s. Only %d examples considered '
                            'satisfied assumptions'
                        ) % (
                            get_pretty_function_description(test),
                            runner.valid_examples,))
                return

            assert last_exception[0] is not None

            try:
                with settings:
                    test_runner(
                        TestData.for_buffer(falsifying_example),
                        reify_and_execute(
                            search_strategy, test,
                            print_example=True, is_final=True
                        ))
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                raise Flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.'
                )

            report(
                'Failed to reproduce exception. Expected: \n' +
                last_exception[0],
            )

            filter_message = (
                'Unreliable test data: Failed to reproduce a failure '
                'and then when it came to recreating the example in '
                'order to print the test data with a flaky result '
                'the example was filtered out (by e.g. a '
                'call to filter in your strategy) when we didn\'t '
                'expect it to be.'
            )

            try:
                test_runner(
                    TestData.for_buffer(falsifying_example),
                    reify_and_execute(
                        search_strategy,
                        test_is_flaky(test, repr_for_last_exception[0]),
                        print_example=True, is_final=True
                    ))
            except (UnsatisfiedAssumption, StopTest):
                raise Flaky(filter_message)
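
The health-check pass above boils down to a counting loop: draw examples for up to a second, tally valid, filtered and overrun draws, and fail loudly when the ratios look pathological. A minimal standalone sketch of that pattern, where draw_one, the thresholds and the FailedHealthCheck class are illustrative stand-ins rather than Hypothesis's actual API:

import time


class FailedHealthCheck(Exception):
    """Illustrative stand-in for Hypothesis's internal exception."""


def check_generation_health(draw_one, budget=1.0):
    # draw_one() must return one of 'valid', 'filtered' or 'overrun'.
    counts = {'valid': 0, 'filtered': 0, 'overrun': 0}
    start = time.time()
    while (counts['valid'] < 10 and time.time() < start + budget and
           counts['filtered'] < 50 and counts['overrun'] < 20):
        counts[draw_one()] += 1
    if counts['overrun'] >= 20 or (not counts['valid'] and counts['overrun']):
        raise FailedHealthCheck('Examples routinely exceeded the max size.')
    if (counts['filtered'] >= 50 or
            (not counts['valid'] and counts['filtered'])):
        raise FailedHealthCheck('Strategy is filtering out too much data.')
    if time.time() - start > budget or counts['valid'] < 10:
        raise FailedHealthCheck('Data generation is extremely slow.')
    return counts


# A strategy where every draw is valid passes after ten quick draws.
check_generation_health(lambda: 'valid')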
Exemplo n.º 15
0
        def wrapped_test(*arguments, **kwargs):
            settings = wrapped_test._hypothesis_internal_use_settings
            if wrapped_test._hypothesis_internal_use_seed is not None:
                random = Random(
                    wrapped_test._hypothesis_internal_use_seed)
            elif settings.derandomize:
                random = Random(function_digest(test))
            else:
                random = new_random()

            import hypothesis.strategies as sd

            selfy = None
            arguments, kwargs = convert_positional_arguments(
                wrapped_test, arguments, kwargs)

            # If the test function is a method of some kind, the bound object
            # will be the first named argument if there are any, otherwise the
            # first vararg (if any).
            if argspec.args:
                selfy = kwargs.get(argspec.args[0])
            elif arguments:
                selfy = arguments[0]
            test_runner = new_style_executor(selfy)

            for example in reversed(getattr(
                wrapped_test, 'hypothesis_explicit_examples', ()
            )):
                if example.args:
                    if len(example.args) > len(original_argspec.args):
                        raise InvalidArgument(
                            'example has too many arguments for test. '
                            'Expected at most %d but got %d' % (
                                len(original_argspec.args), len(example.args)))
                    example_kwargs = dict(zip(
                        original_argspec.args[-len(example.args):],
                        example.args
                    ))
                else:
                    example_kwargs = example.kwargs
                if Phase.explicit not in settings.phases:
                    continue
                example_kwargs.update(kwargs)
                # Note: Test may mutate arguments and we can't rerun explicit
                # examples, so we have to calculate the failure message at this
                # point rather than later.
                message_on_failure = 'Falsifying example: %s(%s)' % (
                    test.__name__, arg_string(test, arguments, example_kwargs)
                )
                try:
                    with BuildContext(None) as b:
                        test_runner(
                            None,
                            lambda data: test(*arguments, **example_kwargs)
                        )
                except BaseException:
                    traceback.print_exc()
                    report(message_on_failure)
                    for n in b.notes:
                        report(n)
                    raise
            if settings.max_examples <= 0:
                return

            arguments = tuple(arguments)

            given_specifier = sd.tuples(
                sd.just(arguments),
                sd.fixed_dictionaries(generator_kwargs).map(
                    lambda args: dict(args, **kwargs)
                )
            )

            def fail_health_check(message, label):
                if label in settings.suppress_health_check:
                    return
                message += (
                    '\nSee https://hypothesis.readthedocs.io/en/latest/health'
                    'checks.html for more information about this. '
                )
                message += (
                    'If you want to disable just this health check, add %s '
                    'to the suppress_health_check settings for this test.'
                ) % (label,)
                raise FailedHealthCheck(message)

            search_strategy = given_specifier
            if selfy is not None:
                search_strategy = WithRunner(search_strategy, selfy)

            search_strategy.validate()

            perform_health_check = settings.perform_health_check
            perform_health_check &= Settings.default.perform_health_check

            from hypothesis.internal.conjecture.data import ConjectureData, \
                Status, StopTest
            if not (
                Phase.reuse in settings.phases or
                Phase.generate in settings.phases
            ):
                return

            if perform_health_check:
                health_check_random = Random(random.getrandbits(128))
                # We "pre warm" the health check with one draw to give it some
                # time to calculate any cached data. This prevents the case
                # where the first draw of the health check takes ages because
                # of loading unicode data the first time.
                data = ConjectureData(
                    max_length=settings.buffer_size,
                    draw_bytes=lambda data, n, distribution:
                    distribution(health_check_random, n)
                )
                with Settings(settings, verbosity=Verbosity.quiet):
                    try:
                        test_runner(data, reify_and_execute(
                            search_strategy,
                            lambda *args, **kwargs: None,
                        ))
                    except BaseException:
                        pass
                count = 0
                overruns = 0
                filtered_draws = 0
                start = time.time()
                while (
                    count < 10 and time.time() < start + 1 and
                    filtered_draws < 50 and overruns < 20
                ):
                    try:
                        data = ConjectureData(
                            max_length=settings.buffer_size,
                            draw_bytes=lambda data, n, distribution:
                            distribution(health_check_random, n)
                        )
                        with Settings(settings, verbosity=Verbosity.quiet):
                            test_runner(data, reify_and_execute(
                                search_strategy,
                                lambda *args, **kwargs: None,
                            ))
                        count += 1
                    except UnsatisfiedAssumption:
                        filtered_draws += 1
                    except StopTest:
                        if data.status == Status.INVALID:
                            filtered_draws += 1
                        else:
                            assert data.status == Status.OVERRUN
                            overruns += 1
                    except InvalidArgument:
                        raise
                    except Exception:
                        if (
                            HealthCheck.exception_in_generation in
                            settings.suppress_health_check
                        ):
                            raise
                        report(traceback.format_exc())
                        if test_runner is default_new_style_executor:
                            fail_health_check(
                                'An exception occurred during data '
                                'generation in initial health check. '
                                'This indicates a bug in the strategy. '
                                'This could either be a Hypothesis bug or '
                                "an error in a function you've passed to "
                                'it to construct your data.',
                                HealthCheck.exception_in_generation,
                            )
                        else:
                            fail_health_check(
                                'An exception occurred during data '
                                'generation in initial health check. '
                                'This indicates a bug in the strategy. '
                                'This could either be a Hypothesis bug or '
                                'an error in a function you\'ve passed to '
                                'it to construct your data. Additionally, '
                                'you have a custom executor, which means '
                                'that this could be your executor failing '
                                'to handle a function which returns None. ',
                                HealthCheck.exception_in_generation,
                            )
                if overruns >= 20 or (
                    not count and overruns > 0
                ):
                    fail_health_check((
                        'Examples routinely exceeded the max allowable size. '
                        '(%d examples overran while generating %d valid ones)'
                        '. Generating examples this large will usually lead to'
                        ' bad results. You should try setting average_size or '
                        'max_size parameters on your collections and turning '
                        'max_leaves down on recursive() calls.') % (
                        overruns, count
                    ), HealthCheck.data_too_large)
                if filtered_draws >= 50 or (
                    not count and filtered_draws > 0
                ):
                    fail_health_check((
                        'It looks like your strategy is filtering out a lot '
                        'of data. Health check found %d filtered examples but '
                        'only %d good ones. This will make your tests much '
                        'slower, and also will probably distort the data '
                        'generation quite a lot. You should adapt your '
                        'strategy to filter less. This can also be caused by '
                        'a low max_leaves parameter in recursive() calls.') % (
                        filtered_draws, count
                    ), HealthCheck.filter_too_much)
                runtime = time.time() - start
                if runtime > 1.0 or count < 10:
                    fail_health_check((
                        'Data generation is extremely slow: Only produced '
                        '%d valid examples in %.2f seconds (%d invalid ones '
                        'and %d exceeded maximum size). Try decreasing '
                        "size of the data you're generating (with e.g."
                        'average_size or max_leaves parameters).'
                    ) % (count, runtime, filtered_draws, overruns),
                        HealthCheck.too_slow,
                    )
            last_exception = [None]
            repr_for_last_exception = [None]

            def evaluate_test_data(data):
                try:
                    result = test_runner(data, reify_and_execute(
                        search_strategy, test,
                    ))
                    if result is not None and settings.perform_health_check:
                        fail_health_check((
                            'Tests run under @given should return None, but '
                            '%s returned %r instead.'
                        ) % (test.__name__, result), HealthCheck.return_value)
                    return False
                except UnsatisfiedAssumption:
                    data.mark_invalid()
                except (
                    HypothesisDeprecationWarning, FailedHealthCheck,
                    StopTest,
                ):
                    raise
                except Exception:
                    last_exception[0] = traceback.format_exc()
                    verbose_report(last_exception[0])
                    data.mark_interesting()

            from hypothesis.internal.conjecture.engine import ConjectureRunner

            falsifying_example = None
            database_key = str_to_bytes(fully_qualified_name(test))
            start_time = time.time()
            runner = ConjectureRunner(
                evaluate_test_data,
                settings=settings, random=random,
                database_key=database_key,
            )
            runner.run()
            note_engine_for_statistics(runner)
            run_time = time.time() - start_time
            timed_out = (
                settings.timeout > 0 and
                run_time >= settings.timeout
            )
            if runner.last_data is None:
                return
            if runner.last_data.status == Status.INTERESTING:
                falsifying_example = runner.last_data.buffer
                if settings.database is not None:
                    settings.database.save(
                        database_key, falsifying_example
                    )
            else:
                if runner.valid_examples < min(
                    settings.min_satisfying_examples,
                    settings.max_examples,
                ):
                    if timed_out:
                        raise Timeout((
                            'Ran out of time before finding a satisfying '
                            'example for '
                            '%s. Only found %d examples in %.2fs.'
                        ) % (
                            get_pretty_function_description(test),
                            runner.valid_examples, run_time
                        ))
                    else:
                        raise Unsatisfiable((
                            'Unable to satisfy assumptions of hypothesis '
                            '%s. Only %d examples considered '
                            'satisfied assumptions'
                        ) % (
                            get_pretty_function_description(test),
                            runner.valid_examples,))
                return

            assert last_exception[0] is not None

            try:
                with settings:
                    test_runner(
                        ConjectureData.for_buffer(falsifying_example),
                        reify_and_execute(
                            search_strategy, test,
                            print_example=True, is_final=True
                        ))
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                raise Flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.'
                )

            report(
                'Failed to reproduce exception. Expected: \n' +
                last_exception[0],
            )

            filter_message = (
                'Unreliable test data: Failed to reproduce a failure '
                'and then when it came to recreating the example in '
                'order to print the test data with a flaky result '
                'the example was filtered out (by e.g. a '
                'call to filter in your strategy) when we didn\'t '
                'expect it to be.'
            )

            try:
                test_runner(
                    ConjectureData.for_buffer(falsifying_example),
                    reify_and_execute(
                        search_strategy,
                        test_is_flaky(test, repr_for_last_exception[0]),
                        print_example=True, is_final=True
                    ))
            except (UnsatisfiedAssumption, StopTest):
                raise Flaky(filter_message)
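
What distinguishes this version from the previous one is that every health-check failure now carries a HealthCheck label, so individual checks can be opted out of via suppress_health_check. A minimal sketch of that dispatch; the enum members and the suppressed parameter are simplified stand-ins for the real settings plumbing:

import enum


class HealthCheck(enum.Enum):
    exception_in_generation = 1
    data_too_large = 2
    filter_too_much = 3
    too_slow = 4
    return_value = 5


class FailedHealthCheck(Exception):
    pass


def fail_health_check(message, label, suppressed=()):
    # Mirrors the closure above: a suppressed label silences the check.
    if label in suppressed:
        return
    message += ('\nIf you want to disable just this health check, add %s '
                'to the suppress_health_check settings for this test.'
                % (label,))
    raise FailedHealthCheck(message)


# Suppressed label: returns silently instead of raising.
fail_health_check('Data generation is too slow.', HealthCheck.too_slow,
                  suppressed={HealthCheck.too_slow})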
Exemplo n.º 16
0
    def run(self):
        database_key = str_to_bytes(fully_qualified_name(self.test))
        start_time = time.time()
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings,
            random=self.random,
            database_key=database_key,
        )
        runner.run()
        note_engine_for_statistics(runner)
        run_time = time.time() - start_time
        timed_out = (self.settings.timeout > 0
                     and run_time >= self.settings.timeout)
        if runner.last_data is None:
            return
        if runner.last_data.status == Status.INTERESTING:
            self.falsifying_example = runner.last_data.buffer
            if self.settings.database is not None:
                self.settings.database.save(database_key,
                                            self.falsifying_example)
        else:
            if runner.valid_examples < min(
                    self.settings.min_satisfying_examples,
                    self.settings.max_examples,
            ) and not (runner.exit_reason == ExitReason.finished
                       and self.at_least_one_success):
                if timed_out:
                    raise Timeout(
                        ('Ran out of time before finding a satisfying '
                         'example for '
                         '%s. Only found %d examples in %.2fs.') %
                        (get_pretty_function_description(
                            self.test), runner.valid_examples, run_time))
                else:
                    raise Unsatisfiable(
                        ('Unable to satisfy assumptions of hypothesis '
                         '%s. Only %d examples considered '
                         'satisfied assumptions') % (
                             get_pretty_function_description(self.test),
                             runner.valid_examples,
                         ))

        if self.falsifying_example is None:
            return

        assert self.last_exception is not None

        try:
            with self.settings:
                self.test_runner(
                    ConjectureData.for_buffer(self.falsifying_example),
                    reify_and_execute(self.search_strategy,
                                      self.test,
                                      print_example=True,
                                      is_final=True))
        except (UnsatisfiedAssumption, StopTest):
            report(traceback.format_exc())
            raise Flaky('Unreliable assumption: An example which satisfied '
                        'assumptions on the first run now fails it.')

        report(
            'Failed to reproduce exception. Expected: \n' +
            self.last_exception)

        filter_message = (
            'Unreliable test data: Failed to reproduce a failure '
            'and then when it came to recreating the example in '
            'order to print the test data with a flaky result '
            'the example was filtered out (by e.g. a '
            'call to filter in your strategy) when we didn\'t '
            'expect it to be.')

        try:
            self.test_runner(
                ConjectureData.for_buffer(self.falsifying_example),
                reify_and_execute(self.search_strategy,
                                  test_is_flaky(self.test,
                                                self.repr_for_last_exception),
                                  print_example=True,
                                  is_final=True))
        except (UnsatisfiedAssumption, StopTest):
            raise Flaky(filter_message)
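
The post-run bookkeeping in this run() reduces to a single decision: when too few valid examples were found, raise Timeout if the run hit its time budget and Unsatisfiable otherwise. A compact sketch of just that branch, with the exception classes as illustrative stand-ins:

class Timeout(Exception):
    pass


class Unsatisfiable(Exception):
    pass


def check_enough_examples(valid, minimum, run_time, timeout):
    # Nothing to complain about if we found enough satisfying examples.
    if valid >= minimum:
        return
    if timeout > 0 and run_time >= timeout:
        raise Timeout('Ran out of time before finding a satisfying example. '
                      'Only found %d examples in %.2fs.' % (valid, run_time))
    raise Unsatisfiable('Only %d examples considered satisfied assumptions.'
                        % (valid,))


# Found plenty of examples, so this returns without raising.
check_enough_examples(valid=25, minimum=5, run_time=0.3, timeout=60)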
Exemplo n.º 17
0
def calc_label_from_name(name):
    hashed = hashlib.sha384(str_to_bytes(name)).digest()
    return int_from_bytes(hashed[:8])
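
calc_label_from_name derives a stable 64-bit label by hashing the name with SHA-384 and keeping the first eight bytes of the digest. A self-contained version with the compat helpers inlined; reading the bytes big-endian via int.from_bytes is an assumption standing in for int_from_bytes:

import hashlib


def calc_label_from_name(name):
    # SHA-384 digest, truncated to 8 bytes and read as a big-endian int.
    hashed = hashlib.sha384(name.encode('utf-8')).digest()
    return int.from_bytes(hashed[:8], 'big')


# The label depends only on the name, so it is stable across runs.
assert calc_label_from_name('integers') == calc_label_from_name('integers')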
Exemplo n.º 18
0
    def run(self):
        # Tell pytest to omit the body of this function from tracebacks
        __tracebackhide__ = True
        database_key = str_to_bytes(fully_qualified_name(self.test))
        self.start_time = time.time()
        global in_given
        runner = ConjectureRunner(
            self.evaluate_test_data,
            settings=self.settings, random=self.random,
            database_key=database_key,
        )

        if in_given or self.collector is None:
            runner.run()
        else:  # pragma: no cover
            in_given = True
            original_trace = sys.gettrace()
            try:
                sys.settrace(None)
                runner.run()
            finally:
                in_given = False
                sys.settrace(original_trace)
        note_engine_for_statistics(runner)
        run_time = time.time() - self.start_time
        timed_out = runner.exit_reason == ExitReason.timeout
        if runner.last_data is None:
            return
        if runner.interesting_examples:
            self.falsifying_examples = sorted(
                [d for d in runner.interesting_examples.values()],
                key=lambda d: sort_key(d.buffer), reverse=True
            )
        else:
            if timed_out:
                note_deprecation((
                    'Your tests are hitting the settings timeout (%.2fs). '
                    'This functionality will go away in a future release '
                    'and you should not rely on it. Instead, try setting '
                    'max_examples to be some value lower than %d (the number '
                    'of examples your test successfully ran here). Or, if you '
                    'would prefer your tests to run to completion, regardless '
                    'of how long they take, you can set the timeout value to '
                    'hypothesis.unlimited.'
                ) % (
                    self.settings.timeout, runner.valid_examples),
                    self.settings)
            if runner.valid_examples < min(
                self.settings.min_satisfying_examples,
                self.settings.max_examples,
            ) and not (
                runner.exit_reason == ExitReason.finished and
                self.at_least_one_success
            ):
                if timed_out:
                    raise Timeout((
                        'Ran out of time before finding a satisfying '
                        'example for '
                        '%s. Only found %d examples in %.2fs.'
                    ) % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples, run_time
                    ))
                else:
                    raise Unsatisfiable((
                        'Unable to satisfy assumptions of hypothesis '
                        '%s. Only %d examples considered '
                        'satisfied assumptions'
                    ) % (
                        get_pretty_function_description(self.test),
                        runner.valid_examples,))

        if not self.falsifying_examples:
            return

        flaky = 0

        self.__in_final_replay = True

        for falsifying_example in self.falsifying_examples:
            self.__was_flaky = False
            raised_exception = False
            try:
                with self.settings:
                    self.test_runner(
                        ConjectureData.for_buffer(falsifying_example.buffer),
                        reify_and_execute(
                            self.search_strategy, self.test,
                            print_example=True, is_final=True
                        ))
            except (UnsatisfiedAssumption, StopTest):
                report(traceback.format_exc())
                self.__flaky(
                    'Unreliable assumption: An example which satisfied '
                    'assumptions on the first run now fails it.'
                )
            except BaseException:
                if len(self.falsifying_examples) <= 1:
                    raise
                raised_exception = True
                report(traceback.format_exc())

            if not raised_exception:
                if (
                    isinstance(
                        falsifying_example.__expected_exception,
                        DeadlineExceeded
                    ) and self.__test_runtime is not None
                ):
                    report((
                        'Unreliable test timings! On an initial run, this '
                        'test took %.2fms, which exceeded the deadline of '
                        '%.2fms, but on a subsequent run it took %.2fms, '
                        'which did not. If you expect this sort of '
                        'variability in your test timings, consider turning '
                        'deadlines off for this test by setting deadline=None.'
                    ) % (
                        falsifying_example.__expected_exception.runtime,
                        self.settings.deadline, self.__test_runtime
                    ))
                else:
                    report(
                        'Failed to reproduce exception. Expected: \n' +
                        falsifying_example.__expected_traceback,
                    )

                filter_message = (
                    'Unreliable test data: Failed to reproduce a failure '
                    'and then when it came to recreating the example in '
                    'order to print the test data with a flaky result '
                    'the example was filtered out (by e.g. a '
                    'call to filter in your strategy) when we didn\'t '
                    'expect it to be.'
                )

                try:
                    self.test_runner(
                        ConjectureData.for_buffer(falsifying_example.buffer),
                        reify_and_execute(
                            self.search_strategy,
                            test_is_flaky(
                                self.test, self.repr_for_last_exception),
                            print_example=True, is_final=True
                        ))
                except (UnsatisfiedAssumption, StopTest):
                    self.__flaky(filter_message)
                except Flaky as e:
                    if len(self.falsifying_examples) > 1:
                        self.__flaky(e.args[0])
                    else:
                        raise

            if self.__was_flaky:
                flaky += 1

        # If we only have one example then we should have raised an error
        # or Flaky before reaching this point.
        assert len(self.falsifying_examples) > 1

        if flaky > 0:
            raise Flaky((
                'Hypothesis found %d distinct failures, but %d of them '
                'exhibited some sort of flaky behaviour.') % (
                    len(self.falsifying_examples), flaky))
        else:
            raise MultipleFailures((
                'Hypothesis found %d distinct failures.') % (
                    len(self.falsifying_examples)))
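
The closing logic of this last run() is a tally over replayed failures: each distinct falsifying example is re-executed, the ones that no longer fail count as flaky, and the run is summarised with either Flaky or MultipleFailures. Stripped of the replay machinery, the control flow looks roughly like this, with the exception classes as illustrative stand-ins:

class Flaky(Exception):
    pass


class MultipleFailures(Exception):
    pass


def summarise_failures(still_failing):
    # still_failing: one boolean per distinct falsifying example, True if
    # the example reproduced its failure on replay.
    flaky = sum(1 for reproduced in still_failing if not reproduced)
    if flaky > 0:
        raise Flaky('Hypothesis found %d distinct failures, but %d of them '
                    'exhibited some sort of flaky behaviour.'
                    % (len(still_failing), flaky))
    raise MultipleFailures('Hypothesis found %d distinct failures.'
                           % (len(still_failing),))


# Two failures replayed, one of which no longer reproduces:
# summarise_failures([True, False]) would raise Flaky.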