Exemplo n.º 1
    def sort_regions_with_gaps(self):
        """Guarantees that for each i we have tried to swap index i with
        index i + 2.

        This uses an adaptive algorithm that works by sorting contiguous
        regions centered on each element, where that element is treated as
        fixed and the elements around it are sorted.

        Every candidate ordering is offered to ``self.consider``; this method
        itself returns nothing.
        """
        for i in range(1, len(self.current) - 1):
            if self.current[i - 1] <= self.current[i] <= self.current[i + 1]:
                # The `continue` line is optimised out of the bytecode on
                # CPython >= 3.7 (https://bugs.python.org/issue2506) and on
                # PyPy, and so coverage cannot tell that it has been taken.
                continue  # pragma: no cover

            def can_sort(a, b):
                """Try sorting ``self.current[a:b]`` around the fixed element
                at index ``i`` and report what ``self.consider`` says about
                the resulting sequence. Out-of-range regions are rejected
                without being tried."""
                if a < 0 or b > len(self.current):
                    return False
                assert a <= i < b
                # Number of sorted values that go back in front of the pivot,
                # so that the pivot stays at index ``i``.
                split = i - a
                values = sorted(self.current[a:i] + self.current[i + 1:b])
                return self.consider(
                    list(self.current[:a]) + values[:split] +
                    [self.current[i]] + values[split:] +
                    list(self.current[b:]))

            left = i
            right = i + 1
            # Grow the region to the right first, then to the left. The
            # lambda reads ``right`` before the augmented assignment rebinds
            # it, so the search is relative to the old boundary.
            # NOTE(review): ``find_integer`` is defined elsewhere; presumably
            # it returns the largest k accepted by the predicate - confirm.
            right += find_integer(lambda k: can_sort(left, right + k))
            find_integer(lambda k: can_sort(left - k, right))
Exemplo n.º 3
    def distinguish(self, value, test):
        """Make ``test`` unable to tell ``value`` apart from its normal form.

        If ``test(value)`` and ``test(self.normalize(value))`` already agree
        (or ``value`` is its own normal form) nothing is changed. Otherwise a
        new canonical value is inserted into the sorted list of canonical
        values so that ``value`` normalizes to something ``test`` treats the
        same way as ``value`` itself.

        Returns True if and only if the canonical values were modified.
        """
        canonical = self.normalize(value)
        if canonical == value:
            return False

        expected = test(value)
        if test(canonical) == expected:
            return False

        def still_agrees(offset):
            # Only candidates strictly above the current canonical value are
            # eligible; anything at or below it is rejected untested.
            candidate = value - offset
            if candidate <= canonical:
                return False
            return test(candidate) == expected

        # NOTE(review): ``find_integer`` is defined elsewhere; presumably it
        # returns the largest offset accepted by the predicate - confirm.
        lowered = value - find_integer(still_agrees)

        assert lowered not in self.__values

        # ``insort`` keeps the canonical values in sorted order.
        insort(self.__values, lowered)

        assert self.normalize(value) == lowered
        return True
Exemplo n.º 4
    def sort_regions(self):
        """Guarantees that for each i we have tried to swap index i with
        index i + 1.

        This uses an adaptive algorithm that works by sorting contiguous
        regions starting from each element.

        Every candidate ordering is offered to ``self.consider``; this method
        itself returns nothing.
        """
        i = 0
        while i + 1 < len(self.current):
            # Everything before ``i`` is left exactly as it is.
            prefix = list(self.current[:i])
            # Find how long a region starting at ``i`` can be replaced by its
            # sorted form (ordered by ``self.key``) while ``self.consider``
            # still accepts the result.
            # NOTE(review): ``find_integer`` is defined elsewhere; presumably
            # it returns the largest k accepted by the predicate - confirm.
            k = find_integer(
                lambda k: i + k <= len(self.current) and self.consider(
                    prefix + sorted(self.current[i:i + k], key=self.key
                                    ) + list(self.current[i + k:])))
            # Continue after the region that was just handled.
            i += k
Exemplo n.º 5
    def hill_climb(self):
        """The main hill climbing loop where we actually do the work: take
        the current data and attempt to improve its score for the target by
        rewriting one block of its buffer at a time.

        Blocks are visited from the last one towards the first. Whenever
        ``self.current_data`` has been replaced, the scan restarts from the
        last block of the new data, skipping block indices that were already
        examined. The loop ends when the scan runs past the first block or
        when ``self.improvements`` exceeds ``self.max_improvements``.

        NOTE(review): this method had lost several statements (bare
        ``continue`` lines, the ``blocks_examined.add`` call and parts of two
        expressions). They are restored below and marked individually;
        confirm them against the authoritative version.
        """

        blocks_examined = set()

        prev = None
        i = len(self.current_data.blocks) - 1
        while i >= 0 and self.improvements <= self.max_improvements:
            if prev is not self.current_data:
                # The data changed under us: start again from the last block.
                i = len(self.current_data.blocks) - 1
                prev = self.current_data

            if i in blocks_examined:
                i -= 1
                # NOTE(review): restored - without it the stepped index
                # (possibly -1) would be used immediately.
                continue

            # NOTE(review): restored - nothing else ever populates the set.
            blocks_examined.add(i)

            data = self.current_data
            block = data.blocks[i]
            prefix = data.buffer[:block.start]

            existing = data.buffer[block.start:block.end]
            existing_as_int = int_from_bytes(existing)
            max_int_value = (256**len(existing)) - 1

            if existing_as_int == max_int_value:
                # NOTE(review): restored body - the block already holds the
                # largest value that fits, so there is nothing to raise.
                continue

            def attempt_replace(v):
                """Try replacing the current block in the current best test case
                with an integer of value v. Note that we use the *current*
                best and not the one we started with. This helps ensure that
                if we luck into a good draw when making random choices we get
                to keep the good bits."""
                if v < 0 or v > max_int_value:
                    return False
                v_as_bytes = int_to_bytes(v, len(existing))

                # We make a couple attempts at replacement. This only matters
                # if we end up growing the buffer - otherwise we exit the loop
                # early - but in the event that there *is* some randomized
                # component we want to give it a couple of tries to succeed.
                for _ in range(3):
                    attempt = self.engine.cached_test_function(
                        prefix + v_as_bytes +
                        self.current_data.buffer[block.end:] +
                        bytes(BUFFER_SIZE), )

                    if self.consider_new_test_data(attempt):
                        return True

                    if attempt.status < Status.INVALID or len(
                            attempt.buffer) == len(self.current_data.buffer):
                        return False

                    # The attempt ran but used a different amount of data.
                    # Look for an example overlapping the block whose length
                    # changed and try splicing the attempt's version of it.
                    # ``i`` here is local to ``attempt_replace``.
                    for i, ex in enumerate(self.current_data.examples):
                        # NOTE(review): the bodies of the next three ``if``
                        # statements were missing. ``continue`` is used for
                        # all of them; if examples are ordered by start, the
                        # first could be a ``break`` with the same result.
                        if ex.start >= block.end:
                            continue
                        if ex.end <= block.start:
                            continue
                        ex_attempt = attempt.examples[i]
                        if ex.length == ex_attempt.length:
                            continue
                        # NOTE(review): the slice bounds and the wrapped call
                        # below were truncated and are reconstructed.
                        replacement = attempt.buffer[ex_attempt.
                                                     start:ex_attempt.end]
                        if self.consider_new_test_data(
                                self.engine.cached_test_function(
                                    prefix + replacement +
                                    self.current_data.buffer[ex.end:])):
                            return True
                return False

            # We unconditionally scan both upwards and downwards. The reason
            # for this is that we allow "lateral" moves that don't increase the
            # score but instead leave it constant. All else being equal we'd
            # like to leave the test case closer to shrunk, so afterwards we
            # try lowering the value towards zero even if we've just raised it.

            if not attempt_replace(max_int_value):
                find_integer(lambda k: attempt_replace(k + existing_as_int))

            # Re-read the block: the upward scan may have replaced the data.
            existing = self.current_data.buffer[block.start:block.end]
            existing_as_int = int_from_bytes(existing)
            if not attempt_replace(0):
                find_integer(lambda k: attempt_replace(existing_as_int - k))
Exemplo n.º 6
Exemplo n.º 7
    def learn(self, string):
        """Learn to give the correct answer on this string.
        That is, after this method completes we will have
        ``self.dfa.matches(s) == self.member(s)``.

        Note that we do not guarantee that this will remain
        true in the event that learn is called again with
        a different string. It is in principle possible that
        future learning will cause us to make a mistake on
        this string. However, repeatedly calling learn on
        each of a set of strings until the generation stops
        changing is guaranteed to terminate.

        NOTE(review): this method had lost several lines (docstring
        terminators, bare ``return``/``break`` statements and the opening
        part of a few calls). They are restored below and marked
        individually; confirm them against the authoritative version.
        """
        string = bytes(string)
        correct_outcome = self.member(string)

        # We don't want to check this inside the loop because it potentially
        # causes us to evaluate more of the states than we actually need to,
        # but if our model is mostly correct then this will be faster because
        # we only need to evaluate strings that are of the form
        # ``state + experiment``, which will generally be cached and/or needed
        # later.
        if self.dfa.matches(string) == correct_outcome:
            # NOTE(review): restored body - already correct, nothing to learn.
            return

        # In the papers they assume that we only run this process
        # once, but this is silly - often when you've got a messy
        # string it will be wrong for many different reasons.
        # Thus we iterate this to a fixed point where we repair
        # the DFA by repeatedly adding experiments until the DFA
        # agrees with the membership function on this string.

        # First we make sure that normalization is not the source of the
        # failure to match.
        while True:
            normalized = bytes(self.normalizer.normalize(c) for c in string)
            # We can correctly replace the string with its normalized version
            # so normalization is not the problem here.
            if self.member(normalized) == correct_outcome:
                string = normalized
                # NOTE(review): restored - this is the loop's only exit.
                break
            # Otherwise some character is being normalized to one that the
            # membership function can tell apart from it. Walk the alphabet
            # from the largest character down and teach the normalizer.
            alphabet = sorted(set(string), reverse=True)
            target = string
            for a in alphabet:

                def replace(b):
                    """Return ``target`` with every ``a`` replaced by ``b``."""
                    if a == b:
                        return target
                    return bytes(b if c == a else c for c in target)

                # NOTE(review): the start of this call was missing; it is
                # reconstructed from the argument's alignment and from the
                # ``distinguish(value, test)`` signature.
                self.normalizer.distinguish(a,
                                            lambda x: self.member(replace(x)))
                target = replace(self.normalizer.normalize(a))
                assert self.member(target) == correct_outcome
            assert target != normalized
            # NOTE(review): if ``self.dfa`` is cached, it may need to be
            # invalidated here now that the normalizer changed - confirm.

        if self.dfa.matches(string) == correct_outcome:
            # NOTE(review): restored body - fixing normalization was enough.
            return

        # Now we know normalization is correct we can attempt to determine if
        # any of our transitions are wrong.
        while True:
            dfa = self.dfa

            states = [dfa.start]

            def seems_right(n):
                """After reading n characters from s, do we seem to be
                in the right state?

                We determine this by replacing the first n characters
                of s with the label of the state we expect to be in.
                If we are in the right state, that will replace a substring
                with an equivalent one so must produce the same answer.
                """
                if n > len(string):
                    return False

                # Populate enough of the states list to know where we are.
                while n >= len(states):
                    # NOTE(review): ``states.append(`` was missing here.
                    states.append(
                        dfa.transition(states[-1], string[len(states) - 1]))

                return self.member(dfa.label(states[n]) +
                                   string[n:]) == correct_outcome

            assert seems_right(0)

            # NOTE(review): ``find_integer`` is defined elsewhere; presumably
            # it returns the largest n accepted by the predicate - confirm.
            n = find_integer(seems_right)

            # We got to the end without ever finding ourself in a bad
            # state, so we must correctly match this string.
            if n == len(string):
                assert dfa.matches(string) == correct_outcome
                # NOTE(review): restored - without it ``string[n]`` below
                # would be indexed past the end.
                break

            # Reading n characters does not put us in a bad state but
            # reading n + 1 does. This means that the remainder of
            # the string that we have not read yet is an experiment
            # that allows us to distinguish the state that we ended
            # up in from the state that we should have ended up in.

            source = states[n]
            character = string[n]
            wrong_destination = states[n + 1]

            # We've made an error in transitioning from ``source`` to
            # ``wrong_destination`` via ``character``. We now need to update
            # the DFA so that this transition no longer occurs. Note that we
            # do not guarantee that the transition is *correct* after this,
            # only that we don't make this particular error.
            assert self.transition(source, character) == wrong_destination

            labels_wrong_destination = self.dfa.label(wrong_destination)
            # ``character`` is an int (indexing ``bytes``), so it is wrapped
            # in a list to append it as a single byte.
            # NOTE(review): the argument of ``bytes(`` was truncated.
            labels_correct_destination = self.dfa.label(source) + bytes(
                [character])

            ex = string[n + 1:]

            # NOTE(review): the final ``ex)`` of this assertion was truncated.
            assert self.member(labels_wrong_destination +
                               ex) != self.member(labels_correct_destination +
                                                  ex)

            # Adding this experiment causes us to distinguish the wrong
            # destination from the correct one.
            self.__states[wrong_destination].experiments[ex] = self.member(
                labels_wrong_destination + ex)

            # We now clear the cached details that caused us to make this error
            # so that when we recalculate this transition we get to a
            # (hopefully now correct) different state.
            del self.__states[source].transitions[character]
            # NOTE(review): if ``self.dfa`` is cached, it may need to be
            # invalidated here so the next iteration sees the change - confirm.

            # We immediately recalculate the transition so that we can check
            # that it has changed as we expect it to have.
            new_destination = self.transition(source, string[n])
            assert new_destination != wrong_destination
Exemplo n.º 8
 def shift_right(self):
     """Offer right-shifted versions of the current value to
     ``self.consider``.

     The value is captured once up front, so every candidate is a shift of
     the value this method started with. Shift amounts larger than
     ``self.size`` are rejected without being tried.
     """
     start = self.current

     def can_shift(amount):
         return amount <= self.size and self.consider(start >> amount)

     # NOTE(review): ``find_integer`` is defined elsewhere; presumably it
     # searches for the largest amount accepted by the predicate - confirm.
     find_integer(can_shift)