Example #1
def test_crossentropy():
    predictions = np.array([0.1, 0.5, 0.7, 0.4])
    probabilities = softmax(predictions)
    for i in range(len(predictions)):
        ce = crossentropy(logits=predictions, label=i)
        assert ce == approx(-np.log(probabilities[i]))
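
For reference, the assertion above checks the identity crossentropy(logits, i) == -log(softmax(logits)[i]). A minimal sketch of the two helpers the test assumes (presumably utilities of the library under test, e.g. foolbox.utils; the definitions below are illustrative, not the library code):

import numpy as np

def softmax(logits):
    # shift by the maximum before exponentiating, for numerical stability
    e = np.exp(logits - np.max(logits))
    return e / np.sum(e)

def crossentropy(logits, label):
    # -log softmax(logits)[label], computed with the log-sum-exp trick
    s = logits - np.max(logits)
    return np.log(np.sum(np.exp(s))) - s[label]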
Example #2
    def _run_one(self, a, epsilon, stepsize, iterations,
                 random_start, targeted, class_, return_early, scale, 
                 bb_step=15, RO=False, m=2, RC=False, TAP=False, uniform_or_not=False, moment_or_not=False):
        min_, max_ = a.bounds()
        s = max_ - min_

        original = a.original_image.copy()

        if random_start:
            noise = np.random.uniform(
                -epsilon * s, epsilon * s, original.shape).astype(
                    original.dtype)
            x = original + self._clip_perturbation(a, noise, epsilon)
            strict = False
        else:
            x = original
            strict = True

        if RC:  # use the curl iteration to update the adversarial example
            success = False
            momentum_up = 0
            momentum_down = 0
            go_up_flag = True  # still in the gradient-descent phase of the curl trajectory
            x_up = x.copy()

            logits_init, is_adversarial_init = a.predictions(np.round(x))
            ce_init = crossentropy(class_, logits_init)
            up_better_start = x.copy()

            for _ in range(iterations):
                avg_gradient_down = 0
                avg_gradient_up = 0
                for m_counter in range(m):
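                    # average gradients over m randomly perturbed copies to smooth the estimate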
                    #gradient ascent trajectory
                    if RO:
                        if uniform_or_not:  # add uniform noise when estimating the gradient
                            temp_x_up = np.clip(np.random.uniform(-scale, scale, original.shape) + x_up + stepsize*self.success_dir, min_, max_).astype(np.float32)
                        else:  # add Gaussian noise when estimating the gradient
                            temp_x_up = np.clip(np.random.normal(loc=x_up, scale=scale) + stepsize*self.success_dir, min_, max_).astype(np.float32)
                    else:
                        if uniform_or_not:
                            temp_x_up = np.clip(np.random.uniform(-scale, scale, original.shape) + x_up, min_, max_).astype(np.float32)
                        else:
                            temp_x_up = np.clip(np.random.normal(loc=x_up, scale=scale), min_, max_).astype(np.float32)
                    gradient_up = self._gradient(a, temp_x_up, class_, strict=strict)   #calculate gradient on substitute model
                    avg_gradient_up += gradient_up

                    #gradient descent trajectory
                    if RO:
                        if uniform_or_not:
                            temp_x_down = np.clip(np.random.uniform(-scale, scale, original.shape) + x + stepsize*self.success_dir, min_, max_).astype(np.float32)
                        else:
                            temp_x_down = np.clip(np.random.normal(loc=x, scale=scale) + stepsize*self.success_dir, min_, max_).astype(np.float32)
                    else:
                        if uniform_or_not:
                            temp_x_down = np.clip(np.random.uniform(-scale, scale, original.shape) + x, min_, max_).astype(np.float32)
                        else:
                            temp_x_down = np.clip(np.random.normal(loc=x, scale=scale), min_, max_).astype(np.float32)
                    gradient_down = self._gradient(a, temp_x_down, class_, strict=strict)
                    avg_gradient_down += gradient_down
                
                avg_gradient_up = avg_gradient_up/m
                avg_gradient_down = avg_gradient_down/m

                strict = True
                if targeted:
                    avg_gradient_down = -avg_gradient_down
                    avg_gradient_up = -avg_gradient_up

                if moment_or_not:  # accumulate momentum across iterations, as in MI-FGSM
                    momentum_up += avg_gradient_up
                    momentum_up_norm = np.sqrt(np.mean(np.square(momentum_up)))
                    momentum_up_norm = max(1e-12, momentum_up_norm)  # avoid division by zero

                    momentum_down += avg_gradient_down
                    momentum_down_norm = np.sqrt(np.mean(np.square(momentum_down)))
                    momentum_down_norm = max(1e-12, momentum_down_norm)  # avoid division by zero
                    if go_up_flag:
                        x_up = x_up - stepsize * (momentum_up/momentum_up_norm)
                    else:
                        x_up = x_up + stepsize * (momentum_up/momentum_up_norm)

                    x = x + stepsize * (momentum_down/momentum_down_norm)

                else: 
                    if go_up_flag:
                        # still descending: step against the gradient to reduce the loss first
                        x_up = x_up - stepsize * avg_gradient_up
                    else:
                        x_up = x_up + stepsize * avg_gradient_up

                    x = x + stepsize * avg_gradient_down

                x = original + self._clip_perturbation(a, x - original, epsilon)
                x_up = original + self._clip_perturbation(a, x_up - original, epsilon)

                x = np.clip(x, min_, max_)
                x_up = np.clip(x_up, min_, max_)

                logits_down, is_adversarial_down = a.predictions(np.round(x))
                logits_up, is_adversarial_up = a.predictions(np.round(x_up))

                if logging.getLogger().isEnabledFor(logging.DEBUG):
                    if targeted:
                        ce = crossentropy(a.original_class, logits_down)
                        logging.debug('crossentropy to {} is {}'.format(
                            a.original_class, ce))
                    ce = crossentropy(class_, logits_down)
                    logging.debug('crossentropy to {} is {}'.format(class_, ce))

                if is_adversarial_up:
                    if RO:
                        self.update_success_dir(x_up)
                    # binary search between the original input and x_up for a smaller adversarial perturbation
                    left = original
                    right = x_up
                    for binary_counter in range(bb_step):
                        middle = np.clip((left + right)/2, min_, max_)
                        temp_logits, temp_is_adversarial = a.predictions(np.round(middle))

                        if temp_is_adversarial: #find a better adversarial example
                            if RO:
                                self.update_success_dir(middle)
                            right = middle
                        else:
                            left = middle
                    if return_early:
                        return True
                    else:
                        success = True

                if is_adversarial_down:
                    if RO:
                        self.update_success_dir(x)
                    left = original
                    right = x
                    for binary_counter in range(bb_step):
                        middle = np.clip((left + right)/2, min_, max_)
                        temp_logits, temp_is_adversarial = a.predictions(np.round(middle))

                        if temp_is_adversarial:
                            if RO:
                                self.update_success_dir(middle)
                            right = middle
                        else:
                            left = middle
                    if return_early:
                        return True
                    else:
                        success = True

    
                if go_up_flag:
                    ce_now = crossentropy(class_, logits_up)
                    if ce_now < ce_init:
                        ce_init = ce_now
                        up_better_start = x_up
                    else:
                        go_up_flag = False    #stop gradient descent, start gradient ascent
                        momentum_up = 0
                        x_up = up_better_start


        else:  # standard iterative process (no curl trajectory)
            success = False
            momentum_down = 0

            for _ in range(iterations):
                avg_gradient_down = 0
                avg_gradient_up = 0
                for m_counter in range(m):
                    if RO:
                        if uniform_or_not:
                            temp_x_down = np.clip(np.random.uniform(-scale, scale, original.shape) + x + stepsize*self.success_dir, min_, max_).astype(np.float32)
                        else:
                            temp_x_down = np.clip(np.random.normal(loc=x, scale=scale) + stepsize*self.success_dir, min_, max_).astype(np.float32)
                    else:
                        if uniform_or_not:
                            temp_x_down = np.clip(np.random.uniform(-scale, scale, original.shape) + x, min_, max_).astype(np.float32)
                        else:
                            temp_x_down = np.clip(np.random.normal(loc=x, scale=scale), min_, max_).astype(np.float32)
                    gradient_down = self._gradient(a, temp_x_down, class_, strict=strict)
                    avg_gradient_down += gradient_down
                
                avg_gradient_down = avg_gradient_down/m

                strict = True
                if targeted:
                    avg_gradient_down = -avg_gradient_down

                if moment_or_not:
                    momentum_down += avg_gradient_down
                    momentum_down_norm = np.sqrt(np.mean(np.square(momentum_down)))
                    momentum_down_norm = max(1e-12, momentum_down_norm)  # avoid division by zero
                    x = x + stepsize * (momentum_down/momentum_down_norm)

                else: 
                    x = x + stepsize * avg_gradient_down

                x = original + self._clip_perturbation(a, x - original, epsilon)
                x = np.clip(x, min_, max_)

                logits_down, is_adversarial_down = a.predictions(np.round(x))

                if logging.getLogger().isEnabledFor(logging.DEBUG):
                    if targeted:
                        ce = crossentropy(a.original_class, logits_down)
                        logging.debug('crossentropy to {} is {}'.format(
                            a.original_class, ce))
                    ce = crossentropy(class_, logits_down)
                    logging.debug('crossentropy to {} is {}'.format(class_, ce))

                if is_adversarial_down:
                    if RO:
                        self.update_success_dir(x)
                    left = original
                    right = x
                    for binary_counter in range(bb_step):
                        middle = np.clip((left + right)/2, min_, max_)
                        temp_logits, temp_is_adversarial = a.predictions(np.round(middle))

                        if temp_is_adversarial: 
                            if RO:
                                self.update_success_dir(middle)
                            right = middle
                        else:
                            left = middle
                    if return_early:
                        return True
                    else:
                        success = True
        return success
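
The method above depends on helpers from the surrounding attack class (self._gradient, self._clip_perturbation, self.update_success_dir, self.success_dir). As an illustration of what the projection step is assumed to do, here is a minimal L_inf-style sketch of _clip_perturbation that keeps the perturbation within an epsilon-ball scaled by the input range; the real helper may differ:

import numpy as np

def _clip_perturbation(self, a, perturbation, epsilon):
    # scale epsilon by the dynamic range of the input, matching min_/max_ above
    min_, max_ = a.bounds()
    s = max_ - min_
    return np.clip(perturbation, -epsilon * s, epsilon * s)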
Example #3
def crossentropy(x):
    # L-BFGS with approximated gradients does not seem to respect the bounds,
    # so strict checking is disabled here
    logits, _ = a.predictions(x.reshape(shape), strict=False)
    ce = utils.crossentropy(logits=logits, label=target_class)
    return ce
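
This closure returns only the loss value, so it suits an optimizer that approximates gradients via finite differences. A hedged usage sketch (x0, the bounds and the iteration budget are illustrative assumptions; shape, a and target_class come from the enclosing attack code):

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

x0 = np.zeros(int(np.prod(shape)))        # hypothetical flattened starting point
bounds = [(0.0, 1.0)] * x0.size           # hypothetical per-pixel bounds
x_opt, loss, info = fmin_l_bfgs_b(
    crossentropy, x0, approx_grad=True, bounds=bounds, maxiter=100)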
Example #4
def crossentropy(x):
    logits, gradient, _ = a.predictions_and_gradient(
        x.reshape(shape), target_class, strict=False)
    gradient = gradient.reshape(-1)
    ce = utils.crossentropy(logits=logits, label=target_class)
    return ce, gradient
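
Because this variant returns both the loss and its gradient, SciPy's L-BFGS-B can be called without finite-difference approximation: when fprime is omitted and approx_grad is left at its default, the objective is expected to return the pair (value, gradient). A hedged sketch, reusing the hypothetical x0 and bounds from the previous example:

from scipy.optimize import fmin_l_bfgs_b

x_opt, loss, info = fmin_l_bfgs_b(crossentropy, x0, bounds=bounds, maxiter=100)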
Example #5
    def __call__(self, input_or_adv, label=None, unpack=True,
                 steps=100, subsample=10, p=None):

        """Simple and close to optimal gradient-based
        adversarial attack.

        Parameters
        ----------
        input_or_adv : `numpy.ndarray` or :class:`Adversarial`
            The original, unperturbed input as a `numpy.ndarray` or
            an :class:`Adversarial` instance.
        label : int
            The reference label of the original input. Must be passed
            if `a` is a `numpy.ndarray`, must not be passed if `a` is
            an :class:`Adversarial` instance.
        unpack : bool
            If true, returns the adversarial input, otherwise returns
            the Adversarial object.
        steps : int
            Maximum number of steps to perform.
        subsample : int
            Limit on the number of the most likely classes that should
            be considered. A small value is usually sufficient and much
            faster.
        p : int or float
            Lp-norm that should be minimized, must be 2 or np.inf.

        """

        a = input_or_adv
        del input_or_adv
        del label
        del unpack

        if not a.has_gradient():
            return

        if a.target_class() is not None:
            logging.fatal('DeepFool is an untargeted adversarial attack.')
            return

        if p is None:
            # set norm to optimize based on the distance measure
            if a._distance == MeanSquaredDistance:
                p = 2
            elif a._distance == Linfinity:
                p = np.inf
            else:
                raise NotImplementedError('Please choose a distance measure'
                                          ' for which DeepFool is implemented'
                                          ' or specify manually which norm'
                                          ' to optimize.')

        if not (1 <= p <= np.inf):
            raise ValueError

        if p not in [2, np.inf]:
            raise NotImplementedError

        _label = a.original_class

        # define labels
        logits, _ = a.predictions(a.original_image)
        labels = np.argsort(logits)[::-1]
        if subsample:
            # choose the top-k classes
            logging.info('Only testing the top-{} classes'.format(subsample))
            assert isinstance(subsample, int)
            labels = labels[:subsample]

        def get_residual_labels(logits):
            """Get all labels with p < p[original_class]"""
            return [
                k for k in labels
                if logits[k] < logits[_label]]

        perturbed = a.original_image
        min_, max_ = a.bounds()

        for step in range(steps):
            logits, grad, is_adv = a.predictions_and_gradient(perturbed)
            if is_adv:
                return

            # correspondence to algorithm 2 in [1]_:
            #
            # loss corresponds to f (in the paper: negative cross-entropy)
            # grad corresponds to -df/dx (gradient of cross-entropy)

            loss = -crossentropy(logits=logits, label=_label)

            residual_labels = get_residual_labels(logits)

            # instead of using the logits and the gradient of the logits,
            # we use a numerically stable implementation of the cross-entropy
            # and expect that the deep learning frameworks also use such a
            # stable implementation to calculate the gradient
            losses = [
                -crossentropy(logits=logits, label=k)
                for k in residual_labels]
            grads = [a.gradient(perturbed, label=k) for k in residual_labels]

            # compute optimal direction (and loss difference)
            # pairwise between each label and the target
            diffs = [(l - loss, g - grad) for l, g in zip(losses, grads)]

            # calculate distances
            if p == 2:
                distances = [abs(dl) / (np.linalg.norm(dg) + 1e-8)
                             for dl, dg in diffs]
            elif p == np.inf:
                distances = [abs(dl) / (np.sum(np.abs(dg)) + 1e-8)
                             for dl, dg in diffs]
            else:  # pragma: no cover
                assert False

            # choose optimal one
            optimal = np.argmin(distances)
            df, dg = diffs[optimal]

            # apply perturbation
            # the (-dg) corrects the sign, gradient here is -gradient of paper
            if p == 2:
                perturbation = abs(df) / (np.linalg.norm(dg) + 1e-8)**2 * (-dg)
            elif p == np.inf:
                perturbation = abs(df) / (np.sum(np.abs(dg)) + 1e-8) \
                    * np.sign(-dg)
            else:  # pragma: no cover
                assert False

            # the original implementation accumulates the perturbations
            # and only adds the overshoot when adding the accumulated
            # perturbation to the original image; we apply the overshoot
            # to each perturbation (step)
            perturbed = perturbed + 1.05 * perturbation
            perturbed = np.clip(perturbed, min_, max_)

        a.predictions(perturbed)  # to find an adversarial in the last step
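
A hedged usage sketch of how a __call__ signature like the one above is typically invoked through the foolbox 1.x attack API; the model wrapper and the input below are user-provided placeholders, not values from the source:

import numpy as np
import foolbox

fmodel = ...  # any foolbox.models.Model wrapping the network under attack
image = ...   # numpy.ndarray within fmodel's bounds
label = 7     # hypothetical ground-truth class of `image`

attack = foolbox.attacks.DeepFoolAttack(fmodel)
adversarial = attack(image, label=label, steps=100, subsample=10, p=np.inf)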