    def eval(self, ground_examples, adversarials, labels, topk=1):
        """ Evaluates how effective the adversarial examples are
        ARGS:
            ground_examples: Variable (NxCxHxW) - examples before the
                             adversarial perturbation. Values in [0, 1] range
            adversarials: Variable (NxCxHxW) - examples after the adversarial
                          perturbation. Should have the same shape and order
                          as ground_examples
            labels: Variable (LongTensor N) - correct labels of the
                    classification output
            topk: int - the 'k' used in top-k accuracy as the criterion for
                  a 'correct' classification
        RETURNS:
            tuple of (% of correctly classified original examples,
                      % of correctly classified adversarial examples)
        """
        ground_examples = utils.safe_var(ground_examples)
        adversarials = utils.safe_var(adversarials)
        labels = utils.safe_var(labels)

        normed_ground = self.normalizer.forward(ground_examples)
        ground_output = self.classifier_net.forward(normed_ground)

        normed_advs = self.normalizer.forward(adversarials)
        adv_output = self.classifier_net.forward(normed_advs)

        start_prec = utils.accuracy(ground_output.data,
                                    labels.data,
                                    topk=(topk, ))
        adv_prec = utils.accuracy(adv_output.data, labels.data, topk=(topk, ))

        return float(start_prec[0]), float(adv_prec[0])

    def adversarial_tensors(self, x=None):
        """ Little helper method to get the tensors of the adversarial images
            directly
        """
        assert x is not None or self.originals is not None
        if x is None:
            x = self.originals

        return self.forward(utils.safe_var(x)).data

    def eval_attack_only(self, adversarials, labels, topk=1):
        """ Outputs the number of correctly classified adversarial examples
        ARGS:
            adversarials: Variable (NxCxHxW) - examples after the adversarial
                          perturbation
            labels: Variable (LongTensor N) - correct labels of the
                    classification output
            topk: int - criterion for 'correct' classification
        RETURNS:
            (int) number of correctly classified examples
        """

        adversarials = utils.safe_var(adversarials)
        labels = utils.safe_var(labels)
        normed_advs = self.normalizer.forward(adversarials)

        adv_output = self.classifier_net.forward(normed_advs)
        return utils.accuracy_int(adv_output, labels, topk=topk)

    def top1_accuracy(self, eval_label, attack_out, ground_examples, labels):
        """ Computes the top-1 accuracy of the attack output and folds it
            into the running AverageMeter stored under eval_label
        ARGS:
            eval_label: string - key in self.results under which the running
                        accuracy is stored
            attack_out: tuple - (adversarial examples, pre-adversarial labels)
            ground_examples, labels: unused by this method
        RETURNS:
            None, but updates self.results[eval_label]
        """
        ######################################################################
        #  First set up the evaluation result if it doesn't exist:           #
        ######################################################################
        if self.results[eval_label] is None:
            self.results[eval_label] = utils.AverageMeter()

        result = self.results[eval_label]

        ######################################################################
        #  Computes the top-1 accuracy and updates the AverageMeter          #
        ######################################################################
        attack_examples = utils.safe_var(attack_out[0])
        pre_adv_labels = utils.safe_var(attack_out[1])
        num_examples = float(attack_examples.shape[0])

        attack_accuracy_int = self.attack_params.eval_attack_only(
            attack_examples, pre_adv_labels, topk=1)
        result.update(attack_accuracy_int / num_examples, n=int(num_examples))
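
##############################################################################
# Hedged sketch (not library code): the evaluation methods above reduce to a
# top-k accuracy count. `topk_correct` is a hypothetical stand-in for what
# utils.accuracy / utils.accuracy_int are assumed to compute.
##############################################################################
import torch

def topk_correct(logits, labels, topk=1):
    # indices of the k largest logits per example: shape (N, k)
    _, pred = logits.topk(topk, dim=1)
    # an example counts as correct if its true label appears among the top k
    correct = pred.eq(labels.unsqueeze(1)).any(dim=1)
    return int(correct.sum())

logits = torch.tensor([[2.0, 1.0, 0.1],
                       [0.2, 0.3, 3.0]])
labels = torch.tensor([0, 1])
print(topk_correct(logits, labels, topk=1))  # 1: row 2's top-1 is class 2
print(topk_correct(logits, labels, topk=2))  # 2: label 1 is in row 2's top-2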
    def make_grid(self, x):
        """ Builds a rotation-only affine sampling grid (one 2x3 matrix per
            example) from self.xform_params, suitable for F.grid_sample
        """
        assert isinstance(x, Variable)
        cos_xform = self.xform_params.cos()
        sin_xform = self.xform_params.sin()
        zeros = utils.safe_var(torch.zeros_like(self.xform_params))

        # rows of [[cos, -sin, 0], [sin, cos, 0]], one matrix per example
        affine_xform = torch.stack(
            [cos_xform, -sin_xform, zeros, sin_xform, cos_xform, zeros])
        affine_xform = affine_xform.transpose(0, 1).contiguous().view(-1, 2, 3)

        return F.affine_grid(affine_xform, x.shape)
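
##############################################################################
# Hedged sketch (assumed usage, not library code): a grid built exactly like
# make_grid's output is consumed by F.grid_sample to rotate a batch of images.
##############################################################################
import torch
import torch.nn.functional as F

x = torch.rand(2, 3, 8, 8)                # batch of 2 RGB 8x8 images
theta = torch.tensor([0.0, 3.14159 / 4])  # one rotation angle per example
zeros = torch.zeros_like(theta)

# same construction as make_grid: rows of [[cos, -sin, 0], [sin, cos, 0]]
affine = torch.stack([theta.cos(), -theta.sin(), zeros,
                      theta.sin(), theta.cos(), zeros])
affine = affine.transpose(0, 1).contiguous().view(-1, 2, 3)

grid = F.affine_grid(affine, x.shape, align_corners=False)
rotated = F.grid_sample(x, grid, align_corners=False)  # same shape as x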
def display_adversarial_2row(classifier_net, normalizer, original_images,
                             adversarial_images, num_to_show=4,
                             which='incorrect', ipython=False, margin_width=2):
    """ Displays adversarial images side-by-side with their unperturbed
        counterparts. Opens a window displaying two rows: top row is original
        images, bottom row is perturbed
    ARGS:
        classifier_net : nn - with a .forward method that takes normalized
                              variables and outputs logits
        normalizer : object w/ .forward method - should probably be an instance
                    of utils.DifferentiableNormalize or utils.IdentityNormalize
        original_images: Variable or Tensor (NxCxHxW) - original images to
                         display. Images in [0., 1.] range
        adversarial_images: Variable or Tensor (NxCxHxW) - perturbed images to
                            display. Should be same shape as original_images
        num_to_show : int - number of images to show
        which : string in ['incorrect', 'random', 'correct'] - which images to
                show.
                -- 'incorrect' means successfully attacked images,
                -- 'random' means some random selection of images
                -- 'correct' means unsuccessfully attacked images
        ipython: bool - if True, we're in an ipython notebook, so images are
                        shown in a slightly different way
        margin_width : int - height in pixels of the red margin separating the
                             top and bottom rows. Set to 0 for no margin
    RETURNS:
        None, but displays images
    """
    assert which in ['incorrect', 'random', 'correct']

    # If not 'random' selection, prune to only the valid things
    to_sample_idxs = []
    if which != 'random':
        classifier_net.eval() # can never be too safe =)

        # classify the originals with top1
        original_norm_var = normalizer.forward(utils.safe_var(original_images))
        original_out_logits = classifier_net.forward(original_norm_var)
        _, original_out_classes = original_out_logits.max(1)

        # classify the adversarials with top1
        adv_norm_var = normalizer.forward(utils.safe_var(adversarial_images))
        adv_out_logits = classifier_net.forward(adv_norm_var)
        _, adv_out_classes = adv_out_logits.max(1)

        # 'correct' keeps indices where the predicted classes match (attack
        # failed); 'incorrect' keeps indices where they differ (attack
        # succeeded)
        selector = lambda var: (which == 'correct') == bool(float(var))
        for idx, var_el in enumerate(original_out_classes == adv_out_classes):
            if selector(var_el):
                to_sample_idxs.append(idx)
    else:
        to_sample_idxs = range(original_images.shape[0])

    # Now select some indices to show
    if not to_sample_idxs:
        print("Couldn't show anything. Try changing the 'which' argument here")
        return

    to_show_idxs = random.sample(to_sample_idxs, min([num_to_show,
                                                      len(to_sample_idxs)]))

    # Now start building up the images : first horizontally, then vertically
    top_row = torch.cat([original_images[idx] for idx in to_show_idxs], dim=2)
    bottom_row = torch.cat([adversarial_images[idx] for idx in to_show_idxs],
                           dim=2)

    if margin_width > 0:
        margin = torch.zeros(3, margin_width, top_row.shape[-1])
        margin[0] = 1.0 # make it red
        margin = margin.type(type(top_row))
        stack = [top_row, margin, bottom_row]
    else:
        stack = [top_row, bottom_row]

    plt.imshow(torch.cat(stack, dim=1).cpu().numpy().transpose(1, 2, 0))
    plt.show()
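
##############################################################################
# Hedged sketch (illustration only): the grid-assembly trick used above.
# Concatenating CxHxW images along dim=2 builds a row; concatenating rows
# along dim=1, with a red strip in between, stacks them vertically.
##############################################################################
import torch

imgs_top = [torch.rand(3, 32, 32) for _ in range(4)]
imgs_bot = [torch.rand(3, 32, 32) for _ in range(4)]

top_row = torch.cat(imgs_top, dim=2)      # 3 x 32 x 128
bottom_row = torch.cat(imgs_bot, dim=2)   # 3 x 32 x 128
margin = torch.zeros(3, 2, top_row.shape[-1])
margin[0] = 1.0                           # red separator strip
canvas = torch.cat([top_row, margin, bottom_row], dim=1)
print(canvas.shape)                       # torch.Size([3, 66, 128])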
def discretized_adversarial(img_tensor,
                            classifier_net,
                            normalizer,
                            flavor='greedy'):
    """ Takes in an image_tensor and classifier/normalizer pair and outputs a
        'discretized' image_tensor [each val is i/255.0 for some integer i]
        with the same classification
    ARGS:
        img_tensor : tensor (NxCxHxW) - tensor of images with values between
                     0.0 and 1.0.
        classifier_net : NN - neural net with .forward method to classify
                         normalized images
        normalizer : differentiableNormalizer object - normalizes 0,1 images
                     into classifier_domain
        flavor : string - either 'random' or 'greedy', determining which
                 'next_pixel_to_flip' function we use
    RETURNS:
        img_tensor of the same shape, but no with values of the form i/255.0
        for integers i.
    """

    img_tensor = utils.safe_tensor(img_tensor)

    nptf_map = {'random': flip_random_pixel, 'greedy': flip_greedy_pixel}
    next_pixel_to_flip = nptf_map[flavor](classifier_net, normalizer)

    ##########################################################################
    # First figure out 'correct' labels and the 'discretized' labels         #
    ##########################################################################
    var_img = utils.safe_var(img_tensor)
    norm_var = normalizer.forward(var_img)
    norm_output = classifier_net.forward(norm_var)
    correct_targets = norm_output.max(1)[1]

    og_discretized = utils.safe_var(discretize_image(img_tensor,
                                                     zero_one=True))
    norm_discretized = normalizer.forward(og_discretized)
    discretized_output = classifier_net.forward(norm_discretized)
    discretized_targets = discretized_output.max(1)[1]

    ##########################################################################
    # Collect idxs for examples affected by discretization                   #
    ##########################################################################
    incorrect_idxs = set()

    for i, el in enumerate(correct_targets.ne(discretized_targets)):
        if float(el) != 0:
            incorrect_idxs.add(i)

    ##########################################################################
    #   Fix all bad images                                                   #
    ##########################################################################

    corrected_imgs = []
    # sign of (continuous - discretized) says which way each pixel rounded;
    # it doesn't depend on the loop, so compute it once
    signs = torch.sign(var_img - og_discretized)
    # iterate in ascending index order so corrected_imgs lines up with the
    # pop(0) calls in the reassembly loop below
    for idx in sorted(incorrect_idxs):
        desired_target = correct_targets[idx]
        example = og_discretized[idx].data.clone()  # tensor
        bad_discretization = True
        pixels_changed_so_far = set()  # populated with tuples of idxs

        while bad_discretization:
            pixel_idx, grad_sign = next_pixel_to_flip(example,
                                                      pixels_changed_so_far,
                                                      desired_target)
            pixels_changed_so_far.add(pixel_idx)

            if grad_sign == 0:
                grad_sign = utils.tuple_getter(signs[idx], pixel_idx)

            new_val = (grad_sign / 255. +
                       utils.tuple_getter(example, pixel_idx))
            utils.tuple_setter(example, pixel_idx, float(new_val))

            new_out = classifier_net.forward(
                normalizer.forward(Variable(example.unsqueeze(0))))
            bad_discretization = (int(desired_target) != int(
                new_out.max(1)[1]))
        corrected_imgs.append(example)

    # Stack up results
    output = []

    for idx in range(len(img_tensor)):
        if idx in incorrect_idxs:
            output.append(corrected_imgs.pop(0))
        else:
            output.append(og_discretized[idx].data)

    return torch.stack(output)  # tensor (N x C x H x W)
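
##############################################################################
# Hedged sketch (an assumption about the discretize_image helper used above):
# snapping a [0, 1]-valued image to the nearest i/255.0 grid point.
##############################################################################
import torch

def discretize_01(img_tensor):
    # round every value to the closest multiple of 1/255
    return torch.round(img_tensor * 255.0) / 255.0

x = torch.rand(1, 3, 4, 4)
x_disc = discretize_01(x)
# no pixel moves by more than half a discretization step
assert float((x - x_disc).abs().max()) <= 0.5 / 255.0 + 1e-6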