def eval(self, ground_examples, adversarials, labels, topk=1):
    """ Evaluates how good the adversarial examples are
    ARGS:
        ground_examples: Variable (NxCxHxW) - examples before we did
                         adversarial perturbation. Vals in [0, 1] range
        adversarials: Variable (NxCxHxW) - examples after we did
                      adversarial perturbation. Should be same shape and
                      in same order as ground_examples
        labels: Variable (longTensor N) - correct labels of classification
                output
        topk: int - forwarded to utils.accuracy as the single-element
              tuple (topk,)
    RETURNS:
        tuple of (% of correctly classified original examples,
                  % of correctly classified adversarial examples)
    """
    ground_examples = utils.safe_var(ground_examples)
    adversarials = utils.safe_var(adversarials)
    labels = utils.safe_var(labels)

    # Both batches go through the same normalizer -> classifier pipeline
    normed_ground = self.normalizer.forward(ground_examples)
    ground_output = self.classifier_net.forward(normed_ground)

    normed_advs = self.normalizer.forward(adversarials)
    adv_output = self.classifier_net.forward(normed_advs)

    # Score on the underlying tensors (.data) so the accuracy computation
    # is not tied to the autograd graph.
    # NOTE(review): utils.accuracy is assumed to take (output, target, topk)
    # and return one entry per requested k -- confirm against utils.
    start_prec = utils.accuracy(ground_output.data, labels.data,
                                topk=(topk,))
    adv_prec = utils.accuracy(adv_output.data, labels.data,
                              topk=(topk,))

    # Only one k was requested, so the first entry is the one we want
    return float(start_prec[0]), float(adv_prec[0])
def adversarial_tensors(self, x=None):
    """ Convenience accessor returning the adversarial images as raw
        tensors.
    ARGS:
        x: optional input to perturb. When omitted, self.originals is used
           instead; at least one of the two must be non-None.
    RETURNS:
        the .data attribute of self.forward applied to the (safe_var
        wrapped) input
    """
    assert x is not None or self.originals is not None
    source = self.originals if x is None else x
    wrapped = utils.safe_var(source)
    return self.forward(wrapped).data
def eval_attack_only(self, adversarials, labels, topk=1):
    """ Scores the classifier on the adversarial inputs only
    ARGS:
        adversarials: Variable NxCxHxW - examples after we did adversarial
                      perturbation
        labels: Variable (longtensor N) - correct labels of classification
                output
        topk: int - criterion for 'correct' classification
    RETURNS:
        (int) number of correctly classified examples, as computed by
        utils.accuracy_int
    """
    adv_var = utils.safe_var(adversarials)
    label_var = utils.safe_var(labels)

    # normalize first, then classify
    normalized = self.normalizer.forward(adv_var)
    logits = self.classifier_net.forward(normalized)

    return utils.accuracy_int(logits, label_var, topk=topk)
def top1_accuracy(self, eval_label, attack_out, ground_examples, labels):
    """ Updates the running top-1 accuracy stored under eval_label
    ARGS:
        eval_label: key into self.results for this evaluation
        attack_out: indexable - element 0 holds the adversarial examples,
                    element 1 the labels they are scored against
        ground_examples: not used by this method
        labels: not used by this method
    RETURNS:
        None, but updates self.results[eval_label] in place
    """
    # Lazily create the meter the first time this label is evaluated
    if self.results[eval_label] is None:
        self.results[eval_label] = utils.AverageMeter()
    meter = self.results[eval_label]

    adv_examples = utils.safe_var(attack_out[0])
    adv_labels = utils.safe_var(attack_out[1])
    batch_size = float(adv_examples.shape[0])

    num_correct = self.attack_params.eval_attack_only(adv_examples,
                                                      adv_labels, topk=1)

    # batch_size is a float, so this is a true (non-integer) division
    meter.update(num_correct / batch_size, n=int(batch_size))
def make_grid(self, x):
    """ Builds the sampling grid for the affine transform described by
        self.xform_params.
    ARGS:
        x: Variable - only its shape is used, as the target size handed
           to F.affine_grid
    RETURNS:
        the result of F.affine_grid for one 2x3 matrix per entry of
        self.xform_params, each of the form
            [[cos, -sin, 0],
             [sin,  cos, 0]]
    """
    assert isinstance(x, Variable)

    params = self.xform_params
    cosines = params.cos()
    sines = params.sin()
    no_shift = utils.safe_var(torch.zeros_like(params))

    # The six matrix entries in row-major order, stacked along a new
    # leading axis ...
    entries = torch.stack([cosines, -sines, no_shift,
                           sines, cosines, no_shift])
    # ... then moved so that the six entries of each matrix are adjacent
    # before folding them into 2x3 matrices.
    per_example = entries.transpose(0, 1).contiguous()
    matrices = per_example.view(-1, 2, 3)

    return F.affine_grid(matrices, x.shape)
def display_adversarial_2row(classifier_net, normalizer, original_images,
                             adversarial_images, num_to_show=4,
                             which='incorrect', ipython=False,
                             margin_width=2):
    """ Displays adversarial images side-by-side with their unperturbed
        counterparts. Draws two rows: top row is original images, bottom
        row is perturbed
    ARGS:
        classifier_net : nn - with a .forward method that takes normalized
                              variables and outputs logits
        normalizer : object w/ .forward method - should probably be an
                     instance of utils.DifferentiableNormalize or
                     utils.IdentityNormalize
        original_images: Variable or Tensor (NxCxHxW) - original images to
                         display. Images in [0., 1.] range
        adversarial_images: Variable or Tensor (NxCxHxW) - perturbed images
                            to display. Should be same shape as
                            original_images
        num_to_show : int - number of images to show (fewer are shown if
                      fewer are eligible)
        which : string in ['incorrect', 'random', 'correct'] - which images
                to show.
                -- 'incorrect' means successfully attacked images: the
                   top-1 prediction differs between original and
                   adversarial
                -- 'random' means some random selection of images
                -- 'correct' means unsuccessfully attacked images: the
                   top-1 prediction is the same for both
        ipython: bool - accepted for interface compatibility; not consulted
                 by the code in this function
        margin_width - int : height in pixels of the red margin separating
                             top and bottom rows. Set to 0 for no margin
    RETURNS:
        None, but hands the assembled image to plt.imshow
    """
    assert which in ['incorrect', 'random', 'correct']

    # If not 'random' selection, prune to only the valid things
    if which == 'random':
        # Materialize as a list so the emptiness check below behaves the
        # same on Python 2 and Python 3 (a Python 3 range never equals []).
        to_sample_idxs = list(range(original_images.shape[0]))
    else:
        classifier_net.eval()  # can never be too safe =)

        # classify the originals with top1
        original_norm_var = normalizer.forward(
            utils.safe_var(original_images))
        original_out_logits = classifier_net.forward(original_norm_var)
        _, original_out_classes = original_out_logits.max(1)

        # classify the adversarials with top1
        adv_norm_var = normalizer.forward(utils.safe_var(adversarial_images))
        adv_out_logits = classifier_net.forward(adv_norm_var)
        _, adv_out_classes = adv_out_logits.max(1)

        # collect indices whose "prediction unchanged" flag matches what
        # the caller asked for
        want_unchanged = (which == 'correct')
        unchanged = original_out_classes == adv_out_classes
        to_sample_idxs = [idx for idx, flag in enumerate(unchanged)
                          if bool(float(flag)) == want_unchanged]

    # Now select some indices to show
    if not to_sample_idxs:
        print("Couldn't show anything. Try changing the 'which' argument here")
        return

    to_show_idxs = random.sample(to_sample_idxs,
                                 min([num_to_show, len(to_sample_idxs)]))

    # Now start building up the images : first horizontally (concatenate
    # along the width axis of each CxHxW image), then vertically
    top_row = torch.cat([original_images[idx] for idx in to_show_idxs],
                        dim=2)
    bottom_row = torch.cat([adversarial_images[idx] for idx in to_show_idxs],
                           dim=2)

    if margin_width > 0:
        margin = torch.zeros(3, margin_width, top_row.shape[-1])
        margin[0] = 1.0  # make it red
        # Cast via the type *string* of top_row: type(top_row) is the
        # generic torch.Tensor class on recent PyTorch and would not carry
        # the dtype/device of the images.
        margin = margin.type(top_row.type())
        stack = [top_row, margin, bottom_row]
    else:
        stack = [top_row, bottom_row]

    # Stack the rows along the height axis and convert CxHxW -> HxWxC,
    # the layout plt.imshow expects
    plt.imshow(torch.cat(stack, dim=1).cpu().numpy().transpose(1, 2, 0))
def discretized_adversarial(img_tensor, classifier_net, normalizer,
                            flavor='greedy'):
    """ Takes in an image_tensor and classifier/normalizer pair and outputs
        a 'discretized' image_tensor [each val is i/255.0 for some integer
        i] with the same classification
    ARGS:
        img_tensor : tensor (NxCxHxW) - tensor of images with values between
                     0.0 and 1.0.
        classifier_net : NN - neural net with .forward method to classify
                         normalized images
        normalizer : differentiableNormalizer object - normalizes 0,1 images
                     into classifier_domain
        flavor : string - either 'random' or 'greedy', determining which
                 'next_pixel_to_flip' function we use. Any other value
                 raises KeyError.
    RETURNS:
        img_tensor of the same shape, but now with values of the form
        i/255.0 for integers i.
    NOTE:
        The per-image repair loop has no iteration cap: it keeps flipping
        pixels until the classifier's prediction matches the original one.
    """
    img_tensor = utils.safe_tensor(img_tensor)

    nptf_map = {'random': flip_random_pixel,
                'greedy': flip_greedy_pixel}
    next_pixel_to_flip = nptf_map[flavor](classifier_net, normalizer)

    ##########################################################################
    # First figure out 'correct' labels and the 'discretized' labels         #
    ##########################################################################
    var_img = utils.safe_var(img_tensor)
    norm_var = normalizer.forward(var_img)
    norm_output = classifier_net.forward(norm_var)
    correct_targets = norm_output.max(1)[1]

    og_discretized = utils.safe_var(discretize_image(img_tensor,
                                                     zero_one=True))
    norm_discretized = normalizer.forward(og_discretized)
    discretized_output = classifier_net.forward(norm_discretized)
    discretized_targets = discretized_output.max(1)[1]

    ##########################################################################
    # Collect idxs for examples affected by discretization                   #
    ##########################################################################
    # An example is 'incorrect' when discretizing changed its prediction
    changed = discretized_targets != correct_targets
    incorrect_idxs = [i for i, el in enumerate(changed) if float(el) != 0]

    ##########################################################################
    # Fix all bad images                                                     #
    ##########################################################################
    # Direction from each discretized value back toward the original value;
    # used as the fallback step direction. Same for every example, so it is
    # computed once.
    signs = torch.sign(var_img - og_discretized)

    # Keyed by example index so each corrected image is guaranteed to end up
    # at its own position in the output, whatever order they were fixed in.
    corrected_imgs = {}
    for idx in incorrect_idxs:
        desired_target = correct_targets[idx]
        example = og_discretized[idx].data.clone()  # tensor

        bad_discretization = True
        pixels_changed_so_far = set()  # populated with tuples of idxs

        while bad_discretization:
            pixel_idx, grad_sign = next_pixel_to_flip(example,
                                                      pixels_changed_so_far,
                                                      desired_target)
            pixels_changed_so_far.add(pixel_idx)

            # No direction reported: step toward the original pixel value
            if grad_sign == 0:
                grad_sign = utils.tuple_getter(signs[idx], pixel_idx)

            # Move the chosen pixel by exactly one 1/255 quantization step
            new_val = (grad_sign / 255. +
                       utils.tuple_getter(example, pixel_idx))
            utils.tuple_setter(example, pixel_idx, float(new_val))

            new_out = classifier_net.forward(normalizer.forward(
                Variable(example.unsqueeze(0))))
            bad_discretization = (int(desired_target) !=
                                  int(new_out.max(1)[1]))

        corrected_imgs[idx] = example

    # Stack up results: corrected image where one exists, plain discretized
    # image otherwise
    output = []
    for idx in range(len(img_tensor)):
        if idx in corrected_imgs:
            output.append(corrected_imgs[idx])
        else:
            output.append(og_discretized[idx].data)

    return torch.stack(output)