def main():
    """Run the decision-based Boundary Attack on a pretrained MNIST CNN.

    Plots robust accuracy and the mean L2 perturbation distance as a
    function of the perturbation budget (epsilon), saving both figures.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--steps', type=int, default=20000,
                        help='Iteration of BA')
    parser.add_argument('--targeted', action='store', default=False,
                        help='For targeted attack')
    args = parser.parse_args()

    # Load the pretrained CNN and freeze it for evaluation.
    model = Net()
    model.load_state_dict(torch.load('mnist_cnn.pt'))
    model.eval()

    # Fold MNIST normalization into the wrapper so the attack operates in
    # raw [0, 1] pixel space.
    preprocessing = dict(mean=0.1307, std=0.3081)
    fmodel = PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)
    fmodel = fmodel.transform_bounds((0, 1))
    assert fmodel.bounds == (0, 1)

    images, labels = ep.astensors(
        *samples(fmodel, dataset="mnist", batchsize=10))
    print('Model accuracy on clean examples: {}'.format(
        accuracy(fmodel, images, labels)))

    if args.targeted:
        # Fixed-offset target class gives a deterministic targeted attack.
        target_class = (labels + 7) % 10
        criterion = fb.criteria.TargetedMisclassification(target_class)
    else:
        criterion = fb.criteria.Misclassification(labels)

    attack = fa.BoundaryAttack(steps=args.steps, tensorboard=None)
    epsilons = np.linspace(0.01, 10, 20)
    # BUG FIX: the original passed `labels` here, so the `criterion` built
    # above was ignored and --targeted had no effect. Foolbox attacks take
    # the criterion as the third argument.
    raw, clipped, success = attack(fmodel, images, criterion,
                                   epsilons=epsilons)
    # success has shape (num_epsilons, batch); average over the batch.
    robust_accuracy = 1 - success.float32().mean(axis=-1)

    plt.plot(epsilons, robust_accuracy.numpy())
    plt.xlabel("Epsilons")
    plt.ylabel("Robust Accuracy")
    plt.savefig('mnist_BA_robust_acc.jpg')
    plt.show()

    # Mean L2 distance between each epsilon's clipped adversarials and the
    # clean images.
    mean_distance = []
    for i in range(len(clipped)):
        dist = np.mean(fb.distances.l2(clipped[i], images).numpy())
        mean_distance.append(dist)
    plt.plot(epsilons, mean_distance)
    plt.xlabel('Epsilons')
    plt.ylabel('Mean L2 distance')
    plt.savefig("mnist_BA_mean_L2distance.jpg")
    plt.show()
def main():
    """Compare the Boundary Attack against a repeated Gaussian-noise
    baseline on MNIST and plot robust accuracy over L2 perturbation norms.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--steps', type=int, default=10000,
                        help='Iteration of BA')
    parser.add_argument('--targeted', action='store', default=False,
                        help='For targeted attack')
    args = parser.parse_args()

    # Pretrained CNN, frozen for inference.
    net = Net()
    net.load_state_dict(torch.load('mnist_cnn.pt'))
    net.eval()

    # Foolbox wrapper: normalization is folded into the model so attacks
    # see raw [0, 1] pixels.
    fmodel = PyTorchModel(net, bounds=(0, 1),
                          preprocessing=dict(mean=0.1307, std=0.3081))
    fmodel = fmodel.transform_bounds((0, 1))
    assert fmodel.bounds == (0, 1)

    images, labels = ep.astensors(
        *samples(fmodel, dataset="mnist", batchsize=10))
    print('Model accuracy on clean examples: {}'.format(
        accuracy(fmodel, images, labels)))

    epsilons = np.linspace(0.01, 10, 20)

    # Decision-based Boundary Attack.
    boundary = fa.BoundaryAttack(steps=args.steps, tensorboard=None)
    _, _, found = boundary(fmodel, images, labels, epsilons=epsilons)
    boundary_acc = 1 - found.float32().mean(axis=-1)

    # Baseline: additive Gaussian noise repeated with the same query budget.
    noise = fa.L2RepeatedAdditiveGaussianNoiseAttack(repeats=args.steps)
    _, _, found = noise(fmodel, images, labels, epsilons=epsilons)
    noise_acc = 1 - found.float32().mean(axis=-1)

    plt.plot(epsilons, boundary_acc.numpy())
    plt.plot(epsilons, noise_acc.numpy())
    plt.legend(["Boundary Attack", "Random Attack"], loc='upper right')
    plt.xlabel("Perturbation Norm (L2)")
    plt.ylabel("Robust Accuracy")
    plt.title("{} Queries".format(args.steps))
    plt.savefig('mnist_robust_acc.jpg')
    plt.show()
def main():
    """Run the L2 DeepFool attack on a pretrained MNIST CNN and print the
    resulting robust accuracy and average perturbation norm."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--steps', type=int, default=1000,
                        help='Maximum number of steps to perform')
    parser.add_argument('--targeted', action='store', default=False,
                        help='For targeted attack')
    args = parser.parse_args()

    # Load the pretrained CNN and freeze it for evaluation.
    model = Net()
    model.load_state_dict(torch.load('mnist_cnn.pt'))
    model.eval()

    # Normalization happens inside the wrapper; attack inputs stay in [0, 1].
    preprocessing = dict(mean=0.1307, std=0.3081)
    fmodel = PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)
    fmodel = fmodel.transform_bounds((0, 1))
    assert fmodel.bounds == (0, 1)

    images, labels = ep.astensors(
        *samples(fmodel, dataset="mnist", batchsize=10))
    print('Model accuracy on clean examples: {}'.format(
        accuracy(fmodel, images, labels)))

    if args.targeted:
        # Fixed-offset target class gives a deterministic targeted attack.
        target_class = (labels + 7) % 10
        criterion = fb.criteria.TargetedMisclassification(target_class)
    else:
        criterion = fb.criteria.Misclassification(labels)

    attack = fa.L2DeepFoolAttack(steps=args.steps)
    # epsilons=None: run unbounded and report the distances actually found.
    epsilons = None
    # BUG FIX: the original passed `labels`, ignoring the `criterion` built
    # above, so --targeted had no effect.
    raw, clipped, success = attack(fmodel, images, criterion,
                                   epsilons=epsilons)
    robust_accuracy = 1 - success.float32().mean()
    print("Robust Accuracy", robust_accuracy.item())
    dist = np.mean(fb.distances.l2(clipped, images).numpy())
    print("Average perturbation norm", dist)
def main():
    """Evaluate MNIST robustness to repeated additive Gaussian noise.

    For each query budget in `queries`, runs the
    L2RepeatedAdditiveGaussianNoiseAttack over the whole MNIST test set and
    plots robust accuracy against the L2 perturbation norm.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--min_norm', type=float, default=0.01,
                        help='Minimum perturbation norm')
    parser.add_argument('--max_norm', type=float, default=15,
                        help='Maximum perturbation norm')
    parser.add_argument('--num', type=int, default=12,
                        help='Number of norms to evaluate on')
    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # List of max query counts (one robust-accuracy curve per budget).
    queries = [10, 100, 1000, 5000]

    # Load the pretrained model and freeze it for evaluation.
    model = Net()
    model.load_state_dict(torch.load('mnist_cnn.pt'))
    model.eval()

    # Preprocess inside the wrapper; pixel bounds stay in [0, 1].
    preprocessing = dict(mean=0.1307, std=0.3081)
    fmodel = PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)
    fmodel = fmodel.transform_bounds((0, 1))
    assert fmodel.bounds == (0, 1)

    # Perturbation norms to evaluate at.
    epsilons = np.linspace(args.min_norm, args.max_norm, args.num)

    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, download=True,
                       transform=transforms.ToTensor()),
        batch_size=10000, shuffle=True)

    legends = []
    start = time()
    for query in queries:
        # BUG FIX: counters must restart for every query budget. The
        # original accumulated `successful`/`total` across budgets, so every
        # curve after the first mixed in results from smaller budgets.
        total = 0                                        # inputs evaluated
        successful = torch.zeros(args.num, device=device)  # wins per norm
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            # (The original called ep.astensor_ here and discarded the
            # result — a no-op; foolbox accepts torch tensors directly.)

            # Additive Gaussian noise attack with L2 norm.
            attack = fa.L2RepeatedAdditiveGaussianNoiseAttack(repeats=query)
            raw, clipped, success = attack(fmodel, images, labels,
                                           epsilons=epsilons)
            # success has shape (num_epsilons, batch); count per-norm wins.
            successful += success.sum(axis=1)
            total += len(labels)

        robust_accuracy = (1 - 1.0 * successful / total).cpu()
        plt.plot(epsilons, robust_accuracy.numpy())
        legends.append("{} Queries".format(query))

    plt.xlabel("Perturbation Norm (L2)")
    plt.ylabel("Robust Accuracy")
    plt.title("Gaussian Noise")
    plt.legend(legends, loc='upper right')
    plt.ylim([0, 1])
    plt.savefig('mnist_RA_robust_acc.jpg')
    plt.show()
    end = time()
    print("Time taken: {:.1f} minutes".format((end - start) / 60))
def main():
    """Random-perturbation (Gaussian noise) attack on MNIST or ImageNet.

    For each correctly-classified test image, draws batches of noisy
    copies (via `get_perturbed_samples`) until one is misclassified or the
    query budget is exhausted. Saves up to `--save_count` adversarial
    images and prints the running success rate.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default="mnist",
                        help='Dataset Name')
    parser.add_argument('--std', type=float, default=0.25,
                        help='To control the norm of perturbation')
    parser.add_argument('--steps', type=int, default=1e5,
                        help='The number of calls made to the model')
    parser.add_argument('--save_count', type=int, default=10,
                        help='Number of adversarial images to be saved')
    args = parser.parse_args()

    path = os.path.join("./Results", args.dataset)
    if not os.path.exists(path):
        os.makedirs(path)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    # Download ImageNet data and set the correct path.
    if args.dataset == "imagenet":
        model = torchvision.models.resnet18(pretrained=True)
        test_loader = load_ImageNet()
    elif args.dataset == "mnist":
        # Load pretrained CNN on MNIST.
        model = Net()
        model.load_state_dict(torch.load('mnist_cnn.pt',
                                         map_location=device))
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('../data', train=False, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307, ), (0.3081, ))
                           ])),
            batch_size=100, shuffle=True)
    else:
        raise ValueError(f"Dataset {args.dataset} not available")

    model = model.to(device)
    model = model.eval()

    successful = 0  # images for which an adversarial example was found
    total = 0       # images examined (including already-misclassified ones)
    # NOTE: argparse `type=` is not applied to defaults, so the default
    # 1e5 arrives as a float; coerce once here.
    steps = int(args.steps)

    if args.dataset == "imagenet":
        # Loop-invariant: wrap the model once for foolbox's sample fetcher.
        preprocessing = dict(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225], axis=-3)
        fmodel = PyTorchModel(model, bounds=(0, 1),
                              preprocessing=preprocessing)
        fmodel = fmodel.transform_bounds((0, 1))
        assert fmodel.bounds == (0, 1)
    else:
        # BUG FIX: the original rebuilt `iter(test_loader)` on every pass of
        # the while-loop and its exit test (`if not labels.size`) checked a
        # bound method — always truthy — so the loop never terminated.
        # Build the iterator once and stop on StopIteration instead.
        examples = iter(test_loader)

    while True:
        if args.dataset == "imagenet":
            images, labels = samples(fmodel, dataset='imagenet',
                                     batchsize=20)
            batch = 500  # number of random perturbations in each iteration
        else:
            try:
                # BUG FIX: `.next()` is Python-2 only; use builtin next().
                images, labels = next(examples)
            except StopIteration:
                break  # no more test images
            batch = 10000  # number of random perturbations in each iteration

        # +1 so a budget smaller than one batch still gets one attempt.
        iterations = int(np.ceil(steps / batch)) + 1
        images = images.to(device)
        labels = labels.to(device)
        total += len(labels)

        for image, label in zip(images, labels):
            output = model(image.unsqueeze(0))
            # Only attack images the model classifies correctly; the rest
            # still count toward `total` (as in the original).
            if output.argmax() == label:
                base_image = torch.clamp(image, 0, 1)
                base_label = label
                for iteration in range(1, iterations):
                    perturbed_samples = get_perturbed_samples(
                        base_image, batch, args.std, device)
                    prediction = model(perturbed_samples).argmax(dim=1)
                    # Indexes of all incorrect predictions.
                    success = (prediction != base_label).nonzero()
                    if success.nelement():
                        successful += 1
                        print("Success rate so far :{}/{}".format(
                            successful, total))
                        if args.save_count:
                            index = success[0].item()
                            print("Norm of image", torch.norm(base_image))
                            print("Norm of added noise",
                                  torch.norm(perturbed_samples[index] -
                                             base_image))
                            adversarial_image = perturbed_samples[index].to(
                                "cpu")
                            if adversarial_image.shape[0] == 1:
                                plt.imshow(adversarial_image[0], cmap='gray')
                                plt.show()
                            else:
                                plt.imshow(
                                    adversarial_image.permute(1, 2, 0))
                                plt.show()
                            # Rescale image before saving.
                            resize = transforms.Compose([
                                transforms.ToPILImage(),
                                transforms.Resize(size=200),
                                transforms.ToTensor()
                            ])
                            adversarial_image = resize(adversarial_image)
                            save_image(
                                adversarial_image,
                                os.path.join(path,
                                             str(args.save_count) + ".png"),
                                padding=0)
                            args.save_count -= 1
                        break

        if args.dataset == "imagenet":
            # BUG FIX: foolbox's samples() returns the same fixed batch on
            # every call, so looping again would re-attack identical images
            # forever; one pass is the whole evaluation.
            break

    # NOTE(review): despite the message text, this value is the attack
    # success rate, not the model's accuracy.
    print("Accuracy on perturbed samples", 100.0 * successful / total)