def singleFGSM(torch_model, xs, ys, eps, c, h, w, clip_min, clip_max):
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, c, h, w))

    # Convert the pytorch model to a tf model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack
    atk_op = FastGradientMethod(cleverhans_model, sess=sess)
    atk_params = {'eps': eps, 'clip_min': clip_min, 'clip_max': clip_max}
    adv_x_op = atk_op.generate(x_op, **atk_params)

    # Run an evaluation of our model against FGSM
    xs, ys = xs.to(device), ys.to(device)
    adv = torch.from_numpy(sess.run(adv_x_op, feed_dict={x_op: xs}))
    # move the adversarial example to the model's device before the forward pass
    pred = np.argmax(torch_model(adv.to(device)).data.cpu().numpy())
    if ys != pred:
        return adv.cpu().numpy()
    else:
        return []
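# A minimal usage sketch (not from the original source): driving singleFGSM over a
# batch_size=1 DataLoader and collecting the successful adversarial examples. The
# `loader` argument and the MNIST-like shape/clip values are illustrative assumptions.
def collect_single_fgsm(torch_model, loader, eps=0.3):
    advs = []
    for xs, ys in loader:  # assumes batch_size == 1, as singleFGSM compares one label
        adv = singleFGSM(torch_model, xs, ys, eps, c=1, h=28, w=28,
                         clip_min=0., clip_max=1.)
        if len(adv) > 0:  # singleFGSM returns [] when the attack fails
            advs.append(adv)
    return advs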
def __init__(self, dataset, model):
    super(PGDAdaptor, self).__init__(dataset, model)
    self.config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
    self.config.gpu_options.allow_growth = True
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph, config=self.config)
    input_shape = get_input_shape(dataset)
    with self.sess.graph.as_default():
        with self.sess.as_default():
            self.tf_model = convert_pytorch_model_to_tf(self.model)
            self.ch_model = CallableModelWrapper(self.tf_model,
                                                 output_layer='logits')
            self.x_op = tf.placeholder(
                tf.float32,
                shape=(None, input_shape[0], input_shape[1], input_shape[2]))
            self.attk = ProjectedGradientDescent(self.ch_model, sess=self.sess)
            self.adv_preds_ops = dict()
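# Hedged sketch (an assumption, not the original adaptor's API): how the cached
# self.adv_preds_ops dict above might be used, building one PGD graph per eps and
# reusing it across batches. The method name `adv_preds` and the clip values are
# hypothetical.
def adv_preds(self, xs, eps):
    with self.sess.graph.as_default():
        with self.sess.as_default():
            if eps not in self.adv_preds_ops:
                # build the attack graph once per eps, then cache it
                adv_x_op = self.attk.generate(self.x_op, eps=eps,
                                              clip_min=0., clip_max=1.)
                self.adv_preds_ops[eps] = self.tf_model(adv_x_op)
            return self.sess.run(self.adv_preds_ops[eps],
                                 feed_dict={self.x_op: xs})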
def __init__(self, dataset, model):
    super(CWAdaptor, self).__init__(dataset, model)
    self.config = tf.ConfigProto()
    self.config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=self.config)
    self.tf_model = convert_pytorch_model_to_tf(self.model)
    self.ch_model = CallableModelWrapper(self.tf_model, output_layer='logits')
    self.dataset = dataset
def CW_attack_l2():
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    y_op = tf.placeholder(tf.float32, shape=(None, 10))

    # Convert the pytorch model to a tf model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create a CW attack
    cw = CarliniWagnerL2(cleverhans_model, sess=sess)
    cw_params = {
        'binary_search_steps': 1,
        'max_iterations': 100,
        'batch_size': args.b,
        'clip_min': 0.,
        'clip_max': 1.,
        'y': y_op
    }
    adv_x_op = cw.generate(x_op, **cw_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Evaluation against the CW l2 attack
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        adv = sess.run(adv_x_op,
                       feed_dict={
                           x_op: inputs,
                           y_op: torch.nn.functional.one_hot(targets, 10)
                       })
        # project the perturbation back onto an l2 ball of radius 0.5
        diff = (torch.tensor(adv) - inputs).renorm(p=2, dim=0, maxnorm=0.5)
        adv = (inputs + diff).clamp(0., 1.)
        correct += model(adv.cuda()).topk(1)[1][:, 0].eq(
            targets.cuda()).cpu().sum().item()
        total += len(inputs)
        sys.stdout.write("\rWhite-box CW l2 attack... Acc: %.3f%% (%d/%d)" %
                         (100. * correct / total, correct, total))
        sys.stdout.flush()
    print('Accuracy under CW l2 attack: %.3f%%' % (100. * correct / total))
def pgd_attack():
    # Use tf for evaluation on adversarial data
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    y_op = tf.placeholder(tf.float32, shape=(None, 10))

    # Convert the pytorch model to a tf model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create a PGD attack
    pgd = ProjectedGradientDescent(cleverhans_model, sess=sess)
    pgd_params = {
        'eps': args.eps,
        'eps_iter': args.ss,
        'nb_iter': args.ns,
        'clip_min': 0.,
        'clip_max': 1.,
        'y': y_op
    }
    adv_x_op = pgd.generate(x_op, **pgd_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Evaluation against PGD attacks
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        adv_preds = sess.run(adv_preds_op,
                             feed_dict={
                                 x_op: inputs,
                                 y_op: torch.nn.functional.one_hot(targets, 10)
                             })
        correct += (np.argmax(adv_preds, axis=1) == targets.numpy()).sum()
        total += len(inputs)
        sys.stdout.write("\rWhite-box PGD attack... Acc: %.3f%% (%d/%d)" %
                         (100. * correct / total, correct, total))
        sys.stdout.flush()
    print('Accuracy under PGD attack: %.3f%%' % (100. * correct / total))
def spsa_attack():
    # Use tf for evaluation on adversarial data
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)

    # Convert the pytorch model to a tf model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(model, out_dims=10)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')
    cleverhans_model.nb_classes = 10

    # Create an SPSA attack
    spsa = SPSA(cleverhans_model, sess=sess)
    spsa_params = {
        'eps': args.eps,
        'nb_iter': args.ns,
        'clip_min': 0.,
        'clip_max': 1.,
        # in this case, the batch size is equal to spsa_samples
        'spsa_samples': args.spsa_samples,
        'spsa_iters': 1,
        'early_stop_loss_threshold': 0
    }

    # Evaluation against SPSA attacks
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        advs = spsa.generate_np(inputs.numpy(),
                                y=targets.numpy().astype(np.int32),
                                **spsa_params)
        with torch.no_grad():
            correct += (model(torch.tensor(advs).cuda()).topk(1)[1].cpu().eq(
                targets)).sum().item()
        total += len(inputs)
        sys.stdout.write("\rBlack-box SPSA attack... Acc: %.3f%% (%d/%d)" %
                         (100. * correct / total, correct, total))
        sys.stdout.flush()
    print('Accuracy under SPSA attack: %.3f%%' % (100. * correct / total))
train_loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(train_data, train_label))
test_loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(test_data, test_label))

sess = tf.Session()
x_op1 = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
# x_op2 = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))

# Convert the pytorch model to a tf model and wrap it in cleverhans
tf_net = convert_pytorch_model_to_tf(net)
cleverhans_model = CallableModelWrapper(tf_net, output_layer='logits')

# Create an FGSM attack
fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
adv_x_op = fgsm_op.generate(x_op1, **fgsm_params)
adv_preds_op = tf_net(adv_x_op)

no_runs = 10000
correct = 0
for xs, ys in test_loader:
    xs, ys = Variable(xs), Variable(ys)
    adv_example = sess.run(adv_x_op, feed_dict={x_op1: xs})
    adv_preds = sess.run(adv_preds_op, feed_dict={adv_x_op: adv_example})
    correct += (np.argmax(adv_preds, axis=1) == ys.numpy()).sum()
if args.dataset == 'mnist':
    mean = normalization['mnist'][0]
    std = [normalization['mnist'][1] for _ in mean]
    x = input_shape['mnist']
    x_op = tf.placeholder(tf.float32, shape=(None, x[0], x[1], x[2]))
elif args.dataset == 'cifar':
    mean = normalization['cifar'][0]
    std = [normalization['cifar'][1] for _ in mean]
    x = input_shape['cifar']
    x_op = tf.placeholder(tf.float32, shape=(None, x[0], x[1], x[2]))

train_loader, test_loader = data_loaders(args.dataset,
                                         args.batch_size,
                                         shuffle_test=False,
                                         norm_mean=mean,
                                         norm_std=std)
sess = tf.Session(config=config)
tf_model = convert_pytorch_model_to_tf(model)
ch_model = CallableModelWrapper(tf_model, output_layer='logits')

if args.attack == 'CW':
    attk = CarliniWagnerL2(ch_model, sess=sess)
    params = {
        'binary_search_steps': 10,
        'max_iterations': 100,
        'learning_rate': 0.2,
        'batch_size': args.batch_size,
        'initial_const': 10
    }
elif args.attack == 'PGD':
    attk = ProjectedGradientDescent(ch_model, sess=sess)
    # account for input normalization when clipping to the [0, 1] image range
    clip_min = (0.0 - 1E-6 - max(mean)) / std[0]
    clip_max = (1.0 + 1E-6 - min(mean)) / std[0]
    params = {
        'eps': eps,
        'clip_min': clip_min,
def __init__(self, tf_pytorch, df_classes, num_classes, origin_class,
             target_class, model, device, params, attack_type):
    self.origin_class = origin_class
    self.target_class = target_class
    self.df_classes = df_classes
    self.params = params
    self.num_classes = num_classes
    self.attack_type = attack_type
    self.device = device

    # CHOOSE BETWEEN TENSORFLOW AND PYTORCH IMPLEMENTATION
    if tf_pytorch == 'pytorch':
        # NEW PYTORCH IMPLEMENTATION
        self.model = model
        self.model.to(self.device)
    elif tf_pytorch == 'tf':
        self.tf_model = convert_pytorch_model_to_tf(model)
        self.cleverhans_model = CallableModelWrapper(self.tf_model,
                                                     output_layer='logits')
        self.sess = tf.Session()
        self.adv_x_op = None
        self.y_target = np.zeros((1, 1000), dtype=np.uint8)
        self.one_hot_encoded()
        if self.attack_type == 'cw':
            self.params.pop('y_target')
        elif self.attack_type == 'spsa':
            self.y_target_tf = tf.placeholder(tf.int64)
        else:
            self.params["y_target"] = self.y_target
    else:
        raise NotImplementedError('Library not recognized')

    # SET AND INITIALIZE ATTACK TYPE
    if self.attack_type == 'fgsm':
        print("\nSetting fgsm attack")
        # self.attack_op = FastGradientMethod(self.cleverhans_model, sess=self.sess)
    elif self.attack_type == 'cw':
        print("\nSetting carlini & wagner attack")
        # self.attack_op = CarliniWagnerL2(self.cleverhans_model, sess=self.sess)
        self.attack_op = CarliniWagnerL2Std(self.cleverhans_model,
                                            sess=self.sess)
    elif self.attack_type == 'pgd':
        print("\nSetting pgd attack")
        # self.attack_op = MadryEtAl(self.cleverhans_model, sess=self.sess)
    elif self.attack_type == 'jsma':
        print("\nSetting jsma attack")
        self.attack_op = SaliencyMapMethodMemory(self.cleverhans_model,
                                                 sess=self.sess)
    elif self.attack_type == 'zoo':
        print("\nSetting zoo attack")
        self.batch_size = params["batch_size"]
        print("Batch size set to: %d" % self.batch_size)
        # self.attack_op = ZOOL2(model=self.tf_model, sess=self.sess)
    # elif self.attack_type == 'spsa':
    #     print("\nSetting spsa attack")
    #     self.batch_size = params["batch_size"]
    #     print("Batch size set to: %d" % self.batch_size)
    #     self.attack_op = SPSANoClip(model=self.cleverhans_model, sess=self.sess)
    else:
        raise NotImplementedError('Not implemented attack.')
def test_transferability_subset(loader, attack_method, epsilon, torch_model1,
                                torch_model2, verbose, batch_size):
    batch_time = AverageMeter()
    err12s = AverageMeter()
    err21s = AverageMeter()
    end = time.time()

    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))

    # Convert the pytorch models to tf models and wrap them in cleverhans
    tf_model_fn1 = convert_pytorch_model_to_tf(torch_model1)
    tf_model_fn2 = convert_pytorch_model_to_tf(torch_model2)

    # Attack parameters
    if attack_method == 'CW':
        params = {
            'binary_search_steps': 1,
            # 'y': None,
            'max_iterations': CW_ATTACK_ITERATIONS,
            'learning_rate': CW_LEARNING_RATE,
            'batch_size': batch_size,
            'initial_const': 10
        }
    elif attack_method == 'PGD':
        params = {
            'eps': epsilon,
            'clip_min': 0.,
            'clip_max': 1.,
            'eps_iter': 0.005,
            'nb_iter': 100,
            'rand_init': False
        }
    elif attack_method == 'FGSM':
        params = {'eps': epsilon, 'clip_min': 0., 'clip_max': 1.}
    else:
        raise Exception('Unknown attack method {}'.format(attack_method))

    # Model1 --> Model2
    cleverhans_model1 = CallableModelWrapper(tf_model_fn1, output_layer='logits')
    cleverhans_model2 = CallableModelWrapper(tf_model_fn2, output_layer='logits')

    # Create an attack against each model
    if attack_method == 'CW':
        attk1 = CarliniWagnerL2(cleverhans_model1, sess=sess)
        attk2 = CarliniWagnerL2(cleverhans_model2, sess=sess)
    if attack_method == 'PGD':
        attk1 = ProjectedGradientDescent(cleverhans_model1, sess=sess)
        attk2 = ProjectedGradientDescent(cleverhans_model2, sess=sess)
    if attack_method == 'FGSM':
        attk1 = FastGradientMethod(cleverhans_model1, sess=sess)
        attk2 = FastGradientMethod(cleverhans_model2, sess=sess)

    adv_x_op1 = attk1.generate(x_op, **params)
    adv_x_op2 = attk2.generate(x_op, **params)

    # Test each model's adversarial examples on both models
    adv_preds_op11 = tf_model_fn1(adv_x_op1)
    adv_preds_op12 = tf_model_fn2(adv_x_op1)
    adv_preds_op21 = tf_model_fn1(adv_x_op2)
    adv_preds_op22 = tf_model_fn2(adv_x_op2)

    for i, (xs, ys) in enumerate(loader):
        (adv_preds11, adv_preds12) = sess.run((adv_preds_op11, adv_preds_op12),
                                              feed_dict={x_op: xs})
        (adv_preds21, adv_preds22) = sess.run((adv_preds_op21, adv_preds_op22),
                                              feed_dict={x_op: xs})
        cnt11 = int((np.argmax(adv_preds11, axis=1) != ys).sum())
        cnt22 = int((np.argmax(adv_preds22, axis=1) != ys).sum())
        # transfer error: among examples that fool the source model,
        # the fraction that also fool the other model
        if cnt11 > 0:
            err12 = float(((np.argmax(adv_preds12, axis=1) != ys) *
                           (np.argmax(adv_preds11, axis=1) != ys)).sum()) / float(cnt11)
            err12s.update(err12, cnt11)
        if cnt22 > 0:
            err21 = float(((np.argmax(adv_preds22, axis=1) != ys) *
                           (np.argmax(adv_preds21, axis=1) != ys)).sum()) / float(cnt22)
            err21s.update(err21, cnt22)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if verbose:
            endline = '\n' if i % verbose == 0 else '\r'
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'error 1->2 {err12.val:.3f} ({err12.avg:.3f})\t'
                  'error 2->1 {err21.val:.3f} ({err21.avg:.3f})\t'.format(
                      i, len(loader), batch_time=batch_time,
                      err12=err12s, err21=err21s),
                  end=endline)

    sess.close()
    return err12s.avg, err21s.avg
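# Illustrative call (an assumption about the surrounding script, not original code):
# measuring PGD transferability between two trained MNIST models.
# err12, err21 = test_transferability_subset(test_loader, 'PGD', 0.3,
#                                            model1, model2,
#                                            verbose=10, batch_size=128)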
def mnist(nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, train_end=-1,
          test_end=-1, learning_rate=LEARNING_RATE):
    """
    MNIST cleverhans tutorial
    :param nb_epochs: number of epochs to train the model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Train a pytorch MNIST model
    torch_model = MNIST_arch_0()
    if torch.cuda.is_available():
        torch_model = torch_model.cuda()
    report = AccuracyReport()

    data_dir = '/scratch/etv21/conv_gp_data/MNIST_data/expA'
    training_dataset, test_dataset = mnist_sevens_vs_twos(data_dir, noisy=True)
    train_loader = torch.utils.data.DataLoader(training_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size)
    # adversarial_loader = torch.utils.data.DataLoader(
    #     Adversarial_MNIST_Dataset(), batch_size=batch_size)

    # Train our model
    optimizer = optim.Adam(torch_model.parameters(), lr=learning_rate)
    train_loss = []
    total = 0
    correct = 0
    step = 0
    for _epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            loss = F.nll_loss(preds, ys)
            loss.backward()  # calc gradients
            train_loss.append(loss.data.item())
            optimizer.step()  # update weights

            preds_np = preds.data.cpu().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
            total += len(xs)
            step += 1

            if total % 200 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0

    # examine_weights_biases(torch_model)
    # Evaluate on clean data
    total = 0
    correct = 0
    for xs, ys in test_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()
        preds = torch_model(xs)
        preds_np = preds.data.cpu().numpy()
        correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
        total += len(xs)
    acc = float(correct) / total
    report.clean_train_clean_eval = acc
    print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))

    '''
    For transfer from GP examples to CNN:
    total = 0
    correct = 0
    for xs, ys in adversarial_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()
        preds = torch_model(xs)
        preds_np = preds.data.cpu().numpy()
        correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
        total += len(xs)
    acc = float(correct) / total
    print('[%s] Adversarial accuracy: %.2f%%' % (step, acc * 100))
    '''

    # We use tf for evaluation on adversarial data
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))

    # Convert the pytorch model to a tf model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    epsilon = 10
    norm = 2
    fgsm_params = {'eps': epsilon, 'clip_min': 0., 'clip_max': 1., 'ord': norm}

    attack_name = 'CNN_FGSM_eps={}_norm={}'.format(epsilon, norm)
    attack_dir = os.path.join(data_dir, attack_name)
    if not os.path.exists(attack_dir):
        os.makedirs(attack_dir)
        print("Directory ", attack_dir, " created")

    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against fgsm
    total = 0
    correct = 0
    all_adv_preds = np.array(0)
    for xs, ys in test_loader:
        adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
        all_adv_preds = np.append(all_adv_preds, adv_preds)
        correct += (np.argmax(adv_preds, axis=1) == ys.cpu().numpy()).sum()
        total += len(xs)
    np.save('adv_predictions', all_adv_preds)
    acc = float(correct) / total
    print('Adv accuracy: {:.3f}'.format(acc * 100))
    report.clean_train_adv_eval = acc

    single_adv_x_op = tf.placeholder(tf.float32, shape=(1, 28, 28))
    encode_op = tf.image.encode_png(
        tf.reshape(tf.cast(single_adv_x_op * 255, tf.uint8), (28, 28, 1)))

    adv_images, clean_images, adv_labels = None, None, None
    # Save the first and eleventh batches of images, i.e. a batch of 2s and a batch of 7s
    b = 0
    for xs, ys in test_loader:
        adv_xs = sess.run(adv_x_op, feed_dict={x_op: xs})
        if b == 0 or b == 10:
            c = b * batch_size
            for i in range(0, adv_xs.shape[0]):
                enc_img = sess.run(encode_op,
                                   feed_dict={single_adv_x_op: adv_xs[i]})
                f = open(
                    '/scratch/etv21/conv_gp_data/MNIST_data/expA/{}/{}.png'.format(
                        attack_name, c), "wb+")
                f.write(enc_img)
                f.close()
                c += 1
        if adv_images is None:
            adv_images = np.array(adv_xs.reshape(adv_xs.shape[0], 28, 28))
            clean_images = np.array(xs.reshape(xs.shape[0], 28, 28))
            adv_labels = np.array(ys)
        else:
            adv_images = np.append(adv_images,
                                   adv_xs.reshape(adv_xs.shape[0], 28, 28), 0)
            clean_images = np.append(clean_images,
                                     xs.reshape(xs.shape[0], 28, 28), 0)
            adv_labels = np.append(adv_labels, ys, 0)
        b += 1

    np.save('/scratch/etv21/conv_gp_data/MNIST_data/expA/two_vs_seven_adv_{}'.format(attack_name),
            adv_images, allow_pickle=False)
    np.save('/scratch/etv21/conv_gp_data/MNIST_data/expA/two_vs_seven_labels',
            adv_labels, allow_pickle=False)
    return report
def mnist_tutorial(nb_epochs=6, batch_size=128, train_end=-1, test_end=-1,
                   learning_rate=0.001):
    """
    MNIST cleverhans tutorial
    :param nb_epochs: number of epochs to train the model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Train a pytorch MNIST model
    torch_model = PytorchMnistModel()
    if torch.cuda.is_available():
        torch_model = torch_model.cuda()
    report = AccuracyReport()

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True,
                       transform=transforms.ToTensor()),
        batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
        batch_size=batch_size)

    # Truncate the datasets so that our test runs more quickly
    train_loader.dataset.train_data = train_loader.dataset.train_data[:train_end]
    test_loader.dataset.test_data = test_loader.dataset.test_data[:test_end]

    # Train our model
    optimizer = optim.Adam(torch_model.parameters(), lr=learning_rate)
    train_loss = []
    total = 0
    correct = 0
    step = 0
    for epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            loss = F.nll_loss(preds, ys)
            loss.backward()  # calc gradients
            train_loss.append(loss.data.item())
            optimizer.step()  # update weights

            preds_np = preds.data.cpu().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
            total += len(xs)
            step += 1

            if total % 1000 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0

    # Evaluate on clean data
    total = 0
    correct = 0
    for xs, ys in test_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()
        preds = torch_model(xs)
        preds_np = preds.data.cpu().numpy()
        correct += (np.argmax(preds_np, axis=1) == ys.cpu().numpy()).sum()
        total += len(xs)
    acc = float(correct) / total
    report.clean_train_clean_eval = acc
    print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))

    # We use tf for evaluation on adversarial data
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))

    # Convert the pytorch model to a tf model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against fgsm
    total = 0
    correct = 0
    for xs, ys in test_loader:
        adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
        correct += (np.argmax(adv_preds, axis=1) == ys.numpy()).sum()
        total += len(xs)
    acc = float(correct) / total
    print('Adv accuracy: {:.3f}'.format(acc * 100))
    report.clean_train_adv_eval = acc
    return report
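# Hedged runner sketch (an assumption, not original code): the cleverhans tutorials
# typically invoke a main() through tf.app.run(); the argument values are illustrative.
def main(_=None):
    mnist_tutorial(nb_epochs=6, batch_size=128, learning_rate=0.001)


if __name__ == '__main__':
    tf.app.run(main)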
def main(train_method, dataset, model_name, params):
    # prepare dataset and normalization settings
    normalize = None
    if params.get('normalized', False):
        if dataset == 'mnist':
            normalize = (_MNIST_MEAN, _MNIST_STDDEV)
        elif dataset == 'cifar10':
            normalize = (_CIFAR10_MEAN, _CIFAR10_STDDEV)
        elif dataset == 'imagenet':
            normalize = (_IMAGENET_MEAN, _IMAGENET_STDDEV)
    train_set = get_dataset(dataset, 'train', normalize)
    test_set = get_dataset(dataset, 'test', normalize)

    # read input shape (c, h, w)
    input_shape = get_input_shape(dataset)

    # read params
    batch_size = params['batch_size']
    optimizer_name = params.get('optimizer', 'sgd')
    if optimizer_name == 'sgd':
        lr = params.get('learning_rate', 0.1)
        momentum = params.get('momentum', 0.1)
        weight_decay = params.get('weight_decay', 5e-4)
    elif optimizer_name == 'adam':
        lr = params.get('learning_rate', 0.1)
    else:
        raise NotImplementedError
    cur_lr = lr
    print('default learning rate =', cur_lr, file=stderr)

    start_epoch = 0
    epochs = params.get('epochs', 0)
    eps = normed_eps = params['eps']
    if train_method == 'adv':
        # Note: for adversarial training, we use the manual implementation in the
        # training phase and the cleverhans implementation in the test phase,
        # both for precision
        eps_iter_coef = params['eps_iter_coef']
        clip_min = params['clip_min']
        clip_max = params['clip_max']
        if normalize is not None:
            mean, std = normalize
            clip_min = (clip_min - max(mean)) / min(std) - 1e-6
            clip_max = (clip_max - min(mean)) / min(std) + 1e-6
            normed_eps = eps / min(std)
        nb_iter = params['nb_iter']
        rand_init = params['rand_init']
        adv_params = {
            'eps': normed_eps,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'eps_iter': eps_iter_coef * eps,
            'nb_iter': nb_iter,
            'rand_init': rand_init
        }
    elif train_method == 'certadv':
        # Note: for certified adversarially trained models, we still test
        # accuracy with a PGD attack
        eps_iter_coef = params['eps_iter_coef']
        clip_min = params['clip_min']
        clip_max = params['clip_max']
        if normalize is not None:
            mean, std = normalize
            clip_min = (clip_min - max(mean)) / min(std) - 1e-6
            clip_max = (clip_max - min(mean)) / min(std) + 1e-6
            normed_eps = eps / min(std)
        nb_iter = params['nb_iter']
        rand_init = params['rand_init']
        adv_params = {
            'eps': normed_eps,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'eps_iter': eps_iter_coef * eps,
            'nb_iter': nb_iter,
            'rand_init': rand_init
        }
    print(adv_params, file=stderr)

    # prepare loaders
    train_loader = torch.utils.data.DataLoader(train_set, batch_size,
                                               shuffle=True, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size,
                                              shuffle=True, pin_memory=True)

    # stats
    train_tot = len(train_set)
    test_tot = len(test_set)
    best_acc = 0.0
    best_robacc = 0.0

    # load model
    m = model.load_model('exp', dataset, model_name).cuda()
    print(m)

    if train_method == 'adv' and params['retrain']:
        # retrain from the best clean model
        clean_model_name = f'{dataset}_{model_name}_clean_0_best'
        new_m, stats = try_load_weight(m, clean_model_name)
        assert stats == True, "Could not load pretrained clean model."
        if isinstance(new_m[0], NormalizeLayer):
            # squeeze the normalize layer out
            new_m = new_m[1]
        m = new_m
    elif train_method == 'certadv':
        configdir = params['configpath']
        ds_mapping = {'cifar10': 'cifar', 'mnist': 'mnist'}
        ds_multiplier = {'cifar10': 255., 'mnist': 10.}
        configfilename = f'exp_{ds_mapping[dataset]}{int(round(eps * ds_multiplier[dataset]))}.json'
        with open(os.path.join(configdir, configfilename), 'r') as f:
            real_config = json.load(f)
        epochs = real_config['training_params']['epochs']
        start_epoch = epochs - 1
        model_path = os.path.join(
            os.path.join(real_config['path_prefix'],
                         real_config['models_path']),
            f'{model_name}_best.pth')
        d = torch.load(model_path)
        print(f'certadv load from {model_path}', file=stderr)
        m.load_state_dict(d['state_dict'])

    # open log file handles
    save_name = f'{dataset}_{model_name}_{train_method}_{eps}'
    mode = 'a'
    if os.path.exists(f'{SAVE_PATH}/{save_name}_train.log') or os.path.exists(
            f'{SAVE_PATH}/{save_name}_test.log'):
        choice = getpass.getpass(
            'Log exists. Do you want to rewrite it? (Y/others) ')
        if choice == 'Y':
            mode = 'w'
            print('Rewrite log', file=stderr)
        else:
            mode = 'a'
    train_log = open(f'{SAVE_PATH}/{save_name}_train.log', mode)
    test_log = open(f'{SAVE_PATH}/{save_name}_test.log', mode)

    # special treatment for model G - layerwise training
    if model_name == 'G' and train_method == 'adv':
        new_last_layer = nn.Linear(1024, 10)

    # start
    for epoch in range(start_epoch, epochs):
        if epoch % LR_REDUCE == 0 and epoch > 0:
            # learning rate is multiplied by LR_REDUCE_RATE every LR_REDUCE epochs
            cur_lr *= LR_REDUCE_RATE
            print(f'  reduce learning rate to {cur_lr}', file=stderr)

        # special treatment for model G - layerwise training
        if model_name == 'G' and train_method == 'adv':
            new_m = list()
            tmp_cnt = 0
            for l in m:
                new_m.append(l)
                if isinstance(l, nn.Linear) and l.out_features == 1024:
                    tmp_cnt += 1
                    if tmp_cnt > epoch / 5:
                        if l.out_features == 1024:
                            new_m.append(nn.ReLU())
                        new_m.append(new_last_layer)
                        break
            new_m = nn.Sequential(*new_m).cuda()
            m, new_m = new_m, m
            print(m, file=stderr)
            cur_lr = lr
            print(f'  learning rate restored to {cur_lr}', file=stderr)

        # init optimizer
        if optimizer_name == 'adam':
            opt = optim.Adam(m.parameters(), lr=cur_lr)
        elif optimizer_name == 'sgd':
            opt = optim.SGD(m.parameters(), lr=cur_lr, momentum=momentum,
                            weight_decay=weight_decay)
        else:
            raise Exception("Fail to create the optimizer")

        cur_idx = 0
        cur_acc = 0.0
        cur_robacc = 0.0
        batch_tot = 0
        batch_acc_tot = 0
        batch_robacc_tot = 0
        clean_ce = 0.0
        adv_ce = 0.0

        # current eps, warmed up over the first EPS_WARMUP_EPOCHS epochs
        now_eps = normed_eps * min((epoch + 1) / EPS_WARMUP_EPOCHS, 1.0)

        # =========== Training ===========
        print(f'Epoch {epoch}: training', file=stderr)
        if train_method != 'clean':
            print(f'  Training eps={now_eps:.3f}', file=stderr)
        m.train()
        for i, (X, y) in enumerate(train_loader):
            if DEBUG and i > 10:
                break
            start_t = time.time()
            X_clean, y_clean = X.cuda(), y.cuda().long()
            clean_out = m(Variable(X_clean))
            clean_ce = nn.CrossEntropyLoss()(clean_out, Variable(y_clean))
            batch_tot = X.size(0)
            batch_acc_tot = (
                clean_out.data.max(1)[1] == y_clean).float().sum().item()

            if train_method == 'clean':
                opt.zero_grad()
                clean_ce.backward()
                opt.step()
            elif train_method == 'adv':
                # manual PGD: iterated signed-gradient steps, projected back
                # onto the eps ball and the valid input range
                X_pgd = Variable(X, requires_grad=True)
                for _ in range(nb_iter):
                    opt_pgd = optim.Adam([X_pgd], lr=1e-3)  # created but unused
                    opt.zero_grad()
                    loss = nn.CrossEntropyLoss()(m(X_pgd.cuda()),
                                                 Variable(y_clean))
                    loss.backward()
                    eta = now_eps * eps_iter_coef * X_pgd.grad.data.sign()
                    X_pgd = Variable(X_pgd.data + eta, requires_grad=True)
                    eta = torch.clamp(X_pgd.data - X, -now_eps, now_eps)
                    X_pgd.data = X + eta
                    X_pgd.data = torch.clamp(X_pgd.data, clip_min, clip_max)
                # print(X_pgd.data, la.norm((X_pgd.data - X).numpy().reshape(-1), np.inf), file=stderr)
                adv_out = m(Variable(X_pgd.data).cuda())
                adv_ce = nn.CrossEntropyLoss()(adv_out, Variable(y_clean))
                batch_robacc_tot = (
                    adv_out.data.max(1)[1] == y_clean).float().sum().item()
                opt.zero_grad()
                adv_ce.backward()
                opt.step()
            elif train_method == 'certadv':
                # nothing to do during training
                adv_ce = torch.Tensor([0.0]).cuda()

            end_t = time.time()
            clean_ce = clean_ce.detach().cpu().item()
            if train_method != 'clean':
                adv_ce = adv_ce.detach().cpu().item()
            runtime = end_t - start_t
            cur_acc = (cur_acc * cur_idx + batch_acc_tot) / (cur_idx + batch_tot)
            if train_method != 'clean':
                cur_robacc = (cur_robacc * cur_idx + batch_robacc_tot) / (cur_idx + batch_tot)
            cur_idx += batch_tot
            print(
                f'{epoch} {cur_idx} {cur_acc} {cur_robacc} {batch_acc_tot/batch_tot:.3f} {batch_robacc_tot/batch_tot:.3f}'
                f' {clean_ce:.3f} {adv_ce:.3f} {runtime:.3f}',
                file=train_log)
            if i % STEP == 0 or cur_idx == train_tot:
                print(
                    f'  [train] {epoch}/{cur_idx} acc={cur_acc:.3f}({batch_acc_tot/batch_tot:.3f}) '
                    f'robacc={cur_robacc:.3f}({batch_robacc_tot/batch_tot:.3f}) ce={clean_ce:.3f} adv_ce={adv_ce:.3f} time={runtime:.3f}',
                    file=stderr)
        train_log.flush()

        # =========== Testing ===========
        print(f'Epoch {epoch}: testing', file=stderr)
        m.eval()
        torch.set_grad_enabled(False)

        cur_idx = 0
        cur_acc = 0.0
        cur_robacc = 0.0
        batch_tot = 0
        batch_acc_tot = 0
        batch_robacc_tot = 0
        clean_ce = 0.0
        adv_ce = 0.0

        if train_method in ['adv', 'certadv']:
            # test-phase PGD uses the cleverhans implementation
            tf_model = convert_pytorch_model_to_tf(m)
            ch_model = CallableModelWrapper(tf_model, output_layer='logits')
            x_op = tf.placeholder(tf.float32, shape=(None,) + tuple(input_shape))
            sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=0.5)))
            attk = ProjectedGradientDescent(ch_model, sess=sess)
            adv_x = attk.generate(x_op, **adv_params)
            adv_preds_op = tf_model(adv_x)

        for i, (X, y) in enumerate(test_loader):
            if DEBUG and i >= 10:
                break
            start_t = time.time()
            X_clean, y_clean = X.cuda(), y.cuda().long()
            clean_out = m(Variable(X_clean))
            clean_ce = nn.CrossEntropyLoss()(clean_out, Variable(y_clean))
            batch_tot = X.size(0)
            batch_acc_tot = (
                clean_out.data.max(1)[1] == y_clean).float().sum().item()

            if train_method in ['adv', 'certadv']:
                (adv_preds,) = sess.run((adv_preds_op,), feed_dict={x_op: X})
                adv_preds = torch.Tensor(adv_preds)
                adv_ce = nn.CrossEntropyLoss()(adv_preds, Variable(y))
                batch_robacc_tot = (
                    adv_preds.data.max(1)[1] == y).float().sum().item()
            # elif train_method == 'certadv':
            #     adv_ce, robust_err = robust_loss(m, eps,
            #         Variable(X_clean), Variable(y_clean),
            #         proj=50, norm_type='l1_median', bounded_input=True)
            #     batch_robacc_tot = (1.0 - robust_err) * batch_tot

            end_t = time.time()
            clean_ce = clean_ce.detach().cpu().item()
            if train_method != 'clean':
                adv_ce = adv_ce.detach().cpu().item()
            runtime = end_t - start_t
            cur_acc = (cur_acc * cur_idx + batch_acc_tot) / (cur_idx + batch_tot)
            if train_method != 'clean':
                cur_robacc = (cur_robacc * cur_idx + batch_robacc_tot) / (cur_idx + batch_tot)
            cur_idx += batch_tot
            print(
                f'{epoch} {cur_idx} {cur_acc} {cur_robacc} {batch_acc_tot / batch_tot:.3f} {batch_robacc_tot / batch_tot:.3f}'
                f' {clean_ce} {adv_ce} {runtime:.3f}',
                file=test_log)
            if i % STEP == 0 or cur_idx == test_tot:
                print(
                    f'  [test] {epoch}/{cur_idx} acc={cur_acc:.3f}({batch_acc_tot / batch_tot:.3f}) '
                    f'robacc={cur_robacc:.3f}({batch_robacc_tot / batch_tot:.3f}) time={runtime:.3f}',
                    file=stderr)
        torch.set_grad_enabled(True)

        if model_name == 'G' and train_method == 'adv':
            # switch back
            m, new_m = new_m, m

        def save_with_configs(m, path):
            torch.save(
                {
                    'state_dict': m.state_dict(),
                    'acc': cur_acc,
                    'robacc': cur_robacc,
                    'epoch': epoch,
                    'normalized': normalize is not None,
                    'dataset': dataset
                }, path)

        if not os.path.exists(f'{SAVE_PATH}/{save_name}_chkpt'):
            os.makedirs(f'{SAVE_PATH}/{save_name}_chkpt')
        save_with_configs(
            m, f'{SAVE_PATH}/{save_name}_chkpt/{save_name}_ep_{epoch:03d}.pth')
        if (train_method == 'clean' and cur_acc > best_acc) or (
                train_method != 'clean' and cur_robacc > best_robacc):
            save_with_configs(m, f'{SAVE_PATH}/{save_name}_best.pth')
            print(
                f"  Updated, acc {best_acc:.3f} => {cur_acc:.3f} robacc {best_robacc:.3f} => {cur_robacc:.3f}",
                file=stderr)
            best_acc = cur_acc
            best_robacc = cur_robacc
        test_log.flush()

        # free cached GPU memory after each epoch
        torch.cuda.empty_cache()

        if train_method in ['adv', 'certadv']:
            sess.close()

    train_log.close()
    test_log.close()
np.save(
    f'results/fmnist/y_test_mnist_{name}_{h_dim}_{h_dim_hypernet}_{m}_{lr}_{args.wd}_{S}.npy',
    y_MNIST)
np.save(
    f'results/fmnist/y_test_notmnist_{name}_{h_dim}_{h_dim_hypernet}_{m}_{lr}_{args.wd}_{S}.npy',
    y_notMNIST)

"""
=======================
Adversarial examples experiments
=======================
"""

model.eval()

# We use tf for evaluation on adversarial data
sess = tf.Session()
x_op = tf.placeholder(tf.float32, shape=(None, 784))

# Convert the pytorch model to a tf model and wrap it in cleverhans
tf_model_fn = convert_pytorch_model_to_tf(model, out_dims=10)
cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

X_test = fmnist.test.images
y_test = fmnist.test.labels
M = X_test.shape[0]

adv_accs = []
adv_ents = []


def test_tf(m=100):
    preds = []
    for i in range(0, 1000, m):
clf = Classifier(net)
clf.to(device)
clf = nn.DataParallel(clf)
clf.eval()

print('configuring TensorFlow...')
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
x_op = tf.placeholder(tf.float32, shape=(None, 3, 299, 299))
y_op = tf.placeholder(tf.int64, shape=(BATCH_SIZE,))
onehot_op = tf.one_hot(y_op, 1000)

tf_model_fn = convert_pytorch_model_to_tf(clf, out_dims=1000)
cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

# pgd_op = MadryEtAl(cleverhans_model, sess=sess)
pgd_op = ProjectedGradientDescent(cleverhans_model, sess=sess,
                                  default_rand_init=True)
pgd_params = {
    'eps': 16 / 255.0,
    'eps_iter': 2 / 255.0,
    'nb_iter': 10,
    'clip_min': 0.0,
    'clip_max': 1.0
}
adv_x_op = pgd_op.generate(x_op, y=onehot_op, **pgd_params)
resnet50 = models.resnet50(pretrained=True)
resnet50.to(device)
resnet50.eval()

images = load_dataset('./data/amazon_men/images_MEN_category_under_attack')
images_paths = os.listdir(
    './data/amazon_men/images_MEN_category_under_attack/img')
images_paths.sort()
classes_txt = './data/amazon_men/imagenet1000_clsidx_to_labels.txt'

sess = tf.compat.v1.Session()
x_op = tf.placeholder(tf.float32, shape=(1, 3, None, None))

# Convert the pytorch model to a tf model and wrap it in cleverhans
tf_model_fn = convert_pytorch_model_to_tf(resnet50)
cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
# pgd_op = ProjectedGradientDescent(cleverhans_model, sess=sess)

# targeted attack towards ImageNet class 770
y_target = np.zeros((1, 1000))
y_target[0, 770] = 1
fgsm_params = {
    'eps': 0.015686275,  # 4/255
    'y_target': y_target,
    'clip_min': 0,
    'clip_max': 1
}
pgd_params = {
def targeted(model, input_dim, sess, X_test, target, eps, n_adv, attack,
             multi_model=False):
    '''
    Calculates adversarial examples with the projected gradient descent
    method by Madry et al.
    :return: adversarial examples for X_test
    '''
    tf_model_fn = convert_pytorch_model_to_tf(model, out_dims=95)
    if multi_model:
        cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='probs')
    else:
        cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')
    x_op = tf.compat.v1.placeholder(tf.float32, shape=(None, input_dim[1]))

    if attack == 'PGD':
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {
            'eps': eps,
            'y_target': target,
            'clip_min': -1,
            'clip_max': 1
        }
    elif attack == 'FGSM':
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {
            'eps': eps,
            'y_target': target,
            'clip_min': -1,
            'clip_max': 1
        }
    elif attack == 'CWL2':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = {'max_iterations': 100, 'clip_min': -1, 'clip_max': 1}
    else:
        raise ValueError('[+] Attack not supported')

    if not os.path.exists('/root/asr-python/src/tmp2'):
        os.makedirs('/root/asr-python/src/tmp2')

    adv_x_op = attack_op.generate(x_op, **attack_params)
    m = input_dim[0]  # minibatch size
    adv_x = np.zeros([m, input_dim[1]])
    adv_samples = n_adv
    single_advs = []
    for i in range(adv_samples):
        single_adv = sess.run(adv_x_op, feed_dict={x_op: X_test})
        single_advs.append(single_adv)
        # np.save(Path('/root/asr-python/src/tmp2', f'''{i}.npy'''), single_adv)
        # average the n_adv adversarial examples
        adv_x += (1 / adv_samples) * single_adv
    # np.save(Path('/root/asr-python/src/tmp2', f'''combined.npy'''), adv_x)
    return adv_x, np.array(single_advs)
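# Illustrative usage (an assumption, not from the original source): averaging
# n_adv targeted PGD examples toward a one-hot target class. `net`, the input_dim
# values, and the chosen target class are hypothetical; the 95-way output follows
# the hard-coded out_dims above.
# target = np.zeros((1, 95), dtype=np.float32)
# target[0, 3] = 1  # hypothetical target class
# adv_x, single_advs = targeted(net, input_dim=(1, 1024), sess=sess,
#                               X_test=X_test, target=target, eps=0.1,
#                               n_adv=5, attack='PGD')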
def main():
    ## setup experimental preparation
    global args
    args = parse_args()

    # build save folder
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    # global logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        "[%(asctime)s] %(levelname)s:%(name)s:%(message)s")
    # file logger
    fh = logging.FileHandler(os.path.join(args.save, args.expname) + '.log',
                             mode='w')
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # GPU select
    args.cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if args.cuda else "cpu")
    if args.sparse and args.wd != 0:
        logger.error('Sparsity and weight decay are incompatible, pick one!')
        exit()

    # debugging args
    logger.debug(args)

    # set seeds
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True

    # dataset files
    train_dir = os.path.join(args.data, 'aclImdb/train/')
    dev_dir = os.path.join(args.data, 'aclImdb/dev/')
    test_dir = os.path.join(args.data, 'aclImdb/test/')
    token_file_labels = [dev_dir, train_dir, test_dir]

    ## processe_raw_data
    # deal with the IMDB dataset: sentence to tree
    # for token_file_label in token_file_labels:
    #     utils.processe_raw_data(token_file_label)

    ## build vocab
    token_files = []
    for k in ['pos', 'neg']:
        token_files.extend([
            os.path.join(token_file_label, k + ".json")
            for token_file_label in token_file_labels
        ])
    imdb_vocab_file = os.path.join(args.data, 'imdb.vocab')
    utils.build_vocab(token_files, imdb_vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=imdb_vocab_file,
                  data=[
                      Constants.PAD_WORD, Constants.UNK_WORD,
                      Constants.BOS_WORD, Constants.EOS_WORD
                  ])
    logger.debug('==> imdb vocabulary size : %d ' % vocab.size())

    ## build embedding of vocab
    # for words common to the dataset vocab and GLOVE, use GLOVE vectors
    # for other words in the dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'imdb_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = utils.load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        logger.debug('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.zeros(vocab.size(), glove_emb.size(1),
                          dtype=torch.float, device=device)
        emb.normal_(0, 0.05)
        # give padding and the other special words fixed embeddings
        # if they are absent in vocab
        for idx, item in enumerate([
                Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD,
                Constants.EOS_WORD
        ]):
            if idx == 0:
                emb[idx].fill_(10e-3)
            if idx == 1:
                emb[idx].fill_(10e-1)
            if idx == 2:
                emb[idx].fill_(1)
            if idx == 3:
                emb[idx].fill_(2)
        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(word)]
        torch.save(emb, emb_file)

    ## build dataset for treelstm
    # load imdb dataset splits
    train_file = os.path.join(args.data, 'imdb_train.pth')
    train_dataset = IMDBdataset(train_dir, vocab, args.num_classes)
    torch.save(train_dataset, train_file)
    # train_dataset = torch.load(train_file)
    logger.debug('==> Size of train data : %d ' % len(train_dataset))

    dev_file = os.path.join(args.data, 'imdb_dev.pth')
    dev_dataset = IMDBdataset(dev_dir, vocab, args.num_classes)
    torch.save(dev_dataset, dev_file)
    # dev_dataset = torch.load(dev_file)
    logger.debug('==> Size of dev data : %d ' % len(dev_dataset))

    test_file = os.path.join(args.data, 'imdb_test.pth')
    test_dataset = IMDBdataset(test_dir, vocab, args.num_classes)
    torch.save(test_dataset, test_file)
    # test_dataset = torch.load(test_file)
    logger.debug('==> Size of test data : %d ' % len(test_dataset))

    ## build treeLSTM model
    # initialize tree_model, criterion/loss_function, optimizer
    tree_model = TreeLSTM(vocab.size(), args.input_dim, args.mem_dim,
                          args.hidden_dim, args.num_classes, args.sparse,
                          args.freeze_embed)
    criterion = nn.KLDivLoss()
    tree_model.to(device), criterion.to(device)
    # plug the embeddings into the embedding matrix inside tree_model
    tree_model.emb.weight.data.copy_(emb)

    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      tree_model.parameters()),
                               lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                         tree_model.parameters()),
                                  lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     tree_model.parameters()),
                              lr=args.lr, weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    ## train treeLSTM model
    # create trainer object for training and testing
    trainer = Trainer(args, tree_model, criterion, optimizer, device)
    best = -float('inf')
    for epoch in range(args.epochs):
        train_loss = trainer.train(train_dataset)
        train_loss, train_pred = trainer.test(train_dataset)
        dev_loss, dev_pred = trainer.test(dev_dataset)
        test_loss, test_pred = trainer.test(test_dataset)

        train_pearson = metrics.pearson(train_pred, train_dataset.labels)
        train_mse = metrics.mse(train_pred, train_dataset.labels)
        logger.info(
            '==> Epoch {}, Train \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch, train_loss, train_pearson, train_mse))
        dev_pearson = metrics.pearson(dev_pred, dev_dataset.labels)
        dev_mse = metrics.mse(dev_pred, dev_dataset.labels)
        logger.info(
            '==> Epoch {}, Dev \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch, dev_loss, dev_pearson, dev_mse))
        test_pearson = metrics.pearson(test_pred, test_dataset.labels)
        test_mse = metrics.mse(test_pred, test_dataset.labels)
        logger.info(
            '==> Epoch {}, Test \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch, test_loss, test_pearson, test_mse))

        if best < test_pearson:
            best = test_pearson
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'pearson': test_pearson,
                'mse': test_mse,
                'args': args,
                'epoch': epoch
            }
            logger.debug('==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint,
                       '%s.pt' % os.path.join(args.save, args.expname))

    ## get the tree root node position of every sentence
    with open('%s.pt' % os.path.join(args.save, args.expname), 'rb') as f:
        tree_model.load_state_dict(torch.load(f)['model'])
    datasets = [train_dataset, test_dataset, dev_dataset]
    for dataset in datasets:
        dataset.get_root(tree_model, device)
    # for dataset in datasets:
    #     indices = torch.randperm(len(dataset), dtype=torch.long, device='cpu')
    #     for idx in tqdm(range(len(dataset)), desc='Building root representation...'):
    #         sents, trees, _ = dataset[indices[idx]]
    #         # print('SENTS:', sents)
    #         # print('TREES:', trees)
    #         sents = [sent.to(device) for sent in sents]
    #         hiddens, _ = tree_model(sents, trees)
    #         print('ROOTS:', hiddens)
    #         # print('TO ADD:', dataset[indices[idx]])
    #         dataset[indices[idx]][0].append(hiddens)
    #         # print('TO ADD:', dataset[indices[idx]])

    ## build dataset for seqbackLSTM
    seqback_train_file = os.path.join(args.data, 'imdb_seqback_train.pth')
    # seqback_train_dataset = seqbackDataset(train_dir, vocab, device).sequenses
    seqback_train_data = seqbackDataset(train_dir, vocab, device)
    torch.save(seqback_train_data, seqback_train_file)
    # seqback_train_dataset = torch.load(seqback_train_file)
    logger.debug('==> Size of train data : %d ' % len(seqback_train_data))

    seqback_val_file = os.path.join(args.data, 'imdb_seqback_dev.pth')
    # seqback_val_dataset = seqbackDataset(dev_dir, vocab, device).sequenses
    seqback_val_data = seqbackDataset(dev_dir, vocab, device)
    torch.save(seqback_val_data, seqback_val_file)
    # seqback_dev_dataset = torch.load(seqback_dev_file)
    logger.debug('==> Size of dev data : %d ' % len(seqback_val_data))

    seqback_test_file = os.path.join(args.data, 'imdb_seqback_test.pth')
    # seqback_test_dataset = seqbackDataset(test_dir, vocab, device).sequenses
    seqback_test_data = seqbackDataset(test_dir, vocab, device)
    torch.save(seqback_test_data, seqback_test_file)
    # seqback_test_dataset = torch.load(seqback_test_file)
    logger.debug('==> Size of test data : %d ' % len(seqback_test_data))

    ## build seqbackLSTM model
    seqback_criterion = nn.CrossEntropyLoss()
    seqback_model = SeqbackLSTM(vocab, device)
    seqback_model.to(device), seqback_criterion.to(device)
    seqback_model.emb.weight.data.copy_(emb)

    ## train seqbackLSTM model
    seqback_trainer = seqbackTrainer(seqback_model, vocab, seqback_criterion,
                                     device, optimizer)
    lr = 20
    best_val_loss = None
    # At any point you can hit Ctrl + C to break out of training early.
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        print('EPOCH:', epoch)
        seqback_trainer.train(seqback_train_data, lr)
        val_loss = seqback_trainer.evaluate(seqback_val_data)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch,
                                         (time.time() - epoch_start_time),
                                         val_loss, math.exp(val_loss)))
        print('-' * 89)
        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            with open(args.save_seqback, 'wb') as f:
                torch.save(seqback_model, f)
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement has been seen
            # on the validation set.
            lr /= 4.0

    # load the best saved seqback_model
    with open(args.save_seqback, 'rb') as f:
        seqback_model = torch.load(f)
    # after loading, the rnn params are not a continuous chunk of memory;
    # this makes them a continuous chunk, which speeds up the forward pass
    seqback_model.chainLSTM.lstm.flatten_parameters()

    ## run seqbackLSTM on the test data
    test_loss = seqback_trainer.evaluate(seqback_test_data)
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)

    ## build dataset for wordCNN
    wordcnn_train_file = os.path.join(args.data, 'imdb_wordcnn_train.pth')
    wordcnn_train_data = WordCNNDataset(train_dir, vocab, device).Preprocessor
    wordcnn_train_dataloader = WordCNNDataLoader(dataset=wordcnn_train_data,
                                                 batch_size=64)
    torch.save(wordcnn_train_dataloader, wordcnn_train_file)
    logger.debug('==> Size of train data : %d ' % len(wordcnn_train_data))

    wordcnn_val_file = os.path.join(args.data, 'imdb_wordcnn_dev.pth')
    wordcnn_val_data = WordCNNDataset(dev_dir, vocab, device).Preprocessor
    wordcnn_val_dataloader = WordCNNDataLoader(dataset=wordcnn_val_data,
                                               batch_size=64)
    torch.save(wordcnn_val_dataloader, wordcnn_val_file)
    logger.debug('==> Size of dev data : %d ' % len(wordcnn_val_data))

    wordcnn_test_file = os.path.join(args.data, 'imdb_wordcnn_test.pth')
    wordcnn_test_data = WordCNNDataset(test_dir, vocab, device).Preprocessor
    wordcnn_test_dataloader = WordCNNDataLoader(dataset=wordcnn_test_data,
                                                batch_size=64)
    torch.save(wordcnn_test_dataloader, wordcnn_test_file)
    logger.debug('==> Size of test data : %d ' % len(wordcnn_test_data))

    wordcnn_model = WordCNN(2, vocab, emb)
    wordcnn_model.to(device)
    trainable_params = [p for p in wordcnn_model.parameters() if p.requires_grad]
    wordcnn_optimizer = optim.Adam(params=trainable_params, lr=0.01)
    # wordcnn_optimizer = Adadelta(params=trainable_params, lr=0.01, weight_decay=0.95)
    lr_plateau = optim.lr_scheduler.ReduceLROnPlateau(wordcnn_optimizer,
                                                      factor=0.7, patience=5,
                                                      min_lr=0.0001)
    wordcnn_criterion = nn.CrossEntropyLoss()
    wordcnn_trainer = WordCNNTrainer(wordcnn_model,
                                     wordcnn_train_dataloader,
                                     wordcnn_val_dataloader,
                                     criterion=wordcnn_criterion,
                                     optimizer=wordcnn_optimizer,
                                     lr_schedule='store_true',
                                     lr_scheduler=lr_plateau,
                                     use_gpu=torch.cuda.is_available(),
                                     logger=logger)
    wordcnn_trainer.run(epochs=10)

    logger.info("Evaluating...")
    logger.info('Best Model: {}'.format(
        wordcnn_trainer.best_checkpoint_filepath))
    wordcnn_model.load_state_dict(
        torch.load(wordcnn_trainer.best_checkpoint_filepath))
    wordcnn_evaluator = WordCNNEvaluator(wordcnn_model,
                                         wordcnn_test_dataloader,
                                         use_gpu=torch.cuda.is_available(),
                                         logger=logger)
    wordcnn_evaluator.evaluate()

    ## Craft adversarial examples using Carlini and Wagner's approach
    # use nn.Sequential to merge the seqbackLSTM and the wordCNN
    nb_classes = 2
    source_samples = 10
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))

    # Convert the merged pytorch model to a tf model and wrap it in cleverhans
    seqback_model.train()
    wordcnn_model.train()
    seqbacklstm_and_wordcnn_model = torch.nn.Sequential(seqback_model,
                                                        wordcnn_model)
    tf_seqbacklstm_and_wordcnn_model = convert_pytorch_model_to_tf(
        seqbacklstm_and_wordcnn_model)
    chans_tf_seqbacklstm_and_wordcnn_model = CallableModelWrapper(
        tf_seqbacklstm_and_wordcnn_model, output_layer='logits')
    # tf_seqback_model = convert_pytorch_model_to_tf(seqback_model)
    # cleverhans_model1 = CallableModelWrapper(tf_model1, output_layer='logits')
    # tf_wordcnn_model = convert_pytorch_model_to_tf(wordcnn_model)
    # cleverhans_model2 = CallableModelWrapper(tf_model2, output_layer='logits')
    # cleverhans_model = torch.nn.Sequential(tf_model1, tf_model2)
    # cleverhans_model = CallableModelWrapper(cleverhans_model, output_layer='logits')

    # CW attack
    cw = CarliniWagnerL2(chans_tf_seqbacklstm_and_wordcnn_model, back='tf',
                         sess=sess)

    # build adv_inputs
    # adv_inputs = np.array([[instance] * nb_classes
    #                        for instance in x_test[:source_samples]],
    #                       dtype=np.float32)
    # adv_inputs = adv_inputs.reshape(
    #     (source_samples * nb_classes, img_rows, img_cols, nchannels))
    # one_hot = np.zeros((nb_classes, nb_classes))
    # one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1
    # adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
    #     (source_samples * nb_classes, nb_classes))
    yname = "y_target"
    adv_inputs, adv_ys = seqback_trainer.attack(seqback_test_data)
    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': 1,
        'initial_const': 10
    }
    adv = cw.generate_np(adv_inputs, **cw_params)
    print('ROOT ADV', adv)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys, args=eval_params)

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()
def FGSM(torch_model, dataset, eps_list, opt, c, h, w, clip_min, clip_max):
    if opt == 'evaluate':
        acclist = []
        for eps in eps_list:
            sess = tf.Session()
            x_op = tf.placeholder(tf.float32, shape=(None, c, h, w))

            # Convert the pytorch model to a tf model and wrap it in cleverhans
            tf_model_fn = convert_pytorch_model_to_tf(torch_model)
            cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                    output_layer='logits')

            # Create an FGSM attack
            atk_op = FastGradientMethod(cleverhans_model, sess=sess)
            atk_params = {'eps': eps, 'clip_min': clip_min, 'clip_max': clip_max}
            adv_x_op = atk_op.generate(x_op, **atk_params)
            adv_preds_op = tf_model_fn(adv_x_op)

            # Run an evaluation of our model against fgsm
            total = 0
            correct = 0
            for xs, ys in dataset:
                xs, ys = xs.to(device), ys.to(device)
                adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
                correct += (np.argmax(adv_preds, axis=1) ==
                            ys.cpu().detach().numpy()).sum()
                total += dataset.batch_size
            acc = float(correct) / total
            print('Adv accuracy: {:.3f}'.format(acc * 100))
            acclist.append(acc)
        return acclist
    elif opt == 'generate':
        advpacklist = []
        for eps in eps_list:
            advlist = []
            sess = tf.Session()
            x_op = tf.placeholder(tf.float32, shape=(None, c, h, w))

            # Convert the pytorch model to a tf model and wrap it in cleverhans
            tf_model_fn = convert_pytorch_model_to_tf(torch_model)
            cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                    output_layer='logits')

            # Create an FGSM attack
            atk_op = FastGradientMethod(cleverhans_model, sess=sess)
            atk_params = {'eps': eps, 'clip_min': clip_min, 'clip_max': clip_max}
            adv_x_op = atk_op.generate(x_op, **atk_params)

            # Keep only adversarial examples that flip a correct prediction
            for xs, ys in dataset:
                xs, ys = xs.to(device), ys.to(device)
                adv = torch.from_numpy(sess.run(adv_x_op, feed_dict={x_op: xs}))
                if ys == np.argmax(torch_model(xs).data.cpu().numpy()):
                    pred = np.argmax(
                        torch_model(adv.to(device)).data.cpu().numpy())
                    if ys != pred:
                        advlist.append(adv.cpu().numpy())
            print(len(advlist))
            advpacklist.append(advlist)
        return advpacklist
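# Minimal usage sketch (an assumption, not original code): sweeping FGSM over a
# list of epsilons on a CIFAR-10-shaped loader, then generating the successful
# adversarial examples. `test_loader` and the eps values are illustrative; the
# 'generate' branch compares a single label per batch, so it assumes batch_size=1.
# eps_list = [0.01, 0.03, 0.1]
# accs = FGSM(torch_model, test_loader, eps_list, 'evaluate',
#             c=3, h=32, w=32, clip_min=0., clip_max=1.)
# advpacks = FGSM(torch_model, test_loader, eps_list, 'generate',
#                 c=3, h=32, w=32, clip_min=0., clip_max=1.)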
def main(seed=0, n_epochs=5, batch_size=100, time=50, update_interval=50,
         plot=False, save=True):
    np.random.seed(seed)
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    print()
    print('Loading MNIST data...')
    print()

    # Get the MNIST data.
    images, labels = MNIST('../../data/MNIST', download=True).get_train()
    images /= images.max()  # Standardizing to [0, 1].
    images = images.view(-1, 784)
    labels = labels.long()

    test_images, test_labels = MNIST('../../data/MNIST',
                                     download=True).get_test()
    test_images /= test_images.max()  # Standardizing to [0, 1].
    test_images = test_images.view(-1, 784)
    test_labels = test_labels.long()

    if torch.cuda.is_available():
        images = images.cuda()
        labels = labels.cuda()
        test_images = test_images.cuda()
        test_labels = test_labels.cuda()

    ANN = FullyConnectedNetwork()
    model_name = '_'.join(
        [str(x) for x in [seed, n_epochs, batch_size, time, update_interval]])

    # Specify loss function.
    criterion = nn.CrossEntropyLoss()

    if save and os.path.isfile(os.path.join(params_path, model_name + '.pt')):
        print()
        print('Loading trained ANN from disk...')
        ANN.load_state_dict(
            torch.load(os.path.join(params_path, model_name + '.pt')))
        if torch.cuda.is_available():
            ANN = ANN.cuda()
    else:
        print()
        print('Creating and training the ANN...')
        print()

        # Specify optimizer.
        optimizer = optim.Adam(params=ANN.parameters(), lr=1e-3,
                               weight_decay=1e-4)
        batches_per_epoch = int(images.size(0) / batch_size)

        # Train the ANN.
        for i in range(n_epochs):
            losses = []
            accuracies = []
            for j in range(batches_per_epoch):
                batch_idxs = torch.from_numpy(
                    np.random.choice(np.arange(images.size(0)),
                                     size=batch_size, replace=False))
                im_batch = images[batch_idxs]
                label_batch = labels[batch_idxs]

                outputs = ANN.forward(im_batch)
                loss = criterion(outputs, label_batch)
                predictions = torch.max(outputs, 1)[1]
                correct = (label_batch == predictions).sum().float() / batch_size

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                losses.append(loss.item())
                accuracies.append(correct.item() * 100)

            outputs = ANN.forward(test_images)
            loss = criterion(outputs, test_labels).item()
            predictions = torch.max(outputs, 1)[1]
            test_accuracy = ((test_labels == predictions).sum().float() /
                             test_labels.numel()).item() * 100

            avg_loss = np.mean(losses)
            avg_acc = np.mean(accuracies)
            print(f'Epoch: {i+1} / {n_epochs}; Train Loss: {avg_loss:.4f}; '
                  f'Train Accuracy: {avg_acc:.4f}')
            print(f'\tTest Loss: {loss:.4f}; Test Accuracy: {test_accuracy:.4f}')

        if save:
            torch.save(ANN.state_dict(),
                       os.path.join(params_path, model_name + '.pt'))

    outputs = ANN.forward(test_images)
    loss = criterion(outputs, test_labels)
    predictions = torch.max(outputs, 1)[1]
    accuracy = ((test_labels == predictions).sum().float() /
                test_labels.numel()).item() * 100

    print()
    print(f'(Post training) Test Loss: {loss:.4f}; Test Accuracy: {accuracy:.4f}')

    print()
    print('Evaluating ANN on adversarial examples from FGSM method...')

    # Convert the pytorch model to a tf model and wrap it in cleverhans.
    tf_model_fn = convert_pytorch_model_to_tf(ANN)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 784))

    # Create an FGSM attack.
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    fgsm_params = {'eps': 0.2, 'clip_min': 0.0, 'clip_max': 1.0}
    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against the FGSM white-box attack.
    total = 0
    correct = 0
    adv_preds = sess.run(adv_preds_op, feed_dict={x_op: test_images})
    correct += (np.argmax(adv_preds, axis=1) == test_labels.cpu().numpy()).sum()
    total += len(test_images)
    accuracy = float(correct) / total

    print()
    print('Adversarial accuracy: {:.3f}'.format(accuracy * 100))

    print()
    print('Converting ANN to SNN...')

    with sess.as_default():
        test_images = adv_x_op.eval(feed_dict={x_op: test_images})
    test_images = torch.tensor(test_images)

    # Do ANN to SNN conversion.
    SNN = ann_to_snn(ANN, input_shape=(784,), data=test_images, percentile=100)

    for l in SNN.layers:
        if l != 'Input':
            SNN.add_monitor(
                Monitor(SNN.layers[l], state_vars=['s', 'v'], time=time),
                name=l)

    print()
    print('Testing SNN on FGSM-modified MNIST data...')
    print()

    # Test SNN on MNIST data.
    spike_ims = None
    spike_axes = None
    correct = []

    n_images = test_images.size(0)
    start = t()
    for i in range(n_images):
        if i > 0 and i % update_interval == 0:
            accuracy = np.mean(correct) * 100
            print(f'Progress: {i} / {n_images}; Elapsed: {t() - start:.4f}; '
                  f'Accuracy: {accuracy:.4f}')
            start = t()

        SNN.run(inpts={'Input': test_images[i].repeat(time, 1, 1)}, time=time)

        spikes = {
            layer: SNN.monitors[layer].get('s') for layer in SNN.monitors
        }
        voltages = {
            layer: SNN.monitors[layer].get('v') for layer in SNN.monitors
        }
        prediction = torch.softmax(voltages['fc3'].sum(1), 0).argmax()
        correct.append((prediction == test_labels[i]).item())

        SNN.reset_()

        if plot:
            spikes = {k: spikes[k].cpu() for k in spikes}
            spike_ims, spike_axes = plot_spikes(spikes, ims=spike_ims,
                                                axes=spike_axes)
            plt.pause(1e-3)
def attack(self, path, session):
    print_and_log(self.logfile, "")  # add a blank line
    print_and_log(self.logfile, 'Attacking model {0:}: '.format(path))

    self.model = self.init_model()
    self.model.load_state_dict(torch.load(path))

    pgd_parameters = self.pgd_params()

    class_index = 0
    context_images, target_images, context_labels, target_labels, \
        context_images_np = None, None, None, None, None

    def model_wrapper(context_point_x):
        # Insert context_point at the correct spot.
        context_images_attack = torch.cat([
            context_images[0:class_index], context_point_x,
            context_images[class_index + 1:]
        ], dim=0)
        target_logits = self.model(context_images_attack, context_labels,
                                   target_images)
        return target_logits[0]

    tf_model_conv = convert_pytorch_model_to_tf(model_wrapper,
                                                out_dims=self.args.way)
    tf_model = cleverhans.model.CallableModelWrapper(tf_model_conv, 'logits')
    pgd = ProjectedGradientDescent(tf_model, sess=session, dtypestr='float32')

    for item in self.test_set:
        for t in range(self.args.attack_tasks):
            task_dict = self.dataset.get_test_task(item, session)
            context_images, target_images, context_labels, target_labels, \
                context_images_np = self.prepare_task(task_dict, shuffle=False)

            # detach() shares storage with the original tensor, which isn't
            # what we want here, so clone() instead.
            context_images_attack_all = context_images.clone()
            # Is requires_grad true here, for context_images?

            for c in torch.unique(context_labels):
                # Adversarially perturb the context image for this class.
                class_index = extract_class_indices(context_labels, c)[0].item()
                context_x = np.expand_dims(context_images_np[class_index], 0)

                # Input to the model wrapper is automatically converted to a
                # Torch tensor for us.
                x = tf.placeholder(tf.float32, shape=context_x.shape)
                adv_x_op = pgd.generate(x, **pgd_parameters)
                preds_adv_op = tf_model.get_logits(adv_x_op)
                feed_dict = {x: context_x}
                adv_x, preds_adv = session.run((adv_x_op, preds_adv_op),
                                               feed_dict=feed_dict)

                context_images_attack_all[class_index] = torch.from_numpy(adv_x)

                save_image(adv_x, os.path.join(self.checkpoint_dir, 'adv.png'))
                save_image(context_x, os.path.join(self.checkpoint_dir, 'in.png'))

                acc_after = torch.mean(
                    torch.eq(target_labels,
                             torch.argmax(torch.from_numpy(preds_adv).to(self.device),
                                          dim=-1)).float()).item()

                with torch.no_grad():
                    logits = self.model(context_images, context_labels,
                                        target_images)
                    acc_before = torch.mean(
                        torch.eq(target_labels,
                                 torch.argmax(logits, dim=-1)).float()).item()
                    del logits

                diff = acc_before - acc_after
                print_and_log(self.logfile,
                              "Task = {}, Class = {} \t Diff = {}".format(t, c, diff))
                print_and_log(self.logfile,
                              "Accuracy before attack: {}".format(acc_before))

            logits = self.model(context_images_attack_all, context_labels,
                                target_images)
            acc_all_attack = torch.mean(
                torch.eq(target_labels,
                         torch.argmax(logits, dim=-1)).float()).item()
            print_and_log(self.logfile,
                          "Accuracy after attack: {}".format(acc_all_attack))
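# attack() above relies on a pgd_params() helper that is not shown. A minimal
# sketch, assuming the attack settings live on self.args (the flag names are
# hypothetical); the keys mirror the ProjectedGradientDescent arguments used
# elsewhere in this document.
def pgd_params(self):
    return {
        'eps': self.args.epsilon,       # perturbation budget
        'eps_iter': self.args.step_size,
        'nb_iter': self.args.num_steps,
        'clip_min': 0.0,
        'clip_max': 1.0,
    }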
total += len(xs)

acc = float(correct) / total
report.clean_train_clean_eval = acc
print('[%s] Clean accuracy on test set: %.2f%%' % (step, acc * 100))

sess = tf.Session()
x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))

# Convert the PyTorch model to a TF model and wrap it in cleverhans.
tf_model_fn = convert_pytorch_model_to_tf(torch_model)
cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

# Create an FGSM attack.
fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
adv_preds_op = tf_model_fn(adv_x_op)

# Run an evaluation of our model against FGSM.
total = 0
correct = 0
counter = 0
for xs, ys in test_loader:
    adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
    correct += (np.argmax(adv_preds, axis=1) == ys.numpy()).sum()
def trans_train(loader, model, opt, epoch, epsilon, ref_model,
                clip_min=0., clip_max=1., eps_iter=0.005, nb_iter=100,
                rand_init=False, verbose=20):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    errors = AverageMeter()

    model.train()

    params = {
        'eps': epsilon,
        'clip_min': clip_min,
        'clip_max': clip_max,
        'eps_iter': eps_iter,
        'nb_iter': nb_iter,
        'rand_init': rand_init
    }

    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    tf_model = convert_pytorch_model_to_tf(ref_model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    attk = ProjectedGradientDescent(cleverhans_model, sess=sess)
    adv_x_op = attk.generate(x_op, **params)

    end = time.time()
    for i, (X, y) in enumerate(loader):
        # Generate adversarial examples against the fixed reference model,
        # then train on them (transfer-based adversarial training).
        X_adv = sess.run(adv_x_op, feed_dict={x_op: X})
        X, y = torch.tensor(X_adv).cuda(), y.cuda()
        data_time.update(time.time() - end)

        out = model(X)
        ce = nn.CrossEntropyLoss()(out, y)
        err = (out.data.max(1)[1] != y).float().sum() / X.size(0)

        opt.zero_grad()
        ce.backward()
        opt.step()

        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(ce.item(), X.size(0))
        errors.update(err.item(), X.size(0))

        if verbose and i % verbose == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Error {errors.val:.3f} ({errors.avg:.3f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, errors=errors))
        if DEBUG and i == 10:
            break

    return losses.avg, errors.avg
def evaluate_trans(loader, dataset, model, epoch, epsilon, ref_model,
                   clip_min=0., clip_max=1., eps_iter=0.005, nb_iter=100,
                   rand_init=False, verbose=20):
    batch_time = AverageMeter()
    losses = AverageMeter()
    errors = AverageMeter()

    params = {
        'eps': epsilon,
        'clip_min': clip_min,
        'clip_max': clip_max,
        'eps_iter': eps_iter,
        'nb_iter': nb_iter,
        'rand_init': rand_init
    }

    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))

    model.eval()
    ref_model.eval()

    tf_model = convert_pytorch_model_to_tf(ref_model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    attk = ProjectedGradientDescent(cleverhans_model, sess=sess)
    adv_x_op = attk.generate(x_op, **params)

    end = time.time()
    for i, (X, y) in enumerate(loader):
        X_adv = sess.run(adv_x_op, feed_dict={x_op: X})
        X, y = torch.tensor(X_adv).cuda(), y.cuda()

        out = model(X)
        ce = nn.CrossEntropyLoss()(out, y)
        err = (out.data.max(1)[1] != y).float().sum() / X.size(0)

        # Measure accuracy and record loss.
        losses.update(ce.item(), X.size(0))
        errors.update(err.item(), X.size(0))

        # Measure elapsed time.
        batch_time.update(time.time() - end)
        end = time.time()

        endline = '\n' if i % verbose == 0 else '\r'
        print('Adv test: [{0}/{1}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Error {error.val:.3f} ({error.avg:.3f})'.format(
                  i, len(loader), batch_time=batch_time, loss=losses,
                  error=errors), end=endline)
        if DEBUG and i == 10:
            break

    print('\n * Error {error.avg:.3f}'.format(error=errors))
    return losses.avg, errors.avg
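# A minimal sketch of how trans_train() / evaluate_trans() might be driven,
# assuming a trainable model, a fixed reference model, and MNIST loaders
# already exist; every name here is a placeholder, not part of the original.
opt = optim.SGD(model.parameters(), lr=0.01)
for epoch in range(10):
    # Train on PGD examples crafted against the reference model, then
    # measure adversarial error on the held-out set with the same attack.
    train_loss, train_err = trans_train(train_loader, model, opt, epoch,
                                        epsilon=0.3, ref_model=ref_model)
    val_loss, val_err = evaluate_trans(test_loader, 'mnist', model, epoch,
                                       epsilon=0.3, ref_model=ref_model)
    print(f'epoch {epoch}: train err {train_err:.3f}, adv val err {val_err:.3f}')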
def train_child(t, p, m, load_dict=False):
    # model = nn.DataParallel(TestCNN().cuda(1), device_ids=[1, 2, 3])
    raw_model = TestCNN().cuda(0)
    model = TestCNN().cuda(0)

    tf_model = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    session = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))

    fgsm = FastGradientMethod(cleverhans_model, sess=session)
    # stm = SpatialTransformationMethod(cleverhans_model, sess=session)
    # cw2 = CarliniWagnerL2(cleverhans_model, sess=session)
    pgd = ProjectedGradientDescent(cleverhans_model, sess=session)
    noise = Noise(cleverhans_model, sess=session)
    mim = MomentumIterativeMethod(cleverhans_model, sess=session)
    df = DeepFool(cleverhans_model, sess=session)

    tf_raw_model = convert_pytorch_model_to_tf(raw_model)
    cleverhans_raw_model = CallableModelWrapper(tf_raw_model,
                                                output_layer='logits')
    # pgd_raw = ProjectedGradientDescent(cleverhans_raw_model, sess=session)
    noise_raw = Noise(cleverhans_raw_model, sess=session)

    def fgsm_op(x, eps):
        att = fgsm.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    # def stm_op(x, eps):
    #     att = stm.generate(x_op, batch_size=len(x), dx_min=-0.1*eps,
    #                        dx_max=0.1*eps, dy_min=-0.1*eps, dy_max=0.1*eps,
    #                        angle_min=-30*eps, angle_max=30*eps)
    #     return session.run(att, feed_dict={x_op: x})

    # def cw2_op(x, eps):
    #     att = cw2.generate(x_op, max_iterations=3)

    def pgd_op(x, eps):
        att = pgd.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
        return session.run(att, feed_dict={x_op: x})

    # def pgd_raw_op(x, eps):
    #     att = pgd_raw.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
    #     return session.run(att, feed_dict={x_op: x})

    def noise_op(x, eps):
        att = noise.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def noise_raw_op(x, eps):
        att = noise_raw.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def df_op(x):
        att = df.generate(x_op, nb_candidate=10, max_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def mim_op(x, eps):
        att = mim.generate(x_op, eps=eps, eps_iter=eps * 0.2)
        return session.run(att, feed_dict={x_op: x})

    def attack_train(x):
        attacks = [fgsm_op, pgd_op, mim_op]
        attacks_name = ['FGSM', 'PGD', 'MIM']
        eps = [[0.03, 0.3], [0.03, 0.3], [0.03, 0.3]]
        train_x_adv = x.copy()
        # Assign each example to one subpolicy at random.
        adv_type = np.random.randint(SUBPOLICY_COUNT, size=len(train_x_adv))
        for i, (ti, pi, mi) in enumerate(
                tqdm(zip(t, p, m), total=len(t), desc='Subpolicy: ',
                     leave=False)):
            adv_i = train_x_adv[adv_type == i]
            for j, (tj, pj, mj) in enumerate(
                    tqdm(zip(ti, pi, mi), total=len(ti), desc='Operation: ',
                         leave=False)):
                tj, pj, mj = (*tj, *pj, *mj)
                # Apply this operation to each example with probability pj.
                # (np.random.rand draws uniform [0, 1) samples; the original
                # used np.random.randn, which does not yield probability pj.)
                sel = np.random.rand(len(adv_i)) < pj
                adv_j = adv_i[sel]
                for k in tqdm(range(0, len(adv_j), BATCH_SIZE),
                              desc=attacks_name[tj] + ': ', leave=False):
                    adv_j[k:][:BATCH_SIZE] = attacks[tj](
                        adv_j[k:][:BATCH_SIZE],
                        (mj + 1) / MAGN_COUNT * (eps[tj][1] - eps[tj][0])
                        + eps[tj][0])
                # Fancy indexing returns copies, so write the perturbed
                # examples back explicitly (the original silently dropped them).
                adv_i[sel] = adv_j
            train_x_adv[adv_type == i] = adv_i
        return train_x_adv

    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    raw_optimizer = optim.SGD(raw_model.parameters(), lr=1e-3)

    train_x_adv = attack_train(train_x)

    adv_trainset = torch.utils.data.TensorDataset(
        torch.tensor(train_x_adv, dtype=torch.float),
        torch.tensor(train_y, dtype=torch.long))
    # Load adv_trainset here; the original passed `trainset`, which would
    # silently train on clean data.
    adv_trainloader = torch.utils.data.DataLoader(adv_trainset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True, num_workers=4)

    if load_dict:
        model.load_state_dict(torch.load('black_eval_runs/model.pt'))
        optimizer.load_state_dict(torch.load('black_eval_runs/optimizer.pt'))
        raw_model.load_state_dict(torch.load('black_eval_runs/raw_model.pt'))
        raw_optimizer.load_state_dict(
            torch.load('black_eval_runs/raw_optimizer.pt'))

    # Train the child model on adversarial data.
    model.train()
    batch_tqdm = tqdm(adv_trainloader, leave=False)
    for x, y in batch_tqdm:
        optimizer.zero_grad()
        output = model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        optimizer.step()
        acc = (output.cpu().argmax(axis=1) == y).float().mean().item()
        batch_tqdm.set_description(f'adv {loss.item():.3f} {acc:.3f}')

    # Train the raw model on clean data.
    batch_tqdm = tqdm(trainloader, leave=False)
    raw_model.train()
    for x, y in batch_tqdm:
        raw_optimizer.zero_grad()
        output = raw_model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        raw_optimizer.step()
        acc = (output.cpu().argmax(axis=1) == y).float().mean().item()
        batch_tqdm.set_description(f'raw {loss.item():.3f} {acc:.3f}')

    with torch.no_grad():
        # Adversarially trained model, clean validation data.
        model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_raw_acc = tot_acc / len(val_x)

        # Adversarially trained model, noise-perturbed validation data.
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE), desc='Noise: ',
                      leave=False):
            val_x_adv[i:][:BATCH_SIZE] = noise_op(val_x[i:][:BATCH_SIZE], 0.3)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_adv_acc = tot_acc / len(val_x)

        # Raw model, clean validation data.
        raw_model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_raw_acc = tot_acc / len(val_x)

        # Raw model, noise-perturbed validation data.
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE), desc='Noise: ',
                      leave=False):
            val_x_adv[i:][:BATCH_SIZE] = noise_raw_op(val_x[i:][:BATCH_SIZE], 0.3)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_adv_acc = tot_acc / len(val_x)

    with open('black_eval_runs/acc.csv', 'a') as f:
        f.write(f'{adv_raw_acc},{adv_adv_acc},{raw_raw_acc},{raw_adv_acc}\n')
    print(f'adv {adv_raw_acc:.3f} -> {adv_adv_acc:.3f} | '
          f'raw {raw_raw_acc:.3f} -> {raw_adv_acc:.3f}')

    torch.save(model.state_dict(), 'black_eval_runs/model.pt')
    torch.save(optimizer.state_dict(), 'black_eval_runs/optimizer.pt')
    torch.save(raw_model.state_dict(), 'black_eval_runs/raw_model.pt')
    torch.save(raw_optimizer.state_dict(), 'black_eval_runs/raw_optimizer.pt')
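# A hedged sketch of invoking train_child() above. Each subpolicy is a list
# of operations, and each operation is a one-element sequence holding the
# attack-type index, the application probability, and the magnitude bin,
# matching how attack_train() unpacks (t, p, m). The constants are assumptions.
SUBPOLICY_COUNT, OP_COUNT, MAGN_COUNT = 5, 2, 10

t = np.random.randint(3, size=(SUBPOLICY_COUNT, OP_COUNT, 1))   # FGSM/PGD/MIM index
p = np.random.randint(MAGN_COUNT,
                      size=(SUBPOLICY_COUNT, OP_COUNT, 1)) / MAGN_COUNT  # probability
m = np.random.randint(MAGN_COUNT, size=(SUBPOLICY_COUNT, OP_COUNT, 1))   # magnitude bin

train_child(t, p, m, load_dict=False)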
def evaluate_model(self, model_path, num_batches=-1, model_device=None):
    model_device = model_device or device
    start_sec = time()

    with tf.Session() as sess:
        torch_model_orig = torch.load(
            model_path, map_location=torch.device('cpu')).to(model_device)
        # [0]: convert to the standard output format.
        torch_model = lambda x: torch_model_orig(x.to(model_device))[0].to(device)

        tf_model_fn = convert_pytorch_model_to_tf(torch_model, out_dims=10)
        cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                output_layer='logits')
        # Fix an error with SPSA: "ValueError: Tried to convert 'depth' to a
        # tensor and failed. Error: None values not supported."
        cleverhans_model.nb_classes = 10

        # It is important to shuffle the data, since we'll measure the
        # standard deviation of accuracy across batches.
        test_loader = torch.utils.data.DataLoader(self.test_data,
                                                  batch_size=self.batch_size,
                                                  shuffle=True)
        # Grab one batch to get the shape of the input.
        x_test_sample, _ = next(iter(test_loader))
        nchannels, img_rows, img_cols = x_test_sample.shape[1:]

        x = tf.placeholder(tf.float32,
                           shape=(None, nchannels, img_rows, img_cols))
        y = tf.placeholder(tf.int32, shape=(None,))

        attack_model = self.attack(cleverhans_model, sess=sess)
        clean_preds_op = tf_model_fn(x)
        preds_op = tf_model_fn(x)

        # # To use generate() instead of generate_np():
        # self.params['y'] = y
        # advs = attack_model.generate(x, **self.params)
        # adv_preds_op = tf_model_fn(advs)

        # Run an evaluation of our model against the configured attack.
        self.saved_xs, self.saved_advs, self.saved_ys, \
            self.saved_adv_preds, self.saved_clean_preds = [], [], [], [], []
        accuracies = []
        try:
            for batch_no, (xs, ys) in enumerate(test_loader):
                if self.attack == SPSA:
                    self.params['y'] = ys.numpy().astype(np.int32)
                else:
                    ys_one_hot = torch.nn.functional.one_hot(ys, 10).numpy()
                    if self.attack == MaxConfidence:
                        self.params['y'] = ys_one_hot.astype(np.float32)
                    else:
                        self.params['y'] = ys_one_hot.astype(np.int32)

                # Using generate_np() or generate() leads to similar
                # performance; not sure if the GPU is fully utilized...
                advs = attack_model.generate_np(xs.numpy(), **self.params)
                adv_preds = sess.run(preds_op, feed_dict={x: advs})
                clean_preds = sess.run(preds_op, feed_dict={x: xs})
                # clean_preds, adv_preds = sess.run(
                #     [clean_preds_op, adv_preds_op],
                #     feed_dict={x: xs.numpy(), y: ys.numpy()})

                correct = (np.argmax(adv_preds, axis=1) == ys.numpy()).sum()
                total = test_loader.batch_size

                self.saved_xs.append(xs)
                self.saved_ys.append(ys)
                self.saved_advs.append(advs)
                self.saved_adv_preds.append(adv_preds)
                self.saved_clean_preds.append(clean_preds)
                accuracies.append(correct / total)

                if self.report_interval > 0 and batch_no % self.report_interval == 0:
                    elapsed_sec = time() - start_sec
                    print('%s: Batch: #%d, accuracy: %.2f, std: %.2f, %.1f secs/batch'
                          % (self.attack_name, batch_no, np.mean(accuracies),
                             np.std(accuracies), elapsed_sec / (batch_no + 1)),
                          file=sys.stderr)
                if num_batches > 0 and batch_no + 1 >= num_batches:
                    break
        except KeyboardInterrupt:
            print('Evaluation aborted', file=sys.stderr)

    self._process_saved_info()
    print('%s: Accuracy under attack: %.2f (std=%.2f)'
          % (self.attack_name, np.mean(accuracies), np.std(accuracies)),
          file=sys.stderr)
    return accuracies
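# A hedged sketch of configuring the evaluator above for FGSM. The class name
# and constructor are hypothetical; only attack, attack_name, params,
# batch_size, report_interval, and test_data are fields evaluate_model()
# actually reads.
evaluator = AttackEvaluator()  # hypothetical class holding evaluate_model()
evaluator.attack = FastGradientMethod
evaluator.attack_name = 'FGSM'
evaluator.params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
evaluator.batch_size = 128
evaluator.report_interval = 10
evaluator.test_data = datasets.MNIST('data', train=False,
                                     transform=transforms.ToTensor())

accuracies = evaluator.evaluate_model('model.pt', num_batches=10)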
def mnist_tutorial(nb_epochs=6, batch_size=128, train_end=-1, test_end=-1,
                   learning_rate=0.001):
    """
    MNIST cleverhans tutorial
    :param nb_epochs: number of epochs to train the model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Train a PyTorch MNIST model.
    torch_model = PytorchMnistModel()
    if torch.cuda.is_available():
        torch_model = torch_model.cuda()
    report = AccuracyReport()

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True,
                       transform=transforms.ToTensor()),
        batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
        batch_size=batch_size)

    # Truncate the datasets so that our test runs more quickly.
    train_loader.dataset.train_data = train_loader.dataset.train_data[:train_end]
    test_loader.dataset.test_data = test_loader.dataset.test_data[:test_end]

    # Train our model.
    optimizer = optim.Adam(torch_model.parameters(), lr=learning_rate)
    train_loss = []

    total = 0
    correct = 0
    step = 0
    for epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            loss = F.nll_loss(preds, ys)
            loss.backward()  # Calculate gradients.
            train_loss.append(loss.data.item())
            optimizer.step()  # Update weights.

            preds_np = preds.data.cpu().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys.data.cpu().numpy()).sum()
            total += len(xs)
            step += 1

            if total % 1000 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0

    # Evaluate on clean data.
    total = 0
    correct = 0
    for xs, ys in test_loader:
        xs, ys = Variable(xs), Variable(ys)
        if torch.cuda.is_available():
            xs, ys = xs.cuda(), ys.cuda()

        preds = torch_model(xs)
        preds_np = preds.data.cpu().numpy()
        correct += (np.argmax(preds_np, axis=1) == ys.data.cpu().numpy()).sum()
        total += len(xs)

    acc = float(correct) / total
    report.clean_train_clean_eval = acc
    print('[%s] Clean accuracy: %.2f%%' % (step, acc * 100))

    # We use TF for evaluation on adversarial data.
    sess = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))

    # Convert the PyTorch model to a TF model and wrap it in cleverhans.
    tf_model_fn = convert_pytorch_model_to_tf(torch_model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create an FGSM attack.
    fgsm_op = FastGradientMethod(cleverhans_model, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Run an evaluation of our model against FGSM.
    total = 0
    correct = 0
    for xs, ys in test_loader:
        adv_preds = sess.run(adv_preds_op, feed_dict={x_op: xs})
        correct += (np.argmax(adv_preds, axis=1) == ys.numpy()).sum()
        total += len(xs)

    acc = float(correct) / total
    print('Adv accuracy: {:.3f}%'.format(acc * 100))
    report.clean_train_adv_eval = acc
    return report
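# One way to run the tutorial above from the command line; the truncated
# dataset sizes are hypothetical values for a quick smoke test.
if __name__ == '__main__':
    report = mnist_tutorial(nb_epochs=1, train_end=5000, test_end=1000)
    print('clean: %.4f, adv: %.4f'
          % (report.clean_train_clean_eval, report.clean_train_adv_eval))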
def forward(self, x):
    y = self.model(x)
    if y.dim() == 3:
        return y.squeeze(1)
    else:
        return y


# model = cuda(DFSGlimpseSingleObjectClassifier())
model = cuda(tvmodels.ResNet(tvmodels.resnet.BasicBlock, [2, 2, 2, 2], 10))
model.load_state_dict(T.load('model.pt'))

s = tf.Session()
x_op = tf.placeholder(tf.float32, shape=(None, 3, 200, 200))
tf_model_fn = convert_pytorch_model_to_tf(cuda(TemporaryModule(model)))
cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

fgsm_op = FastGradientMethod(cleverhans_model, sess=s)
fgsm_params = {'eps': 0.01, 'clip_min': 0, 'clip_max': 1}
adv_x_op = fgsm_op.generate(x_op, **fgsm_params)
adv_preds_op = tf_model_fn(adv_x_op)
preds_op = tf_model_fn(x_op)

total = 0
correct = 0
adv_correct = 0
nviz = 0

for xs, ys in loader:
    y = ys[0].item()