weight_decay=param['weight_decay']) for epoch in range(param['num_epochs']): print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs'])) for t, (x, y) in enumerate(loader_train): x_var, y_var = to_var(x), to_var(y.long()) loss = criterion(net(x_var), y_var) # adversarial training if epoch + 1 > param['delay']: # use predicted label to prevent label leaking y_pred = pred_batch(x, net) x_adv = adv_train(x, y_pred, net, criterion, adversary) x_adv_var = to_var(x_adv) loss_adv = criterion(net(x_adv_var), y_var) loss = (loss + loss_adv) / 2 if (t + 1) % 100 == 0: print('t = %d, loss = %.8f' % (t + 1, loss.item())) optimizer.zero_grad() loss.backward() optimizer.step() test(net, loader_test) torch.save(net.state_dict(), 'models/adv_trained_lenet5.pkl')
# Freeze the network(s) before evaluation: Bayesian methods carry an
# ensemble (model_list); every other method has a single `net`.
if method in ('BayesWRM', 'Bayes'):
    for net in model_list:
        for p in net.parameters():
            p.requires_grad = False
        net.eval()
else:
    for p in net.parameters():
        p.requires_grad = False
    net.eval()

valset = datasets.MNIST('data-mnist', train=False, transform=val_transforms)
loader_test = DataLoader(valset,
                         batch_size=param['test_batch_size'],
                         shuffle=False)

test(model_list, loader_test)

# Sweep FGSM attack strength and report adversarial accuracy per epsilon.
for epsilon in epsilon_set:
    # Data loaders
    if method in ('BayesWRM', 'Bayes'):
        adversary = FGSMAttack(model_list, epsilon,
                               is_train=False, advtraining='Bayes')
        advacc = attack_over_test_data(model_list, adversary, param, loader_test)
    else:
        adversary = FGSMAttack(net, epsilon,
                               is_train=False, advtraining=method)
        advacc = attack_over_test_data(net, adversary, param, loader_test)
    print('method', method, 'adv accuracy', advacc)
def MNIST_bbox_sub(param, loader_hold_out, loader_test):
    """Train a substitute model using Jacobian data augmentation (arXiv:1602.02697).

    The substitute learns to mimic the black-box oracle: it is trained on a
    small held-out seed set, then the training set is repeatedly enlarged with
    Jacobian-based synthetic points that the oracle labels.

    Args:
        param: hyper-parameter dict; reads 'oracle_name', 'learning_rate',
            'data_aug', 'nb_epochs', 'test_batch_size', 'hold_out_size'.
        loader_hold_out: DataLoader yielding the initial held-out seed data.
        loader_test: DataLoader used to evaluate the substitute.

    Side effects:
        Saves the trained substitute to ``param['oracle_name'] + '_sub.pkl'``.
    """
    # Setup the substitute
    net = SubstituteModel()
    if torch.cuda.is_available():
        print('CUDA enabled for the substitute.')
        net.cuda()
    net.train()

    # Setup the oracle
    oracle = LeNet5()
    if torch.cuda.is_available():
        print('CUDA enabled for the oracle.')
        oracle.cuda()
    oracle.load_state_dict(torch.load(param['oracle_name'] + '.pkl'))
    oracle.eval()

    # Setup training
    criterion = nn.CrossEntropyLoss()

    # Careful optimization is crucial to train a well-representative
    # substitute. In Tensorflow Adam has some problem:
    # (https://github.com/tensorflow/cleverhans/issues/183)
    # But it works fine here in PyTorch (you may try other optimization
    # methods)
    optimizer = torch.optim.Adam(net.parameters(), lr=param['learning_rate'])

    # Data held out for initial training.
    data_iter = iter(loader_hold_out)
    # BUGFIX: iterator `.next()` is Python-2-only (removed from DataLoader
    # iterators as well); use the builtin next().
    X_sub, y_sub = next(data_iter)
    X_sub, y_sub = X_sub.numpy(), y_sub.numpy()

    # Train the substitute and augment dataset alternatively
    for rho in range(param['data_aug']):
        print("Substitute training epoch #" + str(rho))
        print("Training data: " + str(len(X_sub)))

        rng = np.random.RandomState()

        # model training
        for epoch in range(param['nb_epochs']):
            print('Starting epoch %d / %d' % (epoch + 1, param['nb_epochs']))

            # Compute number of batches
            nb_batches = int(
                np.ceil(float(len(X_sub)) / param['test_batch_size']))
            assert nb_batches * param['test_batch_size'] >= len(X_sub)

            # Indices to shuffle training set
            index_shuf = list(range(len(X_sub)))
            rng.shuffle(index_shuf)

            for batch in range(nb_batches):
                # Compute batch start and end indices
                start, end = batch_indices(batch, len(X_sub),
                                           param['test_batch_size'])
                x = X_sub[index_shuf[start:end]]
                y = y_sub[index_shuf[start:end]]

                scores = net(to_var(torch.from_numpy(x)))
                loss = criterion(scores, to_var(torch.from_numpy(y).long()))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # BUGFIX: `loss.data[0]` raises on 0-dim tensors in modern
            # PyTorch; `.item()` is the supported scalar accessor.
            print('loss = %.8f' % (loss.item()))

        test(net, loader_test, blackbox=True,
             hold_out_size=param['hold_out_size'])

        # If we are not at last substitute training iteration, augment dataset
        if rho < param['data_aug'] - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(net, X_sub, y_sub)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            scores = oracle(to_var(torch.from_numpy(X_sub)))
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            y_sub = np.argmax(scores.data.cpu().numpy(), axis=1)

    torch.save(net.state_dict(), param['oracle_name'] + '_sub.pkl')
# Train the substitute, then attack the oracle with FGSM gradients
# computed on the substitute (black-box transfer attack).
MNIST_bbox_sub(param, loader_hold_out, loader_test)

# Setup models
net = SubstituteModel()
oracle = LeNet5()
net.load_state_dict(torch.load(param['oracle_name'] + '_sub.pkl'))
oracle.load_state_dict(torch.load(param['oracle_name'] + '.pkl'))
if torch.cuda.is_available():
    net.cuda()
    oracle.cuda()
    # BUGFIX: message typo 'ensabled' -> 'enabled'.
    print('CUDA enabled.')

# Freeze the substitute: only used to provide attack gradients from here on.
for p in net.parameters():
    p.requires_grad = False
net.eval()
oracle.eval()

# Setup adversarial attacks
adversary = FGSMAttack(net, param['epsilon'])

print('For the substitute model:')
test(net, loader_test, blackbox=True, hold_out_size=param['hold_out_size'])

# Setup oracle
# BUGFIX: missing separator before the name, and 'agaist' -> 'against'.
print('For the oracle ' + param['oracle_name'])
print('against blackbox FGSM attacks using gradients from the substitute:')
attack_over_test_data(net, adversary, param, loader_test, oracle)
} # Setup model to be attacked net = models.resnet18(pretrained=False) dim_in = net.fc.in_features net.fc = nn.Linear(dim_in, 2) num_epoch = 20 for epoch in range(num_epoch): print('{}/{}'.format(epoch + 1, num_epoch)) print('-' * 10) net.load_state_dict(torch.load('model_save/' + str(epoch) + '.pth')) if torch.cuda.is_available(): print('CUDA ensabled.') net.cuda() for p in net.parameters(): p.requires_grad = False net.eval() test(net, dset_loaders['val']) # Adversarial attack adversary = FGSMAttack(net, param['epsilon']) # adversary = LinfPGDAttack(net, random_start=False) t0 = time() attack_over_test_data(net, adversary, param, dset_loaders['val']) print('{}s eclipsed.'.format(time() - t0)) print('Finish attacking!') print()
return dataset,dataloader,model_num_labels def attack(targeted_model, random_start=False,args): if args.attack=='FGSM': from adversarialbox.attacks import FGSMAttack adversary=FGSMAttack(targeted_model,args.epsilon) if args.attack=='BIM': from adversarialbox.attacks import LinfPGDAttack adversary=LinfPGDAttack(targeted_model, random_start) def load_pretrained_model(model): pretrained_model =join('pretrained_model',args.pretrained_model) model.load_state_dict(torch.load(pretrained_model)) return model if __name__ == "__main__": targeted_model=load_targeted_model().to(device) targeted_model=load_pretrained_model(targeted_model) # for p in targeted_model.parameters(): # p.requires_grad = False targeted_model.eval() dataset,dataloader,data_num_labels=make_dataset(args.mode) test(targeted_model, dataloader) # Adversarial attack # adversary = FGSMAttack(targeted_model, args.epsilon) adversary = attack(targeted_model, random_start=False,args) t0 = time() # attack_over_test_data(targeted_model, adversary,train_dataloader ,args) attack_over_test_data_and_save(targeted_model, adversary,dataset ,args) print('{}s eclipsed.'.format(time()-t0))