def main():
    # use the GPU when available, otherwise fall back to the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Net().to(device)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = torch.nn.CrossEntropyLoss()
    train_loader = Data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
    test_loader = Data.DataLoader(dataset=test_data, batch_size=batch_size)
    adversary = FGSMAttack(epsilon=0.2)

    for epoch in range(epochs):
        model.train()
        for t, (x, y) in enumerate(train_loader):
            x_var, y_var = to_var(x), to_var(y.long())
            loss = criterion(model(x_var), y_var)

            # adversarial training
            if epoch + 1 > delay:
                # use predicted labels to prevent label leaking
                y_pred = pred_batch(x, model)
                x_adv = adv_train(x, y_pred, model, criterion, adversary)
                x_adv_var = to_var(x_adv)
                loss_adv = criterion(model(x_adv_var), y_var)
                loss = (loss + loss_adv) / 2

            if (t + 1) % 10 == 0:
                print('t = %d, loss = %.8f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # after each epoch, measure test accuracy: switch to eval mode and disable gradients
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            sum_val_loss = 0.0
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_loss = criterion(outputs, labels)
                sum_val_loss += val_loss.item()
                # take the class with the highest score
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print('epoch=%d accuracy=%.02f%% val_loss=%.02f' % (epoch + 1, 100 * correct / total, sum_val_loss))

    torch.save(model.state_dict(), './cifar-adv-pytorch/net.pth')
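# The loop above relies on helpers (to_var, pred_batch, adv_train) and an FGSMAttack class that
# are defined elsewhere in the project. The sketch below shows one plausible, adversarialbox-style
# implementation; the exact names and signatures are assumptions made for illustration, not
# necessarily the code used here.
import numpy as np
import torch
import torch.nn as nn

def to_var(x, requires_grad=False):
    # move a tensor to the GPU when available and set requires_grad
    if torch.cuda.is_available():
        x = x.cuda()
    return x.clone().detach().requires_grad_(requires_grad)

def pred_batch(x, model):
    # predicted labels for a clean batch (used instead of ground truth to avoid label leaking)
    scores = model(to_var(x))
    return torch.max(scores.data, 1)[1].cpu()

def adv_train(X, y, model, criterion, adversary):
    # build adversarial examples for a batch with the given attack
    adversary.model = model
    X_adv = adversary.perturb(X.numpy(), y.numpy())
    return torch.from_numpy(X_adv)

class FGSMAttack(object):
    def __init__(self, model=None, epsilon=None):
        self.model = model
        self.epsilon = epsilon

    def perturb(self, X_nat, y):
        # one-step FGSM: x_adv = clip(x + epsilon * sign(grad_x loss))
        X = np.copy(X_nat)
        X_var = to_var(torch.from_numpy(X), requires_grad=True)
        y_var = to_var(torch.from_numpy(y).long())
        loss = nn.CrossEntropyLoss()(self.model(X_var), y_var)
        loss.backward()
        X += self.epsilon * X_var.grad.data.cpu().sign().numpy()
        return np.clip(X, 0.0, 1.0)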
def attack_over_test(testloader, net, criterion, adversary):
    net.eval()
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    end = time.time()
    bar = Bar('Processing', max=len(testloader))
    for batch_idx, data_info in enumerate(testloader):
        inputs = data_info[0]
        targets = data_info[1].long()
        # adv_inputs = inputs
        adv_inputs = adv_train(inputs, targets, net, criterion, adversary)
        adv_inputs, targets = adv_inputs.to(device), targets.to(device)
        outputs = net(adv_inputs)
        loss = criterion(outputs, targets)

        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))

        batch_time.update(time.time() - end)
        end = time.time()
        bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1:.2f}'.format(
            batch=batch_idx + 1,
            size=len(testloader),
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg)
        bar.next()
    bar.finish()
    return losses.avg, top1.avg
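# attack_over_test() also assumes a few utilities that are not shown here: Bar comes from the
# `progress` package (from progress.bar import Bar), while AverageMeter and accuracy are the
# usual helpers from the PyTorch ImageNet example. A sketch of the latter two, in case they are
# not already defined in the project:
class AverageMeter(object):
    """Keeps track of the current value, sum, count and running average."""
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)  # indices of the top-k scores per sample
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res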
                       weight_decay=param['weight_decay'])

for epoch in range(param['num_epochs']):
    print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))
    for t, (x, y) in enumerate(loader_train):
        x_var, y_var = to_var(x), to_var(y.long())
        loss = criterion(net(x_var), y_var)

        # adversarial training
        if epoch + 1 > param['delay']:
            # use predicted labels to prevent label leaking
            y_pred = pred_batch(x, net)
            x_adv = adv_train(x, y_pred, net, criterion, adversary)
            x_adv_var = to_var(x_adv)
            loss_adv = criterion(net(x_adv_var), y_var)
            loss = (loss + loss_adv) / 2

        if (t + 1) % 100 == 0:
            print('t = %d, loss = %.8f' % (t + 1, loss.item()))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

test(net, loader_test)
torch.save(net.state_dict(), 'models/adv_trained_lenet5.pkl')
# Change optimizer for finetuning
optimizer = optim.Adam(net.parameters())
for e in range(param['nepochs']):
    print('Starting epoch %d' % (e + 1))
    for t, (x_input, y_label) in enumerate(train_loader):
        x_var, y_var = to_var(x_input), to_var(y_label.long())
        if args.advtraining == 'BayesWRM' or args.advtraining == 'Bayes':
            if args.advtraining == 'BayesWRM':
                x_adv = x_input.cpu()
                x_adv = adv_train(X=x_adv, y=y_label.cpu().long(), model=model_list,
                                  criterion=criterion, adversary=adversary)
            for i in range(len(model_list)):
                optimizer = SGAdaHMC(model_list[i].parameters(),
                                     config=dict(lr=args.initlr, T=args.T_out))
                if advtraining == 'BayesWRM':
                    def helper():
                        def feval():
                            loss_adv = 0
                            for k in range(len(x_adv)):
                                x_adv_var = to_var(torch.from_numpy(x_adv[k].astype(np.float32)))
    def train(self):
        self.epochs = self.params.epochs
        criterion = nn.CrossEntropyLoss()
        start_epoch = 0
        if advtraining == 'BayesWRM' or advtraining == 'Bayes':
            for net in self.model_list:
                net.train()
        else:
            self.model.train()

        print("Starting training")
        self.print_info()
        init_lr = args.initlr

        for epoch in range(start_epoch, self.params.epochs):
            print('start epoch', str(epoch))
            print('advtraining method', advtraining)
            for i, (images, labels) in enumerate(self.train_loader):
                X, y = images.cuda(), labels.cuda()
                x_var, y_var = to_var(X), to_var(y)
                if adversary is not None:
                    x_adv = X.cpu()
                    if advtraining == 'BayesWRM':
                        x_adv = adv_train(X=x_adv, y=labels.cpu().long(), model=self.model_list,
                                          criterion=criterion, adversary=adversary)
                        # update every model in the ensemble with the SG-MCMC outer optimizer
                        for j in range(Stheta):
                            x_adv_temp = list(x_adv)  # copy so the deletion below does not mutate x_adv
                            if args.multi:
                                del x_adv_temp[j]
                            if epoch < 2:
                                lr = init_lr
                            elif epoch < 10:
                                lr = 0.1 * init_lr
                            else:
                                lr = 0.05 * init_lr
                            if args.outoptimizer == 'SGHMC':
                                optimizer = SGHMC(filter(lambda p: p.requires_grad,
                                                         self.model_list[j].parameters()),
                                                  config=dict(lr=lr, T=args.T_out))
                            elif args.outoptimizer == 'SGAdaHMC':
                                optimizer = SGAdaHMC(filter(lambda p: p.requires_grad,
                                                            self.model_list[j].parameters()),
                                                     config=dict(lr=0.01, T=args.T_out))
                            else:
                                raise NotImplementedError('Outer optimizer not implemented')

                            def helper():
                                def feval():
                                    # adversarial loss over the perturbed samples ...
                                    loss_adv = 0
                                    for k in range(len(x_adv_temp)):
                                        x_adv_var = to_var(torch.from_numpy(x_adv_temp[k].astype(np.float32)))
                                        loss_adv = loss_adv + criterion(self.model_list[j](x_adv_var), y_var)
                                    # ... plus the clean loss
                                    loss_adv = loss_adv + criterion(self.model_list[j](x_var), y_var)
                                    loss_adv = loss_adv / 2.0
                                    optimizer.zero_grad()
                                    loss_adv.backward()
                                    return loss_adv
                                return feval

                            loss = optimizer.step(helper())
                    else:
                        x_adv = adv_train(x_adv, y.cpu().long(), self.model, criterion, adversary)
                        x_adv_var = to_var(x_adv)
                        loss_adv = criterion(self.model(x_adv_var), y_var)
                        loss = (loss_adv + criterion(self.model(x_var), y_var)) / 2.0
                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()
                else:
                    if advtraining == 'Bayes':
                        for j in range(Stheta):
                            if epoch < 2:
                                lr = init_lr
                            elif epoch < 10:
                                lr = 0.1 * init_lr
                            else:
                                lr = 0.05 * init_lr
                            if args.outoptimizer == 'SGHMC':
                                optimizer = SGHMC(filter(lambda p: p.requires_grad,
                                                         self.model_list[j].parameters()),
                                                  config=dict(lr=lr, T=args.T_out))
                            elif args.outoptimizer == 'SGAdaHMC':
                                optimizer = SGAdaHMC(filter(lambda p: p.requires_grad,
                                                            self.model_list[j].parameters()),
                                                     config=dict(lr=0.01, T=args.T_out))
                            else:
                                raise NotImplementedError('Outer optimizer not implemented')

                            def helper():
                                def feval():
                                    loss_adv = criterion(self.model_list[j](x_var), y_var)
                                    optimizer.zero_grad()
                                    loss_adv.backward()
                                    return loss_adv
                                return feval

                            loss = optimizer.step(helper())
                    else:
                        loss = criterion(self.model(x_var), y_var)
                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()

                if self.params.extra_debug and (i + 1) % (self.params.batch_size * 4) == 0:
                    print('Epoch: [{0}/{1}], Step: [{2}/{3}], Loss: {4}'.format(
                        epoch + 1, self.params.epochs, i + 1, len(self.train_loader), loss.item()))

            print('entering validation; the advtraining method is', advtraining)
            if advtraining == 'BayesWRM' or advtraining == 'Bayes':
                train_acc, train_loss = self.validate_model(self.train_loader, self.model_list[0])
                val_acc, val_loss = self.validate_model(self.val_loader, self.model_list[0])
            else:
                train_acc, train_loss = self.validate_model(self.train_loader, self.model)
                val_acc, val_loss = self.validate_model(self.val_loader, self.model)

            self.histories['train_loss'] = np.append(self.histories['train_loss'], [train_loss])
            self.histories['val_loss'] = np.append(self.histories['val_loss'], [val_loss])
            self.histories['val_acc'] = np.append(self.histories['val_acc'], [val_acc])
            self.histories['train_acc'] = np.append(self.histories['train_acc'], [train_acc])

            print('train acc', str(train_acc), 'val acc', str(val_acc))
            print('advtraining method', advtraining)
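# The helper()/feval pattern used above follows PyTorch's closure-based optimizer API:
# step(closure) re-evaluates the loss (and gradients) through the closure and returns it.
# SGHMC and SGAdaHMC are custom optimizers from this project, assumed here to follow the same
# convention; for reference only, the identical pattern with the built-in torch.optim.LBFGS:
import torch

model = torch.nn.Linear(10, 2)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.LBFGS(model.parameters(), lr=0.1)
x = torch.randn(4, 10)
y = torch.randint(0, 2, (4,))

def closure():
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    return loss

loss = optimizer.step(closure)  # step() calls the closure and returns the resulting loss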
        torch.nn.utils.clip_grad_norm_(n.parameters(), 10.0)
        opt.step()
        epoch_loss += loss.data.item()
        _, predicted = torch.max(c_pre.data, 1)
        total += y_train.size(0)
        correct += predicted.eq(y_train.data).cuda().sum()
        torch.cuda.empty_cache()
    else:
        loss_cl = loss2(c_pre, y_train)
        loss_sum = torch.mul(loss, 1 / 1) + loss_cl
        if epoch + 1 > param['delay']:
            # use predicted labels to prevent label leaking
            y_pred = pred_batch(torch.cat((x_train, x_train, x_train), 1), n)
            x_adv = adv_train(torch.cat((x_train, x_train, x_train), 1), y_pred, n, loss2, adversary)
            n.zero_grad()
            optimizer.zero_grad()
            x_adv_var = to_var(x_adv)
            y_pre, c_pre = n(x_adv_var)
            loss_adv = loss2(c_pre, y_train) + loss1(
                torch.mul(y_pre, 1.0),
                torch.mul(torch.cat((x_train, x_train, x_train), 1), 1.0)) / 1
            loss_sum = (loss_sum + loss_adv) / 2
        loss_sum.backward(retain_graph=True)
        torch.nn.utils.clip_grad_norm_(n.parameters(), 10.0)
        optimizer.step()
        epoch_loss += loss_sum.data.item()
        _, predicted = torch.max(c_pre.data, 1)
        total += y_train.size(0)
    x_train, y_train = Variable(x_train.cuda()), Variable(y_train.cuda())
    y_pre, c_pre = n(x_train)
    n.zero_grad()
    optimizer.zero_grad()
    _, predicted = torch.max(c_pre.data, 1)
    total += y_train.size(0)
    correct += predicted.eq(y_train.data).cuda().sum()
    # classification loss on the class scores plus reconstruction loss on the clean input
    loss = loss2(c_pre, y_train) + loss1(torch.mul(y_pre, 1.0), torch.mul(x_train, 1.0)) / 1
    if epoch + 1 > param['delay']:
        # use predicted labels to prevent label leaking
        y_pred = pred_batch(x_train, n)
        x_adv = adv_train(x_train, y_pred, n, loss2, adversary)
        x_adv_var = to_var(x_adv)
        y_pre, c_pre = n(x_adv_var)
        loss_adv = loss2(c_pre, y_train) + loss1(torch.mul(y_pre, 1.0), torch.mul(x_adv_var, 1.0)) / 1
        loss = (loss_adv + loss) / 2
    loss.backward(retain_graph=True)
    torch.nn.utils.clip_grad_norm_(n.parameters(), 5.0)
    optimizer.step()
    epoch_loss += loss.data.item()
    torch.cuda.empty_cache()
    if epoch + 1 > param['delay']:
        y_pre2, c_pre2 = n(y_pre)
        y_pre2 = y_pre2.cuda()
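# The two snippets above assume a network n(x) that returns a pair (y_pre, c_pre): a reconstruction
# of the input and the class scores, trained with a reconstruction loss (loss1) and a classification
# loss (loss2). The exact architecture is not shown here; the class below is a hypothetical minimal
# sketch of that interface, assuming 3x32x32 (CIFAR-sized) inputs.
import torch
import torch.nn as nn

# (assumption) loss1 = nn.MSELoss() for reconstruction, loss2 = nn.CrossEntropyLoss() for classification

class ReconClassifierSketch(nn.Module):  # hypothetical name, not the author's model
    def __init__(self, num_classes=10):
        super().__init__()
        # tiny reconstruction branch that maps the image to a same-shaped output
        self.recon = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
            nn.Conv2d(16, 3, 3, padding=1))
        # classifier head that predicts class scores from the reconstruction
        self.classifier = nn.Sequential(
            nn.Flatten(), nn.Linear(3 * 32 * 32, num_classes))

    def forward(self, x):
        y_pre = self.recon(x)           # reconstruction, same shape as x
        c_pre = self.classifier(y_pre)  # class scores
        return y_pre, c_pre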