def perturb(self, X_nat, y):
    """
    Given examples (X_nat, y), returns adversarial examples within
    epsilon of X_nat in l_infinity norm.
    """
    if self.rand:
        # Random start: initialize uniformly inside the epsilon-ball around X_nat
        X = X_nat + np.random.uniform(-self.epsilon, self.epsilon,
                                      X_nat.shape).astype('float32')
    else:
        X = np.copy(X_nat)

    for i in range(self.k):
        X_var = base.to_var(torch.from_numpy(X), requires_grad=True)
        y_var = base.to_var(torch.LongTensor(y))

        scores = self.model(X_var)
        loss = self.loss_fn(scores, y_var)
        loss.backward()
        grad = X_var.grad.data.cpu().numpy()

        # Ascent step of size a, then project back into the epsilon-ball
        X += self.a * np.sign(grad)
        X = np.clip(X, X_nat - self.epsilon, X_nat + self.epsilon)
        X = np.clip(X, 0, 1)  # ensure valid pixel range

    X = torch.from_numpy(X)
    return X
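The PGD loop above reads its attack parameters from the instance: self.epsilon (the l_infinity radius), self.a (the per-step size), self.k (the number of steps), self.rand (random start) and self.loss_fn. The training script below only passes (net, param['epsilon']) to the constructor, so the following sketch of the enclosing LinfPGDAttack class is an assumption about how the remaining fields might be initialized; the default values for k, a and rand are illustrative, not taken from the original code.

import torch.nn as nn


class LinfPGDAttack:
    # Hypothetical constructor sketch; only (model, epsilon) is confirmed by
    # the training script. The k, a, rand and loss_fn defaults are assumed.
    def __init__(self, model, epsilon, k=40, a=0.01, rand=True, loss_fn=None):
        self.model = model                               # model under attack
        self.epsilon = epsilon                           # l_infinity radius
        self.k = k                                       # number of PGD iterations
        self.a = a                                       # step size per iteration
        self.rand = rand                                 # random start inside the ball
        self.loss_fn = loss_fn or nn.CrossEntropyLoss()  # loss maximized by the attack

    # perturb(self, X_nat, y) as defined above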
def perturb(self, X_nat, y, epsilons=None):
    """
    Given examples (X_nat, y), returns their adversarial counterparts,
    perturbed by a single gradient-sign step of size epsilon.
    """
    # Allow epsilon to be overridden per batch
    if epsilons is not None:
        self.epsilon = epsilons

    X = np.copy(X_nat)
    X_var = base.to_var(torch.from_numpy(X), requires_grad=True)
    y_var = base.to_var(torch.LongTensor(y))

    scores = self.model(X_var)
    loss = self.loss_fn(scores, y_var)
    loss.backward()
    grad_sign = X_var.grad.data.cpu().sign().numpy()

    # Single FGSM step, then clip to the valid pixel range
    X += self.epsilon * grad_sign
    X = np.clip(X, 0, 1)

    X = torch.from_numpy(X)
    return X
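FGSM is effectively the single-step (k = 1, no projection loop) special case of the PGD attack above. As a quick sanity check, the hedged sketch below generates adversarial examples for one batch and counts how many predictions flip; the helper name count_flipped is hypothetical, and only the attacks' perturb() interface comes from the original code.

import torch

def count_flipped(net, adversary, x, y):
    """Hypothetical helper: how many predictions does the attack flip?

    x is a float batch in [0, 1], y the integer labels; adversary is an
    FGSM or LinfPGDAttack instance whose perturb() returns a torch tensor.
    """
    net.eval()  # switch back to net.train() afterwards if used mid-training
    with torch.no_grad():
        clean_pred = net(x.cuda()).argmax(dim=1).cpu()
    # perturb() needs gradients w.r.t. the input, so do not wrap it in no_grad
    x_adv = adversary.perturb(x.numpy(), y)
    with torch.no_grad():
        adv_pred = net(x_adv.cuda()).argmax(dim=1).cpu()
    return int((clean_pred != adv_pred).sum())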
''' Train the model (adversarial training) '''
net.train()  # training mode
criterion = nn.CrossEntropyLoss()  # loss function
optimizer = torch.optim.RMSprop(net.parameters(),
                                lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])  # optimizer: learning rate, weight decay (L2 regularization)

''' Adversarial attack used to generate training examples '''
# adversary = Adversary.FGSM.FGSM(net, param['epsilon'])  # attack method
adversary = Adversary.LinfPGD.LinfPGDAttack(net, param['epsilon'])

for epoch in range(param['num_epochs']):
    print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))
    for t, (x, y) in enumerate(loader_train):
        x_var, y_var = base.to_var(x), base.to_var(y.long())  # convert to Variables on the GPU
        X_adv = adversary.perturb(x.numpy(), y)  # perturb the batch to generate adversarial examples

        # Average the loss over clean and adversarial examples
        loss = (criterion(net(x_var), y_var) + criterion(net(X_adv.cuda()), y_var)) / 2

        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()        # backpropagate
        optimizer.step()       # update the parameters

        if (t + 1) % 10 == 0:  # report the loss every 10 batches
            print('t = %d, loss = %.8f' % (t + 1, loss.item()))
        if (t + 1) % 100 == 0:
            num_correct, num_samples, acc = Optimizer.test(net, loader_test)
            print('[train] t = %d, right predict: (%d/%d), test_acc = %.4f%%'
                  % (t + 1, num_correct, num_samples, acc))

torch.save(net.state_dict(), modelpath)  # save the trained model to disk
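The script relies on two helpers that are not shown here: base.to_var, which the attacks call with requires_grad=True, and Optimizer.test, which returns (num_correct, num_samples, acc) with acc in percent. The sketches below are assumptions inferred only from how those helpers are called; the real implementations in the base and Optimizer modules may differ.

import torch

def to_var(x, requires_grad=False):
    """Sketch of base.to_var: move a tensor to the GPU and set requires_grad."""
    if torch.cuda.is_available():
        x = x.cuda()
    # Only floating-point tensors can require gradients (labels stay as-is)
    return x.requires_grad_(requires_grad) if x.is_floating_point() else x

def test(net, loader):
    """Sketch of Optimizer.test: returns (num_correct, num_samples, acc in %)."""
    net.eval()
    num_correct, num_samples = 0, 0
    with torch.no_grad():
        for x, y in loader:
            preds = net(to_var(x)).argmax(dim=1).cpu()
            num_correct += int((preds == y).sum())
            num_samples += y.size(0)
    net.train()
    return num_correct, num_samples, 100.0 * num_correct / num_samples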