Example 1
 def generate(self):
     attacker = JSMAAttack(model=self.raw_model,
                           theta=self.theta,
                           gamma=self.gamma)
     targets = np.argmax(self.targets_samples, axis=1)
     adv_samples = attacker.perturbation(xs=self.nature_samples,
                                         ys_target=targets,
                                         device=self.device)
     adv_labels = predict(model=self.raw_model,
                          samples=adv_samples,
                          device=self.device)
     adv_labels = torch.max(adv_labels, 1)[1]
     adv_labels = adv_labels.cpu().numpy()
     np.save(
         '{}{}_AdvExamples.npy'.format(self.adv_examples_dir,
                                       self.attack_name), adv_samples)
     np.save(
         '{}{}_AdvLabels.npy'.format(self.adv_examples_dir,
                                     self.attack_name), adv_labels)
     np.save(
         '{}{}_TrueLabels.npy'.format(self.adv_examples_dir,
                                      self.attack_name),
         self.labels_samples)
     mis_target = 0
     for i in range(len(adv_samples)):
         if targets[i] == adv_labels[i]:
             mis_target += 1
     print(
          '\nFor **{}** (targeted attack) on **{}**, {}/{}={:.1f}% samples are misclassified as the specified target label\n'
         .format(self.attack_name, self.dataset, mis_target,
                 len(adv_samples), mis_target / len(adv_samples) * 100.0))
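As a side note, the targeted success-count loop above can be collapsed into a single vectorized NumPy expression. This is a minimal sketch assuming, as in the code above, that targets and adv_labels are equal-length 1-D arrays of integer class indices.

import numpy as np

# Vectorized equivalent of the loop above: count samples whose predicted
# label equals the specified target label.
mis_target = int(np.sum(targets == adv_labels))
success_rate = mis_target / len(adv_labels) * 100.0
print('{}/{} = {:.1f}% samples are classified as the specified target label'.format(
    mis_target, len(adv_labels), success_rate))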
Example 2
 def defense_predication(self, DefenseModelDirs, defense_name, **kwargs):
     # DefenseModelDirs: directory that holds the defended models
     # defense_name: name of the defense (upper case)
     # NOTE: 'NRC' is handled separately below as a region-based classification
     # defense, so it is not listed among the retraining defenses.
     re_train_defenses = {
         'NAT', 'RLT', 'RLT1', 'RLT2', 'RLT3', 'EAT', 'UAPAT', 'NEAT',
         'RAT', 'RAT1', 'RAT2', 'RAT3', 'RAT4', 'RAT5', 'RAT6',
         'RAT7', 'RAT8', 'RAT9', 'RAT10', 'RAT11', 'MART', 'NEW_MART',
         'NEW_MART1', 'NEW_MMA'
     }
     other_defenses = {'NRC'}
     defense_name = defense_name.upper().strip()
     assert defense_name in re_train_defenses or defense_name in other_defenses
     # If the defense retrains the network
     if defense_name in re_train_defenses:
         print(
             '\n##{}## defense is a kind of complete defenses that retrain the model'
             .format(defense_name))
         # Load the defended model
         defended_model_location = '{}/{}/{}_{}_enhanced.pt'.format(
             DefenseModelDirs, defense_name, self.dataset, defense_name)
         defended_model = MNIST_CNN().to(
             self.device) if self.dataset == 'MNIST' else ResNet18().to(
                 self.device)
         defended_model.load(path=defended_model_location,
                             device=self.device)
         defended_model.eval()
         # Predict labels for the adversarial examples
         prediction = predict(model=defended_model,
                              samples=self.adv_samples,
                              device=self.device)
         # Return the predicted labels as a row vector
         labels = torch.argmax(prediction, 1).cpu().numpy()
         return labels
     else:
         if defense_name == 'NRC':
             print(
                 '\n##{}## defense is a kind of region-based classification defenses ... '
                 .format(defense_name))
             from Defenses.DefenseMethods.NRC import NRCDefense
             num_points = 1000
             assert 'nrc_radius' in kwargs
             assert 'nrc_mean' in kwargs
             assert 'nrc_std' in kwargs
             radius = kwargs['nrc_radius']
             mean = kwargs['nrc_mean']
             std = kwargs['nrc_std']
             nrc = NRCDefense(model=self.raw_model,
                              defense_name='NRC',
                              dataset=self.dataset,
                              device=self.device,
                              num_points=num_points)
             labels = nrc.region_based_classification(
                 samples=self.adv_samples,
                 radius=radius,
                 mean=mean,
                 std=std)
             return labels
         else:
             raise ValueError('{} is not supported!!!'.format(defense_name))
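A minimal usage sketch for defense_predication. The evaluator object, the directory ../DefenseEnhancedModels and the numeric values are assumptions for illustration; the keyword names nrc_radius, nrc_mean and nrc_std come from the asserts above.

def evaluate_defenses(evaluator, defense_model_dirs='../DefenseEnhancedModels'):
    # evaluator: an object exposing defense_predication() as defined above (assumption).
    # Retrained defenses need no extra kwargs.
    nat_labels = evaluator.defense_predication(defense_model_dirs, 'NAT')
    # The region-based NRC defense requires its radius/mean/std kwargs (placeholder values).
    nrc_labels = evaluator.defense_predication(defense_model_dirs, 'NRC',
                                               nrc_radius=0.02, nrc_mean=0.0,
                                               nrc_std=1.0)
    return nat_labels, nrc_labels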
Example 3
 def generate(self):
     attacker = PGDAttack(model=self.raw_model, epsilon=self.epsilon, eps_iter=self.epsilon_iter, num_steps=self.num_steps)
     adv_samples = attacker.batch_perturbation(xs=self.nature_samples, ys=self.labels_samples,
                                               batch_size=self.attack_batch_size, device=self.device)
     # prediction for the adversarial examples
     adv_labels = predict(model=self.raw_model, samples=adv_samples, device=self.device)
     adv_labels = torch.max(adv_labels, 1)[1]
     adv_labels = adv_labels.cpu().numpy()
     np.save('{}{}_AdvExamples.npy'.format(self.adv_examples_dir, self.attack_name), adv_samples)
     np.save('{}{}_AdvLabels.npy'.format(self.adv_examples_dir, self.attack_name), adv_labels)
     np.save('{}{}_TrueLabels.npy'.format(self.adv_examples_dir, self.attack_name), self.labels_samples)
     mis = 0
     for i in range(len(adv_samples)):
         if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
             mis = mis + 1
     print('\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'.format(self.attack_name, self.dataset, mis, len(adv_samples),
                                                                                       mis / len(adv_labels) * 100))
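Because generate() persists the adversarial labels and the one-hot true labels, the misclassification rate can also be recomputed offline from the saved files. A minimal sketch, assuming the .npy files were written by the method above:

import numpy as np

def reload_and_score(adv_examples_dir, attack_name):
    # Paths follow the naming scheme used in generate() above.
    adv_labels = np.load('{}{}_AdvLabels.npy'.format(adv_examples_dir, attack_name))
    true_labels = np.load('{}{}_TrueLabels.npy'.format(adv_examples_dir, attack_name))
    # TrueLabels are stored one-hot, AdvLabels as integer class indices.
    mis = int(np.sum(np.argmax(true_labels, axis=1) != adv_labels))
    return mis / len(adv_labels) * 100.0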
Example 4
 def generate(self):
     # adv_labels: predicted labels of the adversarial examples
     # adv_samples: adversarial examples
     # labels_samples: true labels (stored as one-hot vectors)
     # nature_samples: clean samples used for the attack
     # Pass the parameters to the BIM attack
     attacker = BIMAttack(model=self.raw_model,
                          epsilon=self.epsilon,
                          eps_iter=self.epsilon_iter,
                          num_steps=self.num_steps)
     # Generate adversarial examples in batches
     adv_samples = attacker.batch_perturbation(
         xs=self.nature_samples,
         ys=self.labels_samples,
         batch_size=self.attack_batch_size,
         device=self.device)
     # Predict labels for the adversarial examples
     adv_labels = predict(model=self.raw_model,
                          samples=adv_samples,
                          device=self.device)
     # Take the index of the maximum value and convert it to numpy
     adv_labels = torch.max(adv_labels, 1)[1]
     adv_labels = adv_labels.cpu().numpy()
     # Save the adversarial examples to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_AdvExamples.npy
     # Save the adversarial labels to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_AdvLabels.npy
     # Save the true labels to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_TrueLabels.npy
     np.save(
         '{}{}_AdvExamples.npy'.format(self.adv_examples_dir,
                                       self.attack_name), adv_samples)
     np.save(
         '{}{}_AdvLabels.npy'.format(self.adv_examples_dir,
                                     self.attack_name), adv_labels)
     np.save(
         '{}{}_TrueLabels.npy'.format(self.adv_examples_dir,
                                      self.attack_name),
         self.labels_samples)
     # If the prediction for an adversarial example differs from its true label, increment mis; this measures the attack success (misclassification) rate
     mis = 0
     for i in range(len(adv_samples)):
         if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
             mis = mis + 1
     print(
         '\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'
         .format(self.attack_name, self.dataset, mis, len(adv_samples),
                 mis / len(adv_labels) * 100))
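For intuition, the core of BIM is an iterative FGSM step followed by a projection back into the epsilon-ball around the clean input. The sketch below is illustrative only (it is not the repository's BIMAttack) and assumes y holds integer class indices rather than the one-hot labels_samples used above, and inputs in [0, 1].

import torch
import torch.nn.functional as F

def bim_sketch(model, x, y, epsilon, eps_iter, num_steps):
    # Illustrative L-inf BIM: take a signed-gradient step of size eps_iter,
    # then clip back into the epsilon-ball around x and into [0, 1].
    x_adv = x.clone().detach()
    for _ in range(num_steps):
        x_adv.requires_grad_(True)
        loss = F.cross_entropy(model(x_adv), y)
        grad = torch.autograd.grad(loss, x_adv)[0]
        x_adv = x_adv.detach() + eps_iter * grad.sign()
        x_adv = torch.min(torch.max(x_adv, x - epsilon), x + epsilon)
        x_adv = torch.clamp(x_adv, 0.0, 1.0).detach()
    return x_adv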
Example 5
 def generate(self):
     # Pass the parameters to the RLLC attack
     attacker = RLLCAttack(model=self.raw_model,
                           epsilon=self.epsilon,
                           alpha_ratio=self.alpha_ratio)
     # nature_samples: clean samples
     # targets_samples: target-class labels (the least-likely class for ILLC)
     # llc_labels: target-class indices
     llc_labels = np.argmax(self.targets_samples, 1)
     # Generate adversarial examples
     adv_samples = attacker.batch_perturbation(
         xs=self.nature_samples,
         ys_target=llc_labels,
         batch_size=self.attack_batch_size,
         device=self.device)
     # Predict labels for the adversarial examples and convert to numpy
     adv_labels = predict(model=self.raw_model,
                          samples=adv_samples,
                          device=self.device)
     adv_labels = torch.max(adv_labels, 1)[1]
     adv_labels = adv_labels.cpu().numpy()
     # Save the adversarial examples to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_AdvExamples.npy
     # Save the adversarial labels to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_AdvLabels.npy
     # Save the true labels to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_TrueLabels.npy
     np.save(
         '{}{}_AdvExamples.npy'.format(self.adv_examples_dir,
                                       self.attack_name), adv_samples)
     np.save(
         '{}{}_AdvLabels.npy'.format(self.adv_examples_dir,
                                     self.attack_name), adv_labels)
     np.save(
         '{}{}_TrueLabels.npy'.format(self.adv_examples_dir,
                                      self.attack_name),
         self.labels_samples)
     # Count adversarial examples classified as the specified target label to compute the targeted attack success rate
     mis_target = 0
     for i in range(len(adv_samples)):
         if llc_labels[i] == adv_labels[i]:
             mis_target += 1
     print(
          '\nFor **{}** (targeted attack) on **{}**, {}/{} = {:.1f}% samples are misclassified as the specified target label\n'
         .format(self.attack_name, self.dataset, mis_target,
                 len(adv_samples), mis_target / len(adv_samples) * 100))
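The targets_samples used here hold target-class labels (the least-likely class in the ILLC setting). One common way to construct such least-likely-class targets is to take the argmin of the model's prediction on the clean samples; the sketch below is illustrative and not necessarily how the *_llc.npy files of this repository were produced.

import numpy as np
import torch

def least_likely_targets(model, samples, device):
    # samples: clean inputs as a float numpy array (assumption).
    with torch.no_grad():
        logits = model(torch.from_numpy(samples).float().to(device))
    # The least-likely class is the one with the smallest logit per sample.
    return torch.argmin(logits, dim=1).cpu().numpy()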
Example 6
 def generate(self):
     # nature_samples: clean samples
     # labels_samples: clean (true) labels
     # raw_model: the loaded CNN or ResNet model
     # Pass the model raw_model and the perturbation magnitude epsilon to the FGSMAttack class
     attacker = FGSMAttack(model=self.raw_model, epsilon=self.epsilon)
     # Generate adversarial examples
     adv_samples = attacker.batch_perturbation(
         xs=self.nature_samples,
         ys=self.labels_samples,
         batch_size=self.attack_batch_size,
         device=self.device)
     # Feed the adversarial examples to the model and get the predictions
     adv_labels = predict(model=self.raw_model,
                          samples=adv_samples,
                          device=self.device)
     # Take the predicted label (index of the maximum value)
     adv_labels = torch.max(adv_labels, 1)[1]
     # Convert the labels to a numpy array
     adv_labels = adv_labels.cpu().numpy()
     # Save the adversarial examples to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_AdvExamples.npy
     np.save(
         '{}{}_AdvExamples.npy'.format(self.adv_examples_dir,
                                       self.attack_name), adv_samples)
     # Save the adversarial labels to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_AdvLabels.npy
     np.save(
         '{}{}_AdvLabels.npy'.format(self.adv_examples_dir,
                                     self.attack_name), adv_labels)
     # Save the true labels to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_TrueLabels.npy
     np.save(
         '{}{}_TrueLabels.npy'.format(self.adv_examples_dir,
                                      self.attack_name),
         self.labels_samples)
     # If the prediction differs from the true label, increment mis; this measures the attack success rate
     mis = 0
     for i in range(len(adv_samples)):
         if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
             mis = mis + 1
     print(
         '\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'
         .format(self.attack_name, self.dataset, mis, len(adv_samples),
                 mis / len(adv_labels) * 100))
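For reference, the single-step rule that an FGSM attack applies is x_adv = clip(x + epsilon * sign(grad_x loss), 0, 1). A minimal PyTorch sketch, illustrative only (not the repository's FGSMAttack) and assuming integer class labels and inputs in [0, 1]:

import torch
import torch.nn.functional as F

def fgsm_sketch(model, x, y, epsilon):
    # One signed-gradient step of size epsilon, clipped to the valid pixel range.
    x = x.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x), y)
    grad = torch.autograd.grad(loss, x)[0]
    return torch.clamp(x + epsilon * grad.sign(), 0.0, 1.0).detach()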
Example 7
 def generate(self):
     # nature_samples: clean samples
     # labels_samples: true labels of the clean samples
     # Pass the parameters to the RFGSM attack
     attacker = RFGSMAttack(model=self.raw_model,
                            epsilon=self.epsilon,
                            alpha_ratio=self.alpha_ratio)
     # Generate adversarial examples
     adv_samples = attacker.batch_perturbation(
         xs=self.nature_samples,
         ys=self.labels_samples,
         batch_size=self.attack_batch_size,
         device=self.device)
     # Predict labels for the adversarial examples and convert to numpy
     adv_labels = predict(model=self.raw_model,
                          samples=adv_samples,
                          device=self.device)
     adv_labels = torch.max(adv_labels, 1)[1]
     adv_labels = adv_labels.cpu().numpy()
     # Save the adversarial examples to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_AdvExamples.npy
     # Save the adversarial labels to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_AdvLabels.npy
     # Save the true labels to AdversarialExampleDatasets/attack_name/CIFAR10 (or MNIST)/attack_name_TrueLabels.npy
     np.save(
         '{}{}_AdvExamples.npy'.format(self.adv_examples_dir,
                                       self.attack_name), adv_samples)
     np.save(
         '{}{}_AdvLabels.npy'.format(self.adv_examples_dir,
                                     self.attack_name), adv_labels)
     np.save(
         '{}{}_TrueLabels.npy'.format(self.adv_examples_dir,
                                      self.attack_name),
         self.labels_samples)
     # If the prediction differs from the true label, increment mis; this measures the attack success rate
     mis = 0
     for i in range(len(adv_samples)):
         if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
             mis = mis + 1
     print(
         '\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'
         .format(self.attack_name, self.dataset, mis, len(adv_samples),
                 mis / len(adv_labels) * 100))
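RFGSM (R+FGSM) prepends a small random step to FGSM. The sketch below is illustrative rather than the repository's RFGSMAttack; in particular, interpreting alpha_ratio as the fraction of epsilon spent on the random step is an assumption about the parameter above.

import torch
import torch.nn.functional as F

def rfgsm_sketch(model, x, y, epsilon, alpha_ratio):
    # Random step of size alpha, then an FGSM step with the remaining budget.
    alpha = alpha_ratio * epsilon
    x_rand = torch.clamp(x + alpha * torch.randn_like(x).sign(), 0.0, 1.0)
    x_rand = x_rand.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x_rand), y)
    grad = torch.autograd.grad(loss, x_rand)[0]
    return torch.clamp(x_rand + (epsilon - alpha) * grad.sign(), 0.0, 1.0).detach()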
Example 8
 def generate(self):
     # Pass the parameters to the DeepFool attack
     attacker = DeepFoolAttack(model=self.raw_model,
                               overshoot=self.overshoot,
                               max_iters=self.max_iters)
     # Generate DeepFool adversarial examples
     adv_samples = attacker.perturbation(xs=self.nature_samples,
                                         device=self.device)
     # Compute the raw model's predictions for the DeepFool adversarial examples
     adv_labels = predict(model=self.raw_model,
                          samples=adv_samples,
                          device=self.device)
     # Take the labels the raw model assigns to the DeepFool adversarial examples
     adv_labels = torch.max(adv_labels, 1)[1]
     # Convert the labels to numpy
     adv_labels = adv_labels.cpu().numpy()
     # Save the DeepFool adversarial examples
     np.save(
         '{}{}_AdvExamples.npy'.format(self.adv_examples_dir,
                                       self.attack_name), adv_samples)
     # Save the labels of the DeepFool adversarial examples
     np.save(
         '{}{}_AdvLabels.npy'.format(self.adv_examples_dir,
                                     self.attack_name), adv_labels)
     # Save the true labels
     np.save(
         '{}{}_TrueLabels.npy'.format(self.adv_examples_dir,
                                      self.attack_name),
         self.labels_samples)
     # mis: misclassification count; compute the misclassification rate of the 1000 DeepFool adversarial examples under the raw model
     mis = 0
     for i in range(len(adv_samples)):
         if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
             mis = mis + 1
     print(
         '\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'
         .format(self.attack_name, self.dataset, mis, len(adv_samples),
                 mis / len(adv_labels) * 100))
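Since DeepFool searches for minimal perturbations, the average L2 distortion between clean and adversarial samples is a useful companion number to the misclassification rate. A small helper, assuming both arrays have the shapes saved above:

import numpy as np

def average_l2_perturbation(nature_samples, adv_samples):
    # Flatten each sample and average the per-sample L2 norm of the perturbation.
    diff = (adv_samples - nature_samples).reshape(len(adv_samples), -1)
    return float(np.mean(np.linalg.norm(diff, axis=1)))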
Example 9
    def __init__(self,
                 DataSet='MNIST',
                 AttackName='FGSM',
                 RawModelLocation='../data/',
                 CleanDataLocation='../clean_datasets/',
                 AdvExamplesDir='../AdversarialExampleDatasets/',
                 device=torch.device('cpu')):
        # DataSet: dataset name
        # dataset: dataset name, MNIST or CIFAR10
        # AttackName: attack name
        # attack_name: attack name
        # RawModelLocation: location of the raw model, data/
        # CleanDataLocation: location of the clean datasets, clean_datasets/
        # AdvExamplesDir: location of the adversarial examples, AdversarialExampleDatasets/
        # color_mode: 'RGB' for CIFAR10, 'L' for MNIST
        # Targeted: False for untargeted attacks, True for targeted attacks

        self.device = device
        assert DataSet.upper() in ['MNIST', 'CIFAR10'], "The data set must be MNIST or CIFAR10"
        self.dataset = DataSet.upper()
        self.color_mode = 'RGB' if self.dataset == 'CIFAR10' else 'L'
        self.attack_name = AttackName.upper()
        # Untargeted attack names
        supported_un_targeted = [
            'FGSM', 'RFGSM', 'BIM', 'PGD', 'DEEPFOOL', 'UAP'
        ]
        # Targeted attack names
        supported_targeted = ['LLC', "RLLC", 'ILLC', 'JSMA', 'CW2']
        assert self.attack_name in supported_un_targeted or self.attack_name in supported_targeted, \
            "\nCurrently, our implementation support attacks of FGSM, RFGSM, BIM, UMIFGSM, DeepFool, LLC, RLLC, ILLC, TMIFGSM, JSMA, CW2,....\n"
        if self.attack_name.upper() in supported_un_targeted:
            self.Targeted = False
        else:
            self.Targeted = True

        # Load the model
        # raw_model_location: model path, data/CIFAR10/model/CIFAR10_raw.pt (or MNIST)
        # raw_model: the loaded model
        # ******** To evaluate white-box attacks against a defense, change the path to RawModelLocation/defense_name/dataset_defense_enhanced.pt ********
        raw_model_location = '{}{}/model/{}_raw.pt'.format(
            RawModelLocation, self.dataset, self.dataset)
        if self.dataset == 'MNIST':
            self.raw_model = MNIST_CNN().to(device)
            self.raw_model.load(path=raw_model_location, device=device)
        else:
            self.raw_model = ResNet18().to(device)
            self.raw_model.load(path=raw_model_location, device=device)

        # Load the clean samples and their labels
        # nature_samples: clean samples, CleanDatasets/CIFAR10/CIFAR10_inputs.npy (or MNIST)
        # labels_samples: clean labels, CleanDatasets/CIFAR10/CIFAR10_labels.npy (or MNIST)
        self.nature_samples = np.load('{}{}/{}_inputs.npy'.format(
            CleanDataLocation, self.dataset, self.dataset))
        self.labels_samples = np.load('{}{}/{}_labels.npy'.format(
            CleanDataLocation, self.dataset, self.dataset))

        # Load the target labels
        # For the LLC, RLLC and ILLC attacks, the target labels come from CleanDatasets/CIFAR10/CIFAR10_llc.npy
        if self.attack_name.upper() in ['LLC', 'RLLC', 'ILLC']:
            self.targets_samples = np.load('{}{}/{}_llc.npy'.format(
                CleanDataLocation, self.dataset, self.dataset))
        # Otherwise the target labels come from CleanDatasets/CIFAR10/CIFAR10_targets.npy
        else:
            self.targets_samples = np.load('{}{}/{}_targets.npy'.format(
                CleanDataLocation, self.dataset, self.dataset))

        # Load the adversarial examples
        # AdvExamplesDir: AdversarialExampleDatasets/attack_name/CIFAR10/ (or MNIST)
        # adv_samples: adversarial examples, AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvExamples.npy (or MNIST)
        self.AdvExamplesDir = AdvExamplesDir + self.attack_name + '/' + self.dataset + '/'
        # Warn if the directory does not exist
        if not os.path.exists(self.AdvExamplesDir):
            print(
                "the directory {} does not exist, please check carefully".
                format(self.AdvExamplesDir))
        self.adv_samples = np.load('{}{}_AdvExamples.npy'.format(
            self.AdvExamplesDir, self.attack_name))
        # self.adv_labels = np.load('{}{}_AdvLabels.npy'.format(self.AdvExamplesDir, self.AttackName))

        # Predict labels for the adversarial examples
        # predictions: raw-model predictions for the adversarial examples
        predictions = predict(model=self.raw_model,
                              samples=self.adv_samples,
                              device=self.device).detach().cpu().numpy()

        # Define the softmax function
        def soft_max(x):
            return np.exp(x) / np.sum(np.exp(x), axis=0)

        # Apply softmax to the predictions
        # softmax_prediction: softmax of the predictions
        tmp_soft_max = []
        for i in range(len(predictions)):
            tmp_soft_max.append(soft_max(predictions[i]))
        self.softmax_prediction = np.array(tmp_soft_max)
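The soft_max helper above exponentiates the raw logits directly, which can overflow for large logit values. A numerically stable variant that yields the same probabilities by first subtracting the maximum logit:

import numpy as np

def stable_soft_max(x):
    # Same result as soft_max() above for a 1-D logit vector, but subtracting
    # the maximum first keeps np.exp from overflowing.
    e = np.exp(x - np.max(x))
    return e / np.sum(e)

# Vectorized over a 2-D array of predictions (axis=1 is the class dimension):
# p = predictions - predictions.max(axis=1, keepdims=True)
# softmax_prediction = np.exp(p) / np.exp(p).sum(axis=1, keepdims=True)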
Example 10
 def generate(self):
     # Pass the parameters to the UAP attack
     attacker = UniversalAttack(model=self.raw_model,
                                fooling_rate=self.fooling_rate,
                                max_iter_universal=self.max_iter_uni,
                                epsilon=self.epsilon,
                                overshoot=self.overshoot,
                                max_iter_deepfool=self.max_iter_df)
     assert self.dataset.upper() in ['MNIST', 'CIFAR10'], "dataset should be MNIST or CIFAR10!"
     # Load the training and validation sets for MNIST/CIFAR10
     if self.dataset.upper() == 'MNIST':
         samples_loader, valid_loader = get_mnist_train_validate_loader(
             dir_name='../data/MNIST/',
             batch_size=1,
             valid_size=0.9,
             shuffle=True)
     else:  # 'CIFAR10':
         samples_loader, valid_loader = get_cifar10_train_validate_loader(
             dir_name='../data/CIFAR10/',
             batch_size=1,
             valid_size=0.9,
             augment=False,
             shuffle=True)
     # Compute the universal perturbation and convert it to numpy
     universal_perturbation = attacker.universal_perturbation(
         dataset=samples_loader,
         validation=valid_loader,
         device=self.device)
     universal_perturbation = universal_perturbation.cpu().numpy()
     # Save the UAP perturbation to AdversarialExampleDatasets/UAP_MNIST_universal_perturbation (or CIFAR10)
     np.save(
         '{}{}_{}_universal_perturbation'.format(self.adv_examples_dir,
                                                 self.attack_name,
                                                 self.dataset),
         universal_perturbation)
     # Generate UAP adversarial examples
     adv_samples = attacker.perturbation(xs=self.nature_samples,
                                         uni_pert=universal_perturbation,
                                         device=self.device)
     # Predictions of the raw model for the UAP adversarial examples
     adv_labels = predict(model=self.raw_model,
                          samples=adv_samples,
                          device=self.device)
     # Convert the predicted labels of the UAP adversarial examples to numpy
     adv_labels = torch.max(adv_labels, 1)[1]
     adv_labels = adv_labels.cpu().numpy()
     # Save the UAP adversarial examples, their predicted labels and the true labels
     np.save(
         '{}{}_AdvExamples.npy'.format(self.adv_examples_dir,
                                       self.attack_name), adv_samples)
     np.save(
         '{}{}_AdvLabels.npy'.format(self.adv_examples_dir,
                                     self.attack_name), adv_labels)
     np.save(
         '{}{}_TrueLabels.npy'.format(self.adv_examples_dir,
                                      self.attack_name),
         self.labels_samples)
     # Compute the misclassification rate of the UAP-perturbed adversarial examples
     mis = 0
     for i in range(len(adv_samples)):
         if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
             mis = mis + 1
     print(
         '\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'
         .format(self.attack_name, self.dataset, mis, len(adv_samples),
                 mis / len(adv_labels) * 100))
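The saved universal perturbation can later be re-applied to any batch of clean samples without rerunning the attack. A minimal sketch, assuming inputs in [0, 1] and simple additive clipping (the repository's attacker.perturbation may differ):

import numpy as np

def apply_universal_perturbation(nature_samples, perturbation_path):
    # Broadcasting adds the single perturbation to every sample; the result
    # is clipped back to the valid pixel range [0, 1].
    uni_pert = np.load(perturbation_path)
    return np.clip(nature_samples + uni_pert, 0.0, 1.0)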