Example #1
    def get_metric_eval(self):

        utr_score = []
        tr_score = []
        for i in range(1):

            ##TODO: Customise input parameters to methods like LinfPGDAttack
            adversary = LinfPGDAttack(
                self.phi,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=0.10,
                nb_iter=40,
                eps_iter=0.01,
                rand_init=True,
                clip_min=0.0,
                clip_max=1.0,
                targeted=False)

            adv_untargeted = adversary.perturb(x_e, y_e)

            target = torch.ones_like(y_e) * 3
            adversary.targeted = True
            adv_targeted = adversary.perturb(x_e, target)

            pred_cln = predict_from_logits(self.phi(x_e))
            pred_untargeted_adv = predict_from_logits(self.phi(adv_untargeted))
            pred_targeted_adv = predict_from_logits(self.phi(adv_targeted))
            utr_score.append(torch.sum(pred_cln != pred_untargeted_adv))
            tr_score.append(torch.sum(pred_cln != pred_targeted_adv))

            batch_size = 5
            plt.figure(figsize=(10, 8))
            for ii in range(batch_size):
                plt.subplot(3, batch_size, ii + 1)
                _imshow(x_e[ii])
                plt.title("clean \n pred: {}".format(pred_cln[ii]))
                plt.subplot(3, batch_size, ii + 1 + batch_size)
                _imshow(adv_untargeted[ii])
                plt.title("untargeted \n adv \n pred: {}".format(
                    pred_untargeted_adv[ii]))
                plt.subplot(3, batch_size, ii + 1 + batch_size * 2)
                _imshow(adv_targeted[ii])
                plt.title("targeted to 3 \n adv \n pred: {}".format(
                    pred_targeted_adv[ii]))

            plt.tight_layout()
            plt.savefig(self.save_path + '.png')

        utr_score = np.array(utr_score)
        tr_score = np.array(tr_score)
        print('Misclassification on Untargeted Attack ', np.mean(utr_score),
              np.std(utr_score))
        print('Misclassification on Targeted Attack', np.mean(tr_score),
              np.std(tr_score))

        self.metric_score['Untargeted Method'] = np.mean(utr_score)
        self.metric_score['Targeted Method'] = np.mean(tr_score)

        return
Example #2
def test_pgd(args, model, device, test_loader, epsilon=0.063):

    model.eval()
    model.reset()
    adversary = LinfPGDAttack(model.forward_adv,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=epsilon,
                              nb_iter=args.nb_iter,
                              eps_iter=args.eps_iter,
                              rand_init=True,
                              clip_min=-1.0,
                              clip_max=1.0,
                              targeted=False)

    correct = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = data.to(device), target.to(device)
        model.reset()
        with ctx_noparamgrad_and_eval(model):
            adv_images = adversary.perturb(data, target)

        output = model.run_cycles(adv_images)

        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

    acc = correct / len(test_loader.dataset)
    print('PGD attack Acc {:.3f}'.format(100. * acc))

    return acc
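
A minimal sketch of how test_pgd above might be invoked; the args namespace, model, and loader below are illustrative assumptions, not part of the original code:

# Hypothetical driver for test_pgd (the model is expected to expose
# forward_adv, run_cycles and reset, as used in the function above).
import argparse
import torch

args = argparse.Namespace(nb_iter=40, eps_iter=0.007)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model, test_loader = build_model_and_loader(device)  # assumed helpers
# robust_acc = test_pgd(args, model, device, test_loader, epsilon=8 / 255)
# print("robust accuracy under PGD: {:.2%}".format(robust_acc))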
Example #3
def test_attack(threshold, arch, dataset, test_loader):
    target_model = StandardModel(dataset, arch, no_grad=False)
    if torch.cuda.is_available():
        target_model = target_model.cuda()
    target_model.eval()
    attack = LinfPGDAttack(target_model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=threshold, nb_iter=30,
                           eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0, targeted=False)
    all_count = 0
    success_count = 0
    all_adv_images = []
    all_true_labels = []
    for idx, (img, true_label) in enumerate(test_loader):
        img = img.cuda()
        true_label = true_label.cuda().long()

        adv_image = attack.perturb(img, true_label)  # (N, 3, 224, 224), float
        if adv_image is None:
            continue
        adv_label = target_model.forward(adv_image).max(1)[1].detach().cpu().numpy().astype(np.int32)
        # adv_image = np.transpose(adv_image, (0, 2, 3, 1)) # N,C,H,W -> (N, H, W, 3), float
        all_count += len(img)
        true_label_np = true_label.detach().cpu().numpy().astype(np.int32)
        success_count+= len(np.where(true_label_np != adv_label)[0])
        all_adv_images.append(adv_image.cpu().detach().numpy())
        all_true_labels.append(true_label_np)
    attack_success_rate = success_count / float(all_count)
    log.info("Before train. Attack success rate is {:.3f}".format(attack_success_rate))
    return target_model, np.concatenate(all_adv_images, 0), np.concatenate(all_true_labels, 0)  # N,3,224,224 (NCHW; the HWC transpose above is commented out)
Example #4
    def attack_pgd_transfer(self, model_attacker, clean_loader, epsilon=0.1, eps_iter=0.02, test='average', nb_iter=7):
        """ Use adversarial samples generated against model_attacker to attack the current model. """

        self.model.eval()
        self.model.reset()
        model_attacker.eval()
        model_attacker.reset()
        adversary = LinfPGDAttack(
            model_attacker.forward_adv, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=epsilon,
            nb_iter=nb_iter, eps_iter=eps_iter, rand_init=True, clip_min=-1.0, clip_max=1.0, targeted=False)

        correct = 0
        for batch_idx, (data, target) in enumerate(clean_loader):
            data, target = data.to(self.device), target.to(self.device)
            self.model.reset()
            model_attacker.reset()
            with ctx_noparamgrad_and_eval(model_attacker):
                adv_images = adversary.perturb(data, target)

                if(test=='last'):
                    output = self.model.run_cycles(adv_images)
                elif(test=='average'):
                    output = self.model.run_average(adv_images)
                else:
                    self.model.reset()
                    output = self.model(adv_images)

            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

        acc = correct / len(clean_loader.dataset)
        print('PGD attack Acc {:.3f}'.format(100. * acc))

        return acc
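
The docstring above describes a transfer attack: adversarial examples are crafted against model_attacker and then evaluated on self.model. A minimal calling sketch, assuming a hypothetical trainer object that owns self.model/self.device and a separately trained surrogate model (all names here are illustrative):

# Hypothetical usage of attack_pgd_transfer; `trainer`, `surrogate_model`
# and `test_loader` are assumed to be constructed elsewhere.
# transfer_acc = trainer.attack_pgd_transfer(
#     model_attacker=surrogate_model,
#     clean_loader=test_loader,
#     epsilon=8 / 255,
#     eps_iter=2 / 255,
#     test='average',
#     nb_iter=7)
# print('accuracy under transferred PGD: {:.3f}'.format(100. * transfer_acc))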
Example #5
def generate_attack_samples(model, cln_data, true_label, nb_iter, eps_iter):
    adversary = LinfPGDAttack(
        model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.25,
        nb_iter=nb_iter, eps_iter=eps_iter, rand_init=True, clip_min=0.0, clip_max=1.0,
        targeted=False)

    adv_untargeted = adversary.perturb(cln_data, true_label)

    adv_targeted_results = []
    adv_target_labels = []
    for target_label in range(0, 10):
        assert 0 <= target_label < 10 and isinstance(target_label, int)
        target = torch.ones_like(true_label) * target_label
        adversary.targeted = True
        adv_targeted = adversary.perturb(cln_data, target)
        adv_targeted_results.append(adv_targeted)
        adv_target_labels.append(target)

    return adv_targeted_results, adv_target_labels, adv_untargeted
Example #6
def mifgsm_attack(max_count,
                  model,
                  train_loader,
                  max_epsilon,
                  learning_rate,
                  iters=20,
                  isnorm=False,
                  num_classes=1000):
    if isnorm:
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        mean = torch.tensor(mean).float().view(3, 1, 1)
        std = torch.tensor(std).float().view(3, 1, 1)
        mmax = torch.ones(3, 224, 224)
        mmin = torch.zeros(3, 224, 224)
        mmax = ((mmax - mean) / std).cuda()
        mmin = ((mmin - mean) / std).cuda()
        learning_rate = learning_rate / (255 * 0.224)
        max_epsilon = max_epsilon / (255 * 0.224)
    else:
        learning_rate = float(learning_rate)
        max_epsilon = float(max_epsilon)
        mmax = 255
        mmin = 0
    # adversary = mifgsm(model,eps=max_epsilon,nb_iter=iters,eps_iter=learning_rate,clip_min=mmin,clip_max=mmax)
    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=max_epsilon,
                              nb_iter=iters,
                              eps_iter=learning_rate,
                              clip_min=mmin,
                              clip_max=mmax,
                              targeted=True)
    count = 0
    total_correct = 0
    # device = model.device()
    for x, y in train_loader:
        x = x.cuda()
        y = y.cuda()
        y1 = (y + 3) % num_classes
        count += len(x)
        ad_ex = adversary.perturb(x, y1)
        if not isnorm:
            ad_ex = torch.round(ad_ex)
        z1 = model(ad_ex).argmax(dim=1)
        diff = ad_ex - x
        total_correct += (z1 == y).sum()

        if count >= max_count:
            break
    return total_correct.cpu().numpy() / (count)
Example #7
def whitebox_pgd(args, image, target, model, normalize=None):
    adversary = LinfPGDAttack(
        model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.3,
        nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
        targeted=False)
    adv_image = adversary.perturb(image, target)
    print("Target is %d" %(target))
    pred = model(adv_image)
    out = pred.max(1, keepdim=True)[1] # get the index of the max log-probability
    print("Adv Target is %d" %(out))
    clean_image = (image)[0].detach()
    adv_image = adv_image[0].detach()
    if args.comet:
        plot_image_to_comet(args,clean_image,"clean.png")
        plot_image_to_comet(args,adv_image,"Adv.png")
    return pred, clamp(clean_image - adv_image,0.,1.)
Example #8
    def create_adv_input(self, x, y, model):
        # Prepare copied model
        model = copy.deepcopy(model)

        # Prepare input and corresponding label
        data = torch.from_numpy(np.expand_dims(x, axis=0).astype(np.float32))
        target = torch.from_numpy(np.array([y]).astype(np.int64))
        data.requires_grad = True

        from advertorch.attacks import LinfPGDAttack
        adversary = LinfPGDAttack(model.forward)
        perturbed_data = adversary.perturb(data, target)

        # Have to be different
        output = model.forward(perturbed_data)
        final_pred = output.max(
            1, keepdim=True)[1]  # get the index of the max log-probability

        if final_pred.item() == target.item():
            return perturbed_data, 0
        else:
            return perturbed_data, 1
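
The method above returns the perturbed input together with a 0/1 flag (1 when the prediction flips). A small sketch, under the assumption that `wrapper` is an instance of the class defining create_adv_input, showing how that flag could be aggregated into an attack success rate:

import numpy as np

def attack_success_rate(wrapper, xs, ys, model):
    # xs: iterable of input arrays, ys: matching integer labels.
    flags = []
    for x, y in zip(xs, ys):
        _, flipped = wrapper.create_adv_input(x, y, model)
        flags.append(flipped)  # 1 if the adversarial example changed the prediction
    return float(np.mean(flags))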
Example #9
            clncorrect_nodefence += pred.eq(target.view_as(pred)).sum().item()

            # clean data with defence
            clndata_test_one = clndata

            with torch.no_grad():
                output = model(clndata_test_one.float())
            test_clnloss += F.cross_entropy(output, target,
                                            reduction='sum').item()

            pred = output.max(1, keepdim=True)[1]
            clncorrect += pred.eq(target.view_as(pred)).sum().item()

            if flag_advtrain:
                with ctx_noparamgrad_and_eval(model):
                    advdata = adversary.perturb(clndata, target)

                # no defence
                with torch.no_grad():
                    output = model(advdata.float())
                test_advloss_nodefence += F.cross_entropy(
                    output, target, reduction='sum').item()
                pred = output.max(1, keepdim=True)[1]
                advcorrect_nodefence += pred.eq(
                    target.view_as(pred)).sum().item()

                # with defence

                # # gaussian_block
                if args.gaussian_block:
                    noise_data = add_gaussian_nosie(advdata, args.sigma)
Example #10
def _get_test_adv(attack_method,epsilon):
    # define parameter
    parser = argparse.ArgumentParser(description='Train MNIST')
    parser.add_argument('--seed', default=0, type=int)
    parser.add_argument('--mode', default="adv", help="cln | adv")
    parser.add_argument('--sigma', default=75, type=int, help='noise level')
    parser.add_argument('--train_batch_size', default=50, type=int)
    parser.add_argument('--test_batch_size', default=1000, type=int)
    parser.add_argument('--log_interval', default=200, type=int)
    parser.add_argument('--result_dir', default='results', type=str, help='directory of test dataset')
    parser.add_argument('--monitor', default=False, type=bool, help='if monitor the training process')
    parser.add_argument('--start_save', default=90, type=int,
                        help='the threshold epoch which will start to save imgs data using in testing')

    # attack
    parser.add_argument("--attack_method", default="PGD", type=str,
                        choices=['FGSM', 'PGD', 'Momentum', 'STA'])

    parser.add_argument('--epsilon', type=float, default=8 / 255, help='perturbation budget for the attack')

    parser.add_argument('--dataset', default='cifar10', type=str, help='dataset = [cifar10/MNIST]')

    # net
    parser.add_argument('--net_type', default='wide-resnet', type=str, help='model')
    parser.add_argument('--depth', default=28, type=int, help='depth of model')
    parser.add_argument('--widen_factor', default=10, type=int, help='width of model')
    parser.add_argument('--dropout', default=0.3, type=float, help='dropout_rate')
    parser.add_argument('--num_classes', default=10, type=int)
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # load basic data
    # test loader built on the wrapped CIFAR-10 dataset
    test_loader = get_handled_cifar10_test_loader(num_workers=4, shuffle=False, batch_size=50)

    # Load the network model
    # Load checkpoint
    print('| Resuming from checkpoint...')
    assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
    _, file_name = getNetwork(args)
    checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep + file_name + '.t7')  # os.sep provides a cross-platform path separator
    model = checkpoint['net']

    #
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # Define the adversarial attack type
    from advertorch.attacks import LinfPGDAttack
    if attack_method == "PGD":
        adversary = LinfPGDAttack(
            model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=epsilon,
            nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
            targeted=False)
    elif attack_method == "FGSM":
        adversary = GradientSignAttack(
            model, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            clip_min=0.0, clip_max=1.0, eps=0.007, targeted=False)  # first try without limiting the perturbation range; for FGSM, eps plays the role of the usual eps_iter
    elif attack_method == "Momentum":
        adversary = MomentumIterativeAttack(
            model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=epsilon,
            nb_iter=40, decay_factor=1.0, eps_iter=1.0, clip_min=0.0, clip_max=1.0,
            targeted=False, ord=np.inf)
    elif attack_method == "STA":
        adversary = SpatialTransformAttack(
            model, num_classes=args.num_classes, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            initial_const=0.05, max_iterations=1000, search_steps=1, confidence=0, clip_min=0.0, clip_max=1.0,
            targeted=False, abort_early=True)  # first try without limiting the perturbation range

    # generate for train.h5 | save as train_adv_attackMethod_epsilon
    test_adv = []
    test_true_target = []
    for clndata, target in test_loader:
        print("clndata:{}".format(clndata.size()))
        clndata, target = clndata.to(device), target.to(device)
        with ctx_noparamgrad_and_eval(model):
            advdata = adversary.perturb(clndata, target)
            test_adv.append(advdata.detach().cpu().numpy())
        test_true_target.append(target.cpu().numpy())
    test_adv = np.reshape(np.asarray(test_adv),[-1,3,32,32])
    test_true_target = np.reshape(np.asarray(test_true_target),[-1])
    print("test_adv.shape:{}".format(test_adv.shape))
    print("test_true_target.shape:{}".format(test_true_target.shape))
    del model

    return test_adv, test_true_target
Example #11
            acc_count_train = 0
            fp_sum_train = 0
            fp_count_train = 0

            fn_sum_train = 0
            fn_count_train = 0

            if do_train:
                for batch_idx, sample in enumerate(tqdm(train_loader)):

                    img = sample['img'].to(device)
                    orig_img = img.clone()
                    label = sample['label'].to(device)

                    with ctx_noparamgrad_and_eval(att_model):
                        data = adversary.perturb(img, label)

                        if one_way:
                            data[torch.flatten(label) == 1] = orig_img[
                                torch.flatten(label) == 1]

                    outputs = att_model(data)
                    optimizer.zero_grad()

                    loss = criterion(outputs,
                                     label,
                                     gamma=adv_floss_gamma,
                                     alpha=adv_floss_alpha)

                    if isinstance(model, torch.nn.DataParallel):
                        loss = loss.sum()

Example #12
# NOTE: the opening of this snippet is truncated in the source; the assignment
# and model argument below are an assumed reconstruction.
adversary = LinfPGDAttack(net,
                          loss_fn=nn.CrossEntropyLoss(),
                          eps=args.eps,
                          nb_iter=40,
                          eps_iter=0.01,
                          rand_init=True,
                          clip_min=0.0,
                          clip_max=1.0,
                          targeted=False)

net.eval()
correct = 0
total = 0
for step, data in enumerate(testloader, 0):

    inputs, labels = data

    inputs = inputs.cuda()
    labels = labels.cuda()

    adv_inputs = adversary.perturb(inputs, labels)
    adv_inputs = Variable(adv_inputs)
    labels = Variable(labels)
    outputs_adv = net(adv_inputs)

    _, predicted = torch.max(outputs_adv.data, 1)
    total += labels.size(0)
    correct += predicted.eq(labels.data).cpu().sum()
    correct = correct.item()

print("Classification accuracy : {}%".format(100. * correct / total))
Example #13
def main():
    data = []
    torch.cuda.empty_cache()

    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=60000,
                        metavar='N',
                        help='input batch size for training (default: 60000)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=10000,
                        metavar='N',
                        help='input batch size for testing (default: 10000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=1,
                        metavar='N',
                        help='number of epochs to train (default: 1)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')

    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1} if use_cuda else {}
    if os.path.exists('/local2'):
        dr_t = '/local2/data'
    else:
        dr_t = '/home/jung/hypothesis/data'

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        dr_t,
        train=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=60000,
                                               shuffle=False,
                                               **kwargs)

    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        dr_t,
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=10000,
                                              shuffle=False,
                                              **kwargs)

    test_loader_small = torch.utils.data.DataLoader(datasets.MNIST(
        dr_t,
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                                    batch_size=200,
                                                    shuffle=False,
                                                    **kwargs)

    strage = device
    model = Net()
    model.to(device)
    checkpoint = torch.load('mnist_cnn.pt',
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint)

    adv_ = True
    Associations = []
    for xin in range(4):
        temp = torch.load('map_association_' + str(xin) + '.pt')
        Associations.append(temp)

    hookF = [Hook(layer[1]) for layer in list(model._modules.items())]

    # Let's test how this association is predictive of the test set

    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=0.7,
                              nb_iter=40,
                              eps_iter=0.01,
                              rand_init=True,
                              clip_min=0.0,
                              clip_max=1.0,
                              targeted=False)

    for data, target in test_loader_small:
        break
    labels_ = target
    data = data.to(device)
    labels_ = labels_.to(device)

    if adv_:
        adv_untargeted = adversary.perturb(data, labels_)
        fn = 'adv'
    else:
        adv_untargeted = data
        fn = 'norm'
    print(adv_untargeted.size())

    pred_n = test_adv(args, model, device, test_loader_small, hookF,
                      adv_untargeted)

    activity_layer = {}
    for ttin in range(4):
        layer_sel_ = ttin
        act_map = Associations[layer_sel_]

        roV = intermediate_output[layer_sel_]

        sel = Associations[layer_sel_]
        sel = sel.numpy()

        wm = torch.load('wm_' + str(layer_sel_) + '.pt',
                        map_location=lambda storage, loc: storage)

        fp = open('labels_' + str(layer_sel_) +
                  '.json')  # labels for wm i.e., the labels of the test set.
        label = json.load(fp)
        fp.close()
        cog = CogMem_load(wm, label)

        for data, target in test_loader_small:
            break
        labels_ = target
        cog.foward(roV)
        pred = cog.pred.long()
        #pred=cog.pred.long().cpu().numpy()

        total_1 = 0
        total_2 = 0
        total_3 = 0
        total_4 = 0
        total_5 = 0
        cons1 = 0
        cons2 = 0
        temp = 0
        corr = np.zeros((10, 10))
        mem = []
        #print ('sel shape',sel.shape)
        #print (cog.image.size())
        #print ('pred',pred.size())
        temp_vec = []
        for xi, xin in enumerate(pred_n):
            cls = xin.item()
            label_t = labels_[xi].long().item()
            v2 = cog.image[:, xi]

            idx = torch.argsort(v2).cpu().numpy()
            mem.append(v2.cpu().numpy())
            idx = np.flip(idx, 0)[:3]
            tar = sel[idx, :]
            temp_v = np.zeros(10)
            for zin in idx:
                temp_v = temp_v + sel[zin, :] * v2[zin].item()
            temp_v = np.exp(temp_v)
            temp_v = temp_v / np.sum(temp_v)
            temp_vec.append(temp_v)

        activity_layer[layer_sel_] = np.array(temp_vec)

    layer_corr = {}
    values = []
    for xin in activity_layer:
        pred = activity_layer[xin]
        for yin in activity_layer:
            post = activity_layer[yin]
            if xin == yin:
                pass
            else:
                temp = []
                for zin in range(len(pred_n)):
                    temp.append(np.dot(pred[zin], post[zin]))
                #temp=np.array(temp)
                layer_corr[str(xin) + '_' + str(yin)] = temp

    fp = open('layer-' + fn + '_' + '.json', 'w')
    json.dump(layer_corr, fp)
Example #14
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description='gen adversarial examples via advertorch')

    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')

    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')

    parser.add_argument('--eps', default=0.3, help='eps for LinfPGDAttack')

    parser.add_argument('--norm',
                        action='store_true',
                        default=False,
                        help='adversarial?')

    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    normalize = transforms.Normalize((0.1307, ), (0.3081, ))

    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    dr_t = './data'
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        dr_t,
        train=False,
        transform=transforms.Compose([transforms.ToTensor(), normalize])),
                                              batch_size=200,
                                              shuffle=False,
                                              **kwargs)

    model = Net()
    checkpoint = torch.load('pretrained_models/mnist_cnn.pt',
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint)

    model.to(device)

    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=float(args.eps),
                              nb_iter=40,
                              eps_iter=0.01,
                              rand_init=True,
                              clip_min=0.0,
                              clip_max=1.0,
                              targeted=False)

    for data, target in test_loader:
        break
    labels_ = target
    data = data.to(device)
    labels_ = labels_.to(device)

    if os.path.exists("adversarial_examples"):
        pass
    else:
        os.mkdir("adversarial_examples")

    if args.norm:
        torch.save(data, 'adversarial_examples/norm.pt')
        torch.save(labels_, 'adversarial_examples/norm_label_.pt')
    else:
        adv_untargeted = adversary.perturb(data, labels_)
        torch.save(adv_untargeted,
                   'adversarial_examples/adv_' + str(args.eps) + '.pt')
        torch.save(labels_,
                   'adversarial_examples/adv_label_' + str(args.eps) + '.pt')
Example #15
def run(args):
    torch.manual_seed(args.seed)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if args.mode == "cln":
        flag_advtrain = False
        nb_epoch = 10
        model_filename = "mnist_lenet5_clntrained_%i.pt" % args.seed
    elif args.mode == "adv":
        flag_advtrain = True
        nb_epoch = 90
        model_filename = "mnist_lenet5_advtrained_%i.pt" % args.seed
    else:
        raise ValueError(f"mode must be 'cln' or 'adv', got {args.mode!r}")

    train_loader, test_loader = load_mnist(args,
                                           augment=False,
                                           root='../data/')
    if args.architecture == 'LeNet':
        model = Net(1, 28, 28).to(device)
    elif args.architecture == 'MadryLeNet':
        model = MadryNet(1, 28, 28).to(device)
        print(device)
    else:
        raise NotImplementedError(f'Architecture {args.architecture} not implemented')
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    if flag_advtrain:
        from advertorch.attacks import LinfPGDAttack
        adversary = LinfPGDAttack(model,
                                  loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                  eps=0.3,
                                  nb_iter=40,
                                  eps_iter=0.01,
                                  rand_init=True,
                                  clip_min=0.0,
                                  clip_max=1.0,
                                  targeted=False)
        save_path = os.path.join(args.save_path, "adv_trained")
    else:
        save_path = os.path.join(args.save_path, "natural")

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    save_path = os.path.join(save_path, model_filename)

    for epoch in range(nb_epoch):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            ori = data
            if flag_advtrain:
                # when performing attack, the model needs to be in eval mode
                # also the parameters should NOT be accumulating gradients
                with ctx_noparamgrad_and_eval(model):
                    data = adversary.perturb(data, target)

            optimizer.zero_grad()
            output = model(data)
            loss = F.cross_entropy(output, target, reduction='mean')
            loss.backward()
            optimizer.step()
            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

        model.eval()
        test_clnloss = 0
        clncorrect = 0

        if flag_advtrain:
            test_advloss = 0
            advcorrect = 0

        for clndata, target in test_loader:
            clndata, target = clndata.to(device), target.to(device)
            with torch.no_grad():
                output = model(clndata)
            test_clnloss += F.cross_entropy(output, target,
                                            reduction='sum').item()
            pred = output.max(1, keepdim=True)[1]
            clncorrect += pred.eq(target.view_as(pred)).sum().item()

            if flag_advtrain:
                advdata = adversary.perturb(clndata, target)
                with torch.no_grad():
                    output = model(advdata)
                test_advloss += F.cross_entropy(output,
                                                target,
                                                reduction='sum').item()
                pred = output.max(1, keepdim=True)[1]
                advcorrect += pred.eq(target.view_as(pred)).sum().item()

        test_clnloss /= len(test_loader.dataset)
        print('\nTest set: avg cln loss: {:.4f},'
              ' cln acc: {}/{} ({:.0f}%)\n'.format(
                  test_clnloss, clncorrect, len(test_loader.dataset),
                  100. * clncorrect / len(test_loader.dataset)))
        if flag_advtrain:
            test_advloss /= len(test_loader.dataset)
            print('Test set: avg adv loss: {:.4f},'
                  ' adv acc: {}/{} ({:.0f}%)\n'.format(
                      test_advloss, advcorrect, len(test_loader.dataset),
                      100. * advcorrect / len(test_loader.dataset)))

    torch.save(model.state_dict(), save_path)
Example #16
class AIGAN:
    def __init__(self,
                 device,
                 model,
                 model_num_labels,
                 image_nc,
                 epoch_of_change,
                 box_min,
                 box_max,
                 c_tresh,
                 dataset_name,
                 is_targeted):
        output_nc = image_nc
        self.device = device
        self.model_num_labels = model_num_labels
        self.model = model
        self.input_nc = image_nc
        self.output_nc = output_nc
        self.box_min = box_min
        self.box_max = box_max
        self.c_treshold = c_tresh 
        self.dataset_name = dataset_name
        self.is_targeted = is_targeted
        
        self.models_path = './models/'
        self.writer = SummaryWriter('./checkpoints/logs/', max_queue=100)

        self.gen_input_nc = image_nc

        self.epoch_of_change = epoch_of_change
        self.attacker = LinfPGDAttack(self.model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.3,
                nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=box_min, clip_max=box_max,
                targeted=self.is_targeted)

        if dataset_name=="mnist":
            from models import Generator, Discriminator
        elif dataset_name=="imagenet":
            from imagenet_models import PatchDiscriminator as Discriminator
            from imagenet_models import Resnet224Generator as Generator
        else:
            raise NotImplementedError('dataset [%s] is not implemented' % dataset_name)

        self.netG = Generator(self.gen_input_nc, image_nc).to(device)
        self.netDisc = Discriminator(image_nc).to(device)
        self.netG_file_name = self.models_path + 'netG.pth.tar'
        self.netDisc_file_name = self.models_path + 'netD.pth.tar'

        os.makedirs(self.models_path, exist_ok=True)

        # initialize all weights
        last_netG = find_last_checkpoint(self.netG_file_name)
        last_netDisc = find_last_checkpoint(self.netDisc_file_name)
        if last_netG is not None:
            self.netG.load_state_dict(torch.load(last_netG))
            self.netDisc.load_state_dict(torch.load(last_netDisc))
            *_, self.start_epoch = last_netG.split('.')
            self.iteration = None
            self.start_epoch = int(self.start_epoch)+1
        else:
            self.netG.apply(weights_init)
            self.netDisc.apply(weights_init)
            self.start_epoch = 1
            self.iteration = 0

        # initialize optimizers
        if self.dataset_name == "mnist":
            lr = 10**(-3)
        elif self.dataset_name == "imagenet":
            lr = 10**(-5)
        else:
            raise NotImplementedError('dataset [%s] is not implemented' % dataset_name)

        self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                            lr=lr)
        self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(),
                                            lr=lr)
        self.optG_file_name = self.models_path + 'optG.pth.tar'
        self.optD_file_name = self.models_path + 'optD.pth.tar'

        last_optG = find_last_checkpoint(self.optG_file_name)
        last_optD = find_last_checkpoint(self.optD_file_name)
        if last_optG is not None:
            self.optimizer_G.load_state_dict(torch.load(last_optG))
            self.optimizer_D.load_state_dict((torch.load(last_optD)))

        self._use_attacker = (self.start_epoch < self.epoch_of_change)



    def train_batch(self, x, labels):
        # if training is targeted, labels = targets
       
        # optimize D
        for _ in range(1):
            # # add a clipping trick
            perturbation = torch.clamp(self.netG(x), -self.c_treshold, self.c_treshold)
            # perturbation = self.netG(x)
            adv_images = perturbation + x
            adv_images = torch.clamp(adv_images, self.box_min, self.box_max)
            
            self.optimizer_D.zero_grad()

            if self._use_attacker:
                pgd_images = self.attacker.perturb(x, labels) 
                d_real_logits, d_real_probs = self.netDisc(pgd_images)
            else:
                d_real_logits, d_real_probs = self.netDisc(x) 
            d_fake_logits, d_fake_probs = self.netDisc(adv_images.detach())

            # generate labels for discriminator (optionally smooth labels for stability)
            smooth = 0.0
            d_labels_real = torch.ones_like(d_real_probs, device=self.device) * (1 - smooth)
            d_labels_fake = torch.zeros_like(d_fake_probs, device=self.device)
            
            # discriminator loss
            loss_D_real = F.mse_loss(d_real_probs, d_labels_real)
            loss_D_real.backward()
            loss_D_fake = F.mse_loss(d_fake_probs, d_labels_fake)
            loss_D_fake.backward()
            loss_D_GAN = (loss_D_fake + loss_D_real) #/2
            # loss_D_GAN.backward()
            self.optimizer_D.step()
        
        gc.collect()

        # optimize G
        for _ in range(1):

            self.optimizer_G.zero_grad()

            # cal G's loss in GAN
            d_fake_logits, d_fake_probs = self.netDisc(adv_images.detach()) 
            loss_G_fake = F.mse_loss(d_fake_probs, torch.ones_like(d_fake_probs, device=self.device))
            loss_G_fake.backward(retain_graph=True)

            # # calculate perturbation norm
            loss_perturb = torch.norm(perturbation.view(perturbation.shape[0], -1), 2, dim=1)
            loss_perturb = torch.max(loss_perturb - self.c_treshold, torch.zeros(1, device=self.device))
            loss_perturb = torch.mean(loss_perturb)

            # cal adv loss
            # f_real_logits = self.model(x)
            # f_real_probs = F.softmax(f_real_logits, dim=1)
            f_fake_logits = self.model(adv_images) 
            f_fake_probs = F.softmax(f_fake_logits, dim=1)
            # if training is targeted, this indicates how many examples are classified as the targets;
            # otherwise it shows the accuracy on the adversarial images
            fake_accuracy = torch.mean((torch.argmax(f_fake_probs, 1) == labels).float())
            onehot_labels = torch.eye(self.model_num_labels, device=self.device)[labels.long()]
            loss_adv = adv_loss(f_fake_probs, onehot_labels, self.is_targeted)

            if self.dataset_name == "mnist":
                alambda = 1.
                alpha = 1.
                beta = 1.5
            elif self.dataset_name == "imagenet":
                alambda = 10.0#
                alpha = 1.
                beta = 0.5
            else:
                raise NotImplementedError('dataset [%s] is not implemented' % self.dataset_name)
            loss_G = alambda*loss_adv + alpha*loss_G_fake + beta*loss_perturb
            loss_G.backward()
            self.optimizer_G.step()

        self.writer.add_scalar('iter/train/loss_D_real', loss_D_real.data, global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_D_fake', loss_D_fake.data, global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_G_fake', loss_G_fake.data, global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_perturb', loss_perturb.data, global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_adv', loss_adv.data, global_step=self.iteration)
        self.writer.add_scalar('iter/train/loss_G', loss_G.data, global_step=self.iteration)
        self.writer.add_scalar('iter/train/fake_acc', fake_accuracy.data, global_step=self.iteration)
        self.iteration += 1

        return loss_D_GAN.item(), loss_G_fake.item(), loss_perturb.item(), loss_adv.item(), loss_G.item(), fake_accuracy

    def train(self, train_dataloader, epochs):
        if self.iteration is None:
            self.iteration = (self.start_epoch-1)*len(train_dataloader)+1
        for epoch in range(self.start_epoch, epochs+1):
            if epoch == self.epoch_of_change:
                self._use_attacker = False
            if epoch == 120 and self.dataset_name == "mnist":
                self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                                    lr=0.0001)
                self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(),
                                                    lr=0.0001)
            if epoch == 60 and self.dataset_name == "imagenet":
                self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                                    lr=10**(-7))
                self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(),
                                                    lr=10**(-7))
            if epoch == 200 and self.dataset_name == "mnist":
                self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                                    lr=0.00001)
                self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(),
                                                    lr=0.00001)
            if epoch == 200  and self.dataset_name == "imagenet":
                self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                                    lr=10**(-9))
                self.optimizer_D = torch.optim.Adam(self.netDisc.parameters(),
                                                    lr=10**(-9))
            loss_D_sum = 0
            loss_G_fake_sum = 0
            loss_perturb_sum = 0
            loss_adv_sum = 0
            loss_G_sum = 0
            fake_acc_sum = 0
            for i, data in enumerate(train_dataloader, start=0):
                gc.collect()
                images, labels = data
                images, labels = images.to(self.device), labels.to(self.device)
                
                # # if targeted, create one hot vectors of the target
                # if self.is_targeted:
                #     assert(targets is not None)
                #     # this statement can be used when all targets is equal
                #     # targets = torch.zeros_like(labels) + target 
                #     # commented out because labels will be converted to one-hot during training on the batch
                #     # labels = torch.eye(self.model_num_labels, device=self.device)[targets] #onehot targets 
                #     labels = targets

                loss_D_batch, loss_G_fake_batch, loss_perturb_batch, loss_adv_batch, loss_G_batch, fake_acc_batch = \
                    self.train_batch(images, labels)
                loss_D_sum += loss_D_batch
                loss_G_fake_sum += loss_G_fake_batch
                loss_perturb_sum += loss_perturb_batch
                loss_adv_sum += loss_adv_batch
                loss_G_sum += loss_G_batch
                fake_acc_sum += fake_acc_batch
                if i == len(train_dataloader)-2:
                    perturbation = self.netG(images)
                    self.writer.add_images('train/adversarial_perturbation', perturbation, global_step=epoch)
                    self.writer.add_images('train/adversarial_images', images+perturbation, global_step=epoch)
                    self.writer.add_images('train/adversarial_images_cl', torch.clamp(images+perturbation, self.box_min, self.box_max), global_step=epoch)


            # print statistics
            num_batch = len(train_dataloader)
            self.writer.add_scalar('epoch/train/loss_D', loss_D_sum/num_batch, global_step=epoch)
            self.writer.add_scalar('epoch/train/loss_G_fake', loss_G_fake_sum/num_batch, global_step=epoch)
            self.writer.add_scalar('epoch/train/loss_perturb', loss_perturb_sum/num_batch, global_step=epoch)
            self.writer.add_scalar('epoch/train/loss_adv', loss_adv_sum/num_batch, global_step=epoch)
            self.writer.add_scalar('epoch/train/loss_G', loss_G_sum/num_batch, global_step=epoch)
            self.writer.add_scalar('epoch/train/fake_acc', fake_acc_sum/num_batch, global_step=epoch)

            print("epoch %d:\nloss_D: %.3f, loss_G_fake: %.3f,\
             \nloss_perturb: %.3f, loss_adv: %.3f, \n" %
                  (epoch, loss_D_sum/num_batch, loss_G_fake_sum/num_batch,
                   loss_perturb_sum/num_batch, loss_adv_sum/num_batch))
            
             # save generator
            if epoch%1==0:
                netG_file_name = self.netG_file_name + '.' + str(epoch) 
                torch.save(self.netG.state_dict(), netG_file_name)
                netD_file_name = self.netDisc_file_name + '.' + str(epoch) 
                torch.save(self.netDisc.state_dict(), netD_file_name)
                optG_file_name = self.optG_file_name + '.' + str(epoch) 
                torch.save(self.optimizer_G.state_dict(), optG_file_name)
                optD_file_name = self.optD_file_name + '.' + str(epoch) 
                torch.save(self.optimizer_D.state_dict(), optD_file_name)
            
        #save final model
        torch.save(self.netG.state_dict(), self.netG_file_name )
        torch.save(self.netDisc.state_dict(), self.netDisc_file_name)
        torch.save(self.optimizer_G.state_dict(), self.optG_file_name)
        torch.save(self.optimizer_D.state_dict(), self.optD_file_name)
Example #17
def main():
    # get args
    args = get_args()

    # set up gpus
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    assert torch.cuda.is_available()

    # load models
    if 'gal' in args.model_file:
        leaky_relu = True
    else:
        leaky_relu = False
    ensemble = utils.get_models(args, train=False, as_ensemble=True, model_file=args.model_file, leaky_relu=leaky_relu)
    models = ensemble.models

    train_seed = args.model_file.split('/')[-3]
    train_alg = args.model_file.split('/')[-4]

    # get data loaders
    testloader = utils.get_testloader(args, batch_size=args.batch_size)
    
    # pick out samples that are correctly classified by all submodels
    correct = []
    for m in models:
        correct_m = []
        for (x, y) in testloader:
            x, y = x.cuda(), y.cuda()

            outputs = m(x)
            _, pred = outputs.max(1)
            correct_m.append(pred.eq(y))
        correct_m = torch.cat(correct_m)
        correct.append(correct_m)
    correct = torch.stack(correct, dim=-1).all(-1)
    correct_idx = correct.nonzero().squeeze(-1)

    random.seed(0)
    subset_idx = correct_idx[random.sample(range(correct_idx.size(0)), args.subset_num)].cpu()
    subset_loader = utils.get_testloader(args, batch_size=args.batch_size, shuffle=False, subset_idx=subset_idx)

    # PGD
    eps_list = [0.03]
    random_start = args.random_start
    steps = args.steps

    rob = {}
    rob['random_start'] = args.random_start
    rob['steps'] = args.steps
    
    for eps in tqdm(eps_list, desc='PGD eps', leave=False, position=0):
        correct_or_not_rs = torch.zeros((len(models), len(models)+1, args.subset_num, random_start), dtype=torch.bool)

        for rs in tqdm(range(random_start), desc='Random Start', leave=False, position=1):
            torch.manual_seed(rs)
            test_iter = tqdm(subset_loader, desc='Batch', leave=False, position=2)

            total = 0
            for (x, y) in test_iter:
                x, y = x.cuda(), y.cuda()

                adv_list = []
                for i, m in enumerate(models):
                    adversary = LinfPGDAttack(
                        m, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=eps,
                        nb_iter=steps, eps_iter=eps/5, rand_init=True, clip_min=0., clip_max=1.,
                        targeted=False)
            
                    adv = adversary.perturb(x, y)
                    adv_list.append(adv)

                for i, adv in enumerate(adv_list):
                    for j, m in enumerate(models):
                        if j == i:
                            outputs = m(x)
                            _, pred = outputs.max(1)
                            assert pred.eq(y).all()

                        outputs = m(adv)
                        _, pred = outputs.max(1)

                        correct_or_not_rs[i, j, total:total+x.size(0), rs] = pred.eq(y)
                
                    outputs = ensemble(adv)
                    _, pred = outputs.max(1)
                    correct_or_not_rs[i, len(models), total:total+x.size(0), rs] = pred.eq(y)
                
                total += x.size(0)

        correct_or_not_rs = torch.all(correct_or_not_rs, dim=-1)
        asr = np.zeros((len(models), len(models)+1))

        tqdm.write("eps: {:.2f}".format(eps))

        for i in range(len(models)):
            message = ''
            for j in range(len(models)+1):
                message += '\t{}: {:.2%}'.format(j, 1-correct_or_not_rs[i, j, :].sum().item()/args.subset_num)
                asr[i, j] = 1-correct_or_not_rs[i, j, :].sum().item()/args.subset_num
            tqdm.write(message)
        
        rob[str(eps)] = asr
    
    # save to file
    if args.save_to_file:
        output_root = os.path.join('results', 'transferability', train_alg, train_seed)
        if not os.path.exists(output_root):
            os.makedirs(output_root)
        output_filename = args.model_file.split('/')[-2]
        output = os.path.join(output_root, '.'.join((output_filename, 'pkl')))

        with open(output, 'wb') as f:
            pickle.dump(rob, f, pickle.HIGHEST_PROTOCOL)
Example #18
    def get_metric_eval(self):        

        utr_score=[]
        tr_score=[]
        for i in range(1):
            
            ##TODO: Customise input parameters to methods like LinfPGDAttack
            adversary = LinfPGDAttack(
                self.phi, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=self.args.adv_eps, nb_iter=70, eps_iter=0.01, rand_init=True, clip_min=(0.0-0.1307)/0.3081, clip_max=(1.0-0.1307)/0.3081,
                targeted=False)    

            pred_cln=[]
            pred_untargeted_adv=[]
            pred_targeted_adv=[]
            temp_counter=0
            for batch_idx, (x_e, y_e ,d_e, idx_e) in enumerate(self.test_dataset):
                x_e= x_e.to(self.cuda)
                print(torch.min(x_e), torch.max(x_e))
                y_e= torch.argmax(y_e, dim=1).to(self.cuda)

                adversary.targeted = False
                adv_untargeted = adversary.perturb(x_e, y_e)
                
                target = torch.ones_like(y_e)*3
                adversary.targeted = True
                adv_targeted = adversary.perturb(x_e, target)
                print(torch.min(adv_untargeted), torch.max(adv_untargeted))
                pred_cln.append( predict_from_logits(self.phi(x_e)) )
                pred_untargeted_adv.append( predict_from_logits(self.phi(adv_untargeted)) )
                pred_targeted_adv.append( predict_from_logits(self.phi(adv_targeted)) )
            
                temp_counter+=1
                if temp_counter ==5:
                    break
                    
            pred_cln= torch.cat(pred_cln)
            pred_untargeted_adv= torch.cat(pred_untargeted_adv)
            pred_targeted_adv= torch.cat(pred_targeted_adv)
            utr_score.append( torch.sum( pred_cln != pred_untargeted_adv).detach().cpu().numpy() / pred_cln.shape[0] )
            tr_score.append( torch.sum(pred_cln!= pred_targeted_adv).detach().cpu().numpy() / pred_cln.shape[0] )

#             batch_size=5
#             plt.figure(figsize=(10, 8))
#             for ii in range(batch_size):
#                 plt.subplot(3, batch_size, ii + 1)
#                 _imshow(x_e[ii])
#                 plt.title("clean \n pred: {}".format(pred_cln[ii]))
#                 plt.subplot(3, batch_size, ii + 1 + batch_size)
#                 _imshow(adv_untargeted[ii])
#                 plt.title("untargeted \n adv \n pred: {}".format(
#                     pred_untargeted_adv[ii]))
#                 plt.subplot(3, batch_size, ii + 1 + batch_size * 2)
#                 _imshow(adv_targeted[ii])
#                 plt.title("targeted to 3 \n adv \n pred: {}".format(
#                     pred_targeted_adv[ii]))

#             plt.tight_layout()
#             plt.savefig( self.save_path + '.png' )


        utr_score = np.array(utr_score)
        tr_score = np.array(tr_score)
        print('Misclassification on Untargeted Attack ', np.mean(utr_score), np.std(utr_score), self.args.adv_eps)
        print('Misclassification on Targeted Attack', np.mean(tr_score), np.std(tr_score), self.args.adv_eps)

        self.metric_score['Untargeted Method'] = np.mean(utr_score)
        self.metric_score['Targeted Method'] = np.mean(tr_score)
        
        return
Example #19
def train_adv(args,
              model,
              device,
              train_loader,
              optimizer,
              scheduler,
              epoch,
              cycles,
              mse_parameter=1.0,
              clean_parameter=1.0,
              clean='supclean'):

    model.train()

    correct = 0
    train_loss = 0.0

    model.reset()

    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=args.eps,
                              nb_iter=args.nb_iter,
                              eps_iter=args.eps_iter,
                              rand_init=True,
                              clip_min=-1.0,
                              clip_max=1.0,
                              targeted=False)

    print(len(train_loader))
    for batch_idx, (images, targets) in enumerate(train_loader):

        optimizer.zero_grad()
        images = images.cuda()
        targets = targets.cuda()

        model.reset()
        with ctx_noparamgrad_and_eval(model):
            adv_images = adversary.perturb(images, targets)

        images_all = torch.cat((images, adv_images), 0)

        # Reset the model latent variables
        model.reset()
        if (args.dataset == 'cifar10'):
            logits, orig_feature_all, block1_all, block2_all, block3_all = model(
                images_all, first=True, inter=True)
        elif (args.dataset == 'fashion'):
            logits, orig_feature_all, block1_all, block2_all = model(
                images_all, first=True, inter=True)
        ff_prev = orig_feature_all
        # f1 the original feature of clean images
        orig_feature, _ = torch.split(orig_feature_all, images.size(0))
        block1_clean, _ = torch.split(block1_all, images.size(0))
        block2_clean, _ = torch.split(block2_all, images.size(0))
        if (args.dataset == 'cifar10'):
            block3_clean, _ = torch.split(block3_all, images.size(0))
        logits_clean, logits_adv = torch.split(logits, images.size(0))

        if not ('no' in clean):
            loss = (clean_parameter * F.cross_entropy(logits_clean, targets) +
                    F.cross_entropy(logits_adv, targets)) / (2 * (cycles + 1))
        else:
            loss = F.cross_entropy(logits_adv, targets) / (cycles + 1)
        for i_cycle in range(cycles):
            if (args.dataset == 'cifar10'):
                recon, block1_recon, block2_recon, block3_recon = model(
                    logits, step='backward', inter_recon=True)
            elif (args.dataset == 'fashion'):
                recon, block1_recon, block2_recon = model(logits,
                                                          step='backward',
                                                          inter_recon=True)
            recon_clean, recon_adv = torch.split(recon, images.size(0))
            recon_block1_clean, recon_block1_adv = torch.split(
                block1_recon, images.size(0))
            recon_block2_clean, recon_block2_adv = torch.split(
                block2_recon, images.size(0))
            if (args.dataset == 'cifar10'):
                recon_block3_clean, recon_block3_adv = torch.split(
                    block3_recon, images.size(0))
                loss += (F.mse_loss(recon_adv, orig_feature) +
                         F.mse_loss(recon_block1_adv, block1_clean) +
                         F.mse_loss(recon_block2_adv, block2_clean) +
                         F.mse_loss(recon_block3_adv, block3_clean)
                         ) * mse_parameter / (4 * cycles)
            elif (args.dataset == 'fashion'):
                loss += (F.mse_loss(recon_adv, orig_feature) +
                         F.mse_loss(recon_block1_adv, block1_clean) +
                         F.mse_loss(recon_block2_adv, block2_clean)
                         ) * mse_parameter / (3 * cycles)

            # feedforward
            ff_current = ff_prev + args.res_parameter * (recon - ff_prev)
            logits = model(ff_current, first=False)
            ff_prev = ff_current
            logits_clean, logits_adv = torch.split(logits, images.size(0))
            if not ('no' in clean):
                loss += (
                    clean_parameter * F.cross_entropy(logits_clean, targets) +
                    F.cross_entropy(logits_adv, targets)) / (2 * (cycles + 1))
            else:
                loss += F.cross_entropy(logits_adv, targets) / (cycles + 1)

        pred = logits_clean.argmax(
            dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(targets.view_as(pred)).sum().item()

        loss.backward()
        if (args.grad_clip):
            nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        scheduler.step()
        train_loss += loss.item()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(images), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    train_loss /= len(train_loader)
    acc = correct / len(train_loader.dataset)
    return train_loss, acc
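# Illustrative sketch: a minimal, self-contained version of the clean + adversarial
# training step that train_adv() above builds on, without the feedback/reconstruction
# cycles. The helper name adv_train_step and its default hyperparameters are
# hypothetical; the sketch assumes torch and advertorch are available.
import torch.nn as nn
import torch.nn.functional as F
from advertorch.attacks import LinfPGDAttack
from advertorch.context import ctx_noparamgrad_and_eval


def adv_train_step(model, optimizer, images, targets, eps=8 / 255, nb_iter=10):
    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=eps,
                              nb_iter=nb_iter,
                              eps_iter=2.5 * eps / nb_iter,
                              rand_init=True,
                              clip_min=0.0,
                              clip_max=1.0,
                              targeted=False)
    # Craft adversarial examples without accumulating gradients on the weights.
    with ctx_noparamgrad_and_eval(model):
        adv_images = adversary.perturb(images, targets)
    model.train()
    optimizer.zero_grad()
    # Average the clean and adversarial cross-entropy losses
    # (the clean_parameter = 1, zero-cycle case of train_adv above).
    loss = 0.5 * (F.cross_entropy(model(images), targets) +
                  F.cross_entropy(model(adv_images), targets))
    loss.backward()
    optimizer.step()
    return loss.item()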
示例#20
0
                          rand_init=True,
                          clip_min=0.0,
                          clip_max=1.0,
                          targeted=False)
correct_1 = 0.0
correct_5 = 0.0
attack_correct_1 = 0.0
attack_correct_5 = 0.0
total = 0
for n_iter, (image, label) in enumerate(cifar100_test_loader):
    print("iteration: {}\ttotal {} iterations".format(
        n_iter + 1, len(cifar100_test_loader)))
    image = Variable(image).cuda()
    label = Variable(label).cuda()
    output = model(image)
    adv_untargeted = adversary.perturb(image, label)
    attack_output = model(adv_untargeted)
    for i in range(16):
        save_image_tensor2pillow(image[i], str(i) + '.jpg')
        save_image_tensor2pillow(adv_untargeted[i],
                                 'attack_' + str(i) + '.jpg')
    _, pred = output.topk(5, 1, largest=True, sorted=True)
    _attack, pred_attack = attack_output.topk(5, 1, largest=True, sorted=True)
    # pred is the class index of output
    label = label.view(label.size(0), -1).expand_as(pred)
    correct = pred.eq(label).float()
    attack_correct = pred_attack.eq(label).float()
    #compute top 5
    correct_5 += correct[:, :5].sum()
    #compute top1
    correct_1 += correct[:, :1].sum()
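# Note: the snippet above is cut off before the adversarial counters are updated
# and the final accuracies are printed. A plausible completion, mirroring the
# clean-accuracy bookkeeping already shown (the names come from the truncated
# code; the exact continuation is an assumption):
#     attack_correct_5 += attack_correct[:, :5].sum()
#     attack_correct_1 += attack_correct[:, :1].sum()
#     total += label.size(0)
# and, after the loop:
#     print("Top-1 acc: {:.4f}  adv Top-1 acc: {:.4f}".format(
#         correct_1 / total, attack_correct_1 / total))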
示例#21
0
def validate(val_loader, model, criterion, args):
    print("validating")
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    with torch.no_grad():
        # adversary = LinfPGDAttack(
        # model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.15,
        # nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
        # targeted=False)
        if args.attack == 'l2_3':
            adversary = L2PGDAttack(
                model,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=14.2737,
                nb_iter=20,
                eps_iter=1.784,
                rand_init=True,
                clip_min=-2.1179,
                clip_max=2.6400,
                targeted=False)
        if args.attack == 'l2_0.15':
            adversary = L2PGDAttack(
                model,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=0.7137,
                nb_iter=20,
                eps_iter=0.09,
                rand_init=True,
                clip_min=-2.1179,
                clip_max=2.6400,
                targeted=False)
        if args.attack == 'linf1_1020':
            adversary = LinfPGDAttack(
                model,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=4.7579 / 1020,
                nb_iter=20,
                eps_iter=0.000233,
                rand_init=True,
                clip_min=-2.1179,
                clip_max=2.6400,
                targeted=False)
        if args.attack == 'linf4_255':
            adversary = LinfPGDAttack(
                model,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=19.0316 / 255,
                nb_iter=20,
                eps_iter=47.579 / 5100,
                rand_init=True,
                clip_min=-2.1179,
                clip_max=2.6400,
                targeted=False)
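        # Note: the clip bounds above correspond to ImageNet-normalized pixel
        # values, e.g. (0 - 0.485) / 0.229 ≈ -2.1179 and
        # (1 - 0.406) / 0.225 ≈ 2.6400; the eps / eps_iter values appear to be
        # pixel-space budgets rescaled into the same normalized space.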


        # adversary = L1PGDAttack(
        #     model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=190.316,
        #     nb_iter=20, eps_iter=23.7895, rand_init=True, clip_min=-2.1179,
        #     clip_max=2.6400, targeted=False)
        end = time.time()
        print("enumerate dataloader")
        for i, (images, target) in enumerate(val_loader):
            # print(images)
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            if torch.cuda.is_available():
                target = target.cuda(args.gpu, non_blocking=True)
            with torch.enable_grad():
                adv_untargeted = adversary.perturb(images, target)
            # compute output
            # if args.arch=='simclr':
            #     output = model(adv_untargeted)
            # elif args.arch=='linf_4' or args.arch=='linf_8' or args.arch=='l2_3':
            #     output= model((adv_untargeted))
            # else:
            output = model(adv_untargeted)
            loss = criterion(output, target)
            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
    print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                top5=top5))
    accuracy_array = []
    accuracy_array.append(top1.avg.to('cpu'))
    accuracy_array.append(top5.avg.to('cpu'))
    np.save(
        f'/content/gdrive/MyDrive/model_adv_loss/{args.attack}/{args.arch}_accuracy.npy',
        accuracy_array)
    return top1.avg, top5.avg
示例#22
0
def _generate_adv_file(attack_method, num_classes, epsilon, set_size):
    # load model
    model = torch.load(os.path.join("checkpoint", "resnet50_epoch_22.pth"))
    model = model.cuda()

    #define attack
    if attack_method == "PGD":
        adversary = LinfPGDAttack(model,
                                  loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                  eps=epsilon,
                                  nb_iter=20,
                                  eps_iter=0.01,
                                  rand_init=True,
                                  clip_min=0.0,
                                  clip_max=1.0,
                                  targeted=False)
    elif attack_method == "FGSM":
        adversary = GradientSignAttack(
            model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            clip_min=0.0,
            clip_max=1.0,
            eps=epsilon,
            targeted=False)
    elif attack_method == "Momentum":
        adversary = MomentumIterativeAttack(
            model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=epsilon,
            nb_iter=20,
            decay_factor=1.0,
            eps_iter=1.0,
            clip_min=0.0,
            clip_max=1.0,
            targeted=False,
            ord=np.inf)
    elif attack_method == "STA":
        adversary = SpatialTransformAttack(
            model,
            num_classes=num_classes,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            initial_const=0.05,
            max_iterations=500,
            search_steps=1,
            confidence=0,
            clip_min=0.0,
            clip_max=1.0,
            targeted=False,
            abort_early=True)
    elif attack_method == "DeepFool":
        adversary = DeepFool(model,
                             max_iter=20,
                             clip_max=1.0,
                             clip_min=0.0,
                             epsilon=epsilon)
    elif attack_method == "CW":
        adversary = CarliniWagnerL2Attack(
            model,
            num_classes=num_classes,
            epsilon=epsilon,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            max_iterations=20,
            confidence=0,
            clip_min=0.0,
            clip_max=1.0,
            targeted=False,
            abort_early=True)

    # version two
    h5_store = h5py.File("data/test_tiny_ImageNet_" + str(set_size) + ".h5",
                         "r")
    data = h5_store['data'][:]
    target = h5_store['true_target'][:]
    data = torch.from_numpy(data)
    target = torch.from_numpy(target)
    test_dataset = ImageNetDataset(data, target)
    test_loader = DataLoader(dataset=test_dataset,
                             num_workers=4,
                             drop_last=True,
                             batch_size=50,
                             shuffle=False)

    torch.manual_seed(0)
    test_adv = np.zeros([set_size, 3, 64, 64])
    test_true_target = np.zeros([set_size])

    # perturb
    for batch_idx, (clndata, target) in enumerate(test_loader):
        print("{}/{}".format(batch_idx, set_size // 50))
        clndata, target = clndata.cuda().float(), target.cuda().long()
        with ctx_noparamgrad_and_eval(model):
            # print(target)
            advdata = adversary.perturb(clndata, target)
            test_adv[batch_idx * 50:(batch_idx + 1) *
                     50, :, :, :] = advdata.detach().cpu().numpy()
        test_true_target[batch_idx * 50:(batch_idx + 1) *
                         50] = target.cpu().numpy()

    print("test_adv.shape:{}".format(test_adv.shape))
    print("test_true_target.shape:{}".format(test_true_target.shape))
    del model

    h5_store = h5py.File(
        "data/test_tiny_ImageNet_" + str(set_size) + "_adv_" +
        str(attack_method) + "_" + str(epsilon) + ".h5", 'w')
    h5_store.create_dataset('data', data=test_adv)
    h5_store.create_dataset('true_target', data=test_true_target)
    h5_store.close()
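# Illustrative sketch: how a file written by _generate_adv_file() could be read
# back for evaluation. The loader below is hypothetical, and the example path
# only follows the naming pattern used above; it assumes the file has already
# been generated.
import h5py
import torch


def load_adv_file(path):
    with h5py.File(path, "r") as h5_store:
        data = torch.from_numpy(h5_store['data'][:]).float()
        target = torch.from_numpy(h5_store['true_target'][:]).long()
    return data, target


# e.g. adv_x, adv_y = load_adv_file("data/test_tiny_ImageNet_1000_adv_PGD_0.03.h5")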
示例#23
0
adversary = LinfPGDAttack(
    net, loss_fn=nn.CrossEntropyLoss().cuda(), eps=16/255,
    nb_iter=7, eps_iter=4/255, rand_init=True, clip_min=-1.0, clip_max=1.0,
    targeted=False)
if args.alp:
    criterion_alp = nn.MSELoss().cuda()

for epoch in range(1, args.nepoch+1):
    net.train()

    for batch_idx, (inputs, labels) in enumerate(trloader):
        inputs_cls, labels_cls = inputs.cuda(), labels.cuda()
        optimizer.zero_grad()

        with ctx_noparamgrad_and_eval(net):
            inputs_adv = adversary.perturb(inputs_cls, labels_cls)

        if args.weight == 0:
            outputs_adv = net(inputs_adv)
            loss = criterion(outputs_adv, labels_cls)
        else:
            inputs_all = torch.cat([inputs_cls, inputs_adv], dim=0)
            labels_all = torch.cat([labels_cls, labels_cls], dim=0)
            outputs_all = net(inputs_all)
            outputs_cls, outputs_adv = torch.split(outputs_all, inputs_cls.size(0), dim=0)
            loss = criterion(outputs_cls, labels_cls)

            if args.alp:
                loss += args.weight * criterion_alp(outputs_cls, outputs_adv)
            else:
                loss += args.weight * criterion(outputs_adv, labels_cls)
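# Note: the snippet is cut off here; a training loop of this shape would normally
# close the iteration with something like
#     loss.backward()
#     optimizer.step()
# (an assumption, since the original continuation is not shown).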
示例#24
0
    labelt = labelt.cuda()
    mlabelt = mlabelt.cuda()
    blabelt = blabelt.cuda()
    """
    inputp, labelp, mlabelp, blabelp = get_test2(args.batch_size)
    inputp = torch.FloatTensor(inputp)
    labelp = torch.FloatTensor(labelp)
    mlabelp = torch.FloatTensor(mlabelp)
    blabelp = torch.FloatTensor(blabelp)
    labelp = labelp.cuda()
    mlabelp = mlabelp.cuda()
    blabelp = blabelp.cuda()
    inputp = inputp.cuda()
    """

    adv_inputs = adversary.perturb(inputt, labelt)  # inputs
    #adv_inputs = inputt

    noise = torch.normal(0, 1, inputt.size()).cuda()
    adv_inputs = torch.clamp(adv_inputs + noise, 0, 255)
    # print(torch.mean(torch.abs())
    outt, outt2, outt3 = net(adv_inputs)
    out1, out2, out3 = net(inputt)
    noise_avg += torch.mean(torch.abs(adv_inputs - inputt))

    if iter % args.print_freq == 0:
        top1 = accuracy(outt.data, blabelt, 1)
        top2 = accuracy(outt2.data, mlabelt, 2)
        #top3 = accuracy(outt3.data, labelt, 3)
        top3 = accuracy(outt3.data[:, :19], labelt[:, :19], 2)
        top4 = accuracy(outt3.data[:, :19], outt2[:, :19], 2)
    def train_epoch(self,   model: nn.Module, train_loader: DataLoader,
                    val_clean_loader: DataLoader, val_triggered_loader: DataLoader,
                    epoch_num: int, use_amp: bool = False):
        """
        Runs one epoch of training on the specified model

        :param model: the model to train for one epoch
        :param train_loader: a DataLoader object pointing to the training dataset
        :param val_clean_loader: a DataLoader object pointing to the validation dataset that is clean
        :param val_triggered_loader: a DataLoader object pointing to the validation dataset that is triggered
        :param epoch_num: the epoch number that is being trained
        :param use_amp: if True, uses automated mixed precision for FP16 training.
        :return: a list of statistics for batches where statistics were computed
        """

        # Probability that an adversarial attack is applied in each iteration
        attack_prob = self.optimizer_cfg.training_cfg.adv_training_ratio
        pid = os.getpid()
        train_dataset_len = len(train_loader.dataset)
        loop = tqdm(train_loader, disable=self.optimizer_cfg.reporting_cfg.disable_progress_bar)

        scaler = None
        if use_amp:
            scaler = torch.cuda.amp.GradScaler()

        train_n_correct, train_n_total = None, None

        # Define parameters of the adversarial attack
        attack_eps = float(self.optimizer_cfg.training_cfg.adv_training_eps)
        attack_iterations = int(self.optimizer_cfg.training_cfg.adv_training_iterations)
        eps_iter = (2.0 * attack_eps) / float(attack_iterations)
        attack = LinfPGDAttack(
            predict=model,
            loss_fn=nn.CrossEntropyLoss(reduction="sum"),
            eps=attack_eps,
            nb_iter=attack_iterations,
            eps_iter=eps_iter)
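        # Note: eps_iter = 2 * eps / nb_iter is a common PGD step-size heuristic;
        # e.g. with attack_eps = 8/255 and 10 iterations it gives
        # eps_iter = 2 * (8/255) / 10 ≈ 0.00627.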

        sum_batchmean_train_loss = 0
        running_train_acc = 0
        num_batches = len(train_loader)
        model.train()
        for batch_idx, (x, y_truth) in enumerate(loop):
            x = x.to(self.device)
            y_truth = y_truth.to(self.device)

            # put network into training mode & zero out previous gradient computations
            self.optimizer.zero_grad()

            # get predictions based on input & weights learned so far
            if use_amp:
                with torch.cuda.amp.autocast():
                    # add adversarial noise via l_inf PGD attack
                    # only apply attack to attack_prob of the batches
                    if attack_prob and np.random.rand() <= attack_prob:
                        with ctx_noparamgrad_and_eval(model):
                            x = attack.perturb(x, y_truth)
                    y_hat = model(x)
                    # compute metrics
                    batch_train_loss = self._eval_loss_function(y_hat, y_truth)

            else:
                # add adversarial noise via l_inf PGD attack
                if attack_prob and np.random.rand() <= attack_prob:
                    with ctx_noparamgrad_and_eval(model):
                        x = attack.perturb(x, y_truth)
                y_hat = model(x)
                batch_train_loss = self._eval_loss_function(y_hat, y_truth)

            sum_batchmean_train_loss += batch_train_loss.item()

            running_train_acc, train_n_total, train_n_correct = default_optimizer._running_eval_acc(y_hat, y_truth,
                                                                                  n_total=train_n_total,
                                                                                  n_correct=train_n_correct,
                                                                                  soft_to_hard_fn=self.soft_to_hard_fn,
                                                                                  soft_to_hard_fn_kwargs=self.soft_to_hard_fn_kwargs)

            # compute gradient
            if use_amp:
                # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
                # Backward passes under autocast are not recommended.
                # Backward ops run in the same dtype autocast chose for corresponding forward ops.
                scaler.scale(batch_train_loss).backward()
            else:
                if np.isnan(sum_batchmean_train_loss) or np.isnan(running_train_acc):
                    default_optimizer._save_nandata(x, y_hat, y_truth, batch_train_loss, sum_batchmean_train_loss, running_train_acc,
                                  train_n_total, train_n_correct, model)

                batch_train_loss.backward()

            # perform gradient clipping if configured
            if self.optimizer_cfg.training_cfg.clip_grad:
                if use_amp:
                    # Unscales the gradients of optimizer's assigned params in-place
                    scaler.unscale_(self.optimizer)

                if self.optimizer_cfg.training_cfg.clip_type == 'norm':
                    # clip_grad_norm_ modifies gradients in place
                    #  see: https://pytorch.org/docs/stable/_modules/torch/nn/utils/clip_grad.html
                    torch_clip_grad.clip_grad_norm_(model.parameters(), self.optimizer_cfg.training_cfg.clip_val,
                                                    **self.optimizer_cfg.training_cfg.clip_kwargs)
                elif self.optimizer_cfg.training_cfg.clip_type == 'val':
                    # clip_grad_val_ modifies gradients in place
                    #  see: https://pytorch.org/docs/stable/_modules/torch/nn/utils/clip_grad.html
                    torch_clip_grad.clip_grad_value_(
                        model.parameters(), self.optimizer_cfg.training_cfg.clip_val)
                else:
                    msg = "Unknown clipping type for gradient clipping!"
                    logger.error(msg)
                    raise ValueError(msg)

            if use_amp:
                # scaler.step() first unscales the gradients of the optimizer's assigned params.
                # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
                # otherwise, optimizer.step() is skipped.
                scaler.step(self.optimizer)
                # Updates the scale for next iteration.
                scaler.update()
            else:
                self.optimizer.step()

            # report batch statistics to tensorboard
            if self.tb_writer:
                try:
                    batch_num = int(epoch_num * num_batches + batch_idx)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name + '-train_loss',
                                              batch_train_loss.item(), global_step=batch_num)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name + '-running_train_acc',
                                              running_train_acc, global_step=batch_num)
                except:
                    # TODO: catch specific exceptions
                    pass

            loop.set_description('Epoch {}/{}'.format(epoch_num + 1, self.num_epochs))
            loop.set_postfix(avg_train_loss=batch_train_loss.item())

            if batch_idx % self.num_batches_per_logmsg == 0:
                logger.info('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tTrainLoss: {:.6f}\tTrainAcc: {:.6f}'.format(
                    pid, epoch_num, batch_idx * len(x), train_dataset_len,
                    100. * batch_idx / num_batches, batch_train_loss.item(), running_train_acc))

        train_stats = EpochTrainStatistics(running_train_acc, sum_batchmean_train_loss / float(num_batches))

        # if we have validation data, we compute on the validation dataset
        num_val_batches_clean = len(val_clean_loader)
        if num_val_batches_clean > 0:
            logger.info('Running Validation on Clean Data')
            running_val_clean_acc, _, _, val_clean_loss = \
                default_optimizer._eval_acc(val_clean_loader, model, self.device,
                          self.soft_to_hard_fn, self.soft_to_hard_fn_kwargs, self._eval_loss_function)
        else:
            logger.info("No dataset computed for validation on clean dataset!")
            running_val_clean_acc = None
            val_clean_loss = None

        num_val_batches_triggered = len(val_triggered_loader)
        if num_val_batches_triggered > 0:
            logger.info('Running Validation on Triggered Data')
            running_val_triggered_acc, _, _, val_triggered_loss = \
                default_optimizer._eval_acc(val_triggered_loader, model, self.device,
                          self.soft_to_hard_fn, self.soft_to_hard_fn_kwargs, self._eval_loss_function)
        else:
            logger.info(
                "No dataset computed for validation on triggered dataset!")
            running_val_triggered_acc = None
            val_triggered_loss = None

        validation_stats = EpochValidationStatistics(running_val_clean_acc, val_clean_loss,
                                                     running_val_triggered_acc, val_triggered_loss)
        if num_val_batches_clean > 0:
            logger.info('{}\tTrain Epoch: {} \tCleanValLoss: {:.6f}\tCleanValAcc: {:.6f}'.format(
                pid, epoch_num, val_clean_loss, running_val_clean_acc))
        if num_val_batches_triggered > 0:
            logger.info('{}\tTrain Epoch: {} \tTriggeredValLoss: {:.6f}\tTriggeredValAcc: {:.6f}'.format(
                pid, epoch_num, val_triggered_loss, running_val_triggered_acc))

        if self.tb_writer:
            try:
                batch_num = int((epoch_num + 1) * num_batches)
                if num_val_batches_clean > 0:
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-clean-val-loss', val_clean_loss, global_step=batch_num)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-clean-val_acc', running_val_clean_acc, global_step=batch_num)
                if num_val_batches_triggered > 0:
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-triggered-val-loss', val_triggered_loss, global_step=batch_num)
                    self.tb_writer.add_scalar(self.optimizer_cfg.reporting_cfg.experiment_name +
                                              '-triggered-val_acc', running_val_triggered_acc, global_step=batch_num)
            except:
                pass

        # update the lr-scheduler if necessary
        if self.lr_scheduler is not None:
            if self.optimizer_cfg.training_cfg.lr_scheduler_call_arg is None:
                self.lr_scheduler.step()
            elif self.optimizer_cfg.training_cfg.lr_scheduler_call_arg.lower() == 'val_acc':
                val_acc = validation_stats.get_val_acc()
                if val_acc is not None:
                    self.lr_scheduler.step(val_acc)
                else:
                    msg = "val_clean_acc not defined b/c validation dataset is not defined! Ignoring LR step!"
                    logger.warning(msg)
            elif self.optimizer_cfg.training_cfg.lr_scheduler_call_arg.lower() == 'val_loss':
                val_loss = validation_stats.get_val_loss()
                if val_loss is not None:
                    self.lr_scheduler.step(val_loss)
                else:
                    msg = "val_clean_loss not defined b/c validation dataset is not defined! Ignoring LR step!"
                    logger.warning(msg)
            else:
                msg = "Unknown mode for calling lr_scheduler!"
                logger.error(msg)
                raise ValueError(msg)

        return train_stats, validation_stats
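# Illustrative sketch: the distinctive pattern in train_epoch() above is that PGD
# is only applied to a random fraction of batches (attack_prob). A stripped-down,
# hypothetical version of that decision, assuming advertorch and numpy are
# installed:
import numpy as np
from advertorch.context import ctx_noparamgrad_and_eval


def maybe_perturb(attack, model, x, y, attack_prob):
    # With probability attack_prob, replace the batch by adversarial examples.
    if attack_prob and np.random.rand() <= attack_prob:
        with ctx_noparamgrad_and_eval(model):
            x = attack.perturb(x, y)
    return x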
示例#26
0
def main():
    data = []
    torch.cuda.empty_cache()

    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=60000,
                        metavar='N',
                        help='input batch size for training (default: 60000)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=10000,
                        metavar='N',
                        help='input batch size for testing (default: 10000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=1,
                        metavar='N',
                        help='number of epochs to train (default: 1)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')

    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=60000,
                                               shuffle=False,
                                               **kwargs)

    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=10000,
                                              shuffle=False,
                                              **kwargs)

    test_loader_small = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                                    batch_size=5,
                                                    shuffle=False,
                                                    **kwargs)

    strage = device
    model = Net()
    model.to(device)
    checkpoint = torch.load('mnist_cnn.pt',
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint)

    layer_sel = 1
    Associations = []
    for xin in range(4):
        temp = torch.load('map_association_' + str(xin) + '.pt')
        Associations.append(temp)

    hookF = [Hook(layer[1]) for layer in list(model._modules.items())]

    # Let's test how this association is predictive of the test set

    adversary = LinfPGDAttack(model,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=0.2,
                              nb_iter=40,
                              eps_iter=0.01,
                              rand_init=True,
                              clip_min=0.0,
                              clip_max=1.0,
                              targeted=False)

    for data, target in test_loader_small:
        break
    labels_ = target
    data = data.to(device)
    labels_ = labels_.to(device)

    adv_untargeted = adversary.perturb(data, labels_)
    #adv_untargeted=data
    print(adv_untargeted.size())

    pred_n = test(args, model, device, test_loader_small, hookF,
                  adv_untargeted)

    for ttin in range(3):
        layer_sel = ttin
        act_map = Associations[layer_sel]

        roV = intermediate_output[layer_sel]

        sel = Associations[layer_sel]
        sel = sel.numpy()

        wm = torch.load('wm_' + str(layer_sel) + '.pt',
                        map_location=lambda storage, loc: storage)

        fp = open('labels_' + str(layer_sel) +
                  '.json')  # labels for wm i.e., the labels of the test set.
        label = json.load(fp)
        fp.close()
        cog = CogMem_load(wm, label)

        for data, target in test_loader_small:
            break
        labels_ = target
        cog.foward(roV)
        pred = cog.pred.long()
        #pred=cog.pred.long().cpu().numpy()

        total_1 = 0
        total_2 = 0
        total_3 = 0
        total_4 = 0
        total_5 = 0
        cons1 = 0
        cons2 = 0
        temp = 0
        corr = np.zeros((10, 10))
        mem = []
        #print ('sel shape',sel.shape)
        #print (cog.image.size())
        #print ('pred',pred.size())
        for xi, xin in enumerate(pred_n):
            cls = xin.item()
            label_t = labels_[xi].long().item()
            v2 = cog.image[:, xi]

            idx = torch.argsort(v2).cpu().numpy()
            mem.append(v2.cpu().numpy())
            idx = np.flip(idx, 0)[:3]
            tar = sel[idx, :]
            temp_v = np.zeros(10)
            for zin in idx:
                temp_v = temp_v + sel[zin, :] * v2[zin].item()

            #print (temp_v)
            #tar=sel[idx,:]
            #idx3=cog.labels[idx].long().item()
            idx2 = np.argmax(temp_v)
            idx3 = np.argsort(temp_v)
            idx3 = np.flip(idx3, 0)[:3]
            sum_v = np.sum(np.exp(temp_v))
            #print (xi, idx, cls, idx3, idx2)
            # cls: prediction, idx2: max from association, idx3, label from truth, idx_truth: ground truth
            if cls == idx2:
                total_1 = total_1 + 1
            if label_t == cls:
                total_2 = total_2 + 1

            if label_t == idx2:
                total_3 = total_3 + 1

            if label_t != cls:
                temp = temp + 1
            if cls == idx2:
                total_4 = total_4 + 1
            else:
                temp = temp + 1
                if cls == idx2:
                    total_5 = total_5 + 1

            if cls in idx3:
                cons1 = cons1 + 1
            if label_t in idx3:
                cons2 = cons2 + 1
            for c1 in idx3:
                if c1 == cls:
                    for c2 in idx3:
                        if c1 != c2:
                            corr[c1,
                                 c2] = corr[c1,
                                            c2] + np.exp(temp_v[c2]) / sum_v

        max_v = np.amax(corr)
        #corr=corr/500.0
        print('pred. of prediction:', total_1, 'global pred. of actual class:',
              total_2, 'local pred. of actual class:', total_3)
        #print ('cons1',cons1,'cons2',cons2)
        #print (idx3)

        mem = np.array(mem)

        data = np.loadtxt('mem_' + str(layer_sel) + '.txt')
        temp = np.argsort(mem[0])
        temp = np.flip(temp, 0)
        print('adv', temp[:3])
        #print (data.shape)
        temp = np.argsort(data[0, :])
        temp = np.flip(temp, 0)
        print('clean', temp[:3])
        diff = data[0, :] - mem[0]
        #print ('diff',diff)
        #print (np.amax(diff),np.mean(diff),np.std(diff),np.amin(diff))
        #pylab.figure(ttin+1)
        #pylab.plot(data[0,:], label='clean')
        #pylab.plot(mem[0], label='adv')
        #pylab.legend()

        torch.cuda.empty_cache()
        del cog, roV

    pylab.show()
def experiment(num_shared_classes, percent_shared_data, n_epochs=200,
               batch_size=128, eps=.3, adv_steps=100, learning_rate=.0004,
               gpu_num=1, adv_training=False, task="CIFAR100"):
    print("epochs,batch_size,eps,adv_steps,learning_rate,task")
    print(n_epochs,batch_size,eps,adv_steps,learning_rate,task)

    cuda = torch.cuda.is_available()

    transform_test = transforms.Compose(
            [transforms.ToTensor(),transforms.Normalize((0.5070751592371323, 0.48654887331495095, 0.4409178433670343), (0.2673342858792401, 0.2564384629170883, 0.27615047132568404))])

    transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5070751592371323, 0.48654887331495095, 0.4409178433670343), (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)),
            ])

    if task.upper() == "CIFAR100":
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ])

        train_data = CIFAR100("data/",transform=transform_train, download=False)
        test_data = CIFAR100("data/", train=False, transform=transform_test, download=False)
    elif task.upper() == "IMAGENET":
        train_data = ImageNet('data/imagenet', split='train', download=False)
        test_data = ImageNet('data/imagenet', split='val', download=False)
    elif task.upper() == "FASHIONMNIST":
        transform = transforms.Compose([transforms.Lambda(lambda image: image.convert('RGB')),
                                        transforms.ToTensor()
                             ])

        train_data = FashionMNIST('data/fashionmnist',transform=transform, train=True, download=False)
        test_data = FashionMNIST('data/fashionmnist', transform=transform, train=False, download=False)
    else:
        train_data = CIFAR10("data/",transform=transform_train,download=False)
        test_data = CIFAR10("data/", train=False, transform=transform_test,download=False)

        # model1 = ResNet(ResidualBlock, [2, 2, 2],num_classes=10)
        # model2 = ResNet(ResidualBlock, [2, 2, 2],num_classes=10)


    all_classes = set([x[1] for x in train_data])
    shared_classes = random.sample(list(all_classes), num_shared_classes)
    split_classes = [c for c in all_classes if c not in shared_classes] # get classes not shared


    if len(split_classes) % 2 == 1: # if we have an odd #, randomly remove one so that number of classes will be the same for each model
        split_classes.pop(random.randint(0, len(split_classes) - 1))

    model1_split = random.sample(split_classes, len(split_classes) // 2)
    model2_split = [c for c in split_classes if c not in model1_split]


    model1_classes = model1_split
    model2_classes = model2_split

    model1_classes.sort()
    model2_classes.sort()

    # DEBUG:
    print("shared classes: {}".format(shared_classes))
    print("model1 classes: {}".format(model1_classes))
    print("model2 classes: {}".format(model2_classes))

    model1_x_train = []
    model1_y_train = []

    model2_x_train = []
    model2_y_train = []

    shared_x_train = []
    shared_y_train = []

    # train data splits
    for index in range(len(train_data)):

        current_class = train_data[index][1]

        # model 1
        if current_class in model1_classes:
            model1_x_train.append(train_data[index][0])
            model1_y_train.append(train_data[index][1])

        # model 2
        if current_class in model2_classes:
            model2_x_train.append(train_data[index][0])
            model2_y_train.append(train_data[index][1])


    # split by percentage for classes per model1

    if percent_shared_data < 100:

        new_model1_x_train = []
        new_model1_y_train = []

        for curr_class in model1_classes:
            temp_data_x = []
            temp_data_y = []

            # get all examples of class
            for i in range(len(model1_x_train)):
                if(model1_y_train[i] == curr_class):
                    temp_data_x.append(model1_x_train[i])
                    temp_data_y.append(model1_y_train[i])

            # split data by half the size
            total_size = len(temp_data_x)
            shared_size = int(total_size * .5)

            shared_indices = random.sample(list(range(len(temp_data_x))),shared_size)

            new_model1_x_train += [temp_data_x[i] for i in shared_indices]
            new_model1_y_train += [temp_data_y[i] for i in shared_indices]


        # split for model2

        new_model2_x_train = []
        new_model2_y_train = []

        for curr_class in model2_classes:
            temp_data_x = []
            temp_data_y = []

            # get all examples of class
            for i in range(len(model2_x_train)):
                if(model2_y_train[i] == curr_class):
                    temp_data_x.append(model2_x_train[i])
                    temp_data_y.append(model2_y_train[i])

            # split data by half the size
            total_size = len(temp_data_x)
            shared_size = int(total_size * .5)

            shared_indices = random.sample(list(range(len(temp_data_x))),shared_size)

            new_model2_x_train += [temp_data_x[i] for i in shared_indices]
            new_model2_y_train += [temp_data_y[i] for i in shared_indices]


        # rewrite dataset
        model1_x_train = new_model1_x_train
        model1_y_train = new_model1_y_train

        model2_x_train = new_model2_x_train
        model2_y_train = new_model2_y_train

    # Carry out datasplitting for shared classes and add to datasets

    for shared_class in shared_classes:

        all_examples_x_train = []
        all_examples_y_train = []

        # get all examples of class
        for index in range(len(train_data)):
            current_class = train_data[index][1]

            if current_class == shared_class:
                all_examples_x_train.append(train_data[index][0])
                all_examples_y_train.append(train_data[index][1])


        # find max number of samples per model (set to be amount of examples if data is completely disjoint)
        max_examples = len(all_examples_x_train) // 2

        # get shared examples
        shared_examples_x_train = []
        shared_examples_y_train = []

        num_shared_examples = max_examples * percent_shared_data // 100
        for _ in range(num_shared_examples):
            random_int = random.randint(0, len(all_examples_x_train) - 1)

            shared_examples_x_train.append(all_examples_x_train.pop(random_int))
            shared_examples_y_train.append(all_examples_y_train.pop(random_int))


        # get disjoint examples
        disjoint_examples = max_examples - len(shared_examples_x_train)

        model1_examples_x_train = []
        model1_examples_y_train = []

        model2_examples_x_train = []
        model2_examples_y_train = []

        for _ in range(disjoint_examples):
            model1_rand_int = random.randint(0, len(all_examples_x_train) - 1)

            model1_examples_x_train.append(all_examples_x_train.pop(model1_rand_int))
            model1_examples_y_train.append(all_examples_y_train.pop(model1_rand_int))

            model2_rand_int = random.randint(0, len(all_examples_x_train) - 1)
            model2_examples_x_train.append(all_examples_x_train.pop(model2_rand_int))
            model2_examples_y_train.append(all_examples_y_train.pop(model2_rand_int))


        # add to the datasets for the model
        model1_x_train = shared_examples_x_train + model1_x_train + model1_examples_x_train
        model1_y_train = shared_examples_y_train + model1_y_train + model1_examples_y_train

        model2_x_train = shared_examples_x_train + model2_x_train + model2_examples_x_train
        model2_y_train = shared_examples_y_train + model2_y_train + model2_examples_y_train

    #print(model1_y_train)

    # assign mapping for new classes
    model1_class_mapping = {}
    model2_class_mapping = {}

    model1_classes_inc = 0
    # go through model1 and assign unique classes to an incremental int starting at 0
    for index in range(len(model1_y_train)):
        # if it doesn't exist assign
        if model1_y_train[index] not in model1_class_mapping.keys():
            model1_class_mapping[model1_y_train[index]] = model1_classes_inc
            model1_classes_inc += 1
        # append assigned token
        model1_y_train[index] = model1_class_mapping[model1_y_train[index]]


    model2_classes_inc = 0
    # go through model2 and assign unique classes to an incremental int starting at 0
    for index in range(len(model2_y_train)):
        # if it doesn't exist in model2 OR in model1, assign it
        if model2_y_train[index] not in model2_class_mapping.keys() and model2_y_train[index] not in model1_class_mapping.keys():
            model2_class_mapping[model2_y_train[index]] = model2_classes_inc
            model2_y_train[index] = model2_classes_inc
            model2_classes_inc += 1
        elif model2_y_train[index] in model1_class_mapping.keys():
            model2_y_train[index] = model1_class_mapping[model2_y_train[index]]
        else:
            model2_y_train[index] = model2_class_mapping[model2_y_train[index]]
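    # Note: after this remapping, model1 numbers every class it sees from 0 upward;
    # model2 reuses model1's index for shared classes and numbers its remaining
    # classes from 0 with a separate counter, so the two index ranges can overlap.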

    model1_x_test = []
    model1_y_test = []

    model2_x_test = []
    model2_y_test = []

    shared_x_test = []
    shared_y_test = []


    # test data splits
    for index in range(len(test_data)):

        current_class = test_data[index][1]

        # model 1
        if current_class in model1_classes:
            model1_x_test.append(test_data[index][0])
            model1_y_test.append(test_data[index][1])

        # model 2
        if current_class in model2_classes:
            model2_x_test.append(test_data[index][0])
            model2_y_test.append(test_data[index][1])

        # shared classes for eval
        if current_class in shared_classes:
            shared_x_test.append(test_data[index][0])
            shared_y_test.append(test_data[index][1])

    model1_x_test += shared_x_test
    model1_y_test += shared_y_test

    model2_x_test += shared_x_test
    model2_y_test += shared_y_test


    for index in range(len(model1_y_test)):
        model1_y_test[index] = model1_class_mapping[model1_y_test[index]]


    for index in range(len(model2_y_test)):
        if model2_y_test[index] in model1_class_mapping.keys():
            model2_y_test[index] = model1_class_mapping[model2_y_test[index]]
        else:
            model2_y_test[index] = model2_class_mapping[model2_y_test[index]]


    model1_classes_len = len(set(model1_y_train))
    model2_classes_len = len(set(model2_y_train))


    if task.upper() == "CIFAR100":

        model1 = models.wide_resnet50_2()
        model2 = models.wide_resnet50_2()
        #
        model1.fc = nn.Linear(2048, model1_classes_len)
        model2.fc = nn.Linear(2048, model2_classes_len)

    elif task.upper() == "IMAGENET":
        model1 = models.wide_resnet50_2()
        model2 = models.wide_resnet50_2()

        model1.fc = nn.Linear(2048, model1_classes_len)
        model2.fc = nn.Linear(2048, model2_classes_len)
    elif task.upper() == "FASHIONMNIST":
        model1 = models.resnet18()
        model2 = models.resnet18()


        model1.fc = nn.Linear(512, model1_classes_len)
        model2.fc = nn.Linear(512, model2_classes_len)

    else:
        # Get model (using ResNet50 for now)
        model1 = models.resnet50()
        model2 = models.resnet50()

        model1.fc = nn.Linear(2048, model1_classes_len)
        model2.fc = nn.Linear(2048, model2_classes_len)


    cuda = torch.cuda.is_available()
    if gpu_num in range(torch.cuda.device_count()):
        device = torch.device('cuda:'+str(gpu_num) if cuda else 'cpu')
        torch.cuda.set_device(device)
    else:
        device = torch.device('cpu')

    # Model Training

    model1 = model1.to(device)
    model2 = model2.to(device)

    criterion1 = nn.CrossEntropyLoss()
    optimizer1 = optim.AdamW(model1.parameters(), lr=learning_rate)
    scheduler1 = optim.lr_scheduler.MultiStepLR(optimizer1,milestones=[60, 120, 160], gamma=.2) #learning rate decay


    criterion2 = nn.CrossEntropyLoss()
    optimizer2 = optim.AdamW(model2.parameters(), lr=learning_rate)
    scheduler2 = optim.lr_scheduler.MultiStepLR(optimizer2,milestones=[60, 120, 160], gamma=.2) #learning rate decay

    # zip together two lists
    train_set1 = list(zip(model1_x_train, model1_y_train))

    # create trainloader 1
    trainloader_1 = torch.utils.data.DataLoader(train_set1, batch_size=batch_size,
                                              shuffle=True, num_workers=2)
    # create trainloader 2

    # zip together two lists
    train_set2 = list(zip(model2_x_train, model2_y_train))

    # create trainloader 2
    trainloader_2 = torch.utils.data.DataLoader(train_set2, batch_size=batch_size,
                                              shuffle=True, num_workers=2)


    # TODO change this
    num_adv_batchs = 2 if adv_training else 0

    adv_batches = random.sample(range(len(trainloader_1)), num_adv_batchs)

    #print("adv_batches:", adv_batches)

    # train model 1
    for epoch in tqdm(range(n_epochs),desc="Epoch"):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader_1, 0):
            if cuda:
                data = tuple(d.cuda() for d in data)


            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer1.zero_grad()

            # forward + backward + optimize

            # train adversarial
    #         if i in adv_batches:
    #             print("adv training!")
    #             adversary = LinfPGDAttack(
    #                 model1, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=eps,
    #                 nb_iter=adv_steps, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
    #                 targeted=False)
    #             inputs = adversary.perturb(inputs, labels)


            outputs = model1(inputs)
            loss = criterion1(outputs, labels)
            loss.backward()
            optimizer1.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i  + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training model1')

    # train model 2
    for epoch in tqdm(range(n_epochs),desc="Epoch"):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader_2, 0):
            if cuda:
                data = tuple(d.cuda() for d in data)

            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer2.zero_grad()

            # forward + backward + optimize
            outputs = model2(inputs)
            loss = criterion2(outputs, labels)
            loss.backward()
            optimizer2.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training model2')

    model1 = model1.to("cpu")
    model2 = model2.to("cpu")


    # convert shared classes to new labels
    for index in range(len(shared_y_test)):
        if shared_y_test[index] in model1_class_mapping.keys():
            shared_y_test[index] = model1_class_mapping[shared_y_test[index]]
        else:
            shared_y_test[index] = model2_class_mapping[shared_y_test[index]]


    shared_y_test = torch.Tensor(shared_y_test).long()


    # if cuda:
    #     shared_x_test = tuple(d.cuda() for d in shared_x_test)
    #     shared_y_test = torch.Tensor(shared_y_test).long().cuda()

    model1_x_test = torch.stack(model1_x_test)
    model2_x_test = torch.stack(model2_x_test)

    model1.eval()

    shared_x_test = torch.stack(shared_x_test)

    model1.eval()

    adversary = LinfPGDAttack(
        model1, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=eps,
        nb_iter=adv_steps, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
        targeted=False)

    adv_untargeted = adversary.perturb(shared_x_test, shared_y_test)

    timestr = time.strftime("%Y%m%d_%H%M%S")

    print("saving models at", timestr)

    model1_name = './models/{}_{}_{}_model1_{}.pickle'.format(task,num_shared_classes, percent_shared_data,timestr)
    model2_name = './models/{}_{}_{}_model2_{}.pickle'.format(task,num_shared_classes, percent_shared_data,timestr)
    adv_name = './models/{}_{}_{}_adv_{}.pickle'.format(task,num_shared_classes, percent_shared_data,timestr)


    torch.save(model1, model1_name)
    torch.save(model2, model2_name)
    torch.save(adversary, adv_name)

    #  Eval

    with torch.no_grad():
        model1.eval()
        model2.eval()

        # model1 outputs

        output1 = model1(model1_x_test)
        shared_output1 = model1(shared_x_test)
        adv_output1 = model1(adv_untargeted)

        # model2 outputs
        output2 = model2(model2_x_test)
        shared_output2 = model2(shared_x_test)
        adv_output2 = model2(adv_untargeted)

        if task.upper() == "CIFAR100":

            # model 1

            print("model1_acc:", accuracy(output1,model1_y_test))

            print("model1_acc_5:", accuracy_n(output1,model1_y_test,5))

            print("model1_acc_shared:", accuracy(shared_output1,shared_y_test))
            print("model1_acc_5_shared:", accuracy_n(shared_output1,shared_y_test,5))

            print("model1_adv_acc_shared:", accuracy(adv_output1,shared_y_test))
            print("model1_adv_acc_5_shared:", accuracy_n(adv_output1,shared_y_test,5))

            print()

            # model 2

            print("model2_acc:", accuracy(output2,model2_y_test))
            print("model2_acc_5:", accuracy_n(output2,model2_y_test,5))

            print("model2_acc_shared:", accuracy(shared_output2,shared_y_test))
            print("model2_acc_5_shared:", accuracy_n(shared_output2,shared_y_test,5))

            print("model2_adv_acc_shared:", accuracy(adv_output2,shared_y_test))
            print("model2_adv_acc_5_shared:", accuracy_n(adv_output2,shared_y_test,5))

        else:
             # model 1

            print("model1_acc:", accuracy(output1,model1_y_test))

            print("model1_acc_shared:", accuracy(shared_output1,shared_y_test))

            print("model1_adv_acc_shared:", accuracy(adv_output1,shared_y_test))
            print()

            # model 2

            print("model2_acc:", accuracy(output2,model2_y_test))

            print("model2_acc_shared:", accuracy(shared_output2,shared_y_test))

            print("model2_adv_acc_shared:", accuracy(adv_output2,shared_y_test))
def main():
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    gpus = [idx for idx, gpu in enumerate(args.gpu.split(","))]
    work_dir = '{}/train_pytorch_model/adversarial_train/feature_denoise/'.format(
        PY_ROOT)
    # pretrained_model_path = '{}/train_pytorch_model/adversarial_train/feature_denoise/{}@{}_{}_{}_{}.pth.tar'.format(
    #     PY_ROOT, args.dataset, args.arch, denoise_str, args.filter_type, args.ksize)
    assert os.path.exists(work_dir), "{} does not exist!".format(work_dir)
    os.makedirs(work_dir, exist_ok=True)
    set_log_file(work_dir + "/adv_train_{}.log".format(args.dataset))
    log.info('Command line is: {}'.format(' '.join(sys.argv)))
    log.info('Called with args:')
    print_args(args)
    model_path = '{}/train_pytorch_model/adversarial_train/feature_denoise/pgd_adv_train_{}@{}_{}_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.filter_type, args.ksize)
    best_model_path = '{}/train_pytorch_model/adversarial_train/feature_denoise/best_pgd_adv_train_{}@{}_{}_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.filter_type, args.ksize)

    model = FeatureDefenseModel(args.dataset, args.arch, no_grad=False)
    model = model.cuda()
    resume_epoch = 0
    if os.path.exists(model_path):
        state_dict = torch.load(model_path,
                                map_location=lambda storage, location: storage)
        model.load_state_dict(state_dict["state_dict"])
        resume_epoch = state_dict["epoch"]
        log.info("Load model from {} at epoch {}".format(
            model_path, resume_epoch))
    # model = model.to(args.gpu)
    if torch.cuda.is_available():
        model.cuda(gpus[0])

    log.info(
        "After trained over, model will be saved to {}".format(model_path))
    train_loader = get_img_label_data_loader(args.dataset, args.batch_size,
                                             True)
    test_loader = get_img_label_data_loader(args.dataset, args.batch_size,
                                            False)
    if torch.cuda.device_count() > 1:
        criterion = torch.nn.DataParallel(nn.CrossEntropyLoss(), gpus).cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.learning_rate,
                          weight_decay=args.weight_decay,
                          momentum=args.momentum,
                          nesterov=True)
    scheduler = MultiStepLR(optimizer,
                            milestones=[
                                int(args.epochs / 2),
                                int(args.epochs * 3 / 4),
                                int(args.epochs * 7 / 8)
                            ],
                            gamma=0.1)

    total, correct, train_loss = 0, 0, 0
    # Record the best accuracy
    best_test_clean_acc, best_test_adv_acc, best_epoch = 0, 0, 0
    log.info(
        "basic model: {}, denoising enabled: {}, filter type: {}, kernel size: {}"
        .format(args.arch, args.whether_denoising, args.filter_type,
                args.ksize))
    for epoch in range(resume_epoch, args.epochs):
        if epoch % args.test_interval == 0:
            model.eval()
            test_total, test_correct, test_robustness = 0, 0, 0
            attack = LinfPGDAttack(
                model,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=0.031372,
                nb_iter=30,
                eps_iter=0.01,
                rand_init=True,
                clip_min=0.0,
                clip_max=1.0,
                targeted=False)
            start_time = time.time()

            for (images, labels) in test_loader:
                images = images.cuda()
                labels = labels.cuda().long()
                test_total += images.shape[0]
                with torch.no_grad():
                    test_correct += model(images).max(1)[1].eq(
                        labels).float().sum().item()
                adv_images = attack.perturb(images, labels)
                with torch.no_grad():
                    test_robustness += model(adv_images).max(1)[1].eq(
                        labels).float().sum().item()
            test_acc, test_adv_acc = test_correct / test_total, test_robustness / test_total
            # Record the time on the testset
            end_time = time.time()
            testset_total_time = end_time - start_time
            if test_adv_acc > best_test_adv_acc:
                best_epoch = epoch
                best_test_adv_acc = test_adv_acc
                best_test_clean_acc = test_acc
                torch.save(
                    {
                        "state_dict": model.state_dict(),
                        "epoch": epoch + 1
                    }, best_model_path)
            log.info(
                "Current best adversarial model ----- best epoch: {} clean_test_acc: {:.3f} adv_test_acc: {:.3f}"
                .format(best_epoch, best_test_clean_acc, best_test_adv_acc))
            log.info(
                "Epoch:{} clean_test_acc: {:.3f}  adv_test_acc: {:.3f} during {} seconds"
                .format(epoch, test_acc, test_adv_acc, testset_total_time))

        # Evaluate and train on the training set
        train_total, train_correct, train_robustness = 0, 0, 0
        train_clean_loss, train_adv_loss, train_loss = 0, 0, 0
        start_time = time.time()
        attack = LinfPGDAttack(model,
                               loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                               eps=0.031372,
                               nb_iter=30,
                               eps_iter=0.01,
                               rand_init=True,
                               clip_min=0.0,
                               clip_max=1.0,
                               targeted=False)

        for images, labels in train_loader:
            images = images.cuda()
            labels = labels.cuda().long()
            train_total += images.shape[0]
            with torch.no_grad():
                train_correct += model(images).max(1)[1].eq(
                    labels).float().sum().item()
            model.eval()
            adv_images = attack.perturb(images, labels)
            model.train()
            adv_outputs = model(adv_images)
            train_robustness += adv_outputs.max(1)[1].eq(
                labels).float().sum().item()
            adv_loss = criterion(adv_outputs, labels)
            # clean_outputs = model(torch.from_numpy(images).cuda())  # I think adversarial training does not need the loss on the clean images
            # clean_loss = criterion(clean_outputs, torch.from_numpy(labels).cuda())
            optimizer.zero_grad()
            adv_loss.backward()
            optimizer.step()
            train_adv_loss += adv_loss.item()
            train_loss = train_adv_loss
            model.eval()

        scheduler.step(epoch)
        # Record the time on the trainset
        end_time = time.time()
        trainset_total_time = end_time - start_time
        train_acc, train_adv_acc = train_correct / train_total, train_robustness / train_total
        log.info(
            "Epoch:{} train_clean_loss: {:.3f} train_adv_loss: {:.3f} train_total_loss: {:.3f}"
            .format(epoch, train_clean_loss, train_adv_loss, train_loss))
        log.info(
            "Epoch:{} clean_train_acc: {:.3f}  adv_train_acc: {:.3f}  Consumed time:{}"
            .format(epoch, train_acc, train_adv_acc, trainset_total_time))
        torch.save({
            "state_dict": model.state_dict(),
            "epoch": epoch + 1
        }, model_path)
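
main() depends on a parse_args() helper that is not included in this excerpt; a plausible sketch covering exactly the attributes main() reads (the defaults below are assumptions, not values from the original code) might be:

# Hypothetical argument parser for the adversarial-training script above;
# every default value here is an assumption.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(
        description="PGD adversarial training with feature denoising")
    parser.add_argument("--gpu", type=str, default="0")
    parser.add_argument("--dataset", type=str, default="CIFAR-10")
    parser.add_argument("--arch", type=str, default="resnet-50")
    parser.add_argument("--whether_denoising", action="store_true")
    parser.add_argument("--filter_type", type=str, default="NonLocal_Filter")
    parser.add_argument("--ksize", type=int, default=3)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--epochs", type=int, default=200)
    parser.add_argument("--test_interval", type=int, default=1)
    parser.add_argument("--learning_rate", type=float, default=0.1)
    parser.add_argument("--weight_decay", type=float, default=5e-4)
    parser.add_argument("--momentum", type=float, default=0.9)
    return parser.parse_args()
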
def sample_cases(sdim, args):
    sdim.eval()
    n_classes = args.get(args.dataset).n_classes

    sample_likelihood_dict = {}
    # logger.info('==> Corruption type: {}, severity level {}'.format(corruption_type, level))
    data_dir = hydra.utils.to_absolute_path(args.data_dir)
    dataset = get_dataset(data_name=args.dataset,
                          data_dir=data_dir,
                          train=False,
                          crop_flip=False)

    test_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False)
    x, y = next(iter(test_loader))
    x, y = x.to(args.device), y.long().to(args.device)

    def f_forward(x_, y_, image_name):
        with torch.no_grad():
            log_lik = sdim(x_)
        save_name = '{}.png'.format(image_name)
        save_image(x_, save_name, normalize=True)
        return log_lik[:, y_].item()

    sample_likelihood_dict['original'] = f_forward(x, y, 'original')

    eps_2 = 2 / 255
    eps_4 = 4 / 255
    eps_8 = 8 / 255

    # Uniform noise sampled inside the L_inf ball of radius eps
    x_u_4 = (x + torch.FloatTensor(x.size()).uniform_(-eps_4, eps_4).to(
        args.device)).clamp_(0., 1.)
    # Standard Gaussian noise, clipped to the L_inf ball of radius eps
    x_g_4 = (x + torch.randn(x.size()).clamp_(-eps_4, eps_4).to(
        args.device)).clamp_(0., 1.)
    x_u_8 = (x + torch.FloatTensor(x.size()).uniform_(-eps_8, eps_8).to(
        args.device)).clamp_(0., 1.)
    x_g_8 = (x + torch.randn(x.size()).clamp_(-eps_8, eps_8).to(
        args.device)).clamp_(0., 1.)

    sample_likelihood_dict['uniform_4'] = f_forward(x_u_4, y, 'uniform_4')
    sample_likelihood_dict['uniform_8'] = f_forward(x_u_8, y, 'uniform_8')
    sample_likelihood_dict['gaussian_4'] = f_forward(x_g_4, y, 'gaussian_4')
    sample_likelihood_dict['gaussian_8'] = f_forward(x_g_8, y, 'gaussian_8')

    adversary = LinfPGDAttack(sdim,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=eps_2,
                              nb_iter=40,
                              eps_iter=0.01,
                              rand_init=True,
                              clip_min=-1.0,
                              clip_max=1.0,
                              targeted=False)

    adv_pgd_2 = adversary.perturb(x, y)

    adversary = LinfPGDAttack(sdim,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=eps_4,
                              nb_iter=40,
                              eps_iter=0.01,
                              rand_init=True,
                              clip_min=-1.0,
                              clip_max=1.0,
                              targeted=False)

    adv_pgd_4 = adversary.perturb(x, y)

    adversary = LinfPGDAttack(sdim,
                              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                              eps=eps_8,
                              nb_iter=40,
                              eps_iter=0.01,
                              rand_init=True,
                              clip_min=-1.0,
                              clip_max=1.0,
                              targeted=False)

    adv_pgd_8 = adversary.perturb(x, y)

    # adversary = CW(sdim, n_classes, max_iterations=1000, c=1, clip_min=0., clip_max=1., learning_rate=0.01,
    #                targeted=False)
    #
    # adv_cw_1, _, _, _ = adversary.perturb(x, y)
    #
    # adversary = CW(sdim, n_classes, max_iterations=1000, c=10, clip_min=0., clip_max=1., learning_rate=0.01,
    #                targeted=False)
    #
    # adv_cw_10, _, _, _ = adversary.perturb(x, y)

    sample_likelihood_dict['pgd_2'] = f_forward(adv_pgd_2, y, 'pgd_2')
    sample_likelihood_dict['pgd_4'] = f_forward(adv_pgd_4, y, 'pgd_4')
    sample_likelihood_dict['pgd_8'] = f_forward(adv_pgd_8, y, 'pgd_8')
    # sample_likelihood_dict['cw_1'] = f_forward(adv_cw_1, y, 'cw_1')
    # sample_likelihood_dict['cw_10'] = f_forward(adv_cw_10, y, 'cw_10')

    print(sample_likelihood_dict)
    save_dir = hydra.utils.to_absolute_path('attack_logs/case_study')
    os.makedirs(save_dir, exist_ok=True)
    torch.save(sample_likelihood_dict,
               os.path.join(save_dir, 'sample_likelihood_dict.pt'))
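
sample_cases() uses hydra.utils.to_absolute_path, so it is presumably invoked from a hydra entry point; a hedged sketch of such a driver (the config path/name and the model-loading helper are assumptions) could be:

# Hypothetical hydra entry point for sample_cases(); load_sdim_model is an
# assumed helper that builds or restores the SDIM model from the config.
import hydra

@hydra.main(config_path="configs", config_name="attack")
def run(args):
    sdim = load_sdim_model(args).to(args.device)
    sample_cases(sdim, args)

if __name__ == "__main__":
    run()
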
Example #30
print('==> Running the PGD attack')

import torch.nn as nn
from advertorch.attacks import LinfPGDAttack
from advertorch.utils import predict_from_logits

adversary = LinfPGDAttack(model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.3,
                          nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0, targeted=False)

correct_clean = 0
correct_adv = 0

for idx, (cln_data, true_label) in enumerate(loader):

    cln_data, true_label = cln_data.to(device), true_label.to(device)

    adv_untargeted = adversary.perturb(cln_data, true_label)

    pred_cln = predict_from_logits(model(cln_data))
    pred_adv = predict_from_logits(model(adv_untargeted))

    correct_clean = correct_clean + (pred_cln.data == true_label.data).float().sum()
    correct_adv = correct_adv + (pred_adv.data == true_label.data).float().sum()

    print("current correct clean samples: %s; current correct adv samples: %s" %(correct_clean.data.item(), correct_adv.data.item()))

print("correct clean samples: ", correct_clean)
print("correct adversarial samples: ", correct_adv)