Exemplo n.º 1
0
def get_theoretic_lowerbound(model, eps_range, config, pretrained_config,
                             output_root):
    """Evaluate a theoretical robustness lower bound over a range of epsilons.

    For `config['num_examples']` validation examples, runs
    `theoretical_adversary` with every epsilon in `eps_range` (scaled by the
    model's Lipschitz constant) and writes the per-epsilon mean accuracy to
    `<output_root>/results.json`.

    Args:
        model: loaded, eval-mode classifier.
        eps_range: 1-D numpy array of perturbation radii.
        config: attack configuration; provides 'num_examples' and `cuda`.
        pretrained_config: configuration the model was trained with.
        output_root: directory that receives results.json.
    """
    n_examples = config['num_examples']
    n_batches = int(
        math.ceil((n_examples * 1.0) / pretrained_config.optim.batch_size))

    # Fix: the original statement was a self-assignment no-op
    # (`pretrained_config.cuda = pretrained_config.cuda`). Propagate the
    # caller's cuda flag instead, matching generate_examples.
    pretrained_config.cuda = config.cuda
    data = load_data(pretrained_config)

    # Perform the attack batch by batch; each call returns per-example
    # accuracies across the whole eps range.
    eps_acc = []
    for sample in tqdm(islice(data['validation'], n_batches), total=n_batches):
        x = to_cuda(sample[0], cuda=pretrained_config.cuda)
        y = to_cuda(sample[1].type(torch.LongTensor),
                    cuda=pretrained_config.cuda)
        eps_acc.append(
            theoretical_adversary(
                model, x, y, pretrained_config.model.l_constant * eps_range))
    avg_eps_acc = np.concatenate(eps_acc, 0).mean(0)

    results = {'eps': eps_range.tolist(), 'acc': avg_eps_acc.tolist()}

    print(results)
    with open(os.path.join(output_root, 'results.json'), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)
Exemplo n.º 2
0
def check_grad_norm(model, data, cuda, epochs=3):
    """Estimate the spectral norm of the model's input-output Jacobian.

    Runs `epochs` passes over `data`, carrying the power-iteration vector
    across batches via `jac_spectral_norm`, and returns the final estimate.
    """
    power_vec = to_cuda(torch.randn(10), cuda)
    for _ in range(epochs):
        for batch_x, _ in data:
            model.zero_grad()
            batch_x = to_cuda(batch_x, cuda)
            batch_x.requires_grad = True
            sigma, power_vec = jac_spectral_norm(model(batch_x), batch_x,
                                                 power_vec)
    return sigma
Exemplo n.º 3
0
def eval_on_examples(model, output_root, cuda=True):
    """Load saved adversarial examples/targets from `output_root` and print
    the model's accuracy on them."""
    examples_np = np.load(os.path.join(output_root, 'examples.npy'))
    targets_np = np.load(os.path.join(output_root, 'targets.npy'))

    print(examples_np.shape)
    # Dump the raw examples to an image grid as a quick visual sanity check.
    save_image(torch.Tensor(examples_np), 'test.png')

    examples = to_cuda(torch.Tensor(examples_np), cuda)
    targets = to_cuda(torch.LongTensor(targets_np), cuda)
    print("Adv Accuracy: {}".format(
        accuracy(model, examples, targets).item()))
Exemplo n.º 4
0
def main(config):
    """Load the pretrained model named by `config.pretrained_path` and run a
    single adversarial-example generation pass into `config.output_root`."""
    output_root = config.output_root
    if not os.path.isdir(output_root):
        os.makedirs(output_root)

    # Restore the model together with its training-time configuration.
    model, pretrained_config = load_model_from_config(config.pretrained_path)

    # Move the model to the GPU when requested.
    if config.cuda:
        print('Using cuda: {}'.format("Yes"))
        to_cuda(model, cuda=config.cuda)

    model.eval()

    # model.model.project_network_weights(Munch.fromDict({'type': 'l_inf_projected'}))
    generate_examples(model, config, pretrained_config, output_root)
Exemplo n.º 5
0
def main(config):
    """Sweep FGS and PGD attacks over a range of epsilons.

    For each epsilon in linspace(0.01, 0.5, 20), runs `generate_examples`
    once with FGS and once with PGD, each into its own
    `<output_root>/<attack>/<eps>/` directory.
    """
    print(config)
    # Create the output directory.
    output_root = config.output_root
    if not os.path.isdir(output_root):
        os.makedirs(output_root)

    # Load pretrained model
    pretrained_path = config.model.exp_path
    model, pretrained_config = load_model_from_config(pretrained_path)

    # Push model to GPU if available.
    if config.cuda:
        print('Using cuda: {}'.format("Yes"))
        to_cuda(model, cuda=config.cuda)

    model.eval()
    eps_range = np.linspace(0.01, 0.5, 20)

    # exp_root = os.path.join(output_root, 'theory')
    # if not os.path.isdir(exp_root):
    #     os.makedirs(exp_root)
    # get_theoretic_lowerbound(model, eps_range, config, pretrained_config, exp_root)

    # The two sweeps differed only in the attack flag and output subdirectory;
    # the shared loop lives in _run_eps_sweep. config.pgd is cleared before
    # the FGS sweep so generate_examples' if/elif dispatch is unambiguous.
    config.fgs = True
    config.pgd = False
    _run_eps_sweep(model, config, pretrained_config, output_root, 'fgs',
                   eps_range)

    config.fgs = False
    config.pgd = True
    _run_eps_sweep(model, config, pretrained_config, output_root, 'pgd',
                   eps_range)


def _run_eps_sweep(model, config, pretrained_config, output_root, attack_dir,
                   eps_range):
    """Run generate_examples once per epsilon, each into its own directory."""
    for eps in eps_range:
        config.eps = eps
        exp_root = os.path.join(output_root, attack_dir, str(eps))
        if not os.path.isdir(exp_root):
            os.makedirs(exp_root)
        generate_examples(model, config, pretrained_config, exp_root)
Exemplo n.º 6
0
def slow_check_grad_norm(model, data, cuda, input_dim=784):
    """Exactly compute per-example Jacobian spectral norms via full SVD.

    Slow reference implementation (one SVD per example) for the power-method
    estimate in check_grad_norm.

    Args:
        model: classifier mapping flat inputs to logits.
        data: iterable of (x, y) batches.
        cuda: whether to move inputs to the GPU.
        input_dim: flattened input dimensionality. Defaults to 784 (the
            value previously hard-coded for 28x28 MNIST images).

    Returns:
        (mean, max) of the per-example spectral norms.

    Raises:
        ValueError: if `data` yields no batches.
    """
    spectral_rads = []
    for x, _ in data:
        model.zero_grad()
        x = to_cuda(x, cuda).view(-1, input_dim)
        x.requires_grad = True
        logits = model(x)
        jac = compute_jacobian(logits, x)
        for j in jac:
            # Largest singular value == spectral norm of this Jacobian.
            _, S, _ = torch.svd(j)
            spectral_rads.append(torch.max(S).cpu().detach().item())
    if not spectral_rads:
        # np.max would otherwise raise an opaque "zero-size array" error.
        raise ValueError("slow_check_grad_norm received an empty data loader")
    return np.mean(spectral_rads), np.max(spectral_rads)
Exemplo n.º 7
0
def get_safe_bjorck_scaling(weight, cuda=True):
    """Return sqrt(rows * cols) of `weight` as a 1-element float tensor.

    Used as a conservative pre-scaling factor for Bjorck orthonormalization.
    """
    n_rows, n_cols = weight.shape[0], weight.shape[1]
    scaling = torch.tensor([np.sqrt(n_rows * n_cols)]).float()
    return to_cuda(scaling, cuda=cuda)
Exemplo n.º 8
0
def generate_examples(model, config, pretrained_config, output_root):
    """Attack `model` with the foolbox attack named by `config.name` and save
    adversarial examples, targets and summary statistics.

    Writes `examples.npy`, `targets.npy` and `results.json` into
    `output_root`.

    Args:
        model: loaded, eval-mode classifier.
        config: attack configuration ('num_examples', 'name', 'attack_kwargs',
            optional 'distance', `cuda`, `data.batch_size`).
        pretrained_config: configuration the model was trained with.
        output_root: directory that receives all artifacts.
    """
    adv_example_filepath = os.path.join(output_root, 'examples')
    adv_targets_filepath = os.path.join(output_root, 'targets')

    # Set up adversarial attack.
    adv_model = PyTorchModel(model, (0, 1),
                             pretrained_config.data.class_count,
                             cuda=config.cuda)
    criterion = Misclassification()
    attack = getattr(foolbox.attacks, config.name)(adv_model, criterion)

    # Get data.
    pretrained_config.cuda = config.cuda
    pretrained_config.optim.batch_size = config.data.batch_size
    data = load_data(pretrained_config)
    # print('Test Accuracy:{}'.format(loader_accuracy(model, data['test'])))

    n_examples = config['num_examples']
    n_batches = int(
        math.ceil((n_examples * 1.0) / pretrained_config.optim.batch_size))

    # Accumulators for per-example results across batches.
    adv_ex = torch.Tensor()
    adv_targets = torch.LongTensor()
    adv_mse = torch.Tensor()
    adv_inf = torch.Tensor()
    success = torch.Tensor()

    # Set up distance for the adversarial attack (MSE by default).
    distance_name = config.get('distance')
    distance = getattr(foolbox.distances, distance_name) if distance_name is not None \
        else foolbox.distances.MeanSquaredDistance

    # Perform the attack.
    for sample in tqdm(islice(data['validation'], n_batches), total=n_batches):
        x = sample[0]
        y = sample[1].type(torch.LongTensor)
        x = to_cuda(x, cuda=config.cuda)

        adv, adv_t, batch_success, batch_adv_mse, batch_adv_inf = batch_attack(
            attack, adv_model, criterion, x,
            y.cpu().numpy(), config['attack_kwargs'], distance)
        adv_ex = torch.cat([adv_ex, adv], 0)
        adv_targets = torch.cat([adv_targets, adv_t], 0)
        success = torch.cat([success, batch_success], 0)
        adv_mse = torch.cat([adv_mse, batch_adv_mse], 0)
        adv_inf = torch.cat([adv_inf, batch_adv_inf], 0)

    # evaluate_adv_grad_norms(model, adv_ex, adv_targets, config.cuda)
    # Summarize the results. Statistics over successful attacks are guarded
    # against the zero-success case, which previously produced NaN (0/0) and
    # crashed np.percentile on an empty array.
    n_success = success.sum().item()
    results = {
        "success_rate": success.mean().item(),
        "defense_rate": 1 - success.mean().item(),
        "mean_mse": ((adv_mse * success).sum() / success.sum()).item()
        if n_success > 0 else 0.0,
        "mean_inf": ((adv_inf * success).sum() / success.sum()).item()
        if n_success > 0 else 0.0,
        "mse_quartiles": list(
            np.percentile(adv_mse[success == 1.0].numpy(),
                          [0, 25, 50, 75, 100]))
        if n_success > 0 else [0, 0, 0, 0, 0],
    }

    results["median_mse"] = results["mse_quartiles"][2]

    print("success rate: {}".format(results["success_rate"]))
    print("defense rate: {}".format(results["defense_rate"]))
    print("mean MSE for successful attacks: {}".format(results["mean_mse"]))
    print("mean L_inf for successful attacks: {}".format(results["mean_inf"]))
    print("MSE quartiles for successful attacks: {}".format(
        results["mse_quartiles"]))

    # Fix: write results.json next to the saved examples/targets. The
    # original wrote it to config['output_root'] (the sweep's parent
    # directory), so every per-epsilon run overwrote the same file.
    with open(os.path.join(output_root, 'results.json'), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)

    np.save(adv_example_filepath, adv_ex)
    np.save(adv_targets_filepath, adv_targets)

    print(
        accuracy(model, to_cuda(adv_ex, cuda=config.cuda),
                 to_cuda(adv_targets, cuda=config.cuda)))
Exemplo n.º 9
0
def visualize_saliency(config):
    """Render saliency maps for a pretrained model.

    Loads the model named by ``config.pretrained_path``, saves per-class
    saliency maps (via ``get_saliency_map``) for the first test batch, then
    builds a joint grid image with one example per class and its saliency
    map for every class. All PNGs go into ``config.output_root``.
    """
    # Create the output directory.
    output_root = config.output_root
    if not os.path.isdir(output_root):
        os.makedirs(output_root)

    # Load a pretrained model.
    pretrained_path = config.pretrained_path
    model, pretrained_config = load_model_from_config(pretrained_path)

    # Push model to GPU if available.
    if config.cuda:
        print('Using cuda: Yes')
        model.cuda()

    model.eval()

    # Get data.
    # NOTE(review): flags are set on `pretrained_config.data` here, whereas
    # other functions in this file set `pretrained_config.cuda` /
    # `pretrained_config.optim.batch_size` — confirm which schema load_data
    # expects.
    pretrained_config.data.cuda = config.cuda
    pretrained_config.data.batch_size = config.data.batch_size
    data = load_data(pretrained_config)

    # Compute adversarial gradients and save their visualizations.
    # Only the first batch is processed (note the `break` below); `x` keeps
    # referring to that batch afterwards.
    for i, (x, y) in enumerate(data['test']):
        x = to_cuda(x, cuda=config.cuda)

        # Save the input image.
        save_path = os.path.join(output_root, 'x{}.png'.format(i))
        save_image(x, save_path)

        # Save the adversarial gradients — one saliency map per output class.
        for j in range(pretrained_config.data.class_count):
            # Compute and save the adversarial gradients.
            x_grad = get_saliency_map(model, x, j)
            save_image(x_grad,
                       os.path.join(output_root,
                                    'x_{}_grad_{}.png'.format(i, j)),
                       normalize=True,
                       scale_each=True)
        break

    # Produce joint image. This first x_sub (built from the leftover `x`) is
    # only used for the size/max printout; it is replaced below.
    nrow = config.visualization.num_rows
    x_sub = to_cuda(
        torch.zeros(nrow,
                    *x.size()[1:]).copy_(x[:nrow]).detach(), config.cuda)
    print("Size of visualization: ", x_sub.size(), "Maximum pixel value: ",
          x_sub.max())
    # Scan the test set for one example per class, in class order: `c` only
    # advances when an example of the currently-sought class appears, so
    # classes are collected strictly as 0, 1, 2, ...
    tensors = []
    c = 0
    for i, (x, y) in enumerate(data['test']):
        for (k, t) in enumerate(y):
            if t == c:
                c += 1
                tensors.append(x[k])
                if len(tensors) == pretrained_config.data.class_count:
                    break
        if len(tensors) == pretrained_config.data.class_count:
            break

    # Collect tensors from each class
    x_sub = to_cuda(torch.stack(tensors, 0), cuda=config.cuda)

    tensors = [x_sub]
    for j in range(pretrained_config.data.class_count):

        # Compute and visualize the adversarial gradients.
        model.zero_grad()
        x_grad = get_saliency_map(model, x_sub, j).clone().detach()
        tensors.append(x_grad)

    # Concatenate and visualize: the first row is the inputs, followed by one
    # row of saliency maps per target class.
    # NOTE(review): `colormap` is not a torchvision save_image kwarg —
    # save_image is presumably a project wrapper; verify.
    joint_tensor = torch.cat(tensors, dim=0)
    save_image(joint_tensor,
               os.path.join(output_root, 'x_joint.png'),
               nrow=pretrained_config.data.class_count,
               normalize=True,
               colormap='seismic')
Exemplo n.º 10
0
def generate_examples(model, config, pretrained_config, output_root):
    """Run a manual FGS or PGD attack over the validation set.

    Exactly one of `config.fgs` / `config.pgd` must be truthy; `config.eps`
    sets the perturbation budget. Saves `examples.npy`, `adv_targets.npy`,
    `targets.npy` and `results.json` into `output_root`.

    Returns:
        The tensor of adversarial examples (on CPU).

    Raises:
        ValueError: if neither `config.fgs` nor `config.pgd` is set.
    """
    adv_example_filepath = os.path.join(output_root, 'examples')
    adv_targets_filepath = os.path.join(output_root, 'adv_targets')
    targets_filepath = os.path.join(output_root, 'targets')

    n_examples = config['num_examples']
    n_batches = int(
        math.ceil((n_examples * 1.0) / pretrained_config.optim.batch_size))

    # Fix: the original statement was a self-assignment no-op
    # (`pretrained_config.cuda = pretrained_config.cuda`). Propagate the
    # caller's cuda flag so the loader agrees with the `config.cuda` uses
    # below, matching the foolbox-based generate_examples.
    pretrained_config.cuda = config.cuda
    data = load_data(pretrained_config)

    # Accumulators for per-example results across batches.
    adv_ex = torch.Tensor()
    adv_targets = torch.LongTensor()
    true_targets = torch.LongTensor()
    adv_mse = torch.Tensor()
    adv_inf = torch.Tensor()
    success = torch.Tensor()
    margins = torch.Tensor()

    # Fail fast with a clear message instead of an UnboundLocalError on
    # `adv` inside the loop.
    if not (config.fgs or config.pgd):
        raise ValueError("Either config.fgs or config.pgd must be set.")

    # Perform the attack.
    for sample in tqdm(islice(data['validation'], n_batches), total=n_batches):
        model.zero_grad()
        x = to_cuda(sample[0], cuda=pretrained_config.cuda)
        y = to_cuda(sample[1].type(torch.LongTensor),
                    cuda=pretrained_config.cuda)
        true_targets = torch.cat([true_targets, y.detach().cpu()], 0)

        if config.fgs:
            adv, adv_t, original_pred = manual_fgs(model,
                                                   x,
                                                   y,
                                                   config.eps,
                                                   clamp=False)
        else:  # config.pgd (validated above)
            adv, adv_t, original_pred = manual_pgd(model,
                                                   x,
                                                   y,
                                                   config.eps,
                                                   config.eps,
                                                   rand_start=False,
                                                   clamp=False)
        adv_ex = torch.cat([adv_ex, adv.cpu().detach()], 0)
        adv_targets = torch.cat([adv_targets, adv_t.cpu().detach()], 0)

        # Margin between the top-2 logits on the clean input.
        original_top_2 = model(x).topk(2, 1)[0]
        original_margin = original_top_2[:, 0] - original_top_2[:, 1]
        margins = torch.cat([margins, original_margin.cpu().detach()], 0)

        # An attack counts as a success only when the clean prediction was
        # correct AND the adversarial prediction differs from the label.
        batch_success = ((original_pred == y) & (adv_t != y)).float()
        success = torch.cat([success, batch_success.cpu().detach()], 0)
        adv_mse = torch.cat([
            adv_mse, ((adv.view(adv.size(0), -1) - x.view(adv.size(0), -1))**
                      2).mean(-1).cpu().detach()
        ], 0)
        adv_inf = torch.cat([
            adv_inf, (adv.view(adv.size(0), -1) -
                      x.view(adv.size(0), -1)).abs().max(-1)[0].cpu().detach()
        ], 0)

    total_accuracy = accuracy(model, to_cuda(adv_ex, cuda=config.cuda),
                              to_cuda(true_targets, cuda=config.cuda)).item()

    # Summarize the results. All statistics over successful attacks are now
    # guarded against the zero-success case, which previously produced NaN
    # (0/0); only the quartiles were guarded in the original.
    n_success = success.sum().item()
    results = {
        "eps": config.eps,
        "success_rate": success.mean().item(),
        "defense_rate": 1 - success.mean().item(),
        "total_acc": total_accuracy,
        "all_margins_mean": margins.mean().item(),
        "successful_margins": ((margins * success).sum() / success.sum()).item()
        if n_success > 0 else 0.0,
        "mean_mse": ((adv_mse * success).sum() / success.sum()).item()
        if n_success > 0 else 0.0,
        "mean_inf": ((adv_inf * success).sum() / success.sum()).item()
        if n_success > 0 else 0.0,
        "mse_quartiles": list(
            np.percentile(adv_mse[success == 1.0].numpy(),
                          [0, 25, 50, 75, 100]))
        if n_success > 0 else [0, 0, 0, 0, 0]
    }

    results["median_mse"] = results["mse_quartiles"][2]

    print("success rate: {}".format(results["success_rate"]))
    print("defense rate: {}".format(results["defense_rate"]))
    print("total accuracy: {}".format(results["total_acc"]))
    print("Avg Margin: {}".format(results['all_margins_mean']))
    print("Avg Success Margin: {}".format(results['successful_margins']))
    print("mean MSE for successful attacks: {}".format(results["mean_mse"]))
    print("mean L_inf for successful attacks: {}".format(results["mean_inf"]))
    print("MSE quartiles for successful attacks: {}".format(
        results["mse_quartiles"]))

    with open(os.path.join(output_root, 'results.json'), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)

    np.save(adv_example_filepath, adv_ex)
    np.save(adv_targets_filepath, adv_targets)
    np.save(targets_filepath, true_targets)

    return adv_ex