Example #1
def get_dataframe(df, model, data, nmodel, nattack, ndefence, device,
                  result_path):
    path_result = os.path.join(
        result_path, '{}_{}_{}_{}.pt'.format(data, nmodel, nattack, ndefence))
    result = torch.load(path_result)
    X_test = result['X_test']
    y_test = result['y_test']
    labels_test = result['labels_test']
    res_test = result['res_test']

    idx_tp = np.where(labels_test == 1)[0]

    detected_as_adv = res_test[idx_tp]
    y_true = y_test[idx_tp]
    if ndefence == 'magnet':
        X_reformed = result['X_reformed']
        y_pred = predict_numpy(model, X_reformed[idx_tp], device=device)
    elif ndefence == 'rc':
        y_pred = result['res_test'][idx_tp]
        detected_as_adv = np.zeros_like(y_pred)
    else:
        y_pred = predict_numpy(model, X_test[idx_tp], device=device)

    score = acc_on_adv(y_pred, y_true, detected_as_adv)

    # Compute FPR
    idx_fp = np.where(labels_test == 0)[0]
    if ndefence == 'rc':
        fpr = np.mean(res_test[idx_fp] != y_test[idx_fp])
    else:
        fpr = np.mean(res_test[idx_fp])

    att_str = nattack.split('_')
    # DataFrame.append was removed in pandas 2.0; build the row explicitly
    # and use pd.concat instead.
    row = pd.DataFrame([{
        'Attack': att_str[0],
        'Adv_param': att_str[1],
        'Defence': ndefence,
        'FPR': fpr * 100,
        'Acc_on_adv': score * 100,
    }])
    df = pd.concat([df, row], ignore_index=True)
    return df
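
# `acc_on_adv` is used throughout these examples but not defined here. A
# minimal, self-contained sketch of it, assuming the convention visible at
# the call sites (an adversarial example counts as handled correctly if it
# is either detected or still classified correctly):
import numpy as np

def acc_on_adv_sketch(y_pred, y_true, detected_as_adv):
    """Accuracy on adversarial examples with a detector in the loop."""
    return np.mean((y_pred == y_true) | (detected_as_adv == 1))

# Two of four samples are misclassified, but one of those is detected: 0.75.
print(acc_on_adv_sketch(np.array([0, 1, 2, 3]),
                        np.array([0, 1, 9, 9]),
                        np.array([0, 0, 1, 0])))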
Example #2
def cmpt_and_save_predictions(model, art_detector, detector, device, x, y,
                              pred_folder, eps):

    pred_folder = pred_folder + "_{:}".format(eps)
    if not os.path.exists(pred_folder):
        path = Path(pred_folder)
        path.mkdir(parents=True, exist_ok=True)
        print('Cannot find folder. Created:', pred_folder)

    y_pred = predict_numpy(model, x, device)
    pred_sur_det = art_detector.predict(x)
    pred_baard = detector.detect(x, y_pred)

    # Test stage by stage
    reject_s1 = detector.stages[0].predict(x, y_pred)
    reject_s2 = detector.stages[1].predict(x, y_pred)
    reject_s3 = detector.stages[2].predict(x, y_pred)

    print("Show results:")
    print('Acc classifier:', np.mean(y_pred == y))
    print("acc surrogate detector", np.mean(pred_sur_det == y))
    print("acc baard ",np.mean(pred_sur_det == 1))
    print("acc on advx sistema completo ", acc_on_adv(y_pred, y,
                                                      pred_baard))
    print('reject_s1', np.mean(reject_s1))
    print('reject_s2', np.mean(reject_s2))
    print('reject_s3', np.mean(reject_s3))

    print("Save predictions")
    np.save(pred_folder + "_{:}".format("y"), y)
    np.save(pred_folder + "_{:}".format("y-pred"), y_pred)
    np.save(pred_folder + "_{:}".format("pred-sur-det"), pred_sur_det)
    np.save(pred_folder + "_{:}".format("pred-baard"), pred_baard)
    np.save(pred_folder + "_{:}".format("reject-s1"), reject_s1)
    np.save(pred_folder + "_{:}".format("reject-s2"), reject_s2)
    np.save(pred_folder + "_{:}".format("reject-s3"), reject_s3)

    print("Predictions saved")
Example #3
def get_baard_output(data, model_name, data_path, output_path, file_name,
                     param, batch_size, device):
    """This function reads a dataset object. It runs BAARD, applies clipping and 
    adds label_as_adv to the object.
    """
    file_path = os.path.join(output_path, file_name)
    print('file_path:', file_path)

    obj = torch.load(file_path)
    X = obj['X']
    adv = obj['adv']
    y = obj['y']

    # Load model
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(data_path,
                                       train=True,
                                       download=True,
                                       transform=transforms)
        model = BaseModel(use_prob=False).to(device)
        pretrained = 'mnist_200.pt'
    elif data == 'cifar10':
        dataset_train = datasets.CIFAR10(data_path,
                                         train=True,
                                         download=True,
                                         transform=transforms)
        if model_name == 'resnet':
            model = Resnet(use_prob=False).to(device)
            pretrained = 'cifar10_resnet_200.pt'
        elif model_name == 'vgg':
            model = Vgg(use_prob=False).to(device)
            pretrained = 'cifar10_vgg_200.pt'
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    pretrained_path = os.path.join(output_path, pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))
    pred = predict_numpy(model, X, device)
    acc = np.mean(pred == y)
    print('Accuracy on {} clean samples: {}'.format(X.shape[0], acc))

    tensor_train_X, tensor_train_y = get_correct_examples(model,
                                                          dataset_train,
                                                          device=device,
                                                          return_tensor=True)
    X_train = tensor_train_X.cpu().detach().numpy()
    y_train = tensor_train_y.cpu().detach().numpy()

    # Load the preprocessed training set
    baard_train_path = os.path.join(
        output_path, '{}_{}_baard_train.pt'.format(data, model_name))
    obj = torch.load(baard_train_path)
    X_baard = obj['X_train']

    # Load the original validation set for BAARD
    # eg: ./results/mnist_basic_apgd2_2.0_adv.npy
    file_root = '{}_{}_apgd2_2.0'.format(data, model_name)
    path_benign = os.path.join(output_path, file_root + '_x.npy')
    path_y = os.path.join(output_path, file_root + '_y.npy')
    X_val = np.load(path_benign)
    y_val = np.load(path_y)
    n = X_val.shape[0] // 2
    X_val = X_val[n:]
    y_val = y_val[n:]

    stages = []
    stages.append(ApplicabilityStage(n_classes=N_CLASSES,
                                     quantile=param['q1']))
    stages.append(
        ReliabilityStage(n_classes=N_CLASSES,
                         k=param['k_re'],
                         quantile=param['q2']))
    stages.append(
        DecidabilityStage(n_classes=N_CLASSES,
                          k=param['k_de'],
                          quantile=param['q3']))
    print('BAARD: # of stages:', len(stages))

    detector = BAARDOperator(stages=stages)
    detector.stages[0].fit(X_baard, y_train)
    detector.stages[1].fit(X_train, y_train)
    detector.stages[2].fit(X_train, y_train)
    detector.search_thresholds(X_val, y_val, np.zeros_like(y_val))

    pred_adv = predict_numpy(model, adv, device)
    print('Acc on adv without clip:', np.mean(pred_adv == y))

    # count_class(pred_adv)

    # TODO: After clipping, the 1st stage still blocks some samples; it is
    # not clear why. To bypass the 1st stage, clip all adversarial examples
    # to the per-class bounding boxes.
    applicability = detector.stages[0]
    thresholds = applicability.thresholds_
    adv_clipped = adv.copy()
    for c in range(N_CLASSES):
        idx = np.where(pred_adv == c)[0]
        # Adversarial examples do NOT have the same distribution as the true classes
        if len(idx) == 0:
            continue
        bounding_boxes = thresholds[c]
        low = bounding_boxes[0]
        high = bounding_boxes[1]
        shape = adv_clipped[idx].shape
        # Clip feature-wise into the class bounding box.
        subset = np.clip(flatten(adv[idx]), low, high)
        adv_clipped[idx] = subset.reshape(shape)

    pred_adv_clip = predict_numpy(model, adv_clipped, device)
    print('Acc on adv with clip:', np.mean(pred_adv_clip == y))
    print('Class changed after clipping:', np.sum(pred_adv != pred_adv_clip))

    pred_X = predict_numpy(model, X, device)
    # X holds only correctly classified samples, so predictions should match
    # the labels. (The original `assert not np.all([pred_X, y])` did not
    # actually compare predictions with labels.)
    assert np.array_equal(pred_X, y)
    baard_label_adv = detector.detect(adv_clipped, pred_adv_clip)

    s1_blocked = detector.stages[0].predict(adv_clipped, pred_adv_clip)
    print('Blocked by Stage1:', np.sum(s1_blocked))

    acc = acc_on_adv(pred_adv_clip, y, baard_label_adv)
    print('Acc_on_adv:', acc)

    baard_label_x = detector.detect(X, y)
    print('FPR:', np.mean(baard_label_x))

    output = {
        'X': X,
        'adv': adv_clipped,
        'y': y,
        'baard_label_x': baard_label_x,
        'baard_label_adv': baard_label_adv
    }
    torch.save(output, file_path)
    print('Save to:', file_path)
    print()
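
# The clipping loop above, distilled into a self-contained sketch: each class
# c has per-feature (low, high) thresholds learned by the Applicability stage,
# and every sample predicted as class c is clipped feature-wise into that box.
# The flat (n, d) layout and the names are assumptions for illustration.
import numpy as np

def clip_to_class_boxes(x_flat, pred, thresholds):
    """x_flat: (n, d) samples; thresholds[c] = (low_c, high_c) arrays."""
    x_clipped = x_flat.copy()
    for c, (low, high) in enumerate(thresholds):
        idx = np.where(pred == c)[0]
        if len(idx) == 0:
            continue
        x_clipped[idx] = np.clip(x_flat[idx], low, high)
    return x_clipped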
Example #4
def run_full_pipeline_baard(data,
                            model_name,
                            path,
                            seed,
                            json_param,
                            att_name,
                            eps):
    set_seeds(seed)

    # Line attack takes no hyperparameter
    if att_name == 'line':
        eps = 1
    print('args:', data, model_name, path, seed, json_param, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    n_classes = 10
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()])
        dataset_train = datasets.CIFAR10(PATH_DATA, train=True, download=True, transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA, train=False, download=True, transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS, device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(tensor_y.size(0), acc_perfect * 100))
    # Split rules:
    # 1. Benchmark_defence_test: 1000 (def_test)
    # 2. Benchmark_defence_val:  1000 (def_val)
    # 3. Test white-box attack:  2000 (att_test)
    # 4. Train surrogate model:  2000 (sur_train)
    #    -----------------Total: 6000
    idx_shuffle = np.random.permutation(tensor_X.size(0))[:6000]
    X = tensor_X[idx_shuffle].cpu().detach().numpy()
    y = tensor_y[idx_shuffle].cpu().detach().numpy()

    print('-------------------------------------------------------------------')
    print('Start generating {} adversarial examples...'.format(len(idx_shuffle)))

    adv, X, y = run_attack_untargeted(file_model, X, y, att_name=att_name, eps=eps, device=device)

    print('-------------------------------------------------------------------')
    print('Start testing adversarial examples...')
    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]

    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]  # Unused by BAARD
    pred_adv_def_val = pred[1000:2000]  # Unused by BAARD

    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]

    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]

    # concatenate the adversarial examples computed for different epsilon
    if data == 'mnist':
        eps_1 = 1
        eps_2 = 5
        eps_3 = 8
        eps_4 = 3
    elif data == "cifar10":
        eps_1 = 0.05
        eps_2 = 0.1
        eps_3 = 0.5
        eps_4 = 1
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    print('-------------------------------------------------------------------')
    print('Start training BAARD...')
    # Run preprocessing
    file_baard_train = os.path.join(path, '{}_{}_baard_s1_train_data.pt'.format(data, model_name))
    if os.path.exists(file_baard_train):
        print('Found existing BAARD preprocess data:', file_baard_train)
        obj = torch.load(file_baard_train)
        X_baard_train_s1 = obj['X_s1']
        X_baard_train = obj['X']
        y_baard_train = obj['y']
    else:
        tensor_X, tensor_y = get_correct_examples(model, dataset_train,
                                                  device=device,
                                                  return_tensor=True)
        X_baard_train = tensor_X.cpu().detach().numpy()
        y_baard_train = tensor_y.cpu().detach().numpy()

        # FIXME: this raises an error because preprocess_baard expects a PIL
        # image.
        X_baard_train_s1 = preprocess_baard(
            data, X_baard_train).cpu().detach().numpy()
        obj = {
            'X_s1': X_baard_train_s1,
            'X': X_baard_train,
            'y': y_baard_train
        }
        torch.save(obj, file_baard_train)
        print('Save BAARD training data to:', file_baard_train)
    
    print('X_baard_train_s1', X_baard_train_s1.shape)

    with open(json_param) as j:
        baard_param = json.load(j)
    print('Param:', baard_param)
    sequence = baard_param['sequence']
    stages = []
    if sequence[0]:
        stages.append(ApplicabilityStage(n_classes=n_classes, quantile=baard_param['q1']))
    if sequence[1]:
        stages.append(ReliabilityStage(n_classes=n_classes, k=baard_param['k_re'], quantile=baard_param['q2']))
    if sequence[2]:
        stages.append(DecidabilityStage(n_classes=n_classes, k=baard_param['k_de'], quantile=baard_param['q3']))
    print('BAARD stages:', len(stages))
    detector = BAARDOperator(stages=stages)
    assert X_baard_train.shape == X_baard_train_s1.shape, 'Unmatched size: {}, {}'.format(X_baard_train.shape, X_baard_train_s1.shape)
    assert X_baard_train_s1.shape[0] == y_baard_train.shape[0]
    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)

    file_baard_threshold = os.path.join(path, '{}_{}_baard_threshold.pt'.format(data, model_name))
    if os.path.exists(file_baard_threshold):
        print('Found existing BAARD thresholds:', file_baard_threshold)
        detector.load(file_baard_threshold)
    else:
        # Search thresholds
        detector.search_thresholds(X_def_val, y_def_val, np.zeros_like(y_def_val))
        detector.save(file_baard_threshold)

    print('-------------------------------------------------------------------')
    print('Start testing BAARD...')

    time_start = time.time()
    label_adv = detector.detect(adv_def_test, pred_adv_def_test)
    label_clean = detector.detect(X_def_test, y_def_test)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    acc = acc_on_adv(pred_adv_def_test, y_def_test, label_adv)
    fpr = np.mean(label_clean)
    print('Acc_on_adv:', acc)
    print('FPR:', fpr)

    obj = {
        'X': X_def_test,
        'y': y_def_test,
        'adv': adv_def_test,
        'label_adv': label_adv,
        'label_clean': label_clean,
        'pred_adv': pred_adv_def_test
    }
    file_baard_output = os.path.join(path, '{}_{}_{}_{}_baard_output.pt'.format(data, model_name, att_name, round(eps * 1000)))
    torch.save(obj, file_baard_output)
    print('Save to:', file_baard_output)

    print('-------------------------------------------------------------------')
    print('Start training surrogate model...')
    file_surro = os.path.join(path, '{}_{}_baard_surrogate.pt'.format(data, model_name))
    # NOTE: caching was disabled here. The original (commented-out) code
    # reloaded a pre-trained surrogate model and a saved surrogate dataset
    # when they existed; the surrogate is now always rebuilt from scratch.

    file_surro_data = os.path.join(path, '{}_{}_surrogate_data.pt'.format(data,
                                                                          model_name))

    # Generate additional adversarial examples for the remaining epsilon
    # values and append them to the surrogate training set.
    for e in (eps_1, eps_2, eps_3, eps_4):
        adv_e = run_attack_untargeted(file_model, X_surro_train,
                                      y_surro_train,
                                      att_name=att_name,
                                      eps=e, device=device)[0]
        adv_surro_train = np.append(adv_surro_train, adv_e, axis=0)

    # Replicate the benign set as well (5 copies in total) so the surrogate
    # training data stays balanced.
    X_surro_train_replicated = np.concatenate([X_surro_train] * 5, axis=0)
    y_surro_train_replicated = np.concatenate([y_surro_train] * 5)

    # classify the surrogate set
    pred_adv_surro_train = predict_numpy(model, adv_surro_train, device)
    label_adv_train = detector.detect(adv_surro_train, pred_adv_surro_train)
    label_X_train = detector.detect(X_surro_train_replicated, y_surro_train_replicated)
    # concatenate the clean and the adversarial samples
    X_train = np.concatenate((X_surro_train_replicated, adv_surro_train))
    label_train = np.concatenate((label_X_train, label_adv_train))

    label_adv_test = detector.detect(adv_att_test[:1000], pred_adv_att_test[:1000])
    label_X_test = detector.detect(X_att_test[:1000], y_att_test[:1000])
    X_test = np.concatenate((X_att_test[:1000], adv_att_test[:1000]))
    label_test = np.concatenate((label_X_test, label_adv_test))
    print(X_train.shape, label_train.shape, X_test.shape, label_test.shape)
    print('Labelled as adv:', np.mean(label_train == 1), np.mean(label_test == 1))

    obj = {
        'X_train': X_train,
        # Use the replicated labels so the lengths match X_train, which
        # stacks 5 benign copies and 5 adversarial batches.
        'y_train': np.concatenate((y_surro_train_replicated,
                                   y_surro_train_replicated)),
        'pred_train': np.concatenate((y_surro_train_replicated,
                                      pred_adv_surro_train)),
        'label_train': label_train,
        'X_test': X_test,
        'y_test': np.concatenate((y_att_test[:1000], y_att_test[:1000])),
        'pred_test': np.concatenate((y_att_test[:1000], pred_adv_att_test[:1000])),
        'label_test': label_test
    }

    torch.save(obj, file_surro_data)
    print('Save surrogate training data to:', file_surro_data)

    surrogate = train_surrogate(X_train, X_test, label_train, label_test, epochs=EPOCHS, device=device)
    torch.save(surrogate.state_dict(), file_surro)
    print('Save surrogate model to:', file_surro)

    print('-------------------------------------------------------------------')
    print('Start testing surrogate model...')
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    label_test = detector.detect(X_test, pred_test)
    acc = acc_on_adv(pred_test[1000:], y_att_test[1000:], label_test[1000:])
    fpr = np.mean(label_test[:1000])
    print('BAARD Acc_on_adv:', acc)
    print('BAARD FPR:', fpr)

    label_surro = predict_numpy(surrogate, X_test, device)
    acc = np.mean(label_surro == label_test)
    print('Acc on surrogate:', acc)

    print('DONE!')
    print('-------------------------------------------------------------------\n')
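
# The 6000-sample split used by the pipelines in this file, written out as a
# small helper. The index boundaries follow the "Split rules" comment above;
# the function itself is illustrative, not part of the codebase.
def split_6000(X, y):
    assert X.shape[0] >= 6000 and y.shape[0] >= 6000
    bounds = {'def_test': (0, 1000), 'def_val': (1000, 2000),
              'att_test': (2000, 4000), 'sur_train': (4000, 6000)}
    return {name: (X[a:b], y[a:b]) for name, (a, b) in bounds.items()}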
Example #5
def main():
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, required=True)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--adv', type=str, required=True, help="Example: 'mnist_basic_apgd_0.3'")
    parser.add_argument('--defence', type=str, required=True, choices=data_params['defences'])
    parser.add_argument('--param', type=str, required=True)
    parser.add_argument('--suffix', type=str)
    parser.add_argument('--random_state', type=int, default=1234)
    parser.add_argument('--save', type=int, default=1, choices=[0, 1])
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)
    
    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
        os.mkdir(args.output_path)
        
    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Adversarial examples:', args.adv + '_adv.npy')
    print('Defence:', args.defence)

    with open(args.param) as param_json:
        param = json.load(param_json)
    param['n_classes'] = data_params['data'][args.data]['n_classes']
    print('Param:', param)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])

    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(torch.from_numpy(X_train).type(torch.float32), torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(torch.from_numpy(X_test).type(torch.float32), torch.from_numpy(y_test).type(torch.long))

    loader_train = DataLoader(dataset_train, batch_size=512, shuffle=False)
    loader_test = DataLoader(dataset_test, batch_size=512, shuffle=False)

    shape_train = get_shape(loader_train.dataset)
    shape_test = get_shape(loader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)
    use_prob = True
    print('Using softmax layer:', use_prob)

    # Load model
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise ValueError('Unknown model: {}'.format(model_name))
    else:
        n_features = data_params['data'][args.data]['n_features']
        n_classes = data_params['data'][args.data]['n_classes']
        model = NumericModel(n_features, n_hidden=n_features * 4, n_classes=n_classes, use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, loader_train, loss, device)
    _, acc_test = validate(model, loader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains recognisable samples.
    # The original train and test sets are no longer needed.
    tensor_train_X, tensor_train_y = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
    dataset_train = TensorDataset(tensor_train_X, tensor_train_y)
    loader_train = DataLoader(dataset_train, batch_size=512, shuffle=True)
    _, acc_perfect = validate(model, loader_train, loss, device)
    print('Accuracy on {} filtered train set: {:.4f}%'.format(len(dataset_train), acc_perfect * 100))

    tensor_test_X, tensor_test_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
    loader_test = DataLoader(dataset_test, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_test, loss, device)
    print('Accuracy on {} filtered test set: {:.4f}%'.format(len(dataset_test), acc_perfect * 100))

    # Load pre-trained adversarial examples
    path_benign = os.path.join(args.output_path, args.adv + '_x.npy')
    path_adv = os.path.join(args.output_path, args.adv + '_adv.npy')
    path_y = os.path.join(args.output_path, args.adv + '_y.npy')
    X_benign = np.load(path_benign)
    adv = np.load(path_adv)
    y_true = np.load(path_y)

    dataset = TensorDataset(torch.from_numpy(X_benign), torch.from_numpy(y_true))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc = validate(model, loader, loss, device)
    print('Accuracy on {} benign samples: {:.4f}%'.format(len(dataset), acc * 100))

    dataset = TensorDataset(torch.from_numpy(adv), torch.from_numpy(y_true))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc = validate(model, loader, loss, device)
    print('Accuracy on {} adversarial examples: {:.4f}%'.format(len(dataset), acc * 100))

    # Do NOT shuffle the indices, so different defences can use the same test set.
    dataset = TensorDataset(torch.from_numpy(adv))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    pred_adv = predict(model, loader, device).cpu().detach().numpy()

    # Find the thresholds using the 2nd half
    n = len(X_benign) // 2
    # Merge benign samples and adversarial examples into one set.
    # These labels indicate whether a sample is an adversarial example.
    X_val, labels_val = merge_and_generate_labels(adv[n:], X_benign[n:], flatten=False)
    # The predictions for benign samples are exactly the same as the true labels.
    pred_val = np.concatenate((pred_adv[n:], y_true[n:]))

    X_train = tensor_train_X.cpu().detach().numpy()
    y_train = tensor_train_y.cpu().detach().numpy()

    # Train defence
    time_start = time.time()
    if args.defence == 'baard':
        sequence = param['sequence']
        stages = []
        if sequence[0]:
            stages.append(ApplicabilityStage(n_classes=param['n_classes'], quantile=param['q1']))
        if sequence[1]:
            stages.append(ReliabilityStage(n_classes=param['n_classes'], k=param['k_re'], quantile=param['q2']))
        if sequence[2]:
            stages.append(DecidabilityStage(n_classes=param['n_classes'], k=param['k_de'], quantile=param['q3']))
        print('BAARD: # of stages:', len(stages))
        detector = BAARDOperator(stages=stages)

        # Run preprocessing
        baard_train_path = os.path.join(args.output_path, '{}_{}_baard_train.pt'.format(args.data, model_name))
        obj = torch.load(baard_train_path)
        X_baard = obj['X_train']
        y_train = obj['y_train']
        # Fit the detector with the filtered train set.
        detector.stages[0].fit(X_baard, y_train)
        detector.stages[1].fit(X_train, y_train)
        if len(detector.stages) == 3:
            detector.stages[2].fit(X_train, y_train)
        detector.search_thresholds(X_val, pred_val, labels_val)
        path_baard = os.path.join(args.output_path, 'baard_{}_{}_param.pt'.format(args.data, model_name))
        detector.save(path_baard)
    elif args.defence == 'fs':
        squeezers = []
        if args.data == 'mnist':
            squeezers.append(DepthSqueezer(x_min=0.0, x_max=1.0, bit_depth=1))
            squeezers.append(MedianSqueezer(x_min=0.0, x_max=1.0, kernel_size=2))
        elif args.data == 'cifar10':
            squeezers.append(DepthSqueezer(x_min=0.0, x_max=1.0, bit_depth=4))
            squeezers.append(MedianSqueezer(x_min=0.0, x_max=1.0, kernel_size=2))
            squeezers.append(NLMeansColourSqueezer(x_min=0.0, x_max=1.0, h=2, templateWindowsSize=3, searchWindowSize=13))
        else:
            raise NotImplementedError
        print('FS: # of squeezers:', len(squeezers))
        detector = FeatureSqueezingTorch(
            classifier=model,
            lr=0.001,
            momentum=0.9,
            weight_decay=5e-4,
            loss=loss,
            batch_size=128,
            x_min=0.0,
            x_max=1.0,
            squeezers=squeezers,
            n_classes=param['n_classes'],
            device=device)
        path_fs = os.path.join(args.output_path, '{}_fs.pt'.format(args.pretrained.split('.')[0]))
        detector.load(path_fs)
        detector.search_thresholds(X_val, pred_val, labels_val)
    elif args.defence == 'lid':
        # This batch_size is not the same as the mini-batch size of the neural network.
        before_softmax = args.data == 'cifar10'
        detector = LidDetector(
            model,
            k=param['k'],
            batch_size=param['batch_size'],
            x_min=0.0,
            x_max=1.0,
            device=device,
            before_softmax=before_softmax)
        # LID uses a different training set.
        X_train, y_train = detector.get_train_set(X_benign[n:], adv[n:], std_dominator=param['std_dominator'])
        detector.fit(X_train, y_train, verbose=1)
    elif args.defence == 'magnet':
        magnet_detectors = []
        # Different datasets require different autoencoders.
        if args.data == 'mnist':
            # autoencoder1 and autoencoder2
            magnet_detectors.append(MagNetDetector(
                encoder=Autoencoder1(n_channel=1),
                classifier=model,
                lr=param['lr'],
                batch_size=param['batch_size'],
                weight_decay=param['weight_decay'],
                x_min=0.0,
                x_max=1.0,
                noise_strength=param['noise_strength'],
                algorithm='error',
                p=1,
                device=device))
            magnet_detectors.append(MagNetDetector(
                encoder=Autoencoder2(n_channel=1),
                classifier=model,
                lr=param['lr'],
                batch_size=param['batch_size'],
                weight_decay=param['weight_decay'],
                x_min=0.0,
                x_max=1.0,
                noise_strength=param['noise_strength'],
                algorithm='error',
                p=2,
                device=device))
        elif args.data == 'cifar10':
            autoencoder = Autoencoder2(
                n_channel=data_params['data'][args.data]['n_features'][0])
            # There are 3 autoencoder based detectors, but they use the same architecture.
            magnet_detectors.append(MagNetDetector(
                encoder=autoencoder,
                classifier=model,
                lr=param['lr'],
                batch_size=param['batch_size'],
                weight_decay=param['weight_decay'],
                x_min=0.0,
                x_max=1.0,
                noise_strength=param['noise_strength'],
                algorithm='error',
                p=2,
                device=device))
            magnet_detectors.append(MagNetDetector(
                encoder=autoencoder,
                classifier=model,
                lr=param['lr'],
                batch_size=param['batch_size'],
                weight_decay=param['weight_decay'],
                x_min=0.0,
                x_max=1.0,
                noise_strength=param['noise_strength'],
                algorithm='prob',
                temperature=10,
                device=device))
            magnet_detectors.append(MagNetDetector(
                encoder=autoencoder,
                classifier=model,
                lr=param['lr'],
                batch_size=param['batch_size'],
                weight_decay=param['weight_decay'],
                x_min=0.0,
                x_max=1.0,
                noise_strength=param['noise_strength'],
                algorithm='prob',
                temperature=40,
                device=device))
        else:
            raise ValueError('MagNet requires an autoencoder.')

        for i, ae in enumerate(magnet_detectors, start=1):
            ae_path = os.path.join(args.output_path, 'autoencoder_{}_{}_{}.pt'.format(args.data, model_name, i))
            ae.load(ae_path)
            tensor_X_test, _ = dataset2tensor(dataset_test)
            X_test = tensor_X_test.cpu().detach().numpy()
            print('Autoencoder {} MSE training set: {:.6f}, test set: {:.6f}'.format(i, ae.score(X_train), ae.score(X_test)))
            print('Autoencoder {} threshold: {}'.format(i, ae.threshold))

        reformer = MagNetAutoencoderReformer(
            encoder=magnet_detectors[0].encoder,
            batch_size=param['batch_size'],
            device=device)

        detector = MagNetOperator(
            classifier=model,
            detectors=magnet_detectors,
            reformer=reformer,
            batch_size=param['batch_size'],
            device=device)
    elif args.defence == 'rc':
        detector = RegionBasedClassifier(
            model=model,
            r=param['r'],
            sample_size=param['sample_size'],
            n_classes=param['n_classes'],
            x_min=0.0,
            x_max=1.0,
            batch_size=param['batch_size'],
            r0=param['r0'],
            step_size=param['step_size'],
            stop_value=param['stop_value'],
            device=device)
        # Region-based classifier only uses benign samples to search threshold.
        # The r value is already set to the optimal. We don't need to search it.
        # detector.search_thresholds(X_val, pred_val, labels_val, verbose=0)
    else:
        raise ValueError('{} is not supported!'.format(args.defence))
    time_elapsed = time.time() - time_start
    print('Total training time:', str(datetime.timedelta(seconds=time_elapsed)))

    # Test defence
    time_start = time.time()
    X_test, labels_test = merge_and_generate_labels(adv[:n], X_benign[:n], flatten=False)
    pred_test = np.concatenate((pred_adv[:n], y_true[:n]))
    y_test = np.concatenate((y_true[:n], y_true[:n]))

    # Only MagNet uses a reformer.
    X_reformed = None
    if args.defence == 'magnet':
        X_reformed, res_test = detector.detect(X_test, pred_test)
        y_pred = predict_numpy(model, X_reformed, device)
    elif args.defence == 'rc':
        y_pred = detector.detect(X_test, pred_test)
        res_test = np.zeros_like(y_pred)
    else:
        res_test = detector.detect(X_test, pred_test)
        y_pred = pred_test

    acc = acc_on_adv(y_pred[:n], y_test[:n], res_test[:n])
    if args.defence == 'rc':
        fpr = np.mean(y_pred[n:] != y_test[n:])
    else:
        fpr = np.mean(res_test[n:])
    print('Acc_on_adv:', acc)
    print('FPR:', fpr)
    time_elapsed = time.time() - time_start
    print('Total test time:', str(datetime.timedelta(seconds=time_elapsed)))

    # Save results
    suffix = '_' + args.suffix if args.suffix is not None else ''

    if args.save:
        path_result = os.path.join(args.output_path, '{}_{}{}.pt'.format(args.adv, args.defence, suffix))
        torch.save({
            'X_val': X_val,
            'y_val': np.concatenate((y_true[n:], y_true[n:])),
            'labels_val': labels_val,
            'X_test': X_test,
            'y_test': y_test,
            'labels_test': labels_test,
            'res_test': y_pred if args.defence == 'rc' else res_test,
            'X_reformed': X_reformed,
            'param': param}, path_result)
        print('Saved to:', path_result)
    else:
        print('No file was saved!')
    print()
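
# `merge_and_generate_labels` is called above but not defined in this file.
# A plausible sketch, inferred from the call sites: adversarial examples come
# first and are labelled 1, benign samples 0 (matching the `labels_test == 1`
# check in Example #1).
import numpy as np

def merge_and_generate_labels_sketch(X_adv, X_benign, flatten=False):
    X = np.concatenate((X_adv, X_benign))
    if flatten:
        X = X.reshape(X.shape[0], -1)
    labels = np.concatenate((np.ones(len(X_adv)), np.zeros(len(X_benign))))
    return X, labels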
Example #6
def run_full_pipeline_magnet(data, model_name, path, seed, json_param,
                             att_name, eps):
    set_seeds(seed)

    print('args:', data, model_name, path, seed, json_param, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA,
                                       train=True,
                                       download=True,
                                       transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA,
                                      train=False,
                                      download=True,
                                      transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()
        ])
        dataset_train = datasets.CIFAR10(PATH_DATA,
                                         train=True,
                                         download=True,
                                         transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA,
                                        train=False,
                                        download=True,
                                        transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS,
                        device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model,
                                              dataset_test,
                                              device=device,
                                              return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(
        tensor_y.size(0), acc_perfect * 100))
    # Split rules:
    # 1. Benchmark_defence_test: 1000 (def_test)
    # 2. Benchmark_defence_val:  1000 (def_val)
    # 3. Test white-box attack:  2000 (att_test)
    # 4. Train surrogate model:  2000 (sur_train)
    #    -----------------Total: 6000
    idx_shuffle = np.random.permutation(tensor_X.size(0))[:6000]
    X = tensor_X[idx_shuffle].cpu().detach().numpy()
    y = tensor_y[idx_shuffle].cpu().detach().numpy()

    print(
        '-------------------------------------------------------------------')
    print('Start generating {} adversarial examples...'.format(
        len(idx_shuffle)))

    adv, X, y = run_attack_untargeted(file_model,
                                      X,
                                      y,
                                      att_name=att_name,
                                      eps=eps,
                                      device=device)

    print(
        '-------------------------------------------------------------------')
    print('Start testing adversarial examples...')
    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]

    X_def_val = X[1000:2000]
    # y_def_val = y[1000:2000]
    # adv_def_val = adv[1000:2000]
    # pred_adv_def_val = pred[1000:2000]

    # X_att_test = X[2000:4000]
    # y_att_test = y[2000:4000]
    # adv_att_test = adv[2000:4000]
    # pred_adv_att_test = pred[2000:4000]

    # X_surro_train = X[4000:]
    # y_surro_train = y[4000:]
    # adv_surro_train = adv[4000:]
    # pred_adv_surro_train = pred[4000:]

    print(
        '-------------------------------------------------------------------')
    print('Start training MagNet...')
    # Run preprocessing
    tensor_X, tensor_y = get_correct_examples(model,
                                              dataset_train,
                                              device=device,
                                              return_tensor=True)
    X_train = tensor_X.cpu().detach().numpy()
    y_train = tensor_y.cpu().detach().numpy()

    with open(json_param) as j:
        param = json.load(j)

    time_start = time.time()
    detector = train_magnet(data,
                            model_name,
                            X_train,
                            y_train,
                            X_def_val,
                            param,
                            device,
                            path,
                            EPOCHS,
                            model=model)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    print(
        '-------------------------------------------------------------------')
    print('Start testing MagNet...')

    time_start = time.time()
    adv_reformed_test, label_adv = detector.detect(adv_def_test,
                                                   pred_adv_def_test)
    X_reformed_test, label_clean = detector.detect(X_def_test, y_def_test)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    pred_adv_reformed = predict_numpy(model, adv_reformed_test, device)
    acc = acc_on_adv(pred_adv_reformed, y_def_test, label_adv)
    fpr = np.mean(label_clean)
    print('Acc_on_adv:', acc)
    print('FPR:', fpr)

    obj = {
        'X': X_def_test,
        'y': y_def_test,
        'adv': adv_def_test,
        'label_adv': label_adv,
        'label_clean': label_clean,
        'pred_adv': pred_adv_def_test,
        'X_reformed': X_reformed_test,
        'adv_reformed': adv_reformed_test,
        'pred_adv_reformed': pred_adv_reformed
    }
    file_detector_output = os.path.join(
        path, '{}_{}_{}_{}_magnet_output.pt'.format(data, model_name, att_name,
                                                    round(eps * 1000)))
    torch.save(obj, file_detector_output)
    print('Save to:', file_detector_output)

    print('DONE!')
    print(
        '-------------------------------------------------------------------\n'
    )
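
# The MagNet evaluation above reduces to two numbers. A pure-NumPy sketch of
# that scoring, assuming detect() returns (reformed_inputs, detection_labels)
# as in this file:
import numpy as np

def score_reformer_defence(pred_reformed, y_true, label_adv, label_clean):
    """acc_on_adv counts detected-or-correct; FPR is the clean flag rate."""
    acc = np.mean((pred_reformed == y_true) | (label_adv == 1))
    fpr = np.mean(label_clean)
    return acc, fpr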
Example #7
def run_evaluate_baard(data,
                       model_name,
                       path,
                       seed,
                       json_param,
                       att_name,
                       eps):
    set_seeds(seed)

    # Line attack takes no hyperparameter
    if att_name == 'line':
        eps = [1]
    print('args:', data, model_name, path, seed, json_param, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    n_classes = 10
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()])
        dataset_train = datasets.CIFAR10(PATH_DATA, train=True, download=True, transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA, train=False, download=True, transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS, device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(tensor_y.size(0), acc_perfect * 100))

    print('-------------------------------------------------------------------')
    print('Start training BAARD...')

    file_baard_train = os.path.join(path, '{}_{}_baard_s1_train_data.pt'.format(data, model_name))
    if os.path.exists(file_baard_train):
        print('Found existing BAARD preprocess data:', file_baard_train)
        obj = torch.load(file_baard_train)
        X_baard_train_s1 = obj['X_s1']
        X_baard_train = obj['X']
        y_baard_train = obj['y']
    else:
        raise FileNotFoundError(
            'Cannot find BAARD preprocess data: {}'.format(file_baard_train))
    print('BAARD train set:', X_baard_train_s1.shape)

    with open(json_param) as j:
        baard_param = json.load(j)
    print('Param:', baard_param)
    sequence = baard_param['sequence']
    stages = []
    if sequence[0]:
        stages.append(ApplicabilityStage(n_classes=n_classes, quantile=baard_param['q1']))
    if sequence[1]:
        stages.append(ReliabilityStage(n_classes=n_classes, k=baard_param['k_re'], quantile=baard_param['q2']))
    if sequence[2]:
        stages.append(DecidabilityStage(n_classes=n_classes, k=baard_param['k_de'], quantile=baard_param['q3']))
    print('BAARD stages:', len(stages))
    detector = BAARDOperator(stages=stages)
    assert X_baard_train.shape == X_baard_train_s1.shape, 'Unmatched size: {}, {}'.format(X_baard_train.shape, X_baard_train_s1.shape)
    assert X_baard_train_s1.shape[0] == y_baard_train.shape[0]
    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)

    file_baard_threshold = os.path.join(path, '{}_{}_baard_threshold.pt'.format(data, model_name))
    if os.path.exists(file_baard_threshold):
        print('Found existing BAARD thresholds:', file_baard_threshold)
        detector.load(file_baard_threshold)
    else:
        raise FileNotFoundError(
            'Cannot find pre-trained BAARD: {}'.format(file_baard_threshold))

    # print('-------------------------------------------------------------------')
    # print('Load surrogate model...')
    # file_surro = os.path.join(path, '{}_{}_baard_surrogate.pt'.format(data, model_name))
    # if os.path.exists(file_surro):
    #     print('Found existing surrogate model:', file_surro)
    #     surrogate = get_pretrained_surrogate(file_surro, device)
    # else:
    #     raise FileNotFoundError('Cannot find pre-trained surrogate model:', file_surro)

    print('-------------------------------------------------------------------')
    print('Start evaluating the robustness of the classifier...')

    # np.float was removed in NumPy 1.24; use the builtin float instead.
    eps = np.array(eps, dtype=float)
    n_att = eps.shape[0]
    accs_classifier = np.zeros(n_att, dtype=float)
    accs_on_adv = np.zeros_like(accs_classifier)
    fprs = np.zeros_like(accs_on_adv)

    file_data = os.path.join(path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps[0] * 1000)))
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']

    pred = predict_numpy(model, X, device)
    print('Acc on clean samples:', np.mean(pred == y))

    for i in range(n_att):
        print('Evaluating {} eps={}'.format(att_name, eps[i]))
        file_data = os.path.join(path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps[i] * 1000)))
        obj = torch.load(file_data)
        adv = obj['adv']

        X_def_test = X[:1000]
        y_def_test = y[:1000]
        adv_def_test = adv[:1000]

        # Bug fix: predict on the adversarial examples directly instead of
        # reusing `pred`, which holds predictions on the clean samples (and,
        # on later iterations, stale predictions from the previous attack).
        pred_adv_def_test = predict_numpy(model, adv_def_test, device)
        acc_base = np.mean(pred_adv_def_test == y_def_test)

        labelled_as_adv = detector.detect(adv_def_test, pred_adv_def_test)
        acc_def = acc_on_adv(pred_adv_def_test, y_def_test, labelled_as_adv)

        labelled_false = detector.detect(X_def_test, y_def_test)
        fpr = np.mean(labelled_false)

        print('acc_model: {:.4f}, acc_on_adv: {:.4f}, fpr: {:.4f}'.format(acc_base, acc_def, fpr))
        accs_classifier[i] = acc_base
        accs_on_adv[i] = acc_def
        fprs[i] = fpr

    results = np.array([eps, accs_classifier, accs_on_adv, fprs]).transpose()
    df = pd.DataFrame(data=results, columns=['eps', 'acc_base', 'acc_on_adv', 'fpr'])
    file_output = os.path.join(path, '{}_{}_{}_{}.csv'.format(data, model_name, DEFENCE, att_name))
    df.to_csv(file_output, index=False)
    print('Saved results to:', file_output)

    print('DONE!')
    print('-------------------------------------------------------------------\n')
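
# File names in these evaluation scripts encode epsilon as round(eps * 1000).
# A small helper capturing that convention (hypothetical; it simply mirrors
# the format strings used above):
import os

def result_file(path, data, model_name, att_name, eps):
    return os.path.join(path, '{}_{}_{}_{}.pt'.format(
        data, model_name, att_name, round(eps * 1000)))

# e.g. result_file('results', 'mnist', 'dnn', 'apgd', 0.3)
#      -> 'results/mnist_dnn_apgd_300.pt' (on POSIX paths)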
Example #8
def run_evaluate_magnet(data,
                        model_name,
                        path,
                        seed,
                        json_param,
                        att_name,
                        eps):
    set_seeds(seed)

    # Line attack takes no hyperparameter
    if att_name == 'line':
        eps = [1]
    print('args:', data, model_name, path, seed, json_param, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    n_classes = 10
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()])
        dataset_train = datasets.CIFAR10(PATH_DATA, train=True, download=True, transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA, train=False, download=True, transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS, device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(tensor_y.size(0), acc_perfect * 100))

    print('-------------------------------------------------------------------')
    print('Start training MagNet...')
    # Run preprocessing
    tensor_X, tensor_y = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
    X_train = tensor_X.cpu().detach().numpy()
    y_train = tensor_y.cpu().detach().numpy()

    # We need to load the evaluation set first. The clean samples are always
    # the same, so it does not matter which attack is selected.
    file_data = os.path.join(path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps[0] * 1000)))
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    X_def_val = X[1000:2000]

    with open(json_param) as j:
        param = json.load(j)

    time_start = time.time()
    detector = train_magnet(data, model_name, X_train, y_train, X_def_val, param, device, path, EPOCHS, model=model)
    time_elapsed = time.time() - time_start
    print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

    print('-------------------------------------------------------------------')
    print('Start evaluating the robustness of the classifier...')

    # np.float was removed in NumPy 1.24; use the builtin float instead.
    eps = np.array(eps, dtype=float)
    n_att = eps.shape[0]
    accs_classifier = np.zeros(n_att, dtype=float)
    accs_on_adv = np.zeros_like(accs_classifier)
    fprs = np.zeros_like(accs_on_adv)

    pred = predict_numpy(model, X, device)
    print('Acc on clean samples:', np.mean(pred == y))

    for i in range(n_att):
        print('Evaluating {} eps={}'.format(att_name, eps[i]))
        file_data = os.path.join(path, '{}_{}_{}_{}.pt'.format(data, model_name, att_name, round(eps[i] * 1000)))
        obj = torch.load(file_data)
        adv = obj['adv']

        X_def_test = X[:1000]
        y_def_test = y[:1000]
        adv_def_test = adv[:1000]

        # Bug fix: predict on the adversarial examples directly instead of
        # reusing `pred`, which holds predictions on the clean samples.
        pred_adv_def_test = predict_numpy(model, adv_def_test, device)
        acc_base = np.mean(pred_adv_def_test == y_def_test)

        X_reformed, labelled_as_adv = detector.detect(adv_def_test, pred_adv_def_test)
        pred_reformed = predict_numpy(model, X_reformed, device)
        acc_def = acc_on_adv(pred_reformed, y_def_test, labelled_as_adv)

        _, labelled_false = detector.detect(X_def_test, y_def_test)
        fpr = np.mean(labelled_false)

        print('acc_model: {:.4f}, acc_on_adv: {:.4f}, fpr: {:.4f}'.format(acc_base, acc_def, fpr))
        accs_classifier[i] = acc_base
        accs_on_adv[i] = acc_def
        fprs[i] = fpr

    results = np.array([eps, accs_classifier, accs_on_adv, fprs]).transpose()
    df = pd.DataFrame(data=results, columns=['eps', 'acc_base', 'acc_on_adv', 'fpr'])
    file_output = os.path.join(path, '{}_{}_{}_{}.csv'.format(data, model_name, DEFENCE, att_name))
    df.to_csv(file_output, index=False)
    print('Saved results to:', file_output)

    print('DONE!')
    print('-------------------------------------------------------------------\n')
Example #9
def run_generate_adv(data,
                     model_name,
                     path,
                     seed,
                     att_name,
                     eps):
    set_seeds(seed)

    # Line attack takes no hyperparameter
    if att_name == 'line':
        eps = [1]
    print('args:', data, model_name, path, seed, att_name, eps)

    if not os.path.exists(path):
        print('Output folder does not exist. Create:', path)
        os.mkdir(path)

    # Get data
    n_classes = 10
    transform = tv.transforms.Compose([tv.transforms.ToTensor()])
    if data == 'mnist':
        dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transform)
        dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transform)
    elif data == 'cifar10':
        transform_train = tv.transforms.Compose([
            tv.transforms.RandomHorizontalFlip(),
            tv.transforms.RandomCrop(32, padding=4),
            tv.transforms.ToTensor()])
        dataset_train = datasets.CIFAR10(PATH_DATA, train=True, download=True, transform=transform_train)
        dataset_test = datasets.CIFAR10(PATH_DATA, train=False, download=True, transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    file_model = os.path.join(path, '{}_{}_model.pt'.format(data, model_name))
    print('Start training {} model on {}...'.format(model_name, data))
    model = train_model(data, model_name, dataset_train, dataset_test, EPOCHS, device, file_model)

    # Split data
    tensor_X, tensor_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
    dataset = TensorDataset(tensor_X, tensor_y)
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader, nn.CrossEntropyLoss(), device)
    print('Accuracy on {} filtered test set: {:.2f}%'.format(tensor_y.size(0), acc_perfect * 100))
    # Split rules:
    # 1. Benchmark_defence_test: 1000 (def_test)
    # 2. Benchmark_defence_val:  1000 (def_val)
    # 3. Test white-box attack:  2000 (att_test)
    # 4. Train surrogate model:  2000 (sur_train)
    #    -----------------Total: 6000
    idx_shuffle = np.random.permutation(tensor_X.size(0))[:6000]
    X = tensor_X[idx_shuffle].cpu().detach().numpy()
    y = tensor_y[idx_shuffle].cpu().detach().numpy()

    print('-------------------------------------------------------------------')
    print('Start generating {} adversarial examples...'.format(len(idx_shuffle)))

    advs = []
    for e in eps:
        adv, X, y = run_attack_untargeted(file_model, X, y, att_name=att_name, eps=e, device=device)
        advs.append(adv)
    advs = np.array(advs, dtype=float)

    print('-------------------------------------------------------------------')
    print('Start testing adversarial examples...')
    for i, e in enumerate(eps):
        adv = advs[i]
        pred = predict_numpy(model, adv, device)
        print('Attack: {} Eps={} Acc on adv: {:.4f}'.format(att_name, e, np.mean(pred == y)))
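A hypothetical driver for run_generate_adv; every argument value below is illustrative, not taken from the repository.

if __name__ == '__main__':
    # Hypothetical invocation; dataset, path, attack name and eps grid
    # are placeholders for illustration only.
    run_generate_adv(data='mnist', model_name='dnn', path='result_0',
                     seed=1234, att_name='apgd2', eps=[1.0, 2.0, 3.0])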
Example #10
def main(seed, dataset_name, clf_name, detector_name, epsilon_lst, input_shape,
         json_param, path):
    set_seeds(SEEDS[seed])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    print("load the classifier")
    file_model = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_model.pt'.format(dataset_name, clf_name))
    if clf_name == 'dnn':
        model = BaseModel(use_prob=False).to(device)
    elif clf_name == 'resnet':
        model = Resnet(use_prob=False).to(device)
    else:
        raise ValueError('Unknown model: {}'.format(clf_name))
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_apgd2_2000.pt'.format(dataset_name, clf_name))
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    print("undefended model acc")
    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    # Split data
    X_att_test = X[2000:3000].astype(np.float32)
    y_att_test = y[2000:3000].astype(np.float32)

    print("x attr shape ", X_att_test.shape)

    #################################################################
    print("Load Magnet")
    with open(json_param) as j:
        param = json.load(j)

    print("before load magnet")
    model_with_reformer_nn_module, detector_nn_module, full_magnet_orig = \
        loadmagnet(dataset_name, clf_name, param, device, path, model)

    print("Magnet loaded")

    loss = torch.nn.CrossEntropyLoss()
    # this one returns the logits
    art_classifier = PyTorchClassifier(model=model_with_reformer_nn_module,
                                       loss=loss,
                                       input_shape=input_shape,
                                       nb_classes=10,
                                       optimizer=None)

    # y_pred = model_with_reformer_nn_module(X)
    # print("model_with_reformer_nn_module", y_pred.shape)

    y_pred = art_classifier.predict(X)
    print("art_classifier", y_pred.shape)

    print("check full magnet ")
    _, y_pred = full_magnet_orig.detect(X)
    print("full magnet", y_pred.shape)

    print("check detector nn module")
    # correctly returns an array with the logits
    y_pred = detector_nn_module(X)
    print("y pred ", y_pred)
    print("detector_nn_module", y_pred.shape)

    print("create pytorch detector")
    # must be only the detector
    art_detector = PyTorchClassifier(model=detector_nn_module,
                                     loss=loss,
                                     input_shape=input_shape,
                                     nb_classes=2,
                                     optimizer=None)

    print("check art detector")
    y_pred = art_detector.predict(X + 1000)
    print("detector_nn_module", y_pred.shape)
    print("art detector ok")

    print("y pred ", y_pred)
    print("detected by detector used by attack ",
          np.mean(y_pred.argmax(axis=1) == 1))

    clip_fun = None
    #################################################################

    pred_folder = 'result_{:}/predictions_wb_eval/{:}_{:}_{:}'.format(
        seed, dataset_name, clf_name, detector_name)

    print("compute prediction for samples at epsilon 0")
    x = X_att_test[:10]
    y = y_att_test[:10]

    # compute and save predictions
    cmpt_and_save_predictions(art_classifier, full_magnet_orig, art_detector,
                              device, x, y, pred_folder, 0)

    for eps in epsilon_lst:

        print("epsilon ", eps)

        print("detector threshold ", detector_nn_module.detector.threshold)

        attack = AutoProjectedGradientDescentDetectorsMagnet(
            estimator=art_classifier,
            detector=art_detector,
            detector_th=0,
            detector_clip_fun=clip_fun,
            loss_type='logits_difference',
            batch_size=128,
            norm=2,
            eps=eps,
            eps_step=0.9,
            beta=1.0,
            max_iter=100)

        adv_x = attack.generate(x=x, y=None)

        # compute and save predictions
        cmpt_and_save_predictions(art_classifier, full_magnet_orig,
                                  art_detector, device, adv_x, y, pred_folder,
                                  eps)
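The per-epsilon .npy files written by cmpt_and_save_predictions can be reloaded for offline analysis. A minimal sketch, assuming only the file-name pattern visible in its np.save calls:

import numpy as np

def load_predictions(pred_folder, eps):
    # Mirrors the naming scheme used by cmpt_and_save_predictions:
    # "{pred_folder}_{eps}_{name}.npy" (np.save appends the extension).
    base = pred_folder + "_{:}".format(eps)
    names = ('y', 'y-pred', 'pred-sur-det', 'pred-baard',
             'reject-s1', 'reject-s2', 'reject-s3')
    return {name: np.load(base + "_{:}.npy".format(name)) for name in names}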
Example #11
def main():
    set_seeds(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    file_model = os.path.join('result_0', 'mnist_dnn_model.pt')
    model = BaseModel(use_prob=False).to(device)
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join('result_0', 'mnist_dnn_apgd2_3000.pt')
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    # Split data
    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]

    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]
    pred_adv_def_val = pred[1000:2000]

    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]

    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]
    pred_adv_surro_train = pred[4000:]

    # Load baard
    file_baard_train = os.path.join('result_0',
                                    'mnist_dnn_baard_s1_train_data.pt')
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']

    stages = []
    stages.append(ApplicabilityStage(n_classes=10, quantile=1., verbose=False))
    stages.append(
        ReliabilityStage(n_classes=10, k=10, quantile=1., verbose=False))
    stages.append(
        DecidabilityStage(n_classes=10, k=100, quantile=1., verbose=False))
    detector = BAARDOperator(stages=stages)

    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)

    file_baard_threshold = os.path.join('result_0',
                                        'mnist_dnn_baard_threshold.pt')
    thresholds = torch.load(file_baard_threshold)['thresholds']
    detector.load(file_baard_threshold)

    file_surro = os.path.join('result_0', 'mnist_dnn_baard_surrogate.pt')
    surrogate = get_pretrained_surrogate(file_surro, device)

    # Test surrogate model
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    label_test = detector.detect(X_test, pred_test)
    acc = acc_on_adv(pred_test[1000:], y_att_test[1000:], label_test[1000:])
    fpr = np.mean(label_test[:1000])
    print('BAARD Acc_on_adv:', acc)
    print('BAARD FPR:', fpr)

    label_surro = predict_numpy(surrogate, X_test, device)
    acc = np.mean(label_surro == label_test)
    print('Acc on surrogate:', acc)

    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(model.parameters(),
                                    lr=0.01,
                                    momentum=0.9,
                                    weight_decay=5e-4)
    art_classifier = PyTorchClassifier(model=model,
                                       loss=loss,
                                       input_shape=(1, 28, 28),
                                       nb_classes=10,
                                       optimizer=optimizer_clf)

    optimizer_sur = torch.optim.SGD(surrogate.parameters(),
                                    lr=0.01,
                                    momentum=0.9,
                                    weight_decay=5e-4)
    art_detector = PyTorchClassifier(model=surrogate,
                                     loss=loss,
                                     input_shape=(1, 28, 28),
                                     nb_classes=2,
                                     optimizer=optimizer_sur)

    loss_multiplier = 1. / 36.
    clip_fun = BAARD_Clipper(detector)

    attack = AutoProjectedGradientDescentDetectors(
        estimator=art_classifier,
        detector=art_detector,
        detector_th=0,  #fpr,
        clf_loss_multiplier=loss_multiplier,
        detector_clip_fun=clip_fun,
        loss_type='logits_difference',
        batch_size=128,
        norm=2,
        eps=8.0,
        eps_step=0.9,
        beta=0.5,
        max_iter=100)

    # X_toy = np.random.rand(128, 1, 28, 28).astype(np.float32)
    # pred_toy = art_classifier.predict(X_toy)
    # rejected_s1 = detector.stages[0].predict(X_toy, pred_toy)
    # print('Without:', np.mean(rejected_s1))

    # X_clipped = clip_fun(X_toy, art_classifier)
    # rejected_s1 = detector.stages[0].predict(X_clipped, pred_toy)
    # print('With:', np.mean(rejected_s1))
    # adv_x = attack.generate(x=X_toy)
    # pred_adv = predict_numpy(model, adv_x, device)
    # pred_sur = art_detector.predict(adv_x)
    # print('From surrogate model:', np.mean(pred_sur == 1))
    # labelled_as_adv = detector.detect(adv_x, pred_adv)
    # print('From BAARD', np.mean(labelled_as_adv == 1))

    # # Test it stage by stage
    # reject_s1 = detector.stages[0].predict(adv_x, pred_adv)
    # print('reject_s1', np.mean(reject_s1))
    # reject_s2 = detector.stages[1].predict(adv_x, pred_adv)
    # print('reject_s2', np.mean(reject_s2))
    # reject_s3 = detector.stages[2].predict(adv_x, pred_adv)
    # print('reject_s3', np.mean(reject_s3))

    x = X_att_test[:10]
    y = y_att_test[:10]
    adv_x = attack.generate(x=x, y=None)
    pred_adv = predict_numpy(model, adv_x, device)
    pred_sur = art_detector.predict(adv_x)

    print('Acc classifier:', np.mean(pred_adv == y))

    print('From surrogate model:', np.mean(pred_sur == 1))
    labelled_as_adv = detector.detect(adv_x, pred_adv)
    print('From BAARD', np.mean(labelled_as_adv == 1))

    # Test it stage by stage
    reject_s1 = detector.stages[0].predict(adv_x, pred_adv)
    print('reject_s1', np.mean(reject_s1))
    reject_s2 = detector.stages[1].predict(adv_x, pred_adv)
    print('reject_s2', np.mean(reject_s2))
    reject_s3 = detector.stages[2].predict(adv_x, pred_adv)
    print('reject_s3', np.mean(reject_s3))
    print()
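BAARDOperator.detect above is assumed to flag a sample as adversarial as soon as any of the three stages rejects it; a minimal sketch of that OR-combination over the per-stage predict calls used in the stage-by-stage test (an assumption, not the library code):

import numpy as np

def combined_reject_sketch(stages, X, y_pred):
    # Assumed combination rule: flag a sample when any stage rejects it.
    flagged = np.zeros(len(X), dtype=bool)
    for stage in stages:
        flagged |= stage.predict(X, y_pred).astype(bool)
    return flagged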
Example #12
def main(seed, dataset_name, clf_name, detector_name, epsilon_lst, input_shape):
    set_seeds(SEEDS[seed])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    print("load the classifier")
    file_model = os.path.join('result_{:}'.format(seed),
                              '{:}_{:}_model.pt'.format(dataset_name,
                                                        clf_name))
    if clf_name == 'dnn':
        model = BaseModel(use_prob=False).to(device)
    elif clf_name == 'resnet':
        model = Resnet(use_prob=False).to(device)
    else:
        raise ValueError('Unknown model: {}'.format(clf_name))
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join('result_{:}'.format(seed),
                             '{:}_{:}_apgd2_2000.pt'.format(dataset_name,
                                                            clf_name))
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    pred = predict_numpy(model, adv, device)
    print('Acc on adv (epsilon 2):', np.mean(pred == y))

    # Split data
    X_att_test = X[2000:3000]
    y_att_test = y[2000:3000]

    print("x attr shape ", X_att_test.shape)

    #########################################################################
    # Load baard
    print("Load baard")
    file_baard_train = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_baard_s1_train_data.pt'.format(dataset_name, clf_name))
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']

    stages = []
    stages.append(ApplicabilityStage(n_classes=10, quantile=1., verbose=False))
    stages.append(ReliabilityStage(n_classes=10, k=10, quantile=1., verbose=False))
    stages.append(DecidabilityStage(n_classes=10, k=100, quantile=1., verbose=False))
    detector = BAARDOperator(stages=stages)

    detector.stages[0].fit(X_baard_train_s1, y_baard_train)
    for stage in detector.stages[1:]:
        stage.fit(X_baard_train, y_baard_train)

    print("load baard's thresholds")
    file_baard_threshold = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_baard_threshold.pt'.format(dataset_name, clf_name))

    thresholds = torch.load(file_baard_threshold)['thresholds']
    detector.load(file_baard_threshold)

    print("load the surrogate")
    file_surro = os.path.join(
        'result_{:}'.format(seed),
        '{:}_{:}_baard_surrogate.pt'.format(dataset_name, clf_name))
    surrogate = get_pretrained_surrogate(file_surro, device)

    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(
        model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=input_shape,
        nb_classes=10,
        optimizer=optimizer_clf
    )

    optimizer_sur = torch.optim.SGD(
        surrogate.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_detector = PyTorchClassifier(
        model=surrogate,
        loss=loss,
        input_shape=input_shape,
        nb_classes=2,
        optimizer=optimizer_sur
    )

    clip_fun = BAARD_Clipper(detector)

    #########################################################################

    pred_folder = 'result_{:}/predictions_wb_eval/{:}_{:}_{:}'.format(
        seed, dataset_name, clf_name, detector_name)

    print("compute prediction for samples at epsilon 0")
    x = X_att_test[:10]
    y = y_att_test[:10]

    # compute and save predictions
    cmpt_and_save_predictions(model, art_detector, detector, device, x, y,
                              pred_folder, 0)

    for eps in epsilon_lst:

        print("epsilon ", eps)

        if dataset_name == 'mnist':
            loss_multiplier = 1. / 36.
        else:
            loss_multiplier = 0.1

        attack = AutoProjectedGradientDescentDetectors(
            estimator=art_classifier,
            detector=art_detector,
            detector_th=0,
            detector_clip_fun=clip_fun,
            loss_type='logits_difference',
            batch_size=128,
            norm=2,
            eps=eps,
            eps_step=0.9,
            beta=0.5,
            max_iter=100)

        adv_x = attack.generate(x=x, y=None)

        # compute and save predictions
        cmpt_and_save_predictions(model, art_detector, detector, device, adv_x,
                                  y, pred_folder, eps)
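As with Example #9, a hypothetical driver; every argument value is a placeholder.

if __name__ == '__main__':
    # Illustrative values only; the dataset, detector name and epsilon grid
    # are assumptions, not taken from the repository.
    main(seed=0, dataset_name='mnist', clf_name='dnn',
         detector_name='baard', epsilon_lst=[1.0, 2.0, 3.0, 5.0, 8.0],
         input_shape=(1, 28, 28))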
Example #13
def main():
    set_seeds(SEED)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)

    # Load classifier
    file_model = os.path.join('result_0', 'mnist_dnn_model.pt')
    model = BaseModel(use_prob=False).to(device)
    model.load_state_dict(torch.load(file_model, map_location=device))

    file_data = os.path.join('result_0', 'mnist_dnn_apgd2_3000.pt')
    obj = torch.load(file_data)
    X = obj['X']
    y = obj['y']
    adv = obj['adv']

    pred = predict_numpy(model, X, device)
    print('Acc on clean:', np.mean(pred == y))

    pred = predict_numpy(model, adv, device)
    print('Acc on adv:', np.mean(pred == y))

    # Split data
    X_def_test = X[:1000]
    y_def_test = y[:1000]
    adv_def_test = adv[:1000]
    pred_adv_def_test = pred[:1000]

    X_def_val = X[1000:2000]
    y_def_val = y[1000:2000]
    adv_def_val = adv[1000:2000]
    pred_adv_def_val = pred[1000:2000]

    X_att_test = X[2000:4000]
    y_att_test = y[2000:4000]
    adv_att_test = adv[2000:4000]
    pred_adv_att_test = pred[2000:4000]

    X_surro_train = X[4000:]
    y_surro_train = y[4000:]
    adv_surro_train = adv[4000:]
    pred_adv_surro_train = pred[4000:]

    # Load baard
    file_baard_train = os.path.join(
        'result_0', 'mnist_dnn_baard_s1_train_data.pt')
    obj = torch.load(file_baard_train)
    X_baard_train_s1 = obj['X_s1']
    X_baard_train = obj['X']
    y_baard_train = obj['y']

    file_baard_threshold = os.path.join(
        'result_0', 'mnist_dnn_baard_threshold.pt')
    thresholds = torch.load(file_baard_threshold)['thresholds']

    stage1 = ApplicabilityStage(n_classes=10, quantile=1.)
    stage1.thresholds_ = thresholds[0]

    file_surro = os.path.join('result_0', 'mnist_dnn_baard_surrogate.pt')
    surrogate = get_pretrained_surrogate(file_surro, device)

    # Test surrogate model
    X_test = np.concatenate((X_att_test[1000:], adv_att_test[1000:]))
    pred_test = predict_numpy(model, X_test, device)
    # label_test = detector.detect(X_test, pred_test)
    # acc = acc_on_adv(pred_test[1000:], y_att_test[1000:], label_test[1000:])
    # fpr = np.mean(label_test[:1000])
    # print('BAARD Acc_on_adv:', acc)
    # print('BAARD FPR:', fpr)

    label_surro = predict_numpy(surrogate, X_test, device)
    # acc = np.mean(label_surro == label_test)
    # print('Acc on surrogate:', acc)

    loss = torch.nn.CrossEntropyLoss()
    optimizer_clf = torch.optim.SGD(
        model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=(1, 28, 28),
        nb_classes=10,
        optimizer=optimizer_clf
    )

    optimizer_sur = torch.optim.SGD(
        surrogate.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    art_detector = PyTorchClassifier(
        model=surrogate,
        loss=loss,
        input_shape=(1, 28, 28),
        nb_classes=2,
        optimizer=optimizer_sur
    )

    fpr = 0.05
    attack = AutoProjectedGradientDescentDetectors(
        estimator=art_classifier,
        detector=art_detector,
        detector_th=fpr,
        clf_loss_multiplier=1. / 36.,
        loss_type='logits_difference',
        batch_size=128,
        norm=2,
        eps=5.0,
        eps_step=0.9,
        beta=0.5,
        max_iter=100)

    # adv_x = attack.generate(x=X_att_test[:100], y=y_att_test[:100])
    file_whitebox_adv = 'mnist_apgd2_3000_whitebox_size100.npy'
    # np.save(file_whitebox_adv, adv_x)
    adv_x = np.load(file_whitebox_adv)
    print('adv_x', adv_x.shape)

    pred_adv = predict_numpy(model, adv_x, device)
    adv_x = clip_by_threshold(adv_x, pred_adv, thresholds[0])
    pred_sur = art_detector.predict(adv_x)
    print('From surrogate model:', np.mean(pred_sur == 1))
    labelled_as_adv = stage1.predict(adv_x, pred_adv)
    print('From BAARD', np.mean(labelled_as_adv == 1))
    
    # Testing 
    # X_toy = np.random.rand(128, 1, 28, 28).astype(np.float32)  # Same size as MNIST in a single batch
    # y_toy = np.concatenate((np.zeros(50), np.ones(50)))
    # rejected = stage1.predict(X_toy, y_toy)
    # print('rejected', np.mean(rejected))
    # X_bypass = clip_by_threshold(X_toy, y_toy, thresholds[0])
    # rejected_after = stage1.predict(X_bypass, y_toy)
    # print('rejected_after', np.mean(rejected_after))

    print('Pause')
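clip_by_threshold above is assumed to project each sample into the per-class feature range accepted by the ApplicabilityStage, so stage 1 can no longer reject it. A minimal sketch under that assumption; the threshold layout (per-class (low, high) bounds over flattened features) is hypothetical.

import numpy as np

def clip_by_threshold_sketch(X, y_pred, thresholds):
    # Hypothetical layout: thresholds[c] = (low, high), each broadcastable
    # to a flattened sample of predicted class c.
    X_flat = X.copy().reshape(len(X), -1)
    for i, c in enumerate(y_pred.astype(int)):
        low, high = thresholds[c]
        X_flat[i] = np.clip(X_flat[i], low, high)
    return X_flat.reshape(X.shape)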