Example #1
0
def main():
    """Generate adversarial examples for a pretrained classifier.

    Reads attack/dataset hyper-parameters from ``data.json``, loads the
    dataset named by ``--data`` (MNIST, CIFAR10, or a numeric CSV set),
    restores the pretrained model from ``--output_path/--pretrained``,
    runs the attack selected by ``--attack`` on up to ``--n_samples``
    correctly-classified test samples, and saves the clean inputs,
    labels and adversarial examples as ``.npy`` files under
    ``--output_path``.
    """
    # Attack names and per-dataset parameters come from an external JSON file.
    with open('data.json') as data_json:
        data_params = json.load(data_json)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str)
    parser.add_argument('--data_path', type=str, default='data')
    parser.add_argument('--output_path', type=str, default='results')
    parser.add_argument('--pretrained', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--attack', type=str, required=True, choices=data_params['attacks'])
    parser.add_argument('--eps', type=float, default=0.3)
    # NOTE: In CW_L2 attack, eps is the upper bound of c.
    parser.add_argument('--n_samples', type=int, default=2000)
    parser.add_argument('--random_state', type=int, default=1234)
    args = parser.parse_args()
    print(args)

    set_seeds(args.random_state)

    # FIX: os.makedirs with exist_ok=True also creates missing parent
    # directories and is safe against another process creating the folder
    # between the check and the call (os.mkdir would raise in both cases).
    if not os.path.exists(args.output_path):
        print('Output folder does not exist. Create:', args.output_path)
    os.makedirs(args.output_path, exist_ok=True)

    print('Dataset:', args.data)
    print('Pretrained model:', args.pretrained)
    print('Running attack: {}'.format(args.attack))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device: {}'.format(device))

    # Prepare data
    transforms = tv.transforms.Compose([tv.transforms.ToTensor()])

    if args.data == 'mnist':
        dataset_train = datasets.MNIST(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.MNIST(args.data_path, train=False, download=True, transform=transforms)
    elif args.data == 'cifar10':
        dataset_train = datasets.CIFAR10(args.data_path, train=True, download=True, transform=transforms)
        dataset_test = datasets.CIFAR10(args.data_path, train=False, download=True, transform=transforms)
    else:
        # Any other dataset name is a numeric CSV described in data.json.
        data_path = os.path.join(args.data_path, data_params['data'][args.data]['file_name'])
        print('Read file:', data_path)
        X, y = load_csv(data_path)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=data_params['data'][args.data]['n_test'],
            random_state=args.random_state)
        # Scale features to [0, 1] so the classifier clip_values below hold.
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
        dataset_train = TensorDataset(torch.from_numpy(X_train).type(torch.float32), torch.from_numpy(y_train).type(torch.long))
        dataset_test = TensorDataset(torch.from_numpy(X_test).type(torch.float32), torch.from_numpy(y_test).type(torch.long))

    # shuffle=False keeps sample order deterministic for the filtering below.
    dataloader_train = DataLoader(dataset_train, 256, shuffle=False)
    dataloader_test = DataLoader(dataset_test, 256, shuffle=False)

    shape_train = get_shape(dataloader_train.dataset)
    shape_test = get_shape(dataloader_test.dataset)
    print('Train set:', shape_train)
    print('Test set:', shape_test)

    # Load model. Attacks that rely on raw logits must bypass the softmax
    # output layer; everything else uses probabilities.
    use_prob = args.attack not in ['apgd', 'apgd1', 'apgd2', 'cw2', 'cwinf']
    print('Attack:', args.attack)
    print('Using softmax layer:', use_prob)
    if args.data == 'mnist':
        model = BaseModel(use_prob=use_prob).to(device)
        model_name = 'basic'
    elif args.data == 'cifar10':
        # The architecture is encoded in the checkpoint file name,
        # e.g. 'cifar10_resnet_*.pt'.
        model_name = args.pretrained.split('_')[1]
        if model_name == 'resnet':
            model = Resnet(use_prob=use_prob).to(device)
        elif model_name == 'vgg':
            model = Vgg(use_prob=use_prob).to(device)
        else:
            raise ValueError('Unknown model: {}'.format(model_name))
    else:
        n_features = data_params['data'][args.data]['n_features']
        n_classes = data_params['data'][args.data]['n_classes']
        model = NumericModel(
            n_features,
            n_hidden=n_features * 4,
            n_classes=n_classes,
            use_prob=use_prob).to(device)
        model_name = 'basic' + str(n_features * 4)

    optimizer = optim.SGD(model.parameters(), lr=0.01,
                          momentum=0.9, weight_decay=5e-4)
    loss = nn.CrossEntropyLoss()
    pretrained_path = os.path.join(args.output_path, args.pretrained)
    model.load_state_dict(torch.load(pretrained_path, map_location=device))

    _, acc_train = validate(model, dataloader_train, loss, device)
    _, acc_test = validate(model, dataloader_test, loss, device)
    print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))
    print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

    # Create a subset which only contains recognisable samples, so that
    # attack success is not confounded by pre-existing misclassifications.
    tensor_test_X, tensor_test_y = get_correct_examples(
        model, dataset_test, device=device, return_tensor=True)
    dataset_perfect = TensorDataset(tensor_test_X, tensor_test_y)
    loader_perfect = DataLoader(dataset_perfect, batch_size=512, shuffle=False)
    _, acc_perfect = validate(model, loader_perfect, loss, device)
    print('Accuracy on {} filtered test examples: {:.4f}%'.format(
        len(dataset_perfect), acc_perfect * 100))

    # Generate adversarial examples
    n_features = data_params['data'][args.data]['n_features']
    n_classes = data_params['data'][args.data]['n_classes']
    if isinstance(n_features, int):
        # ART expects a shape tuple even for flat feature vectors.
        n_features = (n_features,)

    # NOTE(review): device_type='gpu' is passed even when CUDA is
    # unavailable — confirm ART's CPU fallback on this version.
    classifier = PyTorchClassifier(
        model=model,
        loss=loss,
        input_shape=n_features,
        optimizer=optimizer,
        nb_classes=n_classes,
        clip_values=(0.0, 1.0),
        device_type='gpu')

    if args.attack == 'apgd':
        eps_step = args.eps / 10.0 if args.eps <= 0.1 else 0.1
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd1':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=1,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'apgd2':
        attack = AutoProjectedGradientDescent(
            estimator=classifier,
            norm=2,
            eps=args.eps,
            eps_step=0.1,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'bim':
        eps_step = args.eps / 10.0
        attack = BasicIterativeMethod(
            estimator=classifier,
            eps=args.eps,
            eps_step=eps_step,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'boundary':
        attack = BoundaryAttack(
            estimator=classifier,
            max_iter=1000,
            sample_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cw2':
        # NOTE: Do NOT increase the batch size!
        attack = CarliniWagnerAttackL2(
            model=model,
            n_classes=n_classes,
            confidence=args.eps,
            verbose=True,
            check_prob=False,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'cwinf':
        attack = CarliniLInfMethod(
            classifier=classifier,
            confidence=args.eps,
            max_iter=1000,
            batch_size=args.batch_size,
            targeted=False)
    elif args.attack == 'deepfool':
        attack = DeepFool(
            classifier=classifier,
            epsilon=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'fgsm':
        attack = FastGradientMethod(
            estimator=classifier,
            eps=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'jsma':
        attack = SaliencyMapMethod(
            classifier=classifier,
            gamma=args.eps,
            batch_size=args.batch_size)
    elif args.attack == 'line':
        # eps doubles as the line colour (greyscale scalar / RGB triple).
        if args.data == 'mnist':
            color = args.eps
        elif args.data == 'cifar10':
            color = (args.eps, args.eps, args.eps)
        else:
            raise NotImplementedError
        attack = LineAttack(color=color, thickness=1)
    elif args.attack == 'shadow':
        attack = ShadowAttack(
            estimator=classifier,
            batch_size=args.batch_size,
            targeted=False,
            verbose=False)
    elif args.attack == 'watermark':
        attack = WaterMarkAttack(
            eps=args.eps,
            n_classes=data_params['data'][args.data]['n_classes'],
            x_min=0.0,
            x_max=1.0,
            targeted=False)

        # WaterMarkAttack is fitted on correctly-classified training data.
        X_train, y_train = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
        X_train = X_train.cpu().detach().numpy()
        y_train = y_train.cpu().detach().numpy()
        attack.fit(X_train, y_train)
    else:
        raise NotImplementedError

    # Attack at most n_samples of the correctly-classified test examples.
    n = min(len(dataset_perfect), args.n_samples)

    X_benign = tensor_test_X[:n].cpu().detach().numpy()
    y = tensor_test_y[:n].cpu().detach().numpy()

    print('Creating {} adversarial examples with eps={} (Not all attacks use eps)'.format(n, args.eps))
    time_start = time.time()
    # Shadow attack only takes single sample!
    if args.attack == 'shadow':
        adv = np.zeros_like(X_benign)
        for i in trange(len(X_benign)):
            adv[i] = attack.generate(x=np.expand_dims(X_benign[i], axis=0))
    elif args.attack == 'watermark':
        # This is untargeted.
        adv = attack.generate(X_benign, y)
    else:
        adv = attack.generate(x=X_benign)
    time_elapsed = time.time() - time_start
    print('Total time spend: {}'.format(str(datetime.timedelta(seconds=time_elapsed))))

    pred_benign = np.argmax(classifier.predict(X_benign), axis=1)
    acc_benign = np.sum(pred_benign == y) / n
    pred_adv = np.argmax(classifier.predict(adv), axis=1)
    acc_adv = np.sum(pred_adv == y) / n
    print("Accuracy on benign samples: {:.4f}%".format(acc_benign * 100))
    print("Accuracy on adversarial examples: {:.4f}%".format(acc_adv * 100))

    # Save results; the sample count is only encoded in the file name when
    # it differs from the default of 2000.
    if args.n_samples < 2000:
        output_file = '{}_{}_{}_{}_size{}'.format(args.data, model_name, args.attack, str(args.eps), args.n_samples)
    else:
        output_file = '{}_{}_{}_{}'.format(args.data, model_name, args.attack, str(args.eps))

    path_x = os.path.join(args.output_path, '{}_x.npy'.format(output_file))
    path_y = os.path.join(args.output_path, '{}_y.npy'.format(output_file))
    path_adv = os.path.join(args.output_path, '{}_adv.npy'.format(output_file))
    np.save(path_x, X_benign)
    np.save(path_y, y)
    np.save(path_adv, adv)

    print('Saved to:', '{}_adv.npy'.format(output_file))
    print()
Example #2
0
def main(args):
    """Generate and save adversarial test sets for one dataset.

    Loads the pretrained baseline CNN for ``args.dataset``, then runs a
    battery of attacks (FGSM, BIM, PGD-L1/L2/Linf, CW-Linf, CW-L2,
    DeepFool, spatial transformation, Square, HopSkipJump, ZOO) on the
    clean test set and saves each adversarial set as a ``.npy`` file
    under ``adv_path``.
    """
    import importlib

    # Per-dataset configuration, replacing eight near-identical
    # copy-paste branches:
    #   (module path, class name, SGD lr, SquareAttack eps, pa_th,
    #    max translation, max rotation)
    dataset_cfg = {
        'mnist':      ('baselineCNN.cnn.cnn_mnist',        'MNISTCNN',   0.05,  0.3,    78, 10, 60),
        'mnist_gray': ('baselineCNN.cnn.cnn_mnist_gray',   'MNISTCNN',   0.05,  0.3,    78, 10, 60),
        'cifar':      ('baselineCNN.cnn.cnn_cifar10',      'CIFAR10CNN', 0.01,  0.125, 100,  8, 30),
        'cifar_gray': ('baselineCNN.cnn.cnn_cifar10_gray', 'CIFAR10CNN', 0.01,  0.125, 100,  8, 30),
        'svhn':       ('baselineCNN.cnn.cnn_svhn',         'SVHNCNN',    0.01,  0.125, 100, 10, 60),
        'svhn_gray':  ('baselineCNN.cnn.cnn_svhn_gray',    'SVHNCNN',    0.01,  0.125, 100, 10, 60),
        'tiny':       ('baselineCNN.cnn.cnn_tiny',         'TINYCNN',    0.001, 0.125, 100,  8, 30),
        'tiny_gray':  ('baselineCNN.cnn.cnn_tiny_gray',    'TINYCNN',    0.001, 0.125, 100,  8, 30),
    }
    # BUG FIX: the original assert rejected 'mnist_gray', 'cifar_gray' and
    # 'svhn_gray' even though handling branches for them existed, making
    # those branches unreachable.
    assert args.dataset in dataset_cfg, \
        "dataset parameter must be one of {}".format(sorted(dataset_cfg))
    print('Dataset: %s' % args.dataset)
    # NOTE(review): hard-coded absolute output directory — consider making
    # this configurable.
    adv_path = '/home/aaldahdo/detectors/adv_data/'

    module_path, class_name, lr, eps_sa, pa_th, translation, rotation = dataset_cfg[args.dataset]
    model_cls = getattr(importlib.import_module(module_path), class_name)
    wrapper = model_cls(mode='load', filename='cnn_{}.h5'.format(args.dataset))
    classifier = wrapper.model
    sgd = optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=True)
    classifier.compile(loss=categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
    kclassifier = KerasClassifier(model=classifier, clip_values=(0, 1))

    # Epsilon grids: Linf budgets (fractions of 256), L1 budgets, L2 budgets.
    epsilons = [8/256, 16/256, 32/256, 64/256, 80/256, 128/256]
    epsilons1 = [5, 10, 15, 20, 25, 30, 40]
    epsilons2 = [0.125, 0.25, 0.3125, 0.5, 1, 1.5, 2]

    x_test = wrapper.x_test
    y_test = wrapper.y_test
    # pa_th is retained for parity with the original per-dataset settings
    # (unused in this function).
    del wrapper  # free the training data held by the wrapper

    # FGSM
    for e in epsilons:
        attack = FastGradientMethod(estimator=kclassifier, eps=e, eps_step=0.01, batch_size=256)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_fgsm_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # BIM; iteration budget scales with the Linf budget.
    for e in epsilons:
        attack = BasicIterativeMethod(estimator=kclassifier, eps=e, eps_step=0.01, batch_size=32, max_iter=int(e*256*1.25))
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_bim_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # PGD-L1
    for e in epsilons1:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=1, eps=e, eps_step=4, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgd1_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # PGD-L2
    for e in epsilons2:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=2, eps=e, eps_step=0.1, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgd2_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # PGD-Linf
    for e in epsilons:
        attack = ProjectedGradientDescent(estimator=kclassifier, norm=np.inf, eps=e, eps_step=0.01, batch_size=32)
        adv_data = attack.generate(x=x_test)
        adv_file_path = adv_path + args.dataset + '_pgdi_' + str(e) + '.npy'
        np.save(adv_file_path, adv_data)
        print('Done - {}'.format(adv_file_path))

    # CW-Linf
    attack = CarliniLInfMethod(classifier=kclassifier, max_iter=200)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_cwi.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # CW-L2 - SLOW (batch_size=1 on purpose)
    attack = CarliniL2Method(classifier=kclassifier, max_iter=100, batch_size=1, confidence=10)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_cw2.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # DeepFool
    attack = DeepFool(classifier=kclassifier)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_df.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # Spatial transformation attack (translation/rotation are per-dataset).
    attack = SpatialTransformation(classifier=kclassifier, max_translation=translation, max_rotation=rotation)
    adv_data = attack.generate(x=x_test)
    adv_file_path = adv_path + args.dataset + '_sta.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # Square Attack
    attack = SquareAttack(estimator=kclassifier, max_iter=200, eps=eps_sa)
    adv_data = attack.generate(x=x_test, y=y_test)
    adv_file_path = adv_path + args.dataset + '_sa.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # HopSkipJump: iterative refinement. The first generate() call runs
    # with max_iter=0 (initialisation only); each following round resumes
    # from the previous adversarial set with iter_step more iterations.
    attack = HopSkipJump(classifier=kclassifier, targeted=False, max_iter=0, max_eval=100, init_eval=10)
    iter_step = 10
    adv_data = np.zeros(x_test.shape)
    for i in range(4):
        adv_data = attack.generate(x=x_test, x_adv_init=adv_data, resume=True)
        attack.max_iter = iter_step
    # Save once after the last round (the original overwrote the same file
    # on every round; the final contents are identical).
    adv_file_path = adv_path + args.dataset + '_hop.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))

    # ZOO attack
    attack = ZooAttack(classifier=kclassifier, batch_size=32)
    adv_data = attack.generate(x=x_test, y=y_test)
    adv_file_path = adv_path + args.dataset + '_zoo.npy'
    np.save(adv_file_path, adv_data)
    print('Done - {}'.format(adv_file_path))
Example #3
0
    classifier = KerasClassifier(model=model, clip_values=(0, 1))
    return classifier


# Get session (TF1-style graph execution).
# NOTE(review): tf.Session is a TensorFlow 1.x API — confirm the pinned
# TF version (or that tf is tf.compat.v1) before running.
session = tf.Session()
k.set_session(session)  # make this session the default for the Keras backend

# Read MNIST dataset. load_mnist() also returns the data range
# (min_, max_) — presumably 0.0 and 1.0 after scaling; verify in its docs.
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Construct and train a convolutional neural network on MNIST using Keras.
# This is the *source* model the attack is crafted against.
source = cnn_mnist_k(x_train.shape[1:])
source.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool against the source model.
adv_crafter = DeepFool(source)
x_train_adv = adv_crafter.generate(x_train)
x_test_adv = adv_crafter.generate(x_test)

# Construct and train a second convolutional neural network (TensorFlow)
# — the *target* model, which never sees the attack during crafting.
target = cnn_mnist_tf(x_train.shape[1:])
target.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the target CNN on the adversarial samples: this measures how
# well the attack transfers from the source model to an unseen model.
preds = target.predict(x_test_adv)
acc = np.sum(np.equal(np.argmax(preds, axis=1), np.argmax(
    y_test, axis=1))) / y_test.shape[0]
print("\nAccuracy on adversarial samples: %.2f%%" % (acc * 100))
Example #4
0
# Classifier head of a Keras model begun earlier in the file (the model's
# earlier layers are defined before this chunk).
model.add(Activation("relu"))
model.add(Dropout(0.5))
model.add(Dense(10))  # 10 output classes — presumably MNIST digits; verify
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

# Create classifier wrapper; clip values come from the dataset's range.
classifier = KerasClassifier(model=model, clip_values=(min_, max_))
classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

# Craft adversarial samples with DeepFool
logger.info("Create DeepFool attack")
adv_crafter = DeepFool(classifier)
logger.info("Craft attack on training examples")
x_train_adv = adv_crafter.generate(x_train)
logger.info("Craft attack test examples")
x_test_adv = adv_crafter.generate(x_test)

# Evaluate the classifier on the adversarial samples (pre-adversarial-training
# baseline accuracy).
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
logger.info("Classifier before adversarial training")
logger.info("Accuracy on adversarial samples: %.2f%%", (acc * 100))

# Data augmentation: expand the training set with the adversarial samples.
# Adversarial examples keep the labels of the clean samples they were
# crafted from, so y_train is deliberately appended to itself here.
x_train = np.append(x_train, x_train_adv, axis=0)
y_train = np.append(y_train, y_train, axis=0)
Example #5
0
def run_attack_untargeted(file_model, X, y, att_name, eps, device):
    """Run an untargeted attack against a pretrained model, with caching.

    The dataset and architecture are parsed from the checkpoint file name
    (``<dataset>_<arch>_...``). If a result file for this attack/epsilon
    already exists it is loaded instead of re-running the attack.

    Returns a tuple ``(adv, X, y)`` of adversarial inputs, clean inputs
    and labels.
    """
    pieces = file_model.split('/')
    out_dir = pieces[0]
    name_parts = pieces[-1].split('_')
    dataset, arch = name_parts[0], name_parts[1]
    # Epsilon is encoded in the cache file name as an integer (x1000).
    file_data = os.path.join(
        out_dir, f'{dataset}_{arch}_{att_name}_{round(eps * 1000)}.pt')

    # Cache hit: reuse the previously generated adversarial examples.
    if os.path.exists(file_data):
        print('Found existing file:', file_data)
        cached = torch.load(file_data)
        return cached['adv'], cached['X'], cached['y']

    # Rebuild the architecture named in the checkpoint file.
    if dataset == 'mnist':
        input_shape = (1, 28, 28)
        n_classes = 10
        model = BaseModel(use_prob=False).to(device)
    elif dataset == 'cifar10':
        input_shape = (3, 32, 32)
        n_classes = 10
        arch_table = {'resnet': Resnet, 'vgg': Vgg}
        if arch not in arch_table:
            raise NotImplementedError
        model = arch_table[arch](use_prob=False).to(device)
    else:
        raise NotImplementedError

    model.load_state_dict(torch.load(file_model, map_location=device))
    criterion = nn.CrossEntropyLoss()
    sgd = optim.SGD(model.parameters(),
                    lr=0.01,
                    momentum=0.9,
                    weight_decay=5e-4)
    classifier = PyTorchClassifier(model=model,
                                   loss=criterion,
                                   input_shape=input_shape,
                                   optimizer=sgd,
                                   nb_classes=n_classes,
                                   clip_values=(0.0, 1.0),
                                   device_type='gpu')

    # Build the requested attack.
    if att_name == 'apgd':
        step = eps / 4. if eps <= 0.2 else 0.1
        attack = AutoProjectedGradientDescent(estimator=classifier,
                                              eps=eps,
                                              eps_step=step,
                                              max_iter=1000,
                                              batch_size=BATCH_SIZE,
                                              targeted=False)
    elif att_name == 'apgd2':
        attack = AutoProjectedGradientDescent(estimator=classifier,
                                              norm=2,
                                              eps=eps,
                                              eps_step=0.1,
                                              max_iter=1000,
                                              batch_size=BATCH_SIZE,
                                              targeted=False)
    elif att_name == 'cw2':
        # Do not increase the batch_size
        attack = CarliniWagnerAttackL2(model=model,
                                       n_classes=n_classes,
                                       confidence=eps,
                                       verbose=True,
                                       check_prob=False,
                                       batch_size=32,
                                       targeted=False)
    elif att_name == 'deepfool':
        # Do not adjust Epsilon
        attack = DeepFool(classifier=classifier, batch_size=BATCH_SIZE)
    elif att_name == 'fgsm':
        attack = FastGradientMethod(estimator=classifier,
                                    eps=eps,
                                    batch_size=BATCH_SIZE)
    elif att_name == 'line':
        attack = LineAttack(color=1, thickness=2)
    else:
        raise NotImplementedError

    started = time.time()
    adv = attack.generate(x=X)
    elapsed = time.time() - started
    print('Total run time:', str(datetime.timedelta(seconds=elapsed)))

    # Persist the result so subsequent calls hit the cache above.
    payload = {'X': X, 'y': y, 'adv': adv}
    torch.save(payload, file_data)
    print('Save data to:', file_data)

    return adv, X, y
Example #6
0
def adv_train_loop(model,
                   params,
                   ds,
                   min_y,
                   base_data,
                   model_id,
                   attack_type,
                   device,
                   batch_size,
                   max_epochs=5):
    """Adversarially train a copy of ``model`` with the requested attack.

    Each training/evaluation batch is the concatenation of the clean images
    and adversarial examples crafted on the fly, metrics are logged to a
    summary writer, and checkpoints are scored by validation accuracy.

    Args:
        model: base network; it is deep-copied, so the caller's instance is
            never modified.
        params: dict with ``'lr'``, ``'momentum'`` and ``'l2_wd'`` for SGD.
        ds: ``(train_loader, valid_loader)`` pair.
        min_y: ``(min_label_train, min_label_valid)`` pair subtracted from
            the raw labels to shift them to a 0-based range.
        base_data: root directory for logs and checkpoints.
        model_id: identifier used in output paths; the attack name is
            appended to it.
        attack_type: one of ``'fgsm'``, ``'ffa'``, ``'carlini'``, ``'lbfgs'``.
        device: torch device used for training.
        batch_size: batch size; here only used to size logging/checkpoint
            intervals.
        max_epochs: number of epochs to run.
    """
    print('training adversarial:', attack_type)
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    # Frozen copy used only by the ART classifier wrapper to craft
    # adversarial images; the trainable copy below keeps the caller's
    # model untouched.
    original_model = copy.deepcopy(model)
    original_model.eval()
    model = copy.deepcopy(model)
    model = model.to(device)
    model_id = f'{model_id}_{attack_type}'

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        # Plain cross-entropy; the original fetched the same callable via the
        # private Loss._loss_fn attribute.
        loss = F.cross_entropy

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        # ART wrapper around the frozen copy; only the 'lbfgs' branch below
        # actually uses it -- the other attacks operate on `model` directly.
        classifier = PyTorchClassifier(
            model=original_model,
            clip_values=(0, 1),
            loss=nn.CrossEntropyLoss(),
            optimizer=optimizer,
            input_shape=(3, 64, 64),
            nb_classes=200,
        )

        attack = None
        if attack_type == "fgsm":
            attack = GradientSignAttack(model, loss_fn=loss, eps=0.2)
        elif attack_type == "ffa":
            attack = FastFeatureAttack(model, loss_fn=loss, eps=0.3)
        elif attack_type == "carlini":
            attack = CarliniWagnerL2Attack(model, 200, max_iterations=1000)
        elif attack_type == "lbfgs":
            # NOTE(review): the 'lbfgs' option instantiates DeepFool, not an
            # L-BFGS attack -- confirm this mapping is intentional.
            attack = DeepFool(classifier=classifier)

        def train_step(engine, batch):
            """One SGD step on a clean + adversarial batch; returns the loss."""
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            # Craft adversarial inputs with parameter grads disabled and the
            # model temporarily in eval mode.
            with ctx_noparamgrad_and_eval(model):
                x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            ans = model.forward(x)
            batch_loss = loss(ans, y)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
            return batch_loss.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            """Forward pass on a training batch augmented with adversarial examples."""
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            """Forward pass on a validation batch augmented with adversarial examples."""
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            """Run the validation evaluator and log its metrics."""
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # Plateau scheduler is stepped on validation *accuracy* (despite
            # the local name suggesting NLL).
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            """Evaluate the current batch and log training metrics."""
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            # 1-based position of this iteration within the current epoch;
            # `iteration` avoids shadowing the builtin `iter`.
            iteration = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iteration % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iteration, len(ds_train),
                             accuracy, nll))
            # Keyed on the global iteration for consistency with the three
            # scalars below (previously keyed on the epoch by mistake).
            writer.add_scalar("batchtraining/detloss", nll,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def validation_value(engine):
            # Also serves as the Checkpoint score function below.
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
Example #7
0
    loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01), metrics=["accuracy"]
)

# Step 3: Create the ART classifier

classifier = KerasClassifier(model=model, clip_values=(min_pixel_value, max_pixel_value), use_logits=False)

# Step 4: Train the ART classifier
# %%
classifier.fit(x_train, y_train, batch_size=64, nb_epochs=3)
# %%
# Step 5: Evaluate the ART classifier on benign test examples

predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))
# %%
# Step 6: Generate adversarial test examples
attack = DeepFool(classifier)
x_test_adv = attack.generate(x=x_test)
# %%
# Step 7: Evaluate the ART classifier on adversarial test examples

predictions = classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
#%%
plot_examples([x_test_adv[3], x_test_adv[8], x_test_adv[12], x_test_adv[18]])
# %%
K.clear_session()
 def _generate(self, x):
     """Craft DeepFool adversarial examples for the inputs ``x``.

     A fresh DeepFool instance is constructed on every call from
     ``self.classifier`` and the stored attack parameters.
     """
     return DeepFool(self.classifier, **self._params).generate(x)
def main():
    """Compare DeepFool robustness of models trained with and without a
    Lipschitz-constant constraint.

    For each of the ``NUM_TEST`` recorded experiment runs this:
      1. rebuilds the test dataset from the recorded statistics,
      2. loads the paired pretrained models,
      3. evaluates both on the clean test set,
      4. attacks both with DeepFool and evaluates on the adversarial set,
      5. appends all metrics as one row to ``result_DeepFool.csv``.
    """
    Ad = np.load(AD_MAT_FILE)  # Load adjacency matrix
    NUM_TEST = 50  # The number of experiments recorded in raw result file
    NUM_GRAPH = 200  # The number of graph in a test dataset
    array_std, array_mean_values, array_overlap_ratio = load_raw_result_csv(
        RAW_RESULT_FILE)
    NUM_CLASS = array_mean_values.shape[1]
    print(array_mean_values.shape)
    # Write the CSV header once; per-run rows are appended inside the loop.
    with open("result_DeepFool.csv", "w", newline='') as csvfile:
        # Header for the csv file:
        # 1)overlap measurement
        # 2)accuracy of model with Lipschitz constant constraint on original test dataset
        # 3)accuracy of model without Lipschitz constant constraint on original test dataset
        # 4)accuracy of model with Lipschitz constant constraint on adversarial test dataset
        # 5)accuracy of model without Lipschitz constant constraint on adversarial test dataset
        writer = csv.writer(csvfile)
        writer.writerow([
            "overlap ratio", "acc_test_L", "acc_test_WL", "acc_adv_with_Lip",
            "acc_adv_without_Lip"
        ])

    # Begin adversarial test for each previous model
    for i in range(NUM_TEST):
        tf.keras.backend.clear_session()
        # Reconstruct test dataset for each model
        x_test, y_test = reconstruct_test_data(array_std[i],
                                               array_mean_values[i], Ad,
                                               NUM_GRAPH)
        # Load models with/without Lipschitz constant constraint
        model_with_Lip_constr = tf.keras.models.load_model(
            "saved_model_adver_attack/fit{}_model_with_Lip_constr.h5".format(
                i))
        # summary() prints the architecture itself and returns None, so it
        # must not be wrapped in print() (that emitted a stray "None" line).
        model_with_Lip_constr.summary()
        model_without_Lip_constr = tf.keras.models.load_model(
            "saved_model_adver_attack/fit{}_model_without_Lip_constr.h5".
            format(i))

        # Evaluation of models on original test dataset
        print(
            "Evaluation of model WITH Lipschitz constant constraint on TEST data"
        )
        loss_test_L, acc_test_L = model_with_Lip_constr.evaluate(
            x_test, y_test, batch_size=x_test.shape[0], verbose=0)
        print("Loss: {:.4f}, accuracy: {:.4f}".format(loss_test_L, acc_test_L))

        print(
            "Evaluation of model WITHOUT Lipschitz constant constraint on TEST data"
        )
        loss_test_WL, acc_test_WL = model_without_Lip_constr.evaluate(
            x_test, y_test, batch_size=x_test.shape[0], verbose=0)
        print("Loss: {:.4f}, accuracy: {:.4f}".format(loss_test_WL,
                                                      acc_test_WL))

        # Flatten the (nodes, classes) output to a single vector per sample
        # so ART's KerasClassifier can wrap the models.
        reshape_with_Lip = Reshape(
            (x_test.shape[1] * NUM_CLASS, ),
            name="added_reshape_layer_L")(model_with_Lip_constr.output)
        new_model_with_Lip = Model(inputs=model_with_Lip_constr.input,
                                   outputs=reshape_with_Lip)
        reshape_without_Lip = Reshape(
            (x_test.shape[1] * NUM_CLASS, ),
            name="added_reshape_layer_WL")(model_without_Lip_constr.output)
        new_model_without_Lip = Model(inputs=model_without_Lip_constr.input,
                                      outputs=reshape_without_Lip)
        new_model_with_Lip.compile(loss='categorical_crossentropy',
                                   optimizer='adam',
                                   metrics=['accuracy'])
        new_model_without_Lip.compile(loss='categorical_crossentropy',
                                      optimizer='adam',
                                      metrics=['accuracy'])
        # Very wide clip range (mean +/- 100*std) for the attack's outputs.
        min_value = np.min(array_mean_values[i]) - 100 * array_std[i]
        max_value = np.max(array_mean_values[i]) + 100 * array_std[i]

        # construct classifiers to wrap the existing model
        classifier_with_Lip = KerasClassifier(model=new_model_with_Lip,
                                              clip_values=(min_value,
                                                           max_value),
                                              use_logits=False)
        classifier_without_Lip = KerasClassifier(model=new_model_without_Lip,
                                                 clip_values=(min_value,
                                                              max_value),
                                                 use_logits=False)

        # construct DeepFool attack
        attack1 = DeepFool(classifier=classifier_with_Lip,
                           epsilon=0.2,
                           batch_size=10)
        attack2 = DeepFool(classifier=classifier_without_Lip,
                           epsilon=0.2,
                           batch_size=10)

        # Generate adversarial samples
        x_test_adv1 = attack1.generate(x=x_test)
        x_test_adv2 = attack2.generate(x=x_test)

        # Evaluation of models on adversarial test dataset; undo the added
        # Reshape layer so predictions align with y_test's shape.
        y_predict_adv_with_Lip = classifier_with_Lip.predict(x_test_adv1)
        y_predict_adv_without_Lip = classifier_without_Lip.predict(x_test_adv2)
        y_predict_adv_with_Lip = y_predict_adv_with_Lip.reshape(y_test.shape)
        y_predict_adv_without_Lip = y_predict_adv_without_Lip.reshape(
            y_test.shape)
        # Per-node accuracy averaged over all graphs and nodes.
        acc_adv_with_Lip = np.sum(
            np.argmax(y_predict_adv_with_Lip, axis=2) == np.argmax(
                y_test, axis=2)) / (y_test.shape[0] * y_test.shape[1])
        print(
            "Accuracy on adversarial test examples with Lipschitz constraint: {:.2f}%"
            .format(acc_adv_with_Lip * 100))
        acc_adv_without_Lip = np.sum(
            np.argmax(y_predict_adv_without_Lip, axis=2) == np.argmax(
                y_test, axis=2)) / (y_test.shape[0] * y_test.shape[1])
        print(
            "Accuracy on adversarial test examples without Lipschitz constraint: {:.2f}%"
            .format(acc_adv_without_Lip * 100))

        # Save comparison result
        with open("result_DeepFool.csv", "a", newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                array_overlap_ratio[i], acc_test_L, acc_test_WL,
                acc_adv_with_Lip, acc_adv_without_Lip
            ])
Example #10
0
# transform data into right format
predictions = np.asarray(predictions)
# x_data is a list of image tensors collected earlier; concatenate and
# convert to NumPy so ART's attack can consume it.
x_test = torch.cat(x_data).numpy()
# pp(predictions.shape)

# test accuracy on benign examples
accuracy = np.sum(predictions == y_test) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Step 5: Generate adversarial test examples

# attack = FastGradientMethod(estimator=classifier, eps=0.1)
# x_test_adv = attack.generate(x=x_test)

# nb_grads limits how many class gradients DeepFool computes per sample,
# trading attack thoroughness for speed.
adv_crafter = DeepFool(classifier, nb_grads=args.nb_grads)
print("Craft attack on training examples")
x_test_adv = adv_crafter.generate(x_test)

# deepfool takes ~8 second for each adversarial image

# Step 6: Evaluate the ART classifier on adversarial test examples

# pp(x_test_adv.shape)
predictions = []

# Re-classify each adversarial image one sample at a time with the
# randomized-smoothing classifier (requires a CUDA device).
for i in range(x_test_adv.shape[0]):
    x = torch.from_numpy(x_test_adv[i]).cuda()
    # predict
    # NOTE(review): presumably args.N is the number of noise samples and
    # args.alpha the failure probability of the smoothed prediction --
    # confirm against the smoothing library's predict() signature.
    pred = smoothed_classifier.predict(x, args.N, args.alpha, args.batch)
    predictions.append(pred)