Пример #1
0
def deep_fool_attack():
    """Generate DeepFool adversarial images for every batch produced by the
    importer's image generator and save them under
    ``ADVERSARIAL_FOLDER/deep_full_base``.

    Stops after 999 batches or when the image generator is exhausted.
    Relies on the module-level ``importer``, ``config`` and ``utils`` helpers.
    """
    counter = 0
    image_iterator = importer.load_images_generator(importer.batch_shape)
    tf.reset_default_graph()
    x_input = tf.placeholder(tf.float32, shape=importer.batch_shape)
    folder_path = os.path.join(config.ADVERSARIAL_FOLDER, "deep_full_base")
    os.makedirs(folder_path, exist_ok=True)
    while True:
        with tf.Session() as sess:
            filenames, images = next(image_iterator, (None, None))
            # FIX: guard against generator exhaustion — the sentinel is
            # (None, None); previously np.float32(None) crashed below.
            if filenames is None:
                print("Attack is finished")
                break
            model = Inception_V3_Model(np.float32(images))
            params = {'max_iter': 5}
            attack = DeepFool(model=model, sess=sess)
            # Restore the Inception weights into this iteration's session.
            variables = tf.get_collection(tf.GraphKeys.VARIABLES)
            saver = tf.train.Saver(variables)
            saver.restore(sess, importer.checkpoint_path)
            x_adv = attack.generate(x_input, **params)
            adversarial_images = sess.run(x_adv, feed_dict={x_input: images})
            # BUG FIX: original passed the misspelled name
            # `advesrsarial_images`, raising NameError on first iteration.
            utils.image_saver(adversarial_images, filenames, folder_path)
            print("adversarial_images counter:{}".format(counter))
            counter += 1
            if counter == 999:
                print("Attack is finished")
                break
Пример #2
0
def get_DeepFool_adversarial(targeted, xs, classifier, batch_size):
    """Return DeepFool adversarial examples for `xs`, crafted batch by batch.

    DeepFool is an inherently untargeted attack, so `targeted=True` aborts
    the program. Only full batches are attacked (floor division).
    """
    # DeepFool cannot be run in targeted mode.
    if targeted:
        print('DeepFool attack cannot be targeted.')
        exit()

    n_batches = int(xs.shape[0] / batch_size)

    attack = DeepFool(KerasModelWrapper(classifier), sess=K.get_session())
    attack_params = {
        'overshoot': 0.02,
        'max_iter': 50,
        'nb_candidate': 2,
        'clip_min': -5,
        'clip_max': 5
    }

    # First batch seeds the result list; remaining batches are appended.
    chunks = [attack.generate_np(xs[:batch_size, :, :, :], **attack_params)]
    for b in range(1, n_batches):
        print('ITER', b)
        chunks.append(
            attack.generate_np(xs[b * batch_size:(b + 1) * batch_size, :, :, :],
                               **attack_params))
    return np.concatenate(chunks, axis=0)
Пример #3
0
def untargeted_attack(model, images):
    """Run an untargeted DeepFool attack against `model` and return the
    adversarial versions of `images`."""
    wrapped = KerasModelWrapper(model)
    attack = DeepFool(wrapped, back='tf', sess=backend.get_session())
    return attack.generate_np(images)
Пример #4
0
    def __init__(self, model, n_candidates=10, overshoot=0.02, max_iterations=50, clip_min=-1., clip_max=1.):
        """Wrap cleverhans' DeepFool behind this project's attack interface.

        The hyper-parameters are stored on the instance, then the underlying
        cleverhans attack is constructed inside this wrapper's graph.
        """
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)

        # Keep the attack hyper-parameters around for later use.
        self._n_candidate = n_candidates
        self._overshoot = overshoot
        self._max_iterations = max_iterations

        # The cleverhans attack must be built on this wrapper's own graph.
        with self.graph.as_default():
            self._method = DeepFool(
                self._model,
                sess=self.session,
                nb_candidate=self._n_candidate,
                overshoot=self._overshoot,
                max_iter=self._max_iterations,
                nb_classes=self.n_classes,
                clip_min=self._clip_min,
                clip_max=self._clip_max,
            )
Пример #5
0
def init_attack(model, attack_params_dict, sess):
    """Set up an adversarial attack with the cleverhans toolbox.

    Parameters
    ----------
    model : Keras Model
        The model to attack

    attack_params_dict : dict
        Self-defined dictionary specifying the attack and its parameters

    sess : Session
        The current tf session

    Returns
    -------
    attack : cleverhans Attack
        The initialized Attack object

    attack_params : dict
        Parameter values ready to be passed to the attack's generate call

    batch_size : int or None
        Fixed batch size required by the attack (1 for SPSA), else None
    """
    # Cleverhans attacks operate on a wrapped Keras model.
    model_wrap = KerasModelWrapper(model)

    attack_name = attack_params_dict['attack']
    batch_size = None

    if attack_name == 'fgsm':
        attack = FastGradientMethod(model_wrap, sess=sess)
        attack_params = {'eps': attack_params_dict['eps'],
                         'clip_min': 0.,
                         'clip_max': 1.}
    elif attack_name == 'spsa':
        attack = SPSA(model_wrap, sess=sess)
        attack_params = {'epsilon': attack_params_dict['eps'],
                         'num_steps': attack_params_dict['n_steps']}
        # SPSA processes one example at a time.
        batch_size = 1
    elif attack_name == 'deepfool':
        attack = DeepFool(model_wrap, sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_name == 'pgd':
        attack = ProjectedGradientDescent(model_wrap, sess=sess)
        attack_params = {'eps': attack_params_dict['eps'],
                         'eps_iter': attack_params_dict['eps_iter'],
                         'nb_iter': attack_params_dict['n_steps'],
                         'clip_min': 0.,
                         'clip_max': 1.}
    elif attack_name == 'carlini':
        attack = CarliniWagnerL2(model_wrap, sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    else:
        raise NotImplementedError()

    return attack, attack_params, batch_size
Пример #6
0
 def _DeepFool(self):
     """Craft DeepFool adversarial examples for self.x and save the images
     to self.save_loc."""
     attack = DeepFool(self.wrapped_model, sess=self.sess)
     adv_x = attack.generate(
         self.x,
         nb_candidate=10,
         max_iter=100,
         clip_min=0.,
         clip_max=1.,
         verbose=False,
     )
     self.save_images(adv_x, self.save_loc)
Пример #7
0
def eval_cleverhans():
    """Evaluate the (module-level) Keras `model` on adversarial examples
    crafted from the module-level `images`/`labels`, using the attack named
    in `attack_params_dict`.

    The Keras learning phase is forced to test mode during the evaluation
    and restored before returning. Returns the adversarial accuracy.
    """
    # Force test phase; the previous phase is restored at the end.
    learning_phase = K.learning_phase()
    K.set_learning_phase(0)

    # Scale raw pixels into [0, 1] floats.
    images_tf = images.astype(K.floatx())
    images_tf /= 255.

    # Cleverhans needs the Keras model wrapped.
    model_wrap = KerasModelWrapper(model)

    # Build the requested attack; every attack clips to the [0, 1] range.
    attack_name = attack_params_dict['attack']
    attack_params = {'clip_min': 0., 'clip_max': 1.}
    if attack_name == 'fgsm':
        attack = FastGradientMethod(model_wrap, sess=K.get_session())
        attack_params['eps'] = attack_params_dict['eps']
    elif attack_name == 'deepfool':
        attack = DeepFool(model_wrap, sess=K.get_session())
    elif attack_name == 'madry':
        attack = ProjectedGradientDescent(model_wrap, sess=K.get_session())
    elif attack_name == 'carlini':
        attack = CarliniWagnerL2(model_wrap, sess=K.get_session())
    else:
        raise NotImplementedError()

    # Symbolic inputs and the adversarial prediction graph.
    x = tf.placeholder(K.floatx(), shape=(None,) + images.shape[1:])
    y = tf.placeholder(K.floatx(), shape=(None,) + (labels.shape[-1],))
    x_adv = attack.generate(x, **attack_params)
    x_adv = tf.stop_gradient(x_adv)
    predictions_adv = model(x_adv)

    # Accuracy of the model on the adversarial examples.
    eval_par = {'batch_size': batch_size}
    acc_adv = model_eval(K.get_session(), x, y, predictions_adv, images_tf,
                         labels, args=eval_par)

    print('Aversarial accuracy against %s: %.4f\n' %
          (attack_params_dict['attack'], acc_adv))

    # Restore the original learning phase.
    K.set_learning_phase(learning_phase)

    return acc_adv
Пример #8
0
def df(model):
    """Return a closure that perturbs an array in place with DeepFool.

    The returned `attack(X, eps)` overwrites X chunk by chunk; `eps` is
    rounded to the nearest integer and used as `nb_candidate`.
    """
    wrapped = KerasModelWrapper(model)
    deepfool = DeepFool(wrapped, sess=session)

    def attack(X, eps):
        # Attack in CHILD_BATCH_SIZE chunks, writing results back into X.
        for start in tqdm(range(0, len(X), CHILD_BATCH_SIZE),
                          desc=f'DF: ', file=sys.stdout, leave=False):
            batch = tf.convert_to_tensor(X[start:start + CHILD_BATCH_SIZE])
            adv = deepfool.generate(batch, nb_candidate=int(eps + 0.5))
            X[start:start + CHILD_BATCH_SIZE] = session.run(adv)

    return attack
Пример #9
0
    def setUp(self):
        """Build a tiny fixed-weight network and a DeepFool attack on it."""
        super(TestDeepFool, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network: 2->2 sigmoid hidden layer
        # followed by a 2->2 linear output, all constant weights.
        def my_model(x):
            hidden = tf.nn.sigmoid(
                tf.matmul(x, tf.constant([[1.5, .3], [-2, 0.3]],
                                         dtype=tf.float32)))
            return tf.matmul(hidden,
                             tf.constant([[-2.4, 1.2], [0.5, -2.3]],
                                         dtype=tf.float32))

        self.sess = tf.Session()
        self.model = my_model
        self.attack = DeepFool(self.model, sess=self.sess)
Пример #10
0
 def df(X, which, prob, magn):
     """Return a copy of X where a random `prob` fraction of the samples
     have been replaced by DeepFool adversarial versions, clipped to
     [0, magn * 0.3 + 0.3]."""
     attack = DeepFool(KerasModelWrapper(which.model), sess=session)
     X = X.copy()
     # Each sample is selected independently with probability `prob`.
     chosen = np.where(np.random.uniform(size=len(X)) < prob)[0]
     for start in tqdm(range(0, len(chosen), CHILD_BATCH_SIZE),
                       desc=f'batch: ',
                       leave=False):
         batch_idx = chosen[start:start + CHILD_BATCH_SIZE]
         batch = tf.convert_to_tensor(X[batch_idx])
         session.run(tf.global_variables_initializer())
         adv = attack.generate(batch,
                               clip_min=0.,
                               clip_max=magn * 0.3 + 0.3)
         X[batch_idx] = session.run(adv)
     return X
def get_DeepFool_samples(loaded_model, samples, max_iter):
    """Craft DeepFool adversarial examples for `samples` against the given
    Keras model, clipped to [0, 1]."""
    wrapped = KerasModelWrapper(loaded_model)
    attack = DeepFool(wrapped, sess=backend.get_session())
    return attack.generate_np(
        samples,
        max_iter=max_iter,
        clip_min=0.,
        clip_max=1.,
        nb_candidate=10,
    )
    def query(self, X_train, Y_train, labeled_idx, amount):
        """Active-learning query: select the `amount` unlabeled samples whose
        DeepFool perturbation is smallest (i.e. closest to the decision
        boundary).

        DeepFool's `max_iter` is doubled until at least `amount` samples flip
        their predicted class; samples that never flip are excluded (norm set
        to inf). Returns `labeled_idx` extended with the selected indices.
        """
        unlabeled_idx = get_unlabeled_idx(X_train, labeled_idx)
        unlabeled = X_train[unlabeled_idx]

        keras_wrapper = KerasModelWrapper(self.model)
        sess = K.get_session()
        deep_fool = DeepFool(keras_wrapper, sess=sess)
        deep_fool_params = {
            # BUG FIX: cleverhans spells this parameter 'overshoot'; the
            # original 'over_shoot' key was silently ignored, so the
            # overshoot setting never took effect.
            'overshoot': 0.02,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_candidate': Y_train.shape[1],
            'max_iter': 10
        }
        true_predictions = np.argmax(self.model.predict(unlabeled,
                                                        batch_size=256),
                                     axis=1)
        adversarial_predictions = np.copy(true_predictions)
        # Attack with progressively more iterations until enough flips occur.
        while np.sum(true_predictions != adversarial_predictions) < amount:
            adversarial_images = np.zeros(unlabeled.shape)
            # Attack in chunks of 100 samples to bound memory use.
            for i in range(0, unlabeled.shape[0], 100):
                print("At {i} out of {n}".format(i=i, n=unlabeled.shape[0]))
                if i + 100 > unlabeled.shape[0]:
                    adversarial_images[i:] = deep_fool.generate_np(
                        unlabeled[i:], **deep_fool_params)
                else:
                    adversarial_images[i:i + 100] = deep_fool.generate_np(
                        unlabeled[i:i + 100], **deep_fool_params)
            pertubations = adversarial_images - unlabeled
            norms = np.linalg.norm(np.reshape(pertubations,
                                              (unlabeled.shape[0], -1)),
                                   axis=1)
            adversarial_predictions = np.argmax(self.model.predict(
                adversarial_images, batch_size=256),
                                                axis=1)
            # Samples whose prediction did not flip can never be selected.
            norms[true_predictions == adversarial_predictions] = np.inf
            deep_fool_params['max_iter'] *= 2

        # Indices of the `amount` smallest perturbation norms.
        selected_indices = np.argpartition(norms, amount)[:amount]

        # Release the attack graph references before returning.
        del keras_wrapper
        del deep_fool
        gc.collect()

        return np.hstack((labeled_idx, unlabeled_idx[selected_indices]))
Пример #13
0
def mnist_deepfool_attack(sample,
                          target,
                          model,
                          sess,
                          targeted=True,
                          attack_iterations=100):
    """Craft DeepFool adversarial examples for `sample` against `model`.

    Note: DeepFool is an untargeted attack — `target`, `targeted` and
    `attack_iterations` are accepted for interface compatibility with the
    sibling attack helpers but are not used here.
    """
    print('deepfool attack start')
    deepfool = DeepFool(model, sess=sess)
    deepfool_params = {
        # BUG FIX: cleverhans spells this parameter 'overshoot'; the original
        # 'over_shoot' key was silently ignored, so the overshoot setting
        # never took effect.
        'overshoot': 0.02,
        'clip_min': 0.,
        'clip_max': 1.,
        'max_iter': 300,
        'nb_candidate': 2,
    }
    adv_x = deepfool.generate_np(sample, **deepfool_params)
    return adv_x
Пример #14
0
def get_adversarial_attack_and_params(attack_name, wrap, sess):
    """Map an attack name to a configured cleverhans attack.

    Parameters: `attack_name` one of "fgsm", "deepfool", "lbfgs", "saliency",
    "bim"; `wrap` the wrapped model; `sess` the TF session.

    Returns (attack, params, stop_gradient) where `params` is a kwargs dict
    for `attack.generate` (None when the attack's defaults are used) and
    `stop_gradient` says whether the caller should stop gradients through
    the adversarial examples (only for FGSM).

    Raises ValueError for an unknown name. (BUG FIX: the original chained
    `if`s fell through for unknown names and crashed with UnboundLocalError
    on `attack` at the return.)
    """
    params = None
    stop_gradient = False

    if attack_name == "fgsm":
        attack = FastGradientMethod(wrap, sess=sess)
        params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        stop_gradient = True
    elif attack_name == "deepfool":
        attack = DeepFool(wrap, sess=sess)
    elif attack_name == "lbfgs":
        attack = LBFGS(wrap, sess=sess)
    elif attack_name == "saliency":
        attack = SaliencyMapMethod(wrap, sess=sess)
    elif attack_name == "bim":
        attack = BasicIterativeMethod(wrap, sess=sess)
    else:
        raise ValueError("Unknown attack name: %s" % attack_name)

    return attack, params, stop_gradient
Пример #15
0
    def next_images():
        """Generator yielding (filenames, adversarial_images[, images]) for
        every batch from the importer's image generator.

        Uses the closed-over `mode`, `batch_shape` and `is_return_orig_images`
        from the enclosing scope; only mode == 'deep_fool' is supported —
        any other mode raises below.
        """
        tf.logging.set_verbosity(tf.logging.INFO)
        print("{} generator graph is ready!".format(mode))
        # Build a fresh graph and session for this generator.
        tf.reset_default_graph()
        sess = tf.Session()
        x_input = tf.placeholder(tf.float32, shape=importer.batch_shape)
        params = {}
        model = InceptionModelLogits(importer.num_classes, x_input)
        if mode == 'deep_fool':
            graph = DeepFool(model, sess=sess)
            params['max_iter'] = 5
        else:
            raise Exception("Not supported mode")

        print('graph params: {}'.format(params))
        # Restore the Inception weights from the importer's checkpoint.
        variables = tf.get_collection(tf.GraphKeys.VARIABLES)
        saver = tf.train.Saver(variables)
        saver.restore(sess, importer.checkpoint_path)
        image_iterator = importer.load_images_generator(batch_shape)
        while True:
            filenames, images = next(image_iterator, (None, None))
            # (None, None) is the exhaustion sentinel.
            if filenames is None: break
            true_classes = importer.filename_to_class(filenames)
            target = np.expand_dims(np.zeros(importer.num_classes), 1)
            # NOTE(review): this branch is unreachable — any mode other than
            # 'deep_fool' already raised above; confirm intent.
            if mode == 'carlini_wagner':
                assert (len(true_classes) == 1)
                target[true_classes[0]] = 1
                params["y"] = target
            # NOTE(review): generate() is called inside the loop, adding new
            # ops to the graph on every batch — likely a graph-growth leak;
            # consider hoisting it above the loop.
            x_adv = graph.generate(x_input, **params)
            adversarial_images = sess.run(x_adv, feed_dict={x_input: images})
            print("Image:{}, diff:{}".format(
                filenames[0],
                np.sum(np.abs(images[0] - adversarial_images[0]))))
            if is_return_orig_images:
                yield filenames, adversarial_images, images
            else:
                yield filenames, adversarial_images
Пример #16
0
def train_child(t, p, m, load_dict=False):
    """Train a child model on adversarially-augmented data and an
    unaugmented baseline model, then evaluate both on clean and on
    noise-perturbed validation data.

    `t`, `p`, `m` encode the subpolicies (attack type / probability /
    magnitude indices). Relies on module-level globals: `TestCNN`,
    `train_x`/`train_y`, `val_x`/`val_y`, `trainset`/`trainloader`/
    `valloader`, `criterion`, `BATCH_SIZE`, `SUBPOLICY_COUNT`,
    `MAGN_COUNT`. Appends the four accuracies to
    black_eval_runs/acc.csv and saves model/optimizer state dicts there.
    """
    # model = nn.DataParallel(TestCNN().cuda(1), device_ids=[1, 2, 3])
    raw_model = TestCNN().cuda(0)
    model = TestCNN().cuda(0)
    # Wrap the PyTorch model so cleverhans (TF) attacks can drive it.
    tf_model = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    session = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    fgsm = FastGradientMethod(cleverhans_model, sess=session)
    # stm = SpatialTransformationMethod(cleverhans_model, sess=session)
    # cw2 = CarliniWagnerL2(cleverhans_model, sess=session)
    pgd = ProjectedGradientDescent(cleverhans_model, sess=session)
    noise = Noise(cleverhans_model, sess=session)
    mim = MomentumIterativeMethod(cleverhans_model, sess=session)
    df = DeepFool(cleverhans_model, sess=session)
    tf_raw_model = convert_pytorch_model_to_tf(raw_model)
    cleverhans_raw_model = CallableModelWrapper(tf_raw_model,
                                                output_layer='logits')
    # pgd_raw = ProjectedGradientDescent(cleverhans_raw_model, sess=session)
    noise_raw = Noise(cleverhans_raw_model, sess=session)

    # Each *_op helper builds the attack op and runs it on a numpy batch.
    def fgsm_op(x, eps):
        att = fgsm.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    # def stm_op(x, eps):
    #     att = stm.generate(x_op, batch_size=len(x), dx_min=-0.1*eps, dx_max=0.1*eps, dy_min=-0.1*eps, dy_max=0.1*eps, angle_min=-30*eps, angle_max=30*eps)
    #     return session.run(att, feed_dict={x_op: x})
    # def cw2_op(x, eps):
    #     att = cw2.generate(x_op, max_iterations=3)
    def pgd_op(x, eps):
        att = pgd.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
        return session.run(att, feed_dict={x_op: x})

    # def pgd_raw_op(x, eps):
    #     att = pgd_raw.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
    #     return session.run(att, feed_dict={x_op: x})
    def noise_op(x, eps):
        att = noise.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def noise_raw_op(x, eps):
        att = noise_raw.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def df_op(x):
        att = df.generate(x_op, nb_candidate=10, max_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def mim_op(x, eps):
        att = mim.generate(x_op, eps=eps, eps_iter=eps * 0.2)
        return session.run(att, feed_dict={x_op: x})

    def attack_train(x):
        # Apply the subpolicies (t, p, m) to a copy of x and return it.
        attacks = [fgsm_op, pgd_op, mim_op]
        attacks_name = ['FGSM', 'PGD', 'MIM']
        eps = [[0.03, 0.3], [0.03, 0.3], [0.03, 0.3]]
        train_x_adv = x.copy()
        adv_type = np.random.randint(SUBPOLICY_COUNT, size=len(train_x_adv))
        for i, (ti, pi, mi) in enumerate(
                tqdm(zip(t, p, m),
                     total=len(t),
                     desc='Subpolicy: ',
                     leave=False)):
            # NOTE(review): boolean fancy indexing returns a COPY, so the
            # in-place writes into adv_j below likely never reach
            # train_x_adv — confirm whether this is intentional.
            adv_i = train_x_adv[adv_type == i]
            for j, (tj, pj, mj) in enumerate(
                    tqdm(zip(ti, pi, mi),
                         total=len(ti),
                         desc='Operation: ',
                         leave=False)):
                tj, pj, mj = (*tj, *pj, *mj)
                adv_j = adv_i[np.random.randn(len(adv_i)) < pj]
                # NOTE(review): this loop variable shadows the outer
                # subpolicy index `i`.
                for i in tqdm(range(0, len(adv_j), BATCH_SIZE),
                              desc=attacks_name[tj] + ': ',
                              leave=False):
                    adv_j[i:][:BATCH_SIZE] = attacks[tj](
                        adv_j[i:][:BATCH_SIZE], (mj + 1) / MAGN_COUNT *
                        (eps[tj][1] - eps[tj][0]) + eps[tj][0])
        return train_x_adv

    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    raw_optimizer = optim.SGD(raw_model.parameters(), lr=1e-3)
    train_x_adv = attack_train(train_x)
    adv_trainset = torch.utils.data.TensorDataset(
        torch.tensor(train_x_adv, dtype=torch.float),
        torch.tensor(train_y, dtype=torch.long))
    # NOTE(review): this DataLoader wraps the global `trainset`, NOT the
    # `adv_trainset` built just above — the adversarial data is never
    # used for training. Looks like a bug; confirm.
    adv_trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True,
                                                  num_workers=4)
    if load_dict:
        model.load_state_dict(torch.load('black_eval_runs/model.pt'))
        optimizer.load_state_dict(torch.load('black_eval_runs/optimizer.pt'))
        raw_model.load_state_dict(torch.load('black_eval_runs/raw_model.pt'))
        raw_optimizer.load_state_dict(
            torch.load('black_eval_runs/raw_optimizer.pt'))
    # One epoch on the (supposedly adversarial) training data.
    model.train()
    batch_tqdm = tqdm(adv_trainloader, leave=False)
    for x, y in batch_tqdm:
        optimizer.zero_grad()
        output = model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        optimizer.step()
        acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
        batch_tqdm.set_description(f'adv {loss:.3f} {acc:.3f}')
    # One epoch of the baseline model on the clean training data.
    batch_tqdm = tqdm(trainloader, leave=False)
    raw_model.train()
    for x, y in batch_tqdm:
        raw_optimizer.zero_grad()
        output = raw_model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        raw_optimizer.step()
        acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
        batch_tqdm.set_description(f'raw {loss:.3f} {acc:.3f}')
    # Evaluation: each model on clean validation data, then on a
    # noise-perturbed copy of it.
    with torch.no_grad():
        model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_raw_acc = tot_acc / len(val_x)
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                      desc='Noise: ',
                      leave=False):
            val_x_adv[i:][:BATCH_SIZE] = noise_op(val_x[i:][:BATCH_SIZE], 0.3)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_adv_acc = tot_acc / len(val_x)
        raw_model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_raw_acc = tot_acc / len(val_x)
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                      desc='Noise: ',
                      leave=False):
            val_x_adv[i:][:BATCH_SIZE] = noise_raw_op(val_x[i:][:BATCH_SIZE],
                                                      0.3)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_adv_acc = tot_acc / len(val_x)
    # Persist the four accuracies and the model/optimizer states.
    with open('black_eval_runs/acc.csv', 'a') as f:
        f.write(f'{adv_raw_acc},{adv_adv_acc},{raw_raw_acc},{raw_adv_acc}\n')
    print(
        f'adv {adv_raw_acc:.3f} -> {adv_adv_acc:.3f} | raw {raw_raw_acc:.3f} -> {raw_adv_acc:.3f}'
    )
    torch.save(model.state_dict(), 'black_eval_runs/model.pt')
    torch.save(optimizer.state_dict(), 'black_eval_runs/optimizer.pt')
    torch.save(raw_model.state_dict(), 'black_eval_runs/raw_model.pt')
    torch.save(raw_optimizer.state_dict(), 'black_eval_runs/raw_optimizer.pt')
Пример #17
0
    def setUp(self):
        """Create the model, a TF session and the DeepFool attack under test."""
        super(TestDeepFool, self).setUp()

        self.model = SimpleModel()
        self.sess = tf.Session()
        self.attack = DeepFool(self.model, sess=self.sess)
Пример #18
0
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None, batch_size=128):
    """Generate adversarial examples for `x_test` with the chosen attack.

    Parameters: `sess` is the TF session; `x` the input placeholder fed to
    `method.generate`; `model` a cleverhans-compatible model; `target` an
    optional class index (0-9) for targeted attacks where supported;
    `batch_size` the per-run batch size.

    Returns the adversarial images concatenated into one array. Note: only
    `x_test.shape[0] // batch_size` full batches are attacked — trailing
    samples beyond that are dropped.
    """

    # Each branch imports its attack lazily, builds the parameter dict, and
    # (where supported) adds a one-hot `y_target` when `target` is given.
    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8/255,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8./255,
                  'eps_iter': 1./255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = BasicIterativeMethod(model,sess = sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps':8/255,
                  'eps_iter':1/255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MomentumIterativeMethod(model,sess = sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta':8/255,
                  'gamma':0.1,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        # JSMA is used here untargeted only.
        assert target is None
        method = SaliencyMapMethod(model,sess = sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps':8/255,
                  'num_iterations':10,
                  'xi' :1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = VirtualAdversarialMethod(model,sess = sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = CarliniWagnerL2(model,sess = sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {
            "fista": "FISTA",
            "beta": 0.1,
            "decision_rule":"EN",
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = ElasticNetMethod(model,sess = sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {
            "nb_candidate":10,
            "overshoot":1e-3,
            "max_iter":100,
            "nb_classes":10,
            "clip_min":0,
            "clip_max":1
        }
        # DeepFool is inherently untargeted.
        assert target is None
        method = DeepFool(model,sess = sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {
            'batch_size':128,
            "binary_search_steps":10,
            "max_iterations":1000,
            "initial_const":1e-2,
            'clip_min': 0.,
            'clip_max': 1.
        }
        # LBFGS requires a target class.
        assert target is not None
        params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = LBFGS(model,sess = sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps':8/255,
                  'eps_iter':1/255,
                  'nb_iter':10,
                  'ord':np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MadryEtAl(model, sess = sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {
            'epsilon':1/255,
            'num_steps':10,
            'is_targeted':False,
            'early_stop_loss_threshold':None,
            'learning_rate':0.01,
            'delta':0.01,
            'batch_size':128,
            'spsa_iters':1,
            'is_debug':False
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
            params["is_targeted"] = True
        method = SPSA(model, sess = sess)

    else:
        raise ValueError("Can not recognize this attack method: %s" % attack_method)

    # Build the adversarial op once, then run it batch by batch.
    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    for i in range(num_batch):
        x_feed = x_test[i*batch_size:(i+1)*batch_size]
        #y_feed = y_test[i*batch_size:(i+1)*batch_size]

        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)

    adv_imgs = np.concatenate(adv_imgs, axis=0)
    return adv_imgs
    j = 0
    for i in range(0,x_test.shape[0]):
      if np.argmax(model.predict(x_test[i:i+1])) == np.argmax(y_test[i]):
        adv_inputs[j] = x_test[i]
        adv_labels[j] = y_test[i]
        # csvFile1.append([[i,j]])
        j += 1
    adv_inputs = adv_inputs[:100]
    adv_labels = adv_labels[:100]
    print("Legitimate test accuracy = %0.3f" % (j/y_test.shape[0]))
    print("Dataset of %d to be attacked." % adv_inputs.shape[0])
    print(adv_inputs.shape, adv_labels.shape)  

    # Attack
    wrap = KerasModelWrapper(model)
    deepfool = DeepFool(wrap, sess=sess)
    params = {}
    x_adv_1 = deepfool.generate_np(adv_inputs[:20], **params)
    x_adv_2 = deepfool.generate_np(adv_inputs[20:40], **params)
    x_adv_3 = deepfool.generate_np(adv_inputs[40:60], **params)
    x_adv_4 = deepfool.generate_np(adv_inputs[60:80], **params)
    x_adv_5 = deepfool.generate_np(adv_inputs[80:], **params)
    x_adv = np.concatenate((x_adv_1, x_adv_2, x_adv_3, x_adv_4, x_adv_5), axis=0)
    score = model.evaluate(x_adv, adv_labels, verbose=0)
    print('Adv. Test accuracy: %0.3f' % score[1])

    # Initialize random choosing of adversarial images
    num_examples = 100

    index_list = list(range(x_adv.shape[0]))
    import random
Пример #20
0
                'clip_min': 0.,
                'clip_max': 1.
            }
            adv_x = attack.generate(x, **params)
            adv_x = tf.stop_gradient(adv_x)

            print('Epsilon:', eps)
            f.write('Epsilon: ' + str(eps) + '\n')

            save_images(kmodel, adv_x, x_test, y_test, preds1,
                        save_loc + '_e' + str(eps))

# -------------------------------------------------------------------------------
# Attack DeepFool
if FLAGS.attack == 'DeepFool':
    # NOTE(review): `wrap`, `sess`, `x`, `kmodel`, `x_test`, `y_test`,
    # `preds1` and `save_loc` come from earlier in this script — presumably
    # the wrapped Keras model, its TF session and the evaluation data;
    # confirm upstream.
    attack = DeepFool(wrap, sess=sess)
    params = {
        'nb_candidate': 10,  # top classes considered when probing the boundary
        'max_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    adv_x = attack.generate(x, **params)
    save_images(kmodel, adv_x, x_test, y_test, preds1, save_loc)

# -------------------------------------------------------------------------------
# Attack C&W
if FLAGS.attack == 'CW':
    attack = CarliniWagnerL2(wrap, sess=sess)
    params = {
        'batch_size': 1,
Пример #21
0
def main(args):
    """Craft adversarial examples for a pretrained ResNet-50 via cleverhans.

    Loads images from ``args.image_folder``, wraps the PyTorch model as a
    TensorFlow callable, builds the attack graph selected by ``args.attack``,
    and saves each *successful* adversarial example (top-1 prediction changed)
    as a compressed ``.npz`` file in ``args.out_folder``.

    :param args: namespace with ``image_folder``, ``batch_size``, ``device``,
                 ``eps``, ``ord``, ``attack`` and ``out_folder`` attributes.
    :raises ValueError: if ``args.attack`` names an unknown attack.
    """
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = Compose([Resize(256), CenterCrop(224), ToTensor(), normalize])

    dataset = ImageDataset(args.image_folder,
                           transform=transform,
                           return_paths=True)
    dataloader = DataLoader(dataset,
                            shuffle=False,
                            batch_size=args.batch_size,
                            pin_memory=True,
                            num_workers=0)

    model = models.resnet50(pretrained=True).to(args.device)
    model.eval()

    # Single-threaded CPU session keeps attack results reproducible.
    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            allow_soft_placement=True,
                            device_count={'CPU': 1})
    sess = tf.Session(config=config)
    # NCHW layout to match the wrapped PyTorch model's expected input.
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 224, 224))

    tf_model = convert_pytorch_model_to_tf(model, args.device)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')

    # compute clip_min and clip_max using a full black and a full white image
    clip_min = normalize(torch.zeros(3, 1, 1)).min().item()
    clip_max = normalize(torch.ones(3, 1, 1)).max().item()

    eps = args.eps / 255.
    eps_iter = 20  # per-iteration step, in pixel units (scaled by 255 below)
    nb_iter = 10
    args.ord = np.inf if args.ord < 0 else args.ord
    grad_params = {'eps': eps, 'ord': args.ord}
    common_params = {'clip_min': clip_min, 'clip_max': clip_max}
    iter_params = {'eps_iter': eps_iter / 255., 'nb_iter': nb_iter}

    attack_name = ''
    # LBFGS is the only targeted attack here; ``y``/``target`` stay None for
    # every other attack, and must then be kept out of the feed dict.
    y = None
    target = None
    if args.attack == 'fgsm':
        attack_name = '_L{}_eps{}'.format(args.ord, args.eps)
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params}
    elif args.attack == 'iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = BasicIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'm-iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MomentumIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'pgd':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'jsma':
        attack_op = SaliencyMapMethod(cleverhans_model, sess=sess)
        attack_params = {'theta': eps, 'symbolic_impl': False, **common_params}
    elif args.attack == 'deepfool':
        attack_op = DeepFool(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'cw':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'lbfgs':
        attack_op = LBFGS(cleverhans_model, sess=sess)
        # Random one-hot target class out of the 1000 ImageNet classes.
        target = np.zeros((1, 1000))
        target[0, np.random.randint(1000)] = 1
        y = tf.placeholder(tf.float32, target.shape)
        attack_params = {'y_target': y, **common_params}
    else:
        # Previously an unknown name fell through and raised NameError on
        # the first use of attack_op; fail fast with a clear message instead.
        raise ValueError('Unknown attack: {}'.format(args.attack))

    attack_name = args.attack + attack_name

    print('Running [{}]. Params: {}'.format(args.attack.upper(),
                                            attack_params))

    adv_x_op = attack_op.generate(x_op, **attack_params)
    adv_preds_op = tf_model(adv_x_op)
    preds_op = tf_model(x_op)

    n_success = 0
    n_processed = 0
    progress = tqdm(dataloader)
    for paths, x in progress:

        progress.set_description('ATTACK')

        # BUG FIX: the target placeholder only exists for LBFGS; feeding
        # ``y`` unconditionally raised NameError for every other attack.
        feed_dict = {x_op: x}
        if y is not None:
            feed_dict[y] = target
        z, adv_x, adv_z = sess.run([preds_op, adv_x_op, adv_preds_op],
                                   feed_dict=feed_dict)

        # An attack "succeeds" when the top-1 prediction changes.
        src, dst = np.argmax(z, axis=1), np.argmax(adv_z, axis=1)
        success = src != dst
        success_paths = np.array(paths)[success]
        success_adv_x = adv_x[success]
        success_src = src[success]
        success_dst = dst[success]

        n_success += success_adv_x.shape[0]
        n_processed += x.shape[0]

        progress.set_postfix(
            {'Success': '{:3.2%}'.format(n_success / n_processed)})
        progress.set_description('SAVING')

        for p, a, s, d in zip(success_paths, success_adv_x, success_src,
                              success_dst):
            path = '{}_{}_src{}_dst{}.npz'.format(p, attack_name, s, d)
            path = os.path.join(args.out_folder, path)
            np.savez_compressed(path, img=a)
# Example #22
def JSMA_FGSM_BIM(train_start=0,
                  train_end=60000,
                  test_start=0,
                  test_end=10000,
                  nb_epochs=6,
                  batch_size=128,
                  learning_rate=0.001,
                  clean_train=True,
                  testing=False,
                  backprop_through_attack=False,
                  nb_filters=64):
    """
    MNIST cleverhans tutorial: train a CNN on MNIST, evaluate it against a
    battery of attacks (JSMA, FGSM, BIM, ElasticNet, DeepFool, VAT), then
    adversarially train a second CNN against a subset of those attacks.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param clean_train: if true, train on clean examples
    :return: an AccuracyReport object

    NOTE(review): if clean_train is False, ``model`` is never defined, yet
    the adversarial-training section below builds attacks against ``model``
    — that path raises NameError. Confirm clean_train=True is always used.
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size
    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)
        print("#####Starting attacks on clean model#####")
        #################################################################
        #Clean test against JSMA
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against BIM
        bim_params = {
            'eps': 0.3,
            'eps_iter': 0.01,
            'nb_iter': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x = bim.generate(x, **bim_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against EN
        en_params = {
            'binary_search_steps': 1,
            #'y': None,
            'max_iterations': 100,
            'learning_rate': 0.1,
            'batch_size': source_samples,
            'initial_const': 10
        }
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x = en.generate(x, **en_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against DF
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.,
            'clip_max': 1.
        }
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against VAT
        vat_params = {
            'eps': 2.0,
            'num_iterations': 1,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x = vat.generate(x, **vat_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
        ################################################################
        print("Repeating the process, using adversarial training\n")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    #################################################################
    #Adversarial test against JSMA
    # NOTE(review): the attack graphs below are built against ``model`` (the
    # clean model), not ``model_2`` — so model_2 is trained on examples
    # adversarial to model. Confirm this transfer-style training is intended.
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against EN
    # (binary_search_steps is 5 here vs 1 in the clean-test section above)
    en_params = {
        'binary_search_steps': 5,
        #'y': None,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against DF
    # (max_iter is 200 here vs 50 in the clean-test section above)
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }
    deepfool = DeepFool(model, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x)
    ################################################################
    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_jsma,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_fgsm,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_bim,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on BIM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_en,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on EN adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_df,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on DF adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_vat,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc)

    # Only JSMA/FGSM/BIM predictions feed the adversarial-training loss;
    # EN and DF are evaluated but deliberately left out (commented below).
    preds_2_adv = [
        preds_adv_jsma, preds_adv_fgsm, preds_adv_bim
        # ,preds_adv_en
        # ,preds_adv_df
    ]

    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)
# Example #23
def eval(sess,
         model_name,
         X_train,
         Y_train,
         X_test,
         Y_test,
         cnn=False,
         rbf=False,
         fgsm=False,
         jsma=False,
         df=False,
         bim=False):
    """ Load model saved in model_name.json and model_name_weights.h5 and 
    evaluate its accuracy on legitimate test samples and adversarial samples.
    Use cnn=True if the model is CNN based.

    Results are appended to cifar_results.txt. The session is closed on exit.
    NOTE: this function shadows the builtin ``eval``; the name is kept for
    caller compatibility.
    """

    # open text file and output accuracy results to it
    text_file = open("cifar_results.txt", "w")
    try:
        # load saved model
        print("Load model ... ")
        if rbf:
            loaded_model = load_model("rbfmodels/{}.h5".format(model_name),
                                      custom_objects={'RBFLayer': RBFLayer})
            text_file.write(
                'Evaluating on rbfmodels/{}.h5\n\n'.format(model_name))
        else:
            loaded_model = load_model("models/{}.h5".format(model_name))
            text_file.write('Evaluating on models/{}.h5\n\n'.format(model_name))

        # Set placeholders: image tensor for CNNs, flat vector otherwise
        if cnn:
            x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        else:
            x = tf.placeholder(tf.float32, shape=(None, 3072))

        y = tf.placeholder(tf.float32, shape=(None, 10))

        predictions = loaded_model(x)

        accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on legitimate test examples: {0}\n'.format(
                str(accuracy)))

        # Craft adversarial examples depending on the input parameters
        wrap = KerasModelWrapper(loaded_model)

        def _adv_accuracy(adv_x):
            # One-line purpose: evaluate test accuracy on a crafted adv_x op.
            # stop_gradient prevents backprop through the attack graph.
            adv_x = tf.stop_gradient(adv_x)
            preds_adv = loaded_model(adv_x)
            return model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})

        # FGSM
        if fgsm:
            attack = FastGradientMethod(wrap, sess=sess)
            accuracy = _adv_accuracy(attack.generate(x, eps=0.3))
            text_file.write(
                'Test accuracy on fgsm adversarial test examples: {0}\n'.format(
                    str(accuracy)))

        # JSMA
        if jsma:
            attack = SaliencyMapMethod(wrap, sess=sess)
            jsma_params = {
                'theta': 2.,
                'gamma': 0.145,
                'clip_min': 0.,
                'clip_max': 1.,
                'y_target': None
            }
            accuracy = _adv_accuracy(attack.generate(x, **jsma_params))
            text_file.write(
                'Test accuracy on jsma adversarial test examples: {0}\n'.format(
                    str(accuracy)))

        # DeepFool
        if df:
            attack = DeepFool(wrap, sess=sess)
            accuracy = _adv_accuracy(
                attack.generate(x, nb_candidate=10, max_iter=50))
            text_file.write(
                'Test accuracy on df adversarial test examples: {0}\n'.format(
                    str(accuracy)))

        # Basic Iterative Method
        # Commented out as it is hanging on batch #0 at the moment
        '''
        if bim:
            attack = ProjectedGradientDescent(wrap, sess=sess)
            accuracy = _adv_accuracy(attack.generate(x, eps=0.3))
            text_file.write('Test accuracy on bim adversarial test examples: {0}\n'.format(str(accuracy)))
        '''
        # BUG FIX: message previously named cifar10_results.txt, but the file
        # actually written above is cifar_results.txt.
        print('Accuracy results outputted to cifar_results.txt')
    finally:
        # Close the log even if model loading or an attack raises.
        text_file.close()

    # Close TF session
    sess.close()
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8,
                        batch_size=100, nb_classes=10,
                        nb_filters=64,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # do not log
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,verbose=False,
                rng=rng)

    f_out_clean = open("Clean_jsma_elastic_against5.log", "w")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    f_out_clean.write('Test accuracy on legitimate test examples: ' + str(accuracy) + '\n')


    # Clean test against JSMA
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x_jsma = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x_jsma)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against FGSM
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x_fgsm)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' + str(acc) + '\n')


    ################################################################
    # Clean test against BIM
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x_bim = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x_bim)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against EN
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x_en = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x_en)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on EN adversarial examples: ' + str(acc) + '\n')
    ################################################################
    # Clean test against DF
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x_df)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on DF adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against VAT
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x_vat)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
    f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' + str(acc) + '\n')

    f_out_clean.close()

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')


    model_2 = make_basic_cnn()
    preds_2 = model(x)

    # need this for constructing the array
    sess.run(tf.global_variables_initializer())

    # run this again
    # sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)


    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
         #'y': None,
         'max_iterations': 100,
         'learning_rate': 0.1,
         'batch_size': batch_size,
         'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. Deepfool
    deepfool_params = {'nb_candidate':10,
                       'overshoot':0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Base Iterative
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W Attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    #7
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)


    # ==> generate 10 targeted classes for every train data regardless
    # This call runs the Jacobian-based saliency map approach
    # Loop over the samples we want to perturb into adversarial examples

    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]


        # add normal sample in!!!!
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)
        # Loop over all target classes
        for target in target_classes:
            # print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach

            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)

            # append to X_train_adv_set and Y_train_adv_set
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

            # shape is: (1, 28, 28, 1)
            # print("adv_x shape is: ", adv_x.shape)

            # check for success rate
            # res = int(model_argmax(sess, x, preds, adv_x) == target)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # saves the output so later no need to re-fun file
    np.savez("jsma_training_data.npz", x_train=X_train_data
             , y_train=Y_train_data)

    # >>> data = np.load('/tmp/123.npz')
    # >>> data['a']

    f_out = open("Adversarial_jsma_elastic_against5.log", "w")

    # evaluate the function against 5 attacks
    # fgsm, base iterative, jsma, elastic net, and deepfool
    def evaluate_against_all():
            """Evaluate the current model on clean test data and on the seven
            pre-built adversarial graphs, printing each accuracy and appending
            it to the open ``f_out`` log file.

            Relies on closure variables from the enclosing scope: ``sess``,
            ``x``, ``y``, the ``preds*`` tensors, ``X_test``/``Y_test``,
            ``batch_size`` and ``f_out``. Intended to be passed as the
            ``evaluate`` callback of ``model_train``.
            """
            # 1 Clean Data
            eval_params = {'batch_size': batch_size}
            accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                                  args=eval_params)
            print('Legitimate accuracy: %0.4f' % accuracy)

            tmp = 'Legitimate accuracy: '+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 2 JSMA
            accuracy = model_eval(sess, x, y, preds_adv_random, X_test,
                                  Y_test, args=eval_params)

            print('JSMA accuracy: %0.4f' % accuracy)
            tmp = 'JSMA accuracy:'+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 3 FGSM
            accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test,
                                  Y_test, args=eval_params)

            print('FGSM accuracy: %0.4f' % accuracy)
            tmp = 'FGSM accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 4 Base Iterative
            accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test,
                                  Y_test, args=eval_params)

            print('Base Iterative accuracy: %0.4f' % accuracy)
            tmp = 'Base Iterative accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 5 Elastic Net
            accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test,
                                  Y_test, args=eval_params)

            print('Elastic Net accuracy: %0.4f' % accuracy)
            tmp = 'Elastic Net accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 6 DeepFool
            accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test,
                                  Y_test, args=eval_params)
            print('DeepFool accuracy: %0.4f' % accuracy)
            tmp = 'DeepFool accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 7 C & W Attack
            accuracy = model_eval(sess, x, y, preds_adv_cw, X_test,
                                  Y_test, args=eval_params)
            print('C & W accuracy: %0.4f' % accuracy)
            tmp = 'C & W  accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            # NOTE(review): the epoch marker is written twice per call (here
            # and again after the VAT block) — presumably unintentional.
            f_out.write("*******End of Epoch***********\n\n")

            # 8 Virtual Adversarial
            accuracy = model_eval(sess, x, y, preds_adv_vat, X_test,
                                  Y_test, args=eval_params)
            print('VAT accuracy: %0.4f' % accuracy)
            tmp = 'VAT accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            print("*******End of Epoch***********\n\n")
        # report.adv_train_adv_eval = accuracy

    print("Now Adversarial Training with Elastic Net  + modified X_train and Y_train")
    # trained_model.out
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
        'filename': 'trained_model.out'
    }
    model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                 predictions_adv=preds_adv_elastic_net,
                evaluate=evaluate_against_all, verbose=False,
                args=train_params, rng=rng)


    # Close TF session
    sess.close()
    return report
Example #25
0
                    model = old_models.ShallowConvNet(nb_classes=nb_classes,
                                                      Chans=channels,
                                                      Samples=samples)
                else:
                    raise Exception('No such model:{}'.format(model_used))

                model.compile(optimizer='Adam',
                              loss='sparse_categorical_crossentropy',
                              metrics=['acc'])
                model.load_weights(model_path)

                y_test = y_test.astype('int32').flatten()
                y_test_pre = np.argmax(model.predict(x_test), axis=1)

                ch_model = KerasModelWrapper(model)
                deepfool = DeepFool(ch_model, back='tf', sess=K.get_session())
                raw_acc = np.sum(y_test_pre == y_test) / len(y_test_pre)

                # np.random.seed(2009)
                shape = x_test.shape
                # random_v = a * np.random.rand(1, 1, channels, samples)
                random_v = a * np.random.uniform(-1, 1,
                                                 (1, 1, channels, samples))
                random_x = x_test + random_v

                y_rand_pre = np.argmax(model.predict(random_x), axis=1)
                rand_acc = np.sum(y_rand_pre == y_test) / len(y_rand_pre)

                v, fool_list = universal_perturbation(model,
                                                      deepfool,
                                                      x_train,
def attack_batch(model, in_im, net_name, attack_name, im_list, gt_labels, sample_size, batch_size):
    """Run a cleverhans white-box attack over a dataset in mini-batches.

    Logs running fooling rate and, at the end, the clean top-1 accuracy,
    adversarial top-1 accuracy and overall fooling rate to
    ``Logs/<net_name>_<attack_name>.log``.

    Args:
        model: Callable mapping an image batch tensor to logits.
        in_im: Input placeholder tensor fed with image batches.
        net_name: Network identifier used for log naming and preprocessing.
        attack_name: One of 'fgsm', 'ifgsm', 'cw2', 'jsma', 'pgd', 'deepfool'.
        im_list: Path to a text file listing image paths, one per line.
        gt_labels: Path to a text file of integer ground-truth labels,
            one per line, aligned with ``im_list``.
        sample_size: Total number of samples to evaluate.
        batch_size: Mini-batch size.

    Raises:
        ValueError: If ``attack_name`` is not one of the supported attacks
            (previously this fell through to a NameError on ``adv_x``).
    """
    logging.basicConfig(filename='Logs/'+net_name+"_"+attack_name+'.log', level=logging.INFO,
    format='%(asctime)s:%(levelname)s:%(message)s')
    config = tf.ConfigProto(device_count={'GPU': 2})
    # Read image paths and labels up front; 'with' guarantees the file
    # handles are closed (they previously leaked). The local is renamed so
    # it no longer shadows the gt_labels *path* parameter.
    with open(im_list) as f:
        imgs = f.readlines()  # [::10]
    with open(gt_labels) as f:
        gt_lines = f.readlines()  # [::10]
    top_1 = 0
    top_1_real = 0
    fool_rate = 0
    isotropic, size = get_params(net_name)
    imageModel = CallableModelWrapper(model, 'logits')

    with tf.Session(config=config) as sess:
        if attack_name == 'fgsm':
            attack = FastGradientMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im, eps=8, clip_min=-124, clip_max=155)
        elif attack_name == 'ifgsm':
            attack = BasicIterativeMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im, eps=8, eps_iter=1, nb_iter=12, clip_min=-124, clip_max=155)
        elif attack_name == 'cw2':
            attack = CarliniWagnerL2(imageModel, back='tf')
            adv_x = attack.generate(in_im, clip_min=-124, clip_max=155)
        elif attack_name == 'jsma':
            attack = SaliencyMapMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im)
        elif attack_name == 'pgd':
            attack = MadryEtAl(imageModel, back='tf')
            adv_x = attack.generate(in_im, eps=8, eps_iter=1, nb_iter=12, clip_min=-124, clip_max=155)
        elif attack_name == 'deepfool':
            attack = DeepFool(imageModel, back='tf')
            # NOTE(review): passing sess= through generate() is unusual for
            # cleverhans attacks — confirm this version accepts it.
            adv_x = attack.generate(in_im, sess=sess, clip_min=-124, clip_max=155)
        else:
            raise ValueError('Unknown attack_name: {}'.format(attack_name))

        sess.run(tf.global_variables_initializer())
        img_loader = loader_func(net_name, sess, isotropic, size)
        batch_im = np.zeros((batch_size, size, size, 3))

        # Floor division: the original Python-2 style `sample_size/batch_size`
        # passes a float to range() on Python 3 and raises TypeError.
        for i in range(sample_size // batch_size):
            lim = min(batch_size, len(imgs) - i * batch_size)
            for j in range(lim):
                im = img_loader(imgs[i * batch_size + j].strip())
                batch_im[j] = np.copy(im)
            gt = np.array([int(gt_lines[i * batch_size + j].strip())
                           for j in range(lim)])
            adv_x_np = adv_x.eval(feed_dict={in_im: batch_im})

            # Calculate the neural probabilities for adversarial and clean batches.
            y_adv_prob = tf.nn.softmax(model(in_im), name="yadv").eval(feed_dict={in_im: adv_x_np})
            y_adv = np.argmax(y_adv_prob, 1)
            y_true_prob = tf.nn.softmax(model(in_im), name="ypred").eval(feed_dict={in_im: batch_im})
            y_true = np.argmax(y_true_prob, 1)

            # Accumulate top-1 (adversarial), top-1 (clean) and fooling counts.
            top_1 += np.sum(y_adv == gt)
            top_1_real += np.sum(y_true == gt)
            fool_rate += np.sum(y_true != y_adv)

            if i != 0 and i % 2 == 0:
                logging.info("batch: {} ==================================================================".format(i))
                logging.info("fooling rate {}".format((fool_rate)/float((i+1)*batch_size)*100))

    logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    logging.info('Real Top-1 Accuracy = {}'.format(
    top_1_real/float(sample_size)*100))
    logging.info('Top-1 Accuracy = {}'.format((top_1/float(sample_size)*100)))
    logging.info('Top-1 Fooling Rate = {}'.format(fool_rate/float(sample_size)*100))
    logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
Example #27
0
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):
    """Attack a trained classifier and report clean vs. adversarial accuracy.

    Loads the chosen dataset's test split, builds the requested model graph
    ('End2End' GBP model or plain Resnet), crafts adversarial examples for
    the first ``size`` test images with the selected cleverhans attack,
    drops failed outputs (NaNs or inputs the attack left unperturbed), and
    prints accuracy on the clean and the surviving adversarial examples.

    Args:
        type: Model flavour — 'End2End' or anything else for plain Resnet.
            (The name shadows the builtin but is kept for caller compatibility.)
        dataset: 'CIFAR10', 'CIFAR100', or anything else for SVHN.
        attack_type: Attack selector ('FGM', 'CWL2', 'DF', 'ENM', 'FFA',
            'LBFGS', 'MEA', 'MIM', 'SMM', 'SPSA', 'VATM', 'VAM').

    Raises:
        ValueError: If ``attack_type`` is not recognized.
    """

    # Number of test images to attack, and the evaluation batch size.
    size = 256
    eval_params = {'batch_size': 128}

    ############################################# Prepare the Data #####################################################

    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
        input_dim = 32
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
        input_dim = 32
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
        input_dim = 32

    # Scale pixels to [0, 1] and one-hot encode the labels.
    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)

    ############################################# Prepare the Data #####################################################


    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:

        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        # input_output caches (input tensor, logits) so the graph is created
        # once (reuse=False) and variable-reused on every later call the
        # attack makes through the wrapper.
        input_output = []
        def modelBuilder(x, num_classes, dataset, type, sess, input_output):
            # Build (first call) or rebuild-with-reuse the model graph and
            # return its logits tensor.

            if len(input_output) == 0:

                reuse = False

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes,
                                            inputT=x, sess=sess,
                                            checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)
                else:

                    _, tf_model = \
                        prepare_Resnet(num_classes,
                                       inputT=x, sess=sess,
                                       checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)

            else:

                reuse = True

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes, inputT=x, reuse=reuse)
                else:
                    _, tf_model = \
                        prepare_Resnet(num_classes, inputT=x, reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)


            return tf_model.logits

        # create an attackable model for the cleverhans
        model = CallableModelWrapper(lambda placeholder: modelBuilder(placeholder, num_classes, dataset, type, sess, input_output), 'logits')

        # TODO: check the configurations
        if attack_type == "FGM": # pass
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = {
                'eps' : 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "CWL2": # pass
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "DF": # pass
            attack = DeepFool(model, back='tf', sess=sess)
            params = {
            }
        elif attack_type == "ENM": # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "FFA": # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'eps_iter': 0.005,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VATM":
            # NOTE(review): in cleverhans, `vatm` is a helper function rather
            # than an Attack class — confirm this branch actually works.
            attack = vatm(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        else:
            # The previous message claimed "I will use FGM instead" while the
            # code raised anyway; fail explicitly with an honest message.
            raise ValueError("Unrecognized attack type: {}".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]})

        # notice that "adv_vals" may contain NANs because of the failure of the attack
        # also the input may not be perturbed at all because of the failure of the attack
        to_delete = []
        for idx, adv in enumerate(adv_vals):
            # for nan
            if np.isnan(adv).any():
                to_delete.append(idx)
            # for no perturbation (elif so each index is recorded only once)
            elif np.array_equiv(adv, x_test[idx]):
                to_delete.append(idx)

        # cleanings
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_test[:size], to_delete, axis=0)
        y_cleaned = np.delete(y_test[:size], to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial example is generated!")
            return

        print("{} out of {} adversarial examples are generated.".format(len(adv_vals_cleaned), size))

        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori_cleaned[idx])) for idx, adv in enumerate(adv_vals_cleaned)])))

        # TODO: visualize the adv_vals

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), x_test[:size], y_test[:size],
                              args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), adv_vals_cleaned, y_cleaned,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
Example #28
0
            # graph = tf.get_default_graph()
            x_input1 = tf.placeholder(tf.float32, shape=batch_shape)
            x_input2 = tf.placeholder(tf.float32, shape=batch_shape)
            prediction = model(x_input2, x_input1)

            # prediction = sess.run(predictions, feed_dict={phase_train_placeholder: False})
            # Define FGSM for the model
            steps = 1
            df_params = {
                'nb_classes': 2,
                'max_iter': 150,
                'clip_min': 0.0,
                'clip_max': 1.0,
                'nb_candidate': 2
            }
            DF = DeepFool(model, back='tf', sess=sess)
            adv_x = DF.generate(x_input1, x_input2, faces2_batch, **df_params)
            inception_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='InceptionResnetV1')
            saver = tf.train.Saver(inception_vars, max_to_keep=3)
            # sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
            pretrained_model = '/home/fan/facenet_adversarial_faces/models/facenet/20170512-110547/'
            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                # facenet.load_model(pretrained_model)

                model_exp = os.path.expanduser(pretrained_model)
                print('Model directory: %s' % model_exp)
                _, ckpt_file = facenet.get_model_filenames(model_exp)

                # print('Metagraph file: %s' % meta_file)
Example #29
0
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
if __name__ == '__main__':
    model_keras = keras.models.load_model('model_cifar.h5')
    batch_size = 512
    success = 0

    data_size = X_train.shape[0]
    adv_train = []
    for st in range(0, data_size, batch_size):
        sample = np.array(
            X_train[st:st + batch_size].reshape(-1, 32 * 32 * 3) / 255,
            dtype=np.float)
        # sample = np.array([sample])
        sess = keras.backend.get_session()
        model = KerasModelWrapper(model_keras)
        attack = DeepFool(model, sess=sess)
        # print(model.predict(panda.reshape(1, *panda.shape)))

        param = dict(
            nb_candidate=10,
            overshoot=0.01,
            #overshoot=0.0,
            max_iter=20,
            clip_min=0.,
            clip_max=1.)
        advs = attack.generate_np(sample, **param)
        # plt.imsave("sample.png", advs[0])
        adv_train.append(advs)
        preb = model_keras.predict(advs).argmax(axis=1).reshape(
            (sample.shape[0], ))
        y_sample = model_keras.predict(sample).argmax(axis=1).reshape(
Example #30
0
def whitebox(gan,
             rec_data_path=None,
             batch_size=128,
             learning_rate=0.001,
             nb_epochs=10,
             eps=0.3,
             online_training=False,
             test_on_dev=True,
             attack_type='fgsm',
             defense_type='gan',
             num_tests=-1,
             num_train=-1):
    """White-box attack evaluation. Based on MNIST tutorial from cleverhans.

    Trains a target classifier (optionally with FGSM adversarial training,
    or on Defense-GAN reconstructions), crafts white-box adversarial
    examples of the requested type, and reports clean and adversarial
    accuracy.

    Args:
         gan: A `GAN` model.
         rec_data_path: A string to the directory.
         batch_size: The size of the batch.
         learning_rate: The learning rate for training the target models.
         nb_epochs: Number of epochs for training the target model.
         eps: The epsilon of FGSM.
         online_training: Training Defense-GAN with online reconstruction. The
            faster but less accurate way is to reconstruct the dataset once and use
            it to train the target models with:
            `python train.py --cfg <path-to-model> --save_recs`
         test_on_dev: If True, evaluate on the dev split.
         attack_type: Type of the white-box attack. It can be `fgsm`,
            `rand+fgsm`, or `cw`.
         defense_type: String representing the type of attack. Can be `none`,
            `defense_gan`, or `adv_tr`.
         num_tests: Number of test samples to keep (-1 keeps all).
         num_train: Number of training samples to keep (-1 keeps all).

    Returns:
        A tuple ``(accuracy, 0, roc_info)`` — clean accuracy and ``None``
        ROC info when ``attack_type`` is None, otherwise adversarial
        accuracy and the ROC info from the GAN evaluation (or None).
    """

    FLAGS = tf.flags.FLAGS

    # Set logging level to see debug information.
    set_log_level(logging.WARNING)

    if defense_type == 'defense_gan':
        assert gan is not None

    # Create TF session. Defense-GAN reuses the GAN's own session so its
    # reconstruction graph is available.
    if defense_type == 'defense_gan':
        sess = gan.sess
        if FLAGS.train_on_recs:
            assert rec_data_path is not None or online_training
    else:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

    # rec_* arrays hold (possibly reconstructed) data used for evaluation
    # during training; the second call fetches the original (unreconstructed)
    # test data for attacking.
    train_images, train_labels, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev)

    rec_test_images = test_images
    rec_test_labels = test_labels

    _, _, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev, orig_data_flag=True)

    x_shape = [None] + list(train_images.shape[1:])
    images_pl = tf.placeholder(tf.float32,
                               shape=[None] + list(train_images.shape[1:]))
    labels_pl = tf.placeholder(tf.float32,
                               shape=[None] + [train_labels.shape[1]])

    # Optionally truncate the splits for faster experiments.
    if num_tests > 0:
        test_images = test_images[:num_tests]
        rec_test_images = rec_test_images[:num_tests]
        test_labels = test_labels[:num_tests]

    if num_train > 0:
        train_images = train_images[:num_train]
        train_labels = train_labels[:num_train]

    # GAN defense flag.
    models = {
        'A': model_a,
        'B': model_b,
        'C': model_c,
        'D': model_d,
        'E': model_e,
        'F': model_f
    }
    model = models[FLAGS.model](input_shape=x_shape,
                                nb_classes=train_labels.shape[1])

    preds = model.get_probs(images_pl)
    report = AccuracyReport()

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test
        # examples.
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         images_pl,
                         labels_pl,
                         preds,
                         rec_test_images,
                         rec_test_labels,
                         args=eval_params,
                         feed={K.learning_phase(): 0})
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
    }

    # Fixed seeds for reproducible training.
    rng = np.random.RandomState([11, 24, 1990])
    tf.set_random_seed(11241990)

    # Adversarial training: build an FGSM graph whose outputs are mixed into
    # the training loss (gradients are stopped through the attack).
    preds_adv = None
    if FLAGS.defense_type == 'adv_tr':
        attack_params = {
            'eps': FLAGS.fgsm_eps_tr,
            'clip_min': 0.,
            'clip_max': 1.
        }
        if gan:
            if gan.dataset_name == 'celeba':
                attack_params['clip_min'] = -1.0

        attack_obj = FastGradientMethod(model, sess=sess)
        adv_x_tr = attack_obj.generate(images_pl, **attack_params)
        adv_x_tr = tf.stop_gradient(adv_x_tr)
        preds_adv = model(adv_x_tr)

    model_train(sess,
                images_pl,
                labels_pl,
                preds,
                train_images,
                train_labels,
                args=train_params,
                rng=rng,
                predictions_adv=preds_adv,
                init_all=False,
                feed={K.learning_phase(): 1},
                evaluate=evaluate)

    # Calculate training error.
    eval_params = {'batch_size': batch_size}
    acc = model_eval(
        sess,
        images_pl,
        labels_pl,
        preds,
        train_images,
        train_labels,
        args=eval_params,
        feed={K.learning_phase(): 0},
    )
    print('[#] Accuracy on clean examples {}'.format(acc))
    if attack_type is None:
        return acc, 0, None

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph.

    # For Defense-GAN, prepend the GAN reconstruction to the model so the
    # attack is computed through the defense.
    if FLAGS.defense_type == 'defense_gan':
        z_init_val = None

        if FLAGS.same_init:
            z_init_val = tf.constant(
                np.random.randn(batch_size * gan.rec_rr,
                                gan.latent_dim).astype(np.float32))

        model.add_rec_model(gan, z_init_val, batch_size)

    min_val = 0.0
    if gan:
        if gan.dataset_name == 'celeba':
            min_val = -1.0

    # rand+fgsm: add a random sign perturbation first, then spend the
    # remaining budget on FGSM.
    # NOTE(review): `args` is not defined in this function — `args.alpha`
    # will raise NameError if a 'rand' attack type is used; presumably
    # FLAGS.alpha was intended. Confirm before relying on this path.
    if 'rand' in FLAGS.attack_type:
        test_images = np.clip(
            test_images +
            args.alpha * np.sign(np.random.randn(*test_images.shape)), min_val,
            1.0)
        eps -= args.alpha

    # NOTE(review): if FLAGS.attack_type matches none of the branches below,
    # `attack_obj`/`attack_params` are undefined at generate() time.
    if 'fgsm' in FLAGS.attack_type:
        attack_params = {
            'eps': eps,
            'ord': np.inf,
            'clip_min': min_val,
            'clip_max': 1.
        }
        attack_obj = FastGradientMethod(model, sess=sess)
    elif FLAGS.attack_type == 'cw':
        attack_obj = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_iterations = 100
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 10.0,
            'batch_size': batch_size,
            'initial_const': 100,
            'feed': {
                K.learning_phase(): 0
            }
        }
    elif FLAGS.attack_type == 'mim':
        attack_obj = MomentumIterativeMethod(model, back='tf', sess=sess)
        attack_params = {
            'eps': eps,
            'ord': np.inf,
            'clip_min': min_val,
            'clip_max': 1.
        }
    elif FLAGS.attack_type == 'deepfool':
        attack_obj = DeepFool(model, back='tf', sess=sess)
        # NOTE(review): 'eps' is not a standard DeepFool parameter — confirm
        # this cleverhans version tolerates it.
        attack_params = {
            'eps': eps,
            'clip_min': min_val,
            'clip_max': 1.,
            'nb_candidate': 2,
            'nb_classes': 2
        }
    elif FLAGS.attack_type == 'lbfgs':
        attack_obj = LBFGS(model, back='tf', sess=sess)
        attack_params = {'clip_min': min_val, 'clip_max': 1.}

    adv_x = attack_obj.generate(images_pl, **attack_params)

    eval_par = {'batch_size': batch_size}
    if FLAGS.defense_type == 'defense_gan':
        preds_adv = model.get_probs(adv_x)

        # Per-sample mean squared perturbation, averaged over all non-batch
        # dimensions, reported alongside the GAN evaluation.
        num_dims = len(images_pl.get_shape())
        avg_inds = list(range(1, num_dims))
        diff_op = tf.reduce_mean(tf.square(adv_x - images_pl), axis=avg_inds)
        acc_adv, roc_info = model_eval_gan(
            sess,
            images_pl,
            labels_pl,
            preds_adv,
            None,
            test_images=test_images,
            test_labels=test_labels,
            args=eval_par,
            feed={K.learning_phase(): 0},
            diff_op=diff_op,
        )
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)
    else:
        preds_adv = model(adv_x)
        roc_info = None
        acc_adv = model_eval(sess,
                             images_pl,
                             labels_pl,
                             preds_adv,
                             test_images,
                             test_labels,
                             args=eval_par,
                             feed={K.learning_phase(): 0})
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)

    if FLAGS.debug and gan is not None:  # To see some qualitative results.
        # NOTE(review): this debug path references `images_tensor` and
        # `x_adv_sub`, neither of which is defined in this function — it will
        # raise NameError if executed. Likely copied from a sibling script;
        # needs fixing before use.
        adv_x_debug = adv_x[:batch_size]
        images_pl_debug = images_pl[:batch_size]

        debug_dir = os.path.join('debug', 'whitebox', FLAGS.debug_dir)
        ensure_dir(debug_dir)

        reconstructed_tensors = gan.reconstruct(adv_x_debug,
                                                batch_size=batch_size,
                                                reconstructor_id=2)

        x_rec_orig = gan.reconstruct(images_tensor,
                                     batch_size=batch_size,
                                     reconstructor_id=3)
        x_adv_sub_val = sess.run(x_adv_sub,
                                 feed_dict={
                                     images_tensor: images_pl_debug,
                                     K.learning_phase(): 0
                                 })
        sess.run(tf.local_variables_initializer())
        x_rec_debug_val, x_rec_orig_val = sess.run(
            [reconstructed_tensors, x_rec_orig],
            feed_dict={
                images_tensor: images_pl_debug,
                K.learning_phase(): 0
            })

        save_images_files(x_adv_sub_val, output_dir=debug_dir, postfix='adv')

        postfix = 'gen_rec'
        save_images_files(x_rec_debug_val,
                          output_dir=debug_dir,
                          postfix=postfix)
        save_images_files(images_pl_debug,
                          output_dir=debug_dir,
                          postfix='orig')
        save_images_files(x_rec_orig_val,
                          output_dir=debug_dir,
                          postfix='orig_rec')

    return acc_adv, 0, roc_info