def test_krclassifier(self):
        """
        Second test with the KerasClassifier.
        :return:
        """
        # Initialize a tf session
        session = tf.Session()
        k.set_session(session)

        # Get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:nb_train], y_train[:nb_train]
        x_test, y_test = x_test[:nb_test], y_test[:nb_test]

        # Create simple CNN
        model = Sequential()
        model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(10, activation='softmax'))

        model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Get classifier
        krc = KerasClassifier((0, 1), model, use_logits=False)
        krc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

        # First attack
        cl2m = CarliniL2Method(classifier=krc, targeted=True, max_iter=100, binary_search_steps=10,
                               learning_rate=2e-2, initial_const=3, decay=1e-2)
        params = {'y': random_targets(y_test, krc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second attack
        cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=10,
                               learning_rate=2e-2, initial_const=3, decay=1e-2)
        params = {'y': random_targets(y_test, krc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((target != y_pred_adv).all())

        # Third attack
        cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=10,
                               learning_rate=2e-2, initial_const=3, decay=1e-2)
        params = {}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        y_pred = np.argmax(krc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())
Example #2
    def test_mnist(self):
        session = tf.Session()
        k.set_session(session)

        comp_params = {"loss": 'categorical_crossentropy',
                       "optimizer": 'adam',
                       "metrics": ['accuracy']}

        # get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 10
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu")
        classifier.compile(comp_params)
        classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
        scores = classifier.evaluate(X_test, Y_test)
        print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

        df = CarliniL2Method(classifier, sess=session, targeted=False, max_iterations=100, binary_search_steps=2,
                             learning_rate=1e-2, initial_const=1)
        params = {'y_val': random_targets(Y_test, classifier.model.get_output_shape_at(-1)[-1])}
        x_test_adv = df.generate(X_test, **params)
        self.assertFalse((X_test == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((Y_test == y_pred).all())

        scores = classifier.evaluate(x_test_adv, Y_test)
        print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
Example #3
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Load classifier and generate adversarial samples
    """
    t_start = time.time()
    if attacker_name == "FGSM":
        attacker = FastGradientMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "Elastic":
        attacker = ElasticNet(classifier=classifier, confidence=0.5)
    elif attacker_name == "BasicIterativeMethod":
        attacker = BasicIterativeMethod(classifier=classifier, eps=0.3)
    elif attacker_name == "NewtonFool":
        attacker = NewtonFool(classifier=classifier, max_iter=20)
    elif attacker_name == "HopSkipJump":
        attacker = HopSkipJump(classifier=classifier, max_iter=20)
    elif attacker_name == "ZooAttack":
        attacker = ZooAttack(classifier=classifier, max_iter=20)
    elif attacker_name == "VirtualAdversarialMethod":
        attacker = VirtualAdversarialMethod(classifier=classifier, max_iter=20)
    elif attacker_name == "UniversalPerturbation":
        attacker = UniversalPerturbation(classifier=classifier, max_iter=20)
    elif attacker_name == "AdversarialPatch":
        attacker = AdversarialPatch(classifier=classifier, max_iter=20)
    elif attacker_name == "Attack":
        attacker = Attack(classifier=classifier)
    elif attacker_name == "BoundaryAttack":
        attacker = BoundaryAttack(classifier=classifier,
                                  targeted=False,
                                  epsilon=0.05,
                                  max_iter=20)
    elif attacker_name == "CarliniL2":
        attacker = CarliniL2Method(classifier=classifier,
                                   confidence=0.5,
                                   learning_rate=0.001,
                                   max_iter=15)
    elif attacker_name == "CarliniLinf":
        attacker = CarliniLInfMethod(classifier=classifier,
                                     confidence=0.5,
                                     learning_rate=0.001,
                                     max_iter=15)
    elif attacker_name == "DeepFool":
        attacker = DeepFool(classifier)
    elif attacker_name == "SMM":
        attacker = SaliencyMapMethod(classifier=classifier, theta=2)
    elif attacker_name == "PGD":
        attacker = ProjectedGradientDescent(classifier=classifier,
                                            norm=2,
                                            eps=1,
                                            eps_step=0.5)
    else:
        raise ValueError("Unknown attacker name: %s" % attacker_name)
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
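
A minimal usage sketch (assumptions: classifier is an already-fitted ART classifier and x_test a NumPy batch; both are placeholders here):

x_test_adv, elapsed = GetAttackers(classifier, x_test, "CarliniL2")
print("CarliniL2 crafted %d adversarial samples in %.1fs" % (len(x_test_adv), elapsed))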
Example #4
    def test_failure_attack(self):
        """
        Test the corner case where the attack fails.
        :return:
        """
        # Build a TFClassifier
        # Define input and output placeholders
        self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        self._output_ph = tf.placeholder(tf.int32, shape=[None, 10])

        # Define the tensorflow graph
        conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu)
        conv = tf.layers.max_pooling2d(conv, 2, 2)
        fc = tf.contrib.layers.flatten(conv)

        # Logits layer
        self._logits = tf.layers.dense(fc, 10)

        # Train operator
        self._loss = tf.reduce_mean(
            tf.losses.softmax_cross_entropy(logits=self._logits,
                                            onehot_labels=self._output_ph))
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        self._train = optimizer.minimize(self._loss)

        # Tensorflow session and initialization
        self._sess = tf.Session()
        self._sess.run(tf.global_variables_initializer())

        # Get MNIST
        batch_size, nb_train, nb_test = 100, 5000, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:nb_train], y_train[:nb_train]
        x_test, y_test = x_test[:nb_test], y_test[:nb_test]

        # Train the classifier
        tfc = TFClassifier((0, 1), self._input_ph, self._logits,
                           self._output_ph, self._train, self._loss, None,
                           self._sess)
        tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=10)

        # Failure attack
        cl2m = CarliniL2Method(classifier=tfc,
                               targeted=True,
                               max_iter=0,
                               binary_search_steps=0,
                               learning_rate=0,
                               initial_const=1,
                               decay=0)
        params = {'y': random_targets(y_test, tfc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        np.testing.assert_almost_equal(x_test, x_test_adv, 3)
Example #5
else:
    if args.adv_method == 'deepfool':
        adv_crafter = DeepFool(classifier,
                               session,
                               clip_min=min_,
                               clip_max=max_)
    elif args.adv_method == 'jsma':
        adv_crafter = SaliencyMapMethod(classifier,
                                        sess=session,
                                        clip_min=min_,
                                        clip_max=max_,
                                        gamma=1,
                                        theta=max_)
    elif args.adv_method == 'carlini':
        adv_crafter = CarliniL2Method(classifier,
                                      sess=session,
                                      targeted=False,
                                      confidence=10)
    else:
        adv_crafter = UniversalPerturbation(classifier,
                                            session,
                                            p=np.inf,
                                            attacker_params={
                                                'clip_min': min_,
                                                'clip_max': max_
                                            })

    X_train_adv = adv_crafter.generate(x_val=X_train)
    X_test_adv = adv_crafter.generate(x_val=X_test)

    if args.save:
        np.save(os.path.join(SAVE_ADV, "train.npy"), X_train_adv)
Example #6
l1_reg_levels = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005]

for l1_level in range(0, 5):
    classifier = neural_networks.two_layer_dnn_art(x_train.shape[1:], 0,
                                                   l1_reg_levels[l1_level], 0)
    classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

    # Evaluate the classifier on the test set
    preds = np.argmax(classifier.predict(x_test), axis=1)
    acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
    print("\nTest accuracy on L1 regularization level %.5f%%: %.2f%%" %
          (l1_reg_levels[l1_level], acc * 100))

    # Craft adversarial samples with CW attack
    adv_crafter = CarliniL2Method(classifier, targeted=False)
    x_test_adv = adv_crafter.generate(x=x_test[:1000])

    # Evaluate the classifier on the adversarial examples
    preds = np.argmax(classifier.predict(x_test_adv), axis=1)
    acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
    print(
        "\nTest accuracy on adversarial samples at L1 regularization level %.5f: %.2f%%"
        % (l1_reg_levels[l1_level], acc * 100))

    # Calculate the average perturbation in L1 and L2 norms. Note that I don't de-normalize the values.
    perturbations = np.absolute((x_test_adv - x_test[:1000]))
    l1_perturbations = [
        LA.norm(perturbation, 1) for perturbation in perturbations
    ]
    l2_perturbations = [
        LA.norm(perturbation, 2) for perturbation in perturbations
    ]
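
    # Continuation sketch (assumed): the comment above promises the average
    # perturbation in each norm, so report the means.
    print("Average L1 perturbation: %.4f" % np.mean(l1_perturbations))
    print("Average L2 perturbation: %.4f" % np.mean(l2_perturbations))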
Example #7
adv_plt = np.column_stack([
    nat_img[0].reshape(28, 28), adv_nse[0].reshape(28, 28),
    adv_img[0].reshape(28, 28)
])

fig = plt.figure()
plt.imshow(adv_plt)
plt.title("IFGSM:y_test:{},y_adv:{} -- Var: {}".format(np.argmax(y_test[0]),
                                                       adv_prd[0], adv_var))
fig.set_size_inches(24, 12)
fo = ddir + "IFGSM-O{}A{}_varx{}_examp.png".format(np.argmax(y_test[0]),
                                                   adv_prd[0], adv_var)
fig.savefig(fo, dpi=100)

################ CW
adv_crafter = CarliniL2Method(classifier)
# Random targets (one-hot)
y_test_tar = np.random.randint(0, 10, len(y_test))
y_test_tarm = np.zeros(y_test.shape)
y_test_tarm[np.arange(0, y_test.shape[0]), y_test_tar] = 1
x_test_adv = adv_crafter.generate(x=x_test, y=y_test_tarm)
# Evaluate the classifier on the adversarial examples
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))
# Grab the misclassified examples to inspect
a = (preds != np.argmax(y_test, axis=1))
nat_img = x_test[a]
adv_img = x_test_adv[a]
adv_nse = adv_img - nat_img
adv_prd = preds[a]
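
# Presumably the same triptych figure as for IFGSM above follows here; a sketch
# mirroring that plotting code (plt, ddir and the figure sizing reused from above).
adv_plt = np.column_stack([
    nat_img[0].reshape(28, 28), adv_nse[0].reshape(28, 28),
    adv_img[0].reshape(28, 28)
])
fig = plt.figure()
plt.imshow(adv_plt)
plt.title("CW:y_test:{},y_adv:{}".format(np.argmax(y_test[a][0]), adv_prd[0]))
fig.set_size_inches(24, 12)
fig.savefig(ddir + "CW_examp.png", dpi=100)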
Example #8
    def test_ptclassifier(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        # Get MNIST
        batch_size, nb_train, nb_test = 100, 5000, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:nb_train], y_train[:nb_train]
        x_test, y_test = x_test[:nb_test], y_test[:nb_test]
        x_train = np.swapaxes(x_train, 1, 3)
        x_test = np.swapaxes(x_test, 1, 3)

        # Create simple CNN
        # Define the network
        model = Model()

        # Define a loss function and optimizer
        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # Get classifier
        ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28),
                                10)
        ptc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=10)

        # First attack
        cl2m = CarliniL2Method(classifier=ptc,
                               targeted=True,
                               max_iter=100,
                               binary_search_steps=1,
                               learning_rate=1,
                               initial_const=10,
                               decay=0)
        params = {'y': random_targets(y_test, ptc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())

        # Second attack
        cl2m = CarliniL2Method(classifier=ptc,
                               targeted=False,
                               max_iter=100,
                               binary_search_steps=1,
                               learning_rate=1,
                               initial_const=10,
                               decay=0)
        params = {'y': random_targets(y_test, ptc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target != y_pred_adv).any())

        # Third attack
        cl2m = CarliniL2Method(classifier=ptc,
                               targeted=False,
                               max_iter=100,
                               binary_search_steps=1,
                               learning_rate=1,
                               initial_const=10,
                               decay=0)
        params = {}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        y_pred = np.argmax(ptc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((y_pred != y_pred_adv).any())
Example #9
    def test_tfclassifier(self):
        """
        First test with the TFClassifier.
        :return:
        """
        # Build a TFClassifier
        # Define input and output placeholders
        self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        self._output_ph = tf.placeholder(tf.int32, shape=[None, 10])

        # Define the tensorflow graph
        conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu)
        conv = tf.layers.max_pooling2d(conv, 2, 2)
        fc = tf.contrib.layers.flatten(conv)

        # Logits layer
        self._logits = tf.layers.dense(fc, 10)

        # Train operator
        self._loss = tf.reduce_mean(
            tf.losses.softmax_cross_entropy(logits=self._logits,
                                            onehot_labels=self._output_ph))
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        self._train = optimizer.minimize(self._loss)

        # Tensorflow session and initialization
        self._sess = tf.Session()
        self._sess.run(tf.global_variables_initializer())

        # Get MNIST
        batch_size, nb_train, nb_test = 100, 5000, 10
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:nb_train], y_train[:nb_train]
        x_test, y_test = x_test[:nb_test], y_test[:nb_test]

        # Train the classifier
        tfc = TFClassifier((0, 1), self._input_ph, self._logits,
                           self._output_ph, self._train, self._loss, None,
                           self._sess)
        tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=10)

        # First attack
        cl2m = CarliniL2Method(classifier=tfc,
                               targeted=True,
                               max_iter=100,
                               binary_search_steps=1,
                               learning_rate=1,
                               initial_const=10,
                               decay=0)
        params = {'y': random_targets(y_test, tfc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        print("CW2 Target: %s" % target)
        print("CW2 Actual: %s" % y_pred_adv)
        print("CW2 Success Rate: %f" %
              (sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack
        cl2m = CarliniL2Method(classifier=tfc,
                               targeted=False,
                               max_iter=100,
                               binary_search_steps=1,
                               learning_rate=1,
                               initial_const=10,
                               decay=0)
        params = {'y': random_targets(y_test, tfc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        print("CW2 Target: %s" % target)
        print("CW2 Actual: %s" % y_pred_adv)
        print("CW2 Success Rate: %f" %
              (sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())

        # Third attack
        cl2m = CarliniL2Method(classifier=tfc,
                               targeted=False,
                               max_iter=100,
                               binary_search_steps=1,
                               learning_rate=1,
                               initial_const=10,
                               decay=0)
        params = {}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        y_pred = np.argmax(tfc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        print("CW2 Target: %s" % y_pred)
        print("CW2 Actual: %s" % y_pred_adv)
        print("CW2 Success Rate: %f" %
              (sum(y_pred != y_pred_adv) / float(len(y_pred))))
        self.assertTrue((y_pred != y_pred_adv).any())
Example #10
    dropout_classifier = neural_networks.three_layer_dnn(
        x_train.shape[1:], 300, 100, dropout, 0, 0)
    baseline_classifier = neural_networks.three_layer_dnn(
        x_train.shape[1:], 300, 100, 0, 0, 0)
    dropout_classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)
    baseline_classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

    true_labels = np.argmax(y_test[:1000], axis=1)
    target_labels = np.array([to_one_hot((c + 1) % 10) for c in true_labels])

    # Craft adversarial samples with CW attack
    # We direct the attacks to find an adversarial sample with class (true label + 1) mod 10.
    baseline_attacker = CarliniL2Method(baseline_classifier,
                                        targeted=True,
                                        binary_search_steps=30,
                                        confidence=float(confidence * 5))
    dropout_attacker = CarliniL2Method(dropout_classifier,
                                       targeted=True,
                                       binary_search_steps=30,
                                       confidence=float(confidence * 5))
    x_adv_baseline = baseline_attacker.generate(x=x_test[:1000],
                                                y=target_labels)
    x_adv_dropout = dropout_attacker.generate(x=x_test[:1000], y=target_labels)

    # Verify transferability percentage on dropout adversarial examples to baseline model
    baseline_preds = np.argmax(baseline_classifier.predict(x_adv_dropout),
                               axis=1)
    baseline_transfer = (np.sum(baseline_preds == true_labels) / 1000) * 100
    print(
        "\nAccuracy on adversarial samples generated on the dropout model "
        "evaluated by the baseline model: %.2f%%" % baseline_transfer)
Example #11
# 16 dropout levels (head of the list assumed: 0.0 to 0.75 in steps of 0.05,
# matching range(0, 16) below)
dropout_levels = [
    0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6,
    0.65, 0.7, 0.75
]

for dropout in range(0, 16):
    classifier = neural_networks.three_layer_dnn(x_train.shape[1:], 300, 100,
                                                 dropout_levels[dropout], 0, 0)
    classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

    # Evaluate the classifier on the test set
    preds = np.argmax(classifier.predict(x_test), axis=1)
    acc = (np.sum(preds == np.argmax(y_test, axis=1)) / len(y_test)) * 100
    print("\nTest accuracy on dropout level %.2f%%: %.3f%%" %
          (dropout_levels[dropout], acc))

    # Craft adversarial samples with CW attack
    attacker = CarliniL2Method(classifier, targeted=False)
    x_real = x_test[:1000]
    y_real = np.argmax(y_test[:1000], axis=1)
    x_test_adv = attacker.generate(x_real)

    # Evaluate the classifier on the adversarial examples
    adversarial_preds = np.argmax(classifier.predict(x_test_adv), axis=1)
    adversarial_acc = (np.sum(adversarial_preds == y_real) / len(y_real)) * 100
    print("\nTest accuracy on adversarial sample for dropout %.2f%%: %.3f%%" %
          (dropout_levels[dropout], adversarial_acc))

    # Calculate the average perturbation in L1 and L2 norms. Note that I don't de-normalize the values.
    perturbations = np.absolute((x_test_adv - x_real))
    l1_perturbations = [
        LA.norm(perturbation, 1) for perturbation in perturbations
    ]
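
    # Continuation sketch (assumed, mirroring the L1-regularization example
    # above): L2 norms plus the averages promised by the comment.
    l2_perturbations = [
        LA.norm(perturbation, 2) for perturbation in perturbations
    ]
    print("Average L1 perturbation: %.4f" % np.mean(l1_perturbations))
    print("Average L2 perturbation: %.4f" % np.mean(l2_perturbations))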
Example #12
    def attack(self, model=None, attack_str=""):
        imgs = self._load_images(attack_str, self._test_or_val_dataset)

        if self._test_or_val_dataset == "_x_test_set_":
            X = self.__data.x_test
            Y = self.__data.y_test
        else:
            X = self.__data.x_val
            Y = self.__data.y_val

        if imgs is not None:
            print('\n{0} adversarial examples using {1} attack loaded...\n'.
                  format(self.__dataset, self.__attack))
            return imgs

        if model is None:
            # Keras' fit() returns a History object; the trained model is
            # self.surrogate_model itself.
            self.surrogate_model.fit(self.__data.x_train,
                                     self.__data.y_train,
                                     verbose=1,
                                     epochs=self.__epochs,
                                     batch_size=128)
            wrap = KerasClassifier((0., 1.), model=self.surrogate_model)
        else:
            wrap = KerasClassifier((0., 1.), model=model)

        if self.__attack == 'FGSM':
            print('\nCrafting adversarial examples using FGSM attack...\n')
            fgsm = FastGradientMethod(wrap)

            if self.__data.dataset_name == 'MNIST':
                x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length],
                                             eps=0.2)
            else:
                x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length],
                                             eps=0.025)

            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset +
                "fgsm.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack.startswith("CW"):
            print('\nCrafting adversarial examples using CW attack...\n')
            cw = CarliniL2Method(wrap,
                                 confidence=0.0,
                                 targeted=False,
                                 binary_search_steps=1,
                                 learning_rate=0.2,
                                 initial_const=10,
                                 max_iter=100)
            x_adv_images = cw.generate(X[self.idx_adv][:self._length])

            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset + "cw.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack == 'BIM':
            print('\nCrafting adversarial examples using BIM attack...\n')

            if self.__dataset == 'MNIST':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.25,
                                           eps_step=0.2,
                                           max_iter=100,
                                           norm=np.inf)
            elif self.__dataset == 'CIFAR':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.025,
                                           eps_step=0.01,
                                           max_iter=1000,
                                           norm=np.inf)

            x_adv_images = bim.generate(x=X[self.idx_adv][:self._length])
            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset + "bim.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack == 'DEEPFOOL':
            print('\nCrafting adversarial examples using DeepFool attack...\n')

            deepfool = DeepFool(wrap)
            x_adv_images = deepfool.generate(x=X[self.idx_adv][:self._length])
            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset +
                "deepfool.pkl")
            helpers.save_pkl(x_adv_images, path)

        return x_adv_images
Example #13
File: t.py  Project: tongwu2020/models
import torch
import torch.nn.functional as F
import torchfile
import numpy as np
import shutil
import re
from torchvision import datasets, transforms
from vgg_face import VGG_16

from art.attacks.carlini import CarliniL2Method

if __name__ == '__main__':
    model = torch.load('/home/research/tongwu/glass/donemodel/model2.pkl')

    data_dir = '/home/research/tongwu/glass/test'
    image_datasets = datasets.ImageFolder(
        data_dir,
        transforms.Compose(
            [transforms.Resize(size=(224, 224)),
             transforms.ToTensor()]))

    dataloaders = torch.utils.data.DataLoader(image_datasets,
                                              batch_size=1,
                                              shuffle=True)

    class_names = image_datasets.classes
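
    # --- Sketch (assumption, not part of the original t.py): CarliniL2Method
    # needs an ART classifier wrapper around the PyTorch model, plus a NumPy
    # batch of inputs. The loss, optimizer, clip values, input shape and class
    # count below mirror the PyTorchClassifier example earlier on this page.
    import torch.nn as nn
    import torch.optim as optim
    from itertools import islice
    from art.classifiers import PyTorchClassifier

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    classifier = PyTorchClassifier((0, 1), model, loss_fn, optimizer,
                                   (3, 224, 224), len(class_names))

    # Gather the first 100 images into a NumPy array to attack.
    x_test = np.concatenate(
        [inputs.numpy() for inputs, _ in islice(dataloaders, 100)])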

    attack = CarliniL2Method(classifier=classifier)

    x_test_adv = attack.generate(x_test[:100])

attacker = FastGradientMethod(classifier, eps=0.5)
Example #14
    def test_krclassifier(self):
        """
        Second test with the KerasClassifier.
        :return:
        """
        # Initialize a tf session
        session = tf.Session()
        k.set_session(session)

        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Create simple CNN
        model = Sequential()
        model.add(
            Conv2D(4,
                   kernel_size=(5, 5),
                   activation='relu',
                   input_shape=(28, 28, 1)))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(10, activation='softmax'))

        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Get classifier
        krc = KerasClassifier((0, 1), model, use_logits=False)
        krc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)

        # First attack
        cl2m = CarliniL2Method(classifier=krc,
                               targeted=True,
                               max_iter=100,
                               binary_search_steps=1,
                               learning_rate=1,
                               initial_const=10,
                               decay=0)
        params = {'y': random_targets(y_test, krc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        print("CW2 Target: %s" % target)
        print("CW2 Actual: %s" % y_pred_adv)
        print("CW2 Success Rate: %f" %
              (sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack
        cl2m = CarliniL2Method(classifier=krc,
                               targeted=False,
                               max_iter=100,
                               binary_search_steps=1,
                               learning_rate=1,
                               initial_const=10,
                               decay=0)
        params = {'y': random_targets(y_test, krc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        print("CW2 Target: %s" % target)
        print("CW2 Actual: %s" % y_pred_adv)
        print("CW2 Success Rate: %f" %
              (sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())

        # Third attack
        cl2m = CarliniL2Method(classifier=krc,
                               targeted=False,
                               max_iter=100,
                               binary_search_steps=1,
                               learning_rate=1,
                               initial_const=10,
                               decay=0)
        params = {}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1.0001).all())
        self.assertTrue((x_test_adv >= -0.0001).all())
        y_pred = np.argmax(krc.predict(x_test), axis=1)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        print("CW2 Target: %s" % y_pred)
        print("CW2 Actual: %s" % y_pred_adv)
        print("CW2 Success Rate: %f" %
              (sum(y_pred != y_pred_adv) / float(len(y_pred))))
        self.assertTrue((y_pred != y_pred_adv).any())
Example #15
def evaluation(x_test, y_test, classify_idx_lst, model, test_acc, ws,
               current_line, attack_name, flag, column_i):

    classifier = KerasClassifier((0., 1.), model=model)

    if attack_name == "FGM":
        # =========================== Parameter settings ========================== #
        # Maximum perturbation
        # Order of the norm
        parameter_lst = [[10, 1], [20, 1], [30, 1], [40, 1], [50, 1], [60, 1],
                         [70, 1], [80, 1], [90, 1], [100, 1], [1, 2], [2, 2],
                         [3, 2], [4, 2], [5, 2], [6, 2], [7, 2], [8, 2],
                         [9, 2], [10, 2], [0.05, np.inf], [0.10, np.inf],
                         [0.15, np.inf], [0.20, np.inf], [0.25, np.inf],
                         [0.30, np.inf], [0.35, np.inf], [0.40, np.inf],
                         [0.45, np.inf], [0.50, np.inf]]
        # =========================== Run the attack ============================= #
        for [epsilon, norm_type] in parameter_lst:
            # print("current parameter: " + str(epsilon) + ", " + str(norm_type))
            adv_crafter = FastGradientMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              eps=epsilon,
                                              norm=norm_type)
            score = model.evaluate(x_test_adv,
                                   y_test[classify_idx_lst],
                                   verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(round(epsilon, 4)) + ", " + str(norm_type) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "BIM":
        # =========================== Parameter settings ========================== #
        # Order of the norm
        # Maximum perturbation that the attacker can introduce
        # Attack step size (input variation) at each iteration
        # The maximum number of iterations.
        parameter_lst = [[1, 20.0, 2.0, 10], [1, 20.0, 4.0, 10],
                         [1, 20.0, 6.0, 10], [1, 20.0, 8.0, 10],
                         [1, 20.0, 10.0, 10], [1, 20.0, 2.0, 50],
                         [1, 20.0, 4.0, 50], [1, 20.0, 6.0, 50],
                         [1, 20.0, 8.0, 50], [1, 20.0, 10.0, 50],
                         [2, 2.0, 0.2, 10], [2, 2.0, 0.4, 10],
                         [2, 2.0, 0.6, 10], [2, 2.0, 0.8, 10],
                         [2, 2.0, 1.0, 10], [2, 2.0, 0.2, 50],
                         [2, 2.0, 0.4, 50], [2, 2.0, 0.6, 50],
                         [2, 2.0, 0.8, 50], [2, 2.0, 1.0, 50],
                         [np.inf, 0.1, 0.002, 10], [np.inf, 0.1, 0.004, 10],
                         [np.inf, 0.1, 0.006, 10], [np.inf, 0.1, 0.008, 10],
                         [np.inf, 0.1, 0.010, 10], [np.inf, 0.1, 0.002, 50],
                         [np.inf, 0.1, 0.004, 50], [np.inf, 0.1, 0.006, 50],
                         [np.inf, 0.1, 0.008, 50], [np.inf, 0.1, 0.010, 50]]
        # =========================== Run the attack ============================= #
        for [norm_type, epsilon, epsilon_step, max_iteration] in parameter_lst:
            # print("current parameter: " + str(norm_type) + ", " + str(epsilon) + ", " + str(epsilon_step) + ", " + str(
            #     max_iteration))
            adv_crafter = BasicIterativeMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              norm=norm_type,
                                              eps=epsilon,
                                              eps_step=epsilon_step,
                                              max_iter=max_iteration)
            score = model.evaluate(x_test_adv,
                                   y_test[classify_idx_lst],
                                   verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(norm_type) + ", " + str(round(epsilon, 4)) + ", " +
                str(round(epsilon_step, 4)) + ", " + str(max_iteration) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "JSMA":
        # =========================== Parameter settings ========================== #
        # Perturbation introduced to each modified feature per step (can be positive or negative).
        # Maximum percentage of perturbed features (between 0 and 1).
        parameter_lst = [[0.5, 0.5], [0.4, 0.5], [0.3, 0.5], [0.2, 0.5],
                         [0.1, 0.5], [-0.1, 0.5], [-0.2, 0.5], [-0.3, 0.5],
                         [-0.4, 0.5], [-0.5, 0.5]]
        # =========================== Run the attack ============================= #
        for [theta, gamma] in parameter_lst:
            # print("current parameter: " + str(theta) + ", " + str(gamma))
            adv_crafter = SaliencyMapMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              theta=theta,
                                              gamma=gamma)
            score = model.evaluate(x_test_adv,
                                   y_test[classify_idx_lst],
                                   verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(round(theta, 4)) + ", " + str(round(gamma, 4)) + ")")

            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "DeepFool":
        # =========================== Parameter settings ========================== #
        # The maximum number of iterations.
        # Overshoot parameter.
        parameter_lst = [[2, 0.10], [4, 0.10], [6, 0.10], [8, 0.10],
                         [10, 0.10], [12, 0.10], [14, 0.10], [16, 0.10],
                         [18, 0.10], [20, 0.10]]
        # =========================== Run the attack ============================= #
        for [max_iteration, epsilon] in parameter_lst:
            # print("current parameter: " + str(max_iteration) + ", " + str(epsilon))
            adv_crafter = DeepFool(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              max_iter=max_iteration,
                                              epsilon=epsilon)
            score = model.evaluate(x_test_adv,
                                   y_test[classify_idx_lst],
                                   verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(max_iteration) + ", " + str(round(epsilon, 4)) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1

    elif attack_name == "CW-L2":
        # =========================== Parameter settings ========================== #
        # confidence: confidence of the adversarial examples; a higher value
        #         produces examples that are farther away from the original input,
        #         but classified with higher confidence as the target class.
        # The maximum number of iterations.
        parameter_lst = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]]
        # =========================== Run the attack ============================= #
        for [confidence_value, max_iter_value] in parameter_lst:
            # print("current parameter: " + str(confidence_value) + ", " + str(max_iter_value))
            adv_crafter = CarliniL2Method(classifier)
            sum_adv_acc = 0
            for adv_label in range(0, 10):
                one_hot_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                one_hot_label[adv_label] = 1
                x_test_adv = adv_crafter.generate(
                    x=x_test[classify_idx_lst],
                    confidence=confidence_value,
                    targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot_label] *
                               x_test[classify_idx_lst].shape[0]))
                score = model.evaluate(x_test_adv,
                                       y_test[classify_idx_lst],
                                       verbose=0)
                acc = score[1]
                sum_adv_acc += acc
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1, "(" + str(round(confidence_value, 4)) + ", " +
                str(max_iter_value) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, sum_adv_acc / 10)
            current_line += 1

    elif attack_name == "CW-Linf":
        # =========================== Parameter settings ========================== #
        # confidence: confidence of the adversarial examples; a higher value
        #         produces examples that are farther away from the original input,
        #         but classified with higher confidence as the target class.
        # The maximum number of iterations.
        parameter_lst = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]]
        # =========================== Run the attack ============================= #
        for [confidence_value, max_iter_value] in parameter_lst:
            # print("current parameter: " + str(confidence_value) + ", " + str(max_iter_value))
            adv_crafter = CarliniLInfMethod(classifier)
            sum_adv_acc = 0
            for adv_label in range(0, 10):
                one_hot_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                one_hot_label[adv_label] = 1
                x_test_adv = adv_crafter.generate(
                    x=x_test[classify_idx_lst],
                    confidence=confidence_value,
                    targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot_label] *
                               x_test[classify_idx_lst].shape[0]))
                score = model.evaluate(x_test_adv,
                                       y_test[classify_idx_lst],
                                       verbose=0)
                acc = score[1]
                sum_adv_acc += acc
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1, "(" + str(round(confidence_value, 4)) + ", " +
                str(max_iter_value) + ")")
            if flag in ("ori", "adv"):
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, sum_adv_acc / 10)
            current_line += 1

    current_line += 1
    # print("\n------------------------------------------------")
    return ws, current_line
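
A hedged usage sketch; the names below are assumptions (ws follows the xlwt
worksheet interface ws.write(row, col, value) used above, and model, x_test,
y_test, classify_idx_lst come from the caller):

import xlwt

wb = xlwt.Workbook()
ws = wb.add_sheet("attacks")
ws, next_line = evaluation(x_test, y_test, classify_idx_lst, model,
                           test_acc=0.98, ws=ws, current_line=0,
                           attack_name="FGM", flag="ori", column_i=0)
wb.save("attack_results.xls")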
                                                    "mnist")
        elif args.experiment_type == "leNet5":
            classifier = convolutional.leNet_cnn_single(
                dropout_levels[dropout])

        classifier.fit(x_train, y_train, nb_epochs=50, batch_size=128)

        # Evaluate the classifier on the test set
        preds = np.argmax(classifier.predict(x_test), axis=1)
        acc = (np.sum(preds == np.argmax(y_test, axis=1)) / len(y_test)) * 100
        accuracy.append(acc)

        # Craft adversarial samples with CW attack

        attacker = CarliniL2Method(classifier,
                                   targeted=False,
                                   binary_search_steps=args.binary_steps,
                                   confidence=args.confidence)

        x_real = x_test[:1000]
        y_real = np.argmax(y_test[:1000], axis=1)
        x_test_adv = attacker.generate(x_real)

        # Calculate the average perturbation and MMD metric. Note that I don't de-normalize the values.
        perturbations = np.absolute((x_test_adv - x_real))
        l1_perturbations = [
            LA.norm(perturbation.flatten(), 1)
            for perturbation in perturbations
        ]
        l2_perturbations = [
            LA.norm(perturbation.flatten(), 2)
            for perturbation in perturbations
        ]
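
        # Continuation sketch (assumed): the comment above promises the average
        # perturbation, so report the means (the MMD computation is omitted).
        print("Average L1 perturbation: %.4f" % np.mean(l1_perturbations))
        print("Average L2 perturbation: %.4f" % np.mean(l2_perturbations))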