def test_class_gradient_target(self):
    """
    Check ``class_gradient`` for the single target label 3.

    The gradient shape is asserted, then one column and one row of the first
    sample's gradient are compared with stored reference values.
    """
    target_label = 3
    classifier = get_classifier_pt()
    grads = classifier.class_gradient(self.x_test, label=target_label)

    self.assertEqual(grads.shape, (NB_TEST, 1, 1, 28, 28))

    # Reference for grads[0, 0, 0, :, 14]
    column_reference = np.asarray([
        -0.00195835, -0.00134457, -0.00307221, -0.00340564, 0.00175022, -0.00239714, -0.00122619,
        0.0, 0.0, -0.00520899, -0.00046105, 0.00414874, -0.00171095, 0.00429184,
        0.0075138, 0.00792443, 0.0019566, 0.00035517, 0.00504575, -0.00037397, 0.00022343,
        -0.00530035, 0.0020528, 0.0, 0.0, 0.0, 0.0, 0.0,
    ])
    np.testing.assert_array_almost_equal(grads[0, 0, 0, :, 14], column_reference, decimal=4)

    # Reference for grads[0, 0, 0, 14, :]
    row_reference = np.asarray([
        5.0867130e-03, 4.8564533e-03, 6.1040395e-03, 8.6531248e-03, -6.0958802e-03, -1.4114541e-02,
        -7.1085966e-04, -5.0330797e-04, 1.2943064e-02, 8.2416134e-03, -1.9859453e-04, -9.8110031e-05,
        -3.8902226e-03, -1.2945874e-03, 7.5138002e-03, 1.7720887e-03, 3.1399354e-04, 2.3657191e-04,
        -3.0891625e-03, -1.0211228e-03, 2.0828887e-03, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
    ])
    np.testing.assert_array_almost_equal(grads[0, 0, 0, 14, :], row_reference, decimal=4)
def setUpClass(cls):
    """
    Prepare the fixtures stored on ``cls``.

    Loads MNIST, reshapes the images to ``(n, 1, 28, 28)`` float32, keeps the
    first ``NB_TRAIN`` / ``NB_TEST`` samples, and builds a
    ``DetectorClassifier`` from a pre-built classifier and a small detector
    network with a single output.
    """
    (train_images, train_labels), (test_images, test_labels), _, _ = load_dataset('mnist')

    train_images = np.reshape(train_images, (train_images.shape[0], 1, 28, 28)).astype(np.float32)
    test_images = np.reshape(test_images, (test_images.shape[0], 1, 28, 28)).astype(np.float32)

    cls.x_train = train_images[:NB_TRAIN]
    cls.y_train = train_labels[:NB_TRAIN]
    cls.x_test = test_images[:NB_TEST]
    cls.y_test = test_labels[:NB_TEST]

    # Internal classifier
    base_classifier = get_classifier_pt()

    # Internal detector: both layers are created first, then re-initialised,
    # in the same order as before so the random number stream is unchanged.
    conv_layer = nn.Conv2d(1, 16, 5)
    dense_layer = nn.Linear(2304, 1)
    torch.nn.init.xavier_uniform_(conv_layer.weight)
    torch.nn.init.xavier_uniform_(dense_layer.weight)

    detector_net = Model(nn.Sequential(conv_layer, nn.ReLU(), nn.MaxPool2d(2, 2), Flatten(), dense_layer))
    detector_loss = nn.CrossEntropyLoss()
    detector_optimizer = optim.Adam(detector_net.parameters(), lr=0.01)
    detector = PyTorchClassifier(
        model=detector_net,
        loss=detector_loss,
        optimizer=detector_optimizer,
        input_shape=(1, 28, 28),
        nb_classes=1,
        clip_values=(0, 1),
    )

    # Combined detector-classifier shared by the tests
    cls.detector_classifier = DetectorClassifier(classifier=base_classifier, detector=detector)
def test_loss_gradient(self):
    """
    Check ``loss_gradient`` on the test samples.

    The gradient shape is asserted, then one column and one row of the first
    sample's gradient are compared with stored reference values.
    """
    classifier = get_classifier_pt()
    grads = classifier.loss_gradient(self.x_test, self.y_test)

    self.assertEqual(grads.shape, (NB_TEST, 1, 28, 28))

    # Reference for grads[0, 0, :, 14]
    column_reference = np.asarray([
        3.6839640e-05, 3.2549749e-05, 7.7749821e-05, 8.3091691e-05, -3.7349419e-05, 6.3347623e-05,
        3.8059810e-05, 0.0000000e+00, 0.0000000e+00, -8.7319646e-04, -9.1992842e-05, 7.8577449e-04,
        -3.5397310e-04, 7.8797276e-04, 1.6001392e-03, 1.9111208e-03, 1.0337514e-03, 2.0264980e-04,
        1.5017156e-03, 2.5167916e-04, -4.8513880e-06, -8.3324237e-04, 2.1826664e-04, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
    ])
    np.testing.assert_array_almost_equal(grads[0, 0, :, 14], column_reference, decimal=4)

    # Reference for grads[0, 0, 14, :]
    row_reference = np.asarray([
        8.3541102e-04, 7.9759455e-04, 9.6892234e-04, 1.1802778e-03, -6.0561800e-04, -1.6849663e-03,
        2.7197969e-04, 4.3571385e-05, 1.2168724e-03, 4.9924687e-04, 4.7540435e-04, -3.6275905e-04,
        -1.1702902e-03, -7.0383825e-04, 1.6001392e-03, 6.1103603e-04, -5.1674922e-04, 1.6046617e-04,
        -6.3084543e-04, -2.0675475e-04, 4.2173881e-04, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
    ])
    np.testing.assert_array_almost_equal(grads[0, 0, 14, :], row_reference, decimal=4)
def test_pytorch_mnist(self):
    """
    Run ``UniversalPerturbation`` (with the "newtonfool" attacker) on the PyTorch classifier.

    :return:
    """
    (train_x, train_y), (test_x, test_y) = self.mnist
    train_x = np.swapaxes(train_x, 1, 3).astype(np.float32)
    test_x = np.swapaxes(test_x, 1, 3).astype(np.float32)
    test_x_backup = test_x.copy()

    # Classifier under attack
    ptc = get_classifier_pt()

    # Fit the perturbation on the training data
    attack = UniversalPerturbation(ptc, max_iter=1, attacker="newtonfool", attacker_params={"max_iter": 5})
    train_x_adv = attack.generate(train_x)
    self.assertTrue((attack.fooling_rate >= 0.2) or not attack.converged)

    # Apply the learned noise to the test data
    test_x_adv = test_x + attack.noise
    self.assertFalse((test_x == test_x_adv).all())

    train_pred = np.argmax(ptc.predict(train_x_adv), axis=1)
    test_pred = np.argmax(ptc.predict(test_x_adv), axis=1)
    self.assertFalse((np.argmax(test_y, axis=1) == test_pred).all())
    self.assertFalse((np.argmax(train_y, axis=1) == train_pred).all())

    # The clean test data must not have been modified by attack or classifier
    largest_change = float(np.max(np.abs(test_x_backup - test_x)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_pytorch_mnist(self):
    """
    Run ``NewtonFool`` on the PyTorch classifier.

    Checks that the adversarial samples differ from the clean ones and that,
    for each sample, the clean top score is at least 0.9 times the
    adversarial score at the same class position.

    :return:
    """
    (_, _), (test_x, _) = self.mnist
    test_x = np.swapaxes(test_x, 1, 3).astype(np.float32)
    test_x_backup = test_x.copy()

    # Classifier under attack
    ptc = get_classifier_pt()

    attack = NewtonFool(ptc, max_iter=5, batch_size=100)
    test_x_adv = attack.generate(test_x)
    self.assertFalse((test_x == test_x_adv).all())

    clean_scores = ptc.predict(test_x)
    adv_scores = ptc.predict(test_x_adv)

    # Boolean mask marking the top-scoring entries of the clean predictions
    top_mask = clean_scores.max(axis=1, keepdims=True) == clean_scores
    clean_top = clean_scores.max(axis=1)
    adv_at_top = adv_scores[top_mask]
    self.assertTrue((clean_top >= .9 * adv_at_top).all())

    # The clean test data must not have been modified by attack or classifier
    largest_change = float(np.max(np.abs(test_x_backup - test_x)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_fit_generator(self):
    """
    Train the classifier through a ``PyTorchDataGenerator`` and compare accuracy before and after.

    The accuracy before fitting is expected to be exactly 0.4 and the
    accuracy after two epochs to be 0.75 within a delta of 0.1.
    """
    classifier = get_classifier_pt()

    def _test_accuracy():
        """Return the fraction of test samples whose arg-max prediction matches the label."""
        predicted = np.argmax(classifier.predict(self.x_test), axis=1)
        actual = np.argmax(self.y_test, axis=1)
        return np.sum(predicted == actual) / NB_TEST

    accuracy_before = _test_accuracy()
    logger.info('Accuracy: %.2f%%', (accuracy_before * 100))

    # Wrap the training arrays in tensors
    features = torch.from_numpy(self.x_train).float()
    labels = torch.from_numpy(self.y_train)

    # PyTorch dataset/loader and the generator around them
    train_set = torch.utils.data.TensorDataset(features, labels)
    loader = DataLoader(dataset=train_set, batch_size=5, shuffle=True)
    generator = PyTorchDataGenerator(loader, size=NB_TRAIN, batch_size=5)

    # Fit model with generator
    classifier.fit_generator(generator, nb_epochs=2)

    accuracy_after = _test_accuracy()
    logger.info('Accuracy: %.2f%%', (accuracy_after * 100))

    self.assertEqual(accuracy_before, 0.4)
    self.assertAlmostEqual(accuracy_after, 0.75, delta=0.1)
def test_pytorch(self):
    """
    Generate an ``AdversarialPatch`` on the PyTorch classifier and compare it with reference values.

    :return:
    """
    ptc = get_classifier_pt()

    sample_count = self.x_train.shape[0]
    train_x = np.reshape(self.x_train, (sample_count, 1, 28, 28)).astype(np.float32)

    attack = AdversarialPatch(
        ptc,
        rotation_max=22.5,
        scale_min=0.1,
        scale_max=1.0,
        learning_rate=5.0,
        batch_size=10,
        max_iter=500,
    )
    patch, _ = attack.generate(train_x)

    # Two individual patch entries and the overall sum must match the references
    self.assertAlmostEqual(patch[0, 8, 8], -3.143605902784875, delta=0.1)
    self.assertAlmostEqual(patch[0, 14, 14], 19.790434152473054, delta=0.1)
    self.assertAlmostEqual(float(np.sum(patch)), 383.068, delta=0.1)
def test_fit_predict(self):
    """Check that the pre-built classifier reaches an accuracy of exactly 0.4 on the test samples."""
    classifier = get_classifier_pt()

    predicted_labels = np.argmax(classifier.predict(self.x_test), axis=1)
    true_labels = np.argmax(self.y_test, axis=1)
    accuracy = np.sum(predicted_labels == true_labels) / NB_TEST

    logger.info('Accuracy after fitting: %.2f%%', (accuracy * 100))
    self.assertEqual(accuracy, 0.4)
def test_pytorch_mnist(self):
    """
    Run ``DeepFool`` on the PyTorch classifier (built with ``from_logits=True``).

    Logs clean and adversarial accuracies and asserts that the attack changes
    both the samples and at least some of the predicted labels.
    """
    (train_x, train_y), (test_x, test_y) = self.mnist
    train_x = np.reshape(train_x, (train_x.shape[0], 1, 28, 28)).astype(np.float32)
    test_x = np.reshape(test_x, (test_x.shape[0], 1, 28, 28)).astype(np.float32)
    test_x_backup = test_x.copy()

    def _accuracy(one_hot_pred, one_hot_true):
        """Return the fraction of rows whose arg-max agrees between the two arrays."""
        matches = np.argmax(one_hot_pred, axis=1) == np.argmax(one_hot_true, axis=1)
        return np.sum(matches) / one_hot_true.shape[0]

    # Create basic PyTorch model
    classifier = get_classifier_pt(from_logits=True)

    train_scores = get_labels_np_array(classifier.predict(train_x))
    accuracy = _accuracy(train_scores, train_y)
    logger.info('[PyTorch, MNIST] Accuracy on training set: %.2f%%', (accuracy * 100))

    test_scores = get_labels_np_array(classifier.predict(test_x))
    accuracy = _accuracy(test_scores, test_y)
    logger.info('[PyTorch, MNIST] Accuracy on test set: %.2f%%', (accuracy * 100))

    # Attack both splits
    attack = DeepFool(classifier, max_iter=5, batch_size=11)
    train_x_adv = attack.generate(train_x)
    test_x_adv = attack.generate(test_x)
    self.assertFalse((train_x == train_x_adv).all())
    self.assertFalse((test_x == test_x_adv).all())

    train_pred_adv = get_labels_np_array(classifier.predict(train_x_adv))
    test_pred_adv = get_labels_np_array(classifier.predict(test_x_adv))
    self.assertFalse((train_y == train_pred_adv).all())
    self.assertFalse((test_y == test_pred_adv).all())

    accuracy = _accuracy(train_pred_adv, train_y)
    logger.info('Accuracy on adversarial train examples: %.2f%%', (accuracy * 100))

    accuracy = _accuracy(test_pred_adv, test_y)
    logger.info('Accuracy on adversarial test examples: %.2f%%', (accuracy * 100))

    # The clean test data must not have been modified by attack or classifier
    largest_change = float(np.max(np.abs(test_x_backup - test_x)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_pytorch_mnist(self):
    """Log the clean accuracy of the PyTorch classifier, then delegate to ``_test_backend_mnist``."""
    (train_x, train_y), (test_x, test_y) = self.mnist
    train_x = np.swapaxes(train_x, 1, 3).astype(np.float32)
    test_x = np.swapaxes(test_x, 1, 3).astype(np.float32)

    classifier = get_classifier_pt()

    train_scores = get_labels_np_array(classifier.predict(train_x))
    train_hits = np.argmax(train_scores, axis=1) == np.argmax(train_y, axis=1)
    acc = np.sum(train_hits) / train_y.shape[0]
    logger.info('[PyTorch, MNIST] Accuracy on training set: %.2f%%', (acc * 100))

    test_scores = get_labels_np_array(classifier.predict(test_x))
    test_hits = np.argmax(test_scores, axis=1) == np.argmax(test_y, axis=1)
    acc = np.sum(test_hits) / test_y.shape[0]
    logger.info('[PyTorch, MNIST] Accuracy on test set: %.2f%%', (acc * 100))

    self._test_backend_mnist(classifier, test_x, test_y)
def test_pytorch_mnist(self):
    """
    Run targeted and untargeted ``BoundaryAttack`` on the PyTorch classifier.

    :return:
    """
    (_, _), (test_x, test_y) = self.mnist
    test_x = np.swapaxes(test_x, 1, 3).astype(np.float32)
    test_x = np.reshape(test_x, (test_x.shape[0], 1, 28, 28)).astype(np.float32)
    test_x_backup = test_x.copy()

    # Classifier under attack
    ptc = get_classifier_pt()

    # Targeted attack towards random target classes
    attack = BoundaryAttack(classifier=ptc, targeted=True, max_iter=20)
    params = {'y': random_targets(test_y, ptc.nb_classes())}
    test_x_adv = attack.generate(test_x, **params)

    self.assertFalse((test_x == test_x_adv).all())
    self.assertTrue((test_x_adv <= 1.0001).all())
    self.assertTrue((test_x_adv >= -0.0001).all())

    target_labels = np.argmax(params['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(test_x_adv), axis=1)
    self.assertTrue((target_labels == adv_labels).any())

    # Untargeted attack
    attack = BoundaryAttack(classifier=ptc, targeted=False, max_iter=20)
    test_x_adv = attack.generate(test_x)

    self.assertFalse((test_x == test_x_adv).all())
    self.assertTrue((test_x_adv <= 1.0001).all())
    self.assertTrue((test_x_adv >= -0.0001).all())

    clean_labels = np.argmax(ptc.predict(test_x), axis=1)
    adv_labels = np.argmax(ptc.predict(test_x_adv), axis=1)
    self.assertTrue((clean_labels != adv_labels).any())

    # The clean test data must not have been modified by attack or classifier
    largest_change = float(np.max(np.abs(test_x_backup - test_x)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_pytorch_mnist_L2(self):
    """
    Run targeted and untargeted ``CarliniL2Method`` on the PyTorch classifier (``from_logits=True``).

    :return:
    """
    (_, _), (test_x, test_y) = self.mnist
    test_x = np.reshape(test_x, (test_x.shape[0], 1, 28, 28)).astype(np.float32)
    test_x_backup = test_x.copy()

    # Classifier under attack
    ptc = get_classifier_pt(from_logits=True)

    # Targeted attack towards random target classes
    attack = CarliniL2Method(classifier=ptc, targeted=True, max_iter=10)
    params = {'y': random_targets(test_y, ptc.nb_classes())}
    test_x_adv = attack.generate(test_x, **params)

    self.assertFalse((test_x == test_x_adv).all())
    self.assertLessEqual(np.amax(test_x_adv), 1.0)
    self.assertGreaterEqual(np.amin(test_x_adv), 0.0)

    target_labels = np.argmax(params['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(test_x_adv), axis=1)
    self.assertTrue((target_labels == adv_labels).any())
    logger.info('CW2 Success Rate: %.2f', (sum(target_labels == adv_labels) / float(len(target_labels))))

    # Untargeted attack; predictions are compared with the random targets drawn above
    attack = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10)
    test_x_adv = attack.generate(test_x)

    self.assertLessEqual(np.amax(test_x_adv), 1.0)
    self.assertGreaterEqual(np.amin(test_x_adv), 0.0)

    target_labels = np.argmax(params['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(test_x_adv), axis=1)
    self.assertTrue((target_labels != adv_labels).any())
    logger.info('CW2 Success Rate: %.2f', (sum(target_labels != adv_labels) / float(len(target_labels))))

    # The clean test data must not have been modified by attack or classifier
    largest_change = float(np.max(np.abs(test_x_backup - test_x)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_pytorch_resume(self):
    """
    Check that resuming ``HopSkipJump`` from an earlier result shrinks the perturbation.

    The attack is run once, then resumed twice from its own output; the norm
    of the difference to ``x_test`` after resuming must be smaller than after
    the first run.
    """
    (_, _), (test_x, test_y) = self.mnist
    test_x = np.reshape(test_x, (test_x.shape[0], 1, 28, 28)).astype(np.float32)

    # Classifier under attack
    ptc = get_classifier_pt()

    attack = HopSkipJump(classifier=ptc, targeted=True, max_iter=10, max_eval=100, init_eval=10)
    source = test_x[0:1]

    # Initial run, starting from sample 2 with its label as the target
    params = {'y': test_y[2:3], 'x_adv_init': test_x[2:3]}
    adv_first = attack.generate(source, **params)
    norm_first = np.linalg.norm(adv_first - test_x)

    # Resume from the first result ...
    params.update(resume=True, x_adv_init=adv_first)
    adv_resumed = attack.generate(source, **params)

    # ... and once more from the resumed result
    params.update(x_adv_init=adv_resumed)
    adv_resumed = attack.generate(source, **params)
    norm_resumed = np.linalg.norm(adv_resumed - test_x)

    self.assertGreater(norm_first, norm_resumed)
def test_pytorch_mnist(self):
    """
    Run targeted and untargeted ``ElasticNet`` on the PyTorch classifier (``from_logits=False``).

    :return:
    """
    (_, _), (test_x, test_y) = self.mnist
    test_x = np.reshape(test_x, (test_x.shape[0], 1, 28, 28)).astype(np.float32)
    test_x_backup = test_x.copy()

    # Classifier under attack
    ptc = get_classifier_pt(from_logits=False)

    # Targeted attack towards random target classes
    attack = ElasticNet(classifier=ptc, targeted=True, max_iter=2)
    params = {'y': random_targets(test_y, ptc.nb_classes())}
    test_x_adv = attack.generate(test_x, **params)

    # Reference for test_x_adv[2, 0, :, 14]
    column_reference = np.asarray([
        0.01678124, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.00665895, 0.0, 0.11374763, 0.36250514, 0.5472948, 0.9308808,
        1.0, 0.99920374, 0.86274165, 0.6346757, 0.5597227, 0.24191494, 0.25882354,
        0.0091916, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    ])
    np.testing.assert_array_almost_equal(test_x_adv[2, 0, :, 14], column_reference, decimal=6)

    self.assertLessEqual(np.amax(test_x_adv), 1.0)
    self.assertGreaterEqual(np.amin(test_x_adv), 0.0)

    target_labels = np.argmax(params['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(test_x_adv), axis=1)
    self.assertTrue((target_labels == adv_labels).any())

    # Untargeted attack with freshly drawn random labels
    attack = ElasticNet(classifier=ptc, targeted=False, max_iter=2)
    params = {'y': random_targets(test_y, ptc.nb_classes())}
    test_x_adv = attack.generate(test_x, **params)

    self.assertLessEqual(np.amax(test_x_adv), 1.0)
    self.assertGreaterEqual(np.amin(test_x_adv), 0.0)

    target_labels = np.argmax(params['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(test_x_adv), axis=1)
    self.assertTrue((target_labels != adv_labels).any())
    np.testing.assert_array_equal(adv_labels, np.asarray([7, 1, 1, 4, 4, 1, 4, 4, 4, 4]))

    # The clean test data must not have been modified by attack or classifier
    largest_change = float(np.max(np.abs(test_x_backup - test_x)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_pytorch_mnist(self):
    """
    Run the untargeted ``ZooAttack`` on the PyTorch classifier.

    Asserts that the adversarial samples stay within ``[0, 1]`` and that the
    clean test data is left unmodified. The targeted variant is currently
    disabled (kept below as a comment).

    :return:
    """
    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # Get MNIST
    x_test = np.swapaxes(self.x_test, 1, 3).astype(np.float32)
    x_test_original = x_test.copy()

    # First attack (targeted) -- disabled
    # zoo = ZooAttack(classifier=ptc, targeted=True, max_iter=10, binary_search_steps=10)
    # params = {'y': random_targets(self.y_test, ptc.nb_classes())}
    # x_test_adv = zoo.generate(x_test, **params)
    # self.assertFalse((x_test == x_test_adv).all())
    # self.assertLessEqual(np.amax(x_test_adv), 1.0)
    # self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
    # target = np.argmax(params['y'], axis=1)
    # y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    # logger.debug('ZOO target: %s', target)
    # logger.debug('ZOO actual: %s', y_pred_adv)
    # logger.info('ZOO success rate on MNIST: %.2f', (sum(target != y_pred_adv) / float(len(target))))

    # Second attack (untargeted)
    zoo = ZooAttack(
        classifier=ptc,
        targeted=False,
        learning_rate=1e-2,
        max_iter=15,
        binary_search_steps=10,
        abort_early=False,
        use_resize=False,
        use_importance=False,
    )
    x_test_adv = zoo.generate(x_test)
    self.assertLessEqual(np.amax(x_test_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_ptclassifier(self):
    """
    Run ``SpatialTransformation`` on the PyTorch classifier (``from_logits=True``).

    Compares one adversarial pixel, the fooling rate and the selected
    translation/rotation with reference values.

    :return:
    """
    train_x = np.reshape(self.x_train, (self.x_train.shape[0], 1, 28, 28)).astype(np.float32)
    test_x = np.reshape(self.x_test, (self.x_test.shape[0], 1, 28, 28)).astype(np.float32)
    test_x_backup = test_x.copy()

    # Classifier under attack
    ptc = get_classifier_pt(from_logits=True)

    attack = SpatialTransformation(
        ptc,
        max_translation=10.0,
        num_translations=3,
        max_rotation=30.0,
        num_rotations=3,
    )

    # Generating on the training data also fixes the attack parameters checked below
    train_x_adv = attack.generate(train_x)
    self.assertAlmostEqual(train_x_adv[0, 0, 13, 18], 0.627451, delta=0.01)
    self.assertAlmostEqual(attack.fooling_rate, 0.59, delta=0.01)
    self.assertEqual(attack.attack_trans_x, 0)
    self.assertEqual(attack.attack_trans_y, 3)
    self.assertEqual(attack.attack_rot, 0.0)

    test_x_adv = attack.generate(test_x)
    pixel_error = abs(test_x_adv[0, 0, 14, 14] - 0.008591662)
    self.assertLessEqual(pixel_error, 0.01)

    # The clean test data must not have been modified by attack or classifier
    largest_change = float(np.max(np.abs(test_x_backup - test_x)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_pytorch_mnist_LInf(self):
    """
    Run targeted and untargeted ``CarliniLInfMethod`` on the PyTorch classifier (``from_logits=True``).

    :return:
    """
    (_, _), (test_x, test_y) = self.mnist
    test_x = np.reshape(test_x, (test_x.shape[0], 1, 28, 28)).astype(np.float32)

    # Classifier under attack
    ptc = get_classifier_pt(from_logits=True)

    # Targeted attack towards random target classes
    attack = CarliniLInfMethod(classifier=ptc, targeted=True, max_iter=10, eps=0.5)
    params = {'y': random_targets(test_y, ptc.nb_classes())}
    test_x_adv = attack.generate(test_x, **params)

    self.assertFalse((test_x == test_x_adv).all())
    self.assertLessEqual(np.amax(test_x_adv), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(test_x_adv), -1e-6)

    target_labels = np.argmax(params['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(test_x_adv), axis=1)
    self.assertTrue((target_labels == adv_labels).any())

    # Untargeted attack; predictions are compared with the random targets drawn above
    attack = CarliniLInfMethod(classifier=ptc, targeted=False, max_iter=10, eps=0.5)
    test_x_adv = attack.generate(test_x)

    self.assertLessEqual(np.amax(test_x_adv), 1.0 + 1e-6)
    self.assertGreaterEqual(np.amin(test_x_adv), -1e-6)

    target_labels = np.argmax(params['y'], axis=1)
    adv_labels = np.argmax(ptc.predict(test_x_adv), axis=1)
    self.assertTrue((target_labels != adv_labels).any())
def test_ptclassifier(self):
    """
    Extract a copy of the PyTorch victim classifier with ``CopycatCNN``.

    A small convolutional model is trained by the attack, and the test
    asserts that it agrees with the victim on more than 30% of the first 100
    training samples.

    :return:
    """
    # Build PyTorchClassifier
    victim_ptc = get_classifier_pt()

    class Model(nn.Module):
        """
        Create model for pytorch.
        """

        def __init__(self):
            super(Model, self).__init__()

            self.conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=7)
            self.pool = nn.MaxPool2d(4, 4)
            self.fullyconnected = nn.Linear(25, 10)

        # pylint: disable=W0221
        # disable pylint because of API requirements for function
        def forward(self, x):
            """
            Forward function to evaluate the model

            :param x: Input to the model
            :return: Prediction of the model
            """
            x = self.conv(x)
            x = torch.nn.functional.relu(x)
            x = self.pool(x)
            x = x.reshape(-1, 25)
            x = self.fullyconnected(x)
            # Explicit dim: the implicit choice is deprecated and emits a
            # warning; for this 2-D tensor it resolves to the same axis.
            x = torch.nn.functional.softmax(x, dim=1)

            return x

    # Define the network
    model = Model()

    # Define a loss function and optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Get classifier
    thieved_ptc = PyTorchClassifier(model=model, loss=loss_fn, optimizer=optimizer, input_shape=(1, 28, 28),
                                    nb_classes=10, clip_values=(0, 1))

    # Create attack
    copycat_cnn = CopycatCNN(classifier=victim_ptc, batch_size_fit=BATCH_SIZE, batch_size_query=BATCH_SIZE,
                             nb_epochs=NB_EPOCHS, nb_stolen=NB_STOLEN)

    # Work on a local swapped array instead of temporarily overwriting
    # self.x_train, so a failure midway cannot leave the fixture swapped.
    x_train = np.swapaxes(self.x_train, 1, 3)
    thieved_ptc = copycat_cnn.extract(x=x_train, thieved_classifier=thieved_ptc)

    victim_preds = np.argmax(victim_ptc.predict(x=x_train[:100]), axis=1)
    thieved_preds = np.argmax(thieved_ptc.predict(x=x_train[:100]), axis=1)

    acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
    self.assertGreater(acc, 0.3)
def test_nb_classes(self):
    """Check that the pre-built classifier reports 10 classes."""
    expected_classes = 10
    reported_classes = get_classifier_pt().nb_classes()
    self.assertEqual(reported_classes, expected_classes)
def test_input_shape(self):
    """Check that the pre-built classifier reports an input shape of ``(1, 28, 28)``."""
    expected_shape = (1, 28, 28)
    reported_shape = get_classifier_pt().input_shape
    self.assertEqual(reported_shape, expected_shape)
def test_pytorch_mnist_targeted(self):
    """Delegate to ``_test_mnist_targeted`` with the PyTorch classifier and axis-swapped test images."""
    test_images = self.mnist[1][0]
    classifier = get_classifier_pt()
    test_images = np.swapaxes(test_images, 1, 3).astype(np.float32)
    self._test_mnist_targeted(classifier, test_images)
def test_class_gradient(self):
    """
    Check ``class_gradient`` in three modes: no label, integer label and label array.

    Each mode asserts the gradient shape and compares one column and one row
    of the first sample's gradient with stored reference values. The first
    two modes share the same references (class 5).
    """
    classifier = get_classifier_pt()

    # References for class 5 of the first sample: column 14, then row 14
    class5_column = np.asarray([
        -0.00367321, -0.0002892, 0.00037825, -0.00053344, 0.00192121, 0.00112047, 0.0023135,
        0.0, 0.0, -0.00391743, -0.0002264, 0.00238103, -0.00073711, 0.00270405,
        0.00389043, 0.00440818, -0.00412769, -0.00441795, 0.00081916, -0.00091284, 0.00119645,
        -0.00849089, 0.00547925, 0.0, 0.0, 0.0, 0.0, 0.0,
    ])
    class5_row = np.asarray([
        -1.0557442e-03, -1.0079540e-03, -7.7426381e-04, 1.7387437e-03, 2.1773505e-03, 5.0880131e-05,
        1.6497375e-03, 2.6113102e-03, 6.0904315e-03, 4.1080985e-04, 2.5268074e-03, -3.6661496e-04,
        -3.0568994e-03, -1.1665225e-03, 3.8904310e-03, 3.1726388e-04, 1.3203262e-03, -1.1720933e-04,
        -1.4315107e-03, -4.7676827e-04, 9.7251305e-04, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
    ])

    # Test all gradients (no label passed)
    grads = classifier.class_gradient(self.x_test)
    self.assertEqual(grads.shape, (NB_TEST, 10, 1, 28, 28))
    np.testing.assert_array_almost_equal(grads[0, 5, 0, :, 14], class5_column, decimal=4)
    np.testing.assert_array_almost_equal(grads[0, 5, 0, 14, :], class5_row, decimal=4)

    # Test 1 gradient label = 5
    single_label = 5
    grads = classifier.class_gradient(self.x_test, label=single_label)
    self.assertEqual(grads.shape, (NB_TEST, 1, 1, 28, 28))
    np.testing.assert_array_almost_equal(grads[0, 0, 0, :, 14], class5_column, decimal=4)
    np.testing.assert_array_almost_equal(grads[0, 0, 0, 14, :], class5_row, decimal=4)

    # Test a set of gradients label = array
    # NOTE(review): the references below presumably rely on the random seed
    # being fixed outside this method -- confirm.
    label_array = np.random.randint(5, size=NB_TEST)
    grads = classifier.class_gradient(self.x_test, label=label_array)
    self.assertEqual(grads.shape, (NB_TEST, 1, 1, 28, 28))

    array_column = np.asarray([
        -0.00195835, -0.00134457, -0.00307221, -0.00340564, 0.00175022, -0.00239714, -0.00122619,
        0.0, 0.0, -0.00520899, -0.00046105, 0.00414874, -0.00171095, 0.00429184,
        0.0075138, 0.00792443, 0.0019566, 0.00035517, 0.00504575, -0.00037397, 0.00022343,
        -0.00530035, 0.0020528, 0.0, 0.0, 0.0, 0.0, 0.0,
    ])
    np.testing.assert_array_almost_equal(grads[0, 0, 0, :, 14], array_column, decimal=4)

    array_row = np.asarray([
        5.0867130e-03, 4.8564533e-03, 6.1040395e-03, 8.6531248e-03, -6.0958802e-03, -1.4114541e-02,
        -7.1085966e-04, -5.0330797e-04, 1.2943064e-02, 8.2416134e-03, -1.9859453e-04, -9.8110031e-05,
        -3.8902226e-03, -1.2945874e-03, 7.5138002e-03, 1.7720887e-03, 3.1399354e-04, 2.3657191e-04,
        -3.0891625e-03, -1.0211228e-03, 2.0828887e-03, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
    ])
    np.testing.assert_array_almost_equal(grads[0, 0, 0, 14, :], array_row, decimal=4)
def test_pytorch_mnist(self):
    """
    Run targeted and untargeted ``SaliencyMapMethod`` on the PyTorch classifier.

    Logs clean and adversarial accuracies and asserts that the attack changes
    both the samples and at least some of the predicted labels.
    """
    (train_x, train_y), (test_x, test_y) = self.mnist
    train_x = np.swapaxes(train_x, 1, 3).astype(np.float32)
    test_x = np.swapaxes(test_x, 1, 3).astype(np.float32)
    test_x_backup = test_x.copy()

    def _accuracy(one_hot_pred, one_hot_true):
        """Return the fraction of rows whose arg-max agrees between the two arrays."""
        matches = np.argmax(one_hot_pred, axis=1) == np.argmax(one_hot_true, axis=1)
        return np.sum(matches) / one_hot_true.shape[0]

    # Create basic PyTorch model
    classifier = get_classifier_pt()

    train_scores = get_labels_np_array(classifier.predict(train_x))
    accuracy = _accuracy(train_scores, train_y)
    logger.info('[PyTorch, MNIST] Accuracy on training set: %.2f%%', (accuracy * 100))

    test_scores = get_labels_np_array(classifier.predict(test_x))
    accuracy = _accuracy(test_scores, test_y)
    logger.info('\n[PyTorch, MNIST] Accuracy on test set: %.2f%%', (accuracy * 100))

    # Targeted: draw random target classes until none coincides with the true label
    true_labels = np.argmax(test_y, axis=1)
    nb_classes = np.unique(true_labels).shape[0]
    targets = np.random.randint(nb_classes, size=NB_TEST)
    while (targets == true_labels).any():
        targets = np.random.randint(nb_classes, size=NB_TEST)

    attack = SaliencyMapMethod(classifier, theta=1, batch_size=100)
    test_x_adv = attack.generate(test_x, y=to_categorical(targets, nb_classes))

    self.assertFalse((test_x == test_x_adv).all())
    self.assertFalse((0. == test_x_adv).all())

    adv_pred = get_labels_np_array(classifier.predict(test_x_adv))
    self.assertFalse((test_y == adv_pred).all())

    accuracy = _accuracy(adv_pred, test_y)
    logger.info('Accuracy on adversarial examples: %.2f%%', (accuracy * 100))

    # Untargeted
    attack = SaliencyMapMethod(classifier, theta=1, batch_size=100)
    test_x_adv = attack.generate(test_x)

    self.assertFalse((test_x == test_x_adv).all())
    self.assertFalse((0. == test_x_adv).all())

    adv_pred = get_labels_np_array(classifier.predict(test_x_adv))
    self.assertFalse((test_y == adv_pred).all())

    accuracy = _accuracy(adv_pred, test_y)
    logger.info('Accuracy on adversarial examples: %.2f%%', (accuracy * 100))

    # The clean test data must not have been modified by attack or classifier
    largest_change = float(np.max(np.abs(test_x_backup - test_x)))
    self.assertAlmostEqual(largest_change, 0.0, delta=0.00001)
def test_ptclassifier(self):
    """
    Run ``HopSkipJump`` on the PyTorch classifier in four configurations.

    Targeted and untargeted attacks are each run with the attack's default
    norm and with ``norm=np.inf``. Every run must change the samples, keep
    them within ``[-0.0001, 1.0001]`` and either reach the target class
    (targeted) or change the prediction (untargeted) for at least one sample.

    :return:
    """
    (_, _), (x_test, y_test) = self.mnist
    x_test = np.swapaxes(x_test, 1, 3).astype(np.float32)
    x_test_original = x_test.copy()

    def _check_adversarial(x_adv):
        """Assert that ``x_adv`` differs from ``x_test`` and stays within the tolerated value range."""
        self.assertFalse((x_test == x_adv).all())
        self.assertTrue((x_adv <= 1.0001).all())
        self.assertTrue((x_adv >= -0.0001).all())

    # Build PyTorchClassifier
    ptc = get_classifier_pt()

    # First targeted attack and norm=2
    hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=2, max_eval=100, init_eval=10)
    params = {'y': random_targets(y_test, ptc.nb_classes())}
    x_test_adv = hsj.generate(x_test, **params)
    _check_adversarial(x_test_adv)

    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # First targeted attack and norm=np.inf
    # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
    hsj = HopSkipJump(classifier=ptc, targeted=True, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    params = {'y': random_targets(y_test, ptc.nb_classes())}
    x_test_adv = hsj.generate(x_test, **params)
    _check_adversarial(x_test_adv)

    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((target == y_pred_adv).any())

    # Second untargeted attack and norm=2
    hsj = HopSkipJump(classifier=ptc, targeted=False, max_iter=2, max_eval=100, init_eval=10)
    x_test_adv = hsj.generate(x_test)
    _check_adversarial(x_test_adv)

    y_pred = np.argmax(ptc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Second untargeted attack and norm=np.inf
    hsj = HopSkipJump(classifier=ptc, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=np.inf)
    x_test_adv = hsj.generate(x_test)
    _check_adversarial(x_test_adv)

    y_pred = np.argmax(ptc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    self.assertTrue((y_pred != y_pred_adv).any())

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
def test_pytorch_mnist(self):
    """Delegate to ``_test_backend_mnist`` with the PyTorch classifier and test images reshaped to ``(n, 1, 28, 28)``."""
    test_images, test_labels = self.mnist[1]
    sample_count = test_images.shape[0]
    test_images = np.reshape(test_images, (sample_count, 1, 28, 28)).astype(np.float32)

    classifier = get_classifier_pt()
    self._test_backend_mnist(classifier, test_images, test_labels)