def test_tfclassifier(self):
    """
    Run the universal perturbation attack against a small TensorFlow classifier.

    A network made of one convolution (4 filters, kernel size 5), one max-pooling layer and a dense layer
    with 10 logits is trained for two epochs on the first 10 MNIST training samples. The attack is then run
    with the ``newtonfool`` attacker and the test checks that:

    - the fooling rate is at least 0.2, unless the attack reports that it did not converge;
    - adding the computed perturbation ``up.v`` to the test samples modifies at least one of them;
    - the predictions on the adversarial train and test samples are not all equal to the true labels.

    :return: None
    """
    # Build a TFClassifier
    # Define input and output placeholders
    self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    self._output_ph = tf.placeholder(tf.int32, shape=[None, 10])

    # Define the tensorflow graph
    conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu)
    conv = tf.layers.max_pooling2d(conv, 2, 2)
    fc = tf.contrib.layers.flatten(conv)

    # Logits layer
    self._logits = tf.layers.dense(fc, 10)

    # Train operator
    self._loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=self._logits, onehot_labels=self._output_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    self._train = optimizer.minimize(self._loss)

    # Tensorflow session and initialization
    self._sess = tf.Session()
    # Registered right after creation so the session is released even if an assertion below fails
    self.addCleanup(self._sess.close)
    self._sess.run(tf.global_variables_initializer())

    # Get MNIST
    batch_size, nb_train, nb_test = 10, 10, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    # Train the classifier
    # NOTE(review): (0, 1) is presumably the clip range of the inputs - confirm against TFClassifier
    tfc = TFClassifier((0, 1), self._input_ph, self._logits, self._output_ph, self._train, self._loss, None,
                       self._sess)
    tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

    # Attack
    # TODO Launch with all possible attacks
    attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 20}}
    up = UniversalPerturbation(tfc)
    x_train_adv = up.generate(x_train, **attack_params)
    # A low fooling rate is only accepted when the attack itself reports that it did not converge
    self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

    # The perturbation computed on the train samples is applied unchanged to the test samples
    x_test_adv = x_test + up.v
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = np.argmax(tfc.predict(x_train_adv), axis=1)
    test_y_pred = np.argmax(tfc.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == test_y_pred).all())
    self.assertFalse((np.argmax(y_train, axis=1) == train_y_pred).all())
def test_failure_attack(self):
    """
    Test the corner case where the Carlini L2 attack fails.

    A network made of one convolution (4 filters, kernel size 5), one max-pooling layer and a dense layer
    with 10 logits is trained for ten epochs on the first 5000 MNIST training samples. The attack is then
    configured with zero iterations, zero binary search steps and a zero learning rate. The test checks
    that the returned samples stay within [0, 1] (with a tolerance of 1e-4) and are equal to the original
    test samples up to 3 decimals.

    :return: None
    """
    # Build a TFClassifier
    # Define input and output placeholders
    self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    self._output_ph = tf.placeholder(tf.int32, shape=[None, 10])

    # Define the tensorflow graph
    conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu)
    conv = tf.layers.max_pooling2d(conv, 2, 2)
    fc = tf.contrib.layers.flatten(conv)

    # Logits layer
    self._logits = tf.layers.dense(fc, 10)

    # Train operator
    self._loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=self._logits, onehot_labels=self._output_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    self._train = optimizer.minimize(self._loss)

    # Tensorflow session and initialization
    self._sess = tf.Session()
    # Registered right after creation so the session is released even if an assertion below fails
    self.addCleanup(self._sess.close)
    self._sess.run(tf.global_variables_initializer())

    # Get MNIST
    batch_size, nb_train, nb_test = 100, 5000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    # Train the classifier
    # NOTE(review): (0, 1) is presumably the clip range of the inputs - confirm against TFClassifier
    tfc = TFClassifier((0, 1), self._input_ph, self._logits, self._output_ph, self._train, self._loss, None,
                       self._sess)
    tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=10)

    # Failure attack: every parameter that drives the optimisation is set to zero
    cl2m = CarliniL2Method(classifier=tfc, targeted=True, max_iter=0, binary_search_steps=0,
                           learning_rate=0, initial_const=1, decay=0)
    params = {'y': random_targets(y_test, tfc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)

    # The output must stay in the valid input range (small tolerance for floating point error)
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    # A failed attack is expected to hand back the original samples
    np.testing.assert_almost_equal(x_test, x_test_adv, decimal=3)
def test_tfclassifier(self):
    """
    Run the NewtonFool attack against a small TensorFlow classifier.

    A network made of one convolution (4 filters, kernel size 5), one max-pooling layer and a dense layer
    with 10 logits is trained for two epochs on the first 1000 MNIST training samples. The attack is run
    with ``max_iter=5`` on the first 10 test samples and the test checks that:

    - at least one test sample was modified;
    - for every sample, the classifier output for the originally predicted class did not increase.

    :return: None
    """
    # Build a TFClassifier
    # Define input and output placeholders
    self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    self._output_ph = tf.placeholder(tf.int32, shape=[None, 10])

    # Define the tensorflow graph
    conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu)
    conv = tf.layers.max_pooling2d(conv, 2, 2)
    fc = tf.contrib.layers.flatten(conv)

    # Logits layer
    self._logits = tf.layers.dense(fc, 10)

    # Train operator
    self._loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=self._logits, onehot_labels=self._output_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    self._train = optimizer.minimize(self._loss)

    # Tensorflow session and initialization
    self._sess = tf.Session()
    # Registered right after creation so the session is released even if an assertion below fails
    self.addCleanup(self._sess.close)
    self._sess.run(tf.global_variables_initializer())

    # Get MNIST
    batch_size, nb_train, nb_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    # Train the classifier
    # NOTE(review): (0, 1) is presumably the clip range of the inputs - confirm against TFClassifier
    tfc = TFClassifier((0, 1), self._input_ph, self._logits, self._output_ph, self._train, self._loss, None,
                       self._sess)
    tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

    # Attack
    nf = NewtonFool(tfc)
    nf.set_params(max_iter=5)
    x_test_adv = nf.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    y_pred = tfc.predict(x_test)
    y_pred_adv = tfc.predict(x_test_adv)

    # Select exactly one class per sample (the originally predicted one). Comparing against the row
    # maximum with a boolean mask would pick several entries for a row with tied maxima and break the
    # element-wise comparison below.
    top_class = np.argmax(y_pred, axis=1)
    rows = np.arange(len(top_class))
    y_pred_max = y_pred[rows, top_class]
    y_pred_adv_max = y_pred_adv[rows, top_class]
    self.assertTrue((y_pred_max >= y_pred_adv_max).all())
def test_tfclassifier(self):
    """
    Run the Carlini L2 attack against a small TensorFlow classifier in three configurations.

    A network made of one convolution (4 filters, kernel size 5), one max-pooling layer and a dense layer
    with 10 logits is trained for ten epochs on the first 5000 MNIST training samples. Three attacks are
    then run on the first 10 test samples:

    1. targeted, with random target labels: at least one prediction must match its target;
    2. untargeted, with random labels passed as ``y``: at least one prediction must differ from them;
    3. untargeted, without labels: at least one prediction must differ from the clean prediction.

    After every attack the test also checks that at least one sample was modified and that the adversarial
    samples stay within [0, 1] (with a tolerance of 1e-4).

    :return: None
    """
    # Build a TFClassifier
    # Define input and output placeholders
    self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    self._output_ph = tf.placeholder(tf.int32, shape=[None, 10])

    # Define the tensorflow graph
    conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu)
    conv = tf.layers.max_pooling2d(conv, 2, 2)
    fc = tf.contrib.layers.flatten(conv)

    # Logits layer
    self._logits = tf.layers.dense(fc, 10)

    # Train operator
    self._loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=self._logits, onehot_labels=self._output_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    self._train = optimizer.minimize(self._loss)

    # Tensorflow session and initialization
    self._sess = tf.Session()
    # Registered right after creation so the session is released even if an assertion below fails
    self.addCleanup(self._sess.close)
    self._sess.run(tf.global_variables_initializer())

    # Get MNIST
    batch_size, nb_train, nb_test = 100, 5000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    # Train the classifier
    # NOTE(review): (0, 1) is presumably the clip range of the inputs - confirm against TFClassifier
    tfc = TFClassifier((0, 1), self._input_ph, self._logits, self._output_ph, self._train, self._loss, None,
                       self._sess)
    tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=10)

    # First attack: targeted, success means the prediction equals the target
    cl2m = CarliniL2Method(classifier=tfc, targeted=True, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {'y': random_targets(y_test, tfc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    print("CW2 Target: %s" % target)
    print("CW2 Actual: %s" % y_pred_adv)
    print("CW2 Success Rate: %f" % np.mean(target == y_pred_adv))
    self.assertTrue((target == y_pred_adv).any())

    # Second attack: untargeted, success means the prediction differs from the supplied labels
    # NOTE(review): with targeted=False the labels in 'y' are presumably the classes to move away from -
    # confirm against CarliniL2Method
    cl2m = CarliniL2Method(classifier=tfc, targeted=False, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {'y': random_targets(y_test, tfc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    print("CW2 Target: %s" % target)
    print("CW2 Actual: %s" % y_pred_adv)
    print("CW2 Success Rate: %f" % np.mean(target != y_pred_adv))
    self.assertTrue((target != y_pred_adv).any())

    # Third attack: untargeted without labels, success means the prediction differs from the clean one
    cl2m = CarliniL2Method(classifier=tfc, targeted=False, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    y_pred = np.argmax(tfc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    print("CW2 Target: %s" % y_pred)
    print("CW2 Actual: %s" % y_pred_adv)
    print("CW2 Success Rate: %f" % np.mean(y_pred != y_pred_adv))
    self.assertTrue((y_pred != y_pred_adv).any())