def testEndToEnd(self, predictor_cls, attack_cls, optimizer_cls, epsilon,
                 restarted=False):
    """Runs a maximising and a minimising attack on a one-unit linear model.

    Args:
      predictor_cls: Callable invoked as `predictor_cls(module, self)` to wrap
        the network under attack.
      attack_cls: Callable invoked as `attack_cls(predictor, specification,
        epsilon, input_bounds=..., optimizer_builder=...)` to build an attack.
      optimizer_cls: Forwarded to `attack_cls` as `optimizer_builder`.
      epsilon: l-infinity radius of the perturbation ball. A Python list is
        converted to a float32 tensor (wrapped in one extra outer dimension)
        so that different epsilons can be used across input dimensions.
      restarted: When true, each attack is wrapped in `ibp.RestartedAttack`
        with 10 restarts.
    """
    if isinstance(epsilon, list):
        # Exercise the ability to have different epsilons across dimensions.
        epsilon = tf.constant([epsilon], dtype=tf.float32)
    input_range = (-.5, 2.5)

    # A simple network: one linear unit whose weights and bias all start at 1.
    linear = snt.Linear(1, initializers={
        'w': tf.constant_initializer(1.),
        'b': tf.constant_initializer(1.),
    })
    nominal_input = tf.constant([[1, 2]], dtype=tf.float32)
    predictor = predictor_cls(linear, self)
    # The labels are not important for the test, but the attack needs them.
    labels = tf.constant([1], dtype=tf.int64)

    def run_attack(direction):
        """Builds an attack for the given objective sign and applies it."""
        spec = ibp.LinearSpecification(tf.constant([[[direction]]]))
        attack = attack_cls(predictor, spec, epsilon,
                            input_bounds=input_range,
                            optimizer_builder=optimizer_cls)
        if restarted:
            attack = ibp.RestartedAttack(attack, num_restarts=10)
        return attack(nominal_input, labels)

    # Two attacks: the first maximises the output, the second minimises it.
    z_max = run_attack(1.)
    z_min = run_attack(-1.)

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        max_batch, min_batch = sess.run([z_max, z_min])
        # Only the first (and only) example of the batch is inspected.
        found_max = max_batch[0]
        found_min = min_batch[0]
        checks = (
            (2., found_max[0]),
            (2.5, found_max[1]),
            (0., found_min[0]),
            (1., found_min[1]),
        )
        for expected, actual in checks:
            self.assertAlmostEqual(expected, actual)
def testLinearSpecification(self):
    """Checks a linear specification with and without collapsing.

    Both variants are applied to the modules returned by
    `_build_spec_input()` and must evaluate to 17.
    """
    # Coefficients have shape [batch_size, num_specifications, num_outputs];
    # offsets have shape [batch_size, num_specifications].
    # Together they encode z_{K,1} + 2 * z_{K,2} + 3 <= 0.
    coefficients = tf.constant([[[1, 2]]], dtype=tf.float32)
    offsets = tf.constant([[3]], dtype=tf.float32)

    # Non-collapsed variant first, collapsed variant second.
    specs = [
        ibp.LinearSpecification(coefficients, offsets, collapse=collapse)
        for collapse in (False, True)
    ]
    modules = _build_spec_input()
    outputs = [spec(modules) for spec in specs]

    with self.test_session() as sess:
        for output in outputs:
            self.assertAlmostEqual(17., sess.run(output).item())
def testEndToEnd(self):
    """Builds nominal, attack and verified losses end to end and checks them."""
    model = ibp.VerifiableModelWrapper(FixedNN())
    labels = tf.constant([1], dtype=tf.int64)

    # Connect the model to its input.
    inputs = tf.constant([[1, 2, 3]], dtype=tf.float32)
    model(inputs, is_training=True)

    # Propagate interval bounds of radius 1 around the input.
    radius = 1.
    model.propagate_bounds(
        ibp.IntervalBounds(inputs - radius, inputs + radius))

    # Output specification that forces the first logits to be greater.
    # Elision (collapse) is turned off for more interesting results.
    spec = ibp.LinearSpecification(
        tf.constant([[[1, -1]]], dtype=tf.float32),
        tf.constant([[0]], dtype=tf.float32),
        collapse=False)

    attack = ibp.UntargetedPGDAttack(
        model, spec, radius, num_steps=1, input_bounds=(-100., 100))

    losses = ibp.Losses(
        model, spec, attack,
        interval_bounds_loss_type='hinge',
        interval_bounds_hinge_margin=0.)
    losses(labels)

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        # We expect the worst-case logits from IBP to be [9, 4].
        # The adversarial attack should fail since logits are always
        # [l, l + 1]. Similarly, the nominal predictions are correct.
        metrics, scalars = sess.run(
            [losses.scalar_metrics, losses.scalar_losses])

        self.assertAlmostEqual(1., metrics.nominal_accuracy)
        self.assertAlmostEqual(0., metrics.verified_accuracy)
        self.assertAlmostEqual(1., metrics.attack_accuracy)

        # Nominal and attack cross-entropies are expected to coincide.
        expected_xent = 0.31326168751822947
        for xent in (scalars.nominal_cross_entropy,
                     scalars.attack_cross_entropy):
            self.assertAlmostEqual(expected_xent, xent, places=5)

        expected_hinge = 5.
        self.assertAlmostEqual(expected_hinge, scalars.verified_loss)
def _build_classification_specification(label, num_classes):
    """Returns a LinearSpecification for adversarial classification.

    Args:
      label: Tensor of class indices; used to gather one pre-built
        specification matrix per example, and its first dimension sets the
        batch size of the offsets.
      num_classes: Number of classes.

    Returns:
      An `ibp.LinearSpecification` built with `prune_irrelevant=False`.
    """
    num_others = num_classes - 1
    identity = np.eye(num_others)
    minus_one_column = -np.ones((num_others, 1))

    # Pre-construct one matrix per class: an identity matrix with a column of
    # -1 inserted at the index of that class.
    per_class = np.array(
        [
            np.concatenate(
                [identity[:, :k], minus_one_column, identity[:, k:]], axis=1)
            for k in range(num_classes)
        ],
        dtype=np.float32)

    # Select the matrix that matches each label.
    c = tf.gather(tf.constant(per_class), label)
    # By construction all specifications are relevant.
    d = tf.zeros(shape=(tf.shape(label)[0], num_others))
    return ibp.LinearSpecification(c, d, prune_irrelevant=False)
def _generate_identity_spec(modules, shape, dimension=1):
    """Creates initial backward bounds from an identity specification.

    Args:
      modules: Passed through to
        `ibp.crown.create_initial_backward_bounds`.
      shape: Shape that the `dimension` x `dimension` identity matrix is
        reshaped to before being used as specification coefficients.
      dimension: Size of the identity matrix.

    Returns:
      The result of `ibp.crown.create_initial_backward_bounds` for the
      identity specification and `modules`.
    """
    coefficients = tf.reshape(tf.eye(dimension), shape)
    identity_spec = ibp.LinearSpecification(
        coefficients, prune_irrelevant=False)
    return ibp.crown.create_initial_backward_bounds(identity_spec, modules)