def testIsMrsTrueInModel(self):
  l = layers.TaylorScorer(
      compute_removal_saliency=True, compute_mean_replacement_saliency=True)
  l_before = tf.keras.layers.Dense(20, activation=tf.nn.tanh)
  model = tf.keras.Sequential([
      l_before, l,
      tf.keras.layers.Dropout(0.5),
      tf.keras.layers.Dense(
          32, activation=lambda x: tf.nn.log_softmax(x, axis=1))
  ])
  # Building the model. We don't need the return value.
  model(tf.random.uniform((3, 5)))
  # Zero out the incoming weights of the last unit, so its activation is
  # constant (equal to its bias) across the batch.
  l_before.weights[0].assign(
      tf.concat([l_before.weights[0][:, 1:], tf.zeros((5, 1))], axis=1))
  x = tf.Variable(tf.random.uniform((3, 5)))
  a_mean = tf.reduce_mean(l_before(x), axis=0)
  with tf.GradientTape() as tape:
    y = model(x)
    loss = tf.reduce_sum(y)
  # We don't need the gradient itself; the backward pass accumulates mrs.
  tape.gradient(loss, x)
  saved_mean = l.get_saved_values('mean')
  self.assertAllEqual(a_mean[0], saved_mean[0])
  self.assertAllEqual(a_mean[1], saved_mean[1])
  self.assertAllEqual(a_mean[2], saved_mean[2])
  # The last unit outputs just its bias, so replacing it with its mean
  # changes nothing and its mrs should be zero.
  self.assertEqual(l.get_saved_values('mrs')[-1].numpy(), 0.0)

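# A minimal companion sketch (illustrative method name, not from the original
# suite) of the property testIsMrsTrueInModel relies on: a unit whose
# activation is constant across the batch is exactly recovered by its mean,
# so its mean-replacement saliency is zero regardless of normalization.
def testMrsZeroForConstantUnitSketch(self):
  l = layers.TaylorScorer(compute_mean_replacement_saliency=True)
  # Batch of 4; the last unit is the constant 0.7 for every sample.
  rand_part = tf.random.uniform((4, 3))
  const_part = tf.fill((4, 1), 0.7)
  x = tf.Variable(tf.concat([rand_part, const_part], axis=1))
  with tf.GradientTape() as tape:
    loss = tf.reduce_sum(l(x))
  # The backward pass populates mrs.
  tape.gradient(loss, x)
  # (mean - activation) is identically zero for the constant unit.
  self.assertAllClose(l.get_saved_values('mrs')[-1], 0.0)
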
def testIdentity(self):
  l = layers.TaylorScorer(
      compute_removal_saliency=False, compute_mean_replacement_saliency=False)
  a = tf.random.uniform((3, 5))
  self.assertAllEqual(l(a), a)
  a = tf.random.uniform((3, 5, 5, 2))
  self.assertAllEqual(l(a), a)

def testIsRsTrue(self):
  values = [tf.random.uniform((3, 5)), tf.random.uniform((3, 5, 5, 4))]
  for inp in values:
    n_dim = len(inp.shape)
    l = layers.TaylorScorer(
        compute_removal_saliency=True,
        compute_mean_replacement_saliency=False)
    # Append an all-zero channel so the last unit has a known saliency.
    zeros_channel = tf.zeros(inp.shape.as_list()[:-1] + [1])
    inp_concat = tf.concat((inp, zeros_channel), axis=n_dim - 1)
    x = tf.Variable(inp_concat)
    x_mean = tf.reduce_mean(x, axis=list(range(n_dim - 1)))
    with tf.GradientTape() as tape:
      y = l(x)
      loss = tf.reduce_sum(y)
    # After the forward pass alone (no backward pass yet), rs is still None.
    self.assertIsNone(l.get_saved_values('rs'))
    dx = tape.gradient(loss, x)
    # MRS should be None, since compute_mean_replacement_saliency=False.
    self.assertIsNone(l.get_saved_values('mrs'))
    # The loss is a plain sum, so dy is all ones.
    self.assertAllEqual(dx, tf.ones_like(inp_concat))
    # Removing a unit changes its activation by -x; sum over the spatial
    # axes and normalize by the number of summed elements.
    avg_change = -x
    if n_dim > 2:
      avg_change = tf.reduce_sum(avg_change, axis=list(range(1, n_dim - 1)))
    correct_rs = tf.reduce_sum(tf.abs(avg_change), axis=0) / int(
        tf.size(x[Ellipsis, 0]))
    self.assertAllClose(correct_rs, l.get_saved_values('rs'))
    # The last channel is all zeros, so removing it has zero penalty.
    self.assertEqual(l.get_saved_values('rs')[-1].numpy(), 0.0)
    # We still expect the mean to be calculated.
    self.assertAllEqual(x_mean, l.get_saved_values('mean'))
    # The layer still acts as the identity in the forward pass.
    self.assertAllEqual(l(inp), inp)

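# A small sketch (illustrative method name, not from the original suite)
# making the first-order Taylor estimate in testIsRsTrue explicit for the 2-D
# case: with dy taken from the actual backward pass, rs per unit is
# |dy * (0 - x)| summed over the batch and divided by the batch size. With a
# plain sum loss dy is all ones, so this reduces to correct_rs above.
def testRsManualTaylorEstimateSketch(self):
  l = layers.TaylorScorer(compute_removal_saliency=True)
  x = tf.Variable(tf.random.uniform((3, 5)))
  with tf.GradientTape() as tape:
    loss = tf.reduce_sum(l(x))
  # The layer is the identity in the forward pass, so the gradient w.r.t. x
  # equals dy at the layer's output.
  dy = tape.gradient(loss, x)
  manual_rs = tf.reduce_sum(tf.abs(dy * (0.0 - x)), axis=0) / x.shape[0]
  self.assertAllClose(manual_rs, l.get_saved_values('rs'))
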
def testIsAbsTrue(self):
  l = layers.TaylorScorer(is_abs=False, compute_removal_saliency=True)
  a = tf.constant([[-1, 0, 1], [1, 0, 1]], dtype=tf.float32)
  x = tf.Variable(a)
  x_mean = tf.reduce_mean(x, axis=0)
  with tf.GradientTape() as tape:
    y = l(x)
    loss = tf.reduce_sum(y)
  # Before the backward pass it is None.
  self.assertIsNone(l.get_saved_values('rs'))
  dx = tape.gradient(loss, x)
  # dy is all ones, so without the abs the +1/-1 of unit 0 cancel out.
  correct_rs = tf.constant([0, 0, -1])
  self.assertAllEqual(dx, tf.ones_like(a))
  self.assertAllEqual(correct_rs, l.get_saved_values('rs'))
  # We still expect the mean to be calculated.
  self.assertAllEqual(x_mean, l.get_saved_values('mean'))
  # Let's do the same with is_abs=True and get a non-zero rs for unit 0.
  with tf.GradientTape() as tape:
    y = l(x, is_abs=True)
    loss = tf.reduce_sum(y)
  # Before the backward pass it is None.
  self.assertIsNone(l.get_saved_values('rs'))
  tape.gradient(loss, x)
  correct_rs = tf.constant([1, 0, 1])
  self.assertAllEqual(correct_rs, l.get_saved_values('rs'))

def testAggregationRS(self):
  l = layers.TaylorScorer(
      compute_removal_saliency=False, compute_mean_replacement_saliency=False)
  x1 = tf.Variable(tf.random.uniform((3, 5)))
  with tf.GradientTape() as tape:
    y = l(x1, compute_removal_saliency=True)
    loss = tf.reduce_sum(y)
  # The backward pass populates rs.
  tape.gradient(loss, x1)
  first_rs = l.get_saved_values('rs')
  # A plain forward pass removes the previous rs, mrs and mean values.
  y = l(x1)
  self.assertIsNone(l.get_saved_values('rs'))
  # Another input.
  x2 = tf.Variable(tf.random.uniform((3, 5)))
  with tf.GradientTape() as tape:
    y = l(x2, compute_removal_saliency=True)
    loss = tf.reduce_sum(y)
  tape.gradient(loss, x2)
  second_rs = l.get_saved_values('rs')
  # Aggregating once.
  with tf.GradientTape() as tape:
    y = l(x1, compute_removal_saliency=True, aggregate_values=True)
    loss = tf.reduce_sum(y)
  tape.gradient(loss, x1)
  self.assertAllClose((first_rs + second_rs) / 2, l.get_saved_values('rs'))
  # Aggregating twice.
  with tf.GradientTape() as tape:
    y = l(x1, compute_removal_saliency=True, aggregate_values=True)
    loss = tf.reduce_sum(y)
  tape.gradient(loss, x1)
  self.assertAllClose((first_rs + first_rs + second_rs) / 3,
                      l.get_saved_values('rs'))

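# A sketch (illustrative method name, not from the original suite) of a
# direct consequence of the running-average semantics testAggregationRS
# verifies: aggregating the same input repeatedly averages identical rs
# values, so the saved score stays fixed.
def testRsAggregationIdempotentSketch(self):
  l = layers.TaylorScorer(compute_removal_saliency=True)
  x = tf.Variable(tf.random.uniform((3, 5)))
  with tf.GradientTape() as tape:
    loss = tf.reduce_sum(l(x))
  tape.gradient(loss, x)
  single_rs = l.get_saved_values('rs')
  # Two more aggregating passes over the exact same input.
  for _ in range(2):
    with tf.GradientTape() as tape:
      loss = tf.reduce_sum(l(x, aggregate_values=True))
    tape.gradient(loss, x)
  # Averaging identical rs values changes nothing.
  self.assertAllClose(single_rs, l.get_saved_values('rs'))
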
def testArgs(self):
  l = layers.TaylorScorer(
      name='test',
      compute_removal_saliency=False,
      compute_mean_replacement_saliency=True)
  self.assertEqual(l.name, 'test')
  self.assertFalse(l.compute_removal_saliency)
  self.assertTrue(l.compute_mean_replacement_saliency)
  # is_abs and save_l2norm keep their default values.
  self.assertTrue(l.is_abs)
  self.assertFalse(l.save_l2norm)

def testGetConfig(self):
  l = layers.TaylorScorer()
  expected_config = {
      'is_abs': True,
      'compute_removal_saliency': False,
      'compute_mean_replacement_saliency': False,
      'save_l2norm': False,
      'trainable': False
  }
  self.assertDictContainsSubset(expected_config, l.get_config())

def testGetMeanValuesAggregated(self):
  l = layers.TaylorScorer(
      compute_removal_saliency=False, compute_mean_replacement_saliency=False)
  x1 = tf.random.uniform((3, 5))
  l(x1)
  x2 = tf.random.uniform((6, 5))
  l(x2, aggregate_values=True)
  # Aggregation is weighted by batch size, so the saved value equals the
  # mean of the concatenated batches.
  correct_mean = tf.reduce_mean(tf.concat([x1, x2], 0), axis=0)
  self.assertAllClose(l.get_saved_values('mean'), correct_mean)

def testAggregationMean(self):
  l = layers.TaylorScorer(
      compute_removal_saliency=False, compute_mean_replacement_saliency=False)
  x1 = tf.random.uniform((3, 5))
  l(x1)
  first_mean = l.get_saved_values('mean')
  self.assertEqual(len(l._mean), 2)
  x2 = tf.random.uniform((6, 5))
  # A plain forward pass removes the previous mean.
  l(x2)
  second_mean = l.get_saved_values('mean')
  self.assertEqual(len(l._mean), 2)
  l(x1, aggregate_values=True)
  # x2 has twice as many samples as x1, so its mean gets twice the weight.
  self.assertAllClose((first_mean + second_mean * 2) / 3,
                      l.get_saved_values('mean'))

def testL2Norm(self):
  l = layers.TaylorScorer()
  x1 = tf.random.uniform((3, 5))
  l(x1)
  self.assertIsNone(l.get_saved_values('l2norm'))
  self.assertIsNone(l._l2norm)
  l(x1, save_l2norm=True)
  # The saved value is the squared l2-norm averaged over the batch.
  correct_l2normsquared = tf.square(tf.norm(x1, axis=0)) / x1.shape[0]
  self.assertAllClose(l.get_saved_values('l2norm'), correct_l2normsquared)
  x2 = tf.random.uniform((3, 5))
  l(x2, save_l2norm=True, aggregate_values=True)
  correct_l2normsquared2 = tf.square(tf.norm(x2, axis=0)) / x2.shape[0]
  self.assertAllClose(l.get_saved_values('l2norm'),
                      (correct_l2normsquared + correct_l2normsquared2) / 2)

def testGetMeanValues(self):
  l = layers.TaylorScorer(
      compute_removal_saliency=False, compute_mean_replacement_saliency=False)
  x = tf.random.uniform((3, 5))
  l(x)
  x_mean = tf.reduce_mean(x, axis=0)
  self.assertAllEqual(x_mean, l.get_saved_values('mean'))
  self.assertAllEqual(
      tf.broadcast_to(x_mean, x.shape),
      l.get_saved_values('mean', broadcast_to_input_shape=True))
  rand_mask = tf.cast(
      tf.random.uniform(x_mean.shape[:1], dtype=tf.int32, maxval=2),
      tf.float32)
  self.assertAllEqual(rand_mask * x_mean,
                      l.get_saved_values('mean', unit_mask=rand_mask))
  self.assertAllEqual(
      tf.broadcast_to(rand_mask * x_mean, x.shape),
      l.get_saved_values('mean',
                         unit_mask=rand_mask,
                         broadcast_to_input_shape=True))
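
# A hedged usage sketch (illustrative method name and keep_mask, not from the
# original suite) of the broadcast mean exercised above: one plausible way to
# "mean-replace" pruned units is to pass live units' activations through and
# substitute the saved batch mean for the pruned ones.
def testMeanReplacementMixSketch(self):
  l = layers.TaylorScorer()
  x = tf.random.uniform((3, 5))
  l(x)
  # 0/1 keep-mask over units; 0 marks a pruned unit (illustrative choice).
  keep_mask = tf.constant([1., 0., 1., 1., 0.])
  mean_b = l.get_saved_values('mean', broadcast_to_input_shape=True)
  # Live units pass through; pruned units emit their batch mean.
  replaced = keep_mask * x + (1.0 - keep_mask) * mean_b
  self.assertAllClose(replaced[:, 0], x[:, 0])
  self.assertAllClose(replaced[:, 1], mean_b[:, 1])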