def testCostGradientsStopsAtFirstUnit(self):
    """Expects a zero cost gradient when every input value is 0.999."""
    halting_proba = tf.constant([[0.999, 0.999, 0.999, 0.999]])
    # Only the cost output is differentiated; the other two are not needed.
    cost, _, _ = act.adaptive_computation_time(halting_proba)
    grads = tf.gradients(cost, halting_proba)

    # The fetched value is compared against a (1, 1, 4) array of zeros.
    expected_grads = np.array([[[0.0, 0.0, 0.0, 0.0]]])
    with self.test_session() as sess:
        grads_out = sess.run(grads)
        self.assertAllClose(grads_out, expected_grads)
def testCostGradientsStopsAtMiddleUnit(self):
    """Expects cost gradient [-1, -1, 0, 0] for inputs [0.01, 0.5, 0.6, 0.7]."""
    halting_proba = tf.constant([[0.01, 0.50, 0.60, 0.70]])
    # Only the cost output is differentiated; the other two are not needed.
    cost, _, _ = act.adaptive_computation_time(halting_proba)
    grads = tf.gradients(cost, halting_proba)

    # Non-zero gradient is expected only for the first two positions.
    expected_grads = np.array([[[-1.0, -1.0, 0.0, 0.0]]])
    with self.test_session() as sess:
        grads_out = sess.run(grads)
        self.assertAllClose(grads_out, expected_grads)
def testStopsAtLastUnit(self):
    """Checks the outputs when all four input values are a small 0.01.

    Expected: cost 5.96, unit count 5, and a five-entry distribution whose
    last entry (0.96) makes the row sum to one.
    """
    halting_proba = tf.constant([[0.01, 0.01, 0.01, 0.01]])
    outputs = act.adaptive_computation_time(halting_proba)
    cost, num_units, distrib = outputs

    expected_cost = np.array([5.96])
    expected_num_units = np.array([5])
    expected_distrib = np.array([[0.01, 0.01, 0.01, 0.01, 0.96]])

    with self.test_session() as sess:
        fetched = sess.run((cost, num_units, distrib))
        cost_out, num_units_out, distrib_out = fetched
        self.assertAllClose(cost_out, expected_cost)
        self.assertAllEqual(num_units_out, expected_num_units)
        self.assertAllClose(distrib_out, expected_distrib)
def testStopsAtFirstUnit(self):
    """Checks the outputs when all four input values are 0.999 and eps=1e-2.

    Expected: cost 2.0, unit count 1, and a five-entry distribution with all
    mass on the first entry.
    """
    halting_proba = tf.constant([[0.999, 0.999, 0.999, 0.999]])
    outputs = act.adaptive_computation_time(halting_proba, eps=1e-2)
    cost, num_units, distrib = outputs

    expected_cost = np.array([2.0])
    expected_num_units = np.array([1])
    expected_distrib = np.array([[1.0, 0.0, 0.0, 0.0, 0.0]])

    with self.test_session() as sess:
        fetched = sess.run((cost, num_units, distrib))
        cost_out, num_units_out, distrib_out = fetched
        self.assertAllClose(cost_out, expected_cost)
        self.assertAllEqual(num_units_out, expected_num_units)
        self.assertAllClose(distrib_out, expected_distrib)
def testStopsAtMiddleUnit(self):
    """Checks the outputs for input values [0.01, 0.5, 0.6, 0.7].

    Expected: cost 3.49, unit count 3, and distribution
    [0.01, 0.5, 0.49, 0, 0].
    """
    halting_proba = tf.constant([[0.01, 0.50, 0.60, 0.70]])
    outputs = act.adaptive_computation_time(halting_proba)
    cost, num_units, distrib = outputs

    expected_cost = np.array([3.49])
    expected_num_units = np.array([3])
    expected_distrib = np.array([[0.01, 0.50, 0.49, 0.0, 0.0]])

    with self.test_session() as sess:
        fetched = sess.run((cost, num_units, distrib))
        cost_out, num_units_out, distrib_out = fetched
        self.assertAllClose(cost_out, expected_cost)
        self.assertAllEqual(num_units_out, expected_num_units)
        self.assertAllClose(distrib_out, expected_distrib)
def testOutputSize(self):
    """Checks output shapes for a random [batch_size, max_units - 1] input.

    Cost and unit count are expected to have one entry per batch row; the
    distribution is expected to have max_units columns.
    """
    batch_size = 5
    max_units = 8
    per_example_shape = (batch_size, )
    distrib_shape = (batch_size, max_units)

    logits = tf.random_normal(shape=[batch_size, max_units - 1])
    halting_proba = tf.sigmoid(logits)
    cost, num_units, distrib = act.adaptive_computation_time(halting_proba)

    with self.test_session() as sess:
        # Fetch everything in a single run so all outputs share one random draw.
        fetched = sess.run((cost, num_units, distrib))
        cost_out, num_units_out, distrib_out = fetched
        self.assertEqual(cost_out.shape, per_example_shape)
        self.assertEqual(num_units_out.shape, per_example_shape)
        self.assertEqual(distrib_out.shape, distrib_shape)
def testEqualValuesInBatch(self):
    """Checks that two identical batch rows yield matching outputs.

    A single random row is tiled to a batch of two, so both rows carry the
    same input values within one session run.
    """
    batch_size = 2
    max_units = 8

    single_row = tf.sigmoid(tf.random_normal(shape=[1, max_units - 1]))
    multiples = tf.stack([batch_size, 1])
    halting_proba = tf.tile(single_row, multiples)
    cost, num_units, distrib = act.adaptive_computation_time(halting_proba)

    with self.test_session() as sess:
        # Fetch everything in a single run so both rows see the same draw.
        fetched = sess.run((cost, num_units, distrib))
        cost_out, num_units_out, distrib_out = fetched

        first_cost, second_cost = cost_out[0], cost_out[1]
        first_units, second_units = num_units_out[0], num_units_out[1]
        first_distrib, second_distrib = distrib_out[0], distrib_out[1]

        self.assertAlmostEqual(first_cost, second_cost)
        self.assertEqual(first_units, second_units)
        self.assertAllEqual(first_distrib, second_distrib)