Example #1
class InterfaceTests(keras_parameterized.TestCase):

  def testNoDependency(self):
    root = tf.Module()
    hasdep = tf.Module()
    root.hasdep = hasdep
    nodep = tf.Module()
    root.nodep = data_structures.NoDependency(nodep)
    self.assertEqual(1, len(root._checkpoint_dependencies))
    self.assertIs(root._checkpoint_dependencies[0].ref, root.hasdep)
    self.assertIs(root.hasdep, hasdep)
    self.assertIs(root.nodep, nodep)

    class NoDependencyModel(training.Model):

      @base.no_automatic_dependency_tracking
      def __init__(self):
        super(NoDependencyModel, self).__init__()
        self.a = []
        self.b = tf.Module()

    nodeps = NoDependencyModel()
    self.assertEqual([nodeps], util.list_objects(nodeps))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testDictionariesBasic(self):
    a = training.Model()
    b = training.Model()
    a.attribute = {"b": b}
    c = training.Model()
    a.attribute["c"] = []
    a.attribute["c"].append(c)
    a_deps = util.list_objects(a)
    self.assertIn(b, a_deps)
    self.assertIn(c, a_deps)
    self.assertIs(b, a.attribute["b"])
    six.assertCountEqual(
        self,
        ["b", "c"],
        [dep.name for dep in a.attribute._checkpoint_dependencies])
    self.assertEqual([b, c], a.layers)
    self.assertEqual([b, c], a.attribute.layers)
    self.assertEqual([c], a.attribute["c"].layers)
    checkpoint = tf.train.Checkpoint(a=a)
    save_path = checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
    with self.cached_session():
      checkpoint.restore(save_path).assert_consumed().initialize_or_restore()

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testNoDepList(self):
    a = training.Model()
    a.l1 = data_structures.NoDependency([])
    a.l1.insert(1, 0)
    self.assertIsInstance(a.l1, list)
    checkpoint = tf.train.Checkpoint(a=a)
    checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
    a.l2 = []
    a.l2.insert(1, tf.Module())
    with self.assertRaisesRegex(ValueError, "A list element was replaced"):
      checkpoint.save(os.path.join(self.get_temp_dir(), "ckpt"))
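
# A minimal standalone sketch of the behavior exercised above (it assumes the
# same `tf` and `data_structures` imports as the test file): attributes assigned
# on a tf.Module become checkpoint dependencies automatically, unless the value
# is wrapped in data_structures.NoDependency.
root = tf.Module()
root.tracked = tf.Module()                                  # tracked as a dependency
root.untracked = data_structures.NoDependency(tf.Module())  # unwrapped and untracked
assert len(root._checkpoint_dependencies) == 1
assert root._checkpoint_dependencies[0].ref is root.tracked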
Example #2
class MixedPrecisionTest(keras_parameterized.TestCase):

  IGNORE_PERF_VAR = 'TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_IGNORE_PERFORMANCE'

  def setUp(self):
    super(MixedPrecisionTest, self).setUp()
    # Enable the tests to be run on pre-Volta GPUs by telling the grappler pass
    # to ignore performance and always transform the graph.
    self._original_ignore_perf_value = os.getenv(self.IGNORE_PERF_VAR)
    os.environ[self.IGNORE_PERF_VAR] = '1'

  def tearDown(self):
    # Set the IGNORE_PERF_VAR variable back to its original value.
    if self._original_ignore_perf_value is not None:
      os.environ[self.IGNORE_PERF_VAR] = self._original_ignore_perf_value
    else:
      del os.environ[self.IGNORE_PERF_VAR]

    tf.compat.v1.mixed_precision.disable_mixed_precision_graph_rewrite()
    super(MixedPrecisionTest, self).tearDown()

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_wrap_optimizer(self):
    opt = gradient_descent_v2.SGD(1.0)
    opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(opt, 123.)
    self.assertIsInstance(
        opt, loss_scale_optimizer_v2.LossScaleOptimizerV1)
    self.assertEqual(self.evaluate(opt.loss_scale), 123.)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_optimizer_errors(self):
    opt = gradient_descent_v2.SGD(1.0)
    opt = loss_scale_optimizer_v2.LossScaleOptimizerV1(opt, 'dynamic')
    with self.assertRaisesRegex(
        ValueError, '"opt" must not already be an instance of a '
        'LossScaleOptimizer.'):
      tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(opt)
    self.assertFalse(tf.config.optimizer.get_experimental_options()
                     .get('auto_mixed_precision', False))

  @testing_utils.enable_v2_dtype_behavior
  def test_error_if_policy_is_set(self):
    with policy.policy_scope('mixed_float16'):
      with self.assertRaisesRegex(ValueError,
                                  'the global Keras dtype Policy has been set'):
        tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
            gradient_descent_v2.SGD(1.0))
    # Test no error is thrown when the policy is currently the default.
    tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
        gradient_descent_v2.SGD(1.0))
    # Test no error is thrown when the policy is a non-mixed policy.
    with policy.policy_scope('float64'):
      tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
          gradient_descent_v2.SGD(1.0))
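
# Hedged end-to-end sketch of how the rewrite is typically used (plain public
# tf.keras APIs; everything outside the tests above is an assumption): wrap the
# optimizer once, then compile as usual. The grappler auto_mixed_precision pass
# rewrites eligible ops to float16 at graph-optimization time, and the returned
# loss-scale optimizer guards against underflow.
opt = tf.keras.optimizers.SGD(1.0)
opt = tf.compat.v1.mixed_precision.enable_mixed_precision_graph_rewrite(
    opt, loss_scale='dynamic')
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer=opt, loss='mse')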
Example #3
class GRULayerGradientTapeTest(keras_parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_in_tape(self):
    with self.test_session(config=_config):
      time_steps = 10
      embedding_size = 11
      gru_unit_size = 12

      gru = rnn.GRU(gru_unit_size,
                    return_sequences=True,
                    return_state=True,
                    recurrent_activation='sigmoid',
                    recurrent_initializer='glorot_uniform')

      x = tf.random.uniform([1, time_steps, embedding_size])
      y = tf.random.uniform([1, gru_unit_size])

      with tf.GradientTape() as tape:
        hidden_state = tf.zeros([1, gru_unit_size], dtype=tf.float32)
        _, state = gru(x, initial_state=hidden_state)

        loss = tf.reduce_mean(tf.square(state - y))

      tape.gradient(loss, gru.variables)
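
# A hedged follow-on sketch (public tf.keras only; the names below are
# assumptions, not taken from the test above): the same tape pattern, but
# actually applying the computed gradients with an optimizer.
gru = tf.keras.layers.GRU(12, return_sequences=True, return_state=True)
opt = tf.keras.optimizers.SGD(0.1)
x = tf.random.uniform([1, 10, 11])
y = tf.random.uniform([1, 12])
with tf.GradientTape() as tape:
  _, state = gru(x, initial_state=tf.zeros([1, 12]))
  loss = tf.reduce_mean(tf.square(state - y))
grads = tape.gradient(loss, gru.trainable_variables)
opt.apply_gradients(zip(grads, gru.trainable_variables))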
Example #4
class SequenceFeaturesSavingTest(tf.test.TestCase, parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_with_sequence_features(self):
    cols = [
        tf.feature_column.sequence_numeric_column('a'),
        tf.feature_column.indicator_column(
            tf.feature_column.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b':
            keras.layers.Input(
                shape=(None, 1), sparse=True, name='b', dtype='string')
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1

    values_a = np.arange(10, dtype=np.float32)
    indices_a = np.zeros((10, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(10)
    inputs_a = tf.SparseTensor(indices_a, values_a,
                               (batch_size, timesteps, 1))

    # `np.str` was removed from NumPy; the plain builtin `str` is equivalent.
    values_b = np.zeros(10, dtype=str)
    indices_b = np.zeros((10, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(10)
    inputs_b = tf.SparseTensor(indices_b, values_b,
                               (batch_size, timesteps, 1))

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not tf.executing_eagerly():
        self.evaluate(tf.compat.v1.tables_initializer())

      self.assertLen(
          loaded_model.predict({
              'a': inputs_a,
              'b': inputs_b
          }, steps=1), batch_size)
Example #5
class BatchNormalizationV1Test(keras_parameterized.TestCase):
    @combinations.generate(combinations.combine(mode=['graph', 'eager']))
    def test_v1_fused_attribute(self):
        norm = batch_normalization_v1.BatchNormalization()
        inp = keras.layers.Input((4, 4, 4))
        norm(inp)
        self.assertEqual(norm.fused, True)

        norm = batch_normalization_v1.BatchNormalization(fused=False)
        self.assertEqual(norm.fused, False)
        inp = keras.layers.Input(shape=(4, 4, 4))
        norm(inp)
        self.assertEqual(norm.fused, False)

        norm = batch_normalization_v1.BatchNormalization(virtual_batch_size=2)
        self.assertEqual(norm.fused, True)
        inp = keras.layers.Input(shape=(2, 2, 2))
        norm(inp)
        self.assertEqual(norm.fused, False)
Example #6
class EmbeddingTest(keras_parameterized.TestCase):
    @keras_parameterized.run_all_keras_modes
    def test_embedding(self):
        if tf.test.is_gpu_available():
            self.skipTest('Only test embedding on CPU.')

        testing_utils.layer_test(keras.layers.Embedding,
                                 kwargs={
                                     'output_dim': 4,
                                     'input_dim': 10,
                                     'input_length': 2
                                 },
                                 input_shape=(3, 2),
                                 input_dtype='int32',
                                 expected_output_dtype='float32')

        testing_utils.layer_test(keras.layers.Embedding,
                                 kwargs={
                                     'output_dim': 4,
                                     'input_dim': 10,
                                     'mask_zero': True
                                 },
                                 input_shape=(3, 2),
                                 input_dtype='int32',
                                 expected_output_dtype='float32')

        testing_utils.layer_test(keras.layers.Embedding,
                                 kwargs={
                                     'output_dim': 4,
                                     'input_dim': 10,
                                     'mask_zero': True
                                 },
                                 input_shape=(3, 4, 2),
                                 input_dtype='int32',
                                 expected_output_dtype='float32')

        testing_utils.layer_test(keras.layers.Embedding,
                                 kwargs={
                                     'output_dim': 4,
                                     'input_dim': 10,
                                     'mask_zero': True,
                                     'input_length': (None, 2)
                                 },
                                 input_shape=(3, 4, 2),
                                 input_dtype='int32',
                                 expected_output_dtype='float32')

    @keras_parameterized.run_all_keras_modes
    def test_embedding_correctness(self):
        layer = keras.layers.Embedding(output_dim=2, input_dim=2)
        model = keras.models.Sequential([layer])

        layer.set_weights([np.array([[1, 1], [2, 2]])])
        model.run_eagerly = testing_utils.should_run_eagerly()
        outputs = model.predict(np.array([[0, 1, 0]], dtype='int32'))
        self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]])

    def test_embedding_incorrect_dimension(self):
        with self.assertRaises(ValueError):
            keras.layers.Embedding(input_dim=0, output_dim=1)

        with self.assertRaises(ValueError):
            keras.layers.Embedding(input_dim=1, output_dim=0)

    @combinations.generate(combinations.combine(mode=['graph', 'eager']))
    def test_eager_gpu_cpu(self):
        l = keras.layers.Embedding(output_dim=2, input_dim=2)
        l.build((None, 2))
        inputs = keras.backend.constant([[0, 1, 0]], dtype='int32')
        with tf.GradientTape() as tape:
            output = l(inputs)
        gs = tape.gradient(output, l.weights)
        opt = tf.compat.v1.train.AdagradOptimizer(0.1)
        opt.apply_gradients(zip(gs, l.weights))
        self.assertAllEqual(len(gs), 1)

    @keras_parameterized.run_all_keras_modes
    def test_embedding_with_ragged_input(self):
        layer = keras.layers.Embedding(
            input_dim=3,
            output_dim=2,
            weights=[np.array([[0., 0.], [1., 1.], [2., 2.]])])
        inputs = keras.layers.Input(shape=(None, ),
                                    dtype=tf.float32,
                                    ragged=True)
        # pylint: disable=unnecessary-lambda
        outputs = keras.layers.Lambda(
            lambda args: keras.backend.identity(args))(inputs)
        # pylint: enable=unnecessary-lambda
        outputs = layer(outputs)

        model = keras.Model(inputs, outputs)
        model.run_eagerly = testing_utils.should_run_eagerly()
        outputs = model.predict(
            tf.ragged.constant([[1., 2., 2.], [0.], [1., 2.]], ragged_rank=1))
        self.assertAllClose(
            outputs,
            tf.ragged.constant([[[1., 1.], [2., 2.], [2., 2.]], [[0., 0.]],
                                [[1., 1.], [2., 2.]]],
                               ragged_rank=1))

    @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
    def test_embedding_with_sharded_variable(self):
        layer = keras.layers.Embedding(input_dim=5, output_dim=2)
        v = [
            tf.Variable([[1., 2.], [3., 4.]]),
            tf.Variable([[5., 6.], [7., 8.]]),
            tf.Variable([[9., 10.]])
        ]
        model = keras.models.Sequential([layer])
        layer.embeddings = sharded_variable.ShardedVariable(v)
        model.run_eagerly = testing_utils.should_run_eagerly()
        outputs = model.predict(np.array([[0, 2, 4]], dtype='int32'))
        self.assertAllClose(outputs, [[[1., 2.], [5., 6.], [9., 10.]]])

    @testing_utils.enable_v2_dtype_behavior
    def test_mixed_precision_embedding(self):
        try:
            policy.set_policy('mixed_float16')
            layer = keras.layers.Embedding(input_dim=5, output_dim=2)
            self.assertEqual(layer._dtype_policy.name, 'mixed_float16')
            outputs = layer(np.array([0, 1, 2]))
            self.assertEqual(outputs.dtype, 'float16')
        finally:
            policy.set_policy('float32')
Example #7
class RMSpropOptimizerTest(tf.test.TestCase, parameterized.TestCase):
    def _rmsprop_update_numpy(self, var, g, mg, rms, mom, lr, rho, momentum,
                              epsilon, centered):
        rms_t = rms * rho + (1 - rho) * g * g
        if centered:
            mg_t = mg * rho + (1 - rho) * g
            denom_t = rms_t - mg_t * mg_t
        else:
            mg_t = mg
            denom_t = rms_t
        if momentum > 0.:
            mom_t = momentum * mom + lr * g / (np.sqrt(denom_t + epsilon))
            var_t = var - mom_t
        else:
            mom_t = mom
            var_t = var - lr * g / (np.sqrt(denom_t) + epsilon)
        return var_t, mg_t, rms_t, mom_t
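
    # Reference form of the update mirrored by the helper above (a restatement
    # of the code, not a change in behavior): with decay `rho`, learning rate
    # `lr`, gradient `g`, and small constant `epsilon`,
    #   rms_t = rho * rms + (1 - rho) * g^2
    #   mg_t  = rho * mg + (1 - rho) * g              (centered variant only)
    #   denom = rms_t - mg_t^2 if centered else rms_t
    #   momentum > 0:  mom_t = momentum * mom + lr * g / sqrt(denom + epsilon)
    #                  var_t = var - mom_t
    #   momentum == 0: var_t = var - lr * g / (sqrt(denom) + epsilon)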

    def _sparse_rmsprop_update_numpy(self, var, gindexs, gvalues, mg, rms, mom,
                                     lr, rho, momentum, epsilon, centered):
        mg_t = copy.deepcopy(mg)
        rms_t = copy.deepcopy(rms)
        mom_t = copy.deepcopy(mom)
        var_t = copy.deepcopy(var)
        for i in range(len(gindexs)):
            gindex = gindexs[i]
            gvalue = gvalues[i]
            rms_t[gindex] = rms[gindex] * rho + (1 - rho) * gvalue * gvalue
            if centered:
                mg_t[gindex] = mg_t[gindex] * rho + (1 - rho) * gvalue
                denom_t = rms_t[gindex] - mg_t[gindex] * mg_t[gindex]
            else:
                denom_t = rms_t[gindex]
            if momentum > 0.:
                mom_t[gindex] = momentum * mom[gindex] + lr * gvalue / np.sqrt(
                    denom_t + epsilon)
                var_t[gindex] = var[gindex] - mom_t[gindex]
            else:
                mom_t[gindex] = mom[gindex]
                var_t[gindex] = var[gindex] - lr * gvalue / (np.sqrt(denom_t) +
                                                             epsilon)
        return var_t, mg_t, rms_t, mom_t

    def testDense(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for (dtype, learning_rate, rho, momentum, epsilon,
             centered) in _TESTPARAMS:
            with tf.compat.v1.get_default_graph().as_default(
            ), testing_utils.use_gpu():
                # Initialize variables for numpy implementation.
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np, dtype=dtype)
                var1 = tf.Variable(var1_np, dtype=dtype)
                grads0 = tf.constant(grads0_np, dtype=dtype)
                grads1 = tf.constant(grads1_np, dtype=dtype)
                opt = rmsprop.RMSprop(learning_rate=learning_rate,
                                      rho=rho,
                                      momentum=momentum,
                                      epsilon=epsilon,
                                      centered=centered)

                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                if centered:
                    mg0 = opt.get_slot(var0, "mg")
                    mg1 = opt.get_slot(var1, "mg")
                else:
                    mg0 = None
                    mg1 = None

                if momentum > 0.:
                    mom0 = opt.get_slot(var0, "momentum")
                    mom1 = opt.get_slot(var1, "momentum")
                else:
                    mom0 = None
                    mom1 = None

                rms0 = opt.get_slot(var0, "rms")
                self.assertIsNotNone(rms0)
                rms1 = opt.get_slot(var1, "rms")
                self.assertIsNotNone(rms1)

                mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of RMSprop
                for _ in range(1, 4):
                    self.evaluate(update)

                    var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
                        var0_np, grads0_np, mg0_np, rms0_np, mom0_np,
                        learning_rate, rho, momentum, epsilon, centered)
                    var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
                        var1_np, grads1_np, mg1_np, rms1_np, mom1_np,
                        learning_rate, rho, momentum, epsilon, centered)

                    # Validate updated params
                    if centered:
                        self.assertAllCloseAccordingToType(
                            mg0_np, self.evaluate(mg0))
                        self.assertAllCloseAccordingToType(
                            mg1_np, self.evaluate(mg1))
                    if momentum > 0.:
                        self.assertAllCloseAccordingToType(
                            mom0_np, self.evaluate(mom0))
                        self.assertAllCloseAccordingToType(
                            mom1_np, self.evaluate(mom1))
                    self.assertAllCloseAccordingToType(rms0_np,
                                                       self.evaluate(rms0))
                    self.assertAllCloseAccordingToType(rms1_np,
                                                       self.evaluate(rms1))
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))

    def testDenseWithLearningRateDecay(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            var0_np = np.array([1.0, 2.0])
            grads0_np = np.array([0.1, 0.2])
            var1_np = np.array([3.0, 4.0])
            grads1_np = np.array([0.01, 0.2])

            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)
            learning_rate = 0.01
            rho = 0.9
            momentum = 0.0
            epsilon = 1e-7
            centered = False
            decay = 0.5
            opt = rmsprop.RMSprop(learning_rate=learning_rate,
                                  rho=rho,
                                  momentum=momentum,
                                  epsilon=epsilon,
                                  centered=centered,
                                  decay=decay)

            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)
            if momentum > 0.:
                mom0 = opt.get_slot(var0, "momentum")
                mom1 = opt.get_slot(var1, "momentum")
            else:
                mom0 = None
                mom1 = None

            mg0_np = np.array([0.0, 0.0])
            mg1_np = np.array([0.0, 0.0])
            rms0_np = np.array([0.0, 0.0])
            rms1_np = np.array([0.0, 0.0])
            mom0_np = np.array([0.0, 0.0])
            mom1_np = np.array([0.0, 0.0])

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 2 steps of RMSprop
            for t in range(2):
                self.evaluate(update)

                lr = learning_rate / (1 + decay * t)
                var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
                    var0_np, grads0_np, mg0_np, rms0_np, mom0_np, lr, rho,
                    momentum, epsilon, centered)
                var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
                    var1_np, grads1_np, mg1_np, rms1_np, mom1_np, lr, rho,
                    momentum, epsilon, centered)

                # Validate updated params
                self.assertAllCloseAccordingToType(rms0_np,
                                                   self.evaluate(rms0))
                self.assertAllCloseAccordingToType(rms1_np,
                                                   self.evaluate(rms1))
                if momentum > 0.:
                    self.assertAllCloseAccordingToType(mom0_np,
                                                       self.evaluate(mom0))
                    self.assertAllCloseAccordingToType(mom1_np,
                                                       self.evaluate(mom1))
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))

    def testDenseWithLearningRateInverseTimeDecay(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            var0_np = np.array([1.0, 2.0])
            grads0_np = np.array([0.1, 0.2])
            var1_np = np.array([3.0, 4.0])
            grads1_np = np.array([0.01, 0.2])

            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)
            learning_rate = 0.01
            rho = 0.9
            momentum = 0.0
            epsilon = 1e-7
            centered = False
            decay = 0.5
            lr_schedule = learning_rate_schedule.InverseTimeDecay(
                learning_rate, decay_steps=1.0, decay_rate=decay)
            opt = rmsprop.RMSprop(learning_rate=lr_schedule,
                                  rho=rho,
                                  momentum=momentum,
                                  epsilon=epsilon,
                                  centered=centered)

            update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

            rms0 = opt.get_slot(var0, "rms")
            self.assertIsNotNone(rms0)
            rms1 = opt.get_slot(var1, "rms")
            self.assertIsNotNone(rms1)
            if momentum > 0.:
                mom0 = opt.get_slot(var0, "momentum")
                mom1 = opt.get_slot(var1, "momentum")
            else:
                mom0 = None
                mom1 = None

            mg0_np = np.array([0.0, 0.0])
            mg1_np = np.array([0.0, 0.0])
            rms0_np = np.array([0.0, 0.0])
            rms1_np = np.array([0.0, 0.0])
            mom0_np = np.array([0.0, 0.0])
            mom1_np = np.array([0.0, 0.0])

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))

            # Run 2 steps of RMSprop
            for t in range(2):
                self.evaluate(update)

                lr = learning_rate / (1 + decay * t)
                var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
                    var0_np, grads0_np, mg0_np, rms0_np, mom0_np, lr, rho,
                    momentum, epsilon, centered)
                var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
                    var1_np, grads1_np, mg1_np, rms1_np, mom1_np, lr, rho,
                    momentum, epsilon, centered)

                # Validate updated params
                self.assertAllCloseAccordingToType(rms0_np,
                                                   self.evaluate(rms0))
                self.assertAllCloseAccordingToType(rms1_np,
                                                   self.evaluate(rms1))
                if momentum > 0.:
                    self.assertAllCloseAccordingToType(mom0_np,
                                                       self.evaluate(mom0))
                    self.assertAllCloseAccordingToType(mom1_np,
                                                       self.evaluate(mom1))
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))

    def testMinimizeSparseResourceVariable(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
                    return pred * pred

                sgd_op = rmsprop.RMSprop(learning_rate=1.0,
                                         rho=0.0,
                                         momentum=0.0,
                                         epsilon=0.0,
                                         centered=False).minimize(
                                             loss, var_list=[var0])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllCloseAccordingToType([[1.0, 2.0]],
                                                   self.evaluate(var0))
                # Run 1 step of sgd
                self.evaluate(sgd_op)
                # Validate updated params
                self.assertAllCloseAccordingToType([[0., 1.]],
                                                   self.evaluate(var0),
                                                   atol=0.01)

    def testMinimizeSparseResourceVariableCentered(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                if test_util.is_xla_enabled() and dtype.is_complex:
                    self.skipTest("b/143578550")
                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
                    return pred * pred

                # loss = lambda: pred * pred  # pylint: disable=cell-var-from-loop
                sgd_op = rmsprop.RMSprop(learning_rate=1.0,
                                         rho=0.0,
                                         momentum=0.0,
                                         epsilon=1.0,
                                         centered=True).minimize(
                                             loss, var_list=[var0])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllCloseAccordingToType([[1.0, 2.0]],
                                                   self.evaluate(var0))
                # Run 1 step of sgd
                self.evaluate(sgd_op)
                # Validate updated params
                self.assertAllCloseAccordingToType([[-111, -138]],
                                                   self.evaluate(var0),
                                                   atol=0.01)

    def testSparse(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for (dtype, learning_rate, rho, momentum, epsilon,
             centered) in _TESTPARAMS:
            with tf.compat.v1.get_default_graph().as_default(
            ), testing_utils.use_gpu():
                # Initialize variables for numpy implementation.
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0_np_indices = np.array([0], dtype=np.int32)
                grads0 = tf.IndexedSlices(tf.constant(grads0_np),
                                          tf.constant(grads0_np_indices),
                                          tf.constant([1]))
                grads1_np_indices = np.array([1], dtype=np.int32)
                grads1 = tf.IndexedSlices(tf.constant(grads1_np),
                                          tf.constant(grads1_np_indices),
                                          tf.constant([1]))
                opt = rmsprop.RMSprop(learning_rate=learning_rate,
                                      rho=rho,
                                      momentum=momentum,
                                      epsilon=epsilon,
                                      centered=centered)
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                if centered:
                    mg0 = opt.get_slot(var0, "mg")
                    self.assertEqual(mg0 is not None, centered)
                    mg1 = opt.get_slot(var1, "mg")
                    self.assertEqual(mg1 is not None, centered)
                else:
                    mg0 = None
                    mg1 = None
                rms0 = opt.get_slot(var0, "rms")
                self.assertIsNotNone(rms0)
                rms1 = opt.get_slot(var1, "rms")
                self.assertIsNotNone(rms1)
                if momentum > 0.:
                    mom0 = opt.get_slot(var0, "momentum")
                    mom1 = opt.get_slot(var1, "momentum")
                else:
                    mom0 = None
                    mom1 = None

                mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                rms1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of RMSprop
                for _ in range(1, 4):
                    self.evaluate(update)

                    var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy(
                        var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np,
                        mom0_np, learning_rate, rho, momentum, epsilon,
                        centered)
                    var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy(
                        var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np,
                        mom1_np, learning_rate, rho, momentum, epsilon,
                        centered)

                    # Validate updated params
                    if centered:
                        self.assertAllCloseAccordingToType(
                            mg0_np, self.evaluate(mg0))
                        self.assertAllCloseAccordingToType(
                            mg1_np, self.evaluate(mg1))
                    self.assertAllCloseAccordingToType(rms0_np,
                                                       self.evaluate(rms0))
                    self.assertAllCloseAccordingToType(rms1_np,
                                                       self.evaluate(rms1))
                    if momentum > 0.:
                        self.assertAllCloseAccordingToType(
                            mom0_np, self.evaluate(mom0))
                        self.assertAllCloseAccordingToType(
                            mom1_np, self.evaluate(mom1))
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))

    @combinations.generate(combinations.combine(mode=["eager"]))
    def testCallableParams(self):
        for dtype in _DATA_TYPES:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)

            learning_rate = lambda: 2.0
            rho = lambda: 0.9
            momentum = lambda: 0.0
            epsilon = 1.0
            opt = rmsprop.RMSprop(learning_rate, rho, momentum, epsilon)

            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))
            self.assertAllClose([3.0, 4.0], self.evaluate(var1))
            # Step 1: rms starts at zero and epsilon is 1, so we should see
            # roughly a normal update: v -= grad * learning_rate
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)),
                    2.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0))
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    3.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)),
                    4.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0))
                ]), self.evaluate(var1))
            # Step 2: the root mean square accumulators contain the previous update.
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            # Check the parameters.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)) -
                    (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0)),
                    2.0 - (0.1 * 2.0 / math.sqrt(0.001 + 1.0)) -
                    (0.1 * 2.0 / math.sqrt(0.001 * 0.9 + 0.001 + 1.0))
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    3.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)) -
                    (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0)),
                    4.0 - (0.01 * 2.0 / math.sqrt(0.00001 + 1.0)) -
                    (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0))
                ]), self.evaluate(var1))

    def testConstructRMSpropWithLR(self):
        opt = rmsprop.RMSprop(lr=1.0)
        opt_2 = rmsprop.RMSprop(learning_rate=0.1, lr=1.0)
        opt_3 = rmsprop.RMSprop(learning_rate=0.1)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))

    @combinations.generate(combinations.combine(mode=["eager"]))
    def testSlotsUniqueEager(self):
        v1 = tf.Variable(1.)
        v2 = tf.Variable(1.)

        opt = rmsprop.RMSprop(1., momentum=0., centered=False)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be the iteration count, plus one unique slot variable
        # each for v1 and v2.
        self.assertLen(set({id(v) for v in opt.variables()}), 3)
        self.assertEqual(self.evaluate(opt.variables()[0]),
                         self.evaluate(opt.iterations))

        opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=False)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be the iteration count, plus two unique slot variables
        # each for v1 and v2.
        self.assertLen(set({id(v) for v in opt.variables()}), 5)
        self.assertEqual(self.evaluate(opt.variables()[0]),
                         self.evaluate(opt.iterations))

        opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=True)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be the iteration count, plus three unique slot variables
        # each for v1 and v2.
        self.assertLen(set({id(v) for v in opt.variables()}), 7)
        self.assertEqual(self.evaluate(opt.variables()[0]),
                         self.evaluate(opt.iterations))
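
# Hedged usage sketch (public tf.keras API rather than the internal `rmsprop`
# module used above, and assuming the TF 2.x optimizer_v2 behavior these tests
# exercise): a single minimize() step creates the iteration counter and the
# per-variable slot variables that testSlotsUniqueEager counts.
v = tf.Variable([1.0, 2.0])
opt = tf.keras.optimizers.RMSprop(learning_rate=1.0, momentum=0.2, centered=True)
opt.minimize(lambda: tf.reduce_sum(v * v), var_list=[v])
print(len(opt.variables()))  # iterations + rms + momentum + mg slots for `v` -> 4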
Example #8
class KerasFunctionalMetricsTest(tf.test.TestCase, parameterized.TestCase):
    def test_metrics(self):
        with self.cached_session():
            y_a = K.variable(np.random.random((6, 7)))
            y_b = K.variable(np.random.random((6, 7)))
            for metric in [
                    metrics.binary_accuracy, metrics.categorical_accuracy
            ]:
                output = metric(y_a, y_b)
                self.assertEqual(K.eval(output).shape, (6, ))

    def test_sparse_categorical_accuracy_int(self):
        with self.cached_session():
            metric = metrics.sparse_categorical_accuracy
            y_true = K.variable(np.random.randint(0, 7, (6, )))
            y_pred = K.variable(np.random.random((6, 7)))
            self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6, ))

            # Test correctness if the shape of y_true is (num_samples,)
            y_true = K.variable([1., 0., 0., 0.])
            y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3],
                                 [0.9, 0.1]])
            self.assertAllEqual(K.eval(metric(y_true, y_pred)),
                                [0., 1., 1., 1.])

            # Test correctness if the shape of y_true is (num_samples, 1)
            y_true = K.variable([[1.], [0.], [0.], [0.]])
            y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3],
                                 [0.9, 0.1]])
            self.assertAllEqual(K.eval(metric(y_true, y_pred)),
                                [0., 1., 1., 1.])

            # Test correctness if the shape of y_true is (batch_size, seq_length) and
            # y_pred is (batch_size, seq_length, num_classes)
            y_pred = K.variable(
                np.array([[[0.2, 0.3, 0.1], [0.1, 0.2, 0.7]],
                          [[0.3, 0.2, 0.1], [0.7, 0.2, 0.1]]]))
            y_true = K.variable(np.array([[1, 0], [1, 0]]))
            self.assertAllEqual(K.eval(metric(y_true, y_pred)),
                                [[1., 0.], [0., 1.]])

    def test_sparse_categorical_accuracy_float(self):
        with self.cached_session():
            metric = metrics.sparse_categorical_accuracy
            y_true = K.variable(np.random.random((6, )))
            y_pred = K.variable(np.random.random((6, 7)))
            self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6, ))

    @combinations.generate(combinations.combine(mode=['eager']))
    def test_sparse_categorical_accuracy_eager(self):
        """Tests that ints passed in via Eager return results. See b/113504761."""
        metric = metrics.sparse_categorical_accuracy
        y_true = np.arange(6).reshape([6, 1])
        y_pred = np.arange(36).reshape([6, 6])
        self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.])

    @combinations.generate(combinations.combine(mode=['eager']))
    def test_sparse_categorical_accuracy_float_eager(self):
        """Tests that floats passed in via Eager return results. See b/113504761."""
        metric = metrics.sparse_categorical_accuracy
        y_true = np.arange(6, dtype=np.float32).reshape([6, 1])
        y_pred = np.arange(36).reshape([6, 6])
        self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.])

    def test_sparse_top_k_categorical_accuracy(self):
        with self.cached_session():
            # Test correctness if the shape of y_true is (num_samples, 1)
            y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
            y_true = K.variable(np.array([[1], [0]]))
            result = K.eval(
                metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
            self.assertEqual(np.mean(result), 1)
            result = K.eval(
                metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
            self.assertEqual(np.mean(result), 0.5)
            result = K.eval(
                metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
            self.assertEqual(np.mean(result), 0.)

            # Test correctness if the shape of y_true is (num_samples,)
            y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
            y_true = K.variable(np.array([1, 0]))
            result = K.eval(
                metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
            self.assertEqual(np.mean(result), 1)
            result = K.eval(
                metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
            self.assertEqual(np.mean(result), 0.5)
            result = K.eval(
                metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
            self.assertEqual(np.mean(result), 0.)

            # Test correctness if the shape of y_true is (batch_size, seq_length) and
            # y_pred is (batch_size, seq_length, num_classes)
            y_pred = K.variable(
                np.array([[[0.3, 0.2, 0.1], [0.1, 0.2, 0.7], [0.1, 0.2, 0.7]],
                          [[0.3, 0.2, 0.1], [0.1, 0.2, 0.7], [0.3, 0.2,
                                                              0.1]]]))
            y_true = K.variable(np.array([[1, 0, 0], [1, 0, 1]]))
            result = K.eval(
                metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
            self.assertEqual(np.mean(result), 1)
            result = K.eval(
                metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
            self.assertEqual(np.mean(result), 0.5)
            result = K.eval(
                metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
            self.assertEqual(np.mean(result), 0.)

    def test_top_k_categorical_accuracy(self):
        with self.cached_session():
            y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
            y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]]))
            result = K.eval(
                metrics.top_k_categorical_accuracy(y_true, y_pred, k=3))
            self.assertEqual(np.mean(result), 1)
            result = K.eval(
                metrics.top_k_categorical_accuracy(y_true, y_pred, k=2))
            self.assertEqual(np.mean(result), 0.5)
            result = K.eval(
                metrics.top_k_categorical_accuracy(y_true, y_pred, k=1))
            self.assertEqual(np.mean(result), 0.)
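
# Hedged eager sanity check (public tf.keras functional metrics; the tensors
# below are assumptions, not taken from the tests above): each functional
# metric returns one value per sample, which is why the assertions above
# compare per-sample results before taking a mean.
y_true = tf.constant([[1], [0]])
y_pred = tf.constant([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])
acc = tf.keras.metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)
print(acc.numpy())  # [1., 0.] -> mean 0.5, matching the k=2 case above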
Example #9
class MappingTests(keras_parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTracking(self):
    with self.test_session():
      model = HasMapping()
      output = model(tf.ones([32, 2]))
      self.assertAllEqual([32, 7], output.shape.as_list())
      self.assertEqual(5, len(model.layers))
      six.assertCountEqual(self, model.layers, model.layer_dict.layers)
      self.assertEqual(1, len(model._checkpoint_dependencies))
      self.assertIs(model.layer_dict, model._checkpoint_dependencies[0].ref)
      self.evaluate([v.initializer for v in model.variables])
      test_var = model.layer_dict["output"].kernel
      self.evaluate(test_var.assign(tf.ones([6, 7])))
      save_path = os.path.join(self.get_temp_dir(), "ckpt")
      model.save_weights(save_path)
      self.evaluate(test_var.assign(tf.zeros([6, 7])))
      model.load_weights(save_path)
      self.assertAllEqual(numpy.ones([6, 7]),
                          self.evaluate(test_var))

  def testLayerCollectionWithExternalMutation(self):
    d = {}
    root = tf.Module()
    root.wrapper = d
    self.assertEqual([], root.wrapper.layers)
    self.assertEqual([], root.wrapper.trainable_weights)
    layer1 = core.Dense(1)
    layer2 = core.Dense(1)
    d["a"] = layer1
    d["b"] = layer2
    self.assertEqual([layer1, layer2], root.wrapper.layers)
    # The layers still have not created any variables.
    self.assertEqual([], root.wrapper.trainable_weights)

  def testDictWrapperBadKeys(self):
    a = tf.Module()
    a.d = {}
    a.d[1] = data_structures.wrap_or_unwrap([])
    model = training.Model()
    model.sub = a
    save_path = os.path.join(self.get_temp_dir(), "ckpt")
    with self.assertRaisesRegex(ValueError, "non-string key"):
      model.save_weights(save_path)

  def testDictWrapperNoDependency(self):
    a = tf.Module()
    a.d = data_structures.NoDependency({})
    a.d[1] = [3]
    self.assertEqual([a], util.list_objects(a))
    model = training.Model()
    model.sub = a
    save_path = os.path.join(self.get_temp_dir(), "ckpt")
    model.save_weights(save_path)
    model.load_weights(save_path)

  def testNonStringKeyNotTrackableValue(self):
    a = tf.Module()
    a.d = {}
    a.d["a"] = [3]
    a.d[1] = data_structures.NoDependency([3])
    self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a))
    model = training.Model()
    model.sub = a
    save_path = os.path.join(self.get_temp_dir(), "ckpt")
    model.save_weights(save_path)
    model.load_weights(save_path)

  def testNonAppendNotTrackable(self):
    # Non-append mutations (deleting or overwriting values) are OK when the
    # values aren't tracked.
    a = tf.Module()
    a.d = {}
    a.d["a"] = [3]
    a.d[1] = 3
    a.d[1] = 2
    self.assertEqual(2, a.d[1])
    del a.d[1]
    a.d[2] = data_structures.NoDependency(tf.Module())
    second = tf.Module()
    a.d[2] = data_structures.NoDependency(second)
    self.assertIs(second, a.d[2])
    self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a))
    model = training.Model()
    model.sub = a
    save_path = os.path.join(self.get_temp_dir(), "ckpt")
    model.save_weights(save_path)
    model.load_weights(save_path)

  def testPopNoSave(self):
    model = training.Model()
    model.d = {}
    model.d["a"] = []
    model.d.pop("a")
    save_path = os.path.join(self.get_temp_dir(), "ckpt")
    with self.assertRaisesRegex(ValueError, "Unable to save"):
      model.save_weights(save_path)

  def testExternalModificationNoSave(self):
    model = training.Model()
    external_reference = {}
    model.d = external_reference
    external_reference["a"] = []
    save_path = os.path.join(self.get_temp_dir(), "ckpt")
    with self.assertRaisesRegex(ValueError, "modified outside the wrapper"):
      model.save_weights(save_path)

  def testOverwriteCanStillSave(self):
    model = training.Model()
    model.d = {}
    model.d["a"] = {}
    model.d["a"] = {}
    save_path = os.path.join(self.get_temp_dir(), "ckpt")
    model.save_weights(save_path)

  def testIter(self):
    model = training.Model()
    model.d = {1: 3}
    model.d[1] = 3
    self.assertEqual([1], list(model.d))
    new_dict = {}
    # This update() is super tricky. If the dict wrapper subclasses dict,
    # CPython will access its storage directly instead of calling any
    # methods/properties on the object. So the options are either not to
    # subclass dict (in which case update will call normal iter methods, but the
    # object won't pass isinstance checks) or to subclass dict and keep that
    # storage updated (not shadowing all its methods like ListWrapper).
    new_dict.update(model.d)
    self.assertEqual({1: 3}, new_dict)
Example #10
class TraceModelCallTest(keras_parameterized.TestCase):

  def _assert_all_close(self, expected, actual):
    if not tf.executing_eagerly():
      with self.cached_session() as sess:
        K._initialize_variables(sess)
        self.assertAllClose(expected, actual)
    else:
      self.assertAllClose(expected, actual)

  @keras_parameterized.run_with_all_model_types
  @keras_parameterized.run_all_keras_modes
  def test_trace_model_outputs(self):
    input_dim = 5 if testing_utils.get_model_type() == 'functional' else None
    model = testing_utils.get_small_mlp(10, 3, input_dim)
    inputs = tf.ones((8, 5))

    if input_dim is None:
      with self.assertRaisesRegex(ValueError, 'input shapes have not been set'):
        saving_utils.trace_model_call(model)
      model._set_inputs(inputs)

    fn = saving_utils.trace_model_call(model)
    signature_outputs = fn(inputs)
    if model.output_names:
      expected_outputs = {model.output_names[0]: model(inputs)}
    else:
      expected_outputs = {'output_1': model(inputs)}

    self._assert_all_close(expected_outputs, signature_outputs)

  @keras_parameterized.run_with_all_model_types
  @keras_parameterized.run_all_keras_modes
  def test_trace_model_outputs_after_fitting(self):
    input_dim = 5 if testing_utils.get_model_type() == 'functional' else None
    model = testing_utils.get_small_mlp(10, 3, input_dim)
    model.compile(
        optimizer='sgd',
        loss='mse',
        run_eagerly=testing_utils.should_run_eagerly())
    model.fit(
        x=np.random.random((8, 5)).astype(np.float32),
        y=np.random.random((8, 3)).astype(np.float32),
        epochs=2)

    inputs = tf.ones((8, 5))

    fn = saving_utils.trace_model_call(model)
    signature_outputs = fn(inputs)
    if model.output_names:
      expected_outputs = {model.output_names[0]: model(inputs)}
    else:
      expected_outputs = {'output_1': model(inputs)}

    self._assert_all_close(expected_outputs, signature_outputs)

  @keras_parameterized.run_with_all_model_types(exclude_models='sequential')
  @keras_parameterized.run_all_keras_modes
  def test_trace_multi_io_model_outputs(self):
    input_dim = 5
    num_classes = 3
    num_classes_b = 4
    input_a = keras.layers.Input(shape=(input_dim,), name='input_a')
    input_b = keras.layers.Input(shape=(input_dim,), name='input_b')

    dense = keras.layers.Dense(num_classes, name='dense')
    dense2 = keras.layers.Dense(num_classes_b, name='dense2')
    dropout = keras.layers.Dropout(0.5, name='dropout')
    branch_a = [input_a, dense]
    branch_b = [input_b, dense, dense2, dropout]

    model = testing_utils.get_multi_io_model(branch_a, branch_b)

    input_a_np = np.random.random((10, input_dim)).astype(np.float32)
    input_b_np = np.random.random((10, input_dim)).astype(np.float32)

    if testing_utils.get_model_type() == 'subclass':
      with self.assertRaisesRegex(ValueError, 'input shapes have not been set'):
        saving_utils.trace_model_call(model)

    model.compile(
        optimizer='sgd',
        loss='mse',
        run_eagerly=testing_utils.should_run_eagerly())
    model.fit(x=[np.random.random((8, input_dim)).astype(np.float32),
                 np.random.random((8, input_dim)).astype(np.float32)],
              y=[np.random.random((8, num_classes)).astype(np.float32),
                 np.random.random((8, num_classes_b)).astype(np.float32)],
              epochs=2)

    fn = saving_utils.trace_model_call(model)
    signature_outputs = fn([input_a_np, input_b_np])
    outputs = model([input_a_np, input_b_np])
    if model.output_names:
      expected_outputs = {
          model.output_names[0]: outputs[0],
          model.output_names[1]: outputs[1]
      }
    else:
      expected_outputs = {'output_1': outputs[0], 'output_2': outputs[1]}
    self._assert_all_close(expected_outputs, signature_outputs)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_trace_features_layer(self):
    columns = [tf.feature_column.numeric_column('x')]
    model = sequential.Sequential([dense_features.DenseFeatures(columns)])
    model_input = {'x': tf.constant([[1.]])}
    model.predict(model_input, steps=1)
    fn = saving_utils.trace_model_call(model)
    self.assertAllClose({'output_1': [[1.]]}, fn({'x': [[1.]]}))

    columns = [
        tf.feature_column.numeric_column('x'),
        tf.feature_column.numeric_column('y')
    ]
    model = sequential.Sequential([dense_features.DenseFeatures(columns)])
    model_input = {'x': tf.constant([[1.]]),
                   'y': tf.constant([[2.]])}
    model.predict(model_input, steps=1)
    fn = saving_utils.trace_model_call(model)
    self.assertAllClose({'output_1': [[1., 2.]]},
                        fn({'x': [[1.]], 'y': [[2.]]}))

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_specify_input_signature(self):
    model = testing_utils.get_small_sequential_mlp(10, 3, None)
    inputs = tf.ones((8, 5))

    with self.assertRaisesRegex(ValueError, 'input shapes have not been set'):
      saving_utils.trace_model_call(model)

    fn = saving_utils.trace_model_call(
        model, [tf.TensorSpec(shape=[None, 5], dtype=tf.float32)])
    signature_outputs = fn(inputs)
    if model.output_names:
      expected_outputs = {model.output_names[0]: model(inputs)}
    else:
      expected_outputs = {'output_1': model(inputs)}
    self._assert_all_close(expected_outputs, signature_outputs)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_subclassed_model_with_input_signature(self):

    class Model(keras.Model):

      def __init__(self):
        super(Model, self).__init__()
        self.dense = keras.layers.Dense(3, name='dense')

      @tf.function(
          input_signature=[[tf.TensorSpec([None, 5], tf.float32),
                            tf.TensorSpec([None], tf.float32)]],)
      def call(self, inputs, *args):
        x, y = inputs
        return self.dense(x) + y

    model = Model()
    fn = saving_utils.trace_model_call(model)
    x = tf.ones((8, 5), dtype=tf.float32)
    y = tf.ones((3,), dtype=tf.float32)
    expected_outputs = {'output_1': model([x, y])}
    signature_outputs = fn([x, y])
    self._assert_all_close(expected_outputs, signature_outputs)

  @keras_parameterized.run_with_all_model_types
  @keras_parameterized.run_all_keras_modes
  def test_model_with_fixed_input_dim(self):
    """Ensure that the batch_dim is removed when saving.

    When serving or retraining, it is important to reset the batch dim.
    This can be an issue inside of tf.function. See b/132783590 for context.
    """
    model = testing_utils.get_small_mlp(10, 3, 5)

    loss_object = keras.losses.MeanSquaredError()
    optimizer = gradient_descent.SGD()

    @tf.function
    def train_step(data, labels):
      with tf.GradientTape() as tape:
        predictions = model(data)
        loss = loss_object(labels, predictions)
      gradients = tape.gradient(loss, model.trainable_variables)
      optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    x = np.random.random((8, 5))
    y = np.random.random((8, 3))

    train_step(x, y)

    fn = saving_utils.trace_model_call(model)
    self.assertEqual(fn.input_signature[0].shape.as_list(),
                     tf.TensorShape([None, 5]).as_list())
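
The assertion above relies on saving_utils.trace_model_call replacing the concrete batch size seen during training with None. A minimal standalone sketch of that batch-dimension relaxation, using only the public tf.TensorSpec API (the names below are illustrative, not from the test module):

import tensorflow as tf

# Shape observed while training with a fixed batch of 8 examples.
fixed_spec = tf.TensorSpec(shape=[8, 5], dtype=tf.float32)
# Relax the leading (batch) dimension so the saved signature serves any batch size.
relaxed_spec = tf.TensorSpec(shape=[None] + fixed_spec.shape.as_list()[1:],
                             dtype=fixed_spec.dtype)
assert relaxed_spec.shape.as_list() == [None, 5]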
Example #11
class TestLayerCallTracing(tf.test.TestCase, parameterized.TestCase):

  def test_functions_have_same_trace(self):

    class Layer(keras.engine.base_layer.Layer):

      def call(self, inputs):
        return inputs

      def call2(self, inputs):
        return inputs * 2

    layer = Layer()

    call_collection = keras_save.LayerCallCollection(layer)
    fn = call_collection.add_function(layer.call, 'call', True)
    fn2 = call_collection.add_function(layer.call2, 'call2', True)

    with keras_save.tracing_scope():
      fn(np.ones((2, 3)))
      fn(np.ones((4, 5)))

    self.assertLen(
        fn.wrapped_call._list_all_concrete_functions_for_serialization(), 2)
    self.assertLen(
        fn2.wrapped_call._list_all_concrete_functions_for_serialization(), 2)

    # Check that the shapes are correct
    self.assertEqual(
        {(2, 3), (4, 5)},
        set(tuple(c.structured_input_signature[0][0].shape.as_list()) for c in
            fn2.wrapped_call._list_all_concrete_functions_for_serialization()))

  def test_training_arg_replacement(self):

    def assert_num_traces(layer_cls, training_keyword):
      layer = layer_cls()
      call_collection = keras_save.LayerCallCollection(layer)
      fn = call_collection.add_function(layer.call, 'call', True)

      with keras_save.tracing_scope():
        fn(np.ones((2, 3)), training=True)
      self.assertLen(
          fn.wrapped_call._list_all_concrete_functions_for_serialization(), 2)
      with keras_save.tracing_scope():
        fn(np.ones((2, 4)), training=False)
      self.assertLen(
          fn.wrapped_call._list_all_concrete_functions_for_serialization(), 4)

      if training_keyword:
        with keras_save.tracing_scope():
          fn(np.ones((2, 5)), True)
        self.assertLen(
            fn.wrapped_call._list_all_concrete_functions_for_serialization(), 6)
        with keras_save.tracing_scope():
          fn(np.ones((2, 6)))
        self.assertLen(
            fn.wrapped_call._list_all_concrete_functions_for_serialization(), 8)

    class LayerWithTrainingKeyword(keras.engine.base_layer.Layer):

      def call(self, inputs, training=False):
        return inputs * training

    assert_num_traces(LayerWithTrainingKeyword, training_keyword=True)

    class LayerWithKwargs(keras.engine.base_layer.Layer):

      def call(self, inputs, **kwargs):
        return inputs * kwargs['training']

    assert_num_traces(LayerWithKwargs, training_keyword=False)

    class LayerWithChildLayer(keras.engine.base_layer.Layer):

      def __init__(self):
        self.child = LayerWithKwargs()
        super(LayerWithChildLayer, self).__init__()

      def call(self, inputs):
        return self.child(inputs)

    assert_num_traces(LayerWithChildLayer, training_keyword=False)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_maintains_losses(self):
    layer = LayerWithLoss()
    layer(np.ones((2, 3)))
    previous_losses = layer.losses[:]

    call_collection = keras_save.LayerCallCollection(layer)
    fn = call_collection.add_function(layer.call, 'call', True)
    fn(np.ones((2, 3)))

    self.assertAllEqual(previous_losses, layer.losses)
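
LayerWithLoss is defined elsewhere in the original test module. A hedged sketch of what such a layer typically looks like, assuming it registers a loss via add_loss inside call (the class name is a stand-in, not the original helper):

import tensorflow as tf
from tensorflow import keras


class LayerWithLossSketch(keras.layers.Layer):

  def call(self, inputs):
    # Register a per-call loss so layer.losses is non-empty after the layer is called.
    self.add_loss(tf.reduce_sum(inputs))
    return inputs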
Example #12
class DatasetCreatorTest(tf.test.TestCase, parameterized.TestCase):
    def test_dataset_creator(self):
        with self.assertRaisesRegex(
                TypeError,
                "`dataset_fn` for `DatasetCreator` must be a `callable`."):
            dataset_creator.DatasetCreator(2)

        dataset_fn = lambda: 3
        with self.assertRaisesRegex(
                TypeError,
                "The `callable` provided to `DatasetCreator` must return "
                "a Dataset."):
            dataset_creator.DatasetCreator(dataset_fn)()

        dataset_fn = lambda: tf.data.Dataset.from_tensor_slices([1, 1])
        got = dataset_creator.DatasetCreator(dataset_fn)()
        self.assertEqual(
            next(iter(got)),
            next(iter(tf.data.Dataset.from_tensor_slices([1, 1]))))

    def _get_dataset_fn(self):
        def dataset_fn(input_context):
            global_batch_size = 64
            batch_size = input_context.get_per_replica_batch_size(
                global_batch_size)
            dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat()
            dataset = dataset.shard(input_context.num_input_pipelines,
                                    input_context.input_pipeline_id)
            dataset = dataset.batch(batch_size)
            dataset = dataset.prefetch(2)
            return dataset

        return dataset_fn

    @combinations.generate(
        combinations.combine(use_input_options=[True, False]))
    def test_dataset_creator_model_fit_without_strategy(
            self, use_input_options):
        model = sequential.Sequential([core_layers.Dense(10)])
        model.compile(gradient_descent.SGD(), loss="mse")

        input_options = tf.distribute.InputOptions(
        ) if use_input_options else None
        history = model.fit(dataset_creator.DatasetCreator(
            self._get_dataset_fn(), input_options),
                            epochs=10,
                            steps_per_epoch=10,
                            verbose=0)
        self.assertLen(history.history["loss"], 10)

    def _get_parameter_server_strategy(self):
        cluster_def = multi_worker_testing_utils.create_in_process_cluster(
            num_workers=2, num_ps=1, rpc_layer="grpc")
        return tf.distribute.experimental.ParameterServerStrategy(
            SimpleClusterResolver(ClusterSpec(cluster_def), rpc_layer="grpc"))

    @combinations.generate(
        combinations.combine(use_input_options=[True, False]))
    def test_dataset_creator_usage_in_parameter_server_model_fit(
            self, use_input_options):
        strategy = self._get_parameter_server_strategy()
        with strategy.scope():
            model = sequential.Sequential([core_layers.Dense(10)])
        model.compile(gradient_descent.SGD(), loss="mse")

        input_options = tf.distribute.InputOptions(
        ) if use_input_options else None
        history = model.fit(dataset_creator.DatasetCreator(
            self._get_dataset_fn(), input_options),
                            epochs=10,
                            steps_per_epoch=10,
                            verbose=0)
        self.assertLen(history.history["loss"], 10)

    def test_dataset_creator_input_options(self):
        dataset_fn = lambda _: tf.data.Dataset.from_tensor_slices([1, 1])
        input_options = tf.distribute.InputOptions(
            experimental_fetch_to_device=True,
            experimental_per_replica_buffer_size=2)
        x = dataset_creator.DatasetCreator(dataset_fn,
                                           input_options=input_options)
        with tf.distribute.MultiWorkerMirroredStrategy().scope():
            data_handler = data_adapter.get_data_handler(
                x,
                steps_per_epoch=2,
                model=sequential.Sequential([core_layers.Dense(10)]))

        # Ensuring the resulting `DistributedDatasetsFromFunction` has the right
        # options.
        self.assertTrue(
            data_handler._dataset._options.experimental_fetch_to_device)
        self.assertEqual(
            data_handler._dataset._options.
            experimental_per_replica_buffer_size, 2)

    def test_dataset_creator_input_options_with_cluster_coordinator(self):
        dataset_fn = lambda _: tf.data.Dataset.from_tensor_slices([1, 1])
        input_options = tf.distribute.InputOptions(
            experimental_fetch_to_device=True,
            experimental_per_replica_buffer_size=2)
        x = dataset_creator.DatasetCreator(dataset_fn,
                                           input_options=input_options)
        strategy = self._get_parameter_server_strategy()
        with strategy.scope():
            model = sequential.Sequential([core_layers.Dense(10)])
            model._cluster_coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator(
                strategy)
            data_handler = data_adapter.get_data_handler(x,
                                                         steps_per_epoch=2,
                                                         model=model)

        iter_rv = iter(data_handler._dataset)._values[0]
        iter_rv._rebuild_on(model._cluster_coordinator._cluster.workers[0])
        distributed_iterator = iter_rv._get_values()

        # Ensuring the resulting `DistributedIterator` has the right options.
        self.assertTrue(
            distributed_iterator._options.experimental_fetch_to_device)
        self.assertEqual(
            distributed_iterator._options.experimental_per_replica_buffer_size,
            2)
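
The dataset_fn passed to DatasetCreator above builds a shard/batch/prefetch pipeline from the tf.distribute.InputContext it receives. A standalone sketch of the same pipeline with illustrative constants in place of the context object (assuming two input pipelines and one replica per pipeline sharing a global batch of 64):

import tensorflow as tf

num_input_pipelines, input_pipeline_id = 2, 0   # illustrative values
per_replica_batch_size = 64 // 2                # assumed: global batch split across 2 replicas

dataset = (tf.data.Dataset.from_tensors(([1.], [1.])).repeat()
           .shard(num_input_pipelines, input_pipeline_id)
           .batch(per_replica_batch_size)
           .prefetch(2))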
Example #13
class AdagradOptimizerTest(tf.test.TestCase, parameterized.TestCase):
    def doTestBasic(self, use_callable_params=False):
        for dtype in _DATA_TYPES:
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)

            learning_rate = lambda: 3.0
            if not use_callable_params:
                learning_rate = learning_rate()

            ada_opt = adagrad.Adagrad(learning_rate)

            accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

            if not tf.executing_eagerly():
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

            # Fetch params to validate initial values
            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([3.0, 4.0], v1_val)

            # Run 3 steps of adagrad
            for _ in range(3):
                if not tf.executing_eagerly():
                    self.evaluate(ada_update)
                else:
                    ada_opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))
                var0_np, accum0_np = adagrad_update_numpy(
                    var0_np, accum0_np, grads0_np, 3.0)
                var1_np, accum1_np = adagrad_update_numpy(
                    var1_np, accum1_np, grads1_np, 3.0)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testBasic(self):
        self.doTestBasic()

    @combinations.generate(combinations.combine(mode=["eager"]))
    def testBasicCallableParams(self):
        self.doTestBasic(use_callable_params=True)

    def testBasicWithLearningRateDecay(self):
        for dtype in _DATA_TYPES:
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)

            learning_rate = 3.0
            decay = 0.5

            ada_opt = adagrad.Adagrad(learning_rate, decay=decay)

            accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

            if not tf.executing_eagerly():
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

            # Fetch params to validate initial values
            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([3.0, 4.0], v1_val)

            # Run 3 steps of adagrad
            for t in range(3):
                if not tf.executing_eagerly():
                    self.evaluate(ada_update)
                else:
                    ada_opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))
                lr_np = learning_rate / (1 + decay * t)
                var0_np, accum0_np = adagrad_update_numpy(
                    var0_np, accum0_np, grads0_np, lr_np)
                var1_np, accum1_np = adagrad_update_numpy(
                    var1_np, accum1_np, grads1_np, lr_np)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))

    def testBasicWithLargeEpsilon(self):
        var0_np = np.array([1.0, 2.0])
        var1_np = np.array([3.0, 4.0])
        grads0_np = np.array([0.1, 0.1])
        grads1_np = np.array([0.01, 0.01])
        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 3.0

        ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.0)

        accum0_np = np.array([0.1, 0.1])
        accum1_np = np.array([0.1, 0.1])

        if not tf.executing_eagerly():
            ada_update = ada_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        v0_val, v1_val = self.evaluate([var0, var1])
        self.assertAllClose([1.0, 2.0], v0_val)
        self.assertAllClose([3.0, 4.0], v1_val)

        # Run 3 steps of adagrad
        for _ in range(3):
            if not tf.executing_eagerly():
                self.evaluate(ada_update)
            else:
                ada_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            var0_np, accum0_np = adagrad_update_numpy(var0_np, accum0_np,
                                                      grads0_np, 3.0, 1.0)
            var1_np, accum1_np = adagrad_update_numpy(var1_np, accum1_np,
                                                      grads1_np, 3.0, 1.0)
            self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
            self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

    def testBasicWithLearningRateInverseTimeDecay(self):
        for dtype in _DATA_TYPES:
            var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
            var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
            grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
            var0 = tf.Variable(var0_np)
            var1 = tf.Variable(var1_np)
            grads0 = tf.constant(grads0_np)
            grads1 = tf.constant(grads1_np)

            learning_rate = 3.0
            decay = 0.5
            lr_schedule = learning_rate_schedule.InverseTimeDecay(
                learning_rate, decay_steps=1.0, decay_rate=decay)

            ada_opt = adagrad.Adagrad(lr_schedule)

            accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
            accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)

            if not tf.executing_eagerly():
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

            # Fetch params to validate initial values
            v0_val, v1_val = self.evaluate([var0, var1])
            self.assertAllClose([1.0, 2.0], v0_val)
            self.assertAllClose([3.0, 4.0], v1_val)

            # Run 3 steps of adagrad
            for t in range(3):
                if not tf.executing_eagerly():
                    self.evaluate(ada_update)
                else:
                    ada_opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))
                lr_np = learning_rate / (1 + decay * t)
                var0_np, accum0_np = adagrad_update_numpy(
                    var0_np, accum0_np, grads0_np, lr_np)
                var1_np, accum1_np = adagrad_update_numpy(
                    var1_np, accum1_np, grads1_np, lr_np)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))

    def testMinimizeSparseResourceVariable(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0 = tf.Variable([[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
                    return pred * pred

                sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]],
                                                   self.evaluate(var0))
                # Run 1 step of sgd
                self.evaluate(sgd_op)
                # Validate updated params
                self.assertAllCloseAccordingToType([[0, 1], [3, 4]],
                                                   self.evaluate(var0),
                                                   atol=0.01)

    def testTensorLearningRate(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                learning_rate = tf.constant(3.0)
                ada_opt = adagrad.Adagrad(learning_rate)
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                # Run 3 steps of adagrad
                for _ in range(3):
                    self.evaluate(ada_update)
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, learning_rate)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, learning_rate)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))

    def testSparseBasic(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0, 0.01],
                                     dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0_np_indices = np.array([0, 2], dtype=np.int32)
                grads0 = tf.IndexedSlices(
                    tf.constant(grads0_np[grads0_np_indices]),
                    tf.constant(grads0_np_indices), tf.constant([3]))
                grads1_np_indices = np.array([0, 2], dtype=np.int32)
                grads1 = tf.IndexedSlices(
                    tf.constant(grads1_np[grads1_np_indices]),
                    tf.constant(grads1_np_indices), tf.constant([3]))
                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate)
                ada_update = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))

                accum0_np = np.array([0.1, 0.1, 0.1],
                                     dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1, 0.1],
                                     dtype=dtype.as_numpy_dtype)

                # Run 3 steps of adagrad
                for _ in range(3):
                    self.evaluate(ada_update)

                    var0_np, accum0_np = sparse_adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np_indices,
                        grads0_np[grads0_np_indices], learning_rate)
                    var1_np, accum1_np = sparse_adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np_indices,
                        grads1_np[grads1_np_indices], learning_rate)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))

    def testSparseSingleVarDim(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                grads0_np_indices = np.array([0], dtype=np.int32)
                grads0 = tf.IndexedSlices(
                    tf.constant(grads0_np[grads0_np_indices]),
                    tf.constant(grads0_np_indices), tf.constant([3]))
                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate, epsilon=1.)
                ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0], self.evaluate(var0))

                accum0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)

                # Run 3 steps of adagrad
                for _ in range(3):
                    self.evaluate(ada_update)

                    var0_np, accum0_np = sparse_adagrad_update_numpy(
                        var0_np,
                        accum0_np,
                        grads0_np_indices,
                        grads0_np[grads0_np_indices],
                        learning_rate,
                        epsilon=1.)
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))

    def testSparseRepeatedIndices(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)

                repeated_index_update_var = tf.Variable(var_np, dtype=dtype)
                aggregated_update_var = tf.Variable(var_np, dtype=dtype)
                grad_repeated_index = tf.IndexedSlices(
                    tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
                    tf.constant([1, 1]), tf.constant([2, 1]))
                grad_aggregated = tf.IndexedSlices(
                    tf.constant([0.2], shape=[1, 1], dtype=dtype),
                    tf.constant([1]), tf.constant([2, 1]))
                repeated_update = adagrad.Adagrad(3.0).apply_gradients([
                    (grad_repeated_index, repeated_index_update_var)
                ])
                aggregated_update = adagrad.Adagrad(3.0).apply_gradients([
                    (grad_aggregated, aggregated_update_var)
                ])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertAllClose(self.evaluate(aggregated_update_var),
                                    self.evaluate(repeated_index_update_var))
                for _ in range(3):
                    self.evaluate(repeated_update)
                    self.evaluate(aggregated_update)
                    self.assertAllClose(
                        self.evaluate(aggregated_update_var),
                        self.evaluate(repeated_index_update_var))

    def testSparseRepeatedIndicesByEmbeddingLookUp(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var_repeated = tf.Variable([1.0, 2.0], dtype=dtype)
                loss_repeated = lambda: tf.reduce_sum(  # pylint: disable=g-long-lambda
                    tf.compat.v1.nn.embedding_lookup(var_repeated, [0, 0]))  # pylint: disable=cell-var-from-loop
                var_aggregated = tf.Variable([1.0, 2.0], dtype=dtype)
                loss_aggregated = lambda: 2 * tf.reduce_sum(  # pylint: disable=g-long-lambda
                    tf.compat.v1.nn.embedding_lookup(var_aggregated, [0]))  # pylint: disable=cell-var-from-loop
                update_op_repeated = adagrad.Adagrad(2.0).minimize(
                    loss_repeated, var_list=[var_repeated])
                update_op_aggregated = adagrad.Adagrad(2.0).minimize(
                    loss_aggregated, var_list=[var_aggregated])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertAllCloseAccordingToType(
                    self.evaluate(var_repeated), self.evaluate(var_aggregated))
                for _ in range(3):
                    self.evaluate(update_op_repeated)
                    self.evaluate(update_op_aggregated)
                    self.assertAllCloseAccordingToType(
                        self.evaluate(var_repeated),
                        self.evaluate(var_aggregated))

    def testSparseStability(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half]:
                shape = [1, 6]
                var0_np = np.array([[
                    0.00872496, -0.106952, 0.110467, 0.226505, -0.0147257,
                    -0.0105945
                ]],
                                   dtype=dtype.as_numpy_dtype)
                var0 = tf.Variable(var0_np)
                grads0_np = np.array([[
                    -5.91278e-05, 5.31673e-05, -2.5779e-06, 4.29153e-05,
                    -8.4877e-05, -9.48906e-05
                ]],
                                     dtype=dtype.as_numpy_dtype)
                grads0 = tf.IndexedSlices(tf.constant(grads0_np),
                                          tf.constant([0]), tf.constant(shape))
                ada_opt = adagrad.Adagrad(1.0)
                ada_update = ada_opt.apply_gradients(zip([grads0], [var0]))
                slot0 = ada_opt.get_slot(var0, "accumulator")
                init = tf.compat.v1.global_variables_initializer()
                for _ in range(100):
                    self.evaluate(init)
                    self.evaluate(ada_update)
                    self.assertAllCloseAccordingToType(
                        np.array([[0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]),
                        self.evaluate(slot0))
                    self.assertAllCloseAccordingToType(
                        np.array([[
                            0.00891194, -0.10712013, 0.11047515, 0.22636929,
                            -0.0144573, -0.01029443
                        ]]), self.evaluate(var0))

    def testSharing(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in _DATA_TYPES:
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                learning_rate = 3.0
                ada_opt = adagrad.Adagrad(learning_rate)
                # Apply the optimizer twice.  Both applications will use
                # the same accums.
                ada_update1 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                ada_update2 = ada_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                slot0 = ada_opt.get_slot(var0, "accumulator")
                self.assertEqual(slot0.shape, var0.shape)
                slot1 = ada_opt.get_slot(var1, "accumulator")
                self.assertEqual(slot1.shape, var1.shape)
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values.
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                # Mix the first and the second adagrad for 3 steps.
                self.evaluate(ada_update1)
                self.evaluate(ada_update2)
                self.evaluate(ada_update1)

                accum0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                for _ in range(3):
                    var0_np, accum0_np = adagrad_update_numpy(
                        var0_np, accum0_np, grads0_np, learning_rate)
                    var1_np, accum1_np = adagrad_update_numpy(
                        var1_np, accum1_np, grads1_np, learning_rate)
                self.assertAllCloseAccordingToType(var0_np,
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType(var1_np,
                                                   self.evaluate(var1))

    def testConstructAdagradWithLR(self):
        opt = adagrad.Adagrad(lr=1.0)
        opt_2 = adagrad.Adagrad(learning_rate=0.1, lr=1.0)
        opt_3 = adagrad.Adagrad(learning_rate=0.1)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
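
adagrad_update_numpy and sparse_adagrad_update_numpy are reference helpers defined elsewhere in the original test module. A hedged numpy sketch of the dense update they are expected to implement (accumulate squared gradients, then scale the step by the accumulator); the function name and default epsilon here are assumptions:

import numpy as np


def adagrad_update_numpy_sketch(var, accum, grad, lr, epsilon=1e-7):
  # accum_t = accum_{t-1} + g_t ** 2
  accum = accum + grad * grad
  # var_t = var_{t-1} - lr * g_t / (sqrt(accum_t) + epsilon)
  var = var - lr * grad / (np.sqrt(accum) + epsilon)
  return var, accum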
Example #14
class AdadeltaOptimizerTest(tf.test.TestCase, parameterized.TestCase):

  def doTestBasic(self, use_resource=False, use_callable_params=False):
    num_updates = 4  # number of ADADELTA steps to perform
    for dtype in _DATA_TYPES:
      for grad in [0.2, 0.1, 0.01]:
        for lr in [1.0, 0.5, 0.1]:
          var0_init = [1.0, 2.0]
          var1_init = [3.0, 4.0]
          # In TF2 every tf.Variable is a resource variable, so both branches
          # create the same kind of variable; the use_resource flag only
          # mirrors the parameterized callers below.
          if use_resource:
            var0 = tf.Variable(var0_init, dtype=dtype)
            var1 = tf.Variable(var1_init, dtype=dtype)
          else:
            var0 = tf.Variable(var0_init, dtype=dtype)
            var1 = tf.Variable(var1_init, dtype=dtype)

          grads = tf.constant([grad, grad], dtype=dtype)

          accum = 0.0
          accum_update = 0.0

          # ADADELTA gradient optimizer
          rho = 0.95
          epsilon = 1e-8
          if use_callable_params:
            adadelta_opt = adadelta.Adadelta(
                learning_rate=lambda: lr,  # pylint: disable=cell-var-from-loop
                rho=lambda: rho,  # pylint: disable=cell-var-from-loop
                epsilon=epsilon)  # pylint: disable=cell-var-from-loop
          else:
            adadelta_opt = adadelta.Adadelta(
                learning_rate=lr, rho=rho, epsilon=epsilon)
          if not tf.executing_eagerly():
            adadelta_update = adadelta_opt.apply_gradients(
                zip([grads, grads], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())

            # Assign slots
            slot = [None] * 2
            slot_update = [None] * 2
            slot[0] = adadelta_opt.get_slot(var0, "accum_grad")
            self.assertEqual(slot[0].shape, var0.shape)

            slot_update[0] = adadelta_opt.get_slot(var0, "accum_var")
            self.assertEqual(slot_update[0].shape, var0.shape)

            slot[1] = adadelta_opt.get_slot(var1, "accum_grad")
            self.assertEqual(slot[1].shape, var1.shape)

            slot_update[1] = adadelta_opt.get_slot(var1, "accum_var")
            self.assertEqual(slot_update[1].shape, var1.shape)

          # Fetch params to validate initial values
          self.assertAllClose(var0_init, self.evaluate(var0))
          self.assertAllClose(var1_init, self.evaluate(var1))

          update = [None] * num_updates
          tot_update = 0
          for step in range(num_updates):
            # Run adadelta update for comparison
            if not tf.executing_eagerly():
              self.evaluate(adadelta_update)
            else:
              adadelta_opt.apply_gradients(zip([grads, grads], [var0, var1]))

            # Perform initial update without previous accum values
            accum = accum * rho + (grad**2) * (1 - rho)
            update[step] = (
                np.sqrt(accum_update + epsilon) *
                (1. / np.sqrt(accum + epsilon)) * grad)
            accum_update = (
                accum_update * rho + (update[step]**2) * (1.0 - rho))
            tot_update += update[step] * lr

            if not tf.executing_eagerly():
              # Check that the accumulators have been updated
              # TODO(lxuechen): This is hard to test in eager mode
              for slot_idx in range(2):
                self.assertAllCloseAccordingToType(
                    np.array([accum, accum], dtype=dtype.as_numpy_dtype(0)),
                    self.evaluate(slot[slot_idx]),
                    rtol=1e-5)

                self.assertAllCloseAccordingToType(
                    np.array(
                        [accum_update, accum_update],
                        dtype=dtype.as_numpy_dtype(0)),
                    self.evaluate(slot_update[slot_idx]),
                    rtol=1e-5)

              # Check that the parameters have been updated
              self.assertAllCloseAccordingToType(
                  np.array(
                      [var0_init[0] - tot_update, var0_init[1] - tot_update],
                      dtype=dtype.as_numpy_dtype(0)),
                  self.evaluate(var0),
                  rtol=1e-5)

              self.assertAllCloseAccordingToType(
                  np.array(
                      [var1_init[0] - tot_update, var1_init[1] - tot_update],
                      dtype=dtype.as_numpy_dtype(0)),
                  self.evaluate(var1),
                  rtol=1e-5)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testResourceBasic(self):
    self.doTestBasic(use_resource=True)

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testBasicCallableParams(self):
    self.doTestBasic(use_resource=True, use_callable_params=True)

  def testMinimizeSparseResourceVariable(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    with tf.Graph().as_default():
      for dtype in _DATA_TYPES:
        var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
        x = tf.constant([[4.0], [5.0]], dtype=dtype)

        def loss():
          pred = tf.matmul(tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
          return pred * pred

        sgd_op = adadelta.Adadelta(1.0, 1.0, 1.0).minimize(
            loss, var_list=[var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([[1.0, 2.0]], self.evaluate(var0))
        # Run 1 step of sgd
        self.evaluate(sgd_op)
        # Validate updated params
        self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))

  def testConstructAdadeltaWithLR(self):
    opt = adadelta.Adadelta(lr=1.0, rho=0.9, epsilon=1.)
    opt_2 = adadelta.Adadelta(learning_rate=0.1, rho=0.9, epsilon=1., lr=1.0)
    opt_3 = adadelta.Adadelta(learning_rate=0.1, rho=0.9, epsilon=1.)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))

  def testConstructAdadeltaWithEpsilonValues(self):
    opt = adadelta.Adadelta(epsilon=None)
    self.assertEqual(opt.epsilon, 1e-7)

    opt = adadelta.Adadelta(epsilon=1e-8)
    self.assertEqual(opt.epsilon, 1e-8)
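
The inline arithmetic in doTestBasic above is the Adadelta reference update. A hedged numpy sketch that packages that same arithmetic into a single helper (the function name and defaults are illustrative, not from the original module):

import numpy as np


def adadelta_update_numpy_sketch(var, accum, accum_update, grad,
                                 lr=1.0, rho=0.95, epsilon=1e-8):
  # Accumulate the squared gradient, compute the RMS-scaled update, then
  # accumulate the squared update, mirroring the loop body in doTestBasic.
  accum = accum * rho + (grad ** 2) * (1 - rho)
  update = np.sqrt(accum_update + epsilon) / np.sqrt(accum + epsilon) * grad
  accum_update = accum_update * rho + (update ** 2) * (1 - rho)
  return var - lr * update, accum, accum_update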
Example #15
class AdamOptimizerTest(tf.test.TestCase, parameterized.TestCase):

  def testSparse(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.0, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.0, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0_np_indices = np.array([0, 2], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np[grads0_np_indices]),
            tf.constant(grads0_np_indices), tf.constant([3]))
        grads1_np_indices = np.array([0, 2], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np[grads1_np_indices]),
            tf.constant(grads1_np_indices), tf.constant([3]))
        opt = adam.Adam()
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))

        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
        # Run 3 steps of Adam
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          update.run()

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testSparseDevicePlacement(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for index_dtype in [tf.int32, tf.int64]:
      with tf.Graph().as_default(), self.cached_session(
          force_gpu=tf.test.is_gpu_available()):
        # If a GPU is available, tests that all optimizer ops can be placed on
        # it (i.e. they have GPU kernels).
        var = tf.Variable([[1.0], [2.0]])
        indices = tf.constant([0, 1], dtype=index_dtype)
        g_sum = lambda: tf.reduce_sum(tf.gather(var, indices))  # pylint: disable=cell-var-from-loop
        optimizer = adam.Adam(3.0)
        minimize_op = optimizer.minimize(g_sum, var_list=[var])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        minimize_op.run()

  def testSparseRepeatedIndices(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        repeated_index_update_var = tf.Variable(
            [[1.0], [2.0]], dtype=dtype)
        aggregated_update_var = tf.Variable(
            [[1.0], [2.0]], dtype=dtype)
        grad_repeated_index = tf.IndexedSlices(
            tf.constant(
                [0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]),
            tf.constant([2, 1]))
        grad_aggregated = tf.IndexedSlices(
            tf.constant(
                [0.2], shape=[1, 1], dtype=dtype),
            tf.constant([1]),
            tf.constant([2, 1]))
        repeated_update = adam.Adam().apply_gradients(
            [(grad_repeated_index, repeated_index_update_var)])
        aggregated_update = adam.Adam().apply_gradients(
            [(grad_aggregated, aggregated_update_var)])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(aggregated_update_var,
                            self.evaluate(repeated_index_update_var))
        for _ in range(3):
          repeated_update.run()
          aggregated_update.run()
          self.assertAllClose(aggregated_update_var,
                              self.evaluate(repeated_index_update_var))

  def doTestBasic(self, use_callable_params=False):
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = lambda: 0.001
        beta1 = lambda: 0.9
        beta2 = lambda: 0.999
        epsilon = lambda: 1e-8
        if not use_callable_params:
          learning_rate = learning_rate()
          beta1 = beta1()
          beta2 = beta2()
          epsilon = epsilon()

        opt = adam.Adam(learning_rate=learning_rate)
        if not tf.executing_eagerly():
          update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          if not tf.executing_eagerly():
            self.evaluate(update)
          else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testResourceBasic(self):
    self.doTestBasic()

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testBasicCallableParams(self):
    self.doTestBasic(use_callable_params=True)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testBasicWithAmsgrad(self):
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, v0hat, m1, v1, v1hat = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        opt = adam.Adam(amsgrad=True)
        if not tf.executing_eagerly():
          update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          if not tf.executing_eagerly():
            self.evaluate(update)
          else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

          var0_np, m0, v0, v0hat = adam_update_numpy_amsgrad(
              var0_np, grads0_np, t, m0, v0, v0hat)
          var1_np, m1, v1, v1hat = adam_update_numpy_amsgrad(
              var1_np, grads1_np, t, m1, v1, v1hat)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testSparseWithAmsgrad(self):
    # dtypes.half does not work on gpu + eager.
    for dtype in [tf.float32, tf.float64]:
      with self.cached_session():
        m0 = np.array([[0.0], [0.0]])
        v0 = np.array([[0.0], [0.0]])
        v0hat = np.array([[0.0], [0.0]])
        indices_np = np.array([1])
        indices = tf.constant(indices_np, dtype=tf.int32)
        var0_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)
        repeated_index_update_var = tf.Variable(var0_np, dtype=dtype)
        aggregated_update_var = tf.Variable(var0_np, dtype=dtype)
        grads0_np = np.array([[0.2]], dtype=dtype.as_numpy_dtype)
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]), tf.constant([2, 1]))
        grad_aggregated = tf.IndexedSlices(grads0_np, indices,
                                            tf.constant([2, 1]))
        opt_repeated = adam.Adam(amsgrad=True)
        opt_aggregated = adam.Adam(amsgrad=True)
        if not tf.executing_eagerly():
          repeated_update = opt_repeated.apply_gradients(
              [(grad_repeated_index, repeated_index_update_var)])
          aggregated_update = opt_aggregated.apply_gradients(
              [(grad_aggregated, aggregated_update_var)])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(
            self.evaluate(aggregated_update_var),
            self.evaluate(repeated_index_update_var))
        for t in range(3):
          if not tf.executing_eagerly():
            self.evaluate(repeated_update)
            self.evaluate(aggregated_update)
          else:
            opt_repeated.apply_gradients(
                [(grad_repeated_index, repeated_index_update_var)])
            opt_aggregated.apply_gradients(
                [(grad_aggregated, aggregated_update_var)])

          var0_np, m0, v0, v0hat = adam_sparse_update_numpy_amsgrad(
              var0_np, indices_np, grads0_np, t, m0, v0, v0hat)

          # Validate updated params
          self.assertAllCloseAccordingToType(
              var0_np, self.evaluate(aggregated_update_var))
          self.assertAllCloseAccordingToType(
              self.evaluate(aggregated_update_var),
              self.evaluate(repeated_index_update_var))

  def testBasicWithLearningRateDecay(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7
        decay = 0.5

        opt = adam.Adam(
            learning_rate=learning_rate,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            decay=decay)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          self.evaluate(update)
          lr_np = learning_rate / (1 + decay * t)

          var0_np, m0, v0 = adam_update_numpy(
              var0_np, grads0_np, t, m0, v0, lr=lr_np)
          var1_np, m1, v1 = adam_update_numpy(
              var1_np, grads1_np, t, m1, v1, lr=lr_np)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testBasicWithLearningRateInverseTimeDecay(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        decay = 0.5
        lr_schedule = learning_rate_schedule.InverseTimeDecay(
            learning_rate, decay_steps=1.0, decay_rate=decay)
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7

        opt = adam.Adam(
            learning_rate=lr_schedule,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          self.evaluate(update)

          lr_np = learning_rate / (1 + decay * t)

          var0_np, m0, v0 = adam_update_numpy(
              var0_np, grads0_np, t, m0, v0, lr=lr_np)
          var1_np, m1, v1 = adam_update_numpy(
              var1_np, grads1_np, t, m1, v1, lr=lr_np)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testTensorLearningRate(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = adam.Adam(tf.constant(0.001))
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
        # Run 3 steps of Adam
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          update.run()

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testSharing(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)
        opt = adam.Adam()
        update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        # Run 3 steps of intertwined Adam1 and Adam2.
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          if t % 2 == 0:
            update1.run()
          else:
            update2.run()

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  @combinations.generate(combinations.combine(mode=["eager"]))
  def testSlotsUniqueEager(self):
    v1 = tf.Variable(1.)
    v2 = tf.Variable(1.)
    opt = adam.Adam(1.)
    opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
    # There should be the `iterations` variable plus two unique slot variables
    # (m and v) for each of v1 and v2, i.e. five variables in total.
    self.assertLen(set(v.ref() for v in opt.variables()), 5)
    self.assertEqual(
        self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations))

  def testSetWeightsFromV1AdamWithoutMinimize(self):
    keras_v1_adam = optimizer_v1.Adam()
    keras_v2_adam = adam.Adam()
    keras_v2_adam.set_weights(keras_v1_adam.get_weights())
    keras_v1_iteration = keras_v1_adam.iterations
    keras_v2_iteration = keras_v2_adam.iterations
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertEqual(
        self.evaluate(keras_v1_iteration), self.evaluate(keras_v2_iteration))

  def testConstructAdamWithLR(self):
    opt = adam.Adam(lr=1.0)
    opt_2 = adam.Adam(learning_rate=0.1, lr=1.0)
    opt_3 = adam.Adam(learning_rate=0.1)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
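
The assertions in these Adam tests compare TensorFlow's updates against a NumPy reference, `adam_update_numpy`, and read the bias-correction accumulators through `get_beta_accumulators`; neither helper appears in this excerpt. A minimal sketch of what they might look like, assuming the standard bias-corrected Adam update and the optimizer's private `_get_hyper` accessor (the names and defaults below are assumptions, not code from the excerpt):

import numpy as np
import tensorflow as tf

def adam_update_numpy(param, g_t, t, m, v,
                      lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-7):
  # Bias-corrected learning rate for step t (the tests pass t starting at 0).
  lr_t = lr * np.sqrt(1 - beta2**(t + 1)) / (1 - beta1**(t + 1))
  m_t = beta1 * m + (1 - beta1) * g_t
  v_t = beta2 * v + (1 - beta2) * g_t * g_t
  param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon)
  return param_t, m_t, v_t

def get_beta_accumulators(opt, dtype):
  # Reconstructs beta_1 ** (iterations + 1) and beta_2 ** (iterations + 1),
  # the powers Adam uses for bias correction, via the private hyper accessor.
  local_step = tf.cast(opt.iterations + 1, dtype)
  beta_1_power = tf.pow(tf.cast(opt._get_hyper("beta_1"), dtype), local_step)
  beta_2_power = tf.pow(tf.cast(opt._get_hyper("beta_2"), dtype), local_step)
  return beta_1_power, beta_2_power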
Example #16
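This example and Example #17 build their checkpoints around a `MyModel` helper (and a non-layer dependency reachable as `model/_non_layer/a_variable`) that is not part of this excerpt. A minimal sketch inferred from the attribute accesses and the checkpoint names asserted in Example #17; `training`, `core`, and `trackable_utils` are the modules these tests already use, and the real helper may differ:

class NonLayerTrackable(tf.Module):
    """A trackable dependency that is not a Keras Layer (assumed structure)."""

    def __init__(self):
        super(NonLayerTrackable, self).__init__()
        self.a_variable = trackable_utils.add_variable(
            self, name="a_variable", shape=[])


class MyModel(training.Model):
    """A small Model with a named Dense layer, a second Dense layer, and a
    non-layer dependency, matching the `my_model/dense*/kernel` names and
    optimizer slot keys checked in Example #17."""

    def __init__(self):
        super(MyModel, self).__init__()
        self._named_dense = core.Dense(1, use_bias=True)
        self._second = core.Dense(1, use_bias=False)
        self._non_layer = NonLayerTrackable()

    def call(self, values):
        return self._second(self._named_dense(values))
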
class CheckpointCompatibilityTests(keras_parameterized.TestCase):
    def _initialized_model(self):
        input_value = tf.constant([[3.]])
        model = MyModel()
        optimizer = adam.Adam(0.001)
        root_trackable = tf.train.Checkpoint(optimizer=optimizer, model=model)
        with tf.GradientTape() as tape:
            loss = model(input_value)
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        train_op = optimizer.apply_gradients(zip(gradients, variables))
        self.evaluate(trackable_utils.gather_initializers(root_trackable))
        self.evaluate(train_op)
        # A regular variable, a slot variable, and a non-slot Optimizer variable
        # with known values to check when loading.
        self.evaluate(model._named_dense.bias.assign([1.]))
        self.evaluate(
            optimizer.get_slot(var=model._named_dense.bias,
                               slot_name="m").assign([2.]))
        self.evaluate(optimizer.beta_1.assign(3.))
        return root_trackable

    def _set_sentinels(self, root_trackable):
        self.evaluate(root_trackable.model._named_dense.bias.assign([101.]))
        self.evaluate(
            root_trackable.optimizer.get_slot(
                var=root_trackable.model._named_dense.bias,
                slot_name="m").assign([102.]))
        self.evaluate(root_trackable.optimizer.beta_1.assign(103.))

    def _check_sentinels(self, root_trackable):
        self.assertAllEqual([1.],
                            self.evaluate(
                                root_trackable.model._named_dense.bias))
        self.assertAllEqual([2.],
                            self.evaluate(
                                root_trackable.optimizer.get_slot(
                                    var=root_trackable.model._named_dense.bias,
                                    slot_name="m")))
        self.assertAllEqual(3., self.evaluate(root_trackable.optimizer.beta_1))

    def _write_name_based_checkpoint(self):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        with context.graph_mode():
            save_graph = tf.Graph()
            with save_graph.as_default(), self.session(
                    graph=save_graph) as session:
                root = self._initialized_model()
                name_saver = tf.compat.v1.train.Saver()
                return name_saver.save(sess=session,
                                       save_path=checkpoint_prefix,
                                       global_step=root.optimizer.iterations)

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testLoadFromNameBasedSaver(self):
        """Save a name-based checkpoint, load it using the object-based API."""
        with testing_utils.device(should_use_gpu=True):
            with self.test_session():
                save_path = self._write_name_based_checkpoint()
                root = self._initialized_model()
                self._set_sentinels(root)
                with self.assertRaises(AssertionError):
                    self._check_sentinels(root)
                object_saver = trackable_utils.TrackableSaver(
                    graph_view.ObjectGraphView(root))
                self._set_sentinels(root)
                status = object_saver.restore(save_path)
                if tf.executing_eagerly():
                    self._check_sentinels(root)
                if tf.executing_eagerly():
                    status.assert_consumed()
                    status.assert_existing_objects_matched()
                    status.assert_nontrivial_match()
                else:
                    # When graph building, we haven't read any keys, so we don't know
                    # whether the restore will be complete.
                    with self.assertRaisesRegex(AssertionError,
                                                "not restored"):
                        status.assert_consumed()
                    with self.assertRaisesRegex(AssertionError,
                                                "not restored"):
                        status.assert_existing_objects_matched()
                    with self.assertRaisesRegex(AssertionError,
                                                "not restored"):
                        status.assert_nontrivial_match()
                status.run_restore_ops()
                self._check_sentinels(root)
                self._set_sentinels(root)
                status = object_saver.restore(save_path)
                status.initialize_or_restore()
                status.assert_nontrivial_match()
                self._check_sentinels(root)
                # Check that there is no error when keys are missing from the name-based
                # checkpoint.
                root.not_in_name_checkpoint = tf.Variable([1.])
                status = object_saver.restore(save_path)
                with self.assertRaises(AssertionError):
                    status.assert_existing_objects_matched()

    def testSaveGraphLoadEager(self):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        with context.graph_mode():
            save_graph = tf.Graph()
            with save_graph.as_default(), self.session(graph=save_graph):
                root = self._initialized_model()
                save_path = root.save(file_prefix=checkpoint_prefix)
        with tf.__internal__.eager_context.eager_mode():
            root = self._initialized_model()
            self._set_sentinels(root)
            root.restore(save_path).assert_consumed()
            self._check_sentinels(root)

    def testSaveEagerLoadGraph(self):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        with tf.__internal__.eager_context.eager_mode():
            root = self._initialized_model()
            save_path = root.save(file_prefix=checkpoint_prefix)
        with context.graph_mode():
            save_graph = tf.Graph()
            with save_graph.as_default(), self.session(graph=save_graph):
                root = self._initialized_model()
                self._set_sentinels(root)
                root.restore(save_path).assert_consumed().run_restore_ops()
                self._check_sentinels(root)

    def testIgnoreSaveCounter(self):
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        with self.cached_session() as session:
            # Create and save a model using Saver() before using a Checkpoint. This
            # generates a snapshot without the Checkpoint's `save_counter`.
            model = sequential.Sequential()
            model.add(core.Flatten(input_shape=(1, )))
            model.add(core.Dense(1))
            name_saver = tf.compat.v1.train.Saver(model.trainable_variables)
            save_path = name_saver.save(sess=session,
                                        save_path=checkpoint_prefix,
                                        global_step=1)
            # Checkpoint.restore must successfully load that checkpoint.
            ckpt = tf.train.Checkpoint(model=model)
            status = ckpt.restore(save_path)
            status.assert_existing_objects_matched()
            # It should, however, report a mismatch when an unrelated variable
            # (here one that merely happens to be named `save_counter`) has no
            # value in the checkpoint.
            model.layers[1].var = tf.Variable(0., name="save_counter")
            status = ckpt.restore(save_path)
            with self.assertRaises(AssertionError):
                status.assert_existing_objects_matched()
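
These compatibility checks rely on the fact that the name-based `Saver` and the object-based `Checkpoint` write different key layouts into the same checkpoint file format: the former keys variables by their graph names, the latter by object path plus an `.ATTRIBUTES/VARIABLE_VALUE` (or `.OPTIMIZER_SLOT/...`) suffix, as Example #17 spells out. A minimal sketch, using the public `tf.train.list_variables` API, of how to inspect either kind of file (the path is a placeholder):

import tensorflow as tf

def print_checkpoint_keys(ckpt_prefix):
    # Lists (key, shape) pairs; works for both name-based and object-based
    # checkpoints since they share the on-disk format.
    for key, shape in tf.train.list_variables(ckpt_prefix):
        print(key, shape)

# A name-based file shows graph names such as "my_model/dense/kernel", while
# an object-based file shows keys such as
# "model/_named_dense/kernel/.ATTRIBUTES/VARIABLE_VALUE".
# print_checkpoint_keys("/tmp/ckpt-1")  # placeholder path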
Example #17
class CheckpointingTests(keras_parameterized.TestCase):
    @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
    def testNamingWithOptimizer(self):
        input_value = tf.constant([[3.]])
        model = MyModel()
        # A nuisance Model using the same optimizer. Its slot variables should not
        # go in the checkpoint, since it is never depended on.
        other_model = MyModel()
        optimizer = adam.Adam(0.001)
        step = tf.compat.v1.train.get_or_create_global_step()
        root_trackable = tf.train.Checkpoint(optimizer=optimizer,
                                             model=model,
                                             step=step)

        with tf.GradientTape() as tape:
            loss = model(input_value)
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        train_op = tf.group(
            optimizer.apply_gradients(zip(gradients, variables)),
            step.assign_add(1))

        with tf.GradientTape() as tape:
            loss = other_model(input_value)
        variables = other_model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        self.evaluate(trackable_utils.gather_initializers(root_trackable))
        self.evaluate(train_op)
        named_variables, serialized_graph, _ = graph_view.ObjectGraphView(
            root_trackable).serialize_object_graph()
        expected_slot_keys = (
            "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
            "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
            "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
            "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
        )
        expected_checkpoint_names = (
            # Created in the root node, so no prefix.
            "step",
            "model/_second/kernel",
            "model/_named_dense/kernel",
            "model/_named_dense/bias",
            # non-Layer dependency of the model
            "model/_non_layer/a_variable",
            "optimizer/learning_rate",
            "optimizer/beta_1",
            "optimizer/beta_2",
            "optimizer/iter",
            "optimizer/decay",
        ) + expected_slot_keys
        suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
        expected_checkpoint_names = [
            name + suffix for name in expected_checkpoint_names
        ]
        named_variables = {v.name: v for v in named_variables}
        self.assertEqual(len(expected_checkpoint_names),
                         len(named_variables.keys()))
        # Check that we've mapped to the right variable objects (not exhaustive)
        self.assertEqual("global_step",
                         named_variables["step" + suffix].full_name)
        self.assertEqual(
            "my_model/dense_1/kernel",
            named_variables["model/_second/kernel" + suffix].full_name)
        self.assertEqual(
            "my_model/dense/kernel",
            named_variables["model/_named_dense/kernel" + suffix].full_name)
        self.assertEqual(
            "Adam/beta_1",
            named_variables["optimizer/beta_1" + suffix].full_name)
        self.assertEqual(
            "Adam/beta_2",
            named_variables["optimizer/beta_2" + suffix].full_name)
        # Spot check the generated protocol buffers.
        self.assertEqual("optimizer",
                         serialized_graph.nodes[0].children[1].local_name)
        optimizer_node = serialized_graph.nodes[
            serialized_graph.nodes[0].children[1].node_id]
        children = [node.local_name for node in optimizer_node.children]
        self.assertEqual(
            # hyper variable dependencies
            len(["beta_1", "beta_2", "iter", "decay", "learning_rate"]),
            len(children))
        serialized_slot_keys = []
        for slot in optimizer_node.slot_variables:
            for attribute in (serialized_graph.nodes[
                    slot.slot_variable_node_id].attributes):
                serialized_slot_keys.append(attribute.checkpoint_key)
        self.assertEqual(len([key + suffix for key in expected_slot_keys]),
                         len(serialized_slot_keys))

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testSaveRestore(self):
        with self.test_session():
            model = MyModel()
            optimizer = adam.Adam(0.001)
            root_trackable = tf.train.Checkpoint(optimizer=optimizer,
                                                 model=model)
            input_value = tf.constant([[3.]])
            with tf.GradientTape() as tape:
                loss = model(input_value)
            variables = model.trainable_variables
            gradients = tape.gradient(loss, variables)
            train_op = optimizer.apply_gradients(zip(gradients, variables))
            self.assertFalse(root_trackable.save_counter.trainable)
            self.evaluate(trackable_utils.gather_initializers(root_trackable))
            self.evaluate(train_op)
            prefix = os.path.join(self.get_temp_dir(), "ckpt")
            self.evaluate(
                tf.compat.v1.assign(model._named_dense.variables[1], [42.]))
            m_bias_slot = optimizer.get_slot(model._named_dense.variables[1],
                                             "m")
            self.evaluate(tf.compat.v1.assign(m_bias_slot, [1.5]))
            save_path = root_trackable.save(file_prefix=prefix)
            self.evaluate(
                tf.compat.v1.assign(model._named_dense.variables[1], [43.]))
            self.evaluate(tf.compat.v1.assign(root_trackable.save_counter, 3))
            optimizer_variables = self.evaluate(
                sorted(optimizer.variables(), key=lambda v: v.name))
            self.evaluate(tf.compat.v1.assign(m_bias_slot, [-2.]))
            # Immediate restoration
            status = root_trackable.restore(
                save_path=save_path).assert_consumed()
            status.run_restore_ops()
            self.assertAllEqual([42.],
                                self.evaluate(model._named_dense.variables[1]))
            self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
            self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
            if not tf.executing_eagerly():
                return  # Restore-on-create is only supported when executing eagerly
            on_create_model = MyModel()
            on_create_optimizer = adam.Adam(0.001)
            on_create_root = tf.train.Checkpoint(optimizer=on_create_optimizer,
                                                 model=on_create_model)
            # Deferred restoration
            status = on_create_root.restore(save_path=save_path)
            status.assert_nontrivial_match()
            status.assert_existing_objects_matched()
            with self.assertRaises(AssertionError):
                status.assert_consumed()
            on_create_model(tf.constant([[3.]]))  # create variables
            self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
            self.assertAllEqual([42.],
                                self.evaluate(
                                    on_create_model._named_dense.variables[1]))
            on_create_m_bias_slot = on_create_optimizer.get_slot(
                on_create_model._named_dense.variables[1], "m")
            status.assert_existing_objects_matched()
            if not tf.executing_eagerly():
                with self.assertRaises(AssertionError):
                    status.assert_consumed()
            # Optimizer slot variables are created when the original variable is
            # restored.
            self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
            dummy_var = tf.Variable([1.])
            on_create_optimizer.minimize(loss=dummy_var.read_value,
                                         var_list=[dummy_var])
            status.assert_existing_objects_matched()
            status.assert_consumed()
            self.assertAllEqual(
                optimizer_variables,
                # Creation order is different, so .variables() needs to be re-sorted.
                self.evaluate(
                    sorted(optimizer.variables(), key=lambda v: v.name)))

    # TODO(allenl): Debug garbage created by this test in python3.
    def testDeferredRestorationUsageEager(self):
        """An idiomatic eager execution example."""
        num_training_steps = 10
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        for training_continuation in range(3):
            model = MyModel()
            optimizer = adam.Adam(0.001)
            root = tf.train.Checkpoint(optimizer=optimizer, model=model)
            root.restore(tf.train.latest_checkpoint(checkpoint_directory))
            for _ in range(num_training_steps):
                # TODO(allenl): Use a Dataset and serialize/checkpoint it.
                input_value = tf.constant([[3.]])
                with tf.GradientTape() as tape:
                    loss = model(input_value)
                variables = model.trainable_variables
                gradients = tape.gradient(loss, variables)
                optimizer.apply_gradients(zip(gradients, variables))
            root.save(file_prefix=checkpoint_prefix)
            self.assertEqual((training_continuation + 1) * num_training_steps,
                             root.optimizer.iterations.numpy())

    def testUsageGraph(self):
        """Expected usage when graph building."""
        with context.graph_mode():
            num_training_steps = 10
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with tf.Graph().as_default():
                    model = MyModel()
                    optimizer = adam.Adam(0.001)
                    root = tf.compat.v1.train.Checkpoint(optimizer=optimizer,
                                                         model=model)
                    input_value = tf.constant([[3.]])
                    with tf.GradientTape() as tape:
                        loss = model(input_value)
                    variables = model.trainable_variables
                    gradients = tape.gradient(loss, variables)
                    train_op = optimizer.apply_gradients(
                        zip(gradients, variables))

                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    with self.session(
                            graph=tf.compat.v1.get_default_graph()) as session:
                        status = root.restore(save_path=checkpoint_path)
                        status.initialize_or_restore(session=session)
                        if checkpoint_path is None:
                            self.assertEqual(0, training_continuation)
                            with self.assertRaises(AssertionError):
                                status.assert_consumed()
                            with self.assertRaises(AssertionError):
                                status.assert_existing_objects_matched()
                        else:
                            status.assert_consumed()
                            status.assert_existing_objects_matched()
                        for _ in range(num_training_steps):
                            session.run(train_op)
                        root.save(file_prefix=checkpoint_prefix,
                                  session=session)
                        self.assertEqual(
                            (training_continuation + 1) * num_training_steps,
                            session.run(root.optimizer.iterations))
                        self.assertEqual(training_continuation + 1,
                                         session.run(root.save_counter))

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testAgnosticUsage(self):
        """Graph/eager agnostic usage."""
        # Does create garbage when executing eagerly due to ops.Graph() creation.
        with self.test_session():
            num_training_steps = 10
            checkpoint_directory = self.get_temp_dir()
            optimizer = adam.Adam(0.001)

            def _train_fn(model, input_value):
                with tf.GradientTape() as tape:
                    loss = model(input_value)
                variables = model.trainable_variables
                gradients = tape.gradient(loss, variables)
                return optimizer.apply_gradients(zip(gradients, variables))

            for training_continuation in range(3):
                with testing_utils.device(should_use_gpu=True):
                    model = MyModel()
                    root = tf.train.Checkpoint(optimizer=optimizer,
                                               model=model)
                    manager = tf.train.CheckpointManager(root,
                                                         checkpoint_directory,
                                                         max_to_keep=1)
                    status = root.restore(save_path=manager.latest_checkpoint)
                    input_value = tf.constant([[3.]])
                    train_fn = functools.partial(_train_fn, model, input_value)
                    if not tf.executing_eagerly():
                        train_fn = functools.partial(self.evaluate, train_fn())
                    status.initialize_or_restore()
                    for _ in range(num_training_steps):
                        train_fn()
                    manager.save()
                    self.assertEqual(
                        (training_continuation + 1) * num_training_steps,
                        self.evaluate(root.optimizer.iterations))
                    self.assertEqual(training_continuation + 1,
                                     self.evaluate(root.save_counter))

    @combinations.generate(combinations.combine(mode=["eager"]))
    def testPartialRestoreWarningObject(self):
        optimizer = adam.Adam(0.0)
        original_root = tf.train.Checkpoint(v1=tf.Variable(2.),
                                            v2=tf.Variable(3.),
                                            optimizer=optimizer)
        # Create a slot variable to save
        optimizer.minimize(original_root.v1.read_value, [original_root.v1])
        prefix = os.path.join(self.get_temp_dir(), "ckpt")
        save_path = original_root.save(prefix)
        partial_root = tf.train.Checkpoint(v1=tf.Variable(0.))
        weak_partial_root = weakref.ref(partial_root)
        weak_v1 = weakref.ref(partial_root.v1)
        partial_root.restore(save_path)
        self.assertEqual(2., partial_root.v1.numpy())
        with tf.compat.v1.test.mock.patch.object(logging,
                                                 "warning") as mock_log:
            del partial_root
            self.assertIsNone(weak_partial_root())
            self.assertIsNone(weak_v1())
            messages = str(mock_log.call_args_list)
        self.assertIn("(root).v2'", messages)
        self.assertIn("(root).optimizer's state 'm' for (root).v1", messages)
        self.assertNotIn("(root).v1'", messages)
        self.assertIn("expect_partial()", messages)

    # pylint: disable=cell-var-from-loop
    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testWithDefun(self):
        with self.test_session():
            num_training_steps = 2
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with testing_utils.device(should_use_gpu=True):
                    model = MyModel()
                    # Don't actually train so we can test variable values
                    optimizer = adam.Adam(0.)
                    root = tf.train.Checkpoint(optimizer=optimizer,
                                               model=model)
                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    status = root.restore(save_path=checkpoint_path)

                    def train_fn():
                        @tf.function
                        def _call_model(x):
                            return model(x)

                        with tf.GradientTape() as tape:
                            loss = _call_model(tf.constant([[3.]]))
                        gradients = tape.gradient(loss, model.variables)
                        return optimizer.apply_gradients(
                            zip(gradients, model.variables))

                    if not tf.executing_eagerly():
                        train_fn = functools.partial(self.evaluate, train_fn())
                    status.initialize_or_restore()
                    for _ in range(num_training_steps):
                        train_fn()
                    if training_continuation > 0:
                        status.assert_consumed()
                        self.assertAllClose([[42.]],
                                            self.evaluate(model.variables[0]))
                    else:
                        self.evaluate(model.variables[0].assign([[42.]]))
                    root.save(file_prefix=checkpoint_prefix)
                    self.assertEqual(
                        (training_continuation + 1) * num_training_steps,
                        self.evaluate(optimizer.iterations))
                    self.assertEqual(training_continuation + 1,
                                     self.evaluate(root.save_counter))

    # pylint: enable=cell-var-from-loop

    @combinations.generate(combinations.combine(mode=["eager"]))
    def testAnonymousVarsInInit(self):
        class Model(training.Model):
            def __init__(self):
                super(Model, self).__init__()
                self.w = tf.Variable(0.0)
                self.b = tf.Variable(0.0)
                self.vars = [self.w, self.b]

            def call(self, x):
                return x * self.w + self.b

        model = Model()
        optimizer = adam.Adam(learning_rate=0.05)
        checkpoint_directory = self.get_temp_dir()
        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
        checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
        for _ in range(2):
            checkpoint.save(checkpoint_prefix)
            with tf.GradientTape() as tape:
                loss = (tf.constant(1.) - model(tf.constant(1.)))**2
            grad = tape.gradient(loss, model.vars)
            optimizer.apply_gradients([(g, v)
                                       for g, v in zip(grad, model.vars)])

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testDeferredSlotRestoration(self):
        with self.test_session():
            checkpoint_directory = self.get_temp_dir()

            root = tf.train.Checkpoint()
            root.var = trackable_utils.add_variable(root,
                                                    name="var",
                                                    initializer=0.)
            optimizer = adam.Adam(0.1)
            variables = [root.var]
            gradients = [1.]
            train_op = optimizer.apply_gradients(zip(gradients, variables))
            # Note that `optimizer` has not been added as a dependency of
            # `root`. Create a one-off grouping so that slot variables for `root.var`
            # get initialized too.
            self.evaluate(
                trackable_utils.gather_initializers(
                    tf.train.Checkpoint(root=root, optimizer=optimizer)))
            self.evaluate(train_op)
            self.evaluate(tf.compat.v1.assign(root.var, 12.))
            no_slots_path = root.save(
                os.path.join(checkpoint_directory, "no_slots"))
            root.optimizer = optimizer
            self.evaluate(tf.compat.v1.assign(root.var, 13.))
            self.evaluate(
                tf.compat.v1.assign(
                    optimizer.get_slot(slot_name="m", var=root.var), 14.))
            slots_path = root.save(
                os.path.join(checkpoint_directory, "with_slots"))
            new_root = tf.train.Checkpoint()
            # Load the slot-containing checkpoint (deferred), then immediately
            # overwrite the non-slot variable (also deferred).
            slot_status = new_root.restore(slots_path)
            no_slot_status = new_root.restore(no_slots_path)
            with self.assertRaises(AssertionError):
                no_slot_status.assert_consumed()
            new_root.var = trackable_utils.add_variable(new_root,
                                                        name="var",
                                                        shape=[])
            no_slot_status.assert_consumed()
            no_slot_status.run_restore_ops()
            self.assertEqual(12., self.evaluate(new_root.var))
            new_root.optimizer = adam.Adam(0.1)
            slot_status.assert_existing_objects_matched()
            if not tf.executing_eagerly():
                with self.assertRaisesRegex(AssertionError,
                                            "Unresolved object"):
                    slot_status.assert_consumed()
            self.assertEqual(12., self.evaluate(new_root.var))
            if tf.executing_eagerly():
                # Slot variables are only created with restoring initializers when
                # executing eagerly.
                self.assertEqual(
                    14.,
                    self.evaluate(
                        new_root.optimizer.get_slot(slot_name="m",
                                                    var=new_root.var)))
            else:
                # Slot variables are not created eagerly when graph building.
                with self.assertRaises(KeyError):
                    new_root.optimizer.get_slot(slot_name="m",
                                                var=new_root.var)
            variables = [new_root.var]
            gradients = [1.]
            train_op = new_root.optimizer.apply_gradients(
                zip(gradients, variables))
            # The slot variable now exists; restore() didn't create it, but we should
            # now have a restore op for it.
            slot_status.run_restore_ops()
            if not tf.executing_eagerly():
                # The train op hasn't run when graph building, so the slot variable has
                # its restored value. It has run in eager, so the value will
                # be different.
                self.assertEqual(
                    14.,
                    self.evaluate(
                        new_root.optimizer.get_slot(slot_name="m",
                                                    var=new_root.var)))
            self.evaluate(train_op)
            slot_status.assert_consumed()

    def testManySavesGraph(self):
        """Saves after the first should not modify the graph."""
        with context.graph_mode():
            graph = tf.Graph()
            with graph.as_default(), self.session(graph):
                checkpoint_directory = self.get_temp_dir()
                checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
                obj = tf.train.Checkpoint()
                obj.var = tf.Variable(0., name="v")
                obj.opt = adam.Adam(0.1)
                variables = [obj.var]
                gradients = [1.]
                obj.opt.apply_gradients(zip(gradients, variables))
                self.evaluate(trackable_utils.gather_initializers(obj))
                obj.save(checkpoint_prefix)
                graph.finalize()
                obj.save(checkpoint_prefix)

    def testManyRestoresGraph(self):
        """Restores after the first should not modify the graph."""
        with context.graph_mode():
            graph = tf.Graph()
            with graph.as_default(), self.session(graph):
                checkpoint_directory = self.get_temp_dir()
                checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
                obj = tf.train.Checkpoint()
                obj.var = tf.Variable(0., name="v")
                obj.opt = adam.Adam(0.1)
                variables = [obj.var]
                gradients = [1.]
                obj.opt.apply_gradients(zip(gradients, variables))
                self.evaluate(trackable_utils.gather_initializers(obj))
                save_path = obj.save(checkpoint_prefix)
                obj.restore(save_path)
                graph.finalize()
                obj.restore(save_path)

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def test_sequential(self):
        with self.test_session():
            model = sequential.Sequential()
            checkpoint = tf.train.Checkpoint(model=model)
            model.add(core.Dense(4))
            second_dense = core.Dense(5)
            model.add(second_dense)
            model(tf.constant([[1.]]))
            checkpoint.restore(None).initialize_or_restore()
            self.evaluate(
                second_dense.bias.assign(tf.constant([1., 2., 3., 4., 5.])))
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            save_path = checkpoint.save(checkpoint_prefix)
            self.evaluate(
                second_dense.bias.assign(tf.constant([5., 6., 7., 8., 9.])))
            checkpoint.restore(save_path).assert_consumed().run_restore_ops()
            self.assertAllEqual([1., 2., 3., 4., 5.],
                                self.evaluate(second_dense.bias))

            deferred_sequential = sequential.Sequential()
            deferred_sequential_checkpoint = tf.train.Checkpoint(
                model=deferred_sequential)
            status = deferred_sequential_checkpoint.restore(save_path)
            deferred_sequential.add(core.Dense(4))
            deferred_second_dense = core.Dense(5)
            deferred_sequential.add(deferred_second_dense)
            deferred_sequential(tf.constant([[1.]]))
            status.run_restore_ops()
            self.assertAllEqual([1., 2., 3., 4., 5.],
                                self.evaluate(deferred_second_dense.bias))

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def test_initialize_if_not_restoring(self):
        with self.test_session():
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
            with testing_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = adam.Adam(0.001)
                root = tf.train.Checkpoint(
                    model=model
                )  # Do not save the optimizer with the checkpoint.
                optimizer_checkpoint = tf.train.Checkpoint(optimizer=optimizer)

                checkpoint_path = tf.train.latest_checkpoint(
                    checkpoint_directory)
                status = root.restore(save_path=checkpoint_path)
                input_value = tf.constant([[3.]])

                def train_fn():
                    with tf.GradientTape() as tape:
                        loss = model(input_value)
                    variables = model.trainable_variables
                    gradients = tape.gradient(loss, variables)
                    return optimizer.apply_gradients(zip(gradients, variables))

                if not tf.executing_eagerly():
                    train_fn = functools.partial(self.evaluate, train_fn())
                status.initialize_or_restore()
                # TODO(tanzheny): Add hyper variables to .variables(), and set them with
                # set_weights etc.
                variables_not_in_the_variables_property = [
                    obj for obj in optimizer._hyper.values()
                    if isinstance(obj, tf.Variable)
                ]
                self.evaluate([
                    v.initializer for v in optimizer.variables() +
                    variables_not_in_the_variables_property
                ])
                train_fn()
                model_save_path = root.save(file_prefix=checkpoint_prefix)
                self.evaluate(optimizer.beta_1.assign(42.))
                optimizer_save_path = optimizer_checkpoint.save(
                    optimizer_only_prefix)
            del train_fn

            # Restore into a graph with the optimizer
            with testing_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = adam.Adam(0.001)
                root = tf.train.Checkpoint(optimizer=optimizer, model=model)
                status = root.restore(save_path=model_save_path)
                input_value = tf.constant([[3.]])

                def train_fn1():
                    with tf.GradientTape() as tape:
                        loss = model(input_value)
                    variables = model.trainable_variables
                    gradients = tape.gradient(loss, variables)
                    return optimizer.apply_gradients(zip(gradients, variables))

                if not tf.executing_eagerly():
                    train_fn1 = functools.partial(self.evaluate, train_fn1())
                status.initialize_or_restore()
                train_fn1()
                with self.assertRaises(AssertionError):
                    status.assert_existing_objects_matched()
                with self.assertRaises(AssertionError):
                    status.assert_consumed()
            del train_fn1

            # Make sure initialization doesn't clobber later restores
            with testing_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = adam.Adam(0.001, beta_1=1.0)
                root = tf.train.Checkpoint(optimizer=optimizer, model=model)
                opt_root = tf.train.Checkpoint(optimizer=optimizer)
                status = root.restore(save_path=model_save_path)
                init_only_optimizer_status = opt_root.restore(save_path=None)
                optimizer_status = opt_root.restore(
                    save_path=optimizer_save_path)
                input_value = tf.constant([[3.]])

                def train_fn2():
                    with tf.GradientTape() as tape:
                        loss = model(input_value)
                    variables = model.trainable_variables
                    gradients = tape.gradient(loss, variables)
                    return optimizer.apply_gradients(zip(gradients, variables))

                if not tf.executing_eagerly():
                    train_fn2 = functools.partial(self.evaluate, train_fn2())
                optimizer_status.run_restore_ops()
                status.initialize_or_restore()
                init_only_optimizer_status.initialize_or_restore()
                train_fn2()
                self.assertEqual(42., self.evaluate(optimizer.beta_1))
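
`testPartialRestoreWarningObject` above only asserts that the unrestored-value warning points the user at `expect_partial()`. A minimal usage sketch of how a caller would deliberately accept such a partial restore and silence that warning (the variables and path below are illustrative, not taken from the test):

import os
import tempfile
import tensorflow as tf

ckpt_dir = tempfile.mkdtemp()
full = tf.train.Checkpoint(v1=tf.Variable(2.), v2=tf.Variable(3.))
save_path = full.save(os.path.join(ckpt_dir, "ckpt"))

# Restoring into an object graph that only defines `v1` would normally log a
# warning about the unused `v2` value; expect_partial() marks the partial
# match as intentional.
partial = tf.train.Checkpoint(v1=tf.Variable(0.))
partial.restore(save_path).expect_partial()
assert partial.v1.numpy() == 2.0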
Example #18
class GradientDescentOptimizerTest(tf.test.TestCase, parameterized.TestCase):
    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testBasic(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            sgd = gradient_descent.SGD(3.0)
            sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Run 1 step of sgd
            self.evaluate(sgd_op)
            # Validate updated params
            self.assertAllCloseAccordingToType(
                [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1))

    def _test_basic_sgd_with_learning_rate_decay(self, sgd, dtype):
        var0 = tf.Variable([1.0, 2.0], dtype=dtype)
        var1 = tf.Variable([3.0, 4.0], dtype=dtype)
        grads0 = tf.constant([0.1, 0.1], dtype=dtype)
        grads1 = tf.constant([0.01, 0.01], dtype=dtype)
        if not tf.executing_eagerly():
            sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 2 steps of sgd, validating the params after each step
        if not tf.executing_eagerly():
            self.evaluate(sgd_op)
        else:
            sgd.apply_gradients(zip([grads0, grads1], [var0, var1]))
        # Validate updated params
        self.assertAllCloseAccordingToType([1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1],
                                           self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1))

        if not tf.executing_eagerly():
            self.evaluate(sgd_op)
        else:
            sgd.apply_gradients(zip([grads0, grads1], [var0, var1]))
        # Validate updated params
        self.assertAllCloseAccordingToType(
            [1.0 - 3.0 * 0.1 - 2.0 * 0.1, 2.0 - 3.0 * 0.1 - 2.0 * 0.1],
            self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            [3.0 - 3.0 * 0.01 - 2.0 * 0.01, 4.0 - 3.0 * 0.01 - 2.0 * 0.01],
            self.evaluate(var1))

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testBasicWithLearningRateDecay(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            learning_rate = 3.0
            decay = 0.5
            sgd = gradient_descent.SGD(learning_rate=learning_rate,
                                       decay=decay)
            self._test_basic_sgd_with_learning_rate_decay(sgd, dtype)

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testBasicWithLearningRateInverseTimeDecay(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            learning_rate = learning_rate_schedule.InverseTimeDecay(
                3.0, decay_steps=1.0, decay_rate=0.5)
            sgd = gradient_descent.SGD(learning_rate=learning_rate)
            self._test_basic_sgd_with_learning_rate_decay(sgd, dtype)

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testBasicWithLearningRateInverseTimeDecaySerializeAndDeserialize(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            learning_rate = learning_rate_schedule.InverseTimeDecay(
                3.0, decay_steps=1.0, decay_rate=0.5)
            sgd = gradient_descent.SGD(learning_rate=learning_rate)
            sgd = gradient_descent.SGD.from_config(sgd.get_config())
            self._test_basic_sgd_with_learning_rate_decay(sgd, dtype)

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testBasicCallableParams(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            lr = lambda: 3.0
            sgd = gradient_descent.SGD(lr)
            sgd_op = sgd.apply_gradients(zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Run 1 step of sgd
            self.evaluate(sgd_op)
            # Validate updated params
            self.assertAllCloseAccordingToType(
                [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1))

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testMinimizeResourceVariable(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
            var1 = tf.Variable([3.0], dtype=dtype)
            x = tf.constant([[4.0], [5.0]], dtype=dtype)
            loss = lambda: tf.matmul(var0, x) + var1  # pylint: disable=cell-var-from-loop
            sgd = gradient_descent.SGD(1.0)
            sgd_op = sgd.minimize(loss, [var0, var1])
            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Run 1 step of sgd
            self.evaluate(sgd_op)
            # Validate updated params
            self.assertAllCloseAccordingToType([[1.0 - 4.0, 2.0 - 5.0]],
                                               self.evaluate(var0))
            self.assertAllCloseAccordingToType([3.0 - 1.0],
                                               self.evaluate(var1))

    def testMinimizeSparseResourceVariable(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
                var1 = tf.Variable([3.0], dtype=dtype)
                x = tf.constant([[4.0], [5.0]], dtype=dtype)

                def loss():
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
                    pred += var1  # pylint: disable=cell-var-from-loop
                    return pred * pred

                sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Run 1 step of sgd
                self.evaluate(sgd_op)
                # Validate updated params
                np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
                np_grad = 2 * np_pred
                self.assertAllCloseAccordingToType(
                    [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]],
                    self.evaluate(var0))
                self.assertAllCloseAccordingToType([3.0 - np_grad],
                                                   self.evaluate(var1))

    def testTensorLearningRate(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            lrate = tf.constant(3.0)
            sgd_op = gradient_descent.SGD(lrate).apply_gradients(
                zip([grads0, grads1], [var0, var1]))
            self.evaluate(tf.compat.v1.global_variables_initializer())
            # Run 1 step of sgd
            self.evaluate(sgd_op)
            # Validate updated params
            self.assertAllCloseAccordingToType(
                [1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1], self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                [3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1))

    def testGradWrtRef(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                opt = gradient_descent.SGD(3.0)
                values = [1.0, 3.0]
                vars_ = [tf.Variable([v], dtype=dtype) for v in values]
                loss = lambda: vars_[0] + vars_[1]  # pylint: disable=cell-var-from-loop
                grads_and_vars = opt._compute_gradients(loss, vars_)
                self.evaluate(tf.compat.v1.global_variables_initializer())
                for grad, _ in grads_and_vars:
                    self.assertAllCloseAccordingToType([1.0],
                                                       self.evaluate(grad))

    def testSparseBasic(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                var0 = tf.Variable([[1.0], [2.0]], dtype=dtype)
                var1 = tf.Variable([[3.0], [4.0]], dtype=dtype)
                grads0 = tf.IndexedSlices(
                    tf.constant([0.1], shape=[1, 1], dtype=dtype),
                    tf.constant([0]), tf.constant([2, 1]))
                grads1 = tf.IndexedSlices(
                    tf.constant([0.01], shape=[1, 1], dtype=dtype),
                    tf.constant([1]), tf.constant([2, 1]))
                sgd_op = gradient_descent.SGD(3.0).apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Run 1 step of sgd
                self.evaluate(sgd_op)
                # Validate updated params
                self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                                   self.evaluate(var1))

    def testSparseBasicWithLearningRateDecay(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                var0 = tf.Variable([[1.0], [2.0]], dtype=dtype)
                var1 = tf.Variable([[3.0], [4.0]], dtype=dtype)
                grads0 = tf.IndexedSlices(
                    tf.constant([0.1], shape=[1, 1], dtype=dtype),
                    tf.constant([0]), tf.constant([2, 1]))
                grads1 = tf.IndexedSlices(
                    tf.constant([0.01], shape=[1, 1], dtype=dtype),
                    tf.constant([1]), tf.constant([2, 1]))
                sgd_op = gradient_descent.SGD(3.0, decay=0.5).apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Run 2 steps of sgd, validating the params after each step
                self.evaluate(sgd_op)
                # Validate updated params
                self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]],
                                                   self.evaluate(var0))
                self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]],
                                                   self.evaluate(var1))

                self.evaluate(sgd_op)
                # Validate updated params
                self.assertAllCloseAccordingToType(
                    [[1.0 - 3.0 * 0.1 - 2.0 * 0.1], [2.0]],
                    self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    [[3.0], [4.0 - 3.0 * 0.01 - 2.0 * 0.01]],
                    self.evaluate(var1))

    @combinations.generate(combinations.combine(mode=["eager"]))
    def testCapturingInFunctionWhileExecutingEagerly(self):
        optimizer = gradient_descent.SGD(1.0)

        var_holder = {}

        def step():
            if not var_holder:
                var_holder["var"] = tf.Variable(1.0)
            else:
                var_holder["var"].assign(1.0)

            with tf.GradientTape() as tape:
                loss = var_holder["var"]**2
            grad = tape.gradient(loss, var_holder["var"])
            optimizer.apply_gradients([(grad, var_holder["var"])])
            return var_holder["var"].read_value()

        compiled_step = tf.function(step)

        self.assertEqual(float(step()), -1.0)
        self.assertEqual(float(compiled_step()), -1.0)
        # This shouldn't fail; in particular, the learning rate tensor should
        # be an EagerTensor once again, not a graph Tensor.
        self.assertEqual(float(step()), -1.0)

    def testConstructSGDWithLR(self):
        opt = gradient_descent.SGD(lr=1.0)
        opt_2 = gradient_descent.SGD(learning_rate=0.1, lr=1.0)
        opt_3 = gradient_descent.SGD(learning_rate=0.1)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
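
The expected values in the decayed-SGD tests above (for example `1.0 - 3.0 * 0.1 - 2.0 * 0.1`) follow from inverse-time decay: both `SGD(learning_rate=3.0, decay=0.5)` and `InverseTimeDecay(3.0, decay_steps=1.0, decay_rate=0.5)` scale the base rate by `1 / (1 + 0.5 * t)` at integer step `t`, giving 3.0 on the first step and 2.0 on the second. A small sketch of that arithmetic in plain Python:

def decayed_lr(initial_lr, decay_rate, step, decay_steps=1.0):
    # Effective learning rate under inverse-time decay, as the tests assume.
    return initial_lr / (1 + decay_rate * step / decay_steps)

rates = [decayed_lr(3.0, 0.5, t) for t in range(2)]
print(rates)  # [3.0, 2.0]

# Two SGD steps on var0 = [1.0, 2.0] with a constant gradient of 0.1:
var0 = [1.0, 2.0]
for lr in rates:
    var0 = [v - lr * 0.1 for v in var0]
print(var0)  # [1.0 - 3.0 * 0.1 - 2.0 * 0.1, 2.0 - 3.0 * 0.1 - 2.0 * 0.1]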
Example #19
class TrainingGPUTest(tf.test.TestCase, parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_model_with_crossentropy_losses_channels_first(self):
    """Tests use of all crossentropy losses with `channels_first`.

    Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`,
    and `binary_crossentropy`.
    Verifies that `Model.evaluate` gives the same result with either the
    `channels_first` or the `channels_last` image_data_format.
    """
    def prepare_simple_model(input_tensor, loss_name, target):
      axis = 1 if backend.image_data_format() == 'channels_first' else -1
      loss = None
      num_channels = None
      activation = None
      if loss_name == 'sparse_categorical_crossentropy':
        loss = lambda y_true, y_pred: backend.sparse_categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        num_channels = int(np.amax(target) + 1)
        activation = 'softmax'
      elif loss_name == 'categorical_crossentropy':
        loss = lambda y_true, y_pred: backend.categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        num_channels = target.shape[axis]
        activation = 'softmax'
      elif loss_name == 'binary_crossentropy':
        loss = lambda y_true, y_pred: backend.binary_crossentropy(  # pylint: disable=g-long-lambda, unnecessary-lambda
            y_true, y_pred)
        num_channels = target.shape[axis]
        activation = 'sigmoid'

      predictions = Conv2D(num_channels,
                           1,
                           activation=activation,
                           kernel_initializer='ones',
                           bias_initializer='ones')(input_tensor)
      simple_model = training.Model(inputs=input_tensor, outputs=predictions)
      simple_model.compile(optimizer='rmsprop', loss=loss)
      return simple_model

    if tf.test.is_gpu_available(cuda_only=True):
      with testing_utils.use_gpu():
        losses_to_test = ['sparse_categorical_crossentropy',
                          'categorical_crossentropy', 'binary_crossentropy']

        data_channels_first = np.array([[[[8., 7.1, 0.], [4.5, 2.6, 0.55],
                                          [0.9, 4.2, 11.2]]]], dtype=np.float32)
        # Labels for testing 4-class sparse_categorical_crossentropy, 4-class
        # categorical_crossentropy, and 2-class binary_crossentropy:
        labels_channels_first = [np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32),  # pylint: disable=line-too-long
                                 np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 0]],
                                            [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
                                            [[0, 0, 0], [1, 0, 0], [0, 0, 1]],
                                            [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]], dtype=np.float32),  # pylint: disable=line-too-long
                                 np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 1]],
                                            [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]], dtype=np.float32)]  # pylint: disable=line-too-long
        # Compute one loss for each loss function in the list `losses_to_test`:
        loss_channels_last = [0., 0., 0.]
        loss_channels_first = [0., 0., 0.]

        old_data_format = backend.image_data_format()

        # Evaluate a simple network with channels last, with all three loss
        # functions:
        backend.set_image_data_format('channels_last')
        data = np.moveaxis(data_channels_first, 1, -1)
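        # np.moveaxis shifts the channel axis from position 1 (NCHW) to the
        # last position (NHWC) to match the channels_last data format.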
        for index, loss_function in enumerate(losses_to_test):
          labels = np.moveaxis(labels_channels_first[index], 1, -1)
          inputs = input_layer.Input(shape=(3, 3, 1))
          model = prepare_simple_model(inputs, loss_function, labels)
          loss_channels_last[index] = model.evaluate(x=data, y=labels,
                                                     batch_size=1, verbose=0)

        # Evaluate the same network with channels first, with all three loss
        # functions:
        backend.set_image_data_format('channels_first')
        data = data_channels_first
        for index, loss_function in enumerate(losses_to_test):
          labels = labels_channels_first[index]
          inputs = input_layer.Input(shape=(1, 3, 3))
          model = prepare_simple_model(inputs, loss_function, labels)
          loss_channels_first[index] = model.evaluate(x=data, y=labels,
                                                      batch_size=1, verbose=0)

        backend.set_image_data_format(old_data_format)

        np.testing.assert_allclose(
            loss_channels_first,
            loss_channels_last,
            rtol=1e-06,
            err_msg='Computed different losses for channels_first and '
                    'channels_last')
Example #20
class MomentumOptimizerTest(tf.test.TestCase, parameterized.TestCase):
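    # The helper below mirrors the Nesterov momentum update verified in these
    # tests:
    #   accum <- momentum * accum - lr * g
    #   var   <- var + momentum * accum - lr * g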
    def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum):
        accum = accum * momentum - g * lr
        var += (accum * momentum - g * lr)
        return var, accum

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testBasic(self):
        for _, dtype in enumerate([tf.half, tf.float32, tf.float64]):
            var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0")
            var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1")
            grads0 = tf.constant([0.1, 0.1], dtype=dtype)
            grads1 = tf.constant([0.01, 0.01], dtype=dtype)
            learning_rate = 2.0
            momentum = 0.9
            mom_opt = gradient_descent.SGD(learning_rate=learning_rate,
                                           momentum=momentum)
            # self.assertFalse(mom_opt._initial_decay)
            mom_update = mom_opt.apply_gradients(
                zip([grads0, grads1], [var0, var1]))

            # Check we have slots
            slot0 = mom_opt.get_slot(var0, "momentum")
            self.assertEqual(slot0.shape, var0.shape)
            slot1 = mom_opt.get_slot(var1, "momentum")
            self.assertEqual(slot1.shape, var1.shape)

            # Step 1: the momentum accumulators were 0, so we should see a normal
            # update: v -= grad * learning_rate
            self.evaluate(tf.compat.v1.global_variables_initializer())
            self.evaluate(mom_update)
            # Check that the momentum accumulators have been updated.
            self.assertAllCloseAccordingToType(np.array([-0.2, -0.2]),
                                               self.evaluate(slot0))
            self.assertAllCloseAccordingToType(np.array([-0.02, -0.02]),
                                               self.evaluate(slot1))
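            # (The accumulators start at 0, so after one step accum = -lr * grad:
            # -2.0 * 0.1 = -0.2 and -2.0 * 0.01 = -0.02.)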
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                self.evaluate(var1))
            # Step 2: the momentum accumulators contain the previous update.
            self.evaluate(mom_update)
            if tf.executing_eagerly():
                mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
            # Check that the momentum accumulators have been updated.
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * (-0.2) - 2.0 * 0.1),
                          (0.9 * (-0.2) - 2.0 * 0.1)]), self.evaluate(slot0))
            self.assertAllCloseAccordingToType(
                np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                          (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1))
            # Check that the parameters have been updated.
            self.assertAllCloseAccordingToType(
                np.array([
                    1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                    2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                ]), self.evaluate(var0))
            self.assertAllCloseAccordingToType(
                np.array([
                    2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                    3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                ]), self.evaluate(var1))

    def testNesterovMomentum(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.float32, tf.float64]:
                var0 = tf.Variable([1.0, 2.0], dtype=dtype, name="var0")
                var1 = tf.Variable([3.0, 4.0], dtype=dtype, name="var1")
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                loss = lambda: 5 * var0 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
                mom_op = gradient_descent.SGD(learning_rate=2.0,
                                              momentum=0.9,
                                              nesterov=True)
                opt_op = mom_op.minimize(loss, [var0, var1])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                for _ in range(1, 5):
                    self.evaluate(opt_op)
                    var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                        var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                    var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                        var1_np, accum1_np, 3, 2.0, 0.9)
                    self.assertAllClose(var0_np, self.evaluate(var0))
                    self.assertAllClose(var1_np, self.evaluate(var1))

    def testSparseNesterovMomentum(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session() as sess:
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                grads = []
                for t in range(1, 5):
                    grads.append(var0_np * 10)
                    var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                        var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                    var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                        var1_np, accum1_np, 3, 2.0, 0.9)
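                # Reset the numpy reference state; the loop above only served to
                # precompute the gradient values that will be fed at each step.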
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
                var0 = tf.Variable(var0_np, dtype=dtype, name="var0")
                var1 = tf.Variable(var1_np, dtype=dtype, name="var1")
                mom_op = gradient_descent.SGD(learning_rate=2.0,
                                              momentum=0.9,
                                              nesterov=True)
                x_feed = tf.compat.v1.placeholder(dtype)
                y_feed = tf.IndexedSlices(x_feed, tf.constant([0, 1]),
                                          tf.constant([2]))
                grads_and_vars = [(y_feed, var0),
                                  (tf.constant([3.0, 3.0], dtype=dtype), var1)]
                opt_update = mom_op.apply_gradients(grads_and_vars)
                self.evaluate(tf.compat.v1.global_variables_initializer())
                for t in range(1, 5):
                    sess.run(opt_update, feed_dict={x_feed: grads[t - 1]})
                    var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                        var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                    var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                        var1_np, accum1_np, 3, 2.0, 0.9)
                    self.assertAllClose(var0_np, self.evaluate(var0))
                    self.assertAllClose(var1_np, self.evaluate(var1))

    def testMinimizeSparseResourceVariable(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)

                # pylint: disable=cell-var-from-loop
                def loss():
                    x = tf.constant([[4.0], [5.0]], dtype=dtype)
                    pred = tf.matmul(
                        tf.compat.v1.nn.embedding_lookup([var0], [0]), x)
                    return pred * pred

                # pylint: enable=cell-var-from-loop

                opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9)
                sgd_op = opt.minimize(loss, [var0])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Run 1 step of sgd
                self.evaluate(sgd_op)
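                # pred = 1*4 + 2*5 = 14 and loss = pred**2, so the gradient
                # w.r.t. var0 is 2 * 14 * [4, 5] = [112, 140]; a single step at
                # lr 1.0 gives [1 - 112, 2 - 140] = [-111, -138].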
                # Validate updated params
                self.assertAllCloseAccordingToType([[-111, -138]],
                                                   self.evaluate(var0))

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testMinimizeWith2DIndicesForEmbeddingLookup(self):
        var0 = tf.Variable(tf.ones([2, 2]))

        def loss():
            return tf.reduce_sum(tf.compat.v1.nn.embedding_lookup(var0, [[1]]))

        opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9)
        sgd_op = opt.minimize(loss, [var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(sgd_op)
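        # Only row 1 of var0 receives a gradient (of ones), so one step at
        # lr 1.0 zeroes that row while row 0 is left untouched.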
        self.assertAllCloseAccordingToType([[1, 1], [0, 0]],
                                           self.evaluate(var0))

    def testTensorLearningRateAndMomentum(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                var0 = tf.Variable([1.0, 2.0], dtype=dtype)
                var1 = tf.Variable([3.0, 4.0], dtype=dtype)
                grads0 = tf.constant([0.1, 0.1], dtype=dtype)
                grads1 = tf.constant([0.01, 0.01], dtype=dtype)
                mom_opt = gradient_descent.SGD(learning_rate=tf.constant(2.0),
                                               momentum=tf.constant(0.9))
                mom_update = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())
                # Check we have slots
                slot0 = mom_opt.get_slot(var0, "momentum")
                self.assertEqual(slot0.shape, var0.shape)
                slot1 = mom_opt.get_slot(var1, "momentum")
                self.assertEqual(slot1.shape, var1.shape)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                # Step 1: the momentum accumulators were 0, so we should see a normal
                # update: v -= grad * learning_rate
                self.evaluate(mom_update)
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(np.array([-0.2, -0.2]),
                                                   self.evaluate(slot0))
                self.assertAllCloseAccordingToType(np.array([-0.02, -0.02]),
                                                   self.evaluate(slot1))
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                    self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                    self.evaluate(var1))
                # Step 2: the momentum accumulators contain the previous update.
                self.evaluate(mom_update)
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * (-0.2) - 2.0 * 0.1),
                              (0.9 * (-0.2) - 2.0 * 0.1)]),
                    self.evaluate(slot0))
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                              (0.9 * (-0.02) - 2.0 * 0.01)]),
                    self.evaluate(slot1))
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([
                        1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                        2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                    ]), self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([
                        2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                        3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                    ]), self.evaluate(var1))

    def testSparse(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                var0 = tf.Variable(tf.zeros([4, 2], dtype=dtype))
                var1 = tf.Variable(tf.constant(1.0, dtype, [4, 2]))
                grads0 = tf.IndexedSlices(tf.constant([[.1, .1]], dtype=dtype),
                                          tf.constant([1]),
                                          tf.constant([4, 2]))
                grads1 = tf.IndexedSlices(
                    tf.constant([[.01, .01], [.01, .01]], dtype=dtype),
                    tf.constant([2, 3]), tf.constant([4, 2]))
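                # grads0 only touches row 1 of var0, and grads1 only touches
                # rows 2 and 3 of var1; all other rows keep their initial
                # values.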
                mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9)
                mom_update = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Check we have slots
                slot0 = mom_opt.get_slot(var0, "momentum")
                self.assertEqual(slot0.shape, var0.shape)
                slot1 = mom_opt.get_slot(var1, "momentum")
                self.assertEqual(slot1.shape, var1.shape)

                # Fetch params to validate initial values
                self.assertAllClose([0, 0], self.evaluate(var0)[0])
                self.assertAllClose([0, 0], self.evaluate(var0)[1])
                self.assertAllClose([1, 1], self.evaluate(var1)[2])

                # Step 1: the momentum accumulators are 0. So we should see a normal
                # update: v -= grad * learning_rate
                self.evaluate(mom_update)
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(np.array([0, 0]),
                                                   self.evaluate(slot0)[0])
                self.assertAllCloseAccordingToType(
                    np.array([-2.0 * .1, -2.0 * .1]),
                    self.evaluate(slot0)[1])
                self.assertAllCloseAccordingToType(
                    np.array([-2.0 * .01, -2.0 * .01]),
                    self.evaluate(slot1)[2])
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(np.array([0, 0]),
                                                   self.evaluate(var0)[0])
                self.assertAllCloseAccordingToType(
                    np.array([-(0.1 * 2.0), -(0.1 * 2.0)]),
                    self.evaluate(var0)[1])
                self.assertAllCloseAccordingToType(
                    np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]),
                    self.evaluate(var1)[2])
                # Step 2: the momentum accumulators contain the previous update.
                self.evaluate(mom_update)
                # Check that the momentum accumulators have been updated.
                self.assertAllClose(np.array([0, 0]), self.evaluate(slot0)[0])
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * (-0.2) - 2.0 * 0.1),
                              (0.9 * (-0.2) - 2.0 * 0.1)]),
                    self.evaluate(slot0)[1])
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                              (0.9 * (-0.02) - 2.0 * 0.01)]),
                    self.evaluate(slot1)[2])
                # Check that the parameters have been updated.
                self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0])
                self.assertAllCloseAccordingToType(
                    np.array([
                        -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                        -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                    ]),
                    self.evaluate(var0)[1])
                self.assertAllCloseAccordingToType(
                    np.array([
                        0.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                        0.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                    ]),
                    self.evaluate(var1)[2])

    def testSharing(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        with tf.Graph().as_default():
            for dtype in [tf.half, tf.float32, tf.float64]:
                var0 = tf.Variable([1.0, 2.0], dtype=dtype)
                var1 = tf.Variable([3.0, 4.0], dtype=dtype)
                grads0 = tf.constant([0.1, 0.1], dtype=dtype)
                grads1 = tf.constant([0.01, 0.01], dtype=dtype)
                mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9)
                mom_update1 = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                mom_update2 = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                slot0 = mom_opt.get_slot(var0, "momentum")
                self.assertEqual(slot0.shape, var0.shape)
                slot1 = mom_opt.get_slot(var1, "momentum")
                self.assertEqual(slot1.shape, var1.shape)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                # Step 1: the momentum accumulators were 0, so we should see a normal
                # update: v -= grad * learning_rate
                self.evaluate(mom_update1)
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(np.array([-0.2, -0.2]),
                                                   self.evaluate(slot0))
                self.assertAllCloseAccordingToType(np.array([-0.02, -0.02]),
                                                   self.evaluate(slot1))
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                    self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                    self.evaluate(var1))
                # Step 2: run the second update op; the shared momentum
                # accumulators already contain the previous update.
                self.evaluate(mom_update2)
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * (-0.2) - 2.0 * 0.1),
                              (0.9 * (-0.2) - 2.0 * 0.1)]),
                    self.evaluate(slot0))
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * (-0.02) - 2.0 * 0.01),
                              (0.9 * (-0.02) - 2.0 * 0.01)]),
                    self.evaluate(slot1))
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([
                        1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                        2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                    ]), self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([
                        2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                        3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                    ]), self.evaluate(var1))

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testConfig(self):
        opt = gradient_descent.SGD(learning_rate=1.0,
                                   momentum=0.9,
                                   nesterov=True)
        config = opt.get_config()
        opt2 = gradient_descent.SGD.from_config(config)
        lr = opt.lr
        lr2 = opt2.lr
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(lr), self.evaluate(lr2))
        self.assertAllClose(self.evaluate(opt._get_hyper("momentum")),
                            self.evaluate(opt2._get_hyper("momentum")))
        self.assertAllClose(self.evaluate(opt._get_hyper("decay")),
                            self.evaluate(opt2._get_hyper("decay")))
        var0 = tf.Variable([[1.0], [2.0]], dtype=tf.float32)
        loss = lambda: 3 * var0
        # learning rate variable created when calling minimize.
        opt.minimize(loss, [var0])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        config = opt.get_config()
        opt3 = gradient_descent.SGD.from_config(config)
        lr3 = opt3.lr
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(lr), self.evaluate(lr3))
        self.assertAllClose(self.evaluate(opt._get_hyper("momentum")),
                            self.evaluate(opt3._get_hyper("momentum")))
        self.assertAllClose(self.evaluate(opt._get_hyper("decay")),
                            self.evaluate(opt3._get_hyper("decay")))
        self.assertTrue(opt3.nesterov)

    def testNesterovWithoutMomentum(self):
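        # momentum values outside [0, 1] are rejected when the optimizer is
        # constructed.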
        with self.assertRaisesRegex(ValueError, "must be between"):
            gradient_descent.SGD(learning_rate=1.0, momentum=2.0)

    def testConstructMomentumWithLR(self):
        opt = gradient_descent.SGD(lr=1.0, momentum=0.9)
        opt_2 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9, lr=1.0)
        opt_3 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))

    @combinations.generate(combinations.combine(mode=["eager"]))
    def testMinimizeLossTensor(self):
        for dtype in [tf.half, tf.float32, tf.float64]:
            var0 = tf.Variable([[1.0, 2.0]], dtype=dtype)
            var1 = tf.Variable([3.0], dtype=dtype)
            x = tf.constant([[4.0], [5.0]], dtype=dtype)

            tape = tf.GradientTape()
            with tape:
                loss = tf.matmul(var0, x) + var1
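            # Minimizing a precomputed loss tensor (rather than a callable)
            # requires passing the GradientTape that recorded it.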
            sgd = gradient_descent.SGD(1.0)
            with self.assertRaisesRegex(ValueError, "`tape` is required"):
                sgd.minimize(loss, [var0, var1])
            sgd.minimize(loss, [var0, var1], tape=tape)

            self.assertAllCloseAccordingToType([[1.0 - 4.0, 2.0 - 5.0]],
                                               self.evaluate(var0))
            self.assertAllCloseAccordingToType([3.0 - 1.0],
                                               self.evaluate(var1))
Example #21
class TupleTests(keras_parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTracking(self):
    with self.test_session():
      model = HasTuple()
      output = model(tf.ones([32, 2]))
      self.assertAllEqual([32, 5], output.shape.as_list())
      self.assertLen(model.layers, 4)
      self.assertLen(model.layer_list.layers, 3)
      six.assertCountEqual(
          self,
          model.layers,
          tuple(model.layer_list.layers) + model.layers_with_updates)
      self.assertEqual(3, model.layer_list.layers[0].units)
      self.assertEqual(4, model.layer_list.layers[1].units)
      self.assertEqual(5, model.layer_list.layers[2].units)
      self.assertLen(model._checkpoint_dependencies, 2)
      self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref)
      self.assertIs(model.layers_with_updates,
                    model._checkpoint_dependencies[1].ref)
      self.assertLen(
          model._checkpoint_dependencies[0].ref._checkpoint_dependencies, 3)
      self.evaluate([v.initializer for v in model.variables])
      self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]]))
      save_path = os.path.join(self.get_temp_dir(), "ckpt")
      model.save_weights(save_path)
      self.evaluate(model.variables[0].assign(tf.zeros([2, 3])))
      model.load_weights(save_path)
      self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
                          self.evaluate(model.variables[0]))
      v = tf.Variable(1.)
      model.var_list = (v,)
      self.assertIn(id(v), [id(obj) for obj in model.variables])
      self.assertIn(id(v), [id(obj) for obj in model.trainable_variables])
      self.assertNotIn(id(v),
                       [id(obj) for obj in model.non_trainable_variables])
      self.assertIn(id(model.layer_list[0].trainable_weights[0]),
                    [id(obj) for obj in model.trainable_weights])

  @parameterized.named_parameters(
      ("Module", tf.Module),
      ("Model", training.Model),
  )
  def testSubModelTracking(self, module_subclass):
    model = module_subclass()
    model.v = tf.Variable(1.)
    self.assertIn(model.v, model.trainable_variables)
    model2 = module_subclass()
    model2.m = (model,)
    self.assertIn(model.v, model2.trainable_variables)

  def testSubSequentialTracking(self):

    class _Subclassed(training.Model):

      def __init__(self, wrapped):
        super(_Subclassed, self).__init__()
        self._wrapped = wrapped

      def call(self, x):
        return self._wrapped(x)

    model = sequential.Sequential()
    layer = core.Dense(1)
    model.add(layer)
    model2 = _Subclassed(model)
    model2(tf.ones([1, 2]))
    model2.m = (model,)
    self.assertIn(layer.kernel, model2.trainable_weights)

  def testUpdatesForwarded(self):
    with tf.Graph().as_default():
      model = HasTuple()
      model_input = tf.ones([32, 2])
      model(model_input)
      self.assertNotEmpty(model.layers_with_updates[0].updates)
      self.assertEqual(set(model.layers_with_updates[0].updates),
                       set(model.updates))

    model = HasTuple()
    model_input = tf.ones([32, 2])
    model(model_input)
    self.assertEmpty(model.updates)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testLossesForwarded(self):
    model = HasTuple()
    model_input = tf.ones([32, 2])
    model(model_input)
    self.assertLen(model.losses, 1)

  def testModelContainersCompareEqual(self):
    class HasEqualContainers(training.Model):

      def __init__(self):
        super(HasEqualContainers, self).__init__()
        self.l1 = ()
        self.l2 = ()

    model = HasEqualContainers()
    first_layer = HasEqualContainers()
    model.l1 = (first_layer,)
    second_layer = HasEqualContainers()
    model.l2 = (second_layer,)
    self.assertEqual((first_layer,), model.l1)
    d = {model.l1: 1, model.l2: 2}
    self.assertEqual(1, d[model.l1])
    self.assertEqual(1, d[(first_layer,)])
    self.assertEqual(2, d[model.l2])
    self.assertEqual(2, d[(second_layer,)])
    self.assertEqual([first_layer, second_layer], model.layers)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTensorConversion(self):

    class TupleToTensor(training.Model):

      def __init__(self):
        super(TupleToTensor, self).__init__()
        self.l = (1., 2., 3.)

    self.assertAllEqual(
        (1., 2., 3.),
        self.evaluate(tf.constant(TupleToTensor().l)))

    self.assertAllEqual(
        (1., 2., 3.),
        self.evaluate(tf.raw_ops.Pack(values=TupleToTensor().l)))
Example #22
class AdamaxOptimizerTest(tf.test.TestCase, parameterized.TestCase):
    def testResourceSparse(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype)  # pylint: disable=cell-var-from-loop
                m0, v0, m1, v1 = (zero_slots(), zero_slots(), zero_slots(),
                                  zero_slots())
                var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)

                grads0_np_indices = np.array([0, 1], dtype=np.int32)
                grads0 = tf.IndexedSlices(tf.constant(grads0_np),
                                          tf.constant(grads0_np_indices),
                                          tf.constant([3]))
                grads1_np_indices = np.array([2, 1], dtype=np.int32)
                grads1 = tf.IndexedSlices(tf.constant(grads1_np),
                                          tf.constant(grads1_np_indices),
                                          tf.constant([3]))
                opt = adamax.Adamax()
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0, 3.0], var0)
                self.assertAllClose([4.0, 5.0, 6.0], var1)

                beta1_power = get_beta_accumulators(opt, dtype)

                # Run 3 steps of Adamax
                for t in range(3):
                    self.assertAllCloseAccordingToType(0.9**(t + 1),
                                                       beta1_power)
                    update.run()

                    var0_np, m0, v0 = adamax_sparse_update_numpy(
                        var0_np, grads0_np_indices, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_sparse_update_numpy(
                        var1_np, grads1_np_indices, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)

    def testSparseDevicePlacement(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for index_dtype in [tf.int32, tf.int64]:
            with tf.Graph().as_default(), self.cached_session(
                    force_gpu=tf.test.is_gpu_available()):
                # If a GPU is available, tests that all optimizer ops can be placed on
                # it (i.e. they have GPU kernels).
                var = tf.Variable([[1.0], [2.0]])
                indices = tf.constant([0, 1], dtype=index_dtype)
                g_sum = lambda: tf.reduce_sum(tf.compat.v1.gather(
                    var, indices))  # pylint: disable=cell-var-from-loop
                optimizer = adamax.Adamax(3.0)
                minimize_op = optimizer.minimize(g_sum, var_list=[var])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                minimize_op.run()

    def testSparseRepeatedIndices(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                repeated_index_update_var = tf.Variable([[1.0], [2.0]],
                                                        dtype=dtype)
                aggregated_update_var = tf.Variable([[1.0], [2.0]],
                                                    dtype=dtype)
                grad_repeated_index = tf.IndexedSlices(
                    tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
                    tf.constant([1, 1]), tf.constant([2, 1]))
                grad_aggregated = tf.IndexedSlices(
                    tf.constant([0.2], shape=[1, 1], dtype=dtype),
                    tf.constant([1]), tf.constant([2, 1]))
                repeated_update = adamax.Adamax().apply_gradients([
                    (grad_repeated_index, repeated_index_update_var)
                ])
                aggregated_update = adamax.Adamax().apply_gradients([
                    (grad_aggregated, aggregated_update_var)
                ])
                self.evaluate(tf.compat.v1.global_variables_initializer())
                self.assertAllClose(aggregated_update_var,
                                    repeated_index_update_var.eval())
                for _ in range(3):
                    repeated_update.run()
                    aggregated_update.run()
                    self.assertAllClose(aggregated_update_var,
                                        repeated_index_update_var.eval())

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testBasic(self):
        for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
            with self.session(graph=tf.Graph(), use_gpu=True):
                # Initialize variables for numpy implementation.
                m0 = np.array([0.0, 0.0])
                v0 = np.array([0.0, 0.0])
                m1 = np.array([0.0, 0.0])
                v1 = np.array([0.0, 0.0])
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np, name="var0_%d" % i)
                var1 = tf.Variable(var1_np, name="var1_%d" % i)

                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                opt = adamax.Adamax()
                if not tf.executing_eagerly():
                    update = opt.apply_gradients(
                        zip([grads0, grads1], [var0, var1]))

                if not tf.executing_eagerly():
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of Adamax
                for t in range(3):
                    beta_1_power = get_beta_accumulators(opt, dtype)
                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), self.evaluate(beta_1_power))
                    if not tf.executing_eagerly():
                        self.evaluate(update)
                    else:
                        opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0),
                                                       rtol=1e-2)
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1),
                                                       rtol=1e-2)

    @combinations.generate(combinations.combine(mode=["graph", "eager"]))
    def testBasicWithLearningRateDecay(self):
        for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
            with self.session(graph=tf.Graph(), use_gpu=True):
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np, name="var0_%d" % i)
                var1 = tf.Variable(var1_np, name="var1_%d" % i)

                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)

                learning_rate = 0.001
                decay = 0.002
                opt = adamax.Adamax(learning_rate=learning_rate, decay=decay)
                if not tf.executing_eagerly():
                    update = opt.apply_gradients(
                        zip([grads0, grads1], [var0, var1]))

                if not tf.executing_eagerly():
                    self.evaluate(tf.compat.v1.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                # Run 3 steps of Adamax
                for t in range(3):
                    beta_1_power = get_beta_accumulators(opt, dtype)
                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), self.evaluate(beta_1_power))
                    if not tf.executing_eagerly():
                        self.evaluate(update)
                    else:
                        opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))

                    lr = learning_rate / (1 + decay * t)

                    var0_np, m0, v0 = adamax_update_numpy(var0_np,
                                                          grads0_np,
                                                          t,
                                                          m0,
                                                          v0,
                                                          alpha=lr)
                    var1_np, m1, v1 = adamax_update_numpy(var1_np,
                                                          grads1_np,
                                                          t,
                                                          m1,
                                                          v1,
                                                          alpha=lr)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0),
                                                       rtol=1e-2)
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1),
                                                       rtol=1e-2)

    def testTensorLearningRate(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)
                opt = adamax.Adamax(tf.constant(0.001))
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0)
                self.assertAllClose([3.0, 4.0], var1)

                beta1_power = get_beta_accumulators(opt, dtype)

                # Run 3 steps of Adamax
                for t in range(3):
                    self.assertAllCloseAccordingToType(0.9**(t + 1),
                                                       beta1_power)
                    update.run()

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)

    def testSharing(self):
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        for dtype in [tf.half, tf.float32, tf.float64]:
            with tf.Graph().as_default(), self.cached_session():
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = tf.Variable(var0_np)
                var1 = tf.Variable(var1_np)
                grads0 = tf.constant(grads0_np)
                grads1 = tf.constant(grads1_np)
                opt = adamax.Adamax()
                update1 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                update2 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(tf.compat.v1.global_variables_initializer())

                beta1_power = get_beta_accumulators(opt, dtype)

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0)
                self.assertAllClose([3.0, 4.0], var1)

                # Run 3 steps of intertwined Adamax1 and Adamax2.
                for t in range(3):
                    self.assertAllCloseAccordingToType(0.9**(t + 1),
                                                       beta1_power)
                    if t % 2 == 0:
                        update1.run()
                    else:
                        update2.run()

                    var0_np, m0, v0 = adamax_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adamax_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0)
                    self.assertAllCloseAccordingToType(var1_np, var1)

    @combinations.generate(combinations.combine(mode=["eager"]))
    def testSlotsUniqueEager(self):
        v1 = tf.Variable(1.)
        v2 = tf.Variable(1.)
        opt = adamax.Adamax(1.)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # Adamax keeps the iteration counter plus two slot variables (m and v)
        # for each of v1 and v2, i.e. 5 variables in total.
        self.assertLen({id(v) for v in opt.variables()}, 5)

    def testConstructAdamaxWithLR(self):
        opt = adamax.Adamax(lr=1.0)
        opt_2 = adamax.Adamax(learning_rate=0.1, lr=1.0)
        opt_3 = adamax.Adamax(learning_rate=0.1)
        self.assertIsInstance(opt.lr, tf.Variable)
        self.assertIsInstance(opt_2.lr, tf.Variable)
        self.assertIsInstance(opt_3.lr, tf.Variable)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(opt.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
        self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
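
The tests in this example call adamax_update_numpy, adamax_sparse_update_numpy, and get_beta_accumulators, which are defined elsewhere in the original test file and are not part of this excerpt. As a rough, non-authoritative sketch, the dense reference step presumably follows the Adamax rule from Kingma & Ba (2015); the signature, defaults, and epsilon below are assumptions for illustration only:

import numpy as np

def adamax_update_numpy(param, g_t, t, m, v,
                        alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # Hypothetical numpy reference for one dense Adamax step (t is 0-based).
    m_t = beta1 * m + (1 - beta1) * g_t        # first-moment estimate
    v_t = np.maximum(beta2 * v, np.abs(g_t))   # exponentially weighted infinity norm
    param_t = param - (alpha / (1 - beta1 ** (t + 1))) * (m_t / (v_t + epsilon))
    return param_t, m_t, v_t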
Example #23
class ListTests(keras_parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTracking(self):
    with self.test_session():
      model = HasList()
      output = model(tf.ones([32, 2]))
      self.assertAllEqual([32, 12], output.shape)
      self.assertEqual(11, len(model.layers))
      self.assertEqual(10, len(model.layer_list.layers))
      six.assertCountEqual(
          self,
          model.layers,
          model.layer_list.layers + model.layers_with_updates)
      for index in range(10):
        self.assertEqual(3 + index, model.layer_list.layers[index].units)
      self.assertEqual(2, len(model._checkpoint_dependencies))
      self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref)
      self.assertIs(model.layers_with_updates,
                    model._checkpoint_dependencies[1].ref)
      self.assertEqual(
          10,
          len(model._checkpoint_dependencies[0].ref._checkpoint_dependencies))
      self.evaluate([v.initializer for v in model.variables])
      self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]]))
      save_path = os.path.join(self.get_temp_dir(), "ckpt")
      model.save_weights(save_path)
      self.evaluate(model.variables[0].assign(tf.zeros([2, 3])))
      model.load_weights(save_path)
      self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]],
                          self.evaluate(model.variables[0]))
      v = tf.Variable(1.)
      model.var_list = [v]
    self.assertTrue(any(v is t for t in model.variables))
    self.assertTrue(any(v is t for t in model.trainable_variables))
    self.assertFalse(any(v is t for t in model.non_trainable_variables))
    self.assertTrue(any(model.layer_list[0].trainable_weights[0]
                        is t for t in model.trainable_weights))

  def testSubModelTracking(self):
    model = training.Model()
    model.v = tf.Variable(1.)
    self.assertIn(model.v, model.trainable_weights)
    model2 = training.Model()
    model2.m = [model]
    self.assertIn(model.v, model2.trainable_weights)

  def testSubSequentialTracking(self):

    class _Subclassed(training.Model):

      def __init__(self, wrapped):
        super(_Subclassed, self).__init__()
        self._wrapped = wrapped

      def call(self, x):
        return self._wrapped(x)

    model = sequential.Sequential()
    layer = core.Dense(1)
    model.add(layer)
    model2 = _Subclassed(model)
    model2(tf.ones([1, 2]))
    model2.m = [model]
    self.assertIn(layer.kernel, model2.trainable_weights)

  def testLayerTrackedThroughSequential(self):
    class AttrDict(dict):

      def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

    def ffnet(layer_sizes, name):
      ff = sequential.Sequential(name=name)
      for i, width in enumerate(layer_sizes):
        ff.add(core.Dense(
            width,
            activation=("relu" if i < len(layer_sizes)-1 else None)))
      return ff

    class MyModel2(training.Model):

      def __init__(self, config, name="my_model_2"):
        super(MyModel2, self).__init__(name=name)
        self._num_tokens = config.num_tokens

        # list of sub-models
        self._ffnet = [ffnet(config.module_layers + (self._num_tokens,), "ff")]

      def null_input(self):
        return tf.zeros([1, self._num_tokens], dtype=tf.float32)

      def call(self, input_, module_index=None):
        return self._ffnet[0](input_)

    m2 = MyModel2(AttrDict(
        num_tokens=5,
        module_layers=(50, 30)))

    # Construct
    m2(m2.null_input())
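    # ffnet builds three Dense layers (widths 50, 30, and num_tokens=5), each
    # with a kernel and a bias, hence the 6 trainable variables checked below.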
    self.assertLen(m2.trainable_variables, 6)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testUpdatesForwarded(self):
    model = HasList()
    model_input = tf.ones([32, 2])
    model(model_input)
    if tf.executing_eagerly():
      self.assertEqual(0, len(model.updates))
    else:
      self.assertGreater(len(model.layers_with_updates[0].updates), 0)
      self.assertEqual(set(model.layers_with_updates[0].updates),
                       set(model.updates))

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testLossesForwarded(self):
    model = HasList()
    model_input = tf.ones([32, 2])
    model(model_input)
    self.assertEqual(2, len(model.losses))

  def testModelContainersCompareEqual(self):
    class HasEqualContainers(training.Model):

      def __init__(self):
        super(HasEqualContainers, self).__init__()
        self.l1 = []
        self.l2 = []

    model = HasEqualContainers()
    first_layer = HasEqualContainers()
    model.l1.append(first_layer)
    second_layer = HasEqualContainers()
    model.l2.append(second_layer)
    self.assertEqual([first_layer, second_layer], model.layers)

  @combinations.generate(combinations.combine(mode=["graph", "eager"]))
  def testTensorConversion(self):

    class ListToTensor(training.Model):

      def __init__(self):
        super(ListToTensor, self).__init__()
        self.l = [1., 2., 3.]

    self.assertAllEqual(
        [1., 2., 3.],
        self.evaluate(tf.constant(ListToTensor().l)))

    self.assertAllEqual(
        [1., 2., 3.],
        self.evaluate(tf.raw_ops.Pack(values=ListToTensor().l)))
Example #24
class DenseTest(tf.test.TestCase, parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testDenseProperties(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')
    self.assertEqual(dense.units, 2)
    self.assertEqual(dense.activation, tf.nn.relu)
    self.assertEqual(dense.kernel_regularizer, None)
    self.assertEqual(dense.bias_regularizer, None)
    self.assertEqual(dense.activity_regularizer, None)
    self.assertEqual(dense.use_bias, True)

    # Test auto-naming
    dense = core_layers.Dense(2, activation=tf.nn.relu)
    dense.apply(tf.random.uniform((5, 2)))
    self.assertEqual(dense.name, 'dense_1')
    dense = core_layers.Dense(2, activation=tf.nn.relu)
    dense.apply(tf.random.uniform((5, 2)))
    self.assertEqual(dense.name, 'dense_2')

  @test_util.run_deprecated_v1
  def testVariableInput(self):
    with self.cached_session():
      v = tf.compat.v1.get_variable(
          'X', initializer=tf.compat.v1.zeros_initializer(), shape=(1, 1))
      x = core_layers.Dense(1)(v)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.assertAllEqual(x, [[0.0]])

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testCall(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')
    inputs = tf.random.uniform((5, 4), seed=1)
    outputs = dense(inputs)
    self.assertListEqual([5, 2], outputs.get_shape().as_list())
    self.assertListEqual(dense.variables, [dense.kernel, dense.bias])
    self.assertListEqual(dense.trainable_variables,
                         [dense.kernel, dense.bias])
    self.assertListEqual(dense.non_trainable_variables, [])
    if not tf.executing_eagerly():
      self.assertEqual(
          len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 2)
    self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
    self.assertEqual(dense.bias.name, 'my_dense/bias:0')

  @test_util.assert_no_new_pyobjects_executing_eagerly
  def testNoEagerLeak(self):
    # Tests that repeatedly constructing and building a Layer does not leak
    # Python objects.
    inputs = tf.random.uniform((5, 4), seed=1)
    core_layers.Dense(5)(inputs)
    core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')(inputs)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testCallTensorDot(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='my_dense')
    inputs = tf.random.uniform((5, 4, 3), seed=1)
    outputs = dense(inputs)
    self.assertListEqual([5, 4, 2], outputs.get_shape().as_list())

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testNoBias(self):
    dense = core_layers.Dense(2, use_bias=False, name='my_dense')
    inputs = tf.random.uniform((5, 2), seed=1)
    _ = dense(inputs)
    self.assertListEqual(dense.variables, [dense.kernel])
    self.assertListEqual(dense.trainable_variables, [dense.kernel])
    self.assertListEqual(dense.non_trainable_variables, [])
    if not tf.executing_eagerly():
      self.assertEqual(
          len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 1)
    self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
    self.assertEqual(dense.bias, None)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testNonTrainable(self):
    dense = core_layers.Dense(2, trainable=False, name='my_dense')
    inputs = tf.random.uniform((5, 2), seed=1)
    _ = dense(inputs)
    self.assertListEqual(dense.variables, [dense.kernel, dense.bias])
    self.assertListEqual(dense.non_trainable_variables,
                         [dense.kernel, dense.bias])
    self.assertListEqual(dense.trainable_variables, [])
    if not tf.executing_eagerly():
      self.assertEqual(
          len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 0)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testOutputShape(self):
    dense = core_layers.Dense(7, activation=tf.nn.relu, name='my_dense')
    inputs = tf.random.uniform((5, 3), seed=1)
    outputs = dense.apply(inputs)
    self.assertEqual(outputs.get_shape().as_list(), [5, 7])

    inputs = tf.random.uniform((5, 2, 3), seed=1)
    outputs = dense(inputs)
    self.assertEqual(outputs.get_shape().as_list(), [5, 2, 7])

    inputs = tf.random.uniform((1, 2, 4, 3), seed=1)
    outputs = dense.apply(inputs)
    self.assertEqual(outputs.get_shape().as_list(), [1, 2, 4, 7])

  @test_util.run_deprecated_v1
  def testCallOnPlaceHolder(self):
    inputs = tf.compat.v1.placeholder(dtype=tf.float32)
    dense = core_layers.Dense(4, name='my_dense')
    with self.assertRaises(ValueError):
      dense(inputs)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None])
    dense = core_layers.Dense(4, name='my_dense')
    with self.assertRaises(ValueError):
      dense(inputs)

    inputs = tf.compat.v1.placeholder(
        dtype=tf.float32, shape=[None, None, None])
    dense = core_layers.Dense(4, name='my_dense')
    with self.assertRaises(ValueError):
      dense(inputs)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, 3])
    dense = core_layers.Dense(4, name='my_dense')
    dense(inputs)

    inputs = tf.compat.v1.placeholder(dtype=tf.float32, shape=[None, None, 3])
    dense = core_layers.Dense(4, name='my_dense')
    dense(inputs)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testActivation(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='dense1')
    inputs = tf.random.uniform((5, 3), seed=1)
    outputs = dense(inputs)
    if not tf.executing_eagerly():
      self.assertEqual(outputs.op.name, 'dense1/Relu')

    dense = core_layers.Dense(2, name='dense2')
    inputs = tf.random.uniform((5, 3), seed=1)
    outputs = dense(inputs)
    if not tf.executing_eagerly():
      self.assertEqual(outputs.op.name, 'dense2/BiasAdd')

  @test_util.run_deprecated_v1
  def testActivityRegularizer(self):
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    dense = core_layers.Dense(
        2, name='my_dense', activity_regularizer=regularizer)
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = dense(inputs)
    loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(loss_keys), 1)
    self.assertListEqual(dense.losses, loss_keys)

  @test_util.run_deprecated_v1
  def testKernelRegularizer(self):
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    dense = core_layers.Dense(
        2, name='my_dense', kernel_regularizer=regularizer)
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = dense(inputs)
    loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(loss_keys), 1)
    self.evaluate([v.initializer for v in dense.variables])
    self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys))

  @test_util.run_deprecated_v1
  def testKernelRegularizerWithReuse(self):
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = core_layers.dense(
        inputs, 2, name='my_dense', kernel_regularizer=regularizer)
    self.assertEqual(
        len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)), 1)
    _ = core_layers.dense(
        inputs, 2, name='my_dense', kernel_regularizer=regularizer, reuse=True)
    self.assertEqual(
        len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)), 1)

  @test_util.run_deprecated_v1
  def testBiasRegularizer(self):
    regularizer = lambda x: tf.reduce_sum(x) * 1e-3
    dense = core_layers.Dense(2, name='my_dense', bias_regularizer=regularizer)
    inputs = tf.random.uniform((5, 3), seed=1)
    _ = dense(inputs)
    loss_keys = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    self.assertEqual(len(loss_keys), 1)
    self.evaluate([v.initializer for v in dense.variables])
    self.assertAllEqual(self.evaluate(dense.losses), self.evaluate(loss_keys))

  @test_util.run_deprecated_v1
  def testFunctionalDense(self):
    with self.cached_session():
      inputs = tf.random.uniform((5, 3), seed=1)
      outputs = core_layers.dense(
          inputs, 2, activation=tf.nn.relu, name='my_dense')
      self.assertEqual(
          len(tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)), 2)
      self.assertEqual(outputs.op.name, 'my_dense/Relu')

  @test_util.run_deprecated_v1
  def testFunctionalDenseTwice(self):
    inputs = tf.random.uniform((5, 3), seed=1)
    core_layers.dense(inputs, 2)
    vars1 = _get_variable_dict_from_varstore().values()
    core_layers.dense(inputs, 2)
    vars2 = _get_variable_dict_from_varstore().values()
    self.assertEqual(len(vars1), 2)
    self.assertEqual(len(vars2), 4)

  # TODO(alive): get this to work in eager mode.
  def testFunctionalDenseTwiceReuse(self):
    with self.cached_session():
      inputs = tf.random.uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2, name='my_dense')
      vars1 = tf.compat.v1.trainable_variables()
      core_layers.dense(inputs, 2, name='my_dense', reuse=True)
      vars2 = tf.compat.v1.trainable_variables()
      self.assertEqual(vars1, vars2)

  # TODO(alive): get this to work in eager mode.
  def testFunctionalDenseTwiceReuseFromScope(self):
    with self.cached_session():
      with tf.compat.v1.variable_scope('scope'):
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2, name='my_dense')
        vars1 = tf.compat.v1.trainable_variables()
      with tf.compat.v1.variable_scope('scope', reuse=True):
        core_layers.dense(inputs, 2, name='my_dense')
        vars2 = tf.compat.v1.trainable_variables()
      self.assertEqual(vars1, vars2)

  @test_util.run_deprecated_v1
  def testFunctionalDenseInitializerFromScope(self):
    with tf.compat.v1.variable_scope(
        'scope',
        initializer=tf.compat.v1.ones_initializer()), self.cached_session():
      inputs = tf.random.uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      weights = _get_variable_dict_from_varstore()
      self.assertEqual(len(weights), 2)
      # Check that the matrix weights got initialized to ones (from scope).
      self.assertAllClose(weights['scope/dense/kernel'].read_value(),
                          np.ones((3, 2)))
      # Check that the bias still got initialized to zeros.
      self.assertAllClose(weights['scope/dense/bias'].read_value(), np.zeros(
          (2)))

  def testFunctionalDenseWithCustomGetter(self):
    called = [0]

    def custom_getter(getter, *args, **kwargs):
      called[0] += 1
      return getter(*args, **kwargs)

    with tf.compat.v1.variable_scope('test', custom_getter=custom_getter):
      inputs = tf.random.uniform((5, 3), seed=1)
      core_layers.dense(inputs, 2)
    self.assertEqual(called[0], 2)

  @test_util.run_deprecated_v1
  def testFunctionalDenseInScope(self):
    with self.cached_session():
      with tf.compat.v1.variable_scope('test'):
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2, name='my_dense')
        var_dict = _get_variable_dict_from_varstore()
        var_key = 'test/my_dense/kernel'
        self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)
      with tf.compat.v1.variable_scope('test1') as scope:
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2, name=scope)
        var_dict = _get_variable_dict_from_varstore()
        var_key = 'test1/kernel'
        self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)
      with tf.compat.v1.variable_scope('test2'):
        inputs = tf.random.uniform((5, 3), seed=1)
        core_layers.dense(inputs, 2)
        var_dict = _get_variable_dict_from_varstore()
        var_key = 'test2/dense/kernel'
        self.assertEqual(var_dict[var_key].name, '%s:0' % var_key)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testComputeOutputShape(self):
    dense = core_layers.Dense(2, activation=tf.nn.relu, name='dense1')
    ts = tf.TensorShape
    # pylint: disable=protected-access
    with self.assertRaises(ValueError):
      dense.compute_output_shape(ts(None))
    with self.assertRaises(ValueError):
      dense.compute_output_shape(ts([]))
    with self.assertRaises(ValueError):
      dense.compute_output_shape(ts([1]))
    self.assertEqual(
        [None, 2],
        dense.compute_output_shape((None, 3)).as_list())
    self.assertEqual(
        [None, 2],
        dense.compute_output_shape(ts([None, 3])).as_list())
    self.assertEqual(
        [None, 4, 2],
        dense.compute_output_shape(ts([None, 4, 3])).as_list())
    # pylint: enable=protected-access

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testConstraints(self):
    k_constraint = lambda x: x / tf.reduce_sum(x)
    b_constraint = lambda x: x / tf.reduce_max(x)
    dense = core_layers.Dense(2,
                              kernel_constraint=k_constraint,
                              bias_constraint=b_constraint)
    inputs = tf.random.uniform((5, 3), seed=1)
    dense(inputs)
    self.assertEqual(dense.kernel_constraint, k_constraint)
    self.assertEqual(dense.bias_constraint, b_constraint)
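
A rough public-API equivalent of the constraint test above, assuming tf.keras.layers.Dense in place of the internal core_layers module; the constraint lambdas mirror the ones in the test:

import tensorflow as tf

k_constraint = lambda w: w / tf.reduce_sum(w)
b_constraint = lambda w: w / tf.reduce_max(w)

layer = tf.keras.layers.Dense(
    2, kernel_constraint=k_constraint, bias_constraint=b_constraint)
_ = layer(tf.random.uniform((5, 3)))  # builds kernel (3, 2) and bias (2,)

assert layer.kernel.shape == (3, 2) and layer.bias.shape == (2,)
assert layer.kernel_constraint is k_constraint
assert layer.bias_constraint is b_constraint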
Example #25
        opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=False)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be an iterations variable, plus two unique slot
        # variables for each of v1 and v2 (five variables in total).
        self.assertLen(set({id(v) for v in opt.variables()}), 5)
        self.assertEqual(self.evaluate(opt.variables()[0]),
                         self.evaluate(opt.iterations))

        opt = rmsprop.RMSprop(learning_rate=1., momentum=0.2, centered=True)
        opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
        # There should be an iterations variable, plus three unique slot
        # variables for each of v1 and v2 (seven variables in total).
        self.assertLen(set({id(v) for v in opt.variables()}), 7)
        self.assertEqual(self.evaluate(opt.variables()[0]),
                         self.evaluate(opt.iterations))


@combinations.generate(combinations.combine(mode=["graph", "eager"]))
class SlotColocationTest(tf.test.TestCase, parameterized.TestCase):
    @parameterized.parameters([True, False])
    @test_util.run_gpu_only
    def testRunMinimizeOnGPUForCPUVariables(self, use_resource):
        with tf.compat.v1.device("/device:CPU:0"):
            if use_resource:
                var0 = tf.Variable([1.0, 2.0], dtype=tf.float32)
                var1 = tf.Variable([3.0, 4.0], dtype=tf.float32)
            else:
                var0 = tf.Variable([1.0, 2.0], dtype=tf.float32)
                var1 = tf.Variable([3.0, 4.0], dtype=tf.float32)

        def loss():
            return 5 * var0 + 3 * var1
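
The slot-variable counting above can be reproduced with the public optimizer API; a small sketch, assuming tf.keras.optimizers.RMSprop is the OptimizerV2 implementation that the internal rmsprop module in the snippet wraps:

import tensorflow as tf

v1 = tf.Variable([1.0, 2.0])
v2 = tf.Variable([3.0, 4.0])

opt = tf.keras.optimizers.RMSprop(learning_rate=1., momentum=0.2, centered=True)
opt.minimize(lambda: v1 + v2, var_list=[v1, v2])

# iterations + three slots ("rms", "momentum", "mg") per variable -> 7 variables.
assert len({id(v) for v in opt.variables()}) == 7
assert opt.get_slot(v1, "rms").shape == (2,)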
Example #26
class DropoutTest(tf.test.TestCase, parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testDropoutProperties(self):
    dp = core_layers.Dropout(0.5, name='dropout')
    self.assertEqual(dp.rate, 0.5)
    self.assertEqual(dp.noise_shape, None)
    dp.apply(tf.ones(()))
    self.assertEqual(dp.name, 'dropout')

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testBooleanLearningPhase(self):
    dp = core_layers.Dropout(0.5)
    inputs = tf.ones((5, 3))
    dropped = dp.apply(inputs, training=True)
    if not tf.executing_eagerly():
      self.evaluate(tf.compat.v1.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    dropped = dp.apply(inputs, training=False)
    np_output = self.evaluate(dropped)
    self.assertAllClose(np.ones((5, 3)), np_output)

  @test_util.run_deprecated_v1
  def testDynamicLearningPhase(self):
    with self.cached_session() as sess:
      dp = core_layers.Dropout(0.5, seed=1)
      inputs = tf.ones((5, 5))
      training = tf.compat.v1.placeholder(dtype='bool')
      dropped = dp.apply(inputs, training=training)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      np_output = sess.run(dropped, feed_dict={training: True})
      self.assertAlmostEqual(0., np_output.min())
      np_output = sess.run(dropped, feed_dict={training: False})
      self.assertAllClose(np.ones((5, 5)), np_output)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def testDynamicNoiseShape(self):
    inputs = tf.ones((5, 3, 2))
    noise_shape = [None, 1, None]
    dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
    dropped = dp.apply(inputs, training=True)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])

  def testCustomNoiseShape(self):
    inputs = tf.ones((5, 3, 2))
    noise_shape = [5, 1, 2]
    dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
    dropped = dp.apply(inputs, training=True)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    np_output = self.evaluate(dropped)
    self.assertAlmostEqual(0., np_output.min())
    self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])

  @test_util.run_deprecated_v1
  def testFunctionalDropout(self):
    with self.cached_session():
      inputs = tf.ones((5, 5))
      dropped = core_layers.dropout(inputs, 0.5, training=True, seed=1)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      np_output = self.evaluate(dropped)
      self.assertAlmostEqual(0., np_output.min())
      dropped = core_layers.dropout(inputs, 0.5, training=False, seed=1)
      np_output = self.evaluate(dropped)
      self.assertAllClose(np.ones((5, 5)), np_output)

  @test_util.run_deprecated_v1
  def testDynamicRate(self):
    with self.cached_session() as sess:
      rate = tf.compat.v1.placeholder(dtype='float32', name='rate')
      dp = core_layers.Dropout(rate, name='dropout')
      inputs = tf.ones((5, 5))
      dropped = dp.apply(inputs, training=True)
      self.evaluate(tf.compat.v1.global_variables_initializer())
      np_output = sess.run(dropped, feed_dict={rate: 0.5})
      self.assertAlmostEqual(0., np_output.min())
      np_output = sess.run(dropped, feed_dict={rate: 0.0})
      self.assertAllClose(np.ones((5, 5)), np_output)
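
A condensed sketch of the dropout behaviour exercised above, using the public tf.keras.layers.Dropout layer (the training flag replaces the placeholder-fed learning phase from the V1 tests):

import numpy as np
import tensorflow as tf

layer = tf.keras.layers.Dropout(0.5)
x = tf.ones((5, 3))

train_out = layer(x, training=True)   # kept entries are rescaled by 1 / (1 - rate)
infer_out = layer(x, training=False)  # identity at inference time

# Every training-mode entry is either dropped (0.0) or rescaled to 2.0.
assert set(np.unique(train_out.numpy())).issubset({0.0, 2.0})
np.testing.assert_allclose(infer_out.numpy(), np.ones((5, 3)))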
Example #27
class TestWeightSavingAndLoadingTFFormat(tf.test.TestCase, parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_tensorflow_format_overwrite(self):
    with self.cached_session() as session:
      model = SubclassedModel()
      temp_dir = self.get_temp_dir()
      prefix = os.path.join(temp_dir, 'ckpt')

      x = tf.constant(np.random.random((3, 2)), dtype=tf.float32)
      executing_eagerly = tf.executing_eagerly()
      model(x)  # pylint: disable=not-callable
      if not executing_eagerly:
        session.run([v.initializer for v in model.variables])
      model.save_weights(prefix, save_format='tensorflow')
      model.save_weights(prefix, save_format='tensorflow', overwrite=True)
      with self.assertRaises(EOFError):
        # Indirectly tests that the user is prompted
        model.save_weights(prefix, save_format='tensorflow', overwrite=False)

  def test_no_default_session(self):
    with tf.Graph().as_default():
      self.assertFalse(tf.compat.v1.get_default_session())
      data = np.random.random((1000, 32)).astype(np.float32)
      labels = np.random.random((1000, 10)).astype(np.float32)

      model = keras.models.Sequential([
          keras.layers.Dense(10, activation='softmax'),
          keras.layers.Dense(10, activation='softmax')])

      model.compile(optimizer=tf.compat.v1.train.RMSPropOptimizer(0.001),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

      model.fit(data, labels)
      fname = os.path.join(self.get_temp_dir(), 'weights', 'ckpt')
      model.save_weights(fname)
      model.load_weights(fname)

  def test_no_graph_pollution(self):
    with tf.compat.v1.get_default_graph().as_default():
      graph = tf.Graph()
      with graph.as_default(), self.session(graph) as session:
        model = SubclassedModel()
        temp_dir = self.get_temp_dir()
        prefix = os.path.join(temp_dir, 'ckpt')

        x = tf.constant(np.random.random((3, 2)), dtype=tf.float32)
        model(x)  # pylint: disable=not-callable
        session.run([v.initializer for v in model.variables])
        model.save_weights(prefix, save_format='tensorflow')
        op_count = len(graph.get_operations())
        model.save_weights(prefix, save_format='tensorflow')
        self.assertLen(graph.get_operations(), op_count)

        model.load_weights(prefix)
        op_count = len(graph.get_operations())
        model.load_weights(prefix)
        self.assertLen(graph.get_operations(), op_count)

  def _weight_loading_test_template(self, make_model_fn):
    with self.cached_session():
      model = make_model_fn()
      model.compile(
          loss='mse',
          optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1),
          metrics=['acc', keras.metrics.CategoricalAccuracy()])
      temp_dir = self.get_temp_dir()
      prefix = os.path.join(temp_dir, 'ckpt')
      train_x = np.random.random((3, 2))
      train_y = np.random.random((3,))
      x = tf.constant(train_x, dtype=tf.float32)

      model.train_on_batch(train_x, train_y)
      model.save_weights(prefix, save_format='tf')
      ref_y_before_train = model.predict(train_x)
      model.train_on_batch(train_x, train_y)
      ref_y_after_train = model.predict(train_x)
      for v in model.variables:
        self.evaluate(
            v.assign(tf.random.normal(shape=tf.shape(v))))

      self.addCleanup(shutil.rmtree, temp_dir)

      model.load_weights(prefix)
      self.assertAllClose(ref_y_before_train, self.evaluate(model(x)))

      # Test restore-on-create if this is a subclassed Model (graph Networks
      # will have already created their variables).
      load_model = make_model_fn()
      load_model.load_weights(prefix)
      self.assertAllClose(
          ref_y_before_train,
          self.evaluate(load_model(x)))
      load_model = make_model_fn()
      load_model.load_weights(prefix)
      # We need to run some of the restore ops for predict(), but not all
      # variables have been created yet (optimizer slot variables). Tests
      # incremental restore.
      load_model.predict(train_x)
      load_model.compile(
          loss='mse',
          optimizer=tf.compat.v1.train.RMSPropOptimizer(0.1),
          metrics=['acc', keras.metrics.CategoricalAccuracy()])
      load_model.train_on_batch(train_x, train_y)
      self.assertAllClose(ref_y_after_train, self.evaluate(load_model(x)))

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_graph_model(self):
    def _make_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3)(a)
      b = keras.layers.Dense(1)(x)
      return keras.models.Model(a, b)

    self._weight_loading_test_template(_make_graph_model)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_subclassed_model(self):
    self._weight_loading_test_template(SubclassedModel)

  def _new_layer_weight_loading_test_template(
      self, first_model_fn, second_model_fn):
    with self.cached_session() as session:
      model = first_model_fn()
      temp_dir = self.get_temp_dir()
      prefix = os.path.join(temp_dir, 'ckpt')

      x = tf.constant(np.random.random((3, 2)), dtype=tf.float32)
      executing_eagerly = tf.executing_eagerly()
      ref_y_tensor = model(x)
      if not executing_eagerly:
        session.run([v.initializer for v in model.variables])
      ref_y = self.evaluate(ref_y_tensor)
      model.save_weights(prefix)
      self.assertEqual(
          prefix,
          tf.train.latest_checkpoint(temp_dir))
      for v in model.variables:
        self.evaluate(
            v.assign(tf.random.normal(shape=tf.shape(v))))

      self.addCleanup(shutil.rmtree, temp_dir)

      second_model = second_model_fn()
      status = second_model.load_weights(prefix)
      second_model(x)
      status.run_restore_ops()
      second_model.save_weights(prefix)
      # Check that the second model's checkpoint loads into the original model
      status = model.load_weights(prefix)
      status.run_restore_ops(session)
      y = self.evaluate(model(x))
      self.assertAllClose(ref_y, y)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_graph_model_added_layer(self):
    def _save_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3, name='first')(a)
      b = keras.layers.Dense(1, name='second')(x)
      return keras.models.Model(a, b)
    def _restore_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3, name='first')(a)
      y = keras.layers.Dense(1, name='second')(x)
      b = keras.layers.Dense(3, name='secondjr')(y)
      return keras.models.Model(a, b)

    self._new_layer_weight_loading_test_template(
        _save_graph_model, _restore_graph_model)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_graph_model_added_no_weight_layer(self):
    def _save_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3, name='first')(a)
      b = keras.layers.Dense(1, name='second')(x)
      return keras.models.Model(a, b)
    def _restore_graph_model():
      a = keras.layers.Input(shape=(2,))
      x = keras.layers.Dense(3, name='first')(a)
      b = keras.layers.Dense(1, name='second')(x)
      y = keras.layers.Dropout(rate=0.1)(b)
      return keras.models.Model(a, y)

    self._new_layer_weight_loading_test_template(
        _save_graph_model, _restore_graph_model)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_weight_loading_subclassed_model_added_layer(self):

    class SubclassedModelRestore(training.Model):

      def __init__(self):
        super(SubclassedModelRestore, self).__init__()
        self.x_layer = keras.layers.Dense(3)
        self.y_layer = keras.layers.Dense(3)
        self.b_layer = keras.layers.Dense(1)

      def call(self, a):
        return self.b_layer(self.y_layer(self.x_layer(a)))

    self._new_layer_weight_loading_test_template(
        SubclassedModel, SubclassedModelRestore)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_incompatible_checkpoint(self):
    save_path = tf.train.Checkpoint().save(
        os.path.join(self.get_temp_dir(), 'ckpt'))
    m = DummySubclassModel()
    with self.assertRaisesRegex(AssertionError, 'Nothing to load'):
      m.load_weights(save_path)
    m.dense = keras.layers.Dense(2)
    m.dense(tf.constant([[1.]]))
    with self.assertRaisesRegex(AssertionError,
                                'Nothing except the root object matched'):
      m.load_weights(save_path)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_directory_passed(self):
    with self.cached_session():
      m = DummySubclassModel()
      v = m.add_weight(name='v', shape=[])
      self.evaluate(v.assign(42.))
      prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'ckpt/')
      m.save_weights(prefix)
      self.evaluate(v.assign(2.))
      m.load_weights(prefix)
      self.assertEqual(42., self.evaluate(v))

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_relative_path(self):
    with self.cached_session():
      m = DummySubclassModel()
      v = m.add_weight(name='v', shape=[])
      os.chdir(self.get_temp_dir())

      prefix = 'ackpt'
      self.evaluate(v.assign(42.))
      m.save_weights(prefix)
      self.assertTrue(tf.io.gfile.exists('ackpt.index'))
      self.evaluate(v.assign(1.))
      m.load_weights(prefix)
      self.assertEqual(42., self.evaluate(v))

      prefix = 'subdir/ackpt'
      self.evaluate(v.assign(43.))
      m.save_weights(prefix)
      self.assertTrue(tf.io.gfile.exists('subdir/ackpt.index'))
      self.evaluate(v.assign(2.))
      m.load_weights(prefix)
      self.assertEqual(43., self.evaluate(v))

      prefix = 'ackpt/'
      self.evaluate(v.assign(44.))
      m.save_weights(prefix)
      self.assertTrue(tf.io.gfile.exists('ackpt/.index'))
      self.evaluate(v.assign(3.))
      m.load_weights(prefix)
      self.assertEqual(44., self.evaluate(v))

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_nonexistent_prefix_directory(self):
    with self.cached_session():
      m = DummySubclassModel()
      v = m.add_weight(name='v', shape=[])
      self.evaluate(v.assign(42.))
      prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'bckpt')
      m.save_weights(prefix)
      self.evaluate(v.assign(2.))
      m.load_weights(prefix)
      self.assertEqual(42., self.evaluate(v))
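
A condensed sketch of the TF-format weight round trip the class above exercises, using only public tf.keras APIs (the temporary-directory handling is illustrative):

import os
import tempfile

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(3, input_shape=(2,))])
x = np.random.random((4, 2)).astype("float32")
ref = model(x).numpy()

prefix = os.path.join(tempfile.mkdtemp(), "ckpt")
model.save_weights(prefix)  # TF-format checkpoint by default for non-.h5 paths
assert tf.train.latest_checkpoint(os.path.dirname(prefix)) == prefix

# Perturb the weights, then restore them from the checkpoint.
for v in model.weights:
  v.assign(tf.random.normal(v.shape))
model.load_weights(prefix)
np.testing.assert_allclose(model(x).numpy(), ref)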
Example #28
File: save_test.py  Project: ohsdba/keras
class TestWholeModelSaving(keras_parameterized.TestCase):

  def _save_model_dir(self, dirname='saved_model'):
    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
    return os.path.join(temp_dir, dirname)

  def _assert_same_weights_and_metrics(self, model, loaded_model):
    """Checks that the loaded weights and metrics are the same as the original.

    Args:
      model: original model
      loaded_model: loaded model
    """
    self.assertAllClose(model.weights, loaded_model.weights)

    if loaded_model.optimizer:
      if testing_utils.get_save_format() == 'tf':
        # TODO(b/153110928): Keras TF format doesn't restore optimizer weights
        # currently.
        return
      self.assertAllClose(model.optimizer.weights,
                          loaded_model.optimizer.weights)

    # In V1/Graph mode, the model isn't built, so the metrics are not loaded
    # immediately (requires model to be called on some data before building
    # metrics).
    check_metrics = tf.__internal__.tf2.enabled() and tf.executing_eagerly()

    if check_metrics:
      self.assertAllEqual([m.name for m in model.metrics],
                          [m.name for m in loaded_model.metrics])

  @keras_parameterized.run_with_all_model_types
  @keras_parameterized.run_all_keras_modes
  def test_save_and_load(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    save_kwargs = testing_utils.get_save_kwargs()

    if ((save_format == 'h5' or not save_kwargs.get('save_traces', True)) and
        testing_utils.get_model_type() == 'subclass'):
      # HDF5 format currently does not allow saving subclassed models.
      # When saving with `save_traces=False`, the subclassed model must have a
      # get_config/from_config, which the autogenerated model does not have.
      return

    with self.cached_session():
      model = testing_utils.get_model_from_layers(
          [keras.layers.Dense(2),
           keras.layers.RepeatVector(3),
           keras.layers.TimeDistributed(keras.layers.Dense(3))],
          input_shape=(3,))
      model.compile(
          loss=keras.losses.MSE,
          optimizer=keras.optimizer_v2.rmsprop.RMSprop(lr=0.0001),
          metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalCrossentropy(
                  name='cce', label_smoothing=tf.constant(0.2)),
          ],
          weighted_metrics=[
              keras.metrics.categorical_crossentropy,
              keras.metrics.CategoricalCrossentropy(
                  name='cce', label_smoothing=tf.constant(0.2)),
          ],
          sample_weight_mode='temporal')

      x = np.random.random((1, 3))
      y = np.random.random((1, 3, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)
      keras.models.save_model(
          model, saved_model_dir, save_format=save_format,
          **save_kwargs)

      loaded_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, loaded_model)

      out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      eval_out = model.evaluate(x, y)
      eval_out2 = loaded_model.evaluate(x, y)
      self.assertArrayNear(eval_out, eval_out2, 0.001)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_sequential_model_saving_without_input_shape(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
      model.compile(
          loss=keras.losses.MSE,
          optimizer='rmsprop',
          metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalAccuracy(name='cat_acc')
          ],
          weighted_metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalAccuracy(name='cat_acc2')
          ],
          sample_weight_mode='temporal')
      x = np.random.random((1, 3))
      y = np.random.random((1, 3, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)
      model.save(saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(saved_model_dir)

      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_sequential_model_saving_without_compile(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

      x = np.random.random((1, 3))
      out = model.predict(x)

      # Save the model without any compilation or training.
      keras.models.save_model(model, saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_sequential_model_saving_2(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    with tf.Graph().as_default(), self.cached_session():
      # test with custom optimizer, loss

      class CustomOp(optimizer_v1.RMSprop):
        pass

      def custom_loss(y_true, y_pred):
        return keras.losses.mse(y_true, y_pred)

      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.Dense(3))
      model.compile(loss=custom_loss, optimizer=CustomOp(), metrics=['acc'])

      x = np.random.random((1, 3))
      y = np.random.random((1, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)
      keras.models.save_model(model, saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(
          saved_model_dir,
          custom_objects={'CustomOp': CustomOp,
                          'custom_loss': custom_loss})
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_saving_without_compilation(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

  def test_saving_with_tf_optimizer(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    model = keras.models.Sequential()
    model.add(keras.layers.Dense(2, input_shape=(3,)))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse',
                  optimizer=tf.compat.v1.train.AdadeltaOptimizer(0.1),
                  metrics=['acc'])

    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

  def test_saving_right_after_compilation(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.Dense(3))
      model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
      if not tf.compat.v1.executing_eagerly_outside_functions():
        model._make_train_function()
      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      model = keras.models.load_model(saved_model_dir)

  def test_saving_lambda_numpy_array_arguments(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    if h5py is None:
      self.skipTest('h5py required to run this test')

    mean = np.random.random((4, 2, 3))
    std = np.abs(np.random.random((4, 2, 3))) + 1e-5
    inputs = keras.layers.Input(shape=(4, 2, 3))
    output = keras.layers.Lambda(lambda image, mu, std: (image - mu) / std,
                                 arguments={'mu': mean, 'std': std})(inputs)
    model = keras.models.Model(inputs, output)
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])

    keras.models.save_model(model, saved_model_dir, save_format=save_format)

    model = keras.models.load_model(saved_model_dir)

    self.assertAllClose(mean, model.layers[1].arguments['mu'])
    self.assertAllClose(std, model.layers[1].arguments['std'])

  def test_saving_model_with_long_layer_names(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with self.cached_session():
      # This layer name will make the `layer_names` HDF5 attribute blow
      # out of proportion. On its own it fits within the internal HDF5
      # attribute memory limit, but h5py converts the list of layer names
      # into a numpy array that uses the same amount of memory for every
      # item, which increases the memory requirements substantially.
      x = keras.Input(shape=(2,), name='input_' + ('x' * (2**15)))
      f = x
      for i in range(4):
        f = keras.layers.Dense(2, name='dense_%d' % (i,))(f)
      model = keras.Model(inputs=[x], outputs=[f])
      model.compile(
          'adam', loss=keras.losses.MeanSquaredError(), metrics=['acc'])

      x = np.random.random((1, 2))
      y = np.random.random((1, 2))
      model.train_on_batch(x, y)
      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      model = keras.models.load_model(saved_model_dir)

      if save_format in ['tf', 'tensorflow']:
        return
      # Check that the HDF5 file contains a chunked array
      # of layer names.
      with h5py.File(saved_model_dir, 'r') as h5file:
        num_names_arrays = len([attr for attr in h5file['model_weights'].attrs
                                if attr.startswith('layer_names')])
      # The chunking of layer names array should have happened.
      self.assertGreater(num_names_arrays, 0)
      out2 = model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_saving_model_with_long_weights_names(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    with self.cached_session():
      x = keras.Input(shape=(2,), name='nested_model_input')
      f = x
      for i in range(4):
        f = keras.layers.Dense(2, name='nested_model_dense_%d' % (i,))(f)
      # This layer name will make the `weight_names`
      # HDF5 attribute blow out of proportion.
      f = keras.layers.Dense(2, name='nested_model_output' + ('x' * (2**14)))(f)
      nested_model = keras.Model(inputs=[x], outputs=[f], name='nested_model')

      x = keras.Input(shape=(2,), name='outer_model_input')
      f = nested_model(x)
      f = keras.layers.Dense(2, name='outer_model_output')(f)

      model = keras.Model(inputs=[x], outputs=[f])
      model.compile(loss='mse', optimizer='adam', metrics=['acc'])

      x = np.random.random((1, 2))
      y = np.random.random((1, 2))
      model.train_on_batch(x, y)
      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      model = keras.models.load_model(saved_model_dir)

      if save_format in ['h5', 'hdf5', 'keras']:
        # Check that the HDF5 file contains a chunked array
        # of weight names.
        with h5py.File(saved_model_dir, 'r') as h5file:
          num_weight_arrays = len(
              [attr for attr in h5file['model_weights']['nested_model'].attrs
               if attr.startswith('weight_names')])
        # The chunking of layer names array should have happened.
        self.assertGreater(num_weight_arrays, 0)
      out2 = model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_model_saving_to_pre_created_h5py_file(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    with tf.Graph().as_default(), self.cached_session():
      inputs = keras.Input(shape=(3,))
      x = keras.layers.Dense(2)(inputs)
      outputs = keras.layers.Dense(3)(x)

      model = keras.Model(inputs, outputs)
      model.compile(
          loss=keras.losses.MSE,
          optimizer=optimizer_v1.Adam(),
          metrics=[
              keras.metrics.categorical_accuracy,
              keras.metrics.CategoricalAccuracy()
          ])
      x = np.random.random((1, 3))
      y = np.random.random((1, 3))
      model.train_on_batch(x, y)

      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      loaded_model = keras.models.load_model(saved_model_dir)
      out1 = loaded_model.predict(x)
      self.assertAllClose(out, out1, atol=1e-05)
      if save_format in ['tf', 'tensorflow']:
        return

      # Test h5 format specifically
      fd, fname = tempfile.mkstemp('.h5')
      with h5py.File(fname, mode='r+') as h5file:
        keras.models.save_model(model, h5file)
        loaded_model = keras.models.load_model(h5file)
        out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      # Test non-default options in h5
      with h5py.File(
          '_', driver='core', mode='w', backing_store=False) as h5file:
        keras.models.save_model(model, h5file)
        loaded_model = keras.models.load_model(h5file)
        out2 = loaded_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

      # Cleanup
      os.close(fd)
      os.remove(fname)

  def test_model_saving_to_new_dir_path(self):
    saved_model_dir = os.path.join(self._save_model_dir(), 'newdir',
                                   'saved_model')
    save_format = testing_utils.get_save_format()

    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

      x = np.random.random((1, 3))
      out = model.predict(x)

      keras.models.save_model(model, saved_model_dir, save_format=save_format)

      new_model = keras.models.load_model(saved_model_dir)
      self._assert_same_weights_and_metrics(model, new_model)

      out2 = new_model.predict(x)
      self.assertAllClose(out, out2, atol=1e-05)

  def test_model_raise_exception_with_failed_saving(self):
    if h5py is None:
      self.skipTest('h5py required to run this test')

    saved_model_dir = self._save_model_dir()
    saved_model_path = os.path.join(saved_model_dir, 'saved_model.h5')

    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(2, input_shape=(3,)))
      model.add(keras.layers.RepeatVector(3))
      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))

      with self.assertRaisesRegex(OSError, 'Unable to create file'):
        with h5py.File(saved_model_path, 'w'):
          keras.models.save_model(model, saved_model_path)

  def test_saving_constant_initializer_with_numpy(self):
    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    model = keras.models.Sequential()
    model.add(
        keras.layers.Dense(
            2,
            input_shape=(3,),
            kernel_initializer=keras.initializers.Constant(np.ones((3, 2)))))
    model.add(keras.layers.Dense(3))
    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    model = keras.models.load_model(saved_model_dir)

  def test_saving_group_naming_h5py(self):
    # Test saving a model containing a layer whose name is a prefix of a
    # previous layer's name.

    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir)
    h5_path = os.path.join(temp_dir, 'test.h5')

    input_layer = keras.layers.Input((None, None, 3), name='test_input')
    x = keras.layers.Conv2D(1, 1, name='conv1/conv')(input_layer)
    x = keras.layers.Activation('relu', name='conv1')(x)
    model = keras.models.Model(inputs=input_layer, outputs=x)

    model.save_weights(h5_path)
    model.load_weights(h5_path)

  def test_primitive_attrs_contain_no_extraneous_strings(self):
    if h5py is None:
      self.skipTest('h5py required to run this test')

    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(1, input_shape=[2]))
    model.save(saved_model_dir, save_format=save_format)
    if save_format in ['tf', 'tensorflow']:
      return

    h5file = h5py.File(saved_model_dir, 'r')
    self.assertRegex(h5file.attrs['keras_version'], r'^[\d]+\.[\d]+\.[\S]+$')

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_functional_model_with_custom_loss_and_metric(self):
    def _make_model():
      inputs = keras.Input(shape=(4,))
      x = keras.layers.Dense(8, activation='relu')(inputs)
      outputs = keras.layers.Dense(3, activation='softmax')(x)
      model = keras.Model(inputs=inputs, outputs=outputs)
      custom_loss = keras.layers.Lambda(lambda x: keras.backend.sum(x * x))(x)
      model.add_loss(custom_loss)
      model.add_metric(custom_loss, aggregation='mean', name='custom_loss')
      return model

    saved_model_dir = self._save_model_dir()
    save_format = testing_utils.get_save_format()

    with self.cached_session():
      model = _make_model()
      model.compile(
          loss=keras.losses.SparseCategoricalCrossentropy(),
          optimizer=optimizers.gradient_descent_v2.SGD(),
          metrics=[keras.metrics.SparseCategoricalCrossentropy()])
      x = np.random.normal(size=(32, 4))
      y = np.random.randint(0, 3, size=32)
      model.train_on_batch(x, y)
      evaluation_results = model.evaluate(x, y)
      # Save and reload model.
      model.save(saved_model_dir, save_format=save_format)
      del model  # Prevent misuse.
      loaded_model = keras.models.load_model(saved_model_dir)
      loaded_model_eval_results = loaded_model.evaluate(x, y)
      # Assert all evaluation results are the same.
      self.assertAllClose(evaluation_results, loaded_model_eval_results, 1e-9)
      # Check correctness of the loss calculation.
      self.assertAllGreater(evaluation_results, 0.)
      evaluation_results = dict(
          zip(loaded_model.metrics_names, evaluation_results))
      self.assertNear(
          evaluation_results['sparse_categorical_crossentropy'] +
          evaluation_results['custom_loss'], evaluation_results['loss'], 1e-6)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_save_uncompiled_model_with_optimizer(self):
    with self.cached_session() as session:
      saved_model_dir = self._save_model_dir()
      save_format = testing_utils.get_save_format()
      model = keras.models.Sequential([keras.layers.Dense(1, input_shape=(3,))])
      # Set the model's optimizer but don't compile. This can happen if the
      # model is trained with a custom training loop.
      model.optimizer = keras.optimizer_v2.rmsprop.RMSprop(lr=0.0001)
      if not tf.executing_eagerly():
        session.run([v.initializer for v in model.variables])
      model.save(saved_model_dir, save_format=save_format)

      if save_format in ['tf', 'tensorflow']:
        loaded = keras.models.load_model(saved_model_dir)
        self.assertIsInstance(loaded.optimizer,
                              keras.optimizer_v2.optimizer_v2.OptimizerV2)

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_functional_model_with_getitem_op_layer(self):
    inp = keras.Input(shape=(8))

    out = inp[:]
    model = keras.Model(
        inputs=[inp],
        outputs=out)
    batch_size = 7
    x = tf.stack([
        tf.range(8) for _ in range(batch_size)])
    args = [x]
    expected = x[:]

    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)

    # Make sure it can be successfully saved and loaded
    save_format = testing_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)

    loaded_model = keras.models.load_model(saved_model_dir)

    self.assertAllEqual(loaded_model(args), expected)
    self.assertAllEqual(loaded_model.predict(args, batch_size=batch_size),
                        expected)

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_shared_objects(self):
    class OuterLayer(keras.layers.Layer):

      def __init__(self, inner_layer):
        super(OuterLayer, self).__init__()
        self.inner_layer = inner_layer

      def call(self, inputs):
        return self.inner_layer(inputs)

      def get_config(self):
        return {
            'inner_layer': generic_utils.serialize_keras_object(
                self.inner_layer)
        }

      @classmethod
      def from_config(cls, config):
        return cls(generic_utils.deserialize_keras_object(
            config['inner_layer']))

    class InnerLayer(keras.layers.Layer):

      def __init__(self):
        super(InnerLayer, self).__init__()
        self.v = self.add_weight(name='v', shape=[], dtype=tf.float32)

      def call(self, inputs):
        return self.v + inputs

      @classmethod
      def from_config(cls, config):
        return cls()

    # Create a model with 2 output layers that share the same inner layer.
    inner_layer = InnerLayer()
    outer_layer_1 = OuterLayer(inner_layer)
    outer_layer_2 = OuterLayer(inner_layer)
    input_ = keras.Input(shape=(1,))
    model = keras.Model(
        inputs=input_, outputs=[outer_layer_1(input_), outer_layer_2(input_)])

    # Changes to the shared layer should affect both outputs.
    model.layers[1].inner_layer.v.assign(5)
    self.assertAllEqual(model(1), [6.0, 6.0])
    model.layers[1].inner_layer.v.assign(3)
    self.assertAllEqual(model(1), [4.0, 4.0])

    # After loading, changes to the shared layer should still affect both
    # outputs.
    def _do_assertions(loaded):
      loaded.layers[1].inner_layer.v.assign(5)
      self.assertAllEqual(loaded(1), [6.0, 6.0])
      loaded.layers[1].inner_layer.v.assign(3)
      self.assertAllEqual(loaded(1), [4.0, 4.0])
      loaded.layers[2].inner_layer.v.assign(5)
      self.assertAllEqual(loaded(1), [6.0, 6.0])
      loaded.layers[2].inner_layer.v.assign(3)
      self.assertAllEqual(loaded(1), [4.0, 4.0])

    # We'd like to make sure we only attach shared object IDs when strictly
    # necessary, so we'll recursively traverse the generated config to count
    # whether we have the exact number we expect.
    def _get_all_keys_recursive(dict_or_iterable):
      if isinstance(dict_or_iterable, dict):
        for key in dict_or_iterable.keys():
          yield key
        for key in _get_all_keys_recursive(dict_or_iterable.values()):
          yield key
      elif isinstance(dict_or_iterable, string_types):
        return
      else:
        try:
          for item in dict_or_iterable:
            for key in _get_all_keys_recursive(item):
              yield key
        # Not an iterable or dictionary
        except TypeError:
          return

    with generic_utils.CustomObjectScope({
        'OuterLayer': OuterLayer, 'InnerLayer': InnerLayer}):

      # Test saving and loading to disk
      save_format = testing_utils.get_save_format()
      saved_model_dir = self._save_model_dir()
      keras.models.save_model(model, saved_model_dir, save_format=save_format)
      loaded = keras.models.load_model(saved_model_dir)
      _do_assertions(loaded)

      # Test recreating directly from config
      config = model.get_config()
      key_count = collections.Counter(_get_all_keys_recursive(config))
      self.assertEqual(key_count[generic_utils.SHARED_OBJECT_KEY], 2)
      loaded = keras.Model.from_config(config)
      _do_assertions(loaded)

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_shared_objects_wrapper(self):
    """Tests that shared layers wrapped with `Wrapper` restore correctly."""
    input_ = keras.Input(shape=(1,))
    unwrapped = keras.layers.Layer(name='unwrapped')
    wrapped = keras.layers.Wrapper(unwrapped, name='wrapped')
    model = keras.Model(inputs=input_,
                        outputs=[unwrapped(input_), wrapped(input_)])

    # Test recreating directly from config
    config = model.get_config()
    loaded = keras.Model.from_config(config)
    self.assertIs(loaded.layers[1], loaded.layers[2].layer)

    # Test saving and loading to disk
    save_format = testing_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    loaded = keras.models.load_model(saved_model_dir)
    self.assertIs(loaded.layers[1], loaded.layers[2].layer)

  @combinations.generate(combinations.combine(mode=['eager']))
  def test_multi_output_metrics_name_stay_same(self):
    """Tests that metric names don't change with each save/load cycle.

    e.g. "head_0_accuracy" should not become "head_0_head_0_accuracy" after
    saving and loading a model.
    """
    input_ = keras.Input((4,))
    model = keras.Model(
        input_,
        [keras.layers.Softmax(name='head_0')(keras.layers.Dense(3)(input_)),
         keras.layers.Softmax(name='head_1')(keras.layers.Dense(5)(input_))])
    metric = keras.metrics.BinaryAccuracy()
    model.compile(optimizer='rmsprop',
                  loss='mse',
                  metrics={'head_0': [metric, 'accuracy']})

    # Run one iteration.
    x = np.random.rand(2, 4)
    y = {'head_0': np.random.randint(2, size=(2, 3)),
         'head_1': np.random.randint(2, size=(2, 5))}
    model.fit(x, y, verbose=0)

    # Save and reload.
    save_format = testing_utils.get_save_format()
    saved_model_dir = self._save_model_dir()
    keras.models.save_model(model, saved_model_dir, save_format=save_format)
    loaded = keras.models.load_model(saved_model_dir)

    # Make sure the metrics names from the model before saving match the loaded
    # model.
    self.assertSequenceEqual(model.metrics_names, loaded.metrics_names)
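
A minimal whole-model save/load sketch with the public tf.keras API, assuming the default SavedModel format (the tests above additionally parameterize over the HDF5 format, custom objects and shared layers):

import os
import tempfile

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation="relu", input_shape=(3,)),
    tf.keras.layers.Dense(2),
])
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

x = np.random.random((8, 3)).astype("float32")
y = np.random.random((8, 2)).astype("float32")
model.fit(x, y, epochs=1, verbose=0)
out = model.predict(x)

path = os.path.join(tempfile.mkdtemp(), "whole_model")
model.save(path)  # architecture, weights and compile state
loaded = tf.keras.models.load_model(path)
np.testing.assert_allclose(loaded.predict(x), out, atol=1e-5)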
Example #29
        self.assertAllEqual(out_dense, out_ragged)

    @parameterized.named_parameters(
        *testing_utils.generate_combinations_with_testcase_name(layer=[
            keras.layers.Add, keras.layers.Subtract, keras.layers.Multiply,
            keras.layers.Minimum, keras.layers.Maximum, keras.layers.Average
        ]))
    def test_merge_with_scalar_input(self, layer):
        x1 = np.array((1))
        x2 = np.array((2))
        out = layer()([x1, x2])
        self.assertEqual(out.shape, ())


@combinations.generate(combinations.combine(mode=['graph', 'eager']))
class MergeLayersTestNoExecution(tf.test.TestCase):
    def test_merge_elementwise_errors(self):
        i1 = keras.layers.Input(shape=(4, 5))
        i2 = keras.layers.Input(shape=(4, 6))
        with self.assertRaises(ValueError):
            keras.layers.add([i1, i2])
        with self.assertRaises(ValueError):
            keras.layers.add([i1])
        with self.assertRaises(ValueError):
            keras.layers.add(i1)
        with self.assertRaises(ValueError):
            keras.layers.add([i1])

    def test_concatenate_errors(self):
        i1 = keras.layers.Input(shape=(4, 5))
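
The shape checks in MergeLayersTestNoExecution boil down to the following; a sketch assuming the public tf.keras.layers.add helper, which rejects operands whose shapes cannot be broadcast together:

import tensorflow as tf

i1 = tf.keras.layers.Input(shape=(4, 5))
i2 = tf.keras.layers.Input(shape=(4, 6))

try:
  tf.keras.layers.add([i1, i2])  # last dimensions 5 vs 6 are incompatible
except ValueError as err:
  print("rejected:", err)

# A compatible pair merges elementwise and keeps the common shape.
out = tf.keras.layers.add([i1, tf.keras.layers.Input(shape=(4, 5))])
print(out.shape)  # (None, 4, 5)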
Example #30
File: save_test.py  Project: ohsdba/keras
class TestSaveModel(tf.test.TestCase, parameterized.TestCase):

  def setUp(self):
    super(TestSaveModel, self).setUp()
    self.model = testing_utils.get_small_sequential_mlp(1, 2, 3)
    self.subclassed_model = testing_utils.get_small_subclass_mlp(1, 2)

  def assert_h5_format(self, path):
    if h5py is not None:
      self.assertTrue(h5py.is_hdf5(path),
                      'Model saved at path {} is not a valid hdf5 file.'
                      .format(path))

  def assert_saved_model(self, path):
    loader_impl.parse_saved_model(path)

  @testing_utils.run_v2_only
  def test_save_format_defaults(self):
    path = os.path.join(self.get_temp_dir(), 'model_path')
    save.save_model(self.model, path)
    self.assert_saved_model(path)

  @testing_utils.run_v2_only
  def test_save_format_defaults_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model_path'
    save.save_model(self.model, path)
    self.assert_saved_model(path)

  @testing_utils.run_v2_only
  def test_save_hdf5(self):
    path = os.path.join(self.get_temp_dir(), 'model')
    save.save_model(self.model, path, save_format='h5')
    self.assert_h5_format(path)
    with self.assertRaisesRegex(
        NotImplementedError,
        'requires the model to be a Functional model or a Sequential model.'):
      save.save_model(self.subclassed_model, path, save_format='h5')

  @testing_utils.run_v2_only
  def test_save_load_hdf5_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    save.save_model(self.model, path, save_format='h5')
    save.load_model(path)

  @testing_utils.run_v2_only
  def test_save_tf(self):
    path = os.path.join(self.get_temp_dir(), 'model')
    save.save_model(self.model, path, save_format='tf')
    self.assert_saved_model(path)
    with self.assertRaisesRegex(ValueError, 'input shapes have not been set'):
      save.save_model(self.subclassed_model, path, save_format='tf')
    self.subclassed_model.predict(np.random.random((3, 5)))
    save.save_model(self.subclassed_model, path, save_format='tf')
    self.assert_saved_model(path)

  @testing_utils.run_v2_only
  def test_save_load_tf_string(self):
    path = os.path.join(self.get_temp_dir(), 'model')
    save.save_model(self.model, path, save_format='tf')
    save.load_model(path)

  @testing_utils.run_v2_only
  def test_save_load_tf_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    save.save_model(self.model, path, save_format='tf')
    save.load_model(path)

  @testing_utils.run_v2_only
  def test_save_load_weights_tf_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    self.model.save_weights(path, save_format='tf')
    self.model.load_weights(path)

  @testing_utils.run_v2_only
  def test_save_load_weights_hdf5_pathlib(self):
    if sys.version_info < (3, 6):
      self.skipTest('pathlib is only available for python version >= 3.6')
    path = pathlib.Path(self.get_temp_dir()) / 'model'
    self.model.save_weights(path, save_format='h5')
    self.model.load_weights(path)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_with_dense_features(self):
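    # Round-trip a DenseFeatures-based functional model through to_json/model_from_json
    # and check that the reloaded model can still predict.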
    cols = [
        tf.feature_column.numeric_column('a'),
        tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(1,), name='a'),
        'b': keras.layers.Input(shape=(1,), name='b', dtype='string')
    }

    fc_layer = dense_features.DenseFeatures(cols)(input_layers)
    output = keras.layers.Dense(10)(fc_layer)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    inputs_a = np.arange(10).reshape(10, 1)
    inputs_b = np.arange(10).reshape(10, 1).astype('str')

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not tf.executing_eagerly():
        self.evaluate(tf.compat.v1.tables_initializer())

      self.assertLen(loaded_model.predict({'a': inputs_a, 'b': inputs_b}), 10)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_with_sequence_features(self):
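    # Same JSON round-trip as above, but with sequence feature columns feeding a GRU.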
    cols = [
        tf.feature_column.sequence_numeric_column('a'),
        tf.feature_column.indicator_column(
            tf.feature_column.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b':
            keras.layers.Input(
                shape=(None, 1), sparse=True, name='b', dtype='string')
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1

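    # Build sparse inputs with a single value per example, all at timestep 0.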
    values_a = np.arange(10, dtype=np.float32)
    indices_a = np.zeros((10, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(10)
    inputs_a = tf.SparseTensor(indices_a, values_a,
                               (batch_size, timesteps, 1))

    values_b = np.zeros(10, dtype=str)  # np.str was removed in newer NumPy; builtin str is equivalent.
    indices_b = np.zeros((10, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(10)
    inputs_b = tf.SparseTensor(indices_b, values_b,
                               (batch_size, timesteps, 1))

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not tf.executing_eagerly():
        self.evaluate(tf.compat.v1.tables_initializer())

      self.assertLen(
          loaded_model.predict({
              'a': inputs_a,
              'b': inputs_b
          }, steps=1), batch_size)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_h5_for_rnn_layers(self):
    # See https://github.com/tensorflow/tensorflow/issues/35731 for details.
    inputs = keras.Input([10, 91], name='train_input')
    rnn_layers = [
        keras.layers.LSTMCell(size, recurrent_dropout=0, name='rnn_cell%d' % i)
        for i, size in enumerate([512, 512])
    ]
    rnn_output = keras.layers.RNN(
        rnn_layers, return_sequences=True, name='rnn_layer')(inputs)
    pred_feat = keras.layers.Dense(91, name='prediction_features')(rnn_output)
    pred = keras.layers.Softmax()(pred_feat)
    model = keras.Model(inputs=[inputs], outputs=[pred, pred_feat])
    path = os.path.join(self.get_temp_dir(), 'model_path.h5')
    model.save(path)

    # Make sure the variable name is unique.
    self.assertNotEqual(rnn_layers[0].kernel.name,
                        rnn_layers[1].kernel.name)
    self.assertIn('rnn_cell1', rnn_layers[1].kernel.name)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_optimizer_weights(self):

    class MyModel(keras.Model):

      def __init__(self):
        super(MyModel, self).__init__()
        self.layer = keras.layers.Dense(1)

      def call(self, x):
        return self.layer(x)

    path = os.path.join(self.get_temp_dir(), 'weights_path')
    x, y = np.ones((10, 10)), np.ones((10, 1))

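    # One training step creates the optimizer slot variables before the weights
    # (including optimizer state) are checkpointed.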
    model = MyModel()
    model.compile('rmsprop', loss='bce')
    model.train_on_batch(x, y)
    model.reset_metrics()
    model.save_weights(path, save_format='tf')

    batch_loss = model.train_on_batch(x, y)

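    # Restoring the checkpoint into a freshly trained model should reproduce the
    # same next-step loss as the original model.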
    new_model = MyModel()
    new_model.compile('rmsprop', loss='bce')
    new_model.train_on_batch(x, y)
    new_model.reset_metrics()

    new_model.load_weights(path)
    new_batch_loss = new_model.train_on_batch(x, y)

    self.assertAllClose(batch_loss, new_batch_loss)

  @combinations.generate(combinations.combine(mode=['eager', 'graph']))
  def test_save_include_optimizer_false(self):

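    # Helper that lists the variable names stored in the SavedModel checkpoint.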
    def get_variables(file_name):
      reader = tf.train.load_checkpoint(
          os.path.join(file_name, 'variables/variables'))
      shape_from_key = reader.get_variable_to_shape_map()
      return sorted(shape_from_key.keys())

    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.Dense(1))
      model.compile('adam', loss='mse')
      x, y = np.ones((10, 10)), np.ones((10, 1))
      model.train_on_batch(x, y)

      path = os.path.join(self.get_temp_dir(), 'no_optimizer')
      model.save(path, save_format='tf', include_optimizer=False)
      variables = get_variables(path)

      for v in variables:
        self.assertNotIn('optimizer', v)

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_model_with_custom_object(self):
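    # A loss registered via register_keras_serializable should survive saving and
    # reloading with compile=True.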
    with generic_utils.custom_object_scope(), self.cached_session():

      @generic_utils.register_keras_serializable()
      class CustomLoss(losses.MeanSquaredError):
        pass

      model = sequential.Sequential(
          [core.Dense(units=1, input_shape=(1,))])
      model.compile(optimizer='sgd', loss=CustomLoss())
      model.fit(np.zeros([10, 1]), np.zeros([10, 1]))

      temp_dir = self.get_temp_dir()
      filepath = os.path.join(temp_dir, 'saving')
      model.save(filepath)

      # Make sure the model can be correctly loaded back.
      _ = save.load_model(filepath, compile=True)