def testLearningRateDecayUsedInTwoFunctions(self): with context.eager_mode(): a = variables.Variable([1., 2.], name='var') b = variables.Variable([1.], name='var') learning_rate_decay = learning_rate_schedule.InverseTimeDecay( 0.5, decay_steps=1.0, decay_rate=0.5) opt = adam.Adam(learning_rate=learning_rate_decay) loss_a = lambda: 3 * a loss_b = lambda: 2 * b @def_function.function def fn_a(): opt.minimize(loss_a, [a]) return a @def_function.function def fn_b(): opt.minimize(loss_b, [b]) return b fn_a() fn_b()
def testDeferredRestorationUsageEager(self): """An idiomatic eager execution example.""" num_training_steps = 10 checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): model = MyModel() optimizer = adam.Adam(0.001) root = trackable_utils.Checkpoint( optimizer=optimizer, model=model) root.restore(checkpoint_management.latest_checkpoint( checkpoint_directory)) for _ in range(num_training_steps): # TODO(allenl): Use a Dataset and serialize/checkpoint it. input_value = constant_op.constant([[3.]]) with backprop.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) optimizer.apply_gradients(zip(gradients, variables)) root.save(file_prefix=checkpoint_prefix) self.assertEqual((training_continuation + 1) * num_training_steps, root.optimizer.iterations.numpy())
def testGettingAndSettingLearningRate(self, strategy_fn): with strategy_fn().scope() as strategy: var = variables.Variable([5.0]) opt = adam.Adam(learning_rate=1.0) loss = lambda: var * 2.0 run_fn = lambda: opt.minimize(loss, [var]) run_op = strategy.experimental_run(run_fn) self.evaluate(variables.global_variables_initializer()) self._run_if_in_graph_mode(run_op) lr = self.evaluate(opt.lr) self.assertEqual(1.0, lr) opt.lr = 2.0 lr = self.evaluate(opt.lr) self.assertEqual(2.0, lr) self.evaluate(opt.lr.assign(3.0)) lr = self.evaluate(opt.lr) self.assertEqual(3.0, lr) with self.assertRaises(AttributeError): opt.not_an_attr += 3
def testDeferredRestorationUsageEager(self): """An idiomatic eager execution example.""" num_training_steps = 10 checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): model = MyModel() optimizer = adam.Adam(0.001) root = util.Checkpoint( optimizer=optimizer, model=model, optimizer_step=training_util.get_or_create_global_step()) root.restore( checkpoint_management.latest_checkpoint(checkpoint_directory)) for _ in range(num_training_steps): # TODO(allenl): Use a Dataset and serialize/checkpoint it. input_value = constant_op.constant([[3.]]) optimizer.minimize( lambda: model(input_value), # pylint: disable=cell-var-from-loop global_step=root.optimizer_step) root.save(file_prefix=checkpoint_prefix) self.assertEqual((training_continuation + 1) * num_training_steps, root.optimizer_step.numpy())
def testDeferredRestorationUsageEager(self): """An idiomatic eager execution example.""" num_training_steps = 10 checkpoint_directory = self.get_temp_dir() for training_continuation in range(3): with self.test_scope(): model = Subclassed() optimizer = adam.Adam(0.001) root = checkpointable_utils.Checkpoint( optimizer=optimizer, model=model) manager = checkpoint_management.CheckpointManager( root, checkpoint_directory, max_to_keep=2) root.restore(manager.latest_checkpoint) for _ in range(num_training_steps): input_value = constant_op.constant([[3.]]) with backprop.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) optimizer.apply_gradients(zip(gradients, variables)) manager.save() self.assertEqual((training_continuation + 1) * num_training_steps, root.optimizer.iterations.numpy())
def testTensorLearningRate(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: with self.cached_session(): # Initialize variables for numpy implementation. m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) var0 = variables.Variable(var0_np) var1 = variables.Variable(var1_np) grads0 = constant_op.constant(grads0_np) grads1 = constant_op.constant(grads1_np) opt = adam.Adam(constant_op.constant(0.001)) update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) variables.global_variables_initializer().run() # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) # Run 3 steps of Adam for t in range(3): self.assertAllCloseAccordingToType(0.9**(t + 1), self.evaluate(beta_1_power)) self.assertAllCloseAccordingToType(0.999**(t + 1), self.evaluate(beta_2_power)) update.run() var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) # Validate updated params self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def testPartialRestoreWarningObject(self): with context.eager_mode(): optimizer = adam.Adam(0.0) original_root = trackable_utils.Checkpoint(v1=variables_lib.Variable(2.), v2=variables_lib.Variable(3.), optimizer=optimizer) # Create a slot variable to save optimizer.minimize(original_root.v1.read_value, [original_root.v1]) prefix = os.path.join(self.get_temp_dir(), "ckpt") save_path = original_root.save(prefix) partial_root = trackable_utils.Checkpoint(v1=variables_lib.Variable(0.)) weak_partial_root = weakref.ref(partial_root) weak_v1 = weakref.ref(partial_root.v1) partial_root.restore(save_path) self.assertEqual(2., partial_root.v1.numpy()) with test.mock.patch.object(logging, "warning") as mock_log: del partial_root self.assertIsNone(weak_partial_root()) self.assertIsNone(weak_v1()) messages = str(mock_log.call_args_list) self.assertIn("(root).v2'", messages) self.assertIn("(root).optimizer's state 'm' for (root).v1", messages) self.assertNotIn("(root).v1'", messages) self.assertIn("expect_partial()", messages)
def test_getitem_slice_with_stop_and_ellipsis_only(self): if not context.executing_eagerly(): self.skipTest('Complex slicing like this fails in v1') inp = keras.Input(shape=(4, 3, 8)) slice_stop = keras.Input(shape=(), dtype='int32') out = inp[..., :slice_stop[0]] model = keras.Model( inputs=[inp, slice_stop], outputs=out) model.compile( adam.Adam(0.001), 'mse', run_eagerly=testing_utils.should_run_eagerly()) batch_size = 7 stop = 6 x = array_ops.stack([ math_ops.range(8) for _ in range(batch_size)]) args = [x, constant_op.constant(stop, shape=(batch_size,))] expected = array_ops.stack([ math_ops.range(8)[:stop] for _ in range(batch_size)]) if keras_tensor.keras_tensors_enabled(): self.assertIn('tf.__operators__.getitem', ( x.name for x in model.layers)) self.assertNotIn('tf.strided_slice', ( x.name for x in model.layers)) self.assertAllEqual(model(args), expected) self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) # Make sure it can be successfully saved and loaded config = model.get_config() model = keras.Model.from_config(config) self.assertAllEqual(model(args), expected) self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)
def testAgnosticUsage(self): """Graph/eager agnostic usage.""" # Does create garbage when executing eagerly due to ops.Graph() creation. with self.test_session(): num_training_steps = 10 checkpoint_directory = self.get_temp_dir() optimizer = adam.Adam(0.001) def _train_fn(model, input_value): with backprop.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) return optimizer.apply_gradients(zip(gradients, variables)) for training_continuation in range(3): with testing_utils.device(should_use_gpu=True): model = MyModel() root = trackable_utils.Checkpoint(optimizer=optimizer, model=model) manager = checkpoint_management.CheckpointManager( root, checkpoint_directory, max_to_keep=1) status = root.restore(save_path=manager.latest_checkpoint) input_value = constant_op.constant([[3.]]) train_fn = functools.partial(_train_fn, model, input_value) if not context.executing_eagerly(): train_fn = functools.partial(self.evaluate, train_fn()) status.initialize_or_restore() for _ in range(num_training_steps): train_fn() manager.save() self.assertEqual( (training_continuation + 1) * num_training_steps, self.evaluate(root.optimizer.iterations)) self.assertEqual(training_continuation + 1, self.evaluate(root.save_counter))
def testDeferredSlotRestoration(self): checkpoint_directory = self.get_temp_dir() root = tracking.Checkpointable() root.var = util.add_variable(root, name="var", initializer=0.) optimizer = adam.Adam(0.1) if context.executing_eagerly(): optimizer.minimize(root.var.read_value) else: train_op = optimizer.minimize(root.var) # Note that `optimizer` has not been added as a dependency of # `root`. Create a one-off grouping so that slot variables for `root.var` # get initialized too. self.evaluate( util.gather_initializers( util.Checkpoint(root=root, optimizer=optimizer))) self.evaluate(train_op) self.evaluate(state_ops.assign(root.var, 12.)) no_slots_path = util.CheckpointableSaver(root).save( os.path.join(checkpoint_directory, "no_slots")) root.optimizer = optimizer self.evaluate(state_ops.assign(root.var, 13.)) self.evaluate( state_ops.assign(optimizer.get_slot(name="m", var=root.var), 14.)) slots_path = util.CheckpointableSaver(root).save( os.path.join(checkpoint_directory, "with_slots")) new_root = tracking.Checkpointable() # Load the slot-containing checkpoint (deferred), then immediately overwrite # the non-slot variable (also deferred). slot_status = util.CheckpointableSaver(new_root).restore(slots_path) no_slot_status = util.CheckpointableSaver(new_root).restore( no_slots_path) with self.assertRaises(AssertionError): no_slot_status.assert_consumed() new_root.var = util.add_variable(new_root, name="var", shape=[]) no_slot_status.assert_consumed() no_slot_status.run_restore_ops() self.assertEqual(12., self.evaluate(new_root.var)) new_root.optimizer = adam.Adam(0.1) with self.assertRaisesRegexp(AssertionError, "beta_1_power"): slot_status.assert_consumed() self.assertEqual(12., self.evaluate(new_root.var)) if context.executing_eagerly(): # Slot variables are only created with restoring initializers when # executing eagerly. self.assertEqual( 14., self.evaluate( new_root.optimizer.get_slot(name="m", var=new_root.var))) else: self.assertIs( new_root.optimizer.get_slot(name="m", var=new_root.var), None) if context.executing_eagerly(): new_root.optimizer.minimize(new_root.var.read_value) else: train_op = new_root.optimizer.minimize(new_root.var) # The slot variable now exists; restore() didn't create it, but we should # now have a restore op for it. slot_status.run_restore_ops() self.assertEqual( 14., self.evaluate( new_root.optimizer.get_slot(name="m", var=new_root.var))) self.evaluate(train_op) slot_status.assert_consumed()
def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate): label_dimension = 2 input_dimension = label_dimension batch_size = 10 data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) data = data.reshape(batch_size, label_dimension) train_input_fn = self.dataset_input_fn( x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices)) eval_input_fn = self.dataset_input_fn(x={'x': data}, y=data, batch_size=batch_size // len(distribution.worker_devices)) predict_input_fn = numpy_io.numpy_input_fn(x={'x': data}, batch_size=batch_size, shuffle=False) linear_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] dnn_feature_columns = [ feature_column.numeric_column('x', shape=(input_dimension, )) ] feature_columns = linear_feature_columns + dnn_feature_columns session_config = config_pb2.ConfigProto(log_device_placement=True, allow_soft_placement=True) estimator = dnn_linear_combined.DNNLinearCombinedRegressor( linear_feature_columns=linear_feature_columns, dnn_hidden_units=(2, 2), dnn_feature_columns=dnn_feature_columns, label_dimension=label_dimension, model_dir=self._model_dir, dnn_optimizer=adam.Adam(0.001), linear_optimizer=adam.Adam(0.001), config=run_config.RunConfig(train_distribute=distribution, eval_distribute=distribution, session_config=session_config)) num_steps = 2 if use_train_and_evaluate: scores, _ = training.train_and_evaluate( estimator, training.TrainSpec(train_input_fn, max_steps=num_steps), training.EvalSpec(eval_input_fn)) else: estimator.train(train_input_fn, steps=num_steps) scores = estimator.evaluate(eval_input_fn) self.assertIn('loss', six.iterkeys(scores)) predictions = np.array([ x[prediction_keys.PredictionKeys.PREDICTIONS] for x in estimator.predict(predict_input_fn) ]) self.assertAllEqual((batch_size, label_dimension), predictions.shape) feature_spec = feature_column.make_parse_example_spec(feature_columns) serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn( feature_spec) export_dir = estimator.export_savedmodel(tempfile.mkdtemp(), serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir))
def create_model(self): """ 定义CNN/LSTM/CTC模型,使用函数式模型 输入层:200维的特征值序列,一条语音数据的最大长度设为1600(大约16s) 隐藏层:卷积池化层,卷积核大小为3x3,池化窗口大小为2 隐藏层:全连接层 输出层:全连接层,神经元数量为self.ms_output_size,使用softmax作为激活函数, CTC层:使用CTC的loss作为损失函数,实现连接性时序多输出 :return: """ input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) layer_h1 = Conv2D(32, (3, 3), use_bias=False, activation='relu', padding='same', kernel_initializer='he_normal')(input_data) # 卷积层 layer_h1 = Dropout(0.05)(layer_h1) layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')( layer_h1) # 卷积层 layer_h3 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h2) # 池化层 # layer_h3 = Dropout(0.2)(layer_h2) # 随机中断部分神经网络连接,防止过拟合 layer_h3 = Dropout(0.05)(layer_h3) layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')( layer_h3) # 卷积层 layer_h4 = Dropout(0.1)(layer_h4) layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')( layer_h4) # 卷积层 layer_h6 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h5) # 池化层 layer_h6 = Dropout(0.1)(layer_h6) layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h6) # 卷积层 layer_h7 = Dropout(0.15)(layer_h7) layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h7) # 卷积层 layer_h9 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h8) # 池化层 layer_h9 = Dropout(0.15)(layer_h9) layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h9) # 卷积层 layer_h10 = Dropout(0.2)(layer_h10) layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h10) # 卷积层 layer_h12 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h11) # 池化层 layer_h12 = Dropout(0.2)(layer_h12) layer_h13 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h12) # 卷积层 layer_h13 = Dropout(0.2)(layer_h13) layer_h14 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h13) # 卷积层 layer_h15 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h14) # 池化层 layer_h16 = Reshape((200, 3200))(layer_h15) # Reshape层 layer_h16 = Dropout(0.3)(layer_h16) layer_h17 = Dense(128, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_h16) # 全连接层 layer_h17 = Dropout(0.3)(layer_h17) layer_h18 = Dense(self.ms_output_size, use_bias=True, kernel_initializer='he_normal')(layer_h17) # 全连接层 y_pred = Activation('softmax', name='Activation0')(layer_h18) model_data = Model(inputs=input_data, outputs=y_pred) # model_data.summary() labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int64') label_length = Input(name='label_length', shape=[1], dtype='int64') # Keras doesn't currently support loss funcs with extra parameters # so CTC loss is implemented in a lambda layer loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')( [y_pred, labels, input_length, label_length]) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) # model.summary() # clip norm seems to speeds up convergence opt = Adam_v2.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.0, epsilon=10e-8) model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=opt) # logger.debug('Create Model Successful, Compiles Model Successful. ') return model, model_data
def _benchmark(self, gradient_type, num_gpus, mode, loss_scaling): """Benchmarks loss scaling. We run a simple model with several scalar variables. The loss is the sum of all variables. The model is simple because we want to measure only the performance of loss scaling, not the performance of the model itself. Args: gradient_type: "optimizer" or "gradient_tape". How gradients are computed. "optimizer" uses Optimizer.minimize. "gradient_tape" uses GradientTape.gradient along with LossScaleOptimizer.get_scaled_loss and LossScaleOptimizer.get_unscaled_gradients. num_gpus: The number of GPUs to use. Must be at least 1. mode: "eager" or "tf_function". "tf_function" causes all computations to be wrapped in a tf.function, while "eager" runs computations eagerly. loss_scaling: "fixed", "dynamic", or None. The type of loss scaling to use. None means use no loss scaling, which is useful as a baseline to see how much slower loss scaling is in comparison. """ ls_str = loss_scaling or 'no_loss_scaling' name = '%s_%d_GPU_%s_%s' % (gradient_type, num_gpus, mode, ls_str) with context.eager_mode(), _get_strategy(num_gpus).scope() as strategy: opt = adam.Adam() if loss_scaling == 'fixed': loss_scale = loss_scale_module.FixedLossScale(2.) elif loss_scaling == 'dynamic': # Make increment_period so high that it's effectively infinite. This # means the loss scale will never change. Any performance overhead # from increasing/decreasing the loss scale is typically negligible # since it happens infrequently, so we only benchmark the common case # of the loss scale not changing. increment_period = 1000000 loss_scale = loss_scale_module.DynamicLossScale( initial_loss_scale=2., increment_period=increment_period) else: assert loss_scaling is None loss_scale = None if loss_scale: opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale) num_vars = 200 num_warmup_iters = 1 num_iters = 20 # By using scalar variables, we reduce overhead of the actual GPU work of # multiplying variables, dividing gradients, and checking gradients for # NaNs. Measuring these overheads isn't very useful as there is little we # can do to reduce them (one such way would be to fuse dividing gradients # and checking them for NaNs). We still have all other overheads, such as # all-reducing the `is_finite` values and having a tf.cond or # tf.while_loop based on whether gradients are NaNs. Currently, these # other overheads are much more significant than the GPU work. var_list = [ variables.Variable(i, dtype='float32') for i in range(num_vars) ] def get_loss(): return math_ops.add_n(var_list) if gradient_type == 'gradient_tape': if loss_scale is None: def minimize_fn(): with backprop.GradientTape() as tape: loss = get_loss() grads = tape.gradient(loss, var_list) return opt.apply_gradients(zip(grads, var_list)) else: def minimize_fn(): with backprop.GradientTape() as tape: loss = get_loss() scaled_loss = opt.get_scaled_loss(loss) scaled_grads = tape.gradient(scaled_loss, var_list) grads = opt.get_unscaled_gradients(scaled_grads) return opt.apply_gradients(zip(grads, var_list)) else: assert gradient_type == 'optimizer' def minimize_fn(): return opt.minimize(get_loss, var_list) def run_fn(): strategy.run(minimize_fn) if mode == 'tf_function': run_fn = def_function.function(run_fn) for _ in range(num_warmup_iters): run_fn() start = time.time() for _ in range(num_iters): run_fn() end = time.time() self.report_benchmark(iters=num_iters, wall_time=(end - start) / num_iters, name=name)
def test_trackable_save_restore(self): with self.test_session(): def _templated(): v = variable_scope.get_variable( "v", shape=[1], initializer=init_ops.zeros_initializer(), use_resource=True) v2 = variable_scope.get_variable( "v2", shape=[1], initializer=init_ops.zeros_initializer(), use_resource=True) manual = _ManualScope() return v, v + 1., v2, manual, manual() save_template = template.make_template("s1", _templated) v1_save, _, v2_save, manual_scope, manual_scope_v = save_template() six.assertCountEqual( self, [ id(v1_save), id(v2_save), id(manual_scope), id(manual_scope_v), id(save_template) ], map(id, trackable_utils.list_objects(save_template))) manual_dep, = manual_scope._checkpoint_dependencies self.assertEqual("in_manual_scope", manual_dep.name) self.assertIs(manual_scope_v, manual_dep.ref) optimizer = adam.Adam(0.0) save_root = trackable_utils.Checkpoint(my_template=save_template, optimizer=optimizer) optimizer.minimize(v1_save.read_value, var_list=[v1_save]) self.evaluate([v.initializer for v in save_template.variables]) optimizer_variables = optimizer.variables() + list( optimizer._hyper.values()) self.evaluate([v.initializer for v in optimizer_variables]) self.evaluate(v1_save.assign([12.])) self.evaluate(v2_save.assign([14.])) checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") save_path = save_root.save(checkpoint_prefix) load_template = template.make_template("s2", _templated) load_optimizer = adam.Adam(0.0) load_root = trackable_utils.Checkpoint(my_template=load_template, optimizer=load_optimizer) status = load_root.restore(save_path) var, var_plus_one, var2, _, _ = load_template() load_optimizer.minimize(var.read_value, var_list=[var]) self.assertLen(load_template._checkpoint_dependencies, 3) self.assertEqual("v", load_template._checkpoint_dependencies[0].name) self.assertEqual("v2", load_template._checkpoint_dependencies[1].name) self.assertEqual("ManualScope", load_template._checkpoint_dependencies[2].name) status.assert_consumed().run_restore_ops() self.assertAllEqual([12.], self.evaluate(var)) self.assertAllEqual([13.], self.evaluate(var_plus_one)) self.assertAllEqual([14.], self.evaluate(var2))
def testNamingWithOptimizer(self): input_value = constant_op.constant([[3.]]) model = MyModel() # A nuisance Model using the same optimizer. Its slot variables should not # go in the checkpoint, since it is never depended on. other_model = MyModel() optimizer = adam.Adam(0.001) optimizer_step = training_util.get_or_create_global_step() root_checkpointable = util.Checkpoint(optimizer=optimizer, model=model, optimizer_step=optimizer_step) if context.executing_eagerly(): optimizer.minimize(lambda: model(input_value), global_step=optimizer_step) optimizer.minimize(lambda: other_model(input_value), global_step=optimizer_step) else: train_op = optimizer.minimize(model(input_value), global_step=optimizer_step) optimizer.minimize(other_model(input_value), global_step=optimizer_step) self.evaluate(util.gather_initializers(root_checkpointable)) self.evaluate(train_op) named_variables, serialized_graph, _ = (util._serialize_object_graph( root_checkpointable, saveables_cache=None)) expected_checkpoint_names = ( # Created in the root node, so no prefix. "optimizer_step", "model/_second/kernel", "model/_named_dense/kernel", "model/_named_dense/bias", # non-Layer dependency of the model "model/_non_layer/a_variable", # The optimizer creates two non-slot variables "optimizer/beta_1_power", "optimizer/beta_2_power", # Slot variables "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", ) suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ name + suffix for name in expected_checkpoint_names ] # The Dense layers also save get_config() JSON expected_checkpoint_names.extend([ "model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON", "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON" ]) named_variables = {v.name: v for v in named_variables} six.assertCountEqual(self, expected_checkpoint_names, named_variables.keys()) # Check that we've mapped to the right variable objects (not exhaustive) self.assertEqual("global_step", named_variables["optimizer_step" + suffix].full_name) self.assertEqual( "my_model/dense_1/kernel", named_variables["model/_second/kernel" + suffix].full_name) self.assertEqual( "my_model/dense/kernel", named_variables["model/_named_dense/kernel" + suffix].full_name) self.assertEqual( "beta_1_power", named_variables["optimizer/beta_1_power" + suffix].full_name) self.assertEqual( "beta_2_power", named_variables["optimizer/beta_2_power" + suffix].full_name) # Spot check the generated protocol buffers. self.assertEqual("optimizer", serialized_graph.nodes[0].children[1].local_name) optimizer_node = serialized_graph.nodes[ serialized_graph.nodes[0].children[1].node_id] self.assertEqual("beta_1_power", optimizer_node.children[0].local_name) self.assertEqual( "beta_1_power", serialized_graph.nodes[ optimizer_node.children[0].node_id].attributes[0].full_name) self.assertEqual( "my_model/dense/kernel", serialized_graph.nodes[optimizer_node.slot_variables[ 0].original_variable_node_id].attributes[0].full_name) # We strip off the :0 suffix, as variable.name-based saving does. self.assertEqual( "my_model/dense/kernel/Adam", serialized_graph.nodes[optimizer_node.slot_variables[ 0].slot_variable_node_id].attributes[0].full_name) self.assertEqual( "my_model/dense/kernel/Adam:0", optimizer.get_slot(var=model._named_dense.kernel, name="m").name) self.assertEqual( "model/_named_dense/kernel" + suffix, serialized_graph.nodes[optimizer_node.slot_variables[ 0].original_variable_node_id].attributes[0].checkpoint_key) self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) self.assertEqual( "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, serialized_graph.nodes[optimizer_node.slot_variables[ 0].slot_variable_node_id].attributes[0].checkpoint_key)
def __init__(self): super(_HasOptimizer, self).__init__() self.layer = core.Dense(1) self.optimizer = adam.Adam(0.01)
def testSaveRestore(self): model = MyModel() optimizer = adam.Adam(0.001) root_checkpointable = util.Checkpoint(optimizer=optimizer, model=model) input_value = constant_op.constant([[3.]]) if context.executing_eagerly(): optimizer.minimize(lambda: model(input_value)) else: train_op = optimizer.minimize(model(input_value)) # TODO(allenl): Make initialization more pleasant when graph building. root_checkpointable.save_counter # pylint: disable=pointless-statement self.evaluate(util.gather_initializers(root_checkpointable)) self.evaluate(train_op) prefix = os.path.join(self.get_temp_dir(), "ckpt") self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.])) m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") self.evaluate(state_ops.assign(m_bias_slot, [1.5])) save_path = root_checkpointable.save(file_prefix=prefix) self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.])) self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3)) optimizer_variables = self.evaluate(optimizer.variables()) self.evaluate(state_ops.assign(m_bias_slot, [-2.])) # Immediate restoration status = root_checkpointable.restore( save_path=save_path).assert_consumed() status.run_restore_ops() self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter)) self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) if not context.executing_eagerly(): return # Restore-on-create is only supported when executing eagerly on_create_model = MyModel() on_create_optimizer = adam.Adam( 0.001, # Preserve beta_1_power and beta_2_power when appying gradients # so we can test that they've been restored correctly. beta_1=1.0, beta_2=1.0) on_create_root = util.Checkpoint(optimizer=on_create_optimizer, model=on_create_model) # Deferred restoration status = on_create_root.restore(save_path=save_path) on_create_model(constant_op.constant([[3.]])) # create variables self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) self.assertAllEqual([42.], self.evaluate( on_create_model._named_dense.variables[1])) on_create_m_bias_slot = on_create_optimizer.get_slot( on_create_model._named_dense.variables[1], "m") # Optimizer slot variables are created when the original variable is # restored. self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) self.assertAllEqual(optimizer_variables[2:], self.evaluate(on_create_optimizer.variables())) dummy_var = resource_variable_ops.ResourceVariable([1.]) on_create_optimizer.minimize(loss=dummy_var.read_value) status.assert_consumed() beta_1_power, beta_2_power = on_create_optimizer._get_beta_accumulators( ) self.assertAllEqual(optimizer_variables[0], self.evaluate(beta_1_power)) self.assertAllEqual(optimizer_variables[1], self.evaluate(beta_2_power))
# Hyper-paratmeters of optimizer in graph. HP_IN_GRAPH = { 'Adam': ['decay', 'learning_rate'], 'Ftrl': [ 'decay', 'l1_regularization_strength', 'l2_regularization_strength', 'learning_rate', 'learning_rate_power' ], 'RMSProp': ['decay', 'learning_rate', 'momentum', 'rho'], 'Adagrad': ['decay', 'learning_rate'], 'SGD': ['decay', 'learning_rate', 'momentum'], } # optimizer v2 instance. OPT_V2_INSTANCE = { 'Adagrad': adagrad.Adagrad(), 'Adam': adam.Adam(), 'Ftrl': ftrl.Ftrl(), 'RMSProp': rmsprop.RMSprop(), 'SGD': gradient_descent.SGD(), } def _add_new_variable(initial_value, var_name_v2, var_name_v1, var_map, var_names_map): """Creates a new variable and add it to the variable maps.""" var = tf.Variable(initial_value, name=var_name_v2) var_map[var_name_v2] = var var_names_map[var_name_v2] = var_name_v1 def _add_opt_variable(opt_name_v2, var_name_v1, idx, suffix_v2, reader,
def testAdamCompatibility(self): opt_v1 = optimizers.Adam() opt_v2 = adam.Adam() self._testOptimizersCompatibility(opt_v1, opt_v2)
def testMultipleGraphsNonSlotVariables(self): with context.graph_mode(): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") optimizer = adam.Adam(0.001) # Construct a model in one graph first_graph = ops.Graph() first_session = session_lib.Session(graph=first_graph) with first_graph.as_default(), first_session.as_default(): first_variable = resource_variable_ops.ResourceVariable([1.]) first_root_checkpointable = util.Checkpoint( optimizer=optimizer, variable=first_variable) train_op = optimizer.minimize(first_variable.read_value) self.evaluate( util.gather_initializers(first_root_checkpointable)) self.evaluate(train_op) self.evaluate(first_variable.assign([1.])) self.evaluate( optimizer.get_slot(var=first_variable, name="m").assign([2.])) beta_1_power, _ = optimizer._get_beta_accumulators() self.evaluate(beta_1_power.assign(3.)) # Save and load in a second graph second_graph = ops.Graph() with second_graph.as_default(), session_lib.Session( graph=second_graph): second_variable = resource_variable_ops.ResourceVariable([1.]) second_root_checkpointable = util.Checkpoint( optimizer=optimizer, variable=second_variable) train_op = optimizer.minimize(second_variable.read_value) second_root_checkpointable.restore( None).initialize_or_restore() self.evaluate(train_op) self.evaluate(second_variable.assign([4.])) self.evaluate( optimizer.get_slot(var=second_variable, name="m").assign([5.])) beta_1_power, _ = optimizer._get_beta_accumulators() self.evaluate(beta_1_power.assign(6.)) save_path = second_root_checkpointable.save(checkpoint_prefix) self.evaluate(second_variable.assign([7.])) self.evaluate( optimizer.get_slot(var=second_variable, name="m").assign([8.])) beta_1_power, _ = optimizer._get_beta_accumulators() self.assertAllEqual(6., self.evaluate(beta_1_power)) status = second_root_checkpointable.restore(save_path) status.assert_consumed().run_restore_ops() self.assertAllEqual([4.], self.evaluate(second_variable)) self.assertAllEqual([5.], self.evaluate( optimizer.get_slot(var=second_variable, name="m"))) beta_1_power, _ = optimizer._get_beta_accumulators() self.assertAllEqual(6., self.evaluate(beta_1_power)) # Check that the first graph is unmolested with first_graph.as_default(), first_session.as_default(): self.assertAllEqual([1.], self.evaluate(first_variable)) self.assertAllEqual([2.], self.evaluate( optimizer.get_slot(var=first_variable, name="m"))) beta_1_power, _ = optimizer._get_beta_accumulators() self.assertAllEqual(3., self.evaluate(beta_1_power))
def testBasicWithLearningRateInverseTimeDecay(self): # TODO(tanzheny, omalleyt): Fix test in eager mode. for i, dtype in enumerate( [dtypes.half, dtypes.float32, dtypes.float64]): with ops.Graph().as_default(), self.cached_session(use_gpu=True): # Initialize variables for numpy implementation. m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) var0 = resource_variable_ops.ResourceVariable(var0_np, name="var0_%d" % i) var1 = resource_variable_ops.ResourceVariable(var1_np, name="var1_%d" % i) grads0 = constant_op.constant(grads0_np) grads1 = constant_op.constant(grads1_np) learning_rate = 0.001 decay = 0.5 lr_schedule = learning_rate_schedule.InverseTimeDecay( learning_rate, decay_steps=1.0, decay_rate=decay) beta_1 = 0.9 beta_2 = 0.999 epsilon = 1e-7 opt = adam.Adam(learning_rate=lr_schedule, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon) update = opt.apply_gradients( zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) # Run 3 steps of Adam for t in range(3): self.evaluate(update) lr_np = learning_rate / (1 + decay * t) var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0, lr=lr_np) var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1, lr=lr_np) # Validate updated params self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
def doTestBasic(self, use_resource=False): for i, dtype in enumerate( [dtypes.half, dtypes.float32, dtypes.float64]): with self.session(graph=ops.Graph()): # Initialize variables for numpy implementation. m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) if use_resource: var0 = resource_variable_ops.ResourceVariable( var0_np, name="var0_%d" % i) var1 = resource_variable_ops.ResourceVariable( var1_np, name="var1_%d" % i) else: var0 = variables.Variable(var0_np) var1 = variables.Variable(var1_np) grads0 = constant_op.constant(grads0_np) grads1 = constant_op.constant(grads1_np) opt = adam.Adam() update = opt.apply_gradients( zip([grads0, grads1], [var0, var1])) opt_variables = opt.variables() beta1_power, beta2_power = opt._get_beta_accumulators() self.assertTrue(beta1_power is not None) self.assertTrue(beta2_power is not None) self.assertIn(beta1_power, opt_variables) self.assertIn(beta2_power, opt_variables) with ops.Graph().as_default(): # Shouldn't return non-slot variables from other graphs. self.assertEqual(0, len(opt.variables())) if not context.executing_eagerly(): self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) self.assertAllClose([3.0, 4.0], self.evaluate(var1)) beta1_power, beta2_power = opt._get_beta_accumulators() # Run 3 steps of Adam for t in range(1, 4): if not context.executing_eagerly(): self.evaluate(update) elif t > 1: opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.assertAllCloseAccordingToType( 0.9**(t + 1), self.evaluate(beta1_power)) self.assertAllCloseAccordingToType( 0.999**(t + 1), self.evaluate(beta2_power)) var0_np, m0, v0 = adam_update_numpy( var0_np, grads0_np, t, m0, v0) var1_np, m1, v1 = adam_update_numpy( var1_np, grads1_np, t, m1, v1) # Validate updated params self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) if use_resource: self.assertEqual("var0_%d/Adam:0" % (i, ), opt.get_slot(var=var0, name="m").name)
def testAmsgradWithError(self): with self.assertRaisesRegexp(ValueError, "Amsgrad is currently not supported"): adam.Adam(learning_rate=1., beta_1=0.9, beta_2=0.99, amsgrad=True)
def testDeferredSlotRestoration(self): with self.test_session(): checkpoint_directory = self.get_temp_dir() root = trackable_utils.Checkpoint() root.var = trackable_utils.add_variable(root, name="var", initializer=0.) optimizer = adam.Adam(0.1) variables = [root.var] gradients = [1.] train_op = optimizer.apply_gradients(zip(gradients, variables)) # Note that `optimizer` has not been added as a dependency of # `root`. Create a one-off grouping so that slot variables for `root.var` # get initialized too. self.evaluate( trackable_utils.gather_initializers( trackable_utils.Checkpoint(root=root, optimizer=optimizer))) self.evaluate(train_op) self.evaluate(state_ops.assign(root.var, 12.)) no_slots_path = root.save( os.path.join(checkpoint_directory, "no_slots")) root.optimizer = optimizer self.evaluate(state_ops.assign(root.var, 13.)) self.evaluate( state_ops.assign( optimizer.get_slot(slot_name="m", var=root.var), 14.)) slots_path = root.save( os.path.join(checkpoint_directory, "with_slots")) new_root = trackable_utils.Checkpoint() # Load the slot-containing checkpoint (deferred), then immediately # overwrite the non-slot variable (also deferred). slot_status = new_root.restore(slots_path) no_slot_status = new_root.restore(no_slots_path) with self.assertRaises(AssertionError): no_slot_status.assert_consumed() new_root.var = trackable_utils.add_variable(new_root, name="var", shape=[]) no_slot_status.assert_consumed() no_slot_status.run_restore_ops() self.assertEqual(12., self.evaluate(new_root.var)) new_root.optimizer = adam.Adam(0.1) slot_status.assert_existing_objects_matched() if not context.executing_eagerly(): with self.assertRaisesRegex(AssertionError, "Unresolved object"): slot_status.assert_consumed() self.assertEqual(12., self.evaluate(new_root.var)) if context.executing_eagerly(): # Slot variables are only created with restoring initializers when # executing eagerly. self.assertEqual( 14., self.evaluate( new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) else: # Slot variables are not created eagerly when graph building. with self.assertRaises(KeyError): new_root.optimizer.get_slot(slot_name="m", var=new_root.var) variables = [new_root.var] gradients = [1.] train_op = new_root.optimizer.apply_gradients( zip(gradients, variables)) # The slot variable now exists; restore() didn't create it, but we should # now have a restore op for it. slot_status.run_restore_ops() if not context.executing_eagerly(): # The train op hasn't run when graph building, so the slot variable has # its restored value. It has run in eager, so the value will # be different. self.assertEqual( 14., self.evaluate( new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) self.evaluate(train_op) slot_status.assert_consumed()
def __init__(self): self.dense = core.Dense(1) self.optimizer = adam.Adam(0.01)
def test_initialize_if_not_restoring(self): with self.test_session(): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") optimizer_only_prefix = os.path.join(checkpoint_directory, "opt") with testing_utils.device(should_use_gpu=True): model = MyModel() optimizer = adam.Adam(0.001) root = trackable_utils.Checkpoint( model=model ) # Do not save the optimizer with the checkpoint. optimizer_checkpoint = trackable_utils.Checkpoint( optimizer=optimizer) checkpoint_path = checkpoint_management.latest_checkpoint( checkpoint_directory) status = root.restore(save_path=checkpoint_path) input_value = constant_op.constant([[3.]]) def train_fn(): with backprop.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) return optimizer.apply_gradients(zip(gradients, variables)) if not context.executing_eagerly(): train_fn = functools.partial(self.evaluate, train_fn()) status.initialize_or_restore() # TODO(tanzheny): Add hyper variables to .variables(), and set them with # set_weights etc. variables_not_in_the_variables_property = [ obj for obj in optimizer._hyper.values() if isinstance(obj, variables_lib.Variable) ] self.evaluate([ v.initializer for v in optimizer.variables() + variables_not_in_the_variables_property ]) train_fn() model_save_path = root.save(file_prefix=checkpoint_prefix) self.evaluate(optimizer.beta_1.assign(42.)) optimizer_save_path = optimizer_checkpoint.save( optimizer_only_prefix) del train_fn # Restore into a graph with the optimizer with testing_utils.device(should_use_gpu=True): model = MyModel() optimizer = adam.Adam(0.001) root = trackable_utils.Checkpoint(optimizer=optimizer, model=model) status = root.restore(save_path=model_save_path) input_value = constant_op.constant([[3.]]) def train_fn1(): with backprop.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) return optimizer.apply_gradients(zip(gradients, variables)) if not context.executing_eagerly(): train_fn1 = functools.partial(self.evaluate, train_fn1()) status.initialize_or_restore() train_fn1() with self.assertRaises(AssertionError): status.assert_existing_objects_matched() with self.assertRaises(AssertionError): status.assert_consumed() del train_fn1 # Make sure initialization doesn't clobber later restores with testing_utils.device(should_use_gpu=True): model = MyModel() optimizer = adam.Adam(0.001, beta_1=1.0) root = trackable_utils.Checkpoint(optimizer=optimizer, model=model) opt_root = trackable_utils.Checkpoint(optimizer=optimizer) status = root.restore(save_path=model_save_path) init_only_optimizer_status = opt_root.restore(save_path=None) optimizer_status = opt_root.restore( save_path=optimizer_save_path) input_value = constant_op.constant([[3.]]) def train_fn2(): with backprop.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) return optimizer.apply_gradients(zip(gradients, variables)) if not context.executing_eagerly(): train_fn2 = functools.partial(self.evaluate, train_fn2()) optimizer_status.run_restore_ops() status.initialize_or_restore() init_only_optimizer_status.initialize_or_restore() train_fn2() self.assertEqual(42., self.evaluate(optimizer.beta_1))
def testNamingWithOptimizer(self): input_value = constant_op.constant([[3.]]) model = MyModel() # A nuisance Model using the same optimizer. Its slot variables should not # go in the checkpoint, since it is never depended on. other_model = MyModel() optimizer = adam.Adam(0.001) step = training_util.get_or_create_global_step() root_trackable = trackable_utils.Checkpoint(optimizer=optimizer, model=model, step=step) with backprop.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) train_op = control_flow_ops.group( optimizer.apply_gradients(zip(gradients, variables)), step.assign_add(1)) with backprop.GradientTape() as tape: loss = other_model(input_value) variables = other_model.trainable_variables gradients = tape.gradient(loss, variables) optimizer.apply_gradients(zip(gradients, variables)) self.evaluate(trackable_utils.gather_initializers(root_trackable)) self.evaluate(train_op) named_variables, serialized_graph, _ = graph_view.ObjectGraphView( root_trackable).serialize_object_graph() expected_slot_keys = ( "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", ) expected_checkpoint_names = ( # Created in the root node, so no prefix. "step", "model/_second/kernel", "model/_named_dense/kernel", "model/_named_dense/bias", # non-Layer dependency of the model "model/_non_layer/a_variable", "optimizer/learning_rate", "optimizer/beta_1", "optimizer/beta_2", "optimizer/iter", "optimizer/decay", ) + expected_slot_keys suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ name + suffix for name in expected_checkpoint_names ] named_variables = {v.name: v for v in named_variables} six.assertCountEqual(self, expected_checkpoint_names, named_variables.keys()) # Check that we've mapped to the right variable objects (not exhaustive) self.assertEqual("global_step", named_variables["step" + suffix].full_name) self.assertEqual( "my_model/dense_1/kernel", named_variables["model/_second/kernel" + suffix].full_name) self.assertEqual( "my_model/dense/kernel", named_variables["model/_named_dense/kernel" + suffix].full_name) self.assertEqual( "Adam/beta_1", named_variables["optimizer/beta_1" + suffix].full_name) self.assertEqual( "Adam/beta_2", named_variables["optimizer/beta_2" + suffix].full_name) # Spot check the generated protocol buffers. self.assertEqual("optimizer", serialized_graph.nodes[0].children[1].local_name) optimizer_node = serialized_graph.nodes[ serialized_graph.nodes[0].children[1].node_id] children = [node.local_name for node in optimizer_node.children] six.assertCountEqual( self, # hyper variable dependencies ["beta_1", "beta_2", "iter", "decay", "learning_rate"], children) serialized_slot_keys = [] for slot in optimizer_node.slot_variables: for attribute in (serialized_graph.nodes[ slot.slot_variable_node_id].attributes): serialized_slot_keys.append(attribute.checkpoint_key) six.assertCountEqual(self, [key + suffix for key in expected_slot_keys], serialized_slot_keys)
def test_train_premade_widedeep_model_with_feature_layers(self): vocab_list = ['alpha', 'beta', 'gamma'] vocab_val = [0.4, 0.6, 0.9] data = np.random.choice(vocab_list, size=256) y = np.zeros_like(data, dtype=np.float32) for vocab, val in zip(vocab_list, vocab_val): indices = np.where(data == vocab) y[indices] = val + np.random.uniform( low=-0.01, high=0.01, size=indices[0].shape) cat_column = tf.feature_column.categorical_column_with_vocabulary_list( key='symbol', vocabulary_list=vocab_list) ind_column = tf.feature_column.indicator_column(cat_column) # TODO(tanzheny): use emb column for dense part once b/139667019 is fixed. # emb_column = feature_column.embedding_column(cat_column, dimension=5) keras_input = keras.layers.Input(name='symbol', shape=3, dtype=tf.dtypes.string) # build linear part with feature layer. linear_feature_layer = tf.compat.v1.keras.layers.DenseFeatures( [ind_column]) linear_model = linear.LinearModel(units=1, name='Linear', kernel_initializer='zeros') combined_linear = keras.Sequential( [linear_feature_layer, linear_model]) # build dnn part with feature layer. dnn_feature_layer = tf.compat.v1.keras.layers.DenseFeatures( [ind_column]) dense_layer = keras.layers.Dense(units=1, name='DNNDense', kernel_initializer='zeros') combined_dnn = keras.Sequential([dnn_feature_layer, dense_layer]) # build and compile wide deep. wide_deep_model = wide_deep.WideDeepModel(combined_linear, combined_dnn) wide_deep_model._set_inputs({'symbol': keras_input}) sgd_opt = gradient_descent.SGD(0.1) adam_opt = adam.Adam(0.1) wide_deep_model.compile([sgd_opt, adam_opt], 'mse', ['mse']) # build estimator. train_input_fn = numpy_io.numpy_input_fn(x={'symbol': data}, y=y, num_epochs=20, shuffle=False) eval_input_fn = numpy_io.numpy_input_fn(x={'symbol': data}, y=y, num_epochs=20, shuffle=False) est = keras_lib.model_to_estimator(keras_model=wide_deep_model, config=self._config, checkpoint_format='saver') before_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1) est.train(input_fn=train_input_fn, steps=20) after_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1) self.assertLess(after_eval_results['loss'], before_eval_results['loss']) self.assertLess(after_eval_results['loss'], 0.1)
adagrad_optimizer_v1_fn = combinations.NamedObject( "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001)) adam_optimizer_v1_fn = combinations.NamedObject( "AdamV1", lambda: adam.AdamOptimizer(0.001, epsilon=1)) rmsprop_optimizer_v1_fn = combinations.NamedObject( "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001)) # TODO(shiningsun): consider adding the other v1 optimizers optimizers_v1 = [gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn] adadelta_optimizer_keras_v2_fn = combinations.NamedObject( "AdadeltaKerasV2", lambda: adadelta_keras_v2.Adadelta(0.001)) adagrad_optimizer_keras_v2_fn = combinations.NamedObject( "AdagradKerasV2", lambda: adagrad_keras_v2.Adagrad(0.001)) adam_optimizer_keras_v2_fn = combinations.NamedObject( "AdamKerasV2", lambda: adam_keras_v2.Adam(0.001, epsilon=1.0)) adamax_optimizer_keras_v2_fn = combinations.NamedObject( "AdamaxKerasV2", lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0)) nadam_optimizer_keras_v2_fn = combinations.NamedObject( "NadamKerasV2", lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0)) ftrl_optimizer_keras_v2_fn = combinations.NamedObject( "FtrlKerasV2", lambda: ftrl_keras_v2.Ftrl(0.001)) gradient_descent_optimizer_keras_v2_fn = combinations.NamedObject( "GradientDescentKerasV2", lambda: gradient_descent_keras_v2.SGD(0.2)) rmsprop_optimizer_keras_v2_fn = combinations.NamedObject( "RmsPropKerasV2", lambda: rmsprop_keras_v2.RMSprop(0.001)) # TODO(shiningsun): consider adding the other v2 optimizers optimizers_v2 = [ gradient_descent_optimizer_keras_v2_fn, adagrad_optimizer_keras_v2_fn ]
def testSaveRestore(self): with self.test_session(): model = MyModel() optimizer = adam.Adam(0.001) root_trackable = trackable_utils.Checkpoint(optimizer=optimizer, model=model) input_value = constant_op.constant([[3.]]) with backprop.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) train_op = optimizer.apply_gradients(zip(gradients, variables)) self.assertFalse(root_trackable.save_counter.trainable) self.evaluate(trackable_utils.gather_initializers(root_trackable)) self.evaluate(train_op) prefix = os.path.join(self.get_temp_dir(), "ckpt") self.evaluate( state_ops.assign(model._named_dense.variables[1], [42.])) m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") self.evaluate(state_ops.assign(m_bias_slot, [1.5])) save_path = root_trackable.save(file_prefix=prefix) self.evaluate( state_ops.assign(model._named_dense.variables[1], [43.])) self.evaluate(state_ops.assign(root_trackable.save_counter, 3)) optimizer_variables = self.evaluate( sorted(optimizer.variables(), key=lambda v: v.name)) self.evaluate(state_ops.assign(m_bias_slot, [-2.])) # Immediate restoration status = root_trackable.restore( save_path=save_path).assert_consumed() status.run_restore_ops() self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) if not context.executing_eagerly(): return # Restore-on-create is only supported when executing eagerly on_create_model = MyModel() on_create_optimizer = adam.Adam(0.001) on_create_root = trackable_utils.Checkpoint( optimizer=on_create_optimizer, model=on_create_model) # Deferred restoration status = on_create_root.restore(save_path=save_path) status.assert_nontrivial_match() status.assert_existing_objects_matched() with self.assertRaises(AssertionError): status.assert_consumed() on_create_model(constant_op.constant([[3.]])) # create variables self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) self.assertAllEqual([42.], self.evaluate( on_create_model._named_dense.variables[1])) on_create_m_bias_slot = on_create_optimizer.get_slot( on_create_model._named_dense.variables[1], "m") status.assert_existing_objects_matched() if not context.executing_eagerly(): with self.assertRaises(AssertionError): status.assert_consumed() # Optimizer slot variables are created when the original variable is # restored. self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) dummy_var = variables_lib.Variable([1.]) on_create_optimizer.minimize(loss=dummy_var.read_value, var_list=[dummy_var]) status.assert_existing_objects_matched() status.assert_consumed() self.assertAllEqual( optimizer_variables, # Creation order is different, so .variables() needs to be re-sorted. self.evaluate( sorted(optimizer.variables(), key=lambda v: v.name)))