def testNewOptSameVarScope(self):
    """Two optimizers built on the same model must not collide in var scope."""
    model = _simple_mlp()
    first = optimizers.Kfac(
        learning_rate=0.01, damping=0.01, model=model, loss='mse')
    first._create_optimizer()
    second = optimizers.Kfac(
        learning_rate=0.02, damping=0.03, model=model, loss='mse')
    second._create_optimizer()
def testConfig(self):
    """get_config reflects hyper reassignments and stays JSON-serializable."""
    approx = {layers.Dense: 'kron_in_diag', 'dense_1': 'kron_both_diag'}
    ctor_kwargs = {
        'loss': 'mse',
        'momentum': 7,
        'num_burnin_steps': 11,
        'min_damping': 9,
        'invert_every': 13,
        'fisher_approx': approx,
        'seed': 12,
    }
    opt = optimizers.Kfac(
        learning_rate=3, damping=5, model=_simple_mlp(), **ctor_kwargs)
    opt.learning_rate = 23
    opt.damping = 27
    config = opt.get_config()
    self.assertEqual(config['learning_rate'], 23)
    self.assertEqual(config['damping'], 27)
    # The config serializes layer classes with a prefix; mirror that in the
    # expected dict before comparing.
    approx[utils._CLASS_NAME_PREFIX + 'Dense'] = approx.pop(layers.Dense)
    for key, val in ctor_kwargs.items():
        self.assertEqual(config[key], val)
    # Below is how Keras's model.save saves the configs. If the config is not
    # serializable, it will throw a TypeError or OverflowError.
    json.dumps(config, default=serialization.get_json_type).encode('utf8')
def testCustomLossFn(self):
    """A custom loss_fn passed to Kfac is wired into the wrapped optimizer."""
    rands = lambda: np.random.random((100, 1)).astype(np.float32)
    dataset = tf.data.Dataset.from_tensor_slices((rands(), rands()))
    dataset = dataset.repeat().batch(10, drop_remainder=True)
    # Bug fix: the iterator was previously created twice; the first
    # assignment was dead and left an unused one-shot iterator in the graph.
    train_batch = dataset.make_one_shot_iterator().get_next()
    model = tf.keras.Sequential(
        [tf.keras.layers.Dense(1, input_shape=(1,))])

    def loss_fn(inputs):
        mse = tf.keras.losses.mean_squared_error(model(inputs[0]), inputs[1])
        return tf.reduce_mean(mse)

    loss = 'mse'
    optimizer = optimizers.Kfac(damping=10., train_batch=train_batch,
                                adaptive=True, model=model, loss=loss,
                                loss_fn=loss_fn,
                                qmodel_update_rescale=0.01)
    model.compile(optimizer, loss)
    model.fit(train_batch, steps_per_epoch=10, epochs=1)
    self.assertEqual(loss_fn, optimizer.optimizer._loss_fn)
def testRegisterLayersWithLayerCollection(self):
    """Layers can be registered through an explicit LayerCollection."""
    model, loss = _mnist_model(), 'categorical_crossentropy'
    collection = utils.get_layer_collection(model, loss)
    opt = optimizers.Kfac(learning_rate=0.01, damping=0.001)
    opt.register_layers(layer_collection=collection)
    model.compile(optimizer=opt, loss=loss)
    opt.get_updates(model.total_loss, model.trainable_weights)
def testCustomTrainingLoopMakeOptimizerBeforeModelCall(self):
    """Optimizer created before the model is called still trains correctly.

    We defer the creation of the layer_collection to the minimize call for
    this situation, because if we make the layer_collection immediately it
    will capture the wrong inbound node.
    """
    model = tf.keras.Sequential([
        layers.Conv2D(13, 5),
        layers.BatchNormalization(fused=False),
        layers.Conv2D(23, 3),
        layers.LayerNormalization(),
        layers.GlobalMaxPool2D(),
        layers.Dense(10, activation='softmax', name='output_test')
    ])
    optimizer = optimizers.Kfac(learning_rate=0.01, damping=0.01,
                                model=model, loss='binary_crossentropy')
    x, y = _get_synthetic_mnist_train_tensors(batch_size=10)
    model_input = tf.keras.Input(tensor=x)
    output = model(model_input)
    # Bug fix: tf.keras.losses.binary_crossentropy takes (y_true, y_pred);
    # the arguments were previously swapped as (output, y).
    loss = tf.keras.losses.binary_crossentropy(y, output)
    train_op = optimizer.minimize(loss, var_list=model.trainable_weights)
    with self.cached_session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(3):
            sess.run([train_op])
def test_functional_model_saving(self):
    """Saving and reloading a functional model with Kfac preserves outputs."""
    if h5py is None:
        self.skipTest('h5py required to run this test')
    with self.cached_session():
        # Small functional model: 3 -> Dense(2) -> Dense(3).
        inputs = keras.layers.Input(shape=(3, ))
        x = keras.layers.Dense(2)(inputs)
        output = keras.layers.Dense(3)(x)
        model = keras.models.Model(inputs, output)
        model.compile(loss=keras.losses.MSE,
                      optimizer=optimizers.Kfac(model=model, **_KFAC_KWARGS),
                      metrics=[
                          keras.metrics.categorical_accuracy,
                          keras.metrics.CategoricalAccuracy()
                      ],
                      weighted_metrics=[
                          keras.metrics.categorical_accuracy,
                          keras.metrics.CategoricalAccuracy()
                      ])
        x = np.random.random((1, 3))
        y = np.random.random((1, 3))
        # One training step so optimizer state exists before saving.
        model.train_on_batch(x, y)
        out = model.predict(x)
        fd, fname = tempfile.mkstemp('.h5')
        keras.models.save_model(model, fname)
        # Reload under a fresh optimizer name — presumably to avoid a
        # variable-scope clash with the still-live original; confirm.
        model = saving_utils.load_model(fname, optimizer_name='new')
        os.close(fd)
        os.remove(fname)
        # Predictions before save and after load must agree.
        out2 = model.predict(x)
        self.assertAllClose(out, out2, atol=1e-05)
def testRegisterLayersCompiledModel(self, loss):
    """register_layers works on a model that has already been compiled."""
    opt = optimizers.Kfac(learning_rate=0.01, damping=0.001)
    model = _mnist_model()
    model.compile(optimizer=opt, loss=loss)
    opt.register_layers(model=model)
    # NOTE(review): the model is compiled a second time after registering
    # layers — presumably to check re-compilation still works; confirm intent.
    model.compile(optimizer=opt, loss=loss)
    opt.get_updates(model.total_loss, model.trainable_weights)
def testCustomTrainingLoopFunctionalInpShape(self):
    """Functional model built from an Input shape trains in a custom loop.

    We need to ensure the correct inbound node is used for the layer
    collection.
    """
    x, y = _get_synthetic_mnist_train_tensors(batch_size=10)
    model_input = tf.keras.Input(tensor=x)
    # Build Model
    inp = tf.keras.Input(shape=(28, 28, 1))
    x = layers.Conv2D(13, 5)(inp)
    x = layers.BatchNormalization(fused=True)(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(23, 3)(x)
    x = layers.LayerNormalization()(x)
    x = layers.GlobalMaxPool2D()(x)
    out = layers.Dense(10, activation='softmax', name='output_test')(x)
    model = tf.keras.Model(inputs=inp, outputs=out)
    output = model(model_input)
    # Bug fix: tf.keras.losses.binary_crossentropy takes (y_true, y_pred);
    # the arguments were previously swapped as (output, y).
    loss = tf.keras.losses.binary_crossentropy(y, output)
    optimizer = optimizers.Kfac(damping=0.01, learning_rate=0.01,
                                model=model, loss='binary_crossentropy')
    train_op = optimizer.minimize(loss, var_list=model.trainable_weights)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(3):
            sess.run([train_op])
def testTrainWithoutCreatingOptimizerFails(self):
    """Applying gradients without a registered model/loss raises ValueError."""
    with self.assertRaisesRegex(ValueError,
                                '.*provide a model with a loss.*'):
        opt = optimizers.Kfac(learning_rate=0.01, damping=0.001)
        model = _mnist_model()
        model.compile(optimizer=opt, loss='categorical_crossentropy')
        grads_and_vars = opt.get_gradients(model.total_loss,
                                           model.trainable_weights)
        opt.apply_gradients(grads_and_vars)
def testInstantiationWithLayerCollection(self):
    """Kfac can be constructed directly from a LayerCollection."""
    model = _simple_mlp()
    collection = utils.get_layer_collection(model, 'mse')
    opt = optimizers.Kfac(learning_rate=0.1, damping=0.2,
                          layer_collection=collection)
    model.compile(optimizer=opt, loss='mse')
    opt.get_updates(model.total_loss, model.trainable_weights)
def testFunctionalInstantiation(self):
    """Kfac accepts a functional-API model at construction time."""
    inputs = layers.Input(shape=(3,))
    hidden = layers.Dense(4, activation=tf.nn.relu)(inputs)
    outputs = layers.Dense(5, activation=tf.nn.softmax)(hidden)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    optimizers.Kfac(learning_rate=0.002, damping=0.04, model=model,
                    loss='binary_crossentropy')
def _train_model(data, model, loss, lr=0.001, damping=0.001, batch_size=32,
                 epochs=1, loss_weights=None):
    """Compiles and fits the model to data and returns training results.

    Args:
      data: Tuple of numpy arrays shaped ((x_train, y_train),
        (x_test, y_test)).
      model: Uncompiled Keras model with inputs/output shapes matching the
        data.
      loss: tf.keras.losses loss function or serialized (string) loss
        function.
      lr: Learning rate for the optimizer.
      damping: Damping parameter for KFAC.
      batch_size: Batch size used for training.
      epochs: Number of training epochs.
      loss_weights: List of weights or dict mapping layer names to loss
        function weight.

    Returns:
      A History object. Calling History.history gives you a dictionary with
      training and validation results.
    """
    (x_train, y_train), valid_data = data
    optimizer = optimizers.Kfac(learning_rate=lr, damping=damping,
                                model=model, loss=loss,
                                loss_weights=loss_weights)
    model.compile(optimizer, loss, loss_weights=loss_weights)
    return model.fit(x_train, y_train,
                     batch_size=batch_size,
                     epochs=epochs,
                     validation_data=valid_data,
                     verbose=0)
def testExponentialDampingValuesWithDecayRate(self):
    """Damping follows the exponential-decay schedule batch by batch."""
    initial_damping = 0.01
    decay_rate = 0.3
    num_decay_steps = 4
    num_delay_steps = 3
    opt = optimizers.Kfac(learning_rate=0.01, damping=initial_damping,
                          model=self.model, loss='mse')
    self.model.compile(opt, 'mse')
    observed = []
    cbs = [
        callbacks.ExponentialDecay(hyperparameter='damping',
                                   init_value=initial_damping,
                                   decay_rate=decay_rate,
                                   num_decay_steps=num_decay_steps,
                                   num_delay_steps=num_delay_steps,
                                   verbose=1),
        HyperParamTracker('damping', observed, HyperParamTracker.BATCH)
    ]
    self.model.fit(self.data, self.labels, batch_size=self.batch_size,
                   callbacks=cbs)
    # Constant during the delay, then decays exponentially, with the
    # exponent capped at num_decay_steps.
    expected = [initial_damping] * num_delay_steps
    expected += [
        initial_damping * decay_rate**min(i, num_decay_steps)
        for i in range(self.num_steps - num_delay_steps)
    ]
    self.assertAllClose(observed, expected)
def testClipValueFails(self):
    """The clipvalue argument is rejected by Kfac."""
    with self.assertRaises(ValueError):
        optimizers.Kfac(learning_rate=0.01, damping=0.01,
                        model=_simple_mlp(), loss='mse', clipvalue=0.1)
def testFromConfig(self, kwargs_updates):
    """from_config(get_config()) round-trips to an equivalent config."""
    kwargs = {
        'learning_rate': 3,
        'damping': 5,
        'momentum': 7,
        'min_damping': 9,
        'num_burnin_steps': 11,
        'invert_every': 13,
        'fisher_approx': {
            layers.Dense: 'kron_in_diag',
            'dense_1': 'kron_both_diag'
        },
    }
    kwargs.update(kwargs_updates)
    original = optimizers.Kfac(model=_simple_mlp(), **kwargs)
    config = original.get_config()
    # Give the restored optimizer a distinct name; names are stripped below
    # before the configs are compared.
    config['name'] = 'diff_scope_name'
    restored = optimizers.Kfac.from_config(config)
    restored_config = restored.get_config()
    restored_config.pop('name')
    config.pop('name')
    self.assertEqual(config, restored_config)
    # Below is how Keras's model.save saves the configs. If the config is not
    # serializable, it will throw a TypeError or OverflowError.
    json.dumps(config, default=serialization.get_json_type).encode('utf8')
    json.dumps(restored_config,
               default=serialization.get_json_type).encode('utf8')
def testInferredBatchSizeFail(self, kfac_kwargs):
    """Batch-size inference fails when drop_remainder=False hides the size."""
    dataset = tf.data.Dataset.from_tensors(([1.], [1.]))
    # With drop_remainder=False the static batch dimension is unknown, so
    # the optimizer cannot infer batch_size from train_batch.
    dataset = dataset.repeat().batch(11, drop_remainder=False)
    train_batch = dataset.make_one_shot_iterator().get_next()
    with self.assertRaisesRegex(ValueError, 'Could not infer batch_size.*'):
        # Fix: the constructed optimizer was previously bound to an unused
        # local variable.
        optimizers.Kfac(damping=10., train_batch=train_batch, **kfac_kwargs)
def testLossTensor(self):
    """A user-supplied loss_tensor is stored on the wrapped optimizer."""
    loss_tensor = tf.convert_to_tensor(2.0)
    kfac = optimizers.Kfac(learning_rate=0.01, damping=0.01,
                           model=_simple_mlp(), loss='mse',
                           loss_tensor=loss_tensor)
    self.assertEqual(kfac.optimizer._loss_tensor, loss_tensor)
def testSeed(self):
    """The seed kwarg propagates to the layer collection's loss function."""
    opt = optimizers.Kfac(learning_rate=0.01, damping=0.01,
                          model=_simple_mlp(), loss='mse', seed=4321)
    layer_collection = opt.optimizer.layers
    self.assertEqual(
        layer_collection._loss_dict['squared_error_loss'][0]._default_seed,
        4321)
def testGettingVariableHyperFails(self):
    """Reading an uninitialized tf.Variable hyper raises an error."""
    self.skipTest('This is not fixed in TF 1.14 yet.')
    opt = optimizers.Kfac(model=_simple_mlp(), loss='mse',
                          learning_rate=tf.Variable(0.1),
                          damping=tf.Variable(0.1))
    with self.assertRaisesRegex(tf.errors.FailedPreconditionError,
                                '.*uninitialized.*'):
        backend.get_value(opt.learning_rate)
def testAdaptiveWithLR(self, kfac_kwargs):
    """An explicit learning_rate is rejected in adaptive configurations."""
    dataset = tf.data.Dataset.from_tensors(([1.], [1.]))
    dataset = dataset.repeat().batch(11, drop_remainder=True)
    train_batch = dataset.make_one_shot_iterator().get_next()
    with self.assertRaisesRegex(ValueError, 'learning_rate must be None.*'):
        # Fix: the constructed optimizer was previously bound to an unused
        # local variable.
        optimizers.Kfac(damping=10., train_batch=train_batch,
                        learning_rate=0.1, **kfac_kwargs)
def testSequentialInstantiation(self):
    """Kfac accepts a Sequential model at construction time."""
    model = tf.keras.Sequential([
        layers.Conv2D(7, (3, 3), input_shape=(28, 28, 3)),
        layers.Activation('relu'),
        layers.Conv2D(13, (3, 3), activation='relu'),
        layers.GlobalMaxPool2D(),
        layers.Activation('softmax')
    ])
    optimizers.Kfac(learning_rate=0.03, damping=0.00007, model=model,
                    loss='binary_crossentropy')
def testRNNFails(self):
    """Recurrent layers are unsupported and raise at optimizer creation."""
    model = tf.keras.Sequential()
    model.add(layers.Embedding(43, 128))
    model.add(layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    model.add(layers.Dense(1, activation='sigmoid'))
    opt = optimizers.Kfac(learning_rate=0.003, damping=0.003, model=model,
                          loss='binary_crossentropy')
    with self.assertRaisesRegex(
        ValueError, '.*lstm.* has more than one parent tensor.$'):
        opt._create_optimizer()
def testModifyingTensorHypersFails(self, name, val):
    """Hypers given as tensors are readable but cannot be reassigned."""
    kwargs = {'learning_rate': 3, 'damping': 5, 'momentum': 7}
    kwargs[name] = tf.convert_to_tensor(val)
    opt = optimizers.Kfac(model=_simple_mlp(), loss='mse', **kwargs)
    with self.subTest(name='AssignedCorrectly'):
        self.assertEqual(backend.get_value(getattr(opt, name)), val)
    with self.subTest(name='RaisesError'):
        with self.assertRaisesRegex(
            AttributeError, "Can't set attribute: {}".format(name)):
            setattr(opt, name, 17)
def test_saving_model_with_long_weights_names(self):
    """Weight names long enough to force HDF5 attribute chunking still save.

    Currently skipped: KFAC does not support nested models yet.
    """
    self.skipTest('KFAC does not support nested models yet.')
    if h5py is None:
        self.skipTest('h5py required to run this test')
    with self.cached_session():
        # Inner (nested) model: 2 -> four Dense(2) layers.
        x = keras.Input(shape=(2, ), name='nested_model_input')
        f = x
        for i in range(4):
            f = keras.layers.Dense(2,
                                   name='nested_model_dense_%d' % (i, ))(f)
        # This layer name will make the `weights_name`
        # HDF5 attribute blow out of proportion.
        f = keras.layers.Dense(
            2, name='nested_model_output' + ('x' * (2**14)))(f)
        nested_model = keras.Model(inputs=[x], outputs=[f],
                                   name='nested_model')
        # Outer model wrapping the nested one.
        x = keras.Input(shape=(2, ), name='outer_model_input')
        f = nested_model(x)
        f = keras.layers.Dense(2, name='outer_model_output')(f)
        model = keras.Model(inputs=[x], outputs=[f])
        model.compile(loss='mse',
                      optimizer=optimizers.Kfac(model=model, **_KFAC_KWARGS),
                      metrics=['acc'])
        x = np.random.random((1, 2))
        y = np.random.random((1, 2))
        # One training step so optimizer state exists before saving.
        model.train_on_batch(x, y)
        out = model.predict(x)
        fd, fname = tempfile.mkstemp('.h5')
        keras.models.save_model(model, fname)
        model = saving_utils.load_model(fname, optimizer_name='new')
        # Check that the HDF5 files contains chunked array
        # of weight names.
        with h5py.File(fname, 'r') as h5file:
            num_weight_arrays = len([
                attr
                for attr in h5file['model_weights']['nested_model'].attrs
                if attr.startswith('weight_names')
            ])
        # The chunking of layer names array should have happened.
        self.assertGreater(num_weight_arrays, 0)
        out2 = model.predict(x)
        self.assertAllClose(out, out2, atol=1e-05)
        # Cleanup
        os.close(fd)
        os.remove(fname)
def test_sequential_model_saving(self):
    """A saved/reloaded Sequential model predicts and trains like the original."""
    if h5py is None:
        self.skipTest('h5py required to run this test')
    with self.cached_session():
        # Small Sequential model: 2 -> Dense(2) -> RepeatVector -> Flatten
        # -> Dense(3).
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(2, input_shape=(2, )))
        model.add(keras.layers.RepeatVector(3))
        model.add(keras.layers.Flatten())
        model.add(keras.layers.Dense(3))
        model.compile(loss=keras.losses.MSE,
                      optimizer=optimizers.Kfac(model=model, **_KFAC_KWARGS),
                      metrics=[
                          keras.metrics.categorical_accuracy,
                          keras.metrics.CategoricalAccuracy()
                      ])
        x = np.random.random((1, 2))
        y = np.random.random((1, 3))
        # TODO(b/136561651): Since we use TFP distributions to sample from
        # the output distribution, optimizer's won't match exactly unless
        # they are run for the same number of steps. Even with a random
        # seed, the internal state of TFP changes with each call. We must
        # switch to a stateless sampler. Uncomment the train line below once
        # this is implemented.
        # model.train_on_batch(x, y)
        out = model.predict(x)
        fd, fname = tempfile.mkstemp('.h5')
        keras.models.save_model(model, fname)
        new_model = saving_utils.load_model(fname, optimizer_name='new')
        os.close(fd)
        os.remove(fname)
        # Predictions must match immediately after the round trip.
        out2 = new_model.predict(x)
        self.assertAllClose(out, out2, atol=1e-05)
        # test that new updates are the same with both models
        x = np.random.random((1, 2))
        y = np.random.random((1, 3))
        model.train_on_batch(x, y)
        new_model.train_on_batch(x, y)
        x = np.random.random((1, 2))
        y = np.random.random((1, 3))
        eval_out = model.evaluate(x, y)
        eval_out2 = new_model.evaluate(x, y)
        self.assertArrayNear(eval_out, eval_out2, 1e-03)
        out = model.predict(x)
        out2 = new_model.predict(x)
        self.assertAllClose(out, out2, atol=1e-05)
def testGettingHyper(self, hyper_ctor):
    """Hyper getters agree with the given values and the wrapped optimizer."""
    float_values = {'learning_rate': 3, 'damping': 20, 'momentum': 13}
    ctor_kwargs = {k: hyper_ctor(v) for k, v in float_values.items()}
    opt = optimizers.Kfac(model=_simple_mlp(), loss='mse', **ctor_kwargs)
    get_value = backend.get_value
    tf_opt = opt.optimizer
    with self.subTest(name='MatchesFloat'):
        for name, val in float_values.items():
            self.assertEqual(get_value(getattr(opt, name)), val)
    with self.subTest(name='MatchesTfOpt'):
        self.assertEqual(get_value(opt.lr), get_value(tf_opt.learning_rate))
        self.assertEqual(get_value(opt.damping), get_value(tf_opt.damping))
        self.assertEqual(get_value(opt.momentum), get_value(tf_opt.momentum))
def testTrainModelWithFusedBN(self, has_shift):
    """A conv net with fused BatchNorm trains for one epoch under KFAC."""
    model = tf.keras.Sequential([
        layers.Conv2D(13, 5, input_shape=(28, 28, 1)),
        layers.BatchNormalization(center=has_shift, fused=True),
        layers.GlobalMaxPool2D(),
        layers.Dense(10, activation='softmax')
    ])
    (x_train, y_train), _ = _get_synthetic_mnist_dataset()
    loss = 'categorical_crossentropy'
    opt = optimizers.Kfac(learning_rate=0.01, damping=0.01, model=model,
                          loss=loss)
    model.compile(opt, loss)
    return model.fit(x_train, y_train, batch_size=32, epochs=1, verbose=0)
def testSettingName(self):
    """The name may be set before, but not after, variables are created."""
    model = _simple_mlp()
    optimizer = optimizers.Kfac(damping=0.01, learning_rate=0.01,
                                model=model, loss='mse')
    optimizer.name = 'new_name'
    self.assertEqual(optimizer._name, 'new_name')
    self.assertEqual(optimizer.get_config()['name'], 'new_name')
    self.assertEqual(optimizer._kfac_kwargs['name'], 'new_name')
    # Triggering variable creation freezes the name.
    model.compile(optimizer, 'mse')
    model._make_train_function()
    with self.assertRaisesRegex(ValueError,
                                '.*after the variables are created.*'):
        optimizer.name = 'another_name'
def testLRBackwardsCompatibility(self):
    """This tests learning rate getting/setting used by old Keras callbacks."""
    opt = optimizers.Kfac(
        learning_rate=3, damping=5, model=_simple_mlp(), loss='mse')

    def check_lr(expected):
        # Both the `lr` alias and `learning_rate` must report the same value.
        self.assertEqual(backend.get_value(opt.lr), expected)
        self.assertEqual(backend.get_value(opt.learning_rate), expected)

    check_lr(3)
    opt.lr = 7
    check_lr(7)
    backend.set_value(opt.lr, 9)
    check_lr(9)
    backend.set_value(opt.learning_rate, 11)
    check_lr(11)
def testAdaptiveModelFit(self, adaptive_kwargs):
    """model.fit runs under the given adaptive KFAC configuration."""
    rands = lambda: np.random.random((100, 1)).astype(np.float32)
    dataset = tf.data.Dataset.from_tensor_slices((rands(), rands()))
    dataset = dataset.repeat().batch(10, drop_remainder=True)
    # Bug fix: the iterator was previously created twice; the first
    # assignment was dead and left an unused one-shot iterator in the graph.
    train_batch = dataset.make_one_shot_iterator().get_next()
    model = tf.keras.Sequential(
        [tf.keras.layers.Dense(1, input_shape=(1,))])
    loss = 'mse'
    optimizer = optimizers.Kfac(damping=10., train_batch=train_batch,
                                model=model, loss=loss, **adaptive_kwargs)
    model.compile(optimizer, loss)
    model.fit(train_batch, steps_per_epoch=10, epochs=1)