def _test_config_tofrom(model_fn, loss, opt):
    """Confirm that the optimizer saves its config and loads it back correctly."""

    # Build the model and save the optimizer's config as c.
    learning_rate = 0.01
    model_lr = model_fn()
    model_lr.layers[0].lr_mult = 0.3
    model_lr.layers[0].layers[-1].lr_mult = 0.1
    model_lr.layers[-1].lr_mult = 0.5

    d_opt = DiscriminativeLayerOptimizer(
        opt, model_lr, verbose=False, learning_rate=learning_rate
    )
    model_lr.compile(loss=loss, optimizer=d_opt)

    c = d_opt.get_config()

    # Reconstruct the model and then build the optimizer from the config.
    model_lr = model_fn()
    model_lr.layers[0].lr_mult = 0.3
    model_lr.layers[0].layers[-1].lr_mult = 0.1
    model_lr.layers[-1].lr_mult = 0.5

    d_opt_from_config = DiscriminativeLayerOptimizer.from_config(c, model_lr)
    model_lr.compile(loss=loss, optimizer=d_opt_from_config)

    # We expect both optimizers to have the same optimizer group and base optimizer.
    np.testing.assert_equal(
        len(d_opt.optimizer_group), len(d_opt_from_config.optimizer_group)
    )
    np.testing.assert_equal(d_opt.opt_class, d_opt_from_config.opt_class)

    # We also expect the lr for each optimizer in the optimizer groups to be the same.
    # This also confirms that the lr multipliers match.
    np.testing.assert_array_equal(
        [opt.learning_rate for opt in d_opt.optimizer_group],
        [opt.learning_rate for opt in d_opt_from_config.optimizer_group],
    )


def test_equal_with_no_layer_lr(model_fn, loss, opt):
    """Confirm that discriminative learning is almost the same as regular learning."""
    learning_rate = 0.01
    model = model_fn()
    model.compile(loss=loss, optimizer=opt(learning_rate))

    model_lr = model_fn()
    d_opt = DiscriminativeLayerOptimizer(
        opt, model_lr, verbose=False, learning_rate=learning_rate
    )
    model_lr.compile(loss=loss, optimizer=d_opt)

    _assert_training_losses_are_close(model, model_lr)


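# The comparison helpers used throughout these tests (_assert_training_losses_are_close,
# _get_train_results, and get_losses) are defined elsewhere in this test module. The
# sketch below only illustrates the behavior these tests assume: train both models on
# identical dummy data and check that their loss curves nearly match. The names
# _example_train_results and _example_assert_losses_close, the dummy data shapes, and
# the tolerances are assumptions for illustration, not part of the real test suite.
def _example_train_results(model, epochs=10):
    # Fixed dummy data so both models see identical batches in identical order.
    x = np.ones(shape=(8, 32, 32, 3), dtype=np.float32)
    y = np.zeros(shape=(8, 5), dtype=np.float32)
    y[:, 0] = 1.0
    return model.fit(x, y, epochs=epochs, batch_size=4, verbose=False, shuffle=False)


def _example_assert_losses_close(model_a, model_b, epochs=10):
    # Compare the per-epoch training losses of the two models within a loose tolerance.
    losses_a = _example_train_results(model_a, epochs).history["loss"]
    losses_b = _example_train_results(model_b, epochs).history["loss"]
    np.testing.assert_allclose(losses_a, losses_b, rtol=0.1, atol=0.05)

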
def _test_equal_half_layer_lr_to_half_lr_of_opt(model_fn, loss, opt):
    """Confirm that an lr_mult of 0.5 on the model is the same as halving the optimizer's lr.

    This also confirms that lr_mult on the model level is propagated to all
    sublayers and their variables.
    """
    mult = 0.5
    learning_rate = 0.01
    model = model_fn()
    model.compile(loss=loss, optimizer=opt(learning_rate * mult))

    model_lr = model_fn()
    model_lr.lr_mult = mult
    d_opt = DiscriminativeLayerOptimizer(
        opt, model_lr, verbose=False, learning_rate=learning_rate
    )
    model_lr.compile(loss=loss, optimizer=d_opt)

    _assert_training_losses_are_close(model, model_lr)


def _test_equal_0_layer_lr_to_trainable_false(model_fn, loss, opt):
    """Confirm that an lr_mult of 0 on the model is the same as the model not being trainable.

    This also confirms that lr_mult on the model level is propagated to all
    sublayers and their variables.
    """
    learning_rate = 0.01
    model = model_fn()
    model.trainable = False
    model.compile(loss=loss, optimizer=opt(learning_rate))

    model_lr = model_fn()
    model_lr.lr_mult = 0.0
    d_opt = DiscriminativeLayerOptimizer(
        opt, model_lr, verbose=False, learning_rate=learning_rate
    )
    model_lr.compile(loss=loss, optimizer=d_opt)

    # Only two epochs because we expect no training to occur, so the losses shouldn't change anyway.
    _assert_training_losses_are_close(model, model_lr, epochs=2)


def _test_variables_get_assigned(model_fn, loss, opt):
    """Confirm that variables get an lr_mult attribute and that it is the correct one."""
    learning_rate = 0.01
    model_lr = model_fn()

    # Set the lr multipliers.
    model_lr.layers[0].lr_mult = 0.3
    model_lr.layers[0].layers[-1].lr_mult = 0.1
    model_lr.layers[-1].lr_mult = 0.5

    d_opt = DiscriminativeLayerOptimizer(
        opt, model_lr, verbose=False, learning_rate=learning_rate
    )
    model_lr.compile(loss=loss, optimizer=d_opt)

    # We expect the number of trainable vars at lr_mult 0.3 to be the vars of layers[0]
    # minus the vars of its last sublayer, which is at lr_mult 0.1.
    # This tests that the 0.3 lr_mult does not override the 0.1 lr_mult.
    np.testing.assert_equal(
        len(model_lr.layers[0].trainable_variables)
        - len(model_lr.layers[0].layers[-1].trainable_variables),
        len([var for var in model_lr.trainable_variables if var.lr_mult == 0.3]),
    )

    # We expect the trainable vars at lr_mult 0.1 to equal the trainable vars of that sublayer.
    np.testing.assert_equal(
        len(model_lr.layers[0].layers[-1].trainable_variables),
        len([var for var in model_lr.trainable_variables if var.lr_mult == 0.1]),
    )

    # Same logic as above, for the last layer at lr_mult 0.5.
    np.testing.assert_equal(
        len(model_lr.layers[-1].trainable_variables),
        len([var for var in model_lr.trainable_variables if var.lr_mult == 0.5]),
    )


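# A hypothetical inspection helper illustrating the grouping logic asserted above: after
# compiling with the wrapper, every trainable variable is assumed to carry an lr_mult
# attribute, so variables can simply be counted per multiplier. The name
# _example_count_vars_per_lr_mult is an assumption for illustration, not part of the suite.
def _example_count_vars_per_lr_mult(model):
    from collections import Counter

    # Variables without an explicit lr_mult fall back to a multiplier of 1.0 here.
    return Counter(getattr(var, "lr_mult", 1.0) for var in model.trainable_variables)

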
def _test_model_checkpoint(model_fn, loss, opt):
    """Confirm that the model saves checkpoints and can load them properly."""
    learning_rate = 0.01
    model_lr = model_fn()
    model_lr.layers[0].lr_mult = 0.3
    model_lr.layers[0].layers[-1].lr_mult = 0.1
    model_lr.layers[-1].lr_mult = 0.5

    d_opt = DiscriminativeLayerOptimizer(
        opt, model_lr, verbose=False, learning_rate=learning_rate
    )
    model_lr.compile(loss=loss, optimizer=d_opt)

    x = np.ones(shape=(8, 32, 32, 3), dtype=np.float32)
    y = np.zeros(shape=(8, 5), dtype=np.float32)
    y[:, 0] = 1.0

    filepath = os.path.join(tempfile.gettempdir(), model_fn.__name__ + "_cp.ckpt")

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            filepath=filepath, save_weights_only=True, verbose=1
        )
    ]

    model_lr.fit(
        x,
        y,
        epochs=2,
        batch_size=4,
        verbose=False,
        shuffle=False,
        callbacks=callbacks,
    )

    # If this doesn't error out, then loading and checkpointing should be fine.
    model_lr.load_weights(filepath=filepath)


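# The tests in this file assume that model_fn builds a Keras model whose first layer is
# itself a nested (pretrained-style) model and whose last layer is a small head, matching
# the 32x32x3 inputs and 5-class targets used above. A minimal sketch of such a factory
# follows; the name _example_toy_model_fn and the layer choices are assumptions for
# illustration, not the model_fn fixtures used by the real tests.
def _example_toy_model_fn():
    # Nested sub-model standing in for a pretrained backbone: layers[0] of the full model.
    backbone = tf.keras.Sequential(
        [
            tf.keras.layers.Conv2D(4, 3, activation="relu", input_shape=(32, 32, 3)),
            tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(8, activation="relu"),
        ]
    )
    # Full model: backbone followed by a 5-class classification head (layers[-1]).
    return tf.keras.Sequential(
        [backbone, tf.keras.layers.Dense(5, activation="softmax")]
    )

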
def _test_sub_layers_keep_lr_mult(model_fn, loss, opt):
    """Confirm that a sub layer keeps its own lr_mult while a different lr_mult
    is applied everywhere else.

    Also confirms that sub layers with an lr_mult do not get overridden.
    """
    learning_rate = 0.01
    model_lr = model_fn()

    # We set the model to lr_mult 0 and the last layer to lr_mult 3.
    # If the last layer is trainable, then the loss should decrease.
    model_lr.lr_mult = 0.00
    model_lr.layers[-1].lr_mult = 3

    d_opt = DiscriminativeLayerOptimizer(
        opt, model_lr, verbose=False, learning_rate=learning_rate
    )
    model_lr.compile(loss=loss, optimizer=d_opt)

    loss_values = get_losses(_get_train_results(model_lr, epochs=5))
    np.testing.assert_array_less([loss_values[-1]], [loss_values[0]])


def _test_equal_0_sub_layer_lr_to_sub_layer_trainable_false(model_fn, loss, opt):
    """Confirm that an lr_mult of 0 for a specific layer is the same as setting that layer to not trainable.

    This also confirms that lr_mult propagates into that layer's trainable variables.
    This also confirms that lr_mult does not propagate to the rest of the layers unintentionally.
    """
    learning_rate = 0.01
    model = model_fn()

    # layers[0] represents the pretrained network.
    model.layers[0].trainable = False
    model.compile(loss=loss, optimizer=opt(learning_rate))

    model_lr = model_fn()
    model_lr.layers[0].lr_mult = 0.0
    d_opt = DiscriminativeLayerOptimizer(
        opt, model_lr, verbose=False, learning_rate=learning_rate
    )
    model_lr.compile(loss=loss, optimizer=d_opt)

    _assert_training_losses_are_close(model, model_lr)


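# A hypothetical sketch of how one of the _test_* helpers above might be invoked directly,
# using the toy factory sketched earlier. The loss string and optimizer class here are
# assumptions for illustration; the real test module drives these helpers with its own
# parametrized model/loss/optimizer combinations.
def _example_run_config_tofrom():
    _test_config_tofrom(
        model_fn=_example_toy_model_fn,
        loss="categorical_crossentropy",
        opt=tf.keras.optimizers.SGD,
    )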