def _create_callbacks(self, log_dir, init_steps, steps_per_epoch, params,
                      ckpt_mgr):
  """Creates a list of callbacks.

  Args:
    log_dir: Directory used for TensorBoard event files and the CSV history
      log.
    init_steps: Global step at which this (re)started training run begins;
      passed to the LR scheduler and TensorBoard so step counts line up.
    steps_per_epoch: Number of training steps in one full epoch.
    params: Dict holding "learning_rate", "hidden_size" and
      "learning_rate_warmup_steps" for the LR schedule.
    ckpt_mgr: Checkpoint manager with a `save(checkpoint_number=...)` method
      (presumably a tf.train.CheckpointManager — confirm at call site).

  Returns:
    A list of Keras callbacks: LR scheduler, checkpoint-saving lambda,
    optionally TensorBoard, and a CSV logger.
  """
  sfunc = optimizer.LearningRateFn(params["learning_rate"],
                                   params["hidden_size"],
                                   params["learning_rate_warmup_steps"])

  def _save_checkpoint(epoch, logs):
    # Save only at a clean epoch boundary; an interrupted epoch leaves
    # logs['steps'] at a non-multiple of steps_per_epoch.
    if logs['steps'] % steps_per_epoch == 0:
      try:
        ckpt_mgr.save(checkpoint_number=epoch)
      except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit — narrowed to Exception so interrupts propagate.
        # Best-effort save: log and keep training.
        logging.warning(
            'save model failed due to an exception. continue without saving\n')
    else:
      logging.warning(
          f'not save model when training is interrupted. logs = {logs}\n')

  callbacks = []
  callbacks.append(optimizer.LearningRateScheduler(sfunc, init_steps))
  callbacks.append(
      tf.keras.callbacks.LambdaCallback(on_epoch_end=_save_checkpoint))
  if self.flags_obj.enable_tensorboard:
    tensorboard_callback = utils.TensorBoardFix(
        start_step=init_steps,
        log_dir=log_dir,
        profile_batch=0,
        write_graph=False,
        update_freq=self.flags_obj.batches_between_tensorboard_log)
    callbacks.append(tensorboard_callback)
  callbacks.append(
      utils.CSVLoggerFix(f'{log_dir}/history.step-{init_steps}.log'))
  return callbacks
def _create_callbacks(self, cur_log_dir, init_steps, params):
  """Creates a list of callbacks.

  Starts from the project's shared callbacks (via misc.get_callbacks), then
  adds the LR scheduler and a weights-only epoch checkpoint writer.
  """
  lr_fn = optimizer.LearningRateFn(params["learning_rate"],
                                   params["hidden_size"],
                                   params["learning_rate_warmup_steps"])
  callbacks = misc.get_callbacks(params["steps_between_evals"])
  callbacks.append(optimizer.LearningRateScheduler(lr_fn, init_steps))
  # Checkpoint path template is filled in per epoch by ModelCheckpoint.
  ckpt_template = os.path.join(cur_log_dir, "cp-{epoch:04d}.ckpt")
  callbacks.append(
      tf.keras.callbacks.ModelCheckpoint(ckpt_template,
                                         save_weights_only=True))
  return callbacks
def _create_callbacks(self, cur_log_dir, init_steps, params):
  """Creates a list of callbacks.

  Builds the LR scheduler plus TensorBoard, per-epoch HDF5 weight
  checkpoints, and an appending CSV result logger under cur_log_dir.
  """
  lr_fn = optimizer.LearningRateFn(params["learning_rate"],
                                   params["hidden_size"],
                                   params["learning_rate_warmup_steps"])
  lr_callback = optimizer.LearningRateScheduler(lr_fn, init_steps)
  tensorboard = tf.keras.callbacks.TensorBoard(
      os.path.join(cur_log_dir, "logs"))
  # Epoch and loss are interpolated into the filename by ModelCheckpoint.
  checkpoint = tf.keras.callbacks.ModelCheckpoint(
      os.path.join(cur_log_dir,
                   "weights-epoch-{epoch:02d}-loss-{loss:.4f}.hdf5"),
      save_weights_only=True)
  csv_logger = tf.keras.callbacks.CSVLogger(
      os.path.join(cur_log_dir, "result.csv"), append=True)
  return [lr_callback, tensorboard, checkpoint, csv_logger]
def plot_learning_rate(lr=1e-1, hidden_size=32, warmup_steps=30 * 37,
                       total_steps=37 * 500):
  """Plots the learning-rate schedule over the first `total_steps` steps.

  Args:
    lr: Base learning rate passed to the schedule.
    hidden_size: Model hidden size used by the Transformer LR formula.
    warmup_steps: Number of linear-warmup steps.
    total_steps: Horizon of the plot; previously hard-coded as 37 * 500,
      now a backward-compatible keyword parameter.
  """
  sfunc = optimizer.LearningRateFn(lr, hidden_size, warmup_steps)
  plot([go.Scatter(y=[sfunc(i) for i in range(total_steps)])])