def _make_execution_function(model, mode): """Makes function to run one step of model execution.""" if model._distribution_strategy: return dist_utils._make_execution_function(model, mode) else: return model._make_execution_function(mode)
def _get_iterator(inputs, distribution_strategy=None):
  """Creates a data iterator, distribution-aware when a strategy is given."""
  if distribution_strategy:
    return distributed_training_utils.get_iterator(
        inputs, distribution_strategy)
  return training_utils.get_iterator(inputs)


def _reinitialize_iterator(iterator, distribution_strategy=None):
  """Re-initializes `iterator`, using the distributed path when applicable."""
  if distribution_strategy:
    distributed_training_utils.initialize_iterator(
        iterator, distribution_strategy)
    return
  training_utils.initialize_iterator(iterator)


def _make_execution_function(model, mode):
  """Makes function to run one step of model execution."""
  if not model._distribution_strategy:
    return model._make_execution_function(mode)
  return distributed_training_utils._make_execution_function(model, mode)


# For backwards compatibility for internal users of these loops.
fit_loop = functools.partial(model_iteration, mode=ModeKeys.TRAIN)
test_loop = functools.partial(
    model_iteration, mode=ModeKeys.TEST, shuffle=False)
predict_loop = functools.partial(
    model_iteration, mode=ModeKeys.PREDICT, shuffle=False)
def fit(
    self,
    model,
    x=None,
    y=None,
    batch_size=None,
    epochs=1,
    verbose=1,
    callbacks=None,
    validation_split=0.,
    validation_data=None,
    shuffle=True,
    class_weight=None,
    sample_weight=None,
    initial_epoch=0,
    steps_per_epoch=None,
    validation_steps=None,
    validation_freq=1,
    **kwargs):
  """Trains `model` for `epochs`, optionally evaluating each epoch.

  Resolves batch size and step counts (distribution-strategy aware),
  builds training/validation data adapters, then drives the per-epoch
  train loop and, when validation data is present, the per-epoch eval
  loop. Returns `model.history`.
  """
  batch_size = model._validate_or_infer_batch_size(
      batch_size, steps_per_epoch, x)

  strategy = _get_distribution_strategy(model)
  batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size(
      strategy, x, batch_size, steps_per_epoch, ModeKeys.TRAIN)
  dist_utils.validate_callbacks(input_callbacks=callbacks,
                                optimizer=model.optimizer)

  # Enter tf.distribute.Strategy scope.
  with dist_utils.distributed_scope(
      strategy=strategy, learning_phase=1):

    training_data_adapter, validation_adapter = _process_training_inputs(
        model,
        x,
        y,
        batch_size=batch_size,
        sample_weights=sample_weight,
        class_weights=class_weight,
        validation_split=validation_split,
        steps_per_epoch=steps_per_epoch,
        shuffle=shuffle,
        validation_data=validation_data,
        validation_steps=validation_steps,
        distribution_strategy=strategy)

    do_validation = (validation_adapter is not None)
    if not steps_per_epoch:
      steps_per_epoch = training_data_adapter.get_size()

    # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch))
    training_context = TrainingContext()

    initial_epoch = model._maybe_load_initial_epoch_from_ckpt(
        initial_epoch, ModeKeys.TRAIN)

    _update_sample_weight_mode(model, ModeKeys.TRAIN, training_data_adapter,
                               strategy)
    training_function = dist_utils._make_execution_function(
        model, ModeKeys.TRAIN)

    training_data_iter = None
    # Only recreate iterator when the data has a fixed length, which will be
    # fully consumed every epoch, or has a unknown length (dataset, generator)
    # and will be fully consumed (steps_per_epoch is None)
    recreate_training_iterator = (
        training_data_adapter.get_size() is not None
        or steps_per_epoch is None)

    if do_validation:
      if not validation_steps:
        validation_steps = validation_adapter.get_size()
      eval_function = dist_utils._make_execution_function(
          model, ModeKeys.TEST)
      eval_data_iter = None
      recreate_eval_iterator = (validation_adapter.get_size() is not None
                                or validation_steps is None)

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=do_validation,
        batch_size=batch_size,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        samples=None,
        verbose=0,  # Handle ProgBarLogger separately in this loop.
        mode=ModeKeys.TRAIN)

    with training_context.on_start(model, callbacks, verbose, ModeKeys.TRAIN):
      # TODO(scottzhu): Handle TPUStrategy training loop
      for epoch in range(initial_epoch, epochs):
        if training_context.callbacks.model.stop_training:
          break

        # Training
        with training_context.on_epoch(epoch, ModeKeys.TRAIN) as epoch_logs:
          model.reset_metrics()
          if training_data_iter is None or recreate_training_iterator:
            training_data_iter = _create_dataset_iterator(
                strategy, training_data_adapter.get_dataset())

          training_result = run_one_epoch(
              model,
              training_data_iter,
              training_function,
              dataset_size=training_data_adapter.get_size(),
              strategy=strategy,
              steps_per_epoch=steps_per_epoch,
              mode=ModeKeys.TRAIN,
              training_context=training_context,
              current_epoch=epoch)
          cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)

          # Evaluation
          if (do_validation and
              training_utils.should_run_validation(validation_freq, epoch) and
              not callbacks.model.stop_training):
            if eval_data_iter is None or recreate_eval_iterator:
              eval_data_iter = _create_dataset_iterator(
                  strategy, validation_adapter.get_dataset())
            eval_context = TrainingContext()
            with eval_context.on_start(
                model, callbacks, verbose=0, mode=ModeKeys.TEST):
              with eval_context.on_epoch(epoch, ModeKeys.TEST):
                model.reset_metrics()
                # NOTE(review): `current_epoch=epochs` (total count) rather
                # than the running `epoch` looks suspicious — confirm against
                # `run_one_epoch`'s expectations before changing.
                eval_result = run_one_epoch(
                    model,
                    eval_data_iter,
                    eval_function,
                    dataset_size=validation_adapter.get_size(),
                    strategy=strategy,
                    steps_per_epoch=validation_steps,
                    mode=ModeKeys.TEST,
                    training_context=eval_context,
                    current_epoch=epochs)
                # NOTE(review): mode is ModeKeys.TRAIN here despite being
                # eval output; presumably make_logs treats TRAIN/TEST alike
                # for metric naming — verify.
                cbks.make_logs(model, epoch_logs, eval_result,
                               ModeKeys.TRAIN, prefix='val_')

    return model.history
def _model_iteration(
    self,
    model,
    mode,
    x=None,
    y=None,
    batch_size=None,
    verbose=1,
    sample_weight=None,
    steps=None,
    callbacks=None,
    **kwargs):
  """Runs a single-epoch loop over `x` for `mode` (evaluate/predict).

  Resolves batch size and steps, builds a data adapter inside the
  distribution-strategy scope, runs one pass with `run_one_epoch`, and
  returns the (possibly unwrapped, if single-element) result.
  """
  batch_size = model._validate_or_infer_batch_size(
      batch_size, steps, x)
  strategy = _get_distribution_strategy(model)
  batch_size, steps = dist_utils.process_batch_and_step_size(
      strategy, x, batch_size, steps, mode)
  dist_utils.validate_callbacks(input_callbacks=callbacks,
                                optimizer=model.optimizer)

  # Enter tf.distribute.Strategy scope.
  with dist_utils.distributed_scope(
      strategy=strategy, learning_phase=0):

    adapter = _process_inputs(
        model,
        x,
        y,
        batch_size=batch_size,
        sample_weights=sample_weight,
        steps=steps,
        distribution_strategy=strategy)

    if not steps:
      steps = adapter.get_size()

    # tf.print('{} on {} steps.'.format(ModeKeys.TRAIN, steps_per_epoch))
    training_context = TrainingContext()

    _update_sample_weight_mode(model, mode, adapter, strategy)
    execution_function = dist_utils._make_execution_function(model, mode)
    data_iterator = _create_dataset_iterator(
        strategy, adapter.get_dataset())

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=False,
        batch_size=batch_size,
        epochs=1,
        steps_per_epoch=steps,
        samples=None,
        verbose=0,  # Handle ProgBarLogger separately in this loop.
        mode=mode)

    with training_context.on_start(model, callbacks, verbose, mode):
      # TODO(scottzhu): Handle TPUStrategy training loop
      with training_context.on_epoch(0, mode) as epoch_logs:
        model.reset_metrics()
        result = run_one_epoch(
            model,
            data_iterator,
            execution_function,
            dataset_size=adapter.get_size(),
            strategy=strategy,
            steps_per_epoch=steps,
            mode=mode,
            training_context=training_context,
            current_epoch=1)
        cbks.make_logs(model, epoch_logs, result, mode)

    # Unwrap a single-output result for caller convenience.
    if len(result) == 1:
      result = result[0]
    return result
def _make_execution_function(model, mode): """Makes function to run one step of model execution.""" if model._distribution_strategy: return distributed_training_utils._make_execution_function(model, mode) return model._make_execution_function(mode)