def training_loop(self, train_ds, test_ds):
  """Custom training and testing loop.

  Args:
    train_ds: Training dataset
    test_ds: Testing dataset

  Returns:
    train_loss, test_loss
  """
  if self.enable_function:
    self.train_step = tf.function(self.train_step)
    self.test_step = tf.function(self.test_step)

  template = 'Epoch: {}, Train Loss: {}, Test Loss: {}'

  for epoch in range(self.epochs):
    self.train_loss_metric.reset_states()
    self.test_loss_metric.reset_states()

    for inp, targ in train_ds:
      self.train_step((inp, targ))

    for inp_test, targ_test in test_ds:
      self.test_step((inp_test, targ_test))

    print(template.format(epoch,
                          self.train_loss_metric.result().numpy(),
                          self.test_loss_metric.result().numpy()))

  return (self.train_loss_metric.result().numpy(),
          self.test_loss_metric.result().numpy())
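# A minimal usage sketch (not from the original source): it assumes the
# surrounding class exposes `epochs`, `enable_function`, `train_step`,
# `test_step` and the two loss metrics, and shows how tf.data pipelines could
# be fed to `training_loop`. `MyTrainer` is a hypothetical constructor.
import numpy as np
import tensorflow as tf

features = np.random.rand(64, 10).astype(np.float32)
labels = np.random.rand(64, 1).astype(np.float32)
train_ds = tf.data.Dataset.from_tensor_slices((features, labels)).batch(8)
test_ds = tf.data.Dataset.from_tensor_slices((features, labels)).batch(8)

trainer = MyTrainer(epochs=2, enable_function=True)  # hypothetical trainer class
train_loss, test_loss = trainer.training_loop(train_ds, test_ds)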
def test_tf_saved_model_save_multiple_signatures(self):
  base_path = os.path.join(self.get_temp_dir(), 'tf_saved_model_save')
  export_path = os.path.join(base_path, '00000123')
  root = tf.train.Checkpoint()
  root.f = tf.function(lambda x: {'y': 1.},
                       input_signature=[tf.TensorSpec(None, tf.float32)])
  root.g = tf.function(lambda x: {'y': 2.},
                       input_signature=[tf.TensorSpec(None, tf.float32)])
  tf.saved_model.experimental.save(
      root, export_path,
      signatures={
          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: root.f,
          'custom_signature_key': root.g})
  _, model_server_address, _ = TensorflowModelServerTest.RunServer(
      'default', base_path)
  expected_version = self._GetModelVersion(base_path)
  self.VerifyPredictRequest(
      model_server_address,
      expected_output=2.0,
      expected_version=expected_version,
      signature_name='custom_signature_key')
  self.VerifyPredictRequest(
      model_server_address,
      expected_output=1.0,
      expected_version=expected_version)
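# A hedged follow-up sketch (not part of the test above): once a model is
# exported with multiple signatures, it can be loaded back with
# tf.saved_model.load and each signature invoked by name. This assumes a
# TF 2.x runtime and reuses the `export_path` built in the test.
loaded = tf.saved_model.load(export_path)
default_fn = loaded.signatures['serving_default']
custom_fn = loaded.signatures['custom_signature_key']
print(default_fn(x=tf.constant(1.0)))  # expected: {'y': 1.0}
print(custom_fn(x=tf.constant(1.0)))   # expected: {'y': 2.0}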
def custom_loop(self, train_iterator, test_iterator,
                num_train_steps_per_epoch, num_test_steps_per_epoch,
                strategy):
  """Custom training and testing loop.

  Args:
    train_iterator: Training iterator created using strategy
    test_iterator: Testing iterator created using strategy
    num_train_steps_per_epoch: number of training steps in an epoch.
    num_test_steps_per_epoch: number of test steps in an epoch.
    strategy: Distribution strategy

  Returns:
    train_loss, train_accuracy, test_loss, test_accuracy
  """

  # this code is expected to change.
  def distributed_train():
    return strategy.experimental_run(self.train_step, train_iterator)

  def distributed_test():
    return strategy.experimental_run(self.test_step, test_iterator)

  if self.enable_function:
    distributed_train = tf.function(distributed_train)
    distributed_test = tf.function(distributed_test)

  for epoch in range(self.epochs):
    self.optimizer.learning_rate = self.decay(epoch)

    train_iterator.initialize()
    for _ in range(num_train_steps_per_epoch):
      distributed_train()

    test_iterator.initialize()
    for _ in range(num_test_steps_per_epoch):
      distributed_test()

    template = ('Epoch: {}, Train Loss: {}, Train Accuracy: {}, '
                'Test Loss: {}, Test Accuracy: {}')
    print(template.format(epoch,
                          self.train_loss_metric.result(),
                          self.train_acc_metric.result(),
                          self.test_loss_metric.result(),
                          self.test_acc_metric.result()))

    if epoch != self.epochs - 1:
      self.train_loss_metric.reset_states()
      self.train_acc_metric.reset_states()
      self.test_loss_metric.reset_states()
      self.test_acc_metric.reset_states()

  return (self.train_loss_metric.result().numpy(),
          self.train_acc_metric.result().numpy(),
          self.test_loss_metric.result().numpy(),
          self.test_acc_metric.result().numpy())
def main(argv):
  del argv
  root = tf.train.Checkpoint()
  # Create a cell and attach to our checkpointable.
  root.rnn_cell = tf.keras.layers.LSTMCell(units=10,
                                           recurrent_initializer=None)

  # Wrap the rnn_cell.__call__ function and assign to next_state.
  root.next_state = tf.function(root.rnn_cell.__call__, autograph=False)

  # Wrap the rnn_cell.get_initial_state function using a decorator and assign
  # to an attribute with the same name.
  @tf.function(input_signature=[tf.TensorSpec([None, None], tf.float32)])
  def get_initial_state(tensor):
    return root.rnn_cell.get_initial_state(tensor, None, None)

  root.get_initial_state = get_initial_state

  # Construct an initial_state, then call next_state explicitly to trigger a
  # trace for serialization (we need an explicit call, because next_state has
  # not been annotated with an input_signature).
  initial_state = root.get_initial_state(
      tf.constant(np.random.uniform(size=[3, 10]).astype(np.float32)))
  root.next_state(
      tf.constant(np.random.uniform(size=[3, 19]).astype(np.float32)),
      initial_state)

  tf.saved_model.save(root, FLAGS.export_dir)
def train(self, dataset, checkpoint_pr):
  """Train the GAN for x number of epochs.

  Args:
    dataset: train dataset.
    checkpoint_pr: prefix in which the checkpoints are stored.

  Returns:
    Time for each epoch.
  """
  time_list = []
  if self.enable_function:
    self.train_step = tf.function(self.train_step)

  for epoch in range(self.epochs):
    start_time = time.time()
    for input_image, target_image in dataset:
      gen_loss, disc_loss = self.train_step(input_image, target_image)

    wall_time_sec = time.time() - start_time
    time_list.append(wall_time_sec)

    # saving (checkpoint) the model every 20 epochs
    if (epoch + 1) % 20 == 0:
      self.checkpoint.save(file_prefix=checkpoint_pr)

    template = 'Epoch {}, Generator loss {}, Discriminator Loss {}'
    print(template.format(epoch, gen_loss, disc_loss))

  return time_list
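# Hedged restore sketch (not in the original snippet): checkpoints written via
# `self.checkpoint.save(file_prefix=checkpoint_pr)` can later be reloaded from
# the directory that contains that prefix. `trainer` and `checkpoint_dir` are
# illustrative placeholders, and `trainer.checkpoint` is assumed to track the
# generator, discriminator and their optimizers.
checkpoint_dir = './training_checkpoints'
latest = tf.train.latest_checkpoint(checkpoint_dir)
if latest:
  trainer.checkpoint.restore(latest)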
def custom_loop(self, train_dataset, test_dataset):
  """Custom training and testing loop.

  Args:
    train_dataset: Training dataset
    test_dataset: Testing dataset

  Returns:
    train_loss, train_accuracy, test_loss, test_accuracy
  """
  if self.enable_function:
    self.train_step = tf.function(self.train_step)
    self.test_step = tf.function(self.test_step)

  for epoch in range(self.epochs):
    self.optimizer.learning_rate = self.decay(epoch)

    for image, label in train_dataset:
      self.train_step(image, label)

    for test_image, test_label in test_dataset:
      self.test_step(test_image, test_label)

    template = ('Epoch: {}, Train Loss: {}, Train Accuracy: {}, '
                'Test Loss: {}, Test Accuracy: {}')
    print(template.format(epoch,
                          self.train_loss_metric.result(),
                          self.train_acc_metric.result(),
                          self.test_loss_metric.result(),
                          self.test_acc_metric.result()))

    if epoch != self.epochs - 1:
      self.train_loss_metric.reset_states()
      self.train_acc_metric.reset_states()
      self.test_loss_metric.reset_states()
      self.test_acc_metric.reset_states()

  return (self.train_loss_metric.result().numpy(),
          self.train_acc_metric.result().numpy(),
          self.test_loss_metric.result().numpy(),
          self.test_acc_metric.result().numpy())
def test_tf_saved_model_save(self):
  base_path = os.path.join(self.get_temp_dir(), 'tf_saved_model_save')
  export_path = os.path.join(base_path, '00000123')
  root = tf.train.Checkpoint()
  root.v1 = tf.Variable(3.)
  root.v2 = tf.Variable(2.)
  root.f = tf.function(lambda x: {'y': root.v1 * root.v2 * x})
  to_save = root.f.get_concrete_function(tf.TensorSpec(None, tf.float32))
  tf.saved_model.experimental.save(root, export_path, to_save)
  _, model_server_address, _ = TensorflowModelServerTest.RunServer(
      'default', base_path)
  expected_version = self._GetModelVersion(base_path)
  self.VerifyPredictRequest(
      model_server_address,
      expected_output=12.0,
      specify_output=False,
      expected_version=expected_version)
def _check_sharding_annotations(self,
                                f_jax,
                                args: Sequence[Any],
                                *,
                                expected: Sequence[str],
                                expected_opt: Sequence[str],
                                num_partitions=2):
  """Check expected patterns in the HLO generated from f_jax and its conversion.

  We run this check on CPU also, which is useful for debugging locally.
  We currently check the unoptimized HLO against `expected` on CPU and TPU,
  and we check the optimized HLO against `expected_opt` on TPU only and
  only for JAX.

  See `self.AssertShardingAnnotations` for documentation of `expected`
  and `expected_opt`.
  """
  if jtu.device_under_test() == "gpu":
    raise unittest.SkipTest("Sharding HLO tests not useful for GPU")

  jax_comp = jax.xla_computation(f_jax)(*args)
  jax_hlo = jax_comp.as_hlo_text()
  if LOG_HLO:
    logging.info("[%s] got JAX HLO %s", self._testMethodName, jax_hlo)
  self.AssertShardingAnnotations("JAX before optimizations", jax_hlo, expected)

  if jtu.device_under_test() == "tpu":
    backend = jax._src.lib.xla_bridge.get_backend()
    num_replicas = 1
    device_assignment = np.arange(num_partitions * num_replicas)
    device_assignment = np.reshape(device_assignment, (-1, num_partitions))
    use_spmd_partitioning = num_partitions > 1
    compile_options = jax._src.lib.xla_bridge.get_compile_options(
        num_replicas=num_replicas,
        num_partitions=num_partitions,
        device_assignment=device_assignment,
        use_spmd_partitioning=use_spmd_partitioning,
    )
    jax_optimized_hlo = backend.compile(
        jax_comp, compile_options).hlo_modules()[0].to_string()
    if LOG_HLO:
      logging.info("[%s] got JAX optimized HLO for platform %s %s",
                   self._testMethodName, backend.platform, jax_optimized_hlo)
    self.AssertShardingAnnotations("JAX after optimizations",
                                   jax_optimized_hlo, expected_opt)

  f_tf = jax2tf.convert(f_jax)
  device_name = f"/device:{jtu.device_under_test().upper()}:0"
  tf_hlo = (tf.function(f_tf, jit_compile=True, autograph=False)
            .experimental_get_compiler_ir(*args)(stage="hlo",
                                                 device_name=device_name))
  if LOG_HLO:
    logging.info("[%s] got TF HLO %s", self._testMethodName, tf_hlo)
  self.AssertShardingAnnotations("TF before optimizations", tf_hlo, expected)

  tf_optimized_hlo = (
      tf.function(f_tf, jit_compile=True)
      .experimental_get_compiler_ir(*args)(stage="optimized_hlo",
                                           device_name=device_name))
  if LOG_HLO:
    logging.info("[%s] got TF optimized HLO for %s: %s", self._testMethodName,
                 device_name, tf_optimized_hlo)
def test_is_defun(self):
  self.assertTrue(function_utils.is_defun(tf.function(lambda x: None)))

  fn = tf.function(lambda x: None, (tf.TensorSpec(None, tf.int32),))
  self.assertTrue(function_utils.is_defun(fn))

  self.assertFalse(function_utils.is_defun(lambda x: None))
  self.assertFalse(function_utils.is_defun(None))
def export_saved_model(model: tf.keras.Model,
                       input_shape: Tuple[int, int, int, int, int],
                       export_path: str = '/tmp/movinet/',
                       causal: bool = False,
                       bundle_input_init_states_fn: bool = True,
                       checkpoint_path: Optional[str] = None) -> None:
  """Exports a MoViNet model to a saved model.

  Args:
    model: the tf.keras.Model to export.
    input_shape: The 5D spatiotemporal input shape of size
      [batch_size, num_frames, image_height, image_width, num_channels].
      Set the field or a shape position in the field to None for dynamic input.
    export_path: Export path to save the saved_model file.
    causal: Run the model in causal mode.
    bundle_input_init_states_fn: Add init_states as a function signature to the
      saved model. This is not necessary if the input shape is static (e.g.,
      for TF Lite).
    checkpoint_path: Checkpoint path to load. Leave blank to keep the model's
      initialization.
  """
  # Use dimensions of 1 except the channels to export faster,
  # since we only really need the last dimension to build and get the output
  # states. These dimensions can be set to `None` once the model is built.
  input_shape_concrete = [1 if s is None else s for s in input_shape]
  model.build(input_shape_concrete)

  # Compile model to generate some internal Keras variables.
  model.compile()

  if checkpoint_path:
    checkpoint = tf.train.Checkpoint(model=model)
    status = checkpoint.restore(checkpoint_path)
    status.assert_existing_objects_matched()

  if causal:
    # Call the model once to get the output states. Call again with `states`
    # input to ensure that the inputs with the `states` argument are built
    # with the full output state shapes.
    input_image = tf.ones(input_shape_concrete)
    _, states = model({
        **model.init_states(input_shape_concrete),
        'image': input_image
    })
    _ = model({**states, 'image': input_image})

    # Create a function to explicitly set the names of the outputs.
    def predict(inputs):
      outputs, states = model(inputs)
      return {**states, 'logits': outputs}

    specs = {
        name: tf.TensorSpec(spec.shape, name=name, dtype=spec.dtype)
        for name, spec in model.initial_state_specs(input_shape).items()
    }
    specs['image'] = tf.TensorSpec(input_shape, dtype=model.dtype,
                                   name='image')

    predict_fn = tf.function(predict, jit_compile=True)
    predict_fn = predict_fn.get_concrete_function(specs)

    init_states_fn = tf.function(model.init_states, jit_compile=True)
    init_states_fn = init_states_fn.get_concrete_function(
        tf.TensorSpec([5], dtype=tf.int32))

    if bundle_input_init_states_fn:
      signatures = {'call': predict_fn, 'init_states': init_states_fn}
    else:
      signatures = predict_fn

    tf.keras.models.save_model(model, export_path, signatures=signatures)
  else:
    _ = model(tf.ones(input_shape_concrete))
    tf.keras.models.save_model(model, export_path)
def convert_and_save_model(
    jax_fn: tp.Callable[[tp.Any, tp.Any], tp.Any],
    params,
    model_dir: str,
    *,
    input_signatures: tp.Sequence[tf.TensorSpec],
    shape_polymorphic_input_spec: tp.Optional[str] = None,
    with_gradient: bool = False,
    enable_xla: bool = True,
    compile_model: bool = True,
    save_model_options: tp.Optional[tf.saved_model.SaveOptions] = None):
  """Converts a JAX function and saves a SavedModel.

  This is an example; for serious uses you will likely want to copy and expand
  it as needed (see the note at the top of the module).

  Use this function if you have a trained ML model that has both a prediction
  function and trained parameters, which you want to save separately from the
  function graph as variables (e.g., to avoid limits on the size of the
  GraphDef, or to enable fine-tuning). If you don't have such parameters, you
  can still use this library function but probably don't need it (see
  jax2tf/README.md for some simple examples).

  In order to use this wrapper you must first convert your model to a function
  with two arguments: the parameters and the input on which you want to do
  inference. Both arguments may be np.ndarray or (nested)
  tuples/lists/dictionaries thereof.

  See the README.md for a discussion of how to prepare Flax and Haiku models.

  Args:
    jax_fn: a JAX function taking two arguments, the parameters and the inputs.
      Both arguments may be (nested) tuples/lists/dictionaries of np.ndarray.
    params: the parameters, to be used as first argument for `jax_fn`. These
      must be (nested) tuples/lists/dictionaries of np.ndarray, and will be
      saved as the variables of the SavedModel.
    model_dir: the directory where the model should be saved.
    input_signatures: the input signatures for the second argument of `jax_fn`
      (the input). A signature must be a `tensorflow.TensorSpec` instance, or a
      (nested) tuple/list/dictionary thereof with a structure matching the
      second argument of `jax_fn`. The first input_signature will be saved as
      the default serving signature. The additional signatures will be used
      only to ensure that the `jax_fn` is traced and converted to TF for the
      corresponding input shapes.
    shape_polymorphic_input_spec: if given then it will be used as the
      `in_shapes` argument to jax2tf.convert for the second parameter of
      `jax_fn`. In this case, a single `input_signatures` is supported, and
      should have `None` in the polymorphic dimensions. Should be a string, or
      a (nested) tuple/list/dictionary thereof with a structure matching the
      second argument of `jax_fn`.
    with_gradient: whether the SavedModel should support gradients. If True,
      then a custom gradient is saved. If False, then a
      tf.raw_ops.PreventGradient is saved to error if a gradient is attempted.
      (At the moment due to a bug in SavedModel, custom gradients are not
      supported.)
    enable_xla: whether the jax2tf converter is allowed to use TFXLA ops. If
      False, the conversion tries harder to use purely TF ops and raises an
      exception if it is not possible. (default: True)
    compile_model: use TensorFlow jit_compiler on the SavedModel. This is
      needed if the SavedModel will be used for TensorFlow serving.
    save_model_options: options to pass to savedmodel.save.
  """
  if not input_signatures:
    raise ValueError("At least one input_signature must be given")
  if shape_polymorphic_input_spec is not None:
    if len(input_signatures) > 1:
      raise ValueError("For shape-polymorphic conversion a single "
                       "input_signature is supported.")
  tf_fn = jax2tf.convert(
      jax_fn,
      with_gradient=with_gradient,
      in_shapes=[None, shape_polymorphic_input_spec],
      enable_xla=enable_xla)

  # Create tf.Variables for the parameters. If you want more useful variable
  # names, you can use `tree.map_structure_with_path` from the `dm-tree`
  # package.
  param_vars = tf.nest.map_structure(
      # Due to a bug in SavedModel it is not possible to use tf.GradientTape
      # on a function converted with jax2tf and loaded from SavedModel. Thus,
      # we mark the variables as non-trainable to ensure that users of the
      # SavedModel will not try to fine-tune them.
      lambda param: tf.Variable(param, trainable=with_gradient),
      params)
  tf_fun = tf.function(
      lambda inputs: tf_fn(param_vars, inputs),
      autograph=False,
      experimental_compile=compile_model)

  signatures = {}
  # This signature is needed for TensorFlow Serving use.
  signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = (
      tf_fun.get_concrete_function(input_signatures[0]))

  for input_signature in input_signatures[1:]:
    # If there are more signatures, trace and cache a TF function for each one.
    tf_fun.get_concrete_function(input_signature)

  wrapper = _ReusableSavedModelWrapper(tf_fun, param_vars)
  tf.saved_model.save(wrapper, model_dir, signatures=signatures,
                      options=save_model_options)
def run_customized_training_loop(
    # pylint: disable=invalid-name
    _sentinel=None,
    # pylint: enable=invalid-name
    strategy=None,
    model_fn=None,
    loss_fn=None,
    scale_loss=True,
    model_dir=None,
    train_input_fn=None,
    steps_per_epoch=None,
    steps_per_loop=1,
    epochs=1,
    eval_input_fn=None,
    eval_steps=None,
    metric_fn=None,
    init_checkpoint=None,
    custom_callbacks=None,
    run_eagerly=False,
    sub_model_export_name=None,
    explicit_allreduce=False,
    pre_allreduce_callbacks=None,
    post_allreduce_callbacks=None,
    train_summary_interval=0):
  """Run BERT pretrain model training using low-level API.

  Arguments:
    _sentinel: Used to prevent positional parameters. Internal, do not use.
    strategy: Distribution strategy on which to run low level training loop.
    model_fn: Function that returns a tuple (model, sub_model). Caller of this
      function should add optimizer to the `model` via calling
      `model.compile()` API or manually setting `model.optimizer` attribute.
      Second element of the returned tuple (sub_model) is an optional sub
      model to be used for initial checkpoint -- if provided.
    loss_fn: Function with signature func(labels, logits) and returns a loss
      tensor.
    scale_loss: Whether to divide the raw loss by number of replicas before
      gradients calculation.
    model_dir: Model directory used during training for restoring/saving model
      weights.
    train_input_fn: Function that returns a tf.data.Dataset used for training.
    steps_per_epoch: Number of steps to run per epoch. At the end of each
      epoch, model checkpoint will be saved and evaluation will be conducted
      if evaluation dataset is provided.
    steps_per_loop: Number of steps per graph-mode loop. In order to reduce
      communication in eager context, training logs are printed every
      steps_per_loop.
    epochs: Number of epochs to train.
    eval_input_fn: Function that returns evaluation dataset. If none,
      evaluation is skipped.
    eval_steps: Number of steps to run evaluation. Required if `eval_input_fn`
      is not none.
    metric_fn: A metrics function that returns a Keras Metric object to record
      evaluation result using evaluation dataset or with training dataset
      after every epoch.
    init_checkpoint: Optional checkpoint to load to `sub_model` returned by
      `model_fn`.
    custom_callbacks: A list of Keras Callbacks objects to run during
      training. More specifically, `on_batch_begin()`, `on_batch_end()`,
      `on_epoch_begin()`, `on_epoch_end()` methods are invoked during
      training. Note that some metrics may be missing from `logs`.
    run_eagerly: Whether to run model training in pure eager execution. This
      should be disabled for TPUStrategy.
    sub_model_export_name: If not None, will export `sub_model` returned by
      `model_fn` into checkpoint files. The name of intermediate checkpoint
      file is {sub_model_export_name}_step_{step}.ckpt and the last
      checkpoint's name is {sub_model_export_name}.ckpt; if None, `sub_model`
      will not be exported as checkpoint.
    explicit_allreduce: Whether to explicitly perform gradient allreduce,
      instead of relying on implicit allreduce in optimizer.apply_gradients().
      default is False. For now, if training using FP16 mixed precision,
      explicit allreduce will aggregate gradients in FP16 format. For TPU and
      GPU training using FP32, explicit allreduce will aggregate gradients in
      FP32 format.
    pre_allreduce_callbacks: A list of callback functions that takes gradients
      and model variables pairs as input, manipulates them, and returns new
      gradients and model variables pairs. The callback functions will be
      invoked in the list order and before gradients are allreduced. With
      mixed precision training, the pre_allreduce_callbacks will be applied on
      scaled_gradients. Default is no callbacks. Only used when
      explicit_allreduce=True.
    post_allreduce_callbacks: A list of callback functions that takes
      gradients and model variables pairs as input, manipulates them, and
      returns new gradients and model variables pairs. The callback functions
      will be invoked in the list order and right before gradients are applied
      to variables for updates. Default is no callbacks. Only used when
      explicit_allreduce=True.
    train_summary_interval: Step interval for training summaries. If the value
      is a negative number, then training summaries are not enabled.

  Returns:
    Trained model.

  Raises:
    ValueError: (1) When model returned by `model_fn` does not have optimizer
      attribute or when required parameters are set to none. (2) eval args are
      not specified correctly. (3) metric_fn must be a callable if specified.
      (4) sub_model_checkpoint_name is specified, but `sub_model` returned by
      `model_fn` is None.
  """

  if _sentinel is not None:
    raise ValueError('only call `run_customized_training_loop()` '
                     'with named arguments.')

  required_arguments = [
      strategy, model_fn, loss_fn, model_dir, steps_per_epoch, train_input_fn
  ]
  if [arg for arg in required_arguments if arg is None]:
    raise ValueError('`strategy`, `model_fn`, `loss_fn`, `model_dir`, '
                     '`steps_per_epoch` and `train_input_fn` are required '
                     'parameters.')
  if steps_per_loop > steps_per_epoch:
    logging.error(
        'steps_per_loop: %d is specified to be greater than '
        ' steps_per_epoch: %d, we will use steps_per_epoch as'
        ' steps_per_loop.', steps_per_loop, steps_per_epoch)
    steps_per_loop = steps_per_epoch
  assert tf.executing_eagerly()

  if run_eagerly:
    if isinstance(strategy, tf.distribute.experimental.TPUStrategy):
      raise ValueError(
          'TPUStrategy should not run eagerly as it heavily relies on graph'
          ' optimization for the distributed system.')

  if eval_input_fn and (eval_steps is None or metric_fn is None):
    raise ValueError(
        '`eval_steps` and `metric_fn` are required when `eval_input_fn` '
        'is not none.')
  if metric_fn and not callable(metric_fn):
    raise ValueError(
        'if `metric_fn` is specified, metric_fn must be a callable.')

  callback_list = tf.keras.callbacks.CallbackList(custom_callbacks)

  total_training_steps = steps_per_epoch * epochs
  train_iterator = _get_input_iterator(train_input_fn, strategy)

  with distribution_utils.get_strategy_scope(strategy):
    # To correctly place the model weights on accelerators,
    # model and optimizer should be created in scope.
    model, sub_model = model_fn()
    if not hasattr(model, 'optimizer'):
      raise ValueError('User should set optimizer attribute to model '
                       'inside `model_fn`.')
    if sub_model_export_name and sub_model is None:
      raise ValueError('sub_model_export_name is specified as %s, but '
                       'sub_model is None.' % sub_model_export_name)

    optimizer = model.optimizer

    if init_checkpoint:
      logging.info(
          'Checkpoint file %s found and restoring from '
          'initial checkpoint for core model.', init_checkpoint)
      checkpoint = tf.train.Checkpoint(model=sub_model)
      checkpoint.restore(init_checkpoint).assert_existing_objects_matched()
      logging.info('Loading from checkpoint file completed')

    train_loss_metric = tf.keras.metrics.Mean('training_loss',
                                              dtype=tf.float32)
    eval_metrics = [metric_fn()] if metric_fn else []
    # If evaluation is required, make a copy of metric as it will be used by
    # both train and evaluation.
    train_metrics = [
        metric.__class__.from_config(metric.get_config())
        for metric in eval_metrics
    ]

    # Create summary writers
    if _should_export_summary(strategy):
      summary_dir = os.path.join(model_dir, 'summaries')
    else:
      # In multi worker training we need every worker to write summary,
      # because variables can trigger synchronization on read and
      # synchronization needs all workers to participate.
      summary_dir = tempfile.mkdtemp()
    eval_summary_writer = tf.summary.create_file_writer(
        os.path.join(summary_dir, 'eval'))
    last_summary_step = 0
    if steps_per_loop >= _MIN_SUMMARY_STEPS and train_summary_interval >= 0:
      # Only writes summary when the stats are collected sufficiently over
      # enough steps.
      train_summary_writer = tf.summary.create_file_writer(
          os.path.join(summary_dir, 'train'))
    else:
      train_summary_writer = tf.summary.create_noop_writer()

    # Collects training variables.
    training_vars = model.trainable_variables

    def _replicated_step(inputs):
      """Replicated training step."""
      inputs, labels = inputs
      with tf.GradientTape() as tape:
        model_outputs = model(inputs, training=True)
        loss = loss_fn(labels, model_outputs)
        # Raw loss is used for reporting in metrics/logs.
        raw_loss = loss
        if scale_loss:
          # Scales down the loss for gradients to be invariant from replicas.
          loss = loss / strategy.num_replicas_in_sync

      if explicit_allreduce:
        grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
                                                     training_vars,
                                                     pre_allreduce_callbacks,
                                                     post_allreduce_callbacks)
      else:
        if isinstance(
            optimizer,
            tf.keras.mixed_precision.experimental.LossScaleOptimizer):
          with tape:
            scaled_loss = optimizer.get_scaled_loss(loss)
          scaled_grads = tape.gradient(scaled_loss, training_vars)
          grads = optimizer.get_unscaled_gradients(scaled_grads)
        else:
          grads = tape.gradient(loss, training_vars)
        optimizer.apply_gradients(zip(grads, training_vars))
      # For reporting, the metric takes the mean of losses.
      train_loss_metric.update_state(raw_loss)
      for metric in train_metrics:
        metric.update_state(labels, model_outputs)

    @tf.function
    def train_steps(iterator, steps):
      """Performs distributed training steps in a loop.

      Args:
        iterator: the distributed iterator of training datasets.
        steps: a tf.int32 integer tensor to specify number of steps to run
          inside host training loop.

      Raises:
        ValueError: Any of the arguments or tensor shapes are invalid.
      """
      if not isinstance(steps, tf.Tensor):
        raise ValueError('steps should be a Tensor. Python object may cause '
                         'retracing.')

      for _ in tf.range(steps):
        strategy.run(_replicated_step, args=(next(iterator),))

    def train_single_step(iterator):
      """Performs a distributed training step.

      Args:
        iterator: the distributed iterator of training datasets.

      Raises:
        ValueError: Any of the arguments or tensor shapes are invalid.
      """
      strategy.run(_replicated_step, args=(next(iterator),))

    def test_step(iterator):
      """Calculates evaluation metrics on distributed devices."""

      def _test_step_fn(inputs):
        """Replicated accuracy calculation."""
        inputs, labels = inputs
        model_outputs = model(inputs, training=False)
        for metric in eval_metrics:
          metric.update_state(labels, model_outputs)

      strategy.run(_test_step_fn, args=(next(iterator),))

    if not run_eagerly:
      train_single_step = tf.function(train_single_step)
      test_step = tf.function(test_step)

    def _run_evaluation(current_training_step, test_iterator):
      """Runs validation steps and aggregate metrics.

      Args:
        current_training_step: tf.int32 tensor containing the current step.
        test_iterator: distributed iterator of test datasets.

      Returns:
        A dict of metric names and values.
      """
      for _ in range(eval_steps):
        test_step(test_iterator)

      logs = {}
      with eval_summary_writer.as_default():
        for metric in eval_metrics + model.metrics:
          metric_value = _float_metric_value(metric)
          logs[metric.name] = metric_value
          logging.info('Step: [%d] Validation %s = %f', current_training_step,
                       metric.name, metric_value)
          tf.summary.scalar(metric.name, metric_value,
                            step=current_training_step)
        eval_summary_writer.flush()

      return logs

    # Training loop starts here.
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer,
                                     global_step=optimizer.iterations)
    sub_model_checkpoint = tf.train.Checkpoint(
        model=sub_model,
        global_step=optimizer.iterations) if sub_model_export_name else None

    latest_checkpoint_file = tf.train.latest_checkpoint(model_dir)
    if latest_checkpoint_file:
      logging.info('Checkpoint file %s found and restoring from checkpoint',
                   latest_checkpoint_file)
      checkpoint.restore(latest_checkpoint_file)
      logging.info('Loading from checkpoint file completed')

    current_step = optimizer.iterations.numpy()
    checkpoint_name = 'ctl_step_{step}.ckpt'

    while current_step < total_training_steps:
      if current_step % steps_per_epoch == 0:
        callback_list.on_epoch_begin(int(current_step / steps_per_epoch) + 1)

      # Training loss/metric are taking average over steps inside micro
      # training loop. We reset their values before each round.
      train_loss_metric.reset_states()
      for metric in train_metrics + model.metrics:
        metric.reset_states()

      callback_list.on_batch_begin(current_step)
      # Runs several steps in the host while loop.
      steps = steps_to_run(current_step, steps_per_epoch, steps_per_loop)

      if tf.config.list_physical_devices('GPU'):
        # TODO(zongweiz): merge with train_steps once tf.while_loop
        # GPU performance bugs are fixed.
        for _ in range(steps):
          train_single_step(train_iterator)
      else:
        # Converts steps to a Tensor to avoid tf.function retracing.
        train_steps(train_iterator,
                    tf.convert_to_tensor(steps, dtype=tf.int32))
      train_loss = _float_metric_value(train_loss_metric)
      current_step += steps
      callback_list.on_batch_end(current_step - 1, {'loss': train_loss})

      # Updates training logging.
      training_status = 'Train Step: %d/%d / loss = %s' % (
          current_step, total_training_steps, train_loss)

      if current_step >= last_summary_step + train_summary_interval:
        summary_writer = train_summary_writer
        last_summary_step = current_step
      else:
        summary_writer = tf.summary.create_noop_writer()

      with summary_writer.as_default():
        tf.summary.scalar(train_loss_metric.name, train_loss,
                          step=current_step)
        for metric in train_metrics + model.metrics:
          metric_value = _float_metric_value(metric)
          training_status += ' %s = %f' % (metric.name, metric_value)
          tf.summary.scalar(metric.name, metric_value, step=current_step)
        summary_writer.flush()
      logging.info(training_status)

      if current_step % steps_per_epoch == 0:
        # Save a submodel with the step in the file name after each epoch.
        if sub_model_export_name:
          _save_checkpoint(
              strategy, sub_model_checkpoint, model_dir,
              '%s_step_%d.ckpt' % (sub_model_export_name, current_step))

        # Save model checkpoints and run validation steps after each epoch
        # (with the exception of the final epoch which is handled after the
        # training loop).
        if current_step < total_training_steps:
          _save_checkpoint(strategy, checkpoint, model_dir,
                           checkpoint_name.format(step=current_step))
          logs = None
          if eval_input_fn:
            logging.info('Running evaluation after step: %s.', current_step)
            logs = _run_evaluation(
                current_step, _get_input_iterator(eval_input_fn, strategy))
            # Re-initialize evaluation metric.
            for metric in eval_metrics + model.metrics:
              metric.reset_states()

          callback_list.on_epoch_end(int(current_step / steps_per_epoch),
                                     logs)

    if sub_model_export_name:
      _save_checkpoint(strategy, sub_model_checkpoint, model_dir,
                       '%s.ckpt' % sub_model_export_name)

    _save_checkpoint(strategy, checkpoint, model_dir,
                     checkpoint_name.format(step=current_step))
    logs = None
    if eval_input_fn:
      logging.info('Running final evaluation after training is complete.')
      logs = _run_evaluation(
          current_step, _get_input_iterator(eval_input_fn, strategy))
    callback_list.on_epoch_end(int(current_step / steps_per_epoch), logs)

    training_summary = {
        'total_training_steps': total_training_steps,
        'train_loss': _float_metric_value(train_loss_metric),
    }
    for metric in model.metrics:
      training_summary[metric.name] = _float_metric_value(metric)
    if eval_metrics:
      # TODO(hongkuny): Cleans up summary reporting in text.
      training_summary['last_train_metrics'] = _float_metric_value(
          train_metrics[0])
      training_summary['eval_metrics'] = _float_metric_value(eval_metrics[0])

    write_txt_summary(training_summary, summary_dir)

    if not _should_export_summary(strategy):
      tf.io.gfile.rmtree(summary_dir)

    return model
def jit_compile(self, f: Callable) -> Callable:
  return tf.function(f)
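# Hedged usage sketch (not from the original source): the wrapper above simply
# defers to tf.function, so any Python callable can be passed through it.
# `backend` is a placeholder for whatever object defines jit_compile.
square = backend.jit_compile(lambda x: x * x)
print(square(tf.constant(3.0)))  # tf.Tensor(9.0, shape=(), dtype=float32)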
def infer(self, features_file, predictions_file=None, checkpoint_path=None,
          log_time=False):
  """Runs inference.

  Args:
    features_file: The file(s) to infer from.
    predictions_file: If set, predictions are saved in this file.
    checkpoint_path: Path of a specific checkpoint to predict. If ``None``,
      the latest is used.
    log_time: If ``True``, several time metrics will be printed in the logs at
      the end of the inference loop.
  """
  checkpoint, config = self._init_run()
  checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
  model = checkpoint.model
  infer_config = config["infer"]
  dataset = model.examples_inputter.make_inference_dataset(
      features_file,
      infer_config["batch_size"],
      length_bucket_width=infer_config["length_bucket_width"],
      prefetch_buffer_size=infer_config.get("prefetch_buffer_size"))

  if predictions_file:
    stream = io.open(predictions_file, encoding="utf-8", mode="w")
  else:
    stream = sys.stdout

  ordered_writer = None
  infer_fn = tf.function(model.infer,
                         input_signature=(dataset.element_spec,))
  write_fn = lambda prediction: (
      model.print_prediction(prediction, params=infer_config, stream=stream))

  total_time = 0
  total_tokens = 0
  total_examples = 0
  start_time = time.time()

  for source in dataset:
    predictions = infer_fn(source)
    predictions = tf.nest.map_structure(lambda t: t.numpy(), predictions)
    end_time = time.time()
    if log_time:
      total_time += end_time - start_time
      batch_size = next(six.itervalues(predictions)).shape[0]
      total_examples += batch_size
      length = predictions.get("length")
      if length is not None:
        if len(length.shape) == 2:
          length = length[:, 0]
        total_tokens += sum(length)
    for prediction in misc.extract_batches(predictions):
      if "index" in prediction:
        if ordered_writer is None:
          ordered_writer = misc.OrderRestorer(
              index_fn=lambda prediction: prediction["index"],
              callback_fn=write_fn)
        ordered_writer.push(prediction)
      else:
        write_fn(prediction)
    start_time = time.time()

  if log_time:
    tf.get_logger().info("Total prediction time (s): %f", total_time)
    tf.get_logger().info("Average prediction time (s): %f",
                         total_time / total_examples)
    if total_tokens > 0:
      tf.get_logger().info("Tokens per second: %f",
                           total_tokens / total_time)
  if predictions_file:
    stream.close()
    logits_aux4 = aux4(feat4)
    logits_aux5_4 = aux5_4(feat5_4)

    return tf.keras.Model(
        inputs=x,
        outputs=[logits, logits_aux2, logits_aux3, logits_aux4, logits_aux5_4],
        name="BiSeNetV2",
    )


if __name__ == "__main__":
    import time

    input_shape = (360, 640, 3)
    model = get_bisenetv2(input_shape, n_classes=2)
    model.summary()
    model.compile("adam", "mse")
    model = tf.function(model)

    image = tf.random.normal((1, *input_shape))

    # warm up
    for i in range(10):
        model(image)

    iters = 200
    init = time.time()
    for i in range(iters):
        model(image)
    end = time.time() - init
    print(f"FPS {1/(end/iters)}")
    print(f"Time {end/iters}")
sigm25_1 = sigmoid(conv25_3)
concat25_1 = Concatenate()([conv24_3, sigm25_1])

model = Model(inputs=inputs,
              outputs=[concat23_1, concat25_1, concat50_3, concat50_5])
model.summary()

tf.saved_model.save(model, 'saved_model_{}_{}x{}'.format(ds, height, width))
# model.save('footprints_{}_{}x{}_float32.h5'.format(ds, height, width).format(height, width))

full_model = tf.function(lambda inputs: model(inputs))
full_model = full_model.get_concrete_function(
    inputs=[tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype)])
frozen_func = convert_variables_to_constants_v2(full_model,
                                                lower_control_flow=False)
frozen_func.graph.as_graph_def()
tf.io.write_graph(graph_or_graph_def=frozen_func.graph,
                  logdir=".",
                  name="footprints_{}_{}x{}_float32.pb".format(ds, height, width),
                  as_text=False)

# No Quantization - Input/Output=float32
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open('footprints_{}_{}x{}_float32.tflite'.format(ds, height, width), 'wb') as w:
    w.write(tflite_model)
print("tflite convert complete! - footprints_{}_{}x{}_float32.tflite".format(ds, height, width))
def run(flags_obj):
  """Run ResNet ImageNet training and eval loop using custom training loops.

  Args:
    flags_obj: An object containing parsed flag values.

  Raises:
    ValueError: If fp16 is passed as it is not currently supported.

  Returns:
    Dictionary of training and eval stats.
  """
  keras_utils.set_session_config(enable_eager=flags_obj.enable_eager,
                                 enable_xla=flags_obj.enable_xla)

  dtype = flags_core.get_tf_dtype(flags_obj)
  if dtype == tf.float16:
    policy = tf.compat.v2.keras.mixed_precision.experimental.Policy(
        'mixed_float16')
    tf.compat.v2.keras.mixed_precision.experimental.set_policy(policy)
  elif dtype == tf.bfloat16:
    policy = tf.compat.v2.keras.mixed_precision.experimental.Policy(
        'mixed_bfloat16')
    tf.compat.v2.keras.mixed_precision.experimental.set_policy(policy)

  # This only affects GPU.
  common.set_cudnn_batchnorm_mode()

  # TODO(anj-s): Set data_format without using Keras.
  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first'
                   if tf.test.is_built_with_cuda() else 'channels_last')
  tf.keras.backend.set_image_data_format(data_format)

  strategy = distribution_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus,
      num_workers=distribution_utils.configure_cluster(),
      all_reduce_alg=flags_obj.all_reduce_alg,
      num_packs=flags_obj.num_packs,
      tpu_address=flags_obj.tpu)

  train_ds, test_ds = get_input_dataset(flags_obj, strategy)
  per_epoch_steps, train_epochs, eval_steps = get_num_train_iterations(
      flags_obj)
  steps_per_loop = min(flags_obj.steps_per_loop, per_epoch_steps)
  logging.info(
      "Training %d epochs, each epoch has %d steps, "
      "total steps: %d; Eval %d steps", train_epochs, per_epoch_steps,
      train_epochs * per_epoch_steps, eval_steps)

  time_callback = keras_utils.TimeHistory(flags_obj.batch_size,
                                          flags_obj.log_steps)

  with distribution_utils.get_strategy_scope(strategy):
    resnet_model.change_keras_layer(flags_obj.use_tf_keras_layers)
    model = resnet_model.resnet50(
        num_classes=imagenet_preprocessing.NUM_CLASSES,
        batch_size=flags_obj.batch_size,
        use_l2_regularizer=not flags_obj.single_l2_loss_op)

    lr_schedule = common.PiecewiseConstantDecayWithWarmup(
        batch_size=flags_obj.batch_size,
        epoch_size=imagenet_preprocessing.NUM_IMAGES['train'],
        warmup_epochs=common.LR_SCHEDULE[0][1],
        boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
        multipliers=list(p[0] for p in common.LR_SCHEDULE),
        compute_lr_on_cpu=True)
    optimizer = common.get_optimizer(lr_schedule)

    if dtype == tf.float16:
      loss_scale = flags_core.get_loss_scale(flags_obj, default_for_fp16=128)
      optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
          optimizer, loss_scale)
    elif flags_obj.fp16_implementation == 'graph_rewrite':
      # `dtype` is still float32 in this case. We build the graph in float32
      # and let the graph rewrite change parts of it to float16.
      if not flags_obj.use_tf_function:
        raise ValueError('--fp16_implementation=graph_rewrite requires '
                         '--use_tf_function to be true')
      loss_scale = flags_core.get_loss_scale(flags_obj, default_for_fp16=128)
      optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
          optimizer, loss_scale)

    current_step = 0
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
    latest_checkpoint = tf.train.latest_checkpoint(flags_obj.model_dir)
    if latest_checkpoint:
      checkpoint.restore(latest_checkpoint)
      logging.info("Load checkpoint %s", latest_checkpoint)
      current_step = optimizer.iterations.numpy()

    train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
    training_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        'training_accuracy', dtype=tf.float32)
    test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        'test_accuracy', dtype=tf.float32)

    trainable_variables = model.trainable_variables

    def step_fn(inputs):
      """Per-Replica StepFn."""
      images, labels = inputs
      with tf.GradientTape() as tape:
        logits = model(images, training=True)
        prediction_loss = tf.keras.losses.sparse_categorical_crossentropy(
            labels, logits)
        loss = tf.reduce_sum(prediction_loss) * (1.0 / flags_obj.batch_size)
        num_replicas = tf.distribute.get_strategy().num_replicas_in_sync

        if flags_obj.single_l2_loss_op:
          l2_loss = resnet_model.L2_WEIGHT_DECAY * 2 * tf.add_n([
              tf.nn.l2_loss(v)
              for v in trainable_variables
              if 'bn' not in v.name
          ])
          loss += (l2_loss / num_replicas)
        else:
          loss += (tf.reduce_sum(model.losses) / num_replicas)

        # Scale the loss
        if flags_obj.dtype == "fp16":
          loss = optimizer.get_scaled_loss(loss)

      grads = tape.gradient(loss, trainable_variables)

      # Unscale the grads
      if flags_obj.dtype == "fp16":
        grads = optimizer.get_unscaled_gradients(grads)

      optimizer.apply_gradients(zip(grads, trainable_variables))
      train_loss.update_state(loss)
      training_accuracy.update_state(labels, logits)

    @tf.function
    def train_steps(iterator, steps):
      """Performs distributed training steps in a loop."""
      for _ in tf.range(steps):
        strategy.experimental_run_v2(step_fn, args=(next(iterator),))

    def train_single_step(iterator):
      if strategy:
        strategy.experimental_run_v2(step_fn, args=(next(iterator),))
      else:
        return step_fn(next(iterator))

    def test_step(iterator):
      """Evaluation StepFn."""
      def step_fn(inputs):
        images, labels = inputs
        logits = model(images, training=False)
        loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits)
        loss = tf.reduce_sum(loss) * (1.0 / flags_obj.batch_size)
        test_loss.update_state(loss)
        test_accuracy.update_state(labels, logits)

      if strategy:
        strategy.experimental_run_v2(step_fn, args=(next(iterator),))
      else:
        step_fn(next(iterator))

    if flags_obj.use_tf_function:
      train_single_step = tf.function(train_single_step)
      test_step = tf.function(test_step)

    if flags_obj.enable_tensorboard:
      summary_writer = tf.summary.create_file_writer(flags_obj.model_dir)
    else:
      summary_writer = None

    train_iter = iter(train_ds)
    time_callback.on_train_begin()
    for epoch in range(current_step // per_epoch_steps, train_epochs):
      train_loss.reset_states()
      training_accuracy.reset_states()

      steps_in_current_epoch = 0
      while steps_in_current_epoch < per_epoch_steps:
        time_callback.on_batch_begin(
            steps_in_current_epoch + epoch * per_epoch_steps)
        steps = _steps_to_run(steps_in_current_epoch, per_epoch_steps,
                              steps_per_loop)
        if steps == 1:
          train_single_step(train_iter)
        else:
          # Converts steps to a Tensor to avoid tf.function retracing.
          train_steps(train_iter,
                      tf.convert_to_tensor(steps, dtype=tf.int32))
        time_callback.on_batch_end(
            steps_in_current_epoch + epoch * per_epoch_steps)
        steps_in_current_epoch += steps

      logging.info('Training loss: %s, accuracy: %s at epoch %d',
                   train_loss.result().numpy(),
                   training_accuracy.result().numpy(), epoch + 1)

      if (not flags_obj.skip_eval and
          (epoch + 1) % flags_obj.epochs_between_evals == 0):
        test_loss.reset_states()
        test_accuracy.reset_states()

        test_iter = iter(test_ds)
        for _ in range(eval_steps):
          test_step(test_iter)

        logging.info('Test loss: %s, accuracy: %s%% at epoch: %d',
                     test_loss.result().numpy(),
                     test_accuracy.result().numpy(), epoch + 1)

      if flags_obj.enable_checkpoint_and_export:
        checkpoint_name = checkpoint.save(
            os.path.join(flags_obj.model_dir,
                         'model.ckpt-{}'.format(epoch + 1)))
        logging.info('Saved checkpoint to %s', checkpoint_name)

      if summary_writer:
        current_steps = steps_in_current_epoch + (epoch * per_epoch_steps)
        with summary_writer.as_default():
          tf.summary.scalar('train_loss', train_loss.result(), current_steps)
          tf.summary.scalar('train_accuracy', training_accuracy.result(),
                            current_steps)
          tf.summary.scalar('eval_loss', test_loss.result(), current_steps)
          tf.summary.scalar('eval_accuracy', test_accuracy.result(),
                            current_steps)

    time_callback.on_train_end()
    if summary_writer:
      summary_writer.close()

    eval_result = None
    train_result = None
    if not flags_obj.skip_eval:
      eval_result = [test_loss.result().numpy(),
                     test_accuracy.result().numpy()]
      train_result = [train_loss.result().numpy(),
                      training_accuracy.result().numpy()]

    stats = build_stats(train_result, eval_result, time_callback)
    return stats
import os

import tensorflow as tf
from tensorflow import keras

# tf.function and auto-graph.
def scaled_elu(z, scale=1.0, alpha=1.0):
    # z >= 0 ? scale * z : scale * alpha * tf.nn.elu(z)
    is_positive = tf.greater_equal(z, 0.0)
    return scale * tf.where(is_positive, z, alpha * tf.nn.elu(z))

print(scaled_elu(tf.constant(-3.)))
print(scaled_elu(tf.constant([-3, -2.5])))

scaled_elu_tf = tf.function(scaled_elu)
print(scaled_elu_tf(tf.constant(-3.)))
print(scaled_elu_tf(tf.constant([-3, -2.5])))

print(scaled_elu_tf.python_function is scaled_elu)

# The main benefit of the conversion is speed.
def converge_to_2(n_iters):
    total = tf.constant(0.)
    increment = tf.constant(1.)
    for _ in range(n_iters):
        total += increment
        increment /= 2.0
    return total
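# A hedged follow-up sketch (not in the original snippet): timing the eager
# function against its tf.function-converted counterpart to illustrate the
# speed claim above. Exact numbers depend on hardware.
import timeit

converge_to_2_tf = tf.function(converge_to_2)
print("eager   :", timeit.timeit(lambda: converge_to_2(100), number=100))
print("function:", timeit.timeit(lambda: converge_to_2_tf(100), number=100))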
def policy_vtest():
  """Autoaugment test policy for debugging."""
  # Each tuple is an augmentation operation of the form
  # (operation, probability, magnitude). Each element in policy is a
  # sub-policy that will be applied sequentially on the image.
  policy = [
      [('TranslateX', 1.0, 4), ('Equalize', 1.0, 10)],
  ]
  return policy


# pylint: disable=g-long-lambda
blend = tf.function(lambda i1, i2, factor: tf.cast(
    tfa_image.blend(tf.cast(i1, tf.float32), tf.cast(i2, tf.float32), factor),
    tf.uint8))
# pylint: enable=g-long-lambda


def random_erase(image,
                 prob,
                 min_area=0.02,
                 max_area=1 / 3,
                 min_aspect=1 / 3,
                 max_aspect=10 / 3,
                 mode='pixel'):
  """The random erasing augmentations: https://arxiv.org/pdf/1708.04896.pdf.

  This augmentation is applied after image normalization.
def custom_loop(self, epoch, optimizer, train_dist_dataset, test_dist_dataset,
                strategy):
    """Custom training and testing loop.

    Args:
      epoch: Current epoch index.
      optimizer: Optimizer used to apply the gradients.
      train_dist_dataset: Training dataset created using strategy.
      test_dist_dataset: Testing dataset created using strategy.
      strategy: Distribution strategy.

    Returns:
      train_loss, train_accuracy, test_loss, test_accuracy
    """

    def distributed_train_step(dataset_inputs):
        per_replica_losses = strategy.experimental_run_v2(
            self.train_step, args=(dataset_inputs, optimizer,))
        return strategy.reduce(tf.distribute.ReduceOp.SUM,
                               per_replica_losses, axis=None)

    def distributed_test_step(dataset_inputs):
        per_replica_losses = strategy.experimental_run_v2(
            self.test_step, args=(dataset_inputs,))
        return strategy.reduce(tf.distribute.ReduceOp.SUM,
                               per_replica_losses, axis=None)

    if self.enable_function:
        distributed_train_step = tf.function(distributed_train_step)
        distributed_test_step = tf.function(distributed_test_step)

    self.train_top1_metric.reset_states()
    self.train_top5_metric.reset_states()
    self.val_top1_metric.reset_states()
    self.val_top5_metric.reset_states()
    self.batch_time.reset()

    optimizer.learning_rate = self.decay(epoch)
    print('learningRate: {:.4f}'.format(optimizer.learning_rate.numpy()))

    train_total_loss = 0.0
    num_train_batches = 0.0
    for one_batch in train_dist_dataset:
        end = time.time()
        if args.WarmingUp:
            if epoch < args.learning_rate_schedule[0]:
                batch_learning_rate = (
                    self.decay(epoch)
                    + float(num_train_batches / np.ceil(args.train_num / args.batchSize))
                    * args.learning_rate / args.learning_rate_schedule[0])
                optimizer.learning_rate = batch_learning_rate
                # print('learningRate: {:.4f}'.format(optimizer.learning_rate.numpy()))
        train_total_loss += distributed_train_step(one_batch)
        num_train_batches += 1
        self.batch_time.update(time.time() - end)
        if num_train_batches % args.print_freq == 0:
            print('learningRate: {:.4f}'.format(optimizer.learning_rate.numpy()))
            template = ('Epoch: {}({}/{})\tTime:{:.4f}({:.4f})\tLoss: {:.4f}\tTop1_Accuracy: {:.4f}\tTop5_Accuracy: {:.4f}')
            print(template.format(epoch, int(num_train_batches),
                                  int(np.ceil(args.train_num / args.batchSize)),
                                  self.batch_time.val, self.batch_time.avg,
                                  train_total_loss / num_train_batches,
                                  100 * self.train_top1_metric.result(),
                                  100 * self.train_top5_metric.result()))

    self.batch_time.reset()

    val_total_loss = 0.0
    num_val_batches = 0.0
    for one_batch in test_dist_dataset:
        end = time.time()
        val_total_loss += distributed_test_step(one_batch)
        num_val_batches += 1
        self.batch_time.update(time.time() - end)
        if num_val_batches % args.print_freq == 0:
            template = ('Val: {}({}/{})\tTime:{:.4f}({:.4f})\tLoss: {:.4f}\tTop1_Accuracy: {:.4f}\tTop5_Accuracy: {:.4f}')
            print(template.format(epoch, int(num_val_batches),
                                  int(np.ceil(args.val_num / args.batchSize)),
                                  self.batch_time.val, self.batch_time.avg,
                                  val_total_loss / num_val_batches,
                                  100 * self.val_top1_metric.result(),
                                  100 * self.val_top5_metric.result()))

    return (train_total_loss / num_train_batches,
            100 * self.train_top1_metric.result().numpy(),
            100 * self.train_top5_metric.result().numpy(),
            val_total_loss / num_val_batches,
            100 * self.val_top1_metric.result().numpy(),
            100 * self.val_top5_metric.result().numpy())
def run_test_case(self, func, feed_dict, input_names_with_port, output_names_with_port,
                  rtol=1e-07, atol=1e-5, convert_var_to_const=True, constant_fold=True,
                  check_value=True, check_shape=True, check_dtype=True, process_args=None,
                  onnx_feed_dict=None, graph_validator=None, as_session=False,
                  large_model=False):
    # optional - passed to process_tf_graph
    if process_args is None:
        process_args = {}
    # optional - pass distinct feed_dict to onnx runtime
    if onnx_feed_dict is None:
        onnx_feed_dict = feed_dict
    input_names_with_port = list(feed_dict)
    tf_reset_default_graph()
    graph_def = None
    initialized_tables = None

    np.random.seed(1)  # Make it reproducible.
    clean_feed_dict = {utils.node_name(k): v for k, v in feed_dict.items()}

    if is_tf2() and not as_session:
        #
        # use eager to execute the tensorflow func
        #
        # numpy doesn't work for all ops, make it tf.Tensor()
        input_tensors = [tf.TensorSpec(shape=v.shape, dtype=tf.as_dtype(v.dtype),
                                       name=utils.node_name(k))
                         for k, v in feed_dict.items()]
        input_list = [tf.convert_to_tensor(v, dtype=tf.as_dtype(v.dtype),
                                           name=utils.node_name(k))
                      for k, v in feed_dict.items()]
        tf.random.set_seed(1)
        expected = func(*input_list)
        if isinstance(expected, (list, tuple)):
            # list or tuple
            expected = [x.numpy() for x in expected]
        else:
            # single result
            expected = [expected.numpy()]

        # now make the eager functions a graph
        concrete_func = tf.function(func, input_signature=tuple(input_tensors))
        concrete_func = concrete_func.get_concrete_function()
        graph_def = from_function(concrete_func,
                                  input_names=list(feed_dict.keys()),
                                  output_names=output_names_with_port,
                                  large_model=large_model)
    else:
        #
        # use graph to execute the tensorflow func
        #
        with tf_session() as sess:
            tf_set_random_seed(1)
            input_list = []
            for k, v in clean_feed_dict.items():
                input_list.append(tf_placeholder(name=k, shape=v.shape,
                                                 dtype=tf.as_dtype(v.dtype)))
            func(*input_list)
            variables_lib.global_variables_initializer().run()
            tf_tables_initializer().run()
            output_dict = []
            for out_name in output_names_with_port:
                output_dict.append(sess.graph.get_tensor_by_name(out_name))
            expected = sess.run(output_dict, feed_dict=feed_dict)
            graph_def = freeze_session(sess,
                                       input_names=list(feed_dict.keys()),
                                       output_names=output_names_with_port)
            table_names, key_dtypes, value_dtypes = get_hash_table_info(graph_def)
            initialized_tables = {}
            for n, k_dtype, val_dtype in zip(table_names, key_dtypes, value_dtypes):
                h = lookup_ops.hash_table_v2(k_dtype, val_dtype, shared_name=n)
                k, v = lookup_ops.lookup_table_export_v2(h, k_dtype, val_dtype)
                initialized_tables[n] = (sess.run(k), sess.run(v))

    tf_reset_default_graph()
    with tf_session() as sess:
        tf.import_graph_def(graph_def, name='')
        graph_def = tf_optimize(list(feed_dict.keys()), output_names_with_port,
                                graph_def, fold_constant=constant_fold)

    tf_reset_default_graph()
    with tf_session() as sess:
        const_node_values = None
        if large_model:
            const_node_values = compress_graph_def(graph_def)
        tf.import_graph_def(graph_def, name='')

        if self.config.is_debug_mode:
            model_path = os.path.join(self.test_data_directory,
                                      self._testMethodName + "_after_tf_optimize.pb")
            utils.save_protobuf(model_path, graph_def)
            self.logger.debug("created file %s", model_path)

        g = process_tf_graph(sess.graph, opset=self.config.opset,
                             input_names=list(feed_dict.keys()),
                             output_names=output_names_with_port,
                             target=self.config.target,
                             const_node_values=const_node_values,
                             initialized_tables=initialized_tables,
                             **process_args)
        g = optimizer.optimize_graph(g)
        actual = self.run_backend(g, output_names_with_port, onnx_feed_dict, large_model)

    for expected_val, actual_val in zip(expected, actual):
        if check_value:
            self.assertAllClose(expected_val, actual_val, rtol=rtol, atol=atol)
        if check_dtype:
            self.assertEqual(expected_val.dtype, actual_val.dtype)
        # why we need the shape check: issue when comparing [] with a scalar
        # https://github.com/numpy/numpy/issues/11071
        if check_shape:
            self.assertEqual(expected_val.shape, actual_val.shape)

    if graph_validator:
        self.assertTrue(graph_validator(g))

    return g
def run_test(self, name, backend="onnxruntime", onnx_file=None, opset=None,
             extra_opset=None, perf=None):
    """Run complete test against backend."""
    self.perf = perf

    # get the model
    if self.url:
        _, dir_name = self.download_model()
        logger.info("Downloaded to %s", dir_name)
        model_path = os.path.join(dir_name, self.local) if self.local != "." else dir_name
    else:
        model_path = self.local

    logger.info("Load model from %s", model_path)
    input_names = list(self.input_names.keys())
    initialized_tables = {}
    outputs = self.output_names
    tflite_path = None
    to_rename = None
    if self.model_type in ["checkpoint"]:
        graph_def, input_names, outputs = tf_loader.from_checkpoint(
            model_path, input_names, outputs)
    elif self.model_type in ["saved_model"]:
        loaded = tf_loader.from_saved_model(
            model_path, None, None, self.tag, self.signatures,
            self.concrete_function, self.large_model,
            return_concrete_func=not self.run_tf_frozen,
            return_initialized_tables=True, return_tensors_to_rename=True)
        if not self.run_tf_frozen:
            # Must maintain ref to imported since concrete_func uses weak refs
            # pylint: disable=unused-variable
            graph_def, input_names, outputs, concrete_func, imported, \
                initialized_tables, to_rename = loaded
        else:
            graph_def, input_names, outputs, initialized_tables, to_rename = loaded
    elif self.model_type in ["keras"]:
        graph_def, input_names, outputs = tf_loader.from_keras(
            model_path, input_names, outputs)
    elif self.model_type in ["tflite"]:
        tflite_path = model_path
        graph_def = None
    else:
        graph_def, input_names, outputs = tf_loader.from_graphdef(
            model_path, input_names, outputs)

    if utils.is_debug_mode():
        utils.save_protobuf(
            os.path.join(TEMP_DIR, name + "_after_tf_optimize.pb"), graph_def)

    if tflite_path is not None:
        inputs = {}
        for k in input_names:
            v = self.input_names[k]
            inputs[k] = self.make_input(v)

        interpreter = tf.lite.Interpreter(tflite_path)
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        input_name_to_index = {
            n['name'].split(':')[0]: n['index'] for n in input_details
        }
        for k, v in inputs.items():
            interpreter.resize_tensor_input(input_name_to_index[k], v.shape)
        interpreter.allocate_tensors()

        def run_tflite():
            for k, v in inputs.items():
                interpreter.set_tensor(input_name_to_index[k], v)
            interpreter.invoke()
            result = [interpreter.get_tensor(output['index']) for output in output_details]
            return result

        tf_results = run_tflite()
        if self.perf:
            logger.info("Running TFLite perf")
            n = 0
            start = time.time()
            stop = start + PERF_TIME
            while time.time() < stop:
                for _ in range(PERF_STEP):
                    _ = run_tflite()
                n += PERF_STEP
            self.tf_runtime = 1000 * (time.time() - start) / n
            logger.info("TFLite perf {:.2f}ms/inference, n={}".format(self.tf_runtime, n))
        logger.info("TFLite OK")

    if not self.run_tf_frozen:
        inputs = {}
        for k in input_names:
            v = self.input_names[k]
            inputs[k.split(":")[0]] = tf.constant(self.make_input(v))
        tf_func = tf.function(concrete_func)
        logger.info("Running TF")
        tf_results_d = tf_func(**inputs)
        # If there is only a single output a dict might not be returned
        if isinstance(tf_results_d, tf.Tensor):
            tf_results = [tf_results_d]
        else:
            tf_results = [tf_results_d[k] for k in sorted(tf_results_d.keys())]
        tf_results = [tf_res.numpy() for tf_res in tf_results]
        if self.perf:
            logger.info("Running TF perf")
            n = 0
            start = time.time()
            stop = start + PERF_TIME
            if self.tf_profile is not None:
                tf.profiler.experimental.start(self.tf_profile)
            while time.time() < stop:
                for _ in range(PERF_STEP):
                    _ = concrete_func(**inputs)
                n += PERF_STEP
            if self.tf_profile is not None:
                tf.profiler.experimental.stop()
            self.tf_runtime = 1000 * (time.time() - start) / n
            logger.info("TF perf {:.2f}ms/inference, n={}".format(self.tf_runtime, n))
        logger.info("TensorFlow OK")

    shape_override = {}
    const_node_values = None
    tf_graph = None

    if graph_def is not None:
        inputs = {}
        tf_reset_default_graph()

        with tf.Graph().as_default() as tf_graph:
            from tf2onnx.tf_utils import compress_graph_def
            if self.large_model:
                const_node_values = compress_graph_def(graph_def)
            tf.import_graph_def(graph_def, name='')

        with tf_session(graph=tf_graph) as sess:
            # create the input data
            for k in input_names:
                v = self.input_names[k]
                t = sess.graph.get_tensor_by_name(k)
                expected_dtype = tf.as_dtype(t.dtype).name
                if isinstance(v, six.text_type) and v.startswith("np."):
                    np_value = eval(v)  # pylint: disable=eval-used
                    if expected_dtype != np_value.dtype:
                        logger.warning("dtype mismatch for input %s: expected=%s, actual=%s",
                                       k, expected_dtype, np_value.dtype)
                    inputs[k] = np_value.astype(expected_dtype)
                else:
                    if expected_dtype == "string":
                        inputs[k] = self.make_input(v).astype(np.str).astype(np.object)
                    else:
                        inputs[k] = self.make_input(v).astype(expected_dtype)

            if self.force_input_shape:
                for k, v in inputs.items():
                    shape_override[k] = list(v.shape)

            # run the model with tensorflow
            if self.skip_tensorflow:
                logger.info("TensorFlow SKIPPED")
            elif self.run_tf_frozen:
                if self.tf_profile is not None:
                    tf.profiler.experimental.start(self.tf_profile)
                tf_results = self.run_tensorflow(sess, inputs)
                if self.tf_profile is not None:
                    tf.profiler.experimental.stop()
                logger.info("TensorFlow OK")
            tf_graph = sess.graph

    model_proto = None
    if self.skip_conversion:
        if self.large_model:
            external_tensor_storage = ExternalTensorStorage()
            model_proto = utils.model_proto_from_zip(self.converted_model,
                                                     external_tensor_storage)
        else:
            external_tensor_storage = None
            model_proto = utils.model_proto_from_file(self.converted_model)
        logger.info("ONNX loaded from file")
    else:
        try:
            # convert model to onnx
            onnx_graph = self.to_onnx(tf_graph, opset=opset, extra_opset=extra_opset,
                                      shape_override=shape_override,
                                      input_names=inputs.keys(),
                                      const_node_values=const_node_values,
                                      initialized_tables=initialized_tables,
                                      tflite_path=tflite_path,
                                      tensors_to_rename=to_rename)
            onnx_graph = optimizer.optimize_graph(onnx_graph)
            print("ONNX", onnx_graph.dump_node_statistics())
            external_tensor_storage = ExternalTensorStorage() if self.large_model else None
            model_proto = onnx_graph.make_model(
                "converted from tf2onnx",
                external_tensor_storage=external_tensor_storage)
            logger.info("To_ONNX, OK")
            if onnx_file:
                self.create_onnx_file(name, model_proto, inputs, onnx_file,
                                      external_tensor_storage)
            if self.converted_model:
                if self.large_model:
                    utils.save_onnx_zip(self.converted_model, model_proto,
                                        external_tensor_storage)
                else:
                    utils.save_protobuf(self.converted_model, model_proto)
                logger.info("Created %s", self.converted_model)
        except Exception:
            logger.error("To_ONNX FAIL", exc_info=1)
            return False

    try:
        onnx_results = None
        if backend == "onnxruntime":
            if to_rename is None:
                struc_outputs = self.output_names
            else:
                struc_outputs = [to_rename.get(k, k) for k in self.output_names]
            onnx_results = self.run_onnxruntime(name, model_proto, inputs, struc_outputs,
                                                external_tensor_storage)
        else:
            raise ValueError("unknown backend")
        logger.info("Run_ONNX OK")

        try:
            if self.skip_tensorflow:
                logger.info("Results: skipped tensorflow")
            else:
                if self.check_only_shape:
                    for tf_res, onnx_res in zip(tf_results, onnx_results):
                        np.testing.assert_array_equal(tf_res.shape, onnx_res.shape)
                else:
                    for tf_res, onnx_res in zip(tf_results, onnx_results):
                        good_cnt = np.count_nonzero(
                            np.isclose(tf_res, onnx_res, rtol=self.rtol, atol=self.atol))
                        bad_cnt = tf_res.size - good_cnt
                        if bad_cnt > self.ptol / 100 * tf_res.size:
                            # Prints a nice error message with stats
                            np.testing.assert_allclose(tf_res, onnx_res,
                                                       rtol=self.rtol, atol=self.atol)
            logger.info("Results: OK")
            return True
        except Exception:
            logger.error("Results", exc_info=1)
    except Exception:
        logger.error("Run_ONNX FAIL", exc_info=1)

    return False
import numpy as np import tensorflow as tf root = tf.train.Checkpoint() root.f = tf.function(lambda x, y: tf.matmul(x, y)) new_input_data = np.random.randn(2, 2, 2, 2).astype(np.float32) new_w = np.random.randn(2, 2, 2, 2).astype(np.float32) input_data = tf.convert_to_tensor(new_input_data) input_w = tf.convert_to_tensor(new_w) concrete_func = root.f.get_concrete_function(input_data, input_w) converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) tflite_model = converter.convert() tflite_filename = "matmul.tflite" with open(tflite_filename, "wb") as f: f.write(tflite_model) print("Converted %s." % tflite_filename)
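# A small follow-up sketch (not part of the original snippet): load the freshly
# written matmul.tflite and check it against tf.matmul. The ordering of the two
# interpreter inputs (x first, w second) is an assumption; match by
# input_details[i]['name'] if it differs on your build.
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="matmul.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

x = np.random.randn(2, 2, 2, 2).astype(np.float32)
w = np.random.randn(2, 2, 2, 2).astype(np.float32)
interpreter.set_tensor(input_details[0]['index'], x)
interpreter.set_tensor(input_details[1]['index'], w)
interpreter.invoke()
tflite_result = interpreter.get_tensor(output_details[0]['index'])

np.testing.assert_allclose(tflite_result, tf.matmul(x, w).numpy(), rtol=1e-5, atol=1e-5)
print("TFLite output matches tf.matmul")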
def wrapper(fn):
    # `input_signature` and `_eager_function_handler` are provided by the
    # enclosing scope; this closure compiles `fn` with a fixed signature
    # before handing it to the eager-function handler.
    wrapped_fn = tf.function(fn, input_signature)
    return _eager_function_handler(input_signature)(wrapped_fn)
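# Isolated illustration of the pattern above (hypothetical names; the original
# `_eager_function_handler` belongs to its own codebase and is not reproduced):
# an explicit input_signature lets tf.function reuse a single trace across
# different input shapes instead of retracing per shape.
import tensorflow as tf

input_signature = [tf.TensorSpec(shape=[None], dtype=tf.float32)]

@tf.function(input_signature=input_signature)
def scale(x):
    return 2.0 * x

print(scale(tf.constant([1.0, 2.0])))       # traced once
print(scale(tf.constant([1.0, 2.0, 3.0])))  # reuses the same trace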
def make_tf_opt_epoch_fn(
    inputs: GraphsTuple, target: np.ndarray, batch_size: int, model: snt.Module,
    optimizer: snt.Optimizer, loss_fn: templates.LossFunction,
    l2_reg: float = 0.0) -> Callable[[tf.Tensor, tf.Tensor], tf.Tensor]:
  """Make a tf.function of (inputs, target) for optimization.

  This function is useful for basic inference training of GNN models. Uses all
  variables to create a function that has a tf.function optimized input
  signature. Function uses pure tf.functions to build batches and aggregate
  losses. The result is a heavily optimized function that is at least 2x faster
  than a basic tf.function with experimental_relax_shapes=True.

  Args:
    inputs: graphs used for training.
    target: values to predict for training.
    batch_size: batch size.
    model: a GNN model.
    optimizer: optimizer, probably Adam or SGD.
    loss_fn: a loss function to optimize.
    l2_reg: l2 regularization weight.

  Returns:
    optimize_one_epoch(inputs, target), a tf.function optimized callable.
  """
  # Explicit input signature is faster than experimental relax shapes.
  input_signature = [
      graph_nets.utils_tf.specs_from_graphs_tuple(inputs),
      tf.TensorSpec.from_tensor(tf.convert_to_tensor(target))
  ]
  n = graph_utils.get_num_graphs(inputs)
  n_batches = tf.cast(n // batch_size, tf.float32)

  if l2_reg > 0.0:
    regularizer = snt.regularizers.L2(l2_reg)
    linear_variables = gnn_models.get_linear_variables(model)

  if batch_size == 1 or n == 1:

    def optimize_one_epoch(inputs, target):
      """One epoch single-batch optimization."""
      with tf.GradientTape() as tape:
        loss = loss_fn(target, model(inputs))
        if l2_reg > 0.0:
          loss += regularizer(linear_variables)

      grads = tape.gradient(loss, model.trainable_variables)
      optimizer.apply(grads, model.trainable_variables)
      return loss

  else:

    def optimize_one_epoch(inputs, target):
      """One epoch optimization."""
      loss = tf.constant(0.0, tf.float32)
      for batch in get_batch_indices(n, batch_size):
        x_batch = graph_utils.get_graphs_tf(inputs, batch)
        y_batch = tf.gather(target, batch)
        with tf.GradientTape() as tape:
          batch_loss = loss_fn(y_batch, model(x_batch))
          if l2_reg > 0.0:
            batch_loss += regularizer(linear_variables)

        grads = tape.gradient(batch_loss, model.trainable_variables)
        optimizer.apply(grads, model.trainable_variables)
        loss += batch_loss
      return loss / n_batches

  return tf.function(optimize_one_epoch, input_signature=input_signature)
def run(experiment_name: str, run_name: str, config: Config) -> None:
    mlflow.set_experiment(experiment_name)

    train, val, test = __load_data(config.path, config.window_size, config.batch_size)

    model = __model_fn(config.window_size, config.hidden_size, config.dropout)
    loss_fn = losses.WeightedBinaryCrossEntropy(config.pos_weight)
    optimizer = __optimizer_fn(config.learning_rate)

    input_spec = (tf.TensorSpec((None, config.window_size, len(SENSORS)), dtype=tf.float32),
                  tf.TensorSpec((None, 1), dtype=tf.float32))

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, config.output, max_to_keep=5)

    train_loss = tf.keras.metrics.Mean(name='loss')
    train_metrics = __training_metrics_fn()
    train_step = tf.function(partial(__train_step,
                                     model=model,
                                     optimizer=optimizer,
                                     loss_fn=loss_fn,
                                     loss=train_loss,
                                     metrics=train_metrics),
                             input_signature=input_spec)

    val_loss = tf.keras.metrics.Mean(name='loss')
    val_metrics = __training_metrics_fn()
    val_step = tf.function(partial(__validation_step,
                                   model=model,
                                   loss_fn=loss_fn,
                                   loss=val_loss,
                                   metrics=val_metrics),
                           input_signature=input_spec)

    eval_state = tf.zeros((2, 2), dtype=tf.int32)
    eval_metrics = __evaluation_metrics_fn()
    eval_step = partial(__evaluation_step, model=model, metrics=eval_metrics)

    with mlflow.start_run(run_name=run_name):
        mlflow.log_params(config._asdict())

        # Fitting
        for epoch in range(1, config.epochs + 1):
            train_loss.reset_states()
            for metric in train_metrics:
                metric.reset_states()

            val_loss.reset_states()
            for metric in val_metrics:
                metric.reset_states()

            # Training
            for X, y in train:
                train_step(X, y)

            mlflow.log_metric(train_loss.name, train_loss.result().numpy(), step=epoch)
            mlflow.log_metrics(
                {
                    metric.name: metric.result().numpy()
                    for metric in train_metrics
                },
                step=epoch)

            # Validation
            for X, y in val:
                val_step(X, y)

            mlflow.log_metric(f'val_{val_loss.name}', val_loss.result().numpy(), step=epoch)
            mlflow.log_metrics(
                {
                    f'val_{metric.name}': metric.result().numpy()
                    for metric in val_metrics
                },
                step=epoch)

            # Checkpoint
            if epoch % config.checkpoint_rate == 0:
                ckpt_manager.save()

        # Evaluation
        def evaluate(confusion_matrix, client):
            # Reset PR-AUC and Accuracy metrics
            eval_metrics[0].reset_states()
            eval_metrics[3].reset_states()

            results = test[client].reduce(eval_state, eval_step)

            # log_metric (singular) takes a key/value pair; log_metrics expects a dict.
            mlflow.log_metric(f'client_{client}_val_auc',
                              eval_metrics[0].result().numpy())
            mlflow.log_metric(f'client_{client}_val_acc',
                              eval_metrics[3].result().numpy())

            return confusion_matrix + results

        confusion_matrix = reduce(evaluate, test.clients, tf.zeros((2, 2), dtype=tf.int32))

        # Confusion matrix
        fig, ax = plt.subplots(figsize=(16, 8))
        sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap=sns.color_palette("Blues"), ax=ax)
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Ground Truth')
        mlflow.log_figure(fig, 'confusion_matrix.png')
        plt.close(fig)

        # Precision Recall
        fig, ax = plt.subplots(figsize=(16, 8))
        sns.lineplot(x=eval_metrics[2].result().numpy(), y=eval_metrics[1].result().numpy(), ax=ax)
        ax.set_xlabel('Recall')
        ax.set_xlim(0., 1.)
        ax.set_ylabel('Precision')
        ax.set_ylim(0., 1.)
        mlflow.log_figure(fig, 'precision_recall.png')
        plt.close(fig)
def __call__(self):
    """Assigns the values of the parameters of the main network to the
    parameters of the target network."""
    # Wrap the bound method itself (not the result of calling it) so the
    # variable assignments run inside the compiled function.
    tf.function(self._update_target_vars)()
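# A minimal standalone sketch of the distinction fixed above: tf.function must
# receive the callable itself; passing the *result* of a call means the work
# runs eagerly and nothing useful gets compiled.
import tensorflow as tf

v = tf.Variable(0.0)

def bump():
    v.assign_add(1.0)

tf.function(bump)()    # compiles `bump` and runs it: v is now 1.0
# tf.function(bump())  # `bump()` runs eagerly and tf.function receives None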
def run_customized_training_loop( # pylint: disable=invalid-name _sentinel=None, # pylint: enable=invalid-name strategy=None, model_fn=None, loss_fn=None, model_dir=None, train_input_fn=None, steps_per_epoch=None, steps_per_loop=1, epochs=1, eval_input_fn=None, eval_steps=None, metric_fn=None, init_checkpoint=None, use_remote_tpu=False, custom_callbacks=None, run_eagerly=False): """Run BERT pretrain model training using low-level API. Arguments: _sentinel: Used to prevent positional parameters. Internal, do not use. strategy: Distribution strategy on which to run low level training loop. model_fn: Function that returns a tuple (model, sub_model). Caller of this function should add optimizer to the `model` via calling `model.compile()` API or manually setting `model.optimizer` attribute. Second element of the returned tuple(sub_model) is an optional sub model to be used for initial checkpoint -- if provided. loss_fn: Function with signature func(labels, logits) and returns a loss tensor. model_dir: Model directory used during training for restoring/saving model weights. train_input_fn: Function that returns a tf.data.Dataset used for training. steps_per_epoch: Number of steps to run per epoch. At the end of each epoch, model checkpoint will be saved and evaluation will be conducted if evaluation dataset is provided. steps_per_loop: Number of steps per graph-mode loop. In order to reduce communication in eager context, training logs are printed every steps_per_loop. epochs: Number of epochs to train. eval_input_fn: Function that returns evaluation dataset. If none, evaluation is skipped. eval_steps: Number of steps to run evaluation. Required if `eval_input_fn` is not none. metric_fn: A metrics function that returns a Keras Metric object to record evaluation result using evaluation dataset or with training dataset after every epoch. init_checkpoint: Optional checkpoint to load to `sub_model` returned by `model_fn`. use_remote_tpu: Ignored, will be removed in the future. custom_callbacks: A list of Keras Callbacks objects to run during training. More specifically, `on_batch_begin()`, `on_batch_end()`, methods are invoked during training. run_eagerly: Whether to run model training in pure eager execution. This should be disable for TPUStrategy. Returns: Trained model. Raises: ValueError: (1) When model returned by `model_fn` does not have optimizer attribute or when required parameters are set to none. (2) eval args are not specified correctly. (3) metric_fn must be a callable if specified. """ # TODO(bfontain): Remove use_remote_tpu once there are no models using it. del use_remote_tpu if _sentinel is not None: raise ValueError('only call `run_customized_training_loop()` ' 'with named arguments.') required_arguments = [ strategy, model_fn, loss_fn, model_dir, steps_per_epoch, train_input_fn ] if [arg for arg in required_arguments if arg is None]: raise ValueError('`strategy`, `model_fn`, `loss_fn`, `model_dir`, ' '`steps_per_loop` and `steps_per_epoch` are required ' 'parameters.') if steps_per_loop > steps_per_epoch: logging.error( 'steps_per_loop: %d is specified to be greater than ' ' steps_per_epoch: %d, we will use steps_per_epoch as' ' steps_per_loop.', steps_per_loop, steps_per_epoch) steps_per_loop = steps_per_epoch assert tf.executing_eagerly() if run_eagerly: if steps_per_loop > 1: raise ValueError( 'steps_per_loop is used for performance optimization. 
When you want ' 'to run eagerly, you cannot leverage graph mode loop.') if isinstance(strategy, tf.distribute.experimental.TPUStrategy): raise ValueError( 'TPUStrategy should not run eagerly as it heavily replies on graph' ' optimization for the distributed system.') if eval_input_fn and (eval_steps is None or metric_fn is None): raise ValueError( '`eval_step` and `metric_fn` are required when `eval_input_fn ` ' 'is not none.') if metric_fn and not callable(metric_fn): raise ValueError( 'if `metric_fn` is specified, metric_fn must be a callable.') total_training_steps = steps_per_epoch * epochs # To reduce unnecessary send/receive input pipeline operation, we place input # pipeline ops in worker task. train_iterator = _get_input_iterator(train_input_fn, strategy) with distribution_utils.get_strategy_scope(strategy): # To correctly place the model weights on accelerators, # model and optimizer should be created in scope. model, sub_model = model_fn() if not hasattr(model, 'optimizer'): raise ValueError('User should set optimizer attribute to model ' 'inside `model_fn`.') optimizer = model.optimizer use_float16 = isinstance( optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer) if init_checkpoint: logging.info( 'Checkpoint file %s found and restoring from ' 'initial checkpoint for core model.', init_checkpoint) checkpoint = tf.train.Checkpoint(model=sub_model) checkpoint.restore(init_checkpoint).assert_consumed() logging.info('Loading from checkpoint file completed') train_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32) eval_metrics = [metric_fn()] if metric_fn else [] # If evaluation is required, make a copy of metric as it will be used by # both train and evaluation. train_metrics = [ metric.__class__.from_config(metric.get_config()) for metric in eval_metrics ] # Create summary writers eval_summary_writer = tf.summary.create_file_writer( os.path.join(model_dir, 'summaries/eval')) if steps_per_loop >= _MIN_SUMMARY_STEPS: # Only writes summary when the stats are collected sufficiently over # enough steps. train_summary_writer = tf.summary.create_file_writer( os.path.join(model_dir, 'summaries/train')) else: train_summary_writer = None # Collects training variables. training_vars = model.trainable_variables def _replicated_step(inputs): """Replicated training step.""" inputs, labels = inputs with tf.GradientTape() as tape: model_outputs = model(inputs, training=True) loss = loss_fn(labels, model_outputs) if use_float16: scaled_loss = optimizer.get_scaled_loss(loss) if use_float16: scaled_grads = tape.gradient(scaled_loss, training_vars) grads = optimizer.get_unscaled_gradients(scaled_grads) else: grads = tape.gradient(loss, training_vars) optimizer.apply_gradients(zip(grads, training_vars)) # For reporting, the metric takes the mean of losses. train_loss_metric.update_state(loss) for metric in train_metrics: metric.update_state(labels, model_outputs) @tf.function def train_steps(iterator, steps): """Performs distributed training steps in a loop. Args: iterator: the distributed iterator of training datasets. steps: an tf.int32 integer tensor to specify number of steps to run inside host training loop. Raises: ValueError: Any of the arguments or tensor shapes are invalid. """ if not isinstance(steps, tf.Tensor): raise ValueError( 'steps should be an Tensor. 
Python object may cause ' 'retracing.') for _ in tf.range(steps): strategy.experimental_run_v2(_replicated_step, args=(next(iterator), )) def train_single_step(iterator): """Performs a distributed training step. Args: iterator: the distributed iterator of training datasets. Raises: ValueError: Any of the arguments or tensor shapes are invalid. """ strategy.experimental_run_v2(_replicated_step, args=(next(iterator), )) def test_step(iterator): """Calculates evaluation metrics on distributed devices.""" def _test_step_fn(inputs): """Replicated accuracy calculation.""" inputs, labels = inputs model_outputs = model(inputs, training=False) for metric in eval_metrics: metric.update_state(labels, model_outputs) strategy.experimental_run_v2(_test_step_fn, args=(next(iterator), )) if not run_eagerly: train_single_step = tf.function(train_single_step) test_step = tf.function(test_step) def _run_evaluation(current_training_step, test_iterator): """Runs validation steps and aggregate metrics.""" for _ in range(eval_steps): test_step(test_iterator) with eval_summary_writer.as_default(): for metric in eval_metrics + model.metrics: metric_value = _float_metric_value(metric) logging.info('Step: [%d] Validation %s = %f', current_training_step, metric.name, metric_value) tf.summary.scalar(metric.name, metric_value, step=current_training_step) eval_summary_writer.flush() def _run_callbacks_on_batch_begin(batch): """Runs custom callbacks at the start of every step.""" if not custom_callbacks: return for callback in custom_callbacks: callback.on_batch_begin(batch) def _run_callbacks_on_batch_end(batch): """Runs custom callbacks at the end of every step.""" if not custom_callbacks: return for callback in custom_callbacks: callback.on_batch_end(batch) # Training loop starts here. checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer) latest_checkpoint_file = tf.train.latest_checkpoint(model_dir) if latest_checkpoint_file: logging.info( 'Checkpoint file %s found and restoring from ' 'checkpoint', latest_checkpoint_file) checkpoint.restore(latest_checkpoint_file) logging.info('Loading from checkpoint file completed') current_step = optimizer.iterations.numpy() checkpoint_name = 'ctl_step_{step}.ckpt' while current_step < total_training_steps: # Training loss/metric are taking average over steps inside micro # training loop. We reset the their values before each round. train_loss_metric.reset_states() for metric in train_metrics + model.metrics: metric.reset_states() _run_callbacks_on_batch_begin(current_step) # Runs several steps in the host while loop. steps = _steps_to_run(current_step, steps_per_epoch, steps_per_loop) if steps == 1: # TODO(zongweiz): merge with train_steps once tf.while_loop # GPU performance bugs are fixed. train_single_step(train_iterator) else: # Converts steps to a Tensor to avoid tf.function retracing. train_steps(train_iterator, tf.convert_to_tensor(steps, dtype=tf.int32)) _run_callbacks_on_batch_end(current_step) current_step += steps train_loss = _float_metric_value(train_loss_metric) # Updates training logging. 
training_status = 'Train Step: %d/%d / loss = %s' % ( current_step, total_training_steps, train_loss) if train_summary_writer: with train_summary_writer.as_default(): tf.summary.scalar(train_loss_metric.name, train_loss, step=current_step) for metric in train_metrics + model.metrics: metric_value = _float_metric_value(metric) training_status += ' %s = %f' % (metric.name, metric_value) tf.summary.scalar(metric.name, metric_value, step=current_step) train_summary_writer.flush() logging.info(training_status) # Saves model checkpoints and run validation steps at every epoch end. if current_step % steps_per_epoch == 0: # To avoid repeated model saving, we do not save after the last # step of training. if current_step < total_training_steps: _save_checkpoint(checkpoint, model_dir, checkpoint_name.format(step=current_step)) if eval_input_fn: logging.info('Running evaluation after step: %s.', current_step) _run_evaluation( current_step, _get_input_iterator(eval_input_fn, strategy)) # Re-initialize evaluation metric. for metric in eval_metrics + model.metrics: metric.reset_states() _save_checkpoint(checkpoint, model_dir, checkpoint_name.format(step=current_step)) if eval_input_fn: logging.info( 'Running final evaluation after training is complete.') _run_evaluation(current_step, _get_input_iterator(eval_input_fn, strategy)) training_summary = { 'total_training_steps': total_training_steps, 'train_loss': _float_metric_value(train_loss_metric), } if eval_metrics: # TODO(hongkuny): Cleans up summary reporting in text. training_summary['last_train_metrics'] = _float_metric_value( train_metrics[0]) training_summary['eval_metrics'] = _float_metric_value( eval_metrics[0]) write_txt_summary(training_summary, model_dir) return model
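# Sketch of the retracing concern the loop above guards against (names here are
# illustrative, not from the original code): a Python int argument bakes a
# separate trace per value, while a tf.int32 tensor reuses a single trace.
import tensorflow as tf

@tf.function
def run_steps(steps):
    for _ in tf.range(steps):
        tf.print("step")

run_steps(tf.convert_to_tensor(2, dtype=tf.int32))  # traced once
run_steps(tf.convert_to_tensor(3, dtype=tf.int32))  # same trace reused
run_steps(4)  # a plain Python int triggers a new trace for every new value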
def benchmark(
    gen_displace, N, num, batchsize, dataset=False, graph=False, check_err=True
):
    """Runs a randomized benchmark of displacement operator generation, D(alpha)

    Args:
        gen_displace (callable): Given N, returns a function that generates
            displacement operators for a batch of alphas
        N (int): Dimension of Hilbert space
        num (int): Total number of alphas to benchmark
        batchsize (int): Number of alphas per batch (only used with Dataset)
        dataset (bool, optional): Enable tf.data.Dataset API. Defaults to False.
        graph (bool, optional): Enable tf.function compilation. Defaults to False.
        check_err (bool, optional): Check error against analytic coherent state. Defaults to True.

    Returns:
        (float x 4): (total time per alpha, loop time per alpha, mean error, max error)
    """
    alphas = random_alphas(num)

    # Dataset API adds some overhead, we can compare to the in-memory case
    if dataset:
        b_alphas = tf.data.Dataset.from_tensor_slices(alphas).batch(batchsize)

    # Initialize the constants used in the displacement generation
    # For the direct expm method, this is just creating a, a_dag
    # For BCH, we diagonalize the q, p operators
    start_time = time.perf_counter()
    f = gen_displace(N)
    init_time = time.perf_counter() - start_time

    if graph:  # Enable tf.function and tf.autograph
        f = tf.function(f)
        f = f.get_concrete_function(tf.TensorSpec(shape=[num], dtype=tf.complex64))

    # Repeat batch 3x to iron out timing fluctuations
    repeat_times = []
    for _ in range(3):
        if dataset:  # Loop through each Dataset batch
            start_time = time.perf_counter()
            for l_alpha in b_alphas:  # Be careful of memory limitations here
                results = f(l_alpha)
            # Calculate error of last batch only
            alphas = l_alpha
            loop_time = time.perf_counter() - start_time
        else:
            start_time = time.perf_counter()
            results = f(alphas)
            loop_time = time.perf_counter() - start_time
        repeat_times.append(loop_time)

    # We take the minimum time from above. This is typically representative of
    # a lower bound, as higher times are often caused by other processes
    # interfering with timing accuracy. See Python's timeit.repeat docs.
    total_time = (min(repeat_times) + init_time) / num
    loop_time = min(repeat_times) / num

    if check_err:
        mean_err, max_err = err_checks.coeff_err(results, alphas)
    else:
        mean_err, max_err = float("inf"), float("inf")

    return total_time, loop_time, mean_err, max_err
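# Not from the original project (random_alphas, gen_displace and err_checks are
# project-specific): a stripped-down version of the same eager-vs-graph timing
# comparison, using batched matrix exponentials as a stand-in workload.
import time
import tensorflow as tf

def make_matrices(x):
    # Stand-in for displacement-operator generation.
    return tf.linalg.expm(tf.linalg.diag(x))

x = tf.random.uniform([64, 32])

graph_fn = tf.function(make_matrices).get_concrete_function(
    tf.TensorSpec(shape=[64, 32], dtype=tf.float32))

for label, fn in [("eager", make_matrices), ("graph", graph_fn)]:
    times = []
    for _ in range(3):  # repeat and keep the minimum, as in benchmark()
        start = time.perf_counter()
        fn(x)
        times.append(time.perf_counter() - start)
    print(label, min(times))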
def custom_loop(self, train_dist_dataset, test_dist_dataset, strategy):
    """Custom training and testing loop.

    Args:
      train_dist_dataset: Training dataset created using strategy.
      test_dist_dataset: Testing dataset created using strategy.
      strategy: Distribution strategy.

    Returns:
      train_loss, train_accuracy, test_loss, test_accuracy
    """

    def distributed_train_epoch(ds, epoch_num):
        total_loss = 0.0
        num_train_batches = 0.0
        for one_batch in ds:
            start = time.time()
            per_replica_loss = strategy.experimental_run_v2(
                self.train_step, args=(one_batch, ))
            current_loss = strategy.reduce(tf.distribute.ReduceOp.SUM,
                                           per_replica_loss, axis=None)
            total_loss += current_loss
            num_train_batches += 1
            self.iter_num += 1
            time_cost_per_batch = time.time() - start
            images_per_sec = cfg.TRAIN.batch_size / time_cost_per_batch

            if self.iter_num % cfg.TRAIN.log_interval == 0:
                logger.info('epoch_num: %d, '
                            'iter_num: %d, '
                            'loss_value: %.6f, '
                            'speed: %d images/sec ' % (epoch_num,
                                                       self.iter_num,
                                                       current_loss,
                                                       images_per_sec))
        return total_loss, num_train_batches

    def distributed_test_epoch(ds, epoch_num):
        total_loss = 0.
        num_test_batches = 0.0
        for one_batch in ds:
            per_replica_loss = strategy.experimental_run_v2(
                self.test_step, args=(one_batch, ))
            current_loss = strategy.reduce(tf.distribute.ReduceOp.SUM,
                                           per_replica_loss, axis=None)
            total_loss += current_loss
            num_test_batches += 1
        return total_loss, num_test_batches

    if self.enable_function:
        distributed_train_epoch = tf.function(distributed_train_epoch)
        distributed_test_epoch = tf.function(distributed_test_epoch)

    for epoch in range(self.epochs):
        start = time.time()
        self.optimizer.learning_rate = self.decay(epoch)

        train_total_loss, num_train_batches = distributed_train_epoch(
            train_dist_dataset, epoch)
        test_total_loss, num_test_batches = distributed_test_epoch(
            test_dist_dataset, epoch)

        time_consume_per_epoch = time.time() - start
        training_message = 'Epoch: %d, ' \
                           'Train Loss: %.6f, ' \
                           'Test Loss: %.6f ' \
                           'Time consume: %.2f' % (epoch,
                                                   train_total_loss / num_train_batches,
                                                   test_total_loss / num_test_batches,
                                                   time_consume_per_epoch)
        logger.info(training_message)

        #### save the model at the end of each epoch
        current_model_saved_name = os.path.join(
            cfg.MODEL.model_path,
            'epoch_%d_val_loss%.6f' % (epoch, test_total_loss / num_test_batches))
        logger.info('A model saved to %s' % current_model_saved_name)

        if not os.access(cfg.MODEL.model_path, os.F_OK):
            os.mkdir(cfg.MODEL.model_path)

        tf.saved_model.save(self.model, current_model_saved_name)

    return (train_total_loss / num_train_batches,
            test_total_loss / num_test_batches)
def __init__(self, layer_sizes: Sequence[int], input_size: int, num_classes: int = 2, context_map_size: int = 4, bias: bool = True, context_bias: bool = False, base_predictor: Optional[Callable[[np.ndarray], np.ndarray]] = None, learning_rate: Union[float, DynamicParameter] = 1e-3, pred_clipping: float = 1e-3, weight_clipping: float = 5.0): tf.Module.__init__(self, name='GLN') GLNBase.__init__(self, layer_sizes, input_size, num_classes, context_map_size, bias, context_bias, base_predictor, learning_rate, pred_clipping, weight_clipping) # Learning rate as dynamic parameter if self.learning_rate == 'paper': self.learning_rate = PaperLearningRate(name='learning_rate') else: self.learning_rate = ConstantParameter(self.learning_rate, name='learning_rate') # Initialize layers self.layers = list() previous_size = self.base_pred_size for size in (self.layer_sizes + (1, )): self.layers.append( Linear(size=size, input_size=previous_size, context_size=self.input_size, context_map_size=self.context_map_size, num_classes=self.num_classes, learning_rate=self.learning_rate, pred_clipping=self.pred_clipping, weight_clipping=self.weight_clipping, bias=self.bias, context_bias=self.context_bias)) previous_size = size # TF-compiled predict function self._tf_predict = tf.function( func=self._predict, input_signature=[ tf.TensorSpec(shape=(None, self.base_pred_size), dtype=tf.dtypes.float32), tf.TensorSpec(shape=(None, self.input_size), dtype=tf.dtypes.float32) ], autograph=False) # TF-compiled update function self.target_dtype = tf.dtypes.int64 self._tf_update = tf.function( func=self._predict, input_signature=[ tf.TensorSpec(shape=(None, self.base_pred_size), dtype=tf.dtypes.float32), tf.TensorSpec(shape=(None, self.input_size), dtype=tf.dtypes.float32), tf.TensorSpec(shape=(None, ), dtype=self.target_dtype) ], autograph=False)
def __init__( self, model, features_file, labels_file, batch_size, batch_type="examples", length_bucket_width=None, scorers=None, save_predictions=False, early_stopping=None, model_dir=None, export_on_best=None, exporter=None, max_exports_to_keep=5, ): """Initializes the evaluator. Args: model: A :class:`opennmt.models.Model` to evaluate. features_file: Path to the evaluation features. labels_file: Path to the evaluation labels. batch_size: The evaluation batch size. batch_type: The batching strategy to use: can be "examples" or "tokens". length_bucket_width: The width of the length buckets to select batch candidates from (for efficiency). Set ``None`` to not constrain batch formation. scorers: A list of scorers, callables taking the path to the reference and the hypothesis and return one or more scores. save_predictions: Save evaluation predictions to a file. This is ``True`` when :obj:`scorers` is set. early_stopping: An ``EarlyStopping`` instance. model_dir: The active model directory. export_on_best: Export a model when this evaluation metric has the best value so far. exporter: A :class:`opennmt.utils.Exporter` instance to export the model. Defaults to :class:`opennmt.utils.SavedModelExporter`. max_exports_to_keep: Maximum number of exports to keep. Older exports will be garbage collected. Set to ``None`` to keep all exports. Raises: ValueError: If :obj:`save_predictions` is set but the model is not compatible. ValueError: If :obj:`save_predictions` is set but :obj:`model_dir` is ``None``. ValueError: If :obj:`export_on_best` is set but :obj:`model_dir` is ``None``. ValueError: If the :obj:`early_stopping` configuration is invalid. """ if model_dir is not None: export_dir = os.path.join(model_dir, "export") eval_dir = os.path.join(model_dir, "eval") else: if save_predictions: raise ValueError( "Saving evaluation predictions requires model_dir to be set" ) if export_on_best is not None: raise ValueError("Exporting models requires model_dir to be set") export_dir = None eval_dir = None if scorers is None: scorers = [] if scorers: save_predictions = True if save_predictions: if model.unsupervised: raise ValueError( "This model does not support saving evaluation predictions" ) if not tf.io.gfile.exists(eval_dir): tf.io.gfile.makedirs(eval_dir) self._model = model self._labels_file = labels_file self._save_predictions = save_predictions self._scorers = scorers self._eval_dir = eval_dir self._metrics_history = [] if eval_dir is not None: self._summary_writer = tf.summary.create_file_writer(eval_dir) summaries = misc.read_summaries(eval_dir) for step, values in summaries: metrics = misc.extract_prefixed_keys(values, _SUMMARIES_SCOPE + "/") self._metrics_history.append((step, metrics)) else: self._summary_writer = tf.summary.create_noop_writer() dataset = model.examples_inputter.make_evaluation_dataset( features_file, labels_file, batch_size, batch_type=batch_type, length_bucket_width=length_bucket_width, num_threads=1, prefetch_buffer_size=1, ) self._eval_fn = tf.function( model.evaluate, input_signature=dataset.element_spec ) self._dataset = dataset self._metrics_name = {"loss", "perplexity"} for scorer in self._scorers: self._metrics_name.update(scorer.scores_name) model_metrics = self._model.get_metrics() if model_metrics: self._metrics_name.update(set(model_metrics.keys())) if early_stopping is not None: if early_stopping.metric not in self._metrics_name: raise ValueError( "Invalid early stopping metric '%s', expected one in %s" % (early_stopping.metric, str(self._metrics_name)) ) if 
early_stopping.steps <= 0:
                raise ValueError("Early stopping steps should be greater than 0")
        self._early_stopping = early_stopping
        self._export_on_best = export_on_best
        self._exporter = exporter
        self._export_dir = export_dir
        self._max_exports_to_keep = max_exports_to_keep
import os import random import tensorflow as tf import numpy as np import matplotlib.pyplot as plt from tensorflow import keras from tensorflow.keras import layers from tensorflow.keras.models import Sequential keras_model_path = '.' + os.sep + 'Models' + os.sep + 'fsw_best_model' + os.sep model = tf.keras.models.load_model(keras_model_path) run_model = tf.function(lambda x: model(x)) # This is important, let's fix the input size. concrete_func = run_model.get_concrete_function( tf.TensorSpec([1, 60, 7, 1], model.inputs[0].dtype)) # model directory. MODEL_DIR = '.' + os.sep + 'Models' + os.sep + 'Saved_Model' + os.sep model.save(MODEL_DIR, save_format="tf", signatures=concrete_func) converter = tf.lite.TFLiteConverter.from_saved_model(MODEL_DIR) tflite_model = converter.convert() if not os.path.exists('./tflite_models'): os.mkdir('./tflite_models') with open('./tflite_models/keras_tflite', 'wb') as f: f.write(tflite_model) interpreter = tf.lite.Interpreter(model_content=tflite_model)
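# A possible continuation (not in the original script): push one random window
# through the TFLite interpreter created above to sanity-check the conversion.
# The float32 input dtype is assumed from model.inputs[0].dtype.
import numpy as np

interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

sample = np.random.rand(1, 60, 7, 1).astype(np.float32)
interpreter.set_tensor(input_details[0]['index'], sample)
interpreter.invoke()
print(interpreter.get_tensor(output_details[0]['index']))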
def _run(func, *args, **kwargs):
    """Runs `func` in graph mode."""
    return tf.function(func)(*args, **kwargs)
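# Example usage of the helper above (the `double` function is illustrative).
# Note that each call re-wraps `func` in a fresh tf.function, so every
# invocation pays the tracing cost again; cache the wrapped function if it is
# called repeatedly.
import tensorflow as tf

def double(x):
    return 2 * x

print(_run(double, tf.constant(3)))  # tf.Tensor(6, shape=(), dtype=int32)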
def test_all_tf_functions_work_together_high_threshold(self): clients = 3 num_sub_rounds = 4 max_rounds = 6 max_num_prefixes = 3 threshold = 100 max_user_contribution = 100 roots = (string.ascii_lowercase + string.digits + "'@#-;*:./" + triehh_tf.DEFAULT_TERMINATOR) possible_prefix_extensions = list(roots) possible_prefix_extensions_num = len(possible_prefix_extensions) possible_prefix_extensions = tf.constant(possible_prefix_extensions, dtype=tf.string) server_state = triehh_tf.ServerState( discovered_heavy_hitters=tf.constant([], dtype=tf.string), heavy_hitter_frequencies=tf.constant([], dtype=tf.float64), discovered_prefixes=tf.constant([''], dtype=tf.string), round_num=tf.constant(0, dtype=tf.int32), accumulated_votes=tf.zeros( dtype=tf.int32, shape=[max_num_prefixes, possible_prefix_extensions_num]), accumulated_weights=tf.constant(0, dtype=tf.int32)) def create_dataset_fn(client_id): del client_id return tf.data.Dataset.from_tensor_slices(['hello', 'hey', 'hi']) client_ids = list(range(100)) client_data = tff.simulation.ClientData.from_clients_and_fn( client_ids=client_ids, create_tf_dataset_for_client_fn=create_dataset_fn) for round_num in range(max_rounds * num_sub_rounds): sampled_clients = list(range(clients)) sampled_datasets = [ client_data.create_tf_dataset_for_client(client_id) for client_id in sampled_clients ] accumulated_votes = tf.zeros( dtype=tf.int32, shape=[max_num_prefixes, possible_prefix_extensions_num]) accumulated_weights = tf.constant(0, dtype=tf.int32) # This is a workaround to clear the graph cache in the `tf.function`; this # is necessary because we need to construct a new lookup table every round # based on new prefixes. client_update = tf.function( triehh_tf.client_update.python_function) for dataset in sampled_datasets: client_output = client_update( dataset, server_state.discovered_prefixes, possible_prefix_extensions, round_num, tf.constant(num_sub_rounds), tf.constant(max_num_prefixes, dtype=tf.int32), tf.constant(max_user_contribution, dtype=tf.int32)) accumulated_votes += client_output.client_votes accumulated_weights += client_output.client_weight server_state = triehh_tf.server_update( server_state, possible_prefix_extensions, accumulated_votes, accumulated_weights, tf.constant(num_sub_rounds, dtype=tf.int32), tf.constant(max_num_prefixes, dtype=tf.int32), tf.constant(threshold, dtype=tf.int32)) expected_discovered_heavy_hitters = tf.constant([], dtype=tf.string) expected_heavy_hitter_frequencies = tf.constant([], dtype=tf.float64) expected_discovered_prefixes = tf.constant([], dtype=tf.string) self.assertSetAllEqual(server_state.discovered_heavy_hitters, expected_discovered_heavy_hitters) self.assertHistogramsEqual(server_state.discovered_heavy_hitters, server_state.heavy_hitter_frequencies, expected_discovered_heavy_hitters, expected_heavy_hitter_frequencies) self.assertSetAllEqual(server_state.discovered_prefixes, expected_discovered_prefixes)
def freeze_and_run_tf(self, func, feed_dict, outputs, as_session, premade_placeholders, large_model, constant_fold): np.random.seed(1) # Make it reproducible. clean_feed_dict = {utils.node_name(k): v for k, v in feed_dict.items()} if is_tf2() and not as_session: # # use eager to execute the tensorflow func # # numpy doesn't work for all ops, make it tf.Tensor() input_tensors = [ tf.TensorSpec(shape=v.shape, dtype=tf.as_dtype(v.dtype), name=utils.node_name(k)) for k, v in feed_dict.items() ] input_list = [ tf.convert_to_tensor(v, dtype=tf.as_dtype(v.dtype), name=utils.node_name(k)) for k, v in feed_dict.items() ] tf.random.set_seed(1) result = func(*input_list) if isinstance(result, (list, tuple)): # list or tuple result = [x.numpy() for x in result] else: # single result result = [result.numpy()] # now make the eager functions a graph concrete_func = tf.function(func, input_signature=tuple(input_tensors)) concrete_func = concrete_func.get_concrete_function() graph_def = from_function(concrete_func, input_names=list(feed_dict.keys()), output_names=outputs, large_model=large_model) initialized_tables = None else: # # use graph to execute the tensorflow func # with tf_session() as sess: tf_set_random_seed(1) input_list = [] if not premade_placeholders: for k, v in clean_feed_dict.items(): input_list.append( tf_placeholder(name=k, shape=v.shape, dtype=tf.as_dtype(v.dtype))) func(*input_list) variables_lib.global_variables_initializer().run() tf_tables_initializer().run() output_dict = [] for out_name in outputs: output_dict.append(sess.graph.get_tensor_by_name(out_name)) result = sess.run(output_dict, feed_dict=feed_dict) graph_def = freeze_session(sess, input_names=list(feed_dict.keys()), output_names=outputs) table_names, key_dtypes, value_dtypes = get_hash_table_info( graph_def) initialized_tables = {} for n, k_dtype, val_dtype in zip(table_names, key_dtypes, value_dtypes): h = lookup_ops.hash_table_v2(k_dtype, val_dtype, shared_name=n) k, v = lookup_ops.lookup_table_export_v2( h, k_dtype, val_dtype) initialized_tables[n] = (sess.run(k), sess.run(v)) tf_reset_default_graph() with tf_session() as sess: tf.import_graph_def(graph_def, name='') graph_def = tf_optimize(list(feed_dict.keys()), outputs, graph_def, fold_constant=constant_fold) model_path = os.path.join( self.test_data_directory, self._testMethodName + "_after_tf_optimize.pb") utils.save_protobuf(model_path, graph_def) self.logger.debug("created file %s", model_path) return result, graph_def, initialized_tables
def _create_eval_loop_fn(eval_step_fn, options: StandardEvaluatorOptions): if options.use_tf_function: eval_step_fn = tf.function(eval_step_fn) return loop_fns.create_loop_fn(eval_step_fn)
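# Generic restatement of the pattern above (loop_fns and
# StandardEvaluatorOptions belong to the original codebase and are assumed
# here): optionally compile the per-step function before building a loop
# around it, so eager execution can be kept for debugging.
import tensorflow as tf

def create_loop_fn(step_fn, use_tf_function=True):
    if use_tf_function:
        step_fn = tf.function(step_fn)

    def loop_fn(iterator, num_steps):
        for _ in range(num_steps):
            step_fn(next(iterator))

    return loop_fn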