def set_precision_policy(policy_name: Text = None, loss_scale: bool = False): """Set precision policy according to the name. Args: policy_name: precision policy name, one of 'float32', 'mixed_float16', 'mixed_bfloat16', or None. loss_scale: whether to use loss scale (only for training). """ if not policy_name: return assert policy_name in ('mixed_float16', 'mixed_bfloat16', 'float32') logging.info('use mixed precision policy name %s', policy_name) # TODO(tanmingxing): use tf.keras.layers.enable_v2_dtype_behavior() when it # available in stable TF release. from tensorflow.python.keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top,g-direct-tensorflow-import base_layer_utils.enable_v2_dtype_behavior() # mixed_float16 training is not supported for now, so disable loss_scale. # float32 and mixed_bfloat16 do not need loss scale for training. if loss_scale: policy = tf2.keras.mixed_precision.experimental.Policy(policy_name) else: policy = tf2.keras.mixed_precision.experimental.Policy( policy_name, loss_scale=None) tf2.keras.mixed_precision.experimental.set_policy(policy)
def model_fn(features, labels, mode, params=None): """Constructs the object detection model. Args: features: Dictionary of feature tensors, returned from `input_fn`. labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL, otherwise None. mode: Mode key from tf.estimator.ModeKeys. params: Parameter dictionary passed from the estimator. Returns: An `EstimatorSpec` that encapsulates the model and its serving configurations. """ params = params or {} total_loss, train_op, detections, export_outputs = None, None, None, None is_training = mode == tf.estimator.ModeKeys.TRAIN # Make sure to set the Keras learning phase. True during training, # False for inference. tf.keras.backend.set_learning_phase(is_training) # Set policy for mixed-precision training with Keras-based models. if use_tpu and train_config.use_bfloat16: from tensorflow.python.keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0. base_layer_utils.enable_v2_dtype_behavior() tf2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16') detection_model = detection_model_fn(is_training=is_training, add_summaries=(not use_tpu)) scaffold_fn = None if mode == tf.estimator.ModeKeys.TRAIN: labels = unstack_batch(labels, unpad_groundtruth_tensors=train_config. unpad_groundtruth_tensors) elif mode == tf.estimator.ModeKeys.EVAL: # For evaling on train data, it is necessary to check whether groundtruth # must be unpadded. boxes_shape = (labels[fields.InputDataFields.groundtruth_boxes]. get_shape().as_list()) unpad_groundtruth_tensors = boxes_shape[ 1] is not None and not use_tpu labels = unstack_batch( labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): provide_groundtruth(detection_model, labels) preprocessed_images = features[fields.InputDataFields.image] side_inputs = detection_model.get_side_inputs(features) if use_tpu and train_config.use_bfloat16: with tf.tpu.bfloat16_scope(): prediction_dict = detection_model.predict( preprocessed_images, features[fields.InputDataFields.true_image_shape], **side_inputs) prediction_dict = ops.bfloat16_to_float32_nested( prediction_dict) else: prediction_dict = detection_model.predict( preprocessed_images, features[fields.InputDataFields.true_image_shape], **side_inputs) def postprocess_wrapper(args): return detection_model.postprocess(args[0], args[1]) if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT): if use_tpu and postprocess_on_cpu: detections = tf.tpu.outside_compilation( postprocess_wrapper, (prediction_dict, features[fields.InputDataFields.true_image_shape])) else: detections = postprocess_wrapper( (prediction_dict, features[fields.InputDataFields.true_image_shape])) if mode == tf.estimator.ModeKeys.TRAIN: load_pretrained = hparams.load_pretrained if hparams else False if train_config.fine_tune_checkpoint and load_pretrained: if not train_config.fine_tune_checkpoint_type: # train_config.from_detection_checkpoint field is deprecated. For # backward compatibility, set train_config.fine_tune_checkpoint_type # based on train_config.from_detection_checkpoint. if train_config.from_detection_checkpoint: train_config.fine_tune_checkpoint_type = 'detection' else: train_config.fine_tune_checkpoint_type = 'classification' asg_map = detection_model.restore_map( fine_tune_checkpoint_type=train_config. fine_tune_checkpoint_type, load_all_detection_checkpoint_vars=( train_config.load_all_detection_checkpoint_vars)) available_var_map = ( variables_helper.get_variables_available_in_checkpoint( asg_map, train_config.fine_tune_checkpoint, include_global_step=False)) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint( train_config.fine_tune_checkpoint, available_var_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint( train_config.fine_tune_checkpoint, available_var_map) if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): if (mode == tf.estimator.ModeKeys.EVAL and eval_config.use_dummy_loss_in_eval): total_loss = tf.constant(1.0) losses_dict = {'Loss/total_loss': total_loss} else: losses_dict = detection_model.loss( prediction_dict, features[fields.InputDataFields.true_image_shape]) losses = [loss_tensor for loss_tensor in losses_dict.values()] if train_config.add_regularization_loss: regularization_losses = detection_model.regularization_losses( ) if use_tpu and train_config.use_bfloat16: regularization_losses = ops.bfloat16_to_float32_nested( regularization_losses) if regularization_losses: regularization_loss = tf.add_n( regularization_losses, name='regularization_loss') losses.append(regularization_loss) losses_dict[ 'Loss/regularization_loss'] = regularization_loss total_loss = tf.add_n(losses, name='total_loss') losses_dict['Loss/total_loss'] = total_loss if 'graph_rewriter_config' in configs: graph_rewriter_fn = graph_rewriter_builder.build( configs['graph_rewriter_config'], is_training=is_training) graph_rewriter_fn() # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we # can write learning rate summaries on TPU without host calls. global_step = tf.train.get_or_create_global_step() training_optimizer, optimizer_summary_vars = optimizer_builder.build( train_config.optimizer) if mode == tf.estimator.ModeKeys.TRAIN: if use_tpu: training_optimizer = tf.tpu.CrossShardOptimizer( training_optimizer) # Optionally freeze some layers by setting their gradients to be zero. trainable_variables = None include_variables = (train_config.update_trainable_variables if train_config.update_trainable_variables else None) exclude_variables = (train_config.freeze_variables if train_config.freeze_variables else None) trainable_variables = slim.filter_variables( tf.trainable_variables(), include_patterns=include_variables, exclude_patterns=exclude_variables) clip_gradients_value = None if train_config.gradient_clipping_by_norm > 0: clip_gradients_value = train_config.gradient_clipping_by_norm if not use_tpu: for var in optimizer_summary_vars: tf.summary.scalar(var.op.name, var) summaries = [] if use_tpu else None if train_config.summarize_gradients: summaries = [ 'gradients', 'gradient_norm', 'global_gradient_norm' ] train_op = slim.optimizers.optimize_loss( loss=total_loss, global_step=global_step, learning_rate=None, clip_gradients=clip_gradients_value, optimizer=training_optimizer, update_ops=detection_model.updates(), variables=trainable_variables, summaries=summaries, name='') # Preventing scope prefix on all variables. if mode == tf.estimator.ModeKeys.PREDICT: exported_output = exporter_lib.add_output_tensor_nodes(detections) export_outputs = { tf.saved_model.signature_constants.PREDICT_METHOD_NAME: tf.estimator.export.PredictOutput(exported_output) } eval_metric_ops = None scaffold = None if mode == tf.estimator.ModeKeys.EVAL: class_agnostic = (fields.DetectionResultFields.detection_classes not in detections) groundtruth = _prepare_groundtruth_for_eval( detection_model, class_agnostic, eval_input_config.max_number_of_boxes) use_original_images = fields.InputDataFields.original_image in features if use_original_images: eval_images = features[fields.InputDataFields.original_image] true_image_shapes = tf.slice( features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3]) original_image_spatial_shapes = features[ fields.InputDataFields.original_image_spatial_shape] else: eval_images = features[fields.InputDataFields.image] true_image_shapes = None original_image_spatial_shapes = None eval_dict = eval_util.result_dict_for_batched_example( eval_images, features[inputs.HASH_KEY], detections, groundtruth, class_agnostic=class_agnostic, scale_to_absolute=True, original_image_spatial_shapes=original_image_spatial_shapes, true_image_shapes=true_image_shapes) if fields.InputDataFields.image_additional_channels in features: eval_dict[fields.InputDataFields. image_additional_channels] = features[ fields.InputDataFields.image_additional_channels] if class_agnostic: category_index = label_map_util.create_class_agnostic_category_index( ) else: category_index = label_map_util.create_category_index_from_labelmap( eval_input_config.label_map_path) vis_metric_ops = None if not use_tpu and use_original_images: keypoint_edges = [(kp.start, kp.end) for kp in eval_config.keypoint_edge] eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections( category_index, max_examples_to_draw=eval_config.num_visualizations, max_boxes_to_draw=eval_config.max_num_boxes_to_visualize, min_score_thresh=eval_config.min_score_threshold, use_normalized_coordinates=False, keypoint_edges=keypoint_edges or None) vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops( eval_dict) # Eval metrics on a single example. eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators( eval_config, list(category_index.values()), eval_dict) for loss_key, loss_tensor in iter(losses_dict.items()): eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor) for var in optimizer_summary_vars: eval_metric_ops[var.op.name] = (var, tf.no_op()) if vis_metric_ops is not None: eval_metric_ops.update(vis_metric_ops) eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()} if eval_config.use_moving_averages: variable_averages = tf.train.ExponentialMovingAverage(0.0) variables_to_restore = variable_averages.variables_to_restore() keep_checkpoint_every_n_hours = ( train_config.keep_checkpoint_every_n_hours) saver = tf.train.Saver( variables_to_restore, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours ) scaffold = tf.train.Scaffold(saver=saver) # EVAL executes on CPU, so use regular non-TPU EstimatorSpec. if use_tpu and mode != tf.estimator.ModeKeys.EVAL: return tf.estimator.tpu.TPUEstimatorSpec( mode=mode, scaffold_fn=scaffold_fn, predictions=detections, loss=total_loss, train_op=train_op, eval_metrics=eval_metric_ops, export_outputs=export_outputs) else: if scaffold is None: keep_checkpoint_every_n_hours = ( train_config.keep_checkpoint_every_n_hours) saver = tf.train.Saver( sharded=True, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, save_relative_paths=True) tf.add_to_collection(tf.GraphKeys.SAVERS, saver) scaffold = tf.train.Scaffold(saver=saver) return tf.estimator.EstimatorSpec(mode=mode, predictions=detections, loss=total_loss, train_op=train_op, eval_metric_ops=eval_metric_ops, export_outputs=export_outputs, scaffold=scaffold)
from tensorflow.python.keras.layers import core from tensorflow.python.keras.mixed_precision.experimental import get_layer_policy from tensorflow.python.keras.mixed_precision.experimental import policy from tensorflow.python.platform import test class GetLayerPolicyTest(test.TestCase): def test_get_layer_policy(self): layer = core.Dense(4) self.assertEqual( get_layer_policy.get_layer_policy(layer).name, 'float32') p = policy.Policy('mixed_float16') layer = core.Dense(4, dtype=p) self.assertIs(get_layer_policy.get_layer_policy(layer), p) layer = core.Dense(4, dtype='float64') self.assertEqual( get_layer_policy.get_layer_policy(layer).name, 'float64') def test_error(self): with self.assertRaisesRegexp( ValueError, 'get_policy can only be called on a layer, but got: 1'): get_layer_policy.get_layer_policy(1) if __name__ == '__main__': base_layer_utils.enable_v2_dtype_behavior() test.main()
def main(): global_batch_size = 1024 slot_num = 10 nnz_per_slot = 5 from tensorflow.python.keras.engine import base_layer_utils base_layer_utils.enable_v2_dtype_behavior() policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16") tf.keras.mixed_precision.experimental.set_policy(policy) dataset = utility.get_dataset(global_batch_size//hvd.size(), read_batchsize=global_batch_size//hvd.size()) sok_init_op = sok.Init(global_batch_size=global_batch_size) model = utility.SOKDenseDemo(max_vocabulary_size_per_gpu=1024, embedding_vec_size=8, slot_num=slot_num, nnz_per_slot=nnz_per_slot, num_dense_layers=0) optimizer = tf.keras.optimizers.Adam(learning_rate=0.1) optimizer = sok.tf.keras.mixed_precision.LossScaleOptimizer(optimizer, 1024) loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction='none') def _replica_loss(labels, logits): loss = loss_fn(labels, logits) dtype = loss.dtype loss = tf.cast(loss, tf.float32) loss = tf.nn.compute_average_loss(loss, global_batch_size=global_batch_size) return tf.cast(loss, dtype) def train_step(inputs, labels): logit = model(inputs, training=True) loss = _replica_loss(labels, logit) scaled_loss = optimizer.get_scaled_loss(loss) scaled_gradients = tf.gradients(scaled_loss, model.trainable_variables) emb_vars, other_vars =\ sok.split_embedding_variable_from_others(model.trainable_variables) scaled_emb_grads, scaled_other_grads =\ scaled_gradients[:len(emb_vars)], scaled_gradients[len(emb_vars):] emb_grads = optimizer.get_unscaled_gradients(scaled_emb_grads) other_grads = optimizer.get_unscaled_gradients(scaled_other_grads) other_grads = [hvd.allreduce(grad) for grad in other_grads] with sok.OptimizerScope(emb_vars): emb_train_op = optimizer.apply_gradients(zip(emb_grads, emb_vars)) other_train_op = optimizer.apply_gradients(zip(other_grads, other_vars)) total_loss = hvd.allreduce(loss) with tf.control_dependencies([emb_train_op, other_train_op]): return tf.identity(total_loss) train_iterator = dataset.make_initializable_iterator() iterator_init = train_iterator.initializer inputs, labels = train_iterator.get_next() loss = train_step(inputs, labels) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) with tf.Session() as sess: sess.run(sok_init_op) sess.run([init_op, iterator_init]) for step in range(10): loss_v = sess.run(loss) if hvd.local_rank() == 0: print("[INFO]: step {}, loss {}".format(step, loss_v))