Example #1
def build_graph(pipeline_config,
                shapes_info,
                input_type='encoded_image_string_tensor',
                use_bfloat16=True):
    """Builds serving graph of faster_rcnn to be exported.

  Args:
    pipeline_config: A TrainEvalPipelineConfig proto.
    shapes_info: A python dict of tensors' names and their shapes, returned by
      `get_prediction_tensor_shapes()`.
    input_type: One of
                'encoded_image_string_tensor': a 1d tensor with dtype=tf.string
                'image_tensor': a 4d tensor with dtype=tf.uint8
                'tf_example': a 1d tensor with dtype=tf.string
    use_bfloat16: If true, use tf.bfloat16 on TPU.

  Returns:
    placeholder_tensor: A placeholder tensor, type determined by `input_type`.
    result_tensor_dict: A python dict of tensors' names and tensors.
  """
    pipeline_config = modify_config(pipeline_config)
    detection_model = INPUT_BUILDER_UTIL_MAP['model_build'](
        pipeline_config.model, is_training=False)

    placeholder_tensor, input_tensors = \
        exporter.input_placeholder_fn_map[input_type]()

    # CPU pre-processing
    inputs = tf.cast(input_tensors, dtype=tf.float32)
    preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)

    # Dimshuffle: [b, h, w, c] -> [b, c, h, w]
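    # (This layout swap helps avoid extra padding from the TPU memory layout;
    # see the explanation in the SSD example at the end of this page.)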
    preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 3, 1, 2])
    if use_bfloat16:
        preprocessed_inputs = tf.cast(preprocessed_inputs, dtype=tf.bfloat16)

    # TPU feature extraction
    def tpu_subgraph_predict_fn(preprocessed_inputs, true_image_shapes):
        """Defines the first part of graph on TPU."""
        # [b, c, h, w] -> [b, h, w, c]
        preprocessed_inputs = tf.transpose(preprocessed_inputs,
                                           perm=[0, 2, 3, 1])

        prediction_dict = detection_model.predict(preprocessed_inputs,
                                                  true_image_shapes)

        return (
            # [batch, anchor, depth] -> [depth, batch, anchor]
            tf.transpose(prediction_dict[RPN_BOX_ENCODINGS], perm=[2, 0, 1]),
            # [batch, anchor, depth] -> [depth, batch, anchor]
            tf.transpose(
                prediction_dict[RPN_OBJECTNESS_PREDICTIONS_WITH_BACKGROUND],
                perm=[2, 0, 1]),
            # [anchors, depth]
            tf.transpose(prediction_dict[ANCHORS], perm=[1, 0]),
            # [num_proposals, num_classes, code_size]
            prediction_dict[REFINED_BOX_ENCODINGS],
            prediction_dict[CLASS_PREDICTIONS_WITH_BACKGROUND],
            prediction_dict[NUM_PROPOSALS],
            prediction_dict[PROPOSAL_BOXES])

    @function.Defun(capture_resource_var_by_value=False)
    def tpu_subgraph_predict():
        if use_bfloat16:
            with tf.contrib.tpu.bfloat16_scope():
                return tf.contrib.tpu.rewrite(
                    tpu_subgraph_predict_fn,
                    [preprocessed_inputs, true_image_shapes])
        else:
            return tf.contrib.tpu.rewrite(
                tpu_subgraph_predict_fn,
                [preprocessed_inputs, true_image_shapes])

    (rpn_box_encodings, rpn_objectness_predictions_with_background, anchors,
     refined_box_encodings, class_predictions_with_background, num_proposals,
     proposal_boxes) = tpu_functional.TPUPartitionedCall(
         args=tpu_subgraph_predict.captured_inputs,
         device_ordinal=tpu_ops.tpu_ordinal_selector(),
         Tout=[
             o.type
             for o in tpu_subgraph_predict.definition.signature.output_arg
         ],
         f=tpu_subgraph_predict)

    prediction_dict = {
        RPN_BOX_ENCODINGS:
        tf.transpose(rpn_box_encodings, perm=[1, 2, 0]),
        RPN_OBJECTNESS_PREDICTIONS_WITH_BACKGROUND:
        tf.transpose(rpn_objectness_predictions_with_background,
                     perm=[1, 2, 0]),
        ANCHORS:
        tf.transpose(anchors, perm=[1, 0]),
        REFINED_BOX_ENCODINGS:
        refined_box_encodings,
        CLASS_PREDICTIONS_WITH_BACKGROUND:
        class_predictions_with_background,
        NUM_PROPOSALS:
        num_proposals,
        PROPOSAL_BOXES:
        proposal_boxes
    }

    for k in prediction_dict:
        if isinstance(prediction_dict[k], list):
            # set_shape works in place, so set each element's shape directly.
            for idx in range(len(prediction_dict[k])):
                prediction_dict[k][idx].set_shape(shapes_info[k][idx])
        else:
            prediction_dict[k].set_shape(shapes_info[k])

    if use_bfloat16:
        prediction_dict = utils.bfloat16_to_float32_nested(prediction_dict)

    # CPU post-processing (NMS)
    postprocessed_tensors = detection_model.postprocess(
        prediction_dict, true_image_shapes)
    result_tensor_dict = exporter.add_output_tensor_nodes(
        postprocessed_tensors, 'inference_op')

    return placeholder_tensor, result_tensor_dict
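For context, a minimal driver for exporting the graph built above might look like the sketch below. It assumes TF 1.x session-based export and the `get_prediction_tensor_shapes()` helper referenced in the docstring; the export directory and input key are illustrative, and TPU system initialization and checkpoint restore are omitted.

# Hypothetical export driver (sketch only). Assumes TF 1.x (`tf` as used above);
# a real exporter would restore trained weights instead of initializing them.
def export_to_saved_model(pipeline_config, shapes_info, export_dir):
    with tf.Graph().as_default():
        placeholder_tensor, result_tensor_dict = build_graph(
            pipeline_config,
            shapes_info,
            input_type='encoded_image_string_tensor',
            use_bfloat16=True)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            tf.saved_model.simple_save(
                sess,
                export_dir,
                inputs={'serialized_image': placeholder_tensor},
                outputs=result_tensor_dict)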
Example #2
    def model_fn(features, labels, mode, params=None):
        """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
        params = params or {}
        total_loss, train_op, detections, export_outputs = None, None, None, None
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        # Make sure to set the Keras learning phase. True during training,
        # False for inference.
        tf.keras.backend.set_learning_phase(is_training)
        # Set policy for mixed-precision training with Keras-based models.
        if use_tpu and train_config.use_bfloat16:
            from tensorflow.python.keras.engine import base_layer_utils  # pylint: disable=g-import-not-at-top
            # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
            base_layer_utils.enable_v2_dtype_behavior()
            tf2.keras.mixed_precision.experimental.set_policy('mixed_bfloat16')
        detection_model = detection_model_fn(is_training=is_training,
                                             add_summaries=(not use_tpu))
        scaffold_fn = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            labels = unstack_batch(labels,
                                   unpad_groundtruth_tensors=train_config.
                                   unpad_groundtruth_tensors)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # When evaluating on training data, it is necessary to check whether
            # the groundtruth must be unpadded.
            boxes_shape = (labels[fields.InputDataFields.groundtruth_boxes].
                           get_shape().as_list())
            unpad_groundtruth_tensors = boxes_shape[
                1] is not None and not use_tpu
            labels = unstack_batch(
                labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            provide_groundtruth(detection_model, labels)

        preprocessed_images = features[fields.InputDataFields.image]

        side_inputs = detection_model.get_side_inputs(features)

        if use_tpu and train_config.use_bfloat16:
            with tf.tpu.bfloat16_scope():
                prediction_dict = detection_model.predict(
                    preprocessed_images,
                    features[fields.InputDataFields.true_image_shape],
                    **side_inputs)
                prediction_dict = ops.bfloat16_to_float32_nested(
                    prediction_dict)
        else:
            prediction_dict = detection_model.predict(
                preprocessed_images,
                features[fields.InputDataFields.true_image_shape],
                **side_inputs)

        def postprocess_wrapper(args):
            return detection_model.postprocess(args[0], args[1])

        if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
            if use_tpu and postprocess_on_cpu:
                detections = tf.tpu.outside_compilation(
                    postprocess_wrapper,
                    (prediction_dict,
                     features[fields.InputDataFields.true_image_shape]))
            else:
                detections = postprocess_wrapper(
                    (prediction_dict,
                     features[fields.InputDataFields.true_image_shape]))

        if mode == tf.estimator.ModeKeys.TRAIN:
            load_pretrained = hparams.load_pretrained if hparams else False
            if train_config.fine_tune_checkpoint and load_pretrained:
                if not train_config.fine_tune_checkpoint_type:
                    # train_config.from_detection_checkpoint field is deprecated. For
                    # backward compatibility, set train_config.fine_tune_checkpoint_type
                    # based on train_config.from_detection_checkpoint.
                    if train_config.from_detection_checkpoint:
                        train_config.fine_tune_checkpoint_type = 'detection'
                    else:
                        train_config.fine_tune_checkpoint_type = 'classification'
                asg_map = detection_model.restore_map(
                    fine_tune_checkpoint_type=train_config.
                    fine_tune_checkpoint_type,
                    load_all_detection_checkpoint_vars=(
                        train_config.load_all_detection_checkpoint_vars))
                available_var_map = (
                    variables_helper.get_variables_available_in_checkpoint(
                        asg_map,
                        train_config.fine_tune_checkpoint,
                        include_global_step=False))
                if use_tpu:

                    def tpu_scaffold():
                        tf.train.init_from_checkpoint(
                            train_config.fine_tune_checkpoint,
                            available_var_map)
                        return tf.train.Scaffold()

                    scaffold_fn = tpu_scaffold
                else:
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            if (mode == tf.estimator.ModeKeys.EVAL
                    and eval_config.use_dummy_loss_in_eval):
                total_loss = tf.constant(1.0)
                losses_dict = {'Loss/total_loss': total_loss}
            else:
                losses_dict = detection_model.loss(
                    prediction_dict,
                    features[fields.InputDataFields.true_image_shape])
                losses = [loss_tensor for loss_tensor in losses_dict.values()]
                if train_config.add_regularization_loss:
                    regularization_losses = detection_model.regularization_losses(
                    )
                    if use_tpu and train_config.use_bfloat16:
                        regularization_losses = ops.bfloat16_to_float32_nested(
                            regularization_losses)
                    if regularization_losses:
                        regularization_loss = tf.add_n(
                            regularization_losses, name='regularization_loss')
                        losses.append(regularization_loss)
                        losses_dict[
                            'Loss/regularization_loss'] = regularization_loss
                total_loss = tf.add_n(losses, name='total_loss')
                losses_dict['Loss/total_loss'] = total_loss

            if 'graph_rewriter_config' in configs:
                graph_rewriter_fn = graph_rewriter_builder.build(
                    configs['graph_rewriter_config'], is_training=is_training)
                graph_rewriter_fn()

            # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
            # can write learning rate summaries on TPU without host calls.
            global_step = tf.train.get_or_create_global_step()
            training_optimizer, optimizer_summary_vars = optimizer_builder.build(
                train_config.optimizer)

        if mode == tf.estimator.ModeKeys.TRAIN:
            if use_tpu:
                training_optimizer = tf.tpu.CrossShardOptimizer(
                    training_optimizer)

            # Optionally freeze some layers by setting their gradients to be zero.
            trainable_variables = None
            include_variables = (train_config.update_trainable_variables
                                 if train_config.update_trainable_variables
                                 else None)
            exclude_variables = (train_config.freeze_variables
                                 if train_config.freeze_variables else None)
            trainable_variables = slim.filter_variables(
                tf.trainable_variables(),
                include_patterns=include_variables,
                exclude_patterns=exclude_variables)

            clip_gradients_value = None
            if train_config.gradient_clipping_by_norm > 0:
                clip_gradients_value = train_config.gradient_clipping_by_norm

            if not use_tpu:
                for var in optimizer_summary_vars:
                    tf.summary.scalar(var.op.name, var)
            summaries = [] if use_tpu else None
            if train_config.summarize_gradients:
                summaries = [
                    'gradients', 'gradient_norm', 'global_gradient_norm'
                ]
            train_op = slim.optimizers.optimize_loss(
                loss=total_loss,
                global_step=global_step,
                learning_rate=None,
                clip_gradients=clip_gradients_value,
                optimizer=training_optimizer,
                update_ops=detection_model.updates(),
                variables=trainable_variables,
                summaries=summaries,
                name='')  # Preventing scope prefix on all variables.

        if mode == tf.estimator.ModeKeys.PREDICT:
            exported_output = exporter_lib.add_output_tensor_nodes(detections)
            export_outputs = {
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                tf.estimator.export.PredictOutput(exported_output)
            }

        eval_metric_ops = None
        scaffold = None
        if mode == tf.estimator.ModeKeys.EVAL:
            class_agnostic = (fields.DetectionResultFields.detection_classes
                              not in detections)
            groundtruth = _prepare_groundtruth_for_eval(
                detection_model, class_agnostic,
                eval_input_config.max_number_of_boxes)
            use_original_images = fields.InputDataFields.original_image in features
            if use_original_images:
                eval_images = features[fields.InputDataFields.original_image]
                true_image_shapes = tf.slice(
                    features[fields.InputDataFields.true_image_shape], [0, 0],
                    [-1, 3])
                original_image_spatial_shapes = features[
                    fields.InputDataFields.original_image_spatial_shape]
            else:
                eval_images = features[fields.InputDataFields.image]
                true_image_shapes = None
                original_image_spatial_shapes = None

            eval_dict = eval_util.result_dict_for_batched_example(
                eval_images,
                features[inputs.HASH_KEY],
                detections,
                groundtruth,
                class_agnostic=class_agnostic,
                scale_to_absolute=True,
                original_image_spatial_shapes=original_image_spatial_shapes,
                true_image_shapes=true_image_shapes)

            if fields.InputDataFields.image_additional_channels in features:
                eval_dict[fields.InputDataFields.
                          image_additional_channels] = features[
                              fields.InputDataFields.image_additional_channels]

            if class_agnostic:
                category_index = label_map_util.create_class_agnostic_category_index(
                )
            else:
                category_index = label_map_util.create_category_index_from_labelmap(
                    eval_input_config.label_map_path)
            vis_metric_ops = None
            if not use_tpu and use_original_images:
                keypoint_edges = [(kp.start, kp.end)
                                  for kp in eval_config.keypoint_edge]

                eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
                    category_index,
                    max_examples_to_draw=eval_config.num_visualizations,
                    max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
                    min_score_thresh=eval_config.min_score_threshold,
                    use_normalized_coordinates=False,
                    keypoint_edges=keypoint_edges or None)
                vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
                    eval_dict)

            # Eval metrics on a single example.
            eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
                eval_config, list(category_index.values()), eval_dict)
            for loss_key, loss_tensor in iter(losses_dict.items()):
                eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
            for var in optimizer_summary_vars:
                eval_metric_ops[var.op.name] = (var, tf.no_op())
            if vis_metric_ops is not None:
                eval_metric_ops.update(vis_metric_ops)
            eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

            if eval_config.use_moving_averages:
                variable_averages = tf.train.ExponentialMovingAverage(0.0)
                variables_to_restore = variable_averages.variables_to_restore()
                keep_checkpoint_every_n_hours = (
                    train_config.keep_checkpoint_every_n_hours)
                saver = tf.train.Saver(
                    variables_to_restore,
                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours
                )
                scaffold = tf.train.Scaffold(saver=saver)

        # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
        if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
            return tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                scaffold_fn=scaffold_fn,
                predictions=detections,
                loss=total_loss,
                train_op=train_op,
                eval_metrics=eval_metric_ops,
                export_outputs=export_outputs)
        else:
            if scaffold is None:
                keep_checkpoint_every_n_hours = (
                    train_config.keep_checkpoint_every_n_hours)
                saver = tf.train.Saver(
                    sharded=True,
                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
                    save_relative_paths=True)
                tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
                scaffold = tf.train.Scaffold(saver=saver)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=detections,
                                              loss=total_loss,
                                              train_op=train_op,
                                              eval_metric_ops=eval_metric_ops,
                                              export_outputs=export_outputs,
                                              scaffold=scaffold)
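This `model_fn` closes over variables such as `use_tpu`, `train_config`, `eval_config`, and `configs` that are supplied by an enclosing factory function. A rough sketch of how such a `model_fn` is typically handed to an estimator is shown below; the TPU name, model directory, and batch sizes are placeholder assumptions, not values taken from the example.

# Illustrative wiring only (TF 1.x estimator API); all literals are assumptions.
if use_tpu:
    tpu_cluster = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='my-tpu')
    run_config = tf.estimator.tpu.RunConfig(
        cluster=tpu_cluster,
        model_dir='/tmp/model_dir',
        tpu_config=tf.estimator.tpu.TPUConfig(iterations_per_loop=100))
    estimator = tf.estimator.tpu.TPUEstimator(
        model_fn=model_fn,
        config=run_config,
        use_tpu=True,
        train_batch_size=64,
        eval_batch_size=8)
else:
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir='/tmp/model_dir')
# estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)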
Example #3
def build_graph(pipeline_config,
                shapes_info,
                input_type='encoded_image_string_tensor',
                use_bfloat16=True):
  """Builds serving graph of faster_rcnn to be exported.

  Args:
    pipeline_config: A TrainEvalPipelineConfig proto.
    shapes_info: A python dict of tensors' names and their shapes, returned by
      `get_prediction_tensor_shapes()`.
    input_type: One of
                'encoded_image_string_tensor': a 1d tensor with dtype=tf.string
                'image_tensor': a 4d tensor with dtype=tf.uint8
                'tf_example': a 1d tensor with dtype=tf.string
    use_bfloat16: If true, use tf.bfloat16 on TPU.

  Returns:
    placeholder_tensor: A placeholder tensor, type determined by `input_type`.
    result_tensor_dict: A python dict of tensors' names and tensors.
  """
  pipeline_config = modify_config(pipeline_config)
  detection_model = model_builder.build(
      pipeline_config.model, is_training=False)

  placeholder_tensor, input_tensors = \
      exporter.input_placeholder_fn_map[input_type]()

  # CPU pre-processing
  inputs = tf.cast(input_tensors, dtype=tf.float32)
  preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)

  # Dimshuffle: [b, h, w, c] -> [b, c, h, w]
  preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 3, 1, 2])
  if use_bfloat16:
    preprocessed_inputs = tf.cast(preprocessed_inputs, dtype=tf.bfloat16)

  # TPU feature extraction
  def tpu_subgraph_first_stage_fn(preprocessed_inputs):
    """Defines the first part of graph on TPU."""
    # [b, c, h, w] -> [b, h, w, c]
    preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 2, 3, 1])

    prediction_dict = detection_model._predict_first_stage(preprocessed_inputs)

    # [b, h, w, c] -> [b, c, h, w]
    rpn_box_predictor_features = tf.transpose(
        prediction_dict[RPN_BOX_PREDICTOR_FEATURES], perm=[0, 3, 1, 2])
    # [b, h, w, c] -> [b, c, h, w]
    rpn_features_to_crop = tf.transpose(
        prediction_dict[RPN_FEATURES_TO_CROP], perm=[0, 3, 1, 2])
    # [batch, anchor, depth] -> [depth, batch, anchor]
    rpn_box_encodings = tf.transpose(
        prediction_dict[RPN_BOX_ENCODINGS], perm=[2, 0, 1])
    # [batch, anchor, depth] -> [depth, batch, anchor]
    rpn_objectness_predictions_with_background = tf.transpose(
        prediction_dict[RPN_OBJECTNESS_PREDICTIONS_WITH_BACKGROUND],
        perm=[2, 0, 1])
    # [anchors, depth]
    anchors = tf.transpose(prediction_dict[ANCHORS], perm=[1, 0])

    return (rpn_box_predictor_features, rpn_features_to_crop,
            prediction_dict['image_shape'], rpn_box_encodings,
            rpn_objectness_predictions_with_background, anchors)

  @function.Defun(capture_resource_var_by_value=False)
  def tpu_subgraph_first_stage():
    if use_bfloat16:
      with tf.contrib.tpu.bfloat16_scope():
        return tf.contrib.tpu.rewrite(tpu_subgraph_first_stage_fn,
                                      [preprocessed_inputs])
    else:
      return tf.contrib.tpu.rewrite(tpu_subgraph_first_stage_fn,
                                    [preprocessed_inputs])

  (rpn_box_predictor_features, rpn_features_to_crop, image_shape,
   rpn_box_encodings, rpn_objectness_predictions_with_background,
   anchors) = \
      tpu_functional.TPUPartitionedCall(
          args=tpu_subgraph_first_stage.captured_inputs,
          device_ordinal=tpu_ops.tpu_ordinal_selector(),
          Tout=[
              o.type
              for o in tpu_subgraph_first_stage.definition.signature.output_arg
          ],
          f=tpu_subgraph_first_stage)

  prediction_dict = {
      RPN_BOX_PREDICTOR_FEATURES:
          tf.transpose(rpn_box_predictor_features, perm=[0, 2, 3, 1]),
      RPN_FEATURES_TO_CROP:
          tf.transpose(rpn_features_to_crop, perm=[0, 2, 3, 1]),
      IMAGE_SHAPE:
          image_shape,
      RPN_BOX_ENCODINGS:
          tf.transpose(rpn_box_encodings, perm=[1, 2, 0]),
      RPN_OBJECTNESS_PREDICTIONS_WITH_BACKGROUND:
          tf.transpose(
              rpn_objectness_predictions_with_background, perm=[1, 2, 0]),
      ANCHORS:
          tf.transpose(anchors, perm=[1, 0]),
  }

  for k in prediction_dict:
    prediction_dict[k].set_shape(shapes_info[k])

  if use_bfloat16:
    prediction_dict = utils.bfloat16_to_float32_nested(prediction_dict)

  # CPU region proposal (NMS)
  proposal_boxes_normalized, num_proposals = \
      detection_model._proposal_postprocess(
          tf.cast(prediction_dict[RPN_BOX_ENCODINGS], dtype=tf.float32),
          tf.cast(
              prediction_dict[RPN_OBJECTNESS_PREDICTIONS_WITH_BACKGROUND],
              dtype=tf.float32), prediction_dict[ANCHORS],
          prediction_dict[IMAGE_SHAPE], true_image_shapes)
  prediction_dict[NUM_PROPOSALS] = num_proposals

  # [b, h, w, c] -> [b, c, h, w]
  prediction_dict[RPN_FEATURES_TO_CROP] = tf.transpose(
      prediction_dict[RPN_FEATURES_TO_CROP], perm=[0, 3, 1, 2])

  if use_bfloat16:
    prediction_dict[RPN_FEATURES_TO_CROP] = tf.cast(
        prediction_dict[RPN_FEATURES_TO_CROP], dtype=tf.bfloat16)
    proposal_boxes_normalized = tf.cast(
        proposal_boxes_normalized, dtype=tf.bfloat16)

  # TPU box prediction
  def tpu_subgraph_second_stage_fn(rpn_features_to_crop,
                                   proposal_boxes_normalized, image_shape):
    """Defines the second part of graph on TPU."""
    rpn_features_to_crop = tf.transpose(rpn_features_to_crop, perm=[0, 2, 3, 1])

    output_dict = detection_model._box_prediction(
        rpn_features_to_crop, proposal_boxes_normalized, image_shape)

    return [
        output_dict[REFINED_BOX_ENCODINGS],
        output_dict[CLASS_PREDICTIONS_WITH_BACKGROUND],
        output_dict[PROPOSAL_BOXES], output_dict[BOX_CLASSIFIER_FEATURES]
    ]

  @function.Defun(capture_resource_var_by_value=False)
  def tpu_subgraph_second_stage():
    """TPU subgraph 2 wrapper."""
    if use_bfloat16:
      with tf.contrib.tpu.bfloat16_scope():
        return tf.contrib.tpu.rewrite(tpu_subgraph_second_stage_fn, [
            prediction_dict[RPN_FEATURES_TO_CROP],
            proposal_boxes_normalized,
            prediction_dict[IMAGE_SHAPE],
        ])
    else:
      return tf.contrib.tpu.rewrite(tpu_subgraph_second_stage_fn, [
          prediction_dict[RPN_FEATURES_TO_CROP],
          proposal_boxes_normalized,
          prediction_dict[IMAGE_SHAPE],
      ])

  (refined_box_encodings, class_predictions_with_background, proposal_boxes,
   box_classifier_features) = tpu_functional.TPUPartitionedCall(
       args=tpu_subgraph_second_stage.captured_inputs,
       device_ordinal=tpu_ops.tpu_ordinal_selector(),
       Tout=[
           o.type
           for o in tpu_subgraph_second_stage.definition.signature.output_arg
       ],
       f=tpu_subgraph_second_stage)

  prediction_dict[RPN_FEATURES_TO_CROP] = tf.transpose(
      prediction_dict[RPN_FEATURES_TO_CROP], perm=[0, 2, 3, 1])

  prediction_dict_updater = {
      REFINED_BOX_ENCODINGS: refined_box_encodings,
      CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background,
      PROPOSAL_BOXES: proposal_boxes,
      BOX_CLASSIFIER_FEATURES: box_classifier_features,
      PROPOSAL_BOXES_NORMALIZED: proposal_boxes_normalized,
  }

  for k in prediction_dict_updater:
    prediction_dict_updater[k].set_shape(shapes_info[k])

  prediction_dict.update(prediction_dict_updater)

  if use_bfloat16:
    prediction_dict = utils.bfloat16_to_float32_nested(prediction_dict)

  # CPU post-processing (NMS)
  postprocessed_tensors = detection_model.postprocess(prediction_dict,
                                                      true_image_shapes)
  result_tensor_dict = exporter.add_output_tensor_nodes(postprocessed_tensors,
                                                        'inference_op')

  return placeholder_tensor, result_tensor_dict
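The `shapes_info` argument is expected to map the same keys used above to static shapes obtained from `get_prediction_tensor_shapes()`. Purely to illustrate the structure (the sizes below are made up, not real values), it might look like this:

# Illustrative structure only; actual shapes come from
# get_prediction_tensor_shapes() for a specific pipeline config.
shapes_info = {
    RPN_BOX_PREDICTOR_FEATURES: [1, 64, 64, 512],
    RPN_FEATURES_TO_CROP: [1, 64, 64, 1024],
    IMAGE_SHAPE: [4],
    RPN_BOX_ENCODINGS: [1, 49152, 4],
    RPN_OBJECTNESS_PREDICTIONS_WITH_BACKGROUND: [1, 49152, 2],
    ANCHORS: [49152, 4],
    REFINED_BOX_ENCODINGS: [300, 90, 4],
    CLASS_PREDICTIONS_WITH_BACKGROUND: [300, 91],
    PROPOSAL_BOXES: [1, 300, 4],
    BOX_CLASSIFIER_FEATURES: [300, 9, 9, 1536],
    PROPOSAL_BOXES_NORMALIZED: [1, 300, 4],
}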
Example #4
    def model_fn(features, labels, mode, params=None):
        """Constructs the object detection model.

        Args:
            features: Dictionary of feature tensors, returned from `input_fn`.
            labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
            otherwise None.
            mode: Mode key from tf.estimator.ModeKeys.
            params: Parameter dictionary passed from the estimator.

        Returns:
            An `EstimatorSpec` that encapsulates the model and its serving
            configurations.
        """
        params = params or {}
        total_loss, train_op, detections, export_outputs = None, None, None, None
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        # Make sure to set the Keras learning phase. True during training,
        # False for inference.
        tf.keras.backend.set_learning_phase(is_training)
        detection_model = detection_model_fn(is_training=is_training,
                                             add_summaries=(not use_tpu))

        scaffold_fn = None
        scaffold = None
        eval_metric_ops = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            # get the optimizer and global step:
            global_step = tf.train.get_or_create_global_step()
            training_optimizer, optimizer_summary_vars = optimizer_builder.build(
                train_config.optimizer)

            # Get the trainable variables.
            include_variables = (train_config.update_trainable_variables
                                 if train_config.update_trainable_variables
                                 else None)
            exclude_variables = (train_config.freeze_variables
                                 if train_config.freeze_variables else None)
            trainable_variables = tf.contrib.framework.filter_variables(
                tf.trainable_variables(),
                include_patterns=include_variables,
                exclude_patterns=exclude_variables)

            # Get the gradient clipping value.
            clip_gradients_value = None
            if train_config.gradient_clipping_by_norm > 0:
                clip_gradients_value = train_config.gradient_clipping_by_norm

            total_loss = 0.
            tower_grads = []
            with tf.variable_scope(tf.get_variable_scope()):
                feature_list, label_list = split_features_and_labels(
                    features, labels, train_config.GPU_num)
                for i in range(train_config.GPU_num):
                    with tf.device('/gpu:%d' % i):
                        with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                            loss = tower_loss(scope=scope,
                                              features=feature_list[i],
                                              labels=label_list[i],
                                              detection_model=detection_model,
                                              train_config=train_config)
                            tf.get_variable_scope().reuse_variables()
                            grads = training_optimizer.compute_gradients(
                                loss=loss)
                            if isinstance(clip_gradients_value, float):
                                grads = clip_gradients_by_norm(
                                    grads, clip_gradients_value)
                            tower_grads.append(grads)
                            total_loss += loss
            total_loss /= train_config.GPU_num
            grad_avg = average_gradients(tower_grads)

            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                apply_gradient_op = training_optimizer.apply_gradients(
                    grads_and_vars=grad_avg, global_step=global_step)

            train_op = apply_gradient_op

            if train_config.fine_tune_checkpoint:
                if not train_config.fine_tune_checkpoint_type:
                    # train_config.from_detection_checkpoint field is deprecated. For
                    # backward compatibility, set train_config.fine_tune_checkpoint_type
                    # based on train_config.from_detection_checkpoint.
                    if train_config.from_detection_checkpoint:
                        train_config.fine_tune_checkpoint_type = 'detection'
                    else:
                        train_config.fine_tune_checkpoint_type = 'classification'
                asg_map = detection_model.restore_map(
                    fine_tune_checkpoint_type=train_config.
                    fine_tune_checkpoint_type,
                    load_all_detection_checkpoint_vars=(
                        train_config.load_all_detection_checkpoint_vars))
                available_var_map = (
                    variables_helper.get_variables_available_in_checkpoint(
                        asg_map,
                        train_config.fine_tune_checkpoint,
                        include_global_step=False))
                if use_tpu:

                    def tpu_scaffold():
                        tf.train.init_from_checkpoint(
                            train_config.fine_tune_checkpoint,
                            available_var_map)
                        return tf.train.Scaffold()

                    scaffold_fn = tpu_scaffold
                else:
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)

        elif mode == tf.estimator.ModeKeys.EVAL:
            detection_model = detection_model_fn(is_training=is_training,
                                                 add_summaries=(not use_tpu))
            # When evaluating on training data, it is necessary to check whether
            # the groundtruth must be unpadded.
            # In EVAL and PREDICT modes, evaluation and prediction are explicitly
            # pinned to the CPU.
            with tf.device('/cpu:1'):
                # training_optimizer, optimizer_summary_vars = optimizer_builder.build( train_config.optimizer )
                boxes_shape = (labels[fields.InputDataFields.
                                      groundtruth_boxes].get_shape().as_list())
                unpad_groundtruth_tensors = boxes_shape[
                    1] is not None and not use_tpu
                labels = unstack_batch(
                    labels,
                    unpad_groundtruth_tensors=unpad_groundtruth_tensors)

                gt_boxes_list = labels[
                    fields.InputDataFields.groundtruth_boxes]
                gt_classes_list = labels[
                    fields.InputDataFields.groundtruth_classes]
                gt_masks_list = None
                if fields.InputDataFields.groundtruth_instance_masks in labels:
                    gt_masks_list = labels[
                        fields.InputDataFields.groundtruth_instance_masks]
                gt_keypoints_list = None
                if fields.InputDataFields.groundtruth_keypoints in labels:
                    gt_keypoints_list = labels[
                        fields.InputDataFields.groundtruth_keypoints]
                gt_weights_list = None
                if fields.InputDataFields.groundtruth_weights in labels:
                    gt_weights_list = labels[
                        fields.InputDataFields.groundtruth_weights]
                gt_confidences_list = None
                if fields.InputDataFields.groundtruth_confidences in labels:
                    gt_confidences_list = labels[
                        fields.InputDataFields.groundtruth_confidences]
                gt_is_crowd_list = None
                if fields.InputDataFields.groundtruth_is_crowd in labels:
                    gt_is_crowd_list = labels[
                        fields.InputDataFields.groundtruth_is_crowd]
                detection_model.provide_groundtruth(
                    groundtruth_boxes_list=gt_boxes_list,
                    groundtruth_classes_list=gt_classes_list,
                    groundtruth_confidences_list=gt_confidences_list,
                    groundtruth_masks_list=gt_masks_list,
                    groundtruth_keypoints_list=gt_keypoints_list,
                    groundtruth_weights_list=gt_weights_list,
                    groundtruth_is_crowd_list=gt_is_crowd_list)

                training_optimizer, optimizer_summary_vars = optimizer_builder.build(
                    train_config.optimizer)

                preprocessed_images = features[fields.InputDataFields.image]
                if use_tpu and train_config.use_bfloat16:
                    with tf.contrib.tpu.bfloat16_scope():
                        prediction_dict = detection_model.predict(
                            preprocessed_images,
                            features[fields.InputDataFields.true_image_shape])
                    for k, v in prediction_dict.items():
                        if v.dtype == tf.bfloat16:
                            prediction_dict[k] = tf.cast(v, tf.float32)
                else:
                    prediction_dict = detection_model.predict(
                        preprocessed_images,
                        features[fields.InputDataFields.true_image_shape])

                detections = detection_model.postprocess(
                    prediction_dict,
                    features[fields.InputDataFields.true_image_shape])

                losses_dict = detection_model.loss(
                    prediction_dict,
                    features[fields.InputDataFields.true_image_shape])
                losses = [loss_tensor for loss_tensor in losses_dict.values()]
                if train_config.add_regularization_loss:
                    regularization_losses = (
                        detection_model.regularization_losses())
                    if regularization_losses:
                        regularization_loss = tf.add_n(
                            regularization_losses, name='regularization_loss')
                        losses.append(regularization_loss)
                        losses_dict[
                            'Loss/regularization_loss'] = regularization_loss
                total_loss = tf.add_n(losses, name='total_loss')
                losses_dict['Loss/total_loss'] = total_loss

                if 'graph_rewriter_config' in configs:
                    graph_rewriter_fn = graph_rewriter_builder.build(
                        configs['graph_rewriter_config'],
                        is_training=is_training)
                    graph_rewriter_fn()

                class_agnostic = (
                    fields.DetectionResultFields.detection_classes
                    not in detections)
                groundtruth = _prepare_groundtruth_for_eval(
                    detection_model, class_agnostic,
                    eval_input_config.max_number_of_boxes)
                use_original_images = fields.InputDataFields.original_image in features
                if use_original_images:
                    eval_images = features[
                        fields.InputDataFields.original_image]
                    true_image_shapes = tf.slice(
                        features[fields.InputDataFields.true_image_shape],
                        [0, 0], [-1, 3])
                    original_image_spatial_shapes = features[
                        fields.InputDataFields.original_image_spatial_shape]
                else:
                    eval_images = features[fields.InputDataFields.image]
                    true_image_shapes = None
                    original_image_spatial_shapes = None

                eval_dict = eval_util.result_dict_for_batched_example(
                    eval_images,
                    features[inputs.HASH_KEY],
                    detections,
                    groundtruth,
                    class_agnostic=class_agnostic,
                    scale_to_absolute=True,
                    original_image_spatial_shapes=original_image_spatial_shapes,
                    true_image_shapes=true_image_shapes)

                if class_agnostic:
                    category_index = label_map_util.create_class_agnostic_category_index(
                    )
                else:
                    category_index = label_map_util.create_category_index_from_labelmap(
                        eval_input_config.label_map_path)
                vis_metric_ops = None
                if not use_tpu and use_original_images:
                    eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
                        category_index,
                        max_examples_to_draw=eval_config.num_visualizations,
                        max_boxes_to_draw=eval_config.
                        max_num_boxes_to_visualize,
                        min_score_thresh=eval_config.min_score_threshold,
                        use_normalized_coordinates=False)
                    vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
                        eval_dict)

                # Eval metrics on a single example.
                eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
                    eval_config, list(category_index.values()), eval_dict)
                for loss_key, loss_tensor in iter(losses_dict.items()):
                    eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
                for var in optimizer_summary_vars:
                    eval_metric_ops[var.op.name] = (var, tf.no_op())
                if vis_metric_ops is not None:
                    eval_metric_ops.update(vis_metric_ops)
                eval_metric_ops = {
                    str(k): v
                    for k, v in eval_metric_ops.items()
                }

                if eval_config.use_moving_averages:
                    variable_averages = tf.train.ExponentialMovingAverage(0.0)
                    variables_to_restore = variable_averages.variables_to_restore(
                    )
                    keep_checkpoint_every_n_hours = (
                        train_config.keep_checkpoint_every_n_hours)
                    saver = tf.train.Saver(variables_to_restore,
                                           keep_checkpoint_every_n_hours=
                                           keep_checkpoint_every_n_hours)
                    scaffold = tf.train.Scaffold(saver=saver)

        elif mode == tf.estimator.ModeKeys.PREDICT:
            detection_model = detection_model_fn(is_training=is_training,
                                                 add_summaries=(not use_tpu))
            # Similar to EVAL mode, PREDICT is also run explicitly on the CPU.
            with tf.device('/cpu:1'):
                preprocessed_images = features[fields.InputDataFields.image]

                if use_tpu and train_config.use_bfloat16:
                    with tf.contrib.tpu.bfloat16_scope():
                        prediction_dict = detection_model.predict(
                            preprocessed_images,
                            features[fields.InputDataFields.true_image_shape])
                        for k, v in prediction_dict.items():
                            if v.dtype == tf.bfloat16:
                                prediction_dict[k] = tf.cast(v, tf.float32)
                else:
                    prediction_dict = detection_model.predict(
                        preprocessed_images,
                        features[fields.InputDataFields.true_image_shape])

                detections = detection_model.postprocess(
                    prediction_dict,
                    features[fields.InputDataFields.true_image_shape])

                exported_output = exporter_lib.add_output_tensor_nodes(
                    detections)
                export_outputs = {
                    tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                    tf.estimator.export.PredictOutput(exported_output)
                }

        # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
        if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                scaffold_fn=scaffold_fn,
                predictions=detections,
                loss=total_loss,
                train_op=train_op,
                eval_metrics=eval_metric_ops,
                export_outputs=export_outputs)
        else:
            # The scaffold here only contains the Saver.
            if scaffold is None:
                keep_checkpoint_every_n_hours = (
                    train_config.keep_checkpoint_every_n_hours)
                saver = tf.train.Saver(
                    sharded=True,
                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
                    save_relative_paths=True)
                tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
                scaffold = tf.train.Scaffold(saver=saver)

            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=detections,
                                              loss=total_loss,
                                              train_op=train_op,
                                              eval_metric_ops=eval_metric_ops,
                                              export_outputs=export_outputs,
                                              scaffold=scaffold)
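Example #4 depends on helpers that are not shown here (`split_features_and_labels`, `tower_loss`, `clip_gradients_by_norm`, `average_gradients`). As an assumption of what the gradient-averaging helper might look like, here is a sketch in the style of the classic multi-GPU tower pattern:

def average_gradients(tower_grads):
    """Averages gradients across towers (hypothetical helper, for illustration).

    Args:
        tower_grads: List over towers of lists of (gradient, variable) pairs,
            as returned by optimizer.compute_gradients() on each GPU.

    Returns:
        A single list of averaged (gradient, variable) pairs.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad_gpu0, var), (grad_gpu1, var), ...).
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # All towers share the same variable, so take it from the first tower.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads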
Example #5
    def model_fn(features, labels, mode, params=None):
        """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
        params = params or {}
        total_loss, train_op, detections, export_outputs = None, None, None, None
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        # Make sure to set the Keras learning phase. True during training,
        # False for inference.
        tf.keras.backend.set_learning_phase(is_training)
        detection_model = detection_model_fn(is_training=is_training,
                                             add_summaries=(not use_tpu))
        scaffold_fn = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            labels = unstack_batch(labels,
                                   unpad_groundtruth_tensors=train_config.
                                   unpad_groundtruth_tensors)
        elif mode == tf.estimator.ModeKeys.EVAL:
            # When evaluating on training data, it is necessary to check whether
            # the groundtruth must be unpadded.
            boxes_shape = (labels[fields.InputDataFields.groundtruth_boxes].
                           get_shape().as_list())
            unpad_groundtruth_tensors = boxes_shape[1] is not None
            labels = unstack_batch(
                labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
            gt_classes_list = labels[
                fields.InputDataFields.groundtruth_classes]
            gt_masks_list = None
            if fields.InputDataFields.groundtruth_instance_masks in labels:
                gt_masks_list = labels[
                    fields.InputDataFields.groundtruth_instance_masks]
            gt_keypoints_list = None
            if fields.InputDataFields.groundtruth_keypoints in labels:
                gt_keypoints_list = labels[
                    fields.InputDataFields.groundtruth_keypoints]
            gt_weights_list = None
            if fields.InputDataFields.groundtruth_weights in labels:
                gt_weights_list = labels[
                    fields.InputDataFields.groundtruth_weights]
            gt_is_crowd_list = None
            if fields.InputDataFields.groundtruth_is_crowd in labels:
                gt_is_crowd_list = labels[
                    fields.InputDataFields.groundtruth_is_crowd]
            detection_model.provide_groundtruth(
                groundtruth_boxes_list=gt_boxes_list,
                groundtruth_classes_list=gt_classes_list,
                groundtruth_masks_list=gt_masks_list,
                groundtruth_keypoints_list=gt_keypoints_list,
                groundtruth_weights_list=gt_weights_list,
                groundtruth_is_crowd_list=gt_is_crowd_list)

        preprocessed_images = features[fields.InputDataFields.image]
        if use_tpu and train_config.use_bfloat16:
            with tf.contrib.tpu.bfloat16_scope():
                prediction_dict = detection_model.predict(
                    preprocessed_images,
                    features[fields.InputDataFields.true_image_shape])
                for k, v in prediction_dict.items():
                    if v.dtype == tf.bfloat16:
                        prediction_dict[k] = tf.cast(v, tf.float32)
        else:
            prediction_dict = detection_model.predict(
                preprocessed_images,
                features[fields.InputDataFields.true_image_shape])
        if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
            detections = detection_model.postprocess(
                prediction_dict,
                features[fields.InputDataFields.true_image_shape])

        if mode == tf.estimator.ModeKeys.TRAIN:
            if train_config.fine_tune_checkpoint and hparams.load_pretrained:
                if not train_config.fine_tune_checkpoint_type:
                    # train_config.from_detection_checkpoint field is deprecated. For
                    # backward compatibility, set train_config.fine_tune_checkpoint_type
                    # based on train_config.from_detection_checkpoint.
                    if train_config.from_detection_checkpoint:
                        train_config.fine_tune_checkpoint_type = 'detection'
                    else:
                        train_config.fine_tune_checkpoint_type = 'classification'
                asg_map = detection_model.restore_map(
                    fine_tune_checkpoint_type=train_config.
                    fine_tune_checkpoint_type,
                    load_all_detection_checkpoint_vars=(
                        train_config.load_all_detection_checkpoint_vars))
                available_var_map = (
                    variables_helper.get_variables_available_in_checkpoint(
                        asg_map,
                        train_config.fine_tune_checkpoint,
                        include_global_step=False))
                if use_tpu:

                    def tpu_scaffold():
                        tf.train.init_from_checkpoint(
                            train_config.fine_tune_checkpoint,
                            available_var_map)
                        return tf.train.Scaffold()

                    scaffold_fn = tpu_scaffold
                else:
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            losses_dict = detection_model.loss(
                prediction_dict,
                features[fields.InputDataFields.true_image_shape])
            losses = [loss_tensor for loss_tensor in losses_dict.values()]
            if train_config.add_regularization_loss:
                regularization_losses = detection_model.regularization_losses()
                if regularization_losses:
                    regularization_loss = tf.add_n(regularization_losses,
                                                   name='regularization_loss')
                    losses.append(regularization_loss)
                    losses_dict[
                        'Loss/regularization_loss'] = regularization_loss
            total_loss = tf.add_n(losses, name='total_loss')
            losses_dict['Loss/total_loss'] = total_loss

            if 'graph_rewriter_config' in configs:
                graph_rewriter_fn = graph_rewriter_builder.build(
                    configs['graph_rewriter_config'], is_training=is_training)
                graph_rewriter_fn()

            # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
            # can write learning rate summaries on TPU without host calls.
            global_step = tf.train.get_or_create_global_step()
            training_optimizer, optimizer_summary_vars = optimizer_builder.build(
                train_config.optimizer)

        if mode == tf.estimator.ModeKeys.TRAIN:
            if use_tpu:
                training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
                    training_optimizer)

            # Optionally freeze some layers by setting their gradients to be zero.
            trainable_variables = None
            include_variables = (train_config.update_trainable_variables
                                 if train_config.update_trainable_variables
                                 else None)
            exclude_variables = (train_config.freeze_variables
                                 if train_config.freeze_variables else None)
            trainable_variables = tf.contrib.framework.filter_variables(
                tf.trainable_variables(),
                include_patterns=include_variables,
                exclude_patterns=exclude_variables)

            clip_gradients_value = None
            if train_config.gradient_clipping_by_norm > 0:
                clip_gradients_value = train_config.gradient_clipping_by_norm

            if not use_tpu:
                for var in optimizer_summary_vars:
                    tf.summary.scalar(var.op.name, var)
            summaries = [] if use_tpu else None
            if train_config.summarize_gradients:
                summaries = [
                    'gradients', 'gradient_norm', 'global_gradient_norm'
                ]
            train_op = tf.contrib.layers.optimize_loss(
                loss=total_loss,
                global_step=global_step,
                learning_rate=None,
                clip_gradients=clip_gradients_value,
                optimizer=training_optimizer,
                update_ops=detection_model.updates(),
                variables=trainable_variables,
                summaries=summaries,
                name='')  # Preventing scope prefix on all variables.

        if mode == tf.estimator.ModeKeys.PREDICT:
            exported_output = exporter_lib.add_output_tensor_nodes(detections)
            export_outputs = {
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                tf.estimator.export.PredictOutput(exported_output)
            }

        eval_metric_ops = None
        scaffold = None
        if mode == tf.estimator.ModeKeys.EVAL:
            class_agnostic = (fields.DetectionResultFields.detection_classes
                              not in detections)
            groundtruth = _prepare_groundtruth_for_eval(
                detection_model, class_agnostic)
            use_original_images = fields.InputDataFields.original_image in features
            if use_original_images:
                eval_images = tf.cast(
                    tf.image.resize_bilinear(
                        features[fields.InputDataFields.original_image][0:1],
                        features[fields.InputDataFields.
                                 original_image_spatial_shape][0]), tf.uint8)
            else:
                eval_images = features[fields.InputDataFields.image]

            eval_dict = eval_util.result_dict_for_single_example(
                eval_images[0:1],
                features[inputs.HASH_KEY][0],
                detections,
                groundtruth,
                class_agnostic=class_agnostic,
                scale_to_absolute=True)

            if class_agnostic:
                category_index = label_map_util.create_class_agnostic_category_index(
                )
            else:
                category_index = label_map_util.create_category_index_from_labelmap(
                    eval_input_config.label_map_path)
            vis_metric_ops = None
            if not use_tpu and use_original_images:
                eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
                    category_index,
                    max_examples_to_draw=eval_config.num_visualizations,
                    max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
                    min_score_thresh=eval_config.min_score_threshold,
                    use_normalized_coordinates=False)
                vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
                    eval_dict)

            # Eval metrics on a single example.
            eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
                eval_config, list(category_index.values()), eval_dict)
            for loss_key, loss_tensor in iter(losses_dict.items()):
                eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
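            # Surface optimizer summary vars (e.g. the learning rate) as eval
            # metrics; tf.no_op() serves as a dummy update op so the value is
            # simply read out during evaluation.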
            for var in optimizer_summary_vars:
                eval_metric_ops[var.op.name] = (var, tf.no_op())
            if vis_metric_ops is not None:
                eval_metric_ops.update(vis_metric_ops)
            eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

            if eval_config.use_moving_averages:
                variable_averages = tf.train.ExponentialMovingAverage(0.0)
                variables_to_restore = variable_averages.variables_to_restore()
                keep_checkpoint_every_n_hours = (
                    train_config.keep_checkpoint_every_n_hours)
                saver = tf.train.Saver(
                    variables_to_restore,
                    keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours
                )
                scaffold = tf.train.Scaffold(saver=saver)

        # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
        if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
            return tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                scaffold_fn=scaffold_fn,
                predictions=detections,
                loss=total_loss,
                train_op=train_op,
                eval_metrics=eval_metric_ops,
                export_outputs=export_outputs)
        else:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=detections,
                                              loss=total_loss,
                                              train_op=train_op,
                                              eval_metric_ops=eval_metric_ops,
                                              export_outputs=export_outputs,
                                              scaffold=scaffold)
Example #6
def build_graph(pipeline_config,
                shapes_info,
                input_type='encoded_image_string_tensor',
                use_bfloat16=False):
    """Builds TPU serving graph of ssd to be exported.

  Args:
    pipeline_config: A TrainEvalPipelineConfig proto.
    shapes_info: A python dict of tensors' names and their shapes, returned by
      `get_prediction_tensor_shapes()`.
    input_type: One of
                'encoded_image_string_tensor': a 1d tensor with dtype=tf.string
                'image_tensor': a 4d tensor with dtype=tf.uint8
                'tf_example': a 1d tensor with dtype=tf.string
    use_bfloat16: If true, use tf.bfloat16 on TPU.

  Returns:
    placeholder_tensor: A placeholder tensor, type determined by `input_type`.
    result_tensor_dict: A python dict of tensors' names and tensors.
  """

    detection_model = model_builder.build(pipeline_config.model,
                                          is_training=False)

    placeholder_tensor, input_tensors = \
        exporter.input_placeholder_fn_map[input_type]()

    inputs = tf.cast(input_tensors, dtype=tf.float32)
    preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)

    # Dimshuffle: (b, h, w, c) -> (b, c, h, w)
    # This is to avoid extra padding due to TPU memory layout:
    # We swap larger dimensions in and smaller dimensions out, so that small
    # dimensions don't get padded to tens or hundreds of times their own size.
    # This trick is applied to other similar tensors below.
    preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 3, 1, 2])
    if use_bfloat16:
        preprocessed_inputs = tf.cast(preprocessed_inputs, dtype=tf.bfloat16)

    def predict_tpu_subgraph(preprocessed_inputs, true_image_shapes):
        """Wraps over the CPU version of `predict()`.

    This builds the same graph as the original `predict()`, manipulates the
    result tensors' dimensions to be memory efficient on TPU, and returns
    them as a list of tensors.

    Args:
      preprocessed_inputs: A 4D tensor of shape (batch, channels, height, width)
      true_image_shapes: True image shapes tensor.

    Returns:
      A Python list of tensors:
        box_encodings: 3D tensor of shape (code_size, batch_size, num_anchors)
        class_predictions_with_background: 3D tensor,
            shape (num_classes + 1, batch_size, num_anchors)
        anchors: 2D tensor of shape (4, num_anchors)
    """
        # Dimshuffle: (b, c, h, w) -> (b, h, w, c)
        preprocessed_inputs = tf.transpose(preprocessed_inputs,
                                           perm=[0, 2, 3, 1])
        if use_bfloat16:
            with tf.contrib.tpu.bfloat16_scope():
                prediction_dict = detection_model.predict(
                    preprocessed_inputs, true_image_shapes)
        else:
            prediction_dict = detection_model.predict(preprocessed_inputs,
                                                      true_image_shapes)

        # Dimshuffle: (batch, anchors, depth) -> (depth, batch, anchors)
        return [
            tf.transpose(prediction_dict[BOX_ENCODINGS], perm=[2, 0, 1]),
            tf.transpose(prediction_dict[CLASS_PREDICTIONS_WITH_BACKGROUND],
                         perm=[2, 0, 1]),
            tf.transpose(prediction_dict[ANCHORS], perm=[1, 0]),
        ]

    @function.Defun(capture_resource_var_by_value=False)
    def predict_tpu():
        return tf.contrib.tpu.rewrite(predict_tpu_subgraph,
                                      [preprocessed_inputs, true_image_shapes])

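    # Dispatch the Defun-wrapped TPU subgraph through TPUPartitionedCall; the
    # ordinal selector picks which TPU core executes it at call time.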
    prediction_outputs = tpu_functional.TPUPartitionedCall(
        args=predict_tpu.captured_inputs,
        device_ordinal=tpu_ops.tpu_ordinal_selector(),
        Tout=[o.type for o in predict_tpu.definition.signature.output_arg],
        f=predict_tpu)

    (preprocessed_inputs, box_encodings, class_predictions_with_background,
     anchors) = recover_shape(preprocessed_inputs, prediction_outputs,
                              shapes_info)

    output_tensors = {
        'preprocessed_inputs': preprocessed_inputs,
        BOX_ENCODINGS: box_encodings,
        CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background,
        ANCHORS: anchors,
    }

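    # Cast any bfloat16 outputs back to float32 so the CPU-side
    # postprocessing below runs in full precision.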
    if use_bfloat16:
        output_tensors = utils.bfloat16_to_float32_nested(output_tensors)

    postprocessed_tensors = detection_model.postprocess(
        output_tensors, true_image_shapes)
    result_tensor_dict = exporter.add_output_tensor_nodes(
        postprocessed_tensors, 'inference_op')

    return placeholder_tensor, result_tensor_dict
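
A minimal usage sketch for the builder above (hedged): the config and checkpoint paths are placeholders, and get_prediction_tensor_shapes() is assumed to be the sibling helper mentioned in the docstring, with an assumed signature.

# Hedged sketch: paths are placeholders; get_prediction_tensor_shapes() is
# the helper referenced in the docstring, with an assumed signature.
import tensorflow as tf
from google.protobuf import text_format
from object_detection.protos import pipeline_pb2

pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile('/path/to/ssd_pipeline.config', 'r') as f:
    text_format.Merge(f.read(), pipeline_config)

shapes_info = get_prediction_tensor_shapes(pipeline_config)

with tf.Graph().as_default(), tf.Session() as sess:
    placeholder_tensor, result_tensor_dict = build_graph(
        pipeline_config, shapes_info,
        input_type='encoded_image_string_tensor',
        use_bfloat16=False)
    sess.run(tf.contrib.tpu.initialize_system())
    tf.train.Saver().restore(sess, '/path/to/model.ckpt')  # hypothetical ckpt
    # ... export placeholder_tensor / result_tensor_dict as a SavedModel ...
    sess.run(tf.contrib.tpu.shutdown_system())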
Example #7
  def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Make sure to set the Keras learning phase. True during training,
    # False for inference.
    tf.keras.backend.set_learning_phase(is_training)
    detection_model = detection_model_fn(
        is_training=is_training, add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
      labels = unstack_batch(
          labels,
          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
      # When evaluating on training data, check whether the groundtruth
      # tensors need to be unpadded.
      boxes_shape = (
          labels[fields.InputDataFields.groundtruth_boxes].get_shape()
          .as_list())
      unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
      labels = unstack_batch(
          labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
      gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
      gt_masks_list = None
      if fields.InputDataFields.groundtruth_instance_masks in labels:
        gt_masks_list = labels[
            fields.InputDataFields.groundtruth_instance_masks]
      gt_keypoints_list = None
      if fields.InputDataFields.groundtruth_keypoints in labels:
        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
      gt_weights_list = None
      if fields.InputDataFields.groundtruth_weights in labels:
        gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
      gt_confidences_list = None
      if fields.InputDataFields.groundtruth_confidences in labels:
        gt_confidences_list = labels[
            fields.InputDataFields.groundtruth_confidences]
      gt_is_crowd_list = None
      if fields.InputDataFields.groundtruth_is_crowd in labels:
        gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
      detection_model.provide_groundtruth(
          groundtruth_boxes_list=gt_boxes_list,
          groundtruth_classes_list=gt_classes_list,
          groundtruth_confidences_list=gt_confidences_list,
          groundtruth_masks_list=gt_masks_list,
          groundtruth_keypoints_list=gt_keypoints_list,
          groundtruth_weights_list=gt_weights_list,
          groundtruth_is_crowd_list=gt_is_crowd_list)

    preprocessed_images = features[fields.InputDataFields.image]
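    # On TPU, run prediction under a bfloat16 scope and cast any bfloat16
    # outputs back to float32 for the loss and postprocessing ops below.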
    if use_tpu and train_config.use_bfloat16:
      with tf.contrib.tpu.bfloat16_scope():
        prediction_dict = detection_model.predict(
            preprocessed_images,
            features[fields.InputDataFields.true_image_shape])
        for k, v in prediction_dict.items():
          if v.dtype == tf.bfloat16:
            prediction_dict[k] = tf.cast(v, tf.float32)
    else:
      prediction_dict = detection_model.predict(
          preprocessed_images,
          features[fields.InputDataFields.true_image_shape])

    def postprocess_wrapper(args):
      return detection_model.postprocess(args[0], args[1])

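    # When postprocess_on_cpu is set, postprocessing (e.g. non-max
    # suppression) is pushed to the host via outside_compilation.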
    if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
      if use_tpu and postprocess_on_cpu:
        detections = tf.contrib.tpu.outside_compilation(
            postprocess_wrapper,
            (prediction_dict,
             features[fields.InputDataFields.true_image_shape]))
      else:
        detections = postprocess_wrapper((
            prediction_dict,
            features[fields.InputDataFields.true_image_shape]))

    if mode == tf.estimator.ModeKeys.TRAIN:
      if train_config.fine_tune_checkpoint and hparams.load_pretrained:
        if not train_config.fine_tune_checkpoint_type:
          # train_config.from_detection_checkpoint field is deprecated. For
          # backward compatibility, set train_config.fine_tune_checkpoint_type
          # based on train_config.from_detection_checkpoint.
          if train_config.from_detection_checkpoint:
            train_config.fine_tune_checkpoint_type = 'detection'
          else:
            train_config.fine_tune_checkpoint_type = 'classification'
        asg_map = detection_model.restore_map(
            fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
            load_all_detection_checkpoint_vars=(
                train_config.load_all_detection_checkpoint_vars))
        available_var_map = (
            variables_helper.get_variables_available_in_checkpoint(
                asg_map,
                train_config.fine_tune_checkpoint,
                include_global_step=False))
        if use_tpu:

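          # On TPU, defer checkpoint restoration into a Scaffold factory so
          # TPUEstimator can invoke it when the session is created.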
          def tpu_scaffold():
            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                          available_var_map)
            return tf.train.Scaffold()

          scaffold_fn = tpu_scaffold
        else:
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      losses_dict = detection_model.loss(
          prediction_dict, features[fields.InputDataFields.true_image_shape])
      losses = [loss_tensor for loss_tensor in losses_dict.values()]
      if train_config.add_regularization_loss:
        regularization_losses = detection_model.regularization_losses()
        if regularization_losses:
          regularization_loss = tf.add_n(
              regularization_losses, name='regularization_loss')
          losses.append(regularization_loss)
          losses_dict['Loss/regularization_loss'] = regularization_loss
      total_loss = tf.add_n(losses, name='total_loss')
      losses_dict['Loss/total_loss'] = total_loss

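      # An optional graph rewriter (e.g. quantization-aware-training rewrites
      # from graph_rewriter_config) mutates the graph in place when called.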
      if 'graph_rewriter_config' in configs:
        graph_rewriter_fn = graph_rewriter_builder.build(
            configs['graph_rewriter_config'], is_training=is_training)
        graph_rewriter_fn()

      # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
      # can write learning rate summaries on TPU without host calls.
      global_step = tf.train.get_or_create_global_step()
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
          train_config.optimizer)

    if mode == tf.estimator.ModeKeys.TRAIN:
      if use_tpu:
        training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
            training_optimizer)

      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
      include_variables = (
          train_config.update_trainable_variables
          if train_config.update_trainable_variables else None)
      exclude_variables = (
          train_config.freeze_variables
          if train_config.freeze_variables else None)
      trainable_variables = tf.contrib.framework.filter_variables(
          tf.trainable_variables(),
          include_patterns=include_variables,
          exclude_patterns=exclude_variables)

      clip_gradients_value = None
      if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm

      if not use_tpu:
        for var in optimizer_summary_vars:
          tf.summary.scalar(var.op.name, var)
      summaries = [] if use_tpu else None
      if train_config.summarize_gradients:
        summaries = ['gradients', 'gradient_norm', 'global_gradient_norm']
      train_op = tf.contrib.layers.optimize_loss(
          loss=total_loss,
          global_step=global_step,
          learning_rate=None,
          clip_gradients=clip_gradients_value,
          optimizer=training_optimizer,
          update_ops=detection_model.updates(),
          variables=trainable_variables,
          summaries=summaries,
          name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
      exported_output = exporter_lib.add_output_tensor_nodes(detections)
      export_outputs = {
          tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
              tf.estimator.export.PredictOutput(exported_output)
      }

    eval_metric_ops = None
    scaffold = None
    if mode == tf.estimator.ModeKeys.EVAL:
      class_agnostic = (
          fields.DetectionResultFields.detection_classes not in detections)
      groundtruth = _prepare_groundtruth_for_eval(
          detection_model, class_agnostic,
          eval_input_config.max_number_of_boxes)
      use_original_images = fields.InputDataFields.original_image in features
      if use_original_images:
        eval_images = features[fields.InputDataFields.original_image]
        true_image_shapes = tf.slice(
            features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3])
        original_image_spatial_shapes = features[fields.InputDataFields
                                                 .original_image_spatial_shape]
      else:
        eval_images = features[fields.InputDataFields.image]
        true_image_shapes = None
        original_image_spatial_shapes = None

      eval_dict = eval_util.result_dict_for_batched_example(
          eval_images,
          features[inputs.HASH_KEY],
          detections,
          groundtruth,
          class_agnostic=class_agnostic,
          scale_to_absolute=True,
          original_image_spatial_shapes=original_image_spatial_shapes,
          true_image_shapes=true_image_shapes)

      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path)
      vis_metric_ops = None
      if not use_tpu and use_original_images:
        eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
            category_index,
            max_examples_to_draw=eval_config.num_visualizations,
            max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
            min_score_thresh=eval_config.min_score_threshold,
            use_normalized_coordinates=False)
        vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
            eval_dict)

      # Eval metrics on a single example.
      eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
          eval_config, list(category_index.values()), eval_dict)
      for loss_key, loss_tensor in iter(losses_dict.items()):
        eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
      for var in optimizer_summary_vars:
        eval_metric_ops[var.op.name] = (var, tf.no_op())
      if vis_metric_ops is not None:
        eval_metric_ops.update(vis_metric_ops)
      eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

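      # With moving averages enabled, restore the EMA shadow variables in
      # place of the raw weights by attaching a custom Saver to the Scaffold.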
      if eval_config.use_moving_averages:
        variable_averages = tf.train.ExponentialMovingAverage(0.0)
        variables_to_restore = variable_averages.variables_to_restore()
        keep_checkpoint_every_n_hours = (
            train_config.keep_checkpoint_every_n_hours)
        saver = tf.train.Saver(
            variables_to_restore,
            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
        scaffold = tf.train.Scaffold(saver=saver)

    # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
    if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metrics=eval_metric_ops,
          export_outputs=export_outputs)
    else:
      if scaffold is None:
        keep_checkpoint_every_n_hours = (
            train_config.keep_checkpoint_every_n_hours)
        saver = tf.train.Saver(
            sharded=True,
            keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
            save_relative_paths=True)
        tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
        scaffold = tf.train.Scaffold(saver=saver)
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs,
          scaffold=scaffold)
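
For context, a hedged sketch of handing a model_fn like this one to TPUEstimator; the run-config values, batch sizes, and input functions are hypothetical, and the factory that builds this closure (supplying train_config, use_tpu, and so on) is assumed rather than shown.

# Hedged sketch: values and names below are placeholders, not taken from the
# examples above.
import tensorflow as tf

run_config = tf.contrib.tpu.RunConfig(
    master='',                                    # TPU master address, if any
    model_dir='/tmp/object_detection_model',      # hypothetical
    tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=100))

estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=model_fn,           # the closure defined above
    config=run_config,
    use_tpu=True,
    eval_on_tpu=False,           # EVAL runs on CPU, matching the model_fn
    train_batch_size=64,         # hypothetical
    eval_batch_size=8)           # hypothetical

# estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
# estimator.evaluate(input_fn=eval_input_fn, steps=num_eval_steps)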