Example #1
  def _dummy_computation_fn(features, labels):
    # `model` and `unpad_groundtruth_tensors` are captured from the
    # enclosing scope.
    model._is_training = False  # pylint: disable=protected-access
    tf.keras.backend.set_learning_phase(False)

    labels = model_lib.unstack_batch(
        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    return _compute_losses_and_predictions_dicts(model, features, labels)

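In model_lib_v2.py, a helper of this shape is typically run once on a sample batch so that the model's variables are created before a checkpoint restore. A minimal sketch of that call, assuming `train_dataset` yields (features, labels) batches and `model` is the DetectionModel from the enclosing scope (both names are assumptions, not part of this excerpt):

strategy = tf.compat.v2.distribute.get_strategy()
features, labels = next(iter(train_dataset))  # one sample batch
# Running the dummy computation builds the model variables as a side effect.
strategy.run(_dummy_computation_fn, args=(features, labels))
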
  def compute_eval_dict(features, labels):
    """Compute the evaluation result on an image."""
    # When evaluating on training data, it is necessary to check whether the
    # groundtruth tensors must be unpadded.
    boxes_shape = (
        labels[fields.InputDataFields.groundtruth_boxes].get_shape().as_list())
    unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
    labels = model_lib.unstack_batch(
        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    losses_dict, prediction_dict = _compute_losses_and_predictions_dicts(
        detection_model, features, labels, add_regularization_loss)

    def postprocess_wrapper(args):
      return detection_model.postprocess(args[0], args[1])

    # TODO(kaftan): Depending on how postprocessing will work for TPUs with
    ## TPUStrategy, it may be good to move this wrapping into a utility method.
    if use_tpu and postprocess_on_cpu:
      detections = contrib_tpu.outside_compilation(
          postprocess_wrapper,
          (prediction_dict, features[fields.InputDataFields.true_image_shape]))
    else:
      detections = postprocess_wrapper(
          (prediction_dict, features[fields.InputDataFields.true_image_shape]))

    class_agnostic = (
        fields.DetectionResultFields.detection_classes not in detections)
    # TODO(kaftan) (or anyone): move `_prepare_groundtruth_for_eval` to
    ## eval_util and call it from there.
    groundtruth = model_lib._prepare_groundtruth_for_eval(  # pylint: disable=protected-access
        detection_model, class_agnostic, eval_input_config.max_number_of_boxes)
    use_original_images = fields.InputDataFields.original_image in features
    if use_original_images:
      eval_images = features[fields.InputDataFields.original_image]
      true_image_shapes = tf.slice(
          features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3])
      original_image_spatial_shapes = features[
          fields.InputDataFields.original_image_spatial_shape]
    else:
      eval_images = features[fields.InputDataFields.image]
      true_image_shapes = None
      original_image_spatial_shapes = None

    eval_dict = eval_util.result_dict_for_batched_example(
        eval_images,
        features[inputs.HASH_KEY],
        detections,
        groundtruth,
        class_agnostic=class_agnostic,
        scale_to_absolute=True,
        original_image_spatial_shapes=original_image_spatial_shapes,
        true_image_shapes=true_image_shapes)

    return eval_dict, losses_dict, class_agnostic
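For context, a minimal sketch of how compute_eval_dict might be driven over an evaluation dataset. `eval_input` is assumed to be a tf.data.Dataset of (features, labels) pairs built elsewhere (e.g. by the Object Detection API input builders), and the closure variables used above (detection_model, use_tpu, postprocess_on_cpu, eval_input_config) are assumed to be in scope; the real eval loop additionally feeds each eval_dict into metric evaluators:

eval_step = tf.function(compute_eval_dict)  # trace once, reuse per batch
for i, (features, labels) in enumerate(eval_input):
  eval_dict, losses_dict, class_agnostic = eval_step(features, labels)
  if i % 100 == 0:
    tf.print('Finished eval step', i)
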
Example #3
def eager_train_step(detection_model,
                     features,
                     labels,
                     unpad_groundtruth_tensors,
                     optimizer,
                     learning_rate,
                     add_regularization_loss=True,
                     clip_gradients_value=None,
                     use_tpu=False,
                     use_bfloat16=False,
                     global_step=None,
                     num_replicas=1.0):
    """Execute a single training step in the TF v2 style loop."""
    is_training = True

    detection_model._is_training = is_training  # pylint: disable=protected-access
    tf.keras.backend.set_learning_phase(is_training)

    labels = model_lib.unstack_batch(
        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    with tf.GradientTape() as tape:
        losses_dict, _ = _compute_losses_and_predictions_dicts(
            detection_model, features, labels, add_regularization_loss,
            use_tpu, use_bfloat16)

        total_loss = losses_dict['Loss/total_loss']

        # Normalize loss for num replicas
        total_loss = tf.math.divide(
            total_loss, tf.constant(num_replicas, dtype=tf.float32))
        losses_dict['Loss/normalized_total_loss'] = total_loss

    for loss_type in losses_dict:
        tf.compat.v2.summary.scalar(loss_type,
                                    losses_dict[loss_type],
                                    step=global_step)

    trainable_variables = detection_model.trainable_variables

    gradients = tape.gradient(total_loss, trainable_variables)

    if clip_gradients_value:
        gradients, _ = tf.clip_by_global_norm(gradients, clip_gradients_value)
    optimizer.apply_gradients(zip(gradients, trainable_variables))

    if not use_tpu:
        tf.compat.v2.summary.scalar('learning_rate',
                                    learning_rate,
                                    step=global_step)

    return total_loss
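A minimal sketch of a training loop that drives eager_train_step above. `detection_model` and `train_input` stand in for objects produced by the Object Detection API model and input builders, and the optimizer and step counter are illustrative choices rather than the pipeline-configured ones:

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
global_step = tf.Variable(0, trainable=False, dtype=tf.int64)

for features, labels in train_input:
  loss = eager_train_step(
      detection_model,
      features,
      labels,
      unpad_groundtruth_tensors=True,
      optimizer=optimizer,
      learning_rate=optimizer.learning_rate,
      global_step=global_step)
  global_step.assign_add(1)
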
Example #4
    def test_unbatch_and_unpad_groundtruth_tensors(self):
        image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
        groundtruth_boxes_placeholder = tf.placeholder(tf.float32,
                                                       [2, 5, None])
        groundtruth_classes_placeholder = tf.placeholder(
            tf.float32, [2, 5, None])
        groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, 5])
        num_groundtruth_placeholder = tf.placeholder(tf.int32, [2])

        tensor_dict = {
            fields.InputDataFields.image:
            image_placeholder,
            fields.InputDataFields.groundtruth_boxes:
            groundtruth_boxes_placeholder,
            fields.InputDataFields.groundtruth_classes:
            groundtruth_classes_placeholder,
            fields.InputDataFields.groundtruth_weights:
            groundtruth_weights_placeholder,
            fields.InputDataFields.num_groundtruth_boxes:
            num_groundtruth_placeholder
        }
        unbatched_tensor_dict = model_lib.unstack_batch(
            tensor_dict, unpad_groundtruth_tensors=True)
        with self.test_session() as sess:
            unbatched_tensor_dict_out = sess.run(
                unbatched_tensor_dict,
                feed_dict={
                    image_placeholder:
                    np.random.rand(2, 4, 4, 3).astype(np.float32),
                    groundtruth_boxes_placeholder:
                    np.random.rand(2, 5, 4).astype(np.float32),
                    groundtruth_classes_placeholder:
                    np.random.rand(2, 5, 6).astype(np.float32),
                    groundtruth_weights_placeholder:
                    np.random.rand(2, 5).astype(np.float32),
                    num_groundtruth_placeholder:
                    np.array([3, 3], np.int32)
                })
        for image_out in unbatched_tensor_dict_out[
                fields.InputDataFields.image]:
            self.assertAllEqual(image_out.shape, [4, 4, 3])
        for groundtruth_boxes_out in unbatched_tensor_dict_out[
                fields.InputDataFields.groundtruth_boxes]:
            self.assertAllEqual(groundtruth_boxes_out.shape, [3, 4])
        for groundtruth_classes_out in unbatched_tensor_dict_out[
                fields.InputDataFields.groundtruth_classes]:
            self.assertAllEqual(groundtruth_classes_out.shape, [3, 6])
        for groundtruth_weights_out in unbatched_tensor_dict_out[
                fields.InputDataFields.groundtruth_weights]:
            self.assertAllEqual(groundtruth_weights_out.shape, [3])
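The same behavior can be exercised in TF2 eager mode without placeholders or sessions. A hedged sketch, reusing the model_lib and fields imports from the test above (only a subset of the groundtruth keys is passed, since unstack_batch unpads whichever of its known groundtruth fields are present):

import numpy as np
import tensorflow as tf

tensor_dict = {
    fields.InputDataFields.image:
        tf.constant(np.random.rand(2, 4, 4, 3).astype(np.float32)),
    fields.InputDataFields.groundtruth_boxes:
        tf.constant(np.random.rand(2, 5, 4).astype(np.float32)),
    fields.InputDataFields.num_groundtruth_boxes:
        tf.constant([3, 3], tf.int32),
}
unbatched = model_lib.unstack_batch(
    tensor_dict, unpad_groundtruth_tensors=True)
for boxes in unbatched[fields.InputDataFields.groundtruth_boxes]:
  assert boxes.shape == (3, 4)  # rows beyond num_groundtruth_boxes dropped
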
Example #5
  def test_unbatch_and_unpad_groundtruth_tensors(self):
    image_placeholder = tf.placeholder(tf.float32, [2, None, None, None])
    groundtruth_boxes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
    groundtruth_classes_placeholder = tf.placeholder(tf.float32, [2, 5, None])
    groundtruth_weights_placeholder = tf.placeholder(tf.float32, [2, 5])
    num_groundtruth_placeholder = tf.placeholder(tf.int32, [2])

    tensor_dict = {
        fields.InputDataFields.image:
            image_placeholder,
        fields.InputDataFields.groundtruth_boxes:
            groundtruth_boxes_placeholder,
        fields.InputDataFields.groundtruth_classes:
            groundtruth_classes_placeholder,
        fields.InputDataFields.groundtruth_weights:
            groundtruth_weights_placeholder,
        fields.InputDataFields.num_groundtruth_boxes:
            num_groundtruth_placeholder
    }
    unbatched_tensor_dict = model_lib.unstack_batch(
        tensor_dict, unpad_groundtruth_tensors=True)
    with self.test_session() as sess:
      unbatched_tensor_dict_out = sess.run(
          unbatched_tensor_dict,
          feed_dict={
              image_placeholder:
                  np.random.rand(2, 4, 4, 3).astype(np.float32),
              groundtruth_boxes_placeholder:
                  np.random.rand(2, 5, 4).astype(np.float32),
              groundtruth_classes_placeholder:
                  np.random.rand(2, 5, 6).astype(np.float32),
              groundtruth_weights_placeholder:
                  np.random.rand(2, 5).astype(np.float32),
              num_groundtruth_placeholder:
                  np.array([3, 3], np.int32)
          })
    for image_out in unbatched_tensor_dict_out[fields.InputDataFields.image]:
      self.assertAllEqual(image_out.shape, [4, 4, 3])
    for groundtruth_boxes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_boxes]:
      self.assertAllEqual(groundtruth_boxes_out.shape, [3, 4])
    for groundtruth_classes_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_classes]:
      self.assertAllEqual(groundtruth_classes_out.shape, [3, 6])
    for groundtruth_weights_out in unbatched_tensor_dict_out[
        fields.InputDataFields.groundtruth_weights]:
      self.assertAllEqual(groundtruth_weights_out.shape, [3])
def eager_train_step(detection_model,
                     features,
                     labels,
                     unpad_groundtruth_tensors,
                     optimizer,
                     learning_rate,
                     add_regularization_loss=True,
                     clip_gradients_value=None,
                     global_step=None,
                     num_replicas=1.0):
    """Process a single training batch.
    This method computes the loss for the model on a single training batch,
    while tracking the gradients with a gradient tape. It then updates the
    model variables with the optimizer, clipping the gradients if
    clip_gradients_value is present.
    This method can run eagerly or inside a tf.function.
    Args:
      detection_model: A DetectionModel (based on Keras) to train.
      features: Dictionary of feature tensors from the input dataset.
        Should be in the format output by `inputs.train_input`.
          features[fields.InputDataFields.image] is a [batch_size, H, W, C]
            float32 tensor with preprocessed images.
          features[HASH_KEY] is a [batch_size] int32 tensor representing unique
            identifiers for the images.
          features[fields.InputDataFields.true_image_shape] is a [batch_size, 3]
            int32 tensor representing the true image shapes, as preprocessed
            images could be padded.
          features[fields.InputDataFields.original_image] (optional, not used
            during training) is a
            [batch_size, H, W, C] float32 tensor with original images.
      labels: A dictionary of groundtruth tensors. This method unstacks
        these labels using model_lib.unstack_batch. The stacked labels are of
        the form returned by `inputs.train_input` and `inputs.eval_input`.
          labels[fields.InputDataFields.num_groundtruth_boxes] is a [batch_size]
            int32 tensor indicating the number of valid groundtruth boxes
            per image.
          labels[fields.InputDataFields.groundtruth_boxes] is a
            [batch_size, num_boxes, 4] float32 tensor containing the corners of
            the groundtruth boxes.
          labels[fields.InputDataFields.groundtruth_classes] is a
            [batch_size, num_boxes, num_classes] float32 one-hot tensor of
            classes. num_classes includes the background class.
          labels[fields.InputDataFields.groundtruth_weights] is a
            [batch_size, num_boxes] float32 tensor containing groundtruth weights
            for the boxes.
          -- Optional --
          labels[fields.InputDataFields.groundtruth_instance_masks] is a
            [batch_size, num_boxes, H, W] float32 tensor containing only binary
            values, which represent instance masks for objects.
          labels[fields.InputDataFields.groundtruth_keypoints] is a
            [batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
            keypoints for each box.
          labels[fields.InputDataFields.groundtruth_dp_num_points] is a
            [batch_size, num_boxes] int32 tensor with the number of DensePose
            sampled points per instance.
          labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
            [batch_size, num_boxes, max_sampled_points] int32 tensor with the
            part ids (0-indexed) for each instance.
          labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
            [batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
            surface coordinates for each point. Each surface coordinate is of the
            form (y, x, v, u) where (y, x) are normalized image locations and
            (v, u) are part-relative normalized surface coordinates.
          labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
            k-hot tensor of classes.
          labels[fields.InputDataFields.groundtruth_track_ids] is an int32
            tensor of track IDs.
          labels[fields.InputDataFields.groundtruth_keypoint_depths] is a
            float32 tensor containing keypoint depths information.
          labels[fields.InputDataFields.groundtruth_keypoint_depth_weights] is a
            float32 tensor containing the weights of the keypoint depth feature.
      unpad_groundtruth_tensors: Whether the groundtruth tensors should be
        unpadded; passed through to model_lib.unstack_batch.
      optimizer: The training optimizer that will update the variables.
      learning_rate: The learning rate tensor for the current training step.
        This is used only for TensorBoard logging purposes; it does not affect
        model training.
      add_regularization_loss: Whether or not to include the model's
        regularization loss in the losses dictionary.
      clip_gradients_value: If this is present, clip the gradients global norm
        at this value using `tf.clip_by_global_norm`.
      global_step: The current training step. Used for TensorBoard logging
        purposes. This step is not updated by this function and must be
        incremented separately.
      num_replicas: The number of replicas in the current distribution strategy.
        This is used to scale the total loss so that training in a distribution
        strategy works correctly.

    Returns:
      The total loss observed at this training step.
    """
    # """Execute a single training step in the TF v2 style loop."""
    is_training = True

    detection_model._is_training = is_training  # pylint: disable=protected-access
    tf.keras.backend.set_learning_phase(is_training)

    labels = model_lib.unstack_batch(
        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    with tf.GradientTape() as tape:
        losses_dict, _ = _compute_losses_and_predictions_dicts(
            detection_model, features, labels, add_regularization_loss)

        total_loss = losses_dict['Loss/total_loss']

        # Normalize loss for num replicas
        total_loss = tf.math.divide(total_loss,
                                    tf.constant(num_replicas, dtype=tf.float32))
        losses_dict['Loss/normalized_total_loss'] = total_loss

    for loss_type in losses_dict:
        tf.compat.v2.summary.scalar(
            loss_type, losses_dict[loss_type], step=global_step)

    trainable_variables = detection_model.trainable_variables

    gradients = tape.gradient(total_loss, trainable_variables)

    if clip_gradients_value:
        gradients, _ = tf.clip_by_global_norm(gradients, clip_gradients_value)
    optimizer.apply_gradients(zip(gradients, trainable_variables))
    tf.compat.v2.summary.scalar(
        'learning_rate', learning_rate, step=global_step)
    tf.compat.v2.summary.image(
        name='train_input_images',
        step=global_step,
        data=features[fields.InputDataFields.image],
        max_outputs=3)
    return total_loss
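Since the docstring notes that this function can run inside a tf.function, here is a sketch of wrapping it under a distribution strategy, which is also why total_loss is divided by num_replicas above. `detection_model`, `optimizer`, and `global_step` are assumed to exist; the wrapper itself is illustrative, not the library's own training driver:

strategy = tf.distribute.get_strategy()

@tf.function
def distributed_train_step(features, labels):
  per_replica_loss = strategy.run(
      eager_train_step,
      args=(detection_model, features, labels, True, optimizer,
            optimizer.learning_rate),
      kwargs={'global_step': global_step,
              'num_replicas': strategy.num_replicas_in_sync})
  return strategy.reduce(
      tf.distribute.ReduceOp.SUM, per_replica_loss, axis=None)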