Example #1
def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
                                  max_number_of_boxes):
    """Extracts groundtruth data from detection_model and prepares it for eval.

  Args:
    detection_model: A `DetectionModel` object.
    class_agnostic: Whether the detections are class_agnostic.
    max_number_of_boxes: Max number of groundtruth boxes.

  Returns:
    groundtruth: Dictionary with the following fields:
      'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes,
        in normalized coordinates.
      'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
        classes.
      'groundtruth_masks': 4D float32 tensor of instance masks (if provided in
        groundtruth)
      'groundtruth_is_crowd': [batch_size, num_boxes] bool tensor indicating
        is_crowd annotations (if provided in groundtruth).
      'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
        of groundtruth boxes per image.
  """
    input_data_fields = fields.InputDataFields()
    groundtruth_boxes = tf.stack(
        detection_model.groundtruth_lists(fields.BoxListFields.boxes))
    groundtruth_boxes_shape = tf.shape(groundtruth_boxes)
    # For class-agnostic models, groundtruth one-hot encodings collapse to all
    # ones.
    if class_agnostic:
        groundtruth_classes_one_hot = tf.ones(
            [groundtruth_boxes_shape[0], groundtruth_boxes_shape[1], 1])
    else:
        groundtruth_classes_one_hot = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.classes))
    label_id_offset = 1  # Applying label id offset (b/63711816)
    groundtruth_classes = (tf.argmax(groundtruth_classes_one_hot, axis=2) +
                           label_id_offset)
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes
    }
    if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
        groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.masks))
    if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd):
        groundtruth[input_data_fields.groundtruth_is_crowd] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.is_crowd))

    if detection_model.groundtruth_has_field(fields.BoxListFields.keypoints):
        groundtruth[input_data_fields.groundtruth_keypoints] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.keypoints))

    groundtruth[input_data_fields.num_groundtruth_boxes] = (tf.tile(
        [max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
    return groundtruth
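
A minimal, self-contained sketch of the class-conversion step above: one-hot
groundtruth encodings become 1-indexed class ids via tf.argmax plus the label
id offset. The tensor values here are illustrative, not from the original
example.

import tensorflow as tf

# Two boxes, three classes: box 0 is class 2, box 1 is class 3 (1-indexed).
one_hot = tf.constant([[[0., 1., 0.],
                        [0., 0., 1.]]])  # shape [batch=1, num_boxes=2, 3]
label_id_offset = 1
classes = tf.argmax(one_hot, axis=2) + label_id_offset
print(classes)  # [[2 3]]: int64, 1-indexed, as COCO-style evaluators expect
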
Example #2
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2):
    """Creates a side-by-side image with detections and groundtruth."""
    detection_fields = fields.DetectionResultFields()
    input_data_fields = fields.InputDataFields()
    instance_masks = None
    if detection_fields.detection_masks in eval_dict:
        instance_masks = tf.cast(
            tf.expand_dims(eval_dict[detection_fields.detection_masks],
                           axis=0), tf.uint8)
    keypoints = None
    if detection_fields.detection_keypoints in eval_dict:
        keypoints = tf.expand_dims(
            eval_dict[detection_fields.detection_keypoints], axis=0)
    groundtruth_instance_masks = None
    if input_data_fields.groundtruth_instance_masks in eval_dict:
        groundtruth_instance_masks = tf.cast(
            tf.expand_dims(
                eval_dict[input_data_fields.groundtruth_instance_masks],
                axis=0), tf.uint8)
    images_with_detections = draw_bounding_boxes_on_image_tensors(
        eval_dict[input_data_fields.original_image],
        tf.expand_dims(eval_dict[detection_fields.detection_boxes], axis=0),
        tf.expand_dims(eval_dict[detection_fields.detection_classes], axis=0),
        tf.expand_dims(eval_dict[detection_fields.detection_scores], axis=0),
        category_index,
        instance_masks=instance_masks,
        keypoints=keypoints,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh)
    images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
        eval_dict[input_data_fields.original_image],
        tf.expand_dims(eval_dict[input_data_fields.groundtruth_boxes], axis=0),
        tf.expand_dims(eval_dict[input_data_fields.groundtruth_classes],
                       axis=0),
        tf.expand_dims(
            tf.ones_like(eval_dict[input_data_fields.groundtruth_classes],
                         dtype=tf.float32),
            axis=0),
        category_index,
        instance_masks=groundtruth_instance_masks,
        keypoints=None,
        max_boxes_to_draw=None,
        min_score_thresh=0.0)
    return tf.concat([images_with_detections, images_with_groundtruth], axis=2)
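
A hedged usage sketch: the returned [1, H, 2 * W, C] tensor can be written to
TensorBoard with tf.summary.image, as the eval code in Example #8 does;
`eval_dict` and `category_index` are assumed to come from
eval_util.result_dict_for_single_example() and label_map_util.

# Assumes `eval_dict` and `category_index` already exist (see Example #8).
side_by_side = draw_side_by_side_evaluation_image(
    eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
tf.summary.image('Detections_Left_Groundtruth_Right', side_by_side)
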
Example #3
def _prepare_groundtruth_for_eval(detection_model, class_agnostic):
    """Extracts groundtruth data from detection_model and prepares it for eval.

  Args:
    detection_model: A `DetectionModel` object.
    class_agnostic: Whether the detections are class_agnostic.

  Returns:
    groundtruth: Dictionary with the following fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_masks': 3D float32 tensor of instance masks (if provided in
        groundtruth)
      'groundtruth_is_crowd': [num_boxes] bool tensor indicating is_crowd
        annotations (if provided in groundtruth).
  """
    input_data_fields = fields.InputDataFields()
    groundtruth_boxes = detection_model.groundtruth_lists(
        fields.BoxListFields.boxes)[0]
    # For class-agnostic models, groundtruth one-hot encodings collapse to all
    # ones.
    if class_agnostic:
        groundtruth_boxes_shape = tf.shape(groundtruth_boxes)
        groundtruth_classes_one_hot = tf.ones([groundtruth_boxes_shape[0], 1])
    else:
        groundtruth_classes_one_hot = detection_model.groundtruth_lists(
            fields.BoxListFields.classes)[0]
    label_id_offset = 1  # Applying label id offset (b/63711816)
    groundtruth_classes = (tf.argmax(groundtruth_classes_one_hot, axis=1) +
                           label_id_offset)
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes
    }
    if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
        groundtruth[input_data_fields.groundtruth_instance_masks] = (
            detection_model.groundtruth_lists(fields.BoxListFields.masks)[0])
    if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd):
        groundtruth[input_data_fields.groundtruth_is_crowd] = (
            detection_model.groundtruth_lists(
                fields.BoxListFields.is_crowd)[0])
    return groundtruth
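
To illustrate the class-agnostic branch: the all-ones "one-hot" column means
tf.argmax returns 0 for every box, so after the label id offset every box maps
to label 1. A tiny sketch with made-up shapes:

import tensorflow as tf

num_boxes = 4
one_hot = tf.ones([num_boxes, 1])         # class-agnostic one-hot encoding
classes = tf.argmax(one_hot, axis=1) + 1  # label_id_offset = 1
print(classes)  # [1 1 1 1]: every groundtruth box gets the agnostic label 1
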
Example #4
def _get_groundtruth_data(detection_model, class_agnostic):
  """Extracts groundtruth data from detection_model and prepares it for eval."""
  input_data_fields = fields.InputDataFields()
  groundtruth_boxes = detection_model.groundtruth_lists(
      fields.BoxListFields.boxes)[0]
  if class_agnostic:
    groundtruth_boxes_shape = tf.shape(groundtruth_boxes)
    groundtruth_classes_one_hot = tf.ones([groundtruth_boxes_shape[0], 1])
  else:
    groundtruth_classes_one_hot = detection_model.groundtruth_lists(
        fields.BoxListFields.classes)[0]
  label_id_offset = 1
  groundtruth_classes = (
      tf.argmax(groundtruth_classes_one_hot, axis=1) + label_id_offset)
  groundtruth = {
      input_data_fields.groundtruth_boxes: groundtruth_boxes,
      input_data_fields.groundtruth_classes: groundtruth_classes
  }
  if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
    groundtruth[input_data_fields.groundtruth_instance_masks] = (
        detection_model.groundtruth_lists(fields.BoxListFields.masks)[0])
  return groundtruth
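
For reference, the dictionary keys above are string constants from
object_detection.core.standard_fields, so the result is equivalent to a dict
with literal keys (assuming the standard values, which is how the Object
Detection API defines them):

# Hypothetical equivalent with literal string keys.
groundtruth = {
    'groundtruth_boxes': groundtruth_boxes,
    'groundtruth_classes': groundtruth_classes,
}
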
Example #5
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2,
                                       use_normalized_coordinates=True):
  """Creates a side-by-side image with detections and groundtruth.

  Bounding boxes (and instance masks, if available) are visualized on both
  subimages.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_batched_example() or
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.
    use_normalized_coordinates: Whether to assume boxes and keypoints are in
      normalized coordinates (as opposed to absolute coordinates).
      Default is True.

  Returns:
    A list of [1, H, 2 * W, C] uint8 tensors. The subimage on the left
      corresponds to detections, while the subimage on the right corresponds to
      groundtruth.
  """
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()

  images_with_detections_list = []

  # Add the batch dimension if the eval_dict is for single example.
  if len(eval_dict[detection_fields.detection_classes].shape) == 1:
    for key in eval_dict:
      if key != input_data_fields.original_image:
        eval_dict[key] = tf.expand_dims(eval_dict[key], 0)

  for indx in range(eval_dict[input_data_fields.original_image].shape[0]):
    instance_masks = None
    if detection_fields.detection_masks in eval_dict:
      instance_masks = tf.cast(
          tf.expand_dims(
              eval_dict[detection_fields.detection_masks][indx], axis=0),
          tf.uint8)
    keypoints = None
    if detection_fields.detection_keypoints in eval_dict:
      keypoints = tf.expand_dims(
          eval_dict[detection_fields.detection_keypoints][indx], axis=0)
    groundtruth_instance_masks = None
    if input_data_fields.groundtruth_instance_masks in eval_dict:
      groundtruth_instance_masks = tf.cast(
          tf.expand_dims(
              eval_dict[input_data_fields.groundtruth_instance_masks][indx],
              axis=0), tf.uint8)

    images_with_detections = draw_bounding_boxes_on_image_tensors(
        tf.expand_dims(
            eval_dict[input_data_fields.original_image][indx], axis=0),
        tf.expand_dims(
            eval_dict[detection_fields.detection_boxes][indx], axis=0),
        tf.expand_dims(
            eval_dict[detection_fields.detection_classes][indx], axis=0),
        tf.expand_dims(
            eval_dict[detection_fields.detection_scores][indx], axis=0),
        category_index,
        original_image_spatial_shape=tf.expand_dims(
            eval_dict[input_data_fields.original_image_spatial_shape][indx],
            axis=0),
        true_image_shape=tf.expand_dims(
            eval_dict[input_data_fields.true_image_shape][indx], axis=0),
        instance_masks=instance_masks,
        keypoints=keypoints,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        use_normalized_coordinates=use_normalized_coordinates)
    images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
        tf.expand_dims(
            eval_dict[input_data_fields.original_image][indx], axis=0),
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0),
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_classes][indx], axis=0),
        tf.expand_dims(
            tf.ones_like(
                eval_dict[input_data_fields.groundtruth_classes][indx],
                dtype=tf.float32),
            axis=0),
        category_index,
        original_image_spatial_shape=tf.expand_dims(
            eval_dict[input_data_fields.original_image_spatial_shape][indx],
            axis=0),
        true_image_shape=tf.expand_dims(
            eval_dict[input_data_fields.true_image_shape][indx], axis=0),
        instance_masks=groundtruth_instance_masks,
        keypoints=None,
        max_boxes_to_draw=None,
        min_score_thresh=0.0,
        use_normalized_coordinates=use_normalized_coordinates)
    images_with_detections_list.append(
        tf.concat([images_with_detections, images_with_groundtruth], axis=2))
  return images_with_detections_list
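
Unlike the single-image variant in Example #2, this version returns a Python
list with one [1, H, 2 * W, C] tensor per batch element. A hedged sketch of
logging each one, assuming `eval_dict` and `category_index` exist:

side_by_side_list = draw_side_by_side_evaluation_image(
    eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
for i, image_tensor in enumerate(side_by_side_list):
  tf.summary.image('Detections_Left_Groundtruth_Right_%d' % i, image_tensor)
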
Example #6
def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
                                  max_number_of_boxes):
    """Extracts groundtruth data from detection_model and prepares it for eval.

  Args:
    detection_model: A `DetectionModel` object.
    class_agnostic: Whether the detections are class_agnostic.
    max_number_of_boxes: Max number of groundtruth boxes.

  Returns:
    groundtruth: Dictionary with the following fields:
      'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes,
        in normalized coordinates.
      'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
        classes.
      'groundtruth_masks': 4D float32 tensor of instance masks (if provided in
        groundtruth)
      'groundtruth_is_crowd': [batch_size, num_boxes] bool tensor indicating
        is_crowd annotations (if provided in groundtruth).
      'groundtruth_area': [batch_size, num_boxes] float32 tensor indicating
        the area (in the original absolute coordinates) of annotations (if
        provided in groundtruth).
      'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
        of groundtruth boxes per image.
      'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
        tensor of keypoints (if provided in groundtruth).
      'groundtruth_keypoint_visibilities': [batch_size, num_boxes,
        num_keypoints] bool tensor of keypoint visibilities (if provided in
        groundtruth).
      'groundtruth_dp_num_points': [batch_size, num_boxes] int32 tensor with
        the number of DensePose points for each instance (if provided in
        groundtruth).
      'groundtruth_dp_part_ids': [batch_size, num_boxes, max_sampled_points]
        int32 tensor with the part ids for each DensePose sampled point (if
        provided in groundtruth).
      'groundtruth_dp_surface_coords': [batch_size, num_boxes,
        max_sampled_points, 4] float32 tensor with the DensePose surface
        coordinates for each sampled point (if provided in groundtruth).
      'groundtruth_track_ids': [batch_size, num_boxes] int32 tensor with the
        track ID for each instance (if provided in groundtruth).
      'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
        group_of annotations (if provided in groundtruth).
      'groundtruth_labeled_classes': [batch_size, num_classes] int64
        tensor of 1-indexed classes.
  """
    input_data_fields = fields.InputDataFields()
    groundtruth_boxes = tf.stack(
        detection_model.groundtruth_lists(fields.BoxListFields.boxes))
    groundtruth_boxes_shape = tf.shape(groundtruth_boxes)
    # For class-agnostic models, groundtruth one-hot encodings collapse to all
    # ones.
    if class_agnostic:
        groundtruth_classes_one_hot = tf.ones(
            [groundtruth_boxes_shape[0], groundtruth_boxes_shape[1], 1])
    else:
        groundtruth_classes_one_hot = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.classes))
    label_id_offset = 1  # Applying label id offset (b/63711816)
    groundtruth_classes = (tf.argmax(groundtruth_classes_one_hot, axis=2) +
                           label_id_offset)
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes
    }
    if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
        groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.masks))

    if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd):
        groundtruth[input_data_fields.groundtruth_is_crowd] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.is_crowd))

    if detection_model.groundtruth_has_field(
            input_data_fields.groundtruth_area):
        groundtruth[input_data_fields.groundtruth_area] = tf.stack(
            detection_model.groundtruth_lists(
                input_data_fields.groundtruth_area))

    if detection_model.groundtruth_has_field(fields.BoxListFields.keypoints):
        groundtruth[input_data_fields.groundtruth_keypoints] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.keypoints))

    if detection_model.groundtruth_has_field(
            fields.BoxListFields.keypoint_visibilities):
        groundtruth[
            input_data_fields.groundtruth_keypoint_visibilities] = tf.stack(
                detection_model.groundtruth_lists(
                    fields.BoxListFields.keypoint_visibilities))

    if detection_model.groundtruth_has_field(fields.BoxListFields.group_of):
        groundtruth[input_data_fields.groundtruth_group_of] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.group_of))

    if detection_model.groundtruth_has_field(
            fields.InputDataFields.groundtruth_labeled_classes):
        labeled_classes_list = detection_model.groundtruth_lists(
            fields.InputDataFields.groundtruth_labeled_classes)
        labeled_classes = [
            tf.where(x)[:, 0] + label_id_offset for x in labeled_classes_list
        ]
        if len(labeled_classes) > 1:
            num_classes = labeled_classes_list[0].shape[0]
            padded_labeled_classes = []
            for x in labeled_classes:
                padding = num_classes - tf.shape(x)[0]
                padded_labeled_classes.append(tf.pad(x, [[0, padding]]))
            groundtruth[
                input_data_fields.groundtruth_labeled_classes] = tf.stack(
                    padded_labeled_classes)
        else:
            groundtruth[
                input_data_fields.groundtruth_labeled_classes] = tf.stack(
                    labeled_classes)

    if detection_model.groundtruth_has_field(
            fields.BoxListFields.densepose_num_points):
        groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
            detection_model.groundtruth_lists(
                fields.BoxListFields.densepose_num_points))
    if detection_model.groundtruth_has_field(
            fields.BoxListFields.densepose_part_ids):
        groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
            detection_model.groundtruth_lists(
                fields.BoxListFields.densepose_part_ids))
    if detection_model.groundtruth_has_field(
            fields.BoxListFields.densepose_surface_coords):
        groundtruth[
            input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
                detection_model.groundtruth_lists(
                    fields.BoxListFields.densepose_surface_coords))

    if detection_model.groundtruth_has_field(fields.BoxListFields.track_ids):
        groundtruth[input_data_fields.groundtruth_track_ids] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.track_ids))

    groundtruth[input_data_fields.num_groundtruth_boxes] = (tf.tile(
        [max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
    return groundtruth
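
The labeled-classes handling above turns a k-hot vector into padded, 1-indexed
class ids. A self-contained sketch of that transform (using an explicit
boolean mask, where the code above passes the k-hot tensor to tf.where
directly):

import tensorflow as tf

# k-hot vector over 5 classes; classes 2 and 4 are labeled.
k_hot = tf.constant([0., 1., 0., 1., 0.])
labeled = tf.where(k_hot > 0)[:, 0] + 1        # -> [2, 4], 1-indexed int64
padding = tf.shape(k_hot)[0] - tf.shape(labeled)[0]
padded = tf.pad(labeled, [[0, padding]])       # -> [2, 4, 0, 0, 0]
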
Example #7
def visualize_detection_results(result_dict,
                                tag,
                                global_step,
                                categories,
                                summary_dir='',
                                export_dir='',
                                agnostic_mode=False,
                                show_groundtruth=False,
                                groundtruth_box_visualization_color='black',
                                min_score_thresh=.5,
                                max_num_predictions=20,
                                skip_scores=False,
                                skip_labels=False,
                                keep_image_id_for_visualization_export=False):
  """Visualizes detection results and writes visualizations to image summaries.

  This function visualizes an image with its detected bounding boxes and writes
  to image summaries which can be viewed on tensorboard.  It optionally also
  writes images to a directory. In the case of missing entry in the label map,
  unknown class name in the visualization is shown as "N/A".

  Args:
    result_dict: a dictionary holding groundtruth and detection
      data corresponding to each image being evaluated.  The following keys
      are required:
        'original_image': a numpy array representing the image with shape
          [1, height, width, 3] or [1, height, width, 1]
        'detection_boxes': a numpy array of shape [N, 4]
        'detection_scores': a numpy array of shape [N]
        'detection_classes': a numpy array of shape [N]
      The following keys are optional:
        'groundtruth_boxes': a numpy array of shape [N, 4]
        'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
      Detections are assumed to be provided in decreasing order of score, and
      for display purposes scores are assumed to be probabilities between 0
      and 1.
    tag: tensorboard tag (string) to associate with image.
    global_step: global step at which the visualization are generated.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
          'supercategory': (optional) string representing the supercategory
            e.g., 'animal', 'vehicle', 'food', etc
    summary_dir: the output directory to which the image summaries are written.
    export_dir: the output directory to which images are written.  If this is
      empty (default), then images are not exported.
    agnostic_mode: boolean (default: False) controlling whether to evaluate in
      class-agnostic mode or not.
    show_groundtruth: boolean (default: False) controlling whether to show
      groundtruth boxes in addition to detected boxes
    groundtruth_box_visualization_color: box color for visualizing groundtruth
      boxes
    min_score_thresh: minimum score threshold for a box to be visualized
    max_num_predictions: maximum number of detections to visualize
    skip_scores: whether to skip score when drawing a single detection
    skip_labels: whether to skip label when drawing a single detection
    keep_image_id_for_visualization_export: whether to keep image identifier in
      filename when exported to export_dir
  Raises:
    ValueError: if result_dict does not contain the expected keys (i.e.,
      'original_image', 'detection_boxes', 'detection_scores',
      'detection_classes')
  """
  detection_fields = fields.DetectionResultFields
  input_fields = fields.InputDataFields
  if not set([
      input_fields.original_image,
      detection_fields.detection_boxes,
      detection_fields.detection_scores,
      detection_fields.detection_classes,
  ]).issubset(set(result_dict.keys())):
    raise ValueError('result_dict does not contain all expected keys.')
  if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
    raise ValueError('If show_groundtruth is enabled, result_dict must contain '
                     'groundtruth_boxes.')
  logging.info('Creating detection visualizations.')
  category_index = label_map_util.create_category_index(categories)

  image = np.squeeze(result_dict[input_fields.original_image], axis=0)
  if image.shape[2] == 1:  # If one channel image, repeat in RGB.
    image = np.tile(image, [1, 1, 3])
  detection_boxes = result_dict[detection_fields.detection_boxes]
  detection_scores = result_dict[detection_fields.detection_scores]
  detection_classes = np.int32((result_dict[
      detection_fields.detection_classes]))
  detection_keypoints = result_dict.get(detection_fields.detection_keypoints)
  detection_masks = result_dict.get(detection_fields.detection_masks)
  detection_boundaries = result_dict.get(detection_fields.detection_boundaries)

  # Plot groundtruth underneath detections
  if show_groundtruth:
    groundtruth_boxes = result_dict[input_fields.groundtruth_boxes]
    groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints)
    vis_utils.visualize_boxes_and_labels_on_image_array(
        image=image,
        boxes=groundtruth_boxes,
        classes=None,
        scores=None,
        category_index=category_index,
        keypoints=groundtruth_keypoints,
        use_normalized_coordinates=False,
        max_boxes_to_draw=None,
        groundtruth_box_visualization_color=groundtruth_box_visualization_color)
  vis_utils.visualize_boxes_and_labels_on_image_array(
      image,
      detection_boxes,
      detection_classes,
      detection_scores,
      category_index,
      instance_masks=detection_masks,
      instance_boundaries=detection_boundaries,
      keypoints=detection_keypoints,
      use_normalized_coordinates=False,
      max_boxes_to_draw=max_num_predictions,
      min_score_thresh=min_score_thresh,
      agnostic_mode=agnostic_mode,
      skip_scores=skip_scores,
      skip_labels=skip_labels)

  if export_dir:
    # Modified: when keep_image_id_for_visualization_export is set, name the
    # exported file after the image key (typically a file path) rather than
    # using the default 'export-{tag}-{key}.png' pattern.
    if keep_image_id_for_visualization_export and result_dict[
        fields.InputDataFields().key]:
      filename = result_dict[
          fields.InputDataFields().key].decode('ASCII').split('/')[-1]
      export_path = os.path.join(export_dir, filename + '.png')
    else:
      export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
    vis_utils.save_image_array_as_png(image, export_path)

  summary = tf.Summary(value=[
      tf.Summary.Value(
          tag=tag,
          image=tf.Summary.Image(
              encoded_image_string=vis_utils.encode_image_array_as_png_str(
                  image)))
  ])
  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
  summary_writer.add_summary(summary, global_step)

  logging.info('Detection visualizations written to summary with tag %s.', tag)
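
A hedged, minimal invocation with fake numpy data: all values below are made
up, the box is in absolute pixel coordinates (the function draws with
use_normalized_coordinates=False), and a TF1 runtime is assumed for the
summary writer.

import numpy as np

result_dict = {
    'original_image': np.zeros((1, 480, 640, 3), dtype=np.uint8),
    'detection_boxes': np.array([[100., 100., 300., 400.]]),
    'detection_scores': np.array([0.9]),
    'detection_classes': np.array([1]),
}
categories = [{'id': 1, 'name': 'cat'}]
visualize_detection_results(
    result_dict, tag='image-0', global_step=0, categories=categories,
    summary_dir='/tmp/eval_summaries')
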
Example #8
  def model_fn(features, labels, mode, params=None):
    """Constructs the object detection model.

    Args:
      features: Dictionary of feature tensors, returned from `input_fn`.
      labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
        otherwise None.
      mode: Mode key from tf.estimator.ModeKeys.
      params: Parameter dictionary passed from the estimator.

    Returns:
      An `EstimatorSpec` that encapsulates the model and its serving
        configurations.
    """
    params = params or {}
    total_loss, train_op, detections, export_outputs = None, None, None, None
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    detection_model = detection_model_fn(is_training=is_training,
                                         add_summaries=(not use_tpu))
    scaffold_fn = None

    if mode == tf.estimator.ModeKeys.TRAIN:
      labels = unstack_batch(
          labels,
          unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
    elif mode == tf.estimator.ModeKeys.EVAL:
      labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
      gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
      gt_masks_list = None
      if fields.InputDataFields.groundtruth_instance_masks in labels:
        gt_masks_list = labels[
            fields.InputDataFields.groundtruth_instance_masks]
      gt_keypoints_list = None
      if fields.InputDataFields.groundtruth_keypoints in labels:
        gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints]
      detection_model.provide_groundtruth(
          groundtruth_boxes_list=gt_boxes_list,
          groundtruth_classes_list=gt_classes_list,
          groundtruth_masks_list=gt_masks_list,
          groundtruth_keypoints_list=gt_keypoints_list)

    preprocessed_images = features[fields.InputDataFields.image]
    prediction_dict = detection_model.predict(
        preprocessed_images, features[fields.InputDataFields.true_image_shape])
    detections = detection_model.postprocess(
        prediction_dict, features[fields.InputDataFields.true_image_shape])

    if mode == tf.estimator.ModeKeys.TRAIN:
      if train_config.fine_tune_checkpoint and hparams.load_pretrained:
        asg_map = detection_model.restore_map(
            from_detection_checkpoint=train_config.from_detection_checkpoint,
            load_all_detection_checkpoint_vars=(
                train_config.load_all_detection_checkpoint_vars))
        available_var_map = (
            variables_helper.get_variables_available_in_checkpoint(
                asg_map, train_config.fine_tune_checkpoint,
                include_global_step=False))
        if use_tpu:
          def tpu_scaffold():
            tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                          available_var_map)
            return tf.train.Scaffold()
          scaffold_fn = tpu_scaffold
        else:
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      losses_dict = detection_model.loss(
          prediction_dict, features[fields.InputDataFields.true_image_shape])
      losses = [loss_tensor for loss_tensor in losses_dict.values()]
      total_loss = tf.add_n(losses, name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
      global_step = tf.train.get_or_create_global_step()
      training_optimizer, optimizer_summary_vars = optimizer_builder.build(
          train_config.optimizer)

      if use_tpu:
        training_optimizer = tpu_optimizer.CrossShardOptimizer(
            training_optimizer)

      # Optionally freeze some layers by setting their gradients to be zero.
      trainable_variables = None
      if train_config.freeze_variables:
        trainable_variables = tf.contrib.framework.filter_variables(
            tf.trainable_variables(),
            exclude_patterns=train_config.freeze_variables)

      clip_gradients_value = None
      if train_config.gradient_clipping_by_norm > 0:
        clip_gradients_value = train_config.gradient_clipping_by_norm

      if not use_tpu:
        for var in optimizer_summary_vars:
          tf.summary.scalar(var.op.name, var)
      summaries = [] if use_tpu else None
      train_op = tf.contrib.layers.optimize_loss(
          loss=total_loss,
          global_step=global_step,
          learning_rate=None,
          clip_gradients=clip_gradients_value,
          optimizer=training_optimizer,
          variables=trainable_variables,
          summaries=summaries,
          name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
      export_outputs = {
          tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
              tf.estimator.export.PredictOutput(detections)
      }

    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.EVAL:
      # Detection summaries during eval.
      class_agnostic = (fields.DetectionResultFields.detection_classes
                        not in detections)
      groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
      eval_dict = eval_util.result_dict_for_single_example(
          tf.expand_dims(features[fields.InputDataFields.original_image][0], 0),
          features[inputs.HASH_KEY][0],
          detections,
          groundtruth,
          class_agnostic=class_agnostic,
          scale_to_absolute=False)

      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path)
      detection_and_groundtruth = vis_utils.draw_side_by_side_evaluation_image(
          eval_dict, category_index, max_boxes_to_draw=20, min_score_thresh=0.2)
      if not use_tpu:
        tf.summary.image('Detections_Left_Groundtruth_Right',
                         detection_and_groundtruth)

      # Eval metrics on a single image.
      detection_fields = fields.DetectionResultFields()
      input_data_fields = fields.InputDataFields()
      coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
          category_index.values())
      eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(
          image_id=eval_dict[input_data_fields.key],
          groundtruth_boxes=eval_dict[input_data_fields.groundtruth_boxes],
          groundtruth_classes=eval_dict[input_data_fields.groundtruth_classes],
          detection_boxes=eval_dict[detection_fields.detection_boxes],
          detection_scores=eval_dict[detection_fields.detection_scores],
          detection_classes=eval_dict[detection_fields.detection_classes])

    if use_tpu:
      return tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          scaffold_fn=scaffold_fn,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metrics=eval_metric_ops,
          export_outputs=export_outputs)
    else:
      return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=detections,
          loss=total_loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)
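
A hedged sketch of how the closure above is typically used once the enclosing
factory returns it; `train_input_fn` and the model directory are assumptions,
not part of the example:

# `model_fn` is the closure defined above; `train_input_fn` is hypothetical.
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='/tmp/od_model')
estimator.train(input_fn=train_input_fn, max_steps=10000)
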
Example #9
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2):
  """Creates a side-by-side image with detections and groundtruth.

  Bounding boxes (and instance masks, if available) are visualized on both
  subimages.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_single_example().
    category_index: A category index (dictionary) produced from a labelmap.
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.

  Returns:
    A [1, H, 2 * W, C] uint8 tensor. The subimage on the left corresponds to
      detections, while the subimage on the right corresponds to groundtruth.
  """
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()
  instance_masks = None
  if detection_fields.detection_masks in eval_dict:
    instance_masks = tf.cast(
        tf.expand_dims(eval_dict[detection_fields.detection_masks], axis=0),
        tf.uint8)
  keypoints = None
  if detection_fields.detection_keypoints in eval_dict:
    keypoints = tf.expand_dims(
        eval_dict[detection_fields.detection_keypoints], axis=0)
  groundtruth_instance_masks = None
  if input_data_fields.groundtruth_instance_masks in eval_dict:
    groundtruth_instance_masks = tf.cast(
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_instance_masks], axis=0),
        tf.uint8)
  images_with_detections = draw_bounding_boxes_on_image_tensors(
      eval_dict[input_data_fields.original_image],
      tf.expand_dims(eval_dict[detection_fields.detection_boxes], axis=0),
      tf.expand_dims(eval_dict[detection_fields.detection_classes], axis=0),
      tf.expand_dims(eval_dict[detection_fields.detection_scores], axis=0),
      category_index,
      instance_masks=instance_masks,
      keypoints=keypoints,
      max_boxes_to_draw=max_boxes_to_draw,
      min_score_thresh=min_score_thresh)
  images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
      eval_dict[input_data_fields.original_image],
      tf.expand_dims(eval_dict[input_data_fields.groundtruth_boxes], axis=0),
      tf.expand_dims(eval_dict[input_data_fields.groundtruth_classes], axis=0),
      tf.expand_dims(
          tf.ones_like(
              eval_dict[input_data_fields.groundtruth_classes],
              dtype=tf.float32),
          axis=0),
      category_index,
      instance_masks=groundtruth_instance_masks,
      keypoints=None,
      max_boxes_to_draw=None,
      min_score_thresh=0.0)
  return tf.concat([images_with_detections, images_with_groundtruth], axis=2)
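
The side-by-side layout comes from concatenating along the width axis (axis=2
of an NHWC tensor). A tiny self-contained demonstration:

import tensorflow as tf

left = tf.zeros([1, 4, 6, 3], dtype=tf.uint8)   # [N, H, W, C]
right = tf.ones([1, 4, 6, 3], dtype=tf.uint8)
side_by_side = tf.concat([left, right], axis=2)
print(side_by_side.shape)  # (1, 4, 12, 3): widths add, heights must match
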
Example #10
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2,
                                       use_normalized_coordinates=True):
  """Creates a side-by-side image with detections and groundtruth (batched)."""
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()

  images_with_detections_list = []

  # Add the batch dimension if the eval_dict is for single example.
  if len(eval_dict[detection_fields.detection_classes].shape) == 1:
    for key in eval_dict:
      if key != input_data_fields.original_image:
        eval_dict[key] = tf.expand_dims(eval_dict[key], 0)

  for indx in range(eval_dict[input_data_fields.original_image].shape[0]):
    instance_masks = None
    if detection_fields.detection_masks in eval_dict:
      instance_masks = tf.cast(
          tf.expand_dims(
              eval_dict[detection_fields.detection_masks][indx], axis=0),
          tf.uint8)
    keypoints = None
    if detection_fields.detection_keypoints in eval_dict:
      keypoints = tf.expand_dims(
          eval_dict[detection_fields.detection_keypoints][indx], axis=0)
    groundtruth_instance_masks = None
    if input_data_fields.groundtruth_instance_masks in eval_dict:
      groundtruth_instance_masks = tf.cast(
          tf.expand_dims(
              eval_dict[input_data_fields.groundtruth_instance_masks][indx],
              axis=0), tf.uint8)

    images_with_detections = draw_bounding_boxes_on_image_tensors(
        tf.expand_dims(
            eval_dict[input_data_fields.original_image][indx], axis=0),
        tf.expand_dims(
            eval_dict[detection_fields.detection_boxes][indx], axis=0),
        tf.expand_dims(
            eval_dict[detection_fields.detection_classes][indx], axis=0),
        tf.expand_dims(
            eval_dict[detection_fields.detection_scores][indx], axis=0),
        category_index,
        original_image_spatial_shape=tf.expand_dims(
            eval_dict[input_data_fields.original_image_spatial_shape][indx],
            axis=0),
        true_image_shape=tf.expand_dims(
            eval_dict[input_data_fields.true_image_shape][indx], axis=0),
        instance_masks=instance_masks,
        keypoints=keypoints,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        use_normalized_coordinates=use_normalized_coordinates)
    images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
        tf.expand_dims(
            eval_dict[input_data_fields.original_image][indx], axis=0),
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0),
        tf.expand_dims(
            eval_dict[input_data_fields.groundtruth_classes][indx], axis=0),
        tf.expand_dims(
            tf.ones_like(
                eval_dict[input_data_fields.groundtruth_classes][indx],
                dtype=tf.float32),
            axis=0),
        category_index,
        original_image_spatial_shape=tf.expand_dims(
            eval_dict[input_data_fields.original_image_spatial_shape][indx],
            axis=0),
        true_image_shape=tf.expand_dims(
            eval_dict[input_data_fields.true_image_shape][indx], axis=0),
        instance_masks=groundtruth_instance_masks,
        keypoints=None,
        max_boxes_to_draw=None,
        min_score_thresh=0.0,
        use_normalized_coordinates=use_normalized_coordinates)
    images_with_detections_list.append(
        tf.concat([images_with_detections, images_with_groundtruth], axis=2))
  return images_with_detections_list
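
The rank check at the top of this variant is what lets it accept both
single-example and batched eval_dicts. A small sketch of that normalization
step with an illustrative tensor:

import tensorflow as tf

detection_classes = tf.constant([1., 2., 3.])  # rank 1: a single example
if len(detection_classes.shape) == 1:
  detection_classes = tf.expand_dims(detection_classes, 0)
print(detection_classes.shape)  # (1, 3): now batched
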
Example #11
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
    """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    image: A single 4D image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes, masks, keypoints should
      be scaled to absolute coordinates. Note that for IoU based evaluations,
      it does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [max_detections] float32 tensor of scores.
    'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [max_detections, None, None] float32 tensor of binarized
      masks. (Only present if available in `detections`)
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D int64 tensor of instance masks
      (Optional).

  """
    label_id_offset = 1  # Applying label id offset (b/63711816)

    input_data_fields = fields.InputDataFields()
    output_dict = {
        input_data_fields.original_image: image,
        input_data_fields.key: key,
        input_data_fields.source_id: key,
    }

    detection_fields = fields.DetectionResultFields
    detection_boxes = detections[detection_fields.detection_boxes][0]
    output_dict[detection_fields.detection_boxes] = detection_boxes
    image_shape = tf.shape(image)
    if scale_to_absolute:
        absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
            box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
        output_dict[detection_fields.detection_boxes] = (
            absolute_detection_boxlist.get())
    detection_scores = detections[detection_fields.detection_scores][0]
    output_dict[detection_fields.detection_scores] = detection_scores

    if class_agnostic:
        detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
    else:
        detection_classes = (
            tf.to_int64(detections[detection_fields.detection_classes][0]) +
            label_id_offset)
    output_dict[detection_fields.detection_classes] = detection_classes

    if detection_fields.detection_masks in detections:
        detection_masks = detections[detection_fields.detection_masks][0]
        output_dict[detection_fields.detection_masks] = detection_masks
        if scale_to_absolute:
            # TODO: This should be done in model's postprocess
            # function ideally.
            detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, image_shape[1],
                image_shape[2])
            detection_masks_reframed = tf.to_float(
                tf.greater(detection_masks_reframed, 0.5))
            output_dict[
                detection_fields.detection_masks] = detection_masks_reframed
    if detection_fields.detection_keypoints in detections:
        detection_keypoints = detections[
            detection_fields.detection_keypoints][0]
        output_dict[detection_fields.detection_keypoints] = detection_keypoints
        if scale_to_absolute:
            absolute_detection_keypoints = keypoint_ops.scale(
                detection_keypoints, image_shape[1], image_shape[2])
            output_dict[detection_fields.detection_keypoints] = (
                absolute_detection_keypoints)

    if groundtruth:
        output_dict.update(groundtruth)
        if scale_to_absolute:
            groundtruth_boxes = groundtruth[
                input_data_fields.groundtruth_boxes]
            absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
                box_list.BoxList(groundtruth_boxes), image_shape[1],
                image_shape[2])
            output_dict[input_data_fields.groundtruth_boxes] = (
                absolute_gt_boxlist.get())
        # For class-agnostic models, groundtruth classes all become 1.
        if class_agnostic:
            groundtruth_classes = groundtruth[
                input_data_fields.groundtruth_classes]
            groundtruth_classes = tf.ones_like(groundtruth_classes,
                                               dtype=tf.int64)
            output_dict[
                input_data_fields.groundtruth_classes] = groundtruth_classes

    return output_dict
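
A hedged sketch of wiring this together during evaluation, mirroring the call
in Example #8; `detection_model`, `prediction_dict`, `features`, `image`, and
`key` are assumed to already exist in the surrounding eval code:

detections = detection_model.postprocess(
    prediction_dict, features[fields.InputDataFields.true_image_shape])
groundtruth = _get_groundtruth_data(detection_model, class_agnostic=False)
eval_dict = result_dict_for_single_example(
    image, key, detections, groundtruth,
    class_agnostic=False, scale_to_absolute=False)
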