Exemplo n.º 1
0
  def accuracy_function(self, logits, labels, data_type):
    """Returns the ops to measure the mean precision of the model.

    Args:
      logits: Model output. It is split along axis 2 into 4 box-location
        values and `self.label_num` class scores for every SSD box.
      labels: Ground-truth tensor. Along axis 1 it holds
        `ssd_constants.MAX_NUM_EVAL_BOXES` rows of (4 box values, 1 class),
        followed by one extra row packing the source id, the 3-element raw
        image shape and one unused value.
      data_type: Not used by this method.

    Returns:
      A dict mapping `constants.UNREDUCED_ACCURACY_OP_PREFIX` + name to a
      tensor, for the names `ssd_constants.PRED_BOXES`,
      `ssd_constants.PRED_SCORES`, `ssd_constants.SOURCE_ID` and
      `ssd_constants.RAW_SHAPE`.

    Raises:
      ImportError: If `ssd_dataloader` or the object_detection modules cannot
        be imported.
    """
    try:
      import ssd_dataloader  # pylint: disable=g-import-not-at-top
      from object_detection.box_coders import faster_rcnn_box_coder  # pylint: disable=g-import-not-at-top
      from object_detection.core import box_coder  # pylint: disable=g-import-not-at-top
      from object_detection.core import box_list  # pylint: disable=g-import-not-at-top
    except ImportError:
      # Adjacent literals are concatenated verbatim, so every fragment that is
      # followed by another word must end with an explicit space.
      raise ImportError('To use the COCO dataset, you must clone the '
                        'repo https://github.com/tensorflow/models and add '
                        'tensorflow/models and tensorflow/models/research to '
                        'the PYTHONPATH, and compile the protobufs by '
                        'following https://github.com/tensorflow/models/blob/'
                        'master/research/object_detection/g3doc/installation.md'
                        '#protobuf-compilation ; To evaluate using COCO '
                        'metric, download and install Python COCO API from '
                        'https://github.com/cocodataset/cocoapi')

    # Unpack model output back to locations and confidence scores of predictions
    # pred_locs: relative locations (coordinates) of objects in all SSD boxes
    # shape: [batch_size, NUM_SSD_BOXES, 4]
    # pred_labels: confidence scores of objects being of all categories
    # shape: [batch_size, NUM_SSD_BOXES, label_num]
    pred_locs, pred_labels = tf.split(logits, [4, self.label_num], 2)

    # Decode the predicted offsets against the default (anchor) boxes, given
    # in 'ltrb' form, to obtain the predicted boxes.
    ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=ssd_constants.BOX_CODER_SCALES)
    anchors = box_list.BoxList(
        tf.convert_to_tensor(ssd_dataloader.DefaultBoxes()('ltrb')))
    pred_boxes = box_coder.batch_decode(
        encoded_boxes=pred_locs, box_coder=ssd_box_coder, anchors=anchors)

    # Turn raw class scores into per-box probabilities over the categories.
    pred_scores = tf.nn.softmax(pred_labels, axis=2)

    # The last row along axis 1 carries per-image metadata instead of a box.
    boxes_classes, id_shape = tf.split(
        labels, [ssd_constants.MAX_NUM_EVAL_BOXES, 1], 1)
    # TODO(haoyuzhang): maybe use these values for visualization.
    gt_boxes, gt_classes = tf.split(boxes_classes, [4, 1], 2)  # pylint: disable=unused-variable
    id_shape = tf.squeeze(id_shape, 1)
    # Metadata row layout: [source_id, raw_shape (3 values), unused].
    source_id, raw_shape, _ = tf.split(id_shape, [1, 3, 1], 1)
    source_id = tf.squeeze(source_id, 1)

    return {
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.PRED_BOXES): pred_boxes,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.PRED_SCORES): pred_scores,
        # TODO(haoyuzhang): maybe use these values for visualization.
        # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_boxes': gt_boxes,
        # constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_classes': gt_classes,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.SOURCE_ID): source_id,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX +
         ssd_constants.RAW_SHAPE): raw_shape
    }
Exemplo n.º 2
0
    def loss_function(self, build_network_result, labels):
        """Computes the SSD training loss: classification plus box regression.

        The classification term applies hard negative mining: besides the
        positive boxes, only the highest-loss negative boxes (up to
        `ssd_constants.NEGS_PER_POSITIVE` per ground-truth box) contribute.

        Args:
          build_network_result: Object whose `logits` attribute is split along
            axis 1 into 4 location values and `self.label_num` class scores,
            i.e. laid out as [batch_size, 4 + label_num, NUM_SSD_BOXES].
          labels: Tensor of shape [batch_size, NUM_SSD_BOXES + 1, 5]. The first
            NUM_SSD_BOXES rows hold (4 location values, 1 class id); the last
            row holds the number of ground-truth boxes, repeated 5 times.

        Returns:
          A scalar tensor: the batch mean of the per-image class loss plus the
          batch mean of the per-image box loss, each normalised by the image's
          number of ground-truth boxes.
        """
        logits = build_network_result.logits
        # Unpack model output back to locations and confidence scores of predictions
        # Shape of pred_loc: [batch_size, 4, NUM_SSD_BOXES]
        # Shape of pred_label: [batch_size, label_num, NUM_SSD_BOXES]
        pred_loc, pred_label = tf.split(logits, [4, self.label_num], 1)

        # Unpack ground truth labels to number of boxes, locations, and classes
        # initial shape: [batch_size, NUM_SSD_BOXES + 1, 5]
        # Shape of labels: [batch_size, NUM_SSD_BOXES, 5]
        # Shape of num_gt: [batch_size, 1, 5] -- 5 identical copies
        labels, num_gt = tf.split(labels, [ssd_constants.NUM_SSD_BOXES, 1], 1)

        # Shape of num_gt: [batch_size]
        # Squeeze only axis 1: an axis-less squeeze would also drop the batch
        # axis when batch_size == 1 and break the indexing further below.
        num_gt = tf.squeeze(tf.cast(num_gt[:, :, 0], tf.int32), axis=1)

        # Shape of gt_loc: [batch_size, NUM_SSD_BOXES, 4]
        # Shape of gt_label: [batch_size, NUM_SSD_BOXES, 1]
        gt_loc, gt_label = tf.split(labels, [4, 1], 2)
        gt_label = tf.cast(gt_label, tf.int32)

        # Per-box classification loss; the class axis of the predictions is
        # moved last to line up with the labels.
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            gt_label,
            tf.transpose(pred_label, [0, 2, 1]),
            reduction=tf.losses.Reduction.NONE)

        # Replicate the default boxes ('xywh' form) for every batch element.
        default_boxes = tf.tile(
            tf.convert_to_tensor(
                ssd_dataloader.DefaultBoxes()('xywh'))[tf.newaxis, :, :],
            [gt_loc.get_shape()[0], 1, 1])

        # Shape of gt_label: [batch_size, NUM_SSD_BOXES]
        # Drop only the trailing singleton axis so batch_size == 1 keeps its
        # batch axis.
        gt_label = tf.squeeze(gt_label, axis=2)

        # To performance people: MLPerf uses this transposed convention
        # ([batch_size, 4, NUM_SSD_BOXES]). It is matched here to make it
        # easier to compare to the reference. If this hurts performance, feel
        # free to adjust accordingly.
        # Shape of gt_loc: [batch_size, 4, NUM_SSD_BOXES]
        gt_loc = tf.transpose(gt_loc, [0, 2, 1])

        # Shape of default_boxes: [batch_size, 4, NUM_SSD_BOXES]
        default_boxes = tf.transpose(default_boxes, [0, 2, 1])

        # Boxes with a class id greater than 0 are treated as positives.
        mask = tf.greater(gt_label, 0)
        float_mask = tf.cast(mask, tf.float32)

        # Regression targets expressed relative to the default boxes.
        gt_location_vectors = tf.concat(
            [
                (ssd_constants.SCALE_XY *
                 (gt_loc[:, :2, :] - default_boxes[:, :2, :]) /
                 default_boxes[:, 2:, :]),

                # The gt_loc height and width have already had the log taken.
                # See FasterRcnnBoxCoder for more details.
                (ssd_constants.SCALE_HW *
                 (gt_loc[:, 2:, :] - tf.log(default_boxes[:, 2:, :])))
            ],
            axis=1)

        # Huber loss summed over the 4 coordinates, counted for positive
        # boxes only, then summed over boxes to get one value per image.
        smooth_l1 = tf.reduce_sum(tf.losses.huber_loss(
            gt_location_vectors, pred_loc, reduction=tf.losses.Reduction.NONE),
                                  axis=1)
        smooth_l1 = tf.multiply(smooth_l1, float_mask)
        box_loss = tf.reduce_sum(smooth_l1, axis=1)

        # Hard example mining
        neg_masked_cross_entropy = cross_entropy * (1 - float_mask)

        # argsort of an argsort yields each box's rank; with DESCENDING order
        # the highest negative loss gets rank 0.
        relative_position = tf.contrib.framework.argsort(
            tf.contrib.framework.argsort(neg_masked_cross_entropy,
                                         direction='DESCENDING'))
        num_neg_boxes = num_gt * ssd_constants.NEGS_PER_POSITIVE
        # Keep the boxes whose rank is below the per-image negative budget.
        top_k_neg_mask = tf.cast(
            tf.less(
                relative_position,
                tf.tile(num_neg_boxes[:, tf.newaxis],
                        (1, ssd_constants.NUM_SSD_BOXES))), tf.float32)

        class_loss = tf.reduce_sum(tf.multiply(cross_entropy,
                                               float_mask + top_k_neg_mask),
                                   axis=1)

        # NOTE(review): an image with num_gt == 0 divides by zero here --
        # confirm the input pipeline never yields such samples.
        class_loss = tf.reduce_mean(class_loss / tf.cast(num_gt, tf.float32))
        box_loss = tf.reduce_mean(box_loss / tf.cast(num_gt, tf.float32))

        return class_loss + box_loss