def test_post_processing_batch_size2():
    """Run `post_processing` on a batch of two images with two queries each.

    The two images have different unpadded sizes ([200, 400] and [400, 200])
    inside a common [400, 400] padded canvas. The expected values below are
    consistent with the boxes being rescaled by the padded/unpadded ratio,
    the first logit column being skipped when picking labels, and the
    detections of each image being ordered by decreasing score
    (NOTE(review): inferred from the expected arrays, confirm against
    `post_processing`).
    """
    # Class logits, indexed as [image, query, class].
    logits = tf.constant([
        [[-100., 0, 100], [-100., 1000, -100]],
        [[4., 0, 3], [-100., 1000, -100]],
    ])
    # Reference probabilities the expected scores are read from.
    probs = tf.nn.softmax(logits, axis=-1)

    # Boxes are written in corner form and handed over in center/size form.
    corner_boxes = tf.constant([
        [[0, 0, 1, 1], [0, 0, 0.5, 0.5]],
        [[0, 0, 0.3, 0.3], [0, 0, 0.5, 0.5]],
    ])
    center_boxes = convert_to_center_coordinates(corner_boxes)

    image_information = tf.constant([[200, 400], [400, 200]])
    image_padded_information = tf.constant([400, 400])

    # Expectations: in the second image the two detections come back swapped
    # with respect to the query order.
    expected_labels = np.array([[1, 0], [0, 1]])
    expected_scores = np.array([
        [probs[0, 0, 2], probs[0, 1, 1]],
        [probs[1, 1, 1], probs[1, 0, 2]],
    ])
    expected_boxes = np.array([
        [[0, 0, 1, 1], [0, 0, 1., 0.5]],
        [[0, 0, 0.5, 1.], [0, 0, 0.3, 0.6]],
    ])

    out_boxes, out_scores, out_labels = post_processing(
        center_boxes,
        logits,
        image_information,
        image_padded_information,
    )

    np.testing.assert_array_equal(expected_labels, out_labels.numpy())
    np.testing.assert_almost_equal(expected_boxes, out_boxes.numpy())
    np.testing.assert_array_equal(expected_scores, out_scores.numpy())
def compute_loss(
    self,
    ground_truths: Dict[str, tf.Tensor],
    y_pred: Dict[str, tf.Tensor],
    input_shape: tf.Tensor,
) -> tf.Tensor:
    """Sum the per-layer losses over every layer of the transformer decoder.

    The ground-truth boxes are normalized by the padded image size and
    converted to center coordinates. The predictions are then cut along
    axis 1 into `self.transformer_num_layers` equal chunks and each chunk is
    scored by `self._compute_loss` (GIoU, L1 and SCC terms).

    Args:
        ground_truths: see output kerod.dataset.preprocessing for the doc
        y_pred: A dict
            - *scores: A Tensor of shape [batch_size, num_queries, num_classes + 1]
              holding the class logits
            - *bbox*: A Tensor of shape [batch_size, num_queries, 4]
            NOTE(review): both tensors are split along axis 1 into one chunk
            per decoder layer, so axis 1 presumably stacks the queries of
            every layer. Confirm against the caller.
        input_shape: [height, width] of the input tensor. It is the shape of
            the images with all the padding included. It is used to
            normalize the ground_truths boxes.

    Returns:
        tf.Tensor: A scalar for the loss
    """
    # [height, width] -> [[height, width, height, width]] so that a single
    # division rescales the four box coordinates.
    box_scale = tf.tile(input_shape[None], [1, 2])
    center_boxes = convert_to_center_coordinates(ground_truths[BoxField.BOXES] / box_scale)

    targets = {
        # We add one because the background is not counted in
        # ground_truths[BoxField.LABELS]
        BoxField.LABELS: ground_truths[BoxField.LABELS] + 1,
        BoxField.BOXES: center_boxes,
        BoxField.WEIGHTS: ground_truths[BoxField.WEIGHTS],
        BoxField.NUM_BOXES: ground_truths[BoxField.NUM_BOXES],
    }

    num_layers = self.transformer_num_layers
    layer_boxes = tf.split(y_pred[BoxField.BOXES], num_layers, axis=1)
    layer_logits = tf.split(y_pred[BoxField.SCORES], num_layers, axis=1)

    # Total number of ground-truth boxes in the batch, passed to every layer.
    total_boxes = tf.cast(tf.reduce_sum(targets[BoxField.NUM_BOXES]), tf.float32)

    # Metrics are only logged for the last layer of the decoder.
    last_layer = num_layers - 1

    loss = 0
    for layer_idx, (boxes, logits) in enumerate(zip(layer_boxes, layer_logits)):
        layer_pred = {BoxField.BOXES: boxes, BoxField.SCORES: logits}
        loss += self._compute_loss(
            layer_pred,
            targets,
            total_boxes,
            compute_metrics=layer_idx == last_layer,
        )
    return loss
def test_convert_to_center_coordinates():
    """Convert corner boxes to center/size form and back again.

    Checks the forward conversion against hand-computed values, then checks
    that `convert_to_xyxy_coordinates` restores the original boxes.
    """
    corner_boxes = tf.constant([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]])
    # Per box: the two midpoints followed by the two side lengths.
    expected_center_boxes = np.array([[15, 12.5, 10, 5], [0.35, 0.25, 0.3, 0.3]])

    center_boxes = box_ops.convert_to_center_coordinates(corner_boxes)
    np.testing.assert_allclose(center_boxes, expected_center_boxes)

    # The inverse conversion must give back the input.
    restored_boxes = box_ops.convert_to_xyxy_coordinates(center_boxes)
    np.testing.assert_allclose(restored_boxes, corner_boxes)
def test_detr_similarity():
    """Compare `DetrSimilarity` against a hand-computed cost tensor.

    The batch holds two images, each with two ground-truth boxes and three
    predicted boxes, so the expected tensor has shape [2, 2, 3]. The expected
    value is assembled from a GIoU term, a classification term and a box
    distance term, weighted 2, 1 and 5 respectively.
    """
    # Ground-truth boxes in corner form; the last one is an all-zero box.
    gt_corner_boxes = tf.constant([
        [[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
        [[4.0, 3.0, 7.0, 5.0], [0, 0, 0, 0]],
    ])
    gt_boxes = convert_to_center_coordinates(gt_corner_boxes)

    # Both images share the same three predicted boxes.
    pred_corner_boxes_per_image = [
        [3.0, 4.0, 6.0, 8.0],
        [14.0, 14.0, 15.0, 15.0],
        [0.0, 0.0, 20.0, 20.0],
    ]
    pred_corner_boxes = tf.constant([pred_corner_boxes_per_image, pred_corner_boxes_per_image])
    pred_boxes = convert_to_center_coordinates(pred_corner_boxes)

    ground_truths = {
        BoxField.BOXES: gt_boxes,
        BoxField.LABELS: tf.constant([[1, 0], [1, 0]], tf.int32),
        BoxField.WEIGHTS: tf.constant([[1, 0], [1, 1]], tf.float32),
        BoxField.NUM_BOXES: tf.constant([[2], [1]], tf.int32),
    }

    classification_logits = tf.constant(
        [
            [[0, 100, 0], [0, 100, 0], [0, 100, 0]],
            [[100, 0, 0], [0, -100, 0], [0, -100, 0]],
        ],
        tf.float32,
    )
    predictions = {BoxField.BOXES: pred_boxes, BoxField.SCORES: classification_logits}

    similarity = DetrSimilarity()(ground_truths, predictions)

    # Taken from test_box_ops.py::test_compute_giou_3d_tensor
    exp_iou = np.array([
        [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]],
        [[2.0 / 16.0, 0, 6.0 / 400.0], [0, 0, 0]],
    ])
    exp_term2 = np.array([
        [[4. / 20, 125 / 132, 0.], [12 / 28., 84 / 90., 0.]],
        [[4. / 20, 125. / 132, 0.], [36. / 48, 224. / 225, 0.]],
    ])
    # The GIoU enters the cost negated.
    exp_giou = -(exp_iou - exp_term2)

    exp_cost_class = np.array([
        [[-1., -1., -1.], [0., 0., 0.]],
        [[0., 0., -0.], [-1., -0.5, -0.5]],
    ])
    exp_cost_bbox = np.array([
        [[5., 22.5, 45.5], [8.5, 19., 40.]],
        [[5., 22.5, 45.5], [17.5, 31., 60.]],
    ])

    exp_similarity = 2 * exp_giou + exp_cost_class + 5 * exp_cost_bbox
    np.testing.assert_allclose(similarity, exp_similarity)