def __call__(self, true_counts, inds, y_true, boxes, classes, y_pred):
    """Computes the loss and a set of monitoring metrics for one FPN level.

    Args:
      true_counts: `Tensor` of shape [batchsize, height, width, num_anchors]
        representing how many boxes fall in a given pixel [j, i] of the
        output map.
      inds: `Tensor` of shape [batchsize, None, 3] indicating the location
        [j, i] that a given box is associated with in the FPN prediction map.
      y_true: `Tensor` of shape [batchsize, None, 8] indicating the actual box
        associated with each index in the inds tensor list.
      boxes: `Tensor` of shape [batchsize, None, 4] indicating the original
        ground truth boxes for each image as they came from the decoder, used
        for the bounding box search.
      classes: `Tensor` of shape [batchsize, None, 1] indicating the original
        ground truth classes for each image as they came from the decoder,
        used for the bounding box search.
      y_pred: `Tensor` of shape [batchsize, height, width, output_depth]
        holding the model's output at a specific FPN level.

    Returns:
      loss: `float` for the actual loss.
      box_loss: `float` loss on the boxes used for metrics. This is a
        gradient-free, rescaled copy (0.05 * box_loss / iou_normalizer) of
        the value produced by `_compute_loss`.
      conf_loss: `float` loss on the confidence used for metrics.
      class_loss: `float` loss on the classes used for metrics.
      mean_loss: the fifth value produced by `_compute_loss`, forwarded
        unchanged.
      avg_iou: `float` metric for the average iou between predictions and
        ground truth.
      avg_obj: `float` metric for the average confidence of the model for
        predictions.
    """
    outputs = self._compute_loss(
        true_counts, inds, y_true, boxes, classes, y_pred)
    (loss, raw_box_loss, conf_loss, class_loss, mean_loss, iou, conf_logits,
     ind_mask, grid_mask) = outputs

    # Temporary metric: undo the iou normalizer and rescale the box loss. It
    # is reporting-only, so it is detached from the gradient graph.
    box_metric = tf.stop_gradient(
        0.05 * raw_box_loss / self._iou_normalizer)

    # The metrics are computed here, next to the loss, to save time and
    # resources. Everything below is detached from the gradient graph.
    objectness = tf.stop_gradient(tf.sigmoid(conf_logits))
    detached_iou = tf.stop_gradient(iou)

    # Average IOU over the entries selected by the index mask.
    selected_iou = loss_utils.apply_mask(
        tf.squeeze(ind_mask, axis=-1), detached_iou)
    avg_iou = loss_utils.average_iou(selected_iou)

    # Average predicted objectness over the cells selected by the grid mask.
    selected_obj = tf.squeeze(objectness, axis=-1) * grid_mask
    avg_obj = loss_utils.average_iou(selected_obj)

    return (loss, box_metric, conf_loss, class_loss, mean_loss,
            tf.stop_gradient(avg_iou), tf.stop_gradient(avg_obj))
def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
    """Per FPN path loss logic for Yolov4-csp, Yolov4-Large, and Yolov5.

    Args:
      true_counts: `Tensor` whose four leading dimensions are read as
        [batch, dim1, dim2, num_anchors]; holds the number of boxes assigned
        to each cell.
      inds: `Tensor` of indices used with `tf.gather_nd(..., batch_dims=1)` to
        pick the prediction associated with each ground truth entry.
      y_true: `Tensor` whose last axis is split as [4, 1, 1] into box,
        index mask and class id.
      boxes: ground truth boxes forwarded to the global box search.
      classes: ground truth classes forwarded to the global box search.
      y_pred: raw model output for this level; reshaped here to
        [batch, dim1, dim2, num_anchors, -1].

    Returns:
      A tuple `(loss, box_loss, conf_loss, class_loss, mean_loss, iou,
      pred_conf, ind_mask, grid_mask)` where `mean_loss` is the weighted sum
      of the three loss terms and `loss` is `mean_loss` multiplied by the
      batch size.
    """
    # Generate shape constants.
    # NOTE(review): axis 1 is called `width` and axis 2 `height` here; the
    # names are used consistently below, so only the labels are in question.
    shape = tf.shape(true_counts)
    batch_size, width, height, num = shape[0], shape[1], shape[2], shape[3]
    fwidth = tf.cast(width, tf.float32)
    fheight = tf.cast(height, tf.float32)

    # Cast all input components to float32.
    y_true = tf.cast(y_true, tf.float32)
    true_counts = tf.cast(true_counts, tf.float32)
    true_conf = tf.clip_by_value(true_counts, 0.0, 1.0)
    grid_points, anchor_grid = self._anchor_generator(
        width, height, batch_size, dtype=tf.float32)

    # Split the y_true list.
    (true_box, ind_mask, true_class) = tf.split(y_true, [4, 1, 1], axis=-1)
    grid_mask = true_conf = tf.squeeze(true_conf, axis=-1)
    true_class = tf.squeeze(true_class, axis=-1)
    # Keep the object count in float32 so it matches the float32 loss sums it
    # normalizes, independent of the dtype the raw prediction arrives in.
    num_objs = tf.cast(tf.reduce_sum(ind_mask), dtype=tf.float32)

    # Split up the predictions.
    y_pred = tf.cast(
        tf.reshape(y_pred, [batch_size, width, height, num, -1]), tf.float32)
    pred_box, pred_conf, pred_class = tf.split(y_pred, [4, 1, -1], axis=-1)

    # Decode the boxes for loss compute.
    scale, pred_box, pbg = self._decode_boxes(
        fwidth, fheight, pred_box, anchor_grid, grid_points, darknet=False)

    # If the ignore threshold is enabled, search all boxes and ignore all
    # IOU values larger than the ignore threshold that are not in the
    # noted ground truth list.
    use_ignore_mask = self._ignore_thresh != 0.0
    if use_ignore_mask:
        (_, obj_mask) = self._tiled_global_box_search(
            pbg,
            tf.stop_gradient(tf.sigmoid(pred_class)),
            boxes,
            classes,
            true_conf,
            smoothed=False,
            scale=None)

    # Scale and shift and select the ground truth boxes
    # and predictions to the prediction domain.
    if self._box_type == 'anchor_free':
        true_box = loss_utils.apply_mask(
            ind_mask, (scale * self._path_stride * true_box))
    else:
        offset = tf.cast(
            tf.gather_nd(grid_points, inds, batch_dims=1), true_box.dtype)
        # Only the first half of the box is shifted; the second half gets a
        # zero offset.
        offset = tf.concat([offset, tf.zeros_like(offset)], axis=-1)
        true_box = loss_utils.apply_mask(ind_mask, (scale * true_box) - offset)
    pred_box = loss_utils.apply_mask(
        ind_mask, tf.gather_nd(pred_box, inds, batch_dims=1))

    # Select the correct/used prediction classes.
    true_class = tf.one_hot(
        tf.cast(true_class, tf.int32),
        depth=tf.shape(pred_class)[-1],
        dtype=pred_class.dtype)
    true_class = loss_utils.apply_mask(ind_mask, true_class)
    pred_class = loss_utils.apply_mask(
        ind_mask, tf.gather_nd(pred_class, inds, batch_dims=1))

    # Compute the box loss.
    _, iou, box_loss = self.box_loss(true_box, pred_box, darknet=False)
    box_loss = loss_utils.apply_mask(tf.squeeze(ind_mask, axis=-1), box_loss)
    box_loss = math_ops.divide_no_nan(tf.reduce_sum(box_loss), num_objs)

    # Use the box IOU to build the map for confidence loss computation.
    iou = tf.maximum(tf.stop_gradient(iou), 0.0)
    smoothed_iou = (
        ((1 - self._objectness_smooth) * tf.cast(ind_mask, iou.dtype)) +
        self._objectness_smooth * tf.expand_dims(iou, axis=-1))
    smoothed_iou = loss_utils.apply_mask(ind_mask, smoothed_iou)
    true_conf = loss_utils.build_grid(
        inds, smoothed_iou, pred_conf, ind_mask,
        update=self._update_on_repeat)
    true_conf = tf.squeeze(true_conf, axis=-1)

    # Compute the cross entropy loss for the confidence map.
    bce = tf.keras.losses.binary_crossentropy(
        tf.expand_dims(true_conf, axis=-1), pred_conf, from_logits=True)
    if use_ignore_mask:
        bce = loss_utils.apply_mask(obj_mask, bce)
        # Guard the normalizer the same way the box and class losses do, so a
        # mask that sums to zero cannot turn the whole loss into NaN/Inf.
        conf_loss = math_ops.divide_no_nan(
            tf.reduce_sum(bce), tf.reduce_sum(obj_mask))
    else:
        conf_loss = tf.reduce_mean(bce)

    # Compute the cross entropy loss for the class maps.
    class_loss = tf.keras.losses.binary_crossentropy(
        true_class,
        pred_class,
        label_smoothing=self._label_smoothing,
        from_logits=True)
    class_loss = loss_utils.apply_mask(
        tf.squeeze(ind_mask, axis=-1), class_loss)
    class_loss = math_ops.divide_no_nan(tf.reduce_sum(class_loss), num_objs)

    # Apply the weights to each loss.
    box_loss *= self._iou_normalizer
    class_loss *= self._cls_normalizer
    conf_loss *= self._obj_normalizer

    # Add all the losses together, then scale by the batch size to get the
    # sum over the batch.
    mean_loss = box_loss + class_loss + conf_loss
    loss = mean_loss * tf.cast(batch_size, mean_loss.dtype)

    return (loss, box_loss, conf_loss, class_loss, mean_loss, iou, pred_conf,
            ind_mask, grid_mask)
def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
    """Per FPN path loss logic used for Yolov3, Yolov4, and Yolo-Tiny.

    Args:
      true_counts: `Tensor` whose four leading dimensions are read as
        [batch, dim1, dim2, num_anchors]; holds the number of boxes assigned
        to each cell.
      inds: `Tensor` of indices used with `tf.gather_nd(..., batch_dims=1)` to
        pick the prediction associated with each ground truth entry.
      y_true: `Tensor` whose last axis is split as [4, 1, 1] into box,
        index mask and class id.
      boxes: ground truth boxes forwarded to the global box search.
      classes: ground truth classes forwarded to the global box search.
      y_pred: raw model output for this level; reshaped here to
        [batch, dim1, dim2, num_anchors, -1].

    Returns:
      A tuple `(loss, box_loss, conf_loss, class_loss, loss, iou, pred_conf,
      ind_mask, grid_mask)`. The loss terms are means over the batch, and the
      total loss appears twice (first and fifth position).
    """
    if self._box_type == 'scaled':
        # The Darknet model propagates a sigmoid once in back prop, so that
        # behaviour is replicated here.
        y_pred = grad_sigmoid(y_pred)

    # Shape constants of the prediction grid.
    dims = tf.shape(true_counts)
    batch_size, width, height, num = dims[0], dims[1], dims[2], dims[3]
    fwidth = tf.cast(width, tf.float32)
    fheight = tf.cast(height, tf.float32)

    # Anchors and grid coordinates are constants: keep them out of backprop.
    grid_points, anchor_grid = self._anchor_generator(
        width, height, batch_size, dtype=tf.float32)
    grid_points = tf.stop_gradient(grid_points)
    anchor_grid = tf.stop_gradient(anchor_grid)

    # Cast every ground truth input to float32 and stop its gradient to save
    # memory.
    boxes = tf.stop_gradient(tf.cast(boxes, tf.float32))
    classes = tf.stop_gradient(tf.cast(classes, tf.float32))
    y_true = tf.stop_gradient(tf.cast(y_true, tf.float32))
    true_counts = tf.stop_gradient(tf.cast(true_counts, tf.float32))

    # Binary occupancy per cell. `grid_mask` keeps this original map even if
    # the box search below replaces `true_conf`.
    occupancy = tf.stop_gradient(tf.clip_by_value(true_counts, 0.0, 1.0))
    grid_mask = tf.squeeze(occupancy, axis=-1)
    true_conf = grid_mask

    # Split the ground truth into separate items for the loss computation.
    true_box, ind_mask, class_ids = tf.split(y_true, [4, 1, 1], axis=-1)
    class_ids = tf.squeeze(class_ids, axis=-1)
    flat_ind_mask = tf.squeeze(ind_mask, axis=-1)

    # Split the predictions.
    y_pred = tf.cast(
        tf.reshape(y_pred, [batch_size, width, height, num, -1]), tf.float32)
    out_dtype = y_pred.dtype
    pred_box, pred_conf, pred_class = tf.split(y_pred, [4, 1, -1], axis=-1)

    # Decode the boxes to be used for loss compute.
    _, _, pred_box = self._decode_boxes(
        fwidth, fheight, pred_box, anchor_grid, grid_points, darknet=True)

    # If the ignore threshold is enabled, search all boxes and ignore all
    # IOU values larger than the ignore threshold that are not in the
    # noted ground truth list.
    ignore_enabled = self._ignore_thresh != 0.0
    if ignore_enabled:
        class_scores = tf.stop_gradient(tf.sigmoid(pred_class))
        true_conf, obj_mask = self._tiled_global_box_search(
            pred_box,
            class_scores,
            boxes,
            classes,
            true_conf,
            smoothed=self._objectness_smooth > 0)

    # Build the one hot class list used for the class loss, then reorganize
    # it as a grid.
    one_hot_classes = tf.one_hot(
        tf.cast(class_ids, tf.int32),
        depth=tf.shape(pred_class)[-1],
        dtype=pred_class.dtype)
    masked_classes = tf.stop_gradient(
        loss_utils.apply_mask(ind_mask, one_hot_classes))
    class_grid = tf.stop_gradient(
        loss_utils.build_grid(
            inds, masked_classes, pred_class, ind_mask, update=False))

    # Use the class grid to find the number of objects located in each
    # predicted grid cell/pixel; zeros become ones so the division below
    # leaves those entries unchanged.
    per_cell_counts = tf.reduce_sum(class_grid, axis=-1, keepdims=True)
    reps = tf.squeeze(
        tf.gather_nd(per_cell_counts, inds, batch_dims=1), axis=-1)
    reps = tf.stop_gradient(tf.where(reps == 0.0, tf.ones_like(reps), reps))

    # Compute the box loss for only the cells in which the boxes are located.
    pred_box = loss_utils.apply_mask(
        ind_mask, tf.gather_nd(pred_box, inds, batch_dims=1))
    iou, _, per_box_loss = self.box_loss(true_box, pred_box, darknet=True)
    per_box_loss = loss_utils.apply_mask(flat_ind_mask, per_box_loss)
    per_box_loss = math_ops.divide_no_nan(per_box_loss, reps)
    box_loss = tf.cast(tf.reduce_sum(per_box_loss, axis=1), dtype=out_dtype)

    # Compute the sigmoid binary cross entropy for the class maps.
    class_bce = loss_utils.sigmoid_bce(
        tf.expand_dims(class_grid, axis=-1),
        tf.expand_dims(pred_class, axis=-1),
        self._label_smoothing)
    class_loss = tf.reduce_mean(class_bce, axis=-1)

    # Apply normalization to the class losses: the class weight is applied
    # to the class indexes where the one hot grid is one.
    if self._cls_normalizer < 1.0:
        class_weights = (1 - class_grid) + class_grid * self._cls_normalizer
        class_loss = class_loss * class_weights

    # Mask the class loss and compute the sum over all the objects.
    class_loss = tf.reduce_sum(class_loss, axis=-1)
    class_loss = loss_utils.apply_mask(grid_mask, class_loss)
    class_loss = math_ops.rm_nan_inf(class_loss, val=0.0)
    class_loss = tf.cast(
        tf.reduce_sum(class_loss, axis=(1, 2, 3)), dtype=out_dtype)

    # Compute the sigmoid binary cross entropy for the confidence maps.
    conf_bce = loss_utils.sigmoid_bce(
        tf.expand_dims(true_conf, axis=-1), pred_conf, 0.0)
    bce = tf.reduce_mean(conf_bce, axis=-1)

    # Mask the confidence loss and take the sum across all the grid cells.
    if ignore_enabled:
        bce = loss_utils.apply_mask(obj_mask, bce)
    conf_loss = tf.cast(tf.reduce_sum(bce, axis=(1, 2, 3)), dtype=out_dtype)

    # Apply the weights to each loss.
    box_loss = box_loss * self._iou_normalizer
    conf_loss = conf_loss * self._obj_normalizer

    # Add all the losses together, then take the mean over the batch.
    total_loss = tf.reduce_mean(box_loss + class_loss + conf_loss)

    # Reduce each loss term to its batch mean for use as a metric.
    box_metric = tf.reduce_mean(box_loss)
    conf_metric = tf.reduce_mean(conf_loss)
    class_metric = tf.reduce_mean(class_loss)

    return (total_loss, box_metric, conf_metric, class_metric, total_loss,
            iou, pred_conf, ind_mask, grid_mask)