def single_batch_nms(candidate_boxes):
    # keep only boxes whose objectness score passes the threshold
    y_mask = candidate_boxes[..., 4] >= self.score_threshold
    candidate_boxes = candidate_boxes[y_mask]
    outputs = torch.zeros((self.max_detection + 1, candidate_boxes.size(-1)))
    indices = []
    updates = []
    count = 0
    # loop until no candidate boxes remain or max_detection is reached
    while candidate_boxes.size(0) > 0 and count < self.max_detection:
        # pick the box with the highest score
        best_idx = torch.argmax(candidate_boxes[..., 4], dim=0)
        best_box = candidate_boxes[best_idx]
        indices.append([count] * candidate_boxes.size(-1))
        updates.append(best_box)
        count += 1
        # remove the best box from the candidates
        candidate_boxes = torch.cat(
            (candidate_boxes[0:best_idx],
             candidate_boxes[best_idx + 1:candidate_boxes.size(0)]),
            dim=0)
        # compare the best box against all remaining candidates
        iou = broadcast_iou(best_box[0:4], candidate_boxes[..., 0:4])
        # drop candidates whose IOU with the best box exceeds iou_threshold
        iou_mask = iou <= self.iou_threshold
        candidate_boxes = candidate_boxes[iou_mask]
    if count > 0:
        # store the number of detections in the extra last row of the output
        count_index = [[self.max_detection] * candidate_boxes.size(-1)]
        count_updates = [torch.zeros(candidate_boxes.size(-1)).fill_(count)]
        indices = torch.cat((torch.tensor(indices), torch.tensor(count_index)),
                            dim=0)
        updates = torch.cat((torch.stack(updates).cuda(),
                             torch.stack(count_updates).cuda()),
                            dim=0)
        # scatter the selected boxes into their rows of the output tensor
        outputs = outputs.cuda().scatter_(0, indices.cuda(), updates)
    return outputs
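# The snippets in this section all depend on a broadcast_iou helper that is not
# shown here. The sketch below is one plausible PyTorch implementation, assuming
# corner-format boxes (x1, y1, x2, y2); the actual helper in the original code
# may differ in signature and broadcasting behaviour.
import torch


def broadcast_iou(box_1, box_2):
    # box_1: (..., P, 4), box_2: (..., T, 4) -> IOU of shape (..., P, T);
    # a single box of shape (4,) for box_1 yields shape (1, T)
    box_1 = box_1.unsqueeze(-2)
    box_2 = box_2.unsqueeze(-3)
    box_1, box_2 = torch.broadcast_tensors(box_1, box_2)

    # intersection width/height, clamped at zero when boxes do not overlap
    int_w = (torch.min(box_1[..., 2], box_2[..., 2]) -
             torch.max(box_1[..., 0], box_2[..., 0])).clamp(min=0)
    int_h = (torch.min(box_1[..., 3], box_2[..., 3]) -
             torch.max(box_1[..., 1], box_2[..., 1])).clamp(min=0)
    int_area = int_w * int_h

    area_1 = (box_1[..., 2] - box_1[..., 0]) * (box_1[..., 3] - box_1[..., 1])
    area_2 = (box_2[..., 2] - box_2[..., 0]) * (box_2[..., 3] - box_2[..., 1])
    return int_area / (area_1 + area_2 - int_area + 1e-9)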
def calc_ignore_mask(self, true_box, pred_box, true_obj):
    # (batch, 13, 13, 3, 4)
    true_box_shape = true_box.shape
    pred_box_shape = pred_box.shape
    true_box = torch.reshape(true_box, [true_box_shape[0], -1, 4])
    true_box = torch.sort(true_box, dim=1, descending=True).values
    # true_box = true_box[:, 0:100, :]

    # pred_box, true_box shape : (batch, 507, 4)
    pred_box = torch.reshape(pred_box, [pred_box_shape[0], -1, 4])

    # (batch, 507, 507)
    iou = broadcast_iou(pred_box, true_box)

    # The TensorFlow code applies reduce_max here; is it really unnecessary in this version?
    # https://github.com/ethanyanjiali/deep-vision/blob/master/YOLO/tensorflow/yolov3.py#L462
    # best_iou = torch.max(iou, dim=-1).values
    best_iou = iou
    best_iou = torch.reshape(best_iou, [
        pred_box_shape[0], pred_box_shape[1], pred_box_shape[2],
        pred_box_shape[3]
    ])

    # (batch, 13, 13, 3, 1)
    ignore_mask = (best_iou < self.ignore_thresh).float()
    ignore_mask = torch.unsqueeze(ignore_mask, dim=-1)
    return ignore_mask
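# A quick shape trace for the question in the comments above: the TensorFlow
# reference reduces the pairwise IOU over the ground-truth axis before
# reshaping, and the reshape to (batch, grid, grid, anchors) only lines up once
# that reduction has produced one value per prediction. Sizes are illustrative.
import torch

batch, grid, anchors, num_true = 2, 13, 3, 100
num_pred = grid * grid * anchors                      # 507 predictions per image

iou = torch.rand(batch, num_pred, num_true)           # pairwise IOU: (batch, 507, 100)
best_iou = torch.max(iou, dim=-1).values              # best match per prediction: (batch, 507)
best_iou = best_iou.reshape(batch, grid, grid, anchors)
print(best_iou.shape)                                 # torch.Size([2, 13, 13, 3])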
def yolo_loss(y_true, y_pred):
    # 1. transform all pred outputs
    # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
    pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
        y_pred, anchors, classes)
    pred_xy = pred_xywh[..., 0:2]
    pred_wh = pred_xywh[..., 2:4]

    # 2. transform all true outputs
    # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls))
    true_box, true_obj, true_class_idx = tf.split(y_true, (4, 1, 1), axis=-1)
    true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2
    true_wh = true_box[..., 2:4] - true_box[..., 0:2]

    # give higher weights to small boxes
    box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]

    # 3. inverting the pred box equations
    grid_size = tf.shape(y_true)[1]
    grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))
    grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
    true_xy = true_xy * tf.cast(grid_size, tf.float32) - \
        tf.cast(grid, tf.float32)
    true_wh = tf.math.log(true_wh / anchors)
    true_wh = tf.where(tf.math.is_inf(true_wh), tf.zeros_like(true_wh),
                       true_wh)

    # 4. calculate all masks
    obj_mask = tf.squeeze(true_obj, -1)
    # ignore false positive when iou is over threshold
    best_iou = tf.map_fn(
        lambda x: tf.reduce_max(broadcast_iou(
            x[0], tf.boolean_mask(x[1], tf.cast(x[2], tf.bool))),
                                axis=-1),
        (pred_box, true_box, obj_mask), tf.float32)
    ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)

    # 5. calculate all losses
    xy_loss = obj_mask * box_loss_scale * \
        tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
    wh_loss = obj_mask * box_loss_scale * \
        tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
    obj_loss = binary_crossentropy(true_obj, pred_obj)
    obj_loss = obj_mask * obj_loss + \
        (1 - obj_mask) * ignore_mask * obj_loss
    if Config.CLASS_MUTUALLY_EXCLUSIVE is True:
        class_loss = obj_mask * sparse_categorical_crossentropy(
            true_class_idx, pred_class)
    else:
        class_loss = obj_mask * binary_crossentropy(
            true_class_idx, pred_class)

    # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1)
    xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3))
    wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3))
    obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3))
    class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3))

    return xy_loss + wh_loss + obj_loss + class_loss
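# The loss above closes over anchors, classes and ignore_thresh rather than
# taking them as arguments, which suggests it is produced by a per-scale
# factory. A hedged sketch of that wiring; make_yolo_loss, all_anchors and
# anchor_masks are illustrative names, not the original repo's exact API. The
# anchor values are the standard YOLOv3 COCO anchors normalized by 416.
import numpy as np

all_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       np.float32) / 416
anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])


def make_yolo_loss(anchors, classes, ignore_thresh=0.5):
    def yolo_loss(y_true, y_pred):
        ...  # body as above, using the enclosing anchors/classes/ignore_thresh
    return yolo_loss


# one loss per detection scale, each bound to its own subset of anchors
loss_fns = [make_yolo_loss(all_anchors[mask], classes=80) for mask in anchor_masks]
# model.compile(optimizer="adam", loss=loss_fns)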
def yoloLoss(y_true, y_pred):
    pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
        y_pred, anchors, class_num)
    pred_xy = pred_xywh[..., 0:2]  # extract the predicted offsets
    pred_wh = pred_xywh[..., 2:4]

    true_box, true_obj, true_class_idx = tf.split(y_true, (4, 1, 1), axis=-1)
    true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2
    true_wh = true_box[..., 2:4] - true_box[..., 0:2]
    box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]

    grid_size = tf.shape(y_true)[1]
    grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))
    grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
    true_xy = true_xy * tf.cast(grid_size, tf.float32) - \
        tf.cast(grid, tf.float32)
    true_wh = tf.math.log(true_wh / anchors)
    true_wh = tf.where(tf.math.is_inf(true_wh), tf.zeros_like(true_wh),
                       true_wh)
    # convert the ground-truth coordinates into offsets used for the loss

    obj_mask = tf.squeeze(true_obj, -1)
    # ignore false positive when iou is over threshold
    best_iou = tf.map_fn(
        lambda x: tf.reduce_max(broadcast_iou(
            x[0], tf.boolean_mask(x[1], tf.cast(x[2], tf.bool))),
                                axis=-1),
        (pred_box, true_box, obj_mask), tf.float32)
    ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)

    xy_loss = obj_mask * box_loss_scale * \
        tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
    wh_loss = obj_mask * box_loss_scale * \
        tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
    obj_loss = binary_crossentropy(true_obj, pred_obj)
    obj_loss = obj_mask * obj_loss + \
        (1 - obj_mask) * ignore_mask * obj_loss
    # TODO: use binary_crossentropy instead
    class_loss = obj_mask * sparse_categorical_crossentropy(
        true_class_idx, pred_class)

    xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3))
    wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3))
    obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3))
    class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3))

    return xy_loss + wh_loss + obj_loss + class_loss
def calc_ignore_mask(self, true_box, pred_box, true_obj):
    obj_mask = torch.squeeze(true_obj, dim=-1)
    best_iou = []
    for x in zip(pred_box, true_box, obj_mask):
        # keep only this image's ground-truth boxes that actually contain an object
        mask = x[1][x[2].bool()]
        if mask.size(0) != 0:
            best_iou.append(broadcast_iou(x[0], mask))
        else:
            # no objects in this image: the IOU is zero everywhere
            best_iou.append(torch.zeros(true_box.shape[1:4]).cuda())
    best_iou = torch.stack(best_iou)
    ignore_mask = (best_iou < self.ignore_thresh).float()
    ignore_mask = ignore_mask.unsqueeze(-1)
    return ignore_mask
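# A tiny illustration of the per-image masking step above: true boxes are kept
# only where the objectness flag is set, so an image with no objects yields an
# empty (0, 4) tensor and falls into the zero-IOU branch. Shapes are illustrative.
import torch

true_box = torch.rand(13, 13, 3, 4)                  # one image's ground-truth boxes
obj_mask = torch.zeros(13, 13, 3)
obj_mask[6, 6, 1] = 1                                # a single positive anchor

masked = true_box[obj_mask.bool()]                   # (1, 4): only the positive box
empty = true_box[torch.zeros(13, 13, 3).bool()]      # (0, 4): no objects in this image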
def single_batch_nms(candidate_boxes):
    y_mask = candidate_boxes[..., 4] >= self.score_threshold  # true or false
    candidate_boxes = candidate_boxes[y_mask]
    outputs = torch.zeros(
        (self.max_detection + 1, candidate_boxes.size(-1)))
    indices = []
    updates = []
    count = 0
    # repeat until there are no candidate boxes left or max_detection is filled
    while candidate_boxes.size(0) > 0 and count < self.max_detection:
        # pick the box with the highest score among the candidate boxes
        best_idx = torch.argmax(candidate_boxes[..., 4], dim=0)
        best_box = candidate_boxes[best_idx]
        indices.append([count] * candidate_boxes.size(-1))
        updates.append(best_box)
        count += 1
        # remove best_box from candidate_boxes
        candidate_boxes = torch.cat(
            (candidate_boxes[0:best_idx],
             candidate_boxes[best_idx + 1:candidate_boxes.size(0)]),
            dim=0)
        # compare best_box against all remaining candidate_boxes
        iou = broadcast_iou(best_box[0:4], candidate_boxes[..., 0:4])
        # drop candidate boxes whose IOU is greater than iou_threshold
        candidate_boxes = candidate_boxes[iou <= self.iou_threshold]
    # if at least one box was kept
    if count > 0:
        count_idx = [[self.max_detection] * candidate_boxes.size(-1)]
        count_update = [
            torch.zeros(candidate_boxes.size(-1)).fill_(count)
        ]
        indices = torch.cat(
            (torch.tensor(indices), torch.tensor(count_idx)), dim=0)
        updates = torch.cat(
            (torch.stack(updates), torch.stack(count_update).cuda()),
            dim=0)
        # write the updates values into outputs at the given indices along dim=0
        outputs = outputs.cuda().scatter_(0, indices.cuda(), updates)
    return outputs
def calc_ignore_mask(self, true_obj, true_box, pred_box):
    true_box_shape = tf.shape(true_box)
    pred_box_shape = tf.shape(pred_box)
    true_box = tf.reshape(true_box, [true_box_shape[0], -1, 4])
    # sort descending so the padded all-zero boxes move to the end, then keep
    # at most 100 boxes per image to bound the pairwise IOU computation
    true_box = tf.sort(true_box, axis=1, direction="DESCENDING")
    true_box = true_box[:, 0:100, :]
    pred_box = tf.reshape(pred_box, [pred_box_shape[0], -1, 4])

    # best IOU of each predicted box against any of the kept ground-truth boxes
    iou = broadcast_iou(pred_box, true_box)
    best_iou = tf.reduce_max(iou, axis=-1)
    best_iou = tf.reshape(best_iou, [
        pred_box_shape[0], pred_box_shape[1], pred_box_shape[2],
        pred_box_shape[3]
    ])

    # predictions that already overlap some ground truth well are excluded from the no-object loss
    ignore_mask = tf.cast(best_iou < self.ignore_thresh, tf.float32)
    ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
    return ignore_mask
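# A small illustration of the sort/truncate step above, assuming y_true is
# padded with all-zero boxes. tf.sort along axis=1 orders each coordinate
# column independently in descending order, so the padding sinks to the last
# rows and the slice [:, 0:100, :] bounds the IOU cost; note the sorted rows
# are no longer the original box tuples. The numbers are illustrative only.
import tensorflow as tf

true_box = tf.constant([[[0.10, 0.20, 0.40, 0.50],
                         [0.00, 0.00, 0.00, 0.00],    # padded (no object)
                         [0.30, 0.15, 0.90, 0.80]]])  # shape (1, 3, 4)

sorted_box = tf.sort(true_box, axis=1, direction="DESCENDING")
print(sorted_box.numpy())
# [[[0.3  0.2  0.9  0.8 ]
#   [0.1  0.15 0.4  0.5 ]
#   [0.   0.   0.   0.  ]]]
top_box = sorted_box[:, 0:2, :]  # keep a fixed number of rows, dropping the padding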
def single_batch_nms(candidate_boxes):
    # filter out predictions with score less than score_threshold
    candidate_boxes = tf.boolean_mask(
        candidate_boxes, candidate_boxes[..., 4] >= score_threshold)
    outputs = tf.zeros((max_detection + 1, tf.shape(candidate_boxes)[-1]))
    indices = []
    updates = []
    count = 0
    # keep running this until there's no more candidate box or max_detection is met
    while tf.shape(candidate_boxes)[0] > 0 and count < max_detection:
        # pick the box with the highest score
        best_idx = tf.math.argmax(candidate_boxes[..., 4], axis=0)
        best_box = candidate_boxes[best_idx]
        # add this best box to the output
        indices.append([count])
        updates.append(best_box)
        count += 1
        # remove this box from candidate boxes
        candidate_boxes = tf.concat([
            candidate_boxes[0:best_idx],
            candidate_boxes[best_idx + 1:tf.shape(candidate_boxes)[0]]
        ], axis=0)
        # calculate IOU between this box and all remaining candidate boxes
        iou = broadcast_iou(best_box[0:4], candidate_boxes[..., 0:4])
        # remove all candidate boxes with IOU bigger than iou_threshold
        candidate_boxes = tf.boolean_mask(candidate_boxes,
                                          iou[0] <= iou_threshold)
    if count > 0:
        # also append num_detection to the result
        count_index = [[max_detection]]
        count_updates = [
            tf.fill([tf.shape(candidate_boxes)[-1]], count)
        ]
        indices = tf.concat([indices, count_index], axis=0)
        updates = tf.concat([updates, count_updates], axis=0)
        outputs = tf.tensor_scatter_nd_update(outputs, indices, updates)
    return outputs
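# A hedged sketch of how a per-image NMS like the one above can be applied to
# a whole batch with tf.map_fn. single_batch_nms is assumed to close over
# score_threshold, iou_threshold and max_detection from its enclosing scope;
# combined_boxes and its channel layout are illustrative, not the repo's API.
import tensorflow as tf

# (batch, num_boxes, x1 + y1 + x2 + y2 + score + ...classes)
combined_boxes = tf.random.uniform((2, 200, 4 + 1 + 80))

# every image yields a fixed-size (max_detection + 1, channels) tensor,
# whose extra last row carries the number of valid detections
outputs = tf.map_fn(single_batch_nms, combined_boxes)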
def calc_ignore_mask(self, true_obj, true_box, pred_box):
    # eg. true_obj (1, 13, 13, 3, 1)
    true_obj = tf.squeeze(true_obj, axis=-1)
    # eg. true_obj (1, 13, 13, 3)

    # eg. true_box (1, 13, 13, 3, 4)
    # eg. pred_box (1, 13, 13, 2, 4)
    # eg. true_box_filtered (2, 4) it was (3, 4) but one element got filtered out
    true_box_filtered = tf.boolean_mask(true_box, tf.cast(true_obj, tf.bool))

    # YOLOv3:
    # "If the bounding box prior is not the best but does overlap a ground
    # truth object by more than some threshold we ignore the prediction,
    # following [17]. We use the threshold of .5."

    # calculate the iou for each pair of pred bbox and true bbox, then find the best among them
    # eg. best_iou (1, 1, 1, 2)
    best_iou = tf.reduce_max(broadcast_iou(pred_box, true_box_filtered),
                             axis=-1)

    # if best iou is higher than threshold, set the box to be ignored for noobj loss
    # eg. ignore_mask (1, 1, 1, 2)
    ignore_mask = tf.cast(best_iou < self.ignore_thresh, tf.float32)
    ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
    return ignore_mask