def scale_boxes(boxes, im_info):
    """
    Scale predicted boxes back to the original image and clamp them to it.

    The caller's ``boxes`` tensor is left untouched; a new tensor is returned.

    Arguments:
    boxes -- tensor of shape (N, 4). It is passed through ``xywh2xxyy`` below,
             so it is treated as (x, y, w, h); presumably in feature-map grid
             units, since it is multiplied by input_size / grid_size first
             (TODO confirm against the caller)
    im_info -- dictionary {width:, height:} of the original image

    Returns:
    scaled_boxes -- tensor of shape (N, 4) xxyy format, x clamped to
                    [0, width - 1] and y clamped to [0, height - 1]
    """
    h = im_info['height']
    w = im_info['width']

    input_h, input_w = cfg.test_input_size
    scale_h, scale_w = input_h / h, input_w / w

    # Out-of-place multiply on purpose: it yields a fresh tensor, so the
    # in-place divisions below never write into the caller's `boxes`.
    # NOTE(review): the stride is derived from the height only
    # (input_h / cfg.gridH) and applied to both axes, exactly as before;
    # this looks like it assumes the same stride in x and y -- confirm.
    scaled = boxes * (input_h / cfg.gridH)

    # undo the resize from the original image to the network input
    scaled[:, 0::2] /= scale_w
    scaled[:, 1::2] /= scale_h

    scaled = xywh2xxyy(scaled)

    # clamp boxes to the image bounds
    scaled[:, 0::2].clamp_(0, w - 1)
    scaled[:, 1::2].clamp_(0, h - 1)

    return scaled
def build_target(output, gt_data, H, W):
    """
    Build the training target for output tensor

    Arguments:
    output -- tuple (delta_pred_batch, conf_pred_batch, class_pred_batch), output data of the yolo network
              (only the first two entries are read here)
    gt_data -- tuple (gt_boxes_batch, gt_classes_batch, num_boxes_batch), ground truth data
    H -- grid height (number of cells along y)
    W -- grid width (number of cells along x)

    delta_pred_batch -- tensor of shape (B, H * W * num_anchors, 4), predictions of delta σ(t_x), σ(t_y), σ(t_w), σ(t_h)
    conf_pred_batch -- tensor of shape (B, H * W * num_anchors, 1), prediction of IoU score σ(t_c)
    class_pred_batch -- tensor of shape (B, H * W * num_anchors, num_classes), prediction of class scores (cls1, cls2, ..)

    gt_boxes_batch -- tensor of shape (B, N, 4), ground truth boxes, normalized values
                      (x1, y1, x2, y2) range 0~1; it is NOT modified by this function
    gt_classes_batch -- tensor of shape (B, N), ground truth classes (cls)
    num_boxes_batch -- tensor of shape (B, 1). number of objects

    Returns:
    iou_target -- tensor of shape (B, H * W * num_anchors, 1)
    iou_mask -- tensor of shape (B, H * W * num_anchors, 1)
    box_target -- tensor of shape (B, H * W * num_anchors, 4)
    box_mask -- tensor of shape (B, H * W * num_anchors, 1)
    class_target -- long tensor of shape (B, H * W * num_anchors, 1)
    class_mask -- tensor of shape (B, H * W * num_anchors, 1)

    Samples with zero ground-truth boxes keep the defaults: all targets and
    box/class masks are zero, and iou_mask is cfg.noobject_scale everywhere.
    """
    delta_pred_batch = output[0]
    conf_pred_batch = output[1]

    gt_boxes_batch = gt_data[0]
    gt_classes_batch = gt_data[1]
    num_boxes_batch = gt_data[2]

    bsize = delta_pred_batch.size(0)

    # Derived from the documented prediction shape (B, H * W * num_anchors, 4)
    # instead of being hard-coded to 5.
    num_anchors = delta_pred_batch.size(1) // (H * W)

    # initial the output tensor
    # we use `tensor.new_*()` so the created tensors have the same device and
    # data type as the input tensor's; which input tensor is used doesn't matter
    iou_target = delta_pred_batch.new_zeros((bsize, H * W, num_anchors, 1))
    iou_mask = delta_pred_batch.new_ones(
        (bsize, H * W, num_anchors, 1)) * cfg.noobject_scale

    box_target = delta_pred_batch.new_zeros((bsize, H * W, num_anchors, 4))
    box_mask = delta_pred_batch.new_zeros((bsize, H * W, num_anchors, 1))

    class_target = conf_pred_batch.new_zeros((bsize, H * W, num_anchors, 1))
    class_mask = conf_pred_batch.new_zeros((bsize, H * W, num_anchors, 1))

    # get all the anchors
    anchors = torch.FloatTensor(cfg.anchors)

    # note: the all anchors' xywh scale is normalized by the grid width and height, i.e. 13 x 13
    # this is very crucial because the predict output is normalized to 0~1, which is also
    # normalized by the grid width and height
    all_grid_xywh = generate_all_anchors(
        anchors, H, W)  # shape: (H * W * num_anchors, 4), format: (x, y, w, h)
    # move onto the same device / dtype as the predictions
    all_grid_xywh = delta_pred_batch.new(
        *all_grid_xywh.size()).copy_(all_grid_xywh)

    # same boxes with x, y shifted by half a cell; used for anchor/gt overlaps
    all_anchors_xywh = all_grid_xywh.clone()
    all_anchors_xywh[:, 0:2] += 0.5
    if cfg.debug:
        print('all grid: ', all_grid_xywh[:12, :])
        print('all anchor: ', all_anchors_xywh[:12, :])
    all_anchors_xxyy = xywh2xxyy(all_anchors_xywh)

    # per-cell view of the grid boxes, loop-invariant so computed once
    grid_by_cell = all_grid_xywh.view(-1, num_anchors, 4)

    # process over batches
    for b in range(bsize):
        num_obj = int(num_boxes_batch[b].item())
        if num_obj == 0:
            # Nothing to assign. Reducing over an empty object dimension
            # below would raise, so leave this sample at its defaults.
            continue

        delta_pred = delta_pred_batch[b]
        # clone so the rescale below does not overwrite the caller's gt boxes
        gt_boxes = gt_boxes_batch[b][:num_obj, :].clone()
        gt_classes = gt_classes_batch[b][:num_obj]

        # rescale ground truth boxes from 0~1 to grid units
        gt_boxes[:, 0::2] *= W
        gt_boxes[:, 1::2] *= H

        # step 1: process IoU target

        # apply delta_pred to pre-defined anchors
        box_pred = box_transform_inv(all_grid_xywh, delta_pred)
        box_pred = xywh2xxyy(box_pred)

        # for each anchor, its iou target is corresponded to the max iou with any gt boxes
        ious = box_ious(box_pred, gt_boxes)  # shape: (H * W * num_anchors, num_obj)
        ious = ious.view(-1, num_anchors, num_obj)
        max_iou, _ = torch.max(ious, dim=-1, keepdim=True)  # shape: (H * W, num_anchors, 1)
        if cfg.debug:
            print('ious', ious)

        # we ignore the gradient of predicted boxes whose IoU with any gt box is greater than cfg.threshold
        # (guard uses `>` while the mask uses `>=`, kept exactly as before)
        iou_thresh_filter = max_iou.view(-1) > cfg.thresh
        if iou_thresh_filter.any():
            iou_mask[b][max_iou >= cfg.thresh] = 0

        # step 2: process box target and class target
        # calculate overlaps between anchors and gt boxes
        overlaps = box_ious(all_anchors_xxyy, gt_boxes).view(
            -1, num_anchors, num_obj)
        gt_boxes_xywh = xxyy2xywh(gt_boxes)

        # iterate over all objects
        for t in range(gt_boxes.size(0)):
            # compute the center of each gt box to determine which cell it falls on
            # assign it to a specific anchor by choosing max IoU
            gt_box_xywh = gt_boxes_xywh[t]
            gt_class = gt_classes[t]

            cell_idx_x, cell_idx_y = torch.floor(gt_box_xywh[:2])
            # A centre lying on (or past) the grid border would otherwise
            # index the wrong row or fall outside the H * W cells.
            cell_idx_x = cell_idx_x.clamp(0, W - 1)
            cell_idx_y = cell_idx_y.clamp(0, H - 1)
            cell_idx = (cell_idx_y * W + cell_idx_x).long()

            # update box_target, box_mask
            overlaps_in_cell = overlaps[cell_idx, :, t]
            argmax_anchor_idx = torch.argmax(overlaps_in_cell)

            assigned_grid = grid_by_cell[
                cell_idx, argmax_anchor_idx, :].unsqueeze(0)
            gt_box = gt_box_xywh.unsqueeze(0)
            target_t = box_transform(assigned_grid, gt_box)
            if cfg.debug:
                print('assigned_grid, ', assigned_grid)
                print('gt: ', gt_box)
                print('target_t, ', target_t)
            box_target[b, cell_idx, argmax_anchor_idx, :] = target_t.unsqueeze(0)
            box_mask[b, cell_idx, argmax_anchor_idx, :] = 1

            # update cls_target, cls_mask
            class_target[b, cell_idx, argmax_anchor_idx, :] = gt_class
            class_mask[b, cell_idx, argmax_anchor_idx, :] = 1

            # update iou target and iou mask
            iou_target[b, cell_idx, argmax_anchor_idx, :] = max_iou[
                cell_idx, argmax_anchor_idx, :]
            if cfg.debug:
                print(max_iou[cell_idx, argmax_anchor_idx, :])
            iou_mask[b, cell_idx, argmax_anchor_idx, :] = cfg.object_scale

    return iou_target.view(bsize, -1, 1), \
        iou_mask.view(bsize, -1, 1), \
        box_target.view(bsize, -1, 4), \
        box_mask.view(bsize, -1, 1), \
        class_target.view(bsize, -1, 1).long(), \
        class_mask.view(bsize, -1, 1)