import numpy as np
import torch
from torchvision.ops import box_iou, nms  # the original repo may define local equivalents

INF = 1e8  # large sentinel used by the ATSS matcher below (exact value assumed)

# Matcher, xywh2xyxy and ap_per_class are project-local helpers assumed to be
# importable from elsewhere in the repo.


def compute_loss(self, objectness, proposal, anchors_all, targets):
    # split the flat target tensor back into per-image gt boxes
    gt_boxes = targets['target'].split(targets['batch_len'])
    batch_idx = list()
    anchor_idx = list()
    gt_idx = list()
    for idx, gt in enumerate(gt_boxes):
        if len(gt) == 0:
            # no gt in this image: mark every anchor as background
            match_idx = torch.full_like(anchors_all[:, 0],
                                        fill_value=Matcher.BELOW_LOW_THRESHOLD).long()
        else:
            gt_anchor_iou = box_iou(gt[:, 1:], anchors_all)
            match_idx = self.proposal_matcher(gt_anchor_iou)
        positive_idx, negative_idx = self.balanced_positive_negative_sampler(match_idx)
        batch_idx.append(([idx] * len(positive_idx), [idx] * len(negative_idx)))
        gt_idx.append(match_idx[positive_idx].long())
        anchor_idx.append((positive_idx, negative_idx))
    all_batch_idx = sum([sum(item, []) for item in batch_idx], [])
    all_anchor_idx = torch.cat([torch.cat(item) for item in anchor_idx])
    # objectness target: 1 for sampled positives, 0 for sampled negatives
    all_cls_target = torch.tensor(
        sum([[1] * len(item[0]) + [0] * len(item[1]) for item in anchor_idx], []),
        device=objectness.device, dtype=objectness.dtype)
    all_cls_predicts = objectness[all_batch_idx, all_anchor_idx]
    cls_loss = self.bce(all_cls_predicts, all_cls_target[:, None])
    # box regression is computed on positive samples only
    all_positive_batch = sum([item[0] for item in batch_idx], [])
    all_positive_anchor = torch.cat([item[0] for item in anchor_idx])
    all_predict_box = proposal[all_positive_batch, all_positive_anchor]
    all_gt_box = torch.cat([i[j][:, 1:] for i, j in zip(gt_boxes, gt_idx)], dim=0)
    box_loss = self.box_loss(all_predict_box, all_gt_box).sum() / len(all_gt_box)
    return cls_loss, box_loss
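
# balanced_positive_negative_sampler is not shown here; it is assumed to follow the
# usual Faster R-CNN recipe of capping positives at a fixed fraction of the sample
# batch and filling the rest with random negatives. A minimal sketch of that idea
# (batch_size and positive_fraction are illustrative values, not taken from this repo):
def sample_balanced(match_idx, batch_size=256, positive_fraction=0.5):
    positive = torch.nonzero(match_idx >= 0, as_tuple=False).squeeze(1)
    negative = torch.nonzero(match_idx == Matcher.BELOW_LOW_THRESHOLD, as_tuple=False).squeeze(1)
    num_pos = min(len(positive), int(batch_size * positive_fraction))
    num_neg = min(len(negative), batch_size - num_pos)
    perm_pos = torch.randperm(len(positive), device=positive.device)[:num_pos]
    perm_neg = torch.randperm(len(negative), device=negative.device)[:num_neg]
    return positive[perm_pos], negative[perm_neg]
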
def __call__(self, anchors, gt_boxes, num_anchor_per_layer):
    '''
    ATSS-style anchor assignment.
    :param anchors: shape=[all_anchor, 4] (x1, y1, x2, y2)
    :param gt_boxes: per-image gt boxes, each shape=[num_gt, 5] (cls_id, x1, y1, x2, y2)
    :param num_anchor_per_layer: anchor counts per pyramid level
    :return: list of (batch_idx, match_gt_idx); match_gt_idx is -1 for unmatched anchors
    '''
    ret_list = list()
    anchor_xy = (anchors[:, :2] + anchors[:, 2:]) / 2.
    for bid, gt in enumerate(gt_boxes):
        if len(gt) == 0:
            continue
        start_idx = 0
        candidate_idxs = list()
        gt_xy = (gt[:, [1, 2]] + gt[:, [3, 4]]) / 2.
        # center-to-center distance between every anchor and every gt
        distances = (anchor_xy[:, None, :] - gt_xy[None, :, :]).pow(2).sum(-1).sqrt()  # shape=[all_anchor, num_gt]
        anchor_gt_iou = box_iou(anchors, gt[:, 1:])  # shape=[all_anchor, num_gt]
        # collect the top-k closest anchors per gt on each pyramid level
        for num_anchor in num_anchor_per_layer:
            distances_per_level = distances[start_idx:start_idx + num_anchor]
            top_k = min(self.top_k * self.anchor_num_per_loc, num_anchor)
            _, topk_idxs_per_level = distances_per_level.topk(top_k, dim=0, largest=False)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx += num_anchor
        candidate_idxs = torch.cat(candidate_idxs, dim=0)
        candidate_ious = anchor_gt_iou.gather(dim=0, index=candidate_idxs)  # shape=[sum_topk, num_gt]
        # filter condition 1: IoU above the adaptive statistic (mean + std per gt)
        iou_mean_per_gt = candidate_ious.mean(0)
        iou_std_per_gt = candidate_ious.std(0)
        iou_thresh_per_gt = iou_mean_per_gt + iou_std_per_gt
        is_pos = candidate_ious >= iou_thresh_per_gt[None, :]  # shape=[sum_topk, num_gt]
        # filter condition 2: anchor center lies inside the gt box
        candidate_xy = anchor_xy[candidate_idxs]
        lt = candidate_xy - gt[None, :, [1, 2]]
        rb = gt[None, :, [3, 4]] - candidate_xy
        is_in_gts = torch.cat([lt, rb], dim=-1).min(-1)[0] > 0.01
        is_pos = is_pos & is_in_gts
        gt_idx = torch.arange(len(gt))[None, :].repeat((len(candidate_idxs), 1))
        match = torch.full_like(anchor_gt_iou, fill_value=-INF)
        match[candidate_idxs[is_pos], gt_idx[is_pos]] = anchor_gt_iou[candidate_idxs[is_pos], gt_idx[is_pos]]
        # an anchor claimed by several gts keeps the one with the highest IoU
        val, match_gt_idx = match.max(dim=1)
        match_gt_idx[val == -INF] = -1
        ret_list.append((bid, match_gt_idx))
    return ret_list
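
# Toy illustration of filter condition 1 above: the positive threshold adapts per gt
# as mean + std of the candidate IoUs. The numbers are made up.
candidate_ious = torch.tensor([[0.1], [0.2], [0.6], [0.7]])  # shape=[sum_topk, num_gt=1]
iou_thresh_per_gt = candidate_ious.mean(0) + candidate_ious.std(0)  # ~0.69
is_pos = candidate_ious >= iou_thresh_per_gt[None, :]  # only the 0.7 candidate survives
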
def coco_map(predicts_list, targets_list):
    """
    :param predicts_list: per_img predicts_shape [n, 6] (x1, y1, x2, y2, score, cls_id)
    :param targets_list: per_img targets_shape [m, 5] (cls_id, x1, y1, x2, y2)
    :return: mean precision, mean recall, mAP@0.5, mAP@0.5:0.95
    """
    device = targets_list[0].device
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # IoU thresholds 0.50:0.05:0.95
    niou = iouv.numel()
    stats = list()
    for predicts, targets in zip(predicts_list, targets_list):
        nl = len(targets)
        tcls = targets[:, 0].tolist() if nl else []
        if predicts is None:
            if nl:
                stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
            continue
        correct = torch.zeros(predicts.shape[0], niou, dtype=torch.bool, device=device)
        if nl:
            detected = list()
            tcls_tensor = targets[:, 0]
            tbox = targets[:, 1:5]
            # match predictions to targets class by class
            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)
                pi = (cls == predicts[:, 5]).nonzero(as_tuple=False).view(-1)
                if pi.shape[0]:
                    # best-overlapping target for each prediction of this class
                    ious, i = box_iou(predicts[pi, :4], tbox[ti]).max(1)
                    for j in (ious > iouv[0]).nonzero(as_tuple=False):
                        d = ti[i[j]]
                        if d not in detected:  # each target can be claimed only once
                            detected.append(d)
                            correct[pi[j]] = ious[j] > iouv  # correct at every IoU threshold
                            if len(detected) == nl:
                                break
        stats.append((correct.cpu(), predicts[:, 4].cpu(), predicts[:, 5].cpu(), tcls))
    stats = [np.concatenate(x, 0) for x in zip(*stats)]
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        return mp, mr, map50, map
    else:
        return 0., 0., 0., 0.
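
# How a row of `correct` is produced above: one matched IoU value is compared against
# the whole 0.5:0.95 threshold vector at once via broadcasting. Made-up value:
iouv = torch.linspace(0.5, 0.95, 10)
print(torch.tensor(0.72) > iouv)
# tensor([ True,  True,  True,  True,  True, False, False, False, False, False])
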
def select_training_samples(self, proposals, gt_boxes):
    # append the gt boxes to the proposals so each gt has at least one perfectly
    # overlapping candidate during training
    proposals = [torch.cat([p, g[:, 1:]]) for p, g in zip(proposals, gt_boxes)]
    proposals_ret = list()
    proposal_idx = list()
    for p, g in zip(proposals, gt_boxes):
        if len(g) == 0:
            match_idx = torch.full_like(p[:, 0], fill_value=Matcher.BELOW_LOW_THRESHOLD).long()
        else:
            gt_anchor_iou = box_iou(g[:, 1:], p)
            match_idx = self.proposal_matcher(gt_anchor_iou)
        positive_idx, negative_idx = self.balanced_positive_negative_sampler(match_idx)
        proposal_idx.append((positive_idx, negative_idx, match_idx[positive_idx].long()))
        proposals_ret.append(p[torch.cat([positive_idx, negative_idx])])
    return proposals_ret, proposal_idx
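
# The Matcher sentinels used here are assumed to follow the torchvision convention,
# where negative values mark non-positive proposals:
#   Matcher.BELOW_LOW_THRESHOLD = -1  # background sample
#   Matcher.BETWEEN_THRESHOLDS = -2   # ignored sample
# so `match_idx >= 0` selects positives and the value itself is the matched gt index.
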
def __call__(self, anchors, gt_boxes):
    ret = list()
    for idx, gt_box in enumerate(gt_boxes):
        if len(gt_box) == 0:
            continue
        ori_match = None
        gt_anchor_iou = box_iou(gt_box[..., 1:], anchors)
        # best gt for every anchor
        match_val, match_idx = gt_anchor_iou.max(dim=0)
        if self.allow_low_quality_matches:
            ori_match = match_idx.clone()
        match_idx[match_val < self.ignore_iou] = self.BELOW_LOW_THRESHOLD
        match_idx[(match_val >= self.ignore_iou) & (match_val < self.iou_thresh)] = self.BETWEEN_THRESHOLDS
        if self.allow_low_quality_matches:
            self.set_low_quality_matches_(match_idx, ori_match, gt_anchor_iou)
        ret.append((idx, match_idx))
    return ret
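
# set_low_quality_matches_ is not shown in this snippet; a sketch that mirrors
# torchvision's behaviour: every gt keeps its highest-IoU anchors as positives even
# when that IoU falls below iou_thresh.
def set_low_quality_matches_(match_idx, ori_match, gt_anchor_iou):
    # per-gt best IoU over all anchors, shape=[num_gt]
    highest_quality_foreach_gt, _ = gt_anchor_iou.max(dim=1)
    # all (gt, anchor) pairs that tie the per-gt maximum
    gt_anchor_pairs = torch.nonzero(gt_anchor_iou == highest_quality_foreach_gt[:, None], as_tuple=False)
    anchors_to_update = gt_anchor_pairs[:, 1]
    # restore the pre-threshold assignment for those anchors
    match_idx[anchors_to_update] = ori_match[anchors_to_update]
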
def compute_box_loss(self, proposals, cls_predicts, box_predicts, targets):
    gt_boxes = targets['target'].split(targets['batch_len'])
    loss_cls_predicts = list()
    loss_box_predicts = list()
    loss_cls_targets = list()
    loss_box_targets = list()
    positive_ids = list()
    positive_gt_idx = list()
    for p, c, b, g in zip(proposals, cls_predicts, box_predicts, gt_boxes):
        if len(g) == 0:
            match_idx = torch.full_like(p[:, 0], fill_value=Matcher.BELOW_LOW_THRESHOLD).long()
        else:
            gt_anchor_iou = box_iou(g[:, 1:], p)
            match_idx = self.matcher(gt_anchor_iou).long()
        positive_idx, negative_idx = self.sampler(match_idx)
        gt_idx = match_idx[positive_idx]
        # positives contribute to both the classification and the box loss
        loss_cls_predicts.append(c[positive_idx])
        loss_box_predicts.append(b[positive_idx])
        loss_cls_targets.append(g[gt_idx][:, 0].long())
        loss_box_targets.append(g[gt_idx][:, 1:])
        positive_ids.append(positive_idx)
        positive_gt_idx.append(gt_idx)
        # negatives contribute to the classification loss only, labelled -1 (background)
        loss_cls_predicts.append(c[negative_idx])
        loss_cls_targets.append(torch.full((len(negative_idx),), -1, device=c.device, dtype=torch.long))
    loss_cls_predicts = torch.cat(loss_cls_predicts)
    loss_cls_targets = torch.cat(loss_cls_targets) + 1  # shift labels so background becomes class 0
    loss_box_predicts = torch.cat(loss_box_predicts)
    loss_box_targets = torch.cat(loss_box_targets)
    cls_loss = self.ce(loss_cls_predicts, loss_cls_targets)
    box_loss = self.iou_loss(loss_box_predicts, loss_box_targets).sum() / len(loss_box_targets)
    return cls_loss, box_loss, positive_ids, positive_gt_idx
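
# The `+ 1` shift above maps the background label (-1) to class 0, so the classifier
# head is expected to emit num_classes + 1 logits with index 0 reserved for
# background. Tiny made-up example:
import torch.nn.functional as F
logits = torch.randn(3, 81)              # e.g. 80 object classes + background
labels = torch.tensor([-1, 12, 40]) + 1  # -> tensor([ 0, 13, 41])
loss = F.cross_entropy(logits, labels)
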
def non_max_suppression(prediction, conf_thresh=0.1, iou_thresh=0.6, merge=False,
                        agnostic=False, multi_label=True, max_det=300):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
        detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """
    xc = prediction[..., 4] > conf_thresh  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    redundant = True  # require redundant detections
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thresh).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thresh]

        # Filter by class

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS: offset boxes per class so boxes of different classes never suppress each other
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thresh)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:
                # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thresh  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except Exception:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)

        output[xi] = x[i]
    return output
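
# xywh2xyxy is assumed to be the usual YOLO-style helper converting
# (center x, center y, width, height) boxes to corner coordinates; a sketch:
def xywh2xyxy(x):
    y = x.clone()
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1 = cx - w/2
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1 = cy - h/2
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2 = cx + w/2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2 = cy + h/2
    return y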