def __call__(self, bs, anchors, targets):
    """
    Label every anchor of every image as positive, negative or ignored.

    :param bs: batch_size
    :param anchors: list(anchor) anchor [all, 4] (x1,y1,x2,y2)
    :param targets: [gt_num, 7] (batch_id,weights,label_id,x1,y1,x2,y2)
    :return: (flags, matched_targets, merged_anchors)
        flags: one tensor per image with an entry per anchor;
            1 = IoU >= self.iou_thresh, 0 = IoU < self.ignore_iou,
            -1 = neither (ignored); all zeros when the image has no gt
        matched_targets: one tensor per image; row i is the
            (weights,label_id,x1,y1,x2,y2) row of the best-IoU gt for
            anchor i, or an empty tensor when the image has no gt
        merged_anchors: concatenation of ``anchors`` along dim 0
    """
    merged_anchors = torch.cat(anchors, dim=0)
    ones = torch.ones(size=(len(merged_anchors),),
                      device=merged_anchors.device)
    flags = []
    matched_targets = []
    for image_id in range(bs):
        # rows of this image without the batch-id column
        image_targets = targets[targets[:, 0] == image_id, 1:]
        if len(image_targets) == 0:
            flags.append(ones * 0.)
            matched_targets.append(torch.Tensor())
            continue

        # best gt (and its IoU) for every anchor
        best_iou, best_gt = box_iou(merged_anchors,
                                    image_targets[:, 2:]).max(dim=1)

        image_flag = ones * -1.
        # positives are written first so the negative rule wins on overlap
        image_flag[best_iou >= self.iou_thresh] = 1.
        image_flag[best_iou < self.ignore_iou] = 0.

        flags.append(image_flag)
        matched_targets.append(image_targets[best_gt, :])
    return flags, matched_targets, merged_anchors
def select_train_sample(self, proposal, targets):
    '''
    Pick the positive/negative proposals used for training, per image.

    :param proposal(list,len=bs): list(filter_box) filter_box.shape=[N,4]
    :param targets: rows of (bs_idx,weights,label_idx,x1,y1,x2,y2)
    :return:
        ret_proposal (list, len=bs): sampled boxes, shape=[n_p+n_n,4]
        ret_labels (list, len=bs): float labels, shape=[n_p+n_n]; 0 for
            negatives, label_idx+1 for positives
        ret_targets (list, len=bs): matched gt boxes, shape=[n_p+n_n,4];
            all-zero rows for negatives
    '''
    sampled_boxes = []
    sampled_labels = []
    sampled_gt_boxes = []
    for img_idx, img_proposal in enumerate(proposal):
        img_targets = targets[targets[:, 0] == img_idx, 1:]
        if len(img_targets):
            gt_boxes = img_targets[:, -4:]
            # the gt boxes themselves are appended to the candidate pool
            candidates = torch.cat([img_proposal, gt_boxes], dim=0)
            match_idx = self.matcher(box_iou(gt_boxes, candidates))
        else:
            candidates = img_proposal
            match_idx = torch.full((candidates.shape[0],),
                                   fill_value=-1,
                                   dtype=torch.long,
                                   device=candidates.device)

        # sampler output: 1 = positive, 0 = negative, 255 = not sampled
        sample_state = self.sampler(match_idx)
        keep = sample_state != 255
        kept_boxes = candidates[keep]
        is_pos = sample_state[keep].bool()
        # gt row matched by each kept positive
        pos_gt_idx = match_idx[keep][is_pos].long()

        labels = torch.zeros_like(is_pos,
                                  dtype=torch.float,
                                  requires_grad=False)
        # +1 keeps label 0 free for the negatives (cls output is num_cls+1)
        labels[is_pos] = img_targets[pos_gt_idx, 1] + 1

        gt_for_kept = torch.zeros_like(kept_boxes)
        gt_for_kept[is_pos, :] = img_targets[pos_gt_idx, -4:]

        sampled_boxes.append(kept_boxes)
        sampled_labels.append(labels)
        sampled_gt_boxes.append(gt_for_kept)
    return sampled_boxes, sampled_labels, sampled_gt_boxes
def __call__(self, anchors, targets):
    '''
    Match every anchor to a gt box through ``self.matcher``.

    :param anchors: shape=[N,4]
    :param targets: shape=[M,4]
    :return: matches_targets_ids: shape=[N,]
        matches_targets_ids[i]=k (k>=0)  positive sample, k is the index
                                         of the matched gt_box
        matches_targets_ids[i]=-1        BELOW_LOW_THRESHOLD
        matches_targets_ids[i]=-2        BETWEEN_THRESHOLDS
    '''
    # IoU is laid out gt-major: shape=[M,N]
    iou_matrix = box_iou(targets, anchors)
    return self.matcher(iou_matrix)
def coco_map(predicts_list, targets_list):
    """
    Compute mean precision, mean recall, mAP@0.5 and mAP@0.5:0.95.

    :param predicts_list: per_img predicts_shape [n,6] (x1,y1,x2,y2,score,cls_id),
        or None for an image without predictions
    :param targets_list: per_img targets_shape [m, 5] (cls_id,x1,y1,x2,y2)
    :return: (mp, mr, map50, map); all zeros when there is nothing to
        evaluate (including empty input lists)
    """
    # An empty dataset has no tensor to read the device from.
    if len(targets_list) == 0:
        return 0., 0., 0., 0.

    device = targets_list[0].device
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # IoU thresholds
    niou = iouv.numel()
    stats = []
    for predicts, targets in zip(predicts_list, targets_list):
        nl = len(targets)
        tcls = targets[:, 0].tolist() if nl else []
        if predicts is None:
            # record the missed labels so they still count against recall
            if nl:
                stats.append((torch.zeros(0, niou, dtype=torch.bool),
                              torch.Tensor(), torch.Tensor(), tcls))
            continue

        # every prediction starts as incorrect at every IoU threshold
        correct = torch.zeros(predicts.shape[0], niou,
                              dtype=torch.bool, device=device)
        if nl:
            detected = set()  # indices of gt boxes already claimed
            tcls_tensor = targets[:, 0]
            tbox = targets[:, 1:5]
            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)
                pi = (cls == predicts[:, 5]).nonzero(as_tuple=False).view(-1)
                if not pi.shape[0]:
                    continue
                # best-overlapping gt of this class for each prediction
                ious, i = box_iou(predicts[pi, :4], tbox[ti]).max(1)
                for j in (ious > iouv[0]).nonzero(as_tuple=False):
                    d = ti[i[j]]
                    d_key = int(d)
                    if d_key not in detected:
                        detected.add(d_key)
                        correct[pi[j]] = ious[j] > iouv
                        if len(detected) == nl:
                            # every gt in the image has been claimed
                            break
        stats.append((correct.cpu(), predicts[:, 4].cpu(),
                      predicts[:, 5].cpu(), tcls))

    stats = [np.concatenate(x, 0) for x in zip(*stats)]
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        # column 0 is the IoU=0.5 threshold; the row mean spans 0.5:0.95
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)
        mp, mr, map50, mean_ap = p.mean(), r.mean(), ap50.mean(), ap.mean()
        return mp, mr, map50, mean_ap
    return 0., 0., 0., 0.
def __call__(self, cls_predicts, box_predicts, anchors, targets):
    """
    Compute the bag-based positive loss plus the focal negative loss.

    :param cls_predicts: list of per-level class logits, indexed [bi] and
        concatenated along dim 0; the last dim is the class axis
    :param box_predicts: list of per-level box regressions, indexed like
        ``cls_predicts``
    :param anchors: list of per-level anchors, concatenated along dim 0
    :param targets: rows of (batch_id, weights, label_id, x1, y1, x2, y2)
        -- layout as consumed below (column 0 selects the image,
        column 2 is the label, columns 3: are the box)
    :return: (total_loss, stack([negative_loss, positive_loss]), len(targets))
    """
    device = cls_predicts[0].device
    batch_size = cls_predicts[0].shape[0]
    num_classes = cls_predicts[0].shape[-1]
    all_anchors = torch.cat(anchors, dim=0)
    normalizer = max(1, len(targets))

    def focal_negative(prob):
        # focal-style penalty that pushes ``prob`` towards zero
        return (-(1 - self.alpha) * (prob ** self.gamma)
                * (1 - prob).log()).sum()

    neg_terms = []
    pos_terms = []
    for bi in range(batch_size):
        cls_prob = torch.cat([level[bi] for level in cls_predicts], dim=0)
        cls_prob = cls_prob.sigmoid().clamp(min=1e-6, max=1 - 1e-6)
        gts = targets[targets[:, 0] == bi, 1:]
        if len(gts) == 0:
            # no objects in this image: everything is background
            neg_terms.append(focal_negative(cls_prob))
            continue

        box_reg = torch.cat([level[bi] for level in box_predicts], dim=0)
        gt_boxes = gts[:, 2:]

        # ---- positive loss: bag of the top-k IoU anchors per gt ----
        _, bag_idx = box_iou(gt_boxes, all_anchors).topk(
            k=self.top_k, dim=1, sorted=False)
        label_index = gts[:, [1]][:, None, :].long().repeat(1, self.top_k, 1)
        bag_cls_prob = cls_prob[bag_idx].gather(
            dim=-1, index=label_index).squeeze(-1)
        bag_reg_target = self.box_coder.encoder(
            all_anchors[bag_idx], gts[:, None, 2:])
        bag_reg_loss = smooth_l1_loss(
            box_reg[bag_idx], bag_reg_target, self.beta).sum(-1)
        bag_box_prob = (-self.box_reg_weight * bag_reg_loss).exp()
        pos_terms.append(
            self.alpha * mean_max(bag_cls_prob * bag_box_prob).sum())

        # ---- per-anchor, per-class assignment confidence (no gradient) ----
        with torch.no_grad():
            decoded = self.box_coder.decoder(box_reg, all_anchors)
            gt_pred_iou = box_iou(gt_boxes, decoded)
            low = self.box_iou_thresh
            high = gt_pred_iou.max(dim=1, keepdim=True)[0].clamp(
                min=low + 1e-6)
            # rescale IoU from [low, best IoU of the gt] into [0, 1]
            gt_pred_prob = ((gt_pred_iou - low) / (high - low)).clamp(
                min=0., max=1.)
            sparse_index = torch.stack([
                torch.arange(len(gts), device=device),
                gts[:, 1]
            ], dim=0).long()
            per_gt_cls_prob = torch.sparse_coo_tensor(
                sparse_index, gt_pred_prob, device=device)
            # (class, anchor) pairs with a non-zero confidence from any gt
            cls_idx, anchor_idx = torch.sparse.sum(
                per_gt_cls_prob, dim=0).to_dense().nonzero(
                    as_tuple=False).t()

        if len(cls_idx) == 0:
            neg_terms.append(focal_negative(cls_prob))
            continue

        # strongest confidence among gts of the matching class
        best_prob = torch.where(
            gts[:, [1]].long() == cls_idx,
            gt_pred_prob[:, anchor_idx],
            torch.tensor(data=0., device=device)).max(dim=0)[0]
        assign_prob = torch.zeros(size=(len(all_anchors), num_classes),
                                  device=device)
        assign_prob[anchor_idx, cls_idx] = best_prob
        neg_terms.append(focal_negative(cls_prob * (1 - assign_prob)))

    negative_losses = torch.stack(neg_terms).sum() / normalizer
    if len(pos_terms) == 0:
        zero = torch.tensor(data=0., device=device)
        return (negative_losses,
                torch.stack([negative_losses, zero]),
                len(targets))

    positive_losses = torch.stack(pos_terms).sum() / normalizer
    total_loss = negative_losses + positive_losses
    return (total_loss,
            torch.stack([negative_losses, positive_losses]),
            len(targets))
def __call__(self, cls_predicts, box_predicts, implicits, grids, gaussian, targets):
    """
    Compute the weighted positive/negative assignment loss.

    The caller's ``cls_predicts`` and ``implicits`` lists are left
    untouched; fp16 entries are cast to fp32 on local copies only.

    :param cls_predicts: list(cls_predict) cls_predict [bs, cls, h, w]
    :param box_predicts: list(box_predict) box_predict [bs, 4, h, w]
    :param implicits: list(implicit) implicit[bs, 1, h, w]
    :param grids: list(grid) grid [h, w, 2], one per entry of self.strides
    :param gaussian: [cls, 4]; columns :2 are used as the centre offset
        and columns 2: as the spread of the per-class centre prior
    :param targets: [gt, 7] (bs, weights, label_id, x1, y1, x2, y2)
    :return: (total_loss, detached stack([negative, positive]), len(targets))
    """
    device = cls_predicts[0].device
    bs = cls_predicts[0].shape[0]
    cls_num = cls_predicts[0].shape[1]

    # expand_grid [grid_num,3](xc,yc,stride)
    expand_grid = torch.cat([
        torch.cat([
            grid_item,
            torch.tensor(data=stride_item, device=device,
                         dtype=torch.float).expand_as(grid_item[..., [0]])
        ], dim=-1).view(-1, 3)
        for stride_item, grid_item in zip(self.strides, grids)
    ], dim=0)

    # Work in fp32 on local lists so the caller's lists are not rewritten.
    cls_predicts = [item.float() if item.dtype == torch.float16 else item
                    for item in cls_predicts]
    implicits = [item.float() if item.dtype == torch.float16 else item
                 for item in implicits]

    negative_loss_list = []
    positive_loss_list = []
    for bi in range(bs):
        # batch_cls_predicts [grid_num,cls_num]
        batch_cls_predicts = torch.cat(
            [cls_item[bi].permute(1, 2, 0).contiguous().view(-1, cls_num)
             for cls_item in cls_predicts], dim=0).sigmoid()
        # batch_implicit [grid_num,1]
        batch_implicit = torch.cat(
            [implicit_item[bi].permute(1, 2, 0).contiguous().view(-1, 1)
             for implicit_item in implicits], dim=0).sigmoid()
        batch_join_predicts = (batch_cls_predicts * batch_implicit).clamp(
            1e-6, 1 - 1e-6)
        # batch_box_predicts [grid_num, 4]
        batch_box_predicts = torch.cat(
            [box_item[bi].permute(1, 2, 0).contiguous().view(-1, 4)
             for box_item in box_predicts], dim=0)

        # [gt_num,6] (weights,label_idx,x1,y1,x2,y2)
        batch_targets = targets[targets[:, 0] == bi, 1:]
        if len(batch_targets) == 0:
            # image without objects: plain focal negative loss
            negative_loss = -(1 - self.alpha) * batch_join_predicts ** self.gamma * (
                1 - batch_join_predicts).log()
            negative_loss_list.append(negative_loss.sum())
            continue

        # ---------------- positive loss ----------------
        gt_xy = (batch_targets[:, [2, 3]] + batch_targets[:, [4, 5]]) / 2
        # [grid_num,gt_num,2] centre offset in units of the grid stride
        xy_offset = (expand_grid[:, None, :2] - gt_xy[None, :, :]) / expand_grid[:, None, [2]]
        # [grid_num,gt_num,4]
        batch_reg_targets = self.box_coder.encode(expand_grid[..., :2],
                                                  batch_targets[..., 2:])
        # (grid, gt) pairs whose four regression targets are all positive
        grid_idx, gt_idx = (batch_reg_targets.min(dim=-1)[0] > 0).nonzero(
            as_tuple=False).t()
        gt_label = batch_targets[gt_idx, 1].long()

        cls_prob = batch_join_predicts[grid_idx, gt_label]
        iou_loss = self.iou_loss_func(batch_box_predicts[grid_idx, :],
                                      batch_reg_targets[grid_idx, gt_idx, :])
        loc_prob = (-self.lambda_p * iou_loss).exp()
        joint_prob = cls_prob * loc_prob
        confidence = (joint_prob / self.temperature).exp()

        # centre prior: exp(-(d - mu)^2 / (2 * theta^2)), summed over x/y
        gaussian_delta_mu = -(
            (xy_offset[grid_idx, gt_idx, :] - gaussian[gt_label, :2]) ** 2
        ).sum(-1)
        gaussian_delta_theta = 2 * ((gaussian[gt_label, 2:]) ** 2).sum(-1)
        gaussian_weights = (gaussian_delta_mu / gaussian_delta_theta).exp()
        positive_weights = confidence * gaussian_weights

        positive_loss = torch.tensor(data=0., device=device)
        for unique_gt_idx in gt_idx.unique():
            gt_mask = gt_idx == unique_gt_idx
            # weights are normalised inside each gt instance
            instance_weights = positive_weights[gt_mask] / positive_weights[gt_mask].sum()
            instance_loss = -(instance_weights * joint_prob[gt_mask]).sum().log()
            positive_loss += instance_loss
        positive_loss_list.append(positive_loss)

        # ---------------- negative loss ----------------
        decode_box = self.box_coder.decoder(expand_grid[..., :2],
                                            batch_box_predicts).detach()
        predict_targets_iou = box_iou(decode_box, batch_targets[..., 2:])
        max_iou, max_iou_gt_idx = predict_targets_iou.max(dim=-1)
        # NOTE(review): max_iou == 1 makes this division produce inf --
        # confirm whether that can happen with the decoder in use.
        func_iou = 1 / (1 - max_iou)
        func_iou = 1 - (func_iou - 1) / (func_iou.max() - 1 + 1e-10)
        # weight stays 1 everywhere except at the candidate positives
        negative_weights = torch.ones(size=(expand_grid.shape[0], cls_num),
                                      device=device).float()
        negative_weights[grid_idx, gt_label] = func_iou[grid_idx]
        weighted_negative_prob = negative_weights * batch_join_predicts
        negative_loss = -(1 - self.alpha) * weighted_negative_prob ** self.gamma * (
            1 - weighted_negative_prob).log()
        negative_loss_list.append(negative_loss.sum())

    total_negative_loss = torch.stack(negative_loss_list).sum() / max(1, len(targets))
    if len(targets) == 0:
        return total_negative_loss, \
               torch.stack([total_negative_loss, torch.tensor(0., device=device)]).detach(), \
               len(targets)
    total_positive_loss = torch.stack(positive_loss_list).sum() / max(1, len(targets))
    total_negative_loss = total_negative_loss * self.negative_weights
    total_positive_loss = total_positive_loss * self.positive_weights
    total_loss = total_negative_loss + total_positive_loss
    return total_loss, torch.stack([total_negative_loss, total_positive_loss]).detach(), len(targets)
def coco_map(predicts_list, targets_list, ID_list, shape_list, net_input_size, save_json=True):
    """
    Compute P/R/mAP statistics and optionally run pycocotools on a JSON dump.

    :param predicts_list(list, len=len(dataset)): per_img predicts_shape [n,6]
        (x1,y1,x2,y2,score,cls_id), or None for an image without predictions
    :param targets_list(list, len=len(dataset)): per_img targets_shape [m, 5]
        (cls_id,x1,y1,x2,y2)
    :param ID_list(list, len=len(dataset)): per-image identifier, written
        unchanged as 'image_id' into the results JSON
    :param shape_list(list, len=len(dataset)): original image shape, passed
        reversed to calculate_border / scale_coords
    :param net_input_size: network input size, forwarded to calculate_border
    :param save_json: when True, dump detections to
        'detections_val2017_results.json' and try a pycocotools evaluation
    :return: (mp, mr, map50, map, maps); ``maps`` has one entry per class.
        Classes missing from the targets get the pycocotools mAP when that
        evaluation ran, otherwise the mean AP computed here.
    """
    num_classes = 13  # change it with dataset

    # An empty dataset has no tensor to read the device from.
    if len(targets_list) == 0:
        return 0., 0., 0., 0., np.zeros(num_classes)

    device = targets_list[0].device
    # IoU thresholds from 0.5 to 0.95 in steps of 0.05
    iouv = torch.linspace(0.5, 0.95, 10).to(device)
    niou = iouv.numel()
    stats = []
    jdict = []
    for predicts, targets, path, original_shape in zip(predicts_list, targets_list, ID_list, shape_list):
        # labels of the current image
        nl = len(targets)
        tcls = targets[:, 0].tolist() if nl else []

        # no predictions: only record the (missed) labels
        if predicts is None:
            if nl:
                stats.append((torch.zeros(0, niou, dtype=torch.bool),
                              torch.Tensor(), torch.Tensor(), tcls))
            continue

        # Append to pycocotools JSON dictionary
        if save_json:
            # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
            image_id = path  # BDD; for coco use Path(path).stem
            box_json = predicts[:, :4].clone()  # x1y1x2y2
            ratio, pad = calculate_border(original_shape[::-1], net_input_size)
            scale_coords(None, box_json, original_shape[::-1], (ratio, pad))  # to original shape
            box_json = xyxy2xywh(box_json)  # xywh
            box_json[:, :2] -= box_json[:, 2:] / 2  # xy center to top-left corner
            for p, b in zip(predicts.tolist(), box_json.tolist()):
                jdict.append({'image_id': image_id,
                              'category_id': BDD100_ids[int(p[5])],
                              'bbox': [round(x, 3) for x in b],
                              'score': round(p[4], 5)})

        # every prediction starts as incorrect at every IoU threshold
        correct = torch.zeros(predicts.shape[0], niou, dtype=torch.bool, device=device)
        if nl:
            detected = []  # gt boxes that already have a matching prediction
            tcls_tensor = targets[:, 0]
            tbox = targets[:, 1:5]

            # handle each class present in the image separately
            for cls in torch.unique(tcls_tensor):
                ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                pi = (cls == predicts[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                if pi.shape[0]:
                    # box_iou yields [N, M] for N predictions and M targets;
                    # max(1) keeps the best target (index i) per prediction
                    ious, i = box_iou(predicts[pi, :4], tbox[ti]).max(1)

                    # predictions whose best IoU clears the lowest threshold
                    for j in (ious > iouv[0]).nonzero(as_tuple=False):
                        d = ti[i[j]]  # gt box claimed by this prediction
                        if d not in detected:
                            detected.append(d)
                            correct[pi[j]] = ious[j] > iouv  # one flag per IoU threshold
                            if len(detected) == nl:  # all targets already located in image
                                break

        # Append statistics (correct, conf, pcls, tcls)
        stats.append((correct.cpu(), predicts[:, 4].cpu(), predicts[:, 5].cpu(), tcls))

    # mAP reported by pycocotools; stays None when that evaluation is
    # skipped or fails.
    coco_eval_map = None
    if save_json and len(jdict):
        f = 'detections_val2017_results.json'  # filename
        print('\nCOCO mAP with pycocotools... saving %s...' % f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # NOTE: the items below have to be changed with the dataset
            imgIds = ID_list  # BDD100
            cocoGt = COCO('/home/wangchao/public_dataset/BDD100/annotations/bdd100k_labels_images_det_coco_val.json')  # initialize BDD ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            coco_eval_map = cocoEval.stats[0]  # mAP@0.5:0.95
        except Exception as e:
            # best effort: fall back to the statistics computed below
            print('ERROR: pycocotools unable to run: %s' % e)

    stats = [np.concatenate(x, 0) for x in zip(*stats)]
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, mean_ap = p.mean(), r.mean(), ap50.mean(), ap.mean()
        # Fill value for classes without targets. It is always defined
        # here, even when the pycocotools branch did not run.
        fill_value = mean_ap if coco_eval_map is None else coco_eval_map
        maps = np.zeros(num_classes) + fill_value
        for i, c in enumerate(ap_class):
            maps[c] = ap[i]
        return mp, mr, map50, mean_ap, maps
    return 0., 0., 0., 0., np.zeros(num_classes)
def __call__(self, cls_predicts, box_predicts, anchors, targets):
    '''
    Compute the bag-based positive loss and the focal negative loss.

    A batch in which no image has a gt box is supported: the positive
    loss is then zero and only the empty-image negative term remains.

    :param cls_predicts: list of per-level class logits, indexed [bi] and
        concatenated along dim 0; the last dim is the class axis
    :param box_predicts: list of per-level box regressions, indexed like
        ``cls_predicts``
    :param anchors: list of per-level anchors, concatenated to [num_anchors,4]
    :param targets: rows of (batch_id,conf_score,label_id,x1,y1,x2,y2)
    :return: (total_loss, stack([negative_loss, positive_loss]), positive_numels)
    '''
    device = cls_predicts[0].device
    bs = cls_predicts[0].shape[0]
    cls_num = cls_predicts[0].shape[-1]
    expand_anchor = torch.cat(anchors, dim=0)  # shape=[num_anchors,4]
    positive_numels = 0  # number of gt boxes seen so far
    box_prob = []  # per image P{a_j in A+}
    positive_loss_list = []
    negative_loss_list = []
    cls_probs = []
    for bi in range(bs):
        # class probabilities of every anchor, shape=[num_anchors,num_cls]
        cls_prob = torch.cat(
            [cls_item[bi] for cls_item in cls_predicts],
            dim=0).sigmoid().clamp(min=1e-6, max=1 - 1e-6)
        # gt boxes of the image, shape=[num_gts,6]
        # 6==>conf_score,label_id,x1,y1,x2,y2
        target = targets[targets[:, 0] == bi, 1:]

        # no gt box: only the focal loss of the negative case applies
        if len(target) == 0:
            negative_loss = -(cls_prob**self.gamma) * (
                (1 - cls_prob).clamp(min=1e-10, max=1.0 - 1e-10).log().clamp(
                    min=-1000., max=1000.))
            negative_loss_list.append(negative_loss.sum())
            continue

        cls_probs.append(cls_prob)
        # box regression of every anchor, shape=[num_anchors,4]
        box_regression = torch.cat(
            [box_item[bi] for box_item in box_predicts], dim=0)

        with torch.set_grad_enabled(False):
            # box_localization: a_{j}^{loc}, shape=[num_anchors,4] (x1,y1,x2,y2)
            box_localization = self.box_coder.decoder(box_regression,
                                                      expand_anchor)
            # object_box_iou: IoU_{ij}^{loc}, shape=(num_gts,num_anchors)
            object_box_iou = box_iou(target[:, 2:], box_localization)
            t1 = self.box_iou_thresh
            t2 = object_box_iou.max(dim=1, keepdim=True)[0].clamp(
                min=t1 + 1e-12)  # shape=[num_gts,1]
            # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]
            object_box_prob = ((object_box_iou - t1) / (t2 - t1)).clamp(
                min=0, max=1.)

            # indices.shape=[2,num_gts]: row 0 is the gt index, row 1 the
            # class of that gt
            indices = torch.stack(
                [torch.arange(len(target), device=device), target[:, 1]],
                dim=0).long()
            # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j].
            # Entry [gt_id, label_id] holds object_box_prob[gt_id]; every
            # other position is zero.
            object_cls_box_prob = torch.sparse_coo_tensor(indices,
                                                          object_box_prob,
                                                          device=device)

            # image_box_prob: P{a_{j} in A_{+}}, shape=[num_anchors,num_cls].
            # The code from here to ``box_prob.append`` implements
            #   torch.sparse.max(object_cls_box_prob, dim=0).t()
            indices = torch.sparse.sum(
                object_cls_box_prob,
                dim=0).to_dense().nonzero(as_tuple=False).t()  # shape=[2,N]
            if indices.numel() == 0:
                image_box_prob = torch.zeros(
                    expand_anchor.shape[0],
                    cls_num).type_as(object_box_prob)
            else:
                # (num_gts,1) == (N) ==> (num_gts,N); max over gts ==> (N)
                nonzero_box_prob = torch.where(
                    target[:, 1].unsqueeze(dim=-1) == indices[0],
                    object_box_prob[:, indices[1]],
                    torch.tensor([0]).type_as(object_box_prob)).max(dim=0)[0]
                image_box_prob = torch.sparse_coo_tensor(
                    indices.flip([0]),
                    nonzero_box_prob,
                    size=(expand_anchor.shape[0], cls_num),
                    device=device).to_dense()
            box_prob.append(image_box_prob)

        # construct bags for objects: the top_k IoU anchors of every gt
        match_quality_matrix = box_iou(target[:, 2:], expand_anchor)
        _, matched = torch.topk(match_quality_matrix,
                                self.top_k,
                                dim=1,
                                sorted=False)  # shape=(num_gts,top_k)
        del match_quality_matrix

        # matched_cls_prob: P_{ij}^{cls}, shape=(num_gts,top_k)
        matched_cls_prob = cls_prob[matched].gather(
            dim=-1,
            index=(target[:, [1]][:, None, :]).long().repeat(
                1, self.top_k, 1)).squeeze(-1)

        # matched_box_prob: P_{ij}^{loc}
        matched_object_targets = self.box_coder.encoder(
            expand_anchor[matched],
            target[:, 2:].unsqueeze(dim=1))  # shape=[num_gts,topk,4]
        retinanet_regression_loss = smooth_l1_loss(box_regression[matched],
                                                   matched_object_targets,
                                                   self.box_reg_weight,
                                                   self.beta)
        matched_box_prob = torch.exp(-retinanet_regression_loss)

        # positive_losses: { -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) }
        positive_numels += len(target)
        positive_loss_list.append(
            self.positive_bag_loss_func(matched_cls_prob * matched_box_prob,
                                        dim=1))

    # positive_loss_list, cls_probs and box_prob grow together, so one
    # check covers all three. torch.cat / torch.stack reject empty lists,
    # so a batch without any gt box uses zero placeholders instead.
    if positive_loss_list:
        # positive_loss: sum_i -log(Mean-max(P_cls * P_loc)) / ||B||
        item1 = torch.cat(positive_loss_list).sum()
        # negative bag term: FL((1 - P{a_j in A+}) * P_cls)
        bag_negative = self.negative_bag_loss_func(
            torch.stack(cls_probs, dim=0) *
            (1 - torch.stack(box_prob, dim=0)), self.gamma)
    else:
        item1 = torch.zeros((), device=device, dtype=cls_predicts[0].dtype)
        bag_negative = torch.zeros((), device=device,
                                   dtype=cls_predicts[0].dtype)

    item2 = max(1, positive_numels)
    positive_loss = reduce_sum(item1) / reduce_sum(
        torch.tensor(data=item2, device=device).float()).item()

    if len(negative_loss_list) != 0:
        neg_loss_empty = torch.stack(negative_loss_list, dim=0).sum()
    else:
        neg_loss_empty = 0

    # negative_loss: (empty-image term + bag term) / (top_k * ||B||)
    item3 = neg_loss_empty + bag_negative
    item4 = max(1, positive_numels * self.top_k)
    negative_loss = reduce_sum(item3) / reduce_sum(
        torch.tensor(data=item4, device=device).float()).item()

    total_loss = positive_loss * self.alpha + negative_loss * (1 - self.alpha)
    return total_loss, torch.stack([negative_loss,
                                    positive_loss]), positive_numels
def __call__(self, cls_predicts, box_predicts, implicits, grids, gaussian, targets):
    '''
    Compute the alpha-weighted positive/negative assignment loss.

    The caller's ``cls_predicts`` and ``implicits`` lists are left
    untouched; fp16 entries are cast to fp32 on local copies only.

    :param cls_predicts: list(cls_predict) cls_predict [bs, num_cls, h, w]
    :param box_predicts: list(box_predict) box_predict [bs, 4, h, w]
    :param implicits: list(implicit) implicit[bs, 1, h, w]
    :param grids: list(grid) grid [h, w, 2] 2==>(xc,yc) at input-image scale
    :param gaussian: [cls, 4] 4==>(ux,uy,theta_x,theta_y)
    :param targets: [gt, 7] (bs, weights, label_id, x1, y1, x2, y2)
    :return: (total_loss, detached stack([negative, positive]), len(targets))
    '''
    device = cls_predicts[0].device
    bs = cls_predicts[0].shape[0]
    cls_num = cls_predicts[0].shape[1]

    # expand_grid.shape=[grid_num,3] 3==>(xc,yc,stride)
    per_level_grids = []
    for stride_item, grid_item in zip(self.strides, grids):
        stride_plane = torch.tensor(
            data=stride_item, device=device,
            dtype=torch.float).expand_as(grid_item[..., [0]])
        per_level_grids.append(
            torch.cat([grid_item, stride_plane], dim=-1).view(-1, 3))
    expand_grid = torch.cat(per_level_grids, dim=0)

    # Cast fp16 inputs on local lists; the caller's lists stay as passed.
    cls_predicts = [
        item.float() if item.dtype == torch.float16 else item
        for item in cls_predicts
    ]
    implicits = [
        item.float() if item.dtype == torch.float16 else item
        for item in implicits
    ]

    negative_loss_list = []
    positive_loss_list = []
    for bi in range(bs):
        # batch_cls_predicts [grid_num,cls_num] after sigmoid
        batch_cls_predicts = torch.cat([
            cls_item[bi].permute(1, 2, 0).contiguous().view(-1, cls_num)
            for cls_item in cls_predicts
        ], dim=0).sigmoid()
        # batch_implicit [grid_num,1]
        batch_implicit = torch.cat([
            implicit_item[bi].permute(1, 2, 0).contiguous().view(-1, 1)
            for implicit_item in implicits
        ], dim=0).sigmoid()
        # joint prediction = classification * implicit objectness
        batch_join_predicts = (batch_cls_predicts * batch_implicit).clamp(
            1e-6, 1 - 1e-6)
        # batch_box_predicts [grid_num, 4]
        batch_box_predicts = torch.cat([
            box_item[bi].permute(1, 2, 0).contiguous().view(-1, 4)
            for box_item in box_predicts
        ], dim=0)
        # target [gt_num,6] 6==>(weights, label_id, x1, y1, x2, y2)
        batch_targets = targets[targets[:, 0] == bi, 1:]

        # without targets the loss is the plain negative focal loss
        if len(batch_targets) == 0:
            negative_loss = -1 * (batch_join_predicts**self.gamma) * (
                1 - batch_join_predicts).log()
            negative_loss_list.append(negative_loss.sum())
            continue

        # ------------------------- positive loss -------------------------
        gt_xy = (batch_targets[:, [2, 3]] + batch_targets[:, [4, 5]]) / 2.
        # d=(grid_xy-gt_xy)/stride, used for the centre prior
        # [grid_num,gt_num,2]
        xy_offset = (expand_grid[:, None, :2] -
                     gt_xy[None, :, :]) / expand_grid[:, None, [2]]
        # regression target of every grid point [grid_num,gt_num,4]
        batch_reg_targets = self.box_coder.encode(expand_grid[..., :2],
                                                  batch_targets[..., 2:])
        # (grid, gt) pairs whose four regression targets are all positive
        # are taken as candidate positives
        grid_idx, gt_idx = (batch_reg_targets.min(dim=-1)[0] > 0).nonzero(
            as_tuple=False).t()
        gt_label = batch_targets[gt_idx, 1].long()

        cls_prob = batch_join_predicts[grid_idx, gt_label]
        iou_loss = self.iou_loss_func(
            batch_box_predicts[grid_idx, :],
            batch_reg_targets[grid_idx, gt_idx, :])
        loc_prob = (-self.lambda_p * iou_loss).exp()  # P_loc
        joint_prob = cls_prob * loc_prob  # P_+ = cls_prob * loc_prob
        confidence = (joint_prob / self.temperature).exp()  # C(P)

        # G(d) = exp(-(d - u)**2 / (2 * theta**2)), summed over x/y;
        # u and theta come from ``gaussian``
        gaussian_delta_mu = -(
            (xy_offset[grid_idx, gt_idx, :] - gaussian[gt_label, :2])**
            2).sum(-1)
        gaussian_delta_theta = 2 * ((gaussian[gt_label, 2:])**2).sum(-1)
        gaussian_weights = (gaussian_delta_mu / gaussian_delta_theta).exp()

        # w+ before per-instance normalisation
        positive_weights = confidence * gaussian_weights

        positive_loss = torch.tensor(data=0., device=device)
        for unique_gt_idx in gt_idx.unique():
            gt_idx_mask = gt_idx == unique_gt_idx
            instance_weights = positive_weights[
                gt_idx_mask] / positive_weights[gt_idx_mask].sum()
            instance_loss = -(instance_weights *
                              joint_prob[gt_idx_mask]).sum().log()
            positive_loss += instance_loss
        positive_loss_list.append(positive_loss)

        # ------------------------- negative loss -------------------------
        # decoded predictions, shape=[grid_num,4] 4==>(x1,y1,x2,y2)
        decode_box = self.box_coder.decoder(expand_grid[..., :2],
                                            batch_box_predicts).detach()
        # shape=[grid_num,gt_num]
        predict_targets_iou = box_iou(decode_box, batch_targets[..., 2:])
        # best IoU between each predicted box and any gt, shape=[grid_num]
        max_iou, max_iou_gt_idx = predict_targets_iou.max(dim=-1)
        # NOTE(review): max_iou == 1 makes this division produce inf --
        # confirm whether that can happen with the decoder in use.
        func_iou = 1 / (1 - max_iou)
        # rescaled so that max_iou=0 gives 1 and the largest IoU gives ~0
        func_iou = 1 - (func_iou - 1) / (func_iou.max() - 1 + 1e-10)

        # w- is 1.0 everywhere except at the candidate positives
        negative_weights = torch.ones(size=(expand_grid.shape[0], cls_num),
                                      device=device).float()
        negative_weights[grid_idx, gt_label] = func_iou[grid_idx]
        weighted_negative_prob = negative_weights * batch_join_predicts
        negative_loss = -1 * (weighted_negative_prob**self.gamma) * (
            1 - weighted_negative_prob).log()
        negative_loss_list.append(negative_loss.sum())

    total_negative_loss = torch.stack(negative_loss_list).sum() / max(
        1, len(targets))
    if len(targets) == 0:
        return total_negative_loss, torch.stack(
            [total_negative_loss,
             torch.tensor(0., device=device)]).detach(), len(targets)

    total_positive_loss = torch.stack(positive_loss_list).sum() / max(
        1, len(targets))
    total_negative_loss = total_negative_loss * (1 - self.alpha)
    total_positive_loss = total_positive_loss * self.alpha
    total_loss = total_negative_loss + total_positive_loss
    return total_loss, torch.stack(
        [total_negative_loss,
         total_positive_loss]).detach(), len(targets)
def non_max_suppression(prediction, conf_thresh=0.1, iou_thresh=0.6, merge=False, agnostic=False, multi_label=True, max_det=300):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Args:
        prediction: per-image rows of (cx, cy, w, h, obj_conf, cls_conf...)
        conf_thresh: minimum objectness and minimum obj_conf * cls_conf
        iou_thresh: IoU threshold passed to ``nms`` (and used for merging)
        merge: replace kept boxes by the score-weighted mean of the boxes
            overlapping them
        agnostic: run NMS across classes instead of per class
        multi_label: emit one detection per (box, class) above threshold
            instead of only the best class per box
        max_det: maximum number of detections kept per image

    Returns:
         list with one entry per image: detections with shape
         nx6 (x1, y1, x2, y2, conf, cls), or None when nothing survives
    """
    xc = prediction[..., 4] > conf_thresh  # candidates

    # Settings
    max_wh = 4096  # (pixels) per-class coordinate offset used for batched NMS
    redundant = True  # require redundant detections

    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # confidence (boolean indexing returns a copy)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thresh).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thresh]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Batched NMS: shifting boxes by class keeps classes from
        # suppressing each other
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thresh)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thresh  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except RuntimeError:
                # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                # Merging is best effort: dump the inputs and keep the
                # unmerged NMS result. Only torch runtime errors are
                # tolerated, so KeyboardInterrupt/SystemExit propagate.
                print(x, i, x.shape, i.shape)

        output[xi] = x[i]

    return output
def non_max_suppression(prediction, conf_thresh=0.1, iou_thresh=0.6, merge=False, agnostic=False, multi_label=True, max_det=300):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Args:
        prediction(torch.Tensor): shape=[bs,-1,no(85)]
            note: box cords (x,y,w,h) have been decoded into input size.
        conf_thresh: minimum objectness and minimum obj_conf * cls_conf
        iou_thresh: IoU threshold passed to ``nms`` (and used for merging)
        merge: replace kept boxes by the score-weighted mean of the boxes
            overlapping them
        agnostic: run NMS across classes instead of per class
        multi_label: emit one detection per (box, class) above threshold
            instead of only the best class per box
        max_det: maximum number of detections kept per image

    Returns:
         a list(len=bs) with element's shape: nx6 (x1, y1, x2, y2, conf, cls);
         an element is None when no detection survives for that image
    """
    candidate_mask = prediction[..., 4] > conf_thresh  # candidates

    # Settings
    max_wh = 4096  # (pixels) per-class coordinate offset used for batched NMS
    redundant = True  # require redundant detections

    output = [None] * prediction.shape[0]  # list len=bs
    for xi, image_pred in enumerate(prediction):  # image index, image inference
        # drop rows whose objectness is below conf_thresh (returns a copy)
        x = image_pred[candidate_mask[xi]]

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            # i indexes the box, j the class; every class of a box whose
            # confidence clears the threshold becomes its own detection.
            # x[i, j + 5, None] keeps the result as a column of shape [k,1]
            i, j = (x[:, 5:] > conf_thresh).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:
            # best class only: a box predicts just its top-scoring class
            conf, j = x[:, 5:].max(1, keepdim=True)
            # second filter: drop boxes whose class score is below threshold
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thresh]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Batched NMS: offset the boxes by class so that boxes of different
        # classes never overlap. The weighted merge below then mostly
        # combines boxes of the same class.
        c = x[:, 5:6] * (0 if agnostic else max_wh)
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thresh)
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thresh  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
                    1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except RuntimeError:
                # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                # Merging is best effort: dump the inputs and keep the
                # unmerged NMS result. Only torch runtime errors are
                # tolerated, so KeyboardInterrupt/SystemExit propagate.
                print(x, i, x.shape, i.shape)

        output[xi] = x[i]

    return output