def update_sa_anns(boxes, labels, cer_idxs, gt_anns, iou_thre):
    """
    Compare the IoU between certain_anns and GT_anns to decide
    which anns should be replaced by the cer_anns.
    """
    cer_boxes, cer_labels = boxes[cer_idxs], labels[cer_idxs]
    gt_boxes, gt_labels = gt_anns
    sa_boxes, sa_labels = gt_boxes.clone(), gt_labels  # keep the GT and update on top of it
    cer_keep_idxs = []
    gt_sl_idxs = []  # record which anns in the GT come from the model
    for cer_idx, cer_box in enumerate(cer_boxes):
        cer_box_rep = cer_box.repeat((gt_boxes.size()[0], 1))
        ious = box_iou(cer_box_rep, gt_boxes)
        val, gt_idx = torch.topk(ious, k=1)
        # IoU with the GT box >= iou_thre (e.g. 0.8) and the labels match.
        # Once matched, gt_boxes is not shrunk on the fly, so gt_idx stays aligned.
        if cer_labels[cer_idx] == gt_labels[gt_idx] and val >= iou_thre:
            sa_boxes[gt_idx] = cer_box
            cer_keep_idxs.append(cer_idx)  # keep the re-filtered indices
            gt_sl_idxs.append(gt_idx)
    # After comparing with the GT anns, update certain_idxs again;
    # this plays the role of the human-judge result.
    cer_idxs = cer_idxs[cer_keep_idxs]
    return cer_idxs, gt_sl_idxs, sa_boxes, sa_labels
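
# Hedged sketch of the matching pattern above, assuming the project's box_iou
# returns row-wise IoUs for two equally sized [N, 4] tensors (that is what the
# repeat() call implies). The helper below is a stand-in used only to make
# this sketch self-contained, not the project's actual implementation.
import torch

def _rowwise_box_iou(boxes_a, boxes_b):
    # intersection corners, clamped to non-negative width/height
    lt = torch.max(boxes_a[:, :2], boxes_b[:, :2])
    rb = torch.min(boxes_a[:, 2:], boxes_b[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, 0] * wh[:, 1]
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    return inter / (area_a + area_b - inter + 1e-16)

cer_box = torch.tensor([0., 0., 10., 10.])
gt_boxes = torch.tensor([[0., 0., 9., 9.], [30., 30., 40., 40.]])
cer_box_rep = cer_box.repeat((gt_boxes.size()[0], 1))
val, gt_idx = torch.topk(_rowwise_box_iou(cer_box_rep, gt_boxes), k=1)
print(val, gt_idx)  # best-matching GT box: index 0, IoU 0.81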
def fast_nms(box_thre, coef_thre, class_thre, cfg):
    class_thre, idx = class_thre.sort(1, descending=True)  # [80, 64 (the number of kept boxes)]

    idx = idx[:, :cfg.top_k]
    class_thre = class_thre[:, :cfg.top_k]

    num_classes, num_dets = idx.size()
    box_thre = box_thre[idx.reshape(-1), :].reshape(num_classes, num_dets, 4)  # [80, 64, 4]
    coef_thre = coef_thre[idx.reshape(-1), :].reshape(num_classes, num_dets, -1)  # [80, 64, 32]

    iou = box_iou(box_thre, box_thre)
    iou.triu_(diagonal=1)
    iou_max, _ = iou.max(dim=1)

    # Now just filter out the ones higher than the threshold
    keep = (iou_max <= cfg.nms_iou_thre)

    # Assign each kept detection to its corresponding class
    class_ids = torch.arange(num_classes, device=box_thre.device)[:, None].expand_as(keep)

    class_ids, box_nms, coef_nms, class_nms = class_ids[keep], box_thre[keep], coef_thre[keep], class_thre[keep]

    # Only keep the top cfg.max_detections highest scores across all classes
    class_nms, idx = class_nms.sort(0, descending=True)
    idx = idx[:cfg.max_detections]
    class_nms = class_nms[:cfg.max_detections]

    class_ids = class_ids[idx]
    box_nms = box_nms[idx]
    coef_nms = coef_nms[idx]

    return box_nms, coef_nms, class_ids, class_nms
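
# Hedged sketch of the Fast NMS idea used above, on a single class: sort boxes
# by score, build the pairwise IoU matrix, zero its lower triangle, and drop
# every box whose best IoU with a higher-scored box exceeds the threshold.
# torchvision's pairwise box_iou stands in for the project's box_iou; the
# tensor values below are toy data.
import torch
from torchvision.ops import box_iou as pairwise_box_iou

scores = torch.tensor([0.9, 0.8, 0.3])
boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],      # overlaps the first box heavily
                      [50., 50., 60., 60.]])
scores, order = scores.sort(descending=True)
boxes = boxes[order]
iou = pairwise_box_iou(boxes, boxes).triu_(diagonal=1)   # IoU with higher-scored boxes only
keep = iou.max(dim=0).values <= 0.5                      # same rule as fast_nms
print(boxes[keep], scores[keep])                         # the second box is suppressed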
def compute_recall(self, exp_setting='gt5'):
    """
    Compute recall for the created h5 file
    """
    if exp_setting == 'gt5':
        pfile = self.cfg.ds.proposal_gt5_h5_resized
    elif exp_setting == 'p100':
        pfile = self.cfg.ds.proposal_h5_resized

    with h5py.File(pfile, 'r') as f:
        label_proposals = f['dets_labels'][:]

    vid_dict_df = self.vid_dict_df
    anet_ent_preproc_data = json.load(open(self.trn_anet_ent_preproc_file))

    recall_num = 0
    recall_tot = 0
    for row_ind, row in tqdm(vid_dict_df.iterrows(), total=len(vid_dict_df)):
        vid = row['vid_id']
        seg = row['seg_id']
        vid_seg_id = row['id']
        annot = anet_ent_preproc_data[vid]['segments'][seg]
        gt_boxs = torch.tensor(annot['bbox']).float()
        gt_frms = annot['frm_idx']
        prop_index = row_ind
        region_feature_file = self.feature_root / f'{vid_seg_id}.npy'
        if not region_feature_file.exists():
            continue
        props = copy.deepcopy(label_proposals[prop_index])
        props = torch.tensor(props).float()
        # props = props.view(10, -1, 7)
        for fidx, frm in enumerate(gt_frms):
            prop_frms = props[props[..., 4] == frm]
            gt_box_in_frm = gt_boxs[fidx]
            ious = box_iou(prop_frms[:, :4], gt_box_in_frm)
            ious_max, ious_arg_max = ious.max(dim=0)
            # conversion to long is important, otherwise
            # after 256 becomes 0
            recall_num += (ious_max > 0.5).any().long()
        recall_tot += len(gt_boxs)

    recall = recall_num.item() / recall_tot
    print(f'Recall is {recall}')
    return
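
# Hedged sketch of the per-frame recall count inside compute_recall, on toy
# tensors: proposals carry their frame index in column 4, and a ground-truth
# box counts as recalled when its best-IoU proposal in the same frame exceeds
# 0.5. torchvision's pairwise box_iou stands in for the project's box_iou.
import torch
from torchvision.ops import box_iou as pairwise_box_iou

props = torch.tensor([[0., 0., 10., 10., 0., 0., 0.9],      # frame 0
                      [40., 40., 60., 60., 1., 0., 0.8]])   # frame 1
gt_boxs = torch.tensor([[1., 1., 10., 10.],
                        [0., 0., 5., 5.]])
gt_frms = [0, 1]

hits = 0
for fidx, frm in enumerate(gt_frms):
    prop_frms = props[props[:, 4] == frm]
    ious = pairwise_box_iou(prop_frms[:, :4], gt_boxs[fidx:fidx + 1])
    hits += int(ious.max() > 0.5)
print(f'recalled {hits} of {len(gt_boxs)} boxes')           # recalled 1 of 2 boxes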
def compute_sl_ratio(sl_ann, gt_ann, iou_thre=0.8):
    sl_ann_num, gt_ann_num = 0, len(gt_ann['labels'])
    sl_boxes, sl_labels = to_tensor(sl_ann['boxes']), to_tensor(sl_ann['labels'])
    gt_boxes, gt_labels = to_tensor(gt_ann['boxes']), to_tensor(gt_ann['labels'])

    for sl_idx, sl_box in enumerate(sl_boxes):
        sl_box_rep = sl_box.repeat((gt_boxes.size()[0], 1))
        ious = box_iou(sl_box_rep, gt_boxes)
        val, gt_idx = torch.topk(ious, k=1)
        if sl_labels[sl_idx] == gt_labels[gt_idx] and val >= iou_thre:
            sl_ann_num += 1

    print(sl_ann_num, gt_ann_num)
    return sl_ann_num / (gt_ann_num + 1e-16)
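
# Hedged toy check of the matching rule inside compute_sl_ratio: a
# self-labelled box counts only if its best-matching ground-truth box has
# IoU >= iou_thre and carries the same label. torchvision's pairwise box_iou
# stands in for the project's box_iou / to_tensor helpers here.
import torch
from torchvision.ops import box_iou as pairwise_box_iou

sl_boxes = torch.tensor([[0., 0., 10., 10.]])
sl_labels = torch.tensor([3])
gt_boxes = torch.tensor([[0., 0., 10., 10.], [30., 30., 40., 40.]])
gt_labels = torch.tensor([3, 5])

ious = pairwise_box_iou(sl_boxes, gt_boxes)              # [num_sl, num_gt]
val, gt_idx = ious[0].topk(k=1)
matched = bool(val.item() >= 0.8 and sl_labels[0].item() == gt_labels[gt_idx].item())
print(matched)                                           # True: IoU 1.0, same label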
def __call__(self, img, boxes, labels, pts, has_pt):
    if random.random() > self.proba:
        return img, boxes, labels, pts, has_pt

    imw, imh = img.size
    params = [(0, 0, imw, imh)]  # candidate crop rois (x, y, w, h); the default keeps the full image
    min_iou = random.choice([0, 0.1, 0.3, 0.5, 0.7, 0.9])
    # for min_iou in (0, 0.1, 0.3, 0.5, 0.7, 0.9):
    for _ in range(10):
        scale = random.uniform(self.min_scale, 1)
        aspect_ratio = random.uniform(
            max(1 / self.max_aspect_ratio, scale * scale),
            min(self.max_aspect_ratio, 1 / (scale * scale)))
        w = int(imw * scale * math.sqrt(aspect_ratio))
        h = int(imh * scale / math.sqrt(aspect_ratio))

        x = random.randrange(imw - w)
        y = random.randrange(imh - h)
        roi = torch.tensor([[x, y, x + w, y + h]], dtype=torch.float)

        ious = box_iou(boxes, roi)
        params.append((x, y, w, h))
        if ious.min() >= min_iou:
            params = [(x, y, w, h)]
            break

    x, y, w, h = random.choice(params)
    img = img.crop((x, y, x + w, y + h))

    center = (boxes[:, :2] + boxes[:, 2:]) / 2
    mask = (center[:, 0] >= x) & (center[:, 0] <= x + w) \
        & (center[:, 1] >= y) & (center[:, 1] <= y + h)
    if mask.any():
        boxes = boxes[mask] - torch.tensor([x, y] * 2, dtype=torch.float)
        boxes = box_clamp(boxes, 0, 0, w, h)
        labels = labels[mask]
        pts = pts[mask] - torch.tensor([x, y] * 5, dtype=torch.float)
        # pts = pts_clamp(pts, 0, 0, w, h)
        has_pt = has_pt[mask]
    else:
        boxes = torch.tensor([[0, 0] * 2], dtype=torch.float)
        labels = torch.tensor([-1], dtype=torch.long)
        pts = torch.tensor([[0, 0] * 5], dtype=torch.float)
        has_pt = torch.tensor([False], dtype=torch.bool)
    return img, boxes, labels, pts, has_pt
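
# Hedged sketch of the box bookkeeping step in the crop above: keep only the
# boxes whose centre falls inside the crop window, shift them into the crop's
# coordinate frame, and clamp to its bounds. The clamp below is a stand-in for
# the project's box_clamp (assumed to clip x/y to [0, w] x [0, h]); toy
# numbers only.
import torch

x, y, w, h = 10, 10, 100, 100
boxes = torch.tensor([[20., 20., 50., 50.],     # centre inside the crop
                      [0., 0., 8., 8.]])        # centre outside the crop
center = (boxes[:, :2] + boxes[:, 2:]) / 2
mask = (center[:, 0] >= x) & (center[:, 0] <= x + w) \
     & (center[:, 1] >= y) & (center[:, 1] <= y + h)
boxes = boxes[mask] - torch.tensor([x, y] * 2, dtype=torch.float)
boxes[:, 0::2].clamp_(0, w)                     # stand-in for box_clamp
boxes[:, 1::2].clamp_(0, h)
print(boxes)                                    # tensor([[10., 10., 40., 40.]])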
def prep_metrics(ap_data, ids_p, classes_p, boxes_p, masks_p, gt, gt_masks, height, width, iou_thres):
    gt_boxes = gt[:, :4]
    gt_boxes[:, [0, 2]] *= width
    gt_boxes[:, [1, 3]] *= height
    gt_classes = gt[:, 4].int().tolist()
    gt_masks = gt_masks.reshape(-1, height * width)
    masks_p = masks_p.reshape(-1, height * width)

    mask_iou_cache = mask_iou(masks_p, gt_masks)
    bbox_iou_cache = box_iou(boxes_p.float(), gt_boxes.float()).cpu()

    for _class in set(ids_p + gt_classes):
        num_gt_per_class = gt_classes.count(_class)

        for iouIdx in range(len(iou_thres)):
            iou_threshold = iou_thres[iouIdx]

            for iou_type, iou_func in zip(['box', 'mask'], [bbox_iou_cache, mask_iou_cache]):
                gt_used = [False] * len(gt_classes)
                ap_obj = ap_data[iou_type][iouIdx][_class]
                ap_obj.add_gt_positives(num_gt_per_class)

                for i, pred_class in enumerate(ids_p):
                    if pred_class != _class:
                        continue

                    max_iou_found = iou_threshold
                    max_match_idx = -1
                    for j, gt_class in enumerate(gt_classes):
                        if gt_used[j] or gt_class != _class:
                            continue

                        iou = iou_func[i, j].item()
                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j

                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(classes_p[i], True)
                    else:
                        ap_obj.push(classes_p[i], False)
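
# Hedged sketch of the greedy TP/FP matching used above, for one class and one
# IoU threshold, on toy data: each prediction (in score order) claims the
# unused ground truth with the highest IoU above the threshold, otherwise it
# is a false positive. torchvision's pairwise box_iou stands in for the
# precomputed IoU cache; the ap_data / APDataObject bookkeeping is omitted.
import torch
from torchvision.ops import box_iou as pairwise_box_iou

pred_boxes = torch.tensor([[0., 0., 10., 10.], [0., 0., 9., 9.]])   # sorted by score
gt_boxes = torch.tensor([[0., 0., 10., 10.]])
iou_cache = pairwise_box_iou(pred_boxes, gt_boxes)
iou_threshold = 0.5

gt_used = [False] * len(gt_boxes)
results = []                                    # is-true-positive flag per prediction
for i in range(len(pred_boxes)):
    best_iou, best_j = iou_threshold, -1
    for j in range(len(gt_boxes)):
        if not gt_used[j] and iou_cache[i, j] > best_iou:
            best_iou, best_j = iou_cache[i, j], j
    if best_j >= 0:
        gt_used[best_j] = True
        results.append(True)
    else:
        results.append(False)
print(results)                                  # [True, False]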
def rejudge_certain_anns(boxes, labels, cer_idxs, uncer_idxs, iou_thre=0.5):
    """
    boxes, labels: the full detection outputs.
    Compute iou(certain_box, uncertain_box), then compare their labels:
        if the labels differ, the model is uncertain about that location (top-n uncertain);
        if the labels are the same, simply keep the cer box.
    For the boxes selected by iou_thre, run an NMS-like step that removes
    the uncertain boxes relative to cer_box.
    iou_thre controls the uncertainty threshold: the lower it is, the more
    certain samples turn into uncertain ones, and the more samples flow into AL.
    @return: the filtered certain indices (cer_idxs)
    """
    keep_idxs = []
    cer_boxes, cer_labels = boxes[cer_idxs], labels[cer_idxs]
    uncer_boxes, uncer_labels = boxes[uncer_idxs], labels[uncer_idxs]

    for cer_idx, cer_box in enumerate(cer_boxes):
        certain = True
        # boxes, torch.float32
        cer_box_rep = cer_box.repeat((uncer_boxes.size()[0], 1))  # repeat for vectorised comparison
        ious = box_iou(cer_box_rep, uncer_boxes)
        # indices of the uncertain boxes with IoU >= iou_thre
        mask = torch.where(ious >= iou_thre, torch.tensor(1), torch.tensor(0)).nonzero()
        if mask.size()[0] > 0:
            # none of the overlapping uncertain boxes shares this label -> treat as uncertain
            if cer_labels[cer_idx] not in uncer_labels[mask]:
                certain = False
        if certain:
            keep_idxs.append(cer_idx)

        # NMS-like step: drop the boxes in `mask` so they are not compared
        # in later rounds, which speeds things up
        unmask = [idx for idx in range(uncer_boxes.size()[0]) if idx not in mask]
        if len(unmask) == 0:
            # no uncertain boxes left to compare against;
            # keep all the remaining cer_boxes directly
            keep_idxs += [idx for idx in range(cer_idx + 1, len(cer_boxes))]  # from cer_idx + 1 to the end
            break
        uncer_boxes = uncer_boxes[unmask]
        uncer_labels = uncer_labels[unmask]

    return cer_idxs[keep_idxs]  # the finally kept indices
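
# Hedged toy check of the demotion rule in rejudge_certain_anns: a certain box
# stays certain only if at least one overlapping (IoU >= iou_thre) uncertain
# box shares its label; otherwise it is handed back to active learning.
# torchvision's pairwise box_iou stands in for the project's box_iou here.
import torch
from torchvision.ops import box_iou as pairwise_box_iou

cer_box = torch.tensor([[0., 0., 10., 10.]])
cer_label = torch.tensor(1)
uncer_boxes = torch.tensor([[1., 1., 10., 10.], [50., 50., 60., 60.]])
uncer_labels = torch.tensor([2, 1])

ious = pairwise_box_iou(cer_box, uncer_boxes)[0]          # [num_uncertain]
overlap_labels = uncer_labels[ious >= 0.5]                # labels of overlapping uncertain boxes
still_certain = bool((overlap_labels == cer_label).any())
print(still_certain)                                      # False: only a label-2 box overlaps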
def choose_gt5_for_one_vid_seg(self, props, prop_feats, gt_boxs, gt_frms,
                               out_file, save=True, nppf=5, nppf_orig=100, nfrms=10):
    """
    Choose 5 proposals per frame
    """
    # Convert to torch tensors for box_iou computations
    # props: 10*100 x 7
    props = torch.tensor(props).float()
    prop_feats = torch.tensor(prop_feats).float()

    # set for comparing
    gt_frms_set = set(gt_frms)

    gt_boxs = torch.tensor(gt_boxs).float()
    gt_frms = torch.tensor(gt_frms).float()

    # Get the frames the proposal boxes lie in
    prop_frms = props[:, 4]

    # Create a frame mask:
    # the iou is forced to 0 if the proposal and
    # the ground truth box lie in different frames
    frm_msk = prop_frms[:, None] == gt_frms

    if len(gt_boxs) > 0 and len(props) > 0:
        ious = box_iou(props[:, :4], gt_boxs) * frm_msk.float()
        # get the max iou proposal for each bounding box
        ious_max, ious_arg_max = ious.max(dim=0)
        # if len(ious_arg_max) > nppf:
        #     ious_arg_max = ious_arg_max[:nppf]
        out_props = props[ious_arg_max]
        out_props_inds = ious_arg_max % 100
        recall = (ious_max > 0.5).sum()
        ngt = len(gt_boxs)
    else:
        ngt = 1
        recall = 0
        ious = torch.zeros(props.size(0), 1)
        out_props = props[0]
        out_props_inds = torch.tensor(0)

    # Dictionary to store final proposals to use
    fin_out_props = {}

    # Reshape proposals and proposal features to
    # nfrms x nppf_orig x ndim
    props1 = props.view(nfrms, nppf_orig, 7)
    prop_dim = prop_feats.size(-1)
    prop_feats1 = prop_feats.view(nfrms, nppf_orig, prop_dim)

    # iterate over each frame
    for frm in range(nfrms):
        if frm not in fin_out_props:
            fin_out_props[frm] = []
        # if there are gt boxes in the frame,
        # consider the proposals which have the highest iou
        # in the frame
        if frm in gt_frms_set:
            props_inds_gt_in_frm = out_props_inds[out_props[..., 4] == frm]
            # add the highest-iou props to the dict key
            fin_out_props[frm] += props_inds_gt_in_frm.tolist()
        # sort by their scores, and choose nppf=5 such props
        props_to_use_inds = props1[frm, ..., 6].argsort(descending=True)[:nppf]
        # add 5 such props to the list
        fin_out_props[frm] += props_to_use_inds.tolist()
        # Restrict the total to 5
        fin_out_props[frm] = list(OrderedDict.fromkeys(
            fin_out_props[frm]))[:nppf]

    # Saving them, init with zeros
    props_output = torch.zeros(nfrms, nppf, 7)
    prop_feats_output = torch.zeros(nfrms, nppf, prop_dim)

    # set for each frame
    for frm in fin_out_props:
        inds = fin_out_props[frm]
        props_output[frm] = props1[frm][inds]
        prop_feats_output[frm] = prop_feats1[frm][inds]

    # Reshape nfrm x nppf x ndim -> nfrm*nppf x ndim
    props_output = props_output.view(nfrms * nppf, 7).detach().cpu().numpy()
    prop_feats_output = prop_feats_output.view(
        nfrms, nppf, prop_dim).detach().cpu().numpy()

    if save:
        np.save(out_file, prop_feats_output)

    return {
        'out_props': props_output,
        'recall': recall,
        'num_prop': nppf * nfrms,
        'num_gt': ngt
    }
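
# Hedged toy run of the per-frame selection step in choose_gt5_for_one_vid_seg:
# proposals that best overlap a ground-truth box in the frame come first, then
# the highest-scoring proposals fill the remaining slots, duplicates are
# removed, and the list is capped at nppf. The indices and scores below are
# made up for illustration.
from collections import OrderedDict
import torch

nppf = 5
gt_matched_inds = [7, 2]                        # proposals with the highest IoU vs the frame's gt
frame_scores = torch.tensor([0.1, 0.9, 0.8, 0.2, 0.7, 0.3, 0.6, 0.4])
top_score_inds = frame_scores.argsort(descending=True)[:nppf].tolist()

chosen = list(OrderedDict.fromkeys(gt_matched_inds + top_score_inds))[:nppf]
print(chosen)                                   # [7, 2, 1, 4, 6]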