def _score_of_edge(self, v1, v2):
    """Score every candidate link between the boxes of two frames.

    Args:
        v1: Dict for the first frame. Reads ``'boxes'``, ``'scores'`` and
            ``'trackedboxes'``; when tracking data is present,
            ``v1['trackedboxes'][0]`` is compared against ``v1['boxes']`` and
            ``v1['trackedboxes'][1]`` against ``v2['boxes']``.
        v2: Dict for the second frame. Reads ``'boxes'`` and ``'scores'``;
            ``'trackedboxes'`` is only checked for ``None``.

    Returns:
        Tuple ``(score, track_score)`` of CUDA float tensors shaped
        ``(N1, N2)``. ``score[i, j]`` is the sum of the two detection scores,
        plus 1.0 where the pair is linked by a tracked box. ``track_score``
        is 1.0 for linked pairs and 0.0 otherwise (all zeros when either
        frame has no tracking data).
    """
    N1 = v1['boxes'].size(0)
    N2 = v2['boxes'].size(0)
    score = torch.cuda.FloatTensor(N1, N2).fill_(np.nan)

    if N1 > 0:
        # The second frame's scores as a row are the same for every i1, so
        # reshape them once instead of once per iteration.
        scores2_row = v2['scores'].contiguous().view(-1, 1).t()
        for i1 in range(N1):
            # scores of box i1 in frame i with all boxes in frame i+1
            score[i1, :] = v1['scores'][i1] + scores2_row

    if v1['trackedboxes'] is not None and v2['trackedboxes'] is not None:
        # Overlaps between the detected boxes and the tracked boxes.
        overlap_ratio_1 = bbox_overlaps(v1['boxes'].contiguous(),
                                        v1['trackedboxes'][0])
        overlap_ratio_2 = bbox_overlaps(v2['boxes'].contiguous(),
                                        v1['trackedboxes'][1])
        # Rounded overlaps act as 0/1 indicators; their product is positive
        # when some tracked box matches box i in frame 1 and box j in frame 2.
        linked = torch.mm(torch.round(overlap_ratio_1),
                          torch.round(overlap_ratio_2).t()) > 0.
        score[linked] += 1.0
        track_score = linked.float()
    else:
        track_score = torch.cuda.FloatTensor(N1, N2).zero_()
    return score, track_score
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image proposal boxes.

    Args:
        box_list: Sequence with one ``(num_boxes, 4)`` array per image; its
            length must equal ``self.num_images``.
        gt_roidb: Optional sequence of ground-truth entries (dicts with
            ``'boxes'`` and ``'gt_classes'``). When given, each proposal
            records its best IoU against the ground truth under the class of
            the best-matching ground-truth box.

    Returns:
        List of dicts, one per image, with keys ``'boxes'``, ``'gt_classes'``
        (all zeros), ``'gt_overlaps'`` (CSR matrix of shape
        ``(num_boxes, self.num_classes)``), ``'flipped'`` (False) and
        ``'seg_areas'`` (all zeros).

    Raises:
        AssertionError: If ``len(box_list) != self.num_images``.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float was an alias of the builtin float (float64) and has
            # been removed from NumPy; np.float64 is the same dtype.
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            # Only proposals that touch some ground-truth box get an entry.
            I = np.where(maxes > 0)[0]
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes, ), dtype=np.float32),
        })
    return roidb
def _generate_rel_labels(self, obj_rois, gt_boxes, obj_num, rel_mat):
    """Produce one relation label per object pair, image by image.

    Each roi is assigned to the ground-truth box it overlaps most, and the
    label of a pair is looked up in ``rel_mat`` for the two assigned
    ground-truth indices. Pairs whose rois map to the same ground-truth box
    keep a zero label but still consume their slot(s). When ``self._isex`` is
    truthy every pair takes two consecutive slots (forward, then reverse).

    Note that zero entries of ``rel_mat`` that get looked up are filled in
    place from the reverse direction.

    Returns:
        Long tensor of length ``self.rel_batch_size`` holding the labels.
    """

    def _resolved_label(mat, src, dst):
        # A zero entry is filled from the reverse entry before being read:
        # copied when the reverse is 3, otherwise set to 3 minus the reverse.
        if mat[src, dst].item() == 0:
            if mat[dst, src].item() == 3:
                mat[src, dst] = mat[dst, src]
            else:
                mat[src, dst] = 3 - mat[dst, src]
        return mat[src, dst]

    labels = torch.Tensor(
        self.rel_batch_size).type_as(gt_boxes).zero_().long()

    slot = 0
    for i in range(obj_num.size(0)):
        img_index = i % self.batch_size
        n_objs = obj_num[i]
        if n_objs <= 1:
            continue

        # Rois are stored back to back, so this group starts after all
        # earlier groups.
        start = torch.sum(obj_num[:i])
        overlaps = bbox_overlaps(
            obj_rois[start:start + n_objs][:, 1:5],
            gt_boxes[img_index][:, 0:4])
        _, nearest_gt = torch.max(overlaps, 1)

        for first in range(n_objs):
            for second in range(first + 1, n_objs):
                o1_gt = int(nearest_gt[first].item())
                o2_gt = int(nearest_gt[second].item())

                if o1_gt == o2_gt:
                    # Invalid pair: leave the label(s) at zero, skip the slot(s).
                    slot += 2 if self._isex else 1
                    continue

                labels[slot] = _resolved_label(
                    rel_mat[img_index], o1_gt, o2_gt)
                slot += 1

                if self._isex:
                    labels[slot] = _resolved_label(
                        rel_mat[img_index], o2_gt, o1_gt)
                    slot += 1
    return labels
def Precision_Recall(dets, thresh, gt_boxes, num_boxes):
    """Count confident detections and how many of them hit a target box.

    Only the first 10 rows of ``dets`` are considered. A row is kept when its
    last column (the score) exceeds ``thresh``; its first four columns are
    rounded to integers and used as the box. A kept box is a true positive
    when its best overlap with the first ``num_boxes`` boxes of
    ``gt_boxes[0]`` is greater than 0.5.

    Returns:
        Tuple ``(number of kept detections, number of true positives)``.
    """
    limit = np.minimum(10, dets.shape[0])

    kept = []
    for row in dets[:limit]:
        rounded = [int(np.round(value)) for value in row[:4]]
        if row[-1] > thresh:
            kept.append(rounded)

    predict_bbox = torch.Tensor(kept)
    target_boxes = gt_boxes[0].cpu()[:num_boxes, :4]

    n_pred = predict_bbox.size(0)
    if target_boxes.size(0) <= 0 or n_pred <= 0:
        # Nothing to match against (or nothing predicted): no true positives.
        return n_pred, 0

    best_iou, _ = torch.max(bbox_overlaps(predict_bbox, target_boxes), 1)
    true_pos = torch.sum(best_iou.gt(0.5)).item()
    return n_pred, true_pos
def _select_pairs(self, obj_rois, obj_num):
    """Randomly keep exactly two rois per image.

    For every image index below ``self.batch_size`` two distinct rois are
    drawn at random. The draw is repeated, at most five times in total, while
    the two boxes overlap by more than 0.7; the last draw is used either way.

    Note that the first ``self.batch_size`` entries of the passed ``obj_num``
    are zeroed in place before the returned tensor (all 2s) is built.

    Returns:
        Tuple ``(selected_rois, obj_num)``: the chosen rois concatenated
        along dim 0, and the per-image counts.
    """
    # In each image, only 2 rois are preserved.
    obj_num = obj_num[:self.batch_size].zero_() + 2

    picked = []
    for im_ind in range(self.batch_size):
        # Column 0 of a roi is compared against the image index.
        rois = obj_rois[obj_rois[:, 0] == im_ind]
        for _ in range(5):
            pair_idx = np.random.choice(
                np.arange(rois.shape[0]), size=2, replace=False)
            selected = rois[pair_idx]
            # Accept the draw unless the two boxes are nearly the same.
            too_similar = bbox_overlaps(
                selected[0:1][:, 1:5], selected[1:2][:, 1:5]).item() > 0.7
            if not too_similar:
                break
        picked.append(selected.clone())

    return torch.cat(picked, dim=0), obj_num
def _generate_rel_labels(self, obj_rois, gt_boxes, obj_num, rel_mat,
                         rel_batch_size):
    """Produce one relation label per object pair, image by image.

    Each roi is assigned to the ground-truth box it overlaps most, and the
    label of a pair is read from ``rel_mat`` after the entry has been passed
    through ``self._check_rel_mat``. Pairs whose rois map to the same
    ground-truth box keep a zero label but still consume their slot(s). When
    ``self._isex`` is truthy every pair takes two consecutive slots (forward,
    then reverse). With ``self.using_crf`` set, ``rel_mat`` is first passed
    through ``RelaTransform``.

    Returns:
        Long tensor of length ``rel_batch_size`` holding the labels.
    """
    if self.using_crf:
        rel_mat = RelaTransform(rel_mat)

    labels = torch.Tensor(rel_batch_size).type_as(gt_boxes).zero_().long()

    slot = 0
    for i in range(obj_num.size(0)):
        img_index = i % self.batch_size
        n_objs = obj_num[i]
        if n_objs <= 1:
            continue

        # Rois are stored back to back, so this group starts after all
        # earlier groups.
        start = torch.sum(obj_num[:i])
        overlaps = bbox_overlaps(
            obj_rois[start:start + n_objs][:, 1:5],
            gt_boxes[img_index][:, 0:4])
        _, nearest_gt = torch.max(overlaps, 1)

        for first in range(n_objs):
            for second in range(first + 1, n_objs):
                o1_gt = int(nearest_gt[first].item())
                o2_gt = int(nearest_gt[second].item())

                if o1_gt == o2_gt:
                    # Invalid pair: leave the label(s) at zero, skip the slot(s).
                    slot += 2 if self._isex else 1
                    continue

                # Forward direction always; reverse direction only if _isex.
                directions = [(o1_gt, o2_gt)]
                if self._isex:
                    directions.append((o2_gt, o1_gt))

                for src, dst in directions:
                    # Some labels were neglected when the dataset was
                    # labeled; let the checker repair the matrix first.
                    rel_mat[img_index] = self._check_rel_mat(
                        rel_mat[img_index], src, dst)
                    labels[slot] = rel_mat[img_index][src, dst]
                    slot += 1
    return labels
def box_filter(boxes, must_overlap=False):
    """Return the candidate (i, j) index pairs for relations between boxes.

    Args:
        boxes: Array with one box per row.
        must_overlap: When True, only pairs of distinct boxes with positive
            overlap are returned; if no such pair exists, all pairs of
            distinct boxes are returned instead.

    Returns:
        Integer array of shape ``(num_pairs, 2)``; each row is an ordered
        pair ``(i, j)`` with ``i != j``.
    """
    n_cands = boxes.shape[0]

    # Every ordered pair except a box with itself.
    all_possib = np.ones((n_cands, n_cands), dtype=bool)
    np.fill_diagonal(all_possib, False)

    if must_overlap:
        # The IoU matrix is only needed on this path. np.float64 replaces
        # the removed np.float alias (same dtype).
        as_float = boxes.astype(np.float64)
        overlaps = bbox_overlaps(as_float, as_float) > 0
        np.fill_diagonal(overlaps, False)
        possible_boxes = np.column_stack(np.where(overlaps))
        if possible_boxes.size != 0:
            return possible_boxes

    return np.column_stack(np.where(all_possib))
def eval_objects_recall(gt_annot, obj_rois, obj_scores, top_Ns):
    """Count ground-truth objects recovered by the top-N scored detections.

    A ground-truth object counts as recovered at a given N when at least one
    of the N highest-scoring detections overlaps its box by more than 0.5 and
    carries the same class label.

    Args:
        gt_annot: Tensor whose columns 0-3 are read as the ground-truth box
            and column 4 as the class label.
        obj_rois: Detected boxes, one per row.
        obj_scores: Per-class scores for every detection. Column 0 is zeroed
            IN PLACE before the best class is picked.
        top_Ns: Iterable of cut-offs N.

    Returns:
        Tuple ``(obj_cnt, obj_correct_cnt)``: the number of ground-truth
        objects and an int tensor with the recovered count for each N.
    """
    # Work with flat 1-D label/score vectors so the logic does not depend on
    # whether torch.max keeps the reduced dimension.
    gt_obj_labels = gt_annot[:, 4].contiguous().view(-1)
    gt_obj_rois = gt_annot[:, :4]
    obj_cnt = gt_obj_rois.size(0)
    obj_correct_cnt = torch.zeros(len(top_Ns)).int()

    obj_scores[:, 0].zero_()
    max_obj_scores, max_obj_ind = torch.max(obj_scores, 1)
    obj_scores_final = max_obj_scores.contiguous().view(-1)
    obj_labels_final = max_obj_ind.contiguous().view(-1)

    # compute overlaps between predicted rois and ground-truth rois
    overlaps = bbox_overlaps(obj_rois.contiguous(), gt_obj_rois.contiguous())

    # rank detections by score, best first
    _, order = torch.sort(obj_scores_final, 0, True)

    for idx, top_N in enumerate(top_Ns):
        keep_ind = order[:top_N]
        labels_topN = obj_labels_final[keep_ind]
        overlaps_topN = overlaps[keep_ind]

        for k in range(obj_cnt):
            # reshape(-1) keeps a 1-D index vector even for a single hit;
            # squeeze() would collapse it to 0-d and break size(0).
            hits = (overlaps_topN[:, k] > 0.5).nonzero().reshape(-1)
            if hits.numel() == 0:
                continue
            gt_label = gt_obj_labels[k].item()
            if bool((labels_topN[hits] == gt_label).any()):
                obj_correct_cnt[idx] += 1
    return obj_cnt, obj_correct_cnt
def draw(path_img, save_path, preds, gts, pred_scores, nid, self_defined=False):
    """Render predictions and ground truth on an image and save it as PNG.

    Boxes are labelled "tp" (green), "fp" ((255, 0, 0)) and "fn"
    ((0, 0, 255)); when both predictions and ground truth are given, the
    ground-truth boxes are additionally outlined in white. A prediction is a
    true positive when its best overlap with any ground-truth box is at least
    ``iou_thres``; a ground-truth box is a false negative when no prediction
    reaches ``iou_thres`` on it.

    NOTE(review): ``iou_thres`` is not a parameter; it is read from the
    enclosing module scope.

    Args:
        path_img: Path of the image to read.
        save_path: Directory the result ``<nid>.png`` is written to.
        preds: Predicted boxes (array or tensor, one box per row) or None.
        gts: Ground-truth boxes (array or tensor, one box per row) or None.
        pred_scores: Unused; kept for interface compatibility.
        nid: Identifier used as the output file name.
        self_defined: When True, a false-negative box that is touched by at
            least two false-positive boxes and more than half covered by
            false positives is not drawn as "fn", and those false positives
            are drawn as "tp" instead of "fp".
    """
    green, fp_color, fn_color, white = (0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 255)

    def _mark(box, label, color):
        # Outline the box and put the label just above its top-left corner.
        cv2.rectangle(image, (int(box[0]), int(box[1])),
                      (int(box[2]), int(box[3])), color)
        cv2.putText(image, label,
                    (int(max(2, box[0] - 10)), int(max(2, box[1] - 10))),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

    image = cv2.imread(path_img, 1)
    height, width, _ = image.shape

    # Defined up front so the drawing code below is valid on every branch.
    tp_boxes = fp_boxes = fn_boxes = None
    non_fn_id = set()
    non_fp_id = set()

    if preds is None and gts is None:
        pass
    elif preds is None:
        # No predictions at all: every ground-truth box is a miss.
        for e in gts:
            _mark(e, "fn", fn_color)
    elif gts is None:
        # No ground truth at all: every prediction is a false alarm.
        for e in preds:
            _mark(e, "fp", fp_color)
    else:
        for e in gts:
            cv2.rectangle(image, (int(e[0]), int(e[1])),
                          (int(e[2]), int(e[3])), white)

        overlap = bbox_overlaps(preds, gts)
        overlap = overlap.numpy()
        if isinstance(preds, torch.Tensor):
            preds = preds.numpy()
        if isinstance(gts, torch.Tensor):
            gts = gts.numpy()

        confidence = overlap.max(axis=1)   # best IoU of every prediction
        coverage = overlap.max(axis=0)     # best IoU on every ground truth

        tp_mask = confidence >= iou_thres
        if tp_mask.any():
            tp_boxes = preds[tp_mask].astype(np.int16)
        fp_mask = confidence < iou_thres
        if fp_mask.any():
            fp_boxes = preds[fp_mask].astype(np.int16)
        fn_mask = coverage < iou_thres
        if fn_mask.any():
            fn_boxes = gts[fn_mask].astype(np.int16)

        if self_defined and fn_boxes is not None and fp_boxes is not None:
            # Rasterise the false positives: a coverage mask and, per pixel,
            # the index of the (last painted) false positive covering it.
            mask_fp = np.zeros((height, width), dtype=np.int64)
            id_fp = np.ones((height, width), dtype=np.int16) * -1
            for idx, e in enumerate(fp_boxes):
                mask_fp[e[1]:e[3], e[0]:e[2]] = 1
                id_fp[e[1]:e[3], e[0]:e[2]] = idx

            for idx, e in enumerate(fn_boxes):
                # Convert to Python ints first: the product of two int16
                # extents overflows for boxes larger than ~181x181 pixels.
                fn_area = int(e[3] - e[1]) * int(e[2] - e[0])
                nids = set(id_fp[e[1]:e[3], e[0]:e[2]].reshape(-1))
                nids = [x for x in nids if x >= 0]
                if len(nids) <= 1:
                    continue
                fp_area = mask_fp[e[1]:e[3], e[0]:e[2]].sum()
                if float(fp_area) / fn_area > 0.5:
                    non_fn_id.add(idx)
                    non_fp_id.update(nids)

            # The re-classified false positives are drawn as true positives.
            add_tp_boxes = fp_boxes[list(non_fp_id)]
            if tp_boxes is not None:
                tp_boxes = np.concatenate([tp_boxes, add_tp_boxes], axis=0)
            else:
                tp_boxes = add_tp_boxes

    if tp_boxes is not None:
        for e in tp_boxes:
            _mark(e, "tp", green)
    if fp_boxes is not None:
        for i, e in enumerate(fp_boxes):
            if self_defined and i in non_fp_id:
                continue
            _mark(e, "fp", fp_color)
    if fn_boxes is not None:
        for i, e in enumerate(fn_boxes):
            if self_defined and i in non_fn_id:
                continue
            _mark(e, "fn", fn_color)

    cv2.imwrite(os.path.join(save_path, '{}.png'.format(nid)), image)
def eval_attribute_recall(gt_annot, obj_rois, obj_scores, att_scores, top_Ns):
    """Match the top-N (object, attribute) predictions against ground truth.

    Reads columns 0-3 of ``gt_annot`` as the ground-truth box, column 4 as
    the object label and columns 5-20 as attribute flags. Column 0 of both
    ``obj_scores`` and ``att_scores`` is zeroed IN PLACE.

    NOTE(review): this function always returns None and increments
    ``obj_correct_cnt``, which is not defined anywhere in this block --
    presumably unfinished; confirm before relying on it.
    """
    gt_obj_labels = gt_annot[:, 4].contiguous().view(-1, 1)
    gt_obj_rois = gt_annot[:, :4]
    gt_atts = gt_annot[:, 5:21]
    obj_scores[:, 0].zero_()
    max_obj_scores, max_obj_ind = torch.max(obj_scores, 1)
    obj_labels_final = max_obj_ind
    att_scores[:, 0].zero_()
    # sort the attribute scores of every box in descending order
    att_scores_sorted, order = torch.sort(att_scores, 1, True)
    # since the maximal number of attributes for each bbox is 16, we trim att_scores_sorted to 16
    att_scores_sorted_trim = att_scores_sorted[:, :16]
    order_trim = order[:, :16]
    # multiply two scores to get the final scores
    # NOTE(review): assumes max_obj_scores broadcasts against an (N, 16)
    # matrix, i.e. that torch.max kept the reduced dimension -- TODO confirm.
    att_scores_final = max_obj_scores * att_scores_sorted_trim
    # build a lookup table whose rows are the (row, column) coordinates of
    # every entry of att_scores_final, in flattened order
    map_x = np.arange(0, att_scores_final.size(1))
    map_y = np.arange(0, att_scores_final.size(0))
    map_x_g, map_y_g = np.meshgrid(map_x, map_y)
    map_yx = torch.from_numpy(
        np.vstack((map_y_g.ravel(), map_x_g.ravel())).transpose()).cuda()
    overlaps = bbox_overlaps(obj_rois.contiguous(), gt_obj_rois.contiguous())
    att_scores_final_v = att_scores_final.view(-1)
    map_yx_v = map_yx.view(-1, 2)
    # rank all (box, attribute) entries by their combined score
    _, order = torch.sort(att_scores_final_v, 0, True)
    for idx, top_N in enumerate(top_Ns):
        keep_ind = order[:top_N]
        map_yx_v_kept = map_yx_v[keep_ind]
        # NOTE(review): map_yx_v_kept was already selected with keep_ind and
        # is indexed with keep_ind again on the next two lines -- looks like
        # a double indexing; verify the intent.
        obj_kept = map_yx_v_kept[keep_ind, 0]
        att_kept = order_trim[map_yx_v_kept[keep_ind, 1]]
        obj_annot_topN = obj_labels_final[obj_kept]
        for k in range(gt_obj_rois.size(0)):
            gt_obj_label = gt_obj_labels[k]
            gt_box = gt_obj_rois[k]
            gt_att_label = gt_atts[k]
            # kept predictions whose box overlaps ground-truth box k by more than 0.5
            valid_index = (overlaps[obj_kept][:, k] > 0.5).nonzero()
            if len(valid_index.size()) == 0:
                continue
            valid_index = valid_index.squeeze()
            for i in range(valid_index.size(0)):
                obj_label = obj_annot_topN[valid_index[i]]
                att_pos = att_kept[valid_index[i]]
                if gt_obj_label[0] == obj_label[0] and gt_att_label[
                        att_pos] == 1:
                    # NOTE(review): obj_correct_cnt is never assigned in this
                    # function; unless it exists at module level this line
                    # raises NameError.
                    obj_correct_cnt[idx] += 1
                    break
    return None
def eval_relations_recall(gt_annot, obj_rois, obj_scores, rel_inds, rel_scores,
                          top_Ns):
    """Count ground-truth relations recovered by the top-N scored triplets.

    A ground-truth relation counts as recovered at a given N when one of the
    N highest-scoring predicted triplets has both its subject and object box
    overlapping the ground-truth boxes by more than 0.5 and carries the same
    (subject label, predicate label, object label).

    Args:
        gt_annot: Tensor whose columns 0-3 are read as the ground-truth box,
            column 4 as the object label and columns 21+ as the relation
            matrix (a non-zero entry (i, j) is the predicate from i to j).
        obj_rois: Detected boxes, one per row.
        obj_scores: Per-class object scores; column 0 is zeroed IN PLACE.
        rel_inds: Pairs of detection indices (subject, object), one per row.
        rel_scores: Per-class predicate scores for every pair; column 0 is
            zeroed IN PLACE.
        top_Ns: Iterable of cut-offs N.

    Returns:
        Tuple ``(rel_cnt, rel_correct_cnt, gt_rel_rois, gt_rel_labels)``.
        When the annotation has no relation this is
        ``(0, zeros, None, None)``.
    """
    gt_obj_labels = gt_annot[:, 4].contiguous().view(-1, 1)
    gt_obj_rois = gt_annot[:, :4]
    gt_rels = gt_annot[:, 21:]

    gt_rels_ind = gt_rels.nonzero()
    # numel() detects "no relation" whether nonzero() returns a dimensionless
    # tensor or a (0, 2) one; checking the number of dims only caught the
    # former.
    if gt_rels_ind.numel() == 0:
        return 0, torch.zeros(len(top_Ns)).int(), None, None

    gt_rels_view = gt_rels.contiguous().view(-1)
    rel_cnt = gt_rels_ind.size(0)
    rel_correct_cnt = torch.zeros(len(top_Ns)).int()

    gt_pred_labels = gt_rels_view[
        gt_rels_view.nonzero().reshape(-1)].contiguous().view(-1, 1)
    # Each ground-truth relation: [subject box | object box] and
    # [subject label, predicate label, object label].
    gt_rel_rois = torch.cat(
        (gt_obj_rois[gt_rels_ind[:, 0]], gt_obj_rois[gt_rels_ind[:, 1]]), 1)
    gt_rel_labels = torch.cat(
        (gt_obj_labels[gt_rels_ind[:, 0]], gt_pred_labels,
         gt_obj_labels[gt_rels_ind[:, 1]]), 1)

    obj_scores[:, 0].zero_()
    max_obj_scores, max_obj_ind = torch.max(obj_scores, 1)

    # find the top-N triplets
    sobj_inds = rel_inds[:, 0]
    oobj_inds = rel_inds[:, 1]

    # perform nms on object rois
    _, order = torch.sort(max_obj_scores, 0, True)
    obj_scores_ordered = max_obj_scores[order]
    obj_rois_ordered = obj_rois[order]
    keep = nms_detections(obj_rois_ordered, obj_scores_ordered)
    # Flag every detection as suppressed, then clear the flag of survivors.
    notkeep_ind = order.clone().fill_(1)
    notkeep_ind[order[keep.reshape(-1).long()]] = 0
    notkeep_rels = notkeep_ind[sobj_inds].eq(1) | notkeep_ind[oobj_inds].eq(1)

    rel_scores[:, 0].zero_()
    max_rel_scores, max_rel_ind = torch.max(rel_scores, 1)
    rel_scores_final = max_rel_scores * max_obj_scores[
        sobj_inds] * max_obj_scores[oobj_inds]
    # Triplets that involve a suppressed detection can never rank first.
    rel_scores_final[notkeep_rels] = 0

    rel_rois_final = torch.cat((obj_rois[sobj_inds], obj_rois[oobj_inds]), 1)
    max_obj_ind = max_obj_ind.contiguous().view(-1, 1)
    max_rel_ind = max_rel_ind.contiguous().view(-1, 1)
    rel_annot_final = torch.cat(
        (max_obj_ind[sobj_inds], max_rel_ind, max_obj_ind[oobj_inds]), 1)

    # compute overlaps between gt_sobj and pred_sobj
    overlap_sobjs = bbox_overlaps(rel_rois_final[:, :4].contiguous(),
                                  gt_rel_rois[:, :4].contiguous())
    # compute overlaps between gt_oobj and pred_oobj
    overlap_oobjs = bbox_overlaps(rel_rois_final[:, 4:].contiguous(),
                                  gt_rel_rois[:, 4:].contiguous())

    # sort triplet scores, best first
    _, order = torch.sort(rel_scores_final, 0, True)

    for idx, top_N in enumerate(top_Ns):
        keep_ind = order[:top_N]
        rel_annot_topN = rel_annot_final[keep_ind]
        sobj_hit = overlap_sobjs[keep_ind] > 0.5
        oobj_hit = overlap_oobjs[keep_ind] > 0.5

        for k in range(gt_rel_rois.size(0)):
            gt = gt_rel_labels[k]
            both_hit = (sobj_hit[:, k].int() + oobj_hit[:, k].int()) == 2
            # reshape(-1) keeps a 1-D index vector even for a single hit;
            # squeeze() would collapse it to 0-d and break size(0).
            valid_index = both_hit.nonzero().reshape(-1)
            if valid_index.numel() == 0:
                continue
            for i in range(valid_index.size(0)):
                rel = rel_annot_topN[valid_index[i]]
                if gt[0] == rel[0] and gt[1] == rel[1] and gt[2] == rel[2]:
                    rel_correct_cnt[idx] += 1
                    break
    return rel_cnt, rel_correct_cnt, gt_rel_rois, gt_rel_labels
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all',
                    limit=None):
    """Evaluate detection proposal recall metrics.

    Args:
        candidate_boxes: Optional sequence with one proposal array per image.
            When None, the roidb entries with ``gt_classes == 0`` are used.
        thresholds: Optional IoU thresholds; defaults to 0.5, 0.55, ..., 0.95.
        area: Name of the ground-truth area range to evaluate ('all',
            'small', 'medium', 'large', '96-128', '128-256', '256-512',
            '512-inf').
        limit: Optional maximum number of proposals used per image.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps

    Raises:
        AssertionError: If ``area`` is not a known range name.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
        gt_areas = self.roidb[i]['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
            boxes = self.roidb[i]['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            # No proposals: the gt boxes of this image still count in num_pos
            # but contribute no overlap values.
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # np.float64 replaces the removed np.float alias (same dtype).
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        # Greedy one-to-one matching: repeatedly take the best remaining
        # (proposal, gt) pair and retire both.
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert (_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps
    }