def cal_configure(gt_triplets, pred_triplets, num_predicates=50):
    """Accumulate a predicate confusion matrix from matched triplets.

    Each GT triplet's (subject, object) class pair is matched against the
    predicted triplets' pairs via ``intersect_2d``; for every GT row that has
    a match, the (gt predicate, predicted predicate) cell is incremented.
    Predicate labels are 1-based in column 2, hence the ``-1`` shift.

    :param gt_triplets: int array, columns [subj_class, obj_class, predicate]
    :param pred_triplets: int array with the same column layout
    :param num_predicates: size of the square confusion matrix
        (default 50, matching the original hard-coded value)
    :return: (num_predicates, num_predicates) int64 confusion matrix
    """
    pair_matches = intersect_2d(gt_triplets[:, :2], pred_triplets[:, :2])
    best_pred = pair_matches.argmax(1)
    # BUGFIX: argmax(1) returns 0 for rows with no match at all, which used to
    # credit an arbitrary (row 0) prediction to unmatched GT triplets.
    has_match = pair_matches.any(1)
    conf_matrix = np.zeros((num_predicates, num_predicates), dtype=np.int64)
    for i, j in enumerate(best_pred):
        if not has_match[i]:
            continue
        conf_matrix[gt_triplets[i, 2] - 1, pred_triplets[j, 2] - 1] += 1
    return conf_matrix
def _compute_pred_matches(gt_triplets, pred_triplets, gt_boxes, pred_boxes, iou_thresh, phrdet=False, rel_cats=None): """ Given a set of predicted triplets, return the list of matching GT's for each of the given predictions :param gt_triplets: :param pred_triplets: :param gt_boxes: :param pred_boxes: :param iou_thresh: :return: """ # This performs a matrix multiplication-esque thing between the two arrays # Instead of summing, we want the equality, so we reduce in that way # The rows correspond to GT triplets, columns to pred triplets keeps = intersect_2d(gt_triplets, pred_triplets) gt_has_match = keeps.any(1) pred_to_gt = {} for rel_cat_id, rel_cat_name in rel_cats.items(): pred_to_gt[rel_cat_name] = [[] for x in range(pred_boxes.shape[0])] for gt_ind, gt_box, keep_inds in zip( np.where(gt_has_match)[0], gt_boxes[gt_has_match], keeps[gt_has_match], ): boxes = pred_boxes[keep_inds] if phrdet: # Evaluate where the union box > 0.5 gt_box_union = gt_box.reshape((2, 4)) gt_box_union = np.concatenate( (gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0) box_union = boxes.reshape((-1, 2, 4)) box_union = np.concatenate( (box_union.min(1)[:, :2], box_union.max(1)[:, 2:]), 1) inds = bbox_overlaps(gt_box_union[None], box_union)[0] >= iou_thresh else: sub_iou = bbox_overlaps(gt_box[None, :4], boxes[:, :4])[0] obj_iou = bbox_overlaps(gt_box[None, 4:], boxes[:, 4:])[0] inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh) for i in np.where(keep_inds)[0][inds]: pred_to_gt['all_rel_cates'][i].append(int(gt_ind)) pred_to_gt[rel_cats[gt_triplets[int(gt_ind), 1]]][i].append(int(gt_ind)) return pred_to_gt
def _compute_pred_matches(gt_triplets, pred_triplets, gt_boxes, pred_boxes,
                          iou_thresh, phrdet=False):
    """
    Collect, for every predicted triplet, the GT triplets it matches.

    A prediction matches a GT triplet when their class triples are equal
    (row-wise equality via intersect_2d) and the boxes overlap enough:
    either the subject and object boxes each reach ``iou_thresh``, or — in
    phrase-detection mode — the two union boxes do.

    :param gt_triplets: (NumOfGTRels, 3) class triples
    :param pred_triplets: (NumOfPredRels, 3) class triples
    :param gt_boxes: (NumOfGTRels, 8) subject+object boxes per GT triplet
    :param pred_boxes: (NumOfPredRels, 8) subject+object boxes per prediction
    :param iou_thresh: IoU threshold for a box match
    :param phrdet: match on union boxes instead of both boxes independently
    :return: list of length NumOfPredRels; entry i lists the GT indices the
        i-th prediction matches, e.g. pred_to_gt[0] == [1, 2] means the 0-th
        predicted relation matches GT relations 1 and 2.
    """
    # Row-wise equality table: (NumOfGTRels, NumOfPredRels) booleans.
    triplet_hits = intersect_2d(gt_triplets, pred_triplets)
    matched_gt_rows = np.where(triplet_hits.any(1))[0]
    pred_to_gt = [[] for _ in range(pred_boxes.shape[0])]
    # Only GT rows with at least one class-level hit need the IoU check.
    for gt_row in matched_gt_rows:
        gt_box = gt_boxes[gt_row]
        hit_mask = triplet_hits[gt_row]
        cand_boxes = pred_boxes[hit_mask]
        if phrdet:
            # Phrase detection: compare the enclosing (union) boxes.
            gt_pair = gt_box.reshape((2, 4))
            gt_union = np.concatenate((gt_pair.min(0)[:2], gt_pair.max(0)[2:]), 0)
            cand_pairs = cand_boxes.reshape((-1, 2, 4))
            cand_union = np.concatenate(
                (cand_pairs.min(1)[:, :2], cand_pairs.max(1)[:, 2:]), 1)
            good = bbox_overlaps(gt_union[None], cand_union)[0] >= iou_thresh
        else:
            # Subject and object must each independently overlap their GT box.
            iou_subj = bbox_overlaps(gt_box[None, :4], cand_boxes[:, :4])[0]
            iou_obj = bbox_overlaps(gt_box[None, 4:], cand_boxes[:, 4:])[0]
            good = (iou_subj >= iou_thresh) & (iou_obj >= iou_thresh)
        # Map the surviving candidates back to absolute prediction indices.
        for pred_idx in np.where(hit_mask)[0][good]:
            pred_to_gt[pred_idx].append(int(gt_row))
    return pred_to_gt
def get_counts(train_data=None, must_overlap=True):
    """
    Get counts of all of the relations. Used for modeling directly P(rel | o1, o2)

    :param train_data: dataset exposing num_classes, num_predicates,
        gt_classes, relationships, gt_boxes and __len__. Defaults to
        ``VG(mode='train', filter_duplicate_rels=False, num_val_im=5000)``.
        BUGFIX: the VG(...) call used to be the literal default argument, so
        the whole training set was loaded once at import time and shared
        across calls; it is now constructed lazily only when needed.
    :param must_overlap: passed to box_filter when collecting background pairs
    :return: (fg_matrix, bg_matrix) — foreground counts indexed by
        [o1_class, o2_class, predicate] and background pair counts indexed by
        [o1_class, o2_class]
    """
    if train_data is None:
        train_data = VG(mode='train', filter_duplicate_rels=False, num_val_im=5000)
    fg_matrix = np.zeros((
        train_data.num_classes,
        train_data.num_classes,
        train_data.num_predicates,
    ), dtype=np.int64)
    bg_matrix = np.zeros((
        train_data.num_classes,
        train_data.num_classes,
    ), dtype=np.int64)
    for ex_ind in range(len(train_data)):
        gt_classes = train_data.gt_classes[ex_ind].copy()
        gt_relations = train_data.relationships[ex_ind].copy()
        gt_boxes = train_data.gt_boxes[ex_ind].copy()
        # For the foreground, we'll just look at everything
        o1o2 = gt_classes[gt_relations[:, :2]]
        for (o1, o2), gtr in zip(o1o2, gt_relations[:, 2]):
            fg_matrix[o1, o2, gtr] += 1
        # For the background, get all of the things that overlap.
        o1o2_total = gt_classes[np.array(
            box_filter(gt_boxes, must_overlap=must_overlap), dtype=int)]
        # NOTE(review): this keeps only overlapping pairs whose class pair
        # also appears among the annotated relations — confirm this filter is
        # intended rather than its complement.
        mask = intersect_2d(o1o2_total, o1o2).any(1)
        index = np.where(mask)[0]
        o1o2_bg = o1o2_total[index]
        for (o1, o2) in o1o2_bg:
            bg_matrix[o1, o2] += 1
    return fg_matrix, bg_matrix
def evaluate_from_dict(gt_entry, pred_entry, mode, result_dict,
                       multiple_preds=False, viz_dict=None, **kwargs):
    """
    Shortcut to doing evaluate_recall from dict.

    :param gt_entry: Dictionary containing gt_relations, gt_boxes, gt_classes
    :param pred_entry: Dictionary containing pred_rel_inds, rel_scores, and
        (depending on mode) pred_boxes, pred_classes, obj_scores
    :param mode: one of 'predcls', 'sgcls', 'sgdet', 'phrdet', 'preddet'
    :param result_dict: accumulator; result_dict[mode + '_recall'] maps each
        cutoff k to a list that gets one recall value appended per call
    :param multiple_preds: allow multiple predicates per (subject, object) pair
    :param viz_dict: unused here
    :param kwargs: forwarded to evaluate_recall
    :return: (pred_to_gt, pred_5ples, rel_scores), or (None, None, None) in
        'preddet' mode (recall is written into result_dict instead)
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']
    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']
    # Per-mode setup: predcls/sgcls reuse the GT boxes; detection modes take
    # the predicted boxes and scores.
    if mode == 'predcls':
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            # No overlap at all: record zero recall at every cutoff.
            for k in result_dict[mode + '_recall']:
                result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]
        # Now sort the matching ones by predicate score (column 0 is
        # background, hence the +1 label shift).
        rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
        rel_scores_sorted[:, 1] += 1
        rel_scores_sorted = np.column_stack(
            (pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:, 1]))
        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')
    if multiple_preds:
        # Rank every (pair, predicate) combination by the product of the two
        # object scores and the predicate score, and keep the top 100.
        obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
        overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
        score_inds = argsort_desc(overall_scores)[:100]
        pred_rels = np.column_stack(
            (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
        predicate_scores = rel_scores[score_inds[:, 0], score_inds[:, 1] + 1]
    else:
        # One predicate per pair: take each row's best non-background label.
        pred_rels = np.column_stack(
            (pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)
    pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
        gt_rels, gt_boxes, gt_classes, pred_rels, pred_boxes, pred_classes,
        predicate_scores, obj_scores, phrdet=mode == 'phrdet', **kwargs)
    # Recall@k: fraction of GT relations matched by any of the top-k
    # predictions (union over the per-prediction match lists).
    for k in result_dict[mode + '_recall']:
        match = reduce(np.union1d, pred_to_gt[:k])
        rec_i = float(len(match)) / float(gt_rels.shape[0])
        result_dict[mode + '_recall'][k].append(rec_i)
    return pred_to_gt, pred_5ples, rel_scores
def evaluate_from_dict(gt_entry, pred_entry, mode, result_dict,
                       multiple_preds=False, viz_dict=None, **kwargs):
    """
    Shortcut to doing evaluate_recall from dict.

    :param gt_entry: Dictionary containing gt_relations, gt_boxes, gt_classes
    :param pred_entry: Dictionary containing pred_rel_inds, rel_scores, and
        (depending on mode) pred_boxes, pred_classes, obj_scores
    :param mode: one of 'predcls', 'sgcls', 'sgdet', 'phrdet', 'preddet'
    :param result_dict: accumulator; result_dict[mode + '_recall'][k] collects
        one recall value per evaluated image
    :param multiple_preds: allow multiple predicates per pair of box0, box1
    :param viz_dict: unused here
    :param kwargs: forwarded to evaluate_recall
    :return: (pred_to_gt, pred_5ples, rel_scores) — see the comments above the
        final return for each element's shape
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']
    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']
    if mode == 'predcls':
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            for k in result_dict[mode + '_recall']:
                result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]
        # Now sort the matching ones (column 0 of rel_scores is background,
        # hence the +1 predicate-label shift).
        rel_scores_sorted = argsort_desc(rel_scores[:,1:])
        rel_scores_sorted[:,1] += 1
        rel_scores_sorted = np.column_stack((pred_rel_inds[rel_scores_sorted[:,0]], rel_scores_sorted[:,1]))
        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')
    # multiple_preds controls whether multiple predicates are allowed per
    # (box0, box1) pair.
    if multiple_preds:
        # Compute overall_scores, used to rank and keep the top 100
        # (pair, predicate) combinations.
        obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
        overall_scores = obj_scores_per_rel[:,None] * rel_scores[:,1:]
        # Sort and keep the 100 highest overall scores.
        score_inds = argsort_desc(overall_scores)[:100]
        pred_rels = np.column_stack((pred_rel_inds[score_inds[:,0]], score_inds[:,1]+1))
        predicate_scores = rel_scores[score_inds[:,0], score_inds[:,1]+1]
    else:
        # Only one relationship per pair: take the highest-scoring predicate.
        # pred_rels is [num_rel, 2+1]; predicate_scores is [num_rel]
        pred_rels = np.column_stack((pred_rel_inds, 1+rel_scores[:,1:].argmax(1)))
        # Row-wise maximum: the most likely relation between each object pair.
        predicate_scores = rel_scores[:,1:].max(1)
    # Compare GT and prediction: boxes, classes, predicate.
    pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
                gt_rels, gt_boxes, gt_classes,
                pred_rels, pred_boxes, pred_classes,
                predicate_scores, obj_scores, phrdet= mode=='phrdet', **kwargs)
    #######################################################
    # Recall@k for each configured cutoff (e.g. 20/50/100).
    for k in result_dict[mode + '_recall']:
        # Union of GT indices matched by the first k predictions;
        # shape: (#matched GT,). Recall = matched GT / total GT.
        match = reduce(np.union1d, pred_to_gt[:k])
        rec_i = float(len(match)) / float(gt_rels.shape[0])
        result_dict[mode + '_recall'][k].append(rec_i)
    # pred_to_gt: list of length num_rel; each inner list may hold several GT
    #             indices
    # pred_5ples: (num_rel, 5) — (id0, id1, cls0, cls1, rel)
    # rel_scores: (num_rel, 3) — (box1, box2, predicate score), sorted by
    #             overall score
    return pred_to_gt, pred_5ples, rel_scores
def _compute_pred_matches(gt_triplets, pred_triplets, gt_boxes, pred_boxes,
                          iou_thresh, phrdet=False):
    """
    Given a set of predicted triplets, return the list of matching GT's for
    each of the given predictions (subject-predicate-object triplet matching).

    :param gt_triplets: GT class triples
    :param pred_triplets: predicted class triples
    :param gt_boxes: subject+object boxes (8 values) per GT triplet
    :param pred_boxes: subject+object boxes (8 values) per prediction
    :param iou_thresh: IoU threshold for a box match
    :param phrdet: if True, match on union boxes instead of both boxes
    :return: list of length pred_boxes.shape[0]; entry i lists the GT indices
        matched by the i-th prediction
    """
    # This performs a matrix multiplication-esque thing between the two arrays
    # Instead of summing, we want the equality, so we reduce in that way
    # The rows correspond to GT triplets, columns to pred triplets.
    # Given two arrays [m1, n], [m2, n], intersect_2d returns a [m1, m2] array
    # where each entry is True if those rows match.
    keeps = intersect_2d(gt_triplets, pred_triplets)
    # Boolean of length gt_triplets.shape[0]: True where at least one
    # pred_triplet matches that gt_triplet.
    gt_has_match = keeps.any(1)
    # len(pred_to_gt) = pred_triplets.shape[0]
    pred_to_gt = [[] for x in range(pred_boxes.shape[0])]
    # Box matching; iterates once per True entry in gt_has_match.
    for gt_ind, gt_box, keep_inds in zip(np.where(gt_has_match)[0],
                                         gt_boxes[gt_has_match],
                                         keeps[gt_has_match],
                                         ):
        # keep_inds is a row of keeps; select the candidate pred boxes
        # (8 values each) whose class triple matched this GT triplet.
        boxes = pred_boxes[keep_inds]
        if phrdet:
            # Evaluate where the union box > 0.5: collapse the box pair into
            # one enclosing box on both the GT and the prediction side.
            gt_box_union = gt_box.reshape((2, 4))
            gt_box_union = np.concatenate((gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0)
            box_union = boxes.reshape((-1, 2, 4))
            box_union = np.concatenate((box_union.min(1)[:,:2], box_union.max(1)[:,2:]), 1)
            inds = bbox_overlaps(gt_box_union[None], box_union)[0] >= iou_thresh
        else:
            # Scene graph detection: each object box must independently
            # overlap with the corresponding ground truth box.
            sub_iou = bbox_overlaps(gt_box[None,:4], boxes[:, :4])[0]
            obj_iou = bbox_overlaps(gt_box[None,4:], boxes[:, 4:])[0]
            inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh)
        # Append this GT row index to every surviving prediction's list.
        # E.g. if GT triplets 3 and 5 both match prediction 10, then
        # pred_to_gt[10] ends up as [3, 5].
        for i in np.where(keep_inds)[0][inds]:
            pred_to_gt[i].append(int(gt_ind))
    return pred_to_gt
def forward(self, x, im_sizes, image_offset, gt_boxes=None, gt_classes=None,
            gt_rels=None, proposals=None, train_anchor_inds=None,
            return_fmap=False):
    """
    Forward pass for detection.

    :param x: Images@[batch_size, 3, IM_SIZE, IM_SIZE]
    :param im_sizes: A numpy array of (h, w, scale) for each image.
    :param image_offset: Offset onto what image we're on for MGPU training
        (if single GPU this is 0)
    :param gt_boxes: [num_gt, 4] GT boxes over the batch.
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :param gt_rels: GT relations (used for rel assignment in sgdet training)
    :param proposals: forwarded to the detector
    :param train_anchor_inds: a [num_train, 2] array of indices for the
        anchors that will be used to compute the training loss.
        Each (img_ind, fpn_idx)
    :param return_fmap: unused here; the detector is always called with
        return_fmap=True
    :return: If train: the detector Result with rel_dists etc. filled in;
        if test: filter_dets(...) output (prob dists, boxes, img inds,
        maxscores, classes)
    :raises ValueError: if the detector returns an empty result
    """
    result = self.detector(x, im_sizes, image_offset, gt_boxes, gt_classes,
                           gt_rels, proposals, train_anchor_inds,
                           return_fmap=True)
    if result.is_none():
        # BUGFIX: this used to be `return ValueError("heck")`, handing the
        # caller an exception *instance* instead of raising it.
        raise ValueError("heck")
    im_inds = result.im_inds - image_offset
    boxes = result.rm_box_priors
    # During sgdet training the rel labels must be sampled against the GT.
    if self.training and result.rel_labels is None:
        assert self.mode == 'sgdet'
        result.rel_labels = rel_assignments(im_inds.data, boxes.data,
                                            result.rm_obj_labels.data,
                                            gt_boxes.data, gt_classes.data,
                                            gt_rels.data, image_offset,
                                            filter_non_overlap=True,
                                            num_sample_per_gt=1)
    rel_inds = self.get_rel_inds(result.rel_labels, im_inds, boxes)
    spt_feats = self.get_boxes_encode(boxes, rel_inds)
    pair_inds = self.union_pairs(im_inds)
    if self.hook_for_grad:
        # Gradient-inspection mode: use the GT pairs and keep fmap attached
        # to the graph so a hook can capture its gradient.
        rel_inds = gt_rels[:, :-1].data
        fmap = result.fmap
        fmap.register_hook(self.save_grad)
    else:
        fmap = result.fmap.detach()
    rois = torch.cat((im_inds[:, None].float(), boxes), 1)
    result.obj_fmap = self.obj_feature_map(fmap, rois)
    # Soft class embeddings from the current object distributions.
    obj_embed = F.softmax(result.rm_obj_dists, dim=1) @ self.obj_embed.weight
    obj_embed_lstm = F.softmax(result.rm_obj_dists, dim=1) @ self.embeddings4lstm.weight
    pos_embed = self.pos_embed(Variable(center_size(boxes.data)))
    obj_pre_rep = torch.cat((result.obj_fmap, obj_embed, pos_embed), 1)
    obj_feats = self.merge_obj_feats(obj_pre_rep)
    obj_feats_lstm = torch.cat(
        (obj_feats, obj_embed_lstm), -1).contiguous().view(
        1, obj_feats.size(0), -1)
    phr_ori = self.visual_rep(fmap, rois, pair_inds[:, 1:])
    # Align each rel_ind pair with its row in pair_inds to pick out the
    # matching visual representation.
    vr_indices = torch.from_numpy(
        intersect_2d(rel_inds[:, 1:].cpu().numpy(),
                     pair_inds[:, 1:].cpu().numpy()).astype(
            np.uint8)).cuda().max(-1)[1]
    vr = phr_ori[vr_indices]
    phr_feats_high = self.get_phr_feats(phr_ori)
    # First LSTM pass + message passing, then a refinement pass seeded with
    # the updated class distributions.
    obj_feats_lstm_output, (obj_hidden_states, obj_cell_states) = self.lstm(obj_feats_lstm)
    rm_obj_dists1 = result.rm_obj_dists + self.context.decoder_lin(
        obj_feats_lstm_output.squeeze())
    obj_feats_output = self.obj_mps1(
        obj_feats_lstm_output.view(-1, obj_feats_lstm_output.size(-1)),
        phr_feats_high, im_inds, pair_inds)
    obj_embed_lstm1 = F.softmax(rm_obj_dists1, dim=1) @ self.embeddings4lstm.weight
    obj_feats_lstm1 = torch.cat(
        (obj_feats_output, obj_embed_lstm1), -1).contiguous().view(
        1, obj_feats_output.size(0), -1)
    obj_feats_lstm_output, _ = self.lstm(
        obj_feats_lstm1, (obj_hidden_states, obj_cell_states))
    rm_obj_dists2 = rm_obj_dists1 + self.context.decoder_lin(
        obj_feats_lstm_output.squeeze())
    obj_feats_output = self.obj_mps1(
        obj_feats_lstm_output.view(-1, obj_feats_lstm_output.size(-1)),
        phr_feats_high, im_inds, pair_inds)
    # Prevent gradients from flowing back into score_fc from elsewhere
    result.rm_obj_dists, result.obj_preds = self.context(
        rm_obj_dists2, obj_feats_output,
        result.rm_obj_labels if self.training or self.mode == 'predcls' else None,
        boxes.data, result.boxes_all)
    obj_dtype = result.obj_fmap.data.type()
    obj_preds_embeds = torch.index_select(self.ort_embedding, 0,
                                          result.obj_preds).type(obj_dtype)
    # Normalized box coordinates plus normalized area.
    # NOTE(review): the column order here is (x1, y2, x2, y1) — confirm this
    # permutation is intentional.
    tranfered_boxes = torch.stack(
        (boxes[:, 0] / IM_SCALE,
         boxes[:, 3] / IM_SCALE,
         boxes[:, 2] / IM_SCALE,
         boxes[:, 1] / IM_SCALE,
         ((boxes[:, 2] - boxes[:, 0]) *
          (boxes[:, 3] - boxes[:, 1])) / (IM_SCALE ** 2)), -1).type(obj_dtype)
    obj_features = torch.cat(
        (result.obj_fmap, obj_preds_embeds, tranfered_boxes), -1)
    obj_features_merge = self.merge_obj_low(
        obj_features) + self.merge_obj_high(obj_feats_output)
    # Split into subject and object representations
    result.subj_rep = self.post_emb_s(obj_features_merge)[rel_inds[:, 1]]
    result.obj_rep = self.post_emb_o(obj_features_merge)[rel_inds[:, 2]]
    prod_rep = result.subj_rep * result.obj_rep
    prod_rep = prod_rep * vr
    if self.use_tanh:
        prod_rep = F.tanh(prod_rep)
    prod_rep = torch.cat((prod_rep, spt_feats), -1)
    freq_gate = self.freq_gate(prod_rep)
    freq_gate = F.sigmoid(freq_gate)
    result.rel_dists = self.rel_compress(prod_rep)
    if self.use_bias:
        # Gate the frequency-bias contribution by the learned freq_gate.
        result.rel_dists = result.rel_dists + freq_gate * self.freq_bias.index_with_labels(
            torch.stack((
                result.obj_preds[rel_inds[:, 1]],
                result.obj_preds[rel_inds[:, 2]],
            ), 1))
    if self.training:
        return result
    # Inference: gather the score of each predicted class per box.
    twod_inds = arange(
        result.obj_preds.data) * self.num_classes + result.obj_preds.data
    result.obj_scores = F.softmax(result.rm_obj_dists,
                                  dim=1).view(-1)[twod_inds]
    # Bbox regression
    if self.mode == 'sgdet':
        bboxes = result.boxes_all.view(-1, 4)[twod_inds].view(
            result.boxes_all.size(0), 4)
    else:
        # Boxes will get fixed by filter_dets function.
        bboxes = result.rm_box_priors
    rel_rep = F.softmax(result.rel_dists, dim=1)
    return filter_dets(bboxes, result.obj_scores, result.obj_preds,
                       rel_inds[:, 1:], rel_rep)
def evaluate_from_dict(gt_entry, pred_entry, mode, result_dict,
                       multiple_preds=False, viz_dict=None, **kwargs):
    """
    Shortcut to doing evaluate_recall from dict. This variant additionally
    accumulates per-predicate GT/matched histograms, per-object classification
    accuracy, and pair recall into result_dict.

    :param gt_entry: Dictionary containing gt_relations, gt_boxes, gt_classes
    :param pred_entry: Dictionary containing pred_rel_inds, rel_scores, and
        (depending on mode) pred_boxes, pred_classes, obj_scores
    :param mode: one of 'predcls', 'sgcls', 'sgdet', 'phrdet', 'preddet'
    :param result_dict: accumulator; this function appends to the
        '_recall', '_pairrecall', '_objs_mAP', '_pred_predicates' and
        '_gt_predicates' entries for the given mode
    :param multiple_preds: allow multiple predicates per pair of boxes
    :param viz_dict: unused here
    :param kwargs: forwarded to evaluate_recall
    :return: (pred_to_gt, pred_5ples, predicate_scores, pred_rels);
        'preddet' mode instead returns a 3-tuple (kept for backward
        compatibility with that branch's callers)
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']
    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']
    # Per-predicate histograms; predicate labels are 1-based (column 0 of
    # rel_scores is background), hence size ncols-1 and the -1 shifts below.
    pred_predicates = np.zeros((rel_scores.shape[-1] - 1,))
    gt_predicates = np.zeros((rel_scores.shape[-1] - 1,))
    # Objects that participate in at least one GT relation.
    valid_mask = np.bincount(gt_rels[:, :-1].reshape(-1),
                             minlength=len(gt_classes)) > 0
    for i in gt_rels[:, 2]:
        gt_predicates[i - 1] += 1.0
    if mode == 'predcls':
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
        pred_rel_inds, rel_scores = filter_out_ind(pred_rel_inds, rel_scores,
                                                   gt_classes.shape[0])
    elif mode == 'sgcls':
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            for k in result_dict[mode + '_recall']:
                result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = np.where(prc)[0]
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]
        # Now sort the matching ones
        rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
        rel_scores = np.sort(np.ravel(rel_scores[:, 1:]))[::-1]
        rel_scores_sorted[:, 1] += 1
        rel_scores_sorted = np.column_stack(
            (pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:, 1]))
        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)
        return rel_scores_sorted, None, rel_scores
    else:
        raise ValueError('invalid mode')
    if multiple_preds:
        # Rank all (pair, predicate) combinations jointly; keep top 100.
        obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
        overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
        score_inds = argsort_desc(overall_scores)[:100]
        pred_rels = np.column_stack(
            (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
        predicate_scores = rel_scores[score_inds[:, 0], score_inds[:, 1] + 1]
    else:
        # One predicate per pair: highest-scoring non-background label.
        pred_rels = np.column_stack(
            (pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)
    pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
        gt_rels, gt_boxes, gt_classes, pred_rels, pred_boxes, pred_classes,
        predicate_scores, obj_scores, phrdet=mode == 'phrdet', **kwargs)
    # Histogram of GT predicates recovered within the top-100 predictions.
    match_indices = reduce(np.union1d, pred_to_gt[:100]).astype(np.int64)
    for i in gt_rels[match_indices, 2]:
        pred_predicates[i - 1] += 1.0
    # Object classification accuracy over relation-participating objects.
    objs_mAP = (gt_classes[valid_mask] == pred_classes[valid_mask]).mean()
    # BUGFIX: objs_mAP used to be appended twice per call (duplicate line),
    # double-counting every image in the averaged statistic.
    result_dict[mode + '_objs_mAP'].append(objs_mAP)
    result_dict[mode + '_pred_predicates'].append(pred_predicates)
    result_dict[mode + '_gt_predicates'].append(gt_predicates)
    for k in result_dict[mode + '_recall']:
        match = reduce(np.union1d, pred_to_gt[:k])
        rec_i = float(len(match)) / float(gt_rels.shape[0])
        result_dict[mode + '_recall'][k].append(rec_i)
        # Pair recall: GT (subject, object) pairs present among the top-k
        # predicted pairs, regardless of the predicate label.
        prc = intersect_2d(gt_rels[:, :-1], pred_rel_inds[:k]).any(1).sum()
        p_i = float(prc) / float(gt_rels.shape[0])
        result_dict[mode + '_pairrecall'][k].append(p_i)
    return pred_to_gt, pred_5ples, predicate_scores, pred_rels
def rel_proposal_target(rois, rel_proposal_inds, gt_boxes, gt_classes, gt_rels,
                        image_offset, mode):
    """
    Assign the target for each proposal pair. When the mode is predcls or
    sgcls, the target is directly obtained by comparing with gt_rels.
    When the mode is sgdet, the target is sampled by first computing IoU
    with GT pairs.

    :param rois: (N, 5) [im_ind, x1, y1, x2, y2]
    :param rel_proposal_inds: proposal pair rows [im_ind, ind1, ind2]
    :param gt_boxes: GT boxes
    :param gt_classes: (num_gt, 2) rows of (img_id, class)
    :param gt_rels: GT relations; last column is the predicate label
    :param image_offset: offset of this GPU's first image (MGPU training)
    :param mode: 'predcls' / 'sgcls' / 'sgdet'
    :return: (proposal_labels, rels_to_gt) — labelled proposal pairs sorted by
        (image, ind1, ind2), and the matching GT-relation index (-1 for
        background) in the same order
    :raises ValueError: in sgdet mode when an image yields neither fg nor bg
        candidates (previously dropped into pdb and then crashed on
        undefined names)
    """
    im_inds = rois[:, 0].long()
    num_im = im_inds[-1] + 1
    # Offset the image indices in fg_rels to refer to absolute indices
    # (not just within img i).
    fg_rels = gt_rels.clone()
    fg_rels[:, 0] -= image_offset
    offset = {}
    for i, s, e in enumerate_by_image(gt_classes[:, 0]):
        offset[i] = s
    for i, s, e in enumerate_by_image(fg_rels[:, 0]):
        fg_rels[s:e, 1:3] += offset[i]
    rels_to_gt = []
    num_gt_rels_seen = 0
    if mode in ('predcls', 'sgcls'):
        rel_proposal_inds_np = rel_proposal_inds.cpu().numpy()
        fg_rels_np = fg_rels.cpu().numpy()  # Ngtp, 4
        # Locate proposals whose (im, ind1, ind2) row equals a GT relation's.
        locate_inds = np.where(
            intersect_2d(rel_proposal_inds_np, fg_rels_np[:, :-1]))
        proposal_to_gt = defaultdict(list)
        for ind in zip(*locate_inds):
            proposal_to_gt[ind[0]].append(ind[1])
        # A proposal matching several GT rels is assigned one at random.
        for k, v in proposal_to_gt.items():
            v0 = v[0] if len(v) == 1 else np.random.choice(v)
            proposal_to_gt[k] = v0
        fg_proposal_inds = np.array(list(proposal_to_gt.keys())).astype(np.int32)
        bg_proposal_inds = np.array(
            list(set(list(range(rel_proposal_inds_np.shape[0]))) -
                 set(list(proposal_to_gt.keys())))).astype(np.int32)
        rels_to_gt = np.ones(
            fg_proposal_inds.shape[0] + bg_proposal_inds.shape[0],
            dtype=np.int64) * -1
        if len(fg_proposal_inds) > 0:
            rels_to_gt[fg_proposal_inds] = np.array(
                [proposal_to_gt[ind] for ind in fg_proposal_inds])
        # Subsample fg/bg to the per-batch budget.
        num_fg = min(fg_proposal_inds.size,
                     int(RELS_BATCHSIZE * REL_FG_FRACTION * num_im))
        if num_fg < fg_proposal_inds.size:
            fg_proposal_inds = np.random.choice(fg_proposal_inds, num_fg,
                                                replace=False)
        num_bg = min(bg_proposal_inds.size if bg_proposal_inds.size else 0,
                     int(RELS_BATCHSIZE * num_im) - num_fg)
        if num_bg < bg_proposal_inds.size:
            bg_proposal_inds = np.random.choice(bg_proposal_inds, num_bg,
                                                replace=False)
        if len(fg_proposal_inds) == 0:
            # All-background fallback.
            bg_labels = np.zeros(bg_proposal_inds.size)
            bg_rel_labels = np.hstack(
                (rel_proposal_inds_np[bg_proposal_inds], bg_labels[:, None]))
            proposal_labels = bg_rel_labels
        else:
            fg_labels = np.array(
                [fg_rels[proposal_to_gt[ind], -1] for ind in fg_proposal_inds])
            fg_rel_labels = np.hstack(
                (rel_proposal_inds_np[fg_proposal_inds], fg_labels[:, None]))
            bg_labels = np.zeros(bg_proposal_inds.size)
            bg_rel_labels = np.hstack(
                (rel_proposal_inds_np[bg_proposal_inds], bg_labels[:, None]))
            proposal_labels = np.vstack((fg_rel_labels, bg_rel_labels))
        rels_to_gt = np.hstack(
            (rels_to_gt[fg_proposal_inds], rels_to_gt[bg_proposal_inds]))
        proposal_labels = torch.LongTensor(proposal_labels).cuda(
            gt_rels.get_device())
        rels_to_gt = torch.LongTensor(rels_to_gt).cuda(gt_rels.get_device())
    else:
        assert mode == 'sgdet'
        # Match proposal box pairs to GT box pairs by IoU, per image.
        gt_box_pairs = torch.cat(
            (gt_boxes[fg_rels[:, 1]], gt_boxes[fg_rels[:, 2]]), 1)
        rel_proposal_pairs = torch.cat(
            (rois[:, 1:][rel_proposal_inds[:, 0]],
             rois[:, 1:][rel_proposal_inds[:, 1]]), 1)
        num_pairs = np.zeros(num_im + 1).astype(np.int32)
        for i, s, e in enumerate_by_image(rel_proposal_inds[:, 0]):
            num_pairs[i + 1] = e - s
        cumsum_num_pairs = np.cumsum(num_pairs).astype(np.int32)
        fg_rel_per_image = int(RELS_BATCHSIZE * REL_FG_FRACTION)
        proposal_labels = []
        gt_rel_labels = fg_rels[:, -1].contiguous().view(-1)
        for i in range(1, num_im + 1):
            rel_proposal_inds_i = rel_proposal_inds[
                cumsum_num_pairs[i - 1]:cumsum_num_pairs[i]]
            rel_proposal_pairs_i = rel_proposal_pairs[
                cumsum_num_pairs[i - 1]:cumsum_num_pairs[i]]
            gt_box_pairs_i = gt_box_pairs[
                torch.nonzero(fg_rels[:, 0] == (i - 1)).view(-1)]
            gt_box_pairs_label_i = gt_rel_labels[
                torch.nonzero(fg_rels[:, 0] == (i - 1)).view(-1)].view(
                -1).contiguous()
            overlaps = co_bbox_overlaps(rel_proposal_pairs_i, gt_box_pairs_i)  # Np, Ngtp
            max_overlaps, gt_assignment = torch.max(overlaps, 1)  # Np
            fg_inds = torch.nonzero(max_overlaps >= 0.5).view(-1)
            fg_num = fg_inds.numel()
            bg_inds = torch.nonzero(
                (max_overlaps < 0.5) & (max_overlaps >= 0.0)).view(-1)
            bg_num = bg_inds.numel()
            # BUGFIX: the in-place fill op is `fill_`; `Tensor.fill` does not
            # exist and raised AttributeError here.
            rels_to_gt_i = torch.LongTensor(
                rel_proposal_pairs_i.shape[0]).fill_(-1).cuda(
                gt_rels.get_device())
            rels_to_gt_i[fg_inds] = gt_assignment[fg_inds] + num_gt_rels_seen
            if fg_num > 0 and bg_num > 0:
                fg_this_image = min(fg_rel_per_image, fg_num)
                rand_num = torch.from_numpy(
                    np.random.permutation(fg_num)).long().cuda()
                fg_inds = fg_inds[rand_num[:fg_this_image]]
                # sampling bg
                bg_this_image = RELS_BATCHSIZE - fg_this_image
                rand_num = np.floor(np.random.rand(bg_this_image) * bg_num)
                rand_num = torch.from_numpy(rand_num).long().cuda()
                bg_inds = bg_inds[rand_num]
                rels_to_gt_i = torch.cat(
                    (rels_to_gt_i[fg_inds], rels_to_gt_i[bg_inds]), 0)
            elif fg_num > 0 and bg_num == 0:
                # Fill the whole batch with (resampled) fg.
                rand_num = np.floor(np.random.rand(RELS_BATCHSIZE) * fg_num)
                rand_num = torch.from_numpy(rand_num).long().cuda()
                fg_inds = fg_inds[rand_num]
                fg_this_image = RELS_BATCHSIZE
                bg_this_image = 0
                rels_to_gt_i = rels_to_gt_i[fg_inds]
            elif bg_num > 0 and fg_num == 0:
                # sampling bg only
                rand_num = np.floor(np.random.rand(RELS_BATCHSIZE) * bg_num)
                rand_num = torch.from_numpy(rand_num).long().cuda()
                bg_inds = bg_inds[rand_num]
                bg_this_image = RELS_BATCHSIZE
                fg_this_image = 0
                rels_to_gt_i = rels_to_gt_i[bg_inds]
            else:
                # BUGFIX: this branch used to call pdb.set_trace() and then
                # fall through to undefined names; fail loudly instead.
                raise ValueError(
                    'rel_proposal_target: image with neither fg nor bg '
                    'relation candidates')
            keep_inds = torch.cat([fg_inds, bg_inds], 0)
            rel_proposal_inds_i = rel_proposal_inds_i[keep_inds]
            labels_i = gt_box_pairs_label_i[gt_assignment[keep_inds]]
            # Zero out the labels of the background portion.
            if fg_this_image < labels_i.size(0):
                labels_i[fg_this_image:] = 0
            rels_to_gt.append(rels_to_gt_i)
            num_gt_rels_seen += gt_box_pairs_i.shape[0]
            proposal_labels.append(
                torch.cat((rel_proposal_inds_i, labels_i[:, None]), 1))
        proposal_labels = torch.cat(proposal_labels, 0)
        rels_to_gt = torch.cat(rels_to_gt, 0)
    # Sort by (image, ind1, ind2) so downstream code sees a canonical order.
    _, perm = torch.sort(
        proposal_labels[:, 0] * (rois.size(0) ** 2) +
        proposal_labels[:, 1] * rois.size(0) +
        proposal_labels[:, 2])
    proposal_labels = proposal_labels[perm].contiguous()
    rels_to_gt = rels_to_gt[perm].contiguous()
    return proposal_labels, rels_to_gt
def evaluate_from_dict(gt_entry, pred_entry, mode, eval_result_dict, eval_result_dict2, gtrel_dist_dict, gtkeyrel_dist_dict, predrel_dist_dict, predmatchedrel_dist_dict, predmatchedkeyrel_dist_dict, multiple_preds=False, predrel_treedeep_scores_dict=None, viz_dict=None, num_predicates=50, **kwargs):
    """
    Shortcut to doing evaluate_recall from dict
    :param gt_entry: Dictionary containing gt_relations, gt_boxes, gt_classes
        (and optionally gt_key_rels, indices of "key" GT relations)
    :param pred_entry: Dictionary containing pred_rels, pred_boxes (if detection), pred_classes
        plus rel_rank_scores and 'forest' (a tree structure; may be None)
    :param mode: 'det' or 'cls'
    :param result_dict: eval_result_dict accumulates triplet-level recall,
        eval_result_dict2 accumulates pair-level (subject/object-only) recall
    :param viz_dict:
    :param kwargs: forwarded to evaluate_recall (e.g. iou_thresh)
    :return: (pred_to_gt, subobj_to_gt, pred_5ples, rel_scores, tree_depth, tree_width),
        or (None, None, None) on the 'preddet' shortcut path
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']
    # Key relations are optional; downstream key-recall bookkeeping is skipped when absent.
    gt_key_rels = gt_entry['gt_key_rels'] if 'gt_key_rels' in gt_entry else None

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']
    rel_rank_scores = pred_entry['rel_rank_scores']
    tree = pred_entry['forest']

    # get each node's depth
    # tree_depth_dict maps an object index to its depth in the first tree of
    # the forest; used below to build depth-pair distribution histograms.
    tree_depth_dict = None
    tree_depth = None
    tree_width = None
    if tree is not None:
        root = tree[0]
        tree_depth_dict = {}
        get_treeNodes_depth(tree_depth_dict, root)
        tree_depth = root.max_depth()
        tree_width = root.max_width()

    # if rel_rank_scores is not None:
    #     rel_scores *= rel_rank_scores[:, None]

    if mode == 'predcls':
        # GT boxes and labels are given; only predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        # GT boxes, predicted labels.
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        # Fully detected boxes and labels.
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            # No predicted pair coincides with any GT pair: recall is 0.
            for k in eval_result_dict[mode + '_recall']:
                eval_result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]

        # Now sort the matching ones
        rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
        rel_scores_sorted[:, 1] += 1  # shift past the background predicate column
        rel_scores_sorted = np.column_stack((pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:, 1]))

        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in eval_result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            eval_result_dict[mode + '_recall'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')

    if multiple_preds:
        # Score every (pair, predicate) combination and keep the global top 100.
        obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
        overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
        score_inds = argsort_desc(overall_scores)[:100]
        pred_rels = np.column_stack((pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
        predicate_scores = rel_scores[score_inds[:, 0], score_inds[:, 1] + 1]
    else:
        # One predicate per pair: argmax over non-background classes.
        pred_rels = np.column_stack((pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)

    # note: for relrank
    if rel_rank_scores is not None:
        predicate_scores *= rel_rank_scores

    RES_pred_to_gt, RES_subobj_to_gt, pred_5ples, rel_scores = evaluate_recall(
        gt_rels, gt_boxes, gt_classes, pred_rels, pred_boxes, pred_classes,
        predicate_scores, obj_scores, phrdet=mode == 'phrdet', **kwargs)

    # evaluate the tree: get the distribution over tree hierarchy
    if tree is not None:
        # get gt distribution, only do it in predcls or sgcls mode, use the gt box
        if mode == 'sgcls' or mode == 'predcls':
            # Histogram GT relations by (subject depth, object depth).
            for pair in gt_rels[:, :2]:
                gtrel_dist_dict[_gen_key(tree_depth_dict[pair[0]], tree_depth_dict[pair[1]])] += 1
            if gt_key_rels is not None:
                for ind in gt_key_rels:
                    pair = gt_rels[ind, :2]
                    gtkeyrel_dist_dict[_gen_key(tree_depth_dict[pair[0]], tree_depth_dict[pair[1]])] += 1
            # get the predicted rels distribution
            # NOTE(review): only the top-5 predicted pairs are histogrammed here.
            box_pair_inds = pred_rel_inds[:5]
            for ind in box_pair_inds:
                predrel_dist_dict[mode][_gen_key(tree_depth_dict[ind[0]], tree_depth_dict[ind[1]])] += 1
            # Normalized triplet score (product of the triplet's component
            # scores, scaled by the per-image maximum), bucketed by depth pair.
            norm_triplet_rel_scores = rel_scores.prod(1)
            norm_triplet_rel_scores = norm_triplet_rel_scores / np.max(norm_triplet_rel_scores)
            for ind_idx, ind in enumerate(pred_rel_inds):
                predrel_treedeep_scores_dict[mode][_gen_key(tree_depth_dict[ind[0]], tree_depth_dict[ind[1]])].append(norm_triplet_rel_scores[ind_idx])
            # get the match ones, only do it in predcls or sgcls mode, use the gt box
            match = reduce(np.union1d, RES_subobj_to_gt[:5])
            for m in match:
                predmatchedrel_dist_dict[mode][_gen_key(tree_depth_dict[gt_rels[int(m), 0]], tree_depth_dict[gt_rels[int(m), 1]])] += 1
            if gt_key_rels is not None:
                key_match = np.intersect1d(match, gt_key_rels)
                for m in key_match:
                    predmatchedkeyrel_dist_dict[mode][_gen_key(tree_depth_dict[gt_rels[int(m), 0]], tree_depth_dict[gt_rels[int(m), 1]])] += 1

    # Accumulate recall twice: once for full-triplet matches (eval_result_dict)
    # and once for subject/object-pair-only matches (eval_result_dict2).
    for pred_to_gt, result_dict in [(RES_pred_to_gt, eval_result_dict), (RES_subobj_to_gt, eval_result_dict2)]:
        for k in result_dict[mode + '_recall']:
            # GT relations hit within the top-k predictions.
            match = reduce(np.union1d, pred_to_gt[:k])
            key_match = np.intersect1d(match, gt_key_rels) if gt_key_rels is not None else None
            if gt_key_rels is not None:
                # Per-predicate hit counts for key relations (index 0 is the total).
                for idx in range(len(key_match)):
                    local_label = gt_rels[int(key_match[idx]), 2]
                    if (mode + '_key_recall_hit') not in result_dict:
                        result_dict[mode + '_key_recall_hit'] = {}
                    if k not in result_dict[mode + '_key_recall_hit']:
                        result_dict[mode + '_key_recall_hit'][k] = [0] * (num_predicates + 1)
                    result_dict[mode + '_key_recall_hit'][k][int(local_label)] += 1
                    result_dict[mode + '_key_recall_hit'][k][0] += 1
                # Per-predicate GT counts for key relations.
                for idx in range(gt_key_rels.shape[0]):
                    local_label = gt_rels[int(gt_key_rels[idx]), 2]
                    if (mode + '_key_recall_count') not in result_dict:
                        result_dict[mode + '_key_recall_count'] = {}
                    if k not in result_dict[mode + '_key_recall_count']:
                        result_dict[mode + '_key_recall_count'][k] = [0] * (num_predicates + 1)
                    result_dict[mode + '_key_recall_count'][k][int(local_label)] += 1
                    result_dict[mode + '_key_recall_count'][k][0] += 1
            # Per-predicate hit counts over all GT relations.
            for idx in range(len(match)):
                local_label = gt_rels[int(match[idx]), 2]
                if (mode + '_recall_hit') not in result_dict:
                    result_dict[mode + '_recall_hit'] = {}
                if k not in result_dict[mode + '_recall_hit']:
                    result_dict[mode + '_recall_hit'][k] = [0] * (num_predicates + 1)
                result_dict[mode + '_recall_hit'][k][int(local_label)] += 1
                result_dict[mode + '_recall_hit'][k][0] += 1
            # Per-predicate GT counts (denominator for mean recall).
            for idx in range(gt_rels.shape[0]):
                local_label = gt_rels[idx, 2]
                if (mode + '_recall_count') not in result_dict:
                    result_dict[mode + '_recall_count'] = {}
                if k not in result_dict[mode + '_recall_count']:
                    result_dict[mode + '_recall_count'][k] = [0] * (num_predicates + 1)
                result_dict[mode + '_recall_count'][k][int(local_label)] += 1
                result_dict[mode + '_recall_count'][k][0] += 1
            rec_i = float(len(match)) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)
            if gt_key_rels is not None:
                key_rec_i = float(len(key_match)) / float(gt_key_rels.shape[0])
                result_dict[mode + '_key_recall'][k].append(key_rec_i)
    return RES_pred_to_gt, RES_subobj_to_gt, pred_5ples, rel_scores, tree_depth, tree_width
def evaluate_from_dict(gt_entry, pred_entry, mode, eval_result_dict, multiple_preds=1, num_predicates=50, **kwargs):
    """
    Shortcut to doing evaluate_recall from dict
    :param gt_entry: Dictionary containing gt_relations, gt_boxes, gt_classes
    :param pred_entry: Dictionary containing pred_rels, pred_boxes (if detection), pred_classes
    :param mode: 'det' or 'cls'
    :param result_dict: eval_result_dict, accumulator keyed by '<mode>_recall' etc.
    :param viz_dict:
    :param kwargs: forwarded to evaluate_recall
    :return: (pred_to_gt, pred_5ples, rel_scores), or (None, None, None) on the
        'preddet' shortcut path

    NOTE: multiple_preds is an integer here (not a bool as in other variants):
    1 keeps the single argmax predicate per pair; a value equal to the number
    of predicate classes keeps all of them; anything in between keeps the
    top-multiple_preds predicates per pair.
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']

    if mode == 'predcls':
        # GT boxes and labels are given; only predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        # GT boxes, predicted labels.
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'sgdet' or mode == 'phrdet':
        # Fully detected boxes and labels.
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            # No predicted pair coincides with any GT pair: recall is 0.
            for k in eval_result_dict[mode + '_recall']:
                eval_result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]

        # Now sort the matching ones
        rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
        rel_scores_sorted[:, 1] += 1  # shift past the background predicate column
        rel_scores_sorted = np.column_stack(
            (pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:, 1]))

        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in eval_result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            eval_result_dict[mode + '_recall'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')

    if multiple_preds > 1:
        if multiple_preds == rel_scores.shape[1] - 1:
            # all predicates
            # Score every (pair, predicate) combination and keep the global top 100.
            obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
            overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
            score_inds = argsort_desc(overall_scores)[:100]
            pred_rels = np.column_stack(
                (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
            predicate_scores = rel_scores[score_inds[:, 0], score_inds[:, 1] + 1]
        else:
            # between 1 and all predictes
            obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)  # Nr
            overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]  # Nr, 70
            # sort predicate scores for each pair
            # Keep only each pair's top-multiple_preds predicates, then rank
            # those combinations globally and keep the top 100.
            sorted_predicates_idx = np.argsort(
                -overall_scores, axis=1)[:, :multiple_preds]  # Nr, multiple_preds
            sorted_predicates_scores = np.sort(
                overall_scores, axis=1)[:, ::-1][:, :multiple_preds]
            score_inds = argsort_desc(sorted_predicates_scores)[:100]
            # Map the column index within the top-k back to the real predicate
            # class index (+1 skips the background column).
            pred_rels = np.column_stack(
                (pred_rel_inds[score_inds[:, 0]],
                 sorted_predicates_idx[score_inds[:, 0], score_inds[:, 1]] + 1))
            predicate_scores = rel_scores[
                score_inds[:, 0],
                sorted_predicates_idx[score_inds[:, 0], score_inds[:, 1]] + 1]
    else:
        # One predicate per pair: argmax over non-background classes.
        pred_rels = np.column_stack(
            (pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)

    RES_pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
        gt_rels, gt_boxes, gt_classes, pred_rels, pred_boxes, pred_classes,
        predicate_scores, obj_scores, phrdet=mode == 'phrdet', **kwargs)

    pred_to_gt = RES_pred_to_gt
    result_dict = eval_result_dict
    for k in result_dict[mode + '_recall']:
        # GT relations hit within the top-k predictions.
        match = reduce(np.union1d, pred_to_gt[:k])
        # Per-predicate hit counts (index 0 is the overall total).
        for idx in range(len(match)):
            local_label = gt_rels[int(match[idx]), 2]
            if (mode + '_recall_hit') not in result_dict:
                result_dict[mode + '_recall_hit'] = {}
            if k not in result_dict[mode + '_recall_hit']:
                result_dict[mode + '_recall_hit'][k] = [0] * (num_predicates + 1)
            result_dict[mode + '_recall_hit'][k][int(local_label)] += 1
            result_dict[mode + '_recall_hit'][k][0] += 1
        # Per-predicate GT counts (denominator for mean recall).
        for idx in range(gt_rels.shape[0]):
            local_label = gt_rels[idx, 2]
            if (mode + '_recall_count') not in result_dict:
                result_dict[mode + '_recall_count'] = {}
            if k not in result_dict[mode + '_recall_count']:
                result_dict[mode + '_recall_count'][k] = [0] * (num_predicates + 1)
            result_dict[mode + '_recall_count'][k][int(local_label)] += 1
            result_dict[mode + '_recall_count'][k][0] += 1
        rec_i = float(len(match)) / float(gt_rels.shape[0])
        result_dict[mode + '_recall'][k].append(rec_i)
    return RES_pred_to_gt, pred_5ples, rel_scores
def evaluate_from_dict(self, gt_entry, pred_entry, mode, result_dict, multiple_preds=False, viz_dict=None, **kwargs):
    """
    Shortcut to doing evaluate_recall from dict

    :param gt_entry: Dictionary containing gt_relations, gt_boxes, gt_classes
    :param pred_entry: Dictionary containing pred_rel_inds, rel_scores and,
        for detection modes, pred_boxes / pred_classes / obj_scores
    :param mode: 'predcls', 'sgcls', 'objcls', 'sgdet', 'phrdet' or 'preddet'
    :param result_dict: accumulator keyed by '<mode>_recall' etc.
    :param multiple_preds: if True, allow multiple predicates per object pair
    :param viz_dict: unused here
    :param kwargs: forwarded to evaluate_recall
    :return: (pred_to_gt, pred_5ples, rel_scores), or (None, None, None) on
        the 'preddet' shortcut path
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']

    if mode == 'predcls':
        # GT boxes and labels are given; only predicates are predicted.
        pred_boxes = gt_boxes
        pred_classes = gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        # GT boxes, predicted labels.
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'objcls':
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
        # same as sgcls but assume perfect predicate recognition:
        # use the GT pairs with a one-hot score on the GT predicate.
        pred_rel_inds = gt_rels[:, :2]
        rel_scores = np.zeros((len(gt_rels), rel_scores.shape[1]))
        rel_scores[np.arange(len(gt_rels)), gt_rels[:, 2]] = 1
    elif mode == 'sgdet' or mode == 'phrdet':
        # Fully detected boxes and labels.
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Only extract the indices that appear in GT
        prc = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if prc.size == 0:
            # No predicted pair coincides with any GT pair: recall is 0.
            for k in result_dict[mode + '_recall']:
                result_dict[mode + '_recall'][k].append(0.0)
            if self.per_triplet:
                for k in result_dict[mode + '_recall_norm']:
                    result_dict[mode + '_recall_norm'][k].append(0.0)
            return None, None, None
        pred_inds_per_gt = prc.argmax(0)
        pred_rel_inds = pred_rel_inds[pred_inds_per_gt]
        rel_scores = rel_scores[pred_inds_per_gt]

        # Now sort the matching ones
        rel_scores_sorted = argsort_desc(rel_scores[:, 1:])
        rel_scores_sorted[:, 1] += 1  # shift past the background predicate column
        rel_scores_sorted = np.column_stack(
            (pred_rel_inds[rel_scores_sorted[:, 0]], rel_scores_sorted[:, 1]))

        matches = intersect_2d(rel_scores_sorted, gt_rels)
        for k in result_dict[mode + '_recall']:
            rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(rec_i)
        if self.per_triplet:
            for k in result_dict[mode + '_recall_norm']:
                rec_i = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
                result_dict[mode + '_recall_norm'][k].append(rec_i)
        return None, None, None
    else:
        raise ValueError('invalid mode')

    # BUGFIX: overall_scores used to be computed only inside the
    # multiple_preds branch, so the per-triplet ranking code below raised a
    # NameError when self.per_triplet was set and multiple_preds was False.
    # Compute it unconditionally here, before evaluate_recall rebinds
    # rel_scores.
    obj_scores_per_rel = obj_scores[pred_rel_inds].prod(1)
    overall_scores = obj_scores_per_rel[:, None] * rel_scores[:, 1:]
    if multiple_preds:
        # Score every (pair, predicate) combination; keep the global top-k.
        score_inds = argsort_desc(overall_scores)[:MAX_RECALL_K]
        pred_rels = np.column_stack(
            (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
        predicate_scores = rel_scores[score_inds[:, 0], score_inds[:, 1] + 1]
    else:
        # One predicate per pair: argmax over non-background classes.
        pred_rels = np.column_stack(
            (pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)

    pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
        gt_rels, gt_boxes, gt_classes, pred_rels, pred_boxes, pred_classes,
        predicate_scores, obj_scores, phrdet=mode == 'phrdet', **kwargs)

    if self.per_triplet:
        # Per-GT-relation training-set frequency, used to weight recall.
        counts = np.zeros(len(gt_rels))
        for rel_i, gt_rel in enumerate(gt_rels):
            o, s, R = gt_rel  # NOTE(review): rows appear to be (sub, obj, rel) — verify
            tri_str = '{}_{}_{}'.format(gt_classes[o], R, gt_classes[s])
            if tri_str in self.triplet_counts:
                counts[rel_i] = self.triplet_counts[tri_str]
        weights = self.normalize_counts(counts)

    for k in result_dict[mode + '_recall']:
        match = reduce(np.union1d, pred_to_gt[:k])
        # BUGFIX: np.int was a deprecated alias of the builtin int and was
        # removed in NumPy 1.24; use the builtin directly.
        match = np.array(match).astype(int)
        rec_i = float(len(match)) / float(gt_rels.shape[0])
        result_dict[mode + '_recall'][k].append(rec_i)
        if self.per_triplet:
            result_dict[mode + '_recall_norm'][k].append(
                np.sum(weights[match]))

    if self.per_triplet:
        # TODO: this looks similar to preddet, reuse that code
        score_inds = argsort_desc(overall_scores)
        pred_rels = np.column_stack(
            (pred_rel_inds[score_inds[:, 0]], score_inds[:, 1] + 1))
        # Naive and slow code to get per triplet ranks
        ranks, counts = np.zeros(len(gt_rels)) - 1, np.zeros(len(gt_rels))
        for rel_i, gt_rel in enumerate(gt_rels):
            o, s, R = gt_rel
            tri_str = '{}_{}_{}'.format(gt_classes[o], R, gt_classes[s])
            if tri_str in self.triplet_counts:
                counts[rel_i] = self.triplet_counts[tri_str]
            # select only pairs with this bounding boxes
            ind = np.where((pred_rels[:, 0] == o) & (pred_rels[:, 1] == s)
                           | (pred_rels[:, 0] == s) & (pred_rels[:, 1] == o))[0]
            pred_to_gt_triplet, _, _ = evaluate_recall(
                gt_rel.reshape(1, -1), gt_boxes, gt_classes, pred_rels[ind],
                pred_boxes, pred_classes)
            # Rank of the first prediction that matches this GT triplet.
            for r, p in enumerate(pred_to_gt_triplet):
                if len(p) > 0:
                    assert p == [0], (p, gt_rel, pred_to_gt_triplet)
                    ranks[rel_i] = r
                    break
            if ranks[rel_i] < 0:
                # For sgcls not all combinations are present, so take some
                # max rank as the default value
                ranks[rel_i] = MAX_RECALL_K + 1
            if tri_str not in self.triplet_ranks:
                self.triplet_ranks[tri_str] = []
            self.triplet_ranks[tri_str].append(ranks[rel_i])
        result_dict[mode + '_rank'].extend(ranks)
        result_dict[mode + '_counts'].extend(
            counts)  # save count to normalize later

    return pred_to_gt, pred_5ples, rel_scores
def evaluate_from_dict(gt_entry, pred_entry, mode, result_dict, multiple_preds=False, viz_dict=None, **kwargs):
    """Evaluate one image's predicted relations against its ground truth.

    Unpacks the GT / prediction dictionaries, builds (subject, object,
    predicate) triplets for the requested mode, runs evaluate_recall, and
    accumulates per-image recall plus per-predicate (hits, count) pairs
    into result_dict.

    :param gt_entry: dict with gt_relations, gt_boxes, gt_classes
    :param pred_entry: dict with pred_rel_inds, rel_scores and, for
        detection modes, pred_boxes / pred_classes / obj_scores
    :param mode: 'predcls', 'sgcls', 'sgdet', 'phrdet' or 'preddet'
    :param result_dict: accumulator keyed by '<mode>_recall' and
        '<mode>_recall_per_rel'
    :param multiple_preds: if True, allow several predicates per object pair
    :param viz_dict: unused here
    :return: (pred_to_gt, pred_5ples, rel_scores), or (None, None, None) on
        the 'preddet' shortcut path
    """
    gt_rels = gt_entry['gt_relations']
    gt_boxes = gt_entry['gt_boxes'].astype(float)
    gt_classes = gt_entry['gt_classes']
    # gt_filenames = gt_entry['filenames']

    pred_rel_inds = pred_entry['pred_rel_inds']
    rel_scores = pred_entry['rel_scores']

    if mode == 'predcls':
        # Perfect boxes and labels; only the predicate is predicted.
        pred_boxes, pred_classes = gt_boxes, gt_classes
        obj_scores = np.ones(gt_classes.shape[0])
    elif mode == 'sgcls':
        # GT boxes, predicted labels.
        pred_boxes = gt_boxes
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode in ('sgdet', 'phrdet'):
        # Fully detected boxes and labels.
        pred_boxes = pred_entry['pred_boxes'].astype(float)
        pred_classes = pred_entry['pred_classes']
        obj_scores = pred_entry['obj_scores']
    elif mode == 'preddet':
        # Keep only predicted pairs that coincide with some GT pair.
        pair_overlap = intersect_2d(pred_rel_inds, gt_rels[:, :2])
        if pair_overlap.size == 0:
            for k in result_dict[mode + '_recall']:
                result_dict[mode + '_recall'][k].append(0.0)
            return None, None, None
        best_pred_per_gt = pair_overlap.argmax(0)
        pred_rel_inds = pred_rel_inds[best_pred_per_gt]
        rel_scores = rel_scores[best_pred_per_gt]

        # Rank surviving (pair, predicate) combinations by score; the +1
        # shifts past the background predicate column.
        ranked = argsort_desc(rel_scores[:, 1:])
        ranked[:, 1] += 1
        ranked = np.column_stack((pred_rel_inds[ranked[:, 0]], ranked[:, 1]))

        matches = intersect_2d(ranked, gt_rels)
        for k in result_dict[mode + '_recall']:
            hit_frac = float(matches[:k].any(0).sum()) / float(gt_rels.shape[0])
            result_dict[mode + '_recall'][k].append(hit_frac)
        return None, None, None
    else:
        raise ValueError('invalid mode')

    if multiple_preds:
        # Score every (pair, predicate) combination; keep the global top 100.
        pair_obj_scores = obj_scores[pred_rel_inds].prod(1)
        combined = pair_obj_scores[:, None] * rel_scores[:, 1:]
        top = argsort_desc(combined)[:100]
        pred_rels = np.column_stack((pred_rel_inds[top[:, 0]], top[:, 1] + 1))
        predicate_scores = rel_scores[top[:, 0], top[:, 1] + 1]
    else:
        # One predicate per pair: argmax over the non-background classes.
        pred_rels = np.column_stack((pred_rel_inds, 1 + rel_scores[:, 1:].argmax(1)))
        predicate_scores = rel_scores[:, 1:].max(1)

    pred_to_gt, pred_5ples, rel_scores = evaluate_recall(
        gt_rels, gt_boxes, gt_classes, pred_rels, pred_boxes, pred_classes,
        predicate_scores, obj_scores, phrdet=mode == 'phrdet', **kwargs)

    for k in result_dict[mode + '_recall']:
        # GT relations hit within the top-k predictions.
        matched_gt = reduce(np.union1d, pred_to_gt[:k])
        # FIXME: I think this part of original code is wrong. We shouldn't do union.
        # Per-predicate bookkeeping: predicate label -> [hits, total].
        tallies = dict()
        for gt_idx in range(gt_rels.shape[0]):
            gt_s, gt_o, gt_r = gt_rels[gt_idx]
            bucket = tallies.setdefault(gt_r, [0, 0])
            bucket[1] += 1
            bucket[0] += gt_idx in matched_gt
        rec_per_rel = {r: (hits, cnt) for r, (hits, cnt) in tallies.items()}
        result_dict[mode + '_recall'][k].append(
            float(len(matched_gt)) / float(gt_rels.shape[0]))
        result_dict[mode + '_recall_per_rel'][k].append(rec_per_rel)

    return pred_to_gt, pred_5ples, rel_scores