import h5py
import numpy as np
import torch

# NOTE: intersect_2d (row-wise exact matching) and bbox_overlaps (pairwise box IoU)
# are assumed to be provided by the project's utility modules.


def _compute_pred_matches(gt_triplets, pred_triplets, gt_boxes, pred_boxes,
                          iou_thresh, phrdet=False):
    """
    Given a set of predicted triplets, return the list of matching GTs for each of
    the given predictions.
    :param gt_triplets: [num_gt, 3] array of (sub_class, predicate, obj_class)
    :param pred_triplets: [num_pred, 3] array of (sub_class, predicate, obj_class)
    :param gt_boxes: [num_gt, 8] array of concatenated subject and object boxes
    :param pred_boxes: [num_pred, 8] array of concatenated subject and object boxes
    :param iou_thresh: IoU threshold for a box match
    :param phrdet: if True, match on the subject-object union box (phrase detection)
    :return: pred_to_gt: list (one entry per prediction) of matching GT indices
    """
    # This performs a matrix multiplication-esque thing between the two arrays.
    # Instead of summing, we want the equality, so we reduce in that way.
    # The rows correspond to GT triplets, columns to pred triplets.
    keeps = intersect_2d(gt_triplets, pred_triplets)
    gt_has_match = keeps.any(1)
    pred_to_gt = [[] for x in range(pred_boxes.shape[0])]
    for gt_ind, gt_box, keep_inds in zip(
            np.where(gt_has_match)[0],
            gt_boxes[gt_has_match],
            keeps[gt_has_match],
    ):
        boxes = pred_boxes[keep_inds]
        if phrdet:
            # Evaluate where the union box IoU clears the threshold
            gt_box_union = gt_box.reshape((2, 4))
            gt_box_union = np.concatenate(
                (gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0)
            box_union = boxes.reshape((-1, 2, 4))
            box_union = np.concatenate(
                (box_union.min(1)[:, :2], box_union.max(1)[:, 2:]), 1)
            # [0] flattens the [1, num_boxes] IoU matrix; without it the boolean
            # mask is 2-D and cannot index np.where(keep_inds)[0] below
            inds = bbox_overlaps(
                torch.from_numpy(gt_box_union[None]),
                torch.from_numpy(box_union)).numpy()[0] >= iou_thresh
        else:
            sub_iou = bbox_overlaps(
                torch.from_numpy(gt_box[None, :4]).contiguous(),
                torch.from_numpy(boxes[:, :4]).contiguous()).numpy()[0]
            obj_iou = bbox_overlaps(
                torch.from_numpy(gt_box[None, 4:]).contiguous(),
                torch.from_numpy(boxes[:, 4:]).contiguous()).numpy()[0]
            # Both the subject box and the object box must clear the threshold
            inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh)

        for i in np.where(keep_inds)[0][inds]:
            pred_to_gt[i].append(int(gt_ind))
    return pred_to_gt
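# intersect_2d is not defined in this file. As a reading aid, here is a minimal
# sketch of the behavior _compute_pred_matches assumes from it; the name
# _intersect_2d_sketch is hypothetical, not the project's actual helper.
def _intersect_2d_sketch(x1, x2):
    """Return a [x1.shape[0], x2.shape[0]] bool array; entry (i, j) is True iff
    row i of x1 equals row j of x2 in every column."""
    if x1.shape[1] != x2.shape[1]:
        raise ValueError('Input arrays must have the same number of columns')
    # Broadcast to [num_x1, num_x2, num_cols], then require equality in every column
    return (x1[:, None] == x2[None, :]).all(-1)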
def _triplet(predicates, relations, classes, boxes, predicate_scores,
             class_scores, is_pred=False):
    """
    Format relations into (sub_class, predicate, obj_class) triplets.
    :param predicates: [num_relations] predicate label per relation
    :param relations: [num_relations, 2+] (subject_index, object_index) pairs
    :param classes: [num_boxes] class label per box
    :param boxes: [num_boxes, 4] boxes (x1, y1, x2, y2)
    :param predicate_scores: [num_relations] score per predicate
    :param class_scores: [num_boxes] score per box
    :param is_pred: if True, zero out the score of non-overlapping pairs
    :return: triplets, triplet_boxes, triplet_scores
    """
    # compute the overlaps between boxes
    if is_pred:
        overlaps = bbox_overlaps(
            torch.from_numpy(boxes).contiguous(),
            torch.from_numpy(boxes).contiguous())
    assert predicates.shape[0] == relations.shape[0]
    num_relations = relations.shape[0]
    triplets = np.zeros([num_relations, 3]).astype(np.int32)
    triplet_boxes = np.zeros([num_relations, 8]).astype(np.int32)
    triplet_scores = np.zeros([num_relations]).astype(np.float32)
    for i in range(num_relations):
        triplets[i, 1] = predicates[i]
        sub_i, obj_i = relations[i, :2]
        triplets[i, 0] = classes[sub_i]
        triplets[i, 2] = classes[obj_i]
        triplet_boxes[i, :4] = boxes[sub_i, :]
        triplet_boxes[i, 4:] = boxes[obj_i, :]
        # compute triplet score as the product of subject, object, and predicate scores
        score = class_scores[sub_i]
        score *= class_scores[obj_i]
        if is_pred:
            if overlaps[sub_i, obj_i] == 0:
                # non-overlapping subject/object pairs get zero score
                score *= 0
            else:
                score *= predicate_scores[i]
        else:
            score *= predicate_scores[i]
        triplet_scores[i] = score
    return triplets, triplet_boxes, triplet_scores
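# A hedged sketch of how these pieces typically combine: rank predictions by
# triplet_scores from _triplet, run _compute_pred_matches, then reduce pred_to_gt
# to Recall@K. This helper is illustrative and not part of the original file.
def _recall_at_k_sketch(pred_to_gt, num_gt_rels, k=100):
    """Fraction of GT relations matched by any of the top-k predictions.

    Assumes pred_to_gt is already ordered by descending triplet score."""
    matched = set()
    for gt_inds in pred_to_gt[:k]:
        matched.update(gt_inds)
    return float(len(matched)) / max(num_gt_rels, 1)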
def load_graphs(graphs_file, images_file, mode='train', num_im=-1, num_val_im=0,
                filter_empty_rels=True, filter_non_overlap=False):
    """
    Load the file containing the GT boxes and relations, as well as the dataset split.
    :param graphs_file: HDF5 file with boxes, classes, and relationships
    :param images_file: HDF5 file with image widths and heights
    :param mode: 'train', 'val', or 'test'
    :param num_im: Number of images we want (-1 for all)
    :param num_val_im: Number of validation images held out of the train split
    :param filter_empty_rels: Filter out images that have no relationships
    :param filter_non_overlap: If training, filter relations whose boxes don't
        overlap (and drop images left with no overlapping relations).
    :return: split_mask: boolean mask over the full dataset marking the images used
             image_index_valid: indices of the images we're using
             im_sizes: [num_im, 2] array of (width, height) per image
             boxes: List where each element is a [num_gt, 4] array of ground
                    truth boxes (x1, y1, x2, y2)
             gt_classes: List where each element is a [num_gt] array of classes
             relationships: List where each element is a [num_r, 3] array of
                    (box_ind_1, box_ind_2, predicate) relationships
    """
    if mode not in ('train', 'val', 'test'):
        raise ValueError('{} invalid'.format(mode))
    roi_h5 = h5py.File(graphs_file, 'r')
    im_h5 = h5py.File(images_file, 'r')
    data_split = roi_h5['split'][:]
    split = 2 if mode == 'test' else 0  # val is carved out of the train split below
    split_mask = data_split == split

    # Filter out images without bounding boxes
    split_mask &= roi_h5['img_to_first_box'][:] >= 0
    if filter_empty_rels:
        split_mask &= roi_h5['img_to_first_rel'][:] >= 0

    image_index = np.where(split_mask)[0]
    if num_im > -1:
        image_index = image_index[:num_im]
    if num_val_im > 0:
        if mode == 'val':
            image_index = image_index[:num_val_im]
        elif mode == 'train':
            image_index = image_index[num_val_im:]

    split_mask = np.zeros_like(data_split).astype(bool)
    split_mask[image_index] = True

    # Get box information
    all_labels = roi_h5['labels'][:, 0]
    all_boxes = roi_h5['boxes_1024'][:]  # boxes at the 1024-px scale; will index later
    assert np.all(all_boxes[:, :2] >= 0)  # sanity check
    assert np.all(all_boxes[:, 2:] > 0)  # no empty box

    # convert from (xc, yc, w, h) to (x1, y1, x2, y2)
    all_boxes[:, :2] = all_boxes[:, :2] - all_boxes[:, 2:] / 2
    all_boxes[:, 2:] = all_boxes[:, :2] + all_boxes[:, 2:]

    im_to_first_box = roi_h5['img_to_first_box'][split_mask]
    im_to_last_box = roi_h5['img_to_last_box'][split_mask]
    im_to_first_rel = roi_h5['img_to_first_rel'][split_mask]
    im_to_last_rel = roi_h5['img_to_last_rel'][split_mask]
    im_widths = im_h5['image_widths'][split_mask]
    im_heights = im_h5['image_heights'][split_mask]

    # load relation labels
    _relations = roi_h5['relationships'][:]
    _relation_predicates = roi_h5['predicates'][:, 0]
    assert im_to_first_rel.shape[0] == im_to_last_rel.shape[0]
    assert _relations.shape[0] == _relation_predicates.shape[0]  # sanity check

    # Get everything by image.
    im_sizes = []
    image_index_valid = []
    boxes = []
    gt_classes = []
    relationships = []
    for i in range(len(image_index)):
        boxes_i = all_boxes[im_to_first_box[i]:im_to_last_box[i] + 1, :]
        gt_classes_i = all_labels[im_to_first_box[i]:im_to_last_box[i] + 1]

        if im_to_first_rel[i] >= 0:
            predicates = _relation_predicates[im_to_first_rel[i]:im_to_last_rel[i] + 1]
            # shift box indices so they are local to this image
            obj_idx = _relations[im_to_first_rel[i]:im_to_last_rel[i] + 1] - im_to_first_box[i]
            assert np.all(obj_idx >= 0)
            assert np.all(obj_idx < boxes_i.shape[0])
            rels = np.column_stack((obj_idx, predicates))
        else:
            assert not filter_empty_rels
            rels = np.zeros((0, 3), dtype=np.int32)

        if filter_non_overlap:
            assert mode == 'train'
            inters = bbox_overlaps(
                torch.from_numpy(boxes_i).float(),
                torch.from_numpy(boxes_i).float()).numpy()
            rel_overs = inters[rels[:, 0], rels[:, 1]]
            inc = np.where(rel_overs > 0.0)[0]
            if inc.size > 0:
                rels = rels[inc]
            else:
                # no overlapping relations left; drop the whole image
                split_mask[image_index[i]] = 0
                continue

        image_index_valid.append(image_index[i])
        im_sizes.append(np.array([im_widths[i], im_heights[i]]))
        boxes.append(boxes_i)
        gt_classes.append(gt_classes_i)
        relationships.append(rels)

    im_sizes = np.stack(im_sizes, 0)
    return split_mask, image_index_valid, im_sizes, boxes, gt_classes, relationships
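# Hedged usage sketch: the file names below follow a common Visual Genome
# scene-graph convention (VG-SGG.h5 for boxes/relations, imdb_1024.h5 for image
# metadata) but are assumptions, not paths fixed by this module.
if __name__ == '__main__':
    split_mask, image_index_valid, im_sizes, boxes, gt_classes, relationships = \
        load_graphs('VG-SGG.h5', 'imdb_1024.h5', mode='train', num_val_im=5000,
                    filter_empty_rels=True, filter_non_overlap=True)
    print('{} images kept'.format(len(image_index_valid)))
    print('first image: {} boxes, {} relations'.format(
        boxes[0].shape[0], relationships[0].shape[0]))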