def _compute_pred_matches(gt_triplets, pred_triplets, gt_boxes, pred_boxes,
                          iou_thresh, phrdet=False):
    """
    Given a set of predicted triplets, return the list of matching GTs for each
    of the given predictions.
    :param gt_triplets: [num_gt_rels, 3] array of GT (subject class, predicate, object class)
    :param pred_triplets: [num_pred_rels, 3] array of predicted triplets, same layout
    :param gt_boxes: [num_gt_rels, 8] array of concatenated subject and object boxes (x1, y1, x2, y2)
    :param pred_boxes: [num_pred_rels, 8] array of concatenated subject and object boxes
    :param iou_thresh: IoU threshold for counting a box pair as a match
    :param phrdet: if True, evaluate in phrase-detection mode (match on the union boxes)
    :return: pred_to_gt: list with one entry per prediction, each a list of matching GT indices
    """
    # This performs a matrix-multiplication-like comparison between the two arrays:
    # instead of summing products, we reduce with row-wise equality.
    # The rows correspond to GT triplets, columns to pred triplets.
    keeps = intersect_2d(gt_triplets, pred_triplets)
    gt_has_match = keeps.any(1)
    pred_to_gt = [[] for x in range(pred_boxes.shape[0])]
    for gt_ind, gt_box, keep_inds in zip(np.where(gt_has_match)[0],
                                         gt_boxes[gt_has_match],
                                         keeps[gt_has_match]):
        boxes = pred_boxes[keep_inds]
        if phrdet:
            # Evaluate where the IoU of the union boxes >= iou_thresh
            gt_box_union = gt_box.reshape((2, 4))
            gt_box_union = np.concatenate(
                (gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0)

            box_union = boxes.reshape((-1, 2, 4))
            box_union = np.concatenate(
                (box_union.min(1)[:, :2], box_union.max(1)[:, 2:]), 1)

            inds = bbox_overlaps(gt_box_union[None], box_union)[0] >= iou_thresh
        else:
            sub_iou = bbox_overlaps(gt_box[None, :4], boxes[:, :4])[0]
            obj_iou = bbox_overlaps(gt_box[None, 4:], boxes[:, 4:])[0]
            inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh)

        for i in np.where(keep_inds)[0][inds]:
            pred_to_gt[i].append(int(gt_ind))
    return pred_to_gt
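# Illustrative usage sketch (not part of the original file): exercises
# _compute_pred_matches on toy data, assuming the module's own imports
# (numpy as np) plus the repo's intersect_2d/bbox_overlaps helpers are in
# scope, as in the function above.
def _demo_compute_pred_matches():
    # One GT triplet (subject class 1, predicate 5, object class 2), with its
    # subject and object boxes concatenated into a single 8-dim row.
    gt_triplets = np.array([[1, 5, 2]])
    gt_boxes = np.array([[0., 0., 10., 10., 20., 20., 30., 30.]])
    # Two predictions of the same triplet: the first repeats the GT boxes
    # exactly, the second has the right classes but far-away boxes, so only
    # the first should match at IoU >= 0.5.
    pred_triplets = np.array([[1, 5, 2], [1, 5, 2]])
    pred_boxes = np.array([[0., 0., 10., 10., 20., 20., 30., 30.],
                           [50., 50., 60., 60., 70., 70., 80., 80.]])
    pred_to_gt = _compute_pred_matches(gt_triplets, pred_triplets,
                                       gt_boxes, pred_boxes, iou_thresh=0.5)
    assert pred_to_gt == [[0], []], pred_to_gt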
def box_filter(boxes, must_overlap=False):
    """ Only include boxes that overlap as possible relations.
    If no boxes overlap, fall back to using all pairs. """
    overlaps = bbox_overlaps(boxes.astype(np.float64),
                             boxes.astype(np.float64)) > 0
    np.fill_diagonal(overlaps, 0)

    all_possib = np.ones_like(overlaps, dtype=bool)
    np.fill_diagonal(all_possib, 0)

    if must_overlap:
        possible_boxes = np.column_stack(np.where(overlaps))
        if possible_boxes.size == 0:
            possible_boxes = np.column_stack(np.where(all_possib))
    else:
        possible_boxes = np.column_stack(np.where(all_possib))
    return possible_boxes
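# Illustrative usage sketch (not part of the original file): with
# must_overlap=True only the overlapping pair survives; with
# must_overlap=False every ordered off-diagonal pair is a candidate.
def _demo_box_filter():
    demo_boxes = np.array([[0., 0., 10., 10.],
                           [5., 5., 15., 15.],          # overlaps box 0
                           [100., 100., 110., 110.]])   # isolated
    print(box_filter(demo_boxes, must_overlap=True))    # -> [[0 1] [1 0]]
    print(box_filter(demo_boxes, must_overlap=False))   # -> all 6 ordered pairs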
def get_rel_inds(self, rel_labels, im_inds, box_priors):
    # Get the relationship candidates
    if self.training:
        rel_inds = rel_labels[:, :3].data.clone()
    else:
        rel_cands = im_inds.data[:, None] == im_inds.data[None]
        rel_cands.view(-1)[diagonal_inds(rel_cands)] = 0

        # Require overlap for detection
        if self.require_overlap:
            rel_cands = rel_cands & (bbox_overlaps(box_priors.data,
                                                   box_priors.data) > 0)
            # if there are fewer than 100 things then we might as well add some?
            # amt_to_add = 100 - rel_cands.long().sum()

        rel_cands = rel_cands.nonzero()
        if rel_cands.dim() == 0:
            rel_cands = im_inds.data.new(1, 2).fill_(0)

        rel_inds = torch.cat((im_inds.data[rel_cands[:, 0]][:, None], rel_cands), 1)
    return rel_inds
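# Illustrative sketch (not part of the original file): the test-time branch
# above enumerates all ordered same-image pairs. A standalone equivalent in
# plain PyTorch, without the repo's diagonal_inds helper:
def _demo_rel_candidates():
    im_inds = torch.tensor([0, 0, 1])          # boxes 0,1 in image 0; box 2 in image 1
    cands = im_inds[:, None] == im_inds[None]  # same-image mask
    cands.fill_diagonal_(False)                # no self-relations
    pairs = cands.nonzero()                    # -> [[0, 1], [1, 0]]
    rel_inds = torch.cat((im_inds[pairs[:, 0]][:, None], pairs), 1)
    return rel_inds                            # rows of (img_ind, box0, box1)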
def load_graphs(graphs_file, mode='train', num_im=-1, num_val_im=0,
                filter_empty_rels=True, min_graph_size=-1, max_graph_size=-1,
                filter_non_overlap=False, training_triplets=None,
                random_subset=False, filter_zeroshots=True, n_shots=-1):
    """
    Load the file containing the GT boxes and relations, as well as the dataset split.
    :param graphs_file: HDF5 file with the scene graph annotations
    :param mode: 'train', 'val', or 'test'
    :param num_im: Number of images we want
    :param num_val_im: Number of validation images
    :param filter_empty_rels: If True, filter out images without relationships
    :param filter_non_overlap: If training, filter out relations whose boxes don't overlap
    :return:
        split_mask: boolean numpy array over all images, True for the ones we keep
        boxes: List where each element is a [num_gt, 4] array of ground truth boxes (x1, y1, x2, y2)
        gt_classes: List where each element is a [num_gt] array of classes
        relationships: List where each element is a [num_r, 3] array of
                       (box_ind_1, box_ind_2, predicate) relationships
    """
    if mode not in ('train', 'val', 'test'):
        raise ValueError('{} invalid'.format(mode))

    with h5py.File(graphs_file, 'r') as roi_h5:
        data_split = roi_h5['split'][:]
        split = 2 if mode == 'test' else 0
        split_mask = data_split == split

        # Filter out images without bounding boxes
        split_mask &= roi_h5['img_to_first_box'][:] >= 0
        if filter_empty_rels:
            split_mask &= roi_h5['img_to_first_rel'][:] >= 0

        image_index = np.where(split_mask)[0]
        if num_im > -1:
            image_index = image_index[:num_im]
        if num_val_im > 0:
            if mode in ['val']:  # , 'test' for faster preliminary evaluation on the test set
                image_index = image_index[:num_val_im]
            elif mode == 'train':
                image_index = image_index[num_val_im:]

        split_mask = np.zeros_like(data_split).astype(bool)
        split_mask[image_index] = True

        # Get box information
        all_labels = roi_h5['labels'][:, 0]
        all_boxes = roi_h5['boxes_{}'.format(BOX_SCALE)][:]  # will index later
        assert np.all(all_boxes[:, :2] >= 0)  # sanity check
        assert np.all(all_boxes[:, 2:] > 0)  # no empty box

        # Convert from (xc, yc, w, h) to (x1, y1, x2, y2)
        all_boxes[:, :2] = all_boxes[:, :2] - all_boxes[:, 2:] / 2
        all_boxes[:, 2:] = all_boxes[:, :2] + all_boxes[:, 2:]

        im_to_first_box = roi_h5['img_to_first_box'][split_mask]
        im_to_last_box = roi_h5['img_to_last_box'][split_mask]
        im_to_first_rel = roi_h5['img_to_first_rel'][split_mask]
        im_to_last_rel = roi_h5['img_to_last_rel'][split_mask]

        # Load relation labels
        _relations = roi_h5['relationships'][:]
        _relation_predicates = roi_h5['predicates'][:, 0]
        assert im_to_first_rel.shape[0] == im_to_last_rel.shape[0]
        assert _relations.shape[0] == _relation_predicates.shape[0]  # sanity check

    # Get everything by image.
    boxes = []
    gt_classes = []
    relationships = []
    for i in range(len(image_index)):
        boxes_i = all_boxes[im_to_first_box[i]:im_to_last_box[i] + 1, :]
        gt_classes_i = all_labels[im_to_first_box[i]:im_to_last_box[i] + 1]

        if min_graph_size > -1 and len(gt_classes_i) <= min_graph_size:
            # graphs with 0..min_graph_size objects are excluded
            split_mask[image_index[i]] = 0
            continue
        if max_graph_size > -1 and len(gt_classes_i) > max_graph_size:
            # graphs with max_graph_size+1..Inf objects are excluded
            split_mask[image_index[i]] = 0
            continue

        ind_zs = []
        if im_to_first_rel[i] >= 0:
            predicates = _relation_predicates[im_to_first_rel[i]:im_to_last_rel[i] + 1]
            obj_idx = _relations[im_to_first_rel[i]:im_to_last_rel[i] + 1] - im_to_first_box[i]
            assert np.all(obj_idx >= 0)
            assert np.all(obj_idx < boxes_i.shape[0])
            rels = np.column_stack((obj_idx, predicates))

            if training_triplets:
                n = len(rels)
                if n > 0:
                    if random_subset:
                        ind_zs = np.random.permutation(n)[:int(np.round(n / 15.))]
                    else:
                        for rel_ind, tri in enumerate(rels):
                            o1, o2, R = tri
                            tri_str = '{}_{}_{}'.format(gt_classes_i[o1], R, gt_classes_i[o2])
                            if isinstance(training_triplets, dict):
                                assert n_shots > 0, n_shots
                                if tri_str in training_triplets:
                                    if (n_shots == 10 and 1 <= training_triplets[tri_str] <= n_shots) or \
                                            (n_shots == 100 and 11 <= training_triplets[tri_str] <= n_shots):
                                        ind_zs.append(rel_ind)
                            elif tri_str not in training_triplets:
                                assert n_shots == -1, n_shots
                                ind_zs.append(rel_ind)
                        ind_zs = np.array(ind_zs)

                    if filter_zeroshots:
                        if len(ind_zs) > 0:
                            rels = rels[ind_zs]
                        else:
                            rels = np.zeros((0, 3), dtype=np.int32)
        else:
            assert not filter_empty_rels
            rels = np.zeros((0, 3), dtype=np.int32)

        if training_triplets and filter_zeroshots:
            assert len(rels) == len(ind_zs), (len(rels), len(ind_zs))

        if training_triplets and filter_empty_rels and len(ind_zs) == 0:
            split_mask[image_index[i]] = 0
            continue

        if filter_non_overlap:
            assert mode == 'train'
            inters = bbox_overlaps(boxes_i, boxes_i)
            rel_overs = inters[rels[:, 0], rels[:, 1]]
            inc = np.where(rel_overs > 0.0)[0]
            if inc.size > 0:
                rels = rels[inc]
            else:
                split_mask[image_index[i]] = 0
                continue

        boxes.append(boxes_i)
        gt_classes.append(gt_classes_i)
        relationships.append(rels)

    return split_mask, boxes, gt_classes, relationships
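# Illustrative usage sketch (not part of the original file): a typical
# training-split call. The file path below is hypothetical; any VG-style
# HDF5 scene graph file with the datasets read above would work.
def _demo_load_graphs():
    split_mask, boxes, gt_classes, relationships = load_graphs(
        'VG-SGG.h5', mode='train', num_val_im=5000,
        filter_empty_rels=True, filter_non_overlap=True)
    # One entry per kept image: boxes[i] is [num_gt, 4] in (x1, y1, x2, y2),
    # gt_classes[i] is [num_gt], and relationships[i] is [num_r, 3] rows of
    # (box_ind_1, box_ind_2, predicate).
    print(split_mask.sum(), len(boxes), len(gt_classes), len(relationships))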
def rel_assignments(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels,
                    image_offset, fg_thresh=0.5, num_sample_per_gt=4,
                    filter_non_overlap=True):
    """
    Assign object detection proposals to ground-truth targets.
    Produces the relationship labels used for training.
    :param rpn_rois: [num_rois, 5] array of [img_ind, x1, y1, x2, y2]
    :param gt_boxes: [num_boxes, 4] array of (x1, y1, x2, y2)
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels: [num_rels, 4] array of [img_ind, box_0, box_1, rel type]
    :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if >= fg_thresh)
    :return:
        rel_labels: [num_rels, 4] array of (img ind, box0 ind, box1 ind, rel type)
    """
    fg_rels_per_image = int(np.round(REL_FG_FRACTION * 64))

    pred_inds_np = im_inds.cpu().numpy()
    pred_boxes_np = rpn_rois.cpu().numpy()
    pred_boxlabels_np = roi_gtlabels.cpu().numpy()
    gt_boxes_np = gt_boxes.cpu().numpy()
    gt_classes_np = gt_classes.cpu().numpy()
    gt_rels_np = gt_rels.cpu().numpy()

    gt_classes_np[:, 0] -= image_offset
    gt_rels_np[:, 0] -= image_offset
    num_im = gt_classes_np[:, 0].max() + 1

    rel_labels = []
    num_box_seen = 0
    for im_ind in range(num_im):
        pred_ind = np.where(pred_inds_np == im_ind)[0]

        gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
        gt_boxes_i = gt_boxes_np[gt_ind]
        gt_classes_i = gt_classes_np[gt_ind, 1]
        gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]

        pred_boxes_i = pred_boxes_np[pred_ind]
        pred_boxlabels_i = pred_boxlabels_np[pred_ind]

        ious = bbox_overlaps(pred_boxes_i, gt_boxes_i)  # [num_pred, num_gt]
        is_match = (pred_boxlabels_i[:, None] == gt_classes_i[None]) & (ious >= fg_thresh)

        # FOR BG. Limit ourselves to only IoUs that overlap, but are not the exact same box
        pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i)
        if filter_non_overlap:
            rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0)
            rels_intersect = rel_possibilities
        else:
            rel_possibilities = np.ones((pred_boxes_i.shape[0], pred_boxes_i.shape[0]),
                                        dtype=np.int64) - np.eye(pred_boxes_i.shape[0],
                                                                 dtype=np.int64)
            rels_intersect = (pbi_iou < 1) & (pbi_iou > 0)

        # ONLY select relations between ground truth boxes, because otherwise
        # we get useless data
        rel_possibilities[pred_boxlabels_i == 0] = 0
        rel_possibilities[:, pred_boxlabels_i == 0] = 0

        # Sample the GT relationships.
        fg_rels = []
        p_size = []
        for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels_i):
            fg_rels_i = []
            fg_scores_i = []

            for from_ind in np.where(is_match[:, from_gtind])[0]:
                for to_ind in np.where(is_match[:, to_gtind])[0]:
                    if from_ind != to_ind:
                        fg_rels_i.append((from_ind, to_ind, rel_id))
                        fg_scores_i.append(ious[from_ind, from_gtind] * ious[to_ind, to_gtind])
                        rel_possibilities[from_ind, to_ind] = 0
            if len(fg_rels_i) == 0:
                continue
            p = np.array(fg_scores_i)
            p = p / p.sum()
            p_size.append(p.shape[0])
            num_to_add = min(p.shape[0], num_sample_per_gt)
            for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False):
                fg_rels.append(fg_rels_i[rel_to_add])

        fg_rels = np.array(fg_rels, dtype=np.int64)
        if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image:
            fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image,
                                         replace=False)]
        elif fg_rels.size == 0:
            fg_rels = np.zeros((0, 3), dtype=np.int64)

        bg_rels = np.column_stack(np.where(rel_possibilities))
        bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64)))

        num_bg_rel = min(64 - fg_rels.shape[0], bg_rels.shape[0])
        if bg_rels.size > 0:
            # Sample background relations uniformly. (The commented line below
            # would let us bias sampling toward intersecting pairs instead.)
            # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]]
            bg_rels = bg_rels[np.random.choice(bg_rels.shape[0], size=num_bg_rel,
                                               replace=False)]
        else:
            bg_rels = np.zeros((0, 3), dtype=np.int64)

        if fg_rels.size == 0 and bg_rels.size == 0:
            # Just put something here
            bg_rels = np.array([[0, 0, 0]], dtype=np.int64)

        all_rels_i = np.concatenate((fg_rels, bg_rels), 0)
        all_rels_i[:, 0:2] += num_box_seen
        all_rels_i = all_rels_i[np.lexsort((all_rels_i[:, 1], all_rels_i[:, 0]))]

        rel_labels.append(np.column_stack((
            im_ind * np.ones(all_rels_i.shape[0], dtype=np.int64),
            all_rels_i,
        )))

        num_box_seen += pred_boxes_i.shape[0]

    rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(
        rpn_rois.get_device(), non_blocking=True)
    return rel_labels
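# Illustrative sketch (not part of the original file): the foreground sampling
# above weights each candidate (from_ind, to_ind) pair by the product of its
# subject and object IoUs with the GT, normalized into a distribution for
# npr.choice. A standalone equivalent with made-up IoU products:
def _demo_fg_sampling():
    fg_scores_i = [0.9 * 0.8, 0.6 * 0.5, 0.55 * 0.5]  # IoU products per candidate pair
    p = np.array(fg_scores_i)
    p = p / p.sum()                                    # -> roughly [0.56, 0.23, 0.21]
    picked = npr.choice(p.shape[0], p=p, size=2, replace=False)
    return picked                                      # indices of the sampled candidates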