def match_anchors(anns, a_xywh, a_ltrb, pos_thresh=0.7, neg_thresh=0.3,
                  get_label=lambda x: x['category_id'], debug=False):
    """Build per-anchor regression and classification targets for one image.

    Anchors whose IoU with an annotation exceeds ``pos_thresh`` are assigned
    to that annotation (annotations are visited in order, so a later one
    overwrites an earlier one). Afterwards the highest-IoU anchor of every
    annotation is assigned to it regardless of the threshold.

    Args:
        anns: Sequence of annotation dicts; ``ann['bbox']`` is read as LTWH.
        a_xywh: Anchor tensor (presumably XYWH, as the name suggests); its
            length is the anchor count.
        a_ltrb: Anchors in the layout passed to ``iou_mn`` (presumably LTRB).
        pos_thresh: IoU above which an anchor is matched to an annotation.
        neg_thresh: Unmatched anchors with IoU >= this value against any
            annotation are flagged in the returned ``ignore`` mask.
        get_label: Callable mapping an annotation to its integer label.
        debug: When true, print the best IoU found for each annotation.

    Returns:
        ``(loc_t, cls_t, ignore)``: ``(num_anchors, 4)`` regression targets,
        long class targets (0 where unmatched) and the ignore mask.
    """
    n_anchors = len(a_xywh)
    loc_t = a_xywh.new_zeros(n_anchors, 4)
    cls_t = loc_t.new_zeros(n_anchors, dtype=torch.long)

    if len(anns) == 0:
        # Nothing to match: all-zero targets and an all-zero uint8 mask.
        return loc_t, cls_t, loc_t.new_zeros(n_anchors, dtype=torch.uint8)

    gt_xywh = BBox.convert(
        loc_t.new_tensor([ann['bbox'] for ann in anns]),
        format=BBox.LTWH, to=BBox.XYWH, inplace=True)
    gt_labels = loc_t.new_tensor(
        [get_label(ann) for ann in anns], dtype=torch.long)
    gt_ltrb = BBox.convert(gt_xywh, BBox.XYWH, BBox.LTRB)
    ious = iou_mn(gt_ltrb, a_ltrb)

    # Threshold-based matches, one annotation at a time.
    above_thresh = ious > pos_thresh
    for anchor_mask, gt_box, gt_label in zip(above_thresh, gt_xywh, gt_labels):
        loc_t[anchor_mask] = coords_to_target(gt_box, a_xywh[anchor_mask])
        cls_t[anchor_mask] = gt_label

    # The best anchor of each annotation always wins, applied last.
    best_ious, best_anchor = ious.max(dim=1)
    if debug:
        print(best_ious.tolist())
    loc_t[best_anchor] = coords_to_target(gt_xywh, a_xywh[best_anchor])
    cls_t[best_anchor] = gt_labels

    overlaps_any = (ious >= neg_thresh).sum(dim=0) != 0
    ignore = (cls_t == 0) & overlaps_any
    return loc_t, cls_t, ignore
def softer_roi_based_inference(
        rois, loc_p, cls_p, log_var_p, iou_threshold=0.5, topk=100):
    """Decode RoI predictions for one image and filter them with softer-NMS.

    NOTE: ``loc_p`` and ``log_var_p`` are modified in place.

    Args:
        rois: RoI tensor; columns ``[:2]`` are used as offsets and ``[2:]``
            as scales (presumably XYWH — confirm against the caller).
        loc_p: Predicted box deltas, decoded in place into XYWH boxes.
        cls_p: Class logits; column 0 is skipped (treated as background).
        log_var_p: Predicted log-variances, exponentiated in place.
        iou_threshold: IoU threshold forwarded to ``softer_nms_cpu``.
        topk: Detection limit forwarded to ``softer_nms_cpu``.

    Returns:
        List of detection dicts with ``image_id`` (always -1),
        ``category_id`` (1-based), ``bbox`` (LTWH list) and ``score``.
    """
    foreground = torch.softmax(cls_p, dim=1)[:, 1:]
    scores, labels = foreground.max(dim=1)

    var_p = log_var_p.exp_()

    roi_xy, roi_wh = rois[:, :2], rois[:, 2:]
    loc_p[..., :2].mul_(roi_wh).add_(roi_xy)
    loc_p[..., 2:].exp_().mul_(roi_wh)

    bboxes = BBox.convert(
        loc_p, format=BBox.XYWH, to=BBox.LTRB, inplace=True).cpu()
    scores = scores.cpu()
    var_p = var_p.cpu()

    keep = softer_nms_cpu(bboxes, scores, var_p, iou_threshold, topk)
    bboxes = BBox.convert(
        bboxes, format=BBox.LTRB, to=BBox.LTWH, inplace=True)

    return [
        {
            'image_id': -1,
            # Column 0 was dropped before the max, so shift labels back.
            'category_id': labels[k].item() + 1,
            'bbox': bboxes[k].tolist(),
            'score': scores[k].item(),
        }
        for k in keep
    ]
def match_rois2(anns, rois, pos_thresh=0.5, n_samples=64, pos_neg_ratio=1 / 3):
    """Match RoIs to annotations and sample a fixed-size training subset.

    An RoI is matched to an annotation when their IoU exceeds ``pos_thresh``;
    an RoI matched by several annotations takes the largest label. The
    highest-IoU RoI of each annotation is then assigned to it regardless of
    the threshold.

    Args:
        anns: Sequence of annotation dicts with ``'bbox'`` (read as LTWH) and
            ``'category_id'``.
        rois: RoI tensor, treated as LTRB.
        pos_thresh: IoU above which an RoI is matched.
        n_samples: Total number of RoIs to sample.
        pos_neg_ratio: Desired positive-to-negative ratio of the sample.

    Returns:
        ``(loc_t, cls_t, indices)`` where ``loc_t`` holds regression targets
        of the sampled positives only, ``cls_t`` the labels of all sampled
        RoIs and ``indices`` their positions (positives first).

        NOTE(review): with no annotations only the 2-tuple ``(loc_t, cls_t)``
        over all RoIs is returned — callers must handle both arities.
    """
    n_rois = len(rois)
    if len(anns) == 0:
        zero_loc = rois.new_zeros(n_rois, 4)
        zero_cls = zero_loc.new_zeros(n_rois, dtype=torch.long)
        return (zero_loc, zero_cls)

    rois_xywh = BBox.convert(rois, BBox.LTRB, BBox.XYWH)

    gt_xywh = BBox.convert(
        rois.new_tensor([ann['bbox'] for ann in anns]),
        format=BBox.LTWH, to=BBox.XYWH, inplace=True)
    gt_labels = rois.new_tensor(
        [ann['category_id'] for ann in anns], dtype=torch.long)
    gt_ltrb = BBox.convert(gt_xywh, BBox.XYWH, BBox.LTRB)
    ious = iou_mn(gt_ltrb, rois)

    # Label grid: label where matched, 0 elsewhere; reduce over annotations.
    matched = ious > pos_thresh
    label_grid = matched.long() * gt_labels[:, None]
    cls_t, ann_of_roi = label_grid.max(dim=0)

    loc_t_all = coords_to_target2(gt_xywh, rois_xywh)
    loc_t = select(loc_t_all, 0, ann_of_roi)

    # Force-assign the best RoI of every annotation.
    _, best_roi = ious.max(dim=1)
    loc_t[best_roi] = select(loc_t_all, 1, best_roi)
    cls_t[best_roi] = gt_labels

    is_pos = cls_t != 0
    n_pos = int(n_samples * pos_neg_ratio / (pos_neg_ratio + 1))
    n_neg = n_samples - n_pos
    pos_indices = sample(torch.nonzero(is_pos).squeeze(1), n_pos)
    neg_indices = sample(torch.nonzero(~is_pos).squeeze(1), n_neg)

    indices = torch.cat([pos_indices, neg_indices], dim=0)
    return loc_t[pos_indices], cls_t[indices], indices
def match_anchors2(anns, a_xywh, a_ltrb, pos_thresh=0.7, neg_thresh=0.3,
                   get_label=lambda x: x['category_id'], debug=False):
    """Vectorised construction of per-anchor targets for one image.

    An anchor is matched to an annotation when their IoU exceeds
    ``pos_thresh``; an anchor matched by several annotations takes the
    largest label. The highest-IoU anchor of each annotation is then assigned
    to it regardless of the threshold.

    Args:
        anns: Sequence of annotation dicts; ``ann['bbox']`` is read as LTWH.
        a_xywh: Anchor tensor (presumably XYWH, as the name suggests).
        a_ltrb: Anchors in the layout passed to ``iou_mn`` (presumably LTRB).
        pos_thresh: IoU above which an anchor is matched.
        neg_thresh: Unmatched anchors with IoU >= this value against any
            annotation are flagged in ``ignore``.
        get_label: Callable mapping an annotation to its integer label.
        debug: When true, print the best IoU found for each annotation.

    Returns:
        ``(loc_t, cls_t, ignore)``: regression targets, long class targets
        (0 where unmatched) and the ignore mask.
    """
    n_anchors = len(a_xywh)
    if len(anns) == 0:
        zero_loc = a_xywh.new_zeros(n_anchors, 4)
        zero_cls = zero_loc.new_zeros(n_anchors, dtype=torch.long)
        zero_ignore = zero_loc.new_zeros(n_anchors, dtype=torch.uint8)
        return zero_loc, zero_cls, zero_ignore

    gt_xywh = BBox.convert(
        a_xywh.new_tensor([ann['bbox'] for ann in anns]),
        format=BBox.LTWH, to=BBox.XYWH, inplace=True)
    gt_labels = a_xywh.new_tensor(
        [get_label(ann) for ann in anns], dtype=torch.long)
    gt_ltrb = BBox.convert(gt_xywh, BBox.XYWH, BBox.LTRB)
    ious = iou_mn(gt_ltrb, a_ltrb)

    # Label grid: label where matched, 0 elsewhere; reduce over annotations.
    matched = ious > pos_thresh
    label_grid = matched.long() * gt_labels[:, None]
    cls_t, ann_of_anchor = label_grid.max(dim=0)

    loc_t_all = coords_to_target2(gt_xywh, a_xywh)
    loc_t = select(loc_t_all, 0, ann_of_anchor)

    # Force-assign the best anchor of every annotation.
    best_ious, best_anchor = ious.max(dim=1)
    if debug:
        print(best_ious.tolist())
    loc_t[best_anchor] = select(loc_t_all, 1, best_anchor)
    cls_t[best_anchor] = gt_labels

    overlaps_any = (ious >= neg_thresh).sum(dim=0) != 0
    ignore = (cls_t == 0) & overlaps_any
    return loc_t, cls_t, ignore
def match_rois(anns, rois, pos_thresh=0.5, mask_size=(14, 14),
               n_samples=64, pos_neg_ratio=1 / 3):
    """Match RoIs to annotations, sample them and build mask targets.

    The highest-IoU RoI of each annotation is assigned to it first; then
    every RoI whose IoU with an annotation exceeds ``pos_thresh`` is assigned
    to that annotation (annotations visited in order, later ones overwrite).

    Args:
        anns: Sequence of annotation dicts with ``'bbox'`` (read as LTWH),
            ``'category_id'`` and ``'segmentation'`` (a 2-D mask tensor).
        rois: RoI tensor, treated as LTRB. The mask crop multiplies the
            coordinates by the mask width/height, so they are presumably
            normalised to [0, 1] — confirm against the caller.
        pos_thresh: IoU above which an RoI is matched.
        mask_size: Spatial size of each mask target.
        n_samples: Total number of RoIs to sample.
        pos_neg_ratio: Desired positive-to-negative ratio of the sample.

    Returns:
        ``(loc_t, cls_t, mask_t, indices)``: regression targets and mask
        targets of the sampled positives, labels of all sampled RoIs and
        their positions (positives first).

        NOTE(review): with no annotations only the 2-tuple ``(loc_t, cls_t)``
        over all RoIs is returned — callers must handle both arities.
    """
    num_anns = len(anns)
    num_rois = len(rois)
    loc_t = rois.new_zeros(num_rois, 4)
    cls_t = loc_t.new_zeros(num_rois, dtype=torch.long)
    if num_anns == 0:
        # Checked before any box conversion: nothing else is needed here.
        return loc_t, cls_t

    rois_xywh = BBox.convert(rois, BBox.LTRB, BBox.XYWH)
    bboxes = loc_t.new_tensor([ann['bbox'] for ann in anns])
    bboxes = BBox.convert(bboxes, format=BBox.LTWH, to=BBox.XYWH, inplace=True)
    labels = loc_t.new_tensor(
        [ann['category_id'] for ann in anns], dtype=torch.long)
    bboxes_ltrb = BBox.convert(bboxes, BBox.XYWH, BBox.LTRB)
    ious = iou_mn(bboxes_ltrb, rois)

    # Annotation id per RoI, kept on the same device as the masks that index
    # it (a default-device tensor fails when ``rois`` lives on the GPU).
    ann_indices = cls_t.new_zeros(num_rois)

    _, indices = ious.max(dim=1)
    loc_t[indices] = coords_to_target(bboxes, rois_xywh[indices])
    cls_t[indices] = labels
    ann_indices[indices] = torch.arange(num_anns, device=ann_indices.device)

    pos = ious > pos_thresh
    # enumerate() walks every annotation; zipping with range(num_rois) would
    # silently drop annotations whenever there are fewer RoIs than annotations.
    for ann_id, (ipos, bbox, label) in enumerate(zip(pos, bboxes, labels)):
        loc_t[ipos] = coords_to_target(bbox, rois_xywh[ipos])
        cls_t[ipos] = label
        ann_indices[ipos] = ann_id

    pos = cls_t != 0
    n_pos = int(n_samples * pos_neg_ratio / (pos_neg_ratio + 1))
    n_neg = n_samples - n_pos
    pos_indices = sample(torch.nonzero(pos).squeeze(1), n_pos)
    neg_indices = sample(torch.nonzero(~pos).squeeze(1), n_neg)

    loc_t = loc_t[pos_indices]
    indices = torch.cat([pos_indices, neg_indices], dim=0)
    cls_t = cls_t[indices]

    # Crop each positive RoI out of its annotation's mask and resize it.
    mask_t = loc_t.new_zeros(n_pos, *mask_size)
    for i in range(n_pos):
        ind = pos_indices[i]
        mask = anns[ann_indices[ind]]['segmentation']
        height, width = mask.shape
        left, top, right, bottom = rois[ind]
        left = max(0, int(left * width))
        top = max(0, int(top * height))
        right = int(right * width)
        bottom = int(bottom * height)
        crop = mask[top:bottom, left:right].float()
        crop = crop.view(1, 1, *crop.size())
        mask_t[i] = F.interpolate(crop, size=mask_size).squeeze()
    return loc_t, cls_t, mask_t, indices
def anchor_based_inference(loc_p, cls_p, anchors, conf_threshold=0.01,
                           iou_threshold=0.5, topk=100,
                           conf_strategy='softmax', nms_method='soft',
                           min_score=None):
    """Turn anchor-based predictions for one image into detection dicts.

    NOTE: with a non-softmax ``conf_strategy`` ``cls_p`` is modified in place
    by ``torch.sigmoid_``.

    Args:
        loc_p: Predicted box deltas, one row per anchor.
        cls_p: Class scores/logits; column 0 is skipped (background).
        anchors: Anchor boxes passed to ``target_to_coords``.
        conf_threshold: Predictions scoring at or below this are dropped
            before NMS (skipped when not positive).
        iou_threshold: IoU threshold for (soft-)NMS.
        topk: Maximum number of detections.
        conf_strategy: ``'softmax'`` or anything else for sigmoid.
        nms_method: ``'soft'`` for soft-NMS, anything else for hard NMS.
        min_score: Soft-NMS minimum score; falls back to ``conf_threshold``
            when falsy.

    Returns:
        List of detection dicts with ``image_id`` (always -1),
        ``category_id`` (1-based), ``bbox`` (LTWH list) and ``score``.
    """
    if conf_strategy == 'softmax':
        probs = torch.softmax(cls_p, dim=1)
    else:
        probs = torch.sigmoid_(cls_p)
    scores, labels = torch.max(probs[:, 1:], dim=1)

    bboxes = loc_p
    if conf_threshold > 0:
        confident = scores > conf_threshold
        scores, labels = scores[confident], labels[confident]
        bboxes, anchors = bboxes[confident], anchors[confident]

    bboxes = target_to_coords(bboxes, anchors)
    bboxes = BBox.convert(
        bboxes, format=BBox.XYWH, to=BBox.LTRB, inplace=True).cpu()
    scores = scores.cpu()

    if nms_method == 'soft':
        min_score = min_score or conf_threshold
        indices = soft_nms_cpu(
            bboxes, scores, iou_threshold, topk, min_score=min_score)
    else:
        # Hard NMS: compact to the survivors, then index into the compacted
        # tensors (top-k by score, or everything that is left).
        survivors = nms(bboxes, scores, iou_threshold)
        scores = scores[survivors]
        labels = labels[survivors]
        bboxes = bboxes[survivors]
        if scores.size(0) > topk:
            indices = scores.topk(topk)[1]
        else:
            indices = range(scores.size(0))

    bboxes = BBox.convert(
        bboxes, format=BBox.LTRB, to=BBox.LTWH, inplace=True)

    return [
        {
            'image_id': -1,
            # Column 0 was dropped before the max, so shift labels back.
            'category_id': labels[k].item() + 1,
            'bbox': bboxes[k].tolist(),
            'score': scores[k].item(),
        }
        for k in indices
    ]
def roi_based_inference(rois, loc_p, cls_p, predict_mask, iou_threshold=0.5,
                        topk=100, nms_method='soft_nms'):
    """Decode per-class RoI predictions for one image into detections.

    Args:
        rois: RoI tensor; columns ``[:2]`` are used as offsets and ``[2:]``
            as scales (presumably XYWH — confirm against the caller).
        loc_p: Box deltas for every class; the deltas of the top-scoring
            class are selected for each RoI.
        cls_p: Class logits; column 0 is skipped (background).
        predict_mask: Optional callable taking the kept indices and returning
            per-class mask logits, or ``None`` to skip mask prediction.
        iou_threshold: IoU threshold for (soft-)NMS.
        topk: Maximum number of detections.
        nms_method: ``'nms'`` for hard NMS, anything else for soft-NMS.

    Returns:
        List of detection dicts with ``image_id`` (always -1),
        ``category_id`` (1-based), ``bbox`` (LTWH list), ``score`` and, when
        ``predict_mask`` is given, a boolean ``segmentation`` array.
    """
    scores, labels = torch.softmax(cls_p, dim=1)[:, 1:].max(dim=1)

    num_classes = cls_p.size(1) - 1
    loc_p = expand_last_dim(loc_p, num_classes, 4)
    loc_p = select(loc_p, 1, labels)

    # Decode the deltas in place against the RoIs.
    loc_p[..., :2].mul_(rois[:, 2:]).add_(rois[:, :2])
    loc_p[..., 2:].exp_().mul_(rois[:, 2:])

    bboxes = BBox.convert(
        loc_p, format=BBox.XYWH, to=BBox.LTRB, inplace=True).cpu()
    scores = scores.cpu()

    if nms_method == 'nms':
        indices = nms(bboxes, scores, iou_threshold)
        if len(indices) > topk:
            indices = indices[scores[indices].topk(topk)[1]]
        elif len(indices) < topk:
            # Report how many boxes survived NMS, not how many went in, and
            # stay silent when exactly ``topk`` are left.
            warnings.warn(
                "Only %d RoIs left after nms rather than top %d"
                % (len(indices), topk))
    else:
        indices = soft_nms_cpu(bboxes, scores, iou_threshold, topk)

    bboxes = BBox.convert(
        bboxes, format=BBox.LTRB, to=BBox.LTWH, inplace=True)

    masks = None
    if predict_mask is not None:
        mask_p = predict_mask(indices)
        masks = (select(mask_p, 1, labels[indices]).sigmoid_() > 0.5
                 ).cpu().numpy()

    dets = []
    for i, ind in enumerate(indices):
        det = {
            'image_id': -1,
            # Column 0 was dropped before the max, so shift labels back.
            'category_id': labels[ind].item() + 1,
            'bbox': bboxes[ind].tolist(),
            'score': scores[ind].item(),
        }
        if masks is not None:
            det['segmentation'] = masks[i]
        dets.append(det)
    return dets
def inference_rois(loc_p, cls_p, anchors, iou_threshold=0.5, topk=100,
                   conf_strategy='softmax'):
    """Decode batched anchor predictions into ``topk`` RoIs per image.

    NOTE: ``loc_p`` is decoded in place, and with a non-softmax
    ``conf_strategy`` ``cls_p`` is modified in place by ``torch.sigmoid_``.

    Args:
        loc_p: Batched box deltas, indexed per image along dim 0
            (presumably ``(batch, num_anchors, 4)``).
        cls_p: Batched class scores/logits with classes on the last
            dimension; class 0 is skipped (background).
        anchors: Anchors shared by all images; columns ``[:2]`` are offsets
            and ``[2:]`` scales.
        iou_threshold: IoU threshold for NMS.
        topk: Number of RoIs produced per image.
        conf_strategy: ``'softmax'`` or anything else for sigmoid.

    Returns:
        Tensor stacking, per image, ``topk`` rows of
        ``[batch_index, box...]`` with boxes in LTRB.
    """
    if conf_strategy == 'softmax':
        # Classes live on the last dimension of the batched input (see the
        # ``[..., 1:]`` slice and ``max(dim=-1)`` below); normalising over
        # dim 1 would mix scores across anchors instead of across classes.
        scores = torch.softmax(cls_p, dim=-1)
    else:
        scores = torch.sigmoid_(cls_p)
    scores = scores[..., 1:].max(dim=-1)[0]

    loc_p[..., :2].mul_(anchors[:, 2:]).add_(anchors[:, :2])
    loc_p[..., 2:].exp_().mul_(anchors[:, 2:])
    bboxes = BBox.convert(loc_p, format=BBox.XYWH, to=BBox.LTRB, inplace=True)

    rois = []
    for i, (ibboxes, iscores) in enumerate(zip(bboxes, scores)):
        keep = nms(ibboxes, iscores, iou_threshold)
        ibboxes = ibboxes[keep]
        iscores = iscores[keep]
        if len(keep) > topk:
            ibboxes = ibboxes[iscores.topk(topk)[1]]
        else:
            # Too few survivors: ``sample`` is relied on to return ``topk``
            # rows so that every image contributes the same number of RoIs.
            ibboxes = sample(ibboxes, topk)
        batch_idx = ibboxes.new_full((topk, 1), i)
        rois.append(torch.cat([batch_idx, ibboxes], dim=-1))
    return torch.stack(rois, dim=0)
def __init__(self, anchors, pos_thresh=0.7, neg_thresh=0.3,
             get_label=lambda x: 1, debug=False):
    """Store flattened anchors and the matching configuration.

    Args:
        anchors: Anchor collection; flattened with ``flatten`` and treated
            as XYWH.
        pos_thresh: Stored IoU threshold for positive matches.
        neg_thresh: Stored IoU threshold for the ignore band.
        get_label: Callable mapping an annotation to a label; the default
            maps every annotation to 1.
        debug: Stored debug flag.
    """
    flat_anchors = flatten(anchors)
    self.a_xywh = flat_anchors
    # Keep an LTRB copy alongside the XYWH anchors.
    self.a_ltrb = BBox.convert(flat_anchors, BBox.XYWH, BBox.LTRB)

    self.pos_thresh, self.neg_thresh = pos_thresh, neg_thresh
    self.get_label = get_label
    self.debug = debug
def roi_based_inference(
        rois, loc_p, cls_p, conf_threshold=0.01,
        iou_threshold=0.5, topk=100, nms_method='soft'):
    """Decode RoI predictions for one image into detection dicts.

    ``loc_p`` is used as-is, one set of deltas per RoI (no per-class
    selection). NOTE: when ``conf_threshold`` is falsy the deltas are decoded
    in place in the caller's ``loc_p``; otherwise the filtered copy is
    decoded.

    Args:
        rois: RoI tensor; columns ``[:2]`` are used as offsets and ``[2:]``
            as scales (presumably XYWH — confirm against the caller).
        loc_p: Predicted box deltas.
        cls_p: Class logits; column 0 is skipped (background).
        conf_threshold: Predictions scoring at or below this are dropped
            before NMS (skipped when falsy).
        iou_threshold: IoU threshold for (soft-)NMS.
        topk: Maximum number of detections.
        nms_method: ``'soft'`` for soft-NMS, anything else for hard NMS.

    Returns:
        List of detection dicts with ``image_id`` (always -1),
        ``category_id`` (1-based), ``bbox`` (LTWH list) and ``score``.
    """
    scores, labels = torch.softmax(cls_p, dim=1)[:, 1:].max(dim=1)

    bboxes = loc_p
    if conf_threshold:
        pos = scores > conf_threshold
        bboxes = bboxes[pos]
        rois = rois[pos]
        scores = scores[pos]
        labels = labels[pos]

    bboxes[..., :2].mul_(rois[:, 2:]).add_(rois[:, :2])
    bboxes[..., 2:].exp_().mul_(rois[:, 2:])

    bboxes = BBox.convert(
        bboxes, format=BBox.XYWH, to=BBox.LTRB, inplace=True).cpu()
    scores = scores.cpu()

    if nms_method == 'soft':
        indices = soft_nms_cpu(bboxes, scores, iou_threshold, topk)
    else:
        indices = nms(bboxes, scores, iou_threshold)
        if len(indices) > topk:
            indices = indices[scores[indices].topk(topk)[1]]
        elif len(indices) < topk:
            # Report how many boxes survived NMS, not how many went in, and
            # stay silent when exactly ``topk`` are left.
            warnings.warn(
                "Only %d RoIs left after nms rather than top %d"
                % (len(indices), topk))

    bboxes = BBox.convert(
        bboxes, format=BBox.LTRB, to=BBox.LTWH, inplace=True)

    return [
        {
            'image_id': -1,
            # Column 0 was dropped before the max, so shift labels back.
            'category_id': labels[ind].item() + 1,
            'bbox': bboxes[ind].tolist(),
            'score': scores[ind].item(),
        }
        for ind in indices
    ]
def __call__(self, rois, loc_p, cls_p, predict_mask):
    """Run RoI-based inference for every image of a batch.

    NOTE: ``rois`` is converted from LTRB to XYWH in place.

    Args:
        rois: Batched RoIs; the first two dimensions are
            ``(batch_size, num_rois)``.
        loc_p: Box predictions, reshaped to ``(batch_size, num_rois, -1)``.
        cls_p: Class predictions, reshaped the same way.
        predict_mask: Callable ``(image_index, indices) -> mask logits``, or
            ``None`` to skip mask prediction.

    Returns:
        One list of detection dicts per image.
    """
    batch_size, num_rois = rois.size()[:2]
    rois = BBox.convert(rois, BBox.LTRB, BBox.XYWH, inplace=True)
    loc_p = loc_p.view(batch_size, num_rois, -1)
    cls_p = cls_p.view(batch_size, num_rois, -1)

    image_dets = []
    for i in range(batch_size):
        if predict_mask is None:
            # Pass None through so the per-image routine skips masks instead
            # of calling a wrapper around a missing callable.
            mask_fn = None
        else:
            # Bind the image index eagerly (default arg) rather than by
            # late-binding closure.
            mask_fn = lambda indices, i=i: predict_mask(i, indices)
        dets = roi_based_inference(
            rois[i], loc_p[i], cls_p[i], mask_fn,
            self.iou_threshold, self.topk, self.nms_method)
        image_dets.append(dets)
    return image_dets
def __init__(self, anchors, pos_thresh=0.5, neg_thresh=None,
             get_label=get('category_id'), debug=False):
    """Store flattened anchors and the matching configuration.

    Args:
        anchors: Anchor collection; flattened with ``flatten`` and treated
            as XYWH.
        pos_thresh: Stored IoU threshold for positive matches.
        neg_thresh: Stored negative/ignore threshold (``None`` by default).
        get_label: Callable mapping an annotation to a label; defaults to
            ``get('category_id')``.
        debug: Stored debug flag.
    """
    flat_anchors = flatten(anchors)
    self.anchors_xywh = flat_anchors
    # Keep an LTRB copy alongside the XYWH anchors.
    self.anchors_ltrb = BBox.convert(flat_anchors, BBox.XYWH, BBox.LTRB)

    self.pos_thresh, self.neg_thresh = pos_thresh, neg_thresh
    self.get_label = get_label
    self.debug = debug
def match_rois(anns, rois, pos_thresh=0.5, n_samples=64, pos_neg_ratio=1 / 3):
    """Match RoIs to annotations and sample a fixed-size training subset.

    The highest-IoU RoI of each annotation is assigned to it first; then
    every RoI whose IoU with an annotation exceeds ``pos_thresh`` is assigned
    to that annotation (annotations visited in order, later ones overwrite).

    Args:
        anns: Sequence of annotation dicts with ``'bbox'`` (read as LTWH) and
            ``'category_id'``.
        rois: RoI tensor, treated as LTRB.
        pos_thresh: IoU above which an RoI is matched.
        n_samples: Total number of RoIs to sample.
        pos_neg_ratio: Desired positive-to-negative ratio of the sample.

    Returns:
        ``(loc_t, cls_t, indices)``: regression targets of the sampled
        positives, labels of all sampled RoIs and their positions (positives
        first).

        NOTE(review): with no annotations only the 2-tuple ``(loc_t, cls_t)``
        over all RoIs is returned — callers must handle both arities.
    """
    num_anns = len(anns)
    num_rois = len(rois)
    loc_t = rois.new_zeros(num_rois, 4)
    cls_t = loc_t.new_zeros(num_rois, dtype=torch.long)
    if num_anns == 0:
        # Checked before any box conversion: nothing else is needed here.
        return loc_t, cls_t

    rois_xywh = BBox.convert(rois, BBox.LTRB, BBox.XYWH)
    bboxes = loc_t.new_tensor([ann['bbox'] for ann in anns])
    bboxes = BBox.convert(bboxes, format=BBox.LTWH, to=BBox.XYWH, inplace=True)
    labels = loc_t.new_tensor(
        [ann['category_id'] for ann in anns], dtype=torch.long)
    bboxes_ltrb = BBox.convert(bboxes, BBox.XYWH, BBox.LTRB)
    ious = iou_mn(bboxes_ltrb, rois)

    _, indices = ious.max(dim=1)
    loc_t[indices] = coords_to_target(bboxes, rois_xywh[indices])
    cls_t[indices] = labels

    pos = ious > pos_thresh
    # Iterate over every annotation; zipping with range(num_rois) would
    # silently drop annotations whenever there are fewer RoIs than annotations.
    for ipos, bbox, label in zip(pos, bboxes, labels):
        loc_t[ipos] = coords_to_target(bbox, rois_xywh[ipos])
        cls_t[ipos] = label

    pos = cls_t != 0
    n_pos = int(n_samples * pos_neg_ratio / (pos_neg_ratio + 1))
    n_neg = n_samples - n_pos
    pos_indices = sample(torch.nonzero(pos).squeeze(1), n_pos)
    neg_indices = sample(torch.nonzero(~pos).squeeze(1), n_neg)

    loc_t = loc_t[pos_indices]
    indices = torch.cat([pos_indices, neg_indices], dim=0)
    cls_t = cls_t[indices]
    return loc_t, cls_t, indices
def __call__(self, rois, loc_p, cls_p, log_var_p=None):
    """Run RoI-based inference for every image of a batch.

    Softer-NMS inference is used when ``self.nms == 'softer'`` and
    ``log_var_p`` is supplied; otherwise ``roi_based_inference`` is used.
    NOTE: ``rois`` is converted from LTRB to XYWH in place.

    Args:
        rois: Batched RoIs; the first two dimensions are
            ``(batch_size, num_rois)``.
        loc_p: Box predictions, reshaped to ``(batch_size, num_rois, -1)``.
        cls_p: Class predictions, reshaped the same way.
        log_var_p: Optional log-variance predictions, reshaped the same way.

    Returns:
        One list of detection dicts per image.
    """
    batch_size, num_rois = rois.size()[:2]
    rois = BBox.convert(rois, BBox.LTRB, BBox.XYWH, inplace=True)
    loc_p = loc_p.view(batch_size, num_rois, -1)
    cls_p = cls_p.view(batch_size, num_rois, -1)
    if log_var_p is not None:
        log_var_p = log_var_p.view(batch_size, num_rois, -1)
    use_softer = self.nms == 'softer' and log_var_p is not None

    image_dets = []
    for i in range(batch_size):
        if use_softer:
            dets = softer_roi_based_inference(
                rois[i], loc_p[i], cls_p[i], log_var_p[i],
                self.iou_threshold, self.topk)
        else:
            # Keyword arguments: roi_based_inference takes conf_threshold
            # before iou_threshold, so passing them positionally in the
            # opposite order swaps the two thresholds.
            dets = roi_based_inference(
                rois[i], loc_p[i], cls_p[i],
                conf_threshold=self.conf_threshold,
                iou_threshold=self.iou_threshold,
                topk=self.topk,
                nms_method=self.nms)
        image_dets.append(dets)
    return image_dets