def _do_test(b1, b2):
    # Compute IoU overlap with the cython implementation
    cython_iou = box_utils.bbox_overlaps(b1, b2)
    # Compute IoU overlap with the COCO API implementation
    # (requires converting boxes from xyxy to xywh format)
    xywh_b1 = box_utils.xyxy_to_xywh(b1)
    xywh_b2 = box_utils.xyxy_to_xywh(b2)
    not_crowd = [int(False)] * b2.shape[0]
    coco_ious = COCOmask.iou(xywh_b1, xywh_b2, not_crowd)
    # IoUs should be similar
    np.testing.assert_array_almost_equal(cython_iou, coco_ious, decimal=5)
def test_bbox_dataset_to_prediction_roundtrip(self):
    """Simulate the process of reading a ground-truth box from a dataset,
    making predictions from proposals, converting the predictions back to
    the dataset format, and then using the COCO API to compute IoU overlap
    between the gt box and the predictions. These should have an IoU of 1.
    """
    weights = (5, 5, 10, 10)
    # 1/ "read" a box from a dataset in the default (x1, y1, w, h) format
    gt_xywh_box = [10, 20, 100, 150]
    # 2/ convert it to our internal (x1, y1, x2, y2) format
    gt_xyxy_box = box_utils.xywh_to_xyxy(gt_xywh_box)
    # 3/ consider nearby proposal boxes
    prop_xyxy_boxes = random_boxes(gt_xyxy_box, 10, 10)
    # 4/ compute proposal-to-gt transformation deltas
    deltas = box_utils.bbox_transform_inv(
        prop_xyxy_boxes, np.array([gt_xyxy_box]), weights=weights
    )
    # 5/ use deltas to transform proposals to xyxy predicted boxes
    pred_xyxy_boxes = box_utils.bbox_transform(
        prop_xyxy_boxes, deltas, weights=weights
    )
    # 6/ convert xyxy predicted boxes to xywh predicted boxes
    pred_xywh_boxes = box_utils.xyxy_to_xywh(pred_xyxy_boxes)
    # 7/ use the COCO API to compute IoU
    not_crowd = [int(False)] * pred_xywh_boxes.shape[0]
    ious = COCOmask.iou(pred_xywh_boxes, np.array([gt_xywh_box]), not_crowd)
    np.testing.assert_array_almost_equal(ious, np.ones(ious.shape))
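For reference, a small sketch of the two box formats the test above converts between, assuming the Detectron-style pixel-inclusive convention in which an (x1, y1, x2, y2) box of width w satisfies x2 = x1 + w - 1; the numbers are illustrative.

gt_xywh = [10, 20, 100, 150]               # dataset format: (x1, y1, w, h)
gt_xyxy = box_utils.xywh_to_xyxy(gt_xywh)  # -> (10, 20, 109, 169) under that convention
gt_back = box_utils.xyxy_to_xywh(gt_xyxy)  # round-trips to (10, 20, 100, 150)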
Example #5
def _mammo_results_one_category(mammo_dataset, boxes, cat_id):
    results = []
    image_ids = mammo_dataset._image_index
    image_ids.sort()
    # if mammo_dataset._image_set == 'train':
    #     image_ids = image_ids[:400]
    assert len(boxes) == len(image_ids)
    for i, image_id in enumerate(image_ids):
        dets = boxes[i]
        if isinstance(dets, list) and len(dets) == 0:
            continue
        dets = dets.astype(np.float)
        scores = dets[:, -1]
        xywh_dets = box_utils.xyxy_to_xywh(dets[:, 0:4])
        xs = xywh_dets[:, 0]
        ys = xywh_dets[:, 1]
        ws = xywh_dets[:, 2]
        hs = xywh_dets[:, 3]
        results.extend([{
            'image_id': image_id,
            'category_id': cat_id,
            'bbox': [xs[k], ys[k], ws[k], hs[k]],
            'score': scores[k]
        } for k in range(dets.shape[0])])
    return results
def _coco_bbox_results_one_category(json_dataset, boxes, cat_id):
    results = []
    # image_ids = json_dataset.COCO.getImgIds()
    # image_ids.sort()
    # assert len(boxes) == len(image_ids)
    for i, dets in enumerate(boxes):
        dets = boxes[i]
        if isinstance(dets, list) and len(dets) == 0:
            continue
        dets = dets.astype(np.float)
        scores = dets[:, -1]
        xywh_dets = box_utils.xyxy_to_xywh(dets[:, 0:4])
        xs = xywh_dets[:, 0]
        ys = xywh_dets[:, 1]
        ws = xywh_dets[:, 2]
        hs = xywh_dets[:, 3]
        results.extend([
            {
                'image_id': i,  # dummy, to be compatible with COCO
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k]
            } for k in range(dets.shape[0])
        ])
    return results
Example #7
 def convert_raw_predictions_to_objs(self, annots, image_id):
     if len(annots['boxes']) == 0:
         return []
     objs = []
     N = annots['boxes'].shape[0]
     for i in range(N):
         obj = {}
         # COCO labels are in xywh format, but I make predictions in xyxy
         # Remove the score from box before converting
         obj['bbox'] = box_utils.xyxy_to_xywh(annots['boxes'][i][
             np.newaxis, :4]).reshape((-1,)).tolist()
         obj['num_keypoints'] = annots['poses'][i].shape[-1]
         assert(obj['num_keypoints'] == cfg.KRCNN.NUM_KEYPOINTS)
         obj['segmentation'] = []
         obj['area'] = obj['bbox'][-1] * obj['bbox'][-2]
         obj['iscrowd'] = False
         pose = annots['poses'][i][:3].transpose()
         pose[pose[:, -1] >= 2.0, -1] = 2
         pose[pose[:, -1] < 2.0, -1] = 0
         obj['keypoints'] = pose.reshape((-1)).tolist()
         obj['track_id'] = annots['tracks'][i]
         obj['image_id'] = image_id
         obj['category_id'] = 1  # person
         objs.append(obj)
     return objs
Example #8
def _filter_crowd_proposals(roidb, crowd_thresh):
    """Finds proposals that are inside crowd regions and marks them as
    overlap = -1 with each ground-truth rois, which means they will be excluded
    from training.
    """
    for entry in roidb:
        gt_overlaps = entry['gt_overlaps'].toarray()
        crowd_inds = np.where(entry['is_crowd'] == 1)[0]
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        if len(crowd_inds) == 0 or len(non_gt_inds) == 0:
            continue
        crowd_boxes = box_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :])
        non_gt_boxes = box_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :])
        iscrowd_flags = [int(True)] * len(crowd_inds)
        ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd_flags)
        bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0]
        gt_overlaps[non_gt_inds[bad_inds], :] = -1
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(gt_overlaps)
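A short sketch of the crowd-IoU convention this filter relies on, with made-up xywh boxes: when a ground-truth box is flagged iscrowd=1, COCOmask.iou divides the intersection by the area of the proposal rather than by the union, so a proposal lying entirely inside a crowd region scores 1.0 and is suppressed by any reasonable crowd_thresh.

crowd_box = np.array([[0., 0., 100., 100.]])  # xywh, marked as crowd
proposal = np.array([[10., 10., 20., 20.]])   # xywh, fully inside the crowd box
print(COCOmask.iou(proposal, crowd_box, [int(True)]))  # ~1.0 -> overlap set to -1 above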
Example #10
def track(cfg_path, opts):

    # Setup tracker configuration
    merge_cfg_from_file(cfg_path)

    opts = opts.split(' ') if opts else []
    if len(opts) > 0:
        merge_cfg_from_list(opts)

    device = torch.device('cuda:{}'.format(0))

    tracker = SiamTracker(device)

    def load_image(img_path, use_pil):
        if use_pil:
            pil_img = Image.open(img_path)
            if pil_img.mode == 'L':
                pil_img = pil_img.convert(
                    'RGB')  # convert to RGB 3 channels if necessary
            im_tensor = to_tensor(pil_img)
        else:
            im = cv2.imread(img_path, cv2.IMREAD_COLOR)  # HxWxC
            im_tensor = torch.from_numpy(np.transpose(im, (2, 0, 1))).float()
        im_tensor = im_tensor.unsqueeze(0).to(device)  # 1*C*H*W
        return im_tensor

    # start to track
    handle = vot.VOT("polygon")
    Polygon = handle.region()

    box_cxcywh = vot.get_axis_aligned_bbox(Polygon)
    # convert to xyxy
    box_xyxy = ubox.xcycwh_to_xyxy(box_cxcywh)

    image_file = handle.frame()

    if not image_file:
        sys.exit(0)

    im_tensor = load_image(image_file, tracker.use_pil)
    tracker.tracker.init_tracker(im_tensor, box_xyxy)

    while True:
        image_file = handle.frame()
        if not image_file:
            break
        im_tensor = load_image(image_file, tracker.use_pil)
        box_xyxy = tracker.tracker.predict_next_frame(im_tensor, box_xyxy)
        box_xywh = ubox.xyxy_to_xywh(box_xyxy)

        handle.report(
            Rectangle(box_xywh[0], box_xywh[1], box_xywh[2], box_xywh[3]))
Example #11
def _wad_bbox_results_one_category(boxes, cat_id, args):
    results = []
    image_ids = args.image_ids
    assert len(boxes) == len(image_ids)
    for i, image_id in enumerate(image_ids):
        dets = boxes[i]
        if isinstance(dets, list) and len(dets) == 0:
            continue
        dets = dets.astype(np.float)
        scores = dets[:, -1]
        xywh_dets = box_utils.xyxy_to_xywh(dets[:, 0:4])
        xs = xywh_dets[:, 0]
        ys = xywh_dets[:, 1]
        ws = xywh_dets[:, 2]
        hs = xywh_dets[:, 3]
        results.extend(
            [{'image_id': image_id,
              'category_id': cat_id,
              'bbox': [xs[k], ys[k], ws[k], hs[k]],
              'score': scores[k]} for k in range(dets.shape[0])])
    return results
Example #12
def _coco_bbox_results_one_category(json_dataset, boxes, cat_id):
    results = []
    image_ids = json_dataset.COCO.getImgIds()
    image_ids.sort()
    assert len(boxes) == len(image_ids)
    for i, image_id in enumerate(image_ids):
        dets = boxes[i]
        if isinstance(dets, list) and len(dets) == 0:
            continue
        dets = dets.astype(np.float)
        scores = dets[:, -1]
        xywh_dets = box_utils.xyxy_to_xywh(dets[:, 0:4])
        xs = xywh_dets[:, 0]
        ys = xywh_dets[:, 1]
        ws = xywh_dets[:, 2]
        hs = xywh_dets[:, 3]
        results.extend(
            [{'image_id': image_id,
              'category_id': cat_id,
              'bbox': [xs[k], ys[k], ws[k], hs[k]],
              'score': scores[k]} for k in range(dets.shape[0])])
    return results
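A sketch of how a per-category results list in this format is typically consumed downstream (hypothetical function and file names; assumes pycocotools is installed): dump the concatenated results to JSON and feed them to COCOeval for bbox evaluation.

import json
from pycocotools.cocoeval import COCOeval

def evaluate_bbox_results_sketch(json_dataset, all_results, res_file='/tmp/bbox_results.json'):
    # all_results: concatenation of the per-category lists built above
    with open(res_file, 'w') as f:
        json.dump(all_results, f)
    coco_gt = json_dataset.COCO          # pycocotools COCO object for the ground truth
    coco_dt = coco_gt.loadRes(res_file)  # detections in COCO results format
    coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
    return coco_eval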
def convert_cityscapes_instance_only(data_dir, out_dir):
    """Convert from cityscapes format to COCO instance seg format - polygons"""
    sets = [
        'gtFine_val',
        # 'gtFine_train',
        # 'gtFine_test',

        # 'gtCoarse_train',
        # 'gtCoarse_val',
        # 'gtCoarse_train_extra'
    ]
    ann_dirs = [
        'gtFine_trainvaltest/gtFine/val',
        # 'gtFine_trainvaltest/gtFine/train',
        # 'gtFine_trainvaltest/gtFine/test',

        # 'gtCoarse/train',
        # 'gtCoarse/train_extra',
        # 'gtCoarse/val'
    ]
    json_name = 'instancesonly_filtered_%s.json'
    ends_in = '%s_polygons.json'
    img_id = 0
    ann_id = 0
    cat_id = 1
    category_dict = {}

    category_instancesonly = [
        'person',
        'rider',
        'car',
        'truck',
        'bus',
        'train',
        'motorcycle',
        'bicycle',
    ]

    for data_set, ann_dir in zip(sets, ann_dirs):
        print('Starting %s' % data_set)
        ann_dict = {}
        images = []
        annotations = []
        ann_dir = os.path.join(data_dir, ann_dir)
        for root, _, files in os.walk(ann_dir):
            for filename in files:
                if filename.endswith(ends_in % data_set.split('_')[0]):
                    if len(images) % 50 == 0:
                        print("Processed %s images, %s annotations" %
                              (len(images), len(annotations)))
                    json_ann = json.load(open(os.path.join(root, filename)))
                    image = {}
                    image['id'] = img_id
                    img_id += 1

                    image['width'] = json_ann['imgWidth']
                    image['height'] = json_ann['imgHeight']
                    image['file_name'] = filename[:-len(
                        ends_in % data_set.split('_')[0])] + 'leftImg8bit.png'
                    image['seg_file_name'] = filename[:-len(
                        ends_in % data_set.split('_')[0])] + \
                        '%s_instanceIds.png' % data_set.split('_')[0]
                    images.append(image)

                    fullname = os.path.join(root, image['seg_file_name'])
                    objects = cs.instances2dict_with_polygons(
                        [fullname], verbose=False)[fullname]

                    for object_cls in objects:
                        if object_cls not in category_instancesonly:
                            continue  # skip non-instance categories

                        for obj in objects[object_cls]:
                            if obj['contours'] == []:
                                print('Warning: empty contours.')
                                continue  # skip objects with empty contours

                            len_p = [len(p) for p in obj['contours']]
                            if min(len_p) <= 4:
                                print('Warning: invalid contours.')
                                continue  # skip objects with degenerate contours

                            ann = {}
                            ann['id'] = ann_id
                            ann_id += 1
                            ann['image_id'] = image['id']
                            ann['segmentation'] = obj['contours']

                            if object_cls not in category_dict:
                                category_dict[object_cls] = cat_id
                                cat_id += 1
                            ann['category_id'] = category_dict[object_cls]
                            ann['iscrowd'] = 0
                            ann['area'] = obj['pixelCount']
                            ann['bbox'] = bboxs_util.xyxy_to_xywh(
                                segms_util.polys_to_boxes(
                                    [ann['segmentation']])).tolist()[0]

                            annotations.append(ann)

        ann_dict['images'] = images
        categories = [{
            "id": category_dict[name],
            "name": name
        } for name in category_dict]
        ann_dict['categories'] = categories
        ann_dict['annotations'] = annotations
        print("Num categories: %s" % len(categories))
        print("Num images: %s" % len(images))
        print("Num annotations: %s" % len(annotations))
        with open(os.path.join(out_dir, json_name % data_set), 'w') as outfile:
            outfile.write(json.dumps(ann_dict))
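A quick sanity check one might run on the file written above (a sketch; the path is hypothetical and depends on out_dir): the output is standard COCO instance JSON, so it should load with the COCO API.

from pycocotools.coco import COCO

coco = COCO('/path/to/out_dir/instancesonly_filtered_gtFine_val.json')
print('images:', len(coco.getImgIds()))
print('annotations:', len(coco.getAnnIds()))
print('categories:', [c['name'] for c in coco.loadCats(coco.getCatIds())])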
Example #15
def deduplicate_regions(regions, iou_threshold=0.5):
    """This functions accepts pre-processed region descriptions for a given image, and removes regions that are redundant.
    Two regions are deemed redundant if 1) the text is closely matching 2) the IOU between region boxes is > iou_threshold
    A cleaned description is returned.
    """
    def helper_merge(regions):
        if len(regions) <= 1:
            return regions
        uf = UnionFind(len(regions))
        for r in regions:
            spans, txt2 = get_canonical_spans(r["tokens_positive"],
                                              r["caption"])
            if txt != txt2:
                raise PreprocessError(
                    f"inconsistent canonicalization fct. Mismatch: '{txt}' and '{txt2}'"
                )
            r["cano_tokens"] = spans

        for r1 in range(len(regions)):
            for r2 in range(r1 + 1, len(regions)):
                compatible = True
                assert len(regions[r1]["boxes"]) == len(
                    regions[r1]["cano_tokens"])
                assert len(regions[r2]["boxes"]) == len(
                    regions[r2]["cano_tokens"])
                ious = box_iou_helper(regions[r1]["boxes"],
                                      regions[r2]["boxes"])
                for b1 in range(len(regions[r1]["cano_tokens"])):
                    for b2 in range(len(regions[r2]["cano_tokens"])):
                        if (len(regions[r1]["cano_tokens"][b1]) == 0
                                or len(regions[r2]["cano_tokens"][b2])
                                == 0) or (spanlist_intersect_spanlist(
                                    regions[r1]["cano_tokens"][b1],
                                    regions[r2]["cano_tokens"][b2])
                                          and ious[b1][b2] < iou_threshold):
                            compatible = False
                            break
                    if not compatible:
                        break
                if compatible:
                    uf.unite(r1, r2)
        compo2regions = defaultdict(list)
        for i, r in enumerate(regions):
            compo2regions[uf.find(i)].append(r)

        final_regions = []
        for reg_list in compo2regions.values():
            if len(reg_list) == 1:
                final_regions.append(reg_list[0])
            else:
                # We pick as representative of this cluster the region with the most boxes
                sorted_regions = sorted([(len(r["boxes"]), i)
                                         for i, r in enumerate(reg_list)],
                                        reverse=True)
                reg_ids = [sr[1] for sr in sorted_regions]
                # We need to put the boxes and token spans in buckets
                cano_spans_buckets = []
                orig_spans_buckets = []
                boxes_buckets = []
                for idx in reg_ids:
                    for b in range(len(reg_list[idx]["boxes"])):
                        # find the bucket
                        bucket = -1
                        for j in range(len(cano_spans_buckets)):
                            if spanlist_intersect_spanlist(
                                    reg_list[idx]["cano_tokens"][b],
                                    cano_spans_buckets[j]):
                                bucket = j
                                break
                        if bucket == -1:
                            # bucket not found, creating one.
                            if idx != reg_ids[0]:
                                # This shouldn't happen. But if it does, we give up on the merging
                                return regions
                                assert idx == reg_ids[0], (
                                    "TODO: if this triggers, it means another region has token spans that "
                                    "aren't covered by the main region. We need to create a new token span, "
                                    "which involves finding the span in the original sentence of the main "
                                    "region. Don't forget to update the negative tokens"
                                )

                            bucket = len(orig_spans_buckets)
                            orig_spans_buckets.append(
                                reg_list[idx]["tokens_positive"][b])
                            cano_spans_buckets.append(
                                reg_list[idx]["cano_tokens"][b])
                            boxes_buckets.append([reg_list[idx]["boxes"][b]])
                        else:
                            boxes_buckets[bucket].append(
                                reg_list[idx]["boxes"][b])
                assert len(orig_spans_buckets) == len(boxes_buckets)
                merged_region = deepcopy(reg_list[reg_ids[0]])
                merged_region["tokens_positive"] = []
                merged_region["boxes"] = []
                for i in range(len(boxes_buckets)):
                    dedup_objs = combine_boxes(boxes_buckets[i],
                                               iou_threshold=0.5)
                    merged_region["boxes"] += dedup_objs
                    merged_region["tokens_positive"] += [
                        orig_spans_buckets[i] for _ in range(len(dedup_objs))
                    ]
                final_regions.append(merged_region)
        for r in final_regions:
            del r["cano_tokens"]
        return final_regions

    txt2region = defaultdict(list)
    for r in regions:
        txt2region[normalize_sentence(r["caption"])].append(r)

    stupid_sentence_set = set(["wall", "side", "building"])
    final_regions = []
    for txt, regions in txt2region.items():
        # Edge case, we remove the sentences like "the wall on the side of the building" which are uninformative and have spurious boxes
        if "wall" in txt and set(
                txt.strip().split(" ")).issubset(stupid_sentence_set):
            continue
        if len(regions) == 1:
            final_regions.append(deepcopy(regions[0]))
        else:
            # print(txt)

            regions_with_boxes = [r for r in regions if r["found_objects"]]
            all_boxes = sum([r["boxes"] for r in regions_with_boxes], [])
            # print("regions with boxes", len(regions_with_boxes))

            regions_without_boxes = []
            for r in regions:
                if not r["found_objects"]:
                    # we consider than one of the region with boxes will be better suited and drop this one
                    # if there is a positive iou. Otherwise, we have to keep it
                    if len(regions_with_boxes) == 0 or box_iou_helper(
                            all_boxes, r["boxes"]).max().item() < 0.1:
                        regions_without_boxes.append(r)

            # print("regions without boxes", len(regions_without_boxes))

            try:
                new_regions_with_boxes = helper_merge(regions_with_boxes)
            except PreprocessError as e:
                print("skipping", e)
                # Ouch, hit a cornercase, we give up on the merge
                new_regions_with_boxes = regions_with_boxes
            try:
                new_regions_without_boxes = helper_merge(regions_without_boxes)
            except PreprocessError as e:
                print("skipping", e)
                # Ouch, hit a cornercase, we give up on the merge
                new_regions_without_boxes = regions_without_boxes

            # now collapse into one big region. We do it only when the captions are exactly matching, otherwise it's a nightmare to recompute spans
            capt2region = defaultdict(list)
            for r in new_regions_with_boxes + new_regions_without_boxes:
                capt2region[r["caption"]].append(r)
            for capt, reg_list in capt2region.items():
                all_boxes = sum([r["boxes"] for r in reg_list], [])
                all_tokens = sum([r["tokens_positive"] for r in reg_list], [])
                compo2boxes, compo2id = get_boxes_equiv(all_boxes,
                                                        iou_threshold=0.75)
                final_boxes = []
                final_tokens = []
                if compo2boxes is not None:
                    for compo in compo2boxes.keys():
                        box_list = compo2boxes[compo]
                        id_list = compo2id[compo]
                        final_boxes.append(
                            xyxy_to_xywh(torch.stack(box_list,
                                                     0).mean(0)).tolist())
                        final_tokens.append(
                            consolidate_spans(
                                sum([all_tokens[i] for i in id_list], []),
                                capt))
                else:
                    final_boxes = all_boxes
                    final_tokens = all_tokens

                merged_region = {
                    "caption": capt,
                    "original_image_id": reg_list[0]["original_image_id"],
                    "original_region_id": reg_list[0]["original_region_id"],
                    "boxes": final_boxes,
                    "tokens_positive": final_tokens,
                    "tokens_negative": consolidate_spans(
                        sum([r["tokens_negative"] for r in reg_list], []), capt),
                    "found_objects": False,
                }
                final_regions.append(merged_region)

    return final_regions
def _coco_bbox_results_one_category(json_dataset, boxes, cat_id):
    results = []
    image_ids = json_dataset.COCO.getImgIds()
    image_ids.sort()
    assert len(boxes) == len(image_ids)
    for i, image_id in enumerate(image_ids):
        dets = boxes[i]
        if isinstance(dets, list) and len(dets) == 0:
            continue
        dets = dets.astype(np.float)
        # dets are assumed to be (x1, y1, x2, y2, score, angle)
        scores = dets[:, 4]
        angles = dets[:, -1]
        xywh_dets = box_utils.xyxy_to_xywh(dets[:, 0:4])
        xs = xywh_dets[:, 0]
        ys = xywh_dets[:, 1]
        ws = xywh_dets[:, 2]
        hs = xywh_dets[:, 3]

        for k in range(dets.shape[0]):
            # Center and size of the axis-aligned detection box
            cx = xs[k] + ws[k] / 2.0
            cy = ys[k] + hs[k] / 2.0
            w = ws[k]
            h = hs[k]
            theta = angles[k]
            theta_pi = theta * np.pi / 180

            # Recover the axis lengths (a, b) of the rotated box by inverting
            # w^2/4 = (a/2)^2 cos^2(theta) + (b/2)^2 sin^2(theta) and
            # h^2/4 = (a/2)^2 sin^2(theta) + (b/2)^2 cos^2(theta), i.e. the
            # relation between an ellipse with semi-axes a/2, b/2 at angle
            # theta and its axis-aligned bounding box (w, h)
            b = 2 * np.sqrt(
                (h * h / 4 * np.cos(theta_pi) ** 2 - w * w / 4 * np.sin(theta_pi) ** 2)
                / (np.cos(theta_pi) ** 4 - np.sin(theta_pi) ** 4))
            a = 2 * np.sqrt(
                (h * h / 4 * np.sin(theta_pi) ** 2 - w * w / 4 * np.cos(theta_pi) ** 2)
                / (np.sin(theta_pi) ** 4 - np.cos(theta_pi) ** 4))

            # End points of the two axes
            rp1_x = cx + a / 2.0 * math.cos(theta_pi)
            rp1_y = cy + a / 2.0 * math.sin(theta_pi)
            rp2_x = cx - a / 2.0 * math.cos(theta_pi)
            rp2_y = cy - a / 2.0 * math.sin(theta_pi)
            rp3_x = cx + b / 2.0 * math.sin(-theta_pi)
            rp3_y = cy + b / 2.0 * math.cos(-theta_pi)
            rp4_x = cx - b / 2.0 * math.sin(-theta_pi)
            rp4_y = cy - b / 2.0 * math.cos(-theta_pi)

            # Corners of the rotated rectangle spanned by those axes
            p1_x, p1_y = rp1_x - (cx - rp3_x), rp1_y - (cy - rp3_y)
            p2_x, p2_y = rp1_x - (cx - rp4_x), rp1_y - (cy - rp4_y)
            p3_x, p3_y = rp2_x - (cx - rp4_x), rp2_y - (cy - rp4_y)
            p4_x, p4_y = rp2_x - (cx - rp3_x), rp2_y - (cy - rp3_y)

            results.append({
                'image_id': image_id,
                'category_id': cat_id,
                'rect': [cx, cy, a, b],  # center, axis lengths a and b
                'bbox': [xs[k], ys[k], w, h],  # axis-aligned bbox fit
                'segmentation': [[p1_x, p1_y, p2_x, p2_y, p3_x, p3_y,
                                  p4_x, p4_y, p1_x, p1_y]],  # rotated rect fit
                'score': scores[k],
                'angle': angles[k]})
    return results