Example #1
    def __getitem__(self, idx):
        img, anno = super(COCODataset, self).__getitem__(idx)

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size)
        target.add_field("masks", masks)

        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
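Below is a minimal usage sketch for this __getitem__. The import path and the COCODataset(ann_file, root, remove_images_without_annotations, transforms=None) constructor follow the usual maskrcnn-benchmark layout and are assumptions here; the dataset paths are placeholders.

# Hypothetical usage sketch; the constructor signature and paths are assumptions.
from maskrcnn_benchmark.data.datasets.coco import COCODataset

dataset = COCODataset(
    ann_file="datasets/coco/annotations/instances_val2017.json",  # placeholder
    root="datasets/coco/val2017",                                 # placeholder
    remove_images_without_annotations=True,
)
img, target, idx = dataset[0]
print(img.size)                    # PIL (width, height)
print(target.bbox.shape)           # [num_boxes, 4], xyxy after the convert() above
print(target.get_field("labels"))  # contiguous category ids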
Example #2
    def update(self, dets: BoxList):
        W, H = dets.size
        assert dets.mode == 'xyxy'

        trks = np.zeros((len(self.trackers), 4), dtype=np.float32)
        to_del = []

        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict()
            trk[:] = pos
            if np.any(np.isnan(pos)):
                to_del.append(t)

        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)

        matched, unmatched_dets, unmatched_trks = associate(dets.bbox.numpy(), trks)

        dboxes, dmeta = dets.bbox, dets.extra_fields
        per_det_meta = [{k: v[i] for k, v in dmeta.items()} for i in range(len(dets))]

        # update matched trackers with assigned detections
        for t, trk in enumerate(self.trackers):
            if t not in unmatched_trks:
                bbox = matched[np.where(matched[:, 1] == t)[0], 0][0]
                trk.update(dboxes[bbox], per_det_meta[bbox])

        # create and initialise new trackers for unmatched detections
        for i in unmatched_dets:
            trk = KalmanTracker(dboxes[i], per_det_meta[i], int(self.max_age * 0.5), W, H)
            self.trackers.append(trk)

        i = len(self.trackers)

        detections = {"bbox": [], "index": []}
        detections.update({k: [] for k in dets.extra_fields.keys()})

        for trk in reversed(self.trackers):
            if trk.time_since_update <= self.max_age and trk.hits >= self.min_hits:
                bbox, meta = trk.get_state()
                meta['index'] = trk.id
                detections["bbox"].append(bbox)
                for k, v in meta.items():
                    detections[k].append(v)

            i -= 1
            # remove dead tracklet
            if trk.time_since_update > self.max_age:
                self.trackers.pop(i)

        if len(detections['bbox']) == 0:
            return None

        detections['bbox'] = torch.tensor(detections['bbox'], dtype=torch.float32)

        box_list = BoxList(detections['bbox'], (W, H))
        for k, v in detections.items():
            if k != 'bbox':
                if isinstance(v[0], torch.Tensor) and v[0].dim() != 0:
                    box_list.add_field(k, torch.cat(v))
                else:
                    box_list.add_field(k, torch.tensor(v))
        return box_list
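A sketch of how this update() might be driven frame by frame. The surrounding tracker object and the detection source are assumptions; the only interface relied on is the BoxList input with extra fields and the BoxList (or None) that the method returns.

# Hypothetical driving loop for the update() above; `tracker` is an instance of
# the class owning it, and `frames` yields (boxes, scores, labels) per frame.
import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

def track_sequence(tracker, frames, image_size):
    results = []
    for boxes, scores, labels in frames:
        dets = BoxList(torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
                       image_size, mode="xyxy")
        dets.add_field("scores", torch.as_tensor(scores, dtype=torch.float32))
        dets.add_field("labels", torch.as_tensor(labels, dtype=torch.int64))
        tracks = tracker.update(dets)  # BoxList with an "index" field, or None
        results.append(tracks)
    return results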
Example #3
def evaluate_box_proposals(
    predictions, dataset, thresholds=None, area="all", limit=None
):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],  # all
        [0 ** 2, 32 ** 2],  # small
        [32 ** 2, 96 ** 2],  # medium
        [96 ** 2, 1e5 ** 2],  # large
        [96 ** 2, 128 ** 2],  # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],
    ]  # 512-inf
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height), mode="xywh").convert(
            "xyxy"
        )
        gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
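A hedged example of consuming the returned dictionary, assuming `proposals` is a list of per-image BoxLists carrying an "objectness" field and `dataset` matches the interface used above; the limit of 1000 is just illustrative.

# Illustrative call; `proposals` and `dataset` are assumed to exist already.
stats = evaluate_box_proposals(proposals, dataset, area="all", limit=1000)
print("AR@1000: {:.4f}".format(stats["ar"].item()))
for t, r in zip(stats["thresholds"].tolist(), stats["recalls"].tolist()):
    print("recall @ IoU {:.2f}: {:.4f}".format(t, r))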
Example #4
    def __getitem__(self, idx):
        img, anno = super(WordDataset, self).__getitem__(idx)

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = [obj["bbox"] for obj in anno]
        if DEBUG: print('len(boxes)', len(boxes), boxes[0])
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        if DEBUG: print('len(classes)', len(classes), classes[0])
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        masks = [obj["segmentation"] for obj in anno]
        if DEBUG: print('len(masks)', len(masks), masks[0])
        masks = SegmentationMask(masks, img.size)
        target.add_field("masks", masks)

        if anno and 'keypoints' in anno[0]:
            kes = [obj["keypoints"] for obj in anno]
            kes = self.kes_gen(kes)
            if DEBUG: print('len(kes)', len(kes), kes[0])
            kes = textKES(kes, img.size)
            target.add_field("kes", kes)

        if anno and 'match_type' in anno[0]:
            mty = [obj["match_type"] for obj in anno]
            mty = MTY(mty, img.size)
            target.add_field("mty", mty)

        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
Example #5
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4
        C = box_cls.size(1) // A

        # put in the same format as anchors
        box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
        box_cls = box_cls.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        candidate_inds = box_cls > self.pre_nms_thresh

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        results = []
        for per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors in zip(
            box_cls,
            box_regression,
            pre_nms_top_n,
            candidate_inds,
            anchors):

            # Sort and select TopN
            # TODO most of this can be made out of the loop for
            # all images.
            # TODO:Yang: Not easy to do. Because the numbers of detections are
            # different in each image. Therefore, this part needs to be done
            # per image.
            per_box_cls = per_box_cls[per_candidate_inds]

            per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)

            per_candidate_nonzeros = \
                    per_candidate_inds.nonzero()[top_k_indices, :]

            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
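This variant delegates the reshaping to permute_and_flatten. A sketch of what that helper plausibly does, consistent with the inline reshaping in the later forward_for_single_feature_map example: (N, A*C, H, W) is rearranged to (N, H*W*A, C) so that scores line up with the flattened anchors.

# Sketch of the assumed permute_and_flatten helper, plus a quick shape check.
import torch

def permute_and_flatten(layer, N, A, C, H, W):
    layer = layer.view(N, A, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)  # N, H, W, A, C
    return layer.reshape(N, -1, C)

x = torch.randn(2, 9 * 80, 25, 32)  # N=2, A=9, C=80
print(permute_and_flatten(x, 2, 9, 80, 25, 32).shape)  # torch.Size([2, 7200, 80])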
Example #6
    def get_groundtruth(self, idx):
        img = self.idx_to_img[idx]
        boxes = self.detail.getBboxes(img)
        # example of 'boxes':
        # [{'bbox': [250, 209, 241, 149], 'category': 'motorbike'},
        # {'bbox': [312, 139, 109, 191], 'category': 'person'}]
        boxes = [box['bbox'] for box in boxes]  # TODO we lose the information about the enclosed object
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, self._img_size(img),
                         mode="xywh").convert("xyxy")
        target = target.clip_to_image(remove_empty=True)

        img_keypoints = self.detail.getKpts(img)
        keypoints = [skeleton['keypoints'] for skeleton in img_keypoints]

        # TODO keypoints - we lose the bbox information
        target.add_field("kpts", Keypoints(keypoints, self._img_size(img)))
        # target.add_field("mask", SegmentationMask(self.detail.getMask(img).tolist(), size=self._img_size(img)))
        # TODO getMask returns a matrix of shape (img.height, img.width) where each pixel
        # TODO holds the id of the class it belongs to. SegmentationMask

        # from getMask() doc:
        # If semantic segmentation of an image is requested (cat=instance=superpart=part=None),
        # the result is an image whose pixel values are the class IDs for that image.
        # If instance-level segmentation for one category of an image is requested (img and cat provided),
        # the result is an image whose pixel values are the instance IDs for that class and 0 everywhere else.
        target.add_field("class_mask", self.detail.getMask(img))
        target.add_field("instance_mask", self.detail.getMask(img,
                                                              cat='person'))
        target.add_field("bounds", self.detail.getBounds(img))
        target.add_field("occl", self.detail.getOccl(img))
        # TODO human parts?

        return target
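A hedged read-back of the target built above, using only calls that already appear in the snippet (get_groundtruth and BoxList.get_field); `dataset` is assumed to be an instance of the class owning this method.

# Illustrative read-back; `dataset` is assumed to exist.
target = dataset.get_groundtruth(0)
print(target.bbox.shape)                         # [num_boxes, 4] in xyxy
kpts = target.get_field("kpts")                  # Keypoints wrapper
class_mask = target.get_field("class_mask")      # (H, W) map of class ids
person_mask = target.get_field("instance_mask")  # person instance ids, 0 elsewhere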
Example #7
 def __getitem__(self, item):
     im_name = os.path.basename(self.image_lists[item])
     # print(self.image_lists[item])
     img = Image.open(self.image_lists[item]).convert("RGB")
     width, height = img.size
     if self.gts_dir is not None:
         gt_path = os.path.join(self.gts_dir, im_name + ".txt")
         words, boxes, charsbbs, segmentations, labels = self.load_gt_from_txt(
             gt_path, height, width)
         if words[0] == "":
             use_char_ann = False
         else:
             use_char_ann = True
         if not self.use_charann:
             use_char_ann = False
         target = BoxList(boxes[:, :4],
                          img.size,
                          mode="xyxy",
                          use_char_ann=use_char_ann)
         if self.ignore_difficult:
             labels = torch.from_numpy(np.array(labels))
         else:
             labels = torch.ones(len(boxes))
         target.add_field("labels", labels)
         masks = SegmentationMask(segmentations, img.size)
         target.add_field("masks", masks)
         char_masks = SegmentationCharMask(charsbbs,
                                           words=words,
                                           use_char_ann=use_char_ann,
                                           size=img.size)
         target.add_field("char_masks", char_masks)
     else:
         target = None
     if self.transforms is not None:
         img, target = self.transforms(img, target)
     if self.vis:
         new_im = img.numpy().copy().transpose([1, 2, 0]) + [
             102.9801,
             115.9465,
             122.7717,
         ]
         new_im = Image.fromarray(new_im.astype(np.uint8)).convert("RGB")
         mask = target.extra_fields["masks"].polygons[0].convert("mask")
         mask = Image.fromarray(
             (mask.numpy() * 255).astype(np.uint8)).convert("RGB")
         if self.use_charann:
             m, _ = (target.extra_fields["char_masks"].chars_boxes[0].
                     convert("char_mask"))
             color = self.creat_color_map(37, 255)
             color_map = color[m.numpy().astype(np.uint8)]
             char = Image.fromarray(color_map.astype(
                 np.uint8)).convert("RGB")
             char = Image.blend(char, new_im, 0.5)
         else:
             char = new_im
         new = Image.blend(char, mask, 0.5)
         img_draw = ImageDraw.Draw(new)
         for box in target.bbox.numpy():
             box = list(box)
             box = box[:2] + [box[2], box[1]] + box[2:] + [box[0], box[3]
                                                           ] + box[:2]
             img_draw.line(box, fill=(255, 0, 0), width=2)
         new.save("./vis/char_" + im_name)
     return img, target, self.image_lists[item]
Example #8
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        if cfg.ROTATE:
            for i in range(num_images):
                scores = boxlists[i].get_field("scores")
                labels = boxlists[i].get_field("labels")
                boxes = boxlists[i].bbox
                boxlist = boxlists[i]
                result = []
                # skip the background
                for j in range(1, self.num_classes):
                    inds = (labels == j).nonzero().view(-1)

                    scores_j = scores[inds]
                    boxes_j = boxes[inds, :].view(-1, 8)
                    boxlist_for_class = BoxList(boxes_j,
                                                boxlist.size,
                                                mode="xy8")
                    boxlist_for_class.add_field("scores", scores_j)
                    boxlist_for_class = boxlist_rnms(boxlist_for_class,
                                                     self.nms_thresh,
                                                     score_field="scores")
                    num_labels = len(boxlist_for_class)
                    boxlist_for_class.add_field(
                        "labels",
                        torch.full((num_labels, ),
                                   j,
                                   dtype=torch.int64,
                                   device=scores.device))
                    result.append(boxlist_for_class)

                result = cat_boxlist(result)
                number_of_detections = len(result)

                # Limit to max_per_image detections **over all classes**
                if number_of_detections > self.fpn_post_nms_top_n > 0:
                    cls_scores = result.get_field("scores")
                    image_thresh, _ = torch.kthvalue(
                        cls_scores.cpu(),
                        number_of_detections - self.fpn_post_nms_top_n + 1)
                    keep = cls_scores >= image_thresh.item()
                    keep = torch.nonzero(keep).squeeze(1)
                    result = result[keep]
                results.append(result)
        else:
            for i in range(num_images):
                scores = boxlists[i].get_field("scores")
                labels = boxlists[i].get_field("labels")
                boxes = boxlists[i].bbox
                boxlist = boxlists[i]
                result = []
                # skip the background
                for j in range(1, self.num_classes):
                    inds = (labels == j).nonzero().view(-1)

                    scores_j = scores[inds]
                    boxes_j = boxes[inds, :].view(-1, 4)
                    boxlist_for_class = BoxList(boxes_j,
                                                boxlist.size,
                                                mode="xyxy")
                    boxlist_for_class.add_field("scores", scores_j)
                    boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                    self.nms_thresh,
                                                    score_field="scores")
                    num_labels = len(boxlist_for_class)
                    boxlist_for_class.add_field(
                        "labels",
                        torch.full((num_labels, ),
                                   j,
                                   dtype=torch.int64,
                                   device=scores.device))
                    result.append(boxlist_for_class)

                result = cat_boxlist(result)
                number_of_detections = len(result)

                # Limit to max_per_image detections **over all classes**
                if number_of_detections > self.fpn_post_nms_top_n > 0:
                    cls_scores = result.get_field("scores")
                    image_thresh, _ = torch.kthvalue(
                        cls_scores.cpu(),
                        number_of_detections - self.fpn_post_nms_top_n + 1)
                    keep = cls_scores >= image_thresh.item()
                    keep = torch.nonzero(keep).squeeze(1)
                    result = result[keep]
                results.append(result)
        return results
    def forward_for_single_feature_map(self, anchors, box_cls, box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = int(box_regression.size(1) / 4)
        C = int(box_cls.size(1) / A)

        # put in the same format as anchors
        box_cls = box_cls.view(N, -1, C, H, W).permute(0, 3, 4, 1, 2)
        box_cls = box_cls.reshape(N, -1, C)
        box_cls = box_cls.sigmoid()

        box_regression = box_regression.view(N, -1, 4, H, W)
        box_regression = box_regression.permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        num_anchors = A * H * W

        results = [[] for _ in range(N)]
        pre_nms_thresh = self.pre_nms_thresh
        candidate_inds = box_cls > self.pre_nms_thresh
        if candidate_inds.sum().item() == 0:
            return results

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        for batch_idx, (per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors) in enumerate(zip(
            box_cls,
            box_regression,
            pre_nms_top_n,
            candidate_inds,
            anchors)):

            # Sort and select TopN
            per_box_cls = per_box_cls[per_candidate_inds]
            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1
            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                        per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_box_loc = per_box_loc[top_k_indices]
                per_class = per_class[top_k_indices]

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist.add_field("sparse_off", per_box_loc / 9)
            boxlist.add_field("sparse_anchor_idx", per_box_loc % 9)
            boxlist.add_field("sparse_anchors",
                              per_anchors.bbox[per_box_loc, :].view(-1, 4))
            boxlist.add_field("sparse_batch",
                              per_box_loc.clone().fill_(batch_idx))

            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results[batch_idx] = boxlist

        return results
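The per-image cap in select_over_all_levels above uses torch.kthvalue to pick the score threshold that keeps exactly the top fpn_post_nms_top_n detections; a small self-contained sketch of that trick:

# Self-contained sketch of the kthvalue-based top-N capping.
import torch

scores = torch.tensor([0.9, 0.1, 0.8, 0.3, 0.7, 0.2])
top_n = 3
if len(scores) > top_n:
    # the (len - top_n + 1)-th smallest score is the threshold that keeps top_n items
    thresh, _ = torch.kthvalue(scores, len(scores) - top_n + 1)
    keep = torch.nonzero(scores >= thresh.item()).squeeze(1)
    print(scores[keep])  # tensor([0.9000, 0.8000, 0.7000])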
Example #10
    def __getitem__(self, idx):
        img, anno = super(COCODataset, self).__getitem__(idx)

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        if anno and "segmentation" in anno[0]:
            # Get dictionary of part segmentations for each object
            segmentations = [obj["segmentation"] for obj in anno]

            # Accumulate classes
            partclasses = []
            for i in range(len(segmentations)):
                partclass = [obj['class'] for obj in segmentations[i]]
                partclass = [
                    self.json_partcategory_id_to_contiguous_id[p]
                    for p in partclass
                ]
                partclass = torch.tensor(partclass)
                partclasses.append(partclass)

            # Accumulate masks
            masks = []
            for i in range(len(segmentations)):
                mask = [obj["segment"] for obj in segmentations[i]]
                mask = SegmentationMask(mask, img.size, mode='poly')
                masks.append(mask)

            # Merge all masks belonging to the same part class
            new_masks = []
            for msk, pcls in zip(masks, partclasses):
                segments = msk.get_mask_tensor()
                new_segments = torch.zeros(
                    (len(self.partcategories), segments.size()[1],
                     segments.size(2)),
                    dtype=torch.uint8)
                for partcat in range(len(self.partcategories)):
                    for n_poly in range(pcls.size()[0]):
                        if int(pcls[n_poly]) == (partcat + 1):
                            new_segments[partcat, :, :] = new_segments[
                                partcat, :, :] | segments[n_poly, :, :]
                new_mask = SegmentationMask(new_segments,
                                            img.size,
                                            mode='mask')
                new_masks.append(new_mask)

            new_partclass = [
                self.json_partcategory_id_to_contiguous_id[p + 1]
                for p in range(len(self.partcategories))
            ]
            new_partclass = torch.tensor(new_partclass)

            new_partclasses = []
            for a in range(len(new_masks)):
                new_partclasses.append(new_partclass)

            target.add_field('partlabels', new_partclasses)
            target.add_field('masks', new_masks)

        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = CarKeypoints(keypoints, img.size)
            target.add_field("keypoints", keypoints)

        target = target.clip_to_image(remove_empty=True)

        if self._transforms is not None:
            img, target = self._transforms(img, target)

        return img, target, idx
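A toy sketch of the OR-merge performed above: per-polygon binary masks are collapsed into one binary mask per part class. Sizes and ids are made up; max over the selected masks is used as the OR.

# Toy illustration of merging per-polygon masks into per-part-class masks.
import torch

num_part_classes = 3
segments = torch.randint(0, 2, (4, 8, 8), dtype=torch.uint8)  # 4 polygons, 8x8 masks
part_ids = torch.tensor([1, 3, 1, 2])                          # 1-based part class per polygon

merged = torch.zeros((num_part_classes, 8, 8), dtype=torch.uint8)
for part in range(num_part_classes):
    sel = part_ids == (part + 1)
    if sel.any():
        merged[part] = segments[sel].max(dim=0).values  # OR over the selected masks
print(merged.shape)  # torch.Size([3, 8, 8])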
Example #11
def agmap_coarse(gt_boxlist,
                 l_boxlist,
                 class_independ=False,
                 keep_small=True,
                 verbose=False):
    """
    利用真实值和低分辨率检测结果,生成agmap
    :param gt_boxlist (BoxList): 真实目标框,必须是xyxy类型
    :param l_boxlist (BoxList): 低分辨率检测结果,必须是xyxy类型
    :param class_independ (bool): 是否类别无关,只考虑proposal之间的iou
    :param keep_small (bool): 只计算小目标(< 96x96)的agmap
    :return: agmap (np.ndarray)
    """
    # Optionally drop large objects and compute the agmap only for objects smaller than 96x96
    if keep_small:
        gt_area = gt_boxlist.area()
        l_area = l_boxlist.area()
        gt_keep, l_keep = gt_area < np.square(96), l_area < np.square(96)
        if torch.sum(gt_keep) == 0:
            gt_boxlist = BoxList([[0, 0, 0, 0]], gt_boxlist.size, mode="xyxy")
            gt_boxlist.add_field("labels",
                                 torch.as_tensor([0], dtype=torch.int64))
        else:
            gt_boxlist = gt_boxlist[gt_keep]
        if torch.sum(l_keep) == 0:
            l_boxlist = BoxList([[1, 1, 1, 1]], l_boxlist.size, mode="xyxy")
            l_boxlist.add_field("labels",
                                torch.as_tensor([0], dtype=torch.int64))
            l_boxlist.add_field("scores",
                                torch.as_tensor([0], dtype=torch.float32))
        else:
            l_boxlist = l_boxlist[l_keep]
    # Initialize the agmap
    gt_w, gt_h = gt_boxlist.size
    agmap = np.zeros((2, gt_h, gt_w), np.float32)

    for i in range(len(gt_boxlist)):
        g_bbox_i = gt_boxlist[i]
        g_label = g_bbox_i.get_field("labels").item()

        if class_independ:  # class-agnostic: keep all detections
            l_boxlist_sel = l_boxlist
        else:
            l_boxlist_sel = l_boxlist[l_boxlist.get_field("labels") ==
                                      g_label]  # detections of the correctly recalled class
            if len(l_boxlist_sel) == 0:
                l_boxlist_sel = BoxList([[1, 1, 1, 1]],
                                        l_boxlist_sel.size,
                                        mode="xyxy")
                l_boxlist_sel.add_field(
                    "scores", torch.as_tensor([0], dtype=torch.float32))

        l_score = l_boxlist_sel.get_field("scores").cpu().numpy()

        iou_l = boxlist_iou(g_bbox_i, l_boxlist_sel)
        l_val, l_id = iou_l.max(dim=1)
        l_val, l_id = l_val.item(), l_id.item()  # g_bbox_i contains a single box

        g_bbox = g_bbox_i.bbox[0, :].cpu().numpy()
        g_bbox = np.round(g_bbox).astype(np.int64)  # round to integers for indexing
        g_area = (g_bbox[3] - g_bbox[1]) * (g_bbox[2] - g_bbox[0])

        l_bbox = l_boxlist_sel.bbox[l_id, :].cpu().numpy()
        l_bbox = np.round(l_bbox).astype(np.int64)
        l_area = (l_bbox[3] - l_bbox[1]) * (l_bbox[2] - l_bbox[0])

        if l_val > iou_thrs and g_area != 0:
            agmap[0, g_bbox[1]:g_bbox[3],
                  g_bbox[0]:g_bbox[2]] += (1 - l_score[l_id]) / g_area
        elif g_area != 0:
            agmap[0, g_bbox[1]:g_bbox[3], g_bbox[0]:g_bbox[2]] += 1. / g_area

    iou_l = boxlist_iou(gt_boxlist, l_boxlist)

    l_score = l_boxlist.get_field("scores").cpu().numpy()
    l_label = l_boxlist.get_field("labels").cpu().numpy()
    g_label = gt_boxlist.get_field("labels").cpu().numpy()

    l_val, l_id = iou_l.max(dim=0)
    l_val, l_id = l_val.cpu().numpy(), l_id.cpu().numpy()

    for i in range(len(l_boxlist)):
        l_bbox = l_boxlist.bbox[i, :].cpu().numpy()
        l_bbox = np.round(l_bbox).astype(np.int64)  # round to integers for indexing
        area = (l_bbox[3] - l_bbox[1]) * (l_bbox[2] - l_bbox[0])
        if ((g_label[l_id[i]] != l_label[i] and not class_independ)
                or l_val[i] < iou_miss_thrs) and area != 0:
            agmap[1, l_bbox[1]:l_bbox[3],
                  l_bbox[0]:l_bbox[2]] += l_score[i] / area  # gain from low-resolution false positives

    agmap = torch.from_numpy(agmap).unsqueeze(dim=0)
    with torch.no_grad():
        # agmap = agmap_avgpool(agmap)
        agmap = F.interpolate(agmap,
                              size=agmap_size,
                              mode='bilinear',
                              align_corners=False)
    agmap = np.squeeze(agmap.cpu().numpy())

    return agmap
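Note that iou_thrs, iou_miss_thrs, agmap_size and alpha are module-level constants that are not part of this snippet. The core pattern is accumulating a per-pixel gain of score / area over each box region; a tiny self-contained sketch of that accumulation:

# Minimal sketch of the per-pixel gain accumulation used above; values are illustrative.
import numpy as np

agmap = np.zeros((2, 100, 100), np.float32)
g_bbox = np.array([10, 20, 50, 60])  # xyxy, already rounded to ints
l_score = 0.6                        # score of the matched low-resolution detection
g_area = (g_bbox[3] - g_bbox[1]) * (g_bbox[2] - g_bbox[0])
if g_area != 0:
    # channel 0: re-detection gain for a recalled but low-confidence gt box
    agmap[0, g_bbox[1]:g_bbox[3], g_bbox[0]:g_bbox[2]] += (1 - l_score) / g_area
print(agmap[0].sum())  # ~0.4, spread uniformly over the box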
Example #12
def inference_with_agmap(image_dir,
                         ldet_dir,
                         c5_dir,
                         config_file,
                         weight_file,
                         gt=None):
    """
    读取原图像,计算粗分辨率检测结果,取出c5特征,计算agmap,分割,进行高分辨率推理,组合结果
    :param image_dir (str): 输入原始图像文件夹
    :param ldet_dir (str): 输入粗分辨率检测结果文件夹(spire格式json)
    :param c5_dir (str): 输入原始图像检测网络中的c5特征文件夹(npy格式)
    :return: None
    """

    image_fns = []
    fns = os.listdir(image_dir)
    fns.sort()

    # Load the CR model
    checkpoint = torch.load(cr_saving_fn)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = CRNet(input_channels=2048).to(device)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    from spicv.spire_anno import SpireAnno
    from spicv.detection.structures.boxlist_ops import cat_boxlist
    from spicv.detection.structures.boxlist_ops import ignored_regions_iop

    spire_anno = SpireAnno(dataset='coco')
    detector = SpireDetector(config_file, weight_file, origin_size=True)

    for f in fns:
        if f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.png'):
            ldet_fn = os.path.join(ldet_dir, f) + '.json'
            ldet_boxlist = spire_anno.to_boxlist(ldet_fn)

            c5_fn = os.path.join(c5_dir, f) + '.npy'  ## read the pre-computed c5 features directly from disk
            c5_torch = torch.from_numpy(np.load(c5_fn)).to(device)
            with torch.no_grad():
                c5_torch = F.interpolate(c5_torch,
                                         size=c5_size,
                                         mode='bilinear',
                                         align_corners=False)
                output = model(c5_torch)
            predicted_agmap = np.squeeze(output.cpu().numpy())

            sub_wins, h_size = agmap_segmentation(predicted_agmap,
                                                  ldet_boxlist.size, 800)
            image_fn = os.path.join(image_dir, f)
            image_fns.append(image_fn)
            image = cv2.imread(image_fn)
            image_h = image.copy()
            image_l = image.copy()

            image = cv2.resize(image, h_size)
            prediction_list = []
            for win in sub_wins:
                cv2.rectangle(image, (win[0], win[1]),
                              (win[0] + win[2], win[1] + win[3]), (0, 210, 0),
                              2)
                image_win = image[win[1]:win[1] + win[3],
                                  win[0]:win[0] + win[2], :]
                prediction = detector.detect(image_win)
                prediction.bbox[:, 0] += win[0]
                prediction.bbox[:, 1] += win[1]
                prediction.bbox[:, 2] += win[0]
                prediction.bbox[:, 3] += win[1]
                prediction.size = h_size
                prediction = prediction.resize(ldet_boxlist.size)
                prediction_list.append(prediction)

            if len(sub_wins) > 0:
                sub_wins = BoxList(sub_wins, h_size, mode='xywh').resize(
                    ldet_boxlist.size).convert(mode='xyxy')
                iou = ignored_regions_iop(sub_wins,
                                          ldet_boxlist,
                                          use_bbox=True)
                ldet_val, ldet_id = iou.max(dim=0)
                ldet_boxlist = ldet_boxlist[ldet_val < 0.5]

            prediction_list.append(ldet_boxlist)
            predictions = cat_boxlist(prediction_list)

            spire_anno.from_maskrcnn_benchmark(predictions, f, image.shape)

            image_show = spire_anno.visualize_boxlist(image,
                                                      predictions,
                                                      score_th=0.01)

            ## the following is for visualization only
            hdet_fn = os.path.join(h_det_dir, f) + '.json'
            hdet_boxlist = spire_anno.to_boxlist(hdet_fn)
            image_h = spire_anno.visualize_boxlist(image_h,
                                                   hdet_boxlist,
                                                   score_th=0.01)
            cv2.imshow('image_h', image_h)

            ldet_fn = os.path.join(l_det_dir, f) + '.json'
            ldet_boxlist = spire_anno.to_boxlist(ldet_fn)
            image_l = spire_anno.visualize_boxlist(image_l,
                                                   ldet_boxlist,
                                                   score_th=0.01)
            cv2.imshow('image_l', image_l)

            agmap_fn = os.path.join(agmap_saving_dir_train, f) + '.npy'
            agmap = cv2.resize(np.load(agmap_fn), ldet_boxlist.size)
            agmap_color = agmap * alpha
            agmap_color = (agmap_color + 1) * 127
            agmap_color[agmap_color > 255] = 255
            agmap_color[agmap_color < 0] = 0
            agmap_color = cv2.applyColorMap(agmap_color.astype(np.uint8),
                                            cv2.COLORMAP_HOT)
            cv2.imshow("agmap", agmap_color)

            agmap_fn = os.path.join(predicted_agmap_dir, f) + '.npy'
            agmap = cv2.resize(np.load(agmap_fn), ldet_boxlist.size)
            agmap_color = (agmap + 1) * 127
            agmap_color[agmap_color > 255] = 255
            agmap_color[agmap_color < 0] = 0
            agmap_color = cv2.applyColorMap(agmap_color.astype(np.uint8),
                                            cv2.COLORMAP_HOT)
            cv2.imshow("prediected_agmap", agmap_color)

            cv2.imshow('image', image_show)
            cv2.waitKey(100)

            print(f)

    if gt is not None:
        eval_res = spire_anno.cocoapi_eval(gt)
    print('inference done!')
Example #13
def agmap_total(gt_boxlist,
                l_boxlist,
                h_boxlist,
                class_independ=False,
                keep_small=True,
                reward=False,
                verbose=False):
    """
    利用真实值,生成agmap
    :param gt_boxlist (BoxList): 真实目标框,必须是xyxy类型
    :param l_boxlist (BoxList): 低分辨率检测结果,必须是xyxy类型
    :param h_boxlist (BoxList): 高分辨率检测结果,必须是xyxy类型
    :param class_independ (bool): 是否类别无关,只考虑proposal之间的iou
    :param keep_small (bool): 只计算小目标(< 96x96)的agmap
    :return: agmap (np.ndarray)
    """
    # Optionally drop large objects and compute the agmap only for objects smaller than 96x96
    if keep_small:
        gt_area = gt_boxlist.area()
        l_area = l_boxlist.area()
        h_area = h_boxlist.area()
        gt_keep, l_keep, h_keep = gt_area < np.square(96), l_area < np.square(
            96), h_area < np.square(96)
        if torch.sum(gt_keep) == 0:
            gt_boxlist = BoxList([[0, 0, 0, 0]], gt_boxlist.size, mode="xyxy")
            gt_boxlist.add_field("labels",
                                 torch.as_tensor([0], dtype=torch.int64))
        else:
            gt_boxlist = gt_boxlist[gt_keep]
        if torch.sum(l_keep) == 0:
            l_boxlist = BoxList([[0, 0, 0, 0]], l_boxlist.size, mode="xyxy")
            l_boxlist.add_field("labels",
                                torch.as_tensor([0], dtype=torch.int64))
            l_boxlist.add_field("scores",
                                torch.as_tensor([0], dtype=torch.float32))
        else:
            l_boxlist = l_boxlist[l_keep]
        if torch.sum(h_keep) == 0:
            h_boxlist = BoxList([[0, 0, 0, 0]], h_boxlist.size, mode="xyxy")
            h_boxlist.add_field("labels",
                                torch.as_tensor([0], dtype=torch.int64))
            h_boxlist.add_field("scores",
                                torch.as_tensor([0], dtype=torch.float32))
        else:
            h_boxlist = h_boxlist[h_keep]

    # gt_boxlist.size is (image_width, image_height); transpose to get the (height, width) array
    agmap = np.zeros(gt_boxlist.size, np.float32).T
    # Split the gain into false-positive and missed-detection parts
    agmap_split = np.zeros((1, gt_boxlist.size[1], gt_boxlist.size[0]),
                           np.float32)
    # Used for the reward evaluation
    agval = 0.
    # Render the agmap with the ground-truth boxes, or alternatively with the l_det boxes
    use_gt_bbox = True
    for i in range(len(gt_boxlist)):
        g_bbox_i = gt_boxlist[i]
        g_label = g_bbox_i.get_field("labels").item()

        if class_independ:
            l_boxlist_sel = l_boxlist
            h_boxlist_sel = h_boxlist
        else:
            l_boxlist_sel = l_boxlist[l_boxlist.get_field("labels") ==
                                      g_label]  # detections of the correctly recalled class
            h_boxlist_sel = h_boxlist[h_boxlist.get_field("labels") == g_label]
            if len(l_boxlist_sel) == 0:
                l_boxlist_sel = BoxList([[0, 0, 0, 0]],
                                        l_boxlist_sel.size,
                                        mode="xyxy")
                l_boxlist_sel.add_field(
                    "scores", torch.as_tensor([0], dtype=torch.float32))
            if len(h_boxlist_sel) == 0:
                h_boxlist_sel = BoxList([[0, 0, 0, 0]],
                                        h_boxlist_sel.size,
                                        mode="xyxy")
                h_boxlist_sel.add_field(
                    "scores", torch.as_tensor([0], dtype=torch.float32))

        l_score = l_boxlist_sel.get_field("scores").cpu().numpy()
        h_score = h_boxlist_sel.get_field("scores").cpu().numpy()

        iou_l = boxlist_iou(g_bbox_i, l_boxlist_sel)
        iou_h = boxlist_iou(g_bbox_i, h_boxlist_sel)

        l_val, l_id = iou_l.max(dim=1)
        l_val, l_id = l_val.item(), l_id.item()  # g_bbox_i contains a single box
        h_val, h_id = iou_h.max(dim=1)
        h_val, h_id = h_val.item(), h_id.item()  # g_bbox_i contains a single box

        # First score the agmap against the ground truth; three cases: both l and h recall the object, only l does, or only h does
        # g_bbox = gt_boxlist.bbox[i, :].cpu().numpy()
        g_bbox = g_bbox_i.bbox[0, :].cpu().numpy()
        g_bbox = np.round(g_bbox).astype(np.int64)  # round to integers for indexing
        g_area = (g_bbox[3] - g_bbox[1]) * (g_bbox[2] - g_bbox[0])

        l_bbox = l_boxlist_sel.bbox[l_id, :].cpu().numpy()
        l_bbox = np.round(l_bbox).astype(np.int64)
        l_area = (l_bbox[3] - l_bbox[1]) * (l_bbox[2] - l_bbox[0])

        if l_val > iou_thrs and h_val > iou_thrs:
            ag = h_score[h_id] - l_score[l_id]
        elif l_val > iou_thrs:  # gain from a high-resolution miss
            ag = -l_score[l_id]
        elif h_val > iou_thrs:  # gain from a low-resolution miss
            ag = h_score[h_id]
            if g_area != 0:
                agmap_split[0, g_bbox[1]:g_bbox[3],
                            g_bbox[0]:g_bbox[2]] += ag / g_area
        else:
            ag = 0

        agval += ag
        if use_gt_bbox and g_area != 0:  # use the ground-truth box to spread the agmap score
            agmap[g_bbox[1]:g_bbox[3], g_bbox[0]:g_bbox[2]] += ag / g_area
        elif l_area != 0:
            agmap[l_bbox[1]:l_bbox[3], l_bbox[0]:l_bbox[2]] += ag / l_area

    iou_l = boxlist_iou(gt_boxlist, l_boxlist)
    iou_h = boxlist_iou(gt_boxlist, h_boxlist)

    l_score = l_boxlist.get_field("scores").cpu().numpy()
    h_score = h_boxlist.get_field("scores").cpu().numpy()
    l_label = l_boxlist.get_field("labels").cpu().numpy()
    h_label = h_boxlist.get_field("labels").cpu().numpy()

    g_label = gt_boxlist.get_field("labels").cpu().numpy()

    l_val, l_id = iou_l.max(dim=0)
    l_val, l_id = l_val.cpu().numpy(), l_id.cpu().numpy()
    h_val, h_id = iou_h.max(dim=0)
    h_val, h_id = h_val.cpu().numpy(), h_id.cpu().numpy()

    for i in range(len(l_boxlist)):
        l_bbox = l_boxlist.bbox[i, :].cpu().numpy()
        l_bbox = np.round(l_bbox).astype(np.int64)  # round to integers for indexing
        area = (l_bbox[3] - l_bbox[1]) * (l_bbox[2] - l_bbox[0])
        if ((g_label[l_id[i]] != l_label[i] and not class_independ)
                or l_val[i] < iou_thrs) and area != 0:
            agval += l_score[i]
            agmap[l_bbox[1]:l_bbox[3],
                  l_bbox[0]:l_bbox[2]] += l_score[i] / area  # gain from low-resolution false positives

    for i in range(len(h_boxlist)):
        h_bbox = h_boxlist.bbox[i, :].cpu().numpy()
        h_bbox = np.round(h_bbox).astype(np.int64)  # round to integers for indexing
        area = (h_bbox[3] - h_bbox[1]) * (h_bbox[2] - h_bbox[0])
        if ((g_label[h_id[i]] != h_label[i] and not class_independ)
                or h_val[i] < iou_thrs) and area != 0:
            agval -= h_score[i]
            agmap[h_bbox[1]:h_bbox[3],
                  h_bbox[0]:h_bbox[2]] -= h_score[i] / area  # penalty from high-resolution false positives

    agmap = torch.from_numpy(agmap).unsqueeze(dim=0).unsqueeze(dim=0)
    agmap_split = torch.from_numpy(agmap_split).unsqueeze(dim=0)
    with torch.no_grad():
        # agmap = agmap_avgpool(agmap)
        agmap = F.interpolate(agmap,
                              size=agmap_size,
                              mode='bilinear',
                              align_corners=False)
        agmap_split = F.interpolate(agmap_split,
                                    size=agmap_size,
                                    mode='bilinear',
                                    align_corners=False)
    agmap = np.squeeze(agmap.cpu().numpy())
    agmap_split = np.squeeze(agmap_split.cpu().numpy())

    if verbose:
        # Map from [-1, 1] to [0, 255] for colormap visualization
        agmap_color = agmap * alpha
        agmap_color = cv2.resize(agmap_color, gt_boxlist.size)
        agmap_color = (agmap_color + 1) * 127
        agmap_color[agmap_color > 255] = 255
        agmap_color[agmap_color < 0] = 0
        agmap_color = cv2.applyColorMap(agmap_color.astype(np.uint8),
                                        cv2.COLORMAP_HOT)
        cv2.imshow("agmap", agmap_color)
        cv2.waitKey(200)

    if reward:
        return agval
    else:
        return agmap, agmap_split
Example #14
    def __getitem__(self, idx):
        # idx %= 1

        if self.use_mask:
            coco = self.coco
            img_id = self.ids[idx]
            ann_ids = coco.getAnnIds(imgIds=img_id)
            anno = coco.loadAnns(ann_ids)
            path = coco.loadImgs(img_id)[0]['file_name']

            # filter crowd annotations
            # TODO might be better to add an extra field
            # anno = [obj for obj in anno if obj["iscrowd"] == 0]

            masks = [obj["segmentation"] for obj in anno]
            # RLE interpretation
            rle_sizes = [tuple(inst["size"]) for inst in masks]
            assert rle_sizes.count(rle_sizes[0]) == len(rle_sizes), (
                "All the sizes must be the same size: %s" % rle_sizes)
            # in RLE, height come first in "size"
            rle_height, rle_width = rle_sizes[0]

            masks = mask_utils.decode(masks)  # [h, w, n]
            image = cv2.cvtColor(cv2.imread(os.path.join(self.root, path)),
                                 cv2.COLOR_BGR2RGB)

            if self.data_aug:
                image, window, scale, padding, crop = self.resize_image(
                    image,
                    min_dim=512,
                    max_dim=512,
                    min_scale=False,
                    mode='crop',
                    aspect_ratio=1.3,  # 1.5
                    zoom=1.5,  # 1.7
                    min_enlarge=1.2,  # 1.5
                )
                masks = self.resize_mask(masks, scale, padding, crop)

                if random.randint(0, 1):
                    image = np.ascontiguousarray(np.fliplr(image))
                    masks = np.ascontiguousarray(np.fliplr(masks))

                if random.randint(0, 1):
                    image = np.ascontiguousarray(np.flipud(image))
                    masks = np.ascontiguousarray(np.flipud(masks))

                ## Random rotation
                coin = np.random.random()
                if coin < 0.25:
                    k = 1
                elif (coin >= 0.25 and coin < 0.5):
                    k = 2
                elif (coin >= 0.5 and coin < 0.75):
                    k = 3
                else:
                    k = 0
                image = np.rot90(image, k=k, axes=(0, 1))
                masks = np.rot90(masks, k=k, axes=(0, 1))

                rot_range = 10.  # 22.5
                channel_shift_range = 15  # 20

                if np.random.uniform(0, 1) > 0.5:
                    image, masks = self.img_rot(image,
                                                masks,
                                                angle=np.random.uniform(
                                                    -rot_range, rot_range))

                image = self.random_channel_shift(image, channel_shift_range,
                                                  2)

                # Note that some boxes might be all zeros if the corresponding mask got cropped out.
                # and here we filter them out
                _idx = np.sum(masks, axis=(0, 1)) > 0
                masks = masks[:, :, _idx]
                # Bounding boxes. Note that some boxes might be all zeros
                # if the corresponding mask got cropped out.
                # bbox: [num_instances, (y1, x1, y2, x2)]
                boxes = self.extract_bboxes(masks)

                # visualize_datasets.vis_one_training_image(image, str(img_id),
                #                                           '/media/fs3017/eeum/nuclei/test',
                #                                           boxes, masks, is_box_xyxy=True)

                img = Image.fromarray(image)
                target = BoxList(torch.as_tensor(boxes), img.size, mode="xyxy")

                classes = [obj["category_id"] for obj in anno]
                classes = np.array([
                    self.json_category_id_to_contiguous_id[c] for c in classes
                ])[_idx]
                classes = torch.as_tensor(classes)
                target.add_field("labels", classes)

                is_crowd = np.array([obj["iscrowd"] for obj in anno])[_idx]
                is_crowd = torch.as_tensor(is_crowd)
                target.add_field("is_crowd", is_crowd)

                # print(masks.shape)
                # print(np.array([obj["iscrowd"] == 0 for obj in anno])[_idx])
                non_crowd_masks = masks[:, :,
                                        np.array([
                                            obj["iscrowd"] == 0 for obj in anno
                                        ])[_idx]]
                if non_crowd_masks.size == 0:
                    non_crowd_masks = np.zeros(shape=(masks.shape[0],
                                                      masks.shape[1], 1))
                centerness = scipy.ndimage.zoom(non_crowd_masks.max(axis=2),
                                                zoom=[0.25, 0.25],
                                                order=0)
                centerness = (centerness > 0).astype(np.float32)
                centerness[centerness == 0] = -1.
                centerness[centerness > 0] = 0.
                center_scale = 0.3
                gt_bbox = np.zeros(shape=(centerness.shape[0],
                                          centerness.shape[1], 4))
                anchor_bbox = np.zeros(shape=gt_bbox.shape)
                for xx in range(centerness.shape[1]):
                    for yy in range(centerness.shape[0]):
                        anchor_bbox[yy, xx, :] = [
                            max(0.0, xx * 4 - 16),
                            max(0.0, yy * 4 - 16),
                            min(xx * 4 + 16, masks.shape[1]),
                            min(yy * 4 + 16, masks.shape[0])
                        ]
                for bi, box in enumerate(boxes):
                    if is_crowd[bi]:
                        continue
                    x, y, xe, ye = box
                    w = xe - x
                    h = ye - y
                    ctr_x = x * 0.25 + w * 0.25 * 0.5
                    ctr_y = y * 0.25 + h * 0.25 * 0.5
                    hw = w * 0.25 * 0.5 * center_scale
                    hh = h * 0.25 * 0.5 * center_scale
                    sx = math.floor(ctr_x - hw)
                    sy = math.floor(ctr_y - hh)
                    ex = max(sx + 1, math.ceil(ctr_x + hw))
                    ey = max(sy + 1, math.ceil(ctr_y + hh))
                    centerness[sy:ey, sx:ex] = 1.
                    gt_bbox[sy:ey, sx:ex, :] = [x, y, xe, ye]

                masks = torch.tensor(masks).permute(2, 0, 1)  # [n, h, w]
                assert masks.shape[1] == img.size[1]
                assert masks.shape[2] == img.size[0]
                masks = SegmentationMask(masks, img.size, mode='mask')
                target.add_field("masks", masks)

                if self._transforms is not None:
                    img, target = self._transforms(img, target)
            else:
                if self.is_train:
                    if random.randint(0, 1):
                        image = np.ascontiguousarray(np.fliplr(image))
                        masks = np.ascontiguousarray(np.fliplr(masks))

                    if random.randint(0, 1):
                        image = np.ascontiguousarray(np.flipud(image))
                        masks = np.ascontiguousarray(np.flipud(masks))

                # boxes = [obj["bbox"] for obj in anno]
                boxes = self.extract_bboxes(masks)

                # visualize_datasets.vis_one_training_image(image, str(img_id),
                #                                           '/media/fs3017/eeum/nuclei/test',
                #                                           boxes, masks, is_box_xyxy=False)

                img = Image.fromarray(image)
                target = BoxList(torch.as_tensor(boxes), img.size, mode="xyxy")

                classes = [obj["category_id"] for obj in anno]
                classes = [
                    self.json_category_id_to_contiguous_id[c] for c in classes
                ]
                classes = torch.tensor(classes)
                target.add_field("labels", classes)

                is_crowd = [obj["iscrowd"] > 0 for obj in anno]
                is_crowd = torch.as_tensor(is_crowd)
                target.add_field("is_crowd", is_crowd)

                non_crowd_masks = masks[:, :,
                                        np.array([
                                            obj["iscrowd"] == 0 for obj in anno
                                        ])]
                centerness = scipy.ndimage.zoom(non_crowd_masks.max(axis=2),
                                                zoom=[0.25, 0.25],
                                                order=0)
                centerness = (centerness > 0).astype(np.float32)
                centerness[centerness == 0] = -1.
                centerness[centerness > 0] = 0.
                center_scale = 0.3
                gt_bbox = np.zeros(shape=(centerness.shape[0],
                                          centerness.shape[1], 4))
                anchor_bbox = np.zeros(shape=gt_bbox.shape)
                for xx in range(centerness.shape[1]):
                    for yy in range(centerness.shape[0]):
                        anchor_bbox[yy, xx, :] = [
                            max(0.0, xx * 4 - 16),
                            max(0.0, yy * 4 - 16),
                            min(xx * 4 + 16, masks.shape[1]),
                            min(yy * 4 + 16, masks.shape[0])
                        ]
                for bi, box in enumerate(boxes):
                    if is_crowd[bi]:
                        continue
                    x, y, xe, ye = box
                    w = xe - x
                    h = ye - y
                    ctr_x = x * 0.25 + w * 0.25 * 0.5
                    ctr_y = y * 0.25 + h * 0.25 * 0.5
                    hw = w * 0.25 * 0.5 * center_scale
                    hh = h * 0.25 * 0.5 * center_scale
                    sx = math.floor(ctr_x - hw)
                    sy = math.floor(ctr_y - hh)
                    ex = max(sx + 1, math.ceil(ctr_x + hw))
                    ey = max(sy + 1, math.ceil(ctr_y + hh))
                    centerness[sy:ey, sx:ex] = 1.
                    gt_bbox[sy:ey, sx:ex, :] = [x, y, xe, ye]
                    # print(gt_bbox[sy, sx, :], anchor_bbox[sy, sx, :])

                masks = torch.tensor(masks).permute(2, 0, 1)  # [n, h, w]
                assert masks.shape[1] == rle_height == img.size[1]
                assert masks.shape[2] == rle_width == img.size[0]
                masks = SegmentationMask(masks, img.size, mode='mask')
                target.add_field("masks", masks)

                target = target.clip_to_image(remove_empty=True)

                if self._transforms is not None:
                    img, target = self._transforms(img, target)

            # print(anchor_bbox, gt_bbox)
            return img, target, idx, \
                   (torch.as_tensor(centerness), torch.as_tensor(gt_bbox), torch.as_tensor(anchor_bbox))

        img, anno = super(COCODataset, self).__getitem__(idx)

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        if anno and "segmentation" in anno[0]:
            masks = [obj["segmentation"] for obj in anno]
            masks = SegmentationMask(masks, img.size, mode='poly')
            target.add_field("masks", masks)

        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = PersonKeypoints(keypoints, img.size)
            target.add_field("keypoints", keypoints)

        target = target.clip_to_image(remove_empty=True)

        if self._transforms is not None:
            img, target = self._transforms(img, target)

        return img, target, idx
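The two nested loops above that fill anchor_bbox can also be written without Python loops. A minimal NumPy sketch producing the same values (make_window_anchors is a hypothetical helper name, not part of the snippet):

import numpy as np

def make_window_anchors(h, w, mask_h, mask_w, stride=4, half=16):
    # one fixed (2 * half)-pixel window per stride-`stride` location, clipped to the image
    xs, ys = np.meshgrid(np.arange(w), np.arange(h))
    return np.stack([
        np.maximum(0.0, xs * stride - half),
        np.maximum(0.0, ys * stride - half),
        np.minimum(xs * stride + half, mask_w),
        np.minimum(ys * stride + half, mask_h),
    ], axis=-1)  # shape (h, w, 4)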
Example #15
0
    def filter_results(self, boxlist_left, boxlist_right, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes_left = boxlist_left.bbox.reshape(-1, num_classes * 4)
        boxes_right = boxlist_right.bbox.reshape(-1, num_classes * 4)
        boxes_union = boxlist_union(boxlist_left, boxlist_right).bbox.reshape(
            -1, num_classes * 4)
        scores = boxlist_left.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result_left, result_right = [], []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes_union[inds, j * 4:(j + 1) * 4]
            boxes_j_left = boxes_left[inds, j * 4:(j + 1) * 4]
            boxes_j_right = boxes_right[inds, j * 4:(j + 1) * 4]
            # if j==1:print(inds_all[:, j])
            boxlist_for_class = BoxList(boxes_j,
                                        boxlist_left.size,
                                        mode="xyxy")
            boxlist_for_class_left = BoxList(boxes_j_left,
                                             boxlist_left.size,
                                             mode="xyxy")
            boxlist_for_class_right = BoxList(boxes_j_right,
                                              boxlist_left.size,
                                              mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class_left.add_field("scores", scores_j)
            boxlist_for_class_right.add_field("scores", scores_j)
            idxs = boxlist_nms_idx(boxlist_for_class, self.nms)
            boxlist_for_class_left = boxlist_for_class_left[idxs]
            boxlist_for_class_right = boxlist_for_class_right[idxs]
            num_labels = len(boxlist_for_class_left)
            boxlist_for_class_left.add_field(
                "labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=device))
            boxlist_for_class_right.add_field(
                "labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=device))
            result_left.append(boxlist_for_class_left)
            result_right.append(boxlist_for_class_right)

        result_left = cat_boxlist(result_left)
        result_right = cat_boxlist(result_right)
        number_of_detections = len(result_left)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result_left.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result_left = result_left[keep]
            result_right = result_right[keep]
        return result_left, result_right
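The detections_per_img cap above uses torch.kthvalue to find a score threshold that keeps roughly the top-k detections. A small standalone illustration with assumed values:

import torch

scores = torch.tensor([0.9, 0.2, 0.75, 0.4, 0.6])
detections_per_img = 3
# k-th smallest score such that `detections_per_img` scores remain >= the threshold
image_thresh, _ = torch.kthvalue(scores, len(scores) - detections_per_img + 1)
keep = torch.nonzero(scores >= image_thresh.item()).squeeze(1)
# keep -> tensor([0, 2, 4]), i.e. the scores 0.9, 0.75 and 0.6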
Example #16
0
def depth_evaluation(
    dataset,
    predictions,
    output_folder,
    box_only,
    iou_types,
    expected_results,
    expected_results_sigma_tol,
    score_threshold=0.05,
    bbox_iou_threshold=0.5,
    height_to_depth=False,
):
    logger = logging.getLogger("maskrcnn_benchmark.inference")

    logger.info("Preparing results for Depth Evaluation")
    # result table "file_name" : result
    depth_results = {}
    gt_box_num = 0
    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]
        if len(prediction) == 0:
            continue

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        file_name = img_info["file_name"]

        # ground truth
        # img, gt, idx = dataset[original_id] # TODO: load gt only
        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)

        # filter truncated annotations and restrict to the configured class list
        # TODO might be better to add an extra field
        if hasattr(dataset, 'remove_truncated') and dataset.remove_truncated:
            anno = [obj for obj in anno if obj["truncated"] == 0]

        if hasattr(dataset,
                   'class_filter_list') and len(dataset.class_filter_list) > 0:
            anno = [
                obj for obj in anno
                if obj["category_id"] in dataset.class_filter_list
            ]

        depth_key = dataset.depth_key if hasattr(dataset,
                                                 'depth_key') else "depth"
        input_depth_mode = dataset.input_depth_mode if hasattr(
            dataset, 'input_depth_mode') else depth_key
        output_depth_mode = dataset.output_depth_mode if hasattr(
            dataset, 'output_depth_mode') else "depth"
        min_value = dataset.min_value if hasattr(dataset, 'min_value') else 0.1
        max_value = dataset.max_value if hasattr(dataset, 'max_value') else 100

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, (image_width, image_height),
                         mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [
            dataset.json_category_id_to_contiguous_id[c] for c in classes
        ]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        if height_to_depth:
            height = [obj["height_rw"] for obj in anno]
            height = torch.tensor(height)
            target.add_field("depths", height)
            target = _height_to_depth(target, img_info)
        elif anno and depth_key in anno[0]:
            depth = [obj[depth_key] for obj in anno]
            # depth = torch.tensor(depth)
            depth = PointDepth(
                depth, (image_width, image_height),
                focal_length=img_info["camera_params"]["intrinsic"]["fx"],
                baseline=img_info["camera_params"]["extrinsic"]["baseline"],
                min_value=min_value,
                max_value=max_value,
                mode=input_depth_mode)
            target.add_field("depths", depth)

        gt = target.resize((image_width, image_height))

        gt_boxes = gt.bbox.tolist()
        if len(gt_boxes) == 0: continue
        gt_box_num += len(gt_boxes)
        gt_labels = gt.get_field("labels").tolist()
        gt_depths = gt.get_field('depths').convert("depth").depths.tolist()
        # print(gt_depths)
        gt_mapped_labels = [
            dataset.contiguous_category_id_to_json_id[i] for i in gt_labels
        ]

        prediction = prediction.resize((image_width, image_height))
        prediction = prediction.convert("xyxy")
        # print(prediction)

        scores = prediction.get_field("scores")
        positive_indices = scores > score_threshold
        scores = scores.tolist()

        boxes = prediction.bbox[positive_indices].tolist()
        if len(boxes) == 0: continue
        labels = prediction.get_field("labels")[positive_indices].tolist()

        if height_to_depth:
            prediction = _height_to_depth(prediction, img_info)
        depths = prediction.get_field('depths')[
            positive_indices]  # .convert("depth").depths
        if isinstance(depths, PointDepth):
            depths = depths  #.convert(output_depth_mode)
        else:
            depths = PointDepth(
                depths, (image_width, image_height),
                focal_length=img_info["camera_params"]["intrinsic"]["fx"],
                baseline=img_info["camera_params"]["extrinsic"]["baseline"],
                min_value=min_value,
                max_value=max_value,
                mode="depth")
        depths = depths.convert("depth")
        depths = depths.depths.tolist()
        # print(depths, gt_depths)

        mapped_labels = [
            dataset.contiguous_category_id_to_json_id[i] for i in labels
        ]

        # find corresponding box
        overlaps = boxlist_iou(prediction[positive_indices], gt)
        gt_overlaps = torch.zeros(len(gt_boxes))
        dt_matches = [-1] * len(boxes)
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            if gt_ovr < bbox_iou_threshold: continue
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            dt_matches[box_ind] = gt_ind
            # record the iou coverage of this gt box
            gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # locations, rotation_y = ddd2locrot(
        #   center, alpha, dimensions, depth, calibs[0])

        depth_results[file_name] = []
        # gt[file_name] = {}

        for k in range(len(boxes)):
            depth_results[file_name].append({
                'image_id':
                original_id,
                # 'calib': img_info['calib'],
                'category_id':
                mapped_labels[k],
                'bbox':
                boxes[k],
                'depth':
                depths[k][0],
                'gt_category_id':
                gt_mapped_labels[dt_matches[k]]
                if dt_matches[k] >= 0 else None,
                'gt_bbox':
                gt_boxes[dt_matches[k]] if dt_matches[k] >= 0 else None,
                'gt_depth':
                gt_depths[dt_matches[k]] if dt_matches[k] >= 0 else None,
                'score':
                scores[k],
            })

        # for k in range(len(gt_boxes)):
        #     gt[file_name].append({
        #         'image_id': original_id,
        #         'calib': img_info['calib'],
        #         'category_id': gt_mapped_labels[k],
        #         'bbox': gt_boxes[k],
        #         'depth': gt_depths[k],
        #     })

    logger.info("Evaluating predictions")
    logger.info("Ground Truth boxes %d" % gt_box_num)
    results = evaluate_results(depth_results)
    import json
    logger.info(json.dumps(results, sort_keys=True, indent=4))

    return results
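The matching loop in depth_evaluation greedily pairs each ground-truth box with its best-overlapping prediction. A standalone sketch of the same idea on a plain IoU matrix (greedy_match is a hypothetical name; this sketch breaks once the best remaining overlap falls below the threshold instead of continuing):

import torch

def greedy_match(iou, thresh=0.5):
    # iou: (num_pred, num_gt); returns dt_matches[i] = matched gt index or -1
    iou = iou.clone()
    dt_matches = [-1] * iou.shape[0]
    for _ in range(min(iou.shape)):
        best_pred_per_gt, best_pred_idx = iou.max(dim=0)   # best prediction for each gt
        gt_ovr, gt_ind = best_pred_per_gt.max(dim=0)       # best-covered gt overall
        if gt_ovr < thresh:
            break
        box_ind = int(best_pred_idx[gt_ind])
        dt_matches[box_ind] = int(gt_ind)
        iou[box_ind, :] = -1                               # mark both as used
        iou[:, gt_ind] = -1
    return dt_matches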
Example #17
0
    def __getitem__(self, idx):
        # img, anno = super(CityScapesWDDataset, self).__getitem__(idx)
        coco = self.coco
        img_id = self.ids[idx]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anno = coco.loadAnns(ann_ids)

        img_info = coco.loadImgs(img_id)[0]
        path = img_info['file_name']
        # right_path = img_info['right_file_name']

        img = Image.open(os.path.join(self.root, path)).convert('RGB')
        # right_img = Image.open(os.path.join(self.root, right_path)).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
            # right_img = self.transform(right_img)

        if self.target_transform is not None:
            anno = self.target_transform(anno)

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        if len(self.class_filter_list) > 0:
            anno = [
                obj for obj in anno
                if obj["category_id"] in self.class_filter_list
            ]

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        if anno and "segmentation" in anno[0]:
            masks = [obj["segmentation"] for obj in anno]
            masks = SegmentationMask(masks, img.size, mode='poly')
            target.add_field("masks", masks)

        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = PersonKeypoints(keypoints, img.size)
            target.add_field("keypoints", keypoints)

        # if anno and "height_rw" in anno[0]:
        #     depth = [obj["height_rw"] for obj in anno]
        if anno and self.depth_key in anno[0]:
            depth = [obj[self.depth_key] for obj in anno]
            # depth = torch.tensor(depth)
            depth = PointDepth(
                depth,
                img.size,
                focal_length=img_info["camera_params"]["intrinsic"]["fx"],
                baseline=img_info["camera_params"]["extrinsic"]["baseline"],
                min_value=self.depth_range[0],
                max_value=self.depth_range[1],
                mode=self.depth_key)
            # print(depth.depths)
            depth = depth.convert(self.output_depth_mode)
            # print(depth.depths)
            target.add_field("depths", depth)

        target = target.clip_to_image(remove_empty=True)

        # target.add_field("right_image", right_img)

        # print(target.get_field("depths").depths)

        if self._transforms is not None:
            img, target = self._transforms(img, target)

        # print(target.get_field("depths").depths)

        return img, target, idx
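The focal_length and baseline arguments above are what the usual stereo relation needs. A hedged sketch of a disparity-to-depth conversion (PointDepth's own convert() is not shown in this snippet; disparity_to_depth is a hypothetical helper):

def disparity_to_depth(disparity, fx, baseline, min_value=0.1, max_value=100.0):
    # depth = fx * baseline / disparity, clamped to a valid range
    depth = fx * baseline / max(disparity, 1e-6)
    return min(max(depth, min_value), max_value)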
Example #18
0
    def __getitem__(self, idx):

        coco = self.coco
        img_id = self.ids[idx]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anno = coco.loadAnns(ann_ids)

        loaded_img = coco.loadImgs(img_id)[0]
        path = loaded_img['file_name']

        img = Image.open(os.path.join(self.root, path)).convert('RGB')

        # if "angle" in loaded_img and loaded_img["angle"] is not 0:
        if 'angle' in loaded_img and loaded_img["angle"] != 0:
            if loaded_img["angle"] == 90:
                img = img.rotate( 270, expand=True )
            elif loaded_img["angle"] == 180:
                img = img.rotate( 180, expand=True )
            elif loaded_img["angle"] == 270:
                img = img.rotate( 90, expand=True )
            else:
                raise ValueError()

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0 and obj["ignore"] == 0]

        """
        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")
        """

        def to_rrect( x ):
            x = cv2.minAreaRect( x )
            x = cv2.boxPoints( x )
            return x

        # masks = [obj["segmentation"] for obj in anno]

        keypoints = np.array( [obj["keypoint"] for obj in anno] )
        # keypoints = np.array(keypoints, dtype=np.float32).reshape(-1, 8)
        # keypoints = list( poly_to_rect(keypoints.reshape( (-1, 8) ) ) )
        keypoints = np.array( keypoints, dtype=np.float32 ).reshape( (-1, 8) )
        
        xmins = np.min( keypoints[:,  ::2], axis=1 )
        minx_idx = xmins < 1
        xmins[minx_idx] = 1
        ymins = np.min( keypoints[:, 1::2], axis=1 )
        miny_idx = ymins < 1
        ymins[miny_idx] = 1
        xmaxs = np.max( keypoints[:,  ::2], axis=1 )
        maxx_idx = xmaxs > 1024
        xmaxs[maxx_idx] = 1024
        ymaxs = np.max( keypoints[:, 1::2], axis=1 )
        maxy_idx = ymaxs > 1024
        ymaxs[maxy_idx] = 1024
        
        xyxy = np.vstack( [xmins, ymins, xmaxs, ymaxs] ).transpose()
        boxes = torch.from_numpy( xyxy ).reshape(-1, 4)  # guard against no boxes
        target = BoxList( boxes, img.size, mode="xyxy" )

        keypoints = SegmentationMask( keypoints.reshape( (-1, 1, 8)).tolist(), img.size, mode='poly' )
        target.add_field( "keypoints", keypoints )

        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        # NOTE Qimeng: close it for getting correct alpha
        #target = target.clip_to_image(remove_empty=True)

        if self._transforms is not None:
            img, target = self._transforms(img, target)

        return img, target, idx
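The (unused) to_rrect helper above wraps two OpenCV calls; a standalone illustration with made-up points:

import numpy as np
import cv2

quad = np.array([[10, 10], [110, 20], [105, 60], [5, 50]], dtype=np.float32)
rect = cv2.minAreaRect(quad)   # ((cx, cy), (w, h), angle)
corners = cv2.boxPoints(rect)  # (4, 2) float32 array of the rotated rectangle's corners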
Example #19
0
    def forward(self, features_, proposals, targets=None, query=False):
        """
        Arguments:
            features (list[Tensor]): feature-maps from possibly several levels
            proposals (list[BoxList]): proposal boxes
            targets (list[BoxList], optional): the ground-truth targets.

        Returns:
            x (Tensor): the result of the feature extractor
            proposals (list[BoxList]): during training, the subsampled proposals
                are returned. During testing, the predicted boxlists are returned
            losses (dict[Tensor]): During training, returns the losses for the
                head. During testing, returns an empty dict.
        """

        tmpp = [feature.shape[2:] for feature in features_]
        features = [
            F.pad(feature, (0, 0, 0, size[0]), "constant", value=0.0)
            for size, feature in zip(tmpp, features_)
        ]

        if self.training:
            targets = self.ratio_estimator.get_ratio(targets, self.training)
            proposals = targets

        elif query:
            for target, proposal in zip(targets, proposals):
                target.add_field("embeds", proposal.get_field("embeds"))

            if self.padreg:
                for target, proposal in zip(targets, proposals):
                    target.add_field("reg_vals",
                                     proposal.get_field("reg_vals"))
                proposals = targets

                query_get_ratio = self.ratio_estimator.get_ratio \
                                if self.query_by_gt else self.ratio_estimator.get_ratio_by_est
                proposals = query_get_ratio(proposals, self.training)
            else:
                proposals = targets

        elif self.padreg:
            old_proposals = proposals
            device_ = proposals[0].bbox.device
            return_bboxlist = []

            for proposal in proposals:
                p_bbox = 1.0 * proposal.bbox
                new_bbox = []
                n_proposal = p_bbox.shape[0]

                regvals = proposal.get_field("reg_vals")
                reg_vals = est_decode(regvals)
                # reg_valss = (reg_vals+1)/2

                for j in range(n_proposal):
                    bbox = p_bbox[j, :]
                    h = bbox[3] - bbox[1]
                    new_h = h * (1. / (1. - reg_vals[j]))
                    bbox[3] = bbox[1] + new_h
                    new_bbox.append(bbox.tolist())
                if n_proposal == 0:
                    new_bbox = torch.tensor([]).view(0, 4)
                new_bboxlist = BoxList(new_bbox, proposal.size, mode="xyxy")
                new_bboxlist._copy_extra_fields(proposal)
                return_bboxlist.append(new_bboxlist)

            return_bboxlist = [
                return_box.to(device_) for return_box in return_bboxlist
            ]
            proposals = return_bboxlist

        else:
            pass
            # keep the proposals

        x = self.feature_extractor(features, proposals)

        # final classifier that converts the features into predictions

        part_feat = self.predictor(x)

        if not self.training:

            # when no training
            # for query, proposals are ground truth
            # for gallery, proposals are results, just add part_embeds on it

            if not query and self.padreg:
                proposals = self.exchange_box(old_proposals, proposals)

            result = self.post_processor(part_feat, proposals)

            return x, result, {}

        loss_part_oim = self.loss_evaluator(part_feat, targets)

        loss_dict = dict(
            zip([
                "loss_reid_p" + str(i) for i in range(1,
                                                      len(loss_part_oim) + 1)
            ], loss_part_oim))

        return (
            x,
            proposals,
            loss_dict,
        )
Example #20
0
    def forward_for_single_feature_map(self, anchors, objectness,
                                       box_regression):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * REGRESSION_CN, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression = permute_and_flatten(box_regression, N, A,
                                             REGRESSION_CN, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n,
                                               dim=1,
                                               sorted=True)  # sorted!

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression = box_regression[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.get_field("rrects") for a in anchors],
                                   dim=0)
        concat_anchors = concat_anchors.reshape(N, -1,
                                                REGRESSION_CN)[batch_idx,
                                                               topk_idx]

        proposals = self.box_coder.decode(
            box_regression.view(-1, REGRESSION_CN),
            concat_anchors.view(-1, REGRESSION_CN))

        proposals = proposals.view(N, -1, REGRESSION_CN)

        result = []
        for proposal, score, im_shape in zip(proposals, objectness,
                                             image_shapes):
            # filter small boxes
            if self.min_size > 0:
                keep = remove_small_boxes(proposal, self.min_size)
                proposal = proposal[keep]
                score = score[keep]

            # perform rotated nms
            keep = self.nms_rotate(proposal, score)
            proposal = proposal[keep]
            score = score[keep]

            # convert anchor rects to bboxes
            bboxes = convert_rects_to_bboxes(proposal, torch)

            boxlist = BoxList(bboxes, im_shape, mode="xyxy")

            boxlist.add_field("rrects", proposal)
            boxlist.add_field("objectness", score)

            boxlist = boxlist.clip_to_image(remove_empty=False)
            result.append(boxlist)
        return result
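For reference, permute_and_flatten in the upstream maskrcnn_benchmark RPN utilities is roughly the following, turning an (N, A*C, H, W) map into (N, H*W*A, C) so it lines up with the anchor ordering:

def permute_and_flatten(layer, N, A, C, H, W):
    layer = layer.view(N, -1, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    layer = layer.reshape(N, -1, C)
    return layer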
Example #21
0
    def forward_for_single_feature_map(self,
                                       anchors,
                                       box_cls,
                                       box_regression,
                                       pre_nms_thresh=0.05):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = box_cls.device
        N, _, H, W = box_cls.shape
        A = box_regression.size(1) // 4  # A: number of anchors per location
        C = box_cls.size(1) // A  # C: number of classes

        # put in the same format as anchors
        box_cls = box_cls.view(N, -1, C, H, W).permute(0, 3, 4, 1, 2)  # N is the batch size
        box_cls = box_cls.reshape(N, -1, C)  # flatten anchors and spatial dims into one axis
        box_cls = box_cls.sigmoid()  # convert logits to per-class probabilities

        box_regression = box_regression.view(N, -1, 4, H, W)
        box_regression = box_regression.permute(0, 3, 4, 1, 2)
        box_regression = box_regression.reshape(N, -1, 4)

        # H and W are the feature-map height and width
        num_anchors = A * H * W

        results = [[] for _ in range(N)]
        candidate_inds = box_cls > pre_nms_thresh
        if candidate_inds.sum().item() == 0:
            # no candidate passes pre_nms_thresh
            empty_boxlists = []
            for a in anchors:
                empty_boxlist = BoxList(torch.Tensor(0, 4).to(device), a.size)
                empty_boxlist.add_field("labels",
                                        torch.LongTensor([]).to(device))
                empty_boxlist.add_field("scores", torch.Tensor([]).to(device))
                empty_boxlists.append(empty_boxlist)
            return empty_boxlists

        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

        for batch_idx, (per_box_cls, per_box_regression, per_pre_nms_top_n, \
        per_candidate_inds, per_anchors) in enumerate(zip(
            box_cls,
            box_regression,
            pre_nms_top_n,
            candidate_inds,
            anchors)):

            # Sort and select TopN
            per_box_cls = per_box_cls[per_candidate_inds]
            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1]
            per_class += 1
            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                        per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_box_loc = per_box_loc[top_k_indices]
                per_class = per_class[top_k_indices]

            detections = self.box_coder.decode(
                per_box_regression[per_box_loc, :].view(-1, 4),
                per_anchors.bbox[per_box_loc, :].view(-1, 4))

            boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results[batch_idx] = boxlist

        return results
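self.box_coder.decode above applies the standard Faster R-CNN style box decoding. A simplified sketch (unit weights, no dw/dh clamping, so an illustration rather than the library's exact implementation):

import torch

def decode(rel_codes, anchors):
    widths = anchors[:, 2] - anchors[:, 0]
    heights = anchors[:, 3] - anchors[:, 1]
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights
    dx, dy, dw, dh = rel_codes.unbind(dim=1)
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = torch.exp(dw) * widths
    pred_h = torch.exp(dh) * heights
    return torch.stack([pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w,
                        pred_ctr_y + 0.5 * pred_h], dim=1)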
Example #22
0
    def filter_results(self, boxlist, num_classes, new_thresh=None):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        if not new_thresh:
            new_thresh = self.score_thresh
        inds_all = scores > new_thresh
        tmp_thresh = new_thresh
        # while inds_all.nonzero().shape[0] < 10: # less than 10 objects
        #     print("less than 10 objects")
        #     tmp_thresh /= 2
        #     inds_all = scores > tmp_thresh
        idx = [] # to record the index of selected candidates
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            # print(scores[0])
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4 : (j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class, keep = boxlist_nms(
                boxlist_for_class, self.nms
            )
            num_labels = len(boxlist_for_class)
            if len(inds) > 0:
                idx.append(inds[keep])
            boxlist_for_class.add_field(
                "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
            )
            result.append(boxlist_for_class)

        if len(idx) > 0:
            idx = torch.cat(idx)
        # print(idx.shape[0])
        result = cat_boxlist(result)
        number_of_detections = len(result)
        #print(number_of_detections)
        while number_of_detections < 10:  # fewer than 10 detections: retry with half the threshold
            result, idx = self.filter_results(boxlist, num_classes, new_thresh=new_thresh/2)
            number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(), number_of_detections - self.detections_per_img + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
            idx = idx[keep]

        return result, idx
Example #23
0
    def __getitem__(self, idx, raw_image=False):

        img, anno = super(COCODataset, self).__getitem__(idx)
        if raw_image:
            return img
        # filter crowd annotations
        # TODO might be better to add an extra field
        if self.is_ignore:
            _, anno_ignore = self.COCODataset_ignore.__getitem__(idx)
            anno_ignore = [obj for obj in anno_ignore if obj["iscrowd"] == 0]
            boxes_ignore = [obj["bbox"] for obj in anno_ignore]
            boxes_ignore = torch.as_tensor(boxes_ignore).reshape(
                -1, 4)  # guard against no boxes
            target_ignore = BoxList(boxes_ignore, img.size,
                                    mode="xywh").convert("xyxy")
            classes_ignore = [obj["category_id"] for obj in anno_ignore]
            classes_ignore = [
                self.json_category_id_to_contiguous_id[c]
                for c in classes_ignore
            ]
            classes_ignore = torch.tensor(classes_ignore)
            target_ignore.add_field("labels", classes_ignore)
            if anno_ignore and "segmentation" in anno_ignore[0]:
                masks_ignore = [obj["segmentation"] for obj in anno_ignore]
                masks_ignore = SegmentationMask(masks_ignore,
                                                img.size,
                                                mode='poly')
                target_ignore.add_field("masks", masks_ignore)
            target_ignore = target_ignore.clip_to_image(remove_empty=True)
        anno = [obj for obj in anno if obj["iscrowd"] == 0]
        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")
        # print("box is same?", boxes, boxes_ignore)
        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        if anno and "segmentation" in anno[0]:
            masks = [obj["segmentation"] for obj in anno]
            masks = SegmentationMask(masks, img.size, mode='poly')
            target.add_field("masks", masks)

        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = PersonKeypoints(keypoints, img.size)
            target.add_field("keypoints", keypoints)

        target = target.clip_to_image(remove_empty=True)
        # print(self._transforms)
        if self._transforms is not None:
            if self.is_ignore:
                img, target, target_ignore = self._transforms(
                    img, target, target_ignore)
            else:
                img, target = self._transforms(img, target)
        if self.is_ignore:
            # print(target)
            # print(target_ignore)
            return img, target, idx, target_ignore
        else:
            return img, target, idx
Example #24
0
    def __getitem__(self, k):
        im_ori_RGB = Image.open(self.img_files[k]).convert(
            'RGB')  # im_ori_RGB.size is (W, H)
        with open(self.pickle_files[k], 'rb') as filehandle:
            data = pickle.load(filehandle)
        bboxes = data['bboxes'].astype(np.float32)  # [xywh]
        assert len(bboxes.shape) == 2 and bboxes.shape[1] == 4
        num_bboxes_ori = bboxes.shape[0]

        if 'label' in data:
            labels = data['label']  # ['car', 'person', 'person']
        else:
            labels = ['person'] * num_bboxes_ori
        # bboxes = np.load(self.bbox_npy_files[k]).astype(np.float32) # [xywh]
        if bboxes.shape[0] > self.cfg.DATA.COCO.GOOD_NUM:
            bboxes = bboxes[:self.cfg.DATA.COCO.GOOD_NUM, :]
            labels = labels[:self.cfg.DATA.COCO.GOOD_NUM]

        target_boxes = torch.as_tensor(bboxes).reshape(
            -1, 4)  # guard against no boxes
        target = BoxList(target_boxes, im_ori_RGB.size,
                         mode="xywh").convert("xyxy")
        num_boxes = target.bbox.shape[0]

        if self.opt.est_kps:
            if 'kps' in data:
                kps_gt = data['kps'].astype(int)  # [N, 51]
                if num_bboxes_ori > self.cfg.DATA.COCO.GOOD_NUM:
                    kps_gt = kps_gt[:self.cfg.DATA.COCO.GOOD_NUM, :]
                kps_gt = kps_gt.tolist()  # [[51]]
            else:
                kps_gt = [[0] * 51 for i in range(num_boxes)]

            target_keypoints = PersonKeypoints(kps_gt, im_ori_RGB.size)
            # kps_sum = torch.sum(torch.sum(target_keypoints.keypoints[:, :, :2], 1), 1)
            # kps_mask = kps_sum != 0.
            # print(target_keypoints.keypoints.shape, kps_sum, kps_mask)

            target.add_field("keypoints", target_keypoints)
            # target.add_field("keypoints_mask", kps_mask)
            target = target.clip_to_image(remove_empty=True)
            classes = [1] * num_boxes  # !!!!! all person (1) for now...
            classes = [
                self.json_category_id_to_contiguous_id[c] for c in classes
            ]
            classes = torch.tensor(classes)
            target.add_field("labels", classes)
            scores = torch.tensor([1.] * target.bbox.shape[0])
            target.add_field("scores", scores)

        W, H = im_ori_RGB.size[:2]
        if self.train:
            yannick_results = loadmat(self.yannick_mat_files[k])
            horizon_visible = yannick_results['horizon_visible'][0][0].astype(
                np.float32)
            assert horizon_visible == 1
            horizon = yannick_results['pitch'][0][0].astype(np.float32)
            horizon_pixels_yannick = H * horizon
            v0 = H - horizon_pixels_yannick
            vfov = yannick_results['vfov'][0][0].astype(np.float32)
            f_pixels_yannick = H / 2. / (np.tan(vfov / 2.))
        else:
            f_pixels_yannick = -1
            v0 = -1

        im_yannickTransform = self.transforms_yannick(
            im_ori_RGB)  # [0., 1.] by default
        im_maskrcnnTransform, target_maskrcnnTransform = self.transforms_maskrcnn(
            im_ori_RGB, target)  # [0., 1.] by default
        # print('---', im.size(), np.asarray(im).shape)
        # im_array = np.asarray(im)
        # if len(im_array.shape)==2:
        #     im_array = np.stack((im_array,)*3, axis=-1)
        #     # print(im_array.shape)
        # x = torch.from_numpy(im_array.transpose((2,0,1)))

        if self.train and self.opt.est_kps:
            target_maskrcnnTransform.add_field("keypoints_ori",
                                               target_keypoints)
            target_maskrcnnTransform.add_field("boxlist_ori", target)
        target_maskrcnnTransform.add_field('img_files',
                                           [self.img_files[k]] * num_boxes)

        if self.train:
            y_person = 1.75
            bbox_good_list = bboxes
            vc = H / 2.
            inv_f2_yannick = 1. / (f_pixels_yannick * f_pixels_yannick)
            yc_list = []
            for bbox in bbox_good_list:
                vt = H - bbox[1]
                vb = H - (bbox[1] + bbox[3])
                #     v0_single = yc * (vt - vb) / y_person + vb
                yc_single = y_person * (v0 - vb) / (vt - vb) / (
                    1. + (vc - v0) * (vc - vt) / f_pixels_yannick**2)
                yc_list.append(yc_single)
            yc_estCam = np.median(np.asarray(yc_list))
        else:
            yc_estCam = -1

        assert len(labels) == bboxes.shape[0]
        # im_ori_BGR_array = np.array(im_ori_RGB.copy())[:,:,::-1]
        return im_yannickTransform, im_maskrcnnTransform, W, H, \
               float(yc_estCam), \
               self.pad_bbox(bboxes, self.GOOD_NUM).astype(np.float32), bboxes.shape[0], float(v0), float(f_pixels_yannick), \
               os.path.basename(self.img_files[k])[:12], self.img_files[k], target_maskrcnnTransform, labels
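The line f_pixels_yannick = H / 2. / np.tan(vfov / 2.) is the standard pinhole relation between the vertical field of view and the focal length in pixels. A quick numeric check with assumed values:

import numpy as np

H = 480                                  # image height in pixels (assumed)
vfov = np.deg2rad(60.0)                  # vertical field of view (assumed)
f_pixels = H / 2. / np.tan(vfov / 2.)    # ~415.7 pixels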
Example #25
0
    W = 240

    device = 'cpu'
    image = torch.zeros(N, C, H, W, device=device)
    targets = np.array([[50, 50, 100, 100, 0], [50, 50, 50, 50, -90]],
                       dtype=np.float32)
    bbox_targets = np.array([[0, 0, 100, 100], [25, 25, 75, 75]],
                            dtype=np.float32)

    targets = [targets for ix in range(N)]
    bbox_targets = [bbox_targets for ix in range(N)]

    test_rpn_post_processor(image, bbox_targets)
    test_rrpn_post_processor(image, targets)

    from maskrcnn_benchmark.modeling.rrpn.utils import get_segmentation_mask_rotated_rect_tensor
    tt = []
    for ix, td in enumerate(targets):
        rect_pts = convert_rect_to_pts2(td)  #.reshape((len(td), 8))
        nn = len(rect_pts)
        bboxes = np.zeros((nn, 4), dtype=np.float32)
        bboxes[:, :2] = np.min(rect_pts, axis=1)
        bboxes[:, 2:] = np.max(rect_pts, axis=1)
        boxlist = BoxList(bboxes, (W, H), mode="xyxy")
        mm = SegmentationMask(rect_pts.reshape(nn, 1, 8).tolist(), (W, H),
                              mode='poly')
        boxlist.add_field("masks", mm)
        tt.append(boxlist)

        rrect_tensor = get_segmentation_mask_rotated_rect_tensor(mm)
Example #26
0
def calc_detection_voc_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.
    This function calculates precision and recall of
    predicted bounding boxes obtained from a dataset which has :math:`N`
    images.
    The code is based on the evaluation code used in PASCAL VOC Challenge.
   """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)
    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        pred_bbox = pred_boxlist.bbox.numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox.numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        gt_difficult = gt_boxlist.get_field("difficult").numpy()

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0, ) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1
            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.size),
                BoxList(gt_bbox_l, gt_boxlist.size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos.keys():
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]

    return prec, rec
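calc_detection_voc_prec_rec is typically paired with an AP computation over the returned per-class precision/recall curves. A sketch following the common VOC-style (continuous interpolation) variant, not necessarily the companion function used in this repository:

import numpy as np

def calc_detection_voc_ap(prec, rec):
    n_fg_class = len(prec)
    ap = np.empty(n_fg_class)
    for l in range(n_fg_class):
        if prec[l] is None or rec[l] is None:
            ap[l] = np.nan
            continue
        mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
        mrec = np.concatenate(([0], rec[l], [1]))
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]  # make precision monotonically decreasing
        i = np.where(mrec[1:] != mrec[:-1])[0]
        ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under the PR curve
    return ap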
Example #27
0
def test_rrpn_post_processor(image_tensor, targets_data):
    from maskrcnn_benchmark.modeling.rrpn.inference import make_rpn_postprocessor, REGRESSION_CN
    from maskrcnn_benchmark.modeling.rrpn.loss import make_rpn_loss_evaluator

    N, C, H, W = image_tensor.shape

    targets = []
    for ix, td in enumerate(targets_data):
        rect_pts = convert_rect_to_pts2(td)  #.reshape((len(td), 8))
        nn = len(rect_pts)
        bboxes = np.zeros((nn, 4), dtype=np.float32)
        bboxes[:, :2] = np.min(rect_pts, axis=1)
        bboxes[:, 2:] = np.max(rect_pts, axis=1)
        boxlist = BoxList(bboxes, (W, H), mode="xyxy")
        mm = SegmentationMask(rect_pts.reshape(nn, 1, 8).tolist(), (W, H),
                              mode='poly')
        boxlist.add_field("masks", mm)
        targets.append(boxlist)

    device = image_tensor.device

    USE_FPN = False
    cfg.MODEL.ROTATED = True
    CFG_RPN = cfg.MODEL.RPN

    CFG_RPN.ANCHOR_ANGLES = (-90, -54, -18, 18, 54)
    CFG_RPN.ANCHOR_SIZES = (48, 84, 128, 224)
    CFG_RPN.ANCHOR_STRIDE = (16, )
    CFG_RPN.ASPECT_RATIOS = (1.0, 2.0)

    if USE_FPN:
        CFG_RPN.ANCHOR_STRIDE = tuple(np.array(CFG_RPN.ANCHOR_SIZES) // 8)
    CFG_RPN.POST_NMS_TOP_N_TRAIN = 100

    image_list, feature_maps = get_image_list_and_feature_maps(
        image_tensor, CFG_RPN.ANCHOR_STRIDE)

    anchor_generator = make_rrpn_anchor_generator(cfg)
    num_anchors = anchor_generator.num_anchors_per_location()

    anchors = anchor_generator.forward(image_list, feature_maps)

    objectness = []
    box_regression = []
    for ix, fm in enumerate(feature_maps):
        n_anchors = num_anchors[ix]
        N, _, h, w = fm.shape
        objectness.append(torch.rand(N, n_anchors, h, w, device=device))
        box_regression.append(
            torch.rand(N, n_anchors * REGRESSION_CN, h, w, device=device))

    # train mode
    postprocessor_train = make_rpn_postprocessor(cfg,
                                                 rpn_box_coder=None,
                                                 is_train=True)
    postprocessor_train.train()

    # result = postprocessor_train.forward(anchors, objectness, box_regression, targets=targets)

    # check loss
    loss_evaluator = make_rpn_loss_evaluator(cfg,
                                             postprocessor_train.box_coder)
    loss_objectness, loss_rpn_box_reg = loss_evaluator(anchors, objectness,
                                                       box_regression, targets)

    # test mode
    postprocessor_test = make_rpn_postprocessor(cfg,
                                                rpn_box_coder=None,
                                                is_train=False)
    postprocessor_test.eval()

    result = postprocessor_test.forward(anchors, objectness, box_regression)
Example #28
0
    def forward_for_single_feature_map(self, locations, box_cls,
                                       box_regression, centerness,
                                       image_sizes):
        """
        Arguments:
            anchors: list[BoxList]
            box_cls: tensor of size N, A * C, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        N, C, H, W = box_cls.shape

        # put in the same format as locations
        box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1)
        box_cls = box_cls.reshape(N, -1, C).sigmoid()
        box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1)
        box_regression = box_regression.reshape(N, -1, 4)
        centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1)
        centerness = centerness.reshape(N, -1).sigmoid()

        candidate_inds = box_cls > self.pre_nms_thresh
        pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
        pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)
        #print('pre_nms_top_n: ', pre_nms_top_n)

        # multiply the classification scores with centerness scores
        box_cls = box_cls * centerness[:, :, None]

        results = []
        for i in range(N):
            per_box_cls = box_cls[i]
            per_candidate_inds = candidate_inds[i]
            per_box_cls = per_box_cls[per_candidate_inds]

            per_candidate_nonzeros = per_candidate_inds.nonzero()
            per_box_loc = per_candidate_nonzeros[:, 0]
            per_class = per_candidate_nonzeros[:, 1] + 1

            per_box_regression = box_regression[i]
            per_box_regression = per_box_regression[per_box_loc]
            per_locations = locations[per_box_loc]

            per_pre_nms_top_n = pre_nms_top_n[i]

            if per_candidate_inds.sum().item() > per_pre_nms_top_n.item():
                per_box_cls, top_k_indices = \
                    per_box_cls.topk(per_pre_nms_top_n, sorted=False)
                per_class = per_class[top_k_indices]
                per_box_regression = per_box_regression[top_k_indices]
                per_locations = per_locations[top_k_indices]

            detections = torch.stack([
                per_locations[:, 0] - per_box_regression[:, 0],
                per_locations[:, 1] - per_box_regression[:, 1],
                per_locations[:, 0] + per_box_regression[:, 2],
                per_locations[:, 1] + per_box_regression[:, 3],
            ],
                                     dim=1)

            h, w = image_sizes[i]
            boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy")
            boxlist.add_field("labels", per_class)
            boxlist.add_field("scores", per_box_cls)
            boxlist = boxlist.clip_to_image(remove_empty=False)
            boxlist = remove_small_boxes(boxlist, self.min_size)
            results.append(boxlist)

        return results
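The locations argument is normally a grid of stride-spaced points shifted by half a stride; a sketch along the lines of the FCOS reference implementation:

import torch

def compute_locations_per_level(h, w, stride, device="cpu"):
    shifts_x = torch.arange(0, w * stride, step=stride, dtype=torch.float32, device=device)
    shifts_y = torch.arange(0, h * stride, step=stride, dtype=torch.float32, device=device)
    shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
    locations = torch.stack((shift_x.reshape(-1), shift_y.reshape(-1)), dim=1) + stride // 2
    return locations  # (h * w, 2) points in image coordinates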
Example #29
0
    def forward_for_single_feature_map(self, anchors, objectness, box_regression_left, box_regression_right):
        """
        Arguments:
            anchors: list[BoxList]
            objectness: tensor of size N, A, H, W
            box_regression: tensor of size N, A * 4, H, W
        """
        device = objectness.device
        N, A, H, W = objectness.shape

        # put in the same format as anchors
        objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1)
        objectness = objectness.sigmoid()

        box_regression_left = permute_and_flatten(box_regression_left, N, A, 4, H, W)
        box_regression_right = permute_and_flatten(box_regression_right, N, A, 4, H, W)

        num_anchors = A * H * W

        pre_nms_top_n = min(self.pre_nms_top_n, num_anchors)
        objectness, topk_idx = objectness.topk(pre_nms_top_n, dim=1, sorted=True)

        batch_idx = torch.arange(N, device=device)[:, None]
        box_regression_left = box_regression_left[batch_idx, topk_idx]
        box_regression_right = box_regression_right[batch_idx, topk_idx]

        image_shapes = [box.size for box in anchors]
        concat_anchors = torch.cat([a.bbox for a in anchors], dim=0)
        concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx]

        proposals_left = self.box_coder.decode(
            box_regression_left.view(-1, 4), concat_anchors.view(-1, 4)
        )
        proposals_right = self.box_coder.decode(
            box_regression_right.view(-1, 4), concat_anchors.view(-1, 4)
        )

        proposals_left = proposals_left.view(N, -1, 4)
        proposals_right = proposals_right.view(N, -1, 4)

        result, result_right = [], []
        for proposal_left, proposal_right, score, im_shape in zip(proposals_left, proposals_right, objectness, image_shapes):
            boxlist_left = BoxList(proposal_left, im_shape, mode="xyxy")
            boxlist_left.add_field("objectness", score)
            boxlist_left = boxlist_left.clip_to_image(remove_empty=False)
            boxlist_left = remove_small_boxes(boxlist_left, self.min_size) # MAY CAUSE RuntimeError if training is unstable: copy_if failed to synchronize: device-side assert triggered
            keep_idx_i_left = boxlist_nms_idx(
                boxlist_left,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            boxlist_right = BoxList(proposal_right, im_shape, mode="xyxy")
            boxlist_right.add_field("objectness", score)
            boxlist_right = boxlist_right.clip_to_image(remove_empty=False)
            boxlist_right = remove_small_boxes(boxlist_right, self.min_size) # MAY CAUSE RuntimeError if training is unstable: copy_if failed to synchronize: device-side assert triggered
            keep_idx_i_right = boxlist_nms_idx(
                boxlist_right,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
            # TODO: optimize this!
            keep_idx_i = torch.from_numpy(
                np.intersect1d(keep_idx_i_left.cpu().numpy(),
                               keep_idx_i_right.cpu().numpy())).to(device)
            boxlist_left = boxlist_left[keep_idx_i]
            boxlist_right = boxlist_right[keep_idx_i]
            result.append(boxlist_left)
            result_right.append(boxlist_right)
        return result, result_right
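The np.intersect1d round-trip flagged by the TODO can stay on the device. One possible torch-only sketch, reusing the loop's local names (quadratic in the number of kept indices, which is small after NMS):

mask = (keep_idx_i_left.unsqueeze(1) == keep_idx_i_right.unsqueeze(0)).any(dim=1)
keep_idx_i = keep_idx_i_left[mask]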
Example #30
0
    def forward(self,
                anchors,
                objectness,
                box_regression,
                targets=None,
                centerness=None,
                rpn_center_box_regression=None,
                centerness_pack=None):
        """
        Arguments:
            anchors: list[list[BoxList]]
            objectness: list[tensor]
            box_regression: list[tensor]

        Returns:
            boxlists (list[BoxList]): the post-processed anchors, after
                applying box decoding and NMS
        """
        sampled_boxes = []
        num_levels = len(objectness)
        anchors = list(zip(*anchors))
        for a, o, b in zip(anchors, objectness, box_regression):
            sampled_boxes.append(self.forward_for_single_feature_map(a, o, b))

        boxlists = list(zip(*sampled_boxes))
        boxlists = [cat_boxlist(boxlist) for boxlist in boxlists]

        if num_levels > 1:
            boxlists = self.select_over_all_levels(boxlists)

        # append ground-truth bboxes to proposals
        if self.training and targets is not None:
            boxlists = self.add_gt_proposals(boxlists, targets)

        if self.pred_targets:
            pred_targets = []
            if True:
                for img_centerness, center_box_reg in zip(
                        centerness, rpn_center_box_regression):
                    # gt_centerness, gt_bbox, anchor_bbox = center_target
                    # print(rpn_center_box_regression, anchor_bbox)
                    # gt_mask = gt_centerness.detach().cpu().numpy() > 0.0
                    img_centerness = img_centerness[0, :, :]

                    center_box_reg = center_box_reg[:, :, :].permute(1, 2, 0)

                    anchor_bbox = np.zeros(shape=(center_box_reg.shape[0],
                                                  center_box_reg.shape[1], 4))
                    for xx in range(anchor_bbox.shape[1]):
                        for yy in range(anchor_bbox.shape[0]):
                            anchor_bbox[yy, xx, :] = [
                                max(0.0, xx * 4 - 16),
                                max(0.0, yy * 4 - 16),
                                min(xx * 4 + 16, boxlists[0].size[0]),
                                min(yy * 4 + 16, boxlists[0].size[1])
                            ]
                    anchor_bbox = torch.as_tensor(anchor_bbox,
                                                  device=center_box_reg.device)

                    # print(center_box_reg.shape, anchor_bbox.shape)
                    boxes = self.box_coder.decode(
                        center_box_reg.reshape(-1, 4), anchor_bbox.view(-1, 4))

                    pred_target = None
                    pred_score = torch.sigmoid(
                        img_centerness.detach()).cpu().numpy()
                    pred_mask = pred_score > 0.95
                    # print(gt_mask.shape, pred_mask.shape)
                    imllabel, numlabel = scipy.ndimage.label(pred_mask)
                    if numlabel > 0:
                        valid = np.zeros(shape=(numlabel, ), dtype=bool)
                        box_inds = []
                        for ano in range(1, numlabel + 1):
                            mask = imllabel == ano
                            valid[ano - 1] = True  #  gt_mask[mask].sum() == 0
                            box_inds.append(np.argmax(pred_score * mask))
                        if np.any(valid):
                            boxes = boxes[box_inds, :]
                            # print(box_inds, boxes, anchor_bbox.view(-1, 4)[box_inds, :], gt_bbox.view(-1, 4)[box_inds, :])
                            pred_target = BoxList(torch.as_tensor(boxes),
                                                  boxlists[0].size,
                                                  mode="xyxy")
                            pred_target.clip_to_image()
                            pred_target = pred_target.to(img_centerness.device)
                            # print(img_centerness.device, pred_target.bbox.device)
                    pred_targets.append(pred_target)
            else:
                for img_centerness in centerness:
                    pred_target = None
                    pred_mask = torch.sigmoid(
                        img_centerness[0, :, :].detach()).cpu().numpy() > 0.95
                    # print(gt_mask.shape, pred_mask.shape)
                    imllabel, numlabel = scipy.ndimage.label(pred_mask)
                    if numlabel > 0:
                        masks = np.zeros(shape=(pred_mask.shape[0],
                                                pred_mask.shape[1], numlabel),
                                         dtype=np.uint8)
                        valid = np.zeros(shape=(numlabel, ), dtype=bool)
                        for ano in range(1, numlabel + 1):
                            mask = imllabel == ano
                            valid[ano - 1] = True
                            masks[:, :, ano - 1] = mask
                        if np.any(valid):
                            masks = masks[:, :, valid]
                            boxes = extract_bboxes(masks)
                            pred_target = BoxList(torch.as_tensor(boxes),
                                                  boxlists[0].size,
                                                  mode="xyxy")
                            pred_target.clip_to_image()
                            pred_target = pred_target.to(img_centerness.device)
                            # print(img_centerness.device, pred_target.bbox.device)
                    pred_targets.append(pred_target)

            if not self.training:
                print('add', [
                    len(pred_target)
                    for pred_target in pred_targets if pred_target
                ], 'proposals')
            boxlists = self.add_pred_proposals(boxlists, pred_targets)
        else:
            pred_targets = None

        return boxlists, pred_targets
Example #31
0
def to_image_list_synthesize_4(transposed_info, size_divisible=0):
    tensors = transposed_info[0]
    if isinstance(tensors, (tuple, list)):
        targets = transposed_info[1]
        img_ids = transposed_info[2]
        #synthesize data:
        assert len(tensors) % 4 == 0, \
            'len(tensors) must be a multiple of 4 for 4-to-1 synthesis'
        max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))

        # TODO Ideally, just remove this and let the model handle arbitrary
        # input sizes
        if size_divisible > 0:
            import math

            stride = size_divisible
            max_size = list(max_size)
            max_size[1] = int(math.ceil(max_size[1] / stride) * stride)
            max_size[2] = int(math.ceil(max_size[2] / stride) * stride)
            max_size = tuple(max_size)

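        # every group of four source images is merged into one mosaic image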
        batch_shape = (len(tensors)//4,) + max_size
        syn_batched_imgs = tensors[0].new(*batch_shape).zero_()

        syn_targets = []
        with torch.no_grad():
            for idx, pad_img in enumerate(syn_batched_imgs):
                # max_size is (C, H, W); each quadrant is half the padded size
                new_h, new_w = max_size[1] // 2, max_size[2] // 2

                # NOTE: F.interpolate takes size as (h, w)
                mode = 'nearest'
                topLeftImg = torch.nn.functional.interpolate(
                    tensors[idx*4].unsqueeze(0), size=(new_h, new_w), mode=mode).squeeze(0)
                topRightImg = torch.nn.functional.interpolate(
                    tensors[idx*4+1].unsqueeze(0), size=(new_h, new_w), mode=mode).squeeze(0)
                bottomLeftImg = torch.nn.functional.interpolate(
                    tensors[idx*4+2].unsqueeze(0), size=(new_h, new_w), mode=mode).squeeze(0)
                bottomRightImg = torch.nn.functional.interpolate(
                    tensors[idx*4+3].unsqueeze(0), size=(new_h, new_w), mode=mode).squeeze(0)
                c = topLeftImg.shape[0]
                assert c == topRightImg.shape[0] and c == bottomLeftImg.shape[0] and c == bottomRightImg.shape[0]

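                # tile the four downscaled images into the quadrants of pad_img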
                pad_img[:c, :new_h, :new_w].copy_(topLeftImg)
                pad_img[:c, :new_h, new_w:].copy_(topRightImg)
                pad_img[:c, new_h:, :new_w].copy_(bottomLeftImg)
                pad_img[:c, new_h:, new_w:].copy_(bottomRightImg)

                # resize each of the four targets to the quadrant scale
                # NOTE: BoxList.resize expects (w, h)
                topLeftBL = targets[idx*4].resize((new_w, new_h))
                topRightBL = targets[idx*4+1].resize((new_w, new_h))
                bottomLeftBL = targets[idx*4+2].resize((new_w, new_h))
                bottomRightBL = targets[idx*4+3].resize((new_w, new_h))
                assert topLeftBL.mode == 'xyxy'
                offsets = [torch.Tensor([0.0, 0.0, 0.0, 0.0]),
                           torch.Tensor([new_w, 0.0, new_w, 0.0]),
                           torch.Tensor([0.0, new_h, 0.0, new_h]),
                           torch.Tensor([new_w, new_h, new_w, new_h])]
                # shift each quadrant's boxes by its offset (top-left keeps a zero offset)
                syn_bbox = torch.cat(
                    (topLeftBL.bbox + offsets[0],
                     topRightBL.bbox + offsets[1],
                     bottomLeftBL.bbox + offsets[2],
                     bottomRightBL.bbox + offsets[3]), dim=0)
                #NOTE: BoxList initialization require first w then h
                tmp_BoxList = BoxList(syn_bbox, (new_w*2, new_h*2), mode='xyxy')

                tmp_BoxList.add_field(
                    'labels',
                    torch.cat((topLeftBL.extra_fields['labels'],
                               topRightBL.extra_fields['labels'],
                               bottomLeftBL.extra_fields['labels'],
                               bottomRightBL.extra_fields['labels']), dim=-1))

                # NOTE: shift the target masks into the corresponding quadrants
                topLeftPoly = [poly.polygons[0] for poly in topLeftBL.extra_fields['masks'].instances.polygons]
                topRightPoly = [poly.polygons[0] for poly in topRightBL.extra_fields['masks'].instances.polygons]
                bottomLeftPoly = [poly.polygons[0] for poly in bottomLeftBL.extra_fields['masks'].instances.polygons]
                bottomRightPoly = [poly.polygons[0] for poly in bottomRightBL.extra_fields['masks'].instances.polygons]

                offsets = [[0.0, 0.0], [new_w, 0.0], [0.0, new_h], [new_w, new_h]]
                # polygons are flat [x0, y0, x1, y1, ...] lists, so each (dx, dy)
                # offset is tiled to the polygon length before being added
                syn_mask = [[list(np.array(poly) + np.array(offsets[0] * (len(poly) // 2)))] for poly in topLeftPoly] + \
                    [[list(np.array(poly) + np.array(offsets[1] * (len(poly) // 2)))] for poly in topRightPoly] + \
                    [[list(np.array(poly) + np.array(offsets[2] * (len(poly) // 2)))] for poly in bottomLeftPoly] + \
                    [[list(np.array(poly) + np.array(offsets[3] * (len(poly) // 2)))] for poly in bottomRightPoly]
                syn_mask = SegmentationMask(syn_mask, (new_w*2, new_h*2), mode='poly')
                tmp_BoxList.add_field('masks', syn_mask)
                
                # append a four-to-one BoxList object
                syn_targets.append(tmp_BoxList)

        syn_targets = tuple(syn_targets)

        assert len(img_ids) % 4 == 0
        # since the images are synthesized, their ids are meaningless; substitute -1
        syn_img_ids = tuple([-1] * len(syn_targets))
        syn_image_sizes = [list(max_size)[-2:] for _ in range(batch_shape[0])]

        return ImageList(syn_batched_imgs, syn_image_sizes), syn_targets, syn_img_ids
    else:
        raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))
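
A minimal collate-style usage sketch for the helper above; the name `synthesize_collate` and the assumption that each batch element is an (image, target, img_id) triple, with the batch length a multiple of four, are illustrative and not taken from the original code:

def synthesize_collate(batch, size_divisible=32):
    # batch: list of (image_tensor, BoxList_target, img_id) triples, len(batch) % 4 == 0
    transposed_info = list(zip(*batch))
    return to_image_list_synthesize_4(transposed_info, size_divisible=size_divisible)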