def __getitem__(self, idx):
    """Load sample ``idx`` and package its annotations as a ``BoxList``.

    The parent ``__getitem__`` supplies the image and the raw annotation
    list. Boxes are read from each annotation's ``"bbox"`` entry in
    ``xywh`` mode and converted to ``xyxy``; category ids are remapped
    through ``self.json_category_id_to_contiguous_id`` and stored in the
    ``"labels"`` field. ``"masks"`` and ``"hier"`` fields are attached
    depending on ``self.ann_types``.

    Returns:
        A ``(img, target, idx)`` tuple, after ``self._transforms`` has
        been applied when it is not ``None``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Drop crowd annotations. Only the first annotation is probed for the
    # 'iscrowd' key before filtering the whole list.
    # TODO might be better to keep them and expose an extra field instead
    if len(anno) > 0 and 'iscrowd' in anno[0]:
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

    # reshape(-1, 4) keeps a well-formed (0, 4) tensor when nothing is left.
    xywh = torch.as_tensor([obj["bbox"] for obj in anno]).reshape(-1, 4)
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    labels = torch.tensor(
        [self.json_category_id_to_contiguous_id[obj["category_id"]] for obj in anno]
    )
    target.add_field("labels", labels)

    if 'segm' in self.ann_types:
        polygons = [obj["segmentation"] for obj in anno]
        target.add_field("masks", SegmentationMask(polygons, img.size, mode='poly'))

    # The hier field is only attached when the first annotation carries it.
    if 'hier' in self.ann_types and anno and "hier" in anno[0]:
        hier_anno = [obj["hier"] for obj in anno]
        target.add_field("hier", Hier(hier_anno, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def get_det_result(self, locations, box_cls, box_regression, boxes):
    """Decode thresholded per-location predictions into one merged BoxList.

    For every entry ``i`` of ``box_cls`` the scores above
    ``self.pre_nms_thresh`` are kept (at most ``self.pre_nms_top_n`` of
    them), the matching regression offsets are applied around the
    corresponding ``locations`` and the resulting coordinates are mapped
    from the ``self.resolution`` grid into ``boxes.bbox[i]``.

    Arguments:
        locations: tensor indexed as ``locations[k, 0]`` / ``locations[k, 1]``
            (x and y of each location).
        box_cls: per-entry score tensor; ``box_cls[i]`` is indexed as
            (location, class) -- assumed from the nonzero() column usage.
        box_regression: per-entry offsets; four columns are read per location.
        boxes: BoxList providing ``bbox``, ``size`` and a ``"scores"`` field.

    Returns:
        The result of ``cat_boxlist`` over the per-entry BoxLists, each
        carrying ``"labels"`` and ``"scores"`` fields.
    """
    num_entries = len(box_cls)
    h, w = self.resolution

    keep_mask = box_cls > self.pre_nms_thresh
    top_n_limits = keep_mask.view(num_entries, -1).sum(1)
    top_n_limits = top_n_limits.clamp(max=self.pre_nms_top_n)

    parent_boxes = boxes.bbox
    image_size = boxes.size
    parent_scores = boxes.get_field("scores")

    decoded = []
    for i in range(num_entries):
        parent = parent_boxes[i]
        parent_score = parent_scores[i]
        entry_mask = keep_mask[i]
        scores = box_cls[i][entry_mask]

        # Each row of `hits` is (location index, class column) of a kept score.
        hits = entry_mask.nonzero()
        loc_idx = hits[:, 0]
        # NOTE(review): labels are the class column shifted by 2 -- the reason
        # for the offset is not visible here; confirm against the label map.
        labels = hits[:, 1] + 2

        offsets = box_regression[i][loc_idx]
        points = locations[loc_idx]

        limit = top_n_limits[i]
        if entry_mask.sum().item() > limit.item():
            scores, chosen = scores.topk(limit, sorted=False)
            labels = labels[chosen]
            offsets = offsets[chosen]
            points = points[chosen]

        # Offsets are distances from the location to the four box sides.
        x1 = points[:, 0] - offsets[:, 0]
        y1 = points[:, 1] - offsets[:, 1]
        x2 = points[:, 0] + offsets[:, 2]
        y2 = points[:, 1] + offsets[:, 3]

        # Rescale from the (h, w) grid into the parent box's coordinates.
        span_x = parent[2] - parent[0]
        span_y = parent[3] - parent[1]
        x1 = x1 / w * span_x + parent[0]
        y1 = y1 / h * span_y + parent[1]
        x2 = x2 / w * span_x + parent[0]
        y2 = y2 / h * span_y + parent[1]

        entry_result = BoxList(
            torch.stack([x1, y1, x2, y2], dim=-1), image_size, mode="xyxy"
        )
        entry_result.add_field("labels", labels)
        # Final score combines the class score with the parent box score.
        entry_result.add_field(
            "scores", torch.sqrt(torch.sqrt(scores) * parent_score)
        )
        entry_result = entry_result.clip_to_image(remove_empty=False)
        entry_result = remove_small_boxes(entry_result, self.min_size)
        decoded.append(entry_result)

    return cat_boxlist(decoded)
def __getitem__(self, idx):
    """Load sample ``idx`` and package its annotations as a ``BoxList``.

    The parent ``__getitem__`` supplies the image and the raw annotation
    list. Boxes are read from each annotation's ``"bbox"`` entry in
    ``xywh`` mode and converted to ``xyxy``; category ids are remapped
    through ``self.json_category_id_to_contiguous_id`` and stored in the
    ``"labels"`` field. ``"masks"``, ``"keypoints"``, ``"parsing"`` and
    ``"uv"`` fields are attached depending on ``self.ann_types``.

    Returns:
        A ``(img, target, idx)`` tuple, after ``self._transforms`` has
        been applied when it is not ``None``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Drop crowd annotations. Only the first annotation is probed for the
    # 'iscrowd' key before filtering the whole list.
    # TODO might be better to keep them and expose an extra field instead
    if len(anno) > 0 and 'iscrowd' in anno[0]:
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

    # reshape(-1, 4) keeps a well-formed (0, 4) tensor when nothing is left.
    xywh = torch.as_tensor([obj["bbox"] for obj in anno]).reshape(-1, 4)
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    labels = torch.tensor(
        [self.json_category_id_to_contiguous_id[obj["category_id"]] for obj in anno]
    )
    target.add_field("labels", labels)

    if 'segm' in self.ann_types:
        polygons = [obj["segmentation"] for obj in anno]
        target.add_field("masks", SegmentationMask(polygons, img.size, mode='poly'))

    # Keypoints are only attached when the first annotation carries them.
    if 'keypoints' in self.ann_types and anno and "keypoints" in anno[0]:
        kp_anno = [obj["keypoints"] for obj in anno]
        target.add_field("keypoints", PersonKeypoints(kp_anno, img.size))

    if 'parsing' in self.ann_types:
        parsing_anno = [get_parsing(self.root, obj["parsing"]) for obj in anno]
        target.add_field("parsing", Parsing(parsing_anno, img.size))

    if 'uv' in self.ann_types:
        # Annotations without dense-pose data contribute an empty entry so
        # the list stays aligned with the boxes.
        uv_anno = [
            [
                obj['dp_x'], obj['dp_y'], obj['dp_I'],
                obj['dp_U'], obj['dp_V'], obj['dp_masks'],
            ]
            if "dp_x" in obj
            else []
            for obj in anno
        ]
        target.add_field("uv", DenseposeUVs(uv_anno, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx
def forward_for_single_feature_map(self, anchors, objectness, box_regression):
    """Turn one feature level's outputs into per-image proposal BoxLists.

    Arguments:
        anchors: list[BoxList]
        objectness: tensor of size N, A, H, W
        box_regression: tensor of size N, A * 4, H, W

    Returns:
        list[BoxList]: one BoxList per image, in ``xyxy`` mode with an
        ``"objectness"`` field, after clipping, small-box removal and NMS
        limited to ``self.post_nms_top_n`` proposals.
    """
    device = objectness.device
    N, A, H, W = objectness.shape

    # Flatten both outputs so they line up with the anchor ordering.
    scores = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1).sigmoid()
    deltas = permute_and_flatten(box_regression, N, A, 4, H, W)

    # Keep only the best-scoring anchors per image before decoding.
    keep_n = min(self.pre_nms_top_n, A * H * W)
    scores, top_idx = scores.topk(keep_n, dim=1, sorted=True)

    # Column vector of image indices, broadcast against top_idx when indexing.
    rows = torch.arange(N, device=device)[:, None]
    deltas = deltas[rows, top_idx]

    image_shapes = [anchor_list.size for anchor_list in anchors]
    stacked_anchors = torch.cat([anchor_list.bbox for anchor_list in anchors], dim=0)
    stacked_anchors = stacked_anchors.reshape(N, -1, 4)[rows, top_idx]

    proposals = self.box_coder.decode(
        deltas.view(-1, 4), stacked_anchors.view(-1, 4)
    ).view(N, -1, 4)

    result = []
    for image_proposals, image_scores, shape in zip(proposals, scores, image_shapes):
        candidates = BoxList(image_proposals, shape, mode="xyxy")
        candidates.add_field("objectness", image_scores)
        candidates = candidates.clip_to_image(remove_empty=False)
        candidates = remove_small_boxes(candidates, self.min_size)
        result.append(
            boxlist_nms(
                candidates,
                self.nms_thresh,
                max_proposals=self.post_nms_top_n,
                score_field="objectness",
            )
        )
    return result
def __getitem__(self, idx):
    """Load sample ``idx`` and package its annotations as a ``BoxList``.

    The parent ``__getitem__`` supplies the image and the raw annotation
    list. Boxes are read from each annotation's ``"bbox"`` entry in
    ``xywh`` mode and converted to ``xyxy``; category ids are remapped
    through ``self.json_category_id_to_contiguous_id`` and stored in the
    ``"labels"`` field. ``"masks"``, ``"semsegs"`` and ``"parsing"``
    fields are attached depending on ``self.ann_types``.

    Returns:
        A ``(img, target, idx)`` tuple, after ``self._transforms`` has
        been applied when it is not ``None``.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # Drop crowd annotations. Only the first annotation is probed for the
    # 'iscrowd' key before filtering the whole list.
    # TODO might be better to keep them and expose an extra field instead
    if len(anno) > 0 and 'iscrowd' in anno[0]:
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

    # reshape(-1, 4) keeps a well-formed (0, 4) tensor when nothing is left.
    xywh = torch.as_tensor([obj["bbox"] for obj in anno]).reshape(-1, 4)
    target = BoxList(xywh, img.size, mode="xywh").convert("xyxy")

    classes = torch.tensor(
        [self.json_category_id_to_contiguous_id[obj["category_id"]] for obj in anno]
    )
    target.add_field("labels", classes)

    if 'segm' in self.ann_types:
        polygons = [obj["segmentation"] for obj in anno]
        target.add_field("masks", SegmentationMask(polygons, img.size, mode='poly'))

    if 'semseg' in self.ann_types:
        # With parsing enabled the semantic map is fetched by image file name
        # ('pic' mode); otherwise the per-object polygons are used ('poly').
        if 'parsing' in self.ann_types:
            semseg_source = get_semseg(
                self.root,
                self.coco.loadImgs(self.ids[idx])[0]['file_name'],
            )
            semseg_mode = 'pic'
        else:
            semseg_source = [obj["segmentation"] for obj in anno]
            semseg_mode = 'poly'
        target.add_field(
            "semsegs",
            SemanticSegmentation(semseg_source, classes, img.size, mode=semseg_mode),
        )

    if 'parsing' in self.ann_types:
        parsing_anno = [get_parsing(self.root, obj["parsing"]) for obj in anno]
        target.add_field("parsing", Parsing(parsing_anno, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self._transforms is not None:
        img, target = self._transforms(img, target)

    return img, target, idx