Example #1
        def _checkIgnore(dt, iregion):
            if iregion is None:
                return True

            bb = np.array(dt['bbox']).astype(int)
            x1, y1, x2, y2 = bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3]
            x2 = min(x2, iregion.shape[1])
            y2 = min(y2, iregion.shape[0])

            if bb[2] * bb[3] == 0:
                return False

            crop_iregion = iregion[y1:y2, x1:x2]

            if crop_iregion.sum() == 0:
                return True

            if 'uv' not in dt:  # filtering boxes
                return crop_iregion.sum() / bb[2] / bb[3] < self.ignoreThrBB

            # filtering UVs
            ignoremask = np.require(crop_iregion, requirements=['F'])
            uvmask = np.require(np.asarray(dt['uv'][0] > 0), dtype=np.uint8,
                                requirements=['F'])
            uvmask_ = maskUtils.encode(uvmask)
            ignoremask_ = maskUtils.encode(ignoremask)
            uviou = maskUtils.iou([uvmask_], [ignoremask_], [1])[0]
            return uviou < self.ignoreThrUV
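The final maskUtils.iou call above passes [1] as the iscrowd flag, so pycocotools computes intersection over the detection's area instead of true IoU; the returned value is the fraction of the UV mask that lies inside the ignore region. A minimal sketch of that behavior on toy masks:

import numpy as np
import pycocotools.mask as maskUtils

# Toy 4x4 masks: dt covers the left half, gt (the "ignore region") the top half.
dt = np.asfortranarray(np.zeros((4, 4), dtype=np.uint8))
dt[:, :2] = 1
gt = np.asfortranarray(np.zeros((4, 4), dtype=np.uint8))
gt[:2, :] = 1
dt_rle, gt_rle = maskUtils.encode(dt), maskUtils.encode(gt)

print(maskUtils.iou([dt_rle], [gt_rle], [0]))  # standard IoU: 4 / 12 = 0.333...
print(maskUtils.iou([dt_rle], [gt_rle], [1]))  # iscrowd: inter / area(dt) = 4 / 8 = 0.5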
Example #2
def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
    """Arrange results to match COCO specs in http://cocodataset.org/#format
    """
    # If no results, return an empty list
    if rois is None:
        return []

    results = []
    for image_id in image_ids:
        # Loop through detections
        for i in range(rois.shape[0]):
            class_id = class_ids[i]
            score = scores[i]
            bbox = np.around(rois[i], 1)
            mask = masks[:, :, i]

            result = {
                "image_id": image_id,
                "category_id": dataset.get_source_class_id(class_id, "coco"),
                "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
                "score": score,
                "segmentation": maskUtils.encode(np.asfortranarray(mask))
            }
            results.append(result)
    return results
Example #3
def crop_mask(boxes, segmentations, flipped, imsize):
    assert (boxes.shape[0] == len(segmentations))
    psegmentations = []
    for i in range(len(segmentations)):
        gts = segmentations[i]
        box = boxes[i, :]
        if type(gts) == list and gts:
            assert (type(gts[0]) != dict)
            prle = mask.frPyObjects(gts, imsize[1], imsize[0])
        elif type(gts) == dict and type(gts['counts']) == list:
            prle = mask.frPyObjects([gts], imsize[1], imsize[0])
        elif type(gts) == dict and isinstance(gts['counts'], (str, bytes)):
            prle = [gts]
        else:
            print('{} box has no segmentation'.format(i))
            psegmentations.append([])
            continue
        if len(prle) == 1:
            prle = prle[0]
        else:
            prle = mask.merge(prle)
        pmask = mask.decode([prle])
        if flipped:
            pmask = pmask[:, ::-1, :]
        pmask = np.copy(pmask[box[1]:box[3], box[0]:box[2], :], order='F')
        psegmentations.append(mask.encode(pmask))
    return psegmentations
Example #4
def segm_results(cls_boxes, masks, ref_boxes, im_h, im_w,
                 num_classes=81,
                 M=14, #  cfg.MRCNN.RESOLUTION
                 cls_specific_mask=True,
                 thresh_binarize=0.5):
    cls_segms = [[] for _ in range(num_classes)]
    mask_ind = 0
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    scale = (M + 2.0) / M
    ref_boxes = box_utils.expand_boxes(ref_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)

    # skip j = 0, because it's the background class
    for j in range(1, num_classes):
        segms = []
        for _ in range(cls_boxes[j].shape[0]):
            if cls_specific_mask:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, j, :, :]
            else:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :]

            ref_box = ref_boxes[mask_ind, :]
            w = ref_box[2] - ref_box[0] + 1
            h = ref_box[3] - ref_box[1] + 1
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > thresh_binarize, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)

            im_mask[y_0:y_1, x_0:x_1] = mask[
                (y_0 - ref_box[1]):(y_1 - ref_box[1]),
                (x_0 - ref_box[0]):(x_1 - ref_box[0])
            ]

            # Get RLE encoding used by the COCO evaluation API
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F')
            )[0]
            rle['counts'] = rle['counts'].decode() # convert back to str so that it can be later saved to json
            segms.append(rle)

            mask_ind += 1

        cls_segms[j] = segms

    assert mask_ind == masks.shape[0]
    return cls_segms
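The 1-pixel zero-padding above enlarges the mask canvas from M to M + 2, so the reference boxes must grow by the same (M + 2) / M factor for the resized mask to stay aligned with the image. A minimal sketch of the box expansion, written to mirror what Detectron's box_utils.expand_boxes does (the helper name here is illustrative, not the library's code):

import numpy as np

def expand_boxes_sketch(boxes, scale):
    """Scale (x1, y1, x2, y2) boxes about their centers by `scale`."""
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 * scale
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 * scale
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5
    boxes_exp = np.zeros_like(boxes)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp

# A 14x14 mask zero-padded to 16x16 spans (M + 2) / M of the original box,
# so the box is grown by the same factor to keep the pasted mask aligned.
M = 14
print(expand_boxes_sketch(np.array([[10., 10., 24., 24.]]), (M + 2.0) / M))
# [[ 9.  9. 25. 25.]]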
Example #5
def _flip_rle(rle, height, width):
    if 'counts' in rle and type(rle['counts']) == list:
        # Magic RLE format handling painfully discovered by looking at the
        # COCO API showAnns function.
        rle = mask_util.frPyObjects([rle], height, width)
    mask = mask_util.decode(rle)
    mask = mask[:, ::-1, :]
    rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
    return rle
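The counts check above distinguishes the two RLE flavors the COCO API uses: an uncompressed dict whose 'counts' is a plain Python list of column-major run lengths, and the compressed form whose 'counts' is a byte string. A small sketch of the conversion on a toy 2x2 mask:

import pycocotools.mask as mask_util

# Uncompressed RLE: alternating 0-runs and 1-runs in column-major order.
uncompressed = {'size': [2, 2], 'counts': [1, 2, 1]}  # decodes to [[0, 1], [1, 0]]
compressed = mask_util.frPyObjects([uncompressed], 2, 2)[0]
print(type(compressed['counts']))  # <class 'bytes'>: the compact form
print(mask_util.decode(compressed))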
Example #6
File: test.py Project: chenyilun95/PANet
def segm_results(cls_boxes, masks, ref_boxes, im_h, im_w):
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_segms = [[] for _ in range(num_classes)]
    mask_ind = 0
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    M = cfg.MRCNN.RESOLUTION
    scale = (M + 2.0) / M
    ref_boxes = box_utils.expand_boxes(ref_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32)

    # skip j = 0, because it's the background class
    for j in range(1, num_classes):
        segms = []
        for _ in range(cls_boxes[j].shape[0]):
            if cfg.MRCNN.CLS_SPECIFIC_MASK:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, j, :, :]
            else:
                padded_mask[1:-1, 1:-1] = masks[mask_ind, 0, :, :]

            ref_box = ref_boxes[mask_ind, :]
            w = (ref_box[2] - ref_box[0] + 1)
            h = (ref_box[3] - ref_box[1] + 1)
            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > cfg.MRCNN.THRESH_BINARIZE, dtype=np.uint8)
            im_mask = np.zeros((im_h, im_w), dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, im_w)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, im_h)

            im_mask[y_0:y_1, x_0:x_1] = mask[
                (y_0 - ref_box[1]):(y_1 - ref_box[1]), (x_0 - ref_box[0]):(x_1 - ref_box[0])]

            # Get RLE encoding used by the COCO evaluation API
            rle = mask_util.encode(np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            # For dumping to json, need to decode the byte string.
            # https://github.com/cocodataset/cocoapi/issues/70
            rle['counts'] = rle['counts'].decode('ascii')
            segms.append(rle)

            mask_ind += 1

        cls_segms[j] = segms

    assert mask_ind == masks.shape[0]
    return cls_segms
Example #7
def _RleCompress(masks):
  """Compresses mask using Run-length encoding provided by pycocotools.

  Args:
    masks: uint8 numpy array of shape [mask_height, mask_width] with values in
      {0, 1}.

  Returns:
    A pycocotools Run-length encoding of the mask.
  """
  return mask.encode(np.asfortranarray(masks))
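A quick round trip showing what these thin encode wrappers produce, on a hypothetical toy mask:

import numpy as np
from pycocotools import mask

toy = np.zeros((4, 6), dtype=np.uint8)
toy[1:3, 2:5] = 1
rle = mask.encode(np.asfortranarray(toy))
print(rle['size'])                            # [4, 6]
print(mask.area(rle))                         # 6: number of foreground pixels
print(np.array_equal(mask.decode(rle), toy))  # True: the round trip is lossless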
Example #8
def convert(args):
	data_dict = json.load(open(args.imgCatIdsFile, 'r'))
	img2id = {x['file_name']: x['id'] 
			  for x in data_dict['images']}
	img2info = {x['file_name']: x 
			    for x in data_dict['images']}

	categories = data_dict['categories'] 
	images = []
	images_unique = set()
	annotations = []
	ann_id = 0
	# loop over annotation files
	files_ann = sorted(glob.glob(os.path.join(args.ann_dir, '*.png')))
	for i, file_ann in enumerate(files_ann):
		if i % 50 == 0:
			print('#files processed: {}'.format(i))

		file_name = os.path.basename(file_ann).replace('.png', '.jpg')
		img_id = img2id[file_name]
		if file_name not in images_unique:
			images_unique.add(file_name)
			images.append(img2info[file_name])

		ann_mask = imread(file_ann)
		Om = ann_mask[:, :, 0]
		Oi = ann_mask[:, :, 1]

		# loop over instances
		for instIdx in np.unique(Oi):
			if instIdx == 0:
				continue
			imask = (Oi == instIdx)
			cat_id = Om[imask][0]

			# RLE encoding; decode the bytes so the result can be dumped to json
			rle = COCOmask.encode(np.asfortranarray(imask.astype(np.uint8)))
			rle['counts'] = rle['counts'].decode('ascii')

			ann = {}
			ann['id'] = ann_id
			ann_id += 1
			ann['image_id'] = img_id
			ann['segmentation'] = rle
			ann['category_id'] = int(cat_id)
			ann['iscrowd'] = 0
			ann['area'] = int(np.sum(imask))
			annotations.append(ann)

	# data_dict['annotations'] = annotations
	print('#files: {}, #instances: {}'.format(len(files_ann), len(annotations)))

	data_out = {'categories': categories, 'images': images, 'annotations': annotations}
	with open(args.output_json, 'w') as f:
		json.dump(data_out, f)
Example #9
def prepare_for_coco_segmentation(predictions, dataset):
    import pycocotools.mask as mask_util
    import numpy as np

    masker = Masker(threshold=0.5, padding=1)
    # assert isinstance(dataset, COCODataset)
    coco_results = []
    for image_id, prediction in tqdm(enumerate(predictions)):
        original_id = dataset.id_to_img_map[image_id]
        if len(prediction) == 0:
            continue

        # TODO replace with get_img_info?
        image_width = dataset.coco.imgs[original_id]["width"]
        image_height = dataset.coco.imgs[original_id]["height"]
        prediction = prediction.resize((image_width, image_height))
        masks = prediction.get_field("mask")
        # t = time.time()
        masks = masker(masks, prediction)
        # logger.info('Time mask: {}'.format(time.time() - t))
        # prediction = prediction.convert('xywh')

        # boxes = prediction.bbox.tolist()
        scores = prediction.get_field("scores").tolist()
        labels = prediction.get_field("labels").tolist()

        # rles = prediction.get_field('mask')

        rles = [
            mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F"))[0]
            for mask in masks
        ]
        for rle in rles:
            rle["counts"] = rle["counts"].decode("utf-8")

        mapped_labels = [dataset.contiguous_category_id_to_json_id[i] for i in labels]

        coco_results.extend(
            [
                {
                    "image_id": original_id,
                    "category_id": mapped_labels[k],
                    "segmentation": rle,
                    "score": scores[k],
                }
                for k, rle in enumerate(rles)
            ]
        )
    return coco_results
Example #10
def eval_coco(df, detect_func, tqdm_bar=None):
    """
    Args:
        df: a DataFlow which produces (image, image_id)
        detect_func: a callable, takes [image] and returns [DetectionResult]
        tqdm_bar: a tqdm object to be shared among multiple evaluation instances. If None,
            will create a new one.

    Returns:
        list of dict, to be dumped to COCO json format
    """
    df.reset_state()
    all_results = []
    # tqdm is not quite thread-safe: https://github.com/tqdm/tqdm/issues/323
    with ExitStack() as stack:
        if tqdm_bar is None:
            tqdm_bar = stack.enter_context(
                tqdm.tqdm(total=df.size(), **get_tqdm_kwargs()))
        for img, img_id in df:
            results = detect_func(img)
            for r in results:
                box = r.box
                cat_id = COCOMeta.class_id_to_category_id[r.class_id]
                box[2] -= box[0]
                box[3] -= box[1]

                res = {
                    'image_id': img_id,
                    'category_id': cat_id,
                    'bbox': list(map(lambda x: round(float(x), 3), box)),
                    'score': round(float(r.score), 4),
                }

                # also append segmentation to results
                if r.mask is not None:
                    rle = cocomask.encode(
                        np.array(r.mask[:, :, None], order='F'))[0]
                    rle['counts'] = rle['counts'].decode('ascii')
                    res['segmentation'] = rle
                all_results.append(res)
            tqdm_bar.update(1)
    return all_results
Example #11
File: eval.py Project: wu-yy/tensorpack
def eval_coco(df, detect_func):
    """
    Args:
        df: a DataFlow which produces (image, image_id)
        detect_func: a callable, takes [image] and returns [DetectionResult]

    Returns:
        list of dict, to be dumped to COCO json format
    """
    df.reset_state()
    all_results = []
    with tqdm.tqdm(total=df.size(), **get_tqdm_kwargs()) as pbar:
        for img, img_id in df.get_data():
            results = detect_func(img)
            for r in results:
                box = r.box
                cat_id = COCOMeta.class_id_to_category_id[r.class_id]
                box[2] -= box[0]
                box[3] -= box[1]

                res = {
                    'image_id': img_id,
                    'category_id': cat_id,
                    'bbox': list(map(lambda x: float(round(x, 1)), box)),
                    'score': float(round(r.score, 2)),
                }

                # also append segmentation to results
                if r.mask is not None:
                    rle = cocomask.encode(
                        np.array(r.mask[:, :, None], order='F'))[0]
                    rle['counts'] = rle['counts'].decode('ascii')
                    res['segmentation'] = rle
                all_results.append(res)
            pbar.update(1)
    return all_results
Example #12
def rle_mask_voting(
    top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'
):
    """Returns new masks (in correspondence with `top_masks`) by combining
    multiple overlapping masks coming from the pool of `all_masks`. Two methods
    for combining masks are supported: 'AVG' uses a weighted average of
    overlapping mask pixels; 'UNION' takes the union of all mask pixels.
    """
    if len(top_masks) == 0:
        return

    all_not_crowd = [False] * len(all_masks)
    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
    decoded_all_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks
    ]
    decoded_top_masks = [
        np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks
    ]
    all_boxes = all_dets[:, :4].astype(np.int32)
    all_scores = all_dets[:, 4]

    # Fill box support with weights
    mask_shape = decoded_all_masks[0].shape
    mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))
    for k in range(len(all_masks)):
        ref_box = all_boxes[k]
        x_0 = max(ref_box[0], 0)
        x_1 = min(ref_box[2] + 1, mask_shape[1])
        y_0 = max(ref_box[1], 0)
        y_1 = min(ref_box[3] + 1, mask_shape[0])
        mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
    mask_weights = np.maximum(mask_weights, 1e-5)

    top_segms_out = []
    for k in range(len(top_masks)):
        # Corner case of empty mask
        if decoded_top_masks[k].sum() == 0:
            top_segms_out.append(top_masks[k])
            continue

        inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
        # Only matches itself
        if len(inds_to_vote) == 1:
            top_segms_out.append(top_masks[k])
            continue

        masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
        if method == 'AVG':
            ws = mask_weights[inds_to_vote]
            soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
        elif method == 'UNION':
            # Any pixel that's on joins the mask
            soft_mask = np.sum(masks_to_vote, axis=0)
            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
        else:
            raise NotImplementedError('Method {} is unknown'.format(method))
        rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
        top_segms_out.append(rle)

    return top_segms_out
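A toy invocation of the voting scheme above; it assumes the function and its imports (numpy as np, pycocotools.mask as mask_util) are in scope, and the masks, boxes, and thresholds are made up for illustration:

import numpy as np
import pycocotools.mask as mask_util

def to_rle(m):
    return mask_util.encode(np.asfortranarray(m.astype(np.uint8)))

m1 = np.zeros((5, 5)); m1[1:4, 1:4] = 1   # a 3x3 blob
m2 = np.zeros((5, 5)); m2[1:4, 2:5] = 1   # the same blob shifted right
all_masks = [to_rle(m1), to_rle(m2)]
all_dets = np.array([[1., 1., 3., 3., 0.9],   # (x1, y1, x2, y2, score)
                     [2., 1., 4., 3., 0.6]])

merged = rle_mask_voting(all_masks[:1], all_masks, all_dets,
                         iou_thresh=0.3, binarize_thresh=0.5, method='AVG')
print(mask_util.decode(merged[0]))  # the score-weighted, re-binarized mask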
Example #13
    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          centernesses,
                          cof_preds,
                          feat_mask,
                          mlvl_points,
                          img_shape,
                          ori_shape,
                          scale_factor,
                          cfg,
                          rescale=False):
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_centerness = []
        mlvl_cofs = []
        for cls_score, bbox_pred, cof_pred, centerness, points in zip(
                cls_scores, bbox_preds, cof_preds, centernesses, mlvl_points):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            cof_pred = cof_pred.permute(1, 2, 0).reshape(-1, 32 * 4)

            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                max_scores, _ = (scores * centerness[:, None]).max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                points = points[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                cof_pred = cof_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                centerness = centerness[topk_inds]
            bboxes = distance2bbox(points, bbox_pred, max_shape=img_shape)
            mlvl_cofs.append(cof_pred)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_centerness.append(centerness)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        mlvl_cofs = torch.cat(mlvl_cofs)

        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
        mlvl_centerness = torch.cat(mlvl_centerness)

        if self.ssd_flag is False:
            det_bboxes, det_labels, idxs_keep = multiclass_nms_idx(
                mlvl_bboxes,
                mlvl_scores,
                cfg.score_thr,
                cfg.nms,
                cfg.max_per_img,
                score_factors=mlvl_centerness)
        else:
            mlvl_scores = mlvl_scores * mlvl_centerness.view(-1, 1)
            det_bboxes, det_labels, det_cofs = self.fast_nms(
                mlvl_bboxes,
                mlvl_scores[:, 1:].transpose(1, 0).contiguous(),
                mlvl_cofs,
                iou_threshold=cfg.nms.iou_thr,
                score_thr=cfg.score_thr)

        cls_segms = [[] for _ in range(self.num_classes - 1)]
        mask_scores = [[] for _ in range(self.num_classes - 1)]
        if det_bboxes.shape[0] > 0:
            scale = 2

            if self.ssd_flag is False:
                det_cofs = mlvl_cofs[idxs_keep]
            #####spp########################
            img_mask1 = feat_mask.permute(1, 2, 0)
            pos_masks00 = torch.sigmoid(img_mask1 @ det_cofs[:, 0:32].t())
            pos_masks01 = torch.sigmoid(img_mask1 @ det_cofs[:, 32:64].t())
            pos_masks10 = torch.sigmoid(img_mask1 @ det_cofs[:, 64:96].t())
            pos_masks11 = torch.sigmoid(img_mask1 @ det_cofs[:, 96:128].t())
            pos_masks = torch.stack(
                [pos_masks00, pos_masks01, pos_masks10, pos_masks11], dim=0)
            if rescale is None:
                scale_factor = scale_factor * 0 + 1.0
            pos_masks = self.crop_cuda(
                pos_masks, det_bboxes[:, :4] *
                det_bboxes.new_tensor(scale_factor) / scale)
            # pos_masks = crop_split(pos_masks00, pos_masks01, pos_masks10, pos_masks11,
            #                        det_bboxes * det_bboxes.new_tensor(scale_factor) / scale)

            pos_masks = pos_masks.permute(2, 0, 1)
            # masks = F.interpolate(pos_masks.unsqueeze(0), scale_factor=scale/scale_factor, mode='bilinear', align_corners=False).squeeze(0)
            if self.ssd_flag:
                masks = F.interpolate(pos_masks.unsqueeze(0),
                                      scale_factor=scale /
                                      scale_factor[3:1:-1],
                                      mode='bilinear',
                                      align_corners=False).squeeze(0)
            else:
                masks = F.interpolate(pos_masks.unsqueeze(0),
                                      scale_factor=scale / scale_factor,
                                      mode='bilinear',
                                      align_corners=False).squeeze(0)
            masks.gt_(0.4)

            if self.rescoring_flag:
                pred_iou = pos_masks.unsqueeze(1)
                pred_iou = self.convs_scoring(pred_iou)
                pred_iou = self.relu(self.mask_scoring(pred_iou))
                pred_iou = F.max_pool2d(
                    pred_iou,
                    kernel_size=pred_iou.size()[2:]).squeeze(-1).squeeze(-1)
                pred_iou = pred_iou[range(pred_iou.size(0)),
                                    det_labels].squeeze()
                mask_scores = pred_iou * det_bboxes[:, -1]
                mask_scores = mask_scores.cpu().numpy()
                mask_scores = [
                    mask_scores[det_labels.cpu().numpy() == i]
                    for i in range(self.num_classes - 1)
                ]

        for i in range(det_bboxes.shape[0]):
            label = det_labels[i]
            mask = masks[i].cpu().numpy()
            if rescale:
                im_mask = np.zeros((ori_shape[0], ori_shape[1]),
                                   dtype=np.uint8)
                shape = np.minimum(mask.shape, ori_shape[0:2])
            else:
                im_mask = np.zeros((img_shape[0], img_shape[1]),
                                   dtype=np.uint8)
                shape = np.minimum(mask.shape, img_shape[0:2])
            im_mask[:shape[0], :shape[1]] = mask[:shape[0], :shape[1]]
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            cls_segms[label].append(rle)

        if self.rescoring_flag:
            return det_bboxes, det_labels, (cls_segms, mask_scores)
        else:
            return det_bboxes, det_labels, cls_segms
Example #14
def convert_predictions_to_coco_annotations(predictions):
    """Converts a batch of predictions to annotations in COCO format.

  Args:
    predictions: a dictionary of lists of numpy arrays including the following
      fields. K below denotes the maximum number of instances per image.
      Required fields:
        - source_id: a list of numpy arrays of int or string of shape
            [batch_size].
        - num_detections: a list of numpy arrays of int of shape [batch_size].
        - detection_boxes: a list of numpy arrays of float of shape
            [batch_size, K, 4], where coordinates are in the original image
            space (not the scaled image space).
        - detection_classes: a list of numpy arrays of int of shape
            [batch_size, K].
        - detection_scores: a list of numpy arrays of float of shape
            [batch_size, K].
      Optional fields:
        - detection_masks: a list of numpy arrays of float of shape
            [batch_size, K, mask_height, mask_width].

  Returns:
    coco_predictions: prediction in COCO annotation format.
  """
    coco_predictions = []
    num_batches = len(predictions['source_id'])
    batch_size = predictions['source_id'][0].shape[0]
    max_num_detections = predictions['detection_classes'][0].shape[1]
    use_outer_box = 'detection_outer_boxes' in predictions
    for i in range(num_batches):
        predictions['detection_boxes'][i] = box_utils.yxyx_to_xywh(
            predictions['detection_boxes'][i])
        if use_outer_box:
            predictions['detection_outer_boxes'][i] = box_utils.yxyx_to_xywh(
                predictions['detection_outer_boxes'][i])
            mask_boxes = predictions['detection_outer_boxes']
        else:
            mask_boxes = predictions['detection_boxes']

        for j in range(batch_size):
            if 'detection_masks' in predictions:
                image_masks = mask_utils.paste_instance_masks(
                    predictions['detection_masks'][i][j], mask_boxes[i][j],
                    int(predictions['image_info'][i][j, 0, 0]),
                    int(predictions['image_info'][i][j, 0, 1]))
                binary_masks = (image_masks > 0.0).astype(np.uint8)
                encoded_masks = [
                    mask_api.encode(np.asfortranarray(binary_mask))
                    for binary_mask in list(binary_masks)
                ]
            for k in range(max_num_detections):
                ann = {}
                ann['image_id'] = predictions['source_id'][i][j]
                ann['category_id'] = predictions['detection_classes'][i][j, k]
                ann['bbox'] = predictions['detection_boxes'][i][j, k]
                ann['score'] = predictions['detection_scores'][i][j, k]
                if 'detection_masks' in predictions:
                    ann['segmentation'] = encoded_masks[k]
                coco_predictions.append(ann)

    for i, ann in enumerate(coco_predictions):
        ann['id'] = i + 1

    return coco_predictions
Example #15
        _image = cv2.imread(sample.filepath)
        image = func.to_tensor(_image).to(device)
        c, h, w = image.shape

        # Perform inference
        preds = segmentor.predictor(_image)
        instances = preds["instances"]
        boxes = instances.pred_boxes.tensor.numpy()
        boxes = boxes.tolist()
        scores = instances.scores.tolist()
        labels = instances.pred_classes.tolist()
        has_mask = instances.has("pred_masks")

        if has_mask:
            rles = [
                mask_util.encode(
                    np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
                for mask in instances.pred_masks
            ]
            for rle in rles:
                rle["counts"] = rle["counts"].decode("utf-8")

        # Convert detections to FiftyOne format
        detections = []
        for label, score, box in zip(labels, scores, boxes):
            # Convert to [top-left-x, top-left-y, width, height]
            # in relative coordinates in [0, 1] x [0, 1]
            x1, y1, x2, y2 = box
            rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]

            detections.append(
                fo.Detection(label=classes[label],
Example #16
def test_net(net, imdb, weights_filename, max_per_image=100, thresh=0.):
  """Test a Fast R-CNN network on an image database."""
  np.random.seed(cfg.RNG_SEED)
  num_images = len(imdb.image_index)
  # all detections are collected into:
  #  all_boxes[cls][image] = N x 5 array of detections in
  #  (x1, y1, x2, y2, score)
  all_boxes = [[[] for _ in range(num_images)]
         for _ in range(imdb.num_classes)]
  #  all_rles[cls][image] = [rle] array of N rles
  all_rles = [[[] for _ in range(num_images)] 
         for _ in range(imdb.num_classes)]

  output_dir = get_output_dir(imdb, weights_filename)
  # timers
  _t = {'im_detect' : Timer(), 'misc' : Timer()}

  for i in range(num_images):
    im = cv2.imread(imdb.image_path_at(i))

    _t['im_detect'].tic()
    scores, boxes, net_conv, im_scale = im_detect(net, im) # (n, 81), (n, 81*4), (n, 1024, H, W), float
    _t['im_detect'].toc()

    _t['misc'].tic()

    # skip j = 0, because it's the background class
    for j in range(1, imdb.num_classes):
      inds = np.where(scores[:, j] > thresh)[0]
      cls_scores = scores[inds, j]
      cls_boxes = boxes[inds, j*4:(j+1)*4]
      cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
        .astype(np.float32, copy=False)
      keep = nms(torch.from_numpy(cls_dets), cfg.TEST.NMS).numpy() if cls_dets.size > 0 else []
      cls_dets = cls_dets[keep, :]
      all_boxes[j][i] = cls_dets

    # Limit to max_per_image detections *over all classes*
    if max_per_image > 0:
      image_scores = np.hstack([all_boxes[j][i][:, -1]
                    for j in range(1, imdb.num_classes)])
      if len(image_scores) > max_per_image:
        image_thresh = np.sort(image_scores)[-max_per_image]
        for j in range(1, imdb.num_classes):
          keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
          all_boxes[j][i] = all_boxes[j][i][keep, :]

    # run mask branch on all_boxes[:][i]
    accumulated_boxes  = []
    accumulated_labels = []
    for j in range(1, imdb.num_classes):
      if all_boxes[j][i].shape[0] > 0:
        accumulated_boxes += [all_boxes[j][i][:, :4]]
        accumulated_labels += [j]*all_boxes[j][i].shape[0]
    accumulated_boxes = np.vstack(accumulated_boxes)   # accumulate max_per_image boxes [xyxy]
    accumulated_labels = np.array(accumulated_labels, dtype=np.uint8) # n category labels
    mask_prob = net._predict_masks_from_boxes_and_labels(net_conv, 
                            accumulated_boxes * im_scale,  # scaled boxes [xyxy]
                            accumulated_labels) # (n, 14, 14)
    mask_prob = mask_prob.data.cpu().numpy() # convert to numpy
    masks = recover_masks(mask_prob, accumulated_boxes, im.shape[0], im.shape[1]) # (n, ih, iw) uint8 [0,1]
    masks = (masks > 122.).astype(np.uint8)  # (n, ih, iw) uint8 [0,1] original size
    
    # add to all_rles
    rles = [COCOmask.encode(np.asfortranarray(m)) for m in masks]
    ri = 0
    for j in range(1, imdb.num_classes):
      ri_next = ri+all_boxes[j][i].shape[0]
      all_rles[j][i] = rles[ri:ri_next]
      assert len(all_rles[j][i]) == all_boxes[j][i].shape[0]
      ri = ri_next

    _t['misc'].toc()

    print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
        .format(i + 1, num_images, _t['im_detect'].average_time(),
            _t['misc'].average_time()))

  det_file = os.path.join(output_dir, 'detections.pkl')
  with open(det_file, 'wb') as f:
    pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

  print('Evaluating detections')
  imdb.evaluate_detections(all_boxes, all_rles, output_dir)
Example #17
def encode_mask(mask):
    """Convert mask to coco rle"""
    rle = cocomask.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
    rle['counts'] = rle['counts'].decode('ascii')
    return rle
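Decoding 'counts' here is what makes the result JSON-serializable: json cannot handle the raw byte string that pycocotools returns. A minimal sketch using encode_mask as defined above, with a toy mask:

import json
import numpy as np

m = np.zeros((3, 3), dtype=np.uint8)
m[1, 1] = 1
rle = encode_mask(m)
print(json.dumps({'segmentation': rle}))  # works because 'counts' is now str, not bytes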
Example #18
def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
  """Convernt groundtruths to the dataset in COCO format.

  Args:
    groundtruths: a dictionary of numpy arrays including the fields below.
      See also different parsers under `../dataloader` for more details.
      Required fields:
        - source_id: a list of numpy arrays of int or string of shape
            [batch_size].
        - image_info: a list of numpy arrays of float of shape
            [batch_size, 4, 2].
        - num_detections: a list of numpy arrays of int of shape [batch_size].
        - boxes: a list of numpy arrays of float of shape [batch_size, K, 4].
        - classes: a list of numpy arrays of int of shape [batch_size, K].
      Optional fields:
        - is_crowds: a list of numpy arrays of int of shape [batch_size, K]. If
            the field is absent, it is assumed that this instance is not crowd.
        - areas: a list of numpy arrays of float of shape [batch_size, K]. If the
            field is absent, the area is calculated using either boxes or
            masks depending on which one is available.
        - masks: a list of numpy arrays of float of shape
            [batch_size, K, mask_height, mask_width],
    label_map: (optional) a dictionary that defines items from the category id
      to the category name. If `None`, collect the category mapping from the
      `groundtruths`.

  Returns:
    coco_groundtruths: the groundtruth dataset in COCO format.
  """
  image_size = np.concatenate(groundtruths['image_info'], axis=0)[:, 0, :]
  source_id = np.concatenate(groundtruths['source_id'], axis=0)
  gt_images = [{'id': i, 'height': h, 'width': w} for
               i, h, w in zip(source_id, image_size[:, 0], image_size[:, 1])]

  for k in groundtruths:
    groundtruths[k] = np.stack(groundtruths[k], axis=0)

  num_batches = groundtruths['source_id'].shape[0]
  batch_size = groundtruths['source_id'].shape[1]

  boxes_ymin = groundtruths['boxes'][:, :, :, 0]
  boxes_xmin = groundtruths['boxes'][:, :, :, 1]
  boxes_width = (groundtruths['boxes'][:, :, :, 3] -
                 groundtruths['boxes'][:, :, :, 1])
  boxes_height = (groundtruths['boxes'][:, :, :, 2] -
                  groundtruths['boxes'][:, :, :, 0])
  groundtruths['boxes'] = np.stack(
      [boxes_xmin, boxes_ymin, boxes_width, boxes_height], axis=3)

  gt_annotations = []
  for b in range(num_batches):
    for k in range(batch_size):
      if 'masks' in groundtruths:
        encoded_mask = [
            mask_utils.encode(np.asfortranarray(instance_mask.astype(np.uint8)))
            for instance_mask in list(groundtruths['masks'][b, k])]
      for i in range(groundtruths['num_detections'][b, k]):
        ann = {}
        ann['image_id'] = groundtruths['source_id'][b, k]
        if 'is_crowds' in groundtruths:
          ann['iscrowd'] = groundtruths['is_crowds'][b, k, i]
        else:
          ann['iscrowd'] = 0
        ann['category_id'] = groundtruths['classes'][b, k, i]
        ann['bbox'] = groundtruths['boxes'][b, k, i]
        if 'areas' in groundtruths:
          ann['area'] = groundtruths['areas'][b, k, i]
        else:
          ann['area'] = (groundtruths['boxes'][b, k, i, 2] *
                         groundtruths['boxes'][b, k, i, 3])
        if 'masks' in groundtruths:
          ann['segmentation'] = encoded_mask[i]
          if 'areas' not in groundtruths:
            ann['area'] = mask_utils.area(encoded_mask[i])
        gt_annotations.append(ann)

  for i, ann in enumerate(gt_annotations):
    ann['id'] = i + 1

  if label_map:
    gt_categories = [{'id': i, 'name': label_map[i]} for i in label_map]
  else:
    category_ids = [gt['category_id'] for gt in gt_annotations]
    gt_categories = [{'id': i} for i in set(category_ids)]

  gt_dataset = {
      'images': gt_images,
      'categories': gt_categories,
      'annotations': copy.deepcopy(gt_annotations),
  }
  return gt_dataset
Example #20
def mask2out(results, clsid2catid, resolution, thresh_binarize=0.5):
    import pycocotools.mask as mask_util
    scale = (resolution + 2.0) / resolution

    segm_res = []

    # for each batch
    for t in results:
        bboxes = t['bbox'][0]

        lengths = t['bbox'][1][0]
        im_ids = np.array(t['im_id'][0])
        if bboxes is None or bboxes.shape == (1, 1):
            continue
        if len(bboxes.tolist()) == 0:
            continue

        masks = t['mask'][0]

        s = 0
        # for each sample
        for i in range(len(lengths)):
            num = lengths[i]
            im_id = int(im_ids[i][0])
            im_shape = t['im_shape'][0][i]

            bbox = bboxes[s:s + num][:, 2:]
            clsid_scores = bboxes[s:s + num][:, 0:2]
            mask = masks[s:s + num]
            s += num

            im_h = int(im_shape[0])
            im_w = int(im_shape[1])

            expand_bbox = expand_boxes(bbox, scale)
            expand_bbox = expand_bbox.astype(np.int32)

            padded_mask = np.zeros(
                (resolution + 2, resolution + 2), dtype=np.float32)

            for j in range(num):
                xmin, ymin, xmax, ymax = expand_bbox[j].tolist()
                clsid, score = clsid_scores[j].tolist()
                clsid = int(clsid)
                padded_mask[1:-1, 1:-1] = mask[j, clsid, :, :]

                catid = clsid2catid[clsid]

                w = xmax - xmin + 1
                h = ymax - ymin + 1
                w = np.maximum(w, 1)
                h = np.maximum(h, 1)

                resized_mask = cv2.resize(padded_mask, (w, h))
                resized_mask = np.array(
                    resized_mask > thresh_binarize, dtype=np.uint8)
                im_mask = np.zeros((im_h, im_w), dtype=np.uint8)

                x0 = min(max(xmin, 0), im_w)
                x1 = min(max(xmax + 1, 0), im_w)
                y0 = min(max(ymin, 0), im_h)
                y1 = min(max(ymax + 1, 0), im_h)

                im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (
                    x0 - xmin):(x1 - xmin)]
                segm = mask_util.encode(
                    np.array(
                        im_mask[:, :, np.newaxis], order='F'))[0]
                segm['counts'] = segm['counts'].decode('utf8')
                coco_res = {
                    'image_id': im_id,
                    'category_id': catid,
                    'segmentation': segm,
                    'score': score
                }
                segm_res.append(coco_res)
    return segm_res
Example #21
def convert_predictions_to_coco_annotations(predictions):
  """Convernt predictions to annotations in COCO format.

  Args:
    predictions: a dictionary of lists of numpy arrays including the following
      fields. See different parsers under `../dataloader` for more details.
      Required fields:
        - source_id: a list of numpy arrays of int or string of shape
            [batch_size].
        - image_info: a list of numpy arrays of float of shape
            [batch_size, 4, 2].
        - num_detections: a list of numpy arrays of int of shape [batch_size].
        - detection_boxes: a list of numpy arrays of float of shape
            [batch_size, K, 4].
        - detection_classes: a list of numpy arrays of int of shape
            [batch_size, K].
        - detection_scores: a list of numpy arrays of float of shape
            [batch_size, K].
      Optional fields:
        - detection_masks: a list of numpy arrays of float of shape
            [batch_size, K, mask_height, mask_width].

  Returns:
    coco_predictions: prediction in COCO annotation format.
  """
  for k in predictions:
    predictions[k] = np.stack(predictions[k], axis=0)

  num_batches = predictions['source_id'].shape[0]
  batch_size = predictions['source_id'].shape[1]
  max_num_detections = predictions['detection_classes'].shape[2]

  image_scale = np.tile(predictions['image_info'][:, :, 2:3, :], (1, 1, 1, 2))
  predictions['detection_boxes'] = predictions['detection_boxes'] / image_scale
  boxes_ymin = predictions['detection_boxes'][:, :, :, 0]
  boxes_xmin = predictions['detection_boxes'][:, :, :, 1]
  boxes_width = (predictions['detection_boxes'][:, :, :, 3] -
                 predictions['detection_boxes'][:, :, :, 1])
  boxes_height = (predictions['detection_boxes'][:, :, :, 2] -
                  predictions['detection_boxes'][:, :, :, 0])
  predictions['detection_boxes'] = np.stack(
      [boxes_xmin, boxes_ymin, boxes_width, boxes_height], axis=3)

  coco_predictions = []
  for b in range(num_batches):
    for k in range(batch_size):
      if 'detection_masks' in predictions:
        image_masks = predictions['detection_masks'][b, k]
        encoded_mask = [
            mask_utils.encode(np.asfortranarray(image_mask.astype(np.uint8)))
            for image_mask in list(image_masks)]
      for i in range(max_num_detections):
        ann = {}
        ann['iscrowd'] = 0
        ann['image_id'] = predictions['source_id'][b, k]
        ann['category_id'] = predictions['detection_classes'][b, k, i]
        ann['score'] = predictions['detection_scores'][b, k, i]
        ann['bbox'] = predictions['detection_boxes'][b, k, i]
        if 'detection_masks' in predictions:
          ann['segmentation'] = encoded_mask[i]
        coco_predictions.append(ann)

  for i, ann in enumerate(coco_predictions):
    ann['id'] = i + 1

  return coco_predictions
Example #22
    def _ochuman_segm2json(self, results):
        """Convert instance segmentation results to COCO json style."""
        bbox_json_results = []
        segm_json_results = []
        for idx in range(len(self)):
            img_id = self.img_ids[idx]
            det, seg = results[idx]
            for label in range(len(det)):
                # bbox results
                bboxes = det[label]
                for i in range(bboxes.shape[0]):
                    data = dict()
                    data['image_id'] = img_id
                    data['bbox'] = self.xyxy2xywh(bboxes[i])
                    data['score'] = float(bboxes[i][4])
                    data['category_id'] = self.cat_ids[label]
                    bbox_json_results.append(data)

                # segm results
                # some detectors use different scores for bbox and mask
                if isinstance(seg, tuple):
                    segms = seg[0][label]
                    mask_score = seg[1][label]
                else:
                    segms = seg[label]
                    mask_score = [bbox[4] for bbox in bboxes]
                for i in range(bboxes.shape[0]):
                    data = dict()
                    data['image_id'] = img_id
                    data['bbox'] = self.xyxy2xywh(bboxes[i])
                    data['score'] = float(mask_score[i])
                    data['category_id'] = self.cat_ids[label]
                    maskencode = maskUtils.encode(np.asfortranarray(segms[i]))
                    maskencode['counts'] = maskencode['counts'].decode('ascii')
                    data['segmentation'] = maskencode
                    segm_json_results.append(data)
        return bbox_json_results, segm_json_results

    def results2json(self, results, outfile_prefix):
        """Dump the detection results to a COCO style json file.

        There are 3 types of results: proposals, bbox predictions, mask
        predictions, and they have different data types. This method will
        automatically recognize the type, and dump them to json files.

        Args:
            results (list[list | tuple | ndarray]): Testing results of the
                dataset.
            outfile_prefix (str): The filename prefix of the json files. If the
                prefix is "somepath/xxx", the json files will be named
                "somepath/xxx.bbox.json", "somepath/xxx.segm.json",
                "somepath/xxx.proposal.json".

        Returns:
            dict[str: str]: Possible keys are "bbox", "segm", "proposal", and \
                values are corresponding filenames.
        """
        result_files = dict()
        if isinstance(results[0], list):
            json_results = self._det2json(results)
            result_files['bbox'] = f'{outfile_prefix}.bbox.json'
            result_files['proposal'] = f'{outfile_prefix}.bbox.json'
            mmcv.dump(json_results, result_files['bbox'])
        elif isinstance(results[0], tuple):
            json_results = self._ochuman_segm2json(results)
            result_files['bbox'] = f'{outfile_prefix}.bbox.json'
            result_files['proposal'] = f'{outfile_prefix}.bbox.json'
            result_files['segm'] = f'{outfile_prefix}.segm.json'
            mmcv.dump(json_results[0], result_files['bbox'])
            mmcv.dump(json_results[1], result_files['segm'])
        elif isinstance(results[0], np.ndarray):
            json_results = self._proposal2json(results)
            result_files['proposal'] = f'{outfile_prefix}.proposal.json'
            mmcv.dump(json_results, result_files['proposal'])
        else:
            raise TypeError('invalid type of results')
        return result_files
Example #23
def instances_to_coco_json(instances, img_id):
    """
    Dump an "Instances" object to a COCO-format json that's used for evaluation.

    Args:
        instances (Instances):
        img_id (int): the image id

    Returns:
        list[dict]: list of json annotations in COCO format.
    """
    num_instance = len(instances)
    if num_instance == 0:
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()
    attr_classes = instances.attr_classes.tolist()
    attr_scores = instances.attr_scores.tolist()

    #print (len(scores), len(attr_scores), len(attr_classes))

    has_mask = instances.has("pred_masks")
    if has_mask:
        # use RLE to encode the masks, because they are too large and take memory
        # since this evaluator stores outputs of the entire dataset
        rles = [
            mask_util.encode(
                np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_masks
        ]
        for rle in rles:
            # "counts" is an array encoded by mask_util as a byte-stream. Python3's
            # json writer which always produces strings cannot serialize a bytestream
            # unless you decode it. Thankfully, utf-8 works out (which is also what
            # the pycocotools/_mask.pyx does).
            rle["counts"] = rle["counts"].decode("utf-8")

    has_keypoints = instances.has("pred_keypoints")
    if has_keypoints:
        keypoints = instances.pred_keypoints

    results = []
    for k in range(num_instance):
        result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
            "attributes": attr_classes[k],
            "attr_scores": attr_scores[k],
        }
        if has_mask:
            result["segmentation"] = rles[k]
        if has_keypoints:
            # In COCO annotations,
            # keypoints coordinates are pixel indices.
            # However our predictions are floating point coordinates.
            # Therefore we subtract 0.5 to be consistent with the annotation format.
            # This is the inverse of data loading logic in `datasets/coco.py`.
            keypoints[k][:, :2] -= 0.5
            result["keypoints"] = keypoints[k].flatten().tolist()
        results.append(result)
    return results
Example #24
    def eval_sequence(self, data):
        """Returns J&F metrics for one sequence"""

        # Only loaded when run to reduce minimum requirements
        from pycocotools import mask as mask_utils

        num_timesteps = data['num_timesteps']
        num_tracker_ids = data['num_tracker_ids']
        num_gt_ids = data['num_gt_ids']
        gt_dets = data['gt_dets']
        tracker_dets = data['tracker_dets']
        gt_ids = data['gt_ids']
        tracker_ids = data['tracker_ids']

        # get shape of frames
        frame_shape = None
        if num_gt_ids > 0:
            for t in range(num_timesteps):
                if len(gt_ids[t]) > 0:
                    frame_shape = gt_dets[t][0]['size']
                    break
        elif num_tracker_ids > 0:
            for t in range(num_timesteps):
                if len(tracker_ids[t]) > 0:
                    frame_shape = tracker_dets[t][0]['size']
                    break

        if frame_shape:
            # append all zero masks for timesteps in which tracks do not have a detection
            zero_padding = np.zeros((frame_shape), order='F').astype(np.uint8)
            padding_mask = mask_utils.encode(zero_padding)
            for t in range(num_timesteps):
                gt_id_det_mapping = {
                    gt_ids[t][i]: gt_dets[t][i]
                    for i in range(len(gt_ids[t]))
                }
                gt_dets[t] = [
                    gt_id_det_mapping[index]
                    if index in gt_ids[t] else padding_mask
                    for index in range(num_gt_ids)
                ]
                tracker_id_det_mapping = {
                    tracker_ids[t][i]: tracker_dets[t][i]
                    for i in range(len(tracker_ids[t]))
                }
                tracker_dets[t] = [
                    tracker_id_det_mapping[index]
                    if index in tracker_ids[t] else padding_mask
                    for index in range(num_tracker_ids)
                ]
            # also perform zero padding if number of tracker IDs < number of ground truth IDs
            if num_tracker_ids < num_gt_ids:
                diff = num_gt_ids - num_tracker_ids
                for t in range(num_timesteps):
                    tracker_dets[t] = tracker_dets[t] + [
                        padding_mask for _ in range(diff)
                    ]
                num_tracker_ids += diff

        j = self._compute_j(gt_dets, tracker_dets, num_gt_ids, num_tracker_ids,
                            num_timesteps)

        # boundary threshold for F computation
        bound_th = 0.008

        # perform matching
        if self.optim_type == 'J&F':
            f = np.zeros_like(j)
            for k in range(num_tracker_ids):
                for i in range(num_gt_ids):
                    f[k, i, :] = self._compute_f(gt_dets, tracker_dets, k, i,
                                                 bound_th)
            optim_metrics = (np.mean(j, axis=2) + np.mean(f, axis=2)) / 2
            row_ind, col_ind = linear_sum_assignment(-optim_metrics)
            j_m = j[row_ind, col_ind, :]
            f_m = f[row_ind, col_ind, :]
        elif self.optim_type == 'J':
            optim_metrics = np.mean(j, axis=2)
            row_ind, col_ind = linear_sum_assignment(-optim_metrics)
            j_m = j[row_ind, col_ind, :]
            f_m = np.zeros_like(j_m)
            for i, (tr_ind, gt_ind) in enumerate(zip(row_ind, col_ind)):
                f_m[i] = self._compute_f(gt_dets, tracker_dets, tr_ind, gt_ind,
                                         bound_th)
        else:
            raise TrackEvalException(
                'Unsupported optimization type %s for J&F metric.' %
                self.optim_type)

        # append zeros for false negatives
        if j_m.shape[0] < data['num_gt_ids']:
            diff = data['num_gt_ids'] - j_m.shape[0]
            j_m = np.concatenate((j_m, np.zeros((diff, j_m.shape[1]))), axis=0)
            f_m = np.concatenate((f_m, np.zeros((diff, f_m.shape[1]))), axis=0)

        # compute the metrics for each ground truth track
        res = {
            'J-Mean': [np.nanmean(j_m[i, :]) for i in range(j_m.shape[0])],
            'J-Recall': [
                np.nanmean(j_m[i, :] > 0.5 + np.finfo('float').eps)
                for i in range(j_m.shape[0])
            ],
            'F-Mean': [np.nanmean(f_m[i, :]) for i in range(f_m.shape[0])],
            'F-Recall': [
                np.nanmean(f_m[i, :] > 0.5 + np.finfo('float').eps)
                for i in range(f_m.shape[0])
            ],
            'J-Decay': [],
            'F-Decay': []
        }
        n_bins = 4
        ids = np.round(
            np.linspace(1, data['num_timesteps'], n_bins + 1) + 1e-10) - 1
        ids = ids.astype(np.uint8)

        for k in range(j_m.shape[0]):
            d_bins_j = [
                j_m[k][ids[i]:ids[i + 1] + 1] for i in range(0, n_bins)
            ]
            res['J-Decay'].append(
                np.nanmean(d_bins_j[0]) - np.nanmean(d_bins_j[3]))
        for k in range(f_m.shape[0]):
            d_bins_f = [
                f_m[k][ids[i]:ids[i + 1] + 1] for i in range(0, n_bins)
            ]
            res['F-Decay'].append(
                np.nanmean(d_bins_f[0]) - np.nanmean(d_bins_f[3]))

        # count number of tracks for weighting of the result
        res['num_gt_tracks'] = len(res['J-Mean'])
        for field in [
                'J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall',
                'F-Decay'
        ]:
            res[field] = np.mean(res[field])
        res['J&F'] = (res['J-Mean'] + res['F-Mean']) / 2
        return res
Example #25
    def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
                      ori_shape, scale_factor, rescale):
        """Get segmentation masks from mask_pred and bboxes.

        Args:
            mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
                For single-scale testing, mask_pred is the direct output of
                model, whose type is Tensor, while for multi-scale testing,
                it will be converted to numpy array outside of this method.
            det_bboxes (Tensor): shape (n, 4/5)
            det_labels (Tensor): shape (n, )
            img_shape (Tensor): shape (3, )
            rcnn_test_cfg (dict): rcnn testing config
            ori_shape: original image size

        Returns:
            list[list]: encoded masks
        """
        if isinstance(mask_pred, torch.Tensor):
            mask_pred = mask_pred.sigmoid().cpu().numpy()
        assert isinstance(mask_pred, np.ndarray)

        cls_segms = [[] for _ in range(self.num_classes - 1)]
        bboxes = det_bboxes.cpu().numpy()[:, :4]
        labels = det_labels.cpu().numpy() + 1

        if rescale:  # keep the img size as the ori size.
            img_h, img_w = ori_shape[:2]
        else:
            # if not rescaled, the output bboxes fit the size of the
            # input image.
            img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
            img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
            scale_factor = 1.0

        for i in range(bboxes.shape[0]):
            # the bboxes are fitted to the rescaled image.
            bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
            label = labels[i]
            if not self.class_agnostic:
                mask_pred_ = mask_pred[i, label, :, :]
            else:
                mask_pred_ = mask_pred[i, 0, :, :]

            # expand the mask and bbox to avoid truncation at the borders.
            padded_mask_, scale = self.expand_mask(mask_pred_)
            bbox = self.expand_bbox(bbox, scale)

            w = max(bbox[2] - bbox[0] + 1, 1)
            h = max(bbox[3] - bbox[1] + 1, 1)

            im_mask = np.zeros((img_h, img_w), dtype=np.uint8)

            bbox_mask = mmcv.imresize(padded_mask_, (w, h))
            bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
                np.uint8)
            # clip to the image boundary, following maskrcnn-benchmark
            x0 = max(bbox[0], 0)
            x1 = min(bbox[2] + 1, img_w)
            y0 = max(bbox[1], 0)
            y1 = min(bbox[3] + 1, img_h)

            im_mask[y0:y1, x0:x1] = bbox_mask[(y0 - bbox[1]):(y1 - bbox[1]),
                                              (x0 - bbox[0]):(x1 - bbox[0])]
            rle = mask_util.encode(
                np.array(im_mask[:, :, np.newaxis], order='F'))[0]
            cls_segms[label - 1].append(rle)

        return cls_segms
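
The heart of get_seg_masks is pasting a bbox-local mask into a full-image canvas and then RLE-encoding it. A minimal self-contained sketch of that paste-and-encode step (toy sizes, not the author's code):

import numpy as np
import pycocotools.mask as mask_util

img_h, img_w = 6, 8
bbox = np.array([3, 1, 4, 2])  # x0, y0, x1, y1 (inclusive)
bbox_mask = np.ones((bbox[3] - bbox[1] + 1, bbox[2] - bbox[0] + 1), dtype=np.uint8)

im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
x0, x1 = max(bbox[0], 0), min(bbox[2] + 1, img_w)
y0, y1 = max(bbox[1], 0), min(bbox[3] + 1, img_h)
im_mask[y0:y1, x0:x1] = bbox_mask[(y0 - bbox[1]):(y1 - bbox[1]),
                                  (x0 - bbox[0]):(x1 - bbox[0])]

rle = mask_util.encode(np.array(im_mask[:, :, np.newaxis], order='F'))[0]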
Example #26
def create_coco_style(self, input_path, des):
    maxInt = sys.maxsize

    # csv.field_size_limit overflows on very large values; back off until accepted
    while True:
        try:
            csv.field_size_limit(maxInt)
            break
        except OverflowError:
            maxInt = int(maxInt / 10)

    data_path = input_path + 'train.csv'
    # json_des = '/kaggle/working/label_descriptions.json'

    # with open(json_des, 'r') as f:
    #     des = json.load(f)

    info = des['info']
    categories = des['categories']
    attributes = des['attributes']

    #f = open(data_path, 'r')
    #
    X = pd.read_csv(data_path)

    X_train, X_test = train_test_split(X, test_size=0.2)
    # X_train = X

    X_dtrain = X_train.to_dict('records', into=OrderedDict)
    X_dtest = X_test.to_dict('records', into=OrderedDict)

    #reader = csv.DictReader(f)#, fieldnames=('imageid', 'height', 'width', 'encodedpixels', 'classid'))

    rows_train = []
    rows_test = []

    myorder = ['ImageId', 'Height', 'Width', 'EncodedPixels', 'ClassId']

    image_id = 1
    segmentation_id = 1

    coco_output = {
        "info": info,
        "licenses": "",
        "categories": categories,
        "images": [],
        "annotations": []
    }

    # Build a separate dict for the test split; `coco_output_test = coco_output`
    # would only alias the same dict and mix train and test annotations.
    coco_output_test = {
        "info": info,
        "licenses": "",
        "categories": categories,
        "images": [],
        "annotations": []
    }

    with open('{}train.txt'.format(input_path), 'w') as output_text_file:
        for row in X_dtrain:
            # Write training text
            output_text_file.write('{} '.format(row['ImageId']))

            # Ordered Dict
            ordered = OrderedDict((k, row[k]) for k in myorder)
            #    ordered['EncodedPixels'] = list(map(int, ordered['EncodedPixels'].split(' ')))
            # ClassId may be a compound label like "12_34_56"; keep the first id
            if '_' in str(ordered['ClassId']):
                ordered['ClassId'] = int(str(ordered['ClassId']).split('_')[0])
            else:
                ordered['ClassId'] = int(ordered['ClassId'])

            # COCO
            image_info = pycoco.create_image_info(
                image_id, input_path + row['ImageId'],
                (row['Width'], row['Height']))
            coco_output["images"].append(image_info)

            rle, binary_mask = rle_decode(row['EncodedPixels'],
                                          (row['Height'], row['Width']))
            fortran_binary_mask = np.asfortranarray(
                binary_mask.astype(np.uint8))

            binary_mask_encoded = mask.encode(fortran_binary_mask)
            #    rle2 = pycoco.binary_mask_to_rle(fortran_binary_mask)

            area = mask.area(binary_mask_encoded)
            bounding_box = mask.toBbox(binary_mask_encoded)

            annotation_info = {
                "id": segmentation_id,
                "image_id": image_id,
                "category_id": ordered['ClassId'],
                "iscrowd": 1,
                "area": area.tolist(),
                "bbox": bounding_box.tolist(),
                "segmentation": rle,
                "width": row['Width'],
                "height": row['Height'],
            }
            coco_output["annotations"].append(annotation_info)
            segmentation_id += 1
            image_id += 1

            rows_train.append(ordered)

    with open('{}train.json'.format(input_path), 'w') as output_json_file:
        json.dump(coco_output, output_json_file)

    with open('{}test.txt'.format(input_path), 'w') as output_text_file:
        for row in X_dtest:
            # Write test text
            output_text_file.write('{} '.format(row['ImageId']))

            # Ordered Dict
            ordered = OrderedDict((k, row[k]) for k in myorder)
            #    ordered['EncodedPixels'] = list(map(int, ordered['EncodedPixels'].split(' ')))
            # ClassId may be a compound label like "12_34_56"; keep the first id
            if '_' in str(ordered['ClassId']):
                ordered['ClassId'] = int(str(ordered['ClassId']).split('_')[0])
            else:
                ordered['ClassId'] = int(ordered['ClassId'])

            # COCO
            image_info = pycoco.create_image_info(
                image_id, input_path + row['ImageId'],
                (row['Width'], row['Height']))
            coco_output_test["images"].append(image_info)

            rle, binary_mask = rle_decode(row['EncodedPixels'],
                                          (row['Height'], row['Width']))
            fortran_binary_mask = np.asfortranarray(
                binary_mask.astype(np.uint8))

            binary_mask_encoded = mask.encode(fortran_binary_mask)
            #    rle2 = pycoco.binary_mask_to_rle(fortran_binary_mask)

            area = mask.area(binary_mask_encoded)
            bounding_box = mask.toBbox(binary_mask_encoded)

            annotation_info = {
                "id": segmentation_id,
                "image_id": image_id,
                "category_id": ordered['ClassId'],
                "iscrowd": 1,
                "area": area.tolist(),
                "bbox": bounding_box.tolist(),
                "segmentation": rle,
                "width": row['Width'],
                "height": row['Height'],
            }
            coco_output_test["annotations"].append(annotation_info)
            segmentation_id += 1
            image_id += 1

            rows_test.append(ordered)

    with open('{}test.json'.format(input_path), 'w') as output_json_file:
        json.dump(coco_output_test, output_json_file)
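
create_coco_style relies on an rle_decode helper that is not shown. A minimal sketch of what such a helper could look like, assuming the usual Kaggle convention (space-separated "start length" pairs, 1-indexed, column-major); the name and return signature are inferred from the call site:

import numpy as np

def rle_decode(rle_string, shape):
    # shape is (height, width); returns (start/length pairs, binary mask)
    s = list(map(int, rle_string.split()))
    starts, lengths = s[0::2], s[1::2]
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for start, length in zip(starts, lengths):
        flat[start - 1:start - 1 + length] = 1
    return list(zip(starts, lengths)), flat.reshape(shape, order='F')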
Example #27
                # save as json
                pred = []

                for j, (box, prob, label) in enumerate(
                        zip(final_boxes, final_probs, final_labels)):
                    box[2] -= box[0]
                    box[3] -= box[1]  # produce x,y,w,h output

                    cat_id = label
                    cat_name = targetid2class[cat_id]

                    # encode mask
                    rle = None
                    if args.add_mask:
                        final_mask = final_masks[j]  # [14, 14]
                        rle = cocomask.encode(
                            np.array(final_mask[:, :, None], order="F"))[0]
                        rle['counts'] = rle['counts'].decode("ascii")

                    res = {
                        "category_id": cat_id,
                        "cat_name": cat_name,  #[0-80]
                        "score": float(round(prob, 7)),
                        "bbox": list(map(lambda x: float(round(x, 2)), box)),
                        "segmentation": rle,
                    }

                    pred.append(res)

                #predfile = os.path.join(args.out_dir, "%s_F_%08d.json"%(videoname, cur_frame))
                if args.use_my_naming:
                    predfile = os.path.join(
Example #28
    # person is first class in COCO
    # so extract person bbox and mask from the entire result
    result_person_only = ([result[0][0]], [result[1][0]])
    result_bbox = [result[0][0]]
    result_mask = [result[1][0]]

    # gt.txt structure
    # clsid = 1; we set clsid to objid for easier labeling
    # frameid, objid, clsid, img_h, img_w, rle_code
    start = 0

    pairs = sorted(zip(result_bbox[0], result_mask[0]), key=lambda x: x[0][0])

    for bbox, mask in pairs:
        if bbox[-1] >= 0.9:
            rle_code = cocomask.encode(np.asfortranarray(mask))
            f.write('{} {} {} {} {} {}\n'.format(
                i + 1, start, 1, 720, 1280,
                rle_code['counts'].decode('ascii')))
            start += 1

    model.show_result(frame,
                      result,
                      score_thr=1.1,
                      out_file='../msc/cam4/img/{:06d}.jpg'.format(i + 1))
    model.show_result(frame,
                      result_person_only,
                      score_thr=0.9,
                      bbox_color='red',
                      text_color='red',
                      out_file='../msc/cam4/det/{:06d}.jpg'.format(i + 1))
Example #29
def prepare_for_coco_segmentation(predictions, dataset):
    import pycocotools.mask as mask_util
    import numpy as np

    masker = Masker(threshold=0.5, padding=1)
    # assert isinstance(dataset, COCODataset)
    coco_results = []
    # iterate over the predictions by image id
    for image_id, prediction in tqdm(enumerate(predictions)):
        original_id = dataset.id_to_img_map[image_id]
        if len(prediction) == 0:
            continue

        # get the original image info (width and height)
        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        prediction = prediction.resize((image_width, image_height))
        masks = prediction.get_field("mask")
        # t = time.time()
        # Masker is necessary only if masks haven't been already resized.
        if list(masks.shape[-2:]) != [image_height, image_width]:
            masks = masker(masks.expand(1, -1, -1, -1, -1), prediction)
            masks = masks[0]
        # logger.info('Time mask: {}'.format(time.time() - t))
        # prediction = prediction.convert('xywh')

        # boxes = prediction.bbox.tolist()
        scores = prediction.get_field("scores").tolist()
        labels = prediction.get_field("labels").tolist()

        # get the component categories
        component_scores = prediction.get_field("component_scores").tolist()
        components = prediction.get_field("components").tolist()

        # rles = prediction.get_field('mask')

        rles = [
            mask_util.encode(np.array(mask[0, :, :, np.newaxis], order="F"))[0]
            for mask in masks
        ]
        for rle in rles:
            rle["counts"] = rle["counts"].decode("utf-8")

        mapped_labels = [
            dataset.contiguous_category_id_to_json_id[i] for i in labels
        ]
        # mapping between predicted values and category ids
        mapped_components = [
            dataset.contiguous_component_id_to_json_id[i] for i in components
        ]

        coco_results.extend([{
            "image_id": original_id,
            "category_id": mapped_labels[k],
            "component_id": mapped_components[k],
            "segmentation": rle,
            "score": scores[k] * component_scores[k],
            "component_score": component_scores[k],
        } for k, rle in enumerate(rles)])
    return coco_results
Example #30
def rle_from_binary(prediction):
    # pycocotools expects a Fortran-ordered uint8 array
    prediction = np.asfortranarray(prediction.astype(np.uint8))
    return cocomask.encode(prediction)
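
A quick round-trip check of the helper above (assuming cocomask is pycocotools.mask):

import numpy as np
from pycocotools import mask as cocomask

binary = np.zeros((4, 6), dtype=np.uint8)
binary[1:3, 2:5] = 1

rle = rle_from_binary(binary)  # {'size': [4, 6], 'counts': b'...'}
assert np.array_equal(cocomask.decode(rle), binary)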
Example #31
import numpy as np
import matplotlib.pyplot as plt
from math import floor
from pycocotools import mask
from scipy.ndimage import binary_dilation, binary_fill_holes
from skimage import measure
from skimage.measure import label


def maskify(im, crop_box, threshold, positive_histogram, negative_histogram):
    """ For each annotation, create a COCO formatted segmentation

        Arguments:
            - im:                   The input image (PIL Image)
            - crop_box:             Tuple of coordinates isolating the object of interest
            - threshold:            Percentage value representing the likelihood of a pixel
                                    belonging to the positive histogram class, or 'auto'
            - positive_histogram:   Histogram of pixels belonging to the object
            - negative_histogram:   Histogram of pixels not belonging to the object

        Return: Array of COCO styled segmentation annotations

    """
    # Get the size of the image (PIL size is (width, height))
    original_cols, original_rows = im.size

    # Crop the image around the bounding box
    im = im.crop(crop_box)

    # Load pixel RGB data
    pix = im.load()

    # Get row and cols of cropped image
    cols, rows = im.size

    # Convert cropped image to numpy array
    im = np.array(im)

    # Get the height and width of the cropped image
    rows = np.shape(im)[0]
    cols = np.shape(im)[1]

    # Get histogram bins
    histogram_bins = np.shape(positive_histogram)[0]

    # Get the factor based on the histogram bins. Used to index into the histograms.
    factor = 256 / histogram_bins

    # Declare a results numpy array that contains only zeros
    result = np.zeros((rows, cols))

    # Determine the probability of water given RGB and histograms representing water and non water
    for row in range(rows):
        for col in range(cols):

            # Get each RGB value
            red = float(pix[col, row][0])
            green = float(pix[col, row][1])
            blue = float(pix[col, row][2])
            
            # Get the index into histograms based on RGB value and histogram factor size (declared above)
            red_index = floor(red / factor)
            green_index = floor(green / factor)
            blue_index = floor(blue / factor)
            
            # Get positive and negative values from histograms
            positive = positive_histogram[red_index, green_index, blue_index]
            negative = negative_histogram[red_index, green_index, blue_index]

            total = positive + negative

            if total != 0:
                result[row, col] = positive / total

    # Set the threshold to the median of the result array when 'auto' is requested
    threshold = np.median(result) if threshold == 'auto' else threshold

    # The intuition here is that if our threshold equals the median value of the
    # result array, a largest connected component is guaranteed to exist. Any other
    # value risks leaving no connected component at all, so keep a backup of the
    # raw probabilities to fall back on.
    result_backup = np.copy(result)

    # Parse values of result given threshold
    for row in range(rows):
        for col in range(cols):
            if result[row, col] < threshold:
                result[row, col] = 1
            else:
                result[row, col] = 0
    
    # Retry if all values in result are 0 (i.e. no largest connected component)
    if np.sum(result) == 0:
        result = result_backup
        med = np.median(result)  # compute once; result is mutated in the loop below

        for row in range(rows):
            for col in range(cols):
                if result[row, col] < med:
                    result[row, col] = 1
                else:
                    result[row, col] = 0

    
    # Get the largest connected component
    labels = label(result)
    assert( labels.max() != 0 ) # assume at least 1 CC
    largestCC = labels == np.argmax(np.bincount(labels.flat)[1:])+1
    
    # Fill holes in the boat
    largestCC = binary_fill_holes(largestCC)

    # Dialate to expand the mask
    largestCC = binary_dilation(largestCC, iterations=4)
    plt.imshow(largestCC)

    # Create a numpy zeros array the same size as the original image before cropping
    image_with_mask = np.zeros((original_rows, original_cols))

    # Overlay binary mask onto zeros array
    image_with_mask[crop_box[1]:crop_box[1] + rows, crop_box[0]:crop_box[0] + cols] = largestCC

    """ Convert the binary mask to COCO JSON format. Code referenced from:
            - https://github.com/cocodataset/cocoapi/issues/131#issuecomment-371250565
    """
    image_with_mask = np.array(image_with_mask, dtype=np.uint8)
    fortran_ground_truth_binary_mask = np.asfortranarray(image_with_mask)
    encoded_ground_truth = mask.encode(fortran_ground_truth_binary_mask)
    ground_truth_area = mask.area(encoded_ground_truth)
    ground_truth_bounding_box = mask.toBbox(encoded_ground_truth)
    contours = measure.find_contours(image_with_mask, 0.5)

    segmentations = []

    for contour in contours:
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        segmentations.append(segmentation)
    
    return segmentations
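
The per-pixel double loop above dominates the runtime; the same probability map can be computed in one shot with NumPy fancy indexing. A sketch under the same assumptions (an H x W x 3 uint8 image and cubic histograms whose bin count divides 256):

import numpy as np

def histogram_probability(im, positive_histogram, negative_histogram):
    bins = positive_histogram.shape[0]
    idx = (np.asarray(im)[..., :3] // (256 // bins)).astype(int)
    pos = positive_histogram[idx[..., 0], idx[..., 1], idx[..., 2]]
    neg = negative_histogram[idx[..., 0], idx[..., 1], idx[..., 2]]
    total = pos + neg
    out = np.zeros(idx.shape[:2], dtype=float)
    np.divide(pos, total, out=out, where=total != 0)  # leave 0 where total == 0
    return out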
Example #32
def evaluate_coco(generator, model, threshold=0.05):
    # start collecting results
    results = []
    image_ids = []
    for index in range(generator.size()):
        image = generator.load_image(index)
        image_shape = image.shape
        image = generator.preprocess_image(image)
        image, scale = generator.resize_image(image)

        # run network
        outputs = model.predict_on_batch(np.expand_dims(image, axis=0))
        boxes   = outputs[-4]
        scores  = outputs[-3]
        labels  = outputs[-2]
        masks   = outputs[-1]

        # correct boxes for image scale
        boxes /= scale

        # change to (x, y, w, h) (MS COCO standard)
        boxes[..., 2] -= boxes[..., 0]
        boxes[..., 3] -= boxes[..., 1]

        # compute predicted labels and scores
        for box, score, label, mask in zip(boxes[0], scores[0], labels[0], masks[0]):
            # scores are sorted by the network
            if score < threshold:
                break

            b = box.astype(int)  # box (x, y, w, h) as one int vector

            mask = cv2.resize(mask[:, :, label], (b[2], b[3]))
            mask = (mask > 0.5).astype(np.uint8)  # binarize for encoding as RLE

            segmentation = np.zeros((image_shape[0], image_shape[1]), dtype=np.uint8)
            segmentation[b[1]:b[1] + b[3], b[0]:b[0] + b[2]] = mask
            segmentation = mask_utils.encode(np.asfortranarray(segmentation))

            # append boxes for each positively labeled class
            image_result = {
                'image_id'    : generator.image_ids[index],
                'category_id' : generator.label_to_coco_label(label),
                'score'       : float(score),
                'bbox'        : box.tolist(),
                'segmentation': segmentation
            }

            # convert byte to str to write in json (in Python 3)
            if not isinstance(image_result['segmentation']['counts'], str):
                image_result['segmentation']['counts'] = image_result['segmentation']['counts'].decode()

            # append detection to results
            results.append(image_result)

        # append image to list of processed images
        image_ids.append(generator.image_ids[index])

        # print progress
        print('{}/{}'.format(index, generator.size()), end='\r')

    if not len(results):
        return

    # write output
    json.dump(results, open('{}_segm_results.json'.format(generator.set_name), 'w'), indent=4)
    json.dump(image_ids, open('{}_processed_image_ids.json'.format(generator.set_name), 'w'), indent=4)

    # load results in COCO evaluation tool
    coco_true = generator.coco
    coco_pred = coco_true.loadRes('{}_segm_results.json'.format(generator.set_name))

    # run COCO evaluation
    coco_eval = COCOeval(coco_true, coco_pred, 'segm')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
Example #33
def bbox_merge(dets, segs, iou_thr, scr_thr, mask_thr):
    # dets: [[x1, y1, x2, y2, score], ... ]
    if dets.shape[0] <= 1:
        return dets, segs
    order = dets[:, -1].ravel().argsort()[::-1]
    dets = dets[order, :]
    scr_keep_inds = (np.where(dets[:, -1] > scr_thr))[0]
    dets = dets[scr_keep_inds, :]
    segs = [segs[ind] for ind in scr_keep_inds]

    dets_res = np.zeros([0, 5])
    segs_res = []
    imgHeight, imgWidth = 1024, 2048

    while dets.shape[0] > 0:
        num = dets.shape[0]
        # IoU
        area = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)
        xx1 = np.maximum(dets[0, 0], dets[:, 0])
        yy1 = np.maximum(dets[0, 1], dets[:, 1])
        xx2 = np.minimum(dets[0, 2], dets[:, 2])
        yy2 = np.minimum(dets[0, 3], dets[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)

        # select the dets to merge and remove them from the remaining set
        merge_inds = np.where(o >= iou_thr)[0]
        dets_to_merge = dets[merge_inds, :]
        segs_to_merge = [segs[ind] for ind in merge_inds]
        dets = np.delete(dets, merge_inds, 0)
        segs = [segs[i] for i in range(num) if i not in merge_inds]

        if merge_inds.shape[0] <= 1:
            dets_res = np.row_stack((dets_res, dets_to_merge))
            segs_res += segs_to_merge

        else:
            scores = dets_to_merge[:, -1:]
            dets_to_merge[:, :-1] = dets_to_merge[:, :-1] * np.tile(
                scores, (1, 4))
            max_score = np.max(scores)
            det_merged = np.zeros((1, 5))
            det_merged[:, :-1] = np.sum(dets_to_merge[:, :-1],
                                        axis=0) / np.sum(scores)
            det_merged[:, -1] = max_score
            dets_res = np.row_stack((dets_res, det_merged))

            img = np.zeros((imgHeight, imgWidth))
            for i in range(merge_inds.shape[0]):
                # plain bool: np.bool was removed in recent NumPy releases
                mask = maskUtils.decode(segs_to_merge[i]).astype(bool)
                img[mask] += scores[i, -1]
            img = img / np.max(img)
            img[img >= mask_thr] = 1
            img[img < mask_thr] = 0
            img = img.astype(np.uint8)
            # print(img.shape)
            seg_merged = maskUtils.encode(
                np.array(img[:, :, np.newaxis], order='F'))[0]
            segs_res.append(seg_merged)

    return dets_res, segs_res
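
The merge branch computes a score-weighted average of the overlapping boxes and keeps the maximum score. Worked through on two toy boxes:

import numpy as np

dets_to_merge = np.array([[10., 10., 20., 20., 0.9],
                          [12., 12., 22., 22., 0.6]])
scores = dets_to_merge[:, -1:]
weighted = dets_to_merge[:, :-1] * scores          # weight coordinates by score
merged_box = weighted.sum(axis=0) / scores.sum()   # [10.8, 10.8, 20.8, 20.8]
merged_score = scores.max()                        # 0.9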
Example #34
    def _forward(self, step_num):
        video_tag = self.val_data.get_video_tag()
        time_step_id = self.val_data.get_object_idx_in_video()
        img_filename = self.val_data._curr_video_data[time_step_id][0][
            DataKeys.IMAGE_FILENAMES]
        timestep_name = img_filename.split('/')[-1].replace('.jpg', '')
        if self.print_per_object_stats:
            print("forwarding on", video_tag + ":" + str(time_step_id),
                  "after step", step_num, "proposals:",
                  len(self.val_data._curr_video_data[time_step_id]))
        measures = {}

        # Get proposals:
        proposals_dir = self.config.string("bb_input_dir", None)
        output_dir = self.config.string("output_dir", None)

        curr = video_tag + timestep_name.zfill(5) + ".json"
        in_dir = proposals_dir + curr
        out_dir = output_dir + curr
        with open(in_dir, "r") as f:
            proposals = json.load(f)

        for idx in range(self.val_data.n_examples_per_epoch()):
            feed_dict = self.val_data.get_feed_dict_for_next_step()
            # step_res = self.trainer.validation_step(feed_dict=feed_dict, extraction_keys=[
            #   Extractions.SEGMENTATION_POSTERIORS_ORIGINAL_SIZE, Extractions.SEGMENTATION_MASK_ORIGINAL_SIZE,
            #   DataKeys.IMAGE_FILENAMES, DataKeys.RAW_IMAGES, DataKeys.OBJ_TAGS])
            step_res = self.trainer.validation_step(
                feed_dict=feed_dict,
                extraction_keys=[
                    Extractions.SEGMENTATION_POSTERIORS_ORIGINAL_SIZE,
                    Extractions.SEGMENTATION_MASK_ORIGINAL_SIZE,
                    DataKeys.OBJ_TAGS
                ])

            extractions = step_res[Extractions.EXTRACTIONS]
            step_measures = step_res[Measures.MEASURES]
            accumulate_measures(measures, step_measures)

            def extract(key):
                if key not in extractions:
                    return None
                val = extractions[key]

                # for now assume we only use 1 gpu for forwarding
                assert len(val) == 1, len(val)
                val = val[0]

                # for now assume we use a batch size of 1 for forwarding
                assert val.shape[0] == 1, val.shape[0]
                val = val[0]

                return val

            predicted_segmentation = extract(
                Extractions.SEGMENTATION_MASK_ORIGINAL_SIZE)
            obj_tag = extract(DataKeys.OBJ_TAGS)
            posteriors = extract(
                Extractions.SEGMENTATION_POSTERIORS_ORIGINAL_SIZE)
            # img_filename = extract(DataKeys.IMAGE_FILENAMES)
            # img = extract(DataKeys.RAW_IMAGES)

            ########### New code for saving json directly
            # Insert mask into proposals
            obj_tag = int(obj_tag.decode('utf-8'))
            mask = predicted_segmentation.astype("uint8") * 255
            encoded_mask = encode(np.asfortranarray(mask))
            encoded_mask['counts'] = encoded_mask['counts'].decode("utf-8")
            proposals[obj_tag]["segmentation"] = encoded_mask

            conf_scores = posteriors.copy()
            conf_scores[predicted_segmentation ==
                        0] = 1 - posteriors[predicted_segmentation == 0]
            conf_scores = 2 * conf_scores - 1
            conf_score = conf_scores[:].mean()
            proposals[obj_tag]["conf_score"] = str(conf_score)

        create_out_dir = '/'.join(out_dir.split('/')[:-1])
        if not os.path.exists(create_out_dir):
            os.makedirs(create_out_dir)
        with open(out_dir, 'w') as f:
            json.dump(proposals, f)
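
The confidence score in _forward maps the per-pixel posterior to the probability of the label the pixel was actually assigned, then rescales [0.5, 1] to [0, 1] so that undecided pixels contribute roughly zero. In isolation, with made-up posteriors:

import numpy as np

posteriors = np.array([[0.9, 0.2], [0.6, 0.95]])  # P(foreground) per pixel
segmentation = (posteriors > 0.5).astype(np.uint8)

conf = posteriors.copy()
conf[segmentation == 0] = 1 - posteriors[segmentation == 0]
conf = 2 * conf - 1        # 0.5 -> 0 (uncertain), 1.0 -> 1 (certain)
conf_score = conf.mean()   # scalar confidence for the whole mask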
Example #35
    def add_data_to_coco(self, mode, data_path, category_number):
        if mode == 'train':
            coco_dict = self.train_dict
            coco_images_path = self.coco_train_path
            coco_json_path = self.train_json_path
        elif mode == 'val':
            coco_dict = self.val_dict
            coco_images_path = self.coco_val_path
            coco_json_path = self.val_json_path
        else:
            raise NotImplementedError

        images_path = os.path.join(data_path, 'processed', 'images')
        masks_path = os.path.join(data_path, 'processed', 'image_masks')
        # TODO
        yaml_path = os.path.join(data_path, 'processed', 'door_lever_3_keypoint.yaml')

        with open(yaml_path, 'r') as f:
            dataset_yaml_map = yaml.safe_load(f.read())

        id_index = self.get_dataset_number(mode)

        train_mode = 'Door_' + mode

        for key in dataset_yaml_map.keys():
            # TODO
            origin_file_path = os.path.join(images_path, dataset_yaml_map[key]['rgb_image_filename'])
            target_file_name = train_mode + '_%06d.png' % id_index
            target_file_path = os.path.join(coco_images_path, target_file_name)

            shutil.copyfile(origin_file_path, target_file_path)

            img_dict = {'license': 3,
                        'file_name': target_file_name,
                        'coco_url': '',
                        'height': 480,
                        'width': 640,
                        'date_captured': '2013-11-14 11:18:45',
                        'flickr_url': '',
                        'id': id_index}

            x, y = dataset_yaml_map[key]['bbox_top_left_xy']
            x2, y2 = dataset_yaml_map[key]['bbox_bottom_right_xy']
            w = x2 - x
            h = y2 - y
            area = float(w * h)

            img_number = int(dataset_yaml_map[key]['rgb_image_filename'].split('_')[0])
            mask_file_path = os.path.join(masks_path, "%06d_mask.png" % img_number)
            # NOTE: the mask on disk may be UINT16 or UINT8
            # ground_truth_binary_mask = cv2.convertScaleAbs(cv2.imread(mask_file_path, cv2.IMREAD_UNCHANGED))
            ground_truth_binary_mask = cv2.imread(mask_file_path, cv2.IMREAD_UNCHANGED)
            # plt.imshow(ground_truth_binary_mask)
            # plt.colorbar()

            fortran_ground_truth_binary_mask = np.asfortranarray(ground_truth_binary_mask)
            encoded_ground_truth = mask.encode(fortran_ground_truth_binary_mask)
            ground_truth_area = mask.area(encoded_ground_truth)
            ground_truth_bounding_box = mask.toBbox(encoded_ground_truth)
            contours = measure.find_contours(ground_truth_binary_mask, 0.5)

            annot_dict = {'segmentation': [],
                          'area': ground_truth_area.tolist(),
                          'iscrowd': 0,
                          'image_id': id_index,
                          'bbox': [x, y, w, h],
                          'category_id': category_number,
                          'id': id_index}

            for contour in contours:
                contour = np.flip(contour, axis=1)
                segmentation = contour.ravel().tolist()
                annot_dict["segmentation"].append(segmentation)

            coco_dict['images'].append(img_dict)
            coco_dict['annotations'].append(annot_dict)
            id_index += 1

        with open(coco_json_path, "w") as f:
            json.dump(coco_dict, f)
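
Several examples in this collection share the same binary-mask-to-COCO-polygon idiom: skimage's find_contours yields (row, col) points, while COCO polygons are flat [x0, y0, x1, y1, ...] lists, hence the np.flip before ravel. Stand-alone:

import numpy as np
from skimage import measure

binary_mask = np.zeros((8, 8), dtype=np.uint8)
binary_mask[2:6, 3:7] = 1

polygons = []
for contour in measure.find_contours(binary_mask, 0.5):
    contour = np.flip(contour, axis=1)  # (row, col) -> (x, y)
    polygons.append(contour.ravel().tolist())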
Example #36
def postprocess_ytbvis(det_output, img_meta, interpolation_mode='bilinear',
                       display_mask=False, visualize_lincomb=False, crop_masks=True, score_threshold=0,
                       img_ids=None, mask_det_file=None):
    """
    Postprocesses the output of Yolact in testing mode into a format that makes sense,
    accounting for all the possible configuration settings.

    Args:
        - det_output: The list of dicts that Detect outputs.
        - w: The real width of the image.
        - h: The real height of the image.
        - batch_idx: If you have multiple images for this batch, the image's index in the batch.
        - interpolation_mode: Can be 'nearest' | 'area' | 'bilinear' (see torch.nn.functional.interpolate)

    Returns:
        The dets dict, updated in place: boxes are rescaled to the output image
        size, and full-image masks (binary tensors, or COCO RLEs when
        display_mask is False) are stored under dets['segm'].
    """

    net = det_output['net']
    detection = det_output['detection']
    dets = {}
    for k, v in detection.items():
        dets[k] = v.clone()

    ori_h, ori_w = img_meta['ori_shape'][:2]
    img_h, img_w = img_meta['img_shape'][:2]
    pad_h, pad_w = img_meta['pad_shape'][:2]
    s_w, s_h = (img_w / pad_w, img_h / pad_h)

    if dets['box'].nelement() == 0:
        dets['segm'] = torch.Tensor()
        return dets

    # double check
    if score_threshold > 0:
        keep = dets['score'] > score_threshold

        for k in dets:
            if k not in {'proto', 'bbox_idx', 'priors', 'embed_vectors', 'box_shift'} and dets[k] is not None:
                dets[k] = dets[k][keep]

    # Undo the padding introduced with preserve_aspect_ratio
    if cfg.preserve_aspect_ratio and dets['score'].nelement() != 0:
        # Get rid of any detections whose centers are outside the image
        boxes = dets['box']
        boxes = center_size(boxes)
        not_outside = ((boxes[:, 0] > s_w) + (boxes[:, 1] > s_h)) < 1  # not (a or b)
        for k in dets:
            if k not in {'proto', 'bbox_idx', 'priors', 'embed_vectors', 'box_shift'} and dets[k] is not None:
                dets[k] = dets[k][not_outside]

    if dets['score'].size(0) == 0:
        dets['segm'] = torch.Tensor()
        return dets

    # Actually extract everything from dets now
    boxes = dets['box']
    masks_coeff = dets['mask_coeff']
    masks = dets['mask']
    proto_data = dets['proto']
    # normlized_coeff = F.normalize(masks_coeff, dim=1)
    # sim = torch.mm(normlized_coeff, normlized_coeff.t())

    if visualize_lincomb:
        display_lincomb(proto_data, masks_coeff, img_ids, mask_det_file)

    # Undo padding for masks
    masks = masks[:, :int(s_h*masks.size(1)), :int(s_w*masks.size(2))]
    # Scale masks up to the full image
    if cfg.preserve_aspect_ratio:
        masks = F.interpolate(masks.unsqueeze(0), (ori_h, ori_w), mode=interpolation_mode,
                              align_corners=False).squeeze(0)
    else:
        masks = F.interpolate(masks.unsqueeze(0), (img_h, img_w), mode=interpolation_mode,
                              align_corners=False).squeeze(0)
    # Binarize the masks
    masks.gt_(0.5)

    if display_mask:
        dets['segm'] = masks
    else:
        # segm annotation: png2rle
        masks_output_json = []
        for i in range(masks.size(0)):
            cur_mask = mask_util.encode(np.array(masks[i].cpu(), order='F', dtype='uint8'))
            # masks[i, :, :] = torch.from_numpy(mask_util.decode(cur_mask)).cuda()
            masks_output_json.append(cur_mask)
        dets['segm'] = masks_output_json

    # Undo padding for bboxes
    boxes[:, 0::2] = boxes[:, 0::2] / s_w
    boxes[:, 1::2] = boxes[:, 1::2] / s_h
    # priors = dets['priors']  # [cx, cy, w, h]
    # priors[:, :2] = priors[:, :2] - priors[:, 2:]/2
    # priors[:, 2:] = priors[:, :2] + priors[:, 2:]
    # priors[:, 0::2] = priors[:, 0::2] / s_w
    # priors[:, 1::2] = priors[:, 1::2] / s_h

    if cfg.preserve_aspect_ratio:
        out_w = ori_w
        out_h = ori_h
    else:
        out_w = img_w
        out_h = img_h

    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2], out_w, cast=False)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3], out_h, cast=False)
    # priors[:, 0], priors[:, 2] = sanitize_coordinates(priors[:, 0], priors[:, 2], out_w, cast=False)
    # priors[:, 1], priors[:, 3] = sanitize_coordinates(priors[:, 1], priors[:, 3], out_h, cast=False)

    boxes = boxes.long()
    dets['box'] = boxes
    # dets['priors'] = priors.long()

    return dets
Example #38
def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
    """Converts groundtruths to the dataset in COCO format.

    Args:
      groundtruths: a dictionary of numpy arrays including the fields below.
        Note that each element in the list represent the number for a single
        example without batch dimension. K below denotes the actual number of
        instances for each image.
        Required fields:
          - source_id: a list of numpy arrays of int or string of shape
            [batch_size].
          - height: a list of numpy arrays of int of shape [batch_size].
          - width: a list of numpy arrays of int of shape [batch_size].
          - num_detections: a list of numpy arrays of int of shape [batch_size].
          - boxes: a list of numpy arrays of float of shape [batch_size, K, 4],
              where coordinates are in the original image space (not the
              normalized coordinates).
          - classes: a list of numpy arrays of int of shape [batch_size, K].
        Optional fields:
          - is_crowds: a list of numpy arrays of int of shape [batch_size, K]. If
              the field is absent, it is assumed that this instance is not crowd.
          - areas: a list of numpy arrays of float of shape [batch_size, K]. If the
              field is absent, the area is calculated using either boxes or
              masks depending on which one is available.
          - masks: a list of numpy arrays of string of shape [batch_size, K],
      label_map: (optional) a dictionary that defines items from the category id
        to the category name. If `None`, collect the category mapping from the
        `groundtruths`.

    Returns:
      coco_groundtruths: the groundtruth dataset in COCO format.
    """
    source_ids = np.concatenate(groundtruths['source_id'], axis=0)
    heights = np.concatenate(groundtruths['height'], axis=0)
    widths = np.concatenate(groundtruths['width'], axis=0)
    gt_images = [{'id': int(i), 'height': int(h), 'width': int(w)} for i, h, w
                in zip(source_ids, heights, widths)]

    gt_annotations = []
    num_batches = len(groundtruths['source_id'])
    batch_size = groundtruths['source_id'][0].shape[0]
    for i in range(num_batches):
        for j in range(batch_size):
            num_instances = groundtruths['num_detections'][i][j]
            for k in range(num_instances):
                ann = {}
                ann['image_id'] = int(groundtruths['source_id'][i][j])
                if 'is_crowds' in groundtruths:
                    ann['iscrowd'] = int(groundtruths['is_crowds'][i][j, k])
                else:
                    ann['iscrowd'] = 0
                ann['category_id'] = int(groundtruths['classes'][i][j, k])
                boxes = groundtruths['boxes'][i]
                ann['bbox'] = [float(boxes[j, k, 1]),
                               float(boxes[j, k, 0]),
                               float(boxes[j, k, 3] - boxes[j, k, 1]),
                               float(boxes[j, k, 2] - boxes[j, k, 0])]
                if 'areas' in groundtruths:
                    ann['area'] = float(groundtruths['areas'][i][j, k])
                else:
                    ann['area'] = float((boxes[j, k, 3] - boxes[j, k, 1]) * (boxes[j, k, 2] - boxes[j, k, 0]))

                if 'masks' in groundtruths:
                    mask = Image.open(io.BytesIO(groundtruths['masks'][i][j, k]))
                    width, height = mask.size
                    np_mask = (np.array(mask.getdata()).reshape(height, width).astype(np.uint8))
                    np_mask[np_mask > 0] = 255
                    encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
                    ann['segmentation'] = encoded_mask
                    if 'areas' not in groundtruths:
                        # mask_api.area returns a numpy scalar; cast for JSON safety
                        ann['area'] = float(mask_api.area(encoded_mask))

                gt_annotations.append(ann)

    for i, ann in enumerate(gt_annotations):
        ann['id'] = i + 1

    if label_map:
        gt_categories = [{'id': i, 'name': label_map[i]} for i in label_map]
    else:
        category_ids = [gt['category_id'] for gt in gt_annotations]
        gt_categories = [{'id': i} for i in set(category_ids)]

    gt_dataset = {
        'images': gt_images,
        'categories': gt_categories,
        'annotations': copy.deepcopy(gt_annotations),
    }

    return gt_dataset
# imgIds = coco_ann.getImgIds()
# img = coco_train.loadImgs(imgIds[9])[0]
#
img = coco_ann.loadImgs(imgIds[np.random.randint(0, len(imgIds))])[0]
I = io.imread(img['coco_url'])
plt.axis('off')
plt.imshow(I)
plt.show()

# load and display instance annotations
plt.imshow(I)
plt.axis('off')
annIds = coco_ann.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco_ann.loadAnns(annIds)
print(anns)
print(len(anns))
coco_ann.showAnns(anns, draw_bbox=False)
plt.show()

merge()

if len(anns) != 0:
    for i, ann in enumerate(anns):
        mask = coco_ann.annToMask(ann)
        # encode expects a Fortran-ordered uint8 array and returns an RLE dict
        rle = encode(np.asfortranarray(mask))
        print(rle)
        # io.imsave('/1.png', img)
        plt.imshow(mask)
        plt.savefig('1.jpg')
        plt.show()
Example #40
    def draw_panoptic_seg_predictions(self,
                                      frame,
                                      panoptic_seg,
                                      segments_info,
                                      area_threshold=None,
                                      alpha=0.5):
        frame_visualizer = Visualizer(frame, self.metadata)
        pred = _PanopticPrediction(panoptic_seg, segments_info)

        if self._instance_mode == ColorMode.IMAGE_BW:
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                pred.non_empty_mask())

        # draw mask for all semantic segments first i.e. "stuff"
        for mask, sinfo in pred.semantic_masks():
            category_idx = sinfo["category_id"]
            try:
                mask_color = [
                    x / 255 for x in self.metadata.stuff_colors[category_idx]
                ]
            except AttributeError:
                mask_color = None

            frame_visualizer.draw_binary_mask(
                mask,
                color=mask_color,
                text=self.metadata.stuff_classes[category_idx],
                alpha=alpha,
                area_threshold=area_threshold,
            )

        all_instances = list(pred.instance_masks())
        if len(all_instances) == 0:
            return frame_visualizer.output
        # draw mask for all instances second
        masks, sinfo = list(zip(*all_instances))
        num_instances = len(masks)
        masks_rles = mask_util.encode(
            np.asarray(np.asarray(masks).transpose(1, 2, 0),
                       dtype=np.uint8,
                       order="F"))
        assert len(masks_rles) == num_instances

        category_ids = [x["category_id"] for x in sinfo]
        detected = [
            _DetectedInstance(category_ids[i],
                              bbox=None,
                              mask_rle=masks_rles[i],
                              color=None,
                              ttl=8) for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)
        labels = [self.metadata.thing_classes[k] for k in category_ids]

        frame_visualizer.overlay_instances(
            boxes=None,
            masks=masks,
            labels=labels,
            keypoints=None,
            assigned_colors=colors,
            alpha=alpha,
        )
        return frame_visualizer.output
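
Note that mask_util.encode on an H x W x N array RLE-encodes all N masks in one call and returns a list of N RLEs, which is what the assert above relies on. In isolation:

import numpy as np
import pycocotools.mask as mask_util

masks = np.random.rand(3, 32, 32) > 0.5  # N x H x W boolean masks
rles = mask_util.encode(np.asarray(
    masks.transpose(1, 2, 0), dtype=np.uint8, order="F"))
assert len(rles) == masks.shape[0]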
Example #41
                # Increment ann id
                annId = annId + 1

                # Add a comma and line break after each annotation
                outfile.write(unicode(','))
                outfile.write(unicode('\n'))

        # Add stuff annotations
        for i, labelIdx in enumerate(labelsStuff):
            # Create mask and encode it
            labelMask = np.zeros((h, w))
            labelMask[:, :] = S == labelIdx
            labelMask = np.expand_dims(labelMask, axis=2)
            labelMask = labelMask.astype('uint8')
            labelMask = np.asfortranarray(labelMask)
            Rs = mask.encode(labelMask)

            # Create annotation data
            anndata = {}
            anndata['id'] = annId
            anndata['image_id'] = imageIds[imageIdx]
            anndata['category_id'] = labelIdx - oldStuffStartIdx + newStuffStartIdx # Stuff classes start from 92 in v. 1.1
            anndata['segmentation'] = Rs
            anndata['area'] = float(mask.area(Rs))
            anndata['bbox'] = mask.toBbox(Rs).tolist()
            anndata['iscrowd'] = 1

            # Write JSON
            str_ = json.dumps(anndata, indent=indent, sort_keys=True, separators=separators, ensure_ascii=ensure_ascii)
            outfile.write(unicode(str_))
Example #42
    def test_uncompressed_RLE(self):
        mask = make_mask()
        rle = mask_util.encode(np.asarray(mask, order="F"))
        uncompressed = uncompressed_rle(mask)
        compressed = mask_util.frPyObjects(uncompressed, *rle["size"])
        self.assertEqual(rle, compressed)
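
The test relies on frPyObjects accepting an uncompressed RLE (run lengths as a plain Python list, column-major, starting with the run of zeros) and compressing it into the byte-string 'counts' form that encode produces. A minimal illustration with made-up counts:

import pycocotools.mask as mask_util

size = [4, 4]  # height, width
uncompressed = {"size": size, "counts": [5, 3, 8]}  # 5 zeros, 3 ones, 8 zeros
compressed = mask_util.frPyObjects(uncompressed, *size)
decoded = mask_util.decode(compressed)  # back to a 4 x 4 binary mask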
Example #43
    image_name = image_name.replace("GT", "RGB")

    image_json = {
        "height": np_im.shape[0],
        "width": np_im.shape[1],
        "id": img_id,
        "file_name": image_name
    }

    mainjson["images"].append(image_json)

    ground_truth_binary_mask = np_im

    fortran_ground_truth_binary_mask = np.asfortranarray(
        ground_truth_binary_mask)
    encoded_ground_truth = mask.encode(fortran_ground_truth_binary_mask)
    ground_truth_area = mask.area(encoded_ground_truth)
    ground_truth_bounding_box = mask.toBbox(encoded_ground_truth)
    contours = measure.find_contours(ground_truth_binary_mask, 0.5)

    maskbits = im
    maskbits = np.array(maskbits)
    #mask = resize(mask, (768, 1024), preserve_range=True)
    maskbits = maskbits.astype(np.uint8)
    maskbits[maskbits < 255] = 0
    maskbits[maskbits == 255] = 1
    obj_ids = np.unique(maskbits)
    obj_ids = obj_ids[1:]
    masks = maskbits == obj_ids[:, None, None]
    num_objs = len(obj_ids)
Example #44
    def simple_test_mask(self,
                         score_map,
                         corner_offsets,
                         img_meta,
                         det_bboxes,
                         rescale=False):
        '''
        :param score_map: per-class score map, h x w x 80
        :param corner_offsets: predicted corner offset maps
        :param img_meta: image meta information
        :param det_bboxes: per-class detected boxes
        :param rescale: whether to rescale masks to the original image size
        :return: per-class list of segmentation masks
        '''
        # TODO: solve hardcode
        semantic_map = (score_map > 0.4).astype('int')
        h, w, _ = semantic_map.shape
        instance_map = -np.ones_like(semantic_map)
        border_y, border_x = -img_meta['offset']
        ori_h, ori_w, _ = img_meta['ori_shape']
        _, img_h, img_w = img_meta['img_shape']

        for label, bboxes in enumerate(det_bboxes):
            #keepinds = (bboxes[...,-1]>0.4)
            #bboxes = bboxes[keepinds]
            if (len(bboxes) == 0) or (semantic_map[..., label].sum() == 0):
                continue
            centers = np.array(bboxes)[..., :4]
            centers[..., 0::2] += border_x
            centers[..., 1::2] += border_y
            pixels = semantic_map[..., label]

            #pdb.set_trace()
            if len(bboxes) == 1:
                instance_map[..., label] = pixels - 1
            else:
                for y in range(h):
                    for x in range(w):
                        if pixels[y, x] == 0:
                            continue
                        tl_x = 4 * (x + corner_offsets[label, y, x]) - 1
                        tl_y = 4 * (y + corner_offsets[label + 80, y, x]) - 1
                        br_x = 4 * (x + corner_offsets[label + 160, y, x]) - 1
                        br_y = 4 * (y + corner_offsets[label + 240, y, x]) - 1
                        #pdb.set_trace()
                        instance_map[y, x, label] = KNN_cluster(
                            centers, np.array([tl_x, tl_y, br_x, br_y]))

        #seg_maps = []
        cls_segms = [[] for _ in range(80)]

        for label in range(80):
            map_with_id = instance_map[..., label]
            if map_with_id.max() == -1:
                continue

            for ins_id in range(map_with_id.max() + 1):
                seg_map = (map_with_id == ins_id).astype('float32')
                seg_map *= score_map[..., label]
                seg_map = cv2.resize(seg_map, (img_w, img_h))
                seg_map = (seg_map > 0.4).astype('int')
                #seg_map = seg_map[border_y:border_y + ori_h, border_x:border_x + ori_w]
                if seg_map.sum() == 0:
                    continue
                seg_map = np.uint8(seg_map)

                rle = mask_util.encode(
                    np.array(seg_map[:, :, np.newaxis], order='F'))[0]
                #rle['counts'].decode()
                #cls_segms[label].append(rle)
                cls_segms[label].append(seg_map)
        #pdb.set_trace()
        return cls_segms
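
simple_test_mask calls a KNN_cluster helper that is not shown; from the call site it assigns each pixel's predicted corner box to the index of the nearest detection center. A plausible stand-in (the 1-NN behavior is a guess):

import numpy as np

def KNN_cluster(centers, box):
    # centers: (num_dets, 4) boxes; box: (4,) predicted box for one pixel
    d = np.linalg.norm(centers - box[None, :], axis=1)
    return int(np.argmin(d))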