def test_draw_dataset_dict(self):
    img = np.random.rand(512, 512, 3) * 255
    dic = {
        "annotations": [
            {
                "bbox": [
                    368.9946492271106,
                    330.891438763377,
                    13.148537455410235,
                    13.644708680142685,
                ],
                "bbox_mode": BoxMode.XYWH_ABS,
                "category_id": 0,
                "iscrowd": 1,
                "segmentation": {
                    "counts": "_jh52m?2N2N2N2O100O10O001N1O2MceP2",
                    "size": [512, 512],
                },
            }
        ],
        "height": 512,
        "image_id": 1,
        "width": 512,
    }
    v = Visualizer(img, self.metadata)
    v.draw_dataset_dict(dic)
def draw_instance_predictions(self, frame, predictions):
    """
    Draw instance-level prediction results on an image.

    Args:
        frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
        predictions (Instances): the output of an instance detection/segmentation
            model. Following fields will be used to draw:
            "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

    Returns:
        output (VisImage): image object with visualizations.
    """
    frame_visualizer = Visualizer(frame, self.metadata)
    num_instances = len(predictions)
    if num_instances == 0:
        return frame_visualizer.output

    boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None
    scores = predictions.scores if predictions.has("scores") else None
    classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None
    keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None

    if predictions.has("pred_masks"):
        masks = predictions.pred_masks
        # mask IOU is not yet enabled
        # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F"))
        # assert len(masks_rles) == num_instances
    else:
        masks = None

    detected = [
        _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8)
        for i in range(num_instances)
    ]
    colors = self._assign_colors(detected)

    labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None))

    if self._instance_mode == ColorMode.IMAGE_BW:
        # any() returns uint8 tensor
        frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
            (masks.any(dim=0) > 0).numpy() if masks is not None else None
        )
        alpha = 0.3
    else:
        alpha = 0.5

    frame_visualizer.overlay_instances(
        boxes=None if masks is not None else boxes,  # boxes are a bit distracting
        masks=masks,
        labels=labels,
        keypoints=keypoints,
        assigned_colors=colors,
        alpha=alpha,
    )

    return frame_visualizer.output
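# Minimal usage sketch (an assumption, not part of the code above): the method above is
# written as a method of a video visualizer class that owns `self.metadata` and
# `self._instance_mode`. Assuming such a class is named `VideoVisualizer` and a
# predictor-style callable returning an "instances" field is available, a per-frame
# loop could look like the following. All names here are hypothetical.
def example_visualize_video_frames(frames, predictor, metadata):
    """Yield RGB frames with instance predictions drawn on them."""
    video_visualizer = VideoVisualizer(metadata)  # hypothetical class owning the method above
    for frame in frames:  # frames: iterable of (H, W, 3) RGB uint8 arrays
        predictions = predictor(frame)["instances"].to("cpu")
        vis_output = video_visualizer.draw_instance_predictions(frame, predictions)
        yield vis_output.get_image()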
def test_draw_instance_predictions(self):
    img, boxes, _, _, masks = self._random_data()
    num_inst = len(boxes)
    inst = Instances((img.shape[0], img.shape[1]))
    inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
    inst.scores = torch.rand(num_inst)
    inst.pred_boxes = torch.from_numpy(boxes)
    inst.pred_masks = torch.from_numpy(np.asarray(masks))

    v = Visualizer(img, self.metadata)
    v.draw_instance_predictions(inst)
def draw_sem_seg(self, frame, sem_seg, area_threshold=None):
    """
    Args:
        sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W),
            each value is the integer label.
        area_threshold (Optional[int]): only draw segmentations larger than the threshold
    """
    # don't need to do anything special; forward the threshold to the per-frame visualizer
    frame_visualizer = Visualizer(frame, self.metadata)
    frame_visualizer.draw_sem_seg(sem_seg, area_threshold=area_threshold)
    return frame_visualizer.output
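# Hedged example (assumption, not part of the code above): driving the semantic-segmentation
# drawing on a single frame. The label map is assumed to come from the argmax over per-class
# logits produced elsewhere; `sem_seg_logits` and `video_visualizer` are hypothetical names.
def example_visualize_sem_seg(frame, sem_seg_logits, video_visualizer):
    """Draw a semantic segmentation (argmax of per-class logits) onto an RGB frame."""
    sem_seg_labels = sem_seg_logits.argmax(dim=0).cpu()  # (C, H, W) logits -> (H, W) labels
    vis_output = video_visualizer.draw_sem_seg(frame, sem_seg_labels)
    return vis_output.get_image()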
def test_overlay_rotated_instances(self):
    H, W = 100, 150
    img = np.random.rand(H, W, 3) * 255
    num_boxes = 50
    boxes_5d = torch.zeros(num_boxes, 5)
    boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W)
    boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H)
    boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
    boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
    boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800)
    rotated_boxes = RotatedBoxes(boxes_5d)
    labels = [str(i) for i in range(num_boxes)]

    v = Visualizer(img, self.metadata)
    output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image()
    self.assertEqual(output.shape, img.shape)
def draw_panoptic_seg_predictions(
    self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5
):
    frame_visualizer = Visualizer(frame, self.metadata)
    pred = _PanopticPrediction(panoptic_seg, segments_info)

    if self._instance_mode == ColorMode.IMAGE_BW:
        frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
            pred.non_empty_mask()
        )

    # draw mask for all semantic segments first i.e. "stuff"
    for mask, sinfo in pred.semantic_masks():
        category_idx = sinfo["category_id"]
        try:
            mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]]
        except AttributeError:
            mask_color = None
        frame_visualizer.draw_binary_mask(
            mask,
            color=mask_color,
            text=self.metadata.stuff_classes[category_idx],
            alpha=alpha,
            area_threshold=area_threshold,
        )

    all_instances = list(pred.instance_masks())
    if len(all_instances) == 0:
        return frame_visualizer.output

    # draw mask for all instances second
    masks, sinfo = list(zip(*all_instances))
    num_instances = len(masks)
    masks_rles = mask_util.encode(
        np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F")
    )
    assert len(masks_rles) == num_instances

    category_ids = [x["category_id"] for x in sinfo]
    detected = [
        _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8)
        for i in range(num_instances)
    ]
    colors = self._assign_colors(detected)
    labels = [self.metadata.thing_classes[k] for k in category_ids]

    frame_visualizer.overlay_instances(
        boxes=None,
        masks=masks,
        labels=labels,
        keypoints=None,
        assigned_colors=colors,
        alpha=alpha,
    )
    return frame_visualizer.output
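# Hedged example (assumption, not part of the code above): feeding a panoptic prediction into
# the method above. The predictor is assumed to return a (panoptic_seg, segments_info) pair
# under a "panoptic_seg" key; `predictor` and `video_visualizer` are hypothetical names.
def example_visualize_panoptic(frame, predictor, video_visualizer):
    """Draw panoptic segmentation predictions onto an RGB frame."""
    panoptic_seg, segments_info = predictor(frame)["panoptic_seg"]
    vis_output = video_visualizer.draw_panoptic_seg_predictions(
        frame, panoptic_seg.to("cpu"), segments_info
    )
    return vis_output.get_image()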
def test_overlay_instances(self):
    img, boxes, labels, polygons, masks = self._random_data()

    v = Visualizer(img, self.metadata)
    output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
    self.assertEqual(output.shape, img.shape)

    # Test 2x scaling
    v = Visualizer(img, self.metadata, scale=2.0)
    output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
    self.assertEqual(output.shape[0], img.shape[0] * 2)

    # Test overlay masks
    v = Visualizer(img, self.metadata)
    output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image()
    self.assertEqual(output.shape, img.shape)
def visualize_training(self, batched_inputs, results):
    """
    A function used to visualize ground truth images and final network predictions.
    It shows ground truth bounding boxes on the original image and up to 20
    predicted object bounding boxes on the original image.

    Args:
        batched_inputs (list): a list that contains input to the model.
        results (List[Instances]): a list of #images elements.
    """
    from mydl.utils.visualizer import Visualizer

    assert len(batched_inputs) == len(
        results
    ), "Cannot visualize inputs and results of different sizes"
    storage = get_event_storage()
    max_boxes = 20

    image_index = 0  # only visualize a single image
    img = batched_inputs[image_index]["image"].cpu().numpy()
    assert img.shape[0] == 3, "Images should have 3 channels."
    if self.input_format == "BGR":
        img = img[::-1, :, :]
    img = img.transpose(1, 2, 0)

    v_gt = Visualizer(img, None)
    v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes)
    anno_img = v_gt.get_image()

    processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1])
    predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy()

    v_pred = Visualizer(img, None)
    v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes])
    prop_img = v_pred.get_image()

    vis_img = np.vstack((anno_img, prop_img))
    vis_img = vis_img.transpose(2, 0, 1)
    vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results"
    storage.put_image(vis_name, vis_img)
def visualize_training(self, batched_inputs, proposals):
    """
    A function used to visualize images and proposals. It shows ground truth
    bounding boxes on the original image and up to 20 predicted object
    proposals on the original image. Users can implement different
    visualization functions for different models.

    Args:
        batched_inputs (list): a list that contains input to the model.
        proposals (list): a list that contains predicted proposals. Both
            batched_inputs and proposals should have the same length.
    """
    from mydl.utils.visualizer import Visualizer

    storage = get_event_storage()
    max_vis_prop = 20

    for input, prop in zip(batched_inputs, proposals):
        img = input["image"].cpu().numpy()
        assert img.shape[0] == 3, "Images should have 3 channels."
        if self.input_format == "BGR":
            img = img[::-1, :, :]
        img = img.transpose(1, 2, 0)

        v_gt = Visualizer(img, None)
        v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes)
        anno_img = v_gt.get_image()

        box_size = min(len(prop.proposal_boxes), max_vis_prop)
        v_pred = Visualizer(img, None)
        v_pred = v_pred.overlay_instances(
            boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy()
        )
        prop_img = v_pred.get_image()

        vis_img = np.concatenate((anno_img, prop_img), axis=1)
        vis_img = vis_img.transpose(2, 0, 1)
        vis_name = " 1. GT bounding boxes 2. Predicted proposals"
        storage.put_image(vis_name, vis_img)
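# Hedged sketch (assumption, not part of the code above): how a hook like visualize_training
# might be invoked from a model's forward pass during training, gated by a visualization
# period. `vis_period` is a hypothetical config value and `storage.iter` is assumed to be the
# current training iteration tracked by the event storage.
def example_maybe_visualize(model, batched_inputs, proposals, vis_period=200):
    """Call model.visualize_training every `vis_period` iterations."""
    storage = get_event_storage()
    if vis_period > 0 and storage.iter % vis_period == 0:
        model.visualize_training(batched_inputs, proposals)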
if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):

    def dataset_id_map(ds_id):
        return metadata.thing_dataset_id_to_contiguous_id[ds_id]

elif "lvis" in args.dataset:
    # LVIS results are in the same format as COCO results, but have a different
    # mapping from dataset category id to contiguous category id in [0, #categories - 1]
    def dataset_id_map(ds_id):
        return ds_id - 1

else:
    raise ValueError("Unsupported dataset: {}".format(args.dataset))

os.makedirs(args.output, exist_ok=True)

for dic in tqdm.tqdm(dicts):
    img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1]
    basename = os.path.basename(dic["file_name"])

    predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2])
    vis = Visualizer(img, metadata)
    vis_pred = vis.draw_instance_predictions(predictions).get_image()

    vis = Visualizer(img, metadata)
    vis_gt = vis.draw_dataset_dict(dic).get_image()

    concat = np.concatenate((vis_pred, vis_gt), axis=1)
    cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1])
if args.type == "instance":
    dicts = load_cityscapes_instances(
        args.image_dir, args.gt_dir, from_json=True, to_polygons=True
    )
    logger.info("Done loading {} samples.".format(len(dicts)))

    thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval]
    meta = Metadata().set(thing_classes=thing_classes)

else:
    dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir)
    logger.info("Done loading {} samples.".format(len(dicts)))

    # use the "stuff_classes" key so the Visualizer (which reads metadata.stuff_classes)
    # can label the semantic segments
    stuff_classes = [k.name for k in labels if k.trainId != 255]
    stuff_colors = [k.color for k in labels if k.trainId != 255]
    meta = Metadata().set(stuff_classes=stuff_classes, stuff_colors=stuff_colors)

for d in dicts:
    img = np.array(Image.open(d["file_name"]))
    visualizer = Visualizer(img, metadata=meta)
    vis = visualizer.draw_dataset_dict(d)
    # cv2.imshow("a", vis.get_image()[:, :, ::-1])
    # cv2.waitKey()
    fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
    vis.save(fpath)
def test_overlay_instances_no_boxes(self):
    img, boxes, labels, polygons, _ = self._random_data()
    v = Visualizer(img, self.metadata)
    v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image()
def test_correct_output_shape(self):
    img = np.random.rand(928, 928, 3) * 255
    v = Visualizer(img, self.metadata)
    out = v.output.get_image()
    self.assertEqual(out.shape, img.shape)
print("Saving to {} ...".format(filepath)) vis.save(filepath) scale = 2.0 if args.show else 1.0 if args.source == "dataloader": train_data_loader = build_detection_train_loader(cfg) for batch in train_data_loader: for per_image in batch: # Pytorch tensor is in (C, H, W) format img = per_image["image"].permute(1, 2, 0) if cfg.INPUT.FORMAT == "BGR": img = img[:, :, [2, 1, 0]] else: img = np.asarray(Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) visualizer = Visualizer(img, metadata=metadata, scale=scale) target_fields = per_image["instances"].get_fields() labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] vis = visualizer.overlay_instances( labels=labels, boxes=target_fields.get("gt_boxes", None), masks=target_fields.get("gt_masks", None), keypoints=target_fields.get("gt_keypoints", None), ) output(vis, str(per_image["image_id"]) + ".jpg") else: dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) if cfg.MODEL.KEYPOINT_ON: dicts = filter_images_with_few_keypoints(dicts, 1) for dic in tqdm.tqdm(dicts): img = utils.read_image(dic["file_name"], "RGB")