def visualize_training(self, batched_inputs, proposals):
    """
    A function used to visualize images and proposals. It shows ground truth
    bounding boxes on the original image and up to 20 top-scoring predicted
    object proposals on the original image. Users can implement different
    visualization functions for different models.

    Args:
        batched_inputs (list): a list that contains input to the model.
        proposals (list): a list that contains predicted proposals. Both
            batched_inputs and proposals should have the same length.
    """
    storage = get_event_storage()
    max_vis_prop = 20

    for input, prop in zip(batched_inputs, proposals):
        img = input["image"]
        img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format)
        v_gt = Visualizer(img, None)
        v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes)
        anno_img = v_gt.get_image()
        box_size = min(len(prop.proposal_boxes), max_vis_prop)
        v_pred = Visualizer(img, None)
        v_pred = v_pred.overlay_instances(
            boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy()
        )
        prop_img = v_pred.get_image()
        vis_img = np.concatenate((anno_img, prop_img), axis=1)
        vis_img = vis_img.transpose(2, 0, 1)
        vis_name = "Left: GT bounding boxes; Right: Predicted proposals"
        storage.put_image(vis_name, vis_img)
        break  # only visualize one image in a batch
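# Hedged usage sketch (not from the original code): visualize_training is typically
# invoked from the model's forward pass during training, gated on the iteration
# counter held by the event storage. The `vis_period` attribute below is a
# hypothetical setting controlling how often images are written.
if self.training and self.vis_period > 0:
    storage = get_event_storage()
    if storage.iter % self.vis_period == 0:
        self.visualize_training(batched_inputs, proposals)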
def draw_instance_predictions(self, frame, predictions):
    """
    Draw instance-level prediction results on an image.

    Args:
        frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
        predictions (Instances): the output of an instance detection model. The
            following fields will be used to draw: "pred_boxes", "pred_classes",
            "scores".

    Returns:
        output (VisImage): image object with visualizations.
    """
    frame_visualizer = Visualizer(frame, self.metadata)
    num_instances = len(predictions)
    if num_instances == 0:
        return frame_visualizer.output

    boxes = (
        predictions.pred_boxes.tensor.numpy()
        if predictions.has("pred_boxes")
        else None
    )
    scores = predictions.scores if predictions.has("scores") else None
    classes = (
        predictions.pred_classes.numpy()
        if predictions.has("pred_classes")
        else None
    )

    detected = [
        _DetectedInstance(classes[i], boxes[i], color=None, ttl=8)
        for i in range(num_instances)
    ]
    colors = self._assign_colors(detected)

    labels = _create_text_labels(
        classes, scores, self.metadata.get("thing_classes", None)
    )

    if self._instance_mode == ColorMode.IMAGE_BW:
        # Draw on a grayscale copy of the frame so the colored instances stand out.
        frame_visualizer.output.img = frame_visualizer._create_grayscale_image()
        alpha = 0.3
    else:
        alpha = 0.5

    frame_visualizer.overlay_instances(
        boxes=boxes,
        labels=labels,
        assigned_colors=colors,
        alpha=alpha,
    )

    return frame_visualizer.output
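# Hedged usage sketch: run a detector frame by frame and let draw_instance_predictions
# (assumed here to live on a detectron2 VideoVisualizer-style class) keep per-object
# colors stable across frames via _assign_colors. `predictor`, `metadata`, and the
# file names are placeholders for illustration.
import cv2
from detectron2.utils.video_visualizer import VideoVisualizer

video = cv2.VideoCapture("input.mp4")
video_visualizer = VideoVisualizer(metadata)  # assumed owner of draw_instance_predictions
frame_idx = 0
while True:
    has_frame, frame_bgr = video.read()
    if not has_frame:
        break
    frame_rgb = frame_bgr[:, :, ::-1]                        # the method expects RGB frames
    instances = predictor(frame_bgr)["instances"].to("cpu")  # CPU tensors for the .numpy() calls above
    vis_frame = video_visualizer.draw_instance_predictions(frame_rgb, instances)
    cv2.imwrite("vis_{:06d}.jpg".format(frame_idx), vis_frame.get_image()[:, :, ::-1])
    frame_idx += 1
video.release()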
def run_on_image(self, image):
    """
    Args:
        image (np.ndarray): an image of shape (H, W, C) (in BGR order).
            This is the format used by OpenCV.

    Returns:
        predictions (dict): the output of the model.
        vis_output (VisImage): the visualized image output.
    """
    vis_output = None
    predictions = self.predictor(image)
    # Convert image from OpenCV BGR format to Matplotlib RGB format.
    image = image[:, :, ::-1]
    visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
    if "instances" in predictions:
        instances = predictions["instances"].to(self.cpu_device)
        vis_output = visualizer.draw_instance_predictions(predictions=instances)
    return predictions, vis_output
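# Hedged usage sketch: read a single image with OpenCV (BGR, as run_on_image expects),
# run the wrapper, and save the rendered result. `VisualizationDemo` and the file
# names are assumptions for illustration; VisImage.save writes the image to disk.
import cv2

demo = VisualizationDemo(cfg)                      # assumed wrapper class exposing run_on_image
image = cv2.imread("input.jpg", cv2.IMREAD_COLOR)  # BGR order
predictions, vis_output = demo.run_on_image(image)
if vis_output is not None:
    vis_output.save("input_vis.jpg")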
train_data_loader = build_detection_train_loader(cfg, mapper=mapper)
for batch in train_data_loader:
    for per_image in batch:
        # type(per_image), per_image.keys()
        # <class 'dict'>, dict_keys(['file_name', 'image_id', 'height', 'width', 'image', 'instances'])

        # The PyTorch tensor is in (C, H, W) format; convert it to an (H, W, C)
        # numpy array before visualization.
        img = per_image["image"].permute(1, 2, 0).cpu().numpy()
        if cfg.INPUT.FORMAT == "BGR":
            img = img[:, :, [2, 1, 0]]
        else:
            img = np.asarray(
                Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")
            )

        visualizer = Visualizer(img, metadata=metadata, scale=scale)

        # target_fields.keys() => dict_keys(['gt_boxes', 'gt_classes'])
        target_fields = per_image["instances"].get_fields()
        labels = [
            metadata.thing_classes[i] for i in target_fields["gt_classes"]
        ]
        vis = visualizer.overlay_instances(
            labels=labels,
            boxes=target_fields.get("gt_boxes", None),
        )

        # modified: voc I=1 in any case
        num_instances = len(per_image["instances"])
        output(
            vis,
            "I{}_".format(num_instances) +
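# The loop above calls an `output` helper that is not shown. A minimal sketch of what
# it might look like, assuming results are saved to a `dirname` directory via
# VisImage.save (the actual helper may instead display the image interactively):
import os

def output(vis, fname):
    filepath = os.path.join(dirname, fname)
    print("Saving to {} ...".format(filepath))
    vis.save(filepath)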
    return dataset_dicts


if __name__ == "__main__":
    """
    Test the LVIS json dataset loader.

    Usage:
        python -m fsdet.data.datasets.lvis \
            path/to/json path/to/image_root dataset_name vis_limit
    """
    import sys

    import numpy as np
    from fsdet.utils.logger import setup_logger
    from PIL import Image

    from fsdet.utils.visualizer import Visualizer

    logger = setup_logger(name=__name__)
    meta = MetadataCatalog.get(sys.argv[3])

    dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3])
    logger.info("Done loading {} samples.".format(len(dicts)))

    dirname = "lvis-data-vis"
    os.makedirs(dirname, exist_ok=True)
    for d in dicts[: int(sys.argv[4])]:
        img = np.array(Image.open(d["file_name"]))
        visualizer = Visualizer(img, metadata=meta)
        vis = visualizer.draw_dataset_dict(d)
        fpath = os.path.join(dirname, os.path.basename(d["file_name"]))
        vis.save(fpath)
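# Hedged registration sketch: MetadataCatalog.get(dataset_name) above only returns
# useful metadata if the dataset has been registered first. The names and paths below
# are placeholders, and the catalog classes are assumed to follow detectron2's
# DatasetCatalog / MetadataCatalog API:
from detectron2.data import DatasetCatalog, MetadataCatalog

json_file, image_root, name = "path/to/lvis.json", "path/to/image_root", "my_lvis_val"
DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name))
MetadataCatalog.get(name).set(
    json_file=json_file, image_root=image_root, evaluator_type="lvis"
)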
if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): def dataset_id_map(ds_id): return metadata.thing_dataset_id_to_contiguous_id[ds_id] elif "lvis" in args.dataset: # LVIS results are in the same format as COCO results, but have a different # mapping from dataset category id to contiguous category id in [0, #categories - 1] def dataset_id_map(ds_id): return ds_id - 1 else: raise ValueError("Unsupported dataset: {}".format(args.dataset)) os.makedirs(args.output, exist_ok=True) for dic in tqdm.tqdm(dicts): img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] basename = os.path.basename(dic["file_name"]) predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2]) vis = Visualizer(img, metadata) vis_pred = vis.draw_instance_predictions(predictions).get_image() vis = Visualizer(img, metadata) vis_gt = vis.draw_dataset_dict(dic).get_image() concat = np.concatenate((vis_pred, vis_gt), axis=1) cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1])