def draw_binary_mask(
    self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=4096
):
    """
    Overlay one binary mask (and optionally a text label) on the output image.

    Masks without holes are drawn polygon by polygon; masks with holes are
    rendered as a single RGBA image layer instead.

    Args:
        binary_mask (ndarray): numpy array of shape (H, W), where H is the image
            height and W is the image width. Each value in the array is either a
            0 or 1 value of uint8 type.
        color: color of the mask. Refer to `matplotlib.colors` for a full list of
            formats that are accepted. If None, will pick a random color.
        edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
            full list of formats that are accepted.
        text (str): optional label. When given and at least one segment was drawn,
            it is placed at the median pixel of the largest connected component and
            of every component larger than `_LARGE_MASK_AREA_THRESH`.
        alpha (float): blending coefficient. Smaller values lead to more
            transparent masks.
        area_threshold (float): a polygon smaller than this will not be shown.
            None is treated as 4096.

    Returns:
        output (VisImage): image object with mask drawn.
    """
    if color is None:
        color = random_color(rgb=True, maximum=1)
    if area_threshold is None:
        area_threshold = 4096

    binary_mask = binary_mask.astype("uint8")  # opencv needs uint8
    mask = GenericMask(binary_mask, self.output.height, self.output.width)
    height, width = binary_mask.shape[0], binary_mask.shape[1]
    drew_something = False

    if mask.has_holes:
        # Polygons cannot express holes, so paint an RGBA layer whose alpha
        # channel is non-zero only where the mask is set.
        layer = np.zeros((height, width, 4), dtype="float32")
        layer[:, :, :3] = color
        layer[:, :, 3] = (mask.mask == 1).astype("float32") * alpha
        drew_something = True
        self.output.ax.imshow(layer)
    else:
        # Regular masks: draw each sufficiently large polygon.
        for polygon in mask.polygons:
            polygon_area = mask_util.area(mask_util.frPyObjects([polygon], height, width))
            if polygon_area < area_threshold:
                continue
            drew_something = True
            vertices = polygon.reshape(-1, 2)
            self.draw_polygon(vertices, color=color, edge_color=edge_color, alpha=alpha)

    if text is None or not drew_something:
        return self.output

    # TODO sometimes drawn on wrong objects. the heuristics here can improve.
    text_color = self._change_color_brightness(color, brightness_factor=0.7)
    num_components, component_map, stats, _centroids = cv2.connectedComponentsWithStats(
        binary_mask, 8
    )
    # Row 0 of `stats` is skipped (hence the +1) when looking for the largest component.
    biggest_id = np.argmax(stats[1:, -1]) + 1

    # Label the largest component, as well as other very large components.
    for component_id in range(1, num_components):
        if component_id == biggest_id or stats[component_id, -1] > _LARGE_MASK_AREA_THRESH:
            # median is more stable than centroid
            member_coords = (component_map == component_id).nonzero()
            center = np.median(member_coords, axis=1)[::-1]
            self.draw_text(text, center, color=text_color)
    return self.output
def draw_classification_gt_pd(self, gt, pd):
    """
    Draw a classification prediction next to its ground truth as a text
    label in the top-left corner of the image.

    Args:
        gt: ground-truth json (dict). Keys read: "category2_id", "part", "toward".
        pd: prediction json (dict). Keys read: "category_id", "category_score",
            "part", "part_score", "toward", "toward_score".

    All ids are shifted by -1 before being turned into text labels.

    Returns:
        output (VisImage): image object with visualizations.
    """

    def make_labels(ids, scores, classes_key):
        # Class names come from the metadata entry `classes_key` (None if absent).
        return _create_text_labels(ids, scores, self.metadata.get(classes_key, None))

    # prediction
    pred_category_text = make_labels(
        [pd['category_id'] - 1], [pd['category_score']], "category2_classes"
    )
    pred_part_text = make_labels([pd['part'] - 1], [pd['part_score']], "part_classes")
    pred_toward_text = make_labels([pd['toward'] - 1], [pd['toward_score']], "toward_classes")

    # gt
    gt_category_id = [gt['category2_id'] - 1]
    gt_category_text = make_labels(gt_category_id, None, "category2_classes")
    gt_part_text = make_labels([gt['part'] - 1], None, "part_classes")
    gt_toward_text = make_labels([gt['toward'] - 1], None, "toward_classes")

    color = random_color(rgb=True, maximum=1)

    label = f"predict: {pred_category_text[0]}\nground truth: {gt_category_text[0]}"
    # The part/toward lines are only shown when the shifted ground-truth category is 1.
    if gt_category_id[0] == 1:
        label = (
            label
            + '\n\n'
            + f"predict: {pred_part_text[0]}, {pred_toward_text[0]}\n"
            + f"ground truth: {gt_part_text[0]}, {gt_toward_text[0]}"
        )

    img_height = self.output.height
    height_ratio = img_height / np.sqrt(self.output.height * self.output.width)
    text_color = self._change_color_brightness(color, brightness_factor=0.7)
    size_scale = np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2)
    font_size = size_scale * 0.5 * self._default_font_size

    self.draw_text(
        label,
        (2, 2),
        color=text_color,
        horizontal_alignment="left",
        font_size=font_size,
    )
    return self.output
def draw_panoptic_seg_predictions(
    self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7
):
    """
    Draw panoptic prediction results on an image.

    Semantic ("stuff") segments are drawn first, instance segments second.

    Args:
        panoptic_seg (Tensor): of shape (height, width) where the values are ids
            for each segment.
        segments_info (list[dict]): Describe each segment in `panoptic_seg`.
            Each dict contains keys "id", "category_id", "isthing".
        area_threshold (int): stuff segments with less than `area_threshold` are
            not drawn. Forwarded unchanged to `draw_binary_mask`.
        alpha (float): blending coefficient forwarded to the mask drawing calls.

    Returns:
        output (VisImage): image object with visualizations.
    """
    prediction = _PanopticPrediction(panoptic_seg, segments_info)

    if self._instance_mode == ColorMode.IMAGE_BW:
        self.output.img = self._create_grayscale_image(prediction.non_empty_mask())

    # Pass 1: semantic segments, i.e. "stuff".
    for stuff_mask, stuff_info in prediction.semantic_masks():
        stuff_category = stuff_info["category_id"]
        try:
            stuff_color = [
                channel / 255 for channel in self.metadata.stuff_colors[stuff_category]
            ]
        except AttributeError:
            # Metadata has no `stuff_colors`: let draw_binary_mask choose a color.
            stuff_color = None

        self.draw_binary_mask(
            stuff_mask,
            color=stuff_color,
            edge_color=_OFF_WHITE,
            text=self.metadata.stuff_classes[stuff_category],
            alpha=alpha,
            area_threshold=area_threshold,
        )

    # Pass 2: instances.
    instance_pairs = list(prediction.instance_masks())
    if not instance_pairs:
        return self.output

    masks, infos = zip(*instance_pairs)
    category_ids = [info["category_id"] for info in infos]

    try:
        scores = [info["score"] for info in infos]
    except KeyError:
        # Scores are optional in the segment info.
        scores = None
    labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes)

    try:
        colors = [random_color(rgb=True, maximum=1) for _ in category_ids]
    except AttributeError:
        colors = None

    self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha)
    return self.output
def _assign_colors(self, instances): """ Naive tracking heuristics to assign same color to the same instance, will update the internal state of tracked instances. Returns: list[tuple[float]]: list of colors. """ # Compute iou with either boxes or masks: is_crowd = np.zeros((len(instances), ), dtype=np.bool) if instances[0].bbox is None: assert instances[0].mask_rle is not None # use mask iou only when box iou is None # because box seems good enough rles_old = [x.mask_rle for x in self._old_instances] rles_new = [x.mask_rle for x in instances] ious = mask_util.iou(rles_old, rles_new, is_crowd) threshold = 0.5 else: boxes_old = [x.bbox for x in self._old_instances] boxes_new = [x.bbox for x in instances] ious = mask_util.iou(boxes_old, boxes_new, is_crowd) threshold = 0.6 if len(ious) == 0: ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") # Only allow matching instances of the same label: for old_idx, old in enumerate(self._old_instances): for new_idx, new in enumerate(instances): if old.label != new.label: ious[old_idx, new_idx] = 0 matched_new_per_old = np.asarray(ious).argmax(axis=1) max_iou_per_old = np.asarray(ious).max(axis=1) # Try to find match for each old instance: extra_instances = [] for idx, inst in enumerate(self._old_instances): if max_iou_per_old[idx] > threshold: newidx = matched_new_per_old[idx] if instances[newidx].color is None: instances[newidx].color = inst.color continue # If an old instance does not match any new instances, # keep it for the next frame in case it is just missed by the detector inst.ttl -= 1 if inst.ttl > 0: extra_instances.append(inst) # Assign random color to newly-detected instances: for inst in instances: if inst.color is None: inst.color = random_color(rgb=True, maximum=1) self._old_instances = instances[:] + extra_instances return [d.color for d in instances]
def draw_classification_predictions(self, predictions):
    """
    Draw image-level classification results as a text label in the top-left
    corner of the image.

    Args:
        predictions (Instances): model output. The following fields are read
            when present (checked with `predictions.has`): "category_id",
            "category_score", "part", "part_score", "toward", "toward_score".

    Returns:
        output (VisImage): image object with visualizations.
    """

    def optional_field(name):
        # Missing fields are represented as None.
        return getattr(predictions, name) if predictions.has(name) else None

    def make_labels(ids, scores, classes_key):
        # Class names come from the metadata entry `classes_key` (None if absent).
        return _create_text_labels(ids, scores, self.metadata.get(classes_key, None))

    category_id = optional_field("category_id")
    category_text = make_labels(
        category_id, optional_field("category_score"), "category2_classes"
    )
    part_text = make_labels(
        optional_field("part"), optional_field("part_score"), "part_classes"
    )
    toward_text = make_labels(
        optional_field("toward"), optional_field("toward_score"), "toward_classes"
    )

    color = random_color(rgb=True, maximum=1)

    label = category_text[0]
    # Part and orientation are appended only for category id 1.
    if category_id == 1:
        label = label + ', ' + part_text[0] + ', ' + toward_text[0]

    img_height = self.output.height
    height_ratio = img_height / np.sqrt(self.output.height * self.output.width)
    text_color = self._change_color_brightness(color, brightness_factor=0.7)
    size_scale = np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2)
    font_size = size_scale * 0.5 * self._default_font_size

    self.draw_text(
        label,
        (2, 2),
        color=text_color,
        horizontal_alignment="left",
        font_size=font_size,
    )
    return self.output
def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None):
    """
    Draw rotated boxes (with optional labels), largest first.

    Args:
        boxes (ndarray): an Nx5 numpy array of
            (x_center, y_center, width, height, angle_degrees) format
            for the N objects in a single image. If None or empty, nothing is
            drawn and the current output is returned.
        labels (list[str]): the text to be displayed for each instance.
        assigned_colors (list[matplotlib.colors]): a list of colors, where each color
            corresponds to each mask or box in the image. Refer to 'matplotlib.colors'
            for full list of formats that the colors are accepted in. If None,
            a random color is picked per instance.

    Returns:
        output (VisImage): image object with visualizations.
    """
    # `boxes` defaults to None; guard before len() so the default call is a no-op
    # instead of a TypeError.
    if boxes is None or len(boxes) == 0:
        return self.output

    num_instances = len(boxes)
    if assigned_colors is None:
        assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)]

    # Display in largest to smallest order to reduce occlusion.
    areas = boxes[:, 2] * boxes[:, 3]
    sorted_idxs = np.argsort(-areas).tolist()

    # Re-order overlapped instances in descending order.
    boxes = boxes[sorted_idxs]
    labels = [labels[k] for k in sorted_idxs] if labels is not None else None
    colors = [assigned_colors[idx] for idx in sorted_idxs]

    for i in range(num_instances):
        self.draw_rotated_box_with_label(
            boxes[i],
            edge_color=colors[i],
            label=labels[i] if labels is not None else None,
        )

    return self.output
def overlay_instances(self,
                      *,
                      boxes=None,
                      labels=None,
                      masks=None,
                      keypoints=None,
                      points=None,
                      assigned_colors=None,
                      alpha=0.5):
    """
    Draw boxes, masks, points, labels and keypoints for a set of instances.

    Args:
        boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`,
            or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image,
            or a :class:`RotatedBoxes`,
            or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format
            for the N objects in a single image,
        labels (list[str]): the text to be displayed for each instance.
        masks (masks-like object): Supported types are:

            * :class:`detectron2.structures.PolygonMasks`,
              :class:`detectron2.structures.BitMasks`.
            * list[list[ndarray]]: contains the segmentation masks for all objects in one image.
              The first level of the list corresponds to individual instances. The second
              level to all the polygon that compose the instance, and the third level
              to the polygon coordinates. The third level should have the format of
              [x0, y0, x1, y1, ..., xn, yn] (n >= 3).
            * list[ndarray]: each ndarray is a binary mask of shape (H, W).
            * list[dict]: each dict is a COCO-style RLE.
        keypoints (Keypoint or array like): an array-like object of shape (N, K, 3),
            where the N is the number of instances and K is the number of keypoints.
            The last dimension corresponds to (x, y, visibility or score).
        points (Keypoint or array like): an array-like object of shape (N, M, 2),
            where the N is the number of instances and M is the number of points.
            The last dimension corresponds to (x, y).
        assigned_colors (list[matplotlib.colors]): a list of colors, where each color
            corresponds to each mask or box in the image. Refer to 'matplotlib.colors'
            for full list of formats that the colors are accepted in.
        alpha (float): transparency used when drawing mask polygons.

    Returns:
        output (VisImage): image object with visualizations.
    """
    # Start at 0 (not None) so that a call without any geometry is a no-op
    # instead of failing in range(None) below.
    num_instances = 0
    if boxes is not None:
        boxes = self._convert_boxes(boxes)
        num_instances = len(boxes)
    if masks is not None:
        masks = self._convert_masks(masks)
        if num_instances:
            assert len(masks) == num_instances
        else:
            num_instances = len(masks)
    if keypoints is not None:
        if num_instances:
            assert len(keypoints) == num_instances
        else:
            num_instances = len(keypoints)
        keypoints = self._convert_keypoints(keypoints)
    if points is not None:
        points = self._convert_points(points)
        if num_instances:
            assert len(points) == num_instances
        else:
            num_instances = len(points)
    if labels is not None:
        assert len(labels) == num_instances
    if assigned_colors is None:
        assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)]
    if num_instances == 0:
        return self.output
    if boxes is not None and boxes.shape[1] == 5:
        # 5-column boxes are rotated boxes and have their own drawing routine.
        return self.overlay_rotated_instances(
            boxes=boxes, labels=labels, assigned_colors=assigned_colors)

    # Display in largest to smallest order to reduce occlusion.
    areas = None
    if boxes is not None:
        areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1)
    elif masks is not None:
        areas = np.asarray([x.area() for x in masks])

    if areas is not None:
        sorted_idxs = np.argsort(-areas).tolist()
        # Re-order overlapped instances in descending order.
        boxes = boxes[sorted_idxs] if boxes is not None else None
        labels = [labels[k] for k in sorted_idxs] if labels is not None else None
        masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None
        assigned_colors = [assigned_colors[idx] for idx in sorted_idxs]
        keypoints = keypoints[sorted_idxs] if keypoints is not None else None
        points = points[sorted_idxs] if points is not None else None

    for i in range(num_instances):
        color = assigned_colors[i]
        if boxes is not None:
            self.draw_box(boxes[i], edge_color=color)

        if masks is not None:
            for segment in masks[i].polygons:
                self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha)

        if points is not None:
            self.draw_points(points[i], color=color)

        if labels is not None:
            # first get a box
            if boxes is not None:
                x0, y0, x1, y1 = boxes[i]
                # if drawing boxes, put text on the box corner.
                text_pos = (x0, y0)
                horiz_align = "left"
            elif masks is not None:
                x0, y0, x1, y1 = masks[i].bbox()
                # draw text in the center (defined by median) when box is not drawn
                # median is less sensitive to outliers.
                text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1]
                horiz_align = "center"
            else:
                # drawing the box confidence for keypoints isn't very useful.
                continue

            # for small objects, draw text at the side to avoid occlusion
            instance_area = (y1 - y0) * (x1 - x0)
            if (instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale
                    or y1 - y0 < 40 * self.output.scale):
                if y1 >= self.output.height - 5:
                    text_pos = (x1, y0)
                else:
                    text_pos = (x0, y1)

            height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width)
            lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
            font_size = (
                np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2)
                * 0.5
                * self._default_font_size
            )
            self.draw_text(
                labels[i],
                text_pos,
                color=lighter_color,
                horizontal_alignment=horiz_align,
                font_size=font_size,
            )

    # draw keypoints (once, after all per-instance drawing)
    if keypoints is not None:
        for keypoints_per_instance in keypoints:
            self.draw_and_connect_keypoints(keypoints_per_instance)

    return self.output
def draw_interaction_predictions(self, predictions):
    """
    Draw interaction prediction results on an image.

    Interactions are grouped by unique person box; each group carries the
    object boxes that person interacts with and the matching text labels.

    Args:
        predictions (Instances): the output of an interaction detection model.
        Following fields will be used to draw:
            "person_boxes", "object_boxes", "pred_classes", "scores"

    Returns:
        output (VisImage): image object with visualizations.
    """
    num_instances = len(predictions)
    if num_instances == 0:
        return self.output

    person_boxes = self._convert_boxes(predictions.person_boxes)
    object_boxes = self._convert_boxes(predictions.object_boxes)
    scores = predictions.scores if predictions.has("scores") else None
    classes = predictions.pred_classes if predictions.has("pred_classes") else None

    # Labels are built without class names (only class ids and scores are passed).
    labels = _create_text_labels(classes, scores)

    # De-duplicate person and object boxes; tuples make the rows hashable.
    unique_person_boxes = np.asarray(
        [list(box) for box in set(tuple(box) for box in person_boxes)]
    )
    unique_object_boxes = np.asarray(
        [list(box) for box in set(tuple(box) for box in object_boxes)]
    )

    # NOTE: metadata colors are looked up but currently unused; per-class
    # coloring of object boxes is disabled, so objects get random colors.
    thing_colors = self.metadata.get("thing_colors", None)

    # Persons are always white, every unique object box gets a random color.
    assigned_person_colors = {tuple(box): 'w' for box in unique_person_boxes}
    assigned_object_colors = {
        tuple(box): random_color(True, 1) for box in unique_object_boxes
    }

    # Collect all interactions associated with each unique person box.
    interactions_to_draw = {tuple(box): [] for box in unique_person_boxes}
    labels_to_draw = {tuple(box): [] for box in unique_person_boxes}
    for i in range(num_instances):
        person_key = tuple(person_boxes[i])
        interactions_to_draw[person_key].append(object_boxes[i])
        if labels is None:
            continue
        object_key = tuple(object_boxes[i])
        labels_to_draw[person_key].append(
            {"label": labels[i], "color": assigned_object_colors[object_key]}
        )

    self.overlay_interactions(
        unique_person_boxes=unique_person_boxes,
        unique_object_boxes=unique_object_boxes,
        interactions=interactions_to_draw,
        interaction_labels=labels_to_draw,
        assigned_person_colors=assigned_person_colors,
        assigned_object_colors=assigned_object_colors,
        alpha=0.5,
    )
    return self.output
def overlay_covariance_instances(self,
                                 *,
                                 boxes=None,
                                 covariance_matrices=None,
                                 labels=None,
                                 assigned_colors=None,
                                 alpha=0.5):
    """
    Draw boxes, their covariance ellipses and labels, largest box first.

    Args:
        boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`,
            or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image,
            or a :class:`RotatedBoxes`,
            or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format
            for the N objects in a single image. If None, nothing is drawn.
        covariance_matrices (ndarray): numpy array containing the corner covariance matrices
        labels (list[str]): the text to be displayed for each instance.
        assigned_colors (list[matplotlib.colors]): a list of colors, where each color
            corresponds to each mask or box in the image. Refer to 'matplotlib.colors'
            for full list of formats that the colors are accepted in.
        alpha: alpha value

    Returns:
        output (VisImage): image object with visualizations.
    """
    # Start at 0 (not None) so that a call without boxes is a no-op instead of
    # failing in range(None) below.
    num_instances = 0
    if boxes is not None:
        boxes = self._convert_boxes(boxes)
        num_instances = len(boxes)
    if labels is not None:
        assert len(labels) == num_instances
    if assigned_colors is None:
        assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)]
    if num_instances == 0:
        return self.output

    # From here on `boxes` is guaranteed to be set: it is the only source of
    # a non-zero instance count.

    # Display in largest to smallest order to reduce occlusion.
    areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1)
    sorted_idxs = np.argsort(-areas).tolist()
    # Re-order overlapped instances in descending order.
    boxes = boxes[sorted_idxs]
    labels = [labels[k] for k in sorted_idxs] if labels is not None else None
    assigned_colors = [assigned_colors[idx] for idx in sorted_idxs]

    for i in range(num_instances):
        color = assigned_colors[i]
        self.draw_box(boxes[i], edge_color=color, alpha=alpha)
        if covariance_matrices is not None:
            # NOTE(review): covariance_matrices is indexed with the sorted
            # position i while boxes were re-ordered above - confirm that
            # callers expect this pairing.
            self.draw_ellipse(
                boxes[i], covariance_matrices[i], edge_color=color, alpha=alpha)

        if labels is None:
            continue

        x0, y0, x1, y1 = boxes[i]
        # if drawing boxes, put text on the box corner.
        text_pos = (x0, y0)
        horiz_align = "left"

        # for small objects, draw text at the side to avoid occlusion
        instance_area = (y1 - y0) * (x1 - x0)
        if (instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale
                or y1 - y0 < 40 * self.output.scale):
            if y1 >= self.output.height - 5:
                text_pos = (x1, y0)
            else:
                text_pos = (x0, y1)

        height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width)
        lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
        font_size = (
            np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2)
            * 0.5
            * self._default_font_size
        )
        self.draw_text(
            labels[i],
            text_pos,
            color=lighter_color,
            horizontal_alignment=horiz_align,
            font_size=font_size,
        )

    return self.output
def overlay_instances(self,
                      *,
                      boxes=None,
                      labels=None,
                      masks=None,
                      keypoints=None,
                      assigned_colors=None,
                      alpha=0.5):
    """
    Modified from super class to give access to alpha for box plotting.

    Draws boxes, mask polygons, labels and keypoints for a set of instances.
    `alpha` is applied to both the boxes and the mask polygons.

    Args:
        boxes: boxes accepted by `self._convert_boxes`; a converted array with
            5 columns is delegated to `overlay_rotated_instances`.
        labels (list[str]): the text to be displayed for each instance.
        masks: masks accepted by `self._convert_masks`.
        keypoints: keypoints accepted by `self._convert_keypoints`.
        assigned_colors (list): one color per instance; random colors are
            picked when None.
        alpha (float): transparency for boxes and mask polygons.

    Returns:
        output (VisImage): image object with visualizations.
    """
    # Start at 0 (not None) so that a call without any geometry is a no-op
    # instead of failing in range(None) below.
    num_instances = 0
    if boxes is not None:
        boxes = self._convert_boxes(boxes)
        num_instances = len(boxes)
    if masks is not None:
        masks = self._convert_masks(masks)
        if num_instances:
            assert len(masks) == num_instances
        else:
            num_instances = len(masks)
    if keypoints is not None:
        if num_instances:
            assert len(keypoints) == num_instances
        else:
            num_instances = len(keypoints)
        keypoints = self._convert_keypoints(keypoints)
    if labels is not None:
        assert len(labels) == num_instances
    if assigned_colors is None:
        assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)]
    if num_instances == 0:
        return self.output
    if boxes is not None and boxes.shape[1] == 5:
        # 5-column boxes are rotated boxes and have their own drawing routine.
        return self.overlay_rotated_instances(
            boxes=boxes, labels=labels, assigned_colors=assigned_colors)

    # Display in largest to smallest order to reduce occlusion.
    areas = None
    if boxes is not None:
        areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1)
    elif masks is not None:
        areas = np.asarray([x.area() for x in masks])

    if areas is not None:
        sorted_idxs = np.argsort(-areas).tolist()
        # Re-order overlapped instances in descending order.
        boxes = boxes[sorted_idxs] if boxes is not None else None
        labels = [labels[k] for k in sorted_idxs] if labels is not None else None
        masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None
        assigned_colors = [assigned_colors[idx] for idx in sorted_idxs]
        keypoints = keypoints[sorted_idxs] if keypoints is not None else None

    for i in range(num_instances):
        color = assigned_colors[i]
        if boxes is not None:
            self.draw_box(boxes[i], edge_color=color, alpha=alpha)

        if masks is not None:
            for segment in masks[i].polygons:
                self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha)

        if labels is not None:
            # first get a box
            if boxes is not None:
                x0, y0, x1, y1 = boxes[i]
                # if drawing boxes, put text on the box corner.
                text_pos = (x0, y0)
                horiz_align = "left"
            elif masks is not None:
                x0, y0, x1, y1 = masks[i].bbox()
                # draw text in the center (defined by median) when box is not drawn
                # median is less sensitive to outliers.
                text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1]
                horiz_align = "center"
            else:
                # drawing the box confidence for keypoints isn't very
                # useful.
                continue

            # for small objects, draw text at the side to avoid occlusion
            instance_area = (y1 - y0) * (x1 - x0)
            if (instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale
                    or y1 - y0 < 40 * self.output.scale):
                if y1 >= self.output.height - 5:
                    text_pos = (x1, y0)
                else:
                    text_pos = (x0, y1)

            height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width)
            lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
            font_size = (
                np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2)
                * 0.5
                * self._default_font_size
            )
            self.draw_text(
                labels[i],
                text_pos,
                color=lighter_color,
                horizontal_alignment=horiz_align,
                font_size=font_size,
            )

    # draw keypoints (once, after all per-instance drawing)
    if keypoints is not None:
        for keypoints_per_instance in keypoints:
            self.draw_and_connect_keypoints(keypoints_per_instance)

    return self.output