def non_max_suppression_image_data(image_data: ImageData, iou: float) -> ImageData:
    image_data = copy.deepcopy(image_data)
    current_bboxes_data = image_data.bboxes_data.copy()
    new_bboxes_data = []
    while len(current_bboxes_data) != 0:
        current_bbox_data = current_bboxes_data[0]
        success = True
        if len(current_bboxes_data) > 1:
            for idx, bbox_data in enumerate(current_bboxes_data):
                if idx == 0:
                    continue
                bbox_iou = intersection_over_union(current_bbox_data, bbox_data)
                if bbox_iou >= iou:
                    # Merge the two overlapping bboxes into their union,
                    # keeping the attributes of the higher-scored bbox.
                    pairs_bboxes_data = [bbox_data, current_bbox_data]
                    pairs_scores = [
                        possible_bbox_data.detection_score
                        if possible_bbox_data.detection_score is not None else 1.
                        for possible_bbox_data in pairs_bboxes_data
                    ]
                    top_score_idx = np.argmax(pairs_scores)
                    current_bboxes_data.pop(idx)
                    current_bboxes_data.pop(0)
                    current_bboxes_data.append(
                        BboxData(
                            xmin=min(bbox_data.xmin, current_bbox_data.xmin),
                            ymin=min(bbox_data.ymin, current_bbox_data.ymin),
                            xmax=max(bbox_data.xmax, current_bbox_data.xmax),
                            ymax=max(bbox_data.ymax, current_bbox_data.ymax),
                            detection_score=pairs_bboxes_data[top_score_idx].detection_score,
                            label=pairs_bboxes_data[top_score_idx].label,
                            keypoints=pairs_bboxes_data[top_score_idx].keypoints,
                            additional_bboxes_data=pairs_bboxes_data[top_score_idx].additional_bboxes_data,
                            additional_info=pairs_bboxes_data[top_score_idx].additional_info))
                    success = False
                    break
        if success:
            new_bboxes_data.append(current_bboxes_data.pop(0))
    image_data.bboxes_data = new_bboxes_data
    return image_data
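
# Hedged usage sketch (not part of the original module): how
# non_max_suppression_image_data merges two strongly overlapping detections.
# The dummy in-memory image and bbox values below are made up.
def _example_non_max_suppression():
    image = np.zeros((100, 100, 3), dtype=np.uint8)
    image_data = ImageData(image=image, bboxes_data=[
        BboxData(xmin=10, ymin=10, xmax=50, ymax=50, detection_score=0.9, label='cat'),
        BboxData(xmin=12, ymin=12, xmax=52, ymax=52, detection_score=0.5, label='cat'),
    ])
    merged = non_max_suppression_image_data(image_data, iou=0.5)
    # IoU of the two boxes is ~0.82 >= 0.5, so they are merged into their
    # union and the higher-scored bbox's label and score are kept.
    assert len(merged.bboxes_data) == 1
    assert merged.bboxes_data[0].detection_score == 0.9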
def _inference_detection_and_get_metrics(
        self,
        model_spec: DetectionModelSpec,
        true_images_data: List[ImageData],
        score_threshold: float,
        minimum_iou: float,
        extra_bbox_label: Optional[str] = None,
        batch_size: int = 16) -> Tuple[pd.DataFrame, pd.DataFrame]:
    detection_model = model_spec.load()
    inferencer = DetectionInferencer(detection_model)
    images_data_gen = BatchGeneratorImageData(
        true_images_data,
        batch_size=batch_size,
        use_not_caught_elements_as_last_batch=True)
    # Predict with score_threshold=0 so the raw predictions can be reused
    # for metrics, then filter by the requested score threshold.
    raw_pred_images_data = inferencer.predict(images_data_gen, score_threshold=0.)
    pred_images_data = [
        ImageData(
            image_path=image_data.image_path,
            bboxes_data=[
                bbox_data for bbox_data in image_data.bboxes_data
                if bbox_data.detection_score >= score_threshold
            ]) for image_data in raw_pred_images_data
    ]
    df_detection_metrics = get_df_detection_metrics(
        true_images_data=true_images_data,
        pred_images_data=pred_images_data,
        minimum_iou=minimum_iou,
        raw_pred_images_data=raw_pred_images_data)
    df_detection_recall_per_class = get_df_detection_recall_per_class(
        true_images_data=true_images_data,
        pred_images_data=pred_images_data,
        minimum_iou=minimum_iou,
    )
    return df_detection_metrics, df_detection_recall_per_class
def convert_image_data_to_polygon_label(
    image_data: ImageData,
    from_name: str,
    polygonlabels: str,
) -> List[Dict]:
    if image_data.image_path is not None:
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    polygon_labels = []
    for bbox_data in image_data.bboxes_data:
        polygon_labels.append({
            "original_width": im_width,
            "original_height": im_height,
            "image_rotation": 0,
            "value": {
                # Label Studio expects points as percentages of the image size.
                "points": [[x * 100 / im_width, y * 100 / im_height]
                           for x, y in bbox_data.keypoints],
                "polygonlabels": [polygonlabels]
            },
            "from_name": from_name,
            "to_name": "image",
            "type": "polygonlabels"
        })
    return polygon_labels
def convert_image_data_to_rectangle_labels(
    image_data: ImageData,
    from_name: str,
    to_name: str,
) -> List[Dict]:
    if image_data.image_path is not None:
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    rectangle_labels = []
    for bbox_data in image_data.bboxes_data:
        rectangle_labels.append({
            "original_width": im_width,
            "original_height": im_height,
            "image_rotation": 0,
            "value": {
                # Coordinates and sizes are percentages of the image size.
                "x": bbox_data.xmin / im_width * 100,
                "y": bbox_data.ymin / im_height * 100,
                "width": (bbox_data.xmax - bbox_data.xmin) / im_width * 100,
                "height": (bbox_data.ymax - bbox_data.ymin) / im_height * 100,
                "rotation": 0,
                "rectanglelabels": [bbox_data.label]
            },
            "from_name": from_name,
            "to_name": to_name,
            "type": "rectanglelabels"
        })
    return rectangle_labels
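
# Illustrative output (assumption: a 200x100 image with one bbox
# (10, 20, 110, 70) labeled 'cat', from_name='label', to_name='image');
# values follow Label Studio's percent-based rectangle format:
# [{
#     "original_width": 200, "original_height": 100, "image_rotation": 0,
#     "value": {"x": 5.0, "y": 20.0, "width": 50.0, "height": 50.0,
#               "rotation": 0, "rectanglelabels": ["cat"]},
#     "from_name": "label", "to_name": "image", "type": "rectanglelabels"
# }]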
def convert_image_data_to_keypoint_label(
    image_data: ImageData,
    from_name: str,
    keypointlabels: str,
) -> List[Dict]:
    if image_data.image_path is not None:
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    keypoint_labels = []
    for bbox_data in image_data.bboxes_data:
        for keypoint in bbox_data.keypoints:
            x, y = keypoint[0], keypoint[1]
            keypoint_labels.append({
                "original_width": im_width,
                "original_height": im_height,
                "image_rotation": 0,
                "value": {
                    "x": x * 100 / im_width,
                    "y": y * 100 / im_height,
                    "width": 0.55,
                    "keypointlabels": [keypointlabels]
                },
                "from_name": from_name,
                "to_name": "image",
                "type": "keypointlabels"
            })
    return keypoint_labels
def get_image_data_from_annot(
    self, image_path: Union[str, Path],
    annot: Union[Path, str, fsspec.core.OpenFile, List[str]]
) -> ImageData:
    # The annotation is in YOLO format: one "idx xcenter ycenter w h" line
    # per bbox, with coordinates normalized to [0, 1].
    if isinstance(annot, (str, Path)):
        with fsspec.open(annot, 'r', encoding='utf8') as f:
            annots = f.read()
    elif isinstance(annot, fsspec.core.OpenFile):
        with annot as f:
            annots = f.read()
    elif isinstance(annot, List):
        annots = '\n'.join(annot)
    width, height = get_image_size(image_path)
    bboxes_data = []
    for line in annots.strip().split('\n'):
        if not line:
            continue
        idx, xcenter, ycenter, w, h = line.split(' ')
        label = self.idx_to_class_name[int(idx)]
        xcenter, ycenter, w, h = float(xcenter), float(ycenter), float(w), float(h)
        xcenter, w = xcenter * width, w * width
        ycenter, h = ycenter * height, h * height
        bboxes_data.append(
            BboxData(xmin=xcenter - w / 2,
                     ymin=ycenter - h / 2,
                     xmax=xcenter + w / 2,
                     ymax=ycenter + h / 2,
                     label=label))
    return ImageData(image_path=image_path, bboxes_data=bboxes_data)
def _postprocess_images_data(
        self, images_data: List[ImageData],
        pred_labels_top_n: List[List[str]],
        pred_scores_top_n: List[List[float]],
        open_images_in_images_data: bool) -> List[ImageData]:
    images_data_res = []
    for (image_data, pred_label_top_n, pred_classification_score_top_n) in zip(
            images_data, pred_labels_top_n, pred_scores_top_n):
        image = image_data.image if open_images_in_images_data else None
        images_data_res.append(
            ImageData(
                image_path=image_data.image_path,
                image=image,
                label=pred_label_top_n[0],
                bboxes_data=image_data.bboxes_data,
                keypoints=image_data.keypoints,
                additional_info={
                    **image_data.additional_info,
                    'pred_classification_score': pred_classification_score_top_n[0],
                    'pred_label_top_n': pred_label_top_n,
                    'pred_classification_scores_top_n': pred_classification_score_top_n
                }))
    return images_data_res
def thumbnail_image_data(image_data: ImageData,
                         size: Tuple[int, int],
                         resample: Optional[int] = None) -> ImageData:
    image = image_data.open_image()
    new_width, new_height = get_thumbnail_resize(Image.fromarray(image), size)
    return resize_image_data(image_data, (new_width, new_height), resample=resample)
def get_annot_from_image_data(self, image_data: ImageData) -> Dict:
    image_data = self.filter_image_data(image_data)
    image = image_data.open_image()
    height, width, _ = image.shape
    annot = {
        "description": "",
        "tags": [],
        "size": {
            "height": height,
            "width": width
        },
        "objects": [{
            "description": "",
            "geometryType": "rectangle",
            "tags": [{
                "name": str(bbox_data.label),
                "value": None,
            }],
            "classTitle": "bbox",
            "points": {
                "exterior": [[int(bbox_data.xmin), int(bbox_data.ymin)],
                             [int(bbox_data.xmax), int(bbox_data.ymax)]],
                "interior": []
            }
        } for bbox_data in image_data.bboxes_data]
    }
    return annot
def get_image_data_from_annot(
        self, image_path: Union[str, Path, fsspec.core.OpenFile],
        annot: Union[Path, str, Dict, fsspec.core.OpenFile]) -> ImageData:
    if isinstance(annot, (str, Path)):
        with fsspec.open(annot, 'r', encoding='utf8') as f:
            annot = json.load(f)
    if isinstance(annot, fsspec.core.OpenFile):
        with annot as f:
            annot = json.load(f)
    bboxes_data = []
    for obj in annot['objects']:
        (xmin, ymin), (xmax, ymax) = obj['points']['exterior']
        label = obj['tags'][0]['name'] if obj['tags'] else None
        bboxes_data.append(
            BboxData(image_path=image_path,
                     xmin=xmin,
                     ymin=ymin,
                     xmax=xmax,
                     ymax=ymax,
                     label=label))
    return ImageData(image_path=image_path, bboxes_data=bboxes_data)
def rotate_image_data(image_data: ImageData,
                      angle: float,
                      border_mode: Optional[int] = None,
                      border_value: Optional[Tuple[int, int, int]] = None) -> ImageData:
    if abs(angle) <= 1e-6:
        return image_data
    image = image_data.open_image()
    height, width, _ = image.shape
    image_center = width // 2, height // 2
    # Right angles are handled exactly via np.rot90; other angles go
    # through cv2.warpAffine below.
    angle_to_factor = {0: 0, 90: 1, 180: 2, 270: 3}
    angle = angle % 360
    rotated_image_data = copy.deepcopy(image_data)
    if angle in angle_to_factor:
        factor = angle_to_factor[angle]
        rotated_image = np.rot90(image, factor)
        rotated_image_data.keypoints = rotate_keypoints90(
            image_data.keypoints, factor, width, height)
        rotated_image_data.bboxes_data = [
            _rotate_bbox_data90(bbox_data, factor, width, height)
            for bbox_data in rotated_image_data.bboxes_data
        ]
    else:
        # Grab the rotation matrix.
        rotation_mat = cv2.getRotationMatrix2D(image_center, angle, 1.)
        # Compute the new bounding dimensions of the image.
        abs_cos = abs(rotation_mat[0, 0])
        abs_sin = abs(rotation_mat[0, 1])
        bound_w = int(height * abs_sin + width * abs_cos)
        bound_h = int(height * abs_cos + width * abs_sin)
        # Adjust the rotation matrix to take the translation into account.
        rotation_mat[0, 2] += bound_w / 2 - image_center[0]
        rotation_mat[1, 2] += bound_h / 2 - image_center[1]
        rotated_image = cv2.warpAffine(image,
                                       rotation_mat, (bound_w, bound_h),
                                       borderMode=border_mode,
                                       borderValue=border_value)
        new_height, new_width, _ = rotated_image.shape
        rotated_image_data = copy.deepcopy(image_data)
        rotated_image_data.keypoints = rotate_keypoints(
            image_data.keypoints, rotation_mat, new_height, new_width)
        # Clip rotated keypoints to the new image bounds.
        keypoints = []
        for (x, y) in rotated_image_data.keypoints:
            x = max(0, min(x, new_width - 1))
            y = max(0, min(y, new_height - 1))
            keypoints.append([x, y])
        rotated_image_data.keypoints = np.array(keypoints).reshape(-1, 2)
        rotated_image_data.bboxes_data = [
            _rotate_bbox_data(bbox_data, rotation_mat, new_height, new_width)
            for bbox_data in rotated_image_data.bboxes_data
        ]
    rotated_image_data.image_path = None  # Applies to all bboxes_data inside
    rotated_image_data.image = rotated_image
    return rotated_image_data
def get_true_and_pred_images_data_with_visualized_labels(
        image_data_matching: ImageDataMatching,
        error_type: Literal['detection', 'pipeline'],
        label: Optional[str] = None) -> Tuple[ImageData, ImageData]:
    """
    Create true and pred ImageData with changed labels for visualization.

    For the 'detection' error_type, the label will be one of ["TP", "FP", "FN"].
    For the 'pipeline' error_type, the label will be in the format
    "label [matching_error_type]", where matching_error_type is one of
    ["TP", "FP", "FN", "TP (extra bbox)", "FP (extra bbox)"].
    """
    for tag, tag_image_data in [('true', image_data_matching.true_image_data),
                                ('pred', image_data_matching.pred_image_data)]:
        tag_bboxes_data_with_visualized_label = []
        for tag_bbox_data_matching in image_data_matching.bboxes_data_matchings:
            if tag == 'true':
                tag_bbox_data = tag_bbox_data_matching.true_bbox_data
            elif tag == 'pred':
                tag_bbox_data = tag_bbox_data_matching.pred_bbox_data
            if tag_bbox_data is None:
                continue
            if error_type == 'detection':
                label_caption = f"[{tag_bbox_data_matching.get_detection_error_type(label=label)}]"
            elif error_type == 'pipeline':
                pipeline_error_type = tag_bbox_data_matching.get_pipeline_error_type(label=label)
                if label is None:
                    label_caption = f"{tag_bbox_data.label} [{pipeline_error_type}]"
                else:
                    label_caption = f"{tag_bbox_data.label} [{pipeline_error_type}, cls. '{label}']"
            tag_bbox_data_with_visualized_label = BboxData(
                image_path=tag_bbox_data.image_path,
                cropped_image=tag_bbox_data.cropped_image,
                xmin=tag_bbox_data.xmin,
                ymin=tag_bbox_data.ymin,
                xmax=tag_bbox_data.xmax,
                ymax=tag_bbox_data.ymax,
                detection_score=tag_bbox_data.detection_score,
                label=label_caption,
                classification_score=tag_bbox_data.classification_score)
            tag_bboxes_data_with_visualized_label.append(
                tag_bbox_data_with_visualized_label)
        tag_image_data_with_visualized_labels = ImageData(
            image_path=tag_image_data.image_path,
            image=tag_image_data.image,
            bboxes_data=tag_bboxes_data_with_visualized_label)
        if tag == 'true':
            true_image_data_with_visualized_labels = tag_image_data_with_visualized_labels
        elif tag == 'pred':
            pred_image_data_with_visualized_labels = tag_image_data_with_visualized_labels
    return true_image_data_with_visualized_labels, pred_image_data_with_visualized_labels
def get_images_data_from_annots(
    self, images_dir: Union[str, Path, fsspec.core.OpenFile],
    annots: Union[Path, str, Dict, fsspec.core.OpenFile]
) -> List[ImageData]:
    if isinstance(annots, (str, Path)):
        with fsspec.open(annots, 'r', encoding='utf8') as f:
            annots = json.load(f)
    elif isinstance(annots, fsspec.core.OpenFile):
        with annots as f:
            annots = json.load(f)
    images_dir = Pathy(images_dir)
    images_data = []
    for annot in annots:
        image_name = annot['filename']
        additional_info = {}
        for key in annot:
            if key not in ['objects', 'filename']:
                additional_info[key] = annot[key]
        bboxes_data = []
        for obj in annot['objects']:
            # An object is either a dict with a 'bbox' key or a bare
            # (xmin, ymin, xmax, ymax) sequence.
            if 'bbox' in obj:
                xmin, ymin, xmax, ymax = obj['bbox']
            else:
                xmin, ymin, xmax, ymax = obj
            xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
            label = obj['label'] if 'label' in obj else None
            labels_top_n = obj['labels_top_n'] if 'labels_top_n' in obj else None
            top_n = len(labels_top_n) if labels_top_n is not None else None
            keypoints = obj['keypoints'] if 'keypoints' in obj else []
            bbox_additional_info = {}
            if isinstance(obj, dict):
                for key in obj:
                    if key not in ['bbox', 'label', 'labels_top_n', 'top_n']:
                        bbox_additional_info[key] = obj[key]
            bboxes_data.append(BboxData(
                image_path=images_dir / image_name,
                xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax,
                keypoints=keypoints,
                label=label,
                labels_top_n=labels_top_n,
                top_n=top_n,
                additional_info=bbox_additional_info
            ))
        images_data.append(ImageData(
            image_path=images_dir / image_name,
            bboxes_data=bboxes_data,
            additional_info=additional_info
        ))
    return images_data
def get_annot_from_n_bboxes_data(
    self,
    image_paths: List[Union[str, Path]],
    n_bboxes_data: List[List[BboxData]],
):
    # Returns whatever get_annot_from_images_data produces for the
    # wrapped images (the original return annotation was incorrect).
    images_data = [
        ImageData(image_path=image_path, bboxes_data=bboxes_data)
        for image_path, bboxes_data in zip(image_paths, n_bboxes_data)
    ]
    return self.get_annot_from_images_data(images_data)
def apply_perspective_transform_to_image_data(
        image_data: ImageData, perspective_matrix: np.ndarray,
        result_width: int, result_height: int,
        allow_negative_and_large_coords: bool,
        remove_bad_coords: bool) -> ImageData:
    image = image_data.open_image()
    image = cv2.warpPerspective(image, perspective_matrix,
                                (result_width, result_height))
    image_data = copy.deepcopy(image_data)
    image_data.keypoints = apply_perspective_transform_to_points(
        image_data.keypoints, perspective_matrix, result_width, result_height,
        allow_negative_and_large_coords, remove_bad_coords)
    image_data.bboxes_data = [
        _apply_perspective_transform_to_bbox_data(
            bbox_data, perspective_matrix, result_width, result_height,
            allow_negative_and_large_coords, remove_bad_coords)
        for bbox_data in image_data.bboxes_data
    ]
    # Drop bboxes that the transform removed (returned as None).
    image_data.bboxes_data = [
        bbox_data for bbox_data in image_data.bboxes_data
        if bbox_data is not None
    ]
    image_data.image_path = None
    image_data.image = image
    return image_data
def _postprocess_predictions(
        self, images_data: List[ImageData],
        n_pred_bboxes: List[List[Tuple[int, int, int, int]]],
        n_k_pred_keypoints: List[List[Tuple[int, int]]],
        n_pred_detection_scores: List[List[float]],
        n_pred_labels_top_n: List[List[List[str]]],
        n_pred_classification_scores_top_n: List[List[List[float]]],
        open_images_in_images_data: bool,
        open_cropped_images_in_bboxes_data: bool) -> List[ImageData]:
    pred_images_data = []
    for (image_data, pred_bboxes, k_pred_keypoints, pred_detection_scores,
         pred_labels_top_n, pred_classification_scores_top_n) in zip(
             images_data, n_pred_bboxes, n_k_pred_keypoints,
             n_pred_detection_scores, n_pred_labels_top_n,
             n_pred_classification_scores_top_n):
        bboxes_data = []
        for (pred_bbox, pred_keypoints, pred_detection_score,
             pred_label_top_n, pred_classification_score_top_n) in zip(
                 pred_bboxes, k_pred_keypoints, pred_detection_scores,
                 pred_labels_top_n, pred_classification_scores_top_n):
            xmin, ymin, xmax, ymax = pred_bbox
            bboxes_data.append(
                BboxData(
                    image_path=image_data.image_path,
                    xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax,
                    keypoints=pred_keypoints,
                    detection_score=pred_detection_score,
                    label=pred_label_top_n[0],
                    classification_score=pred_classification_score_top_n[0],
                    top_n=len(pred_label_top_n),
                    labels_top_n=pred_label_top_n,
                    classification_scores_top_n=pred_classification_score_top_n))
        if open_cropped_images_in_bboxes_data:
            for bbox_data in bboxes_data:
                bbox_data.open_cropped_image(source_image=image_data.image,
                                             inplace=True)
        image = image_data.image if open_images_in_images_data else None
        pred_images_data.append(
            ImageData(
                image_path=image_data.image_path,
                image=image,
                bboxes_data=bboxes_data,
                label=image_data.label,
                keypoints=image_data.keypoints,
                additional_info=image_data.additional_info,
            ))
    return pred_images_data
def get_annot_from_image_data(self, image_data: ImageData) -> List[str]:
    # Convert bboxes to YOLO format: "idx xcenter ycenter w h" with
    # coordinates normalized to [0, 1].
    image_data = self.filter_image_data(image_data)
    width, height = image_data.get_image_size()
    txt_results = []
    for bbox_data in image_data.bboxes_data:
        w = bbox_data.xmax - bbox_data.xmin
        h = bbox_data.ymax - bbox_data.ymin
        xcenter = bbox_data.xmin + w / 2
        ycenter = bbox_data.ymin + h / 2
        xcenter, w = round(xcenter / width, 6), round(w / width, 6)
        ycenter, h = round(ycenter / height, 6), round(h / height, 6)
        idx = self.class_name_to_idx[bbox_data.label]
        txt_results.append(f"{idx} {xcenter} {ycenter} {w} {h}")
    return txt_results
def resize_image_data(image_data: ImageData,
                      size: Tuple[int, int],
                      resample: Optional[int] = None) -> ImageData:
    image_data = copy.deepcopy(image_data)
    image = image_data.open_image()
    old_height, old_width, _ = image.shape
    image = Image.fromarray(image)
    image = image.resize(size, resample=resample)
    image = np.array(image)
    new_height, new_width, _ = image.shape

    def resize_coords(bbox_data: BboxData):
        # Scale bbox and keypoint coordinates and clip them to the new bounds.
        bbox_data.xmin = max(0, min(int(bbox_data.xmin * (new_width / old_width)), new_width - 1))
        bbox_data.ymin = max(0, min(int(bbox_data.ymin * (new_height / old_height)), new_height - 1))
        bbox_data.xmax = max(0, min(int(bbox_data.xmax * (new_width / old_width)), new_width - 1))
        bbox_data.ymax = max(0, min(int(bbox_data.ymax * (new_height / old_height)), new_height - 1))
        bbox_data.keypoints[:, 0] = (bbox_data.keypoints[:, 0] * (new_width / old_width)).astype(int)
        bbox_data.keypoints[:, 1] = (bbox_data.keypoints[:, 1] * (new_height / old_height)).astype(int)
        bbox_data.keypoints = bbox_data.keypoints.astype(int)
        bbox_data.cropped_image = None
        keypoints = []
        for (x, y) in bbox_data.keypoints:
            x = max(0, min(x, new_width - 1))
            y = max(0, min(y, new_height - 1))
            keypoints.append([x, y])
        bbox_data.keypoints = np.array(keypoints).reshape(-1, 2)
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            resize_coords(additional_bbox_data)

    for bbox_data in image_data.bboxes_data:
        resize_coords(bbox_data)
    image_data.keypoints[:, 0] = (image_data.keypoints[:, 0] * (new_width / old_width)).astype(int)
    image_data.keypoints[:, 1] = (image_data.keypoints[:, 1] * (new_height / old_height)).astype(int)
    keypoints = []
    for (x, y) in image_data.keypoints:
        x = max(0, min(x, new_width - 1))
        y = max(0, min(y, new_height - 1))
        keypoints.append([x, y])
    image_data.keypoints = np.array(keypoints).reshape(-1, 2)
    image_data.image_path = None
    image_data.image = image
    return image_data
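
# Hedged usage sketch (not in the original module): resize an annotated
# in-memory image to 640x480; bbox coordinates are rescaled and clipped
# along with it. Image.BILINEAR is PIL's resampling constant; the data
# below is made up.
def _example_resize():
    image_data = ImageData(image=np.zeros((100, 200, 3), dtype=np.uint8),
                           bboxes_data=[BboxData(xmin=10, ymin=10,
                                                 xmax=100, ymax=50,
                                                 label='cat')])
    resized = resize_image_data(image_data, size=(640, 480),
                                resample=Image.BILINEAR)
    assert resized.image.shape[:2] == (480, 640)  # (height, width)
    assert resized.bboxes_data[0].xmax == int(100 * 640 / 200)  # == 320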
def tf_record_from_image_data(image_data: ImageData,
                              label_map: Dict[str, int],
                              use_thumbnail: Optional[Tuple[int, int]] = None):
    filename = image_data.image_path
    encoded_filename = str(filename).encode('utf8')
    true_bboxes = np.array(
        [[bbox_data.xmin, bbox_data.ymin, bbox_data.xmax, bbox_data.ymax]
         for bbox_data in image_data.bboxes_data],
        dtype=float)
    image = image_data.open_image()
    height, width, _ = image.shape
    if len(true_bboxes) > 0:
        # The TFRecord stores bbox coordinates normalized to [0, 1].
        normalized_true_bboxes = true_bboxes.copy()
        normalized_true_bboxes[:, [0, 2]] /= width
        normalized_true_bboxes[:, [1, 3]] /= height
        xmins = normalized_true_bboxes[:, 0]
        ymins = normalized_true_bboxes[:, 1]
        xmaxs = normalized_true_bboxes[:, 2]
        ymaxs = normalized_true_bboxes[:, 3]
    else:
        ymins, xmins, ymaxs, xmaxs = [], [], [], []
    encoded_jpg = BytesIO()
    image = Image.fromarray(image)
    if use_thumbnail:
        image.thumbnail(use_thumbnail)
    image.save(encoded_jpg, format='JPEG')
    encoded_jpg = encoded_jpg.getvalue()
    image_format = b'jpg'
    class_names = [bbox_data.label for bbox_data in image_data.bboxes_data]
    encoded_class_names = [
        class_name.encode('utf-8') for class_name in class_names
    ]
    classes = [label_map[class_name] for class_name in class_names]
    tf_record = create_tf_record(height=height,
                                 width=width,
                                 encoded_filename=encoded_filename,
                                 encoded_jpg=encoded_jpg,
                                 image_format=image_format,
                                 xmins=xmins, ymins=ymins,
                                 xmaxs=xmaxs, ymaxs=ymaxs,
                                 encoded_class_names=encoded_class_names,
                                 classes=classes)
    return tf_record
def get_image_data_filtered_by_labels(image_data: ImageData,
                                      filter_by_labels: Optional[List[str]] = None,
                                      include: bool = True) -> ImageData:
    if filter_by_labels is None or len(filter_by_labels) == 0:
        return image_data
    filter_by_labels = set(filter_by_labels)
    # Keep bboxes whose label is in the set (include=True)
    # or outside of it (include=False).
    bboxes_data = [
        bbox_data for bbox_data in image_data.bboxes_data
        if (include and bbox_data.label in filter_by_labels) or (
            not include and bbox_data.label not in filter_by_labels)
    ]
    return ImageData(image_path=image_data.image_path,
                     image=image_data.image,
                     bboxes_data=bboxes_data)
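
# Hedged usage sketch (not in the original module): keep only 'cat' bboxes,
# or drop them with include=False. Dummy in-memory data.
def _example_filter_by_labels():
    image_data = ImageData(image=np.zeros((50, 50, 3), dtype=np.uint8),
                           bboxes_data=[
                               BboxData(xmin=0, ymin=0, xmax=10, ymax=10, label='cat'),
                               BboxData(xmin=20, ymin=20, xmax=30, ymax=30, label='dog'),
                           ])
    only_cats = get_image_data_filtered_by_labels(image_data, ['cat'])
    no_cats = get_image_data_filtered_by_labels(image_data, ['cat'], include=False)
    assert [b.label for b in only_cats.bboxes_data] == ['cat']
    assert [b.label for b in no_cats.bboxes_data] == ['dog']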
def _postprocess_images_data(
        self, images_data: List[ImageData],
        pred_n_keypoints: List[List[Tuple[float, float]]],
        open_images_in_images_data: bool) -> List[ImageData]:
    images_data_res = []
    pred_n_keypoints = np.array(pred_n_keypoints)
    for image_data, pred_keypoints in zip(images_data, pred_n_keypoints):
        # Predicted keypoints are normalized; scale them to pixel coordinates.
        width, height = image_data.get_image_size()
        pred_keypoints[:, 0] *= width
        pred_keypoints[:, 1] *= height
        image = image_data.image if open_images_in_images_data else None
        images_data_res.append(
            ImageData(image_path=image_data.image_path,
                      image=image,
                      bboxes_data=image_data.bboxes_data,
                      label=image_data.label,
                      keypoints=pred_keypoints,
                      additional_info=image_data.additional_info))
    return images_data_res
def flatten_additional_bboxes_data_in_image_data(
    image_data: ImageData,
    additional_bboxes_data_depth: Optional[int] = None,
) -> ImageData:
    image_data = copy.deepcopy(image_data)
    bboxes_data = []

    def _append_bbox_data(bbox_data: BboxData, depth: int):
        if additional_bboxes_data_depth is not None and depth > additional_bboxes_data_depth:
            return
        bboxes_data.append(bbox_data)
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            # Fixed: the recursion must pass the incremented depth.
            _append_bbox_data(additional_bbox_data, depth=depth + 1)
        bbox_data.additional_bboxes_data = []

    # Fixed: iterate over the image's bboxes, not the (empty) result list.
    for bbox_data in image_data.bboxes_data:
        _append_bbox_data(bbox_data, depth=0)
    image_data.bboxes_data = bboxes_data
    return image_data
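
# Hedged usage sketch (not in the original module): flatten nested
# additional_bboxes_data into one flat list of bboxes. Dummy data.
def _example_flatten():
    inner = BboxData(xmin=2, ymin=2, xmax=4, ymax=4, label='inner')
    outer = BboxData(xmin=0, ymin=0, xmax=10, ymax=10, label='outer',
                     additional_bboxes_data=[inner])
    image_data = ImageData(image=np.zeros((20, 20, 3), dtype=np.uint8),
                           bboxes_data=[outer])
    flat = flatten_additional_bboxes_data_in_image_data(image_data)
    assert [b.label for b in flat.bboxes_data] == ['outer', 'inner']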
def non_max_suppression_image_data_using_tf(image_data: ImageData,
                                            iou: float) -> ImageData:
    import tensorflow as tf
    image_data = copy.deepcopy(image_data)
    if len(image_data.bboxes_data) <= 1:
        return image_data
    # tf.image.non_max_suppression expects boxes as (ymin, xmin, ymax, xmax).
    bboxes = [(bbox_data.ymin, bbox_data.xmin, bbox_data.ymax, bbox_data.xmax)
              for bbox_data in image_data.bboxes_data]
    scores = [
        bbox_data.detection_score
        if bbox_data.detection_score is not None else 1.
        for bbox_data in image_data.bboxes_data
    ]
    result = tf.image.non_max_suppression(bboxes,
                                          scores,
                                          len(image_data.bboxes_data),
                                          iou_threshold=iou)
    image_data.bboxes_data = [
        image_data.bboxes_data[i] for i in result.numpy()
    ]
    return image_data
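
# Note the design difference (my reading of the two NMS functions in this
# module): non_max_suppression_image_data merges overlapping boxes into their
# union, while this TF-based variant keeps the top-scored box and drops the
# rest. A sketch with made-up data:
def _example_nms_tf():
    image_data = ImageData(image=np.zeros((100, 100, 3), dtype=np.uint8),
                           bboxes_data=[
                               BboxData(xmin=10, ymin=10, xmax=50, ymax=50, detection_score=0.9),
                               BboxData(xmin=12, ymin=12, xmax=52, ymax=52, detection_score=0.5),
                           ])
    kept = non_max_suppression_image_data_using_tf(image_data, iou=0.5)
    assert len(kept.bboxes_data) == 1
    assert kept.bboxes_data[0].detection_score == 0.9  # suppression, not merging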
def draw_overlay(self,
                 frame: np.ndarray,
                 tracked_bboxes: np.ndarray,
                 tracked_ids: List[int],
                 ready_frames_at_the_moment: List['FrameResult'],
                 filter_by_labels: Optional[List[str]] = None) -> np.ndarray:
    image = frame.copy()
    tracked_bboxes = tracked_bboxes.astype(int)
    ready_tracks_ids_at_the_moment = [
        ready_frame.track_id for ready_frame in ready_frames_at_the_moment
    ]
    current_bboxes_data = []
    for bbox, track_id in zip(tracked_bboxes, tracked_ids):
        # Draw only tracks whose classification result is already available.
        if track_id not in ready_tracks_ids_at_the_moment:
            continue
        xmin, ymin, xmax, ymax = bbox
        ready_frame = ready_frames_at_the_moment[
            ready_tracks_ids_at_the_moment.index(track_id)]
        label = ready_frame.label
        current_bbox_data = BboxData(image=image,
                                     xmin=xmin, ymin=ymin,
                                     xmax=xmax, ymax=ymax,
                                     label=label)
        current_bboxes_data.append(current_bbox_data)
    image_data = ImageData(image=frame, bboxes_data=current_bboxes_data)
    image = visualize_image_data(
        image_data=image_data,
        use_labels=self.write_labels,
        filter_by_labels=filter_by_labels,
        draw_base_labels_with_given_label_to_base_label_image=(
            self.draw_base_labels_with_given_label_to_base_label_image),
        known_labels=self.classification_inferencer.class_names)
    return image
def _inference_pipeline_and_get_metrics(
        self, model_spec: PipelineModelSpec,
        true_images_data: List[ImageData],
        detection_score_threshold: float,
        minimum_iou: float,
        extra_bbox_label: str,
        batch_size: int,
        pseudo_class_names: List[str]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    pipeline_model = model_spec.load()
    inferencer = PipelineInferencer(pipeline_model)
    images_data_gen = BatchGeneratorImageData(
        true_images_data,
        batch_size=batch_size,
        use_not_caught_elements_as_last_batch=True)
    # Predict with detection_score_threshold=0 so the raw predictions can be
    # reused for metrics, then filter by the requested threshold.
    raw_pred_images_data = inferencer.predict(images_data_gen,
                                              detection_score_threshold=0.)
    pred_images_data = [
        ImageData(
            image_path=image_data.image_path,
            bboxes_data=[
                bbox_data for bbox_data in image_data.bboxes_data
                if bbox_data.detection_score >= detection_score_threshold
            ]) for image_data in raw_pred_images_data
    ]
    df_detection_metrics = get_df_detection_metrics(
        true_images_data=true_images_data,
        pred_images_data=pred_images_data,
        minimum_iou=minimum_iou,
        raw_pred_images_data=raw_pred_images_data)
    df_pipeline_metrics = get_df_pipeline_metrics(
        true_images_data=true_images_data,
        pred_images_data=pred_images_data,
        minimum_iou=minimum_iou,
        extra_bbox_label=extra_bbox_label,
        pseudo_class_names=pseudo_class_names,
        known_class_names=pipeline_model.class_names)
    return df_detection_metrics, df_pipeline_metrics
def filter_image_data(self, image_data: ImageData) -> ImageData:
    if image_data is None:
        return None
    looked_bboxes = set()
    new_bboxes_data = []
    for bbox_data in image_data.bboxes_data:
        xmin, ymin, xmax, ymax = bbox_data.xmin, bbox_data.ymin, bbox_data.xmax, bbox_data.ymax
        xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
        if (xmin, ymin, xmax, ymax) in looked_bboxes:
            logger.warning(
                f'Repeated bbox detected at image {bbox_data.image_path}: '
                f'(xmin, ymin, xmax, ymax) = {(xmin, ymin, xmax, ymax)}. Skipping.'
            )
            continue
        else:
            looked_bboxes.add((xmin, ymin, xmax, ymax))
        if xmin >= xmax or ymin >= ymax or xmin < 0 or ymin < 0:
            logger.warning(
                f"Wrong annotation at image {bbox_data.image_path}: "
                f"incorrect bbox (xmin, ymin, xmax, ymax): {(xmin, ymin, xmax, ymax)} "
                "(xmin >= xmax or ymin >= ymax or xmin < 0 or ymin < 0). Skipping."
            )
            continue
        new_bboxes_data.append(bbox_data)
    return ImageData(image_path=image_data.image_path,
                     image=image_data.image,
                     bboxes_data=new_bboxes_data,
                     additional_info=image_data.additional_info,
                     keypoints=image_data.keypoints,
                     label=image_data.label)
def visualize_image_data(
    image_data: ImageData,
    use_labels: bool = False,
    score_type: Optional[Literal['detection', 'classification']] = None,
    filter_by_labels: Optional[List[str]] = None,
    known_labels: Optional[List[str]] = None,
    draw_base_labels_with_given_label_to_base_label_image: Optional[Callable[[str], np.ndarray]] = None,
    keypoints_radius: int = 5,
    include_additional_bboxes_data: bool = False,
    additional_bboxes_data_depth: Optional[int] = None,
    fontsize: int = 24,
    thickness: int = 4,
    return_as_pil_image: bool = False
) -> Union[np.ndarray, Image.Image]:
    image_data = get_image_data_filtered_by_labels(
        image_data=image_data,
        filter_by_labels=filter_by_labels
    )
    image = image_data.open_image()
    if include_additional_bboxes_data:
        bboxes_data = []

        def recursive_get_bboxes_data(bbox_data, depth):
            if additional_bboxes_data_depth is not None and depth > additional_bboxes_data_depth:
                return
            bboxes_data.append(bbox_data)
            for additional_bbox_data in bbox_data.additional_bboxes_data:
                recursive_get_bboxes_data(additional_bbox_data, depth=depth + 1)

        for bbox_data in image_data.bboxes_data:
            recursive_get_bboxes_data(bbox_data, depth=0)
    else:
        bboxes_data = image_data.bboxes_data
    labels = [bbox_data.label for bbox_data in bboxes_data]
    if known_labels is None:
        known_labels = list(set(labels))
    k_keypoints = [bbox_data.keypoints for bbox_data in bboxes_data]
    bboxes = np.array([
        (bbox_data.ymin, bbox_data.xmin, bbox_data.ymax, bbox_data.xmax)
        for bbox_data in bboxes_data
    ])
    angles = np.array([0. for _ in bboxes_data])
    if score_type == 'detection':
        scores = np.array([bbox_data.detection_score for bbox_data in bboxes_data])
        skip_scores = False
    elif score_type == 'classification':
        scores = np.array([bbox_data.classification_score for bbox_data in bboxes_data])
        skip_scores = False
    else:
        scores = None
        skip_scores = True
    image = visualize_boxes_and_labels_on_image_array(
        image=image,
        bboxes=bboxes,
        angles=angles,
        scores=scores,
        k_keypoints=k_keypoints,
        labels=labels,
        use_normalized_coordinates=False,
        skip_scores=skip_scores,
        skip_labels=not use_labels,
        groundtruth_box_visualization_color='lime',
        known_labels=known_labels,
        keypoints_radius=keypoints_radius,
        fontsize=fontsize,
        thickness=thickness
    )
    if len(image_data.keypoints) > 0:
        # Draw image-level keypoints as filled circles.
        image_pil = Image.fromarray(image)
        draw = ImageDraw.Draw(image_pil)
        for idx, (x, y) in enumerate(image_data.keypoints):
            draw.pieslice(
                [(x - keypoints_radius, y - keypoints_radius),
                 (x + keypoints_radius, y + keypoints_radius)],
                start=0, end=360,
                fill=STANDARD_COLORS_RGB[idx % len(STANDARD_COLORS_RGB)]
            )
        image = np.array(image_pil)
    if draw_base_labels_with_given_label_to_base_label_image is not None:
        for bbox_data in image_data.bboxes_data:
            base_label_image = draw_base_labels_with_given_label_to_base_label_image(bbox_data.label)
            draw_label_image(
                image=image,
                base_label_image=base_label_image,
                bbox_data=bbox_data,
                inplace=True
            )
    if return_as_pil_image:
        return Image.fromarray(image)
    return image
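
# Hedged usage sketch (not in the original module): render detections with
# labels and detection scores, getting a PIL image back for quick inspection.
# The data and output path are made up.
def _example_visualize():
    image_data = ImageData(image=np.zeros((100, 100, 3), dtype=np.uint8),
                           bboxes_data=[BboxData(xmin=10, ymin=10, xmax=60, ymax=60,
                                                 label='cat', detection_score=0.9)])
    pil_image = visualize_image_data(image_data,
                                     use_labels=True,
                                     score_type='detection',
                                     return_as_pil_image=True)
    pil_image.save('visualization_example.png')  # hypothetical output path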
def concat_images_data(image_data_a: ImageData,
                       image_data_b: ImageData,
                       background_color_a: Optional[Tuple[int, int, int, int]] = None,
                       background_color_b: Optional[Tuple[int, int, int, int]] = None,
                       thumbnail_size_a: Optional[Tuple[int, int]] = None,
                       thumbnail_size_b: Optional[Tuple[int, int]] = None,
                       how: Literal['horizontally', 'vertically'] = 'horizontally',
                       mode: Literal['L', 'RGB', 'RGBA'] = 'RGBA',
                       background_edge_width: int = 3,
                       between_edge_width: int = 0) -> ImageData:
    image_data_a = copy.deepcopy(image_data_a)
    image_data_b = copy.deepcopy(image_data_b)
    if image_data_a is None and image_data_b is not None:
        return image_data_b
    if image_data_a is not None and image_data_b is None:
        return image_data_a
    image_a = image_data_a.open_image()
    image_b = image_data_b.open_image()
    ha, wa = image_a.shape[:2]
    hb, wb = image_b.shape[:2]
    image = concat_images(image_a=image_a,
                          image_b=image_b,
                          background_color_a=background_color_a,
                          background_color_b=background_color_b,
                          thumbnail_size_a=thumbnail_size_a,
                          thumbnail_size_b=thumbnail_size_b,
                          how=how,
                          mode=mode,
                          background_edge_width=background_edge_width,
                          between_edge_width=between_edge_width)
    image_data_a_new_xmin, image_data_a_new_ymin = None, None
    image_data_b_new_xmin, image_data_b_new_ymin = None, None
    if how == 'horizontally':
        # Image b goes to the right of image a; both are centered vertically.
        max_height = np.max([ha, hb])
        min_ha = max_height // 2 - ha // 2
        max_ha = max_height // 2 + ha // 2
        min_hb = max_height // 2 - hb // 2
        max_hb = max_height // 2 + hb // 2
        image_data_a_new_xmin = 0
        image_data_a_new_ymin = min_ha
        image_data_a_new_xmax = wa
        image_data_a_new_ymax = max_ha
        image_data_b_new_xmin = wa + between_edge_width
        image_data_b_new_ymin = min_hb
        image_data_b_new_xmax = wa + between_edge_width + wb
        image_data_b_new_ymax = max_hb
    elif how == 'vertically':
        # Image b goes below image a; both are centered horizontally.
        max_width = np.max([wa, wb])
        min_wa = max_width // 2 - wa // 2
        max_wa = max_width // 2 + wa // 2
        min_wb = max_width // 2 - wb // 2
        max_wb = max_width // 2 + wb // 2
        image_data_a_new_xmin = min_wa
        image_data_a_new_ymin = 0
        image_data_a_new_xmax = max_wa
        image_data_a_new_ymax = ha
        image_data_b_new_xmin = min_wb
        image_data_b_new_ymin = ha + between_edge_width
        image_data_b_new_xmax = max_wb
        image_data_b_new_ymax = ha + between_edge_width + hb
    keypoints_a = image_data_a.keypoints
    keypoints_b = image_data_b.keypoints  # fixed: was image_data_a.keypoints
    keypoints_a[:, 0] += image_data_a_new_xmin
    keypoints_a[:, 1] += image_data_a_new_ymin
    keypoints_b[:, 0] += image_data_b_new_xmin
    keypoints_b[:, 1] += image_data_b_new_ymin

    def _get_new_coords_for_bbox_data(bbox_data: BboxData, xmin: int, ymin: int):
        bbox_data.keypoints[:, 0] += xmin
        bbox_data.keypoints[:, 1] += ymin
        bbox_data.xmin += xmin
        bbox_data.ymin += ymin
        bbox_data.xmax += xmin
        bbox_data.ymax += ymin
        bbox_data.image = None
        bbox_data.image_path = None
        bbox_data.cropped_image = None
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            _get_new_coords_for_bbox_data(additional_bbox_data, xmin, ymin)

    for bbox_data in image_data_a.bboxes_data:
        _get_new_coords_for_bbox_data(bbox_data, image_data_a_new_xmin,
                                      image_data_a_new_ymin)
    # Wrap each source image's bboxes into one bbox labeled
    # 'concat_images_data__image_data' (unless such a wrapper already
    # exists from a previous concatenation).
    if 'concat_images_data__image_data' not in [
            bbox_data.label for bbox_data in image_data_a.bboxes_data
    ]:
        bbox_data_a_into = [
            BboxData(xmin=image_data_a_new_xmin,
                     ymin=image_data_a_new_ymin,
                     xmax=image_data_a_new_xmax,
                     ymax=image_data_a_new_ymax,
                     label='concat_images_data__image_data',
                     additional_bboxes_data=[
                         bbox_data for bbox_data in image_data_a.bboxes_data
                         if 'concat_images_data__image_data' != bbox_data.label
                     ])
        ]
    else:
        bbox_data_a_into = []
    image_data_a.bboxes_data = [
        bbox_data for bbox_data in image_data_a.bboxes_data
        if 'concat_images_data__image_data' == bbox_data.label
    ] + bbox_data_a_into
    for bbox_data in image_data_b.bboxes_data:
        _get_new_coords_for_bbox_data(bbox_data, image_data_b_new_xmin,
                                      image_data_b_new_ymin)
    if 'concat_images_data__image_data' not in [
            bbox_data.label for bbox_data in image_data_b.bboxes_data
    ]:
        bbox_data_b_into = [
            BboxData(xmin=image_data_b_new_xmin,
                     ymin=image_data_b_new_ymin,
                     xmax=image_data_b_new_xmax,
                     ymax=image_data_b_new_ymax,
                     label='concat_images_data__image_data',
                     additional_bboxes_data=[
                         bbox_data for bbox_data in image_data_b.bboxes_data
                         if 'concat_images_data__image_data' != bbox_data.label
                     ])
        ]
    else:
        bbox_data_b_into = []
    image_data_b.bboxes_data = [
        bbox_data for bbox_data in image_data_b.bboxes_data
        if 'concat_images_data__image_data' == bbox_data.label
    ] + bbox_data_b_into
    return ImageData(image_path=None,
                     image=image,
                     bboxes_data=image_data_a.bboxes_data + image_data_b.bboxes_data,
                     label=None,
                     keypoints=np.concatenate([keypoints_a, keypoints_b], axis=0),
                     additional_info={
                         **image_data_a.additional_info,
                         **image_data_b.additional_info
                     })
def run_pipeline_on_frame(
        self, frame: np.ndarray, frame_idx: int, fps: float,
        detection_delay: int, classification_delay: int,
        detection_score_threshold: float,
        batch_size: int) -> Tuple[np.ndarray, np.ndarray]:
    frame = frame.copy()
    image_data = ImageData(image=frame)
    image_data_gen = BatchGeneratorImageData(
        [image_data],
        batch_size=batch_size,
        use_not_caught_elements_as_last_batch=True)
    pred_image_data = self.detection_inferencer.predict(
        images_data_gen=image_data_gen,
        score_threshold=detection_score_threshold)[0]
    bboxes = np.array([(bbox_data.xmin, bbox_data.ymin,
                        bbox_data.xmax, bbox_data.ymax)
                       for bbox_data in pred_image_data.bboxes_data])
    detection_scores = np.array([
        bbox_data.detection_score
        for bbox_data in pred_image_data.bboxes_data
    ])
    self.opencv_tracker = OpenCVTracker(bboxes, frame)
    tracked_bboxes, tracked_ids = self.update_sort_tracker(
        bboxes=bboxes, scores=detection_scores)
    # detection_delay_frames = int(round(detection_delay * fps / 1000))
    # classification_delay_frames = int(round(classification_delay * fps / 1000))
    current_tracks_ids = [
        frame_result.track_id
        for frame_result in self.current_ready_frames_queue
    ]
    # Classify only tracks that don't have a result in the queue yet.
    current_not_tracked_items_idxs = [
        idx for idx, tracked_id in enumerate(tracked_ids)
        if tracked_id not in current_tracks_ids
    ]
    if current_not_tracked_items_idxs:
        current_not_tracked_bboxes = tracked_bboxes[current_not_tracked_items_idxs]
        current_not_tracked_ids = tracked_ids[current_not_tracked_items_idxs]
        bboxes_data = [
            BboxData(image=frame, xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)
            for (xmin, ymin, xmax, ymax) in current_not_tracked_bboxes
        ]
        bboxes_data_gen = BatchGeneratorBboxData(
            [bboxes_data],
            batch_size=batch_size,
            use_not_caught_elements_as_last_batch=True)
        pred_bboxes_data = self.classification_inferencer.predict(bboxes_data_gen)[0]
        for bbox_data, tracked_id in zip(pred_bboxes_data, current_not_tracked_ids):
            ready_at_frame = frame_idx
            frame_result = FrameResult(label=bbox_data.label,
                                       track_id=tracked_id,
                                       ready_at_frame=ready_at_frame)
            self.current_ready_frames_queue.append(frame_result)
    return tracked_bboxes, tracked_ids
def crop_image_data(
    image_data: ImageData,
    xmin: int,
    ymin: int,
    xmax: int,
    ymax: int,
    allow_negative_and_large_coords: bool,
    remove_bad_coords: bool,
) -> ImageData:
    assert 0 <= xmin and 0 <= ymin
    assert xmin <= xmax and ymin <= ymax
    image_data = copy.deepcopy(image_data)
    image = image_data.open_image()
    height, width, _ = image.shape
    assert xmax <= width and ymax <= height
    image = image[ymin:ymax, xmin:xmax]
    new_height, new_width, _ = image.shape

    def resize_coords(bbox_data: BboxData):
        # Shift coordinates into the crop's coordinate system.
        bbox_data.xmin = bbox_data.xmin - xmin
        bbox_data.ymin = bbox_data.ymin - ymin
        bbox_data.xmax = bbox_data.xmax - xmin
        bbox_data.ymax = bbox_data.ymax - ymin
        bbox_data.keypoints[:, 0] -= xmin
        bbox_data.keypoints[:, 1] -= ymin
        bbox_data.cropped_image = None
        if not allow_negative_and_large_coords:
            bbox_data.xmin = max(0, min(bbox_data.xmin, new_width - 1))
            bbox_data.ymin = max(0, min(bbox_data.ymin, new_height - 1))
            bbox_data.xmax = max(0, min(bbox_data.xmax, new_width - 1))
            bbox_data.ymax = max(0, min(bbox_data.ymax, new_height - 1))
            keypoints = []
            for (x, y) in bbox_data.keypoints:
                x = max(0, min(x, new_width - 1))
                y = max(0, min(y, new_height - 1))
                keypoints.append([x, y])
            bbox_data.keypoints = np.array(keypoints).reshape(-1, 2)
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            resize_coords(additional_bbox_data)

    for bbox_data in image_data.bboxes_data:
        resize_coords(bbox_data)
    keypoints = []
    for (x, y) in image_data.keypoints:
        x = max(0, min(x - xmin, new_width - 1))
        y = max(0, min(y - ymin, new_height - 1))
        keypoints.append([x, y])
    image_data.keypoints = np.array(keypoints).reshape(-1, 2)

    def if_bbox_data_inside_crop(bbox_data: BboxData):
        # Keep only keypoints inside the crop; fixed: x is compared with the
        # width and y with the height (they were swapped in the original).
        bbox_data.keypoints = bbox_data.keypoints[(
            (bbox_data.keypoints[:, 0] >= 0) &
            (bbox_data.keypoints[:, 1] >= 0) &
            (bbox_data.keypoints[:, 0] < new_width) &
            (bbox_data.keypoints[:, 1] < new_height))]
        bbox_data.additional_bboxes_data = [
            additional_bbox_data
            for additional_bbox_data in bbox_data.additional_bboxes_data
            if if_bbox_data_inside_crop(additional_bbox_data)
        ]
        return (bbox_data.xmin >= 0 and bbox_data.ymin >= 0
                and bbox_data.xmax < new_width and bbox_data.ymax < new_height
                and bbox_data.xmin < bbox_data.xmax
                and bbox_data.ymin < bbox_data.ymax)

    if remove_bad_coords:
        image_data.bboxes_data = [
            bbox_data for bbox_data in image_data.bboxes_data
            if if_bbox_data_inside_crop(bbox_data)
        ]
        # Fixed here as well: width bounds x, height bounds y.
        image_data.keypoints = image_data.keypoints[(
            (image_data.keypoints[:, 0] >= 0) &
            (image_data.keypoints[:, 1] >= 0) &
            (image_data.keypoints[:, 0] < new_width) &
            (image_data.keypoints[:, 1] < new_height))]
    image_data.image_path = None
    image_data.image = image
    return image_data
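
# Hedged usage sketch (not in the original module): crop the top-left 50x50
# region; bboxes that fall fully outside the crop are removed when
# remove_bad_coords=True. Dummy in-memory data.
def _example_crop():
    image_data = ImageData(image=np.zeros((100, 100, 3), dtype=np.uint8),
                           bboxes_data=[
                               BboxData(xmin=5, ymin=5, xmax=20, ymax=20, label='inside'),
                               BboxData(xmin=60, ymin=60, xmax=90, ymax=90, label='outside'),
                           ])
    cropped = crop_image_data(image_data, xmin=0, ymin=0, xmax=50, ymax=50,
                              allow_negative_and_large_coords=True,
                              remove_bad_coords=True)
    assert [b.label for b in cropped.bboxes_data] == ['inside']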