def test_is_3dlist():
    """Check ``utils.is_3dlist`` against accepted and rejected inputs."""
    # Inputs the check is expected to accept, including the empty list and
    # lists that are only partially nested.
    accepted = ([], [[]], [[[]]], [[[1]]])
    for candidate in accepted:
        assert utils.is_3dlist(candidate)

    # A plain 2d list and a nested list wrapping an ndarray must be rejected.
    rejected = ([[1, 2]], [[np.array([1, 2])]])
    for candidate in rejected:
        assert not utils.is_3dlist(candidate)
def imshow_text_char_boundary(img,
                              text_quads,
                              boundaries,
                              char_quads,
                              chars,
                              show=False,
                              thickness=1,
                              font_scale=0.5,
                              win_name='',
                              wait_time=-1,
                              out_file=None):
    """Draw text boxes, boundaries and char boxes on img.

    Args:
        img (str or ndarray): The img to be displayed.
        text_quads (list[list[int|float]]): The text boxes.
        boundaries (list[list[int|float]]): The boundary list. An empty
            boundary is skipped.
        char_quads (list[list[list[int|float]]]): Char boxes grouped per
            text. char_quads[i] is for the ith text, and char_quads[i][j]
            is the jth char of the ith text.
        chars (list[list[char]]): The chars of each text box; they are
            joined into the label drawn next to the box.
        show (bool): Whether to show the image.
        thickness (int): Thickness of lines.
        font_scale (float): Font scales of texts.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
        out_file (str or None): The filename of the output.

    Returns:
        ndarray: The image obtained from ``mmcv.imread`` with all shapes
            and labels drawn on it.
    """
    assert isinstance(img, (np.ndarray, str))
    assert utils.is_2dlist(text_quads)
    assert utils.is_2dlist(boundaries)
    assert utils.is_3dlist(char_quads)
    assert utils.is_2dlist(chars)
    # NOTE: ``chars`` is not part of this length check; the zip below stops
    # at the shortest of the four sequences.
    assert utils.equal_len(text_quads, char_quads, boundaries)

    img = mmcv.imread(img)
    char_color = [mmcv.color_val('blue'), mmcv.color_val('green')]
    text_color = mmcv.color_val('red')

    for text_inx, (text_box, boundary, char_box, txt) in enumerate(
            zip(text_quads, boundaries, char_quads, chars)):
        text_box = np.array(text_box).reshape(-1, 2).astype(np.int32)
        cv2.polylines(
            img, [text_box.reshape(-1, 1, 2)],
            True,
            color=text_color,
            thickness=thickness)

        boundary = np.array(boundary)
        if boundary.shape[0] > 0:
            # Cast like the other polygons: float coordinates are allowed
            # by the interface but polylines needs integer points.
            boundary = boundary.astype(np.int32)
            cv2.polylines(
                img, [boundary.reshape(-1, 1, 2)],
                True,
                color=text_color,
                thickness=thickness)

        # Alternate the char colour between consecutive text instances so
        # neighbouring texts can be told apart.
        color = char_color[text_inx % 2]
        for char_quad in char_box:
            char_quad = np.array(char_quad).astype(np.int32)
            cv2.polylines(
                img, [char_quad.reshape(-1, 1, 2)],
                True,
                color=color,
                thickness=thickness)

        label_text = ''.join(txt)
        # Plain Python ints for the origin; the label sits 2 px above the
        # first vertex of the text box.
        origin = (int(text_box[0, 0]), int(text_box[0, 1]) - 2)
        cv2.putText(img, label_text, origin, cv2.FONT_HERSHEY_COMPLEX,
                    font_scale, text_color)

    if show:
        mmcv.imshow(img, win_name, wait_time)
    if out_file is not None:
        mmcv.imwrite(img, out_file)

    return img
def eval_hmean_iou(pred_boxes,
                   gt_boxes,
                   gt_ignored_boxes,
                   iou_thr=0.5,
                   precision_thr=0.5):
    """Compute IOU-based hmean for text detection results.

    Args:
        pred_boxes (list[list[list[float]]]): Predicted text boxes, one
            list per image. Each box has 2k (>=8) values.
        gt_boxes (list[list[list[float]]]): Ground truth text boxes, one
            list per image. Each box has 2k (>=8) values.
        gt_ignored_boxes (list[list[list[float]]]): Ignored ground truth
            text boxes, one list per image. Each box has 2k (>=8) values.
        iou_thr (float): A (gt_box, det_box) pair is matched when its iou
            is strictly greater than this threshold.
        precision_thr (float): Precision threshold forwarded to
            ``eval_utils.ignore_pred`` to decide which predictions are
            ignored.

    Returns:
        tuple(dict, list[dict]): The dataset-level result dict followed by
            one ``{'recall', 'precision', 'hmean'}`` dict per image.
    """
    for boxes in (pred_boxes, gt_boxes, gt_ignored_boxes):
        assert utils.is_3dlist(boxes)
    assert 0 <= iou_thr <= 1
    assert 0 <= precision_thr <= 1

    img_num = len(pred_boxes)
    assert img_num == len(gt_boxes)
    assert img_num == len(gt_ignored_boxes)

    total_gts = 0
    total_preds = 0
    total_hits = 0
    per_img_results = []

    for img_idx in range(img_num):
        care_gts = gt_boxes[img_idx]
        ignored_gts = gt_ignored_boxes[img_idx]
        preds = pred_boxes[img_idx]

        care_gt_count = len(care_gts)
        ignored_gt_count = len(ignored_gts)
        pred_count = len(preds)

        # Cared gts come first and ignored gts are appended behind them,
        # so the ignored ones occupy the tail indices.
        gt_polys = [
            eval_utils.points2polygon(pts) for pts in care_gts + ignored_gts
        ]
        ignored_gt_ids = list(
            range(care_gt_count, care_gt_count + ignored_gt_count))
        all_gt_count = len(gt_polys)
        pred_polys, _, ignored_pred_ids = eval_utils.ignore_pred(
            preds, ignored_gt_ids, gt_polys, precision_thr)

        matched = 0
        if all_gt_count > 0 and pred_count > 0:
            # Pairwise iou between every gt and every prediction.
            iou_mat = np.zeros([all_gt_count, pred_count])
            for gt_id, gt_poly in enumerate(gt_polys):
                for pred_id in range(pred_count):
                    iou_mat[gt_id, pred_id] = eval_utils.poly_iou(
                        pred_polys[pred_id], gt_poly)

            # Greedy matching: every cared gt takes the first free, cared
            # prediction whose iou exceeds the threshold.
            pred_taken = [False] * pred_count
            for gt_id in range(all_gt_count):
                if gt_id in ignored_gt_ids:
                    continue
                for pred_id in range(pred_count):
                    if pred_taken[pred_id] or pred_id in ignored_pred_ids:
                        continue
                    if iou_mat[gt_id, pred_id] > iou_thr:
                        pred_taken[pred_id] = True
                        matched += 1
                        # This gt is matched; no later prediction may
                        # claim it.
                        break

        gt_care_number = all_gt_count - ignored_gt_count
        pred_care_number = pred_count - len(ignored_pred_ids)

        recall, precision, hmean = eval_utils.compute_hmean(
            matched, matched, gt_care_number, pred_care_number)
        per_img_results.append({
            'recall': recall,
            'precision': precision,
            'hmean': hmean
        })

        total_hits += matched
        total_gts += gt_care_number
        total_preds += pred_care_number

    total_recall, total_precision, total_hmean = eval_utils.compute_hmean(
        total_hits, total_hits, total_gts, total_preds)

    dataset_results = {
        'num_gts': total_gts,
        'num_dets': total_preds,
        'num_match': total_hits,
        'recall': total_recall,
        'precision': total_precision,
        'hmean': total_hmean
    }

    return dataset_results, per_img_results
def eval_hmean_ic13(det_boxes,
                    gt_boxes,
                    gt_ignored_boxes,
                    precision_thr=0.4,
                    recall_thr=0.8,
                    center_dist_thr=1.0,
                    one2one_score=1.,
                    one2many_score=0.8,
                    many2one_score=1.):
    """Compute hmean for text detection following the icdar2013 standard.

    Args:
        det_boxes (list[list[list[float]]]): List of arrays of shape
            (n, 2k). Each element is the det_boxes for one img. k>=4.
        gt_boxes (list[list[list[float]]]): List of arrays of shape
            (m, 2k). Each element is the gt_boxes for one img. k>=4.
        gt_ignored_boxes (list[list[list[float]]]): List of arrays of
            (l, 2k). Each element is the ignored gt_boxes for one img.
            k>=4.
        precision_thr (float): Precision threshold of the iou of one
            (gt_box, det_box) pair.
        recall_thr (float): Recall threshold of the iou of one
            (gt_box, det_box) pair.
        center_dist_thr (float): Upper bound (exclusive) on the normalized
            center distance of a one-to-one (gt_box, det_box) pair.
        one2one_score (float): Reward when one gt matches one det_box.
        one2many_score (float): Reward when one gt matches many det_boxes.
        many2one_score (float): Reward when many gts match one det_box.

    Returns:
        tuple(dict, list[dict]): The dataset-level result dict followed by
            one ``{'recall', 'precision', 'hmean'}`` dict per image.
    """
    for boxes in (det_boxes, gt_boxes, gt_ignored_boxes):
        assert utils.is_3dlist(boxes)

    assert 0 <= precision_thr <= 1
    assert 0 <= recall_thr <= 1
    assert center_dist_thr > 0
    for score in (one2one_score, one2many_score, many2one_score):
        assert 0 <= score <= 1

    img_num = len(det_boxes)
    assert img_num == len(gt_boxes)
    assert img_num == len(gt_ignored_boxes)

    total_gts = 0
    total_dets = 0
    total_recall_hits = 0.0
    total_precision_hits = 0.0
    per_img_results = []

    for img_idx in range(img_num):
        care_gts = gt_boxes[img_idx]
        ignored_gts = gt_ignored_boxes[img_idx]
        dets = det_boxes[img_idx]

        care_gt_count = len(care_gts)
        ignored_gt_count = len(ignored_gts)
        det_count = len(dets)

        recall_sum = 0.
        precision_sum = 0.

        # Cared gts come first and ignored gts are appended behind them,
        # so the ignored ones occupy the tail indices.
        gt_points = care_gts + ignored_gts
        gt_polys = [eval_utils.points2polygon(pts) for pts in gt_points]
        ignored_gt_ids = list(
            range(care_gt_count, care_gt_count + ignored_gt_count))
        all_gt_count = len(gt_polys)

        det_polys, det_points, ignored_det_ids = eval_utils.ignore_pred(
            dets, ignored_gt_ids, gt_polys, precision_thr)

        if det_count > 0 and all_gt_count > 0:
            # Hit flags are shared by the three matching phases below.
            gt_hit = [0] * all_gt_count
            det_hit = [0] * det_count

            # Area recall and precision of every (gt, det) pair in the
            # image.
            recall_mat, precision_mat = compute_recall_precision(
                gt_polys, det_polys)

            # Phase 1: one gt to one det box.
            for gt_id in range(all_gt_count):
                if gt_id in ignored_gt_ids:
                    continue
                for det_id in range(det_count):
                    if det_hit[det_id] != 0 or det_id in ignored_det_ids:
                        continue
                    if not eval_utils.one2one_match_ic13(
                            gt_id, det_id, recall_mat, precision_mat,
                            recall_thr, precision_thr):
                        continue

                    gt_point = np.array(gt_points[gt_id])
                    det_point = np.array(det_points[det_id])

                    # Center distance normalized by the mean of the two
                    # box diagonals.
                    norm_dist = eval_utils.box_center_distance(
                        det_point, gt_point)
                    norm_dist /= (
                        eval_utils.box_diag(det_point) +
                        eval_utils.box_diag(gt_point))
                    norm_dist *= 2.0

                    if norm_dist < center_dist_thr:
                        gt_hit[gt_id] = 1
                        det_hit[det_id] = 1
                        recall_sum += one2one_score
                        precision_sum += one2one_score
                        # This gt is matched; later dets cannot claim it.
                        break

            # Phase 2: one gt to many det boxes.
            for gt_id in range(all_gt_count):
                if gt_id in ignored_gt_ids:
                    continue
                matched, matched_dets = eval_utils.one2many_match_ic13(
                    gt_id, recall_mat, precision_mat, recall_thr,
                    precision_thr, gt_hit, det_hit, ignored_det_ids)
                if not matched:
                    continue

                gt_hit[gt_id] = 1
                recall_sum += one2many_score
                precision_sum += one2many_score * len(matched_dets)
                for det_id in matched_dets:
                    det_hit[det_id] = 1

            # Phase 3: many gts to one det box.
            for det_id in range(det_count):
                if det_id in ignored_det_ids:
                    continue
                matched, matched_gts = eval_utils.many2one_match_ic13(
                    det_id, recall_mat, precision_mat, recall_thr,
                    precision_thr, gt_hit, det_hit, ignored_gt_ids)
                if not matched:
                    continue

                det_hit[det_id] = 1
                recall_sum += many2one_score * len(matched_gts)
                precision_sum += many2one_score
                for gt_id in matched_gts:
                    gt_hit[gt_id] = 1

        gt_care_number = all_gt_count - ignored_gt_count
        pred_care_number = det_count - len(ignored_det_ids)

        recall, precision, hmean = eval_utils.compute_hmean(
            recall_sum, precision_sum, gt_care_number, pred_care_number)
        per_img_results.append({
            'recall': recall,
            'precision': precision,
            'hmean': hmean
        })

        total_gts += gt_care_number
        total_dets += pred_care_number
        total_recall_hits += recall_sum
        total_precision_hits += precision_sum

    total_r, total_p, total_h = eval_utils.compute_hmean(
        total_recall_hits, total_precision_hits, total_gts, total_dets)

    dataset_results = {
        'num_gts': total_gts,
        'num_dets': total_dets,
        'num_recall': total_recall_hits,
        'num_precision': total_precision_hits,
        'recall': total_r,
        'precision': total_p,
        'hmean': total_h
    }

    return dataset_results, per_img_results