def generate_text_region_mask(self, img_size, text_polys):
    """Rasterize all text polygons into a single binary text region mask.

    Args:
        img_size (tuple): The image size (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons. Every
            entry must wrap exactly one flat coordinate sequence whose
            values are consumed pairwise as polygon vertices.

    Returns:
        text_region_mask (ndarray): A uint8 mask of shape (height, width)
            holding 1 inside any text polygon and 0 elsewhere.
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    text_region_mask = np.zeros((height, width), dtype=np.uint8)

    for wrapped_poly in text_polys:
        assert len(wrapped_poly) == 1
        coords = wrapped_poly[0]

        # Group consecutive values two by two into polygon vertices.
        vertices = []
        for start in range(0, len(coords), 2):
            vertices.append([coords[start], coords[start + 1]])

        # cv2.fillPoly expects int32 contours shaped (num_polys, N, 2).
        contour = np.array(vertices, dtype=np.int32).reshape((1, -1, 2))
        cv2.fillPoly(text_region_mask, contour, 1)

    return text_region_mask
def generate_center_region_mask(self, img_size, text_polys):
    """Build the binary text center region mask.

    For every polygon the two sidelines are resampled, their midpoints form
    the center line, the center line is trimmed at both ends and the
    remaining segments are drawn as quadrilaterals whose corners are pulled
    towards the center line by ``self.center_region_shrink_ratio``.

    Args:
        img_size (tuple): The image size of (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons. Every
            entry must wrap exactly one ndarray.

    Returns:
        center_region_mask (ndarray): A uint8 mask of shape (height, width)
            holding 1 inside the text center regions and 0 elsewhere.
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    center_region_mask = np.zeros((height, width), np.uint8)

    def towards_center(side_pt, center_pt):
        # Move a sideline point towards its center-line point.
        return center_pt + (side_pt -
                            center_pt) * self.center_region_shrink_ratio

    quads = []
    for wrapped_poly in text_polys:
        assert len(wrapped_poly) == 1
        polygon_points = wrapped_poly[0].reshape(-1, 2)

        _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points)
        top_pts, bot_pts = self.resample_sidelines(top_line, bot_line,
                                                   self.resample_step)
        # Flip the bottom line so index i on both sidelines can be paired.
        bot_pts = bot_pts[::-1]
        mid_pts = (top_pts + bot_pts) / 2

        # Number of samples to drop at each end: a quarter of the local
        # top-to-bottom distance, expressed in resample steps.
        head_len = norm(top_pts[0] - bot_pts[0]) / 4.0
        tail_len = norm(top_pts[-1] - bot_pts[-1]) / 4.0
        n_head = int(head_len // self.resample_step)
        n_tail = int(tail_len // self.resample_step)

        # Only trim when more than two samples would survive.
        if len(mid_pts) > n_head + n_tail + 2:

            def trim(line):
                return line[n_head:len(line) - n_tail]

            mid_pts = trim(mid_pts)
            top_pts = trim(top_pts)
            bot_pts = trim(bot_pts)

        for cur in range(0, len(mid_pts) - 1):
            nxt = cur + 1
            tl = towards_center(top_pts[cur], mid_pts[cur])
            tr = towards_center(top_pts[nxt], mid_pts[nxt])
            br = towards_center(bot_pts[nxt], mid_pts[nxt])
            bl = towards_center(bot_pts[cur], mid_pts[cur])
            quads.append(np.vstack([tl, tr, br, bl]).astype(np.int32))

    # All quadrilaterals of all polygons are filled in one call.
    cv2.fillPoly(center_region_mask, quads, 1)
    return center_region_mask
def generate_kernels(self,
                     img_size,
                     text_polys,
                     shrink_ratio,
                     max_shrink=sys.maxsize,
                     ignore_tags=None):
    """Generate text instance kernels for one shrink ratio.

    Args:
        img_size (tuple(int, int)): The image size of (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons.
        shrink_ratio (float): The shrink ratio of kernel.
        max_shrink (int): Upper bound on the shrink distance passed to the
            polygon offset. Defaults to ``sys.maxsize``.
        ignore_tags (list[bool] | None): Optional per-polygon flags. The
            entry of a polygon whose shrunk kernel cannot be built is set
            to True in place. Defaults to None.

    Returns:
        tuple: ``(text_kernel, ignore_tags)`` where ``text_kernel`` is a
        float32 ndarray of shape (height, width) in which the kernel of
        polygon ``i`` is filled with the value ``i + 1``, and
        ``ignore_tags`` is the (possibly updated) argument.
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)
    assert isinstance(shrink_ratio, float)

    h, w = img_size
    text_kernel = np.zeros((h, w), dtype=np.float32)

    for text_ind, poly in enumerate(text_polys):
        instance = poly[0].reshape(-1, 2).astype(np.int32)
        area = plg.Polygon(instance).area()
        peri = cv2.arcLength(instance, True)
        # The 0.001 guards against a zero perimeter; + 0.5 rounds.
        distance = min(
            int(area * (1 - shrink_ratio * shrink_ratio) / (peri + 0.001) +
                0.5), max_shrink)
        pco = pyclipper.PyclipperOffset()
        pco.AddPath(instance, pyclipper.JT_ROUND,
                    pyclipper.ET_CLOSEDPOLYGON)
        # Keep the offset result as a plain list: it may hold several
        # paths with different vertex counts, and converting such a ragged
        # list with np.array raises ValueError on NumPy >= 1.24.
        shrunk_paths = pco.Execute(-distance)

        # Nothing left after shrinking: flag the instance and move on.
        if len(shrunk_paths) == 0 or len(shrunk_paths[0]) == 0:
            if ignore_tags is not None:
                ignore_tags[text_ind] = True
            continue

        try:
            # Only the first resulting path is used as the kernel.
            shrinked = np.array(shrunk_paths[0]).reshape(-1, 2)
        except Exception as e:  # best effort: log, flag and skip
            print_log(f'{shrunk_paths} with error {e}')
            if ignore_tags is not None:
                ignore_tags[text_ind] = True
            continue

        cv2.fillPoly(text_kernel, [shrinked.astype(np.int32)],
                     text_ind + 1)

    return text_kernel, ignore_tags
def generate_fourier_maps(self, img_size, text_polys):
    """Generate Fourier coefficient maps.

    Each map stack has ``2 * self.fourier_degree + 1`` channels. Inside a
    polygon, every channel except the middle one holds the polygon's
    coefficient for that channel; the middle channel holds the coefficient
    minus the pixel's x (real part) or y (imaginary part) coordinate.
    Polygons drawn later overwrite earlier ones where they overlap.

    Args:
        img_size (tuple): The image size of (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons. Every
            entry must wrap exactly one flat coordinate sequence.

    Returns:
        fourier_real_map (ndarray): The Fourier coefficient real part maps.
        fourier_image_map (ndarray): The Fourier coefficient imaginary part
            maps.
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    k = self.fourier_degree
    num_channels = k * 2 + 1
    real_map = np.zeros((num_channels, height, width), dtype=np.float32)
    imag_map = np.zeros((num_channels, height, width), dtype=np.float32)

    for wrapped_poly in text_polys:
        assert len(wrapped_poly) == 1
        coords = wrapped_poly[0]
        vertices = [[coords[j], coords[j + 1]]
                    for j in range(0, len(coords), 2)]
        polygon = np.array(vertices).reshape((1, -1, 2))

        inside = np.zeros((height, width), dtype=np.uint8)
        cv2.fillPoly(inside, polygon.astype(np.int32), 1)
        outside = 1 - inside

        fourier_coeff = self.cal_fourier_signature(polygon[0], k)

        for channel in range(num_channels):
            if channel == k:
                # Middle channel: coefficient relative to the pixel position.
                ys, xs = np.nonzero(inside > 0.5)
                real_map[k, ys, xs] = fourier_coeff[k, 0] - xs
                imag_map[k, ys, xs] = fourier_coeff[k, 1] - ys
                continue
            # Other channels: overwrite inside the polygon, keep the rest.
            real_map[channel, :, :] = inside * fourier_coeff[
                channel, 0] + outside * real_map[channel, :, :]
            imag_map[channel, :, :] = inside * fourier_coeff[
                channel, 1] + outside * imag_map[channel, :, :]

    return real_map, imag_map
def generate_kernels(self,
                     resize_shape,
                     pad_shape,
                     char_boxes,
                     char_inds,
                     shrink_ratio=0.5,
                     binary=True):
    """Generate char instance kernels for one shrink ratio.

    Args:
        resize_shape (tuple(int, int)): Image size (height, width)
            after resizing.
        pad_shape (tuple(int, int)): Image size (height, width)
            after padding.
        char_boxes (list[list[float]]): The list of char polygons.
        char_inds (list[int]): List of char indexes.
        shrink_ratio (float): The shrink ratio of kernel.
        binary (bool): If True, every char is drawn with the value 1;
            otherwise char ``i`` is drawn with ``char_inds[i]``.

    Returns:
        char_kernel (ndarray): An int32 map of shape ``pad_shape``. The
        area right of the resized width, within the resized height, is
        pre-filled with ``self.pad_val`` before the chars are drawn.
    """
    assert isinstance(resize_shape, tuple)
    assert isinstance(pad_shape, tuple)
    assert check_argument.is_2dlist(char_boxes)
    assert check_argument.is_type_list(char_inds, int)
    assert isinstance(shrink_ratio, float)
    assert isinstance(binary, bool)

    char_kernel = np.zeros(pad_shape, dtype=np.int32)
    char_kernel[:resize_shape[0], resize_shape[1]:] = self.pad_val

    for box_idx, box in enumerate(char_boxes):
        # NOTE(review): any other box_type leaves `shrunk` unbound here.
        if self.box_type == 'char_rects':
            shrunk = self.shrink_char_rect(box, shrink_ratio)
        elif self.box_type == 'char_quads':
            shrunk = self.shrink_char_quad(box, shrink_ratio)

        if binary:
            label = 1
        else:
            label = char_inds[box_idx]

        cv2.fillConvexPoly(char_kernel, shrunk.astype(np.int32), label)

    return char_kernel
def sort_points(points):
    """Sort arbitrary points in clockwise order.

    Points are ordered by their direction as seen from the mean of all
    points. Only the first two components (x, y) of each point take part
    in the comparison. Reference: https://stackoverflow.com/a/6989383.

    Args:
        points (list[ndarray] or ndarray or list[list]): A list of unsorted
            boundary points.

    Returns:
        list[ndarray]: A list of points sorted in clockwise order.
    """
    assert is_type_list(points, np.ndarray) or isinstance(points, np.ndarray) \
        or is_2dlist(points)

    points = np.array(points)
    center = np.mean(points, axis=0)

    def cmp(a, b):
        oa = a - center
        ob = b - center

        # Some corner cases: the points lie on different sides of the
        # vertical line through the center.
        if oa[0] >= 0 and ob[0] < 0:
            return 1
        if oa[0] < 0 and ob[0] >= 0:
            return -1

        # 2-D cross product written out by hand: np.cross on 2-element
        # vectors is deprecated since NumPy 2.0.
        prod = oa[0] * ob[1] - oa[1] * ob[0]
        if prod > 0:
            return 1
        if prod < 0:
            return -1

        # a, b are on the same line from the center
        return 1 if (oa**2).sum() < (ob**2).sum() else -1

    return sorted(points, key=functools.cmp_to_key(cmp))
def generate_effective_mask(self, mask_size: tuple, polygons_ignore):
    """Generate the effective mask: 0 inside ignored polygons, 1 elsewhere.

    Args:
        mask_size (tuple): The mask size.
        polygons_ignore (list[[ndarray]]): The list of ignored text
            polygons. Only the first ndarray of every entry is used.

    Returns:
        mask (ndarray): The uint8 effective mask of shape ``mask_size``.
    """
    assert check_argument.is_2dlist(polygons_ignore)

    effective_mask = np.ones(mask_size, dtype=np.uint8)
    for ignored in polygons_ignore:
        vertices = ignored[0].reshape(-1, 2).astype(np.int32)
        # cv2.fillPoly expects contours shaped (num_polys, N, 2).
        contour = vertices.reshape(1, -1, 2)
        cv2.fillPoly(effective_mask, contour, 0)

    return effective_mask
def resize_boundary(self, boundaries, scale_factor):
    """Rescale boundaries via scale_factor.

    The boundaries are modified in place: every element except the last
    one of each boundary is multiplied by the first two scale factors,
    applied alternately.

    Args:
        boundaries (list[list[float]]): The boundary list. Each boundary
            has :math:`2k+1` elements with :math:`k>=4`.
        scale_factor (ndarray): The scale factor of size :math:`(4,)`.

    Returns:
        list[list[float]]: The scaled boundaries (the same list object
        that was passed in).
    """
    assert check_argument.is_2dlist(boundaries)
    assert isinstance(scale_factor, np.ndarray)
    assert scale_factor.shape[0] == 4

    for boundary in boundaries:
        num_elems = len(boundary)
        # NOTE(review): the result of valid_boundary is discarded here;
        # confirm whether it raises on its own or should be asserted.
        check_argument.valid_boundary(boundary, True)

        num_coords = num_elems - 1
        # Repeat the first two factors once per coordinate pair.
        pair_scale = np.tile(scale_factor[:2], int(num_coords / 2))
        pair_scale = pair_scale.reshape(1, num_coords)
        scaled = np.array(boundary[:num_coords]) * pair_scale
        boundary[:num_coords] = scaled.flatten().tolist()

    return boundaries
def generate_center_mask_attrib_maps(self, img_size, text_polys):
    """Generate text center region mask and geometric attribute maps.

    For every polygon the sidelines are resampled, the center line is
    oriented consistently, trimmed at both ends, and then handed to
    ``self.draw_center_region_maps`` which draws into the output maps.

    Args:
        img_size (tuple): The image size of (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons. Every
            entry must wrap exactly one flat coordinate sequence.

    Returns:
        center_region_mask (ndarray): The text center region mask.
        radius_map (ndarray): The distance map from each pixel in text
            center region to top sideline.
        sin_map (ndarray): The sin(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
        cos_map (ndarray): The cos(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    center_region_mask = np.zeros((height, width), np.uint8)
    radius_map = np.zeros((height, width), dtype=np.float32)
    sin_map = np.zeros((height, width), dtype=np.float32)
    cos_map = np.zeros((height, width), dtype=np.float32)

    for wrapped_poly in text_polys:
        assert len(wrapped_poly) == 1
        coords = wrapped_poly[0]
        vertices = [[coords[j], coords[j + 1]]
                    for j in range(0, len(coords), 2)]
        polygon_points = np.array(vertices, dtype=np.int32).reshape(-1, 2)

        _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points)
        top_pts, bot_pts = self.resample_sidelines(top_line, bot_line,
                                                   self.resample_step)
        # Flip the bottom line so index i on both sidelines can be paired.
        bot_pts = bot_pts[::-1]
        mid_pts = (top_pts + bot_pts) / 2

        # Orient the center line: above the slope threshold the second
        # component of the end-to-end vector must be non-negative,
        # otherwise the first component must be.
        span = mid_pts[-1] - mid_pts[0]
        axis = 1 if self.vector_slope(span) > 0.9 else 0
        if span[axis] < 0:
            mid_pts = mid_pts[::-1]
            top_pts = top_pts[::-1]
            bot_pts = bot_pts[::-1]

        # Number of samples to drop at each end: a quarter of the local
        # top-to-bottom distance, expressed in resample steps.
        head_len = norm(top_pts[0] - bot_pts[0]) / 4.0
        tail_len = norm(top_pts[-1] - bot_pts[-1]) / 4.0
        n_head = int(head_len // self.resample_step)
        n_tail = int(tail_len // self.resample_step)

        # Only trim when more than two samples would survive.
        if len(mid_pts) > n_head + n_tail + 2:
            mid_pts, top_pts, bot_pts = (
                line[n_head:len(line) - n_tail]
                for line in (mid_pts, top_pts, bot_pts))

        self.draw_center_region_maps(top_pts, bot_pts, mid_pts,
                                     center_region_mask, radius_map,
                                     sin_map, cos_map,
                                     self.center_region_shrink_ratio)

    return center_region_mask, radius_map, sin_map, cos_map
def generate_center_mask_attrib_maps(self, img_size, text_polys):
    """Generate text center region masks and geometric attribute maps.

    For every polygon the sidelines are resampled, the center line is
    oriented consistently, trimmed at both ends, recorded, and then handed
    to ``self.draw_center_region_maps`` which draws into the output maps.

    Args:
        img_size (tuple): The image size (height, width).
        text_polys (list[list[ndarray]]): The list of text polygons. Every
            entry must wrap exactly one ndarray.

    Returns:
        center_lines (list): The list of text center lines, each cast to
            int32.
        center_region_mask (ndarray): The text center region mask.
        top_height_map (ndarray): The map on which the distance from points
            to top side lines will be drawn for each pixel in text center
            regions.
        bot_height_map (ndarray): The map on which the distance from points
            to bottom side lines will be drawn for each pixel in text
            center regions.
        sin_map (ndarray): The sin(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
        cos_map (ndarray): The cos(theta) map where theta is the angle
            between vector (top point - bottom point) and vector (1, 0).
    """
    assert isinstance(img_size, tuple)
    assert check_argument.is_2dlist(text_polys)

    height, width = img_size
    center_lines = []
    center_region_mask = np.zeros((height, width), np.uint8)
    top_height_map = np.zeros((height, width), dtype=np.float32)
    bot_height_map = np.zeros((height, width), dtype=np.float32)
    sin_map = np.zeros((height, width), dtype=np.float32)
    cos_map = np.zeros((height, width), dtype=np.float32)

    for wrapped_poly in text_polys:
        assert len(wrapped_poly) == 1
        polygon_points = wrapped_poly[0].reshape(-1, 2)

        _, _, top_line, bot_line = self.reorder_poly_edge(polygon_points)
        top_pts, bot_pts = self.resample_sidelines(top_line, bot_line,
                                                   self.resample_step)
        # Flip the bottom line so index i on both sidelines can be paired.
        bot_pts = bot_pts[::-1]
        mid_pts = (top_pts + bot_pts) / 2

        # Orient the center line: above the slope threshold the second
        # component of the end-to-end vector must be non-negative,
        # otherwise the first component must be.
        span = mid_pts[-1] - mid_pts[0]
        axis = 1 if self.vector_slope(span) > 2 else 0
        if span[axis] < 0:
            mid_pts = mid_pts[::-1]
            top_pts = top_pts[::-1]
            bot_pts = bot_pts[::-1]

        # Shrink lengths are taken from the un-resampled sideline ends,
        # scaled by comp_w_h_ratio and clipped to [min_width, max_width].
        head_len = np.clip(
            (norm(top_line[0] - bot_line[0]) * self.comp_w_h_ratio),
            self.min_width, self.max_width) / 2
        tail_len = np.clip(
            (norm(top_line[-1] - bot_line[-1]) * self.comp_w_h_ratio),
            self.min_width, self.max_width) / 2
        n_head = int(head_len // self.resample_step)
        n_tail = int(tail_len // self.resample_step)

        # Only trim when more than two samples would survive.
        if len(mid_pts) > n_head + n_tail + 2:
            mid_pts, top_pts, bot_pts = (
                line[n_head:len(line) - n_tail]
                for line in (mid_pts, top_pts, bot_pts))

        center_lines.append(mid_pts.astype(np.int32))

        self.draw_center_region_maps(top_pts, bot_pts, mid_pts,
                                     center_region_mask, top_height_map,
                                     bot_height_map, sin_map, cos_map,
                                     self.center_region_shrink_ratio)

    return (center_lines, center_region_mask, top_height_map,
            bot_height_map, sin_map, cos_map)