def main():
    """
    Demo entry point: load the source and target character images, binarize
    them, split the target into connected components and display each one.
    """
    src_path = "../chars/src_dan_svg_simple_resized.png"
    tag_path = "../chars/tag_fu_processed.png"

    # Load both characters as grayscale and binarize at threshold 127.
    src_img = cv2.imread(src_path, 0)
    tag_img = cv2.imread(tag_path, 0)
    _, src_img = cv2.threshold(src_img, 127, 255, cv2.THRESH_BINARY)
    _, tag_img = cv2.threshold(tag_img, 127, 255, cv2.THRESH_BINARY)
    src_img = np.uint8(src_img)
    tag_img = np.uint8(tag_img)

    # Connected components of the target character.
    components = getConnectedComponents(tag_img)
    print("components len: %d" % len(components))

    # Check C-points exist or not. if no C-points, the single stroke.
    # But C-points exist, several strokes.
    # FIX: the loop variable used to be named `components`, shadowing the list
    # being iterated; renamed to `comp`. Two unused RGB conversions
    # (src_img_rgb / tag_img_rgb) were removed as dead locals.
    for idx, comp in enumerate(components):
        win_name = "src " + str(idx)
        cv2.imshow(win_name, comp)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def autoStrokeExtractFromCharacter(rgb):
    """
    Extract strokes from a whole character image.

    :param rgb: RGB character image.
    :return: list of stroke images, or None when no image is given.
    """
    if rgb is None:
        return

    # RGB -> grayscale -> binary (fixed threshold of 127).
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # Split the binary character into 8-connected components.
    comps = getConnectedComponents(binary, connectivity=8)
    print("components num: %d" % len(comps))

    # Extract the strokes of every component and collect them all.
    strokes = []
    for component in comps:
        sub_strokes = autoStrokeExtractFromComponent(component)
        print("sub_strokes num: %d" % len(sub_strokes))
        strokes.extend(sub_strokes)
    return strokes
def autoSmoothContoursOfCharacter(grayscale):
    """
    Smooth the contours of a whole character image.

    Binarizes the character, splits it into connected components, smooths
    each component's contours, then merges the smoothed components back
    into one grayscale image.

    :param grayscale: grayscale character image (ink = 0, background = 255).
    :return: merged smoothed grayscale image, or None if no image is given.
    """
    if grayscale is None:
        return

    # Grayscale -> bitmap.
    img_gray = grayscale.copy()
    _, img_bit = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)

    # Separate image into several connected components.
    components = getConnectedComponents(img_bit)
    print("components num: %d" % len(components))

    # Smooth components one by one.
    components_smoothed = []
    for comp in components:
        components_smoothed.append(autoSmoothContoursOfComponent(comp))

    # Debug display of original and smoothed components.
    # FIX: this loop used to call cv2.imshow("cd_%d", components[1]) — the
    # window title was never formatted with the index and every iteration
    # re-displayed the same component; each component now gets its own window.
    for i in range(len(components)):
        cv2.imshow("cd_%d" % i, components[i])
    for i in range(len(components_smoothed)):
        cv2.imshow("c_%d" % i, components_smoothed[i])

    # Merge all smoothed components together into one character image:
    # copy every ink pixel (value 0.0) of each component onto a blank page.
    img_smoothed = createBlankGrayscaleImage(img_gray)
    for comp in components_smoothed:
        for y in range(comp.shape[0]):
            for x in range(comp.shape[1]):
                if comp[y][x] == 0.0:
                    img_smoothed[y][x] = 0.0
    return img_smoothed
# NOTE(review): fragment — the enclosing function and the opening
# `if temp_minw > temp_minh:` branch/loop header fall outside this chunk;
# the indentation below is reconstructed and must be re-joined with the
# missing context. Logic: letterbox the stroke bounding box into a square,
# resize it to the source square if needed, then compare component counts
# between the source and template characters.
stroke_square[int((temp_minw - temp_minh) / 2):int((temp_minw - temp_minh) / 2) + temp_minh, 0:temp_minw] = stroke_box
elif temp_minw < temp_minh:
    # pad horizontally: center the narrower box inside the square
    stroke_square[0:temp_minh, int((temp_minh - temp_minw) / 2):int((temp_minh - temp_minw) / 2) + temp_minw] = stroke_box
if temp_box_width > src_box_width:
    # template stroke larger than source: shrink to the source square size
    stroke_square = cv2.resize(stroke_square, (src_square.shape[0], src_square.shape[1]))
temp_strokes_resized.append(stroke_square)

# detect components in src image
src_components = getConnectedComponents(src_square, connectivity=8)
print("src components num: ", len(src_components))

# detect components in template image
temp_components = getConnectedComponents(temp_square)
print("template components num: ", len(temp_components))

if len(temp_components) == len(src_components):
    print("components are same!")
else:
    print("components are not same!!!")

# detect template char components and strokes contain relation
temp_comp_contains_strokes = []
for i in range(len(temp_components)):
    strokes = []
def autoSmoothContoursOfComponent(component, blockSize=3, ksize=3, k=0.04):
    """
    Smooth the contours of a single connected component.

    Corner points are located with a Harris corner detector, validated
    against the skeleton's end/cross points, and used to split every contour
    into sub-contours; each sub-contour is replaced by a fitted Bezier curve
    and the smoothed outline is re-filled (holes are punched back to white).

    :param component: grayscale component image (ink = 0.0, background = 255).
    :param blockSize: Harris detector neighborhood size.
    :param ksize: Sobel aperture used by the Harris detector.
    :param k: Harris detector free parameter.
    :return: smoothed, filled grayscale image, or None if component is None.
    """
    if component is None:
        return

    # 5. Harris corner detection -> all points inside corner regions.
    corner_component = np.float32(component)
    dst = cv2.cornerHarris(corner_component, blockSize=blockSize, ksize=ksize, k=k)
    dst = cv2.dilate(dst, None)

    threshold = 0.1 * dst.max()  # hoisted: dst.max() is loop-invariant
    corners_area_points = []
    for y in range(dst.shape[0]):
        for x in range(dst.shape[1]):
            if dst[y][x] > threshold:
                corners_area_points.append((x, y))
    print("corner area points num: %d" % len(corners_area_points))

    # 6. Determine center points of corner areas via their bounding boxes.
    blank_gray = createBlankGrayscaleImage(component)
    for pt in corners_area_points:
        blank_gray[pt[1]][pt[0]] = 0.0

    rectangles = getAllMiniBoundingBoxesOfImage(blank_gray)
    corners_area_center_points = []
    for rect in rectangles:
        corners_area_center_points.append((rect[0] + int(rect[2] / 2.), rect[1] + int(rect[3] / 2.)))
    print("corner area center points num: %d" % len(corners_area_center_points))

    # Based on the distance to end points and cross points of the skeleton,
    # remove extra corner-area center points.
    component_skeleton = getSkeletonOfImage(component)
    end_points = getEndPointsOfSkeletonLine(component_skeleton)
    cross_points = getCrossPointsOfSkeletonLine(component_skeleton)

    valid_corners_area_center_points = []
    dist_threshold = 40
    for pt in corners_area_center_points:
        is_valid = False
        for ept in end_points:
            # FIX: the y-term used to be (pt[1] + ept[1]) — a sign bug that
            # computed a meaningless "distance" against end points.
            dist = math.sqrt((pt[0] - ept[0]) ** 2 + (pt[1] - ept[1]) ** 2)
            if dist <= dist_threshold:
                is_valid = True
                break
        if is_valid:
            valid_corners_area_center_points.append(pt)
            continue
        for cpt in cross_points:
            dist = math.sqrt((pt[0] - cpt[0]) ** 2 + (pt[1] - cpt[1]) ** 2)
            if dist <= dist_threshold:
                is_valid = True
                break
        if is_valid:
            valid_corners_area_center_points.append(pt)
    print("valid corner area center points num: %d" % len(valid_corners_area_center_points))
    del blank_gray

    # 7. Get all contours of the component.
    component_contours = getContourOfImage(component)
    contours = getConnectedComponents(component_contours, connectivity=8)
    print("contours num: %d" % len(contours))

    # 8. Process contours to get closed, 1-pixel-width contours.
    contours_processed = []
    for cont in contours:
        cont = removeBreakPointsOfContour(cont)
        contours_processed.append(cont)
    print("contours processed num: %d" % len(contours_processed))

    # 9. Snap every valid corner center to the nearest contour pixel.
    #    For each contour there is one corner-points list.
    contours_corner_points = []
    for i in range(len(contours_processed)):
        corner_points = []
        contour = contours_processed[i]
        for pt in valid_corners_area_center_points:
            x0 = target_x = pt[0]
            y0 = target_y = pt[1]
            min_dist = 10000
            # Search a 20x20 window centered on (x0, y0); FIX: ranges are now
            # clamped to the image so border corners no longer wrap through
            # negative indices or raise IndexError.
            for y in range(max(0, y0 - 10), min(contour.shape[0], y0 + 10)):
                for x in range(max(0, x0 - 10), min(contour.shape[1], x0 + 10)):
                    if contour[y][x] == 255:  # background pixel, skip
                        continue
                    dist = math.sqrt((x - x0) ** 2 + (y - y0) ** 2)
                    if dist < min_dist:
                        min_dist = dist
                        target_x = x
                        target_y = y
            if min_dist < 5:
                corner_points.append((target_x, target_y))
        contours_corner_points.append(corner_points)

    total_num = 0
    for cont in contours_corner_points:
        total_num += len(cont)
    if total_num == len(valid_corners_area_center_points):
        print("corner points not ignored")
    else:
        print("corner points be ignored")

    # 10. Separate contours into sub-contours at the corner points.
    sub_contours = []
    for i in range(len(contours_processed)):
        contour = contours_processed[i]
        corner_points = contours_corner_points[i]
        # sort the contour points, then keep corners in traversal order
        contour_points_sorted = sortPointsOnContourOfImage(contour)
        corner_points_sorted = []
        for pt in contour_points_sorted:
            if pt in corner_points:
                corner_points_sorted.append(pt)
        for j in range(len(corner_points_sorted)):
            start_pt = corner_points_sorted[j]
            if j == len(corner_points_sorted) - 1:
                end_pt = corner_points_sorted[0]  # wrap around to close loop
            else:
                end_pt = corner_points_sorted[j + 1]
            start_index = contour_points_sorted.index(start_pt)
            end_index = contour_points_sorted.index(end_pt)
            if start_index <= end_index:
                if end_index == len(contour_points_sorted) - 1:
                    sub_contour = contour_points_sorted[start_index:len(contour_points_sorted)]
                    sub_contour.append(contour_points_sorted[0])
                else:
                    sub_contour = contour_points_sorted[start_index:end_index + 1]
            else:
                sub_contour = contour_points_sorted[start_index:len(contour_points_sorted)] + \
                              contour_points_sorted[0:end_index + 1]
            sub_contours.append(sub_contour)
    print("sub contours num: %d" % len(sub_contours))

    # 11. Bezier-curve fit all sub-contours under a maximal error.
    max_error = 100
    sub_contours_smoothed = []
    for idx in range(len(sub_contours)):  # renamed from `id` (shadowed builtin)
        sub_contour = np.array(sub_contours[idx])
        if len(sub_contour) < 2:
            continue
        beziers = fitCurve(sub_contour, maxError=max_error)
        sub_contour_smoothed = []
        for bez in beziers:
            bezier_points = draw_cubic_bezier(bez[0], bez[1], bez[2], bez[3])
            sub_contour_smoothed += bezier_points
        sub_contours_smoothed.append(sub_contour_smoothed)

    # 12. Merge sub-contours together, re-thin them, and fill the outline.
    img_smoothed_gray = createBlankGrayscaleImage(component)
    for sub in sub_contours_smoothed:
        for pt in sub:
            img_smoothed_gray[pt[1]][pt[0]] = 0.0
    # process smoothed contours to get closed, 1-pixel width curves
    img_smoothed_gray = getSkeletonOfImage(img_smoothed_gray)
    cv2.imshow("img_smoothed_gray", img_smoothed_gray)

    contours_smoothed = getConnectedComponents(img_smoothed_gray)
    if len(contours_smoothed) == 1:
        # No hole exists: directly fill black inside the contour.
        cont = contours_smoothed[0]
        cont_points = sortPointsOnContourOfImage(cont)
        cont_points = np.array([cont_points], "int32")
        fill_contour_smooth = np.ones_like(component) * 255
        fill_contour_smooth = np.array(fill_contour_smooth, dtype=np.uint8)
        fill_contour_smooth = cv2.fillPoly(fill_contour_smooth, cont_points, 0)
        return fill_contour_smooth
    else:
        # Holes exist: decide per contour whether it is outline or hole.
        print("there are holes!")
        fill_img_list = []
        hole_points = []
        for cont in contours_smoothed:
            cont_points = sortPointsOnContourOfImage(cont)
            cont_points = np.array([cont_points], "int32")
            fill_contour_smooth = np.ones_like(component) * 255
            fill_contour_smooth = np.array(fill_contour_smooth, dtype=np.uint8)
            fill_contour_smooth = cv2.fillPoly(fill_contour_smooth, cont_points, 0)
            # Overlap ratio with the original ink decides outline vs hole.
            valid_num = same_num = 0
            for y in range(component.shape[0]):
                for x in range(component.shape[1]):
                    if component[y][x] == 0.0:
                        valid_num += 1
                        if fill_contour_smooth[y][x] == 0.0:
                            same_num += 1
            if valid_num == 0:
                # FIX: guard against an all-background component, which would
                # previously raise ZeroDivisionError.
                continue
            if 1.0 * same_num / valid_num > 0.8:
                fill_img_list.append(fill_contour_smooth)
                print("ratio: %f" % (1.0 * same_num / valid_num))
            else:
                print("ratio: %f" % (1.0 * same_num / valid_num))
                for y in range(fill_contour_smooth.shape[0]):
                    for x in range(fill_contour_smooth.shape[1]):
                        if fill_contour_smooth[y][x] == 0.0:
                            hole_points.append((x, y))
        # Merge filled outlines, then punch the hole pixels back to white.
        blank_temp = np.ones_like(component) * 255
        for fl in fill_img_list:
            for y in range(fl.shape[0]):
                for x in range(fl.shape[1]):
                    if fl[y][x] == 0.0:
                        blank_temp[y][x] = fl[y][x]
        for pt in hole_points:
            blank_temp[pt[1]][pt[0]] = 255
        return blank_temp
def autoStrokeExtracting(index, image, threshold_value=200):
    """
    Automatic strokes extracting.

    :param index: numeric id, used only for the debug window titles.
    :param image: grayscale image of one radical/character.
    :param threshold_value: binarization threshold.
    :return: strokes images with same size.
    """
    strokes = []
    if image is None:
        return strokes
    # get connected components from the grayscale image, not for the binary image.
    contour_img = getContourImage(image)
    contours = getConnectedComponents(contour_img)  # no holes -> num == 1; holes exist -> num >= 2
    print("contours num: %d" % len(contours))
    corners_points_sorted = []
    for ct in contours:
        points = sortPointsOnContourOfImage(ct)
        corners_points_sorted.append(points)
    if len(corners_points_sorted) == 1:
        print("No holes exist!")
    elif len(corners_points_sorted) >= 2:
        print("Holes exist!")
    # grayscale image to binary image
    _, img_bit = cv2.threshold(image, threshold_value, 255, cv2.THRESH_BINARY)
    # 1-pixel-width skeleton of the binary image, with extra branches removed
    skeleton_img = getSkeletonOfImage(img_bit)
    skeleton_img = removeExtraBranchesOfSkeleton(skeleton_img)
    end_points = getEndPointsOfSkeletonLine(skeleton_img)      # end points
    cross_points = getCrossPointsOfSkeletonLine(skeleton_img)  # cross points
    print("end points num: %d" % len(end_points))
    print("cross points num: %d" % len(cross_points))
    if len(cross_points) == 0:
        # no junction -> the whole image is a single stroke
        print("no cross points!")
        strokes.append(image)
        return strokes
    # corner area points, filtered to those near end/cross points
    corners_all_points = getCornersPoints(image.copy(), contour_img, blockSize=3, ksize=3, k=0.04)
    corners_points = getValidCornersPoints(corners_all_points, cross_points, end_points, distance_threshold=30)
    print("corners points num: %d" % len(corners_points))
    if len(corners_points) == 0:
        print("no corner point")
        strokes.append(image)
        return strokes
    # RGB copy for debug drawing; grayscale copy for the actual splitting
    contour_rgb = cv2.cvtColor(contour_img, cv2.COLOR_GRAY2RGB)
    contour_gray = cv2.cvtColor(contour_rgb, cv2.COLOR_RGB2GRAY)
    _, contour_gray = cv2.threshold(contour_gray, 200, 255, cv2.THRESH_BINARY)
    for pt in corners_points:
        contour_rgb[pt[1]][pt[0]] = (0, 0, 255)  # mark corners in red
    # cluster corners points based on the cross point
    corner_points_cluster = getClusterOfCornerPoints(corners_points, cross_points)
    # cropping lines based on the corner points
    crop_lines = getCropLines(corner_points_cluster, None)
    for line in crop_lines:
        cv2.line(contour_rgb, line[0], line[1], (0, 255, 0), 1)
        cv2.line(contour_gray, line[0], line[1], 0, 1)
    # split contour to components (label 0 is background, skip it)
    ret, labels = cv2.connectedComponents(contour_gray, connectivity=4)
    components = []
    for r in range(1, ret):
        img_ = createBlankGrayscaleImage(contour_gray)
        for y in range(contour_gray.shape[0]):
            for x in range(contour_gray.shape[1]):
                if labels[y][x] == r:
                    img_[y][x] = 0.0
        # keep only interior components that pass the validity check
        if img_[0][0] != 0.0 and isValidComponent(img_, img_bit):
            components.append(img_)
    print("components num : %d" % len(components))
    used_components = []
    component_line_relation = {}  # {component_id: [line_id1, line_id2]}
    # merge contour pixels back onto the adjacent components
    for i in range(len(components)):
        merge_points = []
        for y in range(1, contour_gray.shape[0] - 1):
            for x in range(1, contour_gray.shape[1] - 1):
                if contour_gray[y][x] == 0.0:
                    # 4 nearby pixels should be black in components
                    if components[i][y - 1][x] == 0.0 or components[i][y][x + 1] == 0.0 or \
                            components[i][y + 1][x] == 0.0 or components[i][y][x - 1] == 0.0:
                        merge_points.append((x, y))
        for pt in merge_points:
            components[i][pt[1]][pt[0]] = 0.0
    # merge cropping lines on the components
    for i in range(len(crop_lines)):
        components_id = []
        line = crop_lines[i]
        for j in range(len(components)):
            dist_startpt = getDistanceBetweenPointAndComponent(line[0], components[j])
            # print("dist startpt:%f" % dist_startpt)
            dist_endpt = getDistanceBetweenPointAndComponent(line[1], components[j])
            # print("dist end pt: %f" % dist_endpt)
            if dist_startpt < 3 and dist_endpt < 3:
                cv2.line(components[j], line[0], line[1], 0, 1)
                components_id.append(j)
            # NOTE(review): a line touches at most two components; placement of
            # this early-exit inside the j-loop is reconstructed — confirm.
            if len(components_id) >= 2:
                break
    # find overlap region components
    overlap_components = []
    for i in range(len(components)):
        part = components[i]
        part_lines = []
        part_lines_id = []
        for j in range(len(crop_lines)):
            line = crop_lines[j]
            if part[line[0][1]][line[0][0]] == 0.0 and part[line[1][1]][line[1][0]] == 0.0:
                part_lines.append(line)
                part_lines_id.append(j)
        # check number of lines == 4 and cross each other
        if len(part_lines) == 4:
            points_set = set()
            for line in part_lines:
                points_set.add(line[0])
                points_set.add(line[1])
            if len(points_set) == 4:  # only 4 points
                overlap_components.append(part)
                used_components.append(i)
                component_line_relation[i] = part_lines_id
    print("overlap components num: %d" % len(overlap_components))
    print(component_line_relation)
    # cluster components based on the cropping lines
    for i in range(len(components)):
        part = components[i]
        part_lines = []  # used to detect overlap region components.
        # find single part is stroke
        is_single = True
        for line in crop_lines:
            x1 = line[0][0]
            y1 = line[0][1]
            x2 = line[1][0]
            y2 = line[1][1]
            # a line with exactly one endpoint inside the part means the part
            # still has to be merged with a neighbor -> not single
            if part[y1][x1] == 0.0 and part[y2][x2] != 0.0 or part[y1][x1] != 0.0 and part[y2][x2] == 0.0:
                is_single = False
                break
            if part[y1][x1] == 0.0 and part[y2][x2] == 0.0:
                part_lines.append(line)
        if is_single and isIndependentCropLines(part_lines):
            strokes.append(part)
            used_components.append(i)
    print("single stroke num: %d" % len(strokes))
    print("used components num: %d" % len(used_components))
    # cluster components based on the cropping lines
    for i in range(len(components)):
        if i in used_components:
            continue
        # find corresponding crop lines (lines with exactly one endpoint inside)
        lines_id = []
        for j in range(len(crop_lines)):
            line = crop_lines[j]
            if components[i][line[0][1]][line[0][0]] == 0.0 and components[i][line[1][1]][line[1][0]] != 0.0:
                lines_id.append(j)
            if components[i][line[0][1]][line[0][0]] != 0.0 and components[i][line[1][1]][line[1][0]] == 0.0:
                lines_id.append(j)
        if len(lines_id) == 0:
            continue
        component_line_relation[i] = lines_id
        used_components.append(i)
    # cluster components based on the relations and merge those related components
    clusters = []
    for k1, v1 in component_line_relation.items():
        cluster = [k1]
        value_sets = [set(v1)]
        for k2, v2 in component_line_relation.items():
            # k2 joins the cluster only if it shares a line with every member
            is_related = True
            for value in value_sets:
                if not value.intersection(set(v2)):
                    is_related = False
                    break
            if is_related and k2 not in cluster:
                cluster.append(k2)
                value_sets.append(set(v2))
        cluster = sorted(cluster)
        if cluster not in clusters:
            clusters.append(cluster)
    print(clusters)
    # merge components based on the cluster
    for i in range(len(clusters)):
        cluster = clusters[i]
        bk = createBlankGrayscaleImage(image)
        for clt in cluster:
            bk = mergeBkAndComponent(bk, components[clt])
        # add to strokes
        strokes.append(bk)
    # check the stroke is valid
    # NOTE(review): this loop body only rebinds `stroke` and has no effect —
    # the validity check appears unfinished.
    for i in range(len(strokes)):
        stroke = strokes[i]
    # NOTE(review): placement of the two debug windows after the loop is
    # reconstructed from the mangled source — confirm.
    cv2.imshow("radical_%d" % index, contour_rgb)
    cv2.imshow("radical_gray_%d" % index, contour_gray)
    return strokes
# 1. Load image img = cv2.imread(path) img_gray = None # 2. RGB image -> grayscale and bitmap if len(img.shape) == 3: # rgb image img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) elif len(img.shape) == 2: # grayscale img img_gray = img _, img_bit = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY) components = getConnectedComponents(img_bit) # for i in range(len(components)): # cv2.imshow("c_%d" % i, components[i]) component = components[1] skeleton = getSkeletonOfImage(component) corner_component = np.float32(component) dst = cv2.cornerHarris(corner_component, blockSize=3, ksize=3, k=0.04) dst = cv2.dilate(dst, None) corners_area_points = [] for y in range(dst.shape[0]):
def autoStrokeExtractFromCharacter(rgb):
    """
    Automatically stroke extract.

    NOTE(review): duplicate of an `autoStrokeExtractFromCharacter` defined
    earlier in this file (the chunks appear to come from different scripts);
    at import time the later definition wins.

    :param rgb: RGB image of a character.
    :return: strokes images, or None when no image is given.
    """
    if rgb is None:
        return
    # 1. Load RGB image;
    # 2. Convert RGB image to grayscale image;
    img_gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    # 3. Convert grayscale to binary image;
    _, img_bit = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)
    # 4. Get components from binary image;
    components = getConnectedComponents(img_bit, connectivity=8)
    print("components num: %d" % len(components))
    # 5. Process each component;
    strokes = []
    for comp in components:
        sub_strokes = autoStrokeExtractFromComponent(comp)
        print("sub_strokes num: %d" % len(sub_strokes))
        strokes += sub_strokes
        # NOTE(review): this `break` stops after the FIRST component only —
        # it looks like a debugging leftover; confirm before removing.
        break
    return strokes

# NOTE(review): a long commented-out legacy implementation of
# autoStrokeExtractFromComponent (Harris-corner based) followed here and
# continues in the next lines; it duplicates the active implementation kept
# elsewhere in this file and remains available in version control.
Detect the corner regions of component # # Harris corner detector # corner_region_img = np.float32(component.copy()) # dst = cv2.cornerHarris(corner_region_img, blockSize=3, ksize=3, k=0.04) # dst = cv2.dilate(dst, None) # # # get all points in corners area # corners_area_points = [] # for y in range(dst.shape[0]): # for x in range(dst.shape[1]): # if dst[y][x] > 0.1 * dst.max(): # corners_area_points.append((x, y)) # # # get all center points of corner area # corners_img = createBlankGrayscaleImage(component) # for pt in corners_area_points: # corners_img[pt[1]][pt[0]] = 0.0 # # rectangles = getAllMiniBoundingBoxesOfImage(corners_img) # # corners_area_center_points = [] # for rect in rectangles: # corners_area_center_points.append((rect[0] + int(rect[2] / 2.), rect[1] + int(rect[3] / 2.))) # # # get all corner points in coutour image. # corners_points = [] # for pt in corners_area_center_points: # if comp_contours_img[pt[1]][pt[0]] == 0.0: # corners_points.append(pt) # else: # min_dist = 100000 # min_x = min_y = 0 # for y in range(comp_contours_img.shape[0]): # for x in range(comp_contours_img.shape[1]): # cpt = comp_contours_img[y][x] # if cpt == 0.0: # dist = math.sqrt((x - pt[0]) ** 2 + (y - pt[1]) ** 2) # if dist < min_dist: # min_dist = dist # min_x = x # min_y = y # # points on contour # corners_points.append((min_x, min_y)) # print("corners points num: %d" % len(corners_points)) # # # 12. Get valid corner points based on the end points and cross points # corners_points = getValidCornersPoints(corners_points, cross_points, end_points, distance_threshold=30) # print("corners points num: %d" % len(corners_points)) # # if len(corners_points) == 0: # print("no corner points") # strokes.append(component) # return strokes # # # 13. 
Cluster these corner points based on the distance between them and cross points # corners_points_cluster = getClusterOfCornerPoints(corners_points, cross_points) # print("corner points cluster num: %d" % len(corners_points_cluster)) # # # 14. Generate cropping lines between two corner points # crop_lines = getCropLines(corners_points_cluster, None) # print("cropping lines num: %d" % len(crop_lines)) # # # 15. Separate the components based on the cropping lines # component_ = component.copy() # # # add white and 1-pixel width line in component to separate it. # for line in crop_lines: # cv2.line(component_, line[0], line[1], 255, 1) # # # 16. Get parts of component. # comp_parts = [] # # invert color !!! # component_ = 255 - component_ # ret, labels = cv2.connectedComponents(component_, connectivity=4) # print(ret) # for r in range(1, ret): # img_ = createBlankGrayscaleImage(component_) # for y in range(component_.shape[0]): # for x in range(component_.shape[1]): # if labels[y][x] == r: # img_[y][x] = 0.0 # if img_[0][0] != 0.0 and isValidComponent(img_, component): # comp_parts.append(img_) # print("parts of component num: %d" % len(comp_parts)) # # # 17. Add cropping lines to corresponding parts of component # # add lines to parts of component. # for i in range(len(comp_parts)): # part = comp_parts[i] # # for line in crop_lines: # start_dist = getDistanceBetweenPointAndComponent(line[0], part) # end_dist = getDistanceBetweenPointAndComponent(line[1], part) # # if start_dist <= 3 and end_dist <= 3: # cv2.line(part, line[0], line[1], 0, 1) # # # 18. 
Find intersection parts of component # used_index = [] # intersect_parts_index = [] # for i in range(len(comp_parts)): # part = comp_parts[i] # num = 0 # number of lines in part # for line in crop_lines: # if part[line[0][1]][line[0][0]] == 0.0 and part[line[1][1]][line[1][0]] == 0.0: # num += 1 # if num == 4: # 4 lines in one part, this part is the intersection part # intersect_parts_index.append(i) # used_index.append(i) # print("intersection parts num: %d" % len(intersect_parts_index)) # # # 19. Find the relation part and crop lines - one line -> one part or three part (part1 + intersect_part + part2) # intersect_parts_crop_lines_index = [] # for i in range(len(crop_lines)): # line = crop_lines[i] # for index in intersect_parts_index: # intersect_part = comp_parts[index] # if intersect_part[line[0][1]][line[0][0]] == 0.0 and intersect_part[line[1][1]][line[1][0]] == 0.0: # # this line in intersection part # intersect_parts_crop_lines_index.append(i) # print("crop lines in intersection part num: %d" % len(intersect_parts_crop_lines_index)) # # # Cropping lines are divided into two types: in intersect part and not in this part. # line_parts_relation = [] # for index in intersect_parts_crop_lines_index: # line = crop_lines[index] # # # line and parts that are connected by this crop line: A - intersect_part - B # line_connected_parts = [] # # find intersection part contains this line # for i in intersect_parts_index: # intersect_part = comp_parts[i] # if intersect_part[line[0][1]][line[0][0]] == 0.0 and intersect_part[line[1][1]][line[1][0]] == 0.0: # # line in this intersect part # line_connected_parts.append(i) # # find two parts connectd by this crop line # for i in range(len(comp_parts)): # # # part should not be the intersect part # if i in intersect_parts_index: # continue # # # check only end point of line in part. 
# part = comp_parts[i] # if part[line[0][1]][line[0][0]] == 0.0 and part[line[1][1]][line[1][0]] != 0.0 or \ # part[line[0][1]][line[0][0]] != 0.0 and part[line[1][1]][line[1][0]] == 0.0: # line_connected_parts.append(i) # # # add line connected parts to relation list. # if line_connected_parts not in line_parts_relation: # line_parts_relation.append(line_connected_parts) # # # add independent parts to relation of line and parts # for i in range(len(comp_parts)): # line_connected_parts = [] # is_independent = True # for relation in line_parts_relation: # if i in relation: # is_independent = False # # check this part is independent or not # if is_independent: # line_connected_parts.append(i) # # if line_connected_parts != []: # line_parts_relation.append(line_connected_parts) # # # 20. Merge parts based on the line parts relation # for i in range(len(line_parts_relation)): # # blank image # blank_ = createBlankGrayscaleImage(component) # # parts relation list # relation = line_parts_relation[i] # # for rel in relation: # part = comp_parts[rel] # if part is None: # continue # # merge part and blank image # for y in range(part.shape[0]): # for x in range(part.shape[1]): # if part[y][x] == 0.0: # blank_[y][x] = 0.0 # # # add to strokes list # strokes.append(blank_) # return strokes # # # def autoStrokeExtractFromCharacter(rgb): # """ # Automatically stroke extract. # :param rgb: rbg image # :return: strokes images # """ # if rgb is None: # return # # # 1. Load RGB image; # # 2. Convert RGB image to gryascale image; # img_gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY) # # # 3. Convert grayscale to binary image; # _, img_bit = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY) # # # 4. Get components from binary image; # components = getConnectedComponents(img_bit, connectivity=8) # print("components num: %d" % len(components)) # # # 5. 
Process each component; # strokes = [] # for comp in components: # sub_strokes = autoStrokeExtractFromComponent(comp) # print("sub_strokes num: %d" % len(sub_strokes)) # strokes += sub_strokes # # return strokes
def autoStrokeExtractFromComponent(component):
    """
    Automatically strokes extract from the component.

    :param component: grayscale component image (ink = 0.0).
    :return: list of stroke images (blank-page images with the stroke ink).
    """
    strokes = []
    if component is None:
        return strokes
    # 6. Get skeletons of component.
    comp_skeleton = getSkeletonOfImage(component.copy())
    # cv2.imshow("skeleton_original", comp_skeleton)
    # 7. Process the skeleton by removing extra branches.
    comp_skeleton = removeShortBranchesOfSkeleton(comp_skeleton, length_threshold=30)
    # cv2.imshow("skeleton_smoothed", comp_skeleton)
    # 8. Get the end points and cross points after skeleton processed.
    end_points = getEndPointsOfSkeletonLine(comp_skeleton)
    cross_points = getCrossPointsOfSkeletonLine(comp_skeleton)
    print("end points num: %d ,and cross points num: %d" % (len(end_points), len(cross_points)))
    # 9. Get contour image of component.
    comp_contours_img = getContourImage(component.copy())
    # 10. Detect the number of contours and return all contours.
    comp_contours = getConnectedComponents(comp_contours_img, connectivity=8)
    print("contours num: %d" % len(comp_contours))
    # 11. Get corner candidate points on contours via RDP simplification.
    corners_points = []
    for cont in comp_contours:
        cont = removeBreakPointsOfContour(cont)
        cont_sorted = sortPointsOnContourOfImage(cont)
        cont_points = rdp(cont_sorted, 5)  # epsilon = 5 pixels
        corners_points += cont_points
    print("corner points num:", len(corners_points))
    # Keep only candidates within this distance of a skeleton cross point.
    CORNER_CROSS_DIST_THRESHOLD = 30
    corners_points_merged = []
    for pt in corners_points:
        for cpt in cross_points:
            dist = math.sqrt((pt[0] - cpt[0]) ** 2 + (pt[1] - cpt[1]) ** 2)
            if dist < CORNER_CROSS_DIST_THRESHOLD:
                corners_points_merged.append(pt)
                break
    corners_points = corners_points_merged
    print("merged corner points num:", len(corners_points))
    # NOTE(review): a commented-out legacy Harris-corner detection block was
    # here (duplicated elsewhere in the file); removed for readability, see
    # version control.
    print("corners points num: %d" % len(corners_points))
    # 12. Get valid corner points based on the end points and cross points.
    corners_points = getValidCornersPoints(corners_points, cross_points, end_points, distance_threshold=30)
    print("corners points num: %d" % len(corners_points))
    if len(corners_points) == 0:
        # no junction corners -> the component is a single stroke
        print("no corner points")
        strokes.append(component)
        return strokes
    # 13. Cluster these corner points based on the distance between them and cross points.
    corners_points_cluster = getClusterOfCornerPoints(corners_points, cross_points, threshold_distance=70)
    print("corner points cluster num: %d" % len(corners_points_cluster))
    print(corners_points_cluster)
    # 14. Generate cropping lines between two corner points.
    crop_lines = getCropLines(corners_points_cluster, comp_contours)
    print("cropping lines num: %d" % len(crop_lines))
    # 15. Separate the component based on the cropping lines:
    # add a white, 1-pixel-width line in the component to cut it apart.
    component_ = component.copy()
    for line in crop_lines:
        cv2.line(component_, line[0], line[1], 255, 1)
    # 16. Get parts of component.
    comp_parts = []
    # invert color !!! (connectedComponents labels the white foreground)
    component_ = 255 - component_
    ret, labels = cv2.connectedComponents(component_, connectivity=4)
    print(ret)
    for r in range(1, ret):
        img_ = createBlankGrayscaleImage(component_)
        for y in range(component_.shape[0]):
            for x in range(component_.shape[1]):
                if labels[y][x] == r:
                    img_[y][x] = 0.0
        if img_[0][0] != 0.0 and isValidComponent(img_, component):
            comp_parts.append(img_)
    print("parts of component num: %d" % len(comp_parts))
    # 17. Add cropping lines back onto the parts they belong to.
    for i in range(len(comp_parts)):
        part = comp_parts[i]
        for line in crop_lines:
            start_dist = getDistanceBetweenPointAndComponent(line[0], part)
            end_dist = getDistanceBetweenPointAndComponent(line[1], part)
            if start_dist <= 3 and end_dist <= 3:
                cv2.line(part, line[0], line[1], 0, 1)
    # 18. Find intersection parts of component.
    used_index = []
    intersect_parts_index = []
    for i in range(len(comp_parts)):
        part = comp_parts[i]
        num = 0  # number of lines in part
        for line in crop_lines:
            if part[line[0][1]][line[0][0]] == 0.0 and part[line[1][1]][line[1][0]] == 0.0:
                num += 1
        if num == 4:  # 4 lines in one part -> this part is the intersection part
            intersect_parts_index.append(i)
            used_index.append(i)
    print("intersection parts num: %d" % len(intersect_parts_index))
    # 19. Find the relation between parts and crop lines:
    # one line -> one part, or three parts (part1 + intersect_part + part2).
    intersect_parts_crop_lines_index = []
    for i in range(len(crop_lines)):
        line = crop_lines[i]
        for index in intersect_parts_index:
            intersect_part = comp_parts[index]
            if intersect_part[line[0][1]][line[0][0]] == 0.0 and intersect_part[line[1][1]][line[1][0]] == 0.0:
                # this line lies in an intersection part
                intersect_parts_crop_lines_index.append(i)
    print("crop lines in intersection part num: %d" % len(intersect_parts_crop_lines_index))
    # Cropping lines are divided into two types: in an intersect part or not.
    line_parts_relation = []
    for index in intersect_parts_crop_lines_index:
        line = crop_lines[index]
        # parts connected by this crop line: A - intersect_part - B
        line_connected_parts = []
        # find the intersection part(s) containing this line
        for i in intersect_parts_index:
            intersect_part = comp_parts[i]
            if intersect_part[line[0][1]][line[0][0]] == 0.0 and intersect_part[line[1][1]][line[1][0]] == 0.0:
                line_connected_parts.append(i)
        # find the two parts connected by this crop line
        for i in range(len(comp_parts)):
            # part should not be the intersect part
            if i in intersect_parts_index:
                continue
            # check that only ONE end point of the line is inside the part
            part = comp_parts[i]
            if part[line[0][1]][line[0][0]] == 0.0 and part[line[1][1]][line[1][0]] != 0.0 or \
                    part[line[0][1]][line[0][0]] != 0.0 and part[line[1][1]][line[1][0]] == 0.0:
                line_connected_parts.append(i)
        # add line connected parts to relation list (deduplicated)
        if line_connected_parts not in line_parts_relation:
            line_parts_relation.append(line_connected_parts)
    # add independent parts to the relation of line and parts
    for i in range(len(comp_parts)):
        line_connected_parts = []
        is_independent = True
        for relation in line_parts_relation:
            if i in relation:
                is_independent = False
        # check this part is independent or not
        if is_independent:
            line_connected_parts.append(i)
        if line_connected_parts != []:
            line_parts_relation.append(line_connected_parts)
    # 20. Merge parts based on the line-parts relation.
    for i in range(len(line_parts_relation)):
        blank_ = createBlankGrayscaleImage(component)
        relation = line_parts_relation[i]
        for rel in relation:
            part = comp_parts[rel]
            if part is None:
                continue
            # merge part ink into the blank image
            for y in range(part.shape[0]):
                for x in range(part.shape[1]):
                    if part[y][x] == 0.0:
                        blank_[y][x] = 0.0
        # add to strokes list
        strokes.append(blank_)
    return strokes
from utils.Functions import getConnectedComponents, getSkeletonOfImage, getEndPointsOfSkeletonLine, \ getCrossPointsOfSkeletonLine, createBlankGrayscaleImage, getCropLines, \ getClusterOfCornerPoints, getAllMiniBoundingBoxesOfImage, getContourImage, \ getValidCornersPoints, getDistanceBetweenPointAndComponent, isValidComponent, \ removeShortBranchesOfSkeleton, sortPointsOnContourOfImage, removeBreakPointsOfContour from utils.stroke_extraction_algorithm import autoStrokeExtractFromCharacter path = "../test_images/page1_char_3.png" # Image porcessing img = cv2.imread(path, 0) _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY) components = getConnectedComponents(img, connectivity=8) print("components num: %d" % len(components)) # 6. Get skeletons of component. comp_skeleton = getSkeletonOfImage(components[2]) comp_skeleton = getSkeletonOfImage(comp_skeleton) cv2.imshow("skeleton_original", comp_skeleton) # 7. Process the skeleton by remove extra branches. # comp_skeleton = removeShortBranchesOfSkeleton(comp_skeleton, length_threshold=30) comp_skeleton_rgb = cv2.cvtColor(comp_skeleton, cv2.COLOR_GRAY2RGB) cv2.imshow("skeleton_smoothed", comp_skeleton) # 8. Get the end points and cross points after skeleton processed end_points = getEndPointsOfSkeletonLine(comp_skeleton) cross_points = getCrossPointsOfSkeletonLine(comp_skeleton)
from algorithms.RDP import rdp from utils.contours_smoothed_algorithm import autoSmoothContoursOfCharacter path = "../test_images/page1_char_3.png" img = cv2.imread(path, 0) _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY) # get contours img_contour = getContourImage(img) # get skeleton img_skel = getSkeletonOfImage(img) # corner points contours = getConnectedComponents(img_contour, connectivity=8) print("contours num:", len(contours)) corner_points = [] for cont in contours: cont_sorted = sortPointsOnContourOfImage(cont) cont_points = rdp(cont_sorted, 5) corner_points.append(cont_points) print("corner points num:", len(corner_points)) img_contour_rgb = cv2.cvtColor(img_contour, cv2.COLOR_GRAY2RGB) # for i in range(len(corner_points)):
getCornerPointsOfImage, getClusterOfCornerPoints, getCropLinesPoints # 1133壬 2252支 0631叟 0633口 path = "1133壬.jpg" img = cv2.imread(path) # contour contour = getContourOfImage(img) contour = getSkeletonOfImage(contour) contour = np.array(contour, dtype=np.uint8) img = cv2.imread(path, 0) _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY) img_rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) components = getConnectedComponents(img) print("radicals num: %d" % len(components)) radicals = components[0] radicals = np.array(radicals, dtype=np.uint8) # sorted the contour po contour_sorted = sortPointsOnContourOfImage(contour) contour_rgb = cv2.cvtColor(contour, cv2.COLOR_GRAY2RGB) # # skeleton skeleton = getSkeletonOfImage(radicals) # # remove extra branches skeleton = removeExtraBranchesOfSkeleton(skeleton, distance_threshod=20) # end_points = getEndPointsOfSkeletonLine(skeleton)
def autoSmoothContoursOfComponent(component, eplison=10, max_error=200):
    """
    Automatically smooth the contours of one connected component.

    Each contour of the component is closed, ordered, simplified with the
    Ramer-Douglas-Peucker algorithm, re-fit with cubic Bezier curves, and the
    smoothed outline(s) are re-filled to produce a solid black-on-white image.

    :param component: grayscale image of a single connected component
                      (black foreground == 0, white background == 255).
    :param eplison: RDP simplification tolerance (name kept misspelled for
                    backward compatibility with existing callers).
    :param max_error: maximum fitting error passed to fitCurve().
    :return: smoothed component image (uint8), or None if component is None.
    """
    if component is None:
        return

    # Get contours of this component as separate 8-connected contour images.
    component_contours = getContourOfImage(component)
    contours = getConnectedComponents(component_contours, connectivity=8)
    print("contours num: ", len(contours))

    # Process contours to get closed, 1-pixel-width contours by removing
    # break points.
    contours_processed = [removeBreakPointsOfContour(cont) for cont in contours]
    print("contours processed num: %d" % len(contours_processed))

    # Smooth contours with RDP and cubic Bezier fit curves.
    contours_smoothed = []
    for cont in contours_processed:
        cont_smoothed = []
        # order pixels along the contour, then keep only the RDP key points
        cont_sorted = sortPointsOnContourOfImage(cont)
        cont_simp = rdp(cont_sorted, eplison)
        print("cont simp num: ", len(cont_simp))
        # fit each sub-contour between consecutive simplified points
        for i in range(len(cont_simp) - 1):
            start_index = cont_sorted.index(cont_simp[i])
            end_index = cont_sorted.index(cont_simp[i + 1])
            sub_cont_points = np.array(cont_sorted[start_index:end_index + 1])
            for bez in fitCurve(sub_cont_points, maxError=max_error):
                cont_smoothed += draw_cubic_bezier(bez[0], bez[1], bez[2], bez[3])
        contours_smoothed.append(cont_smoothed)
    print("contours smoothed num: ", len(contours_smoothed))

    if len(contours_smoothed) == 1:
        # No hole exists: directly fill black inside the single contour.
        cont_points = np.array([contours_smoothed[0]], "int32")
        fill_contour_smooth = np.array(np.ones_like(component) * 255, dtype=np.uint8)
        return cv2.fillPoly(fill_contour_smooth, cont_points, 0)

    # Holes exist: decide per contour whether it is an outer outline or a hole.
    print("there are holes!")
    fill_img_list = []
    hole_points = []
    comp_mask = (component == 0)  # original black foreground pixels
    for cont in contours_smoothed:
        cont_points = np.array([cont], "int32")
        fill_contour_smooth = np.array(np.ones_like(component) * 255, dtype=np.uint8)
        fill_contour_smooth = cv2.fillPoly(fill_contour_smooth, cont_points, 0)

        # Overlap ratio between this filled contour and the original component:
        # high overlap (> 0.8) -> outer outline; low overlap -> hole.
        # (vectorized equivalent of the original per-pixel counting loops)
        valid_num = int(np.count_nonzero(comp_mask))
        same_num = int(np.count_nonzero(comp_mask & (fill_contour_smooth == 0)))
        # guard against a fully-blank component (valid_num == 0 would divide by zero)
        ratio = (1.0 * same_num / valid_num) if valid_num else 0.0
        print("ratio: %f" % ratio)
        if ratio > 0.8:
            fill_img_list.append(fill_contour_smooth)
        else:
            # remember the hole's filled pixels as (x, y) points
            ys, xs = np.nonzero(fill_contour_smooth == 0)
            hole_points.extend(zip(xs, ys))

    # Merge the outer filled images, then punch the holes back out (white).
    blank_temp = np.ones_like(component) * 255
    for fl in fill_img_list:
        blank_temp[fl == 0] = 0
    for x, y in hole_points:
        blank_temp[y][x] = 255

    return blank_temp