def _merge_groups(groups, merge_threshold): label = 0 merged_groups = [(label, groups[0])] if len(groups) == 1: return merged_groups for i, g in enumerate(groups[1:]): real_index = i + 1 distance_of_left = min( get_distance_of_two_segments(s1, s2) for s1, s2 in product(groups[real_index - 1], g)) for s1, s2 in product(groups[real_index - 1], g): gg = [] if distance_of_left == get_distance_of_two_segments(s1, s2): gg.append(s1) gg.append(s2) last_element_label = merged_groups[-1][0] if distance_of_left <= 0: merged_groups.append((last_element_label, g)) else: merged_groups.append((last_element_label + 1, g)) assert len(merged_groups) == len(groups) return merged_groups
def _merge_groups(groups, merge_threshold): """ :param groups:[[line.start, line.end], [((start_x, start_y), (end_x, end_y))]] :param merge_threshold: :return: """ label = 0 merged_groups = [(label, groups[0])] if len(groups) == 1: return merged_groups for i, g in enumerate(groups[1:]): real_index = i + 1 distance_of_left = min(get_distance_of_two_segments(s1, s2) for s1, s2 in product(groups[real_index - 1], g) ) # s1, s2 # [(((line1_sx, line1_sy), (line1_ex, line1_ey)), ((line2_sx, line2_sy), (line2_ex, line2_ey)))] tight_close_threshold = 15 for s1, s2 in product(groups[real_index - 1], g): gg = [] if distance_of_left == get_distance_of_two_segments(s1, s2): gg.append(s1) gg.append(s2) min_len = min(get_line_length(gg)) last_element_label = merged_groups[-1][0] if distance_of_left <= 0: # min_len / tight_close_threshold: merged_groups.append((last_element_label, g)) else: merged_groups.append((last_element_label + 1, g)) assert len(merged_groups) == len(groups) return merged_groups
def _merge_groups_according_to_position(groups): labels = [-1 for i in range(len(groups))] dst_group = groups count = 0 for i in range(len(dst_group) - 30): if i != 0 and labels[i] != -1: continue labels[i] = count count += 1 group1 = dst_group[i] remain_group_list = dst_group[i + 1:i + 30] for j, group2 in enumerate(remain_group_list): distance_of_left = min( get_distance_of_two_segments(s1, s2) for s1, s2 in product(group1, group2)) if distance_of_left > 0.1: continue max_len = max(get_line_length(group1 + group2)) def is_percent_sign(): if len(group1) == 1: len1 = max(get_line_length(group1)) len2 = max(get_line_length(group2)) else: len1 = max(get_line_length(group2)) len2 = max(get_line_length(group1)) return distance_of_left < max_len / 4 and min(len(group1), len(group2)) == 1 \ and max(len(group1), len(group2)) == 10 and len1 > 5 * len2 and max_len < 0.2 if is_percent_sign(): if labels[i + j + 1] == -1: labels[i + j + 1] = labels[i] for label_index in range(len(labels)): if labels[label_index] == -1: labels[label_index] = count count += 1 merge_group = [] for index in range(len(labels)): label = labels[index] if len(merge_group) < label + 1: merge_group.append(dst_group[index]) else: merge_group[label] += dst_group[index] return merge_group, labels
def _merge_groups_according_to_position_char(groups, type_index): labels = [-1 for i in range(len(groups))] dst_group = groups count = 0 merge_percent = 1 merge_tight_and_i_j = 2 merge_special_char = 3 for i in range(len(dst_group) - 35): if i != 0 and labels[i] != -1: continue labels[i] = count count += 1 group1 = dst_group[i] remain_group_list = dst_group[i + 1:i + 35] for j, group2 in enumerate(remain_group_list): distance_of_left = min( get_distance_of_two_segments(s1, s2) for s1, s2 in product(group1, group2)) if distance_of_left > 0.1: continue min_len = min(get_line_length(group1 + group2)) max_len = max(get_line_length(group1 + group2)) bounding_box1 = get_bounding_box_list(group1) bounding_box2 = get_bounding_box_list(group2) bounding_box12 = get_bounding_box_list(group1 + group2) x1, y1 = perpendicular_of_box(bounding_box1) x2, y2 = perpendicular_of_box(bounding_box2) (min_x1, min_y1), (max_x1, max_y1) = get_bounding_box_list(group1) (min_x2, min_y2), (max_x2, max_y2) = get_bounding_box_list(group2) tight_close_threshold = 0.0001 distance_and_max_len_ratio1 = 2 distance_and_max_len_ratio2 = 3 dot_line_nums = 4 comma_line_nums = 7 i_minus_dot_line_nums = 1 j_minus_dot_line_nums = 4 big_ration_of_line = 11 vertical_margin = 0.001 text_line_margin = 0.1 def ration_between_distance_and_max_len_of_i_j(): return max_len / distance_and_max_len_ratio2 < distance_of_left < max_len / distance_and_max_len_ratio1 def big_num_of_lines(num): return max(len(group1), len(group2)) == num def small_num_of_lines(num): return min(len(group1), len(group2)) == num def is_tight_close(): return distance_of_left < tight_close_threshold def not_form_line(): return max_len < text_line_margin def is_vertical(margin): return abs(x1 - x2) < margin def big_ration_between_group_line(ratio): return max_len > ratio * min_len distance_margin = 0.1 def is_distance_legal(): distance_of_left < distance_margin def is_i(): return ration_between_distance_and_max_len_of_i_j() and big_ration_between_group_line(big_ration_of_line)\ and is_vertical(vertical_margin)\ and small_num_of_lines(i_minus_dot_line_nums) \ and big_num_of_lines(dot_line_nums) and not_form_line() def is_j(): return ration_between_distance_and_max_len_of_i_j() and big_ration_between_group_line(big_ration_of_line) \ and max_x1 == max_x2 \ and small_num_of_lines(dot_line_nums) \ and big_num_of_lines(j_minus_dot_line_nums) and not_form_line() def size_of_bounding_box(bounding_box): (min_x, min_y), (max_x, max_y) = bounding_box return abs(max_x - min_x) * abs(max_y - min_y) def is_colon(): return small_num_of_lines(dot_line_nums) and big_num_of_lines(dot_line_nums) \ and is_vertical(0) \ and size_of_bounding_box(bounding_box12) < \ 10 * min(size_of_bounding_box(bounding_box1), size_of_bounding_box(bounding_box2))\ and is_distance_legal()\ and not_form_line() def is_semicolon(): return small_num_of_lines(dot_line_nums) and big_num_of_lines(comma_line_nums) \ and is_vertical(0) \ and size_of_bounding_box(bounding_box12) < \ 15 * min(size_of_bounding_box(bounding_box1), size_of_bounding_box(bounding_box2)) \ and min_len > max_len / 2 and is_distance_legal() def is_percent_sign(): if len(group1) == 1: len1 = max(get_line_length(group1)) len2 = max(get_line_length(group2)) else: len1 = max(get_line_length(group2)) len2 = max(get_line_length(group1)) return distance_of_left < max_len / 4 and min(len(group1), len(group2)) == 1 \ and max(len(group1), len(group2)) == 10 and len1 > 5 * len2 and max_len < 0.2 def is_exclamation_mark(): return min(len(group1), len(group2)) == 1 and max(len(group1), len(group2)) == 12 and \ distance_of_left < max_len / 2 and abs(x1 - x2) < 0.01 def is_equals(): return min(len(group1), len(group2)) == 1 and max(len(group1), len(group2)) == 1 \ and min_len > max_len / 1.2 and distance_of_left < min_len / 2 and abs(x1 - x2) < 0.01 def is_minus_and_plus(): return min(len(group1), len(group2)) == 1 and max(len(group1), len(group2)) == 2 \ and min_len > max_len / 1.2 and distance_of_left < min_len / 5 def is_special_char(): return any([ is_colon(), is_semicolon(), is_exclamation_mark(), is_equals(), is_minus_and_plus() ]) def is_lower_i_or_j(): return any([is_i(), is_j()]) def is_merged_before(): return labels[i + j + 1] == -1 if all([ type_index == merge_percent, is_percent_sign(), is_merged_before() ]): labels[i + j + 1] = labels[i] elif all([ type_index == merge_tight_and_i_j, is_tight_close() or is_lower_i_or_j(), is_merged_before() ]): labels[i + j + 1] = labels[i] elif all([ type_index == merge_special_char, is_special_char(), is_merged_before() ]): labels[i + j + 1] = labels[i] for label_index in range(len(labels)): if labels[label_index] == -1: labels[label_index] = count count += 1 merge_group = [] for index in range(len(labels)): label = labels[index] if len(merge_group) < label + 1: merge_group.append(dst_group[index]) else: merge_group[label] += dst_group[index] return merge_group, labels
def _merge_groups1(groups): label = 0 merged_groups = [(label, groups[0])] if len(groups) == 1: return merged_groups print('len=', len(groups[1:])) for i, g in enumerate(groups[1:]): real_index = i + 1 min_len = 0 distance_of_left = min( get_distance_of_two_segments(s1, s2) for s1, s2 in product(groups[real_index - 1], g)) max_len = 0 for s1, s2 in product(groups[real_index - 1], g): gg = [] if distance_of_left == get_distance_of_two_segments(s1, s2): gg.append(s1) gg.append(s2) min_len = min(get_line_length(gg)) max_len = max(get_line_length(gg)) last_element_label = merged_groups[-1][0] tight_close_threshold = 15 big_ratio_threshold = 11 small_ratio_threshold_of_colon = 1.1 small_ratio_threshold = 1.2 small_ratio_threshold_of_semicolon = 1.5 close_threshold_of_i_and_j = 2.3 close_threshold_of_percent = 4 dot_lines_num = 6 dot_lines_num_in_gdo = 4 lower_i_without_dot = 1 lower_j_without_dot = 6 dot_of_percent = 12 dot_of_oblique_line = 1 lines_of_above_semicolon = 4 lines_of_below_semicolon = 5 lines_of_half_equals = 1 lines_of_plus = 2 lines_of_minus = 1 def get_one_group_line_number_by_group(func, n): return func((len(merged_groups[-1][1]), len(g))) == n def get_smaller_group_line_number_by_group(n): return get_one_group_line_number_by_group(min, n) def get_larger_group_line_number_by_group(n): return get_one_group_line_number_by_group(max, n) def big_line_ration_with_long_and_short(ratio_threshold): return max_len > ratio_threshold * min_len def small_line_ration_with_long_and_short(ratio_threshold): return min_len > max_len / ratio_threshold def is_groups_are_close(threshold): return distance_of_left <= max_len / threshold def is_lower_i(): return all([ get_smaller_group_line_number_by_group(lower_i_without_dot), get_larger_group_line_number_by_group(dot_lines_num) or get_larger_group_line_number_by_group(dot_lines_num_in_gdo), big_line_ration_with_long_and_short(big_ratio_threshold), is_groups_are_close(close_threshold_of_i_and_j) ]) def is_lower_j(): return all([ get_smaller_group_line_number_by_group(lower_j_without_dot), get_larger_group_line_number_by_group(dot_lines_num), big_line_ration_with_long_and_short(big_ratio_threshold), is_groups_are_close(close_threshold_of_i_and_j) ]) def is_percent(): return ( (get_smaller_group_line_number_by_group(dot_of_oblique_line) and get_larger_group_line_number_by_group(dot_of_percent)) or (get_smaller_group_line_number_by_group(dot_of_percent) and get_larger_group_line_number_by_group(dot_of_percent + dot_of_oblique_line) )) and is_groups_are_close(close_threshold_of_percent) def is_colon(): return all([ get_smaller_group_line_number_by_group(dot_lines_num), get_larger_group_line_number_by_group(dot_lines_num), small_line_ration_with_long_and_short( small_ratio_threshold_of_colon) ]) def is_semicolon(): return all([ get_smaller_group_line_number_by_group( lines_of_above_semicolon), get_larger_group_line_number_by_group( lines_of_below_semicolon), small_line_ration_with_long_and_short( small_ratio_threshold_of_semicolon) ]) def is_equals(): return all([ get_smaller_group_line_number_by_group(lines_of_half_equals), get_larger_group_line_number_by_group(lines_of_half_equals), small_line_ration_with_long_and_short(small_ratio_threshold), distance_of_left <= max_len / 1.2 ]) def is_plus_minus(): return all([ get_smaller_group_line_number_by_group(lines_of_plus), get_larger_group_line_number_by_group(lines_of_minus), small_line_ration_with_long_and_short(small_ratio_threshold) ]) def is_tight_close(): return distance_of_left <= min_len / tight_close_threshold if any([ is_tight_close(), is_lower_i(), is_lower_j(), is_percent(), is_colon(), is_semicolon(), is_equals(), is_plus_minus() ]) and max_len < 0.1: merged_groups.append((last_element_label, g)) else: merged_groups.append((last_element_label + 1, g)) assert len(merged_groups) == len(groups) return merged_groups
def _merge_groups_according_to_position2(groups): labels = [-1 for i in range(len(groups))] dst_group = groups count = 0 for i in range(len(dst_group) - 30): if i != 0 and labels[i] != -1: continue labels[i] = count count += 1 group1 = dst_group[i] remain_group_list = dst_group[i + 1:i + 30] for j, group2 in enumerate(remain_group_list): distance_of_left = min( get_distance_of_two_segments(s1, s2) for s1, s2 in product(group1, group2)) if distance_of_left > 0.1: continue min_len = min(get_line_length(group1 + group2)) max_len = max(get_line_length(group1 + group2)) bounding_box1 = get_bounding_box_list(group1) bounding_box2 = get_bounding_box_list(group2) bounding_box12 = get_bounding_box_list(group1 + group2) x1, y1 = perpendicular_of_box(bounding_box1) x2, y2 = perpendicular_of_box(bounding_box2) (min_x1, min_y1), (max_x1, max_y1) = get_bounding_box_list(group1) (min_x2, min_y2), (max_x2, max_y2) = get_bounding_box_list(group2) def is_tight_close(): return distance_of_left < 0.0001 def is_i(): return max_len / 3 < distance_of_left < max_len / 2 and max_len > 11 * min_len and abs( x1 - x2) < 0.01 and \ min(len(group1), len(group2)) == 1 and max(len(group1), len(group2)) == 4 and max_len < 0.1 def is_j(): return max_len / 3 < distance_of_left < max_len / 2 and max_len > 11 * min_len and \ min(len(group1), len(group2)) == 4 and max(len(group1), len(group2)) == 4 and max_len < 0.1 \ and max_x1 == max_x2 def size_of_bounding_box(bounding_box): (min_x, min_y), (max_x, max_y) = bounding_box return abs(max_x - min_x) * abs(max_y - min_y) def is_colon(): return abs(x1 - x2) == 0 and distance_of_left < 0.1 and \ min(len(group1), len(group2)) == 4 and max(len(group1), len(group2)) == 4 and \ max_len < 0.1 and size_of_bounding_box(bounding_box12) < \ 10 * min(size_of_bounding_box(bounding_box1), size_of_bounding_box(bounding_box2)) def is_semicolon(): return abs(x1 - x2) == 0 and min_len > max_len / 2 and distance_of_left < 0.1 and \ min(len(group1), len(group2)) == 4 and max(len(group1), len(group2)) == 7 and \ size_of_bounding_box(bounding_box12) < 15 * min(size_of_bounding_box(bounding_box1), size_of_bounding_box(bounding_box2)) def is_exclamation_mark(): return min(len(group1), len(group2)) == 1 and max(len(group1), len(group2)) == 12 and \ distance_of_left < max_len / 2 and abs(x1 - x2) < 0.01 def is_equals(): return min(len(group1), len(group2)) == 1 and max(len(group1), len(group2)) == 1 \ and min_len > max_len / 1.2 and distance_of_left < min_len / 2 and abs(x1 - x2) < 0.01 def is_minus_and_plus(): return min(len(group1), len(group2)) == 1 and max(len(group1), len(group2)) == 2 \ and min_len > max_len / 1.2 and distance_of_left < min_len / 5 def is_special_char(): return any([ is_colon(), is_semicolon(), is_exclamation_mark(), is_equals(), is_minus_and_plus() ]) def is_lower_i_or_j(): return any([is_i(), is_j()]) if is_special_char(): if labels[i + j + 1] == -1: labels[i + j + 1] = labels[i] for label_index in range(len(labels)): if labels[label_index] == -1: labels[label_index] = count count += 1 merge_group = [] for index in range(len(labels)): label = labels[index] if len(merge_group) < label + 1: merge_group.append(dst_group[index]) else: merge_group[label] += dst_group[index] return merge_group, labels
def _merge_groups1(groups): label = 0 merged_groups = [(label, groups[0])] if len(groups) == 1: return merged_groups print('len=', len(groups[1:])) for i, g in enumerate(groups[1:]): real_index = i + 1 min_len = 0 # min(getlinelength(g)) distance_of_left = min(get_distance_of_two_segments(s1, s2) for s1, s2 in product(groups[real_index - 1], g) ) max_len = 0 for s1, s2 in product(groups[real_index - 1], g): gg = [] if distance_of_left == get_distance_of_two_segments(s1, s2): gg.append(s1) gg.append(s2) min_len = min(get_line_length(gg)) max_len = max(get_line_length(gg)) last_element_label = merged_groups[-1][0] tight_close_threshold = 15 big_ratio_threshold = 11 small_ratio_threshold_of_colon = 1.1 small_ratio_threshold = 1.2 small_ratio_threshold_of_semicolon = 1.5 close_threshold_of_i_and_j = 2.3 close_threshold_of_percent = 4 dot_lines_num = 6 dot_lines_num_in_gdo = 4 lower_i_without_dot = 1 lower_j_without_dot = 6 dot_of_percent = 12 dot_of_oblique_line = 1 lines_of_above_semicolon = 4 lines_of_below_semicolon = 5 lines_of_half_equals = 1 lines_of_plus = 2 lines_of_minus = 1 lines_of_index = merged_groups[-1][1] bounding_box1 = get_bounding_box_list(lines_of_index) bounding_box2 = get_bounding_box_list(g) bounding_box12 = get_bounding_box_list(lines_of_index + g) # print(bounding_box1, bounding_box2, bounding_box12) def size_of_bounding_box(bounding_box): (min_x, min_y), (max_x, max_y) = bounding_box return (max_x - min_x) * (max_y - min_y) def perpendicular_of_box(bounding_box): (min_x, min_y), (max_x, max_y) = bounding_box p_x = (min_x + max_x) / 2 p_y = (min_y + max_y) / 2 return p_x, p_y def is_low_i_or_colon_equals_minus_and_plus(): (min_x, min_y), (max_x, max_y) = bounding_box1 (min_x1, min_y1), (max_x1, max_y1) = bounding_box2 min_size = min(size_of_bounding_box(bounding_box1), size_of_bounding_box(bounding_box2)) max_size = max(size_of_bounding_box(bounding_box1), size_of_bounding_box(bounding_box2)) merge_size = size_of_bounding_box(bounding_box12) x1, y1 = perpendicular_of_box(bounding_box1) x2, y2 = perpendicular_of_box(bounding_box2) span = max(max_x - min_x, max_x1 - min_x1) def is_perpendicular(): return (abs(x1 - x2) < span) or (abs(y1 - y2) < span) return is_perpendicular() and distance_of_left < max_len / 2 \ and min((len(merged_groups[-1][1]), len(g))) == 1 \ # and max((len(merged_groups[-1][1]), len(g))) == 4 # """ # ":" # if is_perpendicular() \ # and min_len < max_len / 1.1 and merge_size > 10 * max_size: # return True # # "=" # if is_perpendicular() \ # and min_len < max_len / 1.2 and max((len(merged_groups[-1][1]), len(g))) == 1: # return True # ";" # if is_perpendicular() \ # and min_len < max_len / 1.5 and merge_size > 5 < min_size: # return True # "+_" # if is_perpendicular() \ # and min_len < max_len / 1.2 and min((len(merged_groups[-1][1]), len(g))) == 1\ # and max((len(merged_groups[-1][1]), len(g))) == 2: # return True # """ def is_colon_or_i(): min_size = min(size_of_bounding_box(bounding_box1), size_of_bounding_box(bounding_box2)) max_size = max(size_of_bounding_box(bounding_box1), size_of_bounding_box(bounding_box2)) merge_size = size_of_bounding_box(bounding_box12) if min_size < math.e ** -16: return False if merge_size < 10 * min_size and min_size < 0.0001: print(bounding_box1, bounding_box2) return True return False def get_one_group_line_number_by_group(func, n): return func((len(merged_groups[-1][1]), len(g))) == n def get_smaller_group_line_number_by_group(n): return get_one_group_line_number_by_group(min, n) def get_larger_group_line_number_by_group(n): return get_one_group_line_number_by_group(max, n) def big_line_ration_with_long_and_short(ratio_threshold): return max_len > ratio_threshold * min_len def small_line_ration_with_long_and_short(ratio_threshold): return min_len > max_len / ratio_threshold def is_groups_are_close(threshold): return distance_of_left <= max_len / threshold def is_lower_i(): return all([get_smaller_group_line_number_by_group(lower_i_without_dot), get_larger_group_line_number_by_group(dot_lines_num) or get_larger_group_line_number_by_group(dot_lines_num_in_gdo), big_line_ration_with_long_and_short(big_ratio_threshold), is_groups_are_close(close_threshold_of_i_and_j)]) def is_lower_i_gdo(): return all([ distance_of_left < max_len ]) def is_colon_gdo(): return all([get_smaller_group_line_number_by_group(dot_lines_num_in_gdo), get_larger_group_line_number_by_group(dot_lines_num_in_gdo), distance_of_left < 0.2, small_line_ration_with_long_and_short(small_ratio_threshold_of_colon)]) def is_lower_j(): return all([get_smaller_group_line_number_by_group(lower_j_without_dot), get_larger_group_line_number_by_group(dot_lines_num), big_line_ration_with_long_and_short(big_ratio_threshold), is_groups_are_close(close_threshold_of_i_and_j)]) def is_percent(): return ((get_smaller_group_line_number_by_group(dot_of_oblique_line) and get_larger_group_line_number_by_group(dot_of_percent)) or ( get_smaller_group_line_number_by_group(dot_of_percent) and get_larger_group_line_number_by_group(dot_of_percent + dot_of_oblique_line) )) and is_groups_are_close(close_threshold_of_percent) def is_colon(): return all([get_smaller_group_line_number_by_group(dot_lines_num), get_larger_group_line_number_by_group(dot_lines_num), small_line_ration_with_long_and_short(small_ratio_threshold_of_colon), distance_of_left > 10 * max_len]) def is_semicolon(): return all([get_smaller_group_line_number_by_group(lines_of_above_semicolon), get_larger_group_line_number_by_group(lines_of_below_semicolon), small_line_ration_with_long_and_short(small_ratio_threshold_of_semicolon)]) def is_equals(): return all([get_smaller_group_line_number_by_group(lines_of_half_equals), get_larger_group_line_number_by_group(lines_of_half_equals), small_line_ration_with_long_and_short(small_ratio_threshold), distance_of_left <= max_len / 1.2]) def is_plus_minus(): return all([get_smaller_group_line_number_by_group(lines_of_plus), get_larger_group_line_number_by_group(lines_of_minus), small_line_ration_with_long_and_short(small_ratio_threshold)]) def is_tight_close(): return distance_of_left <= min_len / tight_close_threshold if any([is_tight_close(), is_lower_i(), is_lower_j(), is_percent(), is_colon(), is_semicolon(), is_equals(), is_plus_minus()]) and max_len < 0.1: # if is_low_i_or_colon_equals_minus_and_plus(): # print('hhhhh') merged_groups.append((last_element_label, g)) else: merged_groups.append((last_element_label + 1, g)) assert len(merged_groups) == len(groups) return merged_groups
def _merge_groups_according_to_position(groups): label = 0 dst_group = copy.deepcopy(groups) merged_groups = [(label, dst_group[0])] if len(groups) == 1: return merged_groups "merge approach box lines,dynamic dst_group" labels = [0 for i in range(len(dst_group))] for i in range(len(dst_group) - 1): if labels[i] == 0: labels[i] = label[i - 1] if i >= len(dst_group) - 20: break g = dst_group[i] new_list = dst_group[i + 1:i + 20] minimum_distance = min(dist_of_box(line_box1, g) for line_box1 in new_list) line_combination = [] for item in new_list: for item1 in item: for item2 in g: line_combination.append((item1, item2)) distance_of_left = min(get_distance_of_two_segments(s1, s2) for s1, s2 in line_combination) for j, line_box1 in enumerate(new_list): if True: bounding_box1 = get_bounding_box_list(line_box1) bounding_box2 = get_bounding_box_list(g) dist = dist_of_box(line_box1, g) def perpendicular_of_box(bounding_box): (min_x, min_y), (max_x, max_y) = bounding_box p_x = (min_x + max_x) / 2 p_y = (min_y + max_y) / 2 return p_x, p_y x1, y1 = perpendicular_of_box(bounding_box1) x2, y2 = perpendicular_of_box(bounding_box2) (min_x, min_y), (max_x, max_y) = bounding_box1 (min_x1, min_y1), (max_x1, max_y1) = bounding_box2 span = max(max_x - min_x, max_x1 - min_x1) def is_perpendicular(): return (abs(x1 - x2) < span) or (abs(y1 - y2) < span) min_len = min(get_line_length(line_box1 + g)) max_len = max(get_line_length(line_box1 + g)) # if is_merged(line_box1, g):is_perpendicular() and if \ min((len(line_box1)), len(g)) == 1 and \ max_len > 10 * min_len and dist < 0.1 and \ max((len(line_box1)), len(g)) == 6: print('hhhhh') print(len(line_box1), len(g)) labels[i + j] = label[i] else: pass "get merged label" merged_groups = [(label, dst_group[0])] for i, g in enumerate(dst_group[1:]): index = i + 1 merged_groups.append((labels[index], g)) return merged_groups