def method2():
    """GUI handler: let the user pick an image, remove table ruling lines,
    run layout analysis + OCR, and display each recognized text in a Tk
    window at the source area's position.

    Side effects: writes intermediate images under c:/ml_dir/mask/, opens a
    Tk window, and updates the global status label ``lb``.
    """
    filename = tkinter.filedialog.askopenfilename()
    start = time.perf_counter()
    if filename == '':
        # Guard clause: nothing selected in the file dialog.
        lb.config(text='您没有选择任何文件')
        return

    la = LayoutAnalyzerEx()
    area_analyzer = TextAreaAnalyzerEx()
    pro = ImagePreprocessor()

    # imdecode + np.fromfile reads paths with non-ASCII characters on
    # Windows, which cv2.imread cannot; flag 0 = grayscale.
    old_im = cv2.imdecode(np.fromfile(filename, dtype=np.uint8), 0)
    im = cv2.adaptiveThreshold(old_im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 11, 2)
    old_im = pro.reshape(im)

    # Deskew, then erase table ruling lines.  The round-trip through PNG
    # files preserves the channel layout detectTable expects.
    rot_img = pro.rotateImg(old_im)
    cv2.imwrite('c:/ml_dir/mask/thresh.png', rot_img)
    rot_img = cv2.imread('c:/ml_dir/mask/thresh.png')
    pro_img = pro.detectTable(rot_img, rot_img)
    cv2.imwrite('c:/ml_dir/mask/nomask.png', pro_img)

    # Re-binarize the table-free image for layout analysis.
    pro = ImagePreprocessor()
    old_im = cv2.imdecode(
        np.fromfile('C:/ml_dir/mask/nomask.png', dtype=np.uint8), 0)
    im = cv2.adaptiveThreshold(old_im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 11, 2)
    old_im = pro.reshape(im)
    height, width = old_im.shape

    root_area, all_areas = la.get_page_layout(im)

    # Result window sized to the processed page.
    window = tk.Tk()
    window.title('The OCR Results')
    window.geometry('%dx%d' % (width, height))
    for area in all_areas:
        text, porp = area_analyzer.get_sub_area_text(area.get_sub_image())
        # Place each recognized text at the area's original page position.
        l = tk.Label(
            window,
            text=text,
        )
        l.place(x=area.tl_x, y=area.tl_y, width=area.width,
                height=area.height)

    end = time.perf_counter()
    print('Running time: %10.10s Seconds' % (end - start))
    lb.config(text='计算时间: %10.5s 秒' % (end - start))
    cv2.destroyAllWindows()
def method1():
    """GUI handler: let the user pick an image, run layout analysis + OCR,
    display the recognized texts in a Tk window at their page positions,
    and save the concatenated result to C:/ml_dir/result.txt.

    Side effects: opens a Tk window, writes the result file, and updates
    the global status label ``lb``.
    """
    filename = tkinter.filedialog.askopenfilename()
    start = time.perf_counter()
    if filename == '':
        # Guard clause: nothing selected in the file dialog.
        lb.config(text='您没有选择任何文件')
        return

    la = LayoutAnalyzerEx()
    area_analyzer = TextAreaAnalyzerEx()
    # imdecode + np.fromfile reads paths with non-ASCII characters on
    # Windows; flag 0 = grayscale.
    old_im = cv2.imdecode(np.fromfile(filename, dtype=np.uint8), 0)
    im = cv2.adaptiveThreshold(old_im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 11, 2)
    height, width = old_im.shape
    root_area, all_areas = la.get_page_layout(im)

    # Result window sized to the source page.
    window = tk.Tk()
    window.title('The OCR Results')
    window.geometry('%dx%d' % (width, height))
    result_parts = []
    for area in all_areas:
        text, porp = area_analyzer.get_sub_area_text(area.get_sub_image())
        result_parts.append(text + '\n')
        # Place each recognized text at the area's original page position.
        l = tk.Label(
            window,
            text=text,
        )
        l.place(x=area.tl_x, y=area.tl_y, width=area.width,
                height=area.height)
    end = time.perf_counter()
    print('Running time: %10.10s Seconds' % (end - start))
    # BUGFIX: use a context manager so the file is closed even if write()
    # raises, and write utf-8 explicitly so Chinese OCR output does not
    # fail with UnicodeEncodeError under a locale default codec.
    with open('C:/ml_dir/result.txt', 'w', encoding='utf-8') as f:
        f.write(''.join(result_parts))
    lb.config(text='计算时间: %10.5s 秒' % (end - start))
    cv2.destroyAllWindows()
# Each result is a triple: [word, type, probability].
def get_result_from_json_string(self, json_str):
    """Decode *json_str* into a list of [word, type, prob] triples,
    ordered from most to least probable."""
    decoded = json.loads(json_str)
    results = [[entry['word'], entry['type'], float(entry['prop'])]
               for entry in decoded]
    print("Results:", results, type(results))
    # Sort by probability, best candidate first.
    results.sort(key=lambda triple: triple[2], reverse=True)
    return results


if __name__ == '__main__':
    # Ad-hoc smoke test: run the layout analyzer on a sample page image.
    la = LayoutAnalyzerEx()
    img_path = 'C:/ml_dir/test/1.png'
    old_im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)
    gray = cv2.cvtColor(old_im, cv2.COLOR_BGR2GRAY)
    im = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)[1]
    cv2.namedWindow('src_img', 0)
    cv2.imshow('src_img', old_im)
    root_area, all_areas = la.get_page_layout(im)
    area_analyzer = TextAreaAnalyzerEx()
def __init__(self):
    """Wire up the two analysis engines: page-layout segmentation and
    per-area text recognition."""
    self.layout_analysis = LayoutAnalyzerEx()
    self.text_area_analyzer = TextAreaAnalyzerEx()
class PageAnalysis(object):
    """Recognize the text of a whole page image: segment it into areas with
    LayoutAnalyzerEx, then recognize each area with TextAreaAnalyzerEx."""

    def __init__(self):
        # layout_analysis: page segmentation; text_area_analyzer: OCR of a
        # single area.
        self.layout_analysis = LayoutAnalyzerEx()
        self.text_area_analyzer = TextAreaAnalyzerEx()

    def get_page_text(self, page_img_file):
        """Return (text, probability) recognized from the image file at
        *page_img_file*.

        Opens a blocking debug window showing the detected layout
        rectangles before recognition starts.
        """
        # imdecode + np.fromfile handles non-ASCII paths on Windows;
        # flag 1 = color.
        original_image = cv2.imdecode(
            np.fromfile(page_img_file, dtype=np.uint8), 1)
        gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
        im = cv2.threshold(gray, 0, 255,
                           cv2.THRESH_OTSU | cv2.THRESH_BINARY)[1]
        t1 = time.perf_counter()
        root_area, all_areas = self.layout_analysis.get_page_layout(im.copy())
        t2 = time.perf_counter()
        print('time for layout analysis: ', t2 - t1)
        # Debug: draw the detected layout and show it (blocks on keypress).
        aa = im.copy()
        for area in all_areas:
            cv2.rectangle(aa, (area.tl_x, area.tl_y),
                          (area.tl_x + area.width, area.tl_y + area.height),
                          [0, 0, 0], 1)
        cv2.imshow('x', aa)
        cv2.waitKey()
        text, prop = self.get_area_text_ex1(root_area, original_image)
        # BUGFIX: was time.clock(), which was removed in Python 3.8 and
        # measured a different clock than the perf_counter() reading t2 it
        # is subtracted from.
        t3 = time.perf_counter()
        print('time for text analysis: ', t3 - t2)
        return text, prop

    def get_area_text(self, text_area, original_image):
        """Recursively recognize *text_area*: horizontal children are joined
        with spaces, vertical children with CRLF.

        Returns (text, probability); the probability is that of the last
        recognized leaf (legacy behavior — get_area_text_ex1 averages
        instead).
        """
        text = ''
        # BUGFIX: prop was unbound (UnboundLocalError on return) when the
        # layout matched no branch or a loop never assigned it.
        prop = 0
        if text_area.layout == LAYOUT_UNKNOWN:
            text, prop = self.text_area_analyzer.get_text(
                text_area, original_image)
        elif text_area.layout == LAYOUT_HORIZONTAL:
            for i in range(len(text_area.sub_areas)):
                ret_text, prop = self.get_area_text(text_area.sub_areas[i],
                                                    original_image)
                if ret_text == '' or ret_text is None:
                    continue
                text = text + ' ' + ret_text
        elif text_area.layout == LAYOUT_VERTICAL:
            for i in range(len(text_area.sub_areas)):
                ret_text, prop = self.get_area_text(text_area.sub_areas[i],
                                                    original_image)
                if ret_text == '' or ret_text is None:
                    continue
                text = text + '\r\n' + ret_text
        return text, prop

    # Extended recognition strategy (translated from the original notes):
    # 1. Isolated (leaf) areas:
    #   1.1 No left/right neighbour: recognize directly.
    #   1.2 With neighbours:
    #     1.2.1 Interior area between two similar-height lines: if it may
    #           be a punctuation mark or a single character, resize it to
    #           the neighbours' line height before recognizing; otherwise
    #           recognize directly.
    #     1.2.2 Edge area: if it may be punctuation or a single character,
    #           recognize at both the actual height and the neighbour's
    #           line height and keep the more probable result; otherwise
    #           recognize directly.
    # 2. Horizontally laid-out areas: link neighbour areas, then recognize
    #    each sub-area in order.
    # 3. Vertically laid-out areas:
    #   3.1 If they qualify for combined recognition, compare combined vs.
    #       per-area recognition and keep the more probable result.
    #   3.2 Otherwise recognize each sub-area directly.
    def get_area_text_ex1(self, text_area, original_image):
        """Recognize *text_area* with the neighbour-aware strategy above.

        Returns (text, probability) where probability is averaged over the
        recognized sub-areas; returns (None, 0) for an unexpected layout.
        """
        text = ''
        total_prop = 0
        if text_area.layout == LAYOUT_HORIZONTAL:
            # 2. Link every sub-area to its left/right neighbours so leaf
            # recognition can consult adjacent line heights.
            n = len(text_area.sub_areas)
            for i in range(n):
                if i == 0 and n > 1:
                    text_area.sub_areas[i].set_neighbors_area(
                        None, text_area.sub_areas[i + 1])
                if i == n - 1 and n > 1:
                    text_area.sub_areas[i].set_neighbors_area(
                        text_area.sub_areas[i - 1], None)
                if i > 0 and i < n - 1:
                    text_area.sub_areas[i].set_neighbors_area(
                        text_area.sub_areas[i - 1],
                        text_area.sub_areas[i + 1])
                ret_text, prop = self.get_area_text_ex1(
                    text_area.sub_areas[i], original_image)
                if ret_text == '' or ret_text is None:
                    continue
                total_prop = total_prop + prop
                if i == 0:
                    text = text + ret_text
                else:
                    text = text + ' ' + ret_text
            if n > 0:  # guard against ZeroDivisionError on empty layouts
                total_prop = total_prop / n
            return text, total_prop
        elif text_area.layout == LAYOUT_VERTICAL:
            # 3. Vertical layout.
            cmb_text = ''
            cmb_prop = 0
            if len(text_area.sub_areas) <= 3 and len(text_area.sub_areas) > 1:
                # 3.1 Combined-recognition candidate: compute the bounding
                # box of all sub-areas.  (The original non-leaf check was
                # commented out, so every candidate is treated as leaves.)
                all_leaf_areas = True
                tl_point = []
                br_point = []
                for idx in range(len(text_area.sub_areas)):
                    sub = text_area.sub_areas[idx]
                    if idx == 0:
                        tl_point = [sub.tl_x, sub.tl_y]
                        br_point = [sub.get_br_point()[0],
                                    sub.get_br_point()[1]]
                    else:
                        if tl_point[0] > sub.tl_x:
                            tl_point[0] = sub.tl_x
                        if tl_point[1] > sub.tl_y:
                            tl_point[1] = sub.tl_y
                        if br_point[0] < sub.get_br_point()[0]:
                            br_point[0] = sub.get_br_point()[0]
                        if br_point[1] < sub.get_br_point()[1]:
                            br_point[1] = sub.get_br_point()[1]
                if all_leaf_areas and ((br_point[0] - tl_point[0] + 1) * 1.0 /
                                       (br_point[1] - tl_point[1] + 1)) <= 1.2:
                    # Roughly square combined box: might be one character or
                    # symbol split apart, so also recognize the union and
                    # keep it if its probability wins below.
                    new_area = TextArea(text_area.sub_areas[0].image,
                                        tl_point[0], tl_point[1],
                                        br_point[1] - tl_point[1] + 1,
                                        br_point[0] - tl_point[0] + 1,
                                        text_area.sub_areas[0].parent_area)
                    new_area.set_neighbors_area(text_area.left_area,
                                                text_area.right_area)
                    new_area.layout = LAYOUT_UNKNOWN
                    cmb_text, cmb_prop = self.get_area_text_ex1(
                        new_area, original_image)
            # 3.2 Recognize each sub-area separately.
            split_text = ''
            for i in range(len(text_area.sub_areas)):
                ret_text, prop = self.get_area_text_ex1(
                    text_area.sub_areas[i], original_image)
                if ret_text == '' or ret_text is None:
                    continue
                total_prop = total_prop + prop
                if i == 0:
                    split_text = split_text + ret_text
                else:
                    split_text = split_text + '\r\n' + ret_text
            if len(text_area.sub_areas) > 0:  # ZeroDivision guard
                total_prop = total_prop / len(text_area.sub_areas)
            # Keep whichever of the combined / split results is more likely.
            if cmb_prop < total_prop:
                text = split_text
            else:
                text = cmb_text
                total_prop = cmb_prop
            return text, total_prop
        elif text_area.layout == LAYOUT_UNKNOWN:
            # 1. Isolated area.
            if text_area.left_area is None and text_area.right_area is None:
                # 1.1 No neighbours: recognize directly.
                text, total_prop = self.text_area_analyzer.get_text(
                    text_area, original_image)
                return text, total_prop
            # 1.2 Area with at least one neighbour.
            if (text_area.left_area is not None
                    and text_area.right_area is not None):
                # 1.2.1 Interior area: compare against the neighbours'
                # average line height.
                lr_avg_h = (text_area.left_area.height +
                            text_area.right_area.height) / 2.0
                # The neighbours form one line when their heights differ by
                # less than 20% of the average and their vertical centres by
                # less than 10%, and this area is shorter than that line.
                if ((abs(text_area.left_area.height -
                         text_area.right_area.height) / lr_avg_h) < 0.2 and
                        (abs(text_area.left_area.get_center_point()[1] -
                             text_area.right_area.get_center_point()[1]) /
                         lr_avg_h < 0.1) and text_area.height < lr_avg_h):
                    if ((text_area.width * 1.0 / text_area.height) < 1.2 and
                            text_area.height < lr_avg_h * 0.4) or (
                                text_area.width / lr_avg_h < 1.2):
                        # Likely punctuation or a single character: stretch
                        # the area to match the neighbouring line.
                        new_tl_y = (text_area.left_area.tl_y +
                                    text_area.right_area.tl_y) / 2
                        text_area.set_area_params(text_area.tl_x, new_tl_y,
                                                  lr_avg_h, text_area.width)
                # Recognize the (possibly resized) area directly.
                text, total_prop = self.text_area_analyzer.get_text(
                    text_area, original_image)
            else:
                # 1.2.2 Edge area with exactly one neighbour.
                ext_text = ''
                ext_prop = 0
                if text_area.left_area is not None:
                    nb_area = text_area.left_area
                else:
                    nb_area = text_area.right_area
                nb_height = nb_area.height
                # Noticeably shorter than the neighbour and plausibly a
                # punctuation mark / single character: also try recognizing
                # at the neighbour's line height.
                if text_area.height < nb_height * 0.7 and (
                        ((text_area.width * 1.0 / text_area.height) < 1.2 and
                         text_area.height < nb_height * 0.4) or
                        (text_area.width / nb_height < 1.2)):
                    new_area = TextArea(text_area.image, text_area.tl_x,
                                        nb_area.tl_y, nb_height,
                                        text_area.width,
                                        text_area.parent_area)
                    ext_text, ext_prop = self.text_area_analyzer.get_text(
                        new_area, original_image)
                # Direct recognition at the actual size.
                text, total_prop = self.text_area_analyzer.get_text(
                    text_area, original_image)
                # Keep the more probable of the two attempts.
                if total_prop < ext_prop:
                    text = ext_text
                    total_prop = ext_prop
            return text, total_prop
        else:
            # Unexpected layout constant.
            return None, 0