def model(img, detectAngle=False, config={}, leftAdjust=False, rightAdjust=False, alph=0.2, ifadjustDegree=False):
    """
    @@param:img,
    @@param:ifadjustDegree  adjust the skew angle before text recognition
    @@param:detectAngle     whether to detect text orientation
    """
    angle, degree, img = eval_angle(img, detectAngle=detectAngle, ifadjustDegree=ifadjustDegree)
    if opencvFlag != 'keras':
        img, f = letterbox_image(img, IMGSIZE)
    else:
        f = 1.0  ## keep boxes consistent with the original image coordinates
    config['img'] = img
    timeTake = time.time()
    text_recs = text_detect(**config)  ## text detection
    timeTake = time.time() - timeTake
    print('Detect take:{}s'.format(timeTake))
    newBox = sort_box(text_recs)
    result = crnnRec(np.array(img), newBox, leftAdjust, rightAdjust, alph, 1.0 / f)
    return img, result, angle
def test_keras_detect(img, **args):
    scale = args.get('scale', 608)
    maxScale = args.get('maxScale', 608)
    boxes, scores = text_detect(img, scale, maxScale, prob=0.07)
    boxes, scores = box_cluster(img, boxes, scores, **args)
    boxes = sort_box(boxes)
    leftAdjustAlph = args.get('leftAdjustAlph', 0.05)
    rightAdjustAlph = args.get('rightAdjustAlph', 0.05)
    tmp = plot_box(img, boxes)
    tmp.save('./14_text_nms.jpg')
def model_PSENET(self, img, **args):
    detectAngle = args.get('detectAngle', False)
    if detectAngle:
        img, angle = self.detect_angle(img)
    else:
        angle = 0

    scale = args.get('scale', 608)
    maxScale = args.get('maxScale', 608)
    MAX_HORIZONTAL_GAP = args.get('MAX_HORIZONTAL_GAP', 15)
    MIN_V_OVERLAPS = args.get('MIN_V_OVERLAPS', 0.01)
    TEXT_PROPOSALS_MIN_SCORE = args.get('TEXT_PROPOSALS_MIN_SCORE', 0.9)
    Adjustbox = args.get('Adjustbox', [-5, -5, 5, 5])
    print('scale', scale, MAX_HORIZONTAL_GAP)

    boxes = self.textModel(img,
                           min_len=scale,
                           max_len=maxScale,
                           score_thre=TEXT_PROPOSALS_MIN_SCORE,
                           max_dist=MAX_HORIZONTAL_GAP,
                           threshold_overlap_v=MIN_V_OVERLAPS,
                           move_rect=Adjustbox)

    import cv2
    im = img.copy()
    # drop degenerate boxes (zero width or height) and draw the rest for debugging
    bo = []
    for rt in boxes:
        if not (rt[0] == rt[2] or rt[1] == rt[3]):
            bo.append(rt)
            cv2.rectangle(im, (rt[0], rt[1]), (rt[2], rt[3]), (0, 0, 255), 2)
    boxes = np.array(bo)

    im_show = cv2.resize(im, (im.shape[1] // 2, im.shape[0] // 2))
    image = cv2.cvtColor(im_show, cv2.COLOR_BGR2RGB)
    # cv2.imshow('img2', im_show)
    # cv2.waitKey(0)

    # expand each axis-aligned box (x1, y1, x2, y2) into the 8-point
    # clockwise quadrilateral (x1, y1, x2, y1, x2, y2, x1, y2) expected by sort_box
    boxes_8 = np.zeros((len(boxes), 8), np.int32)
    for i in range(len(boxes)):
        boxes_8[i, 0] = boxes[i, 0]  # x1
        boxes_8[i, 1] = boxes[i, 1]  # y1
        boxes_8[i, 2] = boxes[i, 2]  # x2
        boxes_8[i, 3] = boxes[i, 1]  # y1
        boxes_8[i, 4] = boxes[i, 2]  # x2
        boxes_8[i, 5] = boxes[i, 3]  # y2
        boxes_8[i, 6] = boxes[i, 0]  # x1
        boxes_8[i, 7] = boxes[i, 3]  # y2
    # print(boxes_8, 'boxes_8')
    boxes = sort_box(boxes_8)
    # print(boxes, 'boxes')

    leftAdjustAlph = args.get('leftAdjustAlph', 0)
    rightAdjustAlph = args.get('rightAdjustAlph', 0)
    res = self.ocr_batch(img, boxes, leftAdjustAlph, rightAdjustAlph)
    return res, angle, image
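# The boxes_8 loop above can also be written as a single numpy column stack.
# This is only an equivalent sketch of that conversion, not code from the
# repository; it assumes boxes is an (N, 4) array of (x1, y1, x2, y2).
import numpy as np

def to_quad_boxes(boxes):
    # (x1, y1, x2, y2) -> (x1, y1, x2, y1, x2, y2, x1, y2)
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    return np.stack([x1, y1, x2, y1, x2, y2, x1, y2], axis=1).astype(np.int32)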
def model(self, img, **args):
    detectAngle = args.get('detectAngle', False)
    if detectAngle:
        img, angle = self.detect_angle(img)
    else:
        angle = 0

    scale = args.get('scale', 608)
    maxScale = args.get('maxScale', 608)
    boxes, scores = self.detect_box(img, scale, maxScale)  ## text detection
    boxes, scores = self.box_cluster(img, boxes, scores, **args)
    boxes = sort_box(boxes)
    leftAdjustAlph = args.get('leftAdjustAlph', 0)
    rightAdjustAlph = args.get('rightAdjustAlph', 0)
    res = self.ocr_batch(img, boxes, leftAdjustAlph, rightAdjustAlph)
    return res, angle
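# A minimal usage sketch for the class-based pipeline above. The class name,
# import path and sample image path are hypothetical; only the keyword names
# mirror the args.get(...) calls in the method.
if __name__ == '__main__':
    import cv2
    from ocr_model import OcrHandle          # hypothetical import path

    handle = OcrHandle()                      # hypothetical constructor
    img = cv2.imread('test.jpg')              # hypothetical sample image
    res, angle = handle.model(img, detectAngle=True, scale=608, maxScale=608,
                              leftAdjustAlph=0.01, rightAdjustAlph=0.01)
    print(angle, res)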
def model(img, detectAngle=False, config={}, leftAdjust=False, rightAdjust=False, alph=0.2):
    """
    @@param:img,
    @@param:detectAngle  whether to detect text orientation
    """
    angle, img = eval_angle(img, detectAngle=detectAngle)  ## text orientation detection
    if opencvFlag != 'keras':
        img, f = letterbox_image(Image.fromarray(img), IMGSIZE)  ## pad
        img = np.array(img)
    else:
        f = 1.0  ## keep boxes consistent with the original image coordinates
    config['img'] = img
    text_recs = text_detect(**config)  ## text detection
    newBox = sort_box(text_recs)
    ## line-level text recognition
    result = crnnRec(np.array(img), newBox, leftAdjust, rightAdjust, alph, 1.0 / f)
    return img, result, angle
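# A rough usage sketch for the function-style pipeline above. The image path
# is hypothetical, and config only carries whatever extra keyword arguments
# text_detect expects in this build (they are forwarded unchanged).
if __name__ == '__main__':
    import cv2

    img = cv2.imread('test.jpg')              # hypothetical sample image
    detect_config = {}                        # extra text_detect kwargs, if any
    img, result, angle = model(img, detectAngle=True, config=detect_config,
                               leftAdjust=True, rightAdjust=True, alph=0.2)
    print(angle)
    print(result)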
def model(img, file_name, detectAngle=False, config={}, leftAdjust=False, rightAdjust=False, alpha=0.2):
    """
    @@param:img,
    @@param:detectAngle  whether to detect text orientation
    """
    angle, img = eval_angle(img, detectAngle=detectAngle)  # text orientation detection
    if opencvFlag != 'keras':
        img, f = letterbox_image(Image.fromarray(img), IMGSIZE)  # pad
        img = np.array(img)
    else:
        f = 1.0  # keep boxes consistent with the original image coordinates
    config['img'] = img
    image_cv = copy.copy(img)
    cv2.imshow("model", image_cv)
    text_recs = text_detect(**config)  # text detection
    # print('text_recs', text_recs)
    # draw_boxes(image_cv, text_recs, "text_recs", (255, 255, 0))
    newBox = sort_box(text_recs)  # sort the text boxes top to bottom
    print('newBox', newBox)
    draw_boxes(image_cv, text_recs, file_name, (255, 0, 0), "./output")
    # print('model', img.shape)
    result = crnnRec(np.array(img), newBox, leftAdjust, rightAdjust, alpha, 1.0 / f)  # ocr recognition
    print('result', result)
    return img, result, angle
def model(img, detectAngle=False, config={}, leftAdjust=False, rightAdjust=False, alph=0.2, bili=1.2):
    """
    @@param:img,
    @@param:detectAngle  whether to detect text orientation
    """
    # TODO: add image preprocessing here (filtering/denoising etc.);
    # the current pipeline does not include it yet.
    angle, img = eval_angle(img, detectAngle=detectAngle)  ## text orientation detection
    if opencvFlag != 'keras':
        img, f = letterbox_image(Image.fromarray(img), IMGSIZE)  ## pad
        img = np.array(img)
    else:
        f = 1.0  ## keep boxes consistent with the original image coordinates
    config['img'] = img
    config['bili'] = bili
    text_recs, scores, boxForSingleAfterNMS, scoresForSingle, keepIndForSingle, tp_groups, Allboxes, Allscores = \
        text_detect(**config)  ## text detection
    newBox, tp_groups = sort_box(text_recs, tp_groups)  # sort by line position so boxes follow reading order
    ## line-level text recognition
    print(newBox)
    result = crnnRec(np.array(img), newBox, leftAdjust, rightAdjust, alph, 1.0 / f, tp_groups,
                     boxForSingleAfterNMS, scoresForSingle)
    return img, result, angle, scores, text_recs, newBox, boxForSingleAfterNMS, scoresForSingle, \
           keepIndForSingle, tp_groups, Allboxes, Allscores
def net_output_process(batch_preds, batch_shape, batch_shape_padded, prob=0.05):
    """
    Convert the backbone's batched outputs into boxes and scores. To stay
    compatible with the earlier code this uses a for loop for now; it can be
    replaced with a vectorized implementation later.
    @params batch_preds(list of arrays): the list length is the number of
        sampling scales; each array has shape
        (batch_size, grid_size_w, grid_size_h, 3*(4+1+num_classes))
    @params batch_shape(list of tuples): original image height/width
    @params prob(float): boxes with confidence below prob are ignored
    @returns batch_boxes(array): [num_text_regions, 8]
    @returns batch_scores: [num_text_regions,]
    """
    batch_boxes = []
    batch_scores = []
    MAX_HORIZONTAL_GAP = 100
    MIN_V_OVERLAPS = 0.6
    MIN_SIZE_SIM = 0.6
    textdetector = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM)
    TEXT_PROPOSALS_MIN_SCORE = 0.1
    TEXT_PROPOSALS_NMS_THRESH = 0.3
    TEXT_LINE_NMS_THRESH = 0.99
    LINE_MIN_SCORE = 0.1
    leftAdjustAlph = 0.01
    rightAdjustAlph = 0.01

    # first run the raw backbone output through the box layer, one image at a time
    for y1, y2, y3, image_shape, input_shape in zip(batch_preds[0], batch_preds[1], batch_preds[2],
                                                    batch_shape, batch_shape_padded):
        outputs = [y1, y2, y3, image_shape, input_shape]
        box, scores = box_layer(outputs, anchors, num_classes)
        h, w = image_shape
        keep = np.where(scores > prob)
        # box[:, 0:4][box[:, 0:4] < 0] = 0
        box = np.array(box)
        scores = np.array(scores)
        # clip box coordinates to the image boundaries
        box[box < 0] = 0
        box[:, 0][box[:, 0] >= w] = w - 1
        box[:, 1][box[:, 1] >= h] = h - 1
        box[:, 2][box[:, 2] >= w] = w - 1
        box[:, 3][box[:, 3] >= h] = h - 1
        boxes = box[keep[0]]
        scores = scores[keep[0]]
        # filter the proposals, run NMS, and group them into text lines
        boxes, scores = textdetector.detect(boxes, scores[:, np.newaxis], (h, w),
                                            TEXT_PROPOSALS_MIN_SCORE, TEXT_PROPOSALS_NMS_THRESH,
                                            TEXT_LINE_NMS_THRESH, LINE_MIN_SCORE)
        boxes = sort_box(boxes)
        batch_boxes.append(boxes)
        batch_scores.append(scores)
    # print('done')
    return batch_boxes, batch_scores
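# Sketch of the boundary clipping done inside net_output_process, factored
# into a helper for clarity. This helper is not part of the original file; it
# assumes box is an (N, 4) array of (x1, y1, x2, y2) and numpy imported as np.
def clip_boxes_to_image(box, h, w):
    box = np.array(box, dtype=float)
    box[box < 0] = 0                                      # clip negative coordinates
    box[:, [0, 2]] = np.minimum(box[:, [0, 2]], w - 1)    # clip x1, x2 to image width
    box[:, [1, 3]] = np.minimum(box[:, [1, 3]], h - 1)    # clip y1, y2 to image height
    return box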
def model_CRAFT(self, img, **args):
    detectAngle = args.get('detectAngle', False)
    if detectAngle:
        img, angle = self.detect_angle(img)
    else:
        angle = 0

    scale = args['scale']
    MAX_HORIZONTAL_GAP = args['MAX_HORIZONTAL_GAP']
    MIN_V_OVERLAPS = args['MIN_V_OVERLAPS']
    TEXT_PROPOSALS_MIN_SCORE = args['TEXT_PROPOSALS_MIN_SCORE']
    Adjustbox = args['Adjustbox']
    pixel_filter = args['pixel_filter']
    batch_by_1 = args['batch_by_1']
    scoremap_enhance_pixel = args['scoremap_enhance_pixel']

    boxes = self.textModel(img,
                           image_inference_scale=scale,
                           score_thre=TEXT_PROPOSALS_MIN_SCORE,
                           batch_by_1=batch_by_1,
                           max_dist=MAX_HORIZONTAL_GAP,
                           threshold_overlap_v=MIN_V_OVERLAPS,
                           move_rect=Adjustbox,
                           pixel_filter=pixel_filter,
                           scoremap_enhance_pixel=scoremap_enhance_pixel)

    # keep only boxes whose height lies between 10 and 300 pixels
    bo = []
    for rt in boxes:
        Xs = [rt[0], rt[2], rt[4], rt[6]]
        Ys = [rt[1], rt[3], rt[5], rt[7]]
        x1, x2 = min(Xs), max(Xs)
        y1, y2 = min(Ys), max(Ys)
        height = y2 - y1
        width = x2 - x1
        # print(height, '-------------------', width)
        if 10 < height < 300:
            bo.append(rt)
    boxes = sort_box(bo)

    leftAdjustAlph = args.get('leftAdjustAlph', 0)
    rightAdjustAlph = args.get('rightAdjustAlph', 0)
    # for rt in boxes:
    #     cv2.rectangle(img, (rt[0], rt[1]), (rt[2], rt[3]), (0, 0, 255), 2)
    res = self.ocr_batch(img, boxes, leftAdjustAlph, rightAdjustAlph)

    import cv2
    im = img.copy()
    for pts in boxes:
        # print(pts)
        pts = pts.reshape((-1, 1, 2))
        cv2.polylines(im, [pts], True, (0, 0, 255), 1)
    # cv2.imshow('img', im)
    # cv2.waitKey()
    # cv2.imwrite('label.jpg', im)
    return res, angle, im
def crnnRec(im, boxes, leftAdjust=False, rightAdjust=False, alph=0.2, f=1.0):
    """
    crnn model, OCR recognition
    @@model,
    @@converter,
    @@im: Array
    @@text_recs: text box
    @@ifIm: whether to also return the image crop for each box
    """
    results = []
    # print("ocrModel:", ocrModel)
    # print("ocrFlag:", ocrFlag)
    im = Image.fromarray(im)
    # print("boxes:", boxes)
    boxes = sort_box(boxes)  # boxes are not sorted yet
    # print("boxes:", boxes)
    import time
    i = 1
    for index, box in enumerate(boxes):
        start_time = time.time()
        degree, w, h, cx, cy = box
        # partImg, newW, newH = rotate_cut_img(im, 90 + degree, cx, cy, w, h, leftAdjust, rightAdjust, alph)
        partImg = crop_rect(im, ((cx, cy), (h, w), degree))
        newW, newH = partImg.size
        # partImg.thumbnail(newW * 2, newH * 2)
        # partImg_array = np.uint8(partImg)
        # if newH > 1.5 * newW:
        #     partImg_array = np.rot90(partImg_array, 1)
        # partImg = Image.fromarray(partImg_array).convert("RGB")
        # partImg.save("./debug_im/{}.jpg".format(index))
        # angel_index = angle_handle.predict(partImg_array)
        # angel_class = lable_map_dict[angel_index]
        # print(angel_class)
        # rotate_angle = rotae_map_dict[angel_class]
        # if rotate_angle != 0:
        #     partImg_array = np.rot90(partImg_array, rotate_angle // 90)
        # partImg, box = rotate_cut_img(im, box, leftAdjust, rightAdjust)
        # partImg = Image.fromarray(partImg_array).convert("RGB")
        # partImg.save("./debug_im/{}.jpg".format(index))
        # partImg.save(r'outputs/vis_invoice/{}.png'.format(index))
        partImg_ = partImg.convert('L')
        try:
            # if crnn_vertical_handle is not None and angel_class in ["shudao", "shuzhen"]:
            #     simPred = crnn_vertical_handle.predict(partImg_)
            # else:
            pre = time.time()
            simPred = crnn.predict(partImg_)  ## recognized text
            # simPred = recognizer(partImg_)
        except:
            continue
        if simPred[0].strip() != '':
            # results.append({'cx': box['cx'] * f, 'cy': box['cy'] * f, 'text': simPred,
            #                 'w': box['w'] * f, 'h': box['h'] * f, 'degree': box['degree']})
            # f defaults to 1
            results.append({
                'cx': cx * f,
                'cy': cy * f,
                'text': simPred[0],
                'candidate': simPred[1],
                'w': newW * f,
                'h': newH * f,
                'degree': degree
            })
            # print("results:", results)
            i += 1
    return results
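# Each entry in the results list above describes one recognized line by its
# center, size, and angle. As a small sketch (not part of the original file),
# the axis-aligned corners of an unrotated entry (degree == 0) can be
# recovered from that dict like this; rotated entries would additionally need
# the corners rotated about (cx, cy).
def result_to_corners(item):
    # item: one dict produced by crnnRec, e.g. {'cx': ..., 'cy': ..., 'w': ..., 'h': ..., ...}
    x1 = item['cx'] - item['w'] / 2.0
    y1 = item['cy'] - item['h'] / 2.0
    x2 = item['cx'] + item['w'] / 2.0
    y2 = item['cy'] + item['h'] / 2.0
    return x1, y1, x2, y2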