def text_detect(img,
                MAX_HORIZONTAL_GAP=30,
                MIN_V_OVERLAPS=0.6,
                MIN_SIZE_SIM=0.6,
                TEXT_PROPOSALS_MIN_SCORE=0.7,
                TEXT_PROPOSALS_NMS_THRESH=0.3,
                TEXT_LINE_NMS_THRESH=0.3):
    """Detect text regions in ``img`` and return them as 8-value quads.

    Raw proposals come from ``detect.text_detect``; ``TextDetector.detect``
    then filters/merges them and ``get_boxes`` turns the result into
    8-value records.

    Args:
        img: Image array; ``img.shape[:2]`` is passed on as the image size.
        MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM: Forwarded to the
            ``TextDetector`` constructor.
        TEXT_PROPOSALS_MIN_SCORE, TEXT_PROPOSALS_NMS_THRESH,
        TEXT_LINE_NMS_THRESH: Forwarded to ``TextDetector.detect``.

    Returns:
        list: One 8-element list per record, holding the record's values at
        indices 0, 1, 2, 3, 6, 7, 4, 5 (the last two coordinate pairs are
        swapped relative to the ``get_boxes`` layout).
    """
    proposals, proposal_scores = detect.text_detect(np.array(img))
    proposals = np.array(proposals, dtype=np.float32)
    proposal_scores = np.array(proposal_scores, dtype=np.float32)

    detector = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM)
    image_hw = img.shape[:2]
    merged = detector.detect(proposals,
                             proposal_scores[:, np.newaxis],
                             image_hw,
                             TEXT_PROPOSALS_MIN_SCORE,
                             TEXT_PROPOSALS_NMS_THRESH,
                             TEXT_LINE_NMS_THRESH)
    records = get_boxes(merged)

    # Both scale factors are fixed at 1, so coordinates pass through as-is.
    x_scale = 1
    y_scale = 1
    # (source index, scale) pairs in output order; pairs 3 and 4 are swapped.
    layout = ((0, x_scale), (1, y_scale),
              (2, x_scale), (3, y_scale),
              (6, x_scale), (7, y_scale),
              (4, x_scale), (5, y_scale))
    return [[rec[idx] * factor for idx, factor in layout] for rec in records]
def text_detect(img,
                MAX_HORIZONTAL_GAP=30,
                MIN_V_OVERLAPS=0.6,
                MIN_SIZE_SIM=0.6,
                TEXT_PROPOSALS_MIN_SCORE=0.7,
                TEXT_PROPOSALS_NMS_THRESH=0.3,
                TEXT_LINE_NMS_THRESH=0.3,
                bili=1.2):
    """Detect text in ``img`` and return line quads plus per-character data.

    Args:
        img: Image array; it is saved to ``look.png`` and ``img.shape[:2]``
            is passed on as the image size.
        MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM: Forwarded to the
            ``TextDetector`` constructor.
        TEXT_PROPOSALS_MIN_SCORE, TEXT_PROPOSALS_NMS_THRESH,
        TEXT_LINE_NMS_THRESH: Forwarded to ``TextDetector.detect``.
        bili: Forwarded as the last positional argument of
            ``TextDetector.detect`` (meaning not visible here -- TODO confirm).

    Returns:
        tuple: ``(newBox, scores, boxesForSingle, scoresForSingle,
        keepIndForSingle, tp_groups, Allboxes, AllScores)`` where ``newBox``
        is a list of 8-element lists (record values at indices
        0, 1, 2, 3, 6, 7, 4, 5), the middle five items come straight from
        ``TextDetector.detect``, and the last two are the raw float32
        proposals and scores from ``detect.text_detect``.
    """
    # NOTE(review): writes the input image to the working directory on every
    # call; looks like a debugging leftover -- confirm before removing.
    Image.fromarray(img).save("look.png")

    # Single-character detection (the original notes say the detector is YOLO
    # and that these are all of the raw proposals).
    raw_boxes, raw_scores = detect.text_detect(np.array(img))
    raw_boxes = np.array(raw_boxes, dtype=np.float32)
    raw_scores = np.array(raw_scores, dtype=np.float32)

    # The rest of the function stitches character boxes into text lines.
    detector = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM)
    image_hw = img.shape[:2]

    # Per the original notes: line_scores are the final per-line scores, and
    # tp_groups lists, for each line, indices into single_boxes.
    (line_boxes, line_scores, single_keep, tp_groups,
     single_boxes, single_scores) = detector.detect(
         raw_boxes,
         raw_scores[:, np.newaxis],
         image_hw,
         TEXT_PROPOSALS_MIN_SCORE,
         TEXT_PROPOSALS_NMS_THRESH,
         TEXT_LINE_NMS_THRESH,
         bili)

    records = get_boxes(line_boxes)
    print(records.shape, "text_recs.shape")

    # Both scale factors are fixed at 1, so coordinates pass through as-is.
    x_scale = 1
    y_scale = 1
    # (source index, scale) pairs in output order; pairs 3 and 4 are swapped.
    layout = ((0, x_scale), (1, y_scale),
              (2, x_scale), (3, y_scale),
              (6, x_scale), (7, y_scale),
              (4, x_scale), (5, y_scale))
    quads = [[rec[idx] * factor for idx, factor in layout] for rec in records]

    return (quads, line_scores, single_boxes, single_scores,
            single_keep, tp_groups, raw_boxes, raw_scores)
def box_cluster(img, boxes, scores, **args):
    """Run ``TextDetector.detect`` on precomputed boxes and scores.

    Args:
        img: Image array; only ``img.shape[:2]`` is used.
        boxes: Candidate boxes, passed to ``TextDetector.detect`` unchanged.
        scores: Scores for ``boxes``; a trailing axis is added before the
            call, so this must support ``scores[:, np.newaxis]``.
        **args: Optional overrides. Recognised keys and their defaults:
            ``MAX_HORIZONTAL_GAP`` (30), ``MIN_V_OVERLAPS`` (0.6),
            ``MIN_SIZE_SIM`` (0.6), ``TEXT_PROPOSALS_MIN_SCORE`` (0.07),
            ``TEXT_PROPOSALS_NMS_THRESH`` (0.7),
            ``TEXT_LINE_NMS_THRESH`` (0.9), ``LINE_MIN_SCORE`` (0.07).

    Returns:
        tuple: The ``(boxes, scores)`` pair returned by
        ``TextDetector.detect``.
    """
    option = args.get

    detector = TextDetector(option('MAX_HORIZONTAL_GAP', 30),
                            option('MIN_V_OVERLAPS', 0.6),
                            option('MIN_SIZE_SIM', 0.6))
    image_hw = img.shape[:2]

    proposal_min_score = option('TEXT_PROPOSALS_MIN_SCORE', 0.07)
    proposal_nms_thresh = option('TEXT_PROPOSALS_NMS_THRESH', 0.7)
    line_nms_thresh = option('TEXT_LINE_NMS_THRESH', 0.9)
    line_min_score = option('LINE_MIN_SCORE', 0.07)

    clustered_boxes, clustered_scores = detector.detect(
        boxes,
        scores[:, np.newaxis],
        image_hw,
        proposal_min_score,
        proposal_nms_thresh,
        line_nms_thresh,
        line_min_score)
    return clustered_boxes, clustered_scores
def net_output_process(batch_preds, batch_shape, batch_shape_padded, prob=0.05):
    """Convert batched backbone outputs into per-image boxes and scores.

    Images are handled one at a time in a plain loop for compatibility with
    earlier code; this could be vectorized later.

    Args:
        batch_preds (list of arrays): One entry per sampling scale; entries
            0, 1 and 2 are consumed. Per the original docstring each array
            has shape
            ``(batch_size, grid_size_w, grid_size_h, 3 * (4 + 1 + num_classes))``.
        batch_shape (list of tuples): Original size of each image, unpacked
            as ``(height, width)``.
        batch_shape_padded: Per-image padded input shape, forwarded to
            ``box_layer``.
        prob (float): Boxes whose confidence is not greater than ``prob``
            are dropped.

    Returns:
        tuple: ``(batch_boxes, batch_scores)``, two lists with one entry per
        image. Per the original docstring each boxes entry is
        ``[num_text_regions, 8]`` and each scores entry is
        ``[num_text_regions,]``.
    """
    MAX_HORIZONTAL_GAP = 100
    MIN_V_OVERLAPS = 0.6
    MIN_SIZE_SIM = 0.6
    TEXT_PROPOSALS_MIN_SCORE = 0.1
    TEXT_PROPOSALS_NMS_THRESH = 0.3
    TEXT_LINE_NMS_THRESH = 0.99
    LINE_MIN_SCORE = 0.1

    textdetector = TextDetector(MAX_HORIZONTAL_GAP, MIN_V_OVERLAPS, MIN_SIZE_SIM)

    batch_boxes = []
    batch_scores = []

    per_image = zip(batch_preds[0], batch_preds[1], batch_preds[2],
                    batch_shape, batch_shape_padded)
    for y1, y2, y3, image_shape, input_shape in per_image:
        # First pass: decode the raw backbone output. ``anchors`` and
        # ``num_classes`` are expected to exist at module level.
        outputs = [y1, y2, y3, image_shape, input_shape]
        box, scores = box_layer(outputs, anchors, num_classes)
        h, w = image_shape

        # Convert before thresholding so the comparison below also works
        # when box_layer hands back plain Python sequences.
        box = np.array(box)
        scores = np.array(scores)
        keep = np.where(scores > prob)[0]

        # Clamp coordinates into the image: negatives become 0, and columns
        # 0/2 are capped at w - 1, columns 1/3 at h - 1.
        box[box < 0] = 0
        for col, limit in ((0, w), (1, h), (2, w), (3, h)):
            column = box[:, col]  # view: the assignment edits ``box`` in place
            column[column >= limit] = limit - 1

        boxes = box[keep]
        scores = scores[keep]

        # Filter the surviving boxes, apply NMS and group them into lines.
        boxes, scores = textdetector.detect(boxes,
                                            scores[:, np.newaxis],
                                            (h, w),
                                            TEXT_PROPOSALS_MIN_SCORE,
                                            TEXT_PROPOSALS_NMS_THRESH,
                                            TEXT_LINE_NMS_THRESH,
                                            LINE_MIN_SCORE)
        boxes = sort_box(boxes)
        batch_boxes.append(boxes)
        batch_scores.append(scores)

    return batch_boxes, batch_scores