Exemplo n.º 1
0
    def get_sub_area_text(self, sub_im):
        """Recognise the text contained in one image strip.

        The strip is cut into column ranges by vertical projection, every
        range is refined with ``split_sub_ranges`` and the resulting ranges
        are handed to ``analyze_sub_ranges`` together with a socket connected
        to the recognition service on 127.0.0.1:8009.

        Args:
            sub_im: 2-D image array. Presumably a binarised grey-scale strip
                with values in 0..255 (the projection is normalised by
                ``rows * 255``) -- confirm against the caller.

        Returns:
            Whatever ``analyze_sub_ranges`` returns for the strip.

        Raises:
            OSError: if the recognition service cannot be reached.
        """
        # 1. Vertical projection, normalised by rows * 255.
        prj_val = prj.get_image_projection(sub_im, 'ver')
        ratio_val = np.fromiter(prj_val,
                                dtype=np.float32) / (sub_im.shape[0] * 255)
        # 2. Column ranges found at the 0.995 threshold.
        range_list = prj.get_range_list(ratio_val, 0.995)
        # 3. Refine every range into finer sub-ranges.
        sub_ranges = []
        for rg in range_list:
            tmp_im = sub_im[:, rg.begin:rg.begin + rg.get_length()]
            sub_ranges.extend(self.split_sub_ranges(tmp_im, rg))

        # 4. Analyse the ranges through the recognition service. The ``with``
        #    block closes the socket even when the analysis raises.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
            client.connect(("127.0.0.1", 8009))
            text = self.analyze_sub_ranges(sub_im, sub_ranges, client)
            # Tell the service this session is over; sendall() retries until
            # the whole message has been written.
            client.sendall('bye'.encode())

        return text
Exemplo n.º 2
0
 def get_sub_area_text(self, sub_im):
     """Recognise the text of one image strip with a beam search.

     The strip is trimmed to its non-blank columns, then
     ``beam_search_solution`` is tried up to four times with a threshold
     that starts at 0.98 and drops by 0.05 after every rejected attempt.
     The solution with the highest score (``sol[2]``) is kept.

     Args:
         sub_im: 2-D image array. Presumably a binarised grey-scale strip
             with values in 0..255 (the projection is normalised by
             ``rows * 255``) -- confirm against the caller.

     Returns:
         ``(text, score)`` where ``text`` is the concatenation of the
         recognised words of the best solution and ``score`` is that
         solution's third element (described in the original comments as
         the mean probability). ``(None, 0)`` when the projection yields no
         range at all; in that case the recognition service is not
         contacted.

     Raises:
         OSError: if the recognition service cannot be reached.
     """
     # 1. Vertical projection, normalised by rows * 255.
     prj_val = prj.get_image_projection(sub_im, 'ver')
     ratio_val = np.fromiter(prj_val, dtype=np.float32) / (sub_im.shape[0] * 255)
     # Trim the blank columns on the left and on the right.
     range_list = prj.get_range_list(ratio_val, 0.995)
     if len(range_list) == 0:
         # Nothing to recognise. Checked before connecting so that no
         # socket is left open on this early exit.
         return None, 0
     left = range_list[0].begin
     right = range_list[-1].end + 1
     sub_im = sub_im[:, left:right]
     ratio_val = ratio_val[left:right]

     # Cache of OCR results shared by all attempts to reduce OCR calls.
     ocr_cache = {}
     threshold = 0.98
     best_sol = None  # best solution seen so far
     # 2. The ``with`` block closes the socket even if an attempt raises.
     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
         client.connect(("127.0.0.1", 8009))
         for _attempt in range(4):
             print('try threshold ', str(threshold))
             # Solution layout (per the original comments):
             # [split point ids, recognised words, mean probability], each
             # word being (text, probability, type, (begin, end)) where
             # begin/end delimit the non-blank area.
             sol = self.beam_search_solution(sub_im, ratio_val, client, ocr_cache, threshold)
             if best_sol is None or best_sol[2] < sol[2]:
                 best_sol = sol

             # A negative word probability marks an unrecognised region:
             # reject the attempt and retry with a lower threshold.
             word_set = sol[1]
             prop_list = []
             valid = True
             for word in word_set:
                 if word[1] != 0:
                     prop_list.append(word[1])
                 if word[1] < 0:
                     valid = False
                     break
             # An empty solution with a negative score is rejected as well.
             if valid and len(word_set) == 0 and sol[2] < 0:
                 valid = False
             if not valid:
                 threshold = threshold - 0.05
                 continue

             # Look for words whose probability is more than one standard
             # deviation below the mean and try to repair them.
             # NOTE(review): with an empty prop_list mean/std are NaN, so
             # every word is treated as an outlier -- confirm this is wanted.
             prop_ver = np.fromiter(prop_list, dtype=np.float64)
             std = np.std(prop_ver)
             mean = np.mean(prop_ver)
             for word in word_set:
                 # Within the accepted deviation: nothing to repair.
                 if word[1] >= mean - std:
                     continue
                 # A weak 'cn' word may really be two narrower symbols:
                 # recognise both halves and keep them if they score better
                 # and neither half is 'cn' itself.
                 if word[2] == 'cn':
                     (begin_pos, end_pos) = word[3]
                     mid_pos = int((begin_pos + end_pos) / 2)
                     result1 = self.get_split_area_text(sub_im, ratio_val, begin_pos, mid_pos, client)
                     result2 = self.get_split_area_text(sub_im, ratio_val, mid_pos, end_pos, client)
                     if (word[1] < (result1[1] + result2[1]) / 2.0
                             and result1[2] != 'cn' and result2[2] != 'cn'):
                         # NOTE(review): requires the word record to be
                         # mutable (a list, not a tuple) -- confirm.
                         word[0] = result1[0] + result2[0]
                 # Non-'cn' words should be merged with their neighbours;
                 # not implemented yet.
                 if word[2] == 'en' or word[2] == 'ccn':
                     print('important')
             break

         # 3. Assemble the text of the best solution.
         text = ''.join(word[0] for word in best_sol[1])
         # Tell the service this session is over.
         client.sendall('bye'.encode())
     return text, best_sol[2]
Exemplo n.º 3
0
    def create(self):
        """Write the samples listed in ``idx.dat`` to the TFRecords file.

        ``<base_dir>/idx.dat`` is read line by line; every line is split on
        single spaces into ``path label word``. Reading stops at the first
        line with fewer than three fields. The samples are shuffled, each
        image is cropped to its non-blank columns, centred on a white square
        canvas when it is narrower than tall, encoded with ``encode_img`` to
        48x48 and written to ``self.tfrecords_filename``.

        Images whose projection yields no range, or that cannot be decoded,
        are skipped.
        """
        tfr_writer = tf.python_io.TFRecordWriter(self.tfrecords_filename)
        try:
            # Read the index; the ``with`` block closes the file on any exit.
            img_files = []
            with open(self.base_dir + '/idx.dat', encoding='utf-8') as fr:
                for line in fr:
                    fields = line.split(' ')
                    if len(fields) < 3:
                        print('err data', fields)
                        break
                    _label = int(fields[1])
                    # NOTE(review): when the word is the last field it still
                    # carries the line terminator -- confirm this is wanted.
                    _word = fields[2]
                    _img_path = self.base_dir + fields[0]
                    img_files.append((_img_path, _label, _word))

            random.shuffle(img_files)

            count = 0
            for _img_path, _label, _word in img_files:
                print('process %d: %s...' % (count, _img_path))
                # flags=0 decodes as grey-scale; np.fromfile + imdecode is
                # used instead of a direct path-based read.
                _img = cv2.imdecode(np.fromfile(_img_path, dtype=np.uint8),
                                    flags=0)
                if _img is None:
                    # Undecodable file: report it instead of crashing on
                    # ``.shape`` below.
                    print('err image', _img_path)
                    continue

                # Vertical projection: crop to the non-blank columns.
                prj_val_ver = prj.get_image_projection(_img, 'ver')
                ratio_val = np.fromiter(
                    prj_val_ver, dtype=np.float32) / (_img.shape[0] * 255)
                range_list_ver = prj.get_range_list(ratio_val, 1)
                if len(range_list_ver) == 0:
                    continue
                _img = _img[:, range_list_ver[0].begin:
                            range_list_ver[-1].end + 1]

                height, width = _img.shape[0], _img.shape[1]
                if width < height:
                    # Narrower than tall: centre on a white square canvas.
                    canvas = np.full((height, height), 255, dtype=np.uint8)
                    x_pos = (height - width) // 2
                    canvas[:, x_pos:x_pos + width] = _img
                    _img = canvas

                # Encode into the 48x48 sample format.
                _img, _shape = self.encode_img(_img, (48, 48))
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'height': self._int64_feature(_shape[0]),
                        'width': self._int64_feature(_shape[1]),
                        'word': self._bytes_feature(
                            _word.encode(encoding="utf-8")),
                        # tobytes() is the supported spelling of tostring().
                        'img_raw': self._bytes_feature(_img.tobytes()),
                        'label': self._int64_feature(_label)
                    }))
                tfr_writer.write(example.SerializeToString())
                count += 1
        finally:
            # Always flush and release the output file.
            tfr_writer.close()
Exemplo n.º 4
0
    def inference_images_with_server(self, sess, endpoints, images):
        """Run the recognition network on a list of image files.

        Every image is read as grey-scale, binarised with Otsu's threshold,
        padded with white rows when the blank margin above or below the
        content is too small, cropped to its non-blank columns, centred on a
        white square canvas when narrower than tall, resized to
        ``SAMPLE_SIZE x SAMPLE_SIZE``, scaled to 0..1 and flattened.

        Args:
            sess: object whose ``run`` method evaluates the network
                (presumably a TensorFlow session -- confirm).
            endpoints: mapping providing the ``'inputs.data'``,
                ``'predict_val_top3'`` and ``'predict_idx_top3'`` entries.
            images: iterable of image file paths.

        Returns:
            A list with one entry per processed image; each entry is a list
            of ``[map_id_cw[idx][0], map_id_cw[idx][1], value]`` triples, one
            per prediction. ``None`` when a file is missing or cannot be
            decoded, or when no image produced a sample. Images whose
            projection yields no range are skipped, so the result can be
            shorter than ``images``.
        """
        samples = []
        for img_file in images:
            # 1. Read the file as grey-scale.
            if not os.path.exists(img_file):
                return None
            im = cv2.imdecode(np.fromfile(img_file, dtype=np.uint8), flags=0)
            if im is None:
                return None

            # 2. Binarise.
            im = cv2.threshold(im, 0, 255,
                               cv2.THRESH_OTSU | cv2.THRESH_BINARY)[1]

            # 3. Vertical projection, used further down for the column crop.
            prj_val_ver = prj.get_image_projection(im, 'ver')
            ratio_val = np.fromiter(prj_val_ver,
                                    dtype=np.float32) / (im.shape[0] * 255)
            range_list_ver = prj.get_range_list(ratio_val, 1)
            if len(range_list_ver) == 0:
                continue

            # Horizontal projection: make sure the blank margin is at least
            # max(3 px, 20% of the height) above the content and at least
            # max(2 px, 15% of the height) below it.
            prj_val_hor = prj.get_image_projection(im, 'hor')
            ratio_val = np.fromiter(prj_val_hor,
                                    dtype=np.float32) / (im.shape[1] * 255)
            range_list_hor = prj.get_range_list(ratio_val, 1)
            if len(range_list_hor) == 0:
                continue
            height = im.shape[0]
            top_empty = range_list_hor[0].begin
            bottom_empty = height - 1 - range_list_hor[-1].end
            top_pad = max(max(3, int(height * 0.2)) - top_empty, 0)
            bottom_pad = max(max(2, int(height * 0.15)) - bottom_empty, 0)
            if top_pad > 0 or bottom_pad > 0:
                # White float canvas (same dtype the padding always had);
                # the image is copied below the added top rows.
                padded = np.full((height + top_pad + bottom_pad, im.shape[1]),
                                 255.0)
                padded[top_pad:top_pad + height, :] = im
                im = padded

            # Padding only adds rows, so the column ranges are still valid.
            im = im[:, range_list_ver[0].begin:range_list_ver[-1].end + 1]
            if im.shape[1] < im.shape[0]:
                # Narrower than tall: centre on a white square canvas.
                side = im.shape[0]
                canvas = np.full((side, side), 255, dtype=np.uint8)
                x_pos = (side - im.shape[1]) // 2
                canvas[:, x_pos:x_pos + im.shape[1]] = im
                im = canvas

            # 4. Resize to the sample size.
            im = cv2.resize(im, (SAMPLE_SIZE, SAMPLE_SIZE),
                            interpolation=cv2.INTER_CUBIC)
            # 5. Normalise.
            im = im.astype('float')
            im = im / 255.0
            # 6. Flatten into a single row and 7. queue it.
            samples.append(im.reshape([1, SAMPLE_SIZE * SAMPLE_SIZE]))

        if not samples:
            # Nothing to feed to the network.
            return None
        # One row per image. Stacking once at the end also fixes the former
        # 2-D/3-D concatenation that failed from the second image onward.
        img_matrix = np.concatenate(samples)

        # Feed the network.
        predict_val, predict_idx = sess.run(
            [endpoints['predict_val_top3'], endpoints['predict_idx_top3']],
            feed_dict={endpoints['inputs.data']: img_matrix})

        # Translate the predicted indices through map_id_cw. Plain lists keep
        # the values numeric (an ndarray would turn them into strings).
        rows, cols = predict_idx.shape[:2]
        result_mtx = []
        for i in range(rows):
            rec_words = []
            for j in range(cols):
                entry = map_id_cw[predict_idx[i][j]]
                rec_words.append([entry[0], entry[1], predict_val[i][j]])
            result_mtx.append(rec_words)
        return result_mtx or None
Exemplo n.º 5
0
    def split_sub_ranges(self, sub_im, org_range):
        """Split an image strip into character-sized column ranges.

        The strip is cut by vertical projection (threshold 0.99). With ``h``
        the strip height, every resulting range is handled by its width:

        * ``0.7*h <= width <= 1.2*h``: kept as is (treated as one character);
        * ``width < 0.7*h``: kept and flagged ``mark = 1`` (pending);
        * ``width > 1.2*h``: cut into equal pieces. Every piece width in
          ``[int(0.7*h), int(1.2*h)]`` and every start offset is scored by
          the root mean square of the projection ratio at its cut columns;
          the best scoring plan is applied and all pieces are flagged
          ``mark = 1``.

        Args:
            sub_im: 2-D image array. Presumably binarised with values in
                0..255 (the projection is normalised by ``rows * 255``) --
                confirm against the caller.
            org_range: range passed to ``adjust`` for every produced range
                (presumably the position of ``sub_im`` in the parent image,
                so that results are in parent coordinates -- confirm).

        Returns:
            List of the values returned by ``adjust(org_range)``.
        """
        # 1. Vertical projection, normalised by rows * 255.
        prj_val = prj.get_image_projection(sub_im, 'ver')
        ratio_val = np.fromiter(prj_val,
                                dtype=np.float32) / (sub_im.shape[0] * 255)
        # 2. Column ranges.
        range_list = prj.get_range_list(ratio_val, 0.99)
        # 3. Analyse the ranges one by one.
        sub_ranges = []
        h = sub_im.shape[0]
        min_ratio = 0.7
        max_ratio = 1.2
        for cur in range_list:
            range_width = cur.get_length()
            # Width/height within [0.7, 1.2]: a single character.
            if h * min_ratio <= range_width <= h * max_ratio:
                sub_ranges.append(cur.adjust(org_range))
                continue
            # Narrower: digit, letter or part of a character; decided later.
            if range_width < h * min_ratio:
                cur.mark = 1  # pending
                sub_ranges.append(cur.adjust(org_range))
                continue

            # Wider than 1.2*h: search the best equal-width cutting plan.
            split_plans = []
            for w in range(int(h * min_ratio), int(h * max_ratio) + 1):
                sub_plans = []
                # Try every start offset below w that leaves room for at
                # least one full piece.
                for pos in range(0, min(w, range_width - w)):
                    length = range_width - pos
                    split_points_count = int(length * 1.0 / w) + 1
                    error = 0
                    for k in range(split_points_count):
                        global_pos = cur.begin + pos + k * w
                        if global_pos >= len(ratio_val):
                            # The last cut falls outside the projection.
                            split_points_count = split_points_count - 1
                            break
                        error = error + ratio_val[global_pos] * ratio_val[
                            global_pos]
                    error = math.sqrt(error / split_points_count)
                    sub_plans.append(SplitPlan(pos, w, error))
                if len(sub_plans) == 0:
                    continue
                # Keep the best offset for this width (last after sorting).
                sub_plans.sort()
                split_plans.append(sub_plans[-1])

            if len(split_plans) == 0:
                # No plan could be built (degenerate height): keep the whole
                # range as pending instead of failing on an empty list.
                cur.mark = 1
                sub_ranges.append(cur.adjust(org_range))
                continue

            split_plans.sort()
            max_plan = split_plans[-1]
            # When the two best plans score within 5% of each other, prefer
            # the one producing more pieces.
            if len(split_plans) >= 2:
                plan1 = split_plans[-1]
                plan2 = split_plans[-2]
                if plan1.errs != 0 and (plan1.errs -
                                        plan2.errs) / plan1.errs <= 0.05:
                    if plan2.get_split_count(cur) > plan1.get_split_count(
                            cur):
                        max_plan = plan2

            print('maxplan: ', max_plan)
            # Apply the plan. The piece count must be derived from the width
            # of the chosen plan, not from the last width that was tried.
            length = range_width - max_plan.pos
            split_points_count = int(length * 1.0 / max_plan.width) + 1
            if max_plan.pos != 0:
                # Leading fragment before the first cut.
                rg = Range(cur.begin, cur.begin + max_plan.pos - 1)
                rg.mark = 1
                sub_ranges.append(rg.adjust(org_range))
            for k in range(0, split_points_count - 1):
                # Full-width pieces.
                # TODO: connected-component analysis (add as a whole when the
                # content is connected, otherwise cut).
                rg = Range(
                    cur.begin + max_plan.pos + k * max_plan.width,
                    cur.begin + max_plan.pos + (k + 1) * max_plan.width - 1)
                rg.mark = 1
                sub_ranges.append(rg.adjust(org_range))
            # Trailing fragment after the last full piece.
            tail_offset = max_plan.pos + (split_points_count -
                                          1) * max_plan.width
            if tail_offset < range_width:
                rg = Range(cur.begin + tail_offset, cur.end)
                rg.mark = 1
                sub_ranges.append(rg.adjust(org_range))

        # Return the sub-ranges.
        return sub_ranges