示例#1
0
def detect_text():
    """Detect text in the uploaded image and return the centre of its bounds.

    The image is read from the current request and its saved path is emitted
    on the ``'new image'`` socket event.  When the detector yields boxes,
    they are drawn onto the image, the annotated image's path is emitted on
    ``'recognized_text'``, and the centre of the overall bounding rectangle
    is returned as JSON.  The returned axes are swapped: ``x`` carries the
    vertical centre and ``y`` carries the image width minus the horizontal
    centre.  An empty JSON object is returned when there are no boxes.
    """
    image = read_image_data(request)

    socketio.emit('new image', {'path': TextDetection.save_image(image)})

    prediction = td.prediction_function(image)
    text_boxes = td.expand_text_box(prediction['text_lines'])

    img_height, img_width, _ = image.shape

    # Guard clause: nothing detected, nothing to draw or report.
    if not text_boxes:
        return jsonify({})

    left, top, right, bottom = td.text_bounding_rect(
        text_boxes, img_height, img_width)

    centre_x = float(left + int((right - left) / 2))
    centre_y = float(top + int((bottom - top) / 2))

    mid_point = {'x': centre_y, 'y': img_width - centre_x}

    print('Trying to consume:')
    td.draw_text_boxes(image, text_boxes)

    annotated_path = TextDetection.save_image(image)
    socketio.emit('recognized_text', {'path': annotated_path})

    print(mid_point)
    return jsonify(mid_point)
示例#2
0
    def __init__(self, url, settingValueDict):
        """Store the source URL and construct the processing components.

        ``settingValueDict`` is printed and then read for the keys
        ``"ContourSize"``, ``"OCR"``, ``"Translator"``, ``"Language"``,
        ``"FontStyle"`` and ``"FontSize"``; a missing key raises ``KeyError``.
        """
        settings = settingValueDict

        self.url = url
        print(settings)

        self.textSegmentation = TextSegmenation()

        contour_size = settings["ContourSize"]
        self.textDetection = TextDetection(contour_size)

        ocr_choice = settings["OCR"]
        self.textOcr = TextOcr(ocr_choice)

        translator_choice = settings["Translator"]
        target_language = settings["Language"]
        self.textTranslator = TextTranslator(translator_choice,
                                             target_language)

        font_style = settings["FontStyle"]
        font_size = settings["FontSize"]
        self.textDraw = TextDraw(font_style, font_size)

        self.folder = FolderManager()
        self.downloader = DownloaderManager()

        # Progress-bar factory kept as an attribute so it can be swapped.
        self.customTqdm = tqdm
示例#3
0
 def __init__(self, url, settingValueDict):
     """Store the URL and settings, then construct the processing components.

     ``settingValueDict`` is read for the keys ``"translator"``,
     ``"language"``, ``"fontstyle"`` and ``"fontsize"`` (in that order); a
     missing key raises ``KeyError``.
     """
     self.url = url

     # (attribute name, settings key) pairs, copied onto the instance in order.
     setting_attributes = (
         ("translatorType", "translator"),
         ("language", "language"),
         ("font", "fontstyle"),
         ("fontsize", "fontsize"),
     )
     for attribute, key in setting_attributes:
         setattr(self, attribute, settingValueDict[key])

     self.textSegmentation = TextSegmenation()
     self.textDetection = TextDetection()
     self.textOcr = TextOcr()
     self.textTranslator = TextTranslator(self.translatorType,
                                          self.language)
     self.textDraw = TextDraw(self.font, self.fontsize)
     self.folder = FolderManager()

     # Progress-bar factory kept as an attribute so it can be swapped.
     self.customTqdm = tqdm
示例#4
0
def ocr():
    """Run text detection and OCR on the uploaded image.

    The image is read from the current request and its saved path is emitted
    on the ``'new image'`` socket event.  When text boxes are found, they are
    drawn on a copy of the image (the original stays untouched for OCR), the
    annotated image's path is emitted on ``'recognized_text'``, and the
    non-empty recognised strings are emitted on ``'ocr_result'``.

    Returns:
        A JSON response of the form ``{"results": [...]}``.  The list is
        empty when no text boxes were detected.
    """
    img = read_image_data(request)
    socketio.emit('new image', {'path': TextDetection.save_image(img)})

    boxes = td.expand_text_box(td.prediction_function(img)['text_lines'])

    if not boxes:
        # The function used to fall through here and return None, which
        # Flask rejects as an invalid view response.  Answer with an empty
        # result list instead so clients always get the same JSON shape.
        return jsonify({"results": []})

    drawn_img = img.copy()
    td.draw_text_boxes(drawn_img, boxes)
    drawn_path = TextDetection.save_image(drawn_img)
    socketio.emit('recognized_text', {'path': drawn_path})

    # Keep only cells for which the recogniser produced some text.
    texts = [
        result['text'] for result in OCR.get_text_cells(img, boxes)
        if result['text']
    ]

    socketio.emit('ocr_result', {'image': drawn_path, 'text': texts})

    return jsonify({"results": texts})
示例#5
0
def get_image():
    """Queue the uploaded image for processing and relay the produced paths.

    The image is read from the current request and its saved path is emitted
    on the ``'new image'`` socket event.  A copy of the image together with
    the ``user_id`` form field is put on ``td.in_queue`` without waiting.
    One item is then taken from ``td.out_queue`` and every path in it is
    emitted on ``'new image'``.  The literal string ``'gotcha'`` is returned.
    """
    image = read_image_data(request)

    saved_path = TextDetection.save_image(image)
    socketio.emit('new image', {'path': saved_path})

    job = (image.copy(), request.form['user_id'])
    td.in_queue.put_nowait(job)

    # presumably out_queue is a queue.Queue, so get() waits for the worker
    # -- TODO confirm
    produced_paths = td.out_queue.get()
    for produced_path in produced_paths:
        socketio.emit('new image', {'path': produced_path})

    return 'gotcha'
示例#6
0
def init_ocr_model(rec_path, det_path):
    """Build the detection and recognition models on the CPU.

    Args:
        rec_path: path handed to ``TextRecognition`` as its model file.
        det_path: path handed to ``TextDetection`` as its model file.

    Returns:
        tuple: ``(detection_model, recognition_model, label_dict)`` where
        ``label_dict`` is the object stored in
        ``./reverse_label_dict_with_rects.npy``.
    """
    with tf.device('/cpu:0'):
        # device_count={'GPU': 0} requests a session with no GPU devices.
        session_config = tf.ConfigProto(device_count={'GPU': 0},
                                        allow_soft_placement=True)

        detector = TextDetection(det_path, session_config, max_size=1600)
        recognizer = TextRecognition(rec_path,
                                     seq_len=27,
                                     config=session_config)

    label_array = np.load('./reverse_label_dict_with_rects.npy',
                          allow_pickle=True)
    # Indexing with an empty tuple unwraps the single stored object.
    label_dict = label_array[()]

    return detector, recognizer, label_dict
示例#7
0
def test_ocr(img_name):
    """Run detection and OCR on ``examples/<img_name>.jpg``.

    When text boxes are found, they are drawn on a copy of the image, the
    annotated image's path is emitted on ``'recognized_text'``, the non-empty
    recognised strings are emitted on ``'ocr_result'`` and returned as
    ``jsonify({"results": texts})``.  When no boxes are found the function
    returns ``None``.
    """
    image = cv2.imread(f'examples/{img_name}.jpg')
    prediction = td.prediction_function(image)
    text_boxes = td.expand_text_box(prediction['text_lines'])

    if not text_boxes:
        return None

    # Draw on a copy so OCR below still sees the unmarked image.
    annotated = image.copy()
    td.draw_text_boxes(annotated, text_boxes)
    annotated_path = TextDetection.save_image(annotated)
    socketio.emit('recognized_text', {'path': annotated_path})

    texts = []
    for cell in OCR.get_text_cells(image, text_boxes):
        if cell['text']:
            texts.append(cell['text'])

    socketio.emit('ocr_result', {'image': annotated_path, 'text': texts})

    return jsonify({"results": texts})
示例#8
0
def init_ocr_model(rec_path, det_path):
    """Build the detection and recognition models under ``/gpu:0``.

    Args:
        rec_path: path handed to ``TextRecognition`` as its model file.
        det_path: path handed to ``TextDetection`` as its model file.

    Returns:
        tuple: ``(detection_model, recognition_model, label_dict)`` where
        ``label_dict`` is the object stored in
        ``./reverse_label_dict_with_rects.npy``.
    """
    with tf.device('/gpu:0'):
        gpu_options = tf.GPUOptions(allow_growth=True)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        allow_soft_placement=True,
                                        log_device_placement=False)

        detector = TextDetection(det_path, session_config, max_size=1600)
        recognizer = TextRecognition(rec_path,
                                     seq_len=27,
                                     config=session_config)

    label_array = np.load('./reverse_label_dict_with_rects.npy',
                          allow_pickle=True)
    # Indexing with an empty tuple unwraps the single stored object.
    label_dict = label_array[()]

    return detector, recognizer, label_dict
示例#9
0
def init_ocr_model():
    """Build the detection and recognition models and load the label dict.

    The model files are read from fixed paths under ``./checkpoint`` and the
    models are constructed inside a ``tf.device('/gpu:2')`` scope with soft
    placement and on-demand GPU memory growth enabled.

    Returns:
        tuple: ``(detection_model, recognition_model, label_dict)`` where
        ``label_dict`` is the object stored in
        ``./reverse_label_dict_with_rects.npy``.
    """
    detection_pb = './checkpoint/ICDAR_0.7.pb'
    recognition_pb = './checkpoint/text_recognition.pb'

    with tf.device('/gpu:2'):
        tf_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True),
            allow_soft_placement=True)

        detection_model = TextDetection(detection_pb, tf_config, max_size=1600)
        recognition_model = TextRecognition(recognition_pb,
                                            seq_len=27,
                                            config=tf_config)

    # The file is unwrapped with [()] below, i.e. it stores a single Python
    # object, which np.load refuses to read unless allow_pickle=True (the
    # default has been False since NumPy 1.16.3).
    # NOTE: unpickling runs arbitrary code; only load a trusted local file.
    label_dict = np.load('./reverse_label_dict_with_rects.npy',
                         allow_pickle=True)[()]
    return detection_model, recognition_model, label_dict
示例#10
0
def _load_or_compute(filenames, compute):
    """Return cached stage results from ``res_root``, computing them if needed.

    Args:
        filenames: pickle file names (relative to ``res_root``), one per
            value the stage produces.
        compute: zero-argument callable returning a sequence with one value
            per entry of ``filenames``.  It is only called when at least one
            cache file is missing.

    Returns:
        tuple: the stage values, in the order of ``filenames``.

    Raises:
        ValueError: if ``compute`` returns a different number of values than
            there are file names.
    """
    paths = [res_root + os.sep + name for name in filenames]

    if all(os.path.isfile(path) for path in paths):
        # NOTE: pickle.load can execute arbitrary code; these files are
        # caches written by this same function and must stay trusted.
        cached = []
        for path in paths:
            with open(path, 'rb') as ff:
                cached.append(pickle.load(ff))
        return tuple(cached)

    values = tuple(compute())
    if len(values) != len(paths):
        raise ValueError('expected {0} values for {1}, got {2}'.format(
            len(paths), list(filenames), len(values)))
    for path, value in zip(paths, values):
        with open(path, 'wb') as ff:
            pickle.dump(value, ff)
    return values


def main(eval_=True):
    """Run the full query-set pipeline and write the match results.

    Stages, in order: read images, denoise, detect orientation, split
    paintings, compute foreground masks, undo the rotation on masks and
    boxes, build the frames pickle, detect text boxes, compute ORB
    descriptors and match them against the database.  Every stage that has
    a pickle cache under ``res_root`` is skipped when its cache files exist.
    The matcher result is written to ``../results/result.pkl``.

    Args:
        eval_: when true, also evaluate the detected angles and the MAP@1 /
            MAP@5 of the matches against the ground truth stored next to
            the query set.
    """
    global_start = time.time()

    print('-- READING IMAGES --')
    start = time.time()
    db_paths = sorted(glob(db_path + os.sep + '*.jpg'))
    qs_paths = sorted(glob(qs1_w5 + os.sep + '*.jpg'))
    # Each database image is wrapped in its own single-element list.
    db_images = [[cv2.imread(path)] for path in db_paths]
    qs_images = [cv2.imread(path) for path in qs_paths]
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- DENOISING IMAGES --')
    start = time.time()
    (qs_denoised,) = _load_or_compute(
        ['denoised.pkl'],
        lambda: (Denoiser(qs_images).denoise(),))
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- DETECTING ORIENTATION --')
    start = time.time()
    qs_angles, qs_angles_real, qs_rotated = _load_or_compute(
        ['angles.pkl', 'angles_real.pkl', 'rotated.pkl'],
        lambda: Orientation(qs_denoised).compute_orientation())
    print('-- DONE: Time: ' + str(time.time() - start))

    if eval_:
        print('-- EVALUATING ANGLES --')
        start = time.time()
        angle_evaluator = EvaluateAngles(qs_angles,
                                         qs1_w5 + os.sep + 'angles_qsd1w5.pkl')
        # The returned score was never used; the call itself is kept.
        angle_evaluator.evaluate(degree_margin=1.5)
        print('-- DONE: Time: ' + str(time.time() - start))

    print('-- SPLITTING IMAGES --')
    start = time.time()
    qs_splitted, qs_displays = _load_or_compute(
        ['splitted.pkl', 'qs_displays.pkl'],
        lambda: SplitImages(qs_rotated).get_paintings())
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE FOREGROUND --')
    start = time.time()
    qs_masks_rot, qs_bboxs_rot = _load_or_compute(
        ['qs_masks_rot.pkl', 'qs_bboxs_rot.pkl'],
        lambda: BackgroundRemoval(qs_splitted).remove_background())
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- UNROTATE MASKS AND FOREGROUND BOUNDING BOXES --')
    start = time.time()
    qs_masks, qs_bboxs = _load_or_compute(
        ['qs_masks.pkl', 'qs_bboxs.pkl'],
        lambda: Unrotate(qs_images).unrotate(qs_angles, qs_bboxs_rot,
                                             qs_masks_rot, qs_displays))
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE FRAMES OUTPUT PICKLE --')
    start = time.time()
    # One [angle, bbox] pair per detected painting, grouped per query image.
    (qs_frames,) = _load_or_compute(
        ['frames.pkl'],
        lambda: ([[[qs_angles[ind], bbox] for bbox in bboxs]
                  for ind, bboxs in enumerate(qs_bboxs)],))
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE TEXTBOXES --')
    start = time.time()
    (text_masks,) = _load_or_compute(
        ['text_masks.pkl'],
        lambda: (TextDetection(qs_splitted).detect(),))
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE DESCRIPTORS --')
    start = time.time()
    db_desc = ORBDescriptor(db_images, None, None)
    qs_desc = ORBDescriptor(qs_splitted,
                            mask_list=qs_masks_rot,
                            bbox_list=text_masks)
    db_desc.compute_descriptors()
    qs_desc.compute_descriptors()
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE MATCHES --')
    start = time.time()
    matcher = MatcherFLANN(db_desc.result, qs_desc.result, flag=True)
    matcher.match(min_matches=12, match_ratio=0.65)
    # NOTE(review): this path is fixed and does not use res_root -- confirm
    # that '../results' is the intended output directory.
    with open('../results/result.pkl', 'wb') as ff:
        pickle.dump(matcher.result, ff)
    print('-- DONE: Time: ' + str(time.time() - start))

    if eval_:
        print('-- EVALUATING DESCRIPTORS --')
        start = time.time()
        desc_evaluator = EvaluateDescriptors(
            matcher.result, qs1_w5 + os.sep + 'gt_corresps.pkl')
        desc_evaluator.compute_mapatk(limit=1)
        print('MAP@1: [{0}]'.format(desc_evaluator.score))
        desc_evaluator.compute_mapatk(limit=5)
        print('MAP@5: [{0}]'.format(desc_evaluator.score))
    print('-- Total time: ' + str(time.time() - global_start))
示例#11
0
class MangaTranslator():
    """Download a manga from a URL and translate every page of it.

    Each page runs through five stages: segmentation, text detection, OCR,
    translation and drawing.  Pages are processed on one thread each, and
    every stage is guarded by its own lock so that only one page at a time
    is inside a given stage.
    """

    def __init__(self, url, settingValueDict):
        """Store the URL and settings and construct the stage objects.

        ``settingValueDict`` is read for the keys ``"translator"``,
        ``"language"``, ``"fontstyle"`` and ``"fontsize"``; a missing key
        raises ``KeyError``.
        """
        self.url = url

        self.translatorType = settingValueDict["translator"]
        self.language = settingValueDict["language"]
        self.font = settingValueDict["fontstyle"]
        self.fontsize = settingValueDict["fontsize"]

        self.textSegmentation = TextSegmenation()
        self.textDetection = TextDetection()
        self.textOcr = TextOcr()
        self.textTranslator = TextTranslator(self.translatorType, self.language)
        self.textDraw = TextDraw(self.font, self.fontsize)
        self.folder = FolderManager()

        # Progress-bar factory kept as an attribute so it can be swapped.
        self.customTqdm = tqdm

    def processTranslation(self,):
        """Download the manga, translate all pages and save the result.

        Returns:
            int: ``-1`` when the download yields an empty manga name,
            ``1`` once every page task has finished and the output is saved.
        """
        # Start from a clean download folder.
        self.folder.removeDir([self.folder.downloadPath])

        downloader = DownloaderManager()
        downloadFileList, mangaName = downloader.downloadUrl(self.url)

        if mangaName == "":
            print("download fail")
            return -1

        oriFileList = self.folder.intitFolderEnv(downloadFileList, mangaName)
        self.sendInfo(mangaName, oriFileList[0], len(oriFileList))
        print(mangaName)

        # ``lock`` protects the finished-task counter; lock1..lock5 each
        # serialise one pipeline stage.
        self.threadCounter = 0
        self.lock = threading.Lock()
        self.lock1 = threading.Lock()
        self.lock2 = threading.Lock()
        self.lock3 = threading.Lock()
        self.lock4 = threading.Lock()
        self.lock5 = threading.Lock()

        # One daemon thread per page.
        for fileName in oriFileList:
            worker = threading.Thread(target=self.processTranslationTask,
                                      args=(fileName,))
            worker.daemon = True
            worker.start()
        print("progess")

        # Advance the progress bar by one step per finished task.
        for i in self.customTqdm(range(len(oriFileList))):
            while self.threadCounter <= i:
                time.sleep(0.5)

        self.folder.saveFileAndRemove(mangaName)

        return 1

    def processTranslationTask(self, fileName):
        """Run one page through all five stages.

        Stage locks are held through ``with`` blocks and the finished-task
        counter is incremented in a ``finally`` clause.  If a stage raises,
        its lock is therefore still released and the task is still counted,
        so other pages keep moving and ``processTranslation`` does not wait
        forever.  The exception itself is not swallowed; it propagates to
        the caller (the thread's exception hook when run on a worker).
        """
        try:
            with self.lock1:
                self.textSegmentation.segmentPage(
                    fileName, self.folder.inpaintedFolder,
                    self.folder.textOnlyFolder)

            with self.lock2:
                textBoxList = self.textDetection.textDetect(
                    fileName, self.folder.textOnlyFolder)

            with self.lock3:
                textList = self.textOcr.getTextFromImg(
                    fileName, textBoxList, self.folder.textOnlyFolder)

            with self.lock4:
                textList_trans = self.textTranslator.translate(textList)

            with self.lock5:
                self.textDraw.drawTextToImage(
                    fileName, textBoxList, textList_trans,
                    self.folder.inpaintedFolder,
                    self.folder.transalatedFolder)
        finally:
            # Count the task as finished whether it succeeded or failed.
            with self.lock:
                self.threadCounter += 1

    def sendInfo(self, title, image, pages):
        """Hook called with the manga title, first page and page count.

        Does nothing here.
        """
        pass
示例#12
0
import pytz
import imutils
from flask import Flask, request, render_template, jsonify
from flask_socketio import SocketIO

from text_detection import (
    OCR,
    TextDetection,
)

# Time zone object for Europe/Berlin.
tz = pytz.timezone('Europe/Berlin')

# Flask application with a Socket.IO server attached to it.
app = Flask(__name__)
socketio = SocketIO(app)

# Module-level TextDetection instance, created with split=False.
td = TextDetection(split=False)


def read_image_data(request):
    """Decode the base64 ``image`` form field into a rotated colour image.

    Args:
        request: object whose ``form['image']`` holds a base64-encoded,
            compressed image.

    Returns:
        The image decoded with ``cv2.IMREAD_COLOR`` and passed through
        ``imutils.rotate_bound`` with an angle of 90.

    Raises:
        ValueError: if OpenCV cannot decode the supplied bytes as an image.
    """
    image_data = base64.b64decode(request.form['image'])

    # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
    encoded = np.frombuffer(image_data, dtype=np.uint8)
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode signals failure with None; fail here with a clear message
        # instead of an obscure error inside the rotation helper.
        raise ValueError("form field 'image' is not a decodable image")

    return imutils.rotate_bound(image, 90)


@app.route('/', methods=['GET'])
def index():
    """Serve the ``index.html`` template for GET requests to ``/``."""
    page = render_template('index.html')
    return page