def detect_text():
    img = read_image_data(request)
    socketio.emit('new image', {'path': TextDetection.save_image(img)})
    boxes = td.expand_text_box(td.prediction_function(img)['text_lines'])
    HEIGHT, WIDTH, _ = img.shape
    if boxes:
        min_x, min_y, max_x, max_y = td.text_bounding_rect(boxes, HEIGHT, WIDTH)
        # centre of the text bounding rect in the rotated frame
        rotated_x = float(min_x + int((max_x - min_x) / 2))
        rotated_y = float(min_y + int((max_y - min_y) / 2))
        # map the point back into the unrotated frame: (x, y) -> (y, WIDTH - x)
        mid_point = {
            'x': rotated_y,
            'y': WIDTH - rotated_x,
        }
        print('Trying to consume:')
        td.draw_text_boxes(img, boxes)
        socketio.emit('recognized_text', {'path': TextDetection.save_image(img)})
        print(mid_point)
        return jsonify(mid_point)
    else:
        return jsonify({})
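# Note on the coordinate swap in detect_text() above: read_image_data()
# rotates the incoming frame 90 degrees before detection, so the midpoint is
# mapped back to the unrotated frame with (x, y) -> (y, WIDTH - x). A minimal
# self-check of that mapping (pure arithmetic; unrotate_point is a
# hypothetical helper, not part of the project):
def unrotate_point(x, y, rotated_width):
    return {'x': y, 'y': rotated_width - x}

assert unrotate_point(10.0, 5.0, 100) == {'x': 5.0, 'y': 90.0}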
def __init__(self, url, settingValueDict):
    self.url = url
    print(settingValueDict)
    self.textSegmentation = TextSegmenation()
    self.textDetection = TextDetection(settingValueDict["ContourSize"])
    self.textOcr = TextOcr(settingValueDict["OCR"])
    self.textTranslator = TextTranslator(settingValueDict["Translator"],
                                         settingValueDict["Language"])
    self.textDraw = TextDraw(settingValueDict["FontStyle"],
                             settingValueDict["FontSize"])
    self.folder = FolderManager()
    self.downloader = DownloaderManager()
    self.customTqdm = tqdm
def ocr():
    img = read_image_data(request)
    socketio.emit('new image', {'path': TextDetection.save_image(img)})
    boxes = td.expand_text_box(td.prediction_function(img)['text_lines'])
    if boxes:
        drawn_img = img.copy()
        td.draw_text_boxes(drawn_img, boxes)
        drawn_path = TextDetection.save_image(drawn_img)
        socketio.emit('recognized_text', {'path': drawn_path})
        texts = [
            result['text']
            for result in OCR.get_text_cells(img, boxes)
            if result['text']
        ]
        socketio.emit('ocr_result', {'image': drawn_path, 'text': texts})
        return jsonify({"results": texts})
    # no text boxes detected: return an empty result instead of None
    return jsonify({"results": []})
def get_image():
    img = read_image_data(request)
    filename = TextDetection.save_image(img)
    socketio.emit('new image', {'path': filename})
    # hand the frame to the detection worker; results come back on out_queue
    td.in_queue.put_nowait((img.copy(), request.form['user_id']))
    for filename in td.out_queue.get():
        socketio.emit('new image', {'path': filename})
    return 'gotcha'
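# get_image() above assumes a background worker inside TextDetection that
# drains in_queue and publishes a list of saved-image paths on out_queue.
# A minimal sketch of that contract; detect_and_save() is a hypothetical
# method name, not confirmed by this excerpt:
def _detection_worker(td):
    while True:
        img, user_id = td.in_queue.get()   # blocks until get_image() enqueues
        paths = td.detect_and_save(img)    # hypothetical: run detection, save results
        td.out_queue.put(paths)            # get_image() iterates this list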
def init_ocr_model(rec_path, det_path):
    detection_pb = det_path
    recognition_pb = rec_path
    # CPU-only variant: hide GPUs and let TF fall back to CPU kernels
    with tf.device('/cpu:0'):
        tf_config = tf.ConfigProto(device_count={'GPU': 0},
                                   allow_soft_placement=True)
        detection_model = TextDetection(detection_pb, tf_config, max_size=1600)
        recognition_model = TextRecognition(recognition_pb, seq_len=27,
                                            config=tf_config)
        # np.save wraps a dict in a 0-d object array; "[()]" unwraps it
        label_dict = np.load('./reverse_label_dict_with_rects.npy',
                             allow_pickle=True)[()]
    return detection_model, recognition_model, label_dict
def test_ocr(img_name):
    img = cv2.imread(f'examples/{img_name}.jpg')
    boxes = td.expand_text_box(td.prediction_function(img)['text_lines'])
    if boxes:
        drawn_img = img.copy()
        td.draw_text_boxes(drawn_img, boxes)
        drawn_path = TextDetection.save_image(drawn_img)
        socketio.emit('recognized_text', {'path': drawn_path})
        texts = [
            result['text']
            for result in OCR.get_text_cells(img, boxes)
            if result['text']
        ]
        socketio.emit('ocr_result', {'image': drawn_path, 'text': texts})
        return jsonify({"results": texts})
    # no text boxes detected: return an empty result instead of None
    return jsonify({"results": []})
def init_ocr_model(rec_path, det_path):
    detection_pb = det_path
    recognition_pb = rec_path
    # GPU variant: grow GPU memory on demand instead of grabbing it all
    with tf.device('/gpu:0'):
        tf_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True),
            allow_soft_placement=True,
            log_device_placement=False)
        detection_model = TextDetection(detection_pb, tf_config, max_size=1600)
        recognition_model = TextRecognition(recognition_pb, seq_len=27,
                                            config=tf_config)
        # np.save wraps a dict in a 0-d object array; "[()]" unwraps it
        label_dict = np.load('./reverse_label_dict_with_rects.npy',
                             allow_pickle=True)[()]
    return detection_model, recognition_model, label_dict
def init_ocr_model():
    detection_pb = './checkpoint/ICDAR_0.7.pb'
    recognition_pb = './checkpoint/text_recognition.pb'
    # alternative checkpoint: './checkpoint/text_recognition_5435.pb'
    # os.environ["CUDA_VISIBLE_DEVICES"] = "9"
    with tf.device('/gpu:2'):
        tf_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True),  # visible_device_list="9"
            allow_soft_placement=True)
        detection_model = TextDetection(detection_pb, tf_config, max_size=1600)
        recognition_model = TextRecognition(recognition_pb, seq_len=27,
                                            config=tf_config)
        # allow_pickle=True is required on NumPy >= 1.16.3 to load the dict;
        # "[()]" unwraps the 0-d object array produced by np.save
        label_dict = np.load('./reverse_label_dict_with_rects.npy',
                             allow_pickle=True)[()]
    return detection_model, recognition_model, label_dict
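# Why the "[()]" index after np.load, demonstrated with a throwaway file:
# np.save wraps a plain dict in a 0-d object array, and indexing with the
# empty tuple unwraps it back to the dict. allow_pickle=True is needed to
# load object arrays on recent NumPy versions.
import numpy as np

np.save('/tmp/demo_dict.npy', {'a': 1})
loaded = np.load('/tmp/demo_dict.npy', allow_pickle=True)
assert loaded.shape == ()        # 0-d object array
assert loaded[()] == {'a': 1}    # unwrapped back to the original dict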
def main(eval_=True):
    global_start = time.time()

    print('-- READING IMAGES --')
    start = time.time()
    db_paths = sorted(glob(db_path + os.sep + '*.jpg'))
    qs_paths = sorted(glob(qs1_w5 + os.sep + '*.jpg'))
    db_images = [[cv2.imread(path)] for path in db_paths]
    qs_images = [cv2.imread(path) for path in qs_paths]
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- DENOISING IMAGES --')
    start = time.time()
    if not os.path.isfile(res_root + os.sep + 'denoised.pkl'):
        denoiser = Denoiser(qs_images)
        qs_denoised = denoiser.denoise()
        with open(res_root + os.sep + 'denoised.pkl', 'wb') as ff:
            pickle.dump(qs_denoised, ff)
    else:
        with open(res_root + os.sep + 'denoised.pkl', 'rb') as ff:
            qs_denoised = pickle.load(ff)
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- DETECTING ORIENTATION --')
    start = time.time()
    if not (os.path.isfile(res_root + os.sep + 'angles.pkl')
            and os.path.isfile(res_root + os.sep + 'rotated.pkl')
            and os.path.isfile(res_root + os.sep + 'angles_real.pkl')):
        orientation = Orientation(qs_denoised)
        qs_angles, qs_angles_real, qs_rotated = orientation.compute_orientation()
        with open(res_root + os.sep + 'angles.pkl', 'wb') as ff:
            pickle.dump(qs_angles, ff)
        with open(res_root + os.sep + 'angles_real.pkl', 'wb') as ff:
            pickle.dump(qs_angles_real, ff)
        with open(res_root + os.sep + 'rotated.pkl', 'wb') as ff:
            pickle.dump(qs_rotated, ff)
    else:
        with open(res_root + os.sep + 'angles.pkl', 'rb') as ff:
            qs_angles = pickle.load(ff)
        with open(res_root + os.sep + 'angles_real.pkl', 'rb') as ff:
            qs_angles_real = pickle.load(ff)
        with open(res_root + os.sep + 'rotated.pkl', 'rb') as ff:
            qs_rotated = pickle.load(ff)
    print('-- DONE: Time: ' + str(time.time() - start))

    if eval_:
        print('-- EVALUATING ANGLES --')
        start = time.time()
        angle_evaluator = EvaluateAngles(qs_angles,
                                         qs1_w5 + os.sep + 'angles_qsd1w5.pkl')
        score = angle_evaluator.evaluate(degree_margin=1.5)
        print('-- DONE: Time: ' + str(time.time() - start))

    print('-- SPLITTING IMAGES --')
    start = time.time()
    if not (os.path.isfile(res_root + os.sep + 'splitted.pkl')
            and os.path.isfile(res_root + os.sep + 'qs_displays.pkl')):
        splitter = SplitImages(qs_rotated)
        qs_splitted, qs_displays = splitter.get_paintings()
        with open(res_root + os.sep + 'splitted.pkl', 'wb') as ff:
            pickle.dump(qs_splitted, ff)
        with open(res_root + os.sep + 'qs_displays.pkl', 'wb') as ff:
            pickle.dump(qs_displays, ff)
    else:
        with open(res_root + os.sep + 'splitted.pkl', 'rb') as ff:
            qs_splitted = pickle.load(ff)
        with open(res_root + os.sep + 'qs_displays.pkl', 'rb') as ff:
            qs_displays = pickle.load(ff)
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE FOREGROUND --')
    start = time.time()
    if not (os.path.isfile(res_root + os.sep + 'qs_masks_rot.pkl')
            and os.path.isfile(res_root + os.sep + 'qs_bboxs_rot.pkl')):
        removal = BackgroundRemoval(qs_splitted)
        qs_masks_rot, qs_bboxs_rot = removal.remove_background()
        with open(res_root + os.sep + 'qs_masks_rot.pkl', 'wb') as ff:
            pickle.dump(qs_masks_rot, ff)
        with open(res_root + os.sep + 'qs_bboxs_rot.pkl', 'wb') as ff:
            pickle.dump(qs_bboxs_rot, ff)
    else:
        with open(res_root + os.sep + 'qs_masks_rot.pkl', 'rb') as ff:
            qs_masks_rot = pickle.load(ff)
        with open(res_root + os.sep + 'qs_bboxs_rot.pkl', 'rb') as ff:
            qs_bboxs_rot = pickle.load(ff)
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- UNROTATE MASKS AND FOREGROUND BOUNDING BOXES --')
    start = time.time()
    if not (os.path.isfile(res_root + os.sep + 'qs_masks.pkl')
            and os.path.isfile(res_root + os.sep + 'qs_bboxs.pkl')):
        undo_rotation = Unrotate(qs_images)
        qs_masks, qs_bboxs = undo_rotation.unrotate(qs_angles, qs_bboxs_rot,
                                                    qs_masks_rot, qs_displays)
        with open(res_root + os.sep + 'qs_masks.pkl', 'wb') as ff:
            pickle.dump(qs_masks, ff)
        with open(res_root + os.sep + 'qs_bboxs.pkl', 'wb') as ff:
            pickle.dump(qs_bboxs, ff)
    else:
        with open(res_root + os.sep + 'qs_masks.pkl', 'rb') as ff:
            qs_masks = pickle.load(ff)
        with open(res_root + os.sep + 'qs_bboxs.pkl', 'rb') as ff:
            qs_bboxs = pickle.load(ff)
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE FRAMES OUTPUT PICKLE --')
    start = time.time()
    if not os.path.isfile(res_root + os.sep + 'frames.pkl'):
        qs_frames = []
        for ind, bboxs in enumerate(qs_bboxs):
            qs_frames.append([])
            for bbox in bboxs:
                qs_frames[-1].append([qs_angles[ind], bbox])
        with open(res_root + os.sep + 'frames.pkl', 'wb') as ff:
            pickle.dump(qs_frames, ff)
    else:
        with open(res_root + os.sep + 'frames.pkl', 'rb') as ff:
            qs_frames = pickle.load(ff)
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE TEXTBOXES --')
    start = time.time()
    if not os.path.isfile(res_root + os.sep + 'text_masks.pkl'):
        text_removal = TextDetection(qs_splitted)
        text_masks = text_removal.detect()
        with open(res_root + os.sep + 'text_masks.pkl', 'wb') as ff:
            pickle.dump(text_masks, ff)
    else:
        with open(res_root + os.sep + 'text_masks.pkl', 'rb') as ff:
            text_masks = pickle.load(ff)
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE DESCRIPTORS --')
    start = time.time()
    # SIFT alternative:
    # db_desc = SIFTDescriptor(db_images, None, None)
    # qs_desc = SIFTDescriptor(qs_splitted, mask_list=qs_masks_rot, bbox_list=text_masks)
    db_desc = ORBDescriptor(db_images, None, None)
    qs_desc = ORBDescriptor(qs_splitted, mask_list=qs_masks_rot,
                            bbox_list=text_masks)
    db_desc.compute_descriptors()
    qs_desc.compute_descriptors()
    print('-- DONE: Time: ' + str(time.time() - start))

    print('-- COMPUTE MATCHES --')
    start = time.time()
    matcher = MatcherFLANN(db_desc.result, qs_desc.result, flag=True)
    matcher.match(min_matches=12, match_ratio=0.65)
    with open('../results/result.pkl', 'wb') as ff:
        pickle.dump(matcher.result, ff)
    print('-- DONE: Time: ' + str(time.time() - start))

    if eval_:
        print('-- EVALUATING DESCRIPTORS --')
        start = time.time()
        desc_evaluator = EvaluateDescriptors(matcher.result,
                                             qs1_w5 + os.sep + 'gt_corresps.pkl')
        desc_evaluator.compute_mapatk(limit=1)
        print('MAP@1: [{0}]'.format(desc_evaluator.score))
        desc_evaluator.compute_mapatk(limit=5)
        print('MAP@5: [{0}]'.format(desc_evaluator.score))

    print('-- Total time: ' + str(time.time() - global_start))
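# Entry point for the retrieval pipeline above; db_path, qs1_w5 and res_root
# are assumed to be module-level path constants, as used throughout main().
if __name__ == '__main__':
    main(eval_=True)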
class MangaTranslator():

    def __init__(self, url, settingValueDict):
        self.url = url
        self.translatorType = settingValueDict["translator"]
        self.language = settingValueDict["language"]
        self.font = settingValueDict["fontstyle"]
        self.fontsize = settingValueDict["fontsize"]
        self.textSegmentation = TextSegmenation()
        self.textDetection = TextDetection()
        self.textOcr = TextOcr()
        self.textTranslator = TextTranslator(self.translatorType, self.language)
        self.textDraw = TextDraw(self.font, self.fontsize)
        self.folder = FolderManager()
        self.customTqdm = tqdm

    def processTranslation(self):
        # folder init
        self.folder.removeDir([self.folder.downloadPath])
        # download
        downloader = DownloaderManager()
        downloadFileList, mangaName = downloader.downloadUrl(self.url)
        # downloadFileList, mangaName = downloader.getDownloadedFilePathList()
        if mangaName == "":
            print("download fail")
            return -1
        oriFileList = self.folder.intitFolderEnv(downloadFileList, mangaName)
        self.sendInfo(mangaName, oriFileList[0], len(oriFileList))
        print(mangaName)

        self.threadCounter = 0
        self.lock = threading.Lock()
        self.lock1 = threading.Lock()
        self.lock2 = threading.Lock()
        self.lock3 = threading.Lock()
        self.lock4 = threading.Lock()
        self.lock5 = threading.Lock()

        # sequential alternative:
        # for fileName in tqdm(oriFileList):
        #     self.processTranslationTask(fileName)

        # start one worker thread per page
        tList = []
        for fileName in oriFileList:
            t = threading.Thread(target=self.processTranslationTask,
                                 args=(fileName,))
            t.daemon = True
            t.start()
            tList += [t]
        print("progress")
        # report thread progress
        for i in self.customTqdm(range(len(oriFileList))):
            while self.threadCounter <= i:
                time.sleep(0.5)

        # save files and clean up
        self.folder.saveFileAndRemove(mangaName)
        return 1

    def processTranslationTask(self, fileName):
        # segmentation
        with self.lock1:
            self.textSegmentation.segmentPage(fileName,
                                              self.folder.inpaintedFolder,
                                              self.folder.textOnlyFolder)
        # text detection
        with self.lock2:
            textBoxList = self.textDetection.textDetect(fileName,
                                                        self.folder.textOnlyFolder)
        # text OCR
        with self.lock3:
            textList = self.textOcr.getTextFromImg(fileName, textBoxList,
                                                   self.folder.textOnlyFolder)
        # text translation
        with self.lock4:
            textList_trans = self.textTranslator.translate(textList)
        # draw translated text
        with self.lock5:
            self.textDraw.drawTextToImage(fileName, textBoxList, textList_trans,
                                          self.folder.inpaintedFolder,
                                          self.folder.transalatedFolder)
        # count finished pages
        with self.lock:
            self.threadCounter += 1

    def sendInfo(self, title, image, pages):
        pass
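# Minimal usage sketch for MangaTranslator. The dict keys match the lowercase
# settingValueDict consumed by __init__ above; the concrete values and the
# URL are placeholders, not values confirmed by this excerpt.
if __name__ == '__main__':
    settings = {
        'translator': 'google',          # assumed translator backend name
        'language': 'en',
        'fontstyle': 'fonts/comic.ttf',  # assumed font path
        'fontsize': 20,
    }
    mt = MangaTranslator('https://example.com/manga/chapter-1', settings)
    if mt.processTranslation() == 1:
        print('translation finished')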
import base64

import cv2
import imutils
import numpy as np
import pytz
from flask import Flask, request, render_template, jsonify
from flask_socketio import SocketIO

from text_detection import (
    OCR,
    TextDetection,
)

tz = pytz.timezone('Europe/Berlin')

app = Flask(__name__)
socketio = SocketIO(app)
td = TextDetection(split=False)


def read_image_data(request):
    # decode the base64-encoded form field and rotate the frame 90 degrees
    image_data = base64.b64decode(request.form['image'])
    return imutils.rotate_bound(
        cv2.imdecode(np.frombuffer(image_data, dtype=np.uint8),
                     cv2.IMREAD_COLOR), 90)


@app.route('/', methods=['GET'])
def index():
    return render_template('index.html')
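# Hedged client-side sketch: how a caller might POST a frame to the routes
# above. Only read_image_data()'s expectation of a base64 'image' form field
# comes from the code; the '/detect_text' route path is an assumption, since
# the route decorators for detect_text()/ocr() are not shown in this excerpt.
import requests

def send_frame(path, endpoint='http://localhost:5000/detect_text'):
    with open(path, 'rb') as fh:
        encoded = base64.b64encode(fh.read()).decode('ascii')
    return requests.post(endpoint, data={'image': encoded}).json()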