def get_text_bounding_boxes(image, psm=12):
    bounding_boxes = []
    height, width = image.shape[:2]
    if height <= 0 or width <= 0:
        return bounding_boxes
    image_pil = Image.fromarray(image)  # Load PIL image from numpy array
    api = PyTessBaseAPI(psm=psm, oem=OEM.LSTM_ONLY)
    try:
        # api.SetVariable('textord_tabfind_find_tables', 'true')
        # api.SetVariable('textord_tablefind_recognize_tables', 'true')
        api.SetImage(image_pil)
        api.Recognize()
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        for (im, box, _, _) in boxes:
            x, y, w, h = box['x'], box['y'], box['w'], box['h']
            bounding_boxes.append((x, y, w, h))
    finally:
        api.End()
    return bounding_boxes
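# Minimal usage sketch for get_text_bounding_boxes (assumes OpenCV is available;
# the file name 'page.png' is illustrative only):
def _demo_get_text_bounding_boxes():
    import cv2
    img = cv2.imread('page.png')           # BGR numpy array
    boxes = get_text_bounding_boxes(img)   # list of (x, y, w, h) tuples
    for x, y, w, h in boxes:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 1)
    cv2.imwrite('page_boxes.png', img)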
def read_text_with_confidence(image, lang='fast_ind',
                              path='/usr/share/tesseract-ocr/5/tessdata',
                              psm=4, whitelist=''):
    # Default return values, so the function still returns cleanly if
    # Tesseract raises an exception below.
    text = ''
    confidence = 0
    height, width = image.shape[:2]
    if height <= 0 or width <= 0:
        return text, confidence
    image_pil = Image.fromarray(image)
    api = PyTessBaseAPI(lang=lang, psm=psm, path=path, oem=OEM.LSTM_ONLY)
    try:
        api.SetImage(image_pil)
        if whitelist != '':
            api.SetVariable('tessedit_char_whitelist', whitelist)
        api.Recognize()
        text = api.GetUTF8Text()
        confidence = api.MeanTextConf()
    except Exception:
        print("[ERROR] Tesseract exception")
    finally:
        api.End()
    return text, confidence
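# Usage sketch (assumes the 'fast_ind' traineddata and the tessdata path above
# exist on the target machine; the confidence cutoff of 60 is illustrative):
def _demo_read_text_with_confidence(roi):
    # roi: numpy array cropped to the region of interest
    text, conf = read_text_with_confidence(roi, psm=7, whitelist='0123456789')
    if conf >= 60:
        return text.strip()
    return ''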
def ocr(img, level):
    """Use Tesseract layout analysis to detect text blocks in an image.

    Args:
        img: Image as a numpy array.
        level: Iteration level (a RIL value).

    Returns:
        A list of bounding-box tuples, each with the block type appended.
    """
    result = []
    with c_locale():
        from tesserocr import PyTessBaseAPI
        api = PyTessBaseAPI()
        api.SetPageSegMode(PSM.AUTO_OSD)
        # api.SetImageFile(imagePath)
        api.SetImage(Image.fromarray(img))
        blockIter = api.AnalyseLayout()
        while blockIter.Next(level):
            pt = blockIter.BlockType()
            # result.append(blockIter.Baseline(level))
            if pt in [1, 6]:
                result.append(blockIter.BoundingBox(level) + (pt,))
        api.End()
    return result
class TextExtractor:
    def __init__(self, image_path, seg_mode=PSM.SPARSE_TEXT):
        self.api = PyTessBaseAPI()
        self.api.SetPageSegMode(seg_mode)
        self.api.SetImageFile(image_path)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def _extract(self) -> Tuple:
        text = self.api.GetUTF8Text()
        conf = self.api.MeanTextConf()
        return text, conf

    def _extract_from_rect(self, x, y, w, h) -> Tuple:
        self.api.SetRectangle(x, y, w, h)
        return self._extract()

    # TODO: Add support of zero values
    def extract(self, x=None, y=None, w=None, h=None) -> Tuple:
        if all([x, y, w, h]):
            return self._extract_from_rect(x, y, w, h)
        else:
            return self._extract()

    def close(self):
        self.api.End()
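# Usage sketch for TextExtractor as a context manager (the image path is
# illustrative; any readable image file works):
def _demo_text_extractor():
    with TextExtractor('scan.png', seg_mode=PSM.SPARSE_TEXT) as extractor:
        # OCR the whole page
        full_text, full_conf = extractor.extract()
        # Restrict OCR to a rectangle (x, y, width, height) within the page
        region_text, region_conf = extractor.extract(10, 10, 200, 50)
    return full_text, region_text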
def tess_ocr(img):
    """Get text from an image.

    Args:
        img: The file path of the image.

    Returns:
        A string.

    Raises:
        IOError: An error occurred accessing the img object.
    """
    with c_locale():
        from tesserocr import PyTessBaseAPI, PSM
        api = PyTessBaseAPI(lang='chi_sim', psm=PSM.AUTO_OSD)
        api.SetImageFile(img)
        text = api.GetUTF8Text()
        api.End()
    return text
def get_boxes(image_filename: str) -> list:
    image = Image.open(image_filename)
    width = image.width
    height = image.height
    max_width = width // 2
    max_height = height // 2
    api = PyTessBaseAPI(lang="jpn_vert")
    # api.ReadConfigFile("tess.conf")
    api.SetPageSegMode(PSM.SPARSE_TEXT_OSD)
    api.SetImage(image)
    api.Recognize(0)
    ri = api.GetIterator()
    level = RIL.WORD
    boxes = []
    for r in iterate_level(ri, level):
        conf = r.Confidence(level)
        text = r.GetUTF8Text(level)
        left, top, right, bottom = r.BoundingBox(level)
        # boxes = api.GetComponentImages(RIL.SYMBOL, True)
        # for im, rect, _, _ in boxes:
        #     # im is a PIL image object
        #     # rect is a dict with x, y, w and h keys
        #     left, top, right, bottom = rect['x'], rect['y'], rect['w'], rect['h']
        #     api.SetRectangle(left, top, right, bottom)
        #     text = api.GetUTF8Text()
        #     conf = api.MeanTextConf()
        print("'%s' \tConf: %.2f \tCoords: %d,%d,%d,%d" %
              (text, conf, left, top, right, bottom))
        box = {
            'text': text,
            'left': left,
            'top': top,
            'width': right - left,
            'height': bottom - top
        }
        if should_ignore_box(conf, box, max_width, max_height):
            continue
        boxes.append(box)
    api.End()
    image.close()
    return boxes
class OcrWrapper(BaseImageToString):
    _OPTIONS = ('tessedit_char_whitelist', '0123456789ABCDEF.-')

    def __init__(self):
        if sys.platform == 'win32':
            self._ocr = PyTessBaseAPI(
                path="C:\\Program Files\\Tesseract-OCR\\tessdata")
        else:
            self._ocr = PyTessBaseAPI()
        self._ocr.SetVariable(self._OPTIONS[0], self._OPTIONS[1])

    def image_to_string(self, image: Image) -> str:
        image.format = 'PNG'
        self._ocr.SetImage(image)
        raw_data = self._ocr.GetUTF8Text()
        return raw_data

    def end(self):
        self._ocr.End()
class Analyzer(object):
    TEXT_TYPES = set([
        PT.FLOWING_TEXT, PT.HEADING_TEXT, PT.PULLOUT_TEXT, PT.VERTICAL_TEXT,
        PT.CAPTION_TEXT
    ])

    def __init__(self, lang=None):
        super(Analyzer, self).__init__()
        kwargs = {}
        if lang is not None:
            kwargs['lang'] = lang
        self.api = PyTessBaseAPI(psm=PSM.AUTO_OSD, **kwargs)

    def analyze_image(self, image):
        page = Page()
        self.api.SetImage(image)
        self.api.Recognize()
        iterator = self.api.GetIterator()
        page.blocks = self.__decode_blocks(iterator, image)
        page.size = Size(*image.size)
        return page

    def close(self):
        self.api.End()

    def __decode_blocks(self, iterator, image):
        blocks = []
        for tesseract_block in iterate_level(iterator, RIL.BLOCK):
            block = Block()
            block.bounding_box = BoundingBox.from_coordinates(
                *tesseract_block.BoundingBox(RIL.BLOCK))
            if not tesseract_block.GetUTF8Text(RIL.BLOCK).strip():
                block.image = tesseract_block.GetImage(RIL.BLOCK, 0, image)
                blocks.append(block)
                continue
            block.paragraphs = self.__decode_paragraphs(iterator)
            blocks.append(block)
        return blocks

    def __decode_paragraphs(self, iterator):
        paragraphs = []
        for tesseract_paragraph in iterate_level(iterator, RIL.PARA):
            paragraph = Paragraph()
            paragraph.bounding_box = BoundingBox.from_coordinates(
                *tesseract_paragraph.BoundingBox(RIL.PARA))
            paragraph.lines = self.__decode_lines(iterator)
            paragraphs.append(paragraph)
            if iterator.IsAtFinalElement(RIL.BLOCK, RIL.PARA):
                break
        return paragraphs

    def __decode_lines(self, iterator):
        lines = []
        for tesseract_line in iterate_level(iterator, RIL.TEXTLINE):
            line = TextLine()
            line.bounding_box = BoundingBox.from_coordinates(
                *tesseract_line.BoundingBox(RIL.TEXTLINE))
            line.words = self.__decode_words(iterator)
            lines.append(line)
            if iterator.IsAtFinalElement(RIL.PARA, RIL.TEXTLINE):
                break
        return lines

    def __decode_words(self, iterator):
        words = []
        for tesseract_word in iterate_level(iterator, RIL.WORD):
            font_attributes = tesseract_word.WordFontAttributes()
            word = Word()
            word.bounding_box = BoundingBox.from_coordinates(
                *tesseract_word.BoundingBox(RIL.WORD))
            word.confidence = float(tesseract_word.Confidence(RIL.WORD)) / 100.0
            word.text = tesseract_word.GetUTF8Text(RIL.WORD)
            word.symbols = self.__decode_symbols(iterator)
            font = Font()
            font.bold = font_attributes['bold']
            font.italic = font_attributes['italic']
            font.underline = font_attributes['underlined']
            font.monospace = font_attributes['monospace']
            font.serif = font_attributes['serif']
            font.pointsize = font_attributes['pointsize']
            font.id = font_attributes['font_id']
            for symbol in word.symbols:
                symbol.font = font
            words.append(word)
            if iterator.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
                break
        return words

    def __decode_symbols(self, iterator):
        symbols = []
        for tesseract_symbol in iterate_level(iterator, RIL.SYMBOL):
            symbol = Symbol()
            symbol.bounding_box = BoundingBox.from_coordinates(
                *tesseract_symbol.BoundingBox(RIL.SYMBOL))
            symbol.confidence = float(tesseract_symbol.Confidence(RIL.SYMBOL)) / 100.0
            symbol.text = tesseract_symbol.GetUTF8Text(RIL.SYMBOL)
            symbol.image = tesseract_symbol.GetBinaryImage(RIL.SYMBOL).convert(
                '1', dither=Image.NONE)
            symbols.append(symbol)
            if iterator.IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
                break
        return symbols
class OCR(object):
    MAX_MODELS = 5
    DEFAULT_MODE = PSM.AUTO_OSD
    # DEFAULT_MODE = PSM.AUTO

    def __init__(self):
        # Tesseract language types:
        _, self.supported = get_languages()

    def language_list(self, languages):
        models = [c for c in alpha3(languages) if c in self.supported]
        if len(models) > self.MAX_MODELS:
            log.warning("Too many models, limit: %s", self.MAX_MODELS)
            models = models[:self.MAX_MODELS]
        models.append('eng')
        return '+'.join(sorted(set(models)))

    def configure_engine(self, languages, mode):
        # log.info("Configuring OCR engine (%s)", languages)
        if not hasattr(self, 'api') or self.api is None:
            self.api = PyTessBaseAPI(lang=languages, oem=OEM.LSTM_ONLY)
        if languages != self.api.GetInitLanguagesAsString():
            self.api.Init(lang=languages, oem=OEM.LSTM_ONLY)
        if mode != self.api.GetPageSegMode():
            self.api.SetPageSegMode(mode)
        return self.api

    def clear_engine(self):
        """Shut down tesseract and clear all memory."""
        try:
            self.api.End()
        except Exception:
            log.exception("Failed to shut down tesseract")
        self.api = None

    def extract_text(self, data, languages=None, mode=DEFAULT_MODE):
        """Extract text from a binary string of data."""
        try:
            image = Image.open(BytesIO(data))
            image.load()
        except Exception:
            log.exception("Cannot open image data using Pillow")
            return None
        api = None
        try:
            languages = self.language_list(languages)
            api = self.configure_engine(languages, mode)
            # TODO: play with contrast and sharpening the images.
            start_time = time.time()
            api.SetImage(image)
            text = api.GetUTF8Text()
            confidence = api.MeanTextConf()
            end_time = time.time()
            duration = end_time - start_time
            log.info("[OCR] %s chars (w: %s, h: %s, l: %s, c: %s), took: %.5f",
                     len(text), image.width, image.height, languages,
                     confidence, duration)
            return text
        finally:
            # Guard against configure_engine failing before `api` is bound.
            if api is not None:
                api.Clear()
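# Usage sketch for the OCR wrapper (the file name is illustrative, and the
# format of the `languages` argument is assumed to be whatever alpha3() in this
# module expects, e.g. a list of ISO language codes):
def _demo_extract_text():
    ocr = OCR()
    with open('invoice.png', 'rb') as fh:
        data = fh.read()
    text = ocr.extract_text(data, languages=['deu', 'fra'])
    ocr.clear_engine()
    return text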
def run_ocr_in_chart(chart, pad=0, psm=PSM.SINGLE_LINE):
    """
    Run OCR for all the text boxes in a chart.

    :param chart: chart object holding the image and the text boxes
    :param pad: padding applied around each box before applying OCR
    :param psm: PSM.SINGLE_WORD or PSM.SINGLE_LINE
    :return:
    """
    img = chart.image
    # add a padding to the initial figure
    fpad = 1
    img = cv2.copyMakeBorder(img.copy(), fpad, fpad, fpad, fpad,
                             cv2.BORDER_CONSTANT, value=(255, 255, 255))
    fh, fw, _ = img.shape
    api = PyTessBaseAPI(psm=psm, lang='eng')
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    for tbox in chart.texts:
        # add a pad to the original image; in some cases in the Quartz corpus
        # the text touches the border.
        x, y, w, h = ru.wrap_rect(u.ttoi(tbox.rect), (fh, fw), padx=pad, pady=pad)
        x, y = x + fpad, y + fpad
        if w * h == 0:
            tbox.text = ''
            continue
        # crop region of interest
        roi = img[y:y + h, x:x + w]
        # to gray scale
        roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        # roi_gray = cv2.resize(roi_gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
        # binarization
        _, roi_bw = cv2.threshold(roi_gray, 0, 255,
                                  cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # removing noise from borders
        roi_bw = 255 - clear_border(255 - roi_bw)
        # roi_gray = cv2.copyMakeBorder(roi_gray, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=255)
        # when testing boxes from csv files
        if tbox.num_comp == 0:
            # Apply Contrast Limited Adaptive Histogram Equalization
            roi_gray2 = clahe.apply(roi_gray)
            _, roi_bw2 = cv2.threshold(roi_gray2, 0, 255,
                                       cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            _, num_comp = morphology.label(roi_bw2, return_num=True,
                                           background=255)
            tbox.regions.extend(range(num_comp))
        pil_img = smp.toimage(roi_bw)
        if SHOW:
            pil_img.show()
        max_conf = -np.inf
        min_dist = np.inf
        correct_text = ''
        correct_angle = 0
        u.log('---------------')
        for angle in [0, -90, 90]:
            rot_img = pil_img.rotate(angle, expand=1)
            api.SetImage(rot_img)
            conf = api.MeanTextConf()
            text = api.GetUTF8Text().strip()
            dist = abs(len(text.replace(' ', '')) - tbox.num_comp)
            u.log('text: %s conf: %f dist: %d' % (text, conf, dist))
            if conf > max_conf and dist <= min_dist:
                max_conf = conf
                correct_text = text
                correct_angle = angle
                min_dist = dist
        tbox.text = post_process_text(lossy_unicode_to_ascii(correct_text))
        tbox.text_conf = max_conf
        tbox.text_dist = min_dist
        tbox.text_angle = correct_angle
        u.log('num comp %d' % tbox.num_comp)
        u.log(u'** text: {} conf: {} angle: {}'.format(correct_text, max_conf,
                                                       correct_angle))
    api.End()
def capture_mrz(window: sg.Window, camera_id: int) -> Tuple[List[str], Image.Image]:
    """
    Capture the MRZ by using OCR and the camera footage.

    :returns: MRZ lines in a list
    """
    cap = cv2.VideoCapture(camera_id)
    tess_api = PyTessBaseAPI(init=False, psm=PSM.SINGLE_BLOCK_VERT_TEXT)
    tess_api.InitFull(
        # https://github.com/DoubangoTelecom/ultimateMRZ-SDK/tree/master/assets/models
        path="text_detection",
        lang="mrz",
        variables={
            "load_system_dawg": "false",
            "load_freq_dawg": "false",
            "tessedit_char_whitelist": "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<",
        },
    )
    # mrz_list: List[List[str]] = []
    pool = ThreadPool(processes=1)
    ocr_running = False
    while True:
        _, frame = cap.read()
        mrz = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                    cv2.THRESH_BINARY, 21, 10)
        # mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 3, 2)
        # mrz = cv2.GaussianBlur(mrz, (5, 5), 0)
        # _, mrz = cv2.threshold(mrz, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # mrz = cv2.GaussianBlur(mrz, (5, 5), 0)
        # mrz = cv2.medianBlur(mrz, 3)
        frame_shown = copy.deepcopy(mrz)
        width = 320
        height = int(frame_shown.shape[0] * (320 / frame_shown.shape[1]))
        frame_shown = cv2.resize(frame_shown, (width, height))
        alpha = 0.8
        frame_overlay = add_mrz_overlay(copy.deepcopy(frame_shown),
                                        "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<",
                                        3, 0.9, False)
        frame_overlay = add_mrz_overlay(
            frame_overlay, "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<",
            2, 0.9, True)
        cv2.addWeighted(frame_shown, alpha, frame_overlay, 1 - alpha, 0,
                        frame_shown)
        imgbytes = cv2.imencode(".png", frame_shown)[1].tobytes()
        window.write_event_value("-SHOW MRZ-", [imgbytes])
        mrz = Image.fromarray(mrz)
        if not ocr_running:
            checked_frame = Image.fromarray(frame[:, :, ::-1])
            tess_api.SetImage(mrz)
            async_result = pool.apply_async(tess_api.GetUTF8Text)
            ocr_running = True
        if async_result.ready():
            ocr_running = False
            mrz_text = async_result.get()
            result = parse_mrz_ocr(mrz_text)
            if result is not None:
                break
        # if result and len(mrz_list) < 3:
        #     mrz_list.append(result)
        # elif not result:
        #     mrz_list = []
        # else:
        #     if all(x == mrz_list[0] for x in mrz_list):
        #         break
    # When everything is done, release the capture
    cap.release()
    # cv2.destroyAllWindows()
    tess_api.End()
    # return mrz_list[0]
    window.write_event_value("-HIDE MRZ-", "")
    return (result, checked_frame)
class OCREngine():
    def __init__(self, extra_whitelist='', all_unicode=False, lang='eng'):
        """
        Args:
            extra_whitelist: string of extra chars for Tesseract to consider;
                only takes effect when all_unicode is False
            all_unicode: if True, Tess will consider all possible unicode characters
            lang: OCR language
        """
        self.tess = PyTessBaseAPI(psm=PSM_MODE, lang=lang)
        self.is_closed = False
        if all_unicode:
            self.whitelist_chars = None
        else:
            self.whitelist_chars = ("abcdefghijklmnopqrstuvwxyz"
                                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                    "1234567890"
                                    r"~!@#$%^&*()_+-={}|[]\:;'<>?,./"
                                    '"'
                                    "©") + extra_whitelist
            self.tess.SetVariable('tessedit_char_whitelist', self.whitelist_chars)

    def check_engine(self):
        if self.is_closed:
            raise RuntimeError('OCREngine has been closed.')

    def recognize(self, image,
                  min_text_size=MIN_TEXT_SIZE,
                  max_text_size=MAX_TEXT_SIZE,
                  uniformity_thresh=UNIFORMITY_THRESH,
                  thin_line_thresh=THIN_LINE_THRESH,
                  conf_thresh=CONF_THRESH,
                  box_expand_factor=BOX_EXPAND_FACTOR,
                  horizontal_pooling=HORIZONTAL_POOLING):
        """
        Generator of Blob objects.
        http://stackoverflow.com/questions/23506105/extracting-text-opencv

        Args:
            image: can be one of the following types:
                - string: image file path
                - ndarray: numpy image
                - PIL.Image.Image: PIL image
            min_text_size: min text height/width in pixels, below which will be ignored
            max_text_size: max text height/width in pixels, above which will be ignored
            uniformity_thresh (0.0 < _ < 1.0):
                remove all-black or all-white regions;
                ignore a region if the number of pixels neither black nor white < [thresh]
            thin_line_thresh (must be odd int):
                remove all lines thinner than [thresh] pixels;
                can be used to remove the thin borders of web page textboxes.
            conf_thresh (0 < _ < 100): ignore regions with OCR confidence < thresh.
            box_expand_factor (0.0 < _ < 1.0):
                expand the bounding box outwards in case certain chars are cut off.
            horizontal_pooling: result bounding boxes will be more connected with
                more pooling, but large pooling might lower accuracy.
        """
        self.check_engine()
        # param sanity check
        assert max_text_size > min_text_size > 0
        assert 0.0 <= uniformity_thresh < 1.0
        assert thin_line_thresh % 2 == 1
        assert 0 <= conf_thresh < 100
        assert 0.0 <= box_expand_factor < 1.0
        assert horizontal_pooling > 0
        image = get_np_img(image)
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)
        img = img_gray
        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
        # cut off all gray pixels < 30.
        # `cv2.THRESH_BINARY | cv2.THRESH_OTSU` is also good, but might overlook
        # certain light gray areas
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        # connect horizontally oriented regions
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
        # remove all thin textbox borders (e.g. web page textbox)
        if thin_line_thresh > 0:
            kernel = cv2.getStructuringElement(
                cv2.MORPH_RECT, (thin_line_thresh, thin_line_thresh))
            img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        _, contours, hierarchy = cv2.findContours(img, cv2.RETR_CCOMP,
                                                  cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = box = Box(*cv2.boundingRect(contour))
            # remove regions that are beyond size limits
            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue
            # remove regions that are almost uniformly white or black
            binary_region = crop(img_bw, box)
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                continue
            # expand the borders a little bit to include cut-off chars
            expansion = int(min(h, w) * box_expand_factor)
            x = max(0, x - expansion)
            y = max(0, y - expansion)
            h, w = h + 2 * expansion, w + 2 * expansion
            if h > w:  # further extend the long axis
                h += 2 * expansion
            elif w > h:
                w += 2 * expansion
            # image passed to Tess should be grayscale.
            # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11
            box = Box(x, y, w, h)
            img_crop = crop(img_gray, box)
            # make sure that crops passed to tesseract have minimum x-height
            # http://github.com/tesseract-ocr/tesseract/wiki/FAQ#is-there-a-minimum-text-size-it-wont-read-screen-text
            img_crop = cv2.resize(
                img_crop,
                (int(img_crop.shape[1] * CROP_RESIZE_HEIGHT / img_crop.shape[0]),
                 CROP_RESIZE_HEIGHT))
            ocr_text, conf = self.run_tess(img_crop)
            if conf > conf_thresh:
                yield Blob(ocr_text, box, conf)

    def _experiment_segment(self, img,
                            min_text_size=MIN_TEXT_SIZE,
                            max_text_size=MAX_TEXT_SIZE,
                            uniformity_thresh=UNIFORMITY_THRESH,
                            horizontal_pooling=HORIZONTAL_POOLING):
        """
        PRIVATE: experiment only
        """
        img_init = img  # preserve initial image
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, morph_kernel)
        disp(img)
        # morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        # img = cv2.dilate(img, morph_kernel)
        # OTSU thresholding
        # _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 9, 2)
        disp(img)
        # connect horizontally oriented regions
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                                 (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, morph_kernel)
        disp(img)
        if 0:
            morph_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,
                                                     (horizontal_pooling, 3))
            img = cv2.erode(img, morph_kernel, iterations=1)
            disp(img)
            morph_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (6, 6))
            img = cv2.dilate(img, morph_kernel, iterations=1)
        elif 1:
            morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
            img = cv2.morphologyEx(img, cv2.MORPH_OPEN, morph_kernel)
            disp(img)
        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        _, contours, hierarchy = cv2.findContours(img, cv2.RETR_CCOMP,
                                                  cv2.CHAIN_APPROX_SIMPLE)
        img_copy = np.copy(img_init)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            draw_rect(img_copy, x, y, w, h)
            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue
            binary_region = img_bw[y:y + h, x:x + w]
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                # ignore mostly white or black regions
                # print(w, h)
                # disp(binary_region)
                continue
            # the image must be grayscale, otherwise Tesseract will SegFault
            # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11
            draw_rect(img_init, x, y, w, h)
        disp(img_copy)
        disp(img_init, 0)

    def run_tess(self, img):
        """
        Tesseract python API source code:
        https://github.com/sirfz/tesserocr/blob/master/tesserocr.pyx

        Returns:
            (ocr_text, confidence)
        """
        if isinstance(img, np.ndarray):
            img = np2PIL(img)
        self.tess.SetImage(img)
        ocr_text = self.tess.GetUTF8Text().strip()
        conf = self.tess.MeanTextConf()
        return ocr_text, conf

    def _deprec_run_tess(self, img):
        "GetComponentImages throws SegFault randomly. No way to fix. :("
        if isinstance(img, np.ndarray):
            img = np2PIL(img)
        components = self.tess.GetComponentImages(RIL.TEXTLINE, True)
        for _, inner_box, block_id, paragraph_id in components:
            # box is a dict with x, y, w and h keys
            inner_box = Box(**inner_box)
            if inner_box.w < MIN_TEXT_SIZE or inner_box.h < MIN_TEXT_SIZE:
                continue
            self.tess.SetRectangle(*inner_box)
            ocr_text = self.tess.GetUTF8Text().strip()
            conf = self.tess.MeanTextConf()
            yield ocr_text, inner_box, conf

    def close(self):
        self.tess.End()
        self.is_closed = True

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()
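# Usage sketch for OCREngine (the screenshot path is illustrative; module-level
# constants such as PSM_MODE, MIN_TEXT_SIZE etc. are assumed to be defined above,
# and the exact Blob fields are not shown here):
def _demo_ocr_engine():
    with OCREngine(extra_whitelist='€') as engine:
        # recognize() accepts a file path, a numpy array, or a PIL image and
        # yields one Blob (text, box, confidence) per detected text region.
        for blob in engine.recognize('screenshot.png'):
            print(blob)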
if not player_dead_for_round and maskCounts[i] >= mask_threshold:
    if not cur_visibility_events[i].valid:
        cur_visibility_events[i].valid = True
        cur_visibility_events[i].spotted = players[i]
        cur_visibility_events[i].spotted_id = getPlayerId(players[i])
        cur_visibility_events[i].start_game_tick = cur_tick
        cur_visibility_events[i].start_frame_num = frame_id
        cur_visibility_events[i].color = colors[i]
    last_hit_frame = frame_id

last_tick = cur_tick
if last_tick + 10 >= max_tick:
    print("finished all ticks, skipped last 10 for acceptable error bound")
    break

tessocr_api.End()
finishTick(True, cur_tick, frame_id, maskCounts)

df_visibility = pd.DataFrame([
    e for e in finished_visibility_events
    if e['end_game_tick'] - e['start_game_tick'] > 2
])
df_visibility_sorted = df_visibility.sort_values(
    ['demo', 'spotter', 'spotted', 'start_game_tick'])
dicts_sorted = df_visibility_sorted.to_dict(orient='records')
dicts_output = []
i = 0
while i < len(dicts_sorted):
    j = 1
    while j < len(dicts_sorted) - i:
        next_row = dicts_sorted[i + j]
class OCREngine:
    def __init__(self, psm: int = 3, config: dict = {}):
        logging.info('Initializing OCR engine with PSM=%d and configs=%s' %
                     (psm, config))
        self.api = PyTessBaseAPI(psm=psm)
        for key in config.keys():
            self.api.SetVariable(key, config[key])
        logging.debug('OCR engine initialized')

    def build_graph(self, image_path: str, scheme: str = None) -> DocumentGraph:
        hocr = self._get_hocr(image_path)
        words = self._get_words(hocr, scheme)
        dg = DocumentGraph(words)
        return dg

    def _get_hocr(self, image_path: str) -> str:
        logging.info('Reading to hOCR from image: %s' % image_path)
        self.api.SetImageFile(image_path)
        hocr_text = self.api.GetHOCRText(0)
        logging.debug('Image read')
        return hocr_text

    def _get_words(self, hocr: str, scheme: str = None):
        logging.info('Extracting words from hOCR.')
        if scheme is None:
            logging.warning('No scheme specified. Assuming xyxy')
            scheme = 'xyxy'
        soup = BeautifulSoup(hocr, 'html.parser')
        word_tags = soup.select('.ocrx_word')
        word_nodes = [self._make_node(tag, scheme=scheme) for tag in word_tags]
        word_nodes = list(filter(lambda node: node is not None, word_nodes))
        return word_nodes

    def _make_node(self, tag: dict, scheme: str) -> WordNode:
        fields = tag['title'].split(';')
        if not len(fields) == 2:
            logging.warning('Malformed tag: %s. Skipping.' % tag)
            return None
        word = tag.text
        coordinates = tuple(map(int, fields[0].split()[1:]))
        conf = int(fields[1].split()[1])
        wn = WordNode(word, WordNode.convert_coords(coordinates, scheme), conf)
        logging.debug('Made word: %s' % wn.__repr__())
        return wn

    def close(self):
        self.api.End()
        logging.debug('OCR engine closed')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type:
            print("type: %s\nvalue: %s\ntrace: %s" %
                  (exc_type, exc_value, traceback))
        self.close()
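# Usage sketch for the hOCR-based OCREngine (the image path is illustrative;
# DocumentGraph and WordNode come from this project's own modules, and
# tessedit_char_blacklist is just one example of a Tesseract variable):
def _demo_build_graph():
    config = {'tessedit_char_blacklist': '|'}
    with OCREngine(psm=3, config=config) as engine:
        graph = engine.build_graph('page.png', scheme='xyxy')
    return graph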