def read_text_with_confidence(image, lang='fast_ind', path='/usr/share/tesseract-ocr/5/tessdata', psm=4, whitelist=''):
    """Run Tesseract on a numpy image and return the text and its confidence.

    Args:
        image: Array-like image; only ``image.shape[:2]`` is inspected here
            before it is handed to ``Image.fromarray``.
        lang: Language/model name passed to ``PyTessBaseAPI``.
        path: Tessdata directory passed to ``PyTessBaseAPI``.
        psm: Page segmentation mode passed to ``PyTessBaseAPI``.
        whitelist: When non-empty, sets ``tessedit_char_whitelist``.

    Returns:
        ``(text, confidence)``. ``('', 0)`` is returned for an image with a
        zero dimension and also when Tesseract raises during recognition.
    """
    height, width = image.shape[:2]
    if height <= 0 or width <= 0:
        return '', 0

    # Defaults used when recognition fails: without them the final return
    # would raise UnboundLocalError right after the error was reported.
    text, confidence = '', 0

    image_pil = Image.fromarray(image)
    api = PyTessBaseAPI(lang=lang, psm=psm, path=path, oem=OEM.LSTM_ONLY)
    try:
        api.SetImage(image_pil)
        if whitelist != '':
            api.SetVariable('tessedit_char_whitelist', whitelist)
        api.Recognize()
        recognized = api.GetUTF8Text()
        mean_conf = api.MeanTextConf()
        # Publish both values together so a failure between the two calls
        # cannot return text paired with a stale confidence.
        text, confidence = recognized, mean_conf
    except Exception:
        print("[ERROR] Tesseract exception")
    finally:
        # Always release the native engine.
        api.End()
    return text, confidence
def get_text_bounding_boxes(image, psm=12):
    """Return ``(x, y, w, h)`` tuples for the text lines Tesseract finds.

    Args:
        image: Array-like image; converted with ``Image.fromarray``.
        psm: Page segmentation mode passed to ``PyTessBaseAPI``.

    Returns:
        A list of ``(x, y, w, h)`` tuples, one per ``RIL.TEXTLINE`` component;
        an empty list when the image has a zero dimension.
    """
    rows, cols = image.shape[:2]
    if rows <= 0 or cols <= 0:
        return []

    pil_image = Image.fromarray(image)  # PIL image built from the numpy data
    engine = PyTessBaseAPI(psm=psm, oem=OEM.LSTM_ONLY)
    try:
        engine.SetImage(pil_image)
        engine.Recognize()
        components = engine.GetComponentImages(RIL.TEXTLINE, True)
        # Each component is a 4-tuple; only the geometry dict is needed.
        found = [
            (rect['x'], rect['y'], rect['w'], rect['h'])
            for _, rect, _, _ in components
        ]
    finally:
        engine.End()
    return found
def __init__(self, image_file, tessdata):
    """Open *image_file* with Tesseract and run recognition immediately.

    The engine is created with ``PSM.AUTO_OSD`` using *tessdata* as its data
    path, both table-detection variables are set to ``"T"``, and the
    recognized engine is stored on ``self.api``.
    """
    engine = PyTessBaseAPI(path=tessdata, psm=PSM.AUTO_OSD)
    engine.SetImageFile(image_file)
    table_options = (
        "textord_tablefind_recognize_tables",
        "textord_tabfind_find_tables",
    )
    for option in table_options:
        engine.SetVariable(option, "T")
    engine.Recognize()
    self.api = engine
def getWords(pages, letters_cache):
    """Collect word blocks from the given pages of the module-level ``doc``.

    Args:
        pages: Iterable of page numbers used to index ``doc``.
        letters_cache: When falsy, single-character blocks are OCR'd and
            recorded in the returned ``letters`` mapping; when truthy they
            are skipped.

    Returns:
        ``(standard_words, split_words, letters)`` where ``letters`` is
        ``{'bid': [...], 'letters': [...]}``: for each single-letter block,
        the current block id and the lower-cased recognized letter.
    """
    standard_words, split_words, letters = [], [], {'bid': [], 'letters': []}
    prev_word = None

    # Render scale and crop margin for the single-letter OCR crops.
    sf, eps = 25 / 6, 1

    # The OCR engine is only needed when the letter heuristic has to be
    # built; it is created on first use and always released in ``finally``.
    letter_detect = None
    bid = 0
    try:
        for pg_num in pages:
            page = doc[pg_num]
            page_img = None  # rendered at most once per page, on demand

            # get initial block bounding boxes
            blocks = []
            for block in page.getText("blocks"):
                bbox = block[:4]
                text = block[4].strip()
                if len(text) != 1:  # not a single letter
                    blocks.append({
                        'bid': bid,
                        'bbox': bbox,
                        'pg': page.number,
                        'text': text
                    })
                    bid += 1
                elif not letters_cache:
                    # maps each bid to a corresponding dictionary letter
                    # this provides a heuristic for our search
                    if letter_detect is None:
                        letter_detect = PyTessBaseAPI(psm=8, lang='eng')
                        letter_detect.SetVariable('tessedit_char_whitelist',
                                                  ascii_uppercase)
                    if page_img is None:
                        pix = page.getPixmap(matrix=fitz.Matrix(sf, sf))
                        page_img = Image.open(io.BytesIO(pix.getPNGData()))
                    block_img = page_img.crop(resize(bbox, sf, eps))
                    letter_detect.SetImage(block_img)
                    letter_detect.Recognize()
                    letter = letter_detect.AllWords()[0]
                    assert (len(letter) == 1)
                    letters['bid'].append(bid)
                    letters['letters'].append(letter.lower())

            standard, split, prev_word, insert_word = groupBlocks(
                blocks, prev_word, pg_num)
            # last block from previous page (no spillover)
            if insert_word:
                add_word(standard, insert_word)
            # clean up
            standard_words.extend(standard)
            split_words.extend(split)
    finally:
        if letter_detect is not None:
            letter_detect.End()

    # add the last word
    # NOTE(review): this used to target the per-page list ``standard``, which
    # had already been copied into ``standard_words``, so the final word never
    # reached the result. Assumes add_word(list, word) adds to the list it is
    # given -- confirm against add_word.
    if prev_word:
        add_word(standard_words, prev_word)

    # make sure all the blocks are properly formatted
    for word in chain(standard_words, split_words):
        test_word_format(word)
    return standard_words, split_words, letters
def read_char(image, whitelist=None):
    """OCR a single character from an image. Useful for captchas.

    Args:
        image: Image passed to ``PyTessBaseAPI.SetImage``.
        whitelist: Optional string of allowed characters
            (``tessedit_char_whitelist``).

    Returns:
        The recognized text with surrounding whitespace stripped.
    """
    api = PyTessBaseAPI()
    try:
        api.SetPageSegMode(10)  # Tesseract PSM 10: single character
        if whitelist is not None:
            api.SetVariable("tessedit_char_whitelist", whitelist)
        api.SetImage(image)
        api.Recognize()
        return api.GetUTF8Text().strip()
    finally:
        # Release the native engine; it used to leak on every call.
        api.End()
def read_word(image, whitelist=None, chars=None, spaces=False):
    """OCR a single word from an image. Useful for captchas.

    Image should be pre-processed to remove noise etc.

    Args:
        image: Image passed to ``PyTessBaseAPI.SetImage``.
        whitelist: Optional string of allowed characters
            (``tessedit_char_whitelist``).
        chars: Expected number of characters. When given and the guess has a
            different length, the confidence is reported as ``None``.
        spaces: Keep space characters in the guess when true.

    Returns:
        ``(guess, confidence)`` where confidence is ``MeanTextConf()`` or
        ``None`` on a length mismatch.
    """
    api = PyTessBaseAPI()
    try:
        api.SetPageSegMode(8)  # Tesseract PSM 8: single word
        if whitelist is not None:
            api.SetVariable("tessedit_char_whitelist", whitelist)
        api.SetImage(image)
        api.Recognize()

        guess = api.GetUTF8Text()
        if not spaces:
            guess = guess.replace(" ", "")
        guess = guess.strip()

        if chars is not None and len(guess) != chars:
            return guess, None
        # Read the confidence before the engine is released below.
        return guess, api.MeanTextConf()
    finally:
        # Release the native engine; it used to leak on every call.
        api.End()
def get_boxes(image_filename: str) -> list:
    """Return word boxes recognized in *image_filename*.

    The image is OCR'd with the ``jpn_vert`` model in
    ``PSM.SPARSE_TEXT_OSD`` mode. Every word is printed, and those rejected
    by ``should_ignore_box`` (given the word confidence and half the image
    width/height as limits) are left out of the result.

    Returns:
        A list of dicts with keys ``text``, ``left``, ``top``, ``width`` and
        ``height``.
    """
    level = RIL.WORD
    boxes = []

    # ``with`` / ``finally`` make sure the image file and the native engine
    # are released even when recognition raises.
    with Image.open(image_filename) as image:
        max_width = image.width // 2
        max_height = image.height // 2

        api = PyTessBaseAPI(lang="jpn_vert")
        try:
            api.SetPageSegMode(PSM.SPARSE_TEXT_OSD)
            api.SetImage(image)
            api.Recognize(0)

            for r in iterate_level(api.GetIterator(), level):
                conf = r.Confidence(level)
                text = r.GetUTF8Text(level)
                left, top, right, bottom = r.BoundingBox(level)

                print("'%s' \tConf: %.2f \tCoords: %d,%d,%d,%d" %
                      (text, conf, left, top, right, bottom))

                box = {
                    'text': text,
                    'left': left,
                    'top': top,
                    'width': right - left,
                    'height': bottom - top
                }
                if should_ignore_box(conf, box, max_width, max_height):
                    continue
                boxes.append(box)
        finally:
            api.End()
    return boxes
class Analyzer(object):
    """Turn Tesseract's layout iterator into Page/Block/.../Symbol objects.

    One ``PyTessBaseAPI`` (``PSM.AUTO_OSD``) is created per analyzer and
    reused for every image; call ``close()`` to release it.
    """

    TEXT_TYPES = {
        PT.FLOWING_TEXT, PT.HEADING_TEXT, PT.PULLOUT_TEXT, PT.VERTICAL_TEXT,
        PT.CAPTION_TEXT
    }

    def __init__(self, lang=None):
        """Create the engine, passing ``lang`` through only when given."""
        super(Analyzer, self).__init__()
        kwargs = {}
        if lang is not None:
            kwargs['lang'] = lang
        self.api = PyTessBaseAPI(psm=PSM.AUTO_OSD, **kwargs)

    def analyze_image(self, image):
        """Recognize *image* and return a populated ``Page``.

        ``image`` is handed to ``SetImage`` and its ``size`` attribute is
        unpacked into ``Size``.
        """
        page = Page()
        self.api.SetImage(image)
        self.api.Recognize()
        iterator = self.api.GetIterator()
        page.blocks = self.__decode_blocks(iterator, image)
        page.size = Size(*image.size)
        return page

    def close(self):
        """Release the underlying Tesseract engine."""
        self.api.End()

    # The decoders below all walk the SAME iterator. Each inner decoder
    # advances it and stops on the final element of its parent, so the
    # enclosing ``iterate_level`` resumes at the next sibling. Values needed
    # from the current element must therefore be read before descending.

    def __decode_blocks(self, iterator, image):
        """Decode every block; blocks without text keep their image instead."""
        blocks = []
        for tesseract_block in iterate_level(iterator, RIL.BLOCK):
            block = Block()
            block.bounding_box = BoundingBox.from_coordinates(
                *tesseract_block.BoundingBox(RIL.BLOCK))
            if tesseract_block.GetUTF8Text(RIL.BLOCK).strip():
                block.paragraphs = self.__decode_paragraphs(iterator)
            else:
                block.image = tesseract_block.GetImage(RIL.BLOCK, 0, image)
            blocks.append(block)
        return blocks

    def __decode_paragraphs(self, iterator):
        """Decode the paragraphs of the current block."""
        paragraphs = []
        for tesseract_paragraph in iterate_level(iterator, RIL.PARA):
            paragraph = Paragraph()
            paragraph.bounding_box = BoundingBox.from_coordinates(
                *tesseract_paragraph.BoundingBox(RIL.PARA))
            paragraph.lines = self.__decode_lines(iterator)
            paragraphs.append(paragraph)
            if iterator.IsAtFinalElement(RIL.BLOCK, RIL.PARA):
                break
        return paragraphs

    def __decode_lines(self, iterator):
        """Decode the text lines of the current paragraph."""
        lines = []
        for tesseract_line in iterate_level(iterator, RIL.TEXTLINE):
            line = TextLine()
            line.bounding_box = BoundingBox.from_coordinates(
                *tesseract_line.BoundingBox(RIL.TEXTLINE))
            line.words = self.__decode_words(iterator)
            lines.append(line)
            if iterator.IsAtFinalElement(RIL.PARA, RIL.TEXTLINE):
                break
        return lines

    def __decode_words(self, iterator):
        """Decode the words of the current line, including font attributes."""
        words = []
        for tesseract_word in iterate_level(iterator, RIL.WORD):
            # Read everything word-level before the symbol decoder moves on.
            font_attributes = tesseract_word.WordFontAttributes()
            word = Word()
            word.bounding_box = BoundingBox.from_coordinates(
                *tesseract_word.BoundingBox(RIL.WORD))
            word.confidence = float(tesseract_word.Confidence(
                RIL.WORD)) / 100.0
            word.text = tesseract_word.GetUTF8Text(RIL.WORD)
            word.symbols = self.__decode_symbols(iterator)

            font = Font()
            # WordFontAttributes() returns None when the engine has no font
            # information for the word; leave the Font at its defaults then
            # instead of crashing on the subscript.
            if font_attributes is not None:
                font.bold = font_attributes['bold']
                font.italic = font_attributes['italic']
                font.underline = font_attributes['underlined']
                font.monospace = font_attributes['monospace']
                font.serif = font_attributes['serif']
                font.pointsize = font_attributes['pointsize']
                font.id = font_attributes['font_id']
            for symbol in word.symbols:
                symbol.font = font

            words.append(word)
            if iterator.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
                break
        return words

    def __decode_symbols(self, iterator):
        """Decode the symbols of the current word."""
        symbols = []
        for tesseract_symbol in iterate_level(iterator, RIL.SYMBOL):
            symbol = Symbol()
            symbol.bounding_box = BoundingBox.from_coordinates(
                *tesseract_symbol.BoundingBox(RIL.SYMBOL))
            symbol.confidence = float(tesseract_symbol.Confidence(
                RIL.SYMBOL)) / 100.0
            symbol.text = tesseract_symbol.GetUTF8Text(RIL.SYMBOL)
            symbol.image = tesseract_symbol.GetBinaryImage(RIL.SYMBOL).convert(
                '1', dither=Image.NONE)
            symbols.append(symbol)
            if iterator.IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
                break
        return symbols
class TesseractOCR:
    """OCR helper around one shared ``PyTessBaseAPI`` handle.

    On construction the image is analysed once and block/line wrappers are
    cached; the remaining methods query text for sub-rectangles and validate
    recognized text against rendered font metrics.
    """

    # bpp - bits per pixel, represents the bit depth of the image, with 1 for
    # binary bitmap, 8 for gray, and 24 for color RGB.
    BBP = 8
    DEFAULT_CONFIDENT_THRESHOLD = 60.0
    MINIMUM_DESKEW_THRESHOLD = 0.05

    def __init__(self, rgbaImage, dipCalculator, language):
        """Store the image, create the Tesseract handle and run ``initOCR``.

        ``language`` is accepted for interface compatibility but is not used.
        """
        self.mRgbaImage = rgbaImage
        self.mDipCalculator = dipCalculator
        self.mHandle = PyTessBaseAPI()
        self.mOcrTextWrappers = []
        self.mOcrBlockWrappers = []
        self.mOcrLineWrappers = []
        self.mOcrParaWrappers = []
        self.raWrappers = []
        self.mBufferedImageRgbaImage = Image.fromarray(self.mRgbaImage)
        self.initOCR()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _replaceHandle(self, psm):
        """End the current handle and open a fresh one in mode *psm*.

        Ending the old handle first keeps native engines from piling up each
        time the segmentation mode is switched.
        """
        self.mHandle.End()
        self.mHandle = PyTessBaseAPI(psm=psm)

    @staticmethod
    def _applyFontAttributes(target, fontAttribute):
        """Copy Tesseract's font attribute dict onto *target*."""
        target.fontName = fontAttribute['font_name']
        target.bold = fontAttribute['bold']
        target.italic = fontAttribute['italic']
        target.underlined = fontAttribute['underlined']
        target.monospace = fontAttribute['monospace']
        target.serif = fontAttribute['serif']
        target.smallcaps = fontAttribute['smallcaps']
        target.fontSize = fontAttribute['pointsize']
        target.fontId = fontAttribute['font_id']

    @staticmethod
    def _measureText(font, text):
        """Return ``(width, height)`` of *text* rendered with *font*.

        ``getsize`` was removed in Pillow 10; the right/bottom edges of
        ``getbbox`` give the same numbers on newer releases.
        """
        if hasattr(font, 'getsize'):
            return font.getsize(text)
        _, _, right, bottom = font.getbbox(text)
        return right, bottom

    # ------------------------------------------------------------------
    # initial analysis
    # ------------------------------------------------------------------
    def baseInit(self, iteratorLevel):
        """Run ``baseInitIter`` over the whole stored image."""
        channels = 1
        if len(self.mRgbaImage.shape) == 2:
            height, width = self.mRgbaImage.shape
        else:
            height, width, channels = self.mRgbaImage.shape
        return self.baseInitIter(self.mRgbaImage, Rect(0, 0, width, height),
                                 channels, iteratorLevel)

    def baseInitIter(self, imageMat, rect, channels, iteratorLevel):
        """Return one ``OCRTextWrapper`` per component at *iteratorLevel*.

        Wrapper coordinates are offset by ``rect.x`` / ``rect.y``.
        ``channels`` is accepted but not used.
        """
        listdata = []
        parentX = rect.x
        parentY = rect.y

        # Convert to PIL image
        imgPIL = Image.fromarray(imageMat)
        self.mHandle.SetImage(imgPIL)
        boxes = self.mHandle.GetComponentImages(iteratorLevel, True)

        for _, box, _, _ in boxes:
            wrapper = OCRTextWrapper.OCRTextWrapper()
            self.mHandle.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            wrapper.text = self.mHandle.GetUTF8Text()
            wrapper.confidence = self.mHandle.MeanTextConf()
            self.mHandle.Recognize()
            iterator = self.mHandle.GetIterator()
            # Both the iterator and the attributes may be None when Tesseract
            # has nothing to report for this rectangle.
            fontAttribute = (iterator.WordFontAttributes()
                             if iterator is not None else None)

            wrapper.x = box['x'] + parentX
            wrapper.y = box['y'] + parentY
            wrapper.width = box['w']
            wrapper.height = box['h']
            wrapper.rect = Rect(wrapper.x, wrapper.y, wrapper.width,
                                wrapper.height)
            if fontAttribute is not None:
                self._applyFontAttributes(wrapper, fontAttribute)
            listdata.append(wrapper)
        return listdata

    def initOCR(self):
        """Populate the block and line caches (words/paragraphs are skipped)."""
        self.initBlock()
        self.initLine()

    def initBlock(self):
        self.mOcrBlockWrappers = self.baseInit(RIL.BLOCK)

    def initLine(self):
        """Cache text lines, dropping any line that contains another line."""
        lines = self.baseInit(RIL.TEXTLINE)
        self.mOcrLineWrappers = lines
        # a line cannot contain another lines
        invalidLineWrappers = [
            ocrLine for ocrLine in lines
            if any(ocrLine != otherOcrLine and RectUtil.contains(
                ocrLine.bound(), otherOcrLine.bound())
                   for otherOcrLine in lines)
        ]
        self.mOcrLineWrappers = [
            x for x in lines if x not in invalidLineWrappers
        ]

    def initPara(self):
        self.mOcrParaWrappers = self.baseInit(RIL.PARA)

    def initText(self):
        self.mOcrTextWrappers = self.baseInit(RIL.WORD)

    def reset(self):
        """Clear the word and line caches and rerun ``initOCR``."""
        self.mOcrTextWrappers = []
        self.mOcrLineWrappers = []
        self.initOCR()

    # ------------------------------------------------------------------
    # queries
    # ------------------------------------------------------------------
    def getBlockWithLocation(self, rect):
        """Return copies of the cached blocks whose rect lies inside *rect*."""
        return [
            OCRTextWrapper.OCRTextWrapper(ocrTextWrapper)
            for ocrTextWrapper in self.mOcrBlockWrappers
            if RectUtil.contains(rect, ocrTextWrapper.rect)
        ]

    def getWordsIn(self, rect):
        """Return copies of the cached words whose bound lies inside *rect*."""
        return [
            OCRTextWrapper.OCRTextWrapper(ocrTextWrapper)
            for ocrTextWrapper in self.mOcrTextWrappers
            if RectUtil.contains(rect, ocrTextWrapper.bound())
        ]

    def isOverlapText(self, rect, confident):
        """Tell whether a cached word with confidence >= *confident* intersects *rect*."""
        for ocrTextWrapper in self.mOcrTextWrappers:
            bound = ocrTextWrapper.bound()
            if (ocrTextWrapper.getConfidence() >= confident
                    and RectUtil.intersects(rect, bound)):
                return True
        return False

    def getImage(self, rect):
        """Return the *rect* region of the stored array as a PIL image."""
        x2 = rect.x + rect.width
        y2 = rect.y + rect.height
        mat = self.mRgbaImage[rect.y:y2, rect.x:x2]
        return Image.fromarray(mat)

    def getText(self, rect):
        """Return the text recognized inside *rect*, or ``""`` on error."""
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            return self.mHandle.GetUTF8Text()
        except Exception as error:
            print('Caught this error: ' + repr(error))
            return ""

    def getLineText(self, rect):
        """Return the text inside *rect*, retrying in single-line mode.

        If the default pass yields nothing, the rectangle is retried with
        ``PSM.SINGLE_LINE`` and finally with the cropped image alone. The
        handle is always put back into ``PSM.AUTO`` afterwards. Returns
        ``""`` on error.
        """
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            text = self.mHandle.GetUTF8Text()
            if TextUtils.isEmpty(text):
                self._replaceHandle(PSM.SINGLE_LINE)
                try:
                    self.mHandle.SetImage(self.mBufferedImageRgbaImage)
                    self.mHandle.SetRectangle(rect.x, rect.y, rect.width,
                                              rect.height)
                    text = self.mHandle.GetUTF8Text()
                    if TextUtils.isEmpty(text):
                        self.mHandle.SetImage(self.getImage(rect))
                        text = self.mHandle.GetUTF8Text()
                finally:
                    # Restore the default mode even if the retry raised.
                    self._replaceHandle(PSM.AUTO)
            return text
        except Exception as error:
            print('Caught this error: ' + repr(error))
            return ""

    def getRectWordForLowConfidence(self, ocr):
        """Re-OCR ``ocr.bound()`` as a single word and update *ocr* in place.

        Returns ``True`` when the new confidence exceeds
        ``Constants.TEXT_CONFIDENT_THRESHOLD`` (font attributes are copied
        when available), ``False`` otherwise or on error. The handle is
        always put back into ``PSM.AUTO`` afterwards.
        """
        try:
            rect = ocr.bound()
            self._replaceHandle(PSM.SINGLE_WORD)
            try:
                self.mHandle.SetImage(self.mBufferedImageRgbaImage)
                self.mHandle.SetRectangle(rect.x, rect.y, rect.width,
                                          rect.height)
                ocr.text = self.mHandle.GetUTF8Text()
                ocr.confidence = self.mHandle.MeanTextConf()
                if ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD:
                    # Second attempt on the cropped image only.
                    self.mHandle.SetImage(self.getImage(rect))
                    ocr.text = self.mHandle.GetUTF8Text()
                    ocr.confidence = self.mHandle.MeanTextConf()
                if ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD:
                    return False
                self.mHandle.Recognize()
                iterator = self.mHandle.GetIterator()
                fontAttribute = iterator.WordFontAttributes()
                if fontAttribute is not None:
                    self._applyFontAttributes(ocr, fontAttribute)
                return True
            finally:
                # Previously skipped on the early ``return False`` and on
                # errors, leaving every later call in single-word mode.
                self._replaceHandle(PSM.AUTO)
        except Exception as error:
            print('Caught this error: ' + repr(error))
            return False

    def validCharacter(self, word):
        return self.mHandle.IsValidCharacter(word)

    # ------------------------------------------------------------------
    # font metrics
    # ------------------------------------------------------------------
    def getPreferenceFontSize(self, ocrTextWrapper, parentHeight):
        """Step the font size until the rendered text height reaches the target.

        The target is the dip height of ``min(parentHeight, box height *
        Constants.TEXT_BOX_AND_TEXT_HEIGHT_RATIO)``.
        """
        fontName = ocrTextWrapper.fontName
        fontSize = ocrTextWrapper.fontSize
        height = ocrTextWrapper.height * Constants.TEXT_BOX_AND_TEXT_HEIGHT_RATIO
        textHeight = int(
            self.mDipCalculator.pxToHeightDip(min(parentHeight, height)))

        current = self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                                     fontSize)
        if current < textHeight:
            while self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                                     fontSize) < textHeight:
                fontSize = fontSize + 1
        elif current > textHeight:
            # Stop at size 1: a font cannot be loaded with a size <= 0.
            while (fontSize > 1 and self.getTextHeightUsingFontMetrics(
                    ocrTextWrapper, fontName, fontSize) > textHeight):
                fontSize = fontSize - 1
        return fontSize

    def getTextHeightUsingFontMetrics(self, ocrTextWrapper, fontName,
                                      fontSize):
        """Return the rendered height of the wrapper's text in the given font.

        The font is loaded from ``fonts//<fontName>.ttf``.
        """
        file = "fonts//" + fontName + ".ttf"
        font = ImageFont.truetype(file, fontSize)
        return self._measureText(font, ocrTextWrapper.text)[1]

    def getTextDimensions(self, text, fontName, fontSize):
        """Return ``(width, height)`` of *text*, or ``None`` if the font file fails to load.

        On ``OSError`` the attempted font path is printed.
        """
        file = "fonts//" + fontName + ".ttf"
        try:
            font = ImageFont.truetype(file, fontSize)
            return self._measureText(font, text)
        except OSError:
            print(file)
            return None

    def isValidTextUsingBoundaryCheck(self, ocrTextWrapper):
        """Compare the OCR box with the size of the text rendered in its font.

        Passes when the aspect-ratio difference is within
        ``Constants.TEXT_CONFIDENT_ACCEPTANCE_DIMENSION_RATIO_DIFFERENCE_THRESHOLD``
        and the area ratio is at least
        ``Constants.TEXT_AREA_ACCEPTANCE_DIFFERENCE_THRESHOLD``. Empty text
        is accepted because it cannot be measured.
        """
        if TextUtils.isEmpty(ocrTextWrapper.text):
            # We cannot calculate width of empty text
            return True

        width, height = self.getTextDimensions(ocrTextWrapper.text,
                                               ocrTextWrapper.fontName,
                                               ocrTextWrapper.fontSize)

        fontRatio = float(height / width)
        boundRatio = float(ocrTextWrapper.height / ocrTextWrapper.width)

        fontArea = self.mDipCalculator.dipToHeightPx(
            height) * self.mDipCalculator.dipToWidthPx(width)
        boundArea = float(ocrTextWrapper.width * ocrTextWrapper.height)

        # The relative difference between the two aspect ratios must stay
        # below the configured threshold ...
        ratioDifference = abs(boundRatio - fontRatio) / max(
            boundRatio, fontRatio)
        dimensionCheck = (
            ratioDifference <=
            Constants.TEXT_CONFIDENT_ACCEPTANCE_DIMENSION_RATIO_DIFFERENCE_THRESHOLD
        )
        # ... and the smaller area must be a large enough share of the bigger.
        areaRatio = min(fontArea, boundArea) / max(fontArea, boundArea)
        areaCheck = (areaRatio >=
                     Constants.TEXT_AREA_ACCEPTANCE_DIFFERENCE_THRESHOLD)

        return dimensionCheck and areaCheck

    def destroy(self):
        """Release the Tesseract handle."""
        # The call parentheses were missing, so the handle was never ended.
        self.mHandle.End()
def run_tesseract(image_file):
    """OCR *image_file* and return its layout as nested dictionaries.

    The engine uses ``PSM.AUTO_OSD`` with both table-detection variables
    enabled; the module-level ``tessdata`` is used as data path when truthy.

    Returns:
        ``{}`` when layout analysis or recognition yields no iterator,
        otherwise a dict with the page orientation fields and a ``blocks``
        list nested as blocks -> paragraphs -> lines -> words, each entry
        carrying its box, text and confidence.
    """
    if tessdata:
        api = PyTessBaseAPI(path=tessdata, psm=PSM.AUTO_OSD)
    else:
        api = PyTessBaseAPI(psm=PSM.AUTO_OSD)

    try:
        api.SetImageFile(image_file)
        api.SetVariable("textord_tablefind_recognize_tables", "T")
        api.SetVariable("textord_tabfind_find_tables", "T")
        api.Recognize()

        it = api.AnalyseLayout()
        if it is None:
            return {}
        orientation, direction, order, deskew_angle = it.Orientation()

        api.Recognize()
        ri = api.GetIterator()
        if ri is None:
            return {}

        document = {
            "orientation": orientation,
            "writing_direction": direction,
            "text_direction": order,
            "deskew_angle": deskew_angle,
            "blocks": []
        }

        # A single iterator is walked at four levels. At each level the
        # "is this the last child of its parent" flag is read BEFORE
        # descending, and Next() is only called when a sibling follows, so
        # the iterator never skips past the parent's boundary.
        while ri.IsAtBeginningOf(RIL.BLOCK):
            block_type = ri.BlockType()
            block = {
                "block_type": block_type,
                "block_type_str": BlockType[block_type],
                "box": ri.BoundingBox(RIL.BLOCK),
                "ocr_text": ri.GetUTF8Text(RIL.BLOCK),
                "confidence": ri.Confidence(RIL.BLOCK),
                "paragraphs": []
            }
            while True:
                last_para = ri.IsAtFinalElement(RIL.BLOCK, RIL.PARA)
                paragraph = {
                    "box": ri.BoundingBox(RIL.PARA),
                    "ocr_text": ri.GetUTF8Text(RIL.PARA),
                    "paragraph_info": list(ri.ParagraphInfo()),
                    "confidence": ri.Confidence(RIL.PARA),
                    "lines": []
                }
                while True:
                    last_line = ri.IsAtFinalElement(RIL.PARA, RIL.TEXTLINE)
                    line = {
                        "box": ri.BoundingBox(RIL.TEXTLINE),
                        "ocr_text": ri.GetUTF8Text(RIL.TEXTLINE),
                        "confidence": ri.Confidence(RIL.TEXTLINE),
                        "words": []
                    }
                    while True:
                        word = {
                            "box": ri.BoundingBox(RIL.WORD),
                            "ocr_text": ri.GetUTF8Text(RIL.WORD),
                            "confidence": ri.Confidence(RIL.WORD),
                            "attributes": ri.WordFontAttributes()
                        }
                        last_word = ri.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD)
                        line["words"].append(word)
                        if last_word:
                            break
                        ri.Next(RIL.WORD)
                    paragraph["lines"].append(line)
                    if last_line:
                        break
                    ri.Next(RIL.TEXTLINE)
                block["paragraphs"].append(paragraph)
                if last_para:
                    break
                ri.Next(RIL.PARA)
            document["blocks"].append(block)
            ri.Next(RIL.BLOCK)
        return document
    finally:
        # Release the native engine; it used to leak on every call.
        api.End()
class GameAdaptor:
    """Capture a window's content in a background thread and OCR regions of it.

    ``_work`` is the capture state flag: ``1`` while the capture loop should
    run, ``0`` when idle or asked to stop, ``-1`` once the loop has exited.
    """

    def __init__(self, window_name):
        """Look up the window by title and prepare the OCR engine.

        Raises:
            Exception: if no window with that title is found.
        """
        self._window_name = window_name
        self._hwnd = win32gui.FindWindow(None, window_name)
        # Fail before allocating the Tesseract engine so nothing leaks.
        if self._hwnd == 0:
            raise Exception('Window Handle Not Found! xD')
        _options = dict(psm=PSM.SINGLE_LINE, oem=OEM.LSTM_ONLY)
        self._api = PyTessBaseAPI('tessdata', 'eng', **_options)
        self._image = None
        self._lock = Lock()
        self._work = 0

    def _get_window_region(self):
        """Return ``(left, top, width, height, border_left, border_top)``.

        Width and height are the window rectangle minus fixed margins
        (12 left/right, 31 top, 20 bottom).
        """
        bl, bt, br, bb = 12, 31, 12, 20
        l, t, r, b = win32gui.GetWindowRect(self._hwnd)
        w = r - l - br - bl
        h = b - t - bt - bb
        return l, t, w, h, bl, bt

    @contextmanager
    def _window_device_context(self):
        """Yield ``(window_dc, compatible_dc)`` and release both afterwards."""
        wdc = win32gui.GetWindowDC(self._hwnd)
        dc_obj = win32ui.CreateDCFromHandle(wdc)
        c_dc = dc_obj.CreateCompatibleDC()
        try:
            yield dc_obj, c_dc
        finally:
            # Release the GDI objects even when the capture body raises.
            dc_obj.DeleteDC()
            c_dc.DeleteDC()
            win32gui.ReleaseDC(self._hwnd, wdc)

    def _capture(self):
        """Grab the window's inner region as a numpy array.

        The bitmap bits are reshaped to ``(height, width, 4)`` and the last
        channel is dropped.
        """
        x, y, w, h, bx, by = self._get_window_region()
        with self._window_device_context() as (dc_obj, cdc):
            bmp = win32ui.CreateBitmap()
            bmp.CreateCompatibleBitmap(dc_obj, w, h)
            cdc.SelectObject(bmp)
            cdc.BitBlt((0, 0), (w, h), dc_obj, (bx, by), win32con.SRCCOPY)
            bmp_info = bmp.GetInfo()
            img = np.frombuffer(bmp.GetBitmapBits(True), dtype=np.uint8)
            win32gui.DeleteObject(bmp.GetHandle())
        return img.reshape(bmp_info['bmHeight'], bmp_info['bmWidth'],
                           4)[:, :, :-1]

    def _do_capture(self):
        """Capture loop run by the worker thread until ``_work`` leaves 1."""
        while self._work == 1:
            temp_image = self._capture()
            with self._lock:
                self._image = temp_image
            sleep(0.001)
        # Signal stop_capture() that the loop has finished.
        self._work = -1

    def start_capture(self):
        """Start the capture thread and block until the first frame exists."""
        self._work = 1
        Thread(target=self._do_capture).start()
        while self._image is None:
            sleep(0.001)

    def stop_capture(self):
        """Stop the capture thread (if one is running) and drop the last frame."""
        # Only wait for the worker's acknowledgement when a worker is running;
        # otherwise nobody would ever set the flag to -1 and this would spin
        # forever.
        if self._work == 1:
            self._work = 0
            while self._work != -1:
                sleep(0.001)
        self._image = None

    def get_image(self):
        """Return the most recently captured frame (``None`` if there is none)."""
        with self._lock:
            return self._image

    def send_keys(self, *keys):
        """Post a ``WM_KEYDOWN`` message to the window for every key code."""
        for k in keys:
            win32gui.PostMessage(self._hwnd, win32con.WM_KEYDOWN, k, 0)

    def get_text(self, region):
        """Generator yielding the OCR text of successive regions of one frame.

        The current frame is loaded once; each ``(x, y, w, h)`` region is
        recognized and its text yielded. Send the next region with
        ``.send(region)``; sending ``None`` ends the generator.
        """
        temp_pil_image = Image.fromarray(self.get_image())
        self._api.SetImage(temp_pil_image)
        while region is not None:
            x, y, w, h = region
            self._api.SetRectangle(x, y, w, h)
            self._api.Recognize(0)
            region = yield self._api.GetUTF8Text()

    def close(self):
        """Release the Tesseract engine held by this adaptor."""
        self._api.End()