Python PyTessBaseAPI.MeanTextConf примеры использования

Язык программирования: Python

Пространство имен/Пакет: tesserocr

Класс/Тип: PyTessBaseAPI

Метод/Функция: MeanTextConf

Примеров на hotexamples.com: 10

Python PyTessBaseAPI.MeanTextConf - 10 примеров найдено. Это лучшие примеры Python кода для tesserocr.PyTessBaseAPI.MeanTextConf, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

SetImage(30)

GetUTF8Text(30)

PyTessBaseAPI(30)

SetVariable(21)

SetImageFile(17)

End(14)

SetPageSegMode(12)

Recognize(11)

MeanTextConf(10)

SetRectangle(9)

GetComponentImages(5)

Init(5)

GetThresholdedImage(4)

AllWordConfidences(4)

GetIterator(4)

Clear(3)

GetInitLanguagesAsString(2)

AnalyseLayout(2)

SetSourceResolution(2)

GetPageSegMode(2)

GetHOCRText(1)

__exit__(1)

DetectOrientationScript(1)

GetTextlines(1)

SetImageBytes(1)

AllWords(1)

GetRegions(1)

IsValidCharacter(1)

InitFull(1)

GetTSVText(1)

__init__(1)

Пример #1

Показать файл

class Ocr:
    def __init__(self):
        self.api = None

    def __enter__(self):
        self.api = PyTessBaseAPI().__enter__()
        self.api.SetVariable('tessedit_char_whitelist',
                             OCR_CHARACTER_WHITELIST)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.api.__exit__(exc_type, exc_val, exc_tb)

    def get_characters(self, image):
        h, w = image.shape[:2]

        if h < 1 or w < 1:
            raise NoImageError()

        img_pil = Image.fromarray(image)
        self.api.SetImage(img_pil)
        cell_text = self.api.GetUTF8Text().strip()
        confidence = self.api.MeanTextConf()

        return cell_text, confidence

Пример #2

Показать файл

Файл: tesseract_helpers.py Проект: albarppt/kk-project

def read_text_with_confidence(image,
                              lang='fast_ind',
                              path='/usr/share/tesseract-ocr/5/tessdata',
                              psm=4,
                              whitelist=''):
    height, width = image.shape[:2]

    if height <= 0 or width <= 0:
        return '', 0

    image_pil = Image.fromarray(image)

    api = PyTessBaseAPI(lang=lang, psm=psm, path=path, oem=OEM.LSTM_ONLY)

    try:
        api.SetImage(image_pil)

        if whitelist != '':
            api.SetVariable('tessedit_char_whitelist', whitelist)

        api.Recognize()

        text = api.GetUTF8Text()
        confidence = api.MeanTextConf()
    except Exception:
        print("[ERROR] Tesseract exception")
    finally:
        api.End()

    return text, confidence

Пример #3

Показать файл

Файл: extractor.py Проект: Tishka17/badgerdoc

class TextExtractor:
    def __init__(self, image_path, seg_mode=PSM.SPARSE_TEXT):
        self.api = PyTessBaseAPI()
        self.api.SetPageSegMode(seg_mode)
        self.api.SetImageFile(image_path)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def _extract(self) -> Tuple:
        text = self.api.GetUTF8Text()
        conf = self.api.MeanTextConf()
        return text, conf

    def _extract_from_rect(self, x, y, w, h) -> Tuple:
        self.api.SetRectangle(x, y, w, h)
        return self._extract()

    #TODO: Add support of zero values
    def extract(self, x=None, y=None, w=None, h=None) -> Tuple:
        if all([x, y, w, h]):
            return self._extract_from_rect(x, y, w, h)
        else:
            return self._extract()

    def close(self):
        self.api.End()

Пример #4

Показать файл

Файл: recognize.py Проект: gavinrozzi/aleph

class OCR(object):
    MAX_MODELS = 5
    MIN_WIDTH = 10
    MIN_HEIGHT = 10

    def __init__(self):
        # Tesseract language types:
        _, self.supported = get_languages()
        self.reset_engine('eng')

    def language_list(self, languages):
        models = [c for c in alpha3(languages) if c in self.supported]
        if len(models) > self.MAX_MODELS:
            log.warning("Too many models, limit: %s", self.MAX_MODELS)
            models = models[:self.MAX_MODELS]
        models.append('eng')
        return '+'.join(sorted(set(models)))

    def reset_engine(self, languages):
        if hasattr(self, 'api'):
            self.api.Clear()
            self.api.End()
        self.api = PyTessBaseAPI(lang=languages, oem=OEM.LSTM_ONLY)

    def extract_text(self, data, languages=None, mode=PSM.AUTO_OSD):
        """Extract text from a binary string of data."""
        languages = self.language_list(languages)
        if languages != self.api.GetInitLanguagesAsString():
            self.reset_engine(languages)

        try:
            image = Image.open(BytesIO(data))
            # TODO: play with contrast and sharpening the images.
            if image.width <= self.MIN_WIDTH:
                return
            if image.height <= self.MIN_HEIGHT:
                return

            if mode != self.api.GetPageSegMode():
                self.api.SetPageSegMode(mode)

            self.api.SetImage(image)
            text = self.api.GetUTF8Text()
            confidence = self.api.MeanTextConf()
            log.info("%s chars (w: %s, h: %s, langs: %s, confidence: %s)",
                     len(text), image.width, image.height, languages,
                     confidence)
            return text
        except Exception as ex:
            log.exception("Failed to OCR: %s", languages)
        finally:
            self.api.Clear()

Пример #5

Показать файл

Файл: ocr.py Проект: MediaUncovered/memorious

def read_word(image, whitelist=None, chars=None, spaces=False):
    """ OCR a single word from an image. Useful for captchas.
        Image should be pre-processed to remove noise etc. """
    api = PyTessBaseAPI()
    api.SetPageSegMode(8)
    if whitelist is not None:
        api.SetVariable("tessedit_char_whitelist", whitelist)
    api.SetImage(image)
    api.Recognize()
    guess = api.GetUTF8Text()

    if not spaces:
        guess = ''.join([c for c in guess if c != " "])
        guess = guess.strip()

    if chars is not None and len(guess) != chars:
        return guess, None

    return guess, api.MeanTextConf()

Пример #6

Показать файл

Файл: TesseractOCR.py Проект: MulongXie/FSE-UIDETECTION-APPROACHES

class TesseractOCR:

    #private static   TESSERACT_ENGINE_MODE = TessAPI1.TessOcrEngineMode.OEM_DEFAULT

    #
    # bpp - bits per pixel, represents the bit depth of the image, with 1 for
    # binary bitmap, 8 for gray, and 24 for color RGB.
    #
    BBP = 8
    DEFAULT_CONFIDENT_THRESHOLD = 60.0
    MINIMUM_DESKEW_THRESHOLD = 0.05

    def __init__(self, rgbaImage, dipCalculator, language):
        self.mRgbaImage = rgbaImage
        self.mDipCalculator = dipCalculator
        self.mHandle = PyTessBaseAPI()

        self.mOcrTextWrappers = []
        self.mOcrBlockWrappers = []
        self.mOcrLineWrappers = []
        self.raWrappers = []
        #         self.mLanguage = language

        self.mBufferedImageRgbaImage = Image.fromarray(self.mRgbaImage)
        self.initOCR()

    def baseInit(self, iteratorLevel):
        width = 0
        height = 0
        channels = 1

        if len(self.mRgbaImage.shape) == 2:
            height, width = self.mRgbaImage.shape
        else:
            height, width, channels = self.mRgbaImage.shape

        return self.baseInitIter(self.mRgbaImage, Rect(0, 0, width, height),
                                 channels, iteratorLevel)

    def baseInitIter(self, imageMat, rect, channels, iteratorLevel):
        listdata = []
        parentX = rect.x
        parentY = rect.y
        #        subMat = imageMat[rect.y:rect.y+rect.height, rect.x:rect.width+rect.x]
        #
        #        if(channels != 1):
        #            subMat = imageMat[rect.y:rect.y+rect.height, rect.x:rect.width+rect.x, 0:channels]

        #tessAPI = PyTessBaseAPI()
        #Convert to PIL image
        imgPIL = Image.fromarray(imageMat)
        self.mHandle.SetImage(imgPIL)
        boxes = self.mHandle.GetComponentImages(iteratorLevel, True)

        for i, (im, box, _, _) in enumerate(boxes):

            wrapper = OCRTextWrapper.OCRTextWrapper()
            self.mHandle.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = self.mHandle.GetUTF8Text()
            wrapper.text = ocrResult
            conf = self.mHandle.MeanTextConf()
            wrapper.confidence = conf
            self.mHandle.Recognize()
            iterator = self.mHandle.GetIterator()
            fontAttribute = iterator.WordFontAttributes()
            wrapper.x = box['x'] + parentX
            wrapper.y = box['y'] + parentY
            wrapper.width = box['w']
            wrapper.height = box['h']
            wrapper.rect = Rect(wrapper.x, wrapper.y, wrapper.width,
                                wrapper.height)
            #            print(box)
            #
            if (fontAttribute != None):
                wrapper.fontName = fontAttribute['font_name']
                wrapper.bold = fontAttribute['bold']
                wrapper.italic = fontAttribute['italic']
                wrapper.underlined = fontAttribute['underlined']
                wrapper.monospace = fontAttribute['monospace']
                wrapper.serif = fontAttribute['serif']
                wrapper.smallcaps = fontAttribute['smallcaps']
                wrapper.fontSize = fontAttribute['pointsize']
                wrapper.fontId = fontAttribute['font_id']

            listdata.append(wrapper)

        return listdata

    def getBlockWithLocation(self, rect):
        wrappers = []
        for ocrTextWrapper in self.mOcrBlockWrappers:
            bound = ocrTextWrapper.rect
            if (RectUtil.contains(rect, bound)):
                wrappers.append(OCRTextWrapper.OCRTextWrapper(ocrTextWrapper))

        return wrappers

    def getImage(self, rect):
        x2 = rect.x + rect.width
        y2 = rect.y + rect.height
        mat = self.mRgbaImage[rect.y:y2, rect.x:x2]
        return Image.fromarray(mat)

    def getText(self, rect):
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            text = self.mHandle.GetUTF8Text()
            return text
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return ""

    def getLineText(self, rect):
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            text = self.mHandle.GetUTF8Text()
            if (TextUtils.isEmpty(text)):
                self.mHandle = PyTessBaseAPI(psm=PSM.SINGLE_LINE)
                self.mHandle.SetImage(self.mBufferedImageRgbaImage)
                self.mHandle.SetRectangle(rect.x, rect.y, rect.width,
                                          rect.height)
                text = self.mHandle.GetUTF8Text()
                if (TextUtils.isEmpty(text)):
                    self.mHandle.SetImage(self.getImage(rect))
                    text = self.mHandle.GetUTF8Text()

                self.mHandle = PyTessBaseAPI(psm=PSM.AUTO)
            return text
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return ""

    def getRectWordForLowConfidence(self, ocr):
        try:
            rect = ocr.bound()
            self.mHandle = PyTessBaseAPI(psm=PSM.SINGLE_WORD)
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            ocr.text = self.mHandle.GetUTF8Text()
            ocr.confidence = self.mHandle.MeanTextConf()
            if (ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD):
                self.mHandle.SetImage(self.getImage(rect))
                ocr.text = self.mHandle.GetUTF8Text()
                ocr.confidence = self.mHandle.MeanTextConf()
            if (ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD):
                return False
            self.mHandle.Recognize()
            iterator = self.mHandle.GetIterator()
            fontAttribute = iterator.WordFontAttributes()
            if (fontAttribute != None):
                ocr.fontName = fontAttribute['font_name']
                ocr.bold = fontAttribute['bold']
                ocr.italic = fontAttribute['italic']
                ocr.underlined = fontAttribute['underlined']
                ocr.monospace = fontAttribute['monospace']
                ocr.serif = fontAttribute['serif']
                ocr.smallcaps = fontAttribute['smallcaps']
                ocr.fontSize = fontAttribute['pointsize']
                ocr.fontId = fontAttribute['font_id']
#                ocr.fontsize = self.getPreferenceFontSize(ocr)

            self.mHandle = PyTessBaseAPI(psm=PSM.AUTO)
            return True
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return False

    def getWordsIn(self, rect):
        wrappers = []
        for ocrTextWrapper in self.mOcrTextWrappers:
            bound = ocrTextWrapper.bound()
            if (RectUtil.contains(rect, bound)):
                wrappers.append(OCRTextWrapper.OCRTextWrapper(ocrTextWrapper))

        return wrappers

    def initOCR(self):

        #
        self.initText()

        self.initBlock()
        #        self.initPara()
        self.initLine()
#

    def initBlock(self):
        self.mOcrBlockWrappers = self.baseInit(RIL.BLOCK)

    def initLine(self):
        self.mOcrLineWrappers = self.baseInit(RIL.TEXTLINE)
        invalidLineWrappers = []
        # a line cannot contain another lines
        for ocrLine in self.mOcrLineWrappers:
            for otherOcrLine in self.mOcrLineWrappers:
                if (ocrLine != otherOcrLine and RectUtil.contains(
                        ocrLine.bound(), otherOcrLine.bound())):
                    invalidLineWrappers.append(ocrLine)
        self.mOcrLineWrappers = [
            x for x in self.mOcrLineWrappers if x not in invalidLineWrappers
        ]

    def initPara(self):
        self.mOcrParaWrappers = self.baseInit(RIL.PARA)

    def initText(self):
        self.mOcrTextWrappers = self.baseInit(RIL.WORD)

    def isOverlapText(self, rect, confident):
        for ocrTextWrapper in self.mOcrTextWrappers:
            bound = ocrTextWrapper.bound()
            if (ocrTextWrapper.getConfidence() >= confident
                    and RectUtil.intersects(rect, bound)):
                return True
        return False

    def reset(self):
        self.mOcrTextWrappers = []
        self.mOcrLineWrappers = []
        self.initOCR()

#    def rotateImage(bi) :
#        iden = ImageDeskew(bi)
#        imageSkewAngle = iden.getSkewAngle() # determine skew angle
#        if imageSkewAngle > MINIMUM_DESKEW_THRESHOLD or imageSkewAngle < -MINIMUM_DESKEW_THRESHOLD :
#            bi = ImageHelper.rotateImage(bi, -imageSkewAngle) # deskew
#        return bi

    def getPreferenceFontSize(self, ocrTextWrapper, parentHeight):

        #        TODO TODO
        fontName = ocrTextWrapper.fontName
        fontSize = ocrTextWrapper.fontSize

        height = ocrTextWrapper.height * Constants.TEXT_BOX_AND_TEXT_HEIGHT_RATIO

        #        height = ocrTextWrapper.height
        textHeight = int(
            self.mDipCalculator.pxToHeightDip(min(parentHeight, height)))
        #        font = QFont(fontName, fontSize)
        newFontSize = fontSize
        if (self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                               fontSize) == textHeight):
            newFontSize = fontSize

        elif (self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                                 fontSize) < textHeight):
            while (self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                                      fontSize) < textHeight):
                fontSize = fontSize + 1
            newFontSize = fontSize

        else:
            while (self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                                      fontSize) > textHeight):
                fontSize = fontSize - 1

            newFontSize = fontSize

        return newFontSize

    def getTextHeightUsingFontMetrics(self, ocrTextWrapper, fontName,
                                      fontSize):
        #        class SIZE(ctypes.Structure):
        #            _fields_ = [("cx", ctypes.c_long), ("cy", ctypes.c_long)]
        #        hdc = ctypes.windll.user32.GetDC(0)
        #        hfont = ctypes.windll.gdi32.CreateFontA(-fontSize, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, fontName)
        #        hfont_old = ctypes.windll.gdi32.SelectObject(hdc, hfont)
        #        size = SIZE(0, 0)
        #        ctypes.windll.gdi32.GetTextExtentPoint32A(hdc, text, len(text), ctypes.byref(size))
        #        ctypes.windll.gdi32.SelectObject(hdc, hfont_old)
        #        ctypes.windll.gdi32.DeleteObject(hfont)
        #        return size.cy
        file = "fonts//" + fontName + ".ttf"

        font = ImageFont.truetype(file, fontSize)
        fontSize = font.getsize(ocrTextWrapper.text)
        return fontSize[1]

    def validCharacter(self, word):
        return self.mHandle.IsValidCharacter(word)

        #Don't have this method return TessAPI1.TessBaseAPIIsValidWord(mHandle, word) != 0
#        return True

#TODO
#    def getTextHeightUsingTextLayout(self,ocrTextWrapper, font) :
#        frc = self.mGraphics.getFontRenderContext()
#        loc = Point(0, 0)
#        layout = TextLayout(ocrTextWrapper.text, font, frc)
#        layout.draw(self.mGraphics, float(loc.x, loc.y))
#        bounds = layout.getBounds()
#        height = bounds.getHeight()
#        return height

#    def isValidTextUsingConfidentAndBoundaryCheck(self, ocrTextWrapper) :
#        if (ocrTextWrapper.getConfidence() > Constants.TEXT_CONFIDENT_THRESHOLD + Constants.TEXT_CONFIDENT_THRESHOLD_SECONDARY_RANGE) :
#            return True
#
#        elif (ocrTextWrapper.getConfidence() <= Constants.TEXT_CONFIDENT_THRESHOLD) :
#            return False
#
#        return self.isValidTextUsingBoundaryCheck(ocrTextWrapper)
#
#

    def getTextDimensions(self, text, fontName, fontSize):
        file = "fonts//" + fontName + ".ttf"
        try:
            font = ImageFont.truetype(file, fontSize)
            fontSize = font.getsize(text)
            return fontSize
        except OSError:
            print(file)

    def isValidTextUsingBoundaryCheck(self, ocrTextWrapper):
        # confident between TextProcessor.TEXT_CONFIDENT_THRESHOLD and
        # TextProcessor.TEXT_CONFIDENT_THRESHOLD +
        # TextProcessor.TEXT_CONFIDENT_THRESHOLD_SECONDARY_RANGE
        if (TextUtils.isEmpty(ocrTextWrapper.text)):
            # We cannot calculate width of empty text
            return True
#        return True

#        frc = mGraphics.getFontRenderContext()
#        font = QFont(ocrTextWrapper.fontName,ocrTextWrapper.fontSize)
#        loc = Point(0, 0)
#        layout = TextLayout(ocrTextWrapper.text,font, frc)
#        layout.draw(mGraphics,  loc.getX(), loc.getY())
#        bound = layout.getBounds()
        width, height = self.getTextDimensions(ocrTextWrapper.text,
                                               ocrTextWrapper.fontName,
                                               ocrTextWrapper.fontSize)

        fontRatio = float(height / width)
        boundRatio = float(ocrTextWrapper.height / ocrTextWrapper.width)
        fontArea = self.mDipCalculator.dipToHeightPx(
            height) * self.mDipCalculator.dipToWidthPx(width)
        boundArea = float(ocrTextWrapper.width * ocrTextWrapper.height)
        #
        # the different between dimensions of the text should be smaller than
        # 10% of the max dimension.
        # System.out.prln(" Ratio: " + fontRatio + ", " + boundRatio + ", "
        # + Math.abs(boundRatio - fontRatio)
        # / Math.max(boundRatio, fontRatio) + "," + fontArea + ", "
        # + boundArea + ", " + Math.min(fontArea, boundArea)
        # / Math.max(fontArea, boundArea))

        # It the bound is square, it less likely that this text is correct
        # TODO: This rule may not need it
        #        if (float(min(ocrTextWrapper.getWidth(),ocrTextWrapper.getHeight()) / max( ocrTextWrapper.getWidth(),
        #						ocrTextWrapper.getHeight())) > 0.95) :
        #			# if drawing text cannot create square, sorry -> invalid
        #            if (float(min(width, height) / max(width, height)) <= 0.95 and not validWord(ocrTextWrapper.text)) :
        #                return False
        #
        #
        #

        #        print(self.mDipCalculator.dipToWidthPx(width), self.mDipCalculator.dipToHeightPx(height))
        #        print( ocrTextWrapper.width, ocrTextWrapper.height)
        dimension = abs(boundRatio - fontRatio) / max(boundRatio, fontRatio)
        #        print(dimension)

        dimensionCheck = abs(boundRatio - fontRatio) / max(
            boundRatio, fontRatio
        ) <= Constants.TEXT_CONFIDENT_ACCEPTANCE_DIMENSION_RATIO_DIFFERENCE_THRESHOLD

        areaCheckVal = min(fontArea, boundArea) / max(fontArea, boundArea)
        #        print(areaCheckVal)
        #        print(ocrTextWrapper.text)
        areaCheck = min(fontArea, boundArea) / max(
            fontArea,
            boundArea) >= Constants.TEXT_AREA_ACCEPTANCE_DIFFERENCE_THRESHOLD

        return dimensionCheck and areaCheck

    def destroy(self):
        self.mHandle.End

Пример #7

Показать файл

Файл: ocr.py Проект: wintericie/rev

def run_ocr_in_chart(chart, pad=0, psm=PSM.SINGLE_LINE):
    """
    Run OCR for all the boxes.
    :param img:
    :param boxes:
    :param pad: padding before applying ocr
    :param psm: PSM.SINGLE_WORD or PSM.SINGLE_LINE
    :return:
    """
    img = chart.image

    # add a padding to the initial figure
    fpad = 1
    img = cv2.copyMakeBorder(img.copy(), fpad, fpad, fpad, fpad, cv2.BORDER_CONSTANT, value=(255, 255, 255))
    fh, fw, _ = img.shape

    api = PyTessBaseAPI(psm=psm, lang='eng')
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))

    for tbox in chart.texts:
        # adding a pad to original image. Some case in quartz corpus, the text touch the border.
        x, y, w, h = ru.wrap_rect(u.ttoi(tbox.rect), (fh, fw), padx=pad, pady=pad)
        x, y = x + fpad, y + fpad

        if w * h == 0:
            tbox.text = ''
            continue

        # crop region of interest
        roi = img[y:y + h, x:x + w]
        #  to gray scale
        roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        #
        roi_gray = cv2.resize(roi_gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
        # binarization
        _, roi_bw = cv2.threshold(roi_gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # removing noise from borders
        roi_bw = 255 - clear_border(255-roi_bw)

        # roi_gray = cv2.copyMakeBorder(roi_gray, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=255)

        # when testing boxes from csv files
        if tbox.num_comp == 0:
            # Apply Contrast Limited Adaptive Histogram Equalization
            roi_gray2 = clahe.apply(roi_gray)
            _, roi_bw2 = cv2.threshold(roi_gray2, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            _, num_comp = morphology.label(roi_bw2, return_num=True, background=255)
            tbox.regions.extend(range(num_comp))

        pil_img = smp.toimage(roi_bw)
        if SHOW:
            pil_img.show()
        max_conf = -np.inf
        min_dist = np.inf
        correct_text = ''
        correct_angle = 0
        u.log('---------------')
        for angle in [0, -90, 90]:
            rot_img = pil_img.rotate(angle, expand=1)

            api.SetImage(rot_img)
            conf = api.MeanTextConf()
            text = api.GetUTF8Text().strip()
            dist = abs(len(text.replace(' ', '')) - tbox.num_comp)

            u.log('text: %s  conf: %f  dist: %d' % (text, conf, dist))
            if conf > max_conf and dist <= min_dist:
                max_conf = conf
                correct_text = text
                correct_angle = angle
                min_dist = dist

        tbox.text = post_process_text(lossy_unicode_to_ascii(correct_text))
        tbox.text_conf = max_conf
        tbox.text_dist = min_dist
        tbox.text_angle = correct_angle

        u.log('num comp %d' % tbox.num_comp)
        u.log(u'** text: {} conf: {} angle: {}'.format(correct_text, max_conf, correct_angle))

    api.End()

Пример #8

Показать файл

    #image = Image.
    #image.save('page0y.png')
    #image = Image.open('page0.png')
    title = ''
    max_h = 0
    min_y = 10000
    api.SetImage(image)
    #api.SetImageFile(filename)
    boxes = api.GetComponentImages(RIL.TEXTLINE, True)
    #print('Found {} textline image components.'.format(len(boxes)))
    for i, (im, box, _, _) in enumerate(boxes):
        # im is a PIL image object
        # box is a dict with x, y, w and h keys
        api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
        ocrResult = api.GetUTF8Text().replace('\n', ' ').strip()
        conf = api.MeanTextConf()
        print("Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
              "confidence: {1}, text: {2}".format(i, conf, ocrResult, **box))

        text = ' '.join(alpha_re.findall(ocrResult.strip()))
        if len(text) < 5:
            continue

        if box['y'] <= 50:
            continue

        if box['y'] < min_y:
            min_y = box['y']
            title = text
            break

Пример #9

Показать файл

def ocr_on_bounding_boxes(img, components):

    blurbs = []
    for component in components:
        (aspect, vertical, horizontal) = segment_into_lines(img, component)
        #if len(vertical)<2 and len(horizontal)<2:continue

        #attempt to separately process furigana
        #(furigana, non_furigana) = estimate_furigana(vertical)
        '''
      from http://code.google.com/p/tesseract-ocr/wiki/ControlParams
      Useful parameters for Japanese and Chinese

      Some Japanese tesseract user found these parameters helpful for increasing tesseract-ocr (3.02) accuracy for Japanese :

      Name 	Suggested value 	Description
      chop_enable 	T 	Chop enable.
      use_new_state_cost 	F 	Use new state cost heuristics for segmentation state evaluation
      segment_segcost_rating 	F 	Incorporate segmentation cost in word rating?
      enable_new_segsearch 	0 	Enable new segmentation search path.
      language_model_ngram_on 	0 	Turn on/off the use of character ngram model.
      textord_force_make_prop_words 	F 	Force proportional word segmentation on all rows. 
    '''
        #now run OCR on this bounding box
        api = PyTessBaseAPI(path='C:/Program Files/Tesseract-OCR/tessdata')
        api.Init(".", "jpn", tesseract.OEM_DEFAULT)
        #handle single column lines as "vertical align" and Auto segmentation otherwise
        if len(vertical) < 2:
            api.SetPageSegMode(
                5)  #tesseract.PSM_VERTICAL_ALIGN)#PSM_AUTO)#PSM_SINGLECHAR)#
        else:
            api.SetPageSegMode(tesseract.PSM_AUTO)  #PSM_SINGLECHAR)#
        api.SetVariable('chop_enable', 'T')
        api.SetVariable('use_new_state_cost', 'F')
        api.SetVariable('segment_segcost_rating', 'F')
        api.SetVariable('enable_new_segsearch', '0')
        api.SetVariable('language_model_ngram_on', '0')
        api.SetVariable('textord_force_make_prop_words', 'F')
        api.SetVariable('tessedit_char_blacklist', '}><L')
        api.SetVariable('textord_debug_tabfind', '0')

        x = component[1].start
        y = component[0].start
        w = component[1].stop - x
        h = component[0].stop - y
        roi = cv2.cv.CreateImage((w, h), 8, 1)
        sub = cv2.cv.GetSubRect(cv2.cv.fromarray(img), (x, y, w, h))
        cv2.cv.Copy(sub, roi)
        tesseract.SetCvImage(roi, api)
        txt = api.GetUTF8Text()
        conf = api.MeanTextConf()
        if conf > 0 and len(txt) > 0:
            blurb = Blurb(x, y, w, h, txt, confidence=conf)
            blurbs.append(blurb)
        '''
    for line in non_furigana:
      x=line[1].start
      y=line[0].start
      w=line[1].stop-x
      h=line[0].stop-y
      roi = cv2.cv.CreateImage((w,h), 8, 1)
      sub = cv2.cv.GetSubRect(cv2.cv.fromarray(img), (x, y, w, h))
      cv2.cv.Copy(sub, roi)
      tesseract.SetCvImage(roi, api)
      txt=api.GetUTF8Text()
      conf=api.MeanTextConf()
      if conf>0:
        blurb = Blurb(x, y, w, h, txt, confidence=conf)
        blurbs.append(blurb)
    '''
    return blurbs

Пример #10

Показать файл

class OCREngine():
    def __init__(self, extra_whitelist='', all_unicode=False, lang='eng'):
        """
        Args:
          extra_whitelist: string of extra chars for Tesseract to consider
              only takes effect when all_unicode is False
          all_unicode: if True, Tess will consider all possible unicode characters
          lang: OCR language
        """
        self.tess = PyTessBaseAPI(psm=PSM_MODE, lang=lang)
        self.is_closed = False
        if all_unicode:
            self.whitelist_chars = None
        else:
            self.whitelist_chars = ("abcdefghijklmnopqrstuvwxyz"
                                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                    "1234567890"
                                    r"~!@#$%^&*()_+-={}|[]\:;'<>?,./"
                                    '"'
                                    "©") + extra_whitelist
            self.tess.SetVariable('tessedit_char_whitelist',
                                  self.whitelist_chars)

    def check_engine(self):
        if self.is_closed:
            raise RuntimeError('OCREngine has been closed.')

    def recognize(self,
                  image,
                  min_text_size=MIN_TEXT_SIZE,
                  max_text_size=MAX_TEXT_SIZE,
                  uniformity_thresh=UNIFORMITY_THRESH,
                  thin_line_thresh=THIN_LINE_THRESH,
                  conf_thresh=CONF_THRESH,
                  box_expand_factor=BOX_EXPAND_FACTOR,
                  horizontal_pooling=HORIZONTAL_POOLING):
        """
        Generator: Blob
        http://stackoverflow.com/questions/23506105/extracting-text-opencv

        Args:
          input_image: can be one of the following types:
            - string: image file path
            - ndarray: numpy image
            - PIL.Image.Image: PIL image
          min_text_size:
            min text height/width in pixels, below which will be ignored
          max_text_size:
            max text height/width in pixels, above which will be ignored
          uniformity_thresh (0.0 < _ < 1.0):
            remove all black or all white regions
            ignore a region if the number of pixels neither black nor white < [thresh]
          thin_line_thresh (must be odd int):
            remove all lines thinner than [thresh] pixels.
            can be used to remove the thin borders of web page textboxes.
          conf_thresh (0 < _ < 100):
            ignore regions with OCR confidence < thresh.
          box_expand_factor (0.0 < _ < 1.0):
            expand the bounding box outwards in case certain chars are cutoff.
          horizontal_pooling:
            result bounding boxes will be more connected with more pooling,
            but large pooling might lower accuracy.
        """
        self.check_engine()
        # param sanity check
        assert max_text_size > min_text_size > 0
        assert 0.0 <= uniformity_thresh < 1.0
        assert thin_line_thresh % 2 == 1
        assert 0 <= conf_thresh < 100
        assert 0.0 <= box_expand_factor < 1.0
        assert horizontal_pooling > 0

        image = get_np_img(image)
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255,
                                       cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)
        img = img_gray
        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
        # cut off all gray pixels < 30.
        # `cv2.THRESH_BINARY | cv2.THRESH_OTSU` is also good, but might overlook certain light gray areas
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        # connect horizontally oriented regions
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                           (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
        # remove all thin textbox borders (e.g. web page textbox)
        if thin_line_thresh > 0:
            kernel = cv2.getStructuringElement(
                cv2.MORPH_RECT, (thin_line_thresh, thin_line_thresh))
            img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        _, contours, hierarchy = cv2.findContours(img, cv2.RETR_CCOMP,
                                                  cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = box = Box(*cv2.boundingRect(contour))
            # remove regions that are beyond size limits
            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue
            # remove regions that are almost uniformly white or black
            binary_region = crop(img_bw, box)
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                continue
            # expand the borders a little bit to include cutoff chars
            expansion = int(min(h, w) * box_expand_factor)
            x = max(0, x - expansion)
            y = max(0, y - expansion)
            h, w = h + 2 * expansion, w + 2 * expansion
            if h > w:  # further extend the long axis
                h += 2 * expansion
            elif w > h:
                w += 2 * expansion
            # image passed to Tess should be grayscale.
            # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11
            box = Box(x, y, w, h)
            img_crop = crop(img_gray, box)
            # make sure that crops passed in tesseract have minimum x-height
            # http://github.com/tesseract-ocr/tesseract/wiki/FAQ#is-there-a-minimum-text-size-it-wont-read-screen-text
            img_crop = cv2.resize(img_crop,
                                  (int(img_crop.shape[1] * CROP_RESIZE_HEIGHT /
                                       img_crop.shape[0]), CROP_RESIZE_HEIGHT))
            ocr_text, conf = self.run_tess(img_crop)
            if conf > conf_thresh:
                yield Blob(ocr_text, box, conf)

    def _experiment_segment(self,
                            img,
                            min_text_size=MIN_TEXT_SIZE,
                            max_text_size=MAX_TEXT_SIZE,
                            uniformity_thresh=UNIFORMITY_THRESH,
                            horizontal_pooling=HORIZONTAL_POOLING):
        """
        PRIVATE: experiment only
        """
        img_init = img  # preserve initial image
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255,
                                       cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, morph_kernel)
        disp(img)
        #         morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        #         img = cv2.dilate(img, morph_kernel)
        # OTSU thresholding
        #         _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        #         img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY_INV,9,2)
        disp(img)
        # connect horizontally oriented regions
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                                 (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, morph_kernel)
        disp(img)

        if 0:
            morph_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,
                                                     (horizontal_pooling, 3))
            img = cv2.erode(img, morph_kernel, iterations=1)
            disp(img)
            morph_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (6, 6))
            img = cv2.dilate(img, morph_kernel, iterations=1)
        elif 1:
            morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
            img = cv2.morphologyEx(img, cv2.MORPH_OPEN, morph_kernel)
        disp(img)

        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        _, contours, hierarchy = cv2.findContours(img, cv2.RETR_CCOMP,
                                                  cv2.CHAIN_APPROX_SIMPLE)
        img_copy = np.copy(img_init)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            draw_rect(img_copy, x, y, w, h)

            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue

            binary_region = img_bw[y:y + h, x:x + w]
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                # ignore mostly white or black regions
                #                 print(w, h)
                #                 disp(binary_region)
                continue
            # the image must be grayscale, otherwise Tesseract will SegFault
            # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11
            draw_rect(img_init, x, y, w, h)
        disp(img_copy)
        disp(img_init, 0)

    def run_tess(self, img):
        """
        Tesseract python API source code:
        https://github.com/sirfz/tesserocr/blob/master/tesserocr.pyx

        Returns:
          (ocr_text, confidence)
        """
        if isinstance(img, np.ndarray):
            img = np2PIL(img)
        self.tess.SetImage(img)
        ocr_text = self.tess.GetUTF8Text().strip()
        conf = self.tess.MeanTextConf()
        return ocr_text, conf

    def _deprec_run_tess(self, img):
        "GetComponentImages throws SegFault randomly. No way to fix. :("
        if isinstance(img, np.ndarray):
            img = np2PIL(img)

        components = self.tess.GetComponentImages(RIL.TEXTLINE, True)
        for _, inner_box, block_id, paragraph_id in components:
            # box is a dict with x, y, w and h keys
            inner_box = Box(**inner_box)
            if inner_box.w < MIN_TEXT_SIZE or inner_box.h < MIN_TEXT_SIZE:
                continue
            self.tess.SetRectangle(*inner_box)
            ocr_text = self.tess.GetUTF8Text().strip()
            conf = self.tess.MeanTextConf()
            yield ocr_text, inner_box, conf

    def close(self):
        self.tess.End()
        self.is_closed = True

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()