Example #1
    def extract_text_from_image(self, data):
        """Extract text from a binary string of data."""
        tessdata = '/usr/share/tesseract-ocr/'
        tessdata = self.manager.get_env('TESSDATA_PREFIX', tessdata)
        languages = self.get_languages(self.result.languages)

        key = sha1(data)
        key.update(languages.encode('utf-8'))
        key = key.hexdigest()
        text = self.manager.get_cache(key)
        if text is not None:
            return text

        api = PyTessBaseAPI(lang=languages, path=tessdata)
        try:
            image = Image.open(BytesIO(data))
            # TODO: play with contrast and sharpening the images.
            api.SetImage(image)
            text = api.GetUTF8Text()
        except DecompressionBombWarning as dce:
            log.warning("Image too large: %r", dce)
            return None
        except IOError as ioe:
            log.warning("Unknown image format: %r", ioe)
            return None
        finally:
            api.Clear()

        log.debug('[%s] OCR: %s, %s characters extracted', self.result,
                  languages, len(text))
        self.manager.set_cache(key, text)
        return text
Example #2
def ocr(img, level):
    """Use tesseract OCR to detection images.

    Args:
        imagePath: File path of image.
        level: Iteration level.

    Returns:
        An array with coordinate of boxes.

    """
    result = []
    with c_locale():
        from tesserocr import PyTessBaseAPI
        api = PyTessBaseAPI()
        api.SetPageSegMode(PSM.AUTO_OSD)
        # api.SetImageFile(imagePath)
        api.SetImage(Image.fromarray(img))
        blockIter = api.AnalyseLayout()
        while blockIter.Next(level):
            pt = blockIter.BlockType()
            #result.append(blockIter.Baseline(level))
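            # 1 and 6 are Tesseract PolyBlockType codes (PT.FLOWING_TEXT and PT.TABLE in the usual enum order)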
            if pt in [1, 6]:
                result.append(blockIter.BoundingBox(level) + (pt, ))
        api.End()
    return result
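The c_locale() helper used above is not part of the snippet. A plausible sketch, assuming its job is to force the 'C' locale around the tesserocr import (older tesserocr builds asserted on LC_* settings other than 'C', as the import comment in Example #18 notes):

import contextlib
import locale

@contextlib.contextmanager
def c_locale():
    # Hypothetical helper: temporarily switch to the 'C' locale for tesserocr.
    old = locale.setlocale(locale.LC_ALL)    # remember the caller's locale
    locale.setlocale(locale.LC_ALL, 'C')
    try:
        yield
    finally:
        locale.setlocale(locale.LC_ALL, old)  # restore on exit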
Example #3
class Ocr:
    def __init__(self):
        self.api = None

    def __enter__(self):
        self.api = PyTessBaseAPI().__enter__()
        self.api.SetVariable('tessedit_char_whitelist',
                             OCR_CHARACTER_WHITELIST)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.api.__exit__(exc_type, exc_val, exc_tb)

    def get_characters(self, image):
        h, w = image.shape[:2]

        if h < 1 or w < 1:
            raise NoImageError()

        img_pil = Image.fromarray(image)
        self.api.SetImage(img_pil)
        cell_text = self.api.GetUTF8Text().strip()
        confidence = self.api.MeanTextConf()

        return cell_text, confidence
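A minimal usage sketch for the context-manager class above (the file name is illustrative; OCR_CHARACTER_WHITELIST and NoImageError come from the surrounding project):

import cv2

image = cv2.imread('cell.png')  # numpy array, as get_characters expects
with Ocr() as ocr:
    cell_text, confidence = ocr.get_characters(image)
print(cell_text, confidence)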
Example #4
def read_text_with_confidence(image,
                              lang='fast_ind',
                              path='/usr/share/tesseract-ocr/5/tessdata',
                              psm=4,
                              whitelist=''):
    height, width = image.shape[:2]

    if height <= 0 or width <= 0:
        return '', 0

    image_pil = Image.fromarray(image)

    api = PyTessBaseAPI(lang=lang, psm=psm, path=path, oem=OEM.LSTM_ONLY)

    try:
        api.SetImage(image_pil)

        if whitelist != '':
            api.SetVariable('tessedit_char_whitelist', whitelist)

        api.Recognize()

        text = api.GetUTF8Text()
        confidence = api.MeanTextConf()
    except Exception:
        print("[ERROR] Tesseract exception")
        text, confidence = '', 0
    finally:
        api.End()

    return text, confidence
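A usage sketch (file name and whitelist are illustrative; lang='eng' avoids depending on the 'fast_ind' model and default tessdata path baked into the signature):

import cv2

image = cv2.imread('receipt.png', cv2.IMREAD_GRAYSCALE)
text, confidence = read_text_with_confidence(image, lang='eng', whitelist='0123456789.,')
print(text, confidence)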
Example #5
def get_text_bounding_boxes(image, psm=12):
    bounding_boxes = []

    height, width = image.shape[:2]

    if height <= 0 or width <= 0:
        return bounding_boxes

    image_pil = Image.fromarray(image)  # Load PIL image from numpy

    api = PyTessBaseAPI(psm=psm, oem=OEM.LSTM_ONLY)

    try:
        # api.SetVariable('textord_tabfind_find_tables', 'true')
        # api.SetVariable('textord_tablefind_recognize_tables', 'true')
        api.SetImage(image_pil)

        api.Recognize()

        boxes = api.GetComponentImages(RIL.TEXTLINE, True)

        for (im, box, _, _) in boxes:
            x, y, w, h = box['x'], box['y'], box['w'], box['h']

            bounding_boxes.append((x, y, w, h))
    finally:
        api.End()

    return bounding_boxes
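A sketch that visualizes the returned text-line boxes with OpenCV (file names illustrative):

import cv2

image = cv2.imread('page.png')
for (x, y, w, h) in get_text_bounding_boxes(image):
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite('page_boxes.png', image)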
Example #6
    async def get_ocr_text(self, thresh_img):
        # Convert the image so it can be passed to OCR
        pil_img = Image.fromarray(thresh_img)
        # Run OCR
        api = PyTessBaseAPI(psm=PSM.AUTO, lang='jpn')
        api.SetImage(pil_img)
        # Strip spaces and newlines from the result and return it
        return api.GetUTF8Text().replace(' ', '').replace('\n', '')
Example #7
def getWords(pages, letters_cache):

    standard_words, split_words, letters = [], [], {'bid': [], 'letters': []}
    prev_word = None
    letter_detect = PyTessBaseAPI(psm=8, lang='eng')
    letter_detect.SetVariable('tessedit_char_whitelist', ascii_uppercase)
    bid = 0
    for pg_num in pages:
        page = doc[pg_num]

        # get initial block bounding boxes
        blocks = []
        for block in page.getText("blocks"):
            bbox = block[:4]
            text = block[4].strip()
            if len(text) != 1:  # not a single letter
                blocks.append({
                    'bid': bid,
                    'bbox': bbox,
                    'pg': page.number,
                    'text': text
                })
                bid += 1
            elif not letters_cache:
                # maps each bid to a corresponding dictionary letter
                # this provides a heuristic for our search
                sf, eps = 25 / 6, 1
                pix = page.getPixmap(matrix=fitz.Matrix(sf, sf))
                img = Image.open(io.BytesIO(pix.getPNGData()))
                bbox = resize(bbox, sf, eps)
                block_img = img.crop(bbox)
                letter_detect.SetImage(block_img)
                letter_detect.Recognize()
                letter = letter_detect.AllWords()[0]
                assert (len(letter) == 1)
                letters['bid'].append(bid)
                letters['letters'].append(letter.lower())

        standard, split, prev_word, insert_word = groupBlocks(
            blocks, prev_word, pg_num)

        # last block from previous page (no spillover)
        if insert_word:
            add_word(standard, insert_word)

        # clean up
        standard_words.extend(standard)
        split_words.extend(split)

    # add the last word to the accumulated list (`standard` is loop-local here)
    if prev_word:
        add_word(standard_words, prev_word)

    # make sure all the blocks are properly formatted
    for word in chain(standard_words, split_words):
        test_word_format(word)

    return standard_words, split_words, letters
Example #8
def read_char(image, whitelist=None):
    """ OCR a single character from an image. Useful for captchas."""
    api = PyTessBaseAPI()
    api.SetPageSegMode(10)
    if whitelist is not None:
        api.SetVariable("tessedit_char_whitelist", whitelist)
    api.SetImage(image)
    api.Recognize()
    return api.GetUTF8Text().strip()
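Page segmentation mode 10 set above is PSM.SINGLE_CHAR. A usage sketch for a digits-only captcha character (file name illustrative):

from PIL import Image

captcha = Image.open('captcha_char.png')
digit = read_char(captcha, whitelist='0123456789')
print(digit)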
Example #9
def getTextTest():
    image = cv2.imread("okey-check6.jpg", 0)
    api = PyTessBaseAPI()
    api.SetImage(Image.fromarray(image))
    # print api.GetUTF8Text()
    image = api.GetThresholdedImage()
    image.save("rus.new.exp6.tiff")
    text = image_to_text(image, lang="rus")
    # text = image_to_string(image, lang="rus")
    print(text)
    return receiptParser.getJson(text)
Example #10
class OCR(object):
    MAX_MODELS = 5
    MIN_WIDTH = 10
    MIN_HEIGHT = 10

    def __init__(self):
        # Tesseract language types:
        _, self.supported = get_languages()
        self.reset_engine('eng')

    def language_list(self, languages):
        models = [c for c in alpha3(languages) if c in self.supported]
        if len(models) > self.MAX_MODELS:
            log.warning("Too many models, limit: %s", self.MAX_MODELS)
            models = models[:self.MAX_MODELS]
        models.append('eng')
        return '+'.join(sorted(set(models)))

    def reset_engine(self, languages):
        if hasattr(self, 'api'):
            self.api.Clear()
            self.api.End()
        self.api = PyTessBaseAPI(lang=languages, oem=OEM.LSTM_ONLY)

    def extract_text(self, data, languages=None, mode=PSM.AUTO_OSD):
        """Extract text from a binary string of data."""
        languages = self.language_list(languages)
        if languages != self.api.GetInitLanguagesAsString():
            self.reset_engine(languages)

        try:
            image = Image.open(BytesIO(data))
            # TODO: play with contrast and sharpening the images.
            if image.width <= self.MIN_WIDTH:
                return
            if image.height <= self.MIN_HEIGHT:
                return

            if mode != self.api.GetPageSegMode():
                self.api.SetPageSegMode(mode)

            self.api.SetImage(image)
            text = self.api.GetUTF8Text()
            confidence = self.api.MeanTextConf()
            log.info("%s chars (w: %s, h: %s, langs: %s, confidence: %s)",
                     len(text), image.width, image.height, languages,
                     confidence)
            return text
        except Exception as ex:
            log.exception("Failed to OCR: %s", languages)
        finally:
            self.api.Clear()
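A usage sketch, assuming the project's alpha3() and get_languages() helpers are importable and a scanned image is available on disk:

with open('scan.png', 'rb') as fh:
    data = fh.read()

ocr = OCR()
text = ocr.extract_text(data, languages=['deu', 'fra'])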
Example #11
def getText(rotation, image):
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    image = Image.open(image)
    image.save("loaded.jpg", dpi=(600, 600))
    image = cv2.imread("loaded.jpg", 0)
    if rotation != 0:
        image = np.rot90(image, (360 - rotation) // 90)
    # cv2.imwrite("rotated.jpg", image)
    api = PyTessBaseAPI()
    api.SetImage(Image.fromarray(image))
    image = api.GetThresholdedImage()
    text = image_to_text(image, lang="rus")
    return receiptParser.getJson(text)
Example #12
def getText(rotation, image):
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    image = Image.open(image).convert("RGB")
    b, g, r = image.split()
    image = Image.merge("RGB", (r, g, b))
    image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
    if rotation != 0:
        image = np.rot90(image, (360 - rotation) // 90)
    api = PyTessBaseAPI()
    api.SetImage(Image.fromarray(image))
    image = api.GetThresholdedImage()
    text = image_to_text(image, lang="rus")
    # print text
    return receiptParser.getJson(text)
Example #13
def add_ocrinfo(tree, imgfile):
    imgpil = Image.open(imgfile)
    (orig_width, orig_height) = (imgpil.width, imgpil.height)

    #root_width = tree[min(tree)]['width']
    ratio = 1.0 * orig_width / config.width
    #imgpil = imgpil.convert("RGB").resize(
    #    (orig_width * OCR_RATIO, orig_height * OCR_RATIO))

    tesapi = PyTessBaseAPI(lang='eng')
    tesapi.SetImage(imgpil)
    tesapi.SetSourceResolution(config.ocr_resolution)

    for nodeid in tree:
        node = tree[nodeid]

        if node['children'] and node['text'] == '':
            node['ocr'] = ''
            continue

        x = max(node['x'] * ratio - 1, 0)
        y = max(node['y'] * ratio - 1, 0)
        x2 = min((node['x'] + node['width']) * ratio + 1, orig_width)
        y2 = min((node['y'] + node['height']) * ratio + 1, orig_height)
        width = int(x2 - x)
        height = int(y2 - y)

        if width > 3 and height > 3:
            #tesapi.SetRectangle(int(x * OCR_RATIO), int(y * OCR_RATIO),
            #                    int(width * OCR_RATIO), int(height * OCR_RATIO))
            #print(int(x), int(y), int(width), int(height), orig_width, orig_height)
            tesapi.SetRectangle(int(x), int(y), int(width), int(height))
            ocr_text = tesapi.GetUTF8Text().strip().replace('\n', ' ')
            if ocr_text.strip() == '':
                x = min(x + width * 0.05, orig_width)
                y = min(y + height * 0.05, orig_height)
                width *= 0.9
                height *= 0.9
                tesapi.SetRectangle(int(x), int(y), int(width), int(height))
                ocr_text = tesapi.GetUTF8Text().strip().replace('\n', ' ')

        else:
            ocr_text = ''

        node['ocr'] = ocr_text
Example #14
def read_word(image, whitelist=None, chars=None, spaces=False):
    """ OCR a single word from an image. Useful for captchas.
        Image should be pre-processed to remove noise etc. """
    api = PyTessBaseAPI()
    api.SetPageSegMode(8)
    if whitelist is not None:
        api.SetVariable("tessedit_char_whitelist", whitelist)
    api.SetImage(image)
    api.Recognize()
    guess = api.GetUTF8Text()

    if not spaces:
        guess = ''.join([c for c in guess if c != " "])
        guess = guess.strip()

    if chars is not None and len(guess) != chars:
        return guess, None

    return guess, api.MeanTextConf()
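A usage sketch; passing chars lets the caller reject guesses of the wrong length (file name illustrative):

from PIL import Image

word_img = Image.open('captcha_word.png')
guess, confidence = read_word(word_img, whitelist='ABCDEFGHIJKLMNOPQRSTUVWXYZ', chars=6)
if confidence is None:
    print('unexpected length:', guess)
else:
    print(guess, confidence)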
Example #15
def get_boxes(image_filename: str) -> list:
    image = Image.open(image_filename)
    width = image.width
    height = image.height
    max_width = width // 2
    max_height = height // 2

    api = PyTessBaseAPI(lang="jpn_vert")
    # api.ReadConfigFile("tess.conf")
    api.SetPageSegMode(PSM.SPARSE_TEXT_OSD)
    api.SetImage(image)
    api.Recognize(0)
    ri = api.GetIterator()
    level = RIL.WORD
    boxes = []
    for r in iterate_level(ri, level):
        conf = r.Confidence(level)
        text = r.GetUTF8Text(level)
        left, top, right, bottom = r.BoundingBox(level)
        # boxes = api.GetComponentImages(RIL.SYMBOL, True)
        # for im, rect, _, _ in boxes:
        #     # im is a PIL image object
        #     # rect is a dict with x, y, w and h keys
        #     left, top, right, bottom = rect['x'], rect['y'], rect['w'], rect['h']
        #     api.SetRectangle(left, top, right, bottom)
        #     text = api.GetUTF8Text()
        #     conf = api.MeanTextConf()
        print("'%s' \tConf: %.2f \tCoords: %d,%d,%d,%d" %
              (text, conf, left, top, right, bottom))
        box = {
            'text': text,
            'left': left,
            'top': top,
            'width': right - left,
            'height': bottom - top
        }
        if should_ignore_box(conf, box, max_width, max_height):
            continue
        boxes.append(box)
    api.End()
    image.close()
    return boxes
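should_ignore_box() is not shown. A plausible sketch, assuming it drops low-confidence, empty, or implausibly large detections (the 30-point threshold is a guess):

def should_ignore_box(conf, box, max_width, max_height):
    # Hypothetical filter: skip unreliable or oversized word boxes.
    if conf < 30:
        return True
    if not box['text'].strip():
        return True
    return box['width'] > max_width or box['height'] > max_height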
Example #16
    def read_box(self, crop, filtered, read_primes, text, table, old_read_primes, num):
        cur_time = datetime.now().strftime(self.datetime_format)
        #print("reading box")
        api = None
        try:
            api = self.api.get(block=False)
            #print("reading box")
        except queue.Empty:
            api = PyTessBaseAPI()

        api.SetImage(Image.fromarray(filtered[crop[1]:crop[3], crop[0]:crop[2]]))
        ocr_output = api.GetUTF8Text()
        self.api.put(api)
        #self.log.write("{}: Succeeded reading image x={} num_api={}\n".format(cur_time, crop[0], self.api.qsize()))
        #self.log.flush()

        sanitized = self.sanitize(ocr_output)
        ocr_text = self.title_case(sanitized)
        #self.log.write("{}: ocr text={}\n".format(cur_time, ocr_text))
        text[crop[0] + crop[1]] = ocr_text
        dict_text = self.dict_match(ocr_text)
        self.update_table(dict_text, table, read_primes, old_read_primes)
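self.api here is a pool of PyTessBaseAPI handles, which keeps OCR usable from multiple threads. A plausible initialization sketch (pool size is arbitrary):

import queue
from tesserocr import PyTessBaseAPI

def make_api_pool(size=4):
    # Pre-create a few handles so worker threads rarely hit queue.Empty
    pool = queue.Queue()
    for _ in range(size):
        pool.put(PyTessBaseAPI())
    return pool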
Example #17
class OcrWrapper(BaseImageToString):

    _OPTIONS = ('tessedit_char_whitelist', '0123456789ABCDEF.-')

    def __init__(self):
        if sys.platform == 'win32':
            self._ocr = PyTessBaseAPI(
                path="C:\\Program Files\\Tesseract-OCR\\tessdata")
        else:
            self._ocr = PyTessBaseAPI()

        self._ocr.SetVariable(self._OPTIONS[0], self._OPTIONS[1])

    def image_to_string(self, image: Image.Image) -> str:
        image.format = 'PNG'
        self._ocr.SetImage(image)
        raw_data = self._ocr.GetUTF8Text()
        return raw_data

    def end(self):
        self._ocr.End()
Example #18
    def extract(self, page: Poppler.Page):
        from tesserocr import PyTessBaseAPI  # NOQA Stupid assert on LC_* == 'C'

        ocr = PyTessBaseAPI(lang=settings.CAMPUSONLINE_BULLETIN_OCR_LANGUAGE)
        text = page.text(QRectF()).strip()
        if len(text) > settings.CAMPUSONLINE_BULLETIN_OCR_THRESHOLD:
            self.clean = True
            self.text = text
            return
        dpi = settings.CAMPUSONLINE_BULLETIN_OCR_DPI
        buf = QBuffer()
        buf.open(QIODevice.ReadWrite)
        page.renderToImage(dpi, dpi).save(buf, "PNG")
        bio = BytesIO()
        bio.write(buf.data())
        buf.close()
        bio.seek(0)
        img = Image.open(bio)
        ocr.SetImage(img)
        scanned = ocr.GetUTF8Text().strip()
        img.close()
        bio.close()
        self.clean = False
        self.text = scanned
Example #19
    class __impl:
        def __init__(self, vs, imgOutput):
            self.__vs = vs
            self.__imgOutput = imgOutput
            self.image = None
            self.logger = Logger()
            self.state = State()
            self.tesseract = PyTessBaseAPI(psm=PSM.SINGLE_CHAR,
                                           oem=OEM.LSTM_ONLY,
                                           lang="digits")
            self.filter = Filter()

            self.signalThresholdY = 160
            self.LAPPatternSesibility = 5

            self.recordStamp = time.strftime(self.logger.timeFormat)
            self.recordNum = 0
            self.recordFolder = None
            self.cntNum = 0

            if (self.state.RecordImage):
                root = 'record'
                if not os.path.isdir(root):
                    os.mkdir(root)
                self.recordFolder = os.path.join(root, self.recordStamp)
                if not os.path.isdir(self.recordFolder):
                    os.mkdir(self.recordFolder)

        def showImg(self, window, image):
            if self.__imgOutput:
                cv2.imshow(window, image)

        def warmup(self):
            time.sleep(2.0)
            self.tesserOCR(np.zeros((1, 1, 3), np.uint8))

        def tesserOCR(self, image):
            self.tesseract.SetImage(Image.fromarray(image))
            return (self.tesseract.GetUTF8Text(),
                    self.tesseract.AllWordConfidences())

        def dominantColor(self, img, clusters=2):
            data = np.reshape(img, (-1, 3))
            data = np.float32(data)

            criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10,
                        1.0)
            flags = cv2.KMEANS_RANDOM_CENTERS
            _, _, centers = cv2.kmeans(data, 1, None, criteria, 10, flags)
            return centers[0].astype(np.int32)

        def analyzeRect(self, image, warped, box, x, y):
            # find amount of color blue in warped area, assuming over X% is the lap signal
            if (self.getAmountOfColor(warped, Colors.lower_blue_color,
                                      Colors.upper_blue_color, True) > 0.1):
                self.logger.info("Rundensignal")
                self.state.setCurrentSignal(Signal.LAP)
                return "Rundensignal"

        def analyzeSquare(self, image, warped, box, x, y):

            #dominantColor, percent, _ = self.dominantColor(warped, 3)
            # dominantColor = self.dominantColor(
            #    cv2.cvtColor(warped, cv2.COLOR_BGR2HSV), 3)
            """  color = 'k'
             # find amount of color black in warped area, assuming over X% is a numeric signal
             if ((dominantColor <= 70).all()):
                 color = 'Black'

             elif ((dominantColor >= 180).all()):
                 color = 'White'

             if (color): """
            resizedWarp = cv2.resize(warped,
                                     None,
                                     fx=2.0,
                                     fy=2.0,
                                     interpolation=cv2.INTER_CUBIC)

            # gray
            optimized = cv2.cvtColor(resizedWarp, cv2.COLOR_BGR2GRAY)

            # blur
            optimized = cv2.GaussianBlur(optimized, (5, 5), 0)

            # binary image
            optimized = cv2.threshold(optimized, 0, 255,
                                      cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

            # binary inversion if dominant color is black
            """ if (color == 'Black'):
                optimized = cv2.bitwise_not(optimized) """

            # now check the 1px frame of the image; there shouldn't be any noise since it's a clean signal background
            h, w = optimized.shape[0:2]
            clean = optimized[0, 0]
            for iFrame in range(0, 2):
                for iHeight in range(h):
                    if not (optimized[iHeight, iFrame] == clean) or not (
                            optimized[iHeight, w - 1 - iFrame] == clean):
                        return False
                for iWidth in range(w):
                    # or not(optimized[h - iFrame, iWidth])
                    if not (optimized[iFrame, iWidth] == clean):
                        return False

            # cv2.imwrite("records/opt/" + str(self.cntNum) + ".jpg", optimized)

            output, confidence = self.tesserOCR(optimized)

            # if the resulting text is below X% confidence threshold, we skip it
            if not output or confidence[0] < 95:
                return False

            # clean up output from tesseract
            output = output.replace('\n', '')
            output = output.replace(' ', '')

            if output.isdigit() and 0 < int(output) < 10:
                """ self.showImg("opt " + str(self.cntNum),
                                np.hstack((resizedWarp, cv2.cvtColor(optimized, cv2.COLOR_GRAY2BGR)))) """
                if y <= self.signalThresholdY:
                    self.logger.info('Stop Signal OCR: ' + output + ' X: ' +
                                     str(x) + ' Y: ' + str(y) +
                                     ' Confidence: ' + str(confidence[0]) +
                                     '%')  # + ' DC: ' + str(dominantColor))
                    self.state.setStopSignal(int(output))
                    return 'S: ' + output
                elif self.state.StopSignalNum != 0:
                    self.logger.info('Info Signal OCR: ' + output + ' X: ' +
                                     str(x) + ' Y: ' + str(y) +
                                     ' Confidence: ' + str(confidence[0]) +
                                     '%')  # + ' DC: ' + str(dominantColor))
                    self.state.setCurrentSignal(Signal.UPPER, int(output))
                    return 'I: ' + output

        def getAmountOfColor(self,
                             img,
                             lowerColor,
                             upperColor,
                             convert2hsv=True):
            if (convert2hsv):
                img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

            # create mask from color range
            maskColor = cv2.inRange(img, lowerColor, upperColor)
            # get ratio of active pixels
            ratio_color = cv2.countNonZero(maskColor) / (img.size)
            return ratio_color

        # color picker for manual debugging
        def pick_color(self, event, x, y, flags, param):
            if event == cv2.EVENT_LBUTTONDOWN:
                pixel = self.image[y, x]
                color = np.array([pixel[0], pixel[1], pixel[2]])
                self.logger.info(pixel)

        # capture frames from the camera
        def capture(self, savedImg=None):
            if (savedImg is not None):
                image = savedImg
            else:
                image = self.__vs.read()
                if (self.state.InvertCamera):
                    image = imutils.rotate(image, angle=180)

            self.image = image

            if (self.state.RecordImage):
                self.recordNum += 1
                cv2.imwrite(
                    os.path.join(self.recordFolder,
                                 str(self.recordNum) + ".jpg"), image)
                return

            if (self.state.Approaching == Signal.UPPER
                    or self.state.Approaching == Signal.LOWER):
                self.findNumberSignal(image)
            elif (self.state.Approaching == Signal.LAP):
                self.findLapSignal(image)

        def findLapSignal(self, image):
            contourImage = image.copy()

            blur = cv2.GaussianBlur(image, (3, 3), 0)
            hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)
            self.image = hsv
            mask = cv2.inRange(hsv, Colors.lower_blue_color,
                               Colors.upper_blue_color)

            cnts = imutils.grab_contours(
                cv2.findContours(mask.copy(), cv2.RETR_LIST,
                                 cv2.CHAIN_APPROX_SIMPLE))

            if len(cnts) > 0:

                # transform all contours to rects
                rects = [cv2.boundingRect(cnt) for cnt in cnts]

                # now iterate all of the rects, trying to find an approximated sibling shifted in Y-direction
                for rect in rects:
                    (x, y, w, h) = rect
                    cv2.rectangle(contourImage, (x, y), (x + w, y + h),
                                  (0, 0, 255), 2)

                    # try to match the pattern from a given rect in all rects
                    counterPart = [
                        counterRect for counterRect in rects
                        if (counterRect != rect and x - 5 <= counterRect[0] <=
                            x + 5 and 2 * -(h + 5) <= y - counterRect[1] <= 2 *
                            (h + 5) and w - 5 <= counterRect[2] <= w + 5)
                        and h - 5 <= counterRect[3] <= h + 5
                    ]

                    if (counterPart):
                        (x, y, w, h) = counterPart[0]
                        cv2.rectangle(contourImage, (x, y), (x + w, y + h),
                                      (0, 255, 0), 2)
                        self.logger.info('LAP Signal')
                        self.state.captureLapSignal()
                        break

            self.showImg(
                'contourImage',
                np.hstack(
                    (hsv, contourImage, cv2.cvtColor(mask,
                                                     cv2.COLOR_GRAY2BGR))))
            cv2.setMouseCallback('contourImage', self.pick_color)

        def findNumberSignal(self, image):

            image_height = np.size(image, 0)
            image_width = np.size(image, 1)

            contourImage = image.copy()

            # focus only on the part of the image, where a signal could occur
            # image = image[int(image.shape[0] * 0.2):int(image.shape[0] * 0.8), 0:int(image.shape[1]*0.666)]

            mask = self.filter.autoCanny(image, 2, 3)

            # get a list of contours in the mask, chaining to just endpoints
            cnts = imutils.grab_contours(
                cv2.findContours(mask.copy(), cv2.RETR_LIST,
                                 cv2.CHAIN_APPROX_SIMPLE))

            # only proceed if at least one contour was found
            if len(cnts) > 0:
                # loop contours
                for self.cntNum, cnt in enumerate(cnts):

                    rect = cv2.minAreaRect(cnt)
                    _, _, angle = rect

                    # approximate shape
                    approx = cv2.approxPolyDP(cnt,
                                              0.02 * cv2.arcLength(cnt, True),
                                              True)

                    # the rectangle must not have a too big rotation (+/-10)
                    # and more than 3 connecting points
                    if len(approx) >= 3 and (-90 <= angle <= -80
                                             or angle >= -10):

                        box = cv2.boxPoints(rect)
                        box = np.int0(box)

                        (x, y, w, h) = cv2.boundingRect(approx)

                        # limit viewing range
                        if (y <= image_height * 0.2 or x >= image_width * 0.8):
                            continue

                        if (w <= 5 or h <= 5):
                            continue

                        # we are in approaching mode, thus we only care for the lower signals <= threshold
                        if ((self.state.Approaching == Signal.UPPER
                             and y >= self.signalThresholdY)
                                and not self.state.Standalone):
                            continue
                        elif ((self.state.Approaching == Signal.LOWER
                               and y <= self.signalThresholdY)
                              and not self.state.Standalone):
                            continue

                        sideRatio = w / float(h)

                        absoluteSizeToImageRatio = (
                            100 / (image_width * image_height)) * (w * h)

                        # calculate area of the bounding rectangle
                        rArea = w * float(h)

                        # calculate area of the contour
                        cArea = cv2.contourArea(cnt)
                        if (cArea):
                            rectContAreaRatio = (100 / rArea) * cArea
                        else:
                            continue

                        # cv2.drawContours(contourImage, [box], 0, (255, 0, 0), 1)
                        result = None

                        # is the rectangle sideways, check for lap signal
                        # if (h*2 < w and y <= self.signalThresholdY and rectContAreaRatio >= 80):
                        #result = self.analyzeRect(image, four_point_transform(image, [box][0]), box, x, y)
                        # find all contours looking like a signal with minimum area (1%)
                        if absoluteSizeToImageRatio >= 0.01:
                            # is it approx a square, or standing rect? then check for info or stop signal
                            if 0.2 <= sideRatio <= 1.1:
                                # transform ROI
                                if (sideRatio <= 0.9):
                                    coords, size, angle = rect
                                    size = size[0] + 8, size[1] + 4
                                    coords = coords[0] + 1, coords[1] + 1

                                    rect = coords, size, angle
                                    box = cv2.boxPoints(rect)
                                    box = np.int0(box)
                                """ cv2.drawContours(
                                    contourImage, [box], 0, (0, 255, 0), 1) """

                                warp = four_point_transform(image, [box][0])

                                result = self.analyzeSquare(
                                    image, warp, box, x, y)

                        if (result):
                            if (self.__imgOutput):
                                color = None
                                if (y >= self.signalThresholdY):
                                    color = (0, 0, 255)
                                else:
                                    color = (255, 0, 0)

                                cv2.drawContours(contourImage, [box], 0, color,
                                                 1)
                                cv2.drawContours(contourImage, [cnt], -1,
                                                 color, 2)
                                """ M = cv2.moments(cnt)
                                cX = int(M["m10"] / M["m00"])
                                cY = int(M["m01"] / M["m00"])
                                cv2.putText(contourImage, str(
                                    self.cntNum), (cX - 30, cY - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) """

                                self.logger.debug(
                                    "[" + str(self.cntNum) + "] SideRatio: " +
                                    str(sideRatio) + " AreaRatio: " +
                                    str(rectContAreaRatio) + " ContArea: " +
                                    str(cArea) + " RectArea: " + str(rArea) +
                                    " AbsSize: " +
                                    str(absoluteSizeToImageRatio) +
                                    " CntPoints: " + str(len(approx)) +
                                    " Size: " + str(w) + "x" + str(h))
            """ if (self.__imgOutput):  # hsv img output
                hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
                cv2.namedWindow('contourImage')
                cv2.setMouseCallback('contourImage', self.pick_color)
                # self.showImg("hsv", hsv) """

            self.showImg(
                "contourImage",
                np.hstack((contourImage, cv2.cvtColor(mask,
                                                      cv2.COLOR_GRAY2BGR))))
Example #20
class Analyzer(object):
    TEXT_TYPES = set([
        PT.FLOWING_TEXT, PT.HEADING_TEXT, PT.PULLOUT_TEXT, PT.VERTICAL_TEXT,
        PT.CAPTION_TEXT
    ])

    def __init__(self, lang=None):
        super(Analyzer, self).__init__()
        kwargs = {}
        if lang is not None:
            kwargs['lang'] = lang
        self.api = PyTessBaseAPI(psm=PSM.AUTO_OSD, **kwargs)

    def analyze_image(self, image):
        page = Page()

        self.api.SetImage(image)
        self.api.Recognize()
        iterator = self.api.GetIterator()
        page.blocks = self.__decode_blocks(iterator, image)
        page.size = Size(*image.size)

        return page

    def close(self):
        self.api.End()

    def __decode_blocks(self, iterator, image):
        blocks = []
        for tesseract_block in iterate_level(iterator, RIL.BLOCK):
            block = Block()
            block.bounding_box = BoundingBox.from_coordinates(
                *tesseract_block.BoundingBox(RIL.BLOCK))
            if not tesseract_block.GetUTF8Text(RIL.BLOCK).strip():
                block.image = tesseract_block.GetImage(RIL.BLOCK, 0, image)
                blocks.append(block)
                continue
            block.paragraphs = self.__decode_paragraphs(iterator)
            blocks.append(block)
        return blocks

    def __decode_paragraphs(self, iterator):
        paragraphs = []
        for tesseract_paragraph in iterate_level(iterator, RIL.PARA):
            paragraph = Paragraph()
            paragraph.bounding_box = BoundingBox.from_coordinates(
                *tesseract_paragraph.BoundingBox(RIL.PARA))
            paragraph.lines = self.__decode_lines(iterator)
            paragraphs.append(paragraph)
            if iterator.IsAtFinalElement(RIL.BLOCK, RIL.PARA):
                break
        return paragraphs

    def __decode_lines(self, iterator):
        lines = []
        for tesseract_line in iterate_level(iterator, RIL.TEXTLINE):
            line = TextLine()
            line.bounding_box = BoundingBox.from_coordinates(
                *tesseract_line.BoundingBox(RIL.TEXTLINE))
            line.words = self.__decode_words(iterator)
            lines.append(line)
            if iterator.IsAtFinalElement(RIL.PARA, RIL.TEXTLINE):
                break
        return lines

    def __decode_words(self, iterator):
        words = []
        for tesseract_word in iterate_level(iterator, RIL.WORD):
            font_attributes = tesseract_word.WordFontAttributes()
            word = Word()
            word.bounding_box = BoundingBox.from_coordinates(
                *tesseract_word.BoundingBox(RIL.WORD))
            word.confidence = float(tesseract_word.Confidence(
                RIL.WORD)) / 100.0
            word.text = tesseract_word.GetUTF8Text(RIL.WORD)
            word.symbols = self.__decode_symbols(iterator)
            font = Font()
            font.bold = font_attributes['bold']
            font.italic = font_attributes['italic']
            font.underline = font_attributes['underlined']
            font.monospace = font_attributes['monospace']
            font.serif = font_attributes['serif']
            font.pointsize = font_attributes['pointsize']
            font.id = font_attributes['font_id']
            for symbol in word.symbols:
                symbol.font = font
            words.append(word)
            if iterator.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
                break
        return words

    def __decode_symbols(self, iterator):
        symbols = []
        for tesseract_symbol in iterate_level(iterator, RIL.SYMBOL):
            symbol = Symbol()
            symbol.bounding_box = BoundingBox.from_coordinates(
                *tesseract_symbol.BoundingBox(RIL.SYMBOL))
            symbol.confidence = float(tesseract_symbol.Confidence(
                RIL.SYMBOL)) / 100.0
            symbol.text = tesseract_symbol.GetUTF8Text(RIL.SYMBOL)
            symbol.image = tesseract_symbol.GetBinaryImage(RIL.SYMBOL).convert(
                '1', dither=Image.NONE)
            symbols.append(symbol)
            if iterator.IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
                break
        return symbols
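A usage sketch for the Analyzer above (file name illustrative):

from PIL import Image

analyzer = Analyzer(lang='eng')
page = analyzer.analyze_image(Image.open('scan.png'))
for block in page.blocks:
    print(block.bounding_box)
analyzer.close()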
Example #21
class TesseractOCR:

    #private static   TESSERACT_ENGINE_MODE = TessAPI1.TessOcrEngineMode.OEM_DEFAULT

    #
    # bpp - bits per pixel, represents the bit depth of the image, with 1 for
    # binary bitmap, 8 for gray, and 24 for color RGB.
    #
    BBP = 8
    DEFAULT_CONFIDENT_THRESHOLD = 60.0
    MINIMUM_DESKEW_THRESHOLD = 0.05

    def __init__(self, rgbaImage, dipCalculator, language):
        self.mRgbaImage = rgbaImage
        self.mDipCalculator = dipCalculator
        self.mHandle = PyTessBaseAPI()

        self.mOcrTextWrappers = []
        self.mOcrBlockWrappers = []
        self.mOcrLineWrappers = []
        self.raWrappers = []
        #         self.mLanguage = language

        self.mBufferedImageRgbaImage = Image.fromarray(self.mRgbaImage)
        self.initOCR()

    def baseInit(self, iteratorLevel):
        width = 0
        height = 0
        channels = 1

        if len(self.mRgbaImage.shape) == 2:
            height, width = self.mRgbaImage.shape
        else:
            height, width, channels = self.mRgbaImage.shape

        return self.baseInitIter(self.mRgbaImage, Rect(0, 0, width, height),
                                 channels, iteratorLevel)

    def baseInitIter(self, imageMat, rect, channels, iteratorLevel):
        listdata = []
        parentX = rect.x
        parentY = rect.y
        #        subMat = imageMat[rect.y:rect.y+rect.height, rect.x:rect.width+rect.x]
        #
        #        if(channels != 1):
        #            subMat = imageMat[rect.y:rect.y+rect.height, rect.x:rect.width+rect.x, 0:channels]

        #tessAPI = PyTessBaseAPI()
        #Convert to PIL image
        imgPIL = Image.fromarray(imageMat)
        self.mHandle.SetImage(imgPIL)
        boxes = self.mHandle.GetComponentImages(iteratorLevel, True)

        for i, (im, box, _, _) in enumerate(boxes):

            wrapper = OCRTextWrapper.OCRTextWrapper()
            self.mHandle.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = self.mHandle.GetUTF8Text()
            wrapper.text = ocrResult
            conf = self.mHandle.MeanTextConf()
            wrapper.confidence = conf
            self.mHandle.Recognize()
            iterator = self.mHandle.GetIterator()
            fontAttribute = iterator.WordFontAttributes()
            wrapper.x = box['x'] + parentX
            wrapper.y = box['y'] + parentY
            wrapper.width = box['w']
            wrapper.height = box['h']
            wrapper.rect = Rect(wrapper.x, wrapper.y, wrapper.width,
                                wrapper.height)
            #            print(box)
            #
            if (fontAttribute != None):
                wrapper.fontName = fontAttribute['font_name']
                wrapper.bold = fontAttribute['bold']
                wrapper.italic = fontAttribute['italic']
                wrapper.underlined = fontAttribute['underlined']
                wrapper.monospace = fontAttribute['monospace']
                wrapper.serif = fontAttribute['serif']
                wrapper.smallcaps = fontAttribute['smallcaps']
                wrapper.fontSize = fontAttribute['pointsize']
                wrapper.fontId = fontAttribute['font_id']

            listdata.append(wrapper)

        return listdata

    def getBlockWithLocation(self, rect):
        wrappers = []
        for ocrTextWrapper in self.mOcrBlockWrappers:
            bound = ocrTextWrapper.rect
            if (RectUtil.contains(rect, bound)):
                wrappers.append(OCRTextWrapper.OCRTextWrapper(ocrTextWrapper))

        return wrappers

    def getImage(self, rect):
        x2 = rect.x + rect.width
        y2 = rect.y + rect.height
        mat = self.mRgbaImage[rect.y:y2, rect.x:x2]
        return Image.fromarray(mat)

    def getText(self, rect):
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            text = self.mHandle.GetUTF8Text()
            return text
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return ""

    def getLineText(self, rect):
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            text = self.mHandle.GetUTF8Text()
            if (TextUtils.isEmpty(text)):
                self.mHandle = PyTessBaseAPI(psm=PSM.SINGLE_LINE)
                self.mHandle.SetImage(self.mBufferedImageRgbaImage)
                self.mHandle.SetRectangle(rect.x, rect.y, rect.width,
                                          rect.height)
                text = self.mHandle.GetUTF8Text()
                if (TextUtils.isEmpty(text)):
                    self.mHandle.SetImage(self.getImage(rect))
                    text = self.mHandle.GetUTF8Text()

                self.mHandle = PyTessBaseAPI(psm=PSM.AUTO)
            return text
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return ""

    def getRectWordForLowConfidence(self, ocr):
        try:
            rect = ocr.bound()
            self.mHandle = PyTessBaseAPI(psm=PSM.SINGLE_WORD)
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            ocr.text = self.mHandle.GetUTF8Text()
            ocr.confidence = self.mHandle.MeanTextConf()
            if (ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD):
                self.mHandle.SetImage(self.getImage(rect))
                ocr.text = self.mHandle.GetUTF8Text()
                ocr.confidence = self.mHandle.MeanTextConf()
            if (ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD):
                return False
            self.mHandle.Recognize()
            iterator = self.mHandle.GetIterator()
            fontAttribute = iterator.WordFontAttributes()
            if (fontAttribute != None):
                ocr.fontName = fontAttribute['font_name']
                ocr.bold = fontAttribute['bold']
                ocr.italic = fontAttribute['italic']
                ocr.underlined = fontAttribute['underlined']
                ocr.monospace = fontAttribute['monospace']
                ocr.serif = fontAttribute['serif']
                ocr.smallcaps = fontAttribute['smallcaps']
                ocr.fontSize = fontAttribute['pointsize']
                ocr.fontId = fontAttribute['font_id']
#                ocr.fontsize = self.getPreferenceFontSize(ocr)

            self.mHandle = PyTessBaseAPI(psm=PSM.AUTO)
            return True
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return False

    def getWordsIn(self, rect):
        wrappers = []
        for ocrTextWrapper in self.mOcrTextWrappers:
            bound = ocrTextWrapper.bound()
            if (RectUtil.contains(rect, bound)):
                wrappers.append(OCRTextWrapper.OCRTextWrapper(ocrTextWrapper))

        return wrappers

    def initOCR(self):

        #
        self.initText()

        self.initBlock()
        #        self.initPara()
        self.initLine()
#

    def initBlock(self):
        self.mOcrBlockWrappers = self.baseInit(RIL.BLOCK)

    def initLine(self):
        self.mOcrLineWrappers = self.baseInit(RIL.TEXTLINE)
        invalidLineWrappers = []
        # a line cannot contain another lines
        for ocrLine in self.mOcrLineWrappers:
            for otherOcrLine in self.mOcrLineWrappers:
                if (ocrLine != otherOcrLine and RectUtil.contains(
                        ocrLine.bound(), otherOcrLine.bound())):
                    invalidLineWrappers.append(ocrLine)
        self.mOcrLineWrappers = [
            x for x in self.mOcrLineWrappers if x not in invalidLineWrappers
        ]

    def initPara(self):
        self.mOcrParaWrappers = self.baseInit(RIL.PARA)

    def initText(self):
        self.mOcrTextWrappers = self.baseInit(RIL.WORD)

    def isOverlapText(self, rect, confident):
        for ocrTextWrapper in self.mOcrTextWrappers:
            bound = ocrTextWrapper.bound()
            if (ocrTextWrapper.getConfidence() >= confident
                    and RectUtil.intersects(rect, bound)):
                return True
        return False

    def reset(self):
        self.mOcrTextWrappers = []
        self.mOcrLineWrappers = []
        self.initOCR()

#    def rotateImage(bi) :
#        iden = ImageDeskew(bi)
#        imageSkewAngle = iden.getSkewAngle() # determine skew angle
#        if imageSkewAngle > MINIMUM_DESKEW_THRESHOLD or imageSkewAngle < -MINIMUM_DESKEW_THRESHOLD :
#            bi = ImageHelper.rotateImage(bi, -imageSkewAngle) # deskew
#        return bi

    def getPreferenceFontSize(self, ocrTextWrapper, parentHeight):

        #        TODO TODO
        fontName = ocrTextWrapper.fontName
        fontSize = ocrTextWrapper.fontSize

        height = ocrTextWrapper.height * Constants.TEXT_BOX_AND_TEXT_HEIGHT_RATIO

        #        height = ocrTextWrapper.height
        textHeight = int(
            self.mDipCalculator.pxToHeightDip(min(parentHeight, height)))
        #        font = QFont(fontName, fontSize)
        newFontSize = fontSize
        if (self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                               fontSize) == textHeight):
            newFontSize = fontSize

        elif (self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                                 fontSize) < textHeight):
            while (self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                                      fontSize) < textHeight):
                fontSize = fontSize + 1
            newFontSize = fontSize

        else:
            while (self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                                      fontSize) > textHeight):
                fontSize = fontSize - 1

            newFontSize = fontSize

        return newFontSize

    def getTextHeightUsingFontMetrics(self, ocrTextWrapper, fontName,
                                      fontSize):
        #        class SIZE(ctypes.Structure):
        #            _fields_ = [("cx", ctypes.c_long), ("cy", ctypes.c_long)]
        #        hdc = ctypes.windll.user32.GetDC(0)
        #        hfont = ctypes.windll.gdi32.CreateFontA(-fontSize, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, fontName)
        #        hfont_old = ctypes.windll.gdi32.SelectObject(hdc, hfont)
        #        size = SIZE(0, 0)
        #        ctypes.windll.gdi32.GetTextExtentPoint32A(hdc, text, len(text), ctypes.byref(size))
        #        ctypes.windll.gdi32.SelectObject(hdc, hfont_old)
        #        ctypes.windll.gdi32.DeleteObject(hfont)
        #        return size.cy
        file = "fonts//" + fontName + ".ttf"

        font = ImageFont.truetype(file, fontSize)
        fontSize = font.getsize(ocrTextWrapper.text)
        return fontSize[1]

    def validCharacter(self, word):
        return self.mHandle.IsValidCharacter(word)

        #Don't have this method return TessAPI1.TessBaseAPIIsValidWord(mHandle, word) != 0
#        return True

#TODO
#    def getTextHeightUsingTextLayout(self,ocrTextWrapper, font) :
#        frc = self.mGraphics.getFontRenderContext()
#        loc = Point(0, 0)
#        layout = TextLayout(ocrTextWrapper.text, font, frc)
#        layout.draw(self.mGraphics, float(loc.x, loc.y))
#        bounds = layout.getBounds()
#        height = bounds.getHeight()
#        return height

#    def isValidTextUsingConfidentAndBoundaryCheck(self, ocrTextWrapper) :
#        if (ocrTextWrapper.getConfidence() > Constants.TEXT_CONFIDENT_THRESHOLD + Constants.TEXT_CONFIDENT_THRESHOLD_SECONDARY_RANGE) :
#            return True
#
#        elif (ocrTextWrapper.getConfidence() <= Constants.TEXT_CONFIDENT_THRESHOLD) :
#            return False
#
#        return self.isValidTextUsingBoundaryCheck(ocrTextWrapper)
#
#

    def getTextDimensions(self, text, fontName, fontSize):
        file = "fonts//" + fontName + ".ttf"
        try:
            font = ImageFont.truetype(file, fontSize)
            fontSize = font.getsize(text)
            return fontSize
        except OSError:
            print(file)

    def isValidTextUsingBoundaryCheck(self, ocrTextWrapper):
        # confident between TextProcessor.TEXT_CONFIDENT_THRESHOLD and
        # TextProcessor.TEXT_CONFIDENT_THRESHOLD +
        # TextProcessor.TEXT_CONFIDENT_THRESHOLD_SECONDARY_RANGE
        if (TextUtils.isEmpty(ocrTextWrapper.text)):
            # We cannot calculate width of empty text
            return True
#        return True

#        frc = mGraphics.getFontRenderContext()
#        font = QFont(ocrTextWrapper.fontName,ocrTextWrapper.fontSize)
#        loc = Point(0, 0)
#        layout = TextLayout(ocrTextWrapper.text,font, frc)
#        layout.draw(mGraphics,  loc.getX(), loc.getY())
#        bound = layout.getBounds()
        width, height = self.getTextDimensions(ocrTextWrapper.text,
                                               ocrTextWrapper.fontName,
                                               ocrTextWrapper.fontSize)

        fontRatio = float(height / width)
        boundRatio = float(ocrTextWrapper.height / ocrTextWrapper.width)
        fontArea = self.mDipCalculator.dipToHeightPx(
            height) * self.mDipCalculator.dipToWidthPx(width)
        boundArea = float(ocrTextWrapper.width * ocrTextWrapper.height)
        #
        # the different between dimensions of the text should be smaller than
        # 10% of the max dimension.
        # System.out.prln(" Ratio: " + fontRatio + ", " + boundRatio + ", "
        # + Math.abs(boundRatio - fontRatio)
        # / Math.max(boundRatio, fontRatio) + "," + fontArea + ", "
        # + boundArea + ", " + Math.min(fontArea, boundArea)
        # / Math.max(fontArea, boundArea))

        # It the bound is square, it less likely that this text is correct
        # TODO: This rule may not need it
        #        if (float(min(ocrTextWrapper.getWidth(),ocrTextWrapper.getHeight()) / max( ocrTextWrapper.getWidth(),
        #						ocrTextWrapper.getHeight())) > 0.95) :
        #			# if drawing text cannot create square, sorry -> invalid
        #            if (float(min(width, height) / max(width, height)) <= 0.95 and not validWord(ocrTextWrapper.text)) :
        #                return False
        #
        #
        #

        #        print(self.mDipCalculator.dipToWidthPx(width), self.mDipCalculator.dipToHeightPx(height))
        #        print( ocrTextWrapper.width, ocrTextWrapper.height)
        dimension = abs(boundRatio - fontRatio) / max(boundRatio, fontRatio)
        #        print(dimension)

        dimensionCheck = abs(boundRatio - fontRatio) / max(
            boundRatio, fontRatio
        ) <= Constants.TEXT_CONFIDENT_ACCEPTANCE_DIMENSION_RATIO_DIFFERENCE_THRESHOLD

        areaCheckVal = min(fontArea, boundArea) / max(fontArea, boundArea)
        #        print(areaCheckVal)
        #        print(ocrTextWrapper.text)
        areaCheck = min(fontArea, boundArea) / max(
            fontArea,
            boundArea) >= Constants.TEXT_AREA_ACCEPTANCE_DIFFERENCE_THRESHOLD

        return dimensionCheck and areaCheck

    def destroy(self):
        self.mHandle.End()
Example #22
class TesserOCR(object):

    def __init__(self, image=None, lang='chi_sim', psm=PSM.SINGLE_CHAR):
        self.api = PyTessBaseAPI(lang=lang, psm=psm)

    @staticmethod    
    def getinfo():
        print(tesserocr.tesseract_version())  # print tesseract-ocr version
        print(tesserocr.get_languages())

    @staticmethod        
    def img2string(image):
        pimg = opencv2pillow(image)
        print(tesserocr.image_to_text(pimg))

    @staticmethod
    def analyzeRIL(image):
        pimg = opencv2pillow(image)
        with PyTessBaseAPI() as api:
            api.SetImage(pimg)
            boxes = api.GetComponentImages(RIL.TEXTLINE, True)
            print('Found {} textline image components.'.format(len(boxes)))
            for i, (im, box, _, _) in enumerate(boxes):
                # im is a PIL image object
                # box is a dict with x, y, w and h keys
                api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
                ocrResult = api.GetUTF8Text()
                conf = api.MeanTextConf()
                print (u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
                       "confidence: {1}, text: {2}").format(i, conf, ocrResult, **box)
        
    def img2string_single(self,image):
        pimg = opencv2pillow(image)
        self.api.SetImage(pimg)
        return self.api.GetUTF8Text()

    @staticmethod
    def detect(image):
        pimg = opencv2pillow(image)
        with PyTessBaseAPI(psm=PSM.AUTO_OSD) as api:
            api.SetImage(pimg)
            api.Recognize()   
            it = api.AnalyseLayout()
            orientation, direction, order, deskew_angle = it.Orientation()
            print("Orientation: {:d}".format(orientation))
            print("WritingDirection: {:d}".format(direction))
            print("TextlineOrder: {:d}".format(order))
            print("Deskew angle: {:.4f}".format(deskew_angle))

    @staticmethod    
    def segmentation(image):
        pimg = opencv2pillow(image)
        with PyTessBaseAPI(psm=PSM.OSD_ONLY) as api:
            api.SetImage(pimg)    
            os = api.DetectOS()
            print ("Orientation: {orientation}\nOrientation confidence: {oconfidence}\n"
                   "Script: {script}\nScript confidence: {sconfidence}").format(**os)
            
    # this functions works with tesseract4+
    @staticmethod
    def analyzeLSTM(image):
        pimg = opencv2pillow(image)
        with PyTessBaseAPI(psm=PSM.OSD_ONLY, oem=OEM.LSTM_ONLY) as api:
            api.SetImage(pimg)
            os = api.DetectOrientationScript()
            print ("Orientation: {orient_deg}\nOrientation confidence: {orient_conf}\n"
                   "Script: {script_name}\nScript confidence: {script_conf}").format(**os)
    
    @staticmethod
    def rect2string(image,rect):
        with PyTessBaseAPI() as api:
            api.SetImage(image)
            api.SetVariable("save_blob_choices", "T")
            api.SetRectangle(*rect)  # e.g. (37, 228, 548, 31)
            api.Recognize()
        
            ri = api.GetIterator()
            level = RIL.SYMBOL
            for r in iterate_level(ri, level):
                symbol = r.GetUTF8Text(level)  # r == ri
                conf = r.Confidence(level)
                if symbol:
                    print(u'symbol {}, conf: {}'.format(symbol, conf))
                indent = False
                ci = r.GetChoiceIterator()
                for c in ci:
                    if indent:
                        print('\t\t ')
                    print('\t- ')
                    choice = c.GetUTF8Text()  # c == ci
                    print(u'{} conf: {}'.format(choice, c.Confidence()))
                    indent = True
                print('---------------------------------------------')
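opencv2pillow() is not shown. A plausible sketch, assuming it converts an OpenCV BGR array into the PIL image tesserocr expects:

import cv2
from PIL import Image

def opencv2pillow(image):
    # OpenCV stores images as BGR; PIL expects RGB
    return Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))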
Example #23
    action_number = int(key) - 1

    # get the tick number
    (death_left, death_top, death_width, death_height) = death_location
    tick_shot_pil = pyautogui.screenshot(region=(death_left + death_width,
                                                 death_top, tick_width,
                                                 tick_height))
    tick_shot_cv = np.array(tick_shot_pil)

    tick_shot_cv_gray = cv2.cvtColor(tick_shot_cv, cv2.COLOR_RGB2GRAY)
    (_, tick_shot_cv_black) = cv2.threshold(tick_shot_cv_gray, 190, 255,
                                            cv2.THRESH_BINARY_INV)

    pil_cap_black_text_demoUI = Image.fromarray(tick_shot_cv_black)
    tessocr_api.SetImage(pil_cap_black_text_demoUI)
    text = tessocr_api.GetUTF8Text()

    cur_tick_string_match = re.search(r'k[^\d]*(\d+)[^\d]*/[^\d]*(\d+)', text)
    if cur_tick_string_match:
        cur_tick = int(cur_tick_string_match.group(1))
    else:
        print("skipping as didn't find tick")
        continue

    addActionDict(action_number, cur_tick)
    processing_end_time = time.time()
    time_budget = 0.4 - (processing_end_time - processing_start_time)
    if time_budget > 0:
        print(f'under budget, sleeping for {time_budget}')
        time.sleep(time_budget)
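
# Sanity check for the tick regex above, on a made-up OCR string (the 'k'
# anchor is presumably the trailing letter of an on-screen "Tick" label):
#   >>> import re
#   >>> m = re.search(r'k[^\d]*(\d+)[^\d]*/[^\d]*(\d+)', "Tick 123/4567")
#   >>> int(m.group(1)), int(m.group(2))
#   (123, 4567)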
Example #24
def run_ocr_in_chart(chart, pad=0, psm=PSM.SINGLE_LINE):
    """
    Run OCR for all the boxes.
    :param img:
    :param boxes:
    :param pad: padding before applying ocr
    :param psm: PSM.SINGLE_WORD or PSM.SINGLE_LINE
    :return:
    """
    img = chart.image

    # add a padding to the initial figure
    fpad = 1
    img = cv2.copyMakeBorder(img.copy(), fpad, fpad, fpad, fpad, cv2.BORDER_CONSTANT, value=(255, 255, 255))
    fh, fw, _ = img.shape

    api = PyTessBaseAPI(psm=psm, lang='eng')
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))

    for tbox in chart.texts:
        # pad the original box: in some charts from the quartz corpus the text touches the border
        x, y, w, h = ru.wrap_rect(u.ttoi(tbox.rect), (fh, fw), padx=pad, pady=pad)
        x, y = x + fpad, y + fpad

        if w * h == 0:
            tbox.text = ''
            continue

        # crop region of interest
        roi = img[y:y + h, x:x + w]
        # convert to grayscale
        roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        # upscale 3x so Tesseract copes better with small text
        roi_gray = cv2.resize(roi_gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
        # binarization
        _, roi_bw = cv2.threshold(roi_gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # removing noise from borders
        roi_bw = 255 - clear_border(255-roi_bw)

        # roi_gray = cv2.copyMakeBorder(roi_gray, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=255)

        # when testing boxes from csv files
        if tbox.num_comp == 0:
            # Apply Contrast Limited Adaptive Histogram Equalization
            roi_gray2 = clahe.apply(roi_gray)
            _, roi_bw2 = cv2.threshold(roi_gray2, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            _, num_comp = morphology.label(roi_bw2, return_num=True, background=255)
            tbox.regions.extend(range(num_comp))

        pil_img = smp.toimage(roi_bw)
        if SHOW:
            pil_img.show()
        max_conf = -np.inf
        min_dist = np.inf
        correct_text = ''
        correct_angle = 0
        u.log('---------------')
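        # Try the crop as-is and rotated by +/-90 degrees: axis labels are
        # often vertical, and Tesseract reads them far better once upright.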
        for angle in [0, -90, 90]:
            rot_img = pil_img.rotate(angle, expand=1)

            api.SetImage(rot_img)
            conf = api.MeanTextConf()
            text = api.GetUTF8Text().strip()
            dist = abs(len(text.replace(' ', '')) - tbox.num_comp)

            u.log('text: %s  conf: %f  dist: %d' % (text, conf, dist))
            if conf > max_conf and dist <= min_dist:
                max_conf = conf
                correct_text = text
                correct_angle = angle
                min_dist = dist

        tbox.text = post_process_text(lossy_unicode_to_ascii(correct_text))
        tbox.text_conf = max_conf
        tbox.text_dist = min_dist
        tbox.text_angle = correct_angle

        u.log('num comp %d' % tbox.num_comp)
        u.log(u'** text: {} conf: {} angle: {}'.format(correct_text, max_conf, correct_angle))

    api.End()
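
# Usage sketch (chart loading is outside this snippet; load_chart is a
# hypothetical helper, not part of the code above):
# chart = load_chart('bar_chart.png')
# run_ocr_in_chart(chart, pad=2, psm=PSM.SINGLE_WORD)
# for tbox in chart.texts:
#     print(tbox.text, tbox.text_conf, tbox.text_angle)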
Example #25
def capture_mrz(window: sg.Window,
                camera_id: int) -> Tuple[List[str], Image.Image]:
    """
    Capture the MRZ by using OCR and the camera footage.

    :returns: parsed MRZ lines and the camera frame they were read from
    """

    cap = cv2.VideoCapture(camera_id)

    tess_api = PyTessBaseAPI(init=False, psm=PSM.SINGLE_BLOCK_VERT_TEXT)
    tess_api.InitFull(
        # https://github.com/DoubangoTelecom/ultimateMRZ-SDK/tree/master/assets/models
        path="text_detection",
        lang="mrz",
        variables={
            "load_system_dawg": "false",
            "load_freq_dawg": "false",
            "tessedit_char_whitelist": "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<",
        },
    )
    # mrz_list: List[List[str]] = []

    pool = ThreadPool(processes=1)
    ocr_running = False
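    # Run OCR on a worker thread so the preview loop keeps drawing frames;
    # ocr_running limits it to one in-flight recognition at a time.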
    while True:
        _, frame = cap.read()

        mrz = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                    cv2.THRESH_BINARY, 21, 10)
        # mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,3,2)
        # mrz = cv2.GaussianBlur(mrz, (5,5), 0)
        # _, mrz = cv2.threshold(mrz, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # mrz = cv2.GaussianBlur(mrz, (5,5), 0)
        # mrz = cv2.medianBlur(mrz, 3)
        frame_shown = copy.deepcopy(mrz)
        width = 320
        height = int(frame_shown.shape[0] * (320 / frame_shown.shape[1]))
        frame_shown = cv2.resize(frame_shown, (width, height))

        alpha = 0.8
        frame_overlay = add_mrz_overlay(copy.deepcopy(frame_shown),
                                        "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<", 3,
                                        0.9, False)
        frame_overlay = add_mrz_overlay(
            frame_overlay, "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<", 2,
            0.9, True)
        cv2.addWeighted(frame_shown, alpha, frame_overlay, 1 - alpha, 0,
                        frame_shown)

        imgbytes = cv2.imencode(".png", frame_shown)[1].tobytes()
        window.write_event_value("-SHOW MRZ-", [imgbytes])

        mrz = Image.fromarray(mrz)
        if not ocr_running:
            checked_frame = Image.fromarray(frame[:, :, ::-1])
            tess_api.SetImage(mrz)
            async_result = pool.apply_async(tess_api.GetUTF8Text)
            ocr_running = True

        if async_result.ready():
            ocr_running = False
            mrz_text = async_result.get()
            result = parse_mrz_ocr(mrz_text)

            if result is not None:
                break

            # if result and len(mrz_list) < 3:
            #     mrz_list.append(result)
            # elif not result:
            #     mrz_list = []
            # else:
            #     if all(x == mrz_list[0] for x in mrz_list):
            #         break

    # When everything is done, release the capture
    cap.release()
    # cv2.destroyAllWindows()
    tess_api.End()

    # return mrz_list[0]
    window.write_event_value("-HIDE MRZ-", "")

    return (result, checked_frame)
Example #26
from tesserocr import PyTessBaseAPI
from PIL import Image

api = PyTessBaseAPI(lang='script/Thai')
api.SetImage(Image.open("sample/thaiid.jpeg"))
# api.SetImage(Image.open("sample/thaiid.jpeg"))
print(api.GetUTF8Text())
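
# A hedged variant: restrict recognition to one region of the card with
# SetRectangle. The coordinates below are made-up placeholders, not measured
# from the sample image.
with PyTessBaseAPI(lang='script/Thai') as api2:
    api2.SetImage(Image.open("sample/thaiid.jpeg"))
    api2.SetRectangle(100, 200, 400, 60)  # (left, top, width, height), hypothetical
    print(api2.GetUTF8Text())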

Example #27
class ViewerWindow(Gtk.Window):
    def __init__(self, filenames, kind, show, ml):
        Gtk.Window.__init__(self)
        self.ptx = 0
        self.pty = 0
        self.focus_id = -1
        self.file_idx = 0
        self.kind = kind
        self.show_hidden = show
        self.ml = ml
        self.screen_hint = ''
        self.in_hint_screen = False
        self.colors = {}
        self.memory = {}
        self.elem_models = {}
        self.filenames = filenames
        self.tesapi = PyTessBaseAPI(lang='eng')
        self.tesapi.SetVariable("tessedit_char_whitelist", WHITELIST)
        self.init_ui()
        self.load()

    def init_ui(self):
        self.connect("delete-event", Gtk.main_quit)

        darea = Gtk.DrawingArea()
        darea.connect("draw", self.on_draw)
        darea.connect("motion-notify-event", self.move_over)
        darea.connect("button-release-event", self.click_evt)
        darea.connect("scroll-event", self.scroll_evt)
        darea.connect("key-release-event", self.key_evt)
        darea.set_events(Gdk.EventMask.POINTER_MOTION_MASK
                         | Gdk.EventMask.BUTTON_RELEASE_MASK
                         | Gdk.EventMask.BUTTON_PRESS_MASK
                         | Gdk.EventMask.SCROLL_MASK
                         | Gdk.EventMask.KEY_PRESS_MASK
                         | Gdk.EventMask.KEY_RELEASE_MASK)
        darea.set_can_focus(True)
        self.add(darea)

        self.show_all()

    def load(self, prev=False):
        if self.file_idx == len(self.filenames):
            Gtk.main_quit()
            return
        if prev:
            self.file_idx -= 2
        filename = self.filenames[self.file_idx]
        (self.app, self.scr) = util.get_aux_info(filename)
        if self.app not in self.memory:
            self.memory[self.app] = {}
        self.set_title(filename)
        self.file_idx += 1
        print("Loading %s" % filename)
        self.pngfile = os.path.splitext(filename)[0] + '.png'
        self.descname = os.path.splitext(filename)[0] + '.%s.txt' % self.kind

        starttime = time.time()
        self.tree = analyze.load_tree(filename)
        hidden.find_hidden_ocr(self.tree)
        hidden.mark_children_hidden_ocr(self.tree)
        util.print_tree(self.tree, show_hidden=self.show_hidden)

        if self.ml:
            self.get_ml_rets()
        else:
            self.load_desc()

        endtime = time.time()
        print("Load time: %.3fs" % (endtime - starttime))

        self.focus_id = -1
        self.colors = {}
        self.ptx = self.pty = 0

        self.img = cairo.ImageSurface.create_from_png(self.pngfile)
        print('Image:', self.img.get_width(), self.img.get_height())

        root_item_id = min(self.tree)
        root_node = self.tree[root_item_id]
        print('Root node:', root_node['width'], root_node['height'])
        self.scale = 1.0 * self.img.get_width() / config.width
        #self.scale = analyze.find_closest(self.scale, analyze.SCALE_RATIOS)
        print('Scale: %.3f' % self.scale)

        self.resize(self.img.get_width(), self.img.get_height())

        self.mark_depth(self.tree)

        for item_id in self.tree:
            color_r = random.random() / 2
            color_g = random.random() / 2
            color_b = random.random() / 2

            self.colors[item_id] = (color_r, color_g, color_b)

        imgocr = Image.open(self.pngfile)
        self.imgwidth = imgocr.width
        self.imgheight = imgocr.height
        #imgocr2 = imgocr.convert("RGB").resize(
        #    (imgocr.width * OCR_RATIO, imgocr.height * OCR_RATIO))
        self.tesapi.SetImage(imgocr)
        self.tesapi.SetSourceResolution(config.ocr_resolution)

        self.dump_memory()

    def remember(self, node, desc):
        nodeid = node['id']
        if not nodeid:
            return

        if nodeid in self.memory[self.app]:
            if desc != self.memory[self.app][nodeid]:
                # conflicting labels for the same id: mark it ambiguous
                self.memory[self.app][nodeid] = 'MUL'
        else:
            self.memory[self.app][nodeid] = desc

    def forget(self, node):
        if node['id'] in self.memory[self.app]:
            del self.memory[self.app][node['id']]

    def get_elem_model(self, app):
        elem_clas = elements.getmodel("../model/", "../guis/", app,
                                      "../guis-extra/",
                                      config.extra_element_scrs)
        self.elem_models[app] = elem_clas

    def get_ml_rets(self):
        if self.app not in self.elem_models:
            self.get_elem_model(self.app)

        guess_descs = {}
        guess_items = {}  # type: Dict[str, List[int]]
        guess_score = {}
        elem_clas = self.elem_models[self.app]
        elem_clas.set_imgfile(self.pngfile)
        treeinfo = analyze.collect_treeinfo(self.tree)
        for itemid in self.tree:
            (guess_element,
             score) = elem_clas.classify(self.scr, self.tree, itemid, None,
                                         treeinfo)
            if guess_element != 'NONE':
                if tags.single(guess_element,
                               self.scr) and guess_element in guess_items:
                    old_item = guess_items[guess_element][0]
                    if guess_score[old_item] < score:
                        guess_items[guess_element] = [itemid]
                        guess_score[itemid] = score
                        del guess_descs[old_item]
                        guess_descs[itemid] = guess_element
                else:
                    guess_descs[itemid] = guess_element
                    guess_score[itemid] = score
                    guess_items[guess_element] = (
                        guess_items.get(guess_element, []) + [itemid])
        for nodeid in guess_descs:
            self.tree[nodeid]['label'] = guess_descs[nodeid]

    def load_desc(self):
        if os.path.exists(self.descname):
            with open(self.descname) as inf:
                for line in inf.read().split('\n'):
                    if not line:
                        continue
                    (item_id, desc) = line.split(' ', 1)
                    item_id = int(item_id)
                    found = False
                    for nodeid in self.tree:
                        node = self.tree[nodeid]
                        if item_id in node['raw']:
                            if 'label' in node:
                                node['label'] += ' ' + desc
                            else:
                                node['label'] = desc
                            print(nodeid, '(', item_id, ')', '->', desc)

                            self.remember(node, desc)

                            found = True
                            break
                    if not found:
                        print("WARNING: %s (%s) is missing!" % (item_id, desc))

    def mark_depth(self, tree):
        for item_id in tree:
            node = tree[item_id]
            if 'depth' in node:
                continue
            self.mark_depth_node(tree, item_id, 0)

    def mark_depth_node(self, tree, node_id, depth):
        node = tree[node_id]
        node['depth'] = depth
        node['descs'] = []
        for child in node['children']:
            descs = self.mark_depth_node(tree, child, depth + 1)
            node['descs'] += descs

        return node['descs'] + [node_id]

    def get_node_info(self, node):
        (x, y, width, height, depth) = (node['x'], node['y'], node['width'],
                                        node['height'], node['depth'])
        x *= self.scale
        y *= self.scale
        width *= self.scale
        height *= self.scale

        width = min(width, self.imgwidth)
        height = min(height, self.imgheight)

        if x < 0:
            width += x
            x = 0

        if y < 0:
            height += y
            y = 0

        return (x, y, width, height, depth)

    def find_containing_widget(self, px, py):
        max_depth = 0
        max_id = -1

        for item_id in self.tree:
            node = self.tree[item_id]
            if self.ignore_node(node):
                continue
            if self.inside(node, px, py):
                if node['depth'] > max_depth:
                    max_depth = node['depth']
                    max_id = item_id

        return max_id

    def inside(self, node, px, py):
        (x, y, width, height, depth) = self.get_node_info(node)
        return x <= px <= x + width and y <= py <= y + height

    def ignore_node(self, node):
        if node['class'].upper() == 'OPTION':
            return True
        if node.get('visible', '') == 'hidden':
            return True
        return False

    def on_draw(self, wid, ctx):
        ctx.select_font_face("Arial", cairo.FONT_SLANT_NORMAL,
                             cairo.FONT_WEIGHT_BOLD)

        ctx.set_source_surface(self.img, 0, 0)
        ctx.paint()

        ctx.set_font_size(20)
        ctx.set_line_width(5)
        ctx.set_source_rgb(1.0, 0.0, 0.0)

        max_click_id = -1
        max_click_depth = 0

        max_id = self.find_containing_widget(self.ptx, self.pty)

        for item_id in self.tree:
            node = self.tree[item_id]
            depth = node['depth']
            if max_id in node['descs'] and node['click']:
                if depth > max_click_depth:
                    max_click_depth = depth
                    max_click_id = item_id

        for item_id in self.tree:
            node = self.tree[item_id]
            if self.ignore_node(node):
                continue

            if item_id == max_id:
                region_mode = False
            else:
                region_mode = True

            (x, y, width, height, depth) = self.get_node_info(node)

            if not self.inside(node, self.ptx, self.pty):
                continue

            self.show_widget(ctx, item_id, not region_mode, not region_mode)

        if max_click_id != -1 and max_click_id != max_id:
            self.show_widget(ctx, max_click_id, False, True)

        if self.focus_id >= 0:
            self.show_widget(ctx, self.focus_id, True, True, (1, 0, 0))

        for itemid in self.tree:
            node = self.tree[itemid]
            if 'label' in node:
                if itemid == self.focus_id:
                    color = (0, 1, 0)
                else:
                    color = (0, 0, 1)
                self.show_widget(ctx, itemid, True, False, color)
                self.show_desc(ctx, node, color)

        #s.write_to_png('test.png')
        #os.system("%s %s" % (config.picviewer_path, 'test.png'))
        #report_time(start_time, "displayed")

    def move_sibling(self, to_next):
        leaf_list = []
        any_list = []
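        # Prefer cycling among leaf widgets under the last click position,
        # falling back to any containing widget when no leaf matches.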
        for itemid in self.tree:
            node = self.tree[itemid]
            if not self.inside(node, self.clickx, self.clicky):
                continue

            if len(node['children']) == 0:
                leaf_list.append(itemid)
            any_list.append(itemid)

        for i in range(len(leaf_list)):
            if leaf_list[i] == self.focus_id:
                if to_next:
                    idx = (i + 1) % len(leaf_list)
                else:
                    idx = (i - 1) % len(leaf_list)
                self.focus_id = leaf_list[idx]
                return

        if len(leaf_list) == 0:
            for i in range(len(any_list)):
                if any_list[i] == self.focus_id:
                    if to_next:
                        idx = (i + 1) % len(any_list)
                    else:
                        idx = (i - 1) % len(any_list)
                    self.focus_id = any_list[idx]
                    return
            self.focus_id = any_list[0]
        else:
            self.focus_id = leaf_list[0]

    def show_widget(self, ctx, item_id, fill, show_text, colors=None):
        node = self.tree[item_id]

        (x, y, width, height, depth) = self.get_node_info(node)

        if colors is None:
            color_r = self.colors[item_id][0]
            color_g = self.colors[item_id][1]
            color_b = self.colors[item_id][2]
        else:
            (color_r, color_g, color_b) = colors

        ctx.rectangle(x, y, width, height)
        if fill:
            ctx.set_source_rgba(color_r, color_g, color_b, 0.3)
            ctx.fill()
        else:
            ctx.set_source_rgba(color_r, color_g, color_b, 1)
            ctx.set_line_width(5)
            ctx.stroke()

        if show_text:
            max_char = int(width / ctx.text_extents("a")[2])
            text = str(item_id)
            if node['click']:
                text = 'C' + text
            if node['text']:
                text = text + ':' + node['text'][:(max_char - 5)]
            elif node['id']:
                text += '#' + node['id'][:(max_char - 5)]

            self.show_text(ctx, x + width / 2, y + height / 2, text, color_r,
                           color_g, color_b)

    def show_desc(self, ctx, node, color=(0, 0, 1)):
        desc = node['label']
        (x, y, width, height, depth) = self.get_node_info(node)
        self.show_text(ctx, x + width / 2, y + height / 2, desc, color[0],
                       color[1], color[2])

    def show_text(self, ctx, x, y, text, color_r, color_g, color_b):
        x_bearing, y_bearing, text_width, text_height = ctx.text_extents(
            text)[:4]

        ctx.move_to(x - text_width / 2, y + text_height / 2)
        ctx.set_source_rgba(1, 1, 1, 1)
        ctx.set_line_width(5)
        ctx.text_path(text)
        ctx.stroke()

        ctx.move_to(x - text_width / 2, y + text_height / 2)
        ctx.set_source_rgba(color_r, color_g, color_b, 1)
        ctx.text_path(text)
        ctx.fill()

    def move_over(self, widget, evt):
        self.ptx = evt.x
        self.pty = evt.y
        self.queue_draw()

    def click_evt(self, widget, evt):
        if self.in_hint_screen:
            self.process_screen_hint_click(evt)
            return

        if evt.button == 3:
            self.focus_id = -1
        else:
            self.clickx = evt.x
            self.clicky = evt.y
            self.focus_id = self.find_containing_widget(evt.x, evt.y)

        self.queue_draw()

    def scroll_evt(self, widget, evt):
        if self.focus_id == -1:
            return

        scroll_up = evt.direction == Gdk.ScrollDirection.UP
        if scroll_up:
            self.focus_id = self.find_parent_widget(self.focus_id)
        else:
            self.focus_id = self.find_child_widget(self.focus_id)

        self.queue_draw()

    def find_parent_widget(self, wid):
        for itemid in self.tree:
            node = self.tree[itemid]
            if self.ignore_node(node):
                continue
            if wid in node['children']:
                return itemid
        return wid

    def find_child_widget(self, wid):
        for itemid in self.tree[wid]['children']:
            node = self.tree[itemid]
            if self.ignore_node(node):
                continue
            if self.inside(node, self.clickx, self.clicky):
                return itemid
        return wid

    def mark_direct(self):
        enter = self.get_text('Please enter id_label', 'format: <id> <label>')
        if enter is None:
            return
        if ' ' in enter:
            nodeid, label = enter.split(' ', 1)
        else:
            nodeid = enter
            label = ''
        nodeid = int(nodeid)
        if nodeid not in self.tree:
            print('missing node', nodeid)
            return
        node = self.tree[nodeid]

        self.mark_node(node, label)

    def mark_focused(self):
        if self.focus_id < 0:
            return
        node = self.tree[self.focus_id]
        label = self.get_text(
            'Please enter label', 'label for %s: %s (%s) #%s' %
            (self.focus_id, node['text'], node['desc'], node['id']))
        if label is None:
            return

        if self.ml:
            if label == '':
                if 'label' not in self.tree[self.focus_id]:
                    return

                self.generate_negative_hint(self.tree[self.focus_id]['label'])
                del self.tree[self.focus_id]['label']
            else:
                self.generate_hint_for_widget(self.focus_id, label)
                self.add_label(node, label)
        else:
            self.mark_node(node, label)

    def generate_hint_for_widget(self, nodeid, label):
        return self.generate_hint(label,
                                  locator.get_locator(self.tree, nodeid))

    def generate_negative_hint(self, label):
        return self.generate_hint(label, 'notexist')

    def generate_hint(self, label, hint):
        print("@%s.%s %s" % (self.scr, label, hint))

    def mark_node(self, node, label):
        if label == '':
            if 'label' in node:
                del node['label']
                self.forget(node)
        else:
            self.add_label(node, label)
            self.remember(node, label)

        self.save_labels()

    def ocr_text(self):
        node = self.tree[self.focus_id]
        (x, y, width, height, _) = self.get_node_info(node)
        print(x, y, width, height)
        x = max(x - 1, 0)
        y = max(y - 1, 0)
        width = min(width + 2, self.imgwidth)
        height = min(height + 2, self.imgheight)
        #self.tesapi.SetRectangle(x * OCR_RATIO, y * OCR_RATIO,
        #                         width * OCR_RATIO, height * OCR_RATIO)
        self.tesapi.SetRectangle(int(x), int(y), int(width), int(height))
        print("OCR ret:", self.tesapi.GetUTF8Text())

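        # Second pass on a region shrunk by 5% per side: trimming the widget
        # border often drops stray strokes that confuse Tesseract.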
        x = min(x + width * 0.05, self.imgwidth)
        y = min(y + height * 0.05, self.imgheight)
        width *= 0.9
        height *= 0.9
        self.tesapi.SetRectangle(int(x), int(y), int(width), int(height))
        print("OCR ret:", self.tesapi.GetUTF8Text())

    def save_region(self):
        if self.focus_id == -1:
            return
        node = self.tree[self.focus_id]
        (x, y, width, height, _) = self.get_node_info(node)
        x = max(x - 1, 0)
        y = max(y - 1, 0)
        width = min(width + 2, self.imgwidth)
        height = min(height + 2, self.imgheight)

        regimg = cairo.ImageSurface(cairo.FORMAT_RGB24, int(width),
                                    int(height))
        ctx = cairo.Context(regimg)
        ctx.set_source_surface(self.img, -x, -y)
        ctx.paint()

        regimg.write_to_png("/tmp/region.png")

    def dump_memory(self):
        for _id in self.memory[self.app]:
            print('MEM %s -> %s' % (_id, self.memory[self.app][_id]))

    def add_label(self, node, desc):
        print('%s -> %s' % (util.describe_node(node, short=True), desc))
        node['label'] = desc

    def auto_label(self):
        for nodeid in self.tree:
            node = self.tree[nodeid]
            if 'label' not in node and node['id'] in self.memory[self.app]:
                if self.memory[self.app][node['id']] != 'MUL':
                    self.add_label(node, self.memory[self.app][node['id']])
                else:
                    print('skip MUL id: %s' % node['id'])
        self.save_labels()

    def remove_all(self):
        for nodeid in self.tree:
            node = self.tree[nodeid]
            if 'label' in node:
                del node['label']

    def process_screen_hint_click(self, evt):
        click_id = self.find_containing_widget(evt.x, evt.y)
        if click_id == -1:
            print('Invalid widget selected')
            return

        hint = locator.get_locator(self.tree, click_id)
        if hint is None:
            print('Cannot generate hint for this widget')
            return

        hint = str(hint)
        if evt.button == 3:
            # negate
            hint = 'not ' + hint

        print('Widget hint: "%s"' % hint)
        self.add_screen_hint(hint)

    def add_screen_hint(self, hint):
        if self.screen_hint == '':
            self.screen_hint = hint
        else:
            self.screen_hint += ' && ' + hint

    def hint_screen(self):
        if not self.in_hint_screen:
            label = self.get_text('Please enter screen name',
                                  'screen name like "signin"')
            if label is None:
                return
            self.screen_hint_label = label

            self.in_hint_screen = True
            self.screen_hint = ''
        else:
            self.in_hint_screen = False
            print("%%%s %s" % (self.screen_hint_label, self.screen_hint))

    def key_evt(self, widget, evt):
        if evt.keyval == Gdk.KEY_space:
            self.mark_focused()
        elif evt.keyval == Gdk.KEY_Tab:
            self.load()
        elif evt.keyval == Gdk.KEY_Left:
            self.move_sibling(to_next=True)
        elif evt.keyval == Gdk.KEY_Right:
            self.move_sibling(to_next=False)
        elif evt.keyval == Gdk.KEY_v:
            self.ocr_text()
        elif evt.keyval == Gdk.KEY_a:
            self.auto_label()
        elif evt.keyval == Gdk.KEY_p:
            self.load(prev=True)
        elif evt.keyval == Gdk.KEY_l:
            self.mark_direct()
        elif evt.keyval == Gdk.KEY_r:
            self.remove_all()
        elif evt.keyval == Gdk.KEY_s:
            self.save_region()
        elif evt.keyval == Gdk.KEY_x:
            self.hint_screen()
        self.queue_draw()

    def save_labels(self):
        with open(self.descname, 'w') as outf:
            for itemid in sorted(self.tree):
                node = self.tree[itemid]
                if 'label' in node:
                    outf.write("%s %s\n" % (itemid, node['label']))

    def get_text(self, title, prompt):
        #base this on a message dialog
        dialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.QUESTION,
                                   Gtk.ButtonsType.OK_CANCEL, title)
        dialog.format_secondary_text(prompt)
        #create the text input field
        entry = Gtk.Entry()
        #allow the user to press enter to do ok
        entry.connect("activate",
                      lambda entry: dialog.response(Gtk.ResponseType.OK))
        #create a horizontal box to pack the entry and a label
        hbox = Gtk.HBox()
        hbox.pack_start(Gtk.Label("Label:"), False, 5, 5)
        hbox.pack_end(entry, True, 0, 0)
        #add it and show it
        dialog.vbox.pack_end(hbox, True, True, 0)
        dialog.show_all()
        #go go go
        response = dialog.run()
        if response == Gtk.ResponseType.OK:
            text = entry.get_text()
        else:
            text = None
        dialog.destroy()
        return text
Example #28
class FeatureCollector(object):
    def __init__(self, tree, imgfile):
        self.tree = tree
        self.collect_texts()
        self.imgfile = imgfile
        self.tesapi = PyTessBaseAPI(lang='eng')
        self.set_tes_image()

    def set_tes_image(self):
        #imgpil = Image.fromarray(numpy.uint8(imgdata * 255))
        imgpil = Image.open(self.imgfile)
        (self.imgwidth, self.imgheight) = (imgpil.width, imgpil.height)
        imgpil = imgpil.convert("RGB").resize(
            (imgpil.width * OCR_RATIO, imgpil.height * OCR_RATIO))
        self.tesapi.SetImage(imgpil)

    def add_ctx_attr(self, ctx, data, attr_re, word_limit=1000):
        if ctx not in self.point:
            self.point[ctx] = ''
        words = attr_re.findall(data)
        if word_limit and len(words) > word_limit:
            return
        for word in words:
            if self.point[ctx]:
                self.point[ctx] += ' '
            self.point[ctx] += '%s' % word.lower()

    def add_ctx(self, ctx, node, attrs, attr_re=anything_re, word_limit=None):
        for attr in attrs:
            self.add_ctx_attr(ctx, node[attr], attr_re, word_limit)

    def collect_texts(self):
        for nodeid in self.tree:
            node = self.tree[nodeid]
            if 'fulltext' not in node:
                self.collect_text(node)

    def collect_text(self, node):
        """ Collect text from node and all its children """
        if 'fulltext' in node:
            return node['fulltext']

        cls = node['class']
        if cls == 'View':
            text = node['desc']
        else:
            text = node['text']

        for child in node['children']:
            text = text.strip() + ' ' + self.collect_text(self.tree[child])

        node['fulltext'] = text
        return text

    def prepare_neighbour(self):
        node = self.tree[self.nodeid]
        self.point['neighbour_ctx'] = ''
        self.point['adj_ctx'] = ''
        neighbour_count = 0
        for other in self.tree:
            if self.tree[other]['parent'] == node['parent'] and other != self.nodeid:
                self.add_ctx_attr('neighbour_ctx',
                                  self.collect_text(self.tree[other]), text_re)
                self.add_ctx('neighbour_ctx', self.tree[other], ['id'], id_re)
                if self.tree[other]['class'] == node['class']:
                    neighbour_count += 1

            # left sibling
            if (self.tree[other]['parent'] == node['parent']
                    and other != self.nodeid
                    and self.tree[other]['childid'] < node['childid']
                    and self.tree[other]['childid'] > node['childid'] - 2):
                self.add_ctx_attr('adj_ctx',
                                  self.collect_text(self.tree[other]), text_re)
                self.add_ctx('adj_ctx', self.tree[other], ['id'], id_re)

        self.point['neighbour_count'] = neighbour_count

    def ctx_append(self, ctx, kind, clz, detail):
        ret = ctx
        ret += ' ' + kind + clz
        regex = word_re
        for part in regex.findall(detail):
            ret += ' ' + kind + part
        return ret

    def collect_subtree_info(self, node, root):
        if ignore_node(node):
            return {'ctx': '', 'text': '', 'count': 0}
        ctx = ''
        count = 1

        clz = node['class']

        if clz == 'View':
            text = node['desc'][:30]
        else:
            text = node['text'][:30]
        desc = node['desc'][:30]

        ctx += clz + ' '
        ctx += node['id'] + ' '
        ctx += text + ' '
        ctx += desc + ' '
        ctx += gen_ngram(text) + ' '
        ctx += gen_ngram(desc) + ' '

        if root is not None:
            if node['width'] > 0.6 * config.width:
                ctx = self.ctx_append(ctx, "WIDE", clz, node['id'])

            if node['height'] > 0.6 * config.height:
                ctx = self.ctx_append(ctx, "TALL", clz, node['id'])

            if node['y'] + node['height'] < root['y'] + 0.3 * root['height']:
                ctx = self.ctx_append(ctx, "TOP", clz, node['id'])

            if node['x'] + node['width'] < root['x'] + 0.3 * root['width']:
                ctx = self.ctx_append(ctx, "LEFT", clz, node['id'])

            if node['y'] > root['y'] + 0.7 * root['height']:
                ctx = self.ctx_append(ctx, "BOTTOM", clz, node['id'])

            if node['x'] > root['x'] + 0.7 * root['width']:
                ctx = self.ctx_append(ctx, "RIGHT", clz, node['id'])

        for child in node['children']:
            child_info = self.collect_subtree_info(
                self.tree[child], root if root is not None else node)
            ctx = ctx.strip() + ' ' + child_info['ctx']
            count += child_info['count']
            text = text.strip() + ' ' + child_info['text']

        return {'ctx': ctx, 'text': text, 'count': count}

    def prepare_children(self):
        node = self.tree[self.nodeid]
        #self.add_ctx_attr('node_subtree_text', self.collect_text(node), text_re, 10)
        subtree_info = self.collect_subtree_info(node, None)
        self.point['subtree'] = subtree_info['ctx']
        self.point['node_subtree_text'] = subtree_info['text']
        self.point['node_childs'] = subtree_info['count']

    def prepare_ancestor(self):
        node = self.tree[self.nodeid]
        parent = node['parent']
        self.point['parent_ctx'] = ''
        parent_click = parent_scroll = parent_manychild = False
        parent_depth = 0
        while parent != 0 and parent != -1 and parent_depth < PARENT_DEPTH_LIMIT:
            self.add_ctx('parent_ctx', self.tree[parent], ['class', 'id'],
                         id_re)
            parent_click |= self.tree[parent]['click']
            parent_scroll |= self.tree[parent]['scroll']
            parent_manychild |= len(self.tree[parent]['children']) > 1
            parent = self.tree[parent]['parent']
            parent_depth += 1

        self.point['parent_prop'] = [
            parent_click, parent_scroll, parent_manychild
        ]

    def prepare_self(self):
        node = self.tree[self.nodeid]
        # AUX info
        self.point['id'] = self.nodeid
        self.point['str'] = util.describe_node(node, None)

        self.add_ctx('node_text', node, ['text'], text_re, 10)
        self.add_ctx('node_ctx', node, ['desc'], text_re)
        self.add_ctx('node_ctx', node, ['id'], id_re)
        self.add_ctx('node_class', node, ['class'], id_re)
        if 'Recycler' in node['class'] or 'ListView' in node['class']:
            self.point['node_class'] += " ListContainer"
        self.point['node_x'] = node['x']
        self.point['node_y'] = node['y']
        self.point['node_w'] = node['width']
        self.point['node_h'] = node['height']

    def prepare_point(self, nodeid, app, scr, caseid, imgdata, treeinfo, path):
        """Convert a node in the tree into a data point for ML"""
        self.nodeid = nodeid
        self.point = {}

        # AUX info
        self.point['app'] = app
        self.point['scr'] = scr
        self.point['case'] = caseid

        self.prepare_self()
        self.prepare_neighbour()
        self.prepare_ancestor()
        self.prepare_global(path, treeinfo)
        self.prepare_img(imgdata)
        self.prepare_ocr()
        self.prepare_children()

        return self.point

    def prepare_global(self, path, treeinfo):
        node = self.tree[self.nodeid]
        has_dupid = False
        is_itemlike = False
        is_listlike = False
        for _id in node['raw']:
            if _id in treeinfo['dupid']:
                has_dupid = True
                break
        for _id in node['raw']:
            if _id in treeinfo['itemlike']:
                is_itemlike = True
                break
        for _id in node['raw']:
            if _id in treeinfo['listlike']:
                is_listlike = True
                break
        self.point['node_prop'] = [
            node['click'], node['scroll'],
            len(node['children']) > 1, has_dupid, is_itemlike, is_listlike
        ]

        self.point['path'] = path

    def prepare_img(self, imgdata):
        node = self.tree[self.nodeid]
        # clamp the widget to the screenshot bounds: it should normally fit,
        # but occasionally it does not
        self.min_x = max(node['x'], 0)
        self.min_y = max(node['y'], 0)
        self.max_x = min(node['x'] + node['width'], self.imgwidth)
        self.max_y = min(node['y'] + node['height'], self.imgheight)
        self.empty = self.max_x <= self.min_x or self.max_y <= self.min_y
        self.point['empty'] = self.empty

    def prepare_ocr(self):
        node = self.tree[self.nodeid]
        if config.region_use_ocr and not self.empty:
            if 'ocr' in node:
                ocr_text = node['ocr']
            else:
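                # The screenshot was upscaled by OCR_RATIO in set_tes_image, so
                # node coordinates must be scaled by the same factor here.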
                self.tesapi.SetRectangle(self.min_x * OCR_RATIO,
                                         self.min_y * OCR_RATIO,
                                         (self.max_x - self.min_x) * OCR_RATIO,
                                         (self.max_y - self.min_y) * OCR_RATIO)
                try:
                    ocr_text = self.tesapi.GetUTF8Text()
                except Exception:
                    logger.warning("tesseract failed to recognize text")
                    ocr_text = ''
                ocr_text = ocr_text.strip().replace('\n', ' ')
        else:
            ocr_text = 'dummy'
        self.point['node_ocr'] = ocr_text
        #if point['node_text'].strip() == '':
        #    point['node_text'] = ocr_text
        logger.debug("%s VS %s" % (ocr_text, node['text']))

        (missing, found, other) = (node['ocr_missing'], node['ocr_found'],
                                   node['ocr_other'])
        self.point['ocr_missing'] = missing
        self.point['ocr_found'] = found
        self.point['ocr_other'] = other
        self.point['ocr_ratio'] = (1.0 * missing / (missing + other)
                                   if missing + other > 0 else 0.0)
        self.point['ocr_visible'] = node['visible']
Example #29
class VariationParser:
    def __init__(self):
        self.tesseract = PyTessBaseAPI(path='./', psm=PSM.SINGLE_LINE)
        self.item_db = json.load(open('en-us-var.json', 'rb'))

        self.items: Set[str] = set()
        self.active_section = 0
        self.section_name = None
        self.for_sale = False

        self._tesseract_cache = {}
        self._item_cache = {}

    def annotate_frame(self, frame: numpy.ndarray) -> numpy.ndarray:
        """Parses various parts of a frame for catalog items and annotates it."""

        # Detect whether we are in the Nook Shopping catalog.
        if not numpy.array_equal(frame[500, 20], (182, 249, 255)):
            text = 'Navigate to Nook catalog to start!'
            opts = {
                'org': (200, 70),
                'fontFace': cv2.FONT_HERSHEY_PLAIN,
                'lineType': cv2.LINE_AA,
                'fontScale': 3
            }
            frame = cv2.putText(frame,
                                text,
                                color=(0, 0, 0),
                                thickness=7,
                                **opts)
            return cv2.putText(frame,
                               text,
                               color=(100, 100, 255),
                               thickness=3,
                               **opts)

        # Show controls on screen.
        cv2.rectangle(frame, (0, 0), (300, 130), (106, 226, 240), -1)
        for i, line in enumerate(TUTORIAL_LINES):
            if line.startswith('F'):
                line += ' (%s)' % ('ON' if self.for_sale else 'OFF')
            frame = cv2.putText(frame, line, (30, 25 + i * 30), 0, 0.8, 0, 2)

        # Show the user the item count at the bottom.
        count_text = 'Item count: %d' % len(self.items)
        if not self.section_name:
            count_text = 'Items saved to disk'
        frame = cv2.putText(frame, count_text, (500, 700), 0, 1, 0)

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        section = numpy.nonzero(gray[20, 250:] == 156)[0]
        if section.any() and section[0] != self.active_section:
            # Grab the new section name
            x1, *_, x2 = 250 + section
            section_region = 255 - gray[8:32, x1 + 5:x2 - 5]
            self.section_name = self.image_to_text(section_region)

            # Reset item selection on section change
            self.active_section = section[0]
            self.items = set()
        elif not self.active_section:
            return frame  # Return early if no section is found.

        item_name = None
        variation_name = None

        selected = self.get_selected_item(frame)
        if not selected:  # Quit early if no item is selected
            return frame

        price_region = gray[selected.y1:selected.y2, 1070:1220]
        if self.for_sale and price_region.min() > 100:
            # Skip items not for sale
            p1, p2 = (selected.x1, selected.y1 + 20), (selected.x2,
                                                       selected.y1 + 20)
            return cv2.line(frame, p1, p2, color=(0, 0, 255), thickness=2)

        # Parse item name and display a rectangle around it.
        item_name = self.image_to_text(gray[selected.slice])
        frame = cv2.putText(frame, item_name, selected.p1, 0, 1, 0)

        # Parse variation and draw rectangle around it if there is one.
        variation = self.get_variation(gray)
        if variation:
            frame = cv2.rectangle(frame, variation.p1, variation.p2, 0)
            variation_name = self.image_to_text(gray[variation.slice])
            frame = cv2.putText(frame, variation_name, variation.p1, 1, 2, 0)

        # Match the name and optional variation against database and register it.
        full_name = self.resolve_name(item_name, variation_name)
        if full_name:
            self.items.add(full_name)

        return frame

    def get_selected_item(self, frame: numpy.ndarray) -> Optional[Rectangle]:
        """Returns the rectangle around the selected item name if there is one."""
        # Search for the yellow selected region along the item list area.
        select_region = numpy.nonzero(frame[140:640, 1052, 0] < 100)[0]
        if not select_region.any():
            return None

        rect = Rectangle(x1=635, x2=1050)

        # Find the top/bottom boundaries of the selected area
        rect.y1 = 140 + select_region[0] + 8
        rect.y2 = 140 + select_region[-1] - 4

        if rect.y2 - rect.y1 < 35:
            return None

        # Detect the width of the name by collapsing along the x axis
        # and finding the right-most dark pixel (text).
        item_region = frame[rect.y1:rect.y2, rect.x1:rect.x2, 1]
        detected_text = numpy.nonzero(item_region.min(axis=0) < 50)[0]
        if not detected_text.any():
            return None

        rect.x2 = 635 + detected_text[-1] + 10
        return rect

    def get_variation(self, gray: numpy.ndarray) -> Optional[Rectangle]:
        """Returns the rectangle around the variation text if there is one."""
        # There's a white box if the item has a variation.
        if gray[650, 25] != 250:
            return None

        variation = Rectangle(x1=30, y1=628, y2=665)
        # Find the width of the variation box by horizontal floodfill.
        variation.x2 = numpy.argmax(gray[variation.y1, :] < 250) - 15
        return variation

    def resolve_name(self, item: Optional[str],
                     variation: Optional[str]) -> Optional[str]:
        """Resolves an item and optional variation name against the item database."""
        key = (item, variation)
        if key not in self._item_cache:
            item = best_match(item, self.item_db)
            variation = best_match(variation, self.item_db.get(item))
            if variation:
                self._item_cache[key] = f'{item} [{variation}]'
            elif item and not self.item_db[item]:
                self._item_cache[key] = item
            else:
                self._item_cache[key] = None
        return self._item_cache[key]

    def image_to_text(self, text_area: numpy.ndarray) -> str:
        """Runs OCR over a given image and returns the parsed text."""
        img_hash = str(cv2.img_hash.averageHash(text_area)[0])
        if img_hash not in self._tesseract_cache:
            image = Image.fromarray(text_area)
            self.tesseract.SetImage(image)
            text = self.tesseract.GetUTF8Text().strip()
            self._tesseract_cache[img_hash] = text
        return self._tesseract_cache[img_hash]

    def save_items(self) -> None:
        """"Saves the collected items to a text file on disk and clears list."""
        if not self.items or not self.section_name:
            return

        date = datetime.datetime.now().strftime('%d-%m-%Y %H-%M-%S')
        with open(f'{self.section_name} ({date}).txt', 'w') as fp:
            fp.write('\n'.join(sorted(self.items)))

        self.section_name = None
        self.items = set()
Example #30
class TT2Predictor:
    """holds the several trainer predictor instances and common operations """
    def __init__(self, **kwargs):
        self.trainers_predictors_list = []
        self.text_predictors_list = [
            ("previous_level", (1212, 231, 1230, 280), "0123456789", "8"),
            ("main_level", (1203, 323, 1223, 399), "0123456789", "8"),
            ("next_level", (1212, 445, 1230, 493), "0123456789", "8"),
            ("sub_level", (1177, 625, 1203, 692), "0123456789/", "8"),
            ("gold", (1091, 283, 1126, 471),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "7"),
            ("current_dps_down_no_tab", (389, 562, 423, 709),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "8"),
            ("last_hero", (124, 109, 148, 430),
             "0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
             "7")
        ]
        self.api = PyTessBaseAPI()  # the constructor already initializes; no separate Init() needed
        print(tesserocr.tesseract_version())
        print(tesserocr.get_languages())
        self.global_image = None
        self.status = CurrentStatus()

        boss_trainer = TrainerPredictor(
            "boss_active_predictor",
            ["boss_active", "boss_inactive", "no_boss"],
            (1224, 555, 1248, 648), 12, 46, 255.0, [200, 30])
        egg_trainer = TrainerPredictor("egg_active_predictor",
                                       ["egg_active", "egg_inactive"],
                                       (741, 31, 761, 64), 10, 16, 255.0,
                                       [200, 30])
        gold_pet_trainer = TrainerPredictor(
            "gold_pet_predictor",
            ["goldpet", "nopet", "normalpet", "partial pet"],
            (624, 364, 734, 474), 40, 40, 255.0, [200, 30])
        tab_predictor = TrainerPredictor("tab_predictor", [
            "skills_tab", "heroes_tab", "equipment_tab", "pet_tab",
            "relic_tab", "shop_tab", "no_tab"
        ], (51, 1, 59, 717), 2, 179, 255.0, [200, 30])
        self.trainers_predictors_list.append(boss_trainer)
        self.trainers_predictors_list.append(egg_trainer)
        self.trainers_predictors_list.append(gold_pet_trainer)
        self.trainers_predictors_list.append(tab_predictor)
        for trainer in self.trainers_predictors_list:
            pass
            #trainer.crop_images()
            #trainer.process_images()
            #trainer.read_and_pickle()
            #trainer.train_graph()
        saved_classes_file = glo.DATA_FOLDER + "/dataforclassifier/TrainerPredictor_list.pickle"
        save_pickle(saved_classes_file, self.trainers_predictors_list)

    def parse_raw_image(self):
        with open(glo.RAW_FULL_FILE, 'rb') as f:
            image = Image.frombytes('RGBA', (1280, 720), f.read())
        for class_predictor in self.trainers_predictors_list:
            class_predictor.predict_crop(image)
        self.global_image = image
        image.save(glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + "/fullcapture" +
                   time.strftime("%Y%m%d-%H%M%S-%f") +
                   ".png")  # save original capture copy

    def parse_image_text(self, predict_map):
        return_dict = {}
        for text_predictor in self.text_predictors_list:
            if text_predictor[0] in predict_map:
                img = self.global_image.crop(text_predictor[1])

                img = img.convert('L')
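                # These capture regions are taller than they are wide, so
                # rotate the crop upright before handing it to Tesseract.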
                img = img.rotate(90, expand=True)
                self.api.SetImage(img)
                self.api.SetVariable("tessedit_char_whitelist",
                                     text_predictor[2])
                self.api.SetVariable("tessedit_pageseg_mode",
                                     text_predictor[3])
                self.api.SetVariable("language_model_penalty_non_dict_word",
                                     "0")
                self.api.SetVariable("doc_dict_enable", "0")
                text_capture = self.api.GetUTF8Text().encode('utf-8').strip()
                return_dict[text_predictor[0]] = text_capture
                print("raw text capture ", text_predictor[0], ":",
                      text_capture)
                self.api.Clear()
        return return_dict

    def predict_parsed_all(self):
        pred_dict = {}
        for class_predictor in self.trainers_predictors_list:
            pred_dict[class_predictor.name] = class_predictor.predict_parsed()
        return pred_dict

    def predict_parsed(self, predict_map, predict_map_text, **kwargs):
        pred_dict = {"transition_level": False}

        # check whether the image is mid level-transition: a trivial prediction
        if kwargs.get("empty_image") is False:
            pass
        else:
            img = self.global_image.crop((0, 0, 100, 100))  # black corner
            extrema = img.convert("L").getextrema()
            if extrema[0] == extrema[1]:  # only one color
                print("warning level transitioning")
                pred_dict["transition_level"] = True
            else:
                pass

        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name in predict_map:
                pred_dict[class_predictor.name] = class_predictor.predict_parsed()
        pred_dict_text = self.parse_image_text(predict_map_text)
        pred_dict.update(pred_dict_text)
        self.status.update_status(pred_dict, self.trainers_predictors_list)
        return pred_dict

    def predict(self):
        self.parse_raw_image()
        return self.predict_parsed_all()

    def check_predict(self, pred_dict, predictor, classification):
        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name == predictor:
                return int(
                    pred_dict[predictor]
                ) == class_predictor.pred_classes.index(classification)