Example #1
def get_text_bounding_boxes(image, psm=12):
    bounding_boxes = []

    height, width = image.shape[:2]

    if height <= 0 or width <= 0:
        return bounding_boxes

    image_pil = Image.fromarray(image)  # convert the numpy array to a PIL image

    api = PyTessBaseAPI(psm=psm, oem=OEM.LSTM_ONLY)

    try:
        # api.SetVariable('textord_tabfind_find_tables', 'true')
        # api.SetVariable('textord_tablefind_recognize_tables', 'true')
        api.SetImage(image_pil)

        api.Recognize()

        boxes = api.GetComponentImages(RIL.TEXTLINE, True)

        for (im, box, _, _) in boxes:
            x, y, w, h = box['x'], box['y'], box['w'], box['h']

            bounding_boxes.append((x, y, w, h))
    finally:
        api.End()

    return bounding_boxes
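A minimal usage sketch for Example #1, assuming the imports below; the file name page.png is a placeholder. The function itself only needs a numpy image.

import cv2
from PIL import Image
from tesserocr import OEM, RIL, PyTessBaseAPI

# OpenCV loads BGR; convert to RGB before handing the array to Tesseract.
image = cv2.cvtColor(cv2.imread('page.png'), cv2.COLOR_BGR2RGB)
for x, y, w, h in get_text_bounding_boxes(image):
    print('text line at (%d, %d), size %dx%d' % (x, y, w, h))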
Example #2
def read_text_with_confidence(image,
                              lang='fast_ind',
                              path='/usr/share/tesseract-ocr/5/tessdata',
                              psm=4,
                              whitelist=''):
    height, width = image.shape[:2]

    if height <= 0 or width <= 0:
        return '', 0

    image_pil = Image.fromarray(image)

    api = PyTessBaseAPI(lang=lang, psm=psm, path=path, oem=OEM.LSTM_ONLY)

    text, confidence = '', 0  # defaults in case Tesseract raises
    try:
        api.SetImage(image_pil)

        if whitelist != '':
            api.SetVariable('tessedit_char_whitelist', whitelist)

        api.Recognize()

        text = api.GetUTF8Text()
        confidence = api.MeanTextConf()
    except Exception:
        print("[ERROR] Tesseract exception")
    finally:
        api.End()

    return text, confidence
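A hedged usage sketch for Example #2; it assumes the default 'fast_ind' model and tessdata path exist in your installation, and receipt.png is a placeholder file name.

import cv2

image = cv2.cvtColor(cv2.imread('receipt.png'), cv2.COLOR_BGR2RGB)
# Restrict recognition to digits and separators, e.g. for printed amounts.
text, confidence = read_text_with_confidence(image, whitelist='0123456789.,')
if confidence > 60:  # MeanTextConf() reports a 0-100 score
    print(text)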
Example #3
def ocr(img, level):
    """Run Tesseract layout analysis on an image.

    Args:
        img: Image as a numpy array.
        level: Page iterator level (e.g. RIL.BLOCK).

    Returns:
        A list of (left, top, right, bottom, block_type) tuples.

    """
    result = []
    with c_locale():
        from tesserocr import PyTessBaseAPI, PSM
        api = PyTessBaseAPI()
        api.SetPageSegMode(PSM.AUTO_OSD)
        # api.SetImageFile(imagePath)
        api.SetImage(Image.fromarray(img))
        blockIter = api.AnalyseLayout()
        while blockIter.Next(level):
            pt = blockIter.BlockType()
            #result.append(blockIter.Baseline(level))
            if pt in [1, 6]:  # 1 = PT.FLOWING_TEXT, 6 = PT.TABLE
                result.append(blockIter.BoundingBox(level) + (pt, ))
        api.End()
    return result
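A usage sketch for Example #3, assuming c_locale comes from the surrounding module and page.png is a placeholder; RIL.BLOCK matches the granularity at which the layout iterator is advanced.

import cv2
from tesserocr import RIL

img = cv2.cvtColor(cv2.imread('page.png'), cv2.COLOR_BGR2RGB)
# Each result is (left, top, right, bottom, block_type).
for left, top, right, bottom, block_type in ocr(img, RIL.BLOCK):
    print(block_type, left, top, right, bottom)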
Example #4
class TextExtractor:
    def __init__(self, image_path, seg_mode=PSM.SPARSE_TEXT):
        self.api = PyTessBaseAPI()
        self.api.SetPageSegMode(seg_mode)
        self.api.SetImageFile(image_path)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def _extract(self) -> Tuple:
        text = self.api.GetUTF8Text()
        conf = self.api.MeanTextConf()
        return text, conf

    def _extract_from_rect(self, x, y, w, h) -> Tuple:
        self.api.SetRectangle(x, y, w, h)
        return self._extract()

    def extract(self, x=None, y=None, w=None, h=None) -> Tuple:
        # Explicit None checks so that zero is a valid coordinate value.
        if all(v is not None for v in (x, y, w, h)):
            return self._extract_from_rect(x, y, w, h)
        else:
            return self._extract()

    def close(self):
        self.api.End()
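A usage sketch for Example #4; screenshot.png is a placeholder. The context-manager protocol guarantees that close(), and hence api.End(), runs.

from tesserocr import PSM

with TextExtractor('screenshot.png', seg_mode=PSM.SPARSE_TEXT) as extractor:
    full_text, full_conf = extractor.extract()
    # SetRectangle takes x, y, width, height in pixels.
    region_text, region_conf = extractor.extract(x=0, y=0, w=200, h=50)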
Example #5
def tess_ocr(img):
    """Get text from an image.

    Args:
        img: The file path of the image.

    Returns:
        A string.
    Raises:
        IOError: An error occurred accessing the img object.

    """
    with c_locale():
        from tesserocr import PyTessBaseAPI, PSM
        api = PyTessBaseAPI(lang='chi_sim', psm=PSM.AUTO_OSD)
        api.SetImageFile(img)
        text = api.GetUTF8Text()
        api.End()
    return text
Example #6
def get_boxes(image_filename: str) -> list:
    image = Image.open(image_filename)
    width = image.width
    height = image.height
    max_width = width // 2
    max_height = height // 2

    api = PyTessBaseAPI(lang="jpn_vert")
    # api.ReadConfigFile("tess.conf")
    api.SetPageSegMode(PSM.SPARSE_TEXT_OSD)
    api.SetImage(image)
    api.Recognize(0)
    ri = api.GetIterator()
    level = RIL.WORD
    boxes = []
    for r in iterate_level(ri, level):
        conf = r.Confidence(level)
        text = r.GetUTF8Text(level)
        left, top, right, bottom = r.BoundingBox(level)
        # boxes = api.GetComponentImages(RIL.SYMBOL, True)
        # for im, rect, _, _ in boxes:
        #     # im is a PIL image object
        #     # rect is a dict with x, y, w and h keys
        #     left, top, right, bottom = rect['x'], rect['y'], rect['w'], rect['h']
        #     api.SetRectangle(left, top, right, bottom)
        #     text = api.GetUTF8Text()
        #     conf = api.MeanTextConf()
        print("'%s' \tConf: %.2f \tCoords: %d,%d,%d,%d" %
              (text, conf, left, top, right, bottom))
        box = {
            'text': text,
            'left': left,
            'top': top,
            'width': right - left,
            'height': bottom - top
        }
        if should_ignore_box(conf, box, max_width, max_height):
            continue
        boxes.append(box)
    api.End()
    image.close()
    return boxes
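get_boxes relies on a should_ignore_box helper that is not shown. A hypothetical filter consistent with its call site (a confidence value plus the half-page size limits) might look like the sketch below; the threshold of 30 is an assumption.

def should_ignore_box(conf, box, max_width, max_height):
    # Hypothetical filter: drop low-confidence words, oversized boxes,
    # and boxes whose recognized text is empty.
    if conf < 30:
        return True
    if box['width'] > max_width or box['height'] > max_height:
        return True
    return not box['text'].strip()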
Example #7
class OcrWrapper(BaseImageToString):

    _OPTIONS = ('tessedit_char_whitelist', '0123456789ABCDEF.-')

    def __init__(self):
        if sys.platform == 'win32':
            self._ocr = PyTessBaseAPI(
                path="C:\\Program Files\\Tesseract-OCR\\tessdata")
        else:
            self._ocr = PyTessBaseAPI()

        self._ocr.SetVariable(self._OPTIONS[0], self._OPTIONS[1])

    def image_to_string(self, image: Image.Image) -> str:
        image.format = 'PNG'
        self._ocr.SetImage(image)
        raw_data = self._ocr.GetUTF8Text()
        return raw_data

    def end(self):
        self._ocr.End()
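A usage sketch for Example #7; hex_dump.png is a placeholder. The whitelist set in _OPTIONS limits output to hexadecimal digits plus '.' and '-'.

from PIL import Image

ocr = OcrWrapper()
text = ocr.image_to_string(Image.open('hex_dump.png'))
print(text)
ocr.end()  # releases the underlying Tesseract API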
Example #8
class Analyzer(object):
    TEXT_TYPES = {
        PT.FLOWING_TEXT, PT.HEADING_TEXT, PT.PULLOUT_TEXT, PT.VERTICAL_TEXT,
        PT.CAPTION_TEXT
    }

    def __init__(self, lang=None):
        super(Analyzer, self).__init__()
        kwargs = {}
        if lang is not None:
            kwargs['lang'] = lang
        self.api = PyTessBaseAPI(psm=PSM.AUTO_OSD, **kwargs)

    def analyze_image(self, image):
        page = Page()

        self.api.SetImage(image)
        self.api.Recognize()
        iterator = self.api.GetIterator()
        page.blocks = self.__decode_blocks(iterator, image)
        page.size = Size(*image.size)

        return page

    def close(self):
        self.api.End()

    def __decode_blocks(self, iterator, image):
        blocks = []
        for tesseract_block in iterate_level(iterator, RIL.BLOCK):
            block = Block()
            block.bounding_box = BoundingBox.from_coordinates(
                *tesseract_block.BoundingBox(RIL.BLOCK))
            if not tesseract_block.GetUTF8Text(RIL.BLOCK).strip():
                block.image = tesseract_block.GetImage(RIL.BLOCK, 0, image)
                blocks.append(block)
                continue
            block.paragraphs = self.__decode_paragraphs(iterator)
            blocks.append(block)
        return blocks

    def __decode_paragraphs(self, iterator):
        paragraphs = []
        for tesseract_paragraph in iterate_level(iterator, RIL.PARA):
            paragraph = Paragraph()
            paragraph.bounding_box = BoundingBox.from_coordinates(
                *tesseract_paragraph.BoundingBox(RIL.PARA))
            paragraph.lines = self.__decode_lines(iterator)
            paragraphs.append(paragraph)
            if iterator.IsAtFinalElement(RIL.BLOCK, RIL.PARA):
                break
        return paragraphs

    def __decode_lines(self, iterator):
        lines = []
        for tesseract_line in iterate_level(iterator, RIL.TEXTLINE):
            line = TextLine()
            line.bounding_box = BoundingBox.from_coordinates(
                *tesseract_line.BoundingBox(RIL.TEXTLINE))
            line.words = self.__decode_words(iterator)
            lines.append(line)
            if iterator.IsAtFinalElement(RIL.PARA, RIL.TEXTLINE):
                break
        return lines

    def __decode_words(self, iterator):
        words = []
        for tesseract_word in iterate_level(iterator, RIL.WORD):
            font_attributes = tesseract_word.WordFontAttributes()
            word = Word()
            word.bounding_box = BoundingBox.from_coordinates(
                *tesseract_word.BoundingBox(RIL.WORD))
            word.confidence = float(tesseract_word.Confidence(
                RIL.WORD)) / 100.0
            word.text = tesseract_word.GetUTF8Text(RIL.WORD)
            word.symbols = self.__decode_symbols(iterator)
            font = Font()
            font.bold = font_attributes['bold']
            font.italic = font_attributes['italic']
            font.underline = font_attributes['underlined']
            font.monospace = font_attributes['monospace']
            font.serif = font_attributes['serif']
            font.pointsize = font_attributes['pointsize']
            font.id = font_attributes['font_id']
            for symbol in word.symbols:
                symbol.font = font
            words.append(word)
            if iterator.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
                break
        return words

    def __decode_symbols(self, iterator):
        symbols = []
        for tesseract_symbol in iterate_level(iterator, RIL.SYMBOL):
            symbol = Symbol()
            symbol.bounding_box = BoundingBox.from_coordinates(
                *tesseract_symbol.BoundingBox(RIL.SYMBOL))
            symbol.confidence = float(tesseract_symbol.Confidence(
                RIL.SYMBOL)) / 100.0
            symbol.text = tesseract_symbol.GetUTF8Text(RIL.SYMBOL)
            symbol.image = tesseract_symbol.GetBinaryImage(RIL.SYMBOL).convert(
                '1', dither=Image.NONE)
            symbols.append(symbol)
            if iterator.IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
                break
        return symbols
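A usage sketch for Example #8, assuming Page, Block and the other model classes come from the surrounding module; blocks without recognized text carry only an image, so paragraphs is read with getattr.

from PIL import Image

analyzer = Analyzer(lang='eng')
try:
    page = analyzer.analyze_image(Image.open('document.png'))
    for block in page.blocks:
        for paragraph in getattr(block, 'paragraphs', []):
            for line in paragraph.lines:
                print(' '.join(word.text for word in line.words))
finally:
    analyzer.close()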
Example #9
class OCR(object):
    MAX_MODELS = 5
    DEFAULT_MODE = PSM.AUTO_OSD

    # DEFAULT_MODE = PSM.AUTO

    def __init__(self):
        # Tesseract language types:
        _, self.supported = get_languages()

    def language_list(self, languages):
        models = [c for c in alpha3(languages) if c in self.supported]
        if len(models) > self.MAX_MODELS:
            log.warning("Too many models, limit: %s", self.MAX_MODELS)
            models = models[:self.MAX_MODELS]
        models.append('eng')
        return '+'.join(sorted(set(models)))

    def configure_engine(self, languages, mode):
        # log.info("Configuring OCR engine (%s)", languages)
        if not hasattr(self, 'api') or self.api is None:
            self.api = PyTessBaseAPI(lang=languages, oem=OEM.LSTM_ONLY)
        if languages != self.api.GetInitLanguagesAsString():
            self.api.Init(lang=languages, oem=OEM.LSTM_ONLY)
        if mode != self.api.GetPageSegMode():
            self.api.SetPageSegMode(mode)
        return self.api

    def clear_engine(self):
        """Shut down tesseract and clear all memory."""
        try:
            self.api.End()
        except Exception:
            log.exception("Failed to shut down tesseract")
        self.api = None

    def extract_text(self, data, languages=None, mode=DEFAULT_MODE):
        """Extract text from a binary string of data."""
        try:
            image = Image.open(BytesIO(data))
            image.load()
        except Exception:
            log.exception("Cannot open image data using Pillow")
            return None

        # configure before entering the try block so that `api` is always
        # bound when the finally clause runs
        languages = self.language_list(languages)
        api = self.configure_engine(languages, mode)
        try:
            # TODO: play with contrast and sharpening the images.
            start_time = time.time()
            api.SetImage(image)
            text = api.GetUTF8Text()
            confidence = api.MeanTextConf()
            end_time = time.time()
            duration = end_time - start_time
            log.info("[OCR] %s chars (w: %s, h: %s, l: %s, c: %s), took: %.5f",
                     len(text), image.width, image.height, languages,
                     confidence, duration)
            return text
        finally:
            api.Clear()
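A usage sketch for Example #9; the language codes pass through alpha3() and the installed-model check, with 'eng' always appended, so the codes below are assumptions about what that helper accepts, and scan.png is a placeholder.

ocr = OCR()
with open('scan.png', 'rb') as fh:
    text = ocr.extract_text(fh.read(), languages=['deu', 'fra'])
print(text)
ocr.clear_engine()  # End() the engine and free its memory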
Example #10
def run_ocr_in_chart(chart, pad=0, psm=PSM.SINGLE_LINE):
    """
    Run OCR on all the text boxes of a chart.
    :param chart: chart object carrying the image and its text boxes
    :param pad: padding applied before running OCR
    :param psm: PSM.SINGLE_WORD or PSM.SINGLE_LINE
    :return:
    """
    img = chart.image

    # add a padding to the initial figure
    fpad = 1
    img = cv2.copyMakeBorder(img.copy(), fpad, fpad, fpad, fpad, cv2.BORDER_CONSTANT, value=(255, 255, 255))
    fh, fw, _ = img.shape

    api = PyTessBaseAPI(psm=psm, lang='eng')
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))

    for tbox in chart.texts:
        # adding a pad to original image. Some case in quartz corpus, the text touch the border.
        x, y, w, h = ru.wrap_rect(u.ttoi(tbox.rect), (fh, fw), padx=pad, pady=pad)
        x, y = x + fpad, y + fpad

        if w * h == 0:
            tbox.text = ''
            continue

        # crop region of interest
        roi = img[y:y + h, x:x + w]
        # convert to grayscale
        roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        # upscale so small text is large enough for Tesseract
        roi_gray = cv2.resize(roi_gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
        # binarization
        _, roi_bw = cv2.threshold(roi_gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # removing noise from borders
        roi_bw = 255 - clear_border(255-roi_bw)

        # roi_gray = cv2.copyMakeBorder(roi_gray, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=255)

        # when testing boxes from csv files
        if tbox.num_comp == 0:
            # Apply Contrast Limited Adaptive Histogram Equalization
            roi_gray2 = clahe.apply(roi_gray)
            _, roi_bw2 = cv2.threshold(roi_gray2, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
            _, num_comp = morphology.label(roi_bw2, return_num=True, background=255)
            tbox.regions.extend(range(num_comp))

        pil_img = smp.toimage(roi_bw)
        if SHOW:
            pil_img.show()
        max_conf = -np.inf
        min_dist = np.inf
        correct_text = ''
        correct_angle = 0
        u.log('---------------')
        for angle in [0, -90, 90]:
            rot_img = pil_img.rotate(angle, expand=1)

            api.SetImage(rot_img)
            conf = api.MeanTextConf()
            text = api.GetUTF8Text().strip()
            dist = abs(len(text.replace(' ', '')) - tbox.num_comp)

            u.log('text: %s  conf: %f  dist: %d' % (text, conf, dist))
            if conf > max_conf and dist <= min_dist:
                max_conf = conf
                correct_text = text
                correct_angle = angle
                min_dist = dist

        tbox.text = post_process_text(lossy_unicode_to_ascii(correct_text))
        tbox.text_conf = max_conf
        tbox.text_dist = min_dist
        tbox.text_angle = correct_angle

        u.log('num comp %d' % tbox.num_comp)
        u.log(u'** text: {} conf: {} angle: {}'.format(correct_text, max_conf, correct_angle))

    api.End()
Example #11
def capture_mrz(window: sg.Window,
                camera_id: int) -> Tuple[List[str], Image.Image]:
    """
    Capture the MRZ by using OCR and the camera footage.

    :returns: MRZ lines in a list
    """

    cap = cv2.VideoCapture(camera_id)

    tess_api = PyTessBaseAPI(init=False, psm=PSM.SINGLE_BLOCK_VERT_TEXT)
    tess_api.InitFull(
        # https://github.com/DoubangoTelecom/ultimateMRZ-SDK/tree/master/assets/models
        path="text_detection",
        lang="mrz",
        variables={
            "load_system_dawg": "false",
            "load_freq_dawg": "false",
            "tessedit_char_whitelist": "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<",
        },
    )
    # mrz_list: List[List[str]] = []

    pool = ThreadPool(processes=1)
    ocr_running = False
    while True:
        _, frame = cap.read()

        mrz = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                    cv2.THRESH_BINARY, 21, 10)
        # mrz = cv2.adaptiveThreshold(mrz, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,3,2)
        # mrz = cv2.GaussianBlur(mrz, (5,5), 0)
        # _, mrz = cv2.threshold(mrz, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # mrz = cv2.GaussianBlur(mrz, (5,5), 0)
        # mrz = cv2.medianBlur(mrz, 3)
        frame_shown = copy.deepcopy(mrz)
        width = 320
        height = int(frame_shown.shape[0] * (320 / frame_shown.shape[1]))
        frame_shown = cv2.resize(frame_shown, (width, height))

        alpha = 0.8
        frame_overlay = add_mrz_overlay(copy.deepcopy(frame_shown),
                                        "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<", 3,
                                        0.9, False)
        frame_overlay = add_mrz_overlay(
            frame_overlay, "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<", 2,
            0.9, True)
        cv2.addWeighted(frame_shown, alpha, frame_overlay, 1 - alpha, 0,
                        frame_shown)

        imgbytes = cv2.imencode(".png", frame_shown)[1].tobytes()
        window.write_event_value("-SHOW MRZ-", [imgbytes])

        mrz = Image.fromarray(mrz)
        if not ocr_running:
            checked_frame = Image.fromarray(frame[:, :, ::-1])
            tess_api.SetImage(mrz)
            async_result = pool.apply_async(tess_api.GetUTF8Text)
            ocr_running = True

        if async_result.ready():
            ocr_running = False
            mrz_text = async_result.get()
            result = parse_mrz_ocr(mrz_text)

            if result is not None:
                break

            # if result and len(mrz_list) < 3:
            #     mrz_list.append(result)
            # elif not result:
            #     mrz_list = []
            # else:
            #     if all(x == mrz_list[0] for x in mrz_list):
            #         break

    # When everything done, release the capture
    cap.release()
    # cv2.destroyAllWindows()
    tess_api.End()

    # return mrz_list[0]
    window.write_event_value("-HIDE MRZ-", "")

    return (result, checked_frame)
Example #12
class OCREngine():
    def __init__(self, extra_whitelist='', all_unicode=False, lang='eng'):
        """
        Args:
          extra_whitelist: string of extra chars for Tesseract to consider
              only takes effect when all_unicode is False
          all_unicode: if True, Tess will consider all possible unicode characters
          lang: OCR language
        """
        self.tess = PyTessBaseAPI(psm=PSM_MODE, lang=lang)
        self.is_closed = False
        if all_unicode:
            self.whitelist_chars = None
        else:
            self.whitelist_chars = ("abcdefghijklmnopqrstuvwxyz"
                                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                    "1234567890"
                                    r"~!@#$%^&*()_+-={}|[]\:;'<>?,./"
                                    '"'
                                    "©") + extra_whitelist
            self.tess.SetVariable('tessedit_char_whitelist',
                                  self.whitelist_chars)

    def check_engine(self):
        if self.is_closed:
            raise RuntimeError('OCREngine has been closed.')

    def recognize(self,
                  image,
                  min_text_size=MIN_TEXT_SIZE,
                  max_text_size=MAX_TEXT_SIZE,
                  uniformity_thresh=UNIFORMITY_THRESH,
                  thin_line_thresh=THIN_LINE_THRESH,
                  conf_thresh=CONF_THRESH,
                  box_expand_factor=BOX_EXPAND_FACTOR,
                  horizontal_pooling=HORIZONTAL_POOLING):
        """
        Generator: Blob
        http://stackoverflow.com/questions/23506105/extracting-text-opencv

        Args:
          image: can be one of the following types:
            - string: image file path
            - ndarray: numpy image
            - PIL.Image.Image: PIL image
          min_text_size:
            min text height/width in pixels, below which will be ignored
          max_text_size:
            max text height/width in pixels, above which will be ignored
          uniformity_thresh (0.0 < _ < 1.0):
            remove all black or all white regions
            ignore a region if the number of pixels neither black nor white < [thresh]
          thin_line_thresh (must be odd int):
            remove all lines thinner than [thresh] pixels.
            can be used to remove the thin borders of web page textboxes.
          conf_thresh (0 < _ < 100):
            ignore regions with OCR confidence < thresh.
          box_expand_factor (0.0 < _ < 1.0):
            expand the bounding box outwards in case certain chars are cut off.
          horizontal_pooling:
            result bounding boxes will be more connected with more pooling,
            but large pooling might lower accuracy.
        """
        self.check_engine()
        # param sanity check
        assert max_text_size > min_text_size > 0
        assert 0.0 <= uniformity_thresh < 1.0
        assert thin_line_thresh % 2 == 1
        assert 0 <= conf_thresh < 100
        assert 0.0 <= box_expand_factor < 1.0
        assert horizontal_pooling > 0

        image = get_np_img(image)
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255,
                                       cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)
        img = img_gray
        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
        # cut off all gray pixels < 30.
        # `cv2.THRESH_BINARY | cv2.THRESH_OTSU` is also good, but might overlook certain light gray areas
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        # connect horizontally oriented regions
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                           (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
        # remove all thin textbox borders (e.g. web page textbox)
        if thin_line_thresh > 0:
            kernel = cv2.getStructuringElement(
                cv2.MORPH_RECT, (thin_line_thresh, thin_line_thresh))
            img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        # NOTE: three-value unpacking is the OpenCV 3.x signature;
        # OpenCV 4.x returns only (contours, hierarchy).
        _, contours, hierarchy = cv2.findContours(img, cv2.RETR_CCOMP,
                                                  cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            x, y, w, h = box = Box(*cv2.boundingRect(contour))
            # remove regions that are beyond size limits
            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue
            # remove regions that are almost uniformly white or black
            binary_region = crop(img_bw, box)
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                continue
            # expand the borders a little bit to include cutoff chars
            expansion = int(min(h, w) * box_expand_factor)
            x = max(0, x - expansion)
            y = max(0, y - expansion)
            h, w = h + 2 * expansion, w + 2 * expansion
            if h > w:  # further extend the long axis
                h += 2 * expansion
            elif w > h:
                w += 2 * expansion
            # image passed to Tess should be grayscale.
            # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11
            box = Box(x, y, w, h)
            img_crop = crop(img_gray, box)
            # make sure that crops passed in tesseract have minimum x-height
            # http://github.com/tesseract-ocr/tesseract/wiki/FAQ#is-there-a-minimum-text-size-it-wont-read-screen-text
            img_crop = cv2.resize(img_crop,
                                  (int(img_crop.shape[1] * CROP_RESIZE_HEIGHT /
                                       img_crop.shape[0]), CROP_RESIZE_HEIGHT))
            ocr_text, conf = self.run_tess(img_crop)
            if conf > conf_thresh:
                yield Blob(ocr_text, box, conf)

    def _experiment_segment(self,
                            img,
                            min_text_size=MIN_TEXT_SIZE,
                            max_text_size=MAX_TEXT_SIZE,
                            uniformity_thresh=UNIFORMITY_THRESH,
                            horizontal_pooling=HORIZONTAL_POOLING):
        """
        PRIVATE: experiment only
        """
        img_init = img  # preserve initial image
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255,
                                       cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, morph_kernel)
        disp(img)
        #         morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        #         img = cv2.dilate(img, morph_kernel)
        # OTSU thresholding
        #         _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        #         img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY_INV,9,2)
        disp(img)
        # connect horizontally oriented regions
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                                 (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, morph_kernel)
        disp(img)

        if 0:
            morph_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,
                                                     (horizontal_pooling, 3))
            img = cv2.erode(img, morph_kernel, iterations=1)
            disp(img)
            morph_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (6, 6))
            img = cv2.dilate(img, morph_kernel, iterations=1)
        elif 1:
            morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
            img = cv2.morphologyEx(img, cv2.MORPH_OPEN, morph_kernel)
        disp(img)

        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        _, contours, hierarchy = cv2.findContours(img, cv2.RETR_CCOMP,
                                                  cv2.CHAIN_APPROX_SIMPLE)
        img_copy = np.copy(img_init)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            draw_rect(img_copy, x, y, w, h)

            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue

            binary_region = img_bw[y:y + h, x:x + w]
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                # ignore mostly white or black regions
                #                 print(w, h)
                #                 disp(binary_region)
                continue
            # the image must be grayscale, otherwise Tesseract will SegFault
            # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11
            draw_rect(img_init, x, y, w, h)
        disp(img_copy)
        disp(img_init, 0)

    def run_tess(self, img):
        """
        Tesseract python API source code:
        https://github.com/sirfz/tesserocr/blob/master/tesserocr.pyx

        Returns:
          (ocr_text, confidence)
        """
        if isinstance(img, np.ndarray):
            img = np2PIL(img)
        self.tess.SetImage(img)
        ocr_text = self.tess.GetUTF8Text().strip()
        conf = self.tess.MeanTextConf()
        return ocr_text, conf

    def _deprec_run_tess(self, img):
        "GetComponentImages throws SegFault randomly. No way to fix. :("
        if isinstance(img, np.ndarray):
            img = np2PIL(img)

        components = self.tess.GetComponentImages(RIL.TEXTLINE, True)
        for _, inner_box, block_id, paragraph_id in components:
            # box is a dict with x, y, w and h keys
            inner_box = Box(**inner_box)
            if inner_box.w < MIN_TEXT_SIZE or inner_box.h < MIN_TEXT_SIZE:
                continue
            self.tess.SetRectangle(*inner_box)
            ocr_text = self.tess.GetUTF8Text().strip()
            conf = self.tess.MeanTextConf()
            yield ocr_text, inner_box, conf

    def close(self):
        self.tess.End()
        self.is_closed = True

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
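A usage sketch for Example #12; the Blob field names are assumptions based on the constructor call Blob(ocr_text, box, conf), and screenshot.png is a placeholder.

import cv2

# The context-manager protocol guarantees tess.End() via close().
with OCREngine(extra_whitelist='°') as engine:
    image = cv2.imread('screenshot.png')
    for blob in engine.recognize(image, conf_thresh=40):
        print(blob.text, blob.box, blob.conf)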
Example #13
        if not player_dead_for_round and maskCounts[i] >= mask_threshold:
            if not cur_visibility_events[i].valid:
                cur_visibility_events[i].valid = True
                cur_visibility_events[i].spotted = players[i]
                cur_visibility_events[i].spotted_id = getPlayerId(players[i])
                cur_visibility_events[i].start_game_tick = cur_tick
                cur_visibility_events[i].start_frame_num = frame_id
                cur_visibility_events[i].color = colors[i]

    last_hit_frame = frame_id
    last_tick = cur_tick
    if last_tick + 10 >= max_tick:
        print("finished all ticks, skipped last 10 for acceptable error bound")
        break

tessocr_api.End()
finishTick(True, cur_tick, frame_id, maskCounts)

df_visibility = pd.DataFrame([
    e for e in finished_visibility_events
    if e['end_game_tick'] - e['start_game_tick'] > 2
])
df_visibility_sorted = df_visibility.sort_values(
    ['demo', 'spotter', 'spotted', 'start_game_tick'])
dicts_sorted = df_visibility_sorted.to_dict(orient='records')
dicts_output = []
i = 0
while i < len(dicts_sorted):
    j = 1
    while j < len(dicts_sorted) - i:
        next_row = dicts_sorted[i + j]
Example #14
class OCREngine:
    def __init__(self, psm: int = 3, config: dict = None):
        config = config or {}  # avoid a shared mutable default argument
        logging.info('Initializing OCR engine with PSM=%d and configs=%s' %
                     (psm, config))
        self.api = PyTessBaseAPI(psm=psm)
        for key, value in config.items():
            self.api.SetVariable(key, value)
        logging.debug('OCR engine initialized')

    def build_graph(self,
                    image_path: str,
                    scheme: str = None) -> DocumentGraph:

        hocr = self._get_hocr(image_path)
        words = self._get_words(hocr, scheme)
        dg = DocumentGraph(words)

        return dg

    def _get_hocr(self, image_path: str) -> str:
        logging.info('Reading to hOCR from image: %s' % image_path)
        self.api.SetImageFile(image_path)
        hocr_text = self.api.GetHOCRText(0)
        logging.debug('Image read')

        return hocr_text

    def _get_words(self, hocr: str, scheme: str = None):
        logging.info('Extracting words from hOCR.')
        if scheme is None:
            logging.warning('No scheme specified. Assuming xyxy')
            scheme = 'xyxy'

        soup = BeautifulSoup(hocr, 'html.parser')
        word_tags = soup.select('.ocrx_word')

        word_nodes = [self._make_node(tag, scheme=scheme) for tag in word_tags]
        word_nodes = list(filter(lambda node: node is not None, word_nodes))

        return word_nodes

    def _make_node(self, tag: dict, scheme: str) -> WordNode:
        fields = tag['title'].split(';')
        if len(fields) != 2:
            logging.warning('Malformed tag: %s. Skipping.' % tag)
            return None

        word = tag.text
        coordinates = tuple(map(int, fields[0].split()[1:]))
        conf = int(fields[1].split()[1])

        wn = WordNode(word, WordNode.convert_coords(coordinates, scheme), conf)
        logging.debug('Made word: %r' % wn)

        return wn

    def close(self):
        self.api.End()
        logging.debug('OCR engine closed')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type:
            print("type: %s\nvalue: %s\ntrace: %s" %
                  (exc_type, exc_value, traceback))

        self.close()
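A usage sketch for Example #14; DocumentGraph and WordNode come from the surrounding module, invoice.png is a placeholder, and the config key is an ordinary Tesseract variable.

with OCREngine(psm=6, config={'tessedit_char_blacklist': '|'}) as engine:
    # hOCR word boxes are reported as x1 y1 x2 y2, hence the 'xyxy' scheme.
    graph = engine.build_graph('invoice.png', scheme='xyxy')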