Пример #1
0
def test_that_match_text_gives_tesseract_a_hint():
    """Check that match_text finds text that plain ocr only reads with a hint.

    The test is skipped when plain ocr already reads the text, or when
    passing "ITV" as a user word still doesn't make ocr read it.
    """
    wanted = "ITV Player"
    frame = cv2.imread("tests/ocr/itv-player.png")

    unaided = stbt.ocr(frame=frame)
    if wanted in unaided:
        raise SkipTest("Tesseract doesn't need a hint")

    hinted = stbt.ocr(frame=frame, tesseract_user_words=["ITV"])
    if wanted not in hinted:
        raise SkipTest("Giving tesseract a hint doesn't help")

    assert stbt.match_text(wanted, frame=frame)
Пример #2
0
def test_that_with_old_tesseract_ocr_raises_an_exception_with_patterns():
    """Call ocr with user patterns on a tesseract older than 3.03.

    Skipped when the installed tesseract is 3.03 or newer.

    NOTE(review): the name says an exception is expected, but nothing in this
    function asserts one -- confirm the expectation is declared elsewhere
    (for example by a decorator).
    """
    first_new_version = distutils.version.LooseVersion('3.03')
    # pylint: disable=W0212
    if stbt._tesseract_version() >= first_new_version:
        raise SkipTest('tesseract is too new')

    serial_frame = cv2.imread('tests/ocr/UJJM2LGE.png')
    stbt.ocr(
        frame=serial_frame,
        mode=stbt.OcrMode.SINGLE_WORD,
        tesseract_user_patterns=[r'\n\n\n\n\n\n\n\n'])
Пример #3
0
def test_ocr_text_color(image, color, expected, region):
    """Check that passing ``text_color`` is what makes *expected* readable.

    Without ``text_color`` neither ocr nor match_text may find *expected*;
    with it, ocr must return exactly *expected* and match_text must match.
    """
    frame = load_image(image)
    single_line = stbt.OcrMode.SINGLE_LINE

    plain_text = stbt.ocr(frame, region, single_line)
    assert expected not in plain_text
    coloured_text = stbt.ocr(frame, region, single_line, text_color=color)
    assert expected == coloured_text

    plain_match = stbt.match_text(expected, frame, region, single_line)
    assert not plain_match
    coloured_match = stbt.match_text(
        expected, frame, region, single_line, text_color=color)
    assert coloured_match
Пример #4
0
def test_ocr_text_color_threshold():
    """Check ``text_color_threshold`` as an argument and as a config value.

    Plain ocr must not return "Guide"; with the text colour and a threshold
    of 50 (passed explicitly, or set via ``ocr.text_color_threshold``) it
    must return exactly "Guide".
    """
    frame = load_image("ocr/blue-search-white-guide.png")
    text_color = (220, 220, 220)

    assert stbt.ocr(frame) != "Guide"
    # pylint:disable=fixme
    # TODO: Find an example where text_color_threshold is necessary. Since
    # tesseract 4.0.0 the default text_color_threshold actually works.
    # assert stbt.ocr(frame, text_color=text_color) != "Guide"
    explicit = stbt.ocr(frame, text_color=text_color, text_color_threshold=50)
    assert explicit == "Guide"
    with temporary_config({'ocr.text_color_threshold': '50'}):
        configured = stbt.ocr(frame, text_color=text_color)
        assert configured == "Guide"
Пример #5
0
def test_that_setting_config_options_has_an_effect():
    """Check that a tesseract config option changes the ocr output at all."""
    # Tesseract has many complicated config options, so it is hard to write
    # a test proving that an option has the *correct* effect.  Instead we
    # only check that setting one has *some* effect, which at least
    # exercises our code that passes config options through.  This is not
    # satisfying; the hope is to replace it once there is more real-world
    # experience with these settings.
    with_hocr = stbt.ocr(frame=cv2.imread('tests/ocr/ambig.png'),
                         tesseract_config={"tessedit_create_hocr": 1})
    with_defaults = stbt.ocr(frame=cv2.imread('tests/ocr/ambig.png'))
    assert with_hocr != with_defaults
Пример #6
0
def test_that_ocr_engine_has_an_effect():
    """Check that the ``engine`` argument and ``ocr.engine`` config matter.

    Skipped on tesseract older than 4.0.
    """
    if _tesseract_version() < LooseVersion("4.0"):
        raise SkipTest('tesseract is too old')

    word = "sillyness"
    frame = load_image("ocr/ambig.png")

    # This is a regression in tesseract 4.0's legacy engine, compared to 3.04:
    legacy_text = stbt.ocr(frame, engine=stbt.OcrEngine.TESSERACT)
    assert word not in legacy_text
    default_text = stbt.ocr(frame)
    assert word not in default_text

    # ...but the new LSTM engine does read it correctly:
    lstm_text = stbt.ocr(frame, engine=stbt.OcrEngine.LSTM)
    assert word in lstm_text
    with temporary_config({'ocr.engine': 'LSTM'}):
        configured_text = stbt.ocr(frame)
        assert word in configured_text
Пример #7
0
def test_that_passing_patterns_helps_reading_serial_codes():
    """Check that user patterns let ocr read the serial code exactly.

    Skipped (after the validity check) on tesseract older than 3.03.
    """
    serial = u'UJJM2LGE'
    image_path = 'tests/ocr/UJJM2LGE.png'

    # Check that this test is valid, i.e. that tesseract reads the code
    # wrongly when it gets no help:
    unaided = stbt.ocr(
        frame=cv2.imread(image_path),
        mode=stbt.OcrMode.SINGLE_WORD)
    assert serial != unaided

    # pylint: disable=W0212
    if stbt._tesseract_version() < distutils.version.LooseVersion('3.03'):
        raise SkipTest('tesseract is too old')

    # Now the real test:
    aided = stbt.ocr(
        frame=cv2.imread(image_path),
        mode=stbt.OcrMode.SINGLE_WORD,
        tesseract_user_patterns=[r'\n\n\n\n\n\n\n\n'])
    eq_(serial, aided)
Пример #8
0
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """Check ocr output against ``ligature_text`` and match both dash forms."""
    frame = cv2.imread('tests/ocr/ambig.png')
    # The replacement is for tesseract < 3.03.
    text = stbt.ocr(frame).replace("horizonta|", "horizontal")
    assert ligature_text == text
    # Both the ASCII hyphen and the em-dash spelling must match.
    for needle in ("em-dash,", u"em\u2014dash,"):
        assert stbt.match_text(needle, frame)
Пример #9
0
    def title(self):
        """
        Read the dialog's title from ``self._frame`` (a member provided by
        the base class) using `stbt.ocr`. Many Frame Object properties take
        this same basic form.

        The benefit of a Frame Object shows in the testcase, which becomes::

            assert Dialog().title == "Information"

        rather than::

            assert stbt.ocr(region=stbt.Region(396, 249, 500, 50)) == "Information"

        The first form states what the testcase author means (the word is
        expected in the dialog's *title*), and it is cheaper to maintain:
        when the title moves on screen only ``Dialog.title`` has to change,
        not the testcases.

        Every call to an image-processing function inside a Frame Object
        must be given ``self._frame`` (as the ``ocr`` call below is);
        otherwise the result won't correspond to the frame you expected.
        """
        title_region = stbt.Region(396, 249, 500, 50)
        return stbt.ocr(frame=self._frame, region=title_region)
 def read_hex(region, frame_=frame):
     """OCR *region* of *frame_* as a single line and strip all spaces.

     Recognition is restricted to ``#`` plus lowercase hexadecimal digits.

     NOTE(review): the user pattern is not a raw string, so it contains
     literal newline characters rather than the two-character ``\\n``
     sequence -- confirm that this is intended.
     """
     hex_only = {"tessedit_char_whitelist": "#0123456789abcdef"}
     raw_text = stbt.ocr(
         frame_,
         region,
         stbt.OcrMode.SINGLE_LINE,
         tesseract_config=hex_only,
         tesseract_user_patterns=["#\n\n\n\n\n\n"],
     )
     return raw_text.replace(" ", "")
Пример #11
0
def test_that_passing_patterns_helps_reading_serial_codes():
    """Check that user patterns let ocr read the serial code exactly.

    Skipped on tesseract older than 3.03.
    """
    oldest_supported = distutils.version.LooseVersion('3.03')
    # pylint: disable=W0212
    if _stbt.core._tesseract_version() < oldest_supported:
        raise SkipTest('tesseract is too old')

    # Now the real test:
    serial_frame = cv2.imread('tests/ocr/UJJM2LGE.png')
    text = stbt.ocr(
        frame=serial_frame,
        mode=stbt.OcrMode.SINGLE_WORD,
        tesseract_user_patterns=[r'\n\n\n\n\n\n\n\n'])
    assert u'UJJM2LGE' == text
Пример #12
0
 def text(self):
     """Return the selection's text, running OCR at most once.

     The result is stored in ``self._text``. OCR only runs while that is
     still ``None`` and ``self.__nonzero__()`` is true. Its input is the
     greyscale absolute difference between the cropped region and
     ``self._background`` repeated ``self.region.width`` times along axis 1.
     The current value is logged via ``stbt.debug`` on every call.
     """
     needs_ocr = self._text is None and self.__nonzero__()
     if needs_ocr:
         foreground = crop(self._frame, self.region)
         background = numpy.repeat(self._background, self.region.width, 1)
         grey_diff = cv2.cvtColor(
             cv2.absdiff(foreground, background), cv2.COLOR_BGR2GRAY)
         self._text = stbt.ocr(grey_diff)
     stbt.debug("Selection text: %s" % self._text)
     return self._text
Пример #13
0
 def _read_text(self, title, patterns=None):
     """Read the text that appears to the right of the label *title*.

     The label is searched for with ``stbt.match_text`` inside a fixed
     region of ``self._frame``, using a fixed text colour.

     :param title: The label text to look for.
     :param patterns: Passed to ``stbt.ocr`` as ``tesseract_user_patterns``.
     :returns: The OCR'd text to the right of the label, or ``None`` if the
         label wasn't found.
     """
     # Keep the match result in its own name: rebinding ``title`` made the
     # debug message below format the failed match result instead of the
     # text that was searched for.
     label = stbt.match_text(
         title, frame=self._frame,
         region=stbt.Region(x=620, y=145, right=950, bottom=460),
         text_color=(124, 94, 114))
     if not label:
         stbt.debug("NetworkAbout: Didn't find %r" % (title,))
         return None
     # Widen the area to the right of the label slightly before reading it.
     region = label.region.right_of().extend(x=10, y=-5, bottom=10)
     return stbt.ocr(self._frame, region, tesseract_user_patterns=patterns)
Пример #14
0
def test_tesseract_user_patterns(patterns):
    """Check that *patterns* let ocr read the IP address exactly.

    Skipped on tesseract older than 3.03.
    """
    # pylint:disable=protected-access
    if _tesseract_version() < LooseVersion('3.03'):
        raise SkipTest('tesseract is too old')

    # Now the real test:
    frame = load_image('ocr/192.168.10.1.png')
    text = stbt.ocr(
        frame=frame,
        mode=stbt.OcrMode.SINGLE_WORD,
        tesseract_user_patterns=patterns)
    assert u'192.168.10.1' == text
Пример #15
0
def test_that_match_all_can_be_used_with_ocr_to_read_buttons():
    """Demonstrate match_all + ocr for UIs made of text on buttons.

    Every match of the button image is cropped from the frame, the button
    image is subtracted from it, and the difference is OCR'd. The sorted
    non-empty results must be the three button labels.
    """
    frame = _imread('buttons.png')
    button = _imread('button.png')

    text = [
        stbt.ocr(frame=cv2.absdiff(_crop(frame, m.region), button))
        for m in stbt.match_all(
            button, frame=frame, match_parameters=mp(confirm_method='none'))]
    text = sorted([t for t in text if t not in ['', '\\s']])
    # Function-call form: valid on Python 3 and prints the same on Python 2.
    print(text)
    assert text == [u'Button 1', u'Button 2', u'Buttons']
Пример #16
0
def test_that_match_all_can_be_used_with_ocr_to_read_buttons():
    """Demonstrate match_all + ocr for UIs made of text on buttons.

    Every match of the transparent button image is shrunk to exclude the
    button's border, cropped from the frame and OCR'd. The sorted non-empty
    results must be the three button labels.
    """
    frame = stbt.load_image('buttons.png')

    text = [
        stbt.ocr(frame=stbt.crop(
            frame,
            m.region.extend(x=30, y=10, right=-30, bottom=-10)))
        for m in stbt.match_all('button-transparent.png', frame=frame)]
    text = sorted([t for t in text if t not in ['', '\\s']])
    # Function-call form: valid on Python 3 and prints the same on Python 2.
    print(text)
    assert text == [u'Button 1', u'Button 2', u'Buttons']
Пример #17
0
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """Check ocr output against ``ligature_text`` and match both dash forms.

    Known misreadings of specific tesseract versions are corrected before
    the comparison.
    """
    corrections = (
        # tesseract 3.02
        ("horizonta|", "horizontal"),
        # tesseract 4.00 with tessdata 590567f
        ("siIIyness", "sillyness"),
        ("Iigatures", "ligatures"),
    )
    frame = load_image('ocr/ambig.png')
    text = stbt.ocr(frame)
    for misread, intended in corrections:
        text = text.replace(misread, intended)
    assert ligature_text == text
    for needle in ("em-dash,", u"em\u2014dash,"):
        assert stbt.match_text(needle, frame)
Пример #18
0
 def message(self):
     """
     Read the message shown to the right of the "info" icon, with newlines
     replaced by spaces.

     This shows an advantage of Frame Objects over stand-alone helper
     functions: the private ``_info`` property is shared with
     ``is_visible``, and the ``FrameObject`` base class remembers its value
     from the first time it was computed, so it is not computed twice.
     """
     icon = self._info.region
     message_region = stbt.Region(
         x=icon.right, y=icon.y, width=390, height=icon.height)
     raw_text = stbt.ocr(region=message_region, frame=self._frame)
     return raw_text.replace('\n', ' ')
Пример #19
0
def check(imgname, phrases, params):
    """OCR the image *imgname* and report which *phrases* appear in the text.

    :param imgname: Path of the image file to read.
    :param phrases: Sized collection of strings to look for in the OCR text.
    :param params: Dict of keyword arguments passed through to ``stbt.ocr``.
    :returns: Dict with the keys ``matches``, ``total``, ``percentage``,
        ``name``, ``path``, ``phrases`` and ``text``. ``percentage`` is
        ``0.0`` when *phrases* is empty.
    :raises IOError: If the image cannot be read.
    """
    from stbt import ocr

    img = cv2.imread(imgname)
    if img is None:
        raise IOError('No such file or directory "%s"' % imgname)
    text = ocr(img, **params)

    # Evaluate each phrase once and derive the count from the same results.
    results = [{"text": x, "match": x in text} for x in phrases]
    matches = sum(1 for r in results if r["match"])
    total = len(phrases)
    # Guard against an empty phrase list instead of dividing by zero.
    percentage = float(matches) / total * 100 if total else 0.0

    return {
        "matches": matches,
        "total": total,
        "percentage": percentage,
        "name": os.path.basename(imgname),
        "path": imgname,
        "phrases": results,
        "text": text,
    }
Пример #20
0
def test_ocr_on_text_next_to_image_match():
    """OCR the 150px-wide area to the right of the matched blue button."""
    frame = cv2.imread("tests/action-panel.png")
    button = stbt.match("tests/action-panel-blue-button.png", frame)
    label_region = button.region.right_of(width=150)
    label = stbt.ocr(frame, region=label_region)
    assert "YOUVIEW MENU" == label
Пример #21
0
def test_that_ocr_region_none_isnt_allowed():
    """Check that ``stbt.ocr`` rejects ``region=None`` with a TypeError.

    The bare call on its own would pass if ``region=None`` were silently
    accepted, so the expected exception is checked explicitly.
    """
    frame = cv2.imread("tests/ocr/small.png")
    try:
        stbt.ocr(frame=frame, region=None)
    except TypeError:
        return
    raise AssertionError("stbt.ocr accepted region=None; expected TypeError")
Пример #22
0
 def read_hex(region, frame_=frame):
     """OCR *region* of *frame_* as a single line and strip all spaces.

     Recognition is restricted to ``#`` plus lowercase hexadecimal digits.

     NOTE(review): the user pattern is not a raw string, so it contains
     literal newline characters rather than the two-character ``\\n``
     sequence -- confirm that this is intended.
     """
     whitelist = {'tessedit_char_whitelist': '#0123456789abcdef'}
     raw_text = stbt.ocr(
         frame_, region, stbt.OcrMode.SINGLE_LINE,
         tesseract_config=whitelist,
         tesseract_user_patterns=['#\n\n\n\n\n\n'])
     return raw_text.replace(' ', '')
Пример #23
0
def test_ocr_on_text_next_to_image_match():
    """Read the text in the 150px to the right of the blue button match."""
    panel = cv2.imread("tests/action-panel.png")
    found = stbt.match("tests/action-panel-blue-button.png", panel)
    beside_button = found.region.right_of(width=150)
    assert "YOUVIEW MENU" == stbt.ocr(panel, region=beside_button)
def test_read_menu():
    """Press KEY_CLOSE then KEY_MENU and print what ``stbt.ocr()`` reads.

    There is a one-second pause after each key press.
    """
    stbt.press('KEY_CLOSE')
    sleep(1)
    stbt.press('KEY_MENU')
    sleep(1)
    # Function-call form: valid on Python 3 and prints the same on Python 2.
    print(stbt.ocr())
Пример #25
0
def test_that_ocr_region_none_isnt_allowed():
    """Check that ocr raises TypeError when given ``region=None``."""
    with pytest.raises(TypeError):
        image = load_image("ocr/small.png")
        stbt.ocr(region=None, frame=image)
Пример #26
0
def test_that_ocr_reads_unicode():
    """Check that ocr with 'eng+deu' returns the exact unicode text."""
    frame = cv2.imread('tests/ocr/unicode.png')
    text = stbt.ocr(frame=frame, lang='eng+deu')
    assert isinstance(text, unicode)
    assert u'£500\nDavid Röthlisberger' == text
Пример #27
0
def test_that_ocr_region_none_isnt_allowed():
    """Check that ``stbt.ocr`` rejects ``region=None`` with a TypeError.

    The bare call on its own would pass if ``region=None`` were silently
    accepted, so the expected exception is checked explicitly.
    """
    frame = load_image("ocr/small.png")
    try:
        stbt.ocr(frame=frame, region=None)
    except TypeError:
        return
    raise AssertionError("stbt.ocr accepted region=None; expected TypeError")
Пример #28
0
def test_that_ocr_reads_unicode():
    """Check that ocr with 'eng+deu' reads the non-ASCII text exactly."""
    frame = cv2.imread('tests/ocr/unicode.png')
    expected = u'£500\nRöthlisberger'
    eq_(expected, stbt.ocr(frame=frame, lang='eng+deu'))
Пример #29
0
 def ocr():
     """Run ``stbt.ocr`` on ``frame`` from the enclosing scope."""
     text = stbt.ocr(frame=frame)
     return text
Пример #30
0
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """Check that the ocr output for ambig.png equals ``ligature_text``."""
    frame = cv2.imread('tests/ocr/ambig.png')
    # The replacement is for tesseract < 3.03.
    normalised = stbt.ocr(frame=frame).replace("horizonta|", "horizontal")
    eq_(ligature_text, normalised)
Пример #31
0
def test_that_default_language_is_configurable():
    """Check that the ``ocr.lang`` config value changes the default language.

    The name isn't matched with the default language, but is matched (and
    read by ocr) once ``ocr.lang`` is set to "deu".
    """
    name = u"Röthlisberger"
    frame = cv2.imread("tests/ocr/unicode.png")
    assert not stbt.match_text(name, frame)  # reads Réthlisberger
    with temporary_config({"ocr.lang": "deu"}):
        assert stbt.match_text(name, frame)
        assert name in stbt.ocr(frame)
Пример #32
0
def test_that_ocr_can_read_small_text():
    """Check that ocr reads the two lines of small text exactly."""
    expected = u'Small anti-aliased text is hard to read\nunless you magnify'
    frame = cv2.imread('tests/ocr/small.png')
    text = stbt.ocr(frame=frame)
    assert expected == text
Пример #33
0
def test_that_ocr_reads_unicode():
    """Check the type and exact content of ocr's output with 'eng+deu'."""
    image = cv2.imread('tests/ocr/unicode.png')
    result = stbt.ocr(frame=image, lang='eng+deu')
    assert isinstance(result, unicode)
    assert u'£500\nDavid Röthlisberger' == result
Пример #34
0
def test_ocr_debug():
    """Check which debug files ``ocr`` and ``match_text`` write.

    Four ``stbt.ocr`` calls and four ``stbt.match_text`` calls are made
    inside ``scoped_curdir()`` and ``scoped_debug_level(2)``. The sorted
    output of ``find stbt-debug`` must then equal the listing below, which
    has one numbered directory per call.
    """
    # So that the output directory name doesn't depend on how many tests
    # were run before this one.
    ImageLogger._frame_number = itertools.count(1)  # pylint:disable=protected-access

    f = stbt.load_image("action-panel.png")
    r = stbt.Region(0, 370, right=1280, bottom=410)
    c = (235, 235, 235)
    # Used for the calls whose expected output has only index.html and
    # source.png (directories 00004 and 00008 in the listing below).
    nonoverlapping = stbt.Region(2000, 2000, width=10, height=10)

    with scoped_curdir(), scoped_debug_level(2):

        # Directories 00001-00004 in the expected listing.
        stbt.ocr(f)
        stbt.ocr(f, region=r)
        stbt.ocr(f, region=r, text_color=c)
        stbt.ocr(f, region=nonoverlapping)

        # Directories 00005-00008 in the expected listing.
        stbt.match_text("Summary", f)  # no match
        stbt.match_text("Summary", f, region=r)  # no match
        stbt.match_text("Summary", f, region=r, text_color=c)
        stbt.match_text("Summary", f, region=nonoverlapping)

        # The text_color_* images are expected only in 00003 and 00007,
        # the two calls that pass ``text_color``.
        files = subprocess.check_output("find stbt-debug | sort", shell=True) \
                          .decode("utf-8")
        assert files == dedent("""\
            stbt-debug
            stbt-debug/00001
            stbt-debug/00001/index.html
            stbt-debug/00001/source.png
            stbt-debug/00001/tessinput.png
            stbt-debug/00001/upsampled.png
            stbt-debug/00002
            stbt-debug/00002/index.html
            stbt-debug/00002/source.png
            stbt-debug/00002/tessinput.png
            stbt-debug/00002/upsampled.png
            stbt-debug/00003
            stbt-debug/00003/index.html
            stbt-debug/00003/source.png
            stbt-debug/00003/tessinput.png
            stbt-debug/00003/text_color_difference.png
            stbt-debug/00003/text_color_threshold.png
            stbt-debug/00003/upsampled.png
            stbt-debug/00004
            stbt-debug/00004/index.html
            stbt-debug/00004/source.png
            stbt-debug/00005
            stbt-debug/00005/index.html
            stbt-debug/00005/source.png
            stbt-debug/00005/tessinput.png
            stbt-debug/00005/upsampled.png
            stbt-debug/00006
            stbt-debug/00006/index.html
            stbt-debug/00006/source.png
            stbt-debug/00006/tessinput.png
            stbt-debug/00006/upsampled.png
            stbt-debug/00007
            stbt-debug/00007/index.html
            stbt-debug/00007/source.png
            stbt-debug/00007/tessinput.png
            stbt-debug/00007/text_color_difference.png
            stbt-debug/00007/text_color_threshold.png
            stbt-debug/00007/upsampled.png
            stbt-debug/00008
            stbt-debug/00008/index.html
            stbt-debug/00008/source.png
            """)
Пример #35
0
def test_that_ocr_returns_unicode():
    """Check that ocr's return value is a ``unicode`` instance."""
    frame = cv2.imread('tests/ocr/unicode.png')
    result = stbt.ocr(frame=frame)
    assert isinstance(result, unicode)
Пример #36
0
def test_that_ocr_reads_unicode():
    """Check that ocr with 'eng+deu' returns the exact text as ``str``."""
    frame = load_image('ocr/unicode.png')
    result = stbt.ocr(frame=frame, lang='eng+deu')
    assert isinstance(result, str)
    assert u'£500\nDavid Röthlisberger' == result
Пример #37
0
def test_user_dictionary_with_non_english_language(words):
    """Check that user words work when ``lang`` is "deu"."""
    frame = load_image('ocr/192.168.10.1.png')
    text = stbt.ocr(
        frame=frame,
        mode=stbt.OcrMode.SINGLE_WORD,
        lang="deu",
        tesseract_user_words=words)
    assert u'192.168.10.1' == text
Пример #38
0
 def selection(self):
     """OCR the selected button's text as a single line."""
     # Shrink the region to exclude the edges & corners of the button:
     inner = self.selection_region.extend(x=5, y=2, right=-5, bottom=-2)
     return stbt.ocr(
         frame=self._frame, mode=stbt.OcrMode.SINGLE_LINE, region=inner)
Пример #39
0
def test_user_dictionary_with_non_english_language():
    """Check that a user word is honoured when ``lang`` is "deu"."""
    serial = u'UJJM2LGE'
    frame = cv2.imread('tests/ocr/UJJM2LGE.png')
    text = stbt.ocr(
        frame=frame,
        mode=stbt.OcrMode.SINGLE_WORD,
        lang="deu",
        tesseract_user_words=[serial])
    assert u'UJJM2LGE' == text
Пример #40
0
 def selection(self):
     """Read the text of the current selection in single-line OCR mode."""
     # The edges & corners of the button are excluded from the OCR region:
     text_region = self.selection_region.extend(
         x=5, y=2, right=-5, bottom=-2)
     selected_text = stbt.ocr(
         frame=self._frame,
         mode=stbt.OcrMode.SINGLE_LINE,
         region=text_region)
     return selected_text
Пример #41
0
def test_that_ocr_still_returns_if_region_doesnt_intersect_with_frame():
    """Check that ocr returns an empty string for the given region."""
    outside = stbt.Region(1280, 0, 1280, 720)
    menu = cv2.imread("tests/ocr/menu.png")
    text = stbt.ocr(frame=menu, region=outside)
    assert text == u''
Пример #42
0
def test_that_ocr_still_returns_if_region_doesnt_intersect_with_frame(region):
    """Check that ocr returns an empty string for *region*."""
    menu = load_image("ocr/menu.png")
    text = stbt.ocr(frame=menu, region=region)
    assert text == u''
Пример #43
0
def test_that_ocr_can_read_small_text():
    """Check that ocr reads the two lines of small text exactly."""
    frame = cv2.imread('tests/ocr/small.png')
    expected = u'Small anti-aliased text is hard to read\nunless you magnify'
    eq_(expected, stbt.ocr(frame=frame))
Пример #44
0
def test_that_ocr_returns_unicode():
    """Check the type of the value returned by ocr."""
    image = cv2.imread('tests/ocr/unicode.png')
    assert isinstance(stbt.ocr(frame=image), unicode)
Пример #45
0
def test_ocr_on_static_images(image, expected_text, region, mode):
    """Check ocr output for a static image under ``tests/ocr/``.

    ``mode`` is only passed to ocr when it is not ``None``, so that ocr's
    own default applies otherwise.
    """
    if mode is None:
        options = {"region": region}
    else:
        options = {"region": region, "mode": mode}
    frame = cv2.imread("tests/ocr/" + image)
    actual = stbt.ocr(frame, **options)
    assert actual == expected_text
Пример #46
0
def test_that_ocr_reads_unicode():
    """Check that the non-ASCII text is read exactly with 'eng+deu'."""
    image = cv2.imread('tests/ocr/unicode.png')
    text = stbt.ocr(frame=image, lang='eng+deu')
    eq_(u'£500\nRöthlisberger', text)
 def program_title(self):
     """OCR a fixed region of ``self._frame`` with a fixed text colour."""
     title_region = stbt.Region(x=120, y=540, right=880, bottom=590)
     text_colour = (255, 255, 255)
     return stbt.ocr(
         frame=self._frame, region=title_region, text_color=text_colour)
Пример #48
0
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """Compare the ocr output for ambig.png with ``ligature_text``."""
    raw_text = stbt.ocr(frame=cv2.imread('tests/ocr/ambig.png'))
    # for tesseract < 3.03
    fixed_text = raw_text.replace("horizonta|", "horizontal")
    eq_(ligature_text, fixed_text)
Пример #49
0
 def message(self):
     """OCR a fixed region of ``self._frame``, joining lines with spaces."""
     message_region = stbt.Region(515, 331, 400, 100)
     raw_text = stbt.ocr(region=message_region, frame=self._frame)
     return raw_text.replace('\n', ' ')
Пример #50
0
def test_char_whitelist():
    """Check that ``char_whitelist`` makes ocr read the digits "00"."""
    # Without char_whitelist tesseract reads "OO" (the letter oh).
    frame = load_image('ocr/00.png')
    text = stbt.ocr(
        frame=frame,
        mode=stbt.OcrMode.SINGLE_WORD,
        char_whitelist="0123456789")
    assert u'00' == text
Пример #51
0
def test_that_ocr_still_returns_if_region_doesnt_intersect_with_frame(region):
    """Check that ocr returns an empty string for *region*."""
    menu = cv2.imread("tests/ocr/menu.png")
    text = stbt.ocr(frame=menu, region=region)
    assert text == u''
Пример #52
0
def test_user_dictionary_with_non_english_language():
    """Check that a user word is honoured when ``lang`` is "deu"."""
    frame = cv2.imread('tests/ocr/UJJM2LGE.png')
    text = stbt.ocr(
        frame=frame,
        mode=stbt.OcrMode.SINGLE_WORD,
        lang="deu",
        tesseract_user_words=[u'UJJM2LGE'])
    eq_(u'UJJM2LGE', text)
Пример #53
0
def get_program_name_from_infobar():
    """Press KEY_INFO and return the text OCR'd from a fixed region.

    No frame is passed to ``stbt.ocr``.
    """
    stbt.press("KEY_INFO")
    name_region = stbt.Region(x=316, y=627, right=1054, bottom=688)
    return stbt.ocr(region=name_region)