Exemplo n.º 1
0
def test_that_match_text_gives_tesseract_a_hint():
    """Check that `match_text` finds text that plain `ocr` misreads.

    The test is skipped when plain OCR already reads the text, or when
    supplying a user word to `ocr` doesn't help either, because in both cases
    the final assertion would prove nothing.
    """
    target = "ITV Player"
    image = cv2.imread("tests/ocr/itv-player.png")

    unaided = stbt.ocr(frame=image)
    if target in unaided:
        raise SkipTest("Tesseract doesn't need a hint")

    hinted = stbt.ocr(frame=image, tesseract_user_words=["ITV"])
    if target not in hinted:
        raise SkipTest("Giving tesseract a hint doesn't help")

    assert stbt.match_text(target, frame=image)
Exemplo n.º 2
0
def test_that_with_old_tesseract_ocr_raises_an_exception_with_patterns():
    """Call `stbt.ocr` with user patterns on a tesseract older than 3.03.

    Skipped when the installed tesseract is 3.03 or newer.

    NOTE(review): nothing in this body asserts that an exception is raised;
    presumably a decorator outside this block does -- confirm.
    """
    # pylint: disable=W0212
    installed = stbt._tesseract_version()
    if installed >= distutils.version.LooseVersion('3.03'):
        raise SkipTest('tesseract is too new')

    serial_image = cv2.imread('tests/ocr/UJJM2LGE.png')
    stbt.ocr(
        frame=serial_image,
        mode=stbt.OcrMode.SINGLE_WORD,
        tesseract_user_patterns=[r'\n\n\n\n\n\n\n\n'])
Exemplo n.º 3
0
def test_ocr_text_color(image, color, expected, region):
    """Check that `text_color` is what makes `expected` readable.

    Both `ocr` and `match_text` must fail to find `expected` in `region` of
    the image without `text_color`, and succeed with `text_color=color`.
    """
    source = load_image(image)
    line_mode = stbt.OcrMode.SINGLE_LINE

    # stbt.ocr: unreadable without the colour, exact match with it.
    plain_text = stbt.ocr(source, region, line_mode)
    assert expected not in plain_text
    colored_text = stbt.ocr(source, region, line_mode, text_color=color)
    assert expected == colored_text

    # stbt.match_text: same expectation.
    plain_match = stbt.match_text(expected, source, region, line_mode)
    assert not plain_match
    colored_match = stbt.match_text(
        expected, source, region, line_mode, text_color=color)
    assert colored_match
Exemplo n.º 4
0
def test_ocr_text_color_threshold():
    """Check `text_color_threshold` given as an argument and via config.

    With `text_color` and a threshold of 50 (passed directly, or set through
    the ``ocr.text_color_threshold`` config key) the image must read "Guide";
    without any colour hint it must not.
    """
    image = load_image("ocr/blue-search-white-guide.png")
    color = (220, 220, 220)

    assert stbt.ocr(image) != "Guide"

    # pylint:disable=fixme
    # TODO: Find an example where text_color_threshold is actually required.
    # Since tesseract 4.0.0 the default text_color_threshold works, so this
    # check is disabled:
    #     assert stbt.ocr(f, text_color=c) != "Guide"

    with_argument = stbt.ocr(image, text_color=color, text_color_threshold=50)
    assert with_argument == "Guide"

    with temporary_config({'ocr.text_color_threshold': '50'}):
        with_config = stbt.ocr(image, text_color=color)
        assert with_config == "Guide"
Exemplo n.º 5
0
def test_that_setting_config_options_has_an_effect():
    """Check that passing a tesseract config option changes the OCR output."""
    # Tesseract has many config options and they are very complicated, so it
    # is hard to write a test proving that an option has the *correct* effect.
    # Instead we only check that setting an option has *some* effect, which at
    # least exercises our code that passes config options through.  This is
    # not ideal; the hope is to replace it once there is more real-world
    # experience with these settings.
    configured_frame = cv2.imread('tests/ocr/ambig.png')
    configured = stbt.ocr(frame=configured_frame,
                          tesseract_config={"tessedit_create_hocr": 1})

    default_frame = cv2.imread('tests/ocr/ambig.png')
    default = stbt.ocr(frame=default_frame)

    assert configured != default
Exemplo n.º 6
0
def test_that_ocr_engine_has_an_effect():
    """Check that the OCR engine, chosen by argument or config, changes output.

    Skipped on tesseract older than 4.0.
    """
    if _tesseract_version() < LooseVersion("4.0"):
        raise SkipTest('tesseract is too old')

    image = load_image("ocr/ambig.png")
    word = "sillyness"

    # Tesseract 4.0's legacy engine regressed compared to 3.04 and misreads
    # the word; the call without an explicit engine misreads it too.
    legacy_text = stbt.ocr(image, engine=stbt.OcrEngine.TESSERACT)
    assert word not in legacy_text
    default_text = stbt.ocr(image)
    assert word not in default_text

    # The newer LSTM engine reads it correctly, whether selected by argument...
    lstm_text = stbt.ocr(image, engine=stbt.OcrEngine.LSTM)
    assert word in lstm_text
    # ...or through the `ocr.engine` config key.
    with temporary_config({'ocr.engine': 'LSTM'}):
        configured_text = stbt.ocr(image)
        assert word in configured_text
Exemplo n.º 7
0
def test_that_passing_patterns_helps_reading_serial_codes():
    """Check that user patterns let tesseract read a serial code correctly.

    Skipped (after the sanity check) on tesseract older than 3.03.
    """
    serial = u'UJJM2LGE'
    image_path = 'tests/ocr/UJJM2LGE.png'

    # Sanity check: the test is only meaningful if tesseract misreads the
    # code when it gets no help.
    unaided = stbt.ocr(
        frame=cv2.imread(image_path),
        mode=stbt.OcrMode.SINGLE_WORD)
    assert serial != unaided

    # pylint: disable=W0212
    installed = stbt._tesseract_version()
    if installed < distutils.version.LooseVersion('3.03'):
        raise SkipTest('tesseract is too old')

    # The actual test: with a pattern hint the code is read exactly.
    aided = stbt.ocr(
        frame=cv2.imread(image_path),
        mode=stbt.OcrMode.SINGLE_WORD,
        tesseract_user_patterns=[r'\n\n\n\n\n\n\n\n'])
    eq_(serial, aided)
Exemplo n.º 8
0
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """Check that OCR output matches `ligature_text` and em-dashes match."""
    image = cv2.imread('tests/ocr/ambig.png')

    # Tesseract < 3.03 misreads the final "l" of "horizontal" as "|".
    normalised = stbt.ocr(image).replace("horizonta|", "horizontal")
    assert ligature_text == normalised

    # Both the ASCII hyphen and the real em-dash spelling must match.
    for dash_variant in ("em-dash,", u"em\u2014dash,"):
        assert stbt.match_text(dash_variant, image)
Exemplo n.º 9
0
    def title(self):
        """
        Read the dialog's title text from ``self._frame`` using `stbt.ocr`.

        ``self._frame`` is a member provided by the base class. Many Frame
        Object properties will follow this same basic shape.

        An advantage of Frame Objects is that testcases can be written as::

            assert Dialog().title == "Information"

        rather than::

            assert stbt.ocr(region=stbt.Region(396, 249, 500, 50)) == "Information"

        The first form reveals the testcase author's intention (the word is
        expected in the dialog's *title*). It is also cheaper to maintain: if
        the title moves, only the implementation of ``Dialog.title`` has to
        be updated, not every testcase.

        Inside a Frame Object, every call to an image processing function
        must be given ``self._frame`` (as the ``ocr`` call below is);
        otherwise the return value won't correspond to the frame you were
        expecting.
        """
        title_region = stbt.Region(396, 249, 500, 50)
        return stbt.ocr(region=title_region, frame=self._frame)
 def read_hex(region, frame_=frame):
     """Read a "#"-prefixed hex string from `region` of `frame_`.

     OCR runs in single-line mode with the character whitelist restricted to
     "#" and lowercase hex digits. Spaces are stripped from the result.

     :param region: region of the frame to read.
     :param frame_: frame to read from; defaults to the enclosing `frame`.
     :returns: the text read, with all spaces removed.
     """
     return stbt.ocr(
         frame_,
         region,
         stbt.OcrMode.SINGLE_LINE,
         tesseract_config={"tessedit_char_whitelist": "#0123456789abcdef"},
         # Raw string: tesseract's user-pattern syntax uses a literal
         # backslash followed by "n" as a character-class token. Without the
         # r-prefix Python turns each one into a newline, so the pattern
         # degenerates to a lone "#" followed by blank lines.
         tesseract_user_patterns=[r"#\n\n\n\n\n\n"],
     ).replace(" ", "")
Exemplo n.º 11
0
def test_that_passing_patterns_helps_reading_serial_codes():
    """Check that a user pattern lets tesseract read the serial code exactly.

    Skipped on tesseract older than 3.03.
    """
    # pylint: disable=W0212
    installed = _stbt.core._tesseract_version()
    if installed < distutils.version.LooseVersion('3.03'):
        raise SkipTest('tesseract is too old')

    # The actual test:
    serial_image = cv2.imread('tests/ocr/UJJM2LGE.png')
    text = stbt.ocr(
        frame=serial_image,
        mode=stbt.OcrMode.SINGLE_WORD,
        tesseract_user_patterns=[r'\n\n\n\n\n\n\n\n'])
    assert u'UJJM2LGE' == text
Exemplo n.º 12
0
 def text(self):
     """Return the selection's text, running OCR only if not yet cached.

     OCR is performed on the greyscale absolute difference between the
     selection's crop of the frame and ``self._background`` repeated
     ``self.region.width`` times along axis 1. It is only attempted when no
     text is cached and the object is truthy. The current value is always
     logged via `stbt.debug`.
     """
     needs_ocr = self._text is None and self.__nonzero__()
     if needs_ocr:
         selection = crop(self._frame, self.region)
         backdrop = numpy.repeat(self._background, self.region.width, 1)
         grey_diff = cv2.cvtColor(
             cv2.absdiff(selection, backdrop), cv2.COLOR_BGR2GRAY)
         self._text = stbt.ocr(grey_diff)
     stbt.debug("Selection text: %s" % self._text)
     return self._text
Exemplo n.º 13
0
 def _read_text(self, title, patterns=None):
     """Read the text shown to the right of the label `title`.

     Searches for `title` with `stbt.match_text` inside a fixed region of
     ``self._frame``. If it is found, the area to the right of the match
     (slightly extended) is read with `stbt.ocr`.

     :param title: the label text to search for.
     :param patterns: passed to `stbt.ocr` as ``tesseract_user_patterns``.
     :returns: the text read by OCR, or ``None`` if `title` wasn't found.
     """
     # Keep the match result under its own name so that `title` still holds
     # the text we searched for when we log a failure below.
     match = stbt.match_text(
         title, frame=self._frame,
         region=stbt.Region(x=620, y=145, right=950, bottom=460),
         text_color=(124, 94, 114))
     if not match:
         # 1-tuple so that an unusual `title` value can't break %-formatting.
         stbt.debug("NetworkAbout: Didn't find %r" % (title,))
         return None
     region = match.region.right_of().extend(x=10, y=-5, bottom=10)
     return stbt.ocr(self._frame, region, tesseract_user_patterns=patterns)
Exemplo n.º 14
0
def test_tesseract_user_patterns(patterns):
    """Check that `patterns` lets tesseract read an IP address exactly.

    Skipped on tesseract older than 3.03.
    """
    # pylint:disable=protected-access
    too_old = _tesseract_version() < LooseVersion('3.03')
    if too_old:
        raise SkipTest('tesseract is too old')

    # The actual test:
    address_image = load_image('ocr/192.168.10.1.png')
    text = stbt.ocr(
        frame=address_image,
        mode=stbt.OcrMode.SINGLE_WORD,
        tesseract_user_patterns=patterns)
    assert u'192.168.10.1' == text
Exemplo n.º 15
0
def test_that_match_all_can_be_used_with_ocr_to_read_buttons():
    """Show how `match_all` and `ocr` combine to read text on buttons.

    Every occurrence of the button image is located in the frame; the button
    image is subtracted from each occurrence with `cv2.absdiff` and the
    difference is passed to OCR.
    """
    frame = _imread('buttons.png')
    button = _imread('button.png')

    text = [
        stbt.ocr(frame=cv2.absdiff(_crop(frame, m.region), button))
        for m in stbt.match_all(
            button, frame=frame, match_parameters=mp(confirm_method='none'))]
    # Discard empty reads before comparing.
    text = sorted(t for t in text if t not in ['', '\\s'])
    # Parenthesised so that this parses on Python 3 as well as Python 2
    # (a single parenthesised argument prints identically on both).
    print(text)
    assert text == [u'Button 1', u'Button 2', u'Buttons']
Exemplo n.º 16
0
def test_that_match_all_can_be_used_with_ocr_to_read_buttons():
    """Show how `match_all` and `ocr` combine to read text on buttons.

    Every match of the button image is located in the frame, and OCR is run
    on a crop of each match shrunk by 30 pixels horizontally and 10 pixels
    vertically on each side.
    """
    frame = stbt.load_image('buttons.png')

    text = [
        stbt.ocr(frame=stbt.crop(
            frame,
            m.region.extend(x=30, y=10, right=-30, bottom=-10)))
        for m in stbt.match_all('button-transparent.png', frame=frame)]
    # Discard empty reads before comparing.
    text = sorted(t for t in text if t not in ['', '\\s'])
    # Parenthesised so that this parses on Python 3 as well as Python 2
    # (a single parenthesised argument prints identically on both).
    print(text)
    assert text == [u'Button 1', u'Button 2', u'Buttons']
Exemplo n.º 17
0
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """Check that OCR output matches `ligature_text` and em-dashes match.

    A few misreadings known from specific tesseract versions are corrected
    before the comparison.
    """
    image = load_image('ocr/ambig.png')

    known_misreads = (
        # tesseract 3.02
        ("horizonta|", "horizontal"),
        # tesseract 4.00 with tessdata 590567f
        ("siIIyness", "sillyness"),
        ("Iigatures", "ligatures"),
    )
    text = stbt.ocr(image)
    for misread, correction in known_misreads:
        text = text.replace(misread, correction)
    assert ligature_text == text

    # Both the ASCII hyphen and the real em-dash spelling must match.
    for dash_variant in ("em-dash,", u"em\u2014dash,"):
        assert stbt.match_text(dash_variant, image)
Exemplo n.º 18
0
 def message(self):
     """
     Read the message shown to the right of the "info" icon.

     This illustrates an advantage of Frame Objects over stand-alone helper
     functions: the icon's position comes from the private ``_info``
     property, which is shared with ``is_visible``. It doesn't need to be
     computed twice because the ``FrameObject`` base class remembers the
     value from the first time it was computed.

     Newlines in the OCR output are replaced with spaces.
     """
     icon = self._info.region
     message_region = stbt.Region(
         x=icon.right, y=icon.y, width=390, height=icon.height)
     raw = stbt.ocr(region=message_region, frame=self._frame)
     return raw.replace('\n', ' ')
Exemplo n.º 19
0
def check(imgname, phrases, params):
    """OCR an image file and report which of `phrases` appear in the text.

    :param imgname: path of the image file to read.
    :param phrases: the phrases to look for in the OCR output.
    :param params: keyword arguments passed through to `stbt.ocr`.
    :returns: a dict with keys ``matches`` (number of phrases found),
        ``total`` (number of phrases), ``percentage`` (``0.0`` when there
        are no phrases), ``name`` (basename of `imgname`), ``path``,
        ``phrases`` (one ``{"text", "match"}`` dict per phrase) and ``text``
        (the raw OCR output).
    :raises IOError: if the image can't be loaded.
    """
    from stbt import ocr

    img = cv2.imread(imgname)
    if img is None:
        raise IOError('No such file or directory "%s"' % imgname)
    text = ocr(img, **params)

    # Test each phrase once and derive the counts from these results.
    results = [{"text": x, "match": x in text} for x in phrases]
    matches = sum(1 for r in results if r["match"])
    total = len(results)
    # An empty phrase list would otherwise divide by zero.
    percentage = float(matches) / total * 100 if total else 0.0

    return {
        "matches": matches,
        "total": total,
        "percentage": percentage,
        "name": os.path.basename(imgname),
        "path": imgname,
        "phrases": results,
        "text": text,
    }
Exemplo n.º 20
0
def test_ocr_on_text_next_to_image_match():
    """Read the text in the 150 pixels to the right of a matched image."""
    panel = cv2.imread("tests/action-panel.png")
    button = stbt.match("tests/action-panel-blue-button.png", panel)
    label_region = button.region.right_of(width=150)
    label = stbt.ocr(panel, region=label_region)
    assert "YOUVIEW MENU" == label
Exemplo n.º 21
0
def test_that_ocr_region_none_isnt_allowed():
    """Call `stbt.ocr` with an explicit ``region=None``.

    NOTE(review): nothing in this body asserts a failure; presumably a
    decorator outside this block expects an exception -- confirm.
    """
    small = cv2.imread("tests/ocr/small.png")
    stbt.ocr(frame=small, region=None)
Exemplo n.º 22
0
 def read_hex(region, frame_=frame):
     """Read a "#"-prefixed hex string from `region` of `frame_`.

     OCR runs in single-line mode with the character whitelist restricted to
     "#" and lowercase hex digits. Spaces are stripped from the result.

     :param region: region of the frame to read.
     :param frame_: frame to read from; defaults to the enclosing `frame`.
     :returns: the text read, with all spaces removed.
     """
     return stbt.ocr(
         frame_, region, stbt.OcrMode.SINGLE_LINE, tesseract_config={
             'tessedit_char_whitelist': '#0123456789abcdef'},
         # Raw string: tesseract's user-pattern syntax uses a literal
         # backslash followed by "n" as a character-class token. Without the
         # r-prefix Python turns each one into a newline, so the pattern
         # degenerates to a lone "#" followed by blank lines.
         tesseract_user_patterns=[r'#\n\n\n\n\n\n']).replace(' ', '')
Exemplo n.º 23
0
def test_ocr_on_text_next_to_image_match():
    """Read the text in the 150 pixels to the right of a matched image."""
    image = cv2.imread("tests/action-panel.png")
    blue_button = stbt.match("tests/action-panel-blue-button.png", image)
    beside_button = blue_button.region.right_of(width=150)
    text = stbt.ocr(image, region=beside_button)
    assert "YOUVIEW MENU" == text
Exemplo n.º 24
0
def test_read_menu():
    """Press KEY_CLOSE then KEY_MENU and print the text read from the screen.

    There is a one-second pause after each key press before continuing.
    """
    stbt.press('KEY_CLOSE')
    sleep(1)
    stbt.press('KEY_MENU')
    sleep(1)
    # Parenthesised so that this parses on Python 3 as well as Python 2
    # (a single parenthesised argument prints identically on both).
    print(stbt.ocr())
Exemplo n.º 25
0
def test_that_ocr_region_none_isnt_allowed():
    """Check that `stbt.ocr` raises TypeError for an explicit ``region=None``."""
    # Load the image outside the `raises` block so that a TypeError from
    # loading can't be mistaken for the one expected from `stbt.ocr`.
    frame = load_image("ocr/small.png")
    with pytest.raises(TypeError):
        stbt.ocr(frame=frame, region=None)
Exemplo n.º 26
0
def test_that_ocr_reads_unicode():
    """Check that non-ASCII text is read correctly with ``lang='eng+deu'``."""
    image = cv2.imread('tests/ocr/unicode.png')
    result = stbt.ocr(frame=image, lang='eng+deu')
    assert isinstance(result, unicode)
    assert u'£500\nDavid Röthlisberger' == result
Exemplo n.º 27
0
def test_that_ocr_region_none_isnt_allowed():
    """Call `stbt.ocr` with an explicit ``region=None``.

    NOTE(review): nothing in this body asserts a failure; presumably a
    decorator outside this block expects an exception -- confirm.
    """
    small = load_image("ocr/small.png")
    stbt.ocr(frame=small, region=None)
Exemplo n.º 28
0
def test_that_ocr_reads_unicode():
    """Check that non-ASCII text is read correctly with ``lang='eng+deu'``."""
    expected = u'£500\nRöthlisberger'
    image = cv2.imread('tests/ocr/unicode.png')
    actual = stbt.ocr(frame=image, lang='eng+deu')
    eq_(expected, actual)
Exemplo n.º 29
0
 def ocr():
     """Run `stbt.ocr` on the enclosing scope's `frame` and return the text."""
     text = stbt.ocr(frame=frame)
     return text
Exemplo n.º 30
0
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """Check that the OCR output of ambig.png equals `ligature_text`."""
    image = cv2.imread('tests/ocr/ambig.png')
    raw = stbt.ocr(frame=image)
    # Tesseract < 3.03 misreads the final "l" of "horizontal" as "|".
    corrected = raw.replace("horizonta|", "horizontal")
    eq_(ligature_text, corrected)
Exemplo n.º 31
0
def test_that_default_language_is_configurable():
    """Check that the ``ocr.lang`` config key sets the default OCR language."""
    name = u"Röthlisberger"
    image = cv2.imread("tests/ocr/unicode.png")

    # With the default language the name is read as "Réthlisberger".
    assert not stbt.match_text(name, image)

    with temporary_config({"ocr.lang": "deu"}):
        assert stbt.match_text(name, image)
        assert name in stbt.ocr(image)
Exemplo n.º 32
0
def test_that_ocr_can_read_small_text():
    """Check that small anti-aliased text is read exactly."""
    expected = u'Small anti-aliased text is hard to read\nunless you magnify'
    image = cv2.imread('tests/ocr/small.png')
    actual = stbt.ocr(frame=image)
    assert expected == actual
Exemplo n.º 33
0
def test_that_ocr_reads_unicode():
    """Check that non-ASCII text is read correctly with ``lang='eng+deu'``."""
    frame = cv2.imread('tests/ocr/unicode.png')
    ocr_text = stbt.ocr(frame=frame, lang='eng+deu')
    assert isinstance(ocr_text, unicode)
    assert u'£500\nDavid Röthlisberger' == ocr_text
Exemplo n.º 34
0
def test_ocr_debug():
    """Check which debug files `ocr` and `match_text` write at debug level 2.

    Runs four `ocr` calls followed by four `match_text` calls (whole frame,
    a region, a region with `text_color`, and a region outside the frame) and
    compares the sorted listing of ``stbt-debug`` against the expected files.
    """
    # Reset the frame counter so that the output directory names don't depend
    # on how many tests ran before this one.
    ImageLogger._frame_number = itertools.count(1)  # pylint:disable=protected-access

    image = stbt.load_image("action-panel.png")
    strip = stbt.Region(0, 370, right=1280, bottom=410)
    color = (235, 235, 235)
    offscreen = stbt.Region(2000, 2000, width=10, height=10)

    # The same four argument combinations are used for both functions.
    # For match_text the first two produce no match.
    cases = [
        {},
        {"region": strip},
        {"region": strip, "text_color": color},
        {"region": offscreen},
    ]

    expected_listing = dedent("""\
        stbt-debug
        stbt-debug/00001
        stbt-debug/00001/index.html
        stbt-debug/00001/source.png
        stbt-debug/00001/tessinput.png
        stbt-debug/00001/upsampled.png
        stbt-debug/00002
        stbt-debug/00002/index.html
        stbt-debug/00002/source.png
        stbt-debug/00002/tessinput.png
        stbt-debug/00002/upsampled.png
        stbt-debug/00003
        stbt-debug/00003/index.html
        stbt-debug/00003/source.png
        stbt-debug/00003/tessinput.png
        stbt-debug/00003/text_color_difference.png
        stbt-debug/00003/text_color_threshold.png
        stbt-debug/00003/upsampled.png
        stbt-debug/00004
        stbt-debug/00004/index.html
        stbt-debug/00004/source.png
        stbt-debug/00005
        stbt-debug/00005/index.html
        stbt-debug/00005/source.png
        stbt-debug/00005/tessinput.png
        stbt-debug/00005/upsampled.png
        stbt-debug/00006
        stbt-debug/00006/index.html
        stbt-debug/00006/source.png
        stbt-debug/00006/tessinput.png
        stbt-debug/00006/upsampled.png
        stbt-debug/00007
        stbt-debug/00007/index.html
        stbt-debug/00007/source.png
        stbt-debug/00007/tessinput.png
        stbt-debug/00007/text_color_difference.png
        stbt-debug/00007/text_color_threshold.png
        stbt-debug/00007/upsampled.png
        stbt-debug/00008
        stbt-debug/00008/index.html
        stbt-debug/00008/source.png
        """)

    with scoped_curdir(), scoped_debug_level(2):
        for kwargs in cases:
            stbt.ocr(image, **kwargs)
        for kwargs in cases:
            stbt.match_text("Summary", image, **kwargs)

        listing = subprocess.check_output(
            "find stbt-debug | sort", shell=True)
        files = listing.decode("utf-8")
        assert files == expected_listing
Exemplo n.º 35
0
def test_that_ocr_returns_unicode():
    """Check that `stbt.ocr` returns a ``unicode`` object."""
    image = cv2.imread('tests/ocr/unicode.png')
    result = stbt.ocr(frame=image)
    assert isinstance(result, unicode)
Exemplo n.º 36
0
def test_that_ocr_reads_unicode():
    """Check that non-ASCII text is read correctly with ``lang='eng+deu'``."""
    image = load_image('ocr/unicode.png')
    result = stbt.ocr(frame=image, lang='eng+deu')
    assert isinstance(result, str)
    assert u'£500\nDavid Röthlisberger' == result
Exemplo n.º 37
0
def test_user_dictionary_with_non_english_language(words):
    """Check that user words work when the OCR language is German."""
    address_image = load_image('ocr/192.168.10.1.png')
    text = stbt.ocr(
        frame=address_image,
        mode=stbt.OcrMode.SINGLE_WORD,
        lang="deu",
        tesseract_user_words=words)
    assert u'192.168.10.1' == text
Exemplo n.º 38
0
 def selection(self):
     """Read the text inside ``self.selection_region`` as a single line."""
     # Shrink the region to exclude the edges & corners of the button:
     inner = self.selection_region.extend(x=5, y=2, right=-5, bottom=-2)
     return stbt.ocr(
         frame=self._frame, mode=stbt.OcrMode.SINGLE_LINE, region=inner)
Exemplo n.º 39
0
def test_user_dictionary_with_non_english_language():
    """Check that user words work when the OCR language is German."""
    serial = u'UJJM2LGE'
    serial_image = cv2.imread('tests/ocr/UJJM2LGE.png')
    text = stbt.ocr(
        frame=serial_image,
        mode=stbt.OcrMode.SINGLE_WORD,
        lang="deu",
        tesseract_user_words=[serial])
    assert u'UJJM2LGE' == text
Exemplo n.º 40
0
 def selection(self):
     """Read the text inside ``self.selection_region`` as a single line."""
     # Trim the region so the button's edges & corners are excluded:
     trimmed = self.selection_region.extend(x=5, y=2, right=-5, bottom=-2)
     text = stbt.ocr(
         frame=self._frame, mode=stbt.OcrMode.SINGLE_LINE, region=trimmed)
     return text
Exemplo n.º 41
0
def test_that_ocr_still_returns_if_region_doesnt_intersect_with_frame():
    """Check that `ocr` returns an empty string for the given region."""
    menu = cv2.imread("tests/ocr/menu.png")
    outside = stbt.Region(1280, 0, 1280, 720)
    text = stbt.ocr(frame=menu, region=outside)
    assert text == u''
Exemplo n.º 42
0
def test_that_ocr_still_returns_if_region_doesnt_intersect_with_frame(region):
    """Check that `ocr` returns an empty string for the given `region`."""
    menu = load_image("ocr/menu.png")
    text = stbt.ocr(frame=menu, region=region)
    assert text == u''
Exemplo n.º 43
0
def test_that_ocr_can_read_small_text():
    """Check that small anti-aliased text is read exactly."""
    expected = u'Small anti-aliased text is hard to read\nunless you magnify'
    image = cv2.imread('tests/ocr/small.png')
    actual = stbt.ocr(frame=image)
    eq_(expected, actual)
Exemplo n.º 44
0
def test_that_ocr_returns_unicode():
    """Check that `stbt.ocr` returns a ``unicode`` object."""
    frame = cv2.imread('tests/ocr/unicode.png')
    ocr_text = stbt.ocr(frame=frame)
    assert isinstance(ocr_text, unicode)
Exemplo n.º 45
0
def test_ocr_on_static_images(image, expected_text, region, mode):
    """Check that OCR of ``tests/ocr/<image>`` yields `expected_text`.

    `region` is always passed to `stbt.ocr`; `mode` only when it isn't None,
    so that `stbt.ocr` otherwise uses its own default.
    """
    options = dict(region=region)
    if mode is not None:
        options.update(mode=mode)
    frame = cv2.imread("tests/ocr/" + image)
    actual = stbt.ocr(frame, **options)
    assert actual == expected_text
Exemplo n.º 46
0
def test_that_ocr_reads_unicode():
    """Check that non-ASCII text is read correctly with ``lang='eng+deu'``."""
    frame = cv2.imread('tests/ocr/unicode.png')
    ocr_text = stbt.ocr(frame=frame, lang='eng+deu')
    eq_(u'£500\nRöthlisberger', ocr_text)
 def program_title(self):
     """Read the text in a fixed region of ``self._frame``.

     OCR is given ``text_color=(255, 255, 255)``.
     """
     title_region = stbt.Region(x=120, y=540, right=880, bottom=590)
     return stbt.ocr(
         frame=self._frame,
         region=title_region,
         text_color=(255, 255, 255))
Exemplo n.º 48
0
def test_that_ligatures_and_ambiguous_punctuation_are_normalised():
    """Check that the OCR output of ambig.png equals `ligature_text`."""
    frame = cv2.imread('tests/ocr/ambig.png')
    # Tesseract < 3.03 misreads the final "l" of "horizontal" as "|".
    text = stbt.ocr(frame=frame).replace("horizonta|", "horizontal")
    eq_(ligature_text, text)
Exemplo n.º 49
0
 def message(self):
     """Read the text in a fixed region of ``self._frame``.

     Newlines in the OCR output are replaced with spaces.
     """
     message_region = stbt.Region(515, 331, 400, 100)
     raw = stbt.ocr(region=message_region, frame=self._frame)
     return raw.replace('\n', ' ')
Exemplo n.º 50
0
def test_char_whitelist():
    """Check that `char_whitelist` restricts the characters OCR may return."""
    # Without the whitelist tesseract reads "OO" (the letter oh) instead of
    # two zeroes.
    zeroes_image = load_image('ocr/00.png')
    text = stbt.ocr(
        frame=zeroes_image,
        mode=stbt.OcrMode.SINGLE_WORD,
        char_whitelist="0123456789")
    assert u'00' == text
Exemplo n.º 51
0
def test_that_ocr_still_returns_if_region_doesnt_intersect_with_frame(region):
    """Check that `ocr` returns an empty string for the given `region`."""
    menu = cv2.imread("tests/ocr/menu.png")
    text = stbt.ocr(frame=menu, region=region)
    assert text == u''
Exemplo n.º 52
0
def test_user_dictionary_with_non_english_language():
    """Check that user words work when the OCR language is German."""
    serial_image = cv2.imread('tests/ocr/UJJM2LGE.png')
    text = stbt.ocr(
        frame=serial_image,
        mode=stbt.OcrMode.SINGLE_WORD,
        lang="deu",
        tesseract_user_words=[u'UJJM2LGE'])
    eq_(u'UJJM2LGE', text)
Exemplo n.º 53
0
def get_program_name_from_infobar():
    """Press KEY_INFO and return the text read from a fixed screen region."""
    stbt.press("KEY_INFO")
    infobar_region = stbt.Region(x=316, y=627, right=1054, bottom=688)
    return stbt.ocr(region=infobar_region)