def __init__(self, frame=None): """The default constructor takes an optional frame of video; if the frame is not provided, it will grab a frame from the device-under-test. If you override the constructor in your derived class (for example to accept additional parameters), make sure to accept an optional ``frame`` parameter and supply it to the super-class's constructor. """ if frame is None: from stbt_core import get_frame frame = get_frame() self.__frame_object_cache = {} self.__local = threading.local() self._frame = frame
def is_screen_black(frame=None, mask=None, threshold=None, region=Region.ALL):
    """Check for the presence of a black screen in a video frame.

    :type frame: `stbt.Frame` or `numpy.ndarray`
    :param frame: The video frame to check — an image in OpenCV format (for
        example as returned by `frames` and `get_frame`). If not given, a
        new frame is grabbed from the device-under-test.

    :type mask: str or `numpy.ndarray`
    :param mask: A black & white image that selects which part of the image
        to analyse: white pixels are analysed, black pixels are ignored.
        Either a string (a filename that will be resolved as per
        `stbt.load_image`) or a single-channel image in OpenCV format. If
        you specify ``region``, the mask must be the same size as the
        region; otherwise it must be the same size as the frame.

    :param int threshold: Binary threshold, in the range 0 (black) to
        255 (white), that separates almost-black pixels from non-black
        pixels — even an apparently black frame's pixels rarely have an
        intensity of exactly 0. The global default (20) can be changed by
        setting ``threshold`` in the ``[is_screen_black]`` section of
        :ref:`.stbt.conf`.

    :type region: `Region`
    :param region: Only analyze the specified region of the video frame.

    :returns: An object that will evaluate to true if the frame was black,
      or false if not black. The object has the following attributes:

        * **black** (*bool*) – True if the frame was black.
        * **frame** (`stbt.Frame`) – The video frame that was analysed.
    """
    if threshold is None:
        threshold = get_config('is_screen_black', 'threshold', type_=int)
    if frame is None:
        from stbt_core import get_frame
        frame = get_frame()
    if mask is not None:
        mask = load_image(mask, cv2.IMREAD_GRAYSCALE)
    region = _validate_region(frame, region)

    imglog = ImageLogger("is_screen_black", region=region,
                         threshold=threshold)
    imglog.imwrite("source", frame)

    grey = cv2.cvtColor(crop(frame, region), cv2.COLOR_BGR2GRAY)
    if mask is not None:
        imglog.imwrite("mask", mask)
        # Zero out the pixels we've been told to ignore, in place.
        cv2.bitwise_and(grey, mask, dst=grey)

    max_intensity = grey.max()
    result = _IsScreenBlackResult(bool(max_intensity <= threshold), frame)

    found = "Found" if result.black else "Didn't find"
    debug("is_screen_black: {found} black screen using mask={mask}, "
          "threshold={threshold}, region={region}: "
          "{result}, maximum_intensity={maxVal}".format(
              found=found, mask=mask, threshold=threshold, region=region,
              result=result, maxVal=max_intensity))

    if imglog.enabled:
        imglog.imwrite("grey", grey)
        _, above_threshold = cv2.threshold(grey, threshold, 255,
                                           cv2.THRESH_BINARY)
        imglog.imwrite("non_black", above_threshold)
        imglog.set(maxVal=max_intensity,
                   non_black_region=pixel_bounding_box(above_threshold))
    _log_image_debug(imglog, result)
    return result
def _match_all(image, frame, match_parameters, region):
    """Generator that yields a sequence of zero or more truthy MatchResults,
    followed by a falsey MatchResult.

    :param image: The reference image to search for (a filename or an image
        in OpenCV format, as accepted by `_load_image`).
    :param frame: The video frame to search in; if None a new frame is
        grabbed from the device-under-test.
    :param match_parameters: `MatchParameters` tuning the matching
        algorithm; if None the defaults are used.
    :param region: Only search within this `Region` of the frame.
    :raises ValueError: If the image/frame shapes or channel counts are
        incompatible, or if the reference image has an alpha channel and
        transparency support isn't available with the current OpenCV
        version or match method.
    """
    if match_parameters is None:
        match_parameters = MatchParameters()
    if frame is None:
        from stbt_core import get_frame
        frame = get_frame()
    template = _load_image(image)

    # Normalise single channel images to shape (h, w, 1) rather than just
    # (h, w). Use views so we don't mutate the caller's arrays.
    t = template.image.view()
    if len(t.shape) == 2:
        t.shape = t.shape + (1,)
    frame = frame.view()
    if len(frame.shape) == 2:
        frame.shape = frame.shape + (1,)

    # Validate shapes & channel counts before doing any work:
    if len(t.shape) != 3:
        raise ValueError(
            "Invalid shape for image: %r. Shape must have 2 or 3 elements"
            % (template.image.shape,))
    if len(frame.shape) != 3:
        raise ValueError(
            "Invalid shape for frame: %r. Shape must have 2 or 3 elements"
            % (frame.shape,))
    if t.shape[2] not in (1, 3, 4):
        raise ValueError("Expected 3-channel image, got %d channels: %s"
                         % (t.shape[2], template.absolute_filename))
    if any(frame.shape[x] < t.shape[x] for x in (0, 1)):
        raise ValueError("Frame %r must be larger than reference image %r"
                         % (frame.shape, t.shape))
    if any(t.shape[x] < 1 for x in (0, 1)):
        raise ValueError("Reference image %r must contain some data"
                         % (t.shape,))
    if (frame.shape[2], t.shape[2]) not in [(1, 1), (3, 3), (3, 4)]:
        raise ValueError(
            "Frame %r and reference image %r must have the same number of "
            "channels" % (frame.shape, t.shape))

    if t.shape[2] == 4:
        # 4 channels means the reference image has an alpha channel
        # (transparency), which needs extra support from OpenCV:
        if cv2_compat.version < [3, 0, 0]:
            raise ValueError(
                "Reference image %s has alpha channel, but transparency "
                "support requires OpenCV 3.0 or greater (you have %s)."
                % (template.relative_filename, cv2_compat.version))
        if match_parameters.match_method not in (MatchMethod.SQDIFF,
                                                 MatchMethod.CCORR_NORMED):
            # See `matchTemplateMask`:
            # https://github.com/opencv/opencv/blob/3.2.0/modules/imgproc/src/templmatch.cpp#L840-L917
            raise ValueError(
                "Reference image %s has alpha channel, but transparency "
                "support requires match_method SQDIFF or CCORR_NORMED "
                "(you specified %s)."
                % (template.relative_filename,
                   match_parameters.match_method))

    input_region = Region.intersect(_image_region(frame), region)
    if input_region is None:
        raise ValueError("frame with dimensions %r doesn't contain %r"
                         % (frame.shape, region))
    if input_region.height < t.shape[0] or input_region.width < t.shape[1]:
        raise ValueError("%r must be larger than reference image %r"
                         % (input_region, t.shape))

    imglog = ImageLogger("match", match_parameters=match_parameters,
                         template_name=template.friendly_name,
                         input_region=input_region)

    # pylint:disable=undefined-loop-variable
    try:
        for (matched, match_region, first_pass_matched,
             first_pass_certainty) in _find_matches(
                 crop(frame, input_region), t, match_parameters, imglog):

            # _find_matches works in input_region coordinates; translate
            # back to frame coordinates for the caller.
            match_region = Region.from_extents(*match_region) \
                                 .translate(input_region)
            result = MatchResult(
                getattr(frame, "time", None), matched, match_region,
                first_pass_certainty, frame,
                (template.relative_filename or template.image),
                first_pass_matched)
            imglog.append(matches=result)
            draw_on(frame, result, label="match(%s)" % template.short_repr())
            yield result
    finally:
        try:
            _log_match_image_debug(imglog)
        except Exception:  # pylint:disable=broad-except
            # Debug-image logging is best-effort; it must never mask an
            # exception raised by the matching itself.
            pass
def match_text(text, frame=None, region=Region.ALL,
               mode=OcrMode.PAGE_SEGMENTATION_WITHOUT_OSD, lang=None,
               tesseract_config=None, case_sensitive=False, upsample=True,
               text_color=None, text_color_threshold=None, engine=None,
               char_whitelist=None):
    """Search for the specified text in a single video frame.

    A text-based alternative to `match`: searches for text instead of an
    image.

    :param str text: The text to search for.
    :param frame: See `ocr`.
    :param region: See `ocr`.
    :param mode: See `ocr`.
    :param lang: See `ocr`.
    :param tesseract_config: See `ocr`.
    :param upsample: See `ocr`.
    :param text_color: See `ocr`.
    :param text_color_threshold: See `ocr`.
    :param engine: See `ocr`.
    :param char_whitelist: See `ocr`.

    :param bool case_sensitive: Ignore case if False (the default).

    :returns: A `TextMatchResult`, which will evaluate to True if the text
      was found, false otherwise.

    For example, to select a button in a vertical menu by name (in this
    case "TV Guide")::

        m = stbt.match_text("TV Guide")
        assert m.match
        while not stbt.match('selected-button.png').region.contains(m.region):
            stbt.press('KEY_DOWN')

    | Added in v30: The ``engine`` parameter and support for Tesseract v4.
    | Added in v31: The ``char_whitelist`` parameter.
    """
    if frame is None:
        from stbt_core import get_frame
        frame = get_frame()
    region = _validate_region(frame, region)

    # Ask tesseract for hOCR output so we get word positions, not just text.
    _config = dict(tesseract_config or {})
    _config['tessedit_create_hocr'] = 1

    rts = getattr(frame, "time", None)
    imglog = ImageLogger("match_text")
    xml = _tesseract(frame, region, mode, lang, _config, None,
                     text.split(), upsample, text_color,
                     text_color_threshold, engine, char_whitelist, imglog)

    hocr = None
    result = None
    if xml != '':
        import lxml.etree
        hocr = lxml.etree.fromstring(xml.encode('utf-8'))
        phrase = _hocr_find_phrase(hocr, to_unicode(text).split(),
                                   case_sensitive)
        if phrase:
            # Bounding box of all the matched words.
            box = Region.bounding_box(
                *[_hocr_elem_region(elem) for _, elem in phrase])
            # _tesseract crops to region and scales up by a factor of 3 so
            # we must undo this transformation here.
            scale = 3 if upsample else 1
            box = Region.from_extents(region.x + box.x // scale,
                                      region.y + box.y // scale,
                                      region.x + box.right // scale,
                                      region.y + box.bottom // scale)
            result = TextMatchResult(rts, True, box, frame, text)
    if result is None:
        result = TextMatchResult(rts, False, None, frame, text)

    debug("match_text: %s: %s"
          % ("Match found" if result.match else "No match found",
             str(result)))

    imglog.set(text=text, case_sensitive=case_sensitive, result=result,
               hocr=hocr)
    _log_ocr_image_debug(imglog)

    return result
def ocr(frame=None, region=Region.ALL,
        mode=OcrMode.PAGE_SEGMENTATION_WITHOUT_OSD, lang=None,
        tesseract_config=None, tesseract_user_words=None,
        tesseract_user_patterns=None, upsample=True, text_color=None,
        text_color_threshold=None, engine=None, char_whitelist=None,
        corrections=None):
    r"""Return the text present in the video frame as a Unicode string.

    Perform OCR (Optical Character Recognition) using the "Tesseract"
    open-source OCR engine.

    :param frame: The video frame to process — an image in OpenCV format
        (for example as returned by `frames` and `get_frame`). If not
        given, a new frame is grabbed from the device-under-test.

    :param region: Only search within the specified region of the video
        frame.
    :type region: `Region`

    :param mode: Tesseract's layout analysis mode.
    :type mode: `OcrMode`

    :param str lang: The three-letter
        `ISO-639-3 <http://www.loc.gov/standards/iso639-2/php/code_list.php>`__
        language code of the language you are attempting to read; for
        example "eng" for English or "deu" for German. Join multiple codes
        with '+' (for example "eng+deu" means the text may be a mixture of
        English and German). Defaults to "eng" (English); override the
        global default by setting ``lang`` in the ``[ocr]`` section of
        :ref:`.stbt.conf`. You may need to install the tesseract language
        pack; see installation instructions `here
        <https://stb-tester.com/manual/troubleshooting#install-ocr-language-pack>`__.

    :param dict tesseract_config: Allows passing configuration down to the
        underlying OCR engine. See the `tesseract documentation
        <https://github.com/tesseract-ocr/tesseract/wiki/ControlParams>`__
        for details.

    :type tesseract_user_words: unicode string, or list of unicode strings
    :param tesseract_user_words: List of words to be added to the tesseract
        dictionary. To replace the tesseract system dictionary altogether,
        also set ``tesseract_config={'load_system_dawg': False,
        'load_freq_dawg': False}``.

    :type tesseract_user_patterns: unicode string, or list of unicode strings
    :param tesseract_user_patterns: List of patterns to add to the tesseract
        dictionary. The tesseract pattern language corresponds roughly to
        the following regular expressions::

            tesseract  regex
            =========  ===========
            \c         [a-zA-Z]
            \d         [0-9]
            \n         [a-zA-Z0-9]
            \p         [:punct:]
            \a         [a-z]
            \A         [A-Z]
            \*         *

    :param bool upsample: Upsample the image 3x before passing it to
        tesseract. This helps to preserve information in the text's
        anti-aliasing that would otherwise be lost when tesseract binarises
        the image. Defaults to ``True``; only disable it if you are doing
        your own pre-processing on the image.

    :type text_color: 3-element tuple of integers between 0 and 255, BGR order
    :param text_color: Color of the text. Specifying this can improve OCR
        results when tesseract's default thresholding algorithm doesn't
        detect the text, for example white text on a light-colored
        background or text on a translucent overlay.

    :param int text_color_threshold: The threshold to use with
        ``text_color``, between 0 and 255. Defaults to 25. Override the
        global default by setting ``text_color_threshold`` in the ``[ocr]``
        section of :ref:`.stbt.conf`.

    :param engine: The OCR engine to use. Defaults to
        ``OcrEngine.TESSERACT``. Override the global default by setting
        ``engine`` in the ``[ocr]`` section of :ref:`.stbt.conf`.
    :type engine: `OcrEngine`

    :type char_whitelist: unicode string
    :param char_whitelist: String of characters that are allowed. Useful
        when you know that the text is only going to contain numbers or IP
        addresses, for example so that tesseract won't think that a zero is
        the letter o. Note that Tesseract 4.0's LSTM engine ignores
        ``char_whitelist``.

    :param dict corrections: Dictionary of corrections to replace known OCR
        mis-reads. Each key is the text to search for; the value is the
        replacement. A string key is treated as plain text and only matches
        at word boundaries (for example ``"he saw"`` won't match
        ``"the saw"`` nor ``"he saws"``). A regular expression key (created
        with `re.compile`) can match anywhere, and the replacement may
        contain backreferences such as ``"\1"`` (same as Python's
        `re.sub`). Example::

            corrections={'bad': 'good', re.compile(r'[oO]'): '0'}

        Plain strings are replaced first (in the order they are specified),
        followed by regular expresions (in the order they are specified).
        The default can be set with `stbt.set_global_ocr_corrections`; if
        global corrections are set *and* this parameter is specified, the
        corrections in this parameter are applied first.

    | Added in v30: The ``engine`` parameter and support for Tesseract v4.
    | Added in v31: The ``char_whitelist`` parameter.
    | Added in v32: The ``corrections`` parameter.
    """
    if frame is None:
        from stbt_core import get_frame
        frame = get_frame()
    region = _validate_region(frame, region)

    # Accept a single string where a list of strings is expected.
    if isinstance(tesseract_user_words, (str, bytes)):
        tesseract_user_words = [tesseract_user_words]
    if isinstance(tesseract_user_patterns, (str, bytes)):
        tesseract_user_patterns = [tesseract_user_patterns]

    imglog = ImageLogger("ocr", result=None)
    text = _tesseract(
        frame, region, mode, lang, tesseract_config,
        tesseract_user_patterns, tesseract_user_words, upsample, text_color,
        text_color_threshold, engine, char_whitelist, imglog)
    text = apply_ocr_corrections(text.strip().translate(_ocr_transtab),
                                 corrections)
    debug(u"ocr(frame=%s, region=%r): %r"
          % (_frame_repr(frame), region, text))
    _log_ocr_image_debug(imglog, text)
    return text