def test__match_lines():
    """An exact single-word match is returned as the only result."""
    # One recognized line made of four words, left to right
    words = [
        ("Open", Region(left=1356, top=440, right=1417, bottom=473)),
        ("Edit", Region(left=1493, top=440, right=1536, bottom=473)),
        ("New", Region(left=1641, top=446, right=1690, bottom=464)),
        ("Delete", Region(left=1755, top=444, right=1831, bottom=464)),
    ]
    lines = [[{"text": text, "region": region} for text, region in words]]

    expected = [
        {
            "text": "New",
            "region": Region(left=1641, top=446, right=1690, bottom=464),
            "confidence": 100.0,
        }
    ]

    assert ocr._match_lines(lines, "New", 100) == expected
def _find_size(self, base: Geometry, size: SizeLocator):
    """Find region of fixed size around base, or origin if no base defined.

    :param base: Previous result to center the new region on: a ``Region``
        (its ``center`` is used), an object with ``x`` and ``y``
        attributes, or ``Undefined`` when there is no previous result
    :param size: Locator providing the ``width`` and ``height`` of the region
    :return: List containing the single resulting region
    """
    if isinstance(base, Undefined):
        # Wrapped in a list so that both branches return the same shape
        return [Region.from_size(0, 0, size.width, size.height)]

    center = base.center if isinstance(base, Region) else base

    left = center.x - size.width // 2
    top = center.y - size.height // 2

    return [Region.from_size(left, top, size.width, size.height)]
def _match_lines(lines: List[Dict], text: str, confidence: float) -> List[Dict]:
    """Search lines of recognized words for the best matches to a target text.

    Every contiguous run of 1 to N words in a line of N words is joined
    with spaces and compared against the target text. For each line, only
    the highest scoring run that reaches the required confidence is kept;
    on equal scores the run found first wins.

    :param lines: Lines of words, each word a dictionary with the
        keys ``text`` and ``region``
    :param text: Text to search for
    :param confidence: Minimum similarity, on a scale of 0 to 100
    :return: Dictionaries with the keys ``text``, ``region`` and
        ``confidence``, ordered from highest to lowest confidence
    """
    results = []

    for line_words in lines:
        best = None
        word_count = len(line_words)

        for length in range(1, word_count + 1):
            for start in range(word_count - length + 1):
                chunk = line_words[start : start + length]
                chunk_regions = [item["region"] for item in chunk]
                phrase = " ".join(item["text"] for item in chunk)

                score = SequenceMatcher(None, phrase, text).ratio() * 100.0
                if score < confidence:
                    continue

                # Replace the current best only with a strictly better score
                if best is not None and best["confidence"] >= score:
                    continue

                best = {
                    "text": phrase,
                    "region": Region.merge(chunk_regions),
                    "confidence": score,
                }

        if best is not None:
            results.append(best)

    results.sort(key=lambda item: item["confidence"], reverse=True)
    return results
def _find_region(self, base: Geometry, region: RegionLocator):
    """Resolve a region locator into an absolute region on screen.

    The coordinates are never relative to ``base``. If a base is
    defined anyway, a warning is logged and the base is ignored.

    :param base: Previous result, or ``Undefined`` if there is none
    :param region: Locator with ``left``, ``top``, ``right`` and ``bottom``
    :return: List containing the single resulting region
    """
    has_base = not isinstance(base, Undefined)
    if has_base:
        self.logger.warning("Using absolute region coordinates")

    return [Region(region.left, region.top, region.right, region.bottom)]
def _iter_matches(self, image, template) -> Region:
    """Brute-force search for template image in larger image.

    Use optimized string search for finding the first row and then
    check if whole template matches.

    Both images are converted to grayscale before comparison, and a
    position matches only if every pixel row of the template is equal
    to the corresponding part of the image.

    This is a generator: it yields one ``Region`` of the template's
    size for each matching position.

    TODO: Generalize string-search algorithm to work in two dimensions

    :param image: Image to search from
    :param template: Image to search for
    """
    image = ImageOps.grayscale(image)
    template = ImageOps.grayscale(template)

    template_width, template_height = template.size
    template_rows = chunks(tuple(template.getdata()), template_width)

    image_width, _ = image.size
    image_rows = chunks(tuple(image.getdata()), image_width)

    if not template_rows:
        return

    # Lowest row the template can start from and still fit in the image.
    # The range below includes it, so that matches touching the bottom
    # edge (or a template as tall as the image) are found as well.
    last_y = len(image_rows) - len(template_rows)

    for image_y in range(last_y + 1):
        image_row = image_rows[image_y]
        for image_x in self._search_string(image_row, template_rows[0]):
            # The first template row matched on row image_y, so the
            # remaining template rows are compared starting from the
            # next image row.
            remaining = enumerate(template_rows[1:], image_y + 1)
            if all(
                image_rows[match_y][image_x : image_x + template_width]
                == template_row
                for match_y, template_row in remaining
            ):
                yield Region.from_size(
                    image_x, image_y, template_width, template_height
                )
def resize_region(
    self,
    region: Region,
    left: int = 0,
    top: int = 0,
    right: int = 0,
    bottom: int = 0,
) -> Region:
    """
    Create a new ``Region`` by resizing the given region.

    Each edge is extended by the given amount outward from the center,
    i.e. a positive left value moves the left edge further to the left.

    :param region: The region to resize.
    :param left: Amount of pixels to resize left edge.
    :param top: Amount of pixels to resize top edge.
    :param right: Amount of pixels to resize right edge.
    :param bottom: Amount of pixels to resize bottom edge.

    Usage examples:

    .. code-block:: robotframework

        ${region}=          Find Element    ocr:"Net Assets"
        ${resized_region}=  Resize Region   ${region}    bottom=10

    .. code-block:: python

        region = desktop.find_element('ocr:"Net Assets"')
        resized_region = desktop.resize_region(region, bottom=10)
    """
    amounts = (left, top, right, bottom)
    return region.resize(*amounts)
def _find(self, locator: str) -> List[Geometry]:
    """Resolve a locator and search for it, returning all matches.

    A ``Region`` or ``Point`` given in place of a locator is returned
    as-is inside a list. Anything else is parsed with ``parse_locator``
    and handled according to the resulting locator type.

    :param locator: Locator to resolve
    :return: List of matching points or regions
    :raises NotImplementedError: If the parsed locator type is not supported
    """
    if isinstance(locator, (Region, Point)):
        return [locator]

    parsed = parse_locator(locator)
    self.logger.info("Using locator: %s", parsed)

    if isinstance(parsed, PointLocator):
        return [Point(parsed.x, parsed.y)]

    if isinstance(parsed, OffsetLocator):
        # Offsets are relative to the current mouse position
        origin = self.ctx.get_mouse_position()
        return [origin.move(parsed.x, parsed.y)]

    if isinstance(parsed, RegionLocator):
        return [Region(parsed.left, parsed.top, parsed.right, parsed.bottom)]

    if isinstance(parsed, ImageLocator):
        ensure_recognition()
        return self._find_templates(parsed)

    if isinstance(parsed, OcrLocator):
        ensure_recognition()
        return self._find_ocr(parsed)

    raise NotImplementedError(f"Unsupported locator: {parsed}")
def get_display_dimensions(self) -> Region:
    """Returns the dimensions of the current virtual display,
    which is the combined size of all physical monitors.

    :return: Region built from the position and size of the first
        entry in the ``mss`` monitor list
    """
    with mss.mss() as capture:
        virtual = capture.monitors[0]
        origin_x, origin_y = virtual["left"], virtual["top"]
        width, height = virtual["width"], virtual["height"]
        return Region.from_size(origin_x, origin_y, width, height)
def move_region(self, region: Region, left: int, top: int) -> Region:
    """
    Create a new ``Region`` that is offset from the given region.

    :param region: the region to move.
    :param left: amount of pixels to move left/right.
    :param top: amount of pixels to move up/down.
    """
    moved = region.move(left, top)
    return moved
def test_find_template(region_and_template):
    """The template is found exactly once, centered on the expected region."""
    coordinates, template_name = region_and_template
    expected = Region(*coordinates)

    found = templates.find(
        image=IMAGES / "source.png",
        template=IMAGES / template_name,
    )

    assert len(found) == 1
    assert found[0].center == expected.center
def define_region(self, left: int, top: int, right: int, bottom: int) -> Region:
    """
    Create a new ``Region`` from the given edge coordinates.

    :param left: left edge coordinate.
    :param top: top edge coordinate.
    :param right: right edge coordinate.
    :param bottom: bottom edge coordinate.
    """
    edges = (left, top, right, bottom)
    return Region(*edges)
def highlight_elements(self, locator: str):
    """Draw an outline around all matching elements.

    Regions are outlined as-is. Points are outlined with a square
    that extends 5 pixels from the point in every direction.

    :param locator: Locator for the elements to highlight
    :raises NotImplementedError: If not running on Windows
    :raises TypeError: If a match is neither a ``Region`` nor a ``Point``
    """
    if not utils.is_windows():
        raise NotImplementedError("Not supported on non-Windows platforms")

    for element in self.ctx.find(locator):
        if isinstance(element, Region):
            outline = element
        elif isinstance(element, Point):
            outline = Region(
                element.x - 5, element.y - 5, element.x + 5, element.y + 5
            )
        else:
            raise TypeError(f"Unknown location type: {element}")

        _draw_outline(outline)
def resize_region(
    self,
    region: Region,
    left: Optional[int] = 0,
    top: Optional[int] = 0,
    right: Optional[int] = 0,
    bottom: Optional[int] = 0,
) -> Region:
    """
    Return a resized new ``Region`` from a given region.

    An amount given as ``None`` is treated as zero, i.e. that edge
    is left unchanged.

    :param region: the region to resize.
    :param left: amount of pixels to resize left edge.
    :param top: amount of pixels to resize top edge.
    :param right: amount of pixels to resize right edge.
    :param bottom: amount of pixels to resize bottom edge.
    """
    # The signature allows None, so normalize it here instead of
    # forwarding it to the region as a pixel amount.
    amounts = [
        0 if amount is None else amount
        for amount in (left, top, right, bottom)
    ]
    return region.resize(*amounts)
def find(
    image: Union[Image.Image, Path],
    text: str,
    confidence: float = DEFAULT_CONFIDENCE,
):
    """Scan image for text and return a list of regions
    that contain it (or something close to it).

    :param image: Path to image or Image object
    :param text: Text to find in image
    :param confidence: Minimum confidence for text similarity,
        clamped to the range 1-100
    :return: Matches as returned by ``_match_lines``
    :raises ValueError: If the search text is empty after stripping
    :raises EnvironmentError: If tesseract is not installed
    """
    image = to_image(image)
    confidence = clamp(1, float(confidence), 100)

    text = str(text).strip()
    if not text:
        raise ValueError("Empty search string")

    try:
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
    except TesseractNotFoundError as err:
        raise EnvironmentError(INSTALL_PROMPT) from err

    # Group recognized words by the line they belong to
    lines = defaultdict(list)
    for word in _iter_rows(data):
        # Only word-level rows are used, rows for larger units are skipped
        if word["level"] != 5:
            continue

        # Blank words are dropped. Because of this the amount of words
        # collected for a line can be lower than the word number reported
        # by tesseract, so the two are intentionally not compared.
        if not word["text"].strip():
            continue

        key = "{:d}-{:d}-{:d}".format(
            word["block_num"], word["par_num"], word["line_num"]
        )
        region = Region.from_size(
            word["left"], word["top"], word["width"], word["height"]
        )

        # NOTE: Currently ignoring confidence in tesseract results
        lines[key].append({"text": word["text"], "region": region})

    matches = _match_lines(lines.values(), text, confidence)
    return matches
def _match_template(
    image: Image.Image, template: Image.Image, tolerance: float
) -> Iterator[Region]:
    """Use opencv's matchTemplate() to slide the `template` over
    `image` to calculate correlation coefficients, and then
    filter with a tolerance to find all relevant global maximums.

    Matches are yielded from best to worst until the best remaining
    coefficient is below the tolerance.

    NOTE: Suppressed positions are set to zero, so the search only
    terminates for a tolerance greater than zero.

    :param image: Image to search from
    :param template: Image to search for
    :param tolerance: Minimum correlation coefficient for a match
    """
    template_width, template_height = template.size

    # Drop the alpha channel, as the color conversion below expects RGB
    if image.mode == "RGBA":
        image = image.convert("RGB")
    if template.mode == "RGBA":
        template = template.convert("RGB")

    image = numpy.array(image)
    template = numpy.array(template)

    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    template = cv2.cvtColor(template, cv2.COLOR_RGB2BGR)

    # Template matching result is a single channel array of shape:
    #   Width:  Image width  - template width  + 1
    #   Height: Image height - template height + 1
    coefficients = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
    coeff_height, coeff_width = coefficients.shape

    while True:
        # The point (match_x, match_y) is the top-left of the best match
        _, match_coeff, _, (match_x, match_y) = cv2.minMaxLoc(coefficients)
        if match_coeff < tolerance:
            break

        # Zero out values around the best match (half of the template
        # size in each direction) to prevent duplicate matches for
        # the same element.
        left = clamp(0, match_x - template_width // 2, coeff_width)
        top = clamp(0, match_y - template_height // 2, coeff_height)
        right = clamp(0, match_x + template_width // 2, coeff_width)
        bottom = clamp(0, match_y + template_height // 2, coeff_height)

        coefficients[top:bottom, left:right] = 0

        # For a template that is one pixel wide or tall the slice above
        # is empty. Always clear the match itself, as otherwise the same
        # position would be returned again on every iteration.
        coefficients[match_y, match_x] = 0

        yield Region.from_size(match_x, match_y, template_width, template_height)
def resize_region(
    self,
    region: Region,
    left: int = 0,
    top: int = 0,
    right: int = 0,
    bottom: int = 0,
) -> Region:
    """
    Create a new ``Region`` by resizing the given region.

    Each edge is extended by the given amount outward from the center,
    i.e. a positive left value moves the left edge further to the left.

    :param region: The region to resize.
    :param left: Amount of pixels to resize left edge.
    :param top: Amount of pixels to resize top edge.
    :param right: Amount of pixels to resize right edge.
    :param bottom: Amount of pixels to resize bottom edge.
    """
    amounts = (left, top, right, bottom)
    return region.resize(*amounts)
def _dict_lines(data: Dict) -> List:
    """Group the recognized words in tesseract output data by line.

    :param data: Output data, iterated as rows with ``_iter_rows``
    :return: List of lines, where each line is a list of dictionaries
        with the keys ``text`` and ``region``
    """
    lines = defaultdict(list)
    for word in _iter_rows(data):
        # Only word-level rows are used, rows for larger units are skipped
        if word["level"] != 5:
            continue

        # Blank words are dropped. Because of this the amount of words
        # collected for a line can be lower than the word number reported
        # by tesseract, so the two are intentionally not compared.
        if not word["text"].strip():
            continue

        key = "{:d}-{:d}-{:d}".format(
            word["block_num"], word["par_num"], word["line_num"]
        )
        region = Region.from_size(
            word["left"], word["top"], word["width"], word["height"]
        )

        # NOTE: Currently ignoring confidence in tesseract results
        lines[key].append({"text": word["text"], "region": region})

    return list(lines.values())
def _find_from_displays(
    self, finder: Callable[[Image.Image], List[Region]]
) -> List[Region]:
    """Run the finder function against every display and
    collect the found regions into one list.

    Regions found in a display's image are transformed from the
    image's coordinates to those of the display. A preview image is
    cropped for each found region and logged along with the match.

    :param finder: Callable that searches an image
    :return: All found regions
    """
    matches = []
    screenshots = []

    # Search all displays, and map results to combined virtual display
    start_time = time.time()
    for display in screen.displays():
        image = screen.grab(display)
        found = finder(image)

        # Previews are cropped with a margin of 5 pixels around the match
        screenshots.extend(
            image.crop(region.resize(5).as_tuple()) for region in found
        )

        width, height = image.size
        local = Region.from_size(0, 0, width, height)
        matches.extend(transform(found, local, display))

    # Log matches and preview images
    duration = time.time() - start_time
    count = len(matches)
    plural = "" if count == 1 else "es"

    self.logger.info("Searched in %.2f seconds", duration)
    self.logger.info("Found %d match%s", count, plural)

    for match, screenshot in zip(matches, screenshots):
        screen.log_image(screenshot, size=400)
        self.logger.info(match)

    return matches
def move_region(self, region: Region, left: int, top: int) -> Region:
    """
    Create a new ``Region`` that is offset from the given region.

    :param region: The region to move.
    :param left: Amount of pixels to move left/right.
    :param top: Amount of pixels to move up/down.

    Usage examples:

    .. code-block:: robotframework

        ${region}=          Find Element    ocr:"Net Assets"
        ${moved_region}=    Move Region     ${region}    500    0

    .. code-block:: python

        region = desktop.find_element('ocr:"Net Assets"')
        moved_region = desktop.move_region(region, 500, 0)
    """
    moved = region.move(left, top)
    return moved
def define_region(self, left: int, top: int, right: int, bottom: int) -> Region:
    """
    Create a new ``Region`` from the given edge coordinates.

    :param left: Left edge coordinate.
    :param top: Top edge coordinate.
    :param right: Right edge coordinate.
    :param bottom: Bottom edge coordinate.

    Usage examples:

    .. code-block:: robotframework

        ${region}=  Define Region   10  10  50  30

    .. code-block:: python

        region = desktop.define_region(10, 10, 50, 30)
    """
    edges = (left, top, right, bottom)
    return Region(*edges)
def _iter_match_pillow(self, image, template, tolerance):
    """Brute-force search for template image in larger image.

    Use optimized string search for finding the first row and then
    check if whole template matches.

    Both images are converted to grayscale before comparison, and a
    position matches only if every pixel row of the template is equal
    to the corresponding part of the image. The tolerance is not used
    by this search method; a warning about it is logged once if a
    tolerance is given.

    This is a generator: it yields one ``Region`` of the template's
    size for each matching position.

    TODO: Generalize string-search algorithm to work in two dimensions

    :param image: Image to search from
    :param template: Image to search for
    :param tolerance: Ignored, other than for the one-time warning
    """
    if tolerance is not None and not self._tolerance_warned:
        self._tolerance_warned = True
        self.logger.warning(
            "Template matching tolerance not supported for current search method"
        )

    image = ImageOps.grayscale(image)
    template = ImageOps.grayscale(template)

    template_width, template_height = template.size
    template_rows = chunks(tuple(template.getdata()), template_width)

    image_width, _ = image.size
    image_rows = chunks(tuple(image.getdata()), image_width)

    if not template_rows:
        return

    # Lowest row the template can start from and still fit in the image.
    # The range below includes it, so that matches touching the bottom
    # edge (or a template as tall as the image) are found as well.
    last_y = len(image_rows) - len(template_rows)

    for image_y in range(last_y + 1):
        image_row = image_rows[image_y]
        for image_x in self._search_string(image_row, template_rows[0]):
            # The first template row matched on row image_y, so the
            # remaining template rows are compared starting from the
            # next image row.
            remaining = enumerate(template_rows[1:], image_y + 1)
            if all(
                image_rows[match_y][image_x : image_x + template_width]
                == template_row
                for match_y, template_row in remaining
            ):
                yield Region.from_size(
                    image_x, image_y, template_width, template_height
                )
def _monitor_to_region(monitor: Dict) -> Region:
    """Build a Region from the position and size of an mss monitor.

    :param monitor: Dictionary with the keys ``left``, ``top``,
        ``width`` and ``height``
    """
    left, top = monitor["left"], monitor["top"]
    width, height = monitor["width"], monitor["height"]
    return Region.from_size(left, top, width, height)