def _screencap_to_image(cap, rotate=0):
    w, h, pixels = cap
    if len(pixels) == w * h * 4 + 4:
        # new format for Android P: pixel data is prefixed with a colorspace dword,
        # so read it from offset 0 before stripping it off
        colorspace = struct.unpack_from('<I', pixels, 0)[0]
        pixels = pixels[4:]
    elif len(pixels) == w * h * 4:
        colorspace = 0
    else:
        raise ValueError('screencap short read')
    arr = np.frombuffer(pixels, dtype=np.uint8)
    arr = arr.reshape((h, w, 4))
    if rotate == 0:
        pass
    elif rotate == 90:
        arr = cv2.rotate(arr, cv2.ROTATE_90_COUNTERCLOCKWISE)
    elif rotate == 180:
        arr = cv2.rotate(arr, cv2.ROTATE_180)
    elif rotate == 270:
        arr = cv2.rotate(arr, cv2.ROTATE_90_CLOCKWISE)
    else:
        raise ValueError('invalid rotate')
    if colorspace == 2:
        # colorspace 2 is Display P3; convert to sRGB before recognition
        from PIL import Image as PILImage, ImageCms
        from imgreco.cms import p3_profile, srgb_profile
        # use the (possibly rotated) array's own dimensions, not the pre-rotation w/h
        pil_im = PILImage.frombuffer('RGBA', (arr.shape[1], arr.shape[0]), arr, 'raw', 'RGBA', 0, 1)
        ImageCms.profileToProfile(pil_im, p3_profile, srgb_profile,
                                  ImageCms.INTENT_RELATIVE_COLORIMETRIC, inPlace=True)
        # read the converted pixels back instead of relying on buffer sharing,
        # which does not hold once cv2.rotate has allocated a new array
        arr = np.asarray(pil_im)
    return Image.fromarray(arr, 'RGBA')
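# A minimal sketch of how a `cap` tuple for _screencap_to_image() could be
# produced from raw `adb exec-out screencap` output. The 12-byte header layout
# (width, height, pixel format, little-endian uint32 each) is an assumption
# based on AOSP screencap; everything after it, including the Android P
# colorspace dword, is left inside `pixels` for _screencap_to_image() to parse.
import struct
import subprocess

def grab_raw_screencap(serial):
    data = subprocess.run(
        ['adb', '-s', serial, 'exec-out', 'screencap'],
        capture_output=True, check=True).stdout
    w, h, pixel_format = struct.unpack_from('<III', data, 0)
    pixels = data[12:]  # may still carry the 4-byte colorspace prefix on Android P+
    return w, h, pixels

# usage: image = _screencap_to_image(grab_raw_screencap('emulator-5554'), rotate=0)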
def _find_homography_test(templ, haystack):
    pts = match_feature(templ, haystack).template_corners
    # np.array() makes a writable copy; cv.polylines draws in place
    img2 = cv.polylines(np.array(haystack.convert('L')), [np.int32(pts)], True, 0, 2, cv.LINE_AA)
    img = Image.fromarray(img2, 'L')
    print(pts)
    img.show()
class _ScreenCapImplPNG:
    def __init__(self, device_session_factory, rotate, displayid=None):
        self.device_session_factory = device_session_factory
        self.screenshot_rotate = rotate
        self.displayid = displayid
        if self.displayid is None:
            self.command = 'screencap -p'
        else:
            self.command = 'screencap -p -d {}'.format(self.displayid)

    def check(self):
        return self.screencap().size

    def screencap(self):
        from PIL import Image as PILImage, ImageCms
        s = self.device_session_factory().exec_stream(self.command)
        data = recvall(s, 4194304, True)
        img = PILImage.open(BytesIO(data))
        if self.screenshot_rotate != 0:
            # expand=True keeps the full canvas for 90/270 degree rotations
            img = img.rotate(self.screenshot_rotate, expand=True)
        if icc := img.info.get('icc_profile', ''):
            iccio = BytesIO(icc)
            src_profile = ImageCms.ImageCmsProfile(iccio)
            from imgreco.cms import srgb_profile
            ImageCms.profileToProfile(img, src_profile, srgb_profile,
                                      ImageCms.INTENT_RELATIVE_COLORIMETRIC, inPlace=True)
        from util import pil_zerocopy
        return Image.fromarray(pil_zerocopy.asarray(img), img.mode)
def get_recruit_tags(img):
    import textdistance
    vw, vh = common.get_vwvh(img)
    # five tag cells: three in the first row, two in the second
    tagimgs = [
        img.crop((50 * vw - 36.481 * vh, 50.185 * vh, 50 * vw - 17.315 * vh, 56.111 * vh)).convert('L'),
        img.crop((50 * vw - 13.241 * vh, 50.185 * vh, 50 * vw + 6.111 * vh, 56.111 * vh)).convert('L'),
        img.crop((50 * vw + 10.000 * vh, 50.185 * vh, 50 * vw + 29.259 * vh, 56.111 * vh)).convert('L'),
        img.crop((50 * vw - 36.481 * vh, 60.278 * vh, 50 * vw - 17.315 * vh, 66.019 * vh)).convert('L'),
        img.crop((50 * vw - 13.241 * vh, 60.278 * vh, 50 * vw + 6.111 * vh, 66.019 * vh)).convert('L'),
    ]
    # binarize and invert: dark text on light tags becomes white on black
    tagimgs = [
        Image.fromarray(cv2.threshold(tagimg.array, 127, 255, cv2.THRESH_BINARY_INV)[1])
        for tagimg in tagimgs
    ]
    eng = ocr.acquire_engine_global_cached('zh-cn')
    recognize = lambda tagimg: eng.recognize(
        tagimg, int(vh * 20),
        hints=[ocr.OcrHint.SINGLE_LINE],
        char_whitelist=known_tagchars
    ).text.replace(' ', '')
    cookedtags = []
    for tagimg in tagimgs:
        logger.logimage(tagimg)
        tag = recognize(tagimg)
        logger.logtext(tag)
        if not tag:
            continue
        if tag in known_tags:
            cookedtags.append(tag)
            continue
        # autocorrect OCR output to the closest known tag by edit distance
        distances = [(target, textdistance.levenshtein(tag, target))
                     for target in known_tags.difference(cookedtags)]
        distances.sort(key=lambda x: x[1])
        mindistance = distances[0][1]
        matches = [x[0] for x in distances if x[1] == mindistance]
        if mindistance > 2:
            logger.logtext('autocorrect: minimum distance %d too large' % mindistance)
            cookedtags.append(tag)
        elif len(matches) == 1:
            logger.logtext('autocorrect to %s, distance %d' % (matches[0], mindistance))
            cookedtags.append(matches[0])
        else:
            logger.logtext('autocorrect: failed to match in %s with distance %d'
                           % (','.join(matches), mindistance))
            cookedtags.append(tag)
    return cookedtags
def recognize(self, img, ppi=70, hints=None, **kwargs):
    if hints is None:
        hints = []
    if OcrHint.SINGLE_LINE in hints:
        # replicate-pad the line so glyphs don't touch the image border
        img2 = cv2.copyMakeBorder(np.asarray(img), 32, 32, 32, 32, cv2.BORDER_REPLICATE)
        img = Image.fromarray(img2, img.mode)
    swbmp = _swbmp_from_pil_image(img)
    return _dump_ocrresult(self.winengine.RecognizeAsync(swbmp).wait())
def populate():
    dirs, files = resources.get_entries('riic/bskill')
    for file in files:
        if file.endswith('.png'):
            name = file[:-4]
            im = resources.load_image('riic/bskill/' + file, 'RGBA')
            if im.size != icon_size:
                im = im.resize(icon_size, Image.BILINEAR)
            # composite the RGBA icon onto the dark gray (32, 32, 32) panel background
            background = np.full((im.height, im.width, 3), 32)
            alpha = im.array[..., 3] / 255.0
            normal_blend = (im.array[..., 0:3] * alpha[..., np.newaxis]
                            + (1 - alpha[..., np.newaxis]) * background).astype(np.uint8)
            # the "dark" variant renders the same icon at half opacity
            alpha = alpha / 2.0
            dark_blend = (im.array[..., 0:3] * alpha[..., np.newaxis]
                          + (1 - alpha[..., np.newaxis]) * background).astype(np.uint8)
            normal_icons[name] = Image.fromarray(normal_blend, 'RGB')
            dark_icons[name] = Image.fromarray(dark_blend, 'RGB')
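# A quick numeric check of the alpha blend above (demo values assumed, not from
# the project): an opaque white pixel stays white, a fully transparent pixel
# takes the gray-32 background, and a half-alpha white pixel lands in between.
import numpy as np
fg, a, bg = np.array([255, 255, 255]), 0.5, np.array([32, 32, 32])
print((fg * a + (1 - a) * bg).astype(np.uint8))  # -> [143 143 143]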
def image_threshold_mat2img(mat, threshold=127):
    """threshold filter on L channel

    :param threshold: negative value means inverted output
    """
    if threshold < 0:
        resultmat = mat <= -threshold
    else:
        resultmat = mat >= threshold
    # boolean mask -> 0/1 indices -> LUT maps any nonzero index to 255
    lut = np.zeros(256, dtype=np.uint8)
    lut[1:] = 255
    return Image.fromarray(lut[resultmat.astype(np.uint8)], 'L').convert('1')
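# A small usage sketch for image_threshold_mat2img(); the demo array is
# hypothetical. A positive threshold whitens bright pixels, a negative one
# whitens dark pixels.
import numpy as np
gray = np.array([[0, 100, 200]], dtype=np.uint8)
bright_mask = image_threshold_mat2img(gray, 127)   # white where pixel >= 127
dark_mask = image_threshold_mat2img(gray, -127)    # white where pixel <= 127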
def enhance_contrast(img, lower=90, upper=None):
    img = np.asarray(img, dtype=np.uint8)
    if upper is None:
        # int() avoids uint8 overflow in the upper + 1 arithmetic below
        upper = int(np.max(img))
    # piecewise LUT: clamp below `lower` to 0, above `upper` to 255,
    # and stretch [lower, upper] linearly across the full range
    lut = np.zeros(256, dtype=np.uint8)
    lut[lower:upper + 1] = np.linspace(0, 255, upper - lower + 1, endpoint=True, dtype=np.uint8)
    lut[upper + 1:] = 255
    return Image.fromarray(lut[img])
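# Numeric sketch of the LUT endpoints (demo values are assumptions): with
# lower=90 and upper=220, pixel 90 maps to 0, 220 to 255, and the midpoint
# 155 to roughly 127.
import numpy as np
demo = np.array([[80, 90, 155, 220, 240]], dtype=np.uint8)
print(np.asarray(enhance_contrast(demo, lower=90, upper=220)))  # [[  0   0 127 255 255]]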
def ocr_for_single_line(img, cand_alphabet: str = None, engine=None):
    # import unconditionally: ocr.OcrHint is needed even when an engine is passed in
    from imgreco import ocr
    if engine is None:
        engine = ocr.acquire_engine_global_cached('zh-cn')
    from util import cvimage as Image
    extra_args = {}
    if cand_alphabet:
        # restrict recognition to the candidate alphabet when provided
        extra_args['char_whitelist'] = cand_alphabet
    res = engine.recognize(Image.fromarray(img),
                           hints=[ocr.OcrHint.SINGLE_LINE],
                           **extra_args).text.replace(' ', '')
    return res
def split_chars(textimg, split_threshold=127):
    """requires white chars on black background, grayscale image"""
    img = imgops.crop_blackedge(textimg, split_threshold)
    if img is None:
        return []
    # logger.logimage(img)
    mat = np.asarray(img, dtype=np.uint8)
    left = 0
    inchar = True
    chars = []
    height, width, *_ = mat.shape
    spacing_threshold = 1
    spaces = 0
    # scan columns left to right; a run of all-dark columns ends a character
    for x in range(left, width):
        col = mat[:, x]
        if inchar and (col < split_threshold).all():
            spaces += 1
            if spaces >= spacing_threshold:
                inchar = False
                if left != x:
                    chars.append(imgops.crop_blackedge(Image.fromarray(mat[:, left:x + 1])))
        if not inchar and (col > split_threshold).any():
            # a bright pixel starts the next character
            left = x
            inchar = True
            spaces = 0
    if inchar and left != x:
        # flush the last character, which runs to the right edge
        chars.append(imgops.crop_blackedge(Image.fromarray(mat[:, left:x + 1])))
    # for cimg in chars:
    #     logger.logimage(cimg)
    return chars
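# Usage sketch: split a binarized "white text on black" line into per-character
# crops and feed each to the single-line OCR helper above; line_img is a
# hypothetical grayscale image meeting split_chars()'s precondition.
for char_img in split_chars(line_img):
    print(ocr_for_single_line(np.asarray(char_img), cand_alphabet='0123456789'))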
def _screencap_to_image(cap, rotate=0):
    # simpler variant without the Android P colorspace handling
    w, h, pixels = cap
    mem = memoryview(pixels)
    arr = np.frombuffer(mem, dtype=np.uint8)
    arr = arr.reshape((h, w, 4))
    if rotate == 0:
        pass
    elif rotate == 90:
        arr = cv2.rotate(arr, cv2.ROTATE_90_COUNTERCLOCKWISE)
    elif rotate == 180:
        arr = cv2.rotate(arr, cv2.ROTATE_180)
    elif rotate == 270:
        arr = cv2.rotate(arr, cv2.ROTATE_90_CLOCKWISE)
    else:
        raise ValueError('invalid rotate')
    return Image.fromarray(arr, 'RGBA')
def clear_background(img, threshold=90):
    mat = np.array(img, dtype=np.uint8)
    mask = mat < threshold
    mat[mask] = 0
    return Image.fromarray(mat)
def invert_color(img):
    mat = np.asarray(img)
    lut = np.linspace(255, 0, 256, dtype=np.uint8)
    resultmat = lut[mat]
    return Image.fromarray(resultmat, img.mode)
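# A possible preprocessing chain combining the two helpers above; `crop` is a
# hypothetical grayscale crop with light text on a dark background.
crop = clear_background(crop, threshold=90)  # zero out near-black noise
crop = invert_color(crop)                    # yield dark text on white for OCR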
def recognize_operator_box(self, img: Image.Image, recognize_skill=False,
                           skill_facility_hint=None) -> OperatorBox:
    name_img = img.subview((0, 375, img.width, img.height - 2)).convert('L')
    name_img = imgops.enhance_contrast(name_img, 90, 220)
    name_img = imgops.crop_blackedge2(name_img)
    # invert and pad: Tesseract prefers dark text on a white background
    name_img = Image.fromarray(cv2.copyMakeBorder(255 - name_img.array, 8, 8, 8, 8,
                                                  cv2.BORDER_CONSTANT, value=[255, 255, 255]))
    # save image for training ocr
    # name_img.save(os.path.join(config.SCREEN_SHOOT_SAVE_PATH, '%d-%04d.png' % (self.tag, self.seq)))
    self.seq += 1
    # OcrHint.SINGLE_LINE (PSM 7) will ignore some operator names; use raw line for LSTM (PSM 13) here.
    # The effect of user-words is questionable: it seldom produces improved output
    # (maybe we need an LSTM word dawg instead)
    ocrresult = self.ocr.recognize(name_img, ppi=240, tessedit_pageseg_mode='13',
                                   user_words_file='operators')
    name = ocrresult.text.replace(' ', '')
    if name not in operator_set:
        # fall back to the closest known operator name by edit distance
        comparisons = [(n, textdistance.levenshtein(name, n)) for n in operator_set]
        comparisons.sort(key=lambda x: x[1])
        self.logger.debug('%s not in operator set, closest match: %s' % (name, comparisons[0][0]))
        if comparisons[0][1] == comparisons[1][1]:
            self.logger.warning('multiple fixes available for %r', ocrresult)
        name = comparisons[0][0]
    # mood bar: fraction of bright pixels along the bar, scaled to 0-24
    mood_img = img.subview(Rect.from_xywh(44, 358, 127, 3)).convert('L').array
    mood_img = np.max(mood_img, axis=0)
    mask = (mood_img >= 200).astype(np.uint8)
    mood = np.count_nonzero(mask) / mask.shape[0] * 24
    # classify the status tag by MSE against reference images
    tagimg = img.subview((35, 209, 155, 262))
    on_shift = resources.load_image_cached('riic/on_shift.png', 'RGB')
    distracted = resources.load_image_cached('riic/distracted.png', 'RGB')
    rest = resources.load_image_cached('riic/rest.png', 'RGB')
    tag = None
    if imgops.compare_mse(tagimg, on_shift) < 3251:
        tag = 'on_shift'
    elif imgops.compare_mse(tagimg, distracted) < 3251:
        tag = 'distracted'
    elif imgops.compare_mse(tagimg, rest) < 3251:
        tag = 'rest'
    # a near-uniform gray strip at the top indicates a room label is present
    has_room_check = img.subview(Rect.from_xywh(45, 2, 62, 6)).convert('L')
    mse = np.mean(np.power(has_room_check.array.astype(np.float32) - 50, 2))
    self.richlogger.logtext(f'has_room_check mse={mse}')
    if mse < 200:
        room_img = img.subview(Rect.from_xywh(42, 6, 74, 30)).array
        room_img = imgops.enhance_contrast(Image.fromarray(np.max(room_img, axis=2)), 64, 220)
        room_img = Image.fromarray(255 - room_img.array)
        self.richlogger.logimage(room_img)
        room = self.ocr.recognize(room_img, ppi=240, hints=[ocr.OcrHint.SINGLE_LINE],
                                  char_whitelist='0123456789FB').text.replace(' ', '')
    else:
        room = None
    if recognize_skill:
        skill1_icon = img.subview(Rect.from_xywh(4, 285, 54, 54))
        skill2_icon = img.subview(Rect.from_xywh(67, 285, 54, 54))
        skill1, score1 = self.recognize_skill(skill1_icon, skill_facility_hint)
        skill2, score2 = self.recognize_skill(skill2_icon, skill_facility_hint)
    else:
        skill1 = None
        skill2 = None
    skill_icons = []
    if skill1 is not None:
        skill_icons.append(skill1)
    if skill2 is not None:
        skill_icons.append(skill2)
    self.richlogger.logimage(name_img)
    self.richlogger.logtext(repr(ocrresult))
    result = OperatorBox(None, name, mood, tag, room, skill_icons=skill_icons)
    self.richlogger.logtext(repr(result))
    return result
def convert_to_pil(cv_img, color_code=cv.COLOR_BGR2RGB):
    return Image.fromarray(cv.cvtColor(cv_img, color_code))
def convert_to_pil(cv_img):
    return Image.fromarray(cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB))
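# Usage sketch: OpenCV decodes images in BGR channel order, so convert before
# handing the array to PIL-based code; 'frame.png' is a placeholder path.
import cv2
bgr = cv2.imread('frame.png')   # HxWx3 uint8, BGR channel order
pil_img = convert_to_pil(bgr)   # RGB image for the recognition helpers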