def checkOrientation(img, path):
    print("Processing PDF Page: ", path)
    errorIncurred = False
    try:
        pageDescrp = image_to_osd(img)
    except TesseractError:
        print("Tesseract Orientation Error")
        errorIncurred = True
    if not errorIncurred:
        index1 = pageDescrp.find('Rotate')
        index2 = pageDescrp.find('confidence')
        orienDescrp = pageDescrp[index1:index2]
        rotateAngle = list(filter(str.isdigit, orienDescrp))
        orienDescrp = ""
        for i in range(0, len(rotateAngle)):
            orienDescrp = orienDescrp + rotateAngle[i]
        # Perform rotation if required
        rotateAngle = int(orienDescrp)
        if rotateAngle > 0:
            rotatedImg = imutils.rotate_bound(img, rotateAngle)
            cv2.imwrite(path, rotatedImg)
        else:
            # Check angular rotation of image:
            checkSkewness(img, path)
    else:
        # Check angular rotation of image:
        checkSkewness(img, path)
def rotate_img(image, count=0):
    """Rotate the image if it is not aligned in the right direction."""
    try:
        text = pytesseract.image_to_osd(image)
    except:
        text = None
    #print(text)
    if text is not None and count < 4:
        text = text.split('\n')
        text = text[2].split(':')
        rotate = int(text[1].strip())
        if rotate == 90:
            image = cv2.rotate(image, rotateCode=cv2.ROTATE_90_CLOCKWISE)
        elif rotate == 270:
            image = cv2.rotate(image, rotateCode=cv2.ROTATE_90_COUNTERCLOCKWISE)
        elif rotate == 180:
            image = cv2.rotate(image, rotateCode=cv2.ROTATE_180)
    elif text is None and count < 4:
        # OSD failed; rotate by 90 degrees and retry up to four times.
        count = count + 1
        image = cv2.rotate(image, rotateCode=cv2.ROTATE_90_CLOCKWISE)
        image = rotate_img(image, count)
    return image
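# A minimal usage sketch for rotate_img above, assuming OpenCV and pytesseract
# are importable; the file names "scan.jpg" and "scan_upright.jpg" are
# illustrative, not part of the original snippet.
import cv2

scan = cv2.imread("scan.jpg")
upright = rotate_img(scan)
cv2.imwrite("scan_upright.jpg", upright)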
def read_card(encoded, orientation=0, algorithm='gbc', parser='regex'):
    err = False
    msg = None
    image = cv2.imdecode(encoded, cv2.IMREAD_UNCHANGED)
    print('before=>', image.shape)
    obj_card = detect_object(image)
    obj_card = deskew_object(obj_card)
    prep = image_processing(obj_card)
    if size_thresh(prep):
        err = True
        msg = {'error': f'gambar {image.shape} terlalu kecil'}  # "image ... is too small"
    if not err and orientation:
        osd = ts.image_to_osd(prep)
        angle = re.search(r'(?<=Rotate: )\d+', osd).group(0)
        if not image_orientation(angle):
            err = True
            msg = {'error': f'posisi gambar {angle} derajat'}  # "image orientation is ... degrees"
    if not err:
        data = ts.image_to_string(prep)
        result = card_classifier(data, algorithm, parser)
        return json.dumps(result)
    return json.dumps(msg)
def detect(image):
    # get bounding boxes
    d = pytesseract.image_to_data(Image.open(image), output_type=Output.DICT)
    # draw the bounding boxes over the original image
    img = cv2.imread(image)
    n_boxes = len(d['level'])
    for i in range(n_boxes):
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # convert cv2 to pillow image
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert colorspace
    bounding_boxes_image = Image.fromarray(img)
    # parse ocr string
    output_string = pytesseract.image_to_string(Image.open(image))
    # get image osd metadata
    osd = pytesseract.image_to_osd(Image.open(image), output_type=Output.DICT)
    # print(f'osd: {osd}')
    return {
        'osd': osd,
        'ocr_string': output_string,
    }, bounding_boxes_image
def get_osd_info(img_path):
    '''
    Parse the tesseract OSD output string into a dict for easier variable reading.
    Info:
        int    Page_number
        int    Orientation_in_degrees
        int    Rotate
        float  Orientation_confidence
        string Script
        float  Script_confidence
    '''
    import pytesseract
    osd_info = pytesseract.image_to_osd(img_path)
    info = {}
    for i in osd_info.split('\n'):
        try:
            if '.' in i.split(':')[1].strip():
                info[str(i.split(':')[0].replace(' ', '_'))] = float(
                    i.split(':')[1].strip())
            else:
                info[str(i.split(':')[0].replace(' ', '_'))] = int(
                    i.split(':')[1].strip())
        except:
            info[str(i.split(':')[0].replace(' ', '_'))] = i.split(':')[1].strip()
    return info
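# A hedged usage sketch for get_osd_info above: read the parsed dict and apply
# the "Rotate" value with OpenCV. The file names and the rotation-code mapping
# are illustrative assumptions, not part of the original snippet.
import cv2

info = get_osd_info("page.png")
rotate_codes = {
    90: cv2.ROTATE_90_CLOCKWISE,
    180: cv2.ROTATE_180,
    270: cv2.ROTATE_90_COUNTERCLOCKWISE,
}
if info.get("Rotate") in rotate_codes:
    page = cv2.imread("page.png")
    cv2.imwrite("page_upright.png", cv2.rotate(page, rotate_codes[info["Rotate"]]))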
def _detect_locale(item_rows: numpy.ndarray, locale: str) -> str:
    """Detects the right locale for the given items if required."""
    if locale != 'auto':
        # If locale is already specified, return as is.
        return locale

    # Convert to Pillow image and truncate overly long images.
    image = Image.fromarray(item_rows[:9800, :])
    try:
        osd_data = pytesseract.image_to_osd(
            image, output_type=pytesseract.Output.DICT)
    except pytesseract.TesseractError:
        return 'en-us'

    possible_locales = SCRIPT_MAP.get(osd_data['script'])
    assert possible_locales, 'Failed to automatically detect language.'

    # If we can uniquely guess the language from the script, use that.
    if len(possible_locales) == 1:
        logging.info('Detected locale: %s', possible_locales[0])
        return possible_locales[0]

    # Otherwise, run OCR on the first few items and try to find the best matching locale.
    item_names = run_ocr(item_rows[:30 * 35, :], lang='script/Latin')

    def match_score_func(locale):
        """Computes how many items match for a given locale."""
        item_db = _get_item_db(locale)
        return sum(name in item_db for name in item_names)

    best_locale = max(possible_locales, key=match_score_func)
    logging.info('Detected locale: %s', best_locale)
    return best_locale
def __extract_osd(self):
    '''
    Extracts a dict containing OSD (orientation and script detection) data for the image.
    '''
    self.__osd = pytesseract.image_to_osd(self.image,
                                          lang=self.language,
                                          output_type="dict")
def _get_angle_tesseract(
    self,
    config="--psm 12 --oem 3",
):
    image_osd = pytesseract.image_to_osd(self._image, config=config)
    rotation_angle = re.search(r'(?<=Rotate: )\d+', image_osd).group(0)
    return rotation_angle
def img(url):
    #if url.endswith(".jpeg") or url.endswith(".jpg"):
    urlretrieve(url, "input.jpg")
    im = Image.open(r"input.jpg")
    img = prep(im)
    cv2.imwrite("input.png", img)
    image = cv2.imread("input.png")
    # getting image in the correct orientation
    try:
        angle = int(
            re.search(r'(?<=Orientation in degrees: )\d+',
                      pytesseract.image_to_osd(image)).group(0))
        filename = "{}.png".format(os.getpid())
        cv2.imwrite(filename, ndimage.rotate(image, angle))
        #plt.imshow(ndimage.rotate(image, angle))
        text = pytesseract.image_to_string(Image.open(filename),
                                           config="-l eng --oem 1 --psm 3")
        text = f1(text)
        os.remove(filename)
        return text
    except TesseractError as e:
        c = str(e.message)
        text = pytesseract.image_to_string(Image.open("input.png"),
                                           config="-l eng --oem 1 --psm 3")
        text = f1(text)
        #os.remove(filename)
        print(c)
        return text
def tesseract():
    # If you don't have the tesseract executable in your PATH, include the following:
    # pytesseract.pytesseract.tesseract_cmd = r'<full_path_to_your_tesseract_executable>'
    # Example: tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract'

    # Simple image to string
    # .encode('utf8')  # add to the print statement to make encoding work
    print(pytesseract.image_to_string(Image.open(imgFile)).encode('utf8'))

    # Get bounding box estimates
    print(pytesseract.image_to_boxes(Image.open(imgFile)))

    # Get verbose data including boxes, confidences, line and page numbers
    print(pytesseract.image_to_data(Image.open(imgFile)))

    # Get information about orientation and script detection
    print(pytesseract.image_to_osd(Image.open(imgFile)))

    # In order to bypass the internal image conversions, just use a relative or absolute image path
    # NOTE: If you don't use supported images, tesseract will return an error
    print(pytesseract.image_to_string(imgFile))

    # get a searchable PDF
    pdf = pytesseract.image_to_pdf_or_hocr(imgFile, extension='pdf')

    # get HOCR output
    hocr = pytesseract.image_to_pdf_or_hocr(imgFile, extension='hocr')
def image_to_osd(self, img):
    """Read the text orientation from the image."""
    ocr_osd = pytesseract.image_to_osd(
        img,
        output_type=pytesseract.Output.DICT,
    )
    return ocr_osd
def img_oriented(image):
    import re
    osd = pytesseract.image_to_osd(image)
    angle = osd.splitlines()[2].split()[1]
    script = osd.splitlines()[4].split()[1]
    print("angle: ", angle)
    print("script: ", script)
    (h, w) = image.shape[:2]
    (cX, cY) = (w // 2, h // 2)
    # grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -int(angle), 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])
    # compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))
    # adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY
    # perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))
def de_skew(img, center=None, scale=1.0, cropping=True):
    tesData = pytesseract.image_to_osd(img)
    # tesseract angle for orientation correction
    pytesAngle = int(re.search(r'(?<=Rotate: )\d+', tesData).group(0))
    tess_angle = 360 - pytesAngle
    if tess_angle == 360:
        tess_angle = 0
    angle = getAngle(img) + tess_angle
    print('Angle estimation arg for de_skew(): ', angle)
    (h, w) = img.shape[:2]
    center = (w // 2, h // 2)
    rotation_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
    M = rotation_mat
    abs_cos = abs(rotation_mat[0, 0])
    abs_sin = abs(rotation_mat[0, 1])
    # find the new width and height bounds
    bound_w = int(h * abs_sin + w * abs_cos)
    bound_h = int(h * abs_cos + w * abs_sin)
    # subtract the old image center (bringing the image back to the origin) and add the new image center coordinates
    rotation_mat[0, 2] += bound_w / 2 - center[0]
    rotation_mat[1, 2] += bound_h / 2 - center[1]
    if cropping:
        openCVrotated = cv2.warpAffine(img.copy(), rotation_mat, (bound_w, bound_h),
                                       flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
    else:
        openCVrotated = cv2.warpAffine(img.copy(), M, center,
                                       flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
    cv2.imwrite(resFolderPath + r'\rotated.jpg', openCVrotated)
    return openCVrotated
def orient_image(img):
    try:
        rotate = image_to_osd(img, output_type=Output.DICT)["rotate"]
        # This tells it to use the
        # highest quality interpolation algorithm that it has available,
        # and to expand the image to encompass the full rotated size
        # instead of cropping.
        # The documentation does not say what color
        # the background will be filled with.
        # https://stackoverflow.com/a/17822099
        angle = -float(rotate)
        # if angle > 0:
        #     angle = 360 - angle
        logger.info(f'Orientation angle {angle}')
        k = angle // 90
        if k != 0:
            img = np.rot90(img, k=k)
        # img = img.rotate(-rotate, resample=Image.BICUBIC, expand=True)
    # sometimes an error can occur with tesseract reading the image,
    # maybe there is not enough text or the dpi is not set;
    # this needs to be handled
    except Exception as e:
        raise e
    return img
def get_rotation_degree_by_quarter(self, image):
    # Use tesseract to calculate the degree.
    osd_info = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT)
    major_degree = osd_info['orientation']
    print("Major Degree: {}".format(major_degree))
    return major_degree
def skewImage3(image):
    newdata = pytesseract.image_to_osd(image)
    angle = re.search(r'(?<=Rotate: )\d+', newdata).group(0)
    angle = int(angle)
    if angle == 0:
        return image, angle
    return rotationImage(image, angle), angle
def get_best_text(image, iter: int):
    rot_angle = 90 // iter
    best_angle = None
    best_conf = 0
    for i in range(iter):
        rot = image.rotate(i * rot_angle)
        print("angle = {0}\n".format(i * rot_angle))
        # rot = rot.convert("RGB")
        # confs = pytesseract.image_to_data(rot, output_type=pytesseract.Output.DICT)["conf"]
        rot.show()
        # # convert non-integer entries to 0
        # for i in range(len(confs)):
        #     # print(confs[i], " <- item\n")
        #     # print(type(confs[i]), " <- type\n")
        #     if type(confs[i]) != int:
        #         confs[i] = 0
        output = pytesseract.image_to_osd(rot, output_type='dict')
        confidence1 = output["script_conf"]
        confidence2 = output["orientation_conf"]
        # confidence = sum(confs)/len(confs)
        # print("confidence = {0}, sum = {1}, len = {2}\n".format(confidence, sum(confs), len(confs)))
        # print(pytesseract.image_to_osd(rot))
        if confidence1 + confidence2 > best_conf:
            best_angle = i * rot_angle
            best_conf = confidence1 + confidence2
    rot = image.rotate(best_angle)
    return pytesseract.image_to_string(rot)
def fix_rotation(img):
    rotated_img = img
    # osd: orientation and script detection
    tess_data = pytesseract.image_to_osd(img, nice=1)
    angle = int(re.search(r"(?<=Rotate: )\d+", tess_data).group(0))
    print("angle: " + str(angle))
    if angle != 0 and angle != 360:
        (h, w) = img.shape[:2]
        center = (w / 2, h / 2)
        # Perform the rotation
        rotation_mat = cv2.getRotationMatrix2D(center, -angle, 1.0)
        # Fixing the image cut-off by calculating the new center
        abs_cos = abs(rotation_mat[0, 0])
        abs_sin = abs(rotation_mat[0, 1])
        bound_w = int(h * abs_sin + w * abs_cos)
        bound_h = int(h * abs_cos + w * abs_sin)
        rotation_mat[0, 2] += bound_w / 2 - center[0]
        rotation_mat[1, 2] += bound_h / 2 - center[1]
        rotated_img = cv2.warpAffine(img, rotation_mat, (bound_w, bound_h))
    return rotated_img
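# A short usage sketch for fix_rotation above; "invoice.png" is an illustrative
# file name and the verification step is an assumption, not from the original.
import cv2
import pytesseract

page = cv2.imread("invoice.png")
fixed = fix_rotation(page)
# Re-run OSD to confirm the corrected page now reports Rotate: 0.
print(pytesseract.image_to_osd(fixed))
cv2.imwrite("invoice_upright.png", fixed)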
def orientpage(filename):
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
    f = open(filename, 'rb')
    img_bytes = f.read()
    f.close()
    image = cv2.imdecode(np.frombuffer(img_bytes, dtype='uint8'), cv2.IMREAD_COLOR)  # Initially decode as color
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bitwise_not(gray)
    rot_data = pytesseract.image_to_osd(image)
    ##print("[OSD] " + rot_data)
    rot = re.search(r'(?<=Rotate: )\d+', rot_data).group(0)
    angle = float(rot)
    if angle > 0:
        angle = 360 - angle
    ##print("[ANGLE] " + str(angle))
    # rotate the image to deskew it
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 0.7)
    rotated = cv2.warpAffine(image, M, (w, h),
                             flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    # TODO: Rotated image can be saved here
    ##print(pytesseract.image_to_osd(rotated))
    cv2.imwrite(temp, rotated)
def pre_processing(img):
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    #image_original = cv2.imread(img_file_path, cv2.IMREAD_COLOR)
    #image_original = cv2.resize(image_original, (1700, 2200))
    #image_original = cv2.resize(image_original, None, fx=0.7, fy=0.7, interpolation=cv2.INTER_AREA)
    image_scaled = cv2.resize(img, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_LINEAR)

    #### rotate image > 90
    rotate_img = image_scaled
    newdata = pytesseract.image_to_osd(image_scaled)
    angle = 360 - int(re.search(r'(?<=Rotate: )\d+', newdata).group(0))
    if angle > 0 and angle < 360:
        rotate_img = rotate(image_scaled, angle)

    # convert the image to grayscale
    gray = cv2.cvtColor(rotate_img, cv2.COLOR_BGR2GRAY)

    # threshold the image after Gaussian filtering
    # medBlur = cv2.medianBlur(gray, 3)
    # gauBlur = cv2.GaussianBlur(medBlur, (3,3), 10)
    #return bit
    thresh = cv2.threshold(gray, 90, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    #kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    #dilate = cv2.dilate(thresh, kernel, iterations=1)
    return thresh
def pdf_language_detect(page_file):
    try:
        osd = pytesseract.image_to_osd(page_file)
        language_script = osd.split('\nScript')[1][2:]
        print('Language detected {0}'.format(language_script))
        return language_script
    except:
        return None
def test_image_to_osd(test_file):
    result = image_to_osd(test_file)
    assert isinstance(result, unicode if IS_PYTHON_2 else str)
    for key in [
        'Page number',
        'Orientation in degrees',
        'Rotate',
        'Orientation confidence',
        'Script',
        'Script confidence'
    ]:
        assert key + ':' in result
def rotate(thres):
    # NOTE: relies on fixed character offsets into the OSD string to read the Rotate value
    val = pytesseract.image_to_osd(thres)
    if val[50:53] == '270':
        thres = cv2.rotate(thres, cv2.ROTATE_90_COUNTERCLOCKWISE)
    elif val[51:53] == '90':
        thres = cv2.rotate(thres, cv2.ROTATE_90_CLOCKWISE)
    elif val[50:53] == '180':
        thres = cv2.rotate(thres, cv2.ROTATE_180)
    return thres
def orientation_script_detection(self):
    """
    Detects orientation and script of the file
    :returns: angle and script
    """
    image = cv2.imread(self.image)
    osd = pytesseract.image_to_osd(image)
    angle = re.search(r"(?<=Rotate: )\d+", osd).group(0)
    script = re.search(r"(?<=Script: )\w+", osd).group(0)
    return angle, script
def osd(image: Image) -> dict:
    """Returns orientation and script data for `image`."""
    s = pytesseract.image_to_osd(image)
    ret = dict()
    for line in s.split('\n'):  # type: ignore
        if line:
            key, value = line.split(':')  # type: ignore
            key, value = key.strip(), value.strip()
            ret[key] = appropriate_type(value)
    return ret
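# A hedged usage sketch for the osd() parser above; it assumes the
# appropriate_type() helper referenced in the snippet is defined, and the file
# name "sample.png" is illustrative.
from PIL import Image

with Image.open("sample.png") as page:
    info = osd(page)
print(info.get("Rotate"), info.get("Script"), info.get("Script confidence"))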
def deskewImageNew(img):
    # Correct skewness of the image and return the image
    cpy = img.copy()
    try:
        # Get text orientation from Tesseract
        osd = pytesseract.image_to_osd(img)
        rotationAngle = int(osd.split("\n")[2].split(":")[1].strip())
        img1 = imutils.rotate_bound(img, rotationAngle)
        return img1
    except:
        return cpy
def rotate(image):
    im = Image.open(image)
    try:
        angle = 360 - int(
            re.search(r'(?<=Rotate: )\d+',
                      pytesseract.image_to_osd(im)).group(0))
    except:
        angle = 0
    im = im.rotate(angle)
    im.save(path, "PNG")
def rotate_image(image, center=None, scale=1.0):
    angle = 360 - int(re.search(r'(?<=Rotate: )\d+',
                                pytesseract.image_to_osd(image)).group(0))
    (h, w) = image.shape[:2]
    if center is None:
        center = (w / 2, h / 2)
    # Perform the rotation
    M = cv2.getRotationMatrix2D(center, angle, scale)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated
def check_orientation(image):
    try:
        orientation_details = pytesseract.image_to_osd(image)
        angle = int(orientation_details.split("\n")[2].split(":")[-1])
        if angle == 90:
            image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
        elif angle == 180:
            image = cv2.rotate(image, cv2.ROTATE_180)
        return image
    except:
        return image
def deskewImageNew(img):
    # Correct skewness of the image and return the image
    try:
        img1 = img.copy()
        # Run OSD on the top half of the page only
        osd = pytesseract.image_to_osd(img1[:img1.shape[0] // 2, :])
        rotationAngle = int(osd.split("\n")[2].split(":")[1].strip())
        img1 = imutils.rotate_bound(img1, rotationAngle)
        return img1
    except Exception as e:
        print("\tDeskewing Failed :-", e)
        return img
from wand.image import Image as wand_Image
from PIL import Image as PIL_Image
from wand.color import Color
import os
import pytesseract


def build_images(force=False):
    if not all([not force, os.path.isfile('./foo-0.png'), os.path.isfile('./foo-0.png')]):
        all_pages = wand_Image(filename='./example.pdf', resolution=300)
        for idx, page in enumerate(all_pages.sequence):
            with wand_Image(page) as i:
                i.format = 'png'
                i.background_color = Color('white')
                i.alpha_channel = 'remove'
                i.save(filename='foo-%s.png' % idx)


build_images()
boxes = pytesseract.image_to_boxes(PIL_Image.open('foo-0.png'), output_type=pytesseract.Output.DICT)
data = pytesseract.image_to_data(PIL_Image.open('foo-0.png'), output_type=pytesseract.Output.DICT)
osd = pytesseract.image_to_osd(PIL_Image.open('foo-0.png'), output_type=pytesseract.Output.DICT)