def ocr():
    """OCR ``eurotext.jpg`` and print the text with its confidence figures.

    Prints the recognized text and its length, the mean confidence, and the
    per-word confidences obtained two ways (the ``api`` method and the
    module-level ``tesseract.AllWordConfidences`` helper), together with a
    comparison between Tesseract's word count and ``countWords`` /
    ``countWords2``.

    The engine and the Leptonica image are released in a ``finally`` block,
    so they are freed even when recognition fails part-way through.
    """
    api = tesseract.TessBaseAPI()
    pixImage = None
    try:
        api.Init(".", "eng", tesseract.OEM_DEFAULT)
        api.SetPageSegMode(tesseract.PSM_AUTO)

        mImgFile = "eurotext.jpg"
        pixImage = tesseract.pixRead(mImgFile)
        api.SetImage(pixImage)

        text = api.GetUTF8Text()
        conf = api.MeanTextConf()
        print(text, len(text))
        print("Cofidence Level: %d %%" % conf)
        print("Confidences of All words")

        header("Method 1", "*" * 10)
        confOfText = api.AllWordConfidences()
        print(confOfText)
        print("Number of Words:")
        print("counted by tesseract: %d" % len(confOfText))
        # Counted once and reused for both the report and the comparison.
        myCount = countWords(text)
        print("counted by me: %d[%d]" % (myCount, countWords2(text)))
        if len(confOfText) != myCount:
            print("Why the words counted by tesseract are different from mine!!!!")

        header("Method 2", "*" * 10)
        confs = tesseract.AllWordConfidences(api)
        print(confs, len(confs))
    finally:
        # Same cleanup order as the standalone example script in this file.
        api.End()
        if pixImage is not None:
            tesseract.pixDestroy(pixImage)
def getConfidence(mask, angle, me):
    """Return Tesseract's mean confidence for ``mask`` rotated by ``angle``.

    The mask is rotated by ``90 - degrees(angle)`` around ``(me[1], me[0])``,
    round-tripped through PNG -> GIF -> TIFF temp files in the working
    directory, and handed to the module-level ``api`` engine.

    Args:
        mask: image accepted by ``rotate`` and ``io.imsave``.
        angle: rotation angle in radians (converted with ``math.degrees``).
        me: indexable pair; passed to ``rotate`` as ``(me[1], me[0])``.

    Returns:
        The mean text confidence reported by the engine, floored at 10.
    """
    aDeg = math.degrees(angle)
    rotated = rotate(mask, 90.0 - aDeg, False, (me[1], me[0]))
    io.imsave("temp3_0.png", rotated)

    # Convert PNG -> GIF -> TIFF before handing the file to Leptonica.
    gifTempFile = "temp3_1.gif"
    tifTempFile = "temp3_2.tif"
    im = Image.open('temp3_0.png')
    im.save(gifTempFile, "GIF")
    original = Image.open(gifTempFile)
    bg = original.resize(im.size, Image.NEAREST)
    bg.save(tifTempFile)

    pixImage = tesseract.pixRead(tifTempFile)
    try:
        api.SetImage(pixImage)
        # The text itself is not needed; the call is kept so recognition
        # runs exactly as before the confidence is queried.
        api.GetUTF8Text()
        conf = api.MeanTextConf()
    finally:
        # Free the Leptonica image; it was previously leaked on every call.
        if pixImage is not None:
            tesseract.pixDestroy(pixImage)

    if showProcess:
        input("PRESS ENTER TO CONTINUE.")

    return max(conf, 10)
def ocr():
    """OCR ``eurotext.jpg`` and print the text with its confidence figures.

    Prints the recognized text and its length, the mean confidence, and the
    per-word confidences obtained two ways (the ``api`` method and the
    module-level ``tesseract.AllWordConfidences`` helper), together with a
    comparison between Tesseract's word count and ``countWords`` /
    ``countWords2``.

    The engine and the Leptonica image are released in a ``finally`` block,
    so they are freed even when recognition fails part-way through.
    """
    api = tesseract.TessBaseAPI()
    pixImage = None
    try:
        api.Init(".", "eng", tesseract.OEM_DEFAULT)
        api.SetPageSegMode(tesseract.PSM_AUTO)

        mImgFile = "eurotext.jpg"
        pixImage = tesseract.pixRead(mImgFile)
        api.SetImage(pixImage)

        text = api.GetUTF8Text()
        conf = api.MeanTextConf()
        print(text, len(text))
        print("Cofidence Level: %d %%" % conf)
        print("Confidences of All words")

        header("Method 1", "*" * 10)
        confOfText = api.AllWordConfidences()
        print(confOfText)
        print("Number of Words:")
        print("counted by tesseract: %d" % len(confOfText))
        # Counted once and reused for both the report and the comparison.
        myCount = countWords(text)
        print("counted by me: %d[%d]" % (myCount, countWords2(text)))
        if len(confOfText) != myCount:
            print("Why the words counted by tesseract are different from mine!!!!")

        header("Method 2", "*" * 10)
        confs = tesseract.AllWordConfidences(api)
        print(confs, len(confs))
    finally:
        # Same cleanup order as the standalone example script in this file.
        api.End()
        if pixImage is not None:
            tesseract.pixDestroy(pixImage)
def gettext(imagepath):
    """OCR the image at ``imagepath`` with a fresh English Tesseract engine.

    Args:
        imagepath: path of the image file, read with ``tesseract.pixRead``.

    Returns:
        The recognized text as returned by ``GetUTF8Text``. The text is also
        printed, prefixed with ``"OCR output:"``.
    """
    api = tesseract.TessBaseAPI()
    pixImage = None
    try:
        api.SetOutputName("outputName")
        api.Init(".", "eng", tesseract.OEM_DEFAULT)
        api.SetPageSegMode(tesseract.PSM_AUTO)
        pixImage = tesseract.pixRead(imagepath)
        api.SetImage(pixImage)
        outText = api.GetUTF8Text()
        print("OCR output:\n%s" % outText)
    finally:
        # Release the engine and the image even if recognition raised.
        api.End()
        if pixImage is not None:
            tesseract.pixDestroy(pixImage)
    return outText
def imageToText(self, imageLocation, imageTextLoc):
    """OCR ``imageLocation`` and write the recognized text to ``imageTextLoc``.

    The Tesseract engine is created on first use and cached on ``self.api``;
    it is deliberately not ended here so later calls can reuse it. Progress
    messages are posted through ``GObject.idle_add`` to ``self.addRowData``.

    Args:
        imageLocation: path of the image to recognize.
        imageTextLoc: path of the text file to (over)write.
    """
    if not self.api:
        self.api = tesseract.TessBaseAPI()
        self.api.Init("includes", "eng", tesseract.OEM_DEFAULT)
        self.api.SetPageSegMode(tesseract.PSM_AUTO)

    pixImage = tesseract.pixRead(imageLocation)
    try:
        self.api.SetImage(pixImage)
        outText = self.api.GetUTF8Text()
    finally:
        # Dropping the Python reference does not free the Leptonica image;
        # destroy it explicitly so processing many images does not leak.
        if pixImage is not None:
            tesseract.pixDestroy(pixImage)

    GObject.idle_add(self.addRowData, self.listStoreCounter, 2, "Saving Text to File")
    # The context manager closes the file even if the write fails.
    with open(imageTextLoc, 'w') as textFile:
        textFile.write(str(outText))
    GObject.idle_add(self.addRowData, self.listStoreCounter, 2, "Text Saved to File")
def imageToText(self, imageLocation, imageTextLoc):
    """OCR ``imageLocation`` and write the recognized text to ``imageTextLoc``.

    The Tesseract engine is created on first use and cached on ``self.api``;
    it is deliberately not ended here so later calls can reuse it. Progress
    messages are posted through ``GObject.idle_add`` to ``self.addRowData``.

    Args:
        imageLocation: path of the image to recognize.
        imageTextLoc: path of the text file to (over)write.
    """
    if not self.api:
        self.api = tesseract.TessBaseAPI()
        self.api.Init("includes", "eng", tesseract.OEM_DEFAULT)
        self.api.SetPageSegMode(tesseract.PSM_AUTO)

    pixImage = tesseract.pixRead(imageLocation)
    try:
        self.api.SetImage(pixImage)
        outText = self.api.GetUTF8Text()
    finally:
        # Dropping the Python reference does not free the Leptonica image;
        # destroy it explicitly so processing many images does not leak.
        if pixImage is not None:
            tesseract.pixDestroy(pixImage)

    GObject.idle_add(self.addRowData, self.listStoreCounter, 2, "Saving Text to File")
    # The context manager closes the file even if the write fails.
    with open(imageTextLoc, 'w') as textFile:
        textFile.write(str(outText))
    GObject.idle_add(self.addRowData, self.listStoreCounter, 2, "Text Saved to File")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#from __future__ import print_function
import tesseract
import gc
import pprint

# Minimal example: read an image through Leptonica's pixRead, OCR it with an
# English Tesseract engine and print the text.
api = tesseract.TessBaseAPI()
pixImage = None
try:
    api.SetOutputName("outputName")
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)
    mImgFile = "eurotext.jpg"

    print("Method 1: Leptonica->pixRead")
    pixImage = tesseract.pixRead(mImgFile)
    print("Type of pixiamge=", type(pixImage))
    print("repr(pixiamge)=", repr(pixImage))
    api.SetImage(pixImage)
    outText = api.GetUTF8Text()
    print("OCR output:\n%s" % outText)
finally:
    # Release the engine first, then the image, even if OCR failed above.
    api.End()
    if pixImage is not None:
        tesseract.pixDestroy(pixImage)
outText = None
rotated = rotate(mask, 90.0 - adeg, False, (me[1], me[0])) #rotated = rotate(mask, 90.0-adeg, False) io.imsave("rotated3/" + str(i) + ".png", rotated) #using the OCR gifTempFile = "o1.gif" tifTempFile = "o2.tif" im = Image.open('rotated3/' + str(i) + '.png') im.save(gifTempFile, "GIF") original = Image.open(gifTempFile) bg = original.resize(im.size, Image.NEAREST) bg.save(tifTempFile) pixImage = tesseract.pixRead(tifTempFile) api.SetImage(pixImage) outText = api.GetUTF8Text() outText = outText.replace("\n", "") outText = outText.replace("\t", "") conf = api.MeanTextConf() f.write(str(i) + "\t" + outText + "\t" + str(conf) + "\n") f.close() api.End() ''' mask = mask * 255 io.imsave('bing.png', mask) block_size = 40
api = tesseract.TessBaseAPI() api.Init(tesslangpath, "eng", tesseract.OEM_DEFAULT) api.SetPageSegMode(tesseract.PSM_SINGLE_CHAR) api.SetVariable("classify_enable_learning", "0") api.SetVariable("classify_enable_adaptive_matcher", "0") image_dict = OrderedDict() segment_text_list = [] cluster_pattern_list = [] # classify for fname in test_images: test_image = ImageFile(fname) test_classes, test_segments = ocr.ocr(test_image, show_steps=verbose) if use_tesseract: tesseract_image = tesseract.pixRead(fname) tesseract_classes = [] cluster_list = [] for segment in test_segments: cluster_segments = prim.get_cluster(segment, test_segments) if len(cluster_segments) == 10: add = True for list_cluster in cluster_list: add = add and not tesseract_utils.is_cluster_match( cluster_segments, list_cluster) if not add: break if add: cluster_list.append(cluster_segments) pattern = tesseract_utils.get_pattern(cluster_segments) cluster_pattern_list.append([cluster_segments, pattern])
api = tesseract.TessBaseAPI() api.Init(tesslangpath, "eng", tesseract.OEM_DEFAULT) api.SetPageSegMode(tesseract.PSM_SINGLE_CHAR) api.SetVariable("classify_enable_learning", "0") api.SetVariable("classify_enable_adaptive_matcher", "0") image_dict = OrderedDict() segment_text_list = [] cluster_pattern_list = [] # classify for fname in test_images: test_image = ImageFile(fname) test_classes, test_segments = ocr.ocr(test_image, show_steps=verbose) if use_tesseract: tesseract_image = tesseract.pixRead(fname) tesseract_classes = [] cluster_list = [] for segment in test_segments: cluster_segments = prim.get_cluster(segment, test_segments) if len(cluster_segments) == 10: add = True for list_cluster in cluster_list: add = add and not tesseract_utils.is_cluster_match(cluster_segments, list_cluster) if not add: break if add: cluster_list.append(cluster_segments) pattern = tesseract_utils.get_pattern(cluster_segments) cluster_pattern_list.append([cluster_segments, pattern]) whitelist = tesseract_utils.get_whitelist(segment, cluster_segments, pattern)
def process(image):
    """Run the detection pipeline on one frame and OCR the rectified region.

    Stages, each gated by a ``Param.Value(...)`` switch:

    1. Resize the frame to 500 px wide, optionally bilateral-filter it,
       convert it to gray, run Canny and optionally dilate/erode the edges.
    2. ``contours``: approximate every contour of area >= 1000 by a polygon;
       quadrilaterals whose adjacent-side length ratio lies in (1.5, 3) are
       kept as "good" (rolled by one vertex when the ratio is inverted).
    3. ``lines_p``: match probabilistic Hough segments against the good
       quadrilaterals, take the longest matching segment, derive a target
       quadrilateral from it and warp that region out of the full-size
       original frame.
    4. ``lines``: draw the standard Hough lines on the debug image.
    5. Show the debug views; when a region was warped and ``ocr`` is on,
       OCR it with the lazily created global ``tess_api`` and overlay the
       value returned by ``fixEsr``.

    Args:
        image: frame accepted by ``cv2.resize`` and by ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``; ``image.shape[1]`` is taken as its width.

    Returns:
        None. Output goes to the "Image"/"Image2" windows and to stdout.
    """
    global tess_api

    # Detection runs on a fixed-width copy; `ratio` maps coordinates back to
    # the original frame for the final warp.
    sz = 500
    ratio = (sz + 0.0) / image.shape[1]
    dim = (sz, int(image.shape[0] * ratio))
    orig = image
    image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)

    if Param.Value("bilateral"):
        bila = cv2.bilateralFilter(image, 11, 17, 17)
    else:
        bila = image
    gray = cv2.cvtColor(bila, cv2.COLOR_BGR2GRAY)
    canny = cv2.Canny(gray, 25, 50)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    if Param.Value("dilate"):
        canny = cv2.dilate(canny, kernel, iterations=Param.Value("dilate"))
    if Param.Value("erode"):
        canny = cv2.erode(canny, kernel, iterations=Param.Value("erode"))

    goodCnts = []
    badCnts = []
    if Param.Value('contours'):
        (cnts, _hier) = cv2.findContours(canny.copy(), cv2.RETR_TREE,
                                         cv2.CHAIN_APPROX_SIMPLE)
        for c in cnts:
            if cv2.contourArea(c) < 1000:
                continue
            # Approximate the contour with a polygon.
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.01 * peri, True)
            good = False
            # A four-point approximation is treated as a screen candidate.
            if len(approx) == 4:
                d01 = np.sum((approx[0] - approx[1])**2)
                d12 = np.sum((approx[1] - approx[2])**2)
                wh_ratio = np.sqrt(np.divide(float(d01), d12))
                if 1.5 < wh_ratio < 3:
                    good = True
                elif 1.5 < 1 / wh_ratio < 3:
                    # Same shape with sides swapped: rotate the vertex order
                    # so side 0-1 is the long one.
                    approx = np.roll(approx, 1, axis=0)
                    good = True
            if good:
                goodCnts.append(approx)
            else:
                badCnts.append(approx)
        cv2.drawContours(image, goodCnts, -1, (0, 255, 0), 2)
        if Param.Value('all_contours'):
            cv2.drawContours(image, badCnts, -1, (128, 128, 0), 1)

    warped = None
    if Param.Value('lines_p') and len(goodCnts) > 0:
        lines = cv2.HoughLinesP(canny, 1, np.pi / 180,
                                Param.Value('lines_threshold'),
                                minLineLength=Param.Value('lines_minlength'),
                                maxLineGap=Param.Value('lines_maxgap'))
        if lines is None:
            lines = []
        else:
            lines = lines[0]

        goodLines = []
        goodBoxes = []
        badLines = []
        for line in lines:
            line = np.array([[line[0], line[1]], [line[2], line[3]]])
            length2 = np.sum((line[0] - line[1])**2)
            for cnt in goodCnts:
                # Skip segments whose squared length is below twice the
                # squared length of the box's 0-3 side.
                minLength2 = np.sum((cnt[0] - cnt[3])**2) * 2
                if length2 < minLength2:
                    continue
                d = [geoPointLineDist(point[0], line) for point in cnt]
                if d[0] < 10 and d[3] < 10:
                    goodLines.append(line)
                    goodBoxes.append(cnt)
                elif d[1] < 10 and d[2] < 10:
                    # The opposite side matches: rotate the box so the
                    # matching corners become indices 0 and 3.
                    goodLines.append(line)
                    goodBoxes.append(np.roll(cnt, 2, axis=0))
                elif Param.Value('show_bad_lines'):
                    badLines.append(line)

        if Param.Value('show_bad_lines'):
            # Endpoints are passed as tuples, like the longest-line call
            # below, instead of raw ndarray rows.
            for line in goodLines:
                cv2.line(image, tuple(line[0]), tuple(line[1]), (0, 0, 255), 1)
            for line in badLines:
                cv2.line(image, tuple(line[0]), tuple(line[1]), (0, 0, 255), 1)

        if len(goodLines) > 0:
            g = np.array(goodLines)
            longestIndex = np.argmax(
                np.sum((g[:, 1:2, :] - g[:, 0:1, :])**2, axis=2))
            longestLine = goodLines[longestIndex]
            cv2.line(image, tuple(longestLine[0]), tuple(longestLine[1]),
                     (200, 200, 0), 2)
            box = goodBoxes[longestIndex]

            # NOTE(review): geoClosestPoint presumably projects the corner
            # onto the line -- confirm against its definition.
            tl = np.array(geoClosestPoint(box[0][0], longestLine, False))
            bl = np.array(geoClosestPoint(box[3][0], longestLine, False))
            tr = box[1][0]
            br = box[2][0]

            # Shift the quadrilateral by 3.1 box-heights, then widen it by
            # 1.1 widths on one side and 0.6 on the other.
            # NOTE(review): these factors look empirically tuned; their
            # physical meaning is not visible here.
            h = bl - tl
            tl = tl + h * 3.1
            bl = tl + h
            h = br - tr
            tr = tr + h * 3.1
            br = tr + h
            w = tr - tl
            tl = tl - w * 1.1
            tr = tr + w * 0.6
            w = br - bl
            bl = bl - w * 1.1
            br = br + w * 0.6

            # Back to original-frame coordinates.
            tl = tl / ratio
            tr = tr / ratio
            bl = bl / ratio
            br = br / ratio

            width = int(np.sqrt(np.sum((tl - tr)**2)))
            height = int(np.sqrt(np.sum((tl - bl)**2)))
            dst = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                            [0, height - 1]], dtype="float32")
            rect = np.array([tl, tr, br, bl], dtype="float32")
            M = cv2.getPerspectiveTransform(rect, dst)
            warped = cv2.warpPerspective(orig, M, (width, height))

    if Param.Value('lines'):
        lines = cv2.HoughLines(canny, 1, np.pi / 180,
                               Param.Value('lines_threshold'))
        for rho, theta in (lines[0] if lines is not None else []):
            a = np.cos(theta)
            b = np.sin(theta)
            x0 = a * rho
            y0 = b * rho
            # int() truncates (3.8 -> 3); use int(np.around(x)) to round.
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * (a))
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * (a))
            cv2.line(image, (x1, y1), (x2, y2), (0, 128, 128), 2)

    Param.DisplayAll(image)
    cv2.imshow("Image", display([image, bila, gray, canny]))

    if warped is not None:
        if Param.Value('ocr'):
            warped = cv2.bilateralFilter(warped, 11, 17, 17)
            if tess_api is None:
                # Created once and reused for every later frame.
                tess_api = tesseract.TessBaseAPI()
                tess_api.Init(".", "eng", tesseract.OEM_DEFAULT)
                tess_api.SetVariable("tessedit_char_whitelist", "0123456789+>")
                tess_api.SetPageSegMode(tesseract.PSM_AUTO)
            # Round-trip through a file: SetCvImage segfaults.
            cv2.imwrite("ocr.png", warped)
            pixImage = tesseract.pixRead("ocr.png")
            try:
                tess_api.SetImage(pixImage)
                outText = tess_api.GetUTF8Text()
            finally:
                # Free the Leptonica image; it was leaked on every frame.
                if pixImage is not None:
                    tesseract.pixDestroy(pixImage)
            for line in outText.split("\n"):
                if len(line) > 40:
                    # Single-argument print(...) behaves the same on
                    # Python 2 and Python 3.
                    print(line)
                    esr = fixEsr(line)
                    if esr is None:
                        # Retry with "<digit> <digit>" rewritten to
                        # "+ <digit>".
                        esr = fixEsr(re.sub(r'\d (\d)', '+ \\1', line))
                    print(esr)
                    if esr is not None:
                        cv2.putText(warped, esr, (10, 20),
                                    cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 128), 2)
        cv2.imshow("Image2", warped)
def process(image):
    """Run the detection pipeline on one frame and OCR the rectified region.

    Stages, each gated by a ``Param.Value(...)`` switch:

    1. Resize the frame to 500 px wide, optionally bilateral-filter it,
       convert it to gray, run Canny and optionally dilate/erode the edges.
    2. ``contours``: approximate every contour of area >= 1000 by a polygon;
       quadrilaterals whose adjacent-side length ratio lies in (1.5, 3) are
       kept as "good" (rolled by one vertex when the ratio is inverted).
    3. ``lines_p``: match probabilistic Hough segments against the good
       quadrilaterals, take the longest matching segment, derive a target
       quadrilateral from it and warp that region out of the full-size
       original frame.
    4. ``lines``: draw the standard Hough lines on the debug image.
    5. Show the debug views; when a region was warped and ``ocr`` is on,
       OCR it with the lazily created global ``tess_api`` and overlay the
       value returned by ``fixEsr``.

    Args:
        image: frame accepted by ``cv2.resize`` and by ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``; ``image.shape[1]`` is taken as its width.

    Returns:
        None. Output goes to the "Image"/"Image2" windows and to stdout.
    """
    global tess_api

    # Detection runs on a fixed-width copy; `ratio` maps coordinates back to
    # the original frame for the final warp.
    sz = 500
    ratio = (sz + 0.0) / image.shape[1]
    dim = (sz, int(image.shape[0] * ratio))
    orig = image
    image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)

    if Param.Value("bilateral"):
        bila = cv2.bilateralFilter(image, 11, 17, 17)
    else:
        bila = image
    gray = cv2.cvtColor(bila, cv2.COLOR_BGR2GRAY)
    canny = cv2.Canny(gray, 25, 50)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    if Param.Value("dilate"):
        canny = cv2.dilate(canny, kernel, iterations=Param.Value("dilate"))
    if Param.Value("erode"):
        canny = cv2.erode(canny, kernel, iterations=Param.Value("erode"))

    goodCnts = []
    badCnts = []
    if Param.Value('contours'):
        (cnts, _hier) = cv2.findContours(canny.copy(), cv2.RETR_TREE,
                                         cv2.CHAIN_APPROX_SIMPLE)
        for c in cnts:
            if cv2.contourArea(c) < 1000:
                continue
            # Approximate the contour with a polygon.
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.01 * peri, True)
            good = False
            # A four-point approximation is treated as a screen candidate.
            if len(approx) == 4:
                d01 = np.sum((approx[0] - approx[1])**2)
                d12 = np.sum((approx[1] - approx[2])**2)
                wh_ratio = np.sqrt(np.divide(float(d01), d12))
                if 1.5 < wh_ratio < 3:
                    good = True
                elif 1.5 < 1 / wh_ratio < 3:
                    # Same shape with sides swapped: rotate the vertex order
                    # so side 0-1 is the long one.
                    approx = np.roll(approx, 1, axis=0)
                    good = True
            if good:
                goodCnts.append(approx)
            else:
                badCnts.append(approx)
        cv2.drawContours(image, goodCnts, -1, (0, 255, 0), 2)
        if Param.Value('all_contours'):
            cv2.drawContours(image, badCnts, -1, (128, 128, 0), 1)

    warped = None
    if Param.Value('lines_p') and len(goodCnts) > 0:
        lines = cv2.HoughLinesP(canny, 1, np.pi / 180,
                                Param.Value('lines_threshold'),
                                minLineLength=Param.Value('lines_minlength'),
                                maxLineGap=Param.Value('lines_maxgap'))
        if lines is None:
            lines = []
        else:
            lines = lines[0]

        goodLines = []
        goodBoxes = []
        badLines = []
        for line in lines:
            line = np.array([[line[0], line[1]], [line[2], line[3]]])
            length2 = np.sum((line[0] - line[1])**2)
            for cnt in goodCnts:
                # Skip segments whose squared length is below twice the
                # squared length of the box's 0-3 side.
                minLength2 = np.sum((cnt[0] - cnt[3])**2) * 2
                if length2 < minLength2:
                    continue
                d = [geoPointLineDist(point[0], line) for point in cnt]
                if d[0] < 10 and d[3] < 10:
                    goodLines.append(line)
                    goodBoxes.append(cnt)
                elif d[1] < 10 and d[2] < 10:
                    # The opposite side matches: rotate the box so the
                    # matching corners become indices 0 and 3.
                    goodLines.append(line)
                    goodBoxes.append(np.roll(cnt, 2, axis=0))
                elif Param.Value('show_bad_lines'):
                    badLines.append(line)

        if Param.Value('show_bad_lines'):
            # Endpoints are passed as tuples, like the longest-line call
            # below, instead of raw ndarray rows.
            for line in goodLines:
                cv2.line(image, tuple(line[0]), tuple(line[1]), (0, 0, 255), 1)
            for line in badLines:
                cv2.line(image, tuple(line[0]), tuple(line[1]), (0, 0, 255), 1)

        if len(goodLines) > 0:
            g = np.array(goodLines)
            longestIndex = np.argmax(
                np.sum((g[:, 1:2, :] - g[:, 0:1, :])**2, axis=2))
            longestLine = goodLines[longestIndex]
            cv2.line(image, tuple(longestLine[0]), tuple(longestLine[1]),
                     (200, 200, 0), 2)
            box = goodBoxes[longestIndex]

            # NOTE(review): geoClosestPoint presumably projects the corner
            # onto the line -- confirm against its definition.
            tl = np.array(geoClosestPoint(box[0][0], longestLine, False))
            bl = np.array(geoClosestPoint(box[3][0], longestLine, False))
            tr = box[1][0]
            br = box[2][0]

            # Shift the quadrilateral by 3.1 box-heights, then widen it by
            # 1.1 widths on one side and 0.6 on the other.
            # NOTE(review): these factors look empirically tuned; their
            # physical meaning is not visible here.
            h = bl - tl
            tl = tl + h * 3.1
            bl = tl + h
            h = br - tr
            tr = tr + h * 3.1
            br = tr + h
            w = tr - tl
            tl = tl - w * 1.1
            tr = tr + w * 0.6
            w = br - bl
            bl = bl - w * 1.1
            br = br + w * 0.6

            # Back to original-frame coordinates.
            tl = tl / ratio
            tr = tr / ratio
            bl = bl / ratio
            br = br / ratio

            width = int(np.sqrt(np.sum((tl - tr)**2)))
            height = int(np.sqrt(np.sum((tl - bl)**2)))
            dst = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                            [0, height - 1]], dtype="float32")
            rect = np.array([tl, tr, br, bl], dtype="float32")
            M = cv2.getPerspectiveTransform(rect, dst)
            warped = cv2.warpPerspective(orig, M, (width, height))

    if Param.Value('lines'):
        lines = cv2.HoughLines(canny, 1, np.pi / 180,
                               Param.Value('lines_threshold'))
        for rho, theta in (lines[0] if lines is not None else []):
            a = np.cos(theta)
            b = np.sin(theta)
            x0 = a * rho
            y0 = b * rho
            # int() truncates (3.8 -> 3); use int(np.around(x)) to round.
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * (a))
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * (a))
            cv2.line(image, (x1, y1), (x2, y2), (0, 128, 128), 2)

    Param.DisplayAll(image)
    cv2.imshow("Image", display([image, bila, gray, canny]))

    if warped is not None:
        if Param.Value('ocr'):
            warped = cv2.bilateralFilter(warped, 11, 17, 17)
            if tess_api is None:
                # Created once and reused for every later frame.
                tess_api = tesseract.TessBaseAPI()
                tess_api.Init(".", "eng", tesseract.OEM_DEFAULT)
                tess_api.SetVariable("tessedit_char_whitelist", "0123456789+>")
                tess_api.SetPageSegMode(tesseract.PSM_AUTO)
            # Round-trip through a file: SetCvImage segfaults.
            cv2.imwrite("ocr.png", warped)
            pixImage = tesseract.pixRead("ocr.png")
            try:
                tess_api.SetImage(pixImage)
                outText = tess_api.GetUTF8Text()
            finally:
                # Free the Leptonica image; it was leaked on every frame.
                if pixImage is not None:
                    tesseract.pixDestroy(pixImage)
            for line in outText.split("\n"):
                if len(line) > 40:
                    # Single-argument print(...) behaves the same on
                    # Python 2 and Python 3.
                    print(line)
                    esr = fixEsr(line)
                    if esr is None:
                        # Retry with "<digit> <digit>" rewritten to
                        # "+ <digit>".
                        esr = fixEsr(re.sub(r'\d (\d)', '+ \\1', line))
                    print(esr)
                    if esr is not None:
                        cv2.putText(warped, esr, (10, 20),
                                    cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 128), 2)
        cv2.imshow("Image2", warped)