def tesseract_ocr(imgname, type='PagesWrapper'):
    """Run Tesseract on the image at *imgname* and return the OCR result.

    A fresh ``tesseract.TessBaseAPI`` is created for every call, given the
    output name "outputName", initialised with
    ``Init(".", "eng", tesseract.OEM_DEFAULT)`` and switched to
    ``tesseract.PSM_AUTO`` page segmentation before the image is processed.

    Args:
        imgname: Path of the image file to recognise.
        type: Name of the ``tesseract.ProcessPages*`` helper to use; one of
            'PagesWrapper' (default), 'PagesFileStream', 'PagesRaw' or
            'PagesBuffer'.  (The parameter shadows the ``type`` builtin; the
            name is kept so existing keyword callers keep working.)

    Returns:
        Whatever the selected ``tesseract.ProcessPages*`` helper returns.

    Raises:
        ValueError: If *type* is not one of the supported names.
    """
    supported = ('PagesWrapper', 'PagesFileStream', 'PagesRaw', 'PagesBuffer')
    # Reject bad selectors up front, before paying for engine initialisation,
    # instead of failing later on an unbound ``result``.
    if type not in supported:
        raise ValueError(
            "unsupported OCR type %r; expected one of: %s"
            % (type, ", ".join(supported))
        )

    api = tesseract.TessBaseAPI()
    api.SetOutputName("outputName")
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    if type == 'PagesWrapper':
        return tesseract.ProcessPagesWrapper(imgname, api)
    if type == 'PagesFileStream':
        return tesseract.ProcessPagesFileStream(imgname, api)
    if type == 'PagesRaw':
        return tesseract.ProcessPagesRaw(imgname, api)

    # 'PagesBuffer': hand the raw file contents to the engine.  Image data is
    # binary, so read it in "rb" mode and close the handle deterministically.
    with open(imgname, "rb") as img_file:
        img_bytes = img_file.read()
    return tesseract.ProcessPagesBuffer(img_bytes, len(img_bytes), api)
# -*- coding: utf-8 -*-
#from __future__ import print_function
import tesseract
import ctypes
import os

# Demo script: feed the same image to each of the four ProcessPages* helpers
# exposed by the tesseract module and print what each one returns.

api = tesseract.TessBaseAPI()
api.SetOutputName("outputName")
#api.Init(".","eng")
api.Init(".", "eng", tesseract.OEM_DEFAULT)
api.SetPageSegMode(tesseract.PSM_AUTO)
mImgFile = "eurotext.jpg"

# Each print below uses a single pre-formatted string so the output is the
# same text the original Python 2 print statements produced, while the file
# also parses under Python 3.

# 1) Helper that takes the image path.
result = tesseract.ProcessPagesWrapper(mImgFile, api)
print("result(ProcessPagesWrapper)= %s" % (result,))
#api.ProcessPages(mImgFile,None, 0, result)

# 2) File-stream variant, also given the image path.
result = tesseract.ProcessPagesFileStream(mImgFile, api)
print("result(ProcessPagesFileStream)= %s" % (result,))

# 3) Raw variant, also given the image path.
result = tesseract.ProcessPagesRaw(mImgFile, api)
print("result(ProcessPagesRaw) %s" % (result,))

# 4) Buffer variant: read the file ourselves and pass the bytes plus length.
# The context manager closes the handle even if read() raises.
with open(mImgFile, "rb") as f:
    mBuffer = f.read()
result = tesseract.ProcessPagesBuffer(mBuffer, len(mBuffer), api)
mBuffer = None  # drop the reference to the image bytes once OCR is done
print("result(ProcessPagesBuffer)= %s" % (result,))
y1 += options.line_gap if y1 > height: y1 = height pg_box = (x0, y0, x1, y1) region = img.crop(pg_box) region.save(tmpimgname, "TIFF") y0 = (height - y1) else: x0 = 0 y0 = 0 x1 = 0 y1 = 0 result = "" if (square.y1 - square.y0) > options.box_threshold: orig_result = tesseract.ProcessPagesWrapper(tmpimgname, api) + "" result = orig_result.replace("\n", "") result = result.replace("\t", "") result = result.strip() if len(result) > 0: # print "RESULTS-------------------------------------->" # print "%d - Result= %s" % (len(result),result) # print "<--------------------------------------RESULTS" if not options.hadoop: file.write("%s\n" % result) coordtemp = tempfile.NamedTemporaryFile() result = tesseract.ExtractResultsWrapper(api, coordtemp.name, len(orig_result), "") #print "len", result
im = cam.get_image(surface) pygame.display.update() screen.blit(im,(0,0)) #if i==400: # pygame.image.save(im, "a.jpg") # img="a.jpg" # result = tesseract.ProcessPagesWrapper(img,api) # print result # print "ok" for event in pygame.event.get(): # Shutdown with X button if event.type==pygame.QUIT: sys.exit() # Shutdown with ESC elif event.type == KEYDOWN: if event.key == K_ESCAPE: sys.exit() i=i+1 ''' im = Image.open("5.jpg") #im=im.rotate(1) im.save("e.jpg") im2 = im.convert("L") im2.save("b.jpg") threshold = 100 im = im2.point(lambda p: p > threshold and 255) im.save("d.jpg") img = "d.jpg" result = tesseract.ProcessPagesWrapper(img, api) print result print "ok"