def tesseract_ocr(imgname, type='PagesWrapper'):
    """Process an image file through one of the ``tesseract.ProcessPages*`` helpers.

    A fresh ``TessBaseAPI`` is configured on every call (output name
    "outputName", data path ".", language "eng", default engine mode,
    automatic page segmentation) and handed to the selected helper.

    Args:
        imgname: Path of the image file to process.
        type: Name of the helper to use. One of ``'PagesWrapper'``,
            ``'PagesFileStream'``, ``'PagesRaw'`` or ``'PagesBuffer'``.
            The parameter shadows the ``type`` builtin; the name is kept
            so existing keyword callers continue to work.

    Returns:
        Whatever the selected ``tesseract.ProcessPages*`` helper returns
        (presumably the recognised text -- confirm against the bindings).

    Raises:
        ValueError: If ``type`` is not one of the supported names.
    """
    supported = ('PagesWrapper', 'PagesFileStream', 'PagesRaw', 'PagesBuffer')
    # Reject bad selectors before spending time initialising the engine;
    # previously this fell through to an UnboundLocalError on ``result``.
    if type not in supported:
        raise ValueError(
            "unknown type %r; expected one of: %s"
            % (type, ", ".join(supported))
        )

    api = tesseract.TessBaseAPI()
    api.SetOutputName("outputName")
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    if type == 'PagesWrapper':
        return tesseract.ProcessPagesWrapper(imgname, api)
    if type == 'PagesFileStream':
        # Use the caller's path; the old code referenced an unrelated
        # name (mImgFile) that is not defined in this function.
        return tesseract.ProcessPagesFileStream(imgname, api)
    if type == 'PagesRaw':
        return tesseract.ProcessPagesRaw(imgname, api)

    # Only 'PagesBuffer' remains. Image data is binary, so read it in
    # "rb" mode and let the context manager close the handle.
    with open(imgname, "rb") as image_file:
        image_bytes = image_file.read()
    return tesseract.ProcessPagesBuffer(image_bytes, len(image_bytes), api)
# Demo script: runs the sample image through each of the four
# tesseract.ProcessPages* helpers in turn and prints every result.
#from __future__ import print_function
import tesseract
import ctypes
import os

# Debugging aids, left disabled:
#print "HAVE_LIBLEPT=",tesseract.isLibLept()
#print dir("tesseract")
#print tesseract.MAX_NUM_INT_FEATURES

api = tesseract.TessBaseAPI()
api.SetOutputName("outputName")
#api.Init(".","eng")
api.Init(".", "eng", tesseract.OEM_DEFAULT)
api.SetPageSegMode(tesseract.PSM_AUTO)

mImgFile = "eurotext.jpg"

# Each print passes a single pre-formatted string so the statement parses
# under both Python 2 and Python 3 and emits the same text as before.
result = tesseract.ProcessPagesWrapper(mImgFile, api)
print("result(ProcessPagesWrapper)= %s" % (result,))
#api.ProcessPages(mImgFile,None, 0, result)
#print "abc"

result = tesseract.ProcessPagesFileStream(mImgFile, api)
print("result(ProcessPagesFileStream)= %s" % (result,))

result = tesseract.ProcessPagesRaw(mImgFile, api)
print("result(ProcessPagesRaw) %s" % (result,))

# Read the raw image bytes; the context manager closes the file even if
# the read fails.
with open(mImgFile, "rb") as f:
    mBuffer = f.read()
result = tesseract.ProcessPagesBuffer(mBuffer, len(mBuffer), api)
mBuffer = None  # drop the reference to the image bytes once processed
print("result(ProcessPagesBuffer)= %s" % (result,))