Exemplo n.º 1
0
def tesseract_ocr(imgname, type='PagesWrapper'):
    """Run one of the ``tesseract.ProcessPages*`` functions on an image file.

    A fresh ``tesseract.TessBaseAPI`` is created and configured on every
    call, then handed to the selected processing function together with
    the image.

    Args:
        imgname: Path of the image file to process.
        type: Which entry point to call: 'PagesWrapper' (default),
            'PagesFileStream', 'PagesRaw' or 'PagesBuffer'.  The name
            shadows the builtin but is kept so existing keyword callers
            keep working.

    Returns:
        Whatever the selected ``tesseract.ProcessPages*`` function returns.

    Raises:
        ValueError: If ``type`` is not one of the supported names.
    """
    supported = ('PagesWrapper', 'PagesFileStream', 'PagesRaw', 'PagesBuffer')
    if type not in supported:
        # Fail fast with a clear message; previously an unknown value fell
        # through every branch and crashed on the unbound ``result``.
        raise ValueError(
            "unsupported type %r; expected one of: %s"
            % (type, ", ".join(supported)))

    api = tesseract.TessBaseAPI()
    api.SetOutputName("outputName")
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    # The path-based variants all take the caller's ``imgname``.
    if type == 'PagesWrapper':
        return tesseract.ProcessPagesWrapper(imgname, api)
    if type == 'PagesFileStream':
        return tesseract.ProcessPagesFileStream(imgname, api)
    if type == 'PagesRaw':
        return tesseract.ProcessPagesRaw(imgname, api)

    # 'PagesBuffer': pass the raw file contents.  Binary mode keeps the
    # image bytes intact, and ``with`` closes the handle even on error.
    with open(imgname, "rb") as image_file:
        image_bytes = image_file.read()
    return tesseract.ProcessPagesBuffer(image_bytes, len(image_bytes), api)
Exemplo n.º 2
0
#from __future__ import print_function
import tesseract
import ctypes
import os
#print "HAVE_LIBLEPT=",tesseract.isLibLept()
#print dir("tesseract")
#print tesseract.MAX_NUM_INT_FEATURES
api = tesseract.TessBaseAPI()
api.SetOutputName("outputName")
#api.Init(".","eng")
api.Init(".", "eng", tesseract.OEM_DEFAULT)
api.SetPageSegMode(tesseract.PSM_AUTO)
mImgFile = "eurotext.jpg"

result = tesseract.ProcessPagesWrapper(mImgFile, api)
print "result(ProcessPagesWrapper)=", result
#api.ProcessPages(mImgFile,None, 0, result)
#print "abc"
result = tesseract.ProcessPagesFileStream(mImgFile, api)
print "result(ProcessPagesFileStream)=", result

result = tesseract.ProcessPagesRaw(mImgFile, api)
print "result(ProcessPagesRaw)", result

f = open(mImgFile, "rb")
mBuffer = f.read()
f.close()
result = tesseract.ProcessPagesBuffer(mBuffer, len(mBuffer), api)
mBuffer = None
print "result(ProcessPagesBuffer)=", result