Exemplo n.º 1
0
def tesseract_ocr(imgname, type='PagesWrapper'):
    """Process an image file through one of the tesseract ProcessPages* calls.

    A fresh ``TessBaseAPI`` is created for every call, initialised for
    English (``"eng"``) with ``OEM_DEFAULT`` and ``PSM_AUTO``.

    Args:
        imgname: Path of the image file to process.
        type: Which entry point of the ``tesseract`` module to use. One of
            ``'PagesWrapper'``, ``'PagesFileStream'``, ``'PagesRaw'`` (the
            path is handed to the library) or ``'PagesBuffer'`` (the file is
            read here and its bytes are handed to the library).

    Returns:
        Whatever the selected ``tesseract.ProcessPages*`` function returns
        (presumably the recognised text -- confirm against the bindings).

    Raises:
        ValueError: If ``type`` is not one of the supported names.
    """
    supported = ('PagesWrapper', 'PagesFileStream', 'PagesRaw', 'PagesBuffer')
    # Validate before touching the engine so a typo fails fast with a clear
    # message instead of an UnboundLocalError after initialisation.
    if type not in supported:
        raise ValueError(
            "unsupported type %r; expected one of: %s"
            % (type, ", ".join(supported)))

    api = tesseract.TessBaseAPI()
    api.SetOutputName("outputName")
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    if type == 'PagesWrapper':
        return tesseract.ProcessPagesWrapper(imgname, api)
    if type == 'PagesFileStream':
        return tesseract.ProcessPagesFileStream(imgname, api)
    if type == 'PagesRaw':
        return tesseract.ProcessPagesRaw(imgname, api)

    # 'PagesBuffer': image data is binary, so read it in "rb" mode and make
    # sure the handle is closed even if the read fails.
    with open(imgname, "rb") as image_file:
        image_bytes = image_file.read()
    return tesseract.ProcessPagesBuffer(image_bytes, len(image_bytes), api)
Exemplo n.º 2
0
# -*- coding: utf-8 -*-
#from __future__ import print_function
import tesseract
import ctypes
import os
#print "HAVE_LIBLEPT=",tesseract.isLibLept()
#print dir("tesseract")
#print tesseract.MAX_NUM_INT_FEATURES
api = tesseract.TessBaseAPI()
api.SetOutputName("outputName")
#api.Init(".","eng")
api.Init(".", "eng", tesseract.OEM_DEFAULT)
api.SetPageSegMode(tesseract.PSM_AUTO)
mImgFile = "eurotext.jpg"

result = tesseract.ProcessPagesWrapper(mImgFile, api)
print "result(ProcessPagesWrapper)=", result
#api.ProcessPages(mImgFile,None, 0, result)
#print "abc"
result = tesseract.ProcessPagesFileStream(mImgFile, api)
print "result(ProcessPagesFileStream)=", result

result = tesseract.ProcessPagesRaw(mImgFile, api)
print "result(ProcessPagesRaw)", result

f = open(mImgFile, "rb")
mBuffer = f.read()
f.close()
result = tesseract.ProcessPagesBuffer(mBuffer, len(mBuffer), api)
mBuffer = None
print "result(ProcessPagesBuffer)=", result
Exemplo n.º 3
0
                y1 += options.line_gap
                if y1 > height:
                    y1 = height
                pg_box = (x0, y0, x1, y1)
                region = img.crop(pg_box)
                region.save(tmpimgname, "TIFF")
                y0 = (height - y1)
        else:
            x0 = 0
            y0 = 0
            x1 = 0
            y1 = 0

        result = ""
        if (square.y1 - square.y0) > options.box_threshold:
            orig_result = tesseract.ProcessPagesWrapper(tmpimgname, api) + ""
            result = orig_result.replace("\n", "")
            result = result.replace("\t", "")
            result = result.strip()

        if len(result) > 0:
            # print "RESULTS-------------------------------------->"
            # print "%d - Result= %s" % (len(result),result)
            # print "<--------------------------------------RESULTS"
            if not options.hadoop:
                file.write("%s\n" % result)

            coordtemp = tempfile.NamedTemporaryFile()
            result = tesseract.ExtractResultsWrapper(api, coordtemp.name,
                                                     len(orig_result), "")
            #print "len", result
Exemplo n.º 4
0
    im = cam.get_image(surface)
    pygame.display.update()
    screen.blit(im,(0,0))
    #if i==400:
    #    pygame.image.save(im, "a.jpg")
    #    img="a.jpg"
    #    result = tesseract.ProcessPagesWrapper(img,api)
    #    print result
    #    print "ok"
    for event in pygame.event.get():
        # Shutdown with X button
        if event.type==pygame.QUIT:
            sys.exit()
        # Shutdown with ESC
        elif event.type == KEYDOWN:
            if event.key == K_ESCAPE:
                sys.exit()
    i=i+1
'''
im = Image.open("5.jpg")
#im=im.rotate(1)
im.save("e.jpg")
im2 = im.convert("L")
im2.save("b.jpg")
threshold = 100
im = im2.point(lambda p: p > threshold and 255)
im.save("d.jpg")
img = "d.jpg"
result = tesseract.ProcessPagesWrapper(img, api)
print result
print "ok"