示例#1
0
def extract_and_ocr(filename, region):
	"""Crop a region out of an image and run it through ABBYY Cloud OCR.

	The cropped region is re-encoded as JPEG in memory, uploaded under the
	name ``temp.jpg`` and processed with ``exportFormat='txt'``.

	Args:
		filename: Path (or file object) handed to ``Image.open``.
		region: Crop box handed unchanged to ``image.crop``.

	Returns:
		Whatever ``result['txt'].read()`` yields for the downloaded text
		export.

	Raises:
		KeyError: If ``ABBYY_APPLICATION_ID`` or ``ABBYY_PASSWORD`` is not
			set in the environment.
	"""
	# Imported locally so this function does not depend on which StringIO
	# flavour the surrounding module imported.
	from io import BytesIO

	application_id = os.environ['ABBYY_APPLICATION_ID']
	password = os.environ['ABBYY_PASSWORD']
	ocr_engine = CloudOCR(application_id=application_id, password=password)

	image = Image.open(filename)
	region_data = image.crop(region)

	# JPEG output is binary, so it must go into a bytes buffer; a text
	# StringIO rejects the encoded image data on Python 3.
	stream = BytesIO()
	region_data.save(stream, 'JPEG')
	# Rewind so the upload reads the buffer from its first byte.
	stream.seek(0)

	post_file = {'temp.jpg': stream}
	result = ocr_engine.process_and_download(post_file, exportFormat='txt')

	return result['txt'].read()
示例#2
0
def extract_and_ocr(filename, region):
    """OCR one rectangular region of an image via ABBYY Cloud OCR.

    The region is cropped from the image, encoded as an in-memory JPEG,
    posted as ``temp.jpg`` and recognised with ``exportFormat='txt'``.

    Args:
        filename: Path (or file object) passed to ``Image.open``.
        region: Crop box passed unchanged to ``image.crop``.

    Returns:
        The value of ``result['txt'].read()`` for the downloaded text export.

    Raises:
        KeyError: If ``ABBYY_APPLICATION_ID`` or ``ABBYY_PASSWORD`` is missing
            from the environment.
    """
    # Local import: keeps the fix independent of the module's own StringIO
    # import, which is not visible from this function.
    from io import BytesIO

    ocr_engine = CloudOCR(
        application_id=os.environ['ABBYY_APPLICATION_ID'],
        password=os.environ['ABBYY_PASSWORD'],
    )

    cropped = Image.open(filename).crop(region)

    # The encoded JPEG is bytes, so a binary buffer is required; a text
    # StringIO cannot accept it on Python 3.
    jpeg_buffer = BytesIO()
    cropped.save(jpeg_buffer, 'JPEG')
    # Rewind so the uploader starts reading at the beginning of the image.
    jpeg_buffer.seek(0)

    result = ocr_engine.process_and_download({'temp.jpg': jpeg_buffer},
                                             exportFormat='txt')
    return result['txt'].read()
示例#3
0
def mrz_scan(source_file, APPID, PWD):
    """OCR a document image and pull candidate MRZ lines out of the text.

    The file is uploaded to ABBYY Cloud OCR (``exportFormat='txt'``,
    ``language='English'``). Of the last three lines of the recognised text,
    every line containing ``'<'`` is kept, stripped of spaces and cut to 44
    characters.

    Args:
        source_file: Path of the image to scan.
        APPID: Application id passed to ``CloudOCR``.
        PWD: Password passed to ``CloudOCR``.

    Returns:
        The kept lines, each terminated by ``'\\n'``; an empty string when
        none of the last three lines contains ``'<'``.

    Raises:
        Exception: With message ``"file error."`` when ``source_file`` is not
            an existing regular file.
    """
    if not os.path.isfile(source_file):
        raise Exception("file error.")
    ocr_engine = CloudOCR(APPID, PWD)

    # The context manager guarantees the upload handle is closed even when
    # the OCR request raises; the original leaked it.
    with open(source_file, 'rb') as input_file:
        post_file = {input_file.name: input_file}
        # Tiny pause retained from the original implementation.
        time.sleep(0.0001)
        result = ocr_engine.process_and_download(post_file,
                                                 exportFormat='txt',
                                                 language='English')
        text = result['txt'].read().decode("utf-8")

    # OCR can insert stray spaces inside an MRZ line; drop them before
    # truncating to 44 characters.
    mrz_code = "".join(
        line.replace(' ', '')[:44] + '\n'
        for line in text.splitlines()[-3:]
        if '<' in line
    )
    print("ourcode :", mrz_code)
    return mrz_code
示例#4
0
文件: ocrizer.py 项目: ethanmcc/ABBYY
        'Contains a password for accessing password-protected images in PDF format.'
    )
    parser.add_argument('--inputFilename', help='', required=True)
    args = parser.parse_args()

    if 'ABBYY_APPLICATION_ID' in os.environ.keys():
        application_id = os.environ['ABBYY_APPLICATION_ID']
    else:
        application_id = args.application_id

    if 'ABBYY_PASSWORD' in os.environ.keys():
        password = os.environ['ABBYY_PASSWORD']
    else:
        password = args.password

    ocr_engine = CloudOCR(application_id, password)

    api_parameters = ['language', 'textType', 'exportFormat', 'pdfPassword']
    parameters = dict(
        filter(lambda x: x[0] in api_parameters and x[1] is not None,
               args._get_kwargs()))

    input_file = open(args.inputFilename, 'rb')
    post_file = {input_file.name: input_file}
    result = ocr_engine.process_and_download(post_file, **parameters)
    for format, content in result.iteritems():
        output_filename = '{name}.{extension}'.format(name='.'.join(
            input_file.name.split('.')[:-1]),
                                                      extension=format)
        with open(output_filename, 'wb') as output_file:
            output_file.write(content.read())
示例#5
0
文件: ocrizer.py 项目: samueltc/ABBYY
	parser.add_argument('--language', help='Specifies recognition language of the document.')
	parser.add_argument('--textType', help='Specifies the type of the text on a page.')
	parser.add_argument('--exportFormat', help='Specifies the export format.')
	parser.add_argument('--pdfPassword', help='Contains a password for accessing password-protected images in PDF format.')
	parser.add_argument('--inputFilename', help='', required=True)
	args = parser.parse_args()

	if 'ABBYY_APPLICATION_ID' in list(os.environ.keys()):
		application_id = os.environ['ABBYY_APPLICATION_ID']
	else:
		application_id = args.application_id

	if 'ABBYY_PASSWORD' in list(os.environ.keys()):
		password = os.environ['ABBYY_PASSWORD']
	else:
		password = args.password

	ocr_engine = CloudOCR(application_id, password)

	api_parameters = ['language', 'textType', 'exportFormat', 'pdfPassword']
	parameters = dict([x for x in args._get_kwargs() if x[0] in api_parameters and x[1] is not None])
	
	input_file = open(args.inputFilename, 'rb')
	post_file = {input_file.name: input_file}
	result = ocr_engine.process_and_download(post_file, **parameters)
	for format, content in result.items():
		output_filename = '{name}.{extension}'.format(name='.'.join(input_file.name.split('.')[:-1]), extension=format)
		with open(output_filename, 'wb') as output_file:
			output_file.write(content.read())
			output_file.close()
		 
from ABBYY import CloudOCR
from io import BytesIO
import os
import dropbox
import argparse

# Command-line interface: one or more input files and a required --output.
parser = argparse.ArgumentParser(
    description="runs ocr on the given files and uploads it to dropbox")
# Inputs are images/PDFs sent to the OCR service, so open them in binary
# mode; text mode would try to decode the raw bytes when they are read.
parser.add_argument('file', type=argparse.FileType('rb'), nargs='+')
parser.add_argument('--output', required=True)
args = parser.parse_args()

# NOTE: this rebinds the name `dropbox` from the imported module to the
# client instance; the module itself is no longer reachable afterwards.
dropbox = dropbox.Dropbox(os.environ['dropbox_token'])
# Shared ABBYY client used by the functions below; credentials come from the
# `abbyy_app_id` / `abbyy_app_secret` environment variables.
ocr = CloudOCR(application_id=os.environ['abbyy_app_id'],
               password=os.environ['abbyy_app_secret'])


def ocrImages(files, exportFormat="pdfSearchable"):
    """Submit several files as one OCR task and fetch the processed result.

    The first file creates the task; every following file is submitted with
    that task's id. The task is then processed with ``exportFormat``, waited
    on, and the URL in ``result['resultUrl']`` is fetched through
    ``ocr.session``.

    Args:
        files: Iterable of open file objects; each needs a ``name``
            attribute, which is used as the upload field name.
        exportFormat: Export format passed to ``ocr.processDocument``.

    Returns:
        The object returned by ``ocr.session.get`` for the result URL.

    Raises:
        ValueError: If ``files`` yields no items, so no task was created.
    """
    task = None
    for image_file in files:
        # Only follow-up submissions carry the id of the existing task.
        extra = {} if task is None else {'taskId': task['id']}
        task = ocr.submitImage(file={image_file.name: image_file}, **extra)

    # Checked after the loop so one-shot iterables are handled too. Without
    # this guard an empty input failed later with an opaque TypeError.
    if task is None:
        raise ValueError("ocrImages() requires at least one input file")

    task = ocr.processDocument(taskId=task['id'], exportFormat=exportFormat)
    result = ocr.wait_for_task(task)
    print("%s credits were used." % result['credits'])
    return ocr.session.get(result['resultUrl'])