def extract_and_ocr(filename, region):
    """Crop ``region`` out of an image file and OCR it with ABBYY Cloud OCR.

    The credentials are read from the ``ABBYY_APPLICATION_ID`` and
    ``ABBYY_PASSWORD`` environment variables.

    Args:
        filename: Path (or file object) handed to ``Image.open``.
        region: Crop box passed unchanged to ``image.crop``
            (presumably a ``(left, upper, right, lower)`` tuple -- confirm
            against the imaging library in use).

    Returns:
        Whatever ``result['txt'].read()`` yields for the ``txt`` export of
        the OCR job.

    Raises:
        KeyError: If either credential environment variable is not set.
    """
    # Local import: the fix must not depend on the file's import block.
    from io import BytesIO

    application_id = os.environ['ABBYY_APPLICATION_ID']
    password = os.environ['ABBYY_PASSWORD']
    ocr_engine = CloudOCR(application_id=application_id, password=password)

    # JPEG output is binary, so it has to be buffered in BytesIO; a text
    # StringIO rejects bytes on Python 3.
    stream = BytesIO()
    # The context manager releases the underlying file handle once the
    # cropped region has been encoded.
    with Image.open(filename) as image:
        region_data = image.crop(region)
        region_data.save(stream, 'JPEG')
    stream.seek(0)  # rewind so the upload reads from the first byte

    post_file = {'temp.jpg': stream}
    result = ocr_engine.process_and_download(post_file, exportFormat='txt')
    return result['txt'].read()
def extract_and_ocr(filename, region): application_id = os.environ['ABBYY_APPLICATION_ID'] password = os.environ['ABBYY_PASSWORD'] ocr_engine = CloudOCR(application_id=application_id, password=password) image = Image.open(filename) region_data = image.crop(region) stream = StringIO() region_data.save(stream, 'JPEG') stream.seek(0) post_file = {'temp.jpg': stream} result = ocr_engine.process_and_download(post_file, exportFormat='txt') return result['txt'].read()
def mrz_scan(source_file, APPID, PWD):
    """OCR a document image and return its MRZ-looking lines.

    The file is sent to ABBYY Cloud OCR (``txt`` export, English). From the
    last three lines of the recognised text, every line containing ``'<'``
    is kept, stripped of spaces (OCR tends to insert spurious ones) and
    truncated to 44 characters (presumably the passport MRZ line length --
    confirm).

    Args:
        source_file: Path of the image to scan.
        APPID: ABBYY application id, passed positionally to ``CloudOCR``.
        PWD: ABBYY password, passed positionally to ``CloudOCR``.

    Returns:
        The kept lines, each terminated by ``'\\n'``; an empty string when
        no candidate line is found.

    Raises:
        Exception: With message ``"file error."`` if ``source_file`` is not
            an existing regular file.
    """
    if not os.path.isfile(source_file):
        raise Exception("file error.")

    ocr_engine = CloudOCR(APPID, PWD)

    # Keep the upload inside ``with`` so the handle is closed even when the
    # OCR request raises.
    with open(source_file, 'rb') as input_file:
        post_file = {input_file.name: input_file}
        result = ocr_engine.process_and_download(
            post_file, exportFormat='txt', language='English')

    text = result['txt'].read().decode("utf-8")

    # Only the tail of the page is inspected; '<' marks a candidate line.
    mrz_code = "".join(
        line.replace(' ', '')[:44] + '\n'
        for line in text.splitlines()[-3:]
        if '<' in line
    )
    print("ourcode :", mrz_code)
    return mrz_code
'Contains a password for accessing password-protected images in PDF format.' ) parser.add_argument('--inputFilename', help='', required=True) args = parser.parse_args() if 'ABBYY_APPLICATION_ID' in os.environ.keys(): application_id = os.environ['ABBYY_APPLICATION_ID'] else: application_id = args.application_id if 'ABBYY_PASSWORD' in os.environ.keys(): password = os.environ['ABBYY_PASSWORD'] else: password = args.password ocr_engine = CloudOCR(application_id, password) api_parameters = ['language', 'textType', 'exportFormat', 'pdfPassword'] parameters = dict( filter(lambda x: x[0] in api_parameters and x[1] is not None, args._get_kwargs())) input_file = open(args.inputFilename, 'rb') post_file = {input_file.name: input_file} result = ocr_engine.process_and_download(post_file, **parameters) for format, content in result.iteritems(): output_filename = '{name}.{extension}'.format(name='.'.join( input_file.name.split('.')[:-1]), extension=format) with open(output_filename, 'wb') as output_file: output_file.write(content.read())
# Command-line options; the first four are forwarded to the OCR API when set.
parser.add_argument('--language',
                    help='Specifies recognition language of the document.')
parser.add_argument('--textType',
                    help='Specifies the type of the text on a page.')
parser.add_argument('--exportFormat', help='Specifies the export format.')
parser.add_argument('--pdfPassword',
                    help='Contains a password for accessing password-protected images in PDF format.')
parser.add_argument('--inputFilename', help='', required=True)
args = parser.parse_args()

# Credentials: an environment variable, when set, wins over the
# corresponding command-line argument.
if 'ABBYY_APPLICATION_ID' in os.environ:
    application_id = os.environ['ABBYY_APPLICATION_ID']
else:
    application_id = args.application_id
if 'ABBYY_PASSWORD' in os.environ:
    password = os.environ['ABBYY_PASSWORD']
else:
    password = args.password

ocr_engine = CloudOCR(application_id, password)

# Forward only the recognised API options that were actually supplied.
api_parameters = ['language', 'textType', 'exportFormat', 'pdfPassword']
parameters = {
    name: value
    for name, value in vars(args).items()
    if name in api_parameters and value is not None
}

# The context manager guarantees the input handle is closed even if the
# OCR request fails.
with open(args.inputFilename, 'rb') as input_file:
    post_file = {input_file.name: input_file}
    result = ocr_engine.process_and_download(post_file, **parameters)

# One output file per returned export format, written next to the input.
# splitext keeps the full stem for extension-less inputs and for paths whose
# directories contain dots.
base_name = os.path.splitext(input_file.name)[0]
for export_format, content in result.items():
    output_filename = '{name}.{extension}'.format(name=base_name,
                                                  extension=export_format)
    # ``with`` closes the file; no explicit close() is needed.
    with open(output_filename, 'wb') as output_file:
        output_file.write(content.read())
import argparse
import os
from io import BytesIO

import dropbox
from ABBYY import CloudOCR

parser = argparse.ArgumentParser(
    description="runs ocr on the given files and uploads it to dropbox")
# Inputs are uploaded as raw file contents, so they must be opened in
# binary mode; text mode fails to decode image/PDF bytes on Python 3.
parser.add_argument('file', type=argparse.FileType('rb'), nargs='+')
parser.add_argument('--output', required=True)
args = parser.parse_args()

# NOTE(review): this rebinding shadows the ``dropbox`` module with the client
# instance; kept because code outside this view may rely on the name.
dropbox = dropbox.Dropbox(os.environ['dropbox_token'])
ocr = CloudOCR(application_id=os.environ['abbyy_app_id'],
               password=os.environ['abbyy_app_secret'])


def ocrImages(files, exportFormat="pdfSearchable"):
    """Submit several files as one OCR task and fetch the processed result.

    The first file creates the task; every following file is submitted with
    that task's id so all of them end up in the same task. The task is then
    processed, waited for, and its ``resultUrl`` is downloaded through the
    OCR client's session.

    Args:
        files: Iterable of open file objects exposing a ``name`` attribute.
        exportFormat: Export format forwarded to ``ocr.processDocument``.

    Returns:
        The response object returned by ``ocr.session.get`` for the result
        URL.

    Raises:
        ValueError: If ``files`` is empty (there would be no task to process).
    """
    task = None
    for image_file in files:
        upload = {image_file.name: image_file}
        if task is None:
            task = ocr.submitImage(file=upload)
        else:
            # Reuse the existing task so the pages are combined.
            task = ocr.submitImage(file=upload, taskId=task['id'])

    if task is None:
        raise ValueError("ocrImages() requires at least one file")

    task = ocr.processDocument(taskId=task['id'], exportFormat=exportFormat)
    result = ocr.wait_for_task(task)
    print("%s credits were used." % result['credits'])
    download = ocr.session.get(result['resultUrl'])
    return download