def pyocr_one_image(self, image_path):
    """Run OCR on one image file and return the recognised text.

    The first OCR tool reported by pyocr is used with the Simplified
    Chinese (``chi_sim``) language data.

    revision: 20190804
    Author: https://blog.csdn.net/HuangZhang_123/article/details/61920975

    references:
        Tesseract OCR site (most Linux systems can install it straight from
        the command line): https://github.com/tesseract-ocr/tesseract/wiki
        Tesseract OCR 5.0.0, Windows 64-bit download:
        https://github.com/UB-Mannheim/tesseract/wiki
        If downloads from abroad are slow, a 4.0 download hosted in China:
        http://www.xue51.com/soft/1594.html
        Simplified Chinese training data:
        https://github.com/tesseract-ocr/tessdata/blob/master/chi_sim.traineddata
        Traditional Chinese training data:
        https://github.com/tesseract-ocr/tessdata/blob/master/chi_tra.traineddata
        Official documentation: https://digi.bib.uni-mannheim.de/tesseract/doc/
        pyocr home page: https://gitlab.gnome.org/World/OpenPaperwork/pyocr

    blog posts:
        (Training/running tesseract-OCR from the command line only)
        Tesseract-OCR Chinese recognition and font training example:
        https://www.cnblogs.com/wzben/p/5930538.html
        https://blog.csdn.net/qq_37193537/article/details/81335165

    Args:
        image_path: path of the image to recognise.

    Returns:
        The recognised text, or ``False`` when no OCR tool is available.
    """
    os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
    tools = pyocr.get_available_tools()[:]
    if not tools:
        error_msg = "No OCR tool found"
        self.write_log(
            f"Inside Method {sys._getframe().f_code.co_name} of Class {self.__class__.__name__}, {error_msg}"
        )
        return False
    # Use the first OCR engine that was found.
    self.write_log(f"Using {tools[0].get_name()} to ocr {image_path}")
    # lang='chi_sim' selects the OCR language data,
    # e.g. under C:\Program Files\Tesseract-OCR\tessdata
    # The context manager releases the image file handle once OCR is done.
    with Image.open(image_path) as image:
        return tools[0].image_to_string(image, lang="chi_sim")
def dt_image_to_text(self, filename, buff, lang='chi_sim'):  # Simplified Chinese
    """Convert an image to text with the first available OCR tool.

    Args:
        filename: path of the input image; used when it is a ``str``.
        buff: raw image bytes; used only when ``filename`` is not a ``str``.
        lang: OCR language passed to the tool.

    Returns:
        The recognised text, or ``""`` when no OCR tool is available or
        neither ``filename`` nor ``buff`` is usable.
    """
    tools = pyocr.get_available_tools()[:]
    if not tools:
        return ""
    if isinstance(filename, str):
        source = filename
    elif buff:
        source = io.BytesIO(buff)
    else:
        return ""
    # Close the image (and any underlying file) as soon as OCR is done.
    with PI.open(source) as image:
        return tools[0].image_to_string(image, lang=lang,
                                        builder=TextBuilder())
def predict(self):
    """Download a captcha, binarise it, OCR it and return the corrected text.

    Returns:
        The recognised captcha after ``self.charReplace`` and the character
        correction table have been applied.
    """
    captchapath = self.Download()
    try:
        with Image.open(captchapath) as source:
            # getpixel() only yields (r, g, b) triples for RGB images, so
            # normalise palette/grey/alpha captchas first. convert() returns
            # an independent copy, so the file can be closed right away.
            im = source.convert('RGB')
        width, height = im.size
        # Binarise: a pixel brighter than 130 on every channel becomes
        # white, everything else becomes black.
        for x in range(width):
            for y in range(height):
                r, g, b = im.getpixel((x, y))
                if r > 130 and g > 130 and b > 130:
                    im.putpixel((x, y), (255, 255, 255))
                else:
                    im.putpixel((x, y), (0, 0, 0))
        # Crack the captcha.
        tools = pyocr.get_available_tools()[:]
        # Captcha correction table: characters the OCR commonly confuses.
        redata = {'I': 'r', 'E': 'g', 'G': '6', "L": 'i', "l": 'k'}
        captcha = tools[0].image_to_string(im, lang='eng')
        captcha = self.charReplace(captcha)
        # No replacement value is itself a key, so a single lookup per
        # character is equivalent to scanning the whole table.
        return "".join(redata.get(ch, ch) for ch in captcha)
    finally:
        # Prediction finished (or failed): delete the downloaded captcha so
        # files do not pile up.
        os.remove(captchapath)
def __init__(self, args):
    """Load the YAML configuration and select the first available OCR tool.

    Args:
        args: parsed options; ``args.config`` is the path of the YAML
            configuration file.

    Raises:
        IndexError: if pyocr reports no available OCR tool.
    """
    with open(args.config, "r") as f:
        # yaml.load() without a Loader is deprecated, is rejected by
        # PyYAML >= 6 and may construct arbitrary objects; safe_load
        # handles plain configuration data.
        self.config = yaml.safe_load(f)
    self.args = args
    tools = pyocr.get_available_tools()
    self.tool = tools[0]
    self.p = PokemonGo()
def img_upload_file():
    """Save an uploaded image, OCR it and return the result as a JSON string.

    Only POST requests are handled; for any other method the function falls
    through and returns ``None``.

    Returns:
        JSON with ``ocr_name`` (the tool used) and ``ocr_data`` (the text).
    """
    if request.method == 'POST':
        # Uploaded file; "file" is the name of the form's input tag.
        upload = request.files["file"]
        # Sanitised file name.
        filename = secure_filename(upload.filename)
        img_path = os.path.join(config["UPLOAD_FOLDER"], filename)
        # Save the upload.
        upload.save(img_path)
        # Look for an OCR engine.
        tools = pyocr.get_available_tools()[:]
        if len(tools) == 0:
            print("No OCR tool found")
            # NOTE(review): this terminates the process from inside a request
            # handler; kept as-is because callers may rely on it.
            sys.exit(1)
        ocr_name = "Using '%s'" % (tools[0].get_name())
        # Release the file handle before the image may be deleted below;
        # removing a file that is still open fails on Windows.
        with Image.open(img_path) as image:
            ocr_data = tools[0].image_to_string(image, lang='chi_sim')
        result = {
            'ocr_name': ocr_name,
            'ocr_data': ocr_data,
        }
        # Delete the image only after a successful (non-empty) recognition.
        if len(ocr_data) != 0:
            os.remove(img_path)
        return json.dumps(result)
def ocr(path):
    """Return the Simplified Chinese OCR text of the image at *path*.

    Returns ``None`` when pyocr reports no available OCR tool.
    """
    import os
    os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
    engines = pyocr.get_available_tools()[:]
    if not engines:
        return None
    engine = engines[0]
    picture = Image.open(path)
    return engine.image_to_string(picture, lang='chi_sim')
def saldo_sodexo(card, card_type, cpf):
    """Query the Sodexo balance page for a card and print the result.

    The captcha is downloaded, OCR'd with the first available tool and
    validated before the balance request is sent.

    Returns:
        True when the balance page was parsed and printed, False when the
        captcha could not be validated or the response contains 'textRed'.
    """
    session = requests.Session()
    url = 'https://sodexosaldocartao.com.br/saldocartao/consultaSaldo.do?operation=consult'
    captcha_url = 'https://sodexosaldocartao.com.br/saldocartao/jcaptcha.do'

    captcha = download_captcha(captcha_url, session)
    ocr_tool = pyocr.get_available_tools()[0]
    value = ocr_tool.image_to_string(process_image(captcha))
    os.unlink(captcha)

    validated = validate_captcha(value)
    if not validated:
        return False

    payload = {
        'service': card_type,
        'cardNumber': card,
        'cpf': cpf,
        'jcaptcha_response': validated,
        'x': '6',
        'y': '9',
    }
    response = session.post(url, params=payload)
    if 'textRed' in response.content:
        return False

    model = parse_html(response.content)
    for field in ('name', 'company', 'status', 'card', 'balance'):
        print(model[field])
    return True
def processing(path_to_image, filename, user):
    """Convert an uploaded file to JPEG, OCR it and store the text.

    Args:
        path_to_image: path of the uploaded file, appended to STATIC_ROOT.
        filename: original file name; its extension selects the conversion
            (PDF via Wand, PNG via an RGB conversion, anything else is
            re-saved as JPEG directly).
        user: used as the per-user sub-directory name.

    Returns:
        Path of the text file the OCR result was appended to.
    """
    directory_for_input_data = BASE_DIR + '/ocr/ocr_input_data/{}/'.format(user)
    if not os.path.exists(directory_for_input_data):
        os.makedirs(directory_for_input_data)

    # splitext copes with extensions of any length or case ("scan.jpeg",
    # "DOC.PDF"); slicing off a fixed four characters mangled those names.
    filename, extension = os.path.splitext(filename)
    extension = extension.lower()
    jpg_path = '{}/{}.jpg'.format(directory_for_input_data, filename)

    if extension == '.pdf':
        with WandImage(filename=settings.STATIC_ROOT+path_to_image) as img:
            img.save(filename=jpg_path)
    elif extension == '.png':
        with Image.open(fp=STATIC_ROOT+path_to_image) as img:
            # JPEG has no alpha channel, so PNGs are converted to RGB first.
            img.convert('RGB').save(jpg_path, 'JPEG')
    else:
        with Image.open(fp=STATIC_ROOT+path_to_image) as img:
            img.save(jpg_path, 'JPEG')

    tools = pyocr.get_available_tools()
    tool = tools[0]
    # Same "YYYY_MM_DD__HH_MM_SS" stamp as before, but built with strftime:
    # trimming str(datetime) by seven characters was wrong whenever the
    # microsecond field happened to be 0.
    date = datetime.today().strftime('%Y_%m_%d__%H_%M_%S')
    with Image.open(jpg_path) as jpg_image:
        text = tool.image_to_string(jpg_image)

    directory_for_results = BASE_DIR + '/ocr/ocr_results/{}/'.format(user)
    if not os.path.exists(directory_for_results):
        os.makedirs(directory_for_results)
    filename = 'res_{}_{}.txt'.format(filename, date)
    with codecs.open(directory_for_results+filename, 'a', encoding='utf-8') as txt_file:
        txt_file.write(text)
    return directory_for_results+filename
def study_ocr():
    """OCR the fixed sample image ``test4.png`` in STORAGE_FOLDER (English).

    Returns the recognised text, or the message "No OCR tool found" when
    pyocr reports no available tool.
    """
    sample_path = STORAGE_FOLDER + "/test4.png"
    os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
    engines = pyocr.get_available_tools()[:]
    if not engines:
        return "No OCR tool found"
    picture = Image.open(sample_path)
    return engines[0].image_to_string(picture, lang='eng')
def __init__(self, args):
    """Load the YAML configuration, select an OCR tool and reset counters.

    Args:
        args: parsed options; ``args.config`` is the path of the YAML
            configuration file.

    Raises:
        IndexError: if pyocr reports no available OCR tool.
    """
    with open(args.config, "r") as f:
        # yaml.load() without a Loader is deprecated, is rejected by
        # PyYAML >= 6 and may construct arbitrary objects; safe_load
        # handles plain configuration data.
        self.config = yaml.safe_load(f)
    self.args = args
    tools = pyocr.get_available_tools()
    self.tool = tools[0]
    self.state = ''
    self.egg_walked = 0
    self.egg_total = 0
def processing(): global rectangle,thresh,erosion_iters,most_common_filter # r = cv2.getTrackbarPos('Threshold', 'Inputs') #changed from frame to Inputs # cv2.namedWindow('image') cv2.setMouseCallback('image', draw_shape) # img_temp, img, roi = initialize_images() rectangle = None scale = 1 factor = 0.75 # while(1): sleep(0.2) img = img_temp.copy() # ### ROI ########################################################### if is_rectangle(rectangle): roi = img_temp[rectangle[0][1]:rectangle[1][1], rectangle[0][0]:rectangle[1][0]] cv2.rectangle(img,rectangle[0],rectangle[1],(0,255,0),0) else: roi = None ### FILTER ######################################################## # retrieving parameters from GUI slidebars ### DISPLAY ####################################################### cv2.imshow('image',img) if roi is not None: #changed from "if roi != None" which gave array related error kernel = np.ones((5, 5), np.uint8) roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) roi = cv2.threshold(roi, thresh, 255, cv2.THRESH_BINARY)[1] roi = cv2.erode(roi, kernel, iterations=erosion_iters) cv2.imshow('ROI',roi) if roi is not None and not drawing: #changed from "if roi != None" which gave array related error ### OCR ########################################################### tool = pyocr.get_available_tools()[0] # lang = 'letsgodigital'#'letsgodigital'#"eng" #export TESSDATA_PREFIX=/path/to/tessdata/folder txt = tool.image_to_string(Image.fromarray(roi), lang=lang, builder=builders.TextBuilder()) print(txt) ### ACTIONS ####################################################### k = cv2.waitKey(1) & 0xFF if k == ord('c'): img_temp, img, roi = initialize_images() img = img_temp.copy() rectangle, roi = None, None pass elif k == ord('r'): # todo: ROI resizing scale *= factor print(scale) img = cv2.resize(img, (0,0), fx=factor, fy=factor) img_temp = img.copy() elif k == 27: #esc key cv2.destroyAllWindows() break
def __init__(self):
    """Load ``config.yaml``, pick the first OCR tool and ask for the speed.

    The speed (km/h) is read interactively from standard input.
    """
    with open("config.yaml", "r") as config_file:
        self.config = yaml.load(config_file, Loader=yaml.FullLoader)
    self.tool = pyocr.get_available_tools()[0]
    self.state = ''
    self.egg_walked = 0
    self.distance_total = 0
    self.distance_walked = 0
    answer = input("Mennyivel mész? km/h ")
    self.speed = float(answer)
def Imgprint(img):
    """OCR *img* (Simplified Chinese) and return its CJK characters.

    Returns a list with one entry per character in the range U+4E00-U+9FA5
    found in the recognised text. Exits the process with status 1 when no
    OCR tool is available.
    """
    engines = pyocr.get_available_tools()[:]
    if not engines:
        print("No OCR tool found")
        sys.exit(1)
    recognised = engines[0].image_to_string(img, lang='chi_sim')
    return re.findall(r"[\u4e00-\u9fa5]", recognised, re.S)
def ocr_image(self, buff, lang='chi_sim'):  # Simplified Chinese
    """OCR the image object *buff* with the first available tool.

    Returns the recognised text, or ``""`` when no OCR tool is available.
    """
    engines = pyocr.get_available_tools()[:]
    if not engines:
        return ""
    text_builder = TextBuilder()
    return engines[0].image_to_string(buff, lang=lang, builder=text_builder)
def __init__(self, args):
    """Load the YAML configuration and set up OCR and name-search state.

    Args:
        args: parsed options; ``args.config`` is the path of the YAML
            configuration file, which must provide ``names.name_check`` and
            ``names.search_string``.

    Raises:
        IndexError: if pyocr reports no available OCR tool.
    """
    with open(args.config, "r") as f:
        # yaml.load() without a Loader is deprecated, is rejected by
        # PyYAML >= 6 and may construct arbitrary objects; safe_load
        # handles plain configuration data.
        self.config = yaml.safe_load(f)
    self.args = args
    tools = pyocr.get_available_tools()
    self.tool = tools[0]
    self.p = PokemonGo()
    self.i = 2
    self.CHECK_STRING = self.config['names']['name_check']
    self.SEARCH_STRING = self.config['names']['search_string']
def ocrReco(wMat, size_):
    """Tile ``size_`` 28x28 glyphs side by side, OCR the strip and print it.

    The strip is written to ``./temp/temp.png`` and read back for OCR; the
    recognised text is printed GBK-encoded.
    """
    glyph = 28
    newMat = np.zeros((glyph, glyph * size_))
    for idx in range(size_):
        offset = idx * glyph
        for row in range(glyph):
            newMat[row, offset:offset + glyph] = [
                wMat[idx, row, col] for col in range(glyph)
            ]
    strip = Image.fromarray(uint8(newMat))
    strip.save('./temp/temp.png')
    tools = pyocr.get_available_tools()[:]
    text = tools[0].image_to_string(Image.open('./temp/temp.png'), lang='eng')
    print(text.encode('GBK', 'ignore'))
def imgTostring(path):
    """OCR the image at *path* with the first available OCR tool.

    Returns:
        The recognised text, or ``-1`` when no OCR tool is found.
    """
    from pyocr import pyocr
    from PIL import Image
    tools = pyocr.get_available_tools()[:]  # look for usable OCR tools
    if len(tools) == 0:
        print("No OCR tool found")
        return -1
    # Open the image the caller asked for; the hard-coded 'code.jpg' used
    # before silently ignored the ``path`` argument.
    code = tools[0].image_to_string(Image.open(path))
    return code
def imgTostring(path):
    """OCR the image at *path* with the first available OCR tool.

    Returns:
        The recognised text, or ``-1`` when no OCR tool is found.
    """
    from pyocr import pyocr
    from PIL import Image
    tools = pyocr.get_available_tools()[:]  # look for usable OCR tools
    if len(tools) == 0:
        print("No OCR tool found")
        return -1
    # Open the image the caller asked for; the hard-coded 'code.jpg' used
    # before silently ignored the ``path`` argument.
    code = tools[0].image_to_string(Image.open(path))
    return code
def find_missing_ocr(lang):
    """
    Report which OCR components appear to be missing.

    OCR tools are a little bit more tricky: both the engine itself and the
    language data named by ``lang['tesseract']`` are checked.

    Returns a list of ``(name, '(none)', {distribution: package})`` tuples,
    one per missing component.
    """
    try:
        from pyocr import pyocr
        ocr_tools = pyocr.get_available_tools()
    except ImportError:
        print(
            "[WARNING] Couldn't import Pyocr. Will assume OCR tool is not"
            " installed yet"
        )
        ocr_tools = []

    missing = []
    if ocr_tools:
        try:
            langs = ocr_tools[0].get_available_languages()
        except Exception as exc:
            print("[WARNING] Exception while looking for available languages:"
                  " {}".format(str(exc)))
            langs = []
    else:
        langs = []
        engine_packages = {
            'debian': 'tesseract-ocr',
            'fedora': 'tesseract',
            'gentoo': 'app-text/tesseract',
            'linuxmint': 'tesseract-ocr',
            'ubuntu': 'tesseract-ocr',
        }
        missing.append(('Tesseract', '(none)', engine_packages))

    wanted = lang['tesseract']
    if not langs or wanted not in langs:
        data_packages = {
            'debian': ('tesseract-ocr-%s' % wanted),
            'fedora': ('tesseract-langpack-%s' % wanted),
            'linuxmint': ('tesseract-ocr-%s' % wanted),
            'ubuntu': ('tesseract-ocr-%s' % wanted),
        }
        missing.append(('Tesseract language data', '(none)', data_packages))

    return missing
def onLeftButtonUp(event):
    """Finish the selection: grab the selected screen area and OCR it.

    Stores the selection bounds in ``self.selectPosition`` and the OCR text
    in ``self.result``, then destroys the ``self.top`` window.
    """
    self.sel = False
    try:
        self.canvas.delete(lastDraw)
    except Exception:
        # Best effort: there may be nothing drawn to delete.
        pass
    sleep(0.1)
    x_low, x_high = sorted([self.X.get(), event.x])
    y_low, y_high = sorted([self.Y.get(), event.y])
    self.selectPosition = (x_low, x_high, y_low, y_high)
    grab_box = (x_low + 1, y_low + 1, x_high, y_high)
    snapshot = ImageGrab.grab(grab_box)
    engines = pyocr.get_available_tools()[:]
    self.result = engines[0].image_to_string(snapshot)
    self.top.destroy()
def ocr_pyocr(img_id):
    """OCR the image identified by *img_id* and pass the text to write_output.

    The first available OCR tool is used with the first language it reports.
    Exits the process with status 1 when no OCR tool is available.
    """
    img_filename = './' + get_image_filename(img_id)
    # The former cv2.imread() of this file was removed: it decoded the whole
    # image only to discard the result.
    tools = pyocr.get_available_tools()
    if len(tools) == 0:
        print("No OCR tool found")
        sys.exit(1)
    tool = tools[0]
    langs = tool.get_available_languages()
    lang = langs[0]
    # The context manager releases the image file handle once OCR is done.
    with Image.open(img_filename) as image:
        txt = tool.image_to_string(image, lang=lang,
                                   builder=pyocr.tesseract.builders.TextBuilder())
    write_output(img_id, txt)
def main():
    """Convert the sample captcha to greyscale, save it and print its OCR text.

    Also prints the name, version and available languages of the OCR tool
    that is used. Exits with status 1 when no OCR tool is available.
    """
    # Convert the picture to greyscale and save it.
    grey = Image.open("d:/validate_code.png").convert("L")
    grey.save("d:/result.png")

    engines = pyocr.get_available_tools()[:]
    if not engines:
        print("No OCR tool found")
        sys.exit(1)
    engine = engines[0]
    print("Using '%s'" % (engine.get_name()))
    print(engine.get_version())
    print("Using '%s'" % (engine.get_available_languages()))
    saved = Image.open('d:/result.png')
    print(engine.image_to_string(saved, lang='eng'))
def find_missing_ocr(lang):
    """
    Report which OCR components appear to be missing.

    OCR tools are a little bit more tricky: both the engine itself and the
    language data named by ``lang['tesseract']`` are checked.

    Returns a list of ``(name, '(none)', {distribution: package})`` tuples,
    one per missing component.
    """
    try:
        from pyocr import pyocr
        ocr_tools = pyocr.get_available_tools()
    except ImportError:
        print("[WARNING] Couldn't import Pyocr. Will assume OCR tool is not"
              " installed yet")
        ocr_tools = []

    missing = []
    if ocr_tools:
        try:
            langs = ocr_tools[0].get_available_languages()
        except Exception as exc:
            print("[WARNING] Exception while looking for available languages:"
                  " {}".format(str(exc)))
            langs = []
    else:
        langs = []
        engine_packages = {
            'debian': 'tesseract-ocr',
            'fedora': 'tesseract',
            'gentoo': 'app-text/tesseract',
            'linuxmint': 'tesseract-ocr',
            'ubuntu': 'tesseract-ocr',
        }
        missing.append(('Tesseract', '(none)', engine_packages))

    wanted = lang['tesseract']
    if not langs or wanted not in langs:
        data_packages = {
            'debian': ('tesseract-ocr-%s' % wanted),
            'fedora': ('tesseract-langpack-%s' % wanted),
            'linuxmint': ('tesseract-ocr-%s' % wanted),
            'ubuntu': ('tesseract-ocr-%s' % wanted),
        }
        missing.append(('Tesseract language data', '(none)', data_packages))

    return missing
def postImage():
    """OCR an uploaded image, summarise the text and publish the summary.

    The summary is published on the redis 'notifications' channel; the text
    and the summary are also printed. Returns an empty response body.
    """
    image_file = request.files['file']
    text = "Hello Glass"
    engines = pyocr.get_available_tools()[:]
    if engines:
        text = engines[0].image_to_string(Image.open(image_file), lang='eng',
                                          psm='6',
                                          builder=builders.TextBuilder())
    # Collapse every run of whitespace into a single space.
    text = " ".join(text.split())

    summariser = SummaryTool()
    ranks = summariser.get_senteces_ranks(text)
    summary = summariser.get_summary(text, ranks)
    redis.publish('notifications', "%s." % summary)

    separator = "=========================================\n"
    print(separator)
    print(text)
    print(separator)
    print(summary)
    return ""
def convert(verbose=False):
    """Convert every PDF under PDF_DIRECTORY to images and then to text.

    Each sub-directory of PDF_DIRECTORY is scanned in sorted order and every
    file in it is passed through get_images, get_paragraphs and get_texts.
    A failure on one file is reported on stderr and does not stop the run.

    Args:
        verbose: when true, print a numbered heading per file and forward
            the flag to the helpers.
    """
    import sys

    tool = pyocr.get_available_tools()[0]
    counter = 1
    # Convert into image and then text every pdf file in "data/pdf".
    for pdf_dir in sorted(os.listdir(PDF_DIRECTORY)):
        for pdf_file in sorted(os.listdir(PDF_DIRECTORY + "/" + pdf_dir)):
            # Status print.
            if verbose:
                print("\n\033[1m%d. %s\033[0m" % (counter, pdf_file),
                      end="\n\n")
            try:
                get_images(pdf_file, pdf_dir, verbose)
                get_paragraphs(pdf_file, tool, verbose)
                get_texts(pdf_file, pdf_dir, tool, verbose)
            except Exception as exc:
                # Best effort: keep converting the remaining files, but
                # report the failure instead of silently dropping it.
                print("ERROR: %s (%s)" % (pdf_file, exc), file=sys.stderr)
            counter += 1
    return
def security_code():
    """Decode the base64 image in the ``img`` query argument and OCR it.

    Aborts with 404 when the argument is missing or no OCR tool is
    available; otherwise returns the recognised text.
    """
    img_base64 = request.args.get('img', '')
    if not img_base64:
        abort(404)
    # Drop an optional "data:...;base64," style prefix.
    payload = img_base64.split(',', 1)[-1]
    data_stream = io.BytesIO(base64.b64decode(payload))
    img = Image.open(data_stream).convert("RGBA")

    # Pre-process the picture to improve recognition: repaint the known
    # background colour.
    background = (204, 233, 246, 255)
    replacement = (255, 255, 255, 1)
    pixdata = img.load()
    width, height = img.size
    for y in xrange(height):
        for x in xrange(width):
            if pixdata[x, y] == background:
                pixdata[x, y] = replacement

    # OCR the text.
    engines = pyocr.get_available_tools()[:]
    if not engines:
        abort(404)
    return engines[0].image_to_string(img, lang='eng')
def verify_ocr(filename):
    """Recognise the verification code in *filename*.

    The image is inverted, thresholded and eroded, written to
    ``verify.png`` and OCR'd (English). When no OCR tool is available the
    code is asked for on standard input instead.
    """
    from pyocr import pyocr
    from PIL import Image
    import cv2
    import numpy as np

    # Verification-code OCR.
    engines = pyocr.get_available_tools()[:]
    if not engines:
        print("No OCR tool found")
        print("input manually")
        return input('input correct varify code:')

    cap = cv2.imread(filename, 0)
    cap[:] = 255 - cap[:]
    _, binary = cv2.threshold(cap, 100, 255, cv2.THRESH_BINARY)
    structuring = np.ones((3, 3), dtype=np.uint8)
    eroded = cv2.erode(binary, structuring, iterations=2)
    cv2.imshow('image', eroded)
    cv2.imwrite('verify.png', eroded)

    engine = engines[0]
    print("Using '%s'" % (engine.get_name()))
    output = engine.image_to_string(Image.open('verify.png'), lang='eng')
    print("recognizing..." + output)
    return output
# Print the OCR text of test.jpg using the first available OCR tool.
import sys
import Image

from pyocr import pyocr

# "builders" is loaded from the local src directory.
sys.path = ["src"] + sys.path
import builders

tools = pyocr.get_available_tools()[:]
if not tools:
    print("No OCR tool found")
    sys.exit(1)

picture = Image.open('test.jpg')
print(tools[0].image_to_string(picture, lang='eng', psm='6',
                               builder=builders.TextBuilder()))
#!/usr/bin/env python3.7 import sys import argparse import yaml from PIL import Image from pyocr import pyocr from pyocr import builders with open("../config.yaml", "r") as f: config = yaml.safe_load(f) tools = pyocr.get_available_tools() tool = tools[0] def ocr_img(img, loc, debug=None): image = Image.open(img) crop = image.crop(loc) if debug != None: crop.show() print(tool.image_to_string(crop).replace("\n", " ")) # usage '__name__' --loc location --app app1|app2 image def main(): parser = argparse.ArgumentParser(description='Pokemon GO image tester') parser.add_argument('--loc', type=str,
import io import pdfminer #image_file = "H:/Projects/OCR/transforming_into_an_analytics_driven_insurance_carrier.pdf" image_pdf = Image(filename="H:/Projects/OCR/II_StateAdvisoryForumState_AL_2016.pdf", resolution=400) image_jpeg = image_pdf.convert('jpeg') req_image = [] final_text = [] for img in image_jpeg.sequence: img_page = Image(image=img) req_image.append(img_page.make_blob('jpeg')) tool = pyocr.get_available_tools()[0] #lang = tool.get_available_languages()[1] for img in req_image: txt = tool.image_to_string( PI.open(io.BytesIO(img)), # lang=lang, builder=builders.TextBuilder() ) final_text.append(txt) print(final_text[1]) file = open("output.txt","w",encoding='utf-8') for item in final_text:
output_jpg = input_pdf.replace(".pdf", ".jpg") output_img.write(output_jpg) # print datetime.now() - start_time from wand.image import Image from PIL import Image as PI import sys import os from pyocr import pyocr from pyocr import builders import io #TESSERACT_CMD = os.environ["TESSDATA_PREFIX"] + os.sep + 'tesseract.exe' if os.name == 'nt' else 'tesseract' tool = pyocr.get_available_tools()[0] print tool lang = tool.get_available_languages() print lang req_image = [] final_text = [] image_pdf = Image(file="test_pf.pdf", resolution=300) image_jpeg = image_pdf.convert('jpeg') for img in image_jpeg.sequence: img_page = Image(image=img) req_image.append(img_page.make_blob('jpeg')) for img in req_image: txt = tool.image_to_string(PI.open(io.BytesIO(img)),
def __init__(self, mainwindow_gui, config):
    """Build the settings window, fill its widgets and wire up its signals.

    Args:
        mainwindow_gui: window this dialog is made transient for.
        config: configuration object; its scanner_devid, scanner_resolution
            and RECOMMENDED_RESOLUTION attributes are read here and it is
            handed to the actions, the grip handler and display_config().
    """
    gobject.GObject.__init__(self)
    widget_tree = load_uifile("settingswindow.glade")

    self.window = widget_tree.get_object("windowSettings")
    self.window.set_transient_for(mainwindow_gui)

    self.__config = config

    self.workdir_chooser = widget_tree.get_object("filechooserbutton")

    # action name -> (widgets that trigger it, action object)
    actions = {
        "apply" : (
            [widget_tree.get_object("buttonSettingsOk")],
            ActionApplySettings(self, config)
        ),
        "cancel" : (
            [widget_tree.get_object("buttonSettingsCancel")],
            ActionCancelSettings(self, config)
        ),
        "select_scanner" : (
            [widget_tree.get_object("comboboxDevices")],
            ActionSelectScanner(self)
        ),
        "scan_calibration" : (
            [widget_tree.get_object("buttonScanCalibration")],
            ActionScanCalibration(self)
        )
    }

    # Per-setting combobox plus the list stores it is backed by.
    self.device_settings = {
        "devid" : {
            'gui' : widget_tree.get_object("comboboxDevices"),
            'stores' : {
                'loading' : widget_tree.get_object("liststoreLoading"),
                'loaded' : widget_tree.get_object("liststoreDevice"),
            },
            'nb_elements' : 0,
            'active_idx' : -1,
        },
        "resolution" : {
            'gui' : widget_tree.get_object("comboboxResolution"),
            'stores' : {
                'loading' : widget_tree.get_object("liststoreLoading"),
                'loaded' : widget_tree.get_object("liststoreResolution"),
            },
            'nb_elements' : 0,
            'active_idx' : -1,
        },
    }

    self.ocr_settings = {
        "lang" : {
            'gui' : widget_tree.get_object("comboboxLang"),
            'store' : widget_tree.get_object("liststoreOcrLang"),
        }
    }

    self.calibration = {
        "scan_button" : widget_tree.get_object("buttonScanCalibration"),
        "image_gui" : widget_tree.get_object("imageCalibration"),
        "image_viewport" : widget_tree.get_object("viewportCalibration"),
        "images" : [], # array of tuples : (resize factor, PIL image)
        "image_eventbox" : widget_tree.get_object("eventboxCalibration"),
        "image_scrollbars" : widget_tree.get_object("scrolledwindowCalibration"),
    }

    self.grips = CalibrationGripHandler(config, self)

    self.progressbar = widget_tree.get_object("progressbarScan")
    self.__scan_start = 0.0

    self.workers = {
        "device_finder" : WorkerDeviceFinder(config.scanner_devid),
        "resolution_finder" : WorkerResolutionFinder(
            config.scanner_resolution,
            config.RECOMMENDED_RESOLUTION),
        "scan" : WorkerCalibrationScan(
            self.calibration['image_viewport']),
        "progress_updater" : WorkerProgressUpdater("calibration scan",
                                                   self.progressbar)
    }

    # Fill the OCR language store from the first available OCR tool (or with
    # nothing when there is none), sorted by long name, with a
    # "Disable OCR" entry on top.
    ocr_tools = pyocr.get_available_tools()
    if len(ocr_tools) <= 0:
        ocr_langs = []
    else:
        ocr_langs = ocr_tools[0].get_available_languages()
    ocr_langs = self.__get_short_to_long_langs(ocr_langs)
    ocr_langs.sort(key=lambda lang: lang[1])
    ocr_langs.insert(0, (None, _("Disable OCR")))

    self.ocr_settings['lang']['store'].clear()
    for (short_lang, long_lang) in ocr_langs:
        self.ocr_settings['lang']['store'].append([long_lang, short_lang])

    # Attach every action to the widgets that trigger it.
    for action in ["apply", "cancel", "select_scanner", "scan_calibration"]:
        actions[action][1].connect(actions[action][0])

    # The finder callbacks are scheduled through gobject.idle_add rather
    # than being called directly from the worker signal.
    self.workers['device_finder'].connect(
        'device-finding-start',
        lambda worker: gobject.idle_add(
            self.__on_device_finding_start_cb))
    self.workers['device_finder'].connect(
        'device-found',
        lambda worker, user_name, store_name, active: \
            gobject.idle_add(self.__on_value_found_cb,
                             self.device_settings['devid'],
                             user_name, store_name, active))
    self.workers['device_finder'].connect(
        'device-finding-end',
        lambda worker: gobject.idle_add(
            self.__on_finding_end_cb,
            self.device_settings['devid']))

    self.workers['resolution_finder'].connect(
        'resolution-finding-start',
        lambda worker: gobject.idle_add(
            self.__on_finding_start_cb,
            self.device_settings['resolution']))
    self.workers['resolution_finder'].connect(
        'resolution-found',
        lambda worker, user_name, store_name, active: \
            gobject.idle_add(self.__on_value_found_cb,
                             self.device_settings['resolution'],
                             user_name, store_name, active))
    self.workers['resolution_finder'].connect(
        'resolution-finding-end',
        lambda worker: gobject.idle_add(
            self.__on_finding_end_cb,
            self.device_settings['resolution']))

    # Calibration scan signals call their handlers directly.
    self.workers['scan'].connect('calibration-scan-start',
        lambda worker: self.__on_scan_start())
    self.workers['scan'].connect('calibration-scan-done',
        lambda worker, img: self.__on_scan_done(img))
    self.workers['scan'].connect('calibration-resize-done',
        lambda worker, factor, img: self.__on_resize_done(factor, img))

    # Mouse events on the calibration image are forwarded to the grips.
    self.calibration['image_eventbox'].connect("button-press-event",
        lambda x, ev: self.grips.on_mouse_button_pressed_cb(ev))
    self.calibration['image_eventbox'].connect("motion-notify-event",
        lambda x, ev: self.grips.on_mouse_motion(ev))
    self.calibration['image_eventbox'].connect("button-release-event",
        lambda x, ev: self.grips.on_mouse_button_released_cb(ev))
    self.calibration['image_eventbox'].add_events(
        gtk.gdk.POINTER_MOTION_MASK)

    self.window.connect("destroy", self.__on_destroy)

    self.display_config(config)

    self.window.set_visible(True)

    # Must be connected after the window has been displayed.
    # Otherwise, if "disable OCR" is already selected in the config
    # it will display a warning popup even before the dialog has been
    # displayed
    self.ocr_settings['lang']['gui'].connect(
        "changed", self.__on_ocr_lang_changed)

    self.workers['device_finder'].start()
def ocr(self, filename):
    """OCR the image at *filename* using Simplified Chinese language data.

    Returns:
        The recognised text, or ``None`` when no OCR tool is available.
    """
    tools = pyocr.get_available_tools()[:]
    if len(tools) == 0:
        print("No Ocr tool")
        # Stop here: falling through indexed the empty list and raised
        # IndexError right after reporting the problem.
        return None
    return tools[0].image_to_string(Image.open(filename), lang='chi_sim')
def rec(self, pic_file_path = os.path.join(work_dir, 'mimidama', 'test_pics', 'test1.jpg'), typecode = 60001, timeout=120):
    """
    Recognise a captcha image with the backend selected by ``self.type``.

    :param pic_file_path: path of the captcha image
    :param typecode: captcha type code (forwarded to the "ruokuai" backend)
    :param timeout: timeout forwarded to the "ruokuai" backend
    :return: (str(ret), code_id, is_report_error)
        the captcha text, the captcha id, and whether an error report has
        already been submitted.
        When ret is empty or turns out to be wrong, check is_report_error
        first; if it is False, an error report still has to be submitted
        once (self.reportError(code_id)).

    Exits the process when ``self.type`` is not a known backend, or when the
    "zhongdengwang" backend finds no OCR tool.
    """
    # Imported up front on purpose: a function-level "import sys" makes
    # ``sys`` local to the whole function, so importing it inside a single
    # branch left the unknown-type branch failing with UnboundLocalError
    # instead of exiting.
    import sys

    # Backends that only hand the image path to one method of self.netf.
    netf_methods = {
        "jiangsu": "ComputeJiangsu",
        "xinjiang": "ComputeXinjiang2",
        "chongqing": "ComputeChongqing2",
        "guangdong": "ComputeGuangdong4",
        "neimenggu": "ComputeGuangdong4",
        "gansu": "ComputeGansu",
        "hainan": "ComputeGuangdong3",
        "henan": "ComputeHenan2",
        "shanghai": "ComputeShanghai1",
        "hunan": "ComputeShanghai1",
        "ningxia": "ComputeNingxia",
        "jiangxi": "ComputeJiangxi",
        "tianjin": "ComputeTianjin",
        "fujian": "ComputeShanghai1",
        "hebei": "ComputeShanghai1",
        "anhui": "ComputeHenan2",
        "guangxi": "ComputeHenan2",
        "heilongjiang": "ComputeHenan2",
        "yunnan": "ComputeShanghai1",
        "xizang": "ComputeHenan2",
        "qinghai": "ComputeHenan2",
        "sichuan": "ComputeXinjiang2",
        "zongju": "ComputeShanghai1",
        "zhejiang": "ComputeZhejiang",
        "shanxitaiyuan": "ComputeHenan2",
        "guizhou": "ComputeGuizhou3",
        "hubei": "ComputeHubei",
        "hubei2": "ComputeHubei2",
        "shan3xi": "ComputeShan3xi",
        "shan3xi2": "ComputeShan3xi2",
        "shandong": "ComputeShandong",
        "beijing": "ComputeBeijing1",
        "zhongdeng": "ComputeZhongdeng",
        "cnca": "ComputeCnca",
        "haiguan": "ComputeHaiguan",
        "jilin": "ComputeShandong",
        "liaoning": "ComputeLiaoning3",
    }

    self.handleLog(u"进入recchar的打码方法:", pic_file_path, typecode, timeout, self.type)
    ret = ""
    code_id = 0
    is_report_error = False

    if self.type == "mimidama":
        result = c_char_p("")
        # NOTE(review): the type code sent here is the literal 60001, not
        # the ``typecode`` argument - confirm that this is intended.
        code_id = self.RecPath(c_int(self.s_id), c_char_p(self.s_key),
                               self.user, self.passwd,
                               c_char_p(pic_file_path), c_int(60001), result)
        self.handleLog("result", result)
        if code_id <= 0:
            print('get result error ,ErrorCode:%d do reportError!' % code_id)
            report_ret = self.reportErrorID(code_id)
            self.handleLog("report_ret:", report_ret)
            if report_ret != 0:
                self.handleLog("验证码识别错误时提交报告错误 report_ret:%d pic_file_path:%s"%(report_ret, pic_file_path))
            else:
                self.handleLog("reportError ok!")
                is_report_error = True
        else:
            self.handleLog("the code_id is:%d result is:%s" % (code_id, result.value))
            ret = result.value
    elif self.type == "ruokuai":
        self.log.info("type=%s,pic_file_path=%s" % (self.type, pic_file_path))
        # Read the image through a context manager so the handle is closed.
        with open(pic_file_path, 'rb') as pic_file:
            im = pic_file.read()
        try:
            rk_create_ret = self.rc.rk_create(im, typecode, timeout)
            self.handleLog("%s, %s" % (str(rk_create_ret), rk_create_ret.get('Error',"NO Error")))
            ret = rk_create_ret["Result"]
            code_id = rk_create_ret["Id"]
            self.handleLog("%s, %s" % (rk_create_ret["Result"], rk_create_ret["Id"]))
        except Exception as e:
            self.handleException(e)
    elif self.type == "zhongdengwang":
        import Image
        from pyocr import pyocr
        try:
            tools = pyocr.get_available_tools()[:]
            if len(tools) == 0:
                print("No OCR tool found")
                sys.exit(1)
            # Open the picture.
            im = Image.open(pic_file_path)
            # Convert to luminance.
            imgry = im.convert('L')
            # Binarise through the ``table`` lookup table.
            out = imgry.point(table,'1')
            ret = tools[0].image_to_string(out, lang='fra')
            ret = ret.strip()
            # Apply the ``rep`` correction table to the recognised text.
            for r in rep:
                ret = ret.replace(r,rep[r])
        except Exception as e:
            self.handleException(e)
    elif self.type in netf_methods:
        # "jiangsu" is the one backend here that never logged its input.
        if self.type != "jiangsu":
            self.log.info("type=%s,pic_file_path=%s" % (self.type, pic_file_path))
        try:
            compute = getattr(self.netf, netf_methods[self.type])
            ret = compute(str(pic_file_path))
        except Exception as e:
            self.handleException(e)
    else:
        self.handleLog("unkown type:", self.type)
        sys.exit(-1)
    return (ret, code_id, is_report_error)
def __init__(self):
    """Load ``config.yaml`` and pick the first available OCR tool.

    Attributes set:
        self.config: parsed contents of ``config.yaml`` (opened relative to
            the current working directory).
        self.tool: first entry returned by ``pyocr.get_available_tools()``.
        self.friends: the ``friends`` object taken from the enclosing
            (non-local) scope.

    Raises:
        IndexError: if pyocr reports no available OCR tool.
    """
    with open("config.yaml", "r") as f:
        # NOTE(review): config.yaml is treated as trusted input here. If it
        # can ever come from an untrusted source, switch to yaml.safe_load.
        self.config = yaml.load(f, Loader=yaml.FullLoader)

    tools = pyocr.get_available_tools()
    if not tools:
        # Keep the exception type the bare ``tools[0]`` lookup used to raise,
        # but say what is actually wrong instead of "list index out of range".
        raise IndexError(
            "pyocr found no available OCR tool "
            "(is Tesseract installed and on PATH?)"
        )
    self.tool = tools[0]
    self.friends = friends
# Diagnostic / test-runner script: lists the OCR backends known to pyocr and
# runs the matching test suites for the ones that are installed.
#
# ``unittest`` and ``cuneiform`` are both used below, so they must be imported
# here; without them the script dies with NameError once it reaches the
# test-running part.
import unittest

from pyocr import cuneiform
from pyocr import pyocr
from pyocr import tesseract
from tests import tests_cuneiform
from tests import tests_tesseract

if __name__ == '__main__':
    # Describe every backend pyocr knows about, whether installed or not.
    for tool in pyocr.TOOLS:
        print("- OCR: %s" % tool.get_name())
        available = tool.is_available()
        print(" is_available(): %s" % (str(available)))
        if available:
            # Version and languages are only queried on available tools.
            print(" get_version(): %s" % (str(tool.get_version())))
            print(" get_available_languages(): ")
            print(" " + ", ".join(tool.get_available_languages()))
        print("")
    print("")

    # Summary of the backends that are actually usable.
    print("OCR tool found:")
    for tool in pyocr.get_available_tools():
        print("- %s" % tool.get_name())

    # Run each backend's test suite only when that backend is available.
    if tesseract.is_available():
        print("---")
        print("Tesseract:")
        unittest.TextTestRunner().run(tests_tesseract.get_all_tests())
    if cuneiform.is_available():
        print("---")
        print("Cuneiform:")
        unittest.TextTestRunner().run(tests_cuneiform.get_all_tests())
def __init__(self, mainwindow_gui, config):
    """Build the settings dialog and wire up its widgets and workers.

    Loads the widget tree from ``settingswindow.glade``, collects the
    widgets this object needs, creates the background workers, fills the
    OCR language list, connects all signal handlers, shows the window and
    finally starts the device-finder worker.

    Arguments:
        mainwindow_gui: window the settings window is made transient for.
        config: configuration object; this method reads
            ``scanner_devid``, ``scanner_resolution`` and
            ``RECOMMENDED_RESOLUTION`` from it and passes it on to the
            actions, the grip handler and ``display_config()``.
    """
    gobject.GObject.__init__(self)
    widget_tree = load_uifile("settingswindow.glade")

    self.window = widget_tree.get_object("windowSettings")
    self.window.set_transient_for(mainwindow_gui)

    self.__config = config

    self.workdir_chooser = widget_tree.get_object("filechooserbutton")

    # Action name -> (list of widgets, action object). The action objects
    # are connected to their widgets further down.
    actions = {
        "apply": ([widget_tree.get_object("buttonSettingsOk")],
                  ActionApplySettings(self, config)),
        "cancel": ([widget_tree.get_object("buttonSettingsCancel")],
                   ActionCancelSettings(self, config)),
        "select_scanner": ([widget_tree.get_object("comboboxDevices")],
                           ActionSelectScanner(self)),
        "scan_calibration": ([widget_tree.get_object("buttonScanCalibration")],
                             ActionScanCalibration(self))
    }

    # Per-setting combobox state. Both settings share the same "loading"
    # list store and have their own "loaded" store.
    # NOTE(review): presumably the "loading" store is a placeholder shown
    # while the matching worker is running - confirm in the callbacks.
    self.device_settings = {
        "devid": {
            'gui': widget_tree.get_object("comboboxDevices"),
            'stores': {
                'loading': widget_tree.get_object("liststoreLoading"),
                'loaded': widget_tree.get_object("liststoreDevice"),
            },
            'nb_elements': 0,
            'active_idx': -1,
        },
        "resolution": {
            'gui': widget_tree.get_object("comboboxResolution"),
            'stores': {
                'loading': widget_tree.get_object("liststoreLoading"),
                'loaded': widget_tree.get_object("liststoreResolution"),
            },
            'nb_elements': 0,
            'active_idx': -1,
        },
    }

    self.ocr_settings = {
        "lang": {
            'gui': widget_tree.get_object("comboboxLang"),
            'store': widget_tree.get_object("liststoreOcrLang"),
        }
    }

    # Widgets and image cache used by the calibration scan.
    self.calibration = {
        "scan_button": widget_tree.get_object("buttonScanCalibration"),
        "image_gui": widget_tree.get_object("imageCalibration"),
        "image_viewport": widget_tree.get_object("viewportCalibration"),
        "images": [],  # array of tuples : (resize factor, PIL image)
        "image_eventbox": widget_tree.get_object("eventboxCalibration"),
        "image_scrollbars":
        widget_tree.get_object("scrolledwindowCalibration"),
    }

    self.grips = CalibrationGripHandler(config, self)

    self.progressbar = widget_tree.get_object("progressbarScan")
    self.__scan_start = 0.0

    # Workers are only created here; the device finder is the only one
    # started by this method (last statement).
    self.workers = {
        "device_finder": WorkerDeviceFinder(config.scanner_devid),
        "resolution_finder": WorkerResolutionFinder(
            config.scanner_resolution,
            config.RECOMMENDED_RESOLUTION),
        "scan": WorkerCalibrationScan(self.calibration['image_viewport']),
        "progress_updater": WorkerProgressUpdater("calibration scan",
                                                  self.progressbar)
    }

    # Fill the OCR language store from the first available OCR tool (an
    # empty language list is used when no tool is available). The entries
    # are sorted on their second element, and a "Disable OCR" entry whose
    # short name is None is put first.
    ocr_tools = pyocr.get_available_tools()
    if len(ocr_tools) <= 0:
        ocr_langs = []
    else:
        ocr_langs = ocr_tools[0].get_available_languages()
    ocr_langs = self.__get_short_to_long_langs(ocr_langs)
    ocr_langs.sort(key=lambda lang: lang[1])
    ocr_langs.insert(0, (None, _("Disable OCR")))

    self.ocr_settings['lang']['store'].clear()
    for (short_lang, long_lang) in ocr_langs:
        # Store rows are [long name, short name].
        self.ocr_settings['lang']['store'].append([long_lang, short_lang])

    # Hook every action object to its widget list.
    for action in [
        "apply", "cancel", "select_scanner", "scan_calibration"
    ]:
        actions[action][1].connect(actions[action][0])

    # Finder-worker signals are forwarded to the callbacks through
    # gobject.idle_add() rather than called directly.
    # NOTE(review): presumably because the workers emit from a non-GUI
    # thread - confirm against the worker implementation.
    self.workers['device_finder'].connect(
        'device-finding-start',
        lambda worker: gobject.idle_add(self.__on_device_finding_start_cb))
    self.workers['device_finder'].connect(
        'device-found',
        lambda worker, user_name, store_name, active: \
        gobject.idle_add(self.__on_value_found_cb,
                         self.device_settings['devid'],
                         user_name, store_name, active))
    self.workers['device_finder'].connect(
        'device-finding-end',
        lambda worker: gobject.idle_add(
            self.__on_finding_end_cb,
            self.device_settings['devid']))

    self.workers['resolution_finder'].connect(
        'resolution-finding-start',
        lambda worker: gobject.idle_add(self.__on_finding_start_cb,
                                        self.device_settings['resolution']))
    self.workers['resolution_finder'].connect(
        'resolution-found',
        lambda worker, user_name, store_name, active: \
        gobject.idle_add(self.__on_value_found_cb,
                         self.device_settings['resolution'],
                         user_name, store_name, active))
    self.workers['resolution_finder'].connect(
        'resolution-finding-end',
        lambda worker: gobject.idle_add(
            self.__on_finding_end_cb,
            self.device_settings['resolution']))

    # Scan-worker signals call the handlers directly (no idle_add).
    self.workers['scan'].connect('calibration-scan-start',
                                 lambda worker: self.__on_scan_start())
    self.workers['scan'].connect(
        'calibration-scan-done',
        lambda worker, img: self.__on_scan_done(img))
    self.workers['scan'].connect(
        'calibration-resize-done',
        lambda worker, factor, img: self.__on_resize_done(factor, img))

    # Mouse events on the calibration image are delegated to the grip
    # handler; the emitting widget (first lambda argument) is dropped.
    self.calibration['image_eventbox'].connect(
        "button-press-event",
        lambda x, ev: self.grips.on_mouse_button_pressed_cb(ev))
    self.calibration['image_eventbox'].connect(
        "motion-notify-event",
        lambda x, ev: self.grips.on_mouse_motion(ev))
    self.calibration['image_eventbox'].connect(
        "button-release-event",
        lambda x, ev: self.grips.on_mouse_button_released_cb(ev))
    self.calibration['image_eventbox'].add_events(
        gtk.gdk.POINTER_MOTION_MASK)

    self.window.connect("destroy", self.__on_destroy)

    self.display_config(config)

    self.window.set_visible(True)

    # Must be connected after the window has been displayed.
    # Otherwise, if "disable OCR" is already selected in the config
    # it will display a warning popup even before the dialog has been
    # displayed
    self.ocr_settings['lang']['gui'].connect("changed",
                                             self.__on_ocr_lang_changed)

    self.workers['device_finder'].start()