def __init__(self, **kwargs):
    """Set up the OCR field specs, the Tesseract handle and the classifiers.

    The constructor fills ``text_predictors_list``, opens a
    ``PyTessBaseAPI`` and calls ``Init()`` on it, prints the Tesseract
    version and available languages, creates a ``CurrentStatus``, builds
    four ``TrainerPredictor`` objects and finally pickles that list with
    ``save_pickle``.

    Args:
        **kwargs: Accepted but not read anywhere in this constructor.
    """
    self.trainers_predictors_list = []

    # Character sets shared by several text fields.
    digits = "0123456789"
    lowercase = "abcdefghijklmnopqrstuvwxyz"
    uppercase = lowercase.upper()
    amount_chars = digits + "." + lowercase

    # Each entry is (field name, 4-int box, allowed characters, mode string).
    # NOTE(review): the box is presumably a crop region and the last item
    # presumably a Tesseract page-segmentation mode -- confirm against the
    # code that consumes this list.
    self.text_predictors_list = [
        ("previous_level", (1212, 231, 1230, 280), digits, "8"),
        ("main_level", (1203, 323, 1223, 399), digits, "8"),
        ("next_level", (1212, 445, 1230, 493), digits, "8"),
        ("sub_level", (1177, 625, 1203, 692), digits + "/", "8"),
        ("gold", (1091, 283, 1126, 471), amount_chars, "7"),
        ("current_dps_down_no_tab", (389, 562, 423, 709), amount_chars, "8"),
        ("last_hero", (124, 109, 148, 430),
         digits + "." + uppercase + lowercase, "7"),
    ]

    self.api = PyTessBaseAPI()
    self.api.Init()
    for describe in (tesserocr.tesseract_version, tesserocr.get_languages):
        print(describe())

    self.global_image = None
    self.status = CurrentStatus()

    # (predictor name, class labels, 4-int box, two integer arguments).
    # NOTE(review): the two integers look like an image width/height passed
    # to TrainerPredictor -- confirm against its signature.
    classifier_specs = [
        ("boss_active_predictor",
         ["boss_active", "boss_inactive", "no_boss"],
         (1224, 555, 1248, 648), 12, 46),
        ("egg_active_predictor",
         ["egg_active", "egg_inactive"],
         (741, 31, 761, 64), 10, 16),
        ("gold_pet_predictor",
         ["goldpet", "nopet", "normalpet", "partial pet"],
         (624, 364, 734, 474), 40, 40),
        ("tab_predictor",
         ["skills_tab", "heroes_tab", "equipment_tab", "pet_tab",
          "relic_tab", "shop_tab", "no_tab"],
         (51, 1, 59, 717), 2, 179),
    ]
    # A fresh [200, 30] list is created per predictor so no two predictors
    # share the same list object.
    built = [
        TrainerPredictor(name, labels, box, first, second, 255.0, [200, 30])
        for name, labels, box, first, second in classifier_specs
    ]
    self.trainers_predictors_list.extend(built)

    # The per-trainer training steps are intentionally not run here. The
    # disabled calls were, in order: crop_images(), process_images(),
    # read_and_pickle(), train_graph().

    save_pickle(
        glo.DATA_FOLDER + "/dataforclassifier/TrainerPredictor_list.pickle",
        self.trainers_predictors_list,
    )
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 5 05:06:47 2017

@author: ubuntu
"""
import cv2
import numpy as np
import tesserocr
from PIL import Image

##
sample_db_path = "./sample/"
test_db_path = "/media/ubuntu/Investigation/DataSet/Image/Classification/Insurance/Insurance/Tmp/VIN/"
filename = "1.jpg"
fullpath = test_db_path + filename
###


def opencv2pillow(image):
    """Convert an OpenCV image array into a PIL ``Image``.

    OpenCV stores colour images in BGR(A) channel order while Pillow
    interprets a 3- or 4-channel array as RGB(A), so colour arrays are
    reordered before the conversion. Single-channel (2-D) arrays are
    passed through unchanged.

    Args:
        image: numpy array as returned by ``cv2.imread``.

    Returns:
        The corresponding ``PIL.Image.Image``.
    """
    if image.ndim == 3:
        channels = image.shape[2]
        if channels == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        elif channels == 4:
            image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGBA)
    return Image.fromarray(image)


# Single-argument print(...) behaves identically as a Python 2 print
# statement and as the Python 3 print function.
print(tesserocr.tesseract_version())  # print tesseract-ocr version
print(tesserocr.get_languages())

image = cv2.imread(fullpath)
if image is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising; fail here with a message that names the path.
    raise IOError("could not read image: %s" % fullpath)
image = opencv2pillow(image)
print(tesserocr.image_to_text(image))
@Author: Rodney Cheung @Date: 2020-05-13 10:58:02 @LastEditors: Rodney Cheung @LastEditTime: 2020-05-15 18:09:00 @FilePath: /Tesser/main.py ''' import tesserocr from tesserocr import PyTessBaseAPI, RIL from PIL import Image, ImageOps import os import argparse TESSDATA_PATH = '/Volumes/code/open_source/tessdata_best/' # TESTDATA_PATH = '/Volumes/code/work/wq_maintain_material/picture/test_data' print(tesserocr.tesseract_version()) print(tesserocr.get_languages(path=TESSDATA_PATH)) def str2bool(v): if isinstance(v, bool): return v if v.lower() in ('yes', 'true', 't', 'y', '1'): return True elif v.lower() in ('no', 'false', 'f', 'n', '0'): return False else: raise argparse.ArgumentTypeError('Boolean value expected.') def main():
import tesserocr as tc
from PIL import Image

# New languages and fonts are supported by adding the matching data files
# to the tessdata directory, e.g.
# C:\Users\admin\AppData\Local\Programs\Python\Python37-32\/tessdata/


class OcrTools:
    """Small demo that runs Tesseract OCR on two bundled sample images."""

    def __init__(self):
        """Print Tesseract diagnostics and the OCR text of both samples.

        Reads ``data/news.png`` with the ``chi_sim`` language and
        ``data/testp.png`` with tesserocr's default language, printing the
        recognised text of each to stdout.
        """
        print(tc.tesseract_version())  # print tesseract-ocr version
        print(tc.get_languages())  # prints tessdata path and list of available languages

        zh_image_path = 'data/news.png'
        en_image_path = 'data/testp.png'

        # The images are handed to tesserocr by path, so no PIL image is
        # opened here; tc.image_to_text(Image.open(path)) is the
        # alternative when an in-memory image is needed.

        # Standard Chinese sample image.
        print('---------------------标准中文图片---------------------')
        print(tc.file_to_text(zh_image_path, lang='chi_sim'))

        # Standard English sample image.
        print('---------------------标准英文图片---------------------')
        print(tc.file_to_text(en_image_path))
#!/usr/bin/python import sys import tesserocr from time import time versions = tesserocr.tesseract_version().split() # versions will be a list like this: # ['tesseract', '4.00.00alpha', 'leptonica-1.74', 'libjpeg', '8d', # '(libjpeg-turbo', '1.5.1)', ':', 'libpng', '1.6.25', ':', 'libtiff', # '4.0.7', ':', 'zlib', '1.2.8', 'Found', 'AVX', 'Found', 'SSE'] print "hmmbug/tesserocr test script" print print "Software versions:" print " python:", ".".join([str(i) for i in sys.version_info[:3]]) print " tesseract:", versions[1] print " leptonica:", versions[2] print " tesserocr:", tesserocr.__version__ print TESTS = { # a list of tests as image_file: text_in_image u"hello_world.png": u"Hello world.", u"quick_fox.png": u"The quick brown fox jumps over the lazy dog", } exit_code = 0 with tesserocr.PyTessBaseAPI() as api: for img, txt in TESTS.iteritems():
def getinfo():
    """Print the Tesseract version banner, then the available languages."""
    # Each probe is called and printed in turn: version first, languages
    # (tessdata path plus language list) second.
    for probe in (tesserocr.tesseract_version, tesserocr.get_languages):
        print(probe())
def _get_tesseract_version():
    """Return the version string reported by ``tesserocr.tesseract_version``.

    tesserocr is imported inside the function, so it is only loaded when
    the version is actually requested.
    """
    import tesserocr

    return tesserocr.tesseract_version()
def detect_text_tess(path):
    """Run English-language OCR on the image file at *path*.

    Before recognising, the Tesseract version and the tessdata path with
    its available languages are printed to stdout.

    Args:
        path: Path of the image file handed to ``tesserocr.file_to_text``.

    Returns:
        The text returned by ``tesserocr.file_to_text`` for ``lang='eng'``.
    """
    version_banner = tesserocr.tesseract_version()
    print(version_banner)

    available_languages = tesserocr.get_languages()
    print(available_languages)

    recognised_text = tesserocr.file_to_text(path, lang='eng')
    return recognised_text