Пример #1
0
def get_text_from_image(image_data, app_id, app_key, app_secret, api_version=0, timeout=3):
    """
    Recognise the text in an image with Baidu OCR.

    :param image_data: image payload handed unchanged to the OCR client
    :param app_id: Baidu application id
    :param app_key: Baidu API key
    :param app_secret: Baidu secret key
    :param api_version: 1 selects ``basicAccurate``; any other value selects
        ``basicGeneral``
    :param timeout: connection timeout in seconds (converted to milliseconds)
    :return: all recognised words concatenated without a separator, or ``""``
        when the response carries an ``error_code``
    """
    ocr = AipOcr(appId=app_id, apiKey=app_key, secretKey=app_secret)
    ocr.setConnectionTimeoutInMillis(timeout * 1000)

    request_options = {"language_type": "CHN_ENG"}

    # Pick the endpoint once, then issue a single call.
    recognise = ocr.basicAccurate if api_version == 1 else ocr.basicGeneral
    response = recognise(image_data, request_options)

    if "error_code" in response:
        print("baidu api error: ", response["error_msg"])
        return ""

    pieces = []
    for entry in response["words_result"]:
        pieces.append(entry["words"])
    return "".join(pieces)
Пример #2
0
def imageRecognition(image):
	"""Recognise the text in ``image`` with Baidu OCR and return it as one string.

	:param image: image payload passed unchanged to ``AipOcr.basicGeneral``
	:return: the ``words`` of every entry in ``words_result`` joined without
		a separator
	"""
	# Placeholder credentials; replace with a real APP ID / API key / secret key.
	APP_ID = '1'
	API_KEY = '1'
	SECRET_KEY = '1'
	client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
	text = client.basicGeneral(image)
	# Function-call form prints the same text on Python 2 and is valid on Python 3.
	print('read image...')
	# Join the recognised lines into a single string.
	return ''.join(xy['words'] for xy in text['words_result'])
Пример #3
0
def get_distinguish_img_str(name):
    """Return the text recognised in the image file ``name``.

    Each recognised line is followed by a newline. Any exception raised while
    building the client, reading the file or calling the service is converted
    to its message and returned instead, so the function always returns a
    string. When nothing was recognised the literal 'No Img Data !' is returned.

    :param name: path of the image file to read
    :return: recognised text, an error message, or 'No Img Data !'
    """
    s = ''
    try:
        client = AipOcr(AppID, API_Key, Secret_Key)
        # Context manager closes the file even when read() fails.
        with open(name, 'rb') as image_file:
            img = image_file.read()
        msg = client.basicGeneral(img)
        s = ''.join(m.get('words') + '\n' for m in msg.get('words_result'))
    except Exception as ex:
        # Deliberate best effort: callers receive the error text as the result.
        s = str(ex)
    if not s:
        s = 'No Img Data !'
    return s
Пример #4
0
def ocr_img_baidu(image, config):
    """Crop the configured region out of ``image``, OCR it and split the text.

    Credentials are read from the ``baidu_api`` section of ``config`` and the
    crop box from ``region.combine_region`` (four comma-separated integers).
    The first recognised line is the question and the remaining lines are the
    choices (with spaces removed). Up to two leading choice lines are merged
    into the question when the question wraps and one of them ends with '?'.
    The process exits when no more than two lines are recognised.

    :param image: image object exposing ``crop``
    :param config: configuration object exposing ``get(section, option)``
    :return: ``(question, choices)``
    """
    # Baidu OCR API: register and create an application at
    # https://cloud.baidu.com/product/ocr to obtain the credentials.
    global combine_region

    client = AipOcr(
        config.get('baidu_api', 'APP_ID'),
        config.get('baidu_api', 'API_KEY'),
        config.get('baidu_api', 'SECRET_KEY'),
    )

    # Crop box for the question + choices area: upper-left and lower-right
    # corner coordinates; tune them for the screen resolution in use.
    combine_region = config.get("region", "combine_region").replace(' ', '').split(',')
    combine_region = [int(value) for value in combine_region]
    box = tuple(combine_region[k] for k in range(4))
    region_im = image.crop(box)

    # Serialise the cropped region to PNG bytes in memory.
    buffer = io.BytesIO()
    region_im.save(buffer, format='PNG')
    response = client.basicGeneral(buffer.getvalue())

    texts = [entry['words'] for entry in response['words_result']]
    if len(texts) <= 2:
        print(Fore.RED + '截图区域设置错误,请重新设置' + Fore.RESET)
        exit(0)

    question = texts[0]
    choices = [line.replace(' ', '') for line in texts[1:]]

    # Handle a question that spans two or three lines.
    if choices[0].endswith('?'):
        question += choices.pop(0)
    elif choices[1].endswith('?'):
        question = question + choices[0] + choices[1]
        del choices[:2]

    return question, choices
Пример #5
0
	def __init__(self):
		"""Set up URLs, the cookie-aware opener, request headers, room codes and the OCR client."""
		self.index_url = "http://bkjw.sxu.edu.cn/"
		self.class_url = "http://bkjw.sxu.edu.cn/ZNPK/KBFB_RoomSel.aspx"
		self.score_url = "http://bkjw.sxu.edu.cn/_data/login.aspx"
		# Cookie jar shared by every request made through self.opener.
		self.cookie = cookiejar.CookieJar()
		self.handler = rq.HTTPCookieProcessor(self.cookie)
		self.opener = rq.build_opener(self.handler)
		# Header set that uses the login page as Referer.
		self.header = {
		"Host":"bkjw.sxu.edu.cn",
		"Origin":"http://bkjw.sxu.edu.cn",
		"Content-Type":"application/x-www-form-urlencoded",
		"Referer":"http://bkjw.sxu.edu.cn/_data/login.aspx",
		"Upgrade-Insecure-Requests":"1",
		"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"

		}
		# Header set that uses the score page as Referer; "Cookie" starts empty.
		self.header2 = {
		"Host":"bkjw.sxu.edu.cn",
		"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
		"Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
		"Accept-Language":"zh-CN,zh;q=0.8",
		"Accept-Encoding":"gzip, deflate",
		"Referer":"http://bkjw.sxu.edu.cn/xscj/Stu_MyScore.aspx",
		"Content-Type":"application/x-www-form-urlencoded",
		"Content-Length":"76",
		"Cookie":"",
		"Connection":"keep-alive",
		"Upgrade-Insecure-Requests":"1"
		}
		# Code lists; presumably building codes and the room codes inside buildings 101 and 105 (TODO confirm).
		self.jxl_list = ["101","105"]
		self.js_list_101 = ["1010101","1010102","1010103","1010104","1010105","1010106","1010107","1010108","1010109","1010110","1010111","1010112","1010113","1010114","1010115","1010201","1010202","1010203","1010204","1010205","1010206","1010207","1010208","1010301","1010302","1010303","1010304","1010305","1010306","1010307","1010308","1010401","1010402","1010501","1010502","1010503","1010504","1010505","1010506","1010507","1010508","1010509","1010510","1010511"]
		self.jxl_list_105 = ['1050101', '1050102', '1050103', '1050104', '1050105', '1050106', '1050107', '1050108', '1050109', '1050110', '1050111', '1050112', '1050113', '1050114', '1050115', '1050116','1050201', '1050202', '1050203', '1050204', '1050205', '1050206', '1050207', '1050208', '1050209','1050211', '1050212', '1050213', '1050214', '1050215', '1050216', '1050217', '1050218','1050301', '1050302', '1050303', '1050304', '1050305', '1050306', '1050307', '1050308', '1050309','1050310','1050311', '1050312', '1050313', '1050314', '1050315', '1050316', '1050317','1050401', '1050402', '1050403', '1050404', '1050405', '1050406', '1050407', '1050408', '1050409','1050501','1050502','1050503','1050504','1050505']
		# NOTE(review): APP_ID / API_KEY / SECRET_KEY are read from self; their definition is outside this excerpt.
		self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
Пример #6
0
class PicName(object):
    """Rename picture files after the text OCR reads from their top-left area."""

    def __init__(self):
        """Prompt for the picture folder and expected code length, then build the OCR client."""
        # Baidu OCR credentials: APP ID, API key, secret key.
        # NOTE(review): credentials are hard-coded in source; consider loading
        # them from configuration instead.
        APP_ID = '15584553'
        API_KEY = 'MGGfM6EGySBdKOM6605nbhDg'
        SECRET_KEY = 'zxpcnv2pd0VD5zlRwwySFx55BOU0sXhl'
        self.file_path = input('请输入图片文件夹: ')
        self.stylecode = int(input('请输入款号长度: '))
        self.client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    def get_file_path(self):
        """Return the paths of the jpg/png files directly inside ``self.file_path``.

        Entries whose path contains '~$' are skipped. The match is made on the
        last three characters of the lower-cased file name. A missing folder is
        reported on stdout and yields an empty list.
        """
        result_file_path = []
        pic_format = ('jpg', 'png')
        try:
            names = os.listdir(self.file_path)
        except FileNotFoundError as e:
            print('路径输入错误', e)
            return result_file_path
        for name in names:
            full_path = os.path.join(self.file_path, name)
            if '~$' in full_path:
                continue
            if name.lower()[-3:] in pic_format:
                result_file_path.append(full_path)
        return result_file_path

    def get_file_content(self, picture_file):
        """Return JPEG bytes of the top-left region of ``picture_file``.

        The region spans two thirds of the width and one quarter of the height.
        """
        import io

        # The context manager closes the source image even if cropping fails.
        with Image.open(picture_file) as img:
            width, height = img.size
            roi = img.crop((0, 0, width / 3 * 2, height / 4))
            # JPEG cannot store alpha or palette images (e.g. many PNGs).
            if roi.mode in ('RGBA', 'LA', 'P'):
                roi = roi.convert('RGB')
            # Encode in memory instead of leaving a temp.jpg in the working directory.
            buffer = io.BytesIO()
            roi.save(buffer, format='JPEG')
        return buffer.getvalue()

    def get_picture_name(self, picture_file):
        """OCR ``picture_file`` and rename it to '<first recognised line>.jpg'.

        :return: a status message; the file is left untouched when the service
            returns no recognised text
        """
        image = self.get_file_content(picture_file)
        relative_name = os.path.basename(picture_file)
        dir_name = os.path.dirname(picture_file)
        # High-precision text recognition on the cropped image bytes.
        result_dict = self.client.basicAccurate(image)
        words_result = result_dict.get('words_result')
        if not words_result:
            # Error response or nothing recognised: report it and skip the
            # rename instead of raising KeyError/IndexError.
            print('\t{}名称识别不成功'.format(relative_name))
            return '\t文件重命名失败'
        stylecode = words_result[0]['words']
        if len(stylecode) == self.stylecode:
            print('\t{}名称识别成功'.format(relative_name))
        else:
            print('\t{}名称识别不成功'.format(relative_name))
        new_name = os.path.join(dir_name, stylecode + '.jpg')
        os.renames(picture_file, new_name)
        return '\t文件重命名成功'

    def main(self):
        """Process every picture found by ``get_file_path`` and print the outcome."""
        for picture_file in self.get_file_path():
            print(picture_file)
            pic_rename = self.get_picture_name(picture_file)
            print(pic_rename)
Пример #7
0
# -*- coding: UTF-8 -*-

from aip import AipOcr
import json

# Constants: Baidu OCR application credentials.
# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
APP_ID = '9851066'
API_KEY = 'LUGBatgyRGoerR9FZbV4SQYk'
SECRET_KEY = 'fB2MNz1c2UHLTximFlC4laXPg7CVfyjV'

# Initialise the AipOcr client.
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Path of the image to read.
filePath = "d:/pic/20180716sz.jpg"
def get_file_content(filePath):
    """Return the full contents of ``filePath`` as bytes."""
    with open(filePath, mode='rb') as image_file:
        payload = image_file.read()
    return payload

# Request options passed to the recognition call.
options = {
    'detect_direction': 'true',
    'language_type': 'CHN_ENG',
}

# Call the general text recognition endpoint with the image bytes.
result = aipOcr.basicGeneral(get_file_content(filePath), options)
#print(json.dumps(result))
print(type(result))
print(result)
# Copy of the response as a plain dict.
d = dict(result)
Пример #8
0
def getClient():
    """Build and return an AipOcr client from the credentials embedded below."""
    # Order matters: APP ID, API key, secret key.
    credentials = (
        '15765872',
        'Bywy2lZjSttR1xPy48cwc5QQ',
        'Ca0GLFtO4Ebc4qV7hVHkzScxN0Si0r7Q',
    )
    return AipOcr(*credentials)
Пример #9
0
 def __init__(self, app_id, api_key, secret_key):
     """Store the three credentials and build the AipOcr client from them."""
     self._app_id, self._api_key, self._secret_key = app_id, api_key, secret_key
     self._client = AipOcr(app_id, api_key, secret_key)
Пример #10
0
 def __init__(self, appid, api_key, secrrt_key, redis_url):
     """Keep the credentials, then create the OCR client and the Redis client."""
     self.appid, self.api_key, self.secrrt_key = appid, api_key, secrrt_key
     # The OCR client is built before the Redis client.
     self.client = AipOcr(appid, api_key, secrrt_key)
     self.redis = RedisClient(redis_url)
Пример #11
0
import cv2
from aip import AipOcr
import serial
import tkinter
#from os import system
import os
import win32com.client
from pygame import mixer  # Load the required library

# Baidu OCR credentials and the shared client built from them.
# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
APP_ID = '20329897'
API_KEY = 'z8D0PlnOuGxSMg9LgA3wNNBN'
SECRET_KEY = 'SyLQ39pw7c7ngqw8qhStQXo4h1n8ZrRv'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
# COM object created from the "SAPI.SpVoice" ProgID.
speaker = win32com.client.Dispatch("SAPI.SpVoice")
# Module-level values initialised to zero; their use is outside this excerpt.
x = 0
y = 0


def play():
    """Initialise the pygame mixer, then load and start playing '8813.wav'."""
    mixer.init()
    player = mixer.music
    player.load('8813.wav')
    player.play()


def feedback():
    """Open a 500x495 child window of ``top`` showing the feedback contact text."""
    sign_up = tkinter.Toplevel(top)
    sign_up.geometry("500x495")

    # Heading line.
    heading = tkinter.Label(sign_up, text='如有产品问题或建议,请联系开发者',
                            font=('Times', 16, 'bold'))
    heading.place(x=25, y=40)

    # Contact line.
    contact = tkinter.Label(sign_up, text='email:  [email protected]',
                            font=('Times', 14))
    contact.place(x=30, y=125)
Пример #12
0
import os
import random
import subprocess
import requests
from io import BytesIO
from pathlib import Path

from PIL import Image
from aip import AipOcr

import config

# Shared Baidu OCR client; the credentials are read from the local `config` module.
ocr_client = AipOcr(config.APP_ID, config.API_KEY, config.SECRET_KEY)


def screenshot():
    """Capture the device screen through ``adb`` and return it as an image.

    Runs ``adb shell screencap -p``, reads its standard output, converts
    ``\\r\\n`` sequences to ``\\n`` and opens the resulting bytes from memory.

    :return: the image object produced by ``Image.open``
    """
    # An argument list runs without a shell on every platform; a single
    # command string only works that way on Windows.
    process = subprocess.Popen(['adb', 'shell', 'screencap', '-p'],
                               stdout=subprocess.PIPE)
    # communicate() reads stdout to EOF and waits for the child to exit.
    raw_output, _ = process.communicate()
    binary_screenshot = raw_output.replace(b'\r\n', b'\n')

    # To keep a copy on disk instead:
    # Path('test.png').write_bytes(binary_screenshot)

    # Keep the screenshot in memory.
    fb = BytesIO(binary_screenshot)

    print('[*] 截图成功!')
    return Image.open(fb)


def ocr(img, join=True):
Пример #13
0
from aip import AipOcr

""" 你的 APPID AK SK """
# Baidu OCR credentials (APP ID, API key, secret key).
# NOTE(review): APP_ID is an empty string here — confirm this is intended.
APP_ID = ''
API_KEY = 'Vfoayf6ZuupEesUXDEygLbPQ'
SECRET_KEY = '50kwhTnPZok7KPu2yF8HXVub6fzIqOXK'

# Client used by the calls below.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

def get_file_content(filePath):
    """Read ``filePath`` in binary mode and return its bytes."""
    handle = open(filePath, 'rb')
    try:
        return handle.read()
    finally:
        handle.close()

# Image bytes to recognise.
image = get_file_content('/Users/tigerzhang/Downloads/huafei.JPG')

""" 调用通用文字识别(高精度版) """
# High-precision recognition call without options.
# NOTE(review): the return value is discarded and the same image is sent again below.
client.basicAccurate(image);

""" 如果有可选参数 """
# Optional request parameters.
options = {}
options["detect_direction"] = "true"
options["probability"] = "true"


""" 带参数调用通用文字识别(高精度版) """
# High-precision recognition call with options; print every recognised line
# that starts with the literal prefix checked below.
resp = client.basicAccurate(image, options)
for word in resp['words_result']:
    if word['words'].startswith('充值卡密码'):
        print(word['words'])
Пример #14
0
# 引入文字识别OCR SDK
from aip import AipOcr

# Constants: Baidu OCR application credentials.
# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
APP_ID = '9838807'
API_KEY = 'ZyNwfGnvQQnYPIuGt25iTWhw'
SECRET_KEY = 'r8RZWXQPMBnS4TyUorzdO6fpFO4h1Ggs'

# Read an image file
def get_file_content(filePath):
    """Return everything stored in ``filePath`` as a bytes object."""
    with open(filePath, mode='rb') as source:
        raw = source.read()
    return raw

# Initialise the AipOcr client.
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Request options passed to the recognition call.
options = {
  'detect_direction': 'true',
  'language_type': 'CHN_ENG',
}

# Call the general text recognition endpoint on 'general.png' and print the raw response.
result = aipOcr.basicGeneral(get_file_content('general.png'), options)
print(result)
Пример #15
0
class AipClient(object):
    """Baidu recognition API wrapper that records results in a cache.

    The class is a singleton: ``__new__`` always returns the same instance.
    ``__init__`` still runs on every construction and re-assigns the attributes.
    """

    def __init__(self, appid, api_key, secrrt_key, redis_url):
        """Store the credentials and create the OCR client and the cache client."""
        self.appid = appid
        self.api_key = api_key
        self.secrrt_key = secrrt_key
        self.client = AipOcr(appid, api_key, secrrt_key)
        self.redis = RedisClient(redis_url)

    def __new__(cls, *args, **kw):
        """Return the single shared instance, creating it on first use."""
        if not hasattr(cls, '_instance'):
            cls._instance = super().__new__(cls)
        return cls._instance

    @property
    def options(self):
        """Request options sent with every recognition call."""
        return {
            "language_type": "CHN_ENG",
            "detect_direction": "false",
            "detect_language": "false",
            "probability": "false",
        }

    def General(self, image, **kwargs):
        """Call ``basicGeneral`` on ``image``; extra keyword arguments are accepted but unused."""
        print('调取General_api  识别')
        return self.client.basicGeneral(image, self.options)

    def Accurate(self, image):
        """Call ``basicAccurate`` on ``image``."""
        print('调取Accurate_api  识别')
        return self.client.basicAccurate(image, self.options)

    @staticmethod
    def _first_word(results):
        """Return the first recognised word in ``results``, or None when there is none."""
        words_result = results.get('words_result')
        if not words_result:
            return None
        return words_result[0]['words']

    def orc(self, image, font_key, word, **kwargs):
        """Recognise ``image``, cache the result and return it.

        The general endpoint is tried first and the accurate endpoint only when
        the general one recognised nothing. A recognised word is stored under
        the image hash and under ``font_key``/``word``. '*' is returned, and
        nothing is stored, when both endpoints recognise nothing.
        """
        hash_value = MD5.md5(image)
        result = self._first_word(self.General(image, **kwargs))
        if result is None:
            result = self._first_word(self.Accurate(image))
        if result is None:
            return '*'
        # The earlier `words_result != '*'` guard compared a list with a
        # string, so it was always true; it is dropped without changing results.
        self.redis.add(hash_value, result)
        self.redis.hadd(font_key, word, result)
        return result

    def run(self, image, font_key, word, **kwargs):
        """Return the cached result for ``image`` when present, otherwise recognise it."""
        hash_value = MD5.md5(image)
        if self.redis.exists(hash_value):
            result = self.redis.get(hash_value)
            self.redis.hadd(font_key, word, result)
            return result
        return self.orc(image, font_key, word, **kwargs)
Пример #16
0
from aip import AipOcr
import re

# Baidu application credentials.
# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
APP_ID = '21834800'
API_KEY = 'gzsi6XkPupAE4xbhUcfq4TPO'
SECRET_KEY = '5O74roabd3cz2MFbreyQAaDGQkUc6NR8'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Open in binary mode: reading the image in text mode fails with
# "UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 ...".
# The context manager closes the file as soon as the bytes are read.
with open('C:\\Users\\wk\\Desktop\\2.png', 'rb') as i:
    img = i.read()

# Dictionary holding the recognition response.
message = client.basicGeneral(img)
# print(message)
# print(message.get('words_result'))

# 'words_result' is a list of dicts, one per recognised line; print each line.
for i in message.get('words_result'):
    print(i.get('words'))
Пример #17
0
#基于百度ocr的测试
from aip import AipOcr
import configparser

# Keyword arguments for AipOcr; all three values are empty placeholders to fill in.
config = {'appId': '', 'apiKey': '', 'secretKey': ''}

# Shared client built from the configuration above.
client = AipOcr(**config)


def get_file_content(file):
    """Return the bytes stored in the file at path ``file``."""
    with open(file, mode='rb') as stream:
        content = stream.read()
    return content


def img_to_str(image_path):
    """Return the raw ``basicGeneral`` response for the image at ``image_path``."""
    # General text recognition (swap in another endpoint as needed).
    return client.basicGeneral(get_file_content(image_path))


# When run as a script: recognise the sample image and print the raw response.
if __name__ == '__main__':
    text = img_to_str('sourceImages/testOcr.png')
    print(text)
Пример #18
0
from aip import AipOcr
import json

# Baidu OCR credentials and the client built from them.
# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
APP_ID = '10706210'
API_KEY = 'kh6kczBGNeE6zFDDFS6U6zC4'
SECRET_KEY = 'L8D6xMO5BkVlISKGaG2TP90vhM0yd1eV'
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Request options passed to the recognition call.
options = {
    'detect_direction': 'true',
    'language_type': 'CHN_ENG',
}


def get_file_content(c_filePath):
    """Read the file at ``c_filePath`` in binary mode and return its bytes."""
    reader = open(c_filePath, 'rb')
    try:
        return reader.read()
    finally:
        reader.close()


# When run as a script: recognise 'test.jpg' and print all recognised words
# concatenated without a separator.
if __name__ == '__main__':
    q_filePath = "test.jpg"
    result = aipOcr.basicGeneral(get_file_content(q_filePath), options)
    c_Result_s = ''.join(word_s['words'] for word_s in result['words_result'])
    print(c_Result_s)
Пример #19
0
from common.common_func import DRG_func
from common.common_func_merchant import Drg_merchant
import requests
from aip import AipOcr
import datetime
import time
""" 你的 APPID AK SK """
# Baidu OCR credentials and the client built from them.
# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
APP_ID = '19611635'
API_KEY = '5kn9XvSsF19BPNuTtYeuPghP'
SECRET_KEY = 'oRAxD3tm0ducNoF3dbzaVfBh9EQHiP6W'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# One HTTP session is shared by the merchant helper and the captcha download.
s = requests.session()
#DF = DRG_func(s)
DM = Drg_merchant(s)
DM.merchant_login(username='******', password='******')

# Fetch the image captcha; the query string is a millisecond timestamp.
t = int(time.time() * 1000)
smscode = 'https://spman.shb02.net/common/jcaptcha/create?%s' % t
respnose = s.get(smscode)

# Path where the captcha image is stored.
path = 'F:\\dx.jpg'
with open(path, 'wb') as f:
    f.write(respnose.content)
    # NOTE(review): explicit close is redundant inside the `with` block.
    f.close()
""" 读取图片 """


def get_file_content(filePath):
Пример #20
0
# !/usr/bin/env python
# encoding=utf-8
# Date:    2018-05-28
# Author:  pangjian
from aip import AipOcr

# Baidu OCR credentials.
# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
APP_ID = '11312548'
API_KEY = 'F8VHIMmovwN8oaINsLHTYNXk'
SECRET_KEY = 'DVvrhcOjTAhRLphpgXjV9AuGRGinp1HQ'

# Client used by the recognition call further down.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

def get_file_content(filePath):
    """Return the raw bytes of ``filePath``, or None when it cannot be read.

    A read failure is reported on stdout with 'read file error'.
    """
    try:
        # Binary mode: image data must not go through text decoding or
        # newline translation. The context manager closes the file, and
        # nothing is closed when open() itself fails.
        with open(filePath, 'rb') as image_file:
            return image_file.read()
    except (IOError, OSError):
        print('read file error')
        return None

""" 如果有可选参数 """
# Optional request parameters.
options = {}
options["language_type"] = "ENG"
options["detect_direction"] = "true"
options["detect_language"] = "true"
options["probability"] = "true"

# Read 'image2.jpg' and send it to the general recognition endpoint.
# NOTE(review): the response is not stored or printed.
image = get_file_content('image2.jpg')
client.basicGeneral(image, options)
Пример #21
0
class sxufreesite:
	"""Scraper for bkjw.sxu.edu.cn that reads the site's captcha with Baidu OCR.

	It downloads the captcha image, recognises it through ``AipOcr``, posts
	room-timetable and login forms, and scans the returned table for empty cells.

	NOTE(review): several methods look mis-indented in this copy (see the notes
	on deal_table, recommend_class and main); confirm against the original file.
	"""
	# Class-level defaults. The mutable ones (values, *_list) are shared by all
	# instances unless __init__ rebinds them.
	index_url = ""
	class_url = ""
	score_url = ""
	usrname = ""
	password = ""
	header = ""
	table = ""
	cookie = ""
	handler = ""
	values = {}
	tr_list = []
	td_list = []
	class_list = []
	js_list = []
	jxl_list_101 = []
	jxl_list_102 = []
	final_list = []
	# Baidu OCR credentials.
	# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
	APP_ID = '11519354'
	API_KEY = 'tLlZhgC4kwx8ArqEhBXzCvRw'
	SECRET_KEY = 'GnpZ0XXBFgZXz8v0aYTGIMhHRMmlRKSd'
	def __init__(self):
		"""Set up URLs, the cookie-aware opener, request headers, room codes and the OCR client."""
		self.index_url = "http://bkjw.sxu.edu.cn/"
		self.class_url = "http://bkjw.sxu.edu.cn/ZNPK/KBFB_RoomSel.aspx"
		self.score_url = "http://bkjw.sxu.edu.cn/_data/login.aspx"
		# Cookie jar shared by every request made through self.opener.
		self.cookie = cookiejar.CookieJar()
		self.handler = rq.HTTPCookieProcessor(self.cookie)
		self.opener = rq.build_opener(self.handler)
		# Header set that uses the login page as Referer.
		self.header = {
		"Host":"bkjw.sxu.edu.cn",
		"Origin":"http://bkjw.sxu.edu.cn",
		"Content-Type":"application/x-www-form-urlencoded",
		"Referer":"http://bkjw.sxu.edu.cn/_data/login.aspx",
		"Upgrade-Insecure-Requests":"1",
		"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"

		}
		# Header set that uses the score page as Referer; "Cookie" is filled in by post_score.
		self.header2 = {
		"Host":"bkjw.sxu.edu.cn",
		"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
		"Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
		"Accept-Language":"zh-CN,zh;q=0.8",
		"Accept-Encoding":"gzip, deflate",
		"Referer":"http://bkjw.sxu.edu.cn/xscj/Stu_MyScore.aspx",
		"Content-Type":"application/x-www-form-urlencoded",
		"Content-Length":"76",
		"Cookie":"",
		"Connection":"keep-alive",
		"Upgrade-Insecure-Requests":"1"
		}
		# Code lists; presumably building codes and the room codes inside buildings 101 and 105 (TODO confirm).
		self.jxl_list = ["101","105"]
		self.js_list_101 = ["1010101","1010102","1010103","1010104","1010105","1010106","1010107","1010108","1010109","1010110","1010111","1010112","1010113","1010114","1010115","1010201","1010202","1010203","1010204","1010205","1010206","1010207","1010208","1010301","1010302","1010303","1010304","1010305","1010306","1010307","1010308","1010401","1010402","1010501","1010502","1010503","1010504","1010505","1010506","1010507","1010508","1010509","1010510","1010511"]
		self.jxl_list_105 = ['1050101', '1050102', '1050103', '1050104', '1050105', '1050106', '1050107', '1050108', '1050109', '1050110', '1050111', '1050112', '1050113', '1050114', '1050115', '1050116','1050201', '1050202', '1050203', '1050204', '1050205', '1050206', '1050207', '1050208', '1050209','1050211', '1050212', '1050213', '1050214', '1050215', '1050216', '1050217', '1050218','1050301', '1050302', '1050303', '1050304', '1050305', '1050306', '1050307', '1050308', '1050309','1050310','1050311', '1050312', '1050313', '1050314', '1050315', '1050316', '1050317','1050401', '1050402', '1050403', '1050404', '1050405', '1050406', '1050407', '1050408', '1050409','1050501','1050502','1050503','1050504','1050505']
		self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)

	def get_img_code(self):
		"""Download the captcha image to 'imgCode.jpg' and print the cookie jar.

		Nothing is returned explicitly, so callers receive None.
		"""
		req = rq.Request("http://bkjw.sxu.edu.cn/sys/ValidateCode.aspx",headers=self.header)
		with self.opener.open(req) as gec:
			# print(cookie)
			name = "imgCode.jpg"
			img_res = gec.read()
		with open(name,"wb") as ic:
			ic.write(img_res)
			print(self.cookie)



	# Earlier, fully commented-out get_score variant that drove a PhantomJS
	# browser; kept as found.
	# def get_score(self):
	# 	username = "******"
	# 	password = "******"
		# cookies = {}
		# for item in self.cookies:
		# 	cookies["name"] = item.name
		# 	cookies["value"] = item.value
		# cookies["domain"] = ".bkjw.sxu.edu.cn"
		# cookies["path"] = "/"
		# cookies["expires"] = None
		# browser2 = webdriver.PhantomJS()
		# # browser2.get("http://bkjw.sxu.edu.cn/sys/ValidateCode.aspx")
		# # ck2 = browser2.get_cookies()
		# # print(ck2)
		# browser = webdriver.PhantomJS()
		# browser.get("http://bkjw.sxu.edu.cn")
		# browser.delete_all_cookies()
		# browser.add_cookie(cookies)
		# browser.refresh()
		# browser.switch_to.frame(0)
		# browser.find_element_by_id("txt_asmcdefsddsd").send_keys(username)
		# browser.find_element_by_id("txt_pewerwedsdfsdff").send_keys(password)
		# browser.find_element_by_id("txt_sdertfgsadscxcadsads").click()
		# a = browser.get_screenshot_as_file("1.jpg")
		# im = Image.open("1.jpg")

		# box = (145,278,224,298)
		# region = im.crop(box)
		# region2 = region.convert("RGB")
		# region2.save("imgCode.jpg")
		# browser.add_cookie(cookies)

		# for j in range(20):
		# 	imgcodeidentify.deal_img("imgCode.jpg")
		# 	imgcodeidentify.interference_line(imgcodeidentify.deal_img("imgCode.jpg"),"imgCode.jpg")
		# 	imgcodeidentify.interference_point(imgcodeidentify.interference_line(imgcodeidentify.deal_img("imgCode.jpg"),"imgCode.jpg"),"imgCode.jpg")
		# code = self.client.basicGeneral(self.get_file_content("imgCode.jpg"))["words_result"][0]["words"]
		# browser.find_element_by_id("txt_sdertfgsadscxcadsads").send_keys(code)
		# ck = browser.get_cookies()

		# print(ck)
		# time.sleep(5)
		# browser.get_screenshot_as_file("1.jpg")




	def get_score(self,opener,username,password,yzm):
		"""Post the login form to ``self.score_url`` and print the request body and response.

		The password and captcha fields are derived with two rounds of MD5, each
		truncated to 30 upper-cased hex characters.

		:param opener: unused; the request is sent through ``self.opener``
		:param username: account name, sent as-is and mixed into the password digest
		:param password: plain-text password
		:param yzm: captcha text
		"""

		h1 = hashlib.md5()

		h1.update(password.encode(encoding='utf-8'))

		hex_password = h1.hexdigest()

		# Second-round input: username + first digest + the literal suffix "10108".
		temp_pwd = username+hex_password[:30].upper()+"10108"

		h2 = hashlib.md5()

		h2.update(temp_pwd.encode(encoding='utf-8'))

		hex_temp = h2.hexdigest()

		dsdsdsdsdxcxdfgfg = hex_temp[:30].upper()   # password field

		txt_asmcdefsddsd = username                 # user name field

		h3 = hashlib.md5()

		h3.update(yzm.upper().encode(encoding='utf-8'))

		hex_temp_yzm = h3.hexdigest()[:30].upper()+'10108'

		h4 = hashlib.md5()

		h4.update(hex_temp_yzm.encode(encoding='utf-8'))

		fgfggfdgtyuuyyuuckjg = h4.hexdigest()[:30].upper()  # captcha field

		__VIEWSTATE = "dDwyMTIyOTQxMzM0Ozs+AI2AQlMGeOYvPjA1fJfST57PPCk="

		pcInfo = "Mozilla/5.0+(Windows+NT+10.0;+Win64;+x64;+rv:61.0)+Gecko/20100101+Firefox/61.0Windows+NT+10.0;+Win64;+x645.0+(Windows)+SN:NULL"

		Sel_Type = "STU"

		typeName = "学生"
		# Form fields; the plain-text user/password/captcha inputs are sent empty.
		values = {}
		values["__VIEWSTATE"] = __VIEWSTATE
		values["dsdsdsdsdxcxdfgfg"] = dsdsdsdsdxcxdfgfg
		values["fgfggfdgtyuuyyuuckjg"] = fgfggfdgtyuuyyuuckjg
		values["pcInfo"] = pcInfo
		values["Sel_Type"] = Sel_Type
		values["txt_asmcdefsddsd"] = txt_asmcdefsddsd
		values["txt_pewerwedsdfsdff"] = ""
		values["txt_sdertfgsadscxcadsads"] = ""
		values["typeName"] = typeName
		
		data = urllib.parse.urlencode(values).encode('gb2312')            #GB18030

		req = rq.Request(self.score_url,data,headers=self.header)
		
		html = self.opener.open(req).read().decode('gb2312')

		print(data)
		print(html)

		# http://bkjw.sxu.edu.cn/xscj/Stu_MyScore_rpt.aspx
		# http://bkjw.sxu.edu.cn/xscj/Stu_MyScore_Drawimg.aspx?x=1&h=2&w=782&xnxq=20171&xn=2017&xq=1&rpt=1&rad=2&zfx=0&xh=201700004159
	def post_score(self,opener):
		"""Request the score image and save the response body to 'score.jpg'.

		:param opener: unused; the request is sent through ``self.opener``
		"""
		# sel_xn=2017&sel_xq=1&SJ=1&btn_search=%BC%EC%CB%F7&SelXNXQ=2&zfx_flag=0&zxf=0
		# NOTE(review): `data` is built but never sent.
		data = "sel_xn=2017&sel_xq=1&SJ=1&btn_search=%BC%EC%CB%F7&SelXNXQ=2&zfx_flag=0&zxf=0".encode('GB18030')
		# Each cookie overwrites the previous one, so only the last one is kept.
		for item in self.cookie:
			self.header2["Cookie"] = item.name+'='+item.value
		print(self.header2)
		# NOTE(review): the url-encoded header dict is passed as the second positional
		# argument of Request — confirm it was not meant to be sent as headers.
		head2 = urllib.parse.urlencode(self.header2).encode('utf-8')
		request = rq.Request("http://bkjw.sxu.edu.cn/xscj/Stu_MyScore_Drawimg.aspx?x=1&h=2&w=782&xnxq=20171&xn=2017&xq=1&rpt=1&rad=2&zfx=0&xh=201700004159",head2)#,data   self.header2
		html = self.opener.open(request).read()
		with open("score.jpg","wb") as jpg:
			jpg.write(html)
		print(html)
	def get_file_content(self,filePath):
		"""Return the bytes of the file at ``filePath``."""
		with open(filePath, 'rb') as fp:
			result = fp.read()
			return result
	def post_data(self,opener,Sel_XNXQ,rad_gs,imgcode,Sel_XQ,Sel_JXL,Sel_ROOM):
		"""Post the room-timetable query and return the decoded response HTML.

		The form fields are stored in ``self.values`` and the first
		``<tr ...>...</tr>`` match is kept in ``self.table``. An IndexError is
		raised when the response contains no such match.

		:param opener: unused; the request is sent through ``self.opener``
		"""
		self.values["Sel_XNXQ"] = Sel_XNXQ 
		self.values["rad_gs"] = rad_gs     
		self.values["txt_yzm"] = imgcode   
		self.values["Sel_XQ"] = Sel_XQ     
		self.values["Sel_JXL"] = Sel_JXL   
		self.values["Sel_ROOM"] = Sel_ROOM 
		data = urllib.parse.urlencode(self.values).encode('GB18030')
		request = rq.Request("http://bkjw.sxu.edu.cn/ZNPK/KBFB_RoomSel_rpt.aspx", data, self.header)
		html = self.opener.open(request).read().decode('GB18030')
		reg = re.compile("<tr.*>.*</tr>")
		self.table = reg.findall(html)[0]
		return html
	def recommend_class(self):
		"""Scan ``self.tr_list`` (5 rows x 7 columns) for empty cells.

		As written, returns a one-element list ``[(j+1, i+1)]`` at the first empty
		cell, or None when no cell is empty.
		NOTE(review): the `return` sits inside the `if`; confirm whether all empty
		cells were meant to be collected before returning.
		"""
		EmptyClassList = []
		for i in range(5):
			for j in range(7):
				if self.tr_list[i][j] == "":
					t = (j+1,i+1)
					EmptyClassList.append(t)
					# NOTE(review): the printed text looks mis-encoded (possibly GBK read as Latin-1).
					print("¸Ã½ÌÊÒÐÇÆÚ"+str(j+1)+"µÚ"+str(i+1)+"½Ú¿ÎΪ¿Õ½ÌÊÒ")
					return EmptyClassList

	def deal_table(self,html):
		"""Parse ``html`` and distribute the text of ``valign="top"`` cells into row lists.

		NOTE(review): as indented here, `count` is only incremented inside the final
		`else` branch, which is unreachable while count stays at 1; every cell
		therefore lands in tr_list1 and ``self.tr_list`` is never extended. Confirm
		the original indentation of the last six statements.
		"""
		soup = BeautifulSoup(html,"html5lib")
		td_list = soup.findAll(valign = "top")
		tr_list1 = []
		tr_list2 = []
		tr_list3 = []
		tr_list4 = []
		tr_list5 = []
		count = 1
		for i in td_list:
			if count <= 7:
				tr_list1.append(i.text)
			elif count <=14 and count >=8:
				tr_list2.append(i.text)
			elif count <=21 and count >=15:
				tr_list3.append(i.text)
			elif count <=28 and count >=22:
				tr_list4.append(i.text)
			elif count <=35 and count >=29:
				tr_list5.append(i.text)
			else:
				pass
				count = count + 1
				self.tr_list.append(tr_list1)
				self.tr_list.append(tr_list2)
				self.tr_list.append(tr_list3)
				self.tr_list.append(tr_list4)
				self.tr_list.append(tr_list5)
	def main(self):#,xq,time
		"""Query every room in ``self.js_list_101`` and collect the results in ``self.final_list``.

		For each room the captcha is fetched, cleaned through ``imgcodeidentify``,
		recognised by OCR (up to 30 attempts) and submitted with ``post_data``.
		NOTE(review): as indented here, `break` only leaves the inner `for`; once
		`count` is not a multiple of 10 the `while True` loop has no exit. Confirm
		the original indentation.
		"""
		count = 0
		for i in self.js_list_101: 
			while True:         
				if count%10 == 0:
					# get_img_code() has no return statement, so `opener` is None here.
					opener = self.get_img_code()
					for j in range(30):
						imgcodeidentify.deal_img("imgCode.jpg")
						imgcodeidentify.interference_line(imgcodeidentify.deal_img("imgCode.jpg"),"imgCode.jpg")
						imgcodeidentify.interference_point(imgcodeidentify.interference_line(imgcodeidentify.deal_img("imgCode.jpg"),"imgCode.jpg"),"imgCode.jpg")
						try:
							code = self.client.basicGeneral(self.get_file_content("imgCode.jpg"))["words_result"][0]["words"]
						except IndexError:
							# Nothing recognised: try again.
							continue
						#self.client.basicGeneral(self.get_file_content("imgCode.jpg"))["words_result"][0]["words"]
						code = code.replace(" ","")
						print(code)

						# input(<prompt asking the user to type the captcha>)
						try:
							html = self.post_data(opener,"20171","1",code,"1","101",i)
						except IndexError:
							# post_data found no table row in the response: try again.
							continue
						else:
							self.deal_table(html)
							temp_list = self.recommend_class()
							temp_dict = {}
							temp_dict[str(i)] = temp_list
							self.final_list.append(temp_dict)
						# print(<message that the query for room i finished>)
						count = count + 1
						break
Пример #22
0
from aip import AipOcr
import base64
""" 你的 APPID AK SK """
# Baidu OCR credentials (APP ID, API key, secret key).
# NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
APP_ID = '10682639'
API_KEY = 'yYkzlkhdkO4CsOo7fGHZmgXx'
SECRET_KEY = 'DWIxGQsDGbuTY9v7qCC5t5VqOkDZC8c1'

# Client built from the credentials above.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
""" 读取图片 """


def get_file_content(filePath):
    """Load ``filePath`` in binary mode and hand back its bytes."""
    with open(filePath, mode='rb') as picture:
        blob = picture.read()
    return blob


# Read '11.jpg' and print the length of its base64 encoding.
image = get_file_content('11.jpg')
imagebase64 = base64.b64encode(image)
print(len(imagebase64))

# General text recognition on a local image (disabled):
# client.basicGeneral(image);
""" 如果有可选参数 """
# Optional request parameters; every option is currently commented out.
options = {}
# options["language_type"] = "CHN_ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
# options["probability"] = "true"
""" 带参数调用通用文字识别, 图片参数为本地图片 """
# Recognition calls with options (disabled):
# client.basicGeneral(image, options)
#retbig = client.accurate(image, options)
Пример #23
0
class unlockScrapy(object):
    """Work through a slider plus click-the-characters captcha in a Selenium session.

    The small image lists the characters to click; the large image contains
    them at positions that are located with Baidu OCR.
    """

    def __init__(self, driver):
        """Keep the Selenium driver and build the OCR client.

        :param driver: Selenium driver showing the captcha page
        """
        super(unlockScrapy, self).__init__()
        # Selenium driver
        self.driver = driver
        # Placeholders for the Baidu OCR app id / API key / secret key
        self.WAPPID = '百度文字识别appid'
        self.WAPPKEY = '百度文字识别appkey'
        self.WSECRETKEY = '百度文字识别secretkey'
        # Baidu OCR SDK client
        self.WCLIENT = AipOcr(self.WAPPID, self.WAPPKEY, self.WSECRETKEY)

    # Click the characters in the large image, in the given order
    def clickWords(self, wordsPosInfo):
        """Click each entry of ``wordsPosInfo`` inside the large image.

        :param wordsPosInfo: entries exposing ``['location']['left']`` and
            ``['location']['top']``; the click lands 20 px right of and below
            that point
        """
        # Element of the large image
        imgElement = self.driver.find_element_by_xpath(
            "/html/body/div[3]/div[3]/img")
        # Click the characters in the order they appear in the small image
        for info in wordsPosInfo:
            ActionChains(self.driver).move_to_element_with_offset(
                to_element=imgElement,
                xoffset=info['location']['left'] + 20,
                yoffset=info['location']['top'] + 20).click().perform()
            time.sleep(1)

    # Download the small (top) and large (bottom) images
    def downloadImg(self):
        """Save the small image to 'code.jpeg' and the large one to 'checkCode.jpeg'."""
        # src of the small image
        codeSrc = self.driver.find_element_by_xpath(
            "/html/body/div[3]/div[1]/img").get_attribute("src")
        # src of the large image
        checkSrc = self.driver.find_element_by_xpath(
            "/html/body/div[3]/div[3]/img").get_attribute("src")
        # The src values are base64 data, so the part after the comma is
        # decoded before writing. `with` closes each file even if decoding fails.
        with open("code.jpeg", "wb") as fh:
            fh.write(base64.b64decode(codeSrc.split(',')[1]))
        with open("checkCode.jpeg", "wb") as fh:
            fh.write(base64.b64decode(checkSrc.split(',')[1]))

    # Brighten the images to make the text easier to recognise
    def chageImgLight(self):
        """Multiply every pixel value of both saved images by 4 and save them in place."""
        im = Image.open("code.jpeg")
        im1 = im.point(lambda p: p * 4)
        im1.save("code.jpeg")
        im = Image.open("checkCode.jpeg")
        im1 = im.point(lambda p: p * 4)
        im1.save("checkCode.jpeg")

    # Get past the slider
    def unlockScroll(self):
        """Click and hold the slider element, then move the pointer in three steps."""
        # Slider element
        scrollElement = self.driver.find_elements_by_class_name(
            'cpt-img-double-right-outer')[0]
        ActionChains(
            self.driver).click_and_hold(on_element=scrollElement).perform()
        ActionChains(self.driver).move_to_element_with_offset(
            to_element=scrollElement, xoffset=30, yoffset=10).perform()
        ActionChains(self.driver).move_to_element_with_offset(
            to_element=scrollElement, xoffset=100, yoffset=20).perform()
        ActionChains(self.driver).move_to_element_with_offset(
            to_element=scrollElement, xoffset=200, yoffset=50).perform()

    # Read an image file
    def getFile(self, filePath):
        """Return the bytes of the file at ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    # Recognise the text in the small image
    def iTow(self):
        """Return the text recognised in 'code.jpeg', or 'error' on any failure.

        Lines ending with '。' are followed by '\\r\\n'; other lines are
        concatenated directly.
        """
        try:
            op = {'language_type': 'CHN_ENG', 'detect_direction': 'true'}
            res = self.WCLIENT.basicAccurate(self.getFile('code.jpeg'),
                                             options=op)
            words = ''
            for item in res['words_result']:
                if item['words'].endswith('。'):
                    words = words + item['words'] + '\r\n'
                else:
                    words = words + item['words']
            return words
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return 'error'

    # Locate the characters in the large image
    def getPos(self, words):
        """Return position info for each of ``words`` found in 'checkCode.jpeg'.

        :param words: iterable of single characters to look for
        :return: list of the matching per-character entries from the OCR
            response; an empty list when recognition fails
        """
        try:
            op = {'language_type': 'CHN_ENG', 'recognize_granularity': 'small'}
            res = self.WCLIENT.accurate(self.getFile('checkCode.jpeg'),
                                        options=op)
            # Position info of every recognised character
            allPosInfo = []
            # Position info of the characters that are needed
            needPosInfo = []
            for item in res['words_result']:
                allPosInfo.extend(item['chars'])
            # Keep only the characters that were asked for
            for word in words:
                for item in allPosInfo:
                    if word == item['char']:
                        needPosInfo.append(item)
            return needPosInfo
        except Exception as e:
            print(e)
            # Return an empty list so callers can take len() of the result and
            # retry, instead of failing on None.
            return []

    def main(self):
        """Run the whole flow: slider, download, OCR, click, submit; retry until the title changes."""
        # Get past the slider
        self.unlockScroll()
        time.sleep(2)
        # Download the images
        self.downloadImg()
        time.sleep(2)
        # Brighten the images for recognition
        self.chageImgLight()
        # Recognise the small image text
        text = self.iTow()
        # Locate the characters in the large image
        posInfo = self.getPos(list(text))
        # Recognition of either image may be inaccurate, so fetch a new
        # captcha unless exactly four characters were read and located.
        # NOTE(review): retries do not call chageImgLight() again — confirm
        # whether that is intended.
        while len(posInfo) != 4 or len(text) != 4:
            # Click to fetch new images, then recognise again
            self.driver.find_elements_by_xpath(
                '/html/body/div[3]/div[4]/div/a')[0].click()
            time.sleep(2)
            self.downloadImg()
            time.sleep(2)
            text = self.iTow()
            posInfo = self.getPos(list(text))
        time.sleep(3)
        print('匹配成功,开始点击')
        # Click the characters in the large image
        self.clickWords(posInfo)
        # Click the submit button
        self.driver.find_elements_by_xpath(
            '/html/body/div[3]/div[4]/a')[0].click()
        time.sleep(2)
        # On success the page title changes
        if self.driver.title != '携程在手,说走就走':
            print('破解成功')
        else:
            # Try again
            print('破解失败,再次破解')
            self.main()
Пример #24
0
from aip import AipOcr

APP_ID = '11730410'
API_KEY = 'Teuyu8PygKTn8KEdUqLTTvh1'
SECRET_KEY = 'rIqEyFe6TkFTKrt7Isa9rvsG9vzTELCT '
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

""" 读取图片 """
def get_file_content(filePath):
    """Return the complete content of the file at ``filePath`` as bytes."""
    with open(filePath, mode='rb') as image_file:
        raw_bytes = image_file.read()
    return raw_bytes

image = get_file_content('example.jpg')

""" 调用通用文字识别(高精度版) """
client.basicAccurate(image);

""" 如果有可选参数 """
options = {}
options["detect_direction"] = "true"
options["probability"] = "true"

""" 带参数调用通用文字识别(高精度版) """
client.basicAccurate(image, options)
Пример #25
0
import base64
from aip import AipOcr
import re

# client_id 为官网获取的AK, client_secret 为官网获取的SK
host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=dvhSujnaY9qaWlWNenyCo2ZK&client_secret=82G08tOb0wYntAHD4oYdEMPGsvQEHinA'
response = requests.get(host)
content = response.json()
access_token = content["access_token"]

# 定义常量
APP_ID = '11650666'
API_KEY = 'qQqkIOEYaLdMwA42op63gaLc'
SECRET_KEY = 'Cnw2YGHs2n58CyVmLvyKAc5zwaQHMyVl'
# 初始化文字识别分类器
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)


# 读取图片
def get_file_content(filePath):
    """Read the file at ``filePath`` in binary mode and return its bytes."""
    with open(filePath, mode='rb') as source:
        payload = source.read()
    return payload


imageAddr = open('paris.txt', 'r')
fo = open('pictureToWords.txt', 'w+')
for filepath in imageAddr.readlines():
    filepath = filepath.replace('\n', '')
    image = open(filepath, 'rb').read()
    data = {'image': base64.b64encode(image).decode()}
Пример #26
0
from aip import AipOcr
import re

APP_ID = "18137435"
API_KEY = "Rtast1dOns0Mf4ckdQyM8LKC"
SECRET_KEY = "gpEkNxGpWkeqCbnTl3DsR1ni6h4fSCYV"
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Read the image into memory.
with open(r"E:\image\aa.png", "rb") as f:
    image = f.read()
response = client.basicGeneral(image)
data = str(response).replace(" ", "")
print(data)
# Take the first recognised line straight from the parsed response. The
# previous regex expected "': '" (with a space) in a string whose spaces had
# just been removed, so it could never match and indexing [0] always failed.
words_result = response.get("words_result") or []
res = words_result[0]["words"].replace(" ", "") if words_result else ""
print(res)
import uuid
from os import remove
import requests
import enchant
from PIL import Image, ImageFilter
from aip import AipOcr

app_id = '11565085'
api_key = 'dh9pPBqw1H4hQQyPrk4HHVv6'
secret_key = '6mjlcxPsT2NRs7wETIqs3xYBjz0pdyH5'
client = AipOcr(app_id, api_key, secret_key)
d = enchant.Dict('en_US')


def process_image(filename):
    """Binarise the image stored at ``filename`` and overwrite it.

    The image is converted to greyscale, every pixel brighter than the
    threshold becomes white (255) and every other pixel black (0), a median
    filter is applied, and the result is saved back to ``filename``.

    :param filename: path of the image to process (overwritten in place).
    :return: the filtered black-and-white image object.
    """
    threshold_grey = 25
    grey = Image.open(filename).convert('L')
    width, height = grey.size
    binary = Image.new("L", grey.size, 255)
    src = grey.load()
    dst = binary.load()
    for row in range(height):
        for col in range(width):
            dst[col, row] = 255 if int(src[col, row]) > threshold_grey else 0
    binary = binary.filter(ImageFilter.MedianFilter())
    binary.save(filename)
    return binary

Пример #28
0
def main(videoname):
    """OCR every extracted frame of a video and collect the subtitle text.

    Reads the credentials and directories from ``config.getConfig(videoname)``,
    runs Baidu general OCR (with locations) on every ``*jpg`` file of the image
    directory in sorted order, groups the recognised lines by vertical
    position, and writes a log to ``<outputDir><start timestamp>.txt``. The
    log ends with the words of the most populated group joined by commas.

    :param videoname: key passed to ``config.getConfig`` to select the
        configuration.
    """
    conf = config.getConfig(videoname)

    APP_ID = conf['APP_ID']
    API_KEY = conf['API_KEY']
    SECRET_KEY = conf['SECRET_KEY']
    imgDir = conf['imgDir']
    outputDir = conf['outputDir']

    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    # Upper bound on OCR attempts per frame, so that a permanent API error
    # (bad credentials, exhausted quota) cannot spin forever.
    max_attempts = 10

    def get_file_content(filePath):
        with open(filePath, 'rb') as fp:
            return fp.read()

    def get_OCR(imgName):
        """Return the ``words_result`` list of one frame, or False on failure."""
        image = get_file_content(imgDir + '/' + imgName)

        options = {
            "recognize_granularity": "big",
            "language_type": "CHN_ENG",
            "detect_direction": "true",
        }

        res = client.general(image, options)
        try:
            w = res['words_result']
        except (KeyError, TypeError):
            # Error responses carry no 'words_result'.
            return False
        print(str(res))
        return w

    def is_img(f):
        return re.match(r'.+jpg', f)

    start = time.time()
    positionData = []

    # The context manager closes the log even if a frame raises.
    with open(outputDir + str(start) + '.txt', 'a') as output:
        pathDir = sorted(filter(is_img, os.listdir(imgDir)))

        for imgName in pathDir:
            output.write('Start: ' + imgName + '\n')
            ocrRes = get_OCR(imgName)
            # fail then retry (bounded, with a short pause between attempts)
            attempts = 1
            while ocrRes is False and attempts < max_attempts:
                print('Fail: ' + imgName)
                time.sleep(1)
                ocrRes = get_OCR(imgName)
                attempts += 1
            if ocrRes is False:
                print('Fail: ' + imgName)
                output.write('Failed: ' + imgName + '\n')
                continue

            for word in ocrRes:
                top = int(word['location']['top'])
                height = int(word['location']['height'])
                w = word['words']
                has = False
                for group in positionData:
                    # belong to this group
                    # ('//' keeps the integer arithmetic of the original
                    # Python 2 code on every interpreter version)
                    if abs(group['top'] - top) < (group['height'] // 2):
                        # The word belongs here whether or not it is kept, so
                        # a skipped duplicate must not start a new group.
                        has = True
                        # Avoid duplicate: check if current word is similar
                        # to last word
                        lastWord = group['words'][-1]
                        if difflib.SequenceMatcher(None, lastWord,
                                                   w).quick_ratio() > 0.8:
                            break
                        # append words
                        group['words'].append(w)
                        # cal new value
                        group['totalTop'] += top
                        group['totalHeight'] += height
                        group['totalNum'] += 1
                        group['top'] = group['totalTop'] // group['totalNum']
                        group['height'] = (group['totalHeight'] //
                                           group['totalNum'])
                        break

                if not has:
                    positionData.append({
                        'top': top,  # group standard, average value of tops
                        'totalTop': top,
                        'height': height,
                        'totalHeight': height,
                        'totalNum': 1,  # how many words were added to group
                        'words': [w]
                    })

                output.write('Words: ' + w + '\n')
                output.write('Top: ' + str(word['location']['top']) + '\n')
                output.write('Height: ' + str(word['location']['height']) +
                             '\n')

            output.write('Finished: ' + imgName + '\n')
            print('Finished: ' + imgName)

        output.write(str(positionData) + '\n')

        # The group holding the most words is taken to be the subtitle line.
        max_group = []
        for group in positionData:
            if group['totalNum'] > len(max_group):
                max_group = group['words']
        allWords = ','.join(max_group)
        output.write('-----------------------' + '\n')
        output.write(allWords + '\n')
        output.write('-----------------------' + '\n')
        end = time.time()
        output.write('Running time: ' + str(end - start) + '\n')
    print('Finished All')
Пример #29
0
class IdentifyCaptchaPicture(object):
    """Captcha solver: OCR reads the prompt text, a TensorFlow graph classifies
    the eight picture tiles, and tiles whose label matches the prompt are
    reported."""
    def __init__(self):
        self.output_graph = 'output_graph.pb'
        self.output_labels = 'output_labels.txt'
        self.captcha_picture = 'captcha_picture.jpg'
        self.english_to_chinese = english_to_chinese
        self.id_to_something = self._id_to_something()
        self.client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    # Read the output_labels.txt file
    def _id_to_something(self):
        """Return ``{line index: stripped label}`` for the labels file."""
        with open(self.output_labels, 'r') as f:
            return {index: line.strip() for index, line in enumerate(f)}

    # Find the class label for an id
    def _find_string_by_id(self, id_test):
        """Return the label stored for ``id_test``, or None if unknown."""
        return self.id_to_something.get(id_test)

    # Cut the captcha into tiles
    def _cut_captcha_picture(self):
        """Return the eight tiles of the captcha, row by row, left to right."""
        image = Image.open(self.captcha_picture)
        tile = 66  # edge length of one tile in pixels
        return [
            image.crop((left, top, left + tile, top + tile))
            for top in (41, 113)
            for left in (5, 77, 149, 221)
        ]

    # Recognise the captcha prompt text
    def _identify_captcha_picture_text(self):
        """OCR the prompt strip of the captcha.

        :return: the first recognised line, or ``''`` when the service
            returns an error response or recognises nothing.
        """
        image = Image.open(self.captcha_picture)

        prompt = image.crop((124, 0, 287, 26))
        image_byte_array = io.BytesIO()
        prompt.save(image_byte_array, format='PNG')
        result = self.client.basicGeneral(image_byte_array.getvalue())
        try:
            # Error responses have no 'words_result'; an empty list means no
            # text was found. Both cases yield an empty prompt.
            return result['words_result'][0]['words']
        except (KeyError, IndexError, TypeError):
            return ''

    # Recognise the captcha
    def identify_captcha_picture(self):
        """Classify every tile and return the 1-based positions (as strings)
        of the tiles whose label shares a character with the prompt text."""
        all_picture_after_cut = self._cut_captcha_picture()
        with tf.gfile.FastGFile(self.output_graph, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')

        with tf.Session() as sess:
            text = self._identify_captcha_picture_text()
            print('识别出的文字是:', text)
            correct_choose = []
            softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
            # Walk over every tile
            for index, one_picture in enumerate(all_picture_after_cut):
                image_byte_array = io.BytesIO()
                one_picture.save(image_byte_array, format='PNG')
                prediction = sess.run(
                    softmax_tensor,
                    feed_dict={
                        'DecodeJpeg/contents:0': image_byte_array.getvalue()
                    })
                prediction = np.squeeze(prediction)

                # Pick the most probable class
                top_1 = np.argmax(prediction, axis=0)
                something = self._find_string_by_id(top_1)
                something_chinese = self.english_to_chinese[something]
                # A tile is selected as soon as one character of its label
                # appears in the prompt.
                if any(one_word in text for one_word in something_chinese):
                    correct_choose.append(str(index + 1))
                print('%s %0.5f%%' %
                      (something_chinese, prediction[top_1] * 100))
            return correct_choose
Пример #30
0
 def __init__(self):
     """Copy the Baidu credentials from ``THEKEY2BD`` and build the OCR client."""
     keys = THEKEY2BD
     self.api_id, self.api_key, self.secret_key = (
         keys.THEKE2BD_api_id,
         keys.THEKE2BD_api_key,
         keys.THEKE2BD_secret_key,
     )
     self.client = AipOcr(self.api_id, self.api_key, self.secret_key)
Пример #31
0
class MyApp(QMainWindow, Ui_MainWindow):
    '''
    PyQt5 GUI that sends a picture to the Baidu OCR API and shows the
    recognised text.
    '''
    def __init__(self):
        # NOTE(review): the Qt base class is initialised both explicitly and
        # through super(); kept as in the original - confirm whether one call
        # is enough.
        QMainWindow.__init__(self)
        Ui_MainWindow.__init__(self)
        super().__init__()
        self.initUI()  # run the custom UI initialisation
        # True when the picture to convert was chosen with the file browser,
        # False when it comes from a screenshot.
        self.status = False
        self.APP_ID = ''
        self.API_KEY = ''
        self.SECRET_KEY = ''
        self.fileName1 = None  # path picked in the file dialog
        self.fileName2 = None  # path of the saved screenshot

    def initUI(self):
        '''
        Initialize the window's UI
        '''
        self.setupUi(self)
        self.setWindowTitle("图片转文字GUI程序")
        # Window icon (on Linux only the task bar shows it).
        self.setWindowIcon(QIcon("../icons/eye.png"))

        self.initMenuBar()
        self.initToolBar()
        self.initButton()

        self.show()

    def initMenuBar(self):
        '''
        Build the menu bar: a File menu with an Exit action, and a Help menu.
        '''
        menubar = self.menuBar()
        exitAct = QAction(QIcon('../icons/exit.png'), 'Exit', self)
        exitAct.setShortcut('Ctrl+Q')
        exitAct.triggered.connect(qApp.quit)

        fileMenu = menubar.addMenu('&File')
        fileMenu.addAction(exitAct)

        menubar.addMenu('&Help')

    def initToolBar(self):
        '''
        Add an Exit action to the default tool bar created by the designer.
        '''
        exitAct = QAction(QIcon('../icons/exit.png'), 'Exit', self)
        exitAct.setShortcut('Ctrl+Q')
        exitAct.triggered.connect(qApp.quit)

        self.toolBar.addAction(exitAct)

    def initButton(self):
        '''
        Connect the three buttons to their callbacks and set their tool tips.
        '''
        self.btnBrowse.clicked.connect(self.browserButton_callback)
        self.btnBrowse.setToolTip("浏览需要转换的文件")
        self.btnScreen.clicked.connect(self.screenButton_callback)
        self.btnScreen.setToolTip("截取屏幕文字")
        self.btnConvert.clicked.connect(self.convertButton_callback)
        self.btnConvert.setToolTip("转换图片中的文字")

    def browserButton_callback(self):
        '''
        Let the user pick a picture with QFileDialog and remember its path.
        '''
        self.status = True
        # Filters are separated by a double semicolon.
        self.fileName1, filetype = QFileDialog.getOpenFileName(
            self, "选取图片文件", "/home/kindy/图片",
            "All Files (*);;Music Files (*.png)")
        self.filePath.setText(self.fileName1)

    def screenButton_callback(self):
        '''
        Open the screenshot tool; confirming the capture saves it to
        "../temp/temp.png".
        '''
        self.fileName2 = r'../temp/temp.png'
        # The screenshot is now the active source; without this reset a file
        # browsed earlier would be converted instead of the screenshot.
        self.status = False

        self.cap = ScreenShot()
        self.cap.show()

    def convertButton_callback(self, filename):
        '''
        Run Baidu OCR on the selected picture and show the recognised text.

        ``filename`` receives the argument emitted by the ``clicked`` signal
        and is not used.
        '''
        # Read the credentials typed by the user and clear the old result.
        self.APP_ID = self.appid.text()
        self.API_KEY = self.apikey.text()
        self.SECRET_KEY = self.screetkey.text()
        self.plainTextEdit.setPlainText('')
        self.plainTextEdit.setStatusTip('')

        buttons = QMessageBox.Yes | QMessageBox.No

        # One message box per missing credential.
        missing = False
        for value, prompt in ((self.APP_ID, "请输入appid"),
                              (self.API_KEY, "请输入apikey"),
                              (self.SECRET_KEY, "请输入screetkey")):
            if value == '':
                QMessageBox.critical(self, "标题", prompt, buttons,
                                     QMessageBox.Yes)
                missing = True
        if missing:
            return

        self.aipOcr = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
        start = time.time()

        # Also covers a cancelled file dialog, which yields an empty path.
        image_path = self.fileName1 if self.status else self.fileName2
        if not image_path:
            QMessageBox.critical(self, "标题", "请选择一张图片,或进行截图",
                                 buttons, QMessageBox.Yes)
            return

        res = self.aipOcr.webImage(getImageBytes(image_path))
        print(res)
        # Check the response itself instead of letting the last key of the
        # dict decide whether it is an error.
        if ('error_code' in res or 'error_msg' in res
                or 'words_result' not in res):
            QMessageBox.critical(self, "标题", "输入信息有误,请重新输入",
                                 buttons, QMessageBox.Yes)
            return

        text = ''.join(
            str(entry['words']) + '\n' for entry in res['words_result'])
        self.plainTextEdit.setPlainText(text)
        print(text)
        end = time.time()
        self.plainTextEdit.setStatusTip("图片文字转换时间:%.2fs" %
                                        (end - start))
Пример #32
0
from aip import AipOcr


APP_ID = '16600152'
API_KEY = 'oNnWciQupqWPH871GU0T77dy'
SECRET_KEY = 'xMNjsEhc1RbIxGlhWdX9ACDe5LjktDHi'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


""" 读取图片 """
def get_file_content(filePath):
    """Return the bytes stored in the file at ``filePath``."""
    with open(filePath, mode='rb') as picture:
        content = picture.read()
    return content

image = get_file_content('C:\\Users\\povti\\Desktop\\test\\test2.jpg')

# """ 调用通用文字识别, 图片参数为本地图片 """
# result=client.basicGeneral(image);
# print(result)

""" 如果有可选参数 """
options = {}
# options["language_type"] = "CHN_ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
options["probability"] = "true"

""" 带参数调用通用文字识别, 图片参数为本地图片 """
print(client.basicGeneral(image, options))
Пример #33
0
class BaseCls(object):
    """Base client for app.singlewindow.cn.

    Logs in with the configured account (the captcha is solved with Baidu
    OCR) and caches the session cookies in a JSON file so they can be reused.
    """

    STATUS_MAP = {
        'B': '海关终审通过',
        'P': '预审批通过',
        '': '',
    }

    # Browser-like headers sent with the login form; shared by check_login()
    # and get_login_cookie().
    _LOGIN_HEADERS = {
        'Host': 'app.singlewindow.cn',
        'Origin': 'http://app.singlewindow.cn',
        'Referer': 'http://app.singlewindow.cn/cas/login?_local_login_flag=1&service=http://app.singlewindow.cn/cas/jump.jsp%3FtoUrl%3DaHR0cDovL2FwcC5zaW5nbGV3aW5kb3cuY24vY2FzL29hdXRoMi4wL2F1dGhvcml6ZT9jbGllbnRfaWQ9MTM2NyZyZXNwb25zZV90eXBlPWNvZGUmcmVkaXJlY3RfdXJpPWh0dHAlM0ElMkYlMkZzei5zaW5nbGV3aW5kb3cuY24lMkZkeWNrJTJGT0F1dGhMb2dpbkNvbnRyb2xsZXI=&localServerUrl=http://sz.singlewindow.cn/dyck&localDeliverParaUrl=/deliver_para.jsp&colorA1=d1e4fb&colorA2=66,%20124,%20193,%200.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Upgrade-Insecure-Requests': '1',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    }

    # Default headers for fetching the login page and the captcha image.
    _PLAIN_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
        "Connection": "keep-alive",
    }

    def __init__(self, *args, **kwargs):
        self.UserAgent =settings.USER_AGENT
        self.CheckIndex = settings.CHECK_INDEX
        self.IndexUrl = settings.INDEX_URL
        self.ImageUrl = settings.IMAGE_URL
        self.ImageDir = settings.IMAGE_DIR
        self.ImageDir2 = settings.IMAGE_DIR2
        self.CookieDir = settings.COOKIE_DIR
        self.CookieUrl = None
        self.aipOcr = AipOcr(settings.APP_ID, settings.API_KEY, settings.SECRET_KEY)
        self.session = MySession()

    def check_login(self):
        """Check whether the stored cookies are still valid; log in again if not."""
        self.session.headers.update(dict(self._LOGIN_HEADERS))
        self.session.cookies.update(self.get_cookie())
        res = self.session.post(self.IndexUrl, timeout=30)
        # 'in' also matches when the marker is at the very start of the page.
        if "登录成功" in res.text:
            log.info('已登录!!!')
        else:
            log.info('cookie失效,重新登录!!!')
            self.get_cookie(LOCAL_COOKIE_FLG=False)

    def get_file_content(self):
        """Binarise the downloaded captcha and return the result as bytes.

        The picture at ``self.ImageDir`` is converted to greyscale and
        thresholded at 55, saved to ``self.ImageDir2``, and that file is read
        back.
        """
        img_grey = Image.open(self.ImageDir).convert('L')
        threshold = 55
        # Lookup table: grey levels below the threshold map to 0, others to 1.
        table = [0 if level < threshold else 1 for level in range(256)]
        img_out = img_grey.point(table, '1')
        img_out.save(self.ImageDir2)
        with open(self.ImageDir2, 'rb') as f:
            return f.read()

    def get_login_info(self, headers=None):
        """Fetch the login page and return its hidden ``(lt, execution)`` values."""
        if headers is None:
            headers = dict(self._PLAIN_HEADERS)
        # A plain request is used on purpose: going through self.session would
        # send the session data along and make the login fail.
        res = requests.get(self.IndexUrl, headers=headers, timeout=30)
        content = etree.HTML(res.text)
        lt = content.xpath(r'//*[@id="fm1"]/p[1]/input[1]/@value')[0]
        execution = content.xpath(r'//*[@id="fm1"]/p[1]/input[2]/@value')[0]
        return lt, execution

    def know_Image(self, headers=None, timeout=30):
        """Download captchas and OCR them until a plausible code is found.

        :param headers: request headers for the captcha download; a default
            browser-like set is used when omitted.
        :param timeout: overall time budget in seconds.
        :return: ``{'value': code or None, 'error': message or None}``.
        """
        start_time = time.time()
        ret = {'value': None, 'error': None}
        if headers is None:
            headers = dict(self._PLAIN_HEADERS)
        code_count = 0
        # The elapsed time is evaluated on every pass, so attempts that bail
        # out early still count against the time budget.
        while int(time.time() - start_time) < timeout:
            code_count += 1
            res = self.session.get(self.ImageUrl, headers=headers, timeout=30)
            with open(self.ImageDir, "wb") as f:
                f.write(res.content)
            value = self.aipOcr.basicGeneral(self.get_file_content(), settings.OPTIONS)
            try:
                value = value['words_result'][0]['words'].replace(' ', '')
            except (KeyError, IndexError, TypeError, AttributeError):
                # Error response or nothing recognised: try a new captcha.
                continue
            if 4 == len(value) and re.match('^[0-9a-zA-Z]{4}$', value):
                log.info("已识别验证码%d张,验证码识别成功..." % code_count)
                log.info("验证码是:%r" % value)
                ret['value'] = value
                return ret
        ret['error'] = '图片识别程序超时退出...'
        log.warning('图片识别程序超时退出...')
        return ret

    def get_login_cookie(self):
        """Submit the login form once; return True when the login succeeded."""
        lt, execution = self.get_login_info()
        data = {
            'swy': settings.USERNAME,
            'swm': hashlib.md5(settings.PASSWD.encode('utf8')).hexdigest(),
            'swm2': '',
            'verifyCode': self.know_Image().get('value'),
            'lt': lt,
            '_eventId': 'submit',
            'execution': execution,
            'swLoginFlag': 'swUp',
            'lpid': 'P1',
        }
        self.session.headers.update(dict(self._LOGIN_HEADERS))
        res = self.session.post(self.IndexUrl, data=data, timeout=30)
        if "登录成功" in res.text:
            log.info('模拟登陆成功!!!')
            return True
        return False

    def save_cookie(self):
        """Write the cookies of the current session to ``self.CookieDir`` as JSON."""
        with open(self.CookieDir, "w") as output:
            cookies = self.session.cookies.get_dict()
            json.dump(cookies, output)
        log.info("已在目录下生成cookie文件")

    def get_cookie(self, LOCAL_COOKIE_FLG=True):
        """Return the login cookies as a dict.

        The cached file is used when ``LOCAL_COOKIE_FLG`` is true and the file
        exists; otherwise any stale file is removed and a fresh login is made.
        """
        if LOCAL_COOKIE_FLG and os.path.exists(self.CookieDir):
            print('cookie已存在...')
            with open(self.CookieDir, "r") as f:
                return json.load(f)
        print('cookie不存在...')
        if os.path.exists(self.CookieDir):
            os.remove(self.CookieDir)
        return self.get_web_cookie()

    def get_web_cookie(self):
        """Log in (retrying up to ten times), store the cookies and return them.

        :raises Exception: when every login attempt fails.
        """
        retry = 10
        while not self.get_login_cookie():
            log.info('登陆失败,1S后重新登陆..')
            retry -= 1
            print('retry = ', retry)
            if retry < 1:
                raise Exception('登录重试次数超过9次,程序退出')
            time.sleep(1)
        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'Content-Type': 'application/json',
            'Referer': None,
        }
        self.session.headers.update(headers)
        self.session.get(self.CookieUrl, timeout=30)
        self.save_cookie()
        return self.session.cookies.get_dict()
Пример #34
0
# -*- coding: utf-8 -*-
from aip import AipOcr

""" 你的 APPID AK SK """
APP_ID = '14836038'
API_KEY = 'grTm0iuaEnyjNn0XZriXGkKU'
SECRET_KEY = 'vUVtHedaEKmG8ecjWarhxSjmxWTBeIu7'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


""" 读取图片 """
def get_file_content(filePath):
    """Open ``filePath`` in binary mode and return everything it contains."""
    with open(filePath, mode='rb') as handle:
        blob = handle.read()
    return blob

image = get_file_content(r'C:\Users\Administrator\Desktop\WebCrawler\day16\baiduAi\realbaiduAi\test_pic\sina.jpg')

""" 调用通用文字识别, 图片参数为本地图片 """
# list_w=client.basicGeneral(image)['words_result']
list_w=client.basicAccurate(image)['words_result']
# Print every recognised line exactly once. Looping over the items of each
# entry repeated the same line once per key of that entry.
for i in list_w:
    print(i['words'])

""" 如果有可选参数 """
options = {}
options["language_type"] = "CHN_ENG"
options["detect_direction"] = "true"
options["detect_language"] = "true"
options["probability"] = "true"
Пример #35
0
# -*- encoding:utf-8 -*-
"""
@作者:leel
@文件名:character_recognize.py
@时间:2020/7/2  12:27
@文档说明:
"""
import re
from aip import AipOcr
""" 你的 APPID AK SK """
APP_ID = '20711349'
API_KEY = '	ErkKG3QtmuMWvrG1o9q3MVQz'
SECRET_KEY = 'YxG1xekcAC2xD0FnV1GWEo2FuLnydGWI'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
""" 读取图片 """


def get_file_content(filePath):
    """Return the raw bytes of the picture stored at ``filePath``."""
    with open(filePath, mode='rb') as stream:
        image_bytes = stream.read()
    return image_bytes


image = get_file_content('example.jpg')
""" 调用通用文字识别(含位置信息版), 图片参数为本地图片 """
data = client.general(image)
print(data)

# Collect the recognised lines from the parsed response. Matching a regex
# against str(data) only works when 'words' happens to be the last key of an
# entry and the text contains no quote character.
res = [item['words'] for item in data.get('words_result', [])]
data = str(data)
Пример #36
0
def client():
    '''Return one Baidu OCR client picked at random from the configured pool.'''
    pool = [AipOcr('', '', '')]
    return random.choice(pool)
Пример #37
0
 def __init__(self):
     """Build the Baidu OCR client from the credentials defined in ``pd``."""
     self.client = AipOcr(pd.APP_ID, pd.API_KEY, pd.SECRET_KEY)
Пример #38
0
#coding=utf-8  
# 百度ocrapi,读取本地图片并识别

from aip import AipOcr


# 百度AI部分
APP_ID = '15529244'
API_KEY = 'TgXpSb1tWliUDeqrYLh722i7'
SECRET_KEY = 'K19qjQ2An9LSEDwd143vCxpXU3whwOsz'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

""" 读取图片 """
def get_file_content(filePath):
    """Return the content of ``filePath`` read in binary mode."""
    with open(filePath, mode='rb') as src:
        data_bytes = src.read()
    return data_bytes

image = get_file_content('111.png')
results = client.general(image)["words_result"]

# One recognised line per entry of the response.
outputtxt = [r["words"] for r in results]

#print(outputtxt)
# The context manager closes the file even if a write fails.
with open('111.md','w',encoding='utf-8') as f:
    f.writelines(i + '\n' for i in outputtxt)
Пример #39
0
        self.textRect = None
        self.textInput.hide()
        self.textInput.clearText()
        self.redraw()

    def changeFont(self, font):
        """Store ``font`` in ``self.fontNow``."""
        self.fontNow = font


## 百度API参数

APP_ID = '9851066'
API_KEY = 'LUGBatgyRGoerR9FZbV4SQYk'
SECRET_KEY = 'fB2MNz1c2UHLTximFlC4laXPg7CVfyjV'
# 初始化文字识别
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)
options = {'detect_direction': 'true', 'language_type': 'CHN_ENG'}

# 使用QtCreator建立的ui文件路径
qtCreatorFile = "baidu-api.ui"
# 使用uic加载
Ui_MainWindow, QtBaseClass = uic.loadUiType(qtCreatorFile)


# 读取图片
def getImageBytes(filename):
    """Return the bytes of the image file ``filename``."""
    with open(filename, mode='rb') as image_file:
        image_bytes = image_file.read()
    return image_bytes


class MyApp(QMainWindow, Ui_MainWindow):
Пример #40
0
from aip import AipOcr

if __name__ == "__main__":
    # Fill in the real appId, apiKey and secretKey obtained from the Baidu
    # Cloud console.
    credentials = (
        '23077616',
        '6FlEkzTZhREpWzPiWWtGGdjt',
        'wqoMOGQEMGOlc8DVCyLfgjnKhinsSYH6',
    )
    appId, apiKey, secretKey = credentials
    # Create the OCR client, then recognise the picture and show the response.
    ocr = AipOcr(appId, apiKey, secretKey)
    with open('img/dizhi.png', 'rb') as fin:
        img = fin.read()
        res = ocr.basicGeneral(img)
        print(res)
Пример #41
0
#coding:utf-8
from aip import AipOcr
import json
'''你的 APPID AK SK '''
'''
https://cloud.baidu.com/doc/OCR/OCR-Python-SDK.html#.E9.80.9A.E7.94.A8.E6.96.87.E5.AD.97.E8.AF.86.E5.88.AB

'''

APP_ID = '***'
API_KEY = '***'
SECRET_KEY = '***'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)


def get_file_content(filePath):
    """Return the bytes of the file located at ``filePath``."""
    with open(filePath, mode='rb') as reader:
        file_bytes = reader.read()
    return file_bytes
		

image = get_file_content('example.jpg')

text = client.basicGeneral(image)



print text['log_id']
print type(text['words_result'])
# with open('result.txt',r'a+') as my:
	# my.write(str(text['words_result']))
Пример #42
0
    def __init__(self,app_id,api_key,secret_key):
        """Create the Baidu OCR client and keep it in ``self.aipOcr``."""
        ## Initialise AipOcr with the app id, API key and secret key you applied for
        self.aipOcr = AipOcr(app_id,api_key,secret_key)

        """ 读取图片 """
Пример #43
0
# 2.调用三方的sdk,来实现
import time

import keyboard

from PIL import ImageGrab
from aip import AipOcr

# 调用百度的sdk
""" 你的 APPID AK SK """
APP_ID = '17134093'
API_KEY = 'gDGaUOGMRX5cxqFOxgp5SGbm'
SECRET_KEY = '2YOAu5p6MEpq9iWKR3yKRERfxkduWFWN'

client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
""" 读取图片 """


def get_file_content(filePath):
    """Read ``filePath`` as binary data and return the bytes."""
    with open(filePath, mode='rb') as grabbed:
        snapshot = grabbed.read()
    return snapshot


while 1:
    # 监听键盘按键
    keyboard.wait(hotkey='f1')
    keyboard.wait(hotkey='ctrl+c')

    time.sleep(0.1)
Пример #44
0
# -*- coding: UTF-8 -*-  

from aip import AipOcr


# 定义常量  
APP_ID = '10764564'
API_KEY = 'TxvXG9liAGvVVpSuYGZ0iwXW'
SECRET_KEY = 'QG8SWcX4l98Q5HGxRhAnBKqhK9VmYGZL'

# 初始化文字识别分类器
aipOcr=AipOcr(APP_ID, API_KEY, SECRET_KEY)

# 读取图片  
filePath = "wenzi.png"

def get_file_content(filePath):
    """Return all bytes of the file at ``filePath``."""
    with open(filePath, mode='rb') as infile:
        contents = infile.read()
    return contents

# 定义参数变量
options = {
    'detect_direction': 'true',
    'language_type': 'CHN_ENG',
}

# 网络图片文字文字识别接口
result = aipOcr.webImage(get_file_content(filePath),options)

# 如果图片是url 调用示例如下
# result = apiOcr.webImage('http://www.xxxxxx.com/img.jpg')