Python AipOcr.AipOcr примеры, aip.AipOcr.AipOcr Python примеры использования

Пример #1

0

Показать файл

Файл: OCR.py Проект: Berlinsss/NLP_Work

from aip import AipOcr

""" My APPID AK SK  """
# First set of Baidu OCR credentials (not used by the client below).
# NOTE(review): secrets are hard-coded in source; consider loading them from
# configuration or the environment instead.
APP_ID1 = '24147388'
API_KEY1 = 'QU5nINLo2vXhnnljIiBk2BwB'
SECRET_KEY1 = 'sZBkkSKlw876QzTbHXHNmGKvOZLcU9Sy'

# Second set of credentials; this is the set the module-level client uses.
APP_ID2 = '24205701'
API_KEY2 = 'YsobyEQFsVQnk9iZkyEqhbU7'
SECRET_KEY2 = 'nAdD3L2Whmxgx0GYGnFHGlFg0jZZ3Mga'

# Shared OCR client, created once at import time.
client = AipOcr(APP_ID2, API_KEY2, SECRET_KEY2)


""" 读取图片 """
def get_file_content(filePath):
    """Return the complete contents of the file at ``filePath`` as bytes."""
    handle = open(filePath, 'rb')
    try:
        data = handle.read()
    finally:
        # Always release the handle, even if the read fails.
        handle.close()
    return data

def get_handwriting(fpath,str):
    """Append the handwriting-OCR text of a local image to ``str``.

    Args:
        fpath: Path of the image file to recognise.
        str: Text accumulated so far (the name shadows the builtin but is
            kept because callers may pass it by keyword).

    Returns:
        ``str`` followed by one ``'\\n' + line`` per recognised line.
    """
    image = get_file_content(fpath)

    # Baidu handwriting recognition on the raw bytes of the local image.
    recognized = client.handwriting(image)["words_result"]

    return str + ''.join('\n' + entry["words"] for entry in recognized)

Пример #2

0

Показать файл

Файл: main.py Проект: Muxxs/brain_game

#coding=utf-8

#baidu
import urllib2, os, time
import urllib
from aip import AipOcr
# Baidu OCR credentials; empty placeholders that must be filled in before use.
APP_ID = ''
API_KEY = ''
SECRET_KEY = ''
# Shared OCR client, created once at import time.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
# Browser-like User-Agent string.
User_Agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36'
# Baidu search URL prefix; the query text is presumably appended after "wd=".
url = "https://www.baidu.com/s?wd="

import urllib
import urllib2


def get_file_content(filePath):
    """Return the complete contents of the file at ``filePath`` as bytes.

    Prints ``1`` once the file has been opened, as the original did.
    """
    with open(filePath, 'rb') as fp:
        # Written as a call so the line parses on Python 3 as well; on
        # Python 2 ``print(1)`` still prints ``1``.
        print(1)
        return fp.read()


def threading_str(html, words):
    """Print ``words`` and the number of times it occurs in ``html``.

    Args:
        html: Text to search.
        words: Substring to count.
    """
    # One pre-formatted argument keeps the output "<words> <count>" identical
    # on Python 2 and Python 3 (a multi-argument print() would print a tuple
    # on Python 2).
    print('%s %s' % (words, html.count(words)))


def baidu(words, an):

Пример #3

0

Показать файл

Файл: ocr.py Проект: yumupinglan/video-to-text-ocr-demo

def main(videoname):
    """Run OCR over the frame images of a video and write a text report.

    Reads APP_ID, API_KEY, SECRET_KEY, imgDir and outputDir from
    ``config.getConfig(videoname)``, sends every matching image in ``imgDir``
    to ``client.general`` and groups the recognised words by their vertical
    position.  Per-image details, the collected groups and the words of the
    group with the most entries are appended to
    ``outputDir + str(start) + '.txt'``.
    """
    conf = config.getConfig(videoname)

    APP_ID = conf['APP_ID']
    API_KEY = conf['API_KEY']
    SECRET_KEY = conf['SECRET_KEY']
    imgDir = conf['imgDir']
    outputDir = conf['outputDir']

    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    def get_file_content(filePath):
        # Return the raw bytes of the file.
        with open(filePath, 'rb') as fp:
            return fp.read()

    def get_OCR(imgName):
        # Return the 'words_result' list for one image, or False on failure.
        image = get_file_content(imgDir + '/' + imgName)

        options = {}
        options["recognize_granularity"] = "big"
        options["language_type"] = "CHN_ENG"
        options["detect_direction"] = "true"

        res = client.general(image, options)
        try:
            w = res['words_result']
            print str(res)
            return w
        # NOTE(review): bare except; a response without 'words_result' lands
        # here, but so would KeyboardInterrupt.
        except:
            return False

    def is_img(f):
        # NOTE(review): re.match only anchors at the start, so any name with
        # "jpg" after at least one character matches, not just "*.jpg".
        return re.match(r'.+jpg', f)

    start = time.time()

    # NOTE(review): no path separator is inserted between outputDir and the
    # file name, and the handle is closed only on the success path (L-end).
    output = open(outputDir + str(start) + '.txt', 'a')

    pathDir = sorted(filter(is_img, os.listdir(imgDir)))

    # One dict per group of words found at a similar vertical position.
    positionData = []
    for imgName in pathDir:
        output.write('Start: ' + imgName + '\n')
        ocrRes = get_OCR(imgName)
        # fail then retry
        # NOTE(review): retries without any limit or delay.
        while ocrRes == False:
            print 'Fail: ' + imgName
            ocrRes = get_OCR(imgName)
        for word in ocrRes:
            top = int(word['location']['top'])
            height = int(word['location']['height'])
            w = word['words']
            has = False
            for group in positionData:
                # belong to this group
                if abs(group['top'] - top) < (group['height'] / 2):
                    # Avoid duplicate: check if current word is similar to last word
                    lastWord = group['words'][len(group['words']) - 1]
                    if difflib.SequenceMatcher(None, lastWord,
                                               w).quick_ratio() > 0.8:
                        # NOTE(review): this break leaves has == False, so the
                        # near-duplicate still starts a new group below;
                        # confirm that is intended.
                        break
                    # append words
                    group['words'].append(w)
                    # cal new value
                    group['totalTop'] += top
                    group['totalHeight'] += height
                    group['totalNum'] += 1
                    group['top'] = group['totalTop'] / group['totalNum']
                    group['height'] = group['totalHeight'] / group['totalNum']
                    has = True
                    break

            if has == False:
                positionData.append({
                    'top': top,  # group standard, using average value of tops
                    'totalTop': top,
                    'height': height,
                    'totalHeight': height,
                    'totalNum': 1,  # how many words have been added to this group
                    'words': [w]
                })

            output.write('Words: ' + w + '\n')
            output.write('Top: ' + str(word['location']['top']) + '\n')
            output.write('Height: ' + str(word['location']['height']) + '\n')

        output.write('Finished: ' + imgName + '\n')
        print 'Finished: ' + imgName

    output.write(str(positionData) + '\n')

    # Keep the words of the group that collected the most entries.
    max_group = []
    for group in positionData:
        if group['totalNum'] > len(max_group):
            max_group = group['words']
    allWords = ','.join(max_group)
    output.write('-----------------------' + '\n')
    output.write(allWords + '\n')
    output.write('-----------------------' + '\n')
    end = time.time()
    output.write('Running time: ' + str(end - start) + '\n')
    output.close()
    print 'Finished All'

Пример #4

0

Показать файл

# coding: utf-8

# In[12]:

from aip import AipOcr
import re
import pandas as pd
from pandas import DataFrame

# In[2]:

# Baidu OCR credentials; the 'x' strings are placeholders to be replaced.
appid = 'xxxxxxxx'
apikey = 'xxxxxxxxxxxxxxx'
secretkey = 'xxxxxxxxxxxxxxxxxxxxxxxxx'

# OCR client used by the cells below.
client = AipOcr(appid, apikey, secretkey)

# In[5]:

# Read the image bytes; the context manager closes the handle, which the
# original code left open.
with open(r'C:\Users\11197\Desktop\2.jpg', 'rb') as image_file:
    img = image_file.read()

# In[8]:

# Run general OCR and collect the text of every recognised line in `word`.
message = client.basicGeneral(img)
word = []
for i in message.get('words_result'):
    word.append(i.get('words'))

# In[10]:

Пример #5

0

Показать файл

Файл: OCR.py Проект: liang-lpl/python

import keyboard
from PIL import ImageGrab
import time
from aip import AipOcr

# Baidu OCR credentials; empty placeholders that must be filled in before use.
app_id = ''
api_key = ''
secret_key = ''

# OCR client used by the capture loop below.
client = AipOcr(app_id, api_key, secret_key)

while True:

    keyboard.wait(hotkey='alt+a')
    keyboard.wait(hotkey='ctrl+s')
    time.sleep(0.1)

    image = ImageGrab.grabclipboard()
    image.save('image_001.jpg')

    with open('image_001.jpg', 'rb') as file:
        image = file.read()
        result = client.basicAccurate(image)
        result = result['words_result']
        for i in result:
            print(i['words'])
            with open('word.txt', 'a+', encoding='UTF-8') as text:
                text.writelines('%s\n' % i['words'])

    hotkey = keyboard.read_hotkey()
    if hotkey == 'q':

Пример #6

0

Показать файл

Файл: rclimg_ocr.py Проект: wanywhn/EveryLauncher

 def __init__(self, em):
     """Store ``em``, reset the current index and create the Baidu clients."""
     self.em, self.currentindex = em, 0
     credentials = (APP_ID, API_KEY, SECRET_KEY)
     # Text recognition and image classification share the same credentials.
     self._client = AipOcr(*credentials)
     self._imgClient = AipImageClassify(*credentials)

Пример #7

0

Показать файл

Файл: iocr.py Проект: lescpsn/AutoRobot

def bd_get_client():
    """Build an ``AipOcr`` client from the decrypted module-level credentials."""
    app_id = encrypt.decrypt(APP_ID)
    api_key = encrypt.decrypt(API_KEY)
    secret_key = encrypt.decrypt(SECRET_KEY)
    return AipOcr(app_id, api_key, secret_key)

Пример #8

0

Показать файл

import concurrent.futures
import subprocess
import time
import re

import numpy as np

import cv2
from aip import AipOcr

# import pytesseract

import key

# Shared OCR client; credentials come from the local `key` module.
client = AipOcr(key.AIP_APP_ID, key.AIP_API_KEY, key.AIP_SECRET_KEY)

# Thread pool with at most five worker threads.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)


def capture_img():
    """Take a screenshot through adb and decode it with OpenCV.

    Runs ``adb shell screencap -p`` from the local ``adb`` directory, undoes
    the CRLF translation applied to the binary stream and decodes the bytes
    as a colour image.  Prints the elapsed time.

    Returns:
        The result of ``cv2.imdecode`` for the captured bytes.
    """
    print('Capturing img...', end=' ')
    st = time.time()
    pipe = subprocess.Popen(".\\adb\\adb shell screencap -p",
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            shell=True)
    # communicate() drains stdout, closes both pipes and waits for the child,
    # so no open handles or unreaped processes are left behind.
    raw_bytes, _ = pipe.communicate()
    # The PNG stream arrives with "\n" expanded to "\r\n"; undo that.
    image_bytes = raw_bytes.replace(b'\r\n', b'\n')
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8),
                         cv2.IMREAD_COLOR)
    print('Time:', time.time() - st)
    return image

Пример #9

0

Показать файл

Файл: views.py Проект: xianxianlog/baiduocr

def form_ocr(request):
    """Send an uploaded picture to Baidu table recognition and report the result.

    Expects a POST request carrying the image in ``request.FILES['pic']``.
    The image is submitted with ``tableRecognitionAsync`` and the result is
    polled every 10 seconds, at most three times.

    Returns:
        A ``JsonResponse`` with ``return_msg`` (1 on success, 0 otherwise) and,
        once the upload was read, ``file_url`` (the result location, or ""
        when unavailable).  Non-POST requests get ``{'return_msg': 0}`` with
        status 405 instead of the ``None`` the view used to return.
    """
    if request.method != 'POST':
        # A view must always return a response object.
        return JsonResponse({'return_msg': 0}, status=405)

    # 1. Fetch the uploaded picture.
    print("开始工作")
    print(request.FILES)
    try:
        pic = request.FILES['pic']
        print(pic.size)
        print(pic.name)

        # 2. Read the image bytes.
        img = pic.read()
    except Exception:
        # A missing or unreadable upload is reported as a failure; unlike the
        # former bare except this no longer swallows KeyboardInterrupt or
        # SystemExit.
        return JsonResponse({'return_msg': 0})

    # 3. Submit the image to Baidu and obtain the request id.
    AK = '自己的AK'
    SK = '自己的SK'
    app_id = "自己的app_id"
    client = AipOcr(app_id, AK, SK)
    options = {}
    options['result_type'] = 'excel'
    result_id = client.tableRecognitionAsync(img, options)
    try:
        request_id = result_id['result'][0]['request_id']
    except (KeyError, IndexError, TypeError):
        # Error responses do not carry the expected structure.
        request_id = ""
    else:
        print(request_id)

    return_msg = 0
    file_url = ""
    if request_id:
        try_times = 0
        while True:
            time.sleep(10)
            result = client.getTableRecognitionResult(request_id)
            print(result)
            try:
                msg = result['result']['ret_msg']
            except (KeyError, TypeError):
                print("出错")
                # Status reported back to the front end.
                return_msg = 0
                break
            if msg == "已完成":
                file_url = result['result']['result_data']
                return_msg = 1
                break
            try_times += 1
            print('try_times', try_times)
            if try_times > 2:
                print("网络繁忙，请稍后再试")
                return_msg = 0
                break

    return JsonResponse({'return_msg': return_msg, 'file_url': file_url})

Пример #10

0

Показать файл

Файл: book_yumaoqiu.py Проект: zhangxzh9/Book_sus_badminton

class Book:
    """Selenium bot that logs in to gym.sysu.edu.cn and books badminton courts.

    The login captcha is screenshotted, cleaned up with Pillow/OpenCV and read
    through the Baidu OCR client held in ``aipOcr``.
    """

    # Baidu OCR account credentials.
    # NOTE(review): secrets are hard-coded in source; consider loading them
    # from configuration or the environment.
    APP_ID = '15273108'
    API_KEY = 'BQl4DK7sGjwvMKFvBB9UNVPD'
    SECRET_KEY = 'dYGm5MvIzXVWQXqHU1h1fYRs5xMQEKyF'
    aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    # Options passed to every Baidu OCR call.
    options = {
        'detect_direction': 'true',
        'language_type': 'CHN_ENG',
    }

    # Page address and XPaths shared by the booking steps.
    _BOOKING_URL = "http://gym.sysu.edu.cn/product/show.html?id=61"
    _DATE_TAB_XPATH = '//*[@id="datesbar"]/div/ul/li[4]'
    _COURT_XPATH = '//span[@class="cell football easyui-tooltip tooltip-f"]'
    _RESERVE_XPATH = '//*[@id="reserve"]'
    _CAPTCHA_XPATH = "//img[@name='captchaImg']"

    def auto_book(self, username, password, buytime, ex_time):
        """Log in, wait until ``buytime`` and book one court per time slot.

        Args:
            username: Account name typed into the login form.
            password: Account password typed into the login form.
            buytime: Start time as ``'%Y-%m-%d %H:%M:%S'`` text; booking
                starts when the local clock formats to exactly this value.
            ex_time: Part of the day to book: "早上", "下午" or "晚上".

        Raises:
            ValueError: If ``ex_time`` is not one of the known values (checked
                before the browser is started).
        """
        # Time slots (the page's "data-timer" values) per part of the day.
        sw = {
            "早上": [
                '09:01-10:00',
                '10:01-11:00'],
            "下午": [
                '14:01-15:00',
                '15:01-16:00',
                '16:01-17:00'],
            "晚上": [
                '19:01-20:00',
                '20:01-21:00',
                '21:01-22:00']}
        sw1 = sw.get(ex_time)
        if sw1 is None:
            raise ValueError('ex_time must be one of: ' + ', '.join(sw))

        # Start a visible Chrome window and maximise it.
        self.driver = webdriver.Chrome()
        self.driver.maximize_window()
        # Log in first; none of this is time critical.
        self.driver.get(self._BOOKING_URL)
        time.sleep(1.5)
        self.driver.find_element_by_link_text("登录").click()
        time.sleep(0.2)
        self.driver.find_element_by_id("username").send_keys(username)
        time.sleep(0.2)
        self.driver.find_element_by_id("password").send_keys(password)
        # Re-roll the captcha until OCR yields four alphanumeric characters.
        t = self._read_captcha()
        while len(t) != 4 or not t.isalnum():
            self.driver.find_element_by_xpath(self._CAPTCHA_XPATH).click()
            time.sleep(0.1)
            t = self._read_captcha()
        self.driver.find_element_by_id("captcha").send_keys(t)
        time.sleep(1)
        self.driver.find_element_by_name("submit").click()
        # Reload the booking page as a logged-in user and open two more tabs
        # on the same page so that each slot is booked from its own tab.
        self.driver.get(self._BOOKING_URL)
        self.driver.execute_script('window.open()')
        self.driver.execute_script('window.open()')
        self.driver.switch_to.window(self.driver.window_handles[1])
        self.driver.get(self._BOOKING_URL)
        self.driver.switch_to.window(self.driver.window_handles[2])
        self.driver.get(self._BOOKING_URL)

        booked_tabs = []  # indexes of the tabs in which a booking was tried
        # NOTE(review): busy-waits on an exact string match; if ``buytime``
        # has already passed this loop never ends.
        while True:
            now = datetime.datetime.now()
            if now.strftime('%Y-%m-%d %H:%M:%S') != buytime:
                continue
            self.driver.refresh()
            # Courts are released a little late, so keep refreshing until the
            # tab for the day three days ahead shows up.
            while not self.is_element_exist(self._DATE_TAB_XPATH):
                self.driver.refresh()
                time.sleep(1.2)
            if self.is_element_exist(self._DATE_TAB_XPATH):
                # First slot in the current (third) tab.
                self._reserve_slot(sw1[0])
                booked_tabs.append(2)
                # Remaining slots in the first and second tab.  zip() stops at
                # the shorter sequence, so a period with only two slots
                # ("早上") no longer raises IndexError on a third one.
                for tab_index, slot in zip((0, 1), sw1[1:]):
                    self.driver.switch_to.window(
                        self.driver.window_handles[tab_index])
                    self.driver.refresh()
                    self._reserve_slot(slot)
                    booked_tabs.append(tab_index)
            break

        # Confirm in the same tab order as before (second, first, third),
        # skipping tabs in which nothing was booked.
        for tab_index in (1, 0, 2):
            if tab_index in booked_tabs:
                self.driver.switch_to.window(
                    self.driver.window_handles[tab_index])
                self.conf()

    def _read_captcha(self):
        """Return the OCR text of the captcha with spaces and dots removed."""
        return self.Convertimg().replace(' ', '').replace('.', '')

    def _reserve_slot(self, slot):
        """Pick the date three days ahead and reserve ``slot`` in the current tab."""
        self.driver.find_element_by_xpath(self._DATE_TAB_XPATH).click()
        block_list = self.driver.find_elements_by_xpath(self._COURT_XPATH)
        for block_place in block_list:
            # Take the first court whose time matches the wanted slot.
            if block_place.get_attribute("data-timer") == slot:
                block_place.click()
                self.driver.find_element_by_xpath(self._RESERVE_XPATH).click()
                time.sleep(1)  # wait for the confirmation page
                break
        self.driver.find_element_by_xpath(self._RESERVE_XPATH).click()

    def conf(self):
        """Confirm and pay for the reservation shown in the current tab."""
        time.sleep(1)
        try:
            self.driver.find_element_by_xpath(
                '//button[@class="confirm"]').click()
            time.sleep(1)
            self.driver.find_element_by_xpath(
                '//img[@src="/images/payment/ydzx.png"]').click()
            self.driver.find_element_by_xpath(
                '//button[@class="button-large button-info"]').click()
            now = datetime.datetime.now()
            print(now.strftime('%Y-%m-%d %H:%M:%S'))
            print('purchase success')
        except (ElementNotVisibleException, NoSuchElementException):
            # A missing element is treated like a hidden one so that one
            # failed tab does not abort the confirmation of the others.
            print("没有抢到，被定完了")

    def is_element_exist(self, command):
        """Return True when an element matching the XPath ``command`` exists."""
        try:
            self.driver.find_element_by_xpath(command)
        except NoSuchElementException:
            return False
        return True

    def Convertimg(self):
        """Screenshot the captcha, clean it and return the OCR text.

        Returns:
            The first recognised line, or ``''`` when OCR found nothing (the
            caller then requests a new captcha).
        """
        item = self.driver.find_element_by_xpath(self._CAPTCHA_XPATH)
        item.screenshot("yanzhengma.png")
        self.clearimage('yanzhengma.png')
        result = self.aipOcr.basicGeneral(
            self.get_file_content('final.png'), self.options)
        os.remove('clear.png')
        # An unreadable captcha yields an empty or missing 'words_result'.
        words = result.get('words_result') or []
        return words[0]['words'] if words else ''

    def clearimage(self, originadd):
        """Remove dark interference lines from the captcha and write 'final.png'.

        Near-black pixels are painted white, the image is binarised into
        'clear.png', the source file is deleted and the result is resized to
        90x32 as 'final.png'.
        """
        img = Image.open(originadd)
        width, height = img.size
        for i in range(width):
            for j in range(height):
                data = img.getpixel((i, j))
                # r, g and b all at most 25: treat as a black interference line.
                # assumes the screenshot has four channels (RGBA) - TODO confirm
                if data[0] <= 25 and data[1] <= 25 and data[2] <= 25:
                    img.putpixel((i, j), (255, 255, 255, 255))
        img = img.convert("RGB")
        img.save('clear.png')
        # Grey-scale, then binarise at threshold 160.
        Grayimg = cv2.cvtColor(cv2.imread('clear.png'), cv2.COLOR_BGR2GRAY)
        ret, thresh = cv2.threshold(Grayimg, 160, 255, cv2.THRESH_BINARY)
        cv2.imwrite('clear.png', thresh)
        os.remove('yanzhengma.png')
        self.ResizeImage('clear.png', 'final.png', 90, 32, 'png')

    def ResizeImage(self, filein, fileout, width, height, type):
        """Resize ``filein`` to ``width`` x ``height`` and save it as ``fileout``.

        ``type`` is the image format name handed to ``save``.
        """
        img = Image.open(filein)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        out = img.resize((width, height), Image.LANCZOS)
        out.save(fileout, type)

    def get_file_content(self, filePath):
        """Return the raw bytes of the file at ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

Пример #11

0

Показать файл

    def f_pic2word(self):
        """OCR the numbered page images and append their text to the output file.

        Credentials come from ``self.app`` / ``self.key1`` / ``self.key2`` or,
        when any of them is empty, from the three text inputs; empty inputs
        are prompted for and the method returns.  Images
        ``<picfolder>\\1.jpg`` to ``<picfolder>\\999.jpg`` are recognised in
        order and each recognised line is appended to
        ``self.output_words_pic``.  Processing stops after five pages that
        could not be read or recognised.  After at least one success the
        credentials are remembered and written to ``self.keyfile``.
        """
        if self.app == '' or self.key1 == '' or self.key2 == '':
            app_id = self.ut_id.toPlainText()
            key1 = self.ut_key1.toPlainText()
            key2 = self.ut_key2.toPlainText()

            if app_id == '' or key1 == '' or key2 == '':
                if app_id == '':
                    self.ut_id.setText("请输入id")
                if key1 == '':
                    self.ut_key1.setText("请输入key1")
                if key2 == '':
                    self.ut_key2.setText("请输入key2")
                return
        else:
            app_id = self.app
            key1 = self.key1
            key2 = self.key2

        success = 0
        fail = 0
        miss = 0
        # Escaped backslash: "\%" is not a valid escape sequence.
        path = self.picfolder + "\\%d.jpg"

        # The client and its options are the same for every page.
        client = AipOcr(app_id, key1, key2)
        options = {
            "language_type": "CHN_ENG",
            "detect_direction": "false",
            "detect_language": "false",
            "probability": "false",
        }

        for page in range(1, 1000):
            try:
                with open(path % page, "rb") as img:
                    res = client.basicGeneral(img.read(), options)

                # Take the text from each result entry directly; the former
                # split of str(list) dropped the first line and cut the
                # remaining ones in the wrong places.
                t2 = ''.join(
                    entry['words'] + '\n' for entry in res['words_result'])
                t2 += "\n\n"

                try:
                    with open(self.output_words_pic, 'a') as f:
                        f.write(t2)
                        f.write("\n\n")
                    success += 1
                except Exception as e:
                    print(e)
                    fail += 1

                print(t2)
                print("\n\n")

            except Exception as e:
                # Missing page image or failed recognition.
                print(e)
                miss += 1
                if miss == 5:
                    break

        self.ut_page.setText("转换成功：" + str(success) + "页 转换失败：" + str(fail) +
                             "页")
        if success > 0:
            # Remember credentials that worked and persist them.
            self.app = app_id
            self.key1 = key1
            self.key2 = key2
            try:
                with open(self.keyfile, 'w') as f:
                    f.write(app_id + '@@' + key1 + '@@' + key2 + '@@')
            except Exception as e:
                print(e)

Пример #12

0

Показать файл

Файл: my_screenshots.py Проект: Zzaniu/nothing

 def __init__(self, content):
     """Create the Baidu OCR client and keep ``content`` on the instance."""
     ocr_client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
     self.aipOcr, self.content = ocr_client, content

Пример #13

0

Показать файл

Файл: auto_sign_in.py Проект: yepcn/flexget_qbittorrent_mod

    def sign_in_by_code_hdsky(self, task, entry, config):
        """Sign in to hdsky.me by solving its image captcha with Baidu OCR.

        Args:
            task: Passed through to ``self._request``.
            entry: Entry providing 'base_url', 'url' and 'headers'; receives
                'result' and is failed when sign-in cannot be completed.
            config: Mapping with 'aipocr_app_id', 'aipocr_api_key' and
                'aipocr_secret_key'.

        On a failed recognition the original and the de-noised captcha are
        saved next to this module as temp.png and temp2.png.
        """
        app_id = config.get('aipocr_app_id')
        api_key = config.get('aipocr_api_key')
        secret_key = config.get('aipocr_secret_key')

        if not (AipOcr and Image):
            entry['result'] = 'baidu-aip or pillow not existed'
            entry.fail(entry['result'])
            return
        if not (app_id and api_key and secret_key):
            entry['result'] = 'Api not set'
            entry.fail(entry['result'])
            return

        client = AipOcr(app_id, api_key, secret_key)

        response = self._request(task,
                                 entry,
                                 'get',
                                 entry['base_url'],
                                 headers=entry['headers'])
        state = self.check_state(entry, response, entry['base_url'])
        if state != SignState.NO_SIGN_IN:
            return

        # Ask the site for a new captcha; the answer carries its hash.
        data = {
            'action': (None, 'new'),
        }
        response = self._request(task,
                                 entry,
                                 'post',
                                 'https://hdsky.me/image_code_ajax.php',
                                 headers=entry['headers'],
                                 files=data)
        content = self._decode(response)
        image_hash = json.loads(content)['code']

        if not image_hash:
            entry['result'] = 'Cannot find: image_hash, url: {}'.format(
                entry['url'])
            entry.fail(entry['result'])
            return

        img_response = self._request(
            task,
            entry,
            'get',
            'https://hdsky.me/image.php?action=regimage&imagehash={}'.
            format(image_hash),
            headers=entry['headers'])

        # Paint every pixel flagged as noise white before recognition.
        img = Image.open(BytesIO(img_response.content))
        width = img.size[0]
        height = img.size[1]
        for i in range(0, width):
            for j in range(0, height):
                noise = self._detect_noise(img, i, j, width, height)
                if noise:
                    img.putpixel((i, j), (255, 255, 255))
        img_byte_arr = BytesIO()
        img.save(img_byte_arr, format='png')
        response = client.basicAccurate(img_byte_arr.getvalue(),
                                        {"language_type": "ENG"})
        logger.info(response)
        # An empty or missing 'words_result' (nothing recognised, API error)
        # becomes an empty code and takes the failure path below instead of
        # raising IndexError/KeyError.
        words_result = response.get('words_result') or []
        recognized = words_result[0]['words'] if words_result else ''
        code = re.sub('\\W', '', recognized)
        code = code.upper()
        if len(code) == 6:
            data = {
                'action': (None, 'showup'),
                'imagehash': (None, image_hash),
                'imagestring': (None, code)
            }
            response = self._request(task,
                                     entry,
                                     'post',
                                     entry['url'],
                                     headers=entry['headers'],
                                     files=data)
            print(response.text)
            state = self.check_state(entry, response, response.request.url)
        # ``state`` is only consulted when a code was actually submitted.
        if len(code) != 6 or state == SignState.WRONG_ANSWER:
            with open(path.dirname(__file__) + "/temp.png", "wb") as code_file:
                code_file.write(img_response.content)
            with open(path.dirname(__file__) + "/temp2.png",
                      "wb") as code_file:
                code_file.write(img_byte_arr.getvalue())
            entry['result'] = 'ocr failed: {}, see temp.png'.format(code)
            entry.fail(entry['result'])

Пример #14

0

Показать файл

Файл: auto_sign_in.py Проект: yepcn/flexget_qbittorrent_mod

    def sign_in_by_code(self, task, entry, config):
        """Sign in on a page protected by an image captcha, using Baidu OCR.

        Args:
            task: Passed through to ``self._request``.
            entry: Entry providing 'base_url', 'url' and 'headers'; receives
                'result' and is failed when sign-in cannot be completed.
            config: Mapping with 'aipocr_app_id', 'aipocr_api_key' and
                'aipocr_secret_key'.

        On a failed recognition the original and the de-noised captcha are
        saved next to this module as opencd_code.png and opencd_code2.png.
        """
        app_id = config.get('aipocr_app_id')
        api_key = config.get('aipocr_api_key')
        secret_key = config.get('aipocr_secret_key')

        if not (AipOcr and Image):
            entry['result'] = 'baidu-aip or pillow not existed'
            entry.fail(entry['result'])
            return
        if not (app_id and api_key and secret_key):
            entry['result'] = 'Api not set'
            entry.fail(entry['result'])
            return

        client = AipOcr(app_id, api_key, secret_key)

        response = self._request(task,
                                 entry,
                                 'get',
                                 entry['base_url'],
                                 headers=entry['headers'])
        state = self.check_state(entry, response, entry['base_url'])
        if state != SignState.NO_SIGN_IN:
            return

        # The sign-in page embeds both the captcha hash and the image address.
        response = self._request(task,
                                 entry,
                                 'get',
                                 entry['url'],
                                 headers=entry['headers'])
        content = self._decode(response)
        image_hash_re = re.search('(?<=imagehash=).*?(?=")', content)
        img_src_re = re.search('(?<=img src=").*?(?=")', content)

        if not (image_hash_re and img_src_re):
            entry['result'] = 'Cannot find key: image_hash, url: {}'.format(
                entry['url'])
            entry.fail(entry['result'])
            return

        image_hash = image_hash_re.group()
        img_src = img_src_re.group()
        img_response = self._request(task,
                                     entry,
                                     'get',
                                     urljoin(entry['url'], img_src),
                                     headers=entry['headers'])

        # Paint every pixel flagged as noise white before recognition.
        img = Image.open(BytesIO(img_response.content))
        width = img.size[0]
        height = img.size[1]
        for i in range(0, width):
            for j in range(0, height):
                noise = self._detect_noise(img, i, j, width, height)
                if noise:
                    img.putpixel((i, j), (255, 255, 255))
        img_byte_arr = BytesIO()
        img.save(img_byte_arr, format='png')
        response = client.basicAccurate(img_byte_arr.getvalue(),
                                        {"language_type": "ENG"})
        logger.info(response)
        # An empty or missing 'words_result' (nothing recognised, API error)
        # becomes an empty code and takes the failure path below instead of
        # raising IndexError/KeyError.
        words_result = response.get('words_result') or []
        recognized = words_result[0]['words'] if words_result else ''
        code = re.sub('\\W', '', recognized)
        code = code.upper()
        if len(code) == 6:
            params = {'cmd': 'signin'}
            data = {
                'imagehash': (None, image_hash),
                'imagestring': (None, code)
            }
            response = self._request(task,
                                     entry,
                                     'post',
                                     entry['url'],
                                     headers=entry['headers'],
                                     files=data,
                                     params=params)
            state = self.check_state(entry, response, response.request.url)
        # ``state`` is only consulted when a code was actually submitted.
        if len(code) != 6 or state == SignState.WRONG_ANSWER:
            with open(path.dirname(__file__) + "/opencd_code.png",
                      "wb") as code_file:
                code_file.write(img_response.content)
            with open(path.dirname(__file__) + "/opencd_code2.png",
                      "wb") as code_file:
                code_file.write(img_byte_arr.getvalue())
            entry['result'] = 'ocr failed: {}, see opencd_code.png'.format(
                code)
            entry.fail(entry['result'])

Пример #15

0

Показать файл

 def __init__(self, app_id, app_key, secret_key):
     """Record the Baidu endpoint URLs and create the OCR client."""
     self.ocr_api, self.token_api = (
         "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic",
         "https://aip.baidubce.com/oauth/2.0/token",
     )
     self.client = AipOcr(app_id, app_key, secret_key)

Пример #16

0

Показать файл

Файл: pic2word.py Проект: huiup/python_code

from aip import AipOcr
# 图片文字识别
# NOTE(review): credentials are hard-coded in source; consider loading them
# from configuration or the environment.
APP_ID = '19082278'
APP_KEY = 'QWkx0oIWNoHuoer6f14PXDbq'  # API key
SECRET_KEY = 'qXX5aTLKrDY9jyfCE171Q4zMTVl2ii8I'  # secret key
# Collects the text of every recognised line.
text_list = []

client = AipOcr(APP_ID, APP_KEY, SECRET_KEY)  # create the AipOcr client
with open(r'2.png', 'rb') as image:
    img_data = image.read()

text = client.basicGeneral(img_data)  # the response is used as a dict below
# print(text)
# Store and print each recognised line.
for i in text.get('words_result'):
    text_list.append(i.get('words'))
    print(i.get('words'))
# print(text_list)

Пример #17

0

Показать файл

Файл: config.py Проект: miuric/share

# Service name, used as the key of the port table below.
SHARE = "share"

# Port number per service name.
ms_port_config = {
    SHARE: 20201,
}

# Database connection settings.
# NOTE(review): the password is hard-coded in source; consider loading it
# from the environment or a secrets store.
db_host = '127.0.0.1'
db_user = '******'
db_passwd = 'liuyirui'
db_port = 3306

# "south" service settings
# south_base_host = "http://localhost:20202"
south_base_host = "http://49.233.250.82:8888"
key = 'key'
client = 'name:client-2'

# Baidu image recognition credentials; the keys match the keyword arguments
# expanded into AipOcr below.
# NOTE(review): secrets are hard-coded in source.
bd_config = {
    'appId': '18898769',
    'apiKey': '80LZK2FsGUoSReVwaqhW5RN6',
    'secretKey': 'sbMp3ERC1v0yuFYtDWPYsjlDB6cXQc2I'
}

# Shared OCR client, created once at import time.
bd_client = AipOcr(**bd_config)

# Logging: each level name is bound to a constant of the same name, and the
# whole tuple is also kept in _levels.
_levels = TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL = 'TRACE', 'DEBUG', 'INFO', 'SUCCESS', 'WARNING', 'ERROR', 'CRITICAL'
STDERR_DEBUG_LEVEL = DEBUG

Пример #18

0

Показать файл

def autoChoose():
    """Screenshot the phone, OCR the quiz question and rank the three answers.

    Steps: capture and pull a screenshot through adb, crop it to the band
    between 15% and 70% of the height, recognise the text with Baidu OCR,
    search Baidu for the question and count how often each of the last three
    recognised lines (options A, B and C) occurs in the result abstracts.
    The first four results are printed with the options highlighted, followed
    by the option with the highest count and the time measured up to just
    before the search.

    Returns early, after printing a message, when OCR recognises no text
    (this used to end in NameError or IndexError).
    """
    start_time = time.time()
    # Take the screenshot on the device and pull it to the working directory.
    os.system('adb shell screencap -p /sdcard/screen_shot.png')
    os.system('adb pull /sdcard/screen_shot.png .')

    # Crop to the region holding the question and its options.
    image = Image.open('./screen_shot.png')
    image_size = image.size
    print(image_size)
    width = image_size[0]
    height = image_size[1]
    region = image.crop((0, height * 0.15, width, height * 0.7))
    region.save('./screen_shot_crop.png')

    # OCR
    aa = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    filepath = './screen_shot_crop.png'
    with open(filepath, 'rb') as fp:
        image = fp.read()
    ret = aa.basicGeneral(image)

    lines = ret.get('words_result', []) if ret else []
    if not lines:
        print('OCR returned no text')
        return

    # The question is the first line without its "N." prefix; when more lines
    # were recognised it is assumed to wrap onto one or two further lines.
    ask = lines[0]['words'].split('.')[-1]
    if len(lines) == 5:
        ask += lines[1]['words']
    elif len(lines) > 5:
        ask += lines[1]['words'] + lines[2]['words']

    # The options are the last three recognised lines.
    if len(lines) >= 3:
        Coption = lines[-1]['words']
        Boption = lines[-2]['words']
        Aoption = lines[-3]['words']
    else:
        Aoption, Boption, Coption = ' ', ' ', ' '

    keyword = ask
    print(keyword)

    # Fixed switch; change it to 'y' to call the search with convey=True.
    convey = 'n'
    end_time = time.time()
    if convey == 'y' or convey == 'Y':
        results = baiduSearch.search(keyword, convey=True)
    elif convey == 'n' or convey == 'N' or not convey:
        results = baiduSearch.search(keyword)
    else:
        print('输入错误')
        exit(0)
    count = 0

    N = {'A': 0, 'B': 0, 'C': 0}

    for result in results:

        N['A'] += result.abstract.count(Aoption)
        N['B'] += result.abstract.count(Boption)
        N['C'] += result.abstract.count(Coption)
        # Highlight A in red, B in green and C in blue.
        Ared = result.abstract.replace(
            Aoption, '\033[1;31m' + Aoption + '\033[0m')
        Bred = Ared.replace(Boption, '\033[1;32m' + Boption + '\033[0m')
        Cred = Bred.replace(Coption, '\033[1;34m' + Coption + '\033[0m')

        print('\033[1;30;41m' + result.title + '\033[0m')
        print(' ')
        print(Cred)
        print(' ')
        count = count + 1
        if count == 4:
            break

    best = max(N.items(), key=lambda x: x[1])[0]
    # Joined into one string so the output is identical on Python 2 and 3.
    print(' '.join(['\033[1;31;40m', '答案： ', best, '\033[0m']))

    print(r'run time: ' + str(end_time - start_time) + 's')

Пример #19

0

Показать файл

Файл: PigPriceUpdateTemplate.py Проект: gamcing/2020_Automated-office

def main():
    """Refresh the pig-price sheet of the calling Excel workbook.

    Steps, in order:
      1. Scrape the listing pages under www.scs.moa.gov.cn/scxxfb/ and extract
         the pork price for today (or the closest earlier listed day), for the
         first listed day of this month and for the first listed day of this
         year.
      2. Locate the newest sow-price announcement through sousuo.gov.cn, run
         Baidu OCR on its first image and take the recognised line at index 5
         as the price.
      3. Download the piglet, hog and corn series from Wind and average them.
      4. Write everything into the first sheet of the workbook returned by
         ``xw.Book.caller()``, shifting the five-column history one step left.

    The function takes no arguments and returns nothing; all results are
    printed and written to the workbook.
    """
    book = xw.Book.caller()

    # ---- Scrape the Ministry of Agriculture data ----
    today = datetime.date.today()
    # First day of the current month
    firstmonthday = datetime.datetime(today.year, today.month, 1)
    # First day of the current year
    firstday = datetime.datetime(today.year, 1, 1)
    oneday = datetime.timedelta(days=1)
    # Every scraped article link: key is the date string, value is the link
    links_by_date = {}
    url = 'http://www.scs.moa.gov.cn/scxxfb/'

    # Scrape the index page
    response = requests.get(url)
    content = response.content
    page = etree.HTML(content)
    data = page.find('.//div[@class="sj_e_tonzhi_list"]')

    for node in data:
        infos = node.findall('.//li')
        for info in infos:
            rrr = info.find('.//a')
            link = url + str(rrr.get('href'))
            date = re.findall(r'.\w+.t(\d+)\w+', link)
            links_by_date[date[0]] = str(link)

    # Scrape the follow-up listing pages index_1.htm .. index_12.htm
    for i in range(1, 13):
        url = 'http://www.scs.moa.gov.cn/scxxfb/index_' + str(i) + '.htm'
        response = requests.get(url)
        content = response.content
        page = etree.HTML(content)
        data = page.find('.//div[@class="sj_e_tonzhi_list"]')

        for node in data:
            infos = node.findall('.//li')
            for info in infos:
                rrr = info.find('.//a')
                link = 'http://www.scs.moa.gov.cn/scxxfb/' + str(
                    rrr.get('href'))
                date = re.findall(r'.\w+.t(\d+)\w+', link)
                links_by_date[date[0]] = str(link)

    # Fetch one article page and extract the pork price with a regex
    def price_get(link):
        response = requests.get(link)
        content = response.content
        page = etree.HTML(content)
        info = page.find('.//div[@class="TRS_Editor"]')
        text = info.find('.//p').text
        price = re.findall(r'猪肉\D+(\d+.\d+)元', text)
        return price

    price1 = {}  # pork prices: key is the date string, value is the price list

    # Today's price; if today is not listed yet, walk back to the latest day
    while today.strftime('%Y%m%d') not in links_by_date:
        today -= oneday
    d_p_price = price_get(str(links_by_date[today.strftime('%Y%m%d')]))
    price1[today.strftime('%Y%m%d')] = d_p_price

    # Start-of-month price: first listed day on or after the 1st of the month
    while firstmonthday.strftime('%Y%m%d') not in links_by_date:
        firstmonthday += oneday
    m_p_price = price_get(str(links_by_date[firstmonthday.strftime('%Y%m%d')]))
    price1[firstmonthday.strftime('%Y%m%d')] = m_p_price

    # Start-of-year price: first listed day on or after January 1st
    while firstday.strftime('%Y%m%d') not in links_by_date:
        firstday += oneday
    y_p_price = price_get(str(links_by_date[firstday.strftime('%Y%m%d')]))
    price1[firstday.strftime('%Y%m%d')] = y_p_price

    print(price1)
    # price1 now maps:
    #   today.strftime('%Y%m%d')          -> today's price
    #   firstmonthday.strftime('%Y%m%d')  -> start-of-month price
    #   firstday.strftime('%Y%m%d')       -> start-of-year price

    # ---- Scrape the breeding-sow price ----
    # Baidu Cloud account (placeholders in this snippet)
    APP_ID = '#####'
    API_KEY = '########'
    SECRECT_KEY = '########'
    client = AipOcr(APP_ID, API_KEY, SECRECT_KEY)

    # Search page: take the first result as the target announcement
    url = 'http://sousuo.gov.cn/s.htm?q=%E4%BA%8C%E5%85%83%E6%AF%8D%E7%8C%AA%E9%94%80%E5%94%AE%E4%BB%B7%E6%A0%BC&t=govall&timetype=timeqb&mintime=&maxtime=&sort=pubtime&sortType=1&nocorrect='
    response = requests.get(url)
    content = response.content
    page = etree.HTML(content)
    table = page.find('.//h3[@class="res-title"]')
    channels = table.find('.//a')
    link = channels.get('href')

    # Fetch the title of the newest announcement
    html = requests.get(link)
    html.encoding = 'utf-8'
    text = html.text
    page1 = etree.HTML(text)
    info = page1.find('.//div[@class="article oneColumn pub_border"]')
    t = info.find('.//h1')
    title = t.text

    # Extract the date the data refers to from a fixed position in the title
    # NOTE(review): the year is hard-coded to 2020 -- confirm before reuse.
    datestr = title[len(title) - 14:len(title) - 9]
    date = '2020年' + datestr
    date1 = datetime.datetime.strptime(date, '%Y年%m月%d日')

    # Build the absolute URL of the first image in the announcement
    content1 = page1.find('.//div[@class="pages_content"]')
    channels1 = content1.find('.//img')
    link_img = channels1.get('src')
    links = str(link)
    pic_urls = links[:len(links) - 19] + link_img

    # OCR the image through the Baidu API and pick the price line
    prices = client.basicGeneralUrl(pic_urls)
    r = prices['words_result']
    info = r[5]
    price = info['words']

    # Weekly sow price: key is the week label, value is the price
    pork_price = {}
    week = date1.strftime("%W")
    pork_price[week + '周'] = price
    print(pork_price)

    # ---- Pull Wind data and write to Excel ----
    # Connect to the Wind database
    w.start()
    w.isconnected()

    # Piglet data
    ## Download piglet series
    pig_baby_codes = ['############']  ### piglet codes redacted
    pig_baby = w.edb(pig_baby_codes,
                     datetime.date.today() + datetime.timedelta(days=-5),
                     datetime.date.today(),
                     usedf=True,
                     ShowBlank=0)
    pig_baby = pig_baby[1]
    pig_baby.columns = ['###########']  ### piglet region labels redacted

    ## Average piglet data pairwise (columns 0/1, 2/3, ...) per region
    pig_baby_mean = pd.DataFrame([])
    pig_baby_mean_names = ['##########']  ### per-region labels redacted
    for i in range(1, 13, 2):
        pig_baby_mean[pig_baby_mean_names[int(
            (i - 1) /
            2)]] = (pig_baby.iloc[:, i - 1] + pig_baby.iloc[:, i]) / 2
    print(pig_baby_mean)

    # Hog data
    ## Download hog series
    pig_codes = ["###############"]  ### hog codes redacted
    pig = w.edb(pig_codes,
                datetime.date.today() + datetime.timedelta(days=-4),
                datetime.date.today(),
                usedf=True,
                ShowBlank=0)
    pig = pig[1]
    pig.columns = ["###############"]  ### hog region labels redacted

    ## Average hog data per region: sum every column whose label contains
    ## the region name, then divide by the number of matches
    pig_mean = pd.DataFrame(np.zeros((4, 5)))
    pig_mean_names = ["###########"]  ### per-region labels redacted
    pig_mean.columns = pig_mean_names
    print(pig_mean)
    pig_mean.index = pig.index[1:]
    for name in pig_mean_names:
        i = 0
        for n in list(pig.columns):
            if name in n:
                pig_mean[name] = pig_mean[name] + pig[n]
                i += 1
        pig_mean[name] = pig_mean[name] / i

    print(pig_baby_mean)

    # Corn data
    ## Download corn price
    corn_codes = ['S5005793']
    corn = w.edb(corn_codes,
                 datetime.date.today() + datetime.timedelta(days=-5),
                 datetime.date.today(),
                 usedf=True,
                 ShowBlank=0)
    corn = corn[1]
    corn.columns = ['现货价:玉米:平均价']
    corn = corn.T
    print(corn)

    # Close the Wind connection
    w.stop()

    # Combine piglet, hog, pork and corn prices into one column of values
    pig_baby_mean = pig_baby_mean.T
    pig_mean = pig_mean.T
    pig_baby_data = list(pig_baby_mean[pig_baby_mean.columns[-1]])
    pig_baby_data.append(np.mean(pig_baby_data))
    pig_data = list(pig_mean[pig_mean.columns[-1]])
    pig_data.append(np.mean(pig_data))
    corn_data = list(corn[corn.columns[-1]])
    pig_baby_data.extend(pig_data)
    pig_baby_data.extend(corn_data)
    pig_baby_data.append(float(price1[today.strftime('%Y%m%d')][0]))
    alldata = pig_baby_data
    print(alldata)

    # Last five days, oldest first: `days` holds datetimes, `days1` strings
    days = [
        datetime.datetime.today() + datetime.timedelta(days=-i)
        for i in range(5)
    ]
    days1 = [days[i].strftime('%Y-%m-%d') for i in range(5)]
    days.reverse()
    days1.reverse()
    print(days)

    # Last five week labels, oldest first (week numbers are NOT zero-padded)
    week_list = {}
    today = datetime.date.today()
    weeks = today.strftime("%W")
    week_n = int(weeks)
    week_list[week_n] = week_n
    l = [week_list[week_n] - i for i in range(5)]
    for i in range(5):
        l[i] = str(l[i]) + '周'
    l.reverse()
    print(l)
    week_nows = l

    # Target worksheet
    sht = book.sheets[0]

    # Decide whether the yearly / monthly sow price cells need refreshing.
    # The label is built without zero padding so it can match week_nows
    # ("%W" alone yields e.g. '05', which never equals '5周').
    firstday_week = str(
        int(
            datetime.datetime(datetime.date.today().year,
                              datetime.date.today().month,
                              1).strftime("%W"))) + '周'
    if week_nows[-1] == '1周':
        sht.range('Q8').value = float(price)
    if week_nows[-1] == firstday_week:
        sht.range('P8').value = float(price)

    # Decide whether the yearly / monthly piglet, hog, pork and corn cells
    # need refreshing. days1 entries look like 'YYYY-MM-DD', so the month-day
    # part starts at index 5 and the day part at index 8.
    if days1[-1][5:] == '01-01':
        sht.range('Q11:Q25').options(transpose=True).value = alldata
    if days1[-1][8:] == '01':
        sht.range('P11:P25').options(transpose=True).value = alldata

    # Update the rolling history (skipped when already up to date)
    ## Breeding sow
    if sht.range('K7').value == week_nows[-1]:
        pass
    else:
        sht.range('G8:J8').value = sht.range('H8:K8').value
        sht.range('K8').value = float(price)

    ## Piglet, hog, pork, corn
    if sht.range('K9').value.date() == days[-1].date():
        pass
    else:
        sht.range('G7:K7').value = week_nows
        sht.range('G9:K9').value = days1
        sht.range('G11:J25').value = sht.range('H11:K25').value
        sht.range('K11:K25').options(transpose=True).value = alldata

Пример #20

0

Показать файл

Файл: source.py Проект: wenxiao2012/OCR-GALGAME-SystemTray

def OCR_Core(Image, **kwargs):
    """Recognise the text in an image with the configured OCR engine.

    Args:
        Image: image object with a ``save`` method (the original note says
            PIL.Image). Unless ``EXTRA`` is set it is saved to
            ``important/LastImage.jpg`` and read back as bytes.
        **kwargs: ``EXTRA`` (truthy) reuses the bytes already stored in the
            global ``LastImageValue`` and selects the engine through
            ``OCRResultSetting`` instead of ``OcrSetting``.

    Returns:
        The recognised lines, each followed by a newline. For Baidu, ``""``
        when the call succeeded but found no text. For Tencent, ``False``
        when the SDK reports ``FailedOperation.ImageNoText`` and ``None`` for
        any other SDK error.

    Side effects:
        Updates the globals ``ResultJson``, ``LastImageValue`` and
        ``OCRText``; shows message boxes; calls ``sys.exit()`` when Baidu
        returns an error response.
    """
    global GALMode, ResultJson  # shared with the text-processing code
    global LastImageValue, OCRText, OCRResultSetting
    #: PIL.Image
    with open("./important/setting.json", 'r+') as f:
        setting = json.load(f)
    if kwargs.get("EXTRA"):
        SelectOCR = OcrAll[OCRResultSetting.get()]
    else:
        SelectOCR = OcrAll[OcrSetting.get()]
        # Original note: writing to memory was too slow, so the image is
        # saved locally and read back instead.
        Image.save('important/LastImage.jpg')
        with open('important/LastImage.jpg', 'rb+') as f:
            LastImageValue = f.read()
    OCRText = ""
    if SelectOCR in ("bd_normal", "bd_accurate"):
        bd_info = setting["userInfo"]["bd_info"]
        BDOcr = AipOcr(bd_info["AppID"], bd_info["APIKey"],
                       bd_info["SecretKey"])
        if not GALMode:
            if SelectOCR == "bd_normal":
                OCRLanguage = setting["defaultOCRLanguage"]
                ResultJson = BDOcr.basicGeneral(
                    LastImageValue, {"language_type": OCRLanguage})
            else:
                ResultJson = BDOcr.basicAccurate(LastImageValue)
        else:
            # GAL mode always uses the general endpoint with its own language
            GALLanguage = setting["defaultGALLanguage"]
            ResultJson = BDOcr.basicGeneral(
                LastImageValue, {"language_type": GALLanguage})

        if ResultJson.get("words_result"):  # text was recognised
            OCRText = "".join(
                item['words'] + "\n" for item in ResultJson["words_result"])
            return OCRText

        # Use .get(): error responses carry 'error_code' and are not assumed
        # to contain 'words_result_num'; indexing that key directly raised
        # KeyError before any of the error handling below could run.
        error_code = ResultJson.get('error_code')
        if error_code is None:
            # The call succeeded but nothing was recognised.
            if not GALMode:
                messagebox.showinfo(u"识别错误", u"未识别到文字")
            return ""
        if error_code == 14:  # credentials rejected: check the user info
            messagebox.showerror(title="Error",
                                 message=u"检查APPID,APIKEY,以及SECRET_KEY,程序退出")
        elif error_code == 17:  # daily quota exceeded
            messagebox.showerror(title="Error", message=u"今日次数超额")
        else:
            messagebox.showerror(title="Error",
                                 message=u"错误代码:" + str(ResultJson))
        sys.exit()
    else:  # Tencent OCR
        TX_INFO = setting["userInfo"]["tx_info"]
        SecretId = TX_INFO["SecretId"]
        SecretKey = TX_INFO["SecretKey"]
        try:
            cred = credential.Credential(SecretId, SecretKey)
            httpProfile = HttpProfile()
            httpProfile.endpoint = "ocr.tencentcloudapi.com"

            clientProfile = ClientProfile()
            clientProfile.httpProfile = httpProfile
            # zh\auto\jap\kor
            client = ocr_client.OcrClient(cred, "ap-guangzhou", clientProfile)
            # Request parameters; json.dumps replaces the hand-built string
            params = json.dumps({
                "ImageBase64":
                base64.b64encode(LastImageValue).decode('utf-8'),
                "LanguageType": "auto",
            })
            # Adjustable (notes from the original author):
            # GeneralFasterOCR == fast printed-text OCR, no language option, has positions
            # GeneralBasicOCR == printed-text OCR, has language option, has positions
            # GeneralAccurateOCR == high-accuracy printed-text OCR, no language option, has positions
            if SelectOCR == "tx_normal":
                req = models.GeneralBasicOCRRequest()
                req.from_json_string(params)
                resp = client.GeneralBasicOCR(req)
            elif SelectOCR == "tx_quick":
                req = models.GeneralFastOCRRequest()
                req.from_json_string(params)
                resp = client.GeneralFastOCR(req)
            else:
                req = models.GeneralAccurateOCRRequest()
                req.from_json_string(params)
                resp = client.GeneralAccurateOCR(req)
            ResultJson = json.loads(resp.to_json_string())  # result JSON
            OCRText = "".join(
                item["DetectedText"] + "\n"
                for item in ResultJson["TextDetections"])  # plain text
            return OCRText

        except TencentCloudSDKException as err:
            if err.get_code() == "FailedOperation.ImageNoText":
                if not GALMode:
                    messagebox.showinfo("识别失败", "没有识别到文字")
                return False
            # NOTE(review): every other SDK error is swallowed here, as in
            # the original; callers receive None.
            return None

Пример #21

0

Показать файл

Файл: sysubookmain.py Проект: xumj9/sysubadminton

# Size and format values; they share their names with the parameters of
# ResizeImage defined below.
# NOTE(review): `type` shadows the builtin of the same name.
width = 280
height = 130
type = 'png'

# Working-file paths built from `dir`.
# NOTE(review): `dir` is not defined in this excerpt -- presumably a
# directory-path string assigned earlier in the file; confirm.
repadd = dir + "rep.png"
greyadd = dir + "grey.png"
edadd = dir + "edge.png"
resadd = dir + "resize.png"

# Baidu OCR credentials, unpacked as keyword arguments into AipOcr below.
# NOTE(review): the keys are hard-coded in source.
config = {
    'appId': '11352343',
    'apiKey': 'Nd5Z1NkGoLDvHwBnD2bFLpCE',
    'secretKey': 'A9FsnnPj1Ys2Gof70SNgYo23hKOIK8Os'
}

client = AipOcr(**config)

# Open the page in Firefox, maximise the window and click the link whose
# text contains '登录'.
driver = webdriver.Firefox(
    executable_path='/Users/mengjiexu/Documents/parser/geckodriver')
driver.get("http://gym.sysu.edu.cn/product/show.html?id=61")
driver.maximize_window()
driver.find_element_by_xpath("//a[contains(text(),'登录')]").click()

# Absolute image paths (their use is not visible in this excerpt).
screenshotadd = "/Users/mengjiexu/Documents/badminton/screenshot.png"
codeadd = "/Users/mengjiexu/Documents/badminton/code.png"
rebadd = "/Users/mengjiexu/Documents/badminton/rgb.png"


def ResizeImage(filein, fileout, width, height, type):
    img = Image.open(filein)
    out = img.resize((width, height),

Пример #22

0

Показать файл

 def __init__(self):
     """Zero the switch counter and build the Baidu OCR client from ``keys.baidu``."""
     self.switch = 0
     baidu_keys = keys.baidu
     ocr_client = AipOcr(
         baidu_keys['ocr_id'],
         baidu_keys['ocr_ak'],
         baidu_keys['ocr_sk'],
     )
     self.client = ocr_client

Пример #23

0

Показать файл

Файл: main.py Проект: rwangr/quiz-game-supply

    {
        'app_name': '头脑王者(iPhone X)',
        'answer_count': 4,
        'crop_area':
        (50, 250, 350, 825),  #iPhone X on 1440*900 display resolution Mac
        'mask_area': [(20, 520, 70, 540), (340, 520, 390, 540)]
    }  #iPhone X on 1440*900 display resolution Mac
]

# Negation characters ('不', '没').
# NOTE(review): presumably used to detect negatively phrased questions;
# the usage is not visible in this excerpt.
NEG_KEYWORDS = ['不', '没']

# Configuration: the Baidu OCR credentials are read from the [BAIDU_OCR]
# section of secret.ini rather than being hard-coded.
cfg = ConfigParser()
cfg.read('secret.ini')
client = AipOcr(cfg.get('BAIDU_OCR', 'APP_ID'),
                cfg.get('BAIDU_OCR', 'API_KEY'),
                cfg.get('BAIDU_OCR', 'SECRET_KEY'))


def get_file_content(filePath):
    """Return the full contents of the file at *filePath* as bytes."""
    with open(filePath, 'rb') as image_file:
        raw_bytes = image_file.read()
    return raw_bytes


def is_contain_keywords(text, keywords):
    """Return True when *text* contains at least one entry of *keywords*.

    Args:
        text: the string to search.
        keywords: iterable of substrings to look for.

    Returns:
        True if any keyword occurs in *text*, otherwise False (including
        when *keywords* is empty).
    """
    # Membership stops at the first hit; counting every occurrence of every
    # keyword was unnecessary work.
    return any(word in text for word in keywords)

Пример #24

0

Показать файл

 def renew_client_ocr(self):
     """Replace ``self.client`` with a fresh AipOcr instance and bump ``self.switch``."""
     baidu_keys = keys.baidu
     fresh_client = AipOcr(
         baidu_keys['ocr_id'],
         baidu_keys['ocr_ak'],
         baidu_keys['ocr_sk'],
     )
     self.client = fresh_client
     self.switch = self.switch + 1

Пример #25

0

Показать файл

Файл: test.py Проект: xlx130/from_video_get_ASR_traindata

def pull_srt_from_video(video_name, save_srt_name):
    """OCR the subtitle region of sampled video frames and save timed text.

    Every ``interval_frame``-th frame between ``start_frame`` and
    ``end_frame`` is cropped to a per-movie region, written to a temporary
    JPEG and passed to ``img_to_str``. Consecutive similar results are merged
    into one entry of ``filelist`` (``[starttime, endtime, data]``, times in
    seconds), which is always saved with ``np.savez(save_srt_name, ...)``.

    Args:
        video_name: path of the video opened through imageio/ffmpeg.
        save_srt_name: prefix for the temporary frame images and the name
            passed to ``np.savez``.

    NOTE(review): this is Python 2 code (print statements, integer ``/`` in
    the slice bounds).
    """
    # NOTE(review): `client` is built here but both img_to_str calls below
    # pass None instead -- confirm which is intended. Credentials are
    # hard-coded. If opt.ocr_source matches neither branch, `client` is
    # never assigned.
    if opt.ocr_source == "baidu":
        APP_ID = '11531274'
        API_KEY = 'nl59T9O2lmZ7iAD2wttS457F'
        SECRET_KEY = 'U0VztUf0QKwjfTxzxIcG1CWf9qz9Sobf'

        client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    elif opt.ocr_source == "tecent":
        app_id = '1106978111'
        app_key = '9hUBH27QnbtCWZ2x'
        client = apiutil.AiPlat(app_id, app_key)

    vid = imageio.get_reader(video_name, 'ffmpeg')

    all_frames = vid.get_length()
    filelist = [[0, 0, u""]]  # format: [starttime, endtime, data]
    interval_frame = 5
    framerate = vid.get_meta_data()['fps']

    same_list = []
    avg_char_width = -1

    # Per-movie frame window, written as (minutes * 60 + seconds) * 25.
    # NOTE(review): the factor 25 presumably assumes 25 fps -- confirm.
    # NOTE(review): an unlisted opt.movie_name leaves start_frame/end_frame
    # undefined, so the print below raises NameError.
    if opt.movie_name == "rmdmy":
        start_frame = (1 * 60 + 30) * 25
        end_frame = (43 * 60 + 30) * 25
    elif opt.movie_name == "wdqbs":
        start_frame = (1 * 60 + 30) * 25
        end_frame = all_frames - start_frame
    elif opt.movie_name == "bly":
        start_frame = (2 * 60 + 30) * 25
        end_frame = all_frames - ((3 * 60 + 40) * 25)
    elif opt.movie_name == "nrb":
        start_frame = (0 * 60 + 5) * 25
        end_frame = all_frames

    print "all_frames = %d,end_frame = %d" % (all_frames, end_frame)
    last_result = ""
    try:
        for num in range(all_frames):
            if num < start_frame or num > end_frame:
                continue
            # The frame is decoded before the sampling check below, so
            # skipped frames are still read.
            im = vid.get_data(num)
            # Original note: "every 10 frames is 40ms, frame rate is 25hz".
            # NOTE(review): that does not match interval_frame = 5.
            if num % interval_frame != 0:
                continue
            print num

            image = im  #skimage.img_as_float(im).astype(np.float64)
            # if flag == 0:
            #     imageio.imsave("abcd.jpg", image[image.shape[0] * 2 /3:,:])
            #     words_tmp, porb_tmp, item = img_to_str(client, "abcd.jpg", avg_char_width,flag=flag)
            #     if words_tmp=="":
            #         continue
            #     y = item[0]["y"]
            #     height = item[0]["height"]
            #     if y > 0:
            #         flag = 1
            # Crop a per-movie band starting below two thirds of the frame
            # height (presumably where the subtitles are -- confirm).
            if opt.movie_name == "rmdmy":
                tmp = image[(image.shape[0] * 2 / 3 +
                             100):(image.shape[0] * 2 / 3 + 200), 200:1000]
            elif opt.movie_name == "wdqbs":
                tmp = image[(image.shape[0] * 2 / 3 +
                             180):(image.shape[0] * 2 / 3 + 320),
                            int(image.shape[1] * 0.15):int(image.shape[1] *
                                                           0.78)]
            elif opt.movie_name == "bly":
                tmp = image[(image.shape[0] * 2 / 3 +
                             180):(image.shape[0] * 2 / 3 + 320),
                            int(image.shape[1] * 0.15):int(image.shape[1] *
                                                           0.78)]
            elif opt.movie_name == "nrb":
                tmp = image[(image.shape[0] * 2 / 3 +
                             120):(image.shape[0] * 2 / 3 + 320),
                            int(image.shape[1] * 0.17):int(image.shape[1] *
                                                           0.88)]
            save_file_name = "%s_%d.jpg" % (save_srt_name, num)
            imageio.imsave(save_file_name, tmp)

            result, porb, avg_char_width, rsp = img_to_str(
                None, save_file_name, avg_char_width, last_result)
            # On a non-zero 'ret' or an empty result, retry once on a
            # gamma-adjusted (0.5) copy of the crop.
            if rsp['ret'] != 0 or (rsp['ret'] == 0 and result == ""):
                gam2 = exposure.adjust_gamma(tmp, 0.5)
                imageio.imsave(save_file_name, gam2)
                result, porb, avg_char_width, rsp = img_to_str(
                    None, save_file_name, avg_char_width, last_result)
            last_result = result
            os.remove(save_file_name)

            print result.encode("utf-8")
            if result != "":
                if len(same_list) > 0:
                    # Similarity against the text of the current entry.
                    ratio = Levenshtein.ratio(result, filelist[-1][-1])

                    if ratio < 0.5:
                        # Different text: finalise the current entry from
                        # its collected candidates and start a new one.
                        filelist[-1][2] = same_rule(same_list)
                        same_list = [(result, porb)]
                        filelist.append([
                            num * 1.0 / framerate, num * 1.0 / framerate,
                            result
                        ])
                    else:
                        # Same text: collect the candidate and extend the
                        # end time of the current entry.
                        same_list.append((result, porb))
                        filelist[-1][1] = num * 1.0 / framerate
                else:
                    same_list = [(result, porb)]
                    filelist.append(
                        [num * 1.0 / framerate, num * 1.0 / framerate,
                         result])  # format: [starttime, endtime, data]
            #for reply system busy
            # elif result == "" and porb == -1:
            #     None
            else:
                # Empty result: close the current entry, if one is open.
                if len(same_list) > 0:
                    filelist[-1][2] = same_rule(same_list)
                    same_list = []

    except IOError:
        print('可能是ioerror ')
    finally:
        # Always persist what has been collected so far.
        np.savez(save_srt_name, filelist=filelist)

Пример #26

0

Показать файл

 def __init__(self):
     """Create the AipOcr client from the module-level credentials and set ``p_thres`` to 0.5."""
     ocr_client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
     self.client = ocr_client
     self.p_thres = 0.5

Пример #27

0

Показать файл

Файл: 军棋分辨.py Проект: shiep18/EIS2020

import wave
from aip import AipSpeech
from xpinyin import Pinyin
import requests
from os import system
import win32com.client
# Windows SAPI voice object.
speaker = win32com.client.Dispatch("SAPI.SpVoice")

# Piece names (translated from the original comments):
# flag, field marshal, army commander
# division commander, brigade commander, regiment commander, battalion commander, bomb
# company commander, platoon commander, engineer, landmine

# Baidu OCR credentials.
# NOTE(review): the keys are hard-coded in source; AipOcr and cv2 are not
# among the imports shown in this excerpt.
APP_ID = '20359943'
API_KEY = 'qnLBpWaNcl8mEORZRRCcKzZ2'
SECRET_KEY = 'BoKRYbe64dAVFTPWxMz7YOl4F7G8jtL2'
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)
cap = cv2.VideoCapture(0)  # capture device index 0
# Module-level state; its use is not visible in this excerpt.
aaa = 2
temp = 0
temp1 = 0
w1 = ''
w = ''
b = 0

filePath = "test.png"
#filePath = "all.png"


def get_file_content(filePath):
    """Read the file at *filePath* in binary mode and return its bytes."""
    with open(filePath, 'rb') as source:
        payload = source.read()
    return payload

Пример #28

0

Показать файл

def autoreply(request):
    """Build the reply for an incoming XML message.

    The request body is parsed as XML and dispatched on its ``MsgType``:

    * ``text``  -- reply with the lines returned by ``get_content``.
    * ``event`` -- welcome text on ``subscribe``, farewell text otherwise.
    * ``image`` -- download ``PicUrl``, run Baidu OCR on it and reply with
      the recognised lines wrapped in ``<p>`` tags.
    * ``voice`` -- echo the ``Recognition`` text plus search results, or a
      plain acknowledgement when no recognition text is present.
    * anything else -- a fixed acknowledgement.

    NOTE(review): the field names suggest a WeChat official-account
    callback -- confirm against the caller.

    Args:
        request: object whose ``body`` attribute holds the XML payload.

    Returns:
        Whatever ``TextMsg(...).send()`` returns.
    """
    xmlData = ET.fromstring(request.body)
    msg_type = xmlData.find('MsgType').text
    # The reply goes back to the sender, so the two names are swapped.
    toUser = xmlData.find('FromUserName').text
    fromUser = xmlData.find('ToUserName').text
    print(msg_type)

    if msg_type == 'text':
        MsgContent = xmlData.find('Content').text
        content = '\n'.join(get_content(MsgContent))
    elif msg_type == 'event':
        MsgEvent = xmlData.find('Event').text
        if MsgEvent == "subscribe":
            content = "终于等到你，小g已在此恭候多时。\n" \
                      "这是一个gis与ai的公众号，您可以输入关键词搜索资源。" \
                      "如输入'arcgis'，小g会为你提供关于arcgis的各种资源。\n" \
                      "懒人福音，小g支持语音输入搜索。\n" \
                      "除此之外，小g还能将您发送的图片中的文字读取出来哦。\n" \
                      "最后附上:\n <a href='https://www.aigisss.com'> 👉 👉 个人小站👈 👈 </a>"
        else:
            content = "感谢您的陪伴，请别离开我，告诉我，我改还不行吗[皱眉][皱眉]"
    elif msg_type == 'image':
        PicUrl = xmlData.find('PicUrl').text
        sysfile = os.path.abspath('.')
        unknown_img_uuid = (str(uuid.uuid1())).replace("-", "")
        unknownimgpath = sysfile + '/media/images/' + unknown_img_uuid + '.jpg'

        img = requests.get(PicUrl)
        # The path is freshly generated, so write rather than append.
        with open(unknownimgpath, 'wb') as f:
            f.write(img.content)
        # Initialise the Baidu OCR client
        aipOcr = AipOcr(BAIDU_APP_ID, BAIDU_API_KEY, BAIDU_SECRET_KEY)
        # Request options
        options = {
            'detect_direction': 'true',
            'language_type': 'CHN_ENG',
        }
        try:
            result = aipOcr.webImage(get_file_content(unknownimgpath),
                                     options)
            if result["words_result_num"] == 0:
                content = "图中没有文字或未能识别"
            else:
                content = ''.join('<p>' + item["words"] + '</p>'
                                  for item in result["words_result"])
        except Exception:
            # Best effort: any OCR failure becomes the "nothing recognised"
            # reply, but SystemExit/KeyboardInterrupt are no longer swallowed
            # as they were by the bare except.
            content = "图中没有文字或未能识别"
        finally:
            # Remove the temporary download whatever happened above.
            os.remove(unknownimgpath)
    elif msg_type == 'voice':
        content = "语音已收到,谢谢"
        # Guard against a missing element as well as an empty one.
        recognition = xmlData.find('Recognition')
        VoiceContent = recognition.text if recognition is not None else None
        if VoiceContent is not None:
            voiceContent = ["您的语音是：{0}".format(VoiceContent)]
            VoiceContent = VoiceContent.replace('。', '')
            content = '\n'.join(voiceContent + get_content(VoiceContent))
    else:
        # Fixed acknowledgements; unknown types (e.g. 'link') get the
        # link text, as before.
        acknowledgements = {
            'video': "视频已收到,谢谢",
            'shortvideo': "小视频已收到,谢谢",
            'location': "位置已收到,谢谢",
        }
        content = acknowledgements.get(msg_type, "链接已收到,谢谢")

    replyMsg = TextMsg(toUser, fromUser, content)
    return replyMsg.send()

Пример #29

0

Показать файл

Файл: baiduocr.py Проект: mengtianwxs/pypdf2word

 def __init__(self):
     """Copy the Baidu credentials from ``THEKEY2BD`` onto the instance and build the OCR client."""
     self.api_id = THEKEY2BD.THEKE2BD_api_id
     self.api_key = THEKEY2BD.THEKE2BD_api_key
     self.secret_key = THEKEY2BD.THEKE2BD_secret_key
     credentials = (self.api_id, self.api_key, self.secret_key)
     self.client = AipOcr(*credentials)

Пример #30

0

Показать файл

from aip import AipOcr
from tutorial.cloud.baidu import get_key_values

# Initialise the AipOcr client; its positional arguments are whatever
# get_key_values('ocr_demo') returns, unpacked.
aipOcr = AipOcr(*get_key_values('ocr_demo'))


# Read the image file
def get_file_content(filePath):
    """Return the raw bytes stored in the file at *filePath*."""
    with open(filePath, 'rb') as image_handle:
        image_bytes = image_handle.read()
    return image_bytes


# Request options.
# NOTE(review): presumably passed to the OCR call further down; that call
# is not visible in this excerpt.
options = {
    'detect_direction': 'true',
    'language_type': 'CHN_ENG',
}
"""调用通用文字识别接口
Python SDK OCR的BUG [识别本地图片出错] https://developer.baidu.com/forum/topic/show?topicId=241904
测试本地文件失败，方法是修改aip里的ocr.py函数_validate，替换掉下面的代码
        # 支持url
        if re.match(r'^\w{1,128}://', data['image']):
            data['url'] = data['image']
            del data['image']
            return True

    替换后
        # 支持url
        if isinstance(data['image'], str) and re.match(r'^\w{1,128}://', data['image']):
            data['url'] = data['image']

Python AipOcr.AipOcr примеры использования