from aip import AipOcr

""" My APPID AK SK """
# Two sets of Baidu AIP credentials are declared; only the second set is
# passed to the client below.
APP_ID1 = '24147388'
API_KEY1 = 'QU5nINLo2vXhnnljIiBk2BwB'
SECRET_KEY1 = 'sZBkkSKlw876QzTbHXHNmGKvOZLcU9Sy'
APP_ID2 = '24205701'
API_KEY2 = 'YsobyEQFsVQnk9iZkyEqhbU7'
SECRET_KEY2 = 'nAdD3L2Whmxgx0GYGnFHGlFg0jZZ3Mga'
client = AipOcr(APP_ID2, API_KEY2, SECRET_KEY2)

""" 读取图片 """
def get_file_content(filePath):
    """Return the raw bytes stored in the file at *filePath*."""
    with open(filePath, 'rb') as handle:
        payload = handle.read()
    return payload


def get_handwriting(fpath, str):
    """Run handwriting OCR on the local image *fpath* and append the text to *str*.

    Every recognised line is appended to *str* preceded by a newline; the
    accumulated value is returned.
    """
    """ 调用百度OCR手写文字识别, 图片参数为本地图片 """
    # client.handwriting is the handwriting-recognition call (implemented in
    # the SDK's ocr.py).
    recognised = client.handwriting(get_file_content(fpath))["words_result"]
    combined = str
    for entry in recognised:
        combined = combined + '\n' + entry["words"]
    return combined
#coding=utf-8 #baidu import urllib2, os, time import urllib from aip import AipOcr APP_ID = '' API_KEY = '' SECRET_KEY = '' client = AipOcr(APP_ID, API_KEY, SECRET_KEY) User_Agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36' url = "https://www.baidu.com/s?wd=" import urllib import urllib2 def get_file_content(filePath): with open(filePath, 'rb') as fp: print 1 return fp.read() def threading_str(html, words): #time_start=time.time() print words, html.count(words) #time_emd=time.time() #print words,time_emd-time_start def baidu(words, an):
def main(videoname):
    # Purpose: OCR every *.jpg frame in the configured image directory, group
    # the recognised words by vertical position, and write a log plus the
    # words of the largest group to a timestamped text file.
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    conf = config.getConfig(videoname)
    APP_ID = conf['APP_ID']
    API_KEY = conf['API_KEY']
    SECRET_KEY = conf['SECRET_KEY']
    imgDir = conf['imgDir']
    outputDir = conf['outputDir']
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    def get_file_content(filePath):
        # Return the raw bytes of the file.
        with open(filePath, 'rb') as fp:
            return fp.read()

    def get_OCR(imgName):
        # Run general OCR on one image in imgDir; returns the 'words_result'
        # list, or False when the response has no such key.
        image = get_file_content(imgDir + '/' + imgName)
        options = {}
        options["recognize_granularity"] = "big"
        options["language_type"] = "CHN_ENG"
        options["detect_direction"] = "true"
        res = client.general(image, options)
        try:
            w = res['words_result']
            print str(res)
            return w
        except:
            # NOTE(review): bare except -- also hides errors other than a
            # missing 'words_result' key.
            return False

    def is_img(f):
        # Truthy when the name contains 'jpg' preceded by at least one character.
        return re.match(r'.+jpg', f)

    start = time.time()
    # The start timestamp is used as the output file name.
    # NOTE(review): the file is only closed at the very end; an exception in
    # between leaves it open.
    output = open(outputDir + str(start) + '.txt', 'a')
    pathDir = sorted(filter(is_img, os.listdir(imgDir)))
    # One dict per group of words that share (approximately) the same 'top'.
    positionData = []
    for imgName in pathDir:
        output.write('Start: ' + imgName + '\n')
        ocrRes = get_OCR(imgName)
        # fail then retry
        # NOTE(review): retries forever with no delay while get_OCR returns False.
        while ocrRes == False:
            print 'Fail: ' + imgName
            ocrRes = get_OCR(imgName)
        for word in ocrRes:
            top = int(word['location']['top'])
            height = int(word['location']['height'])
            w = word['words']
            has = False
            for group in positionData:
                # belong to this group
                if abs(group['top'] - top) < (group['height'] / 2):
                    # Avoid duplicate: check if current word is similar to last word
                    lastWord = group['words'][len(group['words']) - 1]
                    if difflib.SequenceMatcher(None, lastWord, w).quick_ratio() > 0.8:
                        # NOTE(review): 'has' stays False here, so the
                        # duplicate word starts a brand-new group below --
                        # confirm this is intended.
                        break
                    # append words
                    group['words'].append(w)
                    # cal new value
                    group['totalTop'] += top
                    group['totalHeight'] += height
                    group['totalNum'] += 1
                    group['top'] = group['totalTop'] / group['totalNum']
                    group['height'] = group['totalHeight'] / group['totalNum']
                    has = True
                    break
            if has == False:
                positionData.append({
                    'top': top,  # group standard, using average value of tops
                    'totalTop': top,
                    'height': height,
                    'totalHeight': height,
                    'totalNum': 1,  # how many pics has been add to this group
                    'words': [w]
                })
            output.write('Words: ' + w + '\n')
            output.write('Top: ' + str(word['location']['top']) + '\n')
            output.write('Height: ' + str(word['location']['height']) + '\n')
        output.write('Finished: ' + imgName + '\n')
        print 'Finished: ' + imgName
    output.write(str(positionData) + '\n')
    # Pick a group's word list: a group replaces the current pick when its
    # 'totalNum' exceeds the length of the currently picked word list.
    max_group = []
    for group in positionData:
        if group['totalNum'] > len(max_group):
            max_group = group['words']
    allWords = ','.join(max_group)
    output.write('-----------------------' + '\n')
    output.write(allWords + '\n')
    output.write('-----------------------' + '\n')
    end = time.time()
    output.write('Running time: ' + str(end - start) + '\n')
    output.close()
    print 'Finished All'
# coding: utf-8

# In[12]:

from aip import AipOcr
import re
import pandas as pd
from pandas import DataFrame

# In[2]:

# Baidu AIP credentials (placeholders).
appid = 'xxxxxxxx'
apikey = 'xxxxxxxxxxxxxxx'
secretkey = 'xxxxxxxxxxxxxxxxxxxxxxxxx'
client = AipOcr(appid, apikey, secretkey)

# In[5]:

# Read the image inside a context manager so the file handle is closed as
# soon as the bytes are in memory (the handle used to stay open).
with open(r'C:\Users\11197\Desktop\2.jpg', 'rb') as i:
    img = i.read()

# In[8]:

# Run basic general OCR and collect the text of every recognised line.
message = client.basicGeneral(img)
# print(message)
word = []
for i in message.get('words_result'):
    word.append(i.get('words'))

# In[10]:
import keyboard from PIL import ImageGrab import time from aip import AipOcr app_id = '' api_key = '' secret_key = '' client = AipOcr(app_id, api_key, secret_key) while True: keyboard.wait(hotkey='alt+a') keyboard.wait(hotkey='ctrl+s') time.sleep(0.1) image = ImageGrab.grabclipboard() image.save('image_001.jpg') with open('image_001.jpg', 'rb') as file: image = file.read() result = client.basicAccurate(image) result = result['words_result'] for i in result: print(i['words']) with open('word.txt', 'a+', encoding='UTF-8') as text: text.writelines('%s\n' % i['words']) hotkey = keyboard.read_hotkey() if hotkey == 'q':
def __init__(self, em):
    """Keep *em*, start at index 0 and create the OCR and image-classify clients."""
    self.em, self.currentindex = em, 0
    credentials = (APP_ID, API_KEY, SECRET_KEY)
    self._client = AipOcr(*credentials)
    self._imgClient = AipImageClassify(*credentials)
def bd_get_client():
    """Return an AipOcr client built from the decrypted module-level credentials."""
    app_id = encrypt.decrypt(APP_ID)
    api_key = encrypt.decrypt(API_KEY)
    secret_key = encrypt.decrypt(SECRET_KEY)
    return AipOcr(app_id, api_key, secret_key)
import concurrent.futures
import subprocess
import time
import re
import numpy as np
import cv2
from aip import AipOcr
# import pytesseract
import key

client = AipOcr(key.AIP_APP_ID, key.AIP_API_KEY, key.AIP_SECRET_KEY)
executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)


def capture_img():
    """Grab a screenshot from the attached device through adb and decode it.

    Returns:
        The image as decoded by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.
    """
    print('Capturing img...', end=' ')
    st = time.time()
    # The command is a fixed string, so shell=True carries no injection risk.
    pipe = subprocess.Popen(".\\adb\\adb shell screencap -p",
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            shell=True)
    # communicate() reads stdout to EOF, closes the pipes and waits for the
    # child; a bare pipe.stdout.read() never reaped the process.
    raw_bytes, _ = pipe.communicate()
    # Undo the LF -> CRLF translation applied to the PNG stream.
    image_bytes = raw_bytes.replace(b'\r\n', b'\n')
    # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
    image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8),
                         cv2.IMREAD_COLOR)
    print('Time:', time.time() - st)
    return image
def form_ocr(request):
    """Receive an uploaded image, send it to Baidu table OCR and return the result.

    The view expects a POST request carrying the image in ``request.FILES['pic']``.
    It submits the image for asynchronous table recognition, then polls for the
    result every 10 seconds, giving up after three unsuccessful polls.

    Returns:
        JsonResponse with ``return_msg`` (1 on success, 0 otherwise) and, once
        the upload has been read, ``file_url`` (result location or ``""``).
    """
    if request.method != 'POST':
        # The view used to fall through without returning anything for other
        # methods, which Django rejects; answer explicitly instead.
        return JsonResponse({'return_msg': 0, 'file_url': ''}, status=405)

    # 1. Fetch the uploaded picture
    print("开始工作")
    print(request.FILES)
    try:
        pic = request.FILES['pic']
        print(pic.size)
        print(pic.name)
        # 2. Read the image data
        img = pic.read()
        #print(img)
    except Exception:
        # Missing or unreadable upload: report failure to the front end.
        return JsonResponse({'return_msg': 0})

    # 3. Send the request to Baidu and collect the response
    AK = '自己的AK'
    SK = '自己的SK'
    app_id = "自己的app_id"
    client = AipOcr(app_id, AK, SK)
    options = {'result_type': 'excel'}
    result_id = client.tableRecognitionAsync(img, options)
    try:
        request_id = result_id['result'][0]['request_id']
        print(request_id)
    except (KeyError, IndexError, TypeError):
        # The submission response did not contain a request id.
        request_id = ""

    # Data returned to the front end; stays at "failure" unless a poll succeeds.
    return_msg = 0
    file_url = ""
    if request_id:
        try_times = 0
        # NOTE: this blocks the request for up to three 10-second polls.
        while True:
            time.sleep(10)
            result = client.getTableRecognitionResult(request_id)
            print(result)
            try:
                msg = result['result']['ret_msg']
            except (KeyError, TypeError):
                print("出错")
                break
            if msg == "已完成":
                file_url = result['result']['result_data']
                return_msg = 1
                break
            try_times += 1
            print('try_times', try_times)
            if try_times > 2:
                print("网络繁忙,请稍后再试")
                break
    return JsonResponse({'return_msg': return_msg, 'file_url': file_url})
class Book:
    # Court-booking automation driven through Selenium, using Baidu OCR to
    # read the login captcha.
    # NOTE(review): the nesting of every method body was reconstructed from a
    # whitespace-collapsed source -- confirm loop/break placement against the
    # original file.

    # The default browser is Chrome.
    # Credentials issued for the Baidu AI image-recognition service.
    APP_ID = '15273108'
    API_KEY = 'BQl4DK7sGjwvMKFvBB9UNVPD'
    SECRET_KEY = 'dYGm5MvIzXVWQXqHU1h1fYRs5xMQEKyF'
    aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    # Options passed to the Baidu OCR call.
    options = {
        'detect_direction': 'true',
        'language_type': 'CHN_ENG',
    }

    # Automatic booking. The first two parameters are self-explanatory; the
    # third is the time at which to start polling for courts, and the last
    # selects the morning / afternoon / evening session.
    def auto_book(self, username, password, buytime, ex_time):
        # Launch a visible (non-headless) Chrome browser.
        self.driver = webdriver.Chrome()
        # Maximise the window.
        self.driver.maximize_window()
        # Map the chosen session (morning / afternoon / evening) to its time slots.
        sw = {
            "早上": [
                '09:01-10:00', '10:01-11:00'],
            "下午": [
                '14:01-15:00', '15:01-16:00', '16:01-17:00'],
            "晚上": [
                '19:01-20:00', '20:01-21:00', '21:01-22:00']}
        # NOTE(review): sw1 is None for an unknown ex_time, and the "早上" list
        # has only two entries although sw1[2] is read further down.
        sw1 = sw.get(ex_time, None)
        # Open the badminton booking landing page and wait 1.5 s for it to
        # load; these are login steps and do not affect the timed booking.
        self.driver.get("http://gym.sysu.edu.cn/product/show.html?id=61")
        time.sleep(1.5)
        # Click the login link to reach the login page.
        self.driver.find_element_by_link_text("登录").click()
        time.sleep(0.2)
        # Type in the supplied user name and password.
        self.driver.find_element_by_id("username").send_keys(username)
        time.sleep(0.2)
        self.driver.find_element_by_id("password").send_keys(password)
        # Obtain the captcha string (spaces and dots stripped).
        t1 = self.Convertimg()
        t2 = t1.replace(' ', '')
        t = t2.replace('.', '')
        # Check that the captcha string has length 4 and is alphanumeric; if
        # not, click the captcha to refresh it and recognise again until it is.
        # while len(t)!=4 or t.__contains__(':') or t.__contains__('.'):
        while len(t) != 4 or t.isalnum() == False:
            imglocation = ("//img[@name='captchaImg']")  # xpath of the captcha image
            item = self.driver.find_element_by_xpath(imglocation)
            item.click()
            time.sleep(0.1)
            t1 = self.Convertimg()
            t2 = t1.replace(' ', '')
            t = t2.replace('.', '')
            # if len(t)==4:
            # break
        self.driver.find_element_by_id("captcha").send_keys(t)  # fill in the captcha
        time.sleep(1)
        self.driver.find_element_by_name("submit").click()  # click submit to log in
        # Reload the page, now as a logged-in user.
        self.driver.get("http://gym.sysu.edu.cn/product/show.html?id=61")
        self.driver.execute_script('window.open()')  # open another tab so several courts are booked together
        self.driver.execute_script('window.open()')  # open another tab so several courts are booked together
        self.driver.switch_to.window(self.driver.window_handles[1])  # switch tab
        # Load the same page in this tab as well.
        self.driver.get('http://gym.sysu.edu.cn/product/show.html?id=61')
        self.driver.switch_to.window(self.driver.window_handles[2])  # switch tab
        # Load the same page in this tab as well.
        self.driver.get('http://gym.sysu.edu.cn/product/show.html?id=61')
        while True:  # loop forever, checking whether the booking time has arrived
            now = datetime.datetime.now()
            if now.strftime('%Y-%m-%d %H:%M:%S') == buytime:  # plain string comparison
                self.driver.refresh()  # reload the page
                # The SYSU courts are not released exactly at 00:00 but usually
                # about two minutes late, so check whether the tab for three
                # days ahead is present.
                while self.is_element_exist(
                        '//*[@id="datesbar"]/div/ul/li[4]') != True:
                    self.driver.refresh()  # not there yet: reload until it appears
                    time.sleep(1.2)  # reload every 1.2 s
                if self.is_element_exist(
                        '//*[@id="datesbar"]/div/ul/li[4]'):  # it appeared
                    self.driver.find_element_by_xpath(
                        '//*[@id="datesbar"]/div/ul/li[4]').click()  # click the three-days-ahead tab
                    block_list = self.driver.find_elements_by_xpath(
                        '//span[@class="cell football easyui-tooltip tooltip-f"]')  # list of court elements
                    for block_place in block_list:  # check every court for availability
                        if block_place.get_attribute(
                                "data-timer") == sw1[0]:  # is it free in the chosen slot?
                            block_place.click()  # if so, select it
                            self.driver.find_element_by_xpath(
                                '//*[@id="reserve"]').click()  # confirm
                            time.sleep(1)  # wait for the confirmation page
                            break
                    self.driver.find_element_by_xpath(
                        '//*[@id="reserve"]').click()  # confirm
                    self.driver.switch_to.window(
                        self.driver.window_handles[0])  # switch to the first tab and repeat
                    self.driver.refresh()  # reload
                    self.driver.find_element_by_xpath(
                        '//*[@id="datesbar"]/div/ul/li[4]').click()
                    block_list = self.driver.find_elements_by_xpath(
                        '//span[@class="cell football easyui-tooltip tooltip-f"]')
                    for block_place in block_list:
                        if block_place.get_attribute(
                                "data-timer") == sw1[1]:
                            block_place.click()
                            self.driver.find_element_by_xpath(
                                '//*[@id="reserve"]').click()
                            time.sleep(1)
                            break
                    self.driver.find_element_by_xpath(
                        '//*[@id="reserve"]').click()
                    self.driver.switch_to.window(
                        self.driver.window_handles[1])  # switch to the next tab and repeat
                    self.driver.refresh()  # reload
                    self.driver.find_element_by_xpath(
                        '//*[@id="datesbar"]/div/ul/li[4]').click()
                    block_list = self.driver.find_elements_by_xpath(
                        '//span[@class="cell football easyui-tooltip tooltip-f"]')
                    for block_place in block_list:
                        if block_place.get_attribute(
                                "data-timer") == sw1[2]:
                            block_place.click()
                            self.driver.find_element_by_xpath(
                                '//*[@id="reserve"]').click()
                            time.sleep(1)
                            break
                    self.driver.find_element_by_xpath(
                        '//*[@id="reserve"]').click()
                    break
        # Confirm and pay in each of the three tabs.
        self.conf()
        self.driver.switch_to.window(self.driver.window_handles[0])
        self.conf()
        self.driver.switch_to.window(self.driver.window_handles[2])
        self.conf()

    def conf(self):  # confirm-and-purchase step
        time.sleep(1)
        try:
            self.driver.find_element_by_xpath(
                '//button[@class="confirm"]').click()
            time.sleep(1)
            self.driver.find_element_by_xpath(
                '//img[@src="/images/payment/ydzx.png"]').click()
            self.driver.find_element_by_xpath(
                '//button[@class="button-large button-info"]').click()
            now = datetime.datetime.now()
            print(now.strftime('%Y-%m-%d %H:%M:%S'))
            print('purchase success')
        except ElementNotVisibleException as e:
            # Only this exception type is treated as "court already taken".
            print("没有抢到,被定完了")

    def is_element_exist(self, command):  # xpath-based existence check
        try:
            s2 = self.driver.find_element_by_xpath(command)
        except NoSuchElementException as e:
            return False
        return True

    def Convertimg(self):
        # Screenshot the captcha element, clean it up via clearimage() and
        # return the first line of text Baidu OCR recognises in 'final.png'.
        imglocation = ("//img[@name='captchaImg']")  # xpath of the captcha image
        item = self.driver.find_element_by_xpath(imglocation)
        item.screenshot("yanzhengma.png")
        self.clearimage('yanzhengma.png')
        result = self.aipOcr.basicGeneral(
            self.get_file_content('final.png'), self.options)
        # NOTE(review): raises if the response has no or an empty 'words_result'.
        text = result['words_result'][0]['words']
        os.remove('clear.png')
        # os.remove('final.png')
        return text

    def clearimage(self, originadd):
        # Remove dark interference lines, binarise and resize the captcha;
        # the result is written to 'final.png'.
        img = Image.open(originadd)  # load the image from disk
        # Replace the black interference lines with white.
        width = img.size[0]  # width
        height = img.size[1]  # height
        for i in range(0, width):  # walk every column
            for j in range(0, height):  # walk every row
                data = (img.getpixel((i, j)))  # pixel value at (i, j)
                if (data[0] <= 25 and data[1] <= 25 and data[2] <= 25):  # r, g and b are all <= 25
                    img.putpixel((i, j), (255, 255, 255, 255))  # turn such pixels white
        img = img.convert("RGB")  # force the image to RGB
        img.save('clear.png')  # save the image with the modified pixels
        # Convert to greyscale, then threshold at 160.
        Grayimg = cv2.cvtColor(cv2.imread('clear.png'), cv2.COLOR_BGR2GRAY)
        ret, thresh = cv2.threshold(Grayimg, 160, 255, cv2.THRESH_BINARY)
        cv2.imwrite('clear.png', thresh)
        os.remove('yanzhengma.png')
        self.ResizeImage('clear.png', 'final.png', 90, 32, 'png')

    def ResizeImage(self, filein, fileout, width, height, type):
        # Resize the image at filein and save it to fileout in the given format.
        img = Image.open(filein)
        out = img.resize((width, height), Image.ANTIALIAS)
        out.save(fileout, type)

    def get_file_content(self, filePath):
        # Return the raw bytes of the file.
        with open(filePath, 'rb') as fp:
            return fp.read()
def f_pic2word(self):
    """OCR the numbered page images in ``self.picfolder`` and append the text to a file.

    Credentials come from ``self.app`` / ``self.key1`` / ``self.key2`` when all
    three are set, otherwise from the three input widgets. If any widget value
    is empty, a prompt is written into that widget and the method returns
    without doing anything else.

    Pages ``1.jpg`` .. ``999.jpg`` are processed in order; processing stops
    after the fifth page that fails to open or to be recognised. The text of
    each page is appended to ``self.output_words_pic``. When at least one page
    was written, the credentials are cached on the instance and saved to
    ``self.keyfile``.
    """
    if self.app == '' or self.key1 == '' or self.key2 == '':
        # No cached credentials: read them from the input widgets.
        app_id = self.ut_id.toPlainText()
        key1 = self.ut_key1.toPlainText()
        key2 = self.ut_key2.toPlainText()
        if app_id == '' or key1 == '' or key2 == '':
            if app_id == '':
                self.ut_id.setText("请输入id")
            if key1 == '':
                self.ut_key1.setText("请输入key1")
            if key2 == '':
                self.ut_key2.setText("请输入key2")
            return
    else:
        app_id = self.app
        key1 = self.key1
        key2 = self.key2

    success = 0
    fail = 0
    miss = 0
    # Same runtime value as the former "\%d.jpg", without the invalid escape.
    path = self.picfolder + "\\%d.jpg"

    # The client and options do not change between pages, so build them once.
    client = AipOcr(app_id, key1, key2)
    options = {
        "language_type": "CHN_ENG",
        "detect_direction": "false",
        "detect_language": "false",
        "probability": "false",
    }

    for page in range(1, 1000):
        try:
            # The context manager closes the image even if the OCR call raises.
            with open(path % page, "rb") as img:
                res = client.basicGeneral(img.read(), options)
            word = str(res['words_result'])
            # NOTE(review): the text is recovered by slicing the repr of the
            # result list, and the first fragment is skipped; this is kept
            # unchanged -- confirm it yields the intended text.
            t = word.split("'}, {'")
            t2 = ""
            for idx in range(1, len(t)):
                t2 += t[idx][2:-4]
                t2 += '\n'
            t2 += "\n\n"
            try:
                with open(self.output_words_pic, 'a') as f:
                    f.write(t2)
                    f.write("\n\n")
                success += 1
            except Exception as e:
                print(e)
                fail += 1
            print(t2)
            print("\n\n")
        except Exception as e:
            # Missing page image or failed recognition.
            print(e)
            miss += 1
            if miss == 5:
                break

    self.ut_page.setText("转换成功:" + str(success) + "页 转换失败:" + str(fail) + "页")
    if success > 0:
        # The credentials worked: cache them and save them for next time.
        self.app = app_id
        self.key1 = key1
        self.key2 = key2
        try:
            with open(self.keyfile, 'w') as f:
                f.write(app_id + '@@' + key1 + '@@' + key2 + '@@')
        except Exception as e:
            print(e)
def __init__(self, content):
    """Create the OCR client from the module-level credentials and keep *content*."""
    ocr_client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    self.aipOcr = ocr_client
    self.content = content
def sign_in_by_code_hdsky(self, task, entry, config):
    """Sign in on hdsky.me by solving its image captcha with Baidu OCR.

    Reads the ``aipocr_*`` credentials from *config*. Returns early when the
    OCR dependencies or credentials are missing (the entry is failed), or when
    the site state is anything other than ``SignState.NO_SIGN_IN``. Otherwise
    it requests a captcha, whitens the pixels ``self._detect_noise`` flags,
    recognises the text and submits it when it is exactly six characters
    long. When recognition yields no usable code, or the site reports a wrong
    answer, both the raw and the cleaned captcha are written next to this
    file and the entry is failed.
    """
    app_id = config.get('aipocr_app_id')
    api_key = config.get('aipocr_api_key')
    secret_key = config.get('aipocr_secret_key')

    if not (AipOcr and Image):
        entry['result'] = 'baidu-aip or pillow not existed'
        entry.fail(entry['result'])
        return
    if not (app_id and api_key and secret_key):
        entry['result'] = 'Api not set'
        entry.fail(entry['result'])
        return

    client = AipOcr(app_id, api_key, secret_key)

    response = self._request(task, entry, 'get', entry['base_url'],
                             headers=entry['headers'])
    state = self.check_state(entry, response, entry['base_url'])
    if state != SignState.NO_SIGN_IN:
        return

    # Ask the site for a fresh captcha hash.
    data = {
        'action': (None, 'new'),
    }
    response = self._request(task, entry, 'post',
                             'https://hdsky.me/image_code_ajax.php',
                             headers=entry['headers'], files=data)
    content = self._decode(response)
    image_hash = json.loads(content)['code']

    if image_hash:
        img_response = self._request(
            task, entry, 'get',
            'https://hdsky.me/image.php?action=regimage&imagehash={}'.format(
                image_hash),
            headers=entry['headers'])
    else:
        entry['result'] = 'Cannot find: image_hash, url: {}'.format(
            entry['url'])
        entry.fail(entry['result'])
        return

    # Whiten every pixel flagged as noise before handing the image to OCR.
    img = Image.open(BytesIO(img_response.content))
    width = img.size[0]
    height = img.size[1]
    for i in range(0, width):
        for j in range(0, height):
            noise = self._detect_noise(img, i, j, width, height)
            if noise:
                img.putpixel((i, j), (255, 255, 255))
    img_byte_arr = BytesIO()
    img.save(img_byte_arr, format='png')

    ocr_response = client.basicAccurate(img_byte_arr.getvalue(),
                                        {"language_type": "ENG"})
    logger.info(ocr_response)
    # An error reply or an empty recognition result has no usable
    # 'words_result'; treat it as an empty code so the "ocr failed" branch
    # below handles it instead of raising KeyError/IndexError.
    words_result = ocr_response.get('words_result') or []
    code = ''
    if words_result:
        code = re.sub('\\W', '', words_result[0]['words']).upper()

    if len(code) == 6:
        data = {
            'action': (None, 'showup'),
            'imagehash': (None, image_hash),
            'imagestring': (None, code)
        }
        response = self._request(task, entry, 'post', entry['url'],
                                 headers=entry['headers'], files=data)
        # Log the reply instead of dumping it to stdout with a bare print.
        logger.debug(response.text)
        state = self.check_state(entry, response, response.request.url)

    if len(code) != 6 or state == SignState.WRONG_ANSWER:
        # Keep both captcha versions on disk for troubleshooting.
        with open(path.dirname(__file__) + "/temp.png", "wb") as code_file:
            code_file.write(img_response.content)
        with open(path.dirname(__file__) + "/temp2.png", "wb") as code_file:
            code_file.write(img_byte_arr.getvalue())
        entry['result'] = 'ocr failed: {}, see temp.png'.format(code)
        entry.fail(entry['result'])
def sign_in_by_code(self, task, entry, config):
    """Sign in on a site that protects its sign-in form with an image captcha.

    Reads the ``aipocr_*`` credentials from *config*. Returns early when the
    OCR dependencies or credentials are missing (the entry is failed), or when
    the site state is anything other than ``SignState.NO_SIGN_IN``. Otherwise
    it scrapes the captcha hash and image URL from ``entry['url']``, whitens
    the pixels ``self._detect_noise`` flags, recognises the text and submits
    it when it is exactly six characters long. When recognition yields no
    usable code, or the site reports a wrong answer, both the raw and the
    cleaned captcha are written next to this file and the entry is failed.
    """
    app_id = config.get('aipocr_app_id')
    api_key = config.get('aipocr_api_key')
    secret_key = config.get('aipocr_secret_key')

    if not (AipOcr and Image):
        entry['result'] = 'baidu-aip or pillow not existed'
        entry.fail(entry['result'])
        return
    if not (app_id and api_key and secret_key):
        entry['result'] = 'Api not set'
        entry.fail(entry['result'])
        return

    client = AipOcr(app_id, api_key, secret_key)

    response = self._request(task, entry, 'get', entry['base_url'],
                             headers=entry['headers'])
    state = self.check_state(entry, response, entry['base_url'])
    if state != SignState.NO_SIGN_IN:
        return

    # Scrape the captcha hash and image location from the sign-in page.
    response = self._request(task, entry, 'get', entry['url'],
                             headers=entry['headers'])
    content = self._decode(response)
    image_hash_re = re.search('(?<=imagehash=).*?(?=")', content)
    img_src_re = re.search('(?<=img src=").*?(?=")', content)

    if image_hash_re and img_src_re:
        image_hash = image_hash_re.group()
        img_src = img_src_re.group()
        img_response = self._request(task, entry, 'get',
                                     urljoin(entry['url'], img_src),
                                     headers=entry['headers'])
    else:
        entry['result'] = 'Cannot find key: image_hash, url: {}'.format(
            entry['url'])
        entry.fail(entry['result'])
        return

    # Whiten every pixel flagged as noise before handing the image to OCR.
    img = Image.open(BytesIO(img_response.content))
    width = img.size[0]
    height = img.size[1]
    for i in range(0, width):
        for j in range(0, height):
            noise = self._detect_noise(img, i, j, width, height)
            if noise:
                img.putpixel((i, j), (255, 255, 255))
    img_byte_arr = BytesIO()
    img.save(img_byte_arr, format='png')

    ocr_response = client.basicAccurate(img_byte_arr.getvalue(),
                                        {"language_type": "ENG"})
    logger.info(ocr_response)
    # An error reply or an empty recognition result has no usable
    # 'words_result'; treat it as an empty code so the "ocr failed" branch
    # below handles it instead of raising KeyError/IndexError.
    words_result = ocr_response.get('words_result') or []
    code = ''
    if words_result:
        code = re.sub('\\W', '', words_result[0]['words']).upper()

    if len(code) == 6:
        params = {'cmd': 'signin'}
        data = {
            'imagehash': (None, image_hash),
            'imagestring': (None, code)
        }
        response = self._request(task, entry, 'post', entry['url'],
                                 headers=entry['headers'], files=data,
                                 params=params)
        state = self.check_state(entry, response, response.request.url)

    if len(code) != 6 or state == SignState.WRONG_ANSWER:
        # Keep both captcha versions on disk for troubleshooting.
        with open(path.dirname(__file__) + "/opencd_code.png", "wb") as code_file:
            code_file.write(img_response.content)
        with open(path.dirname(__file__) + "/opencd_code2.png", "wb") as code_file:
            code_file.write(img_byte_arr.getvalue())
        entry['result'] = 'ocr failed: {}, see opencd_code.png'.format(
            code)
        entry.fail(entry['result'])
def __init__(self, app_id, app_key, secret_key):
    """Record the Baidu OCR / token endpoint URLs and create the AipOcr client."""
    base_url = "https://aip.baidubce.com"
    self.ocr_api = base_url + "/rest/2.0/ocr/v1/accurate_basic"
    self.token_api = base_url + "/oauth/2.0/token"
    self.client = AipOcr(app_id, app_key, secret_key)
from aip import AipOcr

# Image text recognition
APP_ID = '19082278'
APP_KEY = 'QWkx0oIWNoHuoer6f14PXDbq'  # API key
SECRET_KEY = 'qXX5aTLKrDY9jyfCE171Q4zMTVl2ii8I'  # secret key

text_list = []
client = AipOcr(APP_ID, APP_KEY, SECRET_KEY)  # create the AipOcr client

# Load the picture, run basic OCR on it, then collect and print each line.
with open(r'2.png', 'rb') as image:
    img_data = image.read()

text = client.basicGeneral(img_data)  # the SDK returns a dict
# print(text)
for i in text.get('words_result'):
    line = i.get('words')
    text_list.append(line)
    print(line)
# print(text_list)
# Service name and the port each micro-service listens on.
SHARE = "share"
ms_port_config = {
    SHARE: 20201,
}

# Database connection settings.
# NOTE(review): the password is hard-coded in source -- consider loading it
# from the environment.
db_host = '127.0.0.1'
db_user = '******'
db_passwd = 'liuyirui'
db_port = 3306

#south
# south_base_host = "http://localhost:20202"
south_base_host = "http://49.233.250.82:8888"
key = 'key'
client = 'name:client-2'

# Baidu image recognition: the dict is unpacked as keyword arguments into
# AipOcr below.
# NOTE(review): API credentials are hard-coded in source.
bd_config = {
    'appId': '18898769',
    'apiKey': '80LZK2FsGUoSReVwaqhW5RN6',
    'secretKey': 'sbMp3ERC1v0yuFYtDWPYsjlDB6cXQc2I'
}
bd_client = AipOcr(**bd_config)

# log
# Chained assignment: _levels is the tuple of level names, and each level
# constant is bound to its own name string.
_levels = TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL = 'TRACE', 'DEBUG', 'INFO', 'SUCCESS', 'WARNING', 'ERROR', 'CRITICAL'
STDERR_DEBUG_LEVEL = DEBUG
def autoChoose():
    # Purpose: screenshot the phone via adb, OCR the cropped question area,
    # search Baidu for the question text and print which of the options A/B/C
    # occurs most often in the search-result abstracts.
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    start_time = time.time()
    # screencap and pull
    os.system('adb shell screencap -p /sdcard/screen_shot.png')
    os.system('adb pull /sdcard/screen_shot.png .')
    # crop image
    image = Image.open('./screen_shot.png')
    image_size = image.size
    print image_size
    width = image_size[0]
    height = image_size[1]
    # Keep the band between 15% and 70% of the screen height.
    region = image.crop((0, height * 0.15, width, height * 0.7))
    region.save('./screen_shot_crop.png')
    # OCR
    aa = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    filepath = './screen_shot_crop.png'
    with open(filepath, 'rb') as fp:
        image = fp.read()
    ret = aa.basicGeneral(image)
    # question region
    if ret:
        ask = ''
        lines = ret['words_result']
        # The question spans 1, 2 or 3 OCR lines depending on the line count;
        # the text before the last '.' of the first line is dropped.
        if len(lines) <= 4:
            ask = lines[0]['words'].split('.')[-1]
        elif len(lines) == 5:
            ask = lines[0]['words'].split('.')[-1] + lines[1]['words']
        elif len(lines) > 5:
            ask = lines[0]['words'].split(
                '.')[-1] + lines[1]['words'] + lines[2]['words']
        # The last three OCR lines are taken as options A, B and C.
        if len(lines) >= 3:
            Coption = lines[-1]['words']
            Boption = lines[-2]['words']
            Aoption = lines[-3]['words']
        else:
            Aoption, Boption, Coption = ' ', ' ', ' '
    # NOTE(review): if 'ret' is falsy, 'ask' and the option names are never
    # bound and the lines below raise NameError.
    keyword = ask
    print keyword
    # 'convey' is fixed to 'n', so only the plain-search branch is taken.
    convey = 'n'
    end_time = time.time()
    if convey == 'y' or convey == 'Y':
        results = baiduSearch.search(keyword, convey=True)
    elif convey == 'n' or convey == 'N' or not convey:
        results = baiduSearch.search(keyword)
    else:
        print('输入错误')
        exit(0)
    count = 0
    # Occurrence count of each option across the inspected abstracts.
    N = {'A': 0, 'B': 0, 'C': 0}
    for result in results:
        N['A'] += result.abstract.count(Aoption)
        N['B'] += result.abstract.count(Boption)
        N['C'] += result.abstract.count(Coption)
        # Qred = result.abstract.replace(keyword, '\033[1;30;41m' + keyword + '\033[0m')# '\033[1;30;41m' + Aoption + '\033[0m'
        # Wrap each option in an ANSI colour escape within the abstract.
        Ared = result.abstract.replace(
            Aoption,
            '\033[1;31m' + Aoption + '\033[0m')  #'\033[1;30;41m' + Aoption + '\033[0m'
        Bred = Ared.replace(Boption, '\033[1;32m' + Boption +
                            '\033[0m')  #'\033[1;32m' + 'Boption' + '\033[0m'
        Cred = Bred.replace(Coption, '\033[1;34m' + Coption +
                            '\033[0m')  #'\033[1;34m' + 'Coption' + '\033[0m'
        print('\033[1;30;41m' + result.title + '\033[0m')
        print ' '
        print Cred
        print ' '
        count = count + 1
        # Stop after four search results.
        if count == 4:
            break
    # The answer is the option with the highest count.
    print '\033[1;31;40m', '答案: ', max(N.items(),
                                        key=lambda x: x[1])[0], '\033[0m'
    # end_time was taken before the search, so the search is not included.
    print(r'run time: ' + str(end_time - start_time) + 's')
def main():
    """Collect pig/pork/corn price data and write it into the calling workbook.

    Steps, in order:
      1. Crawl the Ministry of Agriculture listing pages and extract the pork
         price for today, the start of the month and the start of the year.
      2. Find the latest breeding-sow price notice, OCR its image with Baidu
         and take the price from the recognised lines.
      3. Download piglet, hog and corn series from Wind and average them.
      4. Write everything into the first sheet of the workbook that called
         this function through xlwings.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    """
    book = xw.Book.caller()
    """爬取农业部数据"""
    # --- Crawl the Ministry of Agriculture data ---
    today = datetime.date.today()
    # First day of the current month
    firstmonthday = datetime.datetime(today.year, today.month, 1)
    # First day of the current year
    firstday = datetime.datetime(today.year, 1, 1)
    oneday = datetime.timedelta(days=1)
    all = {}  # every crawled page link; key is the date, value is the link
    url = 'http://www.scs.moa.gov.cn/scxxfb/'  # index page
    response = requests.get(url)
    content = response.content
    page = etree.HTML(content)
    data = page.find('.//div[@class="sj_e_tonzhi_list"]')
    for i in data:
        infos = i.findall('.//li')
        for info in infos:
            rrr = info.find('.//a')
            link = url + str(rrr.get('href'))
            date = re.findall(r'.\w+.t(\d+)\w+', link)
            all[date[0]] = str(link)
    # Follow-up listing pages index_1.htm .. index_12.htm
    for i in range(1, 13):
        url = 'http://www.scs.moa.gov.cn/scxxfb/index_' + str(i) + '.htm'
        response = requests.get(url)
        content = response.content
        page = etree.HTML(content)
        data = page.find('.//div[@class="sj_e_tonzhi_list"]')
        for i in data:
            infos = i.findall('.//li')
            for info in infos:
                rrr = info.find('.//a')
                link = 'http://www.scs.moa.gov.cn/scxxfb/' + str(
                    rrr.get('href'))
                date = re.findall(r'.\w+.t(\d+)\w+', link)
                all[date[0]] = str(link)
    #print(all)

    # Fetch a target page and extract the pork price with a regex
    def price_get(link):
        response = requests.get(link)
        content = response.content
        page = etree.HTML(content)
        info = page.find('.//div[@class="TRS_Editor"]')
        text = info.find('.//p').text
        price = re.findall(r'猪肉\D+(\d+.\d+)元', text)
        return price

    price1 = {}  # pork prices; key is the date, value is the price
    # Today's price; step back one day at a time until a published date is found
    while today.strftime('%Y%m%d') not in all.keys():
        today -= oneday
    else:
        d_p_price = price_get(str(all[today.strftime('%Y%m%d')]))
        price1[today.strftime('%Y%m%d')] = d_p_price
    # Price at the start of the month; step forward to the first published date
    while firstmonthday.strftime('%Y%m%d') not in all.keys():
        firstmonthday += oneday
    else:
        m_p_price = price_get(str(all[firstmonthday.strftime('%Y%m%d')]))
        price1[firstmonthday.strftime('%Y%m%d')] = m_p_price
    # Price at the start of the year; step forward to the first published date
    while firstday.strftime('%Y%m%d') not in all.keys():
        firstday += oneday
    else:
        y_p_price = price_get(str(all[firstday.strftime('%Y%m%d')]))
        price1[firstday.strftime('%Y%m%d')] = y_p_price
    # List of the corresponding prices
    #l=[price1[today.strftime('%Y%m%d')],price1[firstmonthday.strftime('%Y%m%d')],price1[firstday.strftime('%Y%m%d')]]
    print(price1)
    ''' 最终结果是price1是一个字典 pric1e[today.strftime('%Y%m%d')]是今日价格 price1[firstmonthday.strftime('%Y%m%d')]是本月初 price1[firstday.strftime('%Y%m%d')]是本年初 '''
    # Result: price1 maps today's, the month-start and the year-start date
    # strings to their prices.

    """爬取二元能繁母猪数据"""
    # --- Crawl the breeding-sow data ---
    # Baidu cloud account
    APP_ID = '#####'
    API_KEY = '########'
    SECRECT_KEY = '########'
    client = AipOcr(APP_ID, API_KEY, SECRECT_KEY)
    # Crawl the search page to obtain the link of the target page
    url = 'http://sousuo.gov.cn/s.htm?q=%E4%BA%8C%E5%85%83%E6%AF%8D%E7%8C%AA%E9%94%80%E5%94%AE%E4%BB%B7%E6%A0%BC&t=govall&timetype=timeqb&mintime=&maxtime=&sort=pubtime&sortType=1&nocorrect='
    response = requests.get(url)
    content = response.content
    page = etree.HTML(content)
    table = page.find('.//h3[@class="res-title"]')
    channels = table.find('.//a')
    link = channels.get('href')
    #print(link)
    # Fetch the title of the latest notice
    html = requests.get(link)
    html.encoding = 'utf-8'
    text = html.text
    page1 = etree.HTML(text)
    info = page1.find('.//div[@class="article oneColumn pub_border"]')
    t = info.find('.//h1')
    title = t.text
    #print(title)
    # Extract the date the data refers to from the notice title
    datestr = title[len(title) - 14:len(title) - 9]
    # NOTE(review): the year is hard-coded to 2020.
    date = '2020年' + datestr
    date1 = datetime.datetime.strptime(date, '%Y年%m月%d日')
    #print(date1)
    # Locate the image embedded in the notice
    content1 = page1.find('.//div[@class="pages_content"]')
    channels1 = content1.find('.//img')
    link_img = channels1.get('src')
    links = str(link)
    pic_urls = links[:len(links) - 19] + link_img
    # Call the Baidu API to OCR the image and take the price from the result
    prices = client.basicGeneralUrl(pic_urls)
    r = prices['words_result']
    # The price is read from the sixth recognised line.
    info = r[5]
    price = info['words']
    #print(price)
    # Update the weekly breeding-sow price
    pork_price = {}
    week = date1.strftime("%W")
    pork_price[week + '周'] = price  # weekly sow price; key is the week number
    print(pork_price)

    """抓取wind数据 写入excel"""
    # --- Fetch Wind data and write it to Excel ---
    # Connect to the Wind database
    w.start()
    w.isconnected()
    # Piglet statistics
    ## Download the piglet data
    pig_baby_codes = ['############']  ### piglet series codes redacted
    pig_baby = w.edb(pig_baby_codes,
                     datetime.date.today() + datetime.timedelta(days=-5),
                     datetime.date.today(),
                     usedf=True,
                     ShowBlank=0)
    pig_baby = pig_baby[1]
    pig_baby.columns = ['###########']  ### piglet region labels redacted
    ## Aggregate the piglet data per region (mean of each pair of columns)
    pig_baby_mean = pd.DataFrame([])
    pig_baby_mean_names = ['##########']  ### per-region piglet labels redacted
    for i in range(1, 13, 2):
        pig_baby_mean[pig_baby_mean_names[int(
            (i - 1) / 2)]] = (pig_baby.iloc[:, i - 1] + pig_baby.iloc[:, i]) / 2
    print(pig_baby_mean)
    # Hogs
    ## Download the hog data
    pig_codes = ["###############"]  ### hog series codes redacted
    pig = w.edb(pig_codes,
                datetime.date.today() + datetime.timedelta(days=-4),
                datetime.date.today(),
                usedf=True,
                ShowBlank=0)
    pig = pig[1]
    pig.columns = ["###############"]  ### hog region labels redacted
    ## Aggregate the hog data per region
    pig_mean = pd.DataFrame(np.zeros((4, 5)))
    pig_mean_names = ["###########"]  ### per-region hog labels redacted
    pig_mean.columns = pig_mean_names
    print(pig_mean)
    pig_mean.index = pig.index[1:]
    for name in pig_mean_names:
        # Average every source column whose label contains the region name.
        i = 0
        for n in list(pig.columns):
            if name in n:
                pig_mean[name] = pig_mean[name] + pig[n]
                i += 1
        pig_mean[name] = pig_mean[name] / i
    print(pig_baby_mean)
    # Corn statistics
    ## Download the corn price
    corn_codes = ['S5005793']
    corn = w.edb(corn_codes,
                 datetime.date.today() + datetime.timedelta(days=-5),
                 datetime.date.today(),
                 usedf=True,
                 ShowBlank=0)
    corn = corn[1]
    corn.columns = ['现货价:玉米:平均价']
    corn = corn.T
    print(corn)
    # Close the Wind interface
    w.stop()
    # Combine piglet, hog, pork and corn prices into one list
    pig_baby_mean = pig_baby_mean.T
    pig_mean = pig_mean.T
    pig_baby_data = list(pig_baby_mean[pig_baby_mean.columns[-1]])
    pig_baby_data.append(np.mean(pig_baby_data))
    pig_data = list(pig_mean[pig_mean.columns[-1]])
    pig_data.append(np.mean(pig_data))
    corn_data = list(corn[corn.columns[-1]])
    pig_baby_data.extend(pig_data)
    pig_baby_data.extend(corn_data)
    pig_baby_data.append(float(price1[today.strftime('%Y%m%d')][0]))
    alldata = pig_baby_data
    print(alldata)
    # Dates of the last 5 days: 'days' holds datetimes, 'days1' holds strings
    days = [
        datetime.datetime.today() + datetime.timedelta(days=-i)
        for i in range(5)
    ]
    days1 = [days[i].strftime('%Y-%m-%d') for i in range(5)]
    days.reverse()
    days1.reverse()
    print(days)
    # Labels of the last five weeks: week_nows
    week_list = {}
    today = datetime.date.today()
    weeks = today.strftime("%W")
    week_n = int(weeks)
    week_list[week_n] = week_n
    l = [week_list[week_n] - i for i in range(5)]
    for i in range(5):
        l[i] = str(l[i]) + '周'
    l.reverse()
    print(l)
    week_nows = l
    # Connect to the target sheet
    sht = book.sheets[0]
    # Decide whether the yearly / monthly breeding-sow cells need updating
    firstday_week = datetime.datetime(datetime.date.today().year,
                                      datetime.date.today().month,
                                      1).strftime("%W") + '周'
    if week_nows[-1] == '1周':
        sht.range('Q8').value = float(price)
    if week_nows[-1] == firstday_week:
        sht.range('P8').value = float(price)
    # Decide whether the yearly / monthly piglet, hog, pork and corn cells
    # need updating. days1 entries look like 'YYYY-MM-DD', so the month-day
    # part starts at index 5 and the day part at index 8; the former offsets
    # 6 and 9 produced 'M-DD' and 'D', which never matched.
    if days1[-1][5:] == '01-01':
        sht.range('Q11:Q25').options(transpose=True).value = alldata
    if days1[-1][8:] == '01':
        sht.range('P11:P25').options(transpose=True).value = alldata
    # Update the main data (skipped when today's data is already present)
    ## Breeding sows
    if sht.range('K7').value == week_nows[-1]:
        pass
    else:
        # Shift the previous four weeks left, then write the newest value.
        sht.range('G8:J8').value = sht.range('H8:K8').value
        sht.range('K8').value = float(price)
    ## Piglets, hogs, pork, corn
    if sht.range('K9').value.date() == days[-1].date():
        pass
    else:
        sht.range('G7:K7').value = week_nows
        sht.range('G9:K9').value = days1
        sht.range('G11:J25').value = sht.range('H11:K25').value
        sht.range('K11:K25').options(transpose=True).value = alldata
def OCR_Core(Image, **kwargs):  #: PIL.Image
    """Run OCR on ``Image`` with the currently selected Baidu or Tencent engine.

    The image is saved to ``important/LastImage.jpg``, read back as bytes into
    the global ``LastImageValue`` and sent to the engine chosen through
    ``OcrAll`` (keyed by ``OCRResultSetting`` when ``EXTRA`` is truthy, else by
    ``OcrSetting``).  The raw response is kept in the global ``ResultJson`` and
    the recognised text in the global ``OCRText``.

    Args:
        Image: object with a ``save(path)`` method (annotated as PIL.Image).
        **kwargs: ``EXTRA`` - when truthy, use the engine selected by
            ``OCRResultSetting`` instead of ``OcrSetting``.

    Returns:
        The recognised lines, each followed by ``"\\n"``.  ``""`` when Baidu
        finds no text; ``False`` when the Tencent SDK raises.

    Raises:
        SystemExit: when Baidu reports an error (after showing a dialog).
    """
    global GALMode, ResultJson  # shared with the text-processing code
    global LastImageValue, OCRText, OCRResultSetting

    # The settings file is only read here; "r+" would needlessly fail on a
    # read-only file.
    with open("./important/setting.json", 'r') as f:
        setting = json.load(f)

    if kwargs.get("EXTRA"):
        SelectOCR = OcrAll[OCRResultSetting.get()]
    else:
        SelectOCR = OcrAll[OcrSetting.get()]

    # Encoding in memory was too slow, so the image goes through a local file.
    Image.save('important/LastImage.jpg')
    with open('important/LastImage.jpg', 'rb') as f:
        LastImageValue = f.read()
    OCRText = ""

    if SelectOCR == "bd_normal" or SelectOCR == "bd_accurate":
        bd_info = setting["userInfo"]["bd_info"]
        BDOcr = AipOcr(bd_info["AppID"], bd_info["APIKey"],
                       bd_info["SecretKey"])
        if GALMode:
            # GAL mode always uses the basic endpoint with its own language.
            ResultJson = BDOcr.basicGeneral(
                LastImageValue,
                {"language_type": setting["defaultGALLanguage"]})
        elif SelectOCR == "bd_normal":
            ResultJson = BDOcr.basicGeneral(
                LastImageValue,
                {"language_type": setting["defaultOCRLanguage"]})
        else:
            ResultJson = BDOcr.basicAccurate(LastImageValue)

        # Error responses carry "error_code" and no "words_result_num", so
        # they must be handled BEFORE any result field is indexed; otherwise
        # a KeyError hides the real problem from the user.
        error_code = ResultJson.get('error_code')
        if error_code is not None or "words_result" not in ResultJson:
            if error_code == 14:  # credentials rejected
                messagebox.showerror(
                    title="Error",
                    message=u"检查APPID,APIKEY,以及SECRET_KEY,程序退出")
            elif error_code == 17:  # daily quota exceeded
                messagebox.showerror(title="Error", message=u"今日次数超额")
            else:
                messagebox.showerror(title="Error",
                                     message=u"错误代码:" + str(ResultJson))
            sys.exit()

        words = ResultJson["words_result"]
        if not ResultJson.get("words_result_num") or not words:
            # Nothing recognised: tell the user (silently in GAL mode) and
            # return an empty text instead of treating it as a fatal error.
            if not GALMode:
                messagebox.showinfo(u"识别错误", u"未识别到文字")
            return ""

        OCRText = "".join(item['words'] + "\n" for item in words)
        return OCRText

    # Tencent OCR
    TX_INFO = setting["userInfo"]["tx_info"]
    try:
        cred = credential.Credential(TX_INFO["SecretId"],
                                     TX_INFO["SecretKey"])
        httpProfile = HttpProfile()
        httpProfile.endpoint = "ocr.tencentcloudapi.com"
        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = ocr_client.OcrClient(cred, "ap-guangzhou", clientProfile)

        # LanguageType options: zh / auto / jap / kor
        params = json.dumps({
            "ImageBase64": base64.b64encode(LastImageValue).decode('utf-8'),
            "LanguageType": "auto",
        })

        # GeneralFastOCR     - fast printed-text OCR, no language option
        # GeneralBasicOCR    - printed-text OCR with language option
        # GeneralAccurateOCR - high-accuracy OCR, no language option
        if SelectOCR == "tx_normal":
            req = models.GeneralBasicOCRRequest()
            req.from_json_string(params)
            resp = client.GeneralBasicOCR(req)
        elif SelectOCR == "tx_quick":
            req = models.GeneralFastOCRRequest()
            req.from_json_string(params)
            resp = client.GeneralFastOCR(req)
        else:
            req = models.GeneralAccurateOCRRequest()
            req.from_json_string(params)
            resp = client.GeneralAccurateOCR(req)

        ResultJson = json.loads(resp.to_json_string())  # raw result json
        OCRText = "".join(
            item["DetectedText"] + "\n"
            for item in ResultJson["TextDetections"])
        return OCRText
    except TencentCloudSDKException as err:
        if err.get_code() == "FailedOperation.ImageNoText":
            if not GALMode:
                messagebox.showinfo("识别失败", "没有识别到文字")
        # NOTE(review): other SDK errors (bad credentials, quota, network)
        # are still reported only through the falsy return value - confirm
        # whether they should surface a dialog like the Baidu branch does.
        return False
width = 280 height = 130 type = 'png' repadd = dir + "rep.png" greyadd = dir + "grey.png" edadd = dir + "edge.png" resadd = dir + "resize.png" config = { 'appId': '11352343', 'apiKey': 'Nd5Z1NkGoLDvHwBnD2bFLpCE', 'secretKey': 'A9FsnnPj1Ys2Gof70SNgYo23hKOIK8Os' } client = AipOcr(**config) driver = webdriver.Firefox( executable_path='/Users/mengjiexu/Documents/parser/geckodriver') driver.get("http://gym.sysu.edu.cn/product/show.html?id=61") driver.maximize_window() driver.find_element_by_xpath("//a[contains(text(),'登录')]").click() screenshotadd = "/Users/mengjiexu/Documents/badminton/screenshot.png" codeadd = "/Users/mengjiexu/Documents/badminton/code.png" rebadd = "/Users/mengjiexu/Documents/badminton/rgb.png" def ResizeImage(filein, fileout, width, height, type): img = Image.open(filein) out = img.resize((width, height),
def __init__(self):
    """Set ``self.switch`` to 0 and build the Baidu OCR client from ``keys.baidu``."""
    self.switch = 0
    baidu_keys = keys.baidu
    self.client = AipOcr(
        baidu_keys['ocr_id'],
        baidu_keys['ocr_ak'],
        baidu_keys['ocr_sk'],
    )
{ 'app_name': '头脑王者(iPhone X)', 'answer_count': 4, 'crop_area': (50, 250, 350, 825), #iPhone X on 1440*900 display resolution Mac 'mask_area': [(20, 520, 70, 540), (340, 520, 390, 540)] } #iPhone X on 1440*900 display resolution Mac ] NEG_KEYWORDS = ['不', '没'] # Configuration cfg = ConfigParser() cfg.read('secret.ini') client = AipOcr(cfg.get('BAIDU_OCR', 'APP_ID'), cfg.get('BAIDU_OCR', 'API_KEY'), cfg.get('BAIDU_OCR', 'SECRET_KEY')) def get_file_content(filePath): with open(filePath, 'rb') as fp: return fp.read() def is_contain_keywords(text, keywords): for word in keywords: if text.count(word) > 0: return True return False
def renew_client_ocr(self):
    """Replace ``self.client`` with a newly built AipOcr and add one to ``self.switch``."""
    credentials = keys.baidu
    fresh_client = AipOcr(
        credentials['ocr_id'],
        credentials['ocr_ak'],
        credentials['ocr_sk'],
    )
    self.client = fresh_client
    self.switch = self.switch + 1
def pull_srt_from_video(video_name, save_srt_name):
    """Extract hard-coded subtitles from a video by OCR-ing sampled frames.

    Every ``interval_frame``-th frame between ``start_frame`` and
    ``end_frame`` is cropped to a per-movie subtitle region, written to a
    temporary jpg, and passed to ``img_to_str``.  Consecutive similar results
    (Levenshtein ratio >= 0.5 against the last stored text) are merged into
    one ``[starttime, endtime, text]`` entry.  The list is always saved with
    ``np.savez(save_srt_name, filelist=...)``, even if an error occurs.

    Args:
        video_name: path of the video, opened through imageio's ffmpeg reader.
        save_srt_name: prefix for the temporary frame files and target name
            passed to ``np.savez``.

    NOTE(review): this is Python 2 code (print statements); the ``/`` in the
    crop slices relies on Python 2 integer division.
    NOTE(review): ``start_frame``/``end_frame``/``tmp`` are only assigned for
    the four known ``opt.movie_name`` values; any other value fails with a
    NameError - confirm callers never pass one.
    NOTE(review): API credentials are hard-coded below.
    """
    # Build an OCR client for the configured provider.
    # NOTE(review): ``client`` is not used afterwards in this function -
    # img_to_str is called with None as its first argument.
    if opt.ocr_source == "baidu":
        APP_ID = '11531274'
        API_KEY = 'nl59T9O2lmZ7iAD2wttS457F'
        SECRET_KEY = 'U0VztUf0QKwjfTxzxIcG1CWf9qz9Sobf'
        client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    elif opt.ocr_source == "tecent":
        app_id = '1106978111'
        app_key = '9hUBH27QnbtCWZ2x'
        client = apiutil.AiPlat(app_id, app_key)
    vid = imageio.get_reader(video_name, 'ffmpeg')
    all_frames = vid.get_length()
    filelist = [[0, 0, u""]]  # entry format: [starttime, endtime, data]
    interval_frame = 5
    framerate = vid.get_meta_data()['fps']
    same_list = []  # (text, probability) candidates for the current subtitle
    avg_char_width = -1
    # Frame window to scan per movie, written as (minutes * 60 + seconds) * 25;
    # presumably the factor 25 assumes 25 fps sources - TODO confirm.
    if opt.movie_name == "rmdmy":
        start_frame = (1 * 60 + 30) * 25
        end_frame = (43 * 60 + 30) * 25
    elif opt.movie_name == "wdqbs":
        start_frame = (1 * 60 + 30) * 25
        end_frame = all_frames - start_frame
    elif opt.movie_name == "bly":
        start_frame = (2 * 60 + 30) * 25
        end_frame = all_frames - ((3 * 60 + 40) * 25)
    elif opt.movie_name == "nrb":
        start_frame = (0 * 60 + 5) * 25
        end_frame = all_frames
    print "all_frames = %d,end_frame = %d" % (all_frames, end_frame)
    last_result = ""
    try:
        for num in range(all_frames):
            if num < start_frame or num > end_frame:
                continue
            im = vid.get_data(num)
            # Original note: "every 10 frames is 40 ms, frame rate is 25 Hz".
            # NOTE(review): interval_frame is 5 here, so the note looks stale.
            if num % interval_frame != 0:
                continue
            print num
            image = im  #skimage.img_as_float(im).astype(np.float64)
            # Disabled experiment that located the subtitle row automatically:
            # if flag == 0:
            #     imageio.imsave("abcd.jpg", image[image.shape[0] * 2 /3:,:])
            #     words_tmp, porb_tmp, item = img_to_str(client, "abcd.jpg", avg_char_width,flag=flag)
            #     if words_tmp=="":
            #         continue
            #     y = item[0]["y"]
            #     height = item[0]["height"]
            #     if y > 0:
            #         flag = 1
            # Crop the region where each movie renders its subtitles.
            if opt.movie_name == "rmdmy":
                tmp = image[(image.shape[0] * 2 / 3 + 100):(image.shape[0] * 2 / 3 + 200),
                            200:1000]
            elif opt.movie_name == "wdqbs":
                tmp = image[(image.shape[0] * 2 / 3 + 180):(image.shape[0] * 2 / 3 + 320),
                            int(image.shape[1] * 0.15):int(image.shape[1] * 0.78)]
            elif opt.movie_name == "bly":
                tmp = image[(image.shape[0] * 2 / 3 + 180):(image.shape[0] * 2 / 3 + 320),
                            int(image.shape[1] * 0.15):int(image.shape[1] * 0.78)]
            elif opt.movie_name == "nrb":
                tmp = image[(image.shape[0] * 2 / 3 + 120):(image.shape[0] * 2 / 3 + 320),
                            int(image.shape[1] * 0.17):int(image.shape[1] * 0.88)]
            # The OCR helper takes a file path, so the crop goes through a
            # temporary jpg that is removed again below.
            save_file_name = "%s_%d.jpg" % (save_srt_name, num)
            imageio.imsave(save_file_name, tmp)
            result, porb, avg_char_width, rsp = img_to_str(
                None, save_file_name, avg_char_width, last_result)
            # On a non-zero 'ret' or an empty result, retry once on a
            # gamma-adjusted (gamma 0.5) copy of the crop.
            if rsp['ret'] != 0 or (rsp['ret'] == 0 and result == ""):
                gam2 = exposure.adjust_gamma(tmp, 0.5)
                imageio.imsave(save_file_name, gam2)
                result, porb, avg_char_width, rsp = img_to_str(
                    None, save_file_name, avg_char_width, last_result)
            last_result = result
            os.remove(save_file_name)
            print result.encode("utf-8")
            if result != "":
                if len(same_list) > 0:
                    ratio = Levenshtein.ratio(result, filelist[-1][-1])
                    if ratio < 0.5:
                        # Different subtitle: finalise the previous entry
                        # from its candidates, then open a new one.
                        filelist[-1][2] = same_rule(same_list)
                        same_list = [(result, porb)]
                        filelist.append([
                            num * 1.0 / framerate, num * 1.0 / framerate, result
                        ])
                    else:
                        # Same subtitle still on screen: extend its end time.
                        same_list.append((result, porb))
                        filelist[-1][1] = num * 1.0 / framerate
                else:
                    same_list = [(result, porb)]
                    filelist.append(
                        [num * 1.0 / framerate, num * 1.0 / framerate,
                         result])  # entry format: [starttime, endtime, data]
            # Disabled handling for a "system busy" reply:
            # elif result == "" and porb == -1:
            #     None
            else:
                # No text in this frame: close the running subtitle, if any.
                if len(same_list) > 0:
                    filelist[-1][2] = same_rule(same_list)
                    same_list = []
    except IOError:
        print('可能是ioerror ')
    finally:
        # Persist whatever was collected, even after a failure.
        np.savez(save_srt_name, filelist=filelist)
def __init__(self):
    """Create the Baidu OCR client and set ``p_thres`` to 0.5.

    NOTE(review): ``p_thres`` is presumably a probability threshold - confirm
    against the methods that read it.
    """
    ocr_client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    self.client = ocr_client
    self.p_thres = 0.5
import wave
from aip import AipSpeech
from xpinyin import Pinyin
import requests
from os import system
import win32com.client

# Windows SAPI text-to-speech voice, created through COM.
speaker = win32com.client.Dispatch("SAPI.SpVoice")
# Piece names (army chess):
# flag, field marshal, general,
# division commander, brigadier, colonel, major, bomb,
# captain, lieutenant, engineer, landmine
# NOTE(review): Baidu credentials are hard-coded in source.
APP_ID = '20359943'
API_KEY = 'qnLBpWaNcl8mEORZRRCcKzZ2'
SECRET_KEY = 'BoKRYbe64dAVFTPWxMz7YOl4F7G8jtL2'
# NOTE(review): AipOcr and cv2 are not imported in the lines above -
# presumably imported earlier in the file; confirm.
aipOcr = AipOcr(APP_ID, API_KEY, SECRET_KEY)
cap = cv2.VideoCapture(0)  # capture device index 0
# Module-level state variables; their roles are not visible in this section.
aaa = 2
temp = 0
temp1 = 0
w1 = ''
w = ''
b = 0
filePath = "test.png"
#filePath = "all.png"


def get_file_content(filePath):
    """Return the full contents of the file at ``filePath`` as bytes."""
    with open(filePath, 'rb') as fp:
        return fp.read()
def _ocr_reply_for_image(pic_url):
    """Download ``pic_url``, OCR it with Baidu ``webImage`` and return the reply text."""
    no_text = "图中没有文字或未能识别"
    aipOcr = AipOcr(BAIDU_APP_ID, BAIDU_API_KEY, BAIDU_SECRET_KEY)
    options = {
        'detect_direction': 'true',
        'language_type': 'CHN_ENG',
    }
    img_name = str(uuid.uuid1()).replace("-", "")
    img_path = os.path.abspath('.') + '/media/images/' + img_name + '.jpg'
    img = requests.get(pic_url)
    # The name is freshly generated, so write (not append) the download.
    with open(img_path, 'wb') as f:
        f.write(img.content)
    try:
        result = aipOcr.webImage(get_file_content(img_path), options)
        if result["words_result_num"] == 0:
            return no_text
        return ''.join(
            '<p>' + item["words"] + '</p>' for item in result["words_result"])
    except Exception:
        # Best effort: any OCR failure (including an error response without
        # "words_result_num") is answered with the generic message.
        return no_text
    finally:
        # Always remove the temporary download, whatever happened above.
        os.remove(img_path)


def autoreply(request):
    """Build and send the reply for one incoming WeChat message.

    The XML in ``request.body`` is parsed and answered according to its
    ``MsgType``:

    * ``text``  - results of ``get_content`` for the message text.
    * ``event`` - welcome text on ``subscribe``, a farewell otherwise.
    * ``image`` - text recognised in the picture by Baidu OCR.
    * ``voice`` - the recognised speech plus ``get_content`` results, or a
      plain acknowledgement when no recognition result is present.
    * anything else - a fixed acknowledgement.

    Args:
        request: object whose ``body`` holds the raw XML of the message.

    Returns:
        Whatever ``TextMsg(...).send()`` returns.
    """
    # NOTE(review): request.body is external input parsed with ElementTree;
    # consider defusedxml if this endpoint is reachable by untrusted senders.
    xmlData = ET.fromstring(request.body)
    msg_type = xmlData.find('MsgType').text
    # The reply goes back to the sender, so the two names swap roles.
    toUser = xmlData.find('FromUserName').text
    fromUser = xmlData.find('ToUserName').text
    print(msg_type)

    simple_acks = {
        'video': "视频已收到,谢谢",
        'shortvideo': "小视频已收到,谢谢",
        'location': "位置已收到,谢谢",
    }

    if msg_type == 'text':
        content = '\n'.join(get_content(xmlData.find('Content').text))
    elif msg_type == 'event':
        if xmlData.find('Event').text == "subscribe":
            content = "终于等到你,小g已在此恭候多时。\n" \
                      "这是一个gis与ai的公众号,您可以输入关键词搜索资源。" \
                      "如输入'arcgis',小g会为你提供关于arcgis的各种资源。\n" \
                      "懒人福音,小g支持语音输入搜索。\n" \
                      "除此之外,小g还能将您发送的图片中的文字读取出来哦。\n" \
                      "最后附上:\n <a href='https://www.aigisss.com'> 👉 👉 个人小站👈 👈 </a>"
        else:
            content = "感谢您的陪伴,请别离开我,告诉我,我改还不行吗[皱眉][皱眉]"
    elif msg_type == 'image':
        content = _ocr_reply_for_image(xmlData.find('PicUrl').text)
    elif msg_type == 'voice':
        content = "语音已收到,谢谢"
        # The Recognition element may be missing or empty; both cases fall
        # back to the plain acknowledgement instead of raising.
        recognition = xmlData.find('Recognition')
        VoiceContent = recognition.text if recognition is not None else None
        if VoiceContent is not None:
            heading = ["您的语音是:{0}".format(VoiceContent)]
            matches = get_content(VoiceContent.replace('。', ''))
            content = '\n'.join(heading + matches)
    else:
        # Unlisted types (e.g. 'link') get the link acknowledgement.
        content = simple_acks.get(msg_type, "链接已收到,谢谢")

    return TextMsg(toUser, fromUser, content).send()
def __init__(self):
    """Copy the Baidu credentials from ``THEKEY2BD`` onto the instance and build the OCR client."""
    key_source = THEKEY2BD
    self.api_id = key_source.THEKE2BD_api_id
    self.api_key = key_source.THEKE2BD_api_key
    self.secret_key = key_source.THEKE2BD_secret_key
    self.client = AipOcr(
        self.api_id,
        self.api_key,
        self.secret_key,
    )
from aip import AipOcr from tutorial.cloud.baidu import get_key_values # 初始化ApiOcr对象 aipOcr = AipOcr(*get_key_values('ocr_demo')) # 读取图片 def get_file_content(filePath): with open(filePath, 'rb') as fp: return fp.read() # 定义参数变量 options = { 'detect_direction': 'true', 'language_type': 'CHN_ENG', } """调用通用文字识别接口 Python SDK OCR的BUG [识别本地图片出错] https://developer.baidu.com/forum/topic/show?topicId=241904 测试本地文件失败,方法是修改aip里的ocr.py函数_validate,替换掉下面的代码 # 支持url if re.match(r'^\w{1,128}://', data['image']): data['url'] = data['image'] del data['image'] return True 替换后 # 支持url if isinstance(data['image'], str) and re.match(r'^\w{1,128}://', data['image']): data['url'] = data['image']