def BaiduOCR(file):
    """Synthesize a fixed Chinese phrase to speech with the Baidu AIP
    text-to-speech API, save it as 'auido.mp3', and open it.

    NOTE(review): despite the function name and the original docstring
    (which described OCR on an image file), this code performs speech
    synthesis only and the ``file`` parameter is unused — confirm
    against callers before renaming.
    """
    client = aip.AipSpeech(APP_ID, API_KEY, SECRECT_KEY)
    # Synthesis options: vol = volume, spd = speed, pit = pitch (0-9),
    # per = voice: 0 female, 1 male, 3/4 emotional synthesis voices.
    message = client.synthesis("滚哼", 'zh', 1, {
        'vol': 4,
        'spd': 4,
        'pit': 5,
        'per': 4,
    })
    # On success the API returns raw MP3 bytes; on failure it returns a
    # dict describing the error, which is silently ignored (best-effort).
    if not isinstance(message, dict):
        with open('auido.mp3', 'wb') as f:
            f.write(message)
        webbrowser.open("auido.mp3")
def speech_to_text():
    """Transcribe the recorded WAV file via Baidu AIP ASR.

    Reads the module-level WAVE_OUTPUT_FILENAME at the module-level RATE,
    prints the joined recognition result, and returns it as a string.
    Raises KeyError if the API response has no 'result' field.
    """
    app_id = '10488070'
    api_key = '9SupHA6M6KYjaA60wbTf0CEp'
    secret_key = '42689bb0b63888aa8421cd760d3c7fbe'
    aipspeech = aip.AipSpeech(app_id, api_key, secret_key)
    # Close the audio file deterministically instead of leaking the handle.
    with open(WAVE_OUTPUT_FILENAME, 'rb') as wav_file:
        speech_content = aipspeech.asr(
            wav_file.read(), 'wav', RATE, {'lan': 'zh'})
    # str.join concatenates all recognized segments in one linear pass
    # instead of the original quadratic += loop.
    speech_result_str = ''.join(speech_content['result'])
    print(speech_result_str)
    return speech_result_str
def __init__(self, hass, face: dict, voice: dict, config: dict, users: dict, regex: dict):
    """Wire up the Baidu face/speech clients and register WeRoBot handlers.

    Raises AuthenticationError when the face-recognition group probe fails.
    """
    self.face = aip.AipFace(str(face["appId"]), face["apiKey"], face["secretKey"])
    self.voice = aip.AipSpeech(str(voice["appId"]), voice["apiKey"], voice["secretKey"])
    # Probe the "werobot" face group up front so bad credentials fail fast.
    if self.face.getGroupUsers("werobot")["error_code"] != 0:
        raise AuthenticationError("人脸识别认证错误!")
    self.robot = workwerobot.WeRoBot()
    for key, value in config.items():
        self.robot.config[key] = value
    self.client = self.robot.client
    self.hass = HASS(hass, self.client)
    self.users = users
    self.regex = regex
    self.robot.config['HOST'] = '0.0.0.0'
    # Subscribe events get their own handler; all message payload types
    # (text, voice, image) are funneled through self.recv.
    self.robot.add_handler(self.subscribe, "subscribe_event")
    for message_type in ("text", "voice", "image"):
        self.robot.add_handler(self.recv, message_type)
class SpeechClient(object):
    """Thin wrapper around the Baidu AIP speech APIs (ASR and TTS)."""

    _CLIENT = aip.AipSpeech(
        appId='10528707',
        apiKey='iBCuQlT14vkiPzr2qzEYpu6s',
        secretKey='7znifePL2YhGeKgqSsw2Q33sXqsIG6x2'
    )

    _DEFAULT_RATE = 16000
    _DEFAULT_AUDIO_TYPE = 'pcm'
    _DEFAULT_AUDIO_OPTION = {
        'spd': 5,  # speed
        'vol': 5,  # volume
        'pit': 5,  # pitch
        'per': 1,  # voice persona
    }
    # Punctuation stripped from recognized text. str.translate removes all
    # of them in a single C-level pass instead of six chained .replace calls.
    _PUNCTUATION_TABLE = str.maketrans('', '', ',。!、??')

    def speech_recognize(self, speech, audio_type=_DEFAULT_AUDIO_TYPE, rate=_DEFAULT_RATE):
        """Recognize raw audio bytes and return the cleaned-up text.

        speech: raw audio bytes in *audio_type* format at *rate* Hz.
        Raises RecognizeError when recognition fails or quality is too low.
        """
        result = self._CLIENT.asr(speech=speech, format=audio_type, rate=rate)
        if result and result.get('err_no') == 0:
            candidates = result.get('result', [])
            # Guard against an empty candidate list — the original indexed
            # [0] on a "" default and would raise IndexError here.
            msg = candidates[0] if candidates else ''
            return msg.translate(self._PUNCTUATION_TABLE)
        if result['err_msg'] == 'speech quality error.':
            raise RecognizeError('我没听清楚,再说一次好吗')
        raise RecognizeError(result)

    def speech_compose(self, response='测试数据:你好猪头肉'):
        """Synthesize *response* to MP3 bytes; raise ComposeError on failure.

        The Baidu SDK returns bytes on success and an error dict on failure.
        """
        result = self._CLIENT.synthesis(text=response, options=self._DEFAULT_AUDIO_OPTION)
        if isinstance(result, bytes):
            return result
        if isinstance(result, dict):
            raise ComposeError(result['err_msg'])
import audio
import aip

# Record 3 seconds of microphone audio to a local WAV file, then send it
# to Baidu AIP for speech recognition and print the first candidate.
audio.audio_record("d:\\1.wav", 3)

client = aip.AipSpeech("Enter Your APP_ID", "Enter Your API_KEY", "Enter Your SECRET_KEY")

# Use a context manager so the file handle is closed deterministically
# (the original opened the file and never closed it).
with open("d:\\1.wav", 'rb') as f:
    print(client.asr(f.read(), options={})["result"][0])
# coding:utf8 import json import flask import aip import config import util app = flask.Flask(__name__) client = aip.AipSpeech(config.APP_ID, config.API_KEY, config.SECRET_KEY) @app.route("/audio", methods=['POST']) def post_audio(): mp3_file = flask.request.files['file'] result = client.asr(util.mp3_to_pcm(mp3_file), 'wav', 16000, { 'lan': 'zh', }) asr_response = json.dumps(result, ensure_ascii=False) if 'result' in result: try: result['result'] = util.convert(result['result'][0]) app.logger.info("first -" + asr_response + "second - [" + result['result'] + "]") except Exception, e: app.logger.error("first -" + asr_response + "second -[" + e.message + "]") result['result'] = '' else: app.logger.info("first -" + asr_response + " - " + "[no result]") response = json.dumps(result, ensure_ascii=False)
#!/usr/bin/env python # -*- coding: utf-8 -*- """ai interface wrap """ import aip import config _conf = config.settings.get('components', {}).get('baidu-aip') _token = map(lambda x: _conf.get(x, ''), [ 'app-id', 'app-key', 'secret-key', ]) _speech_client = aip.AipSpeech(*_token) _speech_client.setConnectionTimeoutInMillis(3000) _speech_client.setSocketTimeoutInMillis(3000) _default_recognize_opts = { 'dev_pid': 1536, } _default_synthesize_opts = { 'spd': 5, 'pit': 5, 'vol': 5, 'per': 3, } def recognize(speech, **kwargs): """recognize
import os
import aip

APP_ID = '19699484'
API_KEY = 'qhcEWPTqlNG8mptj7DXOztuU'
SECRET_KEY = 'eXLBGpZ85eUohDaU1tgMDR9PViiqhSQL'

client = aip.AipSpeech(APP_ID, API_KEY, SECRET_KEY)


def main():
    """Synthesize a fixed phrase to 'audio.mp3' via Baidu text-to-speech."""
    # The original declared `global client`, but the module-level client is
    # only read here, so the declaration was a no-op and has been removed.
    result = client.synthesis('比如 nvidia broadcast', 'zh', 1, {
        'vol': 5,
        'per': 0,
    })
    # Success yields raw MP3 bytes; a dict signals an API error (see the
    # Baidu error-code table). Errors are silently ignored (best-effort).
    if not isinstance(result, dict):
        with open('audio.mp3', 'wb') as f:
            f.write(result)


if __name__ == '__main__':
    main()