class Baidu_sr():
    """Speech recogniser built on the Baidu ``AipSpeech`` client.

    NOTE(review): the default credentials are hard-coded in the constructor
    signature; consider passing them in from configuration instead.
    """

    def __init__(self,
                 APP_ID='21179500',
                 API_KEY='ninYN8Qlg1AIgUvcTGpmQ1L8',
                 SECRET_KEY='daGMOpmE17obYnskrrYb6e5IzGdx8ghl'):
        """Create the ``AipSpeech`` client from the given credentials."""
        self.client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def __get_text(self, wav_bytes):
        """Send WAV bytes to the service and return the first transcript.

        Returns the string ``"Error"`` (after printing the exception) when
        the response has no usable ``result`` entry.
        """
        options = {'dev_pid': 1537}
        response = self.client.asr(wav_bytes, 'wav', 16000, options)
        try:
            return response['result'][0]
        except Exception as exc:
            print(exc)
            return "Error"

    # For real time voice recording
    def speech_recog(self):
        """Record one utterance from the microphone and recognise it.

        This is a generator: it yields two ``Info:`` status strings and
        finally the recognised text (or ``"Error"``).
        """
        recognizer = sr.Recognizer()
        microphone = sr.Microphone()
        yield 'Info:Please try to speak something...'
        with microphone as source:
            recognizer.adjust_for_ambient_noise(source)
            captured = recognizer.listen(source)
        wav_bytes = captured.get_wav_data(convert_rate=16000)
        yield "Info:Got you, now I'm trying to recognize that..."
        yield self.__get_text(wav_bytes)
def baidu_sound(self, file):
    """Recognise speech in an audio file with Baidu ASR.

    A ``.wav`` file is sent as-is. Any other format is first converted to
    WAV with ``AudioSegment`` (written to ``record_1.wav`` inside
    ``self.sound_file``) and the converted file is sent instead. Progress
    messages for the conversion path are reported through ``self.s``.

    Args:
        file: Path of the audio file to recognise.

    Returns:
        The first entry of the ``result`` list in the service response.

    Raises:
        KeyError: If the response has no ``result`` key.
    """
    import os  # local import: the file's import block is not visible here

    # NOTE(review): credentials are hard-coded; move them to configuration.
    APP_ID = '19236313'
    API_KEY = 'gZ4E58quu5HgFalbda9ktNl7'
    SECRET_KEY = 'QzGPaVmFUQoSZGO1zbr18MAzldmKY01K'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # splitext() returns the real extension; the previous split(".")[1]
    # picked the wrong piece for paths such as "./a.wav" or "a.b.mp3".
    extension = os.path.splitext(file)[1].lstrip(".")
    needs_conversion = extension.lower() != "wav"

    if needs_conversion:
        # Format conversion
        text = "音频文件转换中,请继续等待,音频文件格式为:{type}".format(
            type=extension)
        self.s(text)
        audio_file = AudioSegment.from_file(file, format=extension or None)
        # os.path.join replaces the old "\record_1.wav" literal, whose "\r"
        # was read as a carriage-return escape and corrupted the path.
        wav_path = os.path.join(self.sound_file, "record_1.wav")
        audio_file.export(wav_path, format="wav")
    else:
        wav_path = file

    with open(wav_path, 'rb') as fp:
        audio = fp.read()
    # dev_pid must be 1537 here, not 1536
    result = client.asr(audio, 'wav', 16000, {'dev_pid': 1537})

    if needs_conversion:
        self.s("音频文件格式转换后再次进行语音识别中,请等待")
    return result['result'][0]
def speech_to_text_baidu(audio_path: str = "test.wav", if_microphone: bool = True):
    """Transcribe speech with the Baidu ASR service.

    Args:
        audio_path: File passed to ``_get_file_content`` when
            ``if_microphone`` is False.
        if_microphone: When True the audio comes from ``_record()`` instead
            of ``audio_path``.

    Returns:
        The first transcript, or ``"..."`` when the service did not report
        success.
    """
    # Apply for API credentials at https://cloud.baidu.com/product/speech
    # NOTE(review): credentials are hard-coded; move them to configuration.
    app_id = '10947352'
    api_key = 'gELihIXKQxswEye4Wb3gCdsb'
    secret_key = '2krKB6kQxfCdeuIDjGzXOfmqis7c1ByH'
    client = AipSpeech(app_id, api_key, secret_key)

    # Microphone input or file input
    audio = _record() if if_microphone else _get_file_content(audio_path)
    # dev_pid 1537: Mandarin, input-method model
    result = client.asr(audio, 'pcm', 16000, {'dev_pid': 1537})

    # Use .get(): a payload without "err_msg" (or without "result") used to
    # raise KeyError instead of producing the "..." fallback.
    if result.get("err_msg") != "success." or not result.get("result"):
        return "..."
    return result['result'][0]
def speech_to_text_baidu(audio_path: str = "test.wav", if_microphone: bool = True):
    """Run Baidu speech recognition on microphone or file audio.

    Args:
        audio_path: Path handed to ``_get_file_content`` when the
            microphone is not used.
        if_microphone: Select ``_record()`` (True) or the file (False) as
            the audio source.

    Returns:
        The first recognised sentence, or ``"..."`` if the response does
        not report success.
    """
    # Apply for API credentials at https://cloud.baidu.com/product/speech
    app_id = ""
    api_key = ""
    secret_key = ""
    client = AipSpeech(app_id, api_key, secret_key)

    if if_microphone:
        # Microphone input
        audio = _record()
    else:
        # File input
        audio = _get_file_content(audio_path)

    options = {
        'dev_pid': 1537,  # Mandarin, input-method model
    }
    result = client.asr(audio, 'pcm', 16000, options)

    # A response lacking "err_msg" previously raised KeyError here; treat it
    # like any other failure and return the placeholder.
    succeeded = result.get("err_msg") == "success."
    transcripts = result.get("result") if succeeded else None
    if not transcripts:
        return "..."
    return transcripts[0]
def speech2text_baidu(audio_path: str = "test.wav", if_microphone: bool = True):
    """Baidu ASR API.

    Credentials are read from ``config.yml`` next to this module (keys
    ``app_id``, ``api_key``, ``secret_key``).

    Args:
        audio_path: File passed to ``_get_file_content`` when
            ``if_microphone`` is False.
        if_microphone: When True, audio is taken from ``_record()``.

    Returns:
        The first transcript, or ``"..."`` when the service did not report
        success.
    """
    # get API info from https://cloud.baidu.com/product/speech
    config_file = os.path.join(os.path.dirname(__file__), 'config.yml')
    with open(config_file) as f:
        # safe_load: plain yaml.load() without a Loader can construct
        # arbitrary objects and is rejected by current PyYAML releases.
        config = yaml.safe_load(f)
    app_id = str(config['app_id'])
    api_key = config['api_key']
    secret_key = config['secret_key']
    client = AipSpeech(app_id, api_key, secret_key)

    # input from microphone or from file
    audio = _record() if if_microphone else _get_file_content(audio_path)
    # recognize Mandarin
    result = client.asr(audio, 'pcm', 16000, {'dev_pid': 1537})

    # .get() keeps a payload without "err_msg"/"result" on the "..." path
    # instead of raising KeyError.
    if result.get("err_msg") != "success." or not result.get("result"):
        return "..."
    return result['result'][0]
def Baidu_ASR(file_dir=None):
    """Transcribe every file returned by ``search_files(file_dir)``.

    Args:
        file_dir: Directory handed to ``search_files``.

    Returns:
        A list whose first element is the ``uid`` from ``search_files`` and
        whose following elements correspond one-to-one to the listed paths:
        the first transcript on success, or ``[]`` when the path is not a
        file or recognition produced no result.
    """
    APP_ID = ''
    API_KEY = ''
    SECRET_KEY = ''
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    uid, pqc_list = search_files(file_dir)
    result = [uid]
    options = {'lan': 'zh'}

    for test_file in pqc_list:
        if not isfile(test_file):
            result.append([])
            continue
        # One request per file: the previous code re-sent the audio after a
        # failure only to append [] regardless of the second answer.
        response = client.asr(get_file_content(test_file), 'wav', 16000,
                              options)
        try:
            transcript = response['result'][0]
        except (KeyError, IndexError, TypeError):
            # Narrow replacement for the old bare "except:"; always append a
            # placeholder so results stay aligned with pqc_list.
            print('Baidu error', test_file)
            transcript = []
        result.append(transcript)
    return result
def process_voice(voice_url):
    """Download an AMR clip and return two Baidu ASR transcriptions as text.

    The audio is recognised twice at 8000 Hz: once with default options and
    once with ``{'lan': 'en'}``. The result lists of both responses are
    joined with newlines under the headers ``BaiduASR:`` and
    ``BaiduASR_en:``.

    Raises:
        KeyError: If either response has no ``result`` key.
    """
    logging.info("voice_url:%s" % voice_url)
    # NOTE(review): urllib.urlopen is the Python 2 spelling; confirm the
    # interpreter this module targets.
    payload = bytearray(urllib.urlopen(voice_url).read())
    recognizer = AipSpeech(SPEECH_APPID, SPEECH_API_KEY, SPEECH_SECRET_KEY)

    default_response = recognizer.asr(payload, 'amr', 8000)
    english_response = recognizer.asr(payload, 'amr', 8000, {'lan': 'en'})

    default_section = 'BaiduASR: ' + '\n'.join(default_response['result'])
    english_section = 'BaiduASR_en: ' + '\n'.join(english_response['result'])
    return default_section + '\n' + english_section
class Speech(object):
    """Baidu ASR client configured from an INI file.

    The file must contain a ``[speech]`` section with ``app_id``,
    ``api_key`` and ``secret_key`` entries.
    """

    def __init__(self, config_path):
        """Read the credentials from ``config_path`` and build the client."""
        parser = ConfigParser()
        parser.read(config_path)
        section = parser["speech"]
        self.__app_id = section["app_id"]
        self.__api_key = section["api_key"]
        self.__secret_key = section["secret_key"]
        self.__client = AipSpeech(self.__app_id, self.__api_key,
                                  self.__secret_key)

    def parse(self,
              speech,
              format="pcm",
              rate=8000,
              dev_pid=BAIDU_AI_SPEECH_LANGUAGE.get("ONLY_CH_MANDARIN")):
        """Recognise ``speech`` and return the service's ``result`` value.

        Args:
            speech: Audio payload passed straight to ``AipSpeech.asr``.
            format: Audio format name sent to the service.
            rate: Sample rate sent to the service.
            dev_pid: Model/language identifier sent to the service.

        Returns:
            ``response["result"]`` when ``err_no`` is 0; otherwise ``None``
            after printing the error code and its description from
            ``BAIDU_AI_SPEECH_ERROR``.
        """
        response = self.__client.asr(speech, format, rate,
                                     {"dev_pid": dev_pid})
        code = response.get("err_no")
        if code == 0:
            return response.get("result")

        print("百度语音解析失败,错误代码:{code},原因:【{reason}】".format(
            code=code, reason=BAIDU_AI_SPEECH_ERROR.get(code)))
        return None
def baiduyuyin():
    """Recognise ``output.wav`` and store the text in the global ``myword``.

    The last character of the first transcript is stripped before it is
    stored. On any failure (unreadable file, missing result, ...) ``myword``
    is set to the fallback string instead.

    Returns:
        The value assigned to ``myword``.
    """
    global myword

    # NOTE(review): credentials are hard-coded; move them to configuration.
    APP_ID = '9424816'
    API_KEY = 'qaCMG6wQ0WVejR1IpXoS1ABB'
    SECRET_KEY = '4c36038b7b31ab119b9a56ed88f70229'

    # Initialise the AipSpeech object
    recognizer = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Recognise the local file
    try:
        with open('output.wav', 'rb') as audio_file:
            audio = audio_file.read()
        response = recognizer.asr(audio, 'wav', 8000, {'lan': 'zh'})
        first_sentence = response['result'][0]
        myword = first_sentence[:-1]
    except Exception:
        # Could not make out the speech (noise, empty response, ...)
        myword = "听不清"
    return myword
def zhRecognition(audio_file):
    """Recognise an audio file and return the outcome as a JSON string.

    Non-``wav`` input is converted with ``ffmpeg`` to a sibling ``.wav``
    file, which is removed again before returning.

    Args:
        audio_file: Path of the audio file.

    Returns:
        A JSON document with the keys ``status`` (``"ok"``/``"error"``),
        ``message`` and ``result`` (the first transcript or ``""``).
    """
    start = datetime.now()
    status = "error"
    message = "right"
    result = ""
    dev_pid_name = 1537
    converted = False
    filename = audio_file

    # Throttle: wait while ten or more .wav files exist in the working
    # directory.
    while len(glob.glob(pathname='*.wav')) >= 10:
        time.sleep(1)

    if audio_file[-3:] != 'wav':
        # splitext handles extensions of any length (the old [:-4] slice
        # assumed exactly three characters).
        filename = os.path.splitext(audio_file)[0] + '.wav'
        # Argument list instead of a shell string: file names with spaces
        # or shell metacharacters can no longer break or inject commands.
        subprocess.call(['ffmpeg', '-y', '-i', audio_file, filename])
        converted = True

    try:
        with open(filename, 'rb') as f:
            file_content = f.read()
        aip = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
        data = aip.asr(file_content, 'wav', 16000, {
            'dev_pid': dev_pid_name,
        })
        end = datetime.now()
        print("error_resulut:", data)
        print("error_resulut_1:", data.get('err_msg'))
        print('used time is : ', end - start)

        if "err_no" in data:
            if data['err_no'] == 0:
                result = data['result'][0]
                status = "ok"
            elif data["err_no"] == 3301:
                message = "audio quality poor"
            else:
                message = "other"
        else:
            message = "time out"
        return json.dumps({
            "status": status,
            "message": message,
            "result": result
        })
    except Exception:
        # Best-effort contract kept from the original: any failure is
        # reported to the caller as an error document.
        return json.dumps({
            "status": "error",
            "message": "Please check the file naming format",
            "result": ""
        })
    finally:
        # Remove the converted file on every path, including errors.
        if converted and os.path.exists(filename):
            os.remove(filename)
class Baidu:
    """Baidu speech recognition and synthesis helper.

    Credentials are read from ``config[SLUG]`` (``app_id``, ``api_key``,
    ``secret_key``).
    """

    def __init__(self, config):
        """Build the ``AipSpeech`` client from the configuration mapping."""
        section = config[SLUG]
        self._client = AipSpeech(section['app_id'], section['api_key'],
                                 section['secret_key'])

    # Recognise a local file
    def recognize(self, audio_file='record.wav'):
        """Send ``audio_file`` (WAV, 16000 Hz) and return the raw response."""
        audio = get_file_content(audio_file)
        return self._client.asr(audio, 'wav', 16000, {'dev_pid': 1536})

    def synthesis(self, text='你好百度', lang='zh', type=1, vol=5):
        """Synthesise ``text`` to ``speak.mp3``.

        Returns:
            1 when audio was written; 0 when the service answered with an
            error dict (which is printed).
        """
        outcome = self._client.synthesis(text, lang, type, {'vol': vol})
        # Success yields the audio as binary data, failure yields a dict
        # describing the error.
        if isinstance(outcome, dict):
            print(outcome)
            return 0
        with open('speak.mp3', 'wb') as audio_out:
            audio_out.write(outcome)
        return 1

    def say(self, audio_file='auido.mp3'):
        """Play ``audio_file`` through the ``mpg123`` command."""
        os.system('mpg123 ' + audio_file)
class Voice2Word:
    """Speech-to-text and text-to-speech through the Baidu AIP client."""

    def __init__(self):
        """Your APPID / AK / SK.

        NOTE(review): credentials are hard-coded; move them to configuration.
        """
        self.APP_ID = '17896871'
        self.API_KEY = 'Qjva533G96GmTKVblEYRZWSA'
        self.SECRET_KEY = 'Ya67pHTOs6OCkL35A8LPXQnI13B1wEXV'
        self.client = AipSpeech(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    # Read a file
    def __get_file_content(self, filePath):
        """Return the raw bytes of ``filePath``."""
        with open(filePath, 'rb') as handle:
            return handle.read()

    def voice2word(self, filename='output.wav'):
        """Recognise a local WAV file.

        Returns:
            The ``result`` value of the response, or ``[]`` when it cannot
            be read from the response.
        """
        audio = self.__get_file_content(filename)
        response = self.client.asr(audio, 'wav', 16000, {'dev_pid': 1536})
        try:
            transcripts = response['result']
        except:  # noqa: E722 - deliberately catches everything, as before
            return []
        return transcripts

    def word2voice(self, word, filename='audio.mp3'):
        """Synthesise ``word`` and write the audio to ``filename``.

        Nothing is written when the service answers with an error dict.
        """
        outcome = self.client.synthesis(word, 'zh', 1, {'vol': 5})
        # Success yields binary audio; failure yields a dict with the error.
        if isinstance(outcome, dict):
            return
        with open(filename, 'wb+') as audio_out:
            audio_out.write(outcome)
def main():
    """Transcribe every file in ``.\\output`` into ``.\\text``.

    For each input file a ``.txt`` file named after the input (last four
    characters dropped) is appended to: the first transcript on success,
    or a failure note otherwise.

    Returns:
        The number of entries found in ``.\\output``.
    """
    files = os.listdir(".\\output")
    files_num = len(files)

    # NOTE(review): credentials are hard-coded; move them to configuration.
    APP_ID = '16799063'
    API_KEY = 'pKZkOBI1Fc4HnelKsNwWDv1y'
    SECRET_KEY = 'dYr6yk1ApFCOdO06zh6xnHiBtohREb4q'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    for entry in files:
        source_path = ".\\output\\" + entry
        print(source_path)
        with open(source_path, 'rb') as audio_file:
            audio = audio_file.read()
        result = client.asr(audio, 'pcm', 16000, {'dev_pid': 1537})
        print(result)

        succeeded = result["err_no"] == 0
        stem = entry[:-4]
        with open(".\\text\\" + stem + ".txt", "a", encoding="utf-8") as out:
            if succeeded:
                out.write(result["result"][0])
            else:
                out.write(stem + "转换失败!")
    return files_num
def voice2text(APP_ID, API_KEY, SECRET_KEY, file_path):
    """Recognise a PCM file (16000 Hz, dev_pid 1536) with Baidu ASR.

    Args:
        APP_ID: Baidu application id.
        API_KEY: Baidu API key.
        SECRET_KEY: Baidu secret key.
        file_path: Path handed to ``get_data`` to obtain the audio.

    Returns:
        The response's ``result`` value on success, otherwise ``None``.
    """
    recognizer = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    response = recognizer.asr(get_data(file_path), 'pcm', 16000,
                              {'dev_pid': 1536})
    if not response:
        return None
    if response['err_msg'] != 'success.':
        return None
    return response['result']
def speech2text(filepath, cuid='yixue', dev_pid=1737, rate=16000, format='wav'):
    """Recognise a local audio file and return the joined transcript.

    Args:
        filepath: Path handed to ``get_file_content``.
        cuid: Client identifier sent with the request.
        dev_pid: Model/language identifier sent with the request.
        rate: Sample rate sent with the request.
        format: Audio format name sent with the request.

    Returns:
        All entries of the ``result`` list concatenated, or the string
        ``'error'`` when ``err_no`` is non-zero or any exception occurs.
    """
    # Recognise the local file
    try:
        # Your APPID / AK / SK
        # NOTE(review): credentials are hard-coded; move them to
        # configuration.
        APP_ID = '15414045'
        API_KEY = 'BwSTqlxahGvI5k0kGIYDlybZ'
        SECRET_KEY = 'g79fxW3Zqw1qrYKeQHmufv8zNXafc6Vt'
        client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

        options = {
            'dev_pid': dev_pid,
            'cuid': cuid,
        }
        res = client.asr(get_file_content(filepath), format, rate, options)
        if res['err_no'] != 0:
            return 'error'
        return ''.join(res['result'])
    except Exception as err:
        print('ASR识别异常:'+str(err))
        return 'error'
class BaiduASR(object): # 声音文件转字符串 version = "1.0.0" def __init__(self, work_dir, file, auths): self.work_dir = work_dir self.file = file self.auths = {} # self.auths["appId"] = auths["appid"] self.auths["appId"] = "" self.auths["apiKey"] = auths["apikey"] self.auths["secretKey"] = auths["secretkey"] self.client = AipSpeech(**self.auths) def run(self): with open(self.file, "rb") as fp: rb = fp.read() ext = os.path.splitext(self.file)[1].lstrip(".") datas = self.client.asr(rb, ext) if datas.get("err_msg") == "success.": res = datas.get("result", "") str0 = res[0] else: str0 = json.dumps(datas, ensure_ascii=False) return str0 """
def baiduAPI():
    """Recognise ``latestSpeech/output.wav`` and return text for the user.

    Returns:
        The first transcript on success (``err_no`` 0), a specific message
        for error codes 3301 and 3308, or a generic retry message for any
        other code.
    """
    # Your APPID / AK / SK
    APP_ID = 'XXXX'
    API_KEY = 'XXXX'
    SECRET_KEY = 'XXXXXXXXX'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Read the file
    with open("latestSpeech/output.wav", 'rb') as audio_file:
        audio = audio_file.read()

    # Recognise the local file. dev_pid: 1537 Mandarin (Chinese only),
    # 1536 Mandarin (with simple English), 1936 Mandarin far-field.
    result_str = client.asr(audio, 'wav', 16000, {'dev_pid': 1537})
    print(result_str)

    code = result_str["err_no"]
    known_failures = {
        3301: "音频质量过差,请重新录制清晰的音频!",
        3308: "音频过长,音频时长不超过60s!",
    }
    if code in known_failures:
        return known_failures[code]
    if code != 0:
        return "无法识别,请重新录音!"

    text = result_str["result"][0]
    print("文本:" + text)
    return text
class NLP:
    """Baidu ASR wrapper driven by two configuration mappings.

    ``app_token`` supplies ``APP_ID``, ``API_KEY`` and ``SECRET_KEY``;
    ``aipSpeech_wav_para`` supplies ``SPEECH_FILE_STYLE``, ``RATE`` and
    ``dev_pid`` for each request.
    """

    def __init__(self, app_token, aipSpeech_wav_para):
        """Store both mappings and build the ``AipSpeech`` client."""
        self.app_token = app_token
        self.target_wav_para = aipSpeech_wav_para
        token = self.app_token
        self.client = AipSpeech(token["APP_ID"], token["API_KEY"],
                                token["SECRET_KEY"])

    # Read a file
    def get_file_content(self, file_path):
        """Return the raw bytes of ``file_path``."""
        print("* get wave file")
        with open(file_path, 'rb') as handle:
            return handle.read()

    # Recognise a local file
    def translate(self, file_path):
        """Send ``file_path`` to the service and return the raw response."""
        print("* start translate")
        audio = self.get_file_content(file_path)
        settings = self.target_wav_para
        response = self.client.asr(audio, settings["SPEECH_FILE_STYLE"],
                                   settings["RATE"],
                                   {'dev_pid': settings["dev_pid"]})
        print("* done translate")
        return response

    def result(self, file_path):
        """Return ``(err_no, text)``.

        ``text`` is the first transcript when ``err_no`` is 0, otherwise the
        response's ``err_msg`` (which is also printed).
        """
        response = self.translate(file_path)
        code = response['err_no']
        if code != 0:
            print(response['err_msg'])
            return response['err_no'], response['err_msg']
        return code, response['result'][0]
def baiduAPI():
    """Recognise ``latestSpeech/output.wav`` and return text for the user.

    Returns:
        The first transcript when ``err_no`` is 0 (also printed), a
        dedicated message for codes 3301 and 3308, or a generic retry
        message otherwise.
    """
    # Your APPID / AK / SK
    APP_ID = '10372170'
    API_KEY = ''
    SECRET_KEY = ''
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Input file. (A disabled step used to convert it to 16 kHz mono PCM
    # with ffmpeg before sending.)
    wav_file = "latestSpeech/output.wav"

    def get_file_content(filePath):
        with open(filePath, 'rb') as handle:
            return handle.read()

    # Recognise the local file. dev_pid: 1537 Mandarin (Chinese only),
    # 1536 Mandarin (with simple English), 1936 Mandarin far-field.
    options = {'dev_pid': 1537}
    result_str = client.asr(get_file_content(wav_file), 'wav', 16000, options)

    code = result_str["err_no"]
    if code == 0:
        text = result_str["result"][0]
        print("您刚说的是:\n" + text)
        return text
    if code == 3301:
        return "音频质量过差,请重新录制清晰的音频!"
    if code == 3308:
        return "音频过长,音频时长不超过60s!"
    return "无法识别,请重新录音!"
class Speech(object):
    """Baidu ASR helper with fixed credentials.

    NOTE(review): credentials are hard-coded; move them to configuration.
    """

    def __init__(self):
        """Create the ``AipSpeech`` client."""
        self.APP_ID = '16250780'
        self.APP_KEY = 'xcnrNwkhe61iYGoaZVRNpnma'
        self.SECRET_KEY = 'rjHGH6zwDmGcx2lGisXYGilE2bnol7e9'
        self.client = AipSpeech(self.APP_ID, self.APP_KEY, self.SECRET_KEY)

    def get_file_content(self, filePath='audio.pcm'):
        """Return the raw bytes of ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def asr(self, filepath):
        """Recognise ``filepath`` (sent as WAV, 16000 Hz, dev_pid 1536).

        The raw response is printed for diagnostics.

        Returns:
            The first transcript encoded as UTF-8.

        Raises:
            TypeError: If the response has no ``result`` entry.
        """
        back = self.client.asr(self.get_file_content(filepath), 'wav', 16000, {
            'dev_pid': 1536,
        })
        # Single-argument print() calls emit the same text as the former
        # Python 2 print statements and also parse on Python 3.
        print("%s \n" % (back,))
        print('aip resutl')
        return back.get('result')[0].encode('utf-8')
def message(self):
    """Recognise the recorded audio and classify each spoken item.

    The first transcript is split on the spoken separator word and every
    piece is looked up in ``self.waste_map_dict``. Unless the exit phrase
    was spoken, a new recording is taken with ``self.sound_recording()``
    and the cycle repeats. Processing stops when recognition fails or the
    exit phrase is heard.

    The repetition is a loop rather than the former self-recursion, so a
    long session can no longer exhaust the interpreter's recursion limit.
    """
    while True:
        client = AipSpeech(self.app_id, self.api_key, self.secret_key)
        json_data = client.asr(
            self.get_file_content('D:/python37/program/output.wav'), 'pcm',
            16000, {'dev_pid': 1536})

        if json_data['err_no'] != 0:
            print("解析失败")
            print(json_data['err_msg'])
            return

        print("解析成功")
        num = 0
        waste_list = json_data['result'][0].split("逗号")
        # Decide for each item which kind of waste it is
        for name in waste_list:
            if name in self.waste_map_dict:
                num = num + 1
                print(name + '是' + self.waste_map_dict[name] + '\n')
            elif name == '退出程序':
                # Exit phrase: stop after this round
                num = -1
                break
            else:
                num = num + 1
                print("没有检测到符合条件的垃圾")

        if num <= 0:
            return
        self.sound_recording()
def baidu_speech():
    """Recognise ``01.wav`` (8000 Hz, ``lan`` = ``zh``) with Baidu ASR.

    Returns:
        The first transcript encoded as UTF-8, or ``'Err'`` encoded as
        UTF-8 when the response carries no result.
    """
    # Successful response:
    # {
    #     "err_no": 0,
    #     "err_msg": "success.",
    #     "corpus_no": "15984125203285346378",
    #     "sn": "481D633F-73BA-726F-49EF-8659ACCC2F3D",
    #     "result": ["北京天气"]
    # }
    # Failed response:
    # {
    #     "err_no": 2000,
    #     "err_msg": "data empty.",
    #     "sn": null
    # }

    # Your APPID / AK / SK
    # NOTE(review): credentials are hard-coded; move them to configuration.
    APP_ID = '10639428'
    API_KEY = '6u8eL2q96PntqBX4cuwgb684'
    SECRET_KEY = 'ane4qq7jOokBfCQ2WzypHh8ZIVs5Pjqm'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Recognise the local file
    response = client.asr(get_file_content('01.wav'), 'wav', 8000, {
        'lan': 'zh',
    })

    transcripts = response.get('result')
    if not transcripts:
        # The old ``.get('result', 'Err')[0]`` indexed the fallback string
        # itself and returned just 'E'.
        return 'Err'.encode('utf-8')
    return transcripts[0].encode('utf-8')
class baidu_speech_2_word(object):
    """Thin wrapper that sends PCM audio files to Baidu ASR."""

    # Parameters provided by the Baidu AI application
    APP_ID = None
    API_KEY = None
    SECRET_KEY = None
    client = None

    def __init__(self, app_id, api_key, secret_key):
        """Store the Baidu credentials and create the authorised client."""
        self.APP_ID = app_id
        self.API_KEY = api_key
        self.SECRET_KEY = secret_key
        self.client = AipSpeech(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    def speech_2_word(self, sound_file, rate=16000, dev_pid=1537):
        """Recognise a local file and return the raw service response.

        Args:
            sound_file: Path of the audio file, sent with format ``pcm``.
            rate: Sample rate sent with the request.
            dev_pid: Model/language identifier sent with the request.
        """
        with open(sound_file, 'rb') as audio_file:
            audio = audio_file.read()
        options = {'dev_pid': dev_pid}
        return self.client.asr(audio, 'pcm', rate, options)
def voice2word(filePath2mp3):
    """Convert an MP3 to ``temp.wav`` and send it to Baidu ASR.

    Args:
        filePath2mp3: Path of the MP3 file.

    Returns:
        The raw response of ``AipSpeech.asr`` (WAV, 8000 Hz, ``lan`` =
        ``zh``).
    """
    print("transform start")
    # MP3 to WAV: the converted audio is written to temp.wav
    AudioSegment.from_mp3(filePath2mp3).export('temp.wav', format="wav")
    print("transform completed")

    # WAV to text (the three values below come from the Baidu speech API
    # console)
    APP_ID = 'xxxxx'
    API_KEY = 'xxxxx'
    SECRET_KEY = 'xxxxx'
    print("connect to Baidu-API")
    recognizer = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Optional settings, not applied by default: connection timeout and
    # data-transfer timeout of the client, both in milliseconds.

    # Read the converted file as binary data
    with open('temp.wav', 'rb') as wav_file:
        audio = wav_file.read()

    # Recognise the local file
    return recognizer.asr(audio, 'wav', 8000, {'lan': 'zh'})
class Recognition:
    """Speech-to-text helper around the Baidu ``AipSpeech`` client.

    NOTE(review): credentials are hard-coded; move them to configuration.
    """

    def __init__(self):
        """Create the client and define the failure sentinel ``self.error``."""
        self.__APP_ID = '7023633'
        self.__API_KEY = 'liyMCxLhEsmQZ10TIXBwC2M5'
        self.__SECRET_KEY = '5c96379c38029b266ffadd93c005b481'
        self.client = AipSpeech(self.__APP_ID, self.__API_KEY,
                                self.__SECRET_KEY)
        self.error = "ERROR"

    def recognise(self, filename="voices.wav"):
        """Convert the recording in ``filename`` to text (STT).

        Returns:
            The first transcript when ``err_no`` is 0; otherwise
            ``self.error``. A response missing an expected key is reported
            by printing ``KeyError`` and also yields ``self.error``.
        """
        # Read the recording
        with open(filename, 'rb') as recording:
            voices = recording.read()

        # dev_pid values: 1536 Mandarin (with simple English), 1537 Mandarin
        # (Chinese only), 1737 English, 1637 Cantonese, 1837 Sichuanese,
        # 1936 Mandarin far-field.
        # Example failure:  {'err_msg': 'speech quality error.',
        #                    'err_no': 3301, 'sn': '...'}
        # Example success:  {'corpus_no': '...', 'err_msg': 'success.',
        #                    'err_no': 0, 'result': ['...'], 'sn': '...'}
        try:
            response = self.client.asr(voices, 'wav', 16000,
                                       {'dev_pid': 1537})
            if response['err_no'] == 0:
                transcript = response["result"][0]
            else:
                transcript = self.error
        except KeyError:
            print("KeyError")
            transcript = self.error
        return transcript
def asr(msg):
    """Transcribe a received MP3 voice message with Baidu ASR.

    The message is saved to ``msg['FileName']``, decoded, converted to
    16 kHz / mono / 16-bit, written to ``tmp.wav`` and sent to the service.
    Both files are removed afterwards, also when an error occurs.

    Args:
        msg: Mapping with ``'FileName'`` (target path) and ``'Text'`` (a
            callable invoked with that path; presumably it downloads the
            voice message there — TODO confirm against the caller).

    Returns:
        The first transcript of the response.

    Raises:
        KeyError: If the response has no ``result`` key.
    """
    # NOTE(review): credentials are hard-coded; move them to configuration.
    APP_ID = '16516161'
    API_KEY = 'eycPzd5xfCMsd0jn4aWrjwDz'
    SECRET_KEY = 'PLWIGyEIYcsYQoHuw6lPzxmBrrmSgsoc'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    source_path = msg['FileName']
    wav_path = "tmp.wav"
    msg['Text'](source_path)

    try:
        # Load the MP3 bytes from disk
        with open(source_path, 'rb') as fp:
            data = fp.read()
        sound = AudioSegment.from_file(io.BytesIO(data), format='mp3')

        # Make the samples match the WAV header written below. Previously
        # the decoded samples were written unchanged under a 16 kHz mono
        # header, which distorts any source with another rate or layout.
        sound = sound.set_frame_rate(16000).set_channels(1).set_sample_width(2)
        raw_data = sound.raw_data

        wav_out = wave.open(wav_path, 'wb')
        try:
            wav_out.setnchannels(1)
            wav_out.setsampwidth(2)
            wav_out.setframerate(16000)
            # writeframes() fixes up the frame count in the header itself
            wav_out.writeframes(raw_data)
        finally:
            wav_out.close()

        with open(wav_path, 'rb') as fp:
            result = client.asr(fp.read(), 'wav', 16000, {'dev_pid': 1536, })
    finally:
        # Clean up both files even when decoding or recognition failed
        for path in (wav_path, source_path):
            if os.path.exists(path):
                os.remove(path)
    return result['result'][0]
def speechReco_BaiDu(filename):
    """Recognise a WAV file with Baidu ASR using dev_pid 1737 (English).

    Args:
        filename: Path of the WAV file (sent as 16000 Hz).

    Returns:
        The first transcript when ``err_no`` is 0, otherwise ``""``.
    """
    # Your APPID / AK / SK
    # NOTE(review): credentials are hard-coded; move them to configuration.
    APP_ID = '6746004'
    API_KEY = 'uDtZD8h83SbyqVKyZI1vRRVj'
    SECRET_KEY = '76f710eec808e5dd74854c3180766b4d'
    recognizer = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Read the file
    with open(filename, 'rb') as audio_file:
        audio = audio_file.read()

    # Recognise the local file; 1737 selects English (1536 was used before)
    options = {'dev_pid': 1737}
    response = recognizer.asr(audio, 'wav', 16000, options)

    if response['err_no'] != 0:
        return ""
    return response['result'][0]
class DuAPI:
    """Lazily created, shared wrapper around the Baidu ``AipSpeech`` client.

    NOTE(review): credentials are hard-coded; move them to configuration.
    """

    APP_ID = '17674810'
    API_KEY = 'fVP9VGjFeUMNEf8dhyLyuiN7'
    SECRET_KEY = 'BG349KHUIt7Ddu1cNKGuY3B32p07OKaC'
    # Shared instance handed out by get_instance(); created on first use
    instance = None

    def __init__(self):
        """Create the ``AipSpeech`` client from the class-level credentials."""
        self.aipClient = AipSpeech(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    def asr(self, wav_path):
        """Recognise ``wav_path`` (WAV, 16000 Hz, dev_pid 1936).

        Returns:
            The first transcript when ``err_msg`` is ``'success.'``,
            otherwise ``''``.
        """
        with open(wav_path, 'rb') as wav_file:
            audio = wav_file.read()
        answer = self.aipClient.asr(audio, 'wav', 16000, {'dev_pid': 1936})
        return answer['result'][0] if answer['err_msg'] == 'success.' else ''

    @classmethod
    def get_instance(cls):
        """Return the shared instance, creating it on the first call."""
        if cls.instance:
            return cls.instance
        cls.instance = DuAPI()
        return cls.instance
def yuyinshibie(audio, type):
    """Save ``audio`` to ``01.pcm`` and recognise it with Baidu ASR.

    Args:
        audio: Audio data handed to ``save_wave_file``.
        type: Selects the credential set; ``1`` uses the first set, any
            other value the second.

    Returns:
        The first transcript, or ``0`` when it cannot be read from the
        response.
    """
    save_wave_file('01.pcm', audio)

    # NOTE(review): credentials are hard-coded; move them to configuration.
    if type == 1:
        credentials = ('14711800', 'wyDhSG366cL1zy0GxDHZtHxk',
                       'b2WprhYbk934KjegLaMk8WRrpw4zBEbW')
    else:
        credentials = ('14731705', 'omZGAmrmqwR9tWOMiBLLKuWH',
                       'POzePztxjWzVvqnykHGdEygu2QFETuQC')
    APP_ID, API_KEY, SECRET_KEY = credentials
    recognizer = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Read the file
    with open('01.pcm', 'rb') as pcm_file:
        pcm_bytes = pcm_file.read()

    # Recognise the local file
    response = recognizer.asr(pcm_bytes, 'pcm', 8000, {'dev_pid': 1536})
    try:
        transcript = response['result'][0]
    except:  # noqa: E722 - deliberately catches everything, as before
        return 0
    return transcript
class BaiduASR():
    """
    Baidu speech-recognition API.
    To use this module, first register a developer account at
    yuyin.baidu.com, then create a new application, and finally take the
    API Key and Secret Key shown under "view key" in the application
    management page and put them into config.xml.
        ...
        baidu_yuyin:
            appid: '<your app id>'
            api_key: '<your api key>'
            secret_key: '<your secret key>'
        ...
    """

    SLUG = "baidu-asr"

    def __init__(self, appid, api_key, secret_key, **args):
        """Create the ``AipSpeech`` client; extra keyword args are ignored."""
        # Name the class explicitly: super(self.__class__, self) resolves
        # to the wrong class as soon as BaiduASR is subclassed, so the
        # subclass ends up calling this __init__ again.
        super(BaiduASR, self).__init__()
        self.client = AipSpeech(appid, api_key, secret_key)

    def transcribe(self, fp):
        """Recognise the local file ``fp`` (WAV, 16000 Hz, dev_pid 1936).

        Returns:
            All entries of ``result`` joined into one string when
            ``err_no`` is 0, otherwise ``''``. Both outcomes are logged.
        """
        # Recognise the local file
        res = self.client.asr(utils.get_file_content(fp), 'wav', 16000, {
            'dev_pid': 1936,
        })
        if res['err_no'] == 0:
            logger.info(('百度语音识别到了', res['result']))
            return ''.join(res['result'])
        else:
            logger.info('百度语音识别出错了:' + res['err_msg'])
            return ''
class Recognition:
    """Baidu ASR helper that reads WAV files and sends their PCM frames."""

    def __init__(self, config_info):
        """Build the client from ``config_info['baidu']``.

        That mapping must provide ``appId``, ``apiKey`` and ``secretKey``.
        """
        self.baidu = config_info['baidu']
        self.speech = AipSpeech(self.baidu['appId'], self.baidu['apiKey'],
                                self.baidu['secretKey'])

    def get_wave_file(self, wave_file):
        """Read all frames of a WAV file.

        The sample width, frame rate and channel count are printed.

        Args:
            wave_file: Path of the WAV file.

        Returns:
            A tuple ``(audio_data, frame_rate, f_len)`` where ``f_len`` is
            the frame count multiplied by the sample width.

        Raises:
            FileNotFoundError: If ``wave_file`` does not exist.
        """
        if not os.path.exists(wave_file):
            raise FileNotFoundError(wave_file)

        # The context manager closes the file; it used to be left open.
        with wave.open(wave_file, 'rb') as fp:
            frame_count = fp.getnframes()  # number of sample frames
            sample_width = fp.getsampwidth()
            frame_rate = fp.getframerate()
            print('sampwidth:', sample_width, 'framerate:', frame_rate,
                  'channels:', fp.getnchannels())
            # Length computed as frames * bytes per sample
            f_len = frame_count * sample_width
            audio_data = fp.readframes(frame_count)
        return (audio_data, frame_rate, f_len)

    def recognize(self, audio_file):
        """Send the frames of ``audio_file`` as PCM and return the response.

        Supported formats are pcm (uncompressed), wav (uncompressed, PCM
        encoded) and amr (compressed). pcm at 16000 Hz, 16-bit mono is
        recommended: the server converts other formats to pcm, which costs
        extra time.
        """
        wave_content, frame_rate, _length = self.get_wave_file(audio_file)
        result = self.speech.asr(wave_content, 'pcm', frame_rate,
                                 {'lan': 'zh'})
        return result
@author: jukuo
"""
#-*- coding: utf-8 -*-
from aip import AipSpeech
import make_wav
""" 你的 APPID AK SK """
APP_ID = '***'
API_KEY = '***'
SECRET_KEY = '** '
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


# Read a file and return its raw bytes
def get_file_content(filePath):
    with open(filePath, 'rb') as fp:
        return fp.read()


# Record from the microphone, save the recording, then recognise the local
# file.
# NOTE(review): GenAudio is not imported by name in the visible code (only
# ``import make_wav``) — confirm where it comes from.
r = GenAudio()
r.num_samples = 2000  # internal pyaudio buffer size
r.sampling_rate = 16000  # sampling frequency
r.level = 1500  # threshold above which sound is kept
r.count_num = 20  # record when, within count_num samples, COUNT_NUM samples exceed LEVEL
r.save_length = 8  # minimum recorded length: save_length * num_samples samples
r.time_count = 10  # recording time in seconds
r.read_audio()
r.save_wav("./test.wav")
# NOTE(review): the file is saved as .wav but sent with format 'pcm' —
# confirm this is intended.
result=client.asr(get_file_content('test.wav'), 'pcm', 16000, {
    'dev_pid': 1536,
})
print(result['result'][0])