def GetAsrResult(self):
    """Recognize the local audio file ``self.AudioFile`` in one ASR request.

    The file is read from disk, passed through ``utils.PcmToWav`` when
    ``self.Format`` is ``'pcm'``, base64-encoded and submitted as a
    ``SentenceRecognitionRequest`` to the endpoint ``self.Url``.

    Returns:
        ``resp.Result`` from the service, or ``""`` when the SDK raises
        ``TencentCloudSDKException`` (the error is logged).
    """
    try:
        # Request using the local-file upload mode.
        credentials = credential.Credential(self.SecretID, self.SecretKey)
        http_profile = HttpProfile()
        http_profile.endpoint = self.Url
        client_profile = ClientProfile()
        client_profile.httpProfile = http_profile
        client_profile.signMethod = "TC3-HMAC-SHA256"
        asr = asr_client.AsrClient(credentials, "ap-shanghai", client_profile)

        # Read the file and base64-encode it.
        with open(self.AudioFile, "rb") as audio_fh:
            raw_audio = audio_fh.read()
        if self.Format == 'pcm':
            raw_audio = utils.PcmToWav(raw_audio)
        encoded = base64.b64encode(raw_audio)
        if sys.version_info[0] != 2:
            # On Python 3 the encoder returns bytes; the request takes text.
            encoded = encoded.decode('utf-8')

        # Send the request.
        request = models.SentenceRecognitionRequest()
        request._deserialize({
            "ProjectId": 0,
            "SubServiceType": 2,
            "SourceType": 1,
            "UsrAudioKey": "session-123",
        })
        request.DataLen = len(encoded)
        request.Data = encoded
        request.EngSerViceType = "16k_zh"
        request.VoiceFormat = "wav"
        # Punctuation filtering (currently supported by the Mandarin engine):
        # 0 = keep, 1 = strip sentence-final punctuation, 2 = strip all.
        # Service default is 0.
        request.FilterPunc = 2
        # Smart Arabic-numeral conversion: 0 = off (Chinese numerals are
        # output as-is), 1 = convert according to context. Service default is 1.
        request.ConvertNumMode = 0

        response = asr.SentenceRecognition(request)
        logger.info("file:%s, result:%s, sid:%s",
                    self.AudioFile, response.Result, response.RequestId)
        return response.Result
    except TencentCloudSDKException as sdk_error:
        logger.error("code:%s, message:%s, sid:%s",
                     sdk_error.code, sdk_error.message, sdk_error.requestId)
        return ""
def stt(self, audio_file):
    """Transcribe an audio file with Tencent Cloud sentence recognition.

    The file content is uploaded inline (``SourceType`` 1) as base64 text and
    is declared to the service as ``mp3``.

    Args:
        audio_file: Path of the audio file to upload.

    Returns:
        The ``Result`` field of the service response.

    Raises:
        ASRServerError: When the SDK raises ``TencentCloudSDKException``.
    """
    try:
        # The context manager closes the file even if reading fails.
        with open(audio_file, mode='rb') as fwave:
            data = fwave.read()

        # json.dumps takes care of quoting instead of hand-concatenated JSON.
        params = json.dumps({
            "ProjectId": 0,
            "SubServiceType": 2,
            "EngSerViceType": "16k",
            "SourceType": 1,
            "VoiceFormat": "mp3",
            "UsrAudioKey": "session-123",
            "Data": base64.b64encode(data).decode('utf-8'),
            # Length of the raw audio, not of its base64 representation.
            "DataLen": len(data),
        })

        cred = credential.Credential(self.app_id, self.app_key)
        httpProfile = HttpProfile()
        httpProfile.endpoint = self.api_url
        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        client = asr_client.AsrClient(cred, "ap-chengdu", clientProfile)

        req = models.SentenceRecognitionRequest()
        req.from_json_string(params)
        res = client.SentenceRecognition(req)
        return json.loads(res.to_json_string())['Result']
    except TencentCloudSDKException as err:
        # Raise so callers see the failure; merely constructing the exception
        # object would make the function silently return None.
        raise ASRServerError(err, "404") from err
def recognize_wav(wav_path, show_detail=True):
    """Transcribe an English recording via Tencent Cloud sentence recognition.

    The file at ``to_mp3(wav_path)`` is opened with ``reader.SaveOpen`` and
    uploaded inline as base64 data declared as ``mp3`` to the ``16k_en``
    engine (``to_mp3`` is presumably an MP3 conversion helper; confirm
    against its definition).

    Args:
        wav_path: Path handed to ``to_mp3``.
        show_detail: When true, print the raw JSON response.

    Returns:
        The recognised words, lower-cased and joined by single spaces, with
        one trailing non-letter character removed from each word; or
        ``'[ERROR]'`` when the SDK raises ``TencentCloudSDKException``.
    """
    # Called using the local-audio upload mode.
    try:
        # Important: the secret id / secret key must be the caller's own
        # account credentials (see step 1 of the API usage guide).
        cred = credential.Credential(tencent_secret_id, tencent_secret_key)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "asr.tencentcloudapi.com"
        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        clientProfile.signMethod = "TC3-HMAC-SHA256"
        client = asr_client.AsrClient(cred, "ap-shanghai", clientProfile)

        # Read the file and base64-encode the raw bytes. Wrapping the bytes
        # in str() would yield the "b'...'" repr on Python 3 and make
        # b64encode raise TypeError.
        with reader.SaveOpen(to_mp3(wav_path), 'rb') as fwave:
            data = fwave.read()
        dataLen = len(data)
        base64Wav = base64.b64encode(data)
        if not isinstance(base64Wav, str):
            # Python 3 returns bytes; the request payload needs text.
            base64Wav = base64Wav.decode('utf-8')

        # Send the request.
        req = models.SentenceRecognitionRequest()
        params = {
            "ProjectId": 0,
            "SubServiceType": 2,
            "EngSerViceType": "16k_en",
            "SourceType": 1,
            "Url": "",
            "VoiceFormat": "mp3",
            "UsrAudioKey": "session-123",
            "Data": base64Wav,
            "DataLen": dataLen,
        }
        req._deserialize(params)
        resp = client.SentenceRecognition(req)
        if show_detail:
            print(resp.to_json_string())

        # Drop one trailing non-letter (e.g. punctuation) from each word.
        # The class must span A-Z; a narrower upper bound would truncate
        # words that end in X, Y or Z.
        words = [
            w if re.match(r'[a-zA-Z]', w[-1:]) else w[:-1]
            for w in resp.Result.split()
        ]
        return ' '.join(words).lower()
    except TencentCloudSDKException as err:
        print(err)
        return '[ERROR]'
def sentence_asr(self):
    """Run Tencent Cloud sentence recognition on ``self.data``.

    ``self.type`` selects the input mode. When truthy, ``self.data`` is a
    local file path whose content is uploaded inline (``SourceType`` 1).
    When falsy, ``self.data`` is sent to the service as the audio URL
    (``SourceType`` 0) and no local file is read.

    Returns:
        The ``Result`` text with the ideographic full stop and commas
        (ASCII and full-width U+FF0C) removed, or ``None`` when the SDK
        raises ``TencentCloudSDKException`` (the error is printed).
    """
    try:
        cred = credential.Credential(self.SecretId, self.SecretKey)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "asr.tencentcloudapi.com"
        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        clientProfile.signMethod = "TC3-HMAC-SHA256"
        client = asr_client.AsrClient(cred, "ap-shanghai", clientProfile)

        use_local_file = bool(self.type)

        # Send the request.
        req = models.SentenceRecognitionRequest()
        req._deserialize({
            "ProjectId": 0,
            "SubServiceType": 2,
            "SourceType": 1 if use_local_file else 0,
            "UsrAudioKey": "session-123",
        })
        req.EngSerViceType = "16k_zh"
        req.VoiceFormat = "wav"
        if use_local_file:
            # Local-file mode: read the file and base64-encode it. Only done
            # here because in URL mode self.data is not a readable path.
            # Test audio: https://asr-audio-1300466766.cos.ap-nanjing.myqcloud.com/test16k.wav
            with open(self.data, "rb") as f:
                content = base64.b64encode(f.read()).decode('utf-8')
            req.DataLen = len(content)
            req.Data = content
        else:
            # URL mode: the request field is ``Url`` (capital U) and must be
            # assigned after _deserialize, which resets every field.
            req.Url = self.data

        resp = client.SentenceRecognition(req)
        res = json.loads(resp.to_json_string())['Result']
        # Strip sentence punctuation; the Mandarin engine emits full-width
        # commas, so remove both comma forms.
        for mark in ('。', ',', '\uff0c'):
            res = res.replace(mark, '')
        return res
    except TencentCloudSDKException as err:
        print('tencent返回错误:%s' % err)
        return None
def recognize(self, path: PathLike, lang: str = ""):
    """Transcribe the audio file at ``path`` with sentence recognition.

    The audio is loaded with pydub, resampled to 16 kHz mono, exported to an
    in-memory WAV buffer and uploaded inline as base64.

    Args:
        path: Location of the audio file, as a ``str`` or an ``os.PathLike``
            object.
        lang: Key into ``self.languages`` selecting the engine; falls back to
            ``self.lang`` when empty. Must be a member of ``self.lang_list``.

    Returns:
        ``[resp.Result]`` on success, otherwise ``["ERROR!", message]`` for
        an invalid path type, an unknown language, or a
        ``TencentCloudSDKException`` raised by the SDK.
    """
    import os  # local import so the block does not rely on file-level imports

    if not lang:
        lang = self.lang
    # Accept path objects as well as strings, as the annotation promises.
    if not isinstance(path, (str, os.PathLike)):
        return ["ERROR!", "File must be a path string."]
    if lang not in self.lang_list:
        return ["ERROR!", "Invalid language."]
    path = os.fspath(path)

    try:
        with BytesIO() as f:
            audio = (
                pydub.AudioSegment.from_file(path)
                .set_frame_rate(16000)
                .set_channels(1)
            )
            audio.export(f, format="wav", codec="s16le", bitrate='16k')
            # getvalue() returns the whole buffer regardless of position.
            data = f.getvalue()

        req = models.SentenceRecognitionRequest()
        params = {
            "ProjectId": 0,
            "SubServiceType": 2,
            "EngSerViceType": self.languages[lang],
            "SourceType": 1,
            "Url": "",
            "VoiceFormat": "wav",
            "UsrAudioKey": "catbaron.voice_recog",
            "Data": base64.b64encode(data).decode(),
            # Size of the raw WAV bytes, before base64 encoding.
            "DataLen": len(data),
        }
        req._deserialize(params)
        resp = self.client.SentenceRecognition(req)
        return [resp.Result]
    except TencentCloudSDKException as err:
        return ["ERROR!", str(err)]
# Sample script: one-shot sentence recognition of a local audio file.
# Getting-started guide: https://cloud.tencent.com/product/asr/getting-started
import os

# Test audio can be downloaded from
# https://asr-audio-1300466766.cos.ap-nanjing.myqcloud.com/test16k.wav
audio_path = './12.mp3'

try:
    # Credentials are read from the environment so that no secret id / key
    # is stored in the source file.
    cred = credential.Credential(
        os.environ.get("TENCENTCLOUD_SECRET_ID"),
        os.environ.get("TENCENTCLOUD_SECRET_KEY"),
    )
    httpProfile = HttpProfile()
    httpProfile.endpoint = "asr.tencentcloudapi.com"
    clientProfile = ClientProfile()
    clientProfile.httpProfile = httpProfile
    clientProfile.signMethod = "TC3-HMAC-SHA256"
    client = asr_client.AsrClient(cred, "ap-shanghai", clientProfile)

    # Read the file and base64-encode it.
    with open(audio_path, "rb") as f:
        if sys.version_info[0] == 2:
            content = base64.b64encode(f.read())
        else:
            content = base64.b64encode(f.read()).decode('utf-8')

    # Send the request.
    req = models.SentenceRecognitionRequest()
    params = {
        "ProjectId": 0,
        "SubServiceType": 2,
        "SourceType": 1,
        "UsrAudioKey": "session-123",
        "RequestId": '1c8400f1-913f-4938-a45f-0421b08782ad',
    }
    req._deserialize(params)
    req.DataLen = len(content)
    req.Data = content
    req.EngSerViceType = "16k_zh"
    # Declare the format the file actually has (taken from its extension)
    # rather than a fixed value that may not match the uploaded audio.
    req.VoiceFormat = os.path.splitext(audio_path)[1].lstrip('.').lower()
    resp = client.SentenceRecognition(req)
    print(resp.to_json_string())
except TencentCloudSDKException as err:
    print(err)