Exemplo n.º 1
0
def speech2text_baidu(audio_path: str = "test.wav",
                      if_microphone: bool = True):
    """Transcribe audio with the Baidu ASR API.

    The credentials (``app_id``, ``api_key``, ``secret_key``) are read from
    a ``config.yml`` file located next to this module.

    Args:
        audio_path: Path handed to ``_get_file_content`` when
            ``if_microphone`` is false.
        if_microphone: When true the audio comes from ``_record()`` and
            ``audio_path`` is ignored.

    Returns:
        The first recognition candidate, or the placeholder ``"..."`` when
        the response's ``err_msg`` is anything other than ``"success."``.
    """
    # get API info from https://cloud.baidu.com/product/speech
    config_file = os.path.join(os.path.dirname(__file__), 'config.yml')
    with open(config_file) as f:
        # safe_load only builds plain Python data. A bare yaml.load() can
        # construct arbitrary objects and is rejected by PyYAML >= 6 because
        # the Loader argument is missing.
        config = yaml.safe_load(f)
    app_id = str(config['app_id'])
    api_key = config['api_key']
    secret_key = config['secret_key']
    client = AipSpeech(app_id, api_key, secret_key)

    # Only the audio source differs between the two modes, so pick it first
    # and issue a single request.
    audio = _record() if if_microphone else _get_file_content(audio_path)
    # dev_pid 1537: recognize Mandarin
    result = client.asr(audio, 'pcm', 16000, {'dev_pid': 1537})

    # .get() keeps a response without err_msg on the failure path instead of
    # raising KeyError.
    if result.get("err_msg") != "success.":
        return "..."
    return result['result'][0]
Exemplo n.º 2
0
def speech_to_text_baidu(audio_path: str = "test.wav",
                         if_microphone: bool = True):
    """Transcribe audio with the Baidu ASR API.

    Args:
        audio_path: Path handed to ``_get_file_content`` when
            ``if_microphone`` is false.
        if_microphone: When true the audio comes from ``_record()``.

    Returns:
        The first recognition candidate, or ``"..."`` when the response's
        ``err_msg`` is not ``"success."``.
    """
    # Apply for API credentials at https://cloud.baidu.com/product/speech
    recognizer = AipSpeech('10947352', 'gELihIXKQxswEye4Wb3gCdsb',
                           '2krKB6kQxfCdeuIDjGzXOfmqis7c1ByH')

    # Audio source: microphone capture or a file on disk.
    if if_microphone:
        audio = _record()
    else:
        audio = _get_file_content(audio_path)

    # dev_pid 1537: Mandarin, using the input-method model.
    reply = recognizer.asr(audio, 'pcm', 16000, {'dev_pid': 1537})

    succeeded = reply["err_msg"] == "success."
    return reply['result'][0] if succeeded else "..."
Exemplo n.º 3
0
    def baidu_sound(self, file):
        """Recognize the speech in an audio file with Baidu ASR.

        WAV files are sent as they are. Any other format is first converted
        with ``AudioSegment`` to ``record_1.wav`` inside ``self.sound_file``
        and the converted file is sent instead. Progress texts are passed to
        ``self.s``.

        Args:
            file: Path of the audio file to recognize.

        Returns:
            The first recognition candidate.

        Raises:
            KeyError: If the response carries no ``result`` entry (the lookup
                is unguarded).
        """
        # Local import: the module's import block is not visible from here.
        import os

        APP_ID = '19236313'
        API_KEY = 'gZ4E58quu5HgFalbda9ktNl7'
        SECRET_KEY = 'QzGPaVmFUQoSZGO1zbr18MAzldmKY01K'
        client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

        # splitext yields the real (last) extension. split(".")[1] picked the
        # wrong piece for names containing several dots and raised IndexError
        # for names without any.
        extension = os.path.splitext(file)[1].lstrip(".")
        needs_conversion = extension != "wav"

        if needs_conversion:
            # Format conversion
            text = "音频文件转换中,请继续等待,音频文件格式为:{type}".format(
                type=extension)
            self.s(text)
            # An empty extension is passed as None so the decoder can probe
            # the container itself.
            audio_file = AudioSegment.from_file(file,
                                                format=extension or None)
            # os.path.join replaces the literal "\record_1.wav", whose "\r"
            # was interpreted as a carriage return rather than a separator.
            path = os.path.join(self.sound_file, "record_1.wav")
            audio_file.export(path, format="wav")
        else:
            # Recognize the local file directly
            path = file

        with open(path, 'rb') as fp:
            audio = fp.read()
        # dev_pid has to be 1537 rather than 1536
        result = client.asr(audio, 'wav', 16000, {
            'dev_pid': 1537,
        })

        if needs_conversion:
            text = "音频文件格式转换后再次进行语音识别中,请等待"
            self.s(text)
        return result['result'][0]
Exemplo n.º 4
0
def speech_to_text_baidu(audio_path: str = "test.wav",
                         if_microphone: bool = True):
    """Transcribe audio with the Baidu ASR API.

    The three credential strings are empty placeholders to be filled in.

    Args:
        audio_path: Path handed to ``_get_file_content`` when
            ``if_microphone`` is false.
        if_microphone: When true the audio comes from ``_record()``.

    Returns:
        The first recognition candidate, or ``"..."`` when the response's
        ``err_msg`` is not ``"success."``.
    """
    # Apply for API credentials at https://cloud.baidu.com/product/speech
    app_id = api_key = secret_key = ""
    client = AipSpeech(app_id, api_key, secret_key)

    # Read from the microphone, otherwise from the file.
    audio = _record() if if_microphone else _get_file_content(audio_path)

    # dev_pid 1537: Mandarin, using the input-method model.
    options = {'dev_pid': 1537}
    outcome = client.asr(audio, 'pcm', 16000, options)

    if outcome["err_msg"] == "success.":
        return outcome['result'][0]
    return "..."
Exemplo n.º 5
0
def Baidu_ASR(file_dir=None):
    """Run Baidu speech recognition over the files found by ``search_files``.

    Args:
        file_dir: Passed straight to ``search_files``, which returns a
            ``(uid, file_list)`` pair.

    Returns:
        A list whose first element is ``uid``, followed by one entry per
        handled path: the first recognition candidate on success, or ``[]``
        when the path is not a file or Baidu answers
        ``'speech quality error.'``. A file that fails for any other reason
        contributes no entry (unchanged from the original behaviour).
    """
    APP_ID = ''
    API_KEY = ''
    SECRET_KEY = ''
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    uid, pqc_list = search_files(file_dir)
    result = [uid]
    for test_file in pqc_list:
        if not isfile(test_file):
            result.append([])
            continue

        # One request per file: the response itself is inspected on failure.
        # The original caught everything with a bare ``except`` and re-sent
        # the audio a second time only to read ``err_msg``.
        response = client.asr(get_file_content(test_file), 'wav', 16000, {
            'lan': 'zh',
        })
        try:
            result.append(response['result'][0])
        except (KeyError, IndexError):
            print('Baidu error', test_file)
            if response.get('err_msg') == 'speech quality error.':
                result.append([])
    return result
def process_voice(voice_url):
    """Download a voice clip and recognize it twice with Baidu ASR.

    The clip is sent as 8 kHz AMR, once without options and once with
    ``{'lan': 'en'}``.

    Args:
        voice_url: URL the audio is fetched from.

    Returns:
        A text block with the candidates of the first request after
        ``'BaiduASR: '`` and those of the English request after
        ``'BaiduASR_en: '``, one candidate per line.
    """
    logging.info("voice_url:%s" % voice_url)
    payload = bytearray(urllib.urlopen(voice_url).read())
    recognizer = AipSpeech(SPEECH_APPID, SPEECH_API_KEY, SPEECH_SECRET_KEY)

    default_reply = recognizer.asr(payload, 'amr', 8000)
    english_reply = recognizer.asr(payload, 'amr', 8000, {'lan': 'en'})

    sections = [
        'BaiduASR: ' + '\n'.join(default_reply['result']),
        'BaiduASR_en: ' + '\n'.join(english_reply['result']),
    ]
    return '\n'.join(sections)
Exemplo n.º 7
0
class Speech(object):
    """Baidu speech client configured from an INI-style file."""

    def __init__(self, config_path):
        """Read the ``[speech]`` credentials and build the client.

        Args:
            config_path: File read with ``ConfigParser``; its ``speech``
                section must provide ``app_id``, ``api_key`` and
                ``secret_key``.
        """
        config = ConfigParser()
        config.read(config_path)
        section = config["speech"]
        self.__app_id = section["app_id"]
        self.__api_key = section["api_key"]
        self.__secret_key = section["secret_key"]
        self.__client = AipSpeech(self.__app_id, self.__api_key,
                                  self.__secret_key)

    def parse(self,
              speech,
              format="pcm",
              rate=8000,
              dev_pid=BAIDU_AI_SPEECH_LANGUAGE.get("ONLY_CH_MANDARIN")):
        """Send audio to Baidu ASR and return the recognition candidates.

        Args:
            speech: Audio payload forwarded to ``asr``.
            format: Audio format name forwarded to ``asr``.
            rate: Sample rate forwarded to ``asr``.
            dev_pid: Language model id, sent as the ``dev_pid`` option.

        Returns:
            The response's ``result`` value when ``err_no`` is 0, otherwise
            ``None`` after printing the error code and its description.
        """
        options = {"dev_pid": dev_pid}
        reply = self.__client.asr(speech, format, rate, options)
        err_no = reply.get("err_no")
        if err_no != 0:
            print("百度语音解析失败,错误代码:{code},原因:【{reason}】".format(
                code=err_no, reason=BAIDU_AI_SPEECH_ERROR.get(err_no)))
            return None
        return reply.get("result")
def baiduyuyin():
    """Recognize ``output.wav`` and store the text in the global ``myword``.

    The file is sent to Baidu ASR as 8 kHz WAV with ``{'lan': 'zh'}``. The
    last character of the first candidate is dropped before it is stored.

    Returns:
        The new value of ``myword``: the trimmed first candidate, or
        ``"听不清"`` when anything in the read/recognize step raises.
    """
    global myword
    APP_ID = '9424816'
    API_KEY = 'qaCMG6wQ0WVejR1IpXoS1ABB'
    SECRET_KEY = '4c36038b7b31ab119b9a56ed88f70229'
    # Initialise the AipSpeech object
    recognizer = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Recognize the local file
    try:
        with open('output.wav', 'rb') as wav_file:
            audio = wav_file.read()
        reply = recognizer.asr(audio, 'wav', 8000, {'lan': 'zh'})
        first_candidate = reply['result'][0]
        myword = first_candidate[:-1]
    except Exception:
        # Any failure (missing file, error response, ...) is reported with
        # the same fixed text.
        myword = "听不清"
    return myword
Exemplo n.º 9
0
def zhRecognition(audio_file):
    """Recognize Mandarin speech in *audio_file* and return a JSON report.

    Input that is not a ``.wav`` file is first converted with ``ffmpeg`` to a
    sibling ``.wav`` file, which is removed again before returning. While ten
    or more ``*.wav`` files sit in the working directory the call waits,
    polling once per second.

    Args:
        audio_file: Path of the audio file to recognize.

    Returns:
        A JSON string with the keys ``status`` (``"ok"`` or ``"error"``),
        ``message`` and ``result`` (the first candidate, or ``""``).
    """
    start = datetime.now()
    status = "error"
    message = "right"
    result = ""
    # The original bumped this value after a 'request pv too much' reply, but
    # the new value was never used; that dead statement is gone.
    dev_pid_name = 1537
    converted = False
    filename = audio_file

    # Throttle on the number of .wav files in the working directory.
    while len(glob.glob(pathname='*.wav')) >= 10:
        time.sleep(1)

    try:
        # splitext copes with extensions of any length; the original sliced
        # a fixed 3/4 characters and produced names such as "a..wav".
        stem, extension = os.path.splitext(audio_file)
        if extension != '.wav':
            filename = stem + '.wav'
            converted = True
            # Argument list without a shell: paths containing spaces or
            # shell metacharacters are passed to ffmpeg verbatim.
            subprocess.call(['ffmpeg', '-y', '-i', audio_file, filename])

        with open(filename, 'rb') as f:
            file_content = f.read()
        aip = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
        data = aip.asr(
            file_content,
            'wav',
            16000,
            {
                'dev_pid': dev_pid_name,  # was 1536 1537
            })
        end = datetime.now()
        print("error_resulut:", data)
        # .get() so that a reply without err_msg still reaches the
        # "time out" branch below instead of raising KeyError.
        print("error_resulut_1:", data.get('err_msg'))
        print('used time is : ', end - start)
        if "err_no" in data:
            if data['err_no'] == 0:
                result = data['result'][0]
                status = "ok"
            elif data["err_no"] == 3301:
                message = "audio quality poor"
            else:
                message = "other"
        else:
            message = "time out"
        return json.dumps({
            "status": status,
            "message": message,
            "result": result
        })
    except Exception:
        return json.dumps({
            "status": "error",
            "message": "Please check the file naming format",
            "result": ""
        })
    finally:
        # Remove the converted file on every path, including failures.
        if converted and os.path.exists(filename):
            os.remove(filename)
Exemplo n.º 10
0
class Speech(object):
    """Small wrapper around a Baidu ``AipSpeech`` client."""

    def __init__(self):
        """Create the client from the embedded credentials."""
        self.APP_ID = '16250780'
        self.APP_KEY = 'xcnrNwkhe61iYGoaZVRNpnma'
        self.SECRET_KEY = 'rjHGH6zwDmGcx2lGisXYGilE2bnol7e9'
        self.client = AipSpeech(self.APP_ID, self.APP_KEY, self.SECRET_KEY)

    def get_file_content(self, filePath='audio.pcm'):
        """Return the raw bytes of *filePath*."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def asr(self, filepath):
        """Recognize *filepath* (sent as 16 kHz WAV, ``dev_pid`` 1536).

        The raw response is printed before the result is extracted.

        Returns:
            The first recognition candidate, UTF-8 encoded.

        Raises:
            TypeError: If the response has no ``result`` entry (``None`` is
                indexed).
        """
        back = self.client.asr(self.get_file_content(filepath), 'wav', 16000, {
            'dev_pid': 1536,
        })
        # Single-argument print calls produce the same text as the original
        # Python 2 print statements and are also valid Python 3, where the
        # statement form is a SyntaxError.
        print("%s \n" % (back,))
        print('aip  resutl')

        return back.get('result')[0].encode('utf-8')
Exemplo n.º 11
0
class NLP:
    """Baidu ASR helper driven by two caller-supplied dictionaries."""

    def __init__(self, app_token, aipSpeech_wav_para):
        """Store the settings and create the client.

        Args:
            app_token: Mapping with ``APP_ID``, ``API_KEY`` and
                ``SECRET_KEY``.
            aipSpeech_wav_para: Mapping with ``SPEECH_FILE_STYLE``, ``RATE``
                and ``dev_pid``, read on every ``translate`` call.
        """
        self.app_token = app_token
        self.target_wav_para = aipSpeech_wav_para

        credentials = self.app_token
        self.client = AipSpeech(credentials["APP_ID"],
                                credentials["API_KEY"],
                                credentials["SECRET_KEY"])

    def get_file_content(self, file_path):
        """Announce the read on stdout and return the bytes of *file_path*."""
        print("* get wave file")
        with open(file_path, 'rb') as wave_file:
            payload = wave_file.read()
        return payload

    def translate(self, file_path):
        """Recognize a local file and return the raw ASR response."""
        print("* start translate")
        audio = self.get_file_content(file_path)
        params = self.target_wav_para
        reply = self.client.asr(audio, params["SPEECH_FILE_STYLE"],
                                params["RATE"],
                                {'dev_pid': params["dev_pid"]})
        print("* done translate")
        return reply

    def result(self, file_path):
        """Return ``(err_no, text)`` for *file_path*.

        ``text`` is the first candidate when ``err_no`` is 0; otherwise it is
        the response's ``err_msg``, which is also printed.
        """
        reply = self.translate(file_path)
        code = reply['err_no']
        if code == 0:
            return code, reply['result'][0]
        print(reply['err_msg'])
        return code, reply['err_msg']
Exemplo n.º 12
0
class Voice2Word:
    """Speech-to-text and text-to-speech through one Baidu client."""

    def __init__(self):
        """Create the client from the embedded APPID / AK / SK."""
        self.APP_ID = '17896871'
        self.API_KEY = 'Qjva533G96GmTKVblEYRZWSA'
        self.SECRET_KEY = 'Ya67pHTOs6OCkL35A8LPXQnI13B1wEXV'

        self.client = AipSpeech(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    # Read a file
    def __get_file_content(self, filePath):
        """Return the raw bytes of *filePath*."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def voice2word(self, filename='output.wav'):
        """Recognize a local file (sent as 16 kHz WAV, ``dev_pid`` 1536).

        Returns:
            The response's ``result`` value, or ``[]`` when the response has
            none.
        """
        data = self.client.asr(self.__get_file_content(filename), 'wav', 16000,
                               {
                                   'dev_pid': 1536,
                               })
        # Only the failures a missing/unsubscriptable result can cause are
        # handled; the original bare ``except`` also swallowed things like
        # KeyboardInterrupt.
        try:
            return data['result']
        except (KeyError, TypeError):
            return []

    def word2voice(self, word, filename='audio.mp3'):
        """Synthesize *word* and write the audio to *filename*.

        Nothing is written when ``synthesis`` returns a dict, which is how an
        error is reported.
        """
        result = self.client.synthesis(word, 'zh', 1, {
            'vol': 5,
        })

        # Success returns the audio bytes; failure returns a dict with the
        # error code.
        if not isinstance(result, dict):
            with open(filename, 'wb+') as f:
                f.write(result)
Exemplo n.º 13
0
def voice2text(APP_ID, API_KEY, SECRET_KEY, file_path):
    """Recognize a file with Baidu ASR (16 kHz PCM, ``dev_pid`` 1536).

    Args:
        APP_ID: Baidu application id.
        API_KEY: Baidu API key.
        SECRET_KEY: Baidu secret key.
        file_path: Path handed to ``get_data`` to obtain the audio.

    Returns:
        The response's ``result`` value when ``err_msg`` is ``'success.'``;
        ``None`` for an empty response or any other message.
    """
    recognizer = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    reply = recognizer.asr(get_data(file_path), 'pcm', 16000,
                           {'dev_pid': 1536})
    if not reply:
        return None
    if reply['err_msg'] != 'success.':
        return None
    return reply['result']
Exemplo n.º 14
0
def speech2text(filepath, cuid='yixue', dev_pid=1737, rate=16000, format='wav'):
    """Recognize a local audio file with Baidu ASR.

    Args:
        filepath: Path handed to ``get_file_content``.
        cuid: Sent as the ``cuid`` option.
        dev_pid: Sent as the ``dev_pid`` option.
        rate: Sample rate forwarded to ``asr``.
        format: Audio format name forwarded to ``asr``.

    Returns:
        All recognition candidates concatenated into one string, or
        ``'error'`` when ``err_no`` is non-zero or anything raises (the
        exception text is printed in that case).
    """
    try:
        # Your APPID / AK / SK
        client = AipSpeech('15414045', 'BwSTqlxahGvI5k0kGIYDlybZ',
                           'g79fxW3Zqw1qrYKeQHmufv8zNXafc6Vt')
        options = {
            'dev_pid': dev_pid,
            'cuid': cuid,
        }
        res = client.asr(get_file_content(filepath), format, rate, options)
        if res['err_no'] != 0:
            return 'error'
        return ''.join(res['result'])
    except Exception as err:
        print('ASR识别异常:'+str(err))
        return 'error'
Exemplo n.º 15
0
class BaiduASR(object):
    """Convert a sound file to a string with Baidu ASR."""

    version = "1.0.0"

    def __init__(self, work_dir, file, auths):
        """Store the inputs and build the client.

        Args:
            work_dir: Stored on the instance; not used by this class itself.
            file: Path of the audio file that ``run`` reads.
            auths: Mapping providing ``apikey`` and ``secretkey``.
        """
        self.work_dir = work_dir
        self.file = file
        # appId is sent empty; the lookup of auths["appid"] is disabled.
        self.auths = {
            "appId": "",
            "apiKey": auths["apikey"],
            "secretKey": auths["secretkey"],
        }
        self.client = AipSpeech(**self.auths)

    def run(self):
        """Recognize ``self.file``, using its extension as the format name.

        Returns:
            The first recognition candidate when ``err_msg`` is
            ``"success."``; otherwise the whole response serialized as JSON.
        """
        with open(self.file, "rb") as stream:
            payload = stream.read()
        extension = os.path.splitext(self.file)[1].lstrip(".")
        reply = self.client.asr(payload, extension)
        if reply.get("err_msg") != "success.":
            return json.dumps(reply, ensure_ascii=False)
        return reply.get("result", "")[0]

    """
Exemplo n.º 16
0
def baiduAPI():
    """Recognize ``latestSpeech/output.wav`` and return text or an error note.

    The file is sent as 16 kHz WAV with ``dev_pid`` 1537. The raw response is
    printed first.

    Returns:
        The first candidate when ``err_no`` is 0 (also printed); a fixed
        message for error 3301 or 3308; a generic message for any other code.
    """
    # Your APPID / AK / SK
    APP_ID = 'XXXX'
    API_KEY = 'XXXX'
    SECRET_KEY = 'XXXXXXXXX'

    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Read the file
    filePath = "latestSpeech/output.wav"
    with open(filePath, 'rb') as fp:
        audio = fp.read()

    # Recognize the local file. dev_pid: 1537 Mandarin (Chinese only),
    # 1536 Mandarin (simple English supported), 1936 far-field Mandarin.
    result_str = client.asr(audio, 'wav', 16000, {'dev_pid': 1537})
    print(result_str)

    code = result_str["err_no"]
    known_failures = {
        3301: "音频质量过差,请重新录制清晰的音频!",
        3308: "音频过长,音频时长不超过60s!",
    }
    if code in known_failures:
        return known_failures[code]
    if code != 0:
        return "无法识别,请重新录音!"

    text = result_str["result"][0]
    print("文本:" + text)
    return text
Exemplo n.º 17
0
class Baidu:
    """Baidu speech recognition and synthesis behind one client."""

    def __init__(self, config):
        """Build the client from ``config[SLUG]``.

        Args:
            config: Mapping whose ``SLUG`` entry provides ``app_id``,
                ``api_key`` and ``secret_key``.
        """
        APP_ID = config[SLUG]['app_id']
        API_KEY = config[SLUG]['api_key']
        SECRET_KEY = config[SLUG]['secret_key']
        self._client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Recognize a local file
    def recognize(self, audio_file='record.wav'):
        """Send *audio_file* as 16 kHz WAV (``dev_pid`` 1536).

        Returns:
            The raw ASR response.
        """
        file = get_file_content(audio_file)
        res = self._client.asr(file, 'wav', 16000, {
            'dev_pid': 1536,
        })
        return res

    def synthesis(self, text='你好百度', lang='zh', type=1, vol=5):
        """Synthesize *text* into ``speak.mp3``.

        Returns:
            1 after the audio was written; 0 when ``synthesis`` returned a
            dict (an error description, which is printed).
        """
        result = self._client.synthesis(text, lang, type, {
            'vol': vol,
        })
        # Success returns the audio bytes; failure returns a dict with the
        # error code.
        if not isinstance(result, dict):
            with open('speak.mp3', 'wb') as f:
                f.write(result)
                return 1
        else:
            print(result)
            return 0

    def say(self, audio_file='auido.mp3'):
        """Play *audio_file* with the ``mpg123`` command-line player.

        NOTE(review): the default name is spelled 'auido.mp3' while
        ``synthesis`` writes 'speak.mp3' — confirm which file callers expect.
        """
        # Local import: the module's import block is not visible from here.
        import shlex

        # Quote the path so that spaces or shell metacharacters in it are
        # passed to mpg123 as one argument instead of being interpreted by
        # the shell.
        os.system('mpg123 ' + shlex.quote(audio_file))
def baidu_speech():
    """Recognize ``01.wav`` (8 kHz WAV, ``{'lan': 'zh'}``) with Baidu ASR.

    Returns:
        The first recognition candidate, UTF-8 encoded, or the encoded
        marker ``'Err'`` when the response has no ``result`` entry.
    """
    # Successful response:
    # {
    #     "err_no": 0,
    #     "err_msg": "success.",
    #     "corpus_no": "15984125203285346378",
    #     "sn": "481D633F-73BA-726F-49EF-8659ACCC2F3D",
    #     "result": ["北京天气"]
    # }
    # Failed response:
    # {
    #     "err_no": 2000,
    #     "err_msg": "data empty.",
    #     "sn": null
    # }
    # Your APPID / AK / SK
    APP_ID = '10639428'
    API_KEY = '6u8eL2q96PntqBX4cuwgb684'
    SECRET_KEY = 'ane4qq7jOokBfCQ2WzypHh8ZIVs5Pjqm'

    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    # Recognize the local file. The response is no longer bound to the name
    # ``re``, which hid the standard-library module of that name.
    response = client.asr(get_file_content('01.wav'), 'wav', 8000, {
        'lan': 'zh',
    })
    # The fallback has to be a list: indexing the bare string 'Err' returned
    # only its first character, 'E'.
    return response.get('result', ['Err'])[0].encode('utf-8')
Exemplo n.º 19
0
def baiduAPI():
    """Recognize ``latestSpeech/output.wav`` and return text or an error note.

    The file is sent as 16 kHz WAV with ``dev_pid`` 1537.

    Returns:
        The first candidate when ``err_no`` is 0 (also printed); a fixed
        message for error 3301 or 3308; a generic message for any other code.
    """
    # Your APPID / AK / SK
    APP_ID = '10372170'
    API_KEY = ''
    SECRET_KEY = ''

    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # File to read. (A disabled variant converted it to PCM with ffmpeg
    # first: -acodec pcm_s16le -f s16le -ac 1 -ar 16000.)
    wav_file = "latestSpeech/output.wav"

    def get_file_content(filePath):
        with open(filePath, 'rb') as handle:
            return handle.read()

    # Recognize the local file. dev_pid: 1537 Mandarin (Chinese only),
    # 1536 Mandarin (simple English supported), 1936 far-field Mandarin.
    options = {'dev_pid': 1537}
    result_str = client.asr(get_file_content(wav_file), 'wav', 16000, options)

    code = result_str["err_no"]
    if code == 0:
        text = result_str["result"][0]
        print("您刚说的是:\n" + text)
        return text
    if code == 3301:
        return "音频质量过差,请重新录制清晰的音频!"
    if code == 3308:
        return "音频过长,音频时长不超过60s!"
    return "无法识别,请重新录音!"
def main():
    """Recognize every file in ``.\\output`` and append text to ``.\\text``.

    Each file is sent as 16 kHz PCM with ``dev_pid`` 1537. The file name
    minus its last four characters plus ``.txt`` names the output file, which
    receives either the first candidate or a failure note.

    Returns:
        The number of entries found in ``.\\output``.
    """
    files = os.listdir(".\\output")
    files_num = len(files)

    APP_ID = '16799063'
    API_KEY = 'pKZkOBI1Fc4HnelKsNwWDv1y'
    SECRET_KEY = 'dYr6yk1ApFCOdO06zh6xnHiBtohREb4q'

    def get_file_content(filePath):
        with open(filePath, 'rb') as fp:
            return fp.read()

    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    for i in files:
        source_path = ".\\output\\" + i
        print(source_path)
        result = client.asr(get_file_content(source_path), 'pcm', 16000,
                            {'dev_pid': 1537})
        print(result)

        stem = i[:-4]
        succeeded = result["err_no"] == 0
        with open(".\\text\\" + stem + ".txt", "a", encoding="utf-8") as f:
            f.write(result["result"][0] if succeeded else stem + "转换失败!")
    return files_num
Exemplo n.º 21
0
    def message(self):
        """Recognize the recorded audio and classify each spoken item.

        The recording at ``D:/python37/program/output.wav`` is sent as 16 kHz
        PCM (``dev_pid`` 1536). The first candidate is split on "逗号" and
        each piece is looked up in ``self.waste_map_dict``. Unless the piece
        '退出程序' is heard, ``self.sound_recording()`` is called and the
        cycle repeats. An error response prints its ``err_msg`` and stops.
        """
        client = AipSpeech(self.app_id, self.api_key, self.secret_key)

        # A loop replaces the original self-recursion, which added one stack
        # frame per recording round and would eventually hit RecursionError
        # in a long session.
        while True:
            json_data = client.asr(
                self.get_file_content('D:/python37/program/output.wav'), 'pcm',
                16000, {'dev_pid': 1536})
            if json_data['err_no'] != 0:
                print("解析失败")
                print(json_data['err_msg'])
                return

            print("解析成功")
            exit_requested = False
            # Decide for every piece which kind of waste it is
            for name in json_data['result'][0].split("逗号"):
                if name in self.waste_map_dict:
                    print(name + '是' + self.waste_map_dict[name] + '\n')
                elif name == '退出程序':
                    exit_requested = True
                    break
                else:
                    print("没有检测到符合条件的垃圾")

            if exit_requested:
                return
            self.sound_recording()
Exemplo n.º 22
0
class Recognition:
    """Speech-to-text through Baidu ASR with a fixed error marker."""

    def __init__(self):
        """Create the client and set ``self.error`` to ``"ERROR"``."""
        self.__APP_ID = '7023633'
        self.__API_KEY = 'liyMCxLhEsmQZ10TIXBwC2M5'
        self.__SECRET_KEY = '5c96379c38029b266ffadd93c005b481'
        self.client = AipSpeech(self.__APP_ID, self.__API_KEY,
                                self.__SECRET_KEY)
        self.error = "ERROR"

    def recognise(self, filename="voices.wav"):
        """Convert the speech in *filename* to text (STT).

        The file is sent as 16 kHz WAV with ``dev_pid`` 1537.

        Returns:
            The first recognition candidate, or ``self.error`` when
            ``err_no`` is non-zero or an expected key is missing.
        """
        # Read the recording
        with open(filename, 'rb') as recording:
            audio = recording.read()

        # dev_pid values: 1536 Mandarin (simple English supported),
        # 1537 Mandarin (Chinese only), 1737 English, 1637 Cantonese,
        # 1837 Sichuanese, 1936 far-field Mandarin.
        # Failure sample: {'err_msg': 'speech quality error.',
        #                  'err_no': 3301, 'sn': '...'}
        try:
            reply = self.client.asr(audio, 'wav', 16000, {'dev_pid': 1537})
            if reply['err_no'] != 0:
                return self.error
            return reply["result"][0]
        except KeyError:
            print("KeyError")
            return self.error
class baidu_speech_2_word(object):
    """Speech-to-text through a Baidu ``AipSpeech`` client."""

    # Parameters issued for the Baidu AI application; filled in by __init__.
    APP_ID = None
    API_KEY = None
    SECRET_KEY = None
    client = None

    def __init__(self, app_id, api_key, secret_key):
        """Store the Baidu AI credentials and create the client."""
        self.APP_ID, self.API_KEY, self.SECRET_KEY = app_id, api_key, secret_key

        # Obtain authorization
        self.client = AipSpeech(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    def speech_2_word(self, sound_file, rate=16000, dev_pid=1537):
        """Recognize a local file, sent as PCM.

        Args:
            sound_file: Path of the audio file.
            rate: Sample rate forwarded to ``asr``.
            dev_pid: Sent as the ``dev_pid`` option.

        Returns:
            The raw ASR response.
        """
        with open(sound_file, 'rb') as fp:
            audio = fp.read()
            response = self.client.asr(audio, 'pcm', rate,
                                       {'dev_pid': dev_pid})
        return response
Exemplo n.º 24
0
def voice2word(filePath2mp3):
    """Convert an MP3 to ``temp.wav`` and recognize it with Baidu ASR.

    Progress messages are printed along the way.

    Args:
        filePath2mp3: Path of the MP3 file.

    Returns:
        The raw ASR response.

    NOTE(review): the WAV is exported without an explicit sample rate while
    8000 is passed to ``asr`` — confirm the two agree for the inputs used.
    """
    print("transform start")
    # MP3 to WAV; the converted audio goes to temp.wav
    AudioSegment.from_mp3(filePath2mp3).export('temp.wav', format="wav")
    print("transform completed")

    # WAV to text (the three values below come from the Baidu speech API
    # console)
    APP_ID = 'xxxxx'
    API_KEY = 'xxxxx'
    SECRET_KEY = 'xxxxx'
    print("connect to Baidu-API")
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    # Optional client settings, not applied by default: connection timeout
    # and data-transfer timeout, both in milliseconds.

    # Read the file as binary and recognize it
    with open('temp.wav', 'rb') as fp:
        wav_bytes = fp.read()
    return client.asr(wav_bytes, 'wav', 8000, {'lan': 'zh'})
Exemplo n.º 25
0
class Baidu_sr():
    """Live microphone recognition through Baidu ASR."""

    def __init__(self,
                 APP_ID='21179500',
                 API_KEY='ninYN8Qlg1AIgUvcTGpmQ1L8',
                 SECRET_KEY='daGMOpmE17obYnskrrYb6e5IzGdx8ghl'):
        """Create the client; the defaults are the embedded credentials."""
        self.client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def __get_text(self, wav_bytes):
        """Return the first candidate for *wav_bytes*, or ``"Error"``.

        The audio is sent as 16 kHz WAV with ``dev_pid`` 1537. Any exception
        while reading the result is printed and mapped to ``"Error"``.
        """
        reply = self.client.asr(wav_bytes, 'wav', 16000, {'dev_pid': 1537})
        try:
            return reply['result'][0]
        except Exception as exc:
            print(exc)
            return "Error"

    # For real time voice recording
    def speech_recog(self):
        """Record one utterance and yield status lines, then the text.

        Yields:
            Two ``'Info:'`` status strings, followed by the recognized text
            (or ``"Error"``).
        """
        recognizer = sr.Recognizer()
        microphone = sr.Microphone()

        yield 'Info:Please try to speak something...'
        with microphone as source:
            recognizer.adjust_for_ambient_noise(source)
            captured = recognizer.listen(source)
            wav_bytes = captured.get_wav_data(convert_rate=16000)
            yield "Info:Got you, now I'm trying to recognize that..."
            yield self.__get_text(wav_bytes)
Exemplo n.º 26
0
 def asr(msg):
     """Transcribe a received voice message with Baidu ASR.

     ``msg['Text']`` is called with ``msg['FileName']`` to store the message
     on disk (presumably a download callback — confirm against the caller).
     The stored file is decoded as MP3, converted to 16 kHz mono 16-bit WAV
     in memory and sent with ``dev_pid`` 1536. The stored file is removed
     again afterwards.

     Args:
         msg: Mapping with a callable under ``'Text'`` and the target path
             under ``'FileName'``.

     Returns:
         The first recognition candidate.

     Raises:
         KeyError: If the response has no ``result`` entry (the lookup is
             unguarded).
     """
     APP_ID = '16516161'
     API_KEY = 'eycPzd5xfCMsd0jn4aWrjwDz'
     SECRET_KEY = 'PLWIGyEIYcsYQoHuw6lPzxmBrrmSgsoc'
     client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
     msg['Text'](msg['FileName'])
     try:
         # Load the MP3 bytes from the stored file
         with open(msg['FileName'], 'rb') as fp:
             data = fp.read()
         sound = AudioSegment.from_file(io.BytesIO(data), format='mp3')
         # Convert the samples to what the WAV header below declares. The
         # original wrote the decoded data unchanged, so any MP3 that was
         # not already 16 kHz mono 16-bit was mislabelled.
         sound = sound.set_frame_rate(16000).set_channels(1)
         sound = sound.set_sample_width(2)

         # Build the WAV in memory; no tmp.wav is left behind on failure.
         # The frame count is filled in by the wave module on close.
         wav_buffer = io.BytesIO()
         f = wave.open(wav_buffer, 'wb')
         try:
             f.setnchannels(1)
             f.setsampwidth(2)
             f.setframerate(16000)
             f.writeframes(sound.raw_data)
         finally:
             f.close()

         result = client.asr(wav_buffer.getvalue(), 'wav', 16000,
                             {'dev_pid': 1536, })
     finally:
         # Remove the stored message on success and on failure alike.
         if os.path.exists(msg['FileName']):
             os.remove(msg['FileName'])
     return result['result'][0]
Exemplo n.º 27
0
def speechReco_BaiDu(filename):
    """Recognize English speech in a local file with Baidu ASR.

    The file is sent as 16 kHz WAV with ``dev_pid`` 1737 (English).

    Args:
        filename: Path of the audio file.

    Returns:
        The first recognition candidate, or ``""`` when ``err_no`` is
        non-zero.
    """
    # Your APPID / AK / SK
    client = AipSpeech('6746004', 'uDtZD8h83SbyqVKyZI1vRRVj',
                       '76f710eec808e5dd74854c3180766b4d')

    # Read the file and recognize it while it is still open
    with open(filename, 'rb') as fp:
        ret = client.asr(fp.read(), 'wav', 16000, {'dev_pid': 1737})
        recognized = ret['err_no'] == 0
        return ret['result'][0] if recognized else ""
Exemplo n.º 28
0
class DuAPI:
    """Baidu ASR client with a lazily created shared instance."""

    APP_ID = '17674810'
    API_KEY = 'fVP9VGjFeUMNEf8dhyLyuiN7'
    SECRET_KEY = 'BG349KHUIt7Ddu1cNKGuY3B32p07OKaC'
    # Shared instance handed out by get_instance(); created on first use.
    instance = None

    def __init__(self):
        """Create the ``AipSpeech`` client from the class credentials."""
        self.aipClient = AipSpeech(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    def asr(self, wav_path):
        """Recognize *wav_path* (16 kHz WAV, ``dev_pid`` 1936).

        Returns:
            The first recognition candidate, or ``''`` when ``err_msg`` is
            not ``'success.'``.
        """
        with open(wav_path, 'rb') as fp:
            audio = fp.read()

        ans = self.aipClient.asr(audio, 'wav', 16000, {'dev_pid': 1936})
        if ans['err_msg'] != 'success.':
            return ''
        return ans['result'][0]

    @classmethod
    def get_instance(cls):
        """Return the shared ``DuAPI`` instance, creating it if needed."""
        shared = cls.instance
        if not shared:
            shared = cls.instance = DuAPI()
        return shared
Exemplo n.º 29
0
def yuyinshibie(audio, type):
    """Save *audio* to ``01.pcm`` and recognize it with Baidu ASR.

    The file is sent as 8 kHz PCM with ``dev_pid`` 1536.

    Args:
        audio: Data handed to ``save_wave_file`` together with ``'01.pcm'``.
        type: Selects the credential set: ``1`` uses the first application,
            any other value the second.

    Returns:
        The first recognition candidate, or the integer ``0`` when the
        response carries no usable ``result``.
    """
    save_wave_file('01.pcm', audio)
    if type == 1:
        APP_ID = '14711800'
        API_KEY = 'wyDhSG366cL1zy0GxDHZtHxk'
        SECRET_KEY = 'b2WprhYbk934KjegLaMk8WRrpw4zBEbW'
    else:
        APP_ID = '14731705'
        API_KEY = 'omZGAmrmqwR9tWOMiBLLKuWH'
        SECRET_KEY = 'POzePztxjWzVvqnykHGdEygu2QFETuQC'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # Read the file
    def get_file_content(filePath):
        with open(filePath, 'rb') as fp:
            return fp.read()

    # Recognize the local file
    result = client.asr(get_file_content('01.pcm'), 'pcm', 8000, {
        'dev_pid': 1536,
    })
    # Only the failures a missing or empty result can cause are mapped to 0;
    # the original bare ``except`` also swallowed things like
    # KeyboardInterrupt.
    try:
        return result['result'][0]
    except (KeyError, IndexError, TypeError):
        return 0
Exemplo n.º 30
0
class BaiduASR():
    """
    Baidu speech recognition API.
    To use this module, first register a developer account at
    yuyin.baidu.com, create a new application, then obtain the API Key and
    Secret Key from "view key" in the application management page and put
    them into the configuration file, e.g.
    ...
        baidu_yuyin:
            appid: '<your app id>'
            api_key: '<your api key>'
            secret_key: '<your secret key>'
        ...
    """

    SLUG = "baidu-asr"

    def __init__(self, appid, api_key, secret_key, **args):
        """Create the ``AipSpeech`` client; extra keyword args are ignored."""
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the subclass when this class is inherited from and then calls this
        # __init__ again without end.
        super(BaiduASR, self).__init__()
        self.client = AipSpeech(appid, api_key, secret_key)

    def transcribe(self, fp):
        """Recognize a local file (16 kHz WAV, ``dev_pid`` 1936).

        Args:
            fp: Handed to ``utils.get_file_content`` to obtain the audio.

        Returns:
            All recognition candidates joined into one string, or ``''``
            when ``err_no`` is non-zero. Both outcomes are logged.
        """
        res = self.client.asr(utils.get_file_content(fp), 'wav', 16000, {
            'dev_pid': 1936,
        })
        if res['err_no'] == 0:
            logger.info(('百度语音识别到了', res['result']))
            return ''.join(res['result'])
        else:
            logger.info('百度语音识别出错了:' + res['err_msg'])
            return ''
Exemplo n.º 31
0
class Recognition:
    """Recognize WAV files through Baidu ASR."""

    def __init__(self, config_info):
        """Build the client from ``config_info['baidu']``.

        Args:
            config_info: Mapping whose ``baidu`` entry provides ``appId``,
                ``apiKey`` and ``secretKey``.
        """
        self.baidu = config_info['baidu']
        self.speech = AipSpeech(self.baidu['appId'], self.baidu['apiKey'], self.baidu['secretKey'])

    def get_wave_file(self, wave_file):
        """Read all frames of a WAV file.

        The sample width, frame rate and channel count are printed.

        Args:
            wave_file: Path of the WAV file.

        Returns:
            A tuple ``(audio_data, frame_rate, f_len)`` where ``f_len`` is
            the frame count multiplied by the sample width.

        Raises:
            FileNotFoundError: If *wave_file* does not exist.
        """
        if not os.path.exists(wave_file):
            raise FileNotFoundError(wave_file)

        # The with-block closes the file; the original never closed it.
        with wave.open(wave_file, 'rb') as fp:
            nf = fp.getnframes()  # number of sample frames in the file
            # Public getters instead of the private _sampwidth/_framerate.
            sample_width = fp.getsampwidth()
            frame_rate = fp.getframerate()
            print('sampwidth:', sample_width, 'framerate:', frame_rate, 'channels:', fp.getnchannels())
            f_len = nf * sample_width
            audio_data = fp.readframes(nf)
        return (audio_data, frame_rate, f_len)

    def recognize(self, audio_file):
        """Send the frames of *audio_file* as PCM and return the response.

        The file's own frame rate is passed to ``asr`` together with
        ``{'lan': 'zh'}``.
        """
        # Supported formats: pcm (uncompressed), wav (uncompressed, PCM
        # encoded), amr (compressed). PCM at a fixed 16000 sample rate,
        # 16-bit mono, is recommended: the server converts everything else
        # to PCM, so wav and amr cost extra conversion time.
        # The third tuple element is not needed here; it is no longer bound
        # to the name ``len``, which hid the builtin.
        wave_content, frame_rate, _ = self.get_wave_file(audio_file)
        result = self.speech.asr(wave_content, 'pcm', frame_rate, {'lan': 'zh'})
        return result
Exemplo n.º 32
0
@author: jukuo
"""
#-*- coding: utf-8 -*-                    
from aip import AipSpeech
import make_wav

""" 你的 APPID AK SK """
# Placeholder credentials; replace them with your own APPID / AK / SK.
APP_ID, API_KEY, SECRET_KEY = '***', '***', '** '

# Module-level Baidu speech client used by the script below.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
# 读取文件
def get_file_content(filePath):
    """Return the raw bytes stored at *filePath*."""
    with open(filePath, 'rb') as handle:
        content = handle.read()
    return content

# Record from the microphone, save the capture and recognize the saved file.
# The recorder is looked up on the imported make_wav module: only
# ``import make_wav`` is in scope, so the bare name GenAudio was never bound.
# NOTE(review): assumes GenAudio is defined in make_wav — confirm.
r = make_wav.GenAudio()
r.num_samples = 2000    # pyaudio internal buffer size
r.sampling_rate = 16000  # sampling frequency
r.level = 1500          # threshold above which sound is kept
r.count_num = 20        # record once COUNT_NUM samples above LEVEL occur within count_num samples
r.save_length = 8       # minimum recording length: save_length * num_samples samples
r.time_count = 10        # recording time in seconds
r.read_audio()
r.save_wav("./test.wav")
result=client.asr(get_file_content('test.wav'), 'pcm', 16000, {
    'dev_pid': 1536,
})
# An error response has no 'result' entry; show its message instead of
# failing with KeyError.
if 'result' in result:
    print(result['result'][0])
else:
    print(result.get('err_msg'))