示例#1
0
def voice_recongnition(pcm_file):
    """Recognize the speech in a local PCM file with Baidu AipSpeech.

    Args:
        pcm_file: Path handed to ``get_file_content`` to load the audio. The
            audio is submitted as 'pcm' at 16000 Hz with ``dev_pid`` 1536.

    Returns:
        The ``'result'`` entry of the service response. ``[None]`` is
        returned when the request raises or when the response carries no
        ``'result'`` entry (e.g. the service reported an error).
    """
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration or the environment instead.
    APP_ID = '16184075'
    API_KEY = 'OY4u5LVYfQUPB3oLEcfm1DNP'
    SECRET_KEY = 'm3MQrGkLTXGO3mgnljXtOBSXB8dG8HGp'
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    client.setConnectionTimeoutInMillis(5000)
    client.setSocketTimeoutInMillis(10000)
    # Recognize the local file.
    try:
        results = client.asr(get_file_content(pcm_file), 'pcm', 16000, {
            'dev_pid': 1536,
        })
    except Exception:
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
        # propagate.
        print("无法连接服务器")
        return [None]
    if not isinstance(results, dict) or 'result' not in results:
        # Error responses have no 'result' key; report them instead of
        # failing with a KeyError.
        print(results)
        return [None]
    return results['result']
示例#2
0
def stringToMp3(strings):
    """Synthesize a fixed greeting plus two caller strings into one MP3.

    The greeting, ``strings[0]`` and ``strings[1]`` are synthesized in that
    order through Baidu AipSpeech, and the audio of every successful request
    is written, concatenated, to ``test_tmp.mp3``.

    Args:
        strings: Indexable with at least two text entries.

    Returns:
        None. Error responses from the service are printed and skipped.
    """
    strings_txt = '又是新的一天。主人起床呀,懒虫,起床咯,死肥宅,起床啦。要上课啦!'
    print(strings[0], strings[1])
    APPID = '******'
    APIKey = '*******'
    SecretKey = '***********************'
    aipSpeech = AipSpeech(APPID, APIKey, SecretKey)
    aipSpeech.setConnectionTimeoutInMillis(5000)
    aipSpeech.setSocketTimeoutInMillis(100000)

    # (text, speech speed) for each segment, in playback order.
    segments = (
        (strings_txt, 5),
        (strings[0], 6),
        (strings[1], 6),
    )
    audio_chunks = []
    for text, speed in segments:
        result = aipSpeech.synthesis(text, 'zh', '1', {
            'vol': 10,
            'per': '0',
            'spd': speed,
        })
        if isinstance(result, dict):
            # A dict is the service's error response; show it (the original
            # printed the builtin ``dict`` type by mistake).
            print(result)
            continue
        audio_chunks.append(result)

    # Write once so a failed first segment can never cause later segments to
    # be appended to a stale file from an earlier run.
    if audio_chunks:
        with open('test_tmp.mp3', 'wb') as f:
            f.write(b"".join(audio_chunks))
示例#3
0
def init_aip():
    """Create the module-level Baidu speech and OCR clients.

    Binds two globals: ``aip_client``, an ``AipSpeech`` client configured
    with a 1000 ms connection timeout and a 3000 ms socket timeout, and
    ``aip_client_ocr``, an ``AipOcr`` client. Each client uses its own
    application credentials.
    """
    global aip_client, aip_client_ocr

    # (app id, api key, secret key) for each service.
    speech_credentials = (
        '11758766',
        'I5yAyCHCEw5eQhjG0nuXkgHr',
        '8OxzRjt1dMrRgtsmTHTLpH1SxWBywoju',
    )
    ocr_credentials = (
        '14462175',
        'KG8eaIU0ouFCaMLnQZ2u3IX1',
        'waX99HpVyaukCX6aruuazGNo6zaVOcVu',
    )

    aip_client = AipSpeech(*speech_credentials)
    # Connection timeout.
    aip_client.setConnectionTimeoutInMillis(1000)
    # Data-transfer timeout.
    aip_client.setSocketTimeoutInMillis(3000)

    aip_client_ocr = AipOcr(*ocr_credentials)
示例#4
0
def init_aip():
    """Create the module-level Baidu speech and OCR clients.

    Binds two globals: ``aip_client``, an ``AipSpeech`` client configured
    with a 1000 ms connection timeout and a 3000 ms socket timeout, and
    ``aip_client_ocr``, an ``AipOcr`` client. Each client uses its own
    application credentials.
    """
    global aip_client, aip_client_ocr

    # (app id, api key, secret key) for each service.
    speech_credentials = (
        '11758766',
        'I5yAyCHCEw5eQhjG0nuXkgHr',
        '8OxzRjt1dMrRgtsmTHTLpH1SxWBywoju',
    )
    ocr_credentials = (
        '14462175',
        'KG8eaIU0ouFCaMLnQZ2u3IX1',
        'waX99HpVyaukCX6aruuazGNo6zaVOcVu',
    )

    aip_client = AipSpeech(*speech_credentials)
    # Connection timeout.
    aip_client.setConnectionTimeoutInMillis(1000)
    # Data-transfer timeout.
    aip_client.setSocketTimeoutInMillis(3000)

    aip_client_ocr = AipOcr(*ocr_credentials)
示例#5
0
class BaiduTTS:
    """ROS-side wrapper exposing Baidu speech synthesis and recognition.

    Credentials and the timeout are read from the node's private ROS
    parameters ``~app_id``, ``~api_key``, ``~secret_key`` and ``~timeout``.
    """

    def __init__(self):
        """Read the private parameters and build the ``AipSpeech`` client."""
        app_id, app_key, secret_key = (
            rospy.get_param(name)
            for name in ("~app_id", "~api_key", "~secret_key")
        )
        timeout = int(rospy.get_param("~timeout"))

        self.client = AipSpeech(app_id, app_key, secret_key)
        # The same value is used for both the connection and socket timeout.
        self.client.setConnectionTimeoutInMillis(timeout)
        self.client.setSocketTimeoutInMillis(timeout)

    def tts(self, words):
        """Synthesize ``words`` and wrap the audio in an ``AudioData``.

        Returns the ``AudioData`` message on success. When the service
        answers with a dict (its error form) the dict is logged through
        ``rospy.logerr`` and None is returned.
        """
        synthesized = self.client.synthesis(words, 'zh', 1, {'vol': 5})

        if not isinstance(synthesized, dict):
            message = AudioData()
            message.data = synthesized
            return message

        rospy.logerr(synthesized)
        return None

    def asr(self, audio_data):
        """Recognize the speech held in ``audio_data.data``.

        The data is joined into one string and submitted as 'pcm' at
        16000 Hz with ``dev_pid`` 1936. Returns the first recognition result
        encoded as UTF-8, or an empty string when the response does not
        report ``err_no == 0`` (any ``err_msg`` / ``error_msg`` it carries
        is logged as an error).
        """
        payload = "".join(audio_data.data)
        response = self.client.asr(payload, 'pcm', 16000, {'dev_pid': 1936})
        rospy.loginfo(response)

        succeeded = "err_no" in response and response["err_no"] == 0
        if not succeeded:
            for key in ("err_msg", "error_msg"):
                if key in response:
                    rospy.logerr(response[key])
            return ""

        rospy.loginfo(response["result"])
        return response["result"][0].encode("utf-8")
示例#6
0
class RcgCore(object):  # no longer runs as a thread
    """Recognize one WAV file through Baidu speech recognition, with retries.

    Call ``run()`` to perform the recognition, then ``get_result()`` to read
    the outcome: the recognized text, ``''`` when the service reported poor
    audio quality (error 3301), or None when every attempt failed.
    """

    def __init__(self, wav_file, timeout=600, bd_appid=None, bd_api_key=None, bd_secret_key=None, use_pro_api=True):
        """Prepare the recognizer.

        Args:
            wav_file: Audio source handed to the ``utils`` helpers in ``run``.
            timeout: Connection and socket timeout in seconds (converted to
                milliseconds for the client).
            bd_appid, bd_api_key, bd_secret_key: Optional credentials. When
                all three are given a dedicated client is created from them;
                otherwise the module-level ``aip_speech`` client is used.
            use_pro_api: Use ``asr_pro`` on audio upsampled to 16 kHz instead
                of ``asr`` on the 8 kHz audio.
        """
        self.wav_file = wav_file
        self.result = None
        if bd_appid and bd_api_key and bd_secret_key:
            # Use the credentials that were actually passed in (previously
            # they were ignored in favour of the config defaults).
            self.aip_speech = AipSpeech(bd_appid, bd_api_key, bd_secret_key)
        else:
            # Shared module-level client; note the timeouts below are
            # applied to that shared object.
            self.aip_speech = aip_speech
        self.aip_speech.setConnectionTimeoutInMillis(timeout * 1000)
        self.aip_speech.setSocketTimeoutInMillis(timeout * 1000)
        self.use_pro_api = use_pro_api

    def run(self):
        """Send the audio for recognition, retrying up to ``config.RCG_MAX_RETRY`` times.

        The outcome is stored in ``self.result``. Exceptions raised during an
        attempt (including unrecognized error codes) are logged and the
        attempt is retried.
        """
        if self.use_pro_api:
            tmp_wav_path = io.BytesIO()
            utils.wav_8kto16k(self.wav_file, tmp_wav_path)
            file_content = utils.read(tmp_wav_path, 'rb')
        else:
            file_content = utils.read(self.wav_file, 'rb')

        # dev_pid  language                              model         punctuation  notes
        # 1536     Mandarin (simple English supported)   search        no           custom lexicon supported
        # 1537     Mandarin (Chinese only)               input method  yes          no custom lexicon
        # 1737     English                                             no           no custom lexicon
        # 1637     Cantonese                                           yes          no custom lexicon
        # 1837     Sichuanese                                          yes          no custom lexicon
        # 1936     Mandarin, far field                   far field     yes          not supported
        max_retry = config.RCG_MAX_RETRY
        for retry in range(max_retry + 1):
            try:
                # Declare 'pcm' rather than 'wav' to skip Baidu's own
                # conversion (can avoid err 3301, audio quality, in some
                # cases). Tested with 1537-8k at 30 qps.
                if self.use_pro_api:
                    rst = self.aip_speech.asr_pro(file_content, 'pcm', 16000,
                                                  {'dev_pid': 80001})
                else:
                    rst = self.aip_speech.asr(file_content, 'pcm', 8000,
                                              {'dev_pid': 1537})

                err_no = rst['err_no']
                if err_no == 0:
                    self.result = rst['result'][0]  # recognized text
                    logging.debug("Recognition: %s", self.result)
                    break
                elif err_no in (3304, '3304'):  # QPS limit exceeded: wait one second, retry
                    logging.warning('qps超限(等待1秒):%s', rst.get('err_msg'))
                    time.sleep(1)
                elif err_no == 3301:  # poor audio quality: empty result, no retry
                    self.result = ''
                    logging.warning('音频质量差:%s', rst.get('err_msg'))
                    break
                else:
                    logging.error('识别错误:%s', rst.get('err_msg'))
                    logging.error(rst)
                    # Handled by the except below, which logs and retries.
                    raise Exception('Recognition failed!')
            except Exception as e:
                logging.warning('RcgCore: on retry %d:', retry)
                logging.warning(e)

    def get_result(self):
        """Return the outcome stored by ``run`` (None before/without success)."""
        return self.result
示例#7
0
    print('read: ' + filePath)
    mono = audio.set_frame_rate(16000).set_channels(1)
    print('set: ' + filePath)
    mono.export('test.pcm', format="s16le")
    print('export: ' + filePath)


# Script entry point: sends a local PCM recording to Baidu speech recognition
# in consecutive slices.
if __name__ == '__main__':
    # Credentials are left blank here and must be filled in before running.
    APP_ID = ''
    API_KEY = ''
    SECRET_KEY = ''

    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    # 2000 ms to establish the connection, 60000 ms for data transfer.
    client.setConnectionTimeoutInMillis(2000)
    client.setSocketTimeoutInMillis(60000)

    path = 'C:\\school\\test.pcm'
    song = AudioSegment.from_file(path)
    # Total length of the recording — presumably milliseconds (pydub
    # convention); TODO confirm.
    mss = len(song)
    start = 0
    while start < mss:
        # Prefix for this slice: the start offset divided by 1000 with the
        # last two characters of its string form dropped (e.g. '30.0' -> '30').
        print(str(start / 1000)[:-2] + ': ', end='')
        # Each request covers at most 30000 units, clamped to the end of
        # the recording.
        end = start + 30000
        if end > mss:
            end = mss
        buff = song[start:end].raw_data
        start = end
        # Submit the slice as 'pcm' at 16000 Hz with dev_pid 1536.
        dic = client.asr(buff, 'pcm', 16000, {
            'dev_pid': 1536,
        })
示例#8
0
class Client(object):
    """Text-to-speech helper built on a Baidu ``AipSpeech`` client.

    Besides single-sentence synthesis it can split long text and whole files
    into pieces of at most 1024 UTF-8 bytes and synthesize them one by one.
    """

    def __init__(self, app_info_file):
        """Load credentials from a JSON file and create the speech client.

        app_info_file: path of a JSON file providing the keys "app_id",
            "api_key" and "secret_key".
        """
        ## read app info
        with open(app_info_file, "rb") as f:
            self.app_info = json.load(f)

        APP_ID = self.app_info["app_id"]
        API_KEY = self.app_info["api_key"]
        SECRET_KEY = self.app_info["secret_key"]
        self.client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
        self.attributes = ClientAttributes()

    def setConnectionTimeoutInMillis(self, ms):
        """
        ms: timeout for establishing the connection (milliseconds)
        """
        self.client.setConnectionTimeoutInMillis(ms)

    def setSocketTimeoutInMillis(self, ms):
        """
        ms: timeout for transferring data over the open connection
            (milliseconds)
        """
        self.client.setSocketTimeoutInMillis(ms)

    def setAttributes(self, attr):
        """
        Store synthesis options; only the keys "spd", "pit", "vol" and "per"
        are recognized, and each value is stored as a string.

        attr: dict{str: str|int}
        """
        for key in ("spd", "pit", "vol", "per"):
            if key in attr:
                setattr(self.attributes, key, str(attr[key]))

    def synthesis(self, text):
        """
        Simple synthesis of a sentence.

        Return the binary result. If the service answers with an error dict,
        print it and raise Exception("Stop").
        """
        # Forward only the options that have been set to a truthy value.
        attr = {}
        for key in ("spd", "pit", "vol", "per"):
            value = getattr(self.attributes, key)
            if value:
                attr[key] = value

        result = self.client.synthesis(text, 'zh', 1, attr)

        if not isinstance(result, dict):
            return result
        else:
            print("Error:", result)
            raise Exception("Stop")

    def synthesisLongString(self, text):
        """
        Divide text into pieces at end-of-sentence indicators so that every
        synthesis request stays within 1024 UTF-8 bytes.

        Yield the audio pieces in text order.
        """

        if utf8len(text) <= 1024:
            yield self.synthesis(text)
            return

        ## helpers for divide string into pieces and synthesis
        INDIC = "SENTEND"

        def sep(matchObj):
            # Keep the indicator character and mark the split point after it.
            return matchObj.group(1) + INDIC

        def multiShortSynthesis(text, level=1):
            # level 1 splits after sentence-level indicators, level 2 after
            # any punctuation, level 3 and above character by character.
            first_level_indicator = ".;!? ?。!」「"
            second_level_indicator = string.punctuation + ","
            if level == 1:
                indicators = first_level_indicator
            elif level == 2:
                indicators = second_level_indicator
            if level < 3:
                # re.escape keeps characters such as ']', '\\', '^' and '-'
                # literal inside the character class.
                pattern = "([" + re.escape(indicators) + "])"
                pieces = re.sub(pattern, sep, text).split(INDIC)
            else:
                pieces = text

            cur = ""
            cur_bytes = 0

            for piece in pieces:
                piece_bytes = utf8len(piece)
                if piece_bytes > 1024:
                    if cur_bytes > 0:
                        yield self.synthesis(cur)
                        # Reset so the flushed text is not synthesized again.
                        cur = ""
                        cur_bytes = 0
                    # Too long on its own: split it with a finer rule.
                    for audio_piece in multiShortSynthesis(piece, level + 1):
                        yield audio_piece
                elif piece_bytes <= 1024 - cur_bytes:
                    cur += piece
                    cur_bytes += piece_bytes
                else:
                    yield self.synthesis(cur)
                    cur = piece
                    cur_bytes = piece_bytes

            # Flush whatever is still buffered; without this the tail of the
            # text would never be synthesized.
            if cur_bytes > 0:
                yield self.synthesis(cur)

        ## run it
        for audio_piece in multiShortSynthesis(text, 1):
            yield audio_piece

    def synthesisFile(self, fp, storeFp, Range=None):
        """
        Synthesize a UTF-8 text file, writing the audio to storeFp.

        fp: input file pointer (read() must return bytes)
        storeFp: output file pointer
        Range: optional (FROM, TO) pair of 1-based, inclusive line numbers;
            the whole file is processed when omitted
        """
        data = fp.read().decode("utf8").splitlines()
        total_line = len(data)
        FROM = 1
        TO = total_line
        if Range is not None:
            FROM, TO = Range
            total_line = TO - FROM + 1
        cur = ""
        cur_bytes = 0

        # Lines FROM..TO (1-based, inclusive) are indices FROM-1..TO-1.
        for c in range(FROM - 1, TO):
            line = data[c]
            if len(line) > 0:
                line_bytes = utf8len(line)
                # 1023 leaves room for the "\n" that joins buffered lines.
                if line_bytes + cur_bytes > 1023:
                    if cur_bytes == 0:
                        for audio_piece in self.synthesisLongString(line):
                            storeFp.write(audio_piece)
                    else:
                        for audio_piece in self.synthesisLongString(cur):
                            storeFp.write(audio_piece)
                        cur = line
                        cur_bytes = line_bytes
                else:
                    cur += "\n" + line
                    cur_bytes += line_bytes + 1
            sys.stdout.write("\rTransited: {0}/{1}%".format(
                c + 2 - FROM, total_line))
            sys.stdout.flush()

        # Synthesize the lines still buffered after the last iteration.
        if cur_bytes > 0:
            for audio_piece in self.synthesisLongString(cur):
                storeFp.write(audio_piece)
        print("Finished.")
示例#9
0
class BaiduSpeech(object):
    """Callable wrapper around ``AipSpeech`` for either synthesis or recognition.

    ``mode`` selects what calling the instance does: ``'tts'`` dispatches to
    ``tts`` and ``'asr'`` to ``asr``.
    """

    def __init__(self, APP_ID, API_KEY, SECRET_KEY, mode='tts'):
        """Create the client (10000 ms timeouts) and pick the call target.

        Raises KeyError when ``mode`` is neither ``'tts'`` nor ``'asr'``.
        """
        call_dict = {'tts': self.tts, 'asr': self.asr}
        self.client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
        self.client.setConnectionTimeoutInMillis(10000)
        self.client.setSocketTimeoutInMillis(10000)
        # Initial pause (seconds) before each request; doubled after every
        # request that raises.
        self.wait_time = 0.001
        self.call_func = call_dict[mode]

    @staticmethod
    def get_file_content(filePath):
        """Return the raw bytes of the file at ``filePath``."""
        with open(filePath, 'rb') as fp:
            return fp.read()

    def tts(self, texts, param=None):
        """Synthesize every text in ``texts``.

        Args:
            texts: Iterable of strings to synthesize.
            param: Optional dict of synthesis options passed to the client.

        Returns:
            List with one audio result per text, in order.
        """
        options = {} if param is None else param
        audios = []
        for text in texts:
            wait_time = self.wait_time
            # NOTE(review): a text the service keeps rejecting is retried
            # without limit; consider bounding the number of attempts.
            while True:
                time.sleep(wait_time)
                try:
                    audio = self.client.synthesis(text, "zh", 1, options)
                except Exception as e:
                    wait_time *= 2
                    print("Exception occur:{}".format(e))
                    continue
                if not isinstance(audio, dict):
                    audios.append(audio)
                    break
                # A dict is the service's error response: report and retry.
                print("return error:{}".format(audio))

        return audios

    def asr(self, audio_file, param=None):
        """Recognize the speech in ``audio_file``.

        Args:
            audio_file: Path of the audio file to read.
            param: Optional dict. ``'format'`` (default ``'wav'``) and
                ``'rate'`` (default 16000) select the audio description; the
                remaining entries are passed to the client as options. The
                caller's dict is not modified.

        Returns:
            The ``'result'`` entry of a successful response, or None when the
            service reports an error. Requests that raise, or that return
            something other than a dict, are retried.
        """
        # Work on a copy so neither the caller's dict nor a shared default is
        # mutated by the pops below.
        options = dict(param) if param else {}
        wait_time = self.wait_time
        audio_format = options.pop('format', 'wav')
        audio_rate = options.pop('rate', 16000)

        # read wav
        audio_data = self.get_file_content(audio_file)
        text = None
        while True:
            time.sleep(wait_time)
            try:
                text_json = self.client.asr(
                    speech=audio_data, format=audio_format, rate=audio_rate, options=options
                    )
            except Exception as e:
                wait_time *= 2
                print("Exception occur:{}".format(e))
                continue
            if not isinstance(text_json, dict):
                # Not the expected response shape: report and retry.
                print("return error:{}".format(text_json))
                continue
            if text_json.get('err_no', 0) == 0:
                text = text_json.get('result', '')
            else:
                print("error occur", text_json)
            break
        return text

    def __call__(self, data, param=None):
        """Run the operation selected by ``mode`` on ``data``."""
        return self.call_func(data, {} if param is None else param)