示例#1
0
def asr_mandarin(req_file: "UploadedFile"):
    """Transcribe an uploaded Mandarin audio file to hanzi text.

    Pipeline: persist the upload, run the acoustic model to obtain a
    pinyin sequence, then decode pinyin to characters with the language
    model.

    NOTE(review): depends on module-level names (``load_data``, ``asrPath``,
    ``modelpath``, ``filePath``, ``keras``) defined outside this block.
    """
    load_data(req_file)
    # Drop any graph left over from a previous request before loading.
    keras.backend.clear_session()

    acoustic = ModelSpeech(asrPath)
    acoustic.LoadModel(modelpath + 'speech_model251_e_0_step_625000.model')
    pinyin_seq = acoustic.RecognizeSpeech_FromFile(filePath)

    language = ModelLanguage('model_language')
    language.LoadModel()
    return language.SpeechToText(pinyin_seq)
示例#2
0
def speech_recognition(f):
    """Transcribe audio file *f*: acoustic model -> pinyin -> hanzi text.

    Prints intermediate and final results, returns the final text.
    """
    data_dir = '.'
    model_dir = 'model_speech' + '\\'

    acoustic = ModelSpeech(data_dir)
    acoustic.LoadModel(model_dir + 'speech_model251_e_0_step_625000.model')
    pinyin = acoustic.RecognizeSpeech_FromFile(f)

    # Release the Keras graph before the language-model stage.
    K.clear_session()
    print('*[提示] 语音识别结果:\n', pinyin)

    language = ModelLanguage('model_language')
    language.LoadModel()
    text = language.SpeechToText(pinyin)
    print('语音转文字结果:\n', text)
    return text
示例#3
0
def predict(video_file):
    """Recognize speech in *video_file*.

    Returns a ``(pinyin_sequence, hanzi_text)`` tuple: the raw acoustic
    decode and its language-model conversion.
    """
    # Heavy project/Keras imports are kept local so importing this module
    # stays cheap.
    from SpeechModel251 import ModelSpeech
    from LanguageModel2 import ModelLanguage
    from keras import backend as K

    speech_model = ModelSpeech('dataset')
    speech_model.LoadModel('model_speech' + '/m251/speech_model251_e_0_step_60500.model')

    pinyin_seq = speech_model.RecognizeSpeech_FromFile(video_file)
    # Free the acoustic model's graph before loading the language model.
    K.clear_session()

    lang_model = ModelLanguage('model_language')
    lang_model.LoadModel()
    text = lang_model.SpeechToText(pinyin_seq)
    return pinyin_seq, text
示例#4
0
def CTC_tf(current_path):
    """Recognize speech from ``chunk-00.wav`` located under *current_path*.

    Runs the CTC acoustic model to get a pinyin sequence, converts it to
    hanzi text with the language model, prints both results, and returns
    ``hanzi_pinyin(text)``.

    Parameters
    ----------
    current_path : str
        Directory containing the recorded audio chunk ``chunk-00.wav``.

    NOTE(review): depends on module-level names (``plat``, ``ModelSpeech``,
    ``ModelLanguage``, ``hanzi_pinyin``) defined outside this block.
    """
    import os

    # Paths differ per OS: on Windows the caller's directory holds the
    # data; elsewhere we fall back to the bundled 'dataset' directory.
    system_type = plat.system()
    if system_type == 'Windows':
        datapath = current_path
        modelpath = 'model_speech' + '\\'
    elif system_type == 'Linux':
        datapath = 'dataset'
        modelpath = 'model_speech' + '/'
    else:
        print('*[Message] Unknown System\n')
        datapath = 'dataset'
        modelpath = 'model_speech' + '/'

    ms = ModelSpeech(datapath)
    ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')

    # BUG FIX: the original built the path as current_path + '\\chunk-00.wav'
    # on every platform, which produces an invalid path on Linux/macOS even
    # though the branches above explicitly support them. os.path.join picks
    # the correct separator for the running OS.
    wav_path = os.path.join(current_path, 'chunk-00.wav')
    rr = ms.RecognizeSpeech_FromFile(wav_path)
    print('*[提示] 语音识别结果:\n', rr)

    ml = ModelLanguage('model_language')
    ml.LoadModel()
    r = ml.SpeechToText(rr)
    print('语音转文字结果:\n', r)

    return hanzi_pinyin(r)
示例#5
0
    # NOTE(review): this fragment is truncated — the opening
    # `if (system_type == 'Windows'):` line is missing above; the two
    # indented assignments below are that branch's body. The fragment
    # does not parse on its own.
    datapath = 'C:\\test'
    modelpath = modelpath + '\\'
elif (system_type == 'Linux'):
    datapath = 'dataset'
    modelpath = modelpath + '/'
else:
    print('*[Message] Unknown System\n')
    datapath = 'dataset'
    modelpath = modelpath + '/'

# Acoustic model: decodes an audio file into a pinyin symbol sequence.
ms = ModelSpeech(datapath)

ms.LoadModel(modelpath + 'speech_model251_e_0_step_12000.model')

# Optional evaluation pass, disabled by default:
#ms.TestModel(datapath, str_dataset='test', data_count = 64, out_report = True)

r = ms.RecognizeSpeech_FromFile('C:\\test\\online_star.wav')

print('*[提示] 语音识别结果:\n', r)

# Language model: converts the pinyin sequence into hanzi text.
ml = ModelLanguage('model_language')
ml.LoadModel()

# Hard-coded sample pinyin sequences kept for manual testing:
#str_pinyin = ['zhe4','zhen1','shi4','ji2', 'hao3','de5']
#str_pinyin = ['jin1', 'tian1', 'shi4', 'xing1', 'qi1', 'san1']
#str_pinyin = ['ni3', 'hao3','a1']
str_pinyin = r
#str_pinyin =  ['su1', 'bei3', 'jun1', 'de5', 'yi4','xie1', 'ai4', 'guo2', 'jiang4', 'shi4', 'ma3', 'zhan4', 'shan1', 'ming2', 'yi1', 'dong4', 'ta1', 'ju4', 'su1', 'bi3', 'ai4', 'dan4', 'tian2','mei2', 'bai3', 'ye3', 'fei1', 'qi3', 'kan4', 'zhan4']
r = ml.SpeechToText(str_pinyin)
print('语音转文字结果:\n', r)
示例#6
0
class App(QWidget):
    """Minimal PyQt recording GUI: capture audio, transcribe it, show text.

    Loads both models once at construction time; the record/transcribe
    buttons drive a ``Recorder`` instance and append results to a
    read-only text box.
    """

    def __init__(self):
        super(App, self).__init__()
        # Where the captured audio is written before recognition.
        self.record_name = 'saved_record.wav'
        # Acoustic model (audio -> pinyin) and language model (pinyin -> text).
        self.ms = ModelSpeech('dataset')
        self.ms.LoadModel('model_speech/speech_model251_e_0_step_12000.model')
        self.ml = ModelLanguage('model_language')
        self.ml.LoadModel()
        # Window title and geometry.
        self.title = 'ASR demo'
        self.left = 10
        self.top = 10
        self.width = 420
        self.height = 400
        self.rec = Recorder()
        self.initUI()
        # Capture starts immediately once the UI is up.
        self.rec.start()

    def initUI(self):
        """Build the window: three buttons plus a read-only transcript box."""
        self.setWindowTitle(self.title)
        self.setGeometry(self.left, self.top, self.width, self.height)

        record_btn = QPushButton('Record', self)
        record_btn.setToolTip('Press to start recording')
        record_btn.move(100, 70)
        record_btn.clicked.connect(self.start_record)

        transcribe_btn = QPushButton('To Transcript', self)
        transcribe_btn.setToolTip('Press to convert to transcript')
        transcribe_btn.move(200, 70)
        transcribe_btn.clicked.connect(self.stop_record)

        clear_btn = QPushButton('Clear', self)
        clear_btn.setToolTip('Press to clear transcripts')
        clear_btn.move(100, 100)
        clear_btn.clicked.connect(self.clear)

        self.text_edit = QTextEdit("What you said: ", self)
        self.text_edit.setReadOnly(True)
        self.text_edit.move(100, 140)

        self.show()

    @pyqtSlot()
    def clear(self):
        """Reset the transcript box to its placeholder line."""
        self.text_edit.clear()
        self.text_edit.append("What you said: ")

    @pyqtSlot()
    def start_record(self):
        """Begin (or restart) audio capture."""
        self.rec.start()

    @pyqtSlot()
    def stop_record(self):
        """Stop capture, save the WAV, run both models, append the text."""
        print(len(self.rec._frames))
        self.rec.stop()
        self.rec.save(self.record_name)
        pinyin_seq = self.ms.RecognizeSpeech_FromFile(self.record_name)
        self.w = self.ml.SpeechToText(pinyin_seq)
        print('语音转文字结果:\n', self.w)
        self.text_edit.append(self.w)