示例#1
0
 def TrainModel(self, datapath, epoch = 2, save_step = 1000, batch_size = 32, filename = abspath + 'model_speech/m' + ModelName + '/speech_model'+ModelName):
     '''
     Train the speech model.

     Parameters:
         datapath: path where the training data is stored
         epoch: number of training epochs
         save_step: number of generator steps per fit round / checkpoint
         batch_size: number of samples per training batch
         filename: default save file name, without extension
             (NOTE(review): unused inside this method — SaveModel builds its own name; confirm before removing)
     '''
     data = DataSpeech(datapath, 'train')  # load the 'train' dataset first

     num_data = data.GetDataNum()  # total number of samples (informational only here)

     # Wrap the dataset in a generator that yields batch_size-sized batches.
     yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH)

     # Use a dedicated loop variable instead of shadowing the 'epoch' parameter,
     # which previously made the total epoch count unreachable inside the loop.
     for cur_epoch in range(epoch):
         print('[running] train epoch %d .' % cur_epoch)
         n_step = 0  # number of fit rounds completed in this epoch
         while True:
             try:
                 print('[message] epoch %d . Have train datas %d+'%(cur_epoch, n_step*save_step))
                 # data_genetator is a generator function: batches are produced
                 # on the CPU while the model trains (e.g. on the GPU) in parallel.
                 # save_step is the samples_per_epoch argument — one fit round
                 # ends after that many generator steps.
                 self._model.fit_generator(yielddatas, save_step)    # self._model is the model returned by CreateModel
                 n_step += 1
             except StopIteration:
                 print('[error] generator error. please check data format.')
                 break

             self.SaveModel(comment='_e_'+str(cur_epoch)+'_step_'+str(n_step * save_step))  # checkpoint the model
             self.TestModel(self.datapath, str_dataset='train', data_count = 4)  # quick evaluation on the train set
             self.TestModel(self.datapath, str_dataset='dev', data_count = 4)  # quick evaluation on the dev set
示例#2
0
	def TrainModel(self, datapath, epoch = 2, save_step = 1000, batch_size = 32, filename = 'model_speech/speech_model24'):
		'''
		Train the speech model.

		Parameters:
			datapath: path where the training data is stored
			epoch: number of training epochs
			save_step: number of generator steps per fit round / checkpoint
			batch_size: number of samples per training batch
			filename: default save file name, without extension
				(NOTE(review): unused inside this method — confirm before removing)
		'''
		data=DataSpeech(datapath, 'train')
		
		num_data = data.GetDataNum() # total number of samples (informational only here)
		
		# Generator yielding batch_size-sized training batches.
		yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH)
		
		# Dedicated loop variable; the original shadowed the 'epoch' parameter.
		for cur_epoch in range(epoch):
			print('[running] train epoch %d .' % cur_epoch)
			n_step = 0 # number of fit rounds completed in this epoch
			while True:
				try:
					print('[message] epoch %d . Have train datas %d+'%(cur_epoch, n_step*save_step))
					# data_genetator is a generator function; one fit round
					# consumes save_step generator steps.
					self._model.fit_generator(yielddatas, save_step)
					n_step += 1
				except StopIteration:
					print('[error] generator error. please check data format.')
					break
				
				self.SaveModel(comment='_e_'+str(cur_epoch)+'_step_'+str(n_step * save_step))  # checkpoint
				self.TestModel(self.datapath, str_dataset='train', data_count = 4)  # quick train-set evaluation
				self.TestModel(self.datapath, str_dataset='dev', data_count = 4)  # quick dev-set evaluation
示例#3
0
    def TestModel(self,
                  datapath='',
                  str_dataset='dev',
                  data_count=32,
                  out_report=False):
        '''
        Evaluate the model on a dataset and print the word error rate.

        Parameters:
            datapath: unused — the method reads self.datapath instead
                (NOTE(review): confirm whether callers rely on this parameter)
            str_dataset: which split to test ('train', 'dev', ...)
            data_count: number of samples to test; <=0 or > dataset size
                means "use the whole dataset"
            out_report: when True, write a per-sample report file
        '''
        data = DataSpeech(self.datapath, str_dataset)
        num_data = data.GetDataNum()  # total number of samples in the split
        if (data_count <= 0 or data_count > num_data):
            # Out-of-range request: fall back to testing everything.
            data_count = num_data

        txt_obj = None  # report file handle; tracked so it is always closed
        try:
            ran_num = random.randint(0, num_data - 1)  # random start index

            words_num = 0       # total number of characters seen
            word_error_num = 0  # total number of character errors

            nowtime = time.strftime('%Y%m%d_%H%M%S',
                                    time.localtime(time.time()))
            if (out_report == True):
                txt_obj = open('Test_Report_' + str_dataset + '_' + nowtime +
                               '.txt',
                               'w',
                               encoding='UTF-8')  # open the report file

            txt = ''
            for i in range(data_count):
                # Take data_count consecutive samples starting at the random index.
                data_input, data_labels = data.GetData(
                    (ran_num + i) % num_data)
                pre = self.Predict(data_input, data_input.shape[0] // 8)

                words_n = data_labels.shape[0]  # characters in this sentence
                words_num += words_n
                edit_distance = GetEditDistance(data_labels, pre)
                if (edit_distance <= words_n):
                    # Edit distance within sentence length: count it as errors.
                    word_error_num += edit_distance
                else:
                    # Prediction added lots of garbage; cap errors at 100%.
                    word_error_num += words_n

                if (out_report == True):
                    txt += str(i) + '\n'
                    txt += 'True:\t' + str(data_labels) + '\n'
                    txt += 'Pred:\t' + str(pre) + '\n'
                    txt += '\n'

            # Guard against division by zero on an empty split.
            error_ratio = (word_error_num / words_num * 100) if words_num > 0 else 0.0
            print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:',
                  error_ratio, '%')
            if (out_report == True):
                txt += '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + str(
                    error_ratio) + ' %'
                txt_obj.write(txt)

        except StopIteration:
            print('[Error] Model Test Error. please check data format.')
        finally:
            # Previously the report file leaked when StopIteration fired
            # after it was opened.
            if txt_obj is not None and not txt_obj.closed:
                txt_obj.close()
示例#4
0
    def RecognizeSpeech(self, wavsignal, fs):
        '''
        Recognize one wav signal sequence and return the list of predicted
        symbols (pinyin strings).

        Parameters:
            wavsignal: raw waveform samples
            fs: sampling rate

        NOTE(review): the original author flagged this function as still
        containing a bug — treat results with care until confirmed.
        '''
        # Extract frequency-domain input features from the waveform.
        data_input = GetFrequencyFeature3(wavsignal, fs)

        # CTC input length is the feature length divided by 8 — presumably
        # matching the model's temporal downsampling; confirm against the model.
        input_length = len(data_input) // 8

        # np.float was removed in NumPy 1.24; np.float64 is the equivalent dtype.
        data_input = np.array(data_input, dtype=np.float64)
        # Add a trailing channel dimension: (time, freq) -> (time, freq, 1).
        data_input = data_input.reshape(data_input.shape[0],
                                        data_input.shape[1], 1)

        r1 = self.Predict(data_input, input_length)

        list_symbol_dic = DataSpeech.GetSymbolList()  # index -> pinyin symbol table

        # Map predicted indices to their symbols.
        return [list_symbol_dic[i] for i in r1]
    def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32):
        '''
        Train the model using a raw TensorFlow session, resuming from a
        saved checkpoint.

        Parameters:
            datapath: path where the training data is stored
            epoch: total number of epochs (loop resumes at 51 after restore)
            save_step: steps per epoch before checkpointing
            batch_size: number of samples per training batch
        '''
        data = DataSpeech(datapath, 'train')

        # Append-mode log files for the loss curve and accuracy reports.
        txt_loss = open(
            os.path.join(os.getcwd(), 'speech_log_file', 'Test_Report_loss.txt'),
            mode='a', encoding='UTF-8')

        txt_obj = open(
            os.path.join(os.getcwd(), 'speech_log_file', 'Test_Report_accuracy.txt'),
            mode='a', encoding='UTF-8')

        saver = tf.train.Saver()
        with tf.Session() as sess:
            # Resume from checkpoint 50 rather than initializing variables.
            saver.restore(sess, os.path.join(os.getcwd(), 'speech_model_file', 'speech.module-50'))
            summary_merge = tf.summary.merge_all()
            train_writter = tf.summary.FileWriter('summary_file', sess.graph)
            for i in range(51, epoch):  # epoch numbering continues after the restored checkpoint
                yielddatas = data.data_genetator(batch_size, self.MAX_TIME)
                pbar = tqdm(yielddatas)
                train_epoch = 0
                train_epoch_size = save_step
                for input, _ in pbar:
                    feed = {self.input_data: input[0], self.label_data: input[1],
                            self.input_length: input[2], self.label_length: input[3],
                            self.is_train: True}
                    _, loss, train_summary = sess.run(
                        [self.optimize, self.loss, summary_merge], feed_dict=feed)
                    train_writter.add_summary(train_summary, train_epoch + i * train_epoch_size)
                    # Fill the current/total slots in order (the original
                    # passed each pair of arguments swapped).
                    pr = 'epoch:%d/%d,train_epoch: %d/%d ,loss: %s' % (i, epoch, train_epoch, train_epoch_size, loss)
                    pbar.set_description(pr)
                    txt_loss.write(pr + '\n')
                    if train_epoch == train_epoch_size:
                        break
                    train_epoch += 1
                    if train_epoch % 3000 == 0:
                        # NOTE(review): 'TestMode' looks like a typo for
                        # 'TestModel' — confirm the method exists on this class.
                        self.TestMode(data, sess, i, txt_obj)
                saver.save(sess, os.path.join(os.getcwd(), 'speech_model_file', 'speech.module'), global_step=i)
            txt_loss.close()
            txt_obj.close()  # previously leaked: the accuracy log was never closed
示例#6
0
    def TrainModel(self,
                   epoch=2,
                   save_step=1000,
                   batch_size=32,
                   start_nstep=0):
        '''
        Train the speech model.

        Parameters:
            epoch: number of training epochs
            save_step: number of generator steps per fit round / checkpoint
            batch_size: number of samples per training batch
            start_nstep: step counter offset (for resuming a previous run)
        '''
        data = DataSpeech(self.datapath_thchs30, self.datapath_stcmds, 'train')

        num_data = data.GetDataNum()  # total number of samples (informational only here)

        # Generator yielding batch_size-sized training batches.
        yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH)

        # Dedicated loop variable; the original shadowed the 'epoch' parameter.
        for cur_epoch in range(epoch):
            self.logger.debug("train epoch %s." % cur_epoch)
            n_step = start_nstep  # step counter, offset for resumed runs
            while True:
                try:
                    self.logger.debug('epoch %d . Have train datas %d+' %
                                      (cur_epoch, n_step * save_step))
                    # data_genetator is a generator function; one fit round
                    # consumes save_step generator steps.
                    self._model.fit_generator(yielddatas, save_step)
                    n_step += 1
                except StopIteration:
                    self.logger.error(
                        "generator error. please check data format.")
                    break

                # Checkpoint and run quick evaluations every fit round.
                self.SaveModel(filename='speech_model_%s_e_%s_step_%s' %
                               (ModelName, cur_epoch, n_step * save_step))
                self.TestModel(str_dataset='train', data_count=4)
                self.TestModel(str_dataset='dev', data_count=4)
示例#7
0
    def TestModel(self,
                  datapath='',
                  str_dataset='dev',
                  data_count=32,
                  out_report=False,
                  show_ratio=True):
        '''
        Evaluate the model on a dataset and print the word error rate.

        Parameters:
            datapath: unused — the method reads self.datapath instead
                (NOTE(review): confirm whether callers rely on this parameter)
            str_dataset: which split to test ('train', 'dev', ...)
            data_count: number of samples to test; <=0 or > dataset size
                means "use the whole dataset"
            out_report: when True, write a per-sample report file
            show_ratio: when True, print progress every 10 samples
        '''
        data = DataSpeech(self.datapath, str_dataset)
        num_data = data.GetDataNum()  # total number of samples in the split
        if (data_count <= 0 or data_count > num_data):
            # Out-of-range request: fall back to testing everything.
            data_count = num_data

        txt_obj = None  # report file handle; tracked so it is always closed
        try:
            ran_num = random.randint(0, num_data - 1)  # random start index

            words_num = 0       # total number of characters seen
            word_error_num = 0  # total number of character errors

            nowtime = time.strftime('%Y%m%d_%H%M%S',
                                    time.localtime(time.time()))
            if (out_report == True):
                txt_obj = open('Test_Report_' + str_dataset + '_' + nowtime +
                               '.txt',
                               'w',
                               encoding='UTF-8')  # open the report file

            txt = ''
            for i in range(data_count):
                # Take data_count consecutive samples starting at the random index.
                data_input, data_labels = data.GetData(
                    (ran_num + i) % num_data)

                # Skip wav files whose feature length exceeds the model input
                # and use the next file instead.
                # NOTE(review): if every remaining file is too long this loop
                # never terminates — confirm the dataset guarantees short files.
                num_bias = 0
                while (data_input.shape[0] > self.AUDIO_LENGTH):
                    print('*[Error]', 'wave data lenghth of num',
                          (ran_num + i) % num_data, 'is too long.',
                          '\n A Exception raise when test Speech Model.')
                    num_bias += 1
                    data_input, data_labels = data.GetData(
                        (ran_num + i + num_bias) % num_data)

                pre = self.Predict(data_input, data_input.shape[0] // 8)

                words_n = data_labels.shape[0]  # characters in this sentence
                words_num += words_n
                edit_distance = GetEditDistance(data_labels, pre)
                if (edit_distance <= words_n):
                    # Edit distance within sentence length: count it as errors.
                    word_error_num += edit_distance
                else:
                    # Prediction added lots of garbage; cap errors at 100%.
                    word_error_num += words_n

                if (i % 10 == 0 and show_ratio == True):
                    print('测试进度:', i, '/', data_count)

                txt = ''
                if (out_report == True):
                    txt += str(i) + '\n'
                    txt += 'True:\t' + str(data_labels) + '\n'
                    txt += 'Pred:\t' + str(pre) + '\n'
                    txt += '\n'
                    txt_obj.write(txt)

            # Guard against division by zero on an empty split.
            error_ratio = (word_error_num / words_num * 100) if words_num > 0 else 0.0
            print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:',
                  error_ratio, '%')
            if (out_report == True):
                txt = '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + str(
                    error_ratio) + ' %'
                txt_obj.write(txt)

        except StopIteration:
            print('[Error] Model Test Error. please check data format.')
        finally:
            # Previously the report file leaked when StopIteration fired
            # after it was opened.
            if txt_obj is not None and not txt_obj.closed:
                txt_obj.close()
示例#8
0
    def TestModel(self,
                  datapath='',
                  str_dataset='dev',
                  data_count=64,
                  out_report=True,
                  show_ratio=True,
                  io_step_print=10,
                  io_step_file=10):
        '''
        Evaluate the model on a dataset and print the word error rate.

        Parameters:
            datapath: unused — the method reads self.datapath instead
                (NOTE(review): confirm whether callers rely on this parameter)
            str_dataset: which split to test ('train', 'dev', ...)
            data_count: number of samples to test; <=0 or > dataset size
                means "use the whole dataset"
            out_report: when True, write a per-sample report file
            show_ratio: when True, print progress periodically
            io_step_print: print progress every this many samples
                (reduces stdout I/O overhead during testing)
            io_step_file: flush the report buffer to disk every this many
                samples (reduces file I/O overhead during testing)
        '''

        data = DataSpeech(self.datapath, str_dataset)
        num_data = data.GetDataNum()  # total number of samples in the split
        if (data_count <= 0 or data_count > num_data):
            # Out-of-range request: fall back to testing everything.
            data_count = num_data

        txt_obj = None  # report file handle; tracked so it is always closed
        try:
            ran_num = random.randint(0, num_data - 1)  # random start index

            words_num = 0       # total number of characters seen
            word_error_num = 0  # total number of character errors

            # Timestamp string like '20190924_103104' used in the report name.
            nowtime = time.strftime('%Y%m%d_%H%M%S', time.localtime(
                time.time()))
            if (out_report == True):
                txt_obj = open('Test_Report_' + str_dataset + '_' + nowtime +
                               '.txt',
                               'w',
                               encoding='UTF-8')  # open the report file

            txt = '测试报告\n模型编号 ' + ModelName + '\n\n'
            for i in range(data_count):
                # Take data_count consecutive samples starting at the random index.
                data_input, data_labels = data.GetData(
                    (ran_num + i) % num_data)

                # Skip wav files whose feature length exceeds the model input
                # and use the next file instead.
                num_bias = 0
                while (data_input.shape[0] > self.AUDIO_LENGTH):
                    print('*[Error]', 'wave data lenghth of num',
                          (ran_num + i) % num_data, 'is too long.',
                          '\n A Exception raise when test Speech Model.')
                    num_bias += 1
                    data_input, data_labels = data.GetData(
                        (ran_num + i + num_bias) % num_data)

                pre = self.Predict(data_input,
                                   data_input.shape[0] // 8)  # predicted symbol indices

                words_n = data_labels.shape[0]  # characters in this sentence
                words_num += words_n
                # Edit distance between prediction and ground truth (integer).
                edit_distance = GetEditDistance(data_labels, pre)
                if (edit_distance <= words_n):
                    # Edit distance within sentence length: count it as errors.
                    word_error_num += edit_distance
                else:
                    # Prediction added lots of garbage; cap errors at 100%.
                    word_error_num += words_n

                if ((i % io_step_print == 0 or i == data_count - 1)
                        and show_ratio == True):
                    print('Test Count: ', i, '/', data_count)

                if (out_report == True):
                    # Flush the accumulated report text periodically.
                    if (i % io_step_file == 0 or i == data_count - 1):
                        txt_obj.write(txt)
                        txt = ''

                    txt += str(i) + '\n'
                    txt += 'True:\t' + str(data_labels) + '\n'
                    txt += 'Pred:\t' + str(pre) + '\n'
                    txt += '\n'

            # Guard against division by zero on an empty split.
            error_ratio = (word_error_num / words_num * 100) if words_num > 0 else 0.0
            print(
                '*[Test Result] Speech Recognition ' + str_dataset +
                ' set word error ratio: ', error_ratio,
                '%')
            if (out_report == True):
                txt += '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + str(
                    error_ratio) + ' %'
                txt_obj.write(txt)
                txt = ''

        except StopIteration:
            print('[Error] Model Test Error. please check data format.')
        finally:
            # Previously the report file leaked when StopIteration fired
            # after it was opened.
            if txt_obj is not None and not txt_obj.closed:
                txt_obj.close()
示例#9
0
    def TestModel(self,
                  str_dataset='dev',
                  data_count=32,
                  out_report=False,
                  show_ratio=True,
                  io_step_print=10,
                  io_step_file=10):
        '''
        Evaluate the model on a dataset and log the word error rate.

        Parameters:
            str_dataset: which split to test ('train', 'dev', ...)
            data_count: number of samples to test; <=0 or > dataset size
                means "use the whole dataset"
            out_report: when True, write a per-sample report file
            show_ratio: when True, log progress periodically
            io_step_print: log progress every this many samples
                (reduces logging I/O overhead during testing)
            io_step_file: flush the report buffer to disk every this many
                samples (reduces file I/O overhead during testing)

        Note: io_step_file was referenced by the body but missing from the
        original signature, causing a NameError whenever out_report=True;
        both io_step_* values are now proper defaulted parameters.
        '''
        self.logger.debug("test model")
        data = DataSpeech(self.datapath_thchs30, self.datapath_stcmds,
                          str_dataset)
        num_data = data.GetDataNum()  # total number of samples in the split
        if (data_count <= 0 or data_count > num_data):
            # Out-of-range request: fall back to testing everything.
            data_count = num_data

        txt_obj = None  # report file handle; tracked so it is always closed
        try:
            ran_num = random.randint(0, num_data - 1)  # random start index

            words_num = 0       # total number of characters seen
            word_error_num = 0  # total number of character errors

            nowtime = time.strftime('%Y%m%d_%H%M%S',
                                    time.localtime(time.time()))
            if (out_report == True):
                txt_obj = open('Test_Report_' + str_dataset + '_' + nowtime +
                               '.txt',
                               'w',
                               encoding='UTF-8')  # open the report file

            txt = '测试报告\n模型编号 ' + ModelName + '\n\n'
            for i in range(data_count):
                # Take data_count consecutive samples starting at the random index.
                data_input, data_labels = data.GetData(
                    (ran_num + i) % num_data)

                # Skip wav files whose feature length exceeds the model input
                # and use the next file instead.
                num_bias = 0
                while (data_input.shape[0] > self.AUDIO_LENGTH):
                    self.logger.error(
                        'wave data lenghth of num %s is too long. \n A Exception raise when test Speech Model.'
                        % ((ran_num + i) % num_data))
                    num_bias += 1
                    data_input, data_labels = data.GetData(
                        (ran_num + i + num_bias) % num_data)

                pre = self.Predict(data_input, data_input.shape[0] // 8)

                words_n = data_labels.shape[0]  # characters in this sentence
                words_num += words_n
                edit_distance = GetEditDistance(data_labels, pre)
                if (edit_distance <= words_n):
                    # Edit distance within sentence length: count it as errors.
                    word_error_num += edit_distance
                else:
                    # Prediction added lots of garbage; cap errors at 100%.
                    word_error_num += words_n

                if (i % io_step_print == 0 and show_ratio == True):
                    self.logger.debug('Test Count: %s/%s' % (i, data_count))

                if (out_report == True):
                    # Flush the accumulated report text periodically.
                    if (i % io_step_file == 0 or i == data_count - 1):
                        txt_obj.write(txt)
                        txt = ''

                    txt += str(i) + '\n'
                    txt += 'True:\t' + str(data_labels) + '\n'
                    txt += 'Pred:\t' + str(pre) + '\n'
                    txt += '\n'

            # Guard against division by zero on an empty split.
            error_ratio = (word_error_num / words_num * 100) if words_num > 0 else 0.0
            self.logger.info(
                'Speech Recognition %s set word error ratio: %s%%' %
                (str_dataset, error_ratio))
            if (out_report == True):
                txt += '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + str(
                    error_ratio) + ' %'
                txt_obj.write(txt)
                txt = ''

        except StopIteration:
            self.logger.error(
                '[Error] Model Test Error. please check data format.')
        finally:
            # Previously the report file leaked when StopIteration fired
            # after it was opened.
            if txt_obj is not None and not txt_obj.closed:
                txt_obj.close()
示例#10
0
    def TrainModel(self,
                   datapath,
                   epoch=2,
                   save_step=1000,
                   batch_size=32,
                   filename=abspath + 'model_speech/m' + ModelName +
                   '/speech_model' + ModelName):
        '''
        Fine-tune the speech model with the later convolution layers frozen.

        Parameters:
            datapath: path where the training data is stored
            epoch: number of training epochs
            save_step: number of generator steps per fit round / checkpoint
            batch_size: number of samples per training batch
            filename: default save file name, without extension
                (NOTE(review): unused inside this method — confirm before removing)
        '''
        data = DataSpeech(datapath, 'train')

        num_data = data.GetDataNum()  # total number of samples (informational only here)

        # Generator yielding batch_size-sized training batches.
        yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH)

        # Freeze conv2d_3 .. conv2d_9 so only the remaining layers train.
        # str.startswith accepts a tuple of prefixes, replacing the long
        # or-chain of individual calls.
        frozen_prefixes = ("conv2d_3", "conv2d_4", "conv2d_5", "conv2d_6",
                           "conv2d_7", "conv2d_8", "conv2d_9")
        for layer in self._model.layers:
            layerName = str(layer.name)
            print("layerNAME:" + layerName)
            if layerName.startswith(frozen_prefixes):
                layer.trainable = False
        # NOTE(review): recompiling with loss='mse' replaces whatever loss the
        # model was built with — confirm this is intended for this model.
        self._model.compile(optimizer='rmsprop', loss='mse')

        # Log which weights remain trainable.
        for x in self._model.trainable_weights:
            print("可训练层:" + x.name)
            print('\n')

        # Log which weights are frozen.
        for x in self._model.non_trainable_weights:
            print("冻结层:" + x.name)
            print('\n')

        # Dedicated loop variable; the original shadowed the 'epoch' parameter.
        for cur_epoch in range(epoch):
            print('[running] train epoch %d .' % cur_epoch)
            n_step = 0  # number of fit rounds completed in this epoch
            while True:
                try:
                    print('[message] epoch %d . Have train datas %d+' %
                          (cur_epoch, n_step * save_step))
                    # data_genetator is a generator function; one fit round
                    # consumes save_step generator steps.
                    self._model.fit_generator(yielddatas, save_step)
                    n_step += 1
                except StopIteration:
                    print('[error] generator error. please check data format.')
                    break

                self.SaveModel(comment='_e_' + str(cur_epoch) + '_step_' +
                               str(n_step * save_step))  # checkpoint
                self.TestModel(self.datapath,
                               str_dataset='train',
                               data_count=4)  # quick train-set evaluation
                self.TestModel(self.datapath, str_dataset='dev', data_count=4)  # quick dev-set evaluation