Пример #1
0
	def TrainModel(self, datapath, epoch = 2, save_step = 1000, batch_size = 32, filename = 'model_speech/speech_model2'):
		'''
		Train the model, saving and spot-checking it after every fit_generator round.

		Parameters:
			datapath: path the training data is loaded from
			epoch: number of training epochs
			save_step: number of steps handed to each fit_generator call; the model
				is saved after every such call
			batch_size: batch size handed to the data generator
			filename: default save file name without extension (not read in this method)

		NOTE(review): the inner loop only stops when StopIteration is raised, and the
		spot checks read self.datapath rather than datapath -- confirm both are intended.
		'''
		data = DataSpeech(datapath, 'train')
		num_data = data.GetDataNum() # number of training samples (not used below)
		for epoch_idx in range(epoch):
			print('[running] train epoch %d .' % epoch_idx)
			n_step = 0 # fit_generator rounds completed in this epoch
			while True:
				try:
					print('[message] epoch %d . Have train datas %d+'%(epoch_idx, n_step*save_step))
					# data_genetator is a generator function; a new generator is requested every round
					batches = data.data_genetator(batch_size, self.AUDIO_LENGTH)
					self._model.fit_generator(batches, save_step)
					n_step += 1
				except StopIteration:
					print('[error] generator error. please check data format.')
					break
				else:
					# Checkpoint, then run a 4-sample spot check on each dataset.
					self.SaveModel(comment='_e_'+str(epoch_idx)+'_step_'+str(n_step * save_step))
					for dataset_name in ('train', 'dev'):
						self.TestModel(self.datapath, str_dataset=dataset_name, data_count = 4)
Пример #2
0
    def TestModel(self, datapath='', str_dataset='dev', data_count=32):
        '''
        Evaluate the model on a run of consecutive samples and print the error rate.

        A random start index is drawn, then data_count consecutive samples
        (wrapping around the end of the dataset) are passed through self.Predict
        and compared with their labels using GetEditDistance.

        Parameters:
            datapath: accepted for interface compatibility but not read here;
                the dataset is always loaded from self.datapath
            str_dataset: dataset name handed to DataSpeech (default 'dev')
            data_count: number of samples to test; a value <= 0 or larger than
                the dataset size means "use every sample"

        Returns:
            None. The result (or an error message) is printed.
        '''
        data = DataSpeech(self.datapath, str_dataset)
        num_data = data.GetDataNum()  # number of samples available
        if num_data <= 0:
            # random.randint(0, -1) would raise ValueError on an empty dataset.
            print('[Error] Model Test Error. please check data format.')
            return
        if data_count <= 0 or data_count > num_data:
            data_count = num_data

        try:
            ran_num = random.randint(0, num_data - 1)  # random start index

            words_num = 0
            word_error_num = 0
            for i in range(data_count):
                # Take consecutive samples from the start index, wrapping around.
                data_input, data_labels = data.GetData((ran_num + i) % num_data)
                pre = self.Predict(data_input, data_input.shape[0] // 4)

                # The longer of label and prediction is the denominator for this sample.
                words_num += max(data_labels.shape[0], pre.shape[0])
                word_error_num += GetEditDistance(data_labels, pre)
        except StopIteration:
            print('[Error] Model Test Error. please check data format.')
            return

        if words_num == 0:
            # Every label and prediction was empty: the rate is undefined, so
            # report an error instead of raising ZeroDivisionError.
            print('[Error] Model Test Error. please check data format.')
            return

        print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:',
              word_error_num / words_num * 100, '%')
Пример #3
0
    def TestModel(self,
                  datapath='',
                  str_dataset='dev',
                  data_count=32,
                  out_report=False):
        '''
        Evaluate the model on a run of consecutive samples and print the error rate.

        A random start index is drawn, then data_count consecutive samples
        (wrapping around the end of the dataset) are passed through self.Predict
        and compared with their labels using GetEditDistance. The errors counted
        for one sample are capped at the number of words in its label.

        Parameters:
            datapath: accepted for interface compatibility but not read here;
                the dataset is always loaded from self.datapath
            str_dataset: dataset name handed to DataSpeech (default 'dev')
            data_count: number of samples to test; a value <= 0 or larger than
                the dataset size means "use every sample"
            out_report: when true, also write the per-sample labels/predictions
                and the final rate to 'Test_Report_<dataset>_<timestamp>.txt'
                in the current working directory

        Returns:
            None. The result (or an error message) is printed.
        '''
        data = DataSpeech(self.datapath, str_dataset)
        num_data = data.GetDataNum()  # number of samples available
        if num_data <= 0:
            # random.randint(0, -1) would raise ValueError on an empty dataset.
            print('[Error] Model Test Error. please check data format.')
            return
        if data_count <= 0 or data_count > num_data:
            data_count = num_data

        report_file = None
        if out_report:
            nowtime = time.strftime('%Y%m%d_%H%M%S',
                                    time.localtime(time.time()))
            report_file = open('Test_Report_' + str_dataset + '_' + nowtime +
                               '.txt',
                               'w',
                               encoding='UTF-8')

        try:
            ran_num = random.randint(0, num_data - 1)  # random start index

            words_num = 0
            word_error_num = 0
            report_parts = []  # joined once at the end instead of repeated +=

            for i in range(data_count):
                # Take consecutive samples from the start index, wrapping around.
                data_input, data_labels = data.GetData((ran_num + i) % num_data)
                pre = self.Predict(data_input, data_input.shape[0] // 4)

                words_n = data_labels.shape[0]  # words in this sentence
                words_num += words_n
                edit_distance = GetEditDistance(data_labels, pre)
                # A distance above the sentence length means spurious extra
                # output; count at most the sentence's own length as errors.
                word_error_num += min(edit_distance, words_n)

                if report_file is not None:
                    report_parts.append(str(i) + '\n')
                    report_parts.append('True:\t' + str(data_labels) + '\n')
                    report_parts.append('Pred:\t' + str(pre) + '\n')
                    report_parts.append('\n')

            if words_num == 0:
                # Every label was empty: the rate is undefined, so report an
                # error instead of raising ZeroDivisionError.
                print('[Error] Model Test Error. please check data format.')
                return

            error_rate = word_error_num / words_num * 100
            print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:',
                  error_rate, '%')
            if report_file is not None:
                report_parts.append('*[测试结果] 语音识别 ' + str_dataset +
                                    ' 集语音单字错误率: ' + str(error_rate) + ' %')
                report_file.write(''.join(report_parts))

        except StopIteration:
            print('[Error] Model Test Error. please check data format.')
        finally:
            # Close the report even when prediction or scoring raises.
            if report_file is not None:
                report_file.close()
Пример #4
0
    def TestModel(self, datapath, str_dataset='dev'):
        '''
        Score the model on a single batch sized to the whole dataset and print the result.

        Parameters:
            datapath: path handed to DataSpeech
            str_dataset: dataset name handed to LoadDataList (default 'dev')

        Returns:
            None. The value returned by test_on_batch (or an error message)
            is printed.
        '''
        data = DataSpeech(datapath)
        data.LoadDataList(str_dataset)
        # Ask the loaded instance, not the class, for the sample count.
        num_data = data.GetDataNum()
        try:
            gen = data.data_genetator(num_data)
            # Pull one batch from the generator; unpacking the generator object
            # itself would iterate it instead of taking a single yielded pair.
            X, y = next(gen)
        except StopIteration:
            print('[Error] Model Test Error. please check data format.')
        else:
            r = self._model.test_on_batch(X, y)
            print(r)