def TestModel(self, datapath='', str_dataset='dev', data_count=32):
    '''
    Evaluate the model and print its single-word error rate.

    Starting from a random index, `data_count` consecutive samples (wrapping
    around the end of the dataset) are passed through `self.Predict`, and each
    prediction is compared with its label sequence via `GetEditDistance`.

    Parameters:
        datapath: accepted for interface compatibility only; it is not used
            here, the dataset is always built from `self.datapath`.
        str_dataset: dataset name handed to `DataSpeech`.
        data_count: number of samples to test. A value <= 0 or larger than
            the number of available samples means "test on all samples".

    Raises:
        ValueError: if the dataset reports no samples.
    '''
    data = DataSpeech(self.datapath, str_dataset)
    num_data = data.GetDataNum()  # number of available samples

    if num_data <= 0:
        # Without this guard random.randint(0, -1) below fails with an
        # opaque "empty range" ValueError; keep the type, improve the message.
        raise ValueError('TestModel: dataset %r contains no data.' % (str_dataset,))

    # Out-of-range requests fall back to testing on the whole dataset.
    if data_count <= 0 or data_count > num_data:
        data_count = num_data

    try:
        ran_num = random.randint(0, num_data - 1)  # random starting index

        words_num = 0
        word_error_num = 0
        for i in range(data_count):
            # Take consecutive samples from the random start, wrapping around.
            data_input, data_labels = data.GetData((ran_num + i) % num_data)
            # NOTE(review): the "// 4" presumably matches the model's
            # time-axis downsampling factor -- confirm against Predict.
            pre = self.Predict(data_input, data_input.shape[0] // 4)

            words_num += max(data_labels.shape[0], pre.shape[0])
            word_error_num += GetEditDistance(data_labels, pre)

        # If every label and prediction was empty there is nothing to get
        # wrong; report 0 instead of dividing by zero.
        if words_num:
            error_rate = word_error_num / words_num * 100
        else:
            error_rate = 0.0
        print('*[测试结果] 语音识别语音单字错误率:', error_rate, '%')
    except StopIteration:
        print('[Error] Model Test Error. please check data format.')
def TrainModel(self, datapath, epoch=2, save_step=1000, filename='model_speech/LSTM_CNN_model'):
    '''
    Train the model.

    Parameters:
        datapath: path where the data is stored.
        epoch: number of training epochs.
        save_step: number of generator steps per fit call; the model is
            saved after each such call.
        filename: default save file name without extension. It is not
            forwarded to `SaveModel` by this method.

    Within each epoch, training repeats until a `StopIteration` escapes from
    the generator / fit call; that is the only exit from the inner loop.
    '''
    data = DataSpeech(datapath)
    data.LoadDataList('train')
    # Query the instance that actually loaded the list, not the class:
    # a class-level call has no `self` to read the data from.
    num_data = data.GetDataNum()  # number of training samples (currently unused)

    # A separate loop variable keeps the `epoch` parameter from being shadowed.
    for epoch_index in range(epoch):
        print('[running] train epoch %d .' % epoch_index)
        n_step = 0  # number of completed fit calls in this epoch
        while True:
            try:
                print('[message] epoch %d . Have train datas %d+' % (epoch_index, n_step * save_step))
                # data_genetator is a generator function
                yielddatas = data.data_genetator(self.BATCH_SIZE)
                self._model.fit_generator(yielddatas, save_step, nb_worker=2)
                n_step += 1
            except StopIteration:
                print('[error] generator error. please check data format.')
                break

            # NOTE(review): saving after every successful fit call is inferred
            # from the "save every save_step steps" contract -- confirm placement.
            self.SaveModel(comment='_e_' + str(epoch_index) + '_step_' + str(n_step))
def TestModel(self, datapath='', str_dataset='dev', data_count=32):
    '''
    Evaluate the model on one generated batch and print the result.

    Parameters:
        datapath: accepted but not used; the dataset is built from
            `self.datapath`.
        str_dataset: dataset name handed to `DataSpeech`.
        data_count: batch size requested from the data generator. A value
            <= 0 or larger than the number of available samples is replaced
            by the full sample count.
    '''
    dataset = DataSpeech(self.datapath, str_dataset)
    total = dataset.GetDataNum()  # number of available samples

    # Fall back to the whole dataset when the requested count is out of range.
    out_of_range = data_count <= 0 or data_count > total
    if out_of_range:
        data_count = total

    try:
        batches = dataset.data_genetator(data_count)
        result = self._model.evaluate_generator(
            generator=batches,
            steps=1,
            max_queue_size=data_count,
            workers=1,
            use_multiprocessing=False,
        )
        print(result)
    except StopIteration:
        print('[Error] Model Test Error. please check data format.')
def TestModel(self, datapath, str_dataset='dev'):
    '''
    Evaluate the model on a single batch covering the whole dataset and
    print the result of `test_on_batch`.

    Parameters:
        datapath: path where the data is stored.
        str_dataset: name of the data list to load via `LoadDataList`.
    '''
    data = DataSpeech(datapath)
    data.LoadDataList(str_dataset)
    # Ask the instance that loaded the list for its size; the class-level
    # call has no `self` to read the data from.
    num_data = data.GetDataNum()  # number of available samples

    try:
        gen = data.data_genetator(num_data)
        # Pull one batch out of the generator. Unpacking the generator object
        # itself would iterate it and only work if it yielded exactly two items.
        X, y = next(gen)
        r = self._model.test_on_batch(X, y)
        print(r)
    except StopIteration:
        print('[Error] Model Test Error. please check data format.')