示例#1
0
def test_batch(datapath='/data/dataset/',
        batch_size=4,
        steps=2):
    """Run batched recognition on the test set and print the error rate.

    Predictions are decoded one sample at a time with ``decode_ctc`` and
    compared against labels drawn from a second, batch-size-1 loader using
    ``GetEditDistance``. The accumulated distance divided by the accumulated
    label length is printed as a percentage.

    Args:
        datapath: Dataset root handed to ``get_data``.
        batch_size: Batch size of the generator that feeds prediction.
        steps: Number of generator batches to predict. Raise or lower it to
            evaluate on more or less test data.
    """
    # Prepare the test data and the index-to-label dictionary.
    p = get_data(datapath=datapath, read_type='test', batch_size=batch_size)
    num2word = p.label_dict
    yielddatas = p.data_generator()

    # Build the model used for recognition.
    # NOTE(review): no weights are loaded here -- the original call
    # model.load_weights('model_cnn_full.mdl') was left disabled -- so the
    # predictions come from whatever weights creatModel() returns. Confirm
    # this is intended before trusting the reported score.
    model, model_data = creatModel()
    result = model_data.predict_generator(yielddatas, steps=steps)

    # decode_ctc is fed one sample at a time, so each prediction gets a
    # leading batch axis of size 1 before decoding.
    pres = [
        decode_ctc(
            subresult.reshape((1, subresult.shape[0], subresult.shape[1])),
            num2word,
        )[0]
        for subresult in result
    ]

    # Compare every decoded result with its label to get the overall rate.
    # Presumably the batch-size-1 label generator yields labels in the same
    # order as the prediction generator yields samples -- verify in get_data.
    q = get_data(datapath=datapath, read_type='test', batch_size=1)
    label_gen = q.label_generator()
    total_len = 0
    total_err = 0
    for pre in pres:
        label = next(label_gen)
        total_len += len(label)
        total_err += GetEditDistance(pre, label)

    # Without any label tokens the ratio is undefined; report that instead
    # of failing with ZeroDivisionError.
    if total_len == 0:
        print('word error rate is undefined: no label tokens were compared')
        return
    print('word error rate is :', total_err/total_len*100, '%')
示例#2
0
def train(datapath='data/', batch_size=4, steps_per_epoch=1000, epochs=1):
    """Train the acoustic model and save its weights.

    Training resumes from ``speech_model/model_cnn_fbank.mdl`` when that file
    already exists, and the weights are written back to the same path once
    fitting finishes.

    Args:
        datapath: Dataset root handed to ``get_data``.
        batch_size: Batch size of the training data generator.
        steps_per_epoch: Number of generator batches per epoch.
        epochs: Number of epochs to train for.
    """
    weights_path = 'speech_model/model_cnn_fbank.mdl'

    # Prepare the training data.
    p = get_data(datapath=datapath, read_type='train', batch_size=batch_size)
    yielddatas = p.data_generator()

    # Build the model and resume from earlier weights when available.
    model, model_data = creatModel()
    if os.path.exists(weights_path):
        model.load_weights(weights_path)

    # Create the output directory up front so a missing folder cannot make
    # the save fail after the whole training run has completed.
    os.makedirs(os.path.dirname(weights_path), exist_ok=True)

    # Honour the caller's ``epochs``; it used to be pinned to 1 here.
    model.fit_generator(yielddatas, steps_per_epoch=steps_per_epoch, epochs=epochs)
    model.save_weights(weights_path)
示例#3
0
def train(datapath='E:/my_ch_speech_recognition/acoustic_model/data/',
          batch_size=2,
          steps_per_epoch=1000,
          epochs=1):
    """Train the acoustic model and save its weights.

    Training resumes from ``model_cnn_full.mdl`` in the working directory
    when that file already exists, and the weights are written back to the
    same path once fitting finishes.

    Args:
        datapath: Dataset root handed to ``get_data``.
        batch_size: Batch size of the training data generator.
        steps_per_epoch: Number of generator batches per epoch.
        epochs: Number of epochs to train for.
    """
    weights_path = 'model_cnn_full.mdl'

    # Prepare the training data.
    p = get_data(datapath=datapath, read_type='train', batch_size=batch_size)
    yielddatas = p.data_generator()

    # Build the model and resume from earlier weights when available.
    model, model_data = creatModel()
    if os.path.exists(weights_path):
        model.load_weights(weights_path)

    # Honour the caller's ``epochs``; it used to be pinned to 1 here.
    model.fit_generator(yielddatas, steps_per_epoch=steps_per_epoch, epochs=epochs)
    model.save_weights(weights_path)
示例#4
0
def test(datapath='data/', batch_size=1):
    """Recognize one batch of test data and print the outcome.

    Loads the weights stored at ``speech_model/model_cnn_fbank.mdl``,
    predicts a single generator step, prints the shape of the raw
    prediction, and then prints the numeric and text results returned by
    ``decode_ctc``.

    Args:
        datapath: Dataset root handed to ``get_data``.
        batch_size: Batch size of the test data generator.
    """
    # Test-set loader; it also exposes the index-to-label dictionary.
    loader = get_data(datapath=datapath, read_type='test', batch_size=batch_size)
    id_to_label = loader.label_dict
    batches = loader.data_generator()

    # Build the model pair and restore the trained weights.
    net, predictor = creatModel()
    net.load_weights('speech_model/model_cnn_fbank.mdl')

    # Predict exactly one batch from the generator.
    raw_output = predictor.predict_generator(batches, steps=1)
    print(raw_output.shape)

    # Turn the raw prediction into its numeric and text forms.
    decoded_ids, text = decode_ctc(raw_output, id_to_label)
    print('数字结果: ', decoded_ids)
    print('文本结果:', text)