Exemplo n.º 1
0
def testModel(num_samples=100):
    """Run the recogniser over test utterances and print the error rate.

    For each evaluated utterance the predicted Chinese-character output is
    compared with the reference label via ``utils.GetEditDistance``; the
    per-utterance error is capped at the label length.  The printed rate is
    the summed errors divided by the summed label lengths.

    Args:
        num_samples: Maximum number of test utterances to evaluate.  The
            value is clamped to the number of available utterances so a
            small test set no longer raises ``IndexError``.
    """
    import os

    yysb = utils.SpeechRecognition()

    # 1. Prepare the test data.  It does not have to match the training
    #    data; the split is selected through data_args.data_type.
    data_args = utils.data_hparams()
    data_args.data_type = 'test'
    data_args.shuffle = True
    data_args.batch_size = 1
    test = utils.get_data(data_args)

    # 2. Run the evaluation.
    # Never index past the end of the loaded test set.
    sample_count = min(num_samples, len(test.wav_lst))
    word_num = 0
    word_error_num = 0
    for i in range(sample_count):
        print('\n 第 ', i, ' 个例子')
        label = test.han_lst[i]
        _, hanzi = yysb.predict(os.path.join(test.data_path,
                                             test.wav_lst[i]),
                                test.pny_lst[i],
                                label,
                                come_from_file=True)
        # To test the language model on its own, use instead:
        # hanzi = yysb.testPinyin(' '.join(test.pny_lst[i]))
        # Cap the error at the label length so one utterance cannot
        # contribute more errors than it has reference characters.
        word_error_num += min(len(label), utils.GetEditDistance(label, hanzi))
        word_num += len(label)

    # With no reference characters the rate is undefined; report NaN rather
    # than crashing with ZeroDivisionError.
    error_rate = word_error_num / word_num if word_num else float('nan')
    print('词错误率:', error_rate)
Exemplo n.º 2
0
def testClient(url='http://127.0.0.1:20000/',
               data_root='/media/yangjinming/DATA/Dataset'):
    """Post the first wav file of the data set to the recognition server.

    Reads ``test.wav_lst[0]`` relative to ``data_root``, sends its samples as
    form data together with the ``token`` and ``pre_type`` fields, and prints
    the response body decoded as UTF-8.

    Args:
        url: Address of the recognition HTTP service.
        data_root: Directory that the wav paths in ``test.wav_lst`` are
            relative to.
    """
    import os
    import requests
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.io.wavfile`` is loaded as an attribute.
    from scipy.io import wavfile

    data_args = utils.data_hparams()
    test = utils.get_data(data_args)

    # wavfile.read returns (sample_rate, samples); only the samples are sent.
    _, wav = wavfile.read(os.path.join(data_root, test.wav_lst[0]))
    datas = {'token': 'SR', 'data': wav, 'pre_type': 'H'}
    r = requests.post(url, datas)
    # Force UTF-8 so the response text is decoded consistently.
    r.encoding = 'utf-8'
    print(r.text)
#coding=utf-8
import os
import difflib
import tensorflow as tf
import numpy as np
from utils import decode_ctc, GetEditDistance


# Decoding setup: load the vocabularies, build the acoustic model and restore
# its weights, then start configuring the language model.

# 0. Prepare the vocabularies needed for decoding. The arguments must match
#    the ones used for training; the vocabularies could also be saved locally
#    and read back directly.
from utils import get_data, data_hparams
data_args = data_hparams()
train_data = get_data(data_args)


# 1. Acoustic model -----------------------------------
from model_speech.cnn_ctc import Am, am_hparams

am_args = am_hparams()  # initialise hyperparameters, e.g. learning rate
# am_args.vocab_size = 230
am_args.vocab_size = len(train_data.am_vocab)   # vocabulary size taken from the loaded acoustic vocabulary
am = Am(am_args)        # build a model from the configured hyperparameters
print('loading acoustic model...')
# Load the weights from the saved file and print the layer summary.
am.ctc_model.load_weights('logs_am/model.h5')
am.ctc_model.summary()

# 2. Language model -------------------------------------------
from model_language.transformer import Lm, lm_hparams

lm_args = lm_hparams()
# Input and label vocabulary sizes come from the pinyin and Chinese-character
# vocabularies of the loaded data.
lm_args.input_vocab_size = len(train_data.pny_vocab)
lm_args.label_vocab_size = len(train_data.han_vocab)
Exemplo n.º 4
0
from utils import get_data, data_hparams

# Build two data objects from separately configured hyperparameter sets:
# one for the 'train' split and one for the 'dev' split.

# 0. Prepare the training data ------------------------------
train_data_args = data_hparams()
train_data_args.data_type = 'train'
train_data_args.data_path = 'E:/data/corpus/'
# NOTE(review): the four flags below presumably select which corpora are
# included -- confirm against data_hparams / get_data.
train_data_args.thchs30 = True
train_data_args.aishell = True
train_data_args.prime = True
train_data_args.stcmd = True
train_data_args.batch_size = 4
train_data_args.data_length = 500
# data_args.data_length = None
train_data_args.shuffle = False
train_data = get_data(train_data_args)

# 0. Prepare the validation data ------------------------------
dev_data_args = data_hparams()
dev_data_args.data_type = 'dev'
dev_data_args.data_path = 'E:/data/corpus/'
# Only thchs30 and aishell are enabled for the dev split.
dev_data_args.thchs30 = True
dev_data_args.aishell = True
dev_data_args.prime = False
dev_data_args.stcmd = False
dev_data_args.batch_size = 4
# data_args.data_length = None
dev_data_args.data_length = 100
dev_data_args.shuffle = True
dev_data = get_data(dev_data_args)
Exemplo n.º 5
0
import tensorflow as tf
import utils
from tqdm import tqdm
import keras
from keras.callbacks import ModelCheckpoint,EarlyStopping,TensorBoard
import numpy as np
# Training-script setup: configure the TensorFlow session used by Keras,
# derive a date-based model version string, and load the training data.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True   # do not grab all GPU memory up front; allocate on demand
keras.backend.tensorflow_backend.set_session(tf.Session(config=config))

from datetime import datetime
# Characters 2..9 of 'YYYY-MM-DD hh:mm:ss...' with the dashes removed,
# i.e. a YYMMDD date stamp.
modelVersion = str(datetime.now())[2:10].replace("-", "")


# 0. Prepare the training data ------------------------------
data_args = utils.data_hparams()
data_args.data_type = 'train'
#data_args.thchs30 = True
#data_args.aishell = True
#data_args.prime = True
#data_args.stcmd = True
data_args.batch_size = 16# AM and LM need not be trained in one go; with the same GPU memory the LM batch_size can be much larger than the AM's
train_data = utils.get_data(data_args)

# 0. Prepare the validation data ------------------------------
# A fresh hyperparameter object is created here, rebinding data_args.
data_args = utils.data_hparams()
data_args.data_type = 'dev'
#data_args.thchs30 = True
#data_args.aishell = True
#data_args.prime = True
#data_args.stcmd = True