Example #1
def load_am():
    # 1. Acoustic model -----------------------------------
    from model_speech.cnn_ctc import Am, am_hparams

    am_args = am_hparams()
    am_args.vocab_size = len(train_data.am_vocab)
    am = Am(am_args)
    print('loading acoustic model...')
    am.ctc_model.load_weights('logs_am/model.h5')
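Example #1 references a train_data object that is created elsewhere; a minimal setup sketch for it, mirroring the module-level code in Examples #6 and #7 below:

from utils import get_data, data_hparams

data_args = data_hparams()
train_data = get_data(data_args)   # provides the train_data.am_vocab used above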
Example #2
def train_am(x=None,y=None,fit_epoch=10):
    from model_speech.cnn_ctc import Am, am_hparams
    am_args = am_hparams()
    am_args.vocab_size = len(utils.pny_vocab)
    am_args.gpu_nums = 1
    am_args.lr = 0.0008
    am_args.is_training = True
    am = Am(am_args)

    if os.path.exists(os.path.join(utils.cur_path,'logs_am','model.h5')):
        print('loading acoustic model...')
        am.ctc_model.load_weights(os.path.join(utils.cur_path,'logs_am','model.h5'))

    checkpoint = ModelCheckpoint(os.path.join(utils.cur_path,'checkpoint', "model_{epoch:02d}-{val_loss:.2f}.h5"),
        monitor='val_loss',save_best_only=True)
    eStop = EarlyStopping()  # stop training once val_loss stops improving for patience epochs
    #tensorboard --logdir=/media/yangjinming/DATA/GitHub/AboutPython/AboutDL/语音识别/logs_am/tbLog/ --host=127.0.0.1
    #tensbrd = TensorBoard(log_dir=os.path.join(utils.cur_path,'logs_am','tbLog'))

    if x is not None:  # fine-tune the model with real-time audio to customize it
        size=1
        if type(x) == np.ndarray:
            x,y = utils.real_time2data([x],[y])
        else:
            size = len(x)
            x,y = utils.real_time2data(x,y)
        am.ctc_model.fit(x=x,y=y,batch_size=size,epochs=fit_epoch)
    else:  # train on the prepared training data
        batch = train_data.get_am_batch()  # returns a generator
        dev_batch = dev_data.get_am_batch()
        validate_step = 200  # average the validation result over N batches
        history = am.ctc_model.fit_generator(batch, steps_per_epoch=batch_num, epochs=epochs, callbacks=[eStop,checkpoint],
            workers=1, use_multiprocessing=False,verbose=1,validation_data=dev_batch, validation_steps=validate_step)

    am.ctc_model.save_weights(os.path.join(utils.cur_path,'logs_am','model.h5'))
    # write the serialized frozen-graph PB file
    #with keras.backend.get_session() as sess:
    sess = keras.backend.get_session()
    constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess,
                            sess.graph_def,output_node_names=['the_inputs','dense_2/truediv'])
    with tf.gfile.GFile(os.path.join(utils.cur_path,'logs_am','amModel.pb'), mode='wb') as f:
        f.write(constant_graph.SerializeToString())

    # save the SavedModel files for TF Serving
    builder = tf.saved_model.builder.SavedModelBuilder(os.path.join(utils.cur_path,'logs_am',modelVersion))
    model_signature = tf.saved_model.signature_def_utils.predict_signature_def(
        inputs={'input': am.inputs}, outputs={'output': am.outputs})
    builder.add_meta_graph_and_variables(sess,[tf.saved_model.tag_constants.SERVING],
            {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: model_signature})
    builder.save()

    if x is None:
        sess.close()
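To sanity-check the TF Serving export above, the SavedModel can be reloaded in a fresh session and queried through its signature. A minimal sketch, assuming TF 1.x (or the tf.compat.v1 aliases) and a hypothetical padded feature array x of the shape the acoustic model expects:

import os
import tensorflow as tf

export_dir = os.path.join(utils.cur_path, 'logs_am', modelVersion)  # same version dir used by the builder above
with tf.Session(graph=tf.Graph()) as sess:
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], export_dir)
    signature = meta_graph.signature_def[
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
    input_name = signature.inputs['input'].name     # 'input'/'output' as named in the export above
    output_name = signature.outputs['output'].name
    preds = sess.run(output_name, feed_dict={input_name: x})  # x: padded acoustic features (assumption)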
Example #3
    def _model_init_keras(self, model_dir, config):
        # 1. Acoustic model training -----------------------------------
        from model_speech.cnn_ctc import Am, am_hparams
        am_args = am_hparams()
        am_args.vocab_size = len(self.label_vocabulary)
        am_args.gpu_nums = 0
        am_args.lr = 0.0008
        am_args.is_training = True
        am = Am(am_args)
        base_model = am.ctc_model
        self.ctc_model = tf.keras.estimator.model_to_estimator(
            base_model, model_dir=model_dir, config=config)
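A hypothetical call site for the method above, showing how a RunConfig could be passed in; recognizer stands for whatever object owns _model_init_keras:

run_config = tf.estimator.RunConfig(save_checkpoints_steps=500, keep_checkpoint_max=3)
recognizer._model_init_keras(model_dir='logs_am/estimator', config=run_config)
estimator = recognizer.ctc_model   # now a tf.estimator.Estimator wrapping the Keras CTC model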
Example #4
    def __init__(self,test_flag = True):
        # 0. Prepare the vocabularies needed for decoding. The parameters must match training;
        #    the vocabularies can also be saved locally and read back directly.
        self.test_flag = test_flag
        # print('loading acoustic model...')
        if K_usePB:
            self.AM_sess = tf.Session()
            with tf.gfile.GFile(os.path.join(cur_path,'logs_am','amModel.pb'), 'rb') as f:  # load the frozen acoustic model
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                with self.AM_sess.graph.as_default():
                    tf.import_graph_def(graph_def, name='')  # import the computation graph
                    self.AM_sess.run(tf.global_variables_initializer())  # run an explicit initialization step
            self.AM_x = self.AM_sess.graph.get_tensor_by_name('the_inputs:0')  # must match the input name used when the graph was saved!
            self.AM_preds = self.AM_sess.graph.get_tensor_by_name('dense_2/truediv:0')
        else:
            from model_speech.cnn_ctc import Am, am_hparams
            am_args = am_hparams()
            am_args.vocab_size = len(pny_vocab)  # pitfall: this must equal the vocabulary size used during training!
            self.am = Am(am_args)
            self.am.ctc_model.load_weights(os.path.join(cur_path,'logs_am','model.h5'))

        # print('loading language model...')
        if tf_usePB:
            self.sess = tf.Session()
            with tf.gfile.GFile(os.path.join(cur_path,'logs_lm','lmModel.pb'), 'rb') as f:  # load the frozen language model
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                with self.sess.graph.as_default():
                    tf.import_graph_def(graph_def, name='')  # import the computation graph
                    self.sess.run(tf.global_variables_initializer())  # run an explicit initialization step
            self.x = self.sess.graph.get_tensor_by_name('x:0')  # must match the input name used when the graph was saved!
            self.preds = self.sess.graph.get_tensor_by_name('preds:0')
        else:  # restore the language model from a checkpoint
            from model_language.transformer import Lm, lm_hparams
            lm_args = lm_hparams()
            lm_args.input_vocab_size = len(pny_vocab)
            lm_args.label_vocab_size = len(han_vocab)
            lm_args.dropout_rate = 0.
            self.lm = Lm(lm_args)
            self.sess = tf.Session(graph=self.lm.graph)
            with self.lm.graph.as_default():
                saver = tf.train.Saver()
            with self.sess.as_default():
                lmPath = tf.train.latest_checkpoint(os.path.join(cur_path,'logs_lm'))
                saver.restore(self.sess, lmPath)
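With the tensors fetched above, running the frozen acoustic graph is a single session call. A minimal sketch of two hypothetical helper methods for the K_usePB branch (not part of the original class, and assuming numpy is imported as np); the blank index in the greedy decode is an assumption, since Keras' CTC loss treats the last class as blank:

    def am_predict_pb(self, x):
        # forward pass through the frozen acoustic model; x is a padded feature
        # array of the training shape, the result is the per-frame softmax output
        return self.AM_sess.run(self.AM_preds, feed_dict={self.AM_x: x})

    def greedy_ctc_decode(self, softmax, blank_id=None):
        # naive greedy CTC decode: argmax per frame, collapse repeats, drop blanks
        if blank_id is None:
            blank_id = softmax.shape[-1] - 1  # assumption: blank is the last class
        ids = np.argmax(softmax[0], axis=-1)
        out, prev = [], -1
        for i in ids:
            if i != prev and i != blank_id:
                out.append(int(i))
            prev = i
        return out  # pinyin token ids, to be mapped back through pny_vocab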
Example #5
def train_am(epochs):
    # 1. Acoustic model training -----------------------------------
    from model_speech.cnn_ctc import Am, am_hparams
    am_args = am_hparams()
    am_args.vocab_size = len(train_data.am_vocab)
    am_args.gpu_nums = 1
    am_args.lr = 0.0008
    am_args.is_training = True
    am = Am(am_args)

    if os.path.exists('logs_am') and os.listdir('logs_am'):
        model = os.listdir('logs_am')[0]
        am.ctc_model.load_weights(os.path.join('logs_am', model))

    batch_num = len(train_data.wav_lst) // train_data.batch_size

    # checkpoint
    ckpt = "model_{val_loss:.3f}_{epoch:04d}.h5"
    checkpoint = ModelCheckpoint(os.path.join('logs_am', ckpt),
                                 monitor='val_loss',
                                 save_weights_only=True,
                                 verbose=1,
                                 save_best_only=True)

    batch = train_data.get_am_batch()
    dev_batch = dev_data.get_am_batch()

    am.ctc_model.fit_generator(batch,
                               steps_per_epoch=batch_num,
                               epochs=epochs,
                               callbacks=[checkpoint],
                               workers=1,
                               use_multiprocessing=False,
                               validation_data=dev_batch,
                               validation_steps=2)
    am.ctc_model.save_weights('logs_am/model.h5')
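Because the ModelCheckpoint above writes files named model_{val_loss:.3f}_{epoch:04d}.h5, resuming from the best run means picking the file with the lowest val_loss rather than an arbitrary directory entry, which is what the load at the top of this example tries to do. A small sketch that parses that filename pattern:

import glob
import os

ckpts = glob.glob(os.path.join('logs_am', 'model_*.h5'))
if ckpts:
    best = min(ckpts, key=lambda p: float(os.path.basename(p).split('_')[1]))
    am.ctc_model.load_weights(best)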
Example #6
import difflib
import tensorflow as tf
import numpy as np
from utils import decode_ctc, GetEditDistance


# 0. Prepare the vocabularies needed for decoding. The parameters must match training; the vocabularies can also be saved locally and read back directly.
from utils import get_data, data_hparams
data_args = data_hparams()
train_data = get_data(data_args)


# 1. Acoustic model -----------------------------------
from model_speech.cnn_ctc import Am, am_hparams

am_args = am_hparams()  # initialize the hyperparameters, e.g. the learning rate
# am_args.vocab_size = 230
am_args.vocab_size = len(train_data.am_vocab)   # set the vocabulary size
am = Am(am_args)        # build the model from the configured hyperparameters
print('loading acoustic model...')
am.ctc_model.load_weights('logs_am/model.h5')
am.ctc_model.summary()

# 2. Language model -------------------------------------------
from model_language.transformer import Lm, lm_hparams

lm_args = lm_hparams()
lm_args.input_vocab_size = len(train_data.pny_vocab)
lm_args.label_vocab_size = len(train_data.han_vocab)
lm_args.dropout_rate = 0.
print('loading language model...')
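Example #6 breaks off right after the language-model banner; restoring the Transformer LM normally follows the checkpoint branch shown in Example #4, roughly:

lm = Lm(lm_args)
sess = tf.Session(graph=lm.graph)
with lm.graph.as_default():
    saver = tf.train.Saver()
with sess.as_default():
    lm_path = tf.train.latest_checkpoint('logs_lm')
    saver.restore(sess, lm_path)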
Example #7
import numpy as np
from utils import decode_ctc, GetEditDistance, cal_ctc_acc

# 0. Prepare the vocabularies needed for decoding. The parameters must match training; the vocabularies can also be saved locally and read back directly.
from utils import get_data, data_hparams
data_args = data_hparams()
data_args.data_length = 20000
train_data = get_data(data_args)
print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
print("=====train_data_amvocab", len(train_data.pny_vocab))

# 1. Acoustic model -----------------------------------
from model_speech.cnn_ctc import Am, am_hparams
# from model_speech.gru_ctc import Am, am_hparams

am_args = am_hparams()
am_args.vocab_size = len(train_data.am_vocab)
am = Am(am_args)
print('loading acoustic model...')
am.ctc_model.load_weights('logs_am/model.h5')
# am.ctc_model.load_weights('checkpoint/model_01-0.00.hdf5')
# 2. Language model -------------------------------------------

# 3. Prepare the test data. It does not have to match the training data; the split is chosen
#    via data_args.data_type. It should be 'test' here; I used 'train' because the demo model
#    is small, so 'test' shows no useful result and produces words never seen in training.
data_args = data_hparams()
data_args.data_type = 'test'
data_args.zanghua = True
data_args.data_length = 20000
test_data = get_data(data_args)
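The imports at the top of this example (decode_ctc, GetEditDistance, cal_ctc_acc) belong to the evaluation loop that normally follows. Their exact signatures live in this project's utils module, so the sketch below shows only the scoring step; it assumes GetEditDistance returns the edit distance between two pinyin token sequences and that utterance_pairs is a hypothetical iterable of (reference, prediction) sequences:

word_error_num, word_num = 0, 0
for ref_pny, pred_pny in utterance_pairs:
    word_error_num += GetEditDistance(ref_pny, pred_pny)  # assumed: edit distance between the two sequences
    word_num += len(ref_pny)
print('overall pinyin error rate: {:.2%}'.format(word_error_num / word_num))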
Example #8
    def build_model(self):
        am_args = am_hparams()
        am_args.vocab_size = 230  # len(train_data.am_vocab)
        am = Am(am_args)
        return am.ctc_model
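A hypothetical usage of the factory method above; recognizer stands for whatever object defines build_model, and the weights file is a placeholder. The weights being loaded must come from a model trained with the same hard-coded vocab_size of 230:

model = recognizer.build_model()
model.load_weights('logs_am/model.h5')  # placeholder path; must match the vocab_size above
model.summary()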