Example 1
def run_training():
    # Preprocess the corpus: poems as integer vectors, char-to-id map, vocabulary
    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)
    batch_inputs, batch_outputs = generate_batch(FLAGS.batch_size, poems_vector, word_to_int)

    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    end_points = rnn_model(model='lstm', input_data=input_data, output_data=output_targets,
                           vocab_size=len(vocabularies), rnn_size=128, num_layers=2,
                           batch_size=64, learning_rate=0.01)
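All of these snippets read their hyperparameters from a FLAGS object defined elsewhere in the script. As a reference, here is a minimal sketch of how such flags are typically declared with tf.app.flags in TensorFlow 1.x; the flag names are taken from how FLAGS is used in the examples, but the defaults and help strings are assumptions, not the project's actual definitions.

import os
import tensorflow as tf

# Assumed flag definitions, inferred from the FLAGS.* references in the examples
tf.app.flags.DEFINE_integer('batch_size', 64, 'number of poems per training batch')
tf.app.flags.DEFINE_float('learning_rate', 0.01, 'learning rate for the optimizer')
tf.app.flags.DEFINE_string('checkpoints_dir', './checkpoints/poems/', 'directory for checkpoints')
tf.app.flags.DEFINE_string('file_path', './data/poems.txt', 'path to the poem corpus file')
tf.app.flags.DEFINE_string('model_prefix', 'poems', 'prefix for checkpoint file names')
tf.app.flags.DEFINE_integer('epochs', 50, 'number of training epochs')
FLAGS = tf.app.flags.FLAGS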
Example 2
def run_training():
    if not os.path.exists(os.path.dirname(FLAGS.checkpoints_dir)):
        os.mkdir(os.path.dirname(FLAGS.checkpoints_dir))
    if not os.path.exists(FLAGS.checkpoints_dir):
        os.mkdir(FLAGS.checkpoints_dir)

    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)
    batches_inputs, batches_outputs = generate_batch(FLAGS.batch_size, poems_vector, word_to_int)

    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    end_points = rnn_model(model='lstm', input_data=input_data, output_data=output_targets,
                           vocab_size=len(vocabularies), rnn_size=128, num_layers=2,
                           batch_size=64, learning_rate=FLAGS.learning_rate)

    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    with tf.Session() as sess:
        # sess = tf_debug.LocalCLIDebugWrapperSession(sess=sess)
        # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init_op)

        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoints_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("[INFO] restore from the checkpoint {0}".format(checkpoint))
            start_epoch += int(checkpoint.split('-')[-1])
        print('[INFO] start training...')
        try:
            for epoch in range(start_epoch, FLAGS.epochs):
                n = 0
                n_chunk = len(poems_vector) // FLAGS.batch_size
                for batch in range(n_chunk):
                    loss, _, _ = sess.run([
                        end_points['total_loss'],
                        end_points['last_state'],
                        end_points['train_op']
                    ], feed_dict={input_data: batches_inputs[n], output_targets: batches_outputs[n]})
                    n += 1
                    print('[INFO] Epoch: %d, batch: %d, training loss: %.6f' % (epoch, batch, loss))
                if epoch % 6 == 0:
                    saver.save(sess, os.path.join(FLAGS.checkpoints_dir, FLAGS.model_prefix), global_step=epoch)
        except KeyboardInterrupt:
            print('[INFO] Interrupted manually; trying to save a checkpoint now...')
            saver.save(sess, os.path.join(FLAGS.checkpoints_dir, FLAGS.model_prefix), global_step=epoch)
            print('[INFO] Last epoch was saved; next run will start from epoch {}.'.format(epoch))
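Because saver.save is called with global_step=epoch, each checkpoint file name ends with '-<epoch>' (for example '<model_prefix>-12'), which is what makes the start_epoch recovery via checkpoint.split('-')[-1] work. A small illustration with a hypothetical path (note this parsing breaks if model_prefix itself contains a hyphen):

# Hypothetical value returned by tf.train.latest_checkpoint(FLAGS.checkpoints_dir)
checkpoint = './checkpoints/poems/poems-12'
start_epoch = int(checkpoint.split('-')[-1])  # 12: resume training from epoch 12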
Example 3
def run_training():
    # Preprocess: convert the poems to integer vectors and return the corpus,
    # the char-to-id map, and the vocabulary
    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)
    batch_inputs, batch_outputs = generate_batch(FLAGS.batch_size,
                                                 poems_vector, word_to_int)
    # Inputs are written as placeholders with a None second dimension;
    # the targets are used later for the cross-entropy loss
    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    # Define the model
    # LSTM cells tend to work better than plain RNN cells here;
    # rnn_size is the number of hidden units in each recurrent layer
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(vocabularies),
                           rnn_size=32,
                           num_layers=2,
                           batch_size=10,
                           learning_rate=0.01)
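Examples 1 and 3 stop once the graph is defined; Example 2 shows the training loop that normally follows. For readers without access to the helper functions, the sketch below illustrates the contract that generate_batch appears to satisfy, based only on how its return values are used in these examples: each batch is padded to the length of its longest poem, and the target is the input shifted one character to the left. This is an illustrative reconstruction, not the project's actual implementation; the padding id in particular is an assumption.

import numpy as np

def generate_batch_sketch(batch_size, poems_vec, word_to_int):
    """Illustrative only: build [batch_size, max_len] input/target arrays."""
    n_chunk = len(poems_vec) // batch_size
    x_batches, y_batches = [], []
    for i in range(n_chunk):
        chunk = poems_vec[i * batch_size:(i + 1) * batch_size]
        max_len = max(len(p) for p in chunk)
        # assume a padding character such as ' ' exists in the vocabulary
        pad_id = word_to_int.get(' ', 0)
        x = np.full((batch_size, max_len), pad_id, dtype=np.int32)
        for row, poem in enumerate(chunk):
            x[row, :len(poem)] = poem
        y = np.copy(x)
        y[:, :-1] = x[:, 1:]  # target is the next character
        x_batches.append(x)
        y_batches.append(y)
    return x_batches, y_batches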
Example 4
def run_training():
    # Checkpoint save path; the directory (and its parent) is created below if missing
    print('[INFO] checkpoints_dir:', FLAGS.checkpoints_dir)
    if not os.path.exists(os.path.dirname(FLAGS.checkpoints_dir)):
        os.mkdir(os.path.dirname(FLAGS.checkpoints_dir))
    if not os.path.exists(FLAGS.checkpoints_dir):
        os.mkdir(FLAGS.checkpoints_dir)

    # Preprocess the corpus:
    # returns the poems encoded as integer vectors, the char-to-id map, and the vocabulary
    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)
    # Split into batches of FLAGS.batch_size; word_to_int maps characters to ids
    batches_inputs, batches_outputs = generate_batch(FLAGS.batch_size,
                                                     poems_vector, word_to_int)
    # generate_batch returns the per-batch input and target arrays

    # Input and target placeholders
    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(vocabularies),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=FLAGS.batch_size,
                           learning_rate=FLAGS.learning_rate)

    # Saver for writing checkpoints
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)

        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoints_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("[INFO] restore from the checkpoint {0}".format(checkpoint))
            start_epoch += int(checkpoint.split('-')[-1])
        print('[INFO] start training...')
        try:
            for epoch in range(start_epoch, FLAGS.epochs):
                n = 0
                n_chunk = len(poems_vector) // FLAGS.batch_size
                for batch in range(n_chunk):
                    loss, _, _ = sess.run(
                        [
                            end_points['total_loss'], end_points['last_state'],
                            end_points['train_op']
                        ],
                        feed_dict={
                            input_data: batches_inputs[n],
                            output_targets: batches_outputs[n]
                        })
                    n += 1
                    print('[INFO] Epoch: %d, batch: %d, training loss: %.6f' %
                          (epoch, batch, loss))

                if epoch % 6 == 0:
                    # save into checkpoints_dir so the restore logic above can find it
                    saver.save(sess, os.path.join(FLAGS.checkpoints_dir, FLAGS.model_prefix), global_step=epoch)
        except KeyboardInterrupt:
            print('[INFO] Interrupted manually; trying to save a checkpoint now...')
            saver.save(sess,
                       os.path.join(FLAGS.checkpoints_dir, FLAGS.model_prefix),
                       global_step=epoch)
            print('[INFO] Last epoch was saved; next run will start from epoch {}.'.format(epoch))
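Examples 2, 4, 5 and 6 create the checkpoint directory with two separate os.mkdir calls, which still fails if the grandparent directory is missing. A more compact alternative (a sketch, not what the original code does) is os.makedirs with exist_ok=True, available in Python 3.2+:

import os

# Creates every missing directory in the path; no error if it already exists
os.makedirs(FLAGS.checkpoints_dir, exist_ok=True)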
Example 5
def run_training():
    if not os.path.exists(os.path.dirname(FLAGS.checkpoints_dir)):
        os.mkdir(os.path.dirname(FLAGS.checkpoints_dir))
    if not os.path.exists(FLAGS.checkpoints_dir):
        os.mkdir(FLAGS.checkpoints_dir)

    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)
    batches_inputs, batches_outputs = generate_batch(FLAGS.batch_size,
                                                     poems_vector, word_to_int)

    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(vocabularies),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=64,
                           learning_rate=FLAGS.learning_rate)

    saver = tf.train.Saver()
    '''
    tf.group(op1, op2):
    op1 and op2 are operations; tf.group bundles them into a single op. For example:
    generator_train_op = tf.train.AdamOptimizer(...).minimize(g_loss)
    discriminator_train_op = tf.train.AdamOptimizer(...).minimize(d_loss)
    train_ops = tf.group(generator_train_op, discriminator_train_op)
    with tf.Session() as sess:
        sess.run(train_ops)
    Once train_ops is run, both generator_train_op and discriminator_train_op are executed.
    Note that tf.group() returns an operation, not a value; if you pass Variables to it,
    running the group returns None. If you actually need values back, use tf.tuple() instead.
    Global variables: tf.global_variables_initializer() initializes every variable in the
    tf.GraphKeys.GLOBAL_VARIABLES collection, which is the default collection when a
    variable is created with tf.Variable.
    Local variables: tf.local_variables_initializer() initializes the variables in the
    tf.GraphKeys.LOCAL_VARIABLES collection, e.g.
    a = tf.Variable(1, name="a", collections=[tf.GraphKeys.LOCAL_VARIABLES])
    Note: local variables are not saved into checkpoint files by the Saver.
    (A standalone runnable sketch of tf.group follows this example.)
    '''
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)

        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoints_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print('[INFO] restore from the checkpoint %s' % checkpoint)
            start_epoch += int(checkpoint.split('-')[-1])
        print('[INFO] start training ... ')
        try:
            for epoch in range(start_epoch, FLAGS.epochs):
                n = 0
                n_chunk = len(poems_vector) // FLAGS.batch_size
                for batch in range(n_chunk):
                    loss, _, _ = sess.run(
                        [
                            end_points['total_loss'], end_points['last_state'],
                            end_points['train_op']
                        ],
                        feed_dict={
                            input_data: batches_inputs[n],
                            output_targets: batches_outputs[n]
                        })
                    n += 1

                if epoch % 1 == 0:
                    # save into checkpoints_dir so latest_checkpoint() can find it on restart
                    saver.save(sess, os.path.join(FLAGS.checkpoints_dir, FLAGS.model_prefix), global_step=epoch)
        except KeyboardInterrupt:
            print('[INFO] Interrupted manually; trying to save a checkpoint now...')
            saver.save(sess,
                       os.path.join(FLAGS.checkpoints_dir, FLAGS.model_prefix),
                       global_step=epoch)
            print('[INFO] Last epoch was saved; next run will start from epoch %d' % epoch)
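The docstring in Example 5 describes tf.group in prose. The short, self-contained TensorFlow 1.x sketch below demonstrates the same point: the grouped op runs all of its inputs, but running it returns None because tf.group produces an operation rather than tensors. Variable names here are illustrative.

import tensorflow as tf

a = tf.Variable(0, name='a')
b = tf.Variable(10, name='b')
inc_a = tf.assign_add(a, 1)
dec_b = tf.assign_sub(b, 1)
step = tf.group(inc_a, dec_b)  # one op that runs both updates

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    result = sess.run(step)   # result is None: tf.group returns an operation
    print(sess.run([a, b]))   # [1, 9] -- both updates ran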
Example 6
def run_training():
    # Create the checkpoint directory and its parent if they do not exist
    if not os.path.exists(os.path.dirname(FLAGS.checkpoints_dir)):
        os.mkdir(os.path.dirname(FLAGS.checkpoints_dir))
    if not os.path.exists(FLAGS.checkpoints_dir):
        os.mkdir(FLAGS.checkpoints_dir)

    # Read the poem corpus: poems as integer-id sequences, the char-to-id map,
    # and the list of all characters in the vocabulary
    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)
    # Build the per-batch input and target arrays
    batches_inputs, batches_outputs = generate_batch(FLAGS.batch_size,
                                                     poems_vector, word_to_int)

    # Declare the input and target placeholders
    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    # Build the RNN model and get back its end points (loss, last state, train op)
    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(vocabularies),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=64,
                           learning_rate=FLAGS.learning_rate)

    # Create the saver and the variable initializers
    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        start_epoch = 0
        # Restore the previous checkpoint if one exists
        checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoints_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("[INFO] restore from the checkpoint {0}".format(checkpoint))
            # recover the epoch encoded in the checkpoint name and resume from it
            start_epoch += int(checkpoint.split('-')[-1])
        # Start training
        print('[INFO] start training...')
        try:
            for epoch in range(start_epoch, FLAGS.epochs):
                n = 0
                # number of batches per epoch; any remainder at the end is dropped
                n_chunk = len(poems_vector) // FLAGS.batch_size
                for batch in range(n_chunk):
                    # Run one training step and fetch the loss
                    # batches_inputs[n]: input data of the n-th batch
                    # batches_outputs[n]: target data of the n-th batch
                    loss, _, _ = sess.run(
                        [
                            end_points['total_loss'], end_points['last_state'],
                            end_points['train_op']
                        ],
                        feed_dict={
                            input_data: batches_inputs[n],
                            output_targets: batches_outputs[n]
                        })
                    n += 1
                    print(
                        '[INFO] Epoch: %d, batch: %d, training loss: %.6f' %
                        (epoch, batch, loss))
                # Save a checkpoint every 6 epochs
                if epoch % 6 == 0:
                    saver.save(sess,
                               os.path.join(FLAGS.checkpoints_dir,
                                            FLAGS.model_prefix),
                               global_step=epoch)
        except KeyboardInterrupt:
            # On a manual interrupt, try to save a checkpoint before exiting
            print('[INFO] Interrupted manually; trying to save a checkpoint now...')
            saver.save(sess,
                       os.path.join(FLAGS.checkpoints_dir, FLAGS.model_prefix),
                       global_step=epoch)
            print('[INFO] Last epoch was saved; next run will start from epoch {}.'.format(epoch))
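The n_chunk computation in the loop above drops any poems that do not fill a complete batch. With a hypothetical corpus of 1,000 poems and a batch size of 64, for instance:

n_chunk = 1000 // 64           # 15 full batches per epoch
dropped = 1000 - n_chunk * 64  # the last 40 poems are skipped every epoch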
Example 7
def run_training():
    """
    Train the model.
    :return: None
    """

    # Preprocess with process_poems: get the poems as integer vectors,
    # the char-to-id map, and the vocabulary
    poems_vector, word_to_int, vocabularies = process_poems(file_path)

    # Use generate_batch to build the per-batch inputs (features) and targets
    batches_inputs, batches_outputs = generate_batch(batch_size, poems_vector,
                                                     word_to_int)
    # batches_inputs and batches_outputs are n_chunk blocks of shape
    # [64, length], where length varies from block to block

    # Placeholders for the batch inputs and targets; the second dimension is
    # None because the sequence length is not fixed
    input_data = tf.placeholder(tf.int32, [batch_size, None])  # [64, ?]
    output_targets = tf.placeholder(tf.int32, [batch_size, None])  # [64, ?]

    # Build the model and get back its end points (loss, last state, train op)
    end_points = rnn_model(model='rnn',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(vocabularies),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=batch_size,
                           learning_rate=learning_rate)

    # Create a Saver so the model can be checkpointed later
    saver = tf.train.Saver(tf.global_variables())

    # Group the global and local variable initializers into a single init op
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Create a TF session and run the ops
    with tf.Session() as sess:

        # Initialize the variables
        sess.run(init_op)

        # Training starts from epoch 0
        start_epoch = 0

        # Announce that training is starting
        print('[INFO] start training...')

        try:
            # Collect per-batch loss records; they are written to MongoDB after training
            train_losses = []

            # Train for the configured number of epochs
            for epoch in range(start_epoch, epochs):

                n = 0  # index of the current batch within this epoch

                # Number of batches per epoch: one batch per batch_size poems;
                # any remainder at the end of the corpus is dropped
                n_chunk = len(poems_vector) // batch_size

                # Iterate over every batch in this epoch
                for batch in range(n_chunk):
                    # Run one training step, feeding this batch into the placeholders.
                    # Only the loss value is kept; the last state and train op outputs are discarded.
                    loss, _, _ = sess.run(
                        [
                            end_points['total_loss'], end_points['last_state'],
                            end_points['train_op']
                        ],
                        feed_dict={
                            input_data: batches_inputs[n],
                            output_targets: batches_outputs[n]
                        })

                    n += 1  # advance the batch index

                    # Report the current epoch, batch index, and cross-entropy loss
                    print('[INFO] epoch: %d, batch: %d, cross-entropy loss: %.6f' %
                          (epoch, batch, loss))

                    # Record this batch's training info for the MongoDB write below
                    train_loss = {
                        "epoch": epoch,
                        "batch": batch,
                        "cross_entropy_loss": float(loss)
                    }
                    train_losses.append(train_loss)

                # Save the model every 6 epochs; global_step=epoch appends the
                # epoch number to the checkpoint file name
                if epoch % 6 == 0:
                    saver.save(sess, './model/rnn_model/', global_step=epoch)

            # Report that all epochs have finished
            print("[INFO] training finished")

            # Write the collected training records to the database
            # Open a MongoDB client connection
            myclient = pymongo.MongoClient("mongodb://localhost:27017/")
            # Use (or create) a database named train_loss
            mydb = myclient["train_loss"]
            # Use (or create) a collection for this run
            mycol = mydb["rnn_train_loss"]
            # Insert the per-batch loss documents
            mycol.insert_many(train_losses)
            # Close the connection
            myclient.close()

        except KeyboardInterrupt:  # handle a manual interruption by the user

            print('[INFO] training was interrupted manually')
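After training, the per-batch losses written by Example 7 can be read back from the same MongoDB collection. A minimal sketch, assuming the database name, collection name, and field names used in the snippet above:

import pymongo

client = pymongo.MongoClient("mongodb://localhost:27017/")
collection = client["train_loss"]["rnn_train_loss"]

# Print the loss of the first batch of each epoch, in epoch order
for doc in collection.find({"batch": 0}).sort("epoch", pymongo.ASCENDING):
    print(doc["epoch"], doc["cross_entropy_loss"])

client.close()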