Example #1
def run_training():
    if not os.path.exists(FLAGS.model_dir):
        os.makedirs(FLAGS.model_dir)

    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)
    batches_inputs, batches_outputs = generate_batch(FLAGS.batch_size,
                                                     poems_vector, word_to_int)

    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(vocabularies),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=64,
                           learning_rate=FLAGS.learning_rate)

    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    with tf.Session() as sess:
        # sess = tf_debug.LocalCLIDebugWrapperSession(sess=sess)
        # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init_op)

        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("## restore from the checkpoint {0}".format(checkpoint))
            start_epoch += int(checkpoint.split('-')[-1])
        print('## start training...')
        try:
            for epoch in range(start_epoch, FLAGS.epochs):
                n = 0
                n_chunk = len(poems_vector) // FLAGS.batch_size
                for batch in range(n_chunk):
                    loss, _, _ = sess.run(
                        [
                            end_points['total_loss'],
                            end_points['last_state'],
                            end_points['train_op']
                        ],
                        feed_dict={
                            input_data: batches_inputs[n],
                            output_targets: batches_outputs[n]
                        })
                    n += 1
                    print('Epoch: %d, batch: %d, training loss: %.6f' %
                          (epoch, batch, loss))
                if epoch % 6 == 0:
                    saver.save(sess,
                               os.path.join(FLAGS.model_dir,
                                            FLAGS.model_prefix),
                               global_step=epoch)
        except KeyboardInterrupt:
            print('## Interrupted manually, saving a checkpoint now...')
            saver.save(sess,
                       os.path.join(FLAGS.model_dir, FLAGS.model_prefix),
                       global_step=epoch)
            print(
                '## Last epoch was saved, next time will start from epoch {}.'
                .format(epoch))
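Every example on this page calls a generate_batch helper that is not shown. A minimal sketch of what such a helper could look like, assuming each poem is a list of integer indices and that the space character ' ' is present in word_to_int and used for padding (both assumptions, not the original implementation):

import numpy as np


def generate_batch(batch_size, poems_vec, word_to_int):
    # Number of full batches that fit into the corpus.
    n_chunk = len(poems_vec) // batch_size
    x_batches, y_batches = [], []
    for i in range(n_chunk):
        batch = poems_vec[i * batch_size:(i + 1) * batch_size]
        # Pad every poem in the batch to the length of the longest one.
        max_len = max(len(poem) for poem in batch)
        x_data = np.full((batch_size, max_len), word_to_int[' '], np.int32)
        for row, poem in enumerate(batch):
            x_data[row, :len(poem)] = poem
        # Targets are the inputs shifted one position to the left,
        # so the model learns to predict the next character.
        y_data = np.copy(x_data)
        y_data[:, :-1] = x_data[:, 1:]
        x_batches.append(x_data)
        y_batches.append(y_data)
    return x_batches, y_batches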
Example #2
def run_training():
    if not os.path.exists(FLAGS.model_dir):
        os.makedirs(FLAGS.model_dir)

    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)
    batches_inputs, batches_outputs = generate_batch(FLAGS.batch_size, poems_vector, word_to_int)

    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    end_points = rnn_model(model='lstm', input_data=input_data, output_data=output_targets,
                           vocab_size=len(vocabularies), rnn_size=128, num_layers=2,
                           batch_size=64, learning_rate=FLAGS.learning_rate)

    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    with tf.Session() as sess:
        # sess = tf_debug.LocalCLIDebugWrapperSession(sess=sess)
        # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init_op)

        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("## restore from the checkpoint {0}".format(checkpoint))
            start_epoch += int(checkpoint.split('-')[-1])
        print('## start training...')
        try:
            for epoch in range(start_epoch, FLAGS.epochs):
                n = 0
                n_chunk = len(poems_vector) // FLAGS.batch_size
                for batch in range(n_chunk):
                    loss, _, _ = sess.run([
                        end_points['total_loss'],
                        end_points['last_state'],
                        end_points['train_op']
                    ], feed_dict={input_data: batches_inputs[n], output_targets: batches_outputs[n]})
                    n += 1
                    print('Epoch: %d, batch: %d, training loss: %.6f' % (epoch, batch, loss))
                if epoch % 6 == 0:
                    saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_prefix), global_step=epoch)
        except KeyboardInterrupt:
            print('## Interrupted manually, saving a checkpoint now...')
            saver.save(sess, os.path.join(FLAGS.model_dir, FLAGS.model_prefix), global_step=epoch)
            print('## Last epoch was saved, next time will start from epoch {}.'.format(epoch))
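rnn_model is likewise assumed rather than shown. From the way end_points is used in these examples, it has to return a dict with at least 'total_loss', 'last_state' and 'train_op'. Below is a hedged TensorFlow 1.x sketch of such a model; the cell type, the dense projection and the Adam optimizer are guesses, not the original code:

def rnn_model(model, input_data, output_data, vocab_size,
              rnn_size=128, num_layers=2, batch_size=64, learning_rate=0.01):
    # `model` selects the cell type; only the 'lstm' case is sketched here.
    cells = [tf.nn.rnn_cell.BasicLSTMCell(rnn_size) for _ in range(num_layers)]
    cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    initial_state = cell.zero_state(batch_size, tf.float32)

    # Learned character embeddings feed a stacked LSTM.
    embedding = tf.get_variable('embedding', [vocab_size, rnn_size])
    inputs = tf.nn.embedding_lookup(embedding, input_data)

    outputs, last_state = tf.nn.dynamic_rnn(cell, inputs,
                                            initial_state=initial_state)
    output = tf.reshape(outputs, [-1, rnn_size])
    logits = tf.layers.dense(output, vocab_size)

    # Cross-entropy against the next-character targets.
    labels = tf.one_hot(tf.reshape(output_data, [-1]), depth=vocab_size)
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels,
                                                      logits=logits)
    total_loss = tf.reduce_mean(loss)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return {'initial_state': initial_state,
            'last_state': last_state,
            'total_loss': total_loss,
            'train_op': train_op}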
Example #3
def run_training():
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    poems_vector, word_to_int, vocabularies = process_poems(corpus_path)
    batches_inputs, batches_outputs = generate_batch(FLAGS.batch_size,
                                                     poems_vector, word_to_int)

    print("## top ten vocabularies: %s" % str(vocabularies[:10]))
    print("## tail ten vocabularies: %s" % str(vocabularies[-10:]))
    print("## len(first vector)=%d, first vector[:50]: %s" %
          (len(poems_vector[0]), poems_vector[0][:50]))
    print("## len(last vector)=%d, second vector[:50]: %s" %
          (len(poems_vector[-1]), poems_vector[-1][:50]))

    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    end_points = rnn_model(model='lstm',
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(vocabularies),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=FLAGS.batch_size,
                           learning_rate=FLAGS.learning_rate)

    saver = tf.train.Saver(tf.global_variables())
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    summary_op = tf.summary.merge_all()
    with tf.Session() as sess:

        train_writer = tf.summary.FileWriter(os.path.join(log_dir, "train"),
                                             sess.graph)

        sess.run(init_op)

        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(model_dir)
        if checkpoint:
            saver.restore(sess, checkpoint)
            print("## restore from the checkpoint {0}".format(checkpoint),
                  flush=True)
            start_epoch += int(checkpoint.split('-')[-1]) + 1
        print('## start training...', flush=True)

        n_chunk = len(poems_vector) // FLAGS.batch_size

        try:
            for epoch in range(start_epoch, FLAGS.epochs):
                n = 0

                for batch in range(n_chunk):
                    step = epoch * n_chunk + batch
                    if step % FLAGS.print_every_steps == 0:
                        loss, _, _, train_summary = sess.run(
                            [
                                end_points['total_loss'],
                                end_points['last_state'],
                                end_points['train_op'], summary_op
                            ],
                            feed_dict={
                                input_data: batches_inputs[n],
                                output_targets: batches_outputs[n]
                            })
                        train_writer.add_summary(train_summary,
                                                 global_step=step)
                        print(
                            '[%s] Step: %d, Epoch: %d, batch: %d, training loss: %.6f'
                            % (time.strftime('%Y-%m-%d %H:%M:%S'), step, epoch,
                               batch, loss),
                            flush=True)
                    else:
                        _, _ = sess.run(
                            [end_points['last_state'], end_points['train_op']],
                            feed_dict={
                                input_data: batches_inputs[n],
                                output_targets: batches_outputs[n]
                            })
                    n += 1
                    step += 1
                if epoch % FLAGS.save_every_epoch == 0:
                    saver.save(sess, model_file, global_step=epoch)
                    print("[%s] Saving checkpoint for epoch %d" %
                          (time.strftime('%Y-%m-%d %H:%M:%S'), epoch),
                          flush=True)
        except KeyboardInterrupt:
            print('## Interrupted manually, saving a checkpoint now...')
            saver.save(sess, model_file, global_step=epoch)
            print(
                '## Last epoch was saved, next time will start from epoch {}.'
                .format(epoch),
                flush=True)
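Example #3 refers to several names defined elsewhere in its module: corpus_path, model_dir, log_dir, model_file, plus the flags print_every_steps and save_every_epoch. It also relies on tf.summary.merge_all(), which returns None unless at least one summary has been registered (presumably inside rnn_model). The definitions below are illustrative assumptions only, showing one way those missing pieces could be wired up:

tf.app.flags.DEFINE_integer('print_every_steps', 100,
                            'log the loss and write a summary every N steps.')
tf.app.flags.DEFINE_integer('save_every_epoch', 5,
                            'save a checkpoint every N epochs.')

corpus_path = os.path.abspath('./data/poems.txt')
model_dir = os.path.abspath('./model')
log_dir = os.path.abspath('./log')
model_file = os.path.join(model_dir, 'poems')

# Somewhere in rnn_model (or right after it is built) a summary must be
# registered so that tf.summary.merge_all() has something to merge, e.g.:
# tf.summary.scalar('total_loss', total_loss)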
Example #4
tf.app.flags.DEFINE_integer('batch_size', 64, 'batch size.')
tf.app.flags.DEFINE_float('learning_rate', 0.01, 'learning rate.')
tf.app.flags.DEFINE_string('model_dir', os.path.abspath('./model'), 'model save path.')
tf.app.flags.DEFINE_string('file_path', os.path.abspath('./data/poems.txt'), 'file name of poems.')
tf.app.flags.DEFINE_string('model_prefix', 'poems', 'model save prefix.')
tf.app.flags.DEFINE_integer('epochs', 50, 'train how many epochs.')

FLAGS = tf.app.flags.FLAGS


def run_training():
    if not os.path.exists(FLAGS.model_dir):  # model directory does not exist
        os.makedirs(FLAGS.model_dir)  # create it

    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)  # preprocessing: build word vectors
    batches_inputs, batches_outputs = generate_batch(FLAGS.batch_size, poems_vector, word_to_int)  # split into batches

    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    end_points = rnn_model(model='lstm', input_data=input_data, output_data=output_targets,
                           vocab_size=len(vocabularies), rnn_size=128, num_layers=2,
                           batch_size=64, learning_rate=FLAGS.learning_rate)  # build the RNN model

    saver = tf.train.Saver(tf.global_variables())  # saver for checkpoints
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    with tf.Session() as sess:
        # sess = tf_debug.LocalCLIDebugWrapperSession(sess=sess)
        # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
        sess.run(init_op)

        start_epoch = 0
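Example #4 breaks off after start_epoch = 0; the rest of its training loop is cut from the source and is left as-is here. With tf.app.flags, the conventional entry point that would invoke run_training is the standard TF 1.x idiom below (not part of the original snippet):

def main(_):
    run_training()


if __name__ == '__main__':
    tf.app.run()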
Example #5
def run_training():
    # dir to save model

    if not os.path.exists(FLAGS.model_dir):
        os.makedirs(FLAGS.model_dir)

    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)
    batches_input, batches_outputs = generate_batch(FLAGS.batch_size,
                                                    poems_vector, word_to_int)

    # print(word_to_int)
    # print(batches_input[0][0])
    # print(batches_outputs[0][1])
    # print(batches_outputs)
    # time.sleep(10000)

    input_data = tf.placeholder(tf.int32, [FLAGS.batch_size, None])
    output_targets = tf.placeholder(tf.int32, [FLAGS.batch_size, None])

    end_points = rnn_model(model="lstm",
                           input_data=input_data,
                           output_data=output_targets,
                           vocab_size=len(vocabularies),
                           rnn_size=128,
                           num_layers=2,
                           batch_size=64,
                           learning_rate=FLAGS.learning_rate)

    saver = tf.train.Saver(tf.global_variables())

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    with tf.Session() as sess:

        # Initialize all variables
        sess.run(init_op)

        start_epoch = 0
        checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir)

        if checkpoint:
            saver.restore(sess, checkpoint)
            print("### restore from the checkpoint {0}".format(checkpoint))
            start_epoch += int(checkpoint.split('-')[-1])

        print(' ## start training... ')

        try:

            for epoch in range(start_epoch, FLAGS.epochs):
                n = 0
                n_chunk = len(poems_vector) // FLAGS.batch_size
                for batch in range(n_chunk):
                    loss, _, _ = sess.run(
                        [
                            end_points['total_loss'], end_points['last_state'],
                            end_points['train_op']
                        ],
                        feed_dict={
                            input_data: batches_input[n],
                            output_targets: batches_outputs[n]
                        })
                    n += 1
                    print('Epoch: %d, batch: %d, training loss: %.6f' %
                          (epoch, batch, loss))

                if epoch % 6 == 0:
                    saver.save(sess,
                               os.path.join(FLAGS.model_dir,
                                            FLAGS.model_prefix),
                               global_step=epoch)
        except KeyboardInterrupt:
            print('## Interrupted manually, saving a checkpoint now...')
            saver.save(sess,
                       os.path.join(FLAGS.model_dir, FLAGS.model_prefix),
                       global_step=epoch)
            print(
                '## Last epoch was saved, next time will start from epoch {}.'
                .format(epoch))
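The examples all recover the starting epoch with int(checkpoint.split('-')[-1]). That works because Saver.save appends -<global_step> to the checkpoint prefix, and tf.train.latest_checkpoint returns that prefix, so the epoch number can be parsed straight out of the path:

# saver.save(sess, os.path.join(model_dir, 'poems'), global_step=12)
# writes files whose common prefix is '<model_dir>/poems-12', and
# tf.train.latest_checkpoint(model_dir) returns that prefix:
checkpoint = '/tmp/model/poems-12'            # illustrative value only
start_epoch = int(checkpoint.split('-')[-1])  # -> 12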
Example #6
def run_training():
    if not os.path.exists(FLAGS.model_dir):
        os.makedirs(FLAGS.model_dir)

    # poems_vector: 3-D ndarray, the corpus matrix; each layer is one poem,
    #     split into upper and lower half-sentences (2 x ?), with every
    #     character represented by its integer index
    # word_to_int: pair of dicts mapping each character to its index
    # vocabularies: pair of lists, the vocabulary sorted from most to least frequent
    poems_vector, word_to_int, vocabularies = process_poems(FLAGS.file_path)

    _, _, substr_len = poems_vector.shape
    # The corpus matrix is split into chunks of batch_size poems.
    # batches_inputs: 4-D ndarray, one block per chunk; each layer is one sample (2 * substr_len)
    # batches_outputs: 4-D ndarray, batches_inputs shifted one position to the left
    batches_inputs, batches_outputs = generate_batch(FLAGS.batch_size,
                                                     poems_vector, word_to_int)

    graph = tf.Graph()
    with graph.as_default():
        # declare placeholders of shape of (batch_size, 2, substr_len)
        input_data = tf.placeholder(tf.int32,
                                    [FLAGS.batch_size, 2, substr_len],
                                    name="left_word")
        output_targets = tf.placeholder(tf.int32,
                                        [FLAGS.batch_size, 2, substr_len],
                                        name="right_word")
        add_mat = tf.placeholder(tf.int32, [FLAGS.batch_size, 2, substr_len],
                                 name="additional_feature")
        # build the model
        rnn = RNNModel(model_name,
                       num_layers=2,
                       rnn_size=64,
                       batch_size=64,
                       vocabularies=vocabularies,
                       add_dim=add_feature_dim,
                       substr_len=substr_len)
        # get 2 endpoints
        endpoints = rnn.train(input_data=input_data,
                              add_data=add_mat,
                              label_data=output_targets,
                              learning_rate=FLAGS.learning_rate)
        # keep only the most recent checkpoint file
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

    # session configuration
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config, graph=graph) as sess:
        # init
        sess.run(init_op)
        # log
        summary_writer = tf.summary.FileWriter(FLAGS.log_path, graph=graph)

        # start_epoch: number of epochs already trained
        start_epoch = 0
        # look up the latest checkpoint
        checkpoint = tf.train.latest_checkpoint(FLAGS.model_dir)
        os.system('cls')
        if checkpoint:
            # restore from the checkpoint
            saver.restore(sess, checkpoint)
            print("## restore from checkpoint {0}".format(checkpoint))
            start_epoch += int(checkpoint.split('-')[-1])

        print('## start training...')
        print("## run `tensorboard --logdir %s`, and view localhost:6006." %
              (os.path.abspath("./log/train/%s" % model_name)))
        # n_chunk: number of chunks (batches per epoch)
        n_chunk = len(poems_vector) // FLAGS.batch_size
        tf.get_default_graph().finalize()
        for epoch in range(start_epoch, FLAGS.epochs):
            bar = Bar("epoch%d" % epoch, max=n_chunk)
            for batch in range(n_chunk):
                # run one training step on this batch
                summary = easyTrain(
                    sess,
                    endpoints,
                    inputs=(input_data, batches_inputs[batch]),
                    label=(output_targets, batches_outputs[batch]),
                    pos_data=(add_mat,
                              generate_add_mat(batches_inputs[batch],
                                               'binary')))
                # reduce IO
                if batch % 16 == 0:
                    summary_writer.add_summary(summary,
                                               epoch * n_chunk + batch)
                    bar.next(16)
            # save at the end of each epoch
            saver.save(sess,
                       os.path.join(FLAGS.model_dir, FLAGS.model_prefix),
                       global_step=epoch)
            bar.finish()
        # save on exit
        saver.save(sess,
                   os.path.join(FLAGS.model_dir, FLAGS.model_prefix),
                   global_step=epoch)
        print('## Last epoch was saved, next time will start from epoch {}.'.
              format(epoch))
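Example #6 calls two helpers that are not shown: generate_add_mat, which builds the additional-feature tensor, and easyTrain, which runs one training step; Bar presumably comes from the progress package (from progress.bar import Bar). A minimal sketch of easyTrain consistent with how it is called above, assuming endpoints exposes a 'train_op' and a merged 'summary' tensor (both assumptions):

def easyTrain(sess, endpoints, inputs, label, pos_data):
    # Each keyword argument is a (placeholder, value) pair.
    feed_dict = {inputs[0]: inputs[1],
                 label[0]: label[1],
                 pos_data[0]: pos_data[1]}
    # Run one optimisation step and return the serialized summary so the
    # caller can hand it to summary_writer.add_summary().
    _, summary = sess.run([endpoints['train_op'], endpoints['summary']],
                          feed_dict=feed_dict)
    return summary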