Example #1
import os

def main(_):
    model_path = os.path.join('models', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # excel data; expected format: first sheet, column 1 = id,
    # column 2 = query, column 3 = response
    QAs = get_excel_QAs(FLAGS.input_file)

    # # xhj data
    # from read_utils import loadConversations
    # QAs = loadConversations(FLAGS.input_file)

    text = get_QAs_text(QAs)

    if not os.path.exists(os.path.join(model_path, 'converter.pkl')):
        print('Vocabulary file not found, creating...')
        converter = TextConverter(text, FLAGS.max_vocab)
        converter.save_to_file(os.path.join(model_path, 'converter.pkl'))
    else:
        converter = TextConverter(
            filename=os.path.join(model_path, 'converter.pkl'))

    QA_arrs = converter.QAs_to_arrs(QAs, FLAGS.num_steps)
    samples = converter.samples_for_train(QA_arrs)
    g = batch_generator(samples, FLAGS.num_seqs)

    print('vocab size:', converter.vocab_size)
    model = DualLSTM(converter.vocab_size,
                     batch_size=FLAGS.num_seqs,
                     num_steps=FLAGS.num_steps,
                     lstm_size=FLAGS.lstm_size,
                     num_layers=FLAGS.num_layers,
                     learning_rate=FLAGS.learning_rate,
                     train_keep_prob=FLAGS.train_keep_prob,
                     use_embedding=FLAGS.use_embedding,
                     embedding_size=FLAGS.embedding_size)
    model.train(
        g,
        FLAGS.max_steps,
        model_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
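
The batch_generator helper (imported from the project's utilities) is not shown in these examples. Given how it is called here, an endless stream of fixed-size batches fed to model.train, a minimal sketch, assuming shuffle-per-epoch semantics, could look like this:

import random

def batch_generator(samples, batch_size):
    # Hypothetical sketch: endlessly yield shuffled, fixed-size batches;
    # the ragged tail of each epoch is dropped so every batch is full.
    samples = list(samples)
    while True:
        random.shuffle(samples)
        for i in range(0, len(samples) - batch_size + 1, batch_size):
            yield samples[i:i + batch_size]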
Example #2
import os
import tensorflow as tf

def main(_):
    model_path = os.path.join('models', FLAGS.file_name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    if not (os.path.exists(os.path.join(model_path, 'converter.pkl')) and
            os.path.exists(os.path.join(model_path, 'QAs.pkl'))):
        print('Vocabulary files not found, creating...')
        QAs, text = load_origin_data('data/task3_train.txt')
        converter = TextConverter(text, 5000)
        converter.save_to_file(converter.vocab,
                               os.path.join(model_path, 'converter.pkl'))
        converter.save_to_file(QAs, os.path.join(model_path, 'QAs.pkl'))
    else:
        converter = TextConverter(
            filename=os.path.join(model_path, 'converter.pkl'))
        QAs = converter.load_obj(filename=os.path.join(model_path, 'QAs.pkl'))

    QA_arrs = converter.QAs_to_arrs(QAs, FLAGS.num_steps)

    thres = int(len(QA_arrs) * 0.9)
    train_samples = QA_arrs[:thres]
    val_samples = QA_arrs[thres:]

    train_g = batch_generator(train_samples, FLAGS.batch_size)
    val_g = val_samples_generator(val_samples)

    print('use embedding:', FLAGS.use_embedding)
    print('vocab size:', converter.vocab_size)

    from model3 import Model
    model = Model(converter.vocab_size, FLAGS, test=False, embeddings=None)

    # resume training from the latest checkpoint, if one exists
    FLAGS.checkpoint_path = tf.train.latest_checkpoint(model_path)
    if FLAGS.checkpoint_path:
        model.load(FLAGS.checkpoint_path)

    model.train(train_g, FLAGS.max_steps, model_path, FLAGS.save_every_n,
                FLAGS.log_every_n, val_g)
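
The val_samples_generator helper is likewise project-specific and not shown. One plausible reading, consistent with a single evaluation pass over the 10% hold-out, is a simple chunking generator; the batch size of 64 below is an illustrative guess, not taken from the source:

def val_samples_generator(val_samples, batch_size=64):
    # Hypothetical sketch: one pass over the held-out samples in
    # fixed-size chunks for periodic evaluation inside model.train().
    for i in range(0, len(val_samples), batch_size):
        yield val_samples[i:i + batch_size]

Note that a generator like this is exhausted after one pass; if model.train evaluates more than once, the caller would need to recreate it each time (or return a list of batches instead).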