def main(_):
    """Train a DualLSTM retrieval model on QA pairs read from an Excel file.

    Creates ``models/<FLAGS.name>``, builds the vocabulary converter (or
    loads a previously saved one), converts the QA pairs into training
    arrays, and launches training.
    """
    model_path = os.path.join('models', FLAGS.name)
    # exist_ok avoids the TOCTOU race of the original exists()-then-makedirs
    # check, and replaces the `is False` anti-pattern.
    os.makedirs(model_path, exist_ok=True)

    # Excel input format: first sheet; column 1 = id, column 2 = query,
    # column 3 = response.
    QAs = get_excel_QAs(FLAGS.input_file)
    # # xhj data
    # from read_utils import loadConversations
    # QAs = loadConversations(FLAGS.input_file)

    text = get_QAs_text(QAs)

    # Hoist the repeated path join; build the vocab only when no cached
    # converter exists yet.
    converter_path = os.path.join(model_path, 'converter.pkl')
    if not os.path.exists(converter_path):
        print('词库文件不存在,创建...')
        converter = TextConverter(text, FLAGS.max_vocab)
        converter.save_to_file(converter_path)
    else:
        converter = TextConverter(filename=converter_path)

    QA_arrs = converter.QAs_to_arrs(QAs, FLAGS.num_steps)
    samples = converter.samples_for_train(QA_arrs)
    g = batch_generator(samples, FLAGS.num_seqs)
    print(converter.vocab_size)

    model = DualLSTM(converter.vocab_size,
                     batch_size=FLAGS.num_seqs,
                     num_steps=FLAGS.num_steps,
                     lstm_size=FLAGS.lstm_size,
                     num_layers=FLAGS.num_layers,
                     learning_rate=FLAGS.learning_rate,
                     train_keep_prob=FLAGS.train_keep_prob,
                     use_embedding=FLAGS.use_embedding,
                     embedding_size=FLAGS.embedding_size)

    model.train(g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n)
def main(_):
    """Train the retrieval model from ``model3`` on the task3 dataset.

    Builds (or loads) the cached vocabulary converter and QA pairs under
    ``models/<FLAGS.file_name>``, splits them 90/10 into train/validation,
    and resumes from the latest checkpoint when one exists.
    """
    model_path = os.path.join('models', FLAGS.file_name)
    # exist_ok avoids the TOCTOU race of the original exists()-then-makedirs
    # check, and replaces the `is False` anti-pattern.
    os.makedirs(model_path, exist_ok=True)

    converter_path = os.path.join(model_path, 'converter.pkl')
    qas_path = os.path.join(model_path, 'QAs.pkl')
    # BUG FIX: the original condition was
    #     exists(converter) or exists(QAs) is False
    # which, because `is` binds tighter than `or`, rebuilt the caches
    # whenever converter.pkl DID exist — the opposite of the intent.
    # Rebuild only when either cache file is missing.
    if not (os.path.exists(converter_path) and os.path.exists(qas_path)):
        print('词库文件不存在,创建...')
        QAs, text = load_origin_data('data/task3_train.txt')
        converter = TextConverter(text, 5000)
        converter.save_to_file(converter.vocab, converter_path)
        converter.save_to_file(QAs, qas_path)
    else:
        converter = TextConverter(filename=converter_path)
        QAs = converter.load_obj(filename=qas_path)

    QA_arrs = converter.QAs_to_arrs(QAs, FLAGS.num_steps)

    # 90/10 train/validation split.
    thres = int(len(QA_arrs) * 0.9)
    train_samples = QA_arrs[:thres]
    val_samples = QA_arrs[thres:]

    train_g = batch_generator(train_samples, FLAGS.batch_size)
    val_g = val_samples_generator(val_samples)

    print('use embeding:', FLAGS.use_embedding)
    print('vocab size:', converter.vocab_size)

    from model3 import Model
    model = Model(converter.vocab_size, FLAGS, test=False, embeddings=None)

    # Resume from the previous training run, if a checkpoint exists.
    FLAGS.checkpoint_path = tf.train.latest_checkpoint(model_path)
    if FLAGS.checkpoint_path:
        model.load(FLAGS.checkpoint_path)

    model.train(train_g,
                FLAGS.max_steps,
                model_path,
                FLAGS.save_every_n,
                FLAGS.log_every_n,
                val_g)