import os
import tensorflow as tf
# TextConverter, batch_generator, Model, and Config are assumed to come from this repo's own modules.


def main(_):
    model_path = os.path.join('model', Config.file_name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    et = TextConverter(text=None, save_dir='model/en_vocab.pkl',
                       max_vocab=Config.en_vocab_size, seq_length=Config.seq_length)
    # +1 because each decoder sequence is split into input=[:-1] and label=[1:]
    zt = TextConverter(text=None, save_dir='model/zh_vocab.pkl',
                       max_vocab=Config.zh_vocab_size, seq_length=Config.seq_length + 1)
    print('english vocab size:', et.vocab_size)
    print('chinese vocab size:', zt.vocab_size)

    en_arrs = et.get_en_arrs('data/train.tags.data.en_clear')
    zh_arrs = zt.get_en_arrs('data/train.tags.data.zh_clear')
    train_g = batch_generator(en_arrs, zh_arrs, Config.batch_size)

    # Load the most recently saved checkpoint, if one exists
    model = Model(Config)
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    print('start training...')
    model.train(train_g, model_path)
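The seq_length + 1 on the decoder side exists because each target sequence is reused twice, shifted by one position: the decoder input drops the last token and the training label drops the first. A minimal sketch of that split (the token ids and the start/end framing are illustrative, not taken from this repo):

# Sketch of the decoder input/label split that motivates seq_length + 1.
# The token ids below are hypothetical; only the slicing pattern matters.
seq_length = 4
target = [101, 7, 42, 13, 102]   # length seq_length + 1, e.g. <s> w1 w2 w3 </s>

decoder_input = target[:-1]      # [101, 7, 42, 13]  -> fed into the decoder
decoder_label = target[1:]       # [7, 42, 13, 102]  -> what the decoder should predict

assert len(decoder_input) == seq_length
assert len(decoder_label) == seq_length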
def main(_):
    model_path = os.path.join('models', Config.file_name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    converter = TextConverter(vocab_dir='data/vocabs',
                              max_vocab=Config.vocab_size, seq_length=Config.seq_length)
    print('vocab size:', converter.vocab_size)

    en_arrs = converter.get_en_arrs('data/train/in.txt')
    de_arrs = converter.get_de_arrs('data/train/out.txt')
    train_g = batch_generator(en_arrs, de_arrs, Config.batch_size)

    # Load the most recently saved checkpoint, if one exists
    model = Model(Config)
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    print('start training...')
    model.train(train_g, model_path)
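The `main(_)` signature, with its single ignored argument, matches the TensorFlow 1.x `tf.app.run()` entry-point convention: flags are parsed first and `main` is then called with the remaining argv. Assuming this repo follows that convention (the launcher is not shown in this excerpt), the script would end with:

if __name__ == '__main__':
    # tf.app.run() parses any tf.app.flags definitions, then calls main(argv);
    # the unused `_` parameter in main(_) receives that argv list.
    tf.app.run()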