corpus.train_file_stats() corpus.valid_file_stats() corpus.build_vocab(args.src_vocab_size, args.tgt_vocab_size, args.src_vocab, args.tgt_vocab, args.joint_vocab) corpus.corpus_numerate_train(args.src_enumerate_corpus, args.tgt_enumerate_corpus) corpus.corpus_train_lengths_sorting() corpus.corpus_numerate_valid() corpus.valid_batch_making(args.batch_size) corpus.corpus_numerate_esti() corpus.esti_variance_batch_making(batch_size=16) if args.save_vocab: print('Save src vocab to: ' + stats.fold_name + '/src_vocab.pt') print('Save tgt vocab to: ' + stats.fold_name + '/tgt_vocab.pt') stats.log_to_file('Save src vocab to: ' + stats.fold_name + '/src_vocab.pt') stats.log_to_file('Save tgt vocab to: ' + stats.fold_name + '/tgt_vocab.pt') if corpus.share_embedding: print('Save joint vocab to: ' + stats.fold_name + '/joint_vocab.pt') stats.log_to_file('Save joint vocab to: ' + stats.fold_name + '/joint_vocab.pt') torch.save(corpus.src_word2idx, stats.fold_name + '/src_vocab.pt') torch.save(corpus.tgt_word2idx, stats.fold_name + '/tgt_vocab.pt') if corpus.share_embedding: torch.save(corpus.joint_word2idx, stats.fold_name + '/joint_vocab.pt') if args.save_corpus: