示例#1
0
    # Corpus preparation sequence. The helper implementations are not visible
    # in this excerpt, so the step descriptions below are inferred from the
    # method names only -- TODO confirm against the corpus class.
    # The calls are kept in this exact order; later steps presumably depend
    # on state produced by earlier ones (e.g. vocab before numeration).

    # Presumably gathers statistics over the training / validation files.
    corpus.train_file_stats()
    corpus.valid_file_stats()
    # Vocabulary construction is driven entirely by command-line arguments:
    # size limits, optional vocab arguments, and the joint-vocab flag.
    corpus.build_vocab(args.src_vocab_size, args.tgt_vocab_size,
                       args.src_vocab, args.tgt_vocab, args.joint_vocab)
    # Presumably converts the training corpus to index form -- verify.
    corpus.corpus_numerate_train(args.src_enumerate_corpus,
                                 args.tgt_enumerate_corpus)
    corpus.corpus_train_lengths_sorting()
    corpus.corpus_numerate_valid()
    corpus.valid_batch_making(args.batch_size)
    corpus.corpus_numerate_esti()
    # NOTE(review): batch size is hard-coded to 16 here, whereas the
    # validation batches above use args.batch_size -- confirm this is
    # intentional.
    corpus.esti_variance_batch_making(batch_size=16)

    if args.save_vocab:
        # Persist the word-to-index tables inside the run folder
        # (stats.fold_name). Every destination is announced on stdout and
        # mirrored into the stats log before anything is written to disk.
        src_vocab_path = stats.fold_name + '/src_vocab.pt'
        tgt_vocab_path = stats.fold_name + '/tgt_vocab.pt'
        src_notice = 'Save src vocab to: ' + src_vocab_path
        tgt_notice = 'Save tgt vocab to: ' + tgt_vocab_path

        # Console output for both vocabularies first, then the log file.
        print(src_notice)
        print(tgt_notice)
        stats.log_to_file(src_notice)
        stats.log_to_file(tgt_notice)
        if corpus.share_embedding:
            # A joint vocabulary is only reported when embeddings are shared.
            joint_notice = ('Save joint vocab to: ' + stats.fold_name +
                            '/joint_vocab.pt')
            print(joint_notice)
            stats.log_to_file(joint_notice)

        # Actual serialization happens after all notices have been emitted.
        torch.save(corpus.src_word2idx, src_vocab_path)
        torch.save(corpus.tgt_word2idx, tgt_vocab_path)
        if corpus.share_embedding:
            joint_vocab_path = stats.fold_name + '/joint_vocab.pt'
            torch.save(corpus.joint_word2idx, joint_vocab_path)

    if args.save_corpus: