def build_save_vocab(train_dataset, fields, opt):
    """Build the vocabulary and save it beside the preprocessed data.

    Calls ``inputters.build_vocab`` with the vocabulary options read from
    ``opt`` and writes whatever it returns, via ``torch.save``, to
    ``opt.save_data + '.vocab.pt'``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        opt: Options object supplying ``data_type``, ``share_vocab``,
            ``src_vocab``, ``src_vocab_size``, ``src_words_min_frequency``,
            ``tgt_vocab``, ``tgt_vocab_size``, ``tgt_words_min_frequency``,
            ``vocab_size_multiple`` and ``save_data``.

    Returns:
        None; the result is written to disk rather than returned.
    """
    build_vocab = inputters.build_vocab
    # Positional options, in the order build_vocab receives them.
    vocab_options = (
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = build_vocab(
        train_dataset,
        fields,
        *vocab_options,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    output_path = opt.save_data + '.vocab.pt'
    torch.save(built_fields, output_path)
def build_save_vocab(train_dataset, fields, opt, logger=None):
    """Build the vocabulary and save its converted form to disk.

    Calls ``inputters.build_vocab`` with the vocabulary options read from
    ``opt`` (and ``logger`` as the final positional argument), converts the
    result with ``inputters.save_fields_to_vocab`` and writes that object,
    via ``torch.save``, to ``opt.save_data + '.vocab.pt'``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        opt: Options object supplying ``data_type``, ``share_vocab``,
            ``src_vocab``, ``src_vocab_size``, ``src_words_min_frequency``,
            ``tgt_vocab``, ``tgt_vocab_size``, ``tgt_words_min_frequency``
            and ``save_data``.
        logger: Forwarded unchanged to ``inputters.build_vocab``;
            defaults to ``None``.

    Returns:
        None; the result is written to disk rather than returned.
    """
    build_vocab = inputters.build_vocab
    vocab_options = (
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = build_vocab(train_dataset, fields, *vocab_options, logger)

    # The fields are not saved directly: they are converted first and are
    # expected to be reconstructed at training time.
    output_path = opt.save_data + '.vocab.pt'
    serializable_vocab = inputters.save_fields_to_vocab(built_fields)
    torch.save(serializable_vocab, output_path)
def build_save_vocab(train_dataset, fields, opt):
    """Build the vocabulary and store it at ``<save_data>.vocab.pt``.

    The vocabulary options are read from ``opt`` and handed to
    ``inputters.build_vocab``; the object it returns is written with
    ``torch.save``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        opt: Options object supplying ``data_type``, ``share_vocab``,
            ``src_vocab``, ``src_vocab_size``, ``src_words_min_frequency``,
            ``tgt_vocab``, ``tgt_vocab_size``, ``tgt_words_min_frequency``,
            ``vocab_size_multiple`` and ``save_data``.

    Returns:
        None; the result is written to disk rather than returned.
    """
    build_vocab = inputters.build_vocab
    general_options = (opt.data_type, opt.share_vocab)
    src_options = (
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
    )
    tgt_options = (
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = build_vocab(
        train_dataset,
        fields,
        *general_options,
        *src_options,
        *tgt_options,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    # The file is written directly next to the save_data prefix; an
    # alternative layout nesting it under a per-prefix directory was
    # previously sketched here but is not in use.
    destination = opt.save_data + '.vocab.pt'
    torch.save(built_fields, destination)
def build_save_vocab(train_dataset, fields, opt):
    """Build the source, target and agenda vocabulary and save it.

    Calls ``inputters.build_vocab`` with the source, target and agenda
    vocabulary options read from ``opt`` and writes the returned object,
    via ``torch.save``, to ``opt.save_data + '.vocab.pt'``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        opt: Options object supplying ``data_type``, ``share_vocab``, the
            ``src_*``, ``tgt_*`` and ``agenda_*`` vocabulary settings
            (``*_vocab``, ``*_vocab_size``, ``*_words_min_frequency``),
            ``fixed_vocab``, ``free_src``, ``free_tgt``,
            ``vocab_size_multiple`` and ``save_data``.

    Returns:
        None; the result is written to disk rather than returned.
    """
    build_vocab = inputters.build_vocab
    # Positional options, in the order build_vocab receives them.
    positional_options = (
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
        opt.agenda_vocab,
        opt.agenda_vocab_size,
        opt.agenda_words_min_frequency,
    )
    keyword_options = {
        'fixed_vocab': opt.fixed_vocab,
        'free_src': opt.free_src,
        'free_tgt': opt.free_tgt,
        'vocab_size_multiple': opt.vocab_size_multiple,
    }
    built_fields = build_vocab(
        train_dataset, fields, *positional_options, **keyword_options
    )

    output_path = opt.save_data + '.vocab.pt'
    torch.save(built_fields, output_path)
def build_save_vocab(train_dataset, fields, opt):
    """Build the vocabulary and pickle its converted form to disk.

    Calls ``inputters.build_vocab`` with the vocabulary options read from
    ``opt``, converts the result with ``inputters.save_fields_to_vocab``
    and dumps it with ``pickle`` to ``opt.save_data + '.vocab.pt'``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        opt: Options object supplying ``data_type``, ``share_vocab``,
            ``src_vocab``, ``src_vocab_size``, ``src_words_min_frequency``,
            ``tgt_vocab``, ``tgt_vocab_size``, ``tgt_words_min_frequency``
            and ``save_data``.

    Returns:
        None; the result is written to disk rather than returned.
    """
    build_vocab = inputters.build_vocab
    vocab_options = (
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = build_vocab(train_dataset, fields, *vocab_options)

    # The fields are not saved directly: they are converted first and are
    # expected to be reconstructed at training time. The file is written
    # with pickle (not torch.save) despite the '.pt' suffix.
    output_path = opt.save_data + '.vocab.pt'
    with open(output_path, 'wb') as handle:
        payload = inputters.save_fields_to_vocab(built_fields)
        pickle.dump(payload, handle)
def build_save_vocab(train_dataset, fields, opt):
    """Announce the step, build the vocabulary and save it to disk.

    Prints the function name to stdout, calls ``inputters.build_vocab``
    with the vocabulary options read from ``opt`` and writes the returned
    object, via ``torch.save``, to ``opt.save_data + '.vocab.pt'``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        opt: Options object supplying ``data_type``, ``share_vocab``,
            ``src_vocab``, ``src_vocab_size``, ``src_words_min_frequency``,
            ``tgt_vocab``, ``tgt_vocab_size``, ``tgt_words_min_frequency``,
            ``vocab_size_multiple`` and ``save_data``.

    Returns:
        None; the result is written to disk rather than returned.
    """
    # Progress marker emitted before any work is done.
    print("build_save_vocab")

    build_vocab = inputters.build_vocab
    vocab_options = (
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = build_vocab(
        train_dataset,
        fields,
        *vocab_options,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    output_path = opt.save_data + '.vocab.pt'
    torch.save(built_fields, output_path)
def build_save_vocab(train_dataset, fields, opt):
    """Build the vocabulary, print the head of the target vocab, save it.

    Calls ``inputters.build_vocab`` with the vocabulary options read from
    ``opt``, prints the first 100 entries (at most) of
    ``fields['tgt'].base_field.vocab.itos`` and writes the built object,
    via ``torch.save``, to ``opt.save_data + '.vocab.pt'``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        opt: Options object supplying ``data_type``, ``share_vocab``,
            ``src_vocab``, ``src_vocab_size``, ``src_words_min_frequency``,
            ``tgt_vocab``, ``tgt_vocab_size``, ``tgt_words_min_frequency``,
            ``vocab_size_multiple`` and ``save_data``.

    Returns:
        None; the result is written to disk rather than returned.
    """
    build_vocab = inputters.build_vocab
    vocab_options = (
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = build_vocab(
        train_dataset,
        fields,
        *vocab_options,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    # Show the head of the target-side index-to-string table on stdout.
    tgt_vocab = built_fields['tgt'].base_field.vocab
    print(tgt_vocab.itos[:100])

    output_path = opt.save_data + '.vocab.pt'
    torch.save(built_fields, output_path)
def build_save_vocab(train_dataset, fields, savepath, opt):
    """Build a vocabulary with fixed settings and pickle it under savepath.

    Calls ``inputters.build_vocab`` with hard-coded settings (text data,
    shared vocab, no vocab paths, size 100 and minimum frequency 1 on both
    sides), converts the result with ``inputters.save_fields_to_vocab``
    and dumps it with ``pickle`` to ``savepath + '/vocab.pt'``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        savepath: String prefix; ``'/vocab.pt'`` is appended to form the
            output file path.
        opt: Accepted for interface compatibility; not read by this
            function.

    Returns:
        None; the result is written to disk rather than returned.
    """
    # Fixed settings, passed to build_vocab by keyword.
    vocab_settings = {
        'data_type': 'text',
        'share_vocab': True,
        'src_vocab_path': '',
        'src_vocab_size': 100,
        'src_words_min_frequency': 1,
        'tgt_vocab_path': '',
        'tgt_vocab_size': 100,
        'tgt_words_min_frequency': 1,
    }
    built_fields = inputters.build_vocab(
        train_dataset, fields, **vocab_settings
    )

    # The fields are not saved directly: they are converted first and are
    # expected to be reconstructed at training time.
    output_path = savepath + '/vocab.pt'
    with open(output_path, 'wb') as handle:
        payload = inputters.save_fields_to_vocab(built_fields)
        pickle.dump(payload, handle)
def build_save_vocab(train_dataset, fields, opt):
    """Build the vocabulary and save it beside the preprocessed data.

    Calls ``inputters.build_vocab`` with the vocabulary options read from
    ``opt`` and writes the returned object, via ``torch.save``, to
    ``opt.save_data + '.vocab.pt'``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        opt: Options object supplying ``data_type``, ``share_vocab``,
            ``src_vocab``, ``src_vocab_size``, ``src_words_min_frequency``,
            ``tgt_vocab``, ``tgt_vocab_size``, ``tgt_words_min_frequency``,
            ``vocab_size_multiple``, ``lower``, ``only_words`` and
            ``save_data``.

    Returns:
        None; the result is written to disk rather than returned.
    """
    # NOTE(review): the previous call placed the positional arguments
    # ``opt.lower, opt.only_words, opt.save_data`` after the keyword
    # argument ``vocab_size_multiple=...``, which Python rejects with
    # "SyntaxError: positional argument follows keyword argument".
    # Every value is now passed positionally, in the order it was written.
    # This assumes build_vocab declares vocab_size_multiple immediately
    # after tgt_words_min_frequency, followed by the parameters receiving
    # lower, only_words and save_data -- confirm against its signature.
    fields = inputters.build_vocab(
        train_dataset,
        fields,
        opt.data_type,
        opt.share_vocab,
        opt.src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
        opt.vocab_size_multiple,
        opt.lower,
        opt.only_words,
        opt.save_data,
    )

    # The object returned by build_vocab is saved as-is.
    vocab_path = opt.save_data + '.vocab.pt'
    torch.save(fields, vocab_path)
def build_save_vocab(train_dataset, fields, opt, index):
    """Build the vocabulary for one indexed dataset and save it.

    Selects ``opt.src_vocab[index]`` as the source vocab argument when
    ``opt.src_vocab`` is non-empty (``None`` otherwise), calls
    ``inputters.build_vocab`` and writes the returned object, via
    ``torch.save``, to ``opt.save_data + "." + str(index) + '.vocab.pt'``.

    Args:
        train_dataset: Passed through as the first argument of
            ``inputters.build_vocab``.
        fields: Passed through as the second argument of
            ``inputters.build_vocab``.
        opt: Options object supplying ``src_vocab`` (a sized, indexable
            collection), ``data_type``, ``share_vocab``,
            ``src_vocab_size``, ``src_words_min_frequency``, ``tgt_vocab``,
            ``tgt_vocab_size``, ``tgt_words_min_frequency``,
            ``vocab_size_multiple`` and ``save_data``.
        index: Used both to index ``opt.src_vocab`` and, stringified, as
            part of the output file name.

    Returns:
        None; the result is written to disk rather than returned.
    """
    # Fall back to no source vocab when none were configured.
    selected_src_vocab = (
        opt.src_vocab[index] if len(opt.src_vocab) > 0 else None
    )

    build_vocab = inputters.build_vocab
    vocab_options = (
        opt.data_type,
        opt.share_vocab,
        selected_src_vocab,
        opt.src_vocab_size,
        opt.src_words_min_frequency,
        opt.tgt_vocab,
        opt.tgt_vocab_size,
        opt.tgt_words_min_frequency,
    )
    built_fields = build_vocab(
        train_dataset,
        fields,
        *vocab_options,
        vocab_size_multiple=opt.vocab_size_multiple,
    )

    # One vocab file per index: <save_data>.<index>.vocab.pt
    indexed_prefix = opt.save_data + "."
    output_path = indexed_prefix + str(index) + '.vocab.pt'
    torch.save(built_fields, output_path)