def setVocab(self, args):
     """Load the cached vocabulary for ``args.dataname``, or build and cache it.

     The cache file is ``./datasets/vocab_<dataname>.bin``. When it exists it
     is loaded with ``Vocabulary.load``. Otherwise a vocabulary is created
     with ``Vocabulary.new`` from ``gens.word_list(args.train_source)`` and
     ``args.n_vocab``, then written to the cache file with ``save``.

     Args:
         args: Object providing ``dataname``, ``train_source`` and ``n_vocab``.

     Returns:
         The loaded or newly built vocabulary; it is also stored on
         ``self.vocab``.
     """
     vocab_name = "./datasets/vocab_{}.bin".format(args.dataname)
     if os.path.exists(vocab_name):
         src_vocab = Vocabulary.load(vocab_name)
     else:
         # Count the distinct words of the training source. This count is
         # only printed for comparison; the vocabulary itself is built with
         # args.n_vocab below.
         set_vocab = set()
         for word_arr in gens.word_list(args.train_source):
             set_vocab.update(word_arr)
         # NOTE(review): the +3 presumably accounts for reserved special
         # tokens -- confirm against Vocabulary.
         n_vocab = len(set_vocab) + 3
         print("n_vocab:{}".format(n_vocab))
         print("arg_vocab:{}".format(args.n_vocab))
         # word_list is called a second time instead of reusing the first
         # result, exactly as before (it may return a one-shot iterator).
         src_vocab = Vocabulary.new(
             gens.word_list(args.train_source), args.n_vocab)
         src_vocab.save(vocab_name)
     self.vocab = src_vocab
     return src_vocab
示例#2
0
 def getBatchGen_test(self, args, is_shuffle=True):
     """Yield batches of the test source encoded through ``self.vocab.stoi``.

     Every sequence returned by ``gens.word_list(args.test_source)`` is
     converted item by item with ``self.vocab.stoi`` and the full encoded
     list is held in memory. The sequences are then fed, optionally in
     shuffled order, to ``gens.batch`` with ``args.test_batchsize``.

     Args:
         args: Object providing ``test_source`` and ``test_batchsize``.
         is_shuffle: When true, the sequence order is shuffled with
             ``random.shuffle`` before batching.

     Yields:
         Each batch produced by ``gens.batch``, unchanged.
     """
     encoded = []
     for sequence in gens.word_list(args.test_source):
         encoded.append([self.vocab.stoi(item) for item in sequence])

     # Shuffle positions rather than the encoded data itself.
     order = list(range(len(encoded)))
     if is_shuffle:
         random.shuffle(order)

     ordered = (encoded[pos] for pos in order)
     batches = gens.batch(ordered, args.test_batchsize)
     for batch in batches:
         yield batch