def get_vocab(self):
    """Build the vocabulary from namespace names, special tokens, and memory slot tokens."""
    mem_tokens = []
    for i in range(self.max_mem):
        mem_tokens.append('v{}'.format(i))
    vocab = data_utils.Vocab(self.namespace.get_all_names() + SPECIAL_TKS + mem_tokens)
    return vocab
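# --- Illustrative sketch (not from the source): a minimal token-list Vocab ---
# get_vocab above assumes data_utils.Vocab can be constructed directly from a
# list of tokens and exposes id lookups. The class and the SPECIAL_TKS values
# below are assumptions for illustration only, not the actual implementation.
class _SketchVocab(object):
    def __init__(self, tokens):
        self.id2word = list(tokens)
        self.word2id = {tok: i for i, tok in enumerate(self.id2word)}

    def __len__(self):
        return len(self.id2word)

# Example: two hypothetical special tokens followed by memory slot tokens 'v0'..'v2'.
_special_tks = ['<pad>', '<unk>']
_sketch = _SketchVocab(_special_tks + ['v{}'.format(i) for i in range(3)])
assert _sketch.word2id['v0'] == 2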
# the feature weights file must exist (existence check assumed; the original
# condition preceding this error message was truncated)
assert os.path.exists(args.feature_weights_file), (
    'Error: %s does not exist.' % args.feature_weights_file)

# read the data and pickle it or load it
preproc_data_path = os.path.join(args.model_dir, 'preproc_data_%s.pkl' % args.task)
domain2data = read_data(args.data_path)
print('Saving domain2data object to %s...' % preproc_data_path)
with open(preproc_data_path, 'wb') as f:
    pickle.dump(domain2data, f)
assert set(task_trg_domains) == set(domain2data.keys())

# create the vocabulary or load it if it was already created
vocab_path = os.path.join(args.model_dir, 'vocab.txt')
vocab = data_utils.Vocab(args.max_vocab_size, vocab_path)

# retrieve all available tokenised sentences
tokenised_sentences = data_utils.get_all_docs(domain2data.items(), unlabeled=False)[0]
vocab.create(tokenised_sentences)
del tokenised_sentences

# load word vectors if we are using them
word2vec = None
if args.word2vec_path:
    vocab_word2vec_file = os.path.join(args.model_dir, 'vocab_word2vec.txt')
    word2vec = similarity.load_word_vectors(
        args.word2vec_path, vocab_word2vec_file, vocab.word2id,
        vector_size=args.vector_size)
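# --- Illustrative sketch (not from the source): consuming the loaded vectors ---
# A common next step is to build an embedding matrix aligned with vocab.word2id
# from the word2vec mapping loaded above. The parameter names mirror the script,
# but this helper is an assumption, not part of the original code, and it
# assumes word2vec behaves like a dict mapping words to vectors.
import numpy as np

def build_embedding_matrix(word2vec, word2id, vector_size):
    """Return a (|V|, vector_size) float32 matrix; rows for OOV words stay zero."""
    matrix = np.zeros((len(word2id), vector_size), dtype=np.float32)
    if word2vec is None:
        return matrix
    for word, idx in word2id.items():
        vector = word2vec.get(word)
        if vector is not None:
            matrix[idx] = vector
    return matrix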