def save_word2vec_format(self, dest, source):
    with tf.variable_scope('model', reuse=None):
        # Select the vocabulary whose embedding matrix should be exported.
        if source is VocabType.Token:
            vocab_size = self.word_vocab_size
            embedding_size = self.config.EMBEDDINGS_SIZE
            index = self.index_to_word
            var_name = 'WORDS_VOCAB'
        elif source is VocabType.Target:
            vocab_size = self.target_word_vocab_size
            embedding_size = self.config.EMBEDDINGS_SIZE * 3
            index = self.index_to_target_word
            var_name = 'TARGET_WORDS_VOCAB'
        else:
            raise ValueError('vocab type should be VocabType.Token or VocabType.Target.')
        # Re-create the embedding variable so the checkpointed weights can be restored into it.
        embeddings = tf.get_variable(var_name, shape=(vocab_size + 1, embedding_size),
                                     dtype=tf.float32, trainable=False)
        self.saver = tf.train.Saver()
        self.load_model(self.sess)
        # Evaluate the variable to obtain the embedding matrix as a NumPy array.
        np_embeddings = self.sess.run(embeddings)
    with open(dest, 'w') as words_file:
        common.save_word2vec_file(words_file, vocab_size, embedding_size, index, np_embeddings)
def save_word2vec_format(self, dest_save_path: str, vocab_type: VocabType):
    if vocab_type not in VocabType:
        raise ValueError('`vocab_type` should be `VocabType.Token`, `VocabType.Target` or `VocabType.Path`.')
    # Fetch the learned embedding matrix for the requested vocabulary as a NumPy array.
    vocab_embedding_matrix = self._get_vocab_embedding_as_np_array(vocab_type)
    index_to_word = self.vocabs.get(vocab_type).index_to_word
    with open(dest_save_path, 'w') as words_file:
        common.save_word2vec_file(words_file, index_to_word, vocab_embedding_matrix)
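A minimal usage sketch of the refactored method, assuming an already-trained model instance exposing save_word2vec_format and the project's VocabType enum; the variable name `model` and the output paths below are illustrative, not part of the original code.

# Hypothetical usage (illustrative names; `model` is assumed to be a trained
# instance of the class that defines save_word2vec_format):
model.save_word2vec_format('models/token_embeddings.w2v', VocabType.Token)
model.save_word2vec_format('models/target_embeddings.w2v', VocabType.Target)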