Example #1
def save_word2vec_format(self, dest, source):
    with tf.variable_scope('model', reuse=None):
        # Pick the vocabulary size, embedding width, index-to-word mapping
        # and variable name that correspond to the requested vocab type.
        if source is VocabType.Token:
            vocab_size = self.word_vocab_size
            embedding_size = self.config.EMBEDDINGS_SIZE
            index = self.index_to_word
            var_name = 'WORDS_VOCAB'
        elif source is VocabType.Target:
            vocab_size = self.target_word_vocab_size
            # Target embeddings concatenate three context components,
            # hence the tripled width.
            embedding_size = self.config.EMBEDDINGS_SIZE * 3
            index = self.index_to_target_word
            var_name = 'TARGET_WORDS_VOCAB'
        else:
            raise ValueError(
                '`source` should be VocabType.Token or VocabType.Target.'
            )
        # Re-declare the embedding variable (TF1 API), restore its trained
        # values from the checkpoint, then read it out as a NumPy array.
        embeddings = tf.get_variable(var_name,
                                     shape=(vocab_size + 1,
                                            embedding_size),
                                     dtype=tf.float32,
                                     trainable=False)
        self.saver = tf.train.Saver()
        self.load_model(self.sess)
        np_embeddings = self.sess.run(embeddings)
    # Write the embeddings to `dest` in word2vec text format.
    with open(dest, 'w') as words_file:
        common.save_word2vec_file(words_file, vocab_size, embedding_size,
                                  index, np_embeddings)
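For context, a call site might look like the sketch below. The `model` object, its construction, and the availability of a trained checkpoint are assumptions here; the class this method belongs to is not shown in the example.

# Hypothetical usage sketch: 'model' stands for an instance of the class
# defining save_word2vec_format above, with a trained checkpoint on disk.
model.save_word2vec_format('tokens.w2v', VocabType.Token)    # token embeddings
model.save_word2vec_format('targets.w2v', VocabType.Target)  # target-word embeddings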
Example #2
def save_word2vec_format(self, dest_save_path: str, vocab_type: VocabType):
    if vocab_type not in VocabType:
        raise ValueError('`vocab_type` should be `VocabType.Token`, `VocabType.Target` or `VocabType.Path`.')
    # Fetch the trained embedding matrix for the requested vocabulary as a
    # NumPy array, then pair it with that vocabulary's index-to-word mapping
    # when writing the word2vec file.
    vocab_embedding_matrix = self._get_vocab_embedding_as_np_array(vocab_type)
    index_to_word = self.vocabs.get(vocab_type).index_to_word
    with open(dest_save_path, 'w') as words_file:
        common.save_word2vec_file(words_file, index_to_word, vocab_embedding_matrix)
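Assuming `common.save_word2vec_file` writes the standard word2vec text layout (a "count dimensions" header line followed by one "word v1 v2 ..." line per entry), the exported file can be loaded back for inspection with gensim. This is a verification sketch, not part of the original example; the file name and the probe token are placeholders.

from gensim.models import KeyedVectors

# Load the exported embeddings; binary=False because the file is plain text.
vectors = KeyedVectors.load_word2vec_format('targets.w2v', binary=False)
# Query nearest neighbours of a token known to be in the vocabulary.
print(vectors.most_similar('some_word'))  # 'some_word' is a placeholder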