def create_keyedvector_from_matrix(self, embedding_matrix, word2id):
    """
    Build a ``KeyedVectors`` object from an embedding matrix and a
    word-to-index vocabulary, and store it in ``self.embedding``.
    Can be used for custom pre-trained embeddings.

    Parameters
    ----------
    embedding_matrix: numpy.ndarray
        Embedding matrix as a numpy object. Its second dimension
        (``shape[1]``) is used as the vector size.
    word2id: dict
        Word vocabulary (key: word, value: word_index)

    Notes
    -----
    ``index2word`` lists the words in ascending ``word_index`` order, while
    each ``Vocab`` entry keeps the index given in ``word2id``. The two agree
    only when the indices are contiguous and start at 0 -- NOTE(review):
    confirm that callers guarantee this.
    """
    # Sort the (word, index) pairs by index; ties keep the dict's own order.
    pairs_by_index = sorted(word2id.items(), key=lambda pair: pair[1])

    dim = embedding_matrix.shape[1]
    keyed = KeyedVectors(dim)
    keyed.vector_size = dim
    keyed.vectors = embedding_matrix
    keyed.index2word = [word for word, _ in pairs_by_index]
    # The word count is not known here, so every entry gets a count of 0.
    keyed.vocab = {
        word: Vocab(index=word_index, count=0)
        for word, word_index in pairs_by_index
    }

    self.embedding = keyed
def _set_keyedvector(self, attrname, keys, dim, vec=None):
    """
    Create a ``KeyedVectors`` object for ``keys`` and attach it to this
    instance under the attribute name ``attrname``.

    Parameters
    ----------
    attrname: str
        Name of the attribute on ``self`` that receives the new object.
    keys: sized iterable
        Vocabulary entries; each one is converted with ``str`` and indexed
        by its position in the iteration order.
    dim:
        Value passed to the ``KeyedVectors`` constructor
        (presumably the vector dimensionality -- TODO confirm).
    vec: optional
        When not ``None``, assigned as-is to ``vectors`` and followed by a
        call to ``init_sims()``. Presumably one row per key -- TODO confirm.
    """
    container = KeyedVectors(dim)

    # Placeholder counts decrease with position (len(keys), ..., 1), so
    # earlier keys look more frequent than later ones.
    count_ceiling = len(keys) + 1
    for position, raw_key in enumerate(keys):
        label = str(raw_key)
        entry = Vocab(index=position, count=count_ceiling - position)  # dummy count
        container.vocab[label] = entry
        container.index2word.append(label)

    has_vectors = vec is not None
    if has_vectors:
        container.vectors = vec
        container.init_sims()

    setattr(self, attrname, container)