def create_keyed_vector(old_keyed_vector, new_matrix):
    """Wrap ``new_matrix`` in a fresh ``KeyedVectors`` that reuses an existing vocabulary.

    The ``vocab`` and ``index2word`` objects of ``old_keyed_vector`` are
    attached to the result as-is (the same objects, not copies), and
    ``new_matrix`` becomes its ``vectors``.

    :param old_keyed_vector: object exposing ``vocab`` and ``index2word``.
    :param new_matrix: array-like with a ``shape``; its second dimension is
        used as the vector size, and its full shape must equal
        ``(len(old_keyed_vector.vocab), vector_size)``.
    :returns: the newly built ``KeyedVectors`` instance.
    :raises AssertionError: if the matrix shape does not match the vocabulary
        size (note: ``assert`` is skipped when Python runs with ``-O``).
    """
    dim = new_matrix.shape[1]

    result = KeyedVectors(dim)
    result.vector_size = dim
    result.vocab = old_keyed_vector.vocab
    result.index2word = old_keyed_vector.index2word
    result.vectors = new_matrix

    # One row per vocabulary entry, one column per embedding dimension.
    expected_shape = (len(old_keyed_vector.vocab), dim)
    assert expected_shape == result.vectors.shape
    return result
def apply_w2v_regression(model, regression):
    """Apply a fitted regression to every vector of a word-vector model.

    The vectors in ``model.syn0`` are passed through ``regression.predict``
    and the result, cast to ``float32``, is stored in a new ``KeyedVectors``
    that carries the same vocabulary as ``model``.

    :param model: A gensim ``KeyedVectors`` or ``Word2Vec`` instance; it must
        expose ``vocab``, ``vector_size``, ``index2word`` and ``syn0``.
    :param regression: A ``sklearn.linear_model.LinearRegression`` instance
        (only its ``predict`` method is used).
    :returns: A gensim ``KeyedVectors`` instance holding the transformed
        vectors.
    """
    # KeyedVectors is constructed with its vector size, matching how the other
    # helpers in this file build instances; the no-argument form fails on the
    # gensim API they target.
    aligned_model = KeyedVectors(model.vector_size)
    aligned_model.vector_size = model.vector_size
    # The vocab mapping is copied via ``.copy()``; ``index2word`` is shared
    # with the input model rather than duplicated.
    aligned_model.vocab = model.vocab.copy()
    aligned_model.index2word = model.index2word
    aligned_model.vectors = regression.predict(model.syn0).astype(np.float32)
    return aligned_model
def __create_keyed_vector(matrix, orig_vocab):
    """Build a ``KeyedVectors`` from an embedding matrix and a vocabulary object.

    Words are taken from ``orig_vocab.token2id`` in ascending id order. Each
    word gets a ``Vocab`` entry whose ``index`` is its id and whose ``count``
    is ``orig_vocab.word_freq[id]``.

    :param matrix: array-like with a ``shape``; its second dimension is used
        as the vector size, and its full shape must equal
        ``(number of words, vector_size)``.
    :param orig_vocab: object exposing ``token2id`` (word -> id mapping) and
        ``word_freq`` (indexable by id).
    :returns: the newly built ``KeyedVectors`` instance.
    :raises AssertionError: if the matrix shape does not match the vocabulary
        size (note: ``assert`` is skipped when Python runs with ``-O``).
    """
    # (word, id) pairs ordered by id, so list positions follow id order.
    pairs_by_id = sorted(orig_vocab.token2id.items(), key=itemgetter(1))

    words_in_order = [token for token, _ in pairs_by_id]
    entries = {
        token: Vocab(index=token_id, count=orig_vocab.word_freq[token_id])
        for token, token_id in pairs_by_id
    }

    dim = matrix.shape[1]
    result = KeyedVectors(dim)
    result.vector_size = dim
    result.vocab = entries
    result.index2word = words_in_order
    result.vectors = matrix

    expected_shape = (len(entries), dim)
    assert expected_shape == result.vectors.shape
    return result