def rem_add(x, rem, add, wv: KeyedVectors):
    """Return the 5 entries of ``wv`` nearest to ``x - rem + add``.

    Each of ``x``, ``rem`` and ``add`` is passed through ``parse`` (defined
    elsewhere in this file) and the result is used as a key into ``wv``.
    The vector for ``rem`` is subtracted from the vector for ``x``, the
    vector for ``add`` is added, and the outcome is handed to
    ``wv.similar_by_vector`` with ``topn=5``.
    """
    # Evaluate strictly left to right: subtract first, then add.
    shifted = wv[parse(x)] - wv[parse(rem)]
    target = shifted + wv[parse(add)]
    neighbours = wv.similar_by_vector(target, topn=5)
    return neighbours
class VectorSpaceModel(object):
    """Base class for models that represent words as vectors.

    For now, this really is just a wrapper around the Gensim
    KeyedVectors / Word2Vec class; the wrapped object is kept in ``self.m``
    and a human-readable label in ``self.name``.
    """

    def __init__(self, name=None):
        """Create a model labelled ``name`` around a fresh ``KeyedVectors``."""
        self.name = name
        self.m = KeyedVectors()

    @classmethod
    def load(cls, filename, modelname=None, **kwargs):
        """Load a model from ``filename``, picking the loader by extension.

        Paths ending in ``.pkl`` are handled by ``load_pickle``; every other
        path is handled by ``load_w2v``. ``modelname`` and any extra keyword
        arguments are forwarded to the chosen loader.
        """
        if filename.endswith('.pkl'):
            loader = cls.load_pickle
        else:
            loader = cls.load_w2v
        return loader(filename, modelname=modelname, **kwargs)

    @classmethod
    def load_pickle(cls, filename, modelname=None, **kwargs):
        """Return the object pickled in ``filename``.

        If ``modelname`` is given, it replaces the ``name`` stored in the
        pickle, mirroring how ``load_w2v`` treats the same argument. Extra
        keyword arguments are accepted and ignored.
        """
        debug("Loading pickled model from file {:}".format(filename))
        # SECURITY: unpickling can execute arbitrary code. Only load pickle
        # files that come from a trusted source.
        # pickle.load needs a binary file object, not a path string.
        with open(filename, 'rb') as handle:
            model = pickle.load(handle)
        if modelname is not None:
            model.name = modelname
        return model

    @classmethod
    def load_w2v(cls, filename, modelname=None, **kwargs):
        """Load a word2vec-format model from disk.

        Files ending in ``.bin`` are read with ``binary=True``; all other
        files are read with the loader's default (text) setting. When
        ``modelname`` is omitted, the model is named after the file's base
        name with a trailing ``.bin`` removed. Extra keyword arguments are
        accepted and ignored.
        """
        debug("Loading word2vec model from file {:}".format(filename))
        if filename.endswith(".bin"):
            vectors = KeyedVectors.load_word2vec_format(filename, binary=True)
        else:
            vectors = KeyedVectors.load_word2vec_format(filename)
        model = cls()
        model.m = vectors
        if modelname is None:
            # Escape the dot and anchor at the end so only a real ".bin"
            # suffix is stripped (an unanchored '.bin' pattern would also eat
            # e.g. the "abin" inside "cabinet").
            modelname = re.sub(r'\.bin$', '', os.path.basename(filename))
        model.name = modelname
        return model

    def save_pickle(self, filename):
        """Pickle this model (including the wrapped vectors) to ``filename``."""
        debug("Saving model {:} to pickle file {:}".format(self.name, filename))
        # pickle.dump needs a writable binary file object, not a path string.
        with open(filename, 'wb') as handle:
            pickle.dump(self, handle)

    def __getitem__(self, word):
        """Return ``self.m[word]``, i.e. the wrapped model's entry for ``word``."""
        return self.m[word]

    def most_similar(self, query, k=5):
        """Return the most similar words to the query.

        `query` can be either a string or a vector. If it is a string, then
        its vector will be looked up in the current VSM (via
        ``self.m.most_similar``); anything else is passed to
        ``self.m.similar_by_vector``. ``k`` is forwarded as ``topn``.
        """
        # isinstance (rather than an exact type check) so str subclasses are
        # also treated as words.
        if isinstance(query, str):
            return self.m.most_similar(query, topn=k)
        return self.m.similar_by_vector(query, topn=k)

    def __repr__(self):
        """Return a short description with the model name and vector count."""
        # Newer Gensim releases expose the matrix as ``vectors``; older ones
        # only have ``syn0``. Prefer the former and fall back to the latter.
        matrix = getattr(self.m, 'vectors', None)
        if matrix is None:
            matrix = self.m.syn0
        return "<VectorSpaceModel {:} with {:,} vectors>".format(
            repr(self.name), matrix.shape[0])