def analogy(self, pos, neg, n=10, metric="cosine"):
    """
    Analogy similarity.

    Builds a target vector as the mean of the vectors of the `pos` words
    (weighted +1.0) and the `neg` words (weighted -1.0), scores every row
    of ``self.vectors`` against it with the module-level ``distance``
    helper, and returns the top-scoring entries that are not themselves
    query words.

    Parameters
    ----------
    pos : list
        Words that contribute positively to the target vector.
    neg : list
        Words that contribute negatively to the target vector.
    n : int (default 10)
        Maximum number of results to return.
    metric : string (default "cosine")
        Forwarded unchanged to ``distance``.

    Returns
    -------
    Tuple of 2 numpy.array:
        1. position in self.vocab
        2. cosine similarity

    Raises
    ------
    ValueError
        If both `pos` and `neg` are empty; there is no vector to average.

    Example
    -------
        `king - man + woman = queen` will be:
        `pos=['king', 'woman'], neg=['man']`
    """
    exclude = pos + neg
    if not exclude:
        # Averaging zero vectors would yield NaN and a meaningless ranking.
        raise ValueError("analogy() requires at least one word in pos or neg")

    # Signed contributions, positives first, in the same order as the inputs.
    signed = [1.0 * self[word] for word in pos]
    signed += [-1.0 * self[word] for word in neg]
    target = np.array(signed).mean(axis=0)

    metrics = distance(self.vectors, target, metric=metric)

    # Results are taken in descending order of the returned score.
    # Over-fetch by len(exclude) so that n candidates can remain once the
    # query words have been filtered out.
    best = metrics.argsort()[::-1][:n + len(exclude)]

    # Resolve each query word's index once and mask those entries out.
    exclude_ix = [self.ix(word) for word in exclude]
    new_best = best[~np.isin(best, exclude_ix)]

    best_metrics = metrics[new_best]
    return new_best[:n], best_metrics[:n]
def closest(self, vector, n=10, metric="cosine"):
    """Return the n best-scoring entries for a vector.

    Every row of ``self.vectors`` is scored against `vector` with the
    module-level ``distance`` helper and the entries are ranked in
    descending order of that score. The single top-ranked entry is
    skipped and the following `n` are returned.

    NOTE(review): skipping rank 0 presumably assumes `vector` is itself
    one of the stored vectors and would match first - confirm with callers.

    Parameters
    -------
    vector : numpy.array
    n : int (default 10)
    metric : string (default "cosine")
        Forwarded unchanged to ``distance``.

    Returns
    -------
    Tuple of 2 numpy.array:
        1. position in self.vocab
        2. cosine similarity
    """
    scores = distance(self.vectors, vector, metric=metric)
    ranking = np.argsort(scores)[::-1]
    # Drop the first-ranked entry, keep the next n.
    selected = ranking[1:n + 1]
    return selected, scores[selected]
def distance(self, *args, **kwargs):
    """
    Compute the distance between every pair of the given words.

    Each unordered pair drawn from `args` (in argument order) is looked
    up through ``self[word]`` and passed to the ``distance`` helper
    resolved from the enclosing module scope.

    Parameters
    ----------
    *args : one or more words
        Fewer than two words produce no pairs and an empty result.
    metric : string (default "cosine")
        What metric to use; read from the keyword arguments. Any other
        keyword argument is ignored.

    Returns
    -------
    list of tuple
        One ``(word1, word2, dist)`` entry per pair.
    """
    metric = kwargs.get("metric", "cosine")  # Default is cosine
    return [
        (first, second, distance(self[first], self[second], metric=metric))
        for first, second in itertools.combinations(args, r=2)
    ]
def analogy(self, pos, neg, n=10, metric="cosine"):
    """
    Analogy similarity.

    Builds a target vector as the mean of the vectors of the `pos` words
    (weighted +1.0) and the `neg` words (weighted -1.0), scores every row
    of ``self.vectors`` against it with the module-level ``distance``
    helper, and returns the top-scoring entries that are not themselves
    query words.

    Parameters
    ----------
    pos : list
        Words that contribute positively to the target vector.
    neg : list
        Words that contribute negatively to the target vector.
    n : int (default 10)
        Maximum number of results to return.
    metric : string (default "cosine")
        Forwarded unchanged to ``distance``.

    Returns
    -------
    Tuple of 2 numpy.array:
        1. position in self.vocab
        2. cosine similarity

    Raises
    ------
    ValueError
        If both `pos` and `neg` are empty; there is no vector to average.

    Example
    -------
        `king - man + woman = queen` will be:
        `pos=['king', 'woman'], neg=['man']`
    """
    exclude = pos + neg
    if not exclude:
        # Averaging zero vectors would yield NaN and a meaningless ranking.
        raise ValueError("analogy() requires at least one word in pos or neg")

    # Signed contributions, positives first, in the same order as the inputs.
    signed = [1.0 * self[word] for word in pos]
    signed += [-1.0 * self[word] for word in neg]
    target = np.array(signed).mean(axis=0)

    metrics = distance(self.vectors, target, metric=metric)

    # Results are taken in descending order of the returned score.
    # Over-fetch by len(exclude) so that n candidates can remain once the
    # query words have been filtered out.
    best = metrics.argsort()[::-1][:n + len(exclude)]

    # Resolve each query word's index once and mask those entries out.
    exclude_ix = [self.ix(word) for word in exclude]
    new_best = best[~np.isin(best, exclude_ix)]

    best_metrics = metrics[new_best]
    return new_best[:n], best_metrics[:n]