def __call__(self, model_name):
    """Evaluate the projection model *model_name* on the test set.

    Loads the fitted k-means model from ``./kmeans.pickle``, assigns each
    test (hyponym, hypernym) offset vector to a cluster, loads the
    per-cluster hypernym estimates from ``./<model_name>.test.npz``, and
    looks up the top-15 nearest words in the word2vec space for each
    estimate.

    Returns:
        dict: hyponym -> list of 15 candidate hypernym words; an empty
        dict when the model's ``.test.npz`` output is missing or does not
        cover every cluster.
    """
    predictions = {}
    print('Evaluating "%s" on "%s".' % (model_name, self.args['test']), flush=True)

    # Load the k-means model. Use a context manager so the file handle is
    # closed even if unpickling raises (the original leaked the handle).
    with open(os.path.join(".", 'kmeans.pickle'), 'rb') as f:
        kmeans = pickle.load(f)
    print('The number of clusters is %d.' % (kmeans.n_clusters), flush=True)

    # Partition the test pairs by the cluster of their offset vector
    # (hypernym embedding minus hyponym embedding).
    clusters_test = kmeans.predict(self.Y_all_test - self.X_all_test)

    try:
        # Format the model name into the *filename* before joining the path
        # (the original formatted the joined result, which only worked by
        # accident because the placeholder survived the join).
        with np.load(os.path.join(".", '%s.test.npz' % model_name)) as npz:
            Y_hat_clusters = {int(cluster): npz[cluster] for cluster in npz.files}
    except FileNotFoundError:
        Y_hat_clusters = {}

    # The output file must provide estimates for every cluster.
    if kmeans.n_clusters != len(Y_hat_clusters):
        print('Missing the output for the model "%s"!' % model_name,
              file=sys.stderr, flush=True)
        return predictions

    # Gather the estimated hypernym vector for each term in the test set,
    # in the original test order.
    Y_all_hat = extract(clusters_test, Y_hat_clusters)
    # Ensure we have exactly one estimate per test subsumption pair.
    assert len(self.subsumptions_test) == Y_all_hat.shape[0]

    # Unit-normalize the estimates so a dot product against the
    # unit-normalized vocabulary equals cosine similarity.
    Y_all_hat_norm = Y_all_hat / np.linalg.norm(Y_all_hat, axis=1)[:, np.newaxis]

    # Find the 15 nearest vocabulary words for every estimate.
    print('nn_vec...')
    similar_indices = nn_vec(Y_all_hat_norm, self.w2v.syn0norm, topn=15,
                             sort=True, return_sims=False,
                             nthreads=self.args['threads'], verbose=False)
    print('nn_vec results convert...')  # fixed typo: "covert" -> "convert"
    similar_words = [[self.w2v.index2word[ind] for ind in row]
                     for row in similar_indices]
    print('done')

    # Map each test hyponym to its ranked candidate hypernyms.
    for i, (hyponym, hypernym) in enumerate(self.subsumptions_test):
        predictions[hyponym] = similar_words[i]
    return predictions
if kmeans.n_clusters != len(Y_hat_clusters): print('Missing the output for the model "%s"!' % model) continue Y_all_hat = extract(clusters_test, Y_hat_clusters) assert len(subsumptions_test) == Y_all_hat.shape[0] measures = [{} for _ in range(10)] if not args['non_optimized']: # normalize Y_all_hat to make dot product equeal to cosine and monotonically decreasing function of euclidean distance Y_all_hat_norm = Y_all_hat / np.linalg.norm(Y_all_hat,axis=1)[:,np.newaxis] print('nn_vec...') similar_indices = nn_vec(Y_all_hat_norm, w2v.syn0norm, topn=10, sort=True, return_sims=False, nthreads=args['threads'], verbose=False) print('nn_vec results covert...') similar_words = [[w2v.index2word[ind] for ind in row] for row in similar_indices] print('done') file_ptr_ms = open(str(model)+"_test_candidates1",'w') file_ptr_hypo = open("test_hypo1",'w') file_ptr_gold = open("test_gold1",'w') prev_hypo = '' gold_list = '' out_ms = '' count = 0 for i, (hyponym, hypernym) in enumerate(subsumptions_test): if args['non_optimized']: Y_hat = Y_all_hat[i].reshape(X_all_test.shape[1],) actual = [w for w,_ in w2v.most_similar(positive=[Y_hat], topn=10)] else: