import collections
import os

import numpy

# normalize, cosine (similarity), get_embedding, WordReps, get_correlation,
# eval_SemEval and pkg_dir are assumed to be defined elsewhere in this module.


def add_cos(va, vb, vc, vd):
    """
    Uses the following formula for scoring:
    cos(vb - va + vc, vd)
    """
    x = normalize(vb - va + vc)
    return cosine(x, vd)
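
# A minimal sketch of how add_cos (CosAdd) scores a candidate answer for the
# analogy a:b :: c:d, using toy 2-d vectors. Illustrative only: in the
# evaluation functions below the vectors come from WordReps, and the
# normalize and cosine similarity helpers are assumed from this module.
def _demo_add_cos():
    va = numpy.array([1.0, 0.0])  # "a"
    vb = numpy.array([1.0, 1.0])  # "b"
    vc = numpy.array([0.0, 1.0])  # "c"
    vd = numpy.array([0.0, 2.0])  # candidate "d"
    # vb - va + vc = (0, 2), which is parallel to vd, so the score is 1.0.
    print "CosAdd score =", add_cos(va, vb, vc, vd)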

def eval_diff_vect(WR):
    """
    Uses the DiffVect dataset for performing 1-NN relation classification.
    We use PairDiff to create a vector for each word-pair and then measure
    the similarity between the target pair and the remaining word-pairs in
    the dataset. If the 1-NN has the same relation label as the target pair,
    we consider it a correct match. We compute
    accuracy = correct_matches / total_instances.
    """
    analogy_file = open(os.path.join(pkg_dir, "../benchmarks/diff-vec"))
    relation = {}
    pairs = []
    label = ""
    for line in analogy_file:
        if line.startswith(':'):
            # This is a relation label.
            label = line.split(':')[1].strip()
        else:
            (a, b) = line.strip().split()
            pairs.append((a, b))
            relation[(a, b)] = label
    analogy_file.close()
    n = len(pairs)
    M = numpy.zeros((n, WR.dim), dtype=numpy.float64)
    for (i, (a, b)) in enumerate(pairs):
        M[i, :] = normalize(get_embedding(a, WR) - get_embedding(b, WR))
    S = numpy.dot(M, M.T)
    # Column 0 after sorting is each pair's self-similarity, so column 1
    # gives the 1-NN among the remaining pairs.
    preds = (-S).argsort()[:, 1]
    corrects = sum(
        [relation[pairs[i]] == relation[pairs[preds[i]]] for i in range(n)])
    accuracy = float(100 * corrects) / float(n)
    print "DiffVec Accuracy =", accuracy
    return accuracy
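
# The loop above expects the diff-vec benchmark file in the same layout as
# the analogy files: a ": label" line introduces a relation type, followed by
# one whitespace-separated word pair per line. A hypothetical sketch:
#
#   : hypernym
#   dog animal
#   car vehicle
#   : meronym
#   wheel car
#   leaf tree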

def evaluate_embeddings(embed_fname, dim, res_fname):
    """
    Evaluate a pre-trained word embedding on the semantic similarity and
    word analogy benchmarks, and write the results to res_fname.
    """
    res = {}
    WR = WordReps()
    # We will load vectors only for the words in the benchmarks.
    words = set()
    with open(os.path.join(pkg_dir, "../benchmarks/all_words.txt")) as F:
        for line in F:
            words.add(line.strip())
    WR.read_model(embed_fname, dim, words)

    # Semantic similarity benchmarks.
    benchmarks = ["ws", "rg", "mc", "rw", "scws", "men", "simlex"]
    for bench in benchmarks:
        (corr, sig) = get_correlation(
            os.path.join(pkg_dir, "../benchmarks/%s_pairs.txt" % bench),
            WR.vects, "spearman")
        print "%s = %f" % (bench, corr)
        res[bench] = corr

    # Row-normalised matrix of candidate word vectors, used by the analogy
    # benchmarks for nearest-neighbour search.
    cands = list(words)
    M = numpy.zeros((len(cands), WR.dim), dtype=numpy.float64)
    for (i, w) in enumerate(cands):
        M[i, :] = normalize(get_embedding(w, WR))

    # Word analogy benchmarks.
    res["Google_res"] = eval_Google_Analogies(WR, M, cands)
    res["MSR_res"] = eval_MSR_Analogies(WR, M, cands)
    res["SemEval_res"] = eval_SemEval(WR, "CosAdd")
    res["DiffVec_acc"] = eval_diff_vect(WR)
    #res["SAT_res"] = eval_SAT_Analogies(WR, scoring_method)

    res_file = open(res_fname, 'w')
    res_file.write(
        "#RG, MC, WS, RW, SCWS, MEN, SimLex, sem, syn, total, SemEval, MSR, DiffVec\n")
    res_file.write("%f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f\n" %
                   (res["rg"], res["mc"], res["ws"], res["rw"], res["scws"],
                    res["men"], res["simlex"], res["Google_res"]["semantic"],
                    res["Google_res"]["syntactic"], res["Google_res"]["total"],
                    res["SemEval_res"]["acc"], res["MSR_res"]["accuracy"],
                    res["DiffVec_acc"]))
    res_file.close()
    return res
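
# A minimal usage sketch for evaluate_embeddings. The embedding file name,
# dimensionality and output path below are hypothetical; any text-format
# embedding that WordReps.read_model can parse should work, provided the
# script is run from inside the package so that pkg_dir resolves the
# benchmark paths.
def _demo_evaluate_embeddings():
    res = evaluate_embeddings("../embeddings/sample.300d.txt", 300,
                              "../work/results.csv")
    print "SimLex Spearman =", res["simlex"]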

def eval_MSR_Analogies(WR, M, cands):
    """
    Evaluate the accuracy of the learnt vectors on the analogy task using
    the MSR dataset. The candidate space for the correct answer is the word
    list cands, whose row-normalised vectors are given by M; the three
    question words themselves are excluded from the candidates.
    """
    analogy_file = open(
        os.path.join(pkg_dir, "../benchmarks/msr-analogies.txt"))
    questions = []
    corrects = 0
    for line in analogy_file:
        p = line.strip().split()
        questions.append((p[0], p[1], p[2], p[3]))
    analogy_file.close()
    print "== MSR Analogy Dataset =="
    print "Total no. of questions =", len(questions)
    print "Total no. of candidates =", len(cands)

    # Predict the fourth word for each question.
    count = 1
    for (a, b, c, d) in questions:
        print "%d / %d" % (count, len(questions)), "\r",
        count += 1
        va = get_embedding(a, WR)
        vb = get_embedding(b, WR)
        vc = get_embedding(c, WR)
        # CosAdd: the answer should be the candidate closest to vb - va + vc.
        x = normalize(vb - va + vc)
        s = numpy.dot(M, x)
        # Take the top four neighbours so that at least one candidate
        # survives after removing the three question words.
        nns = [cands[i] for i in (-s).argsort()[:4]]
        nns = filter(lambda y: y not in [a, b, c], nns)
        if nns[0] == d:
            corrects += 1
    accuracy = float(corrects) / float(len(questions))
    print "MSR accuracy =", accuracy
    return {"accuracy": accuracy}
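
# A sketch of the nearest-neighbour step used in eval_MSR_Analogies (and in
# eval_Google_Analogies below). Because the rows of M and the query x are all
# unit vectors, the dot product equals cosine similarity, so sorting on -s
# ranks candidates from most to least similar. The helper name and signature
# are hypothetical, not part of this module.
def _nearest_neighbours(M, cands, x, k=4):
    s = numpy.dot(M, x)
    return [cands[i] for i in (-s).argsort()[:k]]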

def PairDiff(va, vb, vc, vd):
    """
    Uses the following formula for scoring:
    cos(vd - vc, vb - va)
    """
    return cosine(normalize(vd - vc), normalize(vb - va))

def subt_cos(va, vb, vc, vd):
    """
    Uses the following formula for scoring:
    cos(va - vc, vb - vd)
    """
    return cosine(normalize(va - vc), normalize(vb - vd))
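
# A toy sketch contrasting the PairDiff and subt_cos scoring functions on
# 2-d vectors, assuming the normalize and cosine similarity helpers from
# this module. When the relational offsets are parallel (vb - va parallel
# to vd - vc), both functions score 1.0 on this configuration.
def _demo_pair_scoring():
    va, vb = numpy.array([0.0, 0.0]), numpy.array([1.0, 0.0])
    vc, vd = numpy.array([0.0, 1.0]), numpy.array([1.0, 1.0])
    print "PairDiff score =", PairDiff(va, vb, vc, vd)  # cos((1,0), (1,0)) = 1.0
    print "subt_cos score =", subt_cos(va, vb, vc, vd)  # cos((0,-1), (0,-1)) = 1.0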

def eval_Google_Analogies(WR, M, cands):
    """
    Evaluate the accuracy of the learnt vectors on the analogy task using
    the Google dataset. The candidate space for the correct answer is the
    word list cands, whose row-normalised vectors are given by M; the three
    question words themselves are excluded from the candidates.
    """
    analogy_file = open(
        os.path.join(pkg_dir, "../benchmarks/google-analogies.txt"))
    questions = collections.OrderedDict()
    total_questions = {}
    corrects = {}
    label = ""
    for line in analogy_file:
        if line.startswith(':'):
            # This is a label.
            label = line.split(':')[1].strip()
            questions[label] = []
            total_questions[label] = 0
            corrects[label] = 0
        else:
            p = line.strip().split()
            total_questions[label] += 1
            questions[label].append((p[0], p[1], p[2], p[3]))
    analogy_file.close()

    # Every question word is loaded by read_model, so all questions are
    # attempted.
    valid_questions = sum(len(questions[label]) for label in questions)

    print "== Google Analogy Dataset =="
    print "Total no. of question types =", len(questions)
    print "Total no. of candidates =", len(cands)

    # Predict the fourth word for each question.
    count = 1
    for label in questions:
        for (a, b, c, d) in questions[label]:
            print "%d%% (%d / %d)" % ((100 * count) / float(valid_questions),
                                      count, valid_questions), "\r",
            count += 1
            va = get_embedding(a, WR)
            vb = get_embedding(b, WR)
            vc = get_embedding(c, WR)
            # CosAdd: the answer should be the candidate closest to vb - va + vc.
            x = normalize(vb - va + vc)
            s = numpy.dot(M, x)
            nns = [cands[i] for i in (-s).argsort()[:4]]
            nns = filter(lambda y: y not in [a, b, c], nns)
            if nns[0] == d:
                corrects[label] += 1

    # Compute accuracy, splitting the relation types into semantic and
    # syntactic (labels prefixed with "gram") groups.
    n = semantic_total = syntactic_total = semantic_corrects = syntactic_corrects = 0
    for label in total_questions:
        n += total_questions[label]
        if label.startswith("gram"):
            syntactic_total += total_questions[label]
            syntactic_corrects += corrects[label]
        else:
            semantic_total += total_questions[label]
            semantic_corrects += corrects[label]
    print "Percentage of questions attempted = %f (%d / %d)" % (
        (100 * valid_questions) / float(n), valid_questions, n)
    for label in questions:
        acc = float(100 * corrects[label]) / float(total_questions[label])
        print "%s = %f (correct = %d, attempted = %d, total = %d)" % (
            label, acc, corrects[label], len(questions[label]),
            total_questions[label])
    semantic_accuracy = float(100 * semantic_corrects) / float(semantic_total)
    syntactic_accuracy = float(100 * syntactic_corrects) / float(syntactic_total)
    total_corrects = semantic_corrects + syntactic_corrects
    accuracy = float(100 * total_corrects) / float(n)
    print "Semantic Accuracy =", semantic_accuracy
    print "Syntactic Accuracy =", syntactic_accuracy
    print "Total accuracy =", accuracy
    return {
        "semantic": semantic_accuracy,
        "syntactic": syntactic_accuracy,
        "total": accuracy
    }
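
# The Google analogy file parsed above groups questions under ": label"
# headers with four words per line; syntactic relation labels carry a "gram"
# prefix, which is what drives the semantic/syntactic split. For example:
#
#   : capital-common-countries
#   Athens Greece Baghdad Iraq
#   : gram1-adjective-to-adverb
#   amazing amazingly calm calmly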