def evaluate_simi(wv, w2i, vocab):
    """Evaluate word vectors on a battery of word-similarity benchmarks.

    Args:
        wv: 2-D array of word vectors, one row per vocabulary index.
        w2i: mapping from word to its row index in ``wv``.
        vocab: iterable of words to evaluate.

    Prints the Spearman correlation for each benchmark.
    """
    # Build the word -> vector mapping for the requested vocabulary.
    wv_dict = {word: wv[w2i[word], :] for word in vocab}

    # The original guarded this with a trivially-true isinstance(dict) check;
    # had it ever been false, the embedding would silently have been the last
    # vocab word (loop variable reuse). Build it unconditionally instead.
    emb = Embedding.from_dict(wv_dict)

    # Calculate results on similarity
    print("Calculating similarity benchmarks")
    similarity_tasks = {
        "WS353": fetch_WS353(),
        "RG65": fetch_RG65(),
        # "WS353R": fetch_WS353(which="relatedness"),
        # "WS353S": fetch_WS353(which="similarity"),
        "SimLex999": fetch_SimLex999(),
        "MTurk": fetch_MTurk(),
        "RW": fetch_RW(),
        "MEN": fetch_MEN(),
    }

    for name, data in iteritems(similarity_tasks):
        print(
            "Sample data from {}, num of samples: {} : pair \"{}\" and \"{}\" is assigned score {}"
            .format(name, len(data.X), data.X[0][0], data.X[0][1], data.y[0]))
        score = evaluate_similarity(emb, data.X, data.y)
        print("Spearman correlation of scores on {} {}".format(name, score))
def __init__(self, embedder, prefix="", **kwargs):
    """Set up the evaluator: download benchmark data and sanity-check the embedder.

    :param embedder: callable mapping a list of words to an embedding matrix
    :param prefix: label used when logging which embedder is being checked
    :param kwargs: forwarded to the parent class constructor
    """
    try:
        from web.datasets.similarity import fetch_MEN, fetch_WS353, fetch_SimLex999, fetch_RW
    except ImportError:
        raise RuntimeError(
            "Please install web (https://github.com/kudkudak/word-embeddings-benchmarks)"
        )

    self._embedder = embedder
    self._prefix = prefix

    # Fetch the benchmark datasets up front.
    logger.info("Downloading benchmark data")
    benchmarks = {
        # TODO: Pick a bit better tasks
        "MEN": fetch_MEN(),
        "WS353": fetch_WS353(),
        "SIMLEX999": fetch_SimLex999(),
        "RW": fetch_RW()
    }

    # Log one example pair per benchmark so the data looks sane.
    for benchmark_name, benchmark_data in iteritems(benchmarks):
        logger.info(
            "Sample data from {}: pair \"{}\" and \"{}\" is assigned score {}"
            .format(benchmark_name, benchmark_data.X[0][0],
                    benchmark_data.X[0][1], benchmark_data.y[0]))

    # Smoke-test the embedder on a single word before committing to it.
    logger.info("Checking embedder for " + prefix)
    logger.info(embedder(["love"])[0, 0:5])

    self._tasks = benchmarks
    super(SimilarityWordEmbeddingEval, self).__init__(**kwargs)
def get_dataset(dataset_name):
    """Return the fetched benchmark dataset identified by ``dataset_name``.

    :param dataset_name: one of "WS353", "MEN", "SimLex-999", "MTurk",
        "RG65", "RW", "TR9856", "MSR", "Google"
    :raises Exception: if ``dataset_name`` is not a supported benchmark
    """
    # NOTE(review): the original had a second ``dataset_name == "WS353"``
    # branch returning fetch_WS353('all'); it was unreachable dead code
    # because the first match (the "similarity" subset) always won. The
    # dead branch is removed here and first-match behavior preserved.
    if dataset_name == "WS353":
        return fetch_WS353("similarity")
    elif dataset_name == "MEN":
        return fetch_MEN("all")
    elif dataset_name == "SimLex-999":
        return fetch_SimLex999()
    elif dataset_name == "MTurk":
        return fetch_MTurk()
    elif dataset_name == "RG65":
        return fetch_RG65()
    elif dataset_name == "RW":
        return fetch_RW()
    elif dataset_name == "TR9856":
        return fetch_TR9856()
    elif dataset_name == "MSR":
        return fetch_msr_analogy()
    elif dataset_name == "Google":
        return fetch_google_analogy()
    else:
        raise Exception("{}: dataset not supported".format(dataset_name))
def test_ws353_fetcher():
    """Sanity-check the WS353 fetcher: the subsets compose into the full set."""
    set1 = fetch_WS353(which="set1")
    set2 = fetch_WS353(which="set2")
    sim = fetch_WS353(which="similarity")
    rel = fetch_WS353(which="relatedness")
    full = fetch_WS353(which="all")

    def pair_keys(data):
        # Canonical string key per word pair, insensitive to word order.
        return set(" ".join(sorted(pair)) for pair in data.X)

    keys_full = pair_keys(full)
    keys_set1 = pair_keys(set1)
    keys_set2 = pair_keys(set2)
    keys_sim = pair_keys(sim)
    keys_rel = pair_keys(rel)

    # Standard deviations align one-to-one with scores.
    assert set1.sd.shape[0] == set1.y.shape[0]
    assert set2.sd.shape[0] == set2.y.shape[0]

    # The full dataset is the union of set1/set2 and of similarity/relatedness.
    assert full.X.shape[0] == 353
    assert keys_full == keys_set2.union(keys_set1)
    assert keys_full == keys_sim.union(keys_rel)

    # Two word pairs occur twice, so only 351 unique pairs exist.
    assert len(keys_full) == 351
def web_tests(emb):
    """
    :param emb: dict of words and their corresponding embeddings
    :return: dict of word-embeddings-benchmarks tests and scores received
    """
    similarity_tasks = {
        'WS353': fetch_WS353(),
        'RG65': fetch_RG65(),
        'RW': fetch_RW(),
        'MTurk': fetch_MTurk(),
        'MEN': fetch_MEN(),
        'SimLex999': fetch_SimLex999()
    }
    # Wrap the plain dict in the benchmark library's Embedding type.
    web_emb = Embedding(Vocabulary(list(emb.keys())), list(emb.values()))
    similarity_results = {}
    for name, data in iteritems(similarity_tasks):
        # Evaluate once and reuse the score: the original recomputed the
        # (expensive) similarity evaluation a second time just for logging.
        score = evaluate_similarity(web_emb, data.X, data.y)
        similarity_results[name] = score
        logging.info("Spearman correlation of scores on {} {}".format(
            name, score))
    return similarity_results
def call_module(g_filename):
    """Load a GloVe-format embedding from ``g_filename`` and print Spearman
    correlations on the MEN, WS353 and SimLex-999 similarity benchmarks."""
    # Configure logging
    logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                        level=logging.DEBUG, datefmt='%I:%M:%S')

    # A pre-trained GloVe fetch is also possible (can take a few minutes):
    # w_glove = fetch_GloVe(corpus="wiki-6B", dim=300)
    load_kwargs = {'vocab_size': 200000, 'dim': 400}
    w_custom = load_embedding(g_filename, format="glove", normalize=True,
                              lower=True, clean_words=False,
                              load_kwargs=load_kwargs)

    # Define tasks
    tasks = {
        "MEN": fetch_MEN(),
        "WS353": fetch_WS353(),
        "SIMLEX999": fetch_SimLex999()
    }

    # Show one example pair per benchmark.
    for task_name, task_data in iteritems(tasks):
        print("Sample data from {}: pair \"{}\" and \"{}\" is assigned score {}".format(
            task_name, task_data.X[0][0], task_data.X[0][1], task_data.y[0]))

    # Score the embedding on each benchmark.
    for task_name, task_data in iteritems(tasks):
        print("Spearman correlation of scores on {} {}".format(
            task_name, evaluate_similarity(w_custom, task_data.X, task_data.y)))
def evaluateOnAll(w):
    """Run embedding ``w`` through all similarity and analogy benchmarks.

    Returns a single-row DataFrame containing every score.
    """
    # --- Similarity benchmarks ---
    similarity_tasks = {
        "MTurk": fetch_MTurk(),
        "MEN": fetch_MEN(),
        "WS353": fetch_WS353(),
        "RubensteinAndGoodenough": fetch_RG65(),
        "Rare Words": fetch_RW(),
        "SIMLEX999": fetch_SimLex999(),
        "TR9856": fetch_TR9856()
    }
    similarity_results = {}
    for task_name, task_data in iteritems(similarity_tasks):
        score = evaluate_similarity(w, task_data.X, task_data.y)
        similarity_results[task_name] = score
        print("Spearman correlation of scores on {} {}".format(task_name, score))

    # --- Analogy benchmarks ---
    print("Calculating analogy benchmarks")
    analogy_tasks = {
        "Google": fetch_google_analogy(),
        "MSR": fetch_msr_analogy()
    }
    analogy_results = {}
    for task_name, task_data in iteritems(analogy_tasks):
        accuracy = evaluate_analogy(w, task_data.X, task_data.y)
        analogy_results[task_name] = accuracy
        print("Analogy prediction accuracy on {} {}".format(task_name, accuracy))

    # SemEval2012 task 2 is scored by a dedicated helper; keep its 'all' score.
    analogy_results["SemEval2012_2"] = calAnswersonSemEval(w)['all']
    print("Analogy prediction accuracy on {} {}".format(
        "SemEval2012", analogy_results["SemEval2012_2"]))

    # Merge both result sets into a single row.
    sim = pd.DataFrame([similarity_results])
    analogy = pd.DataFrame([analogy_results])
    return sim.join(analogy)
"""Evaluate a dict-format embedding file on the WS353 similarity benchmark."""
import argparse
import pickle

from web.datasets.similarity import fetch_WS353
from web.embeddings import load_embedding
from web.evaluate import evaluate_similarity

if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--word-vectors')
    cli_args = arg_parser.parse_args()

    # Benchmark data and the embedding under evaluation.
    ws353 = fetch_WS353()
    embedding = load_embedding(cli_args.word_vectors,
                               lower=True,
                               clean_words=True,
                               format='dict')

    print('Spearman`s rank on WS353 ',
          evaluate_similarity(embedding, ws353.X, ws353.y))
print_error_vs_len_defs, print_plot_error_vs_frequency, print_error_vs_avg_count_def, load_dict, spearman_train_test) if __name__ == "__main__": plt.style.use('ggplot') np.random.seed(0) # Configure logging logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=logging.DEBUG, datefmt='%I:%M:%S') # SETUP datasets = [('SL999', fetch_SimLex999()), ('SL333', fetch_SimLex999(which='333')), ('SV3500-t', fetch_SimVerb3500(which='test')), ('WS353', fetch_WS353()), #('WS353R', fetch_WS353(which='relatedness')), #('RW', fetch_RW()), ('MEN-t', fetch_MEN(which='test')), ('SCWS', fetch_SCWS()), ('MTurk', fetch_MTurk())] logging.info(fuel.config.data_path) vocab_defs_fname = os.path.join(fuel.config.data_path[0], "vocab.txt") logging.info("using vocab for definition {}".format(vocab_defs_fname)) # END SETUP parser = argparse.ArgumentParser("Evaluate embeddings") parser.add_argument("emb_filename", help="Location of embeddings") parser.add_argument("emb_format", help="either 'glove', 'dict' or 'dict_poly'") parser.add_argument("root_dicts",
import logging
from six import iteritems
from web.datasets.similarity import fetch_MEN, fetch_WS353, fetch_SimLex999
from web.embeddings import fetch_GloVe
from web.evaluate import evaluate_similarity

# Configure logging
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                    level=logging.DEBUG, datefmt='%I:%M:%S')

# Fetch GloVe embedding (warning: it might take few minutes)
w_glove = fetch_GloVe(corpus="wiki-6B", dim=300)

# Define tasks
tasks = {
    "MEN": fetch_MEN(),
    "WS353": fetch_WS353(),
    "SIMLEX999": fetch_SimLex999()
}

# Print sample data
for name, data in iteritems(tasks):
    print("Sample data from {}: pair \"{}\" and \"{}\" is assigned score {}".
          format(name, data.X[0][0], data.X[0][1], data.y[0]))

# Calculate results using helper function.
# BUG FIX: the final report used a Python 2 print *statement*
# (`print "..."`), a SyntaxError on Python 3 and inconsistent with the
# print() calls above; converted to the print() function.
for name, data in iteritems(tasks):
    print("Spearman correlation of scores on {} {}".format(
        name, evaluate_similarity(w_glove, data.X, data.y)))
"""
Simple example showing evaluating embedding on similarity datasets
"""
import logging
from six import iteritems
from web.datasets.similarity import fetch_MEN, fetch_WS353, fetch_SimLex999
from web.embeddings import fetch_GloVe
from web.similarity import evaluate_similarity

# Configure logging
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                    level=logging.DEBUG, datefmt='%I:%M:%S')

# Fetch GloVe embedding (warning: it might take few minutes)
w_glove = fetch_GloVe(corpus="wiki-6B", dim=300)

# Define tasks
tasks = {
    "MEN": fetch_MEN(),
    "WS353": fetch_WS353(),
    "SIMLEX999": fetch_SimLex999()
}

# Print sample data
for name, data in iteritems(tasks):
    print("Sample data from {}: pair \"{}\" and \"{}\" is assigned score {}".format(
        name, data.X[0][0], data.X[0][1], data.y[0]))

# Calculate results using helper function.
# BUG FIX: the final report used a Python 2 print *statement*
# (`print "..."`), a SyntaxError on Python 3 and inconsistent with the
# print() calls above; converted to the print() function.
for name, data in iteritems(tasks):
    print("Spearman correlation of scores on {} {}".format(
        name, evaluate_similarity(w_glove, data.X, data.y)))