def retrofitter():
    """Build the default `Retrofitter` used throughout these experiments.

    All hyperparameters are pinned explicitly so the configuration is
    visible at the call site rather than relying on class defaults.
    """
    params = dict(
        max_iter=100,
        alpha=None,
        beta=None,
        tol=1e-2,
        verbose=False,
        introspecting=False)
    return Retrofitter(**params)
def system_2(): # Data------------ with open(wordentail_filename) as f: wordentail_data = json.load(f) X_glove = pd.DataFrame(GLOVE).T print(X_glove.shape) def convert_edges_to_indices(edges, Q): lookup = dict(zip(Q.index, range(Q.shape[0]))) index_edges = defaultdict(set) for start, finish_nodes in edges.items(): s = lookup.get(start) if s: f = {lookup[n] for n in finish_nodes if n in lookup} if f: index_edges[s] = f return index_edges wn_index_edges = convert_edges_to_indices(wn_edges, X_glove) wn_retro = Retrofitter(verbose=True) X_retro = wn_retro.fit(X_glove, wn_index_edges) print(X_retro.shape) def retro_vec(w): """Return `w`'s Retrofitted representation if available, else return a random vector.""" return X_retro.loc[w].values if w in X_retro.index else randvec( w, n=glove_dim) # Model----------- net = TorchShallowNeuralClassifier(hidden_dim=50, max_iter=100) print(net) # Exp------------- result = nli.wordentail_experiment( train_data=wordentail_data['word_disjoint']['train'], assess_data=wordentail_data['word_disjoint']['dev'], model=net, vector_func=retro_vec, vector_combo_func=vec_concatenate) return result['macro-F1']
# ### As before, but now 2 has no outgoing edges # In[7]: edges_isolated = {0: {1, 2}, 1: {0, 2}, 2: set()} _ = retrofitting.plot_retro_path(Q_hat, edges_isolated) # ### All nodes connected to all others, but $\alpha = 0$ # In[8]: _ = retrofitting.plot_retro_path(Q_hat, edges_all, retrofitter=Retrofitter(alpha=lambda x: 0)) # ## WordNet # # Faruqui et al. conduct experiments on three knowledge graphs: [WordNet](https://wordnet.princeton.edu), [FrameNet](https://framenet.icsi.berkeley.edu/fndrupal/), and the [Penn Paraphrase Database (PPDB)](http://paraphrase.org/). [The repository for their paper](https://github.com/mfaruqui/retrofitting) includes the graphs that they derived for their experiments. # # Here, we'll reproduce just one of the two WordNet experiments they report, in which the graph is formed based on synonymy. # ### Background on WordNet # # WordNet is an incredible, hand-built lexical resource capturing a wealth of information about English words and their inter-relationships. ([Here is a collection of WordNets in other languages.](http://globalwordnet.org)) For a detailed overview using NLTK, see [this tutorial](http://compprag.christopherpotts.net/wordnet.html). # # The core concepts: # # * A __lemma__ is something like our usual notion of __word__. Lemmas are highly sense-disambiguated. For instance, there are six lemmas that are consistent with the string `crane`: the bird, the machine, the poets, ... #
return edges wn_edges = get_wordnet_edges() data_home = 'data' glove_dict = utils.glove2dict( os.path.join(data_home, 'glove.6B', 'glove.6B.300d.txt')) X_glove = pd.DataFrame(glove_dict).T print(X_glove.shape) def convert_edges_to_indices(edges, Q): lookup = dict(zip(Q.index, range(Q.shape[0]))) index_edges = defaultdict(set) for start, finish_nodes in edges.items(): s = lookup.get(start) if s: f = {lookup[n] for n in finish_nodes if n in lookup} if f: index_edges[s] = f return index_edges wn_index_edges = convert_edges_to_indices(wn_edges, X_glove) wn_retro = Retrofitter(verbose=True) X_retro = wn_retro.fit(X_glove, wn_index_edges) print(X_retro.shape)