Example #1
import rel_ext


def test_examine_model_weights(featurizer, vectorize, corpus, kb):
    dataset = rel_ext.Dataset(corpus, kb)
    # Small splits keep the test fast.
    splits = dataset.build_splits(
        split_names=['tiny_train', 'tiny_dev', 'rest'],
        split_fracs=[0.05, 0.05, 0.90],
        seed=1)
    results = rel_ext.experiment(splits,
                                 train_split='tiny_train',
                                 test_split='tiny_dev',
                                 featurizers=[featurizer],
                                 vectorize=vectorize,
                                 verbose=False)
    # Smoke test: weight inspection should run without raising.
    rel_ext.examine_model_weights(results)
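
This test assumes pytest fixtures supplying the featurizer, the vectorize flag, the corpus, and the KB. A minimal sketch of what such fixtures might look like, assuming the course's usual data layout under data/rel_ext_data (the file names and the bag-of-words featurizer here are illustrative, not necessarily the repo's exact ones):

import os

import pytest

import rel_ext

rel_ext_data_home = os.path.join('data', 'rel_ext_data')

@pytest.fixture
def corpus():
    # Assumed filename for the gzipped corpus distributed with the course.
    return rel_ext.Corpus(os.path.join(rel_ext_data_home, 'corpus.tsv.gz'))

@pytest.fixture
def kb():
    # Assumed filename for the gzipped knowledge base.
    return rel_ext.KB(os.path.join(rel_ext_data_home, 'kb.tsv.gz'))

@pytest.fixture
def featurizer():
    # A simple bag-of-words featurizer over the middle spans, matching
    # the (kbt, corpus, feature_counter) signature that rel_ext expects.
    def bag_of_words(kbt, corpus, feature_counter):
        for ex in corpus.get_examples_for_entities(kbt.sbj, kbt.obj):
            for word in ex.middle.split():
                feature_counter[word] += 1
        return feature_counter
    return bag_of_words

@pytest.fixture
def vectorize():
    return True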
Example #2
import os

from sklearn.linear_model import LogisticRegression

import rel_ext
import utils

model_factory = lambda: LogisticRegression(fit_intercept=True,
                                           solver='liblinear')

# In[12]:

baseline_results = rel_ext.experiment(splits,
                                      train_split='train',
                                      test_split='dev',
                                      featurizers=featurizers,
                                      model_factory=model_factory,
                                      verbose=True)

# Studying the model weights might yield insights; `examine_model_weights` prints the highest- and lowest-weighted features for each relation:

# In[13]:

rel_ext.examine_model_weights(baseline_results)

# ### Distributed representations
#
# This simple baseline sums the GloVe vector representations for all of the words in the "middle" span and feeds those summed vectors into the standard `LogisticRegression`-based `model_factory`. The crucial parameter that enables this is `vectorize=False`, which tells `rel_ext.experiment` that your featurizer or your model will do the work of turning examples into vectors; `rel_ext.experiment` then just organizes these representations by relation type. A sketch of such a featurizer follows the GloVe setup below.

# In[14]:

GLOVE_HOME = os.path.join('data', 'glove.6B')

# In[15]:

glove_lookup = utils.glove2dict(os.path.join(GLOVE_HOME, 'glove.6B.300d.txt'))
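
# As a quick sanity check on the lookup (illustrative; for this file each
# value should be a 300-dimensional vector):

print(len(glove_lookup))         # vocabulary size
print(len(glove_lookup['the']))  # embedding dimensionality: 300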

# In[16]:
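
# A minimal sketch of the kind of featurizer the paragraph above describes,
# to be used with `vectorize=False`. The helpers assumed here
# (`corpus.get_examples_for_entities`, `utils.randvec`) come from the
# course's `rel_ext` and `utils` modules; treat the details as illustrative
# rather than as the notebook's exact code:

import numpy as np

def glove_middle_featurizer(kbt, corpus, np_func=np.sum):
    """Represent a KB triple by combining the GloVe vectors of all
    words appearing in the middle spans connecting its two entities."""
    reps = []
    for ex in corpus.get_examples_for_entities(kbt.sbj, kbt.obj):
        for word in ex.middle.split():
            rep = glove_lookup.get(word)
            if rep is not None:
                reps.append(rep)
    if len(reps) == 0:
        # No middle word was in the GloVe vocabulary, so fall back to a
        # random vector of the right dimensionality.
        dim = len(next(iter(glove_lookup.values())))
        return utils.randvec(n=dim)
    return np_func(reps, axis=0)

glove_results = rel_ext.experiment(splits,
                                   train_split='train',
                                   test_split='dev',
                                   featurizers=[glove_middle_featurizer],
                                   vectorize=False,  # featurizer returns vectors directly
                                   verbose=True)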