def test_find_neighbors():
    model = DistMult(batches_count=2,
                     seed=555,
                     epochs=1,
                     k=10,
                     loss='pairwise',
                     loss_params={'margin': 5},
                     optimizer='adagrad',
                     optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['e', 'y', 'c'],
                  ['c', 'z', 'a'], ['a', 'z', 'd'], ['f', 'z', 'g'],
                  ['c', 'z', 'g']])
    with pytest.raises(AssertionError) as e:
        neighbors, dist = find_nearest_neighbours(
            model,
            entities=['b'],
            n_neighbors=3,
            entities_subset=['a', 'c', 'd', 'e', 'f'])

    assert str(e.value) == "KGE model is not fit!"
    model.fit(X)
    neighbors, dist = find_nearest_neighbours(
        model,
        entities=['b'],
        n_neighbors=3,
        entities_subset=['a', 'c', 'd', 'e', 'f'])
    assert np.all(neighbors == [['e', 'd', 'c']])

    with pytest.raises(AssertionError) as e:
        neighbors, dist = find_nearest_neighbours(
            model,
            entities=['b'],
            n_neighbors=30,
            entities_subset=['a', 'c', 'd', 'e', 'f'])
    assert str(
        e.value
    ) == "n_neighbors must be less than the number of entities being fit!"

    with pytest.raises(AssertionError) as e:
        neighbors, dist = find_nearest_neighbours(model,
                                                  entities=['b'],
                                                  n_neighbors=3,
                                                  entities_subset='a')
    assert str(
        e.value
    ) == "Invalid type for entities_subset! Must be a list or np.array"

    with pytest.raises(AssertionError) as e:
        neighbors, dist = find_nearest_neighbours(
            model,
            entities='b',
            n_neighbors=3,
            entities_subset=['a', 'c', 'd', 'e', 'f'])
    assert str(
        e.value) == "Invalid type for entities! Must be a list or np.array"
Example #2
def test_fit_predict_DistMult():
    model = DistMult(batches_count=2, seed=555, epochs=20, k=10, loss='pairwise', loss_params={'margin': 5}, 
                     optimizer='adagrad', optimizer_params={'lr':0.1})
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'y', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model.fit(X)
    y_pred, _ = model.predict(np.array([['f', 'y', 'e'], ['b', 'y', 'd']]), get_ranks=True)
    print(y_pred)
    assert y_pred[0] > y_pred[1]
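Beyond comparing raw scores, ranking metrics are the usual way to judge a fitted model. A minimal hedged sketch using AmpliGraph's evaluation helpers on the `model` and triples `X` from the example above; the held-out `X_test` array is an assumption added for illustration.

from ampligraph.evaluation import evaluate_performance, mrr_score, hits_at_n_score

X_test = np.array([['f', 'y', 'e'], ['b', 'y', 'd']])   # hypothetical held-out triples
# Rank each test triple against corruptions, filtering out known positives.
ranks = evaluate_performance(X_test, model=model, filter_triples=X, verbose=False)
print("MRR:", mrr_score(ranks), "Hits@3:", hits_at_n_score(ranks, n=3))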
Example #3
def test_is_fitted_on():

    model = DistMult(batches_count=2,
                     seed=555,
                     epochs=1,
                     k=10,
                     loss='pairwise',
                     loss_params={'margin': 5},
                     optimizer='adagrad',
                     optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'z', 'a'], ['a', 'z', 'd']])

    model.fit(X)

    X1 = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                   ['c', 'z', 'a'], ['g', 'z', 'd']])

    X2 = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                   ['c', 'z', 'a'], ['a', 'x', 'd']])

    # The model was fitted on the training triples
    assert model.is_fitted_on(X) is True
    # Not fitted on triples containing an unseen entity ('g')
    assert model.is_fitted_on(X1) is False
    # Not fitted on triples containing an unseen relation ('x')
    assert model.is_fitted_on(X2) is False
Example #4
def test_predict_twice():
    model = DistMult(batches_count=2,
                     seed=555,
                     epochs=1,
                     k=10,
                     loss='pairwise',
                     loss_params={'margin': 5},
                     optimizer='adagrad',
                     optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'z', 'a'], ['a', 'z', 'd']])
    model.fit(X)

    X_test1 = np.array([['a', 'y', 'b'], ['b', 'y', 'a']])

    X_test2 = np.array([['a', 'y', 'c'], ['c', 'z', 'a']])

    preds1 = model.predict(X_test1)
    preds2 = model.predict(X_test2)

    assert not np.array_equal(preds1, preds2)
Example #5
def test_lookup_embeddings():
    model = DistMult(batches_count=2, seed=555, epochs=20, k=10, loss='pairwise', loss_params={'margin': 5}, 
                     optimizer='adagrad', optimizer_params={'lr':0.1})
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'y', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model.fit(X)
    model.get_embeddings(['a', 'b'], embedding_type='entity')
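The same accessor also returns relation embeddings. A small hedged sketch comparing the two entity vectors retrieved above by cosine similarity; the similarity computation is illustrative and not part of the original test.

# Entity embeddings come back as an (n, k) array; unpack the two rows.
emb_a, emb_b = model.get_embeddings(['a', 'b'], embedding_type='entity')
rel_y = model.get_embeddings(['y'], embedding_type='relation')

# Cosine similarity between the two k-dimensional entity embeddings.
cosine = np.dot(emb_a, emb_b) / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b))
print(cosine, rel_y.shape)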
Example #6
def test_predict():
    model = DistMult(batches_count=2,
                     seed=555,
                     epochs=1,
                     k=10,
                     loss='pairwise',
                     loss_params={'margin': 5},
                     optimizer='adagrad',
                     optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'z', 'a'], ['a', 'z', 'd']])
    model.fit(X)

    preds1 = model.predict(X)
    preds2 = model.predict(to_idx(X, model.ent_to_idx, model.rel_to_idx),
                           from_idx=True)

    np.testing.assert_array_equal(preds1, preds2)
Example #7
"""---
# 3. Training ComplEx model
"""

import tensorflow
print(tensorflow.__version__)

from ampligraph.latent_features import DistMult

model = DistMult(batches_count=100, 
                seed=0, 
                epochs=200, 
                k=150, 
                eta=5,
                optimizer='adam', 
                optimizer_params={'lr':1e-3},
                loss='multiclass_nll', 
                regularizer='LP', 
                regularizer_params={'p':3, 'lambda':1e-5}, 
                verbose=True)

positives_filter = X

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

model.fit(data['train'], early_stopping = False)

"""---
# 4.  Saving and restoring a model
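The heading above introduces model persistence. A minimal sketch of that step with `save_model` / `restore_model` from `ampligraph.utils` (the same helpers used in the benchmark fragment near the end of this page), assuming the `model` fitted above; the file name is hypothetical.

from ampligraph.utils import save_model, restore_model

save_model(model, 'distmult_tutorial.pkl')                # hypothetical file name
restored_model = restore_model('distmult_tutorial.pkl')

assert restored_model.is_fitted                           # the restored model keeps its fitted state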
Example #8
def test_calibrate_with_negatives():
    model = DistMult(batches_count=2,
                     seed=555,
                     epochs=1,
                     k=10,
                     loss='pairwise',
                     loss_params={'margin': 5},
                     optimizer='adagrad',
                     optimizer_params={'lr': 0.1})

    X = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                  ['c', 'z', 'a'], ['a', 'z', 'd']])
    model.fit(X)

    X_pos = np.array([['a', 'y', 'b'], ['b', 'y', 'a'], ['a', 'y', 'c'],
                      ['c', 'z', 'a'], ['d', 'z', 'd']])

    X_neg = np.array([['a', 'y', 'd'], ['d', 'y', 'a'], ['c', 'y', 'a'],
                      ['a', 'z', 'd']])

    with pytest.raises(RuntimeError):
        model.predict_proba(X_pos)

    with pytest.raises(ValueError):
        model.calibrate(X_pos,
                        X_neg,
                        positive_base_rate=50,
                        batches_count=2,
                        epochs=10)

    model.calibrate(X_pos, X_neg, batches_count=2, epochs=10)

    probas = model.predict_proba(np.concatenate((X_pos, X_neg)))

    assert np.logical_and(probas > 0, probas < 1).all()
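For contrast, calibration can also be run with positive triples only, in which case synthetic negatives are generated internally by corrupting the positives and a positive base rate must be supplied. A minimal hedged sketch on the same fitted model; the 0.5 base rate is an arbitrary illustrative choice.

# Positives-only calibration: negatives come from internally generated corruptions.
model.calibrate(X_pos, positive_base_rate=0.5, batches_count=2, epochs=10)
probas = model.predict_proba(X_pos)
assert np.logical_and(probas > 0, probas < 1).all()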
# Fit & train a model via the AmpliGraph library.
# (Fragment of a benchmarking loop over models mdl[j] and datasets graph_data[i];
#  time, the Keras backend K, tf and the AmpliGraph model classes are assumed
#  to be imported earlier in the script.)
log_key = mdl[j] + ": " + graph_data[i]
log_file = open("eval_log.txt", "a")
print("\n\n----" + log_key + "----", file=log_file)
print("------------------------------------------------")
print("%d) Implementation Model: %s" % (1, mdl[j]))
print("------------------------------------------------")
start_time = time.time()  # START: training-time tracker
K.clear_session()  # Kill the current TF computation graph and create a new one

if mdl[j] == "ComplEx":
    model = ComplEx(verbose=True)
elif mdl[j] == "ConvKB":
    model = ConvKB(verbose=True)
elif mdl[j] == "DistMult":
    model = DistMult(verbose=True)
elif mdl[j] == "HolE":
    model = HolE(verbose=True)
elif mdl[j] == "TransE":
    model = TransE(verbose=True)
elif mdl[j] == "RandomBaseline":
    model = RandomBaseline(verbose=True)

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)  # only show TensorFlow messages at ERROR level
model.fit(train_X)

# Save the model at its best-performance point
save_model(model, 'best_ampliGraph_model.pkl')
del model  # free the previous in-memory model
# Reload the recently saved best-performance model
model = restore_model('./best_ampliGraph_model.pkl')
if model.is_fitted: