def test_retrain():
    """Check that fitting the same ComplEx instance twice gives equal predictions.

    The model is fitted on a tiny hand-written graph, two triples are scored,
    then the very same instance is fitted again on the same data and the same
    two triples are scored once more. Both prediction arrays must be equal.
    """
    train_triples = np.array([['a', 'y', 'b'],
                              ['b', 'y', 'a'],
                              ['a', 'y', 'c'],
                              ['c', 'y', 'a'],
                              ['a', 'y', 'd'],
                              ['c', 'y', 'd'],
                              ['b', 'y', 'c'],
                              ['f', 'y', 'e']])
    hyperparams = dict(batches_count=1, seed=555, epochs=20, k=10,
                       loss='pairwise', loss_params={'margin': 1},
                       regularizer='LP',
                       regularizer_params={'lambda': 0.1, 'p': 2},
                       optimizer='adagrad', optimizer_params={'lr': 0.1})
    model = ComplEx(**hyperparams)

    predictions_per_run = []
    for _ in range(2):
        model.fit(train_triples)
        # A fresh query array is built for every run, as two separate
        # literals would be.
        queries = np.array([['f', 'y', 'e'], ['b', 'y', 'd']])
        predicted, _ranks = model.predict(queries, get_ranks=True)
        predictions_per_run.append(predicted)

    first_run, second_run = predictions_per_run
    np.testing.assert_array_equal(first_run, second_run)
def test_fit_predict_wn18_ComplEx():
    """Smoke test: fit ComplEx on the WN18 train split and predict one test triple.

    There is no assertion; the test passes as long as fitting and predicting
    (with ``get_ranks=True``) complete without raising. The prediction result
    is printed for manual inspection.
    """
    dataset = load_wn18()
    model = ComplEx(
        batches_count=1,
        seed=555,
        epochs=5,
        k=100,
        loss='pairwise',
        loss_params={'margin': 1},
        regularizer='LP',
        regularizer_params={'lambda': 0.1, 'p': 2},
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
    )
    model.fit(dataset['train'])

    # Only the first test triple is scored to keep the test cheap.
    first_test_triple = dataset['test'][:1]
    outcome = model.predict(first_test_triple, get_ranks=True)
    print(outcome)
def test_fit_predict_CompleEx():
    """Check that a training triple is predicted higher than an unseen one.

    After fitting on a tiny graph, ``['f', 'y', 'e']`` (present in the
    training data) must receive a strictly greater prediction than
    ``['b', 'y', 'd']`` (absent from it).
    """
    train_triples = np.array([['a', 'y', 'b'],
                              ['b', 'y', 'a'],
                              ['a', 'y', 'c'],
                              ['c', 'y', 'a'],
                              ['a', 'y', 'd'],
                              ['c', 'y', 'd'],
                              ['b', 'y', 'c'],
                              ['f', 'y', 'e']])
    model = ComplEx(
        batches_count=1,
        seed=555,
        epochs=20,
        k=10,
        loss='pairwise',
        loss_params={'margin': 1},
        regularizer='LP',
        regularizer_params={'lambda': 0.1, 'p': 2},
        optimizer='adagrad',
        optimizer_params={'lr': 0.1},
    )
    model.fit(train_triples)

    queries = np.array([['f', 'y', 'e'], ['b', 'y', 'd']])
    predicted, _ranks = model.predict(queries, get_ranks=True)
    print(predicted)

    in_training, not_in_training = predicted[0], predicted[1]
    assert in_training > not_in_training
def test_missing_entity_ComplEx():
    """Check that predicting a triple with an unknown label raises ValueError.

    Three triples are tried in turn, each containing exactly one label that
    never occurs in the training data: an unknown object, an unknown
    predicate, and an unknown subject.
    """
    train_triples = np.array([['a', 'y', 'b'],
                              ['b', 'y', 'a'],
                              ['a', 'y', 'c'],
                              ['c', 'y', 'a'],
                              ['a', 'y', 'd'],
                              ['c', 'y', 'd'],
                              ['b', 'y', 'c'],
                              ['f', 'y', 'e']])
    model = ComplEx(batches_count=1, seed=555, epochs=2, k=5)
    model.fit(train_triples)

    triples_with_unknown_label = (
        ['a', 'y', 'zzzzzzzzzzz'],   # unknown object
        ['a', 'xxxxxxxxxx', 'e'],    # unknown predicate
        ['zzzzzzzz', 'y', 'e'],      # unknown subject
    )
    for bad_triple in triples_with_unknown_label:
        with pytest.raises(ValueError):
            model.predict(bad_triple)
def test_large_graph_mode():
    """Smoke test: train, evaluate and predict with the entity threshold set to 10.

    The entity threshold is a process-wide setting changed through
    ``set_entity_threshold`` (presumably forcing the large-graph code path, as
    the test name suggests -- confirm against the library docs). The test has
    no assertion; it passes when fit, ``evaluate_performance`` and ``predict``
    all complete without raising.

    The threshold is restored in a ``finally`` clause so that a failure
    anywhere in the body cannot leak the lowered threshold into other tests
    running in the same process.
    """
    set_entity_threshold(10)
    try:
        X = load_wn18()
        # NOTE(review): 'margin' is supplied together with loss='multiclass_nll';
        # confirm that this loss actually consumes that parameter.
        model = ComplEx(batches_count=100, seed=555, epochs=1, k=50,
                        loss='multiclass_nll', loss_params={'margin': 5},
                        verbose=True, optimizer='sgd',
                        optimizer_params={'lr': 0.001})
        model.fit(X['train'])

        # Filter built from every known triple (train + valid + test).
        X_filter = np.concatenate((X['train'], X['valid'], X['test']), axis=0)

        # Evaluate on every 1000th test triple only, to keep the run short.
        evaluate_performance(X['test'][::1000], model, X_filter,
                             verbose=True, corrupt_side='s,o')

        y = model.predict(X['test'][:1])
        print(y)
    finally:
        # Always undo the global change, even if something above raised.
        reset_entity_threshold()
# Script fragment: train a ComplEx model on WN18, print sigmoid-squashed
# predictions for a few test triples, then evaluate on a small slice of the
# test set and report MRR and Hits@10.
from ampligraph.evaluation import evaluate_performance, hits_at_n_score, mrr_score

# load_wn18 and ComplEx are expected to be in scope already (imported outside
# this excerpt).
X = load_wn18()

model = ComplEx(batches_count=10, seed=0, epochs=20, k=50, eta=2, loss="nll", optimizer="adam", optimizer_params={"lr": 0.01})
model.fit(X['train'])

# Predict the first five test triples.
y_pred = model.predict(X['test'][:5, ])

# expit is the logistic sigmoid; it maps the raw predictions into (0, 1).
from scipy.special import expit
print(expit(y_pred))

# Evaluate on the first ten test triples; no filter_triples argument is passed.
ranks = evaluate_performance(X['test'][:10], model=model)
print(ranks)

# Aggregate the result of evaluate_performance into MRR and Hits@10.
mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
print("MRR: %f, Hits@10: %f" % (mrr, hits_10))

# Not used in the visible part of the script -- presumably needed by plotting
# code that follows this excerpt.
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
# TRAINING: Evaluate model's performance test_X = filter_unseen_entities(test_X, model, verbose=True, strict=False) test_y = test_X[:,1] scores_validtn = evaluate_performance(test_X, model=model, filter_triples=positives_filter, # positives_filter # Corruption strategy filter defined above use_default_protocol=True, # corrupt subj and obj separately while evaluating strict=False, verbose=True) end_time = time.time() # STOP: Training Time Tracker print("\nTraining Time:", end_time - start_time, "seconds") # PRINT: Training Time Tracker print("Training Time:", end_time - start_time, "seconds", file=log_file) pred_y_res = model.predict(test_X) pred_y_proba = expit(pred_y_res) # Evalute results via ML standards ground_truth = test_y # Already NUMPY and 'int32' predictions = np.rint(pred_y_proba).astype(np.int32) predictions_proba = np.round(pred_y_proba, decimals=2).astype(np.float32) ground_truth, predictions = to_categorical(ground_truth, dtype=np.int32), to_categorical(predictions, dtype=np.int32) #print(ground_truth[25,:]) #print(predictions[25,:]) print('\n-------------------') print("class \t accuracy \t roc_score") print("class \t accuracy \t roc_score", file=log_file) for x in range(ground_truth.shape[1]): acc_res = accuracy_score(ground_truth[:,x], predictions[:,x]) roc_res = roc_auc_score(ground_truth[:,x], predictions[:,x])
["Missandei", 'SPOUSE', 'Grey Worm'], ["Brienne of Tarth", 'SPOUSE', 'Jaime Lannister'] ]) unseen_filter = np.array(list({tuple(i) for i in np.vstack((positives_filter, X_unseen))})) ranks_unseen = evaluate_performance( X_unseen, model=model, filter_triples=unseen_filter, # Corruption strategy filter defined above corrupt_side = 's+o', use_default_protocol=False, # corrupt subj and obj separately while evaluating verbose=True ) scores = model.predict(X_unseen) from scipy.special import expit probs = expit(scores) pd.DataFrame(list(zip([' '.join(x) for x in X_unseen], ranks_unseen, np.squeeze(scores), np.squeeze(probs))), columns=['statement', 'rank', 'score', 'prob']).sort_values("score") """--- # 7. Visualizing Embeddings with Tensorboard projector """ from ampligraph.utils import create_tensorboard_visualizations