def test_evaluate_performance_TransE():
    """Train TransE on WN18 (train + valid) and report filtered MRR and
    Hits@10 on the first 200 test triples."""
    X = load_wn18()
    model = TransE(batches_count=10, seed=0, epochs=100, k=100, eta=5,
                   optimizer_params={'lr': 0.1}, loss='pairwise',
                   loss_params={'margin': 5}, optimizer='adagrad')
    model.fit(np.concatenate((X['train'], X['valid'])))

    # Filter out every known triple when ranking the corruptions.
    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][:200], model=model,
                                 filter_triples=filter_triples, verbose=True)

    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
def test_fit_predict_wn18_TransE():
    """Briefly fit TransE on WN18 and print scores for one test triple."""
    X = load_wn18()
    model = TransE(batches_count=1, seed=555, epochs=5, k=100,
                   loss='pairwise', loss_params={'margin': 5},
                   verbose=True, optimizer='adagrad',
                   optimizer_params={'lr': 0.1})
    model.fit(X['train'])

    y, _ = model.predict(X['test'][:1], get_ranks=True)
    print(y)
def test_evaluate_performance_default_protocol_with_filter():
    """Check that corrupting 's' and 'o' in two separate filtered runs yields
    the same mean rank as the default protocol ('s+o', use_default_protocol).

    Fixes over the original: the same two import statements were repeated
    three times inside the function body (hoisted to a single occurrence),
    and the final `assert(mrr_joint is not np.Inf)` was an identity
    comparison — a freshly computed float is never the same object as
    `np.Inf`, so it could never fail; replaced with a value check.
    """
    from ampligraph.evaluation import evaluate_performance
    from ampligraph.evaluation import hits_at_n_score, mrr_score, mr_score

    wn18 = load_wn18()
    X_filter = np.concatenate((wn18['train'], wn18['valid'], wn18['test']))

    model = TransE(batches_count=10, seed=0, epochs=1, k=50, eta=10,
                   verbose=True,
                   embedding_model_params={'normalize_ent_emb': False,
                                           'norm': 1},
                   loss='self_adversarial',
                   loss_params={'margin': 1, 'alpha': 0.5},
                   optimizer='adam', optimizer_params={'lr': 0.0005})
    model.fit(wn18['train'])

    # Corrupt object side and subject side in two separate runs.
    ranks_sep = []
    ranks = evaluate_performance(wn18['test'][::100], model, X_filter,
                                 verbose=True, corrupt_side='o',
                                 use_default_protocol=False)
    ranks_sep.extend(ranks)
    ranks = evaluate_performance(wn18['test'][::100], model, X_filter,
                                 verbose=True, corrupt_side='s',
                                 use_default_protocol=False)
    ranks_sep.extend(ranks)

    print('----------EVAL WITH FILTER-----------------')
    print('----------Subj and obj corrupted separately-----------------')
    mr_sep = mr_score(ranks_sep)
    print('MAR:', mr_sep)
    print('Mrr:', mrr_score(ranks_sep))
    print('hits10:', hits_at_n_score(ranks_sep, 10))
    print('hits3:', hits_at_n_score(ranks_sep, 3))
    print('hits1:', hits_at_n_score(ranks_sep, 1))

    # Default protocol: both sides corrupted within a single call.
    ranks = evaluate_performance(wn18['test'][::100], model, X_filter,
                                 verbose=True, corrupt_side='s+o',
                                 use_default_protocol=True)
    print('----------corrupted with default protocol-----------------')
    mr_joint = mr_score(ranks)
    mrr_joint = mrr_score(ranks)
    print('MAR:', mr_joint)
    print('Mrr:', mrr_joint)
    print('hits10:', hits_at_n_score(ranks, 10))
    print('hits3:', hits_at_n_score(ranks, 3))
    print('hits1:', hits_at_n_score(ranks, 1))

    # Separate-side evaluation must agree with the default protocol.
    np.testing.assert_equal(mr_sep, mr_joint)
    # Value check, not identity: MRR must be finite.
    assert not np.isinf(mrr_joint)
def test_fit_predict_TransE_early_stopping_without_filter():
    """Fit TransE with unfiltered early stopping, then score one test triple."""
    X = load_wn18()
    model = TransE(batches_count=1, seed=555, epochs=7, k=50,
                   loss='pairwise', loss_params={'margin': 5},
                   verbose=True, optimizer='adagrad',
                   optimizer_params={'lr': 0.1})

    # Early-stopping configuration: MRR on a slice of the validation set,
    # checked every 2 epochs after a 1-epoch burn-in.
    early_stopping_params = {'x_valid': X['valid'][::100],
                             'criteria': 'mrr',
                             'stop_interval': 2,
                             'burn_in': 1,
                             'check_interval': 2}
    model.fit(X['train'], True, early_stopping_params)

    y, _ = model.predict(X['test'][:1], get_ranks=True)
    print(y)
def test_conve_bce_combo():
    """Loss/model pairing rules: 'bce' only with ConvE, ConvE only with 'bce'."""
    # Valid combinations — must not raise.
    model = ConvE(loss='bce')
    model = TransE(loss='nll')

    # Invalid combinations — must raise ValueError.
    with pytest.raises(ValueError):
        model = TransE(loss='bce')
    with pytest.raises(ValueError):
        model = ConvE(loss='nll')
def test_create_tensorboard_visualizations():
    """Smoke-test the TensorBoard export API on a tiny toy graph.

    Verifies the underlying TensorFlow calls are still operative.
    """
    toy_triples = [['a', 'y', 'b'],
                   ['b', 'y', 'a'],
                   ['a', 'y', 'c'],
                   ['c', 'y', 'a'],
                   ['a', 'y', 'd'],
                   ['c', 'y', 'd'],
                   ['b', 'y', 'c'],
                   ['f', 'y', 'e']]
    X = np.array(toy_triples)

    model = TransE(batches_count=1, seed=555, epochs=20, k=10,
                   loss='pairwise', loss_params={'margin': 5})
    model.fit(X)

    create_tensorboard_visualizations(model, 'tensorboard_files')
def test_fit_predict_transE():
    """Fit TransE on a toy graph; a seen triple must outscore an unseen one."""
    model = TransE(batches_count=1, seed=555, epochs=20, k=10,
                   loss='pairwise', loss_params={'margin': 5},
                   optimizer='adagrad', optimizer_params={'lr': 0.1})
    X = np.array([['a', 'y', 'b'],
                  ['b', 'y', 'a'],
                  ['a', 'y', 'c'],
                  ['c', 'y', 'a'],
                  ['a', 'y', 'd'],
                  ['c', 'y', 'd'],
                  ['b', 'y', 'c'],
                  ['f', 'y', 'e']])
    model.fit(X)

    # ('f', 'y', 'e') is in the training set; ('b', 'y', 'd') is not.
    queries = np.array([['f', 'y', 'e'], ['b', 'y', 'd']])
    y_pred, _ = model.predict(queries, get_ranks=True)
    print(y_pred)
    assert y_pred[0] > y_pred[1]
def train_transe(train_samples: np.ndarray):
    """Train a TransE embedding model on the given triples.

    The original annotated the parameter as ``iter``, which is the builtin
    function, not a type; replaced with a proper array annotation
    (``model.fit`` is called directly on the argument).

    Parameters
    ----------
    train_samples : np.ndarray
        Training triples, shape (n, 3) as (subject, predicate, object)
        — assumed from typical usage in this file; TODO confirm.

    Returns
    -------
    TransE
        The fitted model.
    """
    model = TransE(batches_count=100,
                   seed=0,
                   epochs=200,
                   k=150,
                   eta=5,
                   optimizer='adam',
                   optimizer_params={'lr': 1e-3},
                   loss='multiclass_nll',
                   regularizer='LP',
                   regularizer_params={'p': 3, 'lambda': 1e-5},
                   verbose=True)
    model.fit(train_samples, early_stopping=False)
    return model
def test_evaluate_performance_too_many_entities_warning():
    """A UserWarning must be raised when ranking against too many entities."""
    X = load_yago3_10()
    model = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1,
                   verbose=True)
    model.fit(X['train'])

    # Case 1: no entity list declared -> warning expected.
    with pytest.warns(UserWarning):
        evaluate_performance(X['test'][::100], model, verbose=True,
                             corrupt_side='o')

    # Case 2: entity list larger than the threshold -> warning expected.
    with pytest.warns(UserWarning):
        # TOO_MANY_ENT_TH threshold is set to 50,000 entities. Using explicit
        # value to comply with linting and thus avoiding exporting unused
        # global variable.
        entities_subset = np.union1d(np.unique(X["train"][:, 0]),
                                     np.unique(X["train"][:, 2]))[:50000]
        evaluate_performance(X['test'][::100], model, verbose=True,
                             corrupt_side='o',
                             entities_subset=entities_subset)

    # Case 3: small entity list -> no exception expected.
    evaluate_performance(X['test'][::100], model, verbose=True,
                         corrupt_side='o',
                         entities_subset=entities_subset[:10])

    # Case 4: smaller dataset, no entity list -> no exception expected.
    X_wn18rr = load_wn18rr()
    model_wn18 = TransE(batches_count=200, seed=0, epochs=1, k=5, eta=1,
                        verbose=True)
    model_wn18.fit(X_wn18rr['train'])
    evaluate_performance(X_wn18rr['test'][::100], model_wn18, verbose=True,
                         corrupt_side='o')
def perform_test():
    """Fit TransE on WN18RR and verify the trained parameter shapes."""
    X = load_wn18rr()
    k = 5
    unique_entities = np.unique(
        np.concatenate([X['train'][:, 0], X['train'][:, 2]], 0))
    unique_relations = np.unique(X['train'][:, 1])

    model = TransE(batches_count=100, seed=555, epochs=1, k=k,
                   loss='multiclass_nll', loss_params={'margin': 5},
                   verbose=True, optimizer='sgd',
                   optimizer_params={'lr': 0.001})
    model.fit(X['train'])

    ent_params = model.trained_model_params[0]
    rel_params = model.trained_model_params[1]
    # One embedding row per entity and per relation.
    assert ent_params.shape[0] == len(unique_entities)
    assert rel_params.shape[0] == len(unique_relations)
    # Embedding width equals k.
    assert ent_params.shape[1] == k
    assert rel_params.shape[1] == k
embedding_model_params={'norm': DEFAULT_NORM_TRANSE, 'normalize_ent_emb': DEFAULT_NORMALIZE_EMBEDDINGS, 'negative_corruption_entities': DEFAULT_CORRUPTION_ENTITIES, 'corrupt_sides': DEFAULT_CORRUPT_SIDE_TRAIN}, optimizer=DEFAULT_OPTIM, optimizer_params={'lr': DEFAULT_LR}, loss=DEFAULT_LOSS, loss_params={}, regularizer=DEFAULT_REGULARIZER, regularizer_params={}, initializer=DEFAULT_INITIALIZER, initializer_params={'uniform': DEFAULT_XAVIER_IS_UNIFORM}, verbose=DEFAULT_VERBOSE): """ model = TransE(verbose=True, k=70, epochs=40) """ model = ComplEx(batches_count=10, seed=0, epochs=60, k=50, eta=10, # Use adam optimizer with learning rate 1e-3 optimizer='adam', optimizer_params={'lr': 1e-3}, # Use pairwise loss with margin 0.5 loss='pairwise', loss_params={'margin': 0.5}, # Use L2 regularizer with regularizer weight 1e-5 regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5}, # Enable stdout messages (set to false if you don't want to display) verbose=True)""" print("Training...") x_orig = load_wn18() model.fit(X_train)
if r[0] in known_entities and r[2] in known_entities ]) X_train, X_valid = X['train'], X['valid'] print('Train set size: ', X_train.shape) print('Test set size: ', X_valid.shape) ke_kwargs = {"verbose": True, "k": 70, "epochs": 100} # ComplEx brings double dimensions because of the twofold nature of complex numbers model = ComplEx(**ke_kwargs) print("Training...") model.fit(X_train) save_model(model, model_name_path=ke_model_path) # If we don't transpose the multidimensionality of the embeddings to 3D but take just 3-D-embeddings, # This can't be with ComplEX because, it will be an even number and 3 is not ke_kwargs['k'] = 3 model2 = TransE(**ke_kwargs) model2.fit(X_train) save_model(model2, model_name_path=ke_model_path + '2') else: model = restore_model(model_name_path=ke_model_path) model2 = restore_model(model_name_path=ke_model_path + '2') with open(ke_wnkeys_path, 'rb') as handle: tok2id, id2tok = pickle.load(handle) def find_in_tok2id(w): for s in tok2id.keys(): if w in s: print(w, s, "it is alphabetically there")
"""--- # 3. Training TransE model """ import tensorflow print(tensorflow.__version__) from ampligraph.latent_features import TransE model = TransE(batches_count=100, seed=0, epochs=200, k=150, eta=5, optimizer='adam', optimizer_params={'lr':1e-3}, loss='multiclass_nll', regularizer='LP', regularizer_params={'p':3, 'lambda':1e-5}, verbose=True) positives_filter = X import tensorflow as tf tf.logging.set_verbosity(tf.logging.ERROR) model.fit(data['train'], early_stopping = False) """--- # 4. Saving and restoring a model
print("------------------------------------------------") print("%d) Implementation Model: %s" % (1, mdl[j])) print("------------------------------------------------") start_time = time.time() # START: Training Time Tracker K.clear_session() # Kills current TF comp-graph & creates a new one if (mdl[j] == "ComplEx"): model = ComplEx(verbose=True) elif (mdl[j] == "ConvKB"): model = ConvKB(verbose=True) elif (mdl[j] == "DistMult"): model = DistMult(verbose=True) elif (mdl[j] == "HolE"): model = HolE(verbose=True) elif (mdl[j] == "TransE"): model = TransE(verbose=True) elif (mdl[j] == "RandomBaseline"): model = RandomBaseline(verbose=True) tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) # TensorFlow will tell you all messages that have the label ERROR model.fit(train_X) # Save model at its best-performance point save_model(model, 'best_ampliGraph_model.pkl') del model # Delete older model # Load recently save best-performance model model = restore_model('./best_ampliGraph_model.pkl') if model.is_fitted: print('The model is fit!') else: print('The model is not fit! Did you skip a step?')
embedding_model_params={'norm': DEFAULT_NORM_TRANSE, 'normalize_ent_emb': DEFAULT_NORMALIZE_EMBEDDINGS, 'negative_corruption_entities': DEFAULT_CORRUPTION_ENTITIES, 'corrupt_sides': DEFAULT_CORRUPT_SIDE_TRAIN}, optimizer=DEFAULT_OPTIM, optimizer_params={'lr': DEFAULT_LR}, loss=DEFAULT_LOSS, loss_params={}, regularizer=DEFAULT_REGULARIZER, regularizer_params={}, initializer=DEFAULT_INITIALIZER, initializer_params={'uniform': DEFAULT_XAVIER_IS_UNIFORM}, verbose=DEFAULT_VERBOSE): """ model = TransE(verbose=True, k=70, epochs=40) """pyt model = ComplEx(batches_count=10, seed=0, epochs=60, k=50, eta=10, # Use adam optimizer with learning rate 1e-3 optimizer='adam', optimizer_params={'lr': 1e-3}, # Use pairwise loss with margin 0.5 loss='pairwise', loss_params={'margin': 0.5}, # Use L2 regularizer with regularizer weight 1e-5 regularizer='LP', regularizer_params={'p': 2, 'lambda': 1e-5}, # Enable stdout messages (set to false if you don't want to display) verbose=True)""" print("Training...") x_orig = load_wn18() model.fit(X_train) save_model(model, model_name_path=ke_model_path)
# # Load target entities target_entities = tes.load_from_file( '../example_data/imdb/imdb_target_entities') print(target_entities.get_entities()[:10]) ##### RUN ONLY ONCE ###### ################################## # Ampligraph embedding model (train new model) ################################### model = TransE(batches_count=100, seed=555, epochs=100, k=100, loss='pairwise', optimizer='sgd', loss_params={ 'margin': 1.0, 'normalize_ent_emb': True }, verbose=True) model.fit(kg_triples.as_numpy_array()) # Save model for later usage, the it can be reloaded using load_model(os.path.join(experiment_dir,'model_transE.pkl')) save_model(model, os.path.join(out_dir, 'imdb_transE.pkl')) ##################### End ########### ALTERNATIVE ##################### ## OR ## Relaoad a pretrained model ############################# # Restore models trained using our modified restore model function ######################
def select_kge(kge_name, batch_size, epochs, seed, verbose, learning_rate=0.1):
    """Build a knowledge-graph-embedding model by name.

    Fix over the original: the 'hole' branch referenced an undefined
    ``learning_rate`` name, raising NameError whenever that branch ran. It is
    now an explicit keyword parameter (default 0.1, the value the original
    inline comments describe as the default lr), which is backward-compatible
    for all existing callers.

    Parameters
    ----------
    kge_name : str
        One of 'complex', 'hole' or 'transe'.
    batch_size : int
        Forwarded to the model as ``batches_count``.
    epochs : int
        Number of training epochs.
    seed : int
        Random seed forwarded to the model.
    verbose : bool
        Verbosity flag forwarded to the model.
    learning_rate : float, optional
        Learning rate used by the 'hole' optimizer (default 0.1).

    Returns
    -------
    model
        The configured, unfitted model. Exits the process via ``sys.exit``
        when ``kge_name`` is not recognised.
    """
    if kge_name == 'complex':
        # ComplEx model
        # NOTE(review): 'margin' is not an optimizer parameter; kept as-is to
        # preserve behavior, but it looks like a leftover — confirm intent.
        model = ComplEx(batches_count=batch_size,
                        epochs=epochs,
                        k=150,
                        eta=20,
                        optimizer='adam',
                        optimizer_params={'margin': 5},
                        loss='multiclass_nll',
                        loss_params={},
                        regularizer='LP',
                        regularizer_params={'p': 2, 'lambda': 1e-4},
                        seed=seed,
                        verbose=verbose)
    elif kge_name == 'hole':
        # HolE model
        model = HolE(batches_count=batch_size,
                     epochs=epochs,
                     k=100,
                     eta=20,
                     optimizer='adam',
                     optimizer_params={'lr': learning_rate},
                     loss='multiclass_nll',
                     regularizer='LP',
                     regularizer_params={'p': 3, 'lambda': 1e-5},
                     seed=seed,
                     verbose=verbose)
    elif kge_name == 'transe':
        # TransE model
        # NOTE(review): same 'margin'-in-optimizer_params oddity as above.
        model = TransE(batches_count=batch_size,
                       epochs=epochs,
                       k=350,
                       eta=20,
                       optimizer='adam',
                       optimizer_params={'margin': 5},
                       loss='multiclass_nll',
                       loss_params={},
                       regularizer='LP',
                       regularizer_params={'p': 2, 'lambda': 1e-4},
                       seed=seed,
                       verbose=verbose)
    else:
        sys.exit('Given kge_name is not valid.')
    return model