Example #1
import math
import sys

import numpy as np
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mr_score, mrr_score,
                                   train_test_split_no_unseen)
from ampligraph.latent_features import ComplEx

def kge(triples, kge_name, verbose):
    # Train test split
    t_size = math.ceil(len(triples)*0.2)
    X_train, X_test = train_test_split_no_unseen(triples, test_size=t_size)

    # Select kge_name
    if kge_name == 'complex':
        # ComplEx model
        model = ComplEx(batches_count=50,
                        epochs=300,
                        k=100,
                        eta=20,
                        optimizer='adam',
                        optimizer_params={'lr':1e-4},
                        loss='multiclass_nll',
                        regularizer='LP',
                        regularizer_params={'p':3, 'lambda':1e-5},
                        seed=0,
                        verbose=verbose)
    else:
        sys.exit('Given kge_name is not valid.')

    model.fit(X_train)

    # Embedding evaluation
    if verbose:
        filter_triples = np.concatenate((X_train, X_test))
        ranks = evaluate_performance(X_test,
                                     model=model,
                                     filter_triples=filter_triples,
                                     use_default_protocol=True,
                                     verbose=True)

        mrr = mrr_score(ranks)
        print("MRR: %.2f" % (mrr))
        mr = mr_score(ranks)
        print("MR: %.2f" % (mr))
        hits_10 = hits_at_n_score(ranks, n=10)
        print("Hits@10: %.2f" % (hits_10))
        hits_3 = hits_at_n_score(ranks, n=3)
        print("Hits@3: %.2f" % (hits_3))
        hits_1 = hits_at_n_score(ranks, n=1)
        print("Hits@1: %.2f" % (hits_1))

        print('''
        - Ampligraph example -
        MRR: 0.25
        MR: 4927.33
        Hits@10: 0.35
        Hits@3: 0.28
        Hits@1: 0.19
        ''')

    return model
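A minimal way to exercise this function, sketched with AmpliGraph's bundled WN18 loader (any NumPy array of (s, p, o) string triples would do; the 300-epoch ComplEx fit above is slow on a dataset this size):

from ampligraph.datasets import load_wn18

# Flatten the WN18 splits into one triple array; kge() re-splits internally.
X = load_wn18()
triples = np.concatenate((X['train'], X['valid'], X['test']))
model = kge(triples, kge_name='complex', verbose=False)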
Example #2
from ampligraph.datasets import load_wn18
from ampligraph.evaluation import evaluate_performance, hits_at_n_score
from ampligraph.latent_features import RandomBaseline

def test_evaluate_RandomBaseline():
    model = RandomBaseline(seed=0)
    X = load_wn18()
    model.fit(X["train"])
    ranks = evaluate_performance(X["test"],
                                 model=model,
                                 use_default_protocol=False,
                                 corrupt_side='s+o',
                                 verbose=False)
    hits10 = hits_at_n_score(ranks, n=10)
    hits1 = hits_at_n_score(ranks, n=1)
    assert (hits10 == 0.0002 and hits1 == 0.0)
Example #3
import math
import os

import numpy as np
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mr_score, mrr_score,
                                   train_test_split_no_unseen)
from ampligraph.utils import restore_model, save_model

def kge(triples, kge_name, epochs, batch_size, learning_rate, seed, verbose):
    # `select_kge` is a project-specific helper (not shown in this example)
    # that builds the chosen embedding model from these hyperparameters.
    kge_model_savepath = './temp/ampligraph.model'

    if not os.path.isfile(kge_model_savepath):
        # Embedding evaluation
        if verbose:
            # Train test split
            t_size = math.ceil(len(triples) * 0.2)
            X_train, X_test = train_test_split_no_unseen(triples,
                                                         test_size=t_size)

            eval_model = select_kge(kge_name, batch_size, epochs, seed,
                                    verbose)

            eval_model.fit(X_train)
            filter_triples = np.concatenate((X_train, X_test))
            ranks = evaluate_performance(X_test,
                                         model=eval_model,
                                         filter_triples=filter_triples,
                                         use_default_protocol=True,
                                         verbose=True)

            mrr = mrr_score(ranks)
            print("MRR: %.2f" % (mrr))
            mr = mr_score(ranks)
            print("MR: %.2f" % (mr))
            hits_10 = hits_at_n_score(ranks, n=10)
            print("Hits@10: %.2f" % (hits_10))
            hits_3 = hits_at_n_score(ranks, n=3)
            print("Hits@3: %.2f" % (hits_3))
            hits_1 = hits_at_n_score(ranks, n=1)
            print("Hits@1: %.2f" % (hits_1))

            print('''
            - Ampligraph example -
            MRR: 0.25
            MR: 4927.33
            Hits@10: 0.35
            Hits@3: 0.28
            Hits@1: 0.19
            ''')

        model = select_kge(kge_name, batch_size, epochs, seed, verbose)

        print('Training...')
        model.fit(np.array(triples))
        save_model(model, model_name_path=kge_model_savepath)
    else:
        model = restore_model(model_name_path=kge_model_savepath)

    return model
Example #4
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mrr_score)

def compute_metrics(model, train_samples, test_samples):
    ranks = evaluate_performance(
        test_samples,
        model=model,
        filter_triples=train_samples,  # corruption-strategy filter
        use_default_protocol=True,     # corrupt subj and obj separately while evaluating
        verbose=True)
    return {
        'MRR': mrr_score(ranks),
        'Hits@10': hits_at_n_score(ranks, n=10),
        'Hits@3': hits_at_n_score(ranks, n=3),
        'Hits@1': hits_at_n_score(ranks, n=1)
    }
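A hedged usage sketch: RandomBaseline stands in for a trained model, and WN18's train split doubles as the filter, as in the tests elsewhere on this page:

from ampligraph.datasets import load_wn18
from ampligraph.latent_features import RandomBaseline

X = load_wn18()
baseline = RandomBaseline(seed=0)
baseline.fit(X['train'])
# Evaluate a small slice of the test set to keep the sketch fast.
print(compute_metrics(baseline, X['train'], X['test'][:100]))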
Example #5
import numpy as np
from ampligraph.datasets import load_wn18
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mrr_score)
from ampligraph.latent_features import TransE

def test_evaluate_performance_TransE():
    X = load_wn18()
    model = TransE(batches_count=10,
                   seed=0,
                   epochs=100,
                   k=100,
                   eta=5,
                   optimizer_params={'lr': 0.1},
                   loss='pairwise',
                   loss_params={'margin': 5},
                   optimizer='adagrad')
    model.fit(np.concatenate((X['train'], X['valid'])))

    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][:200],
                                 model=model,
                                 filter_triples=filter_triples,
                                 verbose=True)

    # ranks = evaluate_performance(X['test'][:200], model=model)

    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
Example #6
import numpy as np
from ampligraph.datasets import load_wn18
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mrr_score)
from ampligraph.latent_features import ComplEx

def test_evaluate_performance_so_side_corruptions_without_filter():
    X = load_wn18()
    model = ComplEx(batches_count=10,
                    seed=0,
                    epochs=5,
                    k=200,
                    eta=10,
                    loss='nll',
                    regularizer=None,
                    optimizer='adam',
                    optimizer_params={'lr': 0.01},
                    verbose=True)
    model.fit(X['train'])

    X_filter = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][::20],
                                 model,
                                 X_filter,
                                 verbose=True,
                                 use_default_protocol=False,
                                 corrupt_side='s+o')
    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
    assert not np.isinf(mrr)
Example #7
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mrr_score)

def evaluate_model(X_test, model, positives_filter):
    ranks = evaluate_performance(
        X_test,
        model=model,
        filter_triples=positives_filter,  # corruption-strategy filter
        use_default_protocol=True,        # corrupt subj and obj separately while evaluating
        verbose=True)

    mrr = mrr_score(ranks)
    print("MRR: %.2f" % (mrr))

    hits_10 = hits_at_n_score(ranks, n=10)
    print("Hits@10: %.2f" % (hits_10))
    hits_3 = hits_at_n_score(ranks, n=3)
    print("Hits@3: %.2f" % (hits_3))
    hits_1 = hits_at_n_score(ranks, n=1)
    print("Hits@1: %.2f" % (hits_1))
Example #8
import numpy as np
from ampligraph.datasets import load_wn18
from ampligraph.evaluation import evaluate_performance, hits_at_n_score
from ampligraph.latent_features import RandomBaseline

def test_evaluate_RandomBaseline():
    model = RandomBaseline(seed=0)
    X = load_wn18()
    model.fit(X["train"])
    ranks = evaluate_performance(X["test"],
                                 model=model,
                                 corrupt_side='s+o',
                                 verbose=False)
    hits10 = hits_at_n_score(ranks, n=10)
    hits1 = hits_at_n_score(ranks, n=1)
    assert ranks.shape == (len(X['test']), )
    assert hits10 < 0.01 and hits1 == 0.0

    ranks = evaluate_performance(X["test"],
                                 model=model,
                                 corrupt_side='s,o',
                                 verbose=False)
    hits10 = hits_at_n_score(ranks, n=10)
    hits1 = hits_at_n_score(ranks, n=1)
    assert ranks.shape == (len(X['test']), 2)
    assert hits10 < 0.01 and hits1 == 0.0

    ranks_filtered = evaluate_performance(
        X["test"],
        filter_triples=np.concatenate((X['train'], X['valid'], X['test'])),
        model=model,
        corrupt_side='s,o',
        verbose=False)
    hits10 = hits_at_n_score(ranks_filtered, n=10)
    hits1 = hits_at_n_score(ranks_filtered, n=1)
    assert ranks_filtered.shape == (len(X['test']), 2)
    assert hits10 < 0.01 and hits1 == 0.0
    assert np.all(ranks_filtered <= ranks)
    assert np.any(ranks_filtered != ranks)
Example #9
import numpy as np
from ampligraph.datasets import load_wn18
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mrr_score)
from ampligraph.latent_features import ComplEx

def test_evaluate_performance_nll_complex():
    X = load_wn18()
    model = ComplEx(batches_count=10, seed=0, epochs=10, k=150,
                    optimizer_params={'lr': 0.1}, eta=10, loss='nll',
                    optimizer='adagrad', verbose=True)
    model.fit(np.concatenate((X['train'], X['valid'])))

    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
    ranks = evaluate_performance(X['test'][:200], model=model,
                                 filter_triples=filter_triples, verbose=True)

    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("ranks: %s" % ranks)
    print("MRR: %f" % mrr)
    print("Hits@10: %f" % hits_10)
Example #10
import numpy as np
from ampligraph.datasets import load_wn18
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mr_score, mrr_score)
from ampligraph.latent_features import TransE

def test_evaluate_performance_default_protocol_with_filter():
    wn18 = load_wn18()

    X_filter = np.concatenate((wn18['train'], wn18['valid'], wn18['test']))


    model = TransE(batches_count=10, seed=0, epochs=1,
                   k=50, eta=10, verbose=True,
                   embedding_model_params={'normalize_ent_emb': False, 'norm': 1},
                   loss='self_adversarial',
                   loss_params={'margin': 1, 'alpha': 0.5},
                   optimizer='adam',
                   optimizer_params={'lr': 0.0005})

    model.fit(wn18['train'])


    ranks_sep = []
    ranks = evaluate_performance(wn18['test'][::100], model, X_filter,
                                 verbose=True, corrupt_side='o',
                                 use_default_protocol=False)

    ranks_sep.extend(ranks)
    ranks = evaluate_performance(wn18['test'][::100], model, X_filter,
                                 verbose=True, corrupt_side='s',
                                 use_default_protocol=False)
    ranks_sep.extend(ranks)
    print('----------EVAL WITH FILTER-----------------')
    print('----------Subj and obj corrupted separately-----------------')
    mr_sep = mr_score(ranks_sep)
    print('MAR:', mr_sep)
    print('Mrr:', mrr_score(ranks_sep))
    print('hits10:', hits_at_n_score(ranks_sep, 10))
    print('hits3:', hits_at_n_score(ranks_sep, 3))
    print('hits1:', hits_at_n_score(ranks_sep, 1))


    ranks = evaluate_performance(wn18['test'][::100], model, X_filter,
                                 verbose=True, corrupt_side='s+o',
                                 use_default_protocol=True)
    print('----------corrupted with default protocol-----------------')
    mr_joint = mr_score(ranks)
    mrr_joint = mrr_score(ranks)
    print('MAR:', mr_joint)
    print('Mrr:', mrr_joint)
    print('hits10:', hits_at_n_score(ranks, 10))
    print('hits3:', hits_at_n_score(ranks, 3))
    print('hits1:', hits_at_n_score(ranks, 1))
    
    np.testing.assert_equal(mr_sep, mr_joint)
    assert not np.isinf(mrr_joint)
Example #11
# The `if` branch paired with this `else` was truncated in the source; a
# minimal guard is reconstructed from context (`is_fitted` is a real
# attribute of AmpliGraph models).
if model.is_fitted:
    pass  # the original presumably went on to use or save the fitted model
else:
    print('The model is not fit! Did you skip a step?')

from ampligraph.evaluation import evaluate_performance
ranks = evaluate_performance(X_test, 
                             model=model, 
                             filter_triples=positives_filter,   # Corruption strategy filter defined above 
                             use_default_protocol=True, # corrupt subj and obj separately while evaluating
                             verbose=True)

from ampligraph.evaluation import mr_score, mrr_score, hits_at_n_score

mrr = mrr_score(ranks)
print("MRR: %.2f" % (mrr))

hits_10 = hits_at_n_score(ranks, n=10)
print("Hits@10: %.2f" % (hits_10))
hits_3 = hits_at_n_score(ranks, n=3)
print("Hits@3: %.2f" % (hits_3))
hits_1 = hits_at_n_score(ranks, n=1)
print("Hits@1: %.2f" % (hits_1))

import pandas as pd

data = pd.read_csv('triplet.csv')
data.drop(data[data['name'] == 'no pc_item'].index, inplace=True)
data.drop(data[data['prop'] == 'no price'].index, inplace=True)
print(data.head())

pcItem = data['name'].unique()
pcItem_embeddings = dict(zip(pcItem, model.get_embeddings(pcItem)))
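One plausible next step (an assumption, not shown in the source) is comparing items by cosine similarity of their embedding vectors:

import numpy as np

def cosine(u, v):
    # Plain cosine similarity between two embedding vectors.
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

# Hypothetical usage: compare the first two catalogue items.
a, b = pcItem[0], pcItem[1]
print(a, 'vs', b, '->', cosine(pcItem_embeddings[a], pcItem_embeddings[b]))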
Example #12
import logging

import numpy as np

import ampligraph.datasets
import ampligraph.latent_features
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mr_score, mrr_score)

def run_single_exp(config, dataset, model):
    hyperparams = config["hyperparams"][dataset][model]
    if hyperparams is None:
        print("dataset {0}...model {1} experiment is not conducted yet..."
              .format(dataset, config["model_name_map"][model]))
        return {"hyperparams": ".??"}
    print("dataset {0}...model {1}...hyperparameter:...{2}"
          .format(dataset, config["model_name_map"][model], hyperparams))

    es_code = "{0}_{1}".format(dataset, model)

    load_func = getattr(ampligraph.datasets,
                        config["load_function_map"][dataset])
    X = load_func()
    # logging.debug("Loaded...{0}...".format(dataset))

    # load model
    model_class = getattr(ampligraph.latent_features,
                          config["model_name_map"][model])
    model = model_class(**hyperparams)
    # Fit the model on training and validation set
    # The entire dataset will be used to filter out false positives statements
    # created by the corruption procedure:
    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))

    if es_code in config["no_early_stopping"]:
        logging.debug("Fit without early stopping...")
        model.fit(X["train"])
    else:
        logging.debug("Fit with early stopping...")
        model.fit(
            X["train"], True, {
                'x_valid': X['valid'][::10],
                'criteria': 'mrr',
                'x_filter': filter_triples,
                'stop_interval': 2,
                'burn_in': 0,
                'check_interval': 100
            })

    # Run the evaluation procedure on the test set. Will create filtered rankings.
    # To disable filtering: filter_triples=None
    ranks = evaluate_performance(X['test'], model, filter_triples, verbose=False)

    # compute and print metrics:
    mr = mr_score(ranks)
    mrr = mrr_score(ranks)
    hits_1 = hits_at_n_score(ranks, n=1)
    hits_3 = hits_at_n_score(ranks, n=3)
    hits_10 = hits_at_n_score(ranks, n=10)

    return {
        "mr": mr,
        "mrr": mrr,
        "H@1": hits_1,
        "H@3": hits_3,
        "H@10": hits_10,
        "hyperparams": hyperparams
    }
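The source never shows the config layout this function expects; a minimal hypothetical dict, inferred from the keys it reads (hyperparams, model_name_map, load_function_map, no_early_stopping), might look like this:

# Hypothetical config, inferred from the keys read by run_single_exp.
config = {
    "hyperparams": {
        "wn18": {
            "complex": {"batches_count": 10, "seed": 0, "epochs": 10,
                        "k": 150, "eta": 10, "loss": "nll",
                        "optimizer": "adagrad",
                        "optimizer_params": {"lr": 0.1}},
        },
    },
    "model_name_map": {"complex": "ComplEx"},
    "load_function_map": {"wn18": "load_wn18"},
    "no_early_stopping": ["wn18_complex"],  # skip early stopping for this pair
}

print(run_single_exp(config, "wn18", "complex"))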
Example #13
                optimizer="adam",
                optimizer_params={"lr": 0.01})

model.fit(X['train'])

y_pred = model.predict(X['test'][:5, ])

from scipy.special import expit

print(expit(y_pred))

ranks = evaluate_performance(X['test'][:10], model=model)
print(ranks)

mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
print("MRR: %f, Hits@10: %f" % (mrr, hits_10))

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# `embs_labels` is defined earlier in the original script (not shown here).
embs = model.get_embeddings(embs_labels, type='entity')
embs_2d = TSNE(n_components=2).fit_transform(embs)

fig, ax = plt.subplots()
ax.scatter(embs_2d[:, 0], embs_2d[:, 1])
for i, lab in enumerate(embs_labels):
    ax.annotate(lab, (embs_2d[i, 0], embs_2d[i, 1]))

plt.show()
Example #14
import numpy as np
from ampligraph.datasets import load_fb15k_237
from ampligraph.evaluation import (evaluate_performance, hits_at_n_score,
                                   mrr_score)
from ampligraph.latent_features import ComplEx

def main():
    # load Wordnet18 dataset:
    # X = load_wn18()
    X = load_fb15k_237()
    modify_flag = False

    # Initialize a ComplEx neural embedding model with pairwise loss function:
    # The model will be trained for 300 epochs.
    model = ComplEx(
        batches_count=10,
        seed=0,
        epochs=30,
        k=150,
        eta=10,
        # Use adam optimizer with learning rate 1e-3
        optimizer='adam',
        optimizer_params={'lr': 1e-3},
        # Use pairwise loss with margin 0.5
        loss='pairwise',
        loss_params={'margin': 0.5},
        # Use L2 regularizer with regularizer weight 1e-5
        regularizer='LP',
        regularizer_params={
            'p': 2,
            'lambda': 1e-5
        },
        # Enable stdout messages (set to false if you don't want to display)
        verbose=True)  #, modify_flag = modify_flag)

    if False:
        # ground truth params (have not tried yet)
        # k: 350; epochs: 4000; eta: 30; loss: self_adversarial; loss_params: alpha: 1; margin: 0.5; optimizer: adam; optimizer_params: lr: 0.0001; seed: 0; batches_count: 50
        model = ComplEx(
            batches_count=50,
            seed=0,
            epochs=4000,
            k=350,
            eta=30,
            # Use adam optimizer with learning rate 1e-3
            optimizer='adam',
            optimizer_params={'lr': 1e-4},
            # Use pairwise loss with margin 0.5
            loss='self_adversarial',
            loss_params={
                'margin': 0.5,
                'alpha': 1
            },
            # Use L2 regularizer with regularizer weight 1e-5
            regularizer='LP',
            regularizer_params={
                'p': 2,
                'lambda': 1e-5
            },
            # Enable stdout messages (set to false if you don't want to display)
            verbose=True,
            modify_flag=modify_flag)

    # For evaluation we can use a filter to remove positive statements
    # created by the corruption procedure.
    # Here we define the filter set by concatenating all the positives.
    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))

    # Fit the model on training and validation set
    model.fit(X['train'],
              early_stopping=True,
              early_stopping_params= \
                  {
                      'x_valid': X['valid'],  # validation set
                      'criteria': 'hits10',  # Uses hits10 criteria for early stopping
                      'burn_in': 100,  # early stopping kicks in after 100 epochs
                      'check_interval': 20,  # validates every 20th epoch
                      'stop_interval': 5,  # stops if 5 successive validation checks are bad.
                      'x_filter': filter_triples,  # Use filter for filtering out positives
                      'corruption_entities': 'all',  # corrupt using all entities
                      'corrupt_side': 's+o'  # corrupt subject and object (but not at once)
                  }
              )

    # Run the evaluation procedure on the test set (with filtering).
    # To disable filtering: filter_triples=None
    # Usually, we corrupt subject and object sides separately and compute ranks

    ranks = evaluate_performance(
        X['test'],
        model=model,
        filter_triples=filter_triples,
        use_default_protocol=True,  # corrupt subj and obj separately while evaluating
        verbose=True)

    # compute and print metrics:
    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("MRR: %f, Hits@10: %f" % (mrr, hits_10))