Example #1
import numpy as np
import pytest

from ampligraph.datasets import load_fb15k_237
from ampligraph.evaluation import train_test_split_no_unseen


def test_train_test_split_fast():
    X = load_fb15k_237()
    x_all = np.concatenate([X['train'], X['valid'], X['test']], 0)
    unique_entities = len(set(x_all[:, 0]).union(x_all[:, 2]))
    unique_rels = len(set(x_all[:, 1]))

    # Hold out 90% of the triples while keeping every entity and relation in train
    x_train, x_test = train_test_split_no_unseen(x_all, 0.90)

    assert x_train.shape[0] + x_test.shape[0] == x_all.shape[0]

    unique_entities_train = len(set(x_train[:, 0]).union(x_train[:, 2]))
    unique_rels_train = len(set(x_train[:, 1]))

    assert unique_entities_train == unique_entities and unique_rels_train == unique_rels

    with pytest.raises(Exception) as e:
        x_train, x_test = train_test_split_no_unseen(x_all, 0.99, allow_duplication=False)

    assert str(e.value) == "Cannot create a test split of the desired size. " \
                           "Some entities will not occur in both training and test set. " \
                           "Set allow_duplication=True," \
                           "remove filter on test predicates or " \
                           "set test_size to a smaller value."

    # With allow_duplication=True, triples are duplicated to keep all entities in
    # train, so the split sizes sum to more than the original set size.
    x_train, x_test = train_test_split_no_unseen(x_all, 0.99, allow_duplication=True)
    assert x_train.shape[0] + x_test.shape[0] > x_all.shape[0]

    unique_entities_train = len(set(x_train[:, 0]).union(x_train[:, 2]))
    unique_rels_train = len(set(x_train[:, 1]))

    assert unique_entities_train == unique_entities and unique_rels_train == unique_rels
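For readers new to the API, here is a minimal sketch of what train_test_split_no_unseen guarantees, on a hypothetical toy graph (the triples and names below are made up for illustration, not taken from the test above):

import numpy as np
from ampligraph.evaluation import train_test_split_no_unseen

# A toy graph: each row is a (subject, predicate, object) triple.
# The triples are chosen so every entity and relation occurs more than
# once and can therefore appear in both splits.
X_toy = np.array([
    ['a', 'likes', 'b'],
    ['b', 'likes', 'c'],
    ['c', 'likes', 'a'],
    ['a', 'knows', 'c'],
    ['b', 'knows', 'a'],
    ['c', 'knows', 'b'],
])

# Hold out 2 triples; every entity/relation in test also occurs in train.
X_train, X_test = train_test_split_no_unseen(X_toy, test_size=2, seed=0)
print(X_train.shape, X_test.shape)  # (4, 3) (2, 3)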
Example #2
from ampligraph.datasets import load_fb15k_237


def test_load_fb15k_237():
    fb15k_237 = load_fb15k_237()
    assert len(fb15k_237['train']) == 272115

    # - 9 because 9 triples containing unseen entities are removed
    assert len(fb15k_237['valid']) == 17535 - 9

    # - 28 because 28 triples containing unseen entities are removed
    assert len(fb15k_237['test']) == 20466 - 28
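Since the loader returns the already-filtered splits, the comments above can be sanity-checked by confirming that no valid/test triple mentions an entity unseen in training. A minimal sketch of that check (my assumption about how the filtering works, not part of the original test):

import numpy as np
from ampligraph.datasets import load_fb15k_237

X = load_fb15k_237()
train_entities = set(X['train'][:, 0]).union(X['train'][:, 2])

def count_seen(split):
    # Count triples whose subject AND object both occur in the training set.
    return sum(1 for s, p, o in split if s in train_entities and o in train_entities)

# The loader has already dropped triples with unseen entities, so nothing remains to filter:
assert count_seen(X['valid']) == len(X['valid'])
assert count_seen(X['test']) == len(X['test'])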
Example #3
import numpy as np

from ampligraph.datasets import load_fb15k_237
from ampligraph.evaluation import evaluate_performance, hits_at_n_score, mrr_score
from ampligraph.latent_features import ComplEx


def main():
    # load Wordnet18 dataset:
    # X = load_wn18()
    X = load_fb15k_237()

    # Initialize a ComplEx neural embedding model with pairwise loss function:
    # The model will be trained for 30 epochs.
    model = ComplEx(
        batches_count=10,
        seed=0,
        epochs=30,
        k=150,
        eta=10,
        # Use adam optimizer with learning rate 1e-3
        optimizer='adam',
        optimizer_params={'lr': 1e-3},
        # Use pairwise loss with margin 0.5
        loss='pairwise',
        loss_params={'margin': 0.5},
        # Use L2 regularizer with regularizer weight 1e-5
        regularizer='LP',
        regularizer_params={
            'p': 2,
            'lambda': 1e-5
        },
        # Enable stdout messages (set to False if you don't want to display)
        verbose=True)

    # Alternative "ground truth" hyperparameters (not tried yet); set the
    # guard below to True to train with them instead:
    if False:
        model = ComplEx(
            batches_count=50,
            seed=0,
            epochs=4000,
            k=350,
            eta=30,
            # Use adam optimizer with learning rate 1e-4
            optimizer='adam',
            optimizer_params={'lr': 1e-4},
            # Use self-adversarial loss with margin 0.5 and alpha 1
            loss='self_adversarial',
            loss_params={
                'margin': 0.5,
                'alpha': 1
            },
            # Use L2 regularizer with regularizer weight 1e-5
            regularizer='LP',
            regularizer_params={
                'p': 2,
                'lambda': 1e-5
            },
            # Enable stdout messages (set to False if you don't want to display)
            verbose=True)

    # For evaluation, we can use a filter to discard corruptions generated by
    # the corruption procedure that happen to be known positive statements.
    # Here we define the filter set by concatenating all the positives.
    filter_triples = np.concatenate((X['train'], X['valid'], X['test']))

    # Fit the model on training and validation set
    model.fit(X['train'],
              early_stopping=True,
              early_stopping_params={
                  'x_valid': X['valid'],         # validation set
                  'criteria': 'hits10',          # use Hits@10 as the early stopping criterion
                  'burn_in': 100,                # early stopping kicks in after 100 epochs
                  'check_interval': 20,          # validate every 20th epoch
                  'stop_interval': 5,            # stop if 5 successive validation checks are bad
                  'x_filter': filter_triples,    # use the filter for filtering out positives
                  'corruption_entities': 'all',  # corrupt using all entities
                  'corrupt_side': 's+o'          # corrupt subject and object (but not at once)
              })

    # Run the evaluation procedure on the test set (with filtering).
    # To disable filtering, pass filter_triples=None.
    # Usually, we corrupt subject and object sides separately and compute ranks.
    ranks = evaluate_performance(
        X['test'],
        model=model,
        filter_triples=filter_triples,
        use_default_protocol=True,  # corrupt subj and obj separately while evaluating
        verbose=True)

    # compute and print metrics:
    mrr = mrr_score(ranks)
    hits_10 = hits_at_n_score(ranks, n=10)
    print("MRR: %f, Hits@10: %f" % (mrr, hits_10))