Exemplo n.º 1
0
def test_hoppy_v1():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    rs = np.random.RandomState(0)

    for _ in range(16):
        for nb_hops in range(6):
            for use_attention in [True, False]:
                with torch.no_grad():
                    entity_embeddings = nn.Embedding(nb_entities,
                                                     embedding_size * 2,
                                                     sparse=True)
                    predicate_embeddings = nn.Embedding(nb_predicates,
                                                        embedding_size * 2,
                                                        sparse=True)

                    base = ComplEx(entity_embeddings)

                    if use_attention:
                        reformulator = AttentiveReformulator(
                            nb_hops, predicate_embeddings)
                    else:
                        reformulator = LinearReformulator(
                            nb_hops, embedding_size * 2)

                    model = SimpleHoppy(base,
                                        entity_embeddings,
                                        hops=reformulator)

                    xs = torch.from_numpy(rs.randint(nb_entities, size=32))
                    xp = torch.from_numpy(rs.randint(nb_predicates, size=32))
                    xo = torch.from_numpy(rs.randint(nb_entities, size=32))

                    xs_emb = entity_embeddings(xs)
                    xp_emb = predicate_embeddings(xp)
                    xo_emb = entity_embeddings(xo)

                    scores = model.forward(xp_emb, xs_emb, xo_emb)
                    inf = model.score(xp_emb, xs_emb, xo_emb)

                    scores_sp, scores_po = scores

                    inf = inf.cpu().numpy()
                    scores_sp = scores_sp.cpu().numpy()
                    scores_po = scores_po.cpu().numpy()

                    for i in range(xs.shape[0]):
                        np.testing.assert_allclose(inf[i],
                                                   scores_sp[i, xo[i]],
                                                   rtol=1e-3,
                                                   atol=1e-3)
                        np.testing.assert_allclose(inf[i],
                                                   scores_po[i, xs[i]],
                                                   rtol=1e-3,
                                                   atol=1e-3)
Exemplo n.º 2
0
def test_multi():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    init_size = 1.0

    rs = np.random.RandomState(0)

    for _ in range(8):
        for nb_hops in range(1, 6):
            for use_attention in [True, False]:
                for pt in {'max', 'min', 'sum', 'mixture'}:
                    with torch.no_grad():
                        entity_embeddings = nn.Embedding(nb_entities,
                                                         embedding_size * 2,
                                                         sparse=True)
                        predicate_embeddings = nn.Embedding(nb_predicates,
                                                            embedding_size * 2,
                                                            sparse=True)

                        entity_embeddings.weight.data *= init_size
                        predicate_embeddings.weight.data *= init_size

                        base = ComplEx(entity_embeddings)

                        models = []
                        for i in range(nb_hops):
                            if use_attention:
                                reformulator = AttentiveReformulator(
                                    i, predicate_embeddings)
                            else:
                                reformulator = LinearReformulator(
                                    i, embedding_size * 2)

                            h_model = SimpleHoppy(base,
                                                  entity_embeddings,
                                                  hops=reformulator)
                            models += [h_model]

                        model = Multi(models=models,
                                      pooling_type=pt,
                                      embedding_size=embedding_size * 2)

                        xs = torch.from_numpy(rs.randint(nb_entities, size=32))
                        xp = torch.from_numpy(
                            rs.randint(nb_predicates, size=32))
                        xo = torch.from_numpy(rs.randint(nb_entities, size=32))

                        xs_emb = entity_embeddings(xs)
                        xp_emb = predicate_embeddings(xp)
                        xo_emb = entity_embeddings(xo)

                        scores = model.forward(xp_emb, xs_emb, xo_emb)
                        inf = model.score(xp_emb, xs_emb, xo_emb)

                        scores_sp, scores_po = scores

                        inf = inf.cpu().numpy()
                        scores_sp = scores_sp.cpu().numpy()
                        scores_po = scores_po.cpu().numpy()

                        for i in range(xs.shape[0]):
                            np.testing.assert_allclose(inf[i],
                                                       scores_sp[i, xo[i]],
                                                       rtol=1e-3,
                                                       atol=1e-3)
                            np.testing.assert_allclose(inf[i],
                                                       scores_po[i, xs[i]],
                                                       rtol=1e-3,
                                                       atol=1e-3)
Exemplo n.º 3
0
def _test_reasoning_v5(_st):
    torch.set_num_threads(multiprocessing.cpu_count())

    nb_entities = 10
    nb_predicates = 5
    embedding_size = 20

    rs = np.random.RandomState(0)

    triples = [('a', 'p', 'b'), ('b', 'q', 'c'), ('c', 'r', 'd'),
               ('d', 's', 'e')]

    entity_to_index = {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4}
    predicate_to_index = {'p': 0, 'q': 1, 'r': 2, 's': 3}

    for st in [_st]:
        with torch.no_grad():
            kernel = GaussianKernel()

            entity_embeddings = nn.Embedding(nb_entities,
                                             embedding_size * 2,
                                             sparse=True)
            predicate_embeddings = nn.Embedding(nb_predicates,
                                                embedding_size * 2,
                                                sparse=True)

            fact_rel = torch.from_numpy(
                np.array([predicate_to_index[p] for (_, p, _) in triples]))
            fact_arg1 = torch.from_numpy(
                np.array([entity_to_index[s] for (s, _, _) in triples]))
            fact_arg2 = torch.from_numpy(
                np.array([entity_to_index[o] for (_, _, o) in triples]))
            facts = [fact_rel, fact_arg1, fact_arg2]

            model = NeuralKB(entity_embeddings=entity_embeddings,
                             predicate_embeddings=predicate_embeddings,
                             kernel=kernel,
                             facts=facts,
                             scoring_type=st)

            indices = torch.from_numpy(
                np.array([
                    predicate_to_index['p'], predicate_to_index['q'],
                    predicate_to_index['r'], predicate_to_index['s']
                ]))
            reformulator = SymbolicReformulator(predicate_embeddings, indices)
            hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)
            rhoppy = RecursiveHoppy(model,
                                    entity_embeddings,
                                    hops=reformulator,
                                    depth=1)

            xs_np = rs.randint(nb_entities, size=32)
            xp_np = rs.randint(nb_predicates, size=32)
            xo_np = rs.randint(nb_entities, size=32)

            xs_np[0] = 0
            xp_np[0] = 0
            xo_np[0] = 1

            xs_np[1] = 1
            xp_np[1] = 1
            xo_np[1] = 2

            xs_np[2] = 0
            xp_np[2] = 3
            xo_np[2] = 4

            xs = torch.from_numpy(xs_np)
            xp = torch.from_numpy(xp_np)
            xo = torch.from_numpy(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            scores = hoppy.forward(xp_emb, xs_emb, xo_emb)
            inf = hoppy.score(xp_emb, xs_emb, xo_emb)

            scores_h = rhoppy.depth_r_forward(xp_emb, xs_emb, xo_emb, depth=1)
            inf_h = rhoppy.depth_r_score(xp_emb, xs_emb, xo_emb, depth=1)

            print(inf)
            print(inf_h)

            assert inf[2] > 0.95

            scores_sp, scores_po = scores
            scores_h_sp, scores_h_po = scores_h

            inf = inf.cpu().numpy()
            scores_sp = scores_sp.cpu().numpy()
            scores_po = scores_po.cpu().numpy()

            inf_h = inf_h.cpu().numpy()
            scores_h_sp = scores_h_sp.cpu().numpy()
            scores_h_po = scores_h_po.cpu().numpy()

            np.testing.assert_allclose(inf, inf_h)
            np.testing.assert_allclose(scores_sp, scores_h_sp)
            np.testing.assert_allclose(scores_po, scores_h_po)

            for i in range(xs.shape[0]):
                np.testing.assert_allclose(inf[i],
                                           scores_sp[i, xo[i]],
                                           rtol=1e-2,
                                           atol=1e-2)
                np.testing.assert_allclose(inf[i],
                                           scores_po[i, xs[i]],
                                           rtol=1e-2,
                                           atol=1e-2)

                np.testing.assert_allclose(inf_h[i],
                                           scores_h_sp[i, xo[i]],
                                           rtol=1e-2,
                                           atol=1e-2)
                np.testing.assert_allclose(inf_h[i],
                                           scores_h_po[i, xs[i]],
                                           rtol=1e-2,
                                           atol=1e-2)
Exemplo n.º 4
0
def test_masking_v2():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    rs = np.random.RandomState(0)

    for _ in range(1):
        for position in [0, 1, 2]:
            for st in ['min', 'concat']:
                with torch.no_grad():
                    triples = [('a', 'p', 'b'), ('b', 'q', 'c'),
                               ('a', 'p', 'c')]
                    entity_to_index = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
                    predicate_to_index = {'p': 0, 'q': 1}

                    kernel = GaussianKernel()

                    entity_emb = nn.Embedding(nb_entities,
                                              embedding_size * 2,
                                              sparse=True)
                    predicate_emb = nn.Embedding(nb_predicates,
                                                 embedding_size * 2,
                                                 sparse=True)

                    fact_rel = torch.from_numpy(
                        np.array(
                            [predicate_to_index[p] for (_, p, _) in triples]))
                    fact_arg1 = torch.from_numpy(
                        np.array([entity_to_index[s]
                                  for (s, _, _) in triples]))
                    fact_arg2 = torch.from_numpy(
                        np.array([entity_to_index[o]
                                  for (_, _, o) in triples]))
                    facts = [fact_rel, fact_arg1, fact_arg2]

                    base = NeuralKB(entity_embeddings=entity_emb,
                                    predicate_embeddings=predicate_emb,
                                    kernel=kernel,
                                    facts=facts,
                                    scoring_type=st)

                    indices = torch.from_numpy(
                        np.array(
                            [predicate_to_index['p'],
                             predicate_to_index['q']]))
                    reformulator = SymbolicReformulator(predicate_emb, indices)
                    model = SimpleHoppy(base, entity_emb, hops=reformulator)

                    xs_np = rs.randint(nb_entities, size=32)
                    xp_np = rs.randint(nb_predicates, size=32)
                    xo_np = rs.randint(nb_entities, size=32)
                    xi_np = np.array([position] * xs_np.shape[0])

                    xs_np[0] = 0
                    xp_np[0] = 0
                    xo_np[0] = 1

                    xs_np[1] = 1
                    xp_np[1] = 1
                    xo_np[1] = 2

                    xs_np[2] = 0
                    xp_np[2] = 0
                    xo_np[2] = 2

                    xs = torch.from_numpy(xs_np)
                    xp = torch.from_numpy(xp_np)
                    xo = torch.from_numpy(xo_np)
                    xi = torch.from_numpy(xi_np)

                    xs_emb = entity_emb(xs)
                    xp_emb = predicate_emb(xp)
                    xo_emb = entity_emb(xo)

                    # xi = None
                    base.mask_indices = xi

                    scores = model.forward(xp_emb, xs_emb, xo_emb)
                    inf = model.score(xp_emb, xs_emb, xo_emb)

                    if position in {0, 1}:
                        assert inf[2] < 0.5
                    else:
                        assert inf[2] > 0.9

                    scores_sp, scores_po = scores

                    inf = inf.cpu().numpy()
                    scores_sp = scores_sp.cpu().numpy()
                    scores_po = scores_po.cpu().numpy()

                    for i in range(xs.shape[0]):
                        np.testing.assert_allclose(inf[i],
                                                   scores_sp[i, xo[i]],
                                                   rtol=1e-2,
                                                   atol=1e-2)
                        np.testing.assert_allclose(inf[i],
                                                   scores_po[i, xs[i]],
                                                   rtol=1e-2,
                                                   atol=1e-2)
Exemplo n.º 5
0
def test_learning_v2():
    embedding_size = 100

    torch.manual_seed(0)

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({e
                         for (e, _, _) in triples + hops}
                        | {e
                           for (e, _, e) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities,
                                     embedding_size * 2,
                                     sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates,
                                        embedding_size * 2,
                                        sparse=True)

    fact_rel = torch.from_numpy(
        np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.from_numpy(
        np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.from_numpy(
        np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings,
                     predicate_embeddings=predicate_embeddings,
                     kernel=kernel,
                     facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    for s, p, o in hops:
        xs_np = np.array([entity_to_index[s]])
        xp_np = np.array([predicate_to_index[p]])
        xo_np = np.array([entity_to_index[o]])

        with torch.no_grad():
            xs = torch.from_numpy(xs_np)
            xp = torch.from_numpy(xp_np)
            xo = torch.from_numpy(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            inf = hoppy.score(xp_emb, xs_emb, xo_emb)

            inf_np = inf.cpu().numpy()
            assert inf_np < 0.5
Exemplo n.º 6
0
def test_learning_v3():
    embedding_size = 10
    batch_size = 16

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({e
                         for (e, _, _) in triples + hops}
                        | {e
                           for (e, _, e) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    torch.manual_seed(0)

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities,
                                     embedding_size * 2,
                                     sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates,
                                        embedding_size * 2,
                                        sparse=True)

    fact_rel = torch.from_numpy(
        np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.from_numpy(
        np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.from_numpy(
        np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings,
                     predicate_embeddings=predicate_embeddings,
                     kernel=kernel,
                     facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    N3_reg = N3()

    params = [
        p for p in hoppy.parameters()
        if not torch.equal(p, entity_embeddings.weight)
        and not torch.equal(p, predicate_embeddings.weight)
    ]

    loss_function = nn.CrossEntropyLoss(reduction='mean')

    p_emb = predicate_embeddings(
        torch.from_numpy(np.array([predicate_to_index['p']])))
    q_emb = predicate_embeddings(
        torch.from_numpy(np.array([predicate_to_index['q']])))
    # r_emb = predicate_embeddings(torch.from_numpy(np.array([predicate_to_index['r']])))

    optimizer = optim.Adagrad(params, lr=0.1)

    hops_data = []
    for i in range(128):
        hops_data += hops

    batches = make_batches(len(hops_data), batch_size)

    c, d = 0.0, 0.0

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        s_lst = [s for (s, _, _) in hops_batch]
        p_lst = [p for (_, p, _) in hops_batch]
        o_lst = [o for (_, _, o) in hops_batch]

        xs_np = np.array([entity_to_index[s] for s in s_lst])
        xp_np = np.array([predicate_to_index[p] for p in p_lst])
        xo_np = np.array([entity_to_index[o] for o in o_lst])

        xs = torch.from_numpy(xs_np)
        xp = torch.from_numpy(xp_np)
        xo = torch.from_numpy(xo_np)

        xs_emb = entity_embeddings(xs)
        xp_emb = predicate_embeddings(xp)
        xo_emb = entity_embeddings(xo)

        sp_scores, po_scores = hoppy.forward(xp_emb, xs_emb, xo_emb)

        loss = loss_function(sp_scores, xo) + loss_function(po_scores, xs)

        factors = [hoppy.factor(e) for e in [xp_emb, xs_emb, xo_emb]]
        loss += 0.1 * N3_reg(factors)

        tmp = hoppy.hops(xp_emb)
        hop_1_emb = tmp[0]
        hop_2_emb = tmp[1]

        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    assert c > 0.95
    assert d > 0.95