Example #1
# Shared imports for all three examples. The project-specific classes
# (GaussianKernel, NeuralKB, Hoppy, the reformulators, encode_relation,
# encode_arguments, make_batches) come from the surrounding codebase;
# their module paths are omitted here.
import multiprocessing
from itertools import cycle, islice

import numpy as np
import torch
from torch import nn, optim


def test_classic_clutrr_v2():
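    # Builds 16 two-hop chains p(a_i, b_i), q(b_i, c_i) plus the target hops
    # r(a_i, c_i), then checks that Hoppy scores the stated and inferable
    # triples highly and every other candidate triple near zero.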
    embedding_size = 50

    triples, hops = [], []
    inferable = []  # triples never stated, but provable via the p-then-q hop

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]
        inferable += [(f'a{i}', 'p', f'c{i}'), (f'a{i}', 'q', f'c{i}'), (f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities, embedding_size, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size, sparse=True)

    model = NeuralKB(kernel=kernel)

    indices = torch.from_numpy(np.array([predicate_to_index['p'], predicate_to_index['q']]))
    _hops = SymbolicReformulator(predicate_embeddings, indices)
    hoppy = Hoppy(model, hops_lst=[(_hops, False)], max_depth=1)
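    # The SymbolicReformulator expands *any* goal into the fixed chain p . q,
    # which is why every (a_i, *, c_i) triple above is provable at depth 1,
    # regardless of its predicate.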

    # The fact encodings do not depend on the query, so compute them once
    # instead of re-encoding the whole KB for every (s, p, o) combination.
    with torch.no_grad():
        rel_emb = encode_relation(facts=triples, relation_embeddings=predicate_embeddings,
                                  relation_to_idx=predicate_to_index)
        arg1_emb, arg2_emb = encode_arguments(facts=triples, entity_embeddings=entity_embeddings,
                                              entity_to_idx=entity_to_index)
        facts = [rel_emb, arg1_emb, arg2_emb]

    for s in entity_lst:
        for p in predicate_lst:
            for o in entity_lst:
                # Two copies of the same (s, p, o) query form a batch of size 2.
                xs_np = np.array([entity_to_index[s], entity_to_index[s]])
                xp_np = np.array([predicate_to_index[p], predicate_to_index[p]])
                xo_np = np.array([entity_to_index[o], entity_to_index[o]])

                with torch.no_grad():
                    xs = torch.from_numpy(xs_np)
                    xp = torch.from_numpy(xp_np)
                    xo = torch.from_numpy(xo_np)

                    xs_emb = entity_embeddings(xs)
                    xp_emb = predicate_embeddings(xp)
                    xo_emb = entity_embeddings(xo)

                    inf = hoppy.score(xp_emb, xs_emb, xo_emb, facts=facts)
                    inf_np = inf.cpu().numpy()

                    print(s, p, o, inf_np)
                    if (s, p, o) in (triples + inferable):
                        assert inf_np[0] > 0.9, inf_np
                    else:
                        assert inf_np[0] < 0.1, inf_np
Example #2
def test_classic_clutrr_v7():
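    # One long chain alternating p and q edges (a -> b -> c -> ... -> w), plus
    # two unrelated facts. Checks that Hoppy at depth d proves exactly the
    # r(a, _) goals whose proof needs at most 2^d facts.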
    torch.set_num_threads(multiprocessing.cpu_count())

    embedding_size = 50

    torch.manual_seed(0)
    rs = np.random.RandomState(0)

    triples = [
        ('a', 'p', 'b'),
        ('b', 'q', 'c'),
        ('c', 'p', 'd'),
        ('d', 'q', 'e'),
        ('e', 'p', 'f'),
        ('f', 'q', 'g'),
        ('g', 'p', 'h'),
        ('h', 'q', 'i'),
        ('i', 'p', 'l'),
        ('l', 'q', 'm'),
        ('m', 'p', 'n'),
        ('n', 'q', 'o'),
        ('o', 'p', 'p'),
        ('p', 'q', 'q'),
        ('q', 'p', 'r'),
        ('r', 'q', 's'),
        ('s', 'p', 't'),
        ('t', 'q', 'u'),
        ('u', 'p', 'v'),
        ('v', 'q', 'w'),

        ('x', 'r', 'y'),
        ('x', 's', 'y')
    ]

    entity_lst = sorted({e for (e, _, _) in triples} | {e for (_, _, e) in triples})
    predicate_lst = sorted({p for (_, p, _) in triples})

    nb_entities = len(entity_lst)
    nb_predicates = len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    with torch.no_grad():
        kernel = GaussianKernel()

        entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
        predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

        rel_emb = encode_relation(facts=triples,
                                  relation_embeddings=predicate_embeddings,
                                  relation_to_idx=predicate_to_index)

        arg1_emb, arg2_emb = encode_arguments(facts=triples,
                                              entity_embeddings=entity_embeddings,
                                              entity_to_idx=entity_to_index)

        facts = [rel_emb, arg1_emb, arg2_emb]

        k = 5

        model = NeuralKB(kernel=kernel, k=k)
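        # (k presumably bounds the number of top-scoring facts NeuralKB
        # retains per subgoal during differentiable unification.)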

        indices = torch.from_numpy(np.array([predicate_to_index['p'], predicate_to_index['q']]))
        reformulator = SymbolicReformulator(predicate_embeddings, indices)

        hoppy0 = Hoppy(model, hops_lst=[(reformulator, False)], max_depth=0)
        hoppy1 = Hoppy(model, hops_lst=[(reformulator, False)], max_depth=1)
        hoppy2 = Hoppy(model, hops_lst=[(reformulator, False)], max_depth=2)
        hoppy3 = Hoppy(model, hops_lst=[(reformulator, False)], max_depth=3)
        # hoppy4 = Hoppy(model, hops_lst=[(reformulator, False)], max_depth=4)

        xs_np = rs.randint(nb_entities, size=12)
        xp_np = rs.randint(nb_predicates, size=12)
        xo_np = rs.randint(nb_entities, size=12)

        # Queries 0..8 all ask r(a, t) for targets progressively further down
        # the chain; queries 9..11 stay random.
        targets = ['c', 'e', 'g', 'i', 'm', 'o', 'q', 's', 'u']
        for j, t in enumerate(targets):
            xs_np[j] = entity_to_index['a']
            xp_np[j] = predicate_to_index['r']
            xo_np[j] = entity_to_index[t]

        xs = torch.from_numpy(xs_np)
        xp = torch.from_numpy(xp_np)
        xo = torch.from_numpy(xo_np)

        xs_emb = entity_embeddings(xs)
        xp_emb = predicate_embeddings(xp)
        xo_emb = entity_embeddings(xo)

        inf0 = hoppy0.score(xp_emb, xs_emb, xo_emb, facts=facts)
        inf1 = hoppy1.score(xp_emb, xs_emb, xo_emb, facts=facts)
        inf2 = hoppy2.score(xp_emb, xs_emb, xo_emb, facts=facts)
        inf3 = hoppy3.score(xp_emb, xs_emb, xo_emb, facts=facts)

        inf0_np = inf0.cpu().numpy()
        inf1_np = inf1.cpu().numpy()
        inf2_np = inf2.cpu().numpy()
        inf3_np = inf3.cpu().numpy()

        # Depth d proves goals whose decomposition needs at most 2^d facts:
        # depth 1 reaches c, depth 2 adds e, depth 3 adds g and i.
        np.testing.assert_allclose(inf0_np, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
        np.testing.assert_allclose(inf1_np, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
        np.testing.assert_allclose(inf2_np, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
        np.testing.assert_allclose(inf3_np, [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
        # At depth 4 (hoppy4 above) the expectation would be
        # [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], additionally reaching m, o, q and s.

        print(inf3_np)

        print(entity_embeddings.weight[entity_to_index['c'], 0].item())
        print(entity_embeddings.weight[entity_to_index['e'], 0].item())
        print(entity_embeddings.weight[entity_to_index['g'], 0].item())
        print(entity_embeddings.weight[entity_to_index['i'], 0].item())
Example #3
def test_classic_clutrr_v3():
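    # Same chain construction as Example #1, but here the hop sequence is
    # *learned*: an AttentiveReformulator is trained with BCE so that its two
    # hops converge to the embeddings of p and q.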
    embedding_size = 40
    batch_size = 8

    torch.manual_seed(0)

    triples, hops = [], []

    for i in range(32):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel(slope=None)

    entity_embeddings = nn.Embedding(nb_entities, embedding_size, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size, sparse=True)

    # _hops = LinearReformulator(2, embedding_size)
    _hops = AttentiveReformulator(2, predicate_embeddings)
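    # AttentiveReformulator produces each of its 2 hop predicates as an
    # attention-weighted mixture of the existing predicate embeddings.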

    model = NeuralKB(kernel=kernel)
    hoppy = Hoppy(model, hops_lst=[(_hops, False)], max_depth=1)

    # Optimise only the reformulator's own parameters; the entity and
    # predicate embeddings stay frozen.
    params = [p for p in hoppy.parameters()
              if not torch.equal(p, entity_embeddings.weight)
              and not torch.equal(p, predicate_embeddings.weight)]

    for tensor in params:
        print(f'\t{tensor.size()}\t{tensor.device}')

    loss_function = nn.BCELoss()

    optimizer = optim.Adagrad(params, lr=0.1)

    # Replicate the hop goals to form a small training set.
    hops_data = hops * 64

    batches = make_batches(len(hops_data), batch_size)

    rs = np.random.RandomState(0)  # seeded so the negative sampling is reproducible

    c, d = 0.0, 0.0
    p_emb = predicate_embeddings(torch.from_numpy(np.array([predicate_to_index['p']])))
    q_emb = predicate_embeddings(torch.from_numpy(np.array([predicate_to_index['q']])))

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        s_lst = [s for (s, _, _) in hops_batch]
        p_lst = [p for (_, p, _) in hops_batch]
        o_lst = [o for (_, _, o) in hops_batch]

        # Three corrupted triples per positive: random subjects and objects,
        # with predicates cycled from the positives; all get label 0.
        nb_positives = len(s_lst)
        nb_negatives = nb_positives * 3

        s_n_lst = rs.permutation(nb_entities)[:nb_negatives].tolist()
        nb_negatives = len(s_n_lst)
        o_n_lst = rs.permutation(nb_entities)[:nb_negatives].tolist()
        p_n_lst = list(islice(cycle(p_lst), nb_negatives))

        xs_np = np.array([entity_to_index[s] for s in s_lst] + s_n_lst)
        xp_np = np.array([predicate_to_index[p] for p in p_lst + p_n_lst])
        xo_np = np.array([entity_to_index[o] for o in o_lst] + o_n_lst)

        xs_emb = entity_embeddings(torch.from_numpy(xs_np))
        xp_emb = predicate_embeddings(torch.from_numpy(xp_np))
        xo_emb = entity_embeddings(torch.from_numpy(xo_np))

        rel_emb = encode_relation(facts=triples,
                                  relation_embeddings=predicate_embeddings,
                                  relation_to_idx=predicate_to_index)

        arg1_emb, arg2_emb = encode_arguments(facts=triples,
                                              entity_embeddings=entity_embeddings,
                                              entity_to_idx=entity_to_index)

        facts = [rel_emb, arg1_emb, arg2_emb]

        scores = hoppy.score(xp_emb, xs_emb, xo_emb, facts=facts)

        labels_np = np.zeros(xs_np.shape[0])
        labels_np[:nb_positives] = 1
        labels = torch.from_numpy(labels_np).float()

        loss = loss_function(scores, labels)

        # Track how close the learned hops are to p and q: hops_lst[0][0] is
        # the AttentiveReformulator, and its two hop heads should converge to
        # the embeddings of p and q respectively.
        hop_1_emb = hoppy.hops_lst[0][0].hops_lst[0](xp_emb)
        hop_2_emb = hoppy.hops_lst[0][0].hops_lst[1](xp_emb)

        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        print(c, d)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    assert c > 0.95 and d > 0.95