Пример #1
0
def test_gaussian_v1():
    """Check that the element-wise Gaussian kernel equals the diagonal of its pairwise form.

    Two small embedding tables are created from a fixed seed and scaled by
    ``1e-3``. For the pairs ``(x, y)`` and ``(x, x)`` the output of
    ``kernel(...)`` must match ``diag(kernel.pairwise(...))`` within a
    tolerance of ``1e-7``.
    """
    seed = 0
    nb_entities, embedding_size = 10, 20
    slope = 1.0

    np.random.seed(seed)
    torch.manual_seed(seed)

    with torch.no_grad():
        # x is created before y so the seeded initial weights stay the same.
        x_emb = nn.Embedding(nb_entities, embedding_size, sparse=True)
        y_emb = nn.Embedding(nb_entities, embedding_size, sparse=True)

        for table in (x_emb, y_emb):
            table.weight.data *= 1e-3

        kernel = GaussianKernel(slope=slope)

        # First compare x against y, then x against itself.
        for other in (y_emb, x_emb):
            elementwise = kernel(x_emb.weight, other.weight).numpy()
            pairwise = kernel.pairwise(x_emb.weight, other.weight).numpy()

            np.testing.assert_allclose(elementwise,
                                       np.diag(pairwise),
                                       rtol=1e-7,
                                       atol=1e-7)
Пример #2
0
def test_clutrr_v3():
    """Train an attentive two-hop reformulator on synthetic chains and check its hops.

    The fact base holds 32 chains ``a_i -p-> b_i`` and ``b_i -q-> c_i``; the
    training queries are the corresponding ``a_i -r-> c_i`` triples, repeated
    64 times. Each batch is extended with negatives whose subject and object
    are entity indices taken from two independent random permutations, and
    the model is trained with binary cross-entropy.

    The test passes when, on the last batch, the mean kernel similarity
    between the ``p`` embedding and the first generated hop, and between the
    ``q`` embedding and the second generated hop, both exceed 0.95.
    """
    embedding_size = 20
    batch_size = 8

    torch.manual_seed(0)
    # Seeded so that negative sampling, and hence the test outcome, is
    # reproducible from run to run.
    rs = np.random.RandomState(0)

    triples, hops = [], []

    for i in range(32):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    all_triples = triples + hops

    entity_lst = sorted({s for (s, _, _) in all_triples}
                        | {o for (_, _, o) in all_triples})
    predicate_lst = sorted({p for (_, p, _) in all_triples})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel(slope=None)

    entity_embeddings = nn.Embedding(nb_entities, embedding_size, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates,
                                        embedding_size,
                                        sparse=True)

    # _hops = LinearReformulator(2, embedding_size)
    _hops = AttentiveReformulator(2, predicate_embeddings)

    model = NeuralKB(kernel=kernel, scoring_type='concat')
    hoppy = Hoppy(model, hops_lst=[(_hops, False)], depth=1)

    # Only parameters other than the two embedding tables are optimised.
    params = [
        p for p in hoppy.parameters()
        if not torch.equal(p, entity_embeddings.weight)
        and not torch.equal(p, predicate_embeddings.weight)
    ]

    for tensor in params:
        print(f'\t{tensor.size()}\t{tensor.device}')

    loss_function = nn.BCELoss()

    optimizer = optim.Adagrad(params, lr=0.1)

    hops_data = hops * 64

    batches = make_batches(len(hops_data), batch_size)

    c, d = 0.0, 0.0
    # int64 is requested explicitly so the index tensors do not depend on the
    # platform's default NumPy integer width.
    p_emb = predicate_embeddings(
        torch.from_numpy(np.array([predicate_to_index['p']], dtype=np.int64)))
    q_emb = predicate_embeddings(
        torch.from_numpy(np.array([predicate_to_index['q']], dtype=np.int64)))

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        s_lst = [s for (s, _, _) in hops_batch]
        p_lst = [p for (_, p, _) in hops_batch]
        o_lst = [o for (_, _, o) in hops_batch]

        nb_positives = len(s_lst)
        nb_negatives = nb_positives * 3

        # Negatives are raw entity indices; a permutation cannot yield more
        # than nb_entities of them, so the count is re-read after slicing.
        s_n_lst = rs.permutation(nb_entities)[:nb_negatives].tolist()
        nb_negatives = len(s_n_lst)
        o_n_lst = rs.permutation(nb_entities)[:nb_negatives].tolist()
        p_n_lst = list(islice(cycle(p_lst), nb_negatives))

        xs_np = np.array([entity_to_index[s] for s in s_lst] + s_n_lst,
                         dtype=np.int64)
        xp_np = np.array([predicate_to_index[p] for p in p_lst + p_n_lst],
                         dtype=np.int64)
        xo_np = np.array([entity_to_index[o] for o in o_lst] + o_n_lst,
                         dtype=np.int64)

        xs_emb = entity_embeddings(torch.from_numpy(xs_np))
        xp_emb = predicate_embeddings(torch.from_numpy(xp_np))
        xo_emb = entity_embeddings(torch.from_numpy(xo_np))

        rel_emb = encode_relation(facts=triples,
                                  relation_embeddings=predicate_embeddings,
                                  relation_to_idx=predicate_to_index)
        arg1_emb, arg2_emb = encode_arguments(
            facts=triples,
            entity_embeddings=entity_embeddings,
            entity_to_idx=entity_to_index)

        facts = [rel_emb, arg1_emb, arg2_emb]

        scores = hoppy.score(xp_emb,
                             xs_emb,
                             xo_emb,
                             facts=facts,
                             entity_embeddings=entity_embeddings.weight)

        # The first nb_positives rows are the true queries, the rest negatives.
        labels_np = np.zeros(xs_np.shape[0])
        labels_np[:nb_positives] = 1
        labels = torch.from_numpy(labels_np).float()

        loss = loss_function(scores, labels)

        hop_1_emb = hoppy.hops_lst[0][0].hops_lst[0](xp_emb)
        hop_2_emb = hoppy.hops_lst[0][0].hops_lst[1](xp_emb)

        # Similarity of each generated hop to the predicate it should match.
        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        print(c, d)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    assert c > 0.95 and d > 0.95
Пример #3
0
def test_learning_v3():
    """Train ``SimpleHoppy`` with an attentive reformulator and check the learned hops.

    The fact base holds 16 chains ``a_i -p-> b_i`` and ``b_i -q-> c_i``; the
    training queries are the ``a_i -r-> c_i`` triples, repeated 128 times.
    The two score tensors returned by ``hoppy.forward`` are trained with
    cross-entropy against the object and subject indices respectively, plus
    an N3 regulariser weighted by 0.1.

    The test passes when, on the last batch, the mean kernel similarity
    between the ``p`` embedding and the first generated hop, and between the
    ``q`` embedding and the second generated hop, both exceed 0.95.
    """
    embedding_size = 10
    batch_size = 16

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    all_triples = triples + hops

    # Subjects and objects are unpacked under distinct names; binding the
    # same name twice in one pattern would keep only the last position.
    entity_lst = sorted({s for (s, _, _) in all_triples}
                        | {o for (_, _, o) in all_triples})
    predicate_lst = sorted({p for (_, p, _) in all_triples})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    torch.manual_seed(0)

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities,
                                     embedding_size * 2,
                                     sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates,
                                        embedding_size * 2,
                                        sparse=True)

    # The fact base is passed to the model as index tensors.
    fact_rel = torch.LongTensor(
        np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.LongTensor(
        np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.LongTensor(
        np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings,
                     predicate_embeddings=predicate_embeddings,
                     kernel=kernel,
                     facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    N3_reg = N3()

    # Only parameters other than the two embedding tables are optimised.
    params = [
        p for p in hoppy.parameters()
        if not torch.equal(p, entity_embeddings.weight)
        and not torch.equal(p, predicate_embeddings.weight)
    ]

    loss_function = nn.CrossEntropyLoss(reduction='mean')

    p_emb = predicate_embeddings(
        torch.LongTensor(np.array([predicate_to_index['p']])))
    q_emb = predicate_embeddings(
        torch.LongTensor(np.array([predicate_to_index['q']])))

    optimizer = optim.Adagrad(params, lr=0.1)

    hops_data = hops * 128

    batches = make_batches(len(hops_data), batch_size)

    c, d = 0.0, 0.0

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        xs = torch.LongTensor(
            np.array([entity_to_index[s] for (s, _, _) in hops_batch]))
        xp = torch.LongTensor(
            np.array([predicate_to_index[p] for (_, p, _) in hops_batch]))
        xo = torch.LongTensor(
            np.array([entity_to_index[o] for (_, _, o) in hops_batch]))

        xs_emb = entity_embeddings(xs)
        xp_emb = predicate_embeddings(xp)
        xo_emb = entity_embeddings(xo)

        sp_scores, po_scores = hoppy.forward(xp_emb, xs_emb, xo_emb)

        loss = loss_function(sp_scores, xo) + loss_function(po_scores, xs)

        factors = [hoppy.factor(e) for e in [xp_emb, xs_emb, xo_emb]]
        loss += 0.1 * N3_reg(factors)

        generated_hops = hoppy.hops(xp_emb)
        hop_1_emb = generated_hops[0]
        hop_2_emb = generated_hops[1]

        # Similarity of each generated hop to the predicate it should match.
        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    assert c > 0.95
    assert d > 0.95
Пример #4
0
def _build_argparser():
    """Create the ``argparse`` parser holding every command-line option of ``main``."""
    default_path = "data/clutrr-emnlp/data_test/64.csv"

    argparser = argparse.ArgumentParser(
        'CLUTRR', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # data
    argparser.add_argument('--train', action='store', type=str, default=default_path)
    argparser.add_argument('--test', nargs='+', type=str, default=[default_path])

    # model params
    argparser.add_argument('--embedding-size', '-k', action='store', type=int, default=20)
    argparser.add_argument('--k-max', '-m', action='store', type=int, default=10)
    argparser.add_argument('--max-depth', '-d', action='store', type=int, default=2)
    argparser.add_argument('--hops', nargs='+', type=str, default=['2', '2', '1R'])

    # training params
    argparser.add_argument('--epochs', '-e', action='store', type=int, default=100)
    argparser.add_argument('--learning-rate', '-l', action='store', type=float, default=0.1)
    argparser.add_argument('--batch-size', '-b', action='store', type=int, default=8)
    argparser.add_argument('--optimizer', '-o', action='store', type=str, default='adagrad',
                           choices=['adagrad', 'adam', 'sgd'])

    argparser.add_argument('--seed', action='store', type=int, default=0)
    argparser.add_argument('--evaluate-every', '-V', action='store', type=int, default=32)

    # regularisers (disabled when left at None)
    argparser.add_argument('--N2', action='store', type=float, default=None)
    argparser.add_argument('--N3', action='store', type=float, default=None)
    argparser.add_argument('--entropy', '-E', action='store', type=float, default=None)

    argparser.add_argument('--reformulator', '-r', action='store', type=str, default='linear',
                           choices=['static', 'linear', 'attentive', 'memory'])
    argparser.add_argument('--nb-rules', '-R', action='store', type=int, default=4)

    argparser.add_argument('--slope', '-S', action='store', type=float, default=None)
    argparser.add_argument('--init-size', '-i', action='store', type=float, default=1.0)

    argparser.add_argument('--debug', '-D', action='store_true', default=False)

    return argparser


def main(argv):
    """Train a ``Hoppy`` model on CLUTRR data and report test accuracy.

    The function parses ``argv``, seeds NumPy and PyTorch, loads the data,
    builds the kernel, embeddings, reformulators and the ``Hoppy`` model, and
    then runs the training loop. For every training instance all relations
    are scored as candidate targets and trained with binary cross-entropy,
    plus the optional N2/N3/entropy regularisers. Gradients are accumulated
    over the instances of a batch before one optimiser step. Accuracy on
    each test path is logged every ``--evaluate-every`` steps and once more
    after training.

    Args:
        argv: List of command-line argument strings handed to
            ``ArgumentParser.parse_args``.
    """
    args = _build_argparser().parse_args(argv)

    train_path = args.train
    test_paths = args.test

    embedding_size = args.embedding_size
    k_max = args.k_max
    max_depth = args.max_depth
    hops_str = args.hops

    nb_epochs = args.epochs
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    optimizer_name = args.optimizer

    seed = args.seed
    evaluate_every = args.evaluate_every

    N2_weight = args.N2
    N3_weight = args.N3
    entropy_weight = args.entropy

    reformulator_name = args.reformulator
    nb_rules = args.nb_rules

    slope = args.slope
    init_size = args.init_size

    is_debug = args.debug

    np.random.seed(seed)
    random_state = np.random.RandomState(seed)
    torch.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f'Device: {device}')

    if torch.cuda.is_available():
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    data = Data(train_path=train_path, test_paths=test_paths)

    relation_to_predicate = data.relation_to_predicate
    predicate_to_relations = data.predicate_to_relations
    entity_lst, predicate_lst, relation_lst = data.entity_lst, data.predicate_lst, data.relation_lst

    nb_examples = len(data.train)
    nb_entities = len(entity_lst)
    nb_predicates = len(predicate_lst)
    nb_relations = len(relation_lst)

    entity_to_idx = {e: i for i, e in enumerate(entity_lst)}
    relation_to_idx = {r: i for i, r in enumerate(relation_lst)}

    kernel = GaussianKernel(slope=slope)

    entity_embeddings = nn.Embedding(nb_entities, embedding_size,
                                     sparse=False).to(device)

    # A plain embedding table is used unless the entropy regulariser is on,
    # in which case relations go through an AttentiveEmbedding.
    if entropy_weight is None:
        relation_embeddings = nn.Embedding(nb_relations,
                                           embedding_size,
                                           sparse=False).to(device)
        relation_embeddings.weight.data *= init_size
    else:
        relation_embeddings = AttentiveEmbedding(nb_predicates=nb_predicates,
                                                 nb_relations=nb_relations,
                                                 embedding_size=embedding_size,
                                                 device=device).to(device)
        make_easy(predicate_lst, predicate_to_relations, relation_to_idx,
                  relation_embeddings)

    model = NeuralKB(kernel=kernel, k=k_max).to(device)
    memory = None

    def make_hop(s: str) -> Tuple[BaseReformulator, bool]:
        """Build one reformulator from a hop spec such as ``'2'`` or ``'1R'``.

        An all-digit spec gives the number of hops. Any other spec is read
        as ``<number><one trailing character>`` and marks the hop as
        reversed. Returns the reformulator and the reversed flag.
        """
        nonlocal memory
        is_reversed = not s.isdigit()
        nb_hops = int(s[:-1]) if is_reversed else int(s)

        res = None
        if reformulator_name == 'static':
            res = StaticReformulator(nb_hops, embedding_size)
        elif reformulator_name == 'linear':
            res = LinearReformulator(nb_hops, embedding_size)
        elif reformulator_name == 'attentive':
            res = AttentiveReformulator(nb_hops, relation_embeddings)
        elif reformulator_name == 'memory':
            # The memory is created on first use and then shared by all
            # subsequent memory reformulators.
            if memory is None:
                memory = MemoryReformulator.Memory(nb_hops, nb_rules,
                                                   embedding_size)
            res = MemoryReformulator(memory)
        assert res is not None
        return res, is_reversed

    hops_lst = [make_hop(s) for s in hops_str]
    hoppy = Hoppy(model=model, depth=max_depth, hops_lst=hops_lst).to(device)

    def scoring_function(story: List[Fact], targets: List[Fact]) -> Tensor:
        """Score ``targets`` against the facts of ``story`` with ``hoppy``."""
        story_rel = encode_relation(story, relation_embeddings,
                                    relation_to_idx, device)
        story_arg1, story_arg2 = encode_arguments(story, entity_embeddings,
                                                  entity_to_idx, device)

        targets_rel = encode_relation(targets, relation_embeddings,
                                      relation_to_idx, device)
        targets_arg1, targets_arg2 = encode_arguments(targets,
                                                      entity_embeddings,
                                                      entity_to_idx, device)

        facts = [story_rel, story_arg1, story_arg2]
        return hoppy.score(targets_rel, targets_arg1, targets_arg2, facts)

    def evaluate(instances: List[Instance],
                 path: str,
                 sample_size: Optional[int] = None) -> float:
        """Log and return the accuracy on ``instances`` (0.0 if there are none)."""
        res = 0.0
        if len(instances) > 0:
            res = accuracy(scoring_function=scoring_function,
                           instances=instances,
                           sample_size=sample_size,
                           relation_to_predicate=relation_to_predicate,
                           predicate_to_relations=predicate_to_relations)
            logger.info(f'Test Accuracy on {path}: {res:.6f}')
        return res

    loss_function = nn.BCELoss()

    N2_reg = N2() if N2_weight is not None else None
    N3_reg = N3() if N3_weight is not None else None
    entropy_reg = Entropy(
        use_logits=False) if entropy_weight is not None else None

    # Entity embeddings are left out of the optimised parameters.
    candidate_params = [
        p for p in hoppy.parameters()
        if not torch.equal(p, entity_embeddings.weight)
    ]
    candidate_params += relation_embeddings.parameters()

    # A reformulator may already expose the relation embeddings through
    # hoppy.parameters(); drop repeated objects so the optimiser never
    # receives the same parameter twice.
    params_lst, seen_ids = [], set()
    for p in candidate_params:
        if id(p) not in seen_ids:
            seen_ids.add(id(p))
            params_lst.append(p)

    params = nn.ParameterList(params_lst).to(device)

    for tensor in params_lst:
        logger.info(f'\t{tensor.size()}\t{tensor.device}')

    optimizer_factory = {
        'adagrad': lambda arg: optim.Adagrad(arg, lr=learning_rate),
        'adam': lambda arg: optim.Adam(arg, lr=learning_rate),
        'sgd': lambda arg: optim.SGD(arg, lr=learning_rate)
    }

    assert optimizer_name in optimizer_factory
    optimizer = optimizer_factory[optimizer_name](params)

    global_step = 0
    hinton = HintonDiagram(max_arr=[0.0, 1.0])

    def show_relation_kernel() -> None:
        """Print a Hinton diagram of the pairwise kernel between relation embeddings."""
        r_idx_lst = [
            relation_to_idx[r] for p in predicate_lst
            for r in predicate_to_relations[p]
        ]
        with torch.no_grad():
            r_idx_tensor = torch.from_numpy(
                np.array(r_idx_lst, dtype=np.int64)).to(device)
            r_tensor = relation_embeddings(r_idx_tensor)
            k = kernel.pairwise(r_tensor, r_tensor)
            print(hinton(k.cpu().numpy()))

    # The candidate relations are the same for every instance.
    p_relation_lst = sorted(relation_to_predicate.keys())

    for epoch_no in range(1, nb_epochs + 1):
        batcher = Batcher(batch_size=batch_size,
                          nb_examples=nb_examples,
                          nb_epochs=1,
                          random_state=random_state)
        nb_batches = len(batcher.batches)
        epoch_loss_values = []

        for batch_no, (batch_start, batch_end) in enumerate(batcher.batches,
                                                            start=1):
            global_step += 1

            indices_batch = batcher.get_batch(batch_start, batch_end)
            instances_batch = [data.train[i] for i in indices_batch]

            batch_loss_values = []

            for i, instance in enumerate(instances_batch):
                story, target = instance.story, instance.target
                s, r, o = target

                if is_debug and i == 0:
                    show_relation_kernel()

                story_rel = encode_relation(story, relation_embeddings,
                                            relation_to_idx, device)
                story_arg1, story_arg2 = encode_arguments(
                    story, entity_embeddings, entity_to_idx, device)

                facts = [story_rel, story_arg1, story_arg2]

                # Every relation is a candidate target; a candidate is
                # positive when it maps to the same predicate as the gold
                # relation.
                pos_predicate = relation_to_predicate[r]
                target_lst = [(s, x, o) for x in p_relation_lst]
                label_lst = [
                    int(pos_predicate == relation_to_predicate[x])
                    for x in p_relation_lst
                ]

                rel_emb = encode_relation(target_lst, relation_embeddings,
                                          relation_to_idx, device)
                arg1_emb, arg2_emb = encode_arguments(target_lst,
                                                      entity_embeddings,
                                                      entity_to_idx, device)

                scores = hoppy.score(rel_emb, arg1_emb, arg2_emb, facts)
                # Labels are placed on `device` explicitly rather than via
                # the global default tensor type.
                labels = torch.tensor(label_lst,
                                      dtype=torch.float32,
                                      device=device)

                loss = loss_function(scores, labels)

                factors = [
                    hoppy.factor(e) for e in [rel_emb, arg1_emb, arg2_emb]
                ]

                if N2_weight is not None:
                    loss += N2_weight * N2_reg(factors)
                if N3_weight is not None:
                    loss += N3_weight * N3_reg(factors)

                if entropy_weight is not None:
                    attention = relation_embeddings.attention
                    loss += entropy_weight * entropy_reg([attention])

                loss_value = loss.item()

                batch_loss_values += [loss_value]
                epoch_loss_values += [loss_value]

                # Gradients accumulate across the instances of the batch.
                loss.backward()

            optimizer.step()
            optimizer.zero_grad()

            loss_mean, loss_std = np.mean(batch_loss_values), np.std(
                batch_loss_values)
            logger.info(
                f'Epoch {epoch_no}/{nb_epochs}\tBatch {batch_no}/{nb_batches}\tLoss {loss_mean:.4f} ± {loss_std:.4f}'
            )

            if global_step % evaluate_every == 0:
                for test_path in test_paths:
                    evaluate(instances=data.test[test_path], path=test_path)

                if is_debug:
                    with torch.no_grad():
                        show_rules(model=hoppy,
                                   kernel=kernel,
                                   relation_embeddings=relation_embeddings,
                                   data=data,
                                   relation_to_idx=relation_to_idx,
                                   device=device)
                    show_relation_kernel()

        loss_mean, loss_std = np.mean(epoch_loss_values), np.std(
            epoch_loss_values)

        # kernel.slope may be a tensor or a plain number.
        slope = kernel.slope.item() if isinstance(kernel.slope,
                                                  Tensor) else kernel.slope
        logger.info(
            f'Epoch {epoch_no}/{nb_epochs}\tLoss {loss_mean:.4f} ± {loss_std:.4f}\tSlope {slope:.4f}'
        )

    for test_path in test_paths:
        evaluate(instances=data.test[test_path], path=test_path)

    logger.info("Training finished")