def test_gaussian_v1():
    nb_entities = 10
    embedding_size = 20
    slope = 1.0
    seed = 0

    np.random.seed(seed)
    torch.manual_seed(seed)

    with torch.no_grad():
        x_emb = nn.Embedding(nb_entities, embedding_size, sparse=True)
        y_emb = nn.Embedding(nb_entities, embedding_size, sparse=True)

        x_emb.weight.data *= 1e-3
        y_emb.weight.data *= 1e-3

        kernel = GaussianKernel(slope=slope)

        a = kernel(x_emb.weight, y_emb.weight)
        b = kernel(x_emb.weight, x_emb.weight)

        c = kernel.pairwise(x_emb.weight, y_emb.weight)
        d = kernel.pairwise(x_emb.weight, x_emb.weight)

        a_np = a.numpy()
        b_np = b.numpy()
        c_np = c.numpy()
        d_np = d.numpy()

        np.testing.assert_allclose(a_np, np.diag(c_np), rtol=1e-7, atol=1e-7)
        np.testing.assert_allclose(b_np, np.diag(d_np), rtol=1e-7, atol=1e-7)
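
# The assertions above rely on the identity diag(pairwise(X, Y)) == kernel(X, Y):
# the batched call scores row-aligned pairs while `pairwise` scores all pairs.
# A minimal, self-contained sketch of a Gaussian (RBF) kernel with that interface,
# assuming k(x, y) = exp(-slope * ||x - y||^2); the project's GaussianKernel may
# parameterise the slope differently.
import torch


class ToyGaussianKernel:
    def __init__(self, slope: float = 1.0):
        self.slope = slope

    def __call__(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        # Row-aligned values: x and y are both [N, E] -> [N].
        return torch.exp(- self.slope * ((x - y) ** 2).sum(dim=-1))

    def pairwise(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        # All-pairs values: x is [N, E], y is [M, E] -> [N, M].
        return torch.exp(- self.slope * torch.cdist(x, y) ** 2)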
def test_clutrr_v2():
    embedding_size = 20

    triples, hops = [], []
    # Triples the model is expected to prove in addition to the stated facts.
    extra_triples = []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]
        extra_triples += [(f'a{i}', 'p', f'c{i}'), (f'a{i}', 'q', f'c{i}'), (f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities, embedding_size, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size, sparse=True)

    for scoring_type in ['concat']:  # ['min', 'concat']:
        model = NeuralKB(kernel=kernel, scoring_type=scoring_type)

        indices = torch.LongTensor(np.array([predicate_to_index['p'], predicate_to_index['q']]))
        _hops = SymbolicReformulator(predicate_embeddings, indices)
        hoppy = Hoppy(model, hops_lst=[(_hops, False)], depth=1)

        for s in entity_lst:
            for p in predicate_lst:
                for o in entity_lst:
                    xs_np = np.array([entity_to_index[s]])
                    xp_np = np.array([predicate_to_index[p]])
                    xo_np = np.array([entity_to_index[o]])

                    with torch.no_grad():
                        xs = torch.LongTensor(xs_np)
                        xp = torch.LongTensor(xp_np)
                        xo = torch.LongTensor(xo_np)

                        xs_emb = entity_embeddings(xs)
                        xp_emb = predicate_embeddings(xp)
                        xo_emb = entity_embeddings(xo)

                        rel_emb = encode_relation(facts=triples, relation_embeddings=predicate_embeddings,
                                                  relation_to_idx=predicate_to_index)
                        arg1_emb, arg2_emb = encode_arguments(facts=triples, entity_embeddings=entity_embeddings,
                                                              entity_to_idx=entity_to_index)

                        facts = [rel_emb, arg1_emb, arg2_emb]

                        inf = hoppy.score(xp_emb, xs_emb, xo_emb, facts=facts,
                                          entity_embeddings=entity_embeddings.weight)
                        inf_np = inf.cpu().numpy()

                        print(s, p, o, inf_np)

                        if (s, p, o) in (triples + extra_triples):
                            assert inf_np[0] > 0.9
                        else:
                            assert inf_np[0] < 0.1
def test_smart_clutrr_v1():
    embedding_size = 50

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities, embedding_size, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size, sparse=True)

    for scoring_type in ['concat']:
        model = NeuralKB(kernel=kernel, scoring_type=scoring_type)

        for s in entity_lst:
            for p in predicate_lst:
                for o in entity_lst:
                    xs_np = np.array([entity_to_index[s]])
                    xp_np = np.array([predicate_to_index[p]])
                    xo_np = np.array([entity_to_index[o]])

                    with torch.no_grad():
                        xs = torch.from_numpy(xs_np)
                        xp = torch.from_numpy(xp_np)
                        xo = torch.from_numpy(xo_np)

                        xs_emb = entity_embeddings(xs)
                        xp_emb = predicate_embeddings(xp)
                        xo_emb = entity_embeddings(xo)

                        rel_emb = encode_relation(facts=triples, relation_embeddings=predicate_embeddings,
                                                  relation_to_idx=predicate_to_index)
                        arg1_emb, arg2_emb = encode_arguments(facts=triples, entity_embeddings=entity_embeddings,
                                                              entity_to_idx=entity_to_index)

                        facts = [rel_emb, arg1_emb, arg2_emb]

                        inf = model.score(xp_emb, xs_emb, xo_emb, facts=facts)
                        inf_np = inf.cpu().numpy()

                        if (s, p, o) in triples:
                            assert inf_np[0] > 0.95
                        else:
                            assert inf_np[0] < 0.01
def test_clutrr_v3():
    embedding_size = 20
    batch_size = 8

    torch.manual_seed(0)

    triples, hops = [], []

    for i in range(32):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel(slope=None)

    entity_embeddings = nn.Embedding(nb_entities, embedding_size, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size, sparse=True)

    # _hops = LinearReformulator(2, embedding_size)
    _hops = AttentiveReformulator(2, predicate_embeddings)

    model = NeuralKB(kernel=kernel, scoring_type='concat')
    hoppy = Hoppy(model, hops_lst=[(_hops, False)], depth=1)

    params = [p for p in hoppy.parameters()
              if not torch.equal(p, entity_embeddings.weight)
              and not torch.equal(p, predicate_embeddings.weight)]

    for tensor in params:
        print(f'\t{tensor.size()}\t{tensor.device}')

    loss_function = nn.BCELoss()
    optimizer = optim.Adagrad(params, lr=0.1)

    hops_data = []
    for _ in range(64):
        hops_data += hops

    batches = make_batches(len(hops_data), batch_size)

    rs = np.random.RandomState()

    c, d = 0.0, 0.0

    p_emb = predicate_embeddings(torch.from_numpy(np.array([predicate_to_index['p']])))
    q_emb = predicate_embeddings(torch.from_numpy(np.array([predicate_to_index['q']])))

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        s_lst = [s for (s, _, _) in hops_batch]
        p_lst = [p for (_, p, _) in hops_batch]
        o_lst = [o for (_, _, o) in hops_batch]

        nb_positives = len(s_lst)
        nb_negatives = nb_positives * 3

        s_n_lst = rs.permutation(nb_entities)[:nb_negatives].tolist()
        nb_negatives = len(s_n_lst)
        o_n_lst = rs.permutation(nb_entities)[:nb_negatives].tolist()
        p_n_lst = list(islice(cycle(p_lst), nb_negatives))

        xs_np = np.array([entity_to_index[s] for s in s_lst] + s_n_lst)
        xp_np = np.array([predicate_to_index[p] for p in p_lst + p_n_lst])
        xo_np = np.array([entity_to_index[o] for o in o_lst] + o_n_lst)

        xs_emb = entity_embeddings(torch.from_numpy(xs_np))
        xp_emb = predicate_embeddings(torch.from_numpy(xp_np))
        xo_emb = entity_embeddings(torch.from_numpy(xo_np))

        rel_emb = encode_relation(facts=triples, relation_embeddings=predicate_embeddings,
                                  relation_to_idx=predicate_to_index)
        arg1_emb, arg2_emb = encode_arguments(facts=triples, entity_embeddings=entity_embeddings,
                                              entity_to_idx=entity_to_index)

        facts = [rel_emb, arg1_emb, arg2_emb]

        scores = hoppy.score(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings.weight)

        labels_np = np.zeros(xs_np.shape[0])
        labels_np[:nb_positives] = 1
        labels = torch.from_numpy(labels_np).float()

        # for s, p, o, l in zip(xs_np, xp_np, xo_np, labels):
        #     print(s, p, o, l)

        loss = loss_function(scores, labels)

        hop_1_emb = hoppy.hops_lst[0][0].hops_lst[0](xp_emb)
        hop_2_emb = hoppy.hops_lst[0][0].hops_lst[1](xp_emb)

        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        print(c, d)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    assert c > 0.95 and d > 0.95
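
# A self-contained sketch of the 1-positive-to-3-negatives layout built in the
# training loop above: negatives reuse the positive predicates (cycled) and draw
# random subject/object indices. `corrupt_batch` is an illustrative helper, not
# part of the codebase; like the loop above, rs.permutation caps the number of
# negatives at nb_entities.
def corrupt_batch(pos_s, pos_p, pos_o, nb_entities, rs, ratio=3):
    nb_neg = min(len(pos_s) * ratio, nb_entities)

    neg_s = rs.permutation(nb_entities)[:nb_neg].tolist()
    neg_o = rs.permutation(nb_entities)[:nb_neg].tolist()
    neg_p = list(islice(cycle(pos_p), nb_neg))

    xs = np.array(pos_s + neg_s)
    xp = np.array(pos_p + neg_p)
    xo = np.array(pos_o + neg_o)

    # Positives first, then negatives: the BCE labels follow the same layout.
    labels = np.zeros(xs.shape[0], dtype=np.float32)
    labels[:len(pos_s)] = 1.0
    return xs, xp, xo, labels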
def main(argv):
    parser = argparse.ArgumentParser('KBC Research', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--train', action='store', required=True, type=str)
    parser.add_argument('--dev', action='store', type=str, default=None)
    parser.add_argument('--test', action='store', type=str, default=None)
    parser.add_argument('--test-i', action='store', type=str, default=None)
    parser.add_argument('--test-ii', action='store', type=str, default=None)

    parser.add_argument('--embedding-size', '-k', action='store', type=int, default=20)
    parser.add_argument('--k-max', '-K', action='store', type=int, default=1)
    parser.add_argument('--max-depth', '-d', action='store', type=int, default=1)
    parser.add_argument('--hops', nargs='+', type=str, default=['1', '2'])

    # training params
    parser.add_argument('--epochs', '-e', action='store', type=int, default=100)
    parser.add_argument('--learning-rate', '-l', action='store', type=float, default=0.1)
    parser.add_argument('--batch-size', '-b', action='store', type=int, default=8)

    parser.add_argument('--N2', action='store', type=float, default=None)
    parser.add_argument('--N3', action='store', type=float, default=None)

    parser.add_argument('--reformulator', '-r', action='store', type=str, default='linear',
                        choices=['static', 'linear', 'attentive', 'memory', 'ntp'])
    parser.add_argument('--nb-rules', '-R', action='store', type=int, default=4)

    parser.add_argument('--seed', action='store', type=int, default=0)
    parser.add_argument('--validate-every', '-V', action='store', type=int, default=None)
    parser.add_argument('--input-type', '-I', action='store', type=str, default='standard',
                        choices=['standard', 'reciprocal'])
    parser.add_argument('--init-size', '-i', action='store', type=float, default=1.0)

    parser.add_argument('--load', action='store', type=str, default=None)
    parser.add_argument('--save', action='store', type=str, default=None)

    parser.add_argument('--quiet', '-q', action='store_true', default=False)

    args = parser.parse_args(argv)

    import pprint
    pprint.pprint(vars(args))

    train_path = args.train
    dev_path = args.dev
    test_path = args.test
    test_i_path = args.test_i
    test_ii_path = args.test_ii

    embedding_size = args.embedding_size
    hops_str = args.hops

    nb_epochs = args.epochs
    learning_rate = args.learning_rate
    batch_size = args.batch_size

    N2_weight = args.N2
    N3_weight = args.N3

    reformulator_type = args.reformulator
    nb_rules = args.nb_rules

    eval_batch_size = batch_size

    seed = args.seed
    validate_every = args.validate_every
    input_type = args.input_type
    init_size = args.init_size

    load_path = args.load
    save_path = args.save

    is_quiet = args.quiet

    # set the seeds
    np.random.seed(seed)
    random_state = np.random.RandomState(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = 'cpu'
    logger.info(f'Device: {device}')

    if torch.cuda.is_available():
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    data = Data(train_path=train_path, dev_path=dev_path, test_path=test_path,
                test_i_path=test_i_path, test_ii_path=test_ii_path, input_type=input_type)

    triples_name_pairs = [
        (data.dev_triples, 'dev'),
        (data.test_triples, 'test'),
        (data.test_i_triples, 'test-I'),
        (data.test_ii_triples, 'test-II'),
    ]

    rank = embedding_size

    entity_embeddings = nn.Embedding(data.nb_entities, rank, sparse=True)
    predicate_embeddings = nn.Embedding(data.nb_predicates, rank, sparse=True)

    entity_embeddings.weight.data *= init_size
    predicate_embeddings.weight.data *= init_size

    kernel = GaussianKernel(slope=1.0)

    fact_rel = torch.from_numpy(np.array([data.predicate_to_idx[p] for (_, p, _) in data.train_triples])).to(device)
    fact_arg1 = torch.from_numpy(np.array([data.entity_to_idx[s] for (s, _, _) in data.train_triples])).to(device)
    fact_arg2 = torch.from_numpy(np.array([data.entity_to_idx[o] for (_, _, o) in data.train_triples])).to(device)
    facts = [fact_rel, fact_arg1, fact_arg2]

    base_model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                          facts=facts, kernel=kernel).to(device)

    memory = None

    def make_hop(s: str) -> Tuple[BaseReformulator, bool]:
        nonlocal memory
        if s.isdigit():
            nb_hops, is_reversed = int(s), False
        else:
            nb_hops, is_reversed = int(s[:-1]), True
        res = None
        if reformulator_type in {'static'}:
            res = StaticReformulator(nb_hops, rank)
        elif reformulator_type in {'linear'}:
            res = LinearReformulator(nb_hops, rank)
        elif reformulator_type in {'attentive'}:
            res = AttentiveReformulator(nb_hops, predicate_embeddings)
        elif reformulator_type in {'memory'}:
            memory = MemoryReformulator.Memory(nb_hops, nb_rules, rank) if memory is None else memory
            res = MemoryReformulator(memory)
        elif reformulator_type in {'ntp'}:
            res = NTPReformulator(nb_hops=nb_hops, embedding_size=embedding_size, kernel=kernel)
        assert res is not None
        return res, is_reversed

    hops_lst = [make_hop(s) for s in hops_str]

    model = MultiHoppy(model=base_model, entity_embeddings=entity_embeddings, hops_lst=hops_lst).to(device)

    params_lst = [p for p in model.parameters()] + [predicate_embeddings.weight]
    params = nn.ParameterList(params_lst).to(device)

    if load_path is not None:
        model.load_state_dict(torch.load(load_path))

    for tensor in params_lst:
        logger.info(f'\t{tensor.size()}\t{tensor.device}')

    optimizer = optim.Adagrad(params, lr=learning_rate)

    loss_function = nn.BCELoss()

    N2_reg = N2() if N2_weight is not None else None
    N3_reg = N3() if N3_weight is not None else None

    for epoch_no in range(1, nb_epochs + 1):
        batcher = Batcher(data, batch_size, 1, random_state)
        nb_batches = len(batcher.batches)

        epoch_loss_values = []
        for batch_no, (batch_start, batch_end) in enumerate(batcher.batches, 1):
            xp_batch_np, xs_batch_np, xo_batch_np, xi_batch_np = batcher.get_batch(batch_start, batch_end)

            xp_batch = torch.from_numpy(xp_batch_np.astype('int64')).to(device)
            xs_batch = torch.from_numpy(xs_batch_np.astype('int64')).to(device)
            xo_batch = torch.from_numpy(xo_batch_np.astype('int64')).to(device)
            xi_batch = torch.from_numpy(xi_batch_np.astype('int64')).to(device)

            xp_batch_emb = predicate_embeddings(xp_batch)
            xs_batch_emb = entity_embeddings(xs_batch)
            xo_batch_emb = entity_embeddings(xo_batch)

            sp_scores, po_scores = model.forward(xp_batch_emb, xs_batch_emb, xo_batch_emb, mask_indices=xi_batch)

            factors = [model.factor(e) for e in [xp_batch_emb, xs_batch_emb, xo_batch_emb]]

            sp_objects = [data.sp_to_o_lst.get((xs, xp), None) for xs, xp in zip(xs_batch_np, xp_batch_np)]
            po_subjects = [data.po_to_s_lst.get((xp, xo), None) for xp, xo in zip(xp_batch_np, xo_batch_np)]

            sp_targets = compute_bce_targets(xp_batch.shape[0], data.nb_entities, sp_objects, device=device)
            po_targets = compute_bce_targets(xp_batch.shape[0], data.nb_entities, po_subjects, device=device)

            s_loss = loss_function(sp_scores, sp_targets)
            o_loss = loss_function(po_scores, po_targets)

            loss = s_loss + o_loss

            loss += N2_weight * N2_reg(factors) if N2_weight is not None else 0.0
            loss += N3_weight * N3_reg(factors) if N3_weight is not None else 0.0

            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

            loss_value = loss.item()
            epoch_loss_values += [loss_value]

            if not is_quiet:
                logger.info(f'Epoch {epoch_no}/{nb_epochs}\tBatch {batch_no}/{nb_batches}\tLoss {loss_value:.6f}')

        loss_mean, loss_std = np.mean(epoch_loss_values), np.std(epoch_loss_values)
        logger.info(f'Epoch {epoch_no}/{nb_epochs}\tLoss {loss_mean:.4f} ± {loss_std:.4f}')

        if validate_every is not None and epoch_no % validate_every == 0:
            for triples, name in [(t, n) for t, n in triples_name_pairs if len(t) > 0]:
                metrics = evaluate(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                                   test_triples=triples, all_triples=data.all_triples,
                                   entity_to_index=data.entity_to_idx, predicate_to_index=data.predicate_to_idx,
                                   model=model, batch_size=eval_batch_size, device=device)
                logger.info(f'Epoch {epoch_no}/{nb_epochs}\t{name} results\t{metrics_to_str(metrics)}')

    for triples, name in [(t, n) for t, n in triples_name_pairs if len(t) > 0]:
        metrics = evaluate(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                           test_triples=triples, all_triples=data.all_triples,
                           entity_to_index=data.entity_to_idx, predicate_to_index=data.predicate_to_idx,
                           model=model, batch_size=eval_batch_size, device=device)
        logger.info(f'Final \t{name} results\t{metrics_to_str(metrics)}')

    if save_path is not None:
        torch.save(model.state_dict(), save_path)

    logger.info("Training finished")
def main(argv):
    parser = argparse.ArgumentParser('KBC Research', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--train', action='store', required=True, type=str)
    parser.add_argument('--dev', action='store', type=str, default=None)
    parser.add_argument('--test', action='store', type=str, default=None)
    parser.add_argument('--test-i', action='store', type=str, default=None)
    parser.add_argument('--test-ii', action='store', type=str, default=None)

    # model params
    parser.add_argument('--model', '-m', action='store', type=str, default='distmult',
                        choices=['distmult', 'complex', 'ntpzero'])
    parser.add_argument('--embedding-size', '-k', action='store', type=int, default=100)
    parser.add_argument('--batch-size', '-b', action='store', type=int, default=100)
    parser.add_argument('--eval-batch-size', '-B', action='store', type=int, default=None)

    # training params
    parser.add_argument('--epochs', '-e', action='store', type=int, default=100)
    parser.add_argument('--learning-rate', '-l', action='store', type=float, default=0.1)
    parser.add_argument('--optimizer', '-o', action='store', type=str, default='adagrad',
                        choices=['adagrad', 'adam', 'sgd'])

    parser.add_argument('--N2', action='store', type=float, default=None)
    parser.add_argument('--N3', action='store', type=float, default=None)

    parser.add_argument('--seed', action='store', type=int, default=0)
    parser.add_argument('--validate-every', '-V', action='store', type=int, default=None)
    parser.add_argument('--input-type', '-I', action='store', type=str, default='standard',
                        choices=['standard', 'reciprocal'])
    parser.add_argument('--gradient-accumulation-steps', '--gas', action='store', type=int, default=1)

    parser.add_argument('--load', action='store', type=str, default=None)
    parser.add_argument('--save', action='store', type=str, default=None)

    parser.add_argument('--quiet', '-q', action='store_true', default=False)

    args = parser.parse_args(argv)

    import pprint
    pprint.pprint(vars(args))

    train_path = args.train
    dev_path = args.dev
    test_path = args.test
    test_i_path = args.test_i
    test_ii_path = args.test_ii

    model_name = args.model
    optimizer_name = args.optimizer

    embedding_size = args.embedding_size
    batch_size = args.batch_size
    eval_batch_size = batch_size if args.eval_batch_size is None else args.eval_batch_size

    nb_epochs = args.epochs
    seed = args.seed
    learning_rate = args.learning_rate

    N2_weight = args.N2
    N3_weight = args.N3

    validate_every = args.validate_every
    input_type = args.input_type
    gradient_accumulation_steps = args.gradient_accumulation_steps

    load_path = args.load
    save_path = args.save

    is_quiet = args.quiet

    # set the seeds
    np.random.seed(seed)
    random_state = np.random.RandomState(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f'Device: {device}')

    data = Data(train_path=train_path, dev_path=dev_path, test_path=test_path,
                test_i_path=test_i_path, test_ii_path=test_ii_path, input_type=input_type)

    triples_name_pairs = [
        (data.dev_triples, 'dev'),
        (data.test_triples, 'test'),
        (data.test_i_triples, 'test-I'),
        (data.test_ii_triples, 'test-II'),
    ]

    rank = embedding_size * 2 if model_name in {'complex'} else embedding_size
    init_size = 1e-3

    entity_embeddings = nn.Embedding(data.nb_entities, rank, sparse=True)
    predicate_embeddings = nn.Embedding(data.nb_predicates, rank, sparse=True)

    entity_embeddings.weight.data *= init_size
    predicate_embeddings.weight.data *= init_size

    parameters_lst = nn.ModuleDict({
        'entities': entity_embeddings,
        'predicates': predicate_embeddings
    })
    parameters_lst.to(device)

    if load_path is not None:
        parameters_lst.load_state_dict(torch.load(load_path))

    kernel = facts = None
    if model_name in {'ntpzero'}:
        kernel = GaussianKernel(slope=None)

        fact_rel = torch.from_numpy(np.array([data.predicate_to_idx[p] for (_, p, _) in data.train_triples])).to(device)
        fact_arg1 = torch.from_numpy(np.array([data.entity_to_idx[s] for (s, _, _) in data.train_triples])).to(device)
        fact_arg2 = torch.from_numpy(np.array([data.entity_to_idx[o] for (_, _, o) in data.train_triples])).to(device)
        facts = [fact_rel, fact_arg1, fact_arg2]

    model_factory = {
        'distmult': lambda: DistMult(entity_embeddings=entity_embeddings),
        'complex': lambda: ComplEx(entity_embeddings=entity_embeddings),
        'ntpzero': lambda: NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                                    kernel=kernel, facts=facts)
    }

    assert model_name in model_factory
    model = model_factory[model_name]()
    model.to(device)

    logger.info('Model state:')
    for param_tensor in parameters_lst.state_dict():
        logger.info(f'\t{param_tensor}\t{parameters_lst.state_dict()[param_tensor].size()}')

    optimizer_factory = {
        'adagrad': lambda: optim.Adagrad(parameters_lst.parameters(), lr=learning_rate),
        'adam': lambda: optim.Adam(parameters_lst.parameters(), lr=learning_rate),
        'sgd': lambda: optim.SGD(parameters_lst.parameters(), lr=learning_rate)
    }

    assert optimizer_name in optimizer_factory
    optimizer = optimizer_factory[optimizer_name]()

    loss_function = nn.CrossEntropyLoss(reduction='mean')

    N2_reg = N2() if N2_weight is not None else None
    N3_reg = N3() if N3_weight is not None else None

    for epoch_no in range(1, nb_epochs + 1):
        batcher = Batcher(data, batch_size, 1, random_state)
        nb_batches = len(batcher.batches)

        epoch_loss_values = []
        for batch_no, (batch_start, batch_end) in enumerate(batcher.batches, 1):
            xp_batch, xs_batch, xo_batch, xi_batch = batcher.get_batch(batch_start, batch_end)

            xp_batch = torch.from_numpy(xp_batch.astype('int64')).to(device)
            xs_batch = torch.from_numpy(xs_batch.astype('int64')).to(device)
            xo_batch = torch.from_numpy(xo_batch.astype('int64')).to(device)
            xi_batch = torch.from_numpy(xi_batch.astype('int64')).to(device)

            xp_batch_emb = predicate_embeddings(xp_batch)
            xs_batch_emb = entity_embeddings(xs_batch)
            xo_batch_emb = entity_embeddings(xo_batch)

            sp_scores, po_scores = model.forward(xp_batch_emb, xs_batch_emb, xo_batch_emb)
            factors = [model.factor(e) for e in [xp_batch_emb, xs_batch_emb, xo_batch_emb]]

            s_loss = loss_function(sp_scores, xo_batch)
            o_loss = loss_function(po_scores, xs_batch)

            loss = s_loss + o_loss

            loss += N2_weight * N2_reg(factors) if N2_weight is not None else 0.0
            loss += N3_weight * N3_reg(factors) if N3_weight is not None else 0.0

            if gradient_accumulation_steps > 1:
                loss = loss / gradient_accumulation_steps

            loss.backward()

            if batch_no % gradient_accumulation_steps == 0 or batch_no == nb_batches:
                optimizer.step()
                optimizer.zero_grad()

            loss_value = loss.item()
            epoch_loss_values += [loss_value]

            if not is_quiet:
                logger.info(f'Epoch {epoch_no}/{nb_epochs}\tBatch {batch_no}/{nb_batches}\tLoss {loss_value:.6f}')

        loss_mean, loss_std = np.mean(epoch_loss_values), np.std(epoch_loss_values)
        logger.info(f'Epoch {epoch_no}/{nb_epochs}\tLoss {loss_mean:.4f} ± {loss_std:.4f}')

        if validate_every is not None and epoch_no % validate_every == 0:
            for triples, name in [(t, n) for t, n in triples_name_pairs if len(t) > 0]:
                metrics = evaluate(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                                   test_triples=triples, all_triples=data.all_triples,
                                   entity_to_index=data.entity_to_idx, predicate_to_index=data.predicate_to_idx,
                                   model=model, batch_size=eval_batch_size, device=device)
                logger.info(f'Epoch {epoch_no}/{nb_epochs}\t{name} results\t{metrics_to_str(metrics)}')

    for triples, name in [(t, n) for t, n in triples_name_pairs if len(t) > 0]:
        metrics = evaluate(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                           test_triples=triples, all_triples=data.all_triples,
                           entity_to_index=data.entity_to_idx, predicate_to_index=data.predicate_to_idx,
                           model=model, batch_size=eval_batch_size, device=device)
        logger.info(f'Final \t{name} results\t{metrics_to_str(metrics)}')

    if save_path is not None:
        torch.save(parameters_lst.state_dict(), save_path)

    logger.info("Training finished")
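
# The loop above scales the loss by gradient_accumulation_steps and only steps
# the optimizer every few batches, emulating a larger batch. A minimal,
# self-contained sketch of the same pattern on a toy problem (all names here
# are illustrative):
def _gradient_accumulation_demo():
    import torch
    import torch.nn as nn
    import torch.optim as optim

    model = nn.Linear(4, 1)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    loss_function = nn.MSELoss()
    accumulation_steps = 4

    batches = [(torch.randn(8, 4), torch.randn(8, 1)) for _ in range(8)]
    for batch_no, (x, y) in enumerate(batches, 1):
        # Scale so the accumulated gradient matches one large batch.
        loss = loss_function(model(x), y) / accumulation_steps
        loss.backward()  # gradients accumulate in .grad across iterations
        if batch_no % accumulation_steps == 0 or batch_no == len(batches):
            optimizer.step()
            optimizer.zero_grad()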
def test_learning_v2():
    embedding_size = 100

    torch.manual_seed(0)

    triples, hops = [], []

    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

    fact_rel = torch.LongTensor(np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.LongTensor(np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.LongTensor(np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                     kernel=kernel, facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    for s, p, o in hops:
        xs_np = np.array([entity_to_index[s]])
        xp_np = np.array([predicate_to_index[p]])
        xo_np = np.array([entity_to_index[o]])

        with torch.no_grad():
            xs = torch.LongTensor(xs_np)
            xp = torch.LongTensor(xp_np)
            xo = torch.LongTensor(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            inf = hoppy.score(xp_emb, xs_emb, xo_emb)
            inf_np = inf.cpu().numpy()

            # Before training, the untrained reformulator should not prove the hops.
            assert inf_np < 0.5
def test_masking_v1():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    init_size = 1.0

    rs = np.random.RandomState(0)

    for position in [0, 1]:
        for st in ['min', 'concat']:
            with torch.no_grad():
                triples = [('a', 'p', 'b'), ('c', 'q', 'd')]
                entity_to_index = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
                predicate_to_index = {'p': 0, 'q': 1}

                kernel = GaussianKernel()

                entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
                predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

                entity_embeddings.weight.data *= init_size
                predicate_embeddings.weight.data *= init_size

                fact_rel = torch.LongTensor(np.array([predicate_to_index[p] for (_, p, _) in triples]))
                fact_arg1 = torch.LongTensor(np.array([entity_to_index[s] for (s, _, _) in triples]))
                fact_arg2 = torch.LongTensor(np.array([entity_to_index[o] for (_, _, o) in triples]))
                facts = [fact_rel, fact_arg1, fact_arg2]

                model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                                 kernel=kernel, facts=facts, scoring_type=st)

                xs_np = rs.randint(nb_entities, size=32)
                xp_np = rs.randint(nb_predicates, size=32)
                xo_np = rs.randint(nb_entities, size=32)
                xi_np = np.array([position] * xs_np.shape[0])

                xs_np[0] = 0
                xp_np[0] = 0
                xo_np[0] = 1

                xs_np[1] = 2
                xp_np[1] = 1
                xo_np[1] = 3

                xs = torch.LongTensor(xs_np)
                xp = torch.LongTensor(xp_np)
                xo = torch.LongTensor(xo_np)
                xi = torch.LongTensor(xi_np)

                xs_emb = entity_embeddings(xs)
                xp_emb = predicate_embeddings(xp)
                xo_emb = entity_embeddings(xo)

                # Mask out the fact at index `position`: its own query should no
                # longer be provable, while the other fact remains provable.
                model.mask_indices = xi

                scores = model.forward(xp_emb, xs_emb, xo_emb)
                inf = model.score(xp_emb, xs_emb, xo_emb)

                if position == 0:
                    assert inf[0] < 0.5
                    assert inf[1] > 0.9
                elif position == 1:
                    assert inf[0] > 0.9
                    assert inf[1] < 0.5

                scores_sp, scores_po = scores

                inf = inf.cpu().numpy()
                scores_sp = scores_sp.cpu().numpy()
                scores_po = scores_po.cpu().numpy()

                for i in range(xs.shape[0]):
                    np.testing.assert_allclose(inf[i], scores_sp[i, xo[i]], rtol=1e-5, atol=1e-5)
                    np.testing.assert_allclose(inf[i], scores_po[i, xs[i]], rtol=1e-5, atol=1e-5)
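
# The final consistency check above (the triple score equals the corresponding
# entry of the 1-vs-all sp/po score matrices) can be illustrated with a toy
# DistMult-style scorer; this is a sketch, not the project's scoring function:
def _gather_consistency_demo():
    import torch

    def score_triple(p, s, o):            # [B, E] each -> [B]
        return (p * s * o).sum(dim=-1)

    def score_sp(p, s, all_entities):     # -> [B, N], scores over all candidate objects
        return (p * s) @ all_entities.t()

    E = torch.randn(7, 5)                 # 7 entities, size-5 embeddings
    p = torch.randn(3, 5)
    s_idx = torch.tensor([0, 2, 4])
    o_idx = torch.tensor([1, 3, 5])

    inf = score_triple(p, E[s_idx], E[o_idx])
    sp = score_sp(p, E[s_idx], E)
    # Gathering the true object's column recovers the per-triple score.
    assert torch.allclose(inf, sp[torch.arange(3), o_idx])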
def lookup(query: Tensor, facts: Tensor, nb_facts: Tensor, kernel) -> Tensor:
    # Signature and the first two lines reconstructed from the usage below:
    # query: [B, E], facts: [B, F, E], nb_facts: [B] (true number of facts per row).
    batch_size, fact_size = facts.shape[0], facts.shape[1]
    query_repeat = query.view(batch_size, 1, -1).repeat(1, fact_size, 1)
    kernel_values = kernel(query_repeat, facts).view(batch_size, fact_size)
    # Zero out kernel values that correspond to padding facts.
    mask = torch.arange(fact_size).expand(batch_size, fact_size) < nb_facts.unsqueeze(1)
    return kernel_values * mask


def uniform(a: Tensor, b: Tensor, c: Optional[Tensor] = None) -> Tuple[Tensor, Optional[Tensor]]:
    if a.shape[0] > b.shape[0]:
        m = a.shape[0] // b.shape[0]
        b = b.view(b.shape[0], 1, b.shape[1], b.shape[2]).repeat(1, m, 1, 1).view(-1, b.shape[1], b.shape[2])
        if c is not None:
            c = c.view(-1, 1).repeat(1, m).view(-1)
    # Return unconditionally, so callers also get (b, c) back when no tiling is needed.
    return b, c


if __name__ == '__main__':
    kernel = GaussianKernel()

    batch_size = 8
    fact_size = 32
    embedding_size = 10

    query = torch.rand(batch_size, embedding_size)
    facts = torch.rand(batch_size, fact_size, embedding_size)
    nb_facts = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=torch.int32)

    tmp = lookup(query, facts, nb_facts, kernel)
    print(tmp)
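
# `uniform` above tiles `b` (and optionally `c`) along the batch dimension so
# they line up with a larger `a`; a quick check of that behaviour (assuming
# `uniform` as defined above):
def _uniform_demo():
    a = torch.rand(6, 4, 3)        # batch of 6
    b = torch.rand(2, 4, 3)        # batch of 2, repeated 3x to match
    c = torch.tensor([7.0, 9.0])   # per-example scalar, tiled the same way

    b2, c2 = uniform(a, b, c)
    assert b2.shape == (6, 4, 3)
    assert c2.tolist() == [7.0, 7.0, 7.0, 9.0, 9.0, 9.0]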
def main(argv):
    argparser = argparse.ArgumentParser('CLUTRR', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    train_path = test_path = "data/clutrr-emnlp/data_test/64.csv"

    argparser.add_argument('--train', action='store', type=str, default=train_path)
    argparser.add_argument('--test', nargs='+', type=str, default=[test_path])

    # model params
    argparser.add_argument('--embedding-size', '-k', action='store', type=int, default=20)
    argparser.add_argument('--k-max', '-m', action='store', type=int, default=10)
    argparser.add_argument('--max-depth', '-d', action='store', type=int, default=2)
    argparser.add_argument('--test-max-depth', action='store', type=int, default=None)

    argparser.add_argument('--hops', nargs='+', type=str, default=['2', '2', '1R'])

    # training params
    argparser.add_argument('--epochs', '-e', action='store', type=int, default=100)
    argparser.add_argument('--learning-rate', '-l', action='store', type=float, default=0.1)
    argparser.add_argument('--batch-size', '-b', action='store', type=int, default=8)
    argparser.add_argument('--optimizer', '-o', action='store', type=str, default='adagrad',
                           choices=['adagrad', 'adam', 'sgd'])

    argparser.add_argument('--seed', action='store', type=int, default=0)
    argparser.add_argument('--evaluate-every', '-V', action='store', type=int, default=32)

    argparser.add_argument('--N2', action='store', type=float, default=None)
    argparser.add_argument('--N3', action='store', type=float, default=None)
    argparser.add_argument('--entropy', '-E', action='store', type=float, default=None)

    argparser.add_argument('--scoring-type', '-s', action='store', type=str, default='concat',
                           choices=['concat', 'min'])
    argparser.add_argument('--tnorm', '-t', action='store', type=str, default='min',
                           choices=['min', 'prod'])
    argparser.add_argument('--reformulator', '-r', action='store', type=str, default='linear',
                           choices=['static', 'linear', 'attentive', 'memory', 'ntp'])
    argparser.add_argument('--nb-rules', '-R', action='store', type=int, default=4)

    argparser.add_argument('--GNTP-R', action='store', type=int, default=None)

    argparser.add_argument('--slope', '-S', action='store', type=float, default=None)
    argparser.add_argument('--init-size', '-i', action='store', type=float, default=1.0)

    argparser.add_argument('--init', action='store', type=str, default='uniform')
    argparser.add_argument('--ref-init', action='store', type=str, default='random')

    argparser.add_argument('--debug', '-D', action='store_true', default=False)

    argparser.add_argument('--load', action='store', type=str, default=None)
    argparser.add_argument('--save', action='store', type=str, default=None)

    args = argparser.parse_args(argv)

    train_path = args.train
    test_paths = args.test

    embedding_size = args.embedding_size
    k_max = args.k_max
    max_depth = args.max_depth
    test_max_depth = args.test_max_depth

    hops_str = args.hops

    nb_epochs = args.epochs
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    optimizer_name = args.optimizer

    seed = args.seed
    evaluate_every = args.evaluate_every

    N2_weight = args.N2
    N3_weight = args.N3
    entropy_weight = args.entropy

    scoring_type = args.scoring_type
    tnorm_name = args.tnorm
    reformulator_name = args.reformulator
    nb_rules = args.nb_rules

    gntp_R = args.GNTP_R

    slope = args.slope
    init_size = args.init_size

    init_type = args.init
    ref_init_type = args.ref_init

    is_debug = args.debug

    load_path = args.load
    save_path = args.save

    np.random.seed(seed)
    random_state = np.random.RandomState(seed)
    torch.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f'Device: {device}')

    if torch.cuda.is_available():
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    data = Data(train_path=train_path, test_paths=test_paths)

    rel_to_predicate = data.relation_to_predicate
    predicate_to_rel = data.predicate_to_relations
    entity_lst, predicate_lst, relation_lst = data.entity_lst, data.predicate_lst, data.relation_lst

    nb_examples = len(data.train)
    nb_entities = len(entity_lst)
    nb_relations = len(relation_lst)

    entity_to_idx = {e: i for i, e in enumerate(entity_lst)}
    relation_to_idx = {r: i for i, r in enumerate(relation_lst)}

    kernel = GaussianKernel(slope=slope)

    entity_embeddings = nn.Embedding(nb_entities, embedding_size, sparse=False).to(device)
    nn.init.uniform_(entity_embeddings.weight, -1.0, 1.0)
    # Freeze the (randomly initialised) entity embeddings.
    entity_embeddings.weight.requires_grad = False

    relation_embeddings = nn.Embedding(nb_relations, embedding_size, sparse=False).to(device)

    if init_type in {'uniform'}:
        nn.init.uniform_(relation_embeddings.weight, -1.0, 1.0)

    relation_embeddings.weight.data *= init_size

    model = BatchNeuralKB(kernel=kernel, scoring_type=scoring_type).to(device)

    memory = None

    def make_hop(s: str) -> Tuple[BaseReformulator, bool]:
        nonlocal memory
        if s.isdigit():
            nb_hops, is_reversed = int(s), False
        else:
            nb_hops, is_reversed = int(s[:-1]), True
        res = None
        if reformulator_name in {'static'}:
            res = StaticReformulator(nb_hops, embedding_size, init_name=ref_init_type)
        elif reformulator_name in {'linear'}:
            res = LinearReformulator(nb_hops, embedding_size, init_name=ref_init_type)
        elif reformulator_name in {'attentive'}:
            res = AttentiveReformulator(nb_hops, relation_embeddings, init_name=ref_init_type)
        elif reformulator_name in {'memory'}:
            if memory is None:
                memory = MemoryReformulator.Memory(nb_hops, nb_rules, embedding_size, init_name=ref_init_type)
            res = MemoryReformulator(memory)
        elif reformulator_name in {'ntp'}:
            res = NTPReformulator(nb_hops=nb_hops, embedding_size=embedding_size,
                                  kernel=kernel, init_name=ref_init_type)
        assert res is not None
        return res, is_reversed

    hops_lst = [make_hop(s) for s in hops_str]

    hoppy = BatchHoppy(model=model, k=k_max, depth=max_depth, tnorm_name=tnorm_name,
                       hops_lst=hops_lst, R=gntp_R).to(device)

    def scoring_function(instances_batch: List[Instance],
                         relation_lst: List[str],
                         is_train: bool = False) -> Tuple[Tensor, List[Tensor]]:
        rel_emb_lst: List[Tensor] = []
        arg1_emb_lst: List[Tensor] = []
        arg2_emb_lst: List[Tensor] = []

        story_rel_lst: List[Tensor] = []
        story_arg1_lst: List[Tensor] = []
        story_arg2_lst: List[Tensor] = []

        embeddings_lst: List[Tensor] = []
        label_lst: List[int] = []

        for i, instance in enumerate(instances_batch):
            story, target = instance.story, instance.target
            s, r, o = target

            story_rel = encode_relation(story, relation_embeddings.weight, relation_to_idx, device)
            story_arg1, story_arg2 = encode_arguments(story, entity_embeddings.weight, entity_to_idx, device)

            embeddings = encode_entities(story, entity_embeddings.weight, entity_to_idx, device)

            true_predicate = rel_to_predicate[r]
            target_lst: List[Tuple[str, str, str]] = [(s, x, o) for x in relation_lst]
            label_lst += [int(true_predicate == rel_to_predicate[r]) for r in relation_lst]

            rel_emb = encode_relation(target_lst, relation_embeddings.weight, relation_to_idx, device)
            arg1_emb, arg2_emb = encode_arguments(target_lst, entity_embeddings.weight, entity_to_idx, device)

            batch_size = rel_emb.shape[0]
            fact_size = story_rel.shape[0]
            entity_size = embeddings.shape[0]

            # [B, E]
            rel_emb_lst += [rel_emb]
            arg1_emb_lst += [arg1_emb]
            arg2_emb_lst += [arg2_emb]

            # [B, F, E]
            story_rel_lst += [story_rel.view(1, fact_size, -1).repeat(batch_size, 1, 1)]
            story_arg1_lst += [story_arg1.view(1, fact_size, -1).repeat(batch_size, 1, 1)]
            story_arg2_lst += [story_arg2.view(1, fact_size, -1).repeat(batch_size, 1, 1)]

            # [B, N, E]
            embeddings_lst += [embeddings.view(1, entity_size, -1).repeat(batch_size, 1, 1)]

        def cat_pad(t_lst: List[Tensor]) -> Tuple[Tensor, Tensor]:
            lengths: List[int] = [t.shape[1] for t in t_lst]
            max_len: int = max(lengths)
            # Pad the fact dimension (dim 1) on the right up to max_len.
            res_t: Tensor = torch.cat([F.pad(t, pad=[0, 0, 0, max_len - lengths[i]])
                                       for i, t in enumerate(t_lst)], dim=0)
            res_l: Tensor = torch.tensor([t.shape[1] for t in t_lst for _ in range(t.shape[0])], dtype=torch.long)
            return res_t, res_l

        rel_emb = torch.cat(rel_emb_lst, dim=0)
        arg1_emb = torch.cat(arg1_emb_lst, dim=0)
        arg2_emb = torch.cat(arg2_emb_lst, dim=0)

        story_rel, nb_facts = cat_pad(story_rel_lst)
        story_arg1, _ = cat_pad(story_arg1_lst)
        story_arg2, _ = cat_pad(story_arg2_lst)

        facts = [story_rel, story_arg1, story_arg2]

        _embeddings, nb_embeddings = cat_pad(embeddings_lst)

        max_depth_ = hoppy.depth
        if not is_train and test_max_depth is not None:
            hoppy.depth = test_max_depth

        scores = hoppy.score(rel_emb, arg1_emb, arg2_emb, facts, nb_facts, _embeddings, nb_embeddings)

        if not is_train and test_max_depth is not None:
            hoppy.depth = max_depth_

        return scores, [rel_emb, arg1_emb, arg2_emb]

    def evaluate(instances: List[Instance], path: str, sample_size: Optional[int] = None) -> float:
        res = 0.0
        if len(instances) > 0:
            res = accuracy_b(scoring_function=scoring_function, instances=instances, sample_size=sample_size,
                             relation_to_predicate=rel_to_predicate, predicate_to_relations=predicate_to_rel,
                             batch_size=batch_size)
        logger.info(f'Test Accuracy on {path}: {res:.6f}')
        return res

    loss_function = nn.BCELoss()

    N2_reg = N2() if N2_weight is not None else None
    N3_reg = N3() if N3_weight is not None else None
    entropy_reg = Entropy(use_logits=False) if entropy_weight is not None else None

    params_lst = [p for p in hoppy.parameters() if not torch.equal(p, entity_embeddings.weight)]
    params_lst += relation_embeddings.parameters()

    params = nn.ParameterList(params_lst).to(device)

    if load_path is not None:
        model.load_state_dict(torch.load(load_path))

    for tensor in params_lst:
        logger.info(f'\t{tensor.size()}\t{tensor.device}')

    optimizer_factory = {
        'adagrad': lambda arg: optim.Adagrad(arg, lr=learning_rate),
        'adam': lambda arg: optim.Adam(arg, lr=learning_rate),
        'sgd': lambda arg: optim.SGD(arg, lr=learning_rate)
    }

    assert optimizer_name in optimizer_factory
    optimizer = optimizer_factory[optimizer_name](params)

    global_step = 0

    for epoch_no in range(1, nb_epochs + 1):
        batcher = Batcher(batch_size=batch_size, nb_examples=nb_examples, nb_epochs=1, random_state=random_state)
        nb_batches = len(batcher.batches)

        epoch_loss_values = []
        for batch_no, (batch_start, batch_end) in enumerate(batcher.batches, start=1):
            global_step += 1

            indices_batch = batcher.get_batch(batch_start, batch_end)
            instances_batch = [data.train[i] for i in indices_batch]

            label_lst: List[int] = []
            for i, instance in enumerate(instances_batch):
                story, target = instance.story, instance.target
                s, r, o = target
                true_predicate = rel_to_predicate[r]
                label_lst += [int(true_predicate == rel_to_predicate[r]) for r in relation_lst]

            scores, query_emb_lst = scoring_function(instances_batch, relation_lst, is_train=True)

            labels = torch.tensor(label_lst, dtype=torch.float32)

            loss = loss_function(scores, labels)

            factors = [hoppy.factor(e) for e in query_emb_lst]

            loss += N2_weight * N2_reg(factors) if N2_weight is not None else 0.0
            loss += N3_weight * N3_reg(factors) if N3_weight is not None else 0.0

            if entropy_weight is not None:
                for hop, _ in hops_lst:
                    attn_logits = hop.projection(query_emb_lst[0])
                    attention = torch.softmax(attn_logits, dim=1)
                    loss += entropy_weight * entropy_reg([attention])

            loss_value = loss.item()
            epoch_loss_values += [loss_value]

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            logger.info(f'Epoch {epoch_no}/{nb_epochs}\tBatch {batch_no}/{nb_batches}\tLoss {loss_value:.4f}')

            if global_step % evaluate_every == 0:
                for test_path in test_paths:
                    instances = data.test[test_path]
                    evaluate(instances=instances, path=test_path)

                if is_debug:
                    with torch.no_grad():
                        show_rules(model=hoppy, kernel=kernel, relation_embeddings=relation_embeddings,
                                   data=data, relation_to_idx=relation_to_idx, device=device)

        loss_mean, loss_std = np.mean(epoch_loss_values), np.std(epoch_loss_values)

        slope = kernel.slope.item() if isinstance(kernel.slope, Tensor) else kernel.slope
        logger.info(f'Epoch {epoch_no}/{nb_epochs}\tLoss {loss_mean:.4f} ± {loss_std:.4f}\tSlope {slope:.4f}')

    import time
    start = time.time()

    for test_path in test_paths:
        evaluate(instances=data.test[test_path], path=test_path)

    end = time.time()
    logger.info(f'Evaluation took {end - start} seconds.')

    if save_path is not None:
        torch.save(model.state_dict(), save_path)

    logger.info("Training finished")
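
# `cat_pad` above batches variable-length stories by right-padding the fact
# dimension and remembering the true lengths; a self-contained sketch of the
# same idea (illustrative shapes):
def _cat_pad_demo():
    import torch
    import torch.nn.functional as F

    t_lst = [torch.ones(2, 3, 4), torch.ones(1, 5, 4)]  # [B_i, F_i, E]
    max_len = max(t.shape[1] for t in t_lst)

    padded = torch.cat([F.pad(t, pad=[0, 0, 0, max_len - t.shape[1]]) for t in t_lst], dim=0)
    lengths = torch.tensor([t.shape[1] for t in t_lst for _ in range(t.shape[0])])

    assert padded.shape == (3, 5, 4)
    assert lengths.tolist() == [3, 3, 5]
    # Downstream, `lengths` drives masks like torch.arange(max_len) < lengths.unsqueeze(1).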
def main(argv):
    parser = argparse.ArgumentParser('KBC Research', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--train', action='store', required=True, type=str)
    parser.add_argument('--dev', action='store', type=str, default=None)
    parser.add_argument('--test', action='store', type=str, default=None)
    parser.add_argument('--test-i', action='store', type=str, default=None)
    parser.add_argument('--test-ii', action='store', type=str, default=None)

    parser.add_argument('--embedding-size', '-k', action='store', type=int, default=20)
    parser.add_argument('--k-max', '-K', action='store', type=int, default=3)
    parser.add_argument('--hops', nargs='+', type=str, default=['1', '2'])

    # training params
    parser.add_argument('--epochs', '-e', action='store', type=int, default=100)
    parser.add_argument('--learning-rate', '-l', action='store', type=float, default=0.001)
    parser.add_argument('--batch-size', '-b', action='store', type=int, default=8)
    parser.add_argument('--eval-batch-size', '-E', action='store', type=int, default=None)
    parser.add_argument('--optimizer', '-o', action='store', type=str, default='adam',
                        choices=['adagrad', 'adam', 'sgd'])

    parser.add_argument('--N2', action='store', type=float, default=None)
    parser.add_argument('--N3', action='store', type=float, default=None)

    parser.add_argument('--reformulator', '-r', action='store', type=str, default='linear',
                        choices=['static', 'linear', 'attentive', 'memory', 'ntp'])
    parser.add_argument('--nb-rules', '-R', action='store', type=int, default=4)

    parser.add_argument('--GNTP-R', action='store', type=int, default=None)

    parser.add_argument('--seed', action='store', type=int, default=0)
    parser.add_argument('--validate-every', '-V', action='store', type=int, default=None)
    parser.add_argument('--input-type', '-I', action='store', type=str, default='standard',
                        choices=['standard', 'reciprocal'])

    parser.add_argument('--init-size', '-i', action='store', type=float, default=1.0)
    parser.add_argument('--init', action='store', type=str, default='uniform')
    parser.add_argument('--ref-init', action='store', type=str, default='uniform')

    parser.add_argument('--load', action='store', type=str, default=None)
    parser.add_argument('--save', action='store', type=str, default=None)

    parser.add_argument('--nb-negatives', action='store', type=int, default=1)
    parser.add_argument('--quiet', '-q', action='store_true', default=False)

    parser.add_argument('--freeze-entities', '-f', action='store', type=int, default=None)
    parser.add_argument('--refresh-interval', '--refresh', action='store', type=int, default=None)
    parser.add_argument('--index-type', '--index', action='store', type=str, default='faiss',
                        choices=['np', 'faiss', 'nms'])

    parser.add_argument('--lower-bound', '--lb', action='store', type=float, default=-1.0)
    parser.add_argument('--upper-bound', '--ub', action='store', type=float, default=1.0)

    parser.add_argument('--slow-eval', action='store_true', default=False)

    args = parser.parse_args(argv)

    import pprint
    pprint.pprint(vars(args))

    train_path = args.train
    dev_path = args.dev
    test_path = args.test
    test_i_path = args.test_i
    test_ii_path = args.test_ii

    embedding_size = args.embedding_size
    k_max = args.k_max
    hops_str = args.hops

    nb_epochs = args.epochs
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    optimizer_name = args.optimizer

    N2_weight = args.N2
    N3_weight = args.N3

    reformulator_type = args.reformulator
    nb_rules = args.nb_rules
    gntp_R = args.GNTP_R

    eval_batch_size = batch_size if args.eval_batch_size is None else args.eval_batch_size

    seed = args.seed
    validate_every = args.validate_every
    input_type = args.input_type

    init_size = args.init_size
    init_type = args.init
    ref_init_type = args.ref_init

    load_path = args.load
    save_path = args.save

    nb_neg = args.nb_negatives
    is_quiet = args.quiet
    freeze_entities = args.freeze_entities
    refresh_interval = args.refresh_interval
    index_type = args.index_type

    lower_bound = args.lower_bound
    upper_bound = args.upper_bound

    slow_eval = args.slow_eval
    evaluate_ = evaluate_naive if slow_eval else evaluate

    # set the seeds
    np.random.seed(seed)
    random_state = np.random.RandomState(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    rs = np.random.RandomState(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f'Device: {device}')

    if torch.cuda.is_available():
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    data = Data(train_path=train_path, dev_path=dev_path, test_path=test_path,
                test_i_path=test_i_path, test_ii_path=test_ii_path, input_type=input_type)

    triples_name_pairs = [
        (data.dev_triples, 'dev'),
        (data.test_triples, 'test'),
        (data.test_i_triples, 'test-I'),
        (data.test_ii_triples, 'test-II'),
    ]

    entity_embeddings = nn.Embedding(data.nb_entities, embedding_size, sparse=False)
    predicate_embeddings = nn.Embedding(data.nb_predicates, embedding_size, sparse=False)

    if init_type in {'uniform'}:
        nn.init.uniform_(entity_embeddings.weight, lower_bound, upper_bound)
        nn.init.uniform_(predicate_embeddings.weight, lower_bound, upper_bound)

    entity_embeddings.weight.data *= init_size
    predicate_embeddings.weight.data *= init_size

    if freeze_entities is not None:
        entity_embeddings.weight.requires_grad = False

    kernel = GaussianKernel(slope=1.0)

    fact_rel = torch.from_numpy(np.array([data.predicate_to_idx[p] for (_, p, _) in data.train_triples])).to(device)
    fact_arg1 = torch.from_numpy(np.array([data.entity_to_idx[s] for (s, _, _) in data.train_triples])).to(device)
    fact_arg2 = torch.from_numpy(np.array([data.entity_to_idx[o] for (_, _, o) in data.train_triples])).to(device)
    facts = [fact_rel, fact_arg1, fact_arg2]

    base_model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                          k=k_max, facts=facts, kernel=kernel, device=device,
                          index_type=index_type, refresh_interval=refresh_interval).to(device)

    memory = None

    def make_hop(s: str) -> Tuple[BaseReformulator, bool]:
        nonlocal memory
        if s.isdigit():
            nb_hops, is_reversed = int(s), False
        else:
            nb_hops, is_reversed = int(s[:-1]), True
        res = None
        if reformulator_type in {'static'}:
            res = StaticReformulator(nb_hops, embedding_size, init_name=ref_init_type,
                                     lower_bound=lower_bound, upper_bound=upper_bound)
        elif reformulator_type in {'linear'}:
            res = LinearReformulator(nb_hops, embedding_size, init_name=ref_init_type,
                                     lower_bound=lower_bound, upper_bound=upper_bound)
        elif reformulator_type in {'attentive'}:
            res = AttentiveReformulator(nb_hops, predicate_embeddings, init_name=ref_init_type,
                                        lower_bound=lower_bound, upper_bound=upper_bound)
        elif reformulator_type in {'memory'}:
            if memory is None:
                memory = MemoryReformulator.Memory(nb_hops, nb_rules, embedding_size, init_name=ref_init_type,
                                                   lower_bound=lower_bound, upper_bound=upper_bound)
            res = MemoryReformulator(memory)
        elif reformulator_type in {'ntp'}:
            res = NTPReformulator(nb_hops=nb_hops, embedding_size=embedding_size, kernel=kernel,
                                  init_name=ref_init_type, lower_bound=lower_bound, upper_bound=upper_bound)
        assert res is not None
        return res, is_reversed

    hops_lst = [make_hop(s) for s in hops_str]

    # model = MultiHoppy(model=base_model, entity_embeddings=entity_embeddings, hops_lst=hops_lst).to(device)
    model = SimpleHoppy(model=base_model, entity_embeddings=entity_embeddings, hops_lst=hops_lst).to(device)

    def scoring_function(batch_xs: np.ndarray, batch_xp: np.ndarray, batch_xo: np.ndarray) -> np.ndarray:
        with torch.no_grad():
            tensor_xs = torch.from_numpy(batch_xs).to(device)
            tensor_xp = torch.from_numpy(batch_xp).to(device)
            tensor_xo = torch.from_numpy(batch_xo).to(device)

            tensor_xs_emb = entity_embeddings(tensor_xs)
            tensor_xp_emb = predicate_embeddings(tensor_xp)
            tensor_xo_emb = entity_embeddings(tensor_xo)

            scores_ = model.score(tensor_xp_emb, tensor_xs_emb, tensor_xo_emb)
        return scores_.cpu().numpy()

    params_lst = {p for p in model.parameters()} | {entity_embeddings.weight, predicate_embeddings.weight}
    params = nn.ParameterList(params_lst).to(device)

    if load_path is not None:
        model.load_state_dict(torch.load(load_path))

    for tensor in params_lst:
        logger.info(f'\t{tensor.size()}\t{tensor.device}')

    optimizer_factory = {
        'adagrad': lambda arg: optim.Adagrad(arg, lr=learning_rate),
        'adam': lambda arg: optim.Adam(arg, lr=learning_rate),
        'sgd': lambda arg: optim.SGD(arg, lr=learning_rate)
    }

    assert optimizer_name in optimizer_factory
    optimizer = optimizer_factory[optimizer_name](params)

    # loss_function = nn.BCELoss(reduction="sum")
    loss_function = nn.BCELoss()

    N2_reg = N2() if N2_weight is not None else None
    N3_reg = N3() if N3_weight is not None else None

    for epoch_no in range(1, nb_epochs + 1):
        batcher = Batcher(data, batch_size, 1, random_state)
        nb_batches = len(batcher.batches)

        if freeze_entities is not None and epoch_no > freeze_entities:
            entity_embeddings.weight.requires_grad = True

        epoch_loss_values = []
        for batch_no, (batch_start, batch_end) in enumerate(batcher.batches, 1):
            xp_batch_np, xs_batch_np, xo_batch_np, xi_batch_np = batcher.get_batch(batch_start, batch_end)
            t = xp_batch_np.shape[0]

            assert nb_neg > 0

            # Each positive is followed by nb_neg * 3 negatives: corrupted
            # subjects, corrupted objects, and corrupted subject-object pairs.
            xp_exp_np = np.repeat(xp_batch_np, nb_neg * 3 + 1)
            xs_exp_np = np.repeat(xs_batch_np, nb_neg * 3 + 1)
            xo_exp_np = np.repeat(xo_batch_np, nb_neg * 3 + 1)
            xi_exp_np = np.repeat(xi_batch_np, nb_neg * 3 + 1)

            xt_exp_np = np.zeros_like(xp_exp_np)
            xt_exp_np[0::nb_neg * 3 + 1] = 1

            for i in range(t):
                a_ = rs.permutation(data.nb_entities)
                b_ = rs.permutation(data.nb_entities)
                c_ = rs.permutation(data.nb_entities)
                d_ = rs.permutation(data.nb_entities)

                while a_.shape[0] < nb_neg:
                    a_ = np.concatenate([a_, rs.permutation(data.nb_entities)])
                    b_ = np.concatenate([b_, rs.permutation(data.nb_entities)])
                    c_ = np.concatenate([c_, rs.permutation(data.nb_entities)])
                    d_ = np.concatenate([d_, rs.permutation(data.nb_entities)])

                a = a_[:nb_neg]
                b = b_[:nb_neg]
                c = c_[:nb_neg]
                d = d_[:nb_neg]

                xs_exp_np[(i * nb_neg * 3) + i + 1:(i * nb_neg * 3) + nb_neg + i + 1] = a
                xo_exp_np[(i * nb_neg * 3) + nb_neg + i + 1:(i * nb_neg * 3) + nb_neg * 2 + i + 1] = b

                xs_exp_np[(i * nb_neg * 3) + nb_neg * 2 + i + 1:(i * nb_neg * 3) + nb_neg * 3 + i + 1] = c
                xo_exp_np[(i * nb_neg * 3) + nb_neg * 2 + i + 1:(i * nb_neg * 3) + nb_neg * 3 + i + 1] = d

            xp_batch = torch.from_numpy(xp_exp_np.astype('int64')).to(device)
            xs_batch = torch.from_numpy(xs_exp_np.astype('int64')).to(device)
            xo_batch = torch.from_numpy(xo_exp_np.astype('int64')).to(device)
            xi_batch = torch.from_numpy(xi_exp_np.astype('int64')).to(device)
            xt_batch = torch.from_numpy(xt_exp_np.astype('int64')).float().to(device)

            # Disable masking
            # xi_batch = None

            xp_batch_emb = predicate_embeddings(xp_batch)
            xs_batch_emb = entity_embeddings(xs_batch)
            xo_batch_emb = entity_embeddings(xo_batch)

            factors = [model.factor(e) for e in [xp_batch_emb, xs_batch_emb, xo_batch_emb]]

            scores = model.score(xp_batch_emb, xs_batch_emb, xo_batch_emb, mask_indices=xi_batch)
            # scores = base_model.score(xp_batch_emb, xs_batch_emb, xo_batch_emb, mask_indices=xi_batch)
            # print(scores)

            loss = loss_function(scores, xt_batch)

            loss += N2_weight * N2_reg(factors) if N2_weight is not None else 0.0
            loss += N3_weight * N3_reg(factors) if N3_weight is not None else 0.0

            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

            loss_value = loss.item()
            epoch_loss_values += [loss_value]

            if not is_quiet:
                logger.info(f'Epoch {epoch_no}/{nb_epochs}\tBatch {batch_no}/{nb_batches}\tLoss {loss_value:.6f}')

        loss_mean, loss_std = np.mean(epoch_loss_values), np.std(epoch_loss_values)
        logger.info(f'Epoch {epoch_no}/{nb_epochs}\tLoss {loss_mean:.4f} ± {loss_std:.4f}')

        if validate_every is not None and epoch_no % validate_every == 0:
            if 'countries' in train_path:
                dev_auc = evaluate_on_countries('dev', data.entity_to_idx, data.predicate_to_idx, scoring_function)
                print('Last AUC-PR (dev) {:.4f}'.format(dev_auc))

                test_auc = evaluate_on_countries('test', data.entity_to_idx, data.predicate_to_idx, scoring_function)
                print('Last AUC-PR (test) {:.4f}'.format(test_auc))
            else:
                for triples, name in [(t, n) for t, n in triples_name_pairs if len(t) > 0]:
                    metrics = evaluate_(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                                        test_triples=triples, all_triples=data.all_triples,
                                        entity_to_index=data.entity_to_idx, predicate_to_index=data.predicate_to_idx,
                                        model=model, batch_size=eval_batch_size, device=device)
                    logger.info(f'Epoch {epoch_no}/{nb_epochs}\t{name} results\t{metrics_to_str(metrics)}')

    if 'countries' in train_path:
        dev_auc = evaluate_on_countries('dev', data.entity_to_idx, data.predicate_to_idx, scoring_function)
        print('Last AUC-PR (dev) {:.4f}'.format(dev_auc))

        test_auc = evaluate_on_countries('test', data.entity_to_idx, data.predicate_to_idx, scoring_function)
        print('Last AUC-PR (test) {:.4f}'.format(test_auc))
    else:
        for triples, name in [(t, n) for t, n in triples_name_pairs if len(t) > 0]:
            metrics = evaluate_(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                                test_triples=triples, all_triples=data.all_triples,
                                entity_to_index=data.entity_to_idx, predicate_to_index=data.predicate_to_idx,
                                model=model, batch_size=eval_batch_size, device=device)
            logger.info(f'Final \t{name} results\t{metrics_to_str(metrics)}')

    if save_path is not None:
        torch.save(model.state_dict(), save_path)

    logger.info("Training finished")
def test_smart_v1():
    embedding_size = 50

    rs = np.random.RandomState(0)

    for _ in range(32):
        with torch.no_grad():
            triples = [('a', 'p', 'b'), ('c', 'q', 'd'), ('e', 'q', 'f'), ('g', 'q', 'h'),
                       ('i', 'q', 'l'), ('m', 'q', 'n'), ('o', 'q', 'p'), ('q', 'q', 'r'),
                       ('s', 'q', 't'), ('u', 'q', 'v')]

            entity_lst = sorted({s for (s, _, _) in triples} | {o for (_, _, o) in triples})
            predicate_lst = sorted({p for (_, p, _) in triples})

            nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

            entity_to_index = {e: i for i, e in enumerate(entity_lst)}
            predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

            kernel = GaussianKernel()

            entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
            predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

            fact_rel = torch.LongTensor(np.array([predicate_to_index[p] for (_, p, _) in triples]))
            fact_arg1 = torch.LongTensor(np.array([entity_to_index[s] for (s, _, _) in triples]))
            fact_arg2 = torch.LongTensor(np.array([entity_to_index[o] for (_, _, o) in triples]))
            facts = [fact_rel, fact_arg1, fact_arg2]

            model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                             kernel=kernel, facts=facts)

            xs_np = rs.randint(nb_entities, size=32)
            xp_np = rs.randint(nb_predicates, size=32)
            xo_np = rs.randint(nb_entities, size=32)

            xs_np[0] = 0
            xp_np[0] = 0
            xo_np[0] = 1

            xs_np[1] = 2
            xp_np[1] = 1
            xo_np[1] = 3

            xs = torch.LongTensor(xs_np)
            xp = torch.LongTensor(xp_np)
            xo = torch.LongTensor(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            print('xp_emb', xp_emb.shape)

            res_sp, res_po = model.forward(xp_emb, xs_emb, xo_emb)
            inf = model.score(xp_emb, xs_emb, xo_emb)

            assert inf[0] > 0.9
            assert inf[1] > 0.9

            scores_sp, emb_sp = res_sp
            scores_po, emb_po = res_po

            print(scores_sp.shape, emb_sp.shape)
            print(scores_po.shape, emb_po.shape)

            inf = inf.cpu().numpy()
            scores_sp = scores_sp.cpu().numpy()
            scores_po = scores_po.cpu().numpy()

            print('AAA', inf)
            print('BBB', scores_sp)
def test_reasoning_v6():
    torch.set_num_threads(multiprocessing.cpu_count())

    embedding_size = 50

    torch.manual_seed(0)
    rs = np.random.RandomState(0)

    # A single (p, q)-alternating chain a -> b -> c -> ... -> w, plus two unrelated facts.
    triples = [('a', 'p', 'b'), ('b', 'q', 'c'), ('c', 'p', 'd'), ('d', 'q', 'e'),
               ('e', 'p', 'f'), ('f', 'q', 'g'), ('g', 'p', 'h'), ('h', 'q', 'i'),
               ('i', 'p', 'l'), ('l', 'q', 'm'), ('m', 'p', 'n'), ('n', 'q', 'o'),
               ('o', 'p', 'p'), ('p', 'q', 'q'), ('q', 'p', 'r'), ('r', 'q', 's'),
               ('s', 'p', 't'), ('t', 'q', 'u'), ('u', 'p', 'v'), ('v', 'q', 'w'),
               ('x', 'r', 'y'), ('x', 's', 'y')]

    entity_lst = sorted({s for (s, _, _) in triples} | {o for (_, _, o) in triples})
    predicate_lst = sorted({p for (_, p, _) in triples})

    nb_entities = len(entity_lst)
    nb_predicates = len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    for st in ['min', 'concat']:
        with torch.no_grad():
            kernel = GaussianKernel()

            entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
            predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

            fact_rel = torch.from_numpy(np.array([predicate_to_index[p] for (_, p, _) in triples]))
            fact_arg1 = torch.from_numpy(np.array([entity_to_index[s] for (s, _, _) in triples]))
            fact_arg2 = torch.from_numpy(np.array([entity_to_index[o] for (_, _, o) in triples]))
            facts = [fact_rel, fact_arg1, fact_arg2]

            model = NeuralKB(entity_embeddings=entity_embeddings,
                             predicate_embeddings=predicate_embeddings,
                             kernel=kernel, facts=facts, scoring_type=st)

            indices = torch.from_numpy(np.array([predicate_to_index['p'], predicate_to_index['q']]))
            reformulator = SymbolicReformulator(predicate_embeddings, indices)

            k = 5
            rhoppy0 = RecursiveHoppy(model, entity_embeddings, hops=reformulator, depth=0, k=k)
            rhoppy1 = RecursiveHoppy(model, entity_embeddings, hops=reformulator, depth=1, k=k)
            rhoppy2 = RecursiveHoppy(model, entity_embeddings, hops=reformulator, depth=2, k=k)
            rhoppy3 = RecursiveHoppy(model, entity_embeddings, hops=reformulator, depth=3, k=k)
            rhoppy4 = RecursiveHoppy(model, entity_embeddings, hops=reformulator, depth=4, k=k)

            xs_np = rs.randint(nb_entities, size=12)
            xp_np = rs.randint(nb_predicates, size=12)
            xo_np = rs.randint(nb_entities, size=12)

            # Queries 0..8 ask whether 'a' is r-related to entities at increasing
            # distances along the chain: c, e, g, i, m, o, q, s, u.
            for j, t in enumerate(['c', 'e', 'g', 'i', 'm', 'o', 'q', 's', 'u']):
                xs_np[j] = entity_to_index['a']
                xp_np[j] = predicate_to_index['r']
                xo_np[j] = entity_to_index[t]

            # xs_np[9] = entity_to_index['a']
            # xp_np[9] = predicate_to_index['r']
            # xo_np[9] = entity_to_index['w']

            xs = torch.from_numpy(xs_np)
            xp = torch.from_numpy(xp_np)
            xo = torch.from_numpy(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            def assert_scores_consistent(scores, inf, rtol, atol):
                # model.score should agree with the corresponding entries of the
                # (s, p, ?) and (?, p, o) score matrices returned by forward.
                scores_sp, scores_po = scores
                inf_np = inf.cpu().numpy()
                scores_sp_np = scores_sp.cpu().numpy()
                scores_po_np = scores_po.cpu().numpy()
                for i in range(xs.shape[0]):
                    np.testing.assert_allclose(inf_np[i], scores_sp_np[i, xo[i]], rtol=rtol, atol=atol)
                    np.testing.assert_allclose(inf_np[i], scores_po_np[i, xs[i]], rtol=rtol, atol=atol)

            scores0 = rhoppy0.forward(xp_emb, xs_emb, xo_emb)
            inf0 = rhoppy0.score(xp_emb, xs_emb, xo_emb)
            assert_scores_consistent(scores0, inf0, rtol=1e-5, atol=1e-5)

            scores1 = rhoppy1.forward(xp_emb, xs_emb, xo_emb)
            inf1 = rhoppy1.score(xp_emb, xs_emb, xo_emb)
            assert_scores_consistent(scores1, inf1, rtol=1e-5, atol=1e-5)

            scores2 = rhoppy2.forward(xp_emb, xs_emb, xo_emb)
            inf2 = rhoppy2.score(xp_emb, xs_emb, xo_emb)
            assert_scores_consistent(scores2, inf2, rtol=1e-1, atol=1e-1)

            scores3 = rhoppy3.forward(xp_emb, xs_emb, xo_emb)
            inf3 = rhoppy3.score(xp_emb, xs_emb, xo_emb)
            assert_scores_consistent(scores3, inf3, rtol=1e-1, atol=1e-1)

            scores4 = rhoppy4.forward(xp_emb, xs_emb, xo_emb)
            inf4 = rhoppy4.score(xp_emb, xs_emb, xo_emb)
            assert_scores_consistent(scores4, inf4, rtol=1e-1, atol=1e-1)

            print(inf0)
            print(inf1)
            print(inf2)
            print(inf3)
            print(inf4)

            inf0_np = inf0.cpu().numpy()
            inf1_np = inf1.cpu().numpy()
            inf2_np = inf2.cpu().numpy()
            inf3_np = inf3.cpu().numpy()
            inf4_np = inf4.cpu().numpy()

            np.testing.assert_allclose(inf0_np, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
            np.testing.assert_allclose(inf1_np, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
            np.testing.assert_allclose(inf2_np, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
            np.testing.assert_allclose(inf3_np, [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
            np.testing.assert_allclose(inf4_np, [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
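# The expected score vectors above reflect how recursion depth bounds provable chain
# length: each additional depth level doubles the number of (p, q) compositions that
# can be proven, so depth 1 reaches 'c' (one composition), depth 2 reaches 'e' (two),
# depth 3 reaches 'i' (four), and depth 4 reaches 's' (eight).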
def test_reasoning_v5():
    torch.set_num_threads(multiprocessing.cpu_count())

    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    rs = np.random.RandomState(0)

    triples = [('a', 'p', 'b'), ('b', 'q', 'c'), ('c', 'r', 'd'), ('d', 's', 'e')]
    entity_to_index = {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4}
    predicate_to_index = {'p': 0, 'q': 1, 'r': 2, 's': 3}

    for st in ['min', 'concat']:
        with torch.no_grad():
            kernel = GaussianKernel()

            entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
            predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

            fact_rel = torch.from_numpy(np.array([predicate_to_index[p] for (_, p, _) in triples]))
            fact_arg1 = torch.from_numpy(np.array([entity_to_index[s] for (s, _, _) in triples]))
            fact_arg2 = torch.from_numpy(np.array([entity_to_index[o] for (_, _, o) in triples]))
            facts = [fact_rel, fact_arg1, fact_arg2]

            model = NeuralKB(entity_embeddings=entity_embeddings,
                             predicate_embeddings=predicate_embeddings,
                             kernel=kernel, facts=facts, scoring_type=st)

            indices = torch.from_numpy(np.array([predicate_to_index['p'], predicate_to_index['q'],
                                                 predicate_to_index['r'], predicate_to_index['s']]))
            reformulator = SymbolicReformulator(predicate_embeddings, indices)

            hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)
            rhoppy = RecursiveHoppy(model, entity_embeddings, hops=reformulator, depth=1)

            xs_np = rs.randint(nb_entities, size=32)
            xp_np = rs.randint(nb_predicates, size=32)
            xo_np = rs.randint(nb_entities, size=32)

            xs_np[0] = 0
            xp_np[0] = 0
            xo_np[0] = 1

            xs_np[1] = 1
            xp_np[1] = 1
            xo_np[1] = 2

            xs_np[2] = 0
            xp_np[2] = 3
            xo_np[2] = 4

            xs = torch.from_numpy(xs_np)
            xp = torch.from_numpy(xp_np)
            xo = torch.from_numpy(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            scores = hoppy.forward(xp_emb, xs_emb, xo_emb)
            inf = hoppy.score(xp_emb, xs_emb, xo_emb)

            scores_h = rhoppy.depth_r_forward(xp_emb, xs_emb, xo_emb, depth=1)
            inf_h = rhoppy.depth_r_score(xp_emb, xs_emb, xo_emb, depth=1)

            print(inf)
            print(inf_h)

            assert inf[2] > 0.95

            scores_sp, scores_po = scores
            scores_h_sp, scores_h_po = scores_h

            inf = inf.cpu().numpy()
            scores_sp = scores_sp.cpu().numpy()
            scores_po = scores_po.cpu().numpy()

            inf_h = inf_h.cpu().numpy()
            scores_h_sp = scores_h_sp.cpu().numpy()
            scores_h_po = scores_h_po.cpu().numpy()

            np.testing.assert_allclose(inf, inf_h)
            np.testing.assert_allclose(scores_sp, scores_h_sp)
            np.testing.assert_allclose(scores_po, scores_h_po)

            for i in range(xs.shape[0]):
                np.testing.assert_allclose(inf[i], scores_sp[i, xo[i]], rtol=1e-5, atol=1e-5)
                np.testing.assert_allclose(inf[i], scores_po[i, xs[i]], rtol=1e-5, atol=1e-5)
                np.testing.assert_allclose(inf_h[i], scores_h_sp[i, xo[i]], rtol=1e-5, atol=1e-5)
                np.testing.assert_allclose(inf_h[i], scores_h_po[i, xs[i]], rtol=1e-5, atol=1e-5)
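# The assert_allclose checks above verify that SimpleHoppy and RecursiveHoppy at
# depth=1 produce identical scores: the recursive formulation collapses to a single
# reformulation step when no deeper recursion is allowed.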
def test_clutrr_v4():
    embedding_size = 50

    rs = np.random.RandomState(0)

    for _ in range(32):
        with torch.no_grad():
            triples = [('a', 'p', 'b'), ('c', 'q', 'd'), ('e', 'q', 'f'), ('g', 'q', 'h'),
                       ('i', 'q', 'l'), ('m', 'q', 'n'), ('o', 'q', 'p'), ('q', 'q', 'r'),
                       ('s', 'q', 't'), ('u', 'q', 'v')]

            entity_lst = sorted({s for (s, _, _) in triples} | {o for (_, _, o) in triples})
            predicate_lst = sorted({p for (_, p, _) in triples})

            nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

            entity_to_index = {e: i for i, e in enumerate(entity_lst)}
            predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

            kernel = GaussianKernel()

            entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
            predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

            # Facts are passed to the model as embeddings rather than as index tensors.
            rel_emb = encode_relation(facts=triples, relation_embeddings=predicate_embeddings,
                                      relation_to_idx=predicate_to_index)
            arg1_emb, arg2_emb = encode_arguments(facts=triples, entity_embeddings=entity_embeddings,
                                                  entity_to_idx=entity_to_index)
            facts = [rel_emb, arg1_emb, arg2_emb]

            model = NeuralKB(kernel=kernel)

            xs_np = rs.randint(nb_entities, size=32)
            xp_np = rs.randint(nb_predicates, size=32)
            xo_np = rs.randint(nb_entities, size=32)

            # Pin the first two queries to known facts.
            xs_np[0], xp_np[0], xo_np[0] = 0, 0, 1
            xs_np[1], xp_np[1], xo_np[1] = 2, 1, 3

            xs = torch.from_numpy(xs_np)
            xp = torch.from_numpy(xp_np)
            xo = torch.from_numpy(xo_np)

            xs_emb = entity_embeddings(xs)
            xp_emb = predicate_embeddings(xp)
            xo_emb = entity_embeddings(xo)

            print('xp_emb', xp_emb.shape)

            scores_sp, scores_po = model.forward(xp_emb, xs_emb, xo_emb, facts=facts,
                                                 entity_embeddings=entity_embeddings.weight)
            inf = model.score(xp_emb, xs_emb, xo_emb, facts=facts)

            assert inf[0] > 0.9
            assert inf[1] > 0.9

            inf = inf.cpu().numpy()
            scores_sp = scores_sp.cpu().numpy()
            scores_po = scores_po.cpu().numpy()

            print('inf', inf)
            print('scores_sp', scores_sp)
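# Unlike test_smart_v1, where NeuralKB receives embedding modules and fact index
# tensors at construction time, here the model is stateless: facts are pre-embedded
# with encode_relation / encode_arguments and supplied to every forward/score call.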
def test_learning_v3():
    embedding_size = 10
    batch_size = 16

    triples, hops = [], []
    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    torch.manual_seed(0)

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

    fact_rel = torch.LongTensor(np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.LongTensor(np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.LongTensor(np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                     kernel=kernel, facts=facts)

    reformulator = AttentiveReformulator(2, predicate_embeddings)
    hoppy = SimpleHoppy(model, entity_embeddings, hops=reformulator)

    N3_reg = N3()

    # Train only the reformulator: exclude the (frozen) entity and predicate embeddings.
    params = [p for p in hoppy.parameters()
              if not torch.equal(p, entity_embeddings.weight)
              and not torch.equal(p, predicate_embeddings.weight)]

    loss_function = nn.CrossEntropyLoss(reduction='mean')

    p_emb = predicate_embeddings(torch.LongTensor(np.array([predicate_to_index['p']])))
    q_emb = predicate_embeddings(torch.LongTensor(np.array([predicate_to_index['q']])))
    # r_emb = predicate_embeddings(torch.LongTensor(np.array([predicate_to_index['r']])))

    optimizer = optim.Adagrad(params, lr=0.1)

    hops_data = []
    for i in range(128):
        hops_data += hops

    batches = make_batches(len(hops_data), batch_size)

    c, d = 0.0, 0.0

    for batch_start, batch_end in batches:
        hops_batch = hops_data[batch_start:batch_end]

        s_lst = [s for (s, _, _) in hops_batch]
        p_lst = [p for (_, p, _) in hops_batch]
        o_lst = [o for (_, _, o) in hops_batch]

        xs_np = np.array([entity_to_index[s] for s in s_lst])
        xp_np = np.array([predicate_to_index[p] for p in p_lst])
        xo_np = np.array([entity_to_index[o] for o in o_lst])

        xs = torch.LongTensor(xs_np)
        xp = torch.LongTensor(xp_np)
        xo = torch.LongTensor(xo_np)

        xs_emb = entity_embeddings(xs)
        xp_emb = predicate_embeddings(xp)
        xo_emb = entity_embeddings(xo)

        sp_scores, po_scores = hoppy.forward(xp_emb, xs_emb, xo_emb)
        loss = loss_function(sp_scores, xo) + loss_function(po_scores, xs)

        factors = [hoppy.factor(e) for e in [xp_emb, xs_emb, xo_emb]]
        loss += 0.1 * N3_reg(factors)

        # Track how closely the two learned hops match the gold predicates p and q.
        tmp = hoppy.hops(xp_emb)
        hop_1_emb = tmp[0]
        hop_2_emb = tmp[1]

        c = kernel.pairwise(p_emb, hop_1_emb).mean().cpu().detach().numpy()
        d = kernel.pairwise(q_emb, hop_2_emb).mean().cpu().detach().numpy()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # After training, the reformulator should have learned r(X, Z) :- p(X, Y), q(Y, Z).
    assert c > 0.95
    assert d > 0.95
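# For reference: make_batches(size, batch_size) is used above as if it returned a
# list of (start, end) pairs covering [0, size). A minimal sketch consistent with
# that usage (an assumption, not necessarily the library's actual implementation):
def _make_batches_sketch(size: int, batch_size: int):
    # Contiguous half-open windows of at most batch_size elements each.
    return [(start, min(start + batch_size, size)) for start in range(0, size, batch_size)]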
def test_clutrr_v7():
    torch.set_num_threads(multiprocessing.cpu_count())

    embedding_size = 50

    torch.manual_seed(0)
    rs = np.random.RandomState(0)

    triples = [('a', 'p', 'b'), ('b', 'q', 'c'), ('c', 'p', 'd'), ('d', 'q', 'e'),
               ('e', 'p', 'f'), ('f', 'q', 'g'), ('g', 'p', 'h'), ('h', 'q', 'i'),
               ('i', 'p', 'l'), ('l', 'q', 'm'), ('m', 'p', 'n'), ('n', 'q', 'o'),
               ('o', 'p', 'p'), ('p', 'q', 'q'), ('q', 'p', 'r'), ('r', 'q', 's'),
               ('s', 'p', 't'), ('t', 'q', 'u'), ('u', 'p', 'v'), ('v', 'q', 'w'),
               ('x', 'r', 'y'), ('x', 's', 'y')]

    entity_lst = sorted({s for (s, _, _) in triples} | {o for (_, _, o) in triples})
    predicate_lst = sorted({p for (_, p, _) in triples})

    nb_entities = len(entity_lst)
    nb_predicates = len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    with torch.no_grad():
        kernel = GaussianKernel()

        entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
        predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

        rel_emb = encode_relation(facts=triples, relation_embeddings=predicate_embeddings,
                                  relation_to_idx=predicate_to_index)
        arg1_emb, arg2_emb = encode_arguments(facts=triples, entity_embeddings=entity_embeddings,
                                              entity_to_idx=entity_to_index)
        facts = [rel_emb, arg1_emb, arg2_emb]

        k = 5

        model = NeuralKB(kernel=kernel)

        indices = torch.from_numpy(np.array([predicate_to_index['p'], predicate_to_index['q']]))
        reformulator = SymbolicReformulator(predicate_embeddings, indices)

        hoppy0 = Hoppy(model, hops_lst=[(reformulator, False)], depth=0)
        hoppy1 = Hoppy(model, hops_lst=[(reformulator, False)], depth=1)
        hoppy2 = Hoppy(model, hops_lst=[(reformulator, False)], depth=2)
        hoppy3 = Hoppy(model, hops_lst=[(reformulator, False)], depth=3)
        hoppy4 = Hoppy(model, hops_lst=[(reformulator, False)], depth=4)

        xs_np = rs.randint(nb_entities, size=12)
        xp_np = rs.randint(nb_predicates, size=12)
        xo_np = rs.randint(nb_entities, size=12)

        # Queries 0..8 ask whether 'a' is r-related to entities at increasing
        # distances along the chain: c, e, g, i, m, o, q, s, u.
        for j, t in enumerate(['c', 'e', 'g', 'i', 'm', 'o', 'q', 's', 'u']):
            xs_np[j] = entity_to_index['a']
            xp_np[j] = predicate_to_index['r']
            xo_np[j] = entity_to_index[t]

        xs = torch.from_numpy(xs_np)
        xp = torch.from_numpy(xp_np)
        xo = torch.from_numpy(xo_np)

        xs_emb = entity_embeddings(xs)
        xp_emb = predicate_embeddings(xp)
        xo_emb = entity_embeddings(xo)

        # res0 = hoppy0.forward(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings)
        inf0 = hoppy0.score(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings.weight)
        # (scores0_sp, subs0_sp), (scores0_po, subs0_po) = res0

        # res1 = hoppy1.forward(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings)
        inf1 = hoppy1.score(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings.weight)
        # (scores1_sp, subs1_sp), (scores1_po, subs1_po) = res1

        # res2 = hoppy2.forward(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings)
        inf2 = hoppy2.score(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings.weight)
        # (scores2_sp, subs2_sp), (scores2_po, subs2_po) = res2

        # res3 = hoppy3.forward(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings)
        inf3 = hoppy3.score(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings.weight)
        # (scores3_sp, subs3_sp), (scores3_po, subs3_po) = res3

        # res4 = hoppy4.forward(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings)
        inf4 = hoppy4.score(xp_emb, xs_emb, xo_emb, facts=facts, entity_embeddings=entity_embeddings.weight)
        # (scores4_sp, subs4_sp), (scores4_po, subs4_po) = res4

        inf0_np = inf0.cpu().numpy()
        inf1_np = inf1.cpu().numpy()
        inf2_np = inf2.cpu().numpy()
        inf3_np = inf3.cpu().numpy()
        inf4_np = inf4.cpu().numpy()

        np.testing.assert_allclose(inf0_np, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
        np.testing.assert_allclose(inf1_np, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
        np.testing.assert_allclose(inf2_np, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
        np.testing.assert_allclose(inf3_np, [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)
        np.testing.assert_allclose(inf4_np, [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], rtol=1e-1, atol=1e-1)

        print(inf3_np)

        print(entity_embeddings.weight[entity_to_index['c'], 0].item())
        print(entity_embeddings.weight[entity_to_index['e'], 0].item())
        print(entity_embeddings.weight[entity_to_index['g'], 0].item())
        print(entity_embeddings.weight[entity_to_index['i'], 0].item())
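# Hoppy mirrors the depth semantics checked in test_reasoning_v6: with a single
# (p, q) rule, each additional depth level doubles the number of provable rule
# applications, which is exactly the pattern encoded in the five expected score
# vectors above.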
def test_learning_v1():
    embedding_size = 50

    triples, hops = [], []
    for i in range(16):
        triples += [(f'a{i}', 'p', f'b{i}'), (f'b{i}', 'q', f'c{i}')]
        hops += [(f'a{i}', 'r', f'c{i}')]

    entity_lst = sorted({s for (s, _, _) in triples + hops} | {o for (_, _, o) in triples + hops})
    predicate_lst = sorted({p for (_, p, _) in triples + hops})

    nb_entities, nb_predicates = len(entity_lst), len(predicate_lst)

    entity_to_index = {e: i for i, e in enumerate(entity_lst)}
    predicate_to_index = {p: i for i, p in enumerate(predicate_lst)}

    kernel = GaussianKernel()

    entity_embeddings = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
    predicate_embeddings = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

    fact_rel = torch.LongTensor(np.array([predicate_to_index[p] for (_, p, _) in triples]))
    fact_arg1 = torch.LongTensor(np.array([entity_to_index[s] for (s, _, _) in triples]))
    fact_arg2 = torch.LongTensor(np.array([entity_to_index[o] for (_, _, o) in triples]))
    facts = [fact_rel, fact_arg1, fact_arg2]

    for st in ['min', 'concat']:
        model = NeuralKB(entity_embeddings=entity_embeddings, predicate_embeddings=predicate_embeddings,
                         kernel=kernel, facts=facts, scoring_type=st)

        for s in entity_lst:
            for p in predicate_lst:
                for o in entity_lst:
                    xs_np = np.array([entity_to_index[s]])
                    xp_np = np.array([predicate_to_index[p]])
                    xo_np = np.array([entity_to_index[o]])

                    with torch.no_grad():
                        xs = torch.LongTensor(xs_np)
                        xp = torch.LongTensor(xp_np)
                        xo = torch.LongTensor(xo_np)

                        xs_emb = entity_embeddings(xs)
                        xp_emb = predicate_embeddings(xp)
                        xo_emb = entity_embeddings(xo)

                        inf = model.score(xp_emb, xs_emb, xo_emb)
                        inf_np = inf.cpu().numpy()

                        if (s, p, o) in triples:
                            assert inf_np[0] > 0.95
                        else:
                            assert inf_np[0] < 0.01
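# Despite its name, this test performs no parameter updates: it exhaustively scores
# every (s, p, o) combination and checks that NeuralKB is sharply calibrated, with
# stored facts scoring above 0.95 and everything else below 0.01, for both scoring types.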
def test_masking_v2():
    nb_entities = 10
    nb_predicates = 5
    embedding_size = 10

    rs = np.random.RandomState(0)

    for _ in range(1):
        for position in [0, 1, 2]:
            for st in ['min', 'concat']:
                with torch.no_grad():
                    triples = [('a', 'p', 'b'), ('b', 'q', 'c'), ('a', 'p', 'c')]
                    entity_to_index = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
                    predicate_to_index = {'p': 0, 'q': 1}

                    kernel = GaussianKernel()

                    entity_emb = nn.Embedding(nb_entities, embedding_size * 2, sparse=True)
                    predicate_emb = nn.Embedding(nb_predicates, embedding_size * 2, sparse=True)

                    fact_rel = torch.LongTensor(np.array([predicate_to_index[p] for (_, p, _) in triples]))
                    fact_arg1 = torch.LongTensor(np.array([entity_to_index[s] for (s, _, _) in triples]))
                    fact_arg2 = torch.LongTensor(np.array([entity_to_index[o] for (_, _, o) in triples]))
                    facts = [fact_rel, fact_arg1, fact_arg2]

                    base = NeuralKB(entity_embeddings=entity_emb, predicate_embeddings=predicate_emb,
                                    kernel=kernel, facts=facts, scoring_type=st)

                    indices = torch.LongTensor(np.array([predicate_to_index['p'], predicate_to_index['q']]))
                    reformulator = SymbolicReformulator(predicate_emb, indices)
                    model = SimpleHoppy(base, entity_emb, hops=reformulator)

                    xs_np = rs.randint(nb_entities, size=32)
                    xp_np = rs.randint(nb_predicates, size=32)
                    xo_np = rs.randint(nb_entities, size=32)
                    xi_np = np.array([position] * xs_np.shape[0])

                    xs_np[0] = 0
                    xp_np[0] = 0
                    xo_np[0] = 1

                    xs_np[1] = 1
                    xp_np[1] = 1
                    xo_np[1] = 2

                    xs_np[2] = 0
                    xp_np[2] = 0
                    xo_np[2] = 2

                    xs = torch.LongTensor(xs_np)
                    xp = torch.LongTensor(xp_np)
                    xo = torch.LongTensor(xo_np)
                    xi = torch.LongTensor(xi_np)

                    xs_emb = entity_emb(xs)
                    xp_emb = predicate_emb(xp)
                    xo_emb = entity_emb(xo)

                    # xi = None
                    # Mask the fact at index `position`, so it cannot be used in proofs.
                    base.mask_indices = xi

                    scores = model.forward(xp_emb, xs_emb, xo_emb)
                    inf = model.score(xp_emb, xs_emb, xo_emb)

                    # Query 2 is (a, p, c), proven via the hops p, q using facts 0 and 1:
                    # masking either of those breaks the proof, masking fact 2 does not.
                    if position in {0, 1}:
                        assert inf[2] < 0.5
                    else:
                        assert inf[2] > 0.9

                    scores_sp, scores_po = scores

                    inf = inf.cpu().numpy()
                    scores_sp = scores_sp.cpu().numpy()
                    scores_po = scores_po.cpu().numpy()

                    for i in range(xs.shape[0]):
                        np.testing.assert_allclose(inf[i], scores_sp[i, xo[i]], rtol=1e-5, atol=1e-5)
                        np.testing.assert_allclose(inf[i], scores_po[i, xs[i]], rtol=1e-5, atol=1e-5)
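# Presumably, mask_indices exists so that during training each example can mask the
# very fact being reconstructed, forcing proofs to go through other facts rather
# than trivially matching the target against itself; this test only exercises the
# masking mechanism directly, so that reading is an assumption about intended use.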
def main(argv):
    argparser = argparse.ArgumentParser('CLUTRR', formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    train_path = test_path = "data/clutrr-emnlp/data_test/64.csv"

    argparser.add_argument('--train', action='store', type=str, default=train_path)
    argparser.add_argument('--test', nargs='+', type=str, default=[test_path])

    # model params
    argparser.add_argument('--embedding-size', '-k', action='store', type=int, default=20)
    argparser.add_argument('--k-max', '-m', action='store', type=int, default=10)
    argparser.add_argument('--max-depth', '-d', action='store', type=int, default=2)
    argparser.add_argument('--hops', nargs='+', type=str, default=['2', '2', '1R'])

    # training params
    argparser.add_argument('--epochs', '-e', action='store', type=int, default=100)
    argparser.add_argument('--learning-rate', '-l', action='store', type=float, default=0.1)
    argparser.add_argument('--batch-size', '-b', action='store', type=int, default=8)
    argparser.add_argument('--optimizer', '-o', action='store', type=str, default='adagrad',
                           choices=['adagrad', 'adam', 'sgd'])
    argparser.add_argument('--seed', action='store', type=int, default=0)
    argparser.add_argument('--evaluate-every', '-V', action='store', type=int, default=32)
    argparser.add_argument('--N2', action='store', type=float, default=None)
    argparser.add_argument('--N3', action='store', type=float, default=None)
    argparser.add_argument('--entropy', '-E', action='store', type=float, default=None)
    argparser.add_argument('--reformulator', '-r', action='store', type=str, default='linear',
                           choices=['static', 'linear', 'attentive', 'memory'])
    argparser.add_argument('--nb-rules', '-R', action='store', type=int, default=4)
    argparser.add_argument('--slope', '-S', action='store', type=float, default=None)
    argparser.add_argument('--init-size', '-i', action='store', type=float, default=1.0)
    argparser.add_argument('--debug', '-D', action='store_true', default=False)

    args = argparser.parse_args(argv)

    train_path = args.train
    test_paths = args.test

    embedding_size = args.embedding_size
    k_max = args.k_max
    max_depth = args.max_depth
    hops_str = args.hops

    nb_epochs = args.epochs
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    optimizer_name = args.optimizer
    seed = args.seed
    evaluate_every = args.evaluate_every
    N2_weight = args.N2
    N3_weight = args.N3
    entropy_weight = args.entropy
    reformulator_name = args.reformulator
    nb_rules = args.nb_rules
    slope = args.slope
    init_size = args.init_size
    is_debug = args.debug

    np.random.seed(seed)
    random_state = np.random.RandomState(seed)
    torch.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f'Device: {device}')

    if torch.cuda.is_available():
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    data = Data(train_path=train_path, test_paths=test_paths)

    relation_to_predicate = data.relation_to_predicate
    predicate_to_relations = data.predicate_to_relations
    entity_lst, predicate_lst, relation_lst = data.entity_lst, data.predicate_lst, data.relation_lst

    nb_examples = len(data.train)
    nb_entities = len(entity_lst)
    nb_predicates = len(predicate_lst)
    nb_relations = len(relation_lst)

    entity_to_idx = {e: i for i, e in enumerate(entity_lst)}
    relation_to_idx = {r: i for i, r in enumerate(relation_lst)}

    kernel = GaussianKernel(slope=slope)

    entity_embeddings = nn.Embedding(nb_entities, embedding_size, sparse=False).to(device)

    if entropy_weight is None:
        relation_embeddings = nn.Embedding(nb_relations, embedding_size, sparse=False).to(device)
        relation_embeddings.weight.data *= init_size
    else:
        relation_embeddings = AttentiveEmbedding(nb_predicates=nb_predicates, nb_relations=nb_relations,
                                                 embedding_size=embedding_size, device=device).to(device)
        make_easy(predicate_lst, predicate_to_relations, relation_to_idx, relation_embeddings)

    model = NeuralKB(kernel=kernel, k=k_max).to(device)
    memory = None

    def make_hop(s: str) -> Tuple[BaseReformulator, bool]:
        nonlocal memory
        # A trailing 'R' (e.g. '1R') marks a reversed hop.
        if s.isdigit():
            nb_hops, is_reversed = int(s), False
        else:
            nb_hops, is_reversed = int(s[:-1]), True
        res = None
        if reformulator_name in {'static'}:
            res = StaticReformulator(nb_hops, embedding_size)
        elif reformulator_name in {'linear'}:
            res = LinearReformulator(nb_hops, embedding_size)
        elif reformulator_name in {'attentive'}:
            res = AttentiveReformulator(nb_hops, relation_embeddings)
        elif reformulator_name in {'memory'}:
            if memory is None:
                memory = MemoryReformulator.Memory(nb_hops, nb_rules, embedding_size)
            res = MemoryReformulator(memory)
        assert res is not None
        return res, is_reversed

    hops_lst = [make_hop(s) for s in hops_str]
    hoppy = Hoppy(model=model, depth=max_depth, hops_lst=hops_lst).to(device)

    def scoring_function(story: List[Fact], targets: List[Fact]) -> Tensor:
        story_rel = encode_relation(story, relation_embeddings, relation_to_idx, device)
        story_arg1, story_arg2 = encode_arguments(story, entity_embeddings, entity_to_idx, device)

        targets_rel = encode_relation(targets, relation_embeddings, relation_to_idx, device)
        targets_arg1, targets_arg2 = encode_arguments(targets, entity_embeddings, entity_to_idx, device)

        facts = [story_rel, story_arg1, story_arg2]
        scores = hoppy.score(targets_rel, targets_arg1, targets_arg2, facts)
        return scores

    def evaluate(instances: List[Instance], path: str, sample_size: Optional[int] = None) -> float:
        res = 0.0
        if len(instances) > 0:
            res = accuracy(scoring_function=scoring_function, instances=instances, sample_size=sample_size,
                           relation_to_predicate=relation_to_predicate,
                           predicate_to_relations=predicate_to_relations)
        logger.info(f'Test Accuracy on {path}: {res:.6f}')
        return res

    loss_function = nn.BCELoss()

    N2_reg = N2() if N2_weight is not None else None
    N3_reg = N3() if N3_weight is not None else None
    entropy_reg = Entropy(use_logits=False) if entropy_weight is not None else None

    # Entity embeddings stay frozen; everything else is trained.
    params_lst = [p for p in hoppy.parameters() if not torch.equal(p, entity_embeddings.weight)]
    params_lst += relation_embeddings.parameters()

    params = nn.ParameterList(params_lst).to(device)

    for tensor in params_lst:
        logger.info(f'\t{tensor.size()}\t{tensor.device}')

    optimizer_factory = {
        'adagrad': lambda arg: optim.Adagrad(arg, lr=learning_rate),
        'adam': lambda arg: optim.Adam(arg, lr=learning_rate),
        'sgd': lambda arg: optim.SGD(arg, lr=learning_rate)
    }

    assert optimizer_name in optimizer_factory
    optimizer = optimizer_factory[optimizer_name](params)

    global_step = 0
    hinton = HintonDiagram(max_arr=[0.0, 1.0])

    for epoch_no in range(1, nb_epochs + 1):
        batcher = Batcher(batch_size=batch_size, nb_examples=nb_examples, nb_epochs=1,
                          random_state=random_state)
        nb_batches = len(batcher.batches)

        epoch_loss_values = []
        for batch_no, (batch_start, batch_end) in enumerate(batcher.batches, start=1):
            global_step += 1

            indices_batch = batcher.get_batch(batch_start, batch_end)
            instances_batch = [data.train[i] for i in indices_batch]

            batch_loss_values = []
            for i, instance in enumerate(instances_batch):
                story, target = instance.story, instance.target
                s, r, o = target

                if is_debug is True and i == 0:
                    # print('STORY', story)
                    # print('TARGET', target)
                    r_lst = [r for p in predicate_lst for r in predicate_to_relations[p]]
                    r_idx_lst = [relation_to_idx[r] for r in r_lst]
                    with torch.no_grad():
                        # show_rules(model=hoppy, kernel=kernel, relation_embeddings=relation_embeddings,
                        #            data=data, relation_to_idx=relation_to_idx, device=device)
                        r_idx_tensor = torch.from_numpy(np.array(r_idx_lst, dtype=np.int64)).to(device)
                        r_tensor = relation_embeddings(r_idx_tensor)
                        k = kernel.pairwise(r_tensor, r_tensor)
                        # print(r_lst)
                        print(hinton(k.cpu().numpy()))

                story_rel = encode_relation(story, relation_embeddings, relation_to_idx, device)
                story_arg1, story_arg2 = encode_arguments(story, entity_embeddings, entity_to_idx, device)
                facts = [story_rel, story_arg1, story_arg2]

                # Score one candidate target per relation; the labels are one-hot on
                # relations that map to the gold predicate.
                pos_predicate = relation_to_predicate[r]
                p_relation_lst = sorted(relation_to_predicate.keys())

                target_lst = [(s, x, o) for x in p_relation_lst]
                label_lst = [int(pos_predicate == relation_to_predicate[rel]) for rel in p_relation_lst]

                rel_emb = encode_relation(target_lst, relation_embeddings, relation_to_idx, device)
                arg1_emb, arg2_emb = encode_arguments(target_lst, entity_embeddings, entity_to_idx, device)

                scores = hoppy.score(rel_emb, arg1_emb, arg2_emb, facts)
                labels = torch.Tensor(label_lst).float()

                # if i == 0:
                #     print(scores)
                #     print(labels)

                loss = loss_function(scores, labels)

                factors = [hoppy.factor(e) for e in [rel_emb, arg1_emb, arg2_emb]]
                if N2_weight is not None:
                    loss += N2_weight * N2_reg(factors)
                if N3_weight is not None:
                    loss += N3_weight * N3_reg(factors)

                if entropy_weight is not None:
                    attention = relation_embeddings.attention
                    if i == 0:
                        pass
                        # print(scores.cpu().detach().numpy())
                        # print(labels.cpu().detach().numpy())
                        # print(hinton(attention.cpu().detach().numpy()))
                        # print(attention.cpu().detach().numpy())
                    loss += entropy_weight * entropy_reg([attention])

                loss_value = loss.item()

                batch_loss_values += [loss_value]
                epoch_loss_values += [loss_value]

                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

            loss_mean, loss_std = np.mean(batch_loss_values), np.std(batch_loss_values)
            logger.info(f'Epoch {epoch_no}/{nb_epochs}\tBatch {batch_no}/{nb_batches}\t'
                        f'Loss {loss_mean:.4f} ± {loss_std:.4f}')

            if global_step % evaluate_every == 0:
                for test_path in test_paths:
                    instances = data.test[test_path]
                    evaluate(instances=instances, path=test_path)

                    if is_debug is True:
                        for i in range(3):
                            story, target = instances[i].story, instances[i].target
                            # print('INSTANCE', target, story)

                if is_debug is True:
                    r_lst = [r for p in predicate_lst for r in predicate_to_relations[p]]
                    r_idx_lst = [relation_to_idx[r] for r in r_lst]
                    with torch.no_grad():
                        show_rules(model=hoppy, kernel=kernel, relation_embeddings=relation_embeddings,
                                   data=data, relation_to_idx=relation_to_idx, device=device)
                        r_idx_tensor = torch.from_numpy(np.array(r_idx_lst, dtype=np.int64)).to(device)
                        r_tensor = relation_embeddings(r_idx_tensor)
                        k = kernel.pairwise(r_tensor, r_tensor)
                        # print(r_lst)
                        print(hinton(k.cpu().numpy()))

        loss_mean, loss_std = np.mean(epoch_loss_values), np.std(epoch_loss_values)

        slope = kernel.slope.item() if isinstance(kernel.slope, Tensor) else kernel.slope
        logger.info(f'Epoch {epoch_no}/{nb_epochs}\tLoss {loss_mean:.4f} ± {loss_std:.4f}\tSlope {slope:.4f}')

    for test_path in test_paths:
        evaluate(instances=data.test[test_path], path=test_path)

    logger.info("Training finished")
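# Example invocation (the script name below is a placeholder for this module's
# actual filename; the flags match the argparse definitions in main above):
#
#   python clutrr-cli.py --train data/clutrr-emnlp/data_test/64.csv \
#       --test data/clutrr-emnlp/data_test/64.csv \
#       -k 20 -d 2 -e 100 -r linear --N3 0.0001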