"""Evaluate link prediction on the countries S3 test set.

Trains a KG embedding model on the S3 training split, then scores every
(head, locatedin, candidate_region) triple from the test split so the
scores can be fed to an AUC-ROC computation.

Fixes over the original: `csv` and `torch` were used without being
imported (NameError at runtime), and the test CSV was opened without
ever being closed (resource leak).
"""
import csv

import torch

from zincbase import KB
from zincbase.utils.data_science import calc_auc_roc

kb = KB()

# Load (head, relation) -> tail triples from the tab-separated test file.
Xs = []
Ys = []
with open('./assets/countries_s3_test.csv', 'r') as f:
    for row in csv.reader(f, delimiter='\t'):
        Xs.append([row[0], row[1]])
        Ys.append(row[2])

kb.from_csv('./assets/countries_s3_train.csv', delimiter='\t')
kb.build_kg_model(cuda=True, embedding_size=1000, gamma=0.1)
kb.train_kg_model(steps=40000, batch_size=512, lr=0.000002, neg_to_pos=64)

# Build one candidate triple per (test pair, region); y_true marks which
# candidate region is the correct tail for that pair.
y_true = []
sample = []
for ((head, relation), tail) in zip(Xs, Ys):
    for candidate_region in ['oceania', 'asia', 'europe', 'africa', 'americas']:
        y_true.append(1 if candidate_region == tail else 0)
        sample.append((kb._entity2id[head],
                       kb._relation2id[relation],
                       kb._entity2id[candidate_region]))

sample = torch.LongTensor(sample).cuda()
with torch.no_grad():
    # Score all candidate triples in a single forward pass.
    y_score, _ = kb._kg_model(sample)
    y_score = y_score.squeeze(1).cpu().numpy()
"""Test the combination of edge attributes and negative examples.""" import context from zincbase import KB kb = KB() kb.seed(555) kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t') # specifying both a ~ and a truthiness < 0 is probably unnecessary. kb.store('~locatedin(canada, africa)', {'truthiness': -1.}) kb.build_kg_model(cuda=False, embedding_size=100, pred_attributes=['truthiness']) kb.train_kg_model(steps=1000, batch_size=4, neg_ratio=0.01) canada_in_africa = kb.estimate_triple_prob('canada', 'locatedin', 'africa') canada_in_asia = kb.estimate_triple_prob('canada', 'locatedin', 'asia') canada_in_america = kb.estimate_triple_prob('canada', 'locatedin', 'northern_america') assert 2 * canada_in_africa < canada_in_asia assert canada_in_america > 2 * canada_in_asia clafrica_truthiness = kb.estimate_triple_prob_with_attrs( 'canada', 'locatedin', 'africa', 'truthiness') clasia_truthiness = kb.estimate_triple_prob_with_attrs('canada', 'locatedin', 'asia', 'truthiness')
from zincbase import KB

kb = KB()
kb.seed(555)
kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')

# A negated rule should mask the fact from queries until it is deleted.
rule_num = kb.store('~locatedin(canada, africa)')
b = list(kb.query('locatedin(canada, X)'))
assert len(b) == 1
assert b[0]['X'] == 'northern_america'
assert kb.delete_rule(rule_num)

# Baseline ("naive") probabilities from a model trained without the
# negative example.
kb.build_kg_model(cuda=False, embedding_size=100)
kb.train_kg_model(steps=500, batch_size=512, neg_ratio=0.01)
canada_in_africa_naive = kb.estimate_triple_prob('canada', 'locatedin', 'africa')
canada_in_asia_naive = kb.estimate_triple_prob('canada', 'locatedin', 'asia')
austria_neighbors_spain_naive = kb.estimate_triple_prob(
    'austria', 'neighbor', 'spain')
austria_neighbors_france_naive = kb.estimate_triple_prob(
    'austria', 'neighbor', 'france')

# Fresh KB for the next phase of the test.
kb = KB()
kb.seed(555)
kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')
""" from zincbase import KB kb = KB() kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t') print(list(kb.query('locatedin(X, northern_europe)'))) # prints [{'X': 'norway'}, {'X': 'iceland'}, {'X': 'faroe_islands'}, ...] print(list(kb.query('neighbor(austria, X)'))) # prints [{'X': 'italy'}, {'X': 'czechia'}, {'X': 'slovenia'}, ...] kb.build_kg_model(cuda=True, embedding_size=100) kb.train_kg_model(steps=1000, batch_size=512) # takes < 1 minute print(kb.estimate_triple_prob('mali', 'locatedin', 'africa')) # prints a number close to 1 print(kb.get_most_likely('singapore', 'locatedin', '?', k=2)) # prints [{'prob': 0.9672, 'triple': ('singapore', 'locatedin', 'south_eastern_asia')}, ...] print(kb.get_most_likely('austria', 'neighbor', '?', k=8)) # prints [{'prob': 0.9749, 'triple': ('austria', 'neighbor', 'liechtenstein')} ...] kb.fit_knn() print(kb.get_nearest_neighbors('uganda', k=4))
# # # # # # # # # # # # # # # # # # # # # # # #

# Half the people own a raincoat, half do not; the two attributes are
# deliberately complementary so prediction is easy to check.
# NOTE(review): `kb` is defined earlier in this file, outside this section.
for person in ('tom', 'todd', 'oleg', 'john', 'akshay', 'vedant'):
    kb.attr(person, {'owns_a_raincoat': 0.0, 'doesnt_own_raincoat': 1.0})
for person in ('other1', 'other2', 'other3', 'other4', 'other5', 'other6'):
    kb.attr(person, {'owns_a_raincoat': 1.0, 'doesnt_own_raincoat': 0.0})

kb.build_kg_model(cuda=False, embedding_size=30,
                  node_attributes=['owns_a_raincoat', 'doesnt_own_raincoat'],
                  attr_loss_to_graph_loss=0.9)
# Ideally use bs=1 to overfit on this small dataset
# bs=2 at least checks that it works with > 1 bs
kb.train_kg_model(steps=12001, batch_size=2, neg_to_pos=4)

# # # # # # # # # # # # # # # # # # # # # # # #
# People from Seattle should be more likely to
# own an umbrella (attribute prediction test)
# # # # # # # # # # # # # # # # # # # # # # # #

x = kb._kg_model.run_embedding(kb.get_embedding('other1'), 'owns_a_raincoat')
y = kb._kg_model.run_embedding(kb.get_embedding('other1'), 'doesnt_own_raincoat')
assert round(x) == 1
assert round(y) == 0
x = kb._kg_model.run_embedding(kb.get_embedding('other2'), 'owns_a_raincoat')