def train(folder, radio):
    """Train IPTransE on the shared-embedding split of `folder`.

    Evaluates every `print_loss` epochs, runs dynamic alignment at fixed
    milestone epochs, and exits the process as soon as early stopping fires.
    """
    res_folder = generate_res_folder(folder, "iptranse", radio)
    folder = folder + "sharing/" + radio2str(radio) + "/"
    print(folder)
    print("res folder:", res_folder)
    ents, rels, triples, ref_ent1, ref_ent2 = read_input(folder)
    # Two-step relation paths used as extra training signal by IPTransE.
    paths = generate_2steps_path(triples)
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    session = tf.Session(config=tf_config)
    stop = False
    model = IPTransE_Model(session, len(ents), len(rels), ref_ent1, ref_ent2)
    for epoch in range(1, iptranse_epoch + 1):
        train_rel(model, triples, ents, rels, paths, epoch)
        # Periodic evaluation, plus one extra pass just before the last epoch.
        if epoch % print_loss == 0 or epoch == iptranse_epoch - 1:
            stop = model.eva(res_folder, epoch)
        # At these milestones, refresh alignment from the current reference
        # similarity matrix and re-evaluate in iterative mode.
        if epoch in (600, 650, 700, 750, 800):
            sim_mat = model.ref_sim_mat()
            dynamic_alignment(model, sim_mat, ref_ent1, ref_ent2, triples, ents)
            stop = model.eva(res_folder, epoch, iter=True)
        if stop:
            sys.exit(0)
def train(folder, radio):
    """Train the MTransH + label-similarity model.

    Loads the precomputed label similarity matrix, keeps an untouched copy
    for evaluation and a thresholded copy (entries < 0.85 zeroed) as the
    supervision signal, then alternates triple training with periodic
    similarity training and evaluation.
    """
    print("data:", folder)
    res_folder = generate_res_folder(folder, "mtransh_sim", radio)
    folder = folder + "sharing/" + radio2str(radio) + "/"
    print("res folder:", res_folder)
    triples1, triples2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = generate_input(
        folder)
    # Fix: the original loaded "label_sim.npy" from disk twice; load once
    # and copy in memory instead. sim_mat is the filtered variant.
    label_sim = np.load(folder + "label_sim.npy")
    sim_mat = label_sim.copy()
    sim_mat[sim_mat < 0.85] = 0
    print("label2vec:")
    eval_alignment_mul(label_sim)
    print("filtered label2vec:")
    eval_alignment_mul(sim_mat)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    model = Model(sess, ent_num, rel_num, ref_ent1, ref_ent2, sim_mat,
                  label_sim)
    epochs = 500
    for epo in range(1, epochs + 1):
        loss, t = train_tris_1epo(model, triples1, triples2)
        print("epoch {}: triple_loss = {:.3f}, time = {:.3f} s".format(
            epo, loss, t))
        # Similarity training every 5 epochs; evaluation every 10.
        if epo % 5 == 0:
            train_sim_1epo(model)
        if epo % 10 == 0:
            model.eva(res_folder, epo)
            if model.is_early:
                break
def structure_embedding(folder, radio):
    """Train the JAPE structure embedding with a positive-only TransE loss.

    Builds a fresh graph with L2-normalized entity/relation embeddings,
    iterates over positive triple batches, and evaluates alignment every
    `print_loss` epochs with early stopping handled by `jape_eva`.
    """
    res_folder = generate_res_folder(folder, "jape", radio)
    folder = folder + "sharing/" + radio2str(radio) + "/"
    print("res folder:", res_folder)
    triples_data1, triples_data2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = generate_input(
        folder)
    # "small" datasets (< 50k entities) get a different early-stop regime.
    small = ent_num < 50000
    graph = tf.Graph()
    with graph.as_default():
        pos_hs = tf.placeholder(tf.int32, shape=[None])
        pos_rs = tf.placeholder(tf.int32, shape=[None])
        pos_ts = tf.placeholder(tf.int32, shape=[None])
        with tf.variable_scope('relation2vec' + 'embedding'):
            ent_embeddings = tf.Variable(
                tf.truncated_normal([ent_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            rel_embeddings = tf.Variable(
                tf.truncated_normal([rel_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            ent_embeddings = tf.nn.l2_normalize(ent_embeddings, 1)
            rel_embeddings = tf.nn.l2_normalize(rel_embeddings, 1)
            ref_ent_s = tf.constant(ref_ent1, dtype=tf.int32)
            ref_ent_t = tf.constant(ref_ent2, dtype=tf.int32)
        phs = tf.nn.embedding_lookup(ent_embeddings, pos_hs)
        prs = tf.nn.embedding_lookup(rel_embeddings, pos_rs)
        pts = tf.nn.embedding_lookup(ent_embeddings, pos_ts)
        optimizer, loss = only_pos_loss(phs, prs, pts)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(graph=graph, config=config) as sess:
            tf.global_variables_initializer().run()
            num_steps = triples_num // batch_size
            ppre_hits1, pre_hits1 = -1, -1
            is_early = False
            for epoch in range(1, epochs + 1):
                pos_loss = 0
                start = time.time()
                for step in range(num_steps):
                    batch_pos = generate_pos_batch(triples_data1,
                                                   triples_data2, step)
                    feed_dict = {
                        pos_hs: [tri[0] for tri in batch_pos],
                        pos_rs: [tri[1] for tri in batch_pos],
                        pos_ts: [tri[2] for tri in batch_pos]
                    }
                    (_, loss_val) = sess.run([optimizer, loss],
                                             feed_dict=feed_dict)
                    pos_loss += loss_val
                # Reshuffle so the next epoch sees batches in a new order.
                random.shuffle(triples_data1.train_triples)
                random.shuffle(triples_data2.train_triples)
                end = time.time()
                print("{}/{}, relation_loss = {:.3f}, time = {:.3f} s".format(
                    epoch, epochs, pos_loss, end - start))
                if epoch % print_loss == 0:
                    ppre_hits1, pre_hits1, is_early = jape_eva(
                        ent_embeddings, ref_ent_s, ref_ent_t, epoch,
                        res_folder, ppre_hits1, pre_hits1, is_early, small)
def mtranse(folder1, radio):
    """Train MTransE: per-KB TransE plus a learned linear mapping `m`.

    Alternates, batch by batch, between the relation (triple) loss and the
    mapping loss via a boolean `train` placeholder fed into `tf.cond`.
    Evaluates alignment in both directions every `print_loss` epochs and
    exits the process when early stopping triggers.
    """
    res_folder = generate_res_folder(folder1, "mtranse", radio)
    folder1 = folder1 + "mapping/" + radio2str(radio) + "/"
    print("res folder:", res_folder)
    triples1, triples2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = \
        generate_input(folder1)
    # The mapping matrix starts as a random orthogonal matrix.
    mat_m = random_orthogonal_matrix(embed_size)
    graph = tf.Graph()
    small = ent_num < 50000
    with graph.as_default():
        pos_hs = tf.placeholder(tf.int32, shape=[None])
        pos_rs = tf.placeholder(tf.int32, shape=[None])
        pos_ts = tf.placeholder(tf.int32, shape=[None])
        sup_hs = tf.placeholder(tf.int32, shape=[None])
        sup_ts = tf.placeholder(tf.int32, shape=[None])
        # True -> optimize the relation loss, False -> the mapping loss.
        train = tf.placeholder(tf.bool)
        with tf.variable_scope('relation2vec' + 'embedding'):
            ent_embeddings = tf.Variable(
                tf.truncated_normal([ent_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size),
                                    dtype=tf.float64))
            rel_embeddings = tf.Variable(
                tf.truncated_normal([rel_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size),
                                    dtype=tf.float64))
            ent_embeddings = tf.nn.l2_normalize(ent_embeddings, 1)
            rel_embeddings = tf.nn.l2_normalize(rel_embeddings, 1)
        with tf.variable_scope('translation' + 'embedding'):
            m = tf.Variable(mat_m)
            eye = tf.constant(np.eye(embed_size))
        phs = tf.nn.embedding_lookup(ent_embeddings, pos_hs)
        prs = tf.nn.embedding_lookup(rel_embeddings, pos_rs)
        pts = tf.nn.embedding_lookup(ent_embeddings, pos_ts)
        e1s = tf.nn.embedding_lookup(ent_embeddings, sup_hs)
        e2s = tf.nn.embedding_lookup(ent_embeddings, sup_ts)
        optimizer, loss = tf.cond(train,
                                  lambda: relation_loss(phs, prs, pts),
                                  lambda: m_loss(e1s, e2s, m, eye))
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(graph=graph, config=config) as sess:
            tf.global_variables_initializer().run()
            num_steps = triples_num // batch_size
            # Spread the supervised pairs evenly across the steps.
            sup_batch_size = len(sup_ents_pairs) // num_steps
            assert sup_batch_size > 1
            ppre_hits1, pre_hits1 = -1, -1
            is_early = False
            for e in range(1, epochs + 1):
                triple_loss = 0
                mapping_loss = 0
                start = time.time()
                # Even batches train triples, odd batches train the mapping.
                for b in range(num_steps * 2):
                    loss_type = True if b % 2 == 0 else False
                    batch_pos, sup_batch = generate_triple_batch(
                        batch_size, triples1, triples2, sup_batch_size,
                        sup_ents_pairs)
                    feed_dict = {
                        pos_hs: [tri[0] for tri in batch_pos],
                        pos_rs: [tri[1] for tri in batch_pos],
                        pos_ts: [tri[2] for tri in batch_pos],
                        sup_hs: [pair[0] for pair in sup_batch],
                        sup_ts: [pair[1] for pair in sup_batch],
                        train: loss_type
                    }
                    (_, loss_val) = sess.run([optimizer, loss],
                                             feed_dict=feed_dict)
                    if loss_type:
                        triple_loss += loss_val
                    else:
                        mapping_loss += loss_val
                end = time.time()
                print(
                    "{}/{}, rel_loss = {:.3f}, trans_loss = {:.3f}, time = {:.3f} s"
                    .format(e, epochs, triple_loss, mapping_loss, end - start))
                if e % print_loss == 0:
                    embed1 = tf.nn.embedding_lookup(ent_embeddings,
                                                    ref_ent1).eval()
                    embed2 = tf.nn.embedding_lookup(ent_embeddings,
                                                    ref_ent2).eval()
                    # KB1 -> KB2 via m; KB2 -> KB1 via m's inverse.
                    embed12 = np.matmul(embed1, m.eval())
                    prec_set1, hits1 = eval_alignment_multi_embed(
                        embed12, embed2)
                    embed21 = np.matmul(embed2, tf.matrix_inverse(m).eval())
                    prec_set2, hits12 = eval_alignment_multi_embed(
                        embed21, embed1)
                    gc.collect()
                    if not is_early:
                        ppre_hits1, pre_hits1, is_early = early_stop(
                            ppre_hits1, pre_hits1, hits1, small=small)
                    if is_early:
                        out_path = radio_2file(e, res_folder)
                        pair2file(out_path + "res1", prec_set1)
                        pair2file(out_path + "res2", prec_set2)
                        np.save(out_path + "ents_vec", ent_embeddings.eval())
                        sys.exit(0)
                    if e % save_hits1 == 0:
                        out_path = radio_2file(e, res_folder)
                        pair2file(out_path + "res1", prec_set1)
                        pair2file(out_path + "res2", prec_set2)
def structure_embedding(folder, radio):
    """Train JAPE structure embedding jointly with sparse attribute similarity.

    Even epochs run negative-sampled triple training over all batches; odd
    epochs run a single similarity-loss step over the sparse cross-KB and
    intra-KB similarity matrices. A boolean `flag` placeholder selects which
    loss `tf.cond` optimizes. Evaluates every `print_loss` epochs and stops
    early when `jape_eva` says so.
    """
    res_folder = generate_res_folder(folder, "jape", radio)
    folder = folder + "sharing/" + radio2str(radio) + "/"
    print("res folder:", res_folder)
    triples_data1, triples_data2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = generate_input(
        folder)
    small = ent_num < 50000
    cross_sim_mat, kb1_sim_mat, kb2_sim_mat = get_all_sim_mat_sparse(folder)
    ids_list1, ids_list2 = get_ids_by_order(folder)
    graph = tf.Graph()
    with graph.as_default():
        pos_hs = tf.placeholder(tf.int32, shape=[None])
        pos_rs = tf.placeholder(tf.int32, shape=[None])
        pos_ts = tf.placeholder(tf.int32, shape=[None])
        neg_hs = tf.placeholder(tf.int32, shape=[None])
        neg_rs = tf.placeholder(tf.int32, shape=[None])
        neg_ts = tf.placeholder(tf.int32, shape=[None])
        # True -> triple loss with negatives, False -> similarity loss.
        flag = tf.placeholder(tf.bool)
        with tf.variable_scope('relation2vec' + 'embedding'):
            ent_embeddings = tf.Variable(
                tf.truncated_normal([ent_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            rel_embeddings = tf.Variable(
                tf.truncated_normal([rel_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            ent_embeddings = tf.nn.l2_normalize(ent_embeddings, 1)
            rel_embeddings = tf.nn.l2_normalize(rel_embeddings, 1)
            ref_ent_s = tf.constant(ref_ent1, dtype=tf.int32)
            ref_ent_t = tf.constant(ref_ent2, dtype=tf.int32)
        with tf.variable_scope('sparse' + 'sim'):
            cross_sparse_sim = sparse_mat_2sparse_tensor(cross_sim_mat)
            kb1_sparse_sim = sparse_mat_2sparse_tensor(kb1_sim_mat)
            kb2_sparse_sim = sparse_mat_2sparse_tensor(kb2_sim_mat)
        ents_1 = tf.nn.embedding_lookup(ent_embeddings, ids_list1)
        ents_2 = tf.nn.embedding_lookup(ent_embeddings, ids_list2)
        phs = tf.nn.embedding_lookup(ent_embeddings, pos_hs)
        prs = tf.nn.embedding_lookup(rel_embeddings, pos_rs)
        pts = tf.nn.embedding_lookup(ent_embeddings, pos_ts)
        nhs = tf.nn.embedding_lookup(ent_embeddings, neg_hs)
        nrs = tf.nn.embedding_lookup(rel_embeddings, neg_rs)
        nts = tf.nn.embedding_lookup(ent_embeddings, neg_ts)
        optimizer, loss = tf.cond(
            flag, lambda: loss_with_neg(phs, prs, pts, nhs, nrs, nts),
            lambda: sim_loss_sparse_with_kb12(ents_1, ents_2, cross_sparse_sim,
                                              kb1_sparse_sim, kb2_sparse_sim))
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(graph=graph, config=config) as sess:
            tf.global_variables_initializer().run()
            num_steps = triples_num // batch_size
            ppre_hits1, pre_hits1 = -1, -1
            is_early = False
            for epoch in range(1, epochs + 1):
                train_loss = 0
                start = time.time()
                if epoch % 2 == 0:
                    # Triple training over every batch.
                    for step in range(num_steps):
                        batch_pos, batch_neg = generate_pos_neg_batch(
                            triples_data1, triples_data2, step)
                        feed_dict = {
                            pos_hs: [x[0] for x in batch_pos],
                            pos_rs: [x[1] for x in batch_pos],
                            pos_ts: [x[2] for x in batch_pos],
                            neg_hs: [x[0] for x in batch_neg],
                            neg_rs: [x[1] for x in batch_neg],
                            neg_ts: [x[2] for x in batch_neg],
                            flag: True
                        }
                        (_, loss_val) = sess.run([optimizer, loss],
                                                 feed_dict=feed_dict)
                        train_loss += loss_val
                else:
                    # One similarity-loss step; the triple placeholders still
                    # need values even though the sim branch ignores them.
                    batch_pos, batch_neg = generate_pos_neg_batch(
                        triples_data1, triples_data2, 1)
                    feed_dict = {
                        pos_hs: [x[0] for x in batch_pos],
                        pos_rs: [x[1] for x in batch_pos],
                        pos_ts: [x[2] for x in batch_pos],
                        neg_hs: [x[0] for x in batch_neg],
                        neg_rs: [x[1] for x in batch_neg],
                        neg_ts: [x[2] for x in batch_neg],
                        flag: False
                    }
                    (_, loss_val) = sess.run([optimizer, loss],
                                             feed_dict=feed_dict)
                    train_loss += loss_val
                random.shuffle(triples_data1.train_triples)
                random.shuffle(triples_data2.train_triples)
                end = time.time()
                # Fix: this assignment was duplicated verbatim in the
                # original; one copy suffices.
                loss_print = "rel loss" if epoch % 2 == 0 else "sim loss"
                print("{}/{}, {} = {:.3f}, time = {:.3f} s".format(
                    epoch, epochs, loss_print, train_loss, end - start))
                if epoch % print_loss == 0:
                    ppre_hits1, pre_hits1, is_early = jape_eva(
                        ent_embeddings, ref_ent_s, ref_ent_t, epoch,
                        res_folder, ppre_hits1, pre_hits1, is_early, small)
                    if is_early:
                        break
def structure_embedding(folder, radio):
    """Train an MTransD-style embedding: TransE with entity/relation
    transfer vectors projecting entities into relation-specific space.

    Trains on positive/negative triple batches with a margin loss and
    evaluates alignment every `print_loss` epochs via `jape_eva`, stopping
    early when it says so.
    """
    res_folder = generate_res_folder(folder, "mtransd", radio)
    folder = folder + "sharing/" + radio2str(radio) + "/"
    print("res folder:", res_folder)
    triples_data1, triples_data2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = generate_input(
        folder)
    small = ent_num < 50000
    graph = tf.Graph()
    with graph.as_default():
        pos_h = tf.placeholder(tf.int32, [None])
        pos_t = tf.placeholder(tf.int32, [None])
        pos_r = tf.placeholder(tf.int32, [None])
        neg_h = tf.placeholder(tf.int32, [None])
        neg_t = tf.placeholder(tf.int32, [None])
        neg_r = tf.placeholder(tf.int32, [None])
        with tf.variable_scope('relation2vec' + 'embedding'):
            ent_embeddings = tf.Variable(
                tf.truncated_normal([ent_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            rel_embeddings = tf.Variable(
                tf.truncated_normal([rel_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            ent_embeddings = tf.nn.l2_normalize(ent_embeddings, 1)
            rel_embeddings = tf.nn.l2_normalize(rel_embeddings, 1)
            margin = tf.constant(1.0)
            # Transfer vectors used by calc() to project embeddings.
            ent_transfer = tf.get_variable(
                name="ent_transfer",
                shape=[ent_num, embed_size],
                initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False))
            rel_transfer = tf.get_variable(
                name="rel_transfer",
                shape=[rel_num, embed_size],
                initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False))
            ref_ent_s = tf.constant(ref_ent1, dtype=tf.int32)
            ref_ent_t = tf.constant(ref_ent2, dtype=tf.int32)
        pos_h_e = tf.nn.embedding_lookup(ent_embeddings, pos_h)
        pos_t_e = tf.nn.embedding_lookup(ent_embeddings, pos_t)
        pos_r_e = tf.nn.embedding_lookup(rel_embeddings, pos_r)
        pos_h_t = tf.nn.embedding_lookup(ent_transfer, pos_h)
        pos_t_t = tf.nn.embedding_lookup(ent_transfer, pos_t)
        pos_r_t = tf.nn.embedding_lookup(rel_transfer, pos_r)
        neg_h_e = tf.nn.embedding_lookup(ent_embeddings, neg_h)
        neg_t_e = tf.nn.embedding_lookup(ent_embeddings, neg_t)
        neg_r_e = tf.nn.embedding_lookup(rel_embeddings, neg_r)
        neg_h_t = tf.nn.embedding_lookup(ent_transfer, neg_h)
        neg_t_t = tf.nn.embedding_lookup(ent_transfer, neg_t)
        neg_r_t = tf.nn.embedding_lookup(rel_transfer, neg_r)
        # Project both positive and negative entities into relation space.
        pos_h_e = calc(pos_h_e, pos_h_t, pos_r_t)
        pos_t_e = calc(pos_t_e, pos_t_t, pos_r_t)
        neg_h_e = calc(neg_h_e, neg_h_t, neg_r_t)
        neg_t_e = calc(neg_t_e, neg_t_t, neg_r_t)
        optimizer, loss = transe_loss(pos_h_e, pos_r_e, pos_t_e, neg_h_e,
                                      neg_r_e, neg_t_e, margin)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(graph=graph, config=config) as sess:
            tf.global_variables_initializer().run()
            num_steps = triples_num // batch_size
            ppre_hits1, pre_hits1 = -1, -1
            is_early = False
            for epoch in range(1, epochs + 1):
                pos_loss = 0
                start = time.time()
                for step in range(num_steps):
                    batch_pos, batch_neg = generate_pos_neg_batch(
                        triples_data1, triples_data2, step, multi=1)
                    feed_dict = {
                        pos_h: [tri[0] for tri in batch_pos],
                        pos_r: [tri[1] for tri in batch_pos],
                        pos_t: [tri[2] for tri in batch_pos],
                        neg_h: [tri[0] for tri in batch_neg],
                        neg_r: [tri[1] for tri in batch_neg],
                        neg_t: [tri[2] for tri in batch_neg]
                    }
                    (_, loss_val) = sess.run([optimizer, loss],
                                             feed_dict=feed_dict)
                    pos_loss += loss_val
                random.shuffle(triples_data1.train_triples)
                random.shuffle(triples_data2.train_triples)
                end = time.time()
                print("{}/{}, relation_loss = {:.3f}, time = {:.3f} s".format(
                    epoch, epochs, pos_loss, end - start))
                if epoch % print_loss == 0:
                    ppre_hits1, pre_hits1, is_early = jape_eva(
                        ent_embeddings, ref_ent_s, ref_ent_t, epoch,
                        res_folder, ppre_hits1, pre_hits1, is_early, small)
                    if is_early:
                        break