Пример #1
0
def train(folder, radio):
    """Train the IPTransE alignment model and evaluate periodically.

    folder -- dataset root; data is read from folder + "sharing/<radio>/".
    radio  -- train/test split ratio, mapped to a sub-folder by radio2str.

    Exits the process (sys.exit) as soon as evaluation signals early stop.
    """
    res_folder = generate_res_folder(folder, "iptranse", radio)
    data_folder = folder + "sharing/" + radio2str(radio) + "/"
    print(data_folder)
    print("res folder:", res_folder)

    ents, rels, triples, ref_ent1, ref_ent2 = read_input(data_folder)
    paths = generate_2steps_path(triples)

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    session = tf.Session(config=gpu_config)
    model = IPTransE_Model(session, len(ents), len(rels), ref_ent1, ref_ent2)

    # Epochs at which a dynamic re-alignment pass is run.
    alignment_epochs = (600, 650, 700, 750, 800)
    stop_now = False
    for epoch in range(1, iptranse_epoch + 1):
        train_rel(model, triples, ents, rels, paths, epoch)
        if epoch % print_loss == 0 or epoch == iptranse_epoch - 1:
            stop_now = model.eva(res_folder, epoch)
        if epoch in alignment_epochs:
            dynamic_alignment(model, model.ref_sim_mat(), ref_ent1, ref_ent2,
                              triples, ents)
            stop_now = model.eva(res_folder, epoch, iter=True)
        if stop_now:
            sys.exit(0)
Пример #2
0
def train(folder, radio):
    """Train the MTransH-sim model, mixing triple loss with a label-based
    similarity signal, and evaluate alignment every 10 epochs.

    folder -- dataset root; data is read from folder + "sharing/<radio>/".
    radio  -- train/test split ratio, mapped to a sub-folder by radio2str.

    Stops early when the model's evaluation sets model.is_early.
    """
    print("data:", folder)
    res_folder = generate_res_folder(folder, "mtransh_sim", radio)
    folder = folder + "sharing/" + radio2str(radio) + "/"
    print("res folder:", res_folder)
    triples1, triples2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = generate_input(
        folder)
    # Load the label-similarity matrix once; keep a pristine copy and a
    # thresholded working copy. (Previously the .npy file was read from
    # disk twice for no benefit.)
    label_sim = np.load(folder + "label_sim.npy")
    sim_mat = label_sim.copy()
    # Zero out weak similarities; 0.85 is the filtering threshold used by
    # this variant — presumably tuned on the dev set, TODO confirm.
    sim_mat[sim_mat < 0.85] = 0
    print("label2vec:")
    eval_alignment_mul(label_sim)
    print("filtered label2vec:")
    eval_alignment_mul(sim_mat)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # grab GPU memory on demand
    sess = tf.Session(config=config)
    model = Model(sess, ent_num, rel_num, ref_ent1, ref_ent2, sim_mat,
                  label_sim)
    epochs = 500
    for epo in range(1, epochs + 1):
        loss, t = train_tris_1epo(model, triples1, triples2)
        print("epoch {}: triple_loss = {:.3f}, time = {:.3f} s".format(
            epo, loss, t))
        if epo % 5 == 0:
            # Similarity-supervision epoch every 5 triple epochs.
            train_sim_1epo(model)
        if epo % 10 == 0:
            model.eva(res_folder, epo)
            if model.is_early:
                break
Пример #3
0
def structure_embedding(folder, radio):
    """Train JAPE structure embeddings with a positive-triples-only loss
    and periodically evaluate cross-KG entity alignment.

    folder -- dataset root; data is read from folder + "sharing/<radio>/".
    radio  -- train/test split ratio, mapped to a sub-folder by radio2str.
    """
    res_folder = generate_res_folder(folder, "jape", radio)
    folder = folder + "sharing/" + radio2str(radio) + "/"
    print("res folder:", res_folder)
    triples_data1, triples_data2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = generate_input(
        folder)
    # NOTE(review): "small" presumably switches jape_eva to a cheaper or
    # denser evaluation path for datasets under 50k entities — confirm.
    small = ent_num < 50000
    graph = tf.Graph()
    with graph.as_default():
        # Placeholders for a batch of positive (head, relation, tail) ids.
        pos_hs = tf.placeholder(tf.int32, shape=[None])
        pos_rs = tf.placeholder(tf.int32, shape=[None])
        pos_ts = tf.placeholder(tf.int32, shape=[None])

        with tf.variable_scope('relation2vec' + 'embedding'):
            ent_embeddings = tf.Variable(
                tf.truncated_normal([ent_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            rel_embeddings = tf.Variable(
                tf.truncated_normal([rel_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            # ent_embeddings = tf.Variable(random_unit_embeddings(ent_num, embed_size))
            # rel_embeddings = tf.Variable(random_unit_embeddings(rel_num, embed_size))
            # L2-normalize each embedding row to unit length.
            ent_embeddings = tf.nn.l2_normalize(ent_embeddings, 1)
            rel_embeddings = tf.nn.l2_normalize(rel_embeddings, 1)
            # Reference (test) entity ids of the two KGs, fixed constants
            # used only for evaluation.
            ref_ent_s = tf.constant(ref_ent1, dtype=tf.int32)
            ref_ent_t = tf.constant(ref_ent2, dtype=tf.int32)

        phs = tf.nn.embedding_lookup(ent_embeddings, pos_hs)
        prs = tf.nn.embedding_lookup(rel_embeddings, pos_rs)
        pts = tf.nn.embedding_lookup(ent_embeddings, pos_ts)
        # Loss over positive triples only — no negative sampling here.
        optimizer, loss = only_pos_loss(phs, prs, pts)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # grab GPU memory on demand
        with tf.Session(graph=graph, config=config) as sess:
            tf.global_variables_initializer().run()
            num_steps = triples_num // batch_size

            # Hits@1 of the two previous evaluations; jape_eva uses them
            # for its early-stopping decision.
            ppre_hits1, pre_hits1 = -1, -1
            is_early = False

            for epoch in range(1, epochs + 1):
                pos_loss = 0
                start = time.time()
                for step in range(num_steps):
                    batch_pos = generate_pos_batch(triples_data1,
                                                   triples_data2, step)
                    feed_dict = {
                        pos_hs: [x[0] for x in batch_pos],
                        pos_rs: [x[1] for x in batch_pos],
                        pos_ts: [x[2] for x in batch_pos]
                    }
                    (_, loss_val) = sess.run([optimizer, loss],
                                             feed_dict=feed_dict)
                    pos_loss += loss_val
                # Reshuffle so the next epoch draws different batches.
                random.shuffle(triples_data1.train_triples)
                random.shuffle(triples_data2.train_triples)
                end = time.time()
                print("{}/{}, relation_loss = {:.3f}, time = {:.3f} s".format(
                    epoch, epochs, pos_loss, end - start))
                if epoch % print_loss == 0:
                    # NOTE(review): is_early is updated but never breaks the
                    # loop here, unlike sibling variants — training always
                    # runs the full epoch count; confirm this is intended.
                    ppre_hits1, pre_hits1, is_early = jape_eva(
                        ent_embeddings, ref_ent_s, ref_ent_t, epoch,
                        res_folder, ppre_hits1, pre_hits1, is_early, small)
Пример #4
0
def mtranse(folder1, radio):
    """Train the MTransE model: a TransE relation loss on each KG plus a
    learned linear mapping M between the two entity spaces, alternating
    between the two losses batch by batch.

    folder1 -- dataset root; data is read from folder1 + "mapping/<radio>/".
    radio   -- train/test split ratio, mapped to a sub-folder by radio2str.

    Evaluates alignment in both directions (via M and its inverse) every
    print_loss epochs; on early stop it saves results and exits the process.
    """
    res_folder = generate_res_folder(folder1, "mtranse", radio)
    folder1 = folder1 + "mapping/" + radio2str(radio) + "/"
    print("res folder:", res_folder)

    triples1, triples2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = \
        generate_input(folder1)
    # Initialize the mapping matrix M as a random orthogonal matrix.
    mat_m = random_orthogonal_matrix(embed_size)
    graph = tf.Graph()

    # NOTE(review): "small" presumably tunes early_stop's patience for
    # datasets under 50k entities — confirm.
    small = ent_num < 50000

    with graph.as_default():
        # Positive triple batch (head, relation, tail) and supervised
        # aligned-entity pair batch; "train" selects which loss to run.
        pos_hs = tf.placeholder(tf.int32, shape=[None])
        pos_rs = tf.placeholder(tf.int32, shape=[None])
        pos_ts = tf.placeholder(tf.int32, shape=[None])
        sup_hs = tf.placeholder(tf.int32, shape=[None])
        sup_ts = tf.placeholder(tf.int32, shape=[None])
        train = tf.placeholder(tf.bool)

        with tf.variable_scope('relation2vec' + 'embedding'):
            ent_embeddings = tf.Variable(
                tf.truncated_normal([ent_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size),
                                    dtype=tf.float64))
            rel_embeddings = tf.Variable(
                tf.truncated_normal([rel_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size),
                                    dtype=tf.float64))
            # L2-normalize each embedding row to unit length.
            ent_embeddings = tf.nn.l2_normalize(ent_embeddings, 1)
            rel_embeddings = tf.nn.l2_normalize(rel_embeddings, 1)
        with tf.variable_scope('translation' + 'embedding'):
            # M maps KG1 entity space into KG2 space; "eye" is used by
            # m_loss (presumably an orthogonality regularizer — confirm).
            m = tf.Variable(mat_m)
            eye = tf.constant(np.eye(embed_size))

        phs = tf.nn.embedding_lookup(ent_embeddings, pos_hs)
        prs = tf.nn.embedding_lookup(rel_embeddings, pos_rs)
        pts = tf.nn.embedding_lookup(ent_embeddings, pos_ts)
        e1s = tf.nn.embedding_lookup(ent_embeddings, sup_hs)
        e2s = tf.nn.embedding_lookup(ent_embeddings, sup_ts)
        # One graph, two losses: the bool placeholder picks between the
        # relation (TransE) loss and the mapping loss at run time.
        optimizer, loss = tf.cond(train, lambda: relation_loss(phs, prs, pts),
                                  lambda: m_loss(e1s, e2s, m, eye))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # grab GPU memory on demand
        with tf.Session(graph=graph, config=config) as sess:
            tf.global_variables_initializer().run()
            num_steps = triples_num // batch_size
            sup_batch_size = len(sup_ents_pairs) // num_steps
            assert sup_batch_size > 1
            # print("num of steps:", num_steps)
            # print("sup batch size", sup_batch_size)

            # Hits@1 of the two previous evaluations, fed to early_stop.
            ppre_hits1, pre_hits1 = -1, -1
            is_early = False

            for e in range(1, epochs + 1):
                triple_loss = 0
                mapping_loss = 0
                start = time.time()
                # Alternate: even steps train the relation loss, odd steps
                # the mapping loss (2 * num_steps runs per epoch).
                for b in range(num_steps * 2):
                    loss_type = True if b % 2 == 0 else False
                    batch_pos, sup_batch = generate_triple_batch(
                        batch_size, triples1, triples2, sup_batch_size,
                        sup_ents_pairs)
                    feed_dict = {
                        pos_hs: [x[0] for x in batch_pos],
                        pos_rs: [x[1] for x in batch_pos],
                        pos_ts: [x[2] for x in batch_pos],
                        sup_hs: [x[0] for x in sup_batch],
                        sup_ts: [x[1] for x in sup_batch],
                        train: loss_type
                    }
                    (_, loss_val) = sess.run([optimizer, loss],
                                             feed_dict=feed_dict)
                    if loss_type:
                        triple_loss += loss_val
                    else:
                        mapping_loss += loss_val
                end = time.time()
                print(
                    "{}/{}, rel_loss = {:.3f}, trans_loss = {:.3f}, time = {:.3f} s"
                    .format(e, epochs, triple_loss, mapping_loss, end - start))
                if e % print_loss == 0:
                    embed1 = tf.nn.embedding_lookup(ent_embeddings,
                                                    ref_ent1).eval()
                    embed2 = tf.nn.embedding_lookup(ent_embeddings,
                                                    ref_ent2).eval()
                    # KG1 -> KG2 direction: map embed1 through M.
                    embed12 = np.matmul(embed1, m.eval())
                    prec_set1, hits1 = eval_alignment_multi_embed(
                        embed12, embed2)
                    # prec_set1, hits1 = eval_alignment_mul(np.matmul(embed12, embed2.T), d=True)
                    # KG2 -> KG1 direction: map embed2 through M^-1.
                    embed21 = np.matmul(embed2, tf.matrix_inverse(m).eval())
                    prec_set2, hits12 = eval_alignment_multi_embed(
                        embed21, embed1)
                    # prec_set2, hits12 = eval_alignment_mul(np.matmul(embed21, embed1.T), d=True)
                    gc.collect()

                    if not is_early:
                        ppre_hits1, pre_hits1, is_early = early_stop(
                            ppre_hits1, pre_hits1, hits1, small=small)
                        if is_early:
                            # Early stop: save alignment results and the
                            # entity embeddings, then end the process.
                            out_path = radio_2file(e, res_folder)
                            pair2file(out_path + "res1", prec_set1)
                            pair2file(out_path + "res2", prec_set2)
                            np.save(out_path + "ents_vec",
                                    ent_embeddings.eval())
                            sys.exit(0)

                    # Periodic checkpoint of the alignment results.
                    if e % save_hits1 == 0:
                        out_path = radio_2file(e, res_folder)
                        pair2file(out_path + "res1", prec_set1)
                        pair2file(out_path + "res2", prec_set2)
def structure_embedding(folder, radio):
    """Train JAPE structure embeddings, alternating each epoch between a
    triple loss with negative sampling (even epochs) and a sparse
    similarity loss (odd epochs); evaluates alignment periodically and
    breaks out on early stop.

    folder -- dataset root; data is read from folder + "sharing/<radio>/".
    radio  -- train/test split ratio, mapped to a sub-folder by radio2str.
    """
    res_folder = generate_res_folder(folder, "jape", radio)
    folder = folder + "sharing/" + radio2str(radio) + "/"
    print("res folder:", res_folder)
    triples_data1, triples_data2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = generate_input(
        folder)
    # NOTE(review): "small" presumably switches jape_eva to a cheaper
    # evaluation path for datasets under 50k entities — confirm.
    small = ent_num < 50000
    cross_sim_mat, kb1_sim_mat, kb2_sim_mat = get_all_sim_mat_sparse(folder)
    ids_list1, ids_list2 = get_ids_by_order(folder)
    graph = tf.Graph()
    with graph.as_default():
        # Positive/negative (head, relation, tail) id batches; "flag"
        # selects which of the two losses tf.cond runs.
        pos_hs = tf.placeholder(tf.int32, shape=[None])
        pos_rs = tf.placeholder(tf.int32, shape=[None])
        pos_ts = tf.placeholder(tf.int32, shape=[None])
        neg_hs = tf.placeholder(tf.int32, shape=[None])
        neg_rs = tf.placeholder(tf.int32, shape=[None])
        neg_ts = tf.placeholder(tf.int32, shape=[None])
        flag = tf.placeholder(tf.bool)

        with tf.variable_scope('relation2vec' + 'embedding'):
            ent_embeddings = tf.Variable(
                tf.truncated_normal([ent_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            rel_embeddings = tf.Variable(
                tf.truncated_normal([rel_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            # L2-normalize each embedding row to unit length.
            ent_embeddings = tf.nn.l2_normalize(ent_embeddings, 1)
            rel_embeddings = tf.nn.l2_normalize(rel_embeddings, 1)
            # Reference (test) entity ids, fixed for evaluation.
            ref_ent_s = tf.constant(ref_ent1, dtype=tf.int32)
            ref_ent_t = tf.constant(ref_ent2, dtype=tf.int32)

        with tf.variable_scope('sparse' + 'sim'):
            # Sparse similarity matrices: cross-KG plus within-KG1/KG2.
            cross_sparse_sim = sparse_mat_2sparse_tensor(cross_sim_mat)
            kb1_sparse_sim = sparse_mat_2sparse_tensor(kb1_sim_mat)
            kb2_sparse_sim = sparse_mat_2sparse_tensor(kb2_sim_mat)

        ents_1 = tf.nn.embedding_lookup(ent_embeddings, ids_list1)
        ents_2 = tf.nn.embedding_lookup(ent_embeddings, ids_list2)
        phs = tf.nn.embedding_lookup(ent_embeddings, pos_hs)
        prs = tf.nn.embedding_lookup(rel_embeddings, pos_rs)
        pts = tf.nn.embedding_lookup(ent_embeddings, pos_ts)
        nhs = tf.nn.embedding_lookup(ent_embeddings, neg_hs)
        nrs = tf.nn.embedding_lookup(rel_embeddings, neg_rs)
        nts = tf.nn.embedding_lookup(ent_embeddings, neg_ts)

        # One graph, two losses: flag=True runs the margin-based triple
        # loss, flag=False the sparse similarity loss.
        optimizer, loss = tf.cond(
            flag, lambda: loss_with_neg(phs, prs, pts, nhs, nrs, nts),
            lambda: sim_loss_sparse_with_kb12(ents_1, ents_2, cross_sparse_sim,
                                              kb1_sparse_sim, kb2_sparse_sim))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # grab GPU memory on demand
        with tf.Session(graph=graph, config=config) as sess:
            tf.global_variables_initializer().run()
            num_steps = triples_num // batch_size

            # Hits@1 of the two previous evaluations; jape_eva uses them
            # for its early-stopping decision.
            ppre_hits1, pre_hits1 = -1, -1
            is_early = False

            for epoch in range(1, epochs + 1):
                train_loss = 0
                start = time.time()
                if epoch % 2 == 0:
                    # Even epoch: full pass of triple-loss steps.
                    for step in range(num_steps):
                        batch_pos, batch_neg = generate_pos_neg_batch(
                            triples_data1, triples_data2, step)
                        feed_dict = {
                            pos_hs: [x[0] for x in batch_pos],
                            pos_rs: [x[1] for x in batch_pos],
                            pos_ts: [x[2] for x in batch_pos],
                            neg_hs: [x[0] for x in batch_neg],
                            neg_rs: [x[1] for x in batch_neg],
                            neg_ts: [x[2] for x in batch_neg],
                            flag: True
                        }
                        (_, loss_val) = sess.run([optimizer, loss],
                                                 feed_dict=feed_dict)
                        train_loss += loss_val
                else:
                    # Odd epoch: a single similarity-loss step. The triple
                    # batch is still fed because the placeholders must be
                    # bound, but flag=False selects the similarity loss.
                    batch_pos, batch_neg = generate_pos_neg_batch(
                        triples_data1, triples_data2, 1)
                    feed_dict = {
                        pos_hs: [x[0] for x in batch_pos],
                        pos_rs: [x[1] for x in batch_pos],
                        pos_ts: [x[2] for x in batch_pos],
                        neg_hs: [x[0] for x in batch_neg],
                        neg_rs: [x[1] for x in batch_neg],
                        neg_ts: [x[2] for x in batch_neg],
                        flag: False
                    }
                    (_, loss_val) = sess.run([optimizer, loss],
                                             feed_dict=feed_dict)
                    train_loss += loss_val
                # Reshuffle so the next epoch draws different batches.
                random.shuffle(triples_data1.train_triples)
                random.shuffle(triples_data2.train_triples)
                end = time.time()
                # (Fixed: this assignment was accidentally duplicated.)
                loss_print = "rel loss" if epoch % 2 == 0 else "sim loss"
                print("{}/{}, {} = {:.3f}, time = {:.3f} s".format(
                    epoch, epochs, loss_print, train_loss, end - start))
                if epoch % print_loss == 0:
                    ppre_hits1, pre_hits1, is_early = jape_eva(
                        ent_embeddings, ref_ent_s, ref_ent_t, epoch,
                        res_folder, ppre_hits1, pre_hits1, is_early, small)
                    if is_early:
                        break
Пример #6
0
def structure_embedding(folder, radio):
    """Train MTransD structure embeddings: TransD-style dynamic projection
    of entities into each relation's space, optimized with a margin-based
    TransE loss over positive/negative triples; evaluates alignment
    periodically and breaks out on early stop.

    folder -- dataset root; data is read from folder + "sharing/<radio>/".
    radio  -- train/test split ratio, mapped to a sub-folder by radio2str.
    """
    res_folder = generate_res_folder(folder, "mtransd", radio)
    folder = folder + "sharing/" + radio2str(radio) + "/"
    print("res folder:", res_folder)
    triples_data1, triples_data2, sup_ents_pairs, ref_ent1, ref_ent2, triples_num, ent_num, rel_num = generate_input(
        folder)
    # NOTE(review): "small" presumably switches jape_eva to a cheaper
    # evaluation path for datasets under 50k entities — confirm.
    small = ent_num < 50000
    graph = tf.Graph()
    with graph.as_default():
        # Positive and negative (head, tail, relation) id batches.
        pos_h = tf.placeholder(tf.int32, [None])
        pos_t = tf.placeholder(tf.int32, [None])
        pos_r = tf.placeholder(tf.int32, [None])

        neg_h = tf.placeholder(tf.int32, [None])
        neg_t = tf.placeholder(tf.int32, [None])
        neg_r = tf.placeholder(tf.int32, [None])

        with tf.variable_scope('relation2vec' + 'embedding'):
            ent_embeddings = tf.Variable(
                tf.truncated_normal([ent_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            rel_embeddings = tf.Variable(
                tf.truncated_normal([rel_num, embed_size],
                                    stddev=1.0 / math.sqrt(embed_size)))
            # L2-normalize each embedding row to unit length.
            ent_embeddings = tf.nn.l2_normalize(ent_embeddings, 1)
            rel_embeddings = tf.nn.l2_normalize(rel_embeddings, 1)

            # Margin of the ranking loss (fixed at 1.0).
            margin = tf.constant(1.0)

            # TransD transfer (projection) vectors for entities and
            # relations, Xavier-initialized.
            ent_transfer = tf.get_variable(
                name="ent_transfer",
                shape=[ent_num, embed_size],
                initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False))
            rel_transfer = tf.get_variable(
                name="rel_transfer",
                shape=[rel_num, embed_size],
                initializer=tf.contrib.layers.xavier_initializer(
                    uniform=False))

            # Reference (test) entity ids, fixed for evaluation.
            ref_ent_s = tf.constant(ref_ent1, dtype=tf.int32)
            ref_ent_t = tf.constant(ref_ent2, dtype=tf.int32)

        pos_h_e = tf.nn.embedding_lookup(ent_embeddings, pos_h)
        pos_t_e = tf.nn.embedding_lookup(ent_embeddings, pos_t)
        pos_r_e = tf.nn.embedding_lookup(rel_embeddings, pos_r)
        pos_h_t = tf.nn.embedding_lookup(ent_transfer, pos_h)
        pos_t_t = tf.nn.embedding_lookup(ent_transfer, pos_t)
        pos_r_t = tf.nn.embedding_lookup(rel_transfer, pos_r)

        neg_h_e = tf.nn.embedding_lookup(ent_embeddings, neg_h)
        neg_t_e = tf.nn.embedding_lookup(ent_embeddings, neg_t)
        neg_r_e = tf.nn.embedding_lookup(rel_embeddings, neg_r)
        neg_h_t = tf.nn.embedding_lookup(ent_transfer, neg_h)
        neg_t_t = tf.nn.embedding_lookup(ent_transfer, neg_t)
        neg_r_t = tf.nn.embedding_lookup(rel_transfer, neg_r)

        # Project each entity into the relation-specific space via its
        # transfer vectors (the TransD "calc" projection).
        pos_h_e = calc(pos_h_e, pos_h_t, pos_r_t)
        pos_t_e = calc(pos_t_e, pos_t_t, pos_r_t)
        neg_h_e = calc(neg_h_e, neg_h_t, neg_r_t)
        neg_t_e = calc(neg_t_e, neg_t_t, neg_r_t)

        optimizer, loss = transe_loss(pos_h_e, pos_r_e, pos_t_e, neg_h_e,
                                      neg_r_e, neg_t_e, margin)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # grab GPU memory on demand
        with tf.Session(graph=graph, config=config) as sess:
            tf.global_variables_initializer().run()
            num_steps = triples_num // batch_size

            # Hits@1 of the two previous evaluations; jape_eva uses them
            # for its early-stopping decision.
            ppre_hits1, pre_hits1 = -1, -1
            is_early = False

            for epoch in range(1, epochs + 1):
                pos_loss = 0
                start = time.time()
                for step in range(num_steps):
                    batch_pos, batch_neg = generate_pos_neg_batch(
                        triples_data1, triples_data2, step, multi=1)
                    feed_dict = {
                        pos_h: [x[0] for x in batch_pos],
                        pos_r: [x[1] for x in batch_pos],
                        pos_t: [x[2] for x in batch_pos],
                        neg_h: [x[0] for x in batch_neg],
                        neg_r: [x[1] for x in batch_neg],
                        neg_t: [x[2] for x in batch_neg]
                    }
                    (_, loss_val) = sess.run([optimizer, loss],
                                             feed_dict=feed_dict)
                    pos_loss += loss_val
                # Reshuffle so the next epoch draws different batches.
                random.shuffle(triples_data1.train_triples)
                random.shuffle(triples_data2.train_triples)
                end = time.time()
                print("{}/{}, relation_loss = {:.3f}, time = {:.3f} s".format(
                    epoch, epochs, pos_loss, end - start))
                if epoch % print_loss == 0:
                    ppre_hits1, pre_hits1, is_early = jape_eva(
                        ent_embeddings, ref_ent_s, ref_ent_t, epoch,
                        res_folder, ppre_hits1, pre_hits1, is_early, small)
                    if is_early:
                        break