Пример #1
0
# Make sure the result directory exists. exist_ok=True replaces the separate
# os.path.exists() check, which could race with another process creating the
# directory between the check and the makedirs() call.
os.makedirs(result_dir, exist_ok=True)

con = Config()

# Benchmark data lives under ./benchmarks/<dataset>/.
in_path = "./benchmarks/" + args.dataset + "/"
con.set_in_path(in_path)

# An empty args.test_file leaves the test file path empty; otherwise the
# file name is resolved relative to the dataset directory.
test_file_path = in_path + args.test_file if args.test_file != "" else ""
con.set_test_file_path(test_file_path)

# Training hyper-parameters: fixed values are hard-coded here, the rest come
# from the parsed command-line arguments.
con.set_work_threads(8)
con.set_train_times(args.num_epochs)
con.set_nbatches(args.nbatches)
con.set_alpha(args.learning_rate)
con.set_bern(1)
con.set_dimension(args.hidden_size)
con.set_lmbda(args.lmbda)
con.set_lmbda_two(0.01)
con.set_margin(1.0)
con.set_ent_neg_rate(args.neg_num)
con.set_opt_method(args.optim)

# Checkpointing, validation cadence and early stopping.
con.set_save_steps(args.save_steps)
con.set_valid_steps(args.valid_steps)
con.set_early_stopping_patience(10)
con.set_checkpoint_dir(checkpoint_dir)
con.set_result_dir(result_dir)

# knowledge graph completion ~ link prediction
con.set_test_link(True)
con.init()

# training mode
Пример #2
0
from config.Config import Config
from models.TransE import TransE
import tensorflow as tf
import numpy as np
import json

# (1) Set import files and OpenKE will automatically load models via tf.Saver().
# This script only evaluates: it never calls a training method, it points the
# configuration at a previously exported model file and then runs con.test().
con = Config()
# Directory holding the FB15K benchmark files.
con.set_in_path("./benchmarks/FB15K/")
# Enable the test flag before init() so that con.test() can be used below.
con.set_test_flag(True)
con.set_work_threads(4)
# NOTE(review): presumably this must match the dimension the imported model
# was trained with -- confirm against the training script.
con.set_dimension(50)
# Model file to load parameters from.
con.set_import_files("./res/model.vec.tf")
con.init()
# The model class is set after init(), then the evaluation is run.
con.set_model(TransE)
con.test()

# (2) Read model parameters from json files and manually load parameters.
# con = Config()
# con.set_in_path("./benchmarks/FB15K/")
# con.set_test_flag(True)
# con.set_work_threads(4)
# con.set_dimension(50)
# con.init()
# con.set_model(TransE)
# f = open("./res/embedding.vec.json", "r")
# content = json.loads(f.read())
# f.close()
# con.set_parameters(content)
# con.test()
Пример #3
0
def _write_embeddings(output_dir, user_embeddings, embedding_dim):
    """Write embeddings to ``embedding.vec.txt`` inside *output_dir*.

    The first line is ``<count> <dim>``; every following line holds a user id
    followed by the components of its vector, separated by single spaces.

    Args:
        output_dir: Directory that receives the file.
        user_embeddings: Mapping from user id to an iterable of components.
        embedding_dim: Dimension written into the header line.
    """
    path = os.path.join(output_dir, "embedding.vec.txt")
    # The context manager closes the file even if one of the writes raises.
    with open(path, "w") as embedding_file:
        embedding_file.write("{} {}\n".format(len(user_embeddings),
                                              embedding_dim))
        for user_id in user_embeddings:
            components = " ".join(str(v) for v in user_embeddings[user_id])
            embedding_file.write("{} {}\n".format(user_id, components))


def train_embeddings(opts):
    """Generate user embeddings with the method named by ``opts.model_name``.

    ``"1hot"``, ``"autoenc"`` and ``"test"`` build the embeddings directly and
    write them to ``embedding.vec.txt`` in ``opts.output_dir``. Any other name
    must be one of the OpenKE models (TransE, TransD, TransR, TransH, ComplEx,
    DistMult, HolE, RESCAL), which is trained and then tested through
    ``Config``; its outputs are ``model.vec.tf`` and ``embedding.vec.json``.

    Args:
        opts: Options object. Reads ``output_dir``, ``model_name`` and
            ``openke_dir``, plus, depending on the model, ``nbatches``,
            ``epochs``, ``alpha`` and ``pkg_dir``.

    Raises:
        SystemExit: With status 1 when ``opts.model_name`` is not recognised.
    """
    os.makedirs(opts.output_dir, exist_ok=True)

    if opts.model_name == "1hot":
        embedding_dim, user_embeddings = one_hot_embedding(opts.openke_dir)
        print("Successfully generated one-hot embeddings (dim: %s)" %
              embedding_dim)
        _write_embeddings(opts.output_dir, user_embeddings, embedding_dim)
        print("Saved one-hot embeddings to %s" % opts.output_dir)
        return

    if opts.model_name == "autoenc":
        new_dim = 64
        user_embeddings = autoencoder_embedding(opts.openke_dir, new_dim,
                                                int(opts.nbatches),
                                                int(opts.epochs))
        print("Successfully generated autoencoder embeddings (dim: %s)" %
              new_dim)
        _write_embeddings(opts.output_dir, user_embeddings, new_dim)
        print("Saved autoencoder embeddings to %s" % opts.output_dir)
        return

    if opts.model_name == "test":
        import numpy as np
        from pkg.test_model import TestModel

        new_dim = 64
        user_embedding_dim, user_embeddings_dict = one_hot_embedding(
            opts.openke_dir)
        # Split the mapping into parallel sequences so ids can be re-attached
        # to the encoded vectors afterwards.
        user_ids, user_embeddings = zip(*user_embeddings_dict.items())
        user_embeddings = np.array(user_embeddings)

        dae = TestModel([user_embedding_dim, new_dim * 2, new_dim])
        # Only the genders are used as training targets; ages are ignored.
        genders, _ages = PKG.get_userinfo(
            openke_dir=opts.openke_dir,
            user_info_path=os.path.join(opts.pkg_dir,
                                        "user_id_imei_birth_gender.txt"),
            user_ids=user_ids)
        # NOTE(review): the "- 1" presumably maps 1-based gender codes to
        # 0-based labels -- confirm against PKG.get_userinfo.
        dae.train(x=user_embeddings,
                  y=np.array(genders) - 1,
                  epochs=int(opts.epochs),
                  batch_size=int(opts.nbatches))
        user_embeddings = dict(zip(user_ids, dae.encode(user_embeddings)))

        print("Successfully generated test embeddings (dim: %s)" % new_dim)
        _write_embeddings(opts.output_dir, user_embeddings, new_dim)
        print("Saved test embeddings to %s" % opts.output_dir)
        return

    from config.Config import Config
    from models.TransE import TransE
    from models.TransD import TransD
    from models.TransH import TransH
    from models.TransR import TransR
    from models.RESCAL import RESCAL
    from models.DistMult import DistMult
    from models.ComplEx import ComplEx
    from models.HolE import HolE

    # Resolve the model class first so that an unknown name is rejected
    # before any configuration work is done.
    known_models = {
        "TransE": TransE,
        "TransD": TransD,
        "TransR": TransR,
        "TransH": TransH,
        "ComplEx": ComplEx,
        "DistMult": DistMult,
        "HolE": HolE,
        "RESCAL": RESCAL,
    }
    model = known_models.get(opts.model_name)
    if model is None:
        print("Unknown model: " + opts.model_name)
        sys.exit(1)

    con = Config()
    con.set_in_path(opts.openke_dir)

    # To test models after training needs "set_test_flag(True)".
    con.set_test_flag(True)
    con.set_work_threads(4)
    # NOTE(review): the number of training rounds is fixed at 500 here and
    # does not use opts.epochs, unlike the branches above -- confirm intent.
    con.set_train_times(500)
    con.set_nbatches(int(opts.nbatches))
    con.set_alpha(float(opts.alpha))
    con.set_margin(1.0)
    con.set_bern(0)
    con.set_dimension(64)
    con.set_ent_neg_rate(1)
    con.set_rel_neg_rate(0)
    con.set_opt_method("Adam")

    # Models will be exported via tf.Saver() automatically.
    con.set_export_files(os.path.join(opts.output_dir, "model.vec.tf"), 0)
    # Model parameters will be exported to json files automatically.
    con.set_out_files(os.path.join(opts.output_dir, "embedding.vec.json"))
    # Initialize experimental settings.
    con.init()

    # Set the knowledge embedding model.
    con.set_model(model)

    # Train the model, then evaluate it.
    con.run()
    con.test()