# NOTE(review): this call is presumably the body of an existence check on
# checkpoint_dir that starts before this chunk -- confirm against the full file.
os.makedirs(checkpoint_dir)

# exist_ok=True replaces the check-then-create pair, which could fail if the
# directory appeared between the check and the creation.
os.makedirs(result_dir, exist_ok=True)

# Build the experiment configuration from the parsed command-line arguments.
con = Config()

in_path = "./benchmarks/" + args.dataset + "/"
con.set_in_path(in_path)

# An empty --test_file means "no explicit test file": pass an empty path.
test_file_path = in_path + args.test_file if args.test_file != "" else ""
con.set_test_file_path(test_file_path)

con.set_work_threads(8)
con.set_train_times(args.num_epochs)
con.set_nbatches(args.nbatches)
con.set_alpha(args.learning_rate)
con.set_bern(1)
con.set_dimension(args.hidden_size)
con.set_lmbda(args.lmbda)
con.set_lmbda_two(0.01)
con.set_margin(1.0)
con.set_ent_neg_rate(args.neg_num)
con.set_opt_method(args.optim)
con.set_save_steps(args.save_steps)
con.set_valid_steps(args.valid_steps)
con.set_early_stopping_patience(10)
con.set_checkpoint_dir(checkpoint_dir)
con.set_result_dir(result_dir)

# knowledge graph completion ~ link prediction
con.set_test_link(True)
con.init()
def _write_embeddings(output_dir, user_embeddings, embedding_dim):
    """Write *user_embeddings* to ``embedding.vec.txt`` inside *output_dir*.

    The first line is ``"<number of users> <embedding_dim>"``. Every following
    line holds one user id followed by the space-separated components of its
    vector.
    """
    path = os.path.join(output_dir, "embedding.vec.txt")
    # The context manager closes the file even when a write raises.
    with open(path, "w") as embedding_file:
        embedding_file.write(
            "{} {}\n".format(len(user_embeddings), embedding_dim))
        for user_id in user_embeddings:
            vector = " ".join(str(v) for v in user_embeddings[user_id])
            embedding_file.write("{} {}\n".format(user_id, vector))


def train_embeddings(opts):
    """Train or generate embeddings selected by ``opts.model_name``.

    ``opts`` is read for ``output_dir``, ``model_name`` and ``openke_dir``,
    plus ``nbatches``, ``epochs``, ``pkg_dir`` and ``alpha`` depending on the
    chosen model.

    * ``"1hot"``, ``"autoenc"`` and ``"test"`` build user embeddings directly
      and save them to ``<output_dir>/embedding.vec.txt``.
    * ``"TransE"``, ``"TransD"``, ``"TransR"``, ``"TransH"``, ``"ComplEx"``,
      ``"DistMult"``, ``"HolE"`` and ``"RESCAL"`` are trained and tested
      through ``Config``; results are exported to ``model.vec.tf`` and
      ``embedding.vec.json`` under ``output_dir``.

    Any other model name prints ``Unknown model: <name>`` and exits the
    process with status 1.
    """
    os.makedirs(opts.output_dir, exist_ok=True)

    if opts.model_name == "1hot":
        embedding_dim, user_embeddings = one_hot_embedding(opts.openke_dir)
        print("Successfully generated one-hot embeddings (dim: %s)"
              % embedding_dim)
        _write_embeddings(opts.output_dir, user_embeddings, embedding_dim)
        print("Saved one-hot embeddings to %s" % opts.output_dir)
        return

    if opts.model_name == "autoenc":
        new_dim = 64
        user_embeddings = autoencoder_embedding(
            opts.openke_dir, new_dim, int(opts.nbatches), int(opts.epochs))
        print("Successfully generated autoencoder embeddings (dim: %s)"
              % new_dim)
        _write_embeddings(opts.output_dir, user_embeddings, new_dim)
        print("Saved autoencoder embeddings to %s" % opts.output_dir)
        return

    if opts.model_name == "test":
        new_dim = 64
        import numpy as np
        from pkg.test_model import TestModel

        # Start from the one-hot vectors and compress them with TestModel.
        user_embedding_dim, user_embeddings_dict = one_hot_embedding(
            opts.openke_dir)
        user_ids, user_embeddings = zip(*user_embeddings_dict.items())
        user_embeddings = np.array(user_embeddings)

        dae = TestModel([user_embedding_dim, new_dim * 2, new_dim])
        genders, _ages = PKG.get_userinfo(
            openke_dir=opts.openke_dir,
            user_info_path=os.path.join(
                opts.pkg_dir, "user_id_imei_birth_gender.txt"),
            user_ids=user_ids)
        # NOTE(review): the "- 1" presumably maps 1-based gender codes to
        # 0-based labels -- confirm against PKG.get_userinfo.
        dae.train(x=user_embeddings,
                  y=np.array(genders) - 1,
                  epochs=int(opts.epochs),
                  batch_size=int(opts.nbatches))

        user_embeddings = dict(zip(user_ids, list(dae.encode(user_embeddings))))
        print("Successfully generated test embeddings (dim: %s)" % new_dim)
        _write_embeddings(opts.output_dir, user_embeddings, new_dim)
        print("Saved test embeddings to %s" % opts.output_dir)
        return

    # Imported here so the branches above do not need these modules.
    from config.Config import Config
    from models.TransE import TransE
    from models.TransD import TransD
    from models.TransH import TransH
    from models.TransR import TransR
    from models.RESCAL import RESCAL
    from models.DistMult import DistMult
    from models.ComplEx import ComplEx
    from models.HolE import HolE

    known_models = {
        "TransE": TransE,
        "TransD": TransD,
        "TransR": TransR,
        "TransH": TransH,
        "ComplEx": ComplEx,
        "DistMult": DistMult,
        "HolE": HolE,
        "RESCAL": RESCAL,
    }

    # Reject an unknown model before building and initializing the
    # configuration, so a typo fails immediately.
    if opts.model_name not in known_models:
        print("Unknown model: " + opts.model_name)
        sys.exit(1)

    con = Config()
    con.set_in_path(opts.openke_dir)
    con.set_test_flag(True)
    con.set_work_threads(4)
    con.set_train_times(500)
    con.set_nbatches(int(opts.nbatches))
    con.set_alpha(float(opts.alpha))
    con.set_margin(1.0)
    con.set_bern(0)
    con.set_dimension(64)
    con.set_ent_neg_rate(1)
    con.set_rel_neg_rate(0)
    con.set_opt_method("Adam")

    # Models will be exported via tf.Saver() automatically.
    con.set_export_files(os.path.join(opts.output_dir, "model.vec.tf"), 0)
    # Model parameters will be exported to json files automatically.
    con.set_out_files(os.path.join(opts.output_dir, "embedding.vec.json"))
    # Initialize experimental settings.
    con.init()

    # Set the knowledge embedding model.
    con.set_model(known_models[opts.model_name])

    # Train the model.
    con.run()
    # To test models after training needs "set_test_flag(True)".
    con.test()