def multi_ke() -> EmbeddingInfo:
    """Build a MultiKE model from the 15K args file and wrap it.

    Returns an EmbeddingInfo holding the model, the name "multike",
    and an extractor that evaluates the entity-embedding variable in
    the model's TF session.
    """
    model = MultiKE()
    args = load_args("../OpenEA/run/args/multike_args_15K.json")
    # Drop the first three characters of the configured word2vec path.
    # NOTE(review): presumably strips a leading "../" so the path
    # resolves from this script's working directory — confirm.
    args.word2vec_path = args.word2vec_path[3:]
    model.set_args(args)

    def extract(m):
        return m.ent_embeds.eval(session=m.session)

    return EmbeddingInfo(model, "multike", extract)
def main_for_args(arg_path, dataset, division):
    """Run one OpenEA training/evaluation cycle for a dataset fold.

    arg_path: path to the JSON hyper-parameter file.
    dataset: dataset folder name, appended to the configured training path.
    division: fold sub-directory used as the dataset division.
    """
    started = time.time()
    args = load_args(arg_path)
    args.training_data = args.training_data + dataset + "/"
    args.dataset_division = division
    print(args.embedding_module)
    print(args)
    # RSN4EA is the only module run with unlinked entities removed.
    remove_unlinked = args.embedding_module == "RSN4EA"
    kgs = read_kgs_from_folder(
        args.training_data,
        args.dataset_division,
        args.alignment_module,
        args.ordered,
        remove_unlinked=remove_unlinked,
    )
    import tensorflow as tf

    # Reset global TF state so repeated calls in one process start clean.
    tf.keras.backend.clear_session()
    model = get_model(args.embedding_module)()
    model.set_args(args)
    model.set_kgs(kgs)
    model.init()
    model.run()
    model.test()
    model.save()
    print("Total run time = {:.3f} s.".format(time.time() - started))
def split_abt_buy(rnd: random.Random):
    """Load the Abt-Buy product CSVs and hand the dataset to split_dataset."""
    model = BootEA()
    model.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))
    folder = f"{base_path}/abt-buy"
    dataset = CsvDataset(
        CsvType.products,
        f"{folder}/Abt.csv",
        f"{folder}/Buy.csv",
        f"{folder}/abt_buy_perfectMapping.csv",
        model,
        rnd,
    )
    # Report the entity counts of both KGs before splitting.
    print(f"abt-buy: {len(dataset.kg1.entities_set)}, {len(dataset.kg2.entities_set)}")
    split_dataset(dataset, folder)
def split_amazon_google(rnd: random.Random):
    """Load the Amazon-GoogleProducts CSVs and hand them to split_dataset."""
    model = BootEA()
    model.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))
    folder = f"{base_path}/amazon-google"
    dataset = CsvDataset(
        CsvType.products,
        f"{folder}/Amazon.csv",
        f"{folder}/GoogleProducts.csv",
        # NOTE(review): "Amzon" spelling presumably matches the file name
        # as shipped with the dataset — do not "fix" without checking.
        f"{folder}/Amzon_GoogleProducts_perfectMapping.csv",
        model,
        rnd,
    )
    # Report the entity counts of both KGs before splitting.
    print(
        f"amazon-google: {len(dataset.kg1.entities_set)}, {len(dataset.kg2.entities_set)}"
    )
    split_dataset(dataset, folder)
def split_dblp_scholar(rnd: random.Random):
    """Load the DBLP-Scholar article CSVs and hand them to split_dataset."""
    model = BootEA()
    model.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))
    folder = f"{base_path}/dblp-scholar"
    dataset = CsvDataset(
        CsvType.articles,
        f"{folder}/DBLP1.csv",
        f"{folder}/Scholar.csv",
        f"{folder}/DBLP-Scholar_perfectMapping.csv",
        model,
        rnd,
    )
    # Report the entity counts of both KGs before splitting.
    print(
        f"dblp-scholar: {len(dataset.kg1.entities_set)}, {len(dataset.kg2.entities_set)}"
    )
    split_dataset(dataset, folder)
def split_dblp_acm(rnd: random.Random):
    """Load the DBLP-ACM article CSVs and hand them to split_dataset."""
    model = BootEA()
    model.set_args(load_args("../OpenEA/run/args/bootea_args_15K.json"))
    folder = f"{base_path}/dblp-acm"
    dataset = CsvDataset(
        CsvType.articles,
        f"{folder}/DBLP2.csv",
        f"{folder}/ACM.csv",
        f"{folder}/DBLP-ACM_perfectMapping.csv",
        model,
        rnd,
    )
    # Report the entity counts of both KGs before splitting.
    # (Label says "dblp2" because this pairing uses the DBLP2.csv dump.)
    print(
        f"dblp2-acm: {len(dataset.kg1.entities_set)}, {len(dataset.kg2.entities_set)}"
    )
    split_dataset(dataset, folder)
def __init__(self, data_folder: str, division: str, args_path: str):
    """Read an OpenEA dataset folder and initialise the parent dataset.

    data_folder: dataset directory; downloaded and unzipped if missing.
    division: fold sub-directory inside the dataset folder.
    args_path: JSON args file supplying alignment_module and ordered.
    """
    args = load_args(args_path)
    self._data_folder = data_folder
    self.download_and_unzip()
    self._kgs = read_kgs_from_folder(
        data_folder, division, args.alignment_module, args.ordered
    )

    def labelled(pairs):
        # Attach the positive label 1 to every gold link.
        return [(p[0], p[1], 1) for p in pairs]

    super().__init__(
        kg1=self._kgs.kg1,
        kg2=self._kgs.kg2,
        rnd=random.Random(),
        labelled_pairs=labelled(self._kgs.train_links),
        labelled_val_pairs=labelled(self._kgs.valid_links),
        labelled_test_pairs=labelled(self._kgs.test_links),
    )
    # NOTE(review): assumes data_folder and division both end with "/":
    # the second-to-last path segment names the dataset, and the trailing
    # slash of division is trimmed. Confirm against callers.
    self._name = data_folder.split("/")[-2] + "/" + division[:-1]
def __init__(self, data_folder: str, division: str, args_path: str, random_seed=0):
    """Read an OpenEA dataset folder, pooling all gold links into one set.

    data_folder: dataset directory; downloaded and unzipped if missing.
    division: fold sub-directory inside the dataset folder.
    args_path: JSON args file supplying alignment_module and ordered.
    random_seed: seed for the dataset's internal Random instance.
    """
    args = load_args(args_path)
    self._data_folder = data_folder
    self.download_and_unzip()
    self._kgs = read_kgs_from_folder(
        data_folder, division, args.alignment_module, args.ordered
    )
    # Train, valid and test links are thrown together into one labelled
    # set because removal of inner links can leave the splits imbalanced.
    splits = (self._kgs.train_links, self._kgs.valid_links, self._kgs.test_links)
    pooled = [(p[0], p[1], 1) for split in splits for p in split]
    super().__init__(
        kg1=self._kgs.kg1,
        kg2=self._kgs.kg2,
        rnd=random.Random(random_seed),
        labelled_pairs=pooled,
    )
    # NOTE(review): assumes data_folder and division both end with "/":
    # the second-to-last path segment names the dataset, and the trailing
    # slash of division is trimmed. Confirm against callers.
    self._name = data_folder.split("/")[-2] + "/" + division[:-1]
# NOTE(review): these assignments look like class attributes of ModelFamily
# (see get_model below, which resolves names on that class); the class
# header is outside this view. Each attribute re-exports a model class
# under the exact string name used on the command line.
IMUSE = IMUSE
SEA = SEA
MultiKE = MultiKE
RSN4EA = RSN4EA
RDGCN = RDGCN
BootEA_RotatE = BootEA_RotatE
BootEA_TransH = BootEA_TransH


def get_model(model_name):
    """Return the model class registered on ModelFamily under model_name."""
    return getattr(ModelFamily, model_name)


if __name__ == '__main__':
    t = time.time()
    # argv[1]: path to the JSON hyper-parameter file.
    args = load_args(sys.argv[1])
    # argv[2]: dataset folder name, appended to the configured path.
    args.training_data = args.training_data + sys.argv[2] + '/'
    args.dataset_division = sys.argv[
        3]  # This is the fold used among the k (5) available.
    # argv[4] (optional): GPU id string, or "CPU" to disable the GPU;
    # defaults to GPU "0" when the argument is absent.
    if len(sys.argv) < 5:
        args.gpu = "0"
    else:
        if sys.argv[4] == "CPU":
            args.gpu = None
        else:
            args.gpu = sys.argv[4]
    print(args.embedding_module)
    print(args)
    # RSN4EA is the only module run with unlinked entities removed.
    # NOTE(review): the script appears to continue past this chunk;
    # remove_unlinked is consumed in code outside this view.
    remove_unlinked = False
    if args.embedding_module == "RSN4EA":
        remove_unlinked = True
import argparse

from openea.modules.args.args_hander import load_args

from data_model import DataModel
from predicate_alignment import PredicateAlignModel
from MultiKE_CSL import MultiKE_CV

# Parsed at import time so the training-data folder can be supplied on
# the command line; all other settings come from args.json below.
parser = argparse.ArgumentParser(description='run')
parser.add_argument('--training_data', type=str, default='')
parser_args = parser.parse_args()

if __name__ == '__main__':
    # Load base hyper-parameters, then override only the data location.
    args = load_args('args.json')
    args.training_data = parser_args.training_data
    data = DataModel(args)
    # NOTE(review): builds a predicate-alignment model over both KGs and
    # feeds it to MultiKE's cross-validation runner — semantics inferred
    # from the class names; confirm against those modules.
    attr_align_model = PredicateAlignModel(data.kgs, args)
    model = MultiKE_CV(data, args, attr_align_model)
    model.run()
def rdgcn() -> EmbeddingInfo:
    """Build an RDGCN model from the 15K args file and wrap it.

    The extractor runs the model's output tensor in its TF session to
    obtain the entity embeddings.
    """
    model = RDGCN()
    args = load_args("../OpenEA/run/args/rdgcn_args_15K.json")
    model.set_args(args)

    def extract(m):
        return m.sess.run(m.output)

    return EmbeddingInfo(model, "rdgcn", extract)
def boot_ea() -> EmbeddingInfo:
    """Build a BootEA model from the 15K args file and wrap it.

    The extractor evaluates the entity-embedding variable in the
    model's TF session.
    """
    model = BootEA()
    args = load_args("../OpenEA/run/args/bootea_args_15K.json")
    model.set_args(args)

    def extract(m):
        return m.ent_embeds.eval(session=m.session)

    return EmbeddingInfo(model, "bootea", extract)
def split_scads(source1, source2, rnd: random.Random):
    """Build a ScadsMB dataset for two sources and hand it to split_dataset."""
    model = BootEA()
    args = load_args("../OpenEA/run/args/bootea_args_15K.json")
    model.set_args(args)
    dataset = ScadsDataset(
        f"{base_path}/ScadsMB/100/",
        source1,
        source2,
        model,
        rnd,
    )
    split_dataset(dataset, f"{base_path}/ScadsMB/{source1}-{source2}")