Exemplo n.º 1
0
def multi_ke() -> EmbeddingInfo:
    """Create an ``EmbeddingInfo`` wrapping a freshly configured MultiKE model.

    The model receives the arguments loaded from the MultiKE 15K args file,
    with ``word2vec_path`` shortened by its first three characters.

    Returns:
        An ``EmbeddingInfo`` built from the model, the name ``"multike"`` and
        a callable that evaluates ``ent_embeds`` in the model's session.
    """
    multike_model = MultiKE()
    multike_args = load_args("../OpenEA/run/args/multike_args_15K.json")
    # Drop the first three characters of the configured path
    # (presumably a leading "../" -- TODO confirm against the args file).
    trimmed_path = multike_args.word2vec_path[3:]
    multike_args.word2vec_path = trimmed_path
    multike_model.set_args(multike_args)
    return EmbeddingInfo(
        multike_model,
        "multike",
        lambda m: m.ent_embeds.eval(session=m.session),
    )
Exemplo n.º 2
0
def main_for_args(arg_path, dataset, division):
    """Load arguments, read the KGs, then init, run, test and save a model.

    Args:
        arg_path: Path handed to ``load_args``.
        dataset: Name appended (followed by ``"/"``) to ``args.training_data``.
        division: Value stored in ``args.dataset_division``.

    The total wall-clock time of the call is printed at the end.
    """
    started = time.time()

    run_args = load_args(arg_path)
    run_args.training_data = run_args.training_data + dataset + "/"
    run_args.dataset_division = division
    print(run_args.embedding_module)
    print(run_args)

    # Only the RSN4EA embedding module turns on the reader's
    # ``remove_unlinked`` flag; every other module leaves it off.
    drop_unlinked = run_args.embedding_module == "RSN4EA"
    knowledge_graphs = read_kgs_from_folder(
        run_args.training_data,
        run_args.dataset_division,
        run_args.alignment_module,
        run_args.ordered,
        remove_unlinked=drop_unlinked,
    )

    # TensorFlow is imported only at this point, after the KGs were read.
    import tensorflow as tf

    # Reset Keras' global state before a new model is built.
    tf.keras.backend.clear_session()

    model_factory = get_model(run_args.embedding_module)
    embedding_model = model_factory()
    embedding_model.set_args(run_args)
    embedding_model.set_kgs(knowledge_graphs)
    embedding_model.init()
    embedding_model.run()
    embedding_model.test()
    embedding_model.save()

    elapsed = time.time() - started
    print("Total run time = {:.3f} s.".format(elapsed))
Exemplo n.º 3
0
def split_abt_buy(rnd: random.Random):
    """Build the abt-buy ``CsvDataset`` and hand it to ``split_dataset``.

    Args:
        rnd: Random generator passed through to ``CsvDataset``.

    Prints the sizes of both KGs' entity sets before splitting.
    """
    bootea = BootEA()
    bootea_args = load_args("../OpenEA/run/args/bootea_args_15K.json")
    bootea.set_args(bootea_args)

    abt_csv = f"{base_path}/abt-buy/Abt.csv"
    buy_csv = f"{base_path}/abt-buy/Buy.csv"
    mapping_csv = f"{base_path}/abt-buy/abt_buy_perfectMapping.csv"
    dataset = CsvDataset(
        CsvType.products, abt_csv, buy_csv, mapping_csv, bootea, rnd
    )

    size_kg1 = len(dataset.kg1.entities_set)
    size_kg2 = len(dataset.kg2.entities_set)
    print(f"abt-buy: {size_kg1}, {size_kg2}")

    target_dir = f"{base_path}/abt-buy"
    split_dataset(dataset, target_dir)
Exemplo n.º 4
0
def split_amazon_google(rnd: random.Random):
    """Build the amazon-google ``CsvDataset`` and hand it to ``split_dataset``.

    Args:
        rnd: Random generator passed through to ``CsvDataset``.

    Prints the sizes of both KGs' entity sets before splitting.
    """
    bootea = BootEA()
    bootea_args = load_args("../OpenEA/run/args/bootea_args_15K.json")
    bootea.set_args(bootea_args)

    amazon_csv = f"{base_path}/amazon-google/Amazon.csv"
    google_csv = f"{base_path}/amazon-google/GoogleProducts.csv"
    # The mapping file name is spelled "Amzon_..." in the original path.
    mapping_csv = f"{base_path}/amazon-google/Amzon_GoogleProducts_perfectMapping.csv"
    dataset = CsvDataset(
        CsvType.products, amazon_csv, google_csv, mapping_csv, bootea, rnd
    )

    size_kg1 = len(dataset.kg1.entities_set)
    size_kg2 = len(dataset.kg2.entities_set)
    print(f"amazon-google: {size_kg1}, {size_kg2}")

    target_dir = f"{base_path}/amazon-google"
    split_dataset(dataset, target_dir)
Exemplo n.º 5
0
def split_dblp_scholar(rnd: random.Random):
    """Build the dblp-scholar ``CsvDataset`` and hand it to ``split_dataset``.

    Args:
        rnd: Random generator passed through to ``CsvDataset``.

    Prints the sizes of both KGs' entity sets before splitting.
    """
    bootea = BootEA()
    bootea_args = load_args("../OpenEA/run/args/bootea_args_15K.json")
    bootea.set_args(bootea_args)

    dblp_csv = f"{base_path}/dblp-scholar/DBLP1.csv"
    scholar_csv = f"{base_path}/dblp-scholar/Scholar.csv"
    mapping_csv = f"{base_path}/dblp-scholar/DBLP-Scholar_perfectMapping.csv"
    dataset = CsvDataset(
        CsvType.articles, dblp_csv, scholar_csv, mapping_csv, bootea, rnd
    )

    size_kg1 = len(dataset.kg1.entities_set)
    size_kg2 = len(dataset.kg2.entities_set)
    print(f"dblp-scholar: {size_kg1}, {size_kg2}")

    target_dir = f"{base_path}/dblp-scholar"
    split_dataset(dataset, target_dir)
Exemplo n.º 6
0
def split_dblp_acm(rnd: random.Random):
    """Build the dblp-acm ``CsvDataset`` and hand it to ``split_dataset``.

    Args:
        rnd: Random generator passed through to ``CsvDataset``.

    Prints the sizes of both KGs' entity sets (labelled ``dblp2-acm``)
    before splitting.
    """
    bootea = BootEA()
    bootea_args = load_args("../OpenEA/run/args/bootea_args_15K.json")
    bootea.set_args(bootea_args)

    dblp_csv = f"{base_path}/dblp-acm/DBLP2.csv"
    acm_csv = f"{base_path}/dblp-acm/ACM.csv"
    mapping_csv = f"{base_path}/dblp-acm/DBLP-ACM_perfectMapping.csv"
    dataset = CsvDataset(
        CsvType.articles, dblp_csv, acm_csv, mapping_csv, bootea, rnd
    )

    size_kg1 = len(dataset.kg1.entities_set)
    size_kg2 = len(dataset.kg2.entities_set)
    print(f"dblp2-acm: {size_kg1}, {size_kg2}")

    target_dir = f"{base_path}/dblp-acm"
    split_dataset(dataset, target_dir)
Exemplo n.º 7
0
 def __init__(self, data_folder: str, division: str, args_path: str):
     """Read the KGs of one division and register its labelled link splits.

     Args:
         data_folder: Folder the KGs are read from; its second-to-last
             ``"/"``-separated component becomes part of the name.
         division: Division passed to the reader; its last character is
             dropped when building the name.
         args_path: Path handed to ``load_args``; supplies
             ``alignment_module`` and ``ordered`` for the reader.
     """
     loaded_args = load_args(args_path)
     self._data_folder = data_folder
     # Called before reading, after ``_data_folder`` has been set.
     self.download_and_unzip()
     self._kgs = read_kgs_from_folder(
         data_folder,
         division,
         loaded_args.alignment_module,
         loaded_args.ordered,
     )

     def _with_label(links):
         # Keep the first two fields of every link and attach the label 1.
         return [(link[0], link[1], 1) for link in links]

     train_pairs = _with_label(self._kgs.train_links)
     valid_pairs = _with_label(self._kgs.valid_links)
     test_pairs = _with_label(self._kgs.test_links)

     super().__init__(
         kg1=self._kgs.kg1,
         kg2=self._kgs.kg2,
         rnd=random.Random(),
         labelled_pairs=train_pairs,
         labelled_val_pairs=valid_pairs,
         labelled_test_pairs=test_pairs,
     )

     parent_folder = data_folder.split("/")[-2]
     division_label = division[:-1]
     self._name = parent_folder + "/" + division_label
Exemplo n.º 8
0
    def __init__(self,
                 data_folder: str,
                 division: str,
                 args_path: str,
                 random_seed=0):
        """Read the KGs of one division and register all links as one pool.

        Args:
            data_folder: Folder the KGs are read from; its second-to-last
                ``"/"``-separated component becomes part of the name.
            division: Division passed to the reader; its last character is
                dropped when building the name.
            args_path: Path handed to ``load_args``; supplies
                ``alignment_module`` and ``ordered`` for the reader.
            random_seed: Seed for the ``random.Random`` given to the parent.
        """
        loaded_args = load_args(args_path)
        self._data_folder = data_folder
        # Called before reading, after ``_data_folder`` has been set.
        self.download_and_unzip()
        self._kgs = read_kgs_from_folder(
            data_folder,
            division,
            loaded_args.alignment_module,
            loaded_args.ordered,
        )

        def _with_label(links):
            # Keep the first two fields of every link and attach the label 1.
            return [(link[0], link[1], 1) for link in links]

        train_pairs = _with_label(self._kgs.train_links)
        valid_pairs = _with_label(self._kgs.valid_links)
        test_pairs = _with_label(self._kgs.test_links)

        # Train, validation and test links are thrown together because of a
        # possible imbalance due to the removal of inner links.
        pooled_pairs = train_pairs + valid_pairs + test_pairs
        super().__init__(
            kg1=self._kgs.kg1,
            kg2=self._kgs.kg2,
            rnd=random.Random(random_seed),
            labelled_pairs=pooled_pairs,
        )

        parent_folder = data_folder.split("/")[-2]
        division_label = division[:-1]
        self._name = parent_folder + "/" + division_label
Exemplo n.º 9
0
    IMUSE = IMUSE
    SEA = SEA
    MultiKE = MultiKE
    RSN4EA = RSN4EA
    RDGCN = RDGCN
    BootEA_RotatE = BootEA_RotatE
    BootEA_TransH = BootEA_TransH


def get_model(model_name):
    """Return the ``ModelFamily`` attribute whose name is ``model_name``.

    Raises:
        AttributeError: If ``ModelFamily`` has no attribute of that name
            (``getattr`` is called without a default).
    """
    model_entry = getattr(ModelFamily, model_name)
    return model_entry


if __name__ == '__main__':
    # Command-line arguments, as read below:
    #   argv[1]  path handed to load_args
    #   argv[2]  dataset name, appended (plus '/') to args.training_data
    #   argv[3]  dataset division
    #   argv[4]  optional GPU setting: "CPU" maps to None, any other value is
    #            stored as given; when omitted, "0" is used
    t = time.time()  # start timestamp; its use is not visible in this excerpt
    args = load_args(sys.argv[1])
    args.training_data = args.training_data + sys.argv[2] + '/'
    args.dataset_division = sys.argv[
        3]  # This is the fold used among the k (5) available.
    # Fewer than five argv entries means no GPU setting was supplied.
    if len(sys.argv) < 5:
        args.gpu = "0"
    else:
        if sys.argv[4] == "CPU":
            args.gpu = None
        else:
            args.gpu = sys.argv[4]
    print(args.embedding_module)
    print(args)
    # The flag is switched on only for the RSN4EA embedding module.
    # NOTE(review): the code consuming remove_unlinked lies beyond this
    # excerpt -- presumably it is forwarded to the KG reader; confirm.
    remove_unlinked = False
    if args.embedding_module == "RSN4EA":
        remove_unlinked = True
Exemplo n.º 10
0
import argparse

from openea.modules.args.args_hander import load_args
from data_model import DataModel
from predicate_alignment import PredicateAlignModel
from MultiKE_CSL import MultiKE_CV

# Command-line interface: one optional string option, --training_data,
# defaulting to the empty string.
# NOTE: parse_args() runs at module level, so it executes on import as well
# as when the file is run as a script.
parser = argparse.ArgumentParser(description='run')
parser.add_argument('--training_data', type=str, default='')
parser_args = parser.parse_args()

if __name__ == '__main__':
    # Arguments come from the relative path 'args.json'; training_data is
    # then unconditionally replaced by the command-line value (which is ''
    # when the option was not given).
    args = load_args('args.json')
    args.training_data = parser_args.training_data
    data = DataModel(args)
    # The predicate-alignment model is built from the KGs held by the data
    # model and is passed to MultiKE_CV together with the data and args.
    attr_align_model = PredicateAlignModel(data.kgs, args)
    model = MultiKE_CV(data, args, attr_align_model)
    model.run()
Exemplo n.º 11
0
def rdgcn() -> EmbeddingInfo:
    """Create an ``EmbeddingInfo`` wrapping a freshly configured RDGCN model.

    Returns:
        An ``EmbeddingInfo`` built from the model, the name ``"rdgcn"`` and a
        callable that runs the model's ``output`` in its ``sess``.
    """
    rdgcn_model = RDGCN()
    rdgcn_args = load_args("../OpenEA/run/args/rdgcn_args_15K.json")
    rdgcn_model.set_args(rdgcn_args)
    return EmbeddingInfo(
        rdgcn_model,
        "rdgcn",
        lambda m: m.sess.run(m.output),
    )
Exemplo n.º 12
0
def boot_ea() -> EmbeddingInfo:
    """Create an ``EmbeddingInfo`` wrapping a freshly configured BootEA model.

    Returns:
        An ``EmbeddingInfo`` built from the model, the name ``"bootea"`` and
        a callable that evaluates ``ent_embeds`` in the model's session.
    """
    bootea_model = BootEA()
    bootea_args = load_args("../OpenEA/run/args/bootea_args_15K.json")
    bootea_model.set_args(bootea_args)
    return EmbeddingInfo(
        bootea_model,
        "bootea",
        lambda m: m.ent_embeds.eval(session=m.session),
    )
Exemplo n.º 13
0
def split_scads(source1, source2, rnd: random.Random):
    """Build a ``ScadsDataset`` for two sources and hand it to ``split_dataset``.

    Args:
        source1: First source, passed to ``ScadsDataset`` and used in the
            output folder name.
        source2: Second source, used the same way.
        rnd: Random generator passed through to ``ScadsDataset``.
    """
    bootea = BootEA()
    bootea_args = load_args("../OpenEA/run/args/bootea_args_15K.json")
    bootea.set_args(bootea_args)

    scads_root = f"{base_path}/ScadsMB/100/"
    dataset = ScadsDataset(scads_root, source1, source2, bootea, rnd)

    target_dir = f"{base_path}/ScadsMB/{source1}-{source2}"
    split_dataset(dataset, target_dir)