Example #1
def run_kge(params, struct=[0, 1, 2, 3]):
    # evaluation closures; `model` is assigned below before these are called
    tester_val = lambda: model.test_link(valid_data, n_ent, heads, tails,
                                         args.filter)
    tester_tst = lambda: model.test_link(test_data, n_ent, heads, tails,
                                         args.filter)
    torch.cuda.set_device(select_gpu())

    args.lr = params["lr"]
    args.lamb = 10**params["lamb"]
    args.decay_rate = params["decay_rate"]
    args.n_batch = params["n_batch"]
    args.n_dim = params["n_dim"]
    plot_config(args)

    model = BaseModel(n_ent, n_rel, args, struct)
    best_mrr, best_str = model.train(train_data, corrupter, tester_val,
                                     tester_tst)
    with open(args.perf_file, 'a') as f:
        print('structure:', struct)
        print('write best mrr', best_str)
        for s in struct:
            f.write(str(s) + ' ')
        f.write('\t\tbest_performance: ' + best_str + '\n')

    return best_mrr
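
For reference, `run_kge` can also be called directly with a hand-picked parameter dictionary instead of through a search; the values below are placeholders for illustration, not tuned settings (note that `lamb` is the exponent fed into `10**params["lamb"]`).

params = {"lr": 0.01, "lamb": -3, "decay_rate": 0.995, "n_batch": 256, "n_dim": 64}
best_mrr = run_kge(params, struct=[0, 1, 2, 3])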
Example #2
def train_ensemble():
    nb_models = 5  # train 5 ensemble models
    models = []
    model_paths = []

    for run in range(nb_models):
        print("====== Ensemble model: %d ======" % run)
        m = BaseModel('data/', dense_nodes=4096)
        model_prefix = "da_r%d_" % run
        m.model_name = model_prefix + m.model_name

        print("====== training model ======")
        m.train(nb_epoch=20, use_da=True)

        # collect the trained model and its checkpoint path
        models.append(m)
        model_paths.append(m.model_path)

    return models, model_paths
Example #3
def run_model(i, state):
    print('newID:', i, state, len(state))
    args.perf_file = os.path.join(directory, dataset + '_perf.txt')
    torch.cuda.empty_cache()
    # stagger start-up so parallel runs do not pick the same GPU
    time.sleep(10 * (i % args.parrel) + 1)
    torch.cuda.set_device(select_gpu())

    model = BaseModel(n_ent, n_rel, args, state)
    tester_val = lambda: model.test_link(valid_data, valid_head_filter, valid_tail_filter)
    tester_tst = lambda: model.test_link(test_data, test_head_filter, test_tail_filter)
    best_mrr, best_str = model.train(train_data, tester_val, tester_tst)
    with open(args.perf_file, 'a') as f:
        print('ID:', i, 'structure:%s'%(str(state)), '\tvalid mrr', best_mrr)
        for s in state:
            f.write(str(s) + ' ')
        f.write('\t\tbest_performance: ' + best_str + '\n')
    torch.cuda.empty_cache()
    return best_mrr
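
The staggered `time.sleep` above only helps when several copies of `run_model` start at nearly the same time. A minimal sketch of such a parallel dispatch is shown below; the helper name `search_structures`, the worker count, and the candidate list are assumptions for illustration, not part of the original code.

import multiprocessing as mp

def search_structures(candidates, n_workers=4):
    # CUDA contexts do not survive fork, so use the "spawn" start method
    ctx = mp.get_context("spawn")
    with ctx.Pool(processes=n_workers) as pool:
        # enumerate() supplies the (i, state) pairs that run_model expects
        scores = pool.starmap(run_model, enumerate(candidates))
    # keep the structure with the best validation MRR
    best_idx = max(range(len(scores)), key=lambda k: scores[k])
    return candidates[best_idx], scores[best_idx]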
Example #4
        def run_kge(params):
            args.lr = params['lr']
            args.lamb = 10**params['lamb']
            args.decay_rate = params['decay_rate']
            args.n_batch = params['n_batch']
            args.n_dim = params['n_dim']
            plot_config(args)

            model = BaseModel(n_ent, n_rel, args, struct)
            tester_val = lambda: model.test_link(valid_data, valid_head_filter,
                                                 valid_tail_filter)
            tester_tst = lambda: model.test_link(test_data, test_head_filter,
                                                 test_tail_filter)
            best_mrr, best_str = model.train(train_data, tester_val,
                                             tester_tst)
            with open(args.perf_file, 'a') as f:
                print('structure:', struct, best_str)
                for s in struct:
                    f.write(str(s) + ' ')
                f.write(best_str + '\n')
            return {'loss': -best_mrr, 'status': STATUS_OK}
Example #5
def main(params, args, arch):
    #def main(args, i):

    # set number of threads in pytorch
    torch.set_num_threads(6)

    # initialize the logger
    logger_init(args)

    # set gpu
    if args.GPU:
        torch.cuda.set_device(args.gpu)

    # the default hyperparameter settings for the corresponding dataset
    args = default_search_hyper(args)

    #    hyperOpt = {"lr":[0.00635456700742798, 0.0049700352658686425, 0.0023726642982752643],
    #                "lamb":[3.162503061522238e-05, 1.9567149674424395e-05, 1.0729611255307008e-05],
    #                "d":[512, 512, 512],
    #                "dr":[0.9933500551931267, 0.9903909316509071, 0.9933910046627364],
    #                "batch_size":[256, 256, 256]}
    #
    #    args.lr = hyperOpt["lr"][i]
    #    args.lamb = hyperOpt["lamb"][i]
    #    args.n_dim = hyperOpt["d"][i]
    #    args.decay_rate = hyperOpt["dr"][i]
    #    args.n_batch = hyperOpt["batch_size"][i]

    # load data: n-ary facts if arity > 2, otherwise standard triples
    if args.n_arity > 2:
        d = nary_dataloader(args.task_dir)

        entity_idxs = {d.entities[i]: i for i in range(len(d.entities))}
        relation_idxs = {d.relations[i]: i for i in range(len(d.relations))}
        n_ent, n_rel = len(entity_idxs), len(relation_idxs)
        print("Number of train:{}, valid:{}, test:{}.".format(
            len(d.train_data), len(d.valid_data), len(d.test_data)))

        train_data = torch.LongTensor(
            get_data_idxs(d.train_data, entity_idxs, relation_idxs))
        valid_data = torch.LongTensor(
            get_data_idxs(d.valid_data, entity_idxs, relation_idxs))
        test_data = torch.LongTensor(
            get_data_idxs(d.test_data, entity_idxs, relation_idxs))

        e1_sp, e2_sp, e3_sp = n_ary_heads(train_data, valid_data, test_data)


#        train_data = torch.LongTensor(get_data_idxs(d.train_data, entity_idxs, relation_idxs))[0:512]
#        valid_data = torch.LongTensor(get_data_idxs(d.valid_data, entity_idxs, relation_idxs))[0:512]
#        test_data = torch.LongTensor(get_data_idxs(d.test_data, entity_idxs, relation_idxs))[0:512]

    else:
        loader = DataLoader(args.task_dir)
        n_ent, n_rel = loader.graph_size()
        train_data = loader.load_data('train')
        valid_data = loader.load_data('valid')
        test_data = loader.load_data('test')
        print("Number of train:{}, valid:{}, test:{}.".format(
            len(train_data[0]), len(valid_data[0]), len(test_data[0])))

        heads, tails = loader.heads_tails()

        train_data = torch.LongTensor(train_data).transpose(0, 1)  #[0:512]
        valid_data = torch.LongTensor(valid_data).transpose(0, 1)  #[0:512]
        test_data = torch.LongTensor(test_data).transpose(0, 1)  #[0:512]

    file_path = "search_nary" + "_" + str(args.num_blocks)
    directory = os.path.join("results", args.dataset, file_path)
    args.out_dir = directory
    if not os.path.exists(directory):
        os.makedirs(directory)
    os.environ["OMP_NUM_THREADS"] = "4"
    os.environ["MKL_NUM_THREADS"] = "4"
    args.perf_file = os.path.join(
        directory, args.dataset + '_search_nCP_nary_' + str(args.num_blocks) +
        "_" + str(args.trial) + '.txt')

    print('output file name:', args.perf_file)

    args.lr = params["lr"]
    args.decay_rate = params["decay_rate"]
    args.n_batch = params["n_batch"]
    #args.n_dim = params["n_dim"]
    args.input_dropout = params["input_dropout"]
    args.hidden_dropout = params["hidden_dropout"]
    #args.lamb = params["lamb"]

    plot_config(args)

    def tester_val(facts=None, arch=None):
        if args.n_arity == 2:
            return model.test_link(test_data=valid_data,
                                   n_ent=n_ent,
                                   heads=heads,
                                   tails=tails,
                                   filt=args.filter,
                                   arch=arch)

        elif args.n_arity > 2:
            return model.evaluate(valid_data, e1_sp, e2_sp, e3_sp, arch)

    def tester_tst():
        if args.n_arity == 2:
            return model.test_link(test_data=test_data,
                                   n_ent=n_ent,
                                   heads=heads,
                                   tails=tails,
                                   filt=args.filter)
        elif args.n_arity > 2:
            return model.evaluate(test_data, e1_sp, e2_sp, e3_sp)

    tester_trip_class = None
    model = BaseModel(n_ent, n_rel, args, arch)
    mrr = model.train(train_data, valid_data, tester_val, tester_tst,
                      tester_trip_class)

    return mrr
Example #6
    plot_config(args)

    heads, tails = loader.heads_tails()
    head_idx, tail_idx, head_cache, tail_cache, head_pos, tail_pos = loader.get_cache_list()
    caches = [head_idx, tail_idx, head_cache, tail_cache, head_pos, tail_pos]

    train_data = [torch.LongTensor(vec) for vec in train_data]
    valid_data = [torch.LongTensor(vec) for vec in valid_data]
    test_data = [torch.LongTensor(vec) for vec in test_data]

    tester_val = lambda: model.test_link(valid_data, n_ent, heads, tails,
                                         args.filter)
    tester_tst = lambda: model.test_link(test_data, n_ent, heads, tails,
                                         args.filter)

    corrupter = BernCorrupter(train_data, n_ent, n_rel)
    model = BaseModel(n_ent, n_rel, args)

    if args.load:
        model.load(os.path.join(args.task_dir, args.model + '.mdl'))
        tester_val()
        tester_tst()

    best_str = model.train(train_data, caches, corrupter, tester_val,
                           tester_tst)
    with open(args.perf_file, 'a') as f:
        print('Training finished and best performance:', best_str)
        f.write('best_performance: ' + best_str)
Example #7
        space4kge = {
            "lr": hp.uniform("lr", 0, 1),
            "lamb": hp.uniform("lamb", -5, 0),
            "decay_rate": hp.uniform("decay_rate", 0.99, 1.0),
            "n_batch": hp.choice("n_batch", [128, 256, 512, 1024]),
            "n_dim": hp.choice("n_dim", [64]),
        }

        trials = Trials()
        best = fmin(run_kge,
                    space4kge,
                    algo=partial(tpe.suggest, n_startup_jobs=30),
                    max_evals=200,
                    trials=trials)

    else:
        plot_config(args)
        model = BaseModel(n_ent, n_rel, args, struct)
        tester_val = lambda: model.test_link(valid_data, valid_head_filter,
                                             valid_tail_filter)
        tester_tst = lambda: model.test_link(test_data, test_head_filter,
                                             test_tail_filter)
        best_mrr, best_str = model.train(train_data, tester_val, tester_tst)

        with open(args.perf_file, 'a') as f:
            print('structure:', struct, best_str)
            for s in struct:
                f.write(str(s) + ' ')
            f.write('\t\tbest_performance: ' + best_str + '\n')
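
Note that `fmin` reports `hp.choice` dimensions such as `n_batch` as option indices rather than values. Assuming the `space4kge` and `best` names from the snippet above, the concrete settings can be recovered with hyperopt's `space_eval`; `lamb` remains the exponent that `run_kge` passes through `10**params['lamb']`.

from hyperopt import space_eval

# map choice indices back to concrete values (e.g. n_batch -> 128/256/512/1024)
best_params = space_eval(space4kge, best)
print('best hyperparameters found by TPE:', best_params)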