Exemplo n.º 1
0
def main():
    parser = argparse.ArgumentParser(description='TransE')
    parser.add_argument('--data_dir', type=str, default='../data/FB15k/')
    parser.add_argument('--embedding_dim', type=int, default=200)
    parser.add_argument('--margin_value', type=float, default=1.0)
    parser.add_argument('--score_func', type=str, default='L1')
    parser.add_argument('--batch_size', type=int, default=4800)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--n_generator', type=int, default=24)
    parser.add_argument('--n_rank_calculator', type=int, default=24)
    parser.add_argument('--ckpt_dir', type=str, default='../ckpt/')
    parser.add_argument('--summary_dir', type=str, default='../summary/')
    parser.add_argument('--max_epoch', type=int, default=500)
    parser.add_argument('--eval_freq', type=int, default=10)
    args = parser.parse_args()
    print(args)
    kg = KnowledgeGraph(data_dir=args.data_dir)
    kge_model = TransE(kg=kg, embedding_dim=args.embedding_dim, margin_value=args.margin_value,
                       score_func=args.score_func, batch_size=args.batch_size, learning_rate=args.learning_rate,
                       n_generator=args.n_generator, n_rank_calculator=args.n_rank_calculator, max_epoch=args.max_epoch,
                       eval_freq=args.eval_freq)

    gpu_config = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_config)
    with tf.Session(config=sess_config) as sess:
        kge_model.train(sess)
Exemplo n.º 2
0
def main():
    opts = get_train_args()
    print("load data ...")
    data = DataSet('data/modified_triples.txt')
    dataloader = DataLoader(data, shuffle=True, batch_size=opts.batch_size)
    print("load model ...")
    if opts.model_type == 'transe':
        model = TransE(opts, data.ent_tot, data.rel_tot)
    elif opts.model_type == "distmult":
        model = DistMult(opts, data.ent_tot, data.rel_tot)
    if opts.optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=opts.lr)
    elif opts.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=opts.lr)
    model.cuda()
    model.relation_normalize()
    loss = torch.nn.MarginRankingLoss(margin=opts.margin)

    print("start training")
    for epoch in range(1, opts.epochs + 1):
        print("epoch : " + str(epoch))
        model.train()
        epoch_start = time.time()
        epoch_loss = 0
        tot = 0
        cnt = 0
        for i, batch_data in enumerate(dataloader):
            optimizer.zero_grad()
            batch_h, batch_r, batch_t, batch_n = batch_data
            batch_h = torch.LongTensor(batch_h).cuda()
            batch_r = torch.LongTensor(batch_r).cuda()
            batch_t = torch.LongTensor(batch_t).cuda()
            batch_n = torch.LongTensor(batch_n).cuda()
            pos_score, neg_score, dist = model.forward(batch_h, batch_r,
                                                       batch_t, batch_n)
            pos_score = pos_score.cpu()
            neg_score = neg_score.cpu()
            dist = dist.cpu()
            train_loss = loss(pos_score, neg_score,
                              torch.ones(pos_score.size(-1))) + dist
            train_loss.backward()
            optimizer.step()
            batch_loss = torch.sum(train_loss)
            epoch_loss += batch_loss
            batch_size = batch_h.size(0)
            tot += batch_size
            cnt += 1
            print('\r{:>10} epoch {} progress {} loss: {}\n'.format(
                '', epoch, tot / data.__len__(), train_loss),
                  end='')
        end = time.time()
        time_used = end - epoch_start
        epoch_loss /= cnt
        print('one epoch time: {} minutes'.format(time_used / 60))
        print('{} epochs'.format(epoch))
        print('epoch {} loss: {}'.format(epoch, epoch_loss))

        if epoch % opts.save_step == 0:
            print("save model...")
            model.entity_normalize()
            torch.save(model.state_dict(), 'model.pt')

    print("save model...")
    model.entity_normalize()
    torch.save(model.state_dict(), 'model.pt')
    print("[Saving embeddings of whole entities & relations...]")
    save_embeddings(model, opts, data.id2ent, data.id2rel)
    print("[Embedding results are saved successfully.]")