示例#1
0
def evaluate_fscore():
    """Restore the trained RSGAT and ConnectE checkpoints, then run
    type-prediction evaluation on the e2t test split.

    The classification evaluator is constructed (with ``save_data=True``)
    but only the type evaluator is invoked here.
    """
    # Both checkpoints are mapped onto the same target device.
    device = torch.device('cuda') if CUDA else torch.device('cpu')

    rsgat_ckpt = "/home/ubuntu/RNET_gpu/checkpoints/yago/out/rngat.solution2.trained_present.pth"
    rsgat = multiRSGAT(entity_embeddings, relation_embeddings,
                       type_embeddings, rdf_relation_embeddings, args)
    rsgat.load_state_dict(torch.load(rsgat_ckpt, map_location=device))

    connect_model = get_model(rsgat)
    connect_ckpt = "/home/ubuntu/RNET_gpu/checkpoints/yago/out/connectE2T_TRT.trained_present.pth"
    connect_model.load_state_dict(torch.load(connect_ckpt,
                                             map_location=device))

    model = WrapperModel2(connect_model)
    if CUDA:
        rsgat.cuda()
        connect_model.cuda()

    # Triple splits consumed by the evaluators.
    test_e2t = e2t_Corpus_.test_triples
    train_e2t = e2t_Corpus_.train_triples
    valid_e2t = e2t_Corpus_.validation_triples
    train_trt = trt_Corpus_.train_triples
    all_e2t = (e2t_Corpus_.test_triples + e2t_Corpus_.validation_triples +
               e2t_Corpus_.train_triples)

    evaluator1 = Type_Evaluator(test_e2t, all_e2t, logger)
    classification_evaluator = Classification_Evaluator_E2T(
        valid_e2t,
        test_e2t,
        all_e2t,
        train_trt,
        model,
        logger=logger,
        save_path=
        "/home/ubuntu/RNET_gpu/checkpoints/yago/out/classification_result",
        save_data=True)
    # evaluator1_train = Type_Evaluator(train_e2t, all_e2t, logger)
    # evaluator2 = Type_Evaluator_trt(test_e2t, all_e2t, ere_Corpus_.train_triples, logger)
    logger.info("TRAIN")
    # evaluator1_train(model)
    logger.info("TEST")
    with torch.no_grad():
        evaluator1(model, corpus_e2t=e2t_Corpus_)
示例#2
0
def train_gat(args):
    """Train the multiRSGAT model jointly on the e2t, ere and trt corpora.

    Each epoch shuffles all three corpora, iterates mini-batches of size
    ``args.batch_size_gat`` (iteration count driven by the e2t corpus),
    accumulates the three margin-ranking losses in a single backward pass,
    steps a StepLR schedule, checkpoints every epoch and evaluates on the
    test split every 100 epochs (train split every 500).

    Args:
        args: namespace with at least ``nheads_GAT``, ``lr``,
            ``weight_decay_gat``, ``margin``, ``epochs_gat``,
            ``batch_size_gat`` and ``output_folder``.
    """

    print("Defining model")

    print(
        "\nModel type -> GAT layer with {} heads used , Initital Embeddings training"
        .format(args.nheads_GAT[0]))

    model_rsgat = multiRSGAT(entity_embeddings, relation_embeddings,
                             type_embeddings, rdf_relation_embeddings, args)
    # model_rsgat.load_state_dict(torch.load("/home/ubuntu/RNET_gpu/checkpoints/fb/out/rngat.solution2.trained_present.pth"))
    wrapper_model = WrapperModel(model_rsgat)
    test_e2t = e2t_Corpus_.test_triples
    all_e2t = e2t_Corpus_.test_triples + e2t_Corpus_.validation_triples + e2t_Corpus_.train_triples
    evaluator1 = Type_Evaluator(test_e2t, all_e2t, logger)
    evaluator1_train = Type_Evaluator(e2t_Corpus_.train_triples, all_e2t,
                                      logger)

    if CUDA:
        model_rsgat.cuda()

    optimizer = torch.optim.Adam(model_rsgat.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay_gat)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=200,
                                                gamma=0.5,
                                                last_epoch=-1)

    gat_loss_func = nn.MarginRankingLoss(margin=args.margin)

    def _batch_tensors(indices, values):
        # Wrap one numpy batch as (LongTensor, FloatTensor), on GPU when available.
        idx = Variable(torch.LongTensor(indices))
        val = Variable(torch.FloatTensor(values))
        if CUDA:
            idx = idx.cuda()
            val = val.cuda()
        return idx, val

    epoch_losses_e2t = []  # per-epoch mean losses
    epoch_losses_ere = []
    epoch_losses_trt = []
    print("Number of epochs {}".format(args.epochs_gat))

    for epoch in range(args.epochs_gat):
        print("\nepoch-> ", epoch)
        random.shuffle(e2t_Corpus_.train_triples)
        random.shuffle(ere_Corpus_.train_triples)
        random.shuffle(trt_Corpus_.train_triples)

        e2t_Corpus_.train_indices = np.array(list(
            e2t_Corpus_.train_triples)).astype(np.int32)
        ere_Corpus_.train_indices = np.array(list(
            ere_Corpus_.train_triples)).astype(np.int32)
        trt_Corpus_.train_indices = np.array(list(
            trt_Corpus_.train_triples)).astype(np.int32)

        model_rsgat.train()  # enable BatchNormalization and Dropout

        start_time = time.time()
        epoch_loss_e2t = []
        epoch_loss_ere = []
        epoch_loss_trt = []

        # Ceiling division: a final partial batch still gets an iteration.
        num_iters_per_epoch = (len(e2t_Corpus_.train_indices) +
                               args.batch_size_gat - 1) // args.batch_size_gat
        print(num_iters_per_epoch)
        for iters in range(num_iters_per_epoch):
            start_time_iter = time.time()
            train_indices_e2t, train_values_e2t = e2t_Corpus_.get_iteration_batch(
                iters)
            train_indices_ere, train_values_ere = ere_Corpus_.get_iteration_batch(
                iters)
            train_indices_trt, train_values_trt = trt_Corpus_.get_iteration_batch(
                iters)

            train_indices_e2t, train_values_e2t = _batch_tensors(
                train_indices_e2t, train_values_e2t)
            train_indices_ere, train_values_ere = _batch_tensors(
                train_indices_ere, train_values_ere)
            train_indices_trt, train_values_trt = _batch_tensors(
                train_indices_trt, train_values_trt)

            # Forward pass over all three corpora at once.
            out_entity_embed_long, out_entity_embed_short, out_relation_embed_long, \
            out_relation_embed_short, out_type_embed, output_rdf_embedding = model_rsgat(
                    e2t_Corpus_, ere_Corpus_, trt_Corpus_, train_indices_e2t, train_indices_ere, train_indices_trt)

            optimizer.zero_grad()  # reset accumulated gradients

            loss_e2t = batch_gat_loss_e2t(gat_loss_func, train_indices_e2t,
                                          out_entity_embed_short,
                                          output_rdf_embedding, out_type_embed)
            loss_ere = batch_gat_loss(gat_loss_func, train_indices_ere,
                                      out_entity_embed_long,
                                      out_relation_embed_long)
            loss_trt = batch_gat_loss(gat_loss_func, train_indices_trt,
                                      out_type_embed, out_relation_embed_short)

            # One backward over the summed loss accumulates the same
            # gradients as three separate backward() calls, without the
            # cost of retaining the graph between them.
            (loss_e2t + loss_ere + loss_trt).backward()

            optimizer.step()
            epoch_loss_e2t.append(loss_e2t.item())
            epoch_loss_ere.append(loss_ere.item())
            epoch_loss_trt.append(loss_trt.item())

            end_time_iter = time.time()

            print(
                "Iteration-> {0}  , Iteration_time-> {1:.4f} , Iteration_loss_e2t {2:.4f}, Iteration_loss_ere {3:.4f}, Iteration_loss_trt {4:.4f}"
                .format(iters, end_time_iter - start_time_iter,
                        loss_e2t.item(), loss_ere.item(), loss_trt.item()))
        scheduler.step()
        print(
            "Epoch {} , average loss e2t {} , average loss ere {} , average loss trt {} , epoch_time {}"
            .format(epoch,
                    sum(epoch_loss_e2t) / len(epoch_loss_e2t),
                    sum(epoch_loss_ere) / len(epoch_loss_ere),
                    sum(epoch_loss_trt) / len(epoch_loss_trt),
                    time.time() - start_time))
        epoch_losses_e2t.append(sum(epoch_loss_e2t) / len(epoch_loss_e2t))
        epoch_losses_ere.append(sum(epoch_loss_ere) / len(epoch_loss_ere))
        epoch_losses_trt.append(sum(epoch_loss_trt) / len(epoch_loss_trt))

        save_model(model_rsgat, "rngat.solution2", epoch, args.output_folder,
                   args.epochs_gat)
        if (epoch + 1) % 100 == 0:
            logger.info("TEST")
            evaluator1(wrapper_model)
            if (epoch + 1) % 500 == 0:
                logger.info("TRAIN")
                evaluator1_train(wrapper_model)
示例#3
0
def train_ConnectE():
    """Fine-tune the ConnectE2T_TRT model on the e2t corpus, starting from
    pre-trained RSGAT and ConnectE checkpoints.

    Only the e2t objective is trained. The TRT training phase and the
    ``scheduler.step()`` call were disabled (previously a commented-out
    block, removed here), so the reported trt loss/margin stay at 0 and
    the learning rate never decays. Checkpoints every epoch and evaluates
    every 100 epochs.
    """
    # Both checkpoints are mapped onto the same target device.
    device = torch.device('cuda') if CUDA else torch.device('cpu')

    model_path = "/home/ubuntu/RNET_gpu/checkpoints/yago/out/rngat.solution2.trained_present.pth"
    model_rsgat = multiRSGAT(entity_embeddings, relation_embeddings,
                             type_embeddings, rdf_relation_embeddings, args)
    model_rsgat.load_state_dict(torch.load(model_path, map_location=device))

    connectE2T_TRT = get_model(model_rsgat)
    model_path2 = "/home/ubuntu/RNET_gpu/checkpoints/yago/out/connectE2T_TRT.trained_present.pth"
    connectE2T_TRT.load_state_dict(
        torch.load(model_path2, map_location=device))

    model = WrapperModel2(connectE2T_TRT)
    if CUDA:  # single move each; the original moved connectE2T_TRT twice
        model_rsgat.cuda()
        connectE2T_TRT.cuda()

    test_e2t = e2t_Corpus_.test_triples
    train_trt = trt_Corpus_.train_triples
    all_e2t = e2t_Corpus_.test_triples + e2t_Corpus_.validation_triples + e2t_Corpus_.train_triples
    evaluator1 = Type_Evaluator(test_e2t, all_e2t, logger)

    optimizer = torch.optim.Adagrad(connectE2T_TRT.parameters(),
                                    lr=args.lr,
                                    weight_decay=args.weight_decay_gat)
    # NOTE(review): scheduler.step() is never called below, so this schedule
    # is currently inert — confirm whether LR decay should be re-enabled.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=100,
                                                gamma=0.7,
                                                last_epoch=-1)

    gat_loss_func = nn.MarginRankingLoss(margin=args.margin, reduction='none')

    # Seeded with 0 so the averages below are defined even with no batches.
    epoch_losses_e2t = [0]  # losses of all epochs
    epoch_losses_trt = [0]  # losses of all epochs
    epoch_losses_e2t_margin = [0]
    epoch_losses_trt_margin = [0]
    print("Number of epochs {}".format(args.epochs_gat))
    for epoch in range(args.epochs_gat):
        print("\nepoch-> ", epoch)
        random.shuffle(e2t_Corpus_.train_triples)
        e2t_Corpus_.train_indices = np.array(list(
            e2t_Corpus_.train_triples)).astype(np.int32)
        connectE2T_TRT.train()  # enable BatchNormalization and Dropout
        start_time = time.time()
        epoch_loss_e2t = [0]
        e2t_margin = 0
        e2t_cnt = 1  # starts at 1 to avoid division by zero in the report
        # Ceiling division: a final partial batch still gets an iteration.
        num_iters_per_epoch = (len(e2t_Corpus_.train_indices) +
                               args.batch_size_gat - 1) // args.batch_size_gat

        print(num_iters_per_epoch)
        for iters in range(num_iters_per_epoch):
            train_indices_e2t, train_values_e2t = e2t_Corpus_.get_iteration_batch(
                iters)
            # Wrap the numpy batch as tensors; move to GPU when available.
            train_indices_e2t = Variable(torch.LongTensor(train_indices_e2t))
            train_values_e2t = Variable(
                torch.FloatTensor(train_values_e2t)).squeeze()
            if CUDA:
                train_indices_e2t = train_indices_e2t.cuda()
                train_values_e2t = train_values_e2t.cuda()

            # Forward pass (the model takes no per-batch inputs here).
            out_entity_embed_short, out_relation_embed_long, out_relation_embed_short, out_type_embed = connectE2T_TRT(
            )
            optimizer.zero_grad()  # reset accumulated gradients
            loss_e2t, e2t_margin_count = batch_gat_loss_e2t(
                gat_loss_func, train_indices_e2t, train_values_e2t,
                out_entity_embed_short, out_type_embed)
            e2t_margin += e2t_margin_count
            loss_e2t.backward()
            optimizer.step()
            epoch_loss_e2t.append(loss_e2t.item())
            e2t_cnt += train_indices_e2t.size(0)

        # TRT phase: the corpus is still shuffled/re-indexed each epoch, but
        # its training loop is disabled — the stats below stay at their seeds.
        random.shuffle(trt_Corpus_.train_triples)
        trt_Corpus_.train_indices = np.array(list(
            trt_Corpus_.train_triples)).astype(np.int32)
        epoch_loss_trt = [0]
        trt_margin = 0
        trt_cnt = 1

        print(
            "Epoch {} , average loss e2t {} , average loss ere {} , average loss trt {} , e2t_margin {:2%} trt_margin {:2%}, epoch_time {}"
            .format(epoch,
                    sum(epoch_loss_e2t) / len(epoch_loss_e2t), 0,
                    sum(epoch_loss_trt) / len(epoch_loss_trt),
                    e2t_margin / e2t_cnt, trt_margin / trt_cnt,
                    time.time() - start_time))
        epoch_losses_e2t.append(sum(epoch_loss_e2t) / len(epoch_loss_e2t))
        epoch_losses_trt.append(sum(epoch_loss_trt) / len(epoch_loss_trt))

        save_model(connectE2T_TRT, "connectE2T_TRT", epoch, args.output_folder,
                   args.epochs_gat)
        if (epoch + 1) % 100 == 0:
            connectE2T_TRT.eval()
            evaluator1(model)