Exemplo n.º 1
0
def main():
    """Evaluate a saved DeeperGCN checkpoint on a node-property dataset.

    Reads the configuration from ``ArgsInit``, loads the dataset named by
    ``args.dataset``, optionally adds self loops to the graph, restores the
    weights stored under ``'model_state_dict'`` in ``args.model_load_path``
    and prints the value returned by ``test``.
    """
    args = ArgsInit().args

    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]

    if args.self_loop:
        graph.edge_index = add_self_loops(edge_index=graph.edge_index,
                                          num_nodes=graph.num_nodes)[0]
    split_idx = dataset.get_idx_split()

    evaluator = Evaluator(args.dataset)

    # The model dimensions are derived from the data, not from the CLI.
    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    print(args)

    model = DeeperGCN(args)

    print(model)

    # The model is never moved off the CPU in this script, so remap the
    # stored tensors to the CPU. Without map_location a checkpoint written
    # from a CUDA run cannot be deserialized on a CPU-only machine.
    checkpoint = torch.load(args.model_load_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])

    result = test(model, graph.x, graph.edge_index, graph.y, split_idx,
                  evaluator)
    print(result)
    model.print_params(final=True)
Exemplo n.º 2
0
def main():
    """Evaluate a saved DeeperGCN checkpoint on an undirected node dataset.

    Reads the configuration from ``ArgsInit``, moves the node features,
    labels and edges to the selected device, makes the edge list undirected
    (optionally with self loops), restores the weights stored under
    ``'model_state_dict'`` in ``args.model_load_path`` and prints the
    train/validation/test values returned by ``test``.
    """
    args = ArgsInit().args

    # Use the requested GPU only when CUDA is actually available.
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')

    dataset = PygNodePropPredDataset(name=args.dataset)
    data = dataset[0]
    split_idx = dataset.get_idx_split()

    evaluator = Evaluator(args.dataset)

    x = data.x.to(device)
    y_true = data.y.to(device)

    edge_index = data.edge_index.to(device)
    edge_index = to_undirected(edge_index, data.num_nodes)

    if args.self_loop:
        edge_index = add_self_loops(edge_index, num_nodes=data.num_nodes)[0]

    # The model dimensions are derived from the data, not from the CLI.
    args.in_channels = data.x.size(-1)
    args.num_tasks = dataset.num_classes

    print(args)

    model = DeeperGCN(args)

    # The model is still on the CPU at this point, so deserialize the
    # checkpoint onto the CPU as well. Without map_location a checkpoint
    # written from a CUDA run fails to load when that GPU is unavailable.
    checkpoint = torch.load(args.model_load_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)

    result = test(model, x, edge_index, y_true, split_idx, evaluator)
    train_accuracy, valid_accuracy, test_accuracy = result

    print({
        'Train': train_accuracy,
        'Validation': valid_accuracy,
        'Test': test_accuracy
    })

    model.print_params(final=True)
Exemplo n.º 3
0
def main():
    """Evaluate saved DeeperGCN and LinkPredictor checkpoints on a link dataset.

    Reads the configuration from ``ArgsInit``, loads the link-prediction
    dataset named by ``args.dataset``, restores the model from
    ``args.model_load_path`` and the predictor from
    ``args.predictor_load_path`` (both under the ``'model_state_dict'`` key)
    and prints the train/validation/test value of each Hits@K entry
    returned by ``test``.
    """
    args = ArgsInit().args

    # Use the requested GPU only when CUDA is actually available.
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')

    dataset = PygLinkPropPredDataset(name=args.dataset)
    data = dataset[0]
    # Data(edge_index=[2, 2358104], edge_weight=[2358104, 1], edge_year=[2358104, 1], x=[235868, 128])
    split_edge = dataset.get_edge_split()
    evaluator = Evaluator(args.dataset)

    x = data.x.to(device)

    edge_index = data.edge_index.to(device)

    args.in_channels = data.x.size(-1)
    args.num_tasks = 1

    print(args)

    model = DeeperGCN(args).to(device)
    predictor = LinkPredictor(args).to(device)

    # Both modules already live on `device`, so map the stored tensors
    # there directly. Without map_location a checkpoint written on a GPU
    # that is not present on this machine cannot be deserialized.
    model_ckpt = torch.load(args.model_load_path, map_location=device)
    model.load_state_dict(model_ckpt['model_state_dict'])

    predictor_ckpt = torch.load(args.predictor_load_path,
                                map_location=device)
    predictor.load_state_dict(predictor_ckpt['model_state_dict'])

    hits = ['Hits@10', 'Hits@50', 'Hits@100']

    result = test(model, predictor, x, edge_index, split_edge, evaluator,
                  args.batch_size)

    for k in hits:
        # Each entry unpacks into a (train, validation, test) triple.
        train_result, valid_result, test_result = result[k]
        print('{}--Train: {}, Validation: {}, Test: {}'.format(
            k, train_result, valid_result, test_result))
Exemplo n.º 4
0
def test_model(model_path):
    """Load a state dict from ``model_path`` and print its held-out score.

    The graph is split into 10 random node partitions; the partitions after
    the first 90% are passed to ``test`` and the returned value is printed.

    Args:
        model_path: Path to a file holding a bare state dict, i.e. the
            object passed directly to ``load_state_dict``.
    """
    args = ArgsInit().args
    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]

    num_parts = 10
    # NOTE(review): the sampler is shuffled, so these held-out partitions
    # are presumably not the ones held out during training - confirm.
    data_list = list(
        RandomNodeSampler(graph, num_parts=num_parts, shuffle=True))
    number_of_train = int(0.9 * num_parts)

    # Only the held-out tail is needed here; the training head is unused.
    test_data_list = data_list[number_of_train:]

    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    model = DeeperGCN(args)
    # The model stays on the CPU, so remap the stored tensors to the CPU.
    # Without map_location a state dict saved from a CUDA model cannot be
    # deserialized on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))

    print(test(model, test_data_list))
Exemplo n.º 5
0
def main():
    """Train DeeperGCN on random sub-graph partitions with periodic evaluation.

    Every epoch the nodes are re-partitioned into ``args.cluster_number``
    random clusters and the model is trained with ``train_flag``. Every
    ``args.eval_epochs`` epochs the model is evaluated with
    ``test_with_partition``; the training loss and the train/validation
    accuracies are written to TensorBoard under ``<args.save>/tensorboard/``
    and a checkpoint is saved whenever the validation accuracy improves.
    """
    args = ArgsInit().save_exp()

    # Use the requested GPU only when CUDA is actually available.
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')

    dataset = PygNodePropPredDataset(name=args.dataset, root=args.data_folder)
    graph = dataset[0]

    adj = SparseTensor(row=graph.edge_index[0], col=graph.edge_index[1])

    if args.self_loop:
        # Keep the sparse adjacency and the edge list in agreement.
        adj = adj.set_diag()
        graph.edge_index = add_self_loops(edge_index=graph.edge_index,
                                          num_nodes=graph.num_nodes)[0]
    split_idx = dataset.get_idx_split()
    train_idx = split_idx["train"].tolist()

    evaluator = Evaluator(args.dataset)

    sub_dir = 'random-train_{}-full_batch_test'.format(args.cluster_number)
    logging.info(sub_dir)

    log_dir = os.path.join(args.save, "tensorboard/")
    writer = SummaryWriter(log_dir=log_dir)

    # The model dimensions are derived from the data, not from the CLI.
    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)

    logging.info(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    results = {
        'highest_valid': 0,
        'final_train': 0,
        'final_test': 0,
        'highest_train': 0
    }

    start_time = time.time()

    for epoch in range(1, args.epochs + 1):
        # Generate a fresh random partition (the mini-batches) every epoch.
        parts = random_partition_graph(graph.num_nodes,
                                       cluster_number=args.cluster_number)
        data = generate_sub_graphs(adj,
                                   parts,
                                   cluster_number=args.cluster_number)

        epoch_loss = train_flag(data, model, graph.x, graph.y, train_idx,
                                optimizer, device, args)

        logging.info('Epoch {}, training loss {:.4f}'.format(
            epoch, epoch_loss))
        writer.add_scalar("Loss/train", epoch_loss, epoch)
        model.print_params(epoch=epoch)

        if epoch % args.eval_epochs != 0:
            continue

        logging.info(f'---- Evaluating at epoch {epoch} ----')
        res = test_with_partition(model,
                                  graph,
                                  adj,
                                  split_idx,
                                  num_clusters=args.eval_cluster_number,
                                  partition_method=args.partition_method,
                                  evaluator=evaluator,
                                  device=device)

        logging.info(res)
        logging.info("---------------------------------")

        train_accuracy = res["train_acc"]
        valid_accuracy = res["valid_acc"]
        test_accuracy = res["test_acc"]

        # Pass the epoch as the global step; without it the accuracy
        # scalars are not associated with the epoch they were measured at.
        writer.add_scalar("Acc/train", train_accuracy, epoch)
        writer.add_scalar("Acc/dev", valid_accuracy, epoch)

        if train_accuracy > results['highest_train']:
            results['highest_train'] = train_accuracy

        if valid_accuracy > results['highest_valid']:
            # Report train/test numbers at the best validation epoch.
            results['highest_valid'] = valid_accuracy
            results['final_train'] = train_accuracy
            results['final_test'] = test_accuracy

            save_ckpt(model,
                      optimizer,
                      round(epoch_loss, 4),
                      epoch,
                      args.model_save_path,
                      sub_dir,
                      name_post='valid_best')
            logging.info(
                f"Saved better model to checkpoint folder {args.model_save_path}"
            )

    # Flush any buffered events to disk before the process exits.
    writer.close()

    logging.info("%s" % results)

    end_time = time.time()
    total_time = end_time - start_time
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(total_time))))
Exemplo n.º 6
0
def main():
    """Run full-batch DeeperGCN training on a node-property dataset.

    After every epoch the model is evaluated with ``test``. The best
    train/validation values seen so far are tracked, and a checkpoint is
    written each time the validation value improves. The tracked values and
    the total wall-clock time are logged at the end.
    """
    args = ArgsInit().save_exp()

    # Fall back to the CPU when a GPU is not requested or not available.
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')

    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]
    split_idx = dataset.get_idx_split()

    evaluator = Evaluator(args.dataset)

    features = graph.x.to(device)
    labels = graph.y.to(device)
    train_nodes = split_idx['train'].to(device)

    # Move the edges to the device first, then make them undirected.
    edges = to_undirected(graph.edge_index.to(device), graph.num_nodes)
    if args.self_loop:
        edges = add_self_loops(edges, num_nodes=graph.num_nodes)[0]

    ckpt_sub_dir = 'SL_{}'.format(args.self_loop)

    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)
    logging.info(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    best = dict(highest_valid=0, final_train=0, final_test=0, highest_train=0)

    started = time.time()

    for epoch in range(1, args.epochs + 1):
        epoch_loss = train(model, features, edges, labels, train_nodes,
                           optimizer)
        logging.info('Epoch {}, training loss {:.4f}'.format(
            epoch, epoch_loss))
        model.print_params(epoch=epoch)

        scores = test(model, features, edges, labels, split_idx, evaluator)
        logging.info(scores)
        train_accuracy, valid_accuracy, test_accuracy = scores

        best['highest_train'] = max(best['highest_train'], train_accuracy)

        # Nothing more to record unless validation strictly improved.
        if not valid_accuracy > best['highest_valid']:
            continue

        best.update(highest_valid=valid_accuracy,
                    final_train=train_accuracy,
                    final_test=test_accuracy)

        save_ckpt(model,
                  optimizer,
                  round(epoch_loss, 4),
                  epoch,
                  args.model_save_path,
                  ckpt_sub_dir,
                  name_post='valid_best')

    logging.info("%s" % best)

    elapsed = time.time() - started
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(elapsed))))
Exemplo n.º 7
0
def main():
    """Train DeeperGCN with a LinkPredictor head on a link-prediction dataset.

    After every epoch the model is evaluated with ``test`` and, for each of
    Hits@10/50/100, the best train/validation values are tracked. Whenever
    a validation value improves, the model and the predictor are both
    checkpointed under a folder named after that metric. When
    ``args.use_tensor_board`` is set, the loss and Hits@50 curves are also
    written to TensorBoard in ``args.save``.
    """
    args = ArgsInit().save_exp()

    # The writer is optional; keep an explicit None so it can be closed
    # safely at the end regardless of the flag.
    writer = SummaryWriter(log_dir=args.save) if args.use_tensor_board else None

    # Use the requested GPU only when CUDA is actually available.
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')

    dataset = PygLinkPropPredDataset(name=args.dataset)
    data = dataset[0]
    # Data(edge_index=[2, 2358104], edge_weight=[2358104, 1], edge_year=[2358104, 1], x=[235868, 128])
    split_edge = dataset.get_edge_split()
    evaluator = Evaluator(args.dataset)

    x = data.x.to(device)

    edge_index = data.edge_index.to(device)

    args.in_channels = data.x.size(-1)
    args.num_tasks = 1

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)
    predictor = LinkPredictor(args).to(device)

    logging.info(model)
    logging.info(predictor)

    # A single optimizer updates the encoder and the predictor jointly.
    optimizer = torch.optim.Adam(list(model.parameters()) +
                                 list(predictor.parameters()),
                                 lr=args.lr)

    keys = ['highest_valid', 'final_train', 'final_test', 'highest_train']
    hits = ['Hits@10', 'Hits@50', 'Hits@100']
    results = {key: {k: 0 for k in hits} for key in keys}

    start_time = time.time()

    for epoch in range(1, args.epochs + 1):

        epoch_loss = train(model, predictor, x, edge_index, split_edge,
                           optimizer, args.batch_size)
        logging.info('Epoch {}, training loss {:.4f}'.format(
            epoch, epoch_loss))
        model.print_params(epoch=epoch)

        result = test(model, predictor, x, edge_index, split_edge, evaluator,
                      args.batch_size)

        for k in hits:
            # Each entry unpacks into a (train, validation, test) triple.
            train_result, valid_result, test_result = result[k]

            if writer is not None and k == 'Hits@50':
                writer.add_scalar('stats/train_loss', epoch_loss, epoch)
                writer.add_scalar('stats/train_Hits@50', train_result, epoch)
                writer.add_scalar('stats/valid_Hits@50', valid_result, epoch)
                writer.add_scalar('stats/test_Hits@50', test_result, epoch)

            if train_result > results['highest_train'][k]:
                results['highest_train'][k] = train_result

            if valid_result > results['highest_valid'][k]:
                # Report train/test numbers at the best validation epoch.
                results['highest_valid'][k] = valid_result
                results['final_train'][k] = train_result
                results['final_test'][k] = test_result

                save_ckpt(model,
                          optimizer,
                          round(epoch_loss, 4),
                          epoch,
                          args.model_save_path,
                          k,
                          name_post='valid_best')
                save_ckpt(predictor,
                          optimizer,
                          round(epoch_loss, 4),
                          epoch,
                          args.model_save_path,
                          k,
                          name_post='valid_best_link_predictor')

        logging.info(result)

    # Flush any buffered TensorBoard events before the process exits.
    if writer is not None:
        writer.close()

    logging.info("%s" % results)

    end_time = time.time()
    total_time = end_time - start_time
    time_used = 'Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(total_time)))
    logging.info(time_used)
Exemplo n.º 8
0
def main():
    """Train DeeperGCN on random sub-graph partitions, evaluating once.

    The nodes are re-partitioned into ``args.cluster_number`` random
    clusters every epoch. ``test`` is called only after the final epoch, on
    the whole graph, and a checkpoint is written if its validation value
    beats the initial value of 0.
    """
    args = ArgsInit().save_exp()

    # Fall back to the CPU when a GPU is not requested or not available.
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')

    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]

    src_nodes, dst_nodes = graph.edge_index[0], graph.edge_index[1]
    adj = SparseTensor(row=src_nodes, col=dst_nodes)

    if args.self_loop:
        # Add self loops to the adjacency and to the edge list alike.
        adj = adj.set_diag()
        graph.edge_index = add_self_loops(edge_index=graph.edge_index,
                                          num_nodes=graph.num_nodes)[0]

    split_idx = dataset.get_idx_split()
    train_nodes = split_idx["train"].tolist()

    evaluator = Evaluator(args.dataset)

    ckpt_sub_dir = 'random-train_{}-full_batch_test'.format(
        args.cluster_number)
    logging.info(ckpt_sub_dir)

    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)
    logging.info(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    best = dict(highest_valid=0, final_train=0, final_test=0, highest_train=0)

    started = time.time()

    for epoch in range(1, args.epochs + 1):
        # Build this epoch's mini-batches from a fresh random partition.
        clusters = random_partition_graph(graph.num_nodes,
                                          cluster_number=args.cluster_number)
        sub_graphs = generate_sub_graphs(adj,
                                         clusters,
                                         cluster_number=args.cluster_number)

        epoch_loss = train(sub_graphs, model, graph.x, graph.y, train_nodes,
                           optimizer, device)
        logging.info('Epoch {}, training loss {:.4f}'.format(
            epoch, epoch_loss))
        model.print_params(epoch=epoch)

        # Evaluation runs only once, after the last epoch.
        if epoch != args.epochs:
            continue

        scores = test(model, graph.x, graph.edge_index, graph.y, split_idx,
                      evaluator)
        logging.info(scores)
        train_accuracy, valid_accuracy, test_accuracy = scores

        best['highest_train'] = max(best['highest_train'], train_accuracy)

        if valid_accuracy > best['highest_valid']:
            best.update(highest_valid=valid_accuracy,
                        final_train=train_accuracy,
                        final_test=test_accuracy)

            save_ckpt(model,
                      optimizer,
                      round(epoch_loss, 4),
                      epoch,
                      args.model_save_path,
                      ckpt_sub_dir,
                      name_post='valid_best')

    logging.info("%s" % best)

    elapsed = time.time() - started
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(elapsed))))
Exemplo n.º 9
0
def main():
    """Train DeeperGCN on neighbor sub-graphs and save the best weights.

    The graph is split into ``NUMBER_OF_SUBGRAPHS`` sub-graphs; the last 10%
    are used for testing after each epoch. The state dict of the epoch with
    the highest test accuracy is saved to ``CKPT_PATH`` and the loss and
    accuracy history is pickled to ``EXPERIMENT_RES_PATH``.
    """
    # Local import: needed to snapshot the weights (see the training loop).
    import copy

    EPOCHS = 1
    NUMBER_OF_SUBGRAPHS = 10
    CKPT_PATH = f'neighbor_deeper_num_{NUMBER_OF_SUBGRAPHS}.pt'
    EXPERIMENT_RES_PATH = f'neighbor_deeper_num_{NUMBER_OF_SUBGRAPHS}_experiment_res.pk'
    args = ArgsInit().args

    # Use the requested GPU only when CUDA is actually available.
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')

    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]
    print(graph)
    num_parts = NUMBER_OF_SUBGRAPHS
    data_list = list(
        NeighborSubgraphLoader(graph, num_parts=NUMBER_OF_SUBGRAPHS))
    print(f'len of datalist: {len(data_list)}')
    number_of_train = int(0.9 * num_parts)

    train_data_list = data_list[0:number_of_train]
    test_data_list = data_list[number_of_train:]

    print(
        f'Train test split successful, number of train: {len(train_data_list)} | number of test: {len(test_data_list)}'
    )

    # The model dimensions are derived from the data, not from the CLI.
    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)

    logging.info(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    start_time = time.time()
    epoch_loss_list = []
    epoch_acc_list = []
    highest_acc = 0
    best_model_dict = None
    for epoch in range(1, EPOCHS + 1):

        # NOTE(review): training receives the full data_list, which also
        # contains the sub-graphs held out in test_data_list. Presumably
        # train_data_list was intended - confirm against train().
        epoch_loss, epoch_acc = train(data_list, model, optimizer, device)
        epoch_loss_list.append(epoch_loss)
        epoch_acc_list.append(epoch_acc)
        print('Epoch {}, training loss {:.4f} | training acc {}'.format(
            epoch, epoch_loss, epoch_acc))

        test_acc = test(model, test_data_list)
        if test_acc > highest_acc:
            highest_acc = test_acc
            # state_dict() returns references to the live parameters, so
            # take a deep copy; otherwise later epochs would silently
            # overwrite the "best" snapshot.
            best_model_dict = copy.deepcopy(model.state_dict())

        logging.info(
            f'best test acc: {highest_acc} | saved to path {CKPT_PATH}')

    end_time = time.time()
    total_time = end_time - start_time
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(total_time))))

    experiment_result = {}
    experiment_result['Total training time'] = total_time
    experiment_result['Epoch loss list'] = epoch_loss_list
    experiment_result['Epoch acc list'] = epoch_acc_list
    experiment_result['Best test acc'] = highest_acc

    if best_model_dict is None:
        # No epoch beat the initial accuracy of 0; save the final weights
        # rather than writing an unusable ``None`` checkpoint.
        best_model_dict = model.state_dict()

    torch.save(best_model_dict, CKPT_PATH)
    with open(EXPERIMENT_RES_PATH, 'wb') as f:
        pk.dump(experiment_result, f)