Example #1
0
def make_data_loader(batch_size, dataset_name='Letter_low', cuda=False):
    """
    Build train/validation/test dataloaders for a TU-Dortmund benchmark dataset.

    :param batch_size: batch size shared by all three loaders
    :param dataset_name: name of the dataset to fetch from the TU Dortmund
                         collection
                         (https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets)
    :param cuda: whether CUDA is available (forwarded to preprocessing)
    :return: train_dataloader, val_dataloader, test_dataloader
    """

    # Load the full dataset and run the shared preprocessing step on it.
    full_dataset = tu.LegacyTUDataset(name=dataset_name)
    preprocess(full_dataset, cuda)

    # Split sizes: train and test are fixed ratios; validation takes the
    # remainder so the three parts always sum to the dataset length.
    n_total = len(full_dataset)
    n_train = int(TRAIN_RATIO * n_total)
    n_test = int(TEST_RATIO * n_total)
    n_val = int(n_total - n_train - n_test)

    train_split, val_split, test_split = torch.utils.data.random_split(
        full_dataset, (n_train, n_val, n_test))

    def _loader(split, shuffle):
        # All loaders share the batch size and the custom collate function;
        # only the shuffle flag differs (train shuffles, val/test do not).
        return torch.utils.data.DataLoader(split,
                                           batch_size=batch_size,
                                           shuffle=shuffle,
                                           collate_fn=collate)

    return (_loader(train_split, True),
            _loader(val_split, False),
            _loader(test_split, False))
Example #2
0
def graph_classify_task(prog_args):
    '''
    Run the end-to-end graph classification task: split the dataset,
    build dataloaders, construct a DiffPool model, optionally resume
    from a checkpoint, train, and report test accuracy.
    '''

    # --- dataset and splits -------------------------------------------------
    dataset = tu.LegacyTUDataset(name=prog_args.dataset)
    n_total = len(dataset)
    n_train = int(prog_args.train_ratio * n_total)
    n_test = int(prog_args.test_ratio * n_total)
    # Validation takes whatever is left so the three parts cover the dataset.
    n_val = int(n_total - n_train - n_test)

    dataset_train, dataset_val, dataset_test = torch.utils.data.random_split(
        dataset, (n_train, n_val, n_test))

    # Only the training loader is built in training mode; all three share
    # the same preprocessing hook.
    train_dataloader = prepare_data(dataset_train, prog_args, train=True,
                                    pre_process=pre_process)
    val_dataloader = prepare_data(dataset_val, prog_args, train=False,
                                  pre_process=pre_process)
    test_dataloader = prepare_data(dataset_test, prog_args, train=False,
                                   pre_process=pre_process)

    input_dim, label_dim, max_num_node = dataset.statistics()
    print("++++++++++STATISTICS ABOUT THE DATASET")
    print("dataset feature dimension is", input_dim)
    print("dataset label dimension is", label_dim)
    print("the max num node is", max_num_node)
    print("number of graphs is", len(dataset))
    # assert len(dataset) % prog_args.batch_size == 0, "training set not divisible by batch size"

    # --- model hyper-parameters --------------------------------------------
    hidden_dim = 64  # used to be 64
    embedding_dim = 64

    # Assignment dimension = pool_ratio * the largest graph's node count
    # in the dataset.
    assign_dim = int(max_num_node * prog_args.pool_ratio)
    print("++++++++++MODEL STATISTICS++++++++")
    print("model hidden dim is", hidden_dim)
    print("model embedding dim for graph instance embedding", embedding_dim)
    print("initial batched pool graph dim is", assign_dim)
    activation = F.relu

    # --- model construction ('diffpool' method) ----------------------------
    model = DiffPool(input_dim,
                     hidden_dim,
                     embedding_dim,
                     label_dim,
                     activation,
                     prog_args.gc_per_block,
                     prog_args.dropout,
                     prog_args.num_pool,
                     prog_args.linkpred,
                     prog_args.batch_size,
                     'meanpool',
                     assign_dim,
                     prog_args.pool_ratio)

    # Resume from a checkpoint when both an epoch and a save dir were given.
    if prog_args.load_epoch >= 0 and prog_args.save_dir is not None:
        checkpoint_path = (prog_args.save_dir + "/" + prog_args.dataset
                           + "/model.iter-" + str(prog_args.load_epoch))
        model.load_state_dict(torch.load(checkpoint_path))

    print("model init finished")
    print("MODEL:::::::", prog_args.method)
    if prog_args.cuda:
        model = model.cuda()

    # --- train and evaluate ------------------------------------------------
    logger = train(
        train_dataloader,
        model,
        prog_args,
        val_dataset=val_dataloader)
    result = evaluate(test_dataloader, model, prog_args, logger)
    print("test  accuracy {:.2f}%".format(result * 100))