def make_data_loader(batch_size, dataset_name='Letter_low', cuda=False):
    """
    Create train/val/test dataloaders
    :param batch_size: batch size (applies for train/test/val)
    :param dataset_name: dataset name, to take from TU dortmund dataset
        (https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets)
    :param cuda: if cuda is available
    :return: train_dataloader, val_dataloader, test_dataloader
    """
    # 1. load the TU-Dortmund dataset and preprocess it in place
    dataset = tu.LegacyTUDataset(name=dataset_name)
    preprocess(dataset, cuda)

    # Ratio-based sizes for train/test; validation takes whatever remains.
    total = len(dataset)
    n_train = int(TRAIN_RATIO * total)
    n_test = int(TEST_RATIO * total)
    n_val = int(total - n_train - n_test)
    dataset_train, dataset_val, dataset_test = torch.utils.data.random_split(
        dataset, (n_train, n_val, n_test))

    # 2. wrap each split in a DataLoader; only training is shuffled,
    #    all three share the same batch size and collate function.
    def _loader(split, shuffle):
        return torch.utils.data.DataLoader(split,
                                           batch_size=batch_size,
                                           shuffle=shuffle,
                                           collate_fn=collate)

    train_dataloader = _loader(dataset_train, True)
    val_dataloader = _loader(dataset_val, False)
    test_dataloader = _loader(dataset_test, False)
    return train_dataloader, val_dataloader, test_dataloader
def graph_classify_task(prog_args):
    '''
    perform graph classification task
    '''
    # Dataset loading and ratio-based train/val/test split
    # (validation receives whatever is left after train/test).
    dataset = tu.LegacyTUDataset(name=prog_args.dataset)
    num_graphs = len(dataset)
    train_size = int(prog_args.train_ratio * num_graphs)
    test_size = int(prog_args.test_ratio * num_graphs)
    val_size = int(num_graphs - train_size - test_size)
    dataset_train, dataset_val, dataset_test = torch.utils.data.random_split(
        dataset, (train_size, val_size, test_size))

    # Build one loader per split; only the training loader is in train mode.
    train_dataloader = prepare_data(dataset_train, prog_args, train=True,
                                    pre_process=pre_process)
    val_dataloader = prepare_data(dataset_val, prog_args, train=False,
                                  pre_process=pre_process)
    test_dataloader = prepare_data(dataset_test, prog_args, train=False,
                                   pre_process=pre_process)

    input_dim, label_dim, max_num_node = dataset.statistics()
    print("++++++++++STATISTICS ABOUT THE DATASET")
    print("dataset feature dimension is", input_dim)
    print("dataset label dimension is", label_dim)
    print("the max num node is", max_num_node)
    print("number of graphs is", len(dataset))
    # assert len(dataset) % prog_args.batch_size == 0, "training set not divisible by batch size"

    hidden_dim = 64  # used to be 64
    embedding_dim = 64
    # calculate assignment dimension: pool_ratio * largest graph's maximum
    # number of nodes in the dataset
    assign_dim = int(max_num_node * prog_args.pool_ratio)
    print("++++++++++MODEL STATISTICS++++++++")
    print("model hidden dim is", hidden_dim)
    print("model embedding dim for graph instance embedding", embedding_dim)
    print("initial batched pool graph dim is", assign_dim)
    activation = F.relu

    # initialize model
    # 'diffpool'
    model = DiffPool(input_dim,
                     hidden_dim,
                     embedding_dim,
                     label_dim,
                     activation,
                     prog_args.gc_per_block,
                     prog_args.dropout,
                     prog_args.num_pool,
                     prog_args.linkpred,
                     prog_args.batch_size,
                     'meanpool',
                     assign_dim,
                     prog_args.pool_ratio)

    # Optionally resume weights from a previously saved checkpoint.
    if prog_args.load_epoch >= 0 and prog_args.save_dir is not None:
        checkpoint = prog_args.save_dir + "/" + prog_args.dataset \
            + "/model.iter-" + str(prog_args.load_epoch)
        model.load_state_dict(torch.load(checkpoint))

    print("model init finished")
    print("MODEL:::::::", prog_args.method)
    if prog_args.cuda:
        model = model.cuda()

    # Train on the training split (validating each epoch), then report
    # final accuracy on the held-out test split.
    logger = train(
        train_dataloader, model, prog_args, val_dataset=val_dataloader)
    result = evaluate(test_dataloader, model, prog_args, logger)
    print("test accuracy {:.2f}%".format(result * 100))