def load_data(dataset="Cora", supervised=True):
    """Load a node-classification dataset.

    Supports both semi-supervised and supervised splits.
    :param dataset: dataset name
    :param supervised: if True, overwrite the public split with a large training split
    :return: the processed data object
    """
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ["CS", "Physics"]:
        dataset = Coauthor(path, dataset, T.NormalizeFeatures())
    elif dataset in ["Computers", "Photo"]:
        dataset = Amazon(path, dataset, T.NormalizeFeatures())
    elif dataset in ["Cora", "Citeseer", "Pubmed"]:
        dataset = Planetoid(path, dataset, T.NormalizeFeatures())
    data = dataset[0]
    if supervised:
        # All but the last 1000 nodes for training, then 500/500 for val/test.
        data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.train_mask[:-1000] = True
        data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.val_mask[-1000:-500] = True
        data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.test_mask[-500:] = True
    data.num_classes = data.y.max().item() + 1
    # Return the data object the masks were written to, not the dataset,
    # since dataset[0] may rebuild the data and drop the new masks.
    return data

def load_data(dataset="Cora", supervised=False, full_data=True): ''' support semi-supervised and supervised :param dataset: :param supervised: :return: ''' path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset) if dataset in ["CS", "Physics"]: dataset = Coauthor(path, dataset, T.NormalizeFeatures()) elif dataset in ["Computers", "Photo"]: dataset = Amazon(path, dataset, T.NormalizeFeatures()) elif dataset in ["Cora", "Citeseer", "Pubmed"]: dataset = Planetoid(path, dataset, T.NormalizeFeatures()) data = dataset[0] if supervised: if full_data: data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.train_mask[:-1000] = 1 data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.val_mask[data.num_nodes - 1000:data.num_nodes - 500] = 1 data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.test_mask[data.num_nodes - 500:] = 1 else: data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.train_mask[:1000] = 1 data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.val_mask[data.num_nodes - 1000:data.num_nodes - 500] = 1 data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.test_mask[data.num_nodes - 500:] = 1 print('loaded data: ', '\n', data) return data
def load_data(dataset_name="Cora", seed=10, n_splits=5):
    # Path in which the data will be stored
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset_name)
    if dataset_name in ["CS", "Physics"]:
        dataset = Coauthor(path, dataset_name, T.NormalizeFeatures())
    elif dataset_name in ["Computers", "Photo"]:
        dataset = Amazon(path, dataset_name, T.NormalizeFeatures())
    elif dataset_name in ["Cora", "Citeseer", "Pubmed"]:
        dataset = Planetoid(path, dataset_name, split='public',
                            transform=T.NormalizeFeatures())
    elif dataset_name in ["Arxiv", "Papers", "Products"]:
        dataset = PygNodePropPredDataset(name=ogb_data_name_conv[dataset_name],
                                         root=path, transform=T.NormalizeFeatures())
    elif dataset_name == "MAG":
        dataset = PygNodePropPredDataset(name=ogb_data_name_conv[dataset_name], root=path)
    else:
        raise ValueError(f"[!] Dataset not found: {dataset_name}")

    if dataset_name in obg_datasets:
        # OGB datasets ship their own standardized splits.
        data = split_ogb_data(dataset, dataset_name)
    else:
        data = dataset[0]  # PyG graph object
        data = split_data(data, seed, n_splits)
    data.num_classes = dataset.num_classes
    return data

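# A hedged sketch of calling the seeded loader above; `ogb_data_name_conv`,
# `obg_datasets`, `split_ogb_data`, and `split_data` are assumed to be
# defined elsewhere in this codebase.
def _demo_seeded_split():
    data = load_data(dataset_name="Cora", seed=10, n_splits=5)
    print(data.num_classes, data)
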
def get_dataset(path, name):
    assert name in ['Cora', 'CiteSeer', 'PubMed', 'DBLP', 'Karate', 'WikiCS',
                    'Coauthor-CS', 'Coauthor-Phy', 'Amazon-Computers',
                    'Amazon-Photo', 'ogbn-arxiv', 'ogbg-code']
    name = 'dblp' if name == 'DBLP' else name
    root_path = osp.expanduser('~/datasets')

    if name == 'Coauthor-CS':
        return Coauthor(root=path, name='cs', transform=T.NormalizeFeatures())
    if name == 'Coauthor-Phy':
        return Coauthor(root=path, name='physics', transform=T.NormalizeFeatures())
    if name == 'WikiCS':
        return WikiCS(root=path, transform=T.NormalizeFeatures())
    if name == 'Amazon-Computers':
        return Amazon(root=path, name='computers', transform=T.NormalizeFeatures())
    if name == 'Amazon-Photo':
        return Amazon(root=path, name='photo', transform=T.NormalizeFeatures())
    if name.startswith('ogbn'):
        return PygNodePropPredDataset(root=osp.join(root_path, 'OGB'),
                                      name=name, transform=T.NormalizeFeatures())
    # Everything else falls through to the citation datasets (note: 'Karate'
    # and 'ogbg-code' pass the assert but have no dedicated branch here).
    return (CitationFull if name == 'dblp' else Planetoid)(
        osp.join(root_path, 'Citation'), name, transform=T.NormalizeFeatures())

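# A hedged usage sketch for get_dataset: most names load relative to the
# given path, while the OGB and citation branches resolve to ~/datasets.
def _demo_get_dataset():
    dataset = get_dataset(osp.expanduser('~/datasets/Coauthor-CS'), 'Coauthor-CS')
    print(len(dataset), dataset.num_features, dataset.num_classes)
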
def DataLoader(name):
    if 'cSBM_data' in name:
        # Synthetic contextual-SBM graphs are matched by name prefix, so the
        # timestamped cSBM_data_* names need no explicit whitelist.
        path = '../data/'
        dataset = dataset_ContextualSBM(path, name=name)
    else:
        name = name.lower()
        if name in ['cora', 'citeseer', 'pubmed']:
            root_path = '../'
            path = osp.join(root_path, 'data', name)
            dataset = Planetoid(path, name, transform=T.NormalizeFeatures())
        elif name in ['computers', 'photo']:
            root_path = '../'
            path = osp.join(root_path, 'data', name)
            dataset = Amazon(path, name, T.NormalizeFeatures())
        elif name in ['chameleon', 'film', 'squirrel']:
            dataset = dataset_heterophily(root='../data/', name=name,
                                          transform=T.NormalizeFeatures())
        elif name in ['texas', 'cornell']:
            dataset = WebKB(root='../data/', name=name,
                            transform=T.NormalizeFeatures())
        else:
            raise ValueError(f'dataset {name} not supported in dataloader')
    return dataset

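# A short sketch of the dispatch above; `dataset_ContextualSBM`,
# `dataset_heterophily`, and `WebKB` come from this repository's own modules.
def _demo_dataloader():
    dataset = DataLoader('Cora')  # note: this function shadows torch.utils.data.DataLoader
    print(dataset[0])
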
def load_data(task, seed, val_size, test_size):
    """Load a graph-classification benchmark dataset.

    Task: graph classification on benchmark datasets consisting of many
    graphs we want to classify.

    Returns:
        - train_dataset: PyG dataset
        - val_dataset: PyG dataset
        - test_dataset: PyG dataset
        - test_idx: indices of the original dataset used for testing,
          to recover the original data.
    """
    path = "../data/TUDataset"
    if task == 'mutag':
        dataset = TUDataset(path, 'Mutagenicity', transform=T.NormalizeFeatures(), cleaned=True)
    elif task == 'enzymes':
        dataset = TUDataset(path, 'ENZYMES', transform=T.NormalizeFeatures(), cleaned=True)
    elif task == 'proteins':
        dataset = TUDataset(path, 'PROTEINS', transform=T.NormalizeFeatures(), cleaned=True)
    else:
        raise NameError(f"task {task} not allowed")

    print(f'Dataset: {dataset}:')
    print('====================')
    print(f'Number of graphs: {len(dataset)}')
    print(f'Number of features: {dataset.num_features}')
    print(f'Number of classes: {dataset.num_classes}')

    # First carve out the test set, then split the remainder into train/val
    # so that val_size stays a fraction of the original dataset.
    indices = list(range(len(dataset)))
    train_idx, test_idx = split(indices, random_state=seed, test_size=test_size)
    train_dataset = dataset[train_idx]
    test_dataset = dataset[test_idx]

    indices = list(range(len(train_dataset)))
    train_idx, val_idx = split(indices, random_state=seed,
                               test_size=val_size / (1 - test_size))
    val_dataset = train_dataset[val_idx]
    train_dataset = train_dataset[train_idx]

    print(f'Number of training graphs: {len(train_dataset)}')
    print(f'Number of val graphs: {len(val_dataset)}')
    print(f'Number of test graphs: {len(test_dataset)}')
    return train_dataset, val_dataset, test_dataset, test_idx

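# A hedged sketch of the three-way split above, where `split` is assumed to
# be sklearn's train_test_split imported under that alias elsewhere:
def _demo_graph_split():
    train_ds, val_ds, test_ds, test_idx = load_data('mutag', seed=0,
                                                    val_size=0.1, test_size=0.2)
    # Roughly 70/10/20 of the graphs end up in train/val/test.
    print(len(train_ds), len(val_ds), len(test_ds))
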
def load_ppi_data():
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "data", "PPI")
    train_dataset = PPI(path, split="train", transform=T.NormalizeFeatures())
    val_dataset = PPI(path, split="val", transform=T.NormalizeFeatures())
    test_dataset = PPI(path, split="test", transform=T.NormalizeFeatures())
    # One graph per batch; note that even the training loader is not shuffled.
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    return [train_loader, val_loader, test_loader]

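# A minimal sketch of consuming the PPI loaders; each batch is a single
# graph because batch_size=1 above.
def _demo_ppi():
    train_loader, val_loader, test_loader = load_ppi_data()
    for batch in train_loader:
        print(batch.x.shape, batch.y.shape)
        break
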
def get_directed_dataset(name, directed, normalize_features=False, transform=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), "..", "directed", name)
    dataset = DirectedCitation(path, name, directed)
    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform
    return dataset

def get_planetoid_dataset(name, normalize_features=False, transform=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    dataset = Planetoid(path, name)
    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform
    return dataset

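# A hedged example of composing transforms with the helper above; the
# composed transform runs NormalizeFeatures first, then the extra transform.
def _demo_composed_transform():
    dataset = get_planetoid_dataset('Cora', normalize_features=True,
                                    transform=T.TargetIndegree())
    print(dataset[0])
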
def __init__(self, name):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    self.path = path
    self.train_dataset = PPI(self.path, split='train', transform=T.NormalizeFeatures())
    self.test_dataset = PPI(self.path, split='test', transform=T.NormalizeFeatures())
    self.num_features = self.train_dataset.num_features
    self.reconstruction_loss = None

def get_dataset(dataset_name):
    """Retrieves the dataset corresponding to the given name."""
    print("Getting dataset...")
    path = join('dataset', dataset_name)
    if dataset_name == 'reddit':
        dataset = Reddit(path)
    elif dataset_name == 'ppi':
        dataset = PPI(path)
    elif dataset_name == 'github':
        dataset = GitHub(path)
        # 60/20/20 random split; note the masks hold node *indices*,
        # not boolean masks.
        data = dataset.data
        idx_train, idx_test = train_test_split(list(range(data.x.shape[0])),
                                               test_size=0.4, random_state=42)
        idx_val, idx_test = train_test_split(idx_test, test_size=0.5, random_state=42)
        data.train_mask = torch.tensor(idx_train)
        data.val_mask = torch.tensor(idx_val)
        data.test_mask = torch.tensor(idx_test)
        dataset.data = data
    elif dataset_name in ['amazon_comp', 'amazon_photo']:
        dataset = Amazon(path, "Computers", T.NormalizeFeatures()) \
            if dataset_name == 'amazon_comp' \
            else Amazon(path, "Photo", T.NormalizeFeatures())
        # Same 60/20/20 index-based random split as for GitHub.
        data = dataset.data
        idx_train, idx_test = train_test_split(list(range(data.x.shape[0])),
                                               test_size=0.4, random_state=42)
        idx_val, idx_test = train_test_split(idx_test, test_size=0.5, random_state=42)
        data.train_mask = torch.tensor(idx_train)
        data.val_mask = torch.tensor(idx_val)
        data.test_mask = torch.tensor(idx_test)
        dataset.data = data
    elif dataset_name in ["Cora", "CiteSeer", "PubMed"]:
        dataset = Planetoid(path, name=dataset_name, split="full",
                            transform=T.NormalizeFeatures())
    else:
        raise NotImplementedError
    print("Dataset ready!")
    return dataset

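# Downstream code that expects boolean masks can convert the index tensors
# produced above; a hedged sketch, not part of the loader itself:
def _demo_index_to_bool_mask():
    dataset = get_dataset('amazon_photo')
    data = dataset.data
    mask = torch.zeros(data.x.shape[0], dtype=torch.bool)
    mask[data.train_mask] = True  # index tensor -> boolean mask
    print(mask.sum().item(), "training nodes")
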
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dataset = Planetoid(root='data', name='Cora', transform=T.NormalizeFeatures())
    data = dataset[0]
    ground_truth_edge_index = data.edge_index.to(device)
    # Drop labels and masks before splitting edges for link prediction.
    data.train_mask = data.val_mask = data.test_mask = data.y = None
    data = train_test_split_edges(data)
    data = data.to(device)

    model = Net(dataset.num_features, 64).to(device)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

    best_val_auc = test_auc = 0
    for epoch in range(1, 101):
        loss = train(data, model, optimizer)
        val_auc, tmp_test_auc = test(data, model)
        # Report the test AUC at the epoch with the best validation AUC.
        if val_auc > best_val_auc:
            best_val_auc = val_auc
            test_auc = tmp_test_auc
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, '
              f'Test: {test_auc:.4f}')

    z = model.encode(data.x, data.train_pos_edge_index)
    final_edge_index = model.decode_all(z)

def load_facebook_dataset(dataset_name: FacebookDataset, allow_features=True) -> Data:
    # The enum value encodes both the SNAP dataset name (first 12 characters)
    # and the ego-network id (the remainder).
    path = osp.join(DATASETS_DIR, dataset_name.value[:12])
    facebook_idx_map = {
        "0": 0, "107": 1, "1684": 2, "1912": 3, "3437": 4,
        "348": 5, "3980": 6, "414": 7, "686": 8, "698": 9,
    }
    dataset = SNAPDataset(path, dataset_name.value[:12], T.NormalizeFeatures())
    data = dataset[facebook_idx_map[dataset_name.value[12:]]]
    if not allow_features:
        # Replace node features with one-hot identity features.
        data.x = torch.eye(data.x.size(0))
    data.num_communities = data.circle_batch.max() + 1
    communities = np.zeros((data.num_communities, data.x.size(0)))
    communities[data.circle_batch, data.circle] = 1
    data.communities = communities
    data.communities_cnl_format = matrix_to_cnl_format(communities, data.num_communities)
    return data

def __init__(self, data, data_path, task, **kwargs):
    path = data_path + data
    if data in ['CiteSeer', 'Cora', 'PubMed']:
        # Citation networks: CiteSeer, Cora, and PubMed. These datasets
        # contain sparse bag-of-words feature vectors for each document and
        # a list of citation links between documents; the citation links are
        # treated as undirected edges.
        assert task == "node_class", "%s is a dataset for node classification" % data
        self.dataset = Citations(path, data, T.NormalizeFeatures())
        self.data = self.dataset[0]
        # These citation networks are small enough to fit into a GPU, yet they
        # still have enough nodes for training, validation, and test splits.
    elif data in [
            'COLLAB', 'IMDB-BINARY', 'IMDB-MULTI', 'REDDIT-BINARY',
            'REDDIT-MULTI5K', 'PROTEINS', 'MUTAG', 'PTC', 'NCI1'
    ]:
        # Benchmark data sets for graph kernels; detailed information is at
        # <https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets>
        assert task == "graph_class", "%s is a dataset for graph classification" % data
        self.dataset = graph_kernel_dataset(data, path)
        # These datasets are much smaller (e.g., MUTAG has only 188 graphs),
        # so unlike node classification above we need cross-validation.
        self.train_ids, self.test_ids, self.val_ids = k_fold(self.dataset, kwargs['fold'])
        self.batch_size = kwargs['batch_size']
    else:
        raise NameError('unknown dataset')

def load_dataset(dataset):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ['cora', 'citeseer', 'pubmed']:
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        # Build a dense adjacency matrix from the edge index.
        data.adj = torch.zeros((data.x.size(0), data.x.size(0)))
        col, row = data.edge_index
        data.adj[col, row] = 1
        return data, num_features, num_classes
    elif dataset == 'reddit':
        dataset = Reddit(path)
    elif dataset == 'corafull':
        dataset = CoraFull(path)
    # Reddit and CoraFull have no public split, so generate one.
    num_features = dataset.num_features
    num_classes = dataset.num_classes
    data = dataset[0]
    data.train_mask, data.val_mask, data.test_mask = generate_split(data, num_classes)
    data.adj = torch.zeros((data.x.size(0), data.x.size(0)))
    col, row = data.edge_index
    data.adj[col, row] = 1
    return data, num_features, num_classes

def get_amazon_dataset(name):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'datasets',
                    'node_datasets', name)
    dataset = Amazon(path, name, transform=T.NormalizeFeatures())
    # Per-class split: 20 training and 30 validation nodes per class, the
    # rest for testing. Classes with too few nodes are skipped entirely.
    num_per_class = 20
    train_index = []
    val_index = []
    test_index = []
    for i in range(dataset.num_classes):
        index = (dataset[0].y.long() == i).nonzero().view(-1)
        index = index[torch.randperm(index.size(0))]
        if len(index) > num_per_class + 30:
            train_index.append(index[:num_per_class])
            val_index.append(index[num_per_class:num_per_class + 30])
            test_index.append(index[num_per_class + 30:])
        else:
            continue
    train_index = torch.cat(train_index)
    val_index = torch.cat(val_index)
    test_index = torch.cat(test_index)
    train_mask = index_to_mask(train_index, size=dataset[0].num_nodes)
    val_mask = index_to_mask(val_index, size=dataset[0].num_nodes)
    test_mask = index_to_mask(test_index, size=dataset[0].num_nodes)
    dataset.train_mask = train_mask
    dataset.val_mask = val_mask
    dataset.test_mask = test_mask
    return dataset

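# `index_to_mask` is assumed to come from elsewhere in this codebase; a
# minimal sketch consistent with how it is used above:
#
#   def index_to_mask(index, size):
#       mask = torch.zeros(size, dtype=torch.bool)
#       mask[index] = True
#       return mask
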
def load_data(args):
    dataset = args.input
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ['cora', 'citeseer', 'pubmed']:
        # Planetoid datasets come with a public split; no extra split needed.
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        return data, num_features, num_classes
    elif dataset == 'corafull':
        dataset = CoraFull(path)
    elif dataset in ['cs', 'physics']:
        dataset = Coauthor(path, name=dataset)
    elif dataset in ['computers', 'photo']:
        dataset = Amazon(path, name=dataset)
    elif dataset == 'reddit':
        # Reddit ships with its own masks, so it is returned without re-splitting.
        dataset = Reddit(path)
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        return data, num_features, num_classes
    # Remaining datasets have no canonical split; generate one.
    num_features = dataset.num_features
    num_classes = dataset.num_classes
    data = dataset[0]
    data.train_mask, data.val_mask, data.test_mask = generate_split(data, num_classes)
    return data, num_features, num_classes

def __init__(self):
    dataset = "Cora"
    path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
    if not osp.exists(path):
        # Pre-download the raw files before the parent constructor runs.
        Planetoid(path, dataset, T.NormalizeFeatures())
        Planetoid(path, dataset, T.TargetIndegree())
    super(CoraDataset, self).__init__(path, dataset, T.TargetIndegree())

def __init__(self, name):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    # Note: the dataset is always Cora; `name` only determines the storage path.
    self.dataset = Planetoid(path, "Cora", T.NormalizeFeatures())
    data = self.dataset[0]
    data.train_mask = data.val_mask = data.test_mask = data.y = None
    self.num_features = self.dataset.num_features
    self.reconstruction_loss = None

def get_data(dataset_name, dataset_dir):
    full_names = {'cora': 'Cora', 'citeseer': 'CiteSeer', 'pubmed': 'PubMed'}
    dataset_name = full_names[dataset_name]
    dataset_path = path.join(dataset_dir, dataset_name)
    dataset = Planetoid(dataset_path, dataset_name, transform=T.NormalizeFeatures())
    return dataset

def get_planetoid_dataset(name, normalize_features=False, transform=None):
    usrhome = os.path.expanduser('~')
    datadir = os.path.join(usrhome, '.pyg')
    if not os.path.exists(datadir):
        print('Creating data dir: ' + datadir)
        os.makedirs(datadir)
    path = osp.join(datadir, name)
    dataset = Planetoid(path, name)
    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform
    return dataset

def loaddatas(d_loader, d_name):
    # Look the dataset class up by name on torch_geometric.datasets;
    # Planetoid additionally gets feature normalization.
    if d_loader == 'Planetoid':
        dataset = getattr(torch_geometric.datasets, d_loader)(
            '../data/' + d_name, d_name, T.NormalizeFeatures())
    else:
        dataset = getattr(torch_geometric.datasets, d_loader)(
            '../data/' + d_name, d_name)
    return dataset

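# A hedged usage sketch of the reflective loader above:
def _demo_loaddatas():
    planetoid = loaddatas('Planetoid', 'Cora')  # Planetoid('../data/Cora', 'Cora', ...)
    coauthor = loaddatas('Coauthor', 'CS')      # Coauthor('../data/CS', 'CS')
    print(planetoid.num_classes, coauthor.num_classes)
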
def exp(exp_name, seed, style, shared):
    torch.manual_seed(seed)
    dataset = 'Cora'
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'data', dataset)
    dataset = Planetoid(path, dataset, T.NormalizeFeatures())
    data = dataset[0]
    fold = 0
    accuracies = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    with open('{}.log'.format(exp_name), 'w') as flog:
        for tr_mask, vl_mask, ts_mask in gen_folds(data.x.shape[0], FOLDS, FOLDS_SEED):
            fold += 1
            print("FOLD:", fold)
            flog.write("fold #{}\n".format(fold))
            data.train_mask = tr_mask
            data.val_mask = vl_mask
            data.test_mask = ts_mask
            print('Train: {}'.format(torch.sum(data.train_mask)))
            print('Validation: {}'.format(torch.sum(data.val_mask)))
            print('Test: {}'.format(torch.sum(data.test_mask)))
            data = data.to(device)
            model = GIN(dataset, 2, 64, seed).to(device)
            optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
            best_acc = 0
            count = 0
            for epoch in range(1, EPOCH):
                train(model, data, optimizer)
                train_accs = validate(model, data)
                log = 'Epoch: {:03d}, Train: {:.4f}, Validation: {:.4f}'
                print(log.format(epoch, *train_accs))
                log += '\n'
                flog.write(log.format(epoch, *train_accs))
                # Keep the checkpoint with the best validation accuracy;
                # stop early after 200 epochs without improvement.
                if train_accs[1] > best_acc:
                    best_acc = train_accs[1]
                    torch.save(model.state_dict(), "{}.dth".format(exp_name))
                    print("Saving model at iteration {}".format(epoch))
                    count = 0
                else:
                    count += 1
                    if count == 200:
                        break
            model.load_state_dict(torch.load("{}.dth".format(exp_name)))
            accs = test(model, data)
            print('Test Accuracy: {}'.format(accs[1]))
            flog.write('Test Accuracy: {}\n'.format(accs[1]))
            accuracies.append(accs[1])
            flog.write("----------\n")
        flog.write("Avg Test Accuracy: {}\tVariance: {}\n".format(
            np.mean(accuracies), np.var(accuracies)))

def load_data(dataset="Cora"): path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "data", dataset) if dataset in ["Cora", "Citeseer", "Pubmed"]: data = Planetoid(path, dataset, split="public", transform=T.NormalizeFeatures())[0] num_nodes = data.x.size(0) edge_index, _ = remove_self_loops(data.edge_index) edge_index = add_self_loops(edge_index, num_nodes=num_nodes) if isinstance(edge_index, tuple): data.edge_index = edge_index[0] # !!! 2*N 新版可能有改变 else: data.edge_index = edge_index return data elif dataset in ["CoauthorCS"]: data = Coauthor(path, "cs", transform=T.NormalizeFeatures())[0] num_nodes = data.x.size(0) edge_index, _ = remove_self_loops(data.edge_index) edge_index = add_self_loops(edge_index, num_nodes=num_nodes) if isinstance(edge_index, tuple): data.edge_index = edge_index[0] else: data.edge_index = edge_index # devide training validation and testing set train_mask = torch.zeros((num_nodes, ), dtype=torch.bool) val_mask = torch.zeros((num_nodes, ), dtype=torch.bool) test_mask = torch.zeros((num_nodes, ), dtype=torch.bool) train_num = 40 val_num = 150 for i in range(15): # number of labels index = (data.y == i).nonzero()[:, 0] perm = torch.randperm(index.size(0)) train_mask[index[perm[:train_num]]] = 1 val_mask[index[perm[train_num:(train_num + val_num)]]] = 1 test_mask[index[perm[(train_num + val_num):]]] = 1 data.train_mask = train_mask data.val_mask = val_mask data.test_mask = test_mask return data else: raise Exception(f"the dataset of {dataset} has not been implemented")
def get_planetoid_dataset(name, normalize_features=False, transform=None, split="public"):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    if split == 'complete':
        dataset = Planetoid(path, name)
        # Overwrite the public split: all but the last 1000 nodes for training,
        # then 500 validation and 500 test nodes.
        dataset[0].train_mask.fill_(False)
        dataset[0].train_mask[:dataset[0].num_nodes - 1000] = True
        dataset[0].val_mask.fill_(False)
        dataset[0].val_mask[dataset[0].num_nodes - 1000:dataset[0].num_nodes - 500] = True
        dataset[0].test_mask.fill_(False)
        dataset[0].test_mask[dataset[0].num_nodes - 500:] = True
    else:
        dataset = Planetoid(path, name, split=split)
    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform
    return dataset

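# A hedged usage sketch contrasting the two split modes above:
def _demo_split_modes():
    public = get_planetoid_dataset('Cora', normalize_features=True)
    complete = get_planetoid_dataset('Cora', normalize_features=True, split='complete')
    # public: 140 training nodes on Cora; complete: num_nodes - 1000.
    print(int(public[0].train_mask.sum()), int(complete[0].train_mask.sum()))
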
def get_data(model_name, dataset_dir):
    dataset_path = path.join(dataset_dir, 'Cora')
    # Spline convolutions need edge attributes (here, target in-degree);
    # DNA uses raw features; everything else gets normalized features.
    if model_name == 'spline':
        transform = T.TargetIndegree()
    elif model_name == 'dna':
        transform = None
    else:
        transform = T.NormalizeFeatures()
    dataset = Planetoid(dataset_path, 'Cora', transform=transform)
    return dataset

def load_dataset(dataset_folder, dataset_name):
    """Load a dataset and return it in PyG's Data format.

    :param dataset_folder: directory in which the dataset is stored
    :param dataset_name: name of the dataset ("Cora", "CiteSeer", "PubMed")
    :return: dataset
    """
    path = os.path.join(os.path.dirname(dataset_folder), dataset_name)
    dataset = Planetoid(path, dataset_name, T.NormalizeFeatures())
    return dataset

def get_dataset_properties(params):
    dataset_name = params["dataset"]
    split = params["split"]
    path = os.path.join(os.getcwd(), "dataset", dataset_name)
    os.makedirs(path, exist_ok=True)
    torch.manual_seed(params["seed"])
    dataset = Planetoid(path, dataset_name, split=split, transform=T.NormalizeFeatures())
    data = dataset[0]
    n_features = dataset.num_features
    n_classes = dataset.num_classes
    return data, n_features, n_classes

def get_data(dataset_name, model_name, dataset_dir):
    full_names = {'cora': 'Cora', 'citeseer': 'CiteSeer', 'pubmed': 'PubMed'}
    dataset_name = full_names[dataset_name]
    dataset_path = path.join(dataset_dir, dataset_name)
    if model_name == 'spline':
        transform = T.TargetIndegree()
    elif model_name == 'dna':
        transform = None
    else:
        transform = T.NormalizeFeatures()
    dataset = Planetoid(dataset_path, dataset_name, transform=transform)
    return dataset

def __init__(self, root='/home/galkampel/tmp', dataset_name="PubMed", split_type='public'):
    names = {"Cora", "CiteSeer", "PubMed"}
    assert dataset_name in names, f"unknown dataset {dataset_name}"
    self.dataset_name = dataset_name
    self.split_type = split_type
    path = os.path.join(root, dataset_name)
    self.dataset = Planetoid(path, dataset_name, split=split_type,
                             transform=T.NormalizeFeatures())

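# A hedged construction sketch for the wrapper above; the class name is not
# shown in this snippet, so `CitationWrapper` here is a placeholder:
#
#   loader = CitationWrapper(root='/tmp/data', dataset_name='Cora')
#   data = loader.dataset[0]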