def __init__(self, descriptor_dim, sampler=None, split='train',
             transform=DefaultTransform, cls=False, build_graph=False):
    super(SurrealFEPts5k, self).__init__()
    self.name = 'SurrealFEPts5k'
    self.split = split
    if self.split == 'train':
        self.IDlist = IDlist[:, :-(num_test * num_views)].reshape(-1)
    elif self.split == 'test':
        self.IDlist = IDlist[:, -(num_test * num_views):].reshape(-1)
    elif self.split == 'val':
        self.IDlist = IDlist[:, :num_views].reshape(-1)
    self.file_path = '{}/scans/{{0:06d}}/{{1:03d}}.mat'.format(PATH_TO_SURREAL)
    self.template_feats = helper.loadSMPLDescriptors()[:, :descriptor_dim]
    self.template_points = helper.loadSMPLModels()[0].verts
    self.cls = cls
    if build_graph:
        self.transform = T.Compose(
            [transform, T.KNNGraph(k=6), T.ToDense(5000)])
    else:
        self.transform = T.Compose([transform, T.ToDense(5000)])

def __init__(self, descriptor_dim, sampler=None, split='train',
             transform=DefaultTransform, build_graph=False, cls=False):
    super(FaustFEPts5k, self).__init__()
    self.name = 'FaustFEPts5k'
    self.IDlist = np.arange(10000)
    self.split = split
    if self.split == 'train':
        raise RuntimeError("This dataset is test-only")
    elif self.split == 'test':
        self.IDlist = self.IDlist
    elif self.split == 'val':
        self.IDlist = self.IDlist[:40]
    self.file_path = '{}/faust/scans/{{0:03d}}_{{0:03d}}.mat'.format(PATH_TO_DATA)
    self.template_feats = helper.loadSMPLDescriptors()[:, :descriptor_dim]
    self.template_points = helper.loadSMPLModels()[0].verts
    self.pre_transform = None  # T.NormalizeScale()
    self.cls = cls
    if build_graph:
        self.transform = T.Compose(
            [transform, T.KNNGraph(k=6), T.ToDense(5000)])
    else:
        self.transform = T.Compose([transform, T.ToDense(5000)])

def transform_setup(graph_u=False, graph_gcn=False, rotation=(180, 0),
                    samplePoints=1024, mesh=False, node_translation=0.01):
    # `rotation` is a (degrees, axis) pair, since it is indexed below;
    # the default axis of 0 is an assumption (the original default was 180).
    if not graph_u and not graph_gcn:
        # Default transformation: scale normalization, centering,
        # point sampling and rotation.
        pretransform = T.Compose([T.NormalizeScale(), T.Center()])
        transform = T.Compose([
            T.SamplePoints(samplePoints),
            T.RandomRotate(rotation[0], rotation[1])
        ])
        print("pointnet rotation {}".format(rotation))
    elif graph_u:
        pretransform = T.Compose([T.NormalizeScale(), T.Center()])
        transform = T.Compose([
            T.NormalizeScale(),
            T.Center(),
            T.SamplePoints(samplePoints, True, True),
            T.RandomRotate(rotation[0], rotation[1]),
            T.KNNGraph(k=graph_u)
        ])
    elif graph_gcn:
        pretransform = T.Compose([T.NormalizeScale(), T.Center()])
        if mesh:
            # NOTE: both branches currently build the same transform.
            if mesh == "extraFeatures":
                transform = T.Compose([
                    T.RandomRotate(rotation[0], rotation[1]),
                    T.GenerateMeshNormals(),
                    T.FaceToEdge(True),
                    T.Distance(norm=True),
                    T.TargetIndegree(cat=True)
                ])
            else:
                transform = T.Compose([
                    T.RandomRotate(rotation[0], rotation[1]),
                    T.GenerateMeshNormals(),
                    T.FaceToEdge(True),
                    T.Distance(norm=True),
                    T.TargetIndegree(cat=True)
                ])
        else:
            transform = T.Compose([
                T.SamplePoints(samplePoints, True, True),
                T.KNNGraph(k=graph_gcn),
                T.Distance(norm=True)
            ])
            print("no mesh")
        print("Rotation {}".format(rotation))
        print("Meshing {}".format(mesh))
    else:
        print('no transform')
        transform, pretransform = None, None
    return transform, pretransform

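# Hedged usage sketch (not from the original source): transform_setup
# returns (transform, pretransform) for a PyG dataset; `rotation` is a
# (degrees, axis) pair because it is indexed as rotation[0]/rotation[1].
#
#   transform, pretransform = transform_setup(
#       graph_gcn=6, rotation=(180, 1), samplePoints=1024, mesh=False)
#   dataset = SomePointCloudDataset(  # hypothetical dataset class
#       root='data', transform=transform, pre_transform=pretransform)
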
def __init__(self, dataset_opt, training_opt): super().__init__(dataset_opt, training_opt) self._data_path = os.path.join(dataset_opt.dataroot, "S3DIS") pre_transform = self._pre_transform transform = T.Compose([ T.FixedPoints(dataset_opt.num_points), T.RandomTranslate(0.01), T.RandomRotate(180, axis=2), ]) train_dataset = S3DIS_With_Weights( self._data_path, test_area=self.dataset_opt.fold, train=True, pre_transform=pre_transform, transform=transform, class_weight_method=dataset_opt.class_weight_method, ) test_dataset = S3DIS_With_Weights( self._data_path, test_area=self.dataset_opt.fold, train=False, pre_transform=pre_transform, transform=T.FixedPoints(dataset_opt.num_points), ) self._create_dataloaders(train_dataset, test_dataset)
def __init__(self, dataset_opt, training_opt): super().__init__(dataset_opt, training_opt) self._data_path = os.path.join(dataset_opt.dataroot, "ShapeNet") try: self._category = dataset_opt.category except KeyError: self._category = None pre_transform = T.NormalizeScale() train_transform = T.Compose( [T.FixedPoints(dataset_opt.num_points), RandomNoise()]) test_transform = T.FixedPoints(dataset_opt.num_points) train_dataset = ShapeNet( self._data_path, self._category, include_normals=dataset_opt.normal, split="trainval", pre_transform=pre_transform, transform=train_transform, ) test_dataset = ShapeNet( self._data_path, self._category, include_normals=dataset_opt.normal, split="test", pre_transform=pre_transform, transform=test_transform, ) self._categories = train_dataset.categories self._create_dataloaders(train_dataset, test_dataset)
def augment_transforms(args):
    """Build the pre-transform (normalization) and per-sample transform
    (point sampling) from the command-line arguments."""
    pre_transform = None
    if args.norm == 'scale':
        pre_transform = T.NormalizeScale()
    elif args.norm == 'bbox':
        pre_transform = NormalizeBox()
    elif args.norm == 'sphere':
        pre_transform = NormalizeSphere(center=True)
    elif args.norm == 'sphere_wo_center':
        pre_transform = NormalizeSphere(center=False)

    transform = []
    # ShapeNet
    if args.task == 'segmentation':
        transform.append(T.FixedPoints(args.num_pts))
    # ModelNet
    if args.task == 'classification':
        transform.append(T.SamplePoints(args.num_pts))
    transform = T.Compose(transform)
    return pre_transform, transform

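# Hedged usage sketch: `args` is assumed to be an argparse.Namespace with
# .norm in {'scale', 'bbox', 'sphere', 'sphere_wo_center'}, .task in
# {'segmentation', 'classification'} and .num_pts set.
#
#   from argparse import Namespace
#   args = Namespace(norm='scale', task='classification', num_pts=1024)
#   pre_transform, transform = augment_transforms(args)
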
def __init__(
    self,
    root,
    url,
    gt_url,
    num_neighbours=10,
    train_val_split=(30, 15),
    mat_key=None,
    gt_mat_key=None,
    transform=None,
    pre_transform=None,
):
    self.url = url
    self.filename = url.split("/")[-1]
    self.gt_filename = gt_url.split("/")[-1]
    self.gt_url = gt_url
    self.train_val_split = train_val_split
    self.mat_key = mat_key
    self.gt_mat_key = gt_mat_key
    self.num_neighbours = num_neighbours
    self.processed_file = f"{self.mat_key}-k{self.num_neighbours}.pt"
    self.result_path = path.join(root, self.processed_file)
    self.base_transform = T.Compose([
        T.AddTrainValTestMask(
            "test_rest",
            num_train_per_class=self.train_val_split[0],
            num_val=self.train_val_split[1],
        ),
    ])
    super().__init__(root=root, pre_transform=pre_transform,
                     transform=transform)
    self.data, self.slices = torch.load(self.result_path)

def __init__(self, root: str, device: torch.device = torch.device("cpu"),
             train: bool = True, test: bool = True,
             transform_data: bool = True):
    self.url = 'https://drive.google.com/file/d/1dp4sMvZ8cmIIITE-qj6zYpZb0-v-4Kgf/view?usp=sharing'
    self.categories = ["big_cats", "cows", "dogs", "hippos", "horses"]
    # center each mesh at its centroid
    pre_transform = transforms.Center()
    if transform_data:
        # randomly translate and rotate, then move to the target device
        transform = transforms.Compose([
            Move(mean=[0, 0, 0], std=[0.05, 0.05, 0.05]),
            Rotate(dims=[0, 1, 2]),
            ToDevice(device)])
    else:
        transform = ToDevice(device)
    super().__init__(root=root, transform=transform,
                     pre_transform=pre_transform)
    self.data, self.slices = torch.load(self.processed_paths[0])
    self.downscaler = dscale.Downscaler(
        filename=join(self.processed_dir, "ds"),
        mesh=self.get(0),
        factor=2)
    if train and not test:
        self.data, self.slices = self.collate(
            [self.get(i) for i in range(len(self)) if self.get(i).pose < 16])
    elif not train and test:
        self.data, self.slices = self.collate(
            [self.get(i) for i in range(len(self)) if self.get(i).pose >= 16])

def __init__(self, dataset_opt):
    super().__init__(dataset_opt)
    pre_transform = self.pre_transform
    # NOTE: this composed transform is not passed below; the datasets use
    # self.train_transform / self.test_transform instead.
    transform = T.Compose([
        T.FixedPoints(dataset_opt.num_points),
        T.RandomTranslate(0.01),
        T.RandomRotate(180, axis=2),
    ])
    train_dataset = S3DIS1x1(
        self._data_path,
        test_area=self.dataset_opt.fold,
        train=True,
        pre_transform=pre_transform,
        transform=self.train_transform,
    )
    self.test_dataset = S3DIS1x1(
        self._data_path,
        test_area=self.dataset_opt.fold,
        train=False,
        pre_transform=pre_transform,
        transform=self.test_transform,
    )
    self.train_dataset = add_weights(
        train_dataset, True, dataset_opt.class_weight_method)

def load_dataset(path, specify_target):
    # Transform that selects a single regression target from data.y.
    class SpecifyTarget(object):
        def __call__(self, data):
            data.y = data.y[specify_target].view(-1)
            return data

    transform = T.Compose([SpecifyTarget(), Complete(), T.Distance(norm=True)])

    print('Check split dataset...')
    save_path = path + 'train_valid_test.ckpt'
    if os.path.isfile(save_path):
        trn, val, test = torch.load(save_path)
        trn.transform = transform
        val.transform = transform
        test.transform = transform
        return trn, val, test

    print('Load dataset...')
    dataset = QM9Dataset(root=path).shuffle()

    print('Split the dataset...')
    one_tenth = len(dataset) // 10
    test_dataset = dataset[:one_tenth]
    valid_dataset = dataset[one_tenth:one_tenth * 2]
    train_dataset = dataset[one_tenth * 2:]
    assert len(train_dataset) + len(valid_dataset) + len(test_dataset) == len(dataset)

    print('Save dataset...')
    torch.save([train_dataset, valid_dataset, test_dataset], save_path)
    # Reload so the saved splits come back with the transform attached.
    return load_dataset(path, specify_target)

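# Hedged usage sketch: `specify_target` is assumed to index one of the QM9
# regression targets stored in data.y.
#
#   trn, val, test = load_dataset('data/qm9/', specify_target=0)
#   print(len(trn), len(val), len(test))  # 8:1:1 split of the shuffled set
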
def __init__(self, dataset_opt, training_opt):
    super().__init__(dataset_opt, training_opt)
    number = dataset_opt.number
    if str(number) not in AVAILABLE_NUMBERS:
        raise Exception("Only ModelNet10 and ModelNet40 are available")
    name = "ModelNet{}".format(number)
    self._data_path = osp.join(osp.dirname(osp.realpath(__file__)),
                               "..", "data", name)
    pre_transform = T.Compose([T.NormalizeScale(), MeshToNormal()])
    transform = (T.SamplePoints(dataset_opt.num_points)
                 if contains_key(dataset_opt, "num_points") else None)
    train_dataset = ModelNet(
        self._data_path,
        name=str(number),
        train=True,
        transform=transform,
        pre_transform=pre_transform,
    )
    test_dataset = ModelNet(
        self._data_path,
        name=str(number),
        train=False,
        transform=transform,
        pre_transform=pre_transform,
    )
    self._create_dataloaders(train_dataset, test_dataset, validation=None)

def __init__(self, data_dir):
    super().__init__()
    self.data_dir = data_dir
    self.transform = T.Compose([
        T.OneHotDegree(self.num_features - 1),
        T.ToSparseTensor(),
    ])

def __init__(self, config):
    # One random rotation per axis, plus translation and edge-attribute noise.
    rotations = [T.RandomRotate(180, axis=i) for i in range(3)]
    translation = T.RandomTranslate(config.augment_translate_limit)
    merge_score_noise = UnitEdgeAttrGaussianNoise(
        mu=0, sigma=config.edge_attr_noise_std)
    self.transform = T.Compose(
        [*rotations, translation, merge_score_noise])

def __init__(self, dataset_opt, training_opt): super().__init__(dataset_opt, training_opt) self._data_path = os.path.join(dataset_opt.dataroot, "S3DIS") pre_transform = cT.GridSampling(dataset_opt.first_subsampling, 13) # Select only 2^15 points from the room # pre_transform = T.FixedPoints(dataset_opt.room_points) transform = T.Compose([ T.FixedPoints(dataset_opt.num_points), T.RandomTranslate(0.01), T.RandomRotate(180, axis=2), ]) train_dataset = S3DIS_With_Weights( self._data_path, test_area=self.dataset_opt.fold, train=True, pre_transform=pre_transform, transform=transform, class_weight_method=dataset_opt.class_weight_method, ) test_dataset = S3DIS_With_Weights( self._data_path, test_area=self.dataset_opt.fold, train=False, pre_transform=pre_transform, transform=T.FixedPoints(dataset_opt.num_points), ) self._create_dataloaders(train_dataset, test_dataset, validation=None)
def __init__(self, root: str, device: torch.device = torch.device("cpu"),
             train: bool = True, test: bool = False,
             transform_data: bool = True):
    transform = transforms.Compose([
        transforms.RandomRotate(36, axis=1),
        transforms.RandomTranslate(0.005)
    ])
    super().__init__(root=root, transform=transform)
    # print(self.processed_paths[0])
    self.data, self.slices = torch.load(self.processed_paths[0])
    if train and not test:
        self.data, self.slices = self.collate(
            [self.get(i) for i in range(0, 80)])
    elif not train and test:
        self.data, self.slices = self.collate(
            [self.get(i) for i in range(80, 100)])
    print(self.data)
    self.class_ids = [int(c) for c in self.data.y]

def __init__(self, dataset_opt, training_opt):
    super().__init__(dataset_opt, training_opt)
    try:
        self._category = dataset_opt.category
    except KeyError:
        self._category = None
    pre_transform = self._pre_transform
    train_transform = T.Compose([RandomNoise()])
    train_dataset = ShapeNet(
        self._data_path,
        self._category,
        include_normals=dataset_opt.normal,
        split="trainval",
        pre_transform=pre_transform,
        transform=train_transform,
    )
    test_dataset = ShapeNet(
        self._data_path,
        self._category,
        include_normals=dataset_opt.normal,
        split="test",
        pre_transform=pre_transform,
    )
    self._categories = train_dataset.categories
    self._create_dataloaders(train_dataset, test_dataset)

def networkx_to_torch2(self, networkx_graph):
    from torch_geometric.utils import convert
    import torch_geometric.transforms as T
    graph = convert.from_networkx(networkx_graph)
    transform = T.Compose([T.TargetIndegree()])
    graph = transform(graph)
    return graph.to(self.device)

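# Hedged usage sketch: assumes the owning object (`obj`) defines
# `self.device`.
#
#   import networkx as nx
#   g = nx.karate_club_graph()
#   data = obj.networkx_to_torch2(g)  # Data with edge_attr = target in-degree
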
class MNISTSuperpixels(LightningDataModule):
    def __init__(
        self,
        data_dir: str = "data/",
        batch_size: int = 32,
        num_workers: int = 0,
        pin_memory: bool = False,
        train_val_test_split: Sequence[int] = (55_000, 5_000, 10_000),
        n_segments: int = 75,
        k: int = 10,
        loop: bool = True,
        **kwargs,
    ):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.pin_memory = pin_memory
        self.train_val_test_split = train_val_test_split
        self.n_segments = n_segments
        self.k = k
        self.loop = loop
        self.slic_kwargs = kwargs

        assert 1 <= n_segments <= 28 * 28

        self.pre_transform = T.Compose([
            T.NormalizeScale(),
        ])
        self.transform = None

        self.data_train: Optional[Dataset] = None
        self.data_val: Optional[Dataset] = None
        self.data_test: Optional[Dataset] = None

def get_view_transform(self, k, num_pts):
    # Fixed rotation for view k, applied after scale normalization.
    R = rotation_matrix(np.pi / 3., 0., np.pi / 6. * k)
    transformation = TG.Compose([
        TG.NormalizeScale(),
        TG.LinearTransformation(R),
        TG.SamplePoints(num=num_pts, include_normals=self.generate_norms)
    ])
    return transformation

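# Hedged usage sketch: `k` selects one of several fixed viewpoints; the
# rotation is baked into the pipeline via LinearTransformation, so each call
# samples `num_pts` points from the rotated, normalized mesh.
#
#   view_tf = obj.get_view_transform(k=2, num_pts=2048)  # `obj` hypothetical
#   data = view_tf(mesh_data)  # `mesh_data` is a Data object with pos/face
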
def load_planetoid(dataset):
    data_name = ['Cora', 'CiteSeer', 'PubMed']
    assert dataset in data_name
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..',
                    'Datasets', 'NodeData')
    transforms = T.Compose([T.AddSelfLoops()])
    dataset = Planetoid(path, dataset, transform=transforms)
    return dataset, dataset[0]

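# Hedged usage sketch:
#
#   dataset, data = load_planetoid('Cora')
#   print(data.num_nodes, data.num_edges)  # edge count includes self-loops
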
def __init__(self, n_neigh=9, rad_neigh=0.1, knn=None, self_loop=True,
             edge_attr=None, flow='source_to_target'):
    super(GraphReg, self).__init__()
    # defining graph transform
    graph_transform_list = []
    self.del_edge_attr = False
    self.knn = knn
    self.n_neigh = n_neigh
    self.rad_neigh = rad_neigh
    self.self_loop = self_loop
    self.edge_attr = edge_attr
    if self.knn is True:
        graph_transform_list.append(
            T.KNNGraph(n_neigh, loop=self_loop, flow=flow))
    elif self.knn is False:
        graph_transform_list.append(
            T.RadiusGraph(self.rad_neigh, loop=self_loop,
                          max_num_neighbors=n_neigh, flow=flow))
    else:
        print("Connectivity of the graph will not be re-generated")
    # edge attributes
    if edge_attr is not None:
        self.del_edge_attr = True
        if isinstance(edge_attr, str):
            if edge_attr:
                edge_attr = [attr.strip() for attr in edge_attr.split('-')]
            else:
                edge_attr = []
        for attr in edge_attr:
            attr = attr.strip().lower()
            if attr == 'poscart':
                graph_transform_list.append(Cartesian(norm=False, cat=True))
            elif attr == 'posspherical':
                graph_transform_list.append(Spherical(cat=True))
            elif attr == 'featureoffsets':
                graph_transform_list.append(
                    FeatureDistances(metric='offset', cat=True))
            elif attr == 'featurel2':
                graph_transform_list.append(
                    FeatureDistances(metric='l2', cat=True))
            else:
                raise RuntimeError('{} is not supported'.format(attr))
    self.graph_transform = T.Compose(graph_transform_list)

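# Hedged usage sketch: with knn=True a k-NN graph is (re)built, with
# knn=False a radius graph; `edge_attr` takes '-'-separated attribute names
# such as 'poscart-featurel2'.
#
#   reg = GraphReg(n_neigh=9, knn=True, self_loop=True, edge_attr='poscart')
#   data = reg.graph_transform(data)  # `data` is a PyG Data object with pos
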
def get_dataset(name, sparse=True, dataset_div=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    try:
        shutil.copytree('../input/smt', path)
    except shutil.Error as e:
        for src, dst, msg in e.args[0]:
            print(dst, src, msg)
    except FileExistsError as e:
        print(e)
    dataset = TUDataset(path, name, use_node_attr=True)
    dataset.data.edge_attr = None
    if dataset.data.x is None:
        print('data.x does not exist!')
        exit(1)
    # Degree-based node features (OneHotDegree concatenates to existing
    # features by default).
    max_degree = 0
    degs = []
    for data in dataset:
        degs += [degree(data.edge_index[0], dtype=torch.long)]
        max_degree = max(max_degree, degs[-1].max().item())
    if max_degree < 1000:
        dataset.transform = T.OneHotDegree(max_degree)
    else:
        deg = torch.cat(degs, dim=0).to(torch.float)
        mean, std = deg.mean().item(), deg.std().item()
        dataset.transform = NormalizedDegree(mean, std)
    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)
        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)
        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]
        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])
    if dataset_div is not None:
        dataset = dataset.shuffle()[:len(dataset) // dataset_div]
    return dataset

def __init__(self, root='data/ShapeNet', train=True, categories=None,
             include_normals=True, split='trainval', transform=None,
             pre_transform=None, pre_filter=None,
             repeat_to=None):  # Modified here to add repeat_to
    if categories is None:
        categories = list(self.category_ids.keys())
    if isinstance(categories, str):
        categories = [categories]
    assert all(category in self.category_ids for category in categories)
    self.categories = categories

    # Default settings (override the constructor arguments)
    pre_transform = T.NormalizeScale()
    pre_filter = None
    include_normals = True
    if train:
        transform = T.Compose([
            T.RandomTranslate(0.01),
            T.RandomRotate(15, axis=0),
            T.RandomRotate(15, axis=1),
            T.RandomRotate(15, axis=2)
        ])
        split = 'trainval'
    else:
        transform = None
        split = 'test'

    super().__init__(root, transform, pre_transform, pre_filter)

    # Modified here to add repeat_to
    if split == 'train':
        path = self.processed_paths[0]
    elif split == 'val':
        path = self.processed_paths[1]
    elif split == 'test':
        path = self.processed_paths[2]
    elif split == 'trainval':
        path = self.processed_paths[3]
    else:
        raise ValueError(f'Split {split} found, but expected either '
                         'train, val, trainval or test')

    self.data, self.slices = torch.load(path)
    self.data.x = self.data.x if include_normals else None

    self.y_mask = torch.zeros((len(self.seg_classes.keys()), 50),
                              dtype=torch.bool)
    for i, labels in enumerate(self.seg_classes.values()):
        self.y_mask[i, labels] = 1

    self.repeat_to = repeat_to  # Modified here to add repeat_to

def __init__(self, data_dir, batch_size, shuffle, validation_split,
             num_workers, num_points, training=True):
    # trsfm = transforms.Compose([
    #     transforms.ToTensor(),
    #     transforms.Normalize((0.1307,), (0.3081,))
    # ])
    self.data_dir = data_dir
    path = osp.join(self.data_dir, 'SyntheticAdvanced')
    pre_transform = T.Compose(
        [UniformSample(num_points), NormalizeScale()])
    transform = T.Compose([
        RandomFlip(p=0.5, flip_x=True, flip_y=False, flip_z=True),
        RandomReverseFrames(p=0.5),
        RandomRotate(degree_range=(-15, 15), axis=0),
        RandomRotate(degree_range=(0, 360), axis=1),
        RandomRotate(degree_range=(-15, 15), axis=2),
        RandomScale(scales=(0.9, 1.1)),
        Jitter(jitter_range=0.0002, uniform=True,
               clip=torch.tensor([
                   [-float("inf"), -float("inf"), -float("inf"), 0, 0, 0],
                   [float("inf"), float("inf"), float("inf"), 1, 1, 1],
               ])),
        Shuffle()
    ])
    train_dataset = SyntheticAdvancedDataset(path, transform, pre_transform)
    # train_dataset = ModelNet(path, '10', training, transform, pre_transform)
    super(SyntheticAdvancedDataLoader, self).__init__(
        train_dataset, batch_size=batch_size, shuffle=shuffle)

def test_empty_dataset(self):
    opt = Options()
    opt.dataset_name = os.path.join(os.getcwd(), "test")
    opt.dataroot = os.path.join(os.getcwd(), "test")
    opt.pre_transform = [DictConfig({"transform": "RandomNoise"})]
    opt.test_transform = [DictConfig({"transform": "AddOnes"})]
    opt.val_transform = [DictConfig({"transform": "Jitter"})]
    opt.train_transform = [DictConfig({"transform": "RandomSymmetry"})]
    dataset = BaseDataset(opt)
    self.assertEqual(str(dataset.pre_transform),
                     str(T.Compose([T3d.RandomNoise()])))
    self.assertEqual(str(dataset.test_transform),
                     str(T.Compose([T3d.AddOnes()])))
    self.assertEqual(str(dataset.train_transform),
                     str(T.Compose([T3d.RandomSymmetry()])))
    self.assertEqual(str(dataset.val_transform),
                     str(T.Compose([T3d.Jitter()])))
    self.assertEqual(str(dataset.inference_transform),
                     str(T.Compose([T3d.RandomNoise(), T3d.AddOnes()])))
    self.assertEqual(dataset.train_dataset, None)
    self.assertEqual(dataset.test_dataset, None)
    self.assertEqual(dataset.val_dataset, None)

def main():
    # ------------
    # args
    # ------------
    parser = ArgumentParser()
    parser.add_argument("--batch_size", default=64, type=int)
    parser.add_argument("--num_workers", default=2, type=int)
    parser.add_argument("--task", default="off_center", type=str)
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args()

    # ------------
    # data
    # ------------
    data_dir = Path(gvp.__file__).parents[1] / "data/synthetic"
    transform = transforms.Compose([transforms.KNNGraph(k=10), ExtendedPPF()])
    dm = SyntheticDataModule(
        data_dir, args.batch_size, args.task, transform,
        num_workers=args.num_workers
    )

    # ------------
    # model
    # ------------
    model = SyntheticGNN(4, 32, 4, 32)

    # ------------
    # training
    # ------------
    wandb_logger = WandbLogger(
        name=f"SyntheticGNN-{args.task}", project="GVP", reinit=True
    )
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath="model_checkpoints",
        filename=f"SyntheticGNN-{args.task}-" + "{epoch:02d}-{val_loss:.2f}",
        save_weights_only=True,
        save_top_k=3,
        mode="min",
    )
    trainer = pl.Trainer.from_argparse_args(
        args,
        logger=wandb_logger,
        callbacks=[checkpoint_callback],
    )
    trainer.fit(model, dm)

    # ------------
    # testing
    # ------------
    result = trainer.test(datamodule=dm)
    print(result)
    wandb.finish()

def instantiate_transforms(transform_options):
    """Create a torch_geometric composite transform from an OmegaConf list,
    such as:

        - transform: GridSampling
          params:
            size: 0.01
        - transform: NormalizeScale
    """
    transforms = []
    for transform in transform_options:
        transforms.append(instantiate_transform(transform))
    return T.Compose(transforms)

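# Hedged usage sketch: assumes an OmegaConf list and the
# `instantiate_transform` helper referenced above.
#
#   from omegaconf import OmegaConf
#   conf = OmegaConf.create(
#       [{'transform': 'GridSampling', 'params': {'size': 0.01}},
#        {'transform': 'NormalizeScale'}])
#   composed = instantiate_transforms(conf)
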
class CIFAR10SuperpixelsDataModule(LightningDataModule):
    def __init__(
        self,
        data_dir: str = "data/",
        train_val_test_split: Sequence[int] = (45_000, 5_000, 10_000),
        n_segments: int = 100,
        sp_generation_workers: int = 4,
        batch_size: int = 32,
        num_workers: int = 0,
        pin_memory: bool = False,
        **kwargs,
    ):
        """DataModule which converts CIFAR10 to a dataset of superpixel graphs.

        Conversion happens on the first run only. When changing pre_transforms
        you need to manually delete previously generated dataset files!

        Args:
            data_dir (str): Path to data folder.
            train_val_test_split (Sequence[int]): Number of datapoints for
                training, validation and testing. Should sum up to 60_000.
            n_segments (int): Number of superpixels per image.
            sp_generation_workers (int): Number of processes for superpixel
                dataset generation.
            batch_size (int): Batch size.
            num_workers (int): Number of processes for data loading.
            pin_memory (bool): Whether to pin CUDA memory (slight speed up
                for GPU users).
            **kwargs: Extra parameters passed to the SLIC algorithm, learn
                more here:
                https://scikit-image.org/docs/dev/api/skimage.segmentation.html#skimage.segmentation.slic
        """
        super().__init__()
        self.data_dir = data_dir
        self.train_val_test_split = train_val_test_split

        # superpixel graph parameters
        self.n_segments = n_segments
        self.sp_generation_workers = sp_generation_workers

        # dataloader parameters
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.pin_memory = pin_memory

        self.slic_kwargs = kwargs

        self.pre_transform = T.Compose([
            NormalizeScale(),
        ])
        self.transform = None
        self.pre_filter = None

        self.data_train: Optional[Dataset] = None
        self.data_val: Optional[Dataset] = None
        self.data_test: Optional[Dataset] = None

def get_dataset(name, sparse=True, cleaned=False):
    if name == 'node':
        path = osp.join(os.environ['GNN_TRAINING_DATA_ROOT'], name)
        print(path)
        dataset = HitGraphDataset2(path, directed=False, categorical=True)
    else:
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
        dataset = TUDataset(path, name, cleaned=cleaned)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        # Featureless graphs: use (normalized) node degrees as features.
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())
        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)

        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]

        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])

    return dataset

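# Hedged usage sketch: with sparse=False the graphs are filtered by size and
# densified so that DiffPool-style models can work on fixed-size tensors.
#
#   dataset = get_dataset('REDDIT-BINARY', sparse=False)
#   data = dataset[0]  # data.adj is [num_nodes, num_nodes] after ToDense
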
def get_planetoid_dataset(name, normalize_features=False, transform=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    dataset = Planetoid(path, name)
    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform
    return dataset

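# Hedged usage sketch: when both flags are given, the extra transform is
# composed after feature normalization.
#
#   dataset = get_planetoid_dataset('Cora', normalize_features=True,
#                                   transform=T.TargetIndegree())
#   print(dataset[0])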