def load_ogb(name, dataset_dir):
    r"""
    Load an OGB dataset and attach its split information as dataset
    attributes.

    The first four characters of ``name`` select the dataset family:

    - ``'ogbn'``: the index splits are converted with ``index2mask`` and
      stored as ``train_mask`` / ``val_mask`` / ``test_mask``;
      ``edge_index`` is replaced by the output of ``to_undirected``.
    - ``'ogbg'``: the index splits are stored as ``train_graph_index`` /
      ``val_graph_index`` / ``test_graph_index``.
    - ``'ogbl'``: edge splits are stored as ``{train,val,test}_edge_index``
      with matching ``{train,val,test}_edge_label``. When
      ``cfg.dataset.resample_negative`` is set, only
      ``train_pos_edge_index`` is stored for training and
      ``neg_sampling_transform`` is installed as the dataset transform.

    Args:
        name (string): dataset name
        dataset_dir (string): data directory

    Returns:
        PyG dataset object

    Raises:
        ValueError: if ``name`` does not start with ``ogbn``, ``ogbg`` or
            ``ogbl``
    """
    prefix = name[:4]
    if prefix == 'ogbn':
        dataset = PygNodePropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_idx_split()
        split_names = ['train_mask', 'val_mask', 'test_mask']
        # Attribute names are matched positionally to the split keys.
        for i, key in enumerate(splits):
            mask = index2mask(splits[key], size=dataset.data.y.shape[0])
            set_dataset_attr(dataset, split_names[i], mask, len(mask))
        edge_index = to_undirected(dataset.data.edge_index)
        set_dataset_attr(dataset, 'edge_index', edge_index,
                         edge_index.shape[1])

    elif prefix == 'ogbg':
        dataset = PygGraphPropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_idx_split()
        split_names = [
            'train_graph_index', 'val_graph_index', 'test_graph_index'
        ]
        # Attribute names are matched positionally to the split keys.
        for i, key in enumerate(splits):
            graph_index = splits[key]
            set_dataset_attr(dataset, split_names[i], graph_index,
                             len(graph_index))

    elif prefix == 'ogbl':
        dataset = PygLinkPropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_edge_split()

        # Training split: only positive edges are read from the split dict.
        pos_edge = splits['train']['edge'].T
        if cfg.dataset.resample_negative:
            set_dataset_attr(dataset, 'train_pos_edge_index', pos_edge,
                             pos_edge.shape[1])
            # TODO: applying transform for negative sampling is very slow
            dataset.transform = neg_sampling_transform
        else:
            # Draw as many negatives as there are positive training edges.
            neg_edge = negative_sampling(
                edge_index=pos_edge,
                num_nodes=dataset.data.num_nodes[0],
                num_neg_samples=pos_edge.shape[1])
            edge_all = torch.cat([pos_edge, neg_edge], dim=-1)
            label = get_link_label(pos_edge, neg_edge)
            set_dataset_attr(dataset, 'train_edge_index', edge_all,
                             edge_all.shape[1])
            set_dataset_attr(dataset, 'train_edge_label', label, len(label))

        # Validation and test splits carry their own negative edges.
        eval_splits = (
            ('valid', 'val_edge_index', 'val_edge_label'),
            ('test', 'test_edge_index', 'test_edge_label'),
        )
        for split_key, index_attr, label_attr in eval_splits:
            pos_edge = splits[split_key]['edge'].T
            neg_edge = splits[split_key]['edge_neg'].T
            edge_all = torch.cat([pos_edge, neg_edge], dim=-1)
            label = get_link_label(pos_edge, neg_edge)
            set_dataset_attr(dataset, index_attr, edge_all,
                             edge_all.shape[1])
            set_dataset_attr(dataset, label_attr, label, len(label))

    else:
        # Report the offending name; the placeholder used to be left
        # unformatted.
        raise ValueError('OGB dataset: {} non-exist'.format(name))
    return dataset
def load_ogb(name, dataset_dir):
    r"""
    Load OGB dataset objects and attach their split information as dataset
    attributes.

    The first four characters of ``name`` select the dataset family:

    - ``'ogbn'``: the index splits are converted with ``index_to_mask`` and
      stored as ``train_mask`` / ``val_mask`` / ``test_mask``;
      ``edge_index`` is replaced by the output of ``to_undirected``.
    - ``'ogbg'``: the index splits are stored as ``train_graph_index`` /
      ``val_graph_index`` / ``test_graph_index``.
    - ``'ogbl'``: edge splits are stored as ``{train,val,test}_edge_index``
      with matching ``{train,val,test}_edge_label``. When
      ``cfg.dataset.resample_negative`` is set, only
      ``train_pos_edge_index`` is stored for training and
      ``neg_sampling_transform`` is installed as the dataset transform.

    Args:
        name (string): dataset name
        dataset_dir (string): data directory

    Returns:
        PyG dataset object

    Raises:
        ValueError: if ``name`` does not start with ``ogbn``, ``ogbg`` or
            ``ogbl``
    """
    # The ogb imports are kept local to the branch that needs them, so an
    # unsupported name is rejected without importing ogb at all.
    prefix = name[:4]
    if prefix == 'ogbn':
        from ogb.nodeproppred import PygNodePropPredDataset

        dataset = PygNodePropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_idx_split()
        split_names = ['train_mask', 'val_mask', 'test_mask']
        # Attribute names are matched positionally to the split keys.
        for i, key in enumerate(splits):
            mask = index_to_mask(splits[key], size=dataset.data.y.shape[0])
            set_dataset_attr(dataset, split_names[i], mask, len(mask))
        edge_index = to_undirected(dataset.data.edge_index)
        set_dataset_attr(dataset, 'edge_index', edge_index,
                         edge_index.shape[1])

    elif prefix == 'ogbg':
        from ogb.graphproppred import PygGraphPropPredDataset

        dataset = PygGraphPropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_idx_split()
        split_names = [
            'train_graph_index', 'val_graph_index', 'test_graph_index'
        ]
        # Attribute names are matched positionally to the split keys.
        for i, key in enumerate(splits):
            graph_index = splits[key]
            set_dataset_attr(dataset, split_names[i], graph_index,
                             len(graph_index))

    elif prefix == 'ogbl':
        from ogb.linkproppred import PygLinkPropPredDataset

        dataset = PygLinkPropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_edge_split()

        # Training split: only positive edges are read from the split dict.
        pos_edge = splits['train']['edge'].T
        if cfg.dataset.resample_negative:
            set_dataset_attr(dataset, 'train_pos_edge_index', pos_edge,
                             pos_edge.shape[1])
            dataset.transform = neg_sampling_transform
        else:
            # Draw as many negatives as there are positive training edges.
            neg_edge = negative_sampling(
                edge_index=pos_edge,
                num_nodes=dataset.data.num_nodes,
                num_neg_samples=pos_edge.shape[1])
            edge_all = torch.cat([pos_edge, neg_edge], dim=-1)
            label = create_link_label(pos_edge, neg_edge)
            set_dataset_attr(dataset, 'train_edge_index', edge_all,
                             edge_all.shape[1])
            set_dataset_attr(dataset, 'train_edge_label', label, len(label))

        # Validation and test splits carry their own negative edges.
        eval_splits = (
            ('valid', 'val_edge_index', 'val_edge_label'),
            ('test', 'test_edge_index', 'test_edge_label'),
        )
        for split_key, index_attr, label_attr in eval_splits:
            pos_edge = splits[split_key]['edge'].T
            neg_edge = splits[split_key]['edge_neg'].T
            edge_all = torch.cat([pos_edge, neg_edge], dim=-1)
            label = create_link_label(pos_edge, neg_edge)
            set_dataset_attr(dataset, index_attr, edge_all,
                             edge_all.shape[1])
            set_dataset_attr(dataset, label_attr, label, len(label))

    else:
        # Report the offending name; the placeholder used to be left
        # unformatted.
        raise ValueError('OGB dataset: {} non-exist'.format(name))
    return dataset