def test_hetero_in_memory_dataset():
    """Collating two same-schema ``HeteroData`` graphs into an in-memory
    dataset preserves graph-, node- and edge-level attributes per example."""
    data1 = HeteroData()
    data1.y = torch.randn(5)  # graph-level attribute
    data1['paper'].x = torch.randn(10, 16)
    data1['paper', 'paper'].edge_index = torch.randint(0, 10, (2, 30)).long()

    data2 = HeteroData()
    data2.y = torch.randn(5)
    data2['paper'].x = torch.randn(10, 16)
    data2['paper', 'paper'].edge_index = torch.randint(0, 10, (2, 30)).long()

    dataset = MyTestDataset([data1, data2])
    assert str(dataset) == 'MyTestDataset(2)'
    assert len(dataset) == 2

    # Each retrieved example holds exactly three entries:
    # `y`, the 'paper' node store, and the ('paper', 'paper') edge store.
    assert len(dataset[0]) == 3
    assert dataset[0].y.tolist() == data1.y.tolist()
    assert dataset[0]['paper'].x.tolist() == data1['paper'].x.tolist()
    assert (dataset[0]['paper', 'paper'].edge_index.tolist() ==
            data1['paper', 'paper'].edge_index.tolist())

    assert len(dataset[1]) == 3
    assert dataset[1].y.tolist() == data2.y.tolist()
    assert dataset[1]['paper'].x.tolist() == data2['paper'].x.tolist()
    assert (dataset[1]['paper', 'paper'].edge_index.tolist() ==
            data2['paper', 'paper'].edge_index.tolist())
def test_hetero_data_to_canonical():
    """A two-element edge key is canonicalized to the default relation, but
    becomes ambiguous (raising) once several relations share endpoints."""
    graph = HeteroData()
    # Both spellings resolve to the same single canonical edge store:
    for key in [('user', 'product'), ('user', 'to', 'product')]:
        assert isinstance(graph[key], EdgeStorage)
        assert len(graph.edge_types) == 1

    graph = HeteroData()
    assert isinstance(graph['user', 'buys', 'product'], EdgeStorage)
    assert isinstance(graph['user', 'clicks', 'product'], EdgeStorage)
    assert len(graph.edge_types) == 2
    # With two candidate relations the two-element lookup cannot be resolved:
    with pytest.raises(TypeError, match="missing 1 required"):
        graph['user', 'product']
def test_hetero_conv(aggr):
    """``HeteroConv`` runs one sub-module per relation and combines outputs
    per destination node type using `aggr` (stacking when `aggr is None`)."""
    data = HeteroData()
    data['paper'].x = torch.randn(50, 32)
    data['author'].x = torch.randn(30, 64)
    data['paper', 'paper'].edge_index = get_edge_index(50, 50, 200)
    data['paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'paper'].edge_index = get_edge_index(30, 50, 100)
    data['paper', 'paper'].edge_weight = torch.rand(200)

    conv = HeteroConv(
        {
            ('paper', 'to', 'paper'): GCNConv(-1, 64),
            ('author', 'to', 'paper'): SAGEConv((-1, -1), 64),
            ('paper', 'to', 'author'): GATConv((-1, -1), 64),
        }, aggr=aggr)

    assert len(list(conv.parameters())) > 0
    assert str(conv) == 'HeteroConv(num_relations=3)'

    # NOTE(review): presumably `edge_weight_dict` is routed only to the
    # relation convs that accept edge weights (GCNConv here) — confirm
    # against the HeteroConv implementation.
    out = conv(data.x_dict, data.edge_index_dict,
               edge_weight_dict=data.edge_weight_dict)

    assert len(out) == 2
    if aggr is not None:
        assert out['paper'].size() == (50, 64)
        assert out['author'].size() == (30, 64)
    else:
        # aggr=None stacks per-relation outputs: 'paper' is the destination
        # of two relations, 'author' of one.
        assert out['paper'].size() == (50, 2, 64)
        assert out['author'].size() == (30, 1, 64)
def test_hetero_to_undirected():
    """``ToUndirected`` symmetrizes the homogeneous ('v', 'v') relation in
    place and adds a reverse ('w', 'v') relation for bipartite edges."""
    edge_index = torch.tensor([[2, 0, 2], [3, 1, 0]])
    num_edges = edge_index.size(1)
    edge_weight = torch.randn(num_edges)
    edge_attr = torch.randn(num_edges, 8)
    # Expected ordering of the original edges after symmetrization + sorting:
    perm = torch.tensor([1, 2, 1, 2, 0, 0])

    data = HeteroData()
    data['v'].num_nodes = 4
    data['w'].num_nodes = 4
    for dst in ['v', 'w']:
        data['v', dst].edge_index = edge_index
        data['v', dst].edge_weight = edge_weight
        data['v', dst].edge_attr = edge_attr

    data = ToUndirected()(data)

    assert data['v', 'v'].edge_index.tolist() == [[0, 0, 1, 2, 2, 3],
                                                  [1, 2, 0, 0, 3, 2]]
    assert data['v', 'v'].edge_weight.tolist() == edge_weight[perm].tolist()
    assert data['v', 'v'].edge_attr.tolist() == edge_attr[perm].tolist()

    # Bipartite edges stay untouched; a reversed relation is added instead:
    assert data['v', 'w'].edge_index.tolist() == edge_index.tolist()
    assert data['v', 'w'].edge_weight.tolist() == edge_weight.tolist()
    assert data['v', 'w'].edge_attr.tolist() == edge_attr.tolist()
    assert data['w', 'v'].edge_index.tolist() == [[3, 1, 0], [2, 0, 2]]
    assert data['w', 'v'].edge_weight.tolist() == edge_weight.tolist()
    assert data['w', 'v'].edge_attr.tolist() == edge_attr.tolist()
def test_heterogeneous_link_neighbor_loader_loop(directed):
    """Sampling seed edges from the self-relation ('paper', 'paper') keeps
    all sampled 'paper' nodes in range, and every positive label pair is
    also contained in the sampled subgraph's edges."""
    torch.manual_seed(12345)

    data = HeteroData()
    data['paper'].x = torch.arange(100)
    data['author'].x = torch.arange(100, 300)
    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 500)
    data['paper', 'author'].edge_index = get_edge_index(100, 200, 1000)
    data['author', 'paper'].edge_index = get_edge_index(200, 100, 1000)

    loader = LinkNeighborLoader(data, num_neighbors=[-1] * 2,
                                edge_label_index=('paper', 'paper'),
                                batch_size=20, directed=directed)

    for batch in loader:
        assert batch['paper'].x.size(0) <= 100
        assert batch['paper'].x.min() >= 0 and batch['paper'].x.max() < 100

        # Assert positive samples are present in the original graph:
        edge_index = unique_edge_pairs(batch['paper', 'paper'].edge_index)
        edge_label_index = batch['paper', 'paper'].edge_label_index
        edge_label_index = unique_edge_pairs(edge_label_index)
        assert len(edge_index | edge_label_index) == len(edge_index)
def test_hetero_conv_with_dot_syntax_node_types():
    """``HeteroConv`` handles node type names containing dots
    (e.g. 'src.paper') without mangling store keys or output keys."""
    data = HeteroData()
    data['src.paper'].x = torch.randn(50, 32)
    data['author'].x = torch.randn(30, 64)
    data['src.paper', 'src.paper'].edge_index = get_edge_index(50, 50, 200)
    data['src.paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'src.paper'].edge_index = get_edge_index(30, 50, 100)
    data['src.paper', 'src.paper'].edge_weight = torch.rand(200)

    conv = HeteroConv({
        ('src.paper', 'to', 'src.paper'): GCNConv(-1, 64),
        ('author', 'to', 'src.paper'): SAGEConv((-1, -1), 64),
        ('src.paper', 'to', 'author'): GATConv((-1, -1), 64,
                                               add_self_loops=False),
    })

    assert len(list(conv.parameters())) > 0
    assert str(conv) == 'HeteroConv(num_relations=3)'

    out = conv(data.x_dict, data.edge_index_dict,
               edge_weight_dict=data.edge_weight_dict)

    assert len(out) == 2
    assert out['src.paper'].size() == (50, 64)
    assert out['author'].size() == (30, 64)
def __getitem__(self, time_index: Union[int, slice]):
    """Index the temporal signal.

    A slice returns a new ``StaticHeteroGraphTemporalSignal`` over the sliced
    features/targets (graph structure is static and shared); an integer
    returns a single ``HeteroData`` snapshot for that time step.
    """
    if isinstance(time_index, slice):
        # Slicing keeps the static graph structure and slices all
        # time-dependent dicts, including any additional feature keys.
        snapshot = StaticHeteroGraphTemporalSignal(
            self.edge_index_dict, self.edge_weight_dict,
            self.feature_dicts[time_index], self.target_dicts[time_index],
            **{
                key: getattr(self, key)[time_index]
                for key in self.additional_feature_keys
            })
    else:
        x_dict = self._get_features(time_index)
        edge_index_dict = self._get_edge_index()
        edge_weight_dict = self._get_edge_weight()
        y_dict = self._get_target(time_index)
        additional_features = self._get_additional_features(time_index)

        snapshot = HeteroData()
        if x_dict:
            for key, value in x_dict.items():
                snapshot[key].x = value
        if edge_index_dict:
            for key, value in edge_index_dict.items():
                snapshot[key].edge_index = value
        if edge_weight_dict:
            # NOTE: edge weights are exposed as `edge_attr` on the snapshot.
            for key, value in edge_weight_dict.items():
                snapshot[key].edge_attr = value
        if y_dict:
            for key, value in y_dict.items():
                snapshot[key].y = value
        if additional_features:
            # Arbitrary extra attributes, keyed per store:
            for feature_name, feature_dict in additional_features.items():
                if feature_dict:
                    for key, value in feature_dict.items():
                        snapshot[key][feature_name] = value
    return snapshot
def test_copy_hetero_data():
    """``copy.copy`` clones the store objects but shares tensor memory,
    while ``copy.deepcopy`` clones the tensors too; both re-parent the
    stores of the copy onto the copy."""
    data = HeteroData()
    data['paper'].x = x_paper
    data['paper', 'to', 'paper'].edge_index = edge_index_paper_paper

    out = copy.copy(data)
    assert id(data) != id(out)
    assert len(data.stores) == len(out.stores)
    for store1, store2 in zip(data.stores, out.stores):
        assert id(store1) != id(store2)
        assert id(data) == id(store1._parent())
        assert id(out) == id(store2._parent())
    assert out['paper']._key == 'paper'
    # Shallow copy shares the underlying tensor memory:
    assert data['paper'].x.data_ptr() == out['paper'].x.data_ptr()
    assert out['to']._key == ('paper', 'to', 'paper')
    assert data['to'].edge_index.data_ptr() == out['to'].edge_index.data_ptr()

    out = copy.deepcopy(data)
    assert id(data) != id(out)
    assert len(data.stores) == len(out.stores)
    for store1, store2 in zip(data.stores, out.stores):
        assert id(store1) != id(store2)
    assert id(out) == id(out['paper']._parent())
    assert out['paper']._key == 'paper'
    # Deep copy duplicates tensor memory but preserves values:
    assert data['paper'].x.data_ptr() != out['paper'].x.data_ptr()
    assert data['paper'].x.tolist() == out['paper'].x.tolist()
    assert id(out) == id(out['to']._parent())
    assert out['to']._key == ('paper', 'to', 'paper')
    assert data['to'].edge_index.data_ptr() != out['to'].edge_index.data_ptr()
    assert data['to'].edge_index.tolist() == out['to'].edge_index.tolist()
def test_remove_isolated_nodes_in_hetero_data():
    """``RemoveIsolatedNodes`` drops nodes without incident edges per node
    type and re-indexes every relation's `edge_index` accordingly."""
    data = HeteroData()
    data['p'].x = torch.arange(6)
    data['a'].x = torch.arange(6)
    data['i'].num_nodes = 4

    # isolated paper nodes: {4}
    # isolated author nodes: {3, 4, 5}
    # isolated institution nodes: {0, 1, 2, 3}
    data['p', '1', 'p'].edge_index = torch.tensor([[0, 1, 2], [0, 1, 3]])
    data['p', '2', 'a'].edge_index = torch.tensor([[1, 3, 5], [0, 1, 2]])
    data['p', '2', 'a'].edge_attr = torch.arange(3)
    data['p', '3', 'a'].edge_index = torch.tensor([[5], [2]])

    data = RemoveIsolatedNodes()(data)

    assert len(data) == 4
    assert data['p'].num_nodes == 5
    assert data['a'].num_nodes == 3
    assert data['i'].num_nodes == 0  # all institution nodes were isolated
    assert data['p'].x.tolist() == [0, 1, 2, 3, 5]
    assert data['a'].x.tolist() == [0, 1, 2]
    # Edge indices are remapped onto the compacted node ids:
    assert data['1'].edge_index.tolist() == [[0, 1, 2], [0, 1, 3]]
    assert data['2'].edge_index.tolist() == [[1, 3, 4], [0, 1, 2]]
    assert data['2'].edge_attr.tolist() == [0, 1, 2]
    assert data['3'].edge_index.tolist() == [[4], [2]]
def test_to_homogeneous():
    """``to_homogeneous`` concatenates all node and edge stores (shared
    feature names required) and records the type partitioning in
    `edge_type` plus the internal slice/type-dict bookkeeping."""
    data = HeteroData()
    data['paper'].x = torch.randn(100, 128)
    data['author'].x = torch.randn(200, 128)

    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 250)
    data['paper', 'paper'].edge_weight = torch.randn(250, )
    data['paper', 'paper'].edge_attr = torch.randn(250, 64)
    data['paper', 'author'].edge_index = get_edge_index(100, 200, 500)
    data['paper', 'author'].edge_weight = torch.randn(500, )
    data['paper', 'author'].edge_attr = torch.randn(500, 64)
    data['author', 'paper'].edge_index = get_edge_index(200, 100, 1000)
    data['author', 'paper'].edge_weight = torch.randn(1000, )
    data['author', 'paper'].edge_attr = torch.randn(1000, 64)

    data = data.to_homogeneous()
    assert len(data) == 5
    assert data.num_nodes == 300    # 100 paper + 200 author
    assert data.num_edges == 1750   # 250 + 500 + 1000
    assert data.num_node_features == 128
    assert data.num_edge_features == 64
    # One `edge_type` id per edge; three relations -> ids in {0, 1, 2}:
    assert data.edge_type.size() == (1750, )
    assert data.edge_type.min() == 0
    assert data.edge_type.max() == 2
    assert len(data._node_slices) == 2
    assert len(data._edge_slices) == 3
    assert len(data._edge_type_dict) == 3
def test_init_hetero_data():
    """The three ``HeteroData`` construction styles — attribute assignment,
    ``src__dst`` keyword arguments, and a (tuple-keyed) dict — are
    equivalent, and insertion order of node types is preserved."""
    data = HeteroData()
    data['v1'].x = 1
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']

    # `__` in keyword arguments separates source and destination node types:
    data = HeteroData(
        v1={'x': 1},
        paper={'x': x_paper},
        author={'x': x_author},
        paper__paper={'edge_index': edge_index_paper_paper},
        paper__author={'edge_index': edge_index_paper_author},
        author__paper={'edge_index': edge_index_author_paper},
    )
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']

    # Dict construction: node types keyed by string, edge types by tuple.
    data = HeteroData({
        'v1': {
            'x': 1
        },
        'paper': {
            'x': x_paper
        },
        'author': {
            'x': x_author
        },
        ('paper', 'paper'): {
            'edge_index': edge_index_paper_paper
        },
        ('paper', 'author'): {
            'edge_index': edge_index_paper_author
        },
        ('author', 'paper'): {
            'edge_index': edge_index_author_paper
        },
    })
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']
def generate_graph(self):
    """Build the heterogeneous graph (nodes/labels first, then edges) and
    persist it to ``<seed_data_path>_<file_type>_data.pt``. Returns None."""
    graph = HeteroData()
    graph = self.define_graph_nodes_and_labels(graph)
    graph = self.define_graph_edges(graph)
    out_path = ''.join((self.seed_data_path, '_', self.file_type, '_data.pt'))
    torch.save(graph, out_path)
    return
def process(self):
    """Build the heterogeneous MAG-style citation graph from the raw CSV
    files and save the collated dataset to ``self.processed_paths[0]``.

    'paper' nodes get features, years and labels from CSVs; the other node
    types get either just a node count or precomputed embeddings, depending
    on ``self.preprocess``.
    """
    import pandas as pd
    data = HeteroData()

    # Paper features:
    path = osp.join(self.raw_dir, 'node-feat', 'paper', 'node-feat.csv.gz')
    x_paper = pd.read_csv(path, compression='gzip', header=None,
                          dtype=np.float32).values
    data['paper'].x = torch.from_numpy(x_paper)

    # Paper publication years:
    path = osp.join(self.raw_dir, 'node-feat', 'paper', 'node_year.csv.gz')
    year_paper = pd.read_csv(path, compression='gzip', header=None,
                             dtype=np.int64).values
    data['paper'].year = torch.from_numpy(year_paper).view(-1)

    # Paper labels:
    path = osp.join(self.raw_dir, 'node-label', 'paper', 'node-label.csv.gz')
    y_paper = pd.read_csv(path, compression='gzip', header=None,
                          dtype=np.int64).values.flatten()
    data['paper'].y = torch.from_numpy(y_paper)

    if self.preprocess is None:
        # Featureless node types only receive their node counts:
        path = osp.join(self.raw_dir, 'num-node-dict.csv.gz')
        num_nodes_df = pd.read_csv(path, compression='gzip')
        for node_type in ['author', 'institution', 'field_of_study']:
            data[node_type].num_nodes = num_nodes_df[node_type].tolist()[0]
    else:
        # Precomputed embeddings for all non-paper node types:
        emb_dict = torch.load(self.raw_paths[-1])
        for key, value in emb_dict.items():
            if key != 'paper':
                data[key].x = value

    for edge_type in [('author', 'affiliated_with', 'institution'),
                      ('author', 'writes', 'paper'),
                      ('paper', 'cites', 'paper'),
                      ('paper', 'has_topic', 'field_of_study')]:
        f = '___'.join(edge_type)  # raw relation directory name
        path = osp.join(self.raw_dir, 'relations', f, 'edge.csv.gz')
        edge_index = pd.read_csv(path, compression='gzip', header=None,
                                 dtype=np.int64).values
        edge_index = torch.from_numpy(edge_index).t().contiguous()
        data[edge_type].edge_index = edge_index

    # Time-based train/val/test splits, stored as boolean masks on 'paper':
    for f, v in [('train', 'train'), ('valid', 'val'), ('test', 'test')]:
        path = osp.join(self.raw_dir, 'split', 'time', 'paper',
                        f'{f}.csv.gz')
        idx = pd.read_csv(path, compression='gzip', header=None,
                          dtype=np.int64).values.flatten()
        idx = torch.from_numpy(idx)
        mask = torch.zeros(data['paper'].num_nodes, dtype=torch.bool)
        mask[idx] = True
        data['paper'][f'{v}_mask'] = mask

    if self.pre_transform is not None:
        data = self.pre_transform(data)

    torch.save(self.collate([data]), self.processed_paths[0])
def test_heterogeneous_neighbor_loader_on_cora(directed):
    """A hetero ``NeighborLoader`` over a single-type copy of Cora must
    (a) match ``k_hop_subgraph`` under full undirected sampling and
    (b) produce the same output as the homogeneous model on the full graph
    when run through ``to_hetero``."""
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    # Wrap Cora as a heterogeneous graph with one 'paper' node type:
    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)

    loader = NeighborLoader(hetero_data, num_neighbors=[-1, -1],
                            batch_size=split_idx.numel(),
                            input_nodes=('paper', split_idx),
                            directed=directed)
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    if not directed:
        # Full (-1) two-hop sampling equals the exact 2-hop subgraph:
        n_id, _, _, e_mask = k_hop_subgraph(split_idx, num_hops=2,
                                            edge_index=data.edge_index,
                                            num_nodes=data.num_nodes)
        n_id = n_id.sort()[0]
        assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
        assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    # Seed-node outputs on the sampled batch equal full-graph outputs:
    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)

    # Best-effort cleanup of the temporary dataset directory:
    try:
        shutil.rmtree(root)
    except PermissionError:
        pass
def test_add_metapaths():
    """``AddMetaPaths`` composes edge chains into new `metapath_i` relations,
    with options to drop original edges, keep same-node-type relations, and
    drop node types left unconnected."""
    dblp = HeteroData()
    dblp['paper'].x = torch.ones(5)
    dblp['author'].x = torch.ones(6)
    dblp['conference'].x = torch.ones(3)
    dblp['paper', 'cites', 'paper'].edge_index = torch.tensor([[0, 1, 2, 3],
                                                               [1, 2, 4, 2]])
    dblp['paper', 'author'].edge_index = torch.tensor([[0, 1, 2, 3, 4],
                                                       [2, 2, 5, 2, 5]])
    dblp['author', 'paper'].edge_index = dblp['paper',
                                              'author'].edge_index[[1, 0]]
    dblp['conference', 'paper'].edge_index = torch.tensor([[0, 0, 1, 2, 2],
                                                           [0, 1, 2, 3, 4]])
    dblp['paper', 'conference'].edge_index = dblp['conference',
                                                  'paper'].edge_index[[1, 0]]

    # Test transform options:
    orig_edge_type = dblp.edge_types
    metapaths = [[('paper', 'conference'), ('conference', 'paper')]]
    meta1 = AddMetaPaths(metapaths)(dblp.clone())
    meta2 = AddMetaPaths(metapaths, drop_orig_edges=True)(dblp.clone())
    meta3 = AddMetaPaths(metapaths, drop_orig_edges=True,
                         keep_same_node_type=True)(dblp.clone())
    meta4 = AddMetaPaths(metapaths, drop_orig_edges=True,
                         keep_same_node_type=True,
                         drop_unconnected_nodes=True)(dblp.clone())

    # The composed paper->conference->paper relation always has 9 edges:
    assert meta1['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta2['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta3['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta4['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9

    assert all([i in meta1.edge_types for i in orig_edge_type])
    assert meta2.edge_types == [('paper', 'metapath_0', 'paper')]
    assert meta3.edge_types == [('paper', 'cites', 'paper'),
                                ('paper', 'metapath_0', 'paper')]
    assert meta4.edge_types == [('paper', 'cites', 'paper'),
                                ('paper', 'metapath_0', 'paper')]
    assert meta3.node_types == ['paper', 'author', 'conference']
    assert meta4.node_types == ['paper']

    # Test 4-hop metapath:
    metapaths = [[('author', 'paper'), ('paper', 'conference')],
                 [('author', 'paper'), ('paper', 'conference'),
                  ('conference', 'paper'), ('paper', 'author')]]
    meta1 = AddMetaPaths(metapaths)(dblp.clone())
    new_edge_types = [('author', 'metapath_0', 'conference'),
                      ('author', 'metapath_1', 'author')]
    assert meta1[new_edge_types[0]].edge_index.shape[-1] == 4
    assert meta1[new_edge_types[1]].edge_index.shape[-1] == 4

    # Test `metapath_dict` information:
    assert list(meta1.metapath_dict.values()) == metapaths
    assert list(meta1.metapath_dict.keys()) == new_edge_types
def test_hetero_normalize_scale():
    """``NormalizeFeatures`` row-normalizes `x` on every node type."""
    features = torch.tensor([[1, 0, 1], [0, 1, 0], [0, 0, 0]],
                            dtype=torch.float)

    data = HeteroData()
    for node_type in ['v', 'w']:
        data[node_type].x = features

    data = NormalizeFeatures()(data)

    # Each row sums to 1 (all-zero rows are left untouched):
    expected = [[0.5, 0, 0.5], [0, 1, 0], [0, 0, 0]]
    for node_type in ['v', 'w']:
        assert data[node_type].x.tolist() == expected
def test_hetero_add_self_loops():
    """``AddSelfLoops`` augments only homogeneous relations; bipartite
    ('v', 'w') edges are left unchanged."""
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = HeteroData()
    data['v'].num_nodes = 3
    data['w'].num_nodes = 3
    for dst in ['v', 'w']:
        data['v', dst].edge_index = edge_index

    data = AddSelfLoops()(data)

    # Self-loops (0,0), (1,1), (2,2) are appended to the ('v', 'v') edges:
    assert data['v', 'v'].edge_index.tolist() == [[0, 1, 1, 2, 0, 1, 2],
                                                  [1, 0, 2, 1, 0, 1, 2]]
    assert data['v', 'w'].edge_index.tolist() == edge_index.tolist()
def test_hgt_loader_on_cora(get_dataset):
    """``HGTLoader`` with a per-hop sample budget of `num_nodes` must
    reproduce the exact 2-hop subgraph and match the output of the
    equivalent homogeneous model on the full graph."""
    dataset = get_dataset(name='Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    # Wrap Cora as a heterogeneous graph with one 'paper' node type:
    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)

    # Sample the complete two-hop neighborhood:
    loader = HGTLoader(hetero_data, num_samples=[data.num_nodes] * 2,
                       batch_size=split_idx.numel(),
                       input_nodes=('paper', split_idx))
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    # Complete sampling equals the exact 2-hop subgraph:
    n_id, _, _, e_mask = k_hop_subgraph(split_idx, num_hops=2,
                                        edge_index=data.edge_index,
                                        num_nodes=data.num_nodes)
    n_id = n_id.sort()[0]
    assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
    assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    # Seed-node outputs on the sampled batch equal full-graph outputs:
    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)
def test_random_link_split_on_undirected_hetero_data():
    """``RandomLinkSplit`` with `is_undirected=True` keeps the training split
    undirected, both without and with an explicit `rev_edge_types`."""
    data = HeteroData()
    data['p'].x = torch.arange(100)
    data['p', 'p'].edge_index = get_edge_index(100, 100, 500)
    data['p', 'p'].edge_index = to_undirected(data['p', 'p'].edge_index)

    splitter = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'))
    train_data, _, _ = splitter(data)
    assert train_data['p', 'p'].is_undirected()

    # A self-relation may also be declared as its own reverse relation:
    splitter = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'),
                               rev_edge_types=('p', 'p'))
    train_data, _, _ = splitter(data)
    assert train_data['p', 'p'].is_undirected()
def test_hetero_data_subgraph():
    """``subgraph`` keeps only the requested node subsets, filters node/edge
    attributes, and drops edge types whose endpoints are no longer present;
    graph-level attributes survive."""
    data = HeteroData()
    data.num_node_types = 3  # graph-level attribute
    data['paper'].x = x_paper
    data['paper'].name = 'paper'
    data['paper'].num_nodes = x_paper.size(0)
    data['author'].x = x_author
    data['author'].num_nodes = x_author.size(0)
    data['conference'].x = x_conference
    data['conference'].num_nodes = x_conference.size(0)

    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'paper'].edge_attr = edge_attr_paper_paper
    data['paper', 'paper'].name = 'cites'
    data['author', 'paper'].edge_index = edge_index_author_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['paper', 'conference'].edge_index = edge_index_paper_conference

    subset = {
        'paper': torch.randperm(x_paper.size(0))[:4],
        'author': torch.randperm(x_author.size(0))[:2]
    }

    out = data.subgraph(subset)

    assert out.num_node_types == data.num_node_types
    # 'conference' is absent from `subset`, so it is dropped entirely:
    assert out.node_types == ['paper', 'author']

    assert len(out['paper']) == 3
    assert torch.allclose(out['paper'].x, data['paper'].x[subset['paper']])
    assert out['paper'].name == 'paper'
    assert out['paper'].num_nodes == 4
    assert len(out['author']) == 2
    assert torch.allclose(out['author'].x, data['author'].x[subset['author']])
    assert out['author'].num_nodes == 2

    # Relations touching 'conference' disappear with the node type:
    assert out.edge_types == [
        ('paper', 'to', 'paper'),
        ('author', 'to', 'paper'),
        ('paper', 'to', 'author'),
    ]
    assert len(out['paper', 'paper']) == 3
    assert out['paper', 'paper'].edge_index is not None
    assert out['paper', 'paper'].edge_attr is not None
    assert out['paper', 'paper'].name == 'cites'
    assert len(out['paper', 'author']) == 1
    assert out['paper', 'author'].edge_index is not None
    assert len(out['author', 'paper']) == 1
    assert out['author', 'paper'].edge_index is not None
def test_hetero_data_functions():
    """Exercise dict-like helpers on ``HeteroData``: `keys`, containment,
    counts, `metadata`, `collect`, graph-level attributes, deletion, and
    dict/namedtuple conversion."""
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    data['paper', 'paper'].edge_attr = edge_attr_paper_paper
    assert len(data) == 3
    assert sorted(data.keys) == ['edge_attr', 'edge_index', 'x']
    assert 'x' in data and 'edge_index' in data and 'edge_attr' in data

    assert data.num_nodes == 15
    assert data.num_edges == 110

    assert data.num_node_features == {'paper': 16, 'author': 32}
    assert data.num_edge_features == {
        ('paper', 'to', 'paper'): 8,
        ('paper', 'to', 'author'): 0,
        ('author', 'to', 'paper'): 0,
    }

    node_types, edge_types = data.metadata()
    assert node_types == ['paper', 'author']
    assert edge_types == [
        ('paper', 'to', 'paper'),
        ('paper', 'to', 'author'),
        ('author', 'to', 'paper'),
    ]

    # `collect` gathers one attribute across all stores that define it:
    x_dict = data.collect('x')
    assert len(x_dict) == 2
    assert x_dict['paper'].tolist() == x_paper.tolist()
    assert x_dict['author'].tolist() == x_author.tolist()
    assert x_dict == data.x_dict

    # Graph-level attributes are reachable via both item and attr access:
    data.y = 0
    assert data['y'] == 0 and data.y == 0
    assert len(data) == 4
    assert sorted(data.keys) == ['edge_attr', 'edge_index', 'x', 'y']

    del data['paper', 'author']
    node_types, edge_types = data.metadata()
    assert node_types == ['paper', 'author']
    assert edge_types == [('paper', 'to', 'paper'), ('author', 'to', 'paper')]

    assert len(data.to_dict()) == 5
    assert len(data.to_namedtuple()) == 5
    assert data.to_namedtuple().y == 0
    assert len(data.to_namedtuple().paper) == 1
def test_init_hetero_data():
    """The three ``HeteroData`` construction styles — attribute assignment,
    ``src__dst`` keyword arguments, and a (tuple-keyed) dict — all yield a
    graph with the same two attribute keys (`x`, `edge_index`)."""
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    assert len(data) == 2

    # `__` in keyword arguments separates source and destination node types:
    data = HeteroData(
        paper={'x': x_paper},
        author={'x': x_author},
        paper__paper={'edge_index': edge_index_paper_paper},
        paper__author={'edge_index': edge_index_paper_author},
        author__paper={'edge_index': edge_index_author_paper},
    )
    assert len(data) == 2

    # Dict construction: node types keyed by string, edge types by tuple.
    data = HeteroData({
        'paper': {
            'x': x_paper
        },
        'author': {
            'x': x_author
        },
        ('paper', 'paper'): {
            'edge_index': edge_index_paper_paper
        },
        ('paper', 'author'): {
            'edge_index': edge_index_paper_author
        },
        ('author', 'paper'): {
            'edge_index': edge_index_author_paper
        },
    })
    assert len(data) == 2
def test_hetero_to_sparse_tensor():
    """``ToSparseTensor`` replaces `edge_index` by a sparse adjacency
    `adj_t` on every relation, without attaching edge values."""
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = HeteroData()
    data['v'].num_nodes = 3
    data['w'].num_nodes = 3
    for dst in ['v', 'w']:
        data['v', dst].edge_index = edge_index

    data = ToSparseTensor()(data)

    for dst in ['v', 'w']:
        adj_t = data['v', dst].adj_t
        assert adj_t.storage.row().tolist() == [0, 1, 1, 2]
        assert adj_t.storage.col().tolist() == [1, 0, 2, 1]
        assert adj_t.storage.value() is None
def test_hetero_conv_with_custom_conv():
    """``HeteroConv`` forwards extra positional argument dicts (here the
    node positions) to a custom conv and aggregates per destination type."""
    data = HeteroData()
    for node_type, num_nodes, dim in [('paper', 50, 32), ('author', 30, 64)]:
        data[node_type].x = torch.randn(num_nodes, dim)
        data[node_type].pos = torch.randn(num_nodes, 3)
    data['paper', 'paper'].edge_index = get_edge_index(50, 50, 200)
    data['paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'paper'].edge_index = get_edge_index(30, 50, 100)

    conv = HeteroConv(
        {edge_type: CustomConv(64)
         for edge_type in data.edge_types})
    out = conv(data.x_dict, data.edge_index_dict, data.pos_dict)

    assert len(out) == 2
    assert out['paper'].size() == (50, 64)
    assert out['author'].size() == (30, 64)
def test_heterogeneous_link_neighbor_loader_no_edges():
    """With an empty `num_neighbors` no neighborhood is sampled: each batch
    contains exactly the distinct endpoints of its 20 seed edges."""
    loader = LinkNeighborLoader(
        HeteroData(paper=dict(num_nodes=100)),
        num_neighbors=[],
        edge_label_index=(('paper', 'paper'), get_edge_index(100, 100, 100)),
        batch_size=20,
    )

    for batch in loader:
        assert isinstance(batch, HeteroData)
        assert len(batch) == 3
        # At most two distinct endpoints per seed edge:
        assert batch['paper'].num_nodes <= 40
        edge_label_index = batch['paper', 'paper'].edge_label_index
        assert edge_label_index.size(1) == 20
        assert batch['paper'].num_nodes == edge_label_index.unique().numel()
def process(self):
    """Assemble the DBLP-style heterogeneous graph (author / paper / term /
    conference) from the raw files and save the collated dataset.

    Edges are sliced per (src, dst) block out of one global adjacency
    matrix using node-type offsets.
    """
    data = HeteroData()

    node_types = ['author', 'paper', 'term', 'conference']
    for i, node_type in enumerate(node_types[:2]):
        # 'author' and 'paper' features are stored as sparse matrices:
        x = sp.load_npz(osp.join(self.raw_dir, f'features_{i}.npz'))
        data[node_type].x = torch.from_numpy(x.todense()).to(torch.float)

    x = np.load(osp.join(self.raw_dir, 'features_2.npy'))
    data['term'].x = torch.from_numpy(x).to(torch.float)

    node_type_idx = np.load(osp.join(self.raw_dir, 'node_types.npy'))
    node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)
    # Conferences carry no features, only a node count (type id 3):
    data['conference'].num_nodes = int((node_type_idx == 3).sum())

    y = np.load(osp.join(self.raw_dir, 'labels.npy'))
    data['author'].y = torch.from_numpy(y).to(torch.long)

    # Train/val/test indices become boolean masks on 'author':
    split = np.load(osp.join(self.raw_dir, 'train_val_test_idx.npz'))
    for name in ['train', 'val', 'test']:
        idx = split[f'{name}_idx']
        idx = torch.from_numpy(idx).to(torch.long)
        mask = torch.zeros(data['author'].num_nodes, dtype=torch.bool)
        mask[idx] = True
        data['author'][f'{name}_mask'] = mask

    # Per-type (start, end) offsets into the global adjacency matrix:
    s = {}
    N_a = data['author'].num_nodes
    N_p = data['paper'].num_nodes
    N_t = data['term'].num_nodes
    N_c = data['conference'].num_nodes
    s['author'] = (0, N_a)
    s['paper'] = (N_a, N_a + N_p)
    s['term'] = (N_a + N_p, N_a + N_p + N_t)
    s['conference'] = (N_a + N_p + N_t, N_a + N_p + N_t + N_c)

    A = sp.load_npz(osp.join(self.raw_dir, 'adjM.npz'))
    for src, dst in product(node_types, node_types):
        A_sub = A[s[src][0]:s[src][1], s[dst][0]:s[dst][1]].tocoo()
        if A_sub.nnz > 0:
            row = torch.from_numpy(A_sub.row).to(torch.long)
            col = torch.from_numpy(A_sub.col).to(torch.long)
            data[src, dst].edge_index = torch.stack([row, col], dim=0)

    if self.pre_transform is not None:
        data = self.pre_transform(data)

    torch.save(self.collate([data]), self.processed_paths[0])
def test_temporal_heterogeneous_neighbor_loader_on_cora(get_dataset):
    """Temporal neighbor sampling never draws neighbors whose `time`
    exceeds the seed node's timestamp."""
    data = get_dataset(name='Cora')[0]

    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].time = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index

    loader = NeighborLoader(hetero_data, num_neighbors=[-1, -1],
                            input_nodes='paper', time_attr='time',
                            batch_size=1)

    for batch in loader:
        # The seed node sits at position 0; no sampled node may be newer.
        times = batch['paper'].time
        assert torch.all(times[0] >= times[1:])
def test_lightning_hetero_link_data():
    """``LightningLinkData`` yields link-level batches carrying `edge_label`
    and `edge_label_index`, plus `edge_label_time` once temporal sampling
    (`time_attr` + `input_train_time`) is enabled."""
    torch.manual_seed(12345)

    data = HeteroData()
    data['paper'].x = torch.arange(10)
    data['author'].x = torch.arange(10)
    data['term'].x = torch.arange(10)
    data['paper', 'author'].edge_index = get_edge_index(10, 10, 10)
    data['author', 'paper'].edge_index = get_edge_index(10, 10, 10)
    data['paper', 'term'].edge_index = get_edge_index(10, 10, 10)

    datamodule = LightningLinkData(
        data,
        input_train_edges=('author', 'paper'),
        loader='neighbor',
        num_neighbors=[5],
        batch_size=32,
        num_workers=0,
    )

    for batch in datamodule.train_dataloader():
        assert 'edge_label' in batch['author', 'paper']
        assert 'edge_label_index' in batch['author', 'paper']
        break  # one batch is enough to verify the schema

    # Temporal variant: per-node timestamps + per-seed-edge timestamps.
    data['author'].time = torch.arange(data['author'].num_nodes)
    data['paper'].time = torch.arange(data['paper'].num_nodes)
    data['term'].time = torch.arange(data['term'].num_nodes)

    datamodule = LightningLinkData(
        data,
        input_train_edges=('author', 'paper'),
        input_train_time=torch.arange(data['author', 'paper'].num_edges),
        loader='neighbor',
        num_neighbors=[5],
        batch_size=32,
        num_workers=0,
        time_attr='time',
    )

    for batch in datamodule.train_dataloader():
        assert 'edge_label' in batch['author', 'paper']
        assert 'edge_label_index' in batch['author', 'paper']
        assert 'edge_label_time' in batch['author', 'paper']
        break
def process(self):
    """Build the user / artist / tag heterogeneous graph with positive and
    negative link-prediction splits on ('user', 'artist') and save the
    collated dataset.

    Edges are sliced per (src, dst) block out of one global adjacency
    matrix using node-type offsets.
    """
    data = HeteroData()

    node_type_idx = np.load(osp.join(self.raw_dir, 'node_types.npy'))
    node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)
    node_types = ['user', 'artist', 'tag']
    for i, node_type in enumerate(node_types):
        data[node_type].num_nodes = int((node_type_idx == i).sum())

    pos_split = np.load(
        osp.join(self.raw_dir, 'train_val_test_pos_user_artist.npz'))
    neg_split = np.load(
        osp.join(self.raw_dir, 'train_val_test_neg_user_artist.npz'))

    for name in ['train', 'val', 'test']:
        # Positive edges are stored explicitly only for val/test —
        # presumably training positives come from the adjacency matrix
        # below; TODO(review): confirm against the dataset docs.
        if name != 'train':
            edge_index = pos_split[f'{name}_pos_user_artist']
            edge_index = torch.from_numpy(edge_index)
            edge_index = edge_index.t().to(torch.long).contiguous()
            data['user', 'artist'][f'{name}_pos_edge_index'] = edge_index

        edge_index = neg_split[f'{name}_neg_user_artist']
        edge_index = torch.from_numpy(edge_index)
        edge_index = edge_index.t().to(torch.long).contiguous()
        data['user', 'artist'][f'{name}_neg_edge_index'] = edge_index

    # Per-type (start, end) offsets into the global adjacency matrix:
    s = {}
    N_u = data['user'].num_nodes
    N_a = data['artist'].num_nodes
    N_t = data['tag'].num_nodes
    s['user'] = (0, N_u)
    s['artist'] = (N_u, N_u + N_a)
    s['tag'] = (N_u + N_a, N_u + N_a + N_t)

    A = sp.load_npz(osp.join(self.raw_dir, 'adjM.npz'))
    for src, dst in product(node_types, node_types):
        A_sub = A[s[src][0]:s[src][1], s[dst][0]:s[dst][1]].tocoo()
        if A_sub.nnz > 0:
            row = torch.from_numpy(A_sub.row).to(torch.long)
            col = torch.from_numpy(A_sub.col).to(torch.long)
            data[src, dst].edge_index = torch.stack([row, col], dim=0)

    if self.pre_transform is not None:
        data = self.pre_transform(data)

    torch.save(self.collate([data]), self.processed_paths[0])
def test_heterogeneous_link_neighbor_loader(directed, neg_sampling_ratio):
    """``LinkNeighborLoader`` over ('paper', 'author') seed edges: positive
    label pairs appear among the sampled edges; with negative sampling,
    labels split into a 1-valued positive half and a 0-valued negative
    half."""
    torch.manual_seed(12345)

    data = HeteroData()
    data['paper'].x = torch.arange(100)
    data['author'].x = torch.arange(100, 300)
    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 500)
    data['paper', 'paper'].edge_attr = torch.arange(500)
    data['paper', 'author'].edge_index = get_edge_index(100, 200, 1000)
    data['paper', 'author'].edge_attr = torch.arange(500, 1500)
    data['author', 'paper'].edge_index = get_edge_index(200, 100, 1000)
    data['author', 'paper'].edge_attr = torch.arange(1500, 2500)

    loader = LinkNeighborLoader(
        data,
        num_neighbors=[-1] * 2,
        edge_label_index=('paper', 'author'),
        batch_size=20,
        directed=directed,
        neg_sampling_ratio=neg_sampling_ratio,
        shuffle=True,
    )
    assert str(loader) == 'LinkNeighborLoader()'
    assert len(loader) == 1000 / 20

    for batch in loader:
        assert isinstance(batch, HeteroData)

        if neg_sampling_ratio == 0.0:
            assert len(batch) == 4

            # Assert positive samples are present in the original graph:
            edge_index = unique_edge_pairs(batch['paper', 'author'].edge_index)
            edge_label_index = batch['paper', 'author'].edge_label_index
            edge_label_index = unique_edge_pairs(edge_label_index)
            assert len(edge_index | edge_label_index) == len(edge_index)
        else:
            assert len(batch) == 5

            # 20 positives followed by 20 sampled negatives (these size
            # asserts expect neg_sampling_ratio == 1.0):
            assert batch['paper', 'author'].edge_label_index.size(1) == 40
            assert torch.all(batch['paper', 'author'].edge_label[:20] == 1)
            assert torch.all(batch['paper', 'author'].edge_label[20:] == 0)