Example #1
def test_hetero_in_memory_dataset():
    data1 = HeteroData()
    data1.y = torch.randn(5)
    data1['paper'].x = torch.randn(10, 16)
    data1['paper', 'paper'].edge_index = torch.randint(0, 10, (2, 30)).long()

    data2 = HeteroData()
    data2.y = torch.randn(5)
    data2['paper'].x = torch.randn(10, 16)
    data2['paper', 'paper'].edge_index = torch.randint(0, 10, (2, 30)).long()

    dataset = MyTestDataset([data1, data2])
    assert str(dataset) == 'MyTestDataset(2)'
    assert len(dataset) == 2

    assert len(dataset[0]) == 3
    assert dataset[0].y.tolist() == data1.y.tolist()
    assert dataset[0]['paper'].x.tolist() == data1['paper'].x.tolist()
    assert (dataset[0]['paper', 'paper'].edge_index.tolist() == data1[
        'paper', 'paper'].edge_index.tolist())

    assert len(dataset[1]) == 3
    assert dataset[1].y.tolist() == data2.y.tolist()
    assert dataset[1]['paper'].x.tolist() == data2['paper'].x.tolist()
    assert (dataset[1]['paper', 'paper'].edge_index.tolist() == data2[
        'paper', 'paper'].edge_index.tolist())
Example #2
def test_hetero_data_to_canonical():
    data = HeteroData()
    assert isinstance(data['user', 'product'], EdgeStorage)
    assert len(data.edge_types) == 1
    assert isinstance(data['user', 'to', 'product'], EdgeStorage)
    assert len(data.edge_types) == 1

    data = HeteroData()
    assert isinstance(data['user', 'buys', 'product'], EdgeStorage)
    assert isinstance(data['user', 'clicks', 'product'], EdgeStorage)
    assert len(data.edge_types) == 2

    with pytest.raises(TypeError, match="missing 1 required"):
        data['user', 'product']
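Example #3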
def test_hetero_conv(aggr):
    data = HeteroData()
    data['paper'].x = torch.randn(50, 32)
    data['author'].x = torch.randn(30, 64)
    data['paper', 'paper'].edge_index = get_edge_index(50, 50, 200)
    data['paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'paper'].edge_index = get_edge_index(30, 50, 100)
    data['paper', 'paper'].edge_weight = torch.rand(200)

    conv = HeteroConv(
        {
            ('paper', 'to', 'paper'): GCNConv(-1, 64),
            ('author', 'to', 'paper'): SAGEConv((-1, -1), 64),
            ('paper', 'to', 'author'): GATConv((-1, -1), 64),
        },
        aggr=aggr)

    assert len(list(conv.parameters())) > 0
    assert str(conv) == 'HeteroConv(num_relations=3)'

    out = conv(data.x_dict,
               data.edge_index_dict,
               edge_weight_dict=data.edge_weight_dict)

    assert len(out) == 2
    if aggr is not None:
        assert out['paper'].size() == (50, 64)
        assert out['author'].size() == (30, 64)
    else:
        assert out['paper'].size() == (50, 2, 64)
        assert out['author'].size() == (30, 1, 64)
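Example #4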
def test_hetero_to_undirected():
    edge_index = torch.tensor([[2, 0, 2], [3, 1, 0]])
    edge_weight = torch.randn(edge_index.size(1))
    edge_attr = torch.randn(edge_index.size(1), 8)

    perm = torch.tensor([1, 2, 1, 2, 0, 0])

    data = HeteroData()
    data['v'].num_nodes = 4
    data['w'].num_nodes = 4
    data['v', 'v'].edge_index = edge_index
    data['v', 'v'].edge_weight = edge_weight
    data['v', 'v'].edge_attr = edge_attr
    data['v', 'w'].edge_index = edge_index
    data['v', 'w'].edge_weight = edge_weight
    data['v', 'w'].edge_attr = edge_attr

    data = ToUndirected()(data)
    assert data['v', 'v'].edge_index.tolist() == [[0, 0, 1, 2, 2, 3],
                                                  [1, 2, 0, 0, 3, 2]]
    assert data['v', 'v'].edge_weight.tolist() == edge_weight[perm].tolist()
    assert data['v', 'v'].edge_attr.tolist() == edge_attr[perm].tolist()
    assert data['v', 'w'].edge_index.tolist() == edge_index.tolist()
    assert data['v', 'w'].edge_weight.tolist() == edge_weight.tolist()
    assert data['v', 'w'].edge_attr.tolist() == edge_attr.tolist()
    assert data['w', 'v'].edge_index.tolist() == [[3, 1, 0], [2, 0, 2]]
    assert data['w', 'v'].edge_weight.tolist() == edge_weight.tolist()
    assert data['w', 'v'].edge_attr.tolist() == edge_attr.tolist()
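Example #5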
def test_heterogeneous_link_neighbor_loader_loop(directed):
    torch.manual_seed(12345)

    data = HeteroData()

    data['paper'].x = torch.arange(100)
    data['author'].x = torch.arange(100, 300)

    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 500)
    data['paper', 'author'].edge_index = get_edge_index(100, 200, 1000)
    data['author', 'paper'].edge_index = get_edge_index(200, 100, 1000)

    loader = LinkNeighborLoader(data,
                                num_neighbors=[-1] * 2,
                                edge_label_index=('paper', 'paper'),
                                batch_size=20,
                                directed=directed)

    for batch in loader:
        assert batch['paper'].x.size(0) <= 100
        assert batch['paper'].x.min() >= 0 and batch['paper'].x.max() < 100

        # Assert positive samples are present in the original graph:
        edge_index = unique_edge_pairs(batch['paper', 'paper'].edge_index)
        edge_label_index = batch['paper', 'paper'].edge_label_index
        edge_label_index = unique_edge_pairs(edge_label_index)
        assert len(edge_index | edge_label_index) == len(edge_index)
Example #6
def test_hetero_conv_with_dot_syntax_node_types():
    data = HeteroData()
    data['src.paper'].x = torch.randn(50, 32)
    data['author'].x = torch.randn(30, 64)
    data['src.paper', 'src.paper'].edge_index = get_edge_index(50, 50, 200)
    data['src.paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'src.paper'].edge_index = get_edge_index(30, 50, 100)
    data['src.paper', 'src.paper'].edge_weight = torch.rand(200)

    conv = HeteroConv({
        ('src.paper', 'to', 'src.paper'):
        GCNConv(-1, 64),
        ('author', 'to', 'src.paper'):
        SAGEConv((-1, -1), 64),
        ('src.paper', 'to', 'author'):
        GATConv((-1, -1), 64, add_self_loops=False),
    })

    assert len(list(conv.parameters())) > 0
    assert str(conv) == 'HeteroConv(num_relations=3)'

    out = conv(data.x_dict,
               data.edge_index_dict,
               edge_weight_dict=data.edge_weight_dict)

    assert len(out) == 2
    assert out['src.paper'].size() == (50, 64)
    assert out['author'].size() == (30, 64)
Example #7
    def __getitem__(self, time_index: Union[int, slice]):
        if isinstance(time_index, slice):
            snapshot = StaticHeteroGraphTemporalSignal(
                self.edge_index_dict, self.edge_weight_dict,
                self.feature_dicts[time_index], self.target_dicts[time_index],
                **{
                    key: getattr(self, key)[time_index]
                    for key in self.additional_feature_keys
                })
        else:
            x_dict = self._get_features(time_index)
            edge_index_dict = self._get_edge_index()
            edge_weight_dict = self._get_edge_weight()
            y_dict = self._get_target(time_index)
            additional_features = self._get_additional_features(time_index)

            # Assemble a HeteroData snapshot for the requested time index:
            snapshot = HeteroData()
            if x_dict:
                for key, value in x_dict.items():
                    snapshot[key].x = value
            if edge_index_dict:
                for key, value in edge_index_dict.items():
                    snapshot[key].edge_index = value
            if edge_weight_dict:
                for key, value in edge_weight_dict.items():
                    snapshot[key].edge_attr = value
            if y_dict:
                for key, value in y_dict.items():
                    snapshot[key].y = value
            if additional_features:
                for feature_name, feature_dict in additional_features.items():
                    if feature_dict:
                        for key, value in feature_dict.items():
                            snapshot[key][feature_name] = value
        return snapshot
Example #8
def test_copy_hetero_data():
    data = HeteroData()
    data['paper'].x = x_paper
    data['paper', 'to', 'paper'].edge_index = edge_index_paper_paper

    out = copy.copy(data)
    assert id(data) != id(out)
    assert len(data.stores) == len(out.stores)
    for store1, store2 in zip(data.stores, out.stores):
        assert id(store1) != id(store2)
        assert id(data) == id(store1._parent())
        assert id(out) == id(store2._parent())
    assert out['paper']._key == 'paper'
    assert data['paper'].x.data_ptr() == out['paper'].x.data_ptr()
    assert out['to']._key == ('paper', 'to', 'paper')
    assert data['to'].edge_index.data_ptr() == out['to'].edge_index.data_ptr()

    out = copy.deepcopy(data)
    assert id(data) != id(out)
    assert len(data.stores) == len(out.stores)
    for store1, store2 in zip(data.stores, out.stores):
        assert id(store1) != id(store2)
    assert id(out) == id(out['paper']._parent())
    assert out['paper']._key == 'paper'
    assert data['paper'].x.data_ptr() != out['paper'].x.data_ptr()
    assert data['paper'].x.tolist() == out['paper'].x.tolist()
    assert id(out) == id(out['to']._parent())
    assert out['to']._key == ('paper', 'to', 'paper')
    assert data['to'].edge_index.data_ptr() != out['to'].edge_index.data_ptr()
    assert data['to'].edge_index.tolist() == out['to'].edge_index.tolist()
Example #9
def test_remove_isolated_nodes_in_hetero_data():
    data = HeteroData()

    data['p'].x = torch.arange(6)
    data['a'].x = torch.arange(6)
    data['i'].num_nodes = 4

    # isolated paper nodes: {4}
    # isolated author nodes: {3, 4, 5}
    # isolated institution nodes: {0, 1, 2, 3}
    data['p', '1', 'p'].edge_index = torch.tensor([[0, 1, 2], [0, 1, 3]])
    data['p', '2', 'a'].edge_index = torch.tensor([[1, 3, 5], [0, 1, 2]])
    data['p', '2', 'a'].edge_attr = torch.arange(3)
    data['p', '3', 'a'].edge_index = torch.tensor([[5], [2]])

    data = RemoveIsolatedNodes()(data)

    assert len(data) == 4
    assert data['p'].num_nodes == 5
    assert data['a'].num_nodes == 3
    assert data['i'].num_nodes == 0

    assert data['p'].x.tolist() == [0, 1, 2, 3, 5]
    assert data['a'].x.tolist() == [0, 1, 2]

    assert data['1'].edge_index.tolist() == [[0, 1, 2], [0, 1, 3]]
    assert data['2'].edge_index.tolist() == [[1, 3, 4], [0, 1, 2]]
    assert data['2'].edge_attr.tolist() == [0, 1, 2]
    assert data['3'].edge_index.tolist() == [[4], [2]]
Example #10
def test_to_homogeneous():
    data = HeteroData()

    data['paper'].x = torch.randn(100, 128)
    data['author'].x = torch.randn(200, 128)

    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 250)
    data['paper', 'paper'].edge_weight = torch.randn(250, )
    data['paper', 'paper'].edge_attr = torch.randn(250, 64)

    data['paper', 'author'].edge_index = get_edge_index(100, 200, 500)
    data['paper', 'author'].edge_weight = torch.randn(500, )
    data['paper', 'author'].edge_attr = torch.randn(500, 64)

    data['author', 'paper'].edge_index = get_edge_index(200, 100, 1000)
    data['author', 'paper'].edge_weight = torch.randn(1000, )
    data['author', 'paper'].edge_attr = torch.randn(1000, 64)

    data = data.to_homogeneous()
    assert len(data) == 5
    assert data.num_nodes == 300
    assert data.num_edges == 1750
    assert data.num_node_features == 128
    assert data.num_edge_features == 64
    assert data.edge_type.size() == (1750, )
    assert data.edge_type.min() == 0
    assert data.edge_type.max() == 2
    assert len(data._node_slices) == 2
    assert len(data._edge_slices) == 3
    assert len(data._edge_type_dict) == 3
Example #11
def test_init_hetero_data():
    data = HeteroData()
    data['v1'].x = 1
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']

    data = HeteroData(
        v1={'x': 1},
        paper={'x': x_paper},
        author={'x': x_author},
        paper__paper={'edge_index': edge_index_paper_paper},
        paper__author={'edge_index': edge_index_paper_author},
        author__paper={'edge_index': edge_index_author_paper},
    )
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']

    data = HeteroData({
        'v1': {'x': 1},
        'paper': {'x': x_paper},
        'author': {'x': x_author},
        ('paper', 'paper'): {'edge_index': edge_index_paper_paper},
        ('paper', 'author'): {'edge_index': edge_index_paper_author},
        ('author', 'paper'): {'edge_index': edge_index_author_paper},
    })
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']
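Example #12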
    def generate_graph(self):
        data = HeteroData()
        data = self.define_graph_nodes_and_labels(data)
        data = self.define_graph_edges(data)
        torch.save(
            data, ''.join(
                (self.seed_data_path, '_', self.file_type, '_data.pt')))
        return
Example #13
    def process(self):
        import pandas as pd

        data = HeteroData()

        path = osp.join(self.raw_dir, 'node-feat', 'paper', 'node-feat.csv.gz')
        x_paper = pd.read_csv(path, compression='gzip', header=None,
                              dtype=np.float32).values
        data['paper'].x = torch.from_numpy(x_paper)

        path = osp.join(self.raw_dir, 'node-feat', 'paper', 'node_year.csv.gz')
        year_paper = pd.read_csv(path, compression='gzip', header=None,
                                 dtype=np.int64).values
        data['paper'].year = torch.from_numpy(year_paper).view(-1)

        path = osp.join(self.raw_dir, 'node-label', 'paper',
                        'node-label.csv.gz')
        y_paper = pd.read_csv(path, compression='gzip', header=None,
                              dtype=np.int64).values.flatten()
        data['paper'].y = torch.from_numpy(y_paper)

        if self.preprocess is None:
            path = osp.join(self.raw_dir, 'num-node-dict.csv.gz')
            num_nodes_df = pd.read_csv(path, compression='gzip')
            for node_type in ['author', 'institution', 'field_of_study']:
                data[node_type].num_nodes = num_nodes_df[node_type].tolist()[0]
        else:
            emb_dict = torch.load(self.raw_paths[-1])
            for key, value in emb_dict.items():
                if key != 'paper':
                    data[key].x = value

        # Read each relation's edge list into a [2, num_edges] edge_index:
        for edge_type in [('author', 'affiliated_with', 'institution'),
                          ('author', 'writes', 'paper'),
                          ('paper', 'cites', 'paper'),
                          ('paper', 'has_topic', 'field_of_study')]:

            f = '___'.join(edge_type)
            path = osp.join(self.raw_dir, 'relations', f, 'edge.csv.gz')
            edge_index = pd.read_csv(path, compression='gzip', header=None,
                                     dtype=np.int64).values
            edge_index = torch.from_numpy(edge_index).t().contiguous()
            data[edge_type].edge_index = edge_index

        # Convert the train/valid/test paper index splits into boolean masks:
        for f, v in [('train', 'train'), ('valid', 'val'), ('test', 'test')]:
            path = osp.join(self.raw_dir, 'split', 'time', 'paper',
                            f'{f}.csv.gz')
            idx = pd.read_csv(path, compression='gzip', header=None,
                              dtype=np.int64).values.flatten()
            idx = torch.from_numpy(idx)
            mask = torch.zeros(data['paper'].num_nodes, dtype=torch.bool)
            mask[idx] = True
            data['paper'][f'{v}_mask'] = mask

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])
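Example #14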
def test_heterogeneous_neighbor_loader_on_cora(directed):
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)

    loader = NeighborLoader(hetero_data,
                            num_neighbors=[-1, -1],
                            batch_size=split_idx.numel(),
                            input_nodes=('paper', split_idx),
                            directed=directed)
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    if not directed:
        n_id, _, _, e_mask = k_hop_subgraph(split_idx,
                                            num_hops=2,
                                            edge_index=data.edge_index,
                                            num_nodes=data.num_nodes)

        n_id = n_id.sort()[0]
        assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
        assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)

    try:
        shutil.rmtree(root)
    except PermissionError:
        pass
Example #15
def test_add_metapaths():
    dblp = HeteroData()
    dblp['paper'].x = torch.ones(5)
    dblp['author'].x = torch.ones(6)
    dblp['conference'].x = torch.ones(3)
    dblp['paper', 'cites', 'paper'].edge_index = torch.tensor([[0, 1, 2, 3],
                                                               [1, 2, 4, 2]])
    dblp['paper', 'author'].edge_index = torch.tensor([[0, 1, 2, 3, 4],
                                                       [2, 2, 5, 2, 5]])
    dblp['author', 'paper'].edge_index = dblp['paper',
                                              'author'].edge_index[[1, 0]]
    dblp['conference', 'paper'].edge_index = torch.tensor([[0, 0, 1, 2, 2],
                                                           [0, 1, 2, 3, 4]])
    dblp['paper', 'conference'].edge_index = dblp['conference',
                                                  'paper'].edge_index[[1, 0]]

    # Test transform options:
    orig_edge_type = dblp.edge_types
    metapaths = [[('paper', 'conference'), ('conference', 'paper')]]
    meta1 = AddMetaPaths(metapaths)(dblp.clone())
    meta2 = AddMetaPaths(metapaths, drop_orig_edges=True)(dblp.clone())
    meta3 = AddMetaPaths(metapaths,
                         drop_orig_edges=True,
                         keep_same_node_type=True)(dblp.clone())
    meta4 = AddMetaPaths(metapaths,
                         drop_orig_edges=True,
                         keep_same_node_type=True,
                         drop_unconnected_nodes=True)(dblp.clone())

    assert meta1['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta2['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta3['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta4['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9

    assert all([i in meta1.edge_types for i in orig_edge_type])
    assert meta2.edge_types == [('paper', 'metapath_0', 'paper')]
    assert meta3.edge_types == [('paper', 'cites', 'paper'),
                                ('paper', 'metapath_0', 'paper')]
    assert meta4.edge_types == [('paper', 'cites', 'paper'),
                                ('paper', 'metapath_0', 'paper')]

    assert meta3.node_types == ['paper', 'author', 'conference']
    assert meta4.node_types == ['paper']

    # Test 4-hop metapath:
    metapaths = [[('author', 'paper'), ('paper', 'conference')],
                 [('author', 'paper'), ('paper', 'conference'),
                  ('conference', 'paper'), ('paper', 'author')]]
    meta1 = AddMetaPaths(metapaths)(dblp.clone())
    new_edge_types = [('author', 'metapath_0', 'conference'),
                      ('author', 'metapath_1', 'author')]
    assert meta1[new_edge_types[0]].edge_index.shape[-1] == 4
    assert meta1[new_edge_types[1]].edge_index.shape[-1] == 4

    # Test `metapath_dict` information:
    assert list(meta1.metapath_dict.values()) == metapaths
    assert list(meta1.metapath_dict.keys()) == new_edge_types
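Example #16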
def test_hetero_normalize_scale():
    x = torch.tensor([[1, 0, 1], [0, 1, 0], [0, 0, 0]], dtype=torch.float)

    data = HeteroData()
    data['v'].x = x
    data['w'].x = x
    data = NormalizeFeatures()(data)
    assert data['v'].x.tolist() == [[0.5, 0, 0.5], [0, 1, 0], [0, 0, 0]]
    assert data['w'].x.tolist() == [[0.5, 0, 0.5], [0, 1, 0], [0, 0, 0]]
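Example #17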
def test_hetero_add_self_loops():
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = HeteroData()
    data['v'].num_nodes = 3
    data['w'].num_nodes = 3
    data['v', 'v'].edge_index = edge_index
    data['v', 'w'].edge_index = edge_index
    data = AddSelfLoops()(data)
    assert data['v', 'v'].edge_index.tolist() == [[0, 1, 1, 2, 0, 1, 2],
                                                  [1, 0, 2, 1, 0, 1, 2]]
    assert data['v', 'w'].edge_index.tolist() == edge_index.tolist()
Example #18
def test_hgt_loader_on_cora(get_dataset):
    dataset = get_dataset(name='Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)

    # Sample the complete two-hop neighborhood:
    loader = HGTLoader(hetero_data,
                       num_samples=[data.num_nodes] * 2,
                       batch_size=split_idx.numel(),
                       input_nodes=('paper', split_idx))
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    n_id, _, _, e_mask = k_hop_subgraph(split_idx,
                                        num_hops=2,
                                        edge_index=data.edge_index,
                                        num_nodes=data.num_nodes)

    n_id = n_id.sort()[0]
    assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
    assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)
Example #19
def test_random_link_split_on_undirected_hetero_data():
    data = HeteroData()
    data['p'].x = torch.arange(100)
    data['p', 'p'].edge_index = get_edge_index(100, 100, 500)
    data['p', 'p'].edge_index = to_undirected(data['p', 'p'].edge_index)

    transform = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'))
    train_data, val_data, test_data = transform(data)
    assert train_data['p', 'p'].is_undirected()

    transform = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'),
                                rev_edge_types=('p', 'p'))
    train_data, val_data, test_data = transform(data)
    assert train_data['p', 'p'].is_undirected()
Example #20
def test_hetero_data_subgraph():
    data = HeteroData()
    data.num_node_types = 3
    data['paper'].x = x_paper
    data['paper'].name = 'paper'
    data['paper'].num_nodes = x_paper.size(0)
    data['author'].x = x_author
    data['author'].num_nodes = x_author.size(0)
    data['conference'].x = x_conference
    data['conference'].num_nodes = x_conference.size(0)
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'paper'].edge_attr = edge_attr_paper_paper
    data['paper', 'paper'].name = 'cites'
    data['author', 'paper'].edge_index = edge_index_author_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['paper', 'conference'].edge_index = edge_index_paper_conference

    subset = {
        'paper': torch.randperm(x_paper.size(0))[:4],
        'author': torch.randperm(x_author.size(0))[:2]
    }

    out = data.subgraph(subset)

    assert out.num_node_types == data.num_node_types
    assert out.node_types == ['paper', 'author']

    assert len(out['paper']) == 3
    assert torch.allclose(out['paper'].x, data['paper'].x[subset['paper']])
    assert out['paper'].name == 'paper'
    assert out['paper'].num_nodes == 4
    assert len(out['author']) == 2
    assert torch.allclose(out['author'].x, data['author'].x[subset['author']])
    assert out['author'].num_nodes == 2

    assert out.edge_types == [
        ('paper', 'to', 'paper'),
        ('author', 'to', 'paper'),
        ('paper', 'to', 'author'),
    ]

    assert len(out['paper', 'paper']) == 3
    assert out['paper', 'paper'].edge_index is not None
    assert out['paper', 'paper'].edge_attr is not None
    assert out['paper', 'paper'].name == 'cites'
    assert len(out['paper', 'author']) == 1
    assert out['paper', 'author'].edge_index is not None
    assert len(out['author', 'paper']) == 1
    assert out['author', 'paper'].edge_index is not None
Example #21
def test_hetero_data_functions():
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    data['paper', 'paper'].edge_attr = edge_attr_paper_paper
    assert len(data) == 3
    assert sorted(data.keys) == ['edge_attr', 'edge_index', 'x']
    assert 'x' in data and 'edge_index' in data and 'edge_attr' in data
    assert data.num_nodes == 15
    assert data.num_edges == 110

    assert data.num_node_features == {'paper': 16, 'author': 32}
    assert data.num_edge_features == {
        ('paper', 'to', 'paper'): 8,
        ('paper', 'to', 'author'): 0,
        ('author', 'to', 'paper'): 0,
    }

    node_types, edge_types = data.metadata()
    assert node_types == ['paper', 'author']
    assert edge_types == [
        ('paper', 'to', 'paper'),
        ('paper', 'to', 'author'),
        ('author', 'to', 'paper'),
    ]

    x_dict = data.collect('x')
    assert len(x_dict) == 2
    assert x_dict['paper'].tolist() == x_paper.tolist()
    assert x_dict['author'].tolist() == x_author.tolist()
    assert x_dict == data.x_dict

    data.y = 0
    assert data['y'] == 0 and data.y == 0
    assert len(data) == 4
    assert sorted(data.keys) == ['edge_attr', 'edge_index', 'x', 'y']

    del data['paper', 'author']
    node_types, edge_types = data.metadata()
    assert node_types == ['paper', 'author']
    assert edge_types == [('paper', 'to', 'paper'), ('author', 'to', 'paper')]

    assert len(data.to_dict()) == 5
    assert len(data.to_namedtuple()) == 5
    assert data.to_namedtuple().y == 0
    assert len(data.to_namedtuple().paper) == 1
Example #22
def test_init_hetero_data():
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    assert len(data) == 2

    data = HeteroData(
        paper={'x': x_paper},
        author={'x': x_author},
        paper__paper={'edge_index': edge_index_paper_paper},
        paper__author={'edge_index': edge_index_paper_author},
        author__paper={'edge_index': edge_index_author_paper},
    )
    assert len(data) == 2

    data = HeteroData({
        'paper': {'x': x_paper},
        'author': {'x': x_author},
        ('paper', 'paper'): {'edge_index': edge_index_paper_paper},
        ('paper', 'author'): {'edge_index': edge_index_paper_author},
        ('author', 'paper'): {'edge_index': edge_index_author_paper},
    })
    assert len(data) == 2
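Example #23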
def test_hetero_to_sparse_tensor():
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = HeteroData()
    data['v'].num_nodes = 3
    data['w'].num_nodes = 3
    data['v', 'v'].edge_index = edge_index
    data['v', 'w'].edge_index = edge_index
    data = ToSparseTensor()(data)
    assert data['v', 'v'].adj_t.storage.row().tolist() == [0, 1, 1, 2]
    assert data['v', 'v'].adj_t.storage.col().tolist() == [1, 0, 2, 1]
    assert data['v', 'v'].adj_t.storage.value() is None
    assert data['v', 'w'].adj_t.storage.row().tolist() == [0, 1, 1, 2]
    assert data['v', 'w'].adj_t.storage.col().tolist() == [1, 0, 2, 1]
    assert data['v', 'w'].adj_t.storage.value() is None
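Example #24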
def test_hetero_conv_with_custom_conv():
    data = HeteroData()
    data['paper'].x = torch.randn(50, 32)
    data['paper'].pos = torch.randn(50, 3)
    data['author'].x = torch.randn(30, 64)
    data['author'].pos = torch.randn(30, 3)
    data['paper', 'paper'].edge_index = get_edge_index(50, 50, 200)
    data['paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'paper'].edge_index = get_edge_index(30, 50, 100)

    conv = HeteroConv({key: CustomConv(64) for key in data.edge_types})
    out = conv(data.x_dict, data.edge_index_dict, data.pos_dict)
    assert len(out) == 2
    assert out['paper'].size() == (50, 64)
    assert out['author'].size() == (30, 64)
Example #25
def test_heterogeneous_link_neighbor_loader_no_edges():
    loader = LinkNeighborLoader(
        HeteroData(paper=dict(num_nodes=100)),
        num_neighbors=[],
        edge_label_index=(('paper', 'paper'), get_edge_index(100, 100, 100)),
        batch_size=20,
    )

    for batch in loader:
        assert isinstance(batch, HeteroData)
        assert len(batch) == 3
        assert batch['paper'].num_nodes <= 40
        assert batch['paper', 'paper'].edge_label_index.size(1) == 20
        assert batch['paper'].num_nodes == batch[
            'paper', 'paper'].edge_label_index.unique().numel()
Example #26
    def process(self):
        data = HeteroData()

        node_types = ['author', 'paper', 'term', 'conference']
        for i, node_type in enumerate(node_types[:2]):
            x = sp.load_npz(osp.join(self.raw_dir, f'features_{i}.npz'))
            data[node_type].x = torch.from_numpy(x.todense()).to(torch.float)

        x = np.load(osp.join(self.raw_dir, 'features_2.npy'))
        data['term'].x = torch.from_numpy(x).to(torch.float)

        node_type_idx = np.load(osp.join(self.raw_dir, 'node_types.npy'))
        node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)
        data['conference'].num_nodes = int((node_type_idx == 3).sum())

        y = np.load(osp.join(self.raw_dir, 'labels.npy'))
        data['author'].y = torch.from_numpy(y).to(torch.long)

        split = np.load(osp.join(self.raw_dir, 'train_val_test_idx.npz'))
        for name in ['train', 'val', 'test']:
            idx = split[f'{name}_idx']
            idx = torch.from_numpy(idx).to(torch.long)
            mask = torch.zeros(data['author'].num_nodes, dtype=torch.bool)
            mask[idx] = True
            data['author'][f'{name}_mask'] = mask

        # (start, end) offset of each node type in the global adjacency:
        s = {}
        N_a = data['author'].num_nodes
        N_p = data['paper'].num_nodes
        N_t = data['term'].num_nodes
        N_c = data['conference'].num_nodes
        s['author'] = (0, N_a)
        s['paper'] = (N_a, N_a + N_p)
        s['term'] = (N_a + N_p, N_a + N_p + N_t)
        s['conference'] = (N_a + N_p + N_t, N_a + N_p + N_t + N_c)

        A = sp.load_npz(osp.join(self.raw_dir, 'adjM.npz'))
        for src, dst in product(node_types, node_types):
            A_sub = A[s[src][0]:s[src][1], s[dst][0]:s[dst][1]].tocoo()
            if A_sub.nnz > 0:
                row = torch.from_numpy(A_sub.row).to(torch.long)
                col = torch.from_numpy(A_sub.col).to(torch.long)
                data[src, dst].edge_index = torch.stack([row, col], dim=0)

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])
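Example #27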
def test_temporal_heterogeneous_neighbor_loader_on_cora(get_dataset):
    dataset = get_dataset(name='Cora')
    data = dataset[0]

    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].time = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index

    loader = NeighborLoader(hetero_data, num_neighbors=[-1, -1],
                            input_nodes='paper', time_attr='time',
                            batch_size=1)

    for batch in loader:
        mask = batch['paper'].time[0] >= batch['paper'].time[1:]
        assert torch.all(mask)
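Example #28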
def test_lightning_hetero_link_data():
    torch.manual_seed(12345)

    data = HeteroData()

    data['paper'].x = torch.arange(10)
    data['author'].x = torch.arange(10)
    data['term'].x = torch.arange(10)

    data['paper', 'author'].edge_index = get_edge_index(10, 10, 10)
    data['author', 'paper'].edge_index = get_edge_index(10, 10, 10)
    data['paper', 'term'].edge_index = get_edge_index(10, 10, 10)

    datamodule = LightningLinkData(
        data,
        input_train_edges=('author', 'paper'),
        loader='neighbor',
        num_neighbors=[5],
        batch_size=32,
        num_workers=0,
    )

    for batch in datamodule.train_dataloader():
        assert 'edge_label' in batch['author', 'paper']
        assert 'edge_label_index' in batch['author', 'paper']
        break

    data['author'].time = torch.arange(data['author'].num_nodes)
    data['paper'].time = torch.arange(data['paper'].num_nodes)
    data['term'].time = torch.arange(data['term'].num_nodes)

    datamodule = LightningLinkData(
        data,
        input_train_edges=('author', 'paper'),
        input_train_time=torch.arange(data['author', 'paper'].num_edges),
        loader='neighbor',
        num_neighbors=[5],
        batch_size=32,
        num_workers=0,
        time_attr='time',
    )

    for batch in datamodule.train_dataloader():
        assert 'edge_label' in batch['author', 'paper']
        assert 'edge_label_index' in batch['author', 'paper']
        assert 'edge_label_time' in batch['author', 'paper']
        break
Example #29
    def process(self):
        data = HeteroData()

        node_type_idx = np.load(osp.join(self.raw_dir, 'node_types.npy'))
        node_type_idx = torch.from_numpy(node_type_idx).to(torch.long)

        node_types = ['user', 'artist', 'tag']
        for i, node_type in enumerate(node_types):
            data[node_type].num_nodes = int((node_type_idx == i).sum())

        pos_split = np.load(
            osp.join(self.raw_dir, 'train_val_test_pos_user_artist.npz'))
        neg_split = np.load(
            osp.join(self.raw_dir, 'train_val_test_neg_user_artist.npz'))

        # Positive edges: only 'val'/'test'; negative edges: all splits.
        for name in ['train', 'val', 'test']:
            if name != 'train':
                edge_index = pos_split[f'{name}_pos_user_artist']
                edge_index = torch.from_numpy(edge_index)
                edge_index = edge_index.t().to(torch.long).contiguous()
                data['user', 'artist'][f'{name}_pos_edge_index'] = edge_index

            edge_index = neg_split[f'{name}_neg_user_artist']
            edge_index = torch.from_numpy(edge_index)
            edge_index = edge_index.t().to(torch.long).contiguous()
            data['user', 'artist'][f'{name}_neg_edge_index'] = edge_index

        s = {}
        N_u = data['user'].num_nodes
        N_a = data['artist'].num_nodes
        N_t = data['tag'].num_nodes
        s['user'] = (0, N_u)
        s['artist'] = (N_u, N_u + N_a)
        s['tag'] = (N_u + N_a, N_u + N_a + N_t)

        A = sp.load_npz(osp.join(self.raw_dir, 'adjM.npz'))
        for src, dst in product(node_types, node_types):
            A_sub = A[s[src][0]:s[src][1], s[dst][0]:s[dst][1]].tocoo()
            if A_sub.nnz > 0:
                row = torch.from_numpy(A_sub.row).to(torch.long)
                col = torch.from_numpy(A_sub.col).to(torch.long)
                data[src, dst].edge_index = torch.stack([row, col], dim=0)

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])
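Example #30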
def test_heterogeneous_link_neighbor_loader(directed, neg_sampling_ratio):
    torch.manual_seed(12345)

    data = HeteroData()

    data['paper'].x = torch.arange(100)
    data['author'].x = torch.arange(100, 300)

    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 500)
    data['paper', 'paper'].edge_attr = torch.arange(500)
    data['paper', 'author'].edge_index = get_edge_index(100, 200, 1000)
    data['paper', 'author'].edge_attr = torch.arange(500, 1500)
    data['author', 'paper'].edge_index = get_edge_index(200, 100, 1000)
    data['author', 'paper'].edge_attr = torch.arange(1500, 2500)

    loader = LinkNeighborLoader(
        data,
        num_neighbors=[-1] * 2,
        edge_label_index=('paper', 'author'),
        batch_size=20,
        directed=directed,
        neg_sampling_ratio=neg_sampling_ratio,
        shuffle=True,
    )

    assert str(loader) == 'LinkNeighborLoader()'
    assert len(loader) == 1000 / 20

    for batch in loader:
        assert isinstance(batch, HeteroData)

        if neg_sampling_ratio == 0.0:
            assert len(batch) == 4

            # Assert positive samples are present in the original graph:
            edge_index = unique_edge_pairs(batch['paper', 'author'].edge_index)
            edge_label_index = batch['paper', 'author'].edge_label_index
            edge_label_index = unique_edge_pairs(edge_label_index)
            assert len(edge_index | edge_label_index) == len(edge_index)

        else:
            assert len(batch) == 5

            assert batch['paper', 'author'].edge_label_index.size(1) == 40
            assert torch.all(batch['paper', 'author'].edge_label[:20] == 1)
            assert torch.all(batch['paper', 'author'].edge_label[20:] == 0)