Example #1
    def __call__(self, data: HeteroData) -> HeteroData:
        edge_types = data.edge_types  # save original edge types
        data.metapath_dict = {}

        for j, metapath in enumerate(self.metapaths):
            for edge_type in metapath:
                assert data._to_canonical(
                    edge_type) in edge_types, f"'{edge_type}' not present"

            edge_type = metapath[0]
            adj1 = SparseTensor.from_edge_index(
                edge_index=data[edge_type].edge_index,
                sparse_sizes=data[edge_type].size())

            for i, edge_type in enumerate(metapath[1:]):
                adj2 = SparseTensor.from_edge_index(
                    edge_index=data[edge_type].edge_index,
                    sparse_sizes=data[edge_type].size())
                adj1 = adj1 @ adj2

            row, col, _ = adj1.coo()
            new_edge_type = (metapath[0][0], f'metapath_{j}', metapath[-1][-1])
            data[new_edge_type].edge_index = torch.vstack([row, col])
            data.metapath_dict[new_edge_type] = metapath

        if self.drop_orig_edges:
            for i in edge_types:
                if self.keep_same_node_type and i[0] == i[-1]:
                    continue
                else:
                    del data[i]

        return data
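The core of the transform above is adjacency composition: multiplying the sparse adjacency of each hop along the metapath yields the edge index of the new metapath relation. A minimal, self-contained sketch of that step (toy tensors invented for illustration, independent of the class above):

import torch
from torch_sparse import SparseTensor

# Toy bipartite graph: 3 papers, 2 conferences (hypothetical data).
ei_pc = torch.tensor([[0, 1, 2], [0, 0, 1]])   # paper -> conference
ei_cp = ei_pc[[1, 0]]                          # conference -> paper (reverse)

adj_pc = SparseTensor.from_edge_index(ei_pc, sparse_sizes=(3, 2))
adj_cp = SparseTensor.from_edge_index(ei_cp, sparse_sizes=(2, 3))

# Composing the two hops gives paper -> paper edges via a shared conference.
adj_pp = adj_pc @ adj_cp
row, col, _ = adj_pp.coo()
print(torch.vstack([row, col]))  # metapath-induced paper-paper edge index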
Example #2
def test_hetero_to_undirected():
    edge_index = torch.tensor([[2, 0], [3, 1]])
    edge_weight = torch.randn(edge_index.size(1))
    edge_attr = torch.randn(edge_index.size(1), 8)

    perm = torch.tensor([1, 1, 0, 0])

    data = HeteroData()
    data['v'].num_nodes = 4
    data['w'].num_nodes = 4
    data['v', 'v'].edge_index = edge_index
    data['v', 'v'].edge_weight = edge_weight
    data['v', 'v'].edge_attr = edge_attr
    data['v', 'w'].edge_index = edge_index
    data['v', 'w'].edge_weight = edge_weight
    data['v', 'w'].edge_attr = edge_attr

    from torch_geometric.transforms import ToUndirected

    assert not data.is_undirected()
    data = ToUndirected()(data)
    assert data.is_undirected()

    assert data['v', 'v'].edge_index.tolist() == [[0, 1, 2, 3], [1, 0, 3, 2]]
    assert data['v', 'v'].edge_weight.tolist() == edge_weight[perm].tolist()
    assert data['v', 'v'].edge_attr.tolist() == edge_attr[perm].tolist()
    assert data['v', 'w'].edge_index.tolist() == edge_index.tolist()
    assert data['v', 'w'].edge_weight.tolist() == edge_weight.tolist()
    assert data['v', 'w'].edge_attr.tolist() == edge_attr.tolist()
    assert data['w', 'v'].edge_index.tolist() == [[3, 1], [2, 0]]
    assert data['w', 'v'].edge_weight.tolist() == edge_weight.tolist()
    assert data['w', 'v'].edge_attr.tolist() == edge_attr.tolist()
Example #3
def test_to_homogeneous():
    data = HeteroData()

    data['paper'].x = torch.randn(100, 128)
    data['author'].x = torch.randn(200, 128)

    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 250)
    data['paper', 'paper'].edge_weight = torch.randn(250, )
    data['paper', 'paper'].edge_attr = torch.randn(250, 64)

    data['paper', 'author'].edge_index = get_edge_index(100, 200, 500)
    data['paper', 'author'].edge_weight = torch.randn(500, )
    data['paper', 'author'].edge_attr = torch.randn(500, 64)

    data['author', 'paper'].edge_index = get_edge_index(200, 100, 1000)
    data['author', 'paper'].edge_weight = torch.randn(1000, )
    data['author', 'paper'].edge_attr = torch.randn(1000, 64)

    data = data.to_homogeneous()
    assert len(data) == 5
    assert data.num_nodes == 300
    assert data.num_edges == 1750
    assert data.num_node_features == 128
    assert data.num_edge_features == 64
    assert data.edge_type.size() == (1750, )
    assert data.edge_type.min() == 0
    assert data.edge_type.max() == 2
    assert len(data._node_slices) == 2
    assert len(data._edge_slices) == 3
    assert len(data._edge_type_dict) == 3
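A homogeneous graph produced by to_homogeneous() carries node_type and edge_type vectors, which should allow converting back. A short sketch, assuming the Data.to_heterogeneous round-trip API of recent PyG releases (toy tensors made up for illustration):

import torch
from torch_geometric.data import HeteroData

data = HeteroData()
data['paper'].x = torch.randn(4, 8)
data['author'].x = torch.randn(3, 8)
data['author', 'writes', 'paper'].edge_index = torch.tensor([[0, 1, 2], [0, 2, 3]])

homo = data.to_homogeneous()      # attaches node_type / edge_type vectors
assert homo.num_nodes == 7
back = homo.to_heterogeneous()    # round-trip back to a HeteroData object
assert set(back.node_types) == {'paper', 'author'}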
Example #4
def test_heterogeneous_neighbor_loader_on_cora(directed):
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)

    loader = NeighborLoader(hetero_data,
                            num_neighbors=[-1, -1],
                            batch_size=split_idx.numel(),
                            input_nodes=('paper', split_idx),
                            directed=directed)
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    if not directed:
        n_id, _, _, e_mask = k_hop_subgraph(split_idx,
                                            num_hops=2,
                                            edge_index=data.edge_index,
                                            num_nodes=data.num_nodes)

        n_id = n_id.sort()[0]
        assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
        assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)

    try:
        shutil.rmtree(root)
    except PermissionError:
        pass
Example #5
    def __call__(self, data: HeteroData) -> HeteroData:
        edge_types = data.edge_types  # save original edge types
        data.metapath_dict = {}

        for j, metapath in enumerate(self.metapaths):
            for edge_type in metapath:
                assert data._to_canonical(
                    edge_type) in edge_types, f"'{edge_type}' not present"

            edge_type = metapath[0]
            edge_weight = self._get_edge_weight(data, edge_type)
            adj1 = SparseTensor.from_edge_index(
                edge_index=data[edge_type].edge_index,
                sparse_sizes=data[edge_type].size(), edge_attr=edge_weight)

            if self.max_sample is not None:
                adj1 = self.sample_adj(adj1)

            for i, edge_type in enumerate(metapath[1:]):
                edge_weight = self._get_edge_weight(data, edge_type)
                adj2 = SparseTensor.from_edge_index(
                    edge_index=data[edge_type].edge_index,
                    sparse_sizes=data[edge_type].size(), edge_attr=edge_weight)

                adj1 = adj1 @ adj2

                if self.max_sample is not None:
                    adj1 = self.sample_adj(adj1)

            row, col, edge_weight = adj1.coo()
            new_edge_type = (metapath[0][0], f'metapath_{j}', metapath[-1][-1])
            data[new_edge_type].edge_index = torch.vstack([row, col])
            if self.weighted:
                data[new_edge_type].edge_weight = edge_weight
            data.metapath_dict[new_edge_type] = metapath

        if self.drop_orig_edges:
            for i in edge_types:
                if self.keep_same_node_type and i[0] == i[-1]:
                    continue
                else:
                    del data[i]

        # remove nodes not connected by any edge type.
        if self.drop_unconnected_nodes:
            new_edge_types = data.edge_types
            node_types = data.node_types
            connected_nodes = set()
            for i in new_edge_types:
                connected_nodes.add(i[0])
                connected_nodes.add(i[-1])
            for node in node_types:
                if node not in connected_nodes:
                    del data[node]

        return data
Example #6
def test_hetero_data_to_canonical():
    data = HeteroData()
    assert isinstance(data['user', 'product'], EdgeStorage)
    assert len(data.edge_types) == 1
    assert isinstance(data['user', 'to', 'product'], EdgeStorage)
    assert len(data.edge_types) == 1

    data = HeteroData()
    assert isinstance(data['user', 'buys', 'product'], EdgeStorage)
    assert isinstance(data['user', 'clicks', 'product'], EdgeStorage)
    assert len(data.edge_types) == 2

    with pytest.raises(TypeError, match="missing 1 required"):
        data['user', 'product']
Example #7
def test_hgt_loader_on_cora(get_dataset):
    dataset = get_dataset(name='Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)

    # Sample the complete two-hop neighborhood:
    loader = HGTLoader(hetero_data,
                       num_samples=[data.num_nodes] * 2,
                       batch_size=split_idx.numel(),
                       input_nodes=('paper', split_idx))
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    n_id, _, _, e_mask = k_hop_subgraph(split_idx,
                                        num_hops=2,
                                        edge_index=data.edge_index,
                                        num_nodes=data.num_nodes)

    n_id = n_id.sort()[0]
    assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
    assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)
Example #8
def test_hetero_data_functions():
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    data['paper', 'paper'].edge_attr = edge_attr_paper_paper
    assert len(data) == 3
    assert sorted(data.keys) == ['edge_attr', 'edge_index', 'x']
    assert 'x' in data and 'edge_index' in data and 'edge_attr' in data
    assert data.num_nodes == 15
    assert data.num_edges == 110

    assert data.num_node_features == {'paper': 16, 'author': 32}
    assert data.num_edge_features == {
        ('paper', 'to', 'paper'): 8,
        ('paper', 'to', 'author'): 0,
        ('author', 'to', 'paper'): 0,
    }

    node_types, edge_types = data.metadata()
    assert node_types == ['paper', 'author']
    assert edge_types == [
        ('paper', 'to', 'paper'),
        ('paper', 'to', 'author'),
        ('author', 'to', 'paper'),
    ]

    x_dict = data.collect('x')
    assert len(x_dict) == 2
    assert x_dict['paper'].tolist() == x_paper.tolist()
    assert x_dict['author'].tolist() == x_author.tolist()
    assert x_dict == data.x_dict

    data.y = 0
    assert data['y'] == 0 and data.y == 0
    assert len(data) == 4
    assert sorted(data.keys) == ['edge_attr', 'edge_index', 'x', 'y']

    del data['paper', 'author']
    node_types, edge_types = data.metadata()
    assert node_types == ['paper', 'author']
    assert edge_types == [('paper', 'to', 'paper'), ('author', 'to', 'paper')]

    assert len(data.to_dict()) == 5
    assert len(data.to_namedtuple()) == 5
    assert data.to_namedtuple().y == 0
    assert len(data.to_namedtuple().paper) == 1
Example #9
def test_hetero_data_subgraph():
    data = HeteroData()
    data.num_node_types = 3
    data['paper'].x = x_paper
    data['paper'].name = 'paper'
    data['paper'].num_nodes = x_paper.size(0)
    data['author'].x = x_author
    data['author'].num_nodes = x_author.size(0)
    data['conference'].x = x_conference
    data['conference'].num_nodes = x_conference.size(0)
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'paper'].edge_attr = edge_attr_paper_paper
    data['paper', 'paper'].name = 'cites'
    data['author', 'paper'].edge_index = edge_index_author_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['paper', 'conference'].edge_index = edge_index_paper_conference

    subset = {
        'paper': torch.randperm(x_paper.size(0))[:4],
        'author': torch.randperm(x_author.size(0))[:2]
    }

    out = data.subgraph(subset)

    assert out.num_node_types == data.num_node_types
    assert out.node_types == ['paper', 'author']

    assert len(out['paper']) == 3
    assert torch.allclose(out['paper'].x, data['paper'].x[subset['paper']])
    assert out['paper'].name == 'paper'
    assert out['paper'].num_nodes == 4
    assert len(out['author']) == 2
    assert torch.allclose(out['author'].x, data['author'].x[subset['author']])
    assert out['author'].num_nodes == 2

    assert out.edge_types == [
        ('paper', 'to', 'paper'),
        ('author', 'to', 'paper'),
        ('paper', 'to', 'author'),
    ]

    assert len(out['paper', 'paper']) == 3
    assert out['paper', 'paper'].edge_index is not None
    assert out['paper', 'paper'].edge_attr is not None
    assert out['paper', 'paper'].name == 'cites'
    assert len(out['paper', 'author']) == 1
    assert out['paper', 'author'].edge_index is not None
    assert len(out['author', 'paper']) == 1
    assert out['author', 'paper'].edge_index is not None
Example #10
def test_heterogeneous_link_neighbor_loader_loop(directed):
    torch.manual_seed(12345)

    data = HeteroData()

    data['paper'].x = torch.arange(100)
    data['author'].x = torch.arange(100, 300)

    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 500)
    data['paper', 'author'].edge_index = get_edge_index(100, 200, 1000)
    data['author', 'paper'].edge_index = get_edge_index(200, 100, 1000)

    loader = LinkNeighborLoader(data,
                                num_neighbors=[-1] * 2,
                                edge_label_index=('paper', 'paper'),
                                batch_size=20,
                                directed=directed)

    for batch in loader:
        assert batch['paper'].x.size(0) <= 100
        assert batch['paper'].x.min() >= 0 and batch['paper'].x.max() < 100

        # Assert positive samples are present in the original graph:
        edge_index = unique_edge_pairs(batch['paper', 'paper'].edge_index)
        edge_label_index = batch['paper', 'paper'].edge_label_index
        edge_label_index = unique_edge_pairs(edge_label_index)
        assert len(edge_index | edge_label_index) == len(edge_index)
Example #11
def test_hetero_conv(aggr):
    data = HeteroData()
    data['paper'].x = torch.randn(50, 32)
    data['author'].x = torch.randn(30, 64)
    data['paper', 'paper'].edge_index = get_edge_index(50, 50, 200)
    data['paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'paper'].edge_index = get_edge_index(30, 50, 100)
    data['paper', 'paper'].edge_weight = torch.rand(200)

    conv = HeteroConv(
        {
            ('paper', 'to', 'paper'): GCNConv(-1, 64),
            ('author', 'to', 'paper'): SAGEConv((-1, -1), 64),
            ('paper', 'to', 'author'): GATConv((-1, -1), 64),
        },
        aggr=aggr)

    assert len(list(conv.parameters())) > 0
    assert str(conv) == 'HeteroConv(num_relations=3)'

    out = conv(data.x_dict,
               data.edge_index_dict,
               edge_weight_dict=data.edge_weight_dict)

    assert len(out) == 2
    if aggr is not None:
        assert out['paper'].size() == (50, 64)
        assert out['author'].size() == (30, 64)
    else:
        assert out['paper'].size() == (50, 2, 64)
        assert out['author'].size() == (30, 1, 64)
Example #12
def test_copy_hetero_data():
    data = HeteroData()
    data['paper'].x = x_paper
    data['paper', 'to', 'paper'].edge_index = edge_index_paper_paper

    out = copy.copy(data)
    assert id(data) != id(out)
    assert len(data.stores) == len(out.stores)
    for store1, store2 in zip(data.stores, out.stores):
        assert id(store1) != id(store2)
        assert id(data) == id(store1._parent())
        assert id(out) == id(store2._parent())
    assert out['paper']._key == 'paper'
    assert data['paper'].x.data_ptr() == out['paper'].x.data_ptr()
    assert out['to']._key == ('paper', 'to', 'paper')
    assert data['to'].edge_index.data_ptr() == out['to'].edge_index.data_ptr()

    out = copy.deepcopy(data)
    assert id(data) != id(out)
    assert len(data.stores) == len(out.stores)
    for store1, store2 in zip(data.stores, out.stores):
        assert id(store1) != id(store2)
    assert id(out) == id(out['paper']._parent())
    assert out['paper']._key == 'paper'
    assert data['paper'].x.data_ptr() != out['paper'].x.data_ptr()
    assert data['paper'].x.tolist() == out['paper'].x.tolist()
    assert id(out) == id(out['to']._parent())
    assert out['to']._key == ('paper', 'to', 'paper')
    assert data['to'].edge_index.data_ptr() != out['to'].edge_index.data_ptr()
    assert data['to'].edge_index.tolist() == out['to'].edge_index.tolist()
Example #13
def test_remove_isolated_nodes_in_hetero_data():
    data = HeteroData()

    data['p'].x = torch.arange(6)
    data['a'].x = torch.arange(6)
    data['i'].num_nodes = 4

    # isolated paper nodes: {4}
    # isolated author nodes: {3, 4, 5}
    # isolated institution nodes: {0, 1, 2, 3}
    data['p', '1', 'p'].edge_index = torch.tensor([[0, 1, 2], [0, 1, 3]])
    data['p', '2', 'a'].edge_index = torch.tensor([[1, 3, 5], [0, 1, 2]])
    data['p', '2', 'a'].edge_attr = torch.arange(3)
    data['p', '3', 'a'].edge_index = torch.tensor([[5], [2]])

    data = RemoveIsolatedNodes()(data)

    assert len(data) == 4
    assert data['p'].num_nodes == 5
    assert data['a'].num_nodes == 3
    assert data['i'].num_nodes == 0

    assert data['p'].x.tolist() == [0, 1, 2, 3, 5]
    assert data['a'].x.tolist() == [0, 1, 2]

    assert data['1'].edge_index.tolist() == [[0, 1, 2], [0, 1, 3]]
    assert data['2'].edge_index.tolist() == [[1, 3, 4], [0, 1, 2]]
    assert data['2'].edge_attr.tolist() == [0, 1, 2]
    assert data['3'].edge_index.tolist() == [[4], [2]]
Example #14
    def __getitem__(self, time_index: Union[int, slice]):
        if isinstance(time_index, slice):
            snapshot = StaticHeteroGraphTemporalSignal(
                self.edge_index_dict, self.edge_weight_dict,
                self.feature_dicts[time_index], self.target_dicts[time_index],
                **{
                    key: getattr(self, key)[time_index]
                    for key in self.additional_feature_keys
                })
        else:
            x_dict = self._get_features(time_index)
            edge_index_dict = self._get_edge_index()
            edge_weight_dict = self._get_edge_weight()
            y_dict = self._get_target(time_index)
            additional_features = self._get_additional_features(time_index)

            snapshot = HeteroData()
            if x_dict:
                for key, value in x_dict.items():
                    snapshot[key].x = value
            if edge_index_dict:
                for key, value in edge_index_dict.items():
                    snapshot[key].edge_index = value
            if edge_weight_dict:
                for key, value in edge_weight_dict.items():
                    snapshot[key].edge_attr = value
            if y_dict:
                for key, value in y_dict.items():
                    snapshot[key].y = value
            if additional_features:
                for feature_name, feature_dict in additional_features.items():
                    if feature_dict:
                        for key, value in feature_dict.items():
                            snapshot[key][feature_name] = value
        return snapshot
Example #15
def test_hetero_conv_with_dot_syntax_node_types():
    data = HeteroData()
    data['src.paper'].x = torch.randn(50, 32)
    data['author'].x = torch.randn(30, 64)
    data['src.paper', 'src.paper'].edge_index = get_edge_index(50, 50, 200)
    data['src.paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'src.paper'].edge_index = get_edge_index(30, 50, 100)
    data['src.paper', 'src.paper'].edge_weight = torch.rand(200)

    conv = HeteroConv({
        ('src.paper', 'to', 'src.paper'):
        GCNConv(-1, 64),
        ('author', 'to', 'src.paper'):
        SAGEConv((-1, -1), 64),
        ('src.paper', 'to', 'author'):
        GATConv((-1, -1), 64, add_self_loops=False),
    })

    assert len(list(conv.parameters())) > 0
    assert str(conv) == 'HeteroConv(num_relations=3)'

    out = conv(data.x_dict,
               data.edge_index_dict,
               edge_weight_dict=data.edge_weight_dict)

    assert len(out) == 2
    assert out['src.paper'].size() == (50, 64)
    assert out['author'].size() == (30, 64)
Example #16
def test_init_hetero_data():
    data = HeteroData()
    data['v1'].x = 1
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']

    data = HeteroData(
        v1={'x': 1},
        paper={'x': x_paper},
        author={'x': x_author},
        paper__paper={'edge_index': edge_index_paper_paper},
        paper__author={'edge_index': edge_index_paper_author},
        author__paper={'edge_index': edge_index_author_paper},
    )
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']

    data = HeteroData({
        'v1': {
            'x': 1
        },
        'paper': {
            'x': x_paper
        },
        'author': {
            'x': x_author
        },
        ('paper', 'paper'): {
            'edge_index': edge_index_paper_paper
        },
        ('paper', 'author'): {
            'edge_index': edge_index_paper_author
        },
        ('author', 'paper'): {
            'edge_index': edge_index_author_paper
        },
    })
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']
Example #17
    def generate_graph(self):
        data = HeteroData()
        data = self.define_graph_nodes_and_labels(data)
        data = self.define_graph_edges(data)
        torch.save(
            data, ''.join(
                (self.seed_data_path, '_', self.file_type, '_data.pt')))
        return
Example #18
    def process(self):
        import pandas as pd

        data = HeteroData()

        path = osp.join(self.raw_dir, 'node-feat', 'paper', 'node-feat.csv.gz')
        x_paper = pd.read_csv(path, compression='gzip', header=None,
                              dtype=np.float32).values
        data['paper'].x = torch.from_numpy(x_paper)

        path = osp.join(self.raw_dir, 'node-feat', 'paper', 'node_year.csv.gz')
        year_paper = pd.read_csv(path, compression='gzip', header=None,
                                 dtype=np.int64).values
        data['paper'].year = torch.from_numpy(year_paper).view(-1)

        path = osp.join(self.raw_dir, 'node-label', 'paper',
                        'node-label.csv.gz')
        y_paper = pd.read_csv(path, compression='gzip', header=None,
                              dtype=np.int64).values.flatten()
        data['paper'].y = torch.from_numpy(y_paper)

        if self.preprocess is None:
            path = osp.join(self.raw_dir, 'num-node-dict.csv.gz')
            num_nodes_df = pd.read_csv(path, compression='gzip')
            for node_type in ['author', 'institution', 'field_of_study']:
                data[node_type].num_nodes = num_nodes_df[node_type].tolist()[0]
        else:
            emb_dict = torch.load(self.raw_paths[-1])
            for key, value in emb_dict.items():
                if key != 'paper':
                    data[key].x = value

        for edge_type in [('author', 'affiliated_with', 'institution'),
                          ('author', 'writes', 'paper'),
                          ('paper', 'cites', 'paper'),
                          ('paper', 'has_topic', 'field_of_study')]:

            f = '___'.join(edge_type)
            path = osp.join(self.raw_dir, 'relations', f, 'edge.csv.gz')
            edge_index = pd.read_csv(path, compression='gzip', header=None,
                                     dtype=np.int64).values
            edge_index = torch.from_numpy(edge_index).t().contiguous()
            data[edge_type].edge_index = edge_index

        for f, v in [('train', 'train'), ('valid', 'val'), ('test', 'test')]:
            path = osp.join(self.raw_dir, 'split', 'time', 'paper',
                            f'{f}.csv.gz')
            idx = pd.read_csv(path, compression='gzip', header=None,
                              dtype=np.int64).values.flatten()
            idx = torch.from_numpy(idx)
            mask = torch.zeros(data['paper'].num_nodes, dtype=torch.bool)
            mask[idx] = True
            data['paper'][f'{v}_mask'] = mask

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        torch.save(self.collate([data]), self.processed_paths[0])
Example #19
def test_hetero_normalize_scale():
    x = torch.tensor([[1, 0, 1], [0, 1, 0], [0, 0, 0]], dtype=torch.float)

    data = HeteroData()
    data['v'].x = x
    data['w'].x = x
    data = NormalizeFeatures()(data)
    assert data['v'].x.tolist() == [[0.5, 0, 0.5], [0, 1, 0], [0, 0, 0]]
    assert data['w'].x.tolist() == [[0.5, 0, 0.5], [0, 1, 0], [0, 0, 0]]
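The asserted output corresponds to row-wise sum normalization; a manual sketch of the same computation, for comparison with the transform's result:

import torch

x = torch.tensor([[1., 0., 1.], [0., 1., 0.], [0., 0., 0.]])
x_norm = x / x.sum(dim=-1, keepdim=True).clamp(min=1.0)
print(x_norm)  # [[0.5, 0.0, 0.5], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]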
Example #20
def test_hetero_data_rename():
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper

    data = data.rename('paper', 'article')
    assert data.node_types == ['author', 'article']
    assert data.edge_types == [
        ('article', 'to', 'article'),
        ('article', 'to', 'author'),
        ('author', 'to', 'article'),
    ]

    assert data['article'].x.tolist() == x_paper.tolist()
    edge_index = data['article', 'article'].edge_index
    assert edge_index.tolist() == edge_index_paper_paper.tolist()
Example #21
def test_add_metapaths():
    dblp = HeteroData()
    dblp['paper'].x = torch.ones(5)
    dblp['author'].x = torch.ones(6)
    dblp['conference'].x = torch.ones(3)
    dblp['paper', 'cites', 'paper'].edge_index = torch.tensor([[0, 1, 2, 3],
                                                               [1, 2, 4, 2]])
    dblp['paper', 'author'].edge_index = torch.tensor([[0, 1, 2, 3, 4],
                                                       [2, 2, 5, 2, 5]])
    dblp['author', 'paper'].edge_index = dblp['paper',
                                              'author'].edge_index[[1, 0]]
    dblp['conference', 'paper'].edge_index = torch.tensor([[0, 0, 1, 2, 2],
                                                           [0, 1, 2, 3, 4]])
    dblp['paper', 'conference'].edge_index = dblp['conference',
                                                  'paper'].edge_index[[1, 0]]

    # Test transform options:
    orig_edge_type = dblp.edge_types
    metapaths = [[('paper', 'conference'), ('conference', 'paper')]]
    meta1 = AddMetaPaths(metapaths)(dblp.clone())
    meta2 = AddMetaPaths(metapaths, drop_orig_edges=True)(dblp.clone())
    meta3 = AddMetaPaths(metapaths,
                         drop_orig_edges=True,
                         keep_same_node_type=True)(dblp.clone())
    meta4 = AddMetaPaths(metapaths,
                         drop_orig_edges=True,
                         keep_same_node_type=True,
                         drop_unconnected_nodes=True)(dblp.clone())

    assert meta1['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta2['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta3['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta4['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9

    assert all([i in meta1.edge_types for i in orig_edge_type])
    assert meta2.edge_types == [('paper', 'metapath_0', 'paper')]
    assert meta3.edge_types == [('paper', 'cites', 'paper'),
                                ('paper', 'metapath_0', 'paper')]
    assert meta4.edge_types == [('paper', 'cites', 'paper'),
                                ('paper', 'metapath_0', 'paper')]

    assert meta3.node_types == ['paper', 'author', 'conference']
    assert meta4.node_types == ['paper']

    # Test 4-hop metapath:
    metapaths = [[('author', 'paper'), ('paper', 'conference')],
                 [('author', 'paper'), ('paper', 'conference'),
                  ('conference', 'paper'), ('paper', 'author')]]
    meta1 = AddMetaPaths(metapaths)(dblp.clone())
    new_edge_types = [('author', 'metapath_0', 'conference'),
                      ('author', 'metapath_1', 'author')]
    assert meta1[new_edge_types[0]].edge_index.shape[-1] == 4
    assert meta1[new_edge_types[1]].edge_index.shape[-1] == 4

    # Test `metapath_dict` information:
    assert list(meta1.metapath_dict.values()) == metapaths
    assert list(meta1.metapath_dict.keys()) == new_edge_types
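Example #5 above also reads self.max_sample and self.weighted; assuming those are exposed as constructor arguments of AddMetaPaths (a hedged sketch, not verified against a specific release), a weighted variant of the call above might look like:

# Hedged sketch: `max_sample` and `weighted` are assumed constructor arguments,
# mirroring the attributes used in Example #5.
metapaths = [[('paper', 'conference'), ('conference', 'paper')]]
transform = AddMetaPaths(metapaths, drop_orig_edges=True,
                         keep_same_node_type=True,
                         max_sample=5,     # cap the fan-out after each hop
                         weighted=True)    # keep edge multiplicities as edge_weight
out = transform(dblp.clone())
print(out['paper', 'metapath_0', 'paper'].edge_weight)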
Example #22
    def generate_data(self) -> HeteroData:
        data = HeteroData()

        iterator = zip(self.node_types, self.num_channels)
        for i, (node_type, num_channels) in enumerate(iterator):
            num_nodes = get_num_nodes(self.avg_num_nodes, self.avg_degree)

            store = data[node_type]

            if num_channels > 0:
                store.x = torch.randn(num_nodes, num_channels)
            else:
                store.num_nodes = num_nodes

            if self._num_classes > 0 and self.task == 'node' and i == 0:
                store.y = torch.randint(self._num_classes, (num_nodes, ))

        for (src, rel, dst) in self.edge_types:
            store = data[(src, rel, dst)]

            store.edge_index = get_edge_index(
                data[src].num_nodes,
                data[dst].num_nodes,
                self.avg_degree,
                is_undirected=False,
                remove_loops=False,
            )

            if self.edge_dim > 1:
                store.edge_attr = torch.rand(store.num_edges, self.edge_dim)
            elif self.edge_dim == 1:
                store.edge_weight = torch.rand(store.num_edges)

        if self._num_classes > 0 and self.task == 'graph':
            data.y = torch.tensor([random.randint(0, self._num_classes - 1)])

        for feature_name, feature_shape in self.kwargs.items():
            setattr(data, feature_name, torch.randn(feature_shape))

        return data
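If this generator backs a fake-dataset helper such as torch_geometric.datasets.FakeHeteroDataset (an assumption, not confirmed by the snippet), usage could look like:

# Hedged sketch: assumes FakeHeteroDataset wraps a generator like the one above.
from torch_geometric.datasets import FakeHeteroDataset

dataset = FakeHeteroDataset()   # default settings
data = dataset[0]               # a randomly generated HeteroData object
print(data.node_types, data.edge_types)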
Example #23
def test_hgt_conv_out_of_place():
    data = HeteroData()
    data['author'].x = torch.randn(4, 16)
    data['paper'].x = torch.randn(6, 32)

    index1 = torch.randint(0, 4, (20, ), dtype=torch.long)
    index2 = torch.randint(0, 6, (20, ), dtype=torch.long)

    data['author', 'paper'].edge_index = torch.stack([index1, index2], dim=0)
    data['paper', 'author'].edge_index = torch.stack([index2, index1], dim=0)

    conv = HGTConv(-1, 64, data.metadata(), heads=1)

    x_dict, edge_index_dict = data.x_dict, data.edge_index_dict
    assert x_dict['author'].size() == (4, 16)
    assert x_dict['paper'].size() == (6, 32)

    _ = conv(x_dict, edge_index_dict)

    assert x_dict['author'].size() == (4, 16)
    assert x_dict['paper'].size() == (6, 32)
Example #24
def test_hetero_add_self_loops():
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = HeteroData()
    data['v'].num_nodes = 3
    data['w'].num_nodes = 3
    data['v', 'v'].edge_index = edge_index
    data['v', 'w'].edge_index = edge_index
    data = AddSelfLoops()(data)
    assert data['v', 'v'].edge_index.tolist() == [[0, 1, 1, 2, 0, 1, 2],
                                                  [1, 0, 2, 1, 0, 1, 2]]
    assert data['v', 'w'].edge_index.tolist() == edge_index.tolist()
Example #25
def test_hetero_in_memory_dataset():
    data1 = HeteroData()
    data1.y = torch.randn(5)
    data1['paper'].x = torch.randn(10, 16)
    data1['paper', 'paper'].edge_index = torch.randint(0, 10, (2, 30)).long()

    data2 = HeteroData()
    data2.y = torch.randn(5)
    data2['paper'].x = torch.randn(10, 16)
    data2['paper', 'paper'].edge_index = torch.randint(0, 10, (2, 30)).long()

    dataset = MyTestDataset([data1, data2])
    assert str(dataset) == 'MyTestDataset(2)'
    assert len(dataset) == 2

    assert len(dataset[0]) == 3
    assert dataset[0].y.tolist() == data1.y.tolist()
    assert dataset[0]['paper'].x.tolist() == data1['paper'].x.tolist()
    assert (dataset[0]['paper', 'paper'].edge_index.tolist() == data1[
        'paper', 'paper'].edge_index.tolist())

    assert len(dataset[1]) == 3
    assert dataset[1].y.tolist() == data2.y.tolist()
    assert dataset[1]['paper'].x.tolist() == data2['paper'].x.tolist()
    assert (dataset[1]['paper', 'paper'].edge_index.tolist() == data2[
        'paper', 'paper'].edge_index.tolist())
Example #26
def test_random_link_split_on_undirected_hetero_data():
    data = HeteroData()
    data['p'].x = torch.arange(100)
    data['p', 'p'].edge_index = get_edge_index(100, 100, 500)
    data['p', 'p'].edge_index = to_undirected(data['p', 'p'].edge_index)

    transform = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'))
    train_data, val_data, test_data = transform(data)
    assert train_data['p', 'p'].is_undirected()

    transform = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'),
                                rev_edge_types=('p', 'p'))
    train_data, val_data, test_data = transform(data)
    assert train_data['p', 'p'].is_undirected()
Example #27
def create_hetero_mock_data(n_count, feature_dict):
    _x_dict = {
        'author':
        torch.FloatTensor(
            np.random.uniform(0, 1, (n_count, feature_dict['author']))),
        'paper':
        torch.FloatTensor(
            np.random.uniform(0, 1, (n_count, feature_dict['paper'])))
    }
    _edge_index_dict = {
        ('author', 'writes', 'paper'):
        torch.LongTensor(get_edge_array(n_count))
    }

    data = HeteroData()
    data['author'].x = _x_dict['author']
    data['paper'].x = _x_dict['paper']
    data[('author', 'writes',
          'paper')].edge_index = _edge_index_dict[('author', 'writes',
                                                   'paper')]
    data = T.ToUndirected()(data)

    return data.x_dict, data.edge_index_dict, data.metadata()
Example #28
def test_init_hetero_data():
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    assert len(data) == 2

    data = HeteroData(
        paper={'x': x_paper},
        author={'x': x_author},
        paper__paper={'edge_index': edge_index_paper_paper},
        paper__author={'edge_index': edge_index_paper_author},
        author__paper={'edge_index': edge_index_author_paper},
    )
    assert len(data) == 2

    data = HeteroData({
        'paper': {
            'x': x_paper
        },
        'author': {
            'x': x_author
        },
        ('paper', 'paper'): {
            'edge_index': edge_index_paper_paper
        },
        ('paper', 'author'): {
            'edge_index': edge_index_paper_author
        },
        ('author', 'paper'): {
            'edge_index': edge_index_author_paper
        },
    })
    assert len(data) == 2
Example #29
def test_hetero_to_sparse_tensor():
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = HeteroData()
    data['v'].num_nodes = 3
    data['w'].num_nodes = 3
    data['v', 'v'].edge_index = edge_index
    data['v', 'w'].edge_index = edge_index
    data = ToSparseTensor()(data)
    assert data['v', 'v'].adj_t.storage.row().tolist() == [0, 1, 1, 2]
    assert data['v', 'v'].adj_t.storage.col().tolist() == [1, 0, 2, 1]
    assert data['v', 'v'].adj_t.storage.value() is None
    assert data['v', 'w'].adj_t.storage.row().tolist() == [0, 1, 1, 2]
    assert data['v', 'w'].adj_t.storage.col().tolist() == [1, 0, 2, 1]
    assert data['v', 'w'].adj_t.storage.value() is None
Example #30
def test_hetero_conv_with_custom_conv():
    data = HeteroData()
    data['paper'].x = torch.randn(50, 32)
    data['paper'].pos = torch.randn(50, 3)
    data['author'].x = torch.randn(30, 64)
    data['author'].pos = torch.randn(30, 3)
    data['paper', 'paper'].edge_index = get_edge_index(50, 50, 200)
    data['paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'paper'].edge_index = get_edge_index(30, 50, 100)

    conv = HeteroConv({key: CustomConv(64) for key in data.edge_types})
    out = conv(data.x_dict, data.edge_index_dict, data.pos_dict)
    assert len(out) == 2
    assert out['paper'].size() == (50, 64)
    assert out['author'].size() == (30, 64)