Example #1
    def process(self):
        for s, split in enumerate(['train', 'valid', 'test']):
            path = osp.join(self.raw_dir, '{}_graph.json'.format(split))
            with open(path, 'r') as f:
                G = nx.DiGraph(json_graph.node_link_graph(json.load(f)))

            x = np.load(osp.join(self.raw_dir, '{}_feats.npy'.format(split)))
            x = torch.from_numpy(x).to(torch.float)

            y = np.load(osp.join(self.raw_dir, '{}_labels.npy'.format(split)))
            y = torch.from_numpy(y).to(torch.float)

            data_list = []
            path = osp.join(self.raw_dir, '{}_graph_id.npy'.format(split))
            idx = torch.from_numpy(np.load(path)).to(torch.long)
            idx = idx - idx.min()

            # Build one Data object per graph id in this split.
            for i in range(idx.max().item() + 1):
                mask = idx == i

                G_s = G.subgraph(mask.nonzero().view(-1).tolist())
                edge_index = torch.tensor(list(G_s.edges)).t().contiguous()
                edge_index = edge_index - edge_index.min()
                edge_index, _ = remove_self_loops(edge_index)

                data = Data(edge_index=edge_index, x=x[mask], y=y[mask])

                if self.pre_filter is not None and not self.pre_filter(data):
                    continue

                if self.pre_transform is not None:
                    data = self.pre_transform(data)

                data_list.append(data)
            torch.save(self.collate(data_list), self.processed_paths[s])
Example #2
def edge_index_from_dict(graph_dict, num_nodes=None):
    row, col = [], []
    for key, value in graph_dict.items():
        row.append(np.repeat(key, len(value)))
        col.append(value)
    _row = np.concatenate(row)
    _col = np.concatenate(col)
    edge_index = np.stack([_row, _col], axis=0)

    # Orient every edge so that row > col; this also drops self-loops and maps
    # both directions of an edge to the same key, so coalescing de-duplicates them.
    row_dom = edge_index[:, _row > _col]
    col_dom = edge_index[:, _col > _row][[1, 0]]
    edge_index = np.concatenate([row_dom, col_dom], axis=1)
    _row, _col = edge_index

    order = np.lexsort((_col, _row))
    edge_index = edge_index[:, order]

    edge_index = torch.tensor(edge_index, dtype=torch.long)
    # There may be duplicated edges and self loops in the datasets.
    row, col, _ = coalesce(edge_index[0], edge_index[1])
    edge_index = torch.stack([row, col], dim=0)
    edge_index, _ = remove_self_loops(edge_index)
    row = torch.cat([edge_index[0], edge_index[1]])
    col = torch.cat([edge_index[1], edge_index[0]])
    edge_index = torch.stack([row, col])
    return edge_index
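A minimal usage sketch for the function above, assuming edge_index_from_dict and its dependencies (numpy, torch, and the project's coalesce and remove_self_loops) are already in scope; the toy graph_dict is made up for illustration:

# Hypothetical adjacency dict with a duplicated edge and a self-loop.
graph_dict = {0: [1, 1, 2], 1: [0], 2: [2]}
edge_index = edge_index_from_dict(graph_dict, num_nodes=3)
# Each remaining edge is stored in both directions; the duplicate (0, 1)
# and the 2 -> 2 self-loop are gone.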
Example #3
    def __call__(self, data):
        edge_index, edge_attr = data.edge_index, data.edge_attr
        n = data.num_nodes

        # The huge sentinel value marks the synthetic two-hop edges: with
        # op='min', the coalesce below keeps the original attribute whenever an
        # edge already exists, and leftover sentinels are zeroed out afterwards.
        fill = 1e16
        value = edge_index.new_full((edge_index.size(1), ),
                                    fill,
                                    dtype=torch.float)

        index, value = spspmm(edge_index, value, edge_index, value, n, n, n)
        index, value = remove_self_loops(index, value)

        edge_index = torch.cat([edge_index, index], dim=1)
        if edge_attr is None:
            data.edge_index, _ = coalesce(edge_index, None, n, n)
        else:
            value = value.view(-1, *[1 for _ in range(edge_attr.dim() - 1)])
            value = value.expand(-1, *list(edge_attr.size())[1:])
            edge_attr = torch.cat([edge_attr, value], dim=0)
            data.edge_index, edge_attr = coalesce(edge_index,
                                                  edge_attr,
                                                  n,
                                                  n,
                                                  op='min',
                                                  fill_value=fill)
            edge_attr[edge_attr >= fill] = 0
            data.edge_attr = edge_attr

        return data
Example #4
    def __init__(self, root, name):
        super(OGBNDataset, self).__init__(root)
        dataset = NodePropPredDataset(name, root)
        graph, y = dataset[0]
        x = torch.tensor(graph["node_feat"])
        y = torch.tensor(y.squeeze())
        row, col, edge_attr = coalesce(graph["edge_index"][0], graph["edge_index"][1], graph["edge_feat"])
        edge_index = torch.stack([row, col], dim=0)
        edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
        row = torch.cat([edge_index[0], edge_index[1]])
        col = torch.cat([edge_index[1], edge_index[0]])
        edge_index = torch.stack([row, col], dim=0)
        if edge_attr is not None:
            edge_attr = torch.cat([edge_attr, edge_attr], dim=0)

        self.data = Graph(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
        self.data.num_nodes = graph["num_nodes"]
        assert self.data.num_nodes == self.data.x.shape[0]

        # split
        split_index = dataset.get_idx_split()
        self.data.train_mask = torch.zeros(self.data.num_nodes, dtype=torch.bool)
        self.data.test_mask = torch.zeros(self.data.num_nodes, dtype=torch.bool)
        self.data.val_mask = torch.zeros(self.data.num_nodes, dtype=torch.bool)
        self.data.train_mask[split_index["train"]] = True
        self.data.test_mask[split_index["test"]] = True
        self.data.val_mask[split_index["valid"]] = True

        self.transform = None
Example #5
 def preprocess(self, n_cluster):
     save_name = f"{self.dataset_name}-{n_cluster}.cluster"
     if os.path.exists(save_name):
         return torch.load(save_name)
     print("Preprocessing...")
     edges = self.data.edge_index
     edges, _ = remove_self_loops(edges)
     if str(edges.device) != "cpu":
         edges = edges.cpu()
     edges = edges.numpy()
     num_nodes = np.max(edges) + 1
     adj = sp.csr_matrix((np.ones(edges.shape[1]), (edges[0], edges[1])),
                         shape=(num_nodes, num_nodes))
     indptr = adj.indptr
     # Split the CSR column indices per node to obtain the adjacency-list
     # format expected by the graph partitioner.
     adj_list = np.split(adj.indices, indptr[1:])[:-1]
     _, parts = ClusteredDataset.partition_tool.part_graph(adj_list,
                                                           n_cluster,
                                                           seed=1)
     division = [[] for _ in range(n_cluster)]
     for i, v in enumerate(parts):
         division[v].append(i)
     for k in range(len(division)):
         division[k] = np.array(division[k], dtype=int)  # np.int was removed in NumPy >= 1.24
     torch.save(division, save_name)
     print("Graph clustering done")
     return division
Example #6
    def forward(self, x, edge_index):
        """"""
        edge_index, _ = remove_self_loops(edge_index)
        # Apparently written against an older torch_geometric API in which
        # add_self_loops returned just edge_index (newer versions return a tuple).
        edge_index = add_self_loops(edge_index, num_nodes=x.size(0))

        x = torch.mm(x, self.weight).view(-1, self.heads, self.out_channels)
        return self.propagate(edge_index, x=x, num_nodes=x.size(0))
Example #7
 def forward(self, x, edge_index):
     h = x
     edge_index, _ = remove_self_loops(edge_index)
     for layer in self.layers:
         h = layer(h, edge_index)
         # h = F.leaky_relu(h)
         h = F.dropout(h, p=self.dropout, training=self.training)
     out = torch.matmul(h, self.weight) + self.bias
     return out
Example #8
 def forward(self, x, edge_index, edge_weight=None):
     edge_index, _ = remove_self_loops(edge_index)
     edge_weight = torch.ones(edge_index.shape[1]).to(x.device) if edge_weight is None else edge_weight
     adj = torch.sparse_coo_tensor(edge_index, edge_weight, (x.shape[0], x.shape[0]))
     adj = adj.to(x.device)
     out = (1 + self.eps) * x + torch.spmm(adj, x)
     if self.apply_func is not None:
         out = self.apply_func(out)
     return out
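This layer follows the GIN update, so self-loops are stripped before aggregation: the node's own feature already enters through the (1 + self.eps) * x term. Below is a standalone sketch of the same aggregation on a toy graph, assuming remove_self_loops from torch_geometric.utils (the original may import it from elsewhere), with eps = 0 and no apply_func:

import torch
from torch_geometric.utils import remove_self_loops

x = torch.rand(4, 8)
edge_index = torch.tensor([[0, 1, 2, 2, 3],
                           [1, 0, 3, 2, 2]])  # includes a 2 -> 2 self-loop

edge_index, _ = remove_self_loops(edge_index)  # self term is handled by (1 + eps) * x
edge_weight = torch.ones(edge_index.shape[1])
adj = torch.sparse_coo_tensor(edge_index, edge_weight, (x.shape[0], x.shape[0]))
out = (1 + 0.0) * x + torch.spmm(adj, x)  # eps = 0, no apply_func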
Example #9
 def normalization(self, H):
     norm_H = []
     for i in range(self.num_channels):
         edge, value = H[i]
         edge, value = remove_self_loops(edge, value)
         deg_row, deg_col = self.norm(edge.detach(), self.num_nodes,
                                      value.detach())
         value = deg_col * value
         norm_H.append((edge, value))
     return norm_H
Example #10
def edge_index_from_dict(graph_dict, num_nodes=None):
    row, col = [], []
    for key, value in graph_dict.items():
        row += repeat(key, len(value))
        col += value
    edge_index = torch.stack([torch.tensor(row), torch.tensor(col)], dim=0)
    # NOTE: There are duplicated edges and self loops in the datasets. Other
    # implementations do not remove them!
    edge_index, _ = remove_self_loops(edge_index)
    edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes)
    return edge_index
Example #11
def get_adj(row, col, asymm_norm=False, set_diag=True, remove_diag=False):
    edge_index = torch.stack([row, col])
    edge_attr = torch.ones(edge_index.shape[1]).to(edge_index.device)
    if set_diag:
        edge_index, edge_attr = add_remaining_self_loops(edge_index, edge_attr)
    elif remove_diag:
        edge_index, _ = remove_self_loops(edge_index)

    num_nodes = int(torch.max(edge_index)) + 1
    # Row normalization (D^-1 A) is the asymmetric variant; the symmetric
    # D^-1/2 A D^-1/2 normalization is the default.
    if asymm_norm:
        edge_attr = row_normalization(num_nodes, edge_index, edge_attr)
    else:
        edge_attr = symmetric_normalization(num_nodes, edge_index, edge_attr)
    return edge_index, edge_attr
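A short usage sketch for get_adj, assuming the row_normalization and symmetric_normalization helpers from the surrounding project are in scope; the toy edges are made up:

import torch

row = torch.tensor([0, 1, 1, 2, 2])
col = torch.tensor([1, 0, 2, 1, 2])  # the last pair is a 2 -> 2 self-loop
edge_index, edge_attr = get_adj(row, col, set_diag=False, remove_diag=True)
# With remove_diag=True the 2 -> 2 entry is dropped before normalization;
# with the default set_diag=True every node would instead get a self-loop.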
Example #12
    def __call__(self, data):
        pos = data.pos
        assert not pos.is_cuda

        tree = scipy.spatial.cKDTree(pos)
        indices = tree.query_ball_tree(tree, self.r)

        row, col = [], []
        for i, neighbors in enumerate(indices):
            row += repeat(i, len(neighbors))
            col += neighbors
        edge_index = torch.tensor([row, col])
        edge_index, _ = remove_self_loops(edge_index)

        data.edge_index = edge_index
        return data
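query_ball_tree reports every point as a neighbor of itself, which is exactly why remove_self_loops is applied at the end. A self-contained sketch of the same construction on a random point cloud, with a hypothetical radius of 0.25:

import torch
import scipy.spatial
from itertools import repeat
from torch_geometric.utils import remove_self_loops

pos = torch.rand(32, 3)  # random 3D point cloud
tree = scipy.spatial.cKDTree(pos)
indices = tree.query_ball_tree(tree, 0.25)  # neighbors within the radius

row, col = [], []
for i, neighbors in enumerate(indices):
    row += repeat(i, len(neighbors))
    col += neighbors
edge_index = torch.tensor([row, col])
edge_index, _ = remove_self_loops(edge_index)  # drop the i -> i pairs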
Example #13
    def norm(edge_index, num_nodes, edge_weight, gcn=False, dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ),
                                     dtype=dtype,
                                     device=edge_index.device)
        edge_weight = edge_weight.view(-1)
        assert edge_weight.size(0) == edge_index.size(1)

        edge_index, _ = remove_self_loops(edge_index)
        edge_index = add_self_loops(edge_index, num_nodes)
        loop_weight = torch.full((num_nodes, ),
                                 1 if gcn else 0,
                                 dtype=edge_weight.dtype,
                                 device=edge_weight.device)
        edge_weight = torch.cat([edge_weight, loop_weight], dim=0)

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        # Random-walk (D^-1) normalization of the edge weights.
        deg_inv = deg.pow(-1)
        deg_inv[deg_inv == float('inf')] = 0  # guard isolated nodes against inf/NaN

        return edge_index, deg_inv[row] * edge_weight
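A small numeric trace of the degree-inverse scaling that norm performs (ignoring the self-loop handling above; torch_scatter's scatter_add assumed, as in the snippet): each edge weight ends up divided by the out-degree of its source node.

import torch
from torch_scatter import scatter_add

edge_index = torch.tensor([[0, 0, 1], [1, 2, 0]])
edge_weight = torch.ones(3)
row = edge_index[0]
deg = scatter_add(edge_weight, row, dim=0, dim_size=3)  # out-degrees: [2., 1., 0.]
deg_inv = deg.pow(-1)
deg_inv[deg_inv == float('inf')] = 0  # guard isolated nodes
print(deg_inv[row] * edge_weight)  # tensor([0.5000, 0.5000, 1.0000])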
Example #14
File: data.py Project: sofyc/cogdl
 def remove_self_loops(self):
     edge_index = torch.stack([self.row, self.col])
     edge_index, self.weight = remove_self_loops(edge_index, self.weight)
     self.row, self.col = edge_index
     # `indicator` is defined earlier in the original method (not shown in this
     # excerpt); it presumably flags whether a CSR copy needs to be rebuilt.
     if indicator is True:
         self._to_csr()