def test_dynamic_addition():
    """Check that feature frames keep one row per node/edge as the graph grows."""
    num_nodes = 3
    feat_dim = 1
    graph = DGLGraph()

    def _edge_rows():
        # Row counts of both edge feature fields, read at call time.
        return (graph.edata['h1'].shape[0], graph.edata['h2'].shape[0])

    # Test node addition: both fields must cover the nodes added afterwards.
    graph.add_nodes(num_nodes)
    node_feats = {
        'h1': th.randn(num_nodes, feat_dim),
        'h2': th.randn(num_nodes, feat_dim),
    }
    graph.ndata.update(node_feats)
    graph.add_nodes(3)
    total_nodes = num_nodes + 3
    node_rows = (graph.ndata['h1'].shape[0], graph.ndata['h2'].shape[0])
    assert node_rows == (total_nodes, total_nodes)

    # Test edge addition: start with two edges carrying both fields.
    for src, dst in ((0, 1), (1, 0)):
        graph.add_edge(src, dst)
    edge_feats = {
        'h1': th.randn(2, feat_dim),
        'h2': th.randn(2, feat_dim),
    }
    graph.edata.update(edge_feats)
    assert _edge_rows() == (2, 2)

    # Two more edges; only 'h1' is overwritten, 'h2' must still have 4 rows.
    graph.add_edges([0, 2], [2, 0])
    graph.edata['h1'] = th.randn(4, feat_dim)
    assert _edge_rows() == (4, 4)

    # One more edge, with 'h1' written through the single-edge view.
    graph.add_edge(1, 2)
    graph.edges[4].data['h1'] = th.randn(1, feat_dim)
    assert _edge_rows() == (5, 5)
def test_send_twice_different_field():
    """Send two messages with different field names over one edge, then reduce both."""
    graph = DGLGraph()
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.add_nodes(2)
    graph.add_edge(0, 1)

    def _message_a(edges):
        return {'a': edges.src['a']}

    def _message_b(edges):
        return {'b': edges.src['b']}

    def _reduce(nodes):
        # Sum each mailbox field separately along the message axis.
        return {
            field: F.sum(nodes.mailbox[field], 1)
            for field in ('a', 'b')
        }

    old_a = F.randn((2, 5))
    old_b = F.randn((2, 5))
    graph.set_n_repr({'a': old_a, 'b': old_b})

    # Same edge, two sends with different message functions.
    for message_func in (_message_a, _message_b):
        graph.send((0, 1), message_func)
    graph.recv([1], _reduce)

    result = graph.get_n_repr()
    # Node 1 has a single in-edge from node 0, so it ends up with node 0's
    # original value in each field.
    for field, original in (('a', old_a), ('b', old_b)):
        assert F.allclose(result[field][1], original[0])
def _disabled_test_send_twice():
    """Disabled check of sending twice before a single recv with a max reducer."""
    # TODO(minjie): please re-enable this unittest after the send code problem is fixed.
    graph = DGLGraph()
    graph.add_nodes(3)
    for src in (0, 2):
        graph.add_edge(src, 1)

    def _message_a(edges):
        return {'a': edges.src['a']}

    def _message_b(edges):
        return {'a': edges.src['a'] * 3}

    def _reduce(nodes):
        return {'a': nodes.mailbox['a'].max(1)[0]}

    def _send_twice_then_recv(second_edge):
        # Reset the field, send on (0, 1) with message A, send on
        # `second_edge` with message B, then reduce at node 1.
        graph.ndata['a'] = old_repr
        graph.send((0, 1), _message_a)
        graph.send(second_edge, _message_b)
        graph.recv(1, _reduce)
        return graph.ndata['a']

    old_repr = th.randn(3, 5)

    # Both sends hit the same edge: the assertion expects the second message.
    new_repr = _send_twice_then_recv((0, 1))
    assert U.allclose(new_repr[1], old_repr[0] * 3)

    # Sends on two different edges: the assertion expects the element-wise max.
    new_repr = _send_twice_then_recv((2, 1))
    stacked = th.stack([old_repr[0], old_repr[2] * 3], 0)
    assert U.allclose(new_repr[1], stacked.max(0)[0])
def check_reduce_0deg(readonly):
    """Run update_all on a star graph (nodes 1-4 -> node 0) and check both node kinds.

    readonly: when true the graph is built from a scipy CSR matrix with
        ``readonly=True``; otherwise it is built with add_nodes/add_edge.
    """
    sources = list(range(1, 5))
    if readonly:
        row_idx = list(sources)
        col_idx = [0] * len(sources)
        ones = np.ones(shape=len(row_idx))
        csr = spsp.csr_matrix((ones, (row_idx, col_idx)), shape=(5, 5))
        g = DGLGraph(csr, readonly=True)
    else:
        g = DGLGraph()
        g.add_nodes(5)
        for src in sources:
            g.add_edge(src, 0)

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        return {'h': nodes.data['h'] + nodes.mailbox['m'].sum(1)}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills the field with the constant 2.
        return 2 + mx.nd.zeros(shape, dtype=dtype, ctx=ctx)

    g.set_n_initializer(_init2, 'h')
    old_repr = mx.nd.random.normal(shape=(5, 5))
    g.set_n_repr({'h': old_repr})
    g.update_all(_message, _reduce)
    new_repr = g.ndata['h']

    # Nodes 1..4 have no in-edges: expected to hold the initializer value.
    zero_deg_rows = new_repr[1:].asnumpy()
    assert np.allclose(zero_deg_rows, np.full((4, 5), 2.0))
    # Node 0: expected to equal the sum over all original rows.
    expected_sink = old_repr.sum(0).asnumpy()
    assert np.allclose(new_repr[0].asnumpy(), expected_sink)
def test_multi_recv_0deg():
    """Test repeated recv calls on a zero in-degree node and on its neighbour."""
    graph = DGLGraph()

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        return {'h': nodes.data['h'] + nodes.mailbox['m'].sum(1)}

    def _apply(nodes):
        return {'h': nodes.data['h'] * 2}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills the field with the constant 2.
        return 2 + th.zeros(shape, dtype=dtype, device=ctx)

    def _scaled_sum(factor):
        # Column sum of the original features, recomputed on every call.
        return th.sum(old, 0) * factor

    graph.register_message_func(_message)
    graph.register_reduce_func(_reduce)
    graph.register_apply_node_func(_apply)
    graph.set_n_initializer(_init2)
    graph.add_nodes(2)
    graph.add_edge(0, 1)

    # recv both 0deg and non-0deg nodes
    old = th.randn((2, 5))
    graph.ndata['h'] = old
    graph.send((0, 1))
    graph.recv([0, 1])
    new = graph.ndata['h']
    # 0deg check: initialized with the func and got applied
    assert U.allclose(new[0], th.full((5,), 4))
    # non-0deg check
    assert U.allclose(new[1], _scaled_sum(2))

    # recv again on zero degree node
    graph.recv([0])
    assert U.allclose(graph.nodes[0].data['h'], th.full((5,), 8))

    # recv again on node with no incoming message
    graph.recv([1])
    assert U.allclose(graph.nodes[1].data['h'], _scaled_sum(4))
def test_send_twice_different_msg():
    """Send two different messages before one recv and reduce them with max."""
    graph = DGLGraph()
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.add_nodes(3)
    for src in (0, 2):
        graph.add_edge(src, 1)

    def _message_a(edges):
        return {'a': edges.src['a']}

    def _message_b(edges):
        return {'a': edges.src['a'] * 3}

    def _reduce(nodes):
        return {'a': F.max(nodes.mailbox['a'], 1)}

    def _send_twice_then_recv(second_edge):
        # Reset the field, send on (0, 1) with message A, send on
        # `second_edge` with message B, then reduce at node 1.
        graph.ndata['a'] = old_repr
        graph.send((0, 1), _message_a)
        graph.send(second_edge, _message_b)
        graph.recv(1, _reduce)
        return graph.ndata['a']

    old_repr = F.randn((3, 5))

    # Both sends hit the same edge: the assertion expects the second message.
    new_repr = _send_twice_then_recv((0, 1))
    assert F.allclose(new_repr[1], old_repr[0] * 3)

    # Sends on two different edges: the assertion expects the element-wise max.
    new_repr = _send_twice_then_recv((2, 1))
    stacked = F.stack([old_repr[0], old_repr[2] * 3], 0)
    assert F.allclose(new_repr[1], F.max(stacked, 0))
def test_dynamic_addition():
    """Interleave graph growth with send/recv, then compare against a full round."""
    num_nodes = 3
    feat_dim = 1
    graph = DGLGraph()

    def _init(shape, dtype, ctx, ids):
        random_values = F.randn(shape)
        return F.copy_to(F.astype(random_values, dtype), ctx)

    graph.set_n_initializer(_init)
    graph.set_e_initializer(_init)

    def _message(edges):
        total = edges.src['h1'] + edges.dst['h2']
        total = total + edges.data['h1']
        total = total + edges.data['h2']
        return {'m': total}

    def _reduce(nodes):
        return {'h': F.sum(nodes.mailbox['m'], 1)}

    def _apply(nodes):
        return {'h': nodes.data['h']}

    graph.register_message_func(_message)
    graph.register_reduce_func(_reduce)
    graph.register_apply_node_func(_apply)
    # The random initializers registered above are replaced here.
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)

    # add nodes and edges
    graph.add_nodes(num_nodes)
    node_feats = {
        'h1': F.randn((num_nodes, feat_dim)),
        'h2': F.randn((num_nodes, feat_dim)),
    }
    graph.ndata.update(node_feats)
    graph.add_nodes(3)
    for src, dst in ((0, 1), (1, 0)):
        graph.add_edge(src, dst)
    edge_feats = {
        'h1': F.randn((2, feat_dim)),
        'h2': F.randn((2, feat_dim)),
    }
    graph.edata.update(edge_feats)
    graph.send()
    all_ones = F.ones((graph.number_of_edges(), ), dtype=F.int64)
    expected = F.copy_to(all_ones, F.cpu())
    assert F.array_equal(graph._get_msg_index().tousertensor(), expected)

    # add more edges
    graph.add_edges([0, 2], [2, 0], {'h1': F.randn((2, feat_dim))})
    graph.send(([0, 2], [2, 0]))
    graph.recv(0)

    graph.add_edge(1, 2)
    graph.edges[4].data['h1'] = F.randn((1, feat_dim))
    graph.send((1, 2))
    graph.recv([1, 2])

    incremental = graph.ndata.pop('h')

    # a complete round of send and recv
    graph.send()
    graph.recv()
    assert F.allclose(incremental, graph.ndata['h'])
def check_pull_0deg(readonly):
    """Exercise pull() on a two-node graph (edge 0 -> 1) for each choice of targets.

    readonly: when true the graph is built from a scipy CSR matrix with
        ``readonly=True``; otherwise it is built with add_nodes/add_edge.
    """
    if readonly:
        row_idx = [0]
        col_idx = [1]
        ones = np.ones(shape=len(row_idx))
        csr = spsp.csr_matrix((ones, (row_idx, col_idx)), shape=(2, 2))
        g = DGLGraph(csr, readonly=True)
    else:
        g = DGLGraph()
        g.add_nodes(2)
        g.add_edge(0, 1)

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        return {'h': nodes.mailbox['m'].sum(1)}

    def _apply(nodes):
        return {'h': nodes.data['h'] * 2}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills the field with the constant 2.
        return 2 + mx.nd.zeros(shape, dtype=dtype, ctx=ctx)

    def _pull_fresh(targets):
        # Restore the original features, pull into `targets`, return the result.
        g.ndata['h'] = old_repr
        g.pull(targets, _message, _reduce, _apply)
        return g.ndata['h']

    g.set_n_initializer(_init2, 'h')
    old_repr = mx.nd.random.normal(shape=(2, 5))

    # test#1: pull only 0-deg node
    new_repr = _pull_fresh(0)
    # 0deg check: equal to apply_nodes
    assert np.allclose(new_repr[0].asnumpy(), old_repr[0].asnumpy() * 2)
    # non-0deg check: untouched
    assert np.allclose(new_repr[1].asnumpy(), old_repr[1].asnumpy())

    # test#2: pull only the non-0deg node
    new_repr = _pull_fresh(1)
    # 0deg check: untouched
    assert np.allclose(new_repr[0].asnumpy(), old_repr[0].asnumpy())
    # non-0deg check: received node 0 and got applied
    assert np.allclose(new_repr[1].asnumpy(), old_repr[0].asnumpy() * 2)

    # test#3: pull both nodes
    new_repr = _pull_fresh([0, 1])
    # 0deg check: init and applied
    t = mx.nd.zeros(shape=(2, 5)) + 4
    assert np.allclose(new_repr[0].asnumpy(), t.asnumpy())
    # non-0deg check: received node 0 and applied
    assert np.allclose(new_repr[1].asnumpy(), old_repr[0].asnumpy() * 2)
def generate_graph(grad=False):
    """Build the 10-node, 17-edge test graph with random 'h' and 'l' features.

    grad: forwarded as ``requires_grad`` for both feature tensors.
    """
    graph = DGLGraph()
    graph.add_nodes(10)
    # Node 0 is the source and node 9 is the sink; every node in between
    # gets an edge from the source and an edge to the sink.
    for mid in range(1, 9):
        for src, dst in ((0, mid), (mid, 9)):
            graph.add_edge(src, dst)
    # Back flow from the sink to the source.
    graph.add_edge(9, 0)

    node_feat = Variable(th.randn(10, D), requires_grad=grad)
    edge_feat = Variable(th.randn(17, D), requires_grad=grad)
    graph.ndata['h'] = node_feat
    graph.edata['l'] = edge_feat
    return graph
def generate_graph(grad=False):
    """Build the 10-node, 16-edge test graph with random 'h' and 'w' features.

    grad: forwarded as ``requires_grad`` for both feature tensors.
    """
    graph = DGLGraph()
    graph.add_nodes(10)  # 10 nodes.
    # Node 0 is the source and node 9 is the sink; 8 intermediate nodes
    # with two edges each give 16 edges.
    for mid in range(1, 9):
        for src, dst in ((0, mid), (mid, 9)):
            graph.add_edge(src, dst)

    node_feat = Variable(th.randn(10, D), requires_grad=grad)
    edge_feat = Variable(th.randn(16, D), requires_grad=grad)

    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)
    graph.ndata['h'] = node_feat
    graph.edata['w'] = edge_feat
    return graph
def generate_graph(grad=False):
    """Build the 10-node, 17-edge test graph with random 'h' and 'l' features.

    grad: when true, both feature tensors are passed through F.attach_grad.
    """
    graph = DGLGraph()
    graph.add_nodes(10)
    # Node 0 is the source and node 9 is the sink; every node in between
    # gets an edge from the source and an edge to the sink.
    for mid in range(1, 9):
        for src, dst in ((0, mid), (mid, 9)):
            graph.add_edge(src, dst)
    # Back flow from the sink to the source.
    graph.add_edge(9, 0)

    node_feat = F.randn((10, D))
    edge_feat = F.randn((17, D))
    if grad:
        node_feat = F.attach_grad(node_feat)
        edge_feat = F.attach_grad(edge_feat)

    graph.ndata['h'] = node_feat
    graph.edata['l'] = edge_feat
    return graph
def generate_graph(grad=False):
    """Build the 10-node, 16-edge test graph with random 'h' and 'w' features.

    grad: when true, both feature tensors are passed through F.attach_grad.
    """
    graph = DGLGraph()
    graph.add_nodes(10)  # 10 nodes.
    # Node 0 is the source and node 9 is the sink; 8 intermediate nodes
    # with two edges each give 16 edges.
    for mid in range(1, 9):
        for src, dst in ((0, mid), (mid, 9)):
            graph.add_edge(src, dst)

    node_feat = F.randn((10, D))
    edge_feat = F.randn((16, D))
    if grad:
        node_feat = F.attach_grad(node_feat)
        edge_feat = F.attach_grad(edge_feat)

    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)
    graph.ndata['h'] = node_feat
    graph.edata['w'] = edge_feat
    return graph
def test_recv_0deg_newfld():
    """Test recv with 0deg nodes; the reducer also creates a new field."""
    graph = DGLGraph()
    graph.add_nodes(2)
    graph.add_edge(0, 1)

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        summed = mx.nd.sum(nodes.mailbox['m'], 1)
        return {'h1': nodes.data['h'] + summed}

    def _apply(nodes):
        return {'h1': nodes.data['h1'] * 2}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills the field with the constant 2.
        return 2 + mx.nd.zeros(shape=shape, dtype=dtype, ctx=ctx)

    graph.register_message_func(_message)
    graph.register_reduce_func(_reduce)
    graph.register_apply_node_func(_apply)

    # test#1: recv both 0deg and non-0deg nodes
    old = mx.nd.random.normal(shape=(2, 5))
    graph.set_n_initializer(_init2, 'h1')
    graph.ndata['h'] = old
    graph.send((0, 1))
    graph.recv([0, 1])
    new = graph.ndata.pop('h1')
    # 0deg check: initialized with the func and got applied
    zero_deg_row = new[0].asnumpy()
    assert np.allclose(zero_deg_row, np.full((5, ), 4))
    # non-0deg check
    expected_row = mx.nd.sum(old, 0).asnumpy() * 2
    assert np.allclose(new[1].asnumpy(), expected_row)

    # test#2: recv only 0deg node
    old = mx.nd.random.normal(shape=(2, 5))
    graph.ndata['h'] = old
    graph.ndata['h1'] = mx.nd.full((2, 5), -1)  # this is necessary
    graph.send((0, 1))
    graph.recv(0)
    new = graph.ndata.pop('h1')
    # 0deg check: fallback to apply
    assert np.allclose(new[0].asnumpy(), np.full((5, ), -2))
    # non-0deg check: not changed
    assert np.allclose(new[1].asnumpy(), np.full((5, ), -1))
def generate_graph(grad=False, readonly=False):
    """Build the 10-node, 17-edge test graph with random 'h' and 'w' features.

    grad: when true, attach_grad() is called on both feature arrays.
    readonly: when true the graph is built from a scipy CSR matrix with
        ``readonly=True``; otherwise it is built with add_nodes/add_edge.
    """
    # Node 0 is the source and node 9 is the sink; the last pair is the
    # back flow from 9 to 0.
    if readonly:
        row_idx, col_idx = [], []
        for mid in range(1, 9):
            row_idx += [0, mid]
            col_idx += [mid, 9]
        row_idx.append(9)
        col_idx.append(0)
        ones = np.ones(shape=len(row_idx))
        csr = spsp.csr_matrix((ones, (row_idx, col_idx)), shape=(10, 10))
        graph = DGLGraph(csr, readonly=True)
    else:
        graph = DGLGraph()
        graph.add_nodes(10)  # 10 nodes.
        for mid in range(1, 9):
            graph.add_edge(0, mid)
            graph.add_edge(mid, 9)
        graph.add_edge(9, 0)

    # Both construction paths attach features the same way.
    node_feat = mx.nd.random.normal(shape=(10, D))
    edge_feat = mx.nd.random.normal(shape=(17, D))
    if grad:
        node_feat.attach_grad()
        edge_feat.attach_grad()
    graph.ndata['h'] = node_feat
    graph.edata['w'] = edge_feat
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)
    return graph
def test_recv_0deg():
    """Test recv with 0deg nodes."""
    graph = DGLGraph()
    graph.add_nodes(2)
    graph.add_edge(0, 1)

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        return {'h': nodes.data['h'] + nodes.mailbox['m'].sum(1)}

    def _apply(nodes):
        return {'h': nodes.data['h'] * 2}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills the field with the constant 2.
        return 2 + th.zeros(shape, dtype=dtype, device=ctx)

    graph.register_message_func(_message)
    graph.register_reduce_func(_reduce)
    graph.register_apply_node_func(_apply)
    graph.set_n_initializer(_init2, 'h')

    # test#1: recv both 0deg and non-0deg nodes
    old = th.randn((2, 5))
    graph.ndata['h'] = old
    graph.send((0, 1))
    graph.recv([0, 1])
    new = graph.ndata.pop('h')
    # 0deg check: initialized with the func and got applied
    assert U.allclose(new[0], th.full((5, ), 4))
    # non-0deg check
    expected_row = th.sum(old, 0) * 2
    assert U.allclose(new[1], expected_row)

    # test#2: recv only 0deg node is equal to apply
    old = th.randn((2, 5))
    graph.ndata['h'] = old
    graph.send((0, 1))
    graph.recv(0)
    new = graph.ndata.pop('h')
    # 0deg check: equal to apply_nodes
    assert U.allclose(new[0], 2 * old[0])
    # non-0deg check: untouched
    assert U.allclose(new[1], old[1])
def test_update_all_0deg():
    """Check update_all on a star graph and on a graph with no edges."""

    def _message(edges):
        return {'m': edges.src['h']}

    def _reduce(nodes):
        summed = mx.nd.sum(nodes.mailbox['m'], 1)
        return {'h': nodes.data['h'] + summed}

    def _apply(nodes):
        return {'h': nodes.data['h'] * 2}

    def _init2(shape, dtype, ctx, ids):
        # Initializer that fills the field with the constant 2.
        return 2 + mx.nd.zeros(shape, dtype=dtype, ctx=ctx)

    # test#1: nodes 1..4 each send to node 0
    star = DGLGraph()
    star.add_nodes(5)
    for src in range(1, 5):
        star.add_edge(src, 0)
    star.set_n_initializer(_init2, 'h')
    old_repr = mx.nd.random.normal(shape=(5, 5))
    star.ndata['h'] = old_repr
    star.update_all(_message, _reduce, _apply)
    new_repr = star.ndata['h']
    # the first row of the new_repr should be the sum of all the node
    # features; while the 0-deg nodes should be initialized by the
    # initializer and applied with UDF.
    expected_zero_deg = 2 * (2 + np.zeros((4, 5)))
    assert np.allclose(new_repr[1:].asnumpy(), expected_zero_deg)
    expected_sink = 2 * mx.nd.sum(old_repr, 0).asnumpy()
    assert np.allclose(new_repr[0].asnumpy(), expected_sink)

    # test#2: graph with no edge
    isolated = DGLGraph()
    isolated.add_nodes(5)
    isolated.set_n_initializer(_init2, 'h')
    isolated.ndata['h'] = old_repr
    isolated.update_all(_message, _reduce, _apply)
    new_repr = isolated.ndata['h']
    # should fallback to apply
    assert np.allclose(new_repr.asnumpy(), 2 * old_repr.asnumpy())
def __init__(self, split):
    """Build the graphs and per-edge targets for one dataset split.

    The first 70% of the module-level ``random_index_list`` feeds the
    ``'train'`` split and the remainder feeds ``'val'``. Any other value
    of ``split`` leaves ``data_list`` and ``gt_list`` empty.

    Args:
        split: Name of the split to build, ``'train'`` or ``'val'``.
    """
    super(DGLDataset, self).__init__()
    self.device = torch.device("cuda")
    self.split = split
    self.data_list = []
    self.gt_list = []
    n7 = int(len(random_index_list) * 0.7)

    if split == 'train':
        self._append_graphs(random_index_list[:n7])
        print('len data ', len(self.data_list))
        print('len gt ', len(self.gt_list))

    if split == 'val':
        # Kept because the attribute has always been created for 'val';
        # nothing in this constructor fills it.
        self.val_data_list = []
        self._append_graphs(random_index_list[n7:])
        print('len v data ', len(self.data_list))
        print('len v gt ', len(self.gt_list))

def _append_graphs(self, indices):
    """Turn each selected sample into a DGLGraph and append it to the split.

    For every sample one graph goes to ``self.data_list`` and the list of
    its edge ``'sc'`` values goes to ``self.gt_list``.

    Args:
        indices: Iterable of positions taken from ``random_index_list``.
    """
    for i in indices:
        # NOTE(review): ``i`` already comes from random_index_list and is
        # used to index it a second time - confirm the double lookup is
        # intended (it is preserved here unchanged).
        sample = train_data_list[random_index_list[i]]
        nodes = sample['nodes']
        edges = sample['edges']

        g = DGLGraph()
        g.add_nodes(len(nodes))

        # Each node record carries 'idx', 't', 'x', 'y' and 'z'; the node
        # feature 'h' is the 1x4 row [t, x, y, z].
        for node_info in nodes:
            idx = int(node_info['idx'])
            tp = int(node_info['t'])
            x = float(node_info['x'])
            y = float(node_info['y'])
            z = float(node_info['z'])
            g.nodes[idx].data['h'] = torch.tensor([[tp, x, y, z]]).cuda()

        edge_types = []
        gt = []
        for edge_info in edges:
            idx0 = int(edge_info['index0'])
            idx1 = int(edge_info['index1'])
            et = int(edge_info['et'])
            sc = float(edge_info['sc'])
            g.add_edge(idx0, idx1)
            edge_types.append([et])
            gt.append(sc)

        # One row per edge, in insertion order.
        g.edata['we'] = torch.tensor(edge_types).cuda()

        self.data_list.append(g)
        self.gt_list.append(gt)
def test_send_multigraph():
    """Check send/recv by edge id and by endpoint pair on a multigraph.

    Edges 0, 1 and 2 all go from node 0 to node 1; edge 3 goes from node 2
    to node 1.
    """
    graph = DGLGraph(multigraph=True)
    graph.add_nodes(3)
    for src in (0, 0, 0, 2):
        graph.add_edge(src, 1)

    def _message_a(edges):
        return {'a': edges.data['a']}

    def _message_b(edges):
        return {'a': edges.data['a'] * 3}

    def _reduce(nodes):
        return {'a': nodes.mailbox['a'].max(1)[0]}

    def answer(*args):
        return th.stack(args, 0).max(0)[0]

    def _reset():
        # Zero the node field and restore the edge field before each case.
        graph.ndata['a'] = th.zeros(3, 5)
        graph.edata['a'] = old_repr

    def _recv_at_node1():
        graph.recv(1, _reduce)
        return graph.ndata['a']

    old_repr = th.randn(4, 5)

    # send by eid
    _reset()
    graph.send([0, 2], message_func=_message_a)
    new_repr = _recv_at_node1()
    assert U.allclose(new_repr[1], answer(old_repr[0], old_repr[2]))

    _reset()
    graph.send([0, 2, 3], message_func=_message_a)
    new_repr = _recv_at_node1()
    expected = answer(old_repr[0], old_repr[2], old_repr[3])
    assert U.allclose(new_repr[1], expected)

    # send on multigraph
    _reset()
    graph.send(([0, 2], [1, 1]), _message_a)
    new_repr = _recv_at_node1()
    assert U.allclose(new_repr[1], old_repr.max(0)[0])

    # consecutive send and send_on
    _reset()
    graph.send((2, 1), _message_a)
    graph.send([0, 1], message_func=_message_b)
    new_repr = _recv_at_node1()
    expected = answer(old_repr[0] * 3, old_repr[1] * 3, old_repr[3])
    assert U.allclose(new_repr[1], expected)

    # consecutive send_on
    _reset()
    graph.send(0, message_func=_message_a)
    graph.send(1, message_func=_message_b)
    new_repr = _recv_at_node1()
    assert U.allclose(new_repr[1], answer(old_repr[0], old_repr[1] * 3))

    # send_and_recv_on
    _reset()
    graph.send_and_recv([0, 2, 3], message_func=_message_a,
                        reduce_func=_reduce)
    new_repr = graph.ndata['a']
    expected = answer(old_repr[0], old_repr[2], old_repr[3])
    assert U.allclose(new_repr[1], expected)
    assert U.allclose(new_repr[[0, 2]], th.zeros(2, 5))
def _load(self):
    """ Loads input dataset from dataset/NAME/NAME.txt file

    File layout as read here: the first line is the number of graphs.
    Each graph starts with a line ``n_nodes graph_label`` followed by one
    line per node: ``node_label n_edges neighbor_1 ... neighbor_k`` with
    optional float node attributes after the neighbour ids.

    Graphs are appended to ``self.graphs`` and their relabelled classes to
    ``self.labels``. When no node attributes were found, one-hot ``'attr'``
    features are built from node degrees (``self.degree_as_nlabel``) or
    from node labels.

    Raises:
        Exception: if a node row has fewer entries than its declared edge
            count requires.
    """
    print('loading data...')
    with open(self.file, 'r') as f:
        # line_1 == N, total number of graphs
        self.N = int(f.readline().strip())

        for i in range(self.N):
            if (i + 1) % 10 == 0 and self.verbosity is True:
                print('processing graph {}...'.format(i + 1))

            grow = f.readline().strip().split()
            # line_2 == [n_nodes, l] is equal to
            # [node number of a graph, class label of a graph]
            n_nodes, glabel = [int(w) for w in grow]

            # relabel graphs
            if glabel not in self.glabel_dict:
                mapped = len(self.glabel_dict)
                self.glabel_dict[glabel] = mapped

            self.labels.append(self.glabel_dict[glabel])

            g = DGLGraph()
            g.add_nodes(n_nodes)

            nlabels = []  # node labels
            nattrs = []  # node attributes if it has
            m_edges = 0

            for j in range(n_nodes):
                nrow = f.readline().strip().split()

                # handle edges and attributes(if has)
                tmp = int(nrow[1]) + 2  # tmp == 2 + #edges
                if tmp == len(nrow):
                    # no node attributes
                    nrow = [int(w) for w in nrow]
                elif tmp < len(nrow):
                    # Everything after the neighbour ids is a float
                    # attribute. Slice it off BEFORE truncating nrow,
                    # otherwise the attribute tail is already gone.
                    nattrs.append([float(w) for w in nrow[tmp:]])
                    nrow = [int(w) for w in nrow[:tmp]]
                else:
                    # Fewer entries than the declared edge count needs.
                    raise Exception('edge number is incorrect!')

                # relabel nodes if it has labels
                # if it doesn't have node labels, then every nrow[0]==0
                if nrow[0] not in self.nlabel_dict:
                    mapped = len(self.nlabel_dict)
                    self.nlabel_dict[nrow[0]] = mapped

                # The raw label is stored; nlabel_dict only records the
                # set of labels seen.
                nlabels.append(nrow[0])

                m_edges += nrow[1]
                g.add_edges(j, nrow[2:])

                # add self loop
                if self.self_loop:
                    m_edges += 1
                    g.add_edge(j, j)

                if (j + 1) % 10 == 0 and self.verbosity is True:
                    print(
                        'processing node {} of graph {}...'.format(
                            j + 1, i + 1))
                    print('this node has {} edgs.'.format(
                        nrow[1]))

            if nattrs:
                g.ndata['attr'] = np.stack(nattrs)
                self.nattrs_flag = True

            g.ndata['label'] = np.array(nlabels)
            if len(self.nlabel_dict) > 1:
                self.nlabels_flag = True

            assert len(g) == n_nodes

            # update statistics of graphs
            self.n += n_nodes
            self.m += m_edges

            self.graphs.append(g)

    # if no attr
    if not self.nattrs_flag:
        print('there are no node features in this dataset!')
        label2idx = {}
        # generate node attr by node degree
        if self.degree_as_nlabel:
            print('generate node features by node degree...')
            nlabel_set = set()
            for g in self.graphs:
                # actually this label shouldn't be updated
                # in case users want to keep it
                # but usually no features means no labels, fine.
                g.ndata['label'] = g.in_degrees()
                # extracting unique node labels
                nlabel_set = nlabel_set.union(
                    set(g.ndata['label'].numpy()))

            nlabel_set = list(nlabel_set)

            # in case the labels/degrees are not continuous number
            self.ndegree_dict = {
                label: idx for idx, label in enumerate(nlabel_set)
            }
            label2idx = self.ndegree_dict
        # generate node attr by node label
        else:
            print('generate node features by node label...')
            label2idx = self.nlabel_dict

        # One-hot encode each node's label over the label2idx columns.
        for g in self.graphs:
            g.ndata['attr'] = np.zeros((
                g.number_of_nodes(), len(label2idx)))
            g.ndata['attr'][range(g.number_of_nodes(
            )), [label2idx[nl.item()] for nl in g.ndata['label']]] = 1

    # after load, get the #classes and #dim
    self.gclasses = len(self.glabel_dict)
    self.nclasses = len(self.nlabel_dict)
    self.eclasses = len(self.elabel_dict)
    self.dim_nfeats = len(self.graphs[0].ndata['attr'][0])

    print('Done.')
    # NOTE(review): self.ndegree_dict is only assigned above when
    # degree_as_nlabel is set - presumably it is initialised elsewhere;
    # confirm.
    print(
        """
        -------- Data Statistics --------'
        #Graphs: %d
        #Graph Classes: %d
        #Nodes: %d
        #Node Classes: %d
        #Node Features Dim: %d
        #Edges: %d
        #Edge Classes: %d
        Avg. of #Nodes: %.2f
        Avg. of #Edges: %.2f
        Graph Relabeled: %s
        Node Relabeled: %s
        Degree Relabeled(If degree_as_nlabel=True): %s \n """ % (
            self.N, self.gclasses, self.n, self.nclasses,
            self.dim_nfeats, self.m, self.eclasses,
            self.n / self.N, self.m / self.N, self.glabel_dict,
            self.nlabel_dict, self.ndegree_dict))