def start_client(num_clients, num_servers): os.environ['DGL_DIST_MODE'] = 'distributed' # Note: connect to server first ! dgl.distributed.initialize(ip_config='kv_ip_config.txt') # Init kvclient kvclient = dgl.distributed.KVClient(ip_config='kv_ip_config.txt', num_servers=num_servers) kvclient.map_shared_data(partition_book=gpb) assert dgl.distributed.get_num_client() == num_clients kvclient.init_data(name='data_1', shape=F.shape(data_1), dtype=F.dtype(data_1), part_policy=edge_policy, init_func=init_zero_func) kvclient.init_data(name='data_2', shape=F.shape(data_2), dtype=F.dtype(data_2), part_policy=node_policy, init_func=init_zero_func) # Test data_name_list name_list = kvclient.data_name_list() print(name_list) assert 'data_0' in name_list assert 'data_0_1' in name_list assert 'data_0_2' in name_list assert 'data_0_3' in name_list assert 'data_1' in name_list assert 'data_2' in name_list # Test get_meta_data meta = kvclient.get_data_meta('data_0') dtype, shape, policy = meta assert dtype == F.dtype(data_0) assert shape == F.shape(data_0) assert policy.policy_str == 'node:_N' meta = kvclient.get_data_meta('data_0_1') dtype, shape, policy = meta assert dtype == F.dtype(data_0_1) assert shape == F.shape(data_0_1) assert policy.policy_str == 'node:_N' meta = kvclient.get_data_meta('data_0_2') dtype, shape, policy = meta assert dtype == F.dtype(data_0_2) assert shape == F.shape(data_0_2) assert policy.policy_str == 'node:_N' meta = kvclient.get_data_meta('data_0_3') dtype, shape, policy = meta assert dtype == F.dtype(data_0_3) assert shape == F.shape(data_0_3) assert policy.policy_str == 'node:_N' meta = kvclient.get_data_meta('data_1') dtype, shape, policy = meta assert dtype == F.dtype(data_1) assert shape == F.shape(data_1) assert policy.policy_str == 'edge:_E' meta = kvclient.get_data_meta('data_2') dtype, shape, policy = meta assert dtype == F.dtype(data_2) assert shape == F.shape(data_2) assert policy.policy_str == 'node:_N' # Test push and pull id_tensor = F.tensor([0, 2, 4], F.int64) data_tensor = F.tensor([[6., 6.], [6., 6.], [6., 6.]], F.float32) kvclient.push(name='data_0', id_tensor=id_tensor, data_tensor=data_tensor) kvclient.push(name='data_1', id_tensor=id_tensor, data_tensor=data_tensor) kvclient.push(name='data_2', id_tensor=id_tensor, data_tensor=data_tensor) res = kvclient.pull(name='data_0', id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) res = kvclient.pull(name='data_1', id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) res = kvclient.pull(name='data_2', id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) # Register new push handler kvclient.register_push_handler('data_0', udf_push) kvclient.register_push_handler('data_1', udf_push) kvclient.register_push_handler('data_2', udf_push) # Test push and pull kvclient.push(name='data_0', id_tensor=id_tensor, data_tensor=data_tensor) kvclient.push(name='data_1', id_tensor=id_tensor, data_tensor=data_tensor) kvclient.push(name='data_2', id_tensor=id_tensor, data_tensor=data_tensor) kvclient.barrier() data_tensor = data_tensor * data_tensor res = kvclient.pull(name='data_0', id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) res = kvclient.pull(name='data_1', id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) res = kvclient.pull(name='data_2', id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) # Test delete data kvclient.delete_data('data_0') 
kvclient.delete_data('data_1') kvclient.delete_data('data_2') # Register new push handler kvclient.init_data(name='data_3', shape=F.shape(data_2), dtype=F.dtype(data_2), part_policy=node_policy, init_func=init_zero_func) kvclient.register_push_handler('data_3', add_push) data_tensor = F.tensor([[6., 6.], [6., 6.], [6., 6.]], F.float32) kvclient.barrier() time.sleep(kvclient.client_id + 1) print("add...") kvclient.push(name='data_3', id_tensor=id_tensor, data_tensor=data_tensor) kvclient.barrier() res = kvclient.pull(name='data_3', id_tensor=id_tensor) data_tensor = data_tensor * num_clients assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
def foo(g): g = g.local_var() g.nodes[0].data['h'] = F.ones((1, 1)) assert F.allclose(g.ndata['h'], F.tensor([[1.], [0.]]))
def foo(g): with g.local_scope(): g.edges[0, 1].data['h'] = F.ones((1, 1)) assert F.allclose(g.edata['h'], F.ones((2, 1))) g.edges[0, 1].data['w'] = F.ones((1, 1)) assert F.allclose(g.edata['w'], F.tensor([[1.], [0.]]))
def test_to_bidirected(): # homogeneous graph g = dgl.graph((F.tensor([0, 1, 3, 1]), F.tensor([1, 2, 0, 2]))) g.ndata['h'] = F.tensor([[0.], [1.], [2.], [1.]]) g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]]) bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True) u, v = g.edges() ub, vb = bg.edges() assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) assert F.array_equal(g.ndata['h'], bg.ndata['h']) assert F.array_equal(F.cat([g.edata['h'], g.edata['h']], dim=0), bg.edata['h']) bg.ndata['hh'] = F.tensor([[0.], [1.], [2.], [1.]]) assert ('hh' in g.ndata) is False bg.edata['hh'] = F.tensor([[0.], [1.], [2.], [1.], [0.], [1.], [2.], [1.]]) assert ('hh' in g.edata) is False # donot share ndata and edata bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False) ub, vb = bg.edges() assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) assert ('h' in bg.ndata) is False assert ('h' in bg.edata) is False # zero edge graph g = dgl.graph([]) bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True) # heterogeneous graph g = dgl.heterograph({ ('user', 'wins', 'user'): (F.tensor([0, 2, 0, 2, 2]), F.tensor([1, 1, 2, 1, 0])), ('user', 'plays', 'game'): (F.tensor([1, 2, 1]), F.tensor([2, 1, 1])), ('user', 'follows', 'user'): (F.tensor([1, 2, 1]), F.tensor([0, 0, 0])) }) g.nodes['game'].data['hv'] = F.ones((3, 1)) g.nodes['user'].data['hv'] = F.ones((3, 1)) g.edges['wins'].data['h'] = F.tensor([0, 1, 2, 3, 4]) bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True, ignore_bipartite=True) assert F.array_equal(g.nodes['game'].data['hv'], bg.nodes['game'].data['hv']) assert F.array_equal(g.nodes['user'].data['hv'], bg.nodes['user'].data['hv']) u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user')) ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user')) assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) assert F.array_equal( F.cat([g.edges['wins'].data['h'], g.edges['wins'].data['h']], dim=0), bg.edges['wins'].data['h']) u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user')) ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user')) assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game')) ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game')) assert F.array_equal(u, ub) assert F.array_equal(v, vb) assert len(bg.edges['plays'].data) == 0 assert len(bg.edges['follows'].data) == 0 # donot share ndata and edata bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False, ignore_bipartite=True) assert len(bg.edges['wins'].data) == 0 assert len(bg.edges['plays'].data) == 0 assert len(bg.edges['follows'].data) == 0 assert len(bg.nodes['game'].data) == 0 assert len(bg.nodes['user'].data) == 0 u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user')) ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user')) assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user')) ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user')) assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game')) ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game')) assert F.array_equal(u, ub) assert F.array_equal(v, vb)
def test_to_simple(index_dtype): # homogeneous graph g = dgl.graph((F.tensor([0, 1, 2, 1]), F.tensor([1, 2, 0, 2]))) g.ndata['h'] = F.tensor([[0.], [1.], [2.]]) g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]]) sg, wb = dgl.to_simple(g, writeback_mapping=True) u, v = g.all_edges(form='uv', order='eid') u = F.asnumpy(u).tolist() v = F.asnumpy(v).tolist() uv = list(zip(u, v)) eid_map = F.asnumpy(wb) su, sv = sg.all_edges(form='uv', order='eid') su = F.asnumpy(su).tolist() sv = F.asnumpy(sv).tolist() suv = list(zip(su, sv)) sc = F.asnumpy(sg.edata['count']) assert set(uv) == set(suv) for i, e in enumerate(suv): assert sc[i] == sum(e == _e for _e in uv) for i, e in enumerate(uv): assert eid_map[i] == suv.index(e) # shared ndata assert F.array_equal(sg.ndata['h'], g.ndata['h']) assert 'h' not in sg.edata # new ndata to sg sg.ndata['hh'] = F.tensor([[0.], [1.], [2.]]) assert 'hh' not in g.ndata sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False) assert 'h' not in sg.ndata assert 'h' not in sg.edata # heterogeneous graph g = dgl.heterograph( { ('user', 'follow', 'user'): ([0, 1, 2, 1, 1, 1], [1, 3, 2, 3, 4, 4]), ('user', 'plays', 'game'): ([3, 2, 1, 1, 3, 2, 2], [5, 3, 4, 4, 5, 3, 3]) }, index_dtype=index_dtype) g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4]) g.nodes['user'].data['hh'] = F.tensor([0, 1, 2, 3, 4]) g.edges['follow'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5]) sg, wb = dgl.to_simple(g, return_counts='weights', writeback_mapping=True, copy_edata=True) g.nodes['game'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5]) for etype in g.canonical_etypes: u, v = g.all_edges(form='uv', order='eid', etype=etype) u = F.asnumpy(u).tolist() v = F.asnumpy(v).tolist() uv = list(zip(u, v)) eid_map = F.asnumpy(wb[etype]) su, sv = sg.all_edges(form='uv', order='eid', etype=etype) su = F.asnumpy(su).tolist() sv = F.asnumpy(sv).tolist() suv = list(zip(su, sv)) sw = F.asnumpy(sg.edges[etype].data['weights']) assert set(uv) == set(suv) for i, e in enumerate(suv): assert sw[i] == sum(e == _e for _e in uv) for i, e in enumerate(uv): assert eid_map[i] == suv.index(e) # shared ndata assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h']) assert F.array_equal(sg.nodes['user'].data['hh'], g.nodes['user'].data['hh']) assert 'h' not in sg.nodes['game'].data # new ndata to sg sg.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4]) assert 'hhh' not in g.nodes['user'].data # share edata feat_idx = F.asnumpy(wb[('user', 'follow', 'user')]) _, indices = np.unique(feat_idx, return_index=True) assert np.array_equal(F.asnumpy(sg.edges['follow'].data['h']), F.asnumpy(g.edges['follow'].data['h'])[indices]) sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False) for ntype in g.ntypes: assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype) assert 'h' not in sg.nodes['user'].data assert 'hh' not in sg.nodes['user'].data
def test_topk(g, idtype, descending): g = g.astype(idtype).to(F.ctx()) g.ndata['x'] = F.randn((g.number_of_nodes(), 3)) # Test.1: to test the case where k > number of nodes. dgl.topk_nodes(g, 'x', 100, sortby=-1) # Test.2: test correctness min_nnodes = F.asnumpy(g.batch_num_nodes()).min() if min_nnodes <= 1: return k = min_nnodes - 1 val, indices = dgl.topk_nodes(g, 'x', k, descending=descending, sortby=-1) print(k) print(g.ndata['x']) print('val', val) print('indices', indices) subg = dgl.unbatch(g) subval, subidx = [], [] for sg in subg: subx = F.asnumpy(sg.ndata['x']) ai = np.argsort(subx[:, -1:].flatten()) if descending: ai = np.ascontiguousarray(ai[::-1]) subx = np.expand_dims(subx[ai[:k]], 0) subval.append(F.tensor(subx)) subidx.append(F.tensor(np.expand_dims(ai[:k], 0))) print(F.cat(subval, dim=0)) assert F.allclose(val, F.cat(subval, dim=0)) assert F.allclose(indices, F.cat(subidx, dim=0)) # Test.3: sorby=None dgl.topk_nodes(g, 'x', k, sortby=None) g.edata['x'] = F.randn((g.number_of_edges(), 3)) # Test.4: topk edges where k > number of edges. dgl.topk_edges(g, 'x', 100, sortby=-1) # Test.5: topk edges test correctness min_nedges = F.asnumpy(g.batch_num_edges()).min() if min_nedges <= 1: return k = min_nedges - 1 val, indices = dgl.topk_edges(g, 'x', k, descending=descending, sortby=-1) print(k) print(g.edata['x']) print('val', val) print('indices', indices) subg = dgl.unbatch(g) subval, subidx = [], [] for sg in subg: subx = F.asnumpy(sg.edata['x']) ai = np.argsort(subx[:, -1:].flatten()) if descending: ai = np.ascontiguousarray(ai[::-1]) subx = np.expand_dims(subx[ai[:k]], 0) subval.append(F.tensor(subx)) subidx.append(F.tensor(np.expand_dims(ai[:k], 0))) print(F.cat(subval, dim=0)) assert F.allclose(val, F.cat(subval, dim=0)) assert F.allclose(indices, F.cat(subidx, dim=0))
def test_to_block(index_dtype): def check(g, bg, ntype, etype, dst_nodes, include_dst_in_src=True): if dst_nodes is not None: assert F.array_equal(bg.dstnodes[ntype].data[dgl.NID], dst_nodes) n_dst_nodes = bg.number_of_nodes('DST/' + ntype) if include_dst_in_src: assert F.array_equal( bg.srcnodes[ntype].data[dgl.NID][:n_dst_nodes], bg.dstnodes[ntype].data[dgl.NID]) g = g[etype] bg = bg[etype] induced_src = bg.srcdata[dgl.NID] induced_dst = bg.dstdata[dgl.NID] induced_eid = bg.edata[dgl.EID] bg_src, bg_dst = bg.all_edges(order='eid') src_ans, dst_ans = g.all_edges(order='eid') induced_src_bg = F.gather_row(induced_src, bg_src) induced_dst_bg = F.gather_row(induced_dst, bg_dst) induced_src_ans = F.gather_row(src_ans, induced_eid) induced_dst_ans = F.gather_row(dst_ans, induced_eid) assert F.array_equal(induced_src_bg, induced_src_ans) assert F.array_equal(induced_dst_bg, induced_dst_ans) def checkall(g, bg, dst_nodes, include_dst_in_src=True): for etype in g.etypes: ntype = g.to_canonical_etype(etype)[2] if dst_nodes is not None and ntype in dst_nodes: check(g, bg, ntype, etype, dst_nodes[ntype], include_dst_in_src) else: check(g, bg, ntype, etype, None, include_dst_in_src) g = dgl.heterograph( { ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)], ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)], ('B', 'BA', 'A'): [(2, 3), (3, 2)] }, index_dtype=index_dtype) g_a = g['AA'] bg = dgl.to_block(g_a) check(g_a, bg, 'A', 'AA', None) assert bg.number_of_src_nodes() == 5 assert bg.number_of_dst_nodes() == 4 bg = dgl.to_block(g_a, include_dst_in_src=False) check(g_a, bg, 'A', 'AA', None, False) assert bg.number_of_src_nodes() == 4 assert bg.number_of_dst_nodes() == 4 dst_nodes = F.tensor([4, 3, 2, 1], dtype=getattr(F, index_dtype)) bg = dgl.to_block(g_a, dst_nodes) check(g_a, bg, 'A', 'AA', dst_nodes) g_ab = g['AB'] bg = dgl.to_block(g_ab) assert bg._idtype_str == index_dtype assert bg.number_of_nodes('SRC/B') == 4 assert F.array_equal(bg.srcnodes['B'].data[dgl.NID], bg.dstnodes['B'].data[dgl.NID]) assert bg.number_of_nodes('DST/A') == 0 checkall(g_ab, bg, None) dst_nodes = {'B': F.tensor([5, 6, 3, 1], dtype=getattr(F, index_dtype))} bg = dgl.to_block(g, dst_nodes) assert bg.number_of_nodes('SRC/B') == 4 assert F.array_equal(bg.srcnodes['B'].data[dgl.NID], bg.dstnodes['B'].data[dgl.NID]) assert bg.number_of_nodes('DST/A') == 0 checkall(g, bg, dst_nodes) dst_nodes = { 'A': F.tensor([4, 3, 2, 1], dtype=getattr(F, index_dtype)), 'B': F.tensor([3, 5, 6, 1], dtype=getattr(F, index_dtype)) } bg = dgl.to_block(g, dst_nodes=dst_nodes) checkall(g, bg, dst_nodes)
def test_random_walk(): g1 = dgl.heterograph({ ('user', 'follow', 'user'): ([0, 1, 2], [1, 2, 0]) }) g2 = dgl.heterograph({ ('user', 'follow', 'user'): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]) }) g3 = dgl.heterograph({ ('user', 'follow', 'user'): ([0, 1, 2], [1, 2, 0]), ('user', 'view', 'item'): ([0, 1, 2], [0, 1, 2]), ('item', 'viewed-by', 'user'): ([0, 1, 2], [0, 1, 2])}) g4 = dgl.heterograph({ ('user', 'follow', 'user'): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]), ('user', 'view', 'item'): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]), ('item', 'viewed-by', 'user'): ([0, 1, 1, 2, 2, 1], [0, 0, 1, 2, 3, 3])}) g2.edata['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32) g2.edata['p2'] = F.tensor([[3], [0], [3], [3], [3]], dtype=F.float32) g4.edges['follow'].data['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32) g4.edges['viewed-by'].data['p'] = F.tensor([1, 1, 1, 1, 1, 1], dtype=F.float32) traces, eids, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, return_eids=True) check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids) traces, eids, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=0., return_eids=True) check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids) traces, ntypes = dgl.sampling.random_walk( g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=F.zeros((4,), F.float32, F.cpu())) check_random_walk(g1, ['follow'] * 4, traces, ntypes) traces, ntypes = dgl.sampling.random_walk( g1, [0, 1, 2, 0, 1, 2], length=5, restart_prob=F.tensor([0, 0, 0, 0, 1], dtype=F.float32)) check_random_walk( g1, ['follow'] * 4, F.slice_axis(traces, 1, 0, 5), F.slice_axis(ntypes, 0, 0, 5)) assert (F.asnumpy(traces)[:, 5] == -1).all() traces, eids, ntypes = dgl.sampling.random_walk( g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, return_eids=True) check_random_walk(g2, ['follow'] * 4, traces, ntypes, trace_eids=eids) traces, eids, ntypes = dgl.sampling.random_walk( g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, prob='p', return_eids=True) check_random_walk(g2, ['follow'] * 4, traces, ntypes, 'p', trace_eids=eids) try: traces, ntypes = dgl.sampling.random_walk( g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, prob='p2') fail = False except dgl.DGLError: fail = True assert fail metapath = ['follow', 'view', 'viewed-by'] * 2 traces, eids, ntypes = dgl.sampling.random_walk( g3, [0, 1, 2, 0, 1, 2], metapath=metapath, return_eids=True) check_random_walk(g3, metapath, traces, ntypes, trace_eids=eids) metapath = ['follow', 'view', 'viewed-by'] * 2 traces, eids, ntypes = dgl.sampling.random_walk( g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, return_eids=True) check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids) traces, eids, ntypes = dgl.sampling.random_walk( g4, [0, 1, 2, 0, 1, 2], metapath=metapath, return_eids=True) check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids) metapath = ['follow', 'view', 'viewed-by'] * 2 traces, eids, ntypes = dgl.sampling.random_walk( g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', return_eids=True) check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids) traces, eids, ntypes = dgl.sampling.random_walk( g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', restart_prob=0., return_eids=True) check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids) traces, eids, ntypes = dgl.sampling.random_walk( g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', restart_prob=F.zeros((6,), F.float32, F.cpu()), return_eids=True) check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids) 
traces, eids, ntypes = dgl.sampling.random_walk( g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath + ['follow'], prob='p', restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32), return_eids=True) check_random_walk(g4, metapath, traces[:, :7], ntypes[:7], 'p', trace_eids=eids) assert (F.asnumpy(traces[:, 7]) == -1).all()
def test_multi_recv(): # basic recv test g = generate_graph() h = g.ndata['h'] g.register_message_func(message_func) g.register_reduce_func(reduce_func) g.register_apply_node_func(apply_node_func) expected = F.copy_to(F.zeros((g.number_of_edges(), ), dtype=F.int64), F.cpu()) # two separate round of send and recv u = [4, 5, 6] v = [9] g.send((u, v)) eid = g.edge_ids(u, v) expected = F.asnumpy(expected) eid = F.asnumpy(eid) expected[eid] = 1 assert np.array_equal(g._get_msg_index().tonumpy(), expected) g.recv(v) expected[eid] = 0 assert np.array_equal(g._get_msg_index().tonumpy(), expected) u = [0] v = [1, 2, 3] g.send((u, v)) eid = g.edge_ids(u, v) eid = F.asnumpy(eid) expected[eid] = 1 assert np.array_equal(g._get_msg_index().tonumpy(), expected) g.recv(v) expected[eid] = 0 assert np.array_equal(g._get_msg_index().tonumpy(), expected) h1 = g.ndata['h'] # one send, two recv g.ndata['h'] = h u = F.tensor([0, 0, 0, 4, 5, 6]) v = F.tensor([1, 2, 3, 9, 9, 9]) g.send((u, v)) eid = g.edge_ids(u, v) eid = F.asnumpy(eid) expected[eid] = 1 assert np.array_equal(g._get_msg_index().tonumpy(), expected) u = [4, 5, 6] v = [9] g.recv(v) eid = g.edge_ids(u, v) eid = F.asnumpy(eid) expected[eid] = 0 assert np.array_equal(g._get_msg_index().tonumpy(), expected) u = [0] v = [1, 2, 3] g.recv(v) eid = g.edge_ids(u, v) eid = F.asnumpy(eid) expected[eid] = 0 assert np.array_equal(g._get_msg_index().tonumpy(), expected) h2 = g.ndata['h'] assert F.allclose(h1, h2)
def check_partition(g, part_method, reshuffle, num_parts=4, num_trainers_per_machine=1, load_feats=True): g.ndata['labels'] = F.arange(0, g.number_of_nodes()) g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10), F.float32) g.edata['feats'] = F.tensor(np.random.randn(g.number_of_edges(), 10), F.float32) g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h')) g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh')) num_hops = 2 orig_nids, orig_eids = partition_graph( g, 'test', num_parts, '/tmp/partition', num_hops=num_hops, part_method=part_method, reshuffle=reshuffle, return_mapping=True, num_trainers_per_machine=num_trainers_per_machine) part_sizes = [] shuffled_labels = [] shuffled_edata = [] for i in range(num_parts): part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition( '/tmp/partition/test.json', i, load_feats=load_feats) if not load_feats: assert not node_feats assert not edge_feats node_feats, edge_feats = load_partition_feats( '/tmp/partition/test.json', i) if num_trainers_per_machine > 1: for ntype in g.ntypes: name = ntype + '/trainer_id' assert name in node_feats part_ids = F.floor_div(node_feats[name], num_trainers_per_machine) assert np.all(F.asnumpy(part_ids) == i) for etype in g.etypes: name = etype + '/trainer_id' assert name in edge_feats part_ids = F.floor_div(edge_feats[name], num_trainers_per_machine) assert np.all(F.asnumpy(part_ids) == i) # Check the metadata assert gpb._num_nodes() == g.number_of_nodes() assert gpb._num_edges() == g.number_of_edges() assert gpb.num_partitions() == num_parts gpb_meta = gpb.metadata() assert len(gpb_meta) == num_parts assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes'] assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges'] part_sizes.append((gpb_meta[i]['num_nodes'], gpb_meta[i]['num_edges'])) nid = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node']) local_nid = gpb.nid2localnid(nid, i) assert F.dtype(local_nid) in (F.int64, F.int32) assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) eid = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge']) local_eid = gpb.eid2localeid(eid, i) assert F.dtype(local_eid) in (F.int64, F.int32) assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid))) # Check the node map. local_nodes = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node']) llocal_nodes = F.nonzero_1d(part_g.ndata['inner_node']) local_nodes1 = gpb.partid2nids(i) assert F.dtype(local_nodes1) in (F.int32, F.int64) assert np.all( np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy( local_nodes1))) assert np.all(F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes))) # Check the edge map. local_edges = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge']) llocal_edges = F.nonzero_1d(part_g.edata['inner_edge']) local_edges1 = gpb.partid2eids(i) assert F.dtype(local_edges1) in (F.int32, F.int64) assert np.all( np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy( local_edges1))) assert np.all(F.asnumpy(llocal_edges) == np.arange(len(llocal_edges))) # Verify the mapping between the reshuffled IDs and the original IDs. 
part_src_ids, part_dst_ids = part_g.edges() part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids) part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids) part_eids = part_g.edata[dgl.EID] orig_src_ids = F.gather_row(orig_nids, part_src_ids) orig_dst_ids = F.gather_row(orig_nids, part_dst_ids) orig_eids1 = F.gather_row(orig_eids, part_eids) orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids) assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0] assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) if reshuffle: part_g.ndata['feats'] = F.gather_row(g.ndata['feats'], part_g.ndata['orig_id']) part_g.edata['feats'] = F.gather_row(g.edata['feats'], part_g.edata['orig_id']) # when we read node data from the original global graph, we should use orig_id. local_nodes = F.boolean_mask(part_g.ndata['orig_id'], part_g.ndata['inner_node']) local_edges = F.boolean_mask(part_g.edata['orig_id'], part_g.edata['inner_edge']) else: part_g.ndata['feats'] = F.gather_row(g.ndata['feats'], part_g.ndata[dgl.NID]) part_g.edata['feats'] = F.gather_row(g.edata['feats'], part_g.edata[dgl.NID]) part_g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h')) part_g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh')) assert F.allclose(F.gather_row(g.ndata['h'], local_nodes), F.gather_row(part_g.ndata['h'], llocal_nodes)) assert F.allclose(F.gather_row(g.ndata['eh'], local_nodes), F.gather_row(part_g.ndata['eh'], llocal_nodes)) for name in ['labels', 'feats']: assert '_N/' + name in node_feats assert node_feats['_N/' + name].shape[0] == len(local_nodes) true_feats = F.gather_row(g.ndata[name], local_nodes) ndata = F.gather_row(node_feats['_N/' + name], local_nid) assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) for name in ['feats']: assert '_E/' + name in edge_feats assert edge_feats['_E/' + name].shape[0] == len(local_edges) true_feats = F.gather_row(g.edata[name], local_edges) edata = F.gather_row(edge_feats['_E/' + name], local_eid) assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) # This only works if node/edge IDs are shuffled. if reshuffle: shuffled_labels.append(node_feats['_N/labels']) shuffled_edata.append(edge_feats['_E/feats']) # Verify that we can reconstruct node/edge data for original IDs. if reshuffle: shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) orig_labels[F.asnumpy(orig_nids)] = shuffled_labels orig_edata[F.asnumpy(orig_eids)] = shuffled_edata assert np.all(orig_labels == F.asnumpy(g.ndata['labels'])) assert np.all(orig_edata == F.asnumpy(g.edata['feats'])) if reshuffle: node_map = [] edge_map = [] for i, (num_nodes, num_edges) in enumerate(part_sizes): node_map.append(np.ones(num_nodes) * i) edge_map.append(np.ones(num_edges) * i) node_map = np.concatenate(node_map) edge_map = np.concatenate(edge_map) nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) assert F.dtype(nid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(nid2pid) == node_map) eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) assert F.dtype(eid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(eid2pid) == edge_map)
def _test_one(g): assert g.number_of_nodes() == 10 assert g.number_of_edges() == 20 assert len(g) == 10 assert not g.is_multigraph for i in range(10): assert g.has_node(i) assert i in g assert not g.has_node(11) assert not 11 in g assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0])) src, dst = edge_pair_input() for u, v in zip(src, dst): assert g.has_edge_between(u, v) assert not g.has_edge_between(0, 0) assert F.allclose(g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1])) assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4]) assert set(F.asnumpy(g.successors(2))) == set([7, 3]) assert g.edge_id(4, 4) == 5 assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([5, 0])) src, dst = g.find_edges([3, 6, 5]) assert F.allclose(src, F.tensor([5, 7, 4])) assert F.allclose(dst, F.tensor([9, 9, 4])) src, dst, eid = g.in_edges(9, form='all') tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set([(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7)]) src, dst, eid = g.in_edges([9, 0, 8], form='all') # test node#0 has no in edges tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set([(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7), (3, 8, 9), (7, 8, 12)]) src, dst, eid = g.out_edges(0, form='all') tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set([(0, 9, 0), (0, 6, 1), (0, 4, 4)]) src, dst, eid = g.out_edges([0, 4, 8], form='all') # test node#8 has no out edges tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set([(0, 9, 0), (0, 6, 1), (0, 4, 4), (4, 3, 2), (4, 4, 5), (4, 9, 7), (4, 1, 8)]) src, dst, eid = g.edges('all', 'eid') t_src, t_dst = edge_pair_input() t_tup = list(zip(t_src, t_dst, list(range(20)))) tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set(t_tup) assert list(F.asnumpy(eid)) == list(range(20)) src, dst, eid = g.edges('all', 'srcdst') t_src, t_dst = edge_pair_input() t_tup = list(zip(t_src, t_dst, list(range(20)))) tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set(t_tup) assert list(F.asnumpy(src)) == sorted(list(F.asnumpy(src))) assert g.in_degree(0) == 0 assert g.in_degree(9) == 4 assert F.allclose(g.in_degrees([0, 9]), F.tensor([0, 4])) assert g.out_degree(8) == 0 assert g.out_degree(9) == 1 assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1])) assert np.array_equal( F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray().T) assert np.array_equal( F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray())
def check_hetero_partition(hg, part_method, num_parts=4, num_trainers_per_machine=1, load_feats=True): hg.nodes['n1'].data['labels'] = F.arange(0, hg.number_of_nodes('n1')) hg.nodes['n1'].data['feats'] = F.tensor( np.random.randn(hg.number_of_nodes('n1'), 10), F.float32) hg.edges['r1'].data['feats'] = F.tensor( np.random.randn(hg.number_of_edges('r1'), 10), F.float32) hg.edges['r1'].data['labels'] = F.arange(0, hg.number_of_edges('r1')) num_hops = 1 orig_nids, orig_eids = partition_graph( hg, 'test', num_parts, '/tmp/partition', num_hops=num_hops, part_method=part_method, reshuffle=True, return_mapping=True, num_trainers_per_machine=num_trainers_per_machine) assert len(orig_nids) == len(hg.ntypes) assert len(orig_eids) == len(hg.etypes) for ntype in hg.ntypes: assert len(orig_nids[ntype]) == hg.number_of_nodes(ntype) for etype in hg.etypes: assert len(orig_eids[etype]) == hg.number_of_edges(etype) parts = [] shuffled_labels = [] shuffled_elabels = [] for i in range(num_parts): part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition( '/tmp/partition/test.json', i, load_feats=load_feats) if not load_feats: assert not node_feats assert not edge_feats node_feats, edge_feats = load_partition_feats( '/tmp/partition/test.json', i) if num_trainers_per_machine > 1: for ntype in hg.ntypes: name = ntype + '/trainer_id' assert name in node_feats part_ids = F.floor_div(node_feats[name], num_trainers_per_machine) assert np.all(F.asnumpy(part_ids) == i) for etype in hg.etypes: name = etype + '/trainer_id' assert name in edge_feats part_ids = F.floor_div(edge_feats[name], num_trainers_per_machine) assert np.all(F.asnumpy(part_ids) == i) # Verify the mapping between the reshuffled IDs and the original IDs. # These are partition-local IDs. part_src_ids, part_dst_ids = part_g.edges() # These are reshuffled global homogeneous IDs. part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids) part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids) part_eids = part_g.edata[dgl.EID] # These are reshuffled per-type IDs. src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) etype_ids, part_eids = gpb.map_to_per_etype(part_eids) # These are original per-type IDs. 
for etype_id, etype in enumerate(hg.etypes): part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id) src_ntype_ids1 = F.boolean_mask(src_ntype_ids, etype_ids == etype_id) part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id) dst_ntype_ids1 = F.boolean_mask(dst_ntype_ids, etype_ids == etype_id) part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])] dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])] orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1) orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1) orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) orig_eids2 = hg.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype) assert len(orig_eids1) == len(orig_eids2) assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) parts.append(part_g) verify_graph_feats(hg, gpb, part_g, node_feats, edge_feats) shuffled_labels.append(node_feats['n1/labels']) shuffled_elabels.append(edge_feats['r1/labels']) verify_hetero_graph(hg, parts) shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) orig_elabels = np.zeros(shuffled_elabels.shape, dtype=shuffled_elabels.dtype) orig_labels[F.asnumpy(orig_nids['n1'])] = shuffled_labels orig_elabels[F.asnumpy(orig_eids['r1'])] = shuffled_elabels assert np.all(orig_labels == F.asnumpy(hg.nodes['n1'].data['labels'])) assert np.all(orig_elabels == F.asnumpy(hg.edges['r1'].data['labels']))
def check_weighted_negative_sampler(mode, exclude_positive, neg_size): g = generate_rand_graph(100) num_edges = g.number_of_edges() num_nodes = g.number_of_nodes() edge_weight = F.copy_to( F.tensor(np.full((num_edges, ), 1, dtype=np.float32)), F.cpu()) node_weight = F.copy_to( F.tensor(np.full((num_nodes, ), 1, dtype=np.float32)), F.cpu()) etype = np.random.randint(0, 10, size=num_edges, dtype=np.int64) g.edata['etype'] = F.copy_to(F.tensor(etype), F.cpu()) pos_gsrc, pos_gdst, pos_geid = g.all_edges(form='all', order='eid') pos_map = {} for i in range(len(pos_geid)): pos_d = int(F.asnumpy(pos_gdst[i])) pos_e = int(F.asnumpy(pos_geid[i])) pos_map[(pos_d, pos_e)] = int(F.asnumpy(pos_gsrc[i])) EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler') # Correctness check # Test the homogeneous graph. batch_size = 50 # Test the knowledge graph with edge weight provied. total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, reset=False, edge_weight=edge_weight, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, return_false_neg=True): pos_lsrc, pos_ldst, pos_leid = pos_edges.all_edges(form='all', order='eid') assert_array_equal( F.asnumpy(F.gather_row(pos_edges.parent_eid, pos_leid)), F.asnumpy( g.edge_ids(F.gather_row(pos_edges.parent_nid, pos_lsrc), F.gather_row(pos_edges.parent_nid, pos_ldst)))) neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid') neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc) neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst) neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid) for i in range(len(neg_eid)): neg_d = int(F.asnumpy(neg_dst[i])) neg_e = int(F.asnumpy(neg_eid[i])) assert (neg_d, neg_e) in pos_map if exclude_positive: assert int(F.asnumpy(neg_src[i])) != pos_map[(neg_d, neg_e)] check_head_tail(neg_edges) pos_tails = F.gather_row(pos_edges.parent_nid, pos_edges.tail_nid) neg_tails = F.gather_row(neg_edges.parent_nid, neg_edges.tail_nid) pos_tails = np.sort(F.asnumpy(pos_tails)) neg_tails = np.sort(F.asnumpy(neg_tails)) np.testing.assert_equal(pos_tails, neg_tails) exist = neg_edges.edata['false_neg'] if exclude_positive: assert np.sum(F.asnumpy(exist) == 0) == len(exist) else: assert F.array_equal(g.has_edges_between(neg_src, neg_dst), exist) total_samples += batch_size assert total_samples <= num_edges # Test the knowledge graph with edge weight provied. total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, reset=False, edge_weight=edge_weight, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, relations=g.edata['etype'], return_false_neg=True): neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid') neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc) neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst) neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid) exists = neg_edges.edata['false_neg'] neg_edges.edata['etype'] = F.gather_row(g.edata['etype'], neg_eid) for i in range(len(neg_eid)): u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i]) if g.has_edge_between(u, v): eid = g.edge_id(u, v) etype = g.edata['etype'][eid] exist = neg_edges.edata['etype'][i] == etype assert F.asnumpy(exists[i]) == F.asnumpy(exist) total_samples += batch_size assert total_samples <= num_edges # Test the knowledge graph with edge/node weight provied. 
total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, reset=False, edge_weight=edge_weight, node_weight=node_weight, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, relations=g.edata['etype'], return_false_neg=True): neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid') neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc) neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst) neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid) exists = neg_edges.edata['false_neg'] neg_edges.edata['etype'] = F.gather_row(g.edata['etype'], neg_eid) for i in range(len(neg_eid)): u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i]) if g.has_edge_between(u, v): eid = g.edge_id(u, v) etype = g.edata['etype'][eid] exist = neg_edges.edata['etype'][i] == etype assert F.asnumpy(exists[i]) == F.asnumpy(exist) total_samples += batch_size assert total_samples <= num_edges # check replacement = True with pos edges no-uniform sample # with reset = False total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, replacement=True, reset=False, edge_weight=edge_weight, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') assert len(pos_leid) == batch_size total_samples += len(pos_leid) assert total_samples == num_edges # check replacement = True with pos edges no-uniform sample # with reset = True total_samples = 0 max_samples = 4 * num_edges for pos_edges, neg_edges in EdgeSampler(g, batch_size, replacement=True, reset=True, edge_weight=edge_weight, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') assert len(pos_leid) == batch_size total_samples += len(pos_leid) if total_samples >= max_samples: break assert total_samples == max_samples # check replacement = False with pos/neg edges no-uniform sample # reset = False total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, replacement=False, reset=False, edge_weight=edge_weight, node_weight=node_weight, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, relations=g.edata['etype'], return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') assert len(pos_leid) == batch_size total_samples += len(pos_leid) assert total_samples == num_edges # check replacement = False with pos/neg edges no-uniform sample # reset = True total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, replacement=False, reset=True, edge_weight=edge_weight, node_weight=node_weight, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, relations=g.edata['etype'], return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') assert len(pos_leid) == batch_size total_samples += len(pos_leid) if total_samples >= max_samples: break assert total_samples == max_samples # Check Rate dgl.random.seed(0) g = generate_rand_graph(1000) num_edges = g.number_of_edges() num_nodes = g.number_of_nodes() edge_weight = F.copy_to( F.tensor(np.full((num_edges, ), 1, dtype=np.float32)), F.cpu()) edge_weight[0] = F.sum(edge_weight, dim=0) node_weight = F.copy_to( F.tensor(np.full((num_nodes, ), 1, dtype=np.float32)), F.cpu()) node_weight[-1] = F.sum(node_weight, dim=0) / 200 etype = np.random.randint(0, 20, size=num_edges, dtype=np.int64) g.edata['etype'] = 
F.copy_to(F.tensor(etype), F.cpu()) # Test w/o node weight. max_samples = num_edges // 5 total_samples = 0 # Test the knowledge graph with edge weight provied. edge_sampled = np.full((num_edges, ), 0, dtype=np.int32) node_sampled = np.full((num_nodes, ), 0, dtype=np.int32) for pos_edges, neg_edges in EdgeSampler(g, batch_size, replacement=True, edge_weight=edge_weight, shuffle=True, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=False, relations=g.edata['etype'], return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') neg_lsrc, neg_ldst, _ = neg_edges.all_edges(form='all', order='eid') if 'head' in mode: neg_src = neg_edges.parent_nid[neg_lsrc] np.add.at(node_sampled, F.asnumpy(neg_src), 1) else: neg_dst = neg_edges.parent_nid[neg_ldst] np.add.at(node_sampled, F.asnumpy(neg_dst), 1) np.add.at(edge_sampled, F.asnumpy(pos_edges.parent_eid[pos_leid]), 1) total_samples += batch_size if total_samples > max_samples: break # Check rate here edge_rate_0 = edge_sampled[0] / edge_sampled.sum() edge_tail_half_cnt = edge_sampled[edge_sampled.shape[0] // 2:-1].sum() edge_rate_tail_half = edge_tail_half_cnt / edge_sampled.sum() assert np.allclose(edge_rate_0, 0.5, atol=0.05) assert np.allclose(edge_rate_tail_half, 0.25, atol=0.05) node_rate_0 = node_sampled[0] / node_sampled.sum() node_tail_half_cnt = node_sampled[node_sampled.shape[0] // 2:-1].sum() node_rate_tail_half = node_tail_half_cnt / node_sampled.sum() assert node_rate_0 < 0.02 assert np.allclose(node_rate_tail_half, 0.5, atol=0.02) # Test the knowledge graph with edge/node weight provied. edge_sampled = np.full((num_edges, ), 0, dtype=np.int32) node_sampled = np.full((num_nodes, ), 0, dtype=np.int32) total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, replacement=True, edge_weight=edge_weight, node_weight=node_weight, shuffle=True, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=False, relations=g.edata['etype'], return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') neg_lsrc, neg_ldst, _ = neg_edges.all_edges(form='all', order='eid') if 'head' in mode: neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc) np.add.at(node_sampled, F.asnumpy(neg_src), 1) else: neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst) np.add.at(node_sampled, F.asnumpy(neg_dst), 1) np.add.at(edge_sampled, F.asnumpy(pos_edges.parent_eid[pos_leid]), 1) total_samples += batch_size if total_samples > max_samples: break # Check rate here edge_rate_0 = edge_sampled[0] / edge_sampled.sum() edge_tail_half_cnt = edge_sampled[edge_sampled.shape[0] // 2:-1].sum() edge_rate_tail_half = edge_tail_half_cnt / edge_sampled.sum() assert np.allclose(edge_rate_0, 0.5, atol=0.05) assert np.allclose(edge_rate_tail_half, 0.25, atol=0.05) node_rate = node_sampled[-1] / node_sampled.sum() node_rate_a = np.average(node_sampled[:50]) / node_sampled.sum() node_rate_b = np.average(node_sampled[50:100]) / node_sampled.sum() # As neg sampling does not contain duplicate nodes, # this test takes some acceptable variation on the sample rate. assert np.allclose(node_rate, node_rate_a * 5, atol=0.002) assert np.allclose(node_rate_a, node_rate_b, atol=0.0002)
def check_negative_sampler(mode, exclude_positive, neg_size): g = generate_rand_graph(100) num_edges = g.number_of_edges() etype = np.random.randint(0, 10, size=g.number_of_edges(), dtype=np.int64) g.edata['etype'] = F.copy_to(F.tensor(etype), F.cpu()) pos_gsrc, pos_gdst, pos_geid = g.all_edges(form='all', order='eid') pos_map = {} for i in range(len(pos_geid)): pos_d = int(F.asnumpy(pos_gdst[i])) pos_e = int(F.asnumpy(pos_geid[i])) pos_map[(pos_d, pos_e)] = int(F.asnumpy(pos_gsrc[i])) EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler') # Test the homogeneous graph. batch_size = 50 total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, negative_mode=mode, reset=False, neg_sample_size=neg_size, exclude_positive=exclude_positive, return_false_neg=True): pos_lsrc, pos_ldst, pos_leid = pos_edges.all_edges(form='all', order='eid') assert_array_equal( F.asnumpy(F.gather_row(pos_edges.parent_eid, pos_leid)), F.asnumpy( g.edge_ids(F.gather_row(pos_edges.parent_nid, pos_lsrc), F.gather_row(pos_edges.parent_nid, pos_ldst)))) neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid') neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc) neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst) neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid) for i in range(len(neg_eid)): neg_d = int(F.asnumpy(neg_dst)[i]) neg_e = int(F.asnumpy(neg_eid)[i]) assert (neg_d, neg_e) in pos_map if exclude_positive: assert int(F.asnumpy(neg_src[i])) != pos_map[(neg_d, neg_e)] check_head_tail(neg_edges) pos_tails = F.gather_row(pos_edges.parent_nid, pos_edges.tail_nid) neg_tails = F.gather_row(neg_edges.parent_nid, neg_edges.tail_nid) pos_tails = np.sort(F.asnumpy(pos_tails)) neg_tails = np.sort(F.asnumpy(neg_tails)) np.testing.assert_equal(pos_tails, neg_tails) exist = neg_edges.edata['false_neg'] if exclude_positive: assert np.sum(F.asnumpy(exist) == 0) == len(exist) else: assert F.array_equal(g.has_edges_between(neg_src, neg_dst), exist) total_samples += batch_size assert total_samples <= num_edges # check replacement = True # with reset = False (default setting) total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, replacement=True, reset=False, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') assert len(pos_leid) == batch_size total_samples += len(pos_leid) assert total_samples == num_edges # check replacement = False # with reset = False (default setting) total_samples = 0 for pos_edges, neg_edges in EdgeSampler(g, batch_size, replacement=False, reset=False, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') assert len(pos_leid) == batch_size total_samples += len(pos_leid) assert total_samples == num_edges # check replacement = True # with reset = True total_samples = 0 max_samples = 2 * num_edges for pos_edges, neg_edges in EdgeSampler(g, batch_size, replacement=True, reset=True, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') assert len(pos_leid) <= batch_size total_samples += len(pos_leid) if (total_samples >= max_samples): break assert total_samples >= max_samples # check replacement = False # with reset = True total_samples = 0 max_samples = 2 * num_edges for pos_edges, neg_edges in EdgeSampler(g, 
batch_size, replacement=False, reset=True, negative_mode=mode, neg_sample_size=neg_size, exclude_positive=exclude_positive, return_false_neg=True): _, _, pos_leid = pos_edges.all_edges(form='all', order='eid') assert len(pos_leid) <= batch_size total_samples += len(pos_leid) if (total_samples >= max_samples): break assert total_samples >= max_samples # Test the knowledge graph. total_samples = 0 for _, neg_edges in EdgeSampler(g, batch_size, negative_mode=mode, reset=False, neg_sample_size=neg_size, exclude_positive=exclude_positive, relations=g.edata['etype'], return_false_neg=True): neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid') neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc) neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst) neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid) exists = neg_edges.edata['false_neg'] neg_edges.edata['etype'] = F.gather_row(g.edata['etype'], neg_eid) for i in range(len(neg_eid)): u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i]) if g.has_edge_between(u, v): eid = g.edge_id(u, v) etype = g.edata['etype'][eid] exist = neg_edges.edata['etype'][i] == etype assert F.asnumpy(exists[i]) == F.asnumpy(exist) total_samples += batch_size assert total_samples <= num_edges
def test_empty_relation(idtype): """Test the features of batched DGLHeteroGraphs""" g1 = dgl.heterograph( { ('user', 'follows', 'user'): ([0, 1], [1, 2]), ('user', 'plays', 'game'): ([], []) }, idtype=idtype, device=F.ctx()) g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]]) g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]]) g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]]) g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]]) g2 = dgl.heterograph( { ('user', 'follows', 'user'): ([0, 1], [1, 2]), ('user', 'plays', 'game'): ([0, 1], [0, 0]) }, idtype=idtype, device=F.ctx()) g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]]) g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]]) g2.nodes['game'].data['h1'] = F.tensor([[0.]]) g2.nodes['game'].data['h2'] = F.tensor([[1.]]) g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]]) g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]]) g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]]) bg = dgl.batch([g1, g2]) # Test number of nodes for ntype in bg.ntypes: assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [ g1.number_of_nodes(ntype), g2.number_of_nodes(ntype) ] # Test number of edges for etype in bg.canonical_etypes: assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [ g1.number_of_edges(etype), g2.number_of_edges(etype) ] # Test features assert F.allclose( bg.nodes['user'].data['h1'], F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']], dim=0)) assert F.allclose( bg.nodes['user'].data['h2'], F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0)) assert F.allclose(bg.nodes['game'].data['h1'], g2.nodes['game'].data['h1']) assert F.allclose(bg.nodes['game'].data['h2'], g2.nodes['game'].data['h2']) assert F.allclose( bg.edges['follows'].data['h1'], F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0)) assert F.allclose(bg.edges['plays'].data['h1'], g2.edges['plays'].data['h1']) # Test unbatching graphs g3, g4 = dgl.unbatch(bg) check_equivalence_between_heterographs(g1, g3, node_attrs={ 'user': ['h1', 'h2'], 'game': ['h1', 'h2'] }, edge_attrs={ ('user', 'follows', 'user'): ['h1'] }) check_equivalence_between_heterographs(g2, g4, node_attrs={ 'user': ['h1', 'h2'], 'game': ['h1', 'h2'] }, edge_attrs={ ('user', 'follows', 'user'): ['h1'] }) # Test graphs without edges g1 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 0, 'v': 4}) g2 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 1, 'v': 5}) dgl.batch([g1, g2])
def assert_data(lhs, rhs): for key, value in lhs.items(): assert key in rhs assert F.array_equal(F.tensor(value), rhs[key])
def test_pickling_graph(): # graph structures and frames are pickled g = dgl.DGLGraph() g.add_nodes(3) src = F.tensor([0, 0]) dst = F.tensor([1, 2]) g.add_edges(src, dst) x = F.randn((3, 7)) y = F.randn((3, 5)) a = F.randn((2, 6)) b = F.randn((2, 4)) g.ndata['x'] = x g.ndata['y'] = y g.edata['a'] = a g.edata['b'] = b # registered functions are pickled g.register_message_func(_global_message_func) reduce_func = fn.sum('x', 'x') g.register_reduce_func(reduce_func) # custom attributes should be pickled g.foo = 2 new_g = _reconstruct_pickle(g) _assert_is_identical(g, new_g) assert new_g.foo == 2 assert new_g._message_func == _global_message_func assert isinstance(new_g._reduce_func, type(reduce_func)) assert new_g._reduce_func._name == 'sum' assert new_g._reduce_func.msg_field == 'x' assert new_g._reduce_func.out_field == 'x' # test batched graph with partial set case g2 = dgl.DGLGraph() g2.add_nodes(4) src2 = F.tensor([0, 1]) dst2 = F.tensor([2, 3]) g2.add_edges(src2, dst2) x2 = F.randn((4, 7)) y2 = F.randn((3, 5)) a2 = F.randn((2, 6)) b2 = F.randn((2, 4)) g2.ndata['x'] = x2 g2.nodes[[0, 1, 3]].data['y'] = y2 g2.edata['a'] = a2 g2.edata['b'] = b2 bg = dgl.batch([g, g2]) bg2 = _reconstruct_pickle(bg) _assert_is_identical(bg, bg2) new_g, new_g2 = dgl.unbatch(bg2) _assert_is_identical(g, new_g) _assert_is_identical(g2, new_g2) # readonly graph g = dgl.DGLGraph([(0, 1), (1, 2)], readonly=True) new_g = _reconstruct_pickle(g) _assert_is_identical(g, new_g) # multigraph g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)]) new_g = _reconstruct_pickle(g) _assert_is_identical(g, new_g) # readonly multigraph g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)], readonly=True) new_g = _reconstruct_pickle(g) _assert_is_identical(g, new_g)
def _test_construct_graphs_multiple(): from dgl.data.csv_dataset_base import NodeData, EdgeData, GraphData, DGLGraphConstructor num_nodes = 100 num_edges = 1000 num_graphs = 10 num_dims = 3 node_ids = np.array([], dtype=np.int) src_ids = np.array([], dtype=np.int) dst_ids = np.array([], dtype=np.int) ngraph_ids = np.array([], dtype=np.int) egraph_ids = np.array([], dtype=np.int) u_indices = np.array([], dtype=np.int) for i in range(num_graphs): l_node_ids = np.random.choice( np.arange(num_nodes*2), size=num_nodes, replace=False) node_ids = np.append(node_ids, l_node_ids) _, l_u_indices = np.unique(l_node_ids, return_index=True) u_indices = np.append(u_indices, l_u_indices) ngraph_ids = np.append(ngraph_ids, np.full(num_nodes, i)) src_ids = np.append(src_ids, np.random.choice( l_node_ids, size=num_edges)) dst_ids = np.append(dst_ids, np.random.choice( l_node_ids, size=num_edges)) egraph_ids = np.append(egraph_ids, np.full(num_edges, i)) ndata = {'feat': np.random.rand(num_nodes*num_graphs, num_dims), 'label': np.random.randint(2, size=num_nodes*num_graphs)} node_data = NodeData(node_ids, ndata, graph_id=ngraph_ids) edata = {'feat': np.random.rand( num_edges*num_graphs, num_dims), 'label': np.random.randint(2, size=num_edges*num_graphs)} edge_data = EdgeData(src_ids, dst_ids, edata, graph_id=egraph_ids) gdata = {'feat': np.random.rand(num_graphs, num_dims), 'label': np.random.randint(2, size=num_graphs)} graph_data = GraphData(np.arange(num_graphs), gdata) graphs, data_dict = DGLGraphConstructor.construct_graphs( node_data, edge_data, graph_data) assert len(graphs) == num_graphs assert len(data_dict) == len(gdata) for k, v in data_dict.items(): assert F.array_equal(F.tensor(gdata[k]), v) for i, g in enumerate(graphs): assert g.is_homogeneous assert g.num_nodes() == num_nodes assert g.num_edges() == num_edges def assert_data(lhs, rhs, size, node=False): for key, value in lhs.items(): assert key in rhs value = value[i*size:(i+1)*size] if node: indices = u_indices[i*size:(i+1)*size] value = value[indices] assert F.array_equal(F.tensor(value), rhs[key]) assert_data(ndata, g.ndata, num_nodes, node=True) assert_data(edata, g.edata, num_edges) # Graph IDs found in node/edge CSV but not in graph CSV graph_data = GraphData(np.arange(num_graphs-2), {}) expect_except = False try: _, _ = DGLGraphConstructor.construct_graphs( node_data, edge_data, graph_data) except: expect_except = True assert expect_except
def test_reverse():
    g = dgl.DGLGraph()
    g.add_nodes(5)
    # The graph need not be completely connected.
    g.add_edges([0, 1, 2], [1, 2, 1])
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [3.], [4.]])
    g.edata['h'] = F.tensor([[5.], [6.], [7.]])
    rg = g.reverse()

    assert g.is_multigraph == rg.is_multigraph
    assert g.number_of_nodes() == rg.number_of_nodes()
    assert g.number_of_edges() == rg.number_of_edges()
    assert F.allclose(F.astype(rg.has_edges_between([1, 2, 1], [0, 1, 2]), F.float32),
                      F.ones((3,)))
    assert g.edge_id(0, 1) == rg.edge_id(1, 0)
    assert g.edge_id(1, 2) == rg.edge_id(2, 1)
    assert g.edge_id(2, 1) == rg.edge_id(1, 2)

    # test dgl.reverse_heterograph
    # test homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 2]), F.tensor([1, 2, 0])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.]])
    g_r = dgl.reverse_heterograph(g)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    u_g, v_g, eids_g = g.all_edges(form='all')
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all')
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert len(g_r.edata) == 0

    # without sharing ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert len(g_r.ndata) == 0
    assert len(g_r.edata) == 0

    # with sharing ndata and edata
    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert F.array_equal(g.edata['h'], g_r.edata['h'])

    # add new node feature to g_r
    g_r.ndata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.ndata) is False
    assert ('hh' in g_r.ndata) is True

    # add new edge feature to g_r
    g_r.edata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.edata) is False
    assert ('hh' in g_r.edata) is True

    # test heterogeneous graph
    g = dgl.heterograph({
        ('user', 'follows', 'user'): ([0, 1, 2, 4, 3, 1, 3], [1, 2, 3, 2, 0, 0, 1]),
        ('user', 'plays', 'game'): ([0, 0, 2, 3, 3, 4, 1], [1, 0, 1, 0, 1, 0, 0]),
        ('developer', 'develops', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
    })
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([1, 1, 1, 1, 1])
    g.nodes['game'].data['h'] = F.tensor([0, 1])
    g.edges['follows'].data['h'] = F.tensor([0, 1, 2, 4, 3, 1, 3])
    g.edges['follows'].data['hh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    g_r = dgl.reverse_heterograph(g)

    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert F.array_equal(g.nodes['user'].data['h'], g_r.nodes['user'].data['h'])
    assert F.array_equal(g.nodes['user'].data['hh'], g_r.nodes['user'].data['hh'])
    assert F.array_equal(g.nodes['game'].data['h'], g_r.nodes['game'].data['h'])
    assert len(g_r.edges['follows'].data) == 0

    u_g, v_g, eids_g = g.all_edges(form='all', etype=('user', 'follows', 'user'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('user', 'follows', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)

    u_g, v_g, eids_g = g.all_edges(form='all', etype=('user', 'plays', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('game', 'plays', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)

    u_g, v_g, eids_g = g.all_edges(form='all', etype=('developer', 'develops', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('game', 'develops', 'developer'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)

    # without sharing ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert len(g_r.nodes['user'].data) == 0
    assert len(g_r.nodes['game'].data) == 0

    # with sharing ndata and edata
    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    print(g_r)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    assert F.array_equal(g.edges['follows'].data['h'], g_r.edges['follows'].data['h'])
    assert F.array_equal(g.edges['follows'].data['hh'], g_r.edges['follows'].data['hh'])

    # add new node feature to g_r
    g_r.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert ('hhh' in g.nodes['user'].data) is False
    assert ('hhh' in g_r.nodes['user'].data) is True

    # add new edge feature to g_r
    g_r.edges['follows'].data['hhh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    assert ('hhh' in g.edges['follows'].data) is False
    assert ('hhh' in g_r.edges['follows'].data) is True
def _test_DGLCSVDataset_multiple(): with tempfile.TemporaryDirectory() as test_dir: # generate YAML/CSVs meta_yaml_path = os.path.join(test_dir, "meta.yaml") edges_csv_path_0 = os.path.join(test_dir, "test_edges_0.csv") edges_csv_path_1 = os.path.join(test_dir, "test_edges_1.csv") nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv") nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv") graph_csv_path = os.path.join(test_dir, "test_graph.csv") meta_yaml_data = {'version': '1.0.0', 'dataset_name': 'default_name', 'node_data': [{'file_name': os.path.basename(nodes_csv_path_0), 'ntype': 'user', }, {'file_name': os.path.basename(nodes_csv_path_1), 'ntype': 'item', }], 'edge_data': [{'file_name': os.path.basename(edges_csv_path_0), 'etype': ['user', 'follow', 'user'], }, {'file_name': os.path.basename(edges_csv_path_1), 'etype': ['user', 'like', 'item'], }], 'graph_data': {'file_name': os.path.basename(graph_csv_path)} } with open(meta_yaml_path, 'w') as f: yaml.dump(meta_yaml_data, f, sort_keys=False) num_nodes = 100 num_edges = 500 num_graphs = 10 num_dims = 3 feat_ndata = np.random.rand(num_nodes*num_graphs, num_dims) label_ndata = np.random.randint(2, size=num_nodes*num_graphs) df = pd.DataFrame({'node_id': np.hstack([np.arange(num_nodes) for _ in range(num_graphs)]), 'label': label_ndata, 'feat': [line.tolist() for line in feat_ndata], 'graph_id': np.hstack([np.full(num_nodes, i) for i in range(num_graphs)]) }) df.to_csv(nodes_csv_path_0, index=False) df.to_csv(nodes_csv_path_1, index=False) feat_edata = np.random.rand(num_edges*num_graphs, num_dims) label_edata = np.random.randint(2, size=num_edges*num_graphs) df = pd.DataFrame({'src_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]), 'dst_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]), 'label': label_edata, 'feat': [line.tolist() for line in feat_edata], 'graph_id': np.hstack([np.full(num_edges, i) for i in range(num_graphs)]) }) df.to_csv(edges_csv_path_0, index=False) df.to_csv(edges_csv_path_1, index=False) feat_gdata = np.random.rand(num_graphs, num_dims) label_gdata = np.random.randint(2, size=num_graphs) df = pd.DataFrame({'label': label_gdata, 'feat': [line.tolist() for line in feat_gdata], 'graph_id': np.arange(num_graphs) }) df.to_csv(graph_csv_path, index=False) # load CSVDataset with default node/edge/graph_data_parser for force_reload in [True, False]: if not force_reload: # remove original node data file to verify reload from cached files os.remove(nodes_csv_path_0) assert not os.path.exists(nodes_csv_path_0) csv_dataset = data.DGLCSVDataset( test_dir, force_reload=force_reload) assert len(csv_dataset) == num_graphs assert csv_dataset.has_cache() assert len(csv_dataset.data) == 2 assert 'feat' in csv_dataset.data assert 'label' in csv_dataset.data assert F.array_equal(F.tensor(feat_gdata), csv_dataset.data['feat']) for i, (g, label) in enumerate(csv_dataset): assert not g.is_homogeneous assert F.asnumpy(label) == label_gdata[i] for ntype in g.ntypes: assert g.num_nodes(ntype) == num_nodes assert F.array_equal(F.tensor(feat_ndata[i*num_nodes:(i+1)*num_nodes]), g.nodes[ntype].data['feat']) assert np.array_equal(label_ndata[i*num_nodes:(i+1)*num_nodes], F.asnumpy(g.nodes[ntype].data['label'])) for etype in g.etypes: assert g.num_edges(etype) == num_edges assert F.array_equal(F.tensor(feat_edata[i*num_edges:(i+1)*num_edges]), g.edges[etype].data['feat']) assert np.array_equal(label_edata[i*num_edges:(i+1)*num_edges], 
F.asnumpy(g.edges[etype].data['label']))
def test_remove_edges(index_dtype):
    def check(g1, etype, g, edges_removed):
        src, dst, eid = g.edges(etype=etype, form='all')
        src1, dst1 = g1.edges(etype=etype, order='eid')
        if etype is not None:
            eid1 = g1.edges[etype].data[dgl.EID]
        else:
            eid1 = g1.edata[dgl.EID]
        src1 = F.asnumpy(src1)
        dst1 = F.asnumpy(dst1)
        eid1 = F.asnumpy(eid1)
        src = F.asnumpy(src)
        dst = F.asnumpy(dst)
        eid = F.asnumpy(eid)
        sde_set = set(zip(src, dst, eid))
        for s, d, e in zip(src1, dst1, eid1):
            assert (s, d, e) in sde_set
        assert not np.isin(edges_removed, eid1).any()
        assert g1.idtype == g.idtype

    for fmt in ['coo', 'csr', 'csc']:
        for edges_to_remove in [[2], [2, 2], [3, 2], [1, 3, 1, 2]]:
            g = dgl.graph([(0, 1), (2, 3), (1, 2), (3, 4)],
                          restrict_format=fmt, index_dtype=index_dtype)
            g1 = dgl.remove_edges(g, F.tensor(edges_to_remove, getattr(F, index_dtype)))
            check(g1, None, g, edges_to_remove)

            g = dgl.graph(spsp.csr_matrix(([1, 1, 1, 1], ([0, 2, 1, 3], [1, 3, 2, 4])), shape=(5, 5)),
                          restrict_format=fmt, index_dtype=index_dtype)
            g1 = dgl.remove_edges(g, F.tensor(edges_to_remove, getattr(F, index_dtype)))
            check(g1, None, g, edges_to_remove)

    g = dgl.heterograph(
        {
            ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
            ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
            ('B', 'BA', 'A'): [(2, 3), (3, 2)]
        }, index_dtype=index_dtype)
    g2 = dgl.remove_edges(
        g, {
            'AA': F.tensor([2], getattr(F, index_dtype)),
            'AB': F.tensor([3], getattr(F, index_dtype)),
            'BA': F.tensor([1], getattr(F, index_dtype))
        })
    check(g2, 'AA', g, [2])
    check(g2, 'AB', g, [3])
    check(g2, 'BA', g, [1])

    g3 = dgl.remove_edges(
        g, {
            'AA': F.tensor([], getattr(F, index_dtype)),
            'AB': F.tensor([3], getattr(F, index_dtype)),
            'BA': F.tensor([1], getattr(F, index_dtype))
        })
    check(g3, 'AA', g, [])
    check(g3, 'AB', g, [3])
    check(g3, 'BA', g, [1])

    g4 = dgl.remove_edges(g, {'AB': F.tensor([3, 1, 2, 0], getattr(F, index_dtype))})
    check(g4, 'AA', g, [])
    check(g4, 'AB', g, [3, 1, 2, 0])
    check(g4, 'BA', g, [])
def rand_init(shape, dtype):
    # Note: the requested dtype is ignored; float32 data is always returned.
    return F.tensor(np.random.normal(size=shape), F.float32)
def test_compact(index_dtype): g1 = dgl.heterograph( { ('user', 'follow', 'user'): [(1, 3), (3, 5)], ('user', 'plays', 'game'): [(2, 4), (3, 4), (2, 5)], ('game', 'wished-by', 'user'): [(6, 7), (5, 7)] }, { 'user': 20, 'game': 10 }, index_dtype=index_dtype) g2 = dgl.heterograph( { ('game', 'clicked-by', 'user'): [(3, 1)], ('user', 'likes', 'user'): [(1, 8), (8, 9)] }, { 'user': 20, 'game': 10 }, index_dtype=index_dtype) g3 = dgl.graph([(0, 1), (1, 2)], num_nodes=10, ntype='user', index_dtype=index_dtype) g4 = dgl.graph([(1, 3), (3, 5)], num_nodes=10, ntype='user', index_dtype=index_dtype) def _check(g, new_g, induced_nodes): assert g.ntypes == new_g.ntypes assert g.canonical_etypes == new_g.canonical_etypes for ntype in g.ntypes: assert -1 not in induced_nodes[ntype] for etype in g.canonical_etypes: g_src, g_dst = g.all_edges(order='eid', etype=etype) g_src = F.asnumpy(g_src) g_dst = F.asnumpy(g_dst) new_g_src, new_g_dst = new_g.all_edges(order='eid', etype=etype) new_g_src_mapped = induced_nodes[etype[0]][F.asnumpy(new_g_src)] new_g_dst_mapped = induced_nodes[etype[2]][F.asnumpy(new_g_dst)] assert (g_src == new_g_src_mapped).all() assert (g_dst == new_g_dst_mapped).all() # Test default new_g1 = dgl.compact_graphs(g1) induced_nodes = { ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes } induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} assert new_g1._idtype_str == index_dtype assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7]) assert set(induced_nodes['game']) == set([4, 5, 6]) _check(g1, new_g1, induced_nodes) # Test with always_preserve given a dict new_g1 = dgl.compact_graphs(g1, always_preserve={ 'game': F.tensor([4, 7], dtype=getattr(F, index_dtype)) }) assert new_g1._idtype_str == index_dtype induced_nodes = { ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes } induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7]) assert set(induced_nodes['game']) == set([4, 5, 6, 7]) _check(g1, new_g1, induced_nodes) # Test with always_preserve given a tensor new_g3 = dgl.compact_graphs(g3, always_preserve=F.tensor([1, 7], dtype=getattr( F, index_dtype))) induced_nodes = { ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes } induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} assert new_g3._idtype_str == index_dtype assert set(induced_nodes['user']) == set([0, 1, 2, 7]) _check(g3, new_g3, induced_nodes) # Test multiple graphs new_g1, new_g2 = dgl.compact_graphs([g1, g2]) induced_nodes = { ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes } induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} assert new_g1._idtype_str == index_dtype assert new_g2._idtype_str == index_dtype assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9]) assert set(induced_nodes['game']) == set([3, 4, 5, 6]) _check(g1, new_g1, induced_nodes) _check(g2, new_g2, induced_nodes) # Test multiple graphs with always_preserve given a dict new_g1, new_g2 = dgl.compact_graphs([g1, g2], always_preserve={ 'game': F.tensor([4, 7], dtype=getattr( F, index_dtype)) }) induced_nodes = { ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes } induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} assert new_g1._idtype_str == index_dtype assert new_g2._idtype_str == index_dtype assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9]) assert set(induced_nodes['game']) == set([3, 4, 5, 6, 7]) _check(g1, new_g1, 
induced_nodes) _check(g2, new_g2, induced_nodes) # Test multiple graphs with always_preserve given a tensor new_g3, new_g4 = dgl.compact_graphs([g3, g4], always_preserve=F.tensor( [1, 7], dtype=getattr(F, index_dtype))) induced_nodes = { ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes } induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()} assert new_g3._idtype_str == index_dtype assert new_g4._idtype_str == index_dtype assert set(induced_nodes['user']) == set([0, 1, 2, 3, 5, 7]) _check(g3, new_g3, induced_nodes) _check(g4, new_g4, induced_nodes)
def _gen_neighbor_sampling_test_graph(hypersparse, reverse):
    if hypersparse:
        # should crash if a CSR is allocated
        card = 1 << 50
        card2 = (1 << 50, 1 << 50)
    else:
        card = None
        card2 = None

    if reverse:
        g = dgl.graph([(0, 1), (0, 2), (0, 3), (1, 0), (1, 2), (1, 3), (2, 0)],
                      'user', 'follow', card=card)
        g.edata['prob'] = F.tensor([.5, .5, 0., .5, .5, 0., 1.], dtype=F.float32)
        g1 = dgl.bipartite([(0, 0), (1, 0), (2, 1), (2, 3)], 'game', 'play', 'user', card=card2)
        g1.edata['prob'] = F.tensor([.8, .5, .5, .5], dtype=F.float32)
        g2 = dgl.bipartite([(0, 2), (1, 2), (2, 2), (0, 1), (3, 1), (0, 0)],
                           'user', 'liked-by', 'game', card=card2)
        g2.edata['prob'] = F.tensor([.3, .5, .2, .5, .1, .1], dtype=F.float32)
        g3 = dgl.bipartite([(0, 0), (0, 1), (0, 2), (0, 3)], 'coin', 'flips', 'user', card=card2)
        hg = dgl.hetero_from_relations([g, g1, g2, g3])
    else:
        g = dgl.graph([(1, 0), (2, 0), (3, 0), (0, 1), (2, 1), (3, 1), (0, 2)],
                      'user', 'follow', card=card)
        g.edata['prob'] = F.tensor([.5, .5, 0., .5, .5, 0., 1.], dtype=F.float32)
        g1 = dgl.bipartite([(0, 0), (0, 1), (1, 2), (3, 2)], 'user', 'play', 'game', card=card2)
        g1.edata['prob'] = F.tensor([.8, .5, .5, .5], dtype=F.float32)
        g2 = dgl.bipartite([(2, 0), (2, 1), (2, 2), (1, 0), (1, 3), (0, 0)],
                           'game', 'liked-by', 'user', card=card2)
        g2.edata['prob'] = F.tensor([.3, .5, .2, .5, .1, .1], dtype=F.float32)
        g3 = dgl.bipartite([(0, 0), (1, 0), (2, 0), (3, 0)], 'user', 'flips', 'coin', card=card2)
        hg = dgl.hetero_from_relations([g, g1, g2, g3])
    return g, hg
def atest_nx_conversion(index_dtype): # check conversion between networkx and DGLGraph def _check_nx_feature(nxg, nf, ef): # check node and edge feature of nxg # this is used to check to_networkx num_nodes = len(nxg) num_edges = nxg.size() if num_nodes > 0: node_feat = ddict(list) for nid, attr in nxg.nodes(data=True): assert len(attr) == len(nf) for k in nxg.nodes[nid]: node_feat[k].append(F.unsqueeze(attr[k], 0)) for k in node_feat: feat = F.cat(node_feat[k], 0) assert F.allclose(feat, nf[k]) else: assert len(nf) == 0 if num_edges > 0: edge_feat = ddict(lambda: [0] * num_edges) for u, v, attr in nxg.edges(data=True): assert len(attr) == len(ef) + 1 # extra id eid = attr['id'] for k in ef: edge_feat[k][eid] = F.unsqueeze(attr[k], 0) for k in edge_feat: feat = F.cat(edge_feat[k], 0) assert F.allclose(feat, ef[k]) else: assert len(ef) == 0 n1 = F.randn((5, 3)) n2 = F.randn((5, 10)) n3 = F.randn((5, 4)) e1 = F.randn((4, 5)) e2 = F.randn((4, 7)) g = dgl.graph([(0,2),(1,4),(3,0),(4,3)], index_dtype=index_dtype) g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3}) g.edata.update({'e1': e1, 'e2': e2}) # convert to networkx nxg = dgl.to_networkx(g, node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2']) assert len(nxg) == 5 assert nxg.size() == 4 _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2}) # convert to DGLGraph, nx graph has id in edge feature # use id feature to test non-tensor copy g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'], index_dtype=index_dtype) assert g._idtype_str == index_dtype # check graph size assert g.number_of_nodes() == 5 assert g.number_of_edges() == 4 # check number of features # test with existing dglgraph (so existing features should be cleared) assert len(g.ndata) == 1 assert len(g.edata) == 2 # check feature values assert F.allclose(g.ndata['n1'], n1) # with id in nx edge feature, e1 should follow original order assert F.allclose(g.edata['e1'], e1) assert F.array_equal(g.edata['id'], F.copy_to(F.arange(0, 4), F.cpu())) # test conversion after modifying DGLGraph # TODO(minjie): enable after mutation is supported #g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges #new_n = F.randn((2, 3)) #new_e = F.randn((3, 5)) #g.add_nodes(2, data={'n1': new_n}) ## add three edges, one is a multi-edge #g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e}) #n1 = F.cat((n1, new_n), 0) #e1 = F.cat((e1, new_e), 0) ## convert to networkx again #nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1']) #assert len(nxg) == 7 #assert nxg.size() == 7 #_check_nx_feature(nxg, {'n1': n1}, {'e1': e1}) # now test convert from networkx without id in edge feature # first pop id in edge feature for _, _, attr in nxg.edges(data=True): attr.pop('id') # test with a new graph g = dgl.graph(nxg , node_attrs=['n1'], edge_attrs=['e1']) # check graph size assert g.number_of_nodes() == 5 assert g.number_of_edges() == 4 # check number of features assert len(g.ndata) == 1 assert len(g.edata) == 1 # check feature values assert F.allclose(g.ndata['n1'], n1) # edge feature order follows nxg.edges() edge_feat = [] for _, _, attr in nxg.edges(data=True): edge_feat.append(F.unsqueeze(attr['e1'], 0)) edge_feat = F.cat(edge_feat, 0) assert F.allclose(g.edata['e1'], edge_feat) # Test converting from a networkx graph whose nodes are # not labeled with consecutive-integers. 
    nxg = nx.cycle_graph(5)
    nxg.remove_nodes_from([0, 4])
    for u in nxg.nodes():
        nxg.nodes[u]['h'] = F.tensor([u])
    for u, v, d in nxg.edges(data=True):
        d['h'] = F.tensor([u, v])
    g = dgl.DGLGraph()
    g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 4
    assert g.has_edge_between(0, 1)
    assert g.has_edge_between(1, 2)
    assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
    assert F.allclose(g.edata['h'], F.tensor([[1., 2.], [1., 2.], [2., 3.], [2., 3.]]))
def test_random_walk():
    g1 = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 2), (2, 0)]
    })
    g2 = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 2), (1, 3), (2, 0), (3, 0)]
    })
    g3 = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 2), (2, 0)],
        ('user', 'view', 'item'): [(0, 0), (1, 1), (2, 2)],
        ('item', 'viewed-by', 'user'): [(0, 0), (1, 1), (2, 2)]
    })
    g4 = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 2), (1, 3), (2, 0), (3, 0)],
        ('user', 'view', 'item'): [(0, 0), (0, 1), (1, 1), (2, 2), (3, 2), (3, 1)],
        ('item', 'viewed-by', 'user'): [(0, 0), (1, 0), (1, 1), (2, 2), (2, 3), (1, 3)]
    })

    g2.edata['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
    g4.edges['follow'].data['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
    g4.edges['viewed-by'].data['p'] = F.tensor([1, 1, 1, 1, 1, 1], dtype=F.float32)

    traces, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4)
    check_random_walk(g1, ['follow'] * 4, traces, ntypes)
    traces, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=0.)
    check_random_walk(g1, ['follow'] * 4, traces, ntypes)
    traces, ntypes = dgl.sampling.random_walk(
        g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=F.zeros((4,), F.float32, F.cpu()))
    check_random_walk(g1, ['follow'] * 4, traces, ntypes)
    traces, ntypes = dgl.sampling.random_walk(
        g1, [0, 1, 2, 0, 1, 2], length=5,
        restart_prob=F.tensor([0, 0, 0, 0, 1], dtype=F.float32))
    check_random_walk(
        g1, ['follow'] * 4, F.slice_axis(traces, 1, 0, 5), F.slice_axis(ntypes, 0, 0, 5))
    assert (F.asnumpy(traces)[:, 5] == -1).all()

    traces, ntypes = dgl.sampling.random_walk(g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4)
    check_random_walk(g2, ['follow'] * 4, traces, ntypes)
    traces, ntypes = dgl.sampling.random_walk(g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, prob='p')
    check_random_walk(g2, ['follow'] * 4, traces, ntypes, 'p')

    metapath = ['follow', 'view', 'viewed-by'] * 2
    traces, ntypes = dgl.sampling.random_walk(g3, [0, 1, 2, 0, 1, 2], metapath=metapath)
    check_random_walk(g3, metapath, traces, ntypes)

    metapath = ['follow', 'view', 'viewed-by'] * 2
    traces, ntypes = dgl.sampling.random_walk(g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath)
    check_random_walk(g4, metapath, traces, ntypes)

    metapath = ['follow', 'view', 'viewed-by'] * 2
    traces, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p')
    check_random_walk(g4, metapath, traces, ntypes, 'p')
    traces, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', restart_prob=0.)
    check_random_walk(g4, metapath, traces, ntypes, 'p')
    traces, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p',
        restart_prob=F.zeros((6,), F.float32, F.cpu()))
    check_random_walk(g4, metapath, traces, ntypes, 'p')
    traces, ntypes = dgl.sampling.random_walk(
        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath + ['follow'], prob='p',
        restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32))
    check_random_walk(g4, metapath, traces[:, :7], ntypes[:7], 'p')
    assert (F.asnumpy(traces[:, 7]) == -1).all()
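# Note (added for clarity, not part of the original test module): check_random_walk()
# used above is defined elsewhere in the test file. A minimal sketch of what such a
# helper is expected to verify -- every consecutive pair of nodes in a trace is joined
# by an edge of the expected type, and zero-probability edges are never taken when
# `prob` is given -- could look like the following; the name and exact checks here are
# hypothetical, and the `ntypes` argument is not inspected in this simplified version.
def _check_random_walk_sketch(g, metapath, traces, ntypes, prob=None):
    traces_np = F.asnumpy(traces)
    for path in traces_np:
        for step, etype in enumerate(metapath):
            u, v = int(path[step]), int(path[step + 1])
            if v == -1:
                # the walk terminated early (restart or dead end); padding is -1
                break
            assert g.has_edges_between(u, v, etype=etype)
            if prob is not None:
                eid = g.edge_ids(u, v, etype=etype)
                assert (F.asnumpy(g.edges[etype].data[prob][eid]) > 0).all()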
def foo(g):
    with g.local_scope():
        g.nodes[0].data['h'] = F.ones((1, 1))
        assert F.allclose(g.ndata['h'], F.tensor([[1.], [0.]]))
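# Note (added for clarity, not part of the original tests): foo() relies on
# DGLGraph.local_scope(), which rolls back any feature writes once the `with`
# block exits. A hypothetical caller, mirroring the 2-node graph that foo()
# expects, would observe the original features untouched afterwards:
def _local_scope_demo():
    g = dgl.graph([(0, 1)])              # 2 nodes, 1 edge
    g.ndata['h'] = F.zeros((2, 1))
    foo(g)                               # writes to 'h' only inside the local scope
    assert F.allclose(g.ndata['h'], F.zeros((2, 1)))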
def test_features(idtype): """Test the features of batched DGLHeteroGraphs""" g1 = dgl.heterograph( { ('user', 'follows', 'user'): ([0, 1], [1, 2]), ('user', 'plays', 'game'): ([0, 1], [0, 0]) }, idtype=idtype, device=F.ctx()) g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]]) g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]]) g1.nodes['game'].data['h1'] = F.tensor([[0.]]) g1.nodes['game'].data['h2'] = F.tensor([[1.]]) g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]]) g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]]) g1.edges['plays'].data['h1'] = F.tensor([[0.], [1.]]) g2 = dgl.heterograph( { ('user', 'follows', 'user'): ([0, 1], [1, 2]), ('user', 'plays', 'game'): ([0, 1], [0, 0]) }, idtype=idtype, device=F.ctx()) g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]]) g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]]) g2.nodes['game'].data['h1'] = F.tensor([[0.]]) g2.nodes['game'].data['h2'] = F.tensor([[1.]]) g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]]) g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]]) g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]]) # test default setting bg = dgl.batch([g1, g2]) assert F.allclose( bg.nodes['user'].data['h1'], F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']], dim=0)) assert F.allclose( bg.nodes['user'].data['h2'], F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0)) assert F.allclose( bg.nodes['game'].data['h1'], F.cat([g1.nodes['game'].data['h1'], g2.nodes['game'].data['h1']], dim=0)) assert F.allclose( bg.nodes['game'].data['h2'], F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']], dim=0)) assert F.allclose( bg.edges['follows'].data['h1'], F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0)) assert F.allclose( bg.edges['follows'].data['h2'], F.cat([g1.edges['follows'].data['h2'], g2.edges['follows'].data['h2']], dim=0)) assert F.allclose( bg.edges['plays'].data['h1'], F.cat([g1.edges['plays'].data['h1'], g2.edges['plays'].data['h1']], dim=0)) # test specifying ndata/edata bg = dgl.batch([g1, g2], ndata=['h2'], edata=['h1']) assert F.allclose( bg.nodes['user'].data['h2'], F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0)) assert F.allclose( bg.nodes['game'].data['h2'], F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']], dim=0)) assert F.allclose( bg.edges['follows'].data['h1'], F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0)) assert F.allclose( bg.edges['plays'].data['h1'], F.cat([g1.edges['plays'].data['h1'], g2.edges['plays'].data['h1']], dim=0)) assert 'h1' not in bg.nodes['user'].data assert 'h1' not in bg.nodes['game'].data assert 'h2' not in bg.edges['follows'].data # Test unbatching graphs g3, g4 = dgl.unbatch(bg) check_equivalence_between_heterographs(g1, g3, node_attrs={ 'user': ['h2'], 'game': ['h2'] }, edge_attrs={ ('user', 'follows', 'user'): ['h1'] }) check_equivalence_between_heterographs(g2, g4, node_attrs={ 'user': ['h2'], 'game': ['h2'] }, edge_attrs={ ('user', 'follows', 'user'): ['h1'] }) # test legacy bg = dgl.batch([g1, g2], edge_attrs=['h1']) assert 'h2' not in bg.edges['follows'].data.keys()
def test_batch_setter_getter(index_dtype): def _pfc(x): return list(F.zerocopy_to_numpy(x)[:,0]) g = generate_graph(index_dtype) # set all nodes g.ndata['h'] = F.zeros((10, D)) assert F.allclose(g.ndata['h'], F.zeros((10, D))) # pop nodes old_len = len(g.ndata) assert _pfc(g.ndata.pop('h')) == [0.] * 10 assert len(g.ndata) == old_len - 1 g.ndata['h'] = F.zeros((10, D)) # set partial nodes u = F.tensor([1, 3, 5], F.data_type_dict[index_dtype]) g.nodes[u].data['h'] = F.ones((3, D)) assert _pfc(g.ndata['h']) == [0., 1., 0., 1., 0., 1., 0., 0., 0., 0.] # get partial nodes u = F.tensor([1, 2, 3], F.data_type_dict[index_dtype]) assert _pfc(g.nodes[u].data['h']) == [1., 0., 1.] ''' s, d, eid 0, 1, 0 1, 9, 1 0, 2, 2 2, 9, 3 0, 3, 4 3, 9, 5 0, 4, 6 4, 9, 7 0, 5, 8 5, 9, 9 0, 6, 10 6, 9, 11 0, 7, 12 7, 9, 13 0, 8, 14 8, 9, 15 9, 0, 16 ''' # set all edges g.edata['l'] = F.zeros((17, D)) assert _pfc(g.edata['l']) == [0.] * 17 # pop edges old_len = len(g.edata) assert _pfc(g.edata.pop('l')) == [0.] * 17 assert len(g.edata) == old_len - 1 g.edata['l'] = F.zeros((17, D)) # set partial edges (many-many) u = F.tensor([0, 0, 2, 5, 9], dtype=F.data_type_dict[index_dtype]) v = F.tensor([1, 3, 9, 9, 0], dtype=F.data_type_dict[index_dtype]) g.edges[u, v].data['l'] = F.ones((5, D)) truth = [0.] * 17 truth[0] = truth[4] = truth[3] = truth[9] = truth[16] = 1. assert _pfc(g.edata['l']) == truth # set partial edges (many-one) u = F.tensor([3, 4, 6], dtype=F.data_type_dict[index_dtype]) v = F.tensor([9], dtype=F.data_type_dict[index_dtype]) g.edges[u, v].data['l'] = F.ones((3, D)) truth[5] = truth[7] = truth[11] = 1. assert _pfc(g.edata['l']) == truth # set partial edges (one-many) u = F.tensor([0], dtype=F.data_type_dict[index_dtype]) v = F.tensor([4, 5, 6], dtype=F.data_type_dict[index_dtype]) g.edges[u, v].data['l'] = F.ones((3, D)) truth[6] = truth[8] = truth[10] = 1. assert _pfc(g.edata['l']) == truth # get partial edges (many-many) u = F.tensor([0, 6, 0], dtype=F.data_type_dict[index_dtype]) v = F.tensor([6, 9, 7], dtype=F.data_type_dict[index_dtype]) assert _pfc(g.edges[u, v].data['l']) == [1., 1., 0.] # get partial edges (many-one) u = F.tensor([5, 6, 7], dtype=F.data_type_dict[index_dtype]) v = F.tensor([9], dtype=F.data_type_dict[index_dtype]) assert _pfc(g.edges[u, v].data['l']) == [1., 1., 0.] # get partial edges (one-many) u = F.tensor([0], dtype=F.data_type_dict[index_dtype]) v = F.tensor([3, 4, 5], dtype=F.data_type_dict[index_dtype]) assert _pfc(g.edges[u, v].data['l']) == [1., 1., 1.]
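# Note (added for clarity, not part of the original test module): generate_graph() and
# the feature dimension D are defined elsewhere in this file. Based on the s/d/eid table
# documented inside the test above (node 0 fans out to nodes 1..8, each of those points
# to node 9, and node 9 points back to 0, for 17 edges), a minimal stand-in could be
# built as below; the real helper's feature initialization and index_dtype handling are
# unknown here and therefore omitted.
def _generate_graph_sketch():
    g = dgl.DGLGraph()
    g.add_nodes(10)
    for i in range(1, 9):
        g.add_edges(0, i)   # even edge ids 0, 2, 4, ...
        g.add_edges(i, 9)   # odd edge ids 1, 3, 5, ...
    g.add_edges(9, 0)       # edge id 16
    return g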
        ip_addr = sock.getsockname()[0]
    except ValueError:
        ip_addr = '127.0.0.1'
    finally:
        sock.close()

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.bind(("", 0))
    sock.listen(1)
    port = sock.getsockname()[1]
    sock.close()
    return ip_addr + ' ' + str(port)


# Create a one-part graph
node_map = F.tensor([0, 0, 0, 0, 0, 0], F.int64)
edge_map = F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)
global_nid = F.tensor([0, 1, 2, 3, 4, 5], F.int64)
global_eid = F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)

g = dgl.DGLGraph()
g.add_nodes(6)
g.add_edges(0, 1)   # 0
g.add_edges(0, 2)   # 1
g.add_edges(0, 3)   # 2
g.add_edges(2, 3)   # 3
g.add_edges(1, 1)   # 4
g.add_edges(0, 4)   # 5
g.add_edges(2, 5)   # 6
g.ndata[dgl.NID] = global_nid