def check_rpc_find_edges(tmpdir, num_server):
    """Check that the distributed ``find_edges`` client agrees with the local graph.

    Writes one address line per server to ``rpc_ip_config.txt``, partitions
    the Cora citation graph into ``num_server`` METIS parts without
    reshuffling, spawns one server process per part and compares the
    endpoints returned by ``start_find_edges_client`` for 100 random edge IDs
    with ``g.find_edges`` on the unpartitioned graph.

    Parameters
    ----------
    tmpdir
        Output directory handed to ``partition_graph`` and to the
        server/client helpers.
    num_server : int
        Number of server processes; also used as the number of partitions.
    """
    # The context manager closes the config file even if the address lookup
    # or a write raises part-way through.
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{} 1\n'.format(get_local_usable_addr()))

    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = num_server

    partition_graph(g, 'test_find_edges', num_parts, tmpdir,
                    num_hops=1, part_method='metis', reshuffle=False)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        # Third argument is ``num_server > 1``; presumably the
        # "disable shared memory" switch -- confirm against start_server.
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_find_edges'))
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    # NOTE(review): the server processes are never joined in this function,
    # unlike the sampling checks in this file -- confirm that is intended.
    time.sleep(3)
    eids = F.tensor(np.random.randint(g.number_of_edges(), size=100))
    u, v = g.find_edges(eids)
    du, dv = start_find_edges_client(0, tmpdir, num_server > 1, eids)
    assert F.array_equal(u, du)
    assert F.array_equal(v, dv)
def check_rpc_sampling(tmpdir, num_server):
    """Check distributed sampling against the unpartitioned Cora graph.

    Writes ``rpc_ip_config.txt``, partitions the graph into ``num_server``
    METIS parts without reshuffling, spawns one server process per part, runs
    ``start_sample_client`` and waits for every server to exit. The sampled
    graph must have as many nodes as the original, contain only edges that
    exist in the original, and carry ``dgl.EID`` values equal to the original
    edge IDs of those edges.

    Parameters
    ----------
    tmpdir
        Output directory handed to ``partition_graph`` and to the
        server/client helpers.
    num_server : int
        Number of server processes; also used as the number of partitions.
    """
    # The context manager closes the config file even if the address lookup
    # or a write raises part-way through.
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{} 1\n'.format(get_local_usable_addr()))

    g = CitationGraphDataset("cora")[0]
    g.readonly()
    print(g.idtype)
    num_parts = num_server
    num_hops = 1

    partition_graph(g, 'test_sampling', num_parts, tmpdir,
                    num_hops=num_hops, part_method='metis', reshuffle=False)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        # Third argument is ``num_server > 1``; presumably the
        # "disable shared memory" switch -- confirm against start_server.
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_sampling'))
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    time.sleep(3)
    sampled_graph = start_sample_client(0, tmpdir, num_server > 1)
    print("Done sampling")
    for p in pserver_list:
        p.join()

    src, dst = sampled_graph.edges()
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    assert np.all(F.asnumpy(g.has_edges_between(src, dst)))
    eids = g.edge_ids(src, dst)
    assert np.array_equal(
        F.asnumpy(sampled_graph.edata[dgl.EID]), F.asnumpy(eids))
def check_rpc_find_edges_shuffle(tmpdir, num_server):
    """Check the distributed ``find_edges`` client on a reshuffled partition.

    The Cora graph is partitioned with ``reshuffle=True`` and
    ``return_mapping=True``; the returned node/edge mappings are used to
    translate between the IDs seen by the client and the original graph's
    IDs before the endpoints are compared.

    Parameters
    ----------
    tmpdir
        Output directory handed to ``partition_graph`` and to the
        server/client helpers.
    num_server : int
        Number of server processes; also used as the number of partitions.
    """
    # NOTE(review): this file contains a later definition with the same name;
    # at module level that later definition rebinds the name.
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    graph = CitationGraphDataset("cora")[0]
    graph.readonly()

    orig_nid, orig_eid = partition_graph(
        graph, 'test_find_edges', num_server, tmpdir,
        num_hops=1, part_method='metis', reshuffle=True,
        return_mapping=True)

    spawn_ctx = mp.get_context('spawn')
    multi_server = num_server > 1
    servers = []
    for rank in range(num_server):
        server = spawn_ctx.Process(
            target=start_server,
            args=(rank, tmpdir, multi_server, 'test_find_edges',
                  ['csr', 'coo']))
        server.start()
        time.sleep(1)
        servers.append(server)

    # NOTE(review): the server processes are not joined in this function.
    edge_count = graph.number_of_edges()
    eids = F.tensor(np.random.randint(edge_count, size=100))

    # Reference endpoints are looked up through the edge mapping returned by
    # partition_graph.
    expected_src, expected_dst = graph.find_edges(orig_eid[eids])

    remote_src, remote_dst = start_find_edges_client(
        0, tmpdir, multi_server, eids)
    remote_src = orig_nid[remote_src]
    remote_dst = orig_nid[remote_dst]

    assert F.array_equal(expected_src, remote_src)
    assert F.array_equal(expected_dst, remote_dst)
def check_rpc_get_degree_shuffle(tmpdir, num_server):
    """Check distributed degree queries on a reshuffled partition of Cora.

    After partitioning with ``reshuffle=True`` and starting one server per
    part, a mapping from partition node IDs to original node IDs is built
    from each partition's ``dgl.NID`` / ``'orig_id'`` node data. The degrees
    returned by ``start_get_degrees_client`` (for 100 random nodes and for
    all nodes) are then compared with the original graph's in/out degrees.

    Parameters
    ----------
    tmpdir
        Directory for the partition files; must support the ``/`` operator.
    num_server : int
        Number of server processes; also used as the number of partitions.
    """
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    graph = CitationGraphDataset("cora")[0]
    graph.readonly()

    partition_graph(graph, 'test_get_degrees', num_server, tmpdir,
                    num_hops=1, part_method='metis', reshuffle=True)

    spawn_ctx = mp.get_context('spawn')
    servers = []
    for rank in range(num_server):
        server = spawn_ctx.Process(
            target=start_server,
            args=(rank, tmpdir, num_server > 1, 'test_get_degrees'))
        server.start()
        time.sleep(1)
        servers.append(server)

    # Fill orig_nid so that orig_nid[partition_id] == original_id.
    orig_nid = F.zeros((graph.number_of_nodes(),), dtype=F.int64, ctx=F.cpu())
    for part_id in range(num_server):
        part, _, _, _, _, _, _ = load_partition(
            tmpdir / 'test_get_degrees.json', part_id)
        orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']

    nids = F.tensor(np.random.randint(graph.number_of_nodes(), size=100))
    degrees = start_get_degrees_client(0, tmpdir, num_server > 1, nids)
    in_degs, out_degs, all_in_degs, all_out_degs = degrees

    print("Done get_degree")
    for server in servers:
        server.join()

    print('check results')
    assert F.array_equal(graph.in_degrees(orig_nid[nids]), in_degs)
    assert F.array_equal(graph.in_degrees(orig_nid), all_in_degs)
    assert F.array_equal(graph.out_degrees(orig_nid[nids]), out_degs)
    assert F.array_equal(graph.out_degrees(orig_nid), all_out_degs)
def check_standalone_sampling(tmpdir):
    """Check ``sample_neighbors`` on a single-partition ``DistGraph``.

    Partitions Cora into one part without reshuffling, opens it as a
    ``DistGraph`` from the generated JSON config and samples 3 neighbours for
    a fixed list of seed nodes. The sampled graph must have as many nodes as
    the original, contain only edges present in the original, and carry
    ``dgl.EID`` values equal to the original edge IDs of those edges.

    Parameters
    ----------
    tmpdir
        Directory for the partition files; must support the ``/`` operator.
    """
    graph = CitationGraphDataset("cora")[0]
    graph.readonly()

    partition_graph(graph, 'test_sampling', 1, tmpdir,
                    num_hops=1, part_method='metis', reshuffle=False)

    config_path = tmpdir / 'test_sampling.json'
    dist_graph = DistGraph(None, "test_sampling", conf_file=config_path)

    seeds = [0, 10, 99, 66, 1024, 2008]
    sampled_graph = sample_neighbors(dist_graph, seeds, 3)

    src, dst = sampled_graph.edges()
    assert sampled_graph.number_of_nodes() == graph.number_of_nodes()

    edges_exist = F.asnumpy(graph.has_edges_between(src, dst))
    assert np.all(edges_exist)

    expected_eids = F.asnumpy(graph.edge_ids(src, dst))
    sampled_eids = F.asnumpy(sampled_graph.edata[dgl.EID])
    assert np.array_equal(sampled_eids, expected_eids)
def check_rpc_in_subgraph_shuffle(tmpdir, num_server):
    """Check the distributed in-subgraph client on a reshuffled partition.

    Writes ``rpc_ip_config.txt``, partitions Cora into ``num_server`` METIS
    parts with ``reshuffle=True``, spawns one server per part, runs
    ``start_in_subgraph_client`` for a fixed node list and waits for the
    servers to exit. Node and edge IDs of the result are mapped back to
    original IDs (via each partition's ``'orig_id'`` data) and compared with
    ``dgl.in_subgraph`` on the original graph.

    Parameters
    ----------
    tmpdir
        Directory for the partition files; must support the ``/`` operator.
    num_server : int
        Number of server processes; also used as the number of partitions.
    """
    # The context manager closes the config file even if the address lookup
    # or a write raises part-way through.
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{}\n'.format(get_local_usable_addr()))

    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = num_server

    partition_graph(g, 'test_in_subgraph', num_parts, tmpdir,
                    num_hops=1, part_method='metis', reshuffle=True)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        # Third argument is ``num_server > 1``; presumably the
        # "disable shared memory" switch -- confirm against start_server.
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_in_subgraph'))
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    nodes = [0, 10, 99, 66, 1024, 2008]
    time.sleep(3)
    sampled_graph = start_in_subgraph_client(0, tmpdir, num_server > 1, nodes)
    for p in pserver_list:
        p.join()

    # Build partition-ID -> original-ID lookup tables for nodes and edges.
    orig_nid = F.zeros((g.number_of_nodes(), ), dtype=F.int64, ctx=F.cpu())
    orig_eid = F.zeros((g.number_of_edges(), ), dtype=F.int64, ctx=F.cpu())
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(
            tmpdir / 'test_in_subgraph.json', i)
        orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    src, dst = sampled_graph.edges()
    src = orig_nid[src]
    dst = orig_nid[dst]
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    assert np.all(F.asnumpy(g.has_edges_between(src, dst)))

    # Endpoints are compared after sorting, so edge order does not matter.
    subg1 = dgl.in_subgraph(g, orig_nid[nodes])
    src1, dst1 = subg1.edges()
    assert np.all(np.sort(F.asnumpy(src)) == np.sort(F.asnumpy(src1)))
    assert np.all(np.sort(F.asnumpy(dst)) == np.sort(F.asnumpy(dst1)))
    eids = g.edge_ids(src, dst)
    eids1 = orig_eid[sampled_graph.edata[dgl.EID]]
    assert np.array_equal(F.asnumpy(eids1), F.asnumpy(eids))
def check_rpc_sampling_shuffle(tmpdir):
    """Check distributed sampling on a reshuffled two-part partition of Cora.

    Writes ``rpc_sampling_ip_config.txt`` for two servers, partitions the
    graph with ``reshuffle=True``, spawns the servers, runs ``start_client``
    and waits for the servers to exit. Node and edge IDs of the sampled graph
    are mapped back to original IDs (via each partition's ``'orig_id'`` data)
    before being checked against the original graph.

    Parameters
    ----------
    tmpdir
        Directory for the partition files; must support the ``/`` operator.
    """
    # NOTE(review): this file contains a later definition with the same name
    # (taking ``num_server`` and ``num_groups``); at module level that later
    # definition rebinds the name.
    num_server = 2

    # The context manager closes the config file even if the address lookup
    # or a write raises part-way through.
    with open("rpc_sampling_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{} 1\n'.format(get_local_usable_addr()))

    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = num_server
    num_hops = 1

    partition_graph(g, 'test_sampling', num_parts, tmpdir,
                    num_hops=num_hops, part_method='metis', reshuffle=True)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        p = ctx.Process(target=start_server, args=(i, tmpdir))
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    time.sleep(3)
    sampled_graph = start_client(0, tmpdir)
    print("Done sampling")
    for p in pserver_list:
        p.join()

    # Build partition-ID -> original-ID lookup tables for nodes and edges.
    orig_nid = F.zeros((g.number_of_nodes(), ), dtype=F.int64)
    orig_eid = F.zeros((g.number_of_edges(), ), dtype=F.int64)
    for i in range(num_server):
        part, _, _, _ = load_partition(tmpdir / 'test_sampling.json', i)
        orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    src, dst = sampled_graph.edges()
    src = orig_nid[src]
    dst = orig_nid[dst]
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    assert np.all(F.asnumpy(g.has_edges_between(src, dst)))
    eids = g.edge_ids(src, dst)
    eids1 = orig_eid[sampled_graph.edata[dgl.EID]]
    assert np.array_equal(F.asnumpy(eids1), F.asnumpy(eids))
def check_rpc_sampling_shuffle(tmpdir, num_server, num_groups=1):
    """Run shuffled-partition sampling clients against spawned servers.

    Partitions Cora into ``num_server`` METIS parts with ``reshuffle=True``,
    spawns one server process per part and one
    ``start_sample_client_shuffle`` process per client/group pair, then waits
    for the clients. When ``num_groups > 1`` a ``keep_alive`` flag of ``True``
    is passed to ``start_server``; in that case every server is asserted to
    still be alive after the clients finish and the servers are shut down
    explicitly before being joined.

    Parameters
    ----------
    tmpdir
        Output directory handed to ``partition_graph`` and to the
        server/client helpers.
    num_server : int
        Number of server processes; also used as the number of partitions.
    num_groups : int, optional
        Number of client groups to launch (default 1).
    """
    # NOTE(review): an earlier definition with the same name (taking only
    # ``tmpdir``) appears in this file; this later one rebinds the name.
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    graph = CitationGraphDataset("cora")[0]
    graph.readonly()

    partition_graph(graph, 'test_sampling', num_server, tmpdir,
                    num_hops=1, part_method='metis', reshuffle=True)

    spawn_ctx = mp.get_context('spawn')
    keep_alive = num_groups > 1

    servers = []
    for server_rank in range(num_server):
        server = spawn_ctx.Process(
            target=start_server,
            args=(server_rank, tmpdir, num_server > 1, 'test_sampling',
                  ['csc', 'coo'], keep_alive))
        server.start()
        time.sleep(1)
        servers.append(server)

    clients = []
    num_clients = 1
    for client_id in range(num_clients):
        for group_id in range(num_groups):
            client = spawn_ctx.Process(
                target=start_sample_client_shuffle,
                args=(client_id, tmpdir, num_server > 1, graph, num_server,
                      group_id))
            client.start()
            # avoid race condition when instantiating DistGraph
            time.sleep(1)
            clients.append(client)

    for client in clients:
        client.join()

    if keep_alive:
        for server in servers:
            assert server.is_alive()
        # force shutdown server
        dgl.distributed.shutdown_servers("rpc_ip_config.txt", 1)

    for server in servers:
        server.join()
def check_rpc_find_edges_shuffle(tmpdir, num_server):
    """Check the distributed ``find_edges`` client on a reshuffled partition.

    Writes ``rpc_ip_config.txt``, partitions Cora into ``num_server`` METIS
    parts with ``reshuffle=True`` and spawns one server per part. Lookup
    tables from partition IDs to original IDs are built from each partition's
    ``'orig_id'`` data and used to compare the client's result for 100 random
    edge IDs with ``g.find_edges`` on the original graph.

    Parameters
    ----------
    tmpdir
        Directory for the partition files; must support the ``/`` operator.
    num_server : int
        Number of server processes; also used as the number of partitions.
    """
    # NOTE(review): an earlier definition with the same name appears in this
    # file; this later one rebinds the name.

    # The context manager closes the config file even if the address lookup
    # or a write raises part-way through.
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{}\n'.format(get_local_usable_addr()))

    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = num_server

    partition_graph(g, 'test_find_edges', num_parts, tmpdir,
                    num_hops=1, part_method='metis', reshuffle=True)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        # Third argument is ``num_server > 1``; presumably the
        # "disable shared memory" switch -- confirm against start_server.
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_find_edges',
                              ['csr', 'coo']))
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    # Build partition-ID -> original-ID lookup tables for nodes and edges.
    orig_nid = F.zeros((g.number_of_nodes(), ), dtype=F.int64, ctx=F.cpu())
    orig_eid = F.zeros((g.number_of_edges(), ), dtype=F.int64, ctx=F.cpu())
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(
            tmpdir / 'test_find_edges.json', i)
        orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    # NOTE(review): the server processes are never joined in this function --
    # confirm that is intended.
    time.sleep(3)
    eids = F.tensor(np.random.randint(g.number_of_edges(), size=100))
    u, v = g.find_edges(orig_eid[eids])
    du, dv = start_find_edges_client(0, tmpdir, num_server > 1, eids)
    du = orig_nid[du]
    dv = orig_nid[dv]
    assert F.array_equal(u, du)
    assert F.array_equal(v, dv)
def check_rpc_in_subgraph(tmpdir, num_server):
    """Check the distributed in-subgraph client against ``dgl.in_subgraph``.

    Writes ``rpc_ip_config.txt``, partitions Cora into ``num_server`` METIS
    parts without reshuffling, spawns one server per part, runs
    ``start_in_subgraph_client`` for a fixed node list and waits for the
    servers to exit. The result is compared with ``dgl.in_subgraph`` computed
    on the original graph after ``dgl.as_heterograph``.

    Parameters
    ----------
    tmpdir
        Output directory handed to ``partition_graph`` and to the
        server/client helpers.
    num_server : int
        Number of server processes; also used as the number of partitions.
    """
    # The context manager closes the config file even if the address lookup
    # or a write raises part-way through.
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{} 1\n'.format(get_local_usable_addr()))

    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = num_server

    partition_graph(g, 'test_in_subgraph', num_parts, tmpdir,
                    num_hops=1, part_method='metis', reshuffle=False)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        # Third argument is ``num_server > 1``; presumably the
        # "disable shared memory" switch -- confirm against start_server.
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_in_subgraph'))
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    nodes = [0, 10, 99, 66, 1024, 2008]
    time.sleep(3)
    sampled_graph = start_in_subgraph_client(0, tmpdir, num_server > 1, nodes)
    for p in pserver_list:
        p.join()

    src, dst = sampled_graph.edges()
    g = dgl.as_heterograph(g)
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()

    # Endpoints are compared after sorting, so edge order does not matter.
    subg1 = dgl.in_subgraph(g, nodes)
    src1, dst1 = subg1.edges()
    assert np.all(np.sort(F.asnumpy(src)) == np.sort(F.asnumpy(src1)))
    assert np.all(np.sort(F.asnumpy(dst)) == np.sort(F.asnumpy(dst1)))
    eids = g.edge_ids(src, dst)
    assert np.array_equal(F.asnumpy(sampled_graph.edata[dgl.EID]),
                          F.asnumpy(eids))