def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
    """Run servers and hierarchy clients on one partition and verify their results.

    A random graph is partitioned into a single part, ``num_servers`` server
    processes and ``num_clients`` client processes are spawned, and every
    client stores a ``(nodes, edges)`` pair in a shared manager dict. After
    all processes have been joined, the concatenated and sorted IDs reported
    by the clients must equal the randomly selected node and edge IDs.

    Parameters
    ----------
    shared_mem
        Forwarded unchanged to ``run_server``.
    num_servers : int
        Number of server processes to start.
    num_clients : int
        Number of client processes to start; also passed to
        ``partition_graph`` as ``num_trainers_per_machine``.
    """
    prepare_dist()
    graph = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = 'dist_graph_test_2'
    graph.ndata['features'] = F.unsqueeze(F.arange(0, graph.number_of_nodes()), 1)
    graph.edata['features'] = F.unsqueeze(F.arange(0, graph.number_of_edges()), 1)
    partition_graph(graph, graph_name, num_parts, '/tmp/dist_graph',
                    num_trainers_per_machine=num_clients)

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    spawn_ctx = mp.get_context('spawn')
    server_procs = []
    for server_id in range(num_servers):
        server_args = (graph_name, server_id, num_servers, num_clients, shared_mem)
        proc = spawn_ctx.Process(target=run_server, args=server_args)
        server_procs.append(proc)
        proc.start()

    manager = mp.Manager()
    return_dict = manager.dict()

    # Mark a random 10% of the nodes and edges; the sorted selections are the
    # expected answer the clients have to reproduce together.
    total_nodes = graph.number_of_nodes()
    total_edges = graph.number_of_edges()
    node_mask = np.zeros((total_nodes,), np.int32)
    edge_mask = np.zeros((total_edges,), np.int32)
    nodes = np.random.choice(total_nodes, total_nodes // 10, replace=False)
    edges = np.random.choice(total_edges, total_edges // 10, replace=False)
    node_mask[nodes] = 1
    edge_mask[edges] = 1
    nodes = np.sort(nodes)
    edges = np.sort(edges)

    client_procs = []
    client_args = (graph_name, 0, num_servers, node_mask, edge_mask, return_dict)
    for client_id in range(num_clients):
        print('start client', client_id)
        proc = spawn_ctx.Process(target=run_client_hierarchy, args=client_args)
        proc.start()
        client_procs.append(proc)

    # Wait for the clients first, then for the servers.
    for proc in client_procs + server_procs:
        proc.join()

    reported = list(return_dict.values())
    reported_nodes = [node_ids for node_ids, _ in reported]
    reported_edges = [edge_ids for _, edge_ids in reported]
    sorted_nodes, _ = F.sort_1d(F.cat(reported_nodes, 0))
    sorted_edges, _ = F.sort_1d(F.cat(reported_edges, 0))
    assert np.all(F.asnumpy(sorted_nodes) == nodes)
    assert np.all(F.asnumpy(sorted_edges) == edges)

    print('clients have terminated')
def sort_edges(edges):
    """Convert ``edges`` to user tensors and order them by the third tensor.

    Parameters
    ----------
    edges : sequence
        At least three objects exposing ``tousertensor()``. The third one is
        used as the sort key (presumably edge IDs, preceded by source and
        destination IDs -- confirm against the callers).

    Returns
    -------
    tuple
        The first three converted tensors. When the key tensor is non-empty
        all three are reordered by the ascending order of the key; otherwise
        they are returned as converted.
    """
    edges = [e.tousertensor() for e in edges]
    if np.prod(edges[2].shape) > 0:
        # Only the permutation is needed; the sorted values are rebuilt by
        # gathering the key tensor with the same permutation below.
        _, idx = F.sort_1d(edges[2])
        # Go through the backend's gather_row instead of indexing a tensor
        # with a tensor, matching how the rest of this file selects rows.
        return (F.gather_row(edges[0], idx),
                F.gather_row(edges[1], idx),
                F.gather_row(edges[2], idx))
    # Nothing to sort when the key tensor holds no elements.
    return (edges[0], edges[1], edges[2])
def sort_edges(edges):
    """Return the first three tensors of ``edges`` ordered by the third one.

    Every element of ``edges`` is converted with ``tousertensor()``. If the
    third tensor contains at least one element, all three tensors are
    gathered in the ascending order of that tensor; otherwise the converted
    tensors are returned untouched.
    """
    tensors = [item.tousertensor() for item in edges]
    sort_key = tensors[2]

    # Empty input: there is no order to establish.
    if not (np.prod(sort_key.shape) > 0):
        return (tensors[0], tensors[1], sort_key)

    _, order = F.sort_1d(sort_key)
    return tuple(F.gather_row(tensor, order) for tensor in tensors[:3])
def test_split_even():
    """Check ``node_split``/``edge_split`` with ``force_even=True``.

    A random graph is split into four METIS partitions and random boolean
    masks are drawn for nodes and edges. For every partition the masks are
    split once as if there were one client per partition and once as if there
    were two clients per partition. Concatenated over all partitions, both
    variants must reproduce exactly the IDs selected by the masks.
    """
    g = create_random_graph(10000)
    num_parts = 4
    num_hops = 2
    partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph',
                    num_hops=num_hops, part_method='metis')

    node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30
    all_nodes1 = []
    all_nodes2 = []
    all_edges1 = []
    all_edges2 = []

    # The code now collects the roles of all client processes and uses that
    # information to determine how to split the workloads. This simulates the
    # multi-client use case.
    def set_roles(num_clients):
        dgl.distributed.role.CUR_ROLE = 'default'
        dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)}
        dgl.distributed.role.PER_ROLE_RANK['default'] = {
            i: i for i in range(num_clients)
        }

    def split_and_report(split_fn, mask, gpb, part_id, local_ids, summary_fmt):
        """Split ``mask`` for one client and for two clients of ``part_id``.

        Roles must be set up for ``num_parts`` clients on entry; on return
        they are set up for ``num_parts * 2`` clients. Returns the
        single-client split and the sorted union of the two half-size splits.
        """
        single = split_fn(mask, gpb, rank=part_id, force_even=True)
        overlap = np.intersect1d(F.asnumpy(single), F.asnumpy(local_ids))
        print(summary_fmt.format(part_id, len(single), len(overlap)))

        set_roles(num_parts * 2)
        first = split_fn(mask, gpb, rank=part_id * 2, force_even=True)
        second = split_fn(mask, gpb, rank=part_id * 2 + 1, force_even=True)
        paired, _ = F.sort_1d(F.cat([first, second], 0))
        overlap = np.intersect1d(F.asnumpy(single), F.asnumpy(paired))
        print('intersection has', len(overlap))
        return single, paired

    for i in range(num_parts):
        set_roles(num_parts)
        # Only the partition graph and the partition book are needed here.
        part_g, _, _, gpb, _, _, _ = load_partition(
            '/tmp/dist_graph/dist_graph_test.json', i)
        local_nids = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
        nodes, paired_nodes = split_and_report(
            node_split, node_mask, gpb, i, local_nids,
            'part {} get {} nodes and {} are in the partition')
        all_nodes1.append(nodes)
        all_nodes2.append(paired_nodes)

        # The helper left the roles at two clients per partition; reset them
        # before doing the single-client edge split.
        set_roles(num_parts)
        local_eids = F.nonzero_1d(part_g.edata['inner_edge'])
        local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
        edges, paired_edges = split_and_report(
            edge_split, edge_mask, gpb, i, local_eids,
            'part {} get {} edges and {} are in the partition')
        all_edges1.append(edges)
        all_edges2.append(paired_edges)

    all_nodes1 = F.cat(all_nodes1, 0)
    all_edges1 = F.cat(all_edges1, 0)
    all_nodes2 = F.cat(all_nodes2, 0)
    all_edges2 = F.cat(all_edges2, 0)
    all_nodes = np.nonzero(node_mask)[0]
    all_edges = np.nonzero(edge_mask)[0]
    assert np.all(all_nodes == F.asnumpy(all_nodes1))
    assert np.all(all_edges == F.asnumpy(all_edges1))
    assert np.all(all_nodes == F.asnumpy(all_nodes2))
    assert np.all(all_edges == F.asnumpy(all_edges2))