def run_client(graph_name, barrier, num_nodes, num_edges):
    """Connect to the graph server and exercise the DistGraph read/write API.

    Waits on *barrier* so all clients attach after the server is up, then
    checks graph sizes, feature reads, data initialization, writes, and
    metadata before shutting the server down.
    """
    barrier.wait()
    g = DistGraph(server_namebook, graph_name)

    # Graph-size API matches what the server was loaded with.
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Node features were initialized to the node IDs.
    half_nids = F.arange(0, int(g.number_of_nodes() / 2))
    node_vals = F.squeeze(g.ndata['features'][half_nids], 1)
    assert np.all(F.asnumpy(node_vals == half_nids))

    # Edge features were initialized to the edge IDs.
    half_eids = F.arange(0, int(g.number_of_edges() / 2))
    edge_vals = F.squeeze(g.edata['features'][half_eids], 1)
    assert np.all(F.asnumpy(edge_vals == half_eids))

    # Freshly initialized node data reads back as zeros.
    g.init_ndata('test1', (g.number_of_nodes(), 2), F.int32)
    assert np.all(F.asnumpy(g.ndata['test1'][half_nids]) == 0)

    # Freshly initialized edge data reads back as zeros.
    g.init_edata('test1', (g.number_of_edges(), 2), F.int32)
    assert np.all(F.asnumpy(g.edata['test1'][half_eids]) == 0)

    # A write through ndata is visible on the next read.
    g.ndata['test1'][half_nids] = F.ones((len(half_nids), 2), F.int32, F.cpu())
    assert np.all(F.asnumpy(g.ndata['test1'][half_nids]) == 1)

    # Metadata operations: length, shape, dtype and attribute schemes.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    g.shut_down()
    print('end')
def start_node_dataloader(rank, tmpdir, num_server, num_workers):
    """Run a client that drives dgl.dataloading.NodeDataLoader over a
    partitioned DistGraph and validates every sampled edge against the
    un-partitioned "cora" ground-truth graph.

    Parameters
    ----------
    rank : int
        This client's rank; selects which partition to load when shared
        memory is disabled.
    tmpdir : pathlib.Path
        Directory holding the 'test_sampling.json' partition config.
    num_server : int
        Number of server processes the partitions were split across.
    num_workers : int
        Number of sampler worker processes for the dataloader.
    """
    import dgl
    import torch as th
    dgl.distributed.initialize("mp_ip_config.txt", 1, num_workers=num_workers)
    gpb = None
    # NOTE(review): the sibling start_dist_dataloader uses `num_server > 0`
    # here — confirm whether `> 1` is intentional for this test.
    disable_shared_mem = num_server > 1
    if disable_shared_mem:
        # Without shared memory the partition book must be loaded explicitly.
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                               rank)
    num_nodes_to_sample = 202
    batch_size = 32
    train_nid = th.arange(num_nodes_to_sample)
    dist_graph = DistGraph("test_mp", gpb=gpb,
                           part_config=tmpdir / 'test_sampling.json')

    # Build partition-local -> original ID maps by scattering each
    # partition's 'orig_id' data at its dgl.NID / dgl.EID positions.
    orig_nid = F.zeros((dist_graph.number_of_nodes(), ), dtype=F.int64)
    orig_eid = F.zeros((dist_graph.number_of_edges(), ), dtype=F.int64)
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                                i)
        orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    # Create sampler
    sampler = dgl.dataloading.MultiLayerNeighborSampler([5, 10])

    # We need to test creating DistDataLoader multiple times.
    for i in range(2):
        # Create DataLoader for constructing blocks
        dataloader = dgl.dataloading.NodeDataLoader(dist_graph,
                                                    train_nid,
                                                    sampler,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    drop_last=False,
                                                    num_workers=num_workers)
        groundtruth_g = CitationGraphDataset("cora")[0]
        max_nid = []

        for epoch in range(2):
            for idx, (_, _, blocks) in zip(
                    range(0, num_nodes_to_sample, batch_size), dataloader):
                # Validate only the last (output-layer) block of each batch.
                block = blocks[-1]
                o_src, o_dst = block.edges()
                src_nodes_id = block.srcdata[dgl.NID][o_src]
                dst_nodes_id = block.dstdata[dgl.NID][o_dst]
                # Translate partition-local IDs to original graph IDs, then
                # check every sampled edge exists in the ground-truth graph.
                src_nodes_id = orig_nid[src_nodes_id]
                dst_nodes_id = orig_nid[dst_nodes_id]
                has_edges = groundtruth_g.has_edges_between(
                    src_nodes_id, dst_nodes_id)
                assert np.all(F.asnumpy(has_edges))
                max_nid.append(np.max(F.asnumpy(dst_nodes_id)))
                # assert np.all(np.unique(np.sort(F.asnumpy(dst_nodes_id))) == np.arange(idx, batch_size))
        del dataloader
    dgl.distributed.exit_client(
    )  # this is needed since there's two test here in one process
def start_dist_dataloader(rank, tmpdir, num_server, drop_last):
    """Run a client that drives DistDataLoader with a NeighborSampler over a
    partitioned DistGraph and validates sampled edges against the
    un-partitioned "cora" ground-truth graph.

    Parameters
    ----------
    rank : int
        This client's rank; selects which partition to load.
    tmpdir : pathlib.Path
        Directory holding the 'test_sampling.json' partition config.
    num_server : int
        Number of server processes the partitions were split across.
    drop_last : bool
        Whether the dataloader drops the final partial batch; the max-node-id
        assertion at the end depends on this.
    """
    import dgl
    import torch as th
    dgl.distributed.initialize("mp_ip_config.txt")
    gpb = None
    disable_shared_mem = num_server > 0
    if disable_shared_mem:
        # Load the partition book explicitly when shared memory is disabled.
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                               rank)
    num_nodes_to_sample = 202
    batch_size = 32
    train_nid = th.arange(num_nodes_to_sample)
    dist_graph = DistGraph("test_mp", gpb=gpb,
                           part_config=tmpdir / 'test_sampling.json')

    # Build partition-local -> original ID maps; identity by default, then
    # overwritten from each partition's 'orig_id' data where present.
    orig_nid = F.arange(0, dist_graph.number_of_nodes())
    orig_eid = F.arange(0, dist_graph.number_of_edges())
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(tmpdir / 'test_sampling.json',
                                                i)
        if 'orig_id' in part.ndata:
            orig_nid[part.ndata[dgl.NID]] = part.ndata['orig_id']
        if 'orig_id' in part.edata:
            orig_eid[part.edata[dgl.EID]] = part.edata['orig_id']

    # Create sampler
    sampler = NeighborSampler(dist_graph, [5, 10],
                              dgl.distributed.sample_neighbors)

    # We need to test creating DistDataLoader multiple times.
    for i in range(2):
        # Create DataLoader for constructing blocks
        dataloader = DistDataLoader(
            dataset=train_nid.numpy(),
            batch_size=batch_size,
            collate_fn=sampler.sample_blocks,
            shuffle=False,
            drop_last=drop_last)

        groundtruth_g = CitationGraphDataset("cora")[0]
        max_nid = []

        for epoch in range(2):
            for idx, blocks in zip(
                    range(0, num_nodes_to_sample, batch_size), dataloader):
                # Validate only the last (output-layer) block of each batch.
                block = blocks[-1]
                o_src, o_dst = block.edges()
                src_nodes_id = block.srcdata[dgl.NID][o_src]
                dst_nodes_id = block.dstdata[dgl.NID][o_dst]
                max_nid.append(np.max(F.asnumpy(dst_nodes_id)))
                # Translate partition-local IDs to original graph IDs, then
                # check every sampled edge exists in the ground-truth graph.
                src_nodes_id = orig_nid[src_nodes_id]
                dst_nodes_id = orig_nid[dst_nodes_id]
                has_edges = groundtruth_g.has_edges_between(src_nodes_id,
                                                            dst_nodes_id)
                assert np.all(F.asnumpy(has_edges))
                # assert np.all(np.unique(np.sort(F.asnumpy(dst_nodes_id))) == np.arange(idx, batch_size))
        # shuffle=False, so the largest destination ID seen tells us exactly
        # how much of the dataset the loader covered.
        if drop_last:
            assert np.max(
                max_nid
            ) == num_nodes_to_sample - 1 - num_nodes_to_sample % batch_size
        else:
            assert np.max(max_nid) == num_nodes_to_sample - 1
        del dataloader
    dgl.distributed.exit_client()  # this is needed since there's two test here in one process
def run_client(graph_name, barrier, num_nodes, num_edges):
    """Connect to the graph server and exercise the DistGraph API end-to-end:
    sizes, feature reads/writes, data initialization, metadata, and node
    splitting over a random mask.
    """
    barrier.wait()
    g = DistGraph(server_namebook, graph_name)

    # Graph-size API matches what the server was loaded with.
    assert g.number_of_nodes() == num_nodes
    assert g.number_of_edges() == num_edges

    # Node features were initialized to the node IDs.
    half_nids = F.arange(0, int(g.number_of_nodes() / 2))
    node_vals = F.squeeze(g.ndata['features'][half_nids], 1)
    assert np.all(F.asnumpy(node_vals == half_nids))

    # Edge features were initialized to the edge IDs.
    half_eids = F.arange(0, int(g.number_of_edges() / 2))
    edge_vals = F.squeeze(g.edata['features'][half_eids], 1)
    assert np.all(F.asnumpy(edge_vals == half_eids))

    # Freshly initialized node data reads back as zeros.
    g.init_ndata('test1', (g.number_of_nodes(), 2), F.int32)
    assert np.all(F.asnumpy(g.ndata['test1'][half_nids]) == 0)

    # Freshly initialized edge data reads back as zeros.
    g.init_edata('test1', (g.number_of_edges(), 2), F.int32)
    assert np.all(F.asnumpy(g.edata['test1'][half_eids]) == 0)

    # A write through ndata is visible on the next read.
    g.ndata['test1'][half_nids] = F.ones((len(half_nids), 2), F.int32, F.cpu())
    assert np.all(F.asnumpy(g.ndata['test1'][half_nids]) == 1)

    # Metadata operations: length, shape, dtype and attribute schemes.
    assert len(g.ndata['features']) == g.number_of_nodes()
    assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
    assert g.ndata['features'].dtype == F.int64
    assert g.node_attr_schemes()['features'].dtype == F.int64
    assert g.node_attr_schemes()['test1'].dtype == F.int32
    assert g.node_attr_schemes()['features'].shape == (1, )

    # Node split over a random boolean mask: with a single partition, every
    # node this rank receives must be local to the graph.
    selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
    split = F.asnumpy(
        node_split(selected_nodes, g.get_partition_book(), g.rank()))
    # We only have one partition, so the local nodes are basically all nodes
    # in the graph.
    local_nids = np.arange(g.number_of_nodes())
    for n in split:
        assert n in local_nids

    g.shut_down()
    print('end')
def run_client(graph_name, cli_id, part_id, server_count):
    """Run a client that compares DGL's distributed SparseAdam against
    PyTorch's torch.optim.SparseAdam after one optimization step.

    Two embedding tables are registered with each optimizer, but only one is
    used in the forward pass — presumably to exercise the optimizer's handling
    of parameters with no gradient (TODO confirm intent).

    Parameters
    ----------
    graph_name : str
        Name of the partitioned graph; used to locate its JSON config.
    cli_id : int
        Client ID (unused in the body).
    part_id : int
        Partition this client loads the partition book for.
    server_count : int
        Number of servers; exported via DGL_NUM_SERVER before initialize().
    """
    device = F.ctx()
    # Give the server processes time to come up before connecting.
    time.sleep(5)
    os.environ['DGL_NUM_SERVER'] = str(server_count)
    dgl.distributed.initialize("optim_ip_config.txt")
    gpb, graph_name, _, _ = load_partition_book(
        '/tmp/dist_graph/{}.json'.format(graph_name), part_id, None)
    g = DistGraph(graph_name, gpb=gpb)
    policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book())
    num_nodes = g.number_of_nodes()
    emb_dim = 4

    # Distributed embeddings + DGL sparse optimizer. _world_size/_rank are
    # forced so the optimizer behaves as a single-rank job in this test.
    dgl_emb = NodeEmbedding(num_nodes, emb_dim, name='optim',
                            init_func=initializer, part_policy=policy)
    dgl_emb_zero = NodeEmbedding(num_nodes, emb_dim, name='optim-zero',
                                 init_func=initializer, part_policy=policy)
    dgl_adam = SparseAdam(params=[dgl_emb, dgl_emb_zero], lr=0.01)
    dgl_adam._world_size = 1
    dgl_adam._rank = 0

    # Reference implementation: plain torch embeddings + torch SparseAdam.
    # Re-seeding before each init makes both tables start identical;
    # presumably `initializer` produces the same uniform values — TODO confirm.
    torch_emb = th.nn.Embedding(num_nodes, emb_dim, sparse=True)
    torch_emb_zero = th.nn.Embedding(num_nodes, emb_dim, sparse=True)
    th.manual_seed(0)
    th.nn.init.uniform_(torch_emb.weight, 0, 1.0)
    th.manual_seed(0)
    th.nn.init.uniform_(torch_emb_zero.weight, 0, 1.0)
    torch_adam = th.optim.SparseAdam(
        list(torch_emb.parameters()) + list(torch_emb_zero.parameters()),
        lr=0.01)

    # One identical forward/backward/step on both sides with the same
    # random indices and all-ones labels.
    labels = th.ones((4, )).long()
    idx = th.randint(0, num_nodes, size=(4, ))
    dgl_value = dgl_emb(idx, device).to(th.device('cpu'))
    torch_value = torch_emb(idx)
    torch_adam.zero_grad()
    torch_loss = th.nn.functional.cross_entropy(torch_value, labels)
    torch_loss.backward()
    torch_adam.step()

    dgl_adam.zero_grad()
    dgl_loss = th.nn.functional.cross_entropy(dgl_value, labels)
    dgl_loss.backward()
    dgl_adam.step()

    # After one step the updated weights must agree (first half of the table
    # is compared; presumably that is this partition's range — TODO confirm).
    assert F.allclose(dgl_emb.weight[0:num_nodes // 2],
                      torch_emb.weight[0:num_nodes // 2])