Example #1
def run_server(graph_name,
               server_id,
               server_count,
               num_clients,
               shared_mem,
               keep_alive=False):
    g = DistGraphServer(server_id,
                        "kv_ip_config.txt",
                        server_count,
                        num_clients,
                        '/tmp/dist_graph/{}.json'.format(graph_name),
                        disable_shared_mem=not shared_mem,
                        graph_format=['csc', 'coo'],
                        keep_alive=keep_alive)
    print('start server', server_id)
    # verify dtype of underlying graph
    cg = g.client_g
    for k, dtype in dgl.distributed.dist_graph.FIELD_DICT.items():
        if k in cg.ndata:
            assert F.dtype(
                cg.ndata[k]
            ) == dtype, "Data type of {} in ndata should be {}.".format(
                k, dtype)
        if k in cg.edata:
            assert F.dtype(
                cg.edata[k]
            ) == dtype, "Data type of {} in edata should be {}.".format(
                k, dtype)
    g.start()
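
Nothing above actually launches the server processes; a hypothetical driver (process and client counts are illustrative, and it assumes kv_ip_config.txt and the partition JSON under /tmp/dist_graph already exist):

import multiprocessing as mp

num_servers, num_clients = 1, 1
serv_ps = []
for serv_id in range(num_servers):
    p = mp.Process(target=run_server,
                   args=('test_graph', serv_id, num_servers,
                         num_clients, True))  # shared_mem=True
    p.start()
    serv_ps.append(p)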
Example #2
def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
    """Internal function to compute batch information for subgraphs.
    Parameters
    ----------
    keys : List[str]
        The node/edge type keys.
    induced_indices_arr : List[Tensor]
        The induced node/edge index tensor for all node/edge types.
    batch_num_objs : Tensor
        Number of nodes/edges for each graph in the original batch.
    Returns
    -------
    Mapping[str, Tensor]
        A dictionary mapping all node/edge type keys to the ``batch_num_objs``
        array of corresponding graph.
    """
    bucket_offset = np.expand_dims(np.cumsum(F.asnumpy(batch_num_objs), 0),
                                   -1)  # (num_bkts, 1)
    ret = {}
    for key, induced_indices in zip(keys, induced_indices_arr):
        # NOTE(Zihao): this implementation is not efficient and we can replace it with
        # binary search in the future.
        induced_indices = np.expand_dims(F.asnumpy(induced_indices),
                                         0)  # (1, num_nodes)
        new_offset = np.sum((induced_indices < bucket_offset),
                            1)  # (num_bkts,)
        # start_offset = [0] + [new_offset[i-1] for i in range(1, n_bkts)]
        start_offset = np.concatenate([np.zeros((1, )), new_offset[:-1]], 0)
        new_batch_num_objs = new_offset - start_offset
        ret[key] = F.tensor(new_batch_num_objs, dtype=F.dtype(batch_num_objs))
    return ret
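
To see what the cumsum-and-broadcast trick computes, here is a hand-worked sketch in plain NumPy (values hypothetical; the F.tensor round trips are dropped for clarity):

import numpy as np

# Two graphs with 3 and 4 nodes; the induced subgraph keeps original
# nodes 0 and 2 (from graph 0) and node 5 (from graph 1).
batch_num_objs = np.array([3, 4])
induced_indices = np.array([0, 2, 5])

bucket_offset = np.cumsum(batch_num_objs)[:, None]                # [[3], [7]]
new_offset = np.sum(induced_indices[None, :] < bucket_offset, 1)  # [2, 3]
start_offset = np.concatenate([[0], new_offset[:-1]])             # [0, 2]
print(new_offset - start_offset)                                  # [2 1] objs per graph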
Example #3
def _get_inner_edge_mask(graph, etype_id):
    if dgl.ETYPE in graph.edata:
        dtype = F.dtype(graph.edata['inner_edge'])
        return graph.edata['inner_edge'] * F.astype(
            graph.edata[dgl.ETYPE] == etype_id, dtype) == 1
    else:
        return graph.edata['inner_edge'] == 1
Example #4
def _get_inner_node_mask(graph, ntype_id):
    if dgl.NTYPE in graph.ndata:
        dtype = F.dtype(graph.ndata['inner_node'])
        return graph.ndata['inner_node'] * F.astype(
            graph.ndata[dgl.NTYPE] == ntype_id, dtype) == 1
    else:
        return graph.ndata['inner_node'] == 1
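
Both helpers return boolean masks over a partition's local graph. A minimal usage sketch, assuming part_g came from load_partition and etype_id is a valid edge type ID:

# Select the global IDs of the edges of type `etype_id` owned by this partition.
mask = _get_inner_edge_mask(part_g, etype_id)
owned_eids = F.boolean_mask(part_g.edata[dgl.EID], mask)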
Example #5
def start_client(num_clients, num_servers):
    os.environ['DGL_DIST_MODE'] = 'distributed'
    # Note: connect to the server first!
    dgl.distributed.initialize(ip_config='kv_ip_config.txt')
    # Init kvclient
    kvclient = dgl.distributed.KVClient(ip_config='kv_ip_config.txt', num_servers=num_servers)
    kvclient.map_shared_data(partition_book=gpb)
    assert dgl.distributed.get_num_client() == num_clients
    kvclient.init_data(name='data_1',
                       shape=F.shape(data_1),
                       dtype=F.dtype(data_1),
                       part_policy=edge_policy,
                       init_func=init_zero_func)
    kvclient.init_data(name='data_2',
                       shape=F.shape(data_2),
                       dtype=F.dtype(data_2),
                       part_policy=node_policy,
                       init_func=init_zero_func)

    # Test data_name_list
    name_list = kvclient.data_name_list()
    print(name_list)
    assert 'data_0' in name_list
    assert 'data_0_1' in name_list
    assert 'data_0_2' in name_list
    assert 'data_0_3' in name_list
    assert 'data_1' in name_list
    assert 'data_2' in name_list
    # Test get_data_meta
    meta = kvclient.get_data_meta('data_0')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0)
    assert shape == F.shape(data_0)
    assert policy.policy_str == 'node:_N'

    meta = kvclient.get_data_meta('data_0_1')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0_1)
    assert shape == F.shape(data_0_1)
    assert policy.policy_str == 'node:_N'

    meta = kvclient.get_data_meta('data_0_2')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0_2)
    assert shape == F.shape(data_0_2)
    assert policy.policy_str == 'node:_N'

    meta = kvclient.get_data_meta('data_0_3')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0_3)
    assert shape == F.shape(data_0_3)
    assert policy.policy_str == 'node:_N'

    meta = kvclient.get_data_meta('data_1')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_1)
    assert shape == F.shape(data_1)
    assert policy.policy_str == 'edge:_E'

    meta = kvclient.get_data_meta('data_2')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_2)
    assert shape == F.shape(data_2)
    assert policy.policy_str == 'node:_N'

    # Test push and pull
    id_tensor = F.tensor([0, 2, 4], F.int64)
    data_tensor = F.tensor([[6., 6.], [6., 6.], [6., 6.]], F.float32)
    kvclient.push(name='data_0',
                  id_tensor=id_tensor,
                  data_tensor=data_tensor)
    kvclient.push(name='data_1',
                  id_tensor=id_tensor,
                  data_tensor=data_tensor)
    kvclient.push(name='data_2',
                  id_tensor=id_tensor,
                  data_tensor=data_tensor)
    res = kvclient.pull(name='data_0', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_1', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_2', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    # Register new push handler
    kvclient.register_push_handler('data_0', udf_push)
    kvclient.register_push_handler('data_1', udf_push)
    kvclient.register_push_handler('data_2', udf_push)
    # Test push and pull
    kvclient.push(name='data_0',
                  id_tensor=id_tensor,
                  data_tensor=data_tensor)
    kvclient.push(name='data_1',
                  id_tensor=id_tensor,
                  data_tensor=data_tensor)
    kvclient.push(name='data_2',
                  id_tensor=id_tensor,
                  data_tensor=data_tensor)
    kvclient.barrier()
    data_tensor = data_tensor * data_tensor
    res = kvclient.pull(name='data_0', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_1', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_2', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))

    # Test delete data
    kvclient.delete_data('data_0')
    kvclient.delete_data('data_1')
    kvclient.delete_data('data_2')

    # Init a new tensor and register an additive push handler
    kvclient.init_data(name='data_3',
                       shape=F.shape(data_2),
                       dtype=F.dtype(data_2),
                       part_policy=node_policy,
                       init_func=init_zero_func)
    kvclient.register_push_handler('data_3', add_push)
    data_tensor = F.tensor([[6., 6.], [6., 6.], [6., 6.]], F.float32)
    kvclient.barrier()
    time.sleep(kvclient.client_id + 1)
    print("add...")
    kvclient.push(name='data_3',
                  id_tensor=id_tensor,
                  data_tensor=data_tensor)
    kvclient.barrier()
    res = kvclient.pull(name='data_3', id_tensor=id_tensor)
    data_tensor = data_tensor * num_clients
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
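
Example #5 relies on module-level fixtures (data_0 … data_2, gpb, node_policy, edge_policy) and three callbacks it never defines. Plausible sketches of those callbacks, consistent with the assertions above (squared values after udf_push, a num_clients multiple after add_push):

def init_zero_func(shape, dtype):
    # zero-initialize a kvstore tensor on CPU
    return F.zeros(shape, dtype, F.cpu())

def udf_push(target, name, id_tensor, data_tensor):
    # store the elementwise square, so a later pull returns data ** 2
    target[name][id_tensor] = data_tensor * data_tensor

def add_push(target, name, id_tensor, data_tensor):
    # accumulate pushes, so pushes from num_clients clients sum up
    target[name][id_tensor] += data_tensor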
Example #6
def check_partition(part_method, reshuffle):
    g = create_random_graph(10000)
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10))
    g.edata['feats'] = F.tensor(np.random.randn(g.number_of_edges(), 10))
    g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
    g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
    num_parts = 4
    num_hops = 2

    partition_graph(g, 'test', num_parts, '/tmp/partition', num_hops=num_hops,
                    part_method=part_method, reshuffle=reshuffle)
    part_sizes = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _ = load_partition('/tmp/partition/test.json', i)

        # Check the metadata
        assert gpb._num_nodes() == g.number_of_nodes()
        assert gpb._num_edges() == g.number_of_edges()

        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts
        assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes']
        assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges']
        part_sizes.append((gpb_meta[i]['num_nodes'], gpb_meta[i]['num_edges']))

        local_nid = gpb.nid2localnid(F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node']), i)
        assert F.dtype(local_nid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid)))
        local_eid = gpb.eid2localeid(F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge']), i)
        assert F.dtype(local_eid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid)))

        # Check the node map.
        local_nodes = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node'])
        llocal_nodes = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nodes1 = gpb.partid2nids(i)
        assert F.dtype(local_nodes1) in (F.int32, F.int64)
        assert np.all(np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(local_nodes1)))

        # Check the edge map.
        local_edges = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge'])
        local_edges1 = gpb.partid2eids(i)
        assert F.dtype(local_edges1) in (F.int32, F.int64)
        assert np.all(np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(local_edges1)))

        if reshuffle:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'], part_g.ndata['orig_id'])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'], part_g.edata['orig_id'])
            # when we read node data from the original global graph, we should use orig_id.
            local_nodes = F.boolean_mask(part_g.ndata['orig_id'], part_g.ndata['inner_node'])
            local_edges = F.boolean_mask(part_g.edata['orig_id'], part_g.edata['inner_edge'])
        else:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'], part_g.ndata[dgl.NID])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'], part_g.edata[dgl.EID])
        part_g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
        part_g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
        assert F.allclose(F.gather_row(g.ndata['h'], local_nodes),
                          F.gather_row(part_g.ndata['h'], llocal_nodes))
        assert F.allclose(F.gather_row(g.ndata['eh'], local_nodes),
                          F.gather_row(part_g.ndata['eh'], llocal_nodes))

        for name in ['labels', 'feats']:
            assert name in node_feats
            assert node_feats[name].shape[0] == len(local_nodes)
            assert np.all(F.asnumpy(g.ndata[name])[F.asnumpy(local_nodes)] == F.asnumpy(node_feats[name]))
        for name in ['feats']:
            assert name in edge_feats
            assert edge_feats[name].shape[0] == len(local_edges)
            assert np.all(F.asnumpy(g.edata[name])[F.asnumpy(local_edges)] == F.asnumpy(edge_feats[name]))

    if reshuffle:
        node_map = []
        edge_map = []
        for i, (num_nodes, num_edges) in enumerate(part_sizes):
            node_map.append(np.ones(num_nodes) * i)
            edge_map.append(np.ones(num_edges) * i)
        node_map = np.concatenate(node_map)
        edge_map = np.concatenate(edge_map)
        nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
        assert F.dtype(nid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(nid2pid) == node_map)
        eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
        assert F.dtype(eid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(eid2pid) == edge_map)
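
A hypothetical driver that sweeps both knobs (the real test parametrization may differ):

for part_method in ('metis', 'random'):
    for reshuffle in (True, False):
        check_partition(part_method, reshuffle)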
Example #7
def check_partition(g, part_method, reshuffle):
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10),
                                F.float32)
    g.edata['feats'] = F.tensor(np.random.randn(g.number_of_edges(), 10),
                                F.float32)
    g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
    g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
    num_parts = 4
    num_hops = 2

    orig_nids, orig_eids = partition_graph(g,
                                           'test',
                                           num_parts,
                                           '/tmp/partition',
                                           num_hops=num_hops,
                                           part_method=part_method,
                                           reshuffle=reshuffle,
                                           return_mapping=True)
    part_sizes = []
    shuffled_labels = []
    shuffled_edata = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition(
            '/tmp/partition/test.json', i)

        # Check the metadata
        assert gpb._num_nodes() == g.number_of_nodes()
        assert gpb._num_edges() == g.number_of_edges()

        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts
        assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes']
        assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges']
        part_sizes.append((gpb_meta[i]['num_nodes'], gpb_meta[i]['num_edges']))

        nid = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node'])
        local_nid = gpb.nid2localnid(nid, i)
        assert F.dtype(local_nid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid)))
        eid = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge'])
        local_eid = gpb.eid2localeid(eid, i)
        assert F.dtype(local_eid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid)))

        # Check the node map.
        local_nodes = F.boolean_mask(part_g.ndata[dgl.NID],
                                     part_g.ndata['inner_node'])
        llocal_nodes = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nodes1 = gpb.partid2nids(i)
        assert F.dtype(local_nodes1) in (F.int32, F.int64)
        assert np.all(
            np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(
                local_nodes1)))
        assert np.all(F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes)))

        # Check the edge map.
        local_edges = F.boolean_mask(part_g.edata[dgl.EID],
                                     part_g.edata['inner_edge'])
        llocal_edges = F.nonzero_1d(part_g.edata['inner_edge'])
        local_edges1 = gpb.partid2eids(i)
        assert F.dtype(local_edges1) in (F.int32, F.int64)
        assert np.all(
            np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(
                local_edges1)))
        assert np.all(F.asnumpy(llocal_edges) == np.arange(len(llocal_edges)))

        # Verify the mapping between the reshuffled IDs and the original IDs.
        part_src_ids, part_dst_ids = part_g.edges()
        part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids)
        part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids)
        part_eids = part_g.edata[dgl.EID]
        orig_src_ids = F.gather_row(orig_nids, part_src_ids)
        orig_dst_ids = F.gather_row(orig_nids, part_dst_ids)
        orig_eids1 = F.gather_row(orig_eids, part_eids)
        orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids)
        assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0]
        assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))

        if reshuffle:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'],
                                                 part_g.ndata['orig_id'])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'],
                                                 part_g.edata['orig_id'])
            # when we read node data from the original global graph, we should use orig_id.
            local_nodes = F.boolean_mask(part_g.ndata['orig_id'],
                                         part_g.ndata['inner_node'])
            local_edges = F.boolean_mask(part_g.edata['orig_id'],
                                         part_g.edata['inner_edge'])
        else:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'],
                                                 part_g.ndata[dgl.NID])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'],
                                                 part_g.edata[dgl.EID])

        part_g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
        part_g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
        assert F.allclose(F.gather_row(g.ndata['h'], local_nodes),
                          F.gather_row(part_g.ndata['h'], llocal_nodes))
        assert F.allclose(F.gather_row(g.ndata['eh'], local_nodes),
                          F.gather_row(part_g.ndata['eh'], llocal_nodes))

        for name in ['labels', 'feats']:
            assert '_N/' + name in node_feats
            assert node_feats['_N/' + name].shape[0] == len(local_nodes)
            true_feats = F.gather_row(g.ndata[name], local_nodes)
            ndata = F.gather_row(node_feats['_N/' + name], local_nid)
            assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata))
        for name in ['feats']:
            assert '_E/' + name in edge_feats
            assert edge_feats['_E/' + name].shape[0] == len(local_edges)
            true_feats = F.gather_row(g.edata[name], local_edges)
            edata = F.gather_row(edge_feats['_E/' + name], local_eid)
            assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata))

        # This only works if node/edge IDs are shuffled.
        if reshuffle:
            shuffled_labels.append(node_feats['_N/labels'])
            shuffled_edata.append(edge_feats['_E/feats'])

    # Verify that we can reconstruct node/edge data for original IDs.
    if reshuffle:
        shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
        shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
        orig_labels = np.zeros(shuffled_labels.shape,
                               dtype=shuffled_labels.dtype)
        orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype)
        orig_labels[F.asnumpy(orig_nids)] = shuffled_labels
        orig_edata[F.asnumpy(orig_eids)] = shuffled_edata
        assert np.all(orig_labels == F.asnumpy(g.ndata['labels']))
        assert np.all(orig_edata == F.asnumpy(g.edata['feats']))

    if reshuffle:
        node_map = []
        edge_map = []
        for i, (num_nodes, num_edges) in enumerate(part_sizes):
            node_map.append(np.ones(num_nodes) * i)
            edge_map.append(np.ones(num_edges) * i)
        node_map = np.concatenate(node_map)
        edge_map = np.concatenate(edge_map)
        nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
        assert F.dtype(nid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(nid2pid) == node_map)
        eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
        assert F.dtype(eid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(eid2pid) == edge_map)
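
This variant takes the graph as an argument, so a driver sketch (graph size illustrative; reshuffle=True since the original-ID checks assume shuffled IDs) would be:

g = create_random_graph(10000)
for part_method in ('metis', 'random'):
    check_partition(g, part_method, reshuffle=True)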
Example #8
def start_client():
    # Note: connect to the server first!
    dgl.distributed.connect_to_server(ip_config='kv_ip_config.txt')
    # Init kvclient
    kvclient = dgl.distributed.KVClient(ip_config='kv_ip_config.txt')
    kvclient.init_data(name='data_1',
                       shape=F.shape(data_1),
                       dtype=F.dtype(data_1),
                       policy_str='edge',
                       partition_book=gpb,
                       init_func=init_zero_func)
    kvclient.init_data(name='data_2',
                       shape=F.shape(data_2),
                       dtype=F.dtype(data_2),
                       policy_str='node',
                       partition_book=gpb,
                       init_func=init_zero_func)

    kvclient.map_shared_data(partition_book=gpb)

    # Test data_name_list
    name_list = kvclient.data_name_list()
    print(name_list)
    assert 'data_0' in name_list
    assert 'data_0_1' in name_list
    assert 'data_0_2' in name_list
    assert 'data_0_3' in name_list
    assert 'data_1' in name_list
    assert 'data_2' in name_list
    # Test get_data_meta
    meta = kvclient.get_data_meta('data_0')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0)
    assert shape == F.shape(data_0)
    assert policy.policy_str == 'node'

    meta = kvclient.get_data_meta('data_0_1')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0_1)
    assert shape == F.shape(data_0_1)
    assert policy.policy_str == 'node'

    meta = kvclient.get_data_meta('data_0_2')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0_2)
    assert shape == F.shape(data_0_2)
    assert policy.policy_str == 'node'

    meta = kvclient.get_data_meta('data_0_3')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_0_3)
    assert shape == F.shape(data_0_3)
    assert policy.policy_str == 'node'

    meta = kvclient.get_data_meta('data_1')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_1)
    assert shape == F.shape(data_1)
    assert policy.policy_str == 'edge'

    meta = kvclient.get_data_meta('data_2')
    dtype, shape, policy = meta
    assert dtype == F.dtype(data_2)
    assert shape == F.shape(data_2)
    assert policy.policy_str == 'node'

    # Test push and pull
    id_tensor = F.tensor([0, 2, 4], F.int64)
    data_tensor = F.tensor([[6., 6.], [6., 6.], [6., 6.]], F.float32)
    kvclient.push(name='data_0', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.push(name='data_1', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.push(name='data_2', id_tensor=id_tensor, data_tensor=data_tensor)
    res = kvclient.pull(name='data_0', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_1', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_2', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    # Register new push handler
    kvclient.register_push_handler('data_0', udf_push)
    kvclient.register_push_handler('data_1', udf_push)
    kvclient.register_push_handler('data_2', udf_push)
    # Test push and pull
    kvclient.push(name='data_0', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.push(name='data_1', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.push(name='data_2', id_tensor=id_tensor, data_tensor=data_tensor)
    data_tensor = data_tensor * data_tensor
    res = kvclient.pull(name='data_0', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_1', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    res = kvclient.pull(name='data_2', id_tensor=id_tensor)
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
    # clean up
    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
Example #9
    def _test(feat_scale):
        in_feat = 16 * feat_scale
        out_feat = 8 * feat_scale
        print("in/out feat", in_feat, out_feat)
        E_per_rel = F.copy_to(
            F.tensor([
                50, 100, 20, 284, 89, 10, 82, 9200, 10, 20, 30, 100, 128, 20,
                284, 89, 10, 82, 92, 10, 20, 30, 100, 1280, 20, 284, 89, 1000,
                82, 92, 10, 2000, 30, 100, 128, 20, 284, 89, 10, 82, 92, 10,
                20, 30
            ]), F.cpu())

        E_per_rel *= n_edge_scale
        num_rel = len(E_per_rel)
        print('num_rel', num_rel)
        W_per_len = F.copy_to(
            F.full((num_rel, ), in_feat, dtype=F.dtype(E_per_rel)), F.cpu())

        H_arr = []
        W_arr = []
        Out_arr = []
        Out_grad_arr = []

        for eid in range(num_rel):
            H_arr.append(F.randn((E_per_rel[eid], in_feat)))
            W_arr.append(F.randn((in_feat, out_feat)))
            Out_arr.append(F.zeros((E_per_rel[eid], out_feat)))
            Out_grad_arr.append(F.ones((E_per_rel[eid], out_feat)))

        H = F.cat(H_arr, 0)
        W = F.cat(W_arr, 0)
        W_3D = W.reshape(num_rel, in_feat, out_feat)
        Out = F.cat(Out_arr, 0)
        Out_grad = F.cat(Out_grad_arr, 0)

        print('H.shape', H.shape)
        print('W.shape', W.shape)
        print('W_3D.shape', W_3D.shape)
        print('Out.shape', Out.shape)

        etype_arr = []
        for eid in range(num_rel):
            etype_arr.append(
                F.full((E_per_rel[eid], ), eid, dtype=F.dtype(E_per_rel)))
        etypes = F.cat(etype_arr, 0)

        #################################################################
        #  low-mem version using PyTorch operator
        #################################################################

        # forward pass
        out = []
        for i in range(len(E_per_rel)):
            Hi = H_arr[i]
            Wi = W_arr[i]
            out.append(F.matmul(Hi, Wi))
        out_low_mem = F.cat(out, 0)

        # backward pass
        H_grad = []
        W_grad = []
        for i in range(len(E_per_rel)):
            Hi = H_arr[i]
            Wi = W_arr[i]
            Out_gradi = Out_grad_arr[i]
            H_grad.append(F.matmul(Out_gradi, Wi.transpose(0, 1)))
            W_grad.append(F.matmul(Hi.transpose(0, 1), Out_gradi))
        Hgrad_low_mem = F.cat(H_grad, 0)
        Wgrad_low_mem = F.cat(W_grad, 0)
        Wgrad_low_mem = Wgrad_low_mem.reshape(num_rel, in_feat, out_feat)

        #################################################################
        #  segment_mm where H is sorted according to etype
        #################################################################

        seglen_A = E_per_rel
        F.attach_grad(H)
        F.attach_grad(W_3D)
        with F.record_grad():
            out_gmm_sorted = dgl.ops.segment_mm(H, W_3D, seglen_A)
            F.backward(F.reduce_sum(out_gmm_sorted))
            Hgrad_gmm_sorted = H.grad
            Wgrad_gmm_sorted = W_3D.grad

        #################################################################
        #  gather_mm where H is not sorted according to etype
        #################################################################

        F.attach_grad(H)
        F.attach_grad(W_3D)
        with F.record_grad():
            out_gmm_unsorted = dgl.ops.gather_mm(H, W_3D, idx_rhs=etypes)
            F.backward(F.reduce_sum(out_gmm_unsorted))
            Hgrad_gmm_unsorted = H.grad
            Wgrad_gmm_unsorted = W_3D.grad

        # correctness check
        assert F.allclose(out_low_mem, out_gmm_sorted, atol=1e-3, rtol=1e-3)
        assert F.allclose(Hgrad_low_mem,
                          Hgrad_gmm_sorted,
                          atol=1e-3,
                          rtol=1e-3)
        assert F.allclose(Wgrad_low_mem,
                          Wgrad_gmm_sorted,
                          atol=1e-3,
                          rtol=1e-3)
        assert F.allclose(out_low_mem, out_gmm_unsorted, atol=1e-3, rtol=1e-3)
        assert F.allclose(Hgrad_low_mem,
                          Hgrad_gmm_unsorted,
                          atol=1e-3,
                          rtol=1e-3)
        assert F.allclose(Wgrad_low_mem,
                          Wgrad_gmm_unsorted,
                          atol=1e-3,
                          rtol=1e-3)
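
_test closes over n_edge_scale from its enclosing test function, so a minimal hypothetical harness (placed inside that enclosing function) would be:

    n_edge_scale = 1
    for feat_scale in (1, 4):
        _test(feat_scale)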
Example #10
def test_nx_conversion(idtype):
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = dgl.graph([(0, 2), (1, 4), (3, 0), (4, 3)],
                  idtype=idtype,
                  device=F.ctx())
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = dgl.to_networkx(g.cpu(),
                          node_attrs=['n1', 'n3'],
                          edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g = dgl.from_networkx(nxg,
                          node_attrs=['n1'],
                          edge_attrs=['e1', 'id'],
                          idtype=idtype)
    assert g.idtype == idtype
    assert g.device == F.cpu()
    g = g.to(F.ctx())
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.edata['id'], F.arange(0, 4, F.dtype(g.edata['id'])))

    # test conversion after modifying DGLGraph
    # TODO(minjie): enable after mutation is supported
    #g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges
    #new_n = F.randn((2, 3))
    #new_e = F.randn((3, 5))
    #g.add_nodes(2, data={'n1': new_n})
    ## add three edges, one is a multi-edge
    #g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    #n1 = F.cat((n1, new_n), 0)
    #e1 = F.cat((e1, new_e), 0)
    ## convert to networkx again
    #nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    #assert len(nxg) == 7
    #assert nxg.size() == 7
    #_check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = dgl.from_networkx(nxg,
                          node_attrs=['n1'],
                          edge_attrs=['e1'],
                          idtype=idtype)
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)