Пример #1
0
def test_sddmm(g, shp, lhs_target, rhs_target, msg, index_dtype):
    """Compare ``gsddmm`` against the UDF ``apply_edges`` path.

    Random operand features (uniform samples shifted by +1) are generated for
    the left and right targets. The edge output and the operand gradients
    computed by ``gsddmm`` are then checked against the ones produced by
    ``g.apply_edges`` with the matching entry of ``udf_apply_edges``.

    Parameters
    ----------
    g : graph under test; converted to the requested index dtype.
    shp : pair of trailing feature shapes, one for each operand.
    lhs_target, rhs_target : selector forwarded to ``select`` to pick between
        source-node, edge and destination-node data.
    msg : name of the binary operator (``copy_lhs`` / ``copy_rhs`` skip the
        gradient check of the unused operand).
    index_dtype : ``'int32'`` selects ``g.int()``; anything else ``g.long()``.
    """
    backend = dgl.backend.backend_name
    if backend == 'mxnet' and g.number_of_edges() == 0:
        pytest.skip()  # mxnet does not support zero-shape tensors
    if backend == 'tensorflow' and index_dtype == 'int32':
        pytest.skip()  # tensorflow dlpack has problems with int32 ndarrays
    g = g.int() if index_dtype == 'int32' else g.long()
    print(g)
    print(g.idtype)

    def random_feat(target, tail_shape):
        # Leading dimension is the number of items of the selected target.
        count = select(target, g.number_of_src_nodes(), g.number_of_edges(),
                       g.number_of_dst_nodes())
        return F.tensor(np.random.rand(*((count, ) + tail_shape)) + 1)

    feat_lhs = random_feat(lhs_target, shp[0])
    feat_rhs = random_feat(rhs_target, shp[1])
    print('lhs shape: {}, rhs shape: {}'.format(F.shape(feat_lhs),
                                                F.shape(feat_rhs)))

    # Copies stored on the graph feed the UDF reference implementation.
    lhs_frame = select(lhs_target, g.srcdata, g.edata, g.dstdata)
    rhs_frame = select(rhs_target, g.srcdata, g.edata, g.dstdata)
    lhs_frame['x'] = F.attach_grad(F.clone(feat_lhs))
    rhs_frame['y'] = F.attach_grad(F.clone(feat_rhs))
    msg_func = '_'.join((lhs_target, msg, rhs_target))
    print('SDDMM(message func: {})'.format(msg_func))

    # Independent copies feed the kernel under test.
    lhs = F.attach_grad(F.clone(feat_lhs))
    rhs = F.attach_grad(F.clone(feat_rhs))
    with F.record_grad():
        e = gsddmm(g, msg, lhs, rhs,
                   lhs_target=lhs_target, rhs_target=rhs_target)
        F.backward(F.reduce_sum(e))
        grad_lhs = F.grad(lhs)
        grad_rhs = F.grad(rhs)

    with F.record_grad():
        g.apply_edges(udf_apply_edges[msg_func])
        if g.number_of_edges() > 0:
            reference = g.edata['m']
            assert F.allclose(e, reference)
            print('forward passed')

            F.backward(F.reduce_sum(reference))
            if msg != 'copy_rhs':
                assert F.allclose(F.grad(lhs_frame['x']), grad_lhs)
            if msg != 'copy_lhs':
                assert F.allclose(F.grad(rhs_frame['y']), grad_rhs)
            print('backward passed')

    # Remove the temporary features so the graph can be reused.
    lhs_frame.pop('x')
    rhs_frame.pop('y')
    if 'm' in g.edata:
        g.edata.pop('m')
Пример #2
0
def test_row1():
    """Check row-wise reads and writes on a ``FrameRef``.

    Covers reading with unique and with duplicated row ids, overwriting rows
    in existing columns, and the failure raised when a row update introduces
    a new column while the initializer itself fails.
    """
    data = create_test_data()
    frame = FrameRef(Frame(data))

    # Getter: first without, then with duplicated row ids.
    for ids in ([0, 2], [8, 2, 2, 1]):
        query = Index(F.tensor(ids))
        fetched = frame[query]
        for key, column in fetched.items():
            assert tuple(F.shape(column)) == (len(query), D)
            expected = F.gather_row(data[key],
                                    F.tensor(query.tousertensor()))
            assert F.allclose(column, expected)

    # Setter: overwrite three rows of every existing column with zeros.
    rowid = Index(F.tensor([0, 2, 4]))
    vals = {name: F.zeros((len(rowid), D)) for name in ('a1', 'a2', 'a3')}
    frame[rowid] = vals
    for key, column in frame[rowid].items():
        assert F.allclose(column, F.zeros((len(rowid), D)))

    # Writing rows of a new column needs the initializer, which is rigged to
    # fail here, so the update as a whole must fail.
    frame.set_initializer(lambda shape, dtype: assert_(False))

    def failed_update_rows():
        vals['a4'] = F.ones((len(rowid), D))
        frame[rowid] = vals

    assert check_fail(failed_update_rows)
Пример #3
0
def _test_layer_sampler(prefetch=False):
    """Run ``LayerSampler`` on a random graph and validate every sampled flow.

    Five seed batches of 50 distinct nodes each are drawn from a 100-node
    random graph and sampled with three layers of size 50 along in-edges.
    For each sampled sub-graph the test checks layer sizes, that node and
    edge ids map back into the parent graph, that the last layer matches one
    of the seed batches, that each block's edges have the same endpoints as
    their parent edges, and that layers/blocks partition the nodes/edges.

    Parameters
    ----------
    prefetch : bool
        Forwarded to ``LayerSampler``.
    """
    g = generate_rand_graph(100)
    nid = g.nodes()
    src, dst, eid = g.all_edges(form='all', order='eid')
    n_batches = 5
    batch_size = 50
    seed_batches = [
        np.sort(np.random.choice(F.asnumpy(nid), batch_size, replace=False))
        for _ in range(n_batches)
    ]
    seed_nodes = np.hstack(seed_batches)
    layer_sizes = [50] * 3
    LayerSampler = getattr(dgl.contrib.sampling, 'LayerSampler')
    sampler = LayerSampler(g,
                           batch_size,
                           layer_sizes,
                           'in',
                           seed_nodes=seed_nodes,
                           num_workers=4,
                           prefetch=prefetch)
    for sub_g in sampler:
        # Every non-seed layer must stay strictly below its requested size.
        assert all(
            sub_g.layer_size(i) < size for i, size in enumerate(layer_sizes))
        sub_nid = F.arange(0, sub_g.number_of_nodes())
        assert all(
            np.all(np.isin(F.asnumpy(sub_g.layer_nid(i)), F.asnumpy(sub_nid)))
            for i in range(sub_g.num_layers))
        assert np.all(
            np.isin(F.asnumpy(sub_g.map_to_parent_nid(sub_nid)),
                    F.asnumpy(nid)))
        sub_eid = F.arange(0, sub_g.number_of_edges())
        assert np.all(
            np.isin(F.asnumpy(sub_g.map_to_parent_eid(sub_eid)),
                    F.asnumpy(eid)))
        # The last layer must be exactly one of the seed batches.
        assert any(
            np.all(
                np.sort(F.asnumpy(sub_g.layer_parent_nid(-1))) == seed_batch)
            for seed_batch in seed_batches)

        sub_src, sub_dst = sub_g.all_edges(order='eid')
        for i in range(sub_g.num_blocks):
            block_eid = sub_g.block_eid(i)
            block_src = sub_g.map_to_parent_nid(sub_src[block_eid])
            block_dst = sub_g.map_to_parent_nid(sub_dst[block_eid])

            block_parent_eid = sub_g.block_parent_eid(i)
            block_parent_src = src[block_parent_eid]
            block_parent_dst = dst[block_parent_eid]

            # Both endpoints of a sampled edge must agree with its parent
            # edge; previously only the source endpoint was verified.
            assert np.all(F.asnumpy(block_src == block_parent_src))
            assert np.all(F.asnumpy(block_dst == block_parent_dst))

        # Layers partition the nodes, blocks partition the edges.
        n_layers = sub_g.num_layers
        sub_n = sub_g.number_of_nodes()
        assert sum(F.shape(sub_g.layer_nid(i))[0]
                   for i in range(n_layers)) == sub_n
        n_blocks = sub_g.num_blocks
        sub_m = sub_g.number_of_edges()
        assert sum(F.shape(sub_g.block_eid(i))[0]
                   for i in range(n_blocks)) == sub_m
Пример #4
0
def test_readonly():
    """Check that toggling the read-only flag gates graph mutation.

    A 5-node, 4-edge graph is switched between mutable and read-only states.
    While read-only, ``add_nodes`` must raise ``DGLError`` and leave the
    graph untouched; once mutable again, adding nodes and edges must succeed
    and grow the attached feature tensors accordingly.

    Only ``DGLError`` is treated as the expected rejection. Any other
    exception propagates unchanged instead of being masked by an assertion
    placed in a ``finally`` clause.
    """
    g = dgl.DGLGraph()
    g.add_nodes(5)
    g.add_edges([0, 1, 2, 3], [1, 2, 3, 4])
    g.ndata['x'] = F.zeros((5, 3))
    g.edata['x'] = F.zeros((4, 4))

    def add_nodes_rejected(num):
        # True when the mutation is refused with DGLError.
        try:
            g.add_nodes(num)
        except DGLError:
            return True
        return False

    g.readonly(False)
    assert not g._graph.is_readonly()
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4

    # Requesting read-only twice must be harmless; mutation is rejected
    # after each request.
    for _ in range(2):
        g.readonly()
        assert g._graph.is_readonly()
        assert g.number_of_nodes() == 5
        assert g.number_of_edges() == 4
        assert add_nodes_rejected(5)

    g.readonly(False)
    assert not g._graph.is_readonly()
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4

    # Mutation must work again; a DGLError here fails the test directly.
    g.add_nodes(10)
    g.add_edges([4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
                [5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
    assert g.number_of_nodes() == 15
    assert F.shape(g.ndata['x']) == (15, 3)
    assert g.number_of_edges() == 14
    assert F.shape(g.edata['x']) == (14, 4)
Пример #5
0
def test_spmm(idtype, g, shp, msg, reducer):
    """Compare ``gspmm`` against ``update_all`` on nodes with in-edges.

    Random source and edge features (uniform samples shifted by +1) are fed
    both to ``gspmm`` and to ``g.update_all`` with the matching UDFs. Outputs
    are compared only on destination rows whose in-degree is non-zero, and
    the operand gradients are compared afterwards.

    Parameters
    ----------
    idtype : index dtype the graph is cast to.
    g : graph under test; moved to ``F.ctx()``.
    shp : pair of trailing feature shapes (source features, edge features).
    msg : message operator name; ``copy_lhs`` / ``copy_rhs`` skip the
        gradient check of the unused operand.
    reducer : reduce operator name; ``min`` / ``max`` use a relative-error
        gradient check instead of ``allclose``.
    """
    g = g.astype(idtype).to(F.ctx())
    if dgl.backend.backend_name == 'tensorflow' and (reducer in ['min', 'max']):
        pytest.skip()  # tensorflow dlpack has problem writing into int32 arrays on GPU.
    print(g)
    print(g.idtype)

    src_shape = (g.number_of_src_nodes(),) + shp[0]
    hu = F.tensor(np.random.rand(*src_shape) + 1)
    edge_shape = (g.number_of_edges(),) + shp[1]
    he = F.tensor(np.random.rand(*edge_shape) + 1)
    print('u shape: {}, e shape: {}'.format(F.shape(hu), F.shape(he)))

    # Graph-attached copies feed the UDF reference implementation.
    g.srcdata['x'] = F.attach_grad(F.clone(hu))
    g.edata['w'] = F.attach_grad(F.clone(he))
    print('SpMM(message func: {}, reduce func: {})'.format(msg, reducer))

    def check_grad(actual, expected):
        # min/max may carry small numerical differences, so compare the
        # summed absolute error relative to the expected gradient.
        if reducer in ['min', 'max']:
            rate = F.reduce_sum(F.abs(actual - expected)) /\
                   F.reduce_sum(F.abs(expected))
            assert F.as_scalar(rate) < 1e-2, rate
        else:
            assert F.allclose(actual, expected)

    u = F.attach_grad(F.clone(hu))
    e = F.attach_grad(F.clone(he))
    with F.record_grad():
        v = gspmm(g, msg, reducer, u, e)
        # Keep only destination rows that receive at least one message.
        has_in_edges = F.asnumpy(g.in_degrees()) != 0
        non_degree_indices = F.tensor(np.nonzero(has_in_edges)[0])
        v = F.gather_row(v, non_degree_indices)
        if g.number_of_edges() > 0:
            F.backward(F.reduce_sum(v))
            if msg != 'copy_rhs':
                grad_u = F.grad(u)
            if msg != 'copy_lhs':
                grad_e = F.grad(e)

    with F.record_grad():
        g.update_all(udf_msg[msg], udf_reduce[reducer])
        if g.number_of_edges() > 0:
            reference = F.gather_row(g.dstdata['v'], non_degree_indices)
            assert F.allclose(v, reference)
            print('forward passed')

            F.backward(F.reduce_sum(reference))
            if msg != 'copy_rhs':
                check_grad(F.grad(g.srcdata['x']), grad_u)
            if msg != 'copy_lhs':
                check_grad(F.grad(g.edata['w']), grad_e)
            print('backward passed')

    # Remove the temporary features so the graph can be reused.
    g.srcdata.pop('x')
    g.edata.pop('w')
    if 'v' in g.dstdata:
        g.dstdata.pop('v')
Пример #6
0
def test_spmm(idtype, g, shp, msg, reducer):
    """Compare ``gspmm`` against ``update_all``, forward and backward.

    Random source and edge features (uniform samples shifted by +1) are fed
    both to ``gspmm`` and to ``g.update_all`` with the matching UDFs. For
    ``min`` / ``max`` reducers the kernel output is first passed through
    ``F.replace_inf_with_zero``.

    Parameters
    ----------
    idtype : index dtype the graph is cast to.
    g : graph under test; moved to ``F.ctx()``.
    shp : pair of trailing feature shapes (source features, edge features).
    msg : message operator name; ``copy_lhs`` / ``copy_rhs`` skip the
        gradient check of the unused operand.
    reducer : reduce operator name; ``min`` / ``max`` use a relative-error
        gradient check instead of ``allclose``.
    """
    g = g.astype(idtype).to(F.ctx())
    print(g)
    print(g.idtype)

    src_shape = (g.number_of_src_nodes(), ) + shp[0]
    hu = F.tensor(np.random.rand(*src_shape) + 1)
    edge_shape = (g.number_of_edges(), ) + shp[1]
    he = F.tensor(np.random.rand(*edge_shape) + 1)
    print('u shape: {}, e shape: {}'.format(F.shape(hu), F.shape(he)))

    # Graph-attached copies feed the UDF reference implementation.
    g.srcdata['x'] = F.attach_grad(F.clone(hu))
    g.edata['w'] = F.attach_grad(F.clone(he))
    print('SpMM(message func: {}, reduce func: {})'.format(msg, reducer))

    def check_grad(actual, expected):
        # min/max may carry small numerical differences, so compare the
        # summed absolute error relative to the expected gradient.
        if reducer in ['min', 'max']:
            rate = F.reduce_sum(F.abs(actual - expected)) /\
                   F.reduce_sum(F.abs(expected))
            assert F.as_scalar(rate) < 1e-2, rate
        else:
            assert F.allclose(actual, expected)

    u = F.attach_grad(F.clone(hu))
    e = F.attach_grad(F.clone(he))
    with F.record_grad():
        v = gspmm(g, msg, reducer, u, e)
        if reducer in ['max', 'min']:
            v = F.replace_inf_with_zero(v)
        if g.number_of_edges() > 0:
            F.backward(F.reduce_sum(v))
            if msg != 'copy_rhs':
                grad_u = F.grad(u)
            if msg != 'copy_lhs':
                grad_e = F.grad(e)

    with F.record_grad():
        g.update_all(udf_msg[msg], udf_reduce[reducer])
        if g.number_of_edges() > 0:
            reference = g.dstdata['v']
            assert F.allclose(v, reference)
            print('forward passed')

            F.backward(F.reduce_sum(reference))
            if msg != 'copy_rhs':
                check_grad(F.grad(g.srcdata['x']), grad_u)
            if msg != 'copy_lhs':
                check_grad(F.grad(g.edata['w']), grad_e)
            print('backward passed')

    # Remove the temporary features so the graph can be reused.
    g.srcdata.pop('x')
    g.edata.pop('w')
    if 'v' in g.dstdata:
        g.dstdata.pop('v')
Пример #7
0
 def __call__(self, edges):
     """Combine source-node and edge features with ``self.mul_op``.

     Reads ``self.src_field`` from the edges' source data and
     ``self.edge_field`` from the edge data, pads the lower-rank operand
     with trailing singleton dimensions, and returns the result keyed by
     ``self.out_field``.
     """
     src_feat = edges.src[self.src_field]
     edge_feat = edges.data[self.edge_field]
     # Backends differ in broadcasting semantics, so both operands are
     # reshaped to the same rank explicitly before the operator is applied.
     src_rank = F.ndim(src_feat)
     edge_rank = F.ndim(edge_feat)
     rank = max(src_rank, edge_rank)
     src_shape = F.shape(src_feat) + (1, ) * (rank - src_rank)
     edge_shape = F.shape(edge_feat) + (1, ) * (rank - edge_rank)
     src_feat = F.reshape(src_feat, src_shape)
     edge_feat = F.reshape(edge_feat, edge_shape)
     return {self.out_field: self.mul_op(src_feat, edge_feat)}
Пример #8
0
def test_segment_reduce(reducer):
    """Check ``segment_reduce`` against an equivalent ``gspmm`` reduction.

    A bipartite graph is built in which each of the 10 value rows has one
    edge to the segment it belongs to, so ``gspmm`` with ``copy_lhs`` gives
    the reference result. Both the outputs and the input gradients are
    compared.

    Parameters
    ----------
    reducer : name of the reduction; for ``max`` / ``min`` the reference
        output is passed through ``F.replace_inf_with_zero``.
    """
    ctx = F.ctx()
    value = F.tensor(np.random.rand(10, 5))
    ref_input = F.attach_grad(F.clone(value))
    seg_input = F.attach_grad(F.clone(value))
    seglen = F.tensor([2, 3, 0, 4, 1, 0, 0])

    # One source node per value row; destination is the row's segment index.
    num_rows = F.shape(value)[0]
    u = F.copy_to(F.arange(0, num_rows, F.int32), ctx)
    segment_ids = F.copy_to(F.arange(0, len(seglen), F.int32), ctx)
    v = F.repeat(segment_ids, seglen, dim=0)

    num_nodes = {'_U': len(u), '_V': len(seglen)}
    g = dgl.convert.heterograph({('_U', '_E', '_V'): (u, v)},
                                num_nodes_dict=num_nodes)
    with F.record_grad():
        expected = gspmm(g, 'copy_lhs', reducer, ref_input, None)
        if reducer in ['max', 'min']:
            expected = F.replace_inf_with_zero(expected)
        F.backward(F.reduce_sum(expected))
        expected_grad = F.grad(ref_input)

    with F.record_grad():
        actual = segment_reduce(seglen, seg_input, reducer=reducer)
        F.backward(F.reduce_sum(actual))
        assert F.allclose(expected, actual)
        print('forward passed')

        actual_grad = F.grad(seg_input)
        assert F.allclose(expected_grad, actual_grad)
        print('backward passed')
Пример #9
0
def test_append1():
    """Check ``Frame.append`` with a dict, with another ``Frame``, and with bad input.

    Appending the test data to an empty frame and then appending a frame
    built from the same data must double the row count and concatenate the
    columns. Appending a dict whose columns have different lengths must fail.
    """
    data = create_test_data()
    target = Frame()
    other = Frame(data)

    target.append(data)
    assert target.num_rows == N
    target.append(other)
    assert target.num_rows == 2 * N

    column = target['a1']
    assert tuple(F.shape(column.data)) == (2 * N, D)
    expected = F.cat([data['a1'], data['a1']], 0)
    assert F.allclose(expected, column.data)

    # Columns of unequal length ('a3' has 2 rows, the others 3) are invalid.
    ragged = {
        'a1': F.zeros((3, D)),
        'a2': F.zeros((3, D)),
        'a3': F.zeros((2, D)),
    }

    def failed_append():
        target.append(ragged)

    assert check_fail(failed_append)
Пример #10
0
def check_partition(g, part_method, reshuffle):
    """Partition ``g`` into four parts and validate every loaded partition.

    Labels and random features are attached to ``g``, the graph is
    partitioned into ``/tmp/partition`` with a 2-hop halo, and each partition
    is loaded back. For every partition the test checks the partition-book
    metadata, the local/global id mappings, the mapping back to original ids,
    message-passing results on inner nodes, and the stored node/edge
    features. With ``reshuffle`` it also checks that the original data can be
    reconstructed and that ids map to contiguous per-partition ranges.

    Parameters
    ----------
    g : graph to partition; node and edge data are added to it in place.
    part_method : forwarded to ``partition_graph``.
    reshuffle : forwarded to ``partition_graph``; enables the extra
        reshuffle-only checks.
    """
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10),
                                F.float32)
    g.edata['feats'] = F.tensor(np.random.randn(g.number_of_edges(), 10),
                                F.float32)
    # Reference aggregation results on the full graph.
    g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
    g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
    num_parts = 4
    num_hops = 2

    orig_nids, orig_eids = partition_graph(g,
                                           'test',
                                           num_parts,
                                           '/tmp/partition',
                                           num_hops=num_hops,
                                           part_method=part_method,
                                           reshuffle=reshuffle,
                                           return_mapping=True)
    part_sizes = []
    shuffled_labels = []
    shuffled_edata = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition(
            '/tmp/partition/test.json', i)

        # Check the metadata
        assert gpb._num_nodes() == g.number_of_nodes()
        assert gpb._num_edges() == g.number_of_edges()

        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts
        assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes']
        assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges']
        part_sizes.append((gpb_meta[i]['num_nodes'], gpb_meta[i]['num_edges']))

        # Inner nodes/edges must map to the contiguous local range 0..n-1.
        nid = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node'])
        local_nid = gpb.nid2localnid(nid, i)
        assert F.dtype(local_nid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid)))
        eid = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge'])
        local_eid = gpb.eid2localeid(eid, i)
        assert F.dtype(local_eid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid)))

        # Check the node map.
        local_nodes = F.boolean_mask(part_g.ndata[dgl.NID],
                                     part_g.ndata['inner_node'])
        llocal_nodes = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nodes1 = gpb.partid2nids(i)
        assert F.dtype(local_nodes1) in (F.int32, F.int64)
        assert np.all(
            np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(
                local_nodes1)))
        assert np.all(F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes)))

        # Check the edge map.
        local_edges = F.boolean_mask(part_g.edata[dgl.EID],
                                     part_g.edata['inner_edge'])
        llocal_edges = F.nonzero_1d(part_g.edata['inner_edge'])
        local_edges1 = gpb.partid2eids(i)
        assert F.dtype(local_edges1) in (F.int32, F.int64)
        assert np.all(
            np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(
                local_edges1)))
        assert np.all(F.asnumpy(llocal_edges) == np.arange(len(llocal_edges)))

        # Verify the mapping between the reshuffled IDs and the original IDs.
        part_src_ids, part_dst_ids = part_g.edges()
        part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids)
        part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids)
        part_eids = part_g.edata[dgl.EID]
        orig_src_ids = F.gather_row(orig_nids, part_src_ids)
        orig_dst_ids = F.gather_row(orig_nids, part_dst_ids)
        orig_eids1 = F.gather_row(orig_eids, part_eids)
        orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids)
        assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0]
        assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))

        if reshuffle:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'],
                                                 part_g.ndata['orig_id'])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'],
                                                 part_g.edata['orig_id'])
            # when we read node data from the original global graph, we should use orig_id.
            local_nodes = F.boolean_mask(part_g.ndata['orig_id'],
                                         part_g.ndata['inner_node'])
            local_edges = F.boolean_mask(part_g.edata['orig_id'],
                                         part_g.edata['inner_edge'])
        else:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'],
                                                 part_g.ndata[dgl.NID])
            # Edge features are looked up by the edge-ID field, matching the
            # other edge lookups in this function.
            part_g.edata['feats'] = F.gather_row(g.edata['feats'],
                                                 part_g.edata[dgl.EID])

        # Aggregation on the partition must match the full graph on inner
        # nodes.
        part_g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
        part_g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
        assert F.allclose(F.gather_row(g.ndata['h'], local_nodes),
                          F.gather_row(part_g.ndata['h'], llocal_nodes))
        assert F.allclose(F.gather_row(g.ndata['eh'], local_nodes),
                          F.gather_row(part_g.ndata['eh'], llocal_nodes))

        # Stored node/edge features must equal the originals.
        for name in ['labels', 'feats']:
            assert '_N/' + name in node_feats
            assert node_feats['_N/' + name].shape[0] == len(local_nodes)
            true_feats = F.gather_row(g.ndata[name], local_nodes)
            ndata = F.gather_row(node_feats['_N/' + name], local_nid)
            assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata))
        for name in ['feats']:
            assert '_E/' + name in edge_feats
            assert edge_feats['_E/' + name].shape[0] == len(local_edges)
            true_feats = F.gather_row(g.edata[name], local_edges)
            edata = F.gather_row(edge_feats['_E/' + name], local_eid)
            assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata))

        # This only works if node/edge IDs are shuffled.
        if reshuffle:
            shuffled_labels.append(node_feats['_N/labels'])
            shuffled_edata.append(edge_feats['_E/feats'])

    # Verify that we can reconstruct node/edge data for original IDs.
    if reshuffle:
        shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
        shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
        orig_labels = np.zeros(shuffled_labels.shape,
                               dtype=shuffled_labels.dtype)
        orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype)
        orig_labels[F.asnumpy(orig_nids)] = shuffled_labels
        orig_edata[F.asnumpy(orig_eids)] = shuffled_edata
        assert np.all(orig_labels == F.asnumpy(g.ndata['labels']))
        assert np.all(orig_edata == F.asnumpy(g.edata['feats']))

    if reshuffle:
        # Expected partition id per id when each partition owns a contiguous
        # range. `gpb` is the partition book of the last loaded partition.
        node_map = []
        edge_map = []
        for i, (num_nodes, num_edges) in enumerate(part_sizes):
            node_map.append(np.ones(num_nodes) * i)
            edge_map.append(np.ones(num_edges) * i)
        node_map = np.concatenate(node_map)
        edge_map = np.concatenate(edge_map)
        nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
        assert F.dtype(nid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(nid2pid) == node_map)
        eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
        assert F.dtype(eid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(eid2pid) == edge_map)
Пример #11
0
 def _fmsg(edges):
     """Message function: forward source feature ``'h'`` as message ``'m'``.

     Asserts that the batched source feature has shape ``(5, D)``.
     """
     src_h = edges.src['h']
     assert tuple(F.shape(src_h)) == (5, D)
     return {'m': edges.src['h']}
Пример #12
0
def reduce_func(nodes):
    """Reduce function: sum mailbox messages ``'m'`` along dimension 1.

    Records the mailbox shape in ``reduce_msg_shapes`` and asserts that the
    mailbox is 3-dimensional with ``D`` as its last dimension.
    """
    mailbox = nodes.mailbox['m']
    reduce_msg_shapes.add(tuple(mailbox.shape))
    assert F.ndim(mailbox) == 3
    assert F.shape(mailbox)[2] == D
    summed = F.sum(mailbox, 1)
    return {'accum': summed}
Пример #13
0
def message_func(edges):
    """Message function: forward source feature ``'h'`` as message ``'m'``.

    Asserts that the batched source feature is 2-dimensional with ``D`` as
    its second dimension.
    """
    src_h = edges.src['h']
    assert F.ndim(src_h) == 2
    assert F.shape(src_h)[1] == D
    return {'m': edges.src['h']}
Пример #14
0
def start_client(num_clients, num_servers):
    """Run the distributed KVStore client scenario with several clients.

    Connects to the servers, creates ``data_1`` / ``data_2``, checks the
    registered names and their metadata, exercises push/pull with the default
    handler and with ``udf_push``, deletes the tensors, and finally checks
    that ``add_push`` accumulates the pushes of all clients on ``data_3``.

    Parameters
    ----------
    num_clients : expected number of connected clients; also the factor the
        accumulated ``data_3`` rows are compared against.
    num_servers : forwarded to ``KVClient``.
    """
    ip_config = 'kv_ip_config.txt'
    os.environ['DGL_DIST_MODE'] = 'distributed'
    # Note: connect to server first !
    dgl.distributed.initialize(ip_config=ip_config)
    # Init kvclient
    kvclient = dgl.distributed.KVClient(ip_config=ip_config,
                                        num_servers=num_servers)
    kvclient.map_shared_data(partition_book=gpb)
    assert dgl.distributed.get_num_client() == num_clients
    for name, tensor, part_policy in (('data_1', data_1, edge_policy),
                                      ('data_2', data_2, node_policy)):
        kvclient.init_data(name=name,
                           shape=F.shape(tensor),
                           dtype=F.dtype(tensor),
                           part_policy=part_policy,
                           init_func=init_zero_func)

    # Test data_name_list
    name_list = kvclient.data_name_list()
    print(name_list)
    for name in ('data_0', 'data_0_1', 'data_0_2', 'data_0_3', 'data_1',
                 'data_2'):
        assert name in name_list

    # Test get_meta_data
    expected_meta = (
        ('data_0', data_0, 'node:_N'),
        ('data_0_1', data_0_1, 'node:_N'),
        ('data_0_2', data_0_2, 'node:_N'),
        ('data_0_3', data_0_3, 'node:_N'),
        ('data_1', data_1, 'edge:_E'),
        ('data_2', data_2, 'node:_N'),
    )
    for name, tensor, policy_str in expected_meta:
        dtype, shape, policy = kvclient.get_data_meta(name)
        assert dtype == F.dtype(tensor)
        assert shape == F.shape(tensor)
        assert policy.policy_str == policy_str

    pushed_names = ('data_0', 'data_1', 'data_2')

    def push_all(values):
        for name in pushed_names:
            kvclient.push(name=name, id_tensor=id_tensor, data_tensor=values)

    def assert_pulled(expected):
        for name in pushed_names:
            res = kvclient.pull(name=name, id_tensor=id_tensor)
            assert_array_equal(F.asnumpy(res), F.asnumpy(expected))

    # Test push and pull
    id_tensor = F.tensor([0,2,4], F.int64)
    data_tensor = F.tensor([[6.,6.],[6.,6.],[6.,6.]], F.float32)
    push_all(data_tensor)
    assert_pulled(data_tensor)

    # Register new push handler
    for name in pushed_names:
        kvclient.register_push_handler(name, udf_push)

    # Test push and pull
    push_all(data_tensor)
    kvclient.barrier()
    data_tensor = data_tensor * data_tensor
    assert_pulled(data_tensor)

    # Test delete data
    for name in pushed_names:
        kvclient.delete_data(name)

    # Create a fresh tensor and register the accumulating push handler on it
    kvclient.init_data(name='data_3',
                       shape=F.shape(data_2),
                       dtype=F.dtype(data_2),
                       part_policy=node_policy,
                       init_func=init_zero_func)
    kvclient.register_push_handler('data_3', add_push)
    data_tensor = F.tensor([[6.,6.],[6.,6.],[6.,6.]], F.float32)
    kvclient.barrier()
    # Sleep time depends on the client id, so clients push at different times.
    time.sleep(kvclient.client_id + 1)
    print("add...")
    kvclient.push(name='data_3',
                  id_tensor=id_tensor,
                  data_tensor=data_tensor)
    kvclient.barrier()
    res = kvclient.pull(name='data_3', id_tensor=id_tensor)
    data_tensor = data_tensor * num_clients
    assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
Пример #15
0
def test_edge_batch():
    """Check ``EdgeBatch`` feature shapes, edge tuples and sizes.

    An ``EdgeBatch`` is built once over all edges of a 20-node path graph and
    once over a fixed subset of eight edge ids. In both cases the source,
    destination and edge features must have one row per batched edge with
    ``d`` columns, ``edges()`` must return the ids it was built from, and
    ``batch_size()`` / ``len()`` must equal the number of batched edges.
    """
    d = 10
    g = dgl.DGLGraph(nx.path_graph(20))
    g.ndata['x'] = F.randn((g.number_of_nodes(), d))
    g.edata['x'] = F.randn((g.number_of_edges(), d))

    def build_batch(u, v, eid):
        src_data = g.get_n_repr(u)
        edge_data = g.get_e_repr(eid)
        dst_data = g.get_n_repr(v)
        return EdgeBatch(g, (u, v, eid), src_data, edge_data, dst_data)

    def check_feature_shapes(batch, num_rows):
        for frame in (batch.src, batch.dst, batch.data):
            assert F.shape(frame['x'])[0] == num_rows and\
                F.shape(frame['x'])[1] == d

    # test all
    num_edges = g.number_of_edges()
    u, v, _ = g._graph.edges('eid')
    ebatch = build_batch(u, v, ALL)
    check_feature_shapes(ebatch, g.number_of_edges())
    assert F.allclose(ebatch.edges()[0], u.tousertensor())
    assert F.allclose(ebatch.edges()[1], v.tousertensor())
    assert F.allclose(ebatch.edges()[2], F.arange(0, g.number_of_edges()))
    assert ebatch.batch_size() == num_edges
    assert len(ebatch) == num_edges

    # test partial
    eid = utils.toindex(F.tensor([0, 3, 5, 7, 11, 13, 15, 27]))
    u, v, _ = g._graph.find_edges(eid)
    ebatch = build_batch(u, v, eid)
    check_feature_shapes(ebatch, 8)
    assert F.allclose(ebatch.edges()[0], u.tousertensor())
    assert F.allclose(ebatch.edges()[1], v.tousertensor())
    assert F.allclose(ebatch.edges()[2], eid.tousertensor())
    assert ebatch.batch_size() == 8
    assert len(ebatch) == 8
Пример #16
0
def start_client():
    """Run the distributed KVStore client scenario with a single client.

    Connects to the servers, creates ``data_1`` / ``data_2``, checks the
    registered names and their metadata, exercises push/pull with the default
    handler and with ``udf_push``, then shuts the servers down and finalizes
    the client.
    """
    ip_config = 'kv_ip_config.txt'
    # Note: connect to server first !
    dgl.distributed.connect_to_server(ip_config=ip_config)
    # Init kvclient
    kvclient = dgl.distributed.KVClient(ip_config=ip_config)
    for name, tensor, policy_str in (('data_1', data_1, 'edge'),
                                     ('data_2', data_2, 'node')):
        kvclient.init_data(name=name,
                           shape=F.shape(tensor),
                           dtype=F.dtype(tensor),
                           policy_str=policy_str,
                           partition_book=gpb,
                           init_func=init_zero_func)

    kvclient.map_shared_data(partition_book=gpb)

    # Test data_name_list
    name_list = kvclient.data_name_list()
    print(name_list)
    for name in ('data_0', 'data_0_1', 'data_0_2', 'data_0_3', 'data_1',
                 'data_2'):
        assert name in name_list

    # Test get_meta_data
    expected_meta = (
        ('data_0', data_0, 'node'),
        ('data_0_1', data_0_1, 'node'),
        ('data_0_2', data_0_2, 'node'),
        ('data_0_3', data_0_3, 'node'),
        ('data_1', data_1, 'edge'),
        ('data_2', data_2, 'node'),
    )
    for name, tensor, policy_str in expected_meta:
        dtype, shape, policy = kvclient.get_data_meta(name)
        assert dtype == F.dtype(tensor)
        assert shape == F.shape(tensor)
        assert policy.policy_str == policy_str

    pushed_names = ('data_0', 'data_1', 'data_2')

    def push_all(values):
        for name in pushed_names:
            kvclient.push(name=name, id_tensor=id_tensor, data_tensor=values)

    def assert_pulled(expected):
        for name in pushed_names:
            res = kvclient.pull(name=name, id_tensor=id_tensor)
            assert_array_equal(F.asnumpy(res), F.asnumpy(expected))

    # Test push and pull
    id_tensor = F.tensor([0, 2, 4], F.int64)
    data_tensor = F.tensor([[6., 6.], [6., 6.], [6., 6.]], F.float32)
    push_all(data_tensor)
    assert_pulled(data_tensor)

    # Register new push handler
    for name in pushed_names:
        kvclient.register_push_handler(name, udf_push)

    # Test push and pull
    push_all(data_tensor)
    data_tensor = data_tensor * data_tensor
    assert_pulled(data_tensor)

    # clean up
    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
Пример #17
0
def start_client():
    """Run the client side of the KVStore test.

    Connects a ``KVClient`` using the module-level ``server_namebook``,
    creates ``'data_2'`` from the client, and then checks, in order:

    1. the client sees exactly six tensors, ``'data_0'`` .. ``'data_5'``;
    2. the metadata ``(dtype, shape, partition)`` reported for each one;
    3. that rows 0-2 pushed to ``data_0``, ``data_2``, ``data_3``,
       ``data_4`` and ``data_5`` are returned unchanged by ``pull``.

    The client is shut down at the end. Any mismatch raises
    ``AssertionError``.
    """
    my_client = KVClient(server_namebook=server_namebook)
    my_client.connect()

    # NOTE(review): presumably target_name='data_0' makes 'data_2' reuse the
    # partitioning of 'data_0'; the metadata check below expects data_2 to
    # report data_0's shape and partition_0 -- confirm against KVClient.
    my_client.init_data(name='data_2',
                        shape=(num_entries, dim_size),
                        dtype=F.float32,
                        target_name='data_0')
    print("Init data from client..")

    name_list = my_client.get_data_name_list()
    assert len(name_list) == 6
    for idx in range(6):
        assert 'data_{}'.format(idx) in name_list

    # (name, expected dtype, tensor whose shape is expected, expected partition)
    expected_meta = [
        ('data_0', F.float32, data_0, partition_0),
        ('data_1', F.float32, data_1, partition_1),
        ('data_2', F.float32, data_0, partition_0),
        ('data_3', F.int64, data_3, partition_0),
        ('data_4', F.float64, data_4, partition_0),
        ('data_5', F.int32, data_5, partition_0),
    ]
    for name, dtype, shape_like, partition in expected_meta:
        meta = my_client.get_data_meta(name)
        assert meta[0] == dtype
        assert meta[1] == tuple(F.shape(shape_like))
        # Check the partition of *this* tensor's metadata. The previous code
        # re-checked data_3's partition for data_4 and data_5, so their
        # partition metadata was never actually verified.
        assert_array_equal(meta[2], partition)

    float_rows = [[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]
    int_rows = [[1, 1, 1], [2, 2, 2], [3, 3, 3]]
    # (name, row values, explicit dtype or None for the backend default)
    payloads = [
        ('data_0', float_rows, None),
        ('data_2', float_rows, None),
        ('data_3', int_rows, None),
        ('data_4', float_rows, F.float64),
        ('data_5', int_rows, F.int32),
    ]

    def build(rows, dtype):
        # Build a fresh tensor per use so pushed and expected values never
        # share storage.
        return F.tensor(rows) if dtype is None else F.tensor(rows, dtype)

    # Push everything first, then pull everything back, as in the original
    # sequence of calls.
    for name, rows, dtype in payloads:
        my_client.push(name=name,
                       id_tensor=F.tensor([0, 1, 2]),
                       data_tensor=build(rows, dtype))

    for name, rows, dtype in payloads:
        res = my_client.pull(name=name, id_tensor=F.tensor([0, 1, 2]))
        assert_array_equal(res, build(rows, dtype))

    my_client.shut_down()