示例#1
0
def start_server(args):
    """Configure and launch one KVServer hosting the 'entity_embed' tensor.

    Reads the server namebook from ``args.ip_config``, builds the local
    zero-initialised data, the global-to-local id map and the partition
    book, registers them on the server and then starts it.

    Parameters
    ----------
    args : argparse.Namespace-like
        Must provide ``ip_config``, ``server_id``, ``num_client``,
        ``dim_size`` and ``num_servers``. The per-server row count comes
        from the module-level ``num_entries``.
    """
    embed_name = 'entity_embed'

    namebook = dgl.contrib.read_ip_config(filename=args.ip_config)
    my_server = KVServer(server_id=args.server_id,
                         server_namebook=namebook,
                         num_client=args.num_client)

    # Rows stored by this server: num_entries x dim_size, all zeros.
    embed_data = F.zeros((num_entries, args.dim_size), F.float32, F.cpu())

    # Global-to-local map covering every id across all servers; only the
    # slice belonging to this machine is filled with local row indices.
    global_to_local = F.zeros(num_entries * args.num_servers, F.int64, F.cpu())
    lo = num_entries * my_server.get_machine_id()
    hi = num_entries * (my_server.get_machine_id() + 1)
    global_to_local[lo:hi] = F.arange(0, num_entries)

    # Partition book: server index repeated num_entries times per server.
    part_book = F.tensor(np.repeat(np.arange(args.num_servers), num_entries))

    is_master = my_server.get_id() % my_server.get_group_count() == 0
    if is_master:
        # The master server of a group supplies the actual tensors.
        my_server.set_global2local(name=embed_name, global2local=global_to_local)
        my_server.init_data(name=embed_name, data_tensor=embed_data)
        my_server.set_partition_book(name=embed_name, partition_book=part_book)
    else:
        # Other servers register the name only, passing no tensors
        # (presumably attaching to the master's copy -- TODO confirm).
        my_server.set_global2local(name=embed_name)
        my_server.init_data(name=embed_name)
        my_server.set_partition_book(name=embed_name)

    my_server.print()

    my_server.start()
示例#2
0
    def _init_data(self, name, shape, init_type, low, high):
        """Create a float32 CPU tensor and store it under ``name``.

        Parameters
        ----------
        name : str
            Key under which the tensor is stored in ``self._data_store``.
        shape : list of int
            Shape of the tensor to create.
        init_type : str
            Initialisation method: 'uniform' or 'zero'.
        low : float
            Lower bound, used only for 'uniform'.
        high : float
            Upper bound, used only for 'uniform'.

        Raises
        ------
        RuntimeError
            If ``init_type`` is neither 'uniform' nor 'zero'.
        """
        cpu = None
        if init_type == 'uniform':
            cpu = F.cpu()
            tensor = F.uniform(shape=shape, dtype=F.float32, ctx=cpu,
                               low=low, high=high)
        elif init_type == 'zero':
            cpu = F.cpu()
            tensor = F.zeros(shape=shape, dtype=F.float32, ctx=cpu)
        else:
            # Nothing is stored when the method is not recognised.
            raise RuntimeError('Unknown initial method')
        self._data_store[name] = tensor
示例#3
0
def _random_id_tensor(args, shifted_machine_id):
    """Draw ``args.data_size`` random ids in ``[0, args.graph_size)``.

    When ``args.machine_id`` equals ``shifted_machine_id`` the ids are
    offset by ``args.graph_size`` (presumably moving them into the id range
    owned by the second machine -- TODO confirm against the partition book).
    """
    ids = np.random.randint(args.graph_size, size=args.data_size)
    if args.machine_id == shifted_machine_id:
        ids = ids + args.graph_size
    return F.tensor(ids)


def _print_throughput(label, args, elapsed, num_iters):
    """Print the throughput line for one benchmark section.

    NOTE(review): the byte estimate counts ``(dim + 2) * 4`` bytes per row
    and halves the client total; presumably 4-byte values plus two extra
    per-row fields, spread over two machines -- confirm.
    """
    total_bytes = (args.data_size*(args.dim+2)*4)*num_iters*args.num_client/2
    print("%s Throughput (MB): %f" % (label, total_bytes / elapsed / 1024.0 / 1024.0))


def _bench_pull(kvclient, args, label, shifted_machine_id, num_iters=100):
    """Time ``num_iters`` pulls of the same random id tensor and report."""
    id_tensor = _random_id_tensor(args, shifted_machine_id)
    start = time.time()
    for _ in range(num_iters):
        kvclient.pull(name='data', id_tensor=id_tensor)
    end = time.time()
    _print_throughput(label, args, end - start, num_iters)


def _bench_push(kvclient, args, label, shifted_machine_id, data_tensor,
                num_iters=100):
    """Time ``num_iters`` pushes, bracketed by barriers, and report.

    The closing barrier is inside the timed region, so the measurement
    includes waiting for the other clients to reach it.
    """
    id_tensor = _random_id_tensor(args, shifted_machine_id)
    kvclient.barrier()
    start = time.time()
    for _ in range(num_iters):
        kvclient.push(name='data', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.barrier()
    end = time.time()
    _print_throughput(label, args, end - start, num_iters)


def start_client(args):
    """Run the KVStore pull/push throughput benchmark from one client.

    Connects to the servers listed in ``args.ip_config``, maps the shared
    data described by the partition book, then runs six timed sections in
    order: local/remote fast-pull, local/remote pull with a user-defined
    pull handler, and local/remote push. Finally shuts the servers down and
    finalizes the client.

    Parameters
    ----------
    args : argparse.Namespace-like
        Must provide ``range``, ``ip_config``, ``machine_id``,
        ``graph_size``, ``data_size``, ``dim`` and ``num_client``.
    """
    if args.range == -1:
        _policy, gpb = create_partition_policy(args)
    else:
        _policy, gpb = create_range_partition_policy(args)
    print("create data...")
    # NOTE(review): the created tensor is not used again in this function.
    data = create_data(args)
    print("Create data done.")
    dgl.distributed.connect_to_server(ip_config=args.ip_config)
    kvclient = dgl.distributed.KVClient(ip_config=args.ip_config)
    kvclient.barrier()
    kvclient.map_shared_data(partition_book=gpb)

    # "Local" sections shift the ids on machine 1, "remote" sections shift
    # them on machine 0, so each machine targets the opposite id range.

    # ---- fast-pull (before any pull handler is registered) ----
    _bench_pull(kvclient, args, "Local fast-pull", shifted_machine_id=1)
    _bench_pull(kvclient, args, "Remote fast-pull", shifted_machine_id=0)

    # ---- pull through the user-defined handler ----
    kvclient.register_pull_handler('data', udf_pull)
    kvclient.barrier()
    _bench_pull(kvclient, args, "Local pull", shifted_machine_id=1)
    _bench_pull(kvclient, args, "Remote pull", shifted_machine_id=0)

    # ---- push (same payload tensor reused for both sections) ----
    data_tensor = F.zeros((args.data_size, args.dim), F.float32, F.cpu())
    _bench_push(kvclient, args, "Local push", 1, data_tensor)
    _bench_push(kvclient, args, "Remote push", 0, data_tensor)

    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
示例#4
0
def create_data(args):
    """Create the data held by the server nodes.

    Returns a zero-filled float32 CPU tensor of shape
    ``(args.graph_size, args.dim)``.
    """
    shape = (args.graph_size, args.dim)
    zero_tensor = F.zeros(shape, F.float32, F.cpu())
    return zero_tensor
示例#5
0
def zeros_init(shape, dtype):
    """Return a zero-filled CPU tensor with the given shape and dtype."""
    zero_tensor = F.zeros(shape, dtype=dtype, ctx=F.cpu())
    return zero_tensor
示例#6
0
def _print_elapsed(start, num_bytes):
    """Print the time elapsed since ``start`` and the byte count in GB."""
    print("Total time: %.3f, #bytes: %.3f GB" %
          (time.time() - start, num_bytes / 1000 / 1000 / 1000))


def start_client(args):
    """Benchmark local pulls, plain tensor slicing and remote pulls.

    Connects a KVClient using the namebook in ``args.ip_config`` and times
    three workloads on the 'entity_embed' tensor:

    1. pulling 10000 random batches of ids owned by this machine,
    2. slicing the same batches out of an in-process tensor (baseline),
    3. pulling 1000 random batches of ids owned by other machines.

    The client with id 0 shuts the service down at the end.

    Parameters
    ----------
    args : argparse.Namespace-like
        Must provide ``ip_config``, ``batch_size`` and ``num_servers``. The
        per-server row count comes from the module-level ``num_entries``.
    """
    server_namebook = dgl.contrib.read_ip_config(filename=args.ip_config)

    my_client = KVClient(server_namebook=server_namebook)

    my_client.connect()

    my_client.print()

    my_client.barrier()

    total_entries = num_entries * args.num_servers
    local_start = num_entries * my_client.get_machine_id()
    local_end = num_entries * (my_client.get_machine_id() + 1)
    local_range = np.arange(local_start, local_end)

    # Keep the numpy batches so the slicing baseline can reuse them.
    local_batches = [np.random.choice(local_range, args.batch_size)
                     for _ in range(10000)]
    id_list = [F.tensor(ids) for ids in local_batches]

    print("Pull from local...")
    num_bytes = 0
    start = time.time()
    for ids in id_list:
        tmp = my_client.pull(name='entity_embed', id_tensor=ids)
        num_bytes += np.prod(tmp.shape) * 4
    _print_elapsed(start, num_bytes)
    # Feature width of the pulled rows, needed for the baseline tensor.
    ndim = tmp.shape[1]

    my_client.barrier()

    # The baseline tensor only has num_entries rows, so the global ids are
    # rebased to [0, num_entries). Indexing with the global ids would run
    # out of bounds on every machine whose id is greater than 0.
    local_idx_list = [F.tensor(ids - local_start) for ids in local_batches]
    arr = F.zeros((num_entries, ndim), F.float32, F.cpu())
    print('Slice from a tensor...')
    num_bytes = 0
    start = time.time()
    for idx in local_idx_list:
        tmp = arr[idx]
        num_bytes += np.prod(tmp.shape) * 4
    _print_elapsed(start, num_bytes)

    print("Pull from remote...")
    # Every id outside [local_start, local_end); either side may be empty.
    remote_range = np.concatenate((np.arange(0, local_start),
                                   np.arange(local_end, total_entries)))
    if remote_range.size == 0:
        # Single-machine setup: np.random.choice would raise on an empty
        # population, so skip the benchmark but still reach the barrier.
        print("No remote entries to pull from, skipping remote pull.")
    else:
        id_list = [F.tensor(np.random.choice(remote_range, args.batch_size))
                   for _ in range(1000)]

        num_bytes = 0
        start = time.time()
        for ids in id_list:
            tmp = my_client.pull(name='entity_embed', id_tensor=ids)
            num_bytes += np.prod(tmp.shape) * 4
        _print_elapsed(start, num_bytes)

    my_client.barrier()

    if my_client.get_id() == 0:
        my_client.shut_down()