Example no. 1
def send_multigpu(tensor,
                  dst_rank: int,
                  dst_gpu_index: int,
                  group_name: str = "default"):
    """Send a tensor to a remote GPU synchronously.

    The function assumes each process owns more than one GPU, and that
    the sender and receiver processes have an equal number of GPUs.

    Args:
        tensor: the tensor to send, located on a GPU.
        dst_rank (int): the rank of the destination process.
        dst_gpu_index (int): the GPU index on the destination process.
        group_name (str): the name of the collective group.

    Returns:
        None
    """
    if not types.cupy_available():
        raise RuntimeError("send_multigpu call requires NCCL.")
    _check_single_tensor_input(tensor)
    g = _check_and_get_group(group_name)
    _check_rank_valid(g, dst_rank)
    if dst_rank == g.rank:
        raise RuntimeError("The dst_rank '{}' is self. Considering "
                           "doing GPU to GPU memcpy instead?".format(dst_rank))
    opts = types.SendOptions()
    opts.dst_rank = dst_rank
    opts.dst_gpu_index = dst_gpu_index
    g.send([tensor], opts)
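
The snippet below is a minimal usage sketch, assuming these functions come from Ray's ray.util.collective module and that a matching recv_multigpu(tensor, src_rank, src_gpu_index, group_name) and init_collective_group(world_size, rank, backend, group_name) are available; the actor setup, group name, and tensor shapes are illustrative only.

# Hypothetical usage sketch: two Ray actors, each holding 2 GPUs, exchange a
# tensor via send_multigpu / recv_multigpu. Assumes Ray, PyTorch, and an
# NCCL-capable ray.util.collective backend; all names below are illustrative.
import ray
import torch
import ray.util.collective as col

@ray.remote(num_gpus=2)
class Worker:
    def setup(self, world_size: int, rank: int):
        # Every participating process joins the same named collective group.
        col.init_collective_group(world_size, rank,
                                  backend="nccl", group_name="p2p")

    def do_send(self):
        # Tensor lives on this process's GPU 1; ship it to GPU 0 of rank 1.
        t = torch.ones(4, device="cuda:1")
        col.send_multigpu(t, dst_rank=1, dst_gpu_index=0, group_name="p2p")

    def do_recv(self):
        buf = torch.zeros(4, device="cuda:0")
        col.recv_multigpu(buf, src_rank=0, src_gpu_index=1, group_name="p2p")
        return buf.cpu()

ray.init()
w0, w1 = Worker.remote(), Worker.remote()
ray.get([w0.setup.remote(2, 0), w1.setup.remote(2, 1)])
# Post the send and the matching receive together; each call blocks until
# its counterpart arrives.
ray.get([w0.do_send.remote(), w1.do_recv.remote()])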
Example no. 2
def send(tensor, dst_rank: int, group_name: str = "default"):
    """Send a tensor to a remote process synchronously.

    Args:
        tensor: the tensor to send.
        dst_rank (int): the rank of the destination process.
        group_name (str): the name of the collective group.

    Returns:
        None
    """
    _check_single_tensor_input(tensor)
    g = _check_and_get_group(group_name)
    _check_rank_valid(g, dst_rank)
    if dst_rank == g.rank:
        raise RuntimeError(
            "The destination rank '{}' is self.".format(dst_rank))
    opts = types.SendOptions()
    opts.dst_rank = dst_rank
    g.send([tensor], opts)
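
As with the multi-GPU variant, a paired receive must be posted on the destination rank. A minimal sketch follows, assuming ray.util.collective also provides recv(tensor, src_rank, group_name) and the same init_collective_group helper; single-GPU actors and the group name are illustrative, not confirmed by the excerpt.

# Hypothetical sketch: point-to-point send/recv between two single-GPU actors.
# Assumes ray.util.collective exposes recv(tensor, src_rank, group_name) and
# init_collective_group(...); the group name "pair" is illustrative.
import ray
import torch
import ray.util.collective as col

@ray.remote(num_gpus=1)
class Peer:
    def setup(self, world_size: int, rank: int):
        col.init_collective_group(world_size, rank,
                                  backend="nccl", group_name="pair")

    def send_to(self, dst_rank: int):
        t = torch.arange(8, dtype=torch.float32, device="cuda")
        col.send(t, dst_rank, group_name="pair")  # blocks until matched

    def recv_from(self, src_rank: int):
        buf = torch.empty(8, dtype=torch.float32, device="cuda")
        col.recv(buf, src_rank, group_name="pair")
        return buf.cpu()

ray.init()
a, b = Peer.remote(), Peer.remote()
ray.get([a.setup.remote(2, 0), b.setup.remote(2, 1)])
# Launch the send and the matching recv concurrently so neither blocks forever.
_, received = ray.get([a.send_to.remote(1), b.recv_from.remote(0)])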