Пример #1
0
def init_nccl_comm(mpi_comm):
    """Build an NCCL communicator covering every process in ``mpi_comm``.

    Rank 0 of ``mpi_comm`` asks NCCL for a unique id; all other ranks
    start with ``None``.  The value is then passed through
    ``mpi_comm.bcast`` (called without an explicit root, so this relies
    on the communicator's default root -- rank 0 for mpi4py-style
    communicators) so that every process ends up with the same id.

    Args:
        mpi_comm: Communicator exposing ``rank``, ``size`` and ``bcast``.

    Returns:
        An ``nccl.NcclCommunicator`` created from ``mpi_comm.size``, the
        shared id and this process's ``mpi_comm.rank``.
    """
    from chainermn import nccl

    rank = mpi_comm.rank
    # Only the root generates an id; the other ranks receive it via bcast.
    unique_id = nccl.get_unique_id() if rank == 0 else None
    unique_id = mpi_comm.bcast(unique_id)
    return nccl.NcclCommunicator(mpi_comm.size, unique_id, rank)
Пример #2
0
def init_nccl_comm(mpi_comm):
    """Create the NCCL communicator that mirrors ``mpi_comm``.

    The NCCL unique id is produced on rank 0 only and handed to the
    remaining ranks with ``mpi_comm.bcast`` (default root).  Each process
    then constructs its ``nccl.NcclCommunicator`` from the communicator
    size, the shared id and its own rank.

    Args:
        mpi_comm: Communicator exposing ``rank``, ``size`` and ``bcast``.

    Returns:
        The ``nccl.NcclCommunicator`` for this process.
    """
    from chainermn import nccl

    # Non-root ranks contribute a placeholder that bcast overwrites.
    local_id = None
    if mpi_comm.rank == 0:
        local_id = nccl.get_unique_id()

    shared_id = mpi_comm.bcast(local_id)
    return nccl.NcclCommunicator(
        mpi_comm.size, shared_id, mpi_comm.rank)
def init_comms(mpi_comm, intra_rank, intra_size, inter_rank, use_nccl=True):
    """Split ``mpi_comm`` into intra/inter communicators, optionally with NCCL.

    Two sub-communicators are created with ``mpi_comm.Split``:

    * the intra communicator uses ``inter_rank`` as colour and
      ``intra_rank`` as key;
    * the inter communicator uses ``intra_rank`` as colour and
      ``inter_rank`` as key.

    When ``use_nccl`` is true, an NCCL communicator is created for the
    intra group and, if ``nccl.get_version() >= 2000``, a second one
    spanning all of ``mpi_comm``.

    Args:
        mpi_comm: Communicator exposing ``Split``, ``bcast``, ``rank`` and
            ``size`` (presumably an mpi4py communicator -- confirm with
            the caller).
        intra_rank: Rank of this process inside its intra group; also the
            rank given to the intra NCCL communicator.
        intra_size: Number of ranks given to the intra NCCL communicator.
        inter_rank: Index of the intra group this process belongs to.
        use_nccl: Whether to create NCCL communicators.

    Returns:
        ``(intra_mpi_comm, inter_mpi_comm)`` when ``use_nccl`` is false,
        otherwise ``(intra_mpi_comm, inter_mpi_comm, intra_nccl_comm,
        nccl_comm)`` where ``nccl_comm`` is ``None`` if
        ``nccl.get_version()`` is below 2000.
    """
    intra_mpi_comm = mpi_comm.Split(inter_rank, intra_rank)
    inter_mpi_comm = mpi_comm.Split(intra_rank, inter_rank)
    if not use_nccl:
        return intra_mpi_comm, inter_mpi_comm

    # Imported lazily so the MPI-only path works without NCCL bindings.
    from chainermn import nccl

    def _shared_unique_id(comm):
        # Only the broadcast root (rank 0 of ``comm``) generates an id.
        # Generating one on every rank, only to have bcast discard all but
        # the root's, is wasted work and not how NCCL documents the call.
        unique_id = nccl.get_unique_id() if comm.rank == 0 else None
        return comm.bcast(unique_id)

    intra_nccl_comm = nccl.NcclCommunicator(
        intra_size, _shared_unique_id(intra_mpi_comm), intra_rank)

    nccl_comm = None
    # NOTE(review): 2000 presumably encodes NCCL 2.0, the first release
    # able to span all ranks of mpi_comm -- confirm.
    if nccl.get_version() >= 2000:
        nccl_comm = nccl.NcclCommunicator(
            mpi_comm.size, _shared_unique_id(mpi_comm), mpi_comm.rank)

    return intra_mpi_comm, inter_mpi_comm, intra_nccl_comm, nccl_comm