    def setup_workers(self):
        # work only once
        if self._initialized:
            return
        self._initialized = True

        self.model.cleargrads()
        for i in six.moves.range(1, len(self.gpus)):
            pipe, worker_end = multiprocessing.Pipe()
            worker = _Worker(
                i, worker_end, self.model, self.gpus, self.da,
                int(
                    float(self.batch) / len(self.gpus) /
                    self.train_batch_divide), self)
            worker.start()
            self._workers.append(worker)
            self._pipes.append(pipe)

        with cuda.Device(self.gpus[0]):
            self.model.to_gpu(self.gpus[0])
            if len(self.gpus) > 1:
                communication_id = nccl.get_unique_id()
                self._send_message(("set comm_id", communication_id))
                self.communication = nccl.NcclCommunicator(
                    len(self.gpus), communication_id, 0)
    def setup(self):
        _, comm_id = self.pipe.recv()
        self.comm = nccl.NcclCommunicator(self.n_devices, comm_id,
                                          self.proc_id)

        self.model.to_gpu(self.device)
        self.reporter = reporter.Reporter()
        self.reporter.add_observer('main', self.model)
    def setup(self):
        _, comm_id = self.pipe.recv()
        self.comm = nccl.NcclCommunicator(self.n_devices, comm_id,
                                          self.proc_id)

        self.model.to_device(self.device)
        self.reporter = reporter.Reporter()
        self.reporter.add_observer('main', self.model)
        self.reporter.add_observers('main',
                                    self.model.namedlinks(skipself=True))
Example #4
File: _nccl_comm.py  Project: takagi/cupy
 def _init_with_mpi(self, n_devices, rank):
     # MPI is used only for management purposes
     # so the rank may be different than the one specified
     self._mpi_comm = MPI.COMM_WORLD
     self._mpi_rank = self._mpi_comm.Get_rank()
     self._mpi_comm.Barrier()
     nccl_id = None
     if self._mpi_rank == 0:
         nccl_id = nccl.get_unique_id()
     nccl_id = self._mpi_comm.bcast(nccl_id, root=0)
     # Initialize devices
     self._comm = nccl.NcclCommunicator(n_devices, nccl_id, rank)
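A runnable sketch of the same MPI bootstrap outside the class: rank 0 creates the NCCL id, MPI.COMM_WORLD broadcasts it, and each rank builds its communicator on its own GPU. The one-GPU-per-rank mapping and the final all-reduce are illustrative assumptions; launch with something like mpiexec -n 2 python script.py.

import cupy
from cupy.cuda import nccl
from mpi4py import MPI

mpi_comm = MPI.COMM_WORLD
rank = mpi_comm.Get_rank()
n_devices = mpi_comm.Get_size()

# Only rank 0 creates the id; MPI merely distributes it.
nccl_id = nccl.get_unique_id() if rank == 0 else None
nccl_id = mpi_comm.bcast(nccl_id, root=0)

with cupy.cuda.Device(rank):  # assumption: one GPU per MPI rank
    comm = nccl.NcclCommunicator(n_devices, nccl_id, rank)
    x = cupy.full(4, rank + 1, dtype=cupy.float32)
    comm.allReduce(x.data.ptr, x.data.ptr, x.size, nccl.NCCL_FLOAT32,
                   nccl.NCCL_SUM, cupy.cuda.Stream.null.ptr)
    # Every rank now holds 1 + 2 + ... + n_devices in each element.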
Example #5
File: _nccl_comm.py  Project: takagi/cupy
 def _init_with_tcp_store(self, n_devices, rank, host, port):
     nccl_id = None
     if rank == 0:
         self._store.run(host, port)
         nccl_id = nccl.get_unique_id()
         # get_unique_id can return negative values because of Cython issues
         # with bytes and C strings, so shift every byte by 128 to make it
         # positive and send the result as bytes to the proxy store.
         shifted_nccl_id = bytes([b + 128 for b in nccl_id])
         self._store_proxy['nccl_id'] = shifted_nccl_id
         self._store_proxy.barrier()
     else:
         self._store_proxy.barrier()
         nccl_id = self._store_proxy['nccl_id']
         nccl_id = tuple([int(b) - 128 for b in nccl_id])
     self._comm = nccl.NcclCommunicator(n_devices, nccl_id, rank)
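The byte shifting in _init_with_tcp_store is its own inverse, so the id survives the trip through the store unchanged. A tiny standalone check of that round trip (the variable names are illustrative):

from cupy.cuda import nccl

nccl_id = nccl.get_unique_id()                   # tuple of small (possibly negative) ints
shifted = bytes(b + 128 for b in nccl_id)        # sender: shift into the 0..255 range
restored = tuple(int(b) - 128 for b in shifted)  # receiver: undo the shift
assert restored == tuple(nccl_id)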
Example #6
    def _get_nccl_comm(self, device, devices):
        if str(devices) in self.nccl_comms:
            return self.nccl_comms[str(devices)]

        if self.rank == 0:
            nccl_comm_id = nccl.get_unique_id()
        else:
            nccl_comm_id = None

        nccl_comm_id = self.mpi_comm.bcast(nccl_comm_id)

        with device:
            nccl_comm = nccl.NcclCommunicator(self.size, nccl_comm_id,
                                              self.rank)
            self.nccl_comms[str(devices)] = nccl_comm

        return nccl_comm
Example #7
    def setup_workers(self):
        if self._initialized:
            return
        self._initialized = True

        self._master.cleargrads()
        for i in six.moves.range(1, len(self._devices)):
            pipe, worker_end = multiprocessing.Pipe()
            worker = _Worker(i, worker_end, self)
            worker.start()
            self._workers.append(worker)
            self._pipes.append(pipe)

        with cuda.Device(self._devices[0]):
            self._master.to_gpu(self._devices[0])
            if len(self._devices) > 1:
                comm_id = nccl.get_unique_id()
                self._send_message(("set comm_id", comm_id))
                self.comm = nccl.NcclCommunicator(len(self._devices), comm_id,
                                                  0)
Example #8
    def __init__(self):
        if config.mpi4py_enabled:
            self.mpi_comm = MPI.COMM_WORLD
            self.size = self.mpi_comm.Get_size()
            self.rank = self.mpi_comm.Get_rank()
        else:
            self.size = 1
            self.rank = 0
            
        self.device = Device(self.rank % cp.cuda.runtime.getDeviceCount())

        if config.nccl_enabled:
            if self.rank == 0:
                nccl_comm_id = nccl.get_unique_id()
            else:
                nccl_comm_id = None

            nccl_comm_id = self.mpi_comm.bcast(nccl_comm_id)

            with self.device:
                self.nccl_comm = nccl.NcclCommunicator(
                    self.size, nccl_comm_id, self.rank)
Example #9
 def initialize(self, head_id):
     self.communicator = nccl.NcclCommunicator(self.world_size, head_id,
                                               self.rank)
Example #10
 def construct(self, size, sesame, rank):
     from cupy.cuda import nccl
     comm_id = tuple(json.loads(sesame))
     self.nccl_comm = nccl.NcclCommunicator(size, comm_id, rank)
     print('NCCL initialized:', size, rank)
     assert self.nccl_comm is not None
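The sesame argument above is presumably the unique id serialized as JSON on the sending side. A minimal producer/consumer sketch of that handshake (single process, so size 1 and rank 0 are used purely for illustration):

import json

from cupy.cuda import nccl

sesame = json.dumps(list(nccl.get_unique_id()))  # publisher: id as a JSON string
comm_id = tuple(json.loads(sesame))              # subscriber: restore the tuple
comm = nccl.NcclCommunicator(1, comm_id, 0)
comm.destroy()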
Example #11
 def test_comm_size(self):
     id = nccl.get_unique_id()
     comm = nccl.NcclCommunicator(1, id, 0)
     assert 1 == comm.size()
Example #12
 def test_check_async_error(self):
     id = nccl.get_unique_id()
     comm = nccl.NcclCommunicator(1, id, 0)
     comm.check_async_error()
     comm.destroy()
Example #13
 def test_abort(self):
     id = nccl.get_unique_id()
     comm = nccl.NcclCommunicator(1, id, 0)
     comm.abort()
Example #14
 def test_single_proc_ring(self):
     id = nccl.get_unique_id()
     comm = nccl.NcclCommunicator(1, id, 0)
     assert 0 == comm.rank_id()
     comm.destroy()
Example #15
 def setup(self):
     _, communication_id = self.pipe.recv()
     self.communication = nccl.NcclCommunicator(self.number_of_devices,
                                                communication_id,
                                                self.process_id)
     self.model.to_gpu(self.device)
Example #16
 def test_nccl(self):
     uid = libnccl.get_unique_id()
     comm = libnccl.NcclCommunicator(1, uid, 0)  # NOQA
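For the single-process case shown in these tests, recent CuPy builds also expose NcclCommunicator.initAll, which creates one communicator per local device without an explicit unique-id exchange; whether your installed version provides it (and the two-device count below) is an assumption worth checking.

from cupy.cuda import nccl

# One communicator per local GPU (here assuming two devices), built in a single call.
comms = nccl.NcclCommunicator.initAll(2)
for comm in comms:
    print(comm.rank_id(), comm.size())
for comm in comms:
    comm.destroy()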