Example #1
    def __init__(self,
                 total_process,
                 mi,
                 batch_template,
                 cb_remote_initialize=None,
                 cb_remote_batch_process=None,
                 args=None):
        ''' Initialize the `SharedData` class with a few hooks

        Args:
            total_process: number of processes
            mi: ModelInterface
            batch_template: template batch; each worker receives a pinned
                clone of it as its shared batch
            cb_remote_initialize: callback for remote initialization
            cb_remote_batch_process: callback for remote batch processing
            args: additional arguments
        '''
        self.server = ParameterServer(total_process)
        self.cb_remote_initialize = cb_remote_initialize
        self.cb_remote_batch_process = cb_remote_batch_process
        self.args = args

        # def get_gpu_id(i): return i + 1
        def get_gpu_id(i):
            return 0

        # Share only training batches.
        shared_batches = []
        cvs_send = []
        cvs_recv = []
        qs = []
        for i in range(total_process - 1):
            # gpu_id = get_gpu_id(i)
            # shared_batches.append(
            #     cpu2gpu(all_batches[train_idx][0], gpu=gpu_id))
            # Pin-clone the template: each worker gets its own page-locked
            # (pinned) copy of the batch to share with the main process.
            shared_batches.append(utils_elf.pin_clone(batch_template))
            # A capacity-1 queue hands worker i its shared batch exactly once.
            qs.append(mp.Queue(1))
            qs[-1].put(shared_batches[i])
            # One send/receive condition-variable pair per worker for signaling.
            cvs_send.append(Cond())
            cvs_recv.append(Cond())

        self.cvs_send = cvs_send
        self.cvs_recv = cvs_recv
        self.shared_batches = shared_batches
        self.qs = qs
        self.b = mp.Barrier(total_process)

        self.optimizers = [
            mp.Process(target=self.process_main, args=(i, get_gpu_id(i)))
            for i in range(total_process - 1)
        ]
        for optimizer in self.optimizers:
            optimizer.start()

        # Wait until all models have received the shared memory.
        self.b.wait()

        self.server.server_send_model(mi)
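For reference, here is a minimal usage sketch of this constructor. Only the `SharedData` signature above comes from the source; `mi`, `batch_template`, the callback bodies, and the process count are illustrative assumptions:

    # Hypothetical usage; every name below except SharedData's parameters
    # is a placeholder for objects built by the surrounding training code.
    def on_remote_initialize(*cb_args):
        pass  # assumed hook; exact signature depends on process_main

    def on_remote_batch_process(*cb_args):
        pass  # assumed hook, e.g. run one optimization step on a batch

    shared_data = SharedData(
        total_process=4,                 # 1 main process + 3 optimizer workers
        mi=mi,                           # a ModelInterface instance
        batch_template=batch_template,   # batch to pin-clone for each worker
        cb_remote_initialize=on_remote_initialize,
        cb_remote_batch_process=on_remote_batch_process,
        args=args)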
Example #2
    def __init__(self,
                 total_process,
                 mi,
                 batch_template,
                 cb_remote_initialize=None,
                 cb_remote_batch_process=None,
                 args=None):
        self.server = ParameterServer(total_process)
        self.cb_remote_initialize = cb_remote_initialize
        self.cb_remote_batch_process = cb_remote_batch_process
        self.args = args

        # def get_gpu_id(i): return i + 1
        def get_gpu_id(i):
            return 0

        # Share only training batches.
        shared_batches = []
        cvs_send = []
        cvs_recv = []
        qs = []
        for i in range(total_process - 1):
            # gpu_id = get_gpu_id(i)
            # shared_batches.append(cpu2gpu(all_batches[train_idx][0], gpu=gpu_id))
            shared_batches.append(utils_elf.pin_clone(batch_template))
            qs.append(mp.Queue(1))
            qs[-1].put(shared_batches[i])
            cvs_send.append(Cond())
            cvs_recv.append(Cond())

        self.cvs_send = cvs_send
        self.cvs_recv = cvs_recv
        self.shared_batches = shared_batches
        self.qs = qs
        self.b = mp.Barrier(total_process)

        self.optimizers = [
            mp.Process(target=self.process_main, args=(i, get_gpu_id(i)))
            for i in range(total_process - 1)
        ]
        for optimizer in self.optimizers:
            optimizer.start()

        # Wait until all models have received the shared memory.
        self.b.wait()

        self.server.server_send_model(mi)
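The size-1 queue plus barrier handoff that both examples rely on can be reproduced in isolation. A self-contained sketch of the same pattern using only the standard library (all names here are mine, not from the source):

    import multiprocessing as mp

    def worker(i, q, barrier):
        batch = q.get()    # receive the shared object exactly once
        barrier.wait()     # tell the parent the handoff is complete
        print('worker', i, 'got', batch)

    if __name__ == '__main__':
        total_process = 4
        barrier = mp.Barrier(total_process)  # workers plus the parent
        qs, procs = [], []
        for i in range(total_process - 1):
            q = mp.Queue(1)          # capacity 1: a single handoff per worker
            q.put({'worker': i})     # stands in for the pinned batch
            p = mp.Process(target=worker, args=(i, q, barrier))
            p.start()
            qs.append(q)
            procs.append(p)
        barrier.wait()               # mirrors self.b.wait() in the examples
        for p in procs:
            p.join()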