Пример #1
0
    def _swap_out_unpinned_tensors(self,
                                   aio_handle,
                                   unpinned_tensors,
                                   dest_paths,
                                   pinned_buffers):
        """Write ``unpinned_tensors`` to ``dest_paths``, staged through ``pinned_buffers``.

        The tensors are handled in batches of at most ``len(pinned_buffers)``.
        For each batch, every tensor is copied into its own staging buffer, the
        batch is submitted with ``swap_out_tensors`` and the method waits on
        ``aio_handle`` before the buffers are reused for the next batch.

        Args:
            aio_handle: Object passed to ``swap_out_tensors``; its ``wait()``
                return value is compared with the batch size (presumably the
                number of completed I/O operations -- confirm against the
                handle's API).
            unpinned_tensors: Sequence of tensors to write out.
            dest_paths: Sequence of destination paths, index-aligned with
                ``unpinned_tensors``.
            pinned_buffers: Non-empty sequence of staging buffers; one buffer
                is used per tensor in a batch.

        Raises:
            ValueError: If ``pinned_buffers`` is empty.
            AssertionError: If ``aio_handle.wait()`` returns a value different
                from the number of tensors submitted in the batch.
        """
        swap_buffer_count = len(pinned_buffers)
        unpinned_tensor_count = len(unpinned_tensors)

        if swap_buffer_count == 0:
            # Without this check range() below fails with the opaque
            # "range() arg 3 must not be zero".
            raise ValueError('pinned_buffers must contain at least one buffer')

        for i in range(0, unpinned_tensor_count, swap_buffer_count):
            # The final batch may be smaller than the number of buffers.
            swap_tensor_count = min((unpinned_tensor_count - i), swap_buffer_count)

            # Stage each tensor in a buffer sized to its exact element count.
            src_tensors = unpinned_tensors[i:(i + swap_tensor_count)]
            compute_lengths = [t.numel() for t in src_tensors]
            compute_buffers = get_sized_buffers(pinned_buffers, compute_lengths)

            for dst, src in zip(compute_buffers, src_tensors):
                dst.data.copy_(src.data)

            # The write itself uses the I/O-aligned element count.
            # NOTE(review): presumably these are views over the same staging
            # storage as compute_buffers -- confirm in get_sized_buffers.
            swap_lengths = [self._io_aligned_numel(t.numel()) for t in src_tensors]
            swap_buffers = get_sized_buffers(pinned_buffers, swap_lengths)

            swap_paths = dest_paths[i:(i + swap_tensor_count)]
            swap_out_tensors(aio_handle, swap_buffers, swap_paths)

            # wait() must run unconditionally. Inside the assert expression it
            # would be stripped under `python -O`, and the next batch would
            # then overwrite buffers whose writes have not been waited on.
            completed_count = aio_handle.wait()
            assert completed_count == swap_tensor_count, (
                f'expected {swap_tensor_count} completed swap-out operations, '
                f'got {completed_count}')
    def _swap_in_parameter(self, aio_handle, parameter, dest_buffers):
        """Read the swapped-out tensors of ``parameter`` into ``dest_buffers``.

        Returns immediately when ``_get_param_swap_info`` has no entry for
        ``parameter``. Otherwise the reads are submitted via
        ``swap_in_tensors``, the method blocks on ``aio_handle.wait()``, and
        the ``data`` of each tensor in the swap info is rebound to the
        ``data`` of its sized destination buffer. The submit and wait phases
        are timed and logged.

        Args:
            aio_handle: Handle passed to ``swap_in_tensors`` and waited on.
            parameter: Parameter whose swap info is looked up.
            dest_buffers: Destination buffers; there must be at least as many
                as there are tensors in the swap info.
        """
        info = self._get_param_swap_info(parameter)
        if info is None:
            return

        tensor_count = len(info.tensors)
        assert tensor_count <= len(dest_buffers)

        submit_timer = 'swap_submit_read_param'
        wait_timer = 'swap_wait_read_param'

        # The reads are issued with the I/O-aligned element count.
        aligned_numel = self._io_aligned_numel(info.numel())
        io_buffers = get_sized_buffers(dest_buffers,
                                       [aligned_numel] * tensor_count)

        self._start_timer(submit_timer)
        swap_in_tensors(aio_handle, io_buffers, info.swap_paths)
        self._stop_timer(submit_timer)

        # Total size of the submitted reads; only reported in the debug log.
        swap_bytes = sum(buf.numel() * buf.element_size() for buf in io_buffers)

        self._start_timer(wait_timer)
        aio_handle.wait()
        self._stop_timer(wait_timer)

        # Rebind each tensor to a buffer sized to the exact element count.
        # NOTE(review): presumably this drops the alignment padding used for
        # the read -- confirm in get_sized_buffers.
        exact_buffers = get_sized_buffers(dest_buffers,
                                          [info.numel()] * tensor_count)
        for tensor, sized in zip(info.tensors, exact_buffers):
            tensor.data = sized.data

        self._log_timers([submit_timer, wait_timer])
        if not DEBUG_MODE:
            return
        if torch.distributed.get_rank() == 0:
            logger.info(
                f'optimizer_param_swap_in: {(swap_bytes/(1024**3)):5.2f} GB')
Пример #3
0
 def set_swap_buffers(self, buffers):
     """Rebind the ``data`` of each tensor in ``self.tensors`` to a sized buffer.

     Every tensor gets a buffer of ``self.numel()`` elements obtained from
     ``buffers`` through ``get_sized_buffers``; tensors and buffers are
     paired in order.
     """
     element_count = self.numel()
     lengths = [element_count] * len(self.tensors)
     sized_buffers = get_sized_buffers(buffers, lengths)
     for tensor, sized in zip(self.tensors, sized_buffers):
         tensor.data = sized.data