def copy_to_device(self, ary, stream=None, context=None):
    """Copy the contents of `ary` into `self`.

    If `ary` is a ``DeviceNDArray`` a device-to-device transfer is
    performed; otherwise a host-to-device transfer is performed.  The
    number of bytes requested is always ``self.alloc_size``.

    Parameters
    ----------
    ary
        Source of the copy.  Nothing is done when ``ary.size == 0``.
    stream : optional
        If ``None`` (the default), ``_driver.hsa.implicit_sync()`` is
        called and the non-async driver copy is used.  Otherwise the
        async driver copy is used and `stream` is forwarded to it.
    context : optional
        Context the caller expects `self` to belong to.  It is used for
        validation only; the copy itself always runs against
        ``self._context``.

    Raises
    ------
    HsaContextMismatchError
        If `context` is given, ``self.dgpu_data`` is not ``None`` and
        ``self._context.unproxy`` differs from ``context.unproxy``.
    """
    if ary.size == 0:
        # Nothing to do
        return

    # A caller-supplied context is only checked against ours; it is never
    # used as the destination context of the transfer.
    if context is not None and self.dgpu_data is not None:
        expect, got = self._context, context
        if expect.unproxy != got.unproxy:
            raise HsaContextMismatchError(expect=expect, got=got)

    # TODO: Worry about multiple dGPUs
    # NOTE(review): an earlier version clamped the size to
    # min(self.alloc_size, <size of the source>); the full destination
    # allocation size is used instead -- confirm this is safe when `ary`
    # is smaller than `self`.
    sz = self.alloc_size

    src_is_device = isinstance(ary, DeviceNDArray)

    if stream is None:
        _driver.hsa.implicit_sync()

        # Positional order for both calls: (context, dst, src, size)
        if src_is_device:
            _driver.dGPU_to_dGPU(self._context, self, ary, sz)
        else:
            _driver.host_to_dGPU(self._context, self, ary, sz)
        return

    if src_is_device:
        _driver.async_dGPU_to_dGPU(
            dst_ctx=self._context,
            src_ctx=ary._context,
            dst=self,
            src=ary,
            size=sz,
            stream=stream,
        )
    else:
        _driver.async_host_to_dGPU(
            dst_ctx=self._context,
            src_ctx=devices.get_cpu_context(),
            dst=self,
            src=ary,
            size=sz,
            stream=stream,
        )
def copy_to_device(self, ary, stream=None, context=None):
    """Copy the contents of `ary` into `self`.

    If `ary` is a ``DeviceNDArray`` a device-to-device transfer is
    performed; otherwise a host-to-device transfer is performed.  The
    number of bytes requested is always ``self.alloc_size``.

    Parameters
    ----------
    ary
        Source of the copy.  Nothing is done when ``ary.size == 0``.
    stream : optional
        If ``None`` (the default), ``_driver.hsa.implicit_sync()`` is
        called and the non-async driver copy is used.  Otherwise the
        async driver copy is used and `stream` is forwarded to it.
    context : optional
        Context the caller expects `self` to belong to.  It is used for
        validation only; the copy itself always runs against
        ``self._context``.

    Raises
    ------
    HsaContextMismatchError
        If `context` is given, ``self.dgpu_data`` is not ``None`` and
        ``self._context.unproxy`` differs from ``context.unproxy``.
    """
    if ary.size == 0:
        # Nothing to do
        return

    # A caller-supplied context is only checked against ours; it is never
    # used as the destination context of the transfer.
    if context is not None and self.dgpu_data is not None:
        expect, got = self._context, context
        if expect.unproxy != got.unproxy:
            raise HsaContextMismatchError(expect=expect, got=got)

    # TODO: Worry about multiple dGPUs
    # NOTE(review): an earlier version clamped the size to
    # min(self.alloc_size, <size of the source>); the full destination
    # allocation size is used instead -- confirm this is safe when `ary`
    # is smaller than `self`.
    sz = self.alloc_size

    src_is_device = isinstance(ary, DeviceNDArray)

    if stream is None:
        _driver.hsa.implicit_sync()

        # Positional order for both calls: (context, dst, src, size)
        if src_is_device:
            _driver.dGPU_to_dGPU(self._context, self, ary, sz)
        else:
            _driver.host_to_dGPU(self._context, self, ary, sz)
        return

    if src_is_device:
        _driver.async_dGPU_to_dGPU(
            dst_ctx=self._context,
            src_ctx=ary._context,
            dst=self,
            src=ary,
            size=sz,
            stream=stream,
        )
    else:
        _driver.async_host_to_dGPU(
            dst_ctx=self._context,
            src_ctx=devices.get_cpu_context(),
            dst=self,
            src=ary,
            size=sz,
            stream=stream,
        )