def copy_to_device(self, ary, stream=0):
    """Copy `ary` to `self`.

    If `ary` is a CUDA memory, perform a device-to-device transfer.
    Otherwise, perform a host-to-device transfer.

    Parameters
    ----------
    ary
        Source array. Zero-sized inputs are ignored and nothing is copied.
    stream
        Stream for the transfer; it is first resolved through
        ``self._default_stream``.

    Notes
    -----
    ``self`` (and ``ary`` when it is device memory) is passed through
    ``sentry_contiguous``, and the two array cores are passed through
    ``check_array_compatibility`` before any bytes are moved. The transfer
    size is always ``self.alloc_size``.
    """
    if ary.size == 0:
        # Nothing to do
        return

    sentry_contiguous(self)
    stream = self._default_stream(stream)

    self_core, ary_core = array_core(self), array_core(ary)
    if _driver.is_device_memory(ary):
        sentry_contiguous(ary)
        check_array_compatibility(self_core, ary_core)
        _driver.device_to_device(self, ary, self.alloc_size, stream=stream)
    else:
        # Ensure same contiguity. Only makes a host-side copy if necessary
        # (i.e., in order to materialize a writable strided view)
        order = 'C' if self_core.flags['C_CONTIGUOUS'] else 'F'
        if ary_core.flags['WRITEABLE']:
            # "Copy only if needed". Spelled with asanyarray rather than
            # np.array(..., copy=False): under NumPy >= 2.0, copy=False
            # raises ValueError whenever a copy cannot be avoided, whereas
            # asanyarray keeps the copy-if-needed meaning on both 1.x and 2.x
            # and still passes ndarray subclasses through.
            ary_core = np.asanyarray(ary_core, order=order)
        else:
            # Read-only input: always take a fresh copy.
            ary_core = np.array(ary_core, order=order, subok=True, copy=True)
        check_array_compatibility(self_core, ary_core)
        _driver.host_to_device(self, ary_core, self.alloc_size,
                               stream=stream)
def _do_setitem(self, key, value, stream=0):
    """Assign `value` to the field `key` of this record on the device.

    The field's dtype and byte offset are looked up in ``self.dtype.fields``.
    A same-typed object viewing ``self.gpu_data`` at that offset serves as
    the destination, and `value` is converted to the field's scalar type and
    moved to the device with ``auto_device`` before a device-to-device copy
    of one item is issued.

    When neither the record nor the caller supplies a stream, the context's
    default stream is used and synchronized before returning.
    """
    stream = self._default_stream(stream)

    # With no stream from either the record or the caller, fall back to the
    # context's default stream and block on it once the copy is issued.
    wait_for_copy = not stream
    if wait_for_copy:
        stream = devices.get_context().get_default_stream()

    # Destination: a view of this record's storage at the field's offset.
    field_dtype, field_offset = self.dtype.fields[key]
    field_data = self.gpu_data.view(field_offset)
    target = type(self)(dtype=field_dtype, stream=stream,
                        gpu_data=field_data)

    # Source: the value coerced to the field's scalar type, on the device.
    scalar = target.dtype.type(value)
    source, _ = auto_device(scalar, stream=stream)

    # Transfer exactly one item's worth of bytes.
    _driver.device_to_device(target, source, source.dtype.itemsize, stream)

    if wait_for_copy:
        stream.synchronize()
def test_d2d(self):
    """Round-trip data host -> device -> device -> host and compare.

    Two device buffers of equal size are allocated from ``self.context``.
    The host array is uploaded to the first, copied device-to-device into
    the second, and downloaded again; the result must equal the input.
    """
    expected = np.arange(100, dtype=np.uint32)
    result = np.empty_like(expected)
    nbytes = expected.size * expected.dtype.itemsize

    src_buf = self.context.memalloc(nbytes)
    dst_buf = self.context.memalloc(nbytes)

    driver.host_to_device(src_buf, expected, nbytes)
    driver.device_to_device(dst_buf, src_buf, nbytes)
    driver.device_to_host(result, dst_buf, nbytes)

    matches = expected == result
    self.assertTrue(np.all(matches))
def test_d2d(self):
    """Check that a device-to-device copy preserves the buffer contents.

    Uploads a host array into one device allocation, copies it into a
    second allocation with ``driver.device_to_device``, downloads the second
    allocation, and asserts the downloaded data equals the original.
    """
    host_in = np.arange(100, dtype=np.uint32)
    host_out = np.empty_like(host_in)
    size_in_bytes = host_in.dtype.itemsize * host_in.size

    # Allocate the source buffer first, then the destination buffer.
    first = self.context.memalloc(size_in_bytes)
    second = self.context.memalloc(size_in_bytes)

    # host -> first -> second -> host
    driver.host_to_device(first, host_in, size_in_bytes)
    driver.device_to_device(second, first, size_in_bytes)
    driver.device_to_host(host_out, second, size_in_bytes)

    self.assertTrue(np.all(host_in == host_out))