def has_usm_memory(obj):
    """Return a USM memory object for ``obj``, or ``None`` if there is none.

    ``obj`` is handed to ``dpctl_mem.as_usm_memory`` first. If that raises
    and ``obj`` has a ``base`` attribute, ``obj.base`` is tried as well.
    Failures are never propagated; the caught exception is printed only
    when ``config.DEBUG`` is truthy.

    For more information on ``as_usm_memory`` please refer to:
    https://github.com/IntelPython/dpctl/blob/0.8.0/dpctl/memory/_memory.pyx#L673

    Args:
        obj: Object to probe for a USM allocation.

    Returns:
        Whatever ``dpctl_mem.as_usm_memory`` returned for ``obj`` (or for
        ``obj.base``), or ``None`` when both attempts failed.
    """
    try:
        return dpctl_mem.as_usm_memory(obj)
    except Exception as direct_err:
        if not hasattr(obj, "base"):
            if config.DEBUG:
                print(direct_err)
            return None
        # The object may be a view over a USM-backed parent; retry with it.
        try:
            return dpctl_mem.as_usm_memory(obj.base)
        except Exception as base_err:
            if config.DEBUG:
                print(base_err)
    return None
def _with_constructor(self, buffer_cls): try: buf = buffer_cls(64) except Exception: self.SkipTest( "{} could not be allocated".format(buffer_cls.__name__) ) # reuse queue from buffer's SUAI v = View(buf, shape=(64,), strides=(1,), offset=0) m = as_usm_memory(v) self.assertTrue(m.get_usm_type() == buf.get_usm_type()) self.assertTrue(m._pointer == buf._pointer) self.assertTrue(m.sycl_device == buf.sycl_device) # Use SyclContext v = View( buf, shape=(64,), strides=(1,), offset=0, syclobj=buf.sycl_context ) m = as_usm_memory(v) self.assertTrue(m.get_usm_type() == buf.get_usm_type()) self.assertTrue(m._pointer == buf._pointer) self.assertTrue(m.sycl_device == buf.sycl_device) # Use queue capsule v = View( buf, shape=(64,), strides=(1,), offset=0, syclobj=buf._queue._get_capsule(), ) m = as_usm_memory(v) self.assertTrue(m.get_usm_type() == buf.get_usm_type()) self.assertTrue(m._pointer == buf._pointer) self.assertTrue(m.sycl_device == buf.sycl_device) # Use context capsule v = View( buf, shape=(64,), strides=(1,), offset=0, syclobj=buf.sycl_context._get_capsule(), ) m = as_usm_memory(v) self.assertTrue(m.get_usm_type() == buf.get_usm_type()) self.assertTrue(m._pointer == buf._pointer) self.assertTrue(m.sycl_device == buf.sycl_device) # Use filter string v = View( buf, shape=(64,), strides=(1,), offset=0, syclobj=buf.sycl_device.filter_string, ) m = as_usm_memory(v) self.assertTrue(m.get_usm_type() == buf.get_usm_type()) self.assertTrue(m._pointer == buf._pointer) self.assertTrue(m.sycl_device == buf.sycl_device)
def has_memory_overlap(x1, x2):
    """Tell whether the USM allocations behind ``x1`` and ``x2`` overlap.

    Both arguments are converted with ``dpm.as_usm_memory``. Each one then
    describes the half-open byte range ``[_pointer, _pointer + nbytes)``.

    Args:
        x1: First object accepted by ``dpm.as_usm_memory``.
        x2: Second object accepted by ``dpm.as_usm_memory``.

    Returns:
        ``True`` if both memory objects report the same ``sycl_device`` and
        their byte ranges intersect, ``False`` otherwise.
    """
    m1 = dpm.as_usm_memory(x1)
    m2 = dpm.as_usm_memory(x2)
    if m1.sycl_device == m2.sycl_device:
        p1_beg = m1._pointer
        p1_end = p1_beg + m1.nbytes
        p2_beg = m2._pointer
        p2_end = p2_beg + m2.nbytes
        # Two half-open ranges intersect iff each one starts before the
        # other ends, i.e. not (p1_beg >= p2_end or p2_beg >= p1_end).
        return p1_beg < p2_end and p2_beg < p1_end
    # Pointers are only compared when both report the same device.
    return False
def copy_same_dtype(dst, src):
    """Copy ``src`` into ``dst`` when both have the same shape and dtype.

    Three strategies are tried in order:

    1. If ``has_memory_overlap`` reports an overlap, ``src`` is staged
       through a NumPy array on the host.
    2. If bit 0 of both ``flags`` values is set (presumably the
       C-contiguous flag - confirm against ``usm_ndarray.flags``), the
       whole allocation is copied with one ``copy_from_device`` call.
    3. Otherwise the elements are copied one at a time using zero-dimensional
       views built from modified ``__sycl_usm_array_interface__`` dicts.

    Args:
        dst: Destination ``dpt.usm_ndarray``.
        src: Source ``dpt.usm_ndarray``.

    Raises:
        TypeError: If either argument is not exactly a ``dpt.usm_ndarray``.
        ValueError: If the shapes or the dtypes differ.
    """
    if not (type(dst) is dpt.usm_ndarray and type(src) is dpt.usm_ndarray):
        raise TypeError
    if dst.shape != src.shape:
        raise ValueError
    if dst.dtype != src.dtype:
        raise ValueError

    # Overlapping regions go through a host copy.
    if has_memory_overlap(dst, src):
        host_copy = copy_to_numpy(src)
        copy_from_numpy_into(dst, host_copy)
        return

    if (dst.flags & 1) and (src.flags & 1):
        whole_dst = dpm.as_usm_memory(dst)
        whole_src = dpm.as_usm_memory(src)
        whole_dst.copy_from_device(whole_src)
        return

    # Simplify the iteration space before walking it element by element.
    iter_shape, dst_strides, dst_base, src_strides, src_base = contract_iter2(
        dst.shape, dst.strides, src.strides
    )

    src_iface = src.__sycl_usm_array_interface__
    dst_iface = dst.__sycl_usm_array_interface__
    # Turn both interface dicts into descriptions of a single element.
    for iface in (src_iface, dst_iface):
        iface["shape"] = ()
        iface.pop("strides", None)
    dst_base = dst_base + dst_iface.get("offset", 0)
    src_base = src_base + src_iface.get("offset", 0)

    for flat_index in range(dst.size):
        multi_index = np.unravel_index(flat_index, iter_shape)
        dst_pos = dst_base
        src_pos = src_base
        for idx, dst_step, src_step in zip(
            multi_index, dst_strides, src_strides
        ):
            dst_pos += idx * dst_step
            src_pos += idx * src_step
        dst_iface["offset"] = dst_pos
        src_iface["offset"] = src_pos
        elem_src = dpm.as_usm_memory(Dummy(src_iface))
        elem_dst = dpm.as_usm_memory(Dummy(dst_iface))
        elem_dst.copy_from_device(elem_src)
def _transfer_to_host(queue, *data): has_usm_data, has_host_data = False, False host_data = [] for item in data: usm_iface = getattr(item, '__sycl_usm_array_interface__', None) if usm_iface is not None: if not dpctl_available: raise RuntimeError("dpctl need to be installed to work " "with __sycl_usm_array_interface__") if queue is not None: if queue.sycl_device != usm_iface['syclobj'].sycl_device: raise RuntimeError('Input data shall be located ' 'on single target device') else: queue = usm_iface['syclobj'] buffer = as_usm_memory(item).copy_to_host() item = np.ndarray(shape=usm_iface['shape'], dtype=usm_iface['typestr'], buffer=buffer) has_usm_data = True else: has_host_data = True mismatch_host_item = usm_iface is None and item is not None and has_usm_data mismatch_usm_item = usm_iface is not None and has_host_data if mismatch_host_item or mismatch_usm_item: raise RuntimeError( 'Input data shall be located on single target device') host_data.append(item) return queue, host_data
def get_info_from_suai(obj):
    """
    Convenience function to gather information from
    __sycl_usm_array_interface__.

    Args:
        obj: Array with SUAI attribute.

    Returns:
        usm_mem: USM memory object.
        total_size: Total number of items in the array.
        shape: Shape of the array.
        ndim: Total number of dimensions.
        itemsize: Size of each item.
        strides: Strides taken from the interface. When the interface has no
            "strides" entry, or the entry is None, a tuple is computed from
            ``shape`` with the last axis having stride 1.
        dtype: Dtype of the array.
    """
    usm_mem = dpctl_mem.as_usm_memory(obj)
    assert usm_mem is not None

    # Read the interface once; every field below comes from the same dict.
    suai = obj.__sycl_usm_array_interface__
    shape = suai["shape"]
    total_size = np.prod(shape)
    ndim = len(shape)
    dtype = np.dtype(suai["typestr"])
    itemsize = dtype.itemsize

    # "strides" is treated as optional, so a missing key is handled the same
    # way as an explicit None instead of raising KeyError.
    strides = suai.get("strides")
    if strides is None:
        strides = [1] * ndim
        for i in reversed(range(1, ndim)):
            strides[i - 1] = strides[i] * shape[i]
        strides = tuple(strides)

    return usm_mem, total_size, shape, ndim, itemsize, strides, dtype
def test_slice_suai(usm_type):
    """Slices of a USM array must copy back to the matching host slice.

    A ten-element ``u1`` array is placed on the default device with the
    requested ``usm_type``. For several slices, the bytes obtained through
    ``dpm.as_usm_memory(...).copy_to_host()`` are compared with the same
    slice of the host array.
    """
    host_ary = np.arange(0, 10, dtype="u1")
    device = dpctl.select_default_device()
    usm_ary = _from_numpy(host_ary, device=device, usm_type=usm_type)

    slices_to_check = (
        slice(2, 3, None),
        slice(5, 7, None),
        slice(3, 9, None),
    )
    for ind in slices_to_check:
        copied_back = dpm.as_usm_memory(usm_ary[ind]).copy_to_host()
        assert np.array_equal(
            copied_back, host_ary[ind]
        ), "Failed for {}".format(ind)
def copy_from_numpy_into(dst, np_ary):
    """Copy a NumPy array into ``dst`` one element at a time.

    ``np_ary`` is converted to ``dst.dtype`` and broadcast to ``dst.shape``.
    Each element is then viewed as raw bytes and written into the matching
    element of ``dst`` via ``dpm.as_usm_memory(...).copy_from_host``.

    Args:
        dst: Destination array; must provide ``dtype``, ``shape``, ``size``
            and element indexing.
        np_ary: Source ``numpy.ndarray``.

    Raises:
        TypeError: If ``np_ary`` is not a ``numpy.ndarray``.
    """
    if not isinstance(np_ary, np.ndarray):
        raise TypeError("Expected numpy.ndarray, got {}".format(type(np_ary)))

    converted = np.asarray(np_ary, dtype=dst.dtype)
    host_src = np.broadcast_to(converted, dst.shape)

    for flat_index in range(dst.size):
        multi_index = np.unravel_index(flat_index, dst.shape)
        # ndmin=1 gives a 1-d array so the element can be viewed as bytes.
        element_bytes = np.array(host_src[multi_index], ndmin=1).view("u1")
        dpm.as_usm_memory(dst[multi_index]).copy_from_host(element_bytes)
def check_view(v):
    """
    Assert that the memory object recovered from a ``View`` matches the
    buffer the view was built on.

    The object returned by ``as_usm_memory(v)`` must agree with
    ``v.buffer_`` on USM type, pointer and SYCL device.
    """
    assert type(v) is View
    backing = v.buffer_
    recovered = as_usm_memory(v)
    assert recovered.get_usm_type() == backing.get_usm_type()
    assert recovered._pointer == backing._pointer
    assert recovered.sycl_device == backing.sycl_device
def copy_same_shape(dst, src):
    """Copy ``src`` into ``dst``, casting to ``dst.dtype`` where needed.

    Equal dtypes are delegated to ``copy_same_dtype``. If
    ``has_memory_overlap`` reports an overlap, ``src`` is copied to a NumPy
    array, cast, and written back with ``copy_from_numpy_into``. Otherwise
    each element is read to the host, cast, and written to the matching
    element of ``dst``.

    Args:
        dst: Destination array.
        src: Source array; the iteration assumes it matches ``dst`` in
            shape, which is not checked here.
    """
    if src.dtype == dst.dtype:
        copy_same_dtype(dst, src)
        return

    # Overlapping regions go through a host copy.
    if has_memory_overlap(dst, src):
        staged = copy_to_numpy(src).astype(dst.dtype)
        copy_from_numpy_into(dst, staged)
        return

    # Simplify the iteration space before walking it element by element.
    iter_shape, dst_strides, dst_base, src_strides, src_base = contract_iter2(
        dst.shape, dst.strides, src.strides
    )

    src_iface = src.__sycl_usm_array_interface__
    dst_iface = dst.__sycl_usm_array_interface__
    # Turn both interface dicts into descriptions of a single element.
    for iface in (src_iface, dst_iface):
        iface["shape"] = ()
        iface.pop("strides", None)
    dst_base = dst_base + dst_iface.get("offset", 0)
    src_base = src_base + src_iface.get("offset", 0)

    for flat_index in range(dst.size):
        multi_index = np.unravel_index(flat_index, iter_shape)
        dst_pos = dst_base
        src_pos = src_base
        for idx, dst_step, src_step in zip(
            multi_index, dst_strides, src_strides
        ):
            dst_pos += idx * dst_step
            src_pos += idx * src_step
        dst_iface["offset"] = dst_pos
        src_iface["offset"] = src_pos
        elem_src = dpm.as_usm_memory(Dummy(src_iface))
        elem_dst = dpm.as_usm_memory(Dummy(dst_iface))
        # Cast on the host: raw bytes -> source dtype -> destination dtype.
        host_value = elem_src.copy_to_host().view(src.dtype).astype(dst.dtype)
        elem_dst.copy_from_host(host_value.view("u1"))
def test_slicing_basic():
    """Exercise basic indexing of ``dpt.usm_ndarray``.

    Valid indices must not raise; a repeated ellipsis, too many indices and
    an out-of-range index must raise ``IndexError``; a set used as an index
    must raise ``TypeError``. Array elements must also be usable as indices
    and slice bounds.
    """
    Xusm = dpt.usm_ndarray((10, 5), dtype="c16")

    for valid_index in (None, Ellipsis, 8, -3):
        Xusm[valid_index]

    with pytest.raises(IndexError):
        Xusm[Ellipsis, Ellipsis]
    with pytest.raises(IndexError):
        Xusm[1, 1, slice(None), 1]

    Xusm[slice(None), -4]
    with pytest.raises(IndexError):
        Xusm[slice(None), -128]
    with pytest.raises(TypeError):
        Xusm[{1, 2, 3, 4, 5, 6, 7}]

    X = dpt.usm_ndarray(10, "u1")
    X.usm_data.copy_from_host(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09")
    # check that objects with __index__ method can be used as indices
    int(X[X[2]])

    lower, upper = X[2], X[5]
    device_slice = dpm.as_usm_memory(X[lower:upper]).copy_to_host()
    reference = np.arange(0, 10, dtype="u1")
    assert np.array_equal(device_slice, reference[reference[2]:reference[5]])
def asarray(
    obj,
    dtype=None,
    device=None,
    copy=None,
    usm_type=None,
    sycl_queue=None,
    order="K",
):
    """Convert `obj` to :class:`dpctl.tensor.usm_ndarray`.

    Args:
        obj: Python object to convert. Can be an instance of `usm_ndarray`,
            an object representing SYCL USM allocation and implementing
            `__sycl_usm_array_interface__` protocol, an instance of
            `numpy.ndarray`, an object supporting Python buffer protocol,
            a Python scalar, or a (possibly nested) sequence of Python
            scalars.
        dtype (data type, optional): output array data type. If `None`,
            the data type is inferred from `obj`. Default: `None`.
        device (optional): array API concept of device where the output
            array is created. Can be `None`, a oneAPI filter selector
            string, a :class:`dpctl.SyclDevice` for a non-partitioned SYCL
            device, a :class:`dpctl.SyclQueue`, or a `Device` object
            returned by `dpctl.tensor.usm_array.device`. Default: `None`.
        copy (`bool`, optional): whether to copy the input. `True` always
            copies; `False` raises `ValueError` when a copy is required;
            `None` reuses existing allocations when possible. Default:
            `None`.
        usm_type ("device"|"shared"|"host", optional): type of SYCL USM
            allocation for the output array. If `None`, the type is
            inferred from the input when `obj` has a USM allocation, and
            `"device"` is used otherwise. Default: `None`.
        sycl_queue (:class:`dpctl.SyclQueue`, optional): SYCL queue used
            for allocation and copying. `sycl_queue` and `device` are
            exclusive keywords; if both are given, a `TypeError` is raised
            unless both imply the same underlying queue. If both are
            `None`, `dpctl.SyclQueue()` is used. Default: `None`.
        order ("C","F","A","K", optional): memory layout of the output
            array. Only the first character is inspected, case
            insensitively. Default: "K".

    Returns:
        The converted array, as produced by the conversion helper selected
        for the type of `obj`.

    Raises:
        TypeError: If `copy`, `order` or `usm_type` has an unsupported type
            or value kind.
        ValueError: If `order` or `usm_type` is not a recognized value, or
            if `copy=False` was requested for an input that needs a copy.
        NotImplementedError: For Python sequences whose elements are not
            all host data.
    """
    # 1. ``copy`` must be one of the three recognized values.
    if copy not in (None, True, False):
        raise TypeError(
            "Recognized copy keyword values should be True, False, or None")

    # 2. Normalize ``dtype`` when one was supplied.
    if dtype is not None:
        dtype = np.dtype(dtype)

    # 3. Reduce ``order`` to a single upper-case letter.
    if not isinstance(order, str):
        raise TypeError(
            f"Expected order keyword to be of type str, got {type(order)}")
    if not order or order[0] not in "KkAaCcFf":
        raise ValueError(
            "Unrecognized order keyword value, expecting 'K', 'A', 'F', or 'C'."
        )
    order = order[0].upper()

    # 4. ``usm_type`` is either None or one of the known strings.
    if usm_type is not None:
        if not isinstance(usm_type, str):
            raise TypeError(
                f"Expected usm_type to be a str or None, got {type(usm_type)}")
        if usm_type not in ("device", "shared", "host"):
            raise ValueError(
                f"Unrecognized value of usm_type={usm_type}, "
                "expected 'device', 'shared', 'host', or None.")

    # 5. Resolve device/sycl_queue into one queue; stays None if both are.
    if not (device is None and sycl_queue is None):
        sycl_queue = _normalize_queue_device(q=sycl_queue, d=device)

    # Keywords forwarded unchanged to every conversion helper.
    shared_kwargs = dict(dtype=dtype, usm_type=usm_type, sycl_queue=sycl_queue)

    # The order of the checks below matters: a usm_ndarray also exposes the
    # SUAI attribute, and a numpy.ndarray also supports the buffer protocol.
    if isinstance(obj, dpt.usm_ndarray):
        return _asarray_from_usm_ndarray(
            obj, copy=copy, order=order, **shared_kwargs
        )

    if hasattr(obj, "__sycl_usm_array_interface__"):
        sua_iface = getattr(obj, "__sycl_usm_array_interface__")
        membuf = dpm.as_usm_memory(obj)
        wrapped = dpt.usm_ndarray(
            sua_iface["shape"],
            dtype=sua_iface["typestr"],
            buffer=membuf,
            strides=sua_iface.get("strides", None),
        )
        return _asarray_from_usm_ndarray(
            wrapped, copy=copy, order=order, **shared_kwargs
        )

    if isinstance(obj, np.ndarray):
        if copy is False:
            raise ValueError(
                "Converting numpy.ndarray to usm_ndarray requires a copy")
        return _asarray_from_numpy_ndarray(obj, order=order, **shared_kwargs)

    if _is_object_with_buffer_protocol(obj):
        if copy is False:
            raise ValueError(
                f"Converting {type(obj)} to usm_ndarray requires a copy")
        return _asarray_from_numpy_ndarray(
            np.array(obj), order=order, **shared_kwargs
        )

    if isinstance(obj, (list, tuple, range)):
        if copy is False:
            raise ValueError(
                "Converting Python sequence to usm_ndarray requires a copy")
        _, dt, devs = _array_info_sequence(obj)
        if devs == _host_set:
            return _asarray_from_numpy_ndarray(
                np.asarray(obj, dt, order=order), order=order, **shared_kwargs
            )
        # Only sequences made up of host data are supported.
        raise NotImplementedError(
            "Converting Python sequences is not implemented")

    if copy is False:
        raise ValueError(
            f"Converting {type(obj)} to usm_ndarray requires a copy")
    # obj is a scalar, create 0d array
    return _asarray_from_numpy_ndarray(
        np.asarray(obj), order="C", **shared_kwargs
    )
# coding: utf-8 import external_usm_alloc as eua import numpy as np import dpctl import dpctl.memory as dpm q = dpctl.SyclQueue("gpu") matr = eua.DMatrix(q, 5, 5) print(matr) print(matr.__sycl_usm_array_interface__) blob = dpm.as_usm_memory(matr) print(blob.get_usm_type()) Xh = np.array( [ [1, 1, 1, 2, 2], [1, 0, 1, 2, 2], [1, 1, 0, 2, 2], [0, 0, 0, 3, -1], [0, 0, 0, -1, 5], ], dtype="d", ) host_bytes_view = Xh.reshape((-1)).view(np.ubyte)