def sum_reduce(A):
    """Reduce ``A`` to its sum via ``sum_recursive_reduction``.

    The input and a zero-filled partial-sums array (one slot per work group
    of 64 items, rounded up) are copied into ``MemoryUSMShared`` allocations
    before the reduction is invoked inside the default device's context.

    Args:
        A: Array to reduce; its ``shape``, ``size`` and ``dtype`` determine
            the size of the copies.

    Returns:
        The value returned by ``sum_recursive_reduction``.
    """
    work_group_size = 64
    global_size = len(A)

    # Ceiling division: a partially filled trailing work group still needs
    # its own partial-sum slot.
    whole_groups, remainder = divmod(global_size, work_group_size)
    nb_work_groups = whole_groups if remainder == 0 else whole_groups + 1

    host_partials = np.zeros(nb_work_groups).astype(A.dtype)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dpctl.device_context(device):
        # Input data: allocate, wrap in an ndarray view, then copy from host.
        input_nbytes = A.size * A.dtype.itemsize
        input_usm = dpctl_mem.MemoryUSMShared(input_nbytes)
        input_view = np.ndarray(A.shape, buffer=input_usm, dtype=A.dtype)
        np.copyto(input_view, A)

        # Partial sums: same procedure for the scratch array.
        partials_nbytes = host_partials.size * host_partials.dtype.itemsize
        partials_usm = dpctl_mem.MemoryUSMShared(partials_nbytes)
        partials_view = np.ndarray(
            host_partials.shape,
            buffer=partials_usm,
            dtype=host_partials.dtype,
        )
        np.copyto(partials_view, host_partials)

        result = sum_recursive_reduction(
            global_size, work_group_size, input_view, partials_view
        )

    return result
def main():
    """Launch ``black_scholes_dppy`` ``iterations`` times and print the results.

    The kernel is launched on the default SYCL device; the module-level
    ``callResult`` and ``putResult`` arrays are printed afterwards.
    """
    block_shape = (512, 1)
    blocks_needed = int(math.ceil(float(OPT_N) / block_shape[0]))
    grid_shape = (blocks_needed, 1)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        for _ in range(iterations):
            black_scholes_dppy[block_shape, grid_shape](
                callResult,
                putResult,
                stockPrice,
                optionStrike,
                optionYears,
                RISKFREE,
                VOLATILITY,
            )

    print("callResult : \n", callResult)
    print("putResult : \n", putResult)

    print("Done...")
def main():
    """Run ``f1(a, b)`` on the default SYCL device and check the 5-D result.

    The result is printed together with the address of its data buffer, and
    the first index (in row-major scan order over ``range(N)`` per axis)
    whose value differs from 2.0 is reported, if there is one.
    """
    from itertools import product

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        c = f1(a, b)

    print("c:", c, hex(c.ctypes.data))

    # A ``break`` inside five nested loops only leaves the innermost one, so
    # the scan used to continue and report further "first" mismatches.
    # A single flattened loop visits the indices in the same order and
    # really stops at the first offending element.
    for index in product(range(N), repeat=5):
        if c[index] != 2.0:
            print("First index not equal to 2.0 was", *index)
            break

    print("Done...")
def main():
    """Add two arrays of ones with ``f1`` on the default SYCL device.

    Prints the inputs and the result (each with the address of its data
    buffer) and reports the first element of the result that is not 2.0.
    """
    global_size = 64
    local_size = 32
    element_count = global_size * local_size
    print("N", element_count)

    lhs = np.ones(element_count, dtype=np.float32)
    rhs = np.ones(element_count, dtype=np.float32)

    print("a:", lhs, hex(lhs.ctypes.data))
    print("b:", rhs, hex(rhs.ctypes.data))

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        total = f1(lhs, rhs)

    print("RESULT c:", total, hex(total.ctypes.data))

    for position in range(element_count):
        if total[position] == 2.0:
            continue
        print("First index not equal to 2.0 was", position)
        break

    print("Done...")
def no_arg_barrier_support():
    """Demonstrate numba_dppy's ``barrier`` intrinsic called without arguments.

    ``barrier`` can only be used inside a ``kernel`` and corresponds to
    OpenCL's ``barrier`` function. The kernel below doubles every element of
    a ten-element float32 array in place; the array is printed before and
    after the launch.
    """

    @dppy.kernel
    def twice(A):
        gid = dppy.get_global_id(0)
        value = A[gid]
        # With no argument the barrier defaults to a global memory fence.
        dppy.barrier()
        A[gid] = value * 2

    length = 10
    data = np.arange(length).astype(np.float32)
    print(data)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dpctl.device_context(device):
        twice[length, dppy.DEFAULT_LOCAL_SIZE](data)

    # Expected output: every element doubled, i.e. [0, 2, 4, 6, ...].
    print(data)
def local_memory():
    """Demonstrate numba-dppy's ``local.array`` intrinsic.

    ``local.array`` creates a static array in the device's local address
    space. The kernel stages the input in such an array and then adds the
    mirrored element to each entry; the array is printed before and after.
    """
    blocksize = 10

    @dppy.kernel
    def reverse_array(A):
        gid = dppy.get_global_id(0)
        staged = dppy.local.array(shape=10, dtype=float32)

        # Preload the local array from the input.
        staged[gid] = A[gid]

        # Either a local or a global barrier works here because there is
        # only one work group.
        dppy.barrier(dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence

        # Add the mirrored element back into the input.
        A[gid] += staged[blocksize - 1 - gid]

    data = np.arange(blocksize).astype(np.float32)
    print(data)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dpctl.device_context(device):
        reverse_array[blocksize, dppy.DEFAULT_LOCAL_SIZE](data)

    # Expected output: orig[::-1] + orig, i.e. [9, 9, 9, ...].
    print(data)
def main():
    """Call the random-number helpers in the default device's context.

    Prints, in order, the results of ``rand``, ``random_sample``,
    ``random_exponential`` and ``random_normal``.
    """
    size = 9
    scale = 3.0

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dpctl.device_context(device):
        # Random values in a given shape (3, 2).
        uniform_block = rand()
        print(uniform_block)

        # Array of shape (9,) with random floats in the half-open
        # interval [0.0, 1.0).
        uniform_samples = random_sample(size)
        print(uniform_samples)

        # Array of shape (9,) with samples from an exponential distribution.
        exponential_samples = random_exponential(scale, size)
        print(exponential_samples)

        # Array of shape (9,) with samples from a normal distribution.
        normal_samples = random_normal(0.0, 0.1, size)
        print(normal_samples)

    print("Done...")
def main():
    """Run ``driver`` on ``usm_ndarray`` operands with shared USM buffers.

    Two random float32 inputs are copied into shared-buffer ``usm_ndarray``
    objects; a third, same-shaped ``usm_ndarray`` is allocated and passed to
    ``driver`` without being filled from the host.
    """
    global_size = 10
    element_count = global_size
    print("N", element_count)

    host_a = np.array(np.random.random(element_count), dtype=np.float32)
    host_b = np.array(np.random.random(element_count), dtype=np.float32)
    host_c = np.ones_like(host_a)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dpctl.device_context(device):
        usm_a = dpt.usm_ndarray(host_a.shape, dtype=host_a.dtype, buffer="shared")
        # Host data is handed over as a flat view of raw bytes.
        raw_a = host_a.reshape(-1).view("|u1")
        usm_a.usm_data.copy_from_host(raw_a)

        usm_b = dpt.usm_ndarray(host_b.shape, dtype=host_b.dtype, buffer="shared")
        raw_b = host_b.reshape(-1).view("|u1")
        usm_b.usm_data.copy_from_host(raw_b)

        usm_c = dpt.usm_ndarray(host_c.shape, dtype=host_c.dtype, buffer="shared")

        driver(usm_a, usm_b, usm_c, global_size)

    print("Done...")
def main():
    """Run the example through the API selected with ``--api``.

    ``numba-dppy`` launches ``dppy_kernel`` on the default SYCL device;
    the default, ``numba``, calls ``numba_func_driver`` instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--api",
        required=False,
        default="numba",
        choices=["numba", "numba-dppy"],
        help="Start the version of functions using numba or numba-dppy API",
    )
    options = parser.parse_args()

    print("Using API:", options.api)

    global_size = 10
    element_count = global_size

    lhs = np.arange(element_count, dtype=np.float32)
    rhs = np.arange(element_count, dtype=np.float32)
    out = np.empty_like(lhs)

    if options.api != "numba-dppy":
        numba_func_driver(lhs, rhs, out)
    else:
        device = dpctl.select_default_device()
        with dppy.offload_to_sycl_device(device):
            dppy_kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](lhs, rhs, out)

    print("Done...")
def private_memory():
    """Demonstrate numba-dppy's ``private.array`` intrinsic.

    ``private.array`` creates a static array in the device's private address
    space. Each work item stores its global id in such an array and writes
    twice that value to the output, which is checked against the expected
    values and printed.
    """

    @numba_dppy.kernel
    def private_memory_kernel(A):
        gid = numba_dppy.get_global_id(0)
        scratch = numba_dppy.private.array(shape=1, dtype=np.float32)

        # Preload the private slot with this work item's id.
        scratch[0] = gid
        numba_dppy.barrier(numba_dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence

        # The value read back is only deterministic if ``scratch`` is
        # private to each work item.
        A[gid] = scratch[0] * 2

    length = 4
    output = np.zeros(length).astype(np.float32)
    reference = np.arange(length).astype(np.float32)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with numba_dppy.offload_to_sycl_device(device):
        private_memory_kernel[length, length](output)

    np.testing.assert_allclose(reference * 2, output)
    # Expected output: orig[i] * 2, i.e. [0, 2, 4, ...].
    print(output)
def test_dppy_kernel_valid_usm_obj(dtype):
    """Check that a ``numba_dppy.kernel`` accepts ``DuckUSMArray`` arguments.

    Three ``DuckUSMArray`` objects are built from host buffers of ``dtype``
    and passed to ``vecadd``; the test fails if the launch raises.

    Args:
        dtype: Element type used for all three host buffers.
    """
    N = 1024

    def as_duck_array(host_values):
        # Wrap a host ndarray in the USM-backed duck-typed container.
        return DuckUSMArray(
            shape=host_values.shape, dtype=dtype, host_buffer=host_values
        )

    lhs = as_duck_array(np.arange(0, N, dtype=dtype))
    rhs = as_duck_array(np.arange(0, N, dtype=dtype))
    out = as_duck_array(np.zeros(N, dtype=dtype))

    failure_message = (
        "Could not pass Python object with sycl_usm_array_interface"
        " to a kernel."
    )

    try:
        default_device = dpctl.select_default_device()
        with dpctl.device_context(default_device):
            vecadd[N, dppy.DEFAULT_LOCAL_SIZE](lhs, rhs, out)
    except Exception:
        pytest.fail(failure_message)
def test_slice_suai(usm_type):
    """Check that slices of a USM array copy back equal to the host slices.

    Args:
        usm_type: USM allocation type forwarded to ``_from_numpy``.
    """
    host_data = np.arange(0, 10, dtype="u1")
    device = dpctl.select_default_device()
    usm_data = _from_numpy(host_data, device=device, usm_type=usm_type)

    selections = (slice(2, 3, None), slice(5, 7, None), slice(3, 9, None))
    for ind in selections:
        copied_back = dpm.as_usm_memory(usm_data[ind]).copy_to_host()
        expected = host_data[ind]
        assert np.array_equal(copied_back, expected), "Failed for {}".format(ind)
def test_vectorize():
    """Call ``vector_add`` on two 5x2 float64 arrays and print the result."""
    shape = (5, 2)
    lhs = np.arange(10, dtype=np.float64).reshape(shape)
    rhs = np.arange(10, dtype=np.float64).reshape(shape)

    default_device = dpctl.select_default_device()
    with dpctl.device_context(default_device):
        total = vector_add(lhs, rhs)

    print(total)
def create_default_device():
    """Create and return the default ``SyclDevice``.

    The device is obtained both through the ``dpctl.SyclDevice()``
    constructor and through ``dpctl.select_default_device()``; the two are
    asserted to be equal. Which device is created can be influenced by the
    SYCL_DEVICE_FILTER environment variable, which determines the SYCL
    devices seen by the SYCL runtime.

    Returns:
        The device produced by ``dpctl.SyclDevice()``.
    """
    constructed = dpctl.SyclDevice()
    selected = dpctl.select_default_device()
    assert constructed == selected
    print_device(constructed)
    return constructed
def is_available():
    """Report whether dpctl provides a usable (non-host) default device.

    dpctl raises ``ValueError`` when no default device can be found, which
    implies that numba-dppy cannot create a SYCL queue to compile kernels.
    A default device that reports ``is_host`` is treated as unavailable too.

    Returns:
        bool: True if a default SYCL device is found and it is not the host
        device, otherwise False.
    """
    try:
        default_device = dpctl.select_default_device()
        usable = not default_device.is_host
    except ValueError:
        usable = False
    return usable
def main():
    """Launch ``data_parallel_sum`` on the module-level arrays.

    Prints the inputs ``a`` and ``b`` before the launch and the output ``c``
    afterwards, all while the default SYCL device is the offload target.
    """
    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dppy.offload_to_sycl_device(default_device):
        print("before A: ", a)
        print("before B: ", b)

        launch = data_parallel_sum[global_size, local_size]
        launch(a, b, c)

        print("after C: ", c)

    print("Done...")
def main():
    """Call ``driver`` with two ten-element arrays of ones.

    The call is made while the default SYCL device is the offload target.
    """
    element_count = 10
    first = np.ones(element_count)
    second = np.ones(element_count)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dppy.offload_to_sycl_device(default_device):
        driver(first, second, element_count)

    print("Done...")
def main():
    """Parse ``--iter`` and call ``run`` in the default device's context.

    ``--iter`` is an integer defaulting to 10 and is passed to ``run``
    unchanged.
    """
    parser = argparse.ArgumentParser(description="Black-Scholes")
    parser.add_argument("--iter", dest="iter", type=int, default=10)
    options = parser.parse_args()
    # Named so that the ``iter`` builtin is not shadowed.
    iteration_count = options.iter

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dpctl.device_context(default_device):
        run(iteration_count)

    print("Done...")
def test_slice_constructor_1d():
    """Check 1-D slicing of a device USM array against NumPy slicing.

    A 37-element int32 range is converted with ``_from_numpy``; each slice
    of it, converted back with ``_to_numpy``, must equal the same slice of
    the host array.
    """
    host_data = np.arange(37, dtype="i4")
    device = dpctl.select_default_device()
    usm_data = _from_numpy(host_data, device=device, usm_type="device")

    # Positive and negative steps, with and without explicit bounds.
    selections = (
        slice(1, None, 2),
        slice(0, None, 3),
        slice(1, None, 3),
        slice(2, None, 3),
        slice(None, None, -1),
        slice(-2, 2, -2),
        slice(-1, 1, -2),
        slice(None, None, -13),
    )
    for ind in selections:
        round_tripped = _to_numpy(usm_data[ind])
        expected = host_data[ind]
        assert np.array_equal(round_tripped, expected), "Failed for {}".format(ind)
def main():
    """Run ``driver`` on the default SYCL device and print the mean timing.

    The value returned by ``driver`` is converted to a float32 array and its
    mean is printed together with the module-level ``args.r``.
    """
    timings = None

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dppy.offload_to_sycl_device(default_device):
        timings = driver()

    timings = np.asarray(timings, dtype=np.float32)
    average = timings.mean()
    print("Average time of %d runs is = %fs" % (args.r, average))

    print("Done...")
def main():
    """Call ``add_two_arrays`` on two arrays of ones and print the result.

    The call is made inside the default device's context.
    """
    element_count = 10
    first = np.ones(element_count)
    second = np.ones(element_count)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dpctl.device_context(default_device):
        total = add_two_arrays(first, second)

    print("Result :", total)

    print("Done...")
def main():
    """Call ``driver`` with two float32 ranges and an empty output array.

    The call is made while the default SYCL device is the offload target.
    """
    global_size = 10
    element_count = global_size

    lhs = np.arange(element_count, dtype=np.float32)
    rhs = np.arange(element_count, dtype=np.float32)
    out = np.empty_like(lhs)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dppy.offload_to_sycl_device(default_device):
        driver(lhs, rhs, out, global_size)

    print("Done...")
def main():
    """Call ``driver`` with two random float32 inputs and an output of ones.

    The call is made inside the default device's context.
    """
    global_size = 10
    element_count = global_size
    print("N", element_count)

    lhs = np.array(np.random.random(element_count), dtype=np.float32)
    rhs = np.array(np.random.random(element_count), dtype=np.float32)
    out = np.ones_like(lhs)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dpctl.device_context(default_device):
        driver(lhs, rhs, out, global_size)

    print("Done...")
def test_njit():
    """Call ``ufunc_kernel`` on two float64 arrays and print the result.

    The inputs are ``0..9`` and ``0..9`` scaled by ten; the call is made
    inside the default device's context.
    """
    element_count = 10
    element_type = np.float64

    lhs = np.arange(element_count, dtype=element_type)
    rhs = np.arange(element_count, dtype=element_type) * 10

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dpctl.device_context(default_device):
        combined = ufunc_kernel(lhs, rhs)

    print(combined)

    print("Done...")
def sum_reduce(A):
    """Sum ``A`` by repeatedly launching ``sum_reduction_kernel``.

    The size of ``A`` should be a power of two; other sizes larger than one
    are not validated here. Each pass launches the kernel over half of the
    remaining length until a single value is left in ``R[0]``.

    Args:
        A: Array to reduce. NOTE(review): the kernel receives ``A`` itself,
            so it may be modified in place - confirm against the kernel.

    Returns:
        The sum of the elements: a zero of ``A.dtype`` for an empty input,
        ``A[0]`` for a single element, otherwise ``R[0]`` after the passes.
    """
    total = len(A)

    # With fewer than two elements the reduction loop never runs, so
    # ``R[0]`` would be an uninitialised random value (one element) or an
    # IndexError (no elements). Answer these cases directly on the host.
    if total == 0:
        return np.array(0, dtype=A.dtype)[()]
    if total == 1:
        return A[0]

    # The largest pass needs half the size of A to store the pairwise sums.
    R = np.array(np.random.random(math.ceil(total / 2)), dtype=A.dtype)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dpctl.device_context(device):
        while total > 1:
            global_size = total // 2
            sum_reduction_kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](
                A, R, global_size
            )
            total = total // 2

    return R[0]
def test_dppy_kernel_invalid_usm_obj(dtype):
    """Check that a ``numba_dppy.kernel`` rejects ``PseudoDuckUSMArray``.

    Launching ``vecadd`` with three ``PseudoDuckUSMArray`` instances is
    expected to raise.

    Args:
        dtype: Supplied by the test parametrisation; not used in the body.
    """
    N = 1024

    fake_a, fake_b, fake_c = (PseudoDuckUSMArray() for _ in range(3))

    with pytest.raises(Exception), dpctl.device_context(
        dpctl.select_default_device()
    ):
        vecadd[N, dppy.DEFAULT_LOCAL_SIZE](fake_a, fake_b, fake_c)
def main():
    """Call ``driver`` on 8x8 float32 matrices and print the output matrix.

    The call is made while the default SYCL device is the offload target.
    """
    # Array dimensions
    rows = 8
    cols = 8
    global_size = rows, cols
    cell_count = rows * cols

    lhs = np.arange(cell_count, dtype=np.float32).reshape(rows, cols)
    rhs = np.array(np.random.random(cell_count), dtype=np.float32).reshape(
        rows, cols
    )
    out = np.ones_like(lhs).reshape(rows, cols)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dppy.offload_to_sycl_device(default_device):
        driver(lhs, rhs, out, global_size)

    print(out)

    print("Done...")
def main():
    """Run ``driver`` on two X-by-X matrices and verify the result on the host.

    ``driver(a, b, c)`` is called while the default SYCL device is the
    offload target; ``c`` is then compared with the matrix product of ``a``
    and ``b`` computed by NumPy.

    Raises:
        AssertionError: If ``c`` is not close to the host-computed product.
    """
    a = np.arange(X * X, dtype=np.float32).reshape(X, X)
    b = np.array(np.random.random(X * X), dtype=np.float32).reshape(X, X)
    c = np.ones_like(a).reshape(X, X)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        driver(a, b, c)

    # Host reference using plain ndarrays. ``np.matrix`` is no longer
    # recommended by NumPy; ``np.dot`` on 2-D arrays is the same product.
    expected = np.dot(a, b)

    # Check result
    assert np.allclose(c, expected)

    print("Done...")
def sum_reduce(A):
    """Sum ``A`` using per-work-group partial sums finished on the host.

    ``sum_reduction_kernel`` is launched with a work-group size of 64 and
    fills one partial sum per work group; the partial sums are then added
    up sequentially on the host.

    Args:
        A: Array to reduce; ``len(A) // 64`` partial-sum slots are allocated.

    Returns:
        The accumulated total of the partial sums (the int ``0`` if there
        are no work groups).
    """
    work_group_size = 64
    global_size = len(A)

    # The number of work groups has to be even for this implementation.
    nb_work_groups = global_size // work_group_size

    partial_sums = np.zeros(nb_work_groups).astype(A.dtype)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dppy.offload_to_sycl_device(default_device):
        sum_reduction_kernel[global_size, work_group_size](A, partial_sums)

    # Finish on the host, adding the partial sums in index order.
    final_sum = 0
    for group_total in partial_sums:
        final_sum += group_total

    return final_sum
def main():
    """
    Demonstrate numba_dppy's ``atomic_add`` intrinsic on a SYCL device.

    The device is obtained with ``dpctl.select_default_device``. A kernel
    with 100 work items atomically adds 1 to the single element of a
    float32 array, which is printed afterwards.

    To generate native floating point atomics for supported SYCL devices,
    two environment variables need to be set:
    NUMBA_DPPY_ACTIVATE_ATOMICS_FP_NATIVE=1
    NUMBA_DPPY_LLVM_SPIRV_ROOT=/path/to/dpcpp/provided/llvm_spirv

    To run this example:
    NUMBA_DPPY_ACTIVATE_ATOMICS_FP_NATIVE=1 NUMBA_DPPY_LLVM_SPIRV_ROOT=/path/to/dpcpp/provided/llvm_spirv python atomic_op.py

    Without these two environment variables numba_dppy uses another
    implementation for floating point atomics.
    """

    @dppy.kernel
    def atomic_add(a):
        dppy.atomic.add(a, 0, 1)

    work_items = 100
    counter = np.array([0], dtype=np.float32)

    # The default device can be changed with the SYCL_DEVICE_FILTER
    # environment variable. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    default_device = dpctl.select_default_device()
    print("Using device ...")
    default_device.print_device_info()

    with dpctl.device_context(default_device):
        atomic_add[work_items, dppy.DEFAULT_LOCAL_SIZE](counter)

    # Expected 100, because there are 100 work items.
    print(counter)

    print("Done...")