Пример #1
0
def sum_reduce(A):
    """Stage ``A`` in USM shared memory and run the recursive sum reduction.

    The input and a zero-filled array holding one slot per work group are
    copied into NumPy views backed by ``MemoryUSMShared`` allocations. Both
    views are then passed to ``sum_recursive_reduction`` inside a device
    context for the default SYCL device.

    Args:
        A: NumPy array to reduce; its ``shape``, ``size`` and ``dtype`` are
            used to size the shared allocations.

    Returns:
        Whatever ``sum_recursive_reduction`` returns.
    """
    n_items = len(A)
    group_size = 64
    # Ceiling division: a trailing partial chunk gets its own work group.
    n_groups, leftover = divmod(n_items, group_size)
    if leftover != 0:
        n_groups += 1

    partial_sums = np.zeros(n_groups).astype(A.dtype)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dpctl.device_context(target):
        # Shared-memory copy of the input.
        input_usm = dpctl_mem.MemoryUSMShared(A.size * A.dtype.itemsize)
        input_view = np.ndarray(A.shape, buffer=input_usm, dtype=A.dtype)
        np.copyto(input_view, A)

        # Shared-memory copy of the (zeroed) partial sums.
        sums_nbytes = partial_sums.size * partial_sums.dtype.itemsize
        sums_usm = dpctl_mem.MemoryUSMShared(sums_nbytes)
        sums_view = np.ndarray(
            partial_sums.shape, buffer=sums_usm, dtype=partial_sums.dtype
        )
        np.copyto(sums_view, partial_sums)

        result = sum_recursive_reduction(
            n_items, group_size, input_view, sums_view
        )

    return result
Пример #2
0
def main():
    """Launch the Black-Scholes kernel ``iterations`` times and print results.

    The kernel is indexed with ``(512, 1)`` and a second pair derived from
    ``OPT_N``; it is invoked on the default SYCL device with the module-level
    input/output arrays, which are printed afterwards.
    """
    threads = 512
    blockdim = threads, 1
    griddim = int(math.ceil(float(OPT_N) / threads)), 1

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dppy.offload_to_sycl_device(target):
        for _ in range(iterations):
            black_scholes_dppy[blockdim, griddim](
                callResult, putResult,
                stockPrice, optionStrike, optionYears,
                RISKFREE, VOLATILITY,
            )

    for label, values in (
        ("callResult : \n", callResult),
        ("putResult : \n", putResult),
    ):
        print(label, values)

    print("Done...")
Пример #3
0
def main():
    """Run ``f1(a, b)`` on the default SYCL device and check the 5-D result.

    After the offloaded call, every index in ``range(N)`` along five axes of
    the result is compared against ``2.0``. Only the first mismatching index
    (in row-major order) is reported, after which the scan stops.
    """
    # Local import so this function does not depend on the file's import block.
    import itertools

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        c = f1(a, b)

    print("c:", c, hex(c.ctypes.data))
    # One flat loop instead of five nested ones: a ``break`` inside nested
    # loops only leaves the innermost loop, which made the "first index"
    # message repeat. ``product`` visits indices in the same order as the
    # nested loops did (last axis varies fastest).
    for index in itertools.product(range(N), repeat=5):
        if c[index] != 2.0:
            print("First index not equal to 2.0 was", *index)
            break

    print("Done...")
Пример #4
0
def main():
    """Add two arrays of ones via ``f1`` on the default device and verify.

    Two float32 arrays of ``64 * 32`` ones are printed together with their
    host addresses, passed to ``f1`` inside an offload context, and the
    result is scanned for the first element different from ``2.0``.
    """
    N = 64 * 32  # global size times local size
    print("N", N)

    a, b = (np.ones(N, dtype=np.float32) for _ in range(2))

    for tag, arr in (("a:", a), ("b:", b)):
        print(tag, arr, hex(arr.ctypes.data))

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dppy.offload_to_sycl_device(target):
        c = f1(a, b)

    print("RESULT c:", c, hex(c.ctypes.data))
    # Report only the first mismatching position, if there is one.
    first_bad = next((pos for pos in range(N) if c[pos] != 2.0), None)
    if first_bad is not None:
        print("First index not equal to 2.0 was", first_bad)

    print("Done...")
Пример #5
0
def no_arg_barrier_support():
    """Show numba_dppy's ``barrier`` intrinsic called without arguments.

    A small kernel reads each element, hits ``dppy.barrier()`` and then
    writes back twice the value it read. The array is printed before and
    after the kernel runs on the default SYCL device.
    """
    @dppy.kernel
    def twice(A):
        gid = dppy.get_global_id(0)
        loaded = A[gid]
        # no argument defaults to global mem fence
        dppy.barrier()
        A[gid] = loaded * 2

    n_items = 10
    data = np.arange(n_items).astype(np.float32)
    print(data)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dpctl.device_context(target):
        twice[n_items, dppy.DEFAULT_LOCAL_SIZE](data)

    # Expected output: the input doubled, i.e. [0, 2, 4, 6, ...]
    print(data)
Пример #6
0
def local_memory():
    """Show numba-dppy's ``local.array`` intrinsic inside a kernel.

    The kernel copies its input into a 10-element ``dppy.local.array``,
    synchronises with a local memory fence, and then adds the mirrored
    element from that array back into the input. The array is printed
    before and after the kernel runs on the default SYCL device.
    """
    blocksize = 10

    @dppy.kernel
    def reverse_array(A):
        scratch = dppy.local.array(shape=10, dtype=float32)
        gid = dppy.get_global_id(0)

        # Stage the element in the local array.
        scratch[gid] = A[gid]
        # Either a local or a global barrier works here, since there is
        # only one work group.
        dppy.barrier(dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence
        # Accumulate the mirrored element.
        mirrored = blocksize - 1 - gid
        A[gid] += scratch[mirrored]

    data = np.arange(blocksize).astype(np.float32)
    print(data)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dpctl.device_context(target):
        reverse_array[blocksize, dppy.DEFAULT_LOCAL_SIZE](data)

    # Expected output: orig[::-1] + orig, i.e. [9, 9, 9, ...]
    print(data)
Пример #7
0
def main():
    """Call the random-number helpers on the default device and print each.

    ``rand``, ``random_sample``, ``random_exponential`` and ``random_normal``
    are invoked in that order inside a device context; every result is
    printed as soon as it is produced.
    """
    size = 9
    scale = 3.0

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dpctl.device_context(target):
        # Per the original notes: random values in a given shape (3, 2).
        print(rand())

        # Expected: array of shape (9,) with random floats in the
        # half-open interval [0.0, 1.0).
        print(random_sample(size))

        # Expected: array of shape (9,) drawn from an exponential
        # distribution.
        print(random_exponential(scale, size))

        # Expected: array of shape (9,) drawn from a normal distribution.
        print(random_normal(0.0, 0.1, size))

    print("Done...")
Пример #8
0
def main():
    """Copy two random host arrays into shared USM arrays and run ``driver``.

    Two float32 arrays of 10 random values are mirrored into
    ``dpt.usm_ndarray`` objects created with ``buffer="shared"``; a third
    shared array shaped like the host output is allocated without
    initialisation. All three are passed to ``driver`` with the global size.
    """
    global_size = 10
    N = global_size
    print("N", N)

    a = np.array(np.random.random(N), dtype=np.float32)
    b = np.array(np.random.random(N), dtype=np.float32)
    c = np.ones_like(a)

    def shared_like(host):
        # Allocate a shared USM array with the host array's shape and dtype.
        return dpt.usm_ndarray(host.shape, dtype=host.dtype, buffer="shared")

    def as_bytes(host):
        # Flat byte view of the host data for the raw USM copy.
        return host.reshape((-1)).view("|u1")

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dpctl.device_context(target):
        da = shared_like(a)
        da.usm_data.copy_from_host(as_bytes(a))

        db = shared_like(b)
        db.usm_data.copy_from_host(as_bytes(b))

        # Output array: allocated only, nothing is copied in.
        dc = shared_like(c)

        driver(da, db, dc, global_size)

    print("Done...")
Пример #9
0
def main():
    """Run the example through either the numba or the numba-dppy code path.

    The ``--api`` command-line option (default ``"numba"``) selects between
    calling ``numba_func_driver`` directly and launching ``dppy_kernel`` on
    the default SYCL device.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--api",
        required=False,
        default="numba",
        choices=["numba", "numba-dppy"],
        help="Start the version of functions using numba or numba-dppy API",
    )
    args = cli.parse_args()

    print("Using API:", args.api)

    global_size = 10

    a = np.arange(global_size, dtype=np.float32)
    b = np.arange(global_size, dtype=np.float32)
    c = np.empty_like(a)

    if args.api != "numba-dppy":
        numba_func_driver(a, b, c)
    else:
        target = dpctl.select_default_device()
        with dppy.offload_to_sycl_device(target):
            dppy_kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a, b, c)

    print("Done...")
def private_memory():
    """Show numba-dppy's ``private.array`` intrinsic inside a kernel.

    Each work item stores its global id in a one-element
    ``numba_dppy.private.array``, waits at a barrier, and writes twice the
    stored value to the output. The output is checked against
    ``arange(N) * 2`` and printed.
    """
    @numba_dppy.kernel
    def private_memory_kernel(A):
        slot = numba_dppy.private.array(shape=1, dtype=np.float32)
        gid = numba_dppy.get_global_id(0)

        # Stash this work item's id.
        slot[0] = gid
        numba_dppy.barrier(numba_dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence

        # The value read back is only deterministic if the array is
        # private to each thread.
        A[gid] = slot[0] * 2

    N = 4
    out = np.zeros(N).astype(np.float32)
    expected = np.arange(N).astype(np.float32) * 2

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with numba_dppy.offload_to_sycl_device(target):
        private_memory_kernel[N, N](out)

    np.testing.assert_allclose(expected, out)
    # Expected output: orig[i] * 2, i.e. [0, 2, 4, ..]
    print(out)
Пример #11
0
def test_dppy_kernel_valid_usm_obj(dtype):
    """Check that a ``numba_dppy.kernel`` accepts ``DuckUSMArray`` arguments.

    Three ``DuckUSMArray`` objects are built from host NumPy buffers (two
    ``arange`` inputs and a zeroed output) and passed to ``vecadd`` inside a
    device context. Per the original description, ``DuckUSMArray`` exposes
    ``__sycl_usm_array_interface__`` on top of ``dpctl.memory``; the test
    fails if launching the kernel raises any exception.

    Args:
        dtype: Element type used for all three host buffers.
    """
    N = 1024

    host_buffers = (
        np.arange(0, N, dtype=dtype),
        np.arange(0, N, dtype=dtype),
        np.zeros(N, dtype=dtype),
    )
    A, B, C = (
        DuckUSMArray(shape=host.shape, dtype=dtype, host_buffer=host)
        for host in host_buffers
    )

    try:
        target = dpctl.select_default_device()
        with dpctl.device_context(target):
            vecadd[N, dppy.DEFAULT_LOCAL_SIZE](A, B, C)
    except Exception:
        pytest.fail(
            "Could not pass Python object with sycl_usm_array_interface" +
            " to a kernel.")
Пример #12
0
def test_slice_suai(usm_type):
    """Check that slices of a USM array hold the same bytes as NumPy slices.

    Args:
        usm_type: USM allocation type forwarded to ``_from_numpy``.
    """
    host = np.arange(0, 10, dtype="u1")
    target = dpctl.select_default_device()
    usm = _from_numpy(host, device=target, usm_type=usm_type)
    slices = (slice(2, 3, None), slice(5, 7, None), slice(3, 9, None))
    for ind in slices:
        # Pull the sliced device memory back to the host for comparison.
        from_device = dpm.as_usm_memory(usm[ind]).copy_to_host()
        assert np.array_equal(from_device, host[ind]), \
            "Failed for {}".format(ind)
Пример #13
0
def test_vectorize():
    """Call ``vector_add`` on two (5, 2) float64 arrays and print the result."""
    A, B = (
        np.arange(10, dtype=np.float64).reshape((5, 2)) for _ in range(2)
    )

    target = dpctl.select_default_device()
    with dpctl.device_context(target):
        C = vector_add(A, B)

    print(C)
Пример #14
0
def create_default_device():
    """Create the default ``SyclDevice`` and print its description.

    The device is obtained twice, once through the argument-less
    ``dpctl.SyclDevice()`` constructor and once through
    ``dpctl.select_default_device()``; the two are asserted to be equal.
    Per the original notes this corresponds to
    ``cl::sycl::default_selector``, and the device chosen can be influenced
    by the SYCL_DEVICE_FILTER environment variable.

    Returns:
        The device created by ``dpctl.SyclDevice()``.
    """
    constructed = dpctl.SyclDevice()
    selected = dpctl.select_default_device()
    assert constructed == selected
    print_device(constructed)
    return constructed
Пример #15
0
def is_available():
    """Report whether dpctl finds a usable (non-host) default device.

    dpctl raises ``ValueError`` when no default device is found, which
    implies that numba-dppy cannot create a SYCL queue to compile kernels.

    Returns:
        bool: True if a default SYCL device is found and it is not the host
        device; False if it is the host device or ``ValueError`` is raised.
    """
    try:
        usable = not dpctl.select_default_device().is_host
    except ValueError:
        usable = False
    return usable
Пример #16
0
def main():
    """Run ``data_parallel_sum`` on the default device, printing inputs/output.

    Uses the module-level arrays ``a``, ``b``, ``c`` and the module-level
    ``global_size`` / ``local_size`` launch configuration.
    """
    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dppy.offload_to_sycl_device(target):
        for label, values in (("before A: ", a), ("before B: ", b)):
            print(label, values)
        data_parallel_sum[global_size, local_size](a, b, c)
        print("after  C: ", c)

    print("Done...")
Пример #17
0
def main():
    """Run ``driver`` on two 10-element arrays of ones on the default device."""
    N = 10
    a, b = np.ones(N), np.ones(N)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dppy.offload_to_sycl_device(target):
        driver(a, b, N)

    print("Done...")
Пример #18
0
def main():
    """Parse ``--iter`` and call ``run`` with it on the default SYCL device.

    The ``--iter`` command-line option is an integer defaulting to 10; its
    value is passed unchanged to ``run`` inside a device context.
    """
    parser = argparse.ArgumentParser(description="Black-Scholes")
    parser.add_argument("--iter", dest="iter", type=int, default=10)
    args = parser.parse_args()
    # Named ``n_iterations`` rather than ``iter`` so the builtin ``iter`` is
    # not shadowed inside this function.
    n_iterations = args.iter

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dpctl.device_context(device):
        run(n_iterations)

    print("Done...")
Пример #19
0
def test_slice_constructor_1d():
    """Check strided and negative-step slices of a 1-D USM array.

    A 37-element ``i4`` array is copied to the default device with
    ``usm_type="device"``; each slice of the USM array, converted back with
    ``_to_numpy``, must equal the same slice of the host array.
    """
    host = np.arange(37, dtype="i4")
    target = dpctl.select_default_device()
    usm = _from_numpy(host, device=target, usm_type="device")
    slices = (
        slice(1, None, 2),
        slice(0, None, 3),
        slice(1, None, 3),
        slice(2, None, 3),
        slice(None, None, -1),
        slice(-2, 2, -2),
        slice(-1, 1, -2),
        slice(None, None, -13),
    )
    for ind in slices:
        round_tripped = _to_numpy(usm[ind])
        assert np.array_equal(round_tripped, host[ind]), \
            "Failed for {}".format(ind)
Пример #20
0
def main():
    """Run ``driver`` on the default device and print the mean of its timings.

    The value returned by ``driver`` is converted to a float32 array; its
    mean is printed together with the module-level ``args.r`` run count.
    """
    timings = None

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dppy.offload_to_sycl_device(target):
        timings = driver()

    timings = np.asarray(timings, dtype=np.float32)
    mean_time = timings.mean()
    print("Average time of %d runs is = %fs" % (args.r, mean_time))

    print("Done...")
Пример #21
0
def main():
    """Call ``add_two_arrays`` on two arrays of ones and print the result."""
    N = 10
    b, c = np.ones(N), np.ones(N)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dpctl.device_context(target):
        result = add_two_arrays(b, c)

    print("Result :", result)

    print("Done...")
Пример #22
0
def main():
    """Run ``driver`` on two float32 ``arange`` inputs and an empty output.

    All arrays have 10 elements, which is also the global size passed to
    ``driver`` inside the offload context for the default SYCL device.
    """
    global_size = 10

    a = np.arange(global_size, dtype=np.float32)
    b = np.arange(global_size, dtype=np.float32)
    c = np.empty_like(a)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dppy.offload_to_sycl_device(target):
        driver(a, b, c, global_size)

    print("Done...")
Пример #23
0
def main():
    """Run ``driver`` on two random float32 arrays and an output of ones.

    All arrays have 10 elements, which is also the global size passed to
    ``driver`` inside the device context for the default SYCL device.
    """
    global_size = 10
    print("N", global_size)

    # Draw ``a`` first, then ``b``, so the random stream is consumed in the
    # same order as before.
    a = np.array(np.random.random(global_size), dtype=np.float32)
    b = np.array(np.random.random(global_size), dtype=np.float32)
    c = np.ones_like(a)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dpctl.device_context(target):
        driver(a, b, c, global_size)

    print("Done...")
Пример #24
0
def test_njit():
    """Call ``ufunc_kernel`` on two float64 arrays and print the result.

    The inputs are ``arange(10)`` and ``arange(10) * 10``; the call happens
    inside a device context for the default SYCL device.
    """
    N = 10
    dtype = np.float64

    base = np.arange(N, dtype=dtype)
    scaled = np.arange(N, dtype=dtype) * 10

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dpctl.device_context(target):
        outcome = ufunc_kernel(base, scaled)

    print(outcome)

    print("Done...")
Пример #25
0
def sum_reduce(A):
    """Sum the elements of ``A`` by repeated halving on the default device.

    The size of ``A`` should be a power of two. On every pass the kernel is
    launched with half the remaining element count until one element is
    left; the first entry of the scratch array ``R`` is then returned.

    Inputs with fewer than two elements never reach the kernel, so they are
    answered directly on the host: an empty array sums to zero and a
    single-element array sums to that element.

    Args:
        A: One-dimensional NumPy array to reduce.
            NOTE(review): the kernel is handed ``A`` on every pass and
            presumably overwrites it in place - confirm against
            ``sum_reduction_kernel``.

    Returns:
        The reduced sum as a scalar of ``A``'s dtype.
    """
    total = len(A)
    # Without these guards the reduction loop below would be skipped and the
    # random scratch value in R[0] returned (or R[0] would raise IndexError
    # for an empty input).
    if total == 0:
        return A.dtype.type(0)
    if total == 1:
        return A[0]

    # max size will require half the size of A to store sum
    R = np.array(np.random.random(math.ceil(total / 2)), dtype=A.dtype)

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dpctl.device_context(device):
        while total > 1:
            # Each pass works on half of the remaining elements.
            global_size = total // 2
            sum_reduction_kernel[global_size,
                                 dppy.DEFAULT_LOCAL_SIZE](A, R, global_size)
            total = global_size

    return R[0]
Пример #26
0
def test_dppy_kernel_invalid_usm_obj(dtype):
    """Check that a ``numba_dppy.kernel`` rejects ``PseudoDuckUSMArray`` args.

    Per the original description, ``PseudoDuckUSMArray`` only defines a fake
    ``__sycl_usm_array_interface__`` attribute. Launching ``vecadd`` with
    three such objects inside a device context must raise an exception.

    Args:
        dtype: Test parameter; not used by this test body.
    """
    N = 1024

    A, B, C = (PseudoDuckUSMArray() for _ in range(3))

    with pytest.raises(Exception):
        target = dpctl.select_default_device()
        with dpctl.device_context(target):
            vecadd[N, dppy.DEFAULT_LOCAL_SIZE](A, B, C)
Пример #27
0
def main():
    """Run ``driver`` on 8x8 float32 inputs and print the output array.

    ``a`` holds ``arange(64)``, ``b`` holds random values and ``c`` starts as
    ones; all are reshaped to (8, 8) and passed to ``driver`` together with
    the ``(8, 8)`` global size.
    """
    # Array dimensions
    rows = cols = 8
    global_size = rows, cols
    count = rows * cols

    a = np.arange(count, dtype=np.float32).reshape(rows, cols)
    b = np.array(np.random.random(count), dtype=np.float32).reshape(rows, cols)
    c = np.ones_like(a).reshape(rows, cols)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dppy.offload_to_sycl_device(target):
        driver(a, b, c, global_size)

    print(c)

    print("Done...")
Пример #28
0
def main():
    """Run ``driver`` on X-by-X inputs and check ``c`` against a host matmul.

    ``a`` holds ``arange(X * X)``, ``b`` holds random values and ``c`` starts
    as ones, all float32 and reshaped to (X, X) using the module-level ``X``.
    After ``driver`` runs on the default SYCL device, ``c`` is compared with
    the matrix product of ``a`` and ``b`` computed by NumPy.

    Raises:
        AssertionError: If ``c`` is not close to the host matrix product.
    """
    a = np.arange(X * X, dtype=np.float32).reshape(X, X)
    b = np.array(np.random.random(X * X), dtype=np.float32).reshape(X, X)
    c = np.ones_like(a).reshape(X, X)

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        driver(a, b, c)

    # Host reference computed with plain ndarrays; ``np.matrix`` is no longer
    # recommended by NumPy and ``np.matmul`` gives the same matrix product.
    expected = np.matmul(a, b)

    # Check result
    assert np.allclose(c, expected)

    print("Done...")
Пример #29
0
def sum_reduce(A):
    """Sum ``A`` with a per-work-group kernel and a final pass on the host.

    The kernel is launched over ``len(A)`` items in work groups of 64 and
    fills one partial sum per work group; the partial sums are then added
    up sequentially on the host.

    Args:
        A: NumPy array to reduce; its ``dtype`` is used for the partial sums.

    Returns:
        The sum of all partial sums (the integer ``0`` if there are none).
    """
    n_items = len(A)
    group_size = 64
    # This implementation needs an even number of work groups.
    n_groups = n_items // group_size

    partial_sums = np.zeros(n_groups).astype(A.dtype)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dppy.offload_to_sycl_device(target):
        sum_reduction_kernel[n_items, group_size](A, partial_sums)

    # Final accumulation on the host, left to right starting from 0.
    return sum(partial_sums)
Пример #30
0
def main():
    """
    Demonstrate numba_dppy's ``atomic.add`` intrinsic on the default device.

    A kernel launched over 100 work items atomically adds 1 to the single
    element of a float32 array, which is printed afterwards. The device is
    whatever ``dpctl.select_default_device`` returns.

    Per the original notes, generating native floating point atomics on
    supported SYCL devices requires two environment variables:
    NUMBA_DPPY_ACTIVATE_ATOMICS_FP_NATIVE=1
    NUMBA_DPPY_LLVM_SPIRV_ROOT=/path/to/dpcpp/provided/llvm_spirv

    To run this example that way:
    NUMBA_DPPY_ACTIVATE_ATOMICS_FP_NATIVE=1 NUMBA_DPPY_LLVM_SPIRV_ROOT=/path/to/dpcpp/provided/llvm_spirv python atomic_op.py

    Without these two environment variables numba_dppy uses another
    implementation for floating point atomics.
    """
    @dppy.kernel
    def atomic_add(a):
        dppy.atomic.add(a, 0, 1)

    global_size = 100
    counter = np.array([0], dtype=np.float32)

    # The default device can be changed through the SYCL_DEVICE_FILTER
    # environment variable, see
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    target = dpctl.select_default_device()
    print("Using device ...")
    target.print_device_info()

    with dpctl.device_context(target):
        atomic_add[global_size, dppy.DEFAULT_LOCAL_SIZE](counter)

    # Expected 100, because global_size = 100
    print(counter)

    print("Done...")