Пример #1
0
def test_local_memory(filter_str):
    if skip_test(filter_str):
        pytest.skip()
    blocksize = 10

    if skip_if_win():
        pytest.skip()

    @dppy.kernel("void(float32[::1])")
    def reverse_array(A):
        lm = dppy.local.array(shape=10, dtype=np.float32)
        i = dppy.get_global_id(0)

        # preload
        lm[i] = A[i]
        # barrier local or global will both work as we only have one work group
        dppy.barrier(dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence
        # write
        A[i] += lm[blocksize - 1 - i]

    arr = np.arange(blocksize).astype(np.float32)
    orig = arr.copy()

    with dppy.offload_to_sycl_device(filter_str):
        reverse_array[blocksize, blocksize](arr)

    expected = orig[::-1] + orig
    np.testing.assert_allclose(expected, arr)
Пример #2
0
def test_caching_kernel_using_same_context(filter_str):
    """Test kernel caching for the scenario where different SYCL queues that
    share a SYCL context are used to submit a kernel.

    Args:
        filter_str: SYCL filter selector string
    """
    if skip_test(filter_str):
        pytest.skip()
    global_size = 10
    N = global_size

    def data_parallel_sum(a, b, c):
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    a = np.array(np.random.random(N), dtype=np.float32)
    b = np.array(np.random.random(N), dtype=np.float32)
    c = np.ones_like(a)

    # Set the global queue to the default device so that the cached_kernel gets
    # created for that device
    dpctl.set_global_queue(filter_str)
    func = dppy.kernel(data_parallel_sum)
    default_queue = dpctl.get_current_queue()
    cached_kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
        func._get_argtypes(a, b, c), default_queue)
    for i in range(0, 10):
        # Each iteration create a fresh queue that will share the same context
        with dppy.offload_to_sycl_device(filter_str) as gpu_queue:
            _kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
                func._get_argtypes(a, b, c), gpu_queue)
            assert _kernel == cached_kernel
Пример #3
0
def test_consuming_array_from_dpnp(offload_device, dtype):
    if not ensure_dpnp():
        pytest.skip("No DPNP")

    import dpnp

    if skip_test(offload_device):
        pytest.skip("No device for " + offload_device)

    if ("opencl" not in dpctl.get_current_queue().sycl_device.filter_string
            and "opencl" in offload_device):
        pytest.skip("Bug in DPNP. See: IntelPython/dpnp#723")

    @dppy.kernel
    def data_parallel_sum(a, b, c):
        """
        Vector addition using the ``kernel`` decorator.
        """
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    global_size = 1021

    with dppy.offload_to_sycl_device(offload_device):
        a = dppy.asarray(dpnp.arange(global_size, dtype=dtype))
        b = dppy.asarray(dpnp.arange(global_size, dtype=dtype))
        c = dppy.asarray(dpnp.ones_like(a))

        data_parallel_sum[global_size, dppy.DEFAULT_LOCAL_SIZE](a, b, c)
Пример #4
0
def test_caching_kernel_using_same_queue(filter_str):
    """Test kernel caching when the same queue is used to submit a kernel
    multiple times.

    Args:
        filter_str: SYCL filter selector string
    """
    if skip_test(filter_str):
        pytest.skip()
    global_size = 10
    N = global_size

    def data_parallel_sum(a, b, c):
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    a = np.array(np.random.random(N), dtype=np.float32)
    b = np.array(np.random.random(N), dtype=np.float32)
    c = np.ones_like(a)

    with dppy.offload_to_sycl_device(filter_str) as gpu_queue:
        func = dppy.kernel(data_parallel_sum)
        cached_kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
            func._get_argtypes(a, b, c), gpu_queue)

        for i in range(10):
            _kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
                func._get_argtypes(a, b, c), gpu_queue)
            assert _kernel == cached_kernel
Пример #5
0
def test_trigonometric_fn(filter_str, trig_op, input_arrays):
    if skip_test(filter_str):
        pytest.skip()

    # FIXME: Why does archcosh fail on Gen12 discrete graphics card?
    if trig_op == "arccosh" and is_gen12(filter_str):
        pytest.skip()

    a, b = input_arrays
    trig_fn = getattr(np, trig_op)
    actual = np.empty(shape=a.shape, dtype=a.dtype)
    expected = np.empty(shape=a.shape, dtype=a.dtype)

    if trig_op == "arctan2":

        @njit
        def f(a, b):
            return trig_fn(a, b)

        device = dpctl.SyclDevice(filter_str)
        with dppy.offload_to_sycl_device(device), assert_auto_offloading():
            actual = f(a, b)
        expected = trig_fn(a, b)
    else:

        @njit
        def f(a):
            return trig_fn(a)

        device = dpctl.SyclDevice(filter_str)
        with dppy.offload_to_sycl_device(device), assert_auto_offloading():
            actual = f(a)
        expected = trig_fn(a)

    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
Пример #6
0
def test_proper_lowering(filter_str):
    if skip_test(filter_str):
        pytest.skip()

    # We perform eager compilation at the site of
    # @dppy.kernel. This takes the default dpctl
    # queue which is level_zero backed. Level_zero
    # is not yet supported on Windows platform and
    # hence we skip these tests if the platform is
    # Windows regardless of which backend filter_str
    # specifies.
    if skip_if_win():
        pytest.skip()

    # This will trigger eager compilation
    @dppy.kernel("void(float32[::1])")
    def twice(A):
        i = dppy.get_global_id(0)
        d = A[i]
        dppy.barrier(dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence
        A[i] = d * 2

    N = 256
    arr = np.random.random(N).astype(np.float32)
    orig = arr.copy()

    with dppy.offload_to_sycl_device(filter_str):
        twice[N, N // 2](arr)

    # The computation is correct?
    np.testing.assert_allclose(orig * 2, arr)
Пример #7
0
def test_usm_ndarray_type(offload_device, dtype, usm_type):
    if skip_test(offload_device):
        pytest.skip()

    a = np.array(np.random.random(10), dtype)
    da = dpt.usm_ndarray(a.shape, dtype=a.dtype, buffer=usm_type)

    assert isinstance(typeof(da), USMNdArrayType)
    assert da.usm_type == usm_type
Пример #8
0
def test_kernel_arg_accessor(filter_str, input_arrays, kernel):
    if skip_test(filter_str):
        pytest.skip()

    a, b, actual = input_arrays
    expected = a + b
    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        call_kernel(global_size, local_size, a, b, actual, kernel)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
Пример #9
0
def test_dpctl_api(filter_str):
    if skip_test(filter_str):
        pytest.skip()

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        dpctl.lsplatform()
        dpctl.get_current_queue()
        dpctl.get_num_activated_queues()
        dpctl.is_in_device_context()
Пример #10
0
def atomic_skip_test(device_type):
    skip = False
    if skip_test(device_type):
        skip = True

    if not skip:
        if not dppy.ocl.atomic_support_present():
            skip = True

    return skip
Пример #11
0
def dpnp_skip_test(device_type):
    skip = False
    if skip_test(device_type):
        skip = True

    if not skip:
        if not ensure_dpnp():
            skip = True

    return skip
Пример #12
0
def test_kernel_arg_types(filter_str, input_arrays):
    if skip_test(filter_str):
        pytest.skip()

    kernel = dppy.kernel(mul_kernel)
    a, actual, c = input_arrays
    expected = a * c
    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        kernel[global_size, local_size](a, actual, c)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
Пример #13
0
def test_return(offload_device, sig):
    if skip_test(offload_device):
        pytest.skip()

    a = np.array(np.random.random(122), np.int32)

    with pytest.raises(TypeError):
        kernel = dppy.kernel(sig)(f)

        device = dpctl.SyclDevice(offload_device)
        with dppy.offload_to_sycl_device(device):
            kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a)
Пример #14
0
def test_bool_type(filter_str):
    if skip_test(filter_str):
        pytest.skip()

    kernel = dppy.kernel(check_bool_kernel)
    a = np.array([2], np.int64)

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, True)
        assert a[0] == 111
        kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, False)
        assert a[0] == 222
Пример #15
0
def test_consuming_usm_ndarray(offload_device, dtype, usm_type):
    if skip_test(offload_device):
        pytest.skip()

    @dppy.kernel
    def data_parallel_sum(a, b, c):
        """
        Vector addition using the ``kernel`` decorator.
        """
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    global_size = 1021
    N = global_size

    a = np.array(np.random.random(N), dtype=dtype)
    b = np.array(np.random.random(N), dtype=dtype)

    got = np.ones_like(a)

    with dppy.offload_to_sycl_device(offload_device) as gpu_queue:
        da = dpt.usm_ndarray(
            a.shape,
            dtype=a.dtype,
            buffer=usm_type,
            buffer_ctor_kwargs={"queue": gpu_queue},
        )
        da.usm_data.copy_from_host(a.reshape((-1)).view("|u1"))

        db = dpt.usm_ndarray(
            b.shape,
            dtype=b.dtype,
            buffer=usm_type,
            buffer_ctor_kwargs={"queue": gpu_queue},
        )
        db.usm_data.copy_from_host(b.reshape((-1)).view("|u1"))

        dc = dpt.usm_ndarray(
            got.shape,
            dtype=got.dtype,
            buffer=usm_type,
            buffer_ctor_kwargs={"queue": gpu_queue},
        )

        data_parallel_sum[global_size, dppy.DEFAULT_LOCAL_SIZE](da, db, dc)

        dc.usm_data.copy_to_host(got.reshape((-1)).view("|u1"))

        expected = a + b

        assert np.array_equal(got, expected)
Пример #16
0
def test_dpnp_create_array_in_context(offload_device, dtype):
    if not ensure_dpnp():
        pytest.skip("No DPNP")

    import dpnp

    if skip_test(offload_device):
        pytest.skip("No device for " + offload_device)

    if ("opencl" not in dpctl.get_current_queue().sycl_device.filter_string
            and "opencl" in offload_device):
        pytest.skip("Bug in DPNP. See: IntelPython/dpnp#723")

    with dpctl.device_context(offload_device):
        a = dpnp.arange(1024, dtype=dtype)  # noqa
Пример #17
0
def test_strided_array_kernel(offload_device):
    if skip_test(offload_device):
        pytest.skip()

    global_size = 606
    a = np.arange(global_size * 2, dtype="i4")[::2]
    b = np.arange(global_size, dtype="i4")[::-1]
    got = np.zeros(global_size, dtype="i4")

    expected = a + b

    with dpctl.device_context(offload_device):
        sum[global_size, numba_dppy.DEFAULT_LOCAL_SIZE](a, b, got)

    assert np.array_equal(expected, got)
Пример #18
0
def test_print(filter_str, input_arrays, capfd):
    if skip_test(filter_str):
        pytest.skip()

    @dppy.kernel
    def f(a):
        print("test", a[0])

    a = input_arrays
    global_size = 3

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        f[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
        captured = capfd.readouterr()
        assert "test" in captured.out
Пример #19
0
def test_as_usm_obj(offload_device):
    if skip_test(offload_device):
        pytest.skip()

    a = np.ones(1023, dtype=np.float32)
    b = a * 3

    with dpctl.device_context(offload_device) as queue:
        a_copy = np.empty_like(a)
        usm_mem = as_usm_obj(a, queue=queue)
        copy_to_numpy_from_usm_obj(usm_mem, a_copy)
        assert np.all(a == a_copy)

        b_copy = np.empty_like(b)
        usm_mem = as_usm_obj(b, queue=queue, copy=False)
        copy_to_numpy_from_usm_obj(usm_mem, b_copy)
        assert np.any(np.not_equal(b, b_copy))
Пример #20
0
def test_unary_ops(filter_str, unary_op, input_arrays):
    if skip_test(filter_str):
        pytest.skip()

    a = input_arrays[0]
    uop = getattr(np, unary_op)
    actual = np.empty(shape=a.shape, dtype=a.dtype)
    expected = np.empty(shape=a.shape, dtype=a.dtype)

    @njit
    def f(a):
        return uop(a)

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device), assert_auto_offloading():
        actual = f(a)

    expected = uop(a)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
Пример #21
0
def test_binary_ops(filter_str, unary_op, input_arrays):
    if skip_test(filter_str):
        pytest.skip()

    a, actual = input_arrays
    uop = getattr(math, unary_op)
    np_uop = getattr(np, unary_op)

    @dppy.kernel
    def f(a, b):
        i = dppy.get_global_id(0)
        b[i] = uop(a[i])

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        f[a.size, dppy.DEFAULT_LOCAL_SIZE](a, actual)

    expected = np_uop(a)

    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
Пример #22
0
def test_has_usm_memory(offload_device):
    if skip_test(offload_device):
        pytest.skip()

    a = np.ones(1023, dtype=np.float32)

    with dpctl.device_context(offload_device):
        # test usm_ndarray
        da = dpt.usm_ndarray(a.shape, dtype=a.dtype, buffer="shared")
        usm_mem = has_usm_memory(da)
        assert da.usm_data._pointer == usm_mem._pointer

        # test usm allocated numpy.ndarray
        buf = dpctl_mem.MemoryUSMShared(a.size * a.dtype.itemsize)
        ary_buf = np.ndarray(a.shape, buffer=buf, dtype=a.dtype)
        usm_mem = has_usm_memory(ary_buf)
        assert buf._pointer == usm_mem._pointer

        usm_mem = has_usm_memory(a)
        assert usm_mem is None
def test_unary_ops(filter_str, unary_op, input_arrays):
    if skip_test(filter_str):
        pytest.skip()

    # FIXME: Why does sign fail on Gen12 discrete graphics card?
    skip_ops = ["sign", "log", "log2", "log10", "expm1"]
    if unary_op in skip_ops and is_gen12(filter_str):
        pytest.skip()

    a = input_arrays[0]
    uop = getattr(np, unary_op)
    actual = np.empty(shape=a.shape, dtype=a.dtype)
    expected = np.empty(shape=a.shape, dtype=a.dtype)

    @njit
    def f(a):
        return uop(a)

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device), assert_auto_offloading():
        actual = f(a)

    expected = uop(a)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
Пример #24
0
def test_no_arg_barrier_support(filter_str):
    if skip_test(filter_str):
        pytest.skip()

    if skip_if_win():
        pytest.skip()

    @dppy.kernel("void(float32[::1])")
    def twice(A):
        i = dppy.get_global_id(0)
        d = A[i]
        # no argument defaults to global mem fence
        dppy.barrier()
        A[i] = d * 2

    N = 256
    arr = np.random.random(N).astype(np.float32)
    orig = arr.copy()

    with dppy.offload_to_sycl_device(filter_str):
        twice[N, dppy.DEFAULT_LOCAL_SIZE](arr)

    # The computation is correct?
    np.testing.assert_allclose(orig * 2, arr)